patch.rs
367 lines
| 11.7 KiB
| application/rls-services+xml
|
RustLexer
Antoine Cezar
|
r46097 | use byteorder::{BigEndian, ByteOrder}; | ||
/// A chunk of data to insert, delete or replace in a patch | ||||
/// | ||||
/// A chunk is: | ||||
/// - an insertion when `!data.is_empty() && start == end` | ||||
/// - an deletion when `data.is_empty() && start < end` | ||||
/// - a replacement when `!data.is_empty() && start < end` | ||||
/// - not doing anything when `data.is_empty() && start == end` | ||||
#[derive(Debug, Clone)] | ||||
struct PatchFrag<'a> { | ||||
/// The start position of the chunk of data to replace | ||||
start: i32, | ||||
/// The end position of the chunk of data to replace (open end interval) | ||||
end: i32, | ||||
/// The data replacing the chunk | ||||
data: &'a [u8], | ||||
} | ||||
impl<'a> PatchFrag<'a> { | ||||
/// Adjusted start of the chunk to replace. | ||||
/// | ||||
/// Offset allow to take into account the growth/shrinkage of data | ||||
/// induced by previously applied chunks. | ||||
fn start_offseted_by(&self, offset: i32) -> i32 { | ||||
self.start + offset | ||||
} | ||||
/// Adjusted end of the chunk to replace. | ||||
/// | ||||
/// Offset allow to take into account the growth/shrinkage of data | ||||
/// induced by previously applied chunks. | ||||
fn end_offseted_by(&self, offset: i32) -> i32 { | ||||
self.start_offseted_by(offset) + (self.data.len() as i32) | ||||
} | ||||
/// Length of the replaced chunk. | ||||
fn replaced_len(&self) -> i32 { | ||||
self.end - self.start | ||||
} | ||||
/// Length difference between the replacing data and the replaced data. | ||||
fn len_diff(&self) -> i32 { | ||||
(self.data.len() as i32) - self.replaced_len() | ||||
} | ||||
} | ||||
/// The delta between two revisions data. | ||||
#[derive(Debug, Clone)] | ||||
pub struct PatchList<'a> { | ||||
/// A collection of chunks to apply. | ||||
/// | ||||
/// Those chunks are: | ||||
/// - ordered from the left-most replacement to the right-most replacement | ||||
/// - non-overlapping, meaning that two chucks can not change the same | ||||
/// chunk of the patched data | ||||
frags: Vec<PatchFrag<'a>>, | ||||
} | ||||
impl<'a> PatchList<'a> { | ||||
/// Create a `PatchList` from bytes. | ||||
pub fn new(data: &'a [u8]) -> Self { | ||||
let mut frags = vec![]; | ||||
let mut data = data; | ||||
while !data.is_empty() { | ||||
let start = BigEndian::read_i32(&data[0..]); | ||||
let end = BigEndian::read_i32(&data[4..]); | ||||
let len = BigEndian::read_i32(&data[8..]); | ||||
assert!(0 <= start && start <= end && len >= 0); | ||||
frags.push(PatchFrag { | ||||
start, | ||||
end, | ||||
data: &data[12..12 + (len as usize)], | ||||
}); | ||||
data = &data[12 + (len as usize)..]; | ||||
} | ||||
PatchList { frags } | ||||
} | ||||
/// Return the final length of data after patching | ||||
/// given its initial length . | ||||
fn size(&self, initial_size: i32) -> i32 { | ||||
self.frags | ||||
.iter() | ||||
.fold(initial_size, |acc, frag| acc + frag.len_diff()) | ||||
} | ||||
/// Apply the patch to some data. | ||||
pub fn apply(&self, initial: &[u8]) -> Vec<u8> { | ||||
let mut last: usize = 0; | ||||
let mut vec = | ||||
Vec::with_capacity(self.size(initial.len() as i32) as usize); | ||||
for PatchFrag { start, end, data } in self.frags.iter() { | ||||
vec.extend(&initial[last..(*start as usize)]); | ||||
vec.extend(data.iter()); | ||||
last = *end as usize; | ||||
} | ||||
vec.extend(&initial[last..]); | ||||
vec | ||||
} | ||||
/// Combine two patch lists into a single patch list. | ||||
/// | ||||
/// Applying consecutive patches can lead to waste of time and memory | ||||
/// as the changes introduced by one patch can be overridden by the next. | ||||
/// Combining patches optimizes the whole patching sequence. | ||||
fn combine(&mut self, other: &mut Self) -> Self { | ||||
let mut frags = vec![]; | ||||
// Keep track of each growth/shrinkage resulting from applying a chunk | ||||
// in order to adjust the start/end of subsequent chunks. | ||||
let mut offset = 0i32; | ||||
// Keep track of the chunk of self.chunks to process. | ||||
let mut pos = 0; | ||||
// For each chunk of `other`, chunks of `self` are processed | ||||
// until they start after the end of the current chunk. | ||||
for PatchFrag { start, end, data } in other.frags.iter() { | ||||
// Add chunks of `self` that start before this chunk of `other` | ||||
// without overlap. | ||||
while pos < self.frags.len() | ||||
&& self.frags[pos].end_offseted_by(offset) <= *start | ||||
{ | ||||
let first = self.frags[pos].clone(); | ||||
offset += first.len_diff(); | ||||
frags.push(first); | ||||
pos += 1; | ||||
} | ||||
// The current chunk of `self` starts before this chunk of `other` | ||||
// with overlap. | ||||
// The left-most part of data is added as an insertion chunk. | ||||
// The right-most part data is kept in the chunk. | ||||
if pos < self.frags.len() | ||||
&& self.frags[pos].start_offseted_by(offset) < *start | ||||
{ | ||||
let first = &mut self.frags[pos]; | ||||
let (data_left, data_right) = first.data.split_at( | ||||
(*start - first.start_offseted_by(offset)) as usize, | ||||
); | ||||
let left = PatchFrag { | ||||
start: first.start, | ||||
end: first.start, | ||||
data: data_left, | ||||
}; | ||||
first.data = data_right; | ||||
offset += left.len_diff(); | ||||
frags.push(left); | ||||
// There is no index incrementation because the right-most part | ||||
// needs further examination. | ||||
} | ||||
// At this point remaining chunks of `self` starts after | ||||
// the current chunk of `other`. | ||||
// `start_offset` will be used to adjust the start of the current | ||||
// chunk of `other`. | ||||
// Offset tracking continues with `end_offset` to adjust the end | ||||
// of the current chunk of `other`. | ||||
let mut next_offset = offset; | ||||
// Discard the chunks of `self` that are totally overridden | ||||
// by the current chunk of `other` | ||||
while pos < self.frags.len() | ||||
&& self.frags[pos].end_offseted_by(next_offset) <= *end | ||||
{ | ||||
let first = &self.frags[pos]; | ||||
next_offset += first.len_diff(); | ||||
pos += 1; | ||||
} | ||||
// Truncate the left-most part of chunk of `self` that overlaps | ||||
// the current chunk of `other`. | ||||
if pos < self.frags.len() | ||||
&& self.frags[pos].start_offseted_by(next_offset) < *end | ||||
{ | ||||
let first = &mut self.frags[pos]; | ||||
let how_much_to_discard = | ||||
*end - first.start_offseted_by(next_offset); | ||||
first.data = &first.data[(how_much_to_discard as usize)..]; | ||||
next_offset += how_much_to_discard; | ||||
} | ||||
// Add the chunk of `other` with adjusted position. | ||||
frags.push(PatchFrag { | ||||
start: *start - offset, | ||||
end: *end - next_offset, | ||||
data, | ||||
}); | ||||
// Go back to normal offset tracking for the next `o` chunk | ||||
offset = next_offset; | ||||
} | ||||
// Add remaining chunks of `self`. | ||||
for elt in &self.frags[pos..] { | ||||
frags.push(elt.clone()); | ||||
} | ||||
PatchList { frags } | ||||
} | ||||
} | ||||
/// Combine a list of patch list into a single patch optimized patch list. | ||||
pub fn fold_patch_lists<'a>(lists: &[PatchList<'a>]) -> PatchList<'a> { | ||||
if lists.len() <= 1 { | ||||
if lists.is_empty() { | ||||
PatchList { frags: vec![] } | ||||
} else { | ||||
lists[0].clone() | ||||
} | ||||
} else { | ||||
let (left, right) = lists.split_at(lists.len() / 2); | ||||
let mut left_res = fold_patch_lists(left); | ||||
let mut right_res = fold_patch_lists(right); | ||||
left_res.combine(&mut right_res) | ||||
} | ||||
} | ||||
#[cfg(test)] | ||||
mod tests { | ||||
use super::*; | ||||
struct PatchDataBuilder { | ||||
data: Vec<u8>, | ||||
} | ||||
impl PatchDataBuilder { | ||||
pub fn new() -> Self { | ||||
Self { data: vec![] } | ||||
} | ||||
pub fn replace( | ||||
&mut self, | ||||
start: usize, | ||||
end: usize, | ||||
data: &[u8], | ||||
) -> &mut Self { | ||||
assert!(start <= end); | ||||
self.data.extend(&(start as i32).to_be_bytes()); | ||||
self.data.extend(&(end as i32).to_be_bytes()); | ||||
self.data.extend(&(data.len() as i32).to_be_bytes()); | ||||
self.data.extend(data.iter()); | ||||
self | ||||
} | ||||
pub fn get(&mut self) -> &[u8] { | ||||
&self.data[..] | ||||
} | ||||
} | ||||
#[test] | ||||
fn test_ends_before() { | ||||
let data = vec![0u8, 0u8, 0u8]; | ||||
let mut patch1_data = PatchDataBuilder::new(); | ||||
patch1_data.replace(0, 1, &[1, 2]); | ||||
let mut patch1 = PatchList::new(patch1_data.get()); | ||||
let mut patch2_data = PatchDataBuilder::new(); | ||||
patch2_data.replace(2, 4, &[3, 4]); | ||||
let mut patch2 = PatchList::new(patch2_data.get()); | ||||
let patch = patch1.combine(&mut patch2); | ||||
let result = patch.apply(&data); | ||||
assert_eq!(result, vec![1u8, 2, 3, 4]); | ||||
} | ||||
#[test] | ||||
fn test_starts_after() { | ||||
let data = vec![0u8, 0u8, 0u8]; | ||||
let mut patch1_data = PatchDataBuilder::new(); | ||||
patch1_data.replace(2, 3, &[3]); | ||||
let mut patch1 = PatchList::new(patch1_data.get()); | ||||
let mut patch2_data = PatchDataBuilder::new(); | ||||
patch2_data.replace(1, 2, &[1, 2]); | ||||
let mut patch2 = PatchList::new(patch2_data.get()); | ||||
let patch = patch1.combine(&mut patch2); | ||||
let result = patch.apply(&data); | ||||
assert_eq!(result, vec![0u8, 1, 2, 3]); | ||||
} | ||||
#[test] | ||||
fn test_overridden() { | ||||
let data = vec![0u8, 0, 0]; | ||||
let mut patch1_data = PatchDataBuilder::new(); | ||||
patch1_data.replace(1, 2, &[3, 4]); | ||||
let mut patch1 = PatchList::new(patch1_data.get()); | ||||
let mut patch2_data = PatchDataBuilder::new(); | ||||
patch2_data.replace(1, 4, &[1, 2, 3]); | ||||
let mut patch2 = PatchList::new(patch2_data.get()); | ||||
let patch = patch1.combine(&mut patch2); | ||||
let result = patch.apply(&data); | ||||
assert_eq!(result, vec![0u8, 1, 2, 3]); | ||||
} | ||||
#[test] | ||||
fn test_right_most_part_is_overridden() { | ||||
let data = vec![0u8, 0, 0]; | ||||
let mut patch1_data = PatchDataBuilder::new(); | ||||
patch1_data.replace(0, 1, &[1, 3]); | ||||
let mut patch1 = PatchList::new(patch1_data.get()); | ||||
let mut patch2_data = PatchDataBuilder::new(); | ||||
patch2_data.replace(1, 4, &[2, 3, 4]); | ||||
let mut patch2 = PatchList::new(patch2_data.get()); | ||||
let patch = patch1.combine(&mut patch2); | ||||
let result = patch.apply(&data); | ||||
assert_eq!(result, vec![1u8, 2, 3, 4]); | ||||
} | ||||
#[test] | ||||
fn test_left_most_part_is_overridden() { | ||||
let data = vec![0u8, 0, 0]; | ||||
let mut patch1_data = PatchDataBuilder::new(); | ||||
patch1_data.replace(1, 3, &[1, 3, 4]); | ||||
let mut patch1 = PatchList::new(patch1_data.get()); | ||||
let mut patch2_data = PatchDataBuilder::new(); | ||||
patch2_data.replace(0, 2, &[1, 2]); | ||||
let mut patch2 = PatchList::new(patch2_data.get()); | ||||
let patch = patch1.combine(&mut patch2); | ||||
let result = patch.apply(&data); | ||||
assert_eq!(result, vec![1u8, 2, 3, 4]); | ||||
} | ||||
#[test] | ||||
fn test_mid_is_overridden() { | ||||
let data = vec![0u8, 0, 0]; | ||||
let mut patch1_data = PatchDataBuilder::new(); | ||||
patch1_data.replace(0, 3, &[1, 3, 3, 4]); | ||||
let mut patch1 = PatchList::new(patch1_data.get()); | ||||
let mut patch2_data = PatchDataBuilder::new(); | ||||
patch2_data.replace(1, 3, &[2, 3]); | ||||
let mut patch2 = PatchList::new(patch2_data.get()); | ||||
let patch = patch1.combine(&mut patch2); | ||||
let result = patch.apply(&data); | ||||
assert_eq!(result, vec![1u8, 2, 3, 4]); | ||||
} | ||||
} | ||||