use byteorder::{BigEndian, ByteOrder}; /// A chunk of data to insert, delete or replace in a patch /// /// A chunk is: /// - an insertion when `!data.is_empty() && start == end` /// - an deletion when `data.is_empty() && start < end` /// - a replacement when `!data.is_empty() && start < end` /// - not doing anything when `data.is_empty() && start == end` #[derive(Debug, Clone)] struct PatchFrag<'a> { /// The start position of the chunk of data to replace start: i32, /// The end position of the chunk of data to replace (open end interval) end: i32, /// The data replacing the chunk data: &'a [u8], } impl<'a> PatchFrag<'a> { /// Adjusted start of the chunk to replace. /// /// Offset allow to take into account the growth/shrinkage of data /// induced by previously applied chunks. fn start_offseted_by(&self, offset: i32) -> i32 { self.start + offset } /// Adjusted end of the chunk to replace. /// /// Offset allow to take into account the growth/shrinkage of data /// induced by previously applied chunks. fn end_offseted_by(&self, offset: i32) -> i32 { self.start_offseted_by(offset) + (self.data.len() as i32) } /// Length of the replaced chunk. fn replaced_len(&self) -> i32 { self.end - self.start } /// Length difference between the replacing data and the replaced data. fn len_diff(&self) -> i32 { (self.data.len() as i32) - self.replaced_len() } } /// The delta between two revisions data. #[derive(Debug, Clone)] pub struct PatchList<'a> { /// A collection of chunks to apply. /// /// Those chunks are: /// - ordered from the left-most replacement to the right-most replacement /// - non-overlapping, meaning that two chucks can not change the same /// chunk of the patched data frags: Vec>, } impl<'a> PatchList<'a> { /// Create a `PatchList` from bytes. pub fn new(data: &'a [u8]) -> Self { let mut frags = vec![]; let mut data = data; while !data.is_empty() { let start = BigEndian::read_i32(&data[0..]); let end = BigEndian::read_i32(&data[4..]); let len = BigEndian::read_i32(&data[8..]); assert!(0 <= start && start <= end && len >= 0); frags.push(PatchFrag { start, end, data: &data[12..12 + (len as usize)], }); data = &data[12 + (len as usize)..]; } PatchList { frags } } /// Return the final length of data after patching /// given its initial length . fn size(&self, initial_size: i32) -> i32 { self.frags .iter() .fold(initial_size, |acc, frag| acc + frag.len_diff()) } /// Apply the patch to some data. pub fn apply(&self, initial: &[u8]) -> Vec { let mut last: usize = 0; let mut vec = Vec::with_capacity(self.size(initial.len() as i32) as usize); for PatchFrag { start, end, data } in self.frags.iter() { vec.extend(&initial[last..(*start as usize)]); vec.extend(data.iter()); last = *end as usize; } vec.extend(&initial[last..]); vec } /// Combine two patch lists into a single patch list. /// /// Applying consecutive patches can lead to waste of time and memory /// as the changes introduced by one patch can be overridden by the next. /// Combining patches optimizes the whole patching sequence. fn combine(&mut self, other: &mut Self) -> Self { let mut frags = vec![]; // Keep track of each growth/shrinkage resulting from applying a chunk // in order to adjust the start/end of subsequent chunks. let mut offset = 0i32; // Keep track of the chunk of self.chunks to process. let mut pos = 0; // For each chunk of `other`, chunks of `self` are processed // until they start after the end of the current chunk. for PatchFrag { start, end, data } in other.frags.iter() { // Add chunks of `self` that start before this chunk of `other` // without overlap. while pos < self.frags.len() && self.frags[pos].end_offseted_by(offset) <= *start { let first = self.frags[pos].clone(); offset += first.len_diff(); frags.push(first); pos += 1; } // The current chunk of `self` starts before this chunk of `other` // with overlap. // The left-most part of data is added as an insertion chunk. // The right-most part data is kept in the chunk. if pos < self.frags.len() && self.frags[pos].start_offseted_by(offset) < *start { let first = &mut self.frags[pos]; let (data_left, data_right) = first.data.split_at( (*start - first.start_offseted_by(offset)) as usize, ); let left = PatchFrag { start: first.start, end: first.start, data: data_left, }; first.data = data_right; offset += left.len_diff(); frags.push(left); // There is no index incrementation because the right-most part // needs further examination. } // At this point remaining chunks of `self` starts after // the current chunk of `other`. // `start_offset` will be used to adjust the start of the current // chunk of `other`. // Offset tracking continues with `end_offset` to adjust the end // of the current chunk of `other`. let mut next_offset = offset; // Discard the chunks of `self` that are totally overridden // by the current chunk of `other` while pos < self.frags.len() && self.frags[pos].end_offseted_by(next_offset) <= *end { let first = &self.frags[pos]; next_offset += first.len_diff(); pos += 1; } // Truncate the left-most part of chunk of `self` that overlaps // the current chunk of `other`. if pos < self.frags.len() && self.frags[pos].start_offseted_by(next_offset) < *end { let first = &mut self.frags[pos]; let how_much_to_discard = *end - first.start_offseted_by(next_offset); first.data = &first.data[(how_much_to_discard as usize)..]; next_offset += how_much_to_discard; } // Add the chunk of `other` with adjusted position. frags.push(PatchFrag { start: *start - offset, end: *end - next_offset, data, }); // Go back to normal offset tracking for the next `o` chunk offset = next_offset; } // Add remaining chunks of `self`. for elt in &self.frags[pos..] { frags.push(elt.clone()); } PatchList { frags } } } /// Combine a list of patch list into a single patch optimized patch list. pub fn fold_patch_lists<'a>(lists: &[PatchList<'a>]) -> PatchList<'a> { if lists.len() <= 1 { if lists.is_empty() { PatchList { frags: vec![] } } else { lists[0].clone() } } else { let (left, right) = lists.split_at(lists.len() / 2); let mut left_res = fold_patch_lists(left); let mut right_res = fold_patch_lists(right); left_res.combine(&mut right_res) } } #[cfg(test)] mod tests { use super::*; struct PatchDataBuilder { data: Vec, } impl PatchDataBuilder { pub fn new() -> Self { Self { data: vec![] } } pub fn replace( &mut self, start: usize, end: usize, data: &[u8], ) -> &mut Self { assert!(start <= end); self.data.extend(&(start as i32).to_be_bytes()); self.data.extend(&(end as i32).to_be_bytes()); self.data.extend(&(data.len() as i32).to_be_bytes()); self.data.extend(data.iter()); self } pub fn get(&mut self) -> &[u8] { &self.data[..] } } #[test] fn test_ends_before() { let data = vec![0u8, 0u8, 0u8]; let mut patch1_data = PatchDataBuilder::new(); patch1_data.replace(0, 1, &[1, 2]); let mut patch1 = PatchList::new(patch1_data.get()); let mut patch2_data = PatchDataBuilder::new(); patch2_data.replace(2, 4, &[3, 4]); let mut patch2 = PatchList::new(patch2_data.get()); let patch = patch1.combine(&mut patch2); let result = patch.apply(&data); assert_eq!(result, vec![1u8, 2, 3, 4]); } #[test] fn test_starts_after() { let data = vec![0u8, 0u8, 0u8]; let mut patch1_data = PatchDataBuilder::new(); patch1_data.replace(2, 3, &[3]); let mut patch1 = PatchList::new(patch1_data.get()); let mut patch2_data = PatchDataBuilder::new(); patch2_data.replace(1, 2, &[1, 2]); let mut patch2 = PatchList::new(patch2_data.get()); let patch = patch1.combine(&mut patch2); let result = patch.apply(&data); assert_eq!(result, vec![0u8, 1, 2, 3]); } #[test] fn test_overridden() { let data = vec![0u8, 0, 0]; let mut patch1_data = PatchDataBuilder::new(); patch1_data.replace(1, 2, &[3, 4]); let mut patch1 = PatchList::new(patch1_data.get()); let mut patch2_data = PatchDataBuilder::new(); patch2_data.replace(1, 4, &[1, 2, 3]); let mut patch2 = PatchList::new(patch2_data.get()); let patch = patch1.combine(&mut patch2); let result = patch.apply(&data); assert_eq!(result, vec![0u8, 1, 2, 3]); } #[test] fn test_right_most_part_is_overridden() { let data = vec![0u8, 0, 0]; let mut patch1_data = PatchDataBuilder::new(); patch1_data.replace(0, 1, &[1, 3]); let mut patch1 = PatchList::new(patch1_data.get()); let mut patch2_data = PatchDataBuilder::new(); patch2_data.replace(1, 4, &[2, 3, 4]); let mut patch2 = PatchList::new(patch2_data.get()); let patch = patch1.combine(&mut patch2); let result = patch.apply(&data); assert_eq!(result, vec![1u8, 2, 3, 4]); } #[test] fn test_left_most_part_is_overridden() { let data = vec![0u8, 0, 0]; let mut patch1_data = PatchDataBuilder::new(); patch1_data.replace(1, 3, &[1, 3, 4]); let mut patch1 = PatchList::new(patch1_data.get()); let mut patch2_data = PatchDataBuilder::new(); patch2_data.replace(0, 2, &[1, 2]); let mut patch2 = PatchList::new(patch2_data.get()); let patch = patch1.combine(&mut patch2); let result = patch.apply(&data); assert_eq!(result, vec![1u8, 2, 3, 4]); } #[test] fn test_mid_is_overridden() { let data = vec![0u8, 0, 0]; let mut patch1_data = PatchDataBuilder::new(); patch1_data.replace(0, 3, &[1, 3, 3, 4]); let mut patch1 = PatchList::new(patch1_data.get()); let mut patch2_data = PatchDataBuilder::new(); patch2_data.replace(1, 3, &[2, 3]); let mut patch2 = PatchList::new(patch2_data.get()); let patch = patch1.combine(&mut patch2); let result = patch.apply(&data); assert_eq!(result, vec![1u8, 2, 3, 4]); } }