patch.rs
369 lines
| 11.8 KiB
| application/rls-services+xml
|
RustLexer
Antoine Cezar
|
r46097 | use byteorder::{BigEndian, ByteOrder}; | ||
/// A chunk of data to insert, delete or replace in a patch | ||||
/// | ||||
/// A chunk is: | ||||
/// - an insertion when `!data.is_empty() && start == end` | ||||
/// - an deletion when `data.is_empty() && start < end` | ||||
/// - a replacement when `!data.is_empty() && start < end` | ||||
/// - not doing anything when `data.is_empty() && start == end` | ||||
#[derive(Debug, Clone)] | ||||
Antoine cezar
|
r46170 | struct Chunk<'a> { | ||
Antoine Cezar
|
r46097 | /// The start position of the chunk of data to replace | ||
Antoine cezar
|
r46171 | start: u32, | ||
Antoine Cezar
|
r46097 | /// The end position of the chunk of data to replace (open end interval) | ||
Antoine cezar
|
r46171 | end: u32, | ||
Antoine Cezar
|
r46097 | /// The data replacing the chunk | ||
data: &'a [u8], | ||||
} | ||||
Antoine cezar
|
r46172 | impl Chunk<'_> { | ||
Antoine Cezar
|
r46097 | /// Adjusted start of the chunk to replace. | ||
/// | ||||
Antoine cezar
|
r46173 | /// The offset, taking into account the growth/shrinkage of data | ||
Antoine Cezar
|
r46097 | /// induced by previously applied chunks. | ||
Antoine cezar
|
r46174 | fn start_offset_by(&self, offset: i32) -> u32 { | ||
Antoine cezar
|
r46171 | let start = self.start as i32 + offset; | ||
assert!(start >= 0, "negative chunk start should never happen"); | ||||
start as u32 | ||||
Antoine Cezar
|
r46097 | } | ||
/// Adjusted end of the chunk to replace. | ||||
/// | ||||
Antoine cezar
|
r46173 | /// The offset, taking into account the growth/shrinkage of data | ||
Antoine Cezar
|
r46097 | /// induced by previously applied chunks. | ||
Antoine cezar
|
r46174 | fn end_offset_by(&self, offset: i32) -> u32 { | ||
self.start_offset_by(offset) + self.data.len() as u32 | ||||
Antoine Cezar
|
r46097 | } | ||
/// Length of the replaced chunk. | ||||
Antoine cezar
|
r46171 | fn replaced_len(&self) -> u32 { | ||
Antoine Cezar
|
r46097 | self.end - self.start | ||
} | ||||
/// Length difference between the replacing data and the replaced data. | ||||
fn len_diff(&self) -> i32 { | ||||
Antoine cezar
|
r46171 | self.data.len() as i32 - self.replaced_len() as i32 | ||
Antoine Cezar
|
r46097 | } | ||
} | ||||
/// The delta between two revisions data. | ||||
#[derive(Debug, Clone)] | ||||
pub struct PatchList<'a> { | ||||
/// A collection of chunks to apply. | ||||
/// | ||||
/// Those chunks are: | ||||
/// - ordered from the left-most replacement to the right-most replacement | ||||
/// - non-overlapping, meaning that two chucks can not change the same | ||||
/// chunk of the patched data | ||||
Antoine cezar
|
r46170 | chunks: Vec<Chunk<'a>>, | ||
Antoine Cezar
|
r46097 | } | ||
impl<'a> PatchList<'a> { | ||||
/// Create a `PatchList` from bytes. | ||||
pub fn new(data: &'a [u8]) -> Self { | ||||
Antoine cezar
|
r46170 | let mut chunks = vec![]; | ||
Antoine Cezar
|
r46097 | let mut data = data; | ||
while !data.is_empty() { | ||||
Antoine cezar
|
r46171 | let start = BigEndian::read_u32(&data[0..]); | ||
let end = BigEndian::read_u32(&data[4..]); | ||||
let len = BigEndian::read_u32(&data[8..]); | ||||
assert!(start <= end); | ||||
Antoine cezar
|
r46170 | chunks.push(Chunk { | ||
Antoine Cezar
|
r46097 | start, | ||
end, | ||||
data: &data[12..12 + (len as usize)], | ||||
}); | ||||
data = &data[12 + (len as usize)..]; | ||||
} | ||||
Antoine cezar
|
r46170 | PatchList { chunks } | ||
Antoine Cezar
|
r46097 | } | ||
/// Return the final length of data after patching | ||||
/// given its initial length . | ||||
fn size(&self, initial_size: i32) -> i32 { | ||||
Antoine cezar
|
r46170 | self.chunks | ||
Antoine Cezar
|
r46097 | .iter() | ||
Antoine cezar
|
r46170 | .fold(initial_size, |acc, chunk| acc + chunk.len_diff()) | ||
Antoine Cezar
|
r46097 | } | ||
/// Apply the patch to some data. | ||||
pub fn apply(&self, initial: &[u8]) -> Vec<u8> { | ||||
let mut last: usize = 0; | ||||
let mut vec = | ||||
Vec::with_capacity(self.size(initial.len() as i32) as usize); | ||||
Antoine cezar
|
r46170 | for Chunk { start, end, data } in self.chunks.iter() { | ||
Antoine Cezar
|
r46097 | vec.extend(&initial[last..(*start as usize)]); | ||
vec.extend(data.iter()); | ||||
last = *end as usize; | ||||
} | ||||
vec.extend(&initial[last..]); | ||||
vec | ||||
} | ||||
/// Combine two patch lists into a single patch list. | ||||
/// | ||||
/// Applying consecutive patches can lead to waste of time and memory | ||||
/// as the changes introduced by one patch can be overridden by the next. | ||||
/// Combining patches optimizes the whole patching sequence. | ||||
fn combine(&mut self, other: &mut Self) -> Self { | ||||
Antoine cezar
|
r46170 | let mut chunks = vec![]; | ||
Antoine Cezar
|
r46097 | |||
// Keep track of each growth/shrinkage resulting from applying a chunk | ||||
// in order to adjust the start/end of subsequent chunks. | ||||
let mut offset = 0i32; | ||||
// Keep track of the chunk of self.chunks to process. | ||||
let mut pos = 0; | ||||
// For each chunk of `other`, chunks of `self` are processed | ||||
// until they start after the end of the current chunk. | ||||
Antoine cezar
|
r46170 | for Chunk { start, end, data } in other.chunks.iter() { | ||
Antoine Cezar
|
r46097 | // Add chunks of `self` that start before this chunk of `other` | ||
// without overlap. | ||||
Antoine cezar
|
r46170 | while pos < self.chunks.len() | ||
Antoine cezar
|
r46174 | && self.chunks[pos].end_offset_by(offset) <= *start | ||
Antoine Cezar
|
r46097 | { | ||
Antoine cezar
|
r46170 | let first = self.chunks[pos].clone(); | ||
Antoine Cezar
|
r46097 | offset += first.len_diff(); | ||
Antoine cezar
|
r46170 | chunks.push(first); | ||
Antoine Cezar
|
r46097 | pos += 1; | ||
} | ||||
// The current chunk of `self` starts before this chunk of `other` | ||||
// with overlap. | ||||
// The left-most part of data is added as an insertion chunk. | ||||
// The right-most part data is kept in the chunk. | ||||
Antoine cezar
|
r46170 | if pos < self.chunks.len() | ||
Antoine cezar
|
r46174 | && self.chunks[pos].start_offset_by(offset) < *start | ||
Antoine Cezar
|
r46097 | { | ||
Antoine cezar
|
r46170 | let first = &mut self.chunks[pos]; | ||
Antoine Cezar
|
r46097 | |||
let (data_left, data_right) = first.data.split_at( | ||||
Antoine cezar
|
r46174 | (*start - first.start_offset_by(offset)) as usize, | ||
Antoine Cezar
|
r46097 | ); | ||
Antoine cezar
|
r46170 | let left = Chunk { | ||
Antoine Cezar
|
r46097 | start: first.start, | ||
end: first.start, | ||||
data: data_left, | ||||
}; | ||||
first.data = data_right; | ||||
offset += left.len_diff(); | ||||
Antoine cezar
|
r46170 | chunks.push(left); | ||
Antoine Cezar
|
r46097 | |||
// There is no index incrementation because the right-most part | ||||
// needs further examination. | ||||
} | ||||
// At this point remaining chunks of `self` starts after | ||||
// the current chunk of `other`. | ||||
// `start_offset` will be used to adjust the start of the current | ||||
// chunk of `other`. | ||||
// Offset tracking continues with `end_offset` to adjust the end | ||||
// of the current chunk of `other`. | ||||
let mut next_offset = offset; | ||||
// Discard the chunks of `self` that are totally overridden | ||||
// by the current chunk of `other` | ||||
Antoine cezar
|
r46170 | while pos < self.chunks.len() | ||
Antoine cezar
|
r46174 | && self.chunks[pos].end_offset_by(next_offset) <= *end | ||
Antoine Cezar
|
r46097 | { | ||
Antoine cezar
|
r46170 | let first = &self.chunks[pos]; | ||
Antoine Cezar
|
r46097 | next_offset += first.len_diff(); | ||
pos += 1; | ||||
} | ||||
// Truncate the left-most part of chunk of `self` that overlaps | ||||
// the current chunk of `other`. | ||||
Antoine cezar
|
r46170 | if pos < self.chunks.len() | ||
Antoine cezar
|
r46174 | && self.chunks[pos].start_offset_by(next_offset) < *end | ||
Antoine Cezar
|
r46097 | { | ||
Antoine cezar
|
r46170 | let first = &mut self.chunks[pos]; | ||
Antoine Cezar
|
r46097 | |||
let how_much_to_discard = | ||||
Antoine cezar
|
r46174 | *end - first.start_offset_by(next_offset); | ||
Antoine Cezar
|
r46097 | |||
first.data = &first.data[(how_much_to_discard as usize)..]; | ||||
Antoine cezar
|
r46171 | next_offset += how_much_to_discard as i32; | ||
Antoine Cezar
|
r46097 | } | ||
// Add the chunk of `other` with adjusted position. | ||||
Antoine cezar
|
r46170 | chunks.push(Chunk { | ||
Antoine cezar
|
r46171 | start: (*start as i32 - offset) as u32, | ||
end: (*end as i32 - next_offset) as u32, | ||||
Antoine Cezar
|
r46097 | data, | ||
}); | ||||
// Go back to normal offset tracking for the next `o` chunk | ||||
offset = next_offset; | ||||
} | ||||
// Add remaining chunks of `self`. | ||||
Antoine cezar
|
r46170 | for elt in &self.chunks[pos..] { | ||
chunks.push(elt.clone()); | ||||
Antoine Cezar
|
r46097 | } | ||
Antoine cezar
|
r46170 | PatchList { chunks } | ||
Antoine Cezar
|
r46097 | } | ||
} | ||||
/// Combine a list of patch list into a single patch optimized patch list. | ||||
pub fn fold_patch_lists<'a>(lists: &[PatchList<'a>]) -> PatchList<'a> { | ||||
if lists.len() <= 1 { | ||||
if lists.is_empty() { | ||||
Antoine cezar
|
r46170 | PatchList { chunks: vec![] } | ||
Antoine Cezar
|
r46097 | } else { | ||
lists[0].clone() | ||||
} | ||||
} else { | ||||
let (left, right) = lists.split_at(lists.len() / 2); | ||||
let mut left_res = fold_patch_lists(left); | ||||
let mut right_res = fold_patch_lists(right); | ||||
left_res.combine(&mut right_res) | ||||
} | ||||
} | ||||
#[cfg(test)] | ||||
mod tests { | ||||
use super::*; | ||||
struct PatchDataBuilder { | ||||
data: Vec<u8>, | ||||
} | ||||
impl PatchDataBuilder { | ||||
pub fn new() -> Self { | ||||
Self { data: vec![] } | ||||
} | ||||
pub fn replace( | ||||
&mut self, | ||||
start: usize, | ||||
end: usize, | ||||
data: &[u8], | ||||
) -> &mut Self { | ||||
assert!(start <= end); | ||||
self.data.extend(&(start as i32).to_be_bytes()); | ||||
self.data.extend(&(end as i32).to_be_bytes()); | ||||
self.data.extend(&(data.len() as i32).to_be_bytes()); | ||||
self.data.extend(data.iter()); | ||||
self | ||||
} | ||||
pub fn get(&mut self) -> &[u8] { | ||||
Antoine cezar
|
r46168 | &self.data | ||
Antoine Cezar
|
r46097 | } | ||
} | ||||
#[test] | ||||
fn test_ends_before() { | ||||
let data = vec![0u8, 0u8, 0u8]; | ||||
let mut patch1_data = PatchDataBuilder::new(); | ||||
patch1_data.replace(0, 1, &[1, 2]); | ||||
let mut patch1 = PatchList::new(patch1_data.get()); | ||||
let mut patch2_data = PatchDataBuilder::new(); | ||||
patch2_data.replace(2, 4, &[3, 4]); | ||||
let mut patch2 = PatchList::new(patch2_data.get()); | ||||
let patch = patch1.combine(&mut patch2); | ||||
let result = patch.apply(&data); | ||||
assert_eq!(result, vec![1u8, 2, 3, 4]); | ||||
} | ||||
#[test] | ||||
fn test_starts_after() { | ||||
let data = vec![0u8, 0u8, 0u8]; | ||||
let mut patch1_data = PatchDataBuilder::new(); | ||||
patch1_data.replace(2, 3, &[3]); | ||||
let mut patch1 = PatchList::new(patch1_data.get()); | ||||
let mut patch2_data = PatchDataBuilder::new(); | ||||
patch2_data.replace(1, 2, &[1, 2]); | ||||
let mut patch2 = PatchList::new(patch2_data.get()); | ||||
let patch = patch1.combine(&mut patch2); | ||||
let result = patch.apply(&data); | ||||
assert_eq!(result, vec![0u8, 1, 2, 3]); | ||||
} | ||||
#[test] | ||||
fn test_overridden() { | ||||
let data = vec![0u8, 0, 0]; | ||||
let mut patch1_data = PatchDataBuilder::new(); | ||||
patch1_data.replace(1, 2, &[3, 4]); | ||||
let mut patch1 = PatchList::new(patch1_data.get()); | ||||
let mut patch2_data = PatchDataBuilder::new(); | ||||
patch2_data.replace(1, 4, &[1, 2, 3]); | ||||
let mut patch2 = PatchList::new(patch2_data.get()); | ||||
let patch = patch1.combine(&mut patch2); | ||||
let result = patch.apply(&data); | ||||
assert_eq!(result, vec![0u8, 1, 2, 3]); | ||||
} | ||||
#[test] | ||||
fn test_right_most_part_is_overridden() { | ||||
let data = vec![0u8, 0, 0]; | ||||
let mut patch1_data = PatchDataBuilder::new(); | ||||
patch1_data.replace(0, 1, &[1, 3]); | ||||
let mut patch1 = PatchList::new(patch1_data.get()); | ||||
let mut patch2_data = PatchDataBuilder::new(); | ||||
patch2_data.replace(1, 4, &[2, 3, 4]); | ||||
let mut patch2 = PatchList::new(patch2_data.get()); | ||||
let patch = patch1.combine(&mut patch2); | ||||
let result = patch.apply(&data); | ||||
assert_eq!(result, vec![1u8, 2, 3, 4]); | ||||
} | ||||
#[test] | ||||
fn test_left_most_part_is_overridden() { | ||||
let data = vec![0u8, 0, 0]; | ||||
let mut patch1_data = PatchDataBuilder::new(); | ||||
patch1_data.replace(1, 3, &[1, 3, 4]); | ||||
let mut patch1 = PatchList::new(patch1_data.get()); | ||||
let mut patch2_data = PatchDataBuilder::new(); | ||||
patch2_data.replace(0, 2, &[1, 2]); | ||||
let mut patch2 = PatchList::new(patch2_data.get()); | ||||
let patch = patch1.combine(&mut patch2); | ||||
let result = patch.apply(&data); | ||||
assert_eq!(result, vec![1u8, 2, 3, 4]); | ||||
} | ||||
#[test] | ||||
fn test_mid_is_overridden() { | ||||
let data = vec![0u8, 0, 0]; | ||||
let mut patch1_data = PatchDataBuilder::new(); | ||||
patch1_data.replace(0, 3, &[1, 3, 3, 4]); | ||||
let mut patch1 = PatchList::new(patch1_data.get()); | ||||
let mut patch2_data = PatchDataBuilder::new(); | ||||
patch2_data.replace(1, 3, &[2, 3]); | ||||
let mut patch2 = PatchList::new(patch2_data.get()); | ||||
let patch = patch1.combine(&mut patch2); | ||||
let result = patch.apply(&data); | ||||
assert_eq!(result, vec![1u8, 2, 3, 4]); | ||||
} | ||||
} | ||||