##// END OF EJS Templates
dirstate-v2: Reserve a few bytes of space for future extensions...
dirstate-v2: Reserve a few bytes of space for future extensions See doc-comment Differential Revision: https://phab.mercurial-scm.org/D11101

File last commit:

r46174:b68b1910 default
r48484:852262e2 default
Show More
patch.rs
369 lines | 11.8 KiB | application/rls-services+xml | RustLexer
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 use byteorder::{BigEndian, ByteOrder};
/// A chunk of data to insert, delete or replace in a patch
///
/// A chunk is:
/// - an insertion when `!data.is_empty() && start == end`
/// - an deletion when `data.is_empty() && start < end`
/// - a replacement when `!data.is_empty() && start < end`
/// - not doing anything when `data.is_empty() && start == end`
#[derive(Debug, Clone)]
Antoine cezar
hg-core: use the term `chunk` instead of `frag` (D8958#inline-15000 followup)...
r46170 struct Chunk<'a> {
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 /// The start position of the chunk of data to replace
Antoine cezar
hg-core: use `u32` instead of `i32` in `Chunk` (D8958#inline-15001 followup)...
r46171 start: u32,
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 /// The end position of the chunk of data to replace (open end interval)
Antoine cezar
hg-core: use `u32` instead of `i32` in `Chunk` (D8958#inline-15001 followup)...
r46171 end: u32,
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 /// The data replacing the chunk
data: &'a [u8],
}
Antoine cezar
hg-core: use anonymous lifetime for `impl Chunk` (D8958#inline-15003 followup)...
r46172 impl Chunk<'_> {
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 /// Adjusted start of the chunk to replace.
///
Antoine cezar
hg-core: minor rewording in docstring (D8958#inline-15005 followup)...
r46173 /// The offset, taking into account the growth/shrinkage of data
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 /// induced by previously applied chunks.
Antoine cezar
hg-core: renaming of `Chunk` offset methods (D8958#inline-15002 followup)...
r46174 fn start_offset_by(&self, offset: i32) -> u32 {
Antoine cezar
hg-core: use `u32` instead of `i32` in `Chunk` (D8958#inline-15001 followup)...
r46171 let start = self.start as i32 + offset;
assert!(start >= 0, "negative chunk start should never happen");
start as u32
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 }
/// Adjusted end of the chunk to replace.
///
Antoine cezar
hg-core: minor rewording in docstring (D8958#inline-15005 followup)...
r46173 /// The offset, taking into account the growth/shrinkage of data
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 /// induced by previously applied chunks.
Antoine cezar
hg-core: renaming of `Chunk` offset methods (D8958#inline-15002 followup)...
r46174 fn end_offset_by(&self, offset: i32) -> u32 {
self.start_offset_by(offset) + self.data.len() as u32
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 }
/// Length of the replaced chunk.
Antoine cezar
hg-core: use `u32` instead of `i32` in `Chunk` (D8958#inline-15001 followup)...
r46171 fn replaced_len(&self) -> u32 {
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 self.end - self.start
}
/// Length difference between the replacing data and the replaced data.
fn len_diff(&self) -> i32 {
Antoine cezar
hg-core: use `u32` instead of `i32` in `Chunk` (D8958#inline-15001 followup)...
r46171 self.data.len() as i32 - self.replaced_len() as i32
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 }
}
/// The delta between two revisions data.
#[derive(Debug, Clone)]
pub struct PatchList<'a> {
/// A collection of chunks to apply.
///
/// Those chunks are:
/// - ordered from the left-most replacement to the right-most replacement
/// - non-overlapping, meaning that two chucks can not change the same
/// chunk of the patched data
Antoine cezar
hg-core: use the term `chunk` instead of `frag` (D8958#inline-15000 followup)...
r46170 chunks: Vec<Chunk<'a>>,
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 }
impl<'a> PatchList<'a> {
/// Create a `PatchList` from bytes.
pub fn new(data: &'a [u8]) -> Self {
Antoine cezar
hg-core: use the term `chunk` instead of `frag` (D8958#inline-15000 followup)...
r46170 let mut chunks = vec![];
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 let mut data = data;
while !data.is_empty() {
Antoine cezar
hg-core: use `u32` instead of `i32` in `Chunk` (D8958#inline-15001 followup)...
r46171 let start = BigEndian::read_u32(&data[0..]);
let end = BigEndian::read_u32(&data[4..]);
let len = BigEndian::read_u32(&data[8..]);
assert!(start <= end);
Antoine cezar
hg-core: use the term `chunk` instead of `frag` (D8958#inline-15000 followup)...
r46170 chunks.push(Chunk {
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 start,
end,
data: &data[12..12 + (len as usize)],
});
data = &data[12 + (len as usize)..];
}
Antoine cezar
hg-core: use the term `chunk` instead of `frag` (D8958#inline-15000 followup)...
r46170 PatchList { chunks }
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 }
/// Return the final length of data after patching
/// given its initial length .
fn size(&self, initial_size: i32) -> i32 {
Antoine cezar
hg-core: use the term `chunk` instead of `frag` (D8958#inline-15000 followup)...
r46170 self.chunks
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 .iter()
Antoine cezar
hg-core: use the term `chunk` instead of `frag` (D8958#inline-15000 followup)...
r46170 .fold(initial_size, |acc, chunk| acc + chunk.len_diff())
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 }
/// Apply the patch to some data.
pub fn apply(&self, initial: &[u8]) -> Vec<u8> {
let mut last: usize = 0;
let mut vec =
Vec::with_capacity(self.size(initial.len() as i32) as usize);
Antoine cezar
hg-core: use the term `chunk` instead of `frag` (D8958#inline-15000 followup)...
r46170 for Chunk { start, end, data } in self.chunks.iter() {
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 vec.extend(&initial[last..(*start as usize)]);
vec.extend(data.iter());
last = *end as usize;
}
vec.extend(&initial[last..]);
vec
}
/// Combine two patch lists into a single patch list.
///
/// Applying consecutive patches can lead to waste of time and memory
/// as the changes introduced by one patch can be overridden by the next.
/// Combining patches optimizes the whole patching sequence.
fn combine(&mut self, other: &mut Self) -> Self {
Antoine cezar
hg-core: use the term `chunk` instead of `frag` (D8958#inline-15000 followup)...
r46170 let mut chunks = vec![];
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097
// Keep track of each growth/shrinkage resulting from applying a chunk
// in order to adjust the start/end of subsequent chunks.
let mut offset = 0i32;
// Keep track of the chunk of self.chunks to process.
let mut pos = 0;
// For each chunk of `other`, chunks of `self` are processed
// until they start after the end of the current chunk.
Antoine cezar
hg-core: use the term `chunk` instead of `frag` (D8958#inline-15000 followup)...
r46170 for Chunk { start, end, data } in other.chunks.iter() {
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 // Add chunks of `self` that start before this chunk of `other`
// without overlap.
Antoine cezar
hg-core: use the term `chunk` instead of `frag` (D8958#inline-15000 followup)...
r46170 while pos < self.chunks.len()
Antoine cezar
hg-core: renaming of `Chunk` offset methods (D8958#inline-15002 followup)...
r46174 && self.chunks[pos].end_offset_by(offset) <= *start
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 {
Antoine cezar
hg-core: use the term `chunk` instead of `frag` (D8958#inline-15000 followup)...
r46170 let first = self.chunks[pos].clone();
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 offset += first.len_diff();
Antoine cezar
hg-core: use the term `chunk` instead of `frag` (D8958#inline-15000 followup)...
r46170 chunks.push(first);
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 pos += 1;
}
// The current chunk of `self` starts before this chunk of `other`
// with overlap.
// The left-most part of data is added as an insertion chunk.
// The right-most part data is kept in the chunk.
Antoine cezar
hg-core: use the term `chunk` instead of `frag` (D8958#inline-15000 followup)...
r46170 if pos < self.chunks.len()
Antoine cezar
hg-core: renaming of `Chunk` offset methods (D8958#inline-15002 followup)...
r46174 && self.chunks[pos].start_offset_by(offset) < *start
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 {
Antoine cezar
hg-core: use the term `chunk` instead of `frag` (D8958#inline-15000 followup)...
r46170 let first = &mut self.chunks[pos];
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097
let (data_left, data_right) = first.data.split_at(
Antoine cezar
hg-core: renaming of `Chunk` offset methods (D8958#inline-15002 followup)...
r46174 (*start - first.start_offset_by(offset)) as usize,
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 );
Antoine cezar
hg-core: use the term `chunk` instead of `frag` (D8958#inline-15000 followup)...
r46170 let left = Chunk {
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 start: first.start,
end: first.start,
data: data_left,
};
first.data = data_right;
offset += left.len_diff();
Antoine cezar
hg-core: use the term `chunk` instead of `frag` (D8958#inline-15000 followup)...
r46170 chunks.push(left);
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097
// There is no index incrementation because the right-most part
// needs further examination.
}
// At this point remaining chunks of `self` starts after
// the current chunk of `other`.
// `start_offset` will be used to adjust the start of the current
// chunk of `other`.
// Offset tracking continues with `end_offset` to adjust the end
// of the current chunk of `other`.
let mut next_offset = offset;
// Discard the chunks of `self` that are totally overridden
// by the current chunk of `other`
Antoine cezar
hg-core: use the term `chunk` instead of `frag` (D8958#inline-15000 followup)...
r46170 while pos < self.chunks.len()
Antoine cezar
hg-core: renaming of `Chunk` offset methods (D8958#inline-15002 followup)...
r46174 && self.chunks[pos].end_offset_by(next_offset) <= *end
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 {
Antoine cezar
hg-core: use the term `chunk` instead of `frag` (D8958#inline-15000 followup)...
r46170 let first = &self.chunks[pos];
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 next_offset += first.len_diff();
pos += 1;
}
// Truncate the left-most part of chunk of `self` that overlaps
// the current chunk of `other`.
Antoine cezar
hg-core: use the term `chunk` instead of `frag` (D8958#inline-15000 followup)...
r46170 if pos < self.chunks.len()
Antoine cezar
hg-core: renaming of `Chunk` offset methods (D8958#inline-15002 followup)...
r46174 && self.chunks[pos].start_offset_by(next_offset) < *end
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 {
Antoine cezar
hg-core: use the term `chunk` instead of `frag` (D8958#inline-15000 followup)...
r46170 let first = &mut self.chunks[pos];
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097
let how_much_to_discard =
Antoine cezar
hg-core: renaming of `Chunk` offset methods (D8958#inline-15002 followup)...
r46174 *end - first.start_offset_by(next_offset);
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097
first.data = &first.data[(how_much_to_discard as usize)..];
Antoine cezar
hg-core: use `u32` instead of `i32` in `Chunk` (D8958#inline-15001 followup)...
r46171 next_offset += how_much_to_discard as i32;
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 }
// Add the chunk of `other` with adjusted position.
Antoine cezar
hg-core: use the term `chunk` instead of `frag` (D8958#inline-15000 followup)...
r46170 chunks.push(Chunk {
Antoine cezar
hg-core: use `u32` instead of `i32` in `Chunk` (D8958#inline-15001 followup)...
r46171 start: (*start as i32 - offset) as u32,
end: (*end as i32 - next_offset) as u32,
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 data,
});
// Go back to normal offset tracking for the next `o` chunk
offset = next_offset;
}
// Add remaining chunks of `self`.
Antoine cezar
hg-core: use the term `chunk` instead of `frag` (D8958#inline-15000 followup)...
r46170 for elt in &self.chunks[pos..] {
chunks.push(elt.clone());
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 }
Antoine cezar
hg-core: use the term `chunk` instead of `frag` (D8958#inline-15000 followup)...
r46170 PatchList { chunks }
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 }
}
/// Combine a list of patch list into a single patch optimized patch list.
pub fn fold_patch_lists<'a>(lists: &[PatchList<'a>]) -> PatchList<'a> {
if lists.len() <= 1 {
if lists.is_empty() {
Antoine cezar
hg-core: use the term `chunk` instead of `frag` (D8958#inline-15000 followup)...
r46170 PatchList { chunks: vec![] }
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 } else {
lists[0].clone()
}
} else {
let (left, right) = lists.split_at(lists.len() / 2);
let mut left_res = fold_patch_lists(left);
let mut right_res = fold_patch_lists(right);
left_res.combine(&mut right_res)
}
}
#[cfg(test)]
mod tests {
use super::*;
struct PatchDataBuilder {
data: Vec<u8>,
}
impl PatchDataBuilder {
pub fn new() -> Self {
Self { data: vec![] }
}
pub fn replace(
&mut self,
start: usize,
end: usize,
data: &[u8],
) -> &mut Self {
assert!(start <= end);
self.data.extend(&(start as i32).to_be_bytes());
self.data.extend(&(end as i32).to_be_bytes());
self.data.extend(&(data.len() as i32).to_be_bytes());
self.data.extend(data.iter());
self
}
pub fn get(&mut self) -> &[u8] {
Antoine cezar
hg-core: remove useless code (D8958#inline-14988 followup)...
r46168 &self.data
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 }
}
#[test]
fn test_ends_before() {
let data = vec![0u8, 0u8, 0u8];
let mut patch1_data = PatchDataBuilder::new();
patch1_data.replace(0, 1, &[1, 2]);
let mut patch1 = PatchList::new(patch1_data.get());
let mut patch2_data = PatchDataBuilder::new();
patch2_data.replace(2, 4, &[3, 4]);
let mut patch2 = PatchList::new(patch2_data.get());
let patch = patch1.combine(&mut patch2);
let result = patch.apply(&data);
assert_eq!(result, vec![1u8, 2, 3, 4]);
}
#[test]
fn test_starts_after() {
let data = vec![0u8, 0u8, 0u8];
let mut patch1_data = PatchDataBuilder::new();
patch1_data.replace(2, 3, &[3]);
let mut patch1 = PatchList::new(patch1_data.get());
let mut patch2_data = PatchDataBuilder::new();
patch2_data.replace(1, 2, &[1, 2]);
let mut patch2 = PatchList::new(patch2_data.get());
let patch = patch1.combine(&mut patch2);
let result = patch.apply(&data);
assert_eq!(result, vec![0u8, 1, 2, 3]);
}
#[test]
fn test_overridden() {
let data = vec![0u8, 0, 0];
let mut patch1_data = PatchDataBuilder::new();
patch1_data.replace(1, 2, &[3, 4]);
let mut patch1 = PatchList::new(patch1_data.get());
let mut patch2_data = PatchDataBuilder::new();
patch2_data.replace(1, 4, &[1, 2, 3]);
let mut patch2 = PatchList::new(patch2_data.get());
let patch = patch1.combine(&mut patch2);
let result = patch.apply(&data);
assert_eq!(result, vec![0u8, 1, 2, 3]);
}
#[test]
fn test_right_most_part_is_overridden() {
let data = vec![0u8, 0, 0];
let mut patch1_data = PatchDataBuilder::new();
patch1_data.replace(0, 1, &[1, 3]);
let mut patch1 = PatchList::new(patch1_data.get());
let mut patch2_data = PatchDataBuilder::new();
patch2_data.replace(1, 4, &[2, 3, 4]);
let mut patch2 = PatchList::new(patch2_data.get());
let patch = patch1.combine(&mut patch2);
let result = patch.apply(&data);
assert_eq!(result, vec![1u8, 2, 3, 4]);
}
#[test]
fn test_left_most_part_is_overridden() {
let data = vec![0u8, 0, 0];
let mut patch1_data = PatchDataBuilder::new();
patch1_data.replace(1, 3, &[1, 3, 4]);
let mut patch1 = PatchList::new(patch1_data.get());
let mut patch2_data = PatchDataBuilder::new();
patch2_data.replace(0, 2, &[1, 2]);
let mut patch2 = PatchList::new(patch2_data.get());
let patch = patch1.combine(&mut patch2);
let result = patch.apply(&data);
assert_eq!(result, vec![1u8, 2, 3, 4]);
}
#[test]
fn test_mid_is_overridden() {
let data = vec![0u8, 0, 0];
let mut patch1_data = PatchDataBuilder::new();
patch1_data.replace(0, 3, &[1, 3, 3, 4]);
let mut patch1 = PatchList::new(patch1_data.get());
let mut patch2_data = PatchDataBuilder::new();
patch2_data.replace(1, 3, &[2, 3]);
let mut patch2 = PatchList::new(patch2_data.get());
let patch = patch1.combine(&mut patch2);
let result = patch.apply(&data);
assert_eq!(result, vec![1u8, 2, 3, 4]);
}
}