##// END OF EJS Templates
hg-core: renaming of `Chunk` offset methods (D8958#inline-15002 followup)...
Antoine cezar -
r46174:b68b1910 default
parent child Browse files
Show More
@@ -1,369 +1,369 b''
1 1 use byteorder::{BigEndian, ByteOrder};
2 2
3 3 /// A chunk of data to insert, delete or replace in a patch
4 4 ///
5 5 /// A chunk is:
6 6 /// - an insertion when `!data.is_empty() && start == end`
7 7 /// - an deletion when `data.is_empty() && start < end`
8 8 /// - a replacement when `!data.is_empty() && start < end`
9 9 /// - not doing anything when `data.is_empty() && start == end`
10 10 #[derive(Debug, Clone)]
11 11 struct Chunk<'a> {
12 12 /// The start position of the chunk of data to replace
13 13 start: u32,
14 14 /// The end position of the chunk of data to replace (open end interval)
15 15 end: u32,
16 16 /// The data replacing the chunk
17 17 data: &'a [u8],
18 18 }
19 19
20 20 impl Chunk<'_> {
21 21 /// Adjusted start of the chunk to replace.
22 22 ///
23 23 /// The offset, taking into account the growth/shrinkage of data
24 24 /// induced by previously applied chunks.
25 fn start_offseted_by(&self, offset: i32) -> u32 {
25 fn start_offset_by(&self, offset: i32) -> u32 {
26 26 let start = self.start as i32 + offset;
27 27 assert!(start >= 0, "negative chunk start should never happen");
28 28 start as u32
29 29 }
30 30
31 31 /// Adjusted end of the chunk to replace.
32 32 ///
33 33 /// The offset, taking into account the growth/shrinkage of data
34 34 /// induced by previously applied chunks.
35 fn end_offseted_by(&self, offset: i32) -> u32 {
36 self.start_offseted_by(offset) + self.data.len() as u32
35 fn end_offset_by(&self, offset: i32) -> u32 {
36 self.start_offset_by(offset) + self.data.len() as u32
37 37 }
38 38
39 39 /// Length of the replaced chunk.
40 40 fn replaced_len(&self) -> u32 {
41 41 self.end - self.start
42 42 }
43 43
44 44 /// Length difference between the replacing data and the replaced data.
45 45 fn len_diff(&self) -> i32 {
46 46 self.data.len() as i32 - self.replaced_len() as i32
47 47 }
48 48 }
49 49
50 50 /// The delta between two revisions data.
51 51 #[derive(Debug, Clone)]
52 52 pub struct PatchList<'a> {
53 53 /// A collection of chunks to apply.
54 54 ///
55 55 /// Those chunks are:
56 56 /// - ordered from the left-most replacement to the right-most replacement
57 57 /// - non-overlapping, meaning that two chucks can not change the same
58 58 /// chunk of the patched data
59 59 chunks: Vec<Chunk<'a>>,
60 60 }
61 61
62 62 impl<'a> PatchList<'a> {
63 63 /// Create a `PatchList` from bytes.
64 64 pub fn new(data: &'a [u8]) -> Self {
65 65 let mut chunks = vec![];
66 66 let mut data = data;
67 67 while !data.is_empty() {
68 68 let start = BigEndian::read_u32(&data[0..]);
69 69 let end = BigEndian::read_u32(&data[4..]);
70 70 let len = BigEndian::read_u32(&data[8..]);
71 71 assert!(start <= end);
72 72 chunks.push(Chunk {
73 73 start,
74 74 end,
75 75 data: &data[12..12 + (len as usize)],
76 76 });
77 77 data = &data[12 + (len as usize)..];
78 78 }
79 79 PatchList { chunks }
80 80 }
81 81
82 82 /// Return the final length of data after patching
83 83 /// given its initial length .
84 84 fn size(&self, initial_size: i32) -> i32 {
85 85 self.chunks
86 86 .iter()
87 87 .fold(initial_size, |acc, chunk| acc + chunk.len_diff())
88 88 }
89 89
90 90 /// Apply the patch to some data.
91 91 pub fn apply(&self, initial: &[u8]) -> Vec<u8> {
92 92 let mut last: usize = 0;
93 93 let mut vec =
94 94 Vec::with_capacity(self.size(initial.len() as i32) as usize);
95 95 for Chunk { start, end, data } in self.chunks.iter() {
96 96 vec.extend(&initial[last..(*start as usize)]);
97 97 vec.extend(data.iter());
98 98 last = *end as usize;
99 99 }
100 100 vec.extend(&initial[last..]);
101 101 vec
102 102 }
103 103
104 104 /// Combine two patch lists into a single patch list.
105 105 ///
106 106 /// Applying consecutive patches can lead to waste of time and memory
107 107 /// as the changes introduced by one patch can be overridden by the next.
108 108 /// Combining patches optimizes the whole patching sequence.
109 109 fn combine(&mut self, other: &mut Self) -> Self {
110 110 let mut chunks = vec![];
111 111
112 112 // Keep track of each growth/shrinkage resulting from applying a chunk
113 113 // in order to adjust the start/end of subsequent chunks.
114 114 let mut offset = 0i32;
115 115
116 116 // Keep track of the chunk of self.chunks to process.
117 117 let mut pos = 0;
118 118
119 119 // For each chunk of `other`, chunks of `self` are processed
120 120 // until they start after the end of the current chunk.
121 121 for Chunk { start, end, data } in other.chunks.iter() {
122 122 // Add chunks of `self` that start before this chunk of `other`
123 123 // without overlap.
124 124 while pos < self.chunks.len()
125 && self.chunks[pos].end_offseted_by(offset) <= *start
125 && self.chunks[pos].end_offset_by(offset) <= *start
126 126 {
127 127 let first = self.chunks[pos].clone();
128 128 offset += first.len_diff();
129 129 chunks.push(first);
130 130 pos += 1;
131 131 }
132 132
133 133 // The current chunk of `self` starts before this chunk of `other`
134 134 // with overlap.
135 135 // The left-most part of data is added as an insertion chunk.
136 136 // The right-most part data is kept in the chunk.
137 137 if pos < self.chunks.len()
138 && self.chunks[pos].start_offseted_by(offset) < *start
138 && self.chunks[pos].start_offset_by(offset) < *start
139 139 {
140 140 let first = &mut self.chunks[pos];
141 141
142 142 let (data_left, data_right) = first.data.split_at(
143 (*start - first.start_offseted_by(offset)) as usize,
143 (*start - first.start_offset_by(offset)) as usize,
144 144 );
145 145 let left = Chunk {
146 146 start: first.start,
147 147 end: first.start,
148 148 data: data_left,
149 149 };
150 150
151 151 first.data = data_right;
152 152
153 153 offset += left.len_diff();
154 154
155 155 chunks.push(left);
156 156
157 157 // There is no index incrementation because the right-most part
158 158 // needs further examination.
159 159 }
160 160
161 161 // At this point remaining chunks of `self` starts after
162 162 // the current chunk of `other`.
163 163
164 164 // `start_offset` will be used to adjust the start of the current
165 165 // chunk of `other`.
166 166 // Offset tracking continues with `end_offset` to adjust the end
167 167 // of the current chunk of `other`.
168 168 let mut next_offset = offset;
169 169
170 170 // Discard the chunks of `self` that are totally overridden
171 171 // by the current chunk of `other`
172 172 while pos < self.chunks.len()
173 && self.chunks[pos].end_offseted_by(next_offset) <= *end
173 && self.chunks[pos].end_offset_by(next_offset) <= *end
174 174 {
175 175 let first = &self.chunks[pos];
176 176 next_offset += first.len_diff();
177 177 pos += 1;
178 178 }
179 179
180 180 // Truncate the left-most part of chunk of `self` that overlaps
181 181 // the current chunk of `other`.
182 182 if pos < self.chunks.len()
183 && self.chunks[pos].start_offseted_by(next_offset) < *end
183 && self.chunks[pos].start_offset_by(next_offset) < *end
184 184 {
185 185 let first = &mut self.chunks[pos];
186 186
187 187 let how_much_to_discard =
188 *end - first.start_offseted_by(next_offset);
188 *end - first.start_offset_by(next_offset);
189 189
190 190 first.data = &first.data[(how_much_to_discard as usize)..];
191 191
192 192 next_offset += how_much_to_discard as i32;
193 193 }
194 194
195 195 // Add the chunk of `other` with adjusted position.
196 196 chunks.push(Chunk {
197 197 start: (*start as i32 - offset) as u32,
198 198 end: (*end as i32 - next_offset) as u32,
199 199 data,
200 200 });
201 201
202 202 // Go back to normal offset tracking for the next `o` chunk
203 203 offset = next_offset;
204 204 }
205 205
206 206 // Add remaining chunks of `self`.
207 207 for elt in &self.chunks[pos..] {
208 208 chunks.push(elt.clone());
209 209 }
210 210 PatchList { chunks }
211 211 }
212 212 }
213 213
214 214 /// Combine a list of patch list into a single patch optimized patch list.
215 215 pub fn fold_patch_lists<'a>(lists: &[PatchList<'a>]) -> PatchList<'a> {
216 216 if lists.len() <= 1 {
217 217 if lists.is_empty() {
218 218 PatchList { chunks: vec![] }
219 219 } else {
220 220 lists[0].clone()
221 221 }
222 222 } else {
223 223 let (left, right) = lists.split_at(lists.len() / 2);
224 224 let mut left_res = fold_patch_lists(left);
225 225 let mut right_res = fold_patch_lists(right);
226 226 left_res.combine(&mut right_res)
227 227 }
228 228 }
229 229
230 230 #[cfg(test)]
231 231 mod tests {
232 232 use super::*;
233 233
234 234 struct PatchDataBuilder {
235 235 data: Vec<u8>,
236 236 }
237 237
238 238 impl PatchDataBuilder {
239 239 pub fn new() -> Self {
240 240 Self { data: vec![] }
241 241 }
242 242
243 243 pub fn replace(
244 244 &mut self,
245 245 start: usize,
246 246 end: usize,
247 247 data: &[u8],
248 248 ) -> &mut Self {
249 249 assert!(start <= end);
250 250 self.data.extend(&(start as i32).to_be_bytes());
251 251 self.data.extend(&(end as i32).to_be_bytes());
252 252 self.data.extend(&(data.len() as i32).to_be_bytes());
253 253 self.data.extend(data.iter());
254 254 self
255 255 }
256 256
257 257 pub fn get(&mut self) -> &[u8] {
258 258 &self.data
259 259 }
260 260 }
261 261
262 262 #[test]
263 263 fn test_ends_before() {
264 264 let data = vec![0u8, 0u8, 0u8];
265 265 let mut patch1_data = PatchDataBuilder::new();
266 266 patch1_data.replace(0, 1, &[1, 2]);
267 267 let mut patch1 = PatchList::new(patch1_data.get());
268 268
269 269 let mut patch2_data = PatchDataBuilder::new();
270 270 patch2_data.replace(2, 4, &[3, 4]);
271 271 let mut patch2 = PatchList::new(patch2_data.get());
272 272
273 273 let patch = patch1.combine(&mut patch2);
274 274
275 275 let result = patch.apply(&data);
276 276
277 277 assert_eq!(result, vec![1u8, 2, 3, 4]);
278 278 }
279 279
280 280 #[test]
281 281 fn test_starts_after() {
282 282 let data = vec![0u8, 0u8, 0u8];
283 283 let mut patch1_data = PatchDataBuilder::new();
284 284 patch1_data.replace(2, 3, &[3]);
285 285 let mut patch1 = PatchList::new(patch1_data.get());
286 286
287 287 let mut patch2_data = PatchDataBuilder::new();
288 288 patch2_data.replace(1, 2, &[1, 2]);
289 289 let mut patch2 = PatchList::new(patch2_data.get());
290 290
291 291 let patch = patch1.combine(&mut patch2);
292 292
293 293 let result = patch.apply(&data);
294 294
295 295 assert_eq!(result, vec![0u8, 1, 2, 3]);
296 296 }
297 297
298 298 #[test]
299 299 fn test_overridden() {
300 300 let data = vec![0u8, 0, 0];
301 301 let mut patch1_data = PatchDataBuilder::new();
302 302 patch1_data.replace(1, 2, &[3, 4]);
303 303 let mut patch1 = PatchList::new(patch1_data.get());
304 304
305 305 let mut patch2_data = PatchDataBuilder::new();
306 306 patch2_data.replace(1, 4, &[1, 2, 3]);
307 307 let mut patch2 = PatchList::new(patch2_data.get());
308 308
309 309 let patch = patch1.combine(&mut patch2);
310 310
311 311 let result = patch.apply(&data);
312 312
313 313 assert_eq!(result, vec![0u8, 1, 2, 3]);
314 314 }
315 315
316 316 #[test]
317 317 fn test_right_most_part_is_overridden() {
318 318 let data = vec![0u8, 0, 0];
319 319 let mut patch1_data = PatchDataBuilder::new();
320 320 patch1_data.replace(0, 1, &[1, 3]);
321 321 let mut patch1 = PatchList::new(patch1_data.get());
322 322
323 323 let mut patch2_data = PatchDataBuilder::new();
324 324 patch2_data.replace(1, 4, &[2, 3, 4]);
325 325 let mut patch2 = PatchList::new(patch2_data.get());
326 326
327 327 let patch = patch1.combine(&mut patch2);
328 328
329 329 let result = patch.apply(&data);
330 330
331 331 assert_eq!(result, vec![1u8, 2, 3, 4]);
332 332 }
333 333
334 334 #[test]
335 335 fn test_left_most_part_is_overridden() {
336 336 let data = vec![0u8, 0, 0];
337 337 let mut patch1_data = PatchDataBuilder::new();
338 338 patch1_data.replace(1, 3, &[1, 3, 4]);
339 339 let mut patch1 = PatchList::new(patch1_data.get());
340 340
341 341 let mut patch2_data = PatchDataBuilder::new();
342 342 patch2_data.replace(0, 2, &[1, 2]);
343 343 let mut patch2 = PatchList::new(patch2_data.get());
344 344
345 345 let patch = patch1.combine(&mut patch2);
346 346
347 347 let result = patch.apply(&data);
348 348
349 349 assert_eq!(result, vec![1u8, 2, 3, 4]);
350 350 }
351 351
352 352 #[test]
353 353 fn test_mid_is_overridden() {
354 354 let data = vec![0u8, 0, 0];
355 355 let mut patch1_data = PatchDataBuilder::new();
356 356 patch1_data.replace(0, 3, &[1, 3, 3, 4]);
357 357 let mut patch1 = PatchList::new(patch1_data.get());
358 358
359 359 let mut patch2_data = PatchDataBuilder::new();
360 360 patch2_data.replace(1, 3, &[2, 3]);
361 361 let mut patch2 = PatchList::new(patch2_data.get());
362 362
363 363 let patch = patch1.combine(&mut patch2);
364 364
365 365 let result = patch.apply(&data);
366 366
367 367 assert_eq!(result, vec![1u8, 2, 3, 4]);
368 368 }
369 369 }
General Comments 0
You need to be logged in to leave comments. Login now