##// END OF EJS Templates
hg-core: use `u32` instead of `i32` in `Chunk` (D8958#inline-15001 followup)...
Antoine cezar -
r46171:d07e4656 default
parent child Browse files
Show More
@@ -1,367 +1,369
1 1 use byteorder::{BigEndian, ByteOrder};
2 2
3 3 /// A chunk of data to insert, delete or replace in a patch
4 4 ///
5 5 /// A chunk is:
6 6 /// - an insertion when `!data.is_empty() && start == end`
7 7 /// - an deletion when `data.is_empty() && start < end`
8 8 /// - a replacement when `!data.is_empty() && start < end`
9 9 /// - not doing anything when `data.is_empty() && start == end`
10 10 #[derive(Debug, Clone)]
11 11 struct Chunk<'a> {
12 12 /// The start position of the chunk of data to replace
13 start: i32,
13 start: u32,
14 14 /// The end position of the chunk of data to replace (open end interval)
15 end: i32,
15 end: u32,
16 16 /// The data replacing the chunk
17 17 data: &'a [u8],
18 18 }
19 19
20 20 impl<'a> Chunk<'a> {
21 21 /// Adjusted start of the chunk to replace.
22 22 ///
23 23 /// Offset allow to take into account the growth/shrinkage of data
24 24 /// induced by previously applied chunks.
25 fn start_offseted_by(&self, offset: i32) -> i32 {
26 self.start + offset
25 fn start_offseted_by(&self, offset: i32) -> u32 {
26 let start = self.start as i32 + offset;
27 assert!(start >= 0, "negative chunk start should never happen");
28 start as u32
27 29 }
28 30
29 31 /// Adjusted end of the chunk to replace.
30 32 ///
31 33 /// Offset allow to take into account the growth/shrinkage of data
32 34 /// induced by previously applied chunks.
33 fn end_offseted_by(&self, offset: i32) -> i32 {
34 self.start_offseted_by(offset) + (self.data.len() as i32)
35 fn end_offseted_by(&self, offset: i32) -> u32 {
36 self.start_offseted_by(offset) + self.data.len() as u32
35 37 }
36 38
37 39 /// Length of the replaced chunk.
38 fn replaced_len(&self) -> i32 {
40 fn replaced_len(&self) -> u32 {
39 41 self.end - self.start
40 42 }
41 43
42 44 /// Length difference between the replacing data and the replaced data.
43 45 fn len_diff(&self) -> i32 {
44 (self.data.len() as i32) - self.replaced_len()
46 self.data.len() as i32 - self.replaced_len() as i32
45 47 }
46 48 }
47 49
48 50 /// The delta between two revisions data.
49 51 #[derive(Debug, Clone)]
50 52 pub struct PatchList<'a> {
51 53 /// A collection of chunks to apply.
52 54 ///
53 55 /// Those chunks are:
54 56 /// - ordered from the left-most replacement to the right-most replacement
55 57 /// - non-overlapping, meaning that two chucks can not change the same
56 58 /// chunk of the patched data
57 59 chunks: Vec<Chunk<'a>>,
58 60 }
59 61
60 62 impl<'a> PatchList<'a> {
61 63 /// Create a `PatchList` from bytes.
62 64 pub fn new(data: &'a [u8]) -> Self {
63 65 let mut chunks = vec![];
64 66 let mut data = data;
65 67 while !data.is_empty() {
66 let start = BigEndian::read_i32(&data[0..]);
67 let end = BigEndian::read_i32(&data[4..]);
68 let len = BigEndian::read_i32(&data[8..]);
69 assert!(0 <= start && start <= end && len >= 0);
68 let start = BigEndian::read_u32(&data[0..]);
69 let end = BigEndian::read_u32(&data[4..]);
70 let len = BigEndian::read_u32(&data[8..]);
71 assert!(start <= end);
70 72 chunks.push(Chunk {
71 73 start,
72 74 end,
73 75 data: &data[12..12 + (len as usize)],
74 76 });
75 77 data = &data[12 + (len as usize)..];
76 78 }
77 79 PatchList { chunks }
78 80 }
79 81
80 82 /// Return the final length of data after patching
81 83 /// given its initial length .
82 84 fn size(&self, initial_size: i32) -> i32 {
83 85 self.chunks
84 86 .iter()
85 87 .fold(initial_size, |acc, chunk| acc + chunk.len_diff())
86 88 }
87 89
88 90 /// Apply the patch to some data.
89 91 pub fn apply(&self, initial: &[u8]) -> Vec<u8> {
90 92 let mut last: usize = 0;
91 93 let mut vec =
92 94 Vec::with_capacity(self.size(initial.len() as i32) as usize);
93 95 for Chunk { start, end, data } in self.chunks.iter() {
94 96 vec.extend(&initial[last..(*start as usize)]);
95 97 vec.extend(data.iter());
96 98 last = *end as usize;
97 99 }
98 100 vec.extend(&initial[last..]);
99 101 vec
100 102 }
101 103
102 104 /// Combine two patch lists into a single patch list.
103 105 ///
104 106 /// Applying consecutive patches can lead to waste of time and memory
105 107 /// as the changes introduced by one patch can be overridden by the next.
106 108 /// Combining patches optimizes the whole patching sequence.
107 109 fn combine(&mut self, other: &mut Self) -> Self {
108 110 let mut chunks = vec![];
109 111
110 112 // Keep track of each growth/shrinkage resulting from applying a chunk
111 113 // in order to adjust the start/end of subsequent chunks.
112 114 let mut offset = 0i32;
113 115
114 116 // Keep track of the chunk of self.chunks to process.
115 117 let mut pos = 0;
116 118
117 119 // For each chunk of `other`, chunks of `self` are processed
118 120 // until they start after the end of the current chunk.
119 121 for Chunk { start, end, data } in other.chunks.iter() {
120 122 // Add chunks of `self` that start before this chunk of `other`
121 123 // without overlap.
122 124 while pos < self.chunks.len()
123 125 && self.chunks[pos].end_offseted_by(offset) <= *start
124 126 {
125 127 let first = self.chunks[pos].clone();
126 128 offset += first.len_diff();
127 129 chunks.push(first);
128 130 pos += 1;
129 131 }
130 132
131 133 // The current chunk of `self` starts before this chunk of `other`
132 134 // with overlap.
133 135 // The left-most part of data is added as an insertion chunk.
134 136 // The right-most part data is kept in the chunk.
135 137 if pos < self.chunks.len()
136 138 && self.chunks[pos].start_offseted_by(offset) < *start
137 139 {
138 140 let first = &mut self.chunks[pos];
139 141
140 142 let (data_left, data_right) = first.data.split_at(
141 143 (*start - first.start_offseted_by(offset)) as usize,
142 144 );
143 145 let left = Chunk {
144 146 start: first.start,
145 147 end: first.start,
146 148 data: data_left,
147 149 };
148 150
149 151 first.data = data_right;
150 152
151 153 offset += left.len_diff();
152 154
153 155 chunks.push(left);
154 156
155 157 // There is no index incrementation because the right-most part
156 158 // needs further examination.
157 159 }
158 160
159 161 // At this point remaining chunks of `self` starts after
160 162 // the current chunk of `other`.
161 163
162 164 // `start_offset` will be used to adjust the start of the current
163 165 // chunk of `other`.
164 166 // Offset tracking continues with `end_offset` to adjust the end
165 167 // of the current chunk of `other`.
166 168 let mut next_offset = offset;
167 169
168 170 // Discard the chunks of `self` that are totally overridden
169 171 // by the current chunk of `other`
170 172 while pos < self.chunks.len()
171 173 && self.chunks[pos].end_offseted_by(next_offset) <= *end
172 174 {
173 175 let first = &self.chunks[pos];
174 176 next_offset += first.len_diff();
175 177 pos += 1;
176 178 }
177 179
178 180 // Truncate the left-most part of chunk of `self` that overlaps
179 181 // the current chunk of `other`.
180 182 if pos < self.chunks.len()
181 183 && self.chunks[pos].start_offseted_by(next_offset) < *end
182 184 {
183 185 let first = &mut self.chunks[pos];
184 186
185 187 let how_much_to_discard =
186 188 *end - first.start_offseted_by(next_offset);
187 189
188 190 first.data = &first.data[(how_much_to_discard as usize)..];
189 191
190 next_offset += how_much_to_discard;
192 next_offset += how_much_to_discard as i32;
191 193 }
192 194
193 195 // Add the chunk of `other` with adjusted position.
194 196 chunks.push(Chunk {
195 start: *start - offset,
196 end: *end - next_offset,
197 start: (*start as i32 - offset) as u32,
198 end: (*end as i32 - next_offset) as u32,
197 199 data,
198 200 });
199 201
200 202 // Go back to normal offset tracking for the next `o` chunk
201 203 offset = next_offset;
202 204 }
203 205
204 206 // Add remaining chunks of `self`.
205 207 for elt in &self.chunks[pos..] {
206 208 chunks.push(elt.clone());
207 209 }
208 210 PatchList { chunks }
209 211 }
210 212 }
211 213
212 214 /// Combine a list of patch list into a single patch optimized patch list.
213 215 pub fn fold_patch_lists<'a>(lists: &[PatchList<'a>]) -> PatchList<'a> {
214 216 if lists.len() <= 1 {
215 217 if lists.is_empty() {
216 218 PatchList { chunks: vec![] }
217 219 } else {
218 220 lists[0].clone()
219 221 }
220 222 } else {
221 223 let (left, right) = lists.split_at(lists.len() / 2);
222 224 let mut left_res = fold_patch_lists(left);
223 225 let mut right_res = fold_patch_lists(right);
224 226 left_res.combine(&mut right_res)
225 227 }
226 228 }
227 229
228 230 #[cfg(test)]
229 231 mod tests {
230 232 use super::*;
231 233
232 234 struct PatchDataBuilder {
233 235 data: Vec<u8>,
234 236 }
235 237
236 238 impl PatchDataBuilder {
237 239 pub fn new() -> Self {
238 240 Self { data: vec![] }
239 241 }
240 242
241 243 pub fn replace(
242 244 &mut self,
243 245 start: usize,
244 246 end: usize,
245 247 data: &[u8],
246 248 ) -> &mut Self {
247 249 assert!(start <= end);
248 250 self.data.extend(&(start as i32).to_be_bytes());
249 251 self.data.extend(&(end as i32).to_be_bytes());
250 252 self.data.extend(&(data.len() as i32).to_be_bytes());
251 253 self.data.extend(data.iter());
252 254 self
253 255 }
254 256
255 257 pub fn get(&mut self) -> &[u8] {
256 258 &self.data
257 259 }
258 260 }
259 261
260 262 #[test]
261 263 fn test_ends_before() {
262 264 let data = vec![0u8, 0u8, 0u8];
263 265 let mut patch1_data = PatchDataBuilder::new();
264 266 patch1_data.replace(0, 1, &[1, 2]);
265 267 let mut patch1 = PatchList::new(patch1_data.get());
266 268
267 269 let mut patch2_data = PatchDataBuilder::new();
268 270 patch2_data.replace(2, 4, &[3, 4]);
269 271 let mut patch2 = PatchList::new(patch2_data.get());
270 272
271 273 let patch = patch1.combine(&mut patch2);
272 274
273 275 let result = patch.apply(&data);
274 276
275 277 assert_eq!(result, vec![1u8, 2, 3, 4]);
276 278 }
277 279
278 280 #[test]
279 281 fn test_starts_after() {
280 282 let data = vec![0u8, 0u8, 0u8];
281 283 let mut patch1_data = PatchDataBuilder::new();
282 284 patch1_data.replace(2, 3, &[3]);
283 285 let mut patch1 = PatchList::new(patch1_data.get());
284 286
285 287 let mut patch2_data = PatchDataBuilder::new();
286 288 patch2_data.replace(1, 2, &[1, 2]);
287 289 let mut patch2 = PatchList::new(patch2_data.get());
288 290
289 291 let patch = patch1.combine(&mut patch2);
290 292
291 293 let result = patch.apply(&data);
292 294
293 295 assert_eq!(result, vec![0u8, 1, 2, 3]);
294 296 }
295 297
296 298 #[test]
297 299 fn test_overridden() {
298 300 let data = vec![0u8, 0, 0];
299 301 let mut patch1_data = PatchDataBuilder::new();
300 302 patch1_data.replace(1, 2, &[3, 4]);
301 303 let mut patch1 = PatchList::new(patch1_data.get());
302 304
303 305 let mut patch2_data = PatchDataBuilder::new();
304 306 patch2_data.replace(1, 4, &[1, 2, 3]);
305 307 let mut patch2 = PatchList::new(patch2_data.get());
306 308
307 309 let patch = patch1.combine(&mut patch2);
308 310
309 311 let result = patch.apply(&data);
310 312
311 313 assert_eq!(result, vec![0u8, 1, 2, 3]);
312 314 }
313 315
314 316 #[test]
315 317 fn test_right_most_part_is_overridden() {
316 318 let data = vec![0u8, 0, 0];
317 319 let mut patch1_data = PatchDataBuilder::new();
318 320 patch1_data.replace(0, 1, &[1, 3]);
319 321 let mut patch1 = PatchList::new(patch1_data.get());
320 322
321 323 let mut patch2_data = PatchDataBuilder::new();
322 324 patch2_data.replace(1, 4, &[2, 3, 4]);
323 325 let mut patch2 = PatchList::new(patch2_data.get());
324 326
325 327 let patch = patch1.combine(&mut patch2);
326 328
327 329 let result = patch.apply(&data);
328 330
329 331 assert_eq!(result, vec![1u8, 2, 3, 4]);
330 332 }
331 333
332 334 #[test]
333 335 fn test_left_most_part_is_overridden() {
334 336 let data = vec![0u8, 0, 0];
335 337 let mut patch1_data = PatchDataBuilder::new();
336 338 patch1_data.replace(1, 3, &[1, 3, 4]);
337 339 let mut patch1 = PatchList::new(patch1_data.get());
338 340
339 341 let mut patch2_data = PatchDataBuilder::new();
340 342 patch2_data.replace(0, 2, &[1, 2]);
341 343 let mut patch2 = PatchList::new(patch2_data.get());
342 344
343 345 let patch = patch1.combine(&mut patch2);
344 346
345 347 let result = patch.apply(&data);
346 348
347 349 assert_eq!(result, vec![1u8, 2, 3, 4]);
348 350 }
349 351
350 352 #[test]
351 353 fn test_mid_is_overridden() {
352 354 let data = vec![0u8, 0, 0];
353 355 let mut patch1_data = PatchDataBuilder::new();
354 356 patch1_data.replace(0, 3, &[1, 3, 3, 4]);
355 357 let mut patch1 = PatchList::new(patch1_data.get());
356 358
357 359 let mut patch2_data = PatchDataBuilder::new();
358 360 patch2_data.replace(1, 3, &[2, 3]);
359 361 let mut patch2 = PatchList::new(patch2_data.get());
360 362
361 363 let patch = patch1.combine(&mut patch2);
362 364
363 365 let result = patch.apply(&data);
364 366
365 367 assert_eq!(result, vec![1u8, 2, 3, 4]);
366 368 }
367 369 }
General Comments 0
You need to be logged in to leave comments. Login now