Show More
@@ -0,0 +1,409 b'' | |||
|
1 | // Copyright 2019 Raphaël Gomès <rgomes@octobus.net> | |
|
2 | // | |
|
3 | // This software may be used and distributed according to the terms of the | |
|
4 | // GNU General Public License version 2 or any later version. | |
|
5 | ||
|
6 | use byteorder::{BigEndian, ReadBytesExt, WriteBytesExt}; | |
|
7 | use std::collections::HashMap; | |
|
8 | use std::io::Cursor; | |
|
9 | use {DirstatePackError, DirstateParseError}; | |
|
10 | ||
|
11 | #[derive(Debug, PartialEq, Copy, Clone)] | |
|
12 | pub struct DirstateParents<'a> { | |
|
13 | pub p1: &'a [u8], | |
|
14 | pub p2: &'a [u8], | |
|
15 | } | |
|
16 | /// The C implementation uses all signed types. This will be an issue | |
|
17 | /// either when 4GB+ source files are commonplace or in 2038, whichever | |
|
18 | /// comes first. | |
|
19 | #[derive(Debug, PartialEq)] | |
|
20 | pub struct DirstateEntry { | |
|
21 | pub state: i8, | |
|
22 | pub mode: i32, | |
|
23 | pub mtime: i32, | |
|
24 | pub size: i32, | |
|
25 | } | |
|
26 | pub type DirstateVec = Vec<(Vec<u8>, DirstateEntry)>; | |
|
27 | ||
|
28 | #[derive(Debug, PartialEq)] | |
|
29 | pub struct CopyVecEntry<'a> { | |
|
30 | pub path: &'a [u8], | |
|
31 | pub copy_path: &'a [u8], | |
|
32 | } | |
|
33 | pub type CopyVec<'a> = Vec<CopyVecEntry<'a>>; | |
|
34 | ||
|
35 | /// Parents are stored in the dirstate as byte hashes. | |
|
36 | const PARENT_SIZE: usize = 20; | |
|
37 | /// Dirstate entries have a static part of 8 + 32 + 32 + 32 + 32 bits. | |
|
38 | const MIN_ENTRY_SIZE: usize = 17; | |
|
39 | ||
|
40 | pub fn parse_dirstate( | |
|
41 | contents: &[u8], | |
|
42 | ) -> Result<(DirstateParents, DirstateVec, CopyVec), DirstateParseError> { | |
|
43 | if contents.len() < PARENT_SIZE * 2 { | |
|
44 | return Err(DirstateParseError::TooLittleData); | |
|
45 | } | |
|
46 | ||
|
47 | let mut dirstate_vec = vec![]; | |
|
48 | let mut copies = vec![]; | |
|
49 | let mut curr_pos = PARENT_SIZE * 2; | |
|
50 | let parents = DirstateParents { | |
|
51 | p1: &contents[..PARENT_SIZE], | |
|
52 | p2: &contents[PARENT_SIZE..curr_pos], | |
|
53 | }; | |
|
54 | ||
|
55 | while curr_pos < contents.len() { | |
|
56 | if curr_pos + MIN_ENTRY_SIZE > contents.len() { | |
|
57 | return Err(DirstateParseError::Overflow); | |
|
58 | } | |
|
59 | let entry_bytes = &contents[curr_pos..]; | |
|
60 | ||
|
61 | let mut cursor = Cursor::new(entry_bytes); | |
|
62 | let state = cursor.read_i8()?; | |
|
63 | let mode = cursor.read_i32::<BigEndian>()?; | |
|
64 | let size = cursor.read_i32::<BigEndian>()?; | |
|
65 | let mtime = cursor.read_i32::<BigEndian>()?; | |
|
66 | let path_len = cursor.read_i32::<BigEndian>()? as usize; | |
|
67 | ||
|
68 | if path_len > contents.len() - curr_pos { | |
|
69 | return Err(DirstateParseError::Overflow); | |
|
70 | } | |
|
71 | ||
|
72 | // Slice instead of allocating a Vec needed for `read_exact` | |
|
73 | let path = &entry_bytes[MIN_ENTRY_SIZE..MIN_ENTRY_SIZE + (path_len)]; | |
|
74 | ||
|
75 | let (path, copy) = match memchr::memchr(0, path) { | |
|
76 | None => (path, None), | |
|
77 | Some(i) => (&path[..i], Some(&path[(i + 1)..])), | |
|
78 | }; | |
|
79 | ||
|
80 | if let Some(copy_path) = copy { | |
|
81 | copies.push(CopyVecEntry { path, copy_path }); | |
|
82 | }; | |
|
83 | dirstate_vec.push(( | |
|
84 | path.to_owned(), | |
|
85 | DirstateEntry { | |
|
86 | state, | |
|
87 | mode, | |
|
88 | size, | |
|
89 | mtime, | |
|
90 | }, | |
|
91 | )); | |
|
92 | curr_pos = curr_pos + MIN_ENTRY_SIZE + (path_len); | |
|
93 | } | |
|
94 | ||
|
95 | Ok((parents, dirstate_vec, copies)) | |
|
96 | } | |
|
97 | ||
|
98 | pub fn pack_dirstate( | |
|
99 | dirstate_vec: &DirstateVec, | |
|
100 | copymap: &HashMap<Vec<u8>, Vec<u8>>, | |
|
101 | parents: DirstateParents, | |
|
102 | now: i32, | |
|
103 | ) -> Result<(Vec<u8>, DirstateVec), DirstatePackError> { | |
|
104 | if parents.p1.len() != PARENT_SIZE || parents.p2.len() != PARENT_SIZE { | |
|
105 | return Err(DirstatePackError::CorruptedParent); | |
|
106 | } | |
|
107 | ||
|
108 | let expected_size: usize = dirstate_vec | |
|
109 | .iter() | |
|
110 | .map(|(ref filename, _)| { | |
|
111 | let mut length = MIN_ENTRY_SIZE + filename.len(); | |
|
112 | if let Some(ref copy) = copymap.get(filename) { | |
|
113 | length += copy.len() + 1; | |
|
114 | } | |
|
115 | length | |
|
116 | }) | |
|
117 | .sum(); | |
|
118 | let expected_size = expected_size + PARENT_SIZE * 2; | |
|
119 | ||
|
120 | let mut packed = Vec::with_capacity(expected_size); | |
|
121 | let mut new_dirstate_vec = vec![]; | |
|
122 | ||
|
123 | packed.extend(parents.p1); | |
|
124 | packed.extend(parents.p2); | |
|
125 | ||
|
126 | for (ref filename, entry) in dirstate_vec { | |
|
127 | let mut new_filename: Vec<u8> = filename.to_owned(); | |
|
128 | let mut new_mtime: i32 = entry.mtime; | |
|
129 | if entry.state == 'n' as i8 && entry.mtime == now.into() { | |
|
130 | // The file was last modified "simultaneously" with the current | |
|
131 | // write to dirstate (i.e. within the same second for file- | |
|
132 | // systems with a granularity of 1 sec). This commonly happens | |
|
133 | // for at least a couple of files on 'update'. | |
|
134 | // The user could change the file without changing its size | |
|
135 | // within the same second. Invalidate the file's mtime in | |
|
136 | // dirstate, forcing future 'status' calls to compare the | |
|
137 | // contents of the file if the size is the same. This prevents | |
|
138 | // mistakenly treating such files as clean. | |
|
139 | new_mtime = -1; | |
|
140 | new_dirstate_vec.push(( | |
|
141 | filename.to_owned(), | |
|
142 | DirstateEntry { | |
|
143 | mtime: new_mtime, | |
|
144 | ..*entry | |
|
145 | }, | |
|
146 | )); | |
|
147 | } | |
|
148 | ||
|
149 | if let Some(copy) = copymap.get(filename) { | |
|
150 | new_filename.push('\0' as u8); | |
|
151 | new_filename.extend(copy); | |
|
152 | } | |
|
153 | ||
|
154 | packed.write_i8(entry.state)?; | |
|
155 | packed.write_i32::<BigEndian>(entry.mode)?; | |
|
156 | packed.write_i32::<BigEndian>(entry.size)?; | |
|
157 | packed.write_i32::<BigEndian>(new_mtime)?; | |
|
158 | packed.write_i32::<BigEndian>(new_filename.len() as i32)?; | |
|
159 | packed.extend(new_filename) | |
|
160 | } | |
|
161 | ||
|
162 | if packed.len() != expected_size { | |
|
163 | return Err(DirstatePackError::BadSize(expected_size, packed.len())); | |
|
164 | } | |
|
165 | ||
|
166 | Ok((packed, new_dirstate_vec)) | |
|
167 | } | |
|
168 | ||
|
169 | #[cfg(test)] | |
|
170 | mod tests { | |
|
171 | use super::*; | |
|
172 | ||
|
173 | #[test] | |
|
174 | fn test_pack_dirstate_empty() { | |
|
175 | let dirstate_vec: DirstateVec = vec![]; | |
|
176 | let copymap = HashMap::new(); | |
|
177 | let parents = DirstateParents { | |
|
178 | p1: b"12345678910111213141", | |
|
179 | p2: b"00000000000000000000", | |
|
180 | }; | |
|
181 | let now: i32 = 15000000; | |
|
182 | let expected = | |
|
183 | (b"1234567891011121314100000000000000000000".to_vec(), vec![]); | |
|
184 | ||
|
185 | assert_eq!( | |
|
186 | expected, | |
|
187 | pack_dirstate(&dirstate_vec, ©map, parents, now).unwrap() | |
|
188 | ); | |
|
189 | } | |
|
190 | #[test] | |
|
191 | fn test_pack_dirstate_one_entry() { | |
|
192 | let dirstate_vec: DirstateVec = vec![( | |
|
193 | vec!['f' as u8, '1' as u8], | |
|
194 | DirstateEntry { | |
|
195 | state: 'n' as i8, | |
|
196 | mode: 0o644, | |
|
197 | size: 0, | |
|
198 | mtime: 791231220, | |
|
199 | }, | |
|
200 | )]; | |
|
201 | let copymap = HashMap::new(); | |
|
202 | let parents = DirstateParents { | |
|
203 | p1: b"12345678910111213141", | |
|
204 | p2: b"00000000000000000000", | |
|
205 | }; | |
|
206 | let now: i32 = 15000000; | |
|
207 | let expected = ( | |
|
208 | [ | |
|
209 | 49, 50, 51, 52, 53, 54, 55, 56, 57, 49, 48, 49, 49, 49, 50, | |
|
210 | 49, 51, 49, 52, 49, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, | |
|
211 | 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 110, 0, 0, 1, 164, 0, | |
|
212 | 0, 0, 0, 47, 41, 58, 244, 0, 0, 0, 2, 102, 49, | |
|
213 | ] | |
|
214 | .to_vec(), | |
|
215 | vec![], | |
|
216 | ); | |
|
217 | ||
|
218 | assert_eq!( | |
|
219 | expected, | |
|
220 | pack_dirstate(&dirstate_vec, ©map, parents, now).unwrap() | |
|
221 | ); | |
|
222 | } | |
|
223 | #[test] | |
|
224 | fn test_pack_dirstate_one_entry_with_copy() { | |
|
225 | let dirstate_vec: DirstateVec = vec![( | |
|
226 | b"f1".to_vec(), | |
|
227 | DirstateEntry { | |
|
228 | state: 'n' as i8, | |
|
229 | mode: 0o644, | |
|
230 | size: 0, | |
|
231 | mtime: 791231220, | |
|
232 | }, | |
|
233 | )]; | |
|
234 | let mut copymap = HashMap::new(); | |
|
235 | copymap.insert(b"f1".to_vec(), b"copyname".to_vec()); | |
|
236 | let parents = DirstateParents { | |
|
237 | p1: b"12345678910111213141", | |
|
238 | p2: b"00000000000000000000", | |
|
239 | }; | |
|
240 | let now: i32 = 15000000; | |
|
241 | let expected = ( | |
|
242 | [ | |
|
243 | 49, 50, 51, 52, 53, 54, 55, 56, 57, 49, 48, 49, 49, 49, 50, | |
|
244 | 49, 51, 49, 52, 49, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, | |
|
245 | 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 110, 0, 0, 1, 164, 0, | |
|
246 | 0, 0, 0, 47, 41, 58, 244, 0, 0, 0, 11, 102, 49, 0, 99, 111, | |
|
247 | 112, 121, 110, 97, 109, 101, | |
|
248 | ] | |
|
249 | .to_vec(), | |
|
250 | vec![], | |
|
251 | ); | |
|
252 | ||
|
253 | assert_eq!( | |
|
254 | expected, | |
|
255 | pack_dirstate(&dirstate_vec, ©map, parents, now).unwrap() | |
|
256 | ); | |
|
257 | } | |
|
258 | ||
|
259 | #[test] | |
|
260 | fn test_parse_pack_one_entry_with_copy() { | |
|
261 | let dirstate_vec: DirstateVec = vec![( | |
|
262 | b"f1".to_vec(), | |
|
263 | DirstateEntry { | |
|
264 | state: 'n' as i8, | |
|
265 | mode: 0o644, | |
|
266 | size: 0, | |
|
267 | mtime: 791231220, | |
|
268 | }, | |
|
269 | )]; | |
|
270 | let mut copymap = HashMap::new(); | |
|
271 | copymap.insert(b"f1".to_vec(), b"copyname".to_vec()); | |
|
272 | let parents = DirstateParents { | |
|
273 | p1: b"12345678910111213141", | |
|
274 | p2: b"00000000000000000000", | |
|
275 | }; | |
|
276 | let now: i32 = 15000000; | |
|
277 | let result = | |
|
278 | pack_dirstate(&dirstate_vec, ©map, parents, now).unwrap(); | |
|
279 | ||
|
280 | assert_eq!( | |
|
281 | ( | |
|
282 | parents, | |
|
283 | dirstate_vec, | |
|
284 | copymap | |
|
285 | .iter() | |
|
286 | .map(|(k, v)| CopyVecEntry { | |
|
287 | path: k.as_slice(), | |
|
288 | copy_path: v.as_slice() | |
|
289 | }) | |
|
290 | .collect() | |
|
291 | ), | |
|
292 | parse_dirstate(result.0.as_slice()).unwrap() | |
|
293 | ) | |
|
294 | } | |
|
295 | ||
|
296 | #[test] | |
|
297 | fn test_parse_pack_multiple_entries_with_copy() { | |
|
298 | let dirstate_vec: DirstateVec = vec![ | |
|
299 | ( | |
|
300 | b"f1".to_vec(), | |
|
301 | DirstateEntry { | |
|
302 | state: 'n' as i8, | |
|
303 | mode: 0o644, | |
|
304 | size: 0, | |
|
305 | mtime: 791231220, | |
|
306 | }, | |
|
307 | ), | |
|
308 | ( | |
|
309 | b"f2".to_vec(), | |
|
310 | DirstateEntry { | |
|
311 | state: 'm' as i8, | |
|
312 | mode: 0o777, | |
|
313 | size: 1000, | |
|
314 | mtime: 791231220, | |
|
315 | }, | |
|
316 | ), | |
|
317 | ( | |
|
318 | b"f3".to_vec(), | |
|
319 | DirstateEntry { | |
|
320 | state: 'r' as i8, | |
|
321 | mode: 0o644, | |
|
322 | size: 234553, | |
|
323 | mtime: 791231220, | |
|
324 | }, | |
|
325 | ), | |
|
326 | ( | |
|
327 | b"f4\xF6".to_vec(), | |
|
328 | DirstateEntry { | |
|
329 | state: 'a' as i8, | |
|
330 | mode: 0o644, | |
|
331 | size: -1, | |
|
332 | mtime: -1, | |
|
333 | }, | |
|
334 | ), | |
|
335 | ]; | |
|
336 | let mut copymap = HashMap::new(); | |
|
337 | copymap.insert(b"f1".to_vec(), b"copyname".to_vec()); | |
|
338 | copymap.insert(b"f4\xF6".to_vec(), b"copyname2".to_vec()); | |
|
339 | let parents = DirstateParents { | |
|
340 | p1: b"12345678910111213141", | |
|
341 | p2: b"00000000000000000000", | |
|
342 | }; | |
|
343 | let now: i32 = 15000000; | |
|
344 | let result = | |
|
345 | pack_dirstate(&dirstate_vec, ©map, parents, now).unwrap(); | |
|
346 | ||
|
347 | assert_eq!( | |
|
348 | (parents, dirstate_vec, copymap), | |
|
349 | parse_dirstate(result.0.as_slice()) | |
|
350 | .and_then(|(p, dvec, cvec)| Ok(( | |
|
351 | p, | |
|
352 | dvec, | |
|
353 | cvec.iter() | |
|
354 | .map(|entry| ( | |
|
355 | entry.path.to_vec(), | |
|
356 | entry.copy_path.to_vec() | |
|
357 | )) | |
|
358 | .collect() | |
|
359 | ))) | |
|
360 | .unwrap() | |
|
361 | ) | |
|
362 | } | |
|
363 | ||
|
364 | #[test] | |
|
365 | /// https://www.mercurial-scm.org/repo/hg/rev/af3f26b6bba4 | |
|
366 | fn test_parse_pack_one_entry_with_copy_and_time_conflict() { | |
|
367 | let dirstate_vec: DirstateVec = vec![( | |
|
368 | b"f1".to_vec(), | |
|
369 | DirstateEntry { | |
|
370 | state: 'n' as i8, | |
|
371 | mode: 0o644, | |
|
372 | size: 0, | |
|
373 | mtime: 15000000, | |
|
374 | }, | |
|
375 | )]; | |
|
376 | let mut copymap = HashMap::new(); | |
|
377 | copymap.insert(b"f1".to_vec(), b"copyname".to_vec()); | |
|
378 | let parents = DirstateParents { | |
|
379 | p1: b"12345678910111213141", | |
|
380 | p2: b"00000000000000000000", | |
|
381 | }; | |
|
382 | let now: i32 = 15000000; | |
|
383 | let result = | |
|
384 | pack_dirstate(&dirstate_vec, ©map, parents, now).unwrap(); | |
|
385 | ||
|
386 | assert_eq!( | |
|
387 | ( | |
|
388 | parents, | |
|
389 | vec![( | |
|
390 | b"f1".to_vec(), | |
|
391 | DirstateEntry { | |
|
392 | state: 'n' as i8, | |
|
393 | mode: 0o644, | |
|
394 | size: 0, | |
|
395 | mtime: -1 | |
|
396 | } | |
|
397 | )], | |
|
398 | copymap | |
|
399 | .iter() | |
|
400 | .map(|(k, v)| CopyVecEntry { | |
|
401 | path: k.as_slice(), | |
|
402 | copy_path: v.as_slice() | |
|
403 | }) | |
|
404 | .collect() | |
|
405 | ), | |
|
406 | parse_dirstate(result.0.as_slice()).unwrap() | |
|
407 | ) | |
|
408 | } | |
|
409 | } |
@@ -1,12 +1,16 b'' | |||
|
1 | 1 | [package] |
|
2 | 2 | name = "hg-core" |
|
3 | 3 | version = "0.1.0" |
|
4 | 4 | authors = ["Georges Racinet <gracinet@anybox.fr>"] |
|
5 | 5 | description = "Mercurial pure Rust core library, with no assumption on Python bindings (FFI)" |
|
6 | 6 | |
|
7 | 7 | [lib] |
|
8 | 8 | name = "hg" |
|
9 | 9 | |
|
10 | 10 | [dev-dependencies] |
|
11 | 11 | rand = "*" |
|
12 | 12 | rand_pcg = "*" |
|
13 | ||
|
14 | [dependencies] | |
|
15 | memchr = "2.2.0" | |
|
16 | byteorder = "1.3.1" No newline at end of file |
@@ -1,42 +1,71 b'' | |||
|
1 | 1 | // Copyright 2018 Georges Racinet <gracinet@anybox.fr> |
|
2 | 2 | // |
|
3 | 3 | // This software may be used and distributed according to the terms of the |
|
4 | 4 | // GNU General Public License version 2 or any later version. |
|
5 | extern crate byteorder; | |
|
6 | extern crate memchr; | |
|
7 | ||
|
5 | 8 | mod ancestors; |
|
6 | 9 | pub mod dagops; |
|
7 | 10 | pub use ancestors::{AncestorsIterator, LazyAncestors, MissingAncestors}; |
|
8 | 11 | pub mod testing; // unconditionally built, for use from integration tests |
|
9 | 12 | pub mod discovery; |
|
10 | 13 | |
|
11 | 14 | /// Mercurial revision numbers |
|
12 | 15 | /// |
|
13 | 16 | /// As noted in revlog.c, revision numbers are actually encoded in |
|
14 | 17 | /// 4 bytes, and are liberally converted to ints, whence the i32 |
|
15 | 18 | pub type Revision = i32; |
|
16 | 19 | |
|
17 | 20 | |
|
18 | 21 | /// Marker expressing the absence of a parent |
|
19 | 22 | /// |
|
20 | 23 | /// Independently of the actual representation, `NULL_REVISION` is guaranteed |
|
21 | 24 | /// to be smaller that all existing revisions. |
|
22 | 25 | pub const NULL_REVISION: Revision = -1; |
|
23 | 26 | |
|
24 | 27 | /// Same as `mercurial.node.wdirrev` |
|
25 | 28 | /// |
|
26 | 29 | /// This is also equal to `i32::max_value()`, but it's better to spell |
|
27 | 30 | /// it out explicitely, same as in `mercurial.node` |
|
28 | 31 | pub const WORKING_DIRECTORY_REVISION: Revision = 0x7fffffff; |
|
29 | 32 | |
|
30 | 33 | /// The simplest expression of what we need of Mercurial DAGs. |
|
31 | 34 | pub trait Graph { |
|
32 | 35 | /// Return the two parents of the given `Revision`. |
|
33 | 36 | /// |
|
34 | 37 | /// Each of the parents can be independently `NULL_REVISION` |
|
35 | 38 | fn parents(&self, Revision) -> Result<[Revision; 2], GraphError>; |
|
36 | 39 | } |
|
37 | 40 | |
|
38 | 41 | #[derive(Clone, Debug, PartialEq)] |
|
39 | 42 | pub enum GraphError { |
|
40 | 43 | ParentOutOfRange(Revision), |
|
41 | 44 | WorkingDirectoryUnsupported, |
|
42 | 45 | } |
|
46 | ||
|
47 | #[derive(Clone, Debug, PartialEq)] | |
|
48 | pub enum DirstateParseError { | |
|
49 | TooLittleData, | |
|
50 | Overflow, | |
|
51 | CorruptedEntry(String), | |
|
52 | } | |
|
53 | ||
|
54 | #[derive(Debug, PartialEq)] | |
|
55 | pub enum DirstatePackError { | |
|
56 | CorruptedEntry(String), | |
|
57 | CorruptedParent, | |
|
58 | BadSize(usize, usize), | |
|
59 | } | |
|
60 | ||
|
61 | impl From<std::io::Error> for DirstatePackError { | |
|
62 | fn from(e: std::io::Error) -> Self { | |
|
63 | DirstatePackError::CorruptedEntry(e.to_string()) | |
|
64 | } | |
|
65 | } | |
|
66 | ||
|
67 | impl From<std::io::Error> for DirstateParseError { | |
|
68 | fn from(e: std::io::Error) -> Self { | |
|
69 | DirstateParseError::CorruptedEntry(e.to_string()) | |
|
70 | } | |
|
71 | } |
General Comments 0
You need to be logged in to leave comments.
Login now