##// END OF EJS Templates
rust-dirstate: add rust implementation of `parse_dirstate` and `pack_dirstate`...
Raphaël Gomès -
r42873:d1786c1d default
parent child Browse files
Show More
@@ -0,0 +1,409 b''
1 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
2 //
3 // This software may be used and distributed according to the terms of the
4 // GNU General Public License version 2 or any later version.
5
6 use byteorder::{BigEndian, ReadBytesExt, WriteBytesExt};
7 use std::collections::HashMap;
8 use std::io::Cursor;
9 use {DirstatePackError, DirstateParseError};
10
/// The two parent hashes found at the very beginning of a dirstate.
///
/// Both fields borrow their bytes (from the parsed buffer when produced by
/// `parse_dirstate`), so the struct is cheap to copy around.
#[derive(Clone, Copy, Debug, PartialEq)]
pub struct DirstateParents<'a> {
    /// Hash of the first parent.
    pub p1: &'a [u8],
    /// Hash of the second parent.
    pub p2: &'a [u8],
}
/// Fixed-size metadata recorded for one path of the dirstate.
///
/// The C implementation uses all signed types. This will be an issue
/// either when 4GB+ source files are commonplace or in 2038, whichever
/// comes first.
///
/// Every field is a plain integer, so the entry is `Copy`: callers can
/// duplicate it without spelling out each field.
#[derive(Debug, PartialEq, Clone, Copy)]
pub struct DirstateEntry {
    /// State byte exactly as stored on disk (an ASCII letter such as `n`).
    pub state: i8,
    /// File mode bits.
    pub mode: i32,
    /// Modification time; `pack_dirstate` writes `-1` to invalidate it.
    pub mtime: i32,
    /// File size.
    pub size: i32,
}

/// Dirstate entries in on-disk order, each paired with its path bytes.
pub type DirstateVec = Vec<(Vec<u8>, DirstateEntry)>;
27
/// One copy record: `path` was copied from `copy_path`.
///
/// Both fields are borrowed slices, so the record is `Copy`, just like
/// `DirstateParents`.
#[derive(Debug, PartialEq, Copy, Clone)]
pub struct CopyVecEntry<'a> {
    /// Path of the tracked file (the bytes before the NUL separator).
    pub path: &'a [u8],
    /// Path the file was copied from (the bytes after the NUL separator).
    pub copy_path: &'a [u8],
}

/// Copy records in the order they were encountered while parsing.
pub type CopyVec<'a> = Vec<CopyVecEntry<'a>>;
34
/// Size in bytes of each of the two parent hashes that open a dirstate.
const PARENT_SIZE: usize = 20;
/// Size in bytes of the fixed part of a dirstate entry: a one-byte state
/// followed by four 32-bit fields (mode, size, mtime and path length).
const MIN_ENTRY_SIZE: usize = 1 + 4 + 4 + 4 + 4;
39
40 pub fn parse_dirstate(
41 contents: &[u8],
42 ) -> Result<(DirstateParents, DirstateVec, CopyVec), DirstateParseError> {
43 if contents.len() < PARENT_SIZE * 2 {
44 return Err(DirstateParseError::TooLittleData);
45 }
46
47 let mut dirstate_vec = vec![];
48 let mut copies = vec![];
49 let mut curr_pos = PARENT_SIZE * 2;
50 let parents = DirstateParents {
51 p1: &contents[..PARENT_SIZE],
52 p2: &contents[PARENT_SIZE..curr_pos],
53 };
54
55 while curr_pos < contents.len() {
56 if curr_pos + MIN_ENTRY_SIZE > contents.len() {
57 return Err(DirstateParseError::Overflow);
58 }
59 let entry_bytes = &contents[curr_pos..];
60
61 let mut cursor = Cursor::new(entry_bytes);
62 let state = cursor.read_i8()?;
63 let mode = cursor.read_i32::<BigEndian>()?;
64 let size = cursor.read_i32::<BigEndian>()?;
65 let mtime = cursor.read_i32::<BigEndian>()?;
66 let path_len = cursor.read_i32::<BigEndian>()? as usize;
67
68 if path_len > contents.len() - curr_pos {
69 return Err(DirstateParseError::Overflow);
70 }
71
72 // Slice instead of allocating a Vec needed for `read_exact`
73 let path = &entry_bytes[MIN_ENTRY_SIZE..MIN_ENTRY_SIZE + (path_len)];
74
75 let (path, copy) = match memchr::memchr(0, path) {
76 None => (path, None),
77 Some(i) => (&path[..i], Some(&path[(i + 1)..])),
78 };
79
80 if let Some(copy_path) = copy {
81 copies.push(CopyVecEntry { path, copy_path });
82 };
83 dirstate_vec.push((
84 path.to_owned(),
85 DirstateEntry {
86 state,
87 mode,
88 size,
89 mtime,
90 },
91 ));
92 curr_pos = curr_pos + MIN_ENTRY_SIZE + (path_len);
93 }
94
95 Ok((parents, dirstate_vec, copies))
96 }
97
98 pub fn pack_dirstate(
99 dirstate_vec: &DirstateVec,
100 copymap: &HashMap<Vec<u8>, Vec<u8>>,
101 parents: DirstateParents,
102 now: i32,
103 ) -> Result<(Vec<u8>, DirstateVec), DirstatePackError> {
104 if parents.p1.len() != PARENT_SIZE || parents.p2.len() != PARENT_SIZE {
105 return Err(DirstatePackError::CorruptedParent);
106 }
107
108 let expected_size: usize = dirstate_vec
109 .iter()
110 .map(|(ref filename, _)| {
111 let mut length = MIN_ENTRY_SIZE + filename.len();
112 if let Some(ref copy) = copymap.get(filename) {
113 length += copy.len() + 1;
114 }
115 length
116 })
117 .sum();
118 let expected_size = expected_size + PARENT_SIZE * 2;
119
120 let mut packed = Vec::with_capacity(expected_size);
121 let mut new_dirstate_vec = vec![];
122
123 packed.extend(parents.p1);
124 packed.extend(parents.p2);
125
126 for (ref filename, entry) in dirstate_vec {
127 let mut new_filename: Vec<u8> = filename.to_owned();
128 let mut new_mtime: i32 = entry.mtime;
129 if entry.state == 'n' as i8 && entry.mtime == now.into() {
130 // The file was last modified "simultaneously" with the current
131 // write to dirstate (i.e. within the same second for file-
132 // systems with a granularity of 1 sec). This commonly happens
133 // for at least a couple of files on 'update'.
134 // The user could change the file without changing its size
135 // within the same second. Invalidate the file's mtime in
136 // dirstate, forcing future 'status' calls to compare the
137 // contents of the file if the size is the same. This prevents
138 // mistakenly treating such files as clean.
139 new_mtime = -1;
140 new_dirstate_vec.push((
141 filename.to_owned(),
142 DirstateEntry {
143 mtime: new_mtime,
144 ..*entry
145 },
146 ));
147 }
148
149 if let Some(copy) = copymap.get(filename) {
150 new_filename.push('\0' as u8);
151 new_filename.extend(copy);
152 }
153
154 packed.write_i8(entry.state)?;
155 packed.write_i32::<BigEndian>(entry.mode)?;
156 packed.write_i32::<BigEndian>(entry.size)?;
157 packed.write_i32::<BigEndian>(new_mtime)?;
158 packed.write_i32::<BigEndian>(new_filename.len() as i32)?;
159 packed.extend(new_filename)
160 }
161
162 if packed.len() != expected_size {
163 return Err(DirstatePackError::BadSize(expected_size, packed.len()));
164 }
165
166 Ok((packed, new_dirstate_vec))
167 }
168
#[cfg(test)]
mod tests {
    use super::*;

    /// First parent hash used by every test.
    const P1: &[u8] = b"12345678910111213141";
    /// Second parent hash used by every test.
    const P2: &[u8] = b"00000000000000000000";
    /// Value passed to `pack_dirstate` as the time of the write.
    const NOW: i32 = 15000000;
    /// A modification time that differs from `NOW`.
    const OLD_MTIME: i32 = 791231220;

    /// Parents shared by all test cases.
    fn parents() -> DirstateParents<'static> {
        DirstateParents { p1: P1, p2: P2 }
    }

    /// Builds one `(path, entry)` pair of a `DirstateVec`.
    fn tracked(
        path: &[u8],
        state: u8,
        mode: i32,
        size: i32,
        mtime: i32,
    ) -> (Vec<u8>, DirstateEntry) {
        (
            path.to_vec(),
            DirstateEntry {
                state: state as i8,
                mode,
                size,
                mtime,
            },
        )
    }

    /// Copy map recording that `f1` was copied from `copyname`.
    fn f1_copymap() -> HashMap<Vec<u8>, Vec<u8>> {
        let mut copymap = HashMap::new();
        copymap.insert(b"f1".to_vec(), b"copyname".to_vec());
        copymap
    }

    /// Expected packed output: both parents followed by `entries`.
    fn with_parents_header(entries: &[u8]) -> Vec<u8> {
        let mut bytes = P1.to_vec();
        bytes.extend_from_slice(P2);
        bytes.extend_from_slice(entries);
        bytes
    }

    /// Turns a copy map into the borrowed records `parse_dirstate` returns.
    fn as_copy_vec(copymap: &HashMap<Vec<u8>, Vec<u8>>) -> CopyVec {
        copymap
            .iter()
            .map(|(path, source)| CopyVecEntry {
                path: path.as_slice(),
                copy_path: source.as_slice(),
            })
            .collect()
    }

    #[test]
    fn test_pack_dirstate_empty() {
        let dirstate_vec: DirstateVec = vec![];
        let copymap = HashMap::new();
        let expected = (with_parents_header(&[]), vec![]);

        assert_eq!(
            expected,
            pack_dirstate(&dirstate_vec, &copymap, parents(), NOW).unwrap()
        );
    }

    #[test]
    fn test_pack_dirstate_one_entry() {
        let dirstate_vec: DirstateVec =
            vec![tracked(b"f1", b'n', 0o644, 0, OLD_MTIME)];
        let copymap = HashMap::new();
        let expected = (
            with_parents_header(&[
                110, 0, 0, 1, 164, 0, 0, 0, 0, 47, 41, 58, 244, 0, 0, 0, 2,
                102, 49,
            ]),
            vec![],
        );

        assert_eq!(
            expected,
            pack_dirstate(&dirstate_vec, &copymap, parents(), NOW).unwrap()
        );
    }

    #[test]
    fn test_pack_dirstate_one_entry_with_copy() {
        let dirstate_vec: DirstateVec =
            vec![tracked(b"f1", b'n', 0o644, 0, OLD_MTIME)];
        let copymap = f1_copymap();
        let expected = (
            with_parents_header(&[
                110, 0, 0, 1, 164, 0, 0, 0, 0, 47, 41, 58, 244, 0, 0, 0, 11,
                102, 49, 0, 99, 111, 112, 121, 110, 97, 109, 101,
            ]),
            vec![],
        );

        assert_eq!(
            expected,
            pack_dirstate(&dirstate_vec, &copymap, parents(), NOW).unwrap()
        );
    }

    #[test]
    fn test_parse_pack_one_entry_with_copy() {
        let dirstate_vec: DirstateVec =
            vec![tracked(b"f1", b'n', 0o644, 0, OLD_MTIME)];
        let copymap = f1_copymap();
        let (packed, _) =
            pack_dirstate(&dirstate_vec, &copymap, parents(), NOW).unwrap();

        assert_eq!(
            (parents(), dirstate_vec, as_copy_vec(&copymap)),
            parse_dirstate(packed.as_slice()).unwrap()
        )
    }

    #[test]
    fn test_parse_pack_multiple_entries_with_copy() {
        let dirstate_vec: DirstateVec = vec![
            tracked(b"f1", b'n', 0o644, 0, OLD_MTIME),
            tracked(b"f2", b'm', 0o777, 1000, OLD_MTIME),
            tracked(b"f3", b'r', 0o644, 234553, OLD_MTIME),
            tracked(b"f4\xF6", b'a', 0o644, -1, -1),
        ];
        let mut copymap = f1_copymap();
        copymap.insert(b"f4\xF6".to_vec(), b"copyname2".to_vec());
        let (packed, _) =
            pack_dirstate(&dirstate_vec, &copymap, parents(), NOW).unwrap();

        let (parsed_parents, parsed_entries, parsed_copies) =
            parse_dirstate(packed.as_slice()).unwrap();
        // Copy records come back as borrowed pairs; rebuild an owned map so
        // the comparison does not depend on their order.
        let parsed_copymap: HashMap<Vec<u8>, Vec<u8>> = parsed_copies
            .iter()
            .map(|record| (record.path.to_vec(), record.copy_path.to_vec()))
            .collect();

        assert_eq!(
            (parents(), dirstate_vec, copymap),
            (parsed_parents, parsed_entries, parsed_copymap)
        )
    }

    #[test]
    /// https://www.mercurial-scm.org/repo/hg/rev/af3f26b6bba4
    fn test_parse_pack_one_entry_with_copy_and_time_conflict() {
        // The entry's mtime equals `NOW`, so packing must invalidate it.
        let dirstate_vec: DirstateVec =
            vec![tracked(b"f1", b'n', 0o644, 0, NOW)];
        let copymap = f1_copymap();
        let (packed, _) =
            pack_dirstate(&dirstate_vec, &copymap, parents(), NOW).unwrap();

        assert_eq!(
            (
                parents(),
                vec![tracked(b"f1", b'n', 0o644, 0, -1)],
                as_copy_vec(&copymap)
            ),
            parse_dirstate(packed.as_slice()).unwrap()
        )
    }
}
@@ -1,12 +1,16 b''
1 [package]
1 [package]
2 name = "hg-core"
2 name = "hg-core"
3 version = "0.1.0"
3 version = "0.1.0"
4 authors = ["Georges Racinet <gracinet@anybox.fr>"]
4 authors = ["Georges Racinet <gracinet@anybox.fr>"]
5 description = "Mercurial pure Rust core library, with no assumption on Python bindings (FFI)"
5 description = "Mercurial pure Rust core library, with no assumption on Python bindings (FFI)"
6
6
7 [lib]
7 [lib]
8 name = "hg"
8 name = "hg"
9
9
10 [dev-dependencies]
10 [dev-dependencies]
11 rand = "*"
11 rand = "*"
12 rand_pcg = "*"
12 rand_pcg = "*"
13
14 [dependencies]
15 memchr = "2.2.0"
16 byteorder = "1.3.1" No newline at end of file
@@ -1,42 +1,71 b''
1 // Copyright 2018 Georges Racinet <gracinet@anybox.fr>
1 // Copyright 2018 Georges Racinet <gracinet@anybox.fr>
2 //
2 //
3 // This software may be used and distributed according to the terms of the
3 // This software may be used and distributed according to the terms of the
4 // GNU General Public License version 2 or any later version.
4 // GNU General Public License version 2 or any later version.
5 extern crate byteorder;
6 extern crate memchr;
7
5 mod ancestors;
8 mod ancestors;
6 pub mod dagops;
9 pub mod dagops;
7 pub use ancestors::{AncestorsIterator, LazyAncestors, MissingAncestors};
10 pub use ancestors::{AncestorsIterator, LazyAncestors, MissingAncestors};
8 pub mod testing; // unconditionally built, for use from integration tests
11 pub mod testing; // unconditionally built, for use from integration tests
9 pub mod discovery;
12 pub mod discovery;
10
13
11 /// Mercurial revision numbers
14 /// Mercurial revision numbers
12 ///
15 ///
13 /// As noted in revlog.c, revision numbers are actually encoded in
16 /// As noted in revlog.c, revision numbers are actually encoded in
14 /// 4 bytes, and are liberally converted to ints, whence the i32
17 /// 4 bytes, and are liberally converted to ints, whence the i32
15 pub type Revision = i32;
18 pub type Revision = i32;
16
19
17
20
18 /// Marker expressing the absence of a parent
21 /// Marker expressing the absence of a parent
19 ///
22 ///
20 /// Independently of the actual representation, `NULL_REVISION` is guaranteed
23 /// Independently of the actual representation, `NULL_REVISION` is guaranteed
21 /// to be smaller than all existing revisions.
24 /// to be smaller than all existing revisions.
22 pub const NULL_REVISION: Revision = -1;
25 pub const NULL_REVISION: Revision = -1;
23
26
24 /// Same as `mercurial.node.wdirrev`
27 /// Same as `mercurial.node.wdirrev`
25 ///
28 ///
26 /// This is also equal to `i32::max_value()`, but it's better to spell
29 /// This is also equal to `i32::max_value()`, but it's better to spell
27 /// it out explicitly, same as in `mercurial.node`
30 /// it out explicitly, same as in `mercurial.node`
28 pub const WORKING_DIRECTORY_REVISION: Revision = 0x7fffffff;
31 pub const WORKING_DIRECTORY_REVISION: Revision = 0x7fffffff;
29
32
30 /// The simplest expression of what we need of Mercurial DAGs.
33 /// The simplest expression of what we need of Mercurial DAGs.
31 pub trait Graph {
34 pub trait Graph {
32 /// Return the two parents of the given `Revision`.
35 /// Return the two parents of the given `Revision`.
33 ///
36 ///
34 /// Each of the parents can be independently `NULL_REVISION`
37 /// Each of the parents can be independently `NULL_REVISION`
35 fn parents(&self, Revision) -> Result<[Revision; 2], GraphError>;
38 fn parents(&self, Revision) -> Result<[Revision; 2], GraphError>;
36 }
39 }
37
40
38 #[derive(Clone, Debug, PartialEq)]
41 #[derive(Clone, Debug, PartialEq)]
39 pub enum GraphError {
42 pub enum GraphError {
40 ParentOutOfRange(Revision),
43 ParentOutOfRange(Revision),
41 WorkingDirectoryUnsupported,
44 WorkingDirectoryUnsupported,
42 }
45 }
46
/// Reasons why parsing a dirstate can fail.
#[derive(Debug, Clone, PartialEq)]
pub enum DirstateParseError {
    /// The input is too short to contain the expected data.
    TooLittleData,
    /// A length found in the input points past the end of the data.
    Overflow,
    /// An entry could not be read; the string describes the failure.
    CorruptedEntry(String),
}
53
/// Reasons why packing a dirstate can fail.
///
/// Derives `Clone` like the other error enums of this crate, so callers
/// can keep a copy of the error while passing it on.
#[derive(Clone, Debug, PartialEq)]
pub enum DirstatePackError {
    /// An entry could not be written; the string describes the failure.
    CorruptedEntry(String),
    /// One of the parents does not have the expected length.
    CorruptedParent,
    /// The packed size differs from the precomputed one, as
    /// `(expected, actual)`.
    BadSize(usize, usize),
}
60
61 impl From<std::io::Error> for DirstatePackError {
62 fn from(e: std::io::Error) -> Self {
63 DirstatePackError::CorruptedEntry(e.to_string())
64 }
65 }
66
67 impl From<std::io::Error> for DirstateParseError {
68 fn from(e: std::io::Error) -> Self {
69 DirstateParseError::CorruptedEntry(e.to_string())
70 }
71 }
General Comments 0
You need to be logged in to leave comments. Login now