##// END OF EJS Templates
rust: Rewrite dirstate parsing using the `bytes-cast` crate...
Simon Sapin -
r47336:f88e8ae0 default
parent child Browse files
Show More
@@ -310,7 +310,6 dependencies = [
310 "im-rc 15.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
310 "im-rc 15.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
311 "lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
311 "lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
312 "log 0.4.11 (registry+https://github.com/rust-lang/crates.io-index)",
312 "log 0.4.11 (registry+https://github.com/rust-lang/crates.io-index)",
313 "memchr 2.3.4 (registry+https://github.com/rust-lang/crates.io-index)",
314 "memmap 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)",
313 "memmap 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)",
315 "micro-timer 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)",
314 "micro-timer 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)",
316 "pretty_assertions 0.6.1 (registry+https://github.com/rust-lang/crates.io-index)",
315 "pretty_assertions 0.6.1 (registry+https://github.com/rust-lang/crates.io-index)",
@@ -15,7 +15,6 derive_more = "0.99"
15 home = "0.5"
15 home = "0.5"
16 im-rc = "15.0.*"
16 im-rc = "15.0.*"
17 lazy_static = "1.4.0"
17 lazy_static = "1.4.0"
18 memchr = "2.3.3"
19 rand = "0.7.3"
18 rand = "0.7.3"
20 rand_pcg = "0.2.1"
19 rand_pcg = "0.2.1"
21 rand_distr = "0.2.2"
20 rand_distr = "0.2.2"
@@ -7,6 +7,7
7
7
8 use crate::errors::HgError;
8 use crate::errors::HgError;
9 use crate::{utils::hg_path::HgPathBuf, FastHashMap};
9 use crate::{utils::hg_path::HgPathBuf, FastHashMap};
10 use bytes_cast::{unaligned, BytesCast};
10 use std::collections::hash_map;
11 use std::collections::hash_map;
11 use std::convert::TryFrom;
12 use std::convert::TryFrom;
12
13
@@ -17,7 +18,8 pub mod dirstate_tree;
17 pub mod parsers;
18 pub mod parsers;
18 pub mod status;
19 pub mod status;
19
20
20 #[derive(Debug, PartialEq, Clone)]
21 #[derive(Debug, PartialEq, Clone, BytesCast)]
22 #[repr(C)]
21 pub struct DirstateParents {
23 pub struct DirstateParents {
22 pub p1: [u8; 20],
24 pub p1: [u8; 20],
23 pub p2: [u8; 20],
25 pub p2: [u8; 20],
@@ -34,6 +36,16 pub struct DirstateEntry {
34 pub size: i32,
36 pub size: i32,
35 }
37 }
36
38
39 #[derive(BytesCast)]
40 #[repr(C)]
41 struct RawEntry {
42 state: u8,
43 mode: unaligned::I32Be,
44 size: unaligned::I32Be,
45 mtime: unaligned::I32Be,
46 length: unaligned::I32Be,
47 }
48
37 /// A `DirstateEntry` with a size of `-2` means that it was merged from the
49 /// A `DirstateEntry` with a size of `-2` means that it was merged from the
38 /// other parent. This allows revert to pick the right status back during a
50 /// other parent. This allows revert to pick the right status back during a
39 /// merge.
51 /// merge.
@@ -386,10 +386,10 impl DirstateMap {
386 }
386 }
387
387
388 #[timed]
388 #[timed]
389 pub fn read(
389 pub fn read<'a>(
390 &mut self,
390 &mut self,
391 file_contents: &[u8],
391 file_contents: &'a [u8],
392 ) -> Result<Option<DirstateParents>, DirstateError> {
392 ) -> Result<Option<&'a DirstateParents>, DirstateError> {
393 if file_contents.is_empty() {
393 if file_contents.is_empty() {
394 return Ok(None);
394 return Ok(None);
395 }
395 }
@@ -6,13 +6,13
6 use crate::errors::HgError;
6 use crate::errors::HgError;
7 use crate::utils::hg_path::HgPath;
7 use crate::utils::hg_path::HgPath;
8 use crate::{
8 use crate::{
9 dirstate::{CopyMap, EntryState, StateMap},
9 dirstate::{CopyMap, EntryState, RawEntry, StateMap},
10 DirstateEntry, DirstateParents,
10 DirstateEntry, DirstateParents,
11 };
11 };
12 use byteorder::{BigEndian, ReadBytesExt, WriteBytesExt};
12 use byteorder::{BigEndian, WriteBytesExt};
13 use bytes_cast::BytesCast;
13 use micro_timer::timed;
14 use micro_timer::timed;
14 use std::convert::{TryFrom, TryInto};
15 use std::convert::{TryFrom, TryInto};
15 use std::io::Cursor;
16 use std::time::Duration;
16 use std::time::Duration;
17
17
18 /// Parents are stored in the dirstate as byte hashes.
18 /// Parents are stored in the dirstate as byte hashes.
@@ -21,65 +21,45 pub const PARENT_SIZE: usize = 20;
21 const MIN_ENTRY_SIZE: usize = 17;
21 const MIN_ENTRY_SIZE: usize = 17;
22
22
23 type ParseResult<'a> = (
23 type ParseResult<'a> = (
24 DirstateParents,
24 &'a DirstateParents,
25 Vec<(&'a HgPath, DirstateEntry)>,
25 Vec<(&'a HgPath, DirstateEntry)>,
26 Vec<(&'a HgPath, &'a HgPath)>,
26 Vec<(&'a HgPath, &'a HgPath)>,
27 );
27 );
28
28
29 #[timed]
29 #[timed]
30 pub fn parse_dirstate(contents: &[u8]) -> Result<ParseResult, HgError> {
30 pub fn parse_dirstate(mut contents: &[u8]) -> Result<ParseResult, HgError> {
31 if contents.len() < PARENT_SIZE * 2 {
31 let mut copies = Vec::new();
32 return Err(HgError::corrupted("Too little data for dirstate."));
32 let mut entries = Vec::new();
33 }
34 let mut copies = vec![];
35 let mut entries = vec![];
36
33
37 let mut curr_pos = PARENT_SIZE * 2;
34 let (parents, rest) = DirstateParents::from_bytes(contents)
38 let parents = DirstateParents {
35 .map_err(|_| HgError::corrupted("Too little data for dirstate."))?;
39 p1: contents[..PARENT_SIZE].try_into().unwrap(),
36 contents = rest;
40 p2: contents[PARENT_SIZE..curr_pos].try_into().unwrap(),
37 while !contents.is_empty() {
41 };
38 let (raw_entry, rest) = RawEntry::from_bytes(contents)
39 .map_err(|_| HgError::corrupted("Overflow in dirstate."))?;
42
40
43 while curr_pos < contents.len() {
41 let entry = DirstateEntry {
44 if curr_pos + MIN_ENTRY_SIZE > contents.len() {
42 state: EntryState::try_from(raw_entry.state)?,
45 return Err(HgError::corrupted("Overflow in dirstate."));
43 mode: raw_entry.mode.get(),
46 }
44 mtime: raw_entry.mtime.get(),
47 let entry_bytes = &contents[curr_pos..];
45 size: raw_entry.size.get(),
46 };
47 let (paths, rest) =
48 u8::slice_from_bytes(rest, raw_entry.length.get() as usize)
49 .map_err(|_| HgError::corrupted("Overflow in dirstate."))?;
48
50
49 let mut cursor = Cursor::new(entry_bytes);
51 // `paths` is either a single path, or two paths separated by a NULL
50 // Unwraping errors from `byteorder` as we’ve already checked
52 // byte
51 // `MIN_ENTRY_SIZE` so the input should never be too short.
53 let mut iter = paths.splitn(2, |&byte| byte == b'\0');
52 let state = EntryState::try_from(cursor.read_u8().unwrap())?;
54 let path = HgPath::new(
53 let mode = cursor.read_i32::<BigEndian>().unwrap();
55 iter.next().expect("splitn always yields at least one item"),
54 let size = cursor.read_i32::<BigEndian>().unwrap();
56 );
55 let mtime = cursor.read_i32::<BigEndian>().unwrap();
57 if let Some(copy_source) = iter.next() {
56 let path_len = cursor.read_i32::<BigEndian>().unwrap() as usize;
58 copies.push((path, HgPath::new(copy_source)));
57
58 if path_len > contents.len() - curr_pos {
59 return Err(HgError::corrupted("Overflow in dirstate."));
60 }
59 }
61
60
62 // Slice instead of allocating a Vec needed for `read_exact`
61 entries.push((path, entry));
63 let path = &entry_bytes[MIN_ENTRY_SIZE..MIN_ENTRY_SIZE + (path_len)];
62 contents = rest;
64
65 let (path, copy) = match memchr::memchr(0, path) {
66 None => (path, None),
67 Some(i) => (&path[..i], Some(&path[(i + 1)..])),
68 };
69
70 if let Some(copy_path) = copy {
71 copies.push((HgPath::new(path), HgPath::new(copy_path)));
72 };
73 entries.push((
74 HgPath::new(path),
75 DirstateEntry {
76 state,
77 mode,
78 size,
79 mtime,
80 },
81 ));
82 curr_pos = curr_pos + MIN_ENTRY_SIZE + (path_len);
83 }
63 }
84 Ok((parents, entries, copies))
64 Ok((parents, entries, copies))
85 }
65 }
@@ -374,7 +354,7 mod tests {
374 .collect();
354 .collect();
375
355
376 assert_eq!(
356 assert_eq!(
377 (parents, state_map, copymap),
357 (&parents, state_map, copymap),
378 (new_parents, new_state_map, new_copy_map)
358 (new_parents, new_state_map, new_copy_map)
379 )
359 )
380 }
360 }
@@ -452,7 +432,7 mod tests {
452 .collect();
432 .collect();
453
433
454 assert_eq!(
434 assert_eq!(
455 (parents, state_map, copymap),
435 (&parents, state_map, copymap),
456 (new_parents, new_state_map, new_copy_map)
436 (new_parents, new_state_map, new_copy_map)
457 )
437 )
458 }
438 }
@@ -499,7 +479,7 mod tests {
499
479
500 assert_eq!(
480 assert_eq!(
501 (
481 (
502 parents,
482 &parents,
503 [(
483 [(
504 HgPathBuf::from_bytes(b"f1"),
484 HgPathBuf::from_bytes(b"f1"),
505 DirstateEntry {
485 DirstateEntry {
General Comments 0
You need to be logged in to leave comments. Login now