##// END OF EJS Templates
rust: use the bytes-cast crate to parse persistent nodemaps...
Simon Sapin -
r47107:1eb72345 default draft
parent child Browse files
Show More
@@ -55,6 +55,24 b' version = "1.3.4"'
55 source = "registry+https://github.com/rust-lang/crates.io-index"
55 source = "registry+https://github.com/rust-lang/crates.io-index"
56
56
57 [[package]]
57 [[package]]
58 name = "bytes-cast"
59 version = "0.1.0"
60 source = "registry+https://github.com/rust-lang/crates.io-index"
61 dependencies = [
62 "bytes-cast-derive 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
63 ]
64
65 [[package]]
66 name = "bytes-cast-derive"
67 version = "0.1.0"
68 source = "registry+https://github.com/rust-lang/crates.io-index"
69 dependencies = [
70 "proc-macro2 1.0.24 (registry+https://github.com/rust-lang/crates.io-index)",
71 "quote 1.0.7 (registry+https://github.com/rust-lang/crates.io-index)",
72 "syn 1.0.54 (registry+https://github.com/rust-lang/crates.io-index)",
73 ]
74
75 [[package]]
58 name = "cc"
76 name = "cc"
59 version = "1.0.66"
77 version = "1.0.66"
60 source = "registry+https://github.com/rust-lang/crates.io-index"
78 source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -277,6 +295,7 b' name = "hg-core"'
277 version = "0.1.0"
295 version = "0.1.0"
278 dependencies = [
296 dependencies = [
279 "byteorder 1.3.4 (registry+https://github.com/rust-lang/crates.io-index)",
297 "byteorder 1.3.4 (registry+https://github.com/rust-lang/crates.io-index)",
298 "bytes-cast 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
280 "clap 2.33.3 (registry+https://github.com/rust-lang/crates.io-index)",
299 "clap 2.33.3 (registry+https://github.com/rust-lang/crates.io-index)",
281 "crossbeam-channel 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)",
300 "crossbeam-channel 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)",
282 "flate2 1.0.19 (registry+https://github.com/rust-lang/crates.io-index)",
301 "flate2 1.0.19 (registry+https://github.com/rust-lang/crates.io-index)",
@@ -910,6 +929,8 b' dependencies = ['
910 "checksum bitflags 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "cf1de2fe8c75bc145a2f577add951f8134889b4795d47466a54a5c846d691693"
929 "checksum bitflags 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "cf1de2fe8c75bc145a2f577add951f8134889b4795d47466a54a5c846d691693"
911 "checksum bitmaps 2.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "031043d04099746d8db04daf1fa424b2bc8bd69d92b25962dcde24da39ab64a2"
930 "checksum bitmaps 2.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "031043d04099746d8db04daf1fa424b2bc8bd69d92b25962dcde24da39ab64a2"
912 "checksum byteorder 1.3.4 (registry+https://github.com/rust-lang/crates.io-index)" = "08c48aae112d48ed9f069b33538ea9e3e90aa263cfa3d1c24309612b1f7472de"
931 "checksum byteorder 1.3.4 (registry+https://github.com/rust-lang/crates.io-index)" = "08c48aae112d48ed9f069b33538ea9e3e90aa263cfa3d1c24309612b1f7472de"
932 "checksum bytes-cast 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "3196ba300c7bc9282a4331e878496cb3e9603a898a8f1446601317163e16ca52"
933 "checksum bytes-cast-derive 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "cb936af9de38476664d6b58e529aff30d482e4ce1c5e150293d00730b0d81fdb"
913 "checksum cc 1.0.66 (registry+https://github.com/rust-lang/crates.io-index)" = "4c0496836a84f8d0495758516b8621a622beb77c0fed418570e50764093ced48"
934 "checksum cc 1.0.66 (registry+https://github.com/rust-lang/crates.io-index)" = "4c0496836a84f8d0495758516b8621a622beb77c0fed418570e50764093ced48"
914 "checksum cfg-if 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)" = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822"
935 "checksum cfg-if 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)" = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822"
915 "checksum cfg-if 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
936 "checksum cfg-if 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
@@ -9,6 +9,7 b' edition = "2018"'
9 name = "hg"
9 name = "hg"
10
10
11 [dependencies]
11 [dependencies]
12 bytes-cast = "0.1"
12 byteorder = "1.3.4"
13 byteorder = "1.3.4"
13 hex = "0.4.2"
14 hex = "0.4.2"
14 im-rc = "15.0.*"
15 im-rc = "15.0.*"
@@ -17,12 +17,12 b' use super::{'
17 RevlogIndex, NULL_REVISION,
17 RevlogIndex, NULL_REVISION,
18 };
18 };
19
19
20 use bytes_cast::{unaligned, BytesCast};
20 use std::cmp::max;
21 use std::cmp::max;
21 use std::fmt;
22 use std::fmt;
22 use std::mem;
23 use std::mem::{self, align_of, size_of};
23 use std::ops::Deref;
24 use std::ops::Deref;
24 use std::ops::Index;
25 use std::ops::Index;
25 use std::slice;
26
26
27 #[derive(Debug, PartialEq)]
27 #[derive(Debug, PartialEq)]
28 pub enum NodeMapError {
28 pub enum NodeMapError {
@@ -149,7 +149,7 b' pub trait MutableNodeMap: NodeMap {'
149 /// Low level NodeTree [`Blocks`] elements
149 /// Low level NodeTree [`Blocks`] elements
150 ///
150 ///
151 /// These are exactly as for instance on persistent storage.
151 /// These are exactly as for instance on persistent storage.
152 type RawElement = i32;
152 type RawElement = unaligned::I32Be;
153
153
154 /// High level representation of values in NodeTree
154 /// High level representation of values in NodeTree
155 /// [`Blocks`](struct.Block.html)
155 /// [`Blocks`](struct.Block.html)
@@ -168,23 +168,24 b' impl From<RawElement> for Element {'
168 ///
168 ///
169 /// See [`Block`](struct.Block.html) for explanation about the encoding.
169 /// See [`Block`](struct.Block.html) for explanation about the encoding.
170 fn from(raw: RawElement) -> Element {
170 fn from(raw: RawElement) -> Element {
171 if raw >= 0 {
171 let int = raw.get();
172 Element::Block(raw as usize)
172 if int >= 0 {
173 } else if raw == -1 {
173 Element::Block(int as usize)
174 } else if int == -1 {
174 Element::None
175 Element::None
175 } else {
176 } else {
176 Element::Rev(-raw - 2)
177 Element::Rev(-int - 2)
177 }
178 }
178 }
179 }
179 }
180 }
180
181
181 impl From<Element> for RawElement {
182 impl From<Element> for RawElement {
182 fn from(element: Element) -> RawElement {
183 fn from(element: Element) -> RawElement {
183 match element {
184 RawElement::from(match element {
184 Element::None => 0,
185 Element::None => 0,
185 Element::Block(i) => i as RawElement,
186 Element::Block(i) => i as i32,
186 Element::Rev(rev) => -rev - 2,
187 Element::Rev(rev) => -rev - 2,
187 }
188 })
188 }
189 }
189 }
190 }
190
191
@@ -212,42 +213,24 b' impl From<Element> for RawElement {'
212 /// represented at all, because we want an immutable empty nodetree
213 /// represented at all, because we want an immutable empty nodetree
213 /// to be valid.
214 /// to be valid.
214
215
215 #[derive(Copy, Clone)]
216 const ELEMENTS_PER_BLOCK: usize = 16; // number of different values in a nybble
216 pub struct Block([u8; BLOCK_SIZE]);
217
217
218 /// Not derivable for arrays of length >32 until const generics are stable
218 #[derive(Copy, Clone, BytesCast, PartialEq)]
219 impl PartialEq for Block {
219 #[repr(transparent)]
220 fn eq(&self, other: &Self) -> bool {
220 pub struct Block([RawElement; ELEMENTS_PER_BLOCK]);
221 self.0[..] == other.0[..]
222 }
223 }
224
225 pub const BLOCK_SIZE: usize = 64;
226
221
227 impl Block {
222 impl Block {
228 fn new() -> Self {
223 fn new() -> Self {
229 // -1 in 2's complement to create an absent node
224 let absent_node = RawElement::from(-1);
230 let byte: u8 = 255;
225 Block([absent_node; ELEMENTS_PER_BLOCK])
231 Block([byte; BLOCK_SIZE])
232 }
226 }
233
227
234 fn get(&self, nybble: u8) -> Element {
228 fn get(&self, nybble: u8) -> Element {
235 let index = nybble as usize * mem::size_of::<RawElement>();
229 self.0[nybble as usize].into()
236 Element::from(RawElement::from_be_bytes([
237 self.0[index],
238 self.0[index + 1],
239 self.0[index + 2],
240 self.0[index + 3],
241 ]))
242 }
230 }
243
231
244 fn set(&mut self, nybble: u8, element: Element) {
232 fn set(&mut self, nybble: u8, element: Element) {
245 let values = RawElement::to_be_bytes(element.into());
233 self.0[nybble as usize] = element.into()
246 let index = nybble as usize * mem::size_of::<RawElement>();
247 self.0[index] = values[0];
248 self.0[index + 1] = values[1];
249 self.0[index + 2] = values[2];
250 self.0[index + 3] = values[3];
251 }
234 }
252 }
235 }
253
236
@@ -398,16 +381,17 b' impl NodeTree {'
398 // Transmute the `Vec<Block>` to a `Vec<u8>`. Blocks are contiguous
381 // Transmute the `Vec<Block>` to a `Vec<u8>`. Blocks are contiguous
399 // bytes, so this is perfectly safe.
382 // bytes, so this is perfectly safe.
400 let bytes = unsafe {
383 let bytes = unsafe {
401 // Assert that `Block` hasn't been changed and has no padding
384 // Check for compatible allocation layout.
402 let _: [u8; 4 * BLOCK_SIZE] =
385 // (Optimized away by constant-folding + dead code elimination.)
403 std::mem::transmute([Block::new(); 4]);
386 assert_eq!(size_of::<Block>(), 64);
387 assert_eq!(align_of::<Block>(), 1);
404
388
405 // /!\ Any use of `vec` after this is use-after-free.
389 // /!\ Any use of `vec` after this is use-after-free.
406 // TODO: use `into_raw_parts` once stabilized
390 // TODO: use `into_raw_parts` once stabilized
407 Vec::from_raw_parts(
391 Vec::from_raw_parts(
408 vec.as_ptr() as *mut u8,
392 vec.as_ptr() as *mut u8,
409 vec.len() * BLOCK_SIZE,
393 vec.len() * size_of::<Block>(),
410 vec.capacity() * BLOCK_SIZE,
394 vec.capacity() * size_of::<Block>(),
411 )
395 )
412 };
396 };
413 (readonly, bytes)
397 (readonly, bytes)
@@ -613,7 +597,7 b' impl NodeTreeBytes {'
613 amount: usize,
597 amount: usize,
614 ) -> Self {
598 ) -> Self {
615 assert!(buffer.len() >= amount);
599 assert!(buffer.len() >= amount);
616 let len_in_blocks = amount / BLOCK_SIZE;
600 let len_in_blocks = amount / size_of::<Block>();
617 NodeTreeBytes {
601 NodeTreeBytes {
618 buffer,
602 buffer,
619 len_in_blocks,
603 len_in_blocks,
@@ -625,12 +609,11 b' impl Deref for NodeTreeBytes {'
625 type Target = [Block];
609 type Target = [Block];
626
610
627 fn deref(&self) -> &[Block] {
611 fn deref(&self) -> &[Block] {
628 unsafe {
612 Block::slice_from_bytes(&self.buffer, self.len_in_blocks)
629 slice::from_raw_parts(
613 // `NodeTreeBytes::new` already asserted that `self.buffer` is
630 (&self.buffer).as_ptr() as *const Block,
614 // large enough.
631 self.len_in_blocks,
615 .unwrap()
632 )
616 .0
633 }
634 }
617 }
635 }
618 }
636
619
@@ -774,13 +757,13 b' mod tests {'
774 let mut raw = [255u8; 64];
757 let mut raw = [255u8; 64];
775
758
776 let mut counter = 0;
759 let mut counter = 0;
777 for val in [0, 15, -2, -1, -3].iter() {
760 for val in [0_i32, 15, -2, -1, -3].iter() {
778 for byte in RawElement::to_be_bytes(*val).iter() {
761 for byte in val.to_be_bytes().iter() {
779 raw[counter] = *byte;
762 raw[counter] = *byte;
780 counter += 1;
763 counter += 1;
781 }
764 }
782 }
765 }
783 let block = Block(raw);
766 let (block, _) = Block::from_bytes(&raw).unwrap();
784 assert_eq!(block.get(0), Element::Block(0));
767 assert_eq!(block.get(0), Element::Block(0));
785 assert_eq!(block.get(1), Element::Block(15));
768 assert_eq!(block.get(1), Element::Block(15));
786 assert_eq!(block.get(3), Element::None);
769 assert_eq!(block.get(3), Element::None);
@@ -1108,7 +1091,7 b' mod tests {'
1108 let (_, bytes) = idx.nt.into_readonly_and_added_bytes();
1091 let (_, bytes) = idx.nt.into_readonly_and_added_bytes();
1109
1092
1110 // only the root block has been changed
1093 // only the root block has been changed
1111 assert_eq!(bytes.len(), BLOCK_SIZE);
1094 assert_eq!(bytes.len(), size_of::<Block>());
1112 // big endian for -2
1095 // big endian for -2
1113 assert_eq!(&bytes[4..2 * 4], [255, 255, 255, 254]);
1096 assert_eq!(&bytes[4..2 * 4], [255, 255, 255, 254]);
1114 // big endian for -6
1097 // big endian for -6
@@ -1,5 +1,5 b''
1 use bytes_cast::{unaligned, BytesCast};
1 use memmap::Mmap;
2 use memmap::Mmap;
2 use std::convert::TryInto;
3 use std::path::{Path, PathBuf};
3 use std::path::{Path, PathBuf};
4
4
5 use super::revlog::RevlogError;
5 use super::revlog::RevlogError;
@@ -13,6 +13,16 b' pub(super) struct NodeMapDocket {'
13 // TODO: keep here more of the data from `parse()` when we need it
13 // TODO: keep here more of the data from `parse()` when we need it
14 }
14 }
15
15
16 #[derive(BytesCast)]
17 #[repr(C)]
18 struct DocketHeader {
19 uid_size: u8,
20 _tip_rev: unaligned::U64Be,
21 data_length: unaligned::U64Be,
22 _data_unused: unaligned::U64Be,
23 tip_node_size: unaligned::U64Be,
24 }
25
16 impl NodeMapDocket {
26 impl NodeMapDocket {
17 /// Return `Ok(None)` when the caller should proceed without a persistent
27 /// Return `Ok(None)` when the caller should proceed without a persistent
18 /// nodemap:
28 /// nodemap:
@@ -36,25 +46,22 b' impl NodeMapDocket {'
36 Ok(bytes) => bytes,
46 Ok(bytes) => bytes,
37 };
47 };
38
48
39 let mut input = if let Some((&ONDISK_VERSION, rest)) =
49 let input = if let Some((&ONDISK_VERSION, rest)) =
40 docket_bytes.split_first()
50 docket_bytes.split_first()
41 {
51 {
42 rest
52 rest
43 } else {
53 } else {
44 return Ok(None);
54 return Ok(None);
45 };
55 };
46 let input = &mut input;
47
56
48 let uid_size = read_u8(input)? as usize;
57 let (header, rest) = DocketHeader::from_bytes(input)?;
49 let _tip_rev = read_be_u64(input)?;
58 let uid_size = header.uid_size as usize;
50 // TODO: do we care about overflow for 4 GB+ nodemap files on 32-bit
59 // TODO: do we care about overflow for 4 GB+ nodemap files on 32-bit
51 // systems?
60 // systems?
52 let data_length = read_be_u64(input)? as usize;
61 let tip_node_size = header.tip_node_size.get() as usize;
53 let _data_unused = read_be_u64(input)?;
62 let data_length = header.data_length.get() as usize;
54 let tip_node_size = read_be_u64(input)? as usize;
63 let (uid, rest) = u8::slice_from_bytes(rest, uid_size)?;
55 let uid = read_bytes(input, uid_size)?;
64 let (_tip_node, _rest) = u8::slice_from_bytes(rest, tip_node_size)?;
56 let _tip_node = read_bytes(input, tip_node_size)?;
57
58 let uid =
65 let uid =
59 std::str::from_utf8(uid).map_err(|_| RevlogError::Corrupted)?;
66 std::str::from_utf8(uid).map_err(|_| RevlogError::Corrupted)?;
60 let docket = NodeMapDocket { data_length };
67 let docket = NodeMapDocket { data_length };
@@ -81,29 +88,6 b' impl NodeMapDocket {'
81 }
88 }
82 }
89 }
83
90
84 fn read_bytes<'a>(
85 input: &mut &'a [u8],
86 count: usize,
87 ) -> Result<&'a [u8], RevlogError> {
88 if let Some(start) = input.get(..count) {
89 *input = &input[count..];
90 Ok(start)
91 } else {
92 Err(RevlogError::Corrupted)
93 }
94 }
95
96 fn read_u8<'a>(input: &mut &[u8]) -> Result<u8, RevlogError> {
97 Ok(read_bytes(input, 1)?[0])
98 }
99
100 fn read_be_u64<'a>(input: &mut &[u8]) -> Result<u64, RevlogError> {
101 let array = read_bytes(input, std::mem::size_of::<u64>())?
102 .try_into()
103 .unwrap();
104 Ok(u64::from_be_bytes(array))
105 }
106
107 fn rawdata_path(docket_path: &Path, uid: &str) -> PathBuf {
91 fn rawdata_path(docket_path: &Path, uid: &str) -> PathBuf {
108 let docket_name = docket_path
92 let docket_name = docket_path
109 .file_name()
93 .file_name()
@@ -29,6 +29,12 b' pub enum RevlogError {'
29 UnknowDataFormat(u8),
29 UnknowDataFormat(u8),
30 }
30 }
31
31
32 impl From<bytes_cast::FromBytesError> for RevlogError {
33 fn from(_: bytes_cast::FromBytesError) -> Self {
34 RevlogError::Corrupted
35 }
36 }
37
32 /// Read only implementation of revlog.
38 /// Read only implementation of revlog.
33 pub struct Revlog {
39 pub struct Revlog {
34 /// When index and data are not interleaved: bytes of the revlog index.
40 /// When index and data are not interleaved: bytes of the revlog index.
General Comments 0
You need to be logged in to leave comments. Login now