Show More
@@ -0,0 +1,119 b'' | |||||
|
1 | use memmap::Mmap; | |||
|
2 | use std::convert::TryInto; | |||
|
3 | use std::path::{Path, PathBuf}; | |||
|
4 | ||||
|
5 | use super::revlog::{mmap_open, RevlogError}; | |||
|
6 | use crate::utils::strip_suffix; | |||
|
7 | ||||
|
8 | const ONDISK_VERSION: u8 = 1; | |||
|
9 | ||||
|
/// Parsed contents of a persistent-nodemap docket, i.e. the `.n` file that
/// `read_from_file` looks for next to a revlog index.
pub(super) struct NodeMapDocket {
    /// Length in bytes of the nodemap data, as recorded in the docket.
    /// `read_from_file` rejects a data file shorter than this.
    pub data_length: usize,
    // TODO: keep here more of the data parsed by `read_from_file()` when we
    // need it
}
|
14 | ||||
|
15 | impl NodeMapDocket { | |||
|
16 | /// Return `Ok(None)` when the caller should proceed without a persistent | |||
|
17 | /// nodemap: | |||
|
18 | /// | |||
|
19 | /// * This revlog does not have a `.n` docket file (it is not generated for | |||
|
20 | /// small revlogs), or | |||
|
21 | /// * The docket has an unsupported version number (repositories created by | |||
|
22 | /// later hg, maybe that should be a requirement instead?), or | |||
|
23 | /// * The docket file points to a missing (likely deleted) data file (this | |||
|
24 | /// can happen in a rare race condition). | |||
|
25 | pub fn read_from_file( | |||
|
26 | index_path: &Path, | |||
|
27 | ) -> Result<Option<(Self, Mmap)>, RevlogError> { | |||
|
28 | let docket_path = index_path.with_extension("n"); | |||
|
29 | let docket_bytes = match std::fs::read(&docket_path) { | |||
|
30 | Err(e) if e.kind() == std::io::ErrorKind::NotFound => { | |||
|
31 | return Ok(None) | |||
|
32 | } | |||
|
33 | Err(e) => return Err(RevlogError::IoError(e)), | |||
|
34 | Ok(bytes) => bytes, | |||
|
35 | }; | |||
|
36 | ||||
|
37 | let mut input = if let Some((&ONDISK_VERSION, rest)) = | |||
|
38 | docket_bytes.split_first() | |||
|
39 | { | |||
|
40 | rest | |||
|
41 | } else { | |||
|
42 | return Ok(None); | |||
|
43 | }; | |||
|
44 | let input = &mut input; | |||
|
45 | ||||
|
46 | let uid_size = read_u8(input)? as usize; | |||
|
47 | let _tip_rev = read_be_u64(input)?; | |||
|
48 | // TODO: do we care about overflow for 4 GB+ nodemap files on 32-bit | |||
|
49 | // systems? | |||
|
50 | let data_length = read_be_u64(input)? as usize; | |||
|
51 | let _data_unused = read_be_u64(input)?; | |||
|
52 | let tip_node_size = read_be_u64(input)? as usize; | |||
|
53 | let uid = read_bytes(input, uid_size)?; | |||
|
54 | let _tip_node = read_bytes(input, tip_node_size)?; | |||
|
55 | ||||
|
56 | let uid = | |||
|
57 | std::str::from_utf8(uid).map_err(|_| RevlogError::Corrupted)?; | |||
|
58 | let docket = NodeMapDocket { data_length }; | |||
|
59 | ||||
|
60 | let data_path = rawdata_path(&docket_path, uid); | |||
|
61 | // TODO: use `std::fs::read` here when the `persistent-nodemap.mmap` | |||
|
62 | // config is false? | |||
|
63 | match mmap_open(&data_path) { | |||
|
64 | Ok(mmap) => { | |||
|
65 | if mmap.len() >= data_length { | |||
|
66 | Ok(Some((docket, mmap))) | |||
|
67 | } else { | |||
|
68 | Err(RevlogError::Corrupted) | |||
|
69 | } | |||
|
70 | } | |||
|
71 | Err(error) => { | |||
|
72 | if error.kind() == std::io::ErrorKind::NotFound { | |||
|
73 | Ok(None) | |||
|
74 | } else { | |||
|
75 | Err(RevlogError::IoError(error)) | |||
|
76 | } | |||
|
77 | } | |||
|
78 | } | |||
|
79 | } | |||
|
80 | } | |||
|
81 | ||||
|
82 | fn read_bytes<'a>( | |||
|
83 | input: &mut &'a [u8], | |||
|
84 | count: usize, | |||
|
85 | ) -> Result<&'a [u8], RevlogError> { | |||
|
86 | if let Some(start) = input.get(..count) { | |||
|
87 | *input = &input[count..]; | |||
|
88 | Ok(start) | |||
|
89 | } else { | |||
|
90 | Err(RevlogError::Corrupted) | |||
|
91 | } | |||
|
92 | } | |||
|
93 | ||||
|
94 | fn read_u8<'a>(input: &mut &[u8]) -> Result<u8, RevlogError> { | |||
|
95 | Ok(read_bytes(input, 1)?[0]) | |||
|
96 | } | |||
|
97 | ||||
|
98 | fn read_be_u64<'a>(input: &mut &[u8]) -> Result<u64, RevlogError> { | |||
|
99 | let array = read_bytes(input, std::mem::size_of::<u64>())? | |||
|
100 | .try_into() | |||
|
101 | .unwrap(); | |||
|
102 | Ok(u64::from_be_bytes(array)) | |||
|
103 | } | |||
|
104 | ||||
|
105 | fn rawdata_path(docket_path: &Path, uid: &str) -> PathBuf { | |||
|
106 | let docket_name = docket_path | |||
|
107 | .file_name() | |||
|
108 | .expect("expected a base name") | |||
|
109 | .to_str() | |||
|
110 | .expect("expected an ASCII file name in the store"); | |||
|
111 | let prefix = strip_suffix(docket_name, ".n.a") | |||
|
112 | .or_else(|| strip_suffix(docket_name, ".n")) | |||
|
113 | .expect("expected docket path in .n or .n.a"); | |||
|
114 | let name = format!("{}-{}.nd", prefix, uid); | |||
|
115 | docket_path | |||
|
116 | .parent() | |||
|
117 | .expect("expected a non-root path") | |||
|
118 | .join(name) | |||
|
119 | } |
@@ -69,4 +69,8 b' const SUPPORTED: &[&str] = &[' | |||||
69 | "revlogv1", |
|
69 | "revlogv1", | |
70 | "sparserevlog", |
|
70 | "sparserevlog", | |
71 | "store", |
|
71 | "store", | |
|
72 | // As of this writing everything rhg does is read-only. | |||
|
73 | // When it starts writing to the repository, it’ll need to either keep the | |||
|
74 | // persistent nodemap up to date or remove this entry: | |||
|
75 | "persistent-nodemap", | |||
72 | ]; |
|
76 | ]; |
@@ -7,6 +7,7 b'' | |||||
7 |
|
7 | |||
8 | pub mod node; |
|
8 | pub mod node; | |
9 | pub mod nodemap; |
|
9 | pub mod nodemap; | |
|
10 | mod nodemap_docket; | |||
10 | pub mod path_encode; |
|
11 | pub mod path_encode; | |
11 | pub use node::{Node, NodeError, NodePrefix, NodePrefixRef}; |
|
12 | pub use node::{Node, NodeError, NodePrefix, NodePrefixRef}; | |
12 | pub mod changelog; |
|
13 | pub mod changelog; |
@@ -132,6 +132,16 b' impl Index {' | |||||
132 | } |
|
132 | } | |
133 | } |
|
133 | } | |
134 |
|
134 | |||
|
135 | impl super::RevlogIndex for Index { | |||
|
136 | fn len(&self) -> usize { | |||
|
137 | self.len() | |||
|
138 | } | |||
|
139 | ||||
|
140 | fn node(&self, rev: Revision) -> Option<&Node> { | |||
|
141 | self.get_entry(rev).map(|entry| entry.hash()) | |||
|
142 | } | |||
|
143 | } | |||
|
144 | ||||
135 | #[derive(Debug)] |
|
145 | #[derive(Debug)] | |
136 | pub struct IndexEntry<'a> { |
|
146 | pub struct IndexEntry<'a> { | |
137 | bytes: &'a [u8], |
|
147 | bytes: &'a [u8], | |
@@ -190,7 +200,7 b" impl<'a> IndexEntry<'a> {" | |||||
190 | /// |
|
200 | /// | |
191 | /// Currently, SHA-1 is used and only the first 20 bytes of this field |
|
201 | /// Currently, SHA-1 is used and only the first 20 bytes of this field | |
192 | /// are used. |
|
202 | /// are used. | |
193 | pub fn hash(&self) -> &Node { |
|
203 | pub fn hash(&self) -> &'a Node { | |
194 | (&self.bytes[32..52]).try_into().unwrap() |
|
204 | (&self.bytes[32..52]).try_into().unwrap() | |
195 | } |
|
205 | } | |
196 | } |
|
206 | } |
@@ -14,6 +14,9 b' use zstd;' | |||||
14 |
|
14 | |||
15 | use super::index::Index; |
|
15 | use super::index::Index; | |
16 | use super::node::{NodePrefixRef, NODE_BYTES_LENGTH, NULL_NODE}; |
|
16 | use super::node::{NodePrefixRef, NODE_BYTES_LENGTH, NULL_NODE}; | |
|
17 | use super::nodemap; | |||
|
18 | use super::nodemap::NodeMap; | |||
|
19 | use super::nodemap_docket::NodeMapDocket; | |||
17 | use super::patch; |
|
20 | use super::patch; | |
18 | use crate::revlog::Revision; |
|
21 | use crate::revlog::Revision; | |
19 |
|
22 | |||
@@ -27,7 +30,7 b' pub enum RevlogError {' | |||||
27 | UnknowDataFormat(u8), |
|
30 | UnknowDataFormat(u8), | |
28 | } |
|
31 | } | |
29 |
|
32 | |||
30 | fn mmap_open(path: &Path) -> Result<Mmap, std::io::Error> { |
|
33 | pub(super) fn mmap_open(path: &Path) -> Result<Mmap, std::io::Error> { | |
31 | let file = File::open(path)?; |
|
34 | let file = File::open(path)?; | |
32 | let mmap = unsafe { MmapOptions::new().map(&file) }?; |
|
35 | let mmap = unsafe { MmapOptions::new().map(&file) }?; | |
33 | Ok(mmap) |
|
36 | Ok(mmap) | |
@@ -41,6 +44,8 b' pub struct Revlog {' | |||||
41 | index: Index, |
|
44 | index: Index, | |
42 | /// When index and data are not interleaved: bytes of the revlog data |
|
45 | /// When index and data are not interleaved: bytes of the revlog data | |
43 | data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>>, |
|
46 | data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>>, | |
|
47 | /// When present on disk: the persistent nodemap for this revlog | |||
|
48 | nodemap: Option<nodemap::NodeTree>, | |||
44 | } |
|
49 | } | |
45 |
|
50 | |||
46 | impl Revlog { |
|
51 | impl Revlog { | |
@@ -77,7 +82,20 b' impl Revlog {' | |||||
77 | Some(Box::new(data_mmap)) |
|
82 | Some(Box::new(data_mmap)) | |
78 | }; |
|
83 | }; | |
79 |
|
84 | |||
80 | Ok(Revlog { index, data_bytes }) |
|
85 | let nodemap = NodeMapDocket::read_from_file(index_path)?.map( | |
|
86 | |(docket, data)| { | |||
|
87 | nodemap::NodeTree::load_bytes( | |||
|
88 | Box::new(data), | |||
|
89 | docket.data_length, | |||
|
90 | ) | |||
|
91 | }, | |||
|
92 | ); | |||
|
93 | ||||
|
94 | Ok(Revlog { | |||
|
95 | index, | |||
|
96 | data_bytes, | |||
|
97 | nodemap, | |||
|
98 | }) | |||
81 | } |
|
99 | } | |
82 |
|
100 | |||
83 | /// Return number of entries of the `Revlog`. |
|
101 | /// Return number of entries of the `Revlog`. | |
@@ -96,8 +114,20 b' impl Revlog {' | |||||
96 | &self, |
|
114 | &self, | |
97 | node: NodePrefixRef, |
|
115 | node: NodePrefixRef, | |
98 | ) -> Result<Revision, RevlogError> { |
|
116 | ) -> Result<Revision, RevlogError> { | |
99 | // This is brute force. But it is fast enough for now. |
|
117 | if let Some(nodemap) = &self.nodemap { | |
100 | // Optimization will come later. |
|
118 | return nodemap | |
|
119 | .find_bin(&self.index, node) | |||
|
120 | // TODO: propagate details of this error: | |||
|
121 | .map_err(|_| RevlogError::Corrupted)? | |||
|
122 | .ok_or(RevlogError::InvalidRevision); | |||
|
123 | } | |||
|
124 | ||||
|
125 | // Fallback to linear scan when a persistent nodemap is not present. | |||
|
126 | // This happens when the persistent-nodemap experimental feature is not | |||
|
127 | // enabled, or for small revlogs. | |||
|
128 | // | |||
|
129 | // TODO: consider building a non-persistent nodemap in memory to | |||
|
130 | // optimize these cases. | |||
101 | let mut found_by_prefix = None; |
|
131 | let mut found_by_prefix = None; | |
102 | for rev in (0..self.len() as Revision).rev() { |
|
132 | for rev in (0..self.len() as Revision).rev() { | |
103 | let index_entry = |
|
133 | let index_entry = |
@@ -167,3 +167,12 b" impl<'a> Escaped for &'a HgPath {" | |||||
167 | self.as_bytes().escaped_bytes() |
|
167 | self.as_bytes().escaped_bytes() | |
168 | } |
|
168 | } | |
169 | } |
|
169 | } | |
|
170 | ||||
|
/// Return `s` without its trailing `suffix`, or `None` when `s` does not end
/// with `suffix`.
// TODO: use the str method when we require Rust 1.45
pub(crate) fn strip_suffix<'a>(s: &'a str, suffix: &str) -> Option<&'a str> {
    if !s.ends_with(suffix) {
        return None;
    }
    // `ends_with` succeeded, so the cut point is in range and falls on a
    // character boundary.
    let (head, _) = s.split_at(s.len() - suffix.len());
    Some(head)
}
@@ -196,5 +196,9 b' Persistent nodemap' | |||||
196 | .hg/store/00changelog.d |
|
196 | .hg/store/00changelog.d | |
197 | .hg/store/00changelog.i |
|
197 | .hg/store/00changelog.i | |
198 | .hg/store/00changelog.n |
|
198 | .hg/store/00changelog.n | |
|
199 | ||||
|
200 | Specifying revisions by changeset ID | |||
199 | $ rhg files -r c3ae8dec9fad |
|
201 | $ rhg files -r c3ae8dec9fad | |
200 | [252] |
|
202 | of | |
|
203 | $ rhg cat -r c3ae8dec9fad of | |||
|
204 | r5000 |
General Comments 0
You need to be logged in to leave comments.
Login now