##// END OF EJS Templates
rhg: use persistent nodemap when available...
Simon Sapin -
r46706:9eb07ab3 default
parent child Browse files
Show More
@@ -0,0 +1,119 b''
1 use memmap::Mmap;
2 use std::convert::TryInto;
3 use std::path::{Path, PathBuf};
4
5 use super::revlog::{mmap_open, RevlogError};
6 use crate::utils::strip_suffix;
7
8 const ONDISK_VERSION: u8 = 1;
9
10 pub(super) struct NodeMapDocket {
11 pub data_length: usize,
12 // TODO: keep here more of the data from `parse()` when we need it
13 }
14
15 impl NodeMapDocket {
16 /// Return `Ok(None)` when the caller should proceed without a persistent
17 /// nodemap:
18 ///
19 /// * This revlog does not have a `.n` docket file (it is not generated for
20 /// small revlogs), or
21 /// * The docket has an unsupported version number (repositories created by
22 /// later hg, maybe that should be a requirement instead?), or
23 /// * The docket file points to a missing (likely deleted) data file (this
24 /// can happen in a rare race condition).
25 pub fn read_from_file(
26 index_path: &Path,
27 ) -> Result<Option<(Self, Mmap)>, RevlogError> {
28 let docket_path = index_path.with_extension("n");
29 let docket_bytes = match std::fs::read(&docket_path) {
30 Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
31 return Ok(None)
32 }
33 Err(e) => return Err(RevlogError::IoError(e)),
34 Ok(bytes) => bytes,
35 };
36
37 let mut input = if let Some((&ONDISK_VERSION, rest)) =
38 docket_bytes.split_first()
39 {
40 rest
41 } else {
42 return Ok(None);
43 };
44 let input = &mut input;
45
46 let uid_size = read_u8(input)? as usize;
47 let _tip_rev = read_be_u64(input)?;
48 // TODO: do we care about overflow for 4 GB+ nodemap files on 32-bit
49 // systems?
50 let data_length = read_be_u64(input)? as usize;
51 let _data_unused = read_be_u64(input)?;
52 let tip_node_size = read_be_u64(input)? as usize;
53 let uid = read_bytes(input, uid_size)?;
54 let _tip_node = read_bytes(input, tip_node_size)?;
55
56 let uid =
57 std::str::from_utf8(uid).map_err(|_| RevlogError::Corrupted)?;
58 let docket = NodeMapDocket { data_length };
59
60 let data_path = rawdata_path(&docket_path, uid);
61 // TODO: use `std::fs::read` here when the `persistent-nodemap.mmap`
62 // config is false?
63 match mmap_open(&data_path) {
64 Ok(mmap) => {
65 if mmap.len() >= data_length {
66 Ok(Some((docket, mmap)))
67 } else {
68 Err(RevlogError::Corrupted)
69 }
70 }
71 Err(error) => {
72 if error.kind() == std::io::ErrorKind::NotFound {
73 Ok(None)
74 } else {
75 Err(RevlogError::IoError(error))
76 }
77 }
78 }
79 }
80 }
81
82 fn read_bytes<'a>(
83 input: &mut &'a [u8],
84 count: usize,
85 ) -> Result<&'a [u8], RevlogError> {
86 if let Some(start) = input.get(..count) {
87 *input = &input[count..];
88 Ok(start)
89 } else {
90 Err(RevlogError::Corrupted)
91 }
92 }
93
94 fn read_u8<'a>(input: &mut &[u8]) -> Result<u8, RevlogError> {
95 Ok(read_bytes(input, 1)?[0])
96 }
97
98 fn read_be_u64<'a>(input: &mut &[u8]) -> Result<u64, RevlogError> {
99 let array = read_bytes(input, std::mem::size_of::<u64>())?
100 .try_into()
101 .unwrap();
102 Ok(u64::from_be_bytes(array))
103 }
104
105 fn rawdata_path(docket_path: &Path, uid: &str) -> PathBuf {
106 let docket_name = docket_path
107 .file_name()
108 .expect("expected a base name")
109 .to_str()
110 .expect("expected an ASCII file name in the store");
111 let prefix = strip_suffix(docket_name, ".n.a")
112 .or_else(|| strip_suffix(docket_name, ".n"))
113 .expect("expected docket path in .n or .n.a");
114 let name = format!("{}-{}.nd", prefix, uid);
115 docket_path
116 .parent()
117 .expect("expected a non-root path")
118 .join(name)
119 }
@@ -69,4 +69,8 b' const SUPPORTED: &[&str] = &['
69 "revlogv1",
69 "revlogv1",
70 "sparserevlog",
70 "sparserevlog",
71 "store",
71 "store",
72 // As of this writing everything rhg does is read-only.
73 // When it starts writing to the repository, it’ll need to either keep the
74 // persistent nodemap up to date or remove this entry:
75 "persistent-nodemap",
72 ];
76 ];
@@ -7,6 +7,7 b''
7
7
8 pub mod node;
8 pub mod node;
9 pub mod nodemap;
9 pub mod nodemap;
10 mod nodemap_docket;
10 pub mod path_encode;
11 pub mod path_encode;
11 pub use node::{Node, NodeError, NodePrefix, NodePrefixRef};
12 pub use node::{Node, NodeError, NodePrefix, NodePrefixRef};
12 pub mod changelog;
13 pub mod changelog;
@@ -132,6 +132,16 b' impl Index {'
132 }
132 }
133 }
133 }
134
134
135 impl super::RevlogIndex for Index {
136 fn len(&self) -> usize {
137 self.len()
138 }
139
140 fn node(&self, rev: Revision) -> Option<&Node> {
141 self.get_entry(rev).map(|entry| entry.hash())
142 }
143 }
144
135 #[derive(Debug)]
145 #[derive(Debug)]
136 pub struct IndexEntry<'a> {
146 pub struct IndexEntry<'a> {
137 bytes: &'a [u8],
147 bytes: &'a [u8],
@@ -190,7 +200,7 b" impl<'a> IndexEntry<'a> {"
190 ///
200 ///
191 /// Currently, SHA-1 is used and only the first 20 bytes of this field
201 /// Currently, SHA-1 is used and only the first 20 bytes of this field
192 /// are used.
202 /// are used.
193 pub fn hash(&self) -> &Node {
203 pub fn hash(&self) -> &'a Node {
194 (&self.bytes[32..52]).try_into().unwrap()
204 (&self.bytes[32..52]).try_into().unwrap()
195 }
205 }
196 }
206 }
@@ -14,6 +14,9 b' use zstd;'
14
14
15 use super::index::Index;
15 use super::index::Index;
16 use super::node::{NodePrefixRef, NODE_BYTES_LENGTH, NULL_NODE};
16 use super::node::{NodePrefixRef, NODE_BYTES_LENGTH, NULL_NODE};
17 use super::nodemap;
18 use super::nodemap::NodeMap;
19 use super::nodemap_docket::NodeMapDocket;
17 use super::patch;
20 use super::patch;
18 use crate::revlog::Revision;
21 use crate::revlog::Revision;
19
22
@@ -27,7 +30,7 b' pub enum RevlogError {'
27 UnknowDataFormat(u8),
30 UnknowDataFormat(u8),
28 }
31 }
29
32
30 fn mmap_open(path: &Path) -> Result<Mmap, std::io::Error> {
33 pub(super) fn mmap_open(path: &Path) -> Result<Mmap, std::io::Error> {
31 let file = File::open(path)?;
34 let file = File::open(path)?;
32 let mmap = unsafe { MmapOptions::new().map(&file) }?;
35 let mmap = unsafe { MmapOptions::new().map(&file) }?;
33 Ok(mmap)
36 Ok(mmap)
@@ -41,6 +44,8 b' pub struct Revlog {'
41 index: Index,
44 index: Index,
42 /// When index and data are not interleaved: bytes of the revlog data
45 /// When index and data are not interleaved: bytes of the revlog data
43 data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>>,
46 data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>>,
47 /// When present on disk: the persistent nodemap for this revlog
48 nodemap: Option<nodemap::NodeTree>,
44 }
49 }
45
50
46 impl Revlog {
51 impl Revlog {
@@ -77,7 +82,20 b' impl Revlog {'
77 Some(Box::new(data_mmap))
82 Some(Box::new(data_mmap))
78 };
83 };
79
84
80 Ok(Revlog { index, data_bytes })
85 let nodemap = NodeMapDocket::read_from_file(index_path)?.map(
86 |(docket, data)| {
87 nodemap::NodeTree::load_bytes(
88 Box::new(data),
89 docket.data_length,
90 )
91 },
92 );
93
94 Ok(Revlog {
95 index,
96 data_bytes,
97 nodemap,
98 })
81 }
99 }
82
100
83 /// Return number of entries of the `Revlog`.
101 /// Return number of entries of the `Revlog`.
@@ -96,8 +114,20 b' impl Revlog {'
96 &self,
114 &self,
97 node: NodePrefixRef,
115 node: NodePrefixRef,
98 ) -> Result<Revision, RevlogError> {
116 ) -> Result<Revision, RevlogError> {
99 // This is brute force. But it is fast enough for now.
117 if let Some(nodemap) = &self.nodemap {
100 // Optimization will come later.
118 return nodemap
119 .find_bin(&self.index, node)
120 // TODO: propagate details of this error:
121 .map_err(|_| RevlogError::Corrupted)?
122 .ok_or(RevlogError::InvalidRevision);
123 }
124
125 // Fall back to linear scan when a persistent nodemap is not present.
126 // This happens when the persistent-nodemap experimental feature is not
127 // enabled, or for small revlogs.
128 //
129 // TODO: consider building a non-persistent nodemap in memory to
130 // optimize these cases.
101 let mut found_by_prefix = None;
131 let mut found_by_prefix = None;
102 for rev in (0..self.len() as Revision).rev() {
132 for rev in (0..self.len() as Revision).rev() {
103 let index_entry =
133 let index_entry =
@@ -167,3 +167,12 b" impl<'a> Escaped for &'a HgPath {"
167 self.as_bytes().escaped_bytes()
167 self.as_bytes().escaped_bytes()
168 }
168 }
169 }
169 }
170
/// Return `s` with `suffix` removed from its end, or `None` when `s` does
/// not end with `suffix`.
///
/// An empty `suffix` always matches and yields `s` unchanged.
// TODO: use the str method when we require Rust 1.45
pub(crate) fn strip_suffix<'a>(s: &'a str, suffix: &str) -> Option<&'a str> {
    if !s.ends_with(suffix) {
        return None;
    }
    // `ends_with` succeeded, so the subtraction cannot underflow and the
    // cut falls on a character boundary.
    let kept = s.len() - suffix.len();
    Some(&s[..kept])
}
@@ -196,5 +196,9 b' Persistent nodemap'
196 .hg/store/00changelog.d
196 .hg/store/00changelog.d
197 .hg/store/00changelog.i
197 .hg/store/00changelog.i
198 .hg/store/00changelog.n
198 .hg/store/00changelog.n
199
200 Specifying revisions by changeset ID
199 $ rhg files -r c3ae8dec9fad
201 $ rhg files -r c3ae8dec9fad
200 [252]
202 of
203 $ rhg cat -r c3ae8dec9fad of
204 r5000
General Comments 0
You need to be logged in to leave comments. Login now