##// END OF EJS Templates
revlog: make the rust test for node hex prefix resolution exercise the nodemap
Arseniy Alekseyev -
r51879:eccf7dc7 stable
parent child Browse files
Show More
@@ -1,825 +1,843 b''
1 // Copyright 2018-2023 Georges Racinet <georges.racinet@octobus.net>
1 // Copyright 2018-2023 Georges Racinet <georges.racinet@octobus.net>
2 // and Mercurial contributors
2 // and Mercurial contributors
3 //
3 //
4 // This software may be used and distributed according to the terms of the
4 // This software may be used and distributed according to the terms of the
5 // GNU General Public License version 2 or any later version.
5 // GNU General Public License version 2 or any later version.
6 //! Mercurial concepts for handling revision history
6 //! Mercurial concepts for handling revision history
7
7
8 pub mod node;
8 pub mod node;
9 pub mod nodemap;
9 pub mod nodemap;
10 mod nodemap_docket;
10 mod nodemap_docket;
11 pub mod path_encode;
11 pub mod path_encode;
12 pub use node::{FromHexError, Node, NodePrefix};
12 pub use node::{FromHexError, Node, NodePrefix};
13 pub mod changelog;
13 pub mod changelog;
14 pub mod filelog;
14 pub mod filelog;
15 pub mod index;
15 pub mod index;
16 pub mod manifest;
16 pub mod manifest;
17 pub mod patch;
17 pub mod patch;
18
18
19 use std::borrow::Cow;
19 use std::borrow::Cow;
20 use std::io::Read;
20 use std::io::Read;
21 use std::ops::Deref;
21 use std::ops::Deref;
22 use std::path::Path;
22 use std::path::Path;
23
23
24 use flate2::read::ZlibDecoder;
24 use flate2::read::ZlibDecoder;
25 use sha1::{Digest, Sha1};
25 use sha1::{Digest, Sha1};
26 use std::cell::RefCell;
26 use std::cell::RefCell;
27 use zstd;
27 use zstd;
28
28
29 use self::node::{NODE_BYTES_LENGTH, NULL_NODE};
29 use self::node::{NODE_BYTES_LENGTH, NULL_NODE};
30 use self::nodemap_docket::NodeMapDocket;
30 use self::nodemap_docket::NodeMapDocket;
31 use super::index::Index;
31 use super::index::Index;
32 use super::nodemap::{NodeMap, NodeMapError};
32 use super::nodemap::{NodeMap, NodeMapError};
33 use crate::errors::HgError;
33 use crate::errors::HgError;
34 use crate::vfs::Vfs;
34 use crate::vfs::Vfs;
35
35
36 /// Mercurial revision numbers
36 /// Mercurial revision numbers
37 ///
37 ///
38 /// As noted in revlog.c, revision numbers are actually encoded in
38 /// As noted in revlog.c, revision numbers are actually encoded in
39 /// 4 bytes, and are liberally converted to ints, whence the i32
39 /// 4 bytes, and are liberally converted to ints, whence the i32
40 pub type Revision = i32;
40 pub type Revision = i32;
41
41
42 /// Marker expressing the absence of a parent
42 /// Marker expressing the absence of a parent
43 ///
43 ///
44 /// Independently of the actual representation, `NULL_REVISION` is guaranteed
44 /// Independently of the actual representation, `NULL_REVISION` is guaranteed
45 /// to be smaller than all existing revisions.
45 /// to be smaller than all existing revisions.
46 pub const NULL_REVISION: Revision = -1;
46 pub const NULL_REVISION: Revision = -1;
47
47
48 /// Same as `mercurial.node.wdirrev`
48 /// Same as `mercurial.node.wdirrev`
49 ///
49 ///
50 /// This is also equal to `i32::max_value()`, but it's better to spell
50 /// This is also equal to `i32::max_value()`, but it's better to spell
51 /// it out explicitely, same as in `mercurial.node`
51 /// it out explicitely, same as in `mercurial.node`
52 #[allow(clippy::unreadable_literal)]
52 #[allow(clippy::unreadable_literal)]
53 pub const WORKING_DIRECTORY_REVISION: Revision = 0x7fffffff;
53 pub const WORKING_DIRECTORY_REVISION: Revision = 0x7fffffff;
54
54
55 pub const WORKING_DIRECTORY_HEX: &str =
55 pub const WORKING_DIRECTORY_HEX: &str =
56 "ffffffffffffffffffffffffffffffffffffffff";
56 "ffffffffffffffffffffffffffffffffffffffff";
57
57
58 /// The simplest expression of what we need of Mercurial DAGs.
58 /// The simplest expression of what we need of Mercurial DAGs.
59 pub trait Graph {
59 pub trait Graph {
60 /// Return the two parents of the given `Revision`.
60 /// Return the two parents of the given `Revision`.
61 ///
61 ///
62 /// Each of the parents can be independently `NULL_REVISION`
62 /// Each of the parents can be independently `NULL_REVISION`
63 fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError>;
63 fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError>;
64 }
64 }
65
65
66 #[derive(Clone, Debug, PartialEq)]
66 #[derive(Clone, Debug, PartialEq)]
67 pub enum GraphError {
67 pub enum GraphError {
68 ParentOutOfRange(Revision),
68 ParentOutOfRange(Revision),
69 WorkingDirectoryUnsupported,
69 WorkingDirectoryUnsupported,
70 }
70 }
71
71
72 /// The Mercurial Revlog Index
72 /// The Mercurial Revlog Index
73 ///
73 ///
74 /// This is currently limited to the minimal interface that is needed for
74 /// This is currently limited to the minimal interface that is needed for
75 /// the [`nodemap`](nodemap/index.html) module
75 /// the [`nodemap`](nodemap/index.html) module
76 pub trait RevlogIndex {
76 pub trait RevlogIndex {
77 /// Total number of Revisions referenced in this index
77 /// Total number of Revisions referenced in this index
78 fn len(&self) -> usize;
78 fn len(&self) -> usize;
79
79
80 fn is_empty(&self) -> bool {
80 fn is_empty(&self) -> bool {
81 self.len() == 0
81 self.len() == 0
82 }
82 }
83
83
84 /// Return a reference to the Node or `None` if rev is out of bounds
84 /// Return a reference to the Node or `None` if rev is out of bounds
85 ///
85 ///
86 /// `NULL_REVISION` is not considered to be out of bounds.
86 /// `NULL_REVISION` is not considered to be out of bounds.
87 fn node(&self, rev: Revision) -> Option<&Node>;
87 fn node(&self, rev: Revision) -> Option<&Node>;
88 }
88 }
89
89
90 const REVISION_FLAG_CENSORED: u16 = 1 << 15;
90 const REVISION_FLAG_CENSORED: u16 = 1 << 15;
91 const REVISION_FLAG_ELLIPSIS: u16 = 1 << 14;
91 const REVISION_FLAG_ELLIPSIS: u16 = 1 << 14;
92 const REVISION_FLAG_EXTSTORED: u16 = 1 << 13;
92 const REVISION_FLAG_EXTSTORED: u16 = 1 << 13;
93 const REVISION_FLAG_HASCOPIESINFO: u16 = 1 << 12;
93 const REVISION_FLAG_HASCOPIESINFO: u16 = 1 << 12;
94
94
95 // Keep this in sync with REVIDX_KNOWN_FLAGS in
95 // Keep this in sync with REVIDX_KNOWN_FLAGS in
96 // mercurial/revlogutils/flagutil.py
96 // mercurial/revlogutils/flagutil.py
97 const REVIDX_KNOWN_FLAGS: u16 = REVISION_FLAG_CENSORED
97 const REVIDX_KNOWN_FLAGS: u16 = REVISION_FLAG_CENSORED
98 | REVISION_FLAG_ELLIPSIS
98 | REVISION_FLAG_ELLIPSIS
99 | REVISION_FLAG_EXTSTORED
99 | REVISION_FLAG_EXTSTORED
100 | REVISION_FLAG_HASCOPIESINFO;
100 | REVISION_FLAG_HASCOPIESINFO;
101
101
102 const NULL_REVLOG_ENTRY_FLAGS: u16 = 0;
102 const NULL_REVLOG_ENTRY_FLAGS: u16 = 0;
103
103
104 #[derive(Debug, derive_more::From)]
104 #[derive(Debug, derive_more::From)]
105 pub enum RevlogError {
105 pub enum RevlogError {
106 InvalidRevision,
106 InvalidRevision,
107 /// Working directory is not supported
107 /// Working directory is not supported
108 WDirUnsupported,
108 WDirUnsupported,
109 /// Found more than one entry whose ID match the requested prefix
109 /// Found more than one entry whose ID match the requested prefix
110 AmbiguousPrefix,
110 AmbiguousPrefix,
111 #[from]
111 #[from]
112 Other(HgError),
112 Other(HgError),
113 }
113 }
114
114
115 impl From<NodeMapError> for RevlogError {
115 impl From<NodeMapError> for RevlogError {
116 fn from(error: NodeMapError) -> Self {
116 fn from(error: NodeMapError) -> Self {
117 match error {
117 match error {
118 NodeMapError::MultipleResults => RevlogError::AmbiguousPrefix,
118 NodeMapError::MultipleResults => RevlogError::AmbiguousPrefix,
119 NodeMapError::RevisionNotInIndex(rev) => RevlogError::corrupted(
119 NodeMapError::RevisionNotInIndex(rev) => RevlogError::corrupted(
120 format!("nodemap point to revision {} not in index", rev),
120 format!("nodemap point to revision {} not in index", rev),
121 ),
121 ),
122 }
122 }
123 }
123 }
124 }
124 }
125
125
126 fn corrupted<S: AsRef<str>>(context: S) -> HgError {
126 fn corrupted<S: AsRef<str>>(context: S) -> HgError {
127 HgError::corrupted(format!("corrupted revlog, {}", context.as_ref()))
127 HgError::corrupted(format!("corrupted revlog, {}", context.as_ref()))
128 }
128 }
129
129
130 impl RevlogError {
130 impl RevlogError {
131 fn corrupted<S: AsRef<str>>(context: S) -> Self {
131 fn corrupted<S: AsRef<str>>(context: S) -> Self {
132 RevlogError::Other(corrupted(context))
132 RevlogError::Other(corrupted(context))
133 }
133 }
134 }
134 }
135
135
136 /// Read only implementation of revlog.
136 /// Read only implementation of revlog.
137 pub struct Revlog {
137 pub struct Revlog {
138 /// When index and data are not interleaved: bytes of the revlog index.
138 /// When index and data are not interleaved: bytes of the revlog index.
139 /// When index and data are interleaved: bytes of the revlog index and
139 /// When index and data are interleaved: bytes of the revlog index and
140 /// data.
140 /// data.
141 index: Index,
141 index: Index,
142 /// When index and data are not interleaved: bytes of the revlog data
142 /// When index and data are not interleaved: bytes of the revlog data
143 data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>>,
143 data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>>,
144 /// When present on disk: the persistent nodemap for this revlog
144 /// When present on disk: the persistent nodemap for this revlog
145 nodemap: Option<nodemap::NodeTree>,
145 nodemap: Option<nodemap::NodeTree>,
146 }
146 }
147
147
148 impl Revlog {
148 impl Revlog {
149 /// Open a revlog index file.
149 /// Open a revlog index file.
150 ///
150 ///
151 /// It will also open the associated data file if index and data are not
151 /// It will also open the associated data file if index and data are not
152 /// interleaved.
152 /// interleaved.
153 pub fn open(
153 pub fn open(
154 store_vfs: &Vfs,
154 store_vfs: &Vfs,
155 index_path: impl AsRef<Path>,
155 index_path: impl AsRef<Path>,
156 data_path: Option<&Path>,
156 data_path: Option<&Path>,
157 use_nodemap: bool,
157 use_nodemap: bool,
158 ) -> Result<Self, HgError> {
158 ) -> Result<Self, HgError> {
159 Self::open_gen(store_vfs, index_path, data_path, use_nodemap, None)
160 }
161
162 fn open_gen(
163 store_vfs: &Vfs,
164 index_path: impl AsRef<Path>,
165 data_path: Option<&Path>,
166 use_nodemap: bool,
167 nodemap_for_test: Option<nodemap::NodeTree>,
168 ) -> Result<Self, HgError> {
159 let index_path = index_path.as_ref();
169 let index_path = index_path.as_ref();
160 let index = {
170 let index = {
161 match store_vfs.mmap_open_opt(&index_path)? {
171 match store_vfs.mmap_open_opt(&index_path)? {
162 None => Index::new(Box::new(vec![])),
172 None => Index::new(Box::new(vec![])),
163 Some(index_mmap) => {
173 Some(index_mmap) => {
164 let index = Index::new(Box::new(index_mmap))?;
174 let index = Index::new(Box::new(index_mmap))?;
165 Ok(index)
175 Ok(index)
166 }
176 }
167 }
177 }
168 }?;
178 }?;
169
179
170 let default_data_path = index_path.with_extension("d");
180 let default_data_path = index_path.with_extension("d");
171
181
172 // type annotation required
182 // type annotation required
173 // won't recognize Mmap as Deref<Target = [u8]>
183 // won't recognize Mmap as Deref<Target = [u8]>
174 let data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>> =
184 let data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>> =
175 if index.is_inline() {
185 if index.is_inline() {
176 None
186 None
177 } else {
187 } else {
178 let data_path = data_path.unwrap_or(&default_data_path);
188 let data_path = data_path.unwrap_or(&default_data_path);
179 let data_mmap = store_vfs.mmap_open(data_path)?;
189 let data_mmap = store_vfs.mmap_open(data_path)?;
180 Some(Box::new(data_mmap))
190 Some(Box::new(data_mmap))
181 };
191 };
182
192
183 let nodemap = if index.is_inline() || !use_nodemap {
193 let nodemap = if index.is_inline() || !use_nodemap {
184 None
194 None
185 } else {
195 } else {
186 NodeMapDocket::read_from_file(store_vfs, index_path)?.map(
196 NodeMapDocket::read_from_file(store_vfs, index_path)?.map(
187 |(docket, data)| {
197 |(docket, data)| {
188 nodemap::NodeTree::load_bytes(
198 nodemap::NodeTree::load_bytes(
189 Box::new(data),
199 Box::new(data),
190 docket.data_length,
200 docket.data_length,
191 )
201 )
192 },
202 },
193 )
203 )
194 };
204 };
195
205
206 let nodemap = nodemap_for_test.or(nodemap);
207
196 Ok(Revlog {
208 Ok(Revlog {
197 index,
209 index,
198 data_bytes,
210 data_bytes,
199 nodemap,
211 nodemap,
200 })
212 })
201 }
213 }
202
214
203 /// Return number of entries of the `Revlog`.
215 /// Return number of entries of the `Revlog`.
204 pub fn len(&self) -> usize {
216 pub fn len(&self) -> usize {
205 self.index.len()
217 self.index.len()
206 }
218 }
207
219
208 /// Returns `true` if the `Revlog` has zero `entries`.
220 /// Returns `true` if the `Revlog` has zero `entries`.
209 pub fn is_empty(&self) -> bool {
221 pub fn is_empty(&self) -> bool {
210 self.index.is_empty()
222 self.index.is_empty()
211 }
223 }
212
224
213 /// Returns the node ID for the given revision number, if it exists in this
225 /// Returns the node ID for the given revision number, if it exists in this
214 /// revlog
226 /// revlog
215 pub fn node_from_rev(&self, rev: Revision) -> Option<&Node> {
227 pub fn node_from_rev(&self, rev: Revision) -> Option<&Node> {
216 if rev == NULL_REVISION {
228 if rev == NULL_REVISION {
217 return Some(&NULL_NODE);
229 return Some(&NULL_NODE);
218 }
230 }
219 Some(self.index.get_entry(rev)?.hash())
231 Some(self.index.get_entry(rev)?.hash())
220 }
232 }
221
233
222 /// Return the revision number for the given node ID, if it exists in this
234 /// Return the revision number for the given node ID, if it exists in this
223 /// revlog
235 /// revlog
224 pub fn rev_from_node(
236 pub fn rev_from_node(
225 &self,
237 &self,
226 node: NodePrefix,
238 node: NodePrefix,
227 ) -> Result<Revision, RevlogError> {
239 ) -> Result<Revision, RevlogError> {
228 if let Some(nodemap) = &self.nodemap {
240 if let Some(nodemap) = &self.nodemap {
229 nodemap
241 nodemap
230 .find_bin(&self.index, node)?
242 .find_bin(&self.index, node)?
231 .ok_or(RevlogError::InvalidRevision)
243 .ok_or(RevlogError::InvalidRevision)
232 } else {
244 } else {
233 self.rev_from_node_no_persistent_nodemap(node)
245 self.rev_from_node_no_persistent_nodemap(node)
234 }
246 }
235 }
247 }
236
248
237 /// Same as `rev_from_node`, without using a persistent nodemap
249 /// Same as `rev_from_node`, without using a persistent nodemap
238 ///
250 ///
239 /// This is used as fallback when a persistent nodemap is not present.
251 /// This is used as fallback when a persistent nodemap is not present.
240 /// This happens when the persistent-nodemap experimental feature is not
252 /// This happens when the persistent-nodemap experimental feature is not
241 /// enabled, or for small revlogs.
253 /// enabled, or for small revlogs.
242 fn rev_from_node_no_persistent_nodemap(
254 fn rev_from_node_no_persistent_nodemap(
243 &self,
255 &self,
244 node: NodePrefix,
256 node: NodePrefix,
245 ) -> Result<Revision, RevlogError> {
257 ) -> Result<Revision, RevlogError> {
246 // Linear scan of the revlog
258 // Linear scan of the revlog
247 // TODO: consider building a non-persistent nodemap in memory to
259 // TODO: consider building a non-persistent nodemap in memory to
248 // optimize these cases.
260 // optimize these cases.
249 let mut found_by_prefix = None;
261 let mut found_by_prefix = None;
250 for rev in (-1..self.len() as Revision).rev() {
262 for rev in (-1..self.len() as Revision).rev() {
251 let candidate_node = if rev == -1 {
263 let candidate_node = if rev == -1 {
252 NULL_NODE
264 NULL_NODE
253 } else {
265 } else {
254 let index_entry =
266 let index_entry =
255 self.index.get_entry(rev).ok_or_else(|| {
267 self.index.get_entry(rev).ok_or_else(|| {
256 HgError::corrupted(
268 HgError::corrupted(
257 "revlog references a revision not in the index",
269 "revlog references a revision not in the index",
258 )
270 )
259 })?;
271 })?;
260 *index_entry.hash()
272 *index_entry.hash()
261 };
273 };
262 if node == candidate_node {
274 if node == candidate_node {
263 return Ok(rev);
275 return Ok(rev);
264 }
276 }
265 if node.is_prefix_of(&candidate_node) {
277 if node.is_prefix_of(&candidate_node) {
266 if found_by_prefix.is_some() {
278 if found_by_prefix.is_some() {
267 return Err(RevlogError::AmbiguousPrefix);
279 return Err(RevlogError::AmbiguousPrefix);
268 }
280 }
269 found_by_prefix = Some(rev)
281 found_by_prefix = Some(rev)
270 }
282 }
271 }
283 }
272 found_by_prefix.ok_or(RevlogError::InvalidRevision)
284 found_by_prefix.ok_or(RevlogError::InvalidRevision)
273 }
285 }
274
286
275 /// Returns whether the given revision exists in this revlog.
287 /// Returns whether the given revision exists in this revlog.
276 pub fn has_rev(&self, rev: Revision) -> bool {
288 pub fn has_rev(&self, rev: Revision) -> bool {
277 self.index.get_entry(rev).is_some()
289 self.index.get_entry(rev).is_some()
278 }
290 }
279
291
280 /// Return the full data associated to a revision.
292 /// Return the full data associated to a revision.
281 ///
293 ///
282 /// All entries required to build the final data out of deltas will be
294 /// All entries required to build the final data out of deltas will be
283 /// retrieved as needed, and the deltas will be applied to the inital
295 /// retrieved as needed, and the deltas will be applied to the inital
284 /// snapshot to rebuild the final data.
296 /// snapshot to rebuild the final data.
285 pub fn get_rev_data(
297 pub fn get_rev_data(
286 &self,
298 &self,
287 rev: Revision,
299 rev: Revision,
288 ) -> Result<Cow<[u8]>, RevlogError> {
300 ) -> Result<Cow<[u8]>, RevlogError> {
289 if rev == NULL_REVISION {
301 if rev == NULL_REVISION {
290 return Ok(Cow::Borrowed(&[]));
302 return Ok(Cow::Borrowed(&[]));
291 };
303 };
292 Ok(self.get_entry(rev)?.data()?)
304 Ok(self.get_entry(rev)?.data()?)
293 }
305 }
294
306
295 /// Check the hash of some given data against the recorded hash.
307 /// Check the hash of some given data against the recorded hash.
296 pub fn check_hash(
308 pub fn check_hash(
297 &self,
309 &self,
298 p1: Revision,
310 p1: Revision,
299 p2: Revision,
311 p2: Revision,
300 expected: &[u8],
312 expected: &[u8],
301 data: &[u8],
313 data: &[u8],
302 ) -> bool {
314 ) -> bool {
303 let e1 = self.index.get_entry(p1);
315 let e1 = self.index.get_entry(p1);
304 let h1 = match e1 {
316 let h1 = match e1 {
305 Some(ref entry) => entry.hash(),
317 Some(ref entry) => entry.hash(),
306 None => &NULL_NODE,
318 None => &NULL_NODE,
307 };
319 };
308 let e2 = self.index.get_entry(p2);
320 let e2 = self.index.get_entry(p2);
309 let h2 = match e2 {
321 let h2 = match e2 {
310 Some(ref entry) => entry.hash(),
322 Some(ref entry) => entry.hash(),
311 None => &NULL_NODE,
323 None => &NULL_NODE,
312 };
324 };
313
325
314 hash(data, h1.as_bytes(), h2.as_bytes()) == expected
326 hash(data, h1.as_bytes(), h2.as_bytes()) == expected
315 }
327 }
316
328
317 /// Build the full data of a revision out its snapshot
329 /// Build the full data of a revision out its snapshot
318 /// and its deltas.
330 /// and its deltas.
319 fn build_data_from_deltas(
331 fn build_data_from_deltas(
320 snapshot: RevlogEntry,
332 snapshot: RevlogEntry,
321 deltas: &[RevlogEntry],
333 deltas: &[RevlogEntry],
322 ) -> Result<Vec<u8>, HgError> {
334 ) -> Result<Vec<u8>, HgError> {
323 let snapshot = snapshot.data_chunk()?;
335 let snapshot = snapshot.data_chunk()?;
324 let deltas = deltas
336 let deltas = deltas
325 .iter()
337 .iter()
326 .rev()
338 .rev()
327 .map(RevlogEntry::data_chunk)
339 .map(RevlogEntry::data_chunk)
328 .collect::<Result<Vec<_>, _>>()?;
340 .collect::<Result<Vec<_>, _>>()?;
329 let patches: Vec<_> =
341 let patches: Vec<_> =
330 deltas.iter().map(|d| patch::PatchList::new(d)).collect();
342 deltas.iter().map(|d| patch::PatchList::new(d)).collect();
331 let patch = patch::fold_patch_lists(&patches);
343 let patch = patch::fold_patch_lists(&patches);
332 Ok(patch.apply(&snapshot))
344 Ok(patch.apply(&snapshot))
333 }
345 }
334
346
335 /// Return the revlog data.
347 /// Return the revlog data.
336 fn data(&self) -> &[u8] {
348 fn data(&self) -> &[u8] {
337 match &self.data_bytes {
349 match &self.data_bytes {
338 Some(data_bytes) => data_bytes,
350 Some(data_bytes) => data_bytes,
339 None => panic!(
351 None => panic!(
340 "forgot to load the data or trying to access inline data"
352 "forgot to load the data or trying to access inline data"
341 ),
353 ),
342 }
354 }
343 }
355 }
344
356
345 pub fn make_null_entry(&self) -> RevlogEntry {
357 pub fn make_null_entry(&self) -> RevlogEntry {
346 RevlogEntry {
358 RevlogEntry {
347 revlog: self,
359 revlog: self,
348 rev: NULL_REVISION,
360 rev: NULL_REVISION,
349 bytes: b"",
361 bytes: b"",
350 compressed_len: 0,
362 compressed_len: 0,
351 uncompressed_len: 0,
363 uncompressed_len: 0,
352 base_rev_or_base_of_delta_chain: None,
364 base_rev_or_base_of_delta_chain: None,
353 p1: NULL_REVISION,
365 p1: NULL_REVISION,
354 p2: NULL_REVISION,
366 p2: NULL_REVISION,
355 flags: NULL_REVLOG_ENTRY_FLAGS,
367 flags: NULL_REVLOG_ENTRY_FLAGS,
356 hash: NULL_NODE,
368 hash: NULL_NODE,
357 }
369 }
358 }
370 }
359
371
360 /// Get an entry of the revlog.
372 /// Get an entry of the revlog.
361 pub fn get_entry(
373 pub fn get_entry(
362 &self,
374 &self,
363 rev: Revision,
375 rev: Revision,
364 ) -> Result<RevlogEntry, RevlogError> {
376 ) -> Result<RevlogEntry, RevlogError> {
365 if rev == NULL_REVISION {
377 if rev == NULL_REVISION {
366 return Ok(self.make_null_entry());
378 return Ok(self.make_null_entry());
367 }
379 }
368 let index_entry = self
380 let index_entry = self
369 .index
381 .index
370 .get_entry(rev)
382 .get_entry(rev)
371 .ok_or(RevlogError::InvalidRevision)?;
383 .ok_or(RevlogError::InvalidRevision)?;
372 let start = index_entry.offset();
384 let start = index_entry.offset();
373 let end = start + index_entry.compressed_len() as usize;
385 let end = start + index_entry.compressed_len() as usize;
374 let data = if self.index.is_inline() {
386 let data = if self.index.is_inline() {
375 self.index.data(start, end)
387 self.index.data(start, end)
376 } else {
388 } else {
377 &self.data()[start..end]
389 &self.data()[start..end]
378 };
390 };
379 let entry = RevlogEntry {
391 let entry = RevlogEntry {
380 revlog: self,
392 revlog: self,
381 rev,
393 rev,
382 bytes: data,
394 bytes: data,
383 compressed_len: index_entry.compressed_len(),
395 compressed_len: index_entry.compressed_len(),
384 uncompressed_len: index_entry.uncompressed_len(),
396 uncompressed_len: index_entry.uncompressed_len(),
385 base_rev_or_base_of_delta_chain: if index_entry
397 base_rev_or_base_of_delta_chain: if index_entry
386 .base_revision_or_base_of_delta_chain()
398 .base_revision_or_base_of_delta_chain()
387 == rev
399 == rev
388 {
400 {
389 None
401 None
390 } else {
402 } else {
391 Some(index_entry.base_revision_or_base_of_delta_chain())
403 Some(index_entry.base_revision_or_base_of_delta_chain())
392 },
404 },
393 p1: index_entry.p1(),
405 p1: index_entry.p1(),
394 p2: index_entry.p2(),
406 p2: index_entry.p2(),
395 flags: index_entry.flags(),
407 flags: index_entry.flags(),
396 hash: *index_entry.hash(),
408 hash: *index_entry.hash(),
397 };
409 };
398 Ok(entry)
410 Ok(entry)
399 }
411 }
400
412
401 /// when resolving internal references within revlog, any errors
413 /// when resolving internal references within revlog, any errors
402 /// should be reported as corruption, instead of e.g. "invalid revision"
414 /// should be reported as corruption, instead of e.g. "invalid revision"
403 fn get_entry_internal(
415 fn get_entry_internal(
404 &self,
416 &self,
405 rev: Revision,
417 rev: Revision,
406 ) -> Result<RevlogEntry, HgError> {
418 ) -> Result<RevlogEntry, HgError> {
407 self.get_entry(rev)
419 self.get_entry(rev)
408 .map_err(|_| corrupted(format!("revision {} out of range", rev)))
420 .map_err(|_| corrupted(format!("revision {} out of range", rev)))
409 }
421 }
410 }
422 }
411
423
412 /// The revlog entry's bytes and the necessary informations to extract
424 /// The revlog entry's bytes and the necessary informations to extract
413 /// the entry's data.
425 /// the entry's data.
414 #[derive(Clone)]
426 #[derive(Clone)]
415 pub struct RevlogEntry<'revlog> {
427 pub struct RevlogEntry<'revlog> {
416 revlog: &'revlog Revlog,
428 revlog: &'revlog Revlog,
417 rev: Revision,
429 rev: Revision,
418 bytes: &'revlog [u8],
430 bytes: &'revlog [u8],
419 compressed_len: u32,
431 compressed_len: u32,
420 uncompressed_len: i32,
432 uncompressed_len: i32,
421 base_rev_or_base_of_delta_chain: Option<Revision>,
433 base_rev_or_base_of_delta_chain: Option<Revision>,
422 p1: Revision,
434 p1: Revision,
423 p2: Revision,
435 p2: Revision,
424 flags: u16,
436 flags: u16,
425 hash: Node,
437 hash: Node,
426 }
438 }
427
439
428 thread_local! {
440 thread_local! {
429 // seems fine to [unwrap] here: this can only fail due to memory allocation
441 // seems fine to [unwrap] here: this can only fail due to memory allocation
430 // failing, and it's normal for that to cause panic.
442 // failing, and it's normal for that to cause panic.
431 static ZSTD_DECODER : RefCell<zstd::bulk::Decompressor<'static>> =
443 static ZSTD_DECODER : RefCell<zstd::bulk::Decompressor<'static>> =
432 RefCell::new(zstd::bulk::Decompressor::new().ok().unwrap());
444 RefCell::new(zstd::bulk::Decompressor::new().ok().unwrap());
433 }
445 }
434
446
435 fn zstd_decompress_to_buffer(
447 fn zstd_decompress_to_buffer(
436 bytes: &[u8],
448 bytes: &[u8],
437 buf: &mut Vec<u8>,
449 buf: &mut Vec<u8>,
438 ) -> Result<usize, std::io::Error> {
450 ) -> Result<usize, std::io::Error> {
439 ZSTD_DECODER
451 ZSTD_DECODER
440 .with(|decoder| decoder.borrow_mut().decompress_to_buffer(bytes, buf))
452 .with(|decoder| decoder.borrow_mut().decompress_to_buffer(bytes, buf))
441 }
453 }
442
454
443 impl<'revlog> RevlogEntry<'revlog> {
455 impl<'revlog> RevlogEntry<'revlog> {
444 pub fn revision(&self) -> Revision {
456 pub fn revision(&self) -> Revision {
445 self.rev
457 self.rev
446 }
458 }
447
459
448 pub fn node(&self) -> &Node {
460 pub fn node(&self) -> &Node {
449 &self.hash
461 &self.hash
450 }
462 }
451
463
452 pub fn uncompressed_len(&self) -> Option<u32> {
464 pub fn uncompressed_len(&self) -> Option<u32> {
453 u32::try_from(self.uncompressed_len).ok()
465 u32::try_from(self.uncompressed_len).ok()
454 }
466 }
455
467
456 pub fn has_p1(&self) -> bool {
468 pub fn has_p1(&self) -> bool {
457 self.p1 != NULL_REVISION
469 self.p1 != NULL_REVISION
458 }
470 }
459
471
460 pub fn p1_entry(
472 pub fn p1_entry(
461 &self,
473 &self,
462 ) -> Result<Option<RevlogEntry<'revlog>>, RevlogError> {
474 ) -> Result<Option<RevlogEntry<'revlog>>, RevlogError> {
463 if self.p1 == NULL_REVISION {
475 if self.p1 == NULL_REVISION {
464 Ok(None)
476 Ok(None)
465 } else {
477 } else {
466 Ok(Some(self.revlog.get_entry(self.p1)?))
478 Ok(Some(self.revlog.get_entry(self.p1)?))
467 }
479 }
468 }
480 }
469
481
470 pub fn p2_entry(
482 pub fn p2_entry(
471 &self,
483 &self,
472 ) -> Result<Option<RevlogEntry<'revlog>>, RevlogError> {
484 ) -> Result<Option<RevlogEntry<'revlog>>, RevlogError> {
473 if self.p2 == NULL_REVISION {
485 if self.p2 == NULL_REVISION {
474 Ok(None)
486 Ok(None)
475 } else {
487 } else {
476 Ok(Some(self.revlog.get_entry(self.p2)?))
488 Ok(Some(self.revlog.get_entry(self.p2)?))
477 }
489 }
478 }
490 }
479
491
480 pub fn p1(&self) -> Option<Revision> {
492 pub fn p1(&self) -> Option<Revision> {
481 if self.p1 == NULL_REVISION {
493 if self.p1 == NULL_REVISION {
482 None
494 None
483 } else {
495 } else {
484 Some(self.p1)
496 Some(self.p1)
485 }
497 }
486 }
498 }
487
499
488 pub fn p2(&self) -> Option<Revision> {
500 pub fn p2(&self) -> Option<Revision> {
489 if self.p2 == NULL_REVISION {
501 if self.p2 == NULL_REVISION {
490 None
502 None
491 } else {
503 } else {
492 Some(self.p2)
504 Some(self.p2)
493 }
505 }
494 }
506 }
495
507
496 pub fn is_censored(&self) -> bool {
508 pub fn is_censored(&self) -> bool {
497 (self.flags & REVISION_FLAG_CENSORED) != 0
509 (self.flags & REVISION_FLAG_CENSORED) != 0
498 }
510 }
499
511
500 pub fn has_length_affecting_flag_processor(&self) -> bool {
512 pub fn has_length_affecting_flag_processor(&self) -> bool {
501 // Relevant Python code: revlog.size()
513 // Relevant Python code: revlog.size()
502 // note: ELLIPSIS is known to not change the content
514 // note: ELLIPSIS is known to not change the content
503 (self.flags & (REVIDX_KNOWN_FLAGS ^ REVISION_FLAG_ELLIPSIS)) != 0
515 (self.flags & (REVIDX_KNOWN_FLAGS ^ REVISION_FLAG_ELLIPSIS)) != 0
504 }
516 }
505
517
506 /// The data for this entry, after resolving deltas if any.
518 /// The data for this entry, after resolving deltas if any.
507 pub fn rawdata(&self) -> Result<Cow<'revlog, [u8]>, HgError> {
519 pub fn rawdata(&self) -> Result<Cow<'revlog, [u8]>, HgError> {
508 let mut entry = self.clone();
520 let mut entry = self.clone();
509 let mut delta_chain = vec![];
521 let mut delta_chain = vec![];
510
522
511 // The meaning of `base_rev_or_base_of_delta_chain` depends on
523 // The meaning of `base_rev_or_base_of_delta_chain` depends on
512 // generaldelta. See the doc on `ENTRY_DELTA_BASE` in
524 // generaldelta. See the doc on `ENTRY_DELTA_BASE` in
513 // `mercurial/revlogutils/constants.py` and the code in
525 // `mercurial/revlogutils/constants.py` and the code in
514 // [_chaininfo] and in [index_deltachain].
526 // [_chaininfo] and in [index_deltachain].
515 let uses_generaldelta = self.revlog.index.uses_generaldelta();
527 let uses_generaldelta = self.revlog.index.uses_generaldelta();
516 while let Some(base_rev) = entry.base_rev_or_base_of_delta_chain {
528 while let Some(base_rev) = entry.base_rev_or_base_of_delta_chain {
517 let base_rev = if uses_generaldelta {
529 let base_rev = if uses_generaldelta {
518 base_rev
530 base_rev
519 } else {
531 } else {
520 entry.rev - 1
532 entry.rev - 1
521 };
533 };
522 delta_chain.push(entry);
534 delta_chain.push(entry);
523 entry = self.revlog.get_entry_internal(base_rev)?;
535 entry = self.revlog.get_entry_internal(base_rev)?;
524 }
536 }
525
537
526 let data = if delta_chain.is_empty() {
538 let data = if delta_chain.is_empty() {
527 entry.data_chunk()?
539 entry.data_chunk()?
528 } else {
540 } else {
529 Revlog::build_data_from_deltas(entry, &delta_chain)?.into()
541 Revlog::build_data_from_deltas(entry, &delta_chain)?.into()
530 };
542 };
531
543
532 Ok(data)
544 Ok(data)
533 }
545 }
534
546
535 fn check_data(
547 fn check_data(
536 &self,
548 &self,
537 data: Cow<'revlog, [u8]>,
549 data: Cow<'revlog, [u8]>,
538 ) -> Result<Cow<'revlog, [u8]>, HgError> {
550 ) -> Result<Cow<'revlog, [u8]>, HgError> {
539 if self.revlog.check_hash(
551 if self.revlog.check_hash(
540 self.p1,
552 self.p1,
541 self.p2,
553 self.p2,
542 self.hash.as_bytes(),
554 self.hash.as_bytes(),
543 &data,
555 &data,
544 ) {
556 ) {
545 Ok(data)
557 Ok(data)
546 } else {
558 } else {
547 if (self.flags & REVISION_FLAG_ELLIPSIS) != 0 {
559 if (self.flags & REVISION_FLAG_ELLIPSIS) != 0 {
548 return Err(HgError::unsupported(
560 return Err(HgError::unsupported(
549 "ellipsis revisions are not supported by rhg",
561 "ellipsis revisions are not supported by rhg",
550 ));
562 ));
551 }
563 }
552 Err(corrupted(format!(
564 Err(corrupted(format!(
553 "hash check failed for revision {}",
565 "hash check failed for revision {}",
554 self.rev
566 self.rev
555 )))
567 )))
556 }
568 }
557 }
569 }
558
570
559 pub fn data(&self) -> Result<Cow<'revlog, [u8]>, HgError> {
571 pub fn data(&self) -> Result<Cow<'revlog, [u8]>, HgError> {
560 let data = self.rawdata()?;
572 let data = self.rawdata()?;
561 if self.rev == NULL_REVISION {
573 if self.rev == NULL_REVISION {
562 return Ok(data);
574 return Ok(data);
563 }
575 }
564 if self.is_censored() {
576 if self.is_censored() {
565 return Err(HgError::CensoredNodeError);
577 return Err(HgError::CensoredNodeError);
566 }
578 }
567 self.check_data(data)
579 self.check_data(data)
568 }
580 }
569
581
570 /// Extract the data contained in the entry.
582 /// Extract the data contained in the entry.
571 /// This may be a delta. (See `is_delta`.)
583 /// This may be a delta. (See `is_delta`.)
572 fn data_chunk(&self) -> Result<Cow<'revlog, [u8]>, HgError> {
584 fn data_chunk(&self) -> Result<Cow<'revlog, [u8]>, HgError> {
573 if self.bytes.is_empty() {
585 if self.bytes.is_empty() {
574 return Ok(Cow::Borrowed(&[]));
586 return Ok(Cow::Borrowed(&[]));
575 }
587 }
576 match self.bytes[0] {
588 match self.bytes[0] {
577 // Revision data is the entirety of the entry, including this
589 // Revision data is the entirety of the entry, including this
578 // header.
590 // header.
579 b'\0' => Ok(Cow::Borrowed(self.bytes)),
591 b'\0' => Ok(Cow::Borrowed(self.bytes)),
580 // Raw revision data follows.
592 // Raw revision data follows.
581 b'u' => Ok(Cow::Borrowed(&self.bytes[1..])),
593 b'u' => Ok(Cow::Borrowed(&self.bytes[1..])),
582 // zlib (RFC 1950) data.
594 // zlib (RFC 1950) data.
583 b'x' => Ok(Cow::Owned(self.uncompressed_zlib_data()?)),
595 b'x' => Ok(Cow::Owned(self.uncompressed_zlib_data()?)),
584 // zstd data.
596 // zstd data.
585 b'\x28' => Ok(Cow::Owned(self.uncompressed_zstd_data()?)),
597 b'\x28' => Ok(Cow::Owned(self.uncompressed_zstd_data()?)),
586 // A proper new format should have had a repo/store requirement.
598 // A proper new format should have had a repo/store requirement.
587 format_type => Err(corrupted(format!(
599 format_type => Err(corrupted(format!(
588 "unknown compression header '{}'",
600 "unknown compression header '{}'",
589 format_type
601 format_type
590 ))),
602 ))),
591 }
603 }
592 }
604 }
593
605
594 fn uncompressed_zlib_data(&self) -> Result<Vec<u8>, HgError> {
606 fn uncompressed_zlib_data(&self) -> Result<Vec<u8>, HgError> {
595 let mut decoder = ZlibDecoder::new(self.bytes);
607 let mut decoder = ZlibDecoder::new(self.bytes);
596 if self.is_delta() {
608 if self.is_delta() {
597 let mut buf = Vec::with_capacity(self.compressed_len as usize);
609 let mut buf = Vec::with_capacity(self.compressed_len as usize);
598 decoder
610 decoder
599 .read_to_end(&mut buf)
611 .read_to_end(&mut buf)
600 .map_err(|e| corrupted(e.to_string()))?;
612 .map_err(|e| corrupted(e.to_string()))?;
601 Ok(buf)
613 Ok(buf)
602 } else {
614 } else {
603 let cap = self.uncompressed_len.max(0) as usize;
615 let cap = self.uncompressed_len.max(0) as usize;
604 let mut buf = vec![0; cap];
616 let mut buf = vec![0; cap];
605 decoder
617 decoder
606 .read_exact(&mut buf)
618 .read_exact(&mut buf)
607 .map_err(|e| corrupted(e.to_string()))?;
619 .map_err(|e| corrupted(e.to_string()))?;
608 Ok(buf)
620 Ok(buf)
609 }
621 }
610 }
622 }
611
623
612 fn uncompressed_zstd_data(&self) -> Result<Vec<u8>, HgError> {
624 fn uncompressed_zstd_data(&self) -> Result<Vec<u8>, HgError> {
613 let cap = self.uncompressed_len.max(0) as usize;
625 let cap = self.uncompressed_len.max(0) as usize;
614 if self.is_delta() {
626 if self.is_delta() {
615 // [cap] is usually an over-estimate of the space needed because
627 // [cap] is usually an over-estimate of the space needed because
616 // it's the length of delta-decoded data, but we're interested
628 // it's the length of delta-decoded data, but we're interested
617 // in the size of the delta.
629 // in the size of the delta.
618 // This means we have to [shrink_to_fit] to avoid holding on
630 // This means we have to [shrink_to_fit] to avoid holding on
619 // to a large chunk of memory, but it also means we must have a
631 // to a large chunk of memory, but it also means we must have a
620 // fallback branch, for the case when the delta is longer than
632 // fallback branch, for the case when the delta is longer than
621 // the original data (surprisingly, this does happen in practice)
633 // the original data (surprisingly, this does happen in practice)
622 let mut buf = Vec::with_capacity(cap);
634 let mut buf = Vec::with_capacity(cap);
623 match zstd_decompress_to_buffer(self.bytes, &mut buf) {
635 match zstd_decompress_to_buffer(self.bytes, &mut buf) {
624 Ok(_) => buf.shrink_to_fit(),
636 Ok(_) => buf.shrink_to_fit(),
625 Err(_) => {
637 Err(_) => {
626 buf.clear();
638 buf.clear();
627 zstd::stream::copy_decode(self.bytes, &mut buf)
639 zstd::stream::copy_decode(self.bytes, &mut buf)
628 .map_err(|e| corrupted(e.to_string()))?;
640 .map_err(|e| corrupted(e.to_string()))?;
629 }
641 }
630 };
642 };
631 Ok(buf)
643 Ok(buf)
632 } else {
644 } else {
633 let mut buf = Vec::with_capacity(cap);
645 let mut buf = Vec::with_capacity(cap);
634 let len = zstd_decompress_to_buffer(self.bytes, &mut buf)
646 let len = zstd_decompress_to_buffer(self.bytes, &mut buf)
635 .map_err(|e| corrupted(e.to_string()))?;
647 .map_err(|e| corrupted(e.to_string()))?;
636 if len != self.uncompressed_len as usize {
648 if len != self.uncompressed_len as usize {
637 Err(corrupted("uncompressed length does not match"))
649 Err(corrupted("uncompressed length does not match"))
638 } else {
650 } else {
639 Ok(buf)
651 Ok(buf)
640 }
652 }
641 }
653 }
642 }
654 }
643
655
644 /// Tell if the entry is a snapshot or a delta
656 /// Tell if the entry is a snapshot or a delta
645 /// (influences on decompression).
657 /// (influences on decompression).
646 fn is_delta(&self) -> bool {
658 fn is_delta(&self) -> bool {
647 self.base_rev_or_base_of_delta_chain.is_some()
659 self.base_rev_or_base_of_delta_chain.is_some()
648 }
660 }
649 }
661 }
650
662
651 /// Calculate the hash of a revision given its data and its parents.
663 /// Calculate the hash of a revision given its data and its parents.
652 fn hash(
664 fn hash(
653 data: &[u8],
665 data: &[u8],
654 p1_hash: &[u8],
666 p1_hash: &[u8],
655 p2_hash: &[u8],
667 p2_hash: &[u8],
656 ) -> [u8; NODE_BYTES_LENGTH] {
668 ) -> [u8; NODE_BYTES_LENGTH] {
657 let mut hasher = Sha1::new();
669 let mut hasher = Sha1::new();
658 let (a, b) = (p1_hash, p2_hash);
670 let (a, b) = (p1_hash, p2_hash);
659 if a > b {
671 if a > b {
660 hasher.update(b);
672 hasher.update(b);
661 hasher.update(a);
673 hasher.update(a);
662 } else {
674 } else {
663 hasher.update(a);
675 hasher.update(a);
664 hasher.update(b);
676 hasher.update(b);
665 }
677 }
666 hasher.update(data);
678 hasher.update(data);
667 *hasher.finalize().as_ref()
679 *hasher.finalize().as_ref()
668 }
680 }
669
681
670 #[cfg(test)]
682 #[cfg(test)]
671 mod tests {
683 mod tests {
672 use super::*;
684 use super::*;
673 use crate::index::{IndexEntryBuilder, INDEX_ENTRY_SIZE};
685 use crate::index::{IndexEntryBuilder, INDEX_ENTRY_SIZE};
674 use itertools::Itertools;
686 use itertools::Itertools;
675
687
676 #[test]
688 #[test]
677 fn test_empty() {
689 fn test_empty() {
678 let temp = tempfile::tempdir().unwrap();
690 let temp = tempfile::tempdir().unwrap();
679 let vfs = Vfs { base: temp.path() };
691 let vfs = Vfs { base: temp.path() };
680 std::fs::write(temp.path().join("foo.i"), b"").unwrap();
692 std::fs::write(temp.path().join("foo.i"), b"").unwrap();
681 let revlog = Revlog::open(&vfs, "foo.i", None, false).unwrap();
693 let revlog = Revlog::open(&vfs, "foo.i", None, false).unwrap();
682 assert!(revlog.is_empty());
694 assert!(revlog.is_empty());
683 assert_eq!(revlog.len(), 0);
695 assert_eq!(revlog.len(), 0);
684 assert!(revlog.get_entry(0).is_err());
696 assert!(revlog.get_entry(0).is_err());
685 assert!(!revlog.has_rev(0));
697 assert!(!revlog.has_rev(0));
686 assert_eq!(
698 assert_eq!(
687 revlog.rev_from_node(NULL_NODE.into()).unwrap(),
699 revlog.rev_from_node(NULL_NODE.into()).unwrap(),
688 NULL_REVISION
700 NULL_REVISION
689 );
701 );
690 let null_entry = revlog.get_entry(NULL_REVISION).ok().unwrap();
702 let null_entry = revlog.get_entry(NULL_REVISION).ok().unwrap();
691 assert_eq!(null_entry.revision(), NULL_REVISION);
703 assert_eq!(null_entry.revision(), NULL_REVISION);
692 assert!(null_entry.data().unwrap().is_empty());
704 assert!(null_entry.data().unwrap().is_empty());
693 }
705 }
694
706
695 #[test]
707 #[test]
696 fn test_inline() {
708 fn test_inline() {
697 let temp = tempfile::tempdir().unwrap();
709 let temp = tempfile::tempdir().unwrap();
698 let vfs = Vfs { base: temp.path() };
710 let vfs = Vfs { base: temp.path() };
699 let node0 = Node::from_hex("2ed2a3912a0b24502043eae84ee4b279c18b90dd")
711 let node0 = Node::from_hex("2ed2a3912a0b24502043eae84ee4b279c18b90dd")
700 .unwrap();
712 .unwrap();
701 let node1 = Node::from_hex("b004912a8510032a0350a74daa2803dadfb00e12")
713 let node1 = Node::from_hex("b004912a8510032a0350a74daa2803dadfb00e12")
702 .unwrap();
714 .unwrap();
703 let node2 = Node::from_hex("dd6ad206e907be60927b5a3117b97dffb2590582")
715 let node2 = Node::from_hex("dd6ad206e907be60927b5a3117b97dffb2590582")
704 .unwrap();
716 .unwrap();
705 let entry0_bytes = IndexEntryBuilder::new()
717 let entry0_bytes = IndexEntryBuilder::new()
706 .is_first(true)
718 .is_first(true)
707 .with_version(1)
719 .with_version(1)
708 .with_inline(true)
720 .with_inline(true)
709 .with_offset(INDEX_ENTRY_SIZE)
721 .with_offset(INDEX_ENTRY_SIZE)
710 .with_node(node0)
722 .with_node(node0)
711 .build();
723 .build();
712 let entry1_bytes = IndexEntryBuilder::new()
724 let entry1_bytes = IndexEntryBuilder::new()
713 .with_offset(INDEX_ENTRY_SIZE)
725 .with_offset(INDEX_ENTRY_SIZE)
714 .with_node(node1)
726 .with_node(node1)
715 .build();
727 .build();
716 let entry2_bytes = IndexEntryBuilder::new()
728 let entry2_bytes = IndexEntryBuilder::new()
717 .with_offset(INDEX_ENTRY_SIZE)
729 .with_offset(INDEX_ENTRY_SIZE)
718 .with_p1(0)
730 .with_p1(0)
719 .with_p2(1)
731 .with_p2(1)
720 .with_node(node2)
732 .with_node(node2)
721 .build();
733 .build();
722 let contents = vec![entry0_bytes, entry1_bytes, entry2_bytes]
734 let contents = vec![entry0_bytes, entry1_bytes, entry2_bytes]
723 .into_iter()
735 .into_iter()
724 .flatten()
736 .flatten()
725 .collect_vec();
737 .collect_vec();
726 std::fs::write(temp.path().join("foo.i"), contents).unwrap();
738 std::fs::write(temp.path().join("foo.i"), contents).unwrap();
727 let revlog = Revlog::open(&vfs, "foo.i", None, false).unwrap();
739 let revlog = Revlog::open(&vfs, "foo.i", None, false).unwrap();
728
740
729 let entry0 = revlog.get_entry(0).ok().unwrap();
741 let entry0 = revlog.get_entry(0).ok().unwrap();
730 assert_eq!(entry0.revision(), 0);
742 assert_eq!(entry0.revision(), 0);
731 assert_eq!(*entry0.node(), node0);
743 assert_eq!(*entry0.node(), node0);
732 assert!(!entry0.has_p1());
744 assert!(!entry0.has_p1());
733 assert_eq!(entry0.p1(), None);
745 assert_eq!(entry0.p1(), None);
734 assert_eq!(entry0.p2(), None);
746 assert_eq!(entry0.p2(), None);
735 let p1_entry = entry0.p1_entry().unwrap();
747 let p1_entry = entry0.p1_entry().unwrap();
736 assert!(p1_entry.is_none());
748 assert!(p1_entry.is_none());
737 let p2_entry = entry0.p2_entry().unwrap();
749 let p2_entry = entry0.p2_entry().unwrap();
738 assert!(p2_entry.is_none());
750 assert!(p2_entry.is_none());
739
751
740 let entry1 = revlog.get_entry(1).ok().unwrap();
752 let entry1 = revlog.get_entry(1).ok().unwrap();
741 assert_eq!(entry1.revision(), 1);
753 assert_eq!(entry1.revision(), 1);
742 assert_eq!(*entry1.node(), node1);
754 assert_eq!(*entry1.node(), node1);
743 assert!(!entry1.has_p1());
755 assert!(!entry1.has_p1());
744 assert_eq!(entry1.p1(), None);
756 assert_eq!(entry1.p1(), None);
745 assert_eq!(entry1.p2(), None);
757 assert_eq!(entry1.p2(), None);
746 let p1_entry = entry1.p1_entry().unwrap();
758 let p1_entry = entry1.p1_entry().unwrap();
747 assert!(p1_entry.is_none());
759 assert!(p1_entry.is_none());
748 let p2_entry = entry1.p2_entry().unwrap();
760 let p2_entry = entry1.p2_entry().unwrap();
749 assert!(p2_entry.is_none());
761 assert!(p2_entry.is_none());
750
762
751 let entry2 = revlog.get_entry(2).ok().unwrap();
763 let entry2 = revlog.get_entry(2).ok().unwrap();
752 assert_eq!(entry2.revision(), 2);
764 assert_eq!(entry2.revision(), 2);
753 assert_eq!(*entry2.node(), node2);
765 assert_eq!(*entry2.node(), node2);
754 assert!(entry2.has_p1());
766 assert!(entry2.has_p1());
755 assert_eq!(entry2.p1(), Some(0));
767 assert_eq!(entry2.p1(), Some(0));
756 assert_eq!(entry2.p2(), Some(1));
768 assert_eq!(entry2.p2(), Some(1));
757 let p1_entry = entry2.p1_entry().unwrap();
769 let p1_entry = entry2.p1_entry().unwrap();
758 assert!(p1_entry.is_some());
770 assert!(p1_entry.is_some());
759 assert_eq!(p1_entry.unwrap().revision(), 0);
771 assert_eq!(p1_entry.unwrap().revision(), 0);
760 let p2_entry = entry2.p2_entry().unwrap();
772 let p2_entry = entry2.p2_entry().unwrap();
761 assert!(p2_entry.is_some());
773 assert!(p2_entry.is_some());
762 assert_eq!(p2_entry.unwrap().revision(), 1);
774 assert_eq!(p2_entry.unwrap().revision(), 1);
763 }
775 }
764
776
765 #[test]
777 #[test]
766 fn test_nodemap() {
778 fn test_nodemap() {
767 let temp = tempfile::tempdir().unwrap();
779 let temp = tempfile::tempdir().unwrap();
768 let vfs = Vfs { base: temp.path() };
780 let vfs = Vfs { base: temp.path() };
769
781
770 // building a revlog with a forced Node starting with zeros
782 // building a revlog with a forced Node starting with zeros
771 // This is a corruption, but it does not preclude using the nodemap
783 // This is a corruption, but it does not preclude using the nodemap
772 // if we don't try and access the data
784 // if we don't try and access the data
773 let node0 = Node::from_hex("00d2a3912a0b24502043eae84ee4b279c18b90dd")
785 let node0 = Node::from_hex("00d2a3912a0b24502043eae84ee4b279c18b90dd")
774 .unwrap();
786 .unwrap();
775 let node1 = Node::from_hex("b004912a8510032a0350a74daa2803dadfb00e12")
787 let node1 = Node::from_hex("b004912a8510032a0350a74daa2803dadfb00e12")
776 .unwrap();
788 .unwrap();
777 let entry0_bytes = IndexEntryBuilder::new()
789 let entry0_bytes = IndexEntryBuilder::new()
778 .is_first(true)
790 .is_first(true)
779 .with_version(1)
791 .with_version(1)
780 .with_inline(true)
792 .with_inline(true)
781 .with_offset(INDEX_ENTRY_SIZE)
793 .with_offset(INDEX_ENTRY_SIZE)
782 .with_node(node0)
794 .with_node(node0)
783 .build();
795 .build();
784 let entry1_bytes = IndexEntryBuilder::new()
796 let entry1_bytes = IndexEntryBuilder::new()
785 .with_offset(INDEX_ENTRY_SIZE)
797 .with_offset(INDEX_ENTRY_SIZE)
786 .with_node(node1)
798 .with_node(node1)
787 .build();
799 .build();
788 let contents = vec![entry0_bytes, entry1_bytes]
800 let contents = vec![entry0_bytes, entry1_bytes]
789 .into_iter()
801 .into_iter()
790 .flatten()
802 .flatten()
791 .collect_vec();
803 .collect_vec();
792 std::fs::write(temp.path().join("foo.i"), contents).unwrap();
804 std::fs::write(temp.path().join("foo.i"), contents).unwrap();
793 let revlog = Revlog::open(&vfs, "foo.i", None, false).unwrap();
805
806 let mut idx = nodemap::tests::TestNtIndex::new();
807 idx.insert_node(0, node0).unwrap();
808 idx.insert_node(1, node1).unwrap();
809
810 let revlog =
811 Revlog::open_gen(&vfs, "foo.i", None, true, Some(idx.nt)).unwrap();
794
812
795 // accessing the data shows the corruption
813 // accessing the data shows the corruption
796 revlog.get_entry(0).unwrap().data().unwrap_err();
814 revlog.get_entry(0).unwrap().data().unwrap_err();
797
815
798 assert_eq!(revlog.rev_from_node(NULL_NODE.into()).unwrap(), -1);
816 assert_eq!(revlog.rev_from_node(NULL_NODE.into()).unwrap(), -1);
799 assert_eq!(revlog.rev_from_node(node0.into()).unwrap(), 0);
817 assert_eq!(revlog.rev_from_node(node0.into()).unwrap(), 0);
800 assert_eq!(revlog.rev_from_node(node1.into()).unwrap(), 1);
818 assert_eq!(revlog.rev_from_node(node1.into()).unwrap(), 1);
801 assert_eq!(
819 assert_eq!(
802 revlog
820 revlog
803 .rev_from_node(NodePrefix::from_hex("000").unwrap())
821 .rev_from_node(NodePrefix::from_hex("000").unwrap())
804 .unwrap(),
822 .unwrap(),
805 -1
823 -1
806 );
824 );
807 assert_eq!(
825 assert_eq!(
808 revlog
826 revlog
809 .rev_from_node(NodePrefix::from_hex("b00").unwrap())
827 .rev_from_node(NodePrefix::from_hex("b00").unwrap())
810 .unwrap(),
828 .unwrap(),
811 1
829 1
812 );
830 );
813 // RevlogError does not implement PartialEq
831 // RevlogError does not implement PartialEq
814 // (ultimately because io::Error does not)
832 // (ultimately because io::Error does not)
815 match revlog
833 match revlog
816 .rev_from_node(NodePrefix::from_hex("00").unwrap())
834 .rev_from_node(NodePrefix::from_hex("00").unwrap())
817 .expect_err("Expected to give AmbiguousPrefix error")
835 .expect_err("Expected to give AmbiguousPrefix error")
818 {
836 {
819 RevlogError::AmbiguousPrefix => (),
837 RevlogError::AmbiguousPrefix => (),
820 e => {
838 e => {
821 panic!("Got another error than AmbiguousPrefix: {:?}", e);
839 panic!("Got another error than AmbiguousPrefix: {:?}", e);
822 }
840 }
823 };
841 };
824 }
842 }
825 }
843 }
@@ -1,1067 +1,1074 b''
1 // Copyright 2018-2020 Georges Racinet <georges.racinet@octobus.net>
1 // Copyright 2018-2020 Georges Racinet <georges.racinet@octobus.net>
2 // and Mercurial contributors
2 // and Mercurial contributors
3 //
3 //
4 // This software may be used and distributed according to the terms of the
4 // This software may be used and distributed according to the terms of the
5 // GNU General Public License version 2 or any later version.
5 // GNU General Public License version 2 or any later version.
6 //! Indexing facilities for fast retrieval of `Revision` from `Node`
6 //! Indexing facilities for fast retrieval of `Revision` from `Node`
7 //!
7 //!
8 //! This provides a variation on the 16-ary radix tree that is
8 //! This provides a variation on the 16-ary radix tree that is
9 //! provided as "nodetree" in revlog.c, ready for append-only persistence
9 //! provided as "nodetree" in revlog.c, ready for append-only persistence
10 //! on disk.
10 //! on disk.
11 //!
11 //!
12 //! Following existing implicit conventions, the "nodemap" terminology
12 //! Following existing implicit conventions, the "nodemap" terminology
13 //! is used in a more abstract context.
13 //! is used in a more abstract context.
14
14
15 use super::{
15 use super::{
16 node::NULL_NODE, Node, NodePrefix, Revision, RevlogIndex, NULL_REVISION,
16 node::NULL_NODE, Node, NodePrefix, Revision, RevlogIndex, NULL_REVISION,
17 };
17 };
18
18
19 use bytes_cast::{unaligned, BytesCast};
19 use bytes_cast::{unaligned, BytesCast};
20 use std::cmp::max;
20 use std::cmp::max;
21 use std::fmt;
21 use std::fmt;
22 use std::mem::{self, align_of, size_of};
22 use std::mem::{self, align_of, size_of};
23 use std::ops::Deref;
23 use std::ops::Deref;
24 use std::ops::Index;
24 use std::ops::Index;
25
25
26 #[derive(Debug, PartialEq)]
26 #[derive(Debug, PartialEq)]
27 pub enum NodeMapError {
27 pub enum NodeMapError {
28 /// A `NodePrefix` matches several [`Revision`]s.
28 /// A `NodePrefix` matches several [`Revision`]s.
29 ///
29 ///
30 /// This can be returned by methods meant for (at most) one match.
30 /// This can be returned by methods meant for (at most) one match.
31 MultipleResults,
31 MultipleResults,
32 /// A `Revision` stored in the nodemap could not be found in the index
32 /// A `Revision` stored in the nodemap could not be found in the index
33 RevisionNotInIndex(Revision),
33 RevisionNotInIndex(Revision),
34 }
34 }
35
35
36 /// Mapping system from Mercurial nodes to revision numbers.
36 /// Mapping system from Mercurial nodes to revision numbers.
37 ///
37 ///
38 /// ## `RevlogIndex` and `NodeMap`
38 /// ## `RevlogIndex` and `NodeMap`
39 ///
39 ///
40 /// One way to think about their relationship is that
40 /// One way to think about their relationship is that
41 /// the `NodeMap` is a prefix-oriented reverse index of the [`Node`]
41 /// the `NodeMap` is a prefix-oriented reverse index of the [`Node`]
42 /// information carried by a [`RevlogIndex`].
42 /// information carried by a [`RevlogIndex`].
43 ///
43 ///
44 /// Many of the methods in this trait take a `RevlogIndex` argument
44 /// Many of the methods in this trait take a `RevlogIndex` argument
45 /// which is used for validation of their results. This index must naturally
45 /// which is used for validation of their results. This index must naturally
46 /// be the one the `NodeMap` is about, and it must be consistent.
46 /// be the one the `NodeMap` is about, and it must be consistent.
47 ///
47 ///
48 /// Notably, the `NodeMap` must not store
48 /// Notably, the `NodeMap` must not store
49 /// information about more `Revision` values than there are in the index.
49 /// information about more `Revision` values than there are in the index.
50 /// In these methods, an encountered `Revision` is not in the index, a
50 /// In these methods, an encountered `Revision` is not in the index, a
51 /// [RevisionNotInIndex](NodeMapError) error is returned.
51 /// [RevisionNotInIndex](NodeMapError) error is returned.
52 ///
52 ///
53 /// In insert operations, the rule is thus that the `NodeMap` must always
53 /// In insert operations, the rule is thus that the `NodeMap` must always
54 /// be updated after the `RevlogIndex` it is about.
54 /// be updated after the `RevlogIndex` it is about.
55 pub trait NodeMap {
55 pub trait NodeMap {
56 /// Find the unique `Revision` having the given `Node`
56 /// Find the unique `Revision` having the given `Node`
57 ///
57 ///
58 /// If no Revision matches the given `Node`, `Ok(None)` is returned.
58 /// If no Revision matches the given `Node`, `Ok(None)` is returned.
59 fn find_node(
59 fn find_node(
60 &self,
60 &self,
61 index: &impl RevlogIndex,
61 index: &impl RevlogIndex,
62 node: &Node,
62 node: &Node,
63 ) -> Result<Option<Revision>, NodeMapError> {
63 ) -> Result<Option<Revision>, NodeMapError> {
64 self.find_bin(index, node.into())
64 self.find_bin(index, node.into())
65 }
65 }
66
66
67 /// Find the unique Revision whose `Node` starts with a given binary prefix
67 /// Find the unique Revision whose `Node` starts with a given binary prefix
68 ///
68 ///
69 /// If no Revision matches the given prefix, `Ok(None)` is returned.
69 /// If no Revision matches the given prefix, `Ok(None)` is returned.
70 ///
70 ///
71 /// If several Revisions match the given prefix, a
71 /// If several Revisions match the given prefix, a
72 /// [MultipleResults](NodeMapError) error is returned.
72 /// [MultipleResults](NodeMapError) error is returned.
73 fn find_bin(
73 fn find_bin(
74 &self,
74 &self,
75 idx: &impl RevlogIndex,
75 idx: &impl RevlogIndex,
76 prefix: NodePrefix,
76 prefix: NodePrefix,
77 ) -> Result<Option<Revision>, NodeMapError>;
77 ) -> Result<Option<Revision>, NodeMapError>;
78
78
79 /// Give the size of the shortest node prefix that determines
79 /// Give the size of the shortest node prefix that determines
80 /// the revision uniquely.
80 /// the revision uniquely.
81 ///
81 ///
82 /// From a binary node prefix, if it is matched in the node map, this
82 /// From a binary node prefix, if it is matched in the node map, this
83 /// returns the number of hexadecimal digits that would had sufficed
83 /// returns the number of hexadecimal digits that would had sufficed
84 /// to find the revision uniquely.
84 /// to find the revision uniquely.
85 ///
85 ///
86 /// Returns `None` if no [`Revision`] could be found for the prefix.
86 /// Returns `None` if no [`Revision`] could be found for the prefix.
87 ///
87 ///
88 /// If several Revisions match the given prefix, a
88 /// If several Revisions match the given prefix, a
89 /// [MultipleResults](NodeMapError) error is returned.
89 /// [MultipleResults](NodeMapError) error is returned.
90 fn unique_prefix_len_bin(
90 fn unique_prefix_len_bin(
91 &self,
91 &self,
92 idx: &impl RevlogIndex,
92 idx: &impl RevlogIndex,
93 node_prefix: NodePrefix,
93 node_prefix: NodePrefix,
94 ) -> Result<Option<usize>, NodeMapError>;
94 ) -> Result<Option<usize>, NodeMapError>;
95
95
96 /// Same as [unique_prefix_len_bin](Self::unique_prefix_len_bin), with
96 /// Same as [unique_prefix_len_bin](Self::unique_prefix_len_bin), with
97 /// a full [`Node`] as input
97 /// a full [`Node`] as input
98 fn unique_prefix_len_node(
98 fn unique_prefix_len_node(
99 &self,
99 &self,
100 idx: &impl RevlogIndex,
100 idx: &impl RevlogIndex,
101 node: &Node,
101 node: &Node,
102 ) -> Result<Option<usize>, NodeMapError> {
102 ) -> Result<Option<usize>, NodeMapError> {
103 self.unique_prefix_len_bin(idx, node.into())
103 self.unique_prefix_len_bin(idx, node.into())
104 }
104 }
105 }
105 }
106
106
107 pub trait MutableNodeMap: NodeMap {
107 pub trait MutableNodeMap: NodeMap {
108 fn insert<I: RevlogIndex>(
108 fn insert<I: RevlogIndex>(
109 &mut self,
109 &mut self,
110 index: &I,
110 index: &I,
111 node: &Node,
111 node: &Node,
112 rev: Revision,
112 rev: Revision,
113 ) -> Result<(), NodeMapError>;
113 ) -> Result<(), NodeMapError>;
114 }
114 }
115
115
116 /// Low level NodeTree [`Block`] elements
116 /// Low level NodeTree [`Block`] elements
117 ///
117 ///
118 /// These are exactly as for instance on persistent storage.
118 /// These are exactly as for instance on persistent storage.
119 type RawElement = unaligned::I32Be;
119 type RawElement = unaligned::I32Be;
120
120
121 /// High level representation of values in NodeTree
121 /// High level representation of values in NodeTree
122 /// [`Blocks`](struct.Block.html)
122 /// [`Blocks`](struct.Block.html)
123 ///
123 ///
124 /// This is the high level representation that most algorithms should
124 /// This is the high level representation that most algorithms should
125 /// use.
125 /// use.
126 #[derive(Clone, Debug, Eq, PartialEq)]
126 #[derive(Clone, Debug, Eq, PartialEq)]
127 enum Element {
127 enum Element {
128 Rev(Revision),
128 Rev(Revision),
129 Block(usize),
129 Block(usize),
130 None,
130 None,
131 }
131 }
132
132
133 impl From<RawElement> for Element {
133 impl From<RawElement> for Element {
134 /// Conversion from low level representation, after endianness conversion.
134 /// Conversion from low level representation, after endianness conversion.
135 ///
135 ///
136 /// See [`Block`](struct.Block.html) for explanation about the encoding.
136 /// See [`Block`](struct.Block.html) for explanation about the encoding.
137 fn from(raw: RawElement) -> Element {
137 fn from(raw: RawElement) -> Element {
138 let int = raw.get();
138 let int = raw.get();
139 if int >= 0 {
139 if int >= 0 {
140 Element::Block(int as usize)
140 Element::Block(int as usize)
141 } else if int == -1 {
141 } else if int == -1 {
142 Element::None
142 Element::None
143 } else {
143 } else {
144 Element::Rev(-int - 2)
144 Element::Rev(-int - 2)
145 }
145 }
146 }
146 }
147 }
147 }
148
148
149 impl From<Element> for RawElement {
149 impl From<Element> for RawElement {
150 fn from(element: Element) -> RawElement {
150 fn from(element: Element) -> RawElement {
151 RawElement::from(match element {
151 RawElement::from(match element {
152 Element::None => 0,
152 Element::None => 0,
153 Element::Block(i) => i as i32,
153 Element::Block(i) => i as i32,
154 Element::Rev(rev) => -rev - 2,
154 Element::Rev(rev) => -rev - 2,
155 })
155 })
156 }
156 }
157 }
157 }
158
158
159 const ELEMENTS_PER_BLOCK: usize = 16; // number of different values in a nybble
159 const ELEMENTS_PER_BLOCK: usize = 16; // number of different values in a nybble
160
160
161 /// A logical block of the [`NodeTree`], packed with a fixed size.
161 /// A logical block of the [`NodeTree`], packed with a fixed size.
162 ///
162 ///
163 /// These are always used in container types implementing `Index<Block>`,
163 /// These are always used in container types implementing `Index<Block>`,
164 /// such as `&Block`
164 /// such as `&Block`
165 ///
165 ///
166 /// As an array of integers, its ith element encodes that the
166 /// As an array of integers, its ith element encodes that the
167 /// ith potential edge from the block, representing the ith hexadecimal digit
167 /// ith potential edge from the block, representing the ith hexadecimal digit
168 /// (nybble) `i` is either:
168 /// (nybble) `i` is either:
169 ///
169 ///
170 /// - absent (value -1)
170 /// - absent (value -1)
171 /// - another `Block` in the same indexable container (value β‰₯ 0)
171 /// - another `Block` in the same indexable container (value β‰₯ 0)
172 /// - a [`Revision`] leaf (value ≀ -2)
172 /// - a [`Revision`] leaf (value ≀ -2)
173 ///
173 ///
174 /// Endianness has to be fixed for consistency on shared storage across
174 /// Endianness has to be fixed for consistency on shared storage across
175 /// different architectures.
175 /// different architectures.
176 ///
176 ///
177 /// A key difference with the C `nodetree` is that we need to be
177 /// A key difference with the C `nodetree` is that we need to be
178 /// able to represent the [`Block`] at index 0, hence -1 is the empty marker
178 /// able to represent the [`Block`] at index 0, hence -1 is the empty marker
179 /// rather than 0 and the [`Revision`] range upper limit of -2 instead of -1.
179 /// rather than 0 and the [`Revision`] range upper limit of -2 instead of -1.
180 ///
180 ///
181 /// Another related difference is that `NULL_REVISION` (-1) is not
181 /// Another related difference is that `NULL_REVISION` (-1) is not
182 /// represented at all, because we want an immutable empty nodetree
182 /// represented at all, because we want an immutable empty nodetree
183 /// to be valid.
183 /// to be valid.
184 #[derive(Copy, Clone, BytesCast, PartialEq)]
184 #[derive(Copy, Clone, BytesCast, PartialEq)]
185 #[repr(transparent)]
185 #[repr(transparent)]
186 pub struct Block([RawElement; ELEMENTS_PER_BLOCK]);
186 pub struct Block([RawElement; ELEMENTS_PER_BLOCK]);
187
187
188 impl Block {
188 impl Block {
189 fn new() -> Self {
189 fn new() -> Self {
190 let absent_node = RawElement::from(-1);
190 let absent_node = RawElement::from(-1);
191 Block([absent_node; ELEMENTS_PER_BLOCK])
191 Block([absent_node; ELEMENTS_PER_BLOCK])
192 }
192 }
193
193
194 fn get(&self, nybble: u8) -> Element {
194 fn get(&self, nybble: u8) -> Element {
195 self.0[nybble as usize].into()
195 self.0[nybble as usize].into()
196 }
196 }
197
197
198 fn set(&mut self, nybble: u8, element: Element) {
198 fn set(&mut self, nybble: u8, element: Element) {
199 self.0[nybble as usize] = element.into()
199 self.0[nybble as usize] = element.into()
200 }
200 }
201 }
201 }
202
202
203 impl fmt::Debug for Block {
203 impl fmt::Debug for Block {
204 /// sparse representation for testing and debugging purposes
204 /// sparse representation for testing and debugging purposes
205 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
205 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
206 f.debug_map()
206 f.debug_map()
207 .entries((0..16).filter_map(|i| match self.get(i) {
207 .entries((0..16).filter_map(|i| match self.get(i) {
208 Element::None => None,
208 Element::None => None,
209 element => Some((i, element)),
209 element => Some((i, element)),
210 }))
210 }))
211 .finish()
211 .finish()
212 }
212 }
213 }
213 }
214
214
215 /// A mutable 16-radix tree with the root block logically at the end
215 /// A mutable 16-radix tree with the root block logically at the end
216 ///
216 ///
217 /// Because of the append only nature of our node trees, we need to
217 /// Because of the append only nature of our node trees, we need to
218 /// keep the original untouched and store new blocks separately.
218 /// keep the original untouched and store new blocks separately.
219 ///
219 ///
220 /// The mutable root [`Block`] is kept apart so that we don't have to rebump
220 /// The mutable root [`Block`] is kept apart so that we don't have to rebump
221 /// it on each insertion.
221 /// it on each insertion.
222 pub struct NodeTree {
222 pub struct NodeTree {
223 readonly: Box<dyn Deref<Target = [Block]> + Send>,
223 readonly: Box<dyn Deref<Target = [Block]> + Send>,
224 growable: Vec<Block>,
224 growable: Vec<Block>,
225 root: Block,
225 root: Block,
226 masked_inner_blocks: usize,
226 masked_inner_blocks: usize,
227 }
227 }
228
228
229 impl Index<usize> for NodeTree {
229 impl Index<usize> for NodeTree {
230 type Output = Block;
230 type Output = Block;
231
231
232 fn index(&self, i: usize) -> &Block {
232 fn index(&self, i: usize) -> &Block {
233 let ro_len = self.readonly.len();
233 let ro_len = self.readonly.len();
234 if i < ro_len {
234 if i < ro_len {
235 &self.readonly[i]
235 &self.readonly[i]
236 } else if i == ro_len + self.growable.len() {
236 } else if i == ro_len + self.growable.len() {
237 &self.root
237 &self.root
238 } else {
238 } else {
239 &self.growable[i - ro_len]
239 &self.growable[i - ro_len]
240 }
240 }
241 }
241 }
242 }
242 }
243
243
244 /// Return `None` unless the [`Node`] for `rev` has given prefix in `idx`.
244 /// Return `None` unless the [`Node`] for `rev` has given prefix in `idx`.
245 fn has_prefix_or_none(
245 fn has_prefix_or_none(
246 idx: &impl RevlogIndex,
246 idx: &impl RevlogIndex,
247 prefix: NodePrefix,
247 prefix: NodePrefix,
248 rev: Revision,
248 rev: Revision,
249 ) -> Result<Option<Revision>, NodeMapError> {
249 ) -> Result<Option<Revision>, NodeMapError> {
250 idx.node(rev)
250 idx.node(rev)
251 .ok_or(NodeMapError::RevisionNotInIndex(rev))
251 .ok_or(NodeMapError::RevisionNotInIndex(rev))
252 .map(|node| {
252 .map(|node| {
253 if prefix.is_prefix_of(node) {
253 if prefix.is_prefix_of(node) {
254 Some(rev)
254 Some(rev)
255 } else {
255 } else {
256 None
256 None
257 }
257 }
258 })
258 })
259 }
259 }
260
260
261 /// validate that the candidate's node starts indeed with given prefix,
261 /// validate that the candidate's node starts indeed with given prefix,
262 /// and treat ambiguities related to [`NULL_REVISION`].
262 /// and treat ambiguities related to [`NULL_REVISION`].
263 ///
263 ///
264 /// From the data in the NodeTree, one can only conclude that some
264 /// From the data in the NodeTree, one can only conclude that some
265 /// revision is the only one for a *subprefix* of the one being looked up.
265 /// revision is the only one for a *subprefix* of the one being looked up.
266 fn validate_candidate(
266 fn validate_candidate(
267 idx: &impl RevlogIndex,
267 idx: &impl RevlogIndex,
268 prefix: NodePrefix,
268 prefix: NodePrefix,
269 candidate: (Option<Revision>, usize),
269 candidate: (Option<Revision>, usize),
270 ) -> Result<(Option<Revision>, usize), NodeMapError> {
270 ) -> Result<(Option<Revision>, usize), NodeMapError> {
271 let (rev, steps) = candidate;
271 let (rev, steps) = candidate;
272 if let Some(nz_nybble) = prefix.first_different_nybble(&NULL_NODE) {
272 if let Some(nz_nybble) = prefix.first_different_nybble(&NULL_NODE) {
273 rev.map_or(Ok((None, steps)), |r| {
273 rev.map_or(Ok((None, steps)), |r| {
274 has_prefix_or_none(idx, prefix, r)
274 has_prefix_or_none(idx, prefix, r)
275 .map(|opt| (opt, max(steps, nz_nybble + 1)))
275 .map(|opt| (opt, max(steps, nz_nybble + 1)))
276 })
276 })
277 } else {
277 } else {
278 // the prefix is only made of zeros; NULL_REVISION always matches it
278 // the prefix is only made of zeros; NULL_REVISION always matches it
279 // and any other *valid* result is an ambiguity
279 // and any other *valid* result is an ambiguity
280 match rev {
280 match rev {
281 None => Ok((Some(NULL_REVISION), steps + 1)),
281 None => Ok((Some(NULL_REVISION), steps + 1)),
282 Some(r) => match has_prefix_or_none(idx, prefix, r)? {
282 Some(r) => match has_prefix_or_none(idx, prefix, r)? {
283 None => Ok((Some(NULL_REVISION), steps + 1)),
283 None => Ok((Some(NULL_REVISION), steps + 1)),
284 _ => Err(NodeMapError::MultipleResults),
284 _ => Err(NodeMapError::MultipleResults),
285 },
285 },
286 }
286 }
287 }
287 }
288 }
288 }
289
289
290 impl NodeTree {
290 impl NodeTree {
291 /// Initiate a NodeTree from an immutable slice-like of `Block`
291 /// Initiate a NodeTree from an immutable slice-like of `Block`
292 ///
292 ///
293 /// We keep `readonly` and clone its root block if it isn't empty.
293 /// We keep `readonly` and clone its root block if it isn't empty.
294 fn new(readonly: Box<dyn Deref<Target = [Block]> + Send>) -> Self {
294 fn new(readonly: Box<dyn Deref<Target = [Block]> + Send>) -> Self {
295 let root = readonly.last().cloned().unwrap_or_else(Block::new);
295 let root = readonly.last().cloned().unwrap_or_else(Block::new);
296 NodeTree {
296 NodeTree {
297 readonly,
297 readonly,
298 growable: Vec::new(),
298 growable: Vec::new(),
299 root,
299 root,
300 masked_inner_blocks: 0,
300 masked_inner_blocks: 0,
301 }
301 }
302 }
302 }
303
303
304 /// Create from an opaque bunch of bytes
304 /// Create from an opaque bunch of bytes
305 ///
305 ///
306 /// The created [`NodeTreeBytes`] from `bytes`,
306 /// The created [`NodeTreeBytes`] from `bytes`,
307 /// of which exactly `amount` bytes are used.
307 /// of which exactly `amount` bytes are used.
308 ///
308 ///
309 /// - `buffer` could be derived from `PyBuffer` and `Mmap` objects.
309 /// - `buffer` could be derived from `PyBuffer` and `Mmap` objects.
310 /// - `amount` is expressed in bytes, and is not automatically derived from
310 /// - `amount` is expressed in bytes, and is not automatically derived from
311 /// `bytes`, so that a caller that manages them atomically can perform
311 /// `bytes`, so that a caller that manages them atomically can perform
312 /// temporary disk serializations and still rollback easily if needed.
312 /// temporary disk serializations and still rollback easily if needed.
313 /// First use-case for this would be to support Mercurial shell hooks.
313 /// First use-case for this would be to support Mercurial shell hooks.
314 ///
314 ///
315 /// panics if `buffer` is smaller than `amount`
315 /// panics if `buffer` is smaller than `amount`
316 pub fn load_bytes(
316 pub fn load_bytes(
317 bytes: Box<dyn Deref<Target = [u8]> + Send>,
317 bytes: Box<dyn Deref<Target = [u8]> + Send>,
318 amount: usize,
318 amount: usize,
319 ) -> Self {
319 ) -> Self {
320 NodeTree::new(Box::new(NodeTreeBytes::new(bytes, amount)))
320 NodeTree::new(Box::new(NodeTreeBytes::new(bytes, amount)))
321 }
321 }
322
322
323 /// Retrieve added [`Block`]s and the original immutable data
323 /// Retrieve added [`Block`]s and the original immutable data
324 pub fn into_readonly_and_added(
324 pub fn into_readonly_and_added(
325 self,
325 self,
326 ) -> (Box<dyn Deref<Target = [Block]> + Send>, Vec<Block>) {
326 ) -> (Box<dyn Deref<Target = [Block]> + Send>, Vec<Block>) {
327 let mut vec = self.growable;
327 let mut vec = self.growable;
328 let readonly = self.readonly;
328 let readonly = self.readonly;
329 if readonly.last() != Some(&self.root) {
329 if readonly.last() != Some(&self.root) {
330 vec.push(self.root);
330 vec.push(self.root);
331 }
331 }
332 (readonly, vec)
332 (readonly, vec)
333 }
333 }
334
334
335 /// Retrieve added [`Block]s as bytes, ready to be written to persistent
335 /// Retrieve added [`Block]s as bytes, ready to be written to persistent
336 /// storage
336 /// storage
337 pub fn into_readonly_and_added_bytes(
337 pub fn into_readonly_and_added_bytes(
338 self,
338 self,
339 ) -> (Box<dyn Deref<Target = [Block]> + Send>, Vec<u8>) {
339 ) -> (Box<dyn Deref<Target = [Block]> + Send>, Vec<u8>) {
340 let (readonly, vec) = self.into_readonly_and_added();
340 let (readonly, vec) = self.into_readonly_and_added();
341 // Prevent running `v`'s destructor so we are in complete control
341 // Prevent running `v`'s destructor so we are in complete control
342 // of the allocation.
342 // of the allocation.
343 let vec = mem::ManuallyDrop::new(vec);
343 let vec = mem::ManuallyDrop::new(vec);
344
344
345 // Transmute the `Vec<Block>` to a `Vec<u8>`. Blocks are contiguous
345 // Transmute the `Vec<Block>` to a `Vec<u8>`. Blocks are contiguous
346 // bytes, so this is perfectly safe.
346 // bytes, so this is perfectly safe.
347 let bytes = unsafe {
347 let bytes = unsafe {
348 // Check for compatible allocation layout.
348 // Check for compatible allocation layout.
349 // (Optimized away by constant-folding + dead code elimination.)
349 // (Optimized away by constant-folding + dead code elimination.)
350 assert_eq!(size_of::<Block>(), 64);
350 assert_eq!(size_of::<Block>(), 64);
351 assert_eq!(align_of::<Block>(), 1);
351 assert_eq!(align_of::<Block>(), 1);
352
352
353 // /!\ Any use of `vec` after this is use-after-free.
353 // /!\ Any use of `vec` after this is use-after-free.
354 // TODO: use `into_raw_parts` once stabilized
354 // TODO: use `into_raw_parts` once stabilized
355 Vec::from_raw_parts(
355 Vec::from_raw_parts(
356 vec.as_ptr() as *mut u8,
356 vec.as_ptr() as *mut u8,
357 vec.len() * size_of::<Block>(),
357 vec.len() * size_of::<Block>(),
358 vec.capacity() * size_of::<Block>(),
358 vec.capacity() * size_of::<Block>(),
359 )
359 )
360 };
360 };
361 (readonly, bytes)
361 (readonly, bytes)
362 }
362 }
363
363
364 /// Total number of blocks
364 /// Total number of blocks
365 fn len(&self) -> usize {
365 fn len(&self) -> usize {
366 self.readonly.len() + self.growable.len() + 1
366 self.readonly.len() + self.growable.len() + 1
367 }
367 }
368
368
369 /// Implemented for completeness
369 /// Implemented for completeness
370 ///
370 ///
371 /// A `NodeTree` always has at least the mutable root block.
371 /// A `NodeTree` always has at least the mutable root block.
372 #[allow(dead_code)]
372 #[allow(dead_code)]
373 fn is_empty(&self) -> bool {
373 fn is_empty(&self) -> bool {
374 false
374 false
375 }
375 }
376
376
377 /// Main working method for `NodeTree` searches
377 /// Main working method for `NodeTree` searches
378 ///
378 ///
379 /// The first returned value is the result of analysing `NodeTree` data
379 /// The first returned value is the result of analysing `NodeTree` data
380 /// *alone*: whereas `None` guarantees that the given prefix is absent
380 /// *alone*: whereas `None` guarantees that the given prefix is absent
381 /// from the [`NodeTree`] data (but still could match [`NULL_NODE`]), with
381 /// from the [`NodeTree`] data (but still could match [`NULL_NODE`]), with
382 /// `Some(rev)`, it is to be understood that `rev` is the unique
382 /// `Some(rev)`, it is to be understood that `rev` is the unique
383 /// [`Revision`] that could match the prefix. Actually, all that can
383 /// [`Revision`] that could match the prefix. Actually, all that can
384 /// be inferred from
384 /// be inferred from
385 /// the `NodeTree` data is that `rev` is the revision with the longest
385 /// the `NodeTree` data is that `rev` is the revision with the longest
386 /// common node prefix with the given prefix.
386 /// common node prefix with the given prefix.
387 ///
387 ///
388 /// The second returned value is the size of the smallest subprefix
388 /// The second returned value is the size of the smallest subprefix
389 /// of `prefix` that would give the same result, i.e. not the
389 /// of `prefix` that would give the same result, i.e. not the
390 /// [MultipleResults](NodeMapError) error variant (again, using only the
390 /// [MultipleResults](NodeMapError) error variant (again, using only the
391 /// data of the [`NodeTree`]).
391 /// data of the [`NodeTree`]).
392 fn lookup(
392 fn lookup(
393 &self,
393 &self,
394 prefix: NodePrefix,
394 prefix: NodePrefix,
395 ) -> Result<(Option<Revision>, usize), NodeMapError> {
395 ) -> Result<(Option<Revision>, usize), NodeMapError> {
396 for (i, visit_item) in self.visit(prefix).enumerate() {
396 for (i, visit_item) in self.visit(prefix).enumerate() {
397 if let Some(opt) = visit_item.final_revision() {
397 if let Some(opt) = visit_item.final_revision() {
398 return Ok((opt, i + 1));
398 return Ok((opt, i + 1));
399 }
399 }
400 }
400 }
401 Err(NodeMapError::MultipleResults)
401 Err(NodeMapError::MultipleResults)
402 }
402 }
403
403
404 fn visit(&self, prefix: NodePrefix) -> NodeTreeVisitor {
404 fn visit(&self, prefix: NodePrefix) -> NodeTreeVisitor {
405 NodeTreeVisitor {
405 NodeTreeVisitor {
406 nt: self,
406 nt: self,
407 prefix,
407 prefix,
408 visit: self.len() - 1,
408 visit: self.len() - 1,
409 nybble_idx: 0,
409 nybble_idx: 0,
410 done: false,
410 done: false,
411 }
411 }
412 }
412 }
413 /// Return a mutable reference for `Block` at index `idx`.
413 /// Return a mutable reference for `Block` at index `idx`.
414 ///
414 ///
415 /// If `idx` lies in the immutable area, then the reference is to
415 /// If `idx` lies in the immutable area, then the reference is to
416 /// a newly appended copy.
416 /// a newly appended copy.
417 ///
417 ///
418 /// Returns (new_idx, glen, mut_ref) where
418 /// Returns (new_idx, glen, mut_ref) where
419 ///
419 ///
420 /// - `new_idx` is the index of the mutable `Block`
420 /// - `new_idx` is the index of the mutable `Block`
421 /// - `mut_ref` is a mutable reference to the mutable Block.
421 /// - `mut_ref` is a mutable reference to the mutable Block.
422 /// - `glen` is the new length of `self.growable`
422 /// - `glen` is the new length of `self.growable`
423 ///
423 ///
424 /// Note: the caller wouldn't be allowed to query `self.growable.len()`
424 /// Note: the caller wouldn't be allowed to query `self.growable.len()`
425 /// itself because of the mutable borrow taken with the returned `Block`
425 /// itself because of the mutable borrow taken with the returned `Block`
426 fn mutable_block(&mut self, idx: usize) -> (usize, &mut Block, usize) {
426 fn mutable_block(&mut self, idx: usize) -> (usize, &mut Block, usize) {
427 let ro_blocks = &self.readonly;
427 let ro_blocks = &self.readonly;
428 let ro_len = ro_blocks.len();
428 let ro_len = ro_blocks.len();
429 let glen = self.growable.len();
429 let glen = self.growable.len();
430 if idx < ro_len {
430 if idx < ro_len {
431 self.masked_inner_blocks += 1;
431 self.masked_inner_blocks += 1;
432 self.growable.push(ro_blocks[idx]);
432 self.growable.push(ro_blocks[idx]);
433 (glen + ro_len, &mut self.growable[glen], glen + 1)
433 (glen + ro_len, &mut self.growable[glen], glen + 1)
434 } else if glen + ro_len == idx {
434 } else if glen + ro_len == idx {
435 (idx, &mut self.root, glen)
435 (idx, &mut self.root, glen)
436 } else {
436 } else {
437 (idx, &mut self.growable[idx - ro_len], glen)
437 (idx, &mut self.growable[idx - ro_len], glen)
438 }
438 }
439 }
439 }
440
440
441 /// Main insertion method
441 /// Main insertion method
442 ///
442 ///
443 /// This will dive in the node tree to find the deepest `Block` for
443 /// This will dive in the node tree to find the deepest `Block` for
444 /// `node`, split it as much as needed and record `node` in there.
444 /// `node`, split it as much as needed and record `node` in there.
445 /// The method then backtracks, updating references in all the visited
445 /// The method then backtracks, updating references in all the visited
446 /// blocks from the root.
446 /// blocks from the root.
447 ///
447 ///
448 /// All the mutated `Block` are copied first to the growable part if
448 /// All the mutated `Block` are copied first to the growable part if
449 /// needed. That happens for those in the immutable part except the root.
449 /// needed. That happens for those in the immutable part except the root.
450 pub fn insert<I: RevlogIndex>(
450 pub fn insert<I: RevlogIndex>(
451 &mut self,
451 &mut self,
452 index: &I,
452 index: &I,
453 node: &Node,
453 node: &Node,
454 rev: Revision,
454 rev: Revision,
455 ) -> Result<(), NodeMapError> {
455 ) -> Result<(), NodeMapError> {
456 let ro_len = &self.readonly.len();
456 let ro_len = &self.readonly.len();
457
457
458 let mut visit_steps: Vec<_> = self.visit(node.into()).collect();
458 let mut visit_steps: Vec<_> = self.visit(node.into()).collect();
459 let read_nybbles = visit_steps.len();
459 let read_nybbles = visit_steps.len();
460 // visit_steps cannot be empty, since we always visit the root block
460 // visit_steps cannot be empty, since we always visit the root block
461 let deepest = visit_steps.pop().unwrap();
461 let deepest = visit_steps.pop().unwrap();
462
462
463 let (mut block_idx, mut block, mut glen) =
463 let (mut block_idx, mut block, mut glen) =
464 self.mutable_block(deepest.block_idx);
464 self.mutable_block(deepest.block_idx);
465
465
466 if let Element::Rev(old_rev) = deepest.element {
466 if let Element::Rev(old_rev) = deepest.element {
467 let old_node = index
467 let old_node = index
468 .node(old_rev)
468 .node(old_rev)
469 .ok_or(NodeMapError::RevisionNotInIndex(old_rev))?;
469 .ok_or(NodeMapError::RevisionNotInIndex(old_rev))?;
470 if old_node == node {
470 if old_node == node {
471 return Ok(()); // avoid creating lots of useless blocks
471 return Ok(()); // avoid creating lots of useless blocks
472 }
472 }
473
473
474 // Looping over the tail of nybbles in both nodes, creating
474 // Looping over the tail of nybbles in both nodes, creating
475 // new blocks until we find the difference
475 // new blocks until we find the difference
476 let mut new_block_idx = ro_len + glen;
476 let mut new_block_idx = ro_len + glen;
477 let mut nybble = deepest.nybble;
477 let mut nybble = deepest.nybble;
478 for nybble_pos in read_nybbles..node.nybbles_len() {
478 for nybble_pos in read_nybbles..node.nybbles_len() {
479 block.set(nybble, Element::Block(new_block_idx));
479 block.set(nybble, Element::Block(new_block_idx));
480
480
481 let new_nybble = node.get_nybble(nybble_pos);
481 let new_nybble = node.get_nybble(nybble_pos);
482 let old_nybble = old_node.get_nybble(nybble_pos);
482 let old_nybble = old_node.get_nybble(nybble_pos);
483
483
484 if old_nybble == new_nybble {
484 if old_nybble == new_nybble {
485 self.growable.push(Block::new());
485 self.growable.push(Block::new());
486 block = &mut self.growable[glen];
486 block = &mut self.growable[glen];
487 glen += 1;
487 glen += 1;
488 new_block_idx += 1;
488 new_block_idx += 1;
489 nybble = new_nybble;
489 nybble = new_nybble;
490 } else {
490 } else {
491 let mut new_block = Block::new();
491 let mut new_block = Block::new();
492 new_block.set(old_nybble, Element::Rev(old_rev));
492 new_block.set(old_nybble, Element::Rev(old_rev));
493 new_block.set(new_nybble, Element::Rev(rev));
493 new_block.set(new_nybble, Element::Rev(rev));
494 self.growable.push(new_block);
494 self.growable.push(new_block);
495 break;
495 break;
496 }
496 }
497 }
497 }
498 } else {
498 } else {
499 // Free slot in the deepest block: no splitting has to be done
499 // Free slot in the deepest block: no splitting has to be done
500 block.set(deepest.nybble, Element::Rev(rev));
500 block.set(deepest.nybble, Element::Rev(rev));
501 }
501 }
502
502
503 // Backtrack over visit steps to update references
503 // Backtrack over visit steps to update references
504 while let Some(visited) = visit_steps.pop() {
504 while let Some(visited) = visit_steps.pop() {
505 let to_write = Element::Block(block_idx);
505 let to_write = Element::Block(block_idx);
506 if visit_steps.is_empty() {
506 if visit_steps.is_empty() {
507 self.root.set(visited.nybble, to_write);
507 self.root.set(visited.nybble, to_write);
508 break;
508 break;
509 }
509 }
510 let (new_idx, block, _) = self.mutable_block(visited.block_idx);
510 let (new_idx, block, _) = self.mutable_block(visited.block_idx);
511 if block.get(visited.nybble) == to_write {
511 if block.get(visited.nybble) == to_write {
512 break;
512 break;
513 }
513 }
514 block.set(visited.nybble, to_write);
514 block.set(visited.nybble, to_write);
515 block_idx = new_idx;
515 block_idx = new_idx;
516 }
516 }
517 Ok(())
517 Ok(())
518 }
518 }
519
519
520 /// Make the whole `NodeTree` logically empty, without touching the
520 /// Make the whole `NodeTree` logically empty, without touching the
521 /// immutable part.
521 /// immutable part.
522 pub fn invalidate_all(&mut self) {
522 pub fn invalidate_all(&mut self) {
523 self.root = Block::new();
523 self.root = Block::new();
524 self.growable = Vec::new();
524 self.growable = Vec::new();
525 self.masked_inner_blocks = self.readonly.len();
525 self.masked_inner_blocks = self.readonly.len();
526 }
526 }
527
527
528 /// Return the number of blocks in the readonly part that are currently
528 /// Return the number of blocks in the readonly part that are currently
529 /// masked in the mutable part.
529 /// masked in the mutable part.
530 ///
530 ///
531 /// The `NodeTree` structure has no efficient way to know how many blocks
531 /// The `NodeTree` structure has no efficient way to know how many blocks
532 /// are already unreachable in the readonly part.
532 /// are already unreachable in the readonly part.
533 ///
533 ///
534 /// After a call to `invalidate_all()`, the returned number can be actually
534 /// After a call to `invalidate_all()`, the returned number can be actually
535 /// bigger than the whole readonly part, a conventional way to mean that
535 /// bigger than the whole readonly part, a conventional way to mean that
536 /// all the readonly blocks have been masked. This is what is really
536 /// all the readonly blocks have been masked. This is what is really
537 /// useful to the caller and does not require to know how many were
537 /// useful to the caller and does not require to know how many were
538 /// actually unreachable to begin with.
538 /// actually unreachable to begin with.
539 pub fn masked_readonly_blocks(&self) -> usize {
539 pub fn masked_readonly_blocks(&self) -> usize {
540 if let Some(readonly_root) = self.readonly.last() {
540 if let Some(readonly_root) = self.readonly.last() {
541 if readonly_root == &self.root {
541 if readonly_root == &self.root {
542 return 0;
542 return 0;
543 }
543 }
544 } else {
544 } else {
545 return 0;
545 return 0;
546 }
546 }
547 self.masked_inner_blocks + 1
547 self.masked_inner_blocks + 1
548 }
548 }
549 }
549 }
550
550
551 pub struct NodeTreeBytes {
551 pub struct NodeTreeBytes {
552 buffer: Box<dyn Deref<Target = [u8]> + Send>,
552 buffer: Box<dyn Deref<Target = [u8]> + Send>,
553 len_in_blocks: usize,
553 len_in_blocks: usize,
554 }
554 }
555
555
556 impl NodeTreeBytes {
556 impl NodeTreeBytes {
557 fn new(
557 fn new(
558 buffer: Box<dyn Deref<Target = [u8]> + Send>,
558 buffer: Box<dyn Deref<Target = [u8]> + Send>,
559 amount: usize,
559 amount: usize,
560 ) -> Self {
560 ) -> Self {
561 assert!(buffer.len() >= amount);
561 assert!(buffer.len() >= amount);
562 let len_in_blocks = amount / size_of::<Block>();
562 let len_in_blocks = amount / size_of::<Block>();
563 NodeTreeBytes {
563 NodeTreeBytes {
564 buffer,
564 buffer,
565 len_in_blocks,
565 len_in_blocks,
566 }
566 }
567 }
567 }
568 }
568 }
569
569
570 impl Deref for NodeTreeBytes {
570 impl Deref for NodeTreeBytes {
571 type Target = [Block];
571 type Target = [Block];
572
572
573 fn deref(&self) -> &[Block] {
573 fn deref(&self) -> &[Block] {
574 Block::slice_from_bytes(&self.buffer, self.len_in_blocks)
574 Block::slice_from_bytes(&self.buffer, self.len_in_blocks)
575 // `NodeTreeBytes::new` already asserted that `self.buffer` is
575 // `NodeTreeBytes::new` already asserted that `self.buffer` is
576 // large enough.
576 // large enough.
577 .unwrap()
577 .unwrap()
578 .0
578 .0
579 }
579 }
580 }
580 }
581
581
582 struct NodeTreeVisitor<'n> {
582 struct NodeTreeVisitor<'n> {
583 nt: &'n NodeTree,
583 nt: &'n NodeTree,
584 prefix: NodePrefix,
584 prefix: NodePrefix,
585 visit: usize,
585 visit: usize,
586 nybble_idx: usize,
586 nybble_idx: usize,
587 done: bool,
587 done: bool,
588 }
588 }
589
589
590 #[derive(Debug, PartialEq, Clone)]
590 #[derive(Debug, PartialEq, Clone)]
591 struct NodeTreeVisitItem {
591 struct NodeTreeVisitItem {
592 block_idx: usize,
592 block_idx: usize,
593 nybble: u8,
593 nybble: u8,
594 element: Element,
594 element: Element,
595 }
595 }
596
596
597 impl<'n> Iterator for NodeTreeVisitor<'n> {
597 impl<'n> Iterator for NodeTreeVisitor<'n> {
598 type Item = NodeTreeVisitItem;
598 type Item = NodeTreeVisitItem;
599
599
600 fn next(&mut self) -> Option<Self::Item> {
600 fn next(&mut self) -> Option<Self::Item> {
601 if self.done || self.nybble_idx >= self.prefix.nybbles_len() {
601 if self.done || self.nybble_idx >= self.prefix.nybbles_len() {
602 return None;
602 return None;
603 }
603 }
604
604
605 let nybble = self.prefix.get_nybble(self.nybble_idx);
605 let nybble = self.prefix.get_nybble(self.nybble_idx);
606 self.nybble_idx += 1;
606 self.nybble_idx += 1;
607
607
608 let visit = self.visit;
608 let visit = self.visit;
609 let element = self.nt[visit].get(nybble);
609 let element = self.nt[visit].get(nybble);
610 if let Element::Block(idx) = element {
610 if let Element::Block(idx) = element {
611 self.visit = idx;
611 self.visit = idx;
612 } else {
612 } else {
613 self.done = true;
613 self.done = true;
614 }
614 }
615
615
616 Some(NodeTreeVisitItem {
616 Some(NodeTreeVisitItem {
617 block_idx: visit,
617 block_idx: visit,
618 nybble,
618 nybble,
619 element,
619 element,
620 })
620 })
621 }
621 }
622 }
622 }
623
623
624 impl NodeTreeVisitItem {
624 impl NodeTreeVisitItem {
625 // Return `Some(opt)` if this item is final, with `opt` being the
625 // Return `Some(opt)` if this item is final, with `opt` being the
626 // `Revision` that it may represent.
626 // `Revision` that it may represent.
627 //
627 //
628 // If the item is not terminal, return `None`
628 // If the item is not terminal, return `None`
629 fn final_revision(&self) -> Option<Option<Revision>> {
629 fn final_revision(&self) -> Option<Option<Revision>> {
630 match self.element {
630 match self.element {
631 Element::Block(_) => None,
631 Element::Block(_) => None,
632 Element::Rev(r) => Some(Some(r)),
632 Element::Rev(r) => Some(Some(r)),
633 Element::None => Some(None),
633 Element::None => Some(None),
634 }
634 }
635 }
635 }
636 }
636 }
637
637
638 impl From<Vec<Block>> for NodeTree {
638 impl From<Vec<Block>> for NodeTree {
639 fn from(vec: Vec<Block>) -> Self {
639 fn from(vec: Vec<Block>) -> Self {
640 Self::new(Box::new(vec))
640 Self::new(Box::new(vec))
641 }
641 }
642 }
642 }
643
643
644 impl fmt::Debug for NodeTree {
644 impl fmt::Debug for NodeTree {
645 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
645 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
646 let readonly: &[Block] = &*self.readonly;
646 let readonly: &[Block] = &*self.readonly;
647 write!(
647 write!(
648 f,
648 f,
649 "readonly: {:?}, growable: {:?}, root: {:?}",
649 "readonly: {:?}, growable: {:?}, root: {:?}",
650 readonly, self.growable, self.root
650 readonly, self.growable, self.root
651 )
651 )
652 }
652 }
653 }
653 }
654
654
655 impl Default for NodeTree {
655 impl Default for NodeTree {
656 /// Create a fully mutable empty NodeTree
656 /// Create a fully mutable empty NodeTree
657 fn default() -> Self {
657 fn default() -> Self {
658 NodeTree::new(Box::new(Vec::new()))
658 NodeTree::new(Box::new(Vec::new()))
659 }
659 }
660 }
660 }
661
661
662 impl NodeMap for NodeTree {
662 impl NodeMap for NodeTree {
663 fn find_bin<'a>(
663 fn find_bin<'a>(
664 &self,
664 &self,
665 idx: &impl RevlogIndex,
665 idx: &impl RevlogIndex,
666 prefix: NodePrefix,
666 prefix: NodePrefix,
667 ) -> Result<Option<Revision>, NodeMapError> {
667 ) -> Result<Option<Revision>, NodeMapError> {
668 validate_candidate(idx, prefix, self.lookup(prefix)?)
668 validate_candidate(idx, prefix, self.lookup(prefix)?)
669 .map(|(opt, _shortest)| opt)
669 .map(|(opt, _shortest)| opt)
670 }
670 }
671
671
672 fn unique_prefix_len_bin<'a>(
672 fn unique_prefix_len_bin<'a>(
673 &self,
673 &self,
674 idx: &impl RevlogIndex,
674 idx: &impl RevlogIndex,
675 prefix: NodePrefix,
675 prefix: NodePrefix,
676 ) -> Result<Option<usize>, NodeMapError> {
676 ) -> Result<Option<usize>, NodeMapError> {
677 validate_candidate(idx, prefix, self.lookup(prefix)?)
677 validate_candidate(idx, prefix, self.lookup(prefix)?)
678 .map(|(opt, shortest)| opt.map(|_rev| shortest))
678 .map(|(opt, shortest)| opt.map(|_rev| shortest))
679 }
679 }
680 }
680 }
681
681
682 #[cfg(test)]
682 #[cfg(test)]
683 mod tests {
683 pub mod tests {
684 use super::NodeMapError::*;
684 use super::NodeMapError::*;
685 use super::*;
685 use super::*;
686 use crate::revlog::node::{hex_pad_right, Node};
686 use crate::revlog::node::{hex_pad_right, Node};
687 use std::collections::HashMap;
687 use std::collections::HashMap;
688
688
689 /// Creates a `Block` using a syntax close to the `Debug` output
689 /// Creates a `Block` using a syntax close to the `Debug` output
690 macro_rules! block {
690 macro_rules! block {
691 {$($nybble:tt : $variant:ident($val:tt)),*} => (
691 {$($nybble:tt : $variant:ident($val:tt)),*} => (
692 {
692 {
693 let mut block = Block::new();
693 let mut block = Block::new();
694 $(block.set($nybble, Element::$variant($val)));*;
694 $(block.set($nybble, Element::$variant($val)));*;
695 block
695 block
696 }
696 }
697 )
697 )
698 }
698 }
699
699
700 #[test]
700 #[test]
701 fn test_block_debug() {
701 fn test_block_debug() {
702 let mut block = Block::new();
702 let mut block = Block::new();
703 block.set(1, Element::Rev(3));
703 block.set(1, Element::Rev(3));
704 block.set(10, Element::Block(0));
704 block.set(10, Element::Block(0));
705 assert_eq!(format!("{:?}", block), "{1: Rev(3), 10: Block(0)}");
705 assert_eq!(format!("{:?}", block), "{1: Rev(3), 10: Block(0)}");
706 }
706 }
707
707
708 #[test]
708 #[test]
709 fn test_block_macro() {
709 fn test_block_macro() {
710 let block = block! {5: Block(2)};
710 let block = block! {5: Block(2)};
711 assert_eq!(format!("{:?}", block), "{5: Block(2)}");
711 assert_eq!(format!("{:?}", block), "{5: Block(2)}");
712
712
713 let block = block! {13: Rev(15), 5: Block(2)};
713 let block = block! {13: Rev(15), 5: Block(2)};
714 assert_eq!(format!("{:?}", block), "{5: Block(2), 13: Rev(15)}");
714 assert_eq!(format!("{:?}", block), "{5: Block(2), 13: Rev(15)}");
715 }
715 }
716
716
717 #[test]
717 #[test]
718 fn test_raw_block() {
718 fn test_raw_block() {
719 let mut raw = [255u8; 64];
719 let mut raw = [255u8; 64];
720
720
721 let mut counter = 0;
721 let mut counter = 0;
722 for val in [0_i32, 15, -2, -1, -3].iter() {
722 for val in [0_i32, 15, -2, -1, -3].iter() {
723 for byte in val.to_be_bytes().iter() {
723 for byte in val.to_be_bytes().iter() {
724 raw[counter] = *byte;
724 raw[counter] = *byte;
725 counter += 1;
725 counter += 1;
726 }
726 }
727 }
727 }
728 let (block, _) = Block::from_bytes(&raw).unwrap();
728 let (block, _) = Block::from_bytes(&raw).unwrap();
729 assert_eq!(block.get(0), Element::Block(0));
729 assert_eq!(block.get(0), Element::Block(0));
730 assert_eq!(block.get(1), Element::Block(15));
730 assert_eq!(block.get(1), Element::Block(15));
731 assert_eq!(block.get(3), Element::None);
731 assert_eq!(block.get(3), Element::None);
732 assert_eq!(block.get(2), Element::Rev(0));
732 assert_eq!(block.get(2), Element::Rev(0));
733 assert_eq!(block.get(4), Element::Rev(1));
733 assert_eq!(block.get(4), Element::Rev(1));
734 }
734 }
735
735
736 type TestIndex = HashMap<Revision, Node>;
736 type TestIndex = HashMap<Revision, Node>;
737
737
738 impl RevlogIndex for TestIndex {
738 impl RevlogIndex for TestIndex {
739 fn node(&self, rev: Revision) -> Option<&Node> {
739 fn node(&self, rev: Revision) -> Option<&Node> {
740 self.get(&rev)
740 self.get(&rev)
741 }
741 }
742
742
743 fn len(&self) -> usize {
743 fn len(&self) -> usize {
744 self.len()
744 self.len()
745 }
745 }
746 }
746 }
747
747
748 /// Pad hexadecimal Node prefix with zeros on the right
748 /// Pad hexadecimal Node prefix with zeros on the right
749 ///
749 ///
750 /// This avoids having to repeatedly write very long hexadecimal
750 /// This avoids having to repeatedly write very long hexadecimal
751 /// strings for test data, and brings actual hash size independency.
751 /// strings for test data, and brings actual hash size independency.
752 #[cfg(test)]
752 #[cfg(test)]
753 fn pad_node(hex: &str) -> Node {
753 fn pad_node(hex: &str) -> Node {
754 Node::from_hex(&hex_pad_right(hex)).unwrap()
754 Node::from_hex(&hex_pad_right(hex)).unwrap()
755 }
755 }
756
756
757 /// Pad hexadecimal Node prefix with zeros on the right, then insert
757 /// Pad hexadecimal Node prefix with zeros on the right, then insert
758 fn pad_insert(idx: &mut TestIndex, rev: Revision, hex: &str) {
758 fn pad_insert(idx: &mut TestIndex, rev: Revision, hex: &str) {
759 idx.insert(rev, pad_node(hex));
759 idx.insert(rev, pad_node(hex));
760 }
760 }
761
761
762 fn sample_nodetree() -> NodeTree {
762 fn sample_nodetree() -> NodeTree {
763 NodeTree::from(vec![
763 NodeTree::from(vec![
764 block![0: Rev(9)],
764 block![0: Rev(9)],
765 block![0: Rev(0), 1: Rev(9)],
765 block![0: Rev(0), 1: Rev(9)],
766 block![0: Block(1), 1:Rev(1)],
766 block![0: Block(1), 1:Rev(1)],
767 ])
767 ])
768 }
768 }
769
769
770 fn hex(s: &str) -> NodePrefix {
770 fn hex(s: &str) -> NodePrefix {
771 NodePrefix::from_hex(s).unwrap()
771 NodePrefix::from_hex(s).unwrap()
772 }
772 }
773
773
774 #[test]
774 #[test]
775 fn test_nt_debug() {
775 fn test_nt_debug() {
776 let nt = sample_nodetree();
776 let nt = sample_nodetree();
777 assert_eq!(
777 assert_eq!(
778 format!("{:?}", nt),
778 format!("{:?}", nt),
779 "readonly: \
779 "readonly: \
780 [{0: Rev(9)}, {0: Rev(0), 1: Rev(9)}, {0: Block(1), 1: Rev(1)}], \
780 [{0: Rev(9)}, {0: Rev(0), 1: Rev(9)}, {0: Block(1), 1: Rev(1)}], \
781 growable: [], \
781 growable: [], \
782 root: {0: Block(1), 1: Rev(1)}",
782 root: {0: Block(1), 1: Rev(1)}",
783 );
783 );
784 }
784 }
785
785
786 #[test]
786 #[test]
787 fn test_immutable_find_simplest() -> Result<(), NodeMapError> {
787 fn test_immutable_find_simplest() -> Result<(), NodeMapError> {
788 let mut idx: TestIndex = HashMap::new();
788 let mut idx: TestIndex = HashMap::new();
789 pad_insert(&mut idx, 1, "1234deadcafe");
789 pad_insert(&mut idx, 1, "1234deadcafe");
790
790
791 let nt = NodeTree::from(vec![block! {1: Rev(1)}]);
791 let nt = NodeTree::from(vec![block! {1: Rev(1)}]);
792 assert_eq!(nt.find_bin(&idx, hex("1"))?, Some(1));
792 assert_eq!(nt.find_bin(&idx, hex("1"))?, Some(1));
793 assert_eq!(nt.find_bin(&idx, hex("12"))?, Some(1));
793 assert_eq!(nt.find_bin(&idx, hex("12"))?, Some(1));
794 assert_eq!(nt.find_bin(&idx, hex("1234de"))?, Some(1));
794 assert_eq!(nt.find_bin(&idx, hex("1234de"))?, Some(1));
795 assert_eq!(nt.find_bin(&idx, hex("1a"))?, None);
795 assert_eq!(nt.find_bin(&idx, hex("1a"))?, None);
796 assert_eq!(nt.find_bin(&idx, hex("ab"))?, None);
796 assert_eq!(nt.find_bin(&idx, hex("ab"))?, None);
797
797
798 // and with full binary Nodes
798 // and with full binary Nodes
799 assert_eq!(nt.find_node(&idx, idx.get(&1).unwrap())?, Some(1));
799 assert_eq!(nt.find_node(&idx, idx.get(&1).unwrap())?, Some(1));
800 let unknown = Node::from_hex(&hex_pad_right("3d")).unwrap();
800 let unknown = Node::from_hex(&hex_pad_right("3d")).unwrap();
801 assert_eq!(nt.find_node(&idx, &unknown)?, None);
801 assert_eq!(nt.find_node(&idx, &unknown)?, None);
802 Ok(())
802 Ok(())
803 }
803 }
804
804
805 #[test]
805 #[test]
806 fn test_immutable_find_one_jump() {
806 fn test_immutable_find_one_jump() {
807 let mut idx = TestIndex::new();
807 let mut idx = TestIndex::new();
808 pad_insert(&mut idx, 9, "012");
808 pad_insert(&mut idx, 9, "012");
809 pad_insert(&mut idx, 0, "00a");
809 pad_insert(&mut idx, 0, "00a");
810
810
811 let nt = sample_nodetree();
811 let nt = sample_nodetree();
812
812
813 assert_eq!(nt.find_bin(&idx, hex("0")), Err(MultipleResults));
813 assert_eq!(nt.find_bin(&idx, hex("0")), Err(MultipleResults));
814 assert_eq!(nt.find_bin(&idx, hex("01")), Ok(Some(9)));
814 assert_eq!(nt.find_bin(&idx, hex("01")), Ok(Some(9)));
815 assert_eq!(nt.find_bin(&idx, hex("00")), Err(MultipleResults));
815 assert_eq!(nt.find_bin(&idx, hex("00")), Err(MultipleResults));
816 assert_eq!(nt.find_bin(&idx, hex("00a")), Ok(Some(0)));
816 assert_eq!(nt.find_bin(&idx, hex("00a")), Ok(Some(0)));
817 assert_eq!(nt.unique_prefix_len_bin(&idx, hex("00a")), Ok(Some(3)));
817 assert_eq!(nt.unique_prefix_len_bin(&idx, hex("00a")), Ok(Some(3)));
818 assert_eq!(nt.find_bin(&idx, hex("000")), Ok(Some(NULL_REVISION)));
818 assert_eq!(nt.find_bin(&idx, hex("000")), Ok(Some(NULL_REVISION)));
819 }
819 }
820
820
821 #[test]
821 #[test]
822 fn test_mutated_find() -> Result<(), NodeMapError> {
822 fn test_mutated_find() -> Result<(), NodeMapError> {
823 let mut idx = TestIndex::new();
823 let mut idx = TestIndex::new();
824 pad_insert(&mut idx, 9, "012");
824 pad_insert(&mut idx, 9, "012");
825 pad_insert(&mut idx, 0, "00a");
825 pad_insert(&mut idx, 0, "00a");
826 pad_insert(&mut idx, 2, "cafe");
826 pad_insert(&mut idx, 2, "cafe");
827 pad_insert(&mut idx, 3, "15");
827 pad_insert(&mut idx, 3, "15");
828 pad_insert(&mut idx, 1, "10");
828 pad_insert(&mut idx, 1, "10");
829
829
830 let nt = NodeTree {
830 let nt = NodeTree {
831 readonly: sample_nodetree().readonly,
831 readonly: sample_nodetree().readonly,
832 growable: vec![block![0: Rev(1), 5: Rev(3)]],
832 growable: vec![block![0: Rev(1), 5: Rev(3)]],
833 root: block![0: Block(1), 1:Block(3), 12: Rev(2)],
833 root: block![0: Block(1), 1:Block(3), 12: Rev(2)],
834 masked_inner_blocks: 1,
834 masked_inner_blocks: 1,
835 };
835 };
836 assert_eq!(nt.find_bin(&idx, hex("10"))?, Some(1));
836 assert_eq!(nt.find_bin(&idx, hex("10"))?, Some(1));
837 assert_eq!(nt.find_bin(&idx, hex("c"))?, Some(2));
837 assert_eq!(nt.find_bin(&idx, hex("c"))?, Some(2));
838 assert_eq!(nt.unique_prefix_len_bin(&idx, hex("c"))?, Some(1));
838 assert_eq!(nt.unique_prefix_len_bin(&idx, hex("c"))?, Some(1));
839 assert_eq!(nt.find_bin(&idx, hex("00")), Err(MultipleResults));
839 assert_eq!(nt.find_bin(&idx, hex("00")), Err(MultipleResults));
840 assert_eq!(nt.find_bin(&idx, hex("000"))?, Some(NULL_REVISION));
840 assert_eq!(nt.find_bin(&idx, hex("000"))?, Some(NULL_REVISION));
841 assert_eq!(nt.unique_prefix_len_bin(&idx, hex("000"))?, Some(3));
841 assert_eq!(nt.unique_prefix_len_bin(&idx, hex("000"))?, Some(3));
842 assert_eq!(nt.find_bin(&idx, hex("01"))?, Some(9));
842 assert_eq!(nt.find_bin(&idx, hex("01"))?, Some(9));
843 assert_eq!(nt.masked_readonly_blocks(), 2);
843 assert_eq!(nt.masked_readonly_blocks(), 2);
844 Ok(())
844 Ok(())
845 }
845 }
846
846
847 struct TestNtIndex {
847 pub struct TestNtIndex {
848 index: TestIndex,
848 pub index: TestIndex,
849 nt: NodeTree,
849 pub nt: NodeTree,
850 }
850 }
851
851
852 impl TestNtIndex {
852 impl TestNtIndex {
853 fn new() -> Self {
853 pub fn new() -> Self {
854 TestNtIndex {
854 TestNtIndex {
855 index: HashMap::new(),
855 index: HashMap::new(),
856 nt: NodeTree::default(),
856 nt: NodeTree::default(),
857 }
857 }
858 }
858 }
859
859
860 fn insert(
860 pub fn insert_node(
861 &mut self,
862 rev: Revision,
863 node: Node,
864 ) -> Result<(), NodeMapError> {
865 self.index.insert(rev, node);
866 self.nt.insert(&self.index, &node, rev)?;
867 Ok(())
868 }
869
870 pub fn insert(
861 &mut self,
871 &mut self,
862 rev: Revision,
872 rev: Revision,
863 hex: &str,
873 hex: &str,
864 ) -> Result<(), NodeMapError> {
874 ) -> Result<(), NodeMapError> {
865 let node = pad_node(hex);
875 return self.insert_node(rev, pad_node(hex));
866 self.index.insert(rev, node);
867 self.nt.insert(&self.index, &node, rev)?;
868 Ok(())
869 }
876 }
870
877
871 fn find_hex(
878 fn find_hex(
872 &self,
879 &self,
873 prefix: &str,
880 prefix: &str,
874 ) -> Result<Option<Revision>, NodeMapError> {
881 ) -> Result<Option<Revision>, NodeMapError> {
875 self.nt.find_bin(&self.index, hex(prefix))
882 self.nt.find_bin(&self.index, hex(prefix))
876 }
883 }
877
884
878 fn unique_prefix_len_hex(
885 fn unique_prefix_len_hex(
879 &self,
886 &self,
880 prefix: &str,
887 prefix: &str,
881 ) -> Result<Option<usize>, NodeMapError> {
888 ) -> Result<Option<usize>, NodeMapError> {
882 self.nt.unique_prefix_len_bin(&self.index, hex(prefix))
889 self.nt.unique_prefix_len_bin(&self.index, hex(prefix))
883 }
890 }
884
891
885 /// Drain `added` and restart a new one
892 /// Drain `added` and restart a new one
886 fn commit(self) -> Self {
893 fn commit(self) -> Self {
887 let mut as_vec: Vec<Block> =
894 let mut as_vec: Vec<Block> =
888 self.nt.readonly.iter().copied().collect();
895 self.nt.readonly.iter().copied().collect();
889 as_vec.extend(self.nt.growable);
896 as_vec.extend(self.nt.growable);
890 as_vec.push(self.nt.root);
897 as_vec.push(self.nt.root);
891
898
892 Self {
899 Self {
893 index: self.index,
900 index: self.index,
894 nt: NodeTree::from(as_vec),
901 nt: NodeTree::from(as_vec),
895 }
902 }
896 }
903 }
897 }
904 }
898
905
899 #[test]
906 #[test]
900 fn test_insert_full_mutable() -> Result<(), NodeMapError> {
907 fn test_insert_full_mutable() -> Result<(), NodeMapError> {
901 let mut idx = TestNtIndex::new();
908 let mut idx = TestNtIndex::new();
902 idx.insert(0, "1234")?;
909 idx.insert(0, "1234")?;
903 assert_eq!(idx.find_hex("1")?, Some(0));
910 assert_eq!(idx.find_hex("1")?, Some(0));
904 assert_eq!(idx.find_hex("12")?, Some(0));
911 assert_eq!(idx.find_hex("12")?, Some(0));
905
912
906 // let's trigger a simple split
913 // let's trigger a simple split
907 idx.insert(1, "1a34")?;
914 idx.insert(1, "1a34")?;
908 assert_eq!(idx.nt.growable.len(), 1);
915 assert_eq!(idx.nt.growable.len(), 1);
909 assert_eq!(idx.find_hex("12")?, Some(0));
916 assert_eq!(idx.find_hex("12")?, Some(0));
910 assert_eq!(idx.find_hex("1a")?, Some(1));
917 assert_eq!(idx.find_hex("1a")?, Some(1));
911
918
912 // reinserting is a no_op
919 // reinserting is a no_op
913 idx.insert(1, "1a34")?;
920 idx.insert(1, "1a34")?;
914 assert_eq!(idx.nt.growable.len(), 1);
921 assert_eq!(idx.nt.growable.len(), 1);
915 assert_eq!(idx.find_hex("12")?, Some(0));
922 assert_eq!(idx.find_hex("12")?, Some(0));
916 assert_eq!(idx.find_hex("1a")?, Some(1));
923 assert_eq!(idx.find_hex("1a")?, Some(1));
917
924
918 idx.insert(2, "1a01")?;
925 idx.insert(2, "1a01")?;
919 assert_eq!(idx.nt.growable.len(), 2);
926 assert_eq!(idx.nt.growable.len(), 2);
920 assert_eq!(idx.find_hex("1a"), Err(NodeMapError::MultipleResults));
927 assert_eq!(idx.find_hex("1a"), Err(NodeMapError::MultipleResults));
921 assert_eq!(idx.find_hex("12")?, Some(0));
928 assert_eq!(idx.find_hex("12")?, Some(0));
922 assert_eq!(idx.find_hex("1a3")?, Some(1));
929 assert_eq!(idx.find_hex("1a3")?, Some(1));
923 assert_eq!(idx.find_hex("1a0")?, Some(2));
930 assert_eq!(idx.find_hex("1a0")?, Some(2));
924 assert_eq!(idx.find_hex("1a12")?, None);
931 assert_eq!(idx.find_hex("1a12")?, None);
925
932
926 // now let's make it split and create more than one additional block
933 // now let's make it split and create more than one additional block
927 idx.insert(3, "1a345")?;
934 idx.insert(3, "1a345")?;
928 assert_eq!(idx.nt.growable.len(), 4);
935 assert_eq!(idx.nt.growable.len(), 4);
929 assert_eq!(idx.find_hex("1a340")?, Some(1));
936 assert_eq!(idx.find_hex("1a340")?, Some(1));
930 assert_eq!(idx.find_hex("1a345")?, Some(3));
937 assert_eq!(idx.find_hex("1a345")?, Some(3));
931 assert_eq!(idx.find_hex("1a341")?, None);
938 assert_eq!(idx.find_hex("1a341")?, None);
932
939
933 // there's no readonly block to mask
940 // there's no readonly block to mask
934 assert_eq!(idx.nt.masked_readonly_blocks(), 0);
941 assert_eq!(idx.nt.masked_readonly_blocks(), 0);
935 Ok(())
942 Ok(())
936 }
943 }
937
944
938 #[test]
945 #[test]
939 fn test_unique_prefix_len_zero_prefix() {
946 fn test_unique_prefix_len_zero_prefix() {
940 let mut idx = TestNtIndex::new();
947 let mut idx = TestNtIndex::new();
941 idx.insert(0, "00000abcd").unwrap();
948 idx.insert(0, "00000abcd").unwrap();
942
949
943 assert_eq!(idx.find_hex("000"), Err(NodeMapError::MultipleResults));
950 assert_eq!(idx.find_hex("000"), Err(NodeMapError::MultipleResults));
944 // in the nodetree proper, this will be found at the first nybble
951 // in the nodetree proper, this will be found at the first nybble
945 // yet the correct answer for unique_prefix_len is not 1, nor 1+1,
952 // yet the correct answer for unique_prefix_len is not 1, nor 1+1,
946 // but the first difference with `NULL_NODE`
953 // but the first difference with `NULL_NODE`
947 assert_eq!(idx.unique_prefix_len_hex("00000a"), Ok(Some(6)));
954 assert_eq!(idx.unique_prefix_len_hex("00000a"), Ok(Some(6)));
948 assert_eq!(idx.unique_prefix_len_hex("00000ab"), Ok(Some(6)));
955 assert_eq!(idx.unique_prefix_len_hex("00000ab"), Ok(Some(6)));
949
956
950 // same with odd result
957 // same with odd result
951 idx.insert(1, "00123").unwrap();
958 idx.insert(1, "00123").unwrap();
952 assert_eq!(idx.unique_prefix_len_hex("001"), Ok(Some(3)));
959 assert_eq!(idx.unique_prefix_len_hex("001"), Ok(Some(3)));
953 assert_eq!(idx.unique_prefix_len_hex("0012"), Ok(Some(3)));
960 assert_eq!(idx.unique_prefix_len_hex("0012"), Ok(Some(3)));
954
961
955 // these are unchanged of course
962 // these are unchanged of course
956 assert_eq!(idx.unique_prefix_len_hex("00000a"), Ok(Some(6)));
963 assert_eq!(idx.unique_prefix_len_hex("00000a"), Ok(Some(6)));
957 assert_eq!(idx.unique_prefix_len_hex("00000ab"), Ok(Some(6)));
964 assert_eq!(idx.unique_prefix_len_hex("00000ab"), Ok(Some(6)));
958 }
965 }
959
966
960 #[test]
967 #[test]
961 fn test_insert_extreme_splitting() -> Result<(), NodeMapError> {
968 fn test_insert_extreme_splitting() -> Result<(), NodeMapError> {
962 // check that the splitting loop is long enough
969 // check that the splitting loop is long enough
963 let mut nt_idx = TestNtIndex::new();
970 let mut nt_idx = TestNtIndex::new();
964 let nt = &mut nt_idx.nt;
971 let nt = &mut nt_idx.nt;
965 let idx = &mut nt_idx.index;
972 let idx = &mut nt_idx.index;
966
973
967 let node0_hex = hex_pad_right("444444");
974 let node0_hex = hex_pad_right("444444");
968 let mut node1_hex = hex_pad_right("444444");
975 let mut node1_hex = hex_pad_right("444444");
969 node1_hex.pop();
976 node1_hex.pop();
970 node1_hex.push('5');
977 node1_hex.push('5');
971 let node0 = Node::from_hex(&node0_hex).unwrap();
978 let node0 = Node::from_hex(&node0_hex).unwrap();
972 let node1 = Node::from_hex(&node1_hex).unwrap();
979 let node1 = Node::from_hex(&node1_hex).unwrap();
973
980
974 idx.insert(0, node0);
981 idx.insert(0, node0);
975 nt.insert(idx, &node0, 0)?;
982 nt.insert(idx, &node0, 0)?;
976 idx.insert(1, node1);
983 idx.insert(1, node1);
977 nt.insert(idx, &node1, 1)?;
984 nt.insert(idx, &node1, 1)?;
978
985
979 assert_eq!(nt.find_bin(idx, (&node0).into())?, Some(0));
986 assert_eq!(nt.find_bin(idx, (&node0).into())?, Some(0));
980 assert_eq!(nt.find_bin(idx, (&node1).into())?, Some(1));
987 assert_eq!(nt.find_bin(idx, (&node1).into())?, Some(1));
981 Ok(())
988 Ok(())
982 }
989 }
983
990
984 #[test]
991 #[test]
985 fn test_insert_partly_immutable() -> Result<(), NodeMapError> {
992 fn test_insert_partly_immutable() -> Result<(), NodeMapError> {
986 let mut idx = TestNtIndex::new();
993 let mut idx = TestNtIndex::new();
987 idx.insert(0, "1234")?;
994 idx.insert(0, "1234")?;
988 idx.insert(1, "1235")?;
995 idx.insert(1, "1235")?;
989 idx.insert(2, "131")?;
996 idx.insert(2, "131")?;
990 idx.insert(3, "cafe")?;
997 idx.insert(3, "cafe")?;
991 let mut idx = idx.commit();
998 let mut idx = idx.commit();
992 assert_eq!(idx.find_hex("1234")?, Some(0));
999 assert_eq!(idx.find_hex("1234")?, Some(0));
993 assert_eq!(idx.find_hex("1235")?, Some(1));
1000 assert_eq!(idx.find_hex("1235")?, Some(1));
994 assert_eq!(idx.find_hex("131")?, Some(2));
1001 assert_eq!(idx.find_hex("131")?, Some(2));
995 assert_eq!(idx.find_hex("cafe")?, Some(3));
1002 assert_eq!(idx.find_hex("cafe")?, Some(3));
996 // we did not add anything since init from readonly
1003 // we did not add anything since init from readonly
997 assert_eq!(idx.nt.masked_readonly_blocks(), 0);
1004 assert_eq!(idx.nt.masked_readonly_blocks(), 0);
998
1005
999 idx.insert(4, "123A")?;
1006 idx.insert(4, "123A")?;
1000 assert_eq!(idx.find_hex("1234")?, Some(0));
1007 assert_eq!(idx.find_hex("1234")?, Some(0));
1001 assert_eq!(idx.find_hex("1235")?, Some(1));
1008 assert_eq!(idx.find_hex("1235")?, Some(1));
1002 assert_eq!(idx.find_hex("131")?, Some(2));
1009 assert_eq!(idx.find_hex("131")?, Some(2));
1003 assert_eq!(idx.find_hex("cafe")?, Some(3));
1010 assert_eq!(idx.find_hex("cafe")?, Some(3));
1004 assert_eq!(idx.find_hex("123A")?, Some(4));
1011 assert_eq!(idx.find_hex("123A")?, Some(4));
1005 // we masked blocks for all prefixes of "123", including the root
1012 // we masked blocks for all prefixes of "123", including the root
1006 assert_eq!(idx.nt.masked_readonly_blocks(), 4);
1013 assert_eq!(idx.nt.masked_readonly_blocks(), 4);
1007
1014
1008 eprintln!("{:?}", idx.nt);
1015 eprintln!("{:?}", idx.nt);
1009 idx.insert(5, "c0")?;
1016 idx.insert(5, "c0")?;
1010 assert_eq!(idx.find_hex("cafe")?, Some(3));
1017 assert_eq!(idx.find_hex("cafe")?, Some(3));
1011 assert_eq!(idx.find_hex("c0")?, Some(5));
1018 assert_eq!(idx.find_hex("c0")?, Some(5));
1012 assert_eq!(idx.find_hex("c1")?, None);
1019 assert_eq!(idx.find_hex("c1")?, None);
1013 assert_eq!(idx.find_hex("1234")?, Some(0));
1020 assert_eq!(idx.find_hex("1234")?, Some(0));
1014 // inserting "c0" is just splitting the 'c' slot of the mutable root,
1021 // inserting "c0" is just splitting the 'c' slot of the mutable root,
1015 // it doesn't mask anything
1022 // it doesn't mask anything
1016 assert_eq!(idx.nt.masked_readonly_blocks(), 4);
1023 assert_eq!(idx.nt.masked_readonly_blocks(), 4);
1017
1024
1018 Ok(())
1025 Ok(())
1019 }
1026 }
1020
1027
1021 #[test]
1028 #[test]
1022 fn test_invalidate_all() -> Result<(), NodeMapError> {
1029 fn test_invalidate_all() -> Result<(), NodeMapError> {
1023 let mut idx = TestNtIndex::new();
1030 let mut idx = TestNtIndex::new();
1024 idx.insert(0, "1234")?;
1031 idx.insert(0, "1234")?;
1025 idx.insert(1, "1235")?;
1032 idx.insert(1, "1235")?;
1026 idx.insert(2, "131")?;
1033 idx.insert(2, "131")?;
1027 idx.insert(3, "cafe")?;
1034 idx.insert(3, "cafe")?;
1028 let mut idx = idx.commit();
1035 let mut idx = idx.commit();
1029
1036
1030 idx.nt.invalidate_all();
1037 idx.nt.invalidate_all();
1031
1038
1032 assert_eq!(idx.find_hex("1234")?, None);
1039 assert_eq!(idx.find_hex("1234")?, None);
1033 assert_eq!(idx.find_hex("1235")?, None);
1040 assert_eq!(idx.find_hex("1235")?, None);
1034 assert_eq!(idx.find_hex("131")?, None);
1041 assert_eq!(idx.find_hex("131")?, None);
1035 assert_eq!(idx.find_hex("cafe")?, None);
1042 assert_eq!(idx.find_hex("cafe")?, None);
1036 // all the readonly blocks have been masked, this is the
1043 // all the readonly blocks have been masked, this is the
1037 // conventional expected response
1044 // conventional expected response
1038 assert_eq!(idx.nt.masked_readonly_blocks(), idx.nt.readonly.len() + 1);
1045 assert_eq!(idx.nt.masked_readonly_blocks(), idx.nt.readonly.len() + 1);
1039 Ok(())
1046 Ok(())
1040 }
1047 }
1041
1048
1042 #[test]
1049 #[test]
1043 fn test_into_added_empty() {
1050 fn test_into_added_empty() {
1044 assert!(sample_nodetree().into_readonly_and_added().1.is_empty());
1051 assert!(sample_nodetree().into_readonly_and_added().1.is_empty());
1045 assert!(sample_nodetree()
1052 assert!(sample_nodetree()
1046 .into_readonly_and_added_bytes()
1053 .into_readonly_and_added_bytes()
1047 .1
1054 .1
1048 .is_empty());
1055 .is_empty());
1049 }
1056 }
1050
1057
1051 #[test]
1058 #[test]
1052 fn test_into_added_bytes() -> Result<(), NodeMapError> {
1059 fn test_into_added_bytes() -> Result<(), NodeMapError> {
1053 let mut idx = TestNtIndex::new();
1060 let mut idx = TestNtIndex::new();
1054 idx.insert(0, "1234")?;
1061 idx.insert(0, "1234")?;
1055 let mut idx = idx.commit();
1062 let mut idx = idx.commit();
1056 idx.insert(4, "cafe")?;
1063 idx.insert(4, "cafe")?;
1057 let (_, bytes) = idx.nt.into_readonly_and_added_bytes();
1064 let (_, bytes) = idx.nt.into_readonly_and_added_bytes();
1058
1065
1059 // only the root block has been changed
1066 // only the root block has been changed
1060 assert_eq!(bytes.len(), size_of::<Block>());
1067 assert_eq!(bytes.len(), size_of::<Block>());
1061 // big endian for -2
1068 // big endian for -2
1062 assert_eq!(&bytes[4..2 * 4], [255, 255, 255, 254]);
1069 assert_eq!(&bytes[4..2 * 4], [255, 255, 255, 254]);
1063 // big endian for -6
1070 // big endian for -6
1064 assert_eq!(&bytes[12 * 4..13 * 4], [255, 255, 255, 250]);
1071 assert_eq!(&bytes[12 * 4..13 * 4], [255, 255, 255, 250]);
1065 Ok(())
1072 Ok(())
1066 }
1073 }
1067 }
1074 }
General Comments 0
You need to be logged in to leave comments. Login now