##// END OF EJS Templates
rust-nodemap: abstracting the indexing...
Georges Racinet -
r44645:220d4d2e default
parent child Browse files
Show More
@@ -1,388 +1,404 b''
1 1 // Copyright 2018-2020 Georges Racinet <georges.racinet@octobus.net>
2 2 // and Mercurial contributors
3 3 //
4 4 // This software may be used and distributed according to the terms of the
5 5 // GNU General Public License version 2 or any later version.
6 6 //! Indexing facilities for fast retrieval of `Revision` from `Node`
7 7 //!
8 8 //! This provides a variation on the 16-ary radix tree that is
9 9 //! provided as "nodetree" in revlog.c, ready for append-only persistence
10 10 //! on disk.
11 11 //!
12 12 //! Following existing implicit conventions, the "nodemap" terminology
13 13 //! is used in a more abstract context.
14 14
15 15 use super::{
16 16 Node, NodeError, NodePrefix, NodePrefixRef, Revision, RevlogIndex,
17 17 };
18 18 use std::fmt;
19 19 use std::ops::Deref;
20 use std::ops::Index;
20 21
21 22 #[derive(Debug, PartialEq)]
22 23 pub enum NodeMapError {
23 24 MultipleResults,
24 25 InvalidNodePrefix(NodeError),
25 26 /// A `Revision` stored in the nodemap could not be found in the index
26 27 RevisionNotInIndex(Revision),
27 28 }
28 29
29 30 impl From<NodeError> for NodeMapError {
30 31 fn from(err: NodeError) -> Self {
31 32 NodeMapError::InvalidNodePrefix(err)
32 33 }
33 34 }
34 35
35 36 /// Mapping system from Mercurial nodes to revision numbers.
36 37 ///
37 38 /// ## `RevlogIndex` and `NodeMap`
38 39 ///
39 40 /// One way to think about their relationship is that
40 41 /// the `NodeMap` is a prefix-oriented reverse index of the `Node` information
41 42 /// carried by a [`RevlogIndex`].
42 43 ///
43 44 /// Many of the methods in this trait take a `RevlogIndex` argument
44 45 /// which is used for validation of their results. This index must naturally
45 46 /// be the one the `NodeMap` is about, and it must be consistent.
46 47 ///
47 48 /// Notably, the `NodeMap` must not store
48 49 /// information about more `Revision` values than there are in the index.
49 50 /// In these methods, an encountered `Revision` is not in the index, a
50 51 /// [`RevisionNotInIndex`] error is returned.
51 52 ///
52 53 /// In insert operations, the rule is thus that the `NodeMap` must always
53 54 /// be updated after the `RevlogIndex`
54 55 /// be updated first, and the `NodeMap` second.
55 56 ///
56 57 /// [`RevisionNotInIndex`]: enum.NodeMapError.html#variant.RevisionNotInIndex
57 58 /// [`RevlogIndex`]: ../trait.RevlogIndex.html
58 59 pub trait NodeMap {
59 60 /// Find the unique `Revision` having the given `Node`
60 61 ///
61 62 /// If no Revision matches the given `Node`, `Ok(None)` is returned.
62 63 fn find_node(
63 64 &self,
64 65 index: &impl RevlogIndex,
65 66 node: &Node,
66 67 ) -> Result<Option<Revision>, NodeMapError> {
67 68 self.find_bin(index, node.into())
68 69 }
69 70
70 71 /// Find the unique Revision whose `Node` starts with a given binary prefix
71 72 ///
72 73 /// If no Revision matches the given prefix, `Ok(None)` is returned.
73 74 ///
74 75 /// If several Revisions match the given prefix, a [`MultipleResults`]
75 76 /// error is returned.
76 77 fn find_bin<'a>(
77 78 &self,
78 79 idx: &impl RevlogIndex,
79 80 prefix: NodePrefixRef<'a>,
80 81 ) -> Result<Option<Revision>, NodeMapError>;
81 82
82 83 /// Find the unique Revision whose `Node` hexadecimal string representation
83 84 /// starts with a given prefix
84 85 ///
85 86 /// If no Revision matches the given prefix, `Ok(None)` is returned.
86 87 ///
87 88 /// If several Revisions match the given prefix, a [`MultipleResults`]
88 89 /// error is returned.
89 90 fn find_hex(
90 91 &self,
91 92 idx: &impl RevlogIndex,
92 93 prefix: &str,
93 94 ) -> Result<Option<Revision>, NodeMapError> {
94 95 self.find_bin(idx, NodePrefix::from_hex(prefix)?.borrow())
95 96 }
96 97 }
97 98
98 99 /// Low level NodeTree [`Blocks`] elements
99 100 ///
100 101 /// These are exactly as for instance on persistent storage.
101 102 type RawElement = i32;
102 103
103 104 /// High level representation of values in NodeTree
104 105 /// [`Blocks`](struct.Block.html)
105 106 ///
106 107 /// This is the high level representation that most algorithms should
107 108 /// use.
108 109 #[derive(Clone, Debug, Eq, PartialEq)]
109 110 enum Element {
110 111 Rev(Revision),
111 112 Block(usize),
112 113 None,
113 114 }
114 115
115 116 impl From<RawElement> for Element {
116 117 /// Conversion from low level representation, after endianness conversion.
117 118 ///
118 119 /// See [`Block`](struct.Block.html) for explanation about the encoding.
119 120 fn from(raw: RawElement) -> Element {
120 121 if raw >= 0 {
121 122 Element::Block(raw as usize)
122 123 } else if raw == -1 {
123 124 Element::None
124 125 } else {
125 126 Element::Rev(-raw - 2)
126 127 }
127 128 }
128 129 }
129 130
130 131 impl From<Element> for RawElement {
131 132 fn from(element: Element) -> RawElement {
132 133 match element {
133 134 Element::None => 0,
134 135 Element::Block(i) => i as RawElement,
135 136 Element::Rev(rev) => -rev - 2,
136 137 }
137 138 }
138 139 }
139 140
140 141 /// A logical block of the `NodeTree`, packed with a fixed size.
141 142 ///
142 143 /// These are always used in container types implementing `Index<Block>`,
143 144 /// such as `&Block`
144 145 ///
145 146 /// As an array of integers, its ith element encodes that the
146 147 /// ith potential edge from the block, representing the ith hexadecimal digit
147 148 /// (nybble) `i` is either:
148 149 ///
149 150 /// - absent (value -1)
150 151 /// - another `Block` in the same indexable container (value β‰₯ 0)
151 152 /// - a `Revision` leaf (value ≀ -2)
152 153 ///
153 154 /// Endianness has to be fixed for consistency on shared storage across
154 155 /// different architectures.
155 156 ///
156 157 /// A key difference with the C `nodetree` is that we need to be
157 158 /// able to represent the [`Block`] at index 0, hence -1 is the empty marker
158 159 /// rather than 0 and the `Revision` range upper limit of -2 instead of -1.
159 160 ///
160 161 /// Another related difference is that `NULL_REVISION` (-1) is not
161 162 /// represented at all, because we want an immutable empty nodetree
162 163 /// to be valid.
163 164
164 165 #[derive(Clone, PartialEq)]
165 166 pub struct Block([RawElement; 16]);
166 167
167 168 impl Block {
168 169 fn new() -> Self {
169 170 Block([-1; 16])
170 171 }
171 172
172 173 fn get(&self, nybble: u8) -> Element {
173 174 Element::from(RawElement::from_be(self.0[nybble as usize]))
174 175 }
175 176
176 177 fn set(&mut self, nybble: u8, element: Element) {
177 178 self.0[nybble as usize] = RawElement::to_be(element.into())
178 179 }
179 180 }
180 181
181 182 impl fmt::Debug for Block {
182 183 /// sparse representation for testing and debugging purposes
183 184 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
184 185 f.debug_map()
185 186 .entries((0..16).filter_map(|i| match self.get(i) {
186 187 Element::None => None,
187 188 element => Some((i, element)),
188 189 }))
189 190 .finish()
190 191 }
191 192 }
192 193
193 194 /// A 16-radix tree with the root block at the end
194 195 pub struct NodeTree {
195 196 readonly: Box<dyn Deref<Target = [Block]> + Send>,
196 197 }
197 198
199 impl Index<usize> for NodeTree {
200 type Output = Block;
201
202 fn index(&self, i: usize) -> &Block {
203 &self.readonly[i]
204 }
205 }
206
198 207 /// Return `None` unless the `Node` for `rev` has given prefix in `index`.
199 208 fn has_prefix_or_none<'p>(
200 209 idx: &impl RevlogIndex,
201 210 prefix: NodePrefixRef<'p>,
202 211 rev: Revision,
203 212 ) -> Result<Option<Revision>, NodeMapError> {
204 213 idx.node(rev)
205 214 .ok_or_else(|| NodeMapError::RevisionNotInIndex(rev))
206 215 .map(|node| {
207 216 if prefix.is_prefix_of(node) {
208 217 Some(rev)
209 218 } else {
210 219 None
211 220 }
212 221 })
213 222 }
214 223
215 224 impl NodeTree {
225 fn len(&self) -> usize {
226 self.readonly.len()
227 }
228
229 fn is_empty(&self) -> bool {
230 self.len() == 0
231 }
232
216 233 /// Main working method for `NodeTree` searches
217 234 ///
218 235 /// This partial implementation lacks special cases for NULL_REVISION
219 236 fn lookup<'p>(
220 237 &self,
221 238 prefix: NodePrefixRef<'p>,
222 239 ) -> Result<Option<Revision>, NodeMapError> {
223 let blocks: &[Block] = &*self.readonly;
224 if blocks.is_empty() {
240 if self.is_empty() {
225 241 return Ok(None);
226 242 }
227 let mut visit = blocks.len() - 1;
243 let mut visit = self.len() - 1;
228 244 for i in 0..prefix.len() {
229 245 let nybble = prefix.get_nybble(i);
230 match blocks[visit].get(nybble) {
246 match self[visit].get(nybble) {
231 247 Element::None => return Ok(None),
232 248 Element::Rev(r) => return Ok(Some(r)),
233 249 Element::Block(idx) => visit = idx,
234 250 }
235 251 }
236 252 Err(NodeMapError::MultipleResults)
237 253 }
238 254 }
239 255
240 256 impl From<Vec<Block>> for NodeTree {
241 257 fn from(vec: Vec<Block>) -> Self {
242 258 NodeTree {
243 259 readonly: Box::new(vec),
244 260 }
245 261 }
246 262 }
247 263
248 264 impl fmt::Debug for NodeTree {
249 265 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
250 266 let blocks: &[Block] = &*self.readonly;
251 267 write!(f, "readonly: {:?}", blocks)
252 268 }
253 269 }
254 270
255 271 impl NodeMap for NodeTree {
256 272 fn find_bin<'a>(
257 273 &self,
258 274 idx: &impl RevlogIndex,
259 275 prefix: NodePrefixRef<'a>,
260 276 ) -> Result<Option<Revision>, NodeMapError> {
261 277 self.lookup(prefix.clone()).and_then(|opt| {
262 278 opt.map_or(Ok(None), |rev| has_prefix_or_none(idx, prefix, rev))
263 279 })
264 280 }
265 281 }
266 282
267 283 #[cfg(test)]
268 284 mod tests {
269 285 use super::NodeMapError::*;
270 286 use super::*;
271 287 use crate::revlog::node::{hex_pad_right, Node};
272 288 use std::collections::HashMap;
273 289
274 290 /// Creates a `Block` using a syntax close to the `Debug` output
275 291 macro_rules! block {
276 292 {$($nybble:tt : $variant:ident($val:tt)),*} => (
277 293 {
278 294 let mut block = Block::new();
279 295 $(block.set($nybble, Element::$variant($val)));*;
280 296 block
281 297 }
282 298 )
283 299 }
284 300
285 301 #[test]
286 302 fn test_block_debug() {
287 303 let mut block = Block::new();
288 304 block.set(1, Element::Rev(3));
289 305 block.set(10, Element::Block(0));
290 306 assert_eq!(format!("{:?}", block), "{1: Rev(3), 10: Block(0)}");
291 307 }
292 308
293 309 #[test]
294 310 fn test_block_macro() {
295 311 let block = block! {5: Block(2)};
296 312 assert_eq!(format!("{:?}", block), "{5: Block(2)}");
297 313
298 314 let block = block! {13: Rev(15), 5: Block(2)};
299 315 assert_eq!(format!("{:?}", block), "{5: Block(2), 13: Rev(15)}");
300 316 }
301 317
302 318 #[test]
303 319 fn test_raw_block() {
304 320 let mut raw = [-1; 16];
305 321 raw[0] = 0;
306 322 raw[1] = RawElement::to_be(15);
307 323 raw[2] = RawElement::to_be(-2);
308 324 raw[3] = RawElement::to_be(-1);
309 325 raw[4] = RawElement::to_be(-3);
310 326 let block = Block(raw);
311 327 assert_eq!(block.get(0), Element::Block(0));
312 328 assert_eq!(block.get(1), Element::Block(15));
313 329 assert_eq!(block.get(3), Element::None);
314 330 assert_eq!(block.get(2), Element::Rev(0));
315 331 assert_eq!(block.get(4), Element::Rev(1));
316 332 }
317 333
318 334 type TestIndex = HashMap<Revision, Node>;
319 335
320 336 impl RevlogIndex for TestIndex {
321 337 fn node(&self, rev: Revision) -> Option<&Node> {
322 338 self.get(&rev)
323 339 }
324 340
325 341 fn len(&self) -> usize {
326 342 self.len()
327 343 }
328 344 }
329 345
330 346 /// Pad hexadecimal Node prefix with zeros on the right, then insert
331 347 ///
332 348 /// This avoids having to repeatedly write very long hexadecimal
333 349 /// strings for test data, and brings actual hash size independency.
334 350 fn pad_insert(idx: &mut TestIndex, rev: Revision, hex: &str) {
335 351 idx.insert(rev, Node::from_hex(&hex_pad_right(hex)).unwrap());
336 352 }
337 353
338 354 fn sample_nodetree() -> NodeTree {
339 355 NodeTree::from(vec![
340 356 block![0: Rev(9)],
341 357 block![0: Rev(0), 1: Rev(9)],
342 358 block![0: Block(1), 1:Rev(1)],
343 359 ])
344 360 }
345 361
346 362 #[test]
347 363 fn test_nt_debug() {
348 364 let nt = sample_nodetree();
349 365 assert_eq!(
350 366 format!("{:?}", nt),
351 367 "readonly: \
352 368 [{0: Rev(9)}, {0: Rev(0), 1: Rev(9)}, {0: Block(1), 1: Rev(1)}]"
353 369 );
354 370 }
355 371
356 372 #[test]
357 373 fn test_immutable_find_simplest() -> Result<(), NodeMapError> {
358 374 let mut idx: TestIndex = HashMap::new();
359 375 pad_insert(&mut idx, 1, "1234deadcafe");
360 376
361 377 let nt = NodeTree::from(vec![block![1: Rev(1)]]);
362 378 assert_eq!(nt.find_hex(&idx, "1")?, Some(1));
363 379 assert_eq!(nt.find_hex(&idx, "12")?, Some(1));
364 380 assert_eq!(nt.find_hex(&idx, "1234de")?, Some(1));
365 381 assert_eq!(nt.find_hex(&idx, "1a")?, None);
366 382 assert_eq!(nt.find_hex(&idx, "ab")?, None);
367 383
368 384 // and with full binary Nodes
369 385 assert_eq!(nt.find_node(&idx, idx.get(&1).unwrap())?, Some(1));
370 386 let unknown = Node::from_hex(&hex_pad_right("3d")).unwrap();
371 387 assert_eq!(nt.find_node(&idx, &unknown)?, None);
372 388 Ok(())
373 389 }
374 390
375 391 #[test]
376 392 fn test_immutable_find_one_jump() {
377 393 let mut idx = TestIndex::new();
378 394 pad_insert(&mut idx, 9, "012");
379 395 pad_insert(&mut idx, 0, "00a");
380 396
381 397 let nt = sample_nodetree();
382 398
383 399 assert_eq!(nt.find_hex(&idx, "0"), Err(MultipleResults));
384 400 assert_eq!(nt.find_hex(&idx, "01"), Ok(Some(9)));
385 401 assert_eq!(nt.find_hex(&idx, "00"), Ok(Some(0)));
386 402 assert_eq!(nt.find_hex(&idx, "00a"), Ok(Some(0)));
387 403 }
388 404 }
General Comments 0
You need to be logged in to leave comments. Login now