Show More
@@ -1,388 +1,404 b'' | |||
|
1 | 1 | // Copyright 2018-2020 Georges Racinet <georges.racinet@octobus.net> |
|
2 | 2 | // and Mercurial contributors |
|
3 | 3 | // |
|
4 | 4 | // This software may be used and distributed according to the terms of the |
|
5 | 5 | // GNU General Public License version 2 or any later version. |
|
6 | 6 | //! Indexing facilities for fast retrieval of `Revision` from `Node` |
|
7 | 7 | //! |
|
8 | 8 | //! This provides a variation on the 16-ary radix tree that is |
|
9 | 9 | //! provided as "nodetree" in revlog.c, ready for append-only persistence |
|
10 | 10 | //! on disk. |
|
11 | 11 | //! |
|
12 | 12 | //! Following existing implicit conventions, the "nodemap" terminology |
|
13 | 13 | //! is used in a more abstract context. |
|
14 | 14 | |
|
15 | 15 | use super::{ |
|
16 | 16 | Node, NodeError, NodePrefix, NodePrefixRef, Revision, RevlogIndex, |
|
17 | 17 | }; |
|
18 | 18 | use std::fmt; |
|
19 | 19 | use std::ops::Deref; |
|
20 | use std::ops::Index; | |
|
20 | 21 | |
|
21 | 22 | #[derive(Debug, PartialEq)] |
|
22 | 23 | pub enum NodeMapError { |
|
23 | 24 | MultipleResults, |
|
24 | 25 | InvalidNodePrefix(NodeError), |
|
25 | 26 | /// A `Revision` stored in the nodemap could not be found in the index |
|
26 | 27 | RevisionNotInIndex(Revision), |
|
27 | 28 | } |
|
28 | 29 | |
|
29 | 30 | impl From<NodeError> for NodeMapError { |
|
30 | 31 | fn from(err: NodeError) -> Self { |
|
31 | 32 | NodeMapError::InvalidNodePrefix(err) |
|
32 | 33 | } |
|
33 | 34 | } |
|
34 | 35 | |
|
35 | 36 | /// Mapping system from Mercurial nodes to revision numbers. |
|
36 | 37 | /// |
|
37 | 38 | /// ## `RevlogIndex` and `NodeMap` |
|
38 | 39 | /// |
|
39 | 40 | /// One way to think about their relationship is that |
|
40 | 41 | /// the `NodeMap` is a prefix-oriented reverse index of the `Node` information |
|
41 | 42 | /// carried by a [`RevlogIndex`]. |
|
42 | 43 | /// |
|
43 | 44 | /// Many of the methods in this trait take a `RevlogIndex` argument |
|
44 | 45 | /// which is used for validation of their results. This index must naturally |
|
45 | 46 | /// be the one the `NodeMap` is about, and it must be consistent. |
|
46 | 47 | /// |
|
47 | 48 | /// Notably, the `NodeMap` must not store |
|
48 | 49 | /// information about more `Revision` values than there are in the index. |
|
49 | 50 | /// In these methods, an encountered `Revision` is not in the index, a |
|
50 | 51 | /// [`RevisionNotInIndex`] error is returned. |
|
51 | 52 | /// |
|
52 | 53 | /// In insert operations, the rule is thus that the `NodeMap` must always |
|
53 | 54 | /// be updated after the `RevlogIndex` |
|
54 | 55 | /// be updated first, and the `NodeMap` second. |
|
55 | 56 | /// |
|
56 | 57 | /// [`RevisionNotInIndex`]: enum.NodeMapError.html#variant.RevisionNotInIndex |
|
57 | 58 | /// [`RevlogIndex`]: ../trait.RevlogIndex.html |
|
58 | 59 | pub trait NodeMap { |
|
59 | 60 | /// Find the unique `Revision` having the given `Node` |
|
60 | 61 | /// |
|
61 | 62 | /// If no Revision matches the given `Node`, `Ok(None)` is returned. |
|
62 | 63 | fn find_node( |
|
63 | 64 | &self, |
|
64 | 65 | index: &impl RevlogIndex, |
|
65 | 66 | node: &Node, |
|
66 | 67 | ) -> Result<Option<Revision>, NodeMapError> { |
|
67 | 68 | self.find_bin(index, node.into()) |
|
68 | 69 | } |
|
69 | 70 | |
|
70 | 71 | /// Find the unique Revision whose `Node` starts with a given binary prefix |
|
71 | 72 | /// |
|
72 | 73 | /// If no Revision matches the given prefix, `Ok(None)` is returned. |
|
73 | 74 | /// |
|
74 | 75 | /// If several Revisions match the given prefix, a [`MultipleResults`] |
|
75 | 76 | /// error is returned. |
|
76 | 77 | fn find_bin<'a>( |
|
77 | 78 | &self, |
|
78 | 79 | idx: &impl RevlogIndex, |
|
79 | 80 | prefix: NodePrefixRef<'a>, |
|
80 | 81 | ) -> Result<Option<Revision>, NodeMapError>; |
|
81 | 82 | |
|
82 | 83 | /// Find the unique Revision whose `Node` hexadecimal string representation |
|
83 | 84 | /// starts with a given prefix |
|
84 | 85 | /// |
|
85 | 86 | /// If no Revision matches the given prefix, `Ok(None)` is returned. |
|
86 | 87 | /// |
|
87 | 88 | /// If several Revisions match the given prefix, a [`MultipleResults`] |
|
88 | 89 | /// error is returned. |
|
89 | 90 | fn find_hex( |
|
90 | 91 | &self, |
|
91 | 92 | idx: &impl RevlogIndex, |
|
92 | 93 | prefix: &str, |
|
93 | 94 | ) -> Result<Option<Revision>, NodeMapError> { |
|
94 | 95 | self.find_bin(idx, NodePrefix::from_hex(prefix)?.borrow()) |
|
95 | 96 | } |
|
96 | 97 | } |
|
97 | 98 | |
|
98 | 99 | /// Low level NodeTree [`Blocks`] elements |
|
99 | 100 | /// |
|
100 | 101 | /// These are exactly as for instance on persistent storage. |
|
101 | 102 | type RawElement = i32; |
|
102 | 103 | |
|
103 | 104 | /// High level representation of values in NodeTree |
|
104 | 105 | /// [`Blocks`](struct.Block.html) |
|
105 | 106 | /// |
|
106 | 107 | /// This is the high level representation that most algorithms should |
|
107 | 108 | /// use. |
|
108 | 109 | #[derive(Clone, Debug, Eq, PartialEq)] |
|
109 | 110 | enum Element { |
|
110 | 111 | Rev(Revision), |
|
111 | 112 | Block(usize), |
|
112 | 113 | None, |
|
113 | 114 | } |
|
114 | 115 | |
|
115 | 116 | impl From<RawElement> for Element { |
|
116 | 117 | /// Conversion from low level representation, after endianness conversion. |
|
117 | 118 | /// |
|
118 | 119 | /// See [`Block`](struct.Block.html) for explanation about the encoding. |
|
119 | 120 | fn from(raw: RawElement) -> Element { |
|
120 | 121 | if raw >= 0 { |
|
121 | 122 | Element::Block(raw as usize) |
|
122 | 123 | } else if raw == -1 { |
|
123 | 124 | Element::None |
|
124 | 125 | } else { |
|
125 | 126 | Element::Rev(-raw - 2) |
|
126 | 127 | } |
|
127 | 128 | } |
|
128 | 129 | } |
|
129 | 130 | |
|
130 | 131 | impl From<Element> for RawElement { |
|
131 | 132 | fn from(element: Element) -> RawElement { |
|
132 | 133 | match element { |
|
133 | 134 | Element::None => 0, |
|
134 | 135 | Element::Block(i) => i as RawElement, |
|
135 | 136 | Element::Rev(rev) => -rev - 2, |
|
136 | 137 | } |
|
137 | 138 | } |
|
138 | 139 | } |
|
139 | 140 | |
|
140 | 141 | /// A logical block of the `NodeTree`, packed with a fixed size. |
|
141 | 142 | /// |
|
142 | 143 | /// These are always used in container types implementing `Index<Block>`, |
|
143 | 144 | /// such as `&Block` |
|
144 | 145 | /// |
|
145 | 146 | /// As an array of integers, its ith element encodes that the |
|
146 | 147 | /// ith potential edge from the block, representing the ith hexadecimal digit |
|
147 | 148 | /// (nybble) `i` is either: |
|
148 | 149 | /// |
|
149 | 150 | /// - absent (value -1) |
|
150 | 151 | /// - another `Block` in the same indexable container (value β₯ 0) |
|
151 | 152 | /// - a `Revision` leaf (value β€ -2) |
|
152 | 153 | /// |
|
153 | 154 | /// Endianness has to be fixed for consistency on shared storage across |
|
154 | 155 | /// different architectures. |
|
155 | 156 | /// |
|
156 | 157 | /// A key difference with the C `nodetree` is that we need to be |
|
157 | 158 | /// able to represent the [`Block`] at index 0, hence -1 is the empty marker |
|
158 | 159 | /// rather than 0 and the `Revision` range upper limit of -2 instead of -1. |
|
159 | 160 | /// |
|
160 | 161 | /// Another related difference is that `NULL_REVISION` (-1) is not |
|
161 | 162 | /// represented at all, because we want an immutable empty nodetree |
|
162 | 163 | /// to be valid. |
|
163 | 164 | |
|
164 | 165 | #[derive(Clone, PartialEq)] |
|
165 | 166 | pub struct Block([RawElement; 16]); |
|
166 | 167 | |
|
167 | 168 | impl Block { |
|
168 | 169 | fn new() -> Self { |
|
169 | 170 | Block([-1; 16]) |
|
170 | 171 | } |
|
171 | 172 | |
|
172 | 173 | fn get(&self, nybble: u8) -> Element { |
|
173 | 174 | Element::from(RawElement::from_be(self.0[nybble as usize])) |
|
174 | 175 | } |
|
175 | 176 | |
|
176 | 177 | fn set(&mut self, nybble: u8, element: Element) { |
|
177 | 178 | self.0[nybble as usize] = RawElement::to_be(element.into()) |
|
178 | 179 | } |
|
179 | 180 | } |
|
180 | 181 | |
|
181 | 182 | impl fmt::Debug for Block { |
|
182 | 183 | /// sparse representation for testing and debugging purposes |
|
183 | 184 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { |
|
184 | 185 | f.debug_map() |
|
185 | 186 | .entries((0..16).filter_map(|i| match self.get(i) { |
|
186 | 187 | Element::None => None, |
|
187 | 188 | element => Some((i, element)), |
|
188 | 189 | })) |
|
189 | 190 | .finish() |
|
190 | 191 | } |
|
191 | 192 | } |
|
192 | 193 | |
|
193 | 194 | /// A 16-radix tree with the root block at the end |
|
194 | 195 | pub struct NodeTree { |
|
195 | 196 | readonly: Box<dyn Deref<Target = [Block]> + Send>, |
|
196 | 197 | } |
|
197 | 198 | |
|
199 | impl Index<usize> for NodeTree { | |
|
200 | type Output = Block; | |
|
201 | ||
|
202 | fn index(&self, i: usize) -> &Block { | |
|
203 | &self.readonly[i] | |
|
204 | } | |
|
205 | } | |
|
206 | ||
|
198 | 207 | /// Return `None` unless the `Node` for `rev` has given prefix in `index`. |
|
199 | 208 | fn has_prefix_or_none<'p>( |
|
200 | 209 | idx: &impl RevlogIndex, |
|
201 | 210 | prefix: NodePrefixRef<'p>, |
|
202 | 211 | rev: Revision, |
|
203 | 212 | ) -> Result<Option<Revision>, NodeMapError> { |
|
204 | 213 | idx.node(rev) |
|
205 | 214 | .ok_or_else(|| NodeMapError::RevisionNotInIndex(rev)) |
|
206 | 215 | .map(|node| { |
|
207 | 216 | if prefix.is_prefix_of(node) { |
|
208 | 217 | Some(rev) |
|
209 | 218 | } else { |
|
210 | 219 | None |
|
211 | 220 | } |
|
212 | 221 | }) |
|
213 | 222 | } |
|
214 | 223 | |
|
215 | 224 | impl NodeTree { |
|
225 | fn len(&self) -> usize { | |
|
226 | self.readonly.len() | |
|
227 | } | |
|
228 | ||
|
229 | fn is_empty(&self) -> bool { | |
|
230 | self.len() == 0 | |
|
231 | } | |
|
232 | ||
|
216 | 233 | /// Main working method for `NodeTree` searches |
|
217 | 234 | /// |
|
218 | 235 | /// This partial implementation lacks special cases for NULL_REVISION |
|
219 | 236 | fn lookup<'p>( |
|
220 | 237 | &self, |
|
221 | 238 | prefix: NodePrefixRef<'p>, |
|
222 | 239 | ) -> Result<Option<Revision>, NodeMapError> { |
|
223 | let blocks: &[Block] = &*self.readonly; | |
|
224 | if blocks.is_empty() { | |
|
240 | if self.is_empty() { | |
|
225 | 241 | return Ok(None); |
|
226 | 242 | } |
|
227 |
let mut visit = |
|
|
243 | let mut visit = self.len() - 1; | |
|
228 | 244 | for i in 0..prefix.len() { |
|
229 | 245 | let nybble = prefix.get_nybble(i); |
|
230 |
match |
|
|
246 | match self[visit].get(nybble) { | |
|
231 | 247 | Element::None => return Ok(None), |
|
232 | 248 | Element::Rev(r) => return Ok(Some(r)), |
|
233 | 249 | Element::Block(idx) => visit = idx, |
|
234 | 250 | } |
|
235 | 251 | } |
|
236 | 252 | Err(NodeMapError::MultipleResults) |
|
237 | 253 | } |
|
238 | 254 | } |
|
239 | 255 | |
|
240 | 256 | impl From<Vec<Block>> for NodeTree { |
|
241 | 257 | fn from(vec: Vec<Block>) -> Self { |
|
242 | 258 | NodeTree { |
|
243 | 259 | readonly: Box::new(vec), |
|
244 | 260 | } |
|
245 | 261 | } |
|
246 | 262 | } |
|
247 | 263 | |
|
248 | 264 | impl fmt::Debug for NodeTree { |
|
249 | 265 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
|
250 | 266 | let blocks: &[Block] = &*self.readonly; |
|
251 | 267 | write!(f, "readonly: {:?}", blocks) |
|
252 | 268 | } |
|
253 | 269 | } |
|
254 | 270 | |
|
255 | 271 | impl NodeMap for NodeTree { |
|
256 | 272 | fn find_bin<'a>( |
|
257 | 273 | &self, |
|
258 | 274 | idx: &impl RevlogIndex, |
|
259 | 275 | prefix: NodePrefixRef<'a>, |
|
260 | 276 | ) -> Result<Option<Revision>, NodeMapError> { |
|
261 | 277 | self.lookup(prefix.clone()).and_then(|opt| { |
|
262 | 278 | opt.map_or(Ok(None), |rev| has_prefix_or_none(idx, prefix, rev)) |
|
263 | 279 | }) |
|
264 | 280 | } |
|
265 | 281 | } |
|
266 | 282 | |
|
267 | 283 | #[cfg(test)] |
|
268 | 284 | mod tests { |
|
269 | 285 | use super::NodeMapError::*; |
|
270 | 286 | use super::*; |
|
271 | 287 | use crate::revlog::node::{hex_pad_right, Node}; |
|
272 | 288 | use std::collections::HashMap; |
|
273 | 289 | |
|
274 | 290 | /// Creates a `Block` using a syntax close to the `Debug` output |
|
275 | 291 | macro_rules! block { |
|
276 | 292 | {$($nybble:tt : $variant:ident($val:tt)),*} => ( |
|
277 | 293 | { |
|
278 | 294 | let mut block = Block::new(); |
|
279 | 295 | $(block.set($nybble, Element::$variant($val)));*; |
|
280 | 296 | block |
|
281 | 297 | } |
|
282 | 298 | ) |
|
283 | 299 | } |
|
284 | 300 | |
|
285 | 301 | #[test] |
|
286 | 302 | fn test_block_debug() { |
|
287 | 303 | let mut block = Block::new(); |
|
288 | 304 | block.set(1, Element::Rev(3)); |
|
289 | 305 | block.set(10, Element::Block(0)); |
|
290 | 306 | assert_eq!(format!("{:?}", block), "{1: Rev(3), 10: Block(0)}"); |
|
291 | 307 | } |
|
292 | 308 | |
|
293 | 309 | #[test] |
|
294 | 310 | fn test_block_macro() { |
|
295 | 311 | let block = block! {5: Block(2)}; |
|
296 | 312 | assert_eq!(format!("{:?}", block), "{5: Block(2)}"); |
|
297 | 313 | |
|
298 | 314 | let block = block! {13: Rev(15), 5: Block(2)}; |
|
299 | 315 | assert_eq!(format!("{:?}", block), "{5: Block(2), 13: Rev(15)}"); |
|
300 | 316 | } |
|
301 | 317 | |
|
302 | 318 | #[test] |
|
303 | 319 | fn test_raw_block() { |
|
304 | 320 | let mut raw = [-1; 16]; |
|
305 | 321 | raw[0] = 0; |
|
306 | 322 | raw[1] = RawElement::to_be(15); |
|
307 | 323 | raw[2] = RawElement::to_be(-2); |
|
308 | 324 | raw[3] = RawElement::to_be(-1); |
|
309 | 325 | raw[4] = RawElement::to_be(-3); |
|
310 | 326 | let block = Block(raw); |
|
311 | 327 | assert_eq!(block.get(0), Element::Block(0)); |
|
312 | 328 | assert_eq!(block.get(1), Element::Block(15)); |
|
313 | 329 | assert_eq!(block.get(3), Element::None); |
|
314 | 330 | assert_eq!(block.get(2), Element::Rev(0)); |
|
315 | 331 | assert_eq!(block.get(4), Element::Rev(1)); |
|
316 | 332 | } |
|
317 | 333 | |
|
318 | 334 | type TestIndex = HashMap<Revision, Node>; |
|
319 | 335 | |
|
320 | 336 | impl RevlogIndex for TestIndex { |
|
321 | 337 | fn node(&self, rev: Revision) -> Option<&Node> { |
|
322 | 338 | self.get(&rev) |
|
323 | 339 | } |
|
324 | 340 | |
|
325 | 341 | fn len(&self) -> usize { |
|
326 | 342 | self.len() |
|
327 | 343 | } |
|
328 | 344 | } |
|
329 | 345 | |
|
330 | 346 | /// Pad hexadecimal Node prefix with zeros on the right, then insert |
|
331 | 347 | /// |
|
332 | 348 | /// This avoids having to repeatedly write very long hexadecimal |
|
333 | 349 | /// strings for test data, and brings actual hash size independency. |
|
334 | 350 | fn pad_insert(idx: &mut TestIndex, rev: Revision, hex: &str) { |
|
335 | 351 | idx.insert(rev, Node::from_hex(&hex_pad_right(hex)).unwrap()); |
|
336 | 352 | } |
|
337 | 353 | |
|
338 | 354 | fn sample_nodetree() -> NodeTree { |
|
339 | 355 | NodeTree::from(vec![ |
|
340 | 356 | block![0: Rev(9)], |
|
341 | 357 | block![0: Rev(0), 1: Rev(9)], |
|
342 | 358 | block![0: Block(1), 1:Rev(1)], |
|
343 | 359 | ]) |
|
344 | 360 | } |
|
345 | 361 | |
|
346 | 362 | #[test] |
|
347 | 363 | fn test_nt_debug() { |
|
348 | 364 | let nt = sample_nodetree(); |
|
349 | 365 | assert_eq!( |
|
350 | 366 | format!("{:?}", nt), |
|
351 | 367 | "readonly: \ |
|
352 | 368 | [{0: Rev(9)}, {0: Rev(0), 1: Rev(9)}, {0: Block(1), 1: Rev(1)}]" |
|
353 | 369 | ); |
|
354 | 370 | } |
|
355 | 371 | |
|
356 | 372 | #[test] |
|
357 | 373 | fn test_immutable_find_simplest() -> Result<(), NodeMapError> { |
|
358 | 374 | let mut idx: TestIndex = HashMap::new(); |
|
359 | 375 | pad_insert(&mut idx, 1, "1234deadcafe"); |
|
360 | 376 | |
|
361 | 377 | let nt = NodeTree::from(vec![block![1: Rev(1)]]); |
|
362 | 378 | assert_eq!(nt.find_hex(&idx, "1")?, Some(1)); |
|
363 | 379 | assert_eq!(nt.find_hex(&idx, "12")?, Some(1)); |
|
364 | 380 | assert_eq!(nt.find_hex(&idx, "1234de")?, Some(1)); |
|
365 | 381 | assert_eq!(nt.find_hex(&idx, "1a")?, None); |
|
366 | 382 | assert_eq!(nt.find_hex(&idx, "ab")?, None); |
|
367 | 383 | |
|
368 | 384 | // and with full binary Nodes |
|
369 | 385 | assert_eq!(nt.find_node(&idx, idx.get(&1).unwrap())?, Some(1)); |
|
370 | 386 | let unknown = Node::from_hex(&hex_pad_right("3d")).unwrap(); |
|
371 | 387 | assert_eq!(nt.find_node(&idx, &unknown)?, None); |
|
372 | 388 | Ok(()) |
|
373 | 389 | } |
|
374 | 390 | |
|
375 | 391 | #[test] |
|
376 | 392 | fn test_immutable_find_one_jump() { |
|
377 | 393 | let mut idx = TestIndex::new(); |
|
378 | 394 | pad_insert(&mut idx, 9, "012"); |
|
379 | 395 | pad_insert(&mut idx, 0, "00a"); |
|
380 | 396 | |
|
381 | 397 | let nt = sample_nodetree(); |
|
382 | 398 | |
|
383 | 399 | assert_eq!(nt.find_hex(&idx, "0"), Err(MultipleResults)); |
|
384 | 400 | assert_eq!(nt.find_hex(&idx, "01"), Ok(Some(9))); |
|
385 | 401 | assert_eq!(nt.find_hex(&idx, "00"), Ok(Some(0))); |
|
386 | 402 | assert_eq!(nt.find_hex(&idx, "00a"), Ok(Some(0))); |
|
387 | 403 | } |
|
388 | 404 | } |
General Comments 0
You need to be logged in to leave comments.
Login now