##// END OF EJS Templates
rust: Simplify error type for reading hex node IDs...
Simon Sapin -
r47156:5893706a default
parent child Browse files
Show More
@@ -1,68 +1,68 b''
1 1 // Copyright 2018-2020 Georges Racinet <georges.racinet@octobus.net>
2 2 // and Mercurial contributors
3 3 //
4 4 // This software may be used and distributed according to the terms of the
5 5 // GNU General Public License version 2 or any later version.
6 6 //! Mercurial concepts for handling revision history
7 7
8 8 pub mod node;
9 9 pub mod nodemap;
10 10 mod nodemap_docket;
11 11 pub mod path_encode;
12 pub use node::{Node, NodeError, NodePrefix, NodePrefixRef};
12 pub use node::{FromHexError, Node, NodePrefix, NodePrefixRef};
13 13 pub mod changelog;
14 14 pub mod index;
15 15 pub mod manifest;
16 16 pub mod patch;
17 17 pub mod revlog;
18 18
19 19 /// Mercurial revision numbers
20 20 ///
21 21 /// As noted in revlog.c, revision numbers are actually encoded in
22 22 /// 4 bytes, and are liberally converted to ints, whence the i32
23 23 pub type Revision = i32;
24 24
25 25 /// Marker expressing the absence of a parent
26 26 ///
27 27 /// Independently of the actual representation, `NULL_REVISION` is guaranteed
28 28 /// to be smaller than all existing revisions.
29 29 pub const NULL_REVISION: Revision = -1;
30 30
31 31 /// Same as `mercurial.node.wdirrev`
32 32 ///
33 33 /// This is also equal to `i32::max_value()`, but it's better to spell
34 34 /// it out explicitely, same as in `mercurial.node`
35 35 #[allow(clippy::unreadable_literal)]
36 36 pub const WORKING_DIRECTORY_REVISION: Revision = 0x7fffffff;
37 37
38 38 /// The simplest expression of what we need of Mercurial DAGs.
39 39 pub trait Graph {
40 40 /// Return the two parents of the given `Revision`.
41 41 ///
42 42 /// Each of the parents can be independently `NULL_REVISION`
43 43 fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError>;
44 44 }
45 45
46 46 #[derive(Clone, Debug, PartialEq)]
47 47 pub enum GraphError {
48 48 ParentOutOfRange(Revision),
49 49 WorkingDirectoryUnsupported,
50 50 }
51 51
52 52 /// The Mercurial Revlog Index
53 53 ///
54 54 /// This is currently limited to the minimal interface that is needed for
55 55 /// the [`nodemap`](nodemap/index.html) module
56 56 pub trait RevlogIndex {
57 57 /// Total number of Revisions referenced in this index
58 58 fn len(&self) -> usize;
59 59
60 60 fn is_empty(&self) -> bool {
61 61 self.len() == 0
62 62 }
63 63
64 64 /// Return a reference to the Node or `None` if rev is out of bounds
65 65 ///
66 66 /// `NULL_REVISION` is not considered to be out of bounds.
67 67 fn node(&self, rev: Revision) -> Option<&Node>;
68 68 }
@@ -1,459 +1,412 b''
1 1 // Copyright 2019-2020 Georges Racinet <georges.racinet@octobus.net>
2 2 //
3 3 // This software may be used and distributed according to the terms of the
4 4 // GNU General Public License version 2 or any later version.
5 5
6 6 //! Definitions and utilities for Revision nodes
7 7 //!
8 8 //! In Mercurial code base, it is customary to call "a node" the binary SHA
9 9 //! of a revision.
10 10
11 11 use bytes_cast::BytesCast;
12 use hex::{self, FromHex, FromHexError};
12 use hex::{self, FromHex};
13 13 use std::convert::TryFrom;
14 14 use std::fmt;
15 15
16 16 /// The length in bytes of a `Node`
17 17 ///
18 18 /// This constant is meant to ease refactors of this module, and
19 19 /// are private so that calling code does not expect all nodes have
20 20 /// the same size, should we support several formats concurrently in
21 21 /// the future.
22 22 pub const NODE_BYTES_LENGTH: usize = 20;
23 23
24 24 /// Id of the null node.
25 25 ///
26 26 /// Used to indicate the absence of node.
27 27 pub const NULL_NODE_ID: [u8; NODE_BYTES_LENGTH] = [0u8; NODE_BYTES_LENGTH];
28 28
29 29 /// The length in bytes of a `Node`
30 30 ///
31 31 /// see also `NODES_BYTES_LENGTH` about it being private.
32 32 const NODE_NYBBLES_LENGTH: usize = 2 * NODE_BYTES_LENGTH;
33 33
34 34 /// Private alias for readability and to ease future change
35 35 type NodeData = [u8; NODE_BYTES_LENGTH];
36 36
37 37 /// Binary revision SHA
38 38 ///
39 39 /// ## Future changes of hash size
40 40 ///
41 41 /// To accomodate future changes of hash size, Rust callers
42 42 /// should use the conversion methods at the boundaries (FFI, actual
43 43 /// computation of hashes and I/O) only, and only if required.
44 44 ///
45 45 /// All other callers outside of unit tests should just handle `Node` values
46 46 /// and never make any assumption on the actual length, using [`nybbles_len`]
47 47 /// if they need a loop boundary.
48 48 ///
49 49 /// All methods that create a `Node` either take a type that enforces
50 /// the size or fail immediately at runtime with [`ExactLengthRequired`].
50 /// the size or return an error at runtime.
51 51 ///
52 52 /// [`nybbles_len`]: #method.nybbles_len
53 /// [`ExactLengthRequired`]: struct.NodeError#variant.ExactLengthRequired
54 53 #[derive(Clone, Debug, PartialEq, BytesCast)]
55 54 #[repr(transparent)]
56 55 pub struct Node {
57 56 data: NodeData,
58 57 }
59 58
60 59 /// The node value for NULL_REVISION
61 60 pub const NULL_NODE: Node = Node {
62 61 data: [0; NODE_BYTES_LENGTH],
63 62 };
64 63
65 64 impl From<NodeData> for Node {
66 65 fn from(data: NodeData) -> Node {
67 66 Node { data }
68 67 }
69 68 }
70 69
71 70 /// Return an error if the slice has an unexpected length
72 71 impl<'a> TryFrom<&'a [u8]> for &'a Node {
73 72 type Error = ();
74 73
75 74 #[inline]
76 75 fn try_from(bytes: &'a [u8]) -> Result<&'a Node, Self::Error> {
77 76 match Node::from_bytes(bytes) {
78 77 Ok((node, rest)) if rest.is_empty() => Ok(node),
79 78 _ => Err(()),
80 79 }
81 80 }
82 81 }
83 82
84 83 impl fmt::LowerHex for Node {
85 84 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
86 85 for &byte in &self.data {
87 86 write!(f, "{:02x}", byte)?
88 87 }
89 88 Ok(())
90 89 }
91 90 }
92 91
93 #[derive(Debug, PartialEq)]
94 pub enum NodeError {
95 ExactLengthRequired(usize, String),
96 PrefixTooLong(String),
97 HexError(FromHexError, String),
98 }
92 #[derive(Debug)]
93 pub struct FromHexError;
99 94
100 95 /// Low level utility function, also for prefixes
101 96 fn get_nybble(s: &[u8], i: usize) -> u8 {
102 97 if i % 2 == 0 {
103 98 s[i / 2] >> 4
104 99 } else {
105 100 s[i / 2] & 0x0f
106 101 }
107 102 }
108 103
109 104 impl Node {
110 105 /// Retrieve the `i`th half-byte of the binary data.
111 106 ///
112 107 /// This is also the `i`th hexadecimal digit in numeric form,
113 108 /// also called a [nybble](https://en.wikipedia.org/wiki/Nibble).
114 109 pub fn get_nybble(&self, i: usize) -> u8 {
115 110 get_nybble(&self.data, i)
116 111 }
117 112
118 113 /// Length of the data, in nybbles
119 114 pub fn nybbles_len(&self) -> usize {
120 115 // public exposure as an instance method only, so that we can
121 116 // easily support several sizes of hashes if needed in the future.
122 117 NODE_NYBBLES_LENGTH
123 118 }
124 119
125 120 /// Convert from hexadecimal string representation
126 121 ///
127 122 /// Exact length is required.
128 123 ///
129 124 /// To be used in FFI and I/O only, in order to facilitate future
130 125 /// changes of hash format.
131 pub fn from_hex(hex: impl AsRef<[u8]>) -> Result<Node, NodeError> {
126 pub fn from_hex(hex: impl AsRef<[u8]>) -> Result<Node, FromHexError> {
132 127 Ok(NodeData::from_hex(hex.as_ref())
133 .map_err(|e| NodeError::from((e, hex)))?
128 .map_err(|_| FromHexError)?
134 129 .into())
135 130 }
136 131
137 132 /// Provide access to binary data
138 133 ///
139 134 /// This is needed by FFI layers, for instance to return expected
140 135 /// binary values to Python.
141 136 pub fn as_bytes(&self) -> &[u8] {
142 137 &self.data
143 138 }
144 139 }
145 140
146 impl<T: AsRef<[u8]>> From<(FromHexError, T)> for NodeError {
147 fn from(err_offender: (FromHexError, T)) -> Self {
148 let (err, offender) = err_offender;
149 let offender = String::from_utf8_lossy(offender.as_ref()).into_owned();
150 match err {
151 FromHexError::InvalidStringLength => {
152 NodeError::ExactLengthRequired(NODE_NYBBLES_LENGTH, offender)
153 }
154 _ => NodeError::HexError(err, offender),
155 }
156 }
157 }
158
159 141 /// The beginning of a binary revision SHA.
160 142 ///
161 143 /// Since it can potentially come from an hexadecimal representation with
162 144 /// odd length, it needs to carry around whether the last 4 bits are relevant
163 145 /// or not.
164 146 #[derive(Debug, PartialEq)]
165 147 pub struct NodePrefix {
166 148 buf: Vec<u8>,
167 149 is_odd: bool,
168 150 }
169 151
170 152 impl NodePrefix {
171 153 /// Convert from hexadecimal string representation
172 154 ///
173 155 /// Similarly to `hex::decode`, can be used with Unicode string types
174 156 /// (`String`, `&str`) as well as bytes.
175 157 ///
176 158 /// To be used in FFI and I/O only, in order to facilitate future
177 159 /// changes of hash format.
178 pub fn from_hex(hex: impl AsRef<[u8]>) -> Result<Self, NodeError> {
160 pub fn from_hex(hex: impl AsRef<[u8]>) -> Result<Self, FromHexError> {
179 161 let hex = hex.as_ref();
180 162 let len = hex.len();
181 163 if len > NODE_NYBBLES_LENGTH {
182 return Err(NodeError::PrefixTooLong(
183 String::from_utf8_lossy(hex).to_owned().to_string(),
184 ));
164 return Err(FromHexError);
185 165 }
186 166
187 167 let is_odd = len % 2 == 1;
188 168 let even_part = if is_odd { &hex[..len - 1] } else { hex };
189 169 let mut buf: Vec<u8> =
190 Vec::from_hex(&even_part).map_err(|e| (e, hex))?;
170 Vec::from_hex(&even_part).map_err(|_| FromHexError)?;
191 171
192 172 if is_odd {
193 173 let latest_char = char::from(hex[len - 1]);
194 let latest_nybble = latest_char.to_digit(16).ok_or_else(|| {
195 (
196 FromHexError::InvalidHexCharacter {
197 c: latest_char,
198 index: len - 1,
199 },
200 hex,
201 )
202 })? as u8;
174 let latest_nybble =
175 latest_char.to_digit(16).ok_or_else(|| FromHexError)? as u8;
203 176 buf.push(latest_nybble << 4);
204 177 }
205 178 Ok(NodePrefix { buf, is_odd })
206 179 }
207 180
208 181 pub fn borrow(&self) -> NodePrefixRef {
209 182 NodePrefixRef {
210 183 buf: &self.buf,
211 184 is_odd: self.is_odd,
212 185 }
213 186 }
214 187 }
215 188
216 189 #[derive(Clone, Debug, PartialEq)]
217 190 pub struct NodePrefixRef<'a> {
218 191 buf: &'a [u8],
219 192 is_odd: bool,
220 193 }
221 194
222 195 impl<'a> NodePrefixRef<'a> {
223 196 pub fn len(&self) -> usize {
224 197 if self.is_odd {
225 198 self.buf.len() * 2 - 1
226 199 } else {
227 200 self.buf.len() * 2
228 201 }
229 202 }
230 203
231 204 pub fn is_empty(&self) -> bool {
232 205 self.len() == 0
233 206 }
234 207
235 208 pub fn is_prefix_of(&self, node: &Node) -> bool {
236 209 if self.is_odd {
237 210 let buf = self.buf;
238 211 let last_pos = buf.len() - 1;
239 212 node.data.starts_with(buf.split_at(last_pos).0)
240 213 && node.data[last_pos] >> 4 == buf[last_pos] >> 4
241 214 } else {
242 215 node.data.starts_with(self.buf)
243 216 }
244 217 }
245 218
246 219 /// Retrieve the `i`th half-byte from the prefix.
247 220 ///
248 221 /// This is also the `i`th hexadecimal digit in numeric form,
249 222 /// also called a [nybble](https://en.wikipedia.org/wiki/Nibble).
250 223 pub fn get_nybble(&self, i: usize) -> u8 {
251 224 assert!(i < self.len());
252 225 get_nybble(self.buf, i)
253 226 }
254 227
255 228 /// Return the index first nybble that's different from `node`
256 229 ///
257 230 /// If the return value is `None` that means that `self` is
258 231 /// a prefix of `node`, but the current method is a bit slower
259 232 /// than `is_prefix_of`.
260 233 ///
261 234 /// Returned index is as in `get_nybble`, i.e., starting at 0.
262 235 pub fn first_different_nybble(&self, node: &Node) -> Option<usize> {
263 236 let buf = self.buf;
264 237 let until = if self.is_odd {
265 238 buf.len() - 1
266 239 } else {
267 240 buf.len()
268 241 };
269 242 for (i, item) in buf.iter().enumerate().take(until) {
270 243 if *item != node.data[i] {
271 244 return if *item & 0xf0 == node.data[i] & 0xf0 {
272 245 Some(2 * i + 1)
273 246 } else {
274 247 Some(2 * i)
275 248 };
276 249 }
277 250 }
278 251 if self.is_odd && buf[until] & 0xf0 != node.data[until] & 0xf0 {
279 252 Some(until * 2)
280 253 } else {
281 254 None
282 255 }
283 256 }
284 257 }
285 258
286 259 /// A shortcut for full `Node` references
287 260 impl<'a> From<&'a Node> for NodePrefixRef<'a> {
288 261 fn from(node: &'a Node) -> Self {
289 262 NodePrefixRef {
290 263 buf: &node.data,
291 264 is_odd: false,
292 265 }
293 266 }
294 267 }
295 268
296 269 impl PartialEq<Node> for NodePrefixRef<'_> {
297 270 fn eq(&self, other: &Node) -> bool {
298 271 !self.is_odd && self.buf == other.data
299 272 }
300 273 }
301 274
302 275 #[cfg(test)]
303 276 mod tests {
304 277 use super::*;
305 278
306 279 fn sample_node() -> Node {
307 280 let mut data = [0; NODE_BYTES_LENGTH];
308 281 data.copy_from_slice(&[
309 282 0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0xfe, 0xdc, 0xba,
310 283 0x98, 0x76, 0x54, 0x32, 0x10, 0xde, 0xad, 0xbe, 0xef,
311 284 ]);
312 285 data.into()
313 286 }
314 287
315 288 /// Pad an hexadecimal string to reach `NODE_NYBBLES_LENGTH`
316 289 ///check_hash
317 290 /// The padding is made with zeros
318 291 pub fn hex_pad_right(hex: &str) -> String {
319 292 let mut res = hex.to_string();
320 293 while res.len() < NODE_NYBBLES_LENGTH {
321 294 res.push('0');
322 295 }
323 296 res
324 297 }
325 298
326 299 fn sample_node_hex() -> String {
327 300 hex_pad_right("0123456789abcdeffedcba9876543210deadbeef")
328 301 }
329 302
330 303 #[test]
331 304 fn test_node_from_hex() {
332 assert_eq!(Node::from_hex(&sample_node_hex()), Ok(sample_node()));
305 assert_eq!(Node::from_hex(&sample_node_hex()).unwrap(), sample_node());
333 306
334 307 let mut short = hex_pad_right("0123");
335 308 short.pop();
336 309 short.pop();
337 assert_eq!(
338 Node::from_hex(&short),
339 Err(NodeError::ExactLengthRequired(NODE_NYBBLES_LENGTH, short)),
340 );
310 assert!(Node::from_hex(&short).is_err());
341 311
342 312 let not_hex = hex_pad_right("012... oops");
343 assert_eq!(
344 Node::from_hex(&not_hex),
345 Err(NodeError::HexError(
346 FromHexError::InvalidHexCharacter { c: '.', index: 3 },
347 not_hex,
348 )),
349 );
313 assert!(Node::from_hex(&not_hex).is_err(),);
350 314 }
351 315
352 316 #[test]
353 317 fn test_node_encode_hex() {
354 318 assert_eq!(format!("{:x}", sample_node()), sample_node_hex());
355 319 }
356 320
357 321 #[test]
358 fn test_prefix_from_hex() -> Result<(), NodeError> {
322 fn test_prefix_from_hex() -> Result<(), FromHexError> {
359 323 assert_eq!(
360 324 NodePrefix::from_hex("0e1")?,
361 325 NodePrefix {
362 326 buf: vec![14, 16],
363 327 is_odd: true
364 328 }
365 329 );
366 330 assert_eq!(
367 331 NodePrefix::from_hex("0e1a")?,
368 332 NodePrefix {
369 333 buf: vec![14, 26],
370 334 is_odd: false
371 335 }
372 336 );
373 337
374 338 // checking limit case
375 339 let node_as_vec = sample_node().data.iter().cloned().collect();
376 340 assert_eq!(
377 341 NodePrefix::from_hex(sample_node_hex())?,
378 342 NodePrefix {
379 343 buf: node_as_vec,
380 344 is_odd: false
381 345 }
382 346 );
383 347
384 348 Ok(())
385 349 }
386 350
387 351 #[test]
388 352 fn test_prefix_from_hex_errors() {
389 assert_eq!(
390 NodePrefix::from_hex("testgr"),
391 Err(NodeError::HexError(
392 FromHexError::InvalidHexCharacter { c: 't', index: 0 },
393 "testgr".to_string()
394 ))
395 );
353 assert!(NodePrefix::from_hex("testgr").is_err());
396 354 let mut long = format!("{:x}", NULL_NODE);
397 355 long.push('c');
398 match NodePrefix::from_hex(&long)
399 .expect_err("should be refused as too long")
400 {
401 NodeError::PrefixTooLong(s) => assert_eq!(s, long),
402 err => panic!(format!("Should have been TooLong, got {:?}", err)),
403 }
356 assert!(NodePrefix::from_hex(&long).is_err())
404 357 }
405 358
406 359 #[test]
407 fn test_is_prefix_of() -> Result<(), NodeError> {
360 fn test_is_prefix_of() -> Result<(), FromHexError> {
408 361 let mut node_data = [0; NODE_BYTES_LENGTH];
409 362 node_data[0] = 0x12;
410 363 node_data[1] = 0xca;
411 364 let node = Node::from(node_data);
412 365 assert!(NodePrefix::from_hex("12")?.borrow().is_prefix_of(&node));
413 366 assert!(!NodePrefix::from_hex("1a")?.borrow().is_prefix_of(&node));
414 367 assert!(NodePrefix::from_hex("12c")?.borrow().is_prefix_of(&node));
415 368 assert!(!NodePrefix::from_hex("12d")?.borrow().is_prefix_of(&node));
416 369 Ok(())
417 370 }
418 371
419 372 #[test]
420 fn test_get_nybble() -> Result<(), NodeError> {
373 fn test_get_nybble() -> Result<(), FromHexError> {
421 374 let prefix = NodePrefix::from_hex("dead6789cafe")?;
422 375 assert_eq!(prefix.borrow().get_nybble(0), 13);
423 376 assert_eq!(prefix.borrow().get_nybble(7), 9);
424 377 Ok(())
425 378 }
426 379
427 380 #[test]
428 381 fn test_first_different_nybble_even_prefix() {
429 382 let prefix = NodePrefix::from_hex("12ca").unwrap();
430 383 let prefref = prefix.borrow();
431 384 let mut node = Node::from([0; NODE_BYTES_LENGTH]);
432 385 assert_eq!(prefref.first_different_nybble(&node), Some(0));
433 386 node.data[0] = 0x13;
434 387 assert_eq!(prefref.first_different_nybble(&node), Some(1));
435 388 node.data[0] = 0x12;
436 389 assert_eq!(prefref.first_different_nybble(&node), Some(2));
437 390 node.data[1] = 0xca;
438 391 // now it is a prefix
439 392 assert_eq!(prefref.first_different_nybble(&node), None);
440 393 }
441 394
442 395 #[test]
443 396 fn test_first_different_nybble_odd_prefix() {
444 397 let prefix = NodePrefix::from_hex("12c").unwrap();
445 398 let prefref = prefix.borrow();
446 399 let mut node = Node::from([0; NODE_BYTES_LENGTH]);
447 400 assert_eq!(prefref.first_different_nybble(&node), Some(0));
448 401 node.data[0] = 0x13;
449 402 assert_eq!(prefref.first_different_nybble(&node), Some(1));
450 403 node.data[0] = 0x12;
451 404 assert_eq!(prefref.first_different_nybble(&node), Some(2));
452 405 node.data[1] = 0xca;
453 406 // now it is a prefix
454 407 assert_eq!(prefref.first_different_nybble(&node), None);
455 408 }
456 409 }
457 410
458 411 #[cfg(test)]
459 412 pub use tests::hex_pad_right;
@@ -1,1101 +1,1101 b''
1 1 // Copyright 2018-2020 Georges Racinet <georges.racinet@octobus.net>
2 2 // and Mercurial contributors
3 3 //
4 4 // This software may be used and distributed according to the terms of the
5 5 // GNU General Public License version 2 or any later version.
6 6 //! Indexing facilities for fast retrieval of `Revision` from `Node`
7 7 //!
8 8 //! This provides a variation on the 16-ary radix tree that is
9 9 //! provided as "nodetree" in revlog.c, ready for append-only persistence
10 10 //! on disk.
11 11 //!
12 12 //! Following existing implicit conventions, the "nodemap" terminology
13 13 //! is used in a more abstract context.
14 14
15 15 use super::{
16 node::NULL_NODE, Node, NodeError, NodePrefix, NodePrefixRef, Revision,
16 node::NULL_NODE, FromHexError, Node, NodePrefix, NodePrefixRef, Revision,
17 17 RevlogIndex, NULL_REVISION,
18 18 };
19 19
20 20 use bytes_cast::{unaligned, BytesCast};
21 21 use std::cmp::max;
22 22 use std::fmt;
23 23 use std::mem::{self, align_of, size_of};
24 24 use std::ops::Deref;
25 25 use std::ops::Index;
26 26
27 27 #[derive(Debug, PartialEq)]
28 28 pub enum NodeMapError {
29 29 MultipleResults,
30 InvalidNodePrefix(NodeError),
30 InvalidNodePrefix,
31 31 /// A `Revision` stored in the nodemap could not be found in the index
32 32 RevisionNotInIndex(Revision),
33 33 }
34 34
35 impl From<NodeError> for NodeMapError {
36 fn from(err: NodeError) -> Self {
37 NodeMapError::InvalidNodePrefix(err)
35 impl From<FromHexError> for NodeMapError {
36 fn from(_: FromHexError) -> Self {
37 NodeMapError::InvalidNodePrefix
38 38 }
39 39 }
40 40
41 41 /// Mapping system from Mercurial nodes to revision numbers.
42 42 ///
43 43 /// ## `RevlogIndex` and `NodeMap`
44 44 ///
45 45 /// One way to think about their relationship is that
46 46 /// the `NodeMap` is a prefix-oriented reverse index of the `Node` information
47 47 /// carried by a [`RevlogIndex`].
48 48 ///
49 49 /// Many of the methods in this trait take a `RevlogIndex` argument
50 50 /// which is used for validation of their results. This index must naturally
51 51 /// be the one the `NodeMap` is about, and it must be consistent.
52 52 ///
53 53 /// Notably, the `NodeMap` must not store
54 54 /// information about more `Revision` values than there are in the index.
55 55 /// In these methods, an encountered `Revision` is not in the index, a
56 56 /// [`RevisionNotInIndex`] error is returned.
57 57 ///
58 58 /// In insert operations, the rule is thus that the `NodeMap` must always
59 59 /// be updated after the `RevlogIndex`
60 60 /// be updated first, and the `NodeMap` second.
61 61 ///
62 62 /// [`RevisionNotInIndex`]: enum.NodeMapError.html#variant.RevisionNotInIndex
63 63 /// [`RevlogIndex`]: ../trait.RevlogIndex.html
64 64 pub trait NodeMap {
65 65 /// Find the unique `Revision` having the given `Node`
66 66 ///
67 67 /// If no Revision matches the given `Node`, `Ok(None)` is returned.
68 68 fn find_node(
69 69 &self,
70 70 index: &impl RevlogIndex,
71 71 node: &Node,
72 72 ) -> Result<Option<Revision>, NodeMapError> {
73 73 self.find_bin(index, node.into())
74 74 }
75 75
76 76 /// Find the unique Revision whose `Node` starts with a given binary prefix
77 77 ///
78 78 /// If no Revision matches the given prefix, `Ok(None)` is returned.
79 79 ///
80 80 /// If several Revisions match the given prefix, a [`MultipleResults`]
81 81 /// error is returned.
82 82 fn find_bin<'a>(
83 83 &self,
84 84 idx: &impl RevlogIndex,
85 85 prefix: NodePrefixRef<'a>,
86 86 ) -> Result<Option<Revision>, NodeMapError>;
87 87
88 88 /// Find the unique Revision whose `Node` hexadecimal string representation
89 89 /// starts with a given prefix
90 90 ///
91 91 /// If no Revision matches the given prefix, `Ok(None)` is returned.
92 92 ///
93 93 /// If several Revisions match the given prefix, a [`MultipleResults`]
94 94 /// error is returned.
95 95 fn find_hex(
96 96 &self,
97 97 idx: &impl RevlogIndex,
98 98 prefix: &str,
99 99 ) -> Result<Option<Revision>, NodeMapError> {
100 100 self.find_bin(idx, NodePrefix::from_hex(prefix)?.borrow())
101 101 }
102 102
103 103 /// Give the size of the shortest node prefix that determines
104 104 /// the revision uniquely.
105 105 ///
106 106 /// From a binary node prefix, if it is matched in the node map, this
107 107 /// returns the number of hexadecimal digits that would had sufficed
108 108 /// to find the revision uniquely.
109 109 ///
110 110 /// Returns `None` if no `Revision` could be found for the prefix.
111 111 ///
112 112 /// If several Revisions match the given prefix, a [`MultipleResults`]
113 113 /// error is returned.
114 114 fn unique_prefix_len_bin<'a>(
115 115 &self,
116 116 idx: &impl RevlogIndex,
117 117 node_prefix: NodePrefixRef<'a>,
118 118 ) -> Result<Option<usize>, NodeMapError>;
119 119
120 120 /// Same as `unique_prefix_len_bin`, with the hexadecimal representation
121 121 /// of the prefix as input.
122 122 fn unique_prefix_len_hex(
123 123 &self,
124 124 idx: &impl RevlogIndex,
125 125 prefix: &str,
126 126 ) -> Result<Option<usize>, NodeMapError> {
127 127 self.unique_prefix_len_bin(idx, NodePrefix::from_hex(prefix)?.borrow())
128 128 }
129 129
130 130 /// Same as `unique_prefix_len_bin`, with a full `Node` as input
131 131 fn unique_prefix_len_node(
132 132 &self,
133 133 idx: &impl RevlogIndex,
134 134 node: &Node,
135 135 ) -> Result<Option<usize>, NodeMapError> {
136 136 self.unique_prefix_len_bin(idx, node.into())
137 137 }
138 138 }
139 139
140 140 pub trait MutableNodeMap: NodeMap {
141 141 fn insert<I: RevlogIndex>(
142 142 &mut self,
143 143 index: &I,
144 144 node: &Node,
145 145 rev: Revision,
146 146 ) -> Result<(), NodeMapError>;
147 147 }
148 148
149 149 /// Low level NodeTree [`Blocks`] elements
150 150 ///
151 151 /// These are exactly as for instance on persistent storage.
152 152 type RawElement = unaligned::I32Be;
153 153
154 154 /// High level representation of values in NodeTree
155 155 /// [`Blocks`](struct.Block.html)
156 156 ///
157 157 /// This is the high level representation that most algorithms should
158 158 /// use.
159 159 #[derive(Clone, Debug, Eq, PartialEq)]
160 160 enum Element {
161 161 Rev(Revision),
162 162 Block(usize),
163 163 None,
164 164 }
165 165
166 166 impl From<RawElement> for Element {
167 167 /// Conversion from low level representation, after endianness conversion.
168 168 ///
169 169 /// See [`Block`](struct.Block.html) for explanation about the encoding.
170 170 fn from(raw: RawElement) -> Element {
171 171 let int = raw.get();
172 172 if int >= 0 {
173 173 Element::Block(int as usize)
174 174 } else if int == -1 {
175 175 Element::None
176 176 } else {
177 177 Element::Rev(-int - 2)
178 178 }
179 179 }
180 180 }
181 181
182 182 impl From<Element> for RawElement {
183 183 fn from(element: Element) -> RawElement {
184 184 RawElement::from(match element {
185 185 Element::None => 0,
186 186 Element::Block(i) => i as i32,
187 187 Element::Rev(rev) => -rev - 2,
188 188 })
189 189 }
190 190 }
191 191
192 192 /// A logical block of the `NodeTree`, packed with a fixed size.
193 193 ///
194 194 /// These are always used in container types implementing `Index<Block>`,
195 195 /// such as `&Block`
196 196 ///
197 197 /// As an array of integers, its ith element encodes that the
198 198 /// ith potential edge from the block, representing the ith hexadecimal digit
199 199 /// (nybble) `i` is either:
200 200 ///
201 201 /// - absent (value -1)
202 202 /// - another `Block` in the same indexable container (value ≥ 0)
203 203 /// - a `Revision` leaf (value ≤ -2)
204 204 ///
205 205 /// Endianness has to be fixed for consistency on shared storage across
206 206 /// different architectures.
207 207 ///
208 208 /// A key difference with the C `nodetree` is that we need to be
209 209 /// able to represent the [`Block`] at index 0, hence -1 is the empty marker
210 210 /// rather than 0 and the `Revision` range upper limit of -2 instead of -1.
211 211 ///
212 212 /// Another related difference is that `NULL_REVISION` (-1) is not
213 213 /// represented at all, because we want an immutable empty nodetree
214 214 /// to be valid.
215 215
216 216 const ELEMENTS_PER_BLOCK: usize = 16; // number of different values in a nybble
217 217
218 218 #[derive(Copy, Clone, BytesCast, PartialEq)]
219 219 #[repr(transparent)]
220 220 pub struct Block([RawElement; ELEMENTS_PER_BLOCK]);
221 221
222 222 impl Block {
223 223 fn new() -> Self {
224 224 let absent_node = RawElement::from(-1);
225 225 Block([absent_node; ELEMENTS_PER_BLOCK])
226 226 }
227 227
228 228 fn get(&self, nybble: u8) -> Element {
229 229 self.0[nybble as usize].into()
230 230 }
231 231
232 232 fn set(&mut self, nybble: u8, element: Element) {
233 233 self.0[nybble as usize] = element.into()
234 234 }
235 235 }
236 236
237 237 impl fmt::Debug for Block {
238 238 /// sparse representation for testing and debugging purposes
239 239 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
240 240 f.debug_map()
241 241 .entries((0..16).filter_map(|i| match self.get(i) {
242 242 Element::None => None,
243 243 element => Some((i, element)),
244 244 }))
245 245 .finish()
246 246 }
247 247 }
248 248
249 249 /// A mutable 16-radix tree with the root block logically at the end
250 250 ///
251 251 /// Because of the append only nature of our node trees, we need to
252 252 /// keep the original untouched and store new blocks separately.
253 253 ///
254 254 /// The mutable root `Block` is kept apart so that we don't have to rebump
255 255 /// it on each insertion.
256 256 pub struct NodeTree {
257 257 readonly: Box<dyn Deref<Target = [Block]> + Send>,
258 258 growable: Vec<Block>,
259 259 root: Block,
260 260 masked_inner_blocks: usize,
261 261 }
262 262
263 263 impl Index<usize> for NodeTree {
264 264 type Output = Block;
265 265
266 266 fn index(&self, i: usize) -> &Block {
267 267 let ro_len = self.readonly.len();
268 268 if i < ro_len {
269 269 &self.readonly[i]
270 270 } else if i == ro_len + self.growable.len() {
271 271 &self.root
272 272 } else {
273 273 &self.growable[i - ro_len]
274 274 }
275 275 }
276 276 }
277 277
278 278 /// Return `None` unless the `Node` for `rev` has given prefix in `index`.
279 279 fn has_prefix_or_none(
280 280 idx: &impl RevlogIndex,
281 281 prefix: NodePrefixRef,
282 282 rev: Revision,
283 283 ) -> Result<Option<Revision>, NodeMapError> {
284 284 idx.node(rev)
285 285 .ok_or_else(|| NodeMapError::RevisionNotInIndex(rev))
286 286 .map(|node| {
287 287 if prefix.is_prefix_of(node) {
288 288 Some(rev)
289 289 } else {
290 290 None
291 291 }
292 292 })
293 293 }
294 294
295 295 /// validate that the candidate's node starts indeed with given prefix,
296 296 /// and treat ambiguities related to `NULL_REVISION`.
297 297 ///
298 298 /// From the data in the NodeTree, one can only conclude that some
299 299 /// revision is the only one for a *subprefix* of the one being looked up.
300 300 fn validate_candidate(
301 301 idx: &impl RevlogIndex,
302 302 prefix: NodePrefixRef,
303 303 candidate: (Option<Revision>, usize),
304 304 ) -> Result<(Option<Revision>, usize), NodeMapError> {
305 305 let (rev, steps) = candidate;
306 306 if let Some(nz_nybble) = prefix.first_different_nybble(&NULL_NODE) {
307 307 rev.map_or(Ok((None, steps)), |r| {
308 308 has_prefix_or_none(idx, prefix, r)
309 309 .map(|opt| (opt, max(steps, nz_nybble + 1)))
310 310 })
311 311 } else {
312 312 // the prefix is only made of zeros; NULL_REVISION always matches it
313 313 // and any other *valid* result is an ambiguity
314 314 match rev {
315 315 None => Ok((Some(NULL_REVISION), steps + 1)),
316 316 Some(r) => match has_prefix_or_none(idx, prefix, r)? {
317 317 None => Ok((Some(NULL_REVISION), steps + 1)),
318 318 _ => Err(NodeMapError::MultipleResults),
319 319 },
320 320 }
321 321 }
322 322 }
323 323
324 324 impl NodeTree {
325 325 /// Initiate a NodeTree from an immutable slice-like of `Block`
326 326 ///
327 327 /// We keep `readonly` and clone its root block if it isn't empty.
328 328 fn new(readonly: Box<dyn Deref<Target = [Block]> + Send>) -> Self {
329 329 let root = readonly.last().cloned().unwrap_or_else(Block::new);
330 330 NodeTree {
331 331 readonly,
332 332 growable: Vec::new(),
333 333 root,
334 334 masked_inner_blocks: 0,
335 335 }
336 336 }
337 337
338 338 /// Create from an opaque bunch of bytes
339 339 ///
340 340 /// The created `NodeTreeBytes` from `buffer`,
341 341 /// of which exactly `amount` bytes are used.
342 342 ///
343 343 /// - `buffer` could be derived from `PyBuffer` and `Mmap` objects.
344 344 /// - `offset` allows for the final file format to include fixed data
345 345 /// (generation number, behavioural flags)
346 346 /// - `amount` is expressed in bytes, and is not automatically derived from
347 347 /// `bytes`, so that a caller that manages them atomically can perform
348 348 /// temporary disk serializations and still rollback easily if needed.
349 349 /// First use-case for this would be to support Mercurial shell hooks.
350 350 ///
351 351 /// panics if `buffer` is smaller than `amount`
352 352 pub fn load_bytes(
353 353 bytes: Box<dyn Deref<Target = [u8]> + Send>,
354 354 amount: usize,
355 355 ) -> Self {
356 356 NodeTree::new(Box::new(NodeTreeBytes::new(bytes, amount)))
357 357 }
358 358
359 359 /// Retrieve added `Block` and the original immutable data
360 360 pub fn into_readonly_and_added(
361 361 self,
362 362 ) -> (Box<dyn Deref<Target = [Block]> + Send>, Vec<Block>) {
363 363 let mut vec = self.growable;
364 364 let readonly = self.readonly;
365 365 if readonly.last() != Some(&self.root) {
366 366 vec.push(self.root);
367 367 }
368 368 (readonly, vec)
369 369 }
370 370
371 371 /// Retrieve added `Blocks` as bytes, ready to be written to persistent
372 372 /// storage
373 373 pub fn into_readonly_and_added_bytes(
374 374 self,
375 375 ) -> (Box<dyn Deref<Target = [Block]> + Send>, Vec<u8>) {
376 376 let (readonly, vec) = self.into_readonly_and_added();
377 377 // Prevent running `v`'s destructor so we are in complete control
378 378 // of the allocation.
379 379 let vec = mem::ManuallyDrop::new(vec);
380 380
381 381 // Transmute the `Vec<Block>` to a `Vec<u8>`. Blocks are contiguous
382 382 // bytes, so this is perfectly safe.
383 383 let bytes = unsafe {
384 384 // Check for compatible allocation layout.
385 385 // (Optimized away by constant-folding + dead code elimination.)
386 386 assert_eq!(size_of::<Block>(), 64);
387 387 assert_eq!(align_of::<Block>(), 1);
388 388
389 389 // /!\ Any use of `vec` after this is use-after-free.
390 390 // TODO: use `into_raw_parts` once stabilized
391 391 Vec::from_raw_parts(
392 392 vec.as_ptr() as *mut u8,
393 393 vec.len() * size_of::<Block>(),
394 394 vec.capacity() * size_of::<Block>(),
395 395 )
396 396 };
397 397 (readonly, bytes)
398 398 }
399 399
400 400 /// Total number of blocks
401 401 fn len(&self) -> usize {
402 402 self.readonly.len() + self.growable.len() + 1
403 403 }
404 404
405 405 /// Implemented for completeness
406 406 ///
407 407 /// A `NodeTree` always has at least the mutable root block.
408 408 #[allow(dead_code)]
409 409 fn is_empty(&self) -> bool {
410 410 false
411 411 }
412 412
413 413 /// Main working method for `NodeTree` searches
414 414 ///
415 415 /// The first returned value is the result of analysing `NodeTree` data
416 416 /// *alone*: whereas `None` guarantees that the given prefix is absent
417 417 /// from the `NodeTree` data (but still could match `NULL_NODE`), with
418 418 /// `Some(rev)`, it is to be understood that `rev` is the unique `Revision`
419 419 /// that could match the prefix. Actually, all that can be inferred from
420 420 /// the `NodeTree` data is that `rev` is the revision with the longest
421 421 /// common node prefix with the given prefix.
422 422 ///
423 423 /// The second returned value is the size of the smallest subprefix
424 424 /// of `prefix` that would give the same result, i.e. not the
425 425 /// `MultipleResults` error variant (again, using only the data of the
426 426 /// `NodeTree`).
427 427 fn lookup(
428 428 &self,
429 429 prefix: NodePrefixRef,
430 430 ) -> Result<(Option<Revision>, usize), NodeMapError> {
431 431 for (i, visit_item) in self.visit(prefix).enumerate() {
432 432 if let Some(opt) = visit_item.final_revision() {
433 433 return Ok((opt, i + 1));
434 434 }
435 435 }
436 436 Err(NodeMapError::MultipleResults)
437 437 }
438 438
439 439 fn visit<'n, 'p>(
440 440 &'n self,
441 441 prefix: NodePrefixRef<'p>,
442 442 ) -> NodeTreeVisitor<'n, 'p> {
443 443 NodeTreeVisitor {
444 444 nt: self,
445 445 prefix,
446 446 visit: self.len() - 1,
447 447 nybble_idx: 0,
448 448 done: false,
449 449 }
450 450 }
451 451 /// Return a mutable reference for `Block` at index `idx`.
452 452 ///
453 453 /// If `idx` lies in the immutable area, then the reference is to
454 454 /// a newly appended copy.
455 455 ///
456 456 /// Returns (new_idx, glen, mut_ref) where
457 457 ///
458 458 /// - `new_idx` is the index of the mutable `Block`
459 459 /// - `mut_ref` is a mutable reference to the mutable Block.
460 460 /// - `glen` is the new length of `self.growable`
461 461 ///
462 462 /// Note: the caller wouldn't be allowed to query `self.growable.len()`
463 463 /// itself because of the mutable borrow taken with the returned `Block`
464 464 fn mutable_block(&mut self, idx: usize) -> (usize, &mut Block, usize) {
465 465 let ro_blocks = &self.readonly;
466 466 let ro_len = ro_blocks.len();
467 467 let glen = self.growable.len();
468 468 if idx < ro_len {
469 469 self.masked_inner_blocks += 1;
470 470 self.growable.push(ro_blocks[idx]);
471 471 (glen + ro_len, &mut self.growable[glen], glen + 1)
472 472 } else if glen + ro_len == idx {
473 473 (idx, &mut self.root, glen)
474 474 } else {
475 475 (idx, &mut self.growable[idx - ro_len], glen)
476 476 }
477 477 }
478 478
479 479 /// Main insertion method
480 480 ///
481 481 /// This will dive in the node tree to find the deepest `Block` for
482 482 /// `node`, split it as much as needed and record `node` in there.
483 483 /// The method then backtracks, updating references in all the visited
484 484 /// blocks from the root.
485 485 ///
486 486 /// All the mutated `Block` are copied first to the growable part if
487 487 /// needed. That happens for those in the immutable part except the root.
488 488 pub fn insert<I: RevlogIndex>(
489 489 &mut self,
490 490 index: &I,
491 491 node: &Node,
492 492 rev: Revision,
493 493 ) -> Result<(), NodeMapError> {
494 494 let ro_len = &self.readonly.len();
495 495
496 496 let mut visit_steps: Vec<_> = self.visit(node.into()).collect();
497 497 let read_nybbles = visit_steps.len();
498 498 // visit_steps cannot be empty, since we always visit the root block
499 499 let deepest = visit_steps.pop().unwrap();
500 500
501 501 let (mut block_idx, mut block, mut glen) =
502 502 self.mutable_block(deepest.block_idx);
503 503
504 504 if let Element::Rev(old_rev) = deepest.element {
505 505 let old_node = index
506 506 .node(old_rev)
507 507 .ok_or_else(|| NodeMapError::RevisionNotInIndex(old_rev))?;
508 508 if old_node == node {
509 509 return Ok(()); // avoid creating lots of useless blocks
510 510 }
511 511
512 512 // Looping over the tail of nybbles in both nodes, creating
513 513 // new blocks until we find the difference
514 514 let mut new_block_idx = ro_len + glen;
515 515 let mut nybble = deepest.nybble;
516 516 for nybble_pos in read_nybbles..node.nybbles_len() {
517 517 block.set(nybble, Element::Block(new_block_idx));
518 518
519 519 let new_nybble = node.get_nybble(nybble_pos);
520 520 let old_nybble = old_node.get_nybble(nybble_pos);
521 521
522 522 if old_nybble == new_nybble {
523 523 self.growable.push(Block::new());
524 524 block = &mut self.growable[glen];
525 525 glen += 1;
526 526 new_block_idx += 1;
527 527 nybble = new_nybble;
528 528 } else {
529 529 let mut new_block = Block::new();
530 530 new_block.set(old_nybble, Element::Rev(old_rev));
531 531 new_block.set(new_nybble, Element::Rev(rev));
532 532 self.growable.push(new_block);
533 533 break;
534 534 }
535 535 }
536 536 } else {
537 537 // Free slot in the deepest block: no splitting has to be done
538 538 block.set(deepest.nybble, Element::Rev(rev));
539 539 }
540 540
541 541 // Backtrack over visit steps to update references
542 542 while let Some(visited) = visit_steps.pop() {
543 543 let to_write = Element::Block(block_idx);
544 544 if visit_steps.is_empty() {
545 545 self.root.set(visited.nybble, to_write);
546 546 break;
547 547 }
548 548 let (new_idx, block, _) = self.mutable_block(visited.block_idx);
549 549 if block.get(visited.nybble) == to_write {
550 550 break;
551 551 }
552 552 block.set(visited.nybble, to_write);
553 553 block_idx = new_idx;
554 554 }
555 555 Ok(())
556 556 }
557 557
558 558 /// Make the whole `NodeTree` logically empty, without touching the
559 559 /// immutable part.
560 560 pub fn invalidate_all(&mut self) {
561 561 self.root = Block::new();
562 562 self.growable = Vec::new();
563 563 self.masked_inner_blocks = self.readonly.len();
564 564 }
565 565
566 566 /// Return the number of blocks in the readonly part that are currently
567 567 /// masked in the mutable part.
568 568 ///
569 569 /// The `NodeTree` structure has no efficient way to know how many blocks
570 570 /// are already unreachable in the readonly part.
571 571 ///
572 572 /// After a call to `invalidate_all()`, the returned number can be actually
573 573 /// bigger than the whole readonly part, a conventional way to mean that
574 574 /// all the readonly blocks have been masked. This is what is really
575 575 /// useful to the caller and does not require to know how many were
576 576 /// actually unreachable to begin with.
577 577 pub fn masked_readonly_blocks(&self) -> usize {
578 578 if let Some(readonly_root) = self.readonly.last() {
579 579 if readonly_root == &self.root {
580 580 return 0;
581 581 }
582 582 } else {
583 583 return 0;
584 584 }
585 585 self.masked_inner_blocks + 1
586 586 }
587 587 }
588 588
589 589 pub struct NodeTreeBytes {
590 590 buffer: Box<dyn Deref<Target = [u8]> + Send>,
591 591 len_in_blocks: usize,
592 592 }
593 593
594 594 impl NodeTreeBytes {
595 595 fn new(
596 596 buffer: Box<dyn Deref<Target = [u8]> + Send>,
597 597 amount: usize,
598 598 ) -> Self {
599 599 assert!(buffer.len() >= amount);
600 600 let len_in_blocks = amount / size_of::<Block>();
601 601 NodeTreeBytes {
602 602 buffer,
603 603 len_in_blocks,
604 604 }
605 605 }
606 606 }
607 607
608 608 impl Deref for NodeTreeBytes {
609 609 type Target = [Block];
610 610
611 611 fn deref(&self) -> &[Block] {
612 612 Block::slice_from_bytes(&self.buffer, self.len_in_blocks)
613 613 // `NodeTreeBytes::new` already asserted that `self.buffer` is
614 614 // large enough.
615 615 .unwrap()
616 616 .0
617 617 }
618 618 }
619 619
620 620 struct NodeTreeVisitor<'n, 'p> {
621 621 nt: &'n NodeTree,
622 622 prefix: NodePrefixRef<'p>,
623 623 visit: usize,
624 624 nybble_idx: usize,
625 625 done: bool,
626 626 }
627 627
628 628 #[derive(Debug, PartialEq, Clone)]
629 629 struct NodeTreeVisitItem {
630 630 block_idx: usize,
631 631 nybble: u8,
632 632 element: Element,
633 633 }
634 634
635 635 impl<'n, 'p> Iterator for NodeTreeVisitor<'n, 'p> {
636 636 type Item = NodeTreeVisitItem;
637 637
638 638 fn next(&mut self) -> Option<Self::Item> {
639 639 if self.done || self.nybble_idx >= self.prefix.len() {
640 640 return None;
641 641 }
642 642
643 643 let nybble = self.prefix.get_nybble(self.nybble_idx);
644 644 self.nybble_idx += 1;
645 645
646 646 let visit = self.visit;
647 647 let element = self.nt[visit].get(nybble);
648 648 if let Element::Block(idx) = element {
649 649 self.visit = idx;
650 650 } else {
651 651 self.done = true;
652 652 }
653 653
654 654 Some(NodeTreeVisitItem {
655 655 block_idx: visit,
656 656 nybble,
657 657 element,
658 658 })
659 659 }
660 660 }
661 661
662 662 impl NodeTreeVisitItem {
663 663 // Return `Some(opt)` if this item is final, with `opt` being the
664 664 // `Revision` that it may represent.
665 665 //
666 666 // If the item is not terminal, return `None`
667 667 fn final_revision(&self) -> Option<Option<Revision>> {
668 668 match self.element {
669 669 Element::Block(_) => None,
670 670 Element::Rev(r) => Some(Some(r)),
671 671 Element::None => Some(None),
672 672 }
673 673 }
674 674 }
675 675
676 676 impl From<Vec<Block>> for NodeTree {
677 677 fn from(vec: Vec<Block>) -> Self {
678 678 Self::new(Box::new(vec))
679 679 }
680 680 }
681 681
682 682 impl fmt::Debug for NodeTree {
683 683 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
684 684 let readonly: &[Block] = &*self.readonly;
685 685 write!(
686 686 f,
687 687 "readonly: {:?}, growable: {:?}, root: {:?}",
688 688 readonly, self.growable, self.root
689 689 )
690 690 }
691 691 }
692 692
693 693 impl Default for NodeTree {
694 694 /// Create a fully mutable empty NodeTree
695 695 fn default() -> Self {
696 696 NodeTree::new(Box::new(Vec::new()))
697 697 }
698 698 }
699 699
700 700 impl NodeMap for NodeTree {
701 701 fn find_bin<'a>(
702 702 &self,
703 703 idx: &impl RevlogIndex,
704 704 prefix: NodePrefixRef<'a>,
705 705 ) -> Result<Option<Revision>, NodeMapError> {
706 706 validate_candidate(idx, prefix.clone(), self.lookup(prefix)?)
707 707 .map(|(opt, _shortest)| opt)
708 708 }
709 709
710 710 fn unique_prefix_len_bin<'a>(
711 711 &self,
712 712 idx: &impl RevlogIndex,
713 713 prefix: NodePrefixRef<'a>,
714 714 ) -> Result<Option<usize>, NodeMapError> {
715 715 validate_candidate(idx, prefix.clone(), self.lookup(prefix)?)
716 716 .map(|(opt, shortest)| opt.map(|_rev| shortest))
717 717 }
718 718 }
719 719
720 720 #[cfg(test)]
721 721 mod tests {
722 722 use super::NodeMapError::*;
723 723 use super::*;
724 724 use crate::revlog::node::{hex_pad_right, Node};
725 725 use std::collections::HashMap;
726 726
727 727 /// Creates a `Block` using a syntax close to the `Debug` output
728 728 macro_rules! block {
729 729 {$($nybble:tt : $variant:ident($val:tt)),*} => (
730 730 {
731 731 let mut block = Block::new();
732 732 $(block.set($nybble, Element::$variant($val)));*;
733 733 block
734 734 }
735 735 )
736 736 }
737 737
738 738 #[test]
739 739 fn test_block_debug() {
740 740 let mut block = Block::new();
741 741 block.set(1, Element::Rev(3));
742 742 block.set(10, Element::Block(0));
743 743 assert_eq!(format!("{:?}", block), "{1: Rev(3), 10: Block(0)}");
744 744 }
745 745
746 746 #[test]
747 747 fn test_block_macro() {
748 748 let block = block! {5: Block(2)};
749 749 assert_eq!(format!("{:?}", block), "{5: Block(2)}");
750 750
751 751 let block = block! {13: Rev(15), 5: Block(2)};
752 752 assert_eq!(format!("{:?}", block), "{5: Block(2), 13: Rev(15)}");
753 753 }
754 754
755 755 #[test]
756 756 fn test_raw_block() {
757 757 let mut raw = [255u8; 64];
758 758
759 759 let mut counter = 0;
760 760 for val in [0_i32, 15, -2, -1, -3].iter() {
761 761 for byte in val.to_be_bytes().iter() {
762 762 raw[counter] = *byte;
763 763 counter += 1;
764 764 }
765 765 }
766 766 let (block, _) = Block::from_bytes(&raw).unwrap();
767 767 assert_eq!(block.get(0), Element::Block(0));
768 768 assert_eq!(block.get(1), Element::Block(15));
769 769 assert_eq!(block.get(3), Element::None);
770 770 assert_eq!(block.get(2), Element::Rev(0));
771 771 assert_eq!(block.get(4), Element::Rev(1));
772 772 }
773 773
774 774 type TestIndex = HashMap<Revision, Node>;
775 775
776 776 impl RevlogIndex for TestIndex {
777 777 fn node(&self, rev: Revision) -> Option<&Node> {
778 778 self.get(&rev)
779 779 }
780 780
781 781 fn len(&self) -> usize {
782 782 self.len()
783 783 }
784 784 }
785 785
786 786 /// Pad hexadecimal Node prefix with zeros on the right
787 787 ///
788 788 /// This avoids having to repeatedly write very long hexadecimal
789 789 /// strings for test data, and brings actual hash size independency.
790 790 #[cfg(test)]
791 791 fn pad_node(hex: &str) -> Node {
792 792 Node::from_hex(&hex_pad_right(hex)).unwrap()
793 793 }
794 794
795 795 /// Pad hexadecimal Node prefix with zeros on the right, then insert
796 796 fn pad_insert(idx: &mut TestIndex, rev: Revision, hex: &str) {
797 797 idx.insert(rev, pad_node(hex));
798 798 }
799 799
800 800 fn sample_nodetree() -> NodeTree {
801 801 NodeTree::from(vec![
802 802 block![0: Rev(9)],
803 803 block![0: Rev(0), 1: Rev(9)],
804 804 block![0: Block(1), 1:Rev(1)],
805 805 ])
806 806 }
807 807
808 808 #[test]
809 809 fn test_nt_debug() {
810 810 let nt = sample_nodetree();
811 811 assert_eq!(
812 812 format!("{:?}", nt),
813 813 "readonly: \
814 814 [{0: Rev(9)}, {0: Rev(0), 1: Rev(9)}, {0: Block(1), 1: Rev(1)}], \
815 815 growable: [], \
816 816 root: {0: Block(1), 1: Rev(1)}",
817 817 );
818 818 }
819 819
820 820 #[test]
821 821 fn test_immutable_find_simplest() -> Result<(), NodeMapError> {
822 822 let mut idx: TestIndex = HashMap::new();
823 823 pad_insert(&mut idx, 1, "1234deadcafe");
824 824
825 825 let nt = NodeTree::from(vec![block! {1: Rev(1)}]);
826 826 assert_eq!(nt.find_hex(&idx, "1")?, Some(1));
827 827 assert_eq!(nt.find_hex(&idx, "12")?, Some(1));
828 828 assert_eq!(nt.find_hex(&idx, "1234de")?, Some(1));
829 829 assert_eq!(nt.find_hex(&idx, "1a")?, None);
830 830 assert_eq!(nt.find_hex(&idx, "ab")?, None);
831 831
832 832 // and with full binary Nodes
833 833 assert_eq!(nt.find_node(&idx, idx.get(&1).unwrap())?, Some(1));
834 834 let unknown = Node::from_hex(&hex_pad_right("3d")).unwrap();
835 835 assert_eq!(nt.find_node(&idx, &unknown)?, None);
836 836 Ok(())
837 837 }
838 838
839 839 #[test]
840 840 fn test_immutable_find_one_jump() {
841 841 let mut idx = TestIndex::new();
842 842 pad_insert(&mut idx, 9, "012");
843 843 pad_insert(&mut idx, 0, "00a");
844 844
845 845 let nt = sample_nodetree();
846 846
847 847 assert_eq!(nt.find_hex(&idx, "0"), Err(MultipleResults));
848 848 assert_eq!(nt.find_hex(&idx, "01"), Ok(Some(9)));
849 849 assert_eq!(nt.find_hex(&idx, "00"), Err(MultipleResults));
850 850 assert_eq!(nt.find_hex(&idx, "00a"), Ok(Some(0)));
851 851 assert_eq!(nt.unique_prefix_len_hex(&idx, "00a"), Ok(Some(3)));
852 852 assert_eq!(nt.find_hex(&idx, "000"), Ok(Some(NULL_REVISION)));
853 853 }
854 854
855 855 #[test]
856 856 fn test_mutated_find() -> Result<(), NodeMapError> {
857 857 let mut idx = TestIndex::new();
858 858 pad_insert(&mut idx, 9, "012");
859 859 pad_insert(&mut idx, 0, "00a");
860 860 pad_insert(&mut idx, 2, "cafe");
861 861 pad_insert(&mut idx, 3, "15");
862 862 pad_insert(&mut idx, 1, "10");
863 863
864 864 let nt = NodeTree {
865 865 readonly: sample_nodetree().readonly,
866 866 growable: vec![block![0: Rev(1), 5: Rev(3)]],
867 867 root: block![0: Block(1), 1:Block(3), 12: Rev(2)],
868 868 masked_inner_blocks: 1,
869 869 };
870 870 assert_eq!(nt.find_hex(&idx, "10")?, Some(1));
871 871 assert_eq!(nt.find_hex(&idx, "c")?, Some(2));
872 872 assert_eq!(nt.unique_prefix_len_hex(&idx, "c")?, Some(1));
873 873 assert_eq!(nt.find_hex(&idx, "00"), Err(MultipleResults));
874 874 assert_eq!(nt.find_hex(&idx, "000")?, Some(NULL_REVISION));
875 875 assert_eq!(nt.unique_prefix_len_hex(&idx, "000")?, Some(3));
876 876 assert_eq!(nt.find_hex(&idx, "01")?, Some(9));
877 877 assert_eq!(nt.masked_readonly_blocks(), 2);
878 878 Ok(())
879 879 }
880 880
881 881 struct TestNtIndex {
882 882 index: TestIndex,
883 883 nt: NodeTree,
884 884 }
885 885
886 886 impl TestNtIndex {
887 887 fn new() -> Self {
888 888 TestNtIndex {
889 889 index: HashMap::new(),
890 890 nt: NodeTree::default(),
891 891 }
892 892 }
893 893
894 894 fn insert(
895 895 &mut self,
896 896 rev: Revision,
897 897 hex: &str,
898 898 ) -> Result<(), NodeMapError> {
899 899 let node = pad_node(hex);
900 900 self.index.insert(rev, node.clone());
901 901 self.nt.insert(&self.index, &node, rev)?;
902 902 Ok(())
903 903 }
904 904
905 905 fn find_hex(
906 906 &self,
907 907 prefix: &str,
908 908 ) -> Result<Option<Revision>, NodeMapError> {
909 909 self.nt.find_hex(&self.index, prefix)
910 910 }
911 911
912 912 fn unique_prefix_len_hex(
913 913 &self,
914 914 prefix: &str,
915 915 ) -> Result<Option<usize>, NodeMapError> {
916 916 self.nt.unique_prefix_len_hex(&self.index, prefix)
917 917 }
918 918
919 919 /// Drain `added` and restart a new one
920 920 fn commit(self) -> Self {
921 921 let mut as_vec: Vec<Block> =
922 922 self.nt.readonly.iter().map(|block| block.clone()).collect();
923 923 as_vec.extend(self.nt.growable);
924 924 as_vec.push(self.nt.root);
925 925
926 926 Self {
927 927 index: self.index,
928 928 nt: NodeTree::from(as_vec).into(),
929 929 }
930 930 }
931 931 }
932 932
933 933 #[test]
934 934 fn test_insert_full_mutable() -> Result<(), NodeMapError> {
935 935 let mut idx = TestNtIndex::new();
936 936 idx.insert(0, "1234")?;
937 937 assert_eq!(idx.find_hex("1")?, Some(0));
938 938 assert_eq!(idx.find_hex("12")?, Some(0));
939 939
940 940 // let's trigger a simple split
941 941 idx.insert(1, "1a34")?;
942 942 assert_eq!(idx.nt.growable.len(), 1);
943 943 assert_eq!(idx.find_hex("12")?, Some(0));
944 944 assert_eq!(idx.find_hex("1a")?, Some(1));
945 945
946 946 // reinserting is a no_op
947 947 idx.insert(1, "1a34")?;
948 948 assert_eq!(idx.nt.growable.len(), 1);
949 949 assert_eq!(idx.find_hex("12")?, Some(0));
950 950 assert_eq!(idx.find_hex("1a")?, Some(1));
951 951
952 952 idx.insert(2, "1a01")?;
953 953 assert_eq!(idx.nt.growable.len(), 2);
954 954 assert_eq!(idx.find_hex("1a"), Err(NodeMapError::MultipleResults));
955 955 assert_eq!(idx.find_hex("12")?, Some(0));
956 956 assert_eq!(idx.find_hex("1a3")?, Some(1));
957 957 assert_eq!(idx.find_hex("1a0")?, Some(2));
958 958 assert_eq!(idx.find_hex("1a12")?, None);
959 959
960 960 // now let's make it split and create more than one additional block
961 961 idx.insert(3, "1a345")?;
962 962 assert_eq!(idx.nt.growable.len(), 4);
963 963 assert_eq!(idx.find_hex("1a340")?, Some(1));
964 964 assert_eq!(idx.find_hex("1a345")?, Some(3));
965 965 assert_eq!(idx.find_hex("1a341")?, None);
966 966
967 967 // there's no readonly block to mask
968 968 assert_eq!(idx.nt.masked_readonly_blocks(), 0);
969 969 Ok(())
970 970 }
971 971
972 972 #[test]
973 973 fn test_unique_prefix_len_zero_prefix() {
974 974 let mut idx = TestNtIndex::new();
975 975 idx.insert(0, "00000abcd").unwrap();
976 976
977 977 assert_eq!(idx.find_hex("000"), Err(NodeMapError::MultipleResults));
978 978 // in the nodetree proper, this will be found at the first nybble
979 979 // yet the correct answer for unique_prefix_len is not 1, nor 1+1,
980 980 // but the first difference with `NULL_NODE`
981 981 assert_eq!(idx.unique_prefix_len_hex("00000a"), Ok(Some(6)));
982 982 assert_eq!(idx.unique_prefix_len_hex("00000ab"), Ok(Some(6)));
983 983
984 984 // same with odd result
985 985 idx.insert(1, "00123").unwrap();
986 986 assert_eq!(idx.unique_prefix_len_hex("001"), Ok(Some(3)));
987 987 assert_eq!(idx.unique_prefix_len_hex("0012"), Ok(Some(3)));
988 988
989 989 // these are unchanged of course
990 990 assert_eq!(idx.unique_prefix_len_hex("00000a"), Ok(Some(6)));
991 991 assert_eq!(idx.unique_prefix_len_hex("00000ab"), Ok(Some(6)));
992 992 }
993 993
994 994 #[test]
995 995 fn test_insert_extreme_splitting() -> Result<(), NodeMapError> {
996 996 // check that the splitting loop is long enough
997 997 let mut nt_idx = TestNtIndex::new();
998 998 let nt = &mut nt_idx.nt;
999 999 let idx = &mut nt_idx.index;
1000 1000
1001 1001 let node0_hex = hex_pad_right("444444");
1002 1002 let mut node1_hex = hex_pad_right("444444").clone();
1003 1003 node1_hex.pop();
1004 1004 node1_hex.push('5');
1005 1005 let node0 = Node::from_hex(&node0_hex).unwrap();
1006 1006 let node1 = Node::from_hex(&node1_hex).unwrap();
1007 1007
1008 1008 idx.insert(0, node0.clone());
1009 1009 nt.insert(idx, &node0, 0)?;
1010 1010 idx.insert(1, node1.clone());
1011 1011 nt.insert(idx, &node1, 1)?;
1012 1012
1013 1013 assert_eq!(nt.find_bin(idx, (&node0).into())?, Some(0));
1014 1014 assert_eq!(nt.find_bin(idx, (&node1).into())?, Some(1));
1015 1015 Ok(())
1016 1016 }
1017 1017
1018 1018 #[test]
1019 1019 fn test_insert_partly_immutable() -> Result<(), NodeMapError> {
1020 1020 let mut idx = TestNtIndex::new();
1021 1021 idx.insert(0, "1234")?;
1022 1022 idx.insert(1, "1235")?;
1023 1023 idx.insert(2, "131")?;
1024 1024 idx.insert(3, "cafe")?;
1025 1025 let mut idx = idx.commit();
1026 1026 assert_eq!(idx.find_hex("1234")?, Some(0));
1027 1027 assert_eq!(idx.find_hex("1235")?, Some(1));
1028 1028 assert_eq!(idx.find_hex("131")?, Some(2));
1029 1029 assert_eq!(idx.find_hex("cafe")?, Some(3));
1030 1030 // we did not add anything since init from readonly
1031 1031 assert_eq!(idx.nt.masked_readonly_blocks(), 0);
1032 1032
1033 1033 idx.insert(4, "123A")?;
1034 1034 assert_eq!(idx.find_hex("1234")?, Some(0));
1035 1035 assert_eq!(idx.find_hex("1235")?, Some(1));
1036 1036 assert_eq!(idx.find_hex("131")?, Some(2));
1037 1037 assert_eq!(idx.find_hex("cafe")?, Some(3));
1038 1038 assert_eq!(idx.find_hex("123A")?, Some(4));
1039 1039 // we masked blocks for all prefixes of "123", including the root
1040 1040 assert_eq!(idx.nt.masked_readonly_blocks(), 4);
1041 1041
1042 1042 eprintln!("{:?}", idx.nt);
1043 1043 idx.insert(5, "c0")?;
1044 1044 assert_eq!(idx.find_hex("cafe")?, Some(3));
1045 1045 assert_eq!(idx.find_hex("c0")?, Some(5));
1046 1046 assert_eq!(idx.find_hex("c1")?, None);
1047 1047 assert_eq!(idx.find_hex("1234")?, Some(0));
1048 1048 // inserting "c0" is just splitting the 'c' slot of the mutable root,
1049 1049 // it doesn't mask anything
1050 1050 assert_eq!(idx.nt.masked_readonly_blocks(), 4);
1051 1051
1052 1052 Ok(())
1053 1053 }
1054 1054
1055 1055 #[test]
1056 1056 fn test_invalidate_all() -> Result<(), NodeMapError> {
1057 1057 let mut idx = TestNtIndex::new();
1058 1058 idx.insert(0, "1234")?;
1059 1059 idx.insert(1, "1235")?;
1060 1060 idx.insert(2, "131")?;
1061 1061 idx.insert(3, "cafe")?;
1062 1062 let mut idx = idx.commit();
1063 1063
1064 1064 idx.nt.invalidate_all();
1065 1065
1066 1066 assert_eq!(idx.find_hex("1234")?, None);
1067 1067 assert_eq!(idx.find_hex("1235")?, None);
1068 1068 assert_eq!(idx.find_hex("131")?, None);
1069 1069 assert_eq!(idx.find_hex("cafe")?, None);
1070 1070 // all the readonly blocks have been masked, this is the
1071 1071 // conventional expected response
1072 1072 assert_eq!(idx.nt.masked_readonly_blocks(), idx.nt.readonly.len() + 1);
1073 1073 Ok(())
1074 1074 }
1075 1075
1076 1076 #[test]
1077 1077 fn test_into_added_empty() {
1078 1078 assert!(sample_nodetree().into_readonly_and_added().1.is_empty());
1079 1079 assert!(sample_nodetree()
1080 1080 .into_readonly_and_added_bytes()
1081 1081 .1
1082 1082 .is_empty());
1083 1083 }
1084 1084
1085 1085 #[test]
1086 1086 fn test_into_added_bytes() -> Result<(), NodeMapError> {
1087 1087 let mut idx = TestNtIndex::new();
1088 1088 idx.insert(0, "1234")?;
1089 1089 let mut idx = idx.commit();
1090 1090 idx.insert(4, "cafe")?;
1091 1091 let (_, bytes) = idx.nt.into_readonly_and_added_bytes();
1092 1092
1093 1093 // only the root block has been changed
1094 1094 assert_eq!(bytes.len(), size_of::<Block>());
1095 1095 // big endian for -2
1096 1096 assert_eq!(&bytes[4..2 * 4], [255, 255, 255, 254]);
1097 1097 // big endian for -6
1098 1098 assert_eq!(&bytes[12 * 4..13 * 4], [255, 255, 255, 250]);
1099 1099 Ok(())
1100 1100 }
1101 1101 }
@@ -1,496 +1,491 b''
1 1 // revlog.rs
2 2 //
3 3 // Copyright 2019-2020 Georges Racinet <georges.racinet@octobus.net>
4 4 //
5 5 // This software may be used and distributed according to the terms of the
6 6 // GNU General Public License version 2 or any later version.
7 7
8 8 use crate::{
9 9 cindex,
10 10 utils::{node_from_py_bytes, node_from_py_object},
11 11 };
12 12 use cpython::{
13 13 buffer::{Element, PyBuffer},
14 14 exc::{IndexError, ValueError},
15 15 ObjectProtocol, PyBytes, PyClone, PyDict, PyErr, PyModule, PyObject,
16 16 PyResult, PyString, PyTuple, Python, PythonObject, ToPyObject,
17 17 };
18 18 use hg::{
19 19 nodemap::{Block, NodeMapError, NodeTree},
20 20 revlog::{nodemap::NodeMap, RevlogIndex},
21 NodeError, Revision,
21 Revision,
22 22 };
23 23 use std::cell::RefCell;
24 24
25 25 /// Return a Struct implementing the Graph trait
26 26 pub(crate) fn pyindex_to_graph(
27 27 py: Python,
28 28 index: PyObject,
29 29 ) -> PyResult<cindex::Index> {
30 30 match index.extract::<MixedIndex>(py) {
31 31 Ok(midx) => Ok(midx.clone_cindex(py)),
32 32 Err(_) => cindex::Index::new(py, index),
33 33 }
34 34 }
35 35
36 36 py_class!(pub class MixedIndex |py| {
37 37 data cindex: RefCell<cindex::Index>;
38 38 data nt: RefCell<Option<NodeTree>>;
39 39 data docket: RefCell<Option<PyObject>>;
40 40 // Holds a reference to the mmap'ed persistent nodemap data
41 41 data mmap: RefCell<Option<PyBuffer>>;
42 42
43 43 def __new__(_cls, cindex: PyObject) -> PyResult<MixedIndex> {
44 44 Self::new(py, cindex)
45 45 }
46 46
47 47 /// Compatibility layer used for Python consumers needing access to the C index
48 48 ///
49 49 /// Only use case so far is `scmutil.shortesthexnodeidprefix`,
50 50 /// that may need to build a custom `nodetree`, based on a specified revset.
51 51 /// With a Rust implementation of the nodemap, we will be able to get rid of
52 52 /// this, by exposing our own standalone nodemap class,
53 53 /// ready to accept `MixedIndex`.
54 54 def get_cindex(&self) -> PyResult<PyObject> {
55 55 Ok(self.cindex(py).borrow().inner().clone_ref(py))
56 56 }
57 57
58 58 // Index API involving nodemap, as defined in mercurial/pure/parsers.py
59 59
60 60 /// Return Revision if found, raises a bare `error.RevlogError`
61 61 /// in case of ambiguity, same as C version does
62 62 def get_rev(&self, node: PyBytes) -> PyResult<Option<Revision>> {
63 63 let opt = self.get_nodetree(py)?.borrow();
64 64 let nt = opt.as_ref().unwrap();
65 65 let idx = &*self.cindex(py).borrow();
66 66 let node = node_from_py_bytes(py, &node)?;
67 67 nt.find_bin(idx, (&node).into()).map_err(|e| nodemap_error(py, e))
68 68 }
69 69
70 70 /// same as `get_rev()` but raises a bare `error.RevlogError` if node
71 71 /// is not found.
72 72 ///
73 73 /// No need to repeat `node` in the exception, `mercurial/revlog.py`
74 74 /// will catch and rewrap with it
75 75 def rev(&self, node: PyBytes) -> PyResult<Revision> {
76 76 self.get_rev(py, node)?.ok_or_else(|| revlog_error(py))
77 77 }
78 78
79 79 /// return True if the node exist in the index
80 80 def has_node(&self, node: PyBytes) -> PyResult<bool> {
81 81 self.get_rev(py, node).map(|opt| opt.is_some())
82 82 }
83 83
84 84 /// find length of shortest hex nodeid of a binary ID
85 85 def shortest(&self, node: PyBytes) -> PyResult<usize> {
86 86 let opt = self.get_nodetree(py)?.borrow();
87 87 let nt = opt.as_ref().unwrap();
88 88 let idx = &*self.cindex(py).borrow();
89 89 match nt.unique_prefix_len_node(idx, &node_from_py_bytes(py, &node)?)
90 90 {
91 91 Ok(Some(l)) => Ok(l),
92 92 Ok(None) => Err(revlog_error(py)),
93 93 Err(e) => Err(nodemap_error(py, e)),
94 94 }
95 95 }
96 96
97 97 def partialmatch(&self, node: PyObject) -> PyResult<Option<PyBytes>> {
98 98 let opt = self.get_nodetree(py)?.borrow();
99 99 let nt = opt.as_ref().unwrap();
100 100 let idx = &*self.cindex(py).borrow();
101 101
102 102 let node_as_string = if cfg!(feature = "python3-sys") {
103 103 node.cast_as::<PyString>(py)?.to_string(py)?.to_string()
104 104 }
105 105 else {
106 106 let node = node.extract::<PyBytes>(py)?;
107 107 String::from_utf8_lossy(node.data(py)).to_string()
108 108 };
109 109
110 110 nt.find_hex(idx, &node_as_string)
111 111 // TODO make an inner API returning the node directly
112 112 .map(|opt| opt.map(
113 113 |rev| PyBytes::new(py, idx.node(rev).unwrap().as_bytes())))
114 114 .map_err(|e| nodemap_error(py, e))
115 115
116 116 }
117 117
118 118 /// append an index entry
119 119 def append(&self, tup: PyTuple) -> PyResult<PyObject> {
120 120 if tup.len(py) < 8 {
121 121 // this is better than the panic promised by tup.get_item()
122 122 return Err(
123 123 PyErr::new::<IndexError, _>(py, "tuple index out of range"))
124 124 }
125 125 let node_bytes = tup.get_item(py, 7).extract(py)?;
126 126 let node = node_from_py_object(py, &node_bytes)?;
127 127
128 128 let mut idx = self.cindex(py).borrow_mut();
129 129 let rev = idx.len() as Revision;
130 130
131 131 idx.append(py, tup)?;
132 132 self.get_nodetree(py)?.borrow_mut().as_mut().unwrap()
133 133 .insert(&*idx, &node, rev)
134 134 .map_err(|e| nodemap_error(py, e))?;
135 135 Ok(py.None())
136 136 }
137 137
138 138 def __delitem__(&self, key: PyObject) -> PyResult<()> {
139 139 // __delitem__ is both for `del idx[r]` and `del idx[r1:r2]`
140 140 self.cindex(py).borrow().inner().del_item(py, key)?;
141 141 let mut opt = self.get_nodetree(py)?.borrow_mut();
142 142 let mut nt = opt.as_mut().unwrap();
143 143 nt.invalidate_all();
144 144 self.fill_nodemap(py, &mut nt)?;
145 145 Ok(())
146 146 }
147 147
148 148 //
149 149 // Reforwarded C index API
150 150 //
151 151
152 152 // index_methods (tp_methods). Same ordering as in revlog.c
153 153
154 154 /// return the gca set of the given revs
155 155 def ancestors(&self, *args, **kw) -> PyResult<PyObject> {
156 156 self.call_cindex(py, "ancestors", args, kw)
157 157 }
158 158
159 159 /// return the heads of the common ancestors of the given revs
160 160 def commonancestorsheads(&self, *args, **kw) -> PyResult<PyObject> {
161 161 self.call_cindex(py, "commonancestorsheads", args, kw)
162 162 }
163 163
164 164 /// Clear the index caches and inner py_class data.
165 165 /// It is Python's responsibility to call `update_nodemap_data` again.
166 166 def clearcaches(&self, *args, **kw) -> PyResult<PyObject> {
167 167 self.nt(py).borrow_mut().take();
168 168 self.docket(py).borrow_mut().take();
169 169 self.mmap(py).borrow_mut().take();
170 170 self.call_cindex(py, "clearcaches", args, kw)
171 171 }
172 172
173 173 /// get an index entry
174 174 def get(&self, *args, **kw) -> PyResult<PyObject> {
175 175 self.call_cindex(py, "get", args, kw)
176 176 }
177 177
178 178 /// compute phases
179 179 def computephasesmapsets(&self, *args, **kw) -> PyResult<PyObject> {
180 180 self.call_cindex(py, "computephasesmapsets", args, kw)
181 181 }
182 182
183 183 /// reachableroots
184 184 def reachableroots2(&self, *args, **kw) -> PyResult<PyObject> {
185 185 self.call_cindex(py, "reachableroots2", args, kw)
186 186 }
187 187
188 188 /// get head revisions
189 189 def headrevs(&self, *args, **kw) -> PyResult<PyObject> {
190 190 self.call_cindex(py, "headrevs", args, kw)
191 191 }
192 192
193 193 /// get filtered head revisions
194 194 def headrevsfiltered(&self, *args, **kw) -> PyResult<PyObject> {
195 195 self.call_cindex(py, "headrevsfiltered", args, kw)
196 196 }
197 197
198 198 /// True if the object is a snapshot
199 199 def issnapshot(&self, *args, **kw) -> PyResult<PyObject> {
200 200 self.call_cindex(py, "issnapshot", args, kw)
201 201 }
202 202
203 203 /// Gather snapshot data in a cache dict
204 204 def findsnapshots(&self, *args, **kw) -> PyResult<PyObject> {
205 205 self.call_cindex(py, "findsnapshots", args, kw)
206 206 }
207 207
208 208 /// determine revisions with deltas to reconstruct fulltext
209 209 def deltachain(&self, *args, **kw) -> PyResult<PyObject> {
210 210 self.call_cindex(py, "deltachain", args, kw)
211 211 }
212 212
213 213 /// slice planned chunk read to reach a density threshold
214 214 def slicechunktodensity(&self, *args, **kw) -> PyResult<PyObject> {
215 215 self.call_cindex(py, "slicechunktodensity", args, kw)
216 216 }
217 217
218 218 /// stats for the index
219 219 def stats(&self, *args, **kw) -> PyResult<PyObject> {
220 220 self.call_cindex(py, "stats", args, kw)
221 221 }
222 222
223 223 // index_sequence_methods and index_mapping_methods.
224 224 //
225 225 // Since we call back through the high level Python API,
226 226 // there's no point making a distinction between index_get
227 227 // and index_getitem.
228 228
229 229 def __len__(&self) -> PyResult<usize> {
230 230 self.cindex(py).borrow().inner().len(py)
231 231 }
232 232
233 233 def __getitem__(&self, key: PyObject) -> PyResult<PyObject> {
234 234 // this conversion seems needless, but that's actually because
235 235 // `index_getitem` does not handle conversion from PyLong,
236 236 // which expressions such as [e for e in index] internally use.
237 237 // Note that we don't seem to have a direct way to call
238 238 // PySequence_GetItem (does the job), which would possibly be better
239 239 // for performance
240 240 let key = match key.extract::<Revision>(py) {
241 241 Ok(rev) => rev.to_py_object(py).into_object(),
242 242 Err(_) => key,
243 243 };
244 244 self.cindex(py).borrow().inner().get_item(py, key)
245 245 }
246 246
247 247 def __setitem__(&self, key: PyObject, value: PyObject) -> PyResult<()> {
248 248 self.cindex(py).borrow().inner().set_item(py, key, value)
249 249 }
250 250
251 251 def __contains__(&self, item: PyObject) -> PyResult<bool> {
252 252 // ObjectProtocol does not seem to provide contains(), so
253 253 // this is an equivalent implementation of the index_contains()
254 254 // defined in revlog.c
255 255 let cindex = self.cindex(py).borrow();
256 256 match item.extract::<Revision>(py) {
257 257 Ok(rev) => {
258 258 Ok(rev >= -1 && rev < cindex.inner().len(py)? as Revision)
259 259 }
260 260 Err(_) => {
261 261 cindex.inner().call_method(
262 262 py,
263 263 "has_node",
264 264 PyTuple::new(py, &[item]),
265 265 None)?
266 266 .extract(py)
267 267 }
268 268 }
269 269 }
270 270
271 271 def nodemap_data_all(&self) -> PyResult<PyBytes> {
272 272 self.inner_nodemap_data_all(py)
273 273 }
274 274
275 275 def nodemap_data_incremental(&self) -> PyResult<PyObject> {
276 276 self.inner_nodemap_data_incremental(py)
277 277 }
278 278 def update_nodemap_data(
279 279 &self,
280 280 docket: PyObject,
281 281 nm_data: PyObject
282 282 ) -> PyResult<PyObject> {
283 283 self.inner_update_nodemap_data(py, docket, nm_data)
284 284 }
285 285
286 286
287 287 });
288 288
289 289 impl MixedIndex {
290 290 fn new(py: Python, cindex: PyObject) -> PyResult<MixedIndex> {
291 291 Self::create_instance(
292 292 py,
293 293 RefCell::new(cindex::Index::new(py, cindex)?),
294 294 RefCell::new(None),
295 295 RefCell::new(None),
296 296 RefCell::new(None),
297 297 )
298 298 }
299 299
300 300 /// This is scaffolding at this point, but it could also become
301 301 /// a way to start a persistent nodemap or perform a
302 302 /// vacuum / repack operation
303 303 fn fill_nodemap(
304 304 &self,
305 305 py: Python,
306 306 nt: &mut NodeTree,
307 307 ) -> PyResult<PyObject> {
308 308 let index = self.cindex(py).borrow();
309 309 for r in 0..index.len() {
310 310 let rev = r as Revision;
311 311 // in this case node() won't ever return None
312 312 nt.insert(&*index, index.node(rev).unwrap(), rev)
313 313 .map_err(|e| nodemap_error(py, e))?
314 314 }
315 315 Ok(py.None())
316 316 }
317 317
318 318 fn get_nodetree<'a>(
319 319 &'a self,
320 320 py: Python<'a>,
321 321 ) -> PyResult<&'a RefCell<Option<NodeTree>>> {
322 322 if self.nt(py).borrow().is_none() {
323 323 let readonly = Box::new(Vec::new());
324 324 let mut nt = NodeTree::load_bytes(readonly, 0);
325 325 self.fill_nodemap(py, &mut nt)?;
326 326 self.nt(py).borrow_mut().replace(nt);
327 327 }
328 328 Ok(self.nt(py))
329 329 }
330 330
331 331 /// forward a method call to the underlying C index
332 332 fn call_cindex(
333 333 &self,
334 334 py: Python,
335 335 name: &str,
336 336 args: &PyTuple,
337 337 kwargs: Option<&PyDict>,
338 338 ) -> PyResult<PyObject> {
339 339 self.cindex(py)
340 340 .borrow()
341 341 .inner()
342 342 .call_method(py, name, args, kwargs)
343 343 }
344 344
345 345 pub fn clone_cindex(&self, py: Python) -> cindex::Index {
346 346 self.cindex(py).borrow().clone_ref(py)
347 347 }
348 348
349 349 /// Returns the full nodemap bytes to be written as-is to disk
350 350 fn inner_nodemap_data_all(&self, py: Python) -> PyResult<PyBytes> {
351 351 let nodemap = self.get_nodetree(py)?.borrow_mut().take().unwrap();
352 352 let (readonly, bytes) = nodemap.into_readonly_and_added_bytes();
353 353
354 354 // If there's anything readonly, we need to build the data again from
355 355 // scratch
356 356 let bytes = if readonly.len() > 0 {
357 357 let mut nt = NodeTree::load_bytes(Box::new(vec![]), 0);
358 358 self.fill_nodemap(py, &mut nt)?;
359 359
360 360 let (readonly, bytes) = nt.into_readonly_and_added_bytes();
361 361 assert_eq!(readonly.len(), 0);
362 362
363 363 bytes
364 364 } else {
365 365 bytes
366 366 };
367 367
368 368 let bytes = PyBytes::new(py, &bytes);
369 369 Ok(bytes)
370 370 }
371 371
372 372 /// Returns the last saved docket along with the size of any changed data
373 373 /// (in number of blocks), and said data as bytes.
374 374 fn inner_nodemap_data_incremental(
375 375 &self,
376 376 py: Python,
377 377 ) -> PyResult<PyObject> {
378 378 let docket = self.docket(py).borrow();
379 379 let docket = match docket.as_ref() {
380 380 Some(d) => d,
381 381 None => return Ok(py.None()),
382 382 };
383 383
384 384 let node_tree = self.get_nodetree(py)?.borrow_mut().take().unwrap();
385 385 let masked_blocks = node_tree.masked_readonly_blocks();
386 386 let (_, data) = node_tree.into_readonly_and_added_bytes();
387 387 let changed = masked_blocks * std::mem::size_of::<Block>();
388 388
389 389 Ok((docket, changed, PyBytes::new(py, &data))
390 390 .to_py_object(py)
391 391 .into_object())
392 392 }
393 393
394 394 /// Update the nodemap from the new (mmaped) data.
395 395 /// The docket is kept as a reference for later incremental calls.
396 396 fn inner_update_nodemap_data(
397 397 &self,
398 398 py: Python,
399 399 docket: PyObject,
400 400 nm_data: PyObject,
401 401 ) -> PyResult<PyObject> {
402 402 let buf = PyBuffer::get(py, &nm_data)?;
403 403 let len = buf.item_count();
404 404
405 405 // Build a slice from the mmap'ed buffer data
406 406 let cbuf = buf.buf_ptr();
407 407 let bytes = if std::mem::size_of::<u8>() == buf.item_size()
408 408 && buf.is_c_contiguous()
409 409 && u8::is_compatible_format(buf.format())
410 410 {
411 411 unsafe { std::slice::from_raw_parts(cbuf as *const u8, len) }
412 412 } else {
413 413 return Err(PyErr::new::<ValueError, _>(
414 414 py,
415 415 "Nodemap data buffer has an invalid memory representation"
416 416 .to_string(),
417 417 ));
418 418 };
419 419
420 420 // Keep a reference to the mmap'ed buffer, otherwise we get a dangling
421 421 // pointer.
422 422 self.mmap(py).borrow_mut().replace(buf);
423 423
424 424 let mut nt = NodeTree::load_bytes(Box::new(bytes), len);
425 425
426 426 let data_tip =
427 427 docket.getattr(py, "tip_rev")?.extract::<Revision>(py)?;
428 428 self.docket(py).borrow_mut().replace(docket.clone_ref(py));
429 429 let idx = self.cindex(py).borrow();
430 430 let current_tip = idx.len();
431 431
432 432 for r in (data_tip + 1)..current_tip as Revision {
433 433 let rev = r as Revision;
434 434 // in this case node() won't ever return None
435 435 nt.insert(&*idx, idx.node(rev).unwrap(), rev)
436 436 .map_err(|e| nodemap_error(py, e))?
437 437 }
438 438
439 439 *self.nt(py).borrow_mut() = Some(nt);
440 440
441 441 Ok(py.None())
442 442 }
443 443 }
444 444
445 445 fn revlog_error(py: Python) -> PyErr {
446 446 match py
447 447 .import("mercurial.error")
448 448 .and_then(|m| m.get(py, "RevlogError"))
449 449 {
450 450 Err(e) => e,
451 451 Ok(cls) => PyErr::from_instance(py, cls),
452 452 }
453 453 }
454 454
455 455 fn rev_not_in_index(py: Python, rev: Revision) -> PyErr {
456 456 PyErr::new::<ValueError, _>(
457 457 py,
458 458 format!(
459 459 "Inconsistency: Revision {} found in nodemap \
460 460 is not in revlog index",
461 461 rev
462 462 ),
463 463 )
464 464 }
465 465
466 466 /// Standard treatment of NodeMapError
467 467 fn nodemap_error(py: Python, err: NodeMapError) -> PyErr {
468 468 match err {
469 469 NodeMapError::MultipleResults => revlog_error(py),
470 470 NodeMapError::RevisionNotInIndex(r) => rev_not_in_index(py, r),
471 NodeMapError::InvalidNodePrefix(s) => invalid_node_prefix(py, &s),
471 NodeMapError::InvalidNodePrefix => {
472 PyErr::new::<ValueError, _>(py, "Invalid node or prefix")
473 }
472 474 }
473 475 }
474 476
475 fn invalid_node_prefix(py: Python, ne: &NodeError) -> PyErr {
476 PyErr::new::<ValueError, _>(
477 py,
478 format!("Invalid node or prefix: {:?}", ne),
479 )
480 }
481
482 477 /// Create the module, with __package__ given from parent
483 478 pub fn init_module(py: Python, package: &str) -> PyResult<PyModule> {
484 479 let dotted_name = &format!("{}.revlog", package);
485 480 let m = PyModule::new(py, dotted_name)?;
486 481 m.add(py, "__package__", package)?;
487 482 m.add(py, "__doc__", "RevLog - Rust implementations")?;
488 483
489 484 m.add_class::<MixedIndex>(py)?;
490 485
491 486 let sys = PyModule::import(py, "sys")?;
492 487 let sys_modules: PyDict = sys.get(py, "modules")?.extract(py)?;
493 488 sys_modules.set_item(py, dotted_name, &m)?;
494 489
495 490 Ok(m)
496 491 }
General Comments 0
You need to be logged in to leave comments. Login now