Show More
@@ -1,682 +1,775 b'' | |||
|
1 | 1 | use std::fmt::Debug; |
|
2 | 2 | use std::ops::Deref; |
|
3 | 3 | |
|
4 | 4 | use byteorder::{BigEndian, ByteOrder}; |
|
5 | use bytes_cast::{unaligned, BytesCast}; | |
|
5 | 6 | |
|
7 | use super::REVIDX_KNOWN_FLAGS; | |
|
6 | 8 | use crate::errors::HgError; |
|
9 | use crate::node::{NODE_BYTES_LENGTH, STORED_NODE_ID_BYTES}; | |
|
7 | 10 | use crate::revlog::node::Node; |
|
8 | 11 | use crate::revlog::{Revision, NULL_REVISION}; |
|
9 | use crate::{Graph, GraphError, RevlogIndex, UncheckedRevision}; | |
|
12 | use crate::{Graph, GraphError, RevlogError, RevlogIndex, UncheckedRevision}; | |
|
10 | 13 | |
|
11 | 14 | pub const INDEX_ENTRY_SIZE: usize = 64; |
|
15 | pub const COMPRESSION_MODE_INLINE: u8 = 2; | |
|
12 | 16 | |
|
13 | 17 | pub struct IndexHeader { |
|
14 | 18 | header_bytes: [u8; 4], |
|
15 | 19 | } |
|
16 | 20 | |
|
17 | 21 | #[derive(Copy, Clone)] |
|
18 | 22 | pub struct IndexHeaderFlags { |
|
19 | 23 | flags: u16, |
|
20 | 24 | } |
|
21 | 25 | |
|
22 | 26 | /// Corresponds to the high bits of `_format_flags` in python |
|
23 | 27 | impl IndexHeaderFlags { |
|
24 | 28 | /// Corresponds to FLAG_INLINE_DATA in python |
|
25 | 29 | pub fn is_inline(self) -> bool { |
|
26 | 30 | self.flags & 1 != 0 |
|
27 | 31 | } |
|
28 | 32 | /// Corresponds to FLAG_GENERALDELTA in python |
|
29 | 33 | pub fn uses_generaldelta(self) -> bool { |
|
30 | 34 | self.flags & 2 != 0 |
|
31 | 35 | } |
|
32 | 36 | } |
|
33 | 37 | |
|
34 | 38 | /// Corresponds to the INDEX_HEADER structure, |
|
35 | 39 | /// which is parsed as a `header` variable in `_loadindex` in `revlog.py` |
|
36 | 40 | impl IndexHeader { |
|
37 | 41 | fn format_flags(&self) -> IndexHeaderFlags { |
|
38 | 42 | // No "unknown flags" check here, unlike in python. Maybe there should |
|
39 | 43 | // be. |
|
40 | 44 | IndexHeaderFlags { |
|
41 | 45 | flags: BigEndian::read_u16(&self.header_bytes[0..2]), |
|
42 | 46 | } |
|
43 | 47 | } |
|
44 | 48 | |
|
45 | 49 | /// The only revlog version currently supported by rhg. |
|
46 | 50 | const REVLOGV1: u16 = 1; |
|
47 | 51 | |
|
48 | 52 | /// Corresponds to `_format_version` in Python. |
|
49 | 53 | fn format_version(&self) -> u16 { |
|
50 | 54 | BigEndian::read_u16(&self.header_bytes[2..4]) |
|
51 | 55 | } |
|
52 | 56 | |
|
53 | 57 | const EMPTY_INDEX_HEADER: IndexHeader = IndexHeader { |
|
54 | 58 | // We treat an empty file as a valid index with no entries. |
|
55 | 59 | // Here we make an arbitrary choice of what we assume the format of the |
|
56 | 60 | // index to be (V1, using generaldelta). |
|
57 | 61 | // This doesn't matter too much, since we're only doing read-only |
|
58 | 62 | // access. but the value corresponds to the `new_header` variable in |
|
59 | 63 | // `revlog.py`, `_loadindex` |
|
60 | 64 | header_bytes: [0, 3, 0, 1], |
|
61 | 65 | }; |
|
62 | 66 | |
|
63 | 67 | fn parse(index_bytes: &[u8]) -> Result<IndexHeader, HgError> { |
|
64 | 68 | if index_bytes.is_empty() { |
|
65 | 69 | return Ok(IndexHeader::EMPTY_INDEX_HEADER); |
|
66 | 70 | } |
|
67 | 71 | if index_bytes.len() < 4 { |
|
68 | 72 | return Err(HgError::corrupted( |
|
69 | 73 | "corrupted revlog: can't read the index format header", |
|
70 | 74 | )); |
|
71 | 75 | } |
|
72 | 76 | Ok(IndexHeader { |
|
73 | 77 | header_bytes: { |
|
74 | 78 | let bytes: [u8; 4] = |
|
75 | 79 | index_bytes[0..4].try_into().expect("impossible"); |
|
76 | 80 | bytes |
|
77 | 81 | }, |
|
78 | 82 | }) |
|
79 | 83 | } |
|
80 | 84 | } |
|
81 | 85 | |
|
82 | 86 | /// Abstracts the access to the index bytes since they can be spread between |
|
83 | 87 | /// the immutable (bytes) part and the mutable (added) part if any appends |
|
84 | 88 | /// happened. This makes it transparent for the callers. |
|
85 | 89 | struct IndexData { |
|
86 | 90 | /// Immutable bytes, most likely taken from disk |
|
87 | 91 | bytes: Box<dyn Deref<Target = [u8]> + Send>, |
|
88 | 92 | /// Bytes that were added after reading the index |
|
89 | 93 | added: Vec<u8>, |
|
90 | 94 | } |
|
91 | 95 | |
|
92 | 96 | impl IndexData { |
|
93 | 97 | pub fn new(bytes: Box<dyn Deref<Target = [u8]> + Send>) -> Self { |
|
94 | 98 | Self { |
|
95 | 99 | bytes, |
|
96 | 100 | added: vec![], |
|
97 | 101 | } |
|
98 | 102 | } |
|
99 | 103 | |
|
100 | 104 | pub fn len(&self) -> usize { |
|
101 | 105 | self.bytes.len() + self.added.len() |
|
102 | 106 | } |
|
103 | 107 | } |
|
104 | 108 | |
|
105 | 109 | impl std::ops::Index<std::ops::Range<usize>> for IndexData { |
|
106 | 110 | type Output = [u8]; |
|
107 | 111 | |
|
108 | 112 | fn index(&self, index: std::ops::Range<usize>) -> &Self::Output { |
|
109 | 113 | let start = index.start; |
|
110 | 114 | let end = index.end; |
|
111 | 115 | let immutable_len = self.bytes.len(); |
|
112 | 116 | if start < immutable_len { |
|
113 | 117 | if end > immutable_len { |
|
114 | 118 | panic!("index data cannot span existing and added ranges"); |
|
115 | 119 | } |
|
116 | 120 | &self.bytes[index] |
|
117 | 121 | } else { |
|
118 | 122 | &self.added[start - immutable_len..end - immutable_len] |
|
119 | 123 | } |
|
120 | 124 | } |
|
121 | 125 | } |
|
122 | 126 | |
|
127 | pub struct RevisionDataParams { | |
|
128 | flags: u16, | |
|
129 | data_offset: u64, | |
|
130 | data_compressed_length: i32, | |
|
131 | data_uncompressed_length: i32, | |
|
132 | data_delta_base: i32, | |
|
133 | link_rev: i32, | |
|
134 | parent_rev_1: i32, | |
|
135 | parent_rev_2: i32, | |
|
136 | node_id: [u8; NODE_BYTES_LENGTH], | |
|
137 | _sidedata_offset: u64, | |
|
138 | _sidedata_compressed_length: i32, | |
|
139 | data_compression_mode: u8, | |
|
140 | _sidedata_compression_mode: u8, | |
|
141 | _rank: i32, | |
|
142 | } | |
|
143 | ||
|
144 | #[derive(BytesCast)] | |
|
145 | #[repr(C)] | |
|
146 | pub struct RevisionDataV1 { | |
|
147 | data_offset_or_flags: unaligned::U64Be, | |
|
148 | data_compressed_length: unaligned::I32Be, | |
|
149 | data_uncompressed_length: unaligned::I32Be, | |
|
150 | data_delta_base: unaligned::I32Be, | |
|
151 | link_rev: unaligned::I32Be, | |
|
152 | parent_rev_1: unaligned::I32Be, | |
|
153 | parent_rev_2: unaligned::I32Be, | |
|
154 | node_id: [u8; STORED_NODE_ID_BYTES], | |
|
155 | } | |
|
156 | ||
|
157 | fn _static_assert_size_of_revision_data_v1() { | |
|
158 | let _ = std::mem::transmute::<RevisionDataV1, [u8; 64]>; | |
|
159 | } | |
|
160 | ||
|
161 | impl RevisionDataParams { | |
|
162 | pub fn validate(&self) -> Result<(), RevlogError> { | |
|
163 | if self.flags & !REVIDX_KNOWN_FLAGS != 0 { | |
|
164 | return Err(RevlogError::corrupted(format!( | |
|
165 | "unknown revlog index flags: {}", | |
|
166 | self.flags | |
|
167 | ))); | |
|
168 | } | |
|
169 | if self.data_compression_mode != COMPRESSION_MODE_INLINE { | |
|
170 | return Err(RevlogError::corrupted(format!( | |
|
171 | "invalid data compression mode: {}", | |
|
172 | self.data_compression_mode | |
|
173 | ))); | |
|
174 | } | |
|
175 | // FIXME isn't this only for v2 or changelog v2? | |
|
176 | if self._sidedata_compression_mode != COMPRESSION_MODE_INLINE { | |
|
177 | return Err(RevlogError::corrupted(format!( | |
|
178 | "invalid sidedata compression mode: {}", | |
|
179 | self._sidedata_compression_mode | |
|
180 | ))); | |
|
181 | } | |
|
182 | Ok(()) | |
|
183 | } | |
|
184 | ||
|
185 | pub fn into_v1(self) -> RevisionDataV1 { | |
|
186 | let data_offset_or_flags = self.data_offset << 16 | self.flags as u64; | |
|
187 | let mut node_id = [0; STORED_NODE_ID_BYTES]; | |
|
188 | node_id[..NODE_BYTES_LENGTH].copy_from_slice(&self.node_id); | |
|
189 | RevisionDataV1 { | |
|
190 | data_offset_or_flags: data_offset_or_flags.into(), | |
|
191 | data_compressed_length: self.data_compressed_length.into(), | |
|
192 | data_uncompressed_length: self.data_uncompressed_length.into(), | |
|
193 | data_delta_base: self.data_delta_base.into(), | |
|
194 | link_rev: self.link_rev.into(), | |
|
195 | parent_rev_1: self.parent_rev_1.into(), | |
|
196 | parent_rev_2: self.parent_rev_2.into(), | |
|
197 | node_id, | |
|
198 | } | |
|
199 | } | |
|
200 | } | |
|
201 | ||
|
123 | 202 | /// A Revlog index |
|
124 | 203 | pub struct Index { |
|
125 | 204 | bytes: IndexData, |
|
126 | 205 | /// Offsets of starts of index blocks. |
|
127 | 206 | /// Only needed when the index is interleaved with data. |
|
128 | 207 | offsets: Option<Vec<usize>>, |
|
129 | 208 | uses_generaldelta: bool, |
|
130 | 209 | } |
|
131 | 210 | |
|
132 | 211 | impl Debug for Index { |
|
133 | 212 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { |
|
134 | 213 | f.debug_struct("Index") |
|
135 | 214 | .field("offsets", &self.offsets) |
|
136 | 215 | .field("uses_generaldelta", &self.uses_generaldelta) |
|
137 | 216 | .finish() |
|
138 | 217 | } |
|
139 | 218 | } |
|
140 | 219 | |
|
141 | 220 | impl Graph for Index { |
|
142 | 221 | fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError> { |
|
143 | 222 | let err = || GraphError::ParentOutOfRange(rev); |
|
144 | 223 | match self.get_entry(rev) { |
|
145 | 224 | Some(entry) => { |
|
146 | 225 | // The C implementation checks that the parents are valid |
|
147 | 226 | // before returning |
|
148 | 227 | Ok([ |
|
149 | 228 | self.check_revision(entry.p1()).ok_or_else(err)?, |
|
150 | 229 | self.check_revision(entry.p2()).ok_or_else(err)?, |
|
151 | 230 | ]) |
|
152 | 231 | } |
|
153 | 232 | None => Ok([NULL_REVISION, NULL_REVISION]), |
|
154 | 233 | } |
|
155 | 234 | } |
|
156 | 235 | } |
|
157 | 236 | |
|
158 | 237 | impl Index { |
|
159 | 238 | /// Create an index from bytes. |
|
160 | 239 | /// Calculate the start of each entry when is_inline is true. |
|
161 | 240 | pub fn new( |
|
162 | 241 | bytes: Box<dyn Deref<Target = [u8]> + Send>, |
|
163 | 242 | ) -> Result<Self, HgError> { |
|
164 | 243 | let header = IndexHeader::parse(bytes.as_ref())?; |
|
165 | 244 | |
|
166 | 245 | if header.format_version() != IndexHeader::REVLOGV1 { |
|
167 | 246 | // A proper new version should have had a repo/store |
|
168 | 247 | // requirement. |
|
169 | 248 | return Err(HgError::corrupted("unsupported revlog version")); |
|
170 | 249 | } |
|
171 | 250 | |
|
172 | 251 | // This is only correct because we know version is REVLOGV1. |
|
173 | 252 | // In v2 we always use generaldelta, while in v0 we never use |
|
174 | 253 | // generaldelta. Similar for [is_inline] (it's only used in v1). |
|
175 | 254 | let uses_generaldelta = header.format_flags().uses_generaldelta(); |
|
176 | 255 | |
|
177 | 256 | if header.format_flags().is_inline() { |
|
178 | 257 | let mut offset: usize = 0; |
|
179 | 258 | let mut offsets = Vec::new(); |
|
180 | 259 | |
|
181 | 260 | while offset + INDEX_ENTRY_SIZE <= bytes.len() { |
|
182 | 261 | offsets.push(offset); |
|
183 | 262 | let end = offset + INDEX_ENTRY_SIZE; |
|
184 | 263 | let entry = IndexEntry { |
|
185 | 264 | bytes: &bytes[offset..end], |
|
186 | 265 | offset_override: None, |
|
187 | 266 | }; |
|
188 | 267 | |
|
189 | 268 | offset += INDEX_ENTRY_SIZE + entry.compressed_len() as usize; |
|
190 | 269 | } |
|
191 | 270 | |
|
192 | 271 | if offset == bytes.len() { |
|
193 | 272 | Ok(Self { |
|
194 | 273 | bytes: IndexData::new(bytes), |
|
195 | 274 | offsets: Some(offsets), |
|
196 | 275 | uses_generaldelta, |
|
197 | 276 | }) |
|
198 | 277 | } else { |
|
199 | 278 | Err(HgError::corrupted("unexpected inline revlog length")) |
|
200 | 279 | } |
|
201 | 280 | } else { |
|
202 | 281 | Ok(Self { |
|
203 | 282 | bytes: IndexData::new(bytes), |
|
204 | 283 | offsets: None, |
|
205 | 284 | uses_generaldelta, |
|
206 | 285 | }) |
|
207 | 286 | } |
|
208 | 287 | } |
|
209 | 288 | |
|
210 | 289 | pub fn uses_generaldelta(&self) -> bool { |
|
211 | 290 | self.uses_generaldelta |
|
212 | 291 | } |
|
213 | 292 | |
|
214 | 293 | /// Value of the inline flag. |
|
215 | 294 | pub fn is_inline(&self) -> bool { |
|
216 | 295 | self.offsets.is_some() |
|
217 | 296 | } |
|
218 | 297 | |
|
219 | 298 | /// Return a slice of bytes if `revlog` is inline. Panic if not. |
|
220 | 299 | pub fn data(&self, start: usize, end: usize) -> &[u8] { |
|
221 | 300 | if !self.is_inline() { |
|
222 | 301 | panic!("tried to access data in the index of a revlog that is not inline"); |
|
223 | 302 | } |
|
224 | 303 | &self.bytes[start..end] |
|
225 | 304 | } |
|
226 | 305 | |
|
227 | 306 | /// Return number of entries of the revlog index. |
|
228 | 307 | pub fn len(&self) -> usize { |
|
229 | 308 | if let Some(offsets) = &self.offsets { |
|
230 | 309 | offsets.len() |
|
231 | 310 | } else { |
|
232 | 311 | self.bytes.len() / INDEX_ENTRY_SIZE |
|
233 | 312 | } |
|
234 | 313 | } |
|
235 | 314 | |
|
236 | 315 | /// Returns `true` if the `Index` has zero `entries`. |
|
237 | 316 | pub fn is_empty(&self) -> bool { |
|
238 | 317 | self.len() == 0 |
|
239 | 318 | } |
|
240 | 319 | |
|
241 | 320 | /// Return the index entry corresponding to the given revision if it |
|
242 | 321 | /// exists. |
|
243 | 322 | pub fn get_entry(&self, rev: Revision) -> Option<IndexEntry> { |
|
244 | 323 | if rev == NULL_REVISION { |
|
245 | 324 | return None; |
|
246 | 325 | } |
|
247 | 326 | Some(if let Some(offsets) = &self.offsets { |
|
248 | 327 | self.get_entry_inline(rev, offsets) |
|
249 | 328 | } else { |
|
250 | 329 | self.get_entry_separated(rev) |
|
251 | 330 | }) |
|
252 | 331 | } |
|
253 | 332 | |
|
254 | 333 | fn get_entry_inline( |
|
255 | 334 | &self, |
|
256 | 335 | rev: Revision, |
|
257 | 336 | offsets: &[usize], |
|
258 | 337 | ) -> IndexEntry { |
|
259 | 338 | let start = offsets[rev.0 as usize]; |
|
260 | 339 | let end = start + INDEX_ENTRY_SIZE; |
|
261 | 340 | let bytes = &self.bytes[start..end]; |
|
262 | 341 | |
|
263 | 342 | // See IndexEntry for an explanation of this override. |
|
264 | 343 | let offset_override = Some(end); |
|
265 | 344 | |
|
266 | 345 | IndexEntry { |
|
267 | 346 | bytes, |
|
268 | 347 | offset_override, |
|
269 | 348 | } |
|
270 | 349 | } |
|
271 | 350 | |
|
272 | 351 | fn get_entry_separated(&self, rev: Revision) -> IndexEntry { |
|
273 | 352 | let start = rev.0 as usize * INDEX_ENTRY_SIZE; |
|
274 | 353 | let end = start + INDEX_ENTRY_SIZE; |
|
275 | 354 | let bytes = &self.bytes[start..end]; |
|
276 | 355 | |
|
277 | 356 | // Override the offset of the first revision as its bytes are used |
|
278 | 357 | // for the index's metadata (saving space because it is always 0) |
|
279 | 358 | let offset_override = if rev == Revision(0) { Some(0) } else { None }; |
|
280 | 359 | |
|
281 | 360 | IndexEntry { |
|
282 | 361 | bytes, |
|
283 | 362 | offset_override, |
|
284 | 363 | } |
|
285 | 364 | } |
|
365 | ||
|
366 | /// TODO move this to the trait probably, along with other things | |
|
367 | pub fn append( | |
|
368 | &mut self, | |
|
369 | revision_data: RevisionDataParams, | |
|
370 | ) -> Result<(), RevlogError> { | |
|
371 | revision_data.validate()?; | |
|
372 | let new_offset = self.bytes.len(); | |
|
373 | if let Some(offsets) = self.offsets.as_mut() { | |
|
374 | offsets.push(new_offset) | |
|
375 | } | |
|
376 | self.bytes.added.extend(revision_data.into_v1().as_bytes()); | |
|
377 | Ok(()) | |
|
378 | } | |
|
286 | 379 | } |
|
287 | 380 | |
|
288 | 381 | impl super::RevlogIndex for Index { |
|
289 | 382 | fn len(&self) -> usize { |
|
290 | 383 | self.len() |
|
291 | 384 | } |
|
292 | 385 | |
|
293 | 386 | fn node(&self, rev: Revision) -> Option<&Node> { |
|
294 | 387 | self.get_entry(rev).map(|entry| entry.hash()) |
|
295 | 388 | } |
|
296 | 389 | } |
|
297 | 390 | |
|
298 | 391 | #[derive(Debug)] |
|
299 | 392 | pub struct IndexEntry<'a> { |
|
300 | 393 | bytes: &'a [u8], |
|
301 | 394 | /// Allows to override the offset value of the entry. |
|
302 | 395 | /// |
|
303 | 396 | /// For interleaved index and data, the offset stored in the index |
|
304 | 397 | /// corresponds to the separated data offset. |
|
305 | 398 | /// It has to be overridden with the actual offset in the interleaved |
|
306 | 399 | /// index which is just after the index block. |
|
307 | 400 | /// |
|
308 | 401 | /// For separated index and data, the offset stored in the first index |
|
309 | 402 | /// entry is mixed with the index headers. |
|
310 | 403 | /// It has to be overridden with 0. |
|
311 | 404 | offset_override: Option<usize>, |
|
312 | 405 | } |
|
313 | 406 | |
|
314 | 407 | impl<'a> IndexEntry<'a> { |
|
315 | 408 | /// Return the offset of the data. |
|
316 | 409 | pub fn offset(&self) -> usize { |
|
317 | 410 | if let Some(offset_override) = self.offset_override { |
|
318 | 411 | offset_override |
|
319 | 412 | } else { |
|
320 | 413 | let mut bytes = [0; 8]; |
|
321 | 414 | bytes[2..8].copy_from_slice(&self.bytes[0..=5]); |
|
322 | 415 | BigEndian::read_u64(&bytes[..]) as usize |
|
323 | 416 | } |
|
324 | 417 | } |
|
325 | 418 | |
|
326 | 419 | pub fn flags(&self) -> u16 { |
|
327 | 420 | BigEndian::read_u16(&self.bytes[6..=7]) |
|
328 | 421 | } |
|
329 | 422 | |
|
330 | 423 | /// Return the compressed length of the data. |
|
331 | 424 | pub fn compressed_len(&self) -> u32 { |
|
332 | 425 | BigEndian::read_u32(&self.bytes[8..=11]) |
|
333 | 426 | } |
|
334 | 427 | |
|
335 | 428 | /// Return the uncompressed length of the data. |
|
336 | 429 | pub fn uncompressed_len(&self) -> i32 { |
|
337 | 430 | BigEndian::read_i32(&self.bytes[12..=15]) |
|
338 | 431 | } |
|
339 | 432 | |
|
340 | 433 | /// Return the revision upon which the data has been derived. |
|
341 | 434 | pub fn base_revision_or_base_of_delta_chain(&self) -> UncheckedRevision { |
|
342 | 435 | // TODO Maybe return an Option when base_revision == rev? |
|
343 | 436 | // Requires to add rev to IndexEntry |
|
344 | 437 | |
|
345 | 438 | BigEndian::read_i32(&self.bytes[16..]).into() |
|
346 | 439 | } |
|
347 | 440 | |
|
348 | 441 | pub fn link_revision(&self) -> UncheckedRevision { |
|
349 | 442 | BigEndian::read_i32(&self.bytes[20..]).into() |
|
350 | 443 | } |
|
351 | 444 | |
|
352 | 445 | pub fn p1(&self) -> UncheckedRevision { |
|
353 | 446 | BigEndian::read_i32(&self.bytes[24..]).into() |
|
354 | 447 | } |
|
355 | 448 | |
|
356 | 449 | pub fn p2(&self) -> UncheckedRevision { |
|
357 | 450 | BigEndian::read_i32(&self.bytes[28..]).into() |
|
358 | 451 | } |
|
359 | 452 | |
|
360 | 453 | /// Return the hash of revision's full text. |
|
361 | 454 | /// |
|
362 | 455 | /// Currently, SHA-1 is used and only the first 20 bytes of this field |
|
363 | 456 | /// are used. |
|
364 | 457 | pub fn hash(&self) -> &'a Node { |
|
365 | 458 | (&self.bytes[32..52]).try_into().unwrap() |
|
366 | 459 | } |
|
367 | 460 | } |
|
368 | 461 | |
|
369 | 462 | #[cfg(test)] |
|
370 | 463 | mod tests { |
|
371 | 464 | use super::*; |
|
372 | 465 | use crate::node::NULL_NODE; |
|
373 | 466 | |
|
374 | 467 | #[cfg(test)] |
|
375 | 468 | #[derive(Debug, Copy, Clone)] |
|
376 | 469 | pub struct IndexEntryBuilder { |
|
377 | 470 | is_first: bool, |
|
378 | 471 | is_inline: bool, |
|
379 | 472 | is_general_delta: bool, |
|
380 | 473 | version: u16, |
|
381 | 474 | offset: usize, |
|
382 | 475 | compressed_len: usize, |
|
383 | 476 | uncompressed_len: usize, |
|
384 | 477 | base_revision_or_base_of_delta_chain: Revision, |
|
385 | 478 | link_revision: Revision, |
|
386 | 479 | p1: Revision, |
|
387 | 480 | p2: Revision, |
|
388 | 481 | node: Node, |
|
389 | 482 | } |
|
390 | 483 | |
|
391 | 484 | #[cfg(test)] |
|
392 | 485 | impl IndexEntryBuilder { |
|
393 | 486 | #[allow(clippy::new_without_default)] |
|
394 | 487 | pub fn new() -> Self { |
|
395 | 488 | Self { |
|
396 | 489 | is_first: false, |
|
397 | 490 | is_inline: false, |
|
398 | 491 | is_general_delta: true, |
|
399 | 492 | version: 1, |
|
400 | 493 | offset: 0, |
|
401 | 494 | compressed_len: 0, |
|
402 | 495 | uncompressed_len: 0, |
|
403 | 496 | base_revision_or_base_of_delta_chain: Revision(0), |
|
404 | 497 | link_revision: Revision(0), |
|
405 | 498 | p1: NULL_REVISION, |
|
406 | 499 | p2: NULL_REVISION, |
|
407 | 500 | node: NULL_NODE, |
|
408 | 501 | } |
|
409 | 502 | } |
|
410 | 503 | |
|
411 | 504 | pub fn is_first(&mut self, value: bool) -> &mut Self { |
|
412 | 505 | self.is_first = value; |
|
413 | 506 | self |
|
414 | 507 | } |
|
415 | 508 | |
|
416 | 509 | pub fn with_inline(&mut self, value: bool) -> &mut Self { |
|
417 | 510 | self.is_inline = value; |
|
418 | 511 | self |
|
419 | 512 | } |
|
420 | 513 | |
|
421 | 514 | pub fn with_general_delta(&mut self, value: bool) -> &mut Self { |
|
422 | 515 | self.is_general_delta = value; |
|
423 | 516 | self |
|
424 | 517 | } |
|
425 | 518 | |
|
426 | 519 | pub fn with_version(&mut self, value: u16) -> &mut Self { |
|
427 | 520 | self.version = value; |
|
428 | 521 | self |
|
429 | 522 | } |
|
430 | 523 | |
|
431 | 524 | pub fn with_offset(&mut self, value: usize) -> &mut Self { |
|
432 | 525 | self.offset = value; |
|
433 | 526 | self |
|
434 | 527 | } |
|
435 | 528 | |
|
436 | 529 | pub fn with_compressed_len(&mut self, value: usize) -> &mut Self { |
|
437 | 530 | self.compressed_len = value; |
|
438 | 531 | self |
|
439 | 532 | } |
|
440 | 533 | |
|
441 | 534 | pub fn with_uncompressed_len(&mut self, value: usize) -> &mut Self { |
|
442 | 535 | self.uncompressed_len = value; |
|
443 | 536 | self |
|
444 | 537 | } |
|
445 | 538 | |
|
446 | 539 | pub fn with_base_revision_or_base_of_delta_chain( |
|
447 | 540 | &mut self, |
|
448 | 541 | value: Revision, |
|
449 | 542 | ) -> &mut Self { |
|
450 | 543 | self.base_revision_or_base_of_delta_chain = value; |
|
451 | 544 | self |
|
452 | 545 | } |
|
453 | 546 | |
|
454 | 547 | pub fn with_link_revision(&mut self, value: Revision) -> &mut Self { |
|
455 | 548 | self.link_revision = value; |
|
456 | 549 | self |
|
457 | 550 | } |
|
458 | 551 | |
|
459 | 552 | pub fn with_p1(&mut self, value: Revision) -> &mut Self { |
|
460 | 553 | self.p1 = value; |
|
461 | 554 | self |
|
462 | 555 | } |
|
463 | 556 | |
|
464 | 557 | pub fn with_p2(&mut self, value: Revision) -> &mut Self { |
|
465 | 558 | self.p2 = value; |
|
466 | 559 | self |
|
467 | 560 | } |
|
468 | 561 | |
|
469 | 562 | pub fn with_node(&mut self, value: Node) -> &mut Self { |
|
470 | 563 | self.node = value; |
|
471 | 564 | self |
|
472 | 565 | } |
|
473 | 566 | |
|
474 | 567 | pub fn build(&self) -> Vec<u8> { |
|
475 | 568 | let mut bytes = Vec::with_capacity(INDEX_ENTRY_SIZE); |
|
476 | 569 | if self.is_first { |
|
477 | 570 | bytes.extend(&match (self.is_general_delta, self.is_inline) { |
|
478 | 571 | (false, false) => [0u8, 0], |
|
479 | 572 | (false, true) => [0u8, 1], |
|
480 | 573 | (true, false) => [0u8, 2], |
|
481 | 574 | (true, true) => [0u8, 3], |
|
482 | 575 | }); |
|
483 | 576 | bytes.extend(&self.version.to_be_bytes()); |
|
484 | 577 | // Remaining offset bytes. |
|
485 | 578 | bytes.extend(&[0u8; 2]); |
|
486 | 579 | } else { |
|
487 | 580 | // Offset stored on 48 bits (6 bytes) |
|
488 | 581 | bytes.extend(&(self.offset as u64).to_be_bytes()[2..]); |
|
489 | 582 | } |
|
490 | 583 | bytes.extend(&[0u8; 2]); // Revision flags. |
|
491 | 584 | bytes.extend(&(self.compressed_len as u32).to_be_bytes()); |
|
492 | 585 | bytes.extend(&(self.uncompressed_len as u32).to_be_bytes()); |
|
493 | 586 | bytes.extend( |
|
494 | 587 | &self.base_revision_or_base_of_delta_chain.0.to_be_bytes(), |
|
495 | 588 | ); |
|
496 | 589 | bytes.extend(&self.link_revision.0.to_be_bytes()); |
|
497 | 590 | bytes.extend(&self.p1.0.to_be_bytes()); |
|
498 | 591 | bytes.extend(&self.p2.0.to_be_bytes()); |
|
499 | 592 | bytes.extend(self.node.as_bytes()); |
|
500 | 593 | bytes.extend(vec![0u8; 12]); |
|
501 | 594 | bytes |
|
502 | 595 | } |
|
503 | 596 | } |
|
504 | 597 | |
|
505 | 598 | pub fn is_inline(index_bytes: &[u8]) -> bool { |
|
506 | 599 | IndexHeader::parse(index_bytes) |
|
507 | 600 | .expect("too short") |
|
508 | 601 | .format_flags() |
|
509 | 602 | .is_inline() |
|
510 | 603 | } |
|
511 | 604 | |
|
512 | 605 | pub fn uses_generaldelta(index_bytes: &[u8]) -> bool { |
|
513 | 606 | IndexHeader::parse(index_bytes) |
|
514 | 607 | .expect("too short") |
|
515 | 608 | .format_flags() |
|
516 | 609 | .uses_generaldelta() |
|
517 | 610 | } |
|
518 | 611 | |
|
519 | 612 | pub fn get_version(index_bytes: &[u8]) -> u16 { |
|
520 | 613 | IndexHeader::parse(index_bytes) |
|
521 | 614 | .expect("too short") |
|
522 | 615 | .format_version() |
|
523 | 616 | } |
|
524 | 617 | |
|
525 | 618 | #[test] |
|
526 | 619 | fn flags_when_no_inline_flag_test() { |
|
527 | 620 | let bytes = IndexEntryBuilder::new() |
|
528 | 621 | .is_first(true) |
|
529 | 622 | .with_general_delta(false) |
|
530 | 623 | .with_inline(false) |
|
531 | 624 | .build(); |
|
532 | 625 | |
|
533 | 626 | assert!(!is_inline(&bytes)); |
|
534 | 627 | assert!(!uses_generaldelta(&bytes)); |
|
535 | 628 | } |
|
536 | 629 | |
|
537 | 630 | #[test] |
|
538 | 631 | fn flags_when_inline_flag_test() { |
|
539 | 632 | let bytes = IndexEntryBuilder::new() |
|
540 | 633 | .is_first(true) |
|
541 | 634 | .with_general_delta(false) |
|
542 | 635 | .with_inline(true) |
|
543 | 636 | .build(); |
|
544 | 637 | |
|
545 | 638 | assert!(is_inline(&bytes)); |
|
546 | 639 | assert!(!uses_generaldelta(&bytes)); |
|
547 | 640 | } |
|
548 | 641 | |
|
549 | 642 | #[test] |
|
550 | 643 | fn flags_when_inline_and_generaldelta_flags_test() { |
|
551 | 644 | let bytes = IndexEntryBuilder::new() |
|
552 | 645 | .is_first(true) |
|
553 | 646 | .with_general_delta(true) |
|
554 | 647 | .with_inline(true) |
|
555 | 648 | .build(); |
|
556 | 649 | |
|
557 | 650 | assert!(is_inline(&bytes)); |
|
558 | 651 | assert!(uses_generaldelta(&bytes)); |
|
559 | 652 | } |
|
560 | 653 | |
|
561 | 654 | #[test] |
|
562 | 655 | fn test_offset() { |
|
563 | 656 | let bytes = IndexEntryBuilder::new().with_offset(1).build(); |
|
564 | 657 | let entry = IndexEntry { |
|
565 | 658 | bytes: &bytes, |
|
566 | 659 | offset_override: None, |
|
567 | 660 | }; |
|
568 | 661 | |
|
569 | 662 | assert_eq!(entry.offset(), 1) |
|
570 | 663 | } |
|
571 | 664 | |
|
572 | 665 | #[test] |
|
573 | 666 | fn test_with_overridden_offset() { |
|
574 | 667 | let bytes = IndexEntryBuilder::new().with_offset(1).build(); |
|
575 | 668 | let entry = IndexEntry { |
|
576 | 669 | bytes: &bytes, |
|
577 | 670 | offset_override: Some(2), |
|
578 | 671 | }; |
|
579 | 672 | |
|
580 | 673 | assert_eq!(entry.offset(), 2) |
|
581 | 674 | } |
|
582 | 675 | |
|
583 | 676 | #[test] |
|
584 | 677 | fn test_compressed_len() { |
|
585 | 678 | let bytes = IndexEntryBuilder::new().with_compressed_len(1).build(); |
|
586 | 679 | let entry = IndexEntry { |
|
587 | 680 | bytes: &bytes, |
|
588 | 681 | offset_override: None, |
|
589 | 682 | }; |
|
590 | 683 | |
|
591 | 684 | assert_eq!(entry.compressed_len(), 1) |
|
592 | 685 | } |
|
593 | 686 | |
|
594 | 687 | #[test] |
|
595 | 688 | fn test_uncompressed_len() { |
|
596 | 689 | let bytes = IndexEntryBuilder::new().with_uncompressed_len(1).build(); |
|
597 | 690 | let entry = IndexEntry { |
|
598 | 691 | bytes: &bytes, |
|
599 | 692 | offset_override: None, |
|
600 | 693 | }; |
|
601 | 694 | |
|
602 | 695 | assert_eq!(entry.uncompressed_len(), 1) |
|
603 | 696 | } |
|
604 | 697 | |
|
605 | 698 | #[test] |
|
606 | 699 | fn test_base_revision_or_base_of_delta_chain() { |
|
607 | 700 | let bytes = IndexEntryBuilder::new() |
|
608 | 701 | .with_base_revision_or_base_of_delta_chain(Revision(1)) |
|
609 | 702 | .build(); |
|
610 | 703 | let entry = IndexEntry { |
|
611 | 704 | bytes: &bytes, |
|
612 | 705 | offset_override: None, |
|
613 | 706 | }; |
|
614 | 707 | |
|
615 | 708 | assert_eq!(entry.base_revision_or_base_of_delta_chain(), 1.into()) |
|
616 | 709 | } |
|
617 | 710 | |
|
618 | 711 | #[test] |
|
619 | 712 | fn link_revision_test() { |
|
620 | 713 | let bytes = IndexEntryBuilder::new() |
|
621 | 714 | .with_link_revision(Revision(123)) |
|
622 | 715 | .build(); |
|
623 | 716 | |
|
624 | 717 | let entry = IndexEntry { |
|
625 | 718 | bytes: &bytes, |
|
626 | 719 | offset_override: None, |
|
627 | 720 | }; |
|
628 | 721 | |
|
629 | 722 | assert_eq!(entry.link_revision(), 123.into()); |
|
630 | 723 | } |
|
631 | 724 | |
|
632 | 725 | #[test] |
|
633 | 726 | fn p1_test() { |
|
634 | 727 | let bytes = IndexEntryBuilder::new().with_p1(Revision(123)).build(); |
|
635 | 728 | |
|
636 | 729 | let entry = IndexEntry { |
|
637 | 730 | bytes: &bytes, |
|
638 | 731 | offset_override: None, |
|
639 | 732 | }; |
|
640 | 733 | |
|
641 | 734 | assert_eq!(entry.p1(), 123.into()); |
|
642 | 735 | } |
|
643 | 736 | |
|
644 | 737 | #[test] |
|
645 | 738 | fn p2_test() { |
|
646 | 739 | let bytes = IndexEntryBuilder::new().with_p2(Revision(123)).build(); |
|
647 | 740 | |
|
648 | 741 | let entry = IndexEntry { |
|
649 | 742 | bytes: &bytes, |
|
650 | 743 | offset_override: None, |
|
651 | 744 | }; |
|
652 | 745 | |
|
653 | 746 | assert_eq!(entry.p2(), 123.into()); |
|
654 | 747 | } |
|
655 | 748 | |
|
656 | 749 | #[test] |
|
657 | 750 | fn node_test() { |
|
658 | 751 | let node = Node::from_hex("0123456789012345678901234567890123456789") |
|
659 | 752 | .unwrap(); |
|
660 | 753 | let bytes = IndexEntryBuilder::new().with_node(node).build(); |
|
661 | 754 | |
|
662 | 755 | let entry = IndexEntry { |
|
663 | 756 | bytes: &bytes, |
|
664 | 757 | offset_override: None, |
|
665 | 758 | }; |
|
666 | 759 | |
|
667 | 760 | assert_eq!(*entry.hash(), node); |
|
668 | 761 | } |
|
669 | 762 | |
|
670 | 763 | #[test] |
|
671 | 764 | fn version_test() { |
|
672 | 765 | let bytes = IndexEntryBuilder::new() |
|
673 | 766 | .is_first(true) |
|
674 | 767 | .with_version(2) |
|
675 | 768 | .build(); |
|
676 | 769 | |
|
677 | 770 | assert_eq!(get_version(&bytes), 2) |
|
678 | 771 | } |
|
679 | 772 | } |
|
680 | 773 | |
|
681 | 774 | #[cfg(test)] |
|
682 | 775 | pub use tests::IndexEntryBuilder; |
@@ -1,429 +1,433 b'' | |||
|
1 | 1 | // Copyright 2019-2020 Georges Racinet <georges.racinet@octobus.net> |
|
2 | 2 | // |
|
3 | 3 | // This software may be used and distributed according to the terms of the |
|
4 | 4 | // GNU General Public License version 2 or any later version. |
|
5 | 5 | |
|
6 | 6 | //! Definitions and utilities for Revision nodes |
|
7 | 7 | //! |
|
8 | 8 | //! In Mercurial code base, it is customary to call "a node" the binary SHA |
|
9 | 9 | //! of a revision. |
|
10 | 10 | |
|
11 | 11 | use crate::errors::HgError; |
|
12 | 12 | use bytes_cast::BytesCast; |
|
13 | 13 | use std::fmt; |
|
14 | 14 | |
|
15 | 15 | /// The length in bytes of a `Node` |
|
16 | 16 | /// |
|
17 | 17 | /// This constant is meant to ease refactors of this module, and |
|
18 | 18 | /// are private so that calling code does not expect all nodes have |
|
19 | 19 | /// the same size, should we support several formats concurrently in |
|
20 | 20 | /// the future. |
|
21 | 21 | pub const NODE_BYTES_LENGTH: usize = 20; |
|
22 | 22 | |
|
23 | /// The length in bytes set aside on disk for a `Node`. Revlog up to v1 only | |
|
24 | /// use 20 out of those 32. | |
|
25 | pub const STORED_NODE_ID_BYTES: usize = 32; | |
|
26 | ||
|
23 | 27 | /// Id of the null node. |
|
24 | 28 | /// |
|
25 | 29 | /// Used to indicate the absence of node. |
|
26 | 30 | pub const NULL_NODE_ID: [u8; NODE_BYTES_LENGTH] = [0u8; NODE_BYTES_LENGTH]; |
|
27 | 31 | |
|
28 | 32 | /// The length in bytes of a `Node` |
|
29 | 33 | /// |
|
30 | 34 | /// see also `NODES_BYTES_LENGTH` about it being private. |
|
31 | 35 | const NODE_NYBBLES_LENGTH: usize = 2 * NODE_BYTES_LENGTH; |
|
32 | 36 | |
|
33 | 37 | /// Default for UI presentation |
|
34 | 38 | const SHORT_PREFIX_DEFAULT_NYBBLES_LENGTH: u8 = 12; |
|
35 | 39 | |
|
36 | 40 | /// Private alias for readability and to ease future change |
|
37 | 41 | type NodeData = [u8; NODE_BYTES_LENGTH]; |
|
38 | 42 | |
|
39 | 43 | /// Binary revision SHA |
|
40 | 44 | /// |
|
41 | 45 | /// ## Future changes of hash size |
|
42 | 46 | /// |
|
43 | 47 | /// To accomodate future changes of hash size, Rust callers |
|
44 | 48 | /// should use the conversion methods at the boundaries (FFI, actual |
|
45 | 49 | /// computation of hashes and I/O) only, and only if required. |
|
46 | 50 | /// |
|
47 | 51 | /// All other callers outside of unit tests should just handle `Node` values |
|
48 | 52 | /// and never make any assumption on the actual length, using [`nybbles_len`] |
|
49 | 53 | /// if they need a loop boundary. |
|
50 | 54 | /// |
|
51 | 55 | /// All methods that create a `Node` either take a type that enforces |
|
52 | 56 | /// the size or return an error at runtime. |
|
53 | 57 | /// |
|
54 | 58 | /// [`nybbles_len`]: #method.nybbles_len |
|
55 | 59 | #[derive(Copy, Clone, PartialEq, BytesCast, derive_more::From)] |
|
56 | 60 | #[repr(transparent)] |
|
57 | 61 | pub struct Node { |
|
58 | 62 | data: NodeData, |
|
59 | 63 | } |
|
60 | 64 | |
|
61 | 65 | impl fmt::Debug for Node { |
|
62 | 66 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
|
63 | 67 | let n = format!("{:x?}", self.data); |
|
64 | 68 | // We're using debug_tuple because it makes the output a little |
|
65 | 69 | // more compact without losing data. |
|
66 | 70 | f.debug_tuple("Node").field(&n).finish() |
|
67 | 71 | } |
|
68 | 72 | } |
|
69 | 73 | |
|
70 | 74 | /// The node value for NULL_REVISION |
|
71 | 75 | pub const NULL_NODE: Node = Node { |
|
72 | 76 | data: [0; NODE_BYTES_LENGTH], |
|
73 | 77 | }; |
|
74 | 78 | |
|
75 | 79 | /// Return an error if the slice has an unexpected length |
|
76 | 80 | impl<'a> TryFrom<&'a [u8]> for &'a Node { |
|
77 | 81 | type Error = (); |
|
78 | 82 | |
|
79 | 83 | #[inline] |
|
80 | 84 | fn try_from(bytes: &'a [u8]) -> Result<Self, Self::Error> { |
|
81 | 85 | match Node::from_bytes(bytes) { |
|
82 | 86 | Ok((node, rest)) if rest.is_empty() => Ok(node), |
|
83 | 87 | _ => Err(()), |
|
84 | 88 | } |
|
85 | 89 | } |
|
86 | 90 | } |
|
87 | 91 | |
|
88 | 92 | /// Return an error if the slice has an unexpected length |
|
89 | 93 | impl TryFrom<&'_ [u8]> for Node { |
|
90 | 94 | type Error = std::array::TryFromSliceError; |
|
91 | 95 | |
|
92 | 96 | #[inline] |
|
93 | 97 | fn try_from(bytes: &'_ [u8]) -> Result<Self, Self::Error> { |
|
94 | 98 | let data = bytes.try_into()?; |
|
95 | 99 | Ok(Self { data }) |
|
96 | 100 | } |
|
97 | 101 | } |
|
98 | 102 | |
|
99 | 103 | impl From<&'_ NodeData> for Node { |
|
100 | 104 | #[inline] |
|
101 | 105 | fn from(data: &'_ NodeData) -> Self { |
|
102 | 106 | Self { data: *data } |
|
103 | 107 | } |
|
104 | 108 | } |
|
105 | 109 | |
|
106 | 110 | impl fmt::LowerHex for Node { |
|
107 | 111 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { |
|
108 | 112 | for &byte in &self.data { |
|
109 | 113 | write!(f, "{:02x}", byte)? |
|
110 | 114 | } |
|
111 | 115 | Ok(()) |
|
112 | 116 | } |
|
113 | 117 | } |
|
114 | 118 | |
|
115 | 119 | #[derive(Debug)] |
|
116 | 120 | pub struct FromHexError; |
|
117 | 121 | |
|
118 | 122 | /// Low level utility function, also for prefixes |
|
119 | 123 | fn get_nybble(s: &[u8], i: usize) -> u8 { |
|
120 | 124 | if i % 2 == 0 { |
|
121 | 125 | s[i / 2] >> 4 |
|
122 | 126 | } else { |
|
123 | 127 | s[i / 2] & 0x0f |
|
124 | 128 | } |
|
125 | 129 | } |
|
126 | 130 | |
|
127 | 131 | impl Node { |
|
128 | 132 | /// Retrieve the `i`th half-byte of the binary data. |
|
129 | 133 | /// |
|
130 | 134 | /// This is also the `i`th hexadecimal digit in numeric form, |
|
131 | 135 | /// also called a [nybble](https://en.wikipedia.org/wiki/Nibble). |
|
132 | 136 | pub fn get_nybble(&self, i: usize) -> u8 { |
|
133 | 137 | get_nybble(&self.data, i) |
|
134 | 138 | } |
|
135 | 139 | |
|
136 | 140 | /// Length of the data, in nybbles |
|
137 | 141 | pub fn nybbles_len(&self) -> usize { |
|
138 | 142 | // public exposure as an instance method only, so that we can |
|
139 | 143 | // easily support several sizes of hashes if needed in the future. |
|
140 | 144 | NODE_NYBBLES_LENGTH |
|
141 | 145 | } |
|
142 | 146 | |
|
143 | 147 | /// Convert from hexadecimal string representation |
|
144 | 148 | /// |
|
145 | 149 | /// Exact length is required. |
|
146 | 150 | /// |
|
147 | 151 | /// To be used in FFI and I/O only, in order to facilitate future |
|
148 | 152 | /// changes of hash format. |
|
149 | 153 | pub fn from_hex(hex: impl AsRef<[u8]>) -> Result<Node, FromHexError> { |
|
150 | 154 | let prefix = NodePrefix::from_hex(hex)?; |
|
151 | 155 | if prefix.nybbles_len() == NODE_NYBBLES_LENGTH { |
|
152 | 156 | Ok(Self { data: prefix.data }) |
|
153 | 157 | } else { |
|
154 | 158 | Err(FromHexError) |
|
155 | 159 | } |
|
156 | 160 | } |
|
157 | 161 | |
|
158 | 162 | /// `from_hex`, but for input from an internal file of the repository such |
|
159 | 163 | /// as a changelog or manifest entry. |
|
160 | 164 | /// |
|
161 | 165 | /// An error is treated as repository corruption. |
|
162 | 166 | pub fn from_hex_for_repo(hex: impl AsRef<[u8]>) -> Result<Node, HgError> { |
|
163 | 167 | Self::from_hex(hex.as_ref()).map_err(|FromHexError| { |
|
164 | 168 | HgError::CorruptedRepository(format!( |
|
165 | 169 | "Expected a full hexadecimal node ID, found {}", |
|
166 | 170 | String::from_utf8_lossy(hex.as_ref()) |
|
167 | 171 | )) |
|
168 | 172 | }) |
|
169 | 173 | } |
|
170 | 174 | |
|
171 | 175 | /// Provide access to binary data |
|
172 | 176 | /// |
|
173 | 177 | /// This is needed by FFI layers, for instance to return expected |
|
174 | 178 | /// binary values to Python. |
|
175 | 179 | pub fn as_bytes(&self) -> &[u8] { |
|
176 | 180 | &self.data |
|
177 | 181 | } |
|
178 | 182 | |
|
179 | 183 | pub fn short(&self) -> NodePrefix { |
|
180 | 184 | NodePrefix { |
|
181 | 185 | nybbles_len: SHORT_PREFIX_DEFAULT_NYBBLES_LENGTH, |
|
182 | 186 | data: self.data, |
|
183 | 187 | } |
|
184 | 188 | } |
|
185 | 189 | |
|
186 | 190 | pub fn pad_to_256_bits(&self) -> [u8; 32] { |
|
187 | 191 | let mut bits = [0; 32]; |
|
188 | 192 | bits[..NODE_BYTES_LENGTH].copy_from_slice(&self.data); |
|
189 | 193 | bits |
|
190 | 194 | } |
|
191 | 195 | } |
|
192 | 196 | |
|
193 | 197 | /// The beginning of a binary revision SHA. |
|
194 | 198 | /// |
|
195 | 199 | /// Since it can potentially come from an hexadecimal representation with |
|
196 | 200 | /// odd length, it needs to carry around whether the last 4 bits are relevant |
|
197 | 201 | /// or not. |
|
198 | 202 | #[derive(Debug, PartialEq, Copy, Clone)] |
|
199 | 203 | pub struct NodePrefix { |
|
200 | 204 | /// In `1..=NODE_NYBBLES_LENGTH` |
|
201 | 205 | nybbles_len: u8, |
|
202 | 206 | /// The first `4 * length_in_nybbles` bits are used (considering bits |
|
203 | 207 | /// within a bytes in big-endian: most significant first), the rest |
|
204 | 208 | /// are zero. |
|
205 | 209 | data: NodeData, |
|
206 | 210 | } |
|
207 | 211 | |
|
208 | 212 | impl NodePrefix { |
|
209 | 213 | /// Convert from hexadecimal string representation |
|
210 | 214 | /// |
|
211 | 215 | /// Similarly to `hex::decode`, can be used with Unicode string types |
|
212 | 216 | /// (`String`, `&str`) as well as bytes. |
|
213 | 217 | /// |
|
214 | 218 | /// To be used in FFI and I/O only, in order to facilitate future |
|
215 | 219 | /// changes of hash format. |
|
216 | 220 | pub fn from_hex(hex: impl AsRef<[u8]>) -> Result<Self, FromHexError> { |
|
217 | 221 | let hex = hex.as_ref(); |
|
218 | 222 | let len = hex.len(); |
|
219 | 223 | if len > NODE_NYBBLES_LENGTH || len == 0 { |
|
220 | 224 | return Err(FromHexError); |
|
221 | 225 | } |
|
222 | 226 | |
|
223 | 227 | let mut data = [0; NODE_BYTES_LENGTH]; |
|
224 | 228 | let mut nybbles_len = 0; |
|
225 | 229 | for &ascii_byte in hex { |
|
226 | 230 | let nybble = match char::from(ascii_byte).to_digit(16) { |
|
227 | 231 | Some(digit) => digit as u8, |
|
228 | 232 | None => return Err(FromHexError), |
|
229 | 233 | }; |
|
230 | 234 | // Fill in the upper half of a byte first, then the lower half. |
|
231 | 235 | let shift = if nybbles_len % 2 == 0 { 4 } else { 0 }; |
|
232 | 236 | data[nybbles_len as usize / 2] |= nybble << shift; |
|
233 | 237 | nybbles_len += 1; |
|
234 | 238 | } |
|
235 | 239 | Ok(Self { data, nybbles_len }) |
|
236 | 240 | } |
|
237 | 241 | |
|
238 | 242 | pub fn nybbles_len(&self) -> usize { |
|
239 | 243 | self.nybbles_len as _ |
|
240 | 244 | } |
|
241 | 245 | |
|
242 | 246 | pub fn is_prefix_of(&self, node: &Node) -> bool { |
|
243 | 247 | let full_bytes = self.nybbles_len() / 2; |
|
244 | 248 | if self.data[..full_bytes] != node.data[..full_bytes] { |
|
245 | 249 | return false; |
|
246 | 250 | } |
|
247 | 251 | if self.nybbles_len() % 2 == 0 { |
|
248 | 252 | return true; |
|
249 | 253 | } |
|
250 | 254 | let last = self.nybbles_len() - 1; |
|
251 | 255 | self.get_nybble(last) == node.get_nybble(last) |
|
252 | 256 | } |
|
253 | 257 | |
|
254 | 258 | /// Retrieve the `i`th half-byte from the prefix. |
|
255 | 259 | /// |
|
256 | 260 | /// This is also the `i`th hexadecimal digit in numeric form, |
|
257 | 261 | /// also called a [nybble](https://en.wikipedia.org/wiki/Nibble). |
|
258 | 262 | pub fn get_nybble(&self, i: usize) -> u8 { |
|
259 | 263 | assert!(i < self.nybbles_len()); |
|
260 | 264 | get_nybble(&self.data, i) |
|
261 | 265 | } |
|
262 | 266 | |
|
263 | 267 | fn iter_nybbles(&self) -> impl Iterator<Item = u8> + '_ { |
|
264 | 268 | (0..self.nybbles_len()).map(move |i| get_nybble(&self.data, i)) |
|
265 | 269 | } |
|
266 | 270 | |
|
267 | 271 | /// Return the index first nybble that's different from `node` |
|
268 | 272 | /// |
|
269 | 273 | /// If the return value is `None` that means that `self` is |
|
270 | 274 | /// a prefix of `node`, but the current method is a bit slower |
|
271 | 275 | /// than `is_prefix_of`. |
|
272 | 276 | /// |
|
273 | 277 | /// Returned index is as in `get_nybble`, i.e., starting at 0. |
|
274 | 278 | pub fn first_different_nybble(&self, node: &Node) -> Option<usize> { |
|
275 | 279 | self.iter_nybbles() |
|
276 | 280 | .zip(NodePrefix::from(*node).iter_nybbles()) |
|
277 | 281 | .position(|(a, b)| a != b) |
|
278 | 282 | } |
|
279 | 283 | } |
|
280 | 284 | |
|
281 | 285 | impl fmt::LowerHex for NodePrefix { |
|
282 | 286 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { |
|
283 | 287 | let full_bytes = self.nybbles_len() / 2; |
|
284 | 288 | for &byte in &self.data[..full_bytes] { |
|
285 | 289 | write!(f, "{:02x}", byte)? |
|
286 | 290 | } |
|
287 | 291 | if self.nybbles_len() % 2 == 1 { |
|
288 | 292 | let last = self.nybbles_len() - 1; |
|
289 | 293 | write!(f, "{:x}", self.get_nybble(last))? |
|
290 | 294 | } |
|
291 | 295 | Ok(()) |
|
292 | 296 | } |
|
293 | 297 | } |
|
294 | 298 | |
|
295 | 299 | /// A shortcut for full `Node` references |
|
296 | 300 | impl From<&'_ Node> for NodePrefix { |
|
297 | 301 | fn from(node: &'_ Node) -> Self { |
|
298 | 302 | NodePrefix { |
|
299 | 303 | nybbles_len: node.nybbles_len() as _, |
|
300 | 304 | data: node.data, |
|
301 | 305 | } |
|
302 | 306 | } |
|
303 | 307 | } |
|
304 | 308 | |
|
305 | 309 | /// A shortcut for full `Node` references |
|
306 | 310 | impl From<Node> for NodePrefix { |
|
307 | 311 | fn from(node: Node) -> Self { |
|
308 | 312 | NodePrefix { |
|
309 | 313 | nybbles_len: node.nybbles_len() as _, |
|
310 | 314 | data: node.data, |
|
311 | 315 | } |
|
312 | 316 | } |
|
313 | 317 | } |
|
314 | 318 | |
|
315 | 319 | impl PartialEq<Node> for NodePrefix { |
|
316 | 320 | fn eq(&self, other: &Node) -> bool { |
|
317 | 321 | self.data == other.data && self.nybbles_len() == other.nybbles_len() |
|
318 | 322 | } |
|
319 | 323 | } |
|
320 | 324 | |
|
321 | 325 | #[cfg(test)] |
|
322 | 326 | mod tests { |
|
323 | 327 | use super::*; |
|
324 | 328 | |
|
325 | 329 | const SAMPLE_NODE_HEX: &str = "0123456789abcdeffedcba9876543210deadbeef"; |
|
326 | 330 | const SAMPLE_NODE: Node = Node { |
|
327 | 331 | data: [ |
|
328 | 332 | 0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0xfe, 0xdc, 0xba, |
|
329 | 333 | 0x98, 0x76, 0x54, 0x32, 0x10, 0xde, 0xad, 0xbe, 0xef, |
|
330 | 334 | ], |
|
331 | 335 | }; |
|
332 | 336 | |
|
333 | 337 | /// Pad an hexadecimal string to reach `NODE_NYBBLES_LENGTH` |
|
334 | 338 | /// The padding is made with zeros. |
|
335 | 339 | pub fn hex_pad_right(hex: &str) -> String { |
|
336 | 340 | let mut res = hex.to_string(); |
|
337 | 341 | while res.len() < NODE_NYBBLES_LENGTH { |
|
338 | 342 | res.push('0'); |
|
339 | 343 | } |
|
340 | 344 | res |
|
341 | 345 | } |
|
342 | 346 | |
|
343 | 347 | #[test] |
|
344 | 348 | fn test_node_from_hex() { |
|
345 | 349 | let not_hex = "012... oops"; |
|
346 | 350 | let too_short = "0123"; |
|
347 | 351 | let too_long = format!("{}0", SAMPLE_NODE_HEX); |
|
348 | 352 | assert_eq!(Node::from_hex(SAMPLE_NODE_HEX).unwrap(), SAMPLE_NODE); |
|
349 | 353 | assert!(Node::from_hex(not_hex).is_err()); |
|
350 | 354 | assert!(Node::from_hex(too_short).is_err()); |
|
351 | 355 | assert!(Node::from_hex(too_long).is_err()); |
|
352 | 356 | } |
|
353 | 357 | |
|
354 | 358 | #[test] |
|
355 | 359 | fn test_node_encode_hex() { |
|
356 | 360 | assert_eq!(format!("{:x}", SAMPLE_NODE), SAMPLE_NODE_HEX); |
|
357 | 361 | } |
|
358 | 362 | |
|
359 | 363 | #[test] |
|
360 | 364 | fn test_prefix_from_to_hex() -> Result<(), FromHexError> { |
|
361 | 365 | assert_eq!(format!("{:x}", NodePrefix::from_hex("0e1")?), "0e1"); |
|
362 | 366 | assert_eq!(format!("{:x}", NodePrefix::from_hex("0e1a")?), "0e1a"); |
|
363 | 367 | assert_eq!( |
|
364 | 368 | format!("{:x}", NodePrefix::from_hex(SAMPLE_NODE_HEX)?), |
|
365 | 369 | SAMPLE_NODE_HEX |
|
366 | 370 | ); |
|
367 | 371 | Ok(()) |
|
368 | 372 | } |
|
369 | 373 | |
|
370 | 374 | #[test] |
|
371 | 375 | fn test_prefix_from_hex_errors() { |
|
372 | 376 | assert!(NodePrefix::from_hex("testgr").is_err()); |
|
373 | 377 | let mut long = format!("{:x}", NULL_NODE); |
|
374 | 378 | long.push('c'); |
|
375 | 379 | assert!(NodePrefix::from_hex(&long).is_err()) |
|
376 | 380 | } |
|
377 | 381 | |
|
378 | 382 | #[test] |
|
379 | 383 | fn test_is_prefix_of() -> Result<(), FromHexError> { |
|
380 | 384 | let mut node_data = [0; NODE_BYTES_LENGTH]; |
|
381 | 385 | node_data[0] = 0x12; |
|
382 | 386 | node_data[1] = 0xca; |
|
383 | 387 | let node = Node::from(node_data); |
|
384 | 388 | assert!(NodePrefix::from_hex("12")?.is_prefix_of(&node)); |
|
385 | 389 | assert!(!NodePrefix::from_hex("1a")?.is_prefix_of(&node)); |
|
386 | 390 | assert!(NodePrefix::from_hex("12c")?.is_prefix_of(&node)); |
|
387 | 391 | assert!(!NodePrefix::from_hex("12d")?.is_prefix_of(&node)); |
|
388 | 392 | Ok(()) |
|
389 | 393 | } |
|
390 | 394 | |
|
391 | 395 | #[test] |
|
392 | 396 | fn test_get_nybble() -> Result<(), FromHexError> { |
|
393 | 397 | let prefix = NodePrefix::from_hex("dead6789cafe")?; |
|
394 | 398 | assert_eq!(prefix.get_nybble(0), 13); |
|
395 | 399 | assert_eq!(prefix.get_nybble(7), 9); |
|
396 | 400 | Ok(()) |
|
397 | 401 | } |
|
398 | 402 | |
|
399 | 403 | #[test] |
|
400 | 404 | fn test_first_different_nybble_even_prefix() { |
|
401 | 405 | let prefix = NodePrefix::from_hex("12ca").unwrap(); |
|
402 | 406 | let mut node = Node::from([0; NODE_BYTES_LENGTH]); |
|
403 | 407 | assert_eq!(prefix.first_different_nybble(&node), Some(0)); |
|
404 | 408 | node.data[0] = 0x13; |
|
405 | 409 | assert_eq!(prefix.first_different_nybble(&node), Some(1)); |
|
406 | 410 | node.data[0] = 0x12; |
|
407 | 411 | assert_eq!(prefix.first_different_nybble(&node), Some(2)); |
|
408 | 412 | node.data[1] = 0xca; |
|
409 | 413 | // now it is a prefix |
|
410 | 414 | assert_eq!(prefix.first_different_nybble(&node), None); |
|
411 | 415 | } |
|
412 | 416 | |
|
413 | 417 | #[test] |
|
414 | 418 | fn test_first_different_nybble_odd_prefix() { |
|
415 | 419 | let prefix = NodePrefix::from_hex("12c").unwrap(); |
|
416 | 420 | let mut node = Node::from([0; NODE_BYTES_LENGTH]); |
|
417 | 421 | assert_eq!(prefix.first_different_nybble(&node), Some(0)); |
|
418 | 422 | node.data[0] = 0x13; |
|
419 | 423 | assert_eq!(prefix.first_different_nybble(&node), Some(1)); |
|
420 | 424 | node.data[0] = 0x12; |
|
421 | 425 | assert_eq!(prefix.first_different_nybble(&node), Some(2)); |
|
422 | 426 | node.data[1] = 0xca; |
|
423 | 427 | // now it is a prefix |
|
424 | 428 | assert_eq!(prefix.first_different_nybble(&node), None); |
|
425 | 429 | } |
|
426 | 430 | } |
|
427 | 431 | |
|
428 | 432 | #[cfg(test)] |
|
429 | 433 | pub use tests::hex_pad_right; |
General Comments 0
You need to be logged in to leave comments.
Login now