##// END OF EJS Templates
rustdoc: fixed warnings about links...
Georges Racinet -
r51272:331a3cbe default
parent child Browse files
Show More
@@ -1,119 +1,119 b''
1 1 use std::fs;
2 2 use std::io;
3 3 use std::os::unix::fs::{MetadataExt, PermissionsExt};
4 4 use std::path::Path;
5 5
/// Permission bits that grant execute rights to owner, group or others.
const EXECFLAGS: u32 = 0o111;

/// Report whether any execute bit is set on the file at `path`.
fn is_executable(path: impl AsRef<Path>) -> Result<bool, io::Error> {
    let mode = fs::metadata(path)?.mode();
    Ok(mode & EXECFLAGS != 0)
}

/// Turn on all execute bits of the file at `path`, leaving the other
/// permission bits untouched.
fn make_executable(path: impl AsRef<Path>) -> Result<(), io::Error> {
    let current_mode = fs::metadata(path.as_ref())?.mode();
    let wanted = fs::Permissions::from_mode((current_mode & 0o777) | EXECFLAGS);
    fs::set_permissions(path, wanted)?;
    Ok(())
}
22 22
/// Give `dst` the same permission bits as `src`.
///
/// A missing `src` is deliberately a no-op: `dst` then simply keeps the
/// umask-derived mode the OS gave it at creation.
fn copy_mode(
    src: impl AsRef<Path>,
    dst: impl AsRef<Path>,
) -> Result<(), io::Error> {
    let src_metadata = match fs::symlink_metadata(src) {
        Ok(meta) => meta,
        // copymode in python has a more complicated handling of FileNotFound
        // error, which we don't need because all it does is applying
        // umask, which the OS already does when we mkdir.
        Err(e) if e.kind() == io::ErrorKind::NotFound => return Ok(()),
        Err(e) => return Err(e),
    };
    let perms = fs::Permissions::from_mode(src_metadata.mode());
    fs::set_permissions(dst, perms)?;
    Ok(())
}
41 41
/// Probe whether the filesystem under `path` honours the executable bit.
///
/// The probe flips the execute bits on a temporary file and reads them back.
/// When a `.hg/wcache` directory exists (or can be created), the result is
/// cached there as `checkisexec`/`checknoexec` marker files so later runs
/// can skip the temp-file dance.
fn check_exec_impl(path: impl AsRef<Path>) -> Result<bool, io::Error> {
    let basedir = path.as_ref().join(".hg");
    let cachedir = basedir.join("wcache");
    let storedir = basedir.join("store");

    if !cachedir.exists() {
        // we want to create the 'cache' directory, not the '.hg' one.
        // Automatically creating '.hg' directory could silently spawn
        // invalid Mercurial repositories. That seems like a bad idea.
        fs::create_dir(&cachedir)
            .and_then(|()| {
                if storedir.exists() {
                    copy_mode(&storedir, &cachedir)
                } else {
                    copy_mode(&basedir, &cachedir)
                }
            })
            // cache creation is best-effort: failure just means we probe
            // in `path` below instead
            .ok();
    }

    let leave_file: bool;
    let checkdir: &Path;
    let checkisexec = cachedir.join("checkisexec");
    let checknoexec = cachedir.join("checknoexec");
    if cachedir.is_dir() {
        // Check if both files already exist in cache and have correct
        // permissions. if so, we assume that permissions work.
        // If not, we delete the files and try again.
        match is_executable(&checkisexec) {
            // no cached result yet: fall through to the temp-file probe
            Err(e) if e.kind() == io::ErrorKind::NotFound => (),
            Err(e) => return Err(e),
            Ok(is_exec) => {
                if is_exec {
                    let noexec_is_exec = match is_executable(&checknoexec) {
                        Err(e) if e.kind() == io::ErrorKind::NotFound => {
                            // (re)create the non-executable witness file
                            fs::write(&checknoexec, "")?;
                            is_executable(&checknoexec)?
                        }
                        Err(e) => return Err(e),
                        Ok(exec) => exec,
                    };
                    if !noexec_is_exec {
                        // check-exec is exec and check-no-exec is not exec
                        return Ok(true);
                    }
                    // cached state is inconsistent: discard and re-probe
                    fs::remove_file(&checknoexec)?;
                }
                fs::remove_file(&checkisexec)?;
            }
        }
        checkdir = &cachedir;
        leave_file = true;
    } else {
        // no cache directory (probably because .hg doesn't exist):
        // check directly in `path` and don't leave the temp file behind
        checkdir = path.as_ref();
        leave_file = false;
    };

    let tmp_file = tempfile::NamedTempFile::new_in(checkdir)?;
    if !is_executable(tmp_file.path())? {
        make_executable(tmp_file.path())?;
        if is_executable(tmp_file.path())? {
            if leave_file {
                // cache the positive result; failure to persist is harmless
                tmp_file.persist(checkisexec).ok();
            }
            return Ok(true);
        }
    }

    Ok(false)
}
114 114
/// This function is a rust rewrite of `checkexec` function from `posix.py`
/// Returns true if the filesystem supports execute permissions.
///
/// Any I/O failure during the check is treated as "not supported".
pub fn check_exec(path: impl AsRef<Path>) -> bool {
    check_exec_impl(path).unwrap_or(false)
}
@@ -1,1069 +1,1069 b''
1 1 // Copyright 2018-2020 Georges Racinet <georges.racinet@octobus.net>
2 2 // and Mercurial contributors
3 3 //
4 4 // This software may be used and distributed according to the terms of the
5 5 // GNU General Public License version 2 or any later version.
6 6 //! Indexing facilities for fast retrieval of `Revision` from `Node`
7 7 //!
8 8 //! This provides a variation on the 16-ary radix tree that is
9 9 //! provided as "nodetree" in revlog.c, ready for append-only persistence
10 10 //! on disk.
11 11 //!
12 12 //! Following existing implicit conventions, the "nodemap" terminology
13 13 //! is used in a more abstract context.
14 14
15 15 use super::{
16 16 node::NULL_NODE, Node, NodePrefix, Revision, RevlogIndex, NULL_REVISION,
17 17 };
18 18
19 19 use bytes_cast::{unaligned, BytesCast};
20 20 use std::cmp::max;
21 21 use std::fmt;
22 22 use std::mem::{self, align_of, size_of};
23 23 use std::ops::Deref;
24 24 use std::ops::Index;
25 25
#[derive(Debug, PartialEq)]
pub enum NodeMapError {
    /// Several revisions matched the queried node prefix
    MultipleResults,
    /// A `Revision` stored in the nodemap could not be found in the index
    RevisionNotInIndex(Revision),
}
32 32
/// Mapping system from Mercurial nodes to revision numbers.
///
/// ## `RevlogIndex` and `NodeMap`
///
/// One way to think about their relationship is that
/// the `NodeMap` is a prefix-oriented reverse index of the `Node` information
/// carried by a [`RevlogIndex`].
///
/// Many of the methods in this trait take a `RevlogIndex` argument
/// which is used for validation of their results. This index must naturally
/// be the one the `NodeMap` is about, and it must be consistent.
///
/// Notably, the `NodeMap` must not store
/// information about more `Revision` values than there are in the index.
/// In these methods, if an encountered `Revision` is not in the index, a
/// [`RevisionNotInIndex`] error is returned.
///
/// In insert operations, the rule is thus that the `RevlogIndex` must always
/// be updated first, and the `NodeMap` second.
///
/// [`RevisionNotInIndex`]: enum.NodeMapError.html#variant.RevisionNotInIndex
/// [`RevlogIndex`]: ../trait.RevlogIndex.html
pub trait NodeMap {
    /// Find the unique `Revision` having the given `Node`
    ///
    /// If no Revision matches the given `Node`, `Ok(None)` is returned.
    fn find_node(
        &self,
        index: &impl RevlogIndex,
        node: &Node,
    ) -> Result<Option<Revision>, NodeMapError> {
        self.find_bin(index, node.into())
    }

    /// Find the unique Revision whose `Node` starts with a given binary prefix
    ///
    /// If no Revision matches the given prefix, `Ok(None)` is returned.
    ///
    /// If several Revisions match the given prefix, a
    /// [MultipleResults](NodeMapError) error is returned.
    fn find_bin(
        &self,
        idx: &impl RevlogIndex,
        prefix: NodePrefix,
    ) -> Result<Option<Revision>, NodeMapError>;

    /// Give the size of the shortest node prefix that determines
    /// the revision uniquely.
    ///
    /// From a binary node prefix, if it is matched in the node map, this
    /// returns the number of hexadecimal digits that would have sufficed
    /// to find the revision uniquely.
    ///
    /// Returns `None` if no `Revision` could be found for the prefix.
    ///
    /// If several Revisions match the given prefix, a
    /// [MultipleResults](NodeMapError) error is returned.
    fn unique_prefix_len_bin(
        &self,
        idx: &impl RevlogIndex,
        node_prefix: NodePrefix,
    ) -> Result<Option<usize>, NodeMapError>;

    /// Same as `unique_prefix_len_bin`, with a full `Node` as input
    fn unique_prefix_len_node(
        &self,
        idx: &impl RevlogIndex,
        node: &Node,
    ) -> Result<Option<usize>, NodeMapError> {
        self.unique_prefix_len_bin(idx, node.into())
    }
}
106 106
/// A `NodeMap` that can be updated in place.
pub trait MutableNodeMap: NodeMap {
    /// Record `node` as the `Node` of revision `rev`.
    fn insert<I: RevlogIndex>(
        &mut self,
        index: &I,
        node: &Node,
        rev: Revision,
    ) -> Result<(), NodeMapError>;
}
115 115
/// Low level NodeTree [`Block`] elements
///
/// These are exactly as for instance on persistent storage.
type RawElement = unaligned::I32Be;
120 120
/// High level representation of values in NodeTree
/// [`Blocks`](struct.Block.html)
///
/// This is the high level representation that most algorithms should
/// use.
#[derive(Clone, Debug, Eq, PartialEq)]
enum Element {
    /// A `Revision` leaf
    Rev(Revision),
    /// Index of another `Block` in the same container
    Block(usize),
    /// Absent edge
    None,
}
132 132
133 133 impl From<RawElement> for Element {
134 134 /// Conversion from low level representation, after endianness conversion.
135 135 ///
136 136 /// See [`Block`](struct.Block.html) for explanation about the encoding.
137 137 fn from(raw: RawElement) -> Element {
138 138 let int = raw.get();
139 139 if int >= 0 {
140 140 Element::Block(int as usize)
141 141 } else if int == -1 {
142 142 Element::None
143 143 } else {
144 144 Element::Rev(-int - 2)
145 145 }
146 146 }
147 147 }
148 148
149 149 impl From<Element> for RawElement {
150 150 fn from(element: Element) -> RawElement {
151 151 RawElement::from(match element {
152 152 Element::None => 0,
153 153 Element::Block(i) => i as i32,
154 154 Element::Rev(rev) => -rev - 2,
155 155 })
156 156 }
157 157 }
158 158
/// A logical block of the `NodeTree`, packed with a fixed size.
///
/// These are always used in container types implementing `Index<Block>`,
/// such as `&Block`
///
/// As an array of integers, its ith element encodes that the
/// ith potential edge from the block, representing the ith hexadecimal digit
/// (nybble) `i` is either:
///
/// - absent (value -1)
/// - another `Block` in the same indexable container (value β‰₯ 0)
/// - a `Revision` leaf (value ≀ -2)
///
/// Endianness has to be fixed for consistency on shared storage across
/// different architectures.
///
/// A key difference with the C `nodetree` is that we need to be
/// able to represent the [`Block`] at index 0, hence -1 is the empty marker
/// rather than 0 and the `Revision` range upper limit of -2 instead of -1.
///
/// Another related difference is that `NULL_REVISION` (-1) is not
/// represented at all, because we want an immutable empty nodetree
/// to be valid.

const ELEMENTS_PER_BLOCK: usize = 16; // number of different values in a nybble

#[derive(Copy, Clone, BytesCast, PartialEq)]
#[repr(transparent)]
pub struct Block([RawElement; ELEMENTS_PER_BLOCK]);
188 188
189 189 impl Block {
190 190 fn new() -> Self {
191 191 let absent_node = RawElement::from(-1);
192 192 Block([absent_node; ELEMENTS_PER_BLOCK])
193 193 }
194 194
195 195 fn get(&self, nybble: u8) -> Element {
196 196 self.0[nybble as usize].into()
197 197 }
198 198
199 199 fn set(&mut self, nybble: u8, element: Element) {
200 200 self.0[nybble as usize] = element.into()
201 201 }
202 202 }
203 203
204 204 impl fmt::Debug for Block {
205 205 /// sparse representation for testing and debugging purposes
206 206 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
207 207 f.debug_map()
208 208 .entries((0..16).filter_map(|i| match self.get(i) {
209 209 Element::None => None,
210 210 element => Some((i, element)),
211 211 }))
212 212 .finish()
213 213 }
214 214 }
215 215
/// A mutable 16-radix tree with the root block logically at the end
///
/// Because of the append only nature of our node trees, we need to
/// keep the original untouched and store new blocks separately.
///
/// The mutable root `Block` is kept apart so that we don't have to rebump
/// it on each insertion.
pub struct NodeTree {
    // blocks from persistent storage; never modified
    readonly: Box<dyn Deref<Target = [Block]> + Send>,
    // blocks appended (or copied-on-write) since load
    growable: Vec<Block>,
    // the root block, logically at index `readonly.len() + growable.len()`
    root: Block,
    // count of readonly blocks shadowed by mutable copies
    // (see `masked_readonly_blocks`)
    masked_inner_blocks: usize,
}
229 229
230 230 impl Index<usize> for NodeTree {
231 231 type Output = Block;
232 232
233 233 fn index(&self, i: usize) -> &Block {
234 234 let ro_len = self.readonly.len();
235 235 if i < ro_len {
236 236 &self.readonly[i]
237 237 } else if i == ro_len + self.growable.len() {
238 238 &self.root
239 239 } else {
240 240 &self.growable[i - ro_len]
241 241 }
242 242 }
243 243 }
244 244
245 245 /// Return `None` unless the `Node` for `rev` has given prefix in `index`.
246 246 fn has_prefix_or_none(
247 247 idx: &impl RevlogIndex,
248 248 prefix: NodePrefix,
249 249 rev: Revision,
250 250 ) -> Result<Option<Revision>, NodeMapError> {
251 251 idx.node(rev)
252 252 .ok_or(NodeMapError::RevisionNotInIndex(rev))
253 253 .map(|node| {
254 254 if prefix.is_prefix_of(node) {
255 255 Some(rev)
256 256 } else {
257 257 None
258 258 }
259 259 })
260 260 }
261 261
/// validate that the candidate's node starts indeed with given prefix,
/// and treat ambiguities related to `NULL_REVISION`.
///
/// From the data in the NodeTree, one can only conclude that some
/// revision is the only one for a *subprefix* of the one being looked up.
fn validate_candidate(
    idx: &impl RevlogIndex,
    prefix: NodePrefix,
    candidate: (Option<Revision>, usize),
) -> Result<(Option<Revision>, usize), NodeMapError> {
    let (rev, steps) = candidate;
    if let Some(nz_nybble) = prefix.first_different_nybble(&NULL_NODE) {
        // the prefix has a non-zero nybble, so NULL_REVISION cannot match
        // it: only the candidate (if any) needs double-checking against
        // the index
        rev.map_or(Ok((None, steps)), |r| {
            has_prefix_or_none(idx, prefix, r)
                .map(|opt| (opt, max(steps, nz_nybble + 1)))
        })
    } else {
        // the prefix is only made of zeros; NULL_REVISION always matches it
        // and any other *valid* result is an ambiguity
        match rev {
            None => Ok((Some(NULL_REVISION), steps + 1)),
            Some(r) => match has_prefix_or_none(idx, prefix, r)? {
                None => Ok((Some(NULL_REVISION), steps + 1)),
                _ => Err(NodeMapError::MultipleResults),
            },
        }
    }
}
290 290
impl NodeTree {
    /// Initiate a NodeTree from an immutable slice-like of `Block`
    ///
    /// We keep `readonly` and clone its root block if it isn't empty.
    fn new(readonly: Box<dyn Deref<Target = [Block]> + Send>) -> Self {
        // the last readonly block, if any, is the tree's current root;
        // it is copied so insertions can mutate it without touching
        // `readonly`
        let root = readonly.last().cloned().unwrap_or_else(Block::new);
        NodeTree {
            readonly,
            growable: Vec::new(),
            root,
            masked_inner_blocks: 0,
        }
    }

    /// Create from an opaque bunch of bytes
    ///
    /// The created `NodeTreeBytes` from `buffer`,
    /// of which exactly `amount` bytes are used.
    ///
    /// - `buffer` could be derived from `PyBuffer` and `Mmap` objects.
    /// - `offset` allows for the final file format to include fixed data
    ///   (generation number, behavioural flags)
    /// - `amount` is expressed in bytes, and is not automatically derived from
    ///   `bytes`, so that a caller that manages them atomically can perform
    ///   temporary disk serializations and still rollback easily if needed.
    ///   First use-case for this would be to support Mercurial shell hooks.
    ///
    /// panics if `buffer` is smaller than `amount`
    pub fn load_bytes(
        bytes: Box<dyn Deref<Target = [u8]> + Send>,
        amount: usize,
    ) -> Self {
        NodeTree::new(Box::new(NodeTreeBytes::new(bytes, amount)))
    }

    /// Retrieve added `Block` and the original immutable data
    ///
    /// The root is appended to the added blocks only when it differs from
    /// the last readonly block (i.e. when something was actually inserted).
    pub fn into_readonly_and_added(
        self,
    ) -> (Box<dyn Deref<Target = [Block]> + Send>, Vec<Block>) {
        let mut vec = self.growable;
        let readonly = self.readonly;
        if readonly.last() != Some(&self.root) {
            vec.push(self.root);
        }
        (readonly, vec)
    }

    /// Retrieve added `Blocks` as bytes, ready to be written to persistent
    /// storage
    pub fn into_readonly_and_added_bytes(
        self,
    ) -> (Box<dyn Deref<Target = [Block]> + Send>, Vec<u8>) {
        let (readonly, vec) = self.into_readonly_and_added();
        // Prevent running `v`'s destructor so we are in complete control
        // of the allocation.
        let vec = mem::ManuallyDrop::new(vec);

        // Transmute the `Vec<Block>` to a `Vec<u8>`. Blocks are contiguous
        // bytes, so this is perfectly safe.
        let bytes = unsafe {
            // Check for compatible allocation layout.
            // (Optimized away by constant-folding + dead code elimination.)
            assert_eq!(size_of::<Block>(), 64);
            assert_eq!(align_of::<Block>(), 1);

            // /!\ Any use of `vec` after this is use-after-free.
            // TODO: use `into_raw_parts` once stabilized
            Vec::from_raw_parts(
                vec.as_ptr() as *mut u8,
                vec.len() * size_of::<Block>(),
                vec.capacity() * size_of::<Block>(),
            )
        };
        (readonly, bytes)
    }

    /// Total number of blocks
    fn len(&self) -> usize {
        // readonly blocks + growable blocks + the always-present root
        self.readonly.len() + self.growable.len() + 1
    }

    /// Implemented for completeness
    ///
    /// A `NodeTree` always has at least the mutable root block.
    #[allow(dead_code)]
    fn is_empty(&self) -> bool {
        false
    }

    /// Main working method for `NodeTree` searches
    ///
    /// The first returned value is the result of analysing `NodeTree` data
    /// *alone*: whereas `None` guarantees that the given prefix is absent
    /// from the `NodeTree` data (but still could match `NULL_NODE`), with
    /// `Some(rev)`, it is to be understood that `rev` is the unique `Revision`
    /// that could match the prefix. Actually, all that can be inferred from
    /// the `NodeTree` data is that `rev` is the revision with the longest
    /// common node prefix with the given prefix.
    ///
    /// The second returned value is the size of the smallest subprefix
    /// of `prefix` that would give the same result, i.e. not the
    /// `MultipleResults` error variant (again, using only the data of the
    /// `NodeTree`).
    fn lookup(
        &self,
        prefix: NodePrefix,
    ) -> Result<(Option<Revision>, usize), NodeMapError> {
        for (i, visit_item) in self.visit(prefix).enumerate() {
            if let Some(opt) = visit_item.final_revision() {
                return Ok((opt, i + 1));
            }
        }
        // the whole prefix was consumed without reaching a leaf or an
        // absent slot: several revisions share it
        Err(NodeMapError::MultipleResults)
    }

    /// Walk the tree from the root block down, consuming one nybble of
    /// `prefix` per visited block.
    fn visit(&self, prefix: NodePrefix) -> NodeTreeVisitor {
        NodeTreeVisitor {
            nt: self,
            prefix,
            visit: self.len() - 1, // global index of the root block
            nybble_idx: 0,
            done: false,
        }
    }
    /// Return a mutable reference for `Block` at index `idx`.
    ///
    /// If `idx` lies in the immutable area, then the reference is to
    /// a newly appended copy.
    ///
    /// Returns (new_idx, mut_ref, glen) where
    ///
    /// - `new_idx` is the index of the mutable `Block`
    /// - `mut_ref` is a mutable reference to the mutable Block.
    /// - `glen` is the new length of `self.growable`
    ///
    /// Note: the caller wouldn't be allowed to query `self.growable.len()`
    /// itself because of the mutable borrow taken with the returned `Block`
    fn mutable_block(&mut self, idx: usize) -> (usize, &mut Block, usize) {
        let ro_blocks = &self.readonly;
        let ro_len = ro_blocks.len();
        let glen = self.growable.len();
        if idx < ro_len {
            // copy-on-write: shadow the readonly block with a copy
            // appended to the growable part
            self.masked_inner_blocks += 1;
            self.growable.push(ro_blocks[idx]);
            (glen + ro_len, &mut self.growable[glen], glen + 1)
        } else if glen + ro_len == idx {
            (idx, &mut self.root, glen)
        } else {
            (idx, &mut self.growable[idx - ro_len], glen)
        }
    }

    /// Main insertion method
    ///
    /// This will dive in the node tree to find the deepest `Block` for
    /// `node`, split it as much as needed and record `node` in there.
    /// The method then backtracks, updating references in all the visited
    /// blocks from the root.
    ///
    /// All the mutated `Block` are copied first to the growable part if
    /// needed. That happens for those in the immutable part except the root.
    pub fn insert<I: RevlogIndex>(
        &mut self,
        index: &I,
        node: &Node,
        rev: Revision,
    ) -> Result<(), NodeMapError> {
        // NOTE(review): this binds a `&usize` (reference to a temporary);
        // the additions below go through `&usize + usize`.
        let ro_len = &self.readonly.len();

        let mut visit_steps: Vec<_> = self.visit(node.into()).collect();
        let read_nybbles = visit_steps.len();
        // visit_steps cannot be empty, since we always visit the root block
        let deepest = visit_steps.pop().unwrap();

        let (mut block_idx, mut block, mut glen) =
            self.mutable_block(deepest.block_idx);

        if let Element::Rev(old_rev) = deepest.element {
            let old_node = index
                .node(old_rev)
                .ok_or(NodeMapError::RevisionNotInIndex(old_rev))?;
            if old_node == node {
                return Ok(()); // avoid creating lots of useless blocks
            }

            // Looping over the tail of nybbles in both nodes, creating
            // new blocks until we find the difference
            let mut new_block_idx = ro_len + glen;
            let mut nybble = deepest.nybble;
            for nybble_pos in read_nybbles..node.nybbles_len() {
                block.set(nybble, Element::Block(new_block_idx));

                let new_nybble = node.get_nybble(nybble_pos);
                let old_nybble = old_node.get_nybble(nybble_pos);

                if old_nybble == new_nybble {
                    // still a common prefix: chain one more empty block
                    self.growable.push(Block::new());
                    block = &mut self.growable[glen];
                    glen += 1;
                    new_block_idx += 1;
                    nybble = new_nybble;
                } else {
                    // first differing nybble: record both leaves and stop
                    let mut new_block = Block::new();
                    new_block.set(old_nybble, Element::Rev(old_rev));
                    new_block.set(new_nybble, Element::Rev(rev));
                    self.growable.push(new_block);
                    break;
                }
            }
        } else {
            // Free slot in the deepest block: no splitting has to be done
            block.set(deepest.nybble, Element::Rev(rev));
        }

        // Backtrack over visit steps to update references
        while let Some(visited) = visit_steps.pop() {
            let to_write = Element::Block(block_idx);
            if visit_steps.is_empty() {
                self.root.set(visited.nybble, to_write);
                break;
            }
            let (new_idx, block, _) = self.mutable_block(visited.block_idx);
            if block.get(visited.nybble) == to_write {
                // reference already correct: all shallower blocks are
                // unchanged too
                break;
            }
            block.set(visited.nybble, to_write);
            block_idx = new_idx;
        }
        Ok(())
    }

    /// Make the whole `NodeTree` logically empty, without touching the
    /// immutable part.
    pub fn invalidate_all(&mut self) {
        self.root = Block::new();
        self.growable = Vec::new();
        self.masked_inner_blocks = self.readonly.len();
    }

    /// Return the number of blocks in the readonly part that are currently
    /// masked in the mutable part.
    ///
    /// The `NodeTree` structure has no efficient way to know how many blocks
    /// are already unreachable in the readonly part.
    ///
    /// After a call to `invalidate_all()`, the returned number can be actually
    /// bigger than the whole readonly part, a conventional way to mean that
    /// all the readonly blocks have been masked. This is what is really
    /// useful to the caller and does not require to know how many were
    /// actually unreachable to begin with.
    pub fn masked_readonly_blocks(&self) -> usize {
        if let Some(readonly_root) = self.readonly.last() {
            if readonly_root == &self.root {
                return 0;
            }
        } else {
            return 0;
        }
        // +1 accounts for the readonly root, masked by `self.root`
        self.masked_inner_blocks + 1
    }
}
552 552
/// A byte buffer viewed as a sequence of `Block`s, without copying.
pub struct NodeTreeBytes {
    buffer: Box<dyn Deref<Target = [u8]> + Send>,
    // number of whole `Block`s considered part of the tree
    len_in_blocks: usize,
}
557 557
558 558 impl NodeTreeBytes {
559 559 fn new(
560 560 buffer: Box<dyn Deref<Target = [u8]> + Send>,
561 561 amount: usize,
562 562 ) -> Self {
563 563 assert!(buffer.len() >= amount);
564 564 let len_in_blocks = amount / size_of::<Block>();
565 565 NodeTreeBytes {
566 566 buffer,
567 567 len_in_blocks,
568 568 }
569 569 }
570 570 }
571 571
impl Deref for NodeTreeBytes {
    type Target = [Block];

    /// Reinterpret the first `len_in_blocks` blocks of `buffer` as `[Block]`.
    fn deref(&self) -> &[Block] {
        Block::slice_from_bytes(&self.buffer, self.len_in_blocks)
            // `NodeTreeBytes::new` already asserted that `self.buffer` is
            // large enough.
            .unwrap()
            .0
    }
}
583 583
/// Iterator state for walking a `NodeTree` along the nybbles of a prefix.
struct NodeTreeVisitor<'n> {
    nt: &'n NodeTree,
    // the prefix whose nybbles drive the descent
    prefix: NodePrefix,
    // global index of the next block to read (starts at the root)
    visit: usize,
    // position of the next nybble of `prefix` to consume
    nybble_idx: usize,
    // set once a non-`Block` element has been yielded
    done: bool,
}
591 591
/// One step of a `NodeTreeVisitor` walk: the element found for `nybble`
/// in the block at `block_idx`.
#[derive(Debug, PartialEq, Clone)]
struct NodeTreeVisitItem {
    block_idx: usize,
    nybble: u8,
    element: Element,
}
598 598
impl<'n> Iterator for NodeTreeVisitor<'n> {
    type Item = NodeTreeVisitItem;

    fn next(&mut self) -> Option<Self::Item> {
        // stop once a leaf/absent element was hit, or when the prefix
        // has been fully consumed
        if self.done || self.nybble_idx >= self.prefix.nybbles_len() {
            return None;
        }

        let nybble = self.prefix.get_nybble(self.nybble_idx);
        self.nybble_idx += 1;

        let visit = self.visit;
        let element = self.nt[visit].get(nybble);
        if let Element::Block(idx) = element {
            // descend into the child block on the next iteration
            self.visit = idx;
        } else {
            // `Rev` or `None`: the walk cannot go deeper
            self.done = true;
        }

        Some(NodeTreeVisitItem {
            block_idx: visit,
            nybble,
            element,
        })
    }
}
625 625
626 626 impl NodeTreeVisitItem {
627 627 // Return `Some(opt)` if this item is final, with `opt` being the
628 628 // `Revision` that it may represent.
629 629 //
630 630 // If the item is not terminal, return `None`
631 631 fn final_revision(&self) -> Option<Option<Revision>> {
632 632 match self.element {
633 633 Element::Block(_) => None,
634 634 Element::Rev(r) => Some(Some(r)),
635 635 Element::None => Some(None),
636 636 }
637 637 }
638 638 }
639 639
impl From<Vec<Block>> for NodeTree {
    /// Build a `NodeTree` whose readonly part is the given blocks.
    fn from(vec: Vec<Block>) -> Self {
        Self::new(Box::new(vec))
    }
}
645 645
646 646 impl fmt::Debug for NodeTree {
647 647 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
648 648 let readonly: &[Block] = &*self.readonly;
649 649 write!(
650 650 f,
651 651 "readonly: {:?}, growable: {:?}, root: {:?}",
652 652 readonly, self.growable, self.root
653 653 )
654 654 }
655 655 }
656 656
impl Default for NodeTree {
    /// Create a fully mutable empty NodeTree
    fn default() -> Self {
        // empty readonly part; everything will live in `growable`/`root`
        NodeTree::new(Box::new(Vec::new()))
    }
}
663 663
664 664 impl NodeMap for NodeTree {
665 665 fn find_bin<'a>(
666 666 &self,
667 667 idx: &impl RevlogIndex,
668 668 prefix: NodePrefix,
669 669 ) -> Result<Option<Revision>, NodeMapError> {
670 670 validate_candidate(idx, prefix, self.lookup(prefix)?)
671 671 .map(|(opt, _shortest)| opt)
672 672 }
673 673
674 674 fn unique_prefix_len_bin<'a>(
675 675 &self,
676 676 idx: &impl RevlogIndex,
677 677 prefix: NodePrefix,
678 678 ) -> Result<Option<usize>, NodeMapError> {
679 679 validate_candidate(idx, prefix, self.lookup(prefix)?)
680 680 .map(|(opt, shortest)| opt.map(|_rev| shortest))
681 681 }
682 682 }
683 683
684 684 #[cfg(test)]
685 685 mod tests {
686 686 use super::NodeMapError::*;
687 687 use super::*;
688 688 use crate::revlog::node::{hex_pad_right, Node};
689 689 use std::collections::HashMap;
690 690
691 691 /// Creates a `Block` using a syntax close to the `Debug` output
692 692 macro_rules! block {
693 693 {$($nybble:tt : $variant:ident($val:tt)),*} => (
694 694 {
695 695 let mut block = Block::new();
696 696 $(block.set($nybble, Element::$variant($val)));*;
697 697 block
698 698 }
699 699 )
700 700 }
701 701
702 702 #[test]
703 703 fn test_block_debug() {
704 704 let mut block = Block::new();
705 705 block.set(1, Element::Rev(3));
706 706 block.set(10, Element::Block(0));
707 707 assert_eq!(format!("{:?}", block), "{1: Rev(3), 10: Block(0)}");
708 708 }
709 709
710 710 #[test]
711 711 fn test_block_macro() {
712 712 let block = block! {5: Block(2)};
713 713 assert_eq!(format!("{:?}", block), "{5: Block(2)}");
714 714
715 715 let block = block! {13: Rev(15), 5: Block(2)};
716 716 assert_eq!(format!("{:?}", block), "{5: Block(2), 13: Rev(15)}");
717 717 }
718 718
719 719 #[test]
720 720 fn test_raw_block() {
721 721 let mut raw = [255u8; 64];
722 722
723 723 let mut counter = 0;
724 724 for val in [0_i32, 15, -2, -1, -3].iter() {
725 725 for byte in val.to_be_bytes().iter() {
726 726 raw[counter] = *byte;
727 727 counter += 1;
728 728 }
729 729 }
730 730 let (block, _) = Block::from_bytes(&raw).unwrap();
731 731 assert_eq!(block.get(0), Element::Block(0));
732 732 assert_eq!(block.get(1), Element::Block(15));
733 733 assert_eq!(block.get(3), Element::None);
734 734 assert_eq!(block.get(2), Element::Rev(0));
735 735 assert_eq!(block.get(4), Element::Rev(1));
736 736 }
737 737
738 738 type TestIndex = HashMap<Revision, Node>;
739 739
740 740 impl RevlogIndex for TestIndex {
741 741 fn node(&self, rev: Revision) -> Option<&Node> {
742 742 self.get(&rev)
743 743 }
744 744
745 745 fn len(&self) -> usize {
746 746 self.len()
747 747 }
748 748 }
749 749
750 750 /// Pad hexadecimal Node prefix with zeros on the right
751 751 ///
752 752 /// This avoids having to repeatedly write very long hexadecimal
753 753 /// strings for test data, and brings actual hash size independency.
754 754 #[cfg(test)]
755 755 fn pad_node(hex: &str) -> Node {
756 756 Node::from_hex(&hex_pad_right(hex)).unwrap()
757 757 }
758 758
759 759 /// Pad hexadecimal Node prefix with zeros on the right, then insert
760 760 fn pad_insert(idx: &mut TestIndex, rev: Revision, hex: &str) {
761 761 idx.insert(rev, pad_node(hex));
762 762 }
763 763
764 764 fn sample_nodetree() -> NodeTree {
765 765 NodeTree::from(vec![
766 766 block![0: Rev(9)],
767 767 block![0: Rev(0), 1: Rev(9)],
768 768 block![0: Block(1), 1:Rev(1)],
769 769 ])
770 770 }
771 771
772 772 fn hex(s: &str) -> NodePrefix {
773 773 NodePrefix::from_hex(s).unwrap()
774 774 }
775 775
776 776 #[test]
777 777 fn test_nt_debug() {
778 778 let nt = sample_nodetree();
779 779 assert_eq!(
780 780 format!("{:?}", nt),
781 781 "readonly: \
782 782 [{0: Rev(9)}, {0: Rev(0), 1: Rev(9)}, {0: Block(1), 1: Rev(1)}], \
783 783 growable: [], \
784 784 root: {0: Block(1), 1: Rev(1)}",
785 785 );
786 786 }
787 787
    #[test]
    fn test_immutable_find_simplest() -> Result<(), NodeMapError> {
        let mut idx: TestIndex = HashMap::new();
        pad_insert(&mut idx, 1, "1234deadcafe");

        let nt = NodeTree::from(vec![block! {1: Rev(1)}]);
        // Any prefix of the single stored node resolves to it…
        assert_eq!(nt.find_bin(&idx, hex("1"))?, Some(1));
        assert_eq!(nt.find_bin(&idx, hex("12"))?, Some(1));
        assert_eq!(nt.find_bin(&idx, hex("1234de"))?, Some(1));
        // …while prefixes that diverge from it find nothing.
        assert_eq!(nt.find_bin(&idx, hex("1a"))?, None);
        assert_eq!(nt.find_bin(&idx, hex("ab"))?, None);

        // and with full binary Nodes
        assert_eq!(nt.find_node(&idx, idx.get(&1).unwrap())?, Some(1));
        let unknown = Node::from_hex(&hex_pad_right("3d")).unwrap();
        assert_eq!(nt.find_node(&idx, &unknown)?, None);
        Ok(())
    }
806 806
    #[test]
    fn test_immutable_find_one_jump() {
        let mut idx = TestIndex::new();
        pad_insert(&mut idx, 9, "012");
        pad_insert(&mut idx, 0, "00a");

        let nt = sample_nodetree();

        // "0" is a prefix of both stored nodes, hence ambiguous.
        assert_eq!(nt.find_bin(&idx, hex("0")), Err(MultipleResults));
        assert_eq!(nt.find_bin(&idx, hex("01")), Ok(Some(9)));
        // "00" is still ambiguous (node "00a…" vs the all-zeros null node).
        assert_eq!(nt.find_bin(&idx, hex("00")), Err(MultipleResults));
        assert_eq!(nt.find_bin(&idx, hex("00a")), Ok(Some(0)));
        assert_eq!(nt.unique_prefix_len_bin(&idx, hex("00a")), Ok(Some(3)));
        // All-zeros prefixes resolve to the null revision.
        assert_eq!(nt.find_bin(&idx, hex("000")), Ok(Some(NULL_REVISION)));
    }
822 822
    #[test]
    fn test_mutated_find() -> Result<(), NodeMapError> {
        let mut idx = TestIndex::new();
        pad_insert(&mut idx, 9, "012");
        pad_insert(&mut idx, 0, "00a");
        pad_insert(&mut idx, 2, "cafe");
        pad_insert(&mut idx, 3, "15");
        pad_insert(&mut idx, 1, "10");

        // A tree mixing readonly blocks (from `sample_nodetree`) with a
        // growable block and a mutated root.
        let nt = NodeTree {
            readonly: sample_nodetree().readonly,
            growable: vec![block![0: Rev(1), 5: Rev(3)]],
            root: block![0: Block(1), 1:Block(3), 12: Rev(2)],
            masked_inner_blocks: 1,
        };
        assert_eq!(nt.find_bin(&idx, hex("10"))?, Some(1));
        assert_eq!(nt.find_bin(&idx, hex("c"))?, Some(2));
        assert_eq!(nt.unique_prefix_len_bin(&idx, hex("c"))?, Some(1));
        assert_eq!(nt.find_bin(&idx, hex("00")), Err(MultipleResults));
        assert_eq!(nt.find_bin(&idx, hex("000"))?, Some(NULL_REVISION));
        assert_eq!(nt.unique_prefix_len_bin(&idx, hex("000"))?, Some(3));
        assert_eq!(nt.find_bin(&idx, hex("01"))?, Some(9));
        // One masked block was declared above, plus one more counted by
        // the structure itself.
        assert_eq!(nt.masked_readonly_blocks(), 2);
        Ok(())
    }
848 848
    /// Bundles a test index with the `NodeTree` under test, keeping the
    /// two consistent through [`TestNtIndex::insert`].
    struct TestNtIndex {
        index: TestIndex,
        nt: NodeTree,
    }
853 853
    impl TestNtIndex {
        fn new() -> Self {
            TestNtIndex {
                index: HashMap::new(),
                nt: NodeTree::default(),
            }
        }

        /// Insert the (zero-padded) node for `rev` in both the index
        /// and the node tree.
        fn insert(
            &mut self,
            rev: Revision,
            hex: &str,
        ) -> Result<(), NodeMapError> {
            let node = pad_node(hex);
            self.index.insert(rev, node);
            self.nt.insert(&self.index, &node, rev)?;
            Ok(())
        }

        /// Look up a hexadecimal prefix in the node tree.
        fn find_hex(
            &self,
            prefix: &str,
        ) -> Result<Option<Revision>, NodeMapError> {
            self.nt.find_bin(&self.index, hex(prefix))
        }

        /// Length of the shortest unambiguous prefix for `prefix`.
        fn unique_prefix_len_hex(
            &self,
            prefix: &str,
        ) -> Result<Option<usize>, NodeMapError> {
            self.nt.unique_prefix_len_bin(&self.index, hex(prefix))
        }

        /// Drain `added` and restart a new one
        ///
        /// All current blocks (readonly, growable, root) become the
        /// readonly section of a fresh `NodeTree`.
        fn commit(self) -> Self {
            let mut as_vec: Vec<Block> =
                self.nt.readonly.iter().copied().collect();
            as_vec.extend(self.nt.growable);
            as_vec.push(self.nt.root);

            Self {
                index: self.index,
                nt: NodeTree::from(as_vec),
            }
        }
    }
900 900
    #[test]
    fn test_insert_full_mutable() -> Result<(), NodeMapError> {
        let mut idx = TestNtIndex::new();
        idx.insert(0, "1234")?;
        assert_eq!(idx.find_hex("1")?, Some(0));
        assert_eq!(idx.find_hex("12")?, Some(0));

        // let's trigger a simple split
        idx.insert(1, "1a34")?;
        assert_eq!(idx.nt.growable.len(), 1);
        assert_eq!(idx.find_hex("12")?, Some(0));
        assert_eq!(idx.find_hex("1a")?, Some(1));

        // reinserting is a no_op
        idx.insert(1, "1a34")?;
        assert_eq!(idx.nt.growable.len(), 1);
        assert_eq!(idx.find_hex("12")?, Some(0));
        assert_eq!(idx.find_hex("1a")?, Some(1));

        // a third node under the "1a" prefix splits once more
        idx.insert(2, "1a01")?;
        assert_eq!(idx.nt.growable.len(), 2);
        assert_eq!(idx.find_hex("1a"), Err(NodeMapError::MultipleResults));
        assert_eq!(idx.find_hex("12")?, Some(0));
        assert_eq!(idx.find_hex("1a3")?, Some(1));
        assert_eq!(idx.find_hex("1a0")?, Some(2));
        assert_eq!(idx.find_hex("1a12")?, None);

        // now let's make it split and create more than one additional block
        idx.insert(3, "1a345")?;
        assert_eq!(idx.nt.growable.len(), 4);
        assert_eq!(idx.find_hex("1a340")?, Some(1));
        assert_eq!(idx.find_hex("1a345")?, Some(3));
        assert_eq!(idx.find_hex("1a341")?, None);

        // there's no readonly block to mask
        assert_eq!(idx.nt.masked_readonly_blocks(), 0);
        Ok(())
    }
939 939
    #[test]
    fn test_unique_prefix_len_zero_prefix() {
        let mut idx = TestNtIndex::new();
        idx.insert(0, "00000abcd").unwrap();

        // Ambiguous with the all-zeros null node.
        assert_eq!(idx.find_hex("000"), Err(NodeMapError::MultipleResults));
        // in the nodetree proper, this will be found at the first nybble
        // yet the correct answer for unique_prefix_len is not 1, nor 1+1,
        // but the first difference with `NULL_NODE`
        assert_eq!(idx.unique_prefix_len_hex("00000a"), Ok(Some(6)));
        assert_eq!(idx.unique_prefix_len_hex("00000ab"), Ok(Some(6)));

        // same with odd result
        idx.insert(1, "00123").unwrap();
        assert_eq!(idx.unique_prefix_len_hex("001"), Ok(Some(3)));
        assert_eq!(idx.unique_prefix_len_hex("0012"), Ok(Some(3)));

        // these are unchanged of course
        assert_eq!(idx.unique_prefix_len_hex("00000a"), Ok(Some(6)));
        assert_eq!(idx.unique_prefix_len_hex("00000ab"), Ok(Some(6)));
    }
961 961
    #[test]
    fn test_insert_extreme_splitting() -> Result<(), NodeMapError> {
        // check that the splitting loop is long enough
        let mut nt_idx = TestNtIndex::new();
        let nt = &mut nt_idx.nt;
        let idx = &mut nt_idx.index;

        // Two full-width nodes differing only in their very last nybble,
        // forcing a split at maximum depth.
        let node0_hex = hex_pad_right("444444");
        let mut node1_hex = hex_pad_right("444444");
        node1_hex.pop();
        node1_hex.push('5');
        let node0 = Node::from_hex(&node0_hex).unwrap();
        let node1 = Node::from_hex(&node1_hex).unwrap();

        idx.insert(0, node0);
        nt.insert(idx, &node0, 0)?;
        idx.insert(1, node1);
        nt.insert(idx, &node1, 1)?;

        assert_eq!(nt.find_bin(idx, (&node0).into())?, Some(0));
        assert_eq!(nt.find_bin(idx, (&node1).into())?, Some(1));
        Ok(())
    }
985 985
    #[test]
    fn test_insert_partly_immutable() -> Result<(), NodeMapError> {
        let mut idx = TestNtIndex::new();
        idx.insert(0, "1234")?;
        idx.insert(1, "1235")?;
        idx.insert(2, "131")?;
        idx.insert(3, "cafe")?;
        // Freeze everything inserted so far into the readonly section.
        let mut idx = idx.commit();
        assert_eq!(idx.find_hex("1234")?, Some(0));
        assert_eq!(idx.find_hex("1235")?, Some(1));
        assert_eq!(idx.find_hex("131")?, Some(2));
        assert_eq!(idx.find_hex("cafe")?, Some(3));
        // we did not add anything since init from readonly
        assert_eq!(idx.nt.masked_readonly_blocks(), 0);

        idx.insert(4, "123A")?;
        assert_eq!(idx.find_hex("1234")?, Some(0));
        assert_eq!(idx.find_hex("1235")?, Some(1));
        assert_eq!(idx.find_hex("131")?, Some(2));
        assert_eq!(idx.find_hex("cafe")?, Some(3));
        assert_eq!(idx.find_hex("123A")?, Some(4));
        // we masked blocks for all prefixes of "123", including the root
        assert_eq!(idx.nt.masked_readonly_blocks(), 4);

        eprintln!("{:?}", idx.nt);
        idx.insert(5, "c0")?;
        assert_eq!(idx.find_hex("cafe")?, Some(3));
        assert_eq!(idx.find_hex("c0")?, Some(5));
        assert_eq!(idx.find_hex("c1")?, None);
        assert_eq!(idx.find_hex("1234")?, Some(0));
        // inserting "c0" is just splitting the 'c' slot of the mutable root,
        // it doesn't mask anything
        assert_eq!(idx.nt.masked_readonly_blocks(), 4);

        Ok(())
    }
1022 1022
    #[test]
    fn test_invalidate_all() -> Result<(), NodeMapError> {
        let mut idx = TestNtIndex::new();
        idx.insert(0, "1234")?;
        idx.insert(1, "1235")?;
        idx.insert(2, "131")?;
        idx.insert(3, "cafe")?;
        let mut idx = idx.commit();

        idx.nt.invalidate_all();

        // After invalidation, nothing can be found any more.
        assert_eq!(idx.find_hex("1234")?, None);
        assert_eq!(idx.find_hex("1235")?, None);
        assert_eq!(idx.find_hex("131")?, None);
        assert_eq!(idx.find_hex("cafe")?, None);
        // all the readonly blocks have been masked, this is the
        // conventional expected response
        assert_eq!(idx.nt.masked_readonly_blocks(), idx.nt.readonly.len() + 1);
        Ok(())
    }
1043 1043
    #[test]
    fn test_into_added_empty() {
        // A tree built purely from readonly data has no "added" part.
        assert!(sample_nodetree().into_readonly_and_added().1.is_empty());
        assert!(sample_nodetree()
            .into_readonly_and_added_bytes()
            .1
            .is_empty());
    }
1052 1052
    #[test]
    fn test_into_added_bytes() -> Result<(), NodeMapError> {
        let mut idx = TestNtIndex::new();
        idx.insert(0, "1234")?;
        let mut idx = idx.commit();
        idx.insert(4, "cafe")?;
        let (_, bytes) = idx.nt.into_readonly_and_added_bytes();

        // only the root block has been changed
        assert_eq!(bytes.len(), size_of::<Block>());
        // big endian for -2
        assert_eq!(&bytes[4..2 * 4], [255, 255, 255, 254]);
        // big endian for -6
        assert_eq!(&bytes[12 * 4..13 * 4], [255, 255, 255, 250]);
        Ok(())
    }
1069 1069 }
@@ -1,532 +1,532 b''
1 1 // utils module
2 2 //
3 3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
4 4 //
5 5 // This software may be used and distributed according to the terms of the
6 6 // GNU General Public License version 2 or any later version.
7 7
8 8 //! Contains useful functions, traits, structs, etc. for use in core.
9 9
10 10 use crate::errors::{HgError, IoErrorContext};
11 11 use crate::utils::hg_path::HgPath;
12 12 use im_rc::ordmap::DiffItem;
13 13 use im_rc::ordmap::OrdMap;
14 14 use std::cell::Cell;
15 15 use std::fmt;
16 16 use std::{io::Write, ops::Deref};
17 17
18 18 pub mod debug;
19 19 pub mod files;
20 20 pub mod hg_path;
21 21 pub mod path_auditor;
22 22
/// Useful until rust/issues/56345 is stable
///
/// Return the index of the first occurrence of `needle` in `slice`,
/// or `None` when `needle` does not occur.
///
/// An empty `needle` is considered found at index 0 (matching the
/// convention of `str::find`); without this special case,
/// `slice.windows(0)` would panic.
///
/// # Examples
///
/// ```
/// use crate::hg::utils::find_slice_in_slice;
///
/// let haystack = b"This is the haystack".to_vec();
/// assert_eq!(find_slice_in_slice(&haystack, b"the"), Some(8));
/// assert_eq!(find_slice_in_slice(&haystack, b"not here"), None);
/// ```
pub fn find_slice_in_slice<T>(slice: &[T], needle: &[T]) -> Option<usize>
where
    for<'a> &'a [T]: PartialEq,
{
    if needle.is_empty() {
        // `windows(0)` panics; the empty slice is a prefix of anything.
        return Some(0);
    }
    slice
        .windows(needle.len())
        .position(|window| window == needle)
}
42 42
/// Replace every occurrence of the `from` slice with the `to` slice
/// inside the mutable `buf` slice.
///
/// `from` and `to` must have the same length, and `buf` must be at
/// least as long as `from`; otherwise `buf` is left untouched.
///
/// # Examples
///
/// ```
/// use crate::hg::utils::replace_slice;
/// let mut line = b"I hate writing tests!".to_vec();
/// replace_slice(&mut line, b"hate", b"love");
/// assert_eq!(
///     line,
///     b"I love writing tests!".to_vec()
/// );
/// ```
pub fn replace_slice<T>(buf: &mut [T], from: &[T], to: &[T])
where
    T: Clone + PartialEq,
{
    let window = from.len();
    if from.len() != to.len() || buf.len() < window {
        return;
    }
    let mut start = 0;
    while start + window <= buf.len() {
        if buf[start..].starts_with(from) {
            buf[start..start + window].clone_from_slice(to);
        }
        start += 1;
    }
}
69 69
/// `str`-like convenience methods for byte slices.
pub trait SliceExt {
    /// Remove trailing ASCII whitespace.
    fn trim_end(&self) -> &Self;
    /// Remove leading ASCII whitespace.
    fn trim_start(&self) -> &Self;
    /// Remove trailing bytes for which `f` returns `true`.
    fn trim_end_matches(&self, f: impl FnMut(u8) -> bool) -> &Self;
    /// Remove leading bytes for which `f` returns `true`.
    fn trim_start_matches(&self, f: impl FnMut(u8) -> bool) -> &Self;
    /// Remove leading and trailing ASCII whitespace.
    fn trim(&self) -> &Self;
    /// Return the remainder after `needle` if `self` starts with it.
    fn drop_prefix(&self, needle: &Self) -> Option<&Self>;
    /// Split around the first occurrence of the `separator` byte.
    fn split_2(&self, separator: u8) -> Option<(&[u8], &[u8])>;
    /// Split around the first occurrence of the `separator` slice.
    fn split_2_by_slice(&self, separator: &[u8]) -> Option<(&[u8], &[u8])>;
}
80 80
81 81 impl SliceExt for [u8] {
82 82 fn trim_end(&self) -> &[u8] {
83 83 self.trim_end_matches(|byte| byte.is_ascii_whitespace())
84 84 }
85 85
86 86 fn trim_start(&self) -> &[u8] {
87 87 self.trim_start_matches(|byte| byte.is_ascii_whitespace())
88 88 }
89 89
90 90 fn trim_end_matches(&self, mut f: impl FnMut(u8) -> bool) -> &Self {
91 91 if let Some(last) = self.iter().rposition(|&byte| !f(byte)) {
92 92 &self[..=last]
93 93 } else {
94 94 &[]
95 95 }
96 96 }
97 97
98 98 fn trim_start_matches(&self, mut f: impl FnMut(u8) -> bool) -> &Self {
99 99 if let Some(first) = self.iter().position(|&byte| !f(byte)) {
100 100 &self[first..]
101 101 } else {
102 102 &[]
103 103 }
104 104 }
105 105
106 106 /// ```
107 107 /// use hg::utils::SliceExt;
108 108 /// assert_eq!(
109 109 /// b" to trim ".trim(),
110 110 /// b"to trim"
111 111 /// );
112 112 /// assert_eq!(
113 113 /// b"to trim ".trim(),
114 114 /// b"to trim"
115 115 /// );
116 116 /// assert_eq!(
117 117 /// b" to trim".trim(),
118 118 /// b"to trim"
119 119 /// );
120 120 /// ```
121 121 fn trim(&self) -> &[u8] {
122 122 self.trim_start().trim_end()
123 123 }
124 124
125 125 fn drop_prefix(&self, needle: &Self) -> Option<&Self> {
126 126 if self.starts_with(needle) {
127 127 Some(&self[needle.len()..])
128 128 } else {
129 129 None
130 130 }
131 131 }
132 132
133 133 fn split_2(&self, separator: u8) -> Option<(&[u8], &[u8])> {
134 134 let mut iter = self.splitn(2, |&byte| byte == separator);
135 135 let a = iter.next()?;
136 136 let b = iter.next()?;
137 137 Some((a, b))
138 138 }
139 139
140 140 fn split_2_by_slice(&self, separator: &[u8]) -> Option<(&[u8], &[u8])> {
141 141 find_slice_in_slice(self, separator)
142 142 .map(|pos| (&self[..pos], &self[pos + separator.len()..]))
143 143 }
144 144 }
145 145
/// Conversion to an escaped byte representation suitable for display.
pub trait Escaped {
    /// Return bytes escaped for display to the user
    fn escaped_bytes(&self) -> Vec<u8>;
}
150 150
151 151 impl Escaped for u8 {
152 152 fn escaped_bytes(&self) -> Vec<u8> {
153 153 let mut acc = vec![];
154 154 match self {
155 155 c @ b'\'' | c @ b'\\' => {
156 156 acc.push(b'\\');
157 157 acc.push(*c);
158 158 }
159 159 b'\t' => {
160 160 acc.extend(br"\\t");
161 161 }
162 162 b'\n' => {
163 163 acc.extend(br"\\n");
164 164 }
165 165 b'\r' => {
166 166 acc.extend(br"\\r");
167 167 }
168 168 c if (*c < b' ' || *c >= 127) => {
169 169 write!(acc, "\\x{:x}", self).unwrap();
170 170 }
171 171 c => {
172 172 acc.push(*c);
173 173 }
174 174 }
175 175 acc
176 176 }
177 177 }
178 178
impl<'a, T: Escaped> Escaped for &'a [T] {
    /// Escape each item in turn and concatenate the results.
    fn escaped_bytes(&self) -> Vec<u8> {
        self.iter().flat_map(Escaped::escaped_bytes).collect()
    }
}
184 184
impl<T: Escaped> Escaped for Vec<T> {
    /// Delegate to the slice implementation via `Deref`.
    fn escaped_bytes(&self) -> Vec<u8> {
        self.deref().escaped_bytes()
    }
}
190 190
impl<'a> Escaped for &'a HgPath {
    /// Escape the raw bytes of the path.
    fn escaped_bytes(&self) -> Vec<u8> {
        self.as_bytes().escaped_bytes()
    }
}
196 196
197 197 #[cfg(unix)]
198 198 pub fn shell_quote(value: &[u8]) -> Vec<u8> {
199 199 if value.iter().all(|&byte| {
200 200 matches!(
201 201 byte,
202 202 b'a'..=b'z'
203 203 | b'A'..=b'Z'
204 204 | b'0'..=b'9'
205 205 | b'.'
206 206 | b'_'
207 207 | b'/'
208 208 | b'+'
209 209 | b'-'
210 210 )
211 211 }) {
212 212 value.to_owned()
213 213 } else {
214 214 let mut quoted = Vec::with_capacity(value.len() + 2);
215 215 quoted.push(b'\'');
216 216 for &byte in value {
217 217 if byte == b'\'' {
218 218 quoted.push(b'\\');
219 219 }
220 220 quoted.push(byte);
221 221 }
222 222 quoted.push(b'\'');
223 223 quoted
224 224 }
225 225 }
226 226
/// `std::env::current_dir` with the error wrapped as an [`HgError`]
/// carrying the appropriate context.
pub fn current_dir() -> Result<std::path::PathBuf, HgError> {
    std::env::current_dir().map_err(|error| HgError::IoError {
        error,
        context: IoErrorContext::CurrentDir,
    })
}
233 233
/// `std::env::current_exe` with the error wrapped as an [`HgError`]
/// carrying the appropriate context.
pub fn current_exe() -> Result<std::path::PathBuf, HgError> {
    std::env::current_exe().map_err(|error| HgError::IoError {
        error,
        context: IoErrorContext::CurrentExe,
    })
}
240 240
/// Expand `$FOO` and `${FOO}` environment variables in the given byte string
///
/// Undefined variables are left in place, unexpanded. Returns a
/// borrowed `Cow` when nothing needed expanding.
pub fn expand_vars(s: &[u8]) -> std::borrow::Cow<[u8]> {
    lazy_static::lazy_static! {
        /// https://github.com/python/cpython/blob/3.9/Lib/posixpath.py#L301
        /// The `x` makes whitespace ignored.
        /// `-u` disables the Unicode flag, which makes `\w` like Python with the ASCII flag.
        static ref VAR_RE: regex::bytes::Regex =
            regex::bytes::Regex::new(r"(?x-u)
                \$
                (?:
                    (\w+)
                    |
                    \{
                        ([^}]*)
                    \}
                )
                ").unwrap();
    }
    VAR_RE.replace_all(s, |captures: &regex::bytes::Captures| {
        // Exactly one of the two groups matches: group 1 for `$FOO`,
        // group 2 for `${FOO}`.
        let var_name = files::get_os_str_from_bytes(
            captures
                .get(1)
                .or_else(|| captures.get(2))
                .expect("either side of `|` must participate in match")
                .as_bytes(),
        );
        std::env::var_os(var_name)
            .map(files::get_bytes_from_os_str)
            .unwrap_or_else(|| {
                // Referencing an environment variable that does not exist.
                // Leave the $FOO reference as-is.
                captures[0].to_owned()
            })
    })
}
276 276
    #[test]
    fn test_expand_vars() {
        // Modifying process-global state in a test isn’t great,
        // but hopefully this won’t collide with anything.
        std::env::set_var("TEST_EXPAND_VAR", "1");
        assert_eq!(
            expand_vars(b"before/$TEST_EXPAND_VAR/after"),
            &b"before/1/after"[..]
        );
        // Braced form, repeated back to back.
        assert_eq!(
            expand_vars(b"before${TEST_EXPAND_VAR}${TEST_EXPAND_VAR}${TEST_EXPAND_VAR}after"),
            &b"before111after"[..]
        );
        // Undefined variables stay as-is.
        let s = b"before $SOME_LONG_NAME_THAT_WE_ASSUME_IS_NOT_AN_ACTUAL_ENV_VAR after";
        assert_eq!(expand_vars(s), &s[..]);
    }
293 293
/// Outcome of merging the two values bound to the same key, as decided
/// by the `merge` callback of [`ordmap_union_with_merge`].
pub(crate) enum MergeResult<V> {
    /// Keep the value from the left map.
    Left,
    /// Keep the value from the right map.
    Right,
    /// Use this newly-computed value instead of either.
    New(V),
}
299 299
/// Return the union of the two given maps,
/// calling `merge(key, left_value, right_value)` to resolve keys that exist in
/// both.
///
/// Dispatches to one of two strategies depending on the size ratio of
/// the maps; see the inline comments.
///
/// CC <https://github.com/bodil/im-rs/issues/166>
pub(crate) fn ordmap_union_with_merge<K, V>(
    left: OrdMap<K, V>,
    right: OrdMap<K, V>,
    mut merge: impl FnMut(&K, &V, &V) -> MergeResult<V>,
) -> OrdMap<K, V>
where
    K: Clone + Ord,
    V: Clone + PartialEq,
{
    if left.ptr_eq(&right) {
        // One of the two maps is an unmodified clone of the other
        left
    } else if left.len() / 2 > right.len() {
        // When two maps have different sizes,
        // their size difference is a lower bound on
        // how many keys of the larger map are not also in the smaller map.
        // This in turn is a lower bound on the number of differences in
        // `OrdMap::diff` and the "amount of work" that would be done
        // by `ordmap_union_with_merge_by_diff`.
        //
        // Here `left` is more than twice the size of `right`,
        // so the number of differences is more than the total size of
        // `right`. Therefore an algorithm based on iterating `right`
        // is more efficient.
        //
        // This helps a lot when a tiny (or empty) map is merged
        // with a large one.
        ordmap_union_with_merge_by_iter(left, right, merge)
    } else if left.len() < right.len() / 2 {
        // Same as above but with `left` and `right` swapped
        ordmap_union_with_merge_by_iter(right, left, |key, a, b| {
            // Also swapped in `merge` arguments:
            match merge(key, b, a) {
                MergeResult::New(v) => MergeResult::New(v),
                // … and swap back in `merge` result:
                MergeResult::Left => MergeResult::Right,
                MergeResult::Right => MergeResult::Left,
            }
        })
    } else {
        // For maps of similar size, use the algorithm based on `OrdMap::diff`
        ordmap_union_with_merge_by_diff(left, right, merge)
    }
}
349 349
350 350 /// Efficient if `right` is much smaller than `left`
351 351 fn ordmap_union_with_merge_by_iter<K, V>(
352 352 mut left: OrdMap<K, V>,
353 353 right: OrdMap<K, V>,
354 354 mut merge: impl FnMut(&K, &V, &V) -> MergeResult<V>,
355 355 ) -> OrdMap<K, V>
356 356 where
357 357 K: Clone + Ord,
358 358 V: Clone,
359 359 {
360 360 for (key, right_value) in right {
361 361 match left.get(&key) {
362 362 None => {
363 363 left.insert(key, right_value);
364 364 }
365 365 Some(left_value) => match merge(&key, left_value, &right_value) {
366 366 MergeResult::Left => {}
367 367 MergeResult::Right => {
368 368 left.insert(key, right_value);
369 369 }
370 370 MergeResult::New(new_value) => {
371 371 left.insert(key, new_value);
372 372 }
373 373 },
374 374 }
375 375 }
376 376 left
377 377 }
378 378
/// Fallback when both maps are of similar size
///
/// Walks `OrdMap::diff` once and records, for each difference, the
/// insertion that would turn either map into the union; then applies
/// the smaller batch of insertions.
fn ordmap_union_with_merge_by_diff<K, V>(
    mut left: OrdMap<K, V>,
    mut right: OrdMap<K, V>,
    mut merge: impl FnMut(&K, &V, &V) -> MergeResult<V>,
) -> OrdMap<K, V>
where
    K: Clone + Ord,
    V: Clone + PartialEq,
{
    // (key, value) pairs that would need to be inserted in either map
    // in order to turn it into the union.
    //
    // TODO: if/when https://github.com/bodil/im-rs/pull/168 is accepted,
    // change these from `Vec<(K, V)>` to `Vec<(&K, Cow<V>)>`
    // with `left_updates` only borrowing from `right` and `right_updates` from
    // `left`, and with `Cow::Owned` used for `MergeResult::New`.
    //
    // This would allow moving all `.clone()` calls to after we’ve decided
    // which of `right_updates` or `left_updates` to use
    // (value ones becoming `Cow::into_owned`),
    // and avoid making clones we don’t end up using.
    let mut left_updates = Vec::new();
    let mut right_updates = Vec::new();

    for difference in left.diff(&right) {
        match difference {
            // Key only in `right`: `left` is missing it.
            DiffItem::Add(key, value) => {
                left_updates.push((key.clone(), value.clone()))
            }
            // Key only in `left`: `right` is missing it.
            DiffItem::Remove(key, value) => {
                right_updates.push((key.clone(), value.clone()))
            }
            // Key in both with different values: ask `merge`.
            DiffItem::Update {
                old: (key, left_value),
                new: (_, right_value),
            } => match merge(key, left_value, right_value) {
                MergeResult::Left => {
                    right_updates.push((key.clone(), left_value.clone()))
                }
                MergeResult::Right => {
                    left_updates.push((key.clone(), right_value.clone()))
                }
                MergeResult::New(new_value) => {
                    left_updates.push((key.clone(), new_value.clone()));
                    right_updates.push((key.clone(), new_value))
                }
            },
        }
    }
    // Apply the cheaper batch of updates and return that map.
    if left_updates.len() < right_updates.len() {
        for (key, value) in left_updates {
            left.insert(key, value);
        }
        left
    } else {
        for (key, value) in right_updates {
            right.insert(key, value);
        }
        right
    }
}
441 441
/// Join items of the iterable with the given separator, similar to Python’s
/// `separator.join(iter)`.
///
/// Formatting the return value consumes the iterator.
/// Formatting it again will produce an empty string.
pub fn join_display(
    iter: impl IntoIterator<Item = impl fmt::Display>,
    separator: impl fmt::Display,
) -> impl fmt::Display {
    JoinDisplay {
        // Wrapped in a `Cell<Option<…>>` so the iterator can be taken
        // (consumed) from `fmt`, which only gets `&self`.
        iter: Cell::new(Some(iter.into_iter())),
        separator,
    }
}
456 456
/// Implementation detail of [`join_display`].
struct JoinDisplay<I, S> {
    // `Some` until the first `fmt` call consumes the iterator.
    iter: Cell<Option<I>>,
    separator: S,
}
461 461
impl<I, T, S> fmt::Display for JoinDisplay<I, S>
where
    I: Iterator<Item = T>,
    T: fmt::Display,
    S: fmt::Display,
{
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // `Cell::take` leaves `None` behind, so a second formatting
        // pass writes nothing (documented in `join_display`).
        if let Some(mut iter) = self.iter.take() {
            // First item without a preceding separator…
            if let Some(first) = iter.next() {
                first.fmt(f)?;
            }
            // …then the separator before each remaining item.
            for value in iter {
                self.separator.fmt(f)?;
                value.fmt(f)?;
            }
        }
        Ok(())
    }
}
481 481
/// Like `Iterator::filter_map`, but over a fallible iterator of `Result`s.
///
/// The callback is only called for incoming `Ok` values. Errors are passed
/// through as-is. In order to let it use the `?` operator the callback is
/// expected to return a `Result` of `Option`, instead of an `Option` of
/// `Result`.
pub fn filter_map_results<'a, I, F, A, B, E>(
    iter: I,
    f: F,
) -> impl Iterator<Item = Result<B, E>> + 'a
where
    I: Iterator<Item = Result<A, E>> + 'a,
    F: Fn(A) -> Result<Option<B>, E> + 'a,
{
    iter.filter_map(move |result| match result {
        // Incoming errors are forwarded untouched.
        Err(e) => Some(Err(e)),
        Ok(input) => match f(input) {
            // `Ok(None)` means "filtered out": yield nothing.
            Ok(None) => None,
            Ok(Some(output)) => Some(Ok(output)),
            Err(e) => Some(Err(e)),
        },
    })
}
501 501
/// Force the global rayon threadpool to not exceed 16 concurrent threads
/// unless the user has specified a value.
/// This is a stop-gap measure until we figure out why using more than 16
/// threads makes `status` slower for each additional thread.
///
/// TODO find the underlying cause and fix it, then remove this.
///
/// # Errors
///
/// Returns an error if the global threadpool has already been initialized if
/// we try to initialize it.
pub fn cap_default_rayon_threads() -> Result<(), rayon::ThreadPoolBuildError> {
    const THREAD_CAP: usize = 16;

    // An explicit RAYON_NUM_THREADS always wins: do nothing in that case.
    if std::env::var("RAYON_NUM_THREADS").is_err() {
        let available_parallelism = std::thread::available_parallelism()
            .map(usize::from)
            .unwrap_or(1);
        let new_thread_count = THREAD_CAP.min(available_parallelism);
        let res = rayon::ThreadPoolBuilder::new()
            .num_threads(new_thread_count)
            .build_global();
        if res.is_ok() {
            log::trace!(
                "Capped the rayon threadpool to {new_thread_count} threads",
            );
        }
        return res;
    }
    Ok(())
}
@@ -1,812 +1,812 b''
1 1 // hg_path.rs
2 2 //
3 3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
4 4 //
5 5 // This software may be used and distributed according to the terms of the
6 6 // GNU General Public License version 2 or any later version.
7 7
8 8 use crate::utils::SliceExt;
9 9 use std::borrow::Borrow;
10 10 use std::borrow::Cow;
11 11 use std::ffi::{OsStr, OsString};
12 12 use std::fmt;
13 13 use std::ops::Deref;
14 14 use std::path::{Path, PathBuf};
15 15
/// Errors from validating or auditing an `HgPath`.
#[derive(Debug, Eq, PartialEq)]
pub enum HgPathError {
    /// Bytes from the invalid `HgPath`
    LeadingSlash(Vec<u8>),
    /// Two or more `/` in a row were found.
    ConsecutiveSlashes {
        bytes: Vec<u8>,
        /// Index of the second slash of the offending pair
        second_slash_index: usize,
    },
    /// A NUL byte was found in the path.
    ContainsNullByte {
        bytes: Vec<u8>,
        /// Index of the NUL byte
        null_byte_index: usize,
    },
    /// Bytes
    DecodeError(Vec<u8>),
    /// The rest come from audit errors
    EndsWithSlash(HgPathBuf),
    /// Path contains a component the audit forbids.
    ContainsIllegalComponent(HgPathBuf),
    /// Path is inside the `.hg` folder
    InsideDotHg(HgPathBuf),
    /// Path points inside a repository nested within the audited one.
    IsInsideNestedRepo {
        path: HgPathBuf,
        nested_repo: HgPathBuf,
    },
    /// Path goes through a symbolic link.
    TraversesSymbolicLink {
        path: HgPathBuf,
        symlink: HgPathBuf,
    },
    /// Path cannot be converted to a filesystem path.
    NotFsCompliant(HgPathBuf),
    /// `path` is the smallest invalid path
    NotUnderRoot {
        path: PathBuf,
        root: PathBuf,
    },
}
50 50
51 51 impl fmt::Display for HgPathError {
52 52 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
53 53 match self {
54 54 HgPathError::LeadingSlash(bytes) => {
55 55 write!(f, "Invalid HgPath '{:?}': has a leading slash.", bytes)
56 56 }
57 57 HgPathError::ConsecutiveSlashes {
58 58 bytes,
59 59 second_slash_index: pos,
60 60 } => write!(
61 61 f,
62 62 "Invalid HgPath '{:?}': consecutive slashes at pos {}.",
63 63 bytes, pos
64 64 ),
65 65 HgPathError::ContainsNullByte {
66 66 bytes,
67 67 null_byte_index: pos,
68 68 } => write!(
69 69 f,
70 70 "Invalid HgPath '{:?}': contains null byte at pos {}.",
71 71 bytes, pos
72 72 ),
73 73 HgPathError::DecodeError(bytes) => write!(
74 74 f,
75 75 "Invalid HgPath '{:?}': could not be decoded.",
76 76 bytes
77 77 ),
78 78 HgPathError::EndsWithSlash(path) => {
79 79 write!(f, "Audit failed for '{}': ends with a slash.", path)
80 80 }
81 81 HgPathError::ContainsIllegalComponent(path) => write!(
82 82 f,
83 83 "Audit failed for '{}': contains an illegal component.",
84 84 path
85 85 ),
86 86 HgPathError::InsideDotHg(path) => write!(
87 87 f,
88 88 "Audit failed for '{}': is inside the '.hg' folder.",
89 89 path
90 90 ),
91 91 HgPathError::IsInsideNestedRepo {
92 92 path,
93 93 nested_repo: nested,
94 94 } => {
95 95 write!(f,
96 96 "Audit failed for '{}': is inside a nested repository '{}'.",
97 97 path, nested
98 98 )
99 99 }
100 100 HgPathError::TraversesSymbolicLink { path, symlink } => write!(
101 101 f,
102 102 "Audit failed for '{}': traverses symbolic link '{}'.",
103 103 path, symlink
104 104 ),
105 105 HgPathError::NotFsCompliant(path) => write!(
106 106 f,
107 107 "Audit failed for '{}': cannot be turned into a \
108 108 filesystem path.",
109 109 path
110 110 ),
111 111 HgPathError::NotUnderRoot { path, root } => write!(
112 112 f,
113 113 "Audit failed for '{}': not under root {}.",
114 114 path.display(),
115 115 root.display()
116 116 ),
117 117 }
118 118 }
119 119 }
120 120
121 121 impl From<HgPathError> for std::io::Error {
122 122 fn from(e: HgPathError) -> Self {
123 123 std::io::Error::new(std::io::ErrorKind::InvalidData, e.to_string())
124 124 }
125 125 }
126 126
/// This is a repository-relative path (or canonical path):
///  - no null characters
///  - `/` separates directories
///  - no consecutive slashes
///  - no leading slash,
///  - no `.` nor `..` of special meaning
///  - stored in repository and shared across platforms
///
/// Note: there is no guarantee of any `HgPath` being well-formed at any point
/// in its lifetime for performance reasons and to ease ergonomics. It is
/// however checked using the `check_state` method before any file-system
/// operation.
///
/// This allows us to be encoding-transparent as much as possible, until really
/// needed; `HgPath` can be transformed into a platform-specific path (`OsStr`
/// or `Path`) whenever more complex operations are needed:
/// On Unix, it's just byte-to-byte conversion. On Windows, it has to be
/// decoded from MBCS to WTF-8. If WindowsUTF8Plan is implemented, the source
/// character encoding will be determined on a per-repository basis.
#[derive(Eq, Ord, PartialEq, PartialOrd, Hash)]
// `repr(transparent)` guarantees the same layout as the wrapped `[u8]`,
// which the pointer cast in `HgPath::new` relies on for soundness.
#[repr(transparent)]
pub struct HgPath {
    inner: [u8],
}
151 151
152 152 impl HgPath {
153 153 pub fn new<S: AsRef<[u8]> + ?Sized>(s: &S) -> &Self {
154 154 unsafe { &*(s.as_ref() as *const [u8] as *const Self) }
155 155 }
156 156 pub fn is_empty(&self) -> bool {
157 157 self.inner.is_empty()
158 158 }
159 159 pub fn len(&self) -> usize {
160 160 self.inner.len()
161 161 }
162 162 fn to_hg_path_buf(&self) -> HgPathBuf {
163 163 HgPathBuf {
164 164 inner: self.inner.to_owned(),
165 165 }
166 166 }
167 167 pub fn bytes(&self) -> std::slice::Iter<u8> {
168 168 self.inner.iter()
169 169 }
170 170 pub fn to_ascii_uppercase(&self) -> HgPathBuf {
171 171 HgPathBuf::from(self.inner.to_ascii_uppercase())
172 172 }
173 173 pub fn to_ascii_lowercase(&self) -> HgPathBuf {
174 174 HgPathBuf::from(self.inner.to_ascii_lowercase())
175 175 }
176 176 pub fn as_bytes(&self) -> &[u8] {
177 177 &self.inner
178 178 }
179 179 pub fn contains(&self, other: u8) -> bool {
180 180 self.inner.contains(&other)
181 181 }
182 182 pub fn starts_with(&self, needle: impl AsRef<Self>) -> bool {
183 183 self.inner.starts_with(needle.as_ref().as_bytes())
184 184 }
185 185 pub fn trim_trailing_slash(&self) -> &Self {
186 186 Self::new(if self.inner.last() == Some(&b'/') {
187 187 &self.inner[..self.inner.len() - 1]
188 188 } else {
189 189 &self.inner[..]
190 190 })
191 191 }
192 192 /// Returns a tuple of slices `(base, filename)` resulting from the split
193 193 /// at the rightmost `/`, if any.
194 194 ///
195 195 /// # Examples:
196 196 ///
197 197 /// ```
198 198 /// use hg::utils::hg_path::HgPath;
199 199 ///
200 200 /// let path = HgPath::new(b"cool/hg/path").split_filename();
201 201 /// assert_eq!(path, (HgPath::new(b"cool/hg"), HgPath::new(b"path")));
202 202 ///
203 203 /// let path = HgPath::new(b"pathwithoutsep").split_filename();
204 204 /// assert_eq!(path, (HgPath::new(b""), HgPath::new(b"pathwithoutsep")));
205 205 /// ```
206 206 pub fn split_filename(&self) -> (&Self, &Self) {
207 207 match &self.inner.iter().rposition(|c| *c == b'/') {
208 208 None => (HgPath::new(""), self),
209 209 Some(size) => (
210 210 HgPath::new(&self.inner[..*size]),
211 211 HgPath::new(&self.inner[*size + 1..]),
212 212 ),
213 213 }
214 214 }
215 215
216 216 pub fn join(&self, path: &HgPath) -> HgPathBuf {
217 217 let mut buf = self.to_owned();
218 218 buf.push(path);
219 219 buf
220 220 }
221 221
222 222 pub fn components(&self) -> impl Iterator<Item = &HgPath> {
223 223 self.inner.split(|&byte| byte == b'/').map(HgPath::new)
224 224 }
225 225
226 226 /// Returns the first (that is "root-most") slash-separated component of
227 227 /// the path, and the rest after the first slash if there is one.
228 228 pub fn split_first_component(&self) -> (&HgPath, Option<&HgPath>) {
229 229 match self.inner.split_2(b'/') {
230 230 Some((a, b)) => (HgPath::new(a), Some(HgPath::new(b))),
231 231 None => (self, None),
232 232 }
233 233 }
234 234
235 235 pub fn parent(&self) -> &Self {
236 236 let inner = self.as_bytes();
237 237 HgPath::new(match inner.iter().rposition(|b| *b == b'/') {
238 238 Some(pos) => &inner[..pos],
239 239 None => &[],
240 240 })
241 241 }
242 242 /// Given a base directory, returns the slice of `self` relative to the
243 243 /// base directory. If `base` is not a directory (does not end with a
244 244 /// `b'/'`), returns `None`.
245 245 pub fn relative_to(&self, base: impl AsRef<Self>) -> Option<&Self> {
246 246 let base = base.as_ref();
247 247 if base.is_empty() {
248 248 return Some(self);
249 249 }
250 250 let is_dir = base.as_bytes().ends_with(b"/");
251 251 if is_dir && self.starts_with(base) {
252 252 Some(Self::new(&self.inner[base.len()..]))
253 253 } else {
254 254 None
255 255 }
256 256 }
257 257
258 258 #[cfg(windows)]
259 259 /// Copied from the Python stdlib's `os.path.splitdrive` implementation.
260 260 ///
261 261 /// Split a pathname into drive/UNC sharepoint and relative path
262 262 /// specifiers. Returns a 2-tuple (drive_or_unc, path); either part may
263 263 /// be empty.
264 264 ///
265 265 /// If you assign
266 266 /// result = split_drive(p)
267 267 /// It is always true that:
268 268 /// result[0] + result[1] == p
269 269 ///
270 270 /// If the path contained a drive letter, drive_or_unc will contain
271 271 /// everything up to and including the colon.
272 272 /// e.g. split_drive("c:/dir") returns ("c:", "/dir")
273 273 ///
274 274 /// If the path contained a UNC path, the drive_or_unc will contain the
275 275 /// host name and share up to but not including the fourth directory
276 276 /// separator character.
277 277 /// e.g. split_drive("//host/computer/dir") returns ("//host/computer",
278 278 /// "/dir")
279 279 ///
280 280 /// Paths cannot contain both a drive letter and a UNC path.
281 281 pub fn split_drive<'a>(&self) -> (&HgPath, &HgPath) {
282 282 let bytes = self.as_bytes();
283 283 let is_sep = |b| std::path::is_separator(b as char);
284 284
285 285 if self.len() < 2 {
286 286 (HgPath::new(b""), &self)
287 287 } else if is_sep(bytes[0])
288 288 && is_sep(bytes[1])
289 289 && (self.len() == 2 || !is_sep(bytes[2]))
290 290 {
291 291 // Is a UNC path:
292 292 // vvvvvvvvvvvvvvvvvvvv drive letter or UNC path
293 293 // \\machine\mountpoint\directory\etc\...
294 294 // directory ^^^^^^^^^^^^^^^
295 295
296 296 let machine_end_index = bytes[2..].iter().position(|b| is_sep(*b));
297 297 let mountpoint_start_index = if let Some(i) = machine_end_index {
298 298 i + 2
299 299 } else {
300 300 return (HgPath::new(b""), &self);
301 301 };
302 302
303 303 match bytes[mountpoint_start_index + 1..]
304 304 .iter()
305 305 .position(|b| is_sep(*b))
306 306 {
307 307 // A UNC path can't have two slashes in a row
308 308 // (after the initial two)
309 309 Some(0) => (HgPath::new(b""), &self),
310 310 Some(i) => {
311 311 let (a, b) =
312 312 bytes.split_at(mountpoint_start_index + 1 + i);
313 313 (HgPath::new(a), HgPath::new(b))
314 314 }
315 315 None => (&self, HgPath::new(b"")),
316 316 }
317 317 } else if bytes[1] == b':' {
318 318 // Drive path c:\directory
319 319 let (a, b) = bytes.split_at(2);
320 320 (HgPath::new(a), HgPath::new(b))
321 321 } else {
322 322 (HgPath::new(b""), &self)
323 323 }
324 324 }
325 325
326 326 #[cfg(unix)]
327 327 /// Split a pathname into drive and path. On Posix, drive is always empty.
328 328 pub fn split_drive(&self) -> (&HgPath, &HgPath) {
329 329 (HgPath::new(b""), self)
330 330 }
331 331
332 332 /// Checks for errors in the path, short-circuiting at the first one.
333 333 /// This generates fine-grained errors useful for debugging.
334 334 /// To simply check if the path is valid during tests, use `is_valid`.
335 335 pub fn check_state(&self) -> Result<(), HgPathError> {
336 336 if self.is_empty() {
337 337 return Ok(());
338 338 }
339 339 let bytes = self.as_bytes();
340 340 let mut previous_byte = None;
341 341
342 342 if bytes[0] == b'/' {
343 343 return Err(HgPathError::LeadingSlash(bytes.to_vec()));
344 344 }
345 345 for (index, byte) in bytes.iter().enumerate() {
346 346 match byte {
347 347 0 => {
348 348 return Err(HgPathError::ContainsNullByte {
349 349 bytes: bytes.to_vec(),
350 350 null_byte_index: index,
351 351 })
352 352 }
353 353 b'/' => {
354 354 if previous_byte.is_some() && previous_byte == Some(b'/') {
355 355 return Err(HgPathError::ConsecutiveSlashes {
356 356 bytes: bytes.to_vec(),
357 357 second_slash_index: index,
358 358 });
359 359 }
360 360 }
361 361 _ => (),
362 362 };
363 363 previous_byte = Some(*byte);
364 364 }
365 365 Ok(())
366 366 }
367 367
368 368 #[cfg(test)]
369 369 /// Only usable during tests to force developers to handle invalid states
370 370 fn is_valid(&self) -> bool {
371 371 self.check_state().is_ok()
372 372 }
373 373 }
374 374
375 375 impl fmt::Debug for HgPath {
376 376 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
377 377 write!(f, "HgPath({:?})", String::from_utf8_lossy(&self.inner))
378 378 }
379 379 }
380 380
381 381 impl fmt::Display for HgPath {
382 382 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
383 383 write!(f, "{}", String::from_utf8_lossy(&self.inner))
384 384 }
385 385 }
386 386
/// Owned counterpart of [`HgPath`], backed by a `Vec<u8>`.
// `derive_more::From` provides `From<Vec<u8>>`.
#[derive(
    Default, Eq, Ord, Clone, PartialEq, PartialOrd, Hash, derive_more::From,
)]
pub struct HgPathBuf {
    inner: Vec<u8>,
}
393 393
394 394 impl HgPathBuf {
395 395 pub fn new() -> Self {
396 396 Default::default()
397 397 }
398 398
399 399 pub fn push<T: ?Sized + AsRef<HgPath>>(&mut self, other: &T) {
400 400 if !self.inner.is_empty() && self.inner.last() != Some(&b'/') {
401 401 self.inner.push(b'/');
402 402 }
403 403 self.inner.extend(other.as_ref().bytes())
404 404 }
405 405
406 406 pub fn push_byte(&mut self, byte: u8) {
407 407 self.inner.push(byte);
408 408 }
409 409 pub fn from_bytes(s: &[u8]) -> HgPathBuf {
410 410 HgPath::new(s).to_owned()
411 411 }
412 412 pub fn into_vec(self) -> Vec<u8> {
413 413 self.inner
414 414 }
415 415 }
416 416
417 417 impl fmt::Debug for HgPathBuf {
418 418 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
419 419 write!(f, "HgPathBuf({:?})", String::from_utf8_lossy(&self.inner))
420 420 }
421 421 }
422 422
423 423 impl fmt::Display for HgPathBuf {
424 424 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
425 425 write!(f, "{}", String::from_utf8_lossy(&self.inner))
426 426 }
427 427 }
428 428
429 429 impl Deref for HgPathBuf {
430 430 type Target = HgPath;
431 431
432 432 #[inline]
433 433 fn deref(&self) -> &HgPath {
434 434 HgPath::new(&self.inner)
435 435 }
436 436 }
437 437
438 438 impl<T: ?Sized + AsRef<HgPath>> From<&T> for HgPathBuf {
439 439 fn from(s: &T) -> HgPathBuf {
440 440 s.as_ref().to_owned()
441 441 }
442 442 }
443 443
444 444 impl From<HgPathBuf> for Vec<u8> {
445 445 fn from(val: HgPathBuf) -> Self {
446 446 val.inner
447 447 }
448 448 }
449 449
450 450 impl Borrow<HgPath> for HgPathBuf {
451 451 fn borrow(&self) -> &HgPath {
452 452 HgPath::new(self.as_bytes())
453 453 }
454 454 }
455 455
/// Required (with `Borrow`) for `Cow<HgPath>` and `HgPath::to_owned()`.
impl ToOwned for HgPath {
    type Owned = HgPathBuf;

    fn to_owned(&self) -> HgPathBuf {
        self.to_hg_path_buf()
    }
}
463 463
/// Identity conversion so `impl AsRef<HgPath>` arguments accept `&HgPath`.
impl AsRef<HgPath> for HgPath {
    fn as_ref(&self) -> &HgPath {
        self
    }
}
469 469
/// Borrow conversion via deref coercion (`&HgPathBuf` -> `&HgPath`).
impl AsRef<HgPath> for HgPathBuf {
    fn as_ref(&self) -> &HgPath {
        self
    }
}
475 475
/// Appends raw bytes; no separator or well-formedness logic is applied
/// (see `check_state`).
impl Extend<u8> for HgPathBuf {
    fn extend<T: IntoIterator<Item = u8>>(&mut self, iter: T) {
        self.inner.extend(iter);
    }
}
481 481
/// TODO: Once <https://www.mercurial-scm.org/wiki/WindowsUTF8Plan> is
483 483 /// implemented, these conversion utils will have to work differently depending
484 484 /// on the repository encoding: either `UTF-8` or `MBCS`.
485 485
/// Converts an `HgPath` into an `OsString`, after validating it with
/// `check_state`.
///
/// Only implemented for Unix so far (see the TODOs below); on other
/// platforms this function does not currently compile.
pub fn hg_path_to_os_string<P: AsRef<HgPath>>(
    hg_path: P,
) -> Result<OsString, HgPathError> {
    hg_path.as_ref().check_state()?;
    let os_str;
    #[cfg(unix)]
    {
        use std::os::unix::ffi::OsStrExt;
        // On Unix an `OsStr` is raw bytes, so this conversion is free.
        os_str = std::ffi::OsStr::from_bytes(hg_path.as_ref().as_bytes());
    }
    // TODO Handle other platforms
    // TODO: convert from WTF8 to Windows MBCS (ANSI encoding).
    Ok(os_str.to_os_string())
}
500 500
501 501 pub fn hg_path_to_path_buf<P: AsRef<HgPath>>(
502 502 hg_path: P,
503 503 ) -> Result<PathBuf, HgPathError> {
504 504 Ok(Path::new(&hg_path_to_os_string(hg_path)?).to_path_buf())
505 505 }
506 506
/// Converts an `OsStr`-like value into an owned `HgPathBuf`, validating the
/// result with `check_state`.
///
/// Only implemented for Unix so far (see the TODOs below); on other
/// platforms this function does not currently compile.
pub fn os_string_to_hg_path_buf<S: AsRef<OsStr>>(
    os_string: S,
) -> Result<HgPathBuf, HgPathError> {
    let buf;
    #[cfg(unix)]
    {
        use std::os::unix::ffi::OsStrExt;
        // On Unix an `OsStr` is raw bytes, so this conversion is a copy.
        buf = HgPathBuf::from_bytes(os_string.as_ref().as_bytes());
    }
    // TODO Handle other platforms
    // TODO: convert from WTF8 to Windows MBCS (ANSI encoding).

    buf.check_state()?;
    Ok(buf)
}
522 522
/// Converts a filesystem `Path` into an owned `HgPathBuf`, validating the
/// result with `check_state`.
///
/// Only implemented for Unix so far (see the TODOs below); on other
/// platforms this function does not currently compile.
pub fn path_to_hg_path_buf<P: AsRef<Path>>(
    path: P,
) -> Result<HgPathBuf, HgPathError> {
    let buf;
    let os_str = path.as_ref().as_os_str();
    #[cfg(unix)]
    {
        use std::os::unix::ffi::OsStrExt;
        // On Unix an `OsStr` is raw bytes, so this conversion is a copy.
        buf = HgPathBuf::from_bytes(os_str.as_bytes());
    }
    // TODO Handle other platforms
    // TODO: convert from WTF8 to Windows MBCS (ANSI encoding).

    buf.check_state()?;
    Ok(buf)
}
539 539
540 540 impl TryFrom<PathBuf> for HgPathBuf {
541 541 type Error = HgPathError;
542 542 fn try_from(path: PathBuf) -> Result<Self, Self::Error> {
543 543 path_to_hg_path_buf(path)
544 544 }
545 545 }
546 546
/// An owned path becomes an owned cow, no copy involved.
impl From<HgPathBuf> for Cow<'_, HgPath> {
    fn from(path: HgPathBuf) -> Self {
        Cow::Owned(path)
    }
}
552 552
/// A borrowed path becomes a borrowed cow, no copy involved.
impl<'a> From<&'a HgPath> for Cow<'a, HgPath> {
    fn from(path: &'a HgPath) -> Self {
        Cow::Borrowed(path)
    }
}
558 558
559 559 impl<'a> From<&'a HgPathBuf> for Cow<'a, HgPath> {
560 560 fn from(path: &'a HgPathBuf) -> Self {
561 561 Cow::Borrowed(&**path)
562 562 }
563 563 }
564 564
#[cfg(test)]
mod tests {
    use super::*;
    use pretty_assertions::assert_eq;

    #[test]
    fn test_path_states() {
        assert_eq!(
            Err(HgPathError::LeadingSlash(b"/".to_vec())),
            HgPath::new(b"/").check_state()
        );
        assert_eq!(
            Err(HgPathError::ConsecutiveSlashes {
                bytes: b"a/b//c".to_vec(),
                second_slash_index: 4
            }),
            HgPath::new(b"a/b//c").check_state()
        );
        assert_eq!(
            Err(HgPathError::ContainsNullByte {
                bytes: b"a/b/\0c".to_vec(),
                null_byte_index: 4
            }),
            HgPath::new(b"a/b/\0c").check_state()
        );
        // TODO test HgPathError::DecodeError for the Windows implementation.
        // `assert!` instead of `assert_eq!(true, ..)` to avoid the
        // `bool_assert_comparison` clippy warning.
        assert!(HgPath::new(b"").is_valid());
        assert!(HgPath::new(b"a/b/c").is_valid());
        // Backslashes in paths are not significant, but allowed
        assert!(HgPath::new(br"a\b/c").is_valid());
        // Dots in paths are not significant, but allowed
        assert!(HgPath::new(b"a/b/../c/").is_valid());
        assert!(HgPath::new(b"./a/b/../c/").is_valid());
    }

    #[test]
    fn test_iter() {
        let path = HgPath::new(b"a");
        let mut iter = path.bytes();
        assert_eq!(Some(&b'a'), iter.next());
        assert_eq!(None, iter.next_back());
        assert_eq!(None, iter.next());

        let path = HgPath::new(b"a");
        let mut iter = path.bytes();
        assert_eq!(Some(&b'a'), iter.next_back());
        assert_eq!(None, iter.next_back());
        assert_eq!(None, iter.next());

        let path = HgPath::new(b"abc");
        let mut iter = path.bytes();
        assert_eq!(Some(&b'a'), iter.next());
        assert_eq!(Some(&b'c'), iter.next_back());
        assert_eq!(Some(&b'b'), iter.next_back());
        assert_eq!(None, iter.next_back());
        assert_eq!(None, iter.next());

        let path = HgPath::new(b"abc");
        let mut iter = path.bytes();
        assert_eq!(Some(&b'a'), iter.next());
        assert_eq!(Some(&b'b'), iter.next());
        assert_eq!(Some(&b'c'), iter.next());
        assert_eq!(None, iter.next_back());
        assert_eq!(None, iter.next());

        let path = HgPath::new(b"abc");
        let iter = path.bytes();
        let mut vec = Vec::new();
        vec.extend(iter);
        assert_eq!(vec![b'a', b'b', b'c'], vec);

        let path = HgPath::new(b"abc");
        let mut iter = path.bytes();
        assert_eq!(Some(2), iter.rposition(|c| *c == b'c'));

        let path = HgPath::new(b"abc");
        let mut iter = path.bytes();
        assert_eq!(None, iter.rposition(|c| *c == b'd'));
    }

    #[test]
    fn test_join() {
        let path = HgPathBuf::from_bytes(b"a").join(HgPath::new(b"b"));
        assert_eq!(b"a/b", path.as_bytes());

        let path = HgPathBuf::from_bytes(b"a/").join(HgPath::new(b"b/c"));
        assert_eq!(b"a/b/c", path.as_bytes());

        // No leading slash if empty before join
        let path = HgPathBuf::new().join(HgPath::new(b"b/c"));
        assert_eq!(b"b/c", path.as_bytes());

        // The leading slash is an invalid representation of an `HgPath`, but
        // it can happen. This creates another invalid representation of
        // consecutive bytes.
        // TODO What should be done in this case? Should we silently remove
        // the extra slash? Should we change the signature to a problematic
        // `Result<HgPathBuf, HgPathError>`, or should we just keep it so and
        // let the error happen upon filesystem interaction?
        let path = HgPathBuf::from_bytes(b"a/").join(HgPath::new(b"/b"));
        assert_eq!(b"a//b", path.as_bytes());
        let path = HgPathBuf::from_bytes(b"a").join(HgPath::new(b"/b"));
        assert_eq!(b"a//b", path.as_bytes());
    }

    #[test]
    fn test_relative_to() {
        let path = HgPath::new(b"");
        let base = HgPath::new(b"");
        assert_eq!(Some(path), path.relative_to(base));

        let path = HgPath::new(b"path");
        let base = HgPath::new(b"");
        assert_eq!(Some(path), path.relative_to(base));

        let path = HgPath::new(b"a");
        let base = HgPath::new(b"b");
        assert_eq!(None, path.relative_to(base));

        let path = HgPath::new(b"a/b");
        let base = HgPath::new(b"a");
        assert_eq!(None, path.relative_to(base));

        let path = HgPath::new(b"a/b");
        let base = HgPath::new(b"a/");
        assert_eq!(Some(HgPath::new(b"b")), path.relative_to(base));

        let path = HgPath::new(b"nested/path/to/b");
        let base = HgPath::new(b"nested/path/");
        assert_eq!(Some(HgPath::new(b"to/b")), path.relative_to(base));

        let path = HgPath::new(b"ends/with/dir/");
        let base = HgPath::new(b"ends/");
        assert_eq!(Some(HgPath::new(b"with/dir/")), path.relative_to(base));
    }

    #[test]
    #[cfg(unix)]
    fn test_split_drive() {
        // Taken from the Python stdlib's tests
        assert_eq!(
            HgPath::new(br"/foo/bar").split_drive(),
            (HgPath::new(b""), HgPath::new(br"/foo/bar"))
        );
        assert_eq!(
            HgPath::new(br"foo:bar").split_drive(),
            (HgPath::new(b""), HgPath::new(br"foo:bar"))
        );
        assert_eq!(
            HgPath::new(br":foo:bar").split_drive(),
            (HgPath::new(b""), HgPath::new(br":foo:bar"))
        );
        // Also try NT paths; should not split them
        assert_eq!(
            HgPath::new(br"c:\foo\bar").split_drive(),
            (HgPath::new(b""), HgPath::new(br"c:\foo\bar"))
        );
        assert_eq!(
            HgPath::new(b"c:/foo/bar").split_drive(),
            (HgPath::new(b""), HgPath::new(br"c:/foo/bar"))
        );
        assert_eq!(
            HgPath::new(br"\\conky\mountpoint\foo\bar").split_drive(),
            (
                HgPath::new(b""),
                HgPath::new(br"\\conky\mountpoint\foo\bar")
            )
        );
    }

    #[test]
    #[cfg(windows)]
    fn test_split_drive() {
        assert_eq!(
            HgPath::new(br"c:\foo\bar").split_drive(),
            (HgPath::new(br"c:"), HgPath::new(br"\foo\bar"))
        );
        assert_eq!(
            HgPath::new(b"c:/foo/bar").split_drive(),
            (HgPath::new(br"c:"), HgPath::new(br"/foo/bar"))
        );
        assert_eq!(
            HgPath::new(br"\\conky\mountpoint\foo\bar").split_drive(),
            (
                HgPath::new(br"\\conky\mountpoint"),
                HgPath::new(br"\foo\bar")
            )
        );
        assert_eq!(
            HgPath::new(br"//conky/mountpoint/foo/bar").split_drive(),
            (
                HgPath::new(br"//conky/mountpoint"),
                HgPath::new(br"/foo/bar")
            )
        );
        assert_eq!(
            HgPath::new(br"\\\conky\mountpoint\foo\bar").split_drive(),
            (
                HgPath::new(br""),
                HgPath::new(br"\\\conky\mountpoint\foo\bar")
            )
        );
        assert_eq!(
            HgPath::new(br"///conky/mountpoint/foo/bar").split_drive(),
            (
                HgPath::new(br""),
                HgPath::new(br"///conky/mountpoint/foo/bar")
            )
        );
        assert_eq!(
            HgPath::new(br"\\conky\\mountpoint\foo\bar").split_drive(),
            (
                HgPath::new(br""),
                HgPath::new(br"\\conky\\mountpoint\foo\bar")
            )
        );
        assert_eq!(
            HgPath::new(br"//conky//mountpoint/foo/bar").split_drive(),
            (
                HgPath::new(br""),
                HgPath::new(br"//conky//mountpoint/foo/bar")
            )
        );
        // UNC part containing U+0130
        assert_eq!(
            HgPath::new(b"//conky/MOUNTPO\xc4\xb0NT/foo/bar").split_drive(),
            (
                HgPath::new(b"//conky/MOUNTPO\xc4\xb0NT"),
                HgPath::new(br"/foo/bar")
            )
        );
    }

    #[test]
    fn test_parent() {
        let path = HgPath::new(b"");
        assert_eq!(path.parent(), path);

        let path = HgPath::new(b"a");
        assert_eq!(path.parent(), HgPath::new(b""));

        let path = HgPath::new(b"a/b");
        assert_eq!(path.parent(), HgPath::new(b"a"));

        let path = HgPath::new(b"a/other/b");
        assert_eq!(path.parent(), HgPath::new(b"a/other"));
    }
}
General Comments 0
You need to be logged in to leave comments. Login now