Show More
@@ -1,111 +1,114 b'' | |||
|
1 | 1 | use std::fmt; |
|
2 | 2 | |
|
3 | 3 | /// Common error cases that can happen in many different APIs |
|
4 | 4 | #[derive(Debug)] |
|
5 | 5 | pub enum HgError { |
|
6 | 6 | IoError { |
|
7 | 7 | error: std::io::Error, |
|
8 | 8 | context: IoErrorContext, |
|
9 | 9 | }, |
|
10 | 10 | |
|
11 | 11 | /// A file under `.hg/` normally only written by Mercurial |
|
12 | 12 | /// |
|
13 | 13 | /// The given string is a short explanation for users, not intended to be |
|
14 | 14 | /// machine-readable. |
|
15 | 15 | CorruptedRepository(String), |
|
16 | 16 | |
|
17 | 17 | /// The repository or requested operation involves a feature not |
|
18 | 18 | /// supported by the Rust implementation. Falling back to the Python |
|
19 | 19 | /// implementation may or may not work. |
|
20 | 20 | /// |
|
21 | 21 | /// The given string is a short explanation for users, not intended to be |
|
22 | 22 | /// machine-readable. |
|
23 | 23 | UnsupportedFeature(String), |
|
24 | 24 | } |
|
25 | 25 | |
|
26 | 26 | /// Details about where an I/O error happened |
|
27 | 27 | #[derive(Debug, derive_more::From)] |
|
28 | 28 | pub enum IoErrorContext { |
|
29 | 29 | /// A filesystem operation returned `std::io::Error` |
|
30 | 30 | #[from] |
|
31 | 31 | File(std::path::PathBuf), |
|
32 | 32 | /// `std::env::current_dir` returned `std::io::Error` |
|
33 | 33 | CurrentDir, |
|
34 | 34 | } |
|
35 | 35 | |
|
36 | 36 | impl HgError { |
|
37 | 37 | pub fn corrupted(explanation: impl Into<String>) -> Self { |
|
38 | // TODO: capture a backtrace here and keep it in the error value | |
|
39 | // to aid debugging? | |
|
40 | // https://doc.rust-lang.org/std/backtrace/struct.Backtrace.html | |
|
38 | 41 | HgError::CorruptedRepository(explanation.into()) |
|
39 | 42 | } |
|
40 | 43 | } |
|
41 | 44 | |
|
42 | 45 | // TODO: use `DisplayBytes` instead to show non-Unicode filenames losslessly? |
|
43 | 46 | impl fmt::Display for HgError { |
|
44 | 47 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { |
|
45 | 48 | match self { |
|
46 | 49 | HgError::IoError { error, context } => { |
|
47 | 50 | write!(f, "{}: {}", error, context) |
|
48 | 51 | } |
|
49 | 52 | HgError::CorruptedRepository(explanation) => { |
|
50 | 53 | write!(f, "corrupted repository: {}", explanation) |
|
51 | 54 | } |
|
52 | 55 | HgError::UnsupportedFeature(explanation) => { |
|
53 | 56 | write!(f, "unsupported feature: {}", explanation) |
|
54 | 57 | } |
|
55 | 58 | } |
|
56 | 59 | } |
|
57 | 60 | } |
|
58 | 61 | |
|
59 | 62 | // TODO: use `DisplayBytes` instead to show non-Unicode filenames losslessly? |
|
60 | 63 | impl fmt::Display for IoErrorContext { |
|
61 | 64 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { |
|
62 | 65 | match self { |
|
63 | 66 | IoErrorContext::File(path) => path.display().fmt(f), |
|
64 | 67 | IoErrorContext::CurrentDir => f.write_str("current directory"), |
|
65 | 68 | } |
|
66 | 69 | } |
|
67 | 70 | } |
|
68 | 71 | |
|
69 | 72 | pub trait IoResultExt<T> { |
|
70 | 73 | /// Annotate a possible I/O error as related to a file at the given path. |
|
71 | 74 | /// |
|
72 | 75 | /// This allows printing something like “File not found: example.txt” |
|
73 | 76 | /// instead of just “File not found”. |
|
74 | 77 | /// |
|
75 | 78 | /// Converts a `Result` with `std::io::Error` into one with `HgError`. |
|
76 | 79 | fn for_file(self, path: &std::path::Path) -> Result<T, HgError>; |
|
77 | 80 | } |
|
78 | 81 | |
|
79 | 82 | impl<T> IoResultExt<T> for std::io::Result<T> { |
|
80 | 83 | fn for_file(self, path: &std::path::Path) -> Result<T, HgError> { |
|
81 | 84 | self.map_err(|error| HgError::IoError { |
|
82 | 85 | error, |
|
83 | 86 | context: IoErrorContext::File(path.to_owned()), |
|
84 | 87 | }) |
|
85 | 88 | } |
|
86 | 89 | } |
|
87 | 90 | |
|
88 | 91 | pub trait HgResultExt<T> { |
|
89 | 92 | /// Handle missing files separately from other I/O error cases. |
|
90 | 93 | /// |
|
91 | 94 | /// Wraps the `Ok` type in an `Option`: |
|
92 | 95 | /// |
|
93 | 96 | /// * `Ok(x)` becomes `Ok(Some(x))` |
|
94 | 97 | /// * An I/O "not found" error becomes `Ok(None)` |
|
95 | 98 | /// * Other errors are unchanged |
|
96 | 99 | fn io_not_found_as_none(self) -> Result<Option<T>, HgError>; |
|
97 | 100 | } |
|
98 | 101 | |
|
99 | 102 | impl<T> HgResultExt<T> for Result<T, HgError> { |
|
100 | 103 | fn io_not_found_as_none(self) -> Result<Option<T>, HgError> { |
|
101 | 104 | match self { |
|
102 | 105 | Ok(x) => Ok(Some(x)), |
|
103 | 106 | Err(HgError::IoError { error, .. }) |
|
104 | 107 | if error.kind() == std::io::ErrorKind::NotFound => |
|
105 | 108 | { |
|
106 | 109 | Ok(None) |
|
107 | 110 | } |
|
108 | 111 | Err(other_error) => Err(other_error), |
|
109 | 112 | } |
|
110 | 113 | } |
|
111 | 114 | } |
@@ -1,115 +1,116 b'' | |||
|
1 | 1 | // Copyright 2018-2020 Georges Racinet <georges.racinet@octobus.net> |
|
2 | 2 | // and Mercurial contributors |
|
3 | 3 | // |
|
4 | 4 | // This software may be used and distributed according to the terms of the |
|
5 | 5 | // GNU General Public License version 2 or any later version. |
|
6 | ||
|
6 | 7 | mod ancestors; |
|
7 | 8 | pub mod dagops; |
|
8 | 9 | pub mod errors; |
|
9 | 10 | pub use ancestors::{AncestorsIterator, LazyAncestors, MissingAncestors}; |
|
10 | 11 | mod dirstate; |
|
11 | 12 | pub mod discovery; |
|
12 | 13 | pub mod requirements; |
|
13 | 14 | pub mod testing; // unconditionally built, for use from integration tests |
|
14 | 15 | pub use dirstate::{ |
|
15 | 16 | dirs_multiset::{DirsMultiset, DirsMultisetIter}, |
|
16 | 17 | dirstate_map::DirstateMap, |
|
17 | 18 | parsers::{pack_dirstate, parse_dirstate, PARENT_SIZE}, |
|
18 | 19 | status::{ |
|
19 | 20 | status, BadMatch, BadType, DirstateStatus, StatusError, StatusOptions, |
|
20 | 21 | }, |
|
21 | 22 | CopyMap, CopyMapIter, DirstateEntry, DirstateParents, EntryState, |
|
22 | 23 | StateMap, StateMapIter, |
|
23 | 24 | }; |
|
24 | 25 | pub mod copy_tracing; |
|
25 | 26 | mod filepatterns; |
|
26 | 27 | pub mod matchers; |
|
27 | 28 | pub mod repo; |
|
28 | 29 | pub mod revlog; |
|
29 | 30 | pub use revlog::*; |
|
30 | 31 | pub mod config; |
|
31 | 32 | pub mod operations; |
|
32 | 33 | pub mod revset; |
|
33 | 34 | pub mod utils; |
|
34 | 35 | |
|
35 | 36 | use crate::utils::hg_path::{HgPathBuf, HgPathError}; |
|
36 | 37 | pub use filepatterns::{ |
|
37 | 38 | parse_pattern_syntax, read_pattern_file, IgnorePattern, |
|
38 | 39 | PatternFileWarning, PatternSyntax, |
|
39 | 40 | }; |
|
40 | 41 | use std::collections::HashMap; |
|
41 | 42 | use twox_hash::RandomXxHashBuilder64; |
|
42 | 43 | |
|
43 | 44 | /// This is a contract between the `micro-timer` crate and us, to expose |
|
44 | 45 | /// the `log` crate as `crate::log`. |
|
45 | 46 | use log; |
|
46 | 47 | |
|
47 | 48 | pub type LineNumber = usize; |
|
48 | 49 | |
|
49 | 50 | /// Rust's default hasher is too slow because it tries to prevent collision |
|
50 | 51 | /// attacks. We are not concerned about those: if an ill-minded person has |
|
51 | 52 | /// write access to your repository, you have other issues. |
|
52 | 53 | pub type FastHashMap<K, V> = HashMap<K, V, RandomXxHashBuilder64>; |
|
53 | 54 | |
|
54 | 55 | #[derive(Debug, PartialEq)] |
|
55 | 56 | pub enum DirstateMapError { |
|
56 | 57 | PathNotFound(HgPathBuf), |
|
57 | 58 | EmptyPath, |
|
58 | 59 | InvalidPath(HgPathError), |
|
59 | 60 | } |
|
60 | 61 | |
|
61 | 62 | impl ToString for DirstateMapError { |
|
62 | 63 | fn to_string(&self) -> String { |
|
63 | 64 | match self { |
|
64 | 65 | DirstateMapError::PathNotFound(_) => { |
|
65 | 66 | "expected a value, found none".to_string() |
|
66 | 67 | } |
|
67 | 68 | DirstateMapError::EmptyPath => "Overflow in dirstate.".to_string(), |
|
68 | 69 | DirstateMapError::InvalidPath(e) => e.to_string(), |
|
69 | 70 | } |
|
70 | 71 | } |
|
71 | 72 | } |
|
72 | 73 | |
|
73 | 74 | #[derive(Debug, derive_more::From)] |
|
74 | 75 | pub enum DirstateError { |
|
75 | 76 | Map(DirstateMapError), |
|
76 | 77 | Common(errors::HgError), |
|
77 | 78 | } |
|
78 | 79 | |
|
79 | 80 | #[derive(Debug, derive_more::From)] |
|
80 | 81 | pub enum PatternError { |
|
81 | 82 | #[from] |
|
82 | 83 | Path(HgPathError), |
|
83 | 84 | UnsupportedSyntax(String), |
|
84 | 85 | UnsupportedSyntaxInFile(String, String, usize), |
|
85 | 86 | TooLong(usize), |
|
86 | 87 | #[from] |
|
87 | 88 | IO(std::io::Error), |
|
88 | 89 | /// Needed a pattern that can be turned into a regex but got one that |
|
89 | 90 | /// can't. This should only happen through programmer error. |
|
90 | 91 | NonRegexPattern(IgnorePattern), |
|
91 | 92 | } |
|
92 | 93 | |
|
93 | 94 | impl ToString for PatternError { |
|
94 | 95 | fn to_string(&self) -> String { |
|
95 | 96 | match self { |
|
96 | 97 | PatternError::UnsupportedSyntax(syntax) => { |
|
97 | 98 | format!("Unsupported syntax {}", syntax) |
|
98 | 99 | } |
|
99 | 100 | PatternError::UnsupportedSyntaxInFile(syntax, file_path, line) => { |
|
100 | 101 | format!( |
|
101 | 102 | "{}:{}: unsupported syntax {}", |
|
102 | 103 | file_path, line, syntax |
|
103 | 104 | ) |
|
104 | 105 | } |
|
105 | 106 | PatternError::TooLong(size) => { |
|
106 | 107 | format!("matcher pattern is too long ({} bytes)", size) |
|
107 | 108 | } |
|
108 | 109 | PatternError::IO(e) => e.to_string(), |
|
109 | 110 | PatternError::Path(e) => e.to_string(), |
|
110 | 111 | PatternError::NonRegexPattern(pattern) => { |
|
111 | 112 | format!("'{:?}' cannot be turned into a regex", pattern) |
|
112 | 113 | } |
|
113 | 114 | } |
|
114 | 115 | } |
|
115 | 116 | } |
@@ -1,76 +1,75 b'' | |||
|
1 | 1 | // list_tracked_files.rs |
|
2 | 2 | // |
|
3 | 3 | // Copyright 2020 Antoine Cezar <antoine.cezar@octobus.net> |
|
4 | 4 | // |
|
5 | 5 | // This software may be used and distributed according to the terms of the |
|
6 | 6 | // GNU General Public License version 2 or any later version. |
|
7 | 7 | |
|
8 | 8 | use std::path::PathBuf; |
|
9 | 9 | |
|
10 | 10 | use crate::repo::Repo; |
|
11 | 11 | use crate::revlog::changelog::Changelog; |
|
12 | 12 | use crate::revlog::manifest::Manifest; |
|
13 | 13 | use crate::revlog::path_encode::path_encode; |
|
14 | 14 | use crate::revlog::revlog::Revlog; |
|
15 | 15 | use crate::revlog::revlog::RevlogError; |
|
16 | 16 | use crate::revlog::Node; |
|
17 | 17 | use crate::utils::files::get_path_from_bytes; |
|
18 | 18 | use crate::utils::hg_path::{HgPath, HgPathBuf}; |
|
19 | 19 | |
|
20 | 20 | const METADATA_DELIMITER: [u8; 2] = [b'\x01', b'\n']; |
|
21 | 21 | |
|
22 | 22 | /// List files under Mercurial control at a given revision. |
|
23 | 23 | /// |
|
24 | 24 | /// * `repo`: The repository to read from |
|
25 | 25 | /// * `revset`: The revision to cat the files from. |
|
26 | 26 | /// * `files`: The files to output. |
|
27 | 27 | pub fn cat( |
|
28 | 28 | repo: &Repo, |
|
29 | 29 | revset: &str, |
|
30 | 30 | files: &[HgPathBuf], |
|
31 | 31 | ) -> Result<Vec<u8>, RevlogError> { |
|
32 | 32 | let rev = crate::revset::resolve_single(revset, repo)?; |
|
33 | 33 | let changelog = Changelog::open(repo)?; |
|
34 | 34 | let manifest = Manifest::open(repo)?; |
|
35 | 35 | let changelog_entry = changelog.get_rev(rev)?; |
|
36 | let manifest_node = Node::from_hex(&changelog_entry.manifest_node()?) | |
|
37 | .map_err(|_| RevlogError::Corrupted)?; | |
|
36 | let manifest_node = | |
|
37 | Node::from_hex_for_repo(&changelog_entry.manifest_node()?)?; | |
|
38 | 38 | let manifest_entry = manifest.get_node(manifest_node.into())?; |
|
39 | 39 | let mut bytes = vec![]; |
|
40 | 40 | |
|
41 | 41 | for (manifest_file, node_bytes) in manifest_entry.files_with_nodes() { |
|
42 | 42 | for cat_file in files.iter() { |
|
43 | 43 | if cat_file.as_bytes() == manifest_file.as_bytes() { |
|
44 | 44 | let index_path = store_path(manifest_file, b".i"); |
|
45 | 45 | let data_path = store_path(manifest_file, b".d"); |
|
46 | 46 | |
|
47 | 47 | let file_log = |
|
48 | 48 | Revlog::open(repo, &index_path, Some(&data_path))?; |
|
49 | let file_node = Node::from_hex(node_bytes) | |
|
50 | .map_err(|_| RevlogError::Corrupted)?; | |
|
49 | let file_node = Node::from_hex_for_repo(node_bytes)?; | |
|
51 | 50 | let file_rev = file_log.get_node_rev(file_node.into())?; |
|
52 | 51 | let data = file_log.get_rev_data(file_rev)?; |
|
53 | 52 | if data.starts_with(&METADATA_DELIMITER) { |
|
54 | 53 | let end_delimiter_position = data |
|
55 | 54 | [METADATA_DELIMITER.len()..] |
|
56 | 55 | .windows(METADATA_DELIMITER.len()) |
|
57 | 56 | .position(|bytes| bytes == METADATA_DELIMITER); |
|
58 | 57 | if let Some(position) = end_delimiter_position { |
|
59 | 58 | let offset = METADATA_DELIMITER.len() * 2; |
|
60 | 59 | bytes.extend(data[position + offset..].iter()); |
|
61 | 60 | } |
|
62 | 61 | } else { |
|
63 | 62 | bytes.extend(data); |
|
64 | 63 | } |
|
65 | 64 | } |
|
66 | 65 | } |
|
67 | 66 | } |
|
68 | 67 | |
|
69 | 68 | Ok(bytes) |
|
70 | 69 | } |
|
71 | 70 | |
|
72 | 71 | fn store_path(hg_path: &HgPath, suffix: &[u8]) -> PathBuf { |
|
73 | 72 | let encoded_bytes = |
|
74 | 73 | path_encode(&[b"data/", hg_path.as_bytes(), suffix].concat()); |
|
75 | 74 | get_path_from_bytes(&encoded_bytes).into() |
|
76 | 75 | } |
@@ -1,72 +1,67 b'' | |||
|
1 | 1 | // list_tracked_files.rs |
|
2 | 2 | // |
|
3 | 3 | // Copyright 2020 Antoine Cezar <antoine.cezar@octobus.net> |
|
4 | 4 | // |
|
5 | 5 | // This software may be used and distributed according to the terms of the |
|
6 | 6 | // GNU General Public License version 2 or any later version. |
|
7 | 7 | |
|
8 | 8 | use crate::dirstate::parsers::parse_dirstate; |
|
9 |
use crate::errors:: |
|
|
9 | use crate::errors::HgError; | |
|
10 | 10 | use crate::repo::Repo; |
|
11 | 11 | use crate::revlog::changelog::Changelog; |
|
12 | 12 | use crate::revlog::manifest::{Manifest, ManifestEntry}; |
|
13 | 13 | use crate::revlog::node::Node; |
|
14 | 14 | use crate::revlog::revlog::RevlogError; |
|
15 | 15 | use crate::utils::hg_path::HgPath; |
|
16 | 16 | use crate::EntryState; |
|
17 | 17 | use rayon::prelude::*; |
|
18 | 18 | |
|
19 | 19 | /// List files under Mercurial control in the working directory |
|
20 | 20 | /// by reading the dirstate |
|
21 | 21 | pub struct Dirstate { |
|
22 | 22 | /// The `dirstate` content. |
|
23 | 23 | content: Vec<u8>, |
|
24 | 24 | } |
|
25 | 25 | |
|
26 | 26 | impl Dirstate { |
|
27 | 27 | pub fn new(repo: &Repo) -> Result<Self, HgError> { |
|
28 | let content = repo | |
|
29 | .hg_vfs() | |
|
30 | .read("dirstate") | |
|
31 | // TODO: this will be more accurate when we use `HgError` in | |
|
32 | // `Vfs::read`. | |
|
33 | .for_file("dirstate".as_ref())?; | |
|
28 | let content = repo.hg_vfs().read("dirstate")?; | |
|
34 | 29 | Ok(Self { content }) |
|
35 | 30 | } |
|
36 | 31 | |
|
37 | 32 | pub fn tracked_files(&self) -> Result<Vec<&HgPath>, HgError> { |
|
38 | 33 | let (_, entries, _) = parse_dirstate(&self.content)?; |
|
39 | 34 | let mut files: Vec<&HgPath> = entries |
|
40 | 35 | .into_iter() |
|
41 | 36 | .filter_map(|(path, entry)| match entry.state { |
|
42 | 37 | EntryState::Removed => None, |
|
43 | 38 | _ => Some(path), |
|
44 | 39 | }) |
|
45 | 40 | .collect(); |
|
46 | 41 | files.par_sort_unstable(); |
|
47 | 42 | Ok(files) |
|
48 | 43 | } |
|
49 | 44 | } |
|
50 | 45 | |
|
51 | 46 | /// List files under Mercurial control at a given revision. |
|
52 | 47 | pub fn list_rev_tracked_files( |
|
53 | 48 | repo: &Repo, |
|
54 | 49 | revset: &str, |
|
55 | 50 | ) -> Result<FilesForRev, RevlogError> { |
|
56 | 51 | let rev = crate::revset::resolve_single(revset, repo)?; |
|
57 | 52 | let changelog = Changelog::open(repo)?; |
|
58 | 53 | let manifest = Manifest::open(repo)?; |
|
59 | 54 | let changelog_entry = changelog.get_rev(rev)?; |
|
60 | let manifest_node = Node::from_hex(&changelog_entry.manifest_node()?) | |
|
61 | .map_err(|_| RevlogError::Corrupted)?; | |
|
55 | let manifest_node = | |
|
56 | Node::from_hex_for_repo(&changelog_entry.manifest_node()?)?; | |
|
62 | 57 | let manifest_entry = manifest.get_node(manifest_node.into())?; |
|
63 | 58 | Ok(FilesForRev(manifest_entry)) |
|
64 | 59 | } |
|
65 | 60 | |
|
66 | 61 | pub struct FilesForRev(ManifestEntry); |
|
67 | 62 | |
|
68 | 63 | impl FilesForRev { |
|
69 | 64 | pub fn iter(&self) -> impl Iterator<Item = &HgPath> { |
|
70 | 65 | self.0.files() |
|
71 | 66 | } |
|
72 | 67 | } |
@@ -1,91 +1,86 b'' | |||
|
1 | use crate::errors::HgError; | |
|
1 | use crate::errors::{HgError, IoResultExt}; | |
|
2 | 2 | use crate::operations::{find_root, FindRootError}; |
|
3 | 3 | use crate::requirements; |
|
4 | 4 | use memmap::{Mmap, MmapOptions}; |
|
5 | 5 | use std::path::{Path, PathBuf}; |
|
6 | 6 | |
|
7 | 7 | /// A repository on disk |
|
8 | 8 | pub struct Repo { |
|
9 | 9 | working_directory: PathBuf, |
|
10 | 10 | dot_hg: PathBuf, |
|
11 | 11 | store: PathBuf, |
|
12 | 12 | } |
|
13 | 13 | |
|
14 | 14 | /// Filesystem access abstraction for the contents of a given "base" directory |
|
15 | 15 | #[derive(Clone, Copy)] |
|
16 | 16 | pub(crate) struct Vfs<'a> { |
|
17 | 17 | base: &'a Path, |
|
18 | 18 | } |
|
19 | 19 | |
|
20 | 20 | impl Repo { |
|
21 | 21 | /// Returns `None` if the given path doesn’t look like a repository |
|
22 | 22 | /// (doesn’t contain a `.hg` sub-directory). |
|
23 | 23 | pub fn for_path(root: impl Into<PathBuf>) -> Self { |
|
24 | 24 | let working_directory = root.into(); |
|
25 | 25 | let dot_hg = working_directory.join(".hg"); |
|
26 | 26 | Self { |
|
27 | 27 | store: dot_hg.join("store"), |
|
28 | 28 | dot_hg, |
|
29 | 29 | working_directory, |
|
30 | 30 | } |
|
31 | 31 | } |
|
32 | 32 | |
|
33 | 33 | pub fn find() -> Result<Self, FindRootError> { |
|
34 | 34 | find_root().map(Self::for_path) |
|
35 | 35 | } |
|
36 | 36 | |
|
37 | 37 | pub fn check_requirements(&self) -> Result<(), HgError> { |
|
38 | 38 | requirements::check(self) |
|
39 | 39 | } |
|
40 | 40 | |
|
41 | 41 | pub fn working_directory_path(&self) -> &Path { |
|
42 | 42 | &self.working_directory |
|
43 | 43 | } |
|
44 | 44 | |
|
45 | 45 | /// For accessing repository files (in `.hg`), except for the store |
|
46 | 46 | /// (`.hg/store`). |
|
47 | 47 | pub(crate) fn hg_vfs(&self) -> Vfs<'_> { |
|
48 | 48 | Vfs { base: &self.dot_hg } |
|
49 | 49 | } |
|
50 | 50 | |
|
51 | 51 | /// For accessing repository store files (in `.hg/store`) |
|
52 | 52 | pub(crate) fn store_vfs(&self) -> Vfs<'_> { |
|
53 | 53 | Vfs { base: &self.store } |
|
54 | 54 | } |
|
55 | 55 | |
|
56 | 56 | /// For accessing the working copy |
|
57 | 57 | |
|
58 | 58 | // The underscore prefix silences the "never used" warning. Remove before |
|
59 | 59 | // using. |
|
60 | 60 | pub(crate) fn _working_directory_vfs(&self) -> Vfs<'_> { |
|
61 | 61 | Vfs { |
|
62 | 62 | base: &self.working_directory, |
|
63 | 63 | } |
|
64 | 64 | } |
|
65 | 65 | } |
|
66 | 66 | |
|
67 | 67 | impl Vfs<'_> { |
|
68 | 68 | pub(crate) fn read( |
|
69 | 69 | &self, |
|
70 | 70 | relative_path: impl AsRef<Path>, |
|
71 |
) -> |
|
|
72 |
|
|
|
73 | } | |
|
74 | ||
|
75 | pub(crate) fn open( | |
|
76 | &self, | |
|
77 | relative_path: impl AsRef<Path>, | |
|
78 | ) -> std::io::Result<std::fs::File> { | |
|
79 | std::fs::File::open(self.base.join(relative_path)) | |
|
71 | ) -> Result<Vec<u8>, HgError> { | |
|
72 | let path = self.base.join(relative_path); | |
|
73 | std::fs::read(&path).for_file(&path) | |
|
80 | 74 | } |
|
81 | 75 | |
|
82 | 76 | pub(crate) fn mmap_open( |
|
83 | 77 | &self, |
|
84 | 78 | relative_path: impl AsRef<Path>, |
|
85 |
) -> |
|
|
86 |
let |
|
|
79 | ) -> Result<Mmap, HgError> { | |
|
80 | let path = self.base.join(relative_path); | |
|
81 | let file = std::fs::File::open(&path).for_file(&path)?; | |
|
87 | 82 | // TODO: what are the safety requirements here? |
|
88 | let mmap = unsafe { MmapOptions::new().map(&file) }?; | |
|
83 | let mmap = unsafe { MmapOptions::new().map(&file) }.for_file(&path)?; | |
|
89 | 84 | Ok(mmap) |
|
90 | 85 | } |
|
91 | 86 | } |
@@ -1,70 +1,67 b'' | |||
|
1 |
use crate::errors::{HgError, HgResultExt |
|
|
1 | use crate::errors::{HgError, HgResultExt}; | |
|
2 | 2 | use crate::repo::Repo; |
|
3 | 3 | |
|
4 | 4 | fn parse(bytes: &[u8]) -> Result<Vec<String>, HgError> { |
|
5 | 5 | // The Python code reading this file uses `str.splitlines` |
|
6 | 6 | // which looks for a number of line separators (even including a couple of |
|
7 | 7 | // non-ASCII ones), but Python code writing it always uses `\n`. |
|
8 | 8 | let lines = bytes.split(|&byte| byte == b'\n'); |
|
9 | 9 | |
|
10 | 10 | lines |
|
11 | 11 | .filter(|line| !line.is_empty()) |
|
12 | 12 | .map(|line| { |
|
13 | 13 | // Python uses Unicode `str.isalnum` but feature names are all |
|
14 | 14 | // ASCII |
|
15 | 15 | if line[0].is_ascii_alphanumeric() && line.is_ascii() { |
|
16 | 16 | Ok(String::from_utf8(line.into()).unwrap()) |
|
17 | 17 | } else { |
|
18 | 18 | Err(HgError::corrupted("parse error in 'requires' file")) |
|
19 | 19 | } |
|
20 | 20 | }) |
|
21 | 21 | .collect() |
|
22 | 22 | } |
|
23 | 23 | |
|
24 | 24 | pub fn load(repo: &Repo) -> Result<Vec<String>, HgError> { |
|
25 |
if let Some(bytes) = |
|
|
26 | .hg_vfs() | |
|
27 | .read("requires") | |
|
28 | .for_file("requires".as_ref()) | |
|
29 | .io_not_found_as_none()? | |
|
25 | if let Some(bytes) = | |
|
26 | repo.hg_vfs().read("requires").io_not_found_as_none()? | |
|
30 | 27 | { |
|
31 | 28 | parse(&bytes) |
|
32 | 29 | } else { |
|
33 | 30 | // Treat a missing file the same as an empty file. |
|
34 | 31 | // From `mercurial/localrepo.py`: |
|
35 | 32 | // > requires file contains a newline-delimited list of |
|
36 | 33 | // > features/capabilities the opener (us) must have in order to use |
|
37 | 34 | // > the repository. This file was introduced in Mercurial 0.9.2, |
|
38 | 35 | // > which means very old repositories may not have one. We assume |
|
39 | 36 | // > a missing file translates to no requirements. |
|
40 | 37 | Ok(Vec::new()) |
|
41 | 38 | } |
|
42 | 39 | } |
|
43 | 40 | |
|
44 | 41 | pub fn check(repo: &Repo) -> Result<(), HgError> { |
|
45 | 42 | for feature in load(repo)? { |
|
46 | 43 | if !SUPPORTED.contains(&&*feature) { |
|
47 | 44 | // TODO: collect all unknown features and include them in the |
|
48 | 45 | // error message? |
|
49 | 46 | return Err(HgError::UnsupportedFeature(format!( |
|
50 | 47 | "repository requires feature unknown to this Mercurial: {}", |
|
51 | 48 | feature |
|
52 | 49 | ))); |
|
53 | 50 | } |
|
54 | 51 | } |
|
55 | 52 | Ok(()) |
|
56 | 53 | } |
|
57 | 54 | |
|
58 | 55 | // TODO: set this to actually-supported features |
|
59 | 56 | const SUPPORTED: &[&str] = &[ |
|
60 | 57 | "dotencode", |
|
61 | 58 | "fncache", |
|
62 | 59 | "generaldelta", |
|
63 | 60 | "revlogv1", |
|
64 | 61 | "sparserevlog", |
|
65 | 62 | "store", |
|
66 | 63 | // As of this writing everything rhg does is read-only. |
|
67 | 64 | // When it starts writing to the repository, it’ll need to either keep the |
|
68 | 65 | // persistent nodemap up to date or remove this entry: |
|
69 | 66 | "persistent-nodemap", |
|
70 | 67 | ]; |
@@ -1,58 +1,61 b'' | |||
|
1 | use crate::errors::HgError; | |
|
1 | 2 | use crate::repo::Repo; |
|
2 | 3 | use crate::revlog::revlog::{Revlog, RevlogError}; |
|
3 | 4 | use crate::revlog::NodePrefix; |
|
4 | 5 | use crate::revlog::Revision; |
|
5 | 6 | |
|
6 | 7 | /// A specialized `Revlog` to work with `changelog` data format. |
|
7 | 8 | pub struct Changelog { |
|
8 | 9 | /// The generic `revlog` format. |
|
9 | 10 | pub(crate) revlog: Revlog, |
|
10 | 11 | } |
|
11 | 12 | |
|
12 | 13 | impl Changelog { |
|
13 | 14 | /// Open the `changelog` of a repository given by its root. |
|
14 | 15 | pub fn open(repo: &Repo) -> Result<Self, RevlogError> { |
|
15 | 16 | let revlog = Revlog::open(repo, "00changelog.i", None)?; |
|
16 | 17 | Ok(Self { revlog }) |
|
17 | 18 | } |
|
18 | 19 | |
|
19 | 20 | /// Return the `ChangelogEntry` of a given node id. |
|
20 | 21 | pub fn get_node( |
|
21 | 22 | &self, |
|
22 | 23 | node: NodePrefix, |
|
23 | 24 | ) -> Result<ChangelogEntry, RevlogError> { |
|
24 | 25 | let rev = self.revlog.get_node_rev(node)?; |
|
25 | 26 | self.get_rev(rev) |
|
26 | 27 | } |
|
27 | 28 | |
|
28 | 29 | /// Return the `ChangelogEntry` of a given node revision. |
|
29 | 30 | pub fn get_rev( |
|
30 | 31 | &self, |
|
31 | 32 | rev: Revision, |
|
32 | 33 | ) -> Result<ChangelogEntry, RevlogError> { |
|
33 | 34 | let bytes = self.revlog.get_rev_data(rev)?; |
|
34 | 35 | Ok(ChangelogEntry { bytes }) |
|
35 | 36 | } |
|
36 | 37 | } |
|
37 | 38 | |
|
38 | 39 | /// `Changelog` entry which knows how to interpret the `changelog` data bytes. |
|
39 | 40 | #[derive(Debug)] |
|
40 | 41 | pub struct ChangelogEntry { |
|
41 | 42 | /// The data bytes of the `changelog` entry. |
|
42 | 43 | bytes: Vec<u8>, |
|
43 | 44 | } |
|
44 | 45 | |
|
45 | 46 | impl ChangelogEntry { |
|
46 | 47 | /// Return an iterator over the lines of the entry. |
|
47 | 48 | pub fn lines(&self) -> impl Iterator<Item = &[u8]> { |
|
48 | 49 | self.bytes |
|
49 | 50 | .split(|b| b == &b'\n') |
|
50 | 51 | .filter(|line| !line.is_empty()) |
|
51 | 52 | } |
|
52 | 53 | |
|
53 | 54 | /// Return the node id of the `manifest` referenced by this `changelog` |
|
54 | 55 | /// entry. |
|
55 | 56 | pub fn manifest_node(&self) -> Result<&[u8], RevlogError> { |
|
56 | self.lines().next().ok_or(RevlogError::Corrupted) | |
|
57 | self.lines() | |
|
58 | .next() | |
|
59 | .ok_or_else(|| HgError::corrupted("empty changelog entry").into()) | |
|
57 | 60 | } |
|
58 | 61 | } |
@@ -1,402 +1,404 b'' | |||
|
1 | 1 | use std::convert::TryInto; |
|
2 | 2 | use std::ops::Deref; |
|
3 | 3 | |
|
4 | 4 | use byteorder::{BigEndian, ByteOrder}; |
|
5 | 5 | |
|
6 | use crate::errors::HgError; | |
|
6 | 7 | use crate::revlog::node::Node; |
|
7 | 8 | use crate::revlog::revlog::RevlogError; |
|
8 | 9 | use crate::revlog::{Revision, NULL_REVISION}; |
|
9 | 10 | |
|
10 | 11 | pub const INDEX_ENTRY_SIZE: usize = 64; |
|
11 | 12 | |
|
12 | 13 | /// A Revlog index |
|
13 | 14 | pub struct Index { |
|
14 | 15 | bytes: Box<dyn Deref<Target = [u8]> + Send>, |
|
15 | 16 | /// Offsets of starts of index blocks. |
|
16 | 17 | /// Only needed when the index is interleaved with data. |
|
17 | 18 | offsets: Option<Vec<usize>>, |
|
18 | 19 | } |
|
19 | 20 | |
|
20 | 21 | impl Index { |
|
21 | 22 | /// Create an index from bytes. |
|
22 | 23 | /// Calculate the start of each entry when is_inline is true. |
|
23 | 24 | pub fn new( |
|
24 | 25 | bytes: Box<dyn Deref<Target = [u8]> + Send>, |
|
25 | 26 | ) -> Result<Self, RevlogError> { |
|
26 | 27 | if is_inline(&bytes) { |
|
27 | 28 | let mut offset: usize = 0; |
|
28 | 29 | let mut offsets = Vec::new(); |
|
29 | 30 | |
|
30 | 31 | while offset + INDEX_ENTRY_SIZE <= bytes.len() { |
|
31 | 32 | offsets.push(offset); |
|
32 | 33 | let end = offset + INDEX_ENTRY_SIZE; |
|
33 | 34 | let entry = IndexEntry { |
|
34 | 35 | bytes: &bytes[offset..end], |
|
35 | 36 | offset_override: None, |
|
36 | 37 | }; |
|
37 | 38 | |
|
38 | 39 | offset += INDEX_ENTRY_SIZE + entry.compressed_len(); |
|
39 | 40 | } |
|
40 | 41 | |
|
41 | 42 | if offset == bytes.len() { |
|
42 | 43 | Ok(Self { |
|
43 | 44 | bytes, |
|
44 | 45 | offsets: Some(offsets), |
|
45 | 46 | }) |
|
46 | 47 | } else { |
|
47 |
Err( |
|
|
48 | Err(HgError::corrupted("unexpected inline revlog length") | |
|
49 | .into()) | |
|
48 | 50 | } |
|
49 | 51 | } else { |
|
50 | 52 | Ok(Self { |
|
51 | 53 | bytes, |
|
52 | 54 | offsets: None, |
|
53 | 55 | }) |
|
54 | 56 | } |
|
55 | 57 | } |
|
56 | 58 | |
|
57 | 59 | /// Value of the inline flag. |
|
58 | 60 | pub fn is_inline(&self) -> bool { |
|
59 | 61 | is_inline(&self.bytes) |
|
60 | 62 | } |
|
61 | 63 | |
|
62 | 64 | /// Return a slice of bytes if `revlog` is inline. Panic if not. |
|
63 | 65 | pub fn data(&self, start: usize, end: usize) -> &[u8] { |
|
64 | 66 | if !self.is_inline() { |
|
65 | 67 | panic!("tried to access data in the index of a revlog that is not inline"); |
|
66 | 68 | } |
|
67 | 69 | &self.bytes[start..end] |
|
68 | 70 | } |
|
69 | 71 | |
|
70 | 72 | /// Return number of entries of the revlog index. |
|
71 | 73 | pub fn len(&self) -> usize { |
|
72 | 74 | if let Some(offsets) = &self.offsets { |
|
73 | 75 | offsets.len() |
|
74 | 76 | } else { |
|
75 | 77 | self.bytes.len() / INDEX_ENTRY_SIZE |
|
76 | 78 | } |
|
77 | 79 | } |
|
78 | 80 | |
|
79 | 81 | /// Returns `true` if the `Index` has zero `entries`. |
|
80 | 82 | pub fn is_empty(&self) -> bool { |
|
81 | 83 | self.len() == 0 |
|
82 | 84 | } |
|
83 | 85 | |
|
84 | 86 | /// Return the index entry corresponding to the given revision if it |
|
85 | 87 | /// exists. |
|
86 | 88 | pub fn get_entry(&self, rev: Revision) -> Option<IndexEntry> { |
|
87 | 89 | if rev == NULL_REVISION { |
|
88 | 90 | return None; |
|
89 | 91 | } |
|
90 | 92 | if let Some(offsets) = &self.offsets { |
|
91 | 93 | self.get_entry_inline(rev, offsets) |
|
92 | 94 | } else { |
|
93 | 95 | self.get_entry_separated(rev) |
|
94 | 96 | } |
|
95 | 97 | } |
|
96 | 98 | |
|
97 | 99 | fn get_entry_inline( |
|
98 | 100 | &self, |
|
99 | 101 | rev: Revision, |
|
100 | 102 | offsets: &[usize], |
|
101 | 103 | ) -> Option<IndexEntry> { |
|
102 | 104 | let start = *offsets.get(rev as usize)?; |
|
103 | 105 | let end = start.checked_add(INDEX_ENTRY_SIZE)?; |
|
104 | 106 | let bytes = &self.bytes[start..end]; |
|
105 | 107 | |
|
106 | 108 | // See IndexEntry for an explanation of this override. |
|
107 | 109 | let offset_override = Some(end); |
|
108 | 110 | |
|
109 | 111 | Some(IndexEntry { |
|
110 | 112 | bytes, |
|
111 | 113 | offset_override, |
|
112 | 114 | }) |
|
113 | 115 | } |
|
114 | 116 | |
|
115 | 117 | fn get_entry_separated(&self, rev: Revision) -> Option<IndexEntry> { |
|
116 | 118 | let max_rev = self.bytes.len() / INDEX_ENTRY_SIZE; |
|
117 | 119 | if rev as usize >= max_rev { |
|
118 | 120 | return None; |
|
119 | 121 | } |
|
120 | 122 | let start = rev as usize * INDEX_ENTRY_SIZE; |
|
121 | 123 | let end = start + INDEX_ENTRY_SIZE; |
|
122 | 124 | let bytes = &self.bytes[start..end]; |
|
123 | 125 | |
|
124 | 126 | // Override the offset of the first revision as its bytes are used |
|
125 | 127 | // for the index's metadata (saving space because it is always 0) |
|
126 | 128 | let offset_override = if rev == 0 { Some(0) } else { None }; |
|
127 | 129 | |
|
128 | 130 | Some(IndexEntry { |
|
129 | 131 | bytes, |
|
130 | 132 | offset_override, |
|
131 | 133 | }) |
|
132 | 134 | } |
|
133 | 135 | } |
|
134 | 136 | |
|
135 | 137 | impl super::RevlogIndex for Index { |
|
136 | 138 | fn len(&self) -> usize { |
|
137 | 139 | self.len() |
|
138 | 140 | } |
|
139 | 141 | |
|
140 | 142 | fn node(&self, rev: Revision) -> Option<&Node> { |
|
141 | 143 | self.get_entry(rev).map(|entry| entry.hash()) |
|
142 | 144 | } |
|
143 | 145 | } |
|
144 | 146 | |
|
145 | 147 | #[derive(Debug)] |
|
146 | 148 | pub struct IndexEntry<'a> { |
|
147 | 149 | bytes: &'a [u8], |
|
148 | 150 | /// Allows to override the offset value of the entry. |
|
149 | 151 | /// |
|
150 | 152 | /// For interleaved index and data, the offset stored in the index |
|
151 | 153 | /// corresponds to the separated data offset. |
|
152 | 154 | /// It has to be overridden with the actual offset in the interleaved |
|
153 | 155 | /// index which is just after the index block. |
|
154 | 156 | /// |
|
155 | 157 | /// For separated index and data, the offset stored in the first index |
|
156 | 158 | /// entry is mixed with the index headers. |
|
157 | 159 | /// It has to be overridden with 0. |
|
158 | 160 | offset_override: Option<usize>, |
|
159 | 161 | } |
|
160 | 162 | |
|
161 | 163 | impl<'a> IndexEntry<'a> { |
|
162 | 164 | /// Return the offset of the data. |
|
163 | 165 | pub fn offset(&self) -> usize { |
|
164 | 166 | if let Some(offset_override) = self.offset_override { |
|
165 | 167 | offset_override |
|
166 | 168 | } else { |
|
167 | 169 | let mut bytes = [0; 8]; |
|
168 | 170 | bytes[2..8].copy_from_slice(&self.bytes[0..=5]); |
|
169 | 171 | BigEndian::read_u64(&bytes[..]) as usize |
|
170 | 172 | } |
|
171 | 173 | } |
|
172 | 174 | |
|
173 | 175 | /// Return the compressed length of the data. |
|
174 | 176 | pub fn compressed_len(&self) -> usize { |
|
175 | 177 | BigEndian::read_u32(&self.bytes[8..=11]) as usize |
|
176 | 178 | } |
|
177 | 179 | |
|
178 | 180 | /// Return the uncompressed length of the data. |
|
179 | 181 | pub fn uncompressed_len(&self) -> usize { |
|
180 | 182 | BigEndian::read_u32(&self.bytes[12..=15]) as usize |
|
181 | 183 | } |
|
182 | 184 | |
|
183 | 185 | /// Return the revision upon which the data has been derived. |
|
184 | 186 | pub fn base_revision(&self) -> Revision { |
|
185 | 187 | // TODO Maybe return an Option when base_revision == rev? |
|
186 | 188 | // Requires adding rev to IndexEntry |
|
187 | 189 | |
|
188 | 190 | BigEndian::read_i32(&self.bytes[16..]) |
|
189 | 191 | } |
|
190 | 192 | |
|
191 | 193 | pub fn p1(&self) -> Revision { |
|
192 | 194 | BigEndian::read_i32(&self.bytes[24..]) |
|
193 | 195 | } |
|
194 | 196 | |
|
195 | 197 | pub fn p2(&self) -> Revision { |
|
196 | 198 | BigEndian::read_i32(&self.bytes[28..]) |
|
197 | 199 | } |
|
198 | 200 | |
|
199 | 201 | /// Return the hash of revision's full text. |
|
200 | 202 | /// |
|
201 | 203 | /// Currently, SHA-1 is used and only the first 20 bytes of this field |
|
202 | 204 | /// are used. |
|
203 | 205 | pub fn hash(&self) -> &'a Node { |
|
204 | 206 | (&self.bytes[32..52]).try_into().unwrap() |
|
205 | 207 | } |
|
206 | 208 | } |
|
207 | 209 | |
|
208 | 210 | /// Value of the inline flag. |
|
209 | 211 | pub fn is_inline(index_bytes: &[u8]) -> bool { |
|
210 | 212 | match &index_bytes[0..=1] { |
|
211 | 213 | [0, 0] | [0, 2] => false, |
|
212 | 214 | _ => true, |
|
213 | 215 | } |
|
214 | 216 | } |
|
215 | 217 | |
|
216 | 218 | #[cfg(test)] |
|
217 | 219 | mod tests { |
|
218 | 220 | use super::*; |
|
219 | 221 | |
|
220 | 222 | #[cfg(test)] |
|
221 | 223 | #[derive(Debug, Copy, Clone)] |
|
222 | 224 | pub struct IndexEntryBuilder { |
|
223 | 225 | is_first: bool, |
|
224 | 226 | is_inline: bool, |
|
225 | 227 | is_general_delta: bool, |
|
226 | 228 | version: u16, |
|
227 | 229 | offset: usize, |
|
228 | 230 | compressed_len: usize, |
|
229 | 231 | uncompressed_len: usize, |
|
230 | 232 | base_revision: Revision, |
|
231 | 233 | } |
|
232 | 234 | |
|
233 | 235 | #[cfg(test)] |
|
234 | 236 | impl IndexEntryBuilder { |
|
235 | 237 | pub fn new() -> Self { |
|
236 | 238 | Self { |
|
237 | 239 | is_first: false, |
|
238 | 240 | is_inline: false, |
|
239 | 241 | is_general_delta: true, |
|
240 | 242 | version: 2, |
|
241 | 243 | offset: 0, |
|
242 | 244 | compressed_len: 0, |
|
243 | 245 | uncompressed_len: 0, |
|
244 | 246 | base_revision: 0, |
|
245 | 247 | } |
|
246 | 248 | } |
|
247 | 249 | |
|
248 | 250 | pub fn is_first(&mut self, value: bool) -> &mut Self { |
|
249 | 251 | self.is_first = value; |
|
250 | 252 | self |
|
251 | 253 | } |
|
252 | 254 | |
|
253 | 255 | pub fn with_inline(&mut self, value: bool) -> &mut Self { |
|
254 | 256 | self.is_inline = value; |
|
255 | 257 | self |
|
256 | 258 | } |
|
257 | 259 | |
|
258 | 260 | pub fn with_general_delta(&mut self, value: bool) -> &mut Self { |
|
259 | 261 | self.is_general_delta = value; |
|
260 | 262 | self |
|
261 | 263 | } |
|
262 | 264 | |
|
263 | 265 | pub fn with_version(&mut self, value: u16) -> &mut Self { |
|
264 | 266 | self.version = value; |
|
265 | 267 | self |
|
266 | 268 | } |
|
267 | 269 | |
|
268 | 270 | pub fn with_offset(&mut self, value: usize) -> &mut Self { |
|
269 | 271 | self.offset = value; |
|
270 | 272 | self |
|
271 | 273 | } |
|
272 | 274 | |
|
273 | 275 | pub fn with_compressed_len(&mut self, value: usize) -> &mut Self { |
|
274 | 276 | self.compressed_len = value; |
|
275 | 277 | self |
|
276 | 278 | } |
|
277 | 279 | |
|
278 | 280 | pub fn with_uncompressed_len(&mut self, value: usize) -> &mut Self { |
|
279 | 281 | self.uncompressed_len = value; |
|
280 | 282 | self |
|
281 | 283 | } |
|
282 | 284 | |
|
283 | 285 | pub fn with_base_revision(&mut self, value: Revision) -> &mut Self { |
|
284 | 286 | self.base_revision = value; |
|
285 | 287 | self |
|
286 | 288 | } |
|
287 | 289 | |
|
288 | 290 | pub fn build(&self) -> Vec<u8> { |
|
289 | 291 | let mut bytes = Vec::with_capacity(INDEX_ENTRY_SIZE); |
|
290 | 292 | if self.is_first { |
|
291 | 293 | bytes.extend(&match (self.is_general_delta, self.is_inline) { |
|
292 | 294 | (false, false) => [0u8, 0], |
|
293 | 295 | (false, true) => [0u8, 1], |
|
294 | 296 | (true, false) => [0u8, 2], |
|
295 | 297 | (true, true) => [0u8, 3], |
|
296 | 298 | }); |
|
297 | 299 | bytes.extend(&self.version.to_be_bytes()); |
|
298 | 300 | // Remaining offset bytes. |
|
299 | 301 | bytes.extend(&[0u8; 2]); |
|
300 | 302 | } else { |
|
301 | 303 | // Offset is only 6 bytes while usize is 8. |
|
302 | 304 | bytes.extend(&self.offset.to_be_bytes()[2..]); |
|
303 | 305 | } |
|
304 | 306 | bytes.extend(&[0u8; 2]); // Revision flags. |
|
305 | 307 | bytes.extend(&self.compressed_len.to_be_bytes()[4..]); |
|
306 | 308 | bytes.extend(&self.uncompressed_len.to_be_bytes()[4..]); |
|
307 | 309 | bytes.extend(&self.base_revision.to_be_bytes()); |
|
308 | 310 | bytes |
|
309 | 311 | } |
|
310 | 312 | } |
|
311 | 313 | |
|
312 | 314 | #[test] |
|
313 | 315 | fn is_not_inline_when_no_inline_flag_test() { |
|
314 | 316 | let bytes = IndexEntryBuilder::new() |
|
315 | 317 | .is_first(true) |
|
316 | 318 | .with_general_delta(false) |
|
317 | 319 | .with_inline(false) |
|
318 | 320 | .build(); |
|
319 | 321 | |
|
320 | 322 | assert_eq!(is_inline(&bytes), false) |
|
321 | 323 | } |
|
322 | 324 | |
|
323 | 325 | #[test] |
|
324 | 326 | fn is_inline_when_inline_flag_test() { |
|
325 | 327 | let bytes = IndexEntryBuilder::new() |
|
326 | 328 | .is_first(true) |
|
327 | 329 | .with_general_delta(false) |
|
328 | 330 | .with_inline(true) |
|
329 | 331 | .build(); |
|
330 | 332 | |
|
331 | 333 | assert_eq!(is_inline(&bytes), true) |
|
332 | 334 | } |
|
333 | 335 | |
|
334 | 336 | #[test] |
|
335 | 337 | fn is_inline_when_inline_and_generaldelta_flags_test() { |
|
336 | 338 | let bytes = IndexEntryBuilder::new() |
|
337 | 339 | .is_first(true) |
|
338 | 340 | .with_general_delta(true) |
|
339 | 341 | .with_inline(true) |
|
340 | 342 | .build(); |
|
341 | 343 | |
|
342 | 344 | assert_eq!(is_inline(&bytes), true) |
|
343 | 345 | } |
|
344 | 346 | |
|
345 | 347 | #[test] |
|
346 | 348 | fn test_offset() { |
|
347 | 349 | let bytes = IndexEntryBuilder::new().with_offset(1).build(); |
|
348 | 350 | let entry = IndexEntry { |
|
349 | 351 | bytes: &bytes, |
|
350 | 352 | offset_override: None, |
|
351 | 353 | }; |
|
352 | 354 | |
|
353 | 355 | assert_eq!(entry.offset(), 1) |
|
354 | 356 | } |
|
355 | 357 | |
|
356 | 358 | #[test] |
|
357 | 359 | fn test_with_overridden_offset() { |
|
358 | 360 | let bytes = IndexEntryBuilder::new().with_offset(1).build(); |
|
359 | 361 | let entry = IndexEntry { |
|
360 | 362 | bytes: &bytes, |
|
361 | 363 | offset_override: Some(2), |
|
362 | 364 | }; |
|
363 | 365 | |
|
364 | 366 | assert_eq!(entry.offset(), 2) |
|
365 | 367 | } |
|
366 | 368 | |
|
367 | 369 | #[test] |
|
368 | 370 | fn test_compressed_len() { |
|
369 | 371 | let bytes = IndexEntryBuilder::new().with_compressed_len(1).build(); |
|
370 | 372 | let entry = IndexEntry { |
|
371 | 373 | bytes: &bytes, |
|
372 | 374 | offset_override: None, |
|
373 | 375 | }; |
|
374 | 376 | |
|
375 | 377 | assert_eq!(entry.compressed_len(), 1) |
|
376 | 378 | } |
|
377 | 379 | |
|
378 | 380 | #[test] |
|
379 | 381 | fn test_uncompressed_len() { |
|
380 | 382 | let bytes = IndexEntryBuilder::new().with_uncompressed_len(1).build(); |
|
381 | 383 | let entry = IndexEntry { |
|
382 | 384 | bytes: &bytes, |
|
383 | 385 | offset_override: None, |
|
384 | 386 | }; |
|
385 | 387 | |
|
386 | 388 | assert_eq!(entry.uncompressed_len(), 1) |
|
387 | 389 | } |
|
388 | 390 | |
|
389 | 391 | #[test] |
|
390 | 392 | fn test_base_revision() { |
|
391 | 393 | let bytes = IndexEntryBuilder::new().with_base_revision(1).build(); |
|
392 | 394 | let entry = IndexEntry { |
|
393 | 395 | bytes: &bytes, |
|
394 | 396 | offset_override: None, |
|
395 | 397 | }; |
|
396 | 398 | |
|
397 | 399 | assert_eq!(entry.base_revision(), 1) |
|
398 | 400 | } |
|
399 | 401 | } |
|
400 | 402 | |
|
401 | 403 | #[cfg(test)] |
|
402 | 404 | pub use tests::IndexEntryBuilder; |
@@ -1,384 +1,398 b'' | |||
|
1 | 1 | // Copyright 2019-2020 Georges Racinet <georges.racinet@octobus.net> |
|
2 | 2 | // |
|
3 | 3 | // This software may be used and distributed according to the terms of the |
|
4 | 4 | // GNU General Public License version 2 or any later version. |
|
5 | 5 | |
|
6 | 6 | //! Definitions and utilities for Revision nodes |
|
7 | 7 | //! |
|
8 | 8 | //! In Mercurial code base, it is customary to call "a node" the binary SHA |
|
9 | 9 | //! of a revision. |
|
10 | 10 | |
|
11 | use crate::errors::HgError; | |
|
11 | 12 | use bytes_cast::BytesCast; |
|
12 | 13 | use std::convert::{TryFrom, TryInto}; |
|
13 | 14 | use std::fmt; |
|
14 | 15 | |
|
15 | 16 | /// The length in bytes of a `Node` |
|
16 | 17 | /// |
|
17 | 18 | /// This constant is meant to ease refactors of this module, and |
|
18 | 19 | /// are private so that calling code does not expect all nodes have |
|
19 | 20 | /// the same size, should we support several formats concurrently in |
|
20 | 21 | /// the future. |
|
21 | 22 | pub const NODE_BYTES_LENGTH: usize = 20; |
|
22 | 23 | |
|
23 | 24 | /// Id of the null node. |
|
24 | 25 | /// |
|
25 | 26 | /// Used to indicate the absence of node. |
|
26 | 27 | pub const NULL_NODE_ID: [u8; NODE_BYTES_LENGTH] = [0u8; NODE_BYTES_LENGTH]; |
|
27 | 28 | |
|
28 | 29 | /// The length in bytes of a `Node` |
|
29 | 30 | /// |
|
30 | 31 | /// see also `NODES_BYTES_LENGTH` about it being private. |
|
31 | 32 | const NODE_NYBBLES_LENGTH: usize = 2 * NODE_BYTES_LENGTH; |
|
32 | 33 | |
|
33 | 34 | /// Private alias for readability and to ease future change |
|
34 | 35 | type NodeData = [u8; NODE_BYTES_LENGTH]; |
|
35 | 36 | |
|
36 | 37 | /// Binary revision SHA |
|
37 | 38 | /// |
|
38 | 39 | /// ## Future changes of hash size |
|
39 | 40 | /// |
|
40 | 41 | /// To accomodate future changes of hash size, Rust callers |
|
41 | 42 | /// should use the conversion methods at the boundaries (FFI, actual |
|
42 | 43 | /// computation of hashes and I/O) only, and only if required. |
|
43 | 44 | /// |
|
44 | 45 | /// All other callers outside of unit tests should just handle `Node` values |
|
45 | 46 | /// and never make any assumption on the actual length, using [`nybbles_len`] |
|
46 | 47 | /// if they need a loop boundary. |
|
47 | 48 | /// |
|
48 | 49 | /// All methods that create a `Node` either take a type that enforces |
|
49 | 50 | /// the size or return an error at runtime. |
|
50 | 51 | /// |
|
51 | 52 | /// [`nybbles_len`]: #method.nybbles_len |
|
52 | 53 | #[derive(Copy, Clone, Debug, PartialEq, BytesCast, derive_more::From)] |
|
53 | 54 | #[repr(transparent)] |
|
54 | 55 | pub struct Node { |
|
55 | 56 | data: NodeData, |
|
56 | 57 | } |
|
57 | 58 | |
|
58 | 59 | /// The node value for NULL_REVISION |
|
59 | 60 | pub const NULL_NODE: Node = Node { |
|
60 | 61 | data: [0; NODE_BYTES_LENGTH], |
|
61 | 62 | }; |
|
62 | 63 | |
|
63 | 64 | /// Return an error if the slice has an unexpected length |
|
64 | 65 | impl<'a> TryFrom<&'a [u8]> for &'a Node { |
|
65 | 66 | type Error = (); |
|
66 | 67 | |
|
67 | 68 | #[inline] |
|
68 | 69 | fn try_from(bytes: &'a [u8]) -> Result<Self, Self::Error> { |
|
69 | 70 | match Node::from_bytes(bytes) { |
|
70 | 71 | Ok((node, rest)) if rest.is_empty() => Ok(node), |
|
71 | 72 | _ => Err(()), |
|
72 | 73 | } |
|
73 | 74 | } |
|
74 | 75 | } |
|
75 | 76 | |
|
76 | 77 | /// Return an error if the slice has an unexpected length |
|
77 | 78 | impl TryFrom<&'_ [u8]> for Node { |
|
78 | 79 | type Error = std::array::TryFromSliceError; |
|
79 | 80 | |
|
80 | 81 | #[inline] |
|
81 | 82 | fn try_from(bytes: &'_ [u8]) -> Result<Self, Self::Error> { |
|
82 | 83 | let data = bytes.try_into()?; |
|
83 | 84 | Ok(Self { data }) |
|
84 | 85 | } |
|
85 | 86 | } |
|
86 | 87 | |
|
87 | 88 | impl fmt::LowerHex for Node { |
|
88 | 89 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { |
|
89 | 90 | for &byte in &self.data { |
|
90 | 91 | write!(f, "{:02x}", byte)? |
|
91 | 92 | } |
|
92 | 93 | Ok(()) |
|
93 | 94 | } |
|
94 | 95 | } |
|
95 | 96 | |
|
96 | 97 | #[derive(Debug)] |
|
97 | 98 | pub struct FromHexError; |
|
98 | 99 | |
|
99 | 100 | /// Low level utility function, also for prefixes |
|
100 | 101 | fn get_nybble(s: &[u8], i: usize) -> u8 { |
|
101 | 102 | if i % 2 == 0 { |
|
102 | 103 | s[i / 2] >> 4 |
|
103 | 104 | } else { |
|
104 | 105 | s[i / 2] & 0x0f |
|
105 | 106 | } |
|
106 | 107 | } |
|
107 | 108 | |
|
108 | 109 | impl Node { |
|
109 | 110 | /// Retrieve the `i`th half-byte of the binary data. |
|
110 | 111 | /// |
|
111 | 112 | /// This is also the `i`th hexadecimal digit in numeric form, |
|
112 | 113 | /// also called a [nybble](https://en.wikipedia.org/wiki/Nibble). |
|
113 | 114 | pub fn get_nybble(&self, i: usize) -> u8 { |
|
114 | 115 | get_nybble(&self.data, i) |
|
115 | 116 | } |
|
116 | 117 | |
|
117 | 118 | /// Length of the data, in nybbles |
|
118 | 119 | pub fn nybbles_len(&self) -> usize { |
|
119 | 120 | // public exposure as an instance method only, so that we can |
|
120 | 121 | // easily support several sizes of hashes if needed in the future. |
|
121 | 122 | NODE_NYBBLES_LENGTH |
|
122 | 123 | } |
|
123 | 124 | |
|
124 | 125 | /// Convert from hexadecimal string representation |
|
125 | 126 | /// |
|
126 | 127 | /// Exact length is required. |
|
127 | 128 | /// |
|
128 | 129 | /// To be used in FFI and I/O only, in order to facilitate future |
|
129 | 130 | /// changes of hash format. |
|
130 | 131 | pub fn from_hex(hex: impl AsRef<[u8]>) -> Result<Node, FromHexError> { |
|
131 | 132 | let prefix = NodePrefix::from_hex(hex)?; |
|
132 | 133 | if prefix.nybbles_len() == NODE_NYBBLES_LENGTH { |
|
133 | 134 | Ok(Self { data: prefix.data }) |
|
134 | 135 | } else { |
|
135 | 136 | Err(FromHexError) |
|
136 | 137 | } |
|
137 | 138 | } |
|
138 | 139 | |
|
140 | /// `from_hex`, but for input from an internal file of the repository such | |
|
141 | /// as a changelog or manifest entry. | |
|
142 | /// | |
|
143 | /// An error is treated as repository corruption. | |
|
144 | pub fn from_hex_for_repo(hex: impl AsRef<[u8]>) -> Result<Node, HgError> { | |
|
145 | Self::from_hex(hex.as_ref()).map_err(|FromHexError| { | |
|
146 | HgError::CorruptedRepository(format!( | |
|
147 | "Expected a full hexadecimal node ID, found {}", | |
|
148 | String::from_utf8_lossy(hex.as_ref()) | |
|
149 | )) | |
|
150 | }) | |
|
151 | } | |
|
152 | ||
|
139 | 153 | /// Provide access to binary data |
|
140 | 154 | /// |
|
141 | 155 | /// This is needed by FFI layers, for instance to return expected |
|
142 | 156 | /// binary values to Python. |
|
143 | 157 | pub fn as_bytes(&self) -> &[u8] { |
|
144 | 158 | &self.data |
|
145 | 159 | } |
|
146 | 160 | } |
|
147 | 161 | |
|
148 | 162 | /// The beginning of a binary revision SHA. |
|
149 | 163 | /// |
|
150 | 164 | /// Since it can potentially come from an hexadecimal representation with |
|
151 | 165 | /// odd length, it needs to carry around whether the last 4 bits are relevant |
|
152 | 166 | /// or not. |
|
153 | 167 | #[derive(Debug, PartialEq, Copy, Clone)] |
|
154 | 168 | pub struct NodePrefix { |
|
155 | 169 | /// In `1..=NODE_NYBBLES_LENGTH` |
|
156 | 170 | nybbles_len: u8, |
|
157 | 171 | /// The first `4 * length_in_nybbles` bits are used (considering bits |
|
158 | 172 | /// within a bytes in big-endian: most significant first), the rest |
|
159 | 173 | /// are zero. |
|
160 | 174 | data: NodeData, |
|
161 | 175 | } |
|
162 | 176 | |
|
163 | 177 | impl NodePrefix { |
|
164 | 178 | /// Convert from hexadecimal string representation |
|
165 | 179 | /// |
|
166 | 180 | /// Similarly to `hex::decode`, can be used with Unicode string types |
|
167 | 181 | /// (`String`, `&str`) as well as bytes. |
|
168 | 182 | /// |
|
169 | 183 | /// To be used in FFI and I/O only, in order to facilitate future |
|
170 | 184 | /// changes of hash format. |
|
171 | 185 | pub fn from_hex(hex: impl AsRef<[u8]>) -> Result<Self, FromHexError> { |
|
172 | 186 | let hex = hex.as_ref(); |
|
173 | 187 | let len = hex.len(); |
|
174 | 188 | if len > NODE_NYBBLES_LENGTH || len == 0 { |
|
175 | 189 | return Err(FromHexError); |
|
176 | 190 | } |
|
177 | 191 | |
|
178 | 192 | let mut data = [0; NODE_BYTES_LENGTH]; |
|
179 | 193 | let mut nybbles_len = 0; |
|
180 | 194 | for &ascii_byte in hex { |
|
181 | 195 | let nybble = match char::from(ascii_byte).to_digit(16) { |
|
182 | 196 | Some(digit) => digit as u8, |
|
183 | 197 | None => return Err(FromHexError), |
|
184 | 198 | }; |
|
185 | 199 | // Fill in the upper half of a byte first, then the lower half. |
|
186 | 200 | let shift = if nybbles_len % 2 == 0 { 4 } else { 0 }; |
|
187 | 201 | data[nybbles_len as usize / 2] |= nybble << shift; |
|
188 | 202 | nybbles_len += 1; |
|
189 | 203 | } |
|
190 | 204 | Ok(Self { data, nybbles_len }) |
|
191 | 205 | } |
|
192 | 206 | |
|
193 | 207 | pub fn nybbles_len(&self) -> usize { |
|
194 | 208 | self.nybbles_len as _ |
|
195 | 209 | } |
|
196 | 210 | |
|
197 | 211 | pub fn is_prefix_of(&self, node: &Node) -> bool { |
|
198 | 212 | let full_bytes = self.nybbles_len() / 2; |
|
199 | 213 | if self.data[..full_bytes] != node.data[..full_bytes] { |
|
200 | 214 | return false; |
|
201 | 215 | } |
|
202 | 216 | if self.nybbles_len() % 2 == 0 { |
|
203 | 217 | return true; |
|
204 | 218 | } |
|
205 | 219 | let last = self.nybbles_len() - 1; |
|
206 | 220 | self.get_nybble(last) == node.get_nybble(last) |
|
207 | 221 | } |
|
208 | 222 | |
|
209 | 223 | /// Retrieve the `i`th half-byte from the prefix. |
|
210 | 224 | /// |
|
211 | 225 | /// This is also the `i`th hexadecimal digit in numeric form, |
|
212 | 226 | /// also called a [nybble](https://en.wikipedia.org/wiki/Nibble). |
|
213 | 227 | pub fn get_nybble(&self, i: usize) -> u8 { |
|
214 | 228 | assert!(i < self.nybbles_len()); |
|
215 | 229 | get_nybble(&self.data, i) |
|
216 | 230 | } |
|
217 | 231 | |
|
218 | 232 | fn iter_nybbles(&self) -> impl Iterator<Item = u8> + '_ { |
|
219 | 233 | (0..self.nybbles_len()).map(move |i| get_nybble(&self.data, i)) |
|
220 | 234 | } |
|
221 | 235 | |
|
222 | 236 | /// Return the index first nybble that's different from `node` |
|
223 | 237 | /// |
|
224 | 238 | /// If the return value is `None` that means that `self` is |
|
225 | 239 | /// a prefix of `node`, but the current method is a bit slower |
|
226 | 240 | /// than `is_prefix_of`. |
|
227 | 241 | /// |
|
228 | 242 | /// Returned index is as in `get_nybble`, i.e., starting at 0. |
|
229 | 243 | pub fn first_different_nybble(&self, node: &Node) -> Option<usize> { |
|
230 | 244 | self.iter_nybbles() |
|
231 | 245 | .zip(NodePrefix::from(*node).iter_nybbles()) |
|
232 | 246 | .position(|(a, b)| a != b) |
|
233 | 247 | } |
|
234 | 248 | } |
|
235 | 249 | |
|
236 | 250 | impl fmt::LowerHex for NodePrefix { |
|
237 | 251 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { |
|
238 | 252 | let full_bytes = self.nybbles_len() / 2; |
|
239 | 253 | for &byte in &self.data[..full_bytes] { |
|
240 | 254 | write!(f, "{:02x}", byte)? |
|
241 | 255 | } |
|
242 | 256 | if self.nybbles_len() % 2 == 1 { |
|
243 | 257 | let last = self.nybbles_len() - 1; |
|
244 | 258 | write!(f, "{:x}", self.get_nybble(last))? |
|
245 | 259 | } |
|
246 | 260 | Ok(()) |
|
247 | 261 | } |
|
248 | 262 | } |
|
249 | 263 | |
|
250 | 264 | /// A shortcut for full `Node` references |
|
251 | 265 | impl From<&'_ Node> for NodePrefix { |
|
252 | 266 | fn from(node: &'_ Node) -> Self { |
|
253 | 267 | NodePrefix { |
|
254 | 268 | nybbles_len: node.nybbles_len() as _, |
|
255 | 269 | data: node.data, |
|
256 | 270 | } |
|
257 | 271 | } |
|
258 | 272 | } |
|
259 | 273 | |
|
260 | 274 | /// A shortcut for full `Node` references |
|
261 | 275 | impl From<Node> for NodePrefix { |
|
262 | 276 | fn from(node: Node) -> Self { |
|
263 | 277 | NodePrefix { |
|
264 | 278 | nybbles_len: node.nybbles_len() as _, |
|
265 | 279 | data: node.data, |
|
266 | 280 | } |
|
267 | 281 | } |
|
268 | 282 | } |
|
269 | 283 | |
|
270 | 284 | impl PartialEq<Node> for NodePrefix { |
|
271 | 285 | fn eq(&self, other: &Node) -> bool { |
|
272 | 286 | Self::from(*other) == *self |
|
273 | 287 | } |
|
274 | 288 | } |
|
275 | 289 | |
|
276 | 290 | #[cfg(test)] |
|
277 | 291 | mod tests { |
|
278 | 292 | use super::*; |
|
279 | 293 | |
|
280 | 294 | const SAMPLE_NODE_HEX: &str = "0123456789abcdeffedcba9876543210deadbeef"; |
|
281 | 295 | const SAMPLE_NODE: Node = Node { |
|
282 | 296 | data: [ |
|
283 | 297 | 0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0xfe, 0xdc, 0xba, |
|
284 | 298 | 0x98, 0x76, 0x54, 0x32, 0x10, 0xde, 0xad, 0xbe, 0xef, |
|
285 | 299 | ], |
|
286 | 300 | }; |
|
287 | 301 | |
|
288 | 302 | /// Pad an hexadecimal string to reach `NODE_NYBBLES_LENGTH` |
|
289 | 303 | /// The padding is made with zeros. |
|
290 | 304 | pub fn hex_pad_right(hex: &str) -> String { |
|
291 | 305 | let mut res = hex.to_string(); |
|
292 | 306 | while res.len() < NODE_NYBBLES_LENGTH { |
|
293 | 307 | res.push('0'); |
|
294 | 308 | } |
|
295 | 309 | res |
|
296 | 310 | } |
|
297 | 311 | |
|
298 | 312 | #[test] |
|
299 | 313 | fn test_node_from_hex() { |
|
300 | 314 | let not_hex = "012... oops"; |
|
301 | 315 | let too_short = "0123"; |
|
302 | 316 | let too_long = format!("{}0", SAMPLE_NODE_HEX); |
|
303 | 317 | assert_eq!(Node::from_hex(SAMPLE_NODE_HEX).unwrap(), SAMPLE_NODE); |
|
304 | 318 | assert!(Node::from_hex(not_hex).is_err()); |
|
305 | 319 | assert!(Node::from_hex(too_short).is_err()); |
|
306 | 320 | assert!(Node::from_hex(&too_long).is_err()); |
|
307 | 321 | } |
|
308 | 322 | |
|
309 | 323 | #[test] |
|
310 | 324 | fn test_node_encode_hex() { |
|
311 | 325 | assert_eq!(format!("{:x}", SAMPLE_NODE), SAMPLE_NODE_HEX); |
|
312 | 326 | } |
|
313 | 327 | |
|
314 | 328 | #[test] |
|
315 | 329 | fn test_prefix_from_to_hex() -> Result<(), FromHexError> { |
|
316 | 330 | assert_eq!(format!("{:x}", NodePrefix::from_hex("0e1")?), "0e1"); |
|
317 | 331 | assert_eq!(format!("{:x}", NodePrefix::from_hex("0e1a")?), "0e1a"); |
|
318 | 332 | assert_eq!( |
|
319 | 333 | format!("{:x}", NodePrefix::from_hex(SAMPLE_NODE_HEX)?), |
|
320 | 334 | SAMPLE_NODE_HEX |
|
321 | 335 | ); |
|
322 | 336 | Ok(()) |
|
323 | 337 | } |
|
324 | 338 | |
|
325 | 339 | #[test] |
|
326 | 340 | fn test_prefix_from_hex_errors() { |
|
327 | 341 | assert!(NodePrefix::from_hex("testgr").is_err()); |
|
328 | 342 | let mut long = format!("{:x}", NULL_NODE); |
|
329 | 343 | long.push('c'); |
|
330 | 344 | assert!(NodePrefix::from_hex(&long).is_err()) |
|
331 | 345 | } |
|
332 | 346 | |
|
333 | 347 | #[test] |
|
334 | 348 | fn test_is_prefix_of() -> Result<(), FromHexError> { |
|
335 | 349 | let mut node_data = [0; NODE_BYTES_LENGTH]; |
|
336 | 350 | node_data[0] = 0x12; |
|
337 | 351 | node_data[1] = 0xca; |
|
338 | 352 | let node = Node::from(node_data); |
|
339 | 353 | assert!(NodePrefix::from_hex("12")?.is_prefix_of(&node)); |
|
340 | 354 | assert!(!NodePrefix::from_hex("1a")?.is_prefix_of(&node)); |
|
341 | 355 | assert!(NodePrefix::from_hex("12c")?.is_prefix_of(&node)); |
|
342 | 356 | assert!(!NodePrefix::from_hex("12d")?.is_prefix_of(&node)); |
|
343 | 357 | Ok(()) |
|
344 | 358 | } |
|
345 | 359 | |
|
346 | 360 | #[test] |
|
347 | 361 | fn test_get_nybble() -> Result<(), FromHexError> { |
|
348 | 362 | let prefix = NodePrefix::from_hex("dead6789cafe")?; |
|
349 | 363 | assert_eq!(prefix.get_nybble(0), 13); |
|
350 | 364 | assert_eq!(prefix.get_nybble(7), 9); |
|
351 | 365 | Ok(()) |
|
352 | 366 | } |
|
353 | 367 | |
|
354 | 368 | #[test] |
|
355 | 369 | fn test_first_different_nybble_even_prefix() { |
|
356 | 370 | let prefix = NodePrefix::from_hex("12ca").unwrap(); |
|
357 | 371 | let mut node = Node::from([0; NODE_BYTES_LENGTH]); |
|
358 | 372 | assert_eq!(prefix.first_different_nybble(&node), Some(0)); |
|
359 | 373 | node.data[0] = 0x13; |
|
360 | 374 | assert_eq!(prefix.first_different_nybble(&node), Some(1)); |
|
361 | 375 | node.data[0] = 0x12; |
|
362 | 376 | assert_eq!(prefix.first_different_nybble(&node), Some(2)); |
|
363 | 377 | node.data[1] = 0xca; |
|
364 | 378 | // now it is a prefix |
|
365 | 379 | assert_eq!(prefix.first_different_nybble(&node), None); |
|
366 | 380 | } |
|
367 | 381 | |
|
368 | 382 | #[test] |
|
369 | 383 | fn test_first_different_nybble_odd_prefix() { |
|
370 | 384 | let prefix = NodePrefix::from_hex("12c").unwrap(); |
|
371 | 385 | let mut node = Node::from([0; NODE_BYTES_LENGTH]); |
|
372 | 386 | assert_eq!(prefix.first_different_nybble(&node), Some(0)); |
|
373 | 387 | node.data[0] = 0x13; |
|
374 | 388 | assert_eq!(prefix.first_different_nybble(&node), Some(1)); |
|
375 | 389 | node.data[0] = 0x12; |
|
376 | 390 | assert_eq!(prefix.first_different_nybble(&node), Some(2)); |
|
377 | 391 | node.data[1] = 0xca; |
|
378 | 392 | // now it is a prefix |
|
379 | 393 | assert_eq!(prefix.first_different_nybble(&node), None); |
|
380 | 394 | } |
|
381 | 395 | } |
|
382 | 396 | |
|
383 | 397 | #[cfg(test)] |
|
384 | 398 | pub use tests::hex_pad_right; |
@@ -1,105 +1,110 b'' | |||
|
1 | use crate::errors::{HgError, HgResultExt}; | |
|
1 | 2 | use bytes_cast::{unaligned, BytesCast}; |
|
2 | 3 | use memmap::Mmap; |
|
3 | 4 | use std::path::{Path, PathBuf}; |
|
4 | 5 | |
|
5 | 6 | use super::revlog::RevlogError; |
|
6 | 7 | use crate::repo::Repo; |
|
7 | 8 | use crate::utils::strip_suffix; |
|
8 | 9 | |
|
9 | 10 | const ONDISK_VERSION: u8 = 1; |
|
10 | 11 | |
|
11 | 12 | pub(super) struct NodeMapDocket { |
|
12 | 13 | pub data_length: usize, |
|
13 | 14 | // TODO: keep here more of the data from `parse()` when we need it |
|
14 | 15 | } |
|
15 | 16 | |
|
16 | 17 | #[derive(BytesCast)] |
|
17 | 18 | #[repr(C)] |
|
18 | 19 | struct DocketHeader { |
|
19 | 20 | uid_size: u8, |
|
20 | 21 | _tip_rev: unaligned::U64Be, |
|
21 | 22 | data_length: unaligned::U64Be, |
|
22 | 23 | _data_unused: unaligned::U64Be, |
|
23 | 24 | tip_node_size: unaligned::U64Be, |
|
24 | 25 | } |
|
25 | 26 | |
|
26 | 27 | impl NodeMapDocket { |
|
27 | 28 | /// Return `Ok(None)` when the caller should proceed without a persistent |
|
28 | 29 | /// nodemap: |
|
29 | 30 | /// |
|
30 | 31 | /// * This revlog does not have a `.n` docket file (it is not generated for |
|
31 | 32 | /// small revlogs), or |
|
32 | 33 | /// * The docket has an unsupported version number (repositories created by |
|
33 | 34 | /// later hg, maybe that should be a requirement instead?), or |
|
34 | 35 | /// * The docket file points to a missing (likely deleted) data file (this |
|
35 | 36 | /// can happen in a rare race condition). |
|
36 | 37 | pub fn read_from_file( |
|
37 | 38 | repo: &Repo, |
|
38 | 39 | index_path: &Path, |
|
39 | 40 | ) -> Result<Option<(Self, Mmap)>, RevlogError> { |
|
40 | 41 | let docket_path = index_path.with_extension("n"); |
|
41 | let docket_bytes = match repo.store_vfs().read(&docket_path) { | |
|
42 | Err(e) if e.kind() == std::io::ErrorKind::NotFound => { | |
|
43 | return Ok(None) | |
|
44 |
|
|
|
45 | Err(e) => return Err(RevlogError::IoError(e)), | |
|
46 | Ok(bytes) => bytes, | |
|
42 | let docket_bytes = if let Some(bytes) = | |
|
43 | repo.store_vfs().read(&docket_path).io_not_found_as_none()? | |
|
44 | { | |
|
45 | bytes | |
|
46 | } else { | |
|
47 | return Ok(None); | |
|
47 | 48 | }; |
|
48 | 49 | |
|
49 | 50 | let input = if let Some((&ONDISK_VERSION, rest)) = |
|
50 | 51 | docket_bytes.split_first() |
|
51 | 52 | { |
|
52 | 53 | rest |
|
53 | 54 | } else { |
|
54 | 55 | return Ok(None); |
|
55 | 56 | }; |
|
56 | 57 | |
|
57 | let (header, rest) = DocketHeader::from_bytes(input)?; | |
|
58 | /// Treat any error as a parse error | |
|
59 | fn parse<T, E>(result: Result<T, E>) -> Result<T, RevlogError> { | |
|
60 | result.map_err(|_| { | |
|
61 | HgError::corrupted("nodemap docket parse error").into() | |
|
62 | }) | |
|
63 | } | |
|
64 | ||
|
65 | let (header, rest) = parse(DocketHeader::from_bytes(input))?; | |
|
58 | 66 | let uid_size = header.uid_size as usize; |
|
59 | 67 | // TODO: do we care about overflow for 4 GB+ nodemap files on 32-bit |
|
60 | 68 | // systems? |
|
61 | 69 | let tip_node_size = header.tip_node_size.get() as usize; |
|
62 | 70 | let data_length = header.data_length.get() as usize; |
|
63 | let (uid, rest) = u8::slice_from_bytes(rest, uid_size)?; | |
|
64 | let (_tip_node, _rest) = u8::slice_from_bytes(rest, tip_node_size)?; | |
|
65 | let uid = | |
|
66 |
|
|
|
71 | let (uid, rest) = parse(u8::slice_from_bytes(rest, uid_size))?; | |
|
72 | let (_tip_node, _rest) = | |
|
73 | parse(u8::slice_from_bytes(rest, tip_node_size))?; | |
|
74 | let uid = parse(std::str::from_utf8(uid))?; | |
|
67 | 75 | let docket = NodeMapDocket { data_length }; |
|
68 | 76 | |
|
69 | 77 | let data_path = rawdata_path(&docket_path, uid); |
|
70 |
// TODO: use ` |
|
|
78 | // TODO: use `vfs.read()` here when the `persistent-nodemap.mmap` | |
|
71 | 79 | // config is false? |
|
72 | match repo.store_vfs().mmap_open(&data_path) { | |
|
73 | Ok(mmap) => { | |
|
74 |
|
|
|
75 | Ok(Some((docket, mmap))) | |
|
76 | } else { | |
|
77 | Err(RevlogError::Corrupted) | |
|
78 | } | |
|
80 | if let Some(mmap) = repo | |
|
81 | .store_vfs() | |
|
82 | .mmap_open(&data_path) | |
|
83 | .io_not_found_as_none()? | |
|
84 | { | |
|
85 | if mmap.len() >= data_length { | |
|
86 | Ok(Some((docket, mmap))) | |
|
87 | } else { | |
|
88 | Err(HgError::corrupted("persistent nodemap too short").into()) | |
|
79 | 89 | } |
|
80 | Err(error) => { | |
|
81 | if error.kind() == std::io::ErrorKind::NotFound { | |
|
82 | Ok(None) | |
|
83 | } else { | |
|
84 | Err(RevlogError::IoError(error)) | |
|
85 | } | |
|
86 | } | |
|
90 | } else { | |
|
91 | Ok(None) | |
|
87 | 92 | } |
|
88 | 93 | } |
|
89 | 94 | } |
|
90 | 95 | |
|
91 | 96 | fn rawdata_path(docket_path: &Path, uid: &str) -> PathBuf { |
|
92 | 97 | let docket_name = docket_path |
|
93 | 98 | .file_name() |
|
94 | 99 | .expect("expected a base name") |
|
95 | 100 | .to_str() |
|
96 | 101 | .expect("expected an ASCII file name in the store"); |
|
97 | 102 | let prefix = strip_suffix(docket_name, ".n.a") |
|
98 | 103 | .or_else(|| strip_suffix(docket_name, ".n")) |
|
99 | 104 | .expect("expected docket path in .n or .n.a"); |
|
100 | 105 | let name = format!("{}-{}.nd", prefix, uid); |
|
101 | 106 | docket_path |
|
102 | 107 | .parent() |
|
103 | 108 | .expect("expected a non-root path") |
|
104 | 109 | .join(name) |
|
105 | 110 | } |
@@ -1,387 +1,393 b'' | |||
|
1 | 1 | use std::borrow::Cow; |
|
2 | 2 | use std::io::Read; |
|
3 | 3 | use std::ops::Deref; |
|
4 | 4 | use std::path::Path; |
|
5 | 5 | |
|
6 | 6 | use byteorder::{BigEndian, ByteOrder}; |
|
7 | 7 | use crypto::digest::Digest; |
|
8 | 8 | use crypto::sha1::Sha1; |
|
9 | 9 | use flate2::read::ZlibDecoder; |
|
10 | 10 | use micro_timer::timed; |
|
11 | 11 | use zstd; |
|
12 | 12 | |
|
13 | 13 | use super::index::Index; |
|
14 | 14 | use super::node::{NodePrefix, NODE_BYTES_LENGTH, NULL_NODE}; |
|
15 | 15 | use super::nodemap; |
|
16 | use super::nodemap::NodeMap; | |
|
16 | use super::nodemap::{NodeMap, NodeMapError}; | |
|
17 | 17 | use super::nodemap_docket::NodeMapDocket; |
|
18 | 18 | use super::patch; |
|
19 | use crate::errors::HgError; | |
|
19 | 20 | use crate::repo::Repo; |
|
20 | 21 | use crate::revlog::Revision; |
|
21 | 22 | |
|
23 | #[derive(derive_more::From)] | |
|
22 | 24 | pub enum RevlogError { |
|
23 | IoError(std::io::Error), | |
|
24 | UnsuportedVersion(u16), | |
|
25 | 25 | InvalidRevision, |
|
26 | 26 | /// Found more than one entry whose ID match the requested prefix |
|
27 | 27 | AmbiguousPrefix, |
|
28 | Corrupted, | |
|
29 | UnknowDataFormat(u8), | |
|
28 | #[from] | |
|
29 | Other(HgError), | |
|
30 | 30 | } |
|
31 | 31 | |
|
32 |
impl From< |
|
|
33 |
fn from( |
|
|
34 | RevlogError::Corrupted | |
|
32 | impl From<NodeMapError> for RevlogError { | |
|
33 | fn from(error: NodeMapError) -> Self { | |
|
34 | match error { | |
|
35 | NodeMapError::MultipleResults => RevlogError::AmbiguousPrefix, | |
|
36 | NodeMapError::RevisionNotInIndex(_) => RevlogError::corrupted(), | |
|
37 | } | |
|
38 | } | |
|
39 | } | |
|
40 | ||
|
41 | impl RevlogError { | |
|
42 | fn corrupted() -> Self { | |
|
43 | RevlogError::Other(HgError::corrupted("corrupted revlog")) | |
|
35 | 44 | } |
|
36 | 45 | } |
|
37 | 46 | |
|
38 | 47 | /// Read only implementation of revlog. |
|
39 | 48 | pub struct Revlog { |
|
40 | 49 | /// When index and data are not interleaved: bytes of the revlog index. |
|
41 | 50 | /// When index and data are interleaved: bytes of the revlog index and |
|
42 | 51 | /// data. |
|
43 | 52 | index: Index, |
|
44 | 53 | /// When index and data are not interleaved: bytes of the revlog data |
|
45 | 54 | data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>>, |
|
46 | 55 | /// When present on disk: the persistent nodemap for this revlog |
|
47 | 56 | nodemap: Option<nodemap::NodeTree>, |
|
48 | 57 | } |
|
49 | 58 | |
|
50 | 59 | impl Revlog { |
|
51 | 60 | /// Open a revlog index file. |
|
52 | 61 | /// |
|
53 | 62 | /// It will also open the associated data file if index and data are not |
|
54 | 63 | /// interleaved. |
|
55 | 64 | #[timed] |
|
56 | 65 | pub fn open( |
|
57 | 66 | repo: &Repo, |
|
58 | 67 | index_path: impl AsRef<Path>, |
|
59 | 68 | data_path: Option<&Path>, |
|
60 | 69 | ) -> Result<Self, RevlogError> { |
|
61 | 70 | let index_path = index_path.as_ref(); |
|
62 | let index_mmap = repo | |
|
63 | .store_vfs() | |
|
64 | .mmap_open(&index_path) | |
|
65 | .map_err(RevlogError::IoError)?; | |
|
71 | let index_mmap = repo.store_vfs().mmap_open(&index_path)?; | |
|
66 | 72 | |
|
67 | 73 | let version = get_version(&index_mmap); |
|
68 | 74 | if version != 1 { |
|
69 | return Err(RevlogError::UnsuportedVersion(version)); | |
|
75 | // A proper new version should have had a repo/store requirement. | |
|
76 | return Err(RevlogError::corrupted()); | |
|
70 | 77 | } |
|
71 | 78 | |
|
72 | 79 | let index = Index::new(Box::new(index_mmap))?; |
|
73 | 80 | |
|
74 | 81 | let default_data_path = index_path.with_extension("d"); |
|
75 | 82 | |
|
76 | 83 | // type annotation required |
|
77 | 84 | // won't recognize Mmap as Deref<Target = [u8]> |
|
78 | 85 | let data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>> = |
|
79 | 86 | if index.is_inline() { |
|
80 | 87 | None |
|
81 | 88 | } else { |
|
82 | 89 | let data_path = data_path.unwrap_or(&default_data_path); |
|
83 | let data_mmap = repo | |
|
84 | .store_vfs() | |
|
85 | .mmap_open(data_path) | |
|
86 | .map_err(RevlogError::IoError)?; | |
|
90 | let data_mmap = repo.store_vfs().mmap_open(data_path)?; | |
|
87 | 91 | Some(Box::new(data_mmap)) |
|
88 | 92 | }; |
|
89 | 93 | |
|
90 | 94 | let nodemap = NodeMapDocket::read_from_file(repo, index_path)?.map( |
|
91 | 95 | |(docket, data)| { |
|
92 | 96 | nodemap::NodeTree::load_bytes( |
|
93 | 97 | Box::new(data), |
|
94 | 98 | docket.data_length, |
|
95 | 99 | ) |
|
96 | 100 | }, |
|
97 | 101 | ); |
|
98 | 102 | |
|
99 | 103 | Ok(Revlog { |
|
100 | 104 | index, |
|
101 | 105 | data_bytes, |
|
102 | 106 | nodemap, |
|
103 | 107 | }) |
|
104 | 108 | } |
|
105 | 109 | |
|
106 | 110 | /// Return number of entries of the `Revlog`. |
|
107 | 111 | pub fn len(&self) -> usize { |
|
108 | 112 | self.index.len() |
|
109 | 113 | } |
|
110 | 114 | |
|
111 | 115 | /// Returns `true` if the `Revlog` has zero `entries`. |
|
112 | 116 | pub fn is_empty(&self) -> bool { |
|
113 | 117 | self.index.is_empty() |
|
114 | 118 | } |
|
115 | 119 | |
|
116 | 120 | /// Return the full data associated to a node. |
|
117 | 121 | #[timed] |
|
118 | 122 | pub fn get_node_rev( |
|
119 | 123 | &self, |
|
120 | 124 | node: NodePrefix, |
|
121 | 125 | ) -> Result<Revision, RevlogError> { |
|
122 | 126 | if let Some(nodemap) = &self.nodemap { |
|
123 | 127 | return nodemap |
|
124 | .find_bin(&self.index, node) | |
|
125 | // TODO: propagate details of this error: | |
|
126 | .map_err(|_| RevlogError::Corrupted)? | |
|
128 | .find_bin(&self.index, node)? | |
|
127 | 129 | .ok_or(RevlogError::InvalidRevision); |
|
128 | 130 | } |
|
129 | 131 | |
|
130 | 132 | // Fallback to linear scan when a persistent nodemap is not present. |
|
131 | 133 | // This happens when the persistent-nodemap experimental feature is not |
|
132 | 134 | // enabled, or for small revlogs. |
|
133 | 135 | // |
|
134 | 136 | // TODO: consider building a non-persistent nodemap in memory to |
|
135 | 137 | // optimize these cases. |
|
136 | 138 | let mut found_by_prefix = None; |
|
137 | 139 | for rev in (0..self.len() as Revision).rev() { |
|
138 | 140 | let index_entry = |
|
139 |
self.index.get_entry(rev).ok_or( |
|
|
141 | self.index.get_entry(rev).ok_or(HgError::corrupted( | |
|
142 | "revlog references a revision not in the index", | |
|
143 | ))?; | |
|
140 | 144 | if node == *index_entry.hash() { |
|
141 | 145 | return Ok(rev); |
|
142 | 146 | } |
|
143 | 147 | if node.is_prefix_of(index_entry.hash()) { |
|
144 | 148 | if found_by_prefix.is_some() { |
|
145 | 149 | return Err(RevlogError::AmbiguousPrefix); |
|
146 | 150 | } |
|
147 | 151 | found_by_prefix = Some(rev) |
|
148 | 152 | } |
|
149 | 153 | } |
|
150 | 154 | found_by_prefix.ok_or(RevlogError::InvalidRevision) |
|
151 | 155 | } |
|
152 | 156 | |
|
153 | 157 | /// Returns whether the given revision exists in this revlog. |
|
154 | 158 | pub fn has_rev(&self, rev: Revision) -> bool { |
|
155 | 159 | self.index.get_entry(rev).is_some() |
|
156 | 160 | } |
|
157 | 161 | |
|
158 | 162 | /// Return the full data associated to a revision. |
|
159 | 163 | /// |
|
160 | 164 | /// All entries required to build the final data out of deltas will be |
|
161 | 165 | /// retrieved as needed, and the deltas will be applied to the inital |
|
162 | 166 | /// snapshot to rebuild the final data. |
|
163 | 167 | #[timed] |
|
164 | 168 | pub fn get_rev_data(&self, rev: Revision) -> Result<Vec<u8>, RevlogError> { |
|
165 | 169 | // Todo return -> Cow |
|
166 | 170 | let mut entry = self.get_entry(rev)?; |
|
167 | 171 | let mut delta_chain = vec![]; |
|
168 | 172 | while let Some(base_rev) = entry.base_rev { |
|
169 | 173 | delta_chain.push(entry); |
|
170 | entry = | |
|
171 |
|
|
|
174 | entry = self | |
|
175 | .get_entry(base_rev) | |
|
176 | .map_err(|_| RevlogError::corrupted())?; | |
|
172 | 177 | } |
|
173 | 178 | |
|
174 | 179 | // TODO do not look twice in the index |
|
175 | 180 | let index_entry = self |
|
176 | 181 | .index |
|
177 | 182 | .get_entry(rev) |
|
178 | 183 | .ok_or(RevlogError::InvalidRevision)?; |
|
179 | 184 | |
|
180 | 185 | let data: Vec<u8> = if delta_chain.is_empty() { |
|
181 | 186 | entry.data()?.into() |
|
182 | 187 | } else { |
|
183 | 188 | Revlog::build_data_from_deltas(entry, &delta_chain)? |
|
184 | 189 | }; |
|
185 | 190 | |
|
186 | 191 | if self.check_hash( |
|
187 | 192 | index_entry.p1(), |
|
188 | 193 | index_entry.p2(), |
|
189 | 194 | index_entry.hash().as_bytes(), |
|
190 | 195 | &data, |
|
191 | 196 | ) { |
|
192 | 197 | Ok(data) |
|
193 | 198 | } else { |
|
194 |
Err(RevlogError:: |
|
|
199 | Err(RevlogError::corrupted()) | |
|
195 | 200 | } |
|
196 | 201 | } |
|
197 | 202 | |
|
198 | 203 | /// Check the hash of some given data against the recorded hash. |
|
199 | 204 | pub fn check_hash( |
|
200 | 205 | &self, |
|
201 | 206 | p1: Revision, |
|
202 | 207 | p2: Revision, |
|
203 | 208 | expected: &[u8], |
|
204 | 209 | data: &[u8], |
|
205 | 210 | ) -> bool { |
|
206 | 211 | let e1 = self.index.get_entry(p1); |
|
207 | 212 | let h1 = match e1 { |
|
208 | 213 | Some(ref entry) => entry.hash(), |
|
209 | 214 | None => &NULL_NODE, |
|
210 | 215 | }; |
|
211 | 216 | let e2 = self.index.get_entry(p2); |
|
212 | 217 | let h2 = match e2 { |
|
213 | 218 | Some(ref entry) => entry.hash(), |
|
214 | 219 | None => &NULL_NODE, |
|
215 | 220 | }; |
|
216 | 221 | |
|
217 | 222 | hash(data, h1.as_bytes(), h2.as_bytes()).as_slice() == expected |
|
218 | 223 | } |
|
219 | 224 | |
|
220 | 225 | /// Build the full data of a revision out its snapshot |
|
221 | 226 | /// and its deltas. |
|
222 | 227 | #[timed] |
|
223 | 228 | fn build_data_from_deltas( |
|
224 | 229 | snapshot: RevlogEntry, |
|
225 | 230 | deltas: &[RevlogEntry], |
|
226 | 231 | ) -> Result<Vec<u8>, RevlogError> { |
|
227 | 232 | let snapshot = snapshot.data()?; |
|
228 | 233 | let deltas = deltas |
|
229 | 234 | .iter() |
|
230 | 235 | .rev() |
|
231 | 236 | .map(RevlogEntry::data) |
|
232 | 237 | .collect::<Result<Vec<Cow<'_, [u8]>>, RevlogError>>()?; |
|
233 | 238 | let patches: Vec<_> = |
|
234 | 239 | deltas.iter().map(|d| patch::PatchList::new(d)).collect(); |
|
235 | 240 | let patch = patch::fold_patch_lists(&patches); |
|
236 | 241 | Ok(patch.apply(&snapshot)) |
|
237 | 242 | } |
|
238 | 243 | |
|
239 | 244 | /// Return the revlog data. |
|
240 | 245 | fn data(&self) -> &[u8] { |
|
241 | 246 | match self.data_bytes { |
|
242 | 247 | Some(ref data_bytes) => &data_bytes, |
|
243 | 248 | None => panic!( |
|
244 | 249 | "forgot to load the data or trying to access inline data" |
|
245 | 250 | ), |
|
246 | 251 | } |
|
247 | 252 | } |
|
248 | 253 | |
|
249 | 254 | /// Get an entry of the revlog. |
|
250 | 255 | fn get_entry(&self, rev: Revision) -> Result<RevlogEntry, RevlogError> { |
|
251 | 256 | let index_entry = self |
|
252 | 257 | .index |
|
253 | 258 | .get_entry(rev) |
|
254 | 259 | .ok_or(RevlogError::InvalidRevision)?; |
|
255 | 260 | let start = index_entry.offset(); |
|
256 | 261 | let end = start + index_entry.compressed_len(); |
|
257 | 262 | let data = if self.index.is_inline() { |
|
258 | 263 | self.index.data(start, end) |
|
259 | 264 | } else { |
|
260 | 265 | &self.data()[start..end] |
|
261 | 266 | }; |
|
262 | 267 | let entry = RevlogEntry { |
|
263 | 268 | rev, |
|
264 | 269 | bytes: data, |
|
265 | 270 | compressed_len: index_entry.compressed_len(), |
|
266 | 271 | uncompressed_len: index_entry.uncompressed_len(), |
|
267 | 272 | base_rev: if index_entry.base_revision() == rev { |
|
268 | 273 | None |
|
269 | 274 | } else { |
|
270 | 275 | Some(index_entry.base_revision()) |
|
271 | 276 | }, |
|
272 | 277 | }; |
|
273 | 278 | Ok(entry) |
|
274 | 279 | } |
|
275 | 280 | } |
|
276 | 281 | |
|
277 | 282 | /// The revlog entry's bytes and the necessary informations to extract |
|
278 | 283 | /// the entry's data. |
|
279 | 284 | #[derive(Debug)] |
|
280 | 285 | pub struct RevlogEntry<'a> { |
|
281 | 286 | rev: Revision, |
|
282 | 287 | bytes: &'a [u8], |
|
283 | 288 | compressed_len: usize, |
|
284 | 289 | uncompressed_len: usize, |
|
285 | 290 | base_rev: Option<Revision>, |
|
286 | 291 | } |
|
287 | 292 | |
|
288 | 293 | impl<'a> RevlogEntry<'a> { |
|
289 | 294 | /// Extract the data contained in the entry. |
|
290 | 295 | pub fn data(&self) -> Result<Cow<'_, [u8]>, RevlogError> { |
|
291 | 296 | if self.bytes.is_empty() { |
|
292 | 297 | return Ok(Cow::Borrowed(&[])); |
|
293 | 298 | } |
|
294 | 299 | match self.bytes[0] { |
|
295 | 300 | // Revision data is the entirety of the entry, including this |
|
296 | 301 | // header. |
|
297 | 302 | b'\0' => Ok(Cow::Borrowed(self.bytes)), |
|
298 | 303 | // Raw revision data follows. |
|
299 | 304 | b'u' => Ok(Cow::Borrowed(&self.bytes[1..])), |
|
300 | 305 | // zlib (RFC 1950) data. |
|
301 | 306 | b'x' => Ok(Cow::Owned(self.uncompressed_zlib_data()?)), |
|
302 | 307 | // zstd data. |
|
303 | 308 | b'\x28' => Ok(Cow::Owned(self.uncompressed_zstd_data()?)), |
|
304 | format_type => Err(RevlogError::UnknowDataFormat(format_type)), | |
|
309 | // A proper new format should have had a repo/store requirement. | |
|
310 | _format_type => Err(RevlogError::corrupted()), | |
|
305 | 311 | } |
|
306 | 312 | } |
|
307 | 313 | |
|
308 | 314 | fn uncompressed_zlib_data(&self) -> Result<Vec<u8>, RevlogError> { |
|
309 | 315 | let mut decoder = ZlibDecoder::new(self.bytes); |
|
310 | 316 | if self.is_delta() { |
|
311 | 317 | let mut buf = Vec::with_capacity(self.compressed_len); |
|
312 | 318 | decoder |
|
313 | 319 | .read_to_end(&mut buf) |
|
314 |
. |
|
|
320 | .map_err(|_| RevlogError::corrupted())?; | |
|
315 | 321 | Ok(buf) |
|
316 | 322 | } else { |
|
317 | 323 | let mut buf = vec![0; self.uncompressed_len]; |
|
318 | 324 | decoder |
|
319 | 325 | .read_exact(&mut buf) |
|
320 |
. |
|
|
326 | .map_err(|_| RevlogError::corrupted())?; | |
|
321 | 327 | Ok(buf) |
|
322 | 328 | } |
|
323 | 329 | } |
|
324 | 330 | |
|
325 | 331 | fn uncompressed_zstd_data(&self) -> Result<Vec<u8>, RevlogError> { |
|
326 | 332 | if self.is_delta() { |
|
327 | 333 | let mut buf = Vec::with_capacity(self.compressed_len); |
|
328 | 334 | zstd::stream::copy_decode(self.bytes, &mut buf) |
|
329 |
. |
|
|
335 | .map_err(|_| RevlogError::corrupted())?; | |
|
330 | 336 | Ok(buf) |
|
331 | 337 | } else { |
|
332 | 338 | let mut buf = vec![0; self.uncompressed_len]; |
|
333 | 339 | let len = zstd::block::decompress_to_buffer(self.bytes, &mut buf) |
|
334 |
. |
|
|
340 | .map_err(|_| RevlogError::corrupted())?; | |
|
335 | 341 | if len != self.uncompressed_len { |
|
336 |
Err(RevlogError:: |
|
|
342 | Err(RevlogError::corrupted()) | |
|
337 | 343 | } else { |
|
338 | 344 | Ok(buf) |
|
339 | 345 | } |
|
340 | 346 | } |
|
341 | 347 | } |
|
342 | 348 | |
|
343 | 349 | /// Tell if the entry is a snapshot or a delta |
|
344 | 350 | /// (influences on decompression). |
|
345 | 351 | fn is_delta(&self) -> bool { |
|
346 | 352 | self.base_rev.is_some() |
|
347 | 353 | } |
|
348 | 354 | } |
|
349 | 355 | |
|
350 | 356 | /// Format version of the revlog. |
|
351 | 357 | pub fn get_version(index_bytes: &[u8]) -> u16 { |
|
352 | 358 | BigEndian::read_u16(&index_bytes[2..=3]) |
|
353 | 359 | } |
|
354 | 360 | |
|
355 | 361 | /// Calculate the hash of a revision given its data and its parents. |
|
356 | 362 | fn hash(data: &[u8], p1_hash: &[u8], p2_hash: &[u8]) -> Vec<u8> { |
|
357 | 363 | let mut hasher = Sha1::new(); |
|
358 | 364 | let (a, b) = (p1_hash, p2_hash); |
|
359 | 365 | if a > b { |
|
360 | 366 | hasher.input(b); |
|
361 | 367 | hasher.input(a); |
|
362 | 368 | } else { |
|
363 | 369 | hasher.input(a); |
|
364 | 370 | hasher.input(b); |
|
365 | 371 | } |
|
366 | 372 | hasher.input(data); |
|
367 | 373 | let mut hash = vec![0; NODE_BYTES_LENGTH]; |
|
368 | 374 | hasher.result(&mut hash); |
|
369 | 375 | hash |
|
370 | 376 | } |
|
371 | 377 | |
|
372 | 378 | #[cfg(test)] |
|
373 | 379 | mod tests { |
|
374 | 380 | use super::*; |
|
375 | 381 | |
|
376 | 382 | use super::super::index::IndexEntryBuilder; |
|
377 | 383 | |
|
378 | 384 | #[test] |
|
379 | 385 | fn version_test() { |
|
380 | 386 | let bytes = IndexEntryBuilder::new() |
|
381 | 387 | .is_first(true) |
|
382 | 388 | .with_version(1) |
|
383 | 389 | .build(); |
|
384 | 390 | |
|
385 | 391 | assert_eq!(get_version(&bytes), 1) |
|
386 | 392 | } |
|
387 | 393 | } |
@@ -1,146 +1,123 b'' | |||
|
1 | 1 | use crate::exitcode; |
|
2 | 2 | use crate::ui::utf8_to_local; |
|
3 | 3 | use crate::ui::UiError; |
|
4 | 4 | use format_bytes::format_bytes; |
|
5 | 5 | use hg::errors::HgError; |
|
6 | 6 | use hg::operations::FindRootError; |
|
7 | 7 | use hg::revlog::revlog::RevlogError; |
|
8 | 8 | use hg::utils::files::get_bytes_from_path; |
|
9 | 9 | use std::convert::From; |
|
10 | 10 | use std::path::PathBuf; |
|
11 | 11 | |
|
12 | 12 | /// The kind of command error |
|
13 | 13 | #[derive(Debug, derive_more::From)] |
|
14 | 14 | pub enum CommandError { |
|
15 | 15 | /// The root of the repository cannot be found |
|
16 | 16 | RootNotFound(PathBuf), |
|
17 | 17 | /// The current directory cannot be found |
|
18 | 18 | CurrentDirNotFound(std::io::Error), |
|
19 | 19 | /// The standard output stream cannot be written to |
|
20 | 20 | StdoutError, |
|
21 | 21 | /// The standard error stream cannot be written to |
|
22 | 22 | StderrError, |
|
23 | 23 | /// The command aborted |
|
24 | 24 | Abort(Option<Vec<u8>>), |
|
25 | 25 | /// A mercurial capability as not been implemented. |
|
26 | 26 | Unimplemented, |
|
27 | 27 | /// Common cases |
|
28 | 28 | #[from] |
|
29 | 29 | Other(HgError), |
|
30 | 30 | } |
|
31 | 31 | |
|
32 | 32 | impl CommandError { |
|
33 | 33 | pub fn get_exit_code(&self) -> exitcode::ExitCode { |
|
34 | 34 | match self { |
|
35 | 35 | CommandError::RootNotFound(_) => exitcode::ABORT, |
|
36 | 36 | CommandError::CurrentDirNotFound(_) => exitcode::ABORT, |
|
37 | 37 | CommandError::StdoutError => exitcode::ABORT, |
|
38 | 38 | CommandError::StderrError => exitcode::ABORT, |
|
39 | 39 | CommandError::Abort(_) => exitcode::ABORT, |
|
40 | 40 | CommandError::Unimplemented => exitcode::UNIMPLEMENTED_COMMAND, |
|
41 | 41 | CommandError::Other(HgError::UnsupportedFeature(_)) => { |
|
42 | 42 | exitcode::UNIMPLEMENTED_COMMAND |
|
43 | 43 | } |
|
44 | 44 | CommandError::Other(_) => exitcode::ABORT, |
|
45 | 45 | } |
|
46 | 46 | } |
|
47 | 47 | |
|
48 | 48 | /// Return the message corresponding to the error if any |
|
49 | 49 | pub fn get_error_message_bytes(&self) -> Option<Vec<u8>> { |
|
50 | 50 | match self { |
|
51 | 51 | CommandError::RootNotFound(path) => { |
|
52 | 52 | let bytes = get_bytes_from_path(path); |
|
53 | 53 | Some(format_bytes!( |
|
54 | 54 | b"abort: no repository found in '{}' (.hg not found)!\n", |
|
55 | 55 | bytes.as_slice() |
|
56 | 56 | )) |
|
57 | 57 | } |
|
58 | 58 | CommandError::CurrentDirNotFound(e) => Some(format_bytes!( |
|
59 | 59 | b"abort: error getting current working directory: {}\n", |
|
60 | 60 | e.to_string().as_bytes(), |
|
61 | 61 | )), |
|
62 | 62 | CommandError::Abort(message) => message.to_owned(), |
|
63 | 63 | |
|
64 | 64 | CommandError::StdoutError |
|
65 | 65 | | CommandError::StderrError |
|
66 | 66 | | CommandError::Unimplemented |
|
67 | 67 | | CommandError::Other(HgError::UnsupportedFeature(_)) => None, |
|
68 | 68 | |
|
69 | 69 | CommandError::Other(e) => { |
|
70 | 70 | Some(format_bytes!(b"{}\n", e.to_string().as_bytes())) |
|
71 | 71 | } |
|
72 | 72 | } |
|
73 | 73 | } |
|
74 | 74 | |
|
75 | 75 | /// Exist the process with the corresponding exit code. |
|
76 | 76 | pub fn exit(&self) { |
|
77 | 77 | std::process::exit(self.get_exit_code()) |
|
78 | 78 | } |
|
79 | 79 | } |
|
80 | 80 | |
|
81 | 81 | impl From<UiError> for CommandError { |
|
82 | 82 | fn from(error: UiError) -> Self { |
|
83 | 83 | match error { |
|
84 | 84 | UiError::StdoutError(_) => CommandError::StdoutError, |
|
85 | 85 | UiError::StderrError(_) => CommandError::StderrError, |
|
86 | 86 | } |
|
87 | 87 | } |
|
88 | 88 | } |
|
89 | 89 | |
|
90 | 90 | impl From<FindRootError> for CommandError { |
|
91 | 91 | fn from(err: FindRootError) -> Self { |
|
92 | 92 | match err { |
|
93 | 93 | FindRootError::RootNotFound(path) => { |
|
94 | 94 | CommandError::RootNotFound(path) |
|
95 | 95 | } |
|
96 | 96 | FindRootError::GetCurrentDirError(e) => { |
|
97 | 97 | CommandError::CurrentDirNotFound(e) |
|
98 | 98 | } |
|
99 | 99 | } |
|
100 | 100 | } |
|
101 | 101 | } |
|
102 | 102 | |
|
103 | 103 | impl From<(RevlogError, &str)> for CommandError { |
|
104 | 104 | fn from((err, rev): (RevlogError, &str)) -> CommandError { |
|
105 | 105 | match err { |
|
106 | RevlogError::IoError(err) => CommandError::Abort(Some( | |
|
107 | utf8_to_local(&format!("abort: {}\n", err)).into(), | |
|
108 | )), | |
|
109 | 106 | RevlogError::InvalidRevision => CommandError::Abort(Some( |
|
110 | 107 | utf8_to_local(&format!( |
|
111 | 108 | "abort: invalid revision identifier {}\n", |
|
112 | 109 | rev |
|
113 | 110 | )) |
|
114 | 111 | .into(), |
|
115 | 112 | )), |
|
116 | 113 | RevlogError::AmbiguousPrefix => CommandError::Abort(Some( |
|
117 | 114 | utf8_to_local(&format!( |
|
118 | 115 | "abort: ambiguous revision identifier {}\n", |
|
119 | 116 | rev |
|
120 | 117 | )) |
|
121 | 118 | .into(), |
|
122 | 119 | )), |
|
123 |
RevlogError:: |
|
|
124 | CommandError::Abort(Some( | |
|
125 | utf8_to_local(&format!( | |
|
126 | "abort: unsupported revlog version {}\n", | |
|
127 | version | |
|
128 | )) | |
|
129 | .into(), | |
|
130 | )) | |
|
131 | } | |
|
132 | RevlogError::Corrupted => { | |
|
133 | CommandError::Abort(Some("abort: corrupted revlog\n".into())) | |
|
134 | } | |
|
135 | RevlogError::UnknowDataFormat(format) => { | |
|
136 | CommandError::Abort(Some( | |
|
137 | utf8_to_local(&format!( | |
|
138 | "abort: unknow revlog dataformat {:?}\n", | |
|
139 | format | |
|
140 | )) | |
|
141 | .into(), | |
|
142 | )) | |
|
143 | } | |
|
120 | RevlogError::Other(err) => CommandError::Other(err), | |
|
144 | 121 | } |
|
145 | 122 | } |
|
146 | 123 | } |
General Comments 0
You need to be logged in to leave comments.
Login now