rust: use HgError in RevlogError and Vfs...
Simon Sapin -
r47172:43d63979 default
@@ -1,111 +1,114 b''
1 1 use std::fmt;
2 2
3 3 /// Common error cases that can happen in many different APIs
4 4 #[derive(Debug)]
5 5 pub enum HgError {
6 6 IoError {
7 7 error: std::io::Error,
8 8 context: IoErrorContext,
9 9 },
10 10
11 11 /// A file under `.hg/` normally only written by Mercurial
12 12 ///
13 13 /// The given string is a short explanation for users, not intended to be
14 14 /// machine-readable.
15 15 CorruptedRepository(String),
16 16
17 17 /// The repository or requested operation involves a feature not
18 18 /// supported by the Rust implementation. Falling back to the Python
19 19 /// implementation may or may not work.
20 20 ///
21 21 /// The given string is a short explanation for users, not intended to be
22 22 /// machine-readable.
23 23 UnsupportedFeature(String),
24 24 }
25 25
26 26 /// Details about where an I/O error happened
27 27 #[derive(Debug, derive_more::From)]
28 28 pub enum IoErrorContext {
29 29 /// A filesystem operation returned `std::io::Error`
30 30 #[from]
31 31 File(std::path::PathBuf),
32 32 /// `std::env::current_dir` returned `std::io::Error`
33 33 CurrentDir,
34 34 }
35 35
36 36 impl HgError {
37 37 pub fn corrupted(explanation: impl Into<String>) -> Self {
38 // TODO: capture a backtrace here and keep it in the error value
39 // to aid debugging?
40 // https://doc.rust-lang.org/std/backtrace/struct.Backtrace.html
38 41 HgError::CorruptedRepository(explanation.into())
39 42 }
40 43 }
41 44
42 45 // TODO: use `DisplayBytes` instead to show non-Unicode filenames losslessly?
43 46 impl fmt::Display for HgError {
44 47 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
45 48 match self {
46 49 HgError::IoError { error, context } => {
47 50 write!(f, "{}: {}", error, context)
48 51 }
49 52 HgError::CorruptedRepository(explanation) => {
50 53 write!(f, "corrupted repository: {}", explanation)
51 54 }
52 55 HgError::UnsupportedFeature(explanation) => {
53 56 write!(f, "unsupported feature: {}", explanation)
54 57 }
55 58 }
56 59 }
57 60 }
58 61
59 62 // TODO: use `DisplayBytes` instead to show non-Unicode filenames losslessly?
60 63 impl fmt::Display for IoErrorContext {
61 64 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
62 65 match self {
63 66 IoErrorContext::File(path) => path.display().fmt(f),
64 67 IoErrorContext::CurrentDir => f.write_str("current directory"),
65 68 }
66 69 }
67 70 }
68 71
69 72 pub trait IoResultExt<T> {
70 73 /// Annotate a possible I/O error as related to a file at the given path.
71 74 ///
72 75 /// This allows printing something like “File not found: example.txt”
73 76 /// instead of just “File not found”.
74 77 ///
75 78 /// Converts a `Result` with `std::io::Error` into one with `HgError`.
76 79 fn for_file(self, path: &std::path::Path) -> Result<T, HgError>;
77 80 }
78 81
79 82 impl<T> IoResultExt<T> for std::io::Result<T> {
80 83 fn for_file(self, path: &std::path::Path) -> Result<T, HgError> {
81 84 self.map_err(|error| HgError::IoError {
82 85 error,
83 86 context: IoErrorContext::File(path.to_owned()),
84 87 })
85 88 }
86 89 }
87 90
88 91 pub trait HgResultExt<T> {
89 92 /// Handle missing files separately from other I/O error cases.
90 93 ///
91 94 /// Wraps the `Ok` type in an `Option`:
92 95 ///
93 96 /// * `Ok(x)` becomes `Ok(Some(x))`
94 97 /// * An I/O "not found" error becomes `Ok(None)`
95 98 /// * Other errors are unchanged
96 99 fn io_not_found_as_none(self) -> Result<Option<T>, HgError>;
97 100 }
98 101
99 102 impl<T> HgResultExt<T> for Result<T, HgError> {
100 103 fn io_not_found_as_none(self) -> Result<Option<T>, HgError> {
101 104 match self {
102 105 Ok(x) => Ok(Some(x)),
103 106 Err(HgError::IoError { error, .. })
104 107 if error.kind() == std::io::ErrorKind::NotFound =>
105 108 {
106 109 Ok(None)
107 110 }
108 111 Err(other_error) => Err(other_error),
109 112 }
110 113 }
111 114 }
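
A minimal sketch of how these combinators compose, assuming the hg-core crate is referenced as `hg` (as rhg does); the function name is illustrative:

    use std::path::Path;

    use hg::errors::{HgError, HgResultExt, IoResultExt};

    /// Read a file that may legitimately be absent: `for_file` attaches the
    /// path to any I/O error, and `io_not_found_as_none` maps a "not found"
    /// error to `Ok(None)`.
    fn read_optional_file(path: &Path) -> Result<Option<Vec<u8>>, HgError> {
        std::fs::read(path).for_file(path).io_not_found_as_none()
    }
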
@@ -1,115 +1,116 b''
1 1 // Copyright 2018-2020 Georges Racinet <georges.racinet@octobus.net>
2 2 // and Mercurial contributors
3 3 //
4 4 // This software may be used and distributed according to the terms of the
5 5 // GNU General Public License version 2 or any later version.
6
6 7 mod ancestors;
7 8 pub mod dagops;
8 9 pub mod errors;
9 10 pub use ancestors::{AncestorsIterator, LazyAncestors, MissingAncestors};
10 11 mod dirstate;
11 12 pub mod discovery;
12 13 pub mod requirements;
13 14 pub mod testing; // unconditionally built, for use from integration tests
14 15 pub use dirstate::{
15 16 dirs_multiset::{DirsMultiset, DirsMultisetIter},
16 17 dirstate_map::DirstateMap,
17 18 parsers::{pack_dirstate, parse_dirstate, PARENT_SIZE},
18 19 status::{
19 20 status, BadMatch, BadType, DirstateStatus, StatusError, StatusOptions,
20 21 },
21 22 CopyMap, CopyMapIter, DirstateEntry, DirstateParents, EntryState,
22 23 StateMap, StateMapIter,
23 24 };
24 25 pub mod copy_tracing;
25 26 mod filepatterns;
26 27 pub mod matchers;
27 28 pub mod repo;
28 29 pub mod revlog;
29 30 pub use revlog::*;
30 31 pub mod config;
31 32 pub mod operations;
32 33 pub mod revset;
33 34 pub mod utils;
34 35
35 36 use crate::utils::hg_path::{HgPathBuf, HgPathError};
36 37 pub use filepatterns::{
37 38 parse_pattern_syntax, read_pattern_file, IgnorePattern,
38 39 PatternFileWarning, PatternSyntax,
39 40 };
40 41 use std::collections::HashMap;
41 42 use twox_hash::RandomXxHashBuilder64;
42 43
43 44 /// This is a contract between the `micro-timer` crate and us, to expose
44 45 /// the `log` crate as `crate::log`.
45 46 use log;
46 47
47 48 pub type LineNumber = usize;
48 49
49 50 /// Rust's default hasher is too slow because it tries to prevent collision
50 51 /// attacks. We are not concerned about those: if an ill-minded person has
51 52 /// write access to your repository, you have other issues.
52 53 pub type FastHashMap<K, V> = HashMap<K, V, RandomXxHashBuilder64>;
53 54
54 55 #[derive(Debug, PartialEq)]
55 56 pub enum DirstateMapError {
56 57 PathNotFound(HgPathBuf),
57 58 EmptyPath,
58 59 InvalidPath(HgPathError),
59 60 }
60 61
61 62 impl ToString for DirstateMapError {
62 63 fn to_string(&self) -> String {
63 64 match self {
64 65 DirstateMapError::PathNotFound(_) => {
65 66 "expected a value, found none".to_string()
66 67 }
67 68 DirstateMapError::EmptyPath => "Overflow in dirstate.".to_string(),
68 69 DirstateMapError::InvalidPath(e) => e.to_string(),
69 70 }
70 71 }
71 72 }
72 73
73 74 #[derive(Debug, derive_more::From)]
74 75 pub enum DirstateError {
75 76 Map(DirstateMapError),
76 77 Common(errors::HgError),
77 78 }
78 79
79 80 #[derive(Debug, derive_more::From)]
80 81 pub enum PatternError {
81 82 #[from]
82 83 Path(HgPathError),
83 84 UnsupportedSyntax(String),
84 85 UnsupportedSyntaxInFile(String, String, usize),
85 86 TooLong(usize),
86 87 #[from]
87 88 IO(std::io::Error),
88 89 /// Needed a pattern that can be turned into a regex but got one that
89 90 /// can't. This should only happen through programmer error.
90 91 NonRegexPattern(IgnorePattern),
91 92 }
92 93
93 94 impl ToString for PatternError {
94 95 fn to_string(&self) -> String {
95 96 match self {
96 97 PatternError::UnsupportedSyntax(syntax) => {
97 98 format!("Unsupported syntax {}", syntax)
98 99 }
99 100 PatternError::UnsupportedSyntaxInFile(syntax, file_path, line) => {
100 101 format!(
101 102 "{}:{}: unsupported syntax {}",
102 103 file_path, line, syntax
103 104 )
104 105 }
105 106 PatternError::TooLong(size) => {
106 107 format!("matcher pattern is too long ({} bytes)", size)
107 108 }
108 109 PatternError::IO(e) => e.to_string(),
109 110 PatternError::Path(e) => e.to_string(),
110 111 PatternError::NonRegexPattern(pattern) => {
111 112 format!("'{:?}' cannot be turned into a regex", pattern)
112 113 }
113 114 }
114 115 }
115 116 }
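
As a brief illustration of the `FastHashMap` alias above (again assuming the crate is used as `hg`): it is a drop-in replacement for `std::collections::HashMap`, only the hasher differs.

    use hg::FastHashMap;

    fn frequency(words: &[&str]) -> FastHashMap<String, usize> {
        // `default()` builds the map with the xxHash-based hasher.
        let mut counts = FastHashMap::default();
        for word in words {
            *counts.entry(word.to_string()).or_insert(0) += 1;
        }
        counts
    }
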
@@ -1,76 +1,75 b''
1 1 // cat.rs
2 2 //
3 3 // Copyright 2020 Antoine Cezar <antoine.cezar@octobus.net>
4 4 //
5 5 // This software may be used and distributed according to the terms of the
6 6 // GNU General Public License version 2 or any later version.
7 7
8 8 use std::path::PathBuf;
9 9
10 10 use crate::repo::Repo;
11 11 use crate::revlog::changelog::Changelog;
12 12 use crate::revlog::manifest::Manifest;
13 13 use crate::revlog::path_encode::path_encode;
14 14 use crate::revlog::revlog::Revlog;
15 15 use crate::revlog::revlog::RevlogError;
16 16 use crate::revlog::Node;
17 17 use crate::utils::files::get_path_from_bytes;
18 18 use crate::utils::hg_path::{HgPath, HgPathBuf};
19 19
20 20 const METADATA_DELIMITER: [u8; 2] = [b'\x01', b'\n'];
21 21
22 22 /// Output the contents of the given files as they were at a given revision.
23 23 ///
24 24 /// * `repo`: The repository to read from.
25 25 /// * `revset`: The revision to cat the files from.
26 26 /// * `files`: The files to output.
27 27 pub fn cat(
28 28 repo: &Repo,
29 29 revset: &str,
30 30 files: &[HgPathBuf],
31 31 ) -> Result<Vec<u8>, RevlogError> {
32 32 let rev = crate::revset::resolve_single(revset, repo)?;
33 33 let changelog = Changelog::open(repo)?;
34 34 let manifest = Manifest::open(repo)?;
35 35 let changelog_entry = changelog.get_rev(rev)?;
36 let manifest_node = Node::from_hex(&changelog_entry.manifest_node()?)
37 .map_err(|_| RevlogError::Corrupted)?;
36 let manifest_node =
37 Node::from_hex_for_repo(&changelog_entry.manifest_node()?)?;
38 38 let manifest_entry = manifest.get_node(manifest_node.into())?;
39 39 let mut bytes = vec![];
40 40
41 41 for (manifest_file, node_bytes) in manifest_entry.files_with_nodes() {
42 42 for cat_file in files.iter() {
43 43 if cat_file.as_bytes() == manifest_file.as_bytes() {
44 44 let index_path = store_path(manifest_file, b".i");
45 45 let data_path = store_path(manifest_file, b".d");
46 46
47 47 let file_log =
48 48 Revlog::open(repo, &index_path, Some(&data_path))?;
49 let file_node = Node::from_hex(node_bytes)
50 .map_err(|_| RevlogError::Corrupted)?;
49 let file_node = Node::from_hex_for_repo(node_bytes)?;
51 50 let file_rev = file_log.get_node_rev(file_node.into())?;
52 51 let data = file_log.get_rev_data(file_rev)?;
53 52 if data.starts_with(&METADATA_DELIMITER) {
54 53 let end_delimiter_position = data
55 54 [METADATA_DELIMITER.len()..]
56 55 .windows(METADATA_DELIMITER.len())
57 56 .position(|bytes| bytes == METADATA_DELIMITER);
58 57 if let Some(position) = end_delimiter_position {
59 58 let offset = METADATA_DELIMITER.len() * 2;
60 59 bytes.extend(data[position + offset..].iter());
61 60 }
62 61 } else {
63 62 bytes.extend(data);
64 63 }
65 64 }
66 65 }
67 66 }
68 67
69 68 Ok(bytes)
70 69 }
71 70
72 71 fn store_path(hg_path: &HgPath, suffix: &[u8]) -> PathBuf {
73 72 let encoded_bytes =
74 73 path_encode(&[b"data/", hg_path.as_bytes(), suffix].concat());
75 74 get_path_from_bytes(&encoded_bytes).into()
76 75 }
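
A hedged caller-side sketch of `cat`, assuming it is re-exported from `hg::operations` as rhg's `cat` command uses it; error handling is collapsed to `Option` for brevity:

    use hg::operations::cat;
    use hg::repo::Repo;
    use hg::utils::hg_path::HgPathBuf;

    /// Concatenate the contents of `files` as of the revision named by
    /// `revset`, or return `None` on any repository or revlog error.
    fn dump_files(revset: &str, files: &[HgPathBuf]) -> Option<Vec<u8>> {
        let repo = Repo::find().ok()?;
        cat(&repo, revset, files).ok()
    }
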
@@ -1,72 +1,67 b''
1 1 // list_tracked_files.rs
2 2 //
3 3 // Copyright 2020 Antoine Cezar <antoine.cezar@octobus.net>
4 4 //
5 5 // This software may be used and distributed according to the terms of the
6 6 // GNU General Public License version 2 or any later version.
7 7
8 8 use crate::dirstate::parsers::parse_dirstate;
9 use crate::errors::{HgError, IoResultExt};
9 use crate::errors::HgError;
10 10 use crate::repo::Repo;
11 11 use crate::revlog::changelog::Changelog;
12 12 use crate::revlog::manifest::{Manifest, ManifestEntry};
13 13 use crate::revlog::node::Node;
14 14 use crate::revlog::revlog::RevlogError;
15 15 use crate::utils::hg_path::HgPath;
16 16 use crate::EntryState;
17 17 use rayon::prelude::*;
18 18
19 19 /// List files under Mercurial control in the working directory
20 20 /// by reading the dirstate
21 21 pub struct Dirstate {
22 22 /// The `dirstate` content.
23 23 content: Vec<u8>,
24 24 }
25 25
26 26 impl Dirstate {
27 27 pub fn new(repo: &Repo) -> Result<Self, HgError> {
28 let content = repo
29 .hg_vfs()
30 .read("dirstate")
31 // TODO: this will be more accurate when we use `HgError` in
32 // `Vfs::read`.
33 .for_file("dirstate".as_ref())?;
28 let content = repo.hg_vfs().read("dirstate")?;
34 29 Ok(Self { content })
35 30 }
36 31
37 32 pub fn tracked_files(&self) -> Result<Vec<&HgPath>, HgError> {
38 33 let (_, entries, _) = parse_dirstate(&self.content)?;
39 34 let mut files: Vec<&HgPath> = entries
40 35 .into_iter()
41 36 .filter_map(|(path, entry)| match entry.state {
42 37 EntryState::Removed => None,
43 38 _ => Some(path),
44 39 })
45 40 .collect();
46 41 files.par_sort_unstable();
47 42 Ok(files)
48 43 }
49 44 }
50 45
51 46 /// List files under Mercurial control at a given revision.
52 47 pub fn list_rev_tracked_files(
53 48 repo: &Repo,
54 49 revset: &str,
55 50 ) -> Result<FilesForRev, RevlogError> {
56 51 let rev = crate::revset::resolve_single(revset, repo)?;
57 52 let changelog = Changelog::open(repo)?;
58 53 let manifest = Manifest::open(repo)?;
59 54 let changelog_entry = changelog.get_rev(rev)?;
60 let manifest_node = Node::from_hex(&changelog_entry.manifest_node()?)
61 .map_err(|_| RevlogError::Corrupted)?;
55 let manifest_node =
56 Node::from_hex_for_repo(&changelog_entry.manifest_node()?)?;
62 57 let manifest_entry = manifest.get_node(manifest_node.into())?;
63 58 Ok(FilesForRev(manifest_entry))
64 59 }
65 60
66 61 pub struct FilesForRev(ManifestEntry);
67 62
68 63 impl FilesForRev {
69 64 pub fn iter(&self) -> impl Iterator<Item = &HgPath> {
70 65 self.0.files()
71 66 }
72 67 }
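
A sketch of the two entry points above, under the same re-export assumption; it compares how many files are tracked in the working directory versus at a given revision:

    use hg::operations::{list_rev_tracked_files, Dirstate};
    use hg::repo::Repo;

    fn tracked_file_counts(repo: &Repo, revset: &str) -> Option<(usize, usize)> {
        // Working directory: read and parse `.hg/dirstate`.
        let dirstate = Dirstate::new(repo).ok()?;
        let in_working_dir = dirstate.tracked_files().ok()?.len();
        // At a revision: resolve the revset and walk the manifest.
        let at_rev = list_rev_tracked_files(repo, revset).ok()?.iter().count();
        Some((in_working_dir, at_rev))
    }
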
@@ -1,91 +1,86 b''
1 use crate::errors::HgError;
1 use crate::errors::{HgError, IoResultExt};
2 2 use crate::operations::{find_root, FindRootError};
3 3 use crate::requirements;
4 4 use memmap::{Mmap, MmapOptions};
5 5 use std::path::{Path, PathBuf};
6 6
7 7 /// A repository on disk
8 8 pub struct Repo {
9 9 working_directory: PathBuf,
10 10 dot_hg: PathBuf,
11 11 store: PathBuf,
12 12 }
13 13
14 14 /// Filesystem access abstraction for the contents of a given "base" directory
15 15 #[derive(Clone, Copy)]
16 16 pub(crate) struct Vfs<'a> {
17 17 base: &'a Path,
18 18 }
19 19
20 20 impl Repo {
21 21 /// Create a `Repo` for the repository rooted at the given path, which is
22 22 /// expected to contain a `.hg` sub-directory (this is not verified here).
23 23 pub fn for_path(root: impl Into<PathBuf>) -> Self {
24 24 let working_directory = root.into();
25 25 let dot_hg = working_directory.join(".hg");
26 26 Self {
27 27 store: dot_hg.join("store"),
28 28 dot_hg,
29 29 working_directory,
30 30 }
31 31 }
32 32
33 33 pub fn find() -> Result<Self, FindRootError> {
34 34 find_root().map(Self::for_path)
35 35 }
36 36
37 37 pub fn check_requirements(&self) -> Result<(), HgError> {
38 38 requirements::check(self)
39 39 }
40 40
41 41 pub fn working_directory_path(&self) -> &Path {
42 42 &self.working_directory
43 43 }
44 44
45 45 /// For accessing repository files (in `.hg`), except for the store
46 46 /// (`.hg/store`).
47 47 pub(crate) fn hg_vfs(&self) -> Vfs<'_> {
48 48 Vfs { base: &self.dot_hg }
49 49 }
50 50
51 51 /// For accessing repository store files (in `.hg/store`)
52 52 pub(crate) fn store_vfs(&self) -> Vfs<'_> {
53 53 Vfs { base: &self.store }
54 54 }
55 55
56 56 /// For accessing the working copy
57 57
58 58 // The underscore prefix silences the "never used" warning. Remove before
59 59 // using.
60 60 pub(crate) fn _working_directory_vfs(&self) -> Vfs<'_> {
61 61 Vfs {
62 62 base: &self.working_directory,
63 63 }
64 64 }
65 65 }
66 66
67 67 impl Vfs<'_> {
68 68 pub(crate) fn read(
69 69 &self,
70 70 relative_path: impl AsRef<Path>,
71 ) -> std::io::Result<Vec<u8>> {
72 std::fs::read(self.base.join(relative_path))
73 }
74
75 pub(crate) fn open(
76 &self,
77 relative_path: impl AsRef<Path>,
78 ) -> std::io::Result<std::fs::File> {
79 std::fs::File::open(self.base.join(relative_path))
71 ) -> Result<Vec<u8>, HgError> {
72 let path = self.base.join(relative_path);
73 std::fs::read(&path).for_file(&path)
80 74 }
81 75
82 76 pub(crate) fn mmap_open(
83 77 &self,
84 78 relative_path: impl AsRef<Path>,
85 ) -> std::io::Result<Mmap> {
86 let file = self.open(relative_path)?;
79 ) -> Result<Mmap, HgError> {
80 let path = self.base.join(relative_path);
81 let file = std::fs::File::open(&path).for_file(&path)?;
87 82 // TODO: what are the safety requirements here?
88 let mmap = unsafe { MmapOptions::new().map(&file) }?;
83 let mmap = unsafe { MmapOptions::new().map(&file) }.for_file(&path)?;
89 84 Ok(mmap)
90 85 }
91 86 }
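
Since `Vfs` is `pub(crate)`, the following sketch is written as crate-internal code with illustrative function names; it shows call sites relying on `Vfs::read` and `Vfs::mmap_open` now returning `HgError` with the failing path already attached:

    use crate::errors::{HgError, HgResultExt};
    use crate::repo::Repo;

    fn read_branch_file(repo: &Repo) -> Result<Option<Vec<u8>>, HgError> {
        // An I/O failure is reported with the full path of `.hg/branch`;
        // a missing file becomes `Ok(None)`.
        repo.hg_vfs().read("branch").io_not_found_as_none()
    }

    fn mmap_store_file(repo: &Repo, name: &str) -> Result<memmap::Mmap, HgError> {
        // Same for memory-mapped reads from `.hg/store`.
        repo.store_vfs().mmap_open(name)
    }
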
@@ -1,70 +1,67 b''
1 use crate::errors::{HgError, HgResultExt, IoResultExt};
1 use crate::errors::{HgError, HgResultExt};
2 2 use crate::repo::Repo;
3 3
4 4 fn parse(bytes: &[u8]) -> Result<Vec<String>, HgError> {
5 5 // The Python code reading this file uses `str.splitlines`
6 6 // which looks for a number of line separators (even including a couple of
7 7 // non-ASCII ones), but Python code writing it always uses `\n`.
8 8 let lines = bytes.split(|&byte| byte == b'\n');
9 9
10 10 lines
11 11 .filter(|line| !line.is_empty())
12 12 .map(|line| {
13 13 // Python uses Unicode `str.isalnum` but feature names are all
14 14 // ASCII
15 15 if line[0].is_ascii_alphanumeric() && line.is_ascii() {
16 16 Ok(String::from_utf8(line.into()).unwrap())
17 17 } else {
18 18 Err(HgError::corrupted("parse error in 'requires' file"))
19 19 }
20 20 })
21 21 .collect()
22 22 }
23 23
24 24 pub fn load(repo: &Repo) -> Result<Vec<String>, HgError> {
25 if let Some(bytes) = repo
26 .hg_vfs()
27 .read("requires")
28 .for_file("requires".as_ref())
29 .io_not_found_as_none()?
25 if let Some(bytes) =
26 repo.hg_vfs().read("requires").io_not_found_as_none()?
30 27 {
31 28 parse(&bytes)
32 29 } else {
33 30 // Treat a missing file the same as an empty file.
34 31 // From `mercurial/localrepo.py`:
35 32 // > requires file contains a newline-delimited list of
36 33 // > features/capabilities the opener (us) must have in order to use
37 34 // > the repository. This file was introduced in Mercurial 0.9.2,
38 35 // > which means very old repositories may not have one. We assume
39 36 // > a missing file translates to no requirements.
40 37 Ok(Vec::new())
41 38 }
42 39 }
43 40
44 41 pub fn check(repo: &Repo) -> Result<(), HgError> {
45 42 for feature in load(repo)? {
46 43 if !SUPPORTED.contains(&&*feature) {
47 44 // TODO: collect all unknown features and include them in the
48 45 // error message?
49 46 return Err(HgError::UnsupportedFeature(format!(
50 47 "repository requires feature unknown to this Mercurial: {}",
51 48 feature
52 49 )));
53 50 }
54 51 }
55 52 Ok(())
56 53 }
57 54
58 55 // TODO: set this to actually-supported features
59 56 const SUPPORTED: &[&str] = &[
60 57 "dotencode",
61 58 "fncache",
62 59 "generaldelta",
63 60 "revlogv1",
64 61 "sparserevlog",
65 62 "store",
66 63 // As of this writing everything rhg does is read-only.
67 64 // When it starts writing to the repository, it’ll need to either keep the
68 65 // persistent nodemap up to date or remove this entry:
69 66 "persistent-nodemap",
70 67 ];
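
A hypothetical in-module test sketch (`parse` is private to this module) restating the rules above: non-empty lines must be ASCII and start with an alphanumeric character, anything else is reported as corruption:

    #[test]
    fn parse_requires_sketch() {
        let ok = parse(b"dotencode\nfncache\nstore\n").unwrap();
        assert_eq!(ok, vec!["dotencode", "fncache", "store"]);
        // A leading non-ASCII (hence non-alphanumeric) byte is corruption.
        assert!(parse(b"\xffbogus\n").is_err());
    }
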
@@ -1,58 +1,61 b''
1 use crate::errors::HgError;
1 2 use crate::repo::Repo;
2 3 use crate::revlog::revlog::{Revlog, RevlogError};
3 4 use crate::revlog::NodePrefix;
4 5 use crate::revlog::Revision;
5 6
6 7 /// A specialized `Revlog` to work with `changelog` data format.
7 8 pub struct Changelog {
8 9 /// The generic `revlog` format.
9 10 pub(crate) revlog: Revlog,
10 11 }
11 12
12 13 impl Changelog {
13 14 /// Open the `changelog` of a repository given by its root.
14 15 pub fn open(repo: &Repo) -> Result<Self, RevlogError> {
15 16 let revlog = Revlog::open(repo, "00changelog.i", None)?;
16 17 Ok(Self { revlog })
17 18 }
18 19
19 20 /// Return the `ChangelogEntry` for a given node id.
20 21 pub fn get_node(
21 22 &self,
22 23 node: NodePrefix,
23 24 ) -> Result<ChangelogEntry, RevlogError> {
24 25 let rev = self.revlog.get_node_rev(node)?;
25 26 self.get_rev(rev)
26 27 }
27 28
28 29 /// Return the `ChangelogEntry` of a given revision number.
29 30 pub fn get_rev(
30 31 &self,
31 32 rev: Revision,
32 33 ) -> Result<ChangelogEntry, RevlogError> {
33 34 let bytes = self.revlog.get_rev_data(rev)?;
34 35 Ok(ChangelogEntry { bytes })
35 36 }
36 37 }
37 38
38 39 /// `Changelog` entry which knows how to interpret the `changelog` data bytes.
39 40 #[derive(Debug)]
40 41 pub struct ChangelogEntry {
41 42 /// The data bytes of the `changelog` entry.
42 43 bytes: Vec<u8>,
43 44 }
44 45
45 46 impl ChangelogEntry {
46 47 /// Return an iterator over the lines of the entry.
47 48 pub fn lines(&self) -> impl Iterator<Item = &[u8]> {
48 49 self.bytes
49 50 .split(|b| b == &b'\n')
50 51 .filter(|line| !line.is_empty())
51 52 }
52 53
53 54 /// Return the node id of the `manifest` referenced by this `changelog`
54 55 /// entry.
55 56 pub fn manifest_node(&self) -> Result<&[u8], RevlogError> {
56 self.lines().next().ok_or(RevlogError::Corrupted)
57 self.lines()
58 .next()
59 .ok_or_else(|| HgError::corrupted("empty changelog entry").into())
57 60 }
58 61 }
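
A crate-internal sketch, mirroring what `operations::cat` and `list_rev_tracked_files` do, of going from a changeset revision to its manifest node id; the final `?` relies on `RevlogError`'s `From<HgError>` conversion:

    use crate::repo::Repo;
    use crate::revlog::changelog::Changelog;
    use crate::revlog::node::Node;
    use crate::revlog::revlog::RevlogError;
    use crate::revlog::Revision;

    fn manifest_node_of(repo: &Repo, rev: Revision) -> Result<Node, RevlogError> {
        let changelog = Changelog::open(repo)?;
        let entry = changelog.get_rev(rev)?;
        // The first line of a changelog entry is the manifest node in hex;
        // a malformed value is reported as repository corruption.
        Ok(Node::from_hex_for_repo(entry.manifest_node()?)?)
    }
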
@@ -1,402 +1,404 b''
1 1 use std::convert::TryInto;
2 2 use std::ops::Deref;
3 3
4 4 use byteorder::{BigEndian, ByteOrder};
5 5
6 use crate::errors::HgError;
6 7 use crate::revlog::node::Node;
7 8 use crate::revlog::revlog::RevlogError;
8 9 use crate::revlog::{Revision, NULL_REVISION};
9 10
10 11 pub const INDEX_ENTRY_SIZE: usize = 64;
11 12
12 13 /// A Revlog index
13 14 pub struct Index {
14 15 bytes: Box<dyn Deref<Target = [u8]> + Send>,
15 16 /// Offsets of starts of index blocks.
16 17 /// Only needed when the index is interleaved with data.
17 18 offsets: Option<Vec<usize>>,
18 19 }
19 20
20 21 impl Index {
21 22 /// Create an index from bytes.
22 23 /// Calculate the start of each entry when is_inline is true.
23 24 pub fn new(
24 25 bytes: Box<dyn Deref<Target = [u8]> + Send>,
25 26 ) -> Result<Self, RevlogError> {
26 27 if is_inline(&bytes) {
27 28 let mut offset: usize = 0;
28 29 let mut offsets = Vec::new();
29 30
30 31 while offset + INDEX_ENTRY_SIZE <= bytes.len() {
31 32 offsets.push(offset);
32 33 let end = offset + INDEX_ENTRY_SIZE;
33 34 let entry = IndexEntry {
34 35 bytes: &bytes[offset..end],
35 36 offset_override: None,
36 37 };
37 38
38 39 offset += INDEX_ENTRY_SIZE + entry.compressed_len();
39 40 }
40 41
41 42 if offset == bytes.len() {
42 43 Ok(Self {
43 44 bytes,
44 45 offsets: Some(offsets),
45 46 })
46 47 } else {
47 Err(RevlogError::Corrupted)
48 Err(HgError::corrupted("unexpected inline revlog length")
49 .into())
48 50 }
49 51 } else {
50 52 Ok(Self {
51 53 bytes,
52 54 offsets: None,
53 55 })
54 56 }
55 57 }
56 58
57 59 /// Value of the inline flag.
58 60 pub fn is_inline(&self) -> bool {
59 61 is_inline(&self.bytes)
60 62 }
61 63
62 64 /// Return a slice of bytes if `revlog` is inline. Panic if not.
63 65 pub fn data(&self, start: usize, end: usize) -> &[u8] {
64 66 if !self.is_inline() {
65 67 panic!("tried to access data in the index of a revlog that is not inline");
66 68 }
67 69 &self.bytes[start..end]
68 70 }
69 71
70 72 /// Return number of entries of the revlog index.
71 73 pub fn len(&self) -> usize {
72 74 if let Some(offsets) = &self.offsets {
73 75 offsets.len()
74 76 } else {
75 77 self.bytes.len() / INDEX_ENTRY_SIZE
76 78 }
77 79 }
78 80
79 81 /// Returns `true` if the `Index` has zero `entries`.
80 82 pub fn is_empty(&self) -> bool {
81 83 self.len() == 0
82 84 }
83 85
84 86 /// Return the index entry corresponding to the given revision if it
85 87 /// exists.
86 88 pub fn get_entry(&self, rev: Revision) -> Option<IndexEntry> {
87 89 if rev == NULL_REVISION {
88 90 return None;
89 91 }
90 92 if let Some(offsets) = &self.offsets {
91 93 self.get_entry_inline(rev, offsets)
92 94 } else {
93 95 self.get_entry_separated(rev)
94 96 }
95 97 }
96 98
97 99 fn get_entry_inline(
98 100 &self,
99 101 rev: Revision,
100 102 offsets: &[usize],
101 103 ) -> Option<IndexEntry> {
102 104 let start = *offsets.get(rev as usize)?;
103 105 let end = start.checked_add(INDEX_ENTRY_SIZE)?;
104 106 let bytes = &self.bytes[start..end];
105 107
106 108 // See IndexEntry for an explanation of this override.
107 109 let offset_override = Some(end);
108 110
109 111 Some(IndexEntry {
110 112 bytes,
111 113 offset_override,
112 114 })
113 115 }
114 116
115 117 fn get_entry_separated(&self, rev: Revision) -> Option<IndexEntry> {
116 118 let max_rev = self.bytes.len() / INDEX_ENTRY_SIZE;
117 119 if rev as usize >= max_rev {
118 120 return None;
119 121 }
120 122 let start = rev as usize * INDEX_ENTRY_SIZE;
121 123 let end = start + INDEX_ENTRY_SIZE;
122 124 let bytes = &self.bytes[start..end];
123 125
124 126 // Override the offset of the first revision as its bytes are used
125 127 // for the index's metadata (saving space because it is always 0)
126 128 let offset_override = if rev == 0 { Some(0) } else { None };
127 129
128 130 Some(IndexEntry {
129 131 bytes,
130 132 offset_override,
131 133 })
132 134 }
133 135 }
134 136
135 137 impl super::RevlogIndex for Index {
136 138 fn len(&self) -> usize {
137 139 self.len()
138 140 }
139 141
140 142 fn node(&self, rev: Revision) -> Option<&Node> {
141 143 self.get_entry(rev).map(|entry| entry.hash())
142 144 }
143 145 }
144 146
145 147 #[derive(Debug)]
146 148 pub struct IndexEntry<'a> {
147 149 bytes: &'a [u8],
148 150 /// Allows to override the offset value of the entry.
149 151 ///
150 152 /// For interleaved index and data, the offset stored in the index
151 153 /// corresponds to the separated data offset.
152 154 /// It has to be overridden with the actual offset in the interleaved
153 155 /// index which is just after the index block.
154 156 ///
155 157 /// For separated index and data, the offset stored in the first index
156 158 /// entry is mixed with the index headers.
157 159 /// It has to be overridden with 0.
158 160 offset_override: Option<usize>,
159 161 }
160 162
161 163 impl<'a> IndexEntry<'a> {
162 164 /// Return the offset of the data.
163 165 pub fn offset(&self) -> usize {
164 166 if let Some(offset_override) = self.offset_override {
165 167 offset_override
166 168 } else {
167 169 let mut bytes = [0; 8];
168 170 bytes[2..8].copy_from_slice(&self.bytes[0..=5]);
169 171 BigEndian::read_u64(&bytes[..]) as usize
170 172 }
171 173 }
172 174
173 175 /// Return the compressed length of the data.
174 176 pub fn compressed_len(&self) -> usize {
175 177 BigEndian::read_u32(&self.bytes[8..=11]) as usize
176 178 }
177 179
178 180 /// Return the uncompressed length of the data.
179 181 pub fn uncompressed_len(&self) -> usize {
180 182 BigEndian::read_u32(&self.bytes[12..=15]) as usize
181 183 }
182 184
183 185 /// Return the revision upon which the data has been derived.
184 186 pub fn base_revision(&self) -> Revision {
185 187 // TODO Maybe return an Option when base_revision == rev?
186 188 // Requires to add rev to IndexEntry
187 189
188 190 BigEndian::read_i32(&self.bytes[16..])
189 191 }
190 192
191 193 pub fn p1(&self) -> Revision {
192 194 BigEndian::read_i32(&self.bytes[24..])
193 195 }
194 196
195 197 pub fn p2(&self) -> Revision {
196 198 BigEndian::read_i32(&self.bytes[28..])
197 199 }
198 200
199 201 /// Return the hash of revision's full text.
200 202 ///
201 203 /// Currently, SHA-1 is used and only the first 20 bytes of this field
202 204 /// are used.
203 205 pub fn hash(&self) -> &'a Node {
204 206 (&self.bytes[32..52]).try_into().unwrap()
205 207 }
206 208 }
207 209
208 210 /// Value of the inline flag.
209 211 pub fn is_inline(index_bytes: &[u8]) -> bool {
210 212 match &index_bytes[0..=1] {
211 213 [0, 0] | [0, 2] => false,
212 214 _ => true,
213 215 }
214 216 }
215 217
216 218 #[cfg(test)]
217 219 mod tests {
218 220 use super::*;
219 221
220 222 #[cfg(test)]
221 223 #[derive(Debug, Copy, Clone)]
222 224 pub struct IndexEntryBuilder {
223 225 is_first: bool,
224 226 is_inline: bool,
225 227 is_general_delta: bool,
226 228 version: u16,
227 229 offset: usize,
228 230 compressed_len: usize,
229 231 uncompressed_len: usize,
230 232 base_revision: Revision,
231 233 }
232 234
233 235 #[cfg(test)]
234 236 impl IndexEntryBuilder {
235 237 pub fn new() -> Self {
236 238 Self {
237 239 is_first: false,
238 240 is_inline: false,
239 241 is_general_delta: true,
240 242 version: 2,
241 243 offset: 0,
242 244 compressed_len: 0,
243 245 uncompressed_len: 0,
244 246 base_revision: 0,
245 247 }
246 248 }
247 249
248 250 pub fn is_first(&mut self, value: bool) -> &mut Self {
249 251 self.is_first = value;
250 252 self
251 253 }
252 254
253 255 pub fn with_inline(&mut self, value: bool) -> &mut Self {
254 256 self.is_inline = value;
255 257 self
256 258 }
257 259
258 260 pub fn with_general_delta(&mut self, value: bool) -> &mut Self {
259 261 self.is_general_delta = value;
260 262 self
261 263 }
262 264
263 265 pub fn with_version(&mut self, value: u16) -> &mut Self {
264 266 self.version = value;
265 267 self
266 268 }
267 269
268 270 pub fn with_offset(&mut self, value: usize) -> &mut Self {
269 271 self.offset = value;
270 272 self
271 273 }
272 274
273 275 pub fn with_compressed_len(&mut self, value: usize) -> &mut Self {
274 276 self.compressed_len = value;
275 277 self
276 278 }
277 279
278 280 pub fn with_uncompressed_len(&mut self, value: usize) -> &mut Self {
279 281 self.uncompressed_len = value;
280 282 self
281 283 }
282 284
283 285 pub fn with_base_revision(&mut self, value: Revision) -> &mut Self {
284 286 self.base_revision = value;
285 287 self
286 288 }
287 289
288 290 pub fn build(&self) -> Vec<u8> {
289 291 let mut bytes = Vec::with_capacity(INDEX_ENTRY_SIZE);
290 292 if self.is_first {
291 293 bytes.extend(&match (self.is_general_delta, self.is_inline) {
292 294 (false, false) => [0u8, 0],
293 295 (false, true) => [0u8, 1],
294 296 (true, false) => [0u8, 2],
295 297 (true, true) => [0u8, 3],
296 298 });
297 299 bytes.extend(&self.version.to_be_bytes());
298 300 // Remaining offset bytes.
299 301 bytes.extend(&[0u8; 2]);
300 302 } else {
301 303 // Offset is only 6 bytes while usize is 8.
302 304 bytes.extend(&self.offset.to_be_bytes()[2..]);
303 305 }
304 306 bytes.extend(&[0u8; 2]); // Revision flags.
305 307 bytes.extend(&self.compressed_len.to_be_bytes()[4..]);
306 308 bytes.extend(&self.uncompressed_len.to_be_bytes()[4..]);
307 309 bytes.extend(&self.base_revision.to_be_bytes());
308 310 bytes
309 311 }
310 312 }
311 313
312 314 #[test]
313 315 fn is_not_inline_when_no_inline_flag_test() {
314 316 let bytes = IndexEntryBuilder::new()
315 317 .is_first(true)
316 318 .with_general_delta(false)
317 319 .with_inline(false)
318 320 .build();
319 321
320 322 assert_eq!(is_inline(&bytes), false)
321 323 }
322 324
323 325 #[test]
324 326 fn is_inline_when_inline_flag_test() {
325 327 let bytes = IndexEntryBuilder::new()
326 328 .is_first(true)
327 329 .with_general_delta(false)
328 330 .with_inline(true)
329 331 .build();
330 332
331 333 assert_eq!(is_inline(&bytes), true)
332 334 }
333 335
334 336 #[test]
335 337 fn is_inline_when_inline_and_generaldelta_flags_test() {
336 338 let bytes = IndexEntryBuilder::new()
337 339 .is_first(true)
338 340 .with_general_delta(true)
339 341 .with_inline(true)
340 342 .build();
341 343
342 344 assert_eq!(is_inline(&bytes), true)
343 345 }
344 346
345 347 #[test]
346 348 fn test_offset() {
347 349 let bytes = IndexEntryBuilder::new().with_offset(1).build();
348 350 let entry = IndexEntry {
349 351 bytes: &bytes,
350 352 offset_override: None,
351 353 };
352 354
353 355 assert_eq!(entry.offset(), 1)
354 356 }
355 357
356 358 #[test]
357 359 fn test_with_overridden_offset() {
358 360 let bytes = IndexEntryBuilder::new().with_offset(1).build();
359 361 let entry = IndexEntry {
360 362 bytes: &bytes,
361 363 offset_override: Some(2),
362 364 };
363 365
364 366 assert_eq!(entry.offset(), 2)
365 367 }
366 368
367 369 #[test]
368 370 fn test_compressed_len() {
369 371 let bytes = IndexEntryBuilder::new().with_compressed_len(1).build();
370 372 let entry = IndexEntry {
371 373 bytes: &bytes,
372 374 offset_override: None,
373 375 };
374 376
375 377 assert_eq!(entry.compressed_len(), 1)
376 378 }
377 379
378 380 #[test]
379 381 fn test_uncompressed_len() {
380 382 let bytes = IndexEntryBuilder::new().with_uncompressed_len(1).build();
381 383 let entry = IndexEntry {
382 384 bytes: &bytes,
383 385 offset_override: None,
384 386 };
385 387
386 388 assert_eq!(entry.uncompressed_len(), 1)
387 389 }
388 390
389 391 #[test]
390 392 fn test_base_revision() {
391 393 let bytes = IndexEntryBuilder::new().with_base_revision(1).build();
392 394 let entry = IndexEntry {
393 395 bytes: &bytes,
394 396 offset_override: None,
395 397 };
396 398
397 399 assert_eq!(entry.base_revision(), 1)
398 400 }
399 401 }
400 402
401 403 #[cfg(test)]
402 404 pub use tests::IndexEntryBuilder;
@@ -1,384 +1,398 b''
1 1 // Copyright 2019-2020 Georges Racinet <georges.racinet@octobus.net>
2 2 //
3 3 // This software may be used and distributed according to the terms of the
4 4 // GNU General Public License version 2 or any later version.
5 5
6 6 //! Definitions and utilities for Revision nodes
7 7 //!
8 8 //! In Mercurial code base, it is customary to call "a node" the binary SHA
9 9 //! of a revision.
10 10
11 use crate::errors::HgError;
11 12 use bytes_cast::BytesCast;
12 13 use std::convert::{TryFrom, TryInto};
13 14 use std::fmt;
14 15
15 16 /// The length in bytes of a `Node`
16 17 ///
17 18 /// This constant is meant to ease refactors of this module, and
17 18 /// is private so that calling code does not expect all nodes to have
19 20 /// the same size, should we support several formats concurrently in
20 21 /// the future.
21 22 pub const NODE_BYTES_LENGTH: usize = 20;
22 23
23 24 /// Id of the null node.
24 25 ///
25 26 /// Used to indicate the absence of node.
26 27 pub const NULL_NODE_ID: [u8; NODE_BYTES_LENGTH] = [0u8; NODE_BYTES_LENGTH];
27 28
28 29 /// The length in nybbles of a `Node`
29 30 ///
30 31 /// See also `NODE_BYTES_LENGTH` about it being private.
31 32 const NODE_NYBBLES_LENGTH: usize = 2 * NODE_BYTES_LENGTH;
32 33
33 34 /// Private alias for readability and to ease future change
34 35 type NodeData = [u8; NODE_BYTES_LENGTH];
35 36
36 37 /// Binary revision SHA
37 38 ///
38 39 /// ## Future changes of hash size
39 40 ///
40 41 /// To accommodate future changes of hash size, Rust callers
41 42 /// should use the conversion methods at the boundaries (FFI, actual
42 43 /// computation of hashes and I/O) only, and only if required.
43 44 ///
44 45 /// All other callers outside of unit tests should just handle `Node` values
45 46 /// and never make any assumption on the actual length, using [`nybbles_len`]
46 47 /// if they need a loop boundary.
47 48 ///
48 49 /// All methods that create a `Node` either take a type that enforces
49 50 /// the size or return an error at runtime.
50 51 ///
51 52 /// [`nybbles_len`]: #method.nybbles_len
52 53 #[derive(Copy, Clone, Debug, PartialEq, BytesCast, derive_more::From)]
53 54 #[repr(transparent)]
54 55 pub struct Node {
55 56 data: NodeData,
56 57 }
57 58
58 59 /// The node value for NULL_REVISION
59 60 pub const NULL_NODE: Node = Node {
60 61 data: [0; NODE_BYTES_LENGTH],
61 62 };
62 63
63 64 /// Return an error if the slice has an unexpected length
64 65 impl<'a> TryFrom<&'a [u8]> for &'a Node {
65 66 type Error = ();
66 67
67 68 #[inline]
68 69 fn try_from(bytes: &'a [u8]) -> Result<Self, Self::Error> {
69 70 match Node::from_bytes(bytes) {
70 71 Ok((node, rest)) if rest.is_empty() => Ok(node),
71 72 _ => Err(()),
72 73 }
73 74 }
74 75 }
75 76
76 77 /// Return an error if the slice has an unexpected length
77 78 impl TryFrom<&'_ [u8]> for Node {
78 79 type Error = std::array::TryFromSliceError;
79 80
80 81 #[inline]
81 82 fn try_from(bytes: &'_ [u8]) -> Result<Self, Self::Error> {
82 83 let data = bytes.try_into()?;
83 84 Ok(Self { data })
84 85 }
85 86 }
86 87
87 88 impl fmt::LowerHex for Node {
88 89 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
89 90 for &byte in &self.data {
90 91 write!(f, "{:02x}", byte)?
91 92 }
92 93 Ok(())
93 94 }
94 95 }
95 96
96 97 #[derive(Debug)]
97 98 pub struct FromHexError;
98 99
99 100 /// Low level utility function, also for prefixes
100 101 fn get_nybble(s: &[u8], i: usize) -> u8 {
101 102 if i % 2 == 0 {
102 103 s[i / 2] >> 4
103 104 } else {
104 105 s[i / 2] & 0x0f
105 106 }
106 107 }
107 108
108 109 impl Node {
109 110 /// Retrieve the `i`th half-byte of the binary data.
110 111 ///
111 112 /// This is also the `i`th hexadecimal digit in numeric form,
112 113 /// also called a [nybble](https://en.wikipedia.org/wiki/Nibble).
113 114 pub fn get_nybble(&self, i: usize) -> u8 {
114 115 get_nybble(&self.data, i)
115 116 }
116 117
117 118 /// Length of the data, in nybbles
118 119 pub fn nybbles_len(&self) -> usize {
119 120 // public exposure as an instance method only, so that we can
120 121 // easily support several sizes of hashes if needed in the future.
121 122 NODE_NYBBLES_LENGTH
122 123 }
123 124
124 125 /// Convert from hexadecimal string representation
125 126 ///
126 127 /// Exact length is required.
127 128 ///
128 129 /// To be used in FFI and I/O only, in order to facilitate future
129 130 /// changes of hash format.
130 131 pub fn from_hex(hex: impl AsRef<[u8]>) -> Result<Node, FromHexError> {
131 132 let prefix = NodePrefix::from_hex(hex)?;
132 133 if prefix.nybbles_len() == NODE_NYBBLES_LENGTH {
133 134 Ok(Self { data: prefix.data })
134 135 } else {
135 136 Err(FromHexError)
136 137 }
137 138 }
138 139
140 /// `from_hex`, but for input from an internal file of the repository such
141 /// as a changelog or manifest entry.
142 ///
143 /// An error is treated as repository corruption.
144 pub fn from_hex_for_repo(hex: impl AsRef<[u8]>) -> Result<Node, HgError> {
145 Self::from_hex(hex.as_ref()).map_err(|FromHexError| {
146 HgError::CorruptedRepository(format!(
147 "Expected a full hexadecimal node ID, found {}",
148 String::from_utf8_lossy(hex.as_ref())
149 ))
150 })
151 }
152
139 153 /// Provide access to binary data
140 154 ///
141 155 /// This is needed by FFI layers, for instance to return expected
142 156 /// binary values to Python.
143 157 pub fn as_bytes(&self) -> &[u8] {
144 158 &self.data
145 159 }
146 160 }
147 161
148 162 /// The beginning of a binary revision SHA.
149 163 ///
150 164 /// Since it can potentially come from a hexadecimal representation with
151 165 /// odd length, it needs to carry around whether the last 4 bits are relevant
152 166 /// or not.
153 167 #[derive(Debug, PartialEq, Copy, Clone)]
154 168 pub struct NodePrefix {
155 169 /// In `1..=NODE_NYBBLES_LENGTH`
156 170 nybbles_len: u8,
157 171 /// The first `4 * length_in_nybbles` bits are used (considering bits
158 172 /// within a bytes in big-endian: most significant first), the rest
159 173 /// are zero.
160 174 data: NodeData,
161 175 }
162 176
163 177 impl NodePrefix {
164 178 /// Convert from hexadecimal string representation
165 179 ///
166 180 /// Similarly to `hex::decode`, can be used with Unicode string types
167 181 /// (`String`, `&str`) as well as bytes.
168 182 ///
169 183 /// To be used in FFI and I/O only, in order to facilitate future
170 184 /// changes of hash format.
171 185 pub fn from_hex(hex: impl AsRef<[u8]>) -> Result<Self, FromHexError> {
172 186 let hex = hex.as_ref();
173 187 let len = hex.len();
174 188 if len > NODE_NYBBLES_LENGTH || len == 0 {
175 189 return Err(FromHexError);
176 190 }
177 191
178 192 let mut data = [0; NODE_BYTES_LENGTH];
179 193 let mut nybbles_len = 0;
180 194 for &ascii_byte in hex {
181 195 let nybble = match char::from(ascii_byte).to_digit(16) {
182 196 Some(digit) => digit as u8,
183 197 None => return Err(FromHexError),
184 198 };
185 199 // Fill in the upper half of a byte first, then the lower half.
186 200 let shift = if nybbles_len % 2 == 0 { 4 } else { 0 };
187 201 data[nybbles_len as usize / 2] |= nybble << shift;
188 202 nybbles_len += 1;
189 203 }
190 204 Ok(Self { data, nybbles_len })
191 205 }
192 206
193 207 pub fn nybbles_len(&self) -> usize {
194 208 self.nybbles_len as _
195 209 }
196 210
197 211 pub fn is_prefix_of(&self, node: &Node) -> bool {
198 212 let full_bytes = self.nybbles_len() / 2;
199 213 if self.data[..full_bytes] != node.data[..full_bytes] {
200 214 return false;
201 215 }
202 216 if self.nybbles_len() % 2 == 0 {
203 217 return true;
204 218 }
205 219 let last = self.nybbles_len() - 1;
206 220 self.get_nybble(last) == node.get_nybble(last)
207 221 }
208 222
209 223 /// Retrieve the `i`th half-byte from the prefix.
210 224 ///
211 225 /// This is also the `i`th hexadecimal digit in numeric form,
212 226 /// also called a [nybble](https://en.wikipedia.org/wiki/Nibble).
213 227 pub fn get_nybble(&self, i: usize) -> u8 {
214 228 assert!(i < self.nybbles_len());
215 229 get_nybble(&self.data, i)
216 230 }
217 231
218 232 fn iter_nybbles(&self) -> impl Iterator<Item = u8> + '_ {
219 233 (0..self.nybbles_len()).map(move |i| get_nybble(&self.data, i))
220 234 }
221 235
222 236 /// Return the index of the first nybble that differs from `node`
223 237 ///
224 238 /// If the return value is `None`, that means that `self` is
225 239 /// a prefix of `node`; note that this method is a bit slower
226 240 /// than `is_prefix_of`.
227 241 ///
228 242 /// Returned index is as in `get_nybble`, i.e., starting at 0.
229 243 pub fn first_different_nybble(&self, node: &Node) -> Option<usize> {
230 244 self.iter_nybbles()
231 245 .zip(NodePrefix::from(*node).iter_nybbles())
232 246 .position(|(a, b)| a != b)
233 247 }
234 248 }
235 249
236 250 impl fmt::LowerHex for NodePrefix {
237 251 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
238 252 let full_bytes = self.nybbles_len() / 2;
239 253 for &byte in &self.data[..full_bytes] {
240 254 write!(f, "{:02x}", byte)?
241 255 }
242 256 if self.nybbles_len() % 2 == 1 {
243 257 let last = self.nybbles_len() - 1;
244 258 write!(f, "{:x}", self.get_nybble(last))?
245 259 }
246 260 Ok(())
247 261 }
248 262 }
249 263
250 264 /// A shortcut for full `Node` references
251 265 impl From<&'_ Node> for NodePrefix {
252 266 fn from(node: &'_ Node) -> Self {
253 267 NodePrefix {
254 268 nybbles_len: node.nybbles_len() as _,
255 269 data: node.data,
256 270 }
257 271 }
258 272 }
259 273
260 274 /// A shortcut for full `Node` references
261 275 impl From<Node> for NodePrefix {
262 276 fn from(node: Node) -> Self {
263 277 NodePrefix {
264 278 nybbles_len: node.nybbles_len() as _,
265 279 data: node.data,
266 280 }
267 281 }
268 282 }
269 283
270 284 impl PartialEq<Node> for NodePrefix {
271 285 fn eq(&self, other: &Node) -> bool {
272 286 Self::from(*other) == *self
273 287 }
274 288 }
275 289
276 290 #[cfg(test)]
277 291 mod tests {
278 292 use super::*;
279 293
280 294 const SAMPLE_NODE_HEX: &str = "0123456789abcdeffedcba9876543210deadbeef";
281 295 const SAMPLE_NODE: Node = Node {
282 296 data: [
283 297 0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0xfe, 0xdc, 0xba,
284 298 0x98, 0x76, 0x54, 0x32, 0x10, 0xde, 0xad, 0xbe, 0xef,
285 299 ],
286 300 };
287 301
288 302 /// Pad a hexadecimal string to reach `NODE_NYBBLES_LENGTH`
289 303 /// The padding is made with zeros.
290 304 pub fn hex_pad_right(hex: &str) -> String {
291 305 let mut res = hex.to_string();
292 306 while res.len() < NODE_NYBBLES_LENGTH {
293 307 res.push('0');
294 308 }
295 309 res
296 310 }
297 311
298 312 #[test]
299 313 fn test_node_from_hex() {
300 314 let not_hex = "012... oops";
301 315 let too_short = "0123";
302 316 let too_long = format!("{}0", SAMPLE_NODE_HEX);
303 317 assert_eq!(Node::from_hex(SAMPLE_NODE_HEX).unwrap(), SAMPLE_NODE);
304 318 assert!(Node::from_hex(not_hex).is_err());
305 319 assert!(Node::from_hex(too_short).is_err());
306 320 assert!(Node::from_hex(&too_long).is_err());
307 321 }
308 322
309 323 #[test]
310 324 fn test_node_encode_hex() {
311 325 assert_eq!(format!("{:x}", SAMPLE_NODE), SAMPLE_NODE_HEX);
312 326 }
313 327
314 328 #[test]
315 329 fn test_prefix_from_to_hex() -> Result<(), FromHexError> {
316 330 assert_eq!(format!("{:x}", NodePrefix::from_hex("0e1")?), "0e1");
317 331 assert_eq!(format!("{:x}", NodePrefix::from_hex("0e1a")?), "0e1a");
318 332 assert_eq!(
319 333 format!("{:x}", NodePrefix::from_hex(SAMPLE_NODE_HEX)?),
320 334 SAMPLE_NODE_HEX
321 335 );
322 336 Ok(())
323 337 }
324 338
325 339 #[test]
326 340 fn test_prefix_from_hex_errors() {
327 341 assert!(NodePrefix::from_hex("testgr").is_err());
328 342 let mut long = format!("{:x}", NULL_NODE);
329 343 long.push('c');
330 344 assert!(NodePrefix::from_hex(&long).is_err())
331 345 }
332 346
333 347 #[test]
334 348 fn test_is_prefix_of() -> Result<(), FromHexError> {
335 349 let mut node_data = [0; NODE_BYTES_LENGTH];
336 350 node_data[0] = 0x12;
337 351 node_data[1] = 0xca;
338 352 let node = Node::from(node_data);
339 353 assert!(NodePrefix::from_hex("12")?.is_prefix_of(&node));
340 354 assert!(!NodePrefix::from_hex("1a")?.is_prefix_of(&node));
341 355 assert!(NodePrefix::from_hex("12c")?.is_prefix_of(&node));
342 356 assert!(!NodePrefix::from_hex("12d")?.is_prefix_of(&node));
343 357 Ok(())
344 358 }
345 359
346 360 #[test]
347 361 fn test_get_nybble() -> Result<(), FromHexError> {
348 362 let prefix = NodePrefix::from_hex("dead6789cafe")?;
349 363 assert_eq!(prefix.get_nybble(0), 13);
350 364 assert_eq!(prefix.get_nybble(7), 9);
351 365 Ok(())
352 366 }
353 367
354 368 #[test]
355 369 fn test_first_different_nybble_even_prefix() {
356 370 let prefix = NodePrefix::from_hex("12ca").unwrap();
357 371 let mut node = Node::from([0; NODE_BYTES_LENGTH]);
358 372 assert_eq!(prefix.first_different_nybble(&node), Some(0));
359 373 node.data[0] = 0x13;
360 374 assert_eq!(prefix.first_different_nybble(&node), Some(1));
361 375 node.data[0] = 0x12;
362 376 assert_eq!(prefix.first_different_nybble(&node), Some(2));
363 377 node.data[1] = 0xca;
364 378 // now it is a prefix
365 379 assert_eq!(prefix.first_different_nybble(&node), None);
366 380 }
367 381
368 382 #[test]
369 383 fn test_first_different_nybble_odd_prefix() {
370 384 let prefix = NodePrefix::from_hex("12c").unwrap();
371 385 let mut node = Node::from([0; NODE_BYTES_LENGTH]);
372 386 assert_eq!(prefix.first_different_nybble(&node), Some(0));
373 387 node.data[0] = 0x13;
374 388 assert_eq!(prefix.first_different_nybble(&node), Some(1));
375 389 node.data[0] = 0x12;
376 390 assert_eq!(prefix.first_different_nybble(&node), Some(2));
377 391 node.data[1] = 0xca;
378 392 // now it is a prefix
379 393 assert_eq!(prefix.first_different_nybble(&node), None);
380 394 }
381 395 }
382 396
383 397 #[cfg(test)]
384 398 pub use tests::hex_pad_right;
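
A crate-internal sketch contrasting the two hex parsers: `NodePrefix::from_hex` for possibly-abbreviated user input, `Node::from_hex_for_repo` for full ids read from repository files (where failure means corruption):

    use crate::revlog::node::{Node, NodePrefix};

    fn prefix_matches(user_input: &str, stored_hex: &[u8]) -> bool {
        match (
            NodePrefix::from_hex(user_input),
            Node::from_hex_for_repo(stored_hex),
        ) {
            (Ok(prefix), Ok(node)) => prefix.is_prefix_of(&node),
            _ => false,
        }
    }
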
@@ -1,105 +1,110 b''
1 use crate::errors::{HgError, HgResultExt};
1 2 use bytes_cast::{unaligned, BytesCast};
2 3 use memmap::Mmap;
3 4 use std::path::{Path, PathBuf};
4 5
5 6 use super::revlog::RevlogError;
6 7 use crate::repo::Repo;
7 8 use crate::utils::strip_suffix;
8 9
9 10 const ONDISK_VERSION: u8 = 1;
10 11
11 12 pub(super) struct NodeMapDocket {
12 13 pub data_length: usize,
13 14 // TODO: keep here more of the data from `parse()` when we need it
14 15 }
15 16
16 17 #[derive(BytesCast)]
17 18 #[repr(C)]
18 19 struct DocketHeader {
19 20 uid_size: u8,
20 21 _tip_rev: unaligned::U64Be,
21 22 data_length: unaligned::U64Be,
22 23 _data_unused: unaligned::U64Be,
23 24 tip_node_size: unaligned::U64Be,
24 25 }
25 26
26 27 impl NodeMapDocket {
27 28 /// Return `Ok(None)` when the caller should proceed without a persistent
28 29 /// nodemap:
29 30 ///
30 31 /// * This revlog does not have a `.n` docket file (it is not generated for
31 32 /// small revlogs), or
32 33 /// * The docket has an unsupported version number (repositories created by
33 34 /// later hg, maybe that should be a requirement instead?), or
34 35 /// * The docket file points to a missing (likely deleted) data file (this
35 36 /// can happen in a rare race condition).
36 37 pub fn read_from_file(
37 38 repo: &Repo,
38 39 index_path: &Path,
39 40 ) -> Result<Option<(Self, Mmap)>, RevlogError> {
40 41 let docket_path = index_path.with_extension("n");
41 let docket_bytes = match repo.store_vfs().read(&docket_path) {
42 Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
43 return Ok(None)
44 }
45 Err(e) => return Err(RevlogError::IoError(e)),
46 Ok(bytes) => bytes,
42 let docket_bytes = if let Some(bytes) =
43 repo.store_vfs().read(&docket_path).io_not_found_as_none()?
44 {
45 bytes
46 } else {
47 return Ok(None);
47 48 };
48 49
49 50 let input = if let Some((&ONDISK_VERSION, rest)) =
50 51 docket_bytes.split_first()
51 52 {
52 53 rest
53 54 } else {
54 55 return Ok(None);
55 56 };
56 57
57 let (header, rest) = DocketHeader::from_bytes(input)?;
58 /// Treat any error as a parse error
59 fn parse<T, E>(result: Result<T, E>) -> Result<T, RevlogError> {
60 result.map_err(|_| {
61 HgError::corrupted("nodemap docket parse error").into()
62 })
63 }
64
65 let (header, rest) = parse(DocketHeader::from_bytes(input))?;
58 66 let uid_size = header.uid_size as usize;
59 67 // TODO: do we care about overflow for 4 GB+ nodemap files on 32-bit
60 68 // systems?
61 69 let tip_node_size = header.tip_node_size.get() as usize;
62 70 let data_length = header.data_length.get() as usize;
63 let (uid, rest) = u8::slice_from_bytes(rest, uid_size)?;
64 let (_tip_node, _rest) = u8::slice_from_bytes(rest, tip_node_size)?;
65 let uid =
66 std::str::from_utf8(uid).map_err(|_| RevlogError::Corrupted)?;
71 let (uid, rest) = parse(u8::slice_from_bytes(rest, uid_size))?;
72 let (_tip_node, _rest) =
73 parse(u8::slice_from_bytes(rest, tip_node_size))?;
74 let uid = parse(std::str::from_utf8(uid))?;
67 75 let docket = NodeMapDocket { data_length };
68 76
69 77 let data_path = rawdata_path(&docket_path, uid);
70 // TODO: use `std::fs::read` here when the `persistent-nodemap.mmap`
78 // TODO: use `vfs.read()` here when the `persistent-nodemap.mmap`
71 79 // config is false?
72 match repo.store_vfs().mmap_open(&data_path) {
73 Ok(mmap) => {
74 if mmap.len() >= data_length {
75 Ok(Some((docket, mmap)))
76 } else {
77 Err(RevlogError::Corrupted)
78 }
80 if let Some(mmap) = repo
81 .store_vfs()
82 .mmap_open(&data_path)
83 .io_not_found_as_none()?
84 {
85 if mmap.len() >= data_length {
86 Ok(Some((docket, mmap)))
87 } else {
88 Err(HgError::corrupted("persistent nodemap too short").into())
79 89 }
80 Err(error) => {
81 if error.kind() == std::io::ErrorKind::NotFound {
82 Ok(None)
83 } else {
84 Err(RevlogError::IoError(error))
85 }
86 }
90 } else {
91 Ok(None)
87 92 }
88 93 }
89 94 }
90 95
91 96 fn rawdata_path(docket_path: &Path, uid: &str) -> PathBuf {
92 97 let docket_name = docket_path
93 98 .file_name()
94 99 .expect("expected a base name")
95 100 .to_str()
96 101 .expect("expected an ASCII file name in the store");
97 102 let prefix = strip_suffix(docket_name, ".n.a")
98 103 .or_else(|| strip_suffix(docket_name, ".n"))
99 104 .expect("expected docket path in .n or .n.a");
100 105 let name = format!("{}-{}.nd", prefix, uid);
101 106 docket_path
102 107 .parent()
103 108 .expect("expected a non-root path")
104 109 .join(name)
105 110 }
@@ -1,387 +1,393 b''
1 1 use std::borrow::Cow;
2 2 use std::io::Read;
3 3 use std::ops::Deref;
4 4 use std::path::Path;
5 5
6 6 use byteorder::{BigEndian, ByteOrder};
7 7 use crypto::digest::Digest;
8 8 use crypto::sha1::Sha1;
9 9 use flate2::read::ZlibDecoder;
10 10 use micro_timer::timed;
11 11 use zstd;
12 12
13 13 use super::index::Index;
14 14 use super::node::{NodePrefix, NODE_BYTES_LENGTH, NULL_NODE};
15 15 use super::nodemap;
16 use super::nodemap::NodeMap;
16 use super::nodemap::{NodeMap, NodeMapError};
17 17 use super::nodemap_docket::NodeMapDocket;
18 18 use super::patch;
19 use crate::errors::HgError;
19 20 use crate::repo::Repo;
20 21 use crate::revlog::Revision;
21 22
23 #[derive(derive_more::From)]
22 24 pub enum RevlogError {
23 IoError(std::io::Error),
24 UnsuportedVersion(u16),
25 25 InvalidRevision,
26 26 /// Found more than one entry whose ID match the requested prefix
27 27 AmbiguousPrefix,
28 Corrupted,
29 UnknowDataFormat(u8),
28 #[from]
29 Other(HgError),
30 30 }
31 31
32 impl From<bytes_cast::FromBytesError> for RevlogError {
33 fn from(_: bytes_cast::FromBytesError) -> Self {
34 RevlogError::Corrupted
32 impl From<NodeMapError> for RevlogError {
33 fn from(error: NodeMapError) -> Self {
34 match error {
35 NodeMapError::MultipleResults => RevlogError::AmbiguousPrefix,
36 NodeMapError::RevisionNotInIndex(_) => RevlogError::corrupted(),
37 }
38 }
39 }
40
41 impl RevlogError {
42 fn corrupted() -> Self {
43 RevlogError::Other(HgError::corrupted("corrupted revlog"))
35 44 }
36 45 }
37 46
38 47 /// Read only implementation of revlog.
39 48 pub struct Revlog {
40 49 /// When index and data are not interleaved: bytes of the revlog index.
41 50 /// When index and data are interleaved: bytes of the revlog index and
42 51 /// data.
43 52 index: Index,
44 53 /// When index and data are not interleaved: bytes of the revlog data
45 54 data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>>,
46 55 /// When present on disk: the persistent nodemap for this revlog
47 56 nodemap: Option<nodemap::NodeTree>,
48 57 }
49 58
50 59 impl Revlog {
51 60 /// Open a revlog index file.
52 61 ///
53 62 /// It will also open the associated data file if index and data are not
54 63 /// interleaved.
55 64 #[timed]
56 65 pub fn open(
57 66 repo: &Repo,
58 67 index_path: impl AsRef<Path>,
59 68 data_path: Option<&Path>,
60 69 ) -> Result<Self, RevlogError> {
61 70 let index_path = index_path.as_ref();
62 let index_mmap = repo
63 .store_vfs()
64 .mmap_open(&index_path)
65 .map_err(RevlogError::IoError)?;
71 let index_mmap = repo.store_vfs().mmap_open(&index_path)?;
66 72
67 73 let version = get_version(&index_mmap);
68 74 if version != 1 {
69 return Err(RevlogError::UnsuportedVersion(version));
75 // A proper new version should have had a repo/store requirement.
76 return Err(RevlogError::corrupted());
70 77 }
71 78
72 79 let index = Index::new(Box::new(index_mmap))?;
73 80
74 81 let default_data_path = index_path.with_extension("d");
75 82
76 83 // type annotation required
77 84 // won't recognize Mmap as Deref<Target = [u8]>
78 85 let data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>> =
79 86 if index.is_inline() {
80 87 None
81 88 } else {
82 89 let data_path = data_path.unwrap_or(&default_data_path);
83 let data_mmap = repo
84 .store_vfs()
85 .mmap_open(data_path)
86 .map_err(RevlogError::IoError)?;
90 let data_mmap = repo.store_vfs().mmap_open(data_path)?;
87 91 Some(Box::new(data_mmap))
88 92 };
89 93
90 94 let nodemap = NodeMapDocket::read_from_file(repo, index_path)?.map(
91 95 |(docket, data)| {
92 96 nodemap::NodeTree::load_bytes(
93 97 Box::new(data),
94 98 docket.data_length,
95 99 )
96 100 },
97 101 );
98 102
99 103 Ok(Revlog {
100 104 index,
101 105 data_bytes,
102 106 nodemap,
103 107 })
104 108 }
105 109
106 110 /// Return number of entries of the `Revlog`.
107 111 pub fn len(&self) -> usize {
108 112 self.index.len()
109 113 }
110 114
111 115 /// Returns `true` if the `Revlog` has zero `entries`.
112 116 pub fn is_empty(&self) -> bool {
113 117 self.index.is_empty()
114 118 }
115 119
116 120 /// Return the revision whose node id matches the given prefix.
117 121 #[timed]
118 122 pub fn get_node_rev(
119 123 &self,
120 124 node: NodePrefix,
121 125 ) -> Result<Revision, RevlogError> {
122 126 if let Some(nodemap) = &self.nodemap {
123 127 return nodemap
124 .find_bin(&self.index, node)
125 // TODO: propagate details of this error:
126 .map_err(|_| RevlogError::Corrupted)?
128 .find_bin(&self.index, node)?
127 129 .ok_or(RevlogError::InvalidRevision);
128 130 }
129 131
130 132 // Fallback to linear scan when a persistent nodemap is not present.
131 133 // This happens when the persistent-nodemap experimental feature is not
132 134 // enabled, or for small revlogs.
133 135 //
134 136 // TODO: consider building a non-persistent nodemap in memory to
135 137 // optimize these cases.
136 138 let mut found_by_prefix = None;
137 139 for rev in (0..self.len() as Revision).rev() {
138 140 let index_entry =
139 self.index.get_entry(rev).ok_or(RevlogError::Corrupted)?;
141 self.index.get_entry(rev).ok_or(HgError::corrupted(
142 "revlog references a revision not in the index",
143 ))?;
140 144 if node == *index_entry.hash() {
141 145 return Ok(rev);
142 146 }
143 147 if node.is_prefix_of(index_entry.hash()) {
144 148 if found_by_prefix.is_some() {
145 149 return Err(RevlogError::AmbiguousPrefix);
146 150 }
147 151 found_by_prefix = Some(rev)
148 152 }
149 153 }
150 154 found_by_prefix.ok_or(RevlogError::InvalidRevision)
151 155 }
152 156
153 157 /// Returns whether the given revision exists in this revlog.
154 158 pub fn has_rev(&self, rev: Revision) -> bool {
155 159 self.index.get_entry(rev).is_some()
156 160 }
157 161
158 162 /// Return the full data associated to a revision.
159 163 ///
160 164 /// All entries required to build the final data out of deltas will be
161 165 /// retrieved as needed, and the deltas will be applied to the initial
162 166 /// snapshot to rebuild the final data.
163 167 #[timed]
164 168 pub fn get_rev_data(&self, rev: Revision) -> Result<Vec<u8>, RevlogError> {
165 169 // Todo return -> Cow
166 170 let mut entry = self.get_entry(rev)?;
167 171 let mut delta_chain = vec![];
168 172 while let Some(base_rev) = entry.base_rev {
169 173 delta_chain.push(entry);
170 entry =
171 self.get_entry(base_rev).or(Err(RevlogError::Corrupted))?;
174 entry = self
175 .get_entry(base_rev)
176 .map_err(|_| RevlogError::corrupted())?;
172 177 }
173 178
174 179 // TODO do not look twice in the index
175 180 let index_entry = self
176 181 .index
177 182 .get_entry(rev)
178 183 .ok_or(RevlogError::InvalidRevision)?;
179 184
180 185 let data: Vec<u8> = if delta_chain.is_empty() {
181 186 entry.data()?.into()
182 187 } else {
183 188 Revlog::build_data_from_deltas(entry, &delta_chain)?
184 189 };
185 190
186 191 if self.check_hash(
187 192 index_entry.p1(),
188 193 index_entry.p2(),
189 194 index_entry.hash().as_bytes(),
190 195 &data,
191 196 ) {
192 197 Ok(data)
193 198 } else {
194 Err(RevlogError::Corrupted)
199 Err(RevlogError::corrupted())
195 200 }
196 201 }
197 202
198 203 /// Check the hash of some given data against the recorded hash.
199 204 pub fn check_hash(
200 205 &self,
201 206 p1: Revision,
202 207 p2: Revision,
203 208 expected: &[u8],
204 209 data: &[u8],
205 210 ) -> bool {
206 211 let e1 = self.index.get_entry(p1);
207 212 let h1 = match e1 {
208 213 Some(ref entry) => entry.hash(),
209 214 None => &NULL_NODE,
210 215 };
211 216 let e2 = self.index.get_entry(p2);
212 217 let h2 = match e2 {
213 218 Some(ref entry) => entry.hash(),
214 219 None => &NULL_NODE,
215 220 };
216 221
217 222 hash(data, h1.as_bytes(), h2.as_bytes()).as_slice() == expected
218 223 }
219 224
220 225 /// Build the full data of a revision out of its snapshot
221 226 /// and its deltas.
222 227 #[timed]
223 228 fn build_data_from_deltas(
224 229 snapshot: RevlogEntry,
225 230 deltas: &[RevlogEntry],
226 231 ) -> Result<Vec<u8>, RevlogError> {
227 232 let snapshot = snapshot.data()?;
228 233 let deltas = deltas
229 234 .iter()
230 235 .rev()
231 236 .map(RevlogEntry::data)
232 237 .collect::<Result<Vec<Cow<'_, [u8]>>, RevlogError>>()?;
233 238 let patches: Vec<_> =
234 239 deltas.iter().map(|d| patch::PatchList::new(d)).collect();
235 240 let patch = patch::fold_patch_lists(&patches);
236 241 Ok(patch.apply(&snapshot))
237 242 }
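Conceptually, `build_data_from_deltas` starts from the oldest full snapshot and applies the deltas in chain order (oldest first, which is what the `.rev()` above restores) to reconstruct the requested revision. The sketch below applies hunks one delta at a time and models a hunk as a `(start, end, replacement)` triple; the real code folds all patch lists into a single patch before applying, and uses Mercurial's binary patch format rather than this toy representation:

fn apply_delta(base: &[u8], hunks: &[(usize, usize, Vec<u8>)]) -> Vec<u8> {
    let mut out = Vec::new();
    let mut last = 0;
    for (start, end, data) in hunks {
        out.extend_from_slice(&base[last..*start]); // unchanged prefix
        out.extend_from_slice(data);                // replacement bytes
        last = *end;
    }
    out.extend_from_slice(&base[last..]); // unchanged suffix
    out
}

fn main() {
    let snapshot = b"hello world".to_vec();
    // Two deltas in chain order: replace "world", then append "!".
    let chain = vec![
        vec![(6, 11, b"revlog".to_vec())],
        vec![(12, 12, b"!".to_vec())],
    ];
    let mut data = snapshot;
    for hunks in &chain {
        data = apply_delta(&data, hunks);
    }
    assert_eq!(data, b"hello revlog!".to_vec());
}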
238 243
239 244 /// Return the revlog data.
240 245 fn data(&self) -> &[u8] {
241 246 match self.data_bytes {
242 247 Some(ref data_bytes) => &data_bytes,
243 248 None => panic!(
244 249 "forgot to load the data or trying to access inline data"
245 250 ),
246 251 }
247 252 }
248 253
249 254 /// Get an entry of the revlog.
250 255 fn get_entry(&self, rev: Revision) -> Result<RevlogEntry, RevlogError> {
251 256 let index_entry = self
252 257 .index
253 258 .get_entry(rev)
254 259 .ok_or(RevlogError::InvalidRevision)?;
255 260 let start = index_entry.offset();
256 261 let end = start + index_entry.compressed_len();
257 262 let data = if self.index.is_inline() {
258 263 self.index.data(start, end)
259 264 } else {
260 265 &self.data()[start..end]
261 266 };
262 267 let entry = RevlogEntry {
263 268 rev,
264 269 bytes: data,
265 270 compressed_len: index_entry.compressed_len(),
266 271 uncompressed_len: index_entry.uncompressed_len(),
267 272 base_rev: if index_entry.base_revision() == rev {
268 273 None
269 274 } else {
270 275 Some(index_entry.base_revision())
271 276 },
272 277 };
273 278 Ok(entry)
274 279 }
275 280 }
276 281
277 282 /// The revlog entry's bytes and the necessary information to extract
278 283 /// the entry's data.
279 284 #[derive(Debug)]
280 285 pub struct RevlogEntry<'a> {
281 286 rev: Revision,
282 287 bytes: &'a [u8],
283 288 compressed_len: usize,
284 289 uncompressed_len: usize,
285 290 base_rev: Option<Revision>,
286 291 }
287 292
288 293 impl<'a> RevlogEntry<'a> {
289 294 /// Extract the data contained in the entry.
290 295 pub fn data(&self) -> Result<Cow<'_, [u8]>, RevlogError> {
291 296 if self.bytes.is_empty() {
292 297 return Ok(Cow::Borrowed(&[]));
293 298 }
294 299 match self.bytes[0] {
295 300 // Revision data is the entirety of the entry, including this
296 301 // header.
297 302 b'\0' => Ok(Cow::Borrowed(self.bytes)),
298 303 // Raw revision data follows.
299 304 b'u' => Ok(Cow::Borrowed(&self.bytes[1..])),
300 305 // zlib (RFC 1950) data.
301 306 b'x' => Ok(Cow::Owned(self.uncompressed_zlib_data()?)),
302 307 // zstd data.
303 308 b'\x28' => Ok(Cow::Owned(self.uncompressed_zstd_data()?)),
304 format_type => Err(RevlogError::UnknowDataFormat(format_type)),
309 // A proper new format should have had a repo/store requirement.
310 _format_type => Err(RevlogError::corrupted()),
305 311 }
306 312 }
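The match in `data` dispatches on the first byte of the stored chunk to decide how the revision is encoded on disk. A dependency-free classifier mirroring that dispatch (the returned strings are only labels for illustration):

fn entry_encoding(first_byte: u8) -> &'static str {
    match first_byte {
        b'\0' => "uncompressed, header byte included in the data",
        b'u' => "uncompressed, raw data follows the header byte",
        b'x' => "zlib (RFC 1950)",
        0x28 => "zstd",
        _ => "unknown: treated as a corrupted revlog",
    }
}

fn main() {
    assert_eq!(entry_encoding(b'x'), "zlib (RFC 1950)");
    assert_eq!(entry_encoding(0x28), "zstd");
}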
307 313
308 314 fn uncompressed_zlib_data(&self) -> Result<Vec<u8>, RevlogError> {
309 315 let mut decoder = ZlibDecoder::new(self.bytes);
310 316 if self.is_delta() {
311 317 let mut buf = Vec::with_capacity(self.compressed_len);
312 318 decoder
313 319 .read_to_end(&mut buf)
314 .or(Err(RevlogError::Corrupted))?;
320 .map_err(|_| RevlogError::corrupted())?;
315 321 Ok(buf)
316 322 } else {
317 323 let mut buf = vec![0; self.uncompressed_len];
318 324 decoder
319 325 .read_exact(&mut buf)
320 .or(Err(RevlogError::Corrupted))?;
326 .map_err(|_| RevlogError::corrupted())?;
321 327 Ok(buf)
322 328 }
323 329 }
324 330
325 331 fn uncompressed_zstd_data(&self) -> Result<Vec<u8>, RevlogError> {
326 332 if self.is_delta() {
327 333 let mut buf = Vec::with_capacity(self.compressed_len);
328 334 zstd::stream::copy_decode(self.bytes, &mut buf)
329 .or(Err(RevlogError::Corrupted))?;
335 .map_err(|_| RevlogError::corrupted())?;
330 336 Ok(buf)
331 337 } else {
332 338 let mut buf = vec![0; self.uncompressed_len];
333 339 let len = zstd::block::decompress_to_buffer(self.bytes, &mut buf)
334 .or(Err(RevlogError::Corrupted))?;
340 .map_err(|_| RevlogError::corrupted())?;
335 341 if len != self.uncompressed_len {
336 Err(RevlogError::Corrupted)
342 Err(RevlogError::corrupted())
337 343 } else {
338 344 Ok(buf)
339 345 }
340 346 }
341 347 }
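Both decompression helpers size their output differently depending on `is_delta`: a delta's decompressed length is not recorded in the index, so the compressed length only serves as a capacity hint and the stream is read to its end, whereas a full text can be read into an exactly-sized buffer. A sketch of the zlib side of that distinction, assuming the `flate2` crate (the usual provider of `ZlibDecoder`); `decompress` and `known_len` are made-up names:

use flate2::read::ZlibDecoder;
use flate2::write::ZlibEncoder;
use flate2::Compression;
use std::io::{Read, Write};

fn decompress(bytes: &[u8], known_len: Option<usize>) -> std::io::Result<Vec<u8>> {
    let mut decoder = ZlibDecoder::new(bytes);
    match known_len {
        // Full text: the uncompressed length is known up front.
        Some(len) => {
            let mut buf = vec![0; len];
            decoder.read_exact(&mut buf)?;
            Ok(buf)
        }
        // Delta: the final length is unknown, so read until the end.
        None => {
            let mut buf = Vec::new();
            decoder.read_to_end(&mut buf)?;
            Ok(buf)
        }
    }
}

fn main() -> std::io::Result<()> {
    let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
    encoder.write_all(b"some revision text")?;
    let compressed = encoder.finish()?;
    assert_eq!(decompress(&compressed, Some(18))?.as_slice(), b"some revision text");
    assert_eq!(decompress(&compressed, None)?.as_slice(), b"some revision text");
    Ok(())
}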
342 348
343 349 /// Tell whether the entry is a snapshot or a delta
344 350 /// (this influences decompression).
345 351 fn is_delta(&self) -> bool {
346 352 self.base_rev.is_some()
347 353 }
348 354 }
349 355
350 356 /// Format version of the revlog.
351 357 pub fn get_version(index_bytes: &[u8]) -> u16 {
352 358 BigEndian::read_u16(&index_bytes[2..=3])
353 359 }
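`get_version` reads a 16-bit big-endian value from bytes 2 and 3 of the index header; the first two bytes carry the format flags. A minimal standard-library-only sketch assuming that same layout:

fn version_from_header(index_bytes: &[u8]) -> u16 {
    // Big-endian u16 at offset 2, like `get_version` above.
    u16::from_be_bytes([index_bytes[2], index_bytes[3]])
}

fn main() {
    // Two flag bytes, then version 1, then the rest of the first entry.
    let header = [0x00, 0x01, 0x00, 0x01, 0xde, 0xad];
    assert_eq!(version_from_header(&header), 1);
}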
354 360
355 361 /// Calculate the hash of a revision given its data and its parents.
356 362 fn hash(data: &[u8], p1_hash: &[u8], p2_hash: &[u8]) -> Vec<u8> {
357 363 let mut hasher = Sha1::new();
358 364 let (a, b) = (p1_hash, p2_hash);
359 365 if a > b {
360 366 hasher.input(b);
361 367 hasher.input(a);
362 368 } else {
363 369 hasher.input(a);
364 370 hasher.input(b);
365 371 }
366 372 hasher.input(data);
367 373 let mut hash = vec![0; NODE_BYTES_LENGTH];
368 374 hasher.result(&mut hash);
369 375 hash
370 376 }
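`hash` sorts the two parent hashes before feeding them to SHA-1, so a node id does not depend on which parent is stored as p1 and which as p2. The sketch below keeps that ordering rule but substitutes plain concatenation for SHA-1 so it runs without a hashing crate; `node_id` is a made-up name:

fn node_id(data: &[u8], p1: &[u8], p2: &[u8]) -> Vec<u8> {
    // Sort the parents so the input is order-independent.
    let (a, b) = if p1 > p2 { (p2, p1) } else { (p1, p2) };
    let mut input = Vec::new();
    input.extend_from_slice(a);
    input.extend_from_slice(b);
    input.extend_from_slice(data);
    input // a real implementation would return sha1(input) here
}

fn main() {
    // Swapping the parents produces the same hash input.
    assert_eq!(node_id(b"data", b"\x01", b"\x02"), node_id(b"data", b"\x02", b"\x01"));
}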
371 377
372 378 #[cfg(test)]
373 379 mod tests {
374 380 use super::*;
375 381
376 382 use super::super::index::IndexEntryBuilder;
377 383
378 384 #[test]
379 385 fn version_test() {
380 386 let bytes = IndexEntryBuilder::new()
381 387 .is_first(true)
382 388 .with_version(1)
383 389 .build();
384 390
385 391 assert_eq!(get_version(&bytes), 1)
386 392 }
387 393 }
@@ -1,146 +1,123 b''
1 1 use crate::exitcode;
2 2 use crate::ui::utf8_to_local;
3 3 use crate::ui::UiError;
4 4 use format_bytes::format_bytes;
5 5 use hg::errors::HgError;
6 6 use hg::operations::FindRootError;
7 7 use hg::revlog::revlog::RevlogError;
8 8 use hg::utils::files::get_bytes_from_path;
9 9 use std::convert::From;
10 10 use std::path::PathBuf;
11 11
12 12 /// The kind of command error
13 13 #[derive(Debug, derive_more::From)]
14 14 pub enum CommandError {
15 15 /// The root of the repository cannot be found
16 16 RootNotFound(PathBuf),
17 17 /// The current directory cannot be found
18 18 CurrentDirNotFound(std::io::Error),
19 19 /// The standard output stream cannot be written to
20 20 StdoutError,
21 21 /// The standard error stream cannot be written to
22 22 StderrError,
23 23 /// The command aborted
24 24 Abort(Option<Vec<u8>>),
25 25 /// A Mercurial capability has not been implemented.
26 26 Unimplemented,
27 27 /// Common cases
28 28 #[from]
29 29 Other(HgError),
30 30 }
31 31
32 32 impl CommandError {
33 33 pub fn get_exit_code(&self) -> exitcode::ExitCode {
34 34 match self {
35 35 CommandError::RootNotFound(_) => exitcode::ABORT,
36 36 CommandError::CurrentDirNotFound(_) => exitcode::ABORT,
37 37 CommandError::StdoutError => exitcode::ABORT,
38 38 CommandError::StderrError => exitcode::ABORT,
39 39 CommandError::Abort(_) => exitcode::ABORT,
40 40 CommandError::Unimplemented => exitcode::UNIMPLEMENTED_COMMAND,
41 41 CommandError::Other(HgError::UnsupportedFeature(_)) => {
42 42 exitcode::UNIMPLEMENTED_COMMAND
43 43 }
44 44 CommandError::Other(_) => exitcode::ABORT,
45 45 }
46 46 }
47 47
48 48 /// Return the message corresponding to the error, if any
49 49 pub fn get_error_message_bytes(&self) -> Option<Vec<u8>> {
50 50 match self {
51 51 CommandError::RootNotFound(path) => {
52 52 let bytes = get_bytes_from_path(path);
53 53 Some(format_bytes!(
54 54 b"abort: no repository found in '{}' (.hg not found)!\n",
55 55 bytes.as_slice()
56 56 ))
57 57 }
58 58 CommandError::CurrentDirNotFound(e) => Some(format_bytes!(
59 59 b"abort: error getting current working directory: {}\n",
60 60 e.to_string().as_bytes(),
61 61 )),
62 62 CommandError::Abort(message) => message.to_owned(),
63 63
64 64 CommandError::StdoutError
65 65 | CommandError::StderrError
66 66 | CommandError::Unimplemented
67 67 | CommandError::Other(HgError::UnsupportedFeature(_)) => None,
68 68
69 69 CommandError::Other(e) => {
70 70 Some(format_bytes!(b"{}\n", e.to_string().as_bytes()))
71 71 }
72 72 }
73 73 }
74 74
75 75 /// Exit the process with the corresponding exit code.
76 76 pub fn exit(&self) {
77 77 std::process::exit(self.get_exit_code())
78 78 }
79 79 }
80 80
81 81 impl From<UiError> for CommandError {
82 82 fn from(error: UiError) -> Self {
83 83 match error {
84 84 UiError::StdoutError(_) => CommandError::StdoutError,
85 85 UiError::StderrError(_) => CommandError::StderrError,
86 86 }
87 87 }
88 88 }
89 89
90 90 impl From<FindRootError> for CommandError {
91 91 fn from(err: FindRootError) -> Self {
92 92 match err {
93 93 FindRootError::RootNotFound(path) => {
94 94 CommandError::RootNotFound(path)
95 95 }
96 96 FindRootError::GetCurrentDirError(e) => {
97 97 CommandError::CurrentDirNotFound(e)
98 98 }
99 99 }
100 100 }
101 101 }
102 102
103 103 impl From<(RevlogError, &str)> for CommandError {
104 104 fn from((err, rev): (RevlogError, &str)) -> CommandError {
105 105 match err {
106 RevlogError::IoError(err) => CommandError::Abort(Some(
107 utf8_to_local(&format!("abort: {}\n", err)).into(),
108 )),
109 106 RevlogError::InvalidRevision => CommandError::Abort(Some(
110 107 utf8_to_local(&format!(
111 108 "abort: invalid revision identifier {}\n",
112 109 rev
113 110 ))
114 111 .into(),
115 112 )),
116 113 RevlogError::AmbiguousPrefix => CommandError::Abort(Some(
117 114 utf8_to_local(&format!(
118 115 "abort: ambiguous revision identifier {}\n",
119 116 rev
120 117 ))
121 118 .into(),
122 119 )),
123 RevlogError::UnsuportedVersion(version) => {
124 CommandError::Abort(Some(
125 utf8_to_local(&format!(
126 "abort: unsupported revlog version {}\n",
127 version
128 ))
129 .into(),
130 ))
131 }
132 RevlogError::Corrupted => {
133 CommandError::Abort(Some("abort: corrupted revlog\n".into()))
134 }
135 RevlogError::UnknowDataFormat(format) => {
136 CommandError::Abort(Some(
137 utf8_to_local(&format!(
138 "abort: unknow revlog dataformat {:?}\n",
139 format
140 ))
141 .into(),
142 ))
143 }
120 RevlogError::Other(err) => CommandError::Other(err),
144 121 }
145 122 }
146 123 }
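The final match arm forwards `RevlogError::Other(err)` into `CommandError::Other`, and the `#[from]` attribute on that variant lets any `HgError`-returning call be used with `?` inside command code. A self-contained sketch of that pattern with stand-in types (`read_config` and the trimmed-down enums are made up for illustration; the real crate derives the conversion with `derive_more::From`):

#[derive(Debug)]
enum HgError {
    UnsupportedFeature(String),
}

#[derive(Debug)]
enum CommandError {
    Other(HgError),
}

impl From<HgError> for CommandError {
    fn from(error: HgError) -> Self {
        CommandError::Other(error)
    }
}

fn read_config() -> Result<(), HgError> {
    Err(HgError::UnsupportedFeature("share-safe".to_string()))
}

fn run_command() -> Result<(), CommandError> {
    read_config()?; // the HgError converts into CommandError::Other here
    Ok(())
}

fn main() {
    println!("{:?}", run_command());
}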