rust: use HgError in RevlogError and Vfs...
Simon Sapin
r47172:43d63979 default
@@ -1,111 +1,114 b''
1 use std::fmt;
1 use std::fmt;
2
2
3 /// Common error cases that can happen in many different APIs
3 /// Common error cases that can happen in many different APIs
4 #[derive(Debug)]
4 #[derive(Debug)]
5 pub enum HgError {
5 pub enum HgError {
6 IoError {
6 IoError {
7 error: std::io::Error,
7 error: std::io::Error,
8 context: IoErrorContext,
8 context: IoErrorContext,
9 },
9 },
10
10
11 /// A file under `.hg/` normally only written by Mercurial
11 /// A file under `.hg/` normally only written by Mercurial
12 ///
12 ///
13 /// The given string is a short explanation for users, not intended to be
13 /// The given string is a short explanation for users, not intended to be
14 /// machine-readable.
14 /// machine-readable.
15 CorruptedRepository(String),
15 CorruptedRepository(String),
16
16
17 /// The repository or requested operation involves a feature not
17 /// The repository or requested operation involves a feature not
18 /// supported by the Rust implementation. Falling back to the Python
18 /// supported by the Rust implementation. Falling back to the Python
19 /// implementation may or may not work.
19 /// implementation may or may not work.
20 ///
20 ///
21 /// The given string is a short explanation for users, not intended to be
21 /// The given string is a short explanation for users, not intended to be
22 /// machine-readable.
22 /// machine-readable.
23 UnsupportedFeature(String),
23 UnsupportedFeature(String),
24 }
24 }
25
25
26 /// Details about where an I/O error happened
26 /// Details about where an I/O error happened
27 #[derive(Debug, derive_more::From)]
27 #[derive(Debug, derive_more::From)]
28 pub enum IoErrorContext {
28 pub enum IoErrorContext {
29 /// A filesystem operation returned `std::io::Error`
29 /// A filesystem operation returned `std::io::Error`
30 #[from]
30 #[from]
31 File(std::path::PathBuf),
31 File(std::path::PathBuf),
32 /// `std::env::current_dir` returned `std::io::Error`
32 /// `std::env::current_dir` returned `std::io::Error`
33 CurrentDir,
33 CurrentDir,
34 }
34 }
35
35
36 impl HgError {
36 impl HgError {
37 pub fn corrupted(explanation: impl Into<String>) -> Self {
37 pub fn corrupted(explanation: impl Into<String>) -> Self {
38 // TODO: capture a backtrace here and keep it in the error value
39 // to aid debugging?
40 // https://doc.rust-lang.org/std/backtrace/struct.Backtrace.html
38 HgError::CorruptedRepository(explanation.into())
41 HgError::CorruptedRepository(explanation.into())
39 }
42 }
40 }
43 }
41
44
42 // TODO: use `DisplayBytes` instead to show non-Unicode filenames losslessly?
45 // TODO: use `DisplayBytes` instead to show non-Unicode filenames losslessly?
43 impl fmt::Display for HgError {
46 impl fmt::Display for HgError {
44 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
47 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
45 match self {
48 match self {
46 HgError::IoError { error, context } => {
49 HgError::IoError { error, context } => {
47 write!(f, "{}: {}", error, context)
50 write!(f, "{}: {}", error, context)
48 }
51 }
49 HgError::CorruptedRepository(explanation) => {
52 HgError::CorruptedRepository(explanation) => {
50 write!(f, "corrupted repository: {}", explanation)
53 write!(f, "corrupted repository: {}", explanation)
51 }
54 }
52 HgError::UnsupportedFeature(explanation) => {
55 HgError::UnsupportedFeature(explanation) => {
53 write!(f, "unsupported feature: {}", explanation)
56 write!(f, "unsupported feature: {}", explanation)
54 }
57 }
55 }
58 }
56 }
59 }
57 }
60 }
58
61
59 // TODO: use `DisplayBytes` instead to show non-Unicode filenames losslessly?
62 // TODO: use `DisplayBytes` instead to show non-Unicode filenames losslessly?
60 impl fmt::Display for IoErrorContext {
63 impl fmt::Display for IoErrorContext {
61 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
64 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
62 match self {
65 match self {
63 IoErrorContext::File(path) => path.display().fmt(f),
66 IoErrorContext::File(path) => path.display().fmt(f),
64 IoErrorContext::CurrentDir => f.write_str("current directory"),
67 IoErrorContext::CurrentDir => f.write_str("current directory"),
65 }
68 }
66 }
69 }
67 }
70 }
68
71
69 pub trait IoResultExt<T> {
72 pub trait IoResultExt<T> {
70 /// Annotate a possible I/O error as related to a file at the given path.
73 /// Annotate a possible I/O error as related to a file at the given path.
71 ///
74 ///
72 /// This allows printing something like β€œFile not found: example.txt”
75 /// This allows printing something like β€œFile not found: example.txt”
73 /// instead of just β€œFile not found”.
76 /// instead of just β€œFile not found”.
74 ///
77 ///
75 /// Converts a `Result` with `std::io::Error` into one with `HgError`.
78 /// Converts a `Result` with `std::io::Error` into one with `HgError`.
76 fn for_file(self, path: &std::path::Path) -> Result<T, HgError>;
79 fn for_file(self, path: &std::path::Path) -> Result<T, HgError>;
77 }
80 }
78
81
79 impl<T> IoResultExt<T> for std::io::Result<T> {
82 impl<T> IoResultExt<T> for std::io::Result<T> {
80 fn for_file(self, path: &std::path::Path) -> Result<T, HgError> {
83 fn for_file(self, path: &std::path::Path) -> Result<T, HgError> {
81 self.map_err(|error| HgError::IoError {
84 self.map_err(|error| HgError::IoError {
82 error,
85 error,
83 context: IoErrorContext::File(path.to_owned()),
86 context: IoErrorContext::File(path.to_owned()),
84 })
87 })
85 }
88 }
86 }
89 }
87
90
88 pub trait HgResultExt<T> {
91 pub trait HgResultExt<T> {
89 /// Handle missing files separately from other I/O error cases.
92 /// Handle missing files separately from other I/O error cases.
90 ///
93 ///
91 /// Wraps the `Ok` type in an `Option`:
94 /// Wraps the `Ok` type in an `Option`:
92 ///
95 ///
93 /// * `Ok(x)` becomes `Ok(Some(x))`
96 /// * `Ok(x)` becomes `Ok(Some(x))`
94 /// * An I/O "not found" error becomes `Ok(None)`
97 /// * An I/O "not found" error becomes `Ok(None)`
95 /// * Other errors are unchanged
98 /// * Other errors are unchanged
96 fn io_not_found_as_none(self) -> Result<Option<T>, HgError>;
99 fn io_not_found_as_none(self) -> Result<Option<T>, HgError>;
97 }
100 }
98
101
99 impl<T> HgResultExt<T> for Result<T, HgError> {
102 impl<T> HgResultExt<T> for Result<T, HgError> {
100 fn io_not_found_as_none(self) -> Result<Option<T>, HgError> {
103 fn io_not_found_as_none(self) -> Result<Option<T>, HgError> {
101 match self {
104 match self {
102 Ok(x) => Ok(Some(x)),
105 Ok(x) => Ok(Some(x)),
103 Err(HgError::IoError { error, .. })
106 Err(HgError::IoError { error, .. })
104 if error.kind() == std::io::ErrorKind::NotFound =>
107 if error.kind() == std::io::ErrorKind::NotFound =>
105 {
108 {
106 Ok(None)
109 Ok(None)
107 }
110 }
108 Err(other_error) => Err(other_error),
111 Err(other_error) => Err(other_error),
109 }
112 }
110 }
113 }
111 }
114 }
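A minimal sketch (not part of the changeset) of how the pieces above are meant to compose from calling code inside hg-core: `for_file` attaches the file path to a `std::io::Error`, and `io_not_found_as_none` then turns a "not found" error into `Ok(None)`. The helper function and path argument are illustrative.

```rust
use std::path::Path;

use crate::errors::{HgError, HgResultExt, IoResultExt};

/// Illustrative helper: read a file that is allowed to be absent.
fn read_optional(path: &Path) -> Result<Option<Vec<u8>>, HgError> {
    std::fs::read(path)
        // std::io::Error -> HgError::IoError { context: IoErrorContext::File(..) }
        .for_file(path)
        // ErrorKind::NotFound -> Ok(None); other errors are kept as-is
        .io_not_found_as_none()
}
```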
@@ -1,115 +1,116 b''
1 // Copyright 2018-2020 Georges Racinet <georges.racinet@octobus.net>
1 // Copyright 2018-2020 Georges Racinet <georges.racinet@octobus.net>
2 // and Mercurial contributors
2 // and Mercurial contributors
3 //
3 //
4 // This software may be used and distributed according to the terms of the
4 // This software may be used and distributed according to the terms of the
5 // GNU General Public License version 2 or any later version.
5 // GNU General Public License version 2 or any later version.
6
6 mod ancestors;
7 mod ancestors;
7 pub mod dagops;
8 pub mod dagops;
8 pub mod errors;
9 pub mod errors;
9 pub use ancestors::{AncestorsIterator, LazyAncestors, MissingAncestors};
10 pub use ancestors::{AncestorsIterator, LazyAncestors, MissingAncestors};
10 mod dirstate;
11 mod dirstate;
11 pub mod discovery;
12 pub mod discovery;
12 pub mod requirements;
13 pub mod requirements;
13 pub mod testing; // unconditionally built, for use from integration tests
14 pub mod testing; // unconditionally built, for use from integration tests
14 pub use dirstate::{
15 pub use dirstate::{
15 dirs_multiset::{DirsMultiset, DirsMultisetIter},
16 dirs_multiset::{DirsMultiset, DirsMultisetIter},
16 dirstate_map::DirstateMap,
17 dirstate_map::DirstateMap,
17 parsers::{pack_dirstate, parse_dirstate, PARENT_SIZE},
18 parsers::{pack_dirstate, parse_dirstate, PARENT_SIZE},
18 status::{
19 status::{
19 status, BadMatch, BadType, DirstateStatus, StatusError, StatusOptions,
20 status, BadMatch, BadType, DirstateStatus, StatusError, StatusOptions,
20 },
21 },
21 CopyMap, CopyMapIter, DirstateEntry, DirstateParents, EntryState,
22 CopyMap, CopyMapIter, DirstateEntry, DirstateParents, EntryState,
22 StateMap, StateMapIter,
23 StateMap, StateMapIter,
23 };
24 };
24 pub mod copy_tracing;
25 pub mod copy_tracing;
25 mod filepatterns;
26 mod filepatterns;
26 pub mod matchers;
27 pub mod matchers;
27 pub mod repo;
28 pub mod repo;
28 pub mod revlog;
29 pub mod revlog;
29 pub use revlog::*;
30 pub use revlog::*;
30 pub mod config;
31 pub mod config;
31 pub mod operations;
32 pub mod operations;
32 pub mod revset;
33 pub mod revset;
33 pub mod utils;
34 pub mod utils;
34
35
35 use crate::utils::hg_path::{HgPathBuf, HgPathError};
36 use crate::utils::hg_path::{HgPathBuf, HgPathError};
36 pub use filepatterns::{
37 pub use filepatterns::{
37 parse_pattern_syntax, read_pattern_file, IgnorePattern,
38 parse_pattern_syntax, read_pattern_file, IgnorePattern,
38 PatternFileWarning, PatternSyntax,
39 PatternFileWarning, PatternSyntax,
39 };
40 };
40 use std::collections::HashMap;
41 use std::collections::HashMap;
41 use twox_hash::RandomXxHashBuilder64;
42 use twox_hash::RandomXxHashBuilder64;
42
43
43 /// This is a contract between the `micro-timer` crate and us, to expose
44 /// This is a contract between the `micro-timer` crate and us, to expose
44 /// the `log` crate as `crate::log`.
45 /// the `log` crate as `crate::log`.
45 use log;
46 use log;
46
47
47 pub type LineNumber = usize;
48 pub type LineNumber = usize;
48
49
49 /// Rust's default hasher is too slow because it tries to prevent collision
50 /// Rust's default hasher is too slow because it tries to prevent collision
50 /// attacks. We are not concerned about those: if an ill-minded person has
51 /// attacks. We are not concerned about those: if an ill-minded person has
51 /// write access to your repository, you have other issues.
52 /// write access to your repository, you have other issues.
52 pub type FastHashMap<K, V> = HashMap<K, V, RandomXxHashBuilder64>;
53 pub type FastHashMap<K, V> = HashMap<K, V, RandomXxHashBuilder64>;
53
54
54 #[derive(Debug, PartialEq)]
55 #[derive(Debug, PartialEq)]
55 pub enum DirstateMapError {
56 pub enum DirstateMapError {
56 PathNotFound(HgPathBuf),
57 PathNotFound(HgPathBuf),
57 EmptyPath,
58 EmptyPath,
58 InvalidPath(HgPathError),
59 InvalidPath(HgPathError),
59 }
60 }
60
61
61 impl ToString for DirstateMapError {
62 impl ToString for DirstateMapError {
62 fn to_string(&self) -> String {
63 fn to_string(&self) -> String {
63 match self {
64 match self {
64 DirstateMapError::PathNotFound(_) => {
65 DirstateMapError::PathNotFound(_) => {
65 "expected a value, found none".to_string()
66 "expected a value, found none".to_string()
66 }
67 }
67 DirstateMapError::EmptyPath => "Overflow in dirstate.".to_string(),
68 DirstateMapError::EmptyPath => "Overflow in dirstate.".to_string(),
68 DirstateMapError::InvalidPath(e) => e.to_string(),
69 DirstateMapError::InvalidPath(e) => e.to_string(),
69 }
70 }
70 }
71 }
71 }
72 }
72
73
73 #[derive(Debug, derive_more::From)]
74 #[derive(Debug, derive_more::From)]
74 pub enum DirstateError {
75 pub enum DirstateError {
75 Map(DirstateMapError),
76 Map(DirstateMapError),
76 Common(errors::HgError),
77 Common(errors::HgError),
77 }
78 }
78
79
79 #[derive(Debug, derive_more::From)]
80 #[derive(Debug, derive_more::From)]
80 pub enum PatternError {
81 pub enum PatternError {
81 #[from]
82 #[from]
82 Path(HgPathError),
83 Path(HgPathError),
83 UnsupportedSyntax(String),
84 UnsupportedSyntax(String),
84 UnsupportedSyntaxInFile(String, String, usize),
85 UnsupportedSyntaxInFile(String, String, usize),
85 TooLong(usize),
86 TooLong(usize),
86 #[from]
87 #[from]
87 IO(std::io::Error),
88 IO(std::io::Error),
88 /// Needed a pattern that can be turned into a regex but got one that
89 /// Needed a pattern that can be turned into a regex but got one that
89 /// can't. This should only happen through programmer error.
90 /// can't. This should only happen through programmer error.
90 NonRegexPattern(IgnorePattern),
91 NonRegexPattern(IgnorePattern),
91 }
92 }
92
93
93 impl ToString for PatternError {
94 impl ToString for PatternError {
94 fn to_string(&self) -> String {
95 fn to_string(&self) -> String {
95 match self {
96 match self {
96 PatternError::UnsupportedSyntax(syntax) => {
97 PatternError::UnsupportedSyntax(syntax) => {
97 format!("Unsupported syntax {}", syntax)
98 format!("Unsupported syntax {}", syntax)
98 }
99 }
99 PatternError::UnsupportedSyntaxInFile(syntax, file_path, line) => {
100 PatternError::UnsupportedSyntaxInFile(syntax, file_path, line) => {
100 format!(
101 format!(
101 "{}:{}: unsupported syntax {}",
102 "{}:{}: unsupported syntax {}",
102 file_path, line, syntax
103 file_path, line, syntax
103 )
104 )
104 }
105 }
105 PatternError::TooLong(size) => {
106 PatternError::TooLong(size) => {
106 format!("matcher pattern is too long ({} bytes)", size)
107 format!("matcher pattern is too long ({} bytes)", size)
107 }
108 }
108 PatternError::IO(e) => e.to_string(),
109 PatternError::IO(e) => e.to_string(),
109 PatternError::Path(e) => e.to_string(),
110 PatternError::Path(e) => e.to_string(),
110 PatternError::NonRegexPattern(pattern) => {
111 PatternError::NonRegexPattern(pattern) => {
111 format!("'{:?}' cannot be turned into a regex", pattern)
112 format!("'{:?}' cannot be turned into a regex", pattern)
112 }
113 }
113 }
114 }
114 }
115 }
115 }
116 }
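The `FastHashMap` alias defined above is a drop-in `HashMap` with a faster, non-DoS-resistant hasher. A small usage sketch, assuming the hg-core library is used under the crate name `hg`:

```rust
use hg::FastHashMap; // assumption: hg-core's library name is `hg`

/// Count file extensions; FastHashMap is used exactly like std's HashMap.
fn count_extensions(paths: &[&str]) -> FastHashMap<String, u32> {
    let mut counts = FastHashMap::default();
    for path in paths {
        let ext = path.rsplit('.').next().unwrap_or("").to_string();
        *counts.entry(ext).or_insert(0) += 1;
    }
    counts
}
```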
@@ -1,76 +1,75 b''
1 // cat.rs
1 // cat.rs
2 //
2 //
3 // Copyright 2020 Antoine Cezar <antoine.cezar@octobus.net>
3 // Copyright 2020 Antoine Cezar <antoine.cezar@octobus.net>
4 //
4 //
5 // This software may be used and distributed according to the terms of the
5 // This software may be used and distributed according to the terms of the
6 // GNU General Public License version 2 or any later version.
6 // GNU General Public License version 2 or any later version.
7
7
8 use std::path::PathBuf;
8 use std::path::PathBuf;
9
9
10 use crate::repo::Repo;
10 use crate::repo::Repo;
11 use crate::revlog::changelog::Changelog;
11 use crate::revlog::changelog::Changelog;
12 use crate::revlog::manifest::Manifest;
12 use crate::revlog::manifest::Manifest;
13 use crate::revlog::path_encode::path_encode;
13 use crate::revlog::path_encode::path_encode;
14 use crate::revlog::revlog::Revlog;
14 use crate::revlog::revlog::Revlog;
15 use crate::revlog::revlog::RevlogError;
15 use crate::revlog::revlog::RevlogError;
16 use crate::revlog::Node;
16 use crate::revlog::Node;
17 use crate::utils::files::get_path_from_bytes;
17 use crate::utils::files::get_path_from_bytes;
18 use crate::utils::hg_path::{HgPath, HgPathBuf};
18 use crate::utils::hg_path::{HgPath, HgPathBuf};
19
19
20 const METADATA_DELIMITER: [u8; 2] = [b'\x01', b'\n'];
20 const METADATA_DELIMITER: [u8; 2] = [b'\x01', b'\n'];
21
21
22 /// Write out the content of the given tracked files at a given revision.
22 /// Write out the content of the given tracked files at a given revision.
23 ///
23 ///
24 /// * `repo`: The repository to read from.
24 /// * `repo`: The repository to read from.
25 /// * `revset`: The revision (or revset) to cat the files from.
25 /// * `revset`: The revision (or revset) to cat the files from.
26 /// * `files`: The files to output.
26 /// * `files`: The files to output.
27 pub fn cat(
27 pub fn cat(
28 repo: &Repo,
28 repo: &Repo,
29 revset: &str,
29 revset: &str,
30 files: &[HgPathBuf],
30 files: &[HgPathBuf],
31 ) -> Result<Vec<u8>, RevlogError> {
31 ) -> Result<Vec<u8>, RevlogError> {
32 let rev = crate::revset::resolve_single(revset, repo)?;
32 let rev = crate::revset::resolve_single(revset, repo)?;
33 let changelog = Changelog::open(repo)?;
33 let changelog = Changelog::open(repo)?;
34 let manifest = Manifest::open(repo)?;
34 let manifest = Manifest::open(repo)?;
35 let changelog_entry = changelog.get_rev(rev)?;
35 let changelog_entry = changelog.get_rev(rev)?;
36 let manifest_node = Node::from_hex(&changelog_entry.manifest_node()?)
36 let manifest_node =
37 .map_err(|_| RevlogError::Corrupted)?;
37 Node::from_hex_for_repo(&changelog_entry.manifest_node()?)?;
38 let manifest_entry = manifest.get_node(manifest_node.into())?;
38 let manifest_entry = manifest.get_node(manifest_node.into())?;
39 let mut bytes = vec![];
39 let mut bytes = vec![];
40
40
41 for (manifest_file, node_bytes) in manifest_entry.files_with_nodes() {
41 for (manifest_file, node_bytes) in manifest_entry.files_with_nodes() {
42 for cat_file in files.iter() {
42 for cat_file in files.iter() {
43 if cat_file.as_bytes() == manifest_file.as_bytes() {
43 if cat_file.as_bytes() == manifest_file.as_bytes() {
44 let index_path = store_path(manifest_file, b".i");
44 let index_path = store_path(manifest_file, b".i");
45 let data_path = store_path(manifest_file, b".d");
45 let data_path = store_path(manifest_file, b".d");
46
46
47 let file_log =
47 let file_log =
48 Revlog::open(repo, &index_path, Some(&data_path))?;
48 Revlog::open(repo, &index_path, Some(&data_path))?;
49 let file_node = Node::from_hex(node_bytes)
49 let file_node = Node::from_hex_for_repo(node_bytes)?;
50 .map_err(|_| RevlogError::Corrupted)?;
51 let file_rev = file_log.get_node_rev(file_node.into())?;
50 let file_rev = file_log.get_node_rev(file_node.into())?;
52 let data = file_log.get_rev_data(file_rev)?;
51 let data = file_log.get_rev_data(file_rev)?;
53 if data.starts_with(&METADATA_DELIMITER) {
52 if data.starts_with(&METADATA_DELIMITER) {
54 let end_delimiter_position = data
53 let end_delimiter_position = data
55 [METADATA_DELIMITER.len()..]
54 [METADATA_DELIMITER.len()..]
56 .windows(METADATA_DELIMITER.len())
55 .windows(METADATA_DELIMITER.len())
57 .position(|bytes| bytes == METADATA_DELIMITER);
56 .position(|bytes| bytes == METADATA_DELIMITER);
58 if let Some(position) = end_delimiter_position {
57 if let Some(position) = end_delimiter_position {
59 let offset = METADATA_DELIMITER.len() * 2;
58 let offset = METADATA_DELIMITER.len() * 2;
60 bytes.extend(data[position + offset..].iter());
59 bytes.extend(data[position + offset..].iter());
61 }
60 }
62 } else {
61 } else {
63 bytes.extend(data);
62 bytes.extend(data);
64 }
63 }
65 }
64 }
66 }
65 }
67 }
66 }
68
67
69 Ok(bytes)
68 Ok(bytes)
70 }
69 }
71
70
72 fn store_path(hg_path: &HgPath, suffix: &[u8]) -> PathBuf {
71 fn store_path(hg_path: &HgPath, suffix: &[u8]) -> PathBuf {
73 let encoded_bytes =
72 let encoded_bytes =
74 path_encode(&[b"data/", hg_path.as_bytes(), suffix].concat());
73 path_encode(&[b"data/", hg_path.as_bytes(), suffix].concat());
75 get_path_from_bytes(&encoded_bytes).into()
74 get_path_from_bytes(&encoded_bytes).into()
76 }
75 }
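The metadata handling inside `cat` above can be illustrated standalone: a filelog revision that begins with the `\x01\n` delimiter carries a metadata block terminated by a second delimiter, and only the bytes after that terminator are file content. A sketch of that logic (the function name is illustrative):

```rust
const METADATA_DELIMITER: [u8; 2] = [b'\x01', b'\n'];

/// Return the file content with any leading filelog metadata block removed.
fn strip_filelog_metadata(data: &[u8]) -> &[u8] {
    if data.starts_with(&METADATA_DELIMITER) {
        let end_delimiter_position = data[METADATA_DELIMITER.len()..]
            .windows(METADATA_DELIMITER.len())
            .position(|window| window == METADATA_DELIMITER);
        if let Some(position) = end_delimiter_position {
            // Skip both the opening and the closing delimiter.
            return &data[position + METADATA_DELIMITER.len() * 2..];
        }
    }
    data
}
```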
@@ -1,72 +1,67 b''
1 // list_tracked_files.rs
1 // list_tracked_files.rs
2 //
2 //
3 // Copyright 2020 Antoine Cezar <antoine.cezar@octobus.net>
3 // Copyright 2020 Antoine Cezar <antoine.cezar@octobus.net>
4 //
4 //
5 // This software may be used and distributed according to the terms of the
5 // This software may be used and distributed according to the terms of the
6 // GNU General Public License version 2 or any later version.
6 // GNU General Public License version 2 or any later version.
7
7
8 use crate::dirstate::parsers::parse_dirstate;
8 use crate::dirstate::parsers::parse_dirstate;
9 use crate::errors::{HgError, IoResultExt};
9 use crate::errors::HgError;
10 use crate::repo::Repo;
10 use crate::repo::Repo;
11 use crate::revlog::changelog::Changelog;
11 use crate::revlog::changelog::Changelog;
12 use crate::revlog::manifest::{Manifest, ManifestEntry};
12 use crate::revlog::manifest::{Manifest, ManifestEntry};
13 use crate::revlog::node::Node;
13 use crate::revlog::node::Node;
14 use crate::revlog::revlog::RevlogError;
14 use crate::revlog::revlog::RevlogError;
15 use crate::utils::hg_path::HgPath;
15 use crate::utils::hg_path::HgPath;
16 use crate::EntryState;
16 use crate::EntryState;
17 use rayon::prelude::*;
17 use rayon::prelude::*;
18
18
19 /// List files under Mercurial control in the working directory
19 /// List files under Mercurial control in the working directory
20 /// by reading the dirstate
20 /// by reading the dirstate
21 pub struct Dirstate {
21 pub struct Dirstate {
22 /// The `dirstate` content.
22 /// The `dirstate` content.
23 content: Vec<u8>,
23 content: Vec<u8>,
24 }
24 }
25
25
26 impl Dirstate {
26 impl Dirstate {
27 pub fn new(repo: &Repo) -> Result<Self, HgError> {
27 pub fn new(repo: &Repo) -> Result<Self, HgError> {
28 let content = repo
28 let content = repo.hg_vfs().read("dirstate")?;
29 .hg_vfs()
30 .read("dirstate")
31 // TODO: this will be more accurate when we use `HgError` in
32 // `Vfs::read`.
33 .for_file("dirstate".as_ref())?;
34 Ok(Self { content })
29 Ok(Self { content })
35 }
30 }
36
31
37 pub fn tracked_files(&self) -> Result<Vec<&HgPath>, HgError> {
32 pub fn tracked_files(&self) -> Result<Vec<&HgPath>, HgError> {
38 let (_, entries, _) = parse_dirstate(&self.content)?;
33 let (_, entries, _) = parse_dirstate(&self.content)?;
39 let mut files: Vec<&HgPath> = entries
34 let mut files: Vec<&HgPath> = entries
40 .into_iter()
35 .into_iter()
41 .filter_map(|(path, entry)| match entry.state {
36 .filter_map(|(path, entry)| match entry.state {
42 EntryState::Removed => None,
37 EntryState::Removed => None,
43 _ => Some(path),
38 _ => Some(path),
44 })
39 })
45 .collect();
40 .collect();
46 files.par_sort_unstable();
41 files.par_sort_unstable();
47 Ok(files)
42 Ok(files)
48 }
43 }
49 }
44 }
50
45
51 /// List files under Mercurial control at a given revision.
46 /// List files under Mercurial control at a given revision.
52 pub fn list_rev_tracked_files(
47 pub fn list_rev_tracked_files(
53 repo: &Repo,
48 repo: &Repo,
54 revset: &str,
49 revset: &str,
55 ) -> Result<FilesForRev, RevlogError> {
50 ) -> Result<FilesForRev, RevlogError> {
56 let rev = crate::revset::resolve_single(revset, repo)?;
51 let rev = crate::revset::resolve_single(revset, repo)?;
57 let changelog = Changelog::open(repo)?;
52 let changelog = Changelog::open(repo)?;
58 let manifest = Manifest::open(repo)?;
53 let manifest = Manifest::open(repo)?;
59 let changelog_entry = changelog.get_rev(rev)?;
54 let changelog_entry = changelog.get_rev(rev)?;
60 let manifest_node = Node::from_hex(&changelog_entry.manifest_node()?)
55 let manifest_node =
61 .map_err(|_| RevlogError::Corrupted)?;
56 Node::from_hex_for_repo(&changelog_entry.manifest_node()?)?;
62 let manifest_entry = manifest.get_node(manifest_node.into())?;
57 let manifest_entry = manifest.get_node(manifest_node.into())?;
63 Ok(FilesForRev(manifest_entry))
58 Ok(FilesForRev(manifest_entry))
64 }
59 }
65
60
66 pub struct FilesForRev(ManifestEntry);
61 pub struct FilesForRev(ManifestEntry);
67
62
68 impl FilesForRev {
63 impl FilesForRev {
69 pub fn iter(&self) -> impl Iterator<Item = &HgPath> {
64 pub fn iter(&self) -> impl Iterator<Item = &HgPath> {
70 self.0.files()
65 self.0.files()
71 }
66 }
72 }
67 }
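A sketch of how a caller might combine the two listings above: working-directory files read from the dirstate, and files tracked at a revision. This assumes crate-internal code, that `Dirstate` and `list_rev_tracked_files` are reachable at the module paths shown, and that `HgError` converts into `RevlogError`; the revset `"0"` is just an example.

```rust
use crate::operations::{list_rev_tracked_files, Dirstate};
use crate::repo::Repo;
use crate::revlog::revlog::RevlogError;

fn print_listings(repo: &Repo) -> Result<(), RevlogError> {
    // Files tracked in the working directory, read from .hg/dirstate.
    let dirstate = Dirstate::new(repo)?;
    for path in dirstate.tracked_files()? {
        println!("wdir: {}", String::from_utf8_lossy(path.as_bytes()));
    }
    // Files tracked at an example revision.
    let at_rev = list_rev_tracked_files(repo, "0")?;
    for path in at_rev.iter() {
        println!("rev 0: {}", String::from_utf8_lossy(path.as_bytes()));
    }
    Ok(())
}
```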
@@ -1,91 +1,86 b''
1 use crate::errors::HgError;
1 use crate::errors::{HgError, IoResultExt};
2 use crate::operations::{find_root, FindRootError};
2 use crate::operations::{find_root, FindRootError};
3 use crate::requirements;
3 use crate::requirements;
4 use memmap::{Mmap, MmapOptions};
4 use memmap::{Mmap, MmapOptions};
5 use std::path::{Path, PathBuf};
5 use std::path::{Path, PathBuf};
6
6
7 /// A repository on disk
7 /// A repository on disk
8 pub struct Repo {
8 pub struct Repo {
9 working_directory: PathBuf,
9 working_directory: PathBuf,
10 dot_hg: PathBuf,
10 dot_hg: PathBuf,
11 store: PathBuf,
11 store: PathBuf,
12 }
12 }
13
13
14 /// Filesystem access abstraction for the contents of a given "base" directory
14 /// Filesystem access abstraction for the contents of a given "base" directory
15 #[derive(Clone, Copy)]
15 #[derive(Clone, Copy)]
16 pub(crate) struct Vfs<'a> {
16 pub(crate) struct Vfs<'a> {
17 base: &'a Path,
17 base: &'a Path,
18 }
18 }
19
19
20 impl Repo {
20 impl Repo {
21 /// Create a `Repo` value for the repository at the given root path,
21 /// Create a `Repo` value for the repository at the given root path,
22 /// which is assumed to contain a `.hg` sub-directory.
22 /// which is assumed to contain a `.hg` sub-directory.
23 pub fn for_path(root: impl Into<PathBuf>) -> Self {
23 pub fn for_path(root: impl Into<PathBuf>) -> Self {
24 let working_directory = root.into();
24 let working_directory = root.into();
25 let dot_hg = working_directory.join(".hg");
25 let dot_hg = working_directory.join(".hg");
26 Self {
26 Self {
27 store: dot_hg.join("store"),
27 store: dot_hg.join("store"),
28 dot_hg,
28 dot_hg,
29 working_directory,
29 working_directory,
30 }
30 }
31 }
31 }
32
32
33 pub fn find() -> Result<Self, FindRootError> {
33 pub fn find() -> Result<Self, FindRootError> {
34 find_root().map(Self::for_path)
34 find_root().map(Self::for_path)
35 }
35 }
36
36
37 pub fn check_requirements(&self) -> Result<(), HgError> {
37 pub fn check_requirements(&self) -> Result<(), HgError> {
38 requirements::check(self)
38 requirements::check(self)
39 }
39 }
40
40
41 pub fn working_directory_path(&self) -> &Path {
41 pub fn working_directory_path(&self) -> &Path {
42 &self.working_directory
42 &self.working_directory
43 }
43 }
44
44
45 /// For accessing repository files (in `.hg`), except for the store
45 /// For accessing repository files (in `.hg`), except for the store
46 /// (`.hg/store`).
46 /// (`.hg/store`).
47 pub(crate) fn hg_vfs(&self) -> Vfs<'_> {
47 pub(crate) fn hg_vfs(&self) -> Vfs<'_> {
48 Vfs { base: &self.dot_hg }
48 Vfs { base: &self.dot_hg }
49 }
49 }
50
50
51 /// For accessing repository store files (in `.hg/store`)
51 /// For accessing repository store files (in `.hg/store`)
52 pub(crate) fn store_vfs(&self) -> Vfs<'_> {
52 pub(crate) fn store_vfs(&self) -> Vfs<'_> {
53 Vfs { base: &self.store }
53 Vfs { base: &self.store }
54 }
54 }
55
55
56 /// For accessing the working copy
56 /// For accessing the working copy
57
57
58 // The underscore prefix silences the "never used" warning. Remove before
58 // The underscore prefix silences the "never used" warning. Remove before
59 // using.
59 // using.
60 pub(crate) fn _working_directory_vfs(&self) -> Vfs<'_> {
60 pub(crate) fn _working_directory_vfs(&self) -> Vfs<'_> {
61 Vfs {
61 Vfs {
62 base: &self.working_directory,
62 base: &self.working_directory,
63 }
63 }
64 }
64 }
65 }
65 }
66
66
67 impl Vfs<'_> {
67 impl Vfs<'_> {
68 pub(crate) fn read(
68 pub(crate) fn read(
69 &self,
69 &self,
70 relative_path: impl AsRef<Path>,
70 relative_path: impl AsRef<Path>,
71 ) -> std::io::Result<Vec<u8>> {
71 ) -> Result<Vec<u8>, HgError> {
72 std::fs::read(self.base.join(relative_path))
72 let path = self.base.join(relative_path);
73 }
73 std::fs::read(&path).for_file(&path)
74
74 }
75 pub(crate) fn open(
75
76 &self,
77 relative_path: impl AsRef<Path>,
78 ) -> std::io::Result<std::fs::File> {
79 std::fs::File::open(self.base.join(relative_path))
80 }
81
82 pub(crate) fn mmap_open(
76 pub(crate) fn mmap_open(
83 &self,
77 &self,
84 relative_path: impl AsRef<Path>,
78 relative_path: impl AsRef<Path>,
85 ) -> std::io::Result<Mmap> {
79 ) -> Result<Mmap, HgError> {
86 let file = self.open(relative_path)?;
80 let path = self.base.join(relative_path);
81 let file = std::fs::File::open(&path).for_file(&path)?;
87 // TODO: what are the safety requirements here?
82 // TODO: what are the safety requirements here?
88 let mmap = unsafe { MmapOptions::new().map(&file) }?;
83 let mmap = unsafe { MmapOptions::new().map(&file) }.for_file(&path)?;
89 Ok(mmap)
84 Ok(mmap)
90 }
85 }
91 }
86 }
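Within hg-core (both `Vfs` and `hg_vfs()` are `pub(crate)`), the changed `read` signature means callers get an `HgError` that already carries the file path, so the combinators from `errors` can be chained directly. A hypothetical crate-internal helper; "branch" is just an example file name under `.hg/`:

```rust
use crate::errors::{HgError, HgResultExt};
use crate::repo::Repo;

/// Illustrative: read a repository file that may legitimately be absent.
fn read_branch_file(repo: &Repo) -> Result<Option<Vec<u8>>, HgError> {
    // `Vfs::read` now returns `Result<_, HgError>` with the path attached,
    // so `io_not_found_as_none` can be chained without `for_file`.
    repo.hg_vfs().read("branch").io_not_found_as_none()
}
```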
@@ -1,70 +1,67 b''
1 use crate::errors::{HgError, HgResultExt, IoResultExt};
1 use crate::errors::{HgError, HgResultExt};
2 use crate::repo::Repo;
2 use crate::repo::Repo;
3
3
4 fn parse(bytes: &[u8]) -> Result<Vec<String>, HgError> {
4 fn parse(bytes: &[u8]) -> Result<Vec<String>, HgError> {
5 // The Python code reading this file uses `str.splitlines`
5 // The Python code reading this file uses `str.splitlines`
6 // which looks for a number of line separators (even including a couple of
6 // which looks for a number of line separators (even including a couple of
7 // non-ASCII ones), but Python code writing it always uses `\n`.
7 // non-ASCII ones), but Python code writing it always uses `\n`.
8 let lines = bytes.split(|&byte| byte == b'\n');
8 let lines = bytes.split(|&byte| byte == b'\n');
9
9
10 lines
10 lines
11 .filter(|line| !line.is_empty())
11 .filter(|line| !line.is_empty())
12 .map(|line| {
12 .map(|line| {
13 // Python uses Unicode `str.isalnum` but feature names are all
13 // Python uses Unicode `str.isalnum` but feature names are all
14 // ASCII
14 // ASCII
15 if line[0].is_ascii_alphanumeric() && line.is_ascii() {
15 if line[0].is_ascii_alphanumeric() && line.is_ascii() {
16 Ok(String::from_utf8(line.into()).unwrap())
16 Ok(String::from_utf8(line.into()).unwrap())
17 } else {
17 } else {
18 Err(HgError::corrupted("parse error in 'requires' file"))
18 Err(HgError::corrupted("parse error in 'requires' file"))
19 }
19 }
20 })
20 })
21 .collect()
21 .collect()
22 }
22 }
23
23
24 pub fn load(repo: &Repo) -> Result<Vec<String>, HgError> {
24 pub fn load(repo: &Repo) -> Result<Vec<String>, HgError> {
25 if let Some(bytes) = repo
25 if let Some(bytes) =
26 .hg_vfs()
26 repo.hg_vfs().read("requires").io_not_found_as_none()?
27 .read("requires")
28 .for_file("requires".as_ref())
29 .io_not_found_as_none()?
30 {
27 {
31 parse(&bytes)
28 parse(&bytes)
32 } else {
29 } else {
33 // Treat a missing file the same as an empty file.
30 // Treat a missing file the same as an empty file.
34 // From `mercurial/localrepo.py`:
31 // From `mercurial/localrepo.py`:
35 // > requires file contains a newline-delimited list of
32 // > requires file contains a newline-delimited list of
36 // > features/capabilities the opener (us) must have in order to use
33 // > features/capabilities the opener (us) must have in order to use
37 // > the repository. This file was introduced in Mercurial 0.9.2,
34 // > the repository. This file was introduced in Mercurial 0.9.2,
38 // > which means very old repositories may not have one. We assume
35 // > which means very old repositories may not have one. We assume
39 // > a missing file translates to no requirements.
36 // > a missing file translates to no requirements.
40 Ok(Vec::new())
37 Ok(Vec::new())
41 }
38 }
42 }
39 }
43
40
44 pub fn check(repo: &Repo) -> Result<(), HgError> {
41 pub fn check(repo: &Repo) -> Result<(), HgError> {
45 for feature in load(repo)? {
42 for feature in load(repo)? {
46 if !SUPPORTED.contains(&&*feature) {
43 if !SUPPORTED.contains(&&*feature) {
47 // TODO: collect all unknown features and include them in the
44 // TODO: collect all unknown features and include them in the
48 // error message?
45 // error message?
49 return Err(HgError::UnsupportedFeature(format!(
46 return Err(HgError::UnsupportedFeature(format!(
50 "repository requires feature unknown to this Mercurial: {}",
47 "repository requires feature unknown to this Mercurial: {}",
51 feature
48 feature
52 )));
49 )));
53 }
50 }
54 }
51 }
55 Ok(())
52 Ok(())
56 }
53 }
57
54
58 // TODO: set this to actually-supported features
55 // TODO: set this to actually-supported features
59 const SUPPORTED: &[&str] = &[
56 const SUPPORTED: &[&str] = &[
60 "dotencode",
57 "dotencode",
61 "fncache",
58 "fncache",
62 "generaldelta",
59 "generaldelta",
63 "revlogv1",
60 "revlogv1",
64 "sparserevlog",
61 "sparserevlog",
65 "store",
62 "store",
66 // As of this writing everything rhg does is read-only.
63 // As of this writing everything rhg does is read-only.
67 // When it starts writing to the repository, it’ll need to either keep the
64 // When it starts writing to the repository, it’ll need to either keep the
68 // persistent nodemap up to date or remove this entry:
65 // persistent nodemap up to date or remove this entry:
69 "persistent-nodemap",
66 "persistent-nodemap",
70 ];
67 ];
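A test-style sketch (written as if it lived inside `requirements.rs`, since `parse` is private) of what `parse` accepts and rejects; the listed entries are just a typical `.hg/requires` payload:

```rust
#[test]
fn parse_requires_sketch() {
    // Well-formed: newline-delimited ASCII feature names.
    let good = b"dotencode\nfncache\ngeneraldelta\nrevlogv1\nstore\n";
    let parsed = parse(good).expect("well-formed requires file");
    assert_eq!(
        parsed,
        vec!["dotencode", "fncache", "generaldelta", "revlogv1", "store"]
    );

    // A line starting with a non-alphanumeric byte is treated as corruption.
    assert!(parse(b"\xffbogus\n").is_err());
}
```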
@@ -1,58 +1,61 b''
1 use crate::errors::HgError;
1 use crate::repo::Repo;
2 use crate::repo::Repo;
2 use crate::revlog::revlog::{Revlog, RevlogError};
3 use crate::revlog::revlog::{Revlog, RevlogError};
3 use crate::revlog::NodePrefix;
4 use crate::revlog::NodePrefix;
4 use crate::revlog::Revision;
5 use crate::revlog::Revision;
5
6
6 /// A specialized `Revlog` to work with `changelog` data format.
7 /// A specialized `Revlog` to work with `changelog` data format.
7 pub struct Changelog {
8 pub struct Changelog {
8 /// The generic `revlog` format.
9 /// The generic `revlog` format.
9 pub(crate) revlog: Revlog,
10 pub(crate) revlog: Revlog,
10 }
11 }
11
12
12 impl Changelog {
13 impl Changelog {
13 /// Open the `changelog` of a repository given by its root.
14 /// Open the `changelog` of a repository given by its root.
14 pub fn open(repo: &Repo) -> Result<Self, RevlogError> {
15 pub fn open(repo: &Repo) -> Result<Self, RevlogError> {
15 let revlog = Revlog::open(repo, "00changelog.i", None)?;
16 let revlog = Revlog::open(repo, "00changelog.i", None)?;
16 Ok(Self { revlog })
17 Ok(Self { revlog })
17 }
18 }
18
19
19 /// Return the `ChangelogEntry` for a given node id.
20 pub fn get_node(
20 /// Return the `ChangelogEntry` for a given node id.
21 pub fn get_node(
21 &self,
22 &self,
22 node: NodePrefix,
23 node: NodePrefix,
23 ) -> Result<ChangelogEntry, RevlogError> {
24 ) -> Result<ChangelogEntry, RevlogError> {
24 let rev = self.revlog.get_node_rev(node)?;
25 let rev = self.revlog.get_node_rev(node)?;
25 self.get_rev(rev)
26 self.get_rev(rev)
26 }
27 }
27
28
28 /// Return the `ChangelogEntry` of a given node revision.
29 /// Return the `ChangelogEntry` of a given node revision.
29 pub fn get_rev(
30 pub fn get_rev(
30 &self,
31 &self,
31 rev: Revision,
32 rev: Revision,
32 ) -> Result<ChangelogEntry, RevlogError> {
33 ) -> Result<ChangelogEntry, RevlogError> {
33 let bytes = self.revlog.get_rev_data(rev)?;
34 let bytes = self.revlog.get_rev_data(rev)?;
34 Ok(ChangelogEntry { bytes })
35 Ok(ChangelogEntry { bytes })
35 }
36 }
36 }
37 }
37
38
38 /// `Changelog` entry which knows how to interpret the `changelog` data bytes.
39 /// `Changelog` entry which knows how to interpret the `changelog` data bytes.
39 #[derive(Debug)]
40 #[derive(Debug)]
40 pub struct ChangelogEntry {
41 pub struct ChangelogEntry {
41 /// The data bytes of the `changelog` entry.
42 /// The data bytes of the `changelog` entry.
42 bytes: Vec<u8>,
43 bytes: Vec<u8>,
43 }
44 }
44
45
45 impl ChangelogEntry {
46 impl ChangelogEntry {
46 /// Return an iterator over the lines of the entry.
47 /// Return an iterator over the lines of the entry.
47 pub fn lines(&self) -> impl Iterator<Item = &[u8]> {
48 pub fn lines(&self) -> impl Iterator<Item = &[u8]> {
48 self.bytes
49 self.bytes
49 .split(|b| b == &b'\n')
50 .split(|b| b == &b'\n')
50 .filter(|line| !line.is_empty())
51 .filter(|line| !line.is_empty())
51 }
52 }
52
53
53 /// Return the node id of the `manifest` referenced by this `changelog`
54 /// Return the node id of the `manifest` referenced by this `changelog`
54 /// entry.
55 /// entry.
55 pub fn manifest_node(&self) -> Result<&[u8], RevlogError> {
56 pub fn manifest_node(&self) -> Result<&[u8], RevlogError> {
56 self.lines().next().ok_or(RevlogError::Corrupted)
57 self.lines()
58 .next()
59 .ok_or_else(|| HgError::corrupted("empty changelog entry").into())
57 }
60 }
58 }
61 }
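A sketch tying the changelog API above to the manifest: resolve a revision, read the hex manifest node id from the first line of its changelog entry, then look it up in the manifest. `Manifest::open`, `Node::from_hex_for_repo`, and the `HgError` -> `RevlogError` conversion come from other files in this series, so their exact shapes are assumed here.

```rust
use crate::repo::Repo;
use crate::revlog::changelog::Changelog;
use crate::revlog::manifest::{Manifest, ManifestEntry};
use crate::revlog::node::Node;
use crate::revlog::revlog::RevlogError;
use crate::revlog::Revision;

fn manifest_for_rev(
    repo: &Repo,
    rev: Revision,
) -> Result<ManifestEntry, RevlogError> {
    let changelog = Changelog::open(repo)?;
    let manifest = Manifest::open(repo)?;
    let changelog_entry = changelog.get_rev(rev)?;
    // The first line of a changelog entry is the hex manifest node id.
    let manifest_node =
        Node::from_hex_for_repo(&changelog_entry.manifest_node()?)?;
    let manifest_entry = manifest.get_node(manifest_node.into())?;
    Ok(manifest_entry)
}
```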
@@ -1,402 +1,404 b''
1 use std::convert::TryInto;
1 use std::convert::TryInto;
2 use std::ops::Deref;
2 use std::ops::Deref;
3
3
4 use byteorder::{BigEndian, ByteOrder};
4 use byteorder::{BigEndian, ByteOrder};
5
5
6 use crate::errors::HgError;
6 use crate::revlog::node::Node;
7 use crate::revlog::node::Node;
7 use crate::revlog::revlog::RevlogError;
8 use crate::revlog::revlog::RevlogError;
8 use crate::revlog::{Revision, NULL_REVISION};
9 use crate::revlog::{Revision, NULL_REVISION};
9
10
10 pub const INDEX_ENTRY_SIZE: usize = 64;
11 pub const INDEX_ENTRY_SIZE: usize = 64;
11
12
12 /// A Revlog index
13 /// A Revlog index
13 pub struct Index {
14 pub struct Index {
14 bytes: Box<dyn Deref<Target = [u8]> + Send>,
15 bytes: Box<dyn Deref<Target = [u8]> + Send>,
15 /// Offsets of starts of index blocks.
16 /// Offsets of starts of index blocks.
16 /// Only needed when the index is interleaved with data.
17 /// Only needed when the index is interleaved with data.
17 offsets: Option<Vec<usize>>,
18 offsets: Option<Vec<usize>>,
18 }
19 }
19
20
20 impl Index {
21 impl Index {
21 /// Create an index from bytes.
22 /// Create an index from bytes.
22 /// Calculate the start of each entry when is_inline is true.
23 /// Calculate the start of each entry when is_inline is true.
23 pub fn new(
24 pub fn new(
24 bytes: Box<dyn Deref<Target = [u8]> + Send>,
25 bytes: Box<dyn Deref<Target = [u8]> + Send>,
25 ) -> Result<Self, RevlogError> {
26 ) -> Result<Self, RevlogError> {
26 if is_inline(&bytes) {
27 if is_inline(&bytes) {
27 let mut offset: usize = 0;
28 let mut offset: usize = 0;
28 let mut offsets = Vec::new();
29 let mut offsets = Vec::new();
29
30
30 while offset + INDEX_ENTRY_SIZE <= bytes.len() {
31 while offset + INDEX_ENTRY_SIZE <= bytes.len() {
31 offsets.push(offset);
32 offsets.push(offset);
32 let end = offset + INDEX_ENTRY_SIZE;
33 let end = offset + INDEX_ENTRY_SIZE;
33 let entry = IndexEntry {
34 let entry = IndexEntry {
34 bytes: &bytes[offset..end],
35 bytes: &bytes[offset..end],
35 offset_override: None,
36 offset_override: None,
36 };
37 };
37
38
38 offset += INDEX_ENTRY_SIZE + entry.compressed_len();
39 offset += INDEX_ENTRY_SIZE + entry.compressed_len();
39 }
40 }
40
41
41 if offset == bytes.len() {
42 if offset == bytes.len() {
42 Ok(Self {
43 Ok(Self {
43 bytes,
44 bytes,
44 offsets: Some(offsets),
45 offsets: Some(offsets),
45 })
46 })
46 } else {
47 } else {
47 Err(RevlogError::Corrupted)
48 Err(HgError::corrupted("unexpected inline revlog length")
49 .into())
48 }
50 }
49 } else {
51 } else {
50 Ok(Self {
52 Ok(Self {
51 bytes,
53 bytes,
52 offsets: None,
54 offsets: None,
53 })
55 })
54 }
56 }
55 }
57 }
56
58
57 /// Value of the inline flag.
59 /// Value of the inline flag.
58 pub fn is_inline(&self) -> bool {
60 pub fn is_inline(&self) -> bool {
59 is_inline(&self.bytes)
61 is_inline(&self.bytes)
60 }
62 }
61
63
62 /// Return a slice of bytes if `revlog` is inline. Panic if not.
64 /// Return a slice of bytes if `revlog` is inline. Panic if not.
63 pub fn data(&self, start: usize, end: usize) -> &[u8] {
65 pub fn data(&self, start: usize, end: usize) -> &[u8] {
64 if !self.is_inline() {
66 if !self.is_inline() {
65 panic!("tried to access data in the index of a revlog that is not inline");
67 panic!("tried to access data in the index of a revlog that is not inline");
66 }
68 }
67 &self.bytes[start..end]
69 &self.bytes[start..end]
68 }
70 }
69
71
70 /// Return number of entries of the revlog index.
72 /// Return number of entries of the revlog index.
71 pub fn len(&self) -> usize {
73 pub fn len(&self) -> usize {
72 if let Some(offsets) = &self.offsets {
74 if let Some(offsets) = &self.offsets {
73 offsets.len()
75 offsets.len()
74 } else {
76 } else {
75 self.bytes.len() / INDEX_ENTRY_SIZE
77 self.bytes.len() / INDEX_ENTRY_SIZE
76 }
78 }
77 }
79 }
78
80
79 /// Returns `true` if the `Index` has zero `entries`.
81 /// Returns `true` if the `Index` has zero `entries`.
80 pub fn is_empty(&self) -> bool {
82 pub fn is_empty(&self) -> bool {
81 self.len() == 0
83 self.len() == 0
82 }
84 }
83
85
84 /// Return the index entry corresponding to the given revision if it
86 /// Return the index entry corresponding to the given revision if it
85 /// exists.
87 /// exists.
86 pub fn get_entry(&self, rev: Revision) -> Option<IndexEntry> {
88 pub fn get_entry(&self, rev: Revision) -> Option<IndexEntry> {
87 if rev == NULL_REVISION {
89 if rev == NULL_REVISION {
88 return None;
90 return None;
89 }
91 }
90 if let Some(offsets) = &self.offsets {
92 if let Some(offsets) = &self.offsets {
91 self.get_entry_inline(rev, offsets)
93 self.get_entry_inline(rev, offsets)
92 } else {
94 } else {
93 self.get_entry_separated(rev)
95 self.get_entry_separated(rev)
94 }
96 }
95 }
97 }
96
98
97 fn get_entry_inline(
99 fn get_entry_inline(
98 &self,
100 &self,
99 rev: Revision,
101 rev: Revision,
100 offsets: &[usize],
102 offsets: &[usize],
101 ) -> Option<IndexEntry> {
103 ) -> Option<IndexEntry> {
102 let start = *offsets.get(rev as usize)?;
104 let start = *offsets.get(rev as usize)?;
103 let end = start.checked_add(INDEX_ENTRY_SIZE)?;
105 let end = start.checked_add(INDEX_ENTRY_SIZE)?;
104 let bytes = &self.bytes[start..end];
106 let bytes = &self.bytes[start..end];
105
107
106 // See IndexEntry for an explanation of this override.
108 // See IndexEntry for an explanation of this override.
107 let offset_override = Some(end);
109 let offset_override = Some(end);
108
110
109 Some(IndexEntry {
111 Some(IndexEntry {
110 bytes,
112 bytes,
111 offset_override,
113 offset_override,
112 })
114 })
113 }
115 }
114
116
115 fn get_entry_separated(&self, rev: Revision) -> Option<IndexEntry> {
117 fn get_entry_separated(&self, rev: Revision) -> Option<IndexEntry> {
116 let max_rev = self.bytes.len() / INDEX_ENTRY_SIZE;
118 let max_rev = self.bytes.len() / INDEX_ENTRY_SIZE;
117 if rev as usize >= max_rev {
119 if rev as usize >= max_rev {
118 return None;
120 return None;
119 }
121 }
120 let start = rev as usize * INDEX_ENTRY_SIZE;
122 let start = rev as usize * INDEX_ENTRY_SIZE;
121 let end = start + INDEX_ENTRY_SIZE;
123 let end = start + INDEX_ENTRY_SIZE;
122 let bytes = &self.bytes[start..end];
124 let bytes = &self.bytes[start..end];
123
125
124 // Override the offset of the first revision as its bytes are used
126 // Override the offset of the first revision as its bytes are used
125 // for the index's metadata (saving space because it is always 0)
127 // for the index's metadata (saving space because it is always 0)
126 let offset_override = if rev == 0 { Some(0) } else { None };
128 let offset_override = if rev == 0 { Some(0) } else { None };
127
129
128 Some(IndexEntry {
130 Some(IndexEntry {
129 bytes,
131 bytes,
130 offset_override,
132 offset_override,
131 })
133 })
132 }
134 }
133 }
135 }
134
136
135 impl super::RevlogIndex for Index {
137 impl super::RevlogIndex for Index {
136 fn len(&self) -> usize {
138 fn len(&self) -> usize {
137 self.len()
139 self.len()
138 }
140 }
139
141
140 fn node(&self, rev: Revision) -> Option<&Node> {
142 fn node(&self, rev: Revision) -> Option<&Node> {
141 self.get_entry(rev).map(|entry| entry.hash())
143 self.get_entry(rev).map(|entry| entry.hash())
142 }
144 }
143 }
145 }
144
146
145 #[derive(Debug)]
147 #[derive(Debug)]
146 pub struct IndexEntry<'a> {
148 pub struct IndexEntry<'a> {
147 bytes: &'a [u8],
149 bytes: &'a [u8],
148 /// Allows to override the offset value of the entry.
150 /// Allows to override the offset value of the entry.
149 ///
151 ///
150 /// For interleaved index and data, the offset stored in the index
152 /// For interleaved index and data, the offset stored in the index
151 /// corresponds to the separated data offset.
153 /// corresponds to the separated data offset.
152 /// It has to be overridden with the actual offset in the interleaved
154 /// It has to be overridden with the actual offset in the interleaved
153 /// index which is just after the index block.
155 /// index which is just after the index block.
154 ///
156 ///
155 /// For separated index and data, the offset stored in the first index
157 /// For separated index and data, the offset stored in the first index
156 /// entry is mixed with the index headers.
158 /// entry is mixed with the index headers.
157 /// It has to be overridden with 0.
159 /// It has to be overridden with 0.
158 offset_override: Option<usize>,
160 offset_override: Option<usize>,
159 }
161 }
160
162
161 impl<'a> IndexEntry<'a> {
163 impl<'a> IndexEntry<'a> {
162 /// Return the offset of the data.
164 /// Return the offset of the data.
163 pub fn offset(&self) -> usize {
165 pub fn offset(&self) -> usize {
164 if let Some(offset_override) = self.offset_override {
166 if let Some(offset_override) = self.offset_override {
165 offset_override
167 offset_override
166 } else {
168 } else {
167 let mut bytes = [0; 8];
169 let mut bytes = [0; 8];
168 bytes[2..8].copy_from_slice(&self.bytes[0..=5]);
170 bytes[2..8].copy_from_slice(&self.bytes[0..=5]);
169 BigEndian::read_u64(&bytes[..]) as usize
171 BigEndian::read_u64(&bytes[..]) as usize
170 }
172 }
171 }
173 }
172
174
173 /// Return the compressed length of the data.
175 /// Return the compressed length of the data.
174 pub fn compressed_len(&self) -> usize {
176 pub fn compressed_len(&self) -> usize {
175 BigEndian::read_u32(&self.bytes[8..=11]) as usize
177 BigEndian::read_u32(&self.bytes[8..=11]) as usize
176 }
178 }
177
179
178 /// Return the uncompressed length of the data.
180 /// Return the uncompressed length of the data.
179 pub fn uncompressed_len(&self) -> usize {
181 pub fn uncompressed_len(&self) -> usize {
180 BigEndian::read_u32(&self.bytes[12..=15]) as usize
182 BigEndian::read_u32(&self.bytes[12..=15]) as usize
181 }
183 }
182
184
183 /// Return the revision upon which the data has been derived.
185 /// Return the revision upon which the data has been derived.
184 pub fn base_revision(&self) -> Revision {
186 pub fn base_revision(&self) -> Revision {
185 // TODO Maybe return an Option when base_revision == rev?
187 // TODO Maybe return an Option when base_revision == rev?
186 // Requires to add rev to IndexEntry
188 // Requires to add rev to IndexEntry
187
189
188 BigEndian::read_i32(&self.bytes[16..])
190 BigEndian::read_i32(&self.bytes[16..])
189 }
191 }
190
192
191 pub fn p1(&self) -> Revision {
193 pub fn p1(&self) -> Revision {
192 BigEndian::read_i32(&self.bytes[24..])
194 BigEndian::read_i32(&self.bytes[24..])
193 }
195 }
194
196
195 pub fn p2(&self) -> Revision {
197 pub fn p2(&self) -> Revision {
196 BigEndian::read_i32(&self.bytes[28..])
198 BigEndian::read_i32(&self.bytes[28..])
197 }
199 }
198
200
199 /// Return the hash of revision's full text.
201 /// Return the hash of revision's full text.
200 ///
202 ///
201 /// Currently, SHA-1 is used and only the first 20 bytes of this field
203 /// Currently, SHA-1 is used and only the first 20 bytes of this field
202 /// are used.
204 /// are used.
203 pub fn hash(&self) -> &'a Node {
205 pub fn hash(&self) -> &'a Node {
204 (&self.bytes[32..52]).try_into().unwrap()
206 (&self.bytes[32..52]).try_into().unwrap()
205 }
207 }
206 }
208 }
207
209
208 /// Value of the inline flag.
210 /// Value of the inline flag.
209 pub fn is_inline(index_bytes: &[u8]) -> bool {
211 pub fn is_inline(index_bytes: &[u8]) -> bool {
210 match &index_bytes[0..=1] {
212 match &index_bytes[0..=1] {
211 [0, 0] | [0, 2] => false,
213 [0, 0] | [0, 2] => false,
212 _ => true,
214 _ => true,
213 }
215 }
214 }
216 }
215
217
216 #[cfg(test)]
218 #[cfg(test)]
217 mod tests {
219 mod tests {
218 use super::*;
220 use super::*;
219
221
220 #[cfg(test)]
222 #[cfg(test)]
221 #[derive(Debug, Copy, Clone)]
223 #[derive(Debug, Copy, Clone)]
222 pub struct IndexEntryBuilder {
224 pub struct IndexEntryBuilder {
223 is_first: bool,
225 is_first: bool,
224 is_inline: bool,
226 is_inline: bool,
225 is_general_delta: bool,
227 is_general_delta: bool,
226 version: u16,
228 version: u16,
227 offset: usize,
229 offset: usize,
228 compressed_len: usize,
230 compressed_len: usize,
229 uncompressed_len: usize,
231 uncompressed_len: usize,
230 base_revision: Revision,
232 base_revision: Revision,
231 }
233 }
232
234
233 #[cfg(test)]
235 #[cfg(test)]
234 impl IndexEntryBuilder {
236 impl IndexEntryBuilder {
235 pub fn new() -> Self {
237 pub fn new() -> Self {
236 Self {
238 Self {
237 is_first: false,
239 is_first: false,
238 is_inline: false,
240 is_inline: false,
239 is_general_delta: true,
241 is_general_delta: true,
240 version: 2,
242 version: 2,
241 offset: 0,
243 offset: 0,
242 compressed_len: 0,
244 compressed_len: 0,
243 uncompressed_len: 0,
245 uncompressed_len: 0,
244 base_revision: 0,
246 base_revision: 0,
245 }
247 }
246 }
248 }
247
249
248 pub fn is_first(&mut self, value: bool) -> &mut Self {
250 pub fn is_first(&mut self, value: bool) -> &mut Self {
249 self.is_first = value;
251 self.is_first = value;
250 self
252 self
251 }
253 }
252
254
253 pub fn with_inline(&mut self, value: bool) -> &mut Self {
255 pub fn with_inline(&mut self, value: bool) -> &mut Self {
254 self.is_inline = value;
256 self.is_inline = value;
255 self
257 self
256 }
258 }
257
259
258 pub fn with_general_delta(&mut self, value: bool) -> &mut Self {
260 pub fn with_general_delta(&mut self, value: bool) -> &mut Self {
259 self.is_general_delta = value;
261 self.is_general_delta = value;
260 self
262 self
261 }
263 }
262
264
263 pub fn with_version(&mut self, value: u16) -> &mut Self {
265 pub fn with_version(&mut self, value: u16) -> &mut Self {
264 self.version = value;
266 self.version = value;
265 self
267 self
266 }
268 }
267
269
268 pub fn with_offset(&mut self, value: usize) -> &mut Self {
270 pub fn with_offset(&mut self, value: usize) -> &mut Self {
269 self.offset = value;
271 self.offset = value;
270 self
272 self
271 }
273 }
272
274
273 pub fn with_compressed_len(&mut self, value: usize) -> &mut Self {
275 pub fn with_compressed_len(&mut self, value: usize) -> &mut Self {
274 self.compressed_len = value;
276 self.compressed_len = value;
275 self
277 self
276 }
278 }
277
279
278 pub fn with_uncompressed_len(&mut self, value: usize) -> &mut Self {
280 pub fn with_uncompressed_len(&mut self, value: usize) -> &mut Self {
279 self.uncompressed_len = value;
281 self.uncompressed_len = value;
280 self
282 self
281 }
283 }
282
284
283 pub fn with_base_revision(&mut self, value: Revision) -> &mut Self {
285 pub fn with_base_revision(&mut self, value: Revision) -> &mut Self {
284 self.base_revision = value;
286 self.base_revision = value;
285 self
287 self
286 }
288 }
287
289
288 pub fn build(&self) -> Vec<u8> {
290 pub fn build(&self) -> Vec<u8> {
289 let mut bytes = Vec::with_capacity(INDEX_ENTRY_SIZE);
291 let mut bytes = Vec::with_capacity(INDEX_ENTRY_SIZE);
290 if self.is_first {
292 if self.is_first {
291 bytes.extend(&match (self.is_general_delta, self.is_inline) {
293 bytes.extend(&match (self.is_general_delta, self.is_inline) {
292 (false, false) => [0u8, 0],
294 (false, false) => [0u8, 0],
293 (false, true) => [0u8, 1],
295 (false, true) => [0u8, 1],
294 (true, false) => [0u8, 2],
296 (true, false) => [0u8, 2],
295 (true, true) => [0u8, 3],
297 (true, true) => [0u8, 3],
296 });
298 });
297 bytes.extend(&self.version.to_be_bytes());
299 bytes.extend(&self.version.to_be_bytes());
298 // Remaining offset bytes.
300 // Remaining offset bytes.
299 bytes.extend(&[0u8; 2]);
301 bytes.extend(&[0u8; 2]);
300 } else {
302 } else {
301 // Offset is only 6 bytes while usize is 8.
303 // Offset is only 6 bytes while usize is 8.
302 bytes.extend(&self.offset.to_be_bytes()[2..]);
304 bytes.extend(&self.offset.to_be_bytes()[2..]);
303 }
305 }
304 bytes.extend(&[0u8; 2]); // Revision flags.
306 bytes.extend(&[0u8; 2]); // Revision flags.
305 bytes.extend(&self.compressed_len.to_be_bytes()[4..]);
307 bytes.extend(&self.compressed_len.to_be_bytes()[4..]);
306 bytes.extend(&self.uncompressed_len.to_be_bytes()[4..]);
308 bytes.extend(&self.uncompressed_len.to_be_bytes()[4..]);
307 bytes.extend(&self.base_revision.to_be_bytes());
309 bytes.extend(&self.base_revision.to_be_bytes());
308 bytes
310 bytes
309 }
311 }
310 }
312 }
311
313
312 #[test]
314 #[test]
313 fn is_not_inline_when_no_inline_flag_test() {
315 fn is_not_inline_when_no_inline_flag_test() {
314 let bytes = IndexEntryBuilder::new()
316 let bytes = IndexEntryBuilder::new()
315 .is_first(true)
317 .is_first(true)
316 .with_general_delta(false)
318 .with_general_delta(false)
317 .with_inline(false)
319 .with_inline(false)
318 .build();
320 .build();
319
321
320 assert_eq!(is_inline(&bytes), false)
322 assert_eq!(is_inline(&bytes), false)
321 }
323 }
322
324
323 #[test]
325 #[test]
324 fn is_inline_when_inline_flag_test() {
326 fn is_inline_when_inline_flag_test() {
325 let bytes = IndexEntryBuilder::new()
327 let bytes = IndexEntryBuilder::new()
326 .is_first(true)
328 .is_first(true)
327 .with_general_delta(false)
329 .with_general_delta(false)
328 .with_inline(true)
330 .with_inline(true)
329 .build();
331 .build();
330
332
331 assert_eq!(is_inline(&bytes), true)
333 assert_eq!(is_inline(&bytes), true)
332 }
334 }
333
335
334 #[test]
336 #[test]
335 fn is_inline_when_inline_and_generaldelta_flags_test() {
337 fn is_inline_when_inline_and_generaldelta_flags_test() {
336 let bytes = IndexEntryBuilder::new()
338 let bytes = IndexEntryBuilder::new()
337 .is_first(true)
339 .is_first(true)
338 .with_general_delta(true)
340 .with_general_delta(true)
339 .with_inline(true)
341 .with_inline(true)
340 .build();
342 .build();
341
343
342 assert_eq!(is_inline(&bytes), true)
344 assert_eq!(is_inline(&bytes), true)
343 }
345 }
344
346
345 #[test]
347 #[test]
346 fn test_offset() {
348 fn test_offset() {
347 let bytes = IndexEntryBuilder::new().with_offset(1).build();
349 let bytes = IndexEntryBuilder::new().with_offset(1).build();
348 let entry = IndexEntry {
350 let entry = IndexEntry {
349 bytes: &bytes,
351 bytes: &bytes,
350 offset_override: None,
352 offset_override: None,
351 };
353 };
352
354
353 assert_eq!(entry.offset(), 1)
355 assert_eq!(entry.offset(), 1)
354 }
356 }
355
357
356 #[test]
358 #[test]
357 fn test_with_overridden_offset() {
359 fn test_with_overridden_offset() {
358 let bytes = IndexEntryBuilder::new().with_offset(1).build();
360 let bytes = IndexEntryBuilder::new().with_offset(1).build();
359 let entry = IndexEntry {
361 let entry = IndexEntry {
360 bytes: &bytes,
362 bytes: &bytes,
361 offset_override: Some(2),
363 offset_override: Some(2),
362 };
364 };
363
365
364 assert_eq!(entry.offset(), 2)
366 assert_eq!(entry.offset(), 2)
365 }
367 }
366
368
367 #[test]
369 #[test]
368 fn test_compressed_len() {
370 fn test_compressed_len() {
369 let bytes = IndexEntryBuilder::new().with_compressed_len(1).build();
371 let bytes = IndexEntryBuilder::new().with_compressed_len(1).build();
370 let entry = IndexEntry {
372 let entry = IndexEntry {
371 bytes: &bytes,
373 bytes: &bytes,
372 offset_override: None,
374 offset_override: None,
373 };
375 };
374
376
375 assert_eq!(entry.compressed_len(), 1)
377 assert_eq!(entry.compressed_len(), 1)
376 }
378 }
377
379
378 #[test]
380 #[test]
379 fn test_uncompressed_len() {
381 fn test_uncompressed_len() {
380 let bytes = IndexEntryBuilder::new().with_uncompressed_len(1).build();
382 let bytes = IndexEntryBuilder::new().with_uncompressed_len(1).build();
381 let entry = IndexEntry {
383 let entry = IndexEntry {
382 bytes: &bytes,
384 bytes: &bytes,
383 offset_override: None,
385 offset_override: None,
384 };
386 };
385
387
386 assert_eq!(entry.uncompressed_len(), 1)
388 assert_eq!(entry.uncompressed_len(), 1)
387 }
389 }
388
390
389 #[test]
391 #[test]
390 fn test_base_revision() {
392 fn test_base_revision() {
391 let bytes = IndexEntryBuilder::new().with_base_revision(1).build();
393 let bytes = IndexEntryBuilder::new().with_base_revision(1).build();
392 let entry = IndexEntry {
394 let entry = IndexEntry {
393 bytes: &bytes,
395 bytes: &bytes,
394 offset_override: None,
396 offset_override: None,
395 };
397 };
396
398
397 assert_eq!(entry.base_revision(), 1)
399 assert_eq!(entry.base_revision(), 1)
398 }
400 }
399 }
401 }
400
402
401 #[cfg(test)]
403 #[cfg(test)]
402 pub use tests::IndexEntryBuilder;
404 pub use tests::IndexEntryBuilder;
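The first index entry doubles as the revlog header: its offset field is reused for the version and flag bits, which is what `is_inline` reads and why `offset_override` exists. A standalone sketch of the 6-byte big-endian offset decoding that `IndexEntry::offset` performs for non-first entries:

```rust
use byteorder::{BigEndian, ByteOrder};

/// Decode the 48-bit big-endian offset stored in the first 6 bytes
/// of a (non-first) 64-byte revlog index entry.
fn decode_offset(entry_bytes: &[u8; 64]) -> usize {
    let mut buf = [0u8; 8];
    // Left-pad the 6 offset bytes into a u64 before reading it.
    buf[2..8].copy_from_slice(&entry_bytes[0..6]);
    BigEndian::read_u64(&buf) as usize
}
```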
@@ -1,384 +1,398 b''
1 // Copyright 2019-2020 Georges Racinet <georges.racinet@octobus.net>
1 // Copyright 2019-2020 Georges Racinet <georges.racinet@octobus.net>
2 //
2 //
3 // This software may be used and distributed according to the terms of the
3 // This software may be used and distributed according to the terms of the
4 // GNU General Public License version 2 or any later version.
4 // GNU General Public License version 2 or any later version.
5
5
6 //! Definitions and utilities for Revision nodes
6 //! Definitions and utilities for Revision nodes
7 //!
7 //!
8 //! In the Mercurial code base, it is customary to call "a node" the binary SHA
8 //! In the Mercurial code base, it is customary to call "a node" the binary SHA
9 //! of a revision.
9 //! of a revision.
10
10
11 use crate::errors::HgError;
11 use bytes_cast::BytesCast;
12 use bytes_cast::BytesCast;
12 use std::convert::{TryFrom, TryInto};
13 use std::convert::{TryFrom, TryInto};
13 use std::fmt;
14 use std::fmt;
14
15
15 /// The length in bytes of a `Node`
16 /// The length in bytes of a `Node`
16 ///
17 ///
17 /// This constant is meant to ease refactors of this module, and
18 /// This constant is meant to ease refactors of this module, and
18 /// is private so that calling code does not expect all nodes to have
19 /// is private so that calling code does not expect all nodes to have
19 /// the same size, should we support several formats concurrently in
20 /// the same size, should we support several formats concurrently in
20 /// the future.
21 /// the future.
21 pub const NODE_BYTES_LENGTH: usize = 20;
22 pub const NODE_BYTES_LENGTH: usize = 20;
22
23
23 /// Id of the null node.
24 /// Id of the null node.
24 ///
25 ///
25 /// Used to indicate the absence of node.
26 /// Used to indicate the absence of node.
26 pub const NULL_NODE_ID: [u8; NODE_BYTES_LENGTH] = [0u8; NODE_BYTES_LENGTH];
27 pub const NULL_NODE_ID: [u8; NODE_BYTES_LENGTH] = [0u8; NODE_BYTES_LENGTH];
27
28
28 /// The length in nybbles of a `Node`
29 /// The length in nybbles of a `Node`
29 ///
30 ///
30 /// see also `NODE_BYTES_LENGTH` about it being private.
31 /// see also `NODE_BYTES_LENGTH` about it being private.
31 const NODE_NYBBLES_LENGTH: usize = 2 * NODE_BYTES_LENGTH;
32 const NODE_NYBBLES_LENGTH: usize = 2 * NODE_BYTES_LENGTH;
32
33
33 /// Private alias for readability and to ease future change
34 /// Private alias for readability and to ease future change
34 type NodeData = [u8; NODE_BYTES_LENGTH];
35 type NodeData = [u8; NODE_BYTES_LENGTH];
35
36
36 /// Binary revision SHA
37 /// Binary revision SHA
37 ///
38 ///
38 /// ## Future changes of hash size
39 /// ## Future changes of hash size
39 ///
40 ///
40 /// To accommodate future changes of hash size, Rust callers
41 /// To accommodate future changes of hash size, Rust callers
41 /// should use the conversion methods at the boundaries (FFI, actual
42 /// should use the conversion methods at the boundaries (FFI, actual
42 /// computation of hashes and I/O) only, and only if required.
43 /// computation of hashes and I/O) only, and only if required.
43 ///
44 ///
44 /// All other callers outside of unit tests should just handle `Node` values
45 /// All other callers outside of unit tests should just handle `Node` values
45 /// and never make any assumption on the actual length, using [`nybbles_len`]
46 /// and never make any assumption on the actual length, using [`nybbles_len`]
46 /// if they need a loop boundary.
47 /// if they need a loop boundary.
47 ///
48 ///
48 /// All methods that create a `Node` either take a type that enforces
49 /// All methods that create a `Node` either take a type that enforces
49 /// the size or return an error at runtime.
50 /// the size or return an error at runtime.
50 ///
51 ///
51 /// [`nybbles_len`]: #method.nybbles_len
52 /// [`nybbles_len`]: #method.nybbles_len
52 #[derive(Copy, Clone, Debug, PartialEq, BytesCast, derive_more::From)]
53 #[derive(Copy, Clone, Debug, PartialEq, BytesCast, derive_more::From)]
53 #[repr(transparent)]
54 #[repr(transparent)]
54 pub struct Node {
55 pub struct Node {
55 data: NodeData,
56 data: NodeData,
56 }
57 }
57
58
58 /// The node value for NULL_REVISION
59 /// The node value for NULL_REVISION
59 pub const NULL_NODE: Node = Node {
60 pub const NULL_NODE: Node = Node {
60 data: [0; NODE_BYTES_LENGTH],
61 data: [0; NODE_BYTES_LENGTH],
61 };
62 };
62
63
63 /// Return an error if the slice has an unexpected length
64 /// Return an error if the slice has an unexpected length
64 impl<'a> TryFrom<&'a [u8]> for &'a Node {
65 impl<'a> TryFrom<&'a [u8]> for &'a Node {
65 type Error = ();
66 type Error = ();
66
67
67 #[inline]
68 #[inline]
68 fn try_from(bytes: &'a [u8]) -> Result<Self, Self::Error> {
69 fn try_from(bytes: &'a [u8]) -> Result<Self, Self::Error> {
69 match Node::from_bytes(bytes) {
70 match Node::from_bytes(bytes) {
70 Ok((node, rest)) if rest.is_empty() => Ok(node),
71 Ok((node, rest)) if rest.is_empty() => Ok(node),
71 _ => Err(()),
72 _ => Err(()),
72 }
73 }
73 }
74 }
74 }
75 }
75
76
76 /// Return an error if the slice has an unexpected length
77 /// Return an error if the slice has an unexpected length
77 impl TryFrom<&'_ [u8]> for Node {
78 impl TryFrom<&'_ [u8]> for Node {
78 type Error = std::array::TryFromSliceError;
79 type Error = std::array::TryFromSliceError;
79
80
80 #[inline]
81 #[inline]
81 fn try_from(bytes: &'_ [u8]) -> Result<Self, Self::Error> {
82 fn try_from(bytes: &'_ [u8]) -> Result<Self, Self::Error> {
82 let data = bytes.try_into()?;
83 let data = bytes.try_into()?;
83 Ok(Self { data })
84 Ok(Self { data })
84 }
85 }
85 }
86 }
86
87
87 impl fmt::LowerHex for Node {
88 impl fmt::LowerHex for Node {
88 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
89 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
89 for &byte in &self.data {
90 for &byte in &self.data {
90 write!(f, "{:02x}", byte)?
91 write!(f, "{:02x}", byte)?
91 }
92 }
92 Ok(())
93 Ok(())
93 }
94 }
94 }
95 }
95
96
96 #[derive(Debug)]
97 #[derive(Debug)]
97 pub struct FromHexError;
98 pub struct FromHexError;
98
99
99 /// Low level utility function, also for prefixes
100 /// Low level utility function, also for prefixes
100 fn get_nybble(s: &[u8], i: usize) -> u8 {
101 fn get_nybble(s: &[u8], i: usize) -> u8 {
101 if i % 2 == 0 {
102 if i % 2 == 0 {
102 s[i / 2] >> 4
103 s[i / 2] >> 4
103 } else {
104 } else {
104 s[i / 2] & 0x0f
105 s[i / 2] & 0x0f
105 }
106 }
106 }
107 }
107
108
108 impl Node {
109 impl Node {
109 /// Retrieve the `i`th half-byte of the binary data.
110 /// Retrieve the `i`th half-byte of the binary data.
110 ///
111 ///
111 /// This is also the `i`th hexadecimal digit in numeric form,
112 /// This is also the `i`th hexadecimal digit in numeric form,
112 /// also called a [nybble](https://en.wikipedia.org/wiki/Nibble).
113 /// also called a [nybble](https://en.wikipedia.org/wiki/Nibble).
113 pub fn get_nybble(&self, i: usize) -> u8 {
114 pub fn get_nybble(&self, i: usize) -> u8 {
114 get_nybble(&self.data, i)
115 get_nybble(&self.data, i)
115 }
116 }
116
117
117 /// Length of the data, in nybbles
118 /// Length of the data, in nybbles
118 pub fn nybbles_len(&self) -> usize {
119 pub fn nybbles_len(&self) -> usize {
119 // public exposure as an instance method only, so that we can
120 // public exposure as an instance method only, so that we can
120 // easily support several sizes of hashes if needed in the future.
121 // easily support several sizes of hashes if needed in the future.
121 NODE_NYBBLES_LENGTH
122 NODE_NYBBLES_LENGTH
122 }
123 }
123
124
124 /// Convert from hexadecimal string representation
125 /// Convert from hexadecimal string representation
125 ///
126 ///
126 /// Exact length is required.
127 /// Exact length is required.
127 ///
128 ///
128 /// To be used in FFI and I/O only, in order to facilitate future
129 /// To be used in FFI and I/O only, in order to facilitate future
129 /// changes of hash format.
130 /// changes of hash format.
130 pub fn from_hex(hex: impl AsRef<[u8]>) -> Result<Node, FromHexError> {
131 pub fn from_hex(hex: impl AsRef<[u8]>) -> Result<Node, FromHexError> {
131 let prefix = NodePrefix::from_hex(hex)?;
132 let prefix = NodePrefix::from_hex(hex)?;
132 if prefix.nybbles_len() == NODE_NYBBLES_LENGTH {
133 if prefix.nybbles_len() == NODE_NYBBLES_LENGTH {
133 Ok(Self { data: prefix.data })
134 Ok(Self { data: prefix.data })
134 } else {
135 } else {
135 Err(FromHexError)
136 Err(FromHexError)
136 }
137 }
137 }
138 }
138
139
140 /// `from_hex`, but for input from an internal file of the repository such
141 /// as a changelog or manifest entry.
142 ///
143 /// An error is treated as repository corruption.
144 pub fn from_hex_for_repo(hex: impl AsRef<[u8]>) -> Result<Node, HgError> {
145 Self::from_hex(hex.as_ref()).map_err(|FromHexError| {
146 HgError::CorruptedRepository(format!(
147 "Expected a full hexadecimal node ID, found {}",
148 String::from_utf8_lossy(hex.as_ref())
149 ))
150 })
151 }
152
139 /// Provide access to binary data
153 /// Provide access to binary data
140 ///
154 ///
141 /// This is needed by FFI layers, for instance to return expected
155 /// This is needed by FFI layers, for instance to return expected
142 /// binary values to Python.
156 /// binary values to Python.
143 pub fn as_bytes(&self) -> &[u8] {
157 pub fn as_bytes(&self) -> &[u8] {
144 &self.data
158 &self.data
145 }
159 }
146 }
160 }
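
As an illustration of the new `from_hex_for_repo` helper above, here is a hypothetical test (not part of this changeset) that could be appended to the `tests` module at the bottom of this file; it only uses items already defined here.

    #[test]
    fn test_from_hex_for_repo() {
        // A full-length hexadecimal node ID parses as usual.
        let hex = "0123456789abcdeffedcba9876543210deadbeef";
        assert!(Node::from_hex_for_repo(hex).is_ok());

        // Anything else is reported as repository corruption, since this
        // helper is only meant for data read from internal files.
        match Node::from_hex_for_repo("not-a-node") {
            Err(crate::errors::HgError::CorruptedRepository(message)) => {
                assert!(message.contains("not-a-node"))
            }
            other => panic!("expected CorruptedRepository, got {:?}", other),
        }
    }
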
147
161
148 /// The beginning of a binary revision SHA.
162 /// The beginning of a binary revision SHA.
149 ///
163 ///
150 /// Since it can potentially come from a hexadecimal representation with
164 /// Since it can potentially come from a hexadecimal representation with
151 /// odd length, it needs to carry around whether the last 4 bits are relevant
165 /// odd length, it needs to carry around whether the last 4 bits are relevant
152 /// or not.
166 /// or not.
153 #[derive(Debug, PartialEq, Copy, Clone)]
167 #[derive(Debug, PartialEq, Copy, Clone)]
154 pub struct NodePrefix {
168 pub struct NodePrefix {
155 /// In `1..=NODE_NYBBLES_LENGTH`
169 /// In `1..=NODE_NYBBLES_LENGTH`
156 nybbles_len: u8,
170 nybbles_len: u8,
157 /// The first `4 * length_in_nybbles` bits are used (considering bits
171 /// The first `4 * length_in_nybbles` bits are used (considering bits
158 /// within a byte in big-endian: most significant first), the rest
172 /// within a byte in big-endian: most significant first), the rest
159 /// are zero.
173 /// are zero.
160 data: NodeData,
174 data: NodeData,
161 }
175 }
162
176
163 impl NodePrefix {
177 impl NodePrefix {
164 /// Convert from hexadecimal string representation
178 /// Convert from hexadecimal string representation
165 ///
179 ///
166 /// Similarly to `hex::decode`, can be used with Unicode string types
180 /// Similarly to `hex::decode`, can be used with Unicode string types
167 /// (`String`, `&str`) as well as bytes.
181 /// (`String`, `&str`) as well as bytes.
168 ///
182 ///
169 /// To be used in FFI and I/O only, in order to facilitate future
183 /// To be used in FFI and I/O only, in order to facilitate future
170 /// changes of hash format.
184 /// changes of hash format.
171 pub fn from_hex(hex: impl AsRef<[u8]>) -> Result<Self, FromHexError> {
185 pub fn from_hex(hex: impl AsRef<[u8]>) -> Result<Self, FromHexError> {
172 let hex = hex.as_ref();
186 let hex = hex.as_ref();
173 let len = hex.len();
187 let len = hex.len();
174 if len > NODE_NYBBLES_LENGTH || len == 0 {
188 if len > NODE_NYBBLES_LENGTH || len == 0 {
175 return Err(FromHexError);
189 return Err(FromHexError);
176 }
190 }
177
191
178 let mut data = [0; NODE_BYTES_LENGTH];
192 let mut data = [0; NODE_BYTES_LENGTH];
179 let mut nybbles_len = 0;
193 let mut nybbles_len = 0;
180 for &ascii_byte in hex {
194 for &ascii_byte in hex {
181 let nybble = match char::from(ascii_byte).to_digit(16) {
195 let nybble = match char::from(ascii_byte).to_digit(16) {
182 Some(digit) => digit as u8,
196 Some(digit) => digit as u8,
183 None => return Err(FromHexError),
197 None => return Err(FromHexError),
184 };
198 };
185 // Fill in the upper half of a byte first, then the lower half.
199 // Fill in the upper half of a byte first, then the lower half.
186 let shift = if nybbles_len % 2 == 0 { 4 } else { 0 };
200 let shift = if nybbles_len % 2 == 0 { 4 } else { 0 };
187 data[nybbles_len as usize / 2] |= nybble << shift;
201 data[nybbles_len as usize / 2] |= nybble << shift;
188 nybbles_len += 1;
202 nybbles_len += 1;
189 }
203 }
190 Ok(Self { data, nybbles_len })
204 Ok(Self { data, nybbles_len })
191 }
205 }
192
206
193 pub fn nybbles_len(&self) -> usize {
207 pub fn nybbles_len(&self) -> usize {
194 self.nybbles_len as _
208 self.nybbles_len as _
195 }
209 }
196
210
197 pub fn is_prefix_of(&self, node: &Node) -> bool {
211 pub fn is_prefix_of(&self, node: &Node) -> bool {
198 let full_bytes = self.nybbles_len() / 2;
212 let full_bytes = self.nybbles_len() / 2;
199 if self.data[..full_bytes] != node.data[..full_bytes] {
213 if self.data[..full_bytes] != node.data[..full_bytes] {
200 return false;
214 return false;
201 }
215 }
202 if self.nybbles_len() % 2 == 0 {
216 if self.nybbles_len() % 2 == 0 {
203 return true;
217 return true;
204 }
218 }
205 let last = self.nybbles_len() - 1;
219 let last = self.nybbles_len() - 1;
206 self.get_nybble(last) == node.get_nybble(last)
220 self.get_nybble(last) == node.get_nybble(last)
207 }
221 }
208
222
209 /// Retrieve the `i`th half-byte from the prefix.
223 /// Retrieve the `i`th half-byte from the prefix.
210 ///
224 ///
211 /// This is also the `i`th hexadecimal digit in numeric form,
225 /// This is also the `i`th hexadecimal digit in numeric form,
212 /// also called a [nybble](https://en.wikipedia.org/wiki/Nibble).
226 /// also called a [nybble](https://en.wikipedia.org/wiki/Nibble).
213 pub fn get_nybble(&self, i: usize) -> u8 {
227 pub fn get_nybble(&self, i: usize) -> u8 {
214 assert!(i < self.nybbles_len());
228 assert!(i < self.nybbles_len());
215 get_nybble(&self.data, i)
229 get_nybble(&self.data, i)
216 }
230 }
217
231
218 fn iter_nybbles(&self) -> impl Iterator<Item = u8> + '_ {
232 fn iter_nybbles(&self) -> impl Iterator<Item = u8> + '_ {
219 (0..self.nybbles_len()).map(move |i| get_nybble(&self.data, i))
233 (0..self.nybbles_len()).map(move |i| get_nybble(&self.data, i))
220 }
234 }
221
235
222 /// Return the index of the first nybble that differs from `node`
236 /// Return the index of the first nybble that differs from `node`
223 ///
237 ///
224 /// If the return value is `None`, that means that `self` is
238 /// If the return value is `None`, that means that `self` is
225 /// a prefix of `node`, but this method is a bit slower
239 /// a prefix of `node`, but this method is a bit slower
226 /// than `is_prefix_of`.
240 /// than `is_prefix_of`.
227 ///
241 ///
228 /// Returned index is as in `get_nybble`, i.e., starting at 0.
242 /// Returned index is as in `get_nybble`, i.e., starting at 0.
229 pub fn first_different_nybble(&self, node: &Node) -> Option<usize> {
243 pub fn first_different_nybble(&self, node: &Node) -> Option<usize> {
230 self.iter_nybbles()
244 self.iter_nybbles()
231 .zip(NodePrefix::from(*node).iter_nybbles())
245 .zip(NodePrefix::from(*node).iter_nybbles())
232 .position(|(a, b)| a != b)
246 .position(|(a, b)| a != b)
233 }
247 }
234 }
248 }
235
249
236 impl fmt::LowerHex for NodePrefix {
250 impl fmt::LowerHex for NodePrefix {
237 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
251 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
238 let full_bytes = self.nybbles_len() / 2;
252 let full_bytes = self.nybbles_len() / 2;
239 for &byte in &self.data[..full_bytes] {
253 for &byte in &self.data[..full_bytes] {
240 write!(f, "{:02x}", byte)?
254 write!(f, "{:02x}", byte)?
241 }
255 }
242 if self.nybbles_len() % 2 == 1 {
256 if self.nybbles_len() % 2 == 1 {
243 let last = self.nybbles_len() - 1;
257 let last = self.nybbles_len() - 1;
244 write!(f, "{:x}", self.get_nybble(last))?
258 write!(f, "{:x}", self.get_nybble(last))?
245 }
259 }
246 Ok(())
260 Ok(())
247 }
261 }
248 }
262 }
249
263
250 /// A shortcut for full `Node` references
264 /// A shortcut for full `Node` references
251 impl From<&'_ Node> for NodePrefix {
265 impl From<&'_ Node> for NodePrefix {
252 fn from(node: &'_ Node) -> Self {
266 fn from(node: &'_ Node) -> Self {
253 NodePrefix {
267 NodePrefix {
254 nybbles_len: node.nybbles_len() as _,
268 nybbles_len: node.nybbles_len() as _,
255 data: node.data,
269 data: node.data,
256 }
270 }
257 }
271 }
258 }
272 }
259
273
260 /// A shortcut for full `Node` references
274 /// A shortcut for full `Node` references
261 impl From<Node> for NodePrefix {
275 impl From<Node> for NodePrefix {
262 fn from(node: Node) -> Self {
276 fn from(node: Node) -> Self {
263 NodePrefix {
277 NodePrefix {
264 nybbles_len: node.nybbles_len() as _,
278 nybbles_len: node.nybbles_len() as _,
265 data: node.data,
279 data: node.data,
266 }
280 }
267 }
281 }
268 }
282 }
269
283
270 impl PartialEq<Node> for NodePrefix {
284 impl PartialEq<Node> for NodePrefix {
271 fn eq(&self, other: &Node) -> bool {
285 fn eq(&self, other: &Node) -> bool {
272 Self::from(*other) == *self
286 Self::from(*other) == *self
273 }
287 }
274 }
288 }
275
289
276 #[cfg(test)]
290 #[cfg(test)]
277 mod tests {
291 mod tests {
278 use super::*;
292 use super::*;
279
293
280 const SAMPLE_NODE_HEX: &str = "0123456789abcdeffedcba9876543210deadbeef";
294 const SAMPLE_NODE_HEX: &str = "0123456789abcdeffedcba9876543210deadbeef";
281 const SAMPLE_NODE: Node = Node {
295 const SAMPLE_NODE: Node = Node {
282 data: [
296 data: [
283 0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0xfe, 0xdc, 0xba,
297 0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0xfe, 0xdc, 0xba,
284 0x98, 0x76, 0x54, 0x32, 0x10, 0xde, 0xad, 0xbe, 0xef,
298 0x98, 0x76, 0x54, 0x32, 0x10, 0xde, 0xad, 0xbe, 0xef,
285 ],
299 ],
286 };
300 };
287
301
288 /// Pad a hexadecimal string to reach `NODE_NYBBLES_LENGTH`
302 /// Pad a hexadecimal string to reach `NODE_NYBBLES_LENGTH`
289 /// The padding is made with zeros.
303 /// The padding is made with zeros.
290 pub fn hex_pad_right(hex: &str) -> String {
304 pub fn hex_pad_right(hex: &str) -> String {
291 let mut res = hex.to_string();
305 let mut res = hex.to_string();
292 while res.len() < NODE_NYBBLES_LENGTH {
306 while res.len() < NODE_NYBBLES_LENGTH {
293 res.push('0');
307 res.push('0');
294 }
308 }
295 res
309 res
296 }
310 }
297
311
298 #[test]
312 #[test]
299 fn test_node_from_hex() {
313 fn test_node_from_hex() {
300 let not_hex = "012... oops";
314 let not_hex = "012... oops";
301 let too_short = "0123";
315 let too_short = "0123";
302 let too_long = format!("{}0", SAMPLE_NODE_HEX);
316 let too_long = format!("{}0", SAMPLE_NODE_HEX);
303 assert_eq!(Node::from_hex(SAMPLE_NODE_HEX).unwrap(), SAMPLE_NODE);
317 assert_eq!(Node::from_hex(SAMPLE_NODE_HEX).unwrap(), SAMPLE_NODE);
304 assert!(Node::from_hex(not_hex).is_err());
318 assert!(Node::from_hex(not_hex).is_err());
305 assert!(Node::from_hex(too_short).is_err());
319 assert!(Node::from_hex(too_short).is_err());
306 assert!(Node::from_hex(&too_long).is_err());
320 assert!(Node::from_hex(&too_long).is_err());
307 }
321 }
308
322
309 #[test]
323 #[test]
310 fn test_node_encode_hex() {
324 fn test_node_encode_hex() {
311 assert_eq!(format!("{:x}", SAMPLE_NODE), SAMPLE_NODE_HEX);
325 assert_eq!(format!("{:x}", SAMPLE_NODE), SAMPLE_NODE_HEX);
312 }
326 }
313
327
314 #[test]
328 #[test]
315 fn test_prefix_from_to_hex() -> Result<(), FromHexError> {
329 fn test_prefix_from_to_hex() -> Result<(), FromHexError> {
316 assert_eq!(format!("{:x}", NodePrefix::from_hex("0e1")?), "0e1");
330 assert_eq!(format!("{:x}", NodePrefix::from_hex("0e1")?), "0e1");
317 assert_eq!(format!("{:x}", NodePrefix::from_hex("0e1a")?), "0e1a");
331 assert_eq!(format!("{:x}", NodePrefix::from_hex("0e1a")?), "0e1a");
318 assert_eq!(
332 assert_eq!(
319 format!("{:x}", NodePrefix::from_hex(SAMPLE_NODE_HEX)?),
333 format!("{:x}", NodePrefix::from_hex(SAMPLE_NODE_HEX)?),
320 SAMPLE_NODE_HEX
334 SAMPLE_NODE_HEX
321 );
335 );
322 Ok(())
336 Ok(())
323 }
337 }
324
338
325 #[test]
339 #[test]
326 fn test_prefix_from_hex_errors() {
340 fn test_prefix_from_hex_errors() {
327 assert!(NodePrefix::from_hex("testgr").is_err());
341 assert!(NodePrefix::from_hex("testgr").is_err());
328 let mut long = format!("{:x}", NULL_NODE);
342 let mut long = format!("{:x}", NULL_NODE);
329 long.push('c');
343 long.push('c');
330 assert!(NodePrefix::from_hex(&long).is_err())
344 assert!(NodePrefix::from_hex(&long).is_err())
331 }
345 }
332
346
333 #[test]
347 #[test]
334 fn test_is_prefix_of() -> Result<(), FromHexError> {
348 fn test_is_prefix_of() -> Result<(), FromHexError> {
335 let mut node_data = [0; NODE_BYTES_LENGTH];
349 let mut node_data = [0; NODE_BYTES_LENGTH];
336 node_data[0] = 0x12;
350 node_data[0] = 0x12;
337 node_data[1] = 0xca;
351 node_data[1] = 0xca;
338 let node = Node::from(node_data);
352 let node = Node::from(node_data);
339 assert!(NodePrefix::from_hex("12")?.is_prefix_of(&node));
353 assert!(NodePrefix::from_hex("12")?.is_prefix_of(&node));
340 assert!(!NodePrefix::from_hex("1a")?.is_prefix_of(&node));
354 assert!(!NodePrefix::from_hex("1a")?.is_prefix_of(&node));
341 assert!(NodePrefix::from_hex("12c")?.is_prefix_of(&node));
355 assert!(NodePrefix::from_hex("12c")?.is_prefix_of(&node));
342 assert!(!NodePrefix::from_hex("12d")?.is_prefix_of(&node));
356 assert!(!NodePrefix::from_hex("12d")?.is_prefix_of(&node));
343 Ok(())
357 Ok(())
344 }
358 }
345
359
346 #[test]
360 #[test]
347 fn test_get_nybble() -> Result<(), FromHexError> {
361 fn test_get_nybble() -> Result<(), FromHexError> {
348 let prefix = NodePrefix::from_hex("dead6789cafe")?;
362 let prefix = NodePrefix::from_hex("dead6789cafe")?;
349 assert_eq!(prefix.get_nybble(0), 13);
363 assert_eq!(prefix.get_nybble(0), 13);
350 assert_eq!(prefix.get_nybble(7), 9);
364 assert_eq!(prefix.get_nybble(7), 9);
351 Ok(())
365 Ok(())
352 }
366 }
353
367
354 #[test]
368 #[test]
355 fn test_first_different_nybble_even_prefix() {
369 fn test_first_different_nybble_even_prefix() {
356 let prefix = NodePrefix::from_hex("12ca").unwrap();
370 let prefix = NodePrefix::from_hex("12ca").unwrap();
357 let mut node = Node::from([0; NODE_BYTES_LENGTH]);
371 let mut node = Node::from([0; NODE_BYTES_LENGTH]);
358 assert_eq!(prefix.first_different_nybble(&node), Some(0));
372 assert_eq!(prefix.first_different_nybble(&node), Some(0));
359 node.data[0] = 0x13;
373 node.data[0] = 0x13;
360 assert_eq!(prefix.first_different_nybble(&node), Some(1));
374 assert_eq!(prefix.first_different_nybble(&node), Some(1));
361 node.data[0] = 0x12;
375 node.data[0] = 0x12;
362 assert_eq!(prefix.first_different_nybble(&node), Some(2));
376 assert_eq!(prefix.first_different_nybble(&node), Some(2));
363 node.data[1] = 0xca;
377 node.data[1] = 0xca;
364 // now it is a prefix
378 // now it is a prefix
365 assert_eq!(prefix.first_different_nybble(&node), None);
379 assert_eq!(prefix.first_different_nybble(&node), None);
366 }
380 }
367
381
368 #[test]
382 #[test]
369 fn test_first_different_nybble_odd_prefix() {
383 fn test_first_different_nybble_odd_prefix() {
370 let prefix = NodePrefix::from_hex("12c").unwrap();
384 let prefix = NodePrefix::from_hex("12c").unwrap();
371 let mut node = Node::from([0; NODE_BYTES_LENGTH]);
385 let mut node = Node::from([0; NODE_BYTES_LENGTH]);
372 assert_eq!(prefix.first_different_nybble(&node), Some(0));
386 assert_eq!(prefix.first_different_nybble(&node), Some(0));
373 node.data[0] = 0x13;
387 node.data[0] = 0x13;
374 assert_eq!(prefix.first_different_nybble(&node), Some(1));
388 assert_eq!(prefix.first_different_nybble(&node), Some(1));
375 node.data[0] = 0x12;
389 node.data[0] = 0x12;
376 assert_eq!(prefix.first_different_nybble(&node), Some(2));
390 assert_eq!(prefix.first_different_nybble(&node), Some(2));
377 node.data[1] = 0xca;
391 node.data[1] = 0xca;
378 // now it is a prefix
392 // now it is a prefix
379 assert_eq!(prefix.first_different_nybble(&node), None);
393 assert_eq!(prefix.first_different_nybble(&node), None);
380 }
394 }
381 }
395 }
382
396
383 #[cfg(test)]
397 #[cfg(test)]
384 pub use tests::hex_pad_right;
398 pub use tests::hex_pad_right;
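
For completeness, here is a hypothetical extra test (not part of this changeset) tying `is_prefix_of` and `first_different_nybble` together on the `SAMPLE_NODE` constant defined in the `tests` module above.

    #[test]
    fn test_prefix_walkthrough() {
        // "012" is a 3-nybble prefix of SAMPLE_NODE, so no nybble differs.
        let prefix = NodePrefix::from_hex("012").unwrap();
        assert!(prefix.is_prefix_of(&SAMPLE_NODE));
        assert_eq!(prefix.first_different_nybble(&SAMPLE_NODE), None);

        // Changing byte 1 of the node (nybbles 2 and 3) makes nybble 2 the
        // first one that no longer matches the prefix.
        let mut other = SAMPLE_NODE;
        other.data[1] = 0xff;
        assert!(!prefix.is_prefix_of(&other));
        assert_eq!(prefix.first_different_nybble(&other), Some(2));
    }
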
@@ -1,105 +1,110 b''
1 use crate::errors::{HgError, HgResultExt};
1 use bytes_cast::{unaligned, BytesCast};
2 use bytes_cast::{unaligned, BytesCast};
2 use memmap::Mmap;
3 use memmap::Mmap;
3 use std::path::{Path, PathBuf};
4 use std::path::{Path, PathBuf};
4
5
5 use super::revlog::RevlogError;
6 use super::revlog::RevlogError;
6 use crate::repo::Repo;
7 use crate::repo::Repo;
7 use crate::utils::strip_suffix;
8 use crate::utils::strip_suffix;
8
9
9 const ONDISK_VERSION: u8 = 1;
10 const ONDISK_VERSION: u8 = 1;
10
11
11 pub(super) struct NodeMapDocket {
12 pub(super) struct NodeMapDocket {
12 pub data_length: usize,
13 pub data_length: usize,
13 // TODO: keep here more of the data from `parse()` when we need it
14 // TODO: keep here more of the data from `parse()` when we need it
14 }
15 }
15
16
16 #[derive(BytesCast)]
17 #[derive(BytesCast)]
17 #[repr(C)]
18 #[repr(C)]
18 struct DocketHeader {
19 struct DocketHeader {
19 uid_size: u8,
20 uid_size: u8,
20 _tip_rev: unaligned::U64Be,
21 _tip_rev: unaligned::U64Be,
21 data_length: unaligned::U64Be,
22 data_length: unaligned::U64Be,
22 _data_unused: unaligned::U64Be,
23 _data_unused: unaligned::U64Be,
23 tip_node_size: unaligned::U64Be,
24 tip_node_size: unaligned::U64Be,
24 }
25 }
25
26
26 impl NodeMapDocket {
27 impl NodeMapDocket {
27 /// Return `Ok(None)` when the caller should proceed without a persistent
28 /// Return `Ok(None)` when the caller should proceed without a persistent
28 /// nodemap:
29 /// nodemap:
29 ///
30 ///
30 /// * This revlog does not have a `.n` docket file (it is not generated for
31 /// * This revlog does not have a `.n` docket file (it is not generated for
31 /// small revlogs), or
32 /// small revlogs), or
32 /// * The docket has an unsupported version number (repositories created by
33 /// * The docket has an unsupported version number (repositories created by
33 /// later hg, maybe that should be a requirement instead?), or
34 /// later hg, maybe that should be a requirement instead?), or
34 /// * The docket file points to a missing (likely deleted) data file (this
35 /// * The docket file points to a missing (likely deleted) data file (this
35 /// can happen in a rare race condition).
36 /// can happen in a rare race condition).
36 pub fn read_from_file(
37 pub fn read_from_file(
37 repo: &Repo,
38 repo: &Repo,
38 index_path: &Path,
39 index_path: &Path,
39 ) -> Result<Option<(Self, Mmap)>, RevlogError> {
40 ) -> Result<Option<(Self, Mmap)>, RevlogError> {
40 let docket_path = index_path.with_extension("n");
41 let docket_path = index_path.with_extension("n");
41 let docket_bytes = match repo.store_vfs().read(&docket_path) {
42 let docket_bytes = if let Some(bytes) =
42 Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
43 repo.store_vfs().read(&docket_path).io_not_found_as_none()?
43 return Ok(None)
44 {
44 }
45 bytes
45 Err(e) => return Err(RevlogError::IoError(e)),
46 } else {
46 Ok(bytes) => bytes,
47 return Ok(None);
47 };
48 };
48
49
49 let input = if let Some((&ONDISK_VERSION, rest)) =
50 let input = if let Some((&ONDISK_VERSION, rest)) =
50 docket_bytes.split_first()
51 docket_bytes.split_first()
51 {
52 {
52 rest
53 rest
53 } else {
54 } else {
54 return Ok(None);
55 return Ok(None);
55 };
56 };
56
57
57 let (header, rest) = DocketHeader::from_bytes(input)?;
58 /// Treat any error as a parse error
59 fn parse<T, E>(result: Result<T, E>) -> Result<T, RevlogError> {
60 result.map_err(|_| {
61 HgError::corrupted("nodemap docket parse error").into()
62 })
63 }
64
65 let (header, rest) = parse(DocketHeader::from_bytes(input))?;
58 let uid_size = header.uid_size as usize;
66 let uid_size = header.uid_size as usize;
59 // TODO: do we care about overflow for 4 GB+ nodemap files on 32-bit
67 // TODO: do we care about overflow for 4 GB+ nodemap files on 32-bit
60 // systems?
68 // systems?
61 let tip_node_size = header.tip_node_size.get() as usize;
69 let tip_node_size = header.tip_node_size.get() as usize;
62 let data_length = header.data_length.get() as usize;
70 let data_length = header.data_length.get() as usize;
63 let (uid, rest) = u8::slice_from_bytes(rest, uid_size)?;
71 let (uid, rest) = parse(u8::slice_from_bytes(rest, uid_size))?;
64 let (_tip_node, _rest) = u8::slice_from_bytes(rest, tip_node_size)?;
72 let (_tip_node, _rest) =
65 let uid =
73 parse(u8::slice_from_bytes(rest, tip_node_size))?;
66 std::str::from_utf8(uid).map_err(|_| RevlogError::Corrupted)?;
74 let uid = parse(std::str::from_utf8(uid))?;
67 let docket = NodeMapDocket { data_length };
75 let docket = NodeMapDocket { data_length };
68
76
69 let data_path = rawdata_path(&docket_path, uid);
77 let data_path = rawdata_path(&docket_path, uid);
70 // TODO: use `std::fs::read` here when the `persistent-nodemap.mmap`
78 // TODO: use `vfs.read()` here when the `persistent-nodemap.mmap`
71 // config is false?
79 // config is false?
72 match repo.store_vfs().mmap_open(&data_path) {
80 if let Some(mmap) = repo
73 Ok(mmap) => {
81 .store_vfs()
74 if mmap.len() >= data_length {
82 .mmap_open(&data_path)
75 Ok(Some((docket, mmap)))
83 .io_not_found_as_none()?
76 } else {
84 {
77 Err(RevlogError::Corrupted)
85 if mmap.len() >= data_length {
78 }
86 Ok(Some((docket, mmap)))
87 } else {
88 Err(HgError::corrupted("persistent nodemap too short").into())
79 }
89 }
80 Err(error) => {
90 } else {
81 if error.kind() == std::io::ErrorKind::NotFound {
91 Ok(None)
82 Ok(None)
83 } else {
84 Err(RevlogError::IoError(error))
85 }
86 }
87 }
92 }
88 }
93 }
89 }
94 }
90
95
91 fn rawdata_path(docket_path: &Path, uid: &str) -> PathBuf {
96 fn rawdata_path(docket_path: &Path, uid: &str) -> PathBuf {
92 let docket_name = docket_path
97 let docket_name = docket_path
93 .file_name()
98 .file_name()
94 .expect("expected a base name")
99 .expect("expected a base name")
95 .to_str()
100 .to_str()
96 .expect("expected an ASCII file name in the store");
101 .expect("expected an ASCII file name in the store");
97 let prefix = strip_suffix(docket_name, ".n.a")
102 let prefix = strip_suffix(docket_name, ".n.a")
98 .or_else(|| strip_suffix(docket_name, ".n"))
103 .or_else(|| strip_suffix(docket_name, ".n"))
99 .expect("expected docket path in .n or .n.a");
104 .expect("expected docket path in .n or .n.a");
100 let name = format!("{}-{}.nd", prefix, uid);
105 let name = format!("{}-{}.nd", prefix, uid);
101 docket_path
106 docket_path
102 .parent()
107 .parent()
103 .expect("expected a non-root path")
108 .expect("expected a non-root path")
104 .join(name)
109 .join(name)
105 }
110 }
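
The `io_not_found_as_none()` calls above come from the `HgResultExt` extension trait imported at the top of this file; its definition is not part of this hunk. The following free-function sketch only illustrates the behaviour the docket-reading code relies on, and the real trait method may differ in details.

    // Map a "file not found" I/O error to Ok(None); keep other errors.
    fn io_not_found_as_none<T>(
        result: Result<T, HgError>,
    ) -> Result<Option<T>, HgError> {
        match result {
            Ok(value) => Ok(Some(value)),
            Err(HgError::IoError { error, .. })
                if error.kind() == std::io::ErrorKind::NotFound =>
            {
                Ok(None)
            }
            Err(other) => Err(other),
        }
    }

With that, both a missing docket file and a missing data file collapse into the `Ok(None)` early returns above instead of needing explicit `ErrorKind::NotFound` matches.
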
@@ -1,387 +1,393 b''
1 use std::borrow::Cow;
1 use std::borrow::Cow;
2 use std::io::Read;
2 use std::io::Read;
3 use std::ops::Deref;
3 use std::ops::Deref;
4 use std::path::Path;
4 use std::path::Path;
5
5
6 use byteorder::{BigEndian, ByteOrder};
6 use byteorder::{BigEndian, ByteOrder};
7 use crypto::digest::Digest;
7 use crypto::digest::Digest;
8 use crypto::sha1::Sha1;
8 use crypto::sha1::Sha1;
9 use flate2::read::ZlibDecoder;
9 use flate2::read::ZlibDecoder;
10 use micro_timer::timed;
10 use micro_timer::timed;
11 use zstd;
11 use zstd;
12
12
13 use super::index::Index;
13 use super::index::Index;
14 use super::node::{NodePrefix, NODE_BYTES_LENGTH, NULL_NODE};
14 use super::node::{NodePrefix, NODE_BYTES_LENGTH, NULL_NODE};
15 use super::nodemap;
15 use super::nodemap;
16 use super::nodemap::NodeMap;
16 use super::nodemap::{NodeMap, NodeMapError};
17 use super::nodemap_docket::NodeMapDocket;
17 use super::nodemap_docket::NodeMapDocket;
18 use super::patch;
18 use super::patch;
19 use crate::errors::HgError;
19 use crate::repo::Repo;
20 use crate::repo::Repo;
20 use crate::revlog::Revision;
21 use crate::revlog::Revision;
21
22
23 #[derive(derive_more::From)]
22 pub enum RevlogError {
24 pub enum RevlogError {
23 IoError(std::io::Error),
24 UnsuportedVersion(u16),
25 InvalidRevision,
25 InvalidRevision,
26 /// Found more than one entry whose ID match the requested prefix
26 /// Found more than one entry whose ID match the requested prefix
27 AmbiguousPrefix,
27 AmbiguousPrefix,
28 Corrupted,
28 #[from]
29 UnknowDataFormat(u8),
29 Other(HgError),
30 }
30 }
31
31
32 impl From<bytes_cast::FromBytesError> for RevlogError {
32 impl From<NodeMapError> for RevlogError {
33 fn from(_: bytes_cast::FromBytesError) -> Self {
33 fn from(error: NodeMapError) -> Self {
34 RevlogError::Corrupted
34 match error {
35 NodeMapError::MultipleResults => RevlogError::AmbiguousPrefix,
36 NodeMapError::RevisionNotInIndex(_) => RevlogError::corrupted(),
37 }
38 }
39 }
40
41 impl RevlogError {
42 fn corrupted() -> Self {
43 RevlogError::Other(HgError::corrupted("corrupted revlog"))
35 }
44 }
36 }
45 }
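
The `#[derive(derive_more::From)]` with `#[from]` on the `Other(HgError)` variant is what lets the rest of this file use `?` directly on calls returning `Result<_, HgError>`, such as the Vfs calls below. A hand-written equivalent of the conversion that derive provides, shown here only for readability (a sketch, not the actual macro expansion):

    impl From<HgError> for RevlogError {
        fn from(error: HgError) -> Self {
            RevlogError::Other(error)
        }
    }
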
37
46
38 /// Read-only implementation of revlog.
47 /// Read-only implementation of revlog.
39 pub struct Revlog {
48 pub struct Revlog {
40 /// When index and data are not interleaved: bytes of the revlog index.
49 /// When index and data are not interleaved: bytes of the revlog index.
41 /// When index and data are interleaved: bytes of the revlog index and
50 /// When index and data are interleaved: bytes of the revlog index and
42 /// data.
51 /// data.
43 index: Index,
52 index: Index,
44 /// When index and data are not interleaved: bytes of the revlog data
53 /// When index and data are not interleaved: bytes of the revlog data
45 data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>>,
54 data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>>,
46 /// When present on disk: the persistent nodemap for this revlog
55 /// When present on disk: the persistent nodemap for this revlog
47 nodemap: Option<nodemap::NodeTree>,
56 nodemap: Option<nodemap::NodeTree>,
48 }
57 }
49
58
50 impl Revlog {
59 impl Revlog {
51 /// Open a revlog index file.
60 /// Open a revlog index file.
52 ///
61 ///
53 /// It will also open the associated data file if index and data are not
62 /// It will also open the associated data file if index and data are not
54 /// interleaved.
63 /// interleaved.
55 #[timed]
64 #[timed]
56 pub fn open(
65 pub fn open(
57 repo: &Repo,
66 repo: &Repo,
58 index_path: impl AsRef<Path>,
67 index_path: impl AsRef<Path>,
59 data_path: Option<&Path>,
68 data_path: Option<&Path>,
60 ) -> Result<Self, RevlogError> {
69 ) -> Result<Self, RevlogError> {
61 let index_path = index_path.as_ref();
70 let index_path = index_path.as_ref();
62 let index_mmap = repo
71 let index_mmap = repo.store_vfs().mmap_open(&index_path)?;
63 .store_vfs()
64 .mmap_open(&index_path)
65 .map_err(RevlogError::IoError)?;
66
72
67 let version = get_version(&index_mmap);
73 let version = get_version(&index_mmap);
68 if version != 1 {
74 if version != 1 {
69 return Err(RevlogError::UnsuportedVersion(version));
75 // A proper new version should have had a repo/store requirement.
76 return Err(RevlogError::corrupted());
70 }
77 }
71
78
72 let index = Index::new(Box::new(index_mmap))?;
79 let index = Index::new(Box::new(index_mmap))?;
73
80
74 let default_data_path = index_path.with_extension("d");
81 let default_data_path = index_path.with_extension("d");
75
82
76 // type annotation required
83 // type annotation required
77 // won't recognize Mmap as Deref<Target = [u8]>
84 // won't recognize Mmap as Deref<Target = [u8]>
78 let data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>> =
85 let data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>> =
79 if index.is_inline() {
86 if index.is_inline() {
80 None
87 None
81 } else {
88 } else {
82 let data_path = data_path.unwrap_or(&default_data_path);
89 let data_path = data_path.unwrap_or(&default_data_path);
83 let data_mmap = repo
90 let data_mmap = repo.store_vfs().mmap_open(data_path)?;
84 .store_vfs()
85 .mmap_open(data_path)
86 .map_err(RevlogError::IoError)?;
87 Some(Box::new(data_mmap))
91 Some(Box::new(data_mmap))
88 };
92 };
89
93
90 let nodemap = NodeMapDocket::read_from_file(repo, index_path)?.map(
94 let nodemap = NodeMapDocket::read_from_file(repo, index_path)?.map(
91 |(docket, data)| {
95 |(docket, data)| {
92 nodemap::NodeTree::load_bytes(
96 nodemap::NodeTree::load_bytes(
93 Box::new(data),
97 Box::new(data),
94 docket.data_length,
98 docket.data_length,
95 )
99 )
96 },
100 },
97 );
101 );
98
102
99 Ok(Revlog {
103 Ok(Revlog {
100 index,
104 index,
101 data_bytes,
105 data_bytes,
102 nodemap,
106 nodemap,
103 })
107 })
104 }
108 }
105
109
106 /// Return the number of entries of the `Revlog`.
110 /// Return the number of entries of the `Revlog`.
107 pub fn len(&self) -> usize {
111 pub fn len(&self) -> usize {
108 self.index.len()
112 self.index.len()
109 }
113 }
110
114
111 /// Returns `true` if the `Revlog` has zero `entries`.
115 /// Returns `true` if the `Revlog` has zero `entries`.
112 pub fn is_empty(&self) -> bool {
116 pub fn is_empty(&self) -> bool {
113 self.index.is_empty()
117 self.index.is_empty()
114 }
118 }
115
119
116 /// Return the revision whose node matches the given prefix.
120 /// Return the revision whose node matches the given prefix.
117 #[timed]
121 #[timed]
118 pub fn get_node_rev(
122 pub fn get_node_rev(
119 &self,
123 &self,
120 node: NodePrefix,
124 node: NodePrefix,
121 ) -> Result<Revision, RevlogError> {
125 ) -> Result<Revision, RevlogError> {
122 if let Some(nodemap) = &self.nodemap {
126 if let Some(nodemap) = &self.nodemap {
123 return nodemap
127 return nodemap
124 .find_bin(&self.index, node)
128 .find_bin(&self.index, node)?
125 // TODO: propagate details of this error:
126 .map_err(|_| RevlogError::Corrupted)?
127 .ok_or(RevlogError::InvalidRevision);
129 .ok_or(RevlogError::InvalidRevision);
128 }
130 }
129
131
130 // Fall back to linear scan when a persistent nodemap is not present.
132 // Fall back to linear scan when a persistent nodemap is not present.
131 // This happens when the persistent-nodemap experimental feature is not
133 // This happens when the persistent-nodemap experimental feature is not
132 // enabled, or for small revlogs.
134 // enabled, or for small revlogs.
133 //
135 //
134 // TODO: consider building a non-persistent nodemap in memory to
136 // TODO: consider building a non-persistent nodemap in memory to
135 // optimize these cases.
137 // optimize these cases.
136 let mut found_by_prefix = None;
138 let mut found_by_prefix = None;
137 for rev in (0..self.len() as Revision).rev() {
139 for rev in (0..self.len() as Revision).rev() {
138 let index_entry =
140 let index_entry =
139 self.index.get_entry(rev).ok_or(RevlogError::Corrupted)?;
141 self.index.get_entry(rev).ok_or(HgError::corrupted(
142 "revlog references a revision not in the index",
143 ))?;
140 if node == *index_entry.hash() {
144 if node == *index_entry.hash() {
141 return Ok(rev);
145 return Ok(rev);
142 }
146 }
143 if node.is_prefix_of(index_entry.hash()) {
147 if node.is_prefix_of(index_entry.hash()) {
144 if found_by_prefix.is_some() {
148 if found_by_prefix.is_some() {
145 return Err(RevlogError::AmbiguousPrefix);
149 return Err(RevlogError::AmbiguousPrefix);
146 }
150 }
147 found_by_prefix = Some(rev)
151 found_by_prefix = Some(rev)
148 }
152 }
149 }
153 }
150 found_by_prefix.ok_or(RevlogError::InvalidRevision)
154 found_by_prefix.ok_or(RevlogError::InvalidRevision)
151 }
155 }
152
156
153 /// Returns whether the given revision exists in this revlog.
157 /// Returns whether the given revision exists in this revlog.
154 pub fn has_rev(&self, rev: Revision) -> bool {
158 pub fn has_rev(&self, rev: Revision) -> bool {
155 self.index.get_entry(rev).is_some()
159 self.index.get_entry(rev).is_some()
156 }
160 }
157
161
158 /// Return the full data associated with a revision.
162 /// Return the full data associated with a revision.
159 ///
163 ///
160 /// All entries required to build the final data out of deltas will be
164 /// All entries required to build the final data out of deltas will be
161 /// retrieved as needed, and the deltas will be applied to the initial
165 /// retrieved as needed, and the deltas will be applied to the initial
162 /// snapshot to rebuild the final data.
166 /// snapshot to rebuild the final data.
163 #[timed]
167 #[timed]
164 pub fn get_rev_data(&self, rev: Revision) -> Result<Vec<u8>, RevlogError> {
168 pub fn get_rev_data(&self, rev: Revision) -> Result<Vec<u8>, RevlogError> {
165 // Todo return -> Cow
169 // Todo return -> Cow
166 let mut entry = self.get_entry(rev)?;
170 let mut entry = self.get_entry(rev)?;
167 let mut delta_chain = vec![];
171 let mut delta_chain = vec![];
168 while let Some(base_rev) = entry.base_rev {
172 while let Some(base_rev) = entry.base_rev {
169 delta_chain.push(entry);
173 delta_chain.push(entry);
170 entry =
174 entry = self
171 self.get_entry(base_rev).or(Err(RevlogError::Corrupted))?;
175 .get_entry(base_rev)
176 .map_err(|_| RevlogError::corrupted())?;
172 }
177 }
173
178
174 // TODO do not look twice in the index
179 // TODO do not look twice in the index
175 let index_entry = self
180 let index_entry = self
176 .index
181 .index
177 .get_entry(rev)
182 .get_entry(rev)
178 .ok_or(RevlogError::InvalidRevision)?;
183 .ok_or(RevlogError::InvalidRevision)?;
179
184
180 let data: Vec<u8> = if delta_chain.is_empty() {
185 let data: Vec<u8> = if delta_chain.is_empty() {
181 entry.data()?.into()
186 entry.data()?.into()
182 } else {
187 } else {
183 Revlog::build_data_from_deltas(entry, &delta_chain)?
188 Revlog::build_data_from_deltas(entry, &delta_chain)?
184 };
189 };
185
190
186 if self.check_hash(
191 if self.check_hash(
187 index_entry.p1(),
192 index_entry.p1(),
188 index_entry.p2(),
193 index_entry.p2(),
189 index_entry.hash().as_bytes(),
194 index_entry.hash().as_bytes(),
190 &data,
195 &data,
191 ) {
196 ) {
192 Ok(data)
197 Ok(data)
193 } else {
198 } else {
194 Err(RevlogError::Corrupted)
199 Err(RevlogError::corrupted())
195 }
200 }
196 }
201 }
197
202
198 /// Check the hash of some given data against the recorded hash.
203 /// Check the hash of some given data against the recorded hash.
199 pub fn check_hash(
204 pub fn check_hash(
200 &self,
205 &self,
201 p1: Revision,
206 p1: Revision,
202 p2: Revision,
207 p2: Revision,
203 expected: &[u8],
208 expected: &[u8],
204 data: &[u8],
209 data: &[u8],
205 ) -> bool {
210 ) -> bool {
206 let e1 = self.index.get_entry(p1);
211 let e1 = self.index.get_entry(p1);
207 let h1 = match e1 {
212 let h1 = match e1 {
208 Some(ref entry) => entry.hash(),
213 Some(ref entry) => entry.hash(),
209 None => &NULL_NODE,
214 None => &NULL_NODE,
210 };
215 };
211 let e2 = self.index.get_entry(p2);
216 let e2 = self.index.get_entry(p2);
212 let h2 = match e2 {
217 let h2 = match e2 {
213 Some(ref entry) => entry.hash(),
218 Some(ref entry) => entry.hash(),
214 None => &NULL_NODE,
219 None => &NULL_NODE,
215 };
220 };
216
221
217 hash(data, h1.as_bytes(), h2.as_bytes()).as_slice() == expected
222 hash(data, h1.as_bytes(), h2.as_bytes()).as_slice() == expected
218 }
223 }
219
224
220 /// Build the full data of a revision out of its snapshot
225 /// Build the full data of a revision out of its snapshot
221 /// and its deltas.
226 /// and its deltas.
222 #[timed]
227 #[timed]
223 fn build_data_from_deltas(
228 fn build_data_from_deltas(
224 snapshot: RevlogEntry,
229 snapshot: RevlogEntry,
225 deltas: &[RevlogEntry],
230 deltas: &[RevlogEntry],
226 ) -> Result<Vec<u8>, RevlogError> {
231 ) -> Result<Vec<u8>, RevlogError> {
227 let snapshot = snapshot.data()?;
232 let snapshot = snapshot.data()?;
228 let deltas = deltas
233 let deltas = deltas
229 .iter()
234 .iter()
230 .rev()
235 .rev()
231 .map(RevlogEntry::data)
236 .map(RevlogEntry::data)
232 .collect::<Result<Vec<Cow<'_, [u8]>>, RevlogError>>()?;
237 .collect::<Result<Vec<Cow<'_, [u8]>>, RevlogError>>()?;
233 let patches: Vec<_> =
238 let patches: Vec<_> =
234 deltas.iter().map(|d| patch::PatchList::new(d)).collect();
239 deltas.iter().map(|d| patch::PatchList::new(d)).collect();
235 let patch = patch::fold_patch_lists(&patches);
240 let patch = patch::fold_patch_lists(&patches);
236 Ok(patch.apply(&snapshot))
241 Ok(patch.apply(&snapshot))
237 }
242 }
238
243
239 /// Return the revlog data.
244 /// Return the revlog data.
240 fn data(&self) -> &[u8] {
245 fn data(&self) -> &[u8] {
241 match self.data_bytes {
246 match self.data_bytes {
242 Some(ref data_bytes) => &data_bytes,
247 Some(ref data_bytes) => &data_bytes,
243 None => panic!(
248 None => panic!(
244 "forgot to load the data or trying to access inline data"
249 "forgot to load the data or trying to access inline data"
245 ),
250 ),
246 }
251 }
247 }
252 }
248
253
249 /// Get an entry of the revlog.
254 /// Get an entry of the revlog.
250 fn get_entry(&self, rev: Revision) -> Result<RevlogEntry, RevlogError> {
255 fn get_entry(&self, rev: Revision) -> Result<RevlogEntry, RevlogError> {
251 let index_entry = self
256 let index_entry = self
252 .index
257 .index
253 .get_entry(rev)
258 .get_entry(rev)
254 .ok_or(RevlogError::InvalidRevision)?;
259 .ok_or(RevlogError::InvalidRevision)?;
255 let start = index_entry.offset();
260 let start = index_entry.offset();
256 let end = start + index_entry.compressed_len();
261 let end = start + index_entry.compressed_len();
257 let data = if self.index.is_inline() {
262 let data = if self.index.is_inline() {
258 self.index.data(start, end)
263 self.index.data(start, end)
259 } else {
264 } else {
260 &self.data()[start..end]
265 &self.data()[start..end]
261 };
266 };
262 let entry = RevlogEntry {
267 let entry = RevlogEntry {
263 rev,
268 rev,
264 bytes: data,
269 bytes: data,
265 compressed_len: index_entry.compressed_len(),
270 compressed_len: index_entry.compressed_len(),
266 uncompressed_len: index_entry.uncompressed_len(),
271 uncompressed_len: index_entry.uncompressed_len(),
267 base_rev: if index_entry.base_revision() == rev {
272 base_rev: if index_entry.base_revision() == rev {
268 None
273 None
269 } else {
274 } else {
270 Some(index_entry.base_revision())
275 Some(index_entry.base_revision())
271 },
276 },
272 };
277 };
273 Ok(entry)
278 Ok(entry)
274 }
279 }
275 }
280 }
276
281
277 /// The revlog entry's bytes and the necessary information to extract
282 /// The revlog entry's bytes and the necessary information to extract
278 /// the entry's data.
283 /// the entry's data.
279 #[derive(Debug)]
284 #[derive(Debug)]
280 pub struct RevlogEntry<'a> {
285 pub struct RevlogEntry<'a> {
281 rev: Revision,
286 rev: Revision,
282 bytes: &'a [u8],
287 bytes: &'a [u8],
283 compressed_len: usize,
288 compressed_len: usize,
284 uncompressed_len: usize,
289 uncompressed_len: usize,
285 base_rev: Option<Revision>,
290 base_rev: Option<Revision>,
286 }
291 }
287
292
288 impl<'a> RevlogEntry<'a> {
293 impl<'a> RevlogEntry<'a> {
289 /// Extract the data contained in the entry.
294 /// Extract the data contained in the entry.
290 pub fn data(&self) -> Result<Cow<'_, [u8]>, RevlogError> {
295 pub fn data(&self) -> Result<Cow<'_, [u8]>, RevlogError> {
291 if self.bytes.is_empty() {
296 if self.bytes.is_empty() {
292 return Ok(Cow::Borrowed(&[]));
297 return Ok(Cow::Borrowed(&[]));
293 }
298 }
294 match self.bytes[0] {
299 match self.bytes[0] {
295 // Revision data is the entirety of the entry, including this
300 // Revision data is the entirety of the entry, including this
296 // header.
301 // header.
297 b'\0' => Ok(Cow::Borrowed(self.bytes)),
302 b'\0' => Ok(Cow::Borrowed(self.bytes)),
298 // Raw revision data follows.
303 // Raw revision data follows.
299 b'u' => Ok(Cow::Borrowed(&self.bytes[1..])),
304 b'u' => Ok(Cow::Borrowed(&self.bytes[1..])),
300 // zlib (RFC 1950) data.
305 // zlib (RFC 1950) data.
301 b'x' => Ok(Cow::Owned(self.uncompressed_zlib_data()?)),
306 b'x' => Ok(Cow::Owned(self.uncompressed_zlib_data()?)),
302 // zstd data.
307 // zstd data.
303 b'\x28' => Ok(Cow::Owned(self.uncompressed_zstd_data()?)),
308 b'\x28' => Ok(Cow::Owned(self.uncompressed_zstd_data()?)),
304 format_type => Err(RevlogError::UnknowDataFormat(format_type)),
309 // A proper new format should have had a repo/store requirement.
310 _format_type => Err(RevlogError::corrupted()),
305 }
311 }
306 }
312 }
307
313
308 fn uncompressed_zlib_data(&self) -> Result<Vec<u8>, RevlogError> {
314 fn uncompressed_zlib_data(&self) -> Result<Vec<u8>, RevlogError> {
309 let mut decoder = ZlibDecoder::new(self.bytes);
315 let mut decoder = ZlibDecoder::new(self.bytes);
310 if self.is_delta() {
316 if self.is_delta() {
311 let mut buf = Vec::with_capacity(self.compressed_len);
317 let mut buf = Vec::with_capacity(self.compressed_len);
312 decoder
318 decoder
313 .read_to_end(&mut buf)
319 .read_to_end(&mut buf)
314 .or(Err(RevlogError::Corrupted))?;
320 .map_err(|_| RevlogError::corrupted())?;
315 Ok(buf)
321 Ok(buf)
316 } else {
322 } else {
317 let mut buf = vec![0; self.uncompressed_len];
323 let mut buf = vec![0; self.uncompressed_len];
318 decoder
324 decoder
319 .read_exact(&mut buf)
325 .read_exact(&mut buf)
320 .or(Err(RevlogError::Corrupted))?;
326 .map_err(|_| RevlogError::corrupted())?;
321 Ok(buf)
327 Ok(buf)
322 }
328 }
323 }
329 }
324
330
325 fn uncompressed_zstd_data(&self) -> Result<Vec<u8>, RevlogError> {
331 fn uncompressed_zstd_data(&self) -> Result<Vec<u8>, RevlogError> {
326 if self.is_delta() {
332 if self.is_delta() {
327 let mut buf = Vec::with_capacity(self.compressed_len);
333 let mut buf = Vec::with_capacity(self.compressed_len);
328 zstd::stream::copy_decode(self.bytes, &mut buf)
334 zstd::stream::copy_decode(self.bytes, &mut buf)
329 .or(Err(RevlogError::Corrupted))?;
335 .map_err(|_| RevlogError::corrupted())?;
330 Ok(buf)
336 Ok(buf)
331 } else {
337 } else {
332 let mut buf = vec![0; self.uncompressed_len];
338 let mut buf = vec![0; self.uncompressed_len];
333 let len = zstd::block::decompress_to_buffer(self.bytes, &mut buf)
339 let len = zstd::block::decompress_to_buffer(self.bytes, &mut buf)
334 .or(Err(RevlogError::Corrupted))?;
340 .map_err(|_| RevlogError::corrupted())?;
335 if len != self.uncompressed_len {
341 if len != self.uncompressed_len {
336 Err(RevlogError::Corrupted)
342 Err(RevlogError::corrupted())
337 } else {
343 } else {
338 Ok(buf)
344 Ok(buf)
339 }
345 }
340 }
346 }
341 }
347 }
342
348
343 /// Tell whether the entry is a snapshot or a delta
349 /// Tell whether the entry is a snapshot or a delta
344 /// (this influences decompression).
350 /// (this influences decompression).
345 fn is_delta(&self) -> bool {
351 fn is_delta(&self) -> bool {
346 self.base_rev.is_some()
352 self.base_rev.is_some()
347 }
353 }
348 }
354 }
349
355
350 /// Format version of the revlog.
356 /// Format version of the revlog.
351 pub fn get_version(index_bytes: &[u8]) -> u16 {
357 pub fn get_version(index_bytes: &[u8]) -> u16 {
352 BigEndian::read_u16(&index_bytes[2..=3])
358 BigEndian::read_u16(&index_bytes[2..=3])
353 }
359 }
354
360
355 /// Calculate the hash of a revision given its data and its parents.
361 /// Calculate the hash of a revision given its data and its parents.
356 fn hash(data: &[u8], p1_hash: &[u8], p2_hash: &[u8]) -> Vec<u8> {
362 fn hash(data: &[u8], p1_hash: &[u8], p2_hash: &[u8]) -> Vec<u8> {
357 let mut hasher = Sha1::new();
363 let mut hasher = Sha1::new();
358 let (a, b) = (p1_hash, p2_hash);
364 let (a, b) = (p1_hash, p2_hash);
359 if a > b {
365 if a > b {
360 hasher.input(b);
366 hasher.input(b);
361 hasher.input(a);
367 hasher.input(a);
362 } else {
368 } else {
363 hasher.input(a);
369 hasher.input(a);
364 hasher.input(b);
370 hasher.input(b);
365 }
371 }
366 hasher.input(data);
372 hasher.input(data);
367 let mut hash = vec![0; NODE_BYTES_LENGTH];
373 let mut hash = vec![0; NODE_BYTES_LENGTH];
368 hasher.result(&mut hash);
374 hasher.result(&mut hash);
369 hash
375 hash
370 }
376 }
371
377
372 #[cfg(test)]
378 #[cfg(test)]
373 mod tests {
379 mod tests {
374 use super::*;
380 use super::*;
375
381
376 use super::super::index::IndexEntryBuilder;
382 use super::super::index::IndexEntryBuilder;
377
383
378 #[test]
384 #[test]
379 fn version_test() {
385 fn version_test() {
380 let bytes = IndexEntryBuilder::new()
386 let bytes = IndexEntryBuilder::new()
381 .is_first(true)
387 .is_first(true)
382 .with_version(1)
388 .with_version(1)
383 .build();
389 .build();
384
390
385 assert_eq!(get_version(&bytes), 1)
391 assert_eq!(get_version(&bytes), 1)
386 }
392 }
387 }
393 }
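
To show how the pieces of this file fit together after the error rework, here is a hypothetical helper written as if it lived in this module. The `"00changelog.i"` path and the overall flow are illustrative, and the `Repo` value is assumed to be obtained elsewhere.

    /// Resolve a hex node (or unambiguous prefix) in the changelog and
    /// return the full revision data. Illustrative only.
    fn cat_changelog_rev(
        repo: &Repo,
        hex: &str,
    ) -> Result<Vec<u8>, RevlogError> {
        let revlog = Revlog::open(repo, "00changelog.i", None)?;
        let prefix = NodePrefix::from_hex(hex)
            .map_err(|_| RevlogError::InvalidRevision)?;
        let rev = revlog.get_node_rev(prefix)?;
        revlog.get_rev_data(rev)
    }

Note that the two `?` on `open` and `get_node_rev` need no `map_err`: I/O and corruption failures arrive as `HgError` and are converted into `RevlogError::Other` by the `#[from]` conversion shown earlier.
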
@@ -1,146 +1,123 b''
1 use crate::exitcode;
1 use crate::exitcode;
2 use crate::ui::utf8_to_local;
2 use crate::ui::utf8_to_local;
3 use crate::ui::UiError;
3 use crate::ui::UiError;
4 use format_bytes::format_bytes;
4 use format_bytes::format_bytes;
5 use hg::errors::HgError;
5 use hg::errors::HgError;
6 use hg::operations::FindRootError;
6 use hg::operations::FindRootError;
7 use hg::revlog::revlog::RevlogError;
7 use hg::revlog::revlog::RevlogError;
8 use hg::utils::files::get_bytes_from_path;
8 use hg::utils::files::get_bytes_from_path;
9 use std::convert::From;
9 use std::convert::From;
10 use std::path::PathBuf;
10 use std::path::PathBuf;
11
11
12 /// The kind of command error
12 /// The kind of command error
13 #[derive(Debug, derive_more::From)]
13 #[derive(Debug, derive_more::From)]
14 pub enum CommandError {
14 pub enum CommandError {
15 /// The root of the repository cannot be found
15 /// The root of the repository cannot be found
16 RootNotFound(PathBuf),
16 RootNotFound(PathBuf),
17 /// The current directory cannot be found
17 /// The current directory cannot be found
18 CurrentDirNotFound(std::io::Error),
18 CurrentDirNotFound(std::io::Error),
19 /// The standard output stream cannot be written to
19 /// The standard output stream cannot be written to
20 StdoutError,
20 StdoutError,
21 /// The standard error stream cannot be written to
21 /// The standard error stream cannot be written to
22 StderrError,
22 StderrError,
23 /// The command aborted
23 /// The command aborted
24 Abort(Option<Vec<u8>>),
24 Abort(Option<Vec<u8>>),
25 /// A Mercurial capability has not been implemented.
25 /// A Mercurial capability has not been implemented.
26 Unimplemented,
26 Unimplemented,
27 /// Common cases
27 /// Common cases
28 #[from]
28 #[from]
29 Other(HgError),
29 Other(HgError),
30 }
30 }
31
31
32 impl CommandError {
32 impl CommandError {
33 pub fn get_exit_code(&self) -> exitcode::ExitCode {
33 pub fn get_exit_code(&self) -> exitcode::ExitCode {
34 match self {
34 match self {
35 CommandError::RootNotFound(_) => exitcode::ABORT,
35 CommandError::RootNotFound(_) => exitcode::ABORT,
36 CommandError::CurrentDirNotFound(_) => exitcode::ABORT,
36 CommandError::CurrentDirNotFound(_) => exitcode::ABORT,
37 CommandError::StdoutError => exitcode::ABORT,
37 CommandError::StdoutError => exitcode::ABORT,
38 CommandError::StderrError => exitcode::ABORT,
38 CommandError::StderrError => exitcode::ABORT,
39 CommandError::Abort(_) => exitcode::ABORT,
39 CommandError::Abort(_) => exitcode::ABORT,
40 CommandError::Unimplemented => exitcode::UNIMPLEMENTED_COMMAND,
40 CommandError::Unimplemented => exitcode::UNIMPLEMENTED_COMMAND,
41 CommandError::Other(HgError::UnsupportedFeature(_)) => {
41 CommandError::Other(HgError::UnsupportedFeature(_)) => {
42 exitcode::UNIMPLEMENTED_COMMAND
42 exitcode::UNIMPLEMENTED_COMMAND
43 }
43 }
44 CommandError::Other(_) => exitcode::ABORT,
44 CommandError::Other(_) => exitcode::ABORT,
45 }
45 }
46 }
46 }
47
47
48 /// Return the message corresponding to the error, if any.
48 /// Return the message corresponding to the error, if any.
49 pub fn get_error_message_bytes(&self) -> Option<Vec<u8>> {
49 pub fn get_error_message_bytes(&self) -> Option<Vec<u8>> {
50 match self {
50 match self {
51 CommandError::RootNotFound(path) => {
51 CommandError::RootNotFound(path) => {
52 let bytes = get_bytes_from_path(path);
52 let bytes = get_bytes_from_path(path);
53 Some(format_bytes!(
53 Some(format_bytes!(
54 b"abort: no repository found in '{}' (.hg not found)!\n",
54 b"abort: no repository found in '{}' (.hg not found)!\n",
55 bytes.as_slice()
55 bytes.as_slice()
56 ))
56 ))
57 }
57 }
58 CommandError::CurrentDirNotFound(e) => Some(format_bytes!(
58 CommandError::CurrentDirNotFound(e) => Some(format_bytes!(
59 b"abort: error getting current working directory: {}\n",
59 b"abort: error getting current working directory: {}\n",
60 e.to_string().as_bytes(),
60 e.to_string().as_bytes(),
61 )),
61 )),
62 CommandError::Abort(message) => message.to_owned(),
62 CommandError::Abort(message) => message.to_owned(),
63
63
64 CommandError::StdoutError
64 CommandError::StdoutError
65 | CommandError::StderrError
65 | CommandError::StderrError
66 | CommandError::Unimplemented
66 | CommandError::Unimplemented
67 | CommandError::Other(HgError::UnsupportedFeature(_)) => None,
67 | CommandError::Other(HgError::UnsupportedFeature(_)) => None,
68
68
69 CommandError::Other(e) => {
69 CommandError::Other(e) => {
70 Some(format_bytes!(b"{}\n", e.to_string().as_bytes()))
70 Some(format_bytes!(b"{}\n", e.to_string().as_bytes()))
71 }
71 }
72 }
72 }
73 }
73 }
74
74
75 /// Exit the process with the corresponding exit code.
75 /// Exit the process with the corresponding exit code.
76 pub fn exit(&self) {
76 pub fn exit(&self) {
77 std::process::exit(self.get_exit_code())
77 std::process::exit(self.get_exit_code())
78 }
78 }
79 }
79 }
80
80
81 impl From<UiError> for CommandError {
81 impl From<UiError> for CommandError {
82 fn from(error: UiError) -> Self {
82 fn from(error: UiError) -> Self {
83 match error {
83 match error {
84 UiError::StdoutError(_) => CommandError::StdoutError,
84 UiError::StdoutError(_) => CommandError::StdoutError,
85 UiError::StderrError(_) => CommandError::StderrError,
85 UiError::StderrError(_) => CommandError::StderrError,
86 }
86 }
87 }
87 }
88 }
88 }
89
89
90 impl From<FindRootError> for CommandError {
90 impl From<FindRootError> for CommandError {
91 fn from(err: FindRootError) -> Self {
91 fn from(err: FindRootError) -> Self {
92 match err {
92 match err {
93 FindRootError::RootNotFound(path) => {
93 FindRootError::RootNotFound(path) => {
94 CommandError::RootNotFound(path)
94 CommandError::RootNotFound(path)
95 }
95 }
96 FindRootError::GetCurrentDirError(e) => {
96 FindRootError::GetCurrentDirError(e) => {
97 CommandError::CurrentDirNotFound(e)
97 CommandError::CurrentDirNotFound(e)
98 }
98 }
99 }
99 }
100 }
100 }
101 }
101 }
102
102
103 impl From<(RevlogError, &str)> for CommandError {
103 impl From<(RevlogError, &str)> for CommandError {
104 fn from((err, rev): (RevlogError, &str)) -> CommandError {
104 fn from((err, rev): (RevlogError, &str)) -> CommandError {
105 match err {
105 match err {
106 RevlogError::IoError(err) => CommandError::Abort(Some(
107 utf8_to_local(&format!("abort: {}\n", err)).into(),
108 )),
109 RevlogError::InvalidRevision => CommandError::Abort(Some(
106 RevlogError::InvalidRevision => CommandError::Abort(Some(
110 utf8_to_local(&format!(
107 utf8_to_local(&format!(
111 "abort: invalid revision identifier {}\n",
108 "abort: invalid revision identifier {}\n",
112 rev
109 rev
113 ))
110 ))
114 .into(),
111 .into(),
115 )),
112 )),
116 RevlogError::AmbiguousPrefix => CommandError::Abort(Some(
113 RevlogError::AmbiguousPrefix => CommandError::Abort(Some(
117 utf8_to_local(&format!(
114 utf8_to_local(&format!(
118 "abort: ambiguous revision identifier {}\n",
115 "abort: ambiguous revision identifier {}\n",
119 rev
116 rev
120 ))
117 ))
121 .into(),
118 .into(),
122 )),
119 )),
123 RevlogError::UnsuportedVersion(version) => {
120 RevlogError::Other(err) => CommandError::Other(err),
124 CommandError::Abort(Some(
125 utf8_to_local(&format!(
126 "abort: unsupported revlog version {}\n",
127 version
128 ))
129 .into(),
130 ))
131 }
132 RevlogError::Corrupted => {
133 CommandError::Abort(Some("abort: corrupted revlog\n".into()))
134 }
135 RevlogError::UnknowDataFormat(format) => {
136 CommandError::Abort(Some(
137 utf8_to_local(&format!(
138 "abort: unknow revlog dataformat {:?}\n",
139 format
140 ))
141 .into(),
142 ))
143 }
144 }
121 }
145 }
122 }
146 }
123 }
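
Finally, a hypothetical sketch of the error path at the top of a command, using only the methods defined above plus the standard library. In practice rhg routes output through its `Ui` type, so this is a simplification.

    use std::io::Write;

    fn report_and_exit(error: CommandError) {
        if let Some(message) = error.get_error_message_bytes() {
            // Best-effort: we are about to exit anyway, so ignore failures
            // to write the message to stderr.
            let _ = std::io::stderr().write_all(&message);
        }
        error.exit()
    }
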