##// END OF EJS Templates
rhg: centralize parsing of `--rev` CLI arguments...
Simon Sapin -
r47162:4b381dbb default
parent child Browse files
Show More
@@ -0,0 +1,53 b''
1 //! The revset query language
2 //!
3 //! <https://www.mercurial-scm.org/repo/hg/help/revsets>
4
5 use crate::repo::Repo;
6 use crate::revlog::changelog::Changelog;
7 use crate::revlog::revlog::{Revlog, RevlogError};
8 use crate::revlog::NodePrefix;
9 use crate::revlog::{Revision, NULL_REVISION};
10
11 /// Resolve a query string into a single revision.
12 ///
13 /// Only part of the revset language is implemented so far.
14 pub fn resolve_single(
15 input: &str,
16 repo: &Repo,
17 ) -> Result<Revision, RevlogError> {
18 let changelog = Changelog::open(repo)?;
19
20 match resolve_rev_number_or_hex_prefix(input, &changelog.revlog) {
21 Err(RevlogError::InvalidRevision) => {} // Try other syntax
22 result => return result,
23 }
24
25 if input == "null" {
26 return Ok(NULL_REVISION);
27 }
28
29 // TODO: support for the rest of the language here.
30
31 Err(RevlogError::InvalidRevision)
32 }
33
34 /// Resolve the small subset of the language suitable for revlogs other than
35 /// the changelog, such as the CLI argument of `hg debugdata --manifest`.
36 ///
37 /// * A non-negative decimal integer for a revision number, or
38 /// * A hexadecimal string, for the unique node ID that starts with this
39 /// prefix
40 pub fn resolve_rev_number_or_hex_prefix(
41 input: &str,
42 revlog: &Revlog,
43 ) -> Result<Revision, RevlogError> {
44 if let Ok(integer) = input.parse::<i32>() {
45 if integer >= 0 && revlog.has_rev(integer) {
46 return Ok(integer);
47 }
48 }
49 if let Ok(prefix) = NodePrefix::from_hex(input) {
50 return revlog.get_node_rev(prefix);
51 }
52 Err(RevlogError::InvalidRevision)
53 }
@@ -1,188 +1,189 b''
1 // Copyright 2018-2020 Georges Racinet <georges.racinet@octobus.net>
1 // Copyright 2018-2020 Georges Racinet <georges.racinet@octobus.net>
2 // and Mercurial contributors
2 // and Mercurial contributors
3 //
3 //
4 // This software may be used and distributed according to the terms of the
4 // This software may be used and distributed according to the terms of the
5 // GNU General Public License version 2 or any later version.
5 // GNU General Public License version 2 or any later version.
6 mod ancestors;
6 mod ancestors;
7 pub mod dagops;
7 pub mod dagops;
8 pub use ancestors::{AncestorsIterator, LazyAncestors, MissingAncestors};
8 pub use ancestors::{AncestorsIterator, LazyAncestors, MissingAncestors};
9 mod dirstate;
9 mod dirstate;
10 pub mod discovery;
10 pub mod discovery;
11 pub mod requirements;
11 pub mod requirements;
12 pub mod testing; // unconditionally built, for use from integration tests
12 pub mod testing; // unconditionally built, for use from integration tests
13 pub use dirstate::{
13 pub use dirstate::{
14 dirs_multiset::{DirsMultiset, DirsMultisetIter},
14 dirs_multiset::{DirsMultiset, DirsMultisetIter},
15 dirstate_map::DirstateMap,
15 dirstate_map::DirstateMap,
16 parsers::{pack_dirstate, parse_dirstate, PARENT_SIZE},
16 parsers::{pack_dirstate, parse_dirstate, PARENT_SIZE},
17 status::{
17 status::{
18 status, BadMatch, BadType, DirstateStatus, StatusError, StatusOptions,
18 status, BadMatch, BadType, DirstateStatus, StatusError, StatusOptions,
19 },
19 },
20 CopyMap, CopyMapIter, DirstateEntry, DirstateParents, EntryState,
20 CopyMap, CopyMapIter, DirstateEntry, DirstateParents, EntryState,
21 StateMap, StateMapIter,
21 StateMap, StateMapIter,
22 };
22 };
23 pub mod copy_tracing;
23 pub mod copy_tracing;
24 mod filepatterns;
24 mod filepatterns;
25 pub mod matchers;
25 pub mod matchers;
26 pub mod repo;
26 pub mod repo;
27 pub mod revlog;
27 pub mod revlog;
28 pub use revlog::*;
28 pub use revlog::*;
29 pub mod config;
29 pub mod config;
30 pub mod operations;
30 pub mod operations;
31 pub mod revset;
31 pub mod utils;
32 pub mod utils;
32
33
33 use crate::utils::hg_path::{HgPathBuf, HgPathError};
34 use crate::utils::hg_path::{HgPathBuf, HgPathError};
34 pub use filepatterns::{
35 pub use filepatterns::{
35 parse_pattern_syntax, read_pattern_file, IgnorePattern,
36 parse_pattern_syntax, read_pattern_file, IgnorePattern,
36 PatternFileWarning, PatternSyntax,
37 PatternFileWarning, PatternSyntax,
37 };
38 };
38 use std::collections::HashMap;
39 use std::collections::HashMap;
39 use twox_hash::RandomXxHashBuilder64;
40 use twox_hash::RandomXxHashBuilder64;
40
41
41 /// This is a contract between the `micro-timer` crate and us, to expose
42 /// This is a contract between the `micro-timer` crate and us, to expose
42 /// the `log` crate as `crate::log`.
43 /// the `log` crate as `crate::log`.
43 use log;
44 use log;
44
45
45 pub type LineNumber = usize;
46 pub type LineNumber = usize;
46
47
47 /// Rust's default hasher is too slow because it tries to prevent collision
48 /// Rust's default hasher is too slow because it tries to prevent collision
48 /// attacks. We are not concerned about those: if an ill-minded person has
49 /// attacks. We are not concerned about those: if an ill-minded person has
49 /// write access to your repository, you have other issues.
50 /// write access to your repository, you have other issues.
50 pub type FastHashMap<K, V> = HashMap<K, V, RandomXxHashBuilder64>;
51 pub type FastHashMap<K, V> = HashMap<K, V, RandomXxHashBuilder64>;
51
52
52 #[derive(Clone, Debug, PartialEq)]
53 #[derive(Clone, Debug, PartialEq)]
53 pub enum DirstateParseError {
54 pub enum DirstateParseError {
54 TooLittleData,
55 TooLittleData,
55 Overflow,
56 Overflow,
56 // TODO refactor to use bytes instead of String
57 // TODO refactor to use bytes instead of String
57 CorruptedEntry(String),
58 CorruptedEntry(String),
58 Damaged,
59 Damaged,
59 }
60 }
60
61
61 impl From<std::io::Error> for DirstateParseError {
62 impl From<std::io::Error> for DirstateParseError {
62 fn from(e: std::io::Error) -> Self {
63 fn from(e: std::io::Error) -> Self {
63 DirstateParseError::CorruptedEntry(e.to_string())
64 DirstateParseError::CorruptedEntry(e.to_string())
64 }
65 }
65 }
66 }
66
67
67 impl ToString for DirstateParseError {
68 impl ToString for DirstateParseError {
68 fn to_string(&self) -> String {
69 fn to_string(&self) -> String {
69 use crate::DirstateParseError::*;
70 use crate::DirstateParseError::*;
70 match self {
71 match self {
71 TooLittleData => "Too little data for dirstate.".to_string(),
72 TooLittleData => "Too little data for dirstate.".to_string(),
72 Overflow => "Overflow in dirstate.".to_string(),
73 Overflow => "Overflow in dirstate.".to_string(),
73 CorruptedEntry(e) => format!("Corrupted entry: {:?}.", e),
74 CorruptedEntry(e) => format!("Corrupted entry: {:?}.", e),
74 Damaged => "Dirstate appears to be damaged.".to_string(),
75 Damaged => "Dirstate appears to be damaged.".to_string(),
75 }
76 }
76 }
77 }
77 }
78 }
78
79
79 #[derive(Debug, PartialEq)]
80 #[derive(Debug, PartialEq)]
80 pub enum DirstatePackError {
81 pub enum DirstatePackError {
81 CorruptedEntry(String),
82 CorruptedEntry(String),
82 CorruptedParent,
83 CorruptedParent,
83 BadSize(usize, usize),
84 BadSize(usize, usize),
84 }
85 }
85
86
86 impl From<std::io::Error> for DirstatePackError {
87 impl From<std::io::Error> for DirstatePackError {
87 fn from(e: std::io::Error) -> Self {
88 fn from(e: std::io::Error) -> Self {
88 DirstatePackError::CorruptedEntry(e.to_string())
89 DirstatePackError::CorruptedEntry(e.to_string())
89 }
90 }
90 }
91 }
91 #[derive(Debug, PartialEq)]
92 #[derive(Debug, PartialEq)]
92 pub enum DirstateMapError {
93 pub enum DirstateMapError {
93 PathNotFound(HgPathBuf),
94 PathNotFound(HgPathBuf),
94 EmptyPath,
95 EmptyPath,
95 InvalidPath(HgPathError),
96 InvalidPath(HgPathError),
96 }
97 }
97
98
98 impl ToString for DirstateMapError {
99 impl ToString for DirstateMapError {
99 fn to_string(&self) -> String {
100 fn to_string(&self) -> String {
100 match self {
101 match self {
101 DirstateMapError::PathNotFound(_) => {
102 DirstateMapError::PathNotFound(_) => {
102 "expected a value, found none".to_string()
103 "expected a value, found none".to_string()
103 }
104 }
104 DirstateMapError::EmptyPath => "Overflow in dirstate.".to_string(),
105 DirstateMapError::EmptyPath => "Overflow in dirstate.".to_string(),
105 DirstateMapError::InvalidPath(e) => e.to_string(),
106 DirstateMapError::InvalidPath(e) => e.to_string(),
106 }
107 }
107 }
108 }
108 }
109 }
109
110
110 #[derive(Debug)]
111 #[derive(Debug)]
111 pub enum DirstateError {
112 pub enum DirstateError {
112 Parse(DirstateParseError),
113 Parse(DirstateParseError),
113 Pack(DirstatePackError),
114 Pack(DirstatePackError),
114 Map(DirstateMapError),
115 Map(DirstateMapError),
115 IO(std::io::Error),
116 IO(std::io::Error),
116 }
117 }
117
118
118 impl From<DirstateParseError> for DirstateError {
119 impl From<DirstateParseError> for DirstateError {
119 fn from(e: DirstateParseError) -> Self {
120 fn from(e: DirstateParseError) -> Self {
120 DirstateError::Parse(e)
121 DirstateError::Parse(e)
121 }
122 }
122 }
123 }
123
124
124 impl From<DirstatePackError> for DirstateError {
125 impl From<DirstatePackError> for DirstateError {
125 fn from(e: DirstatePackError) -> Self {
126 fn from(e: DirstatePackError) -> Self {
126 DirstateError::Pack(e)
127 DirstateError::Pack(e)
127 }
128 }
128 }
129 }
129
130
130 #[derive(Debug)]
131 #[derive(Debug)]
131 pub enum PatternError {
132 pub enum PatternError {
132 Path(HgPathError),
133 Path(HgPathError),
133 UnsupportedSyntax(String),
134 UnsupportedSyntax(String),
134 UnsupportedSyntaxInFile(String, String, usize),
135 UnsupportedSyntaxInFile(String, String, usize),
135 TooLong(usize),
136 TooLong(usize),
136 IO(std::io::Error),
137 IO(std::io::Error),
137 /// Needed a pattern that can be turned into a regex but got one that
138 /// Needed a pattern that can be turned into a regex but got one that
138 /// can't. This should only happen through programmer error.
139 /// can't. This should only happen through programmer error.
139 NonRegexPattern(IgnorePattern),
140 NonRegexPattern(IgnorePattern),
140 }
141 }
141
142
142 impl ToString for PatternError {
143 impl ToString for PatternError {
143 fn to_string(&self) -> String {
144 fn to_string(&self) -> String {
144 match self {
145 match self {
145 PatternError::UnsupportedSyntax(syntax) => {
146 PatternError::UnsupportedSyntax(syntax) => {
146 format!("Unsupported syntax {}", syntax)
147 format!("Unsupported syntax {}", syntax)
147 }
148 }
148 PatternError::UnsupportedSyntaxInFile(syntax, file_path, line) => {
149 PatternError::UnsupportedSyntaxInFile(syntax, file_path, line) => {
149 format!(
150 format!(
150 "{}:{}: unsupported syntax {}",
151 "{}:{}: unsupported syntax {}",
151 file_path, line, syntax
152 file_path, line, syntax
152 )
153 )
153 }
154 }
154 PatternError::TooLong(size) => {
155 PatternError::TooLong(size) => {
155 format!("matcher pattern is too long ({} bytes)", size)
156 format!("matcher pattern is too long ({} bytes)", size)
156 }
157 }
157 PatternError::IO(e) => e.to_string(),
158 PatternError::IO(e) => e.to_string(),
158 PatternError::Path(e) => e.to_string(),
159 PatternError::Path(e) => e.to_string(),
159 PatternError::NonRegexPattern(pattern) => {
160 PatternError::NonRegexPattern(pattern) => {
160 format!("'{:?}' cannot be turned into a regex", pattern)
161 format!("'{:?}' cannot be turned into a regex", pattern)
161 }
162 }
162 }
163 }
163 }
164 }
164 }
165 }
165
166
166 impl From<DirstateMapError> for DirstateError {
167 impl From<DirstateMapError> for DirstateError {
167 fn from(e: DirstateMapError) -> Self {
168 fn from(e: DirstateMapError) -> Self {
168 DirstateError::Map(e)
169 DirstateError::Map(e)
169 }
170 }
170 }
171 }
171
172
172 impl From<std::io::Error> for DirstateError {
173 impl From<std::io::Error> for DirstateError {
173 fn from(e: std::io::Error) -> Self {
174 fn from(e: std::io::Error) -> Self {
174 DirstateError::IO(e)
175 DirstateError::IO(e)
175 }
176 }
176 }
177 }
177
178
178 impl From<std::io::Error> for PatternError {
179 impl From<std::io::Error> for PatternError {
179 fn from(e: std::io::Error) -> Self {
180 fn from(e: std::io::Error) -> Self {
180 PatternError::IO(e)
181 PatternError::IO(e)
181 }
182 }
182 }
183 }
183
184
184 impl From<HgPathError> for PatternError {
185 impl From<HgPathError> for PatternError {
185 fn from(e: HgPathError) -> Self {
186 fn from(e: HgPathError) -> Self {
186 PatternError::Path(e)
187 PatternError::Path(e)
187 }
188 }
188 }
189 }
@@ -1,135 +1,125 b''
1 // list_tracked_files.rs
1 // list_tracked_files.rs
2 //
2 //
3 // Copyright 2020 Antoine Cezar <antoine.cezar@octobus.net>
3 // Copyright 2020 Antoine Cezar <antoine.cezar@octobus.net>
4 //
4 //
5 // This software may be used and distributed according to the terms of the
5 // This software may be used and distributed according to the terms of the
6 // GNU General Public License version 2 or any later version.
6 // GNU General Public License version 2 or any later version.
7
7
8 use std::convert::From;
8 use std::convert::From;
9 use std::path::PathBuf;
9 use std::path::PathBuf;
10
10
11 use crate::repo::Repo;
11 use crate::repo::Repo;
12 use crate::revlog::changelog::Changelog;
12 use crate::revlog::changelog::Changelog;
13 use crate::revlog::manifest::Manifest;
13 use crate::revlog::manifest::Manifest;
14 use crate::revlog::path_encode::path_encode;
14 use crate::revlog::path_encode::path_encode;
15 use crate::revlog::revlog::Revlog;
15 use crate::revlog::revlog::Revlog;
16 use crate::revlog::revlog::RevlogError;
16 use crate::revlog::revlog::RevlogError;
17 use crate::revlog::Node;
17 use crate::revlog::Node;
18 use crate::revlog::NodePrefix;
19 use crate::revlog::Revision;
20 use crate::utils::files::get_path_from_bytes;
18 use crate::utils::files::get_path_from_bytes;
21 use crate::utils::hg_path::{HgPath, HgPathBuf};
19 use crate::utils::hg_path::{HgPath, HgPathBuf};
22
20
23 const METADATA_DELIMITER: [u8; 2] = [b'\x01', b'\n'];
21 const METADATA_DELIMITER: [u8; 2] = [b'\x01', b'\n'];
24
22
25 /// Kind of error encountered by `CatRev`
23 /// Kind of error encountered by `CatRev`
26 #[derive(Debug)]
24 #[derive(Debug)]
27 pub enum CatRevErrorKind {
25 pub enum CatRevErrorKind {
28 /// Error when reading a `revlog` file.
26 /// Error when reading a `revlog` file.
29 IoError(std::io::Error),
27 IoError(std::io::Error),
30 /// The revision has not been found.
28 /// The revision has not been found.
31 InvalidRevision,
29 InvalidRevision,
32 /// Found more than one revision whose ID matches the requested prefix
30 /// Found more than one revision whose ID matches the requested prefix
33 AmbiguousPrefix,
31 AmbiguousPrefix,
34 /// A `revlog` file is corrupted.
32 /// A `revlog` file is corrupted.
35 CorruptedRevlog,
33 CorruptedRevlog,
36 /// The `revlog` format version is not supported.
34 /// The `revlog` format version is not supported.
37 UnsuportedRevlogVersion(u16),
35 UnsuportedRevlogVersion(u16),
38 /// The `revlog` data format is not supported.
36 /// The `revlog` data format is not supported.
39 UnknowRevlogDataFormat(u8),
37 UnknowRevlogDataFormat(u8),
40 }
38 }
41
39
42 /// A `CatRev` error
40 /// A `CatRev` error
43 #[derive(Debug)]
41 #[derive(Debug)]
44 pub struct CatRevError {
42 pub struct CatRevError {
45 /// Kind of error encountered by `CatRev`
43 /// Kind of error encountered by `CatRev`
46 pub kind: CatRevErrorKind,
44 pub kind: CatRevErrorKind,
47 }
45 }
48
46
49 impl From<CatRevErrorKind> for CatRevError {
47 impl From<CatRevErrorKind> for CatRevError {
50 fn from(kind: CatRevErrorKind) -> Self {
48 fn from(kind: CatRevErrorKind) -> Self {
51 CatRevError { kind }
49 CatRevError { kind }
52 }
50 }
53 }
51 }
54
52
55 impl From<RevlogError> for CatRevError {
53 impl From<RevlogError> for CatRevError {
56 fn from(err: RevlogError) -> Self {
54 fn from(err: RevlogError) -> Self {
57 match err {
55 match err {
58 RevlogError::IoError(err) => CatRevErrorKind::IoError(err),
56 RevlogError::IoError(err) => CatRevErrorKind::IoError(err),
59 RevlogError::UnsuportedVersion(version) => {
57 RevlogError::UnsuportedVersion(version) => {
60 CatRevErrorKind::UnsuportedRevlogVersion(version)
58 CatRevErrorKind::UnsuportedRevlogVersion(version)
61 }
59 }
62 RevlogError::InvalidRevision => CatRevErrorKind::InvalidRevision,
60 RevlogError::InvalidRevision => CatRevErrorKind::InvalidRevision,
63 RevlogError::AmbiguousPrefix => CatRevErrorKind::AmbiguousPrefix,
61 RevlogError::AmbiguousPrefix => CatRevErrorKind::AmbiguousPrefix,
64 RevlogError::Corrupted => CatRevErrorKind::CorruptedRevlog,
62 RevlogError::Corrupted => CatRevErrorKind::CorruptedRevlog,
65 RevlogError::UnknowDataFormat(format) => {
63 RevlogError::UnknowDataFormat(format) => {
66 CatRevErrorKind::UnknowRevlogDataFormat(format)
64 CatRevErrorKind::UnknowRevlogDataFormat(format)
67 }
65 }
68 }
66 }
69 .into()
67 .into()
70 }
68 }
71 }
69 }
72
70
73 /// Output the contents of the given files at a given revision.
71 /// Output the contents of the given files at a given revision.
74 ///
72 ///
75 /// * `repo`: The repository to read from
73 /// * `repo`: The repository to read from
76 /// * `rev`: The revision to cat the files from.
74 /// * `rev`: The revision to cat the files from.
77 /// * `files`: The files to output.
75 /// * `files`: The files to output.
78 pub fn cat(
76 pub fn cat(
79 repo: &Repo,
77 repo: &Repo,
80 rev: &str,
78 revset: &str,
81 files: &[HgPathBuf],
79 files: &[HgPathBuf],
82 ) -> Result<Vec<u8>, CatRevError> {
80 ) -> Result<Vec<u8>, CatRevError> {
81 let rev = crate::revset::resolve_single(revset, repo)?;
83 let changelog = Changelog::open(repo)?;
82 let changelog = Changelog::open(repo)?;
84 let manifest = Manifest::open(repo)?;
83 let manifest = Manifest::open(repo)?;
85
84 let changelog_entry = changelog.get_rev(rev)?;
86 let changelog_entry = match rev.parse::<Revision>() {
87 Ok(rev) => changelog.get_rev(rev)?,
88 _ => {
89 let changelog_node = NodePrefix::from_hex(&rev)
90 .map_err(|_| CatRevErrorKind::InvalidRevision)?;
91 changelog.get_node(changelog_node)?
92 }
93 };
94 let manifest_node = Node::from_hex(&changelog_entry.manifest_node()?)
85 let manifest_node = Node::from_hex(&changelog_entry.manifest_node()?)
95 .map_err(|_| CatRevErrorKind::CorruptedRevlog)?;
86 .map_err(|_| CatRevErrorKind::CorruptedRevlog)?;
96
97 let manifest_entry = manifest.get_node(manifest_node.into())?;
87 let manifest_entry = manifest.get_node(manifest_node.into())?;
98 let mut bytes = vec![];
88 let mut bytes = vec![];
99
89
100 for (manifest_file, node_bytes) in manifest_entry.files_with_nodes() {
90 for (manifest_file, node_bytes) in manifest_entry.files_with_nodes() {
101 for cat_file in files.iter() {
91 for cat_file in files.iter() {
102 if cat_file.as_bytes() == manifest_file.as_bytes() {
92 if cat_file.as_bytes() == manifest_file.as_bytes() {
103 let index_path = store_path(manifest_file, b".i");
93 let index_path = store_path(manifest_file, b".i");
104 let data_path = store_path(manifest_file, b".d");
94 let data_path = store_path(manifest_file, b".d");
105
95
106 let file_log =
96 let file_log =
107 Revlog::open(repo, &index_path, Some(&data_path))?;
97 Revlog::open(repo, &index_path, Some(&data_path))?;
108 let file_node = Node::from_hex(node_bytes)
98 let file_node = Node::from_hex(node_bytes)
109 .map_err(|_| CatRevErrorKind::CorruptedRevlog)?;
99 .map_err(|_| CatRevErrorKind::CorruptedRevlog)?;
110 let file_rev = file_log.get_node_rev(file_node.into())?;
100 let file_rev = file_log.get_node_rev(file_node.into())?;
111 let data = file_log.get_rev_data(file_rev)?;
101 let data = file_log.get_rev_data(file_rev)?;
112 if data.starts_with(&METADATA_DELIMITER) {
102 if data.starts_with(&METADATA_DELIMITER) {
113 let end_delimiter_position = data
103 let end_delimiter_position = data
114 [METADATA_DELIMITER.len()..]
104 [METADATA_DELIMITER.len()..]
115 .windows(METADATA_DELIMITER.len())
105 .windows(METADATA_DELIMITER.len())
116 .position(|bytes| bytes == METADATA_DELIMITER);
106 .position(|bytes| bytes == METADATA_DELIMITER);
117 if let Some(position) = end_delimiter_position {
107 if let Some(position) = end_delimiter_position {
118 let offset = METADATA_DELIMITER.len() * 2;
108 let offset = METADATA_DELIMITER.len() * 2;
119 bytes.extend(data[position + offset..].iter());
109 bytes.extend(data[position + offset..].iter());
120 }
110 }
121 } else {
111 } else {
122 bytes.extend(data);
112 bytes.extend(data);
123 }
113 }
124 }
114 }
125 }
115 }
126 }
116 }
127
117
128 Ok(bytes)
118 Ok(bytes)
129 }
119 }
130
120
131 fn store_path(hg_path: &HgPath, suffix: &[u8]) -> PathBuf {
121 fn store_path(hg_path: &HgPath, suffix: &[u8]) -> PathBuf {
132 let encoded_bytes =
122 let encoded_bytes =
133 path_encode(&[b"data/", hg_path.as_bytes(), suffix].concat());
123 path_encode(&[b"data/", hg_path.as_bytes(), suffix].concat());
134 get_path_from_bytes(&encoded_bytes).into()
124 get_path_from_bytes(&encoded_bytes).into()
135 }
125 }
@@ -1,102 +1,92 b''
1 // debugdata.rs
1 // debugdata.rs
2 //
2 //
3 // Copyright 2020 Antoine Cezar <antoine.cezar@octobus.net>
3 // Copyright 2020 Antoine Cezar <antoine.cezar@octobus.net>
4 //
4 //
5 // This software may be used and distributed according to the terms of the
5 // This software may be used and distributed according to the terms of the
6 // GNU General Public License version 2 or any later version.
6 // GNU General Public License version 2 or any later version.
7
7
8 use crate::repo::Repo;
8 use crate::repo::Repo;
9 use crate::revlog::revlog::{Revlog, RevlogError};
9 use crate::revlog::revlog::{Revlog, RevlogError};
10 use crate::revlog::NodePrefix;
11 use crate::revlog::Revision;
12
10
13 /// Kind of data to debug
11 /// Kind of data to debug
14 #[derive(Debug, Copy, Clone)]
12 #[derive(Debug, Copy, Clone)]
15 pub enum DebugDataKind {
13 pub enum DebugDataKind {
16 Changelog,
14 Changelog,
17 Manifest,
15 Manifest,
18 }
16 }
19
17
20 /// Kind of error encountered by DebugData
18 /// Kind of error encountered by DebugData
21 #[derive(Debug)]
19 #[derive(Debug)]
22 pub enum DebugDataErrorKind {
20 pub enum DebugDataErrorKind {
23 /// Error when reading a `revlog` file.
21 /// Error when reading a `revlog` file.
24 IoError(std::io::Error),
22 IoError(std::io::Error),
25 /// The revision has not been found.
23 /// The revision has not been found.
26 InvalidRevision,
24 InvalidRevision,
27 /// Found more than one revision whose ID matches the requested prefix
25 /// Found more than one revision whose ID matches the requested prefix
28 AmbiguousPrefix,
26 AmbiguousPrefix,
29 /// A `revlog` file is corrupted.
27 /// A `revlog` file is corrupted.
30 CorruptedRevlog,
28 CorruptedRevlog,
31 /// The `revlog` format version is not supported.
29 /// The `revlog` format version is not supported.
32 UnsuportedRevlogVersion(u16),
30 UnsuportedRevlogVersion(u16),
33 /// The `revlog` data format is not supported.
31 /// The `revlog` data format is not supported.
34 UnknowRevlogDataFormat(u8),
32 UnknowRevlogDataFormat(u8),
35 }
33 }
36
34
37 /// A DebugData error
35 /// A DebugData error
38 #[derive(Debug)]
36 #[derive(Debug)]
39 pub struct DebugDataError {
37 pub struct DebugDataError {
40 /// Kind of error encountered by DebugData
38 /// Kind of error encountered by DebugData
41 pub kind: DebugDataErrorKind,
39 pub kind: DebugDataErrorKind,
42 }
40 }
43
41
44 impl From<DebugDataErrorKind> for DebugDataError {
42 impl From<DebugDataErrorKind> for DebugDataError {
45 fn from(kind: DebugDataErrorKind) -> Self {
43 fn from(kind: DebugDataErrorKind) -> Self {
46 DebugDataError { kind }
44 DebugDataError { kind }
47 }
45 }
48 }
46 }
49
47
50 impl From<std::io::Error> for DebugDataError {
48 impl From<std::io::Error> for DebugDataError {
51 fn from(err: std::io::Error) -> Self {
49 fn from(err: std::io::Error) -> Self {
52 let kind = DebugDataErrorKind::IoError(err);
50 let kind = DebugDataErrorKind::IoError(err);
53 DebugDataError { kind }
51 DebugDataError { kind }
54 }
52 }
55 }
53 }
56
54
57 impl From<RevlogError> for DebugDataError {
55 impl From<RevlogError> for DebugDataError {
58 fn from(err: RevlogError) -> Self {
56 fn from(err: RevlogError) -> Self {
59 match err {
57 match err {
60 RevlogError::IoError(err) => DebugDataErrorKind::IoError(err),
58 RevlogError::IoError(err) => DebugDataErrorKind::IoError(err),
61 RevlogError::UnsuportedVersion(version) => {
59 RevlogError::UnsuportedVersion(version) => {
62 DebugDataErrorKind::UnsuportedRevlogVersion(version)
60 DebugDataErrorKind::UnsuportedRevlogVersion(version)
63 }
61 }
64 RevlogError::InvalidRevision => {
62 RevlogError::InvalidRevision => {
65 DebugDataErrorKind::InvalidRevision
63 DebugDataErrorKind::InvalidRevision
66 }
64 }
67 RevlogError::AmbiguousPrefix => {
65 RevlogError::AmbiguousPrefix => {
68 DebugDataErrorKind::AmbiguousPrefix
66 DebugDataErrorKind::AmbiguousPrefix
69 }
67 }
70 RevlogError::Corrupted => DebugDataErrorKind::CorruptedRevlog,
68 RevlogError::Corrupted => DebugDataErrorKind::CorruptedRevlog,
71 RevlogError::UnknowDataFormat(format) => {
69 RevlogError::UnknowDataFormat(format) => {
72 DebugDataErrorKind::UnknowRevlogDataFormat(format)
70 DebugDataErrorKind::UnknowRevlogDataFormat(format)
73 }
71 }
74 }
72 }
75 .into()
73 .into()
76 }
74 }
77 }
75 }
78
76
79 /// Dump the contents data of a revision.
77 /// Dump the contents data of a revision.
80 pub fn debug_data(
78 pub fn debug_data(
81 repo: &Repo,
79 repo: &Repo,
82 rev: &str,
80 revset: &str,
83 kind: DebugDataKind,
81 kind: DebugDataKind,
84 ) -> Result<Vec<u8>, DebugDataError> {
82 ) -> Result<Vec<u8>, DebugDataError> {
85 let index_file = match kind {
83 let index_file = match kind {
86 DebugDataKind::Changelog => "00changelog.i",
84 DebugDataKind::Changelog => "00changelog.i",
87 DebugDataKind::Manifest => "00manifest.i",
85 DebugDataKind::Manifest => "00manifest.i",
88 };
86 };
89 let revlog = Revlog::open(repo, index_file, None)?;
87 let revlog = Revlog::open(repo, index_file, None)?;
90
88 let rev =
91 let data = match rev.parse::<Revision>() {
89 crate::revset::resolve_rev_number_or_hex_prefix(revset, &revlog)?;
92 Ok(rev) => revlog.get_rev_data(rev)?,
90 let data = revlog.get_rev_data(rev)?;
93 _ => {
94 let node = NodePrefix::from_hex(&rev)
95 .map_err(|_| DebugDataErrorKind::InvalidRevision)?;
96 let rev = revlog.get_node_rev(node)?;
97 revlog.get_rev_data(rev)?
98 }
99 };
100
101 Ok(data)
91 Ok(data)
102 }
92 }
@@ -1,165 +1,157 b''
1 // list_tracked_files.rs
1 // list_tracked_files.rs
2 //
2 //
3 // Copyright 2020 Antoine Cezar <antoine.cezar@octobus.net>
3 // Copyright 2020 Antoine Cezar <antoine.cezar@octobus.net>
4 //
4 //
5 // This software may be used and distributed according to the terms of the
5 // This software may be used and distributed according to the terms of the
6 // GNU General Public License version 2 or any later version.
6 // GNU General Public License version 2 or any later version.
7
7
8 use crate::dirstate::parsers::parse_dirstate;
8 use crate::dirstate::parsers::parse_dirstate;
9 use crate::repo::Repo;
9 use crate::repo::Repo;
10 use crate::revlog::changelog::Changelog;
10 use crate::revlog::changelog::Changelog;
11 use crate::revlog::manifest::{Manifest, ManifestEntry};
11 use crate::revlog::manifest::{Manifest, ManifestEntry};
12 use crate::revlog::node::{Node, NodePrefix};
12 use crate::revlog::node::Node;
13 use crate::revlog::revlog::RevlogError;
13 use crate::revlog::revlog::RevlogError;
14 use crate::revlog::Revision;
15 use crate::utils::hg_path::HgPath;
14 use crate::utils::hg_path::HgPath;
16 use crate::{DirstateParseError, EntryState};
15 use crate::{DirstateParseError, EntryState};
17 use rayon::prelude::*;
16 use rayon::prelude::*;
18 use std::convert::From;
17 use std::convert::From;
19
18
20 /// Kind of error encountered by `ListDirstateTrackedFiles`
19 /// Kind of error encountered by `ListDirstateTrackedFiles`
21 #[derive(Debug)]
20 #[derive(Debug)]
22 pub enum ListDirstateTrackedFilesErrorKind {
21 pub enum ListDirstateTrackedFilesErrorKind {
23 /// Error when reading the `dirstate` file
22 /// Error when reading the `dirstate` file
24 IoError(std::io::Error),
23 IoError(std::io::Error),
25 /// Error when parsing the `dirstate` file
24 /// Error when parsing the `dirstate` file
26 ParseError(DirstateParseError),
25 ParseError(DirstateParseError),
27 }
26 }
28
27
29 /// A `ListDirstateTrackedFiles` error
28 /// A `ListDirstateTrackedFiles` error
30 #[derive(Debug)]
29 #[derive(Debug)]
31 pub struct ListDirstateTrackedFilesError {
30 pub struct ListDirstateTrackedFilesError {
32 /// Kind of error encountered by `ListDirstateTrackedFiles`
31 /// Kind of error encountered by `ListDirstateTrackedFiles`
33 pub kind: ListDirstateTrackedFilesErrorKind,
32 pub kind: ListDirstateTrackedFilesErrorKind,
34 }
33 }
35
34
36 impl From<ListDirstateTrackedFilesErrorKind>
35 impl From<ListDirstateTrackedFilesErrorKind>
37 for ListDirstateTrackedFilesError
36 for ListDirstateTrackedFilesError
38 {
37 {
39 fn from(kind: ListDirstateTrackedFilesErrorKind) -> Self {
38 fn from(kind: ListDirstateTrackedFilesErrorKind) -> Self {
40 ListDirstateTrackedFilesError { kind }
39 ListDirstateTrackedFilesError { kind }
41 }
40 }
42 }
41 }
43
42
44 impl From<std::io::Error> for ListDirstateTrackedFilesError {
43 impl From<std::io::Error> for ListDirstateTrackedFilesError {
45 fn from(err: std::io::Error) -> Self {
44 fn from(err: std::io::Error) -> Self {
46 let kind = ListDirstateTrackedFilesErrorKind::IoError(err);
45 let kind = ListDirstateTrackedFilesErrorKind::IoError(err);
47 ListDirstateTrackedFilesError { kind }
46 ListDirstateTrackedFilesError { kind }
48 }
47 }
49 }
48 }
50
49
51 /// List files under Mercurial control in the working directory
50 /// List files under Mercurial control in the working directory
52 /// by reading the dirstate
51 /// by reading the dirstate
53 pub struct Dirstate {
52 pub struct Dirstate {
54 /// The `dirstate` content.
53 /// The `dirstate` content.
55 content: Vec<u8>,
54 content: Vec<u8>,
56 }
55 }
57
56
58 impl Dirstate {
57 impl Dirstate {
59 pub fn new(repo: &Repo) -> Result<Self, ListDirstateTrackedFilesError> {
58 pub fn new(repo: &Repo) -> Result<Self, ListDirstateTrackedFilesError> {
60 let content = repo.hg_vfs().read("dirstate")?;
59 let content = repo.hg_vfs().read("dirstate")?;
61 Ok(Self { content })
60 Ok(Self { content })
62 }
61 }
63
62
64 pub fn tracked_files(
63 pub fn tracked_files(
65 &self,
64 &self,
66 ) -> Result<Vec<&HgPath>, ListDirstateTrackedFilesError> {
65 ) -> Result<Vec<&HgPath>, ListDirstateTrackedFilesError> {
67 let (_, entries, _) = parse_dirstate(&self.content)
66 let (_, entries, _) = parse_dirstate(&self.content)
68 .map_err(ListDirstateTrackedFilesErrorKind::ParseError)?;
67 .map_err(ListDirstateTrackedFilesErrorKind::ParseError)?;
69 let mut files: Vec<&HgPath> = entries
68 let mut files: Vec<&HgPath> = entries
70 .into_iter()
69 .into_iter()
71 .filter_map(|(path, entry)| match entry.state {
70 .filter_map(|(path, entry)| match entry.state {
72 EntryState::Removed => None,
71 EntryState::Removed => None,
73 _ => Some(path),
72 _ => Some(path),
74 })
73 })
75 .collect();
74 .collect();
76 files.par_sort_unstable();
75 files.par_sort_unstable();
77 Ok(files)
76 Ok(files)
78 }
77 }
79 }
78 }
80
79
81 /// Kind of error encountered by `ListRevTrackedFiles`
80 /// Kind of error encountered by `ListRevTrackedFiles`
82 #[derive(Debug)]
81 #[derive(Debug)]
83 pub enum ListRevTrackedFilesErrorKind {
82 pub enum ListRevTrackedFilesErrorKind {
84 /// Error when reading a `revlog` file.
83 /// Error when reading a `revlog` file.
85 IoError(std::io::Error),
84 IoError(std::io::Error),
86 /// The revision has not been found.
85 /// The revision has not been found.
87 InvalidRevision,
86 InvalidRevision,
88 /// Found more than one revision whose ID matches the requested prefix
87 /// Found more than one revision whose ID matches the requested prefix
89 AmbiguousPrefix,
88 AmbiguousPrefix,
90 /// A `revlog` file is corrupted.
89 /// A `revlog` file is corrupted.
91 CorruptedRevlog,
90 CorruptedRevlog,
92 /// The `revlog` format version is not supported.
91 /// The `revlog` format version is not supported.
93 UnsuportedRevlogVersion(u16),
92 UnsuportedRevlogVersion(u16),
94 /// The `revlog` data format is not supported.
93 /// The `revlog` data format is not supported.
95 UnknowRevlogDataFormat(u8),
94 UnknowRevlogDataFormat(u8),
96 }
95 }
97
96
98 /// A `ListRevTrackedFiles` error
97 /// A `ListRevTrackedFiles` error
99 #[derive(Debug)]
98 #[derive(Debug)]
100 pub struct ListRevTrackedFilesError {
99 pub struct ListRevTrackedFilesError {
101 /// Kind of error encountered by `ListRevTrackedFiles`
100 /// Kind of error encountered by `ListRevTrackedFiles`
102 pub kind: ListRevTrackedFilesErrorKind,
101 pub kind: ListRevTrackedFilesErrorKind,
103 }
102 }
104
103
105 impl From<ListRevTrackedFilesErrorKind> for ListRevTrackedFilesError {
104 impl From<ListRevTrackedFilesErrorKind> for ListRevTrackedFilesError {
106 fn from(kind: ListRevTrackedFilesErrorKind) -> Self {
105 fn from(kind: ListRevTrackedFilesErrorKind) -> Self {
107 ListRevTrackedFilesError { kind }
106 ListRevTrackedFilesError { kind }
108 }
107 }
109 }
108 }
110
109
111 impl From<RevlogError> for ListRevTrackedFilesError {
110 impl From<RevlogError> for ListRevTrackedFilesError {
112 fn from(err: RevlogError) -> Self {
111 fn from(err: RevlogError) -> Self {
113 match err {
112 match err {
114 RevlogError::IoError(err) => {
113 RevlogError::IoError(err) => {
115 ListRevTrackedFilesErrorKind::IoError(err)
114 ListRevTrackedFilesErrorKind::IoError(err)
116 }
115 }
117 RevlogError::UnsuportedVersion(version) => {
116 RevlogError::UnsuportedVersion(version) => {
118 ListRevTrackedFilesErrorKind::UnsuportedRevlogVersion(version)
117 ListRevTrackedFilesErrorKind::UnsuportedRevlogVersion(version)
119 }
118 }
120 RevlogError::InvalidRevision => {
119 RevlogError::InvalidRevision => {
121 ListRevTrackedFilesErrorKind::InvalidRevision
120 ListRevTrackedFilesErrorKind::InvalidRevision
122 }
121 }
123 RevlogError::AmbiguousPrefix => {
122 RevlogError::AmbiguousPrefix => {
124 ListRevTrackedFilesErrorKind::AmbiguousPrefix
123 ListRevTrackedFilesErrorKind::AmbiguousPrefix
125 }
124 }
126 RevlogError::Corrupted => {
125 RevlogError::Corrupted => {
127 ListRevTrackedFilesErrorKind::CorruptedRevlog
126 ListRevTrackedFilesErrorKind::CorruptedRevlog
128 }
127 }
129 RevlogError::UnknowDataFormat(format) => {
128 RevlogError::UnknowDataFormat(format) => {
130 ListRevTrackedFilesErrorKind::UnknowRevlogDataFormat(format)
129 ListRevTrackedFilesErrorKind::UnknowRevlogDataFormat(format)
131 }
130 }
132 }
131 }
133 .into()
132 .into()
134 }
133 }
135 }
134 }
136
135
137 /// List files under Mercurial control at a given revision.
136 /// List files under Mercurial control at a given revision.
138 pub fn list_rev_tracked_files(
137 pub fn list_rev_tracked_files(
139 repo: &Repo,
138 repo: &Repo,
140 rev: &str,
139 revset: &str,
141 ) -> Result<FilesForRev, ListRevTrackedFilesError> {
140 ) -> Result<FilesForRev, ListRevTrackedFilesError> {
141 let rev = crate::revset::resolve_single(revset, repo)?;
142 let changelog = Changelog::open(repo)?;
142 let changelog = Changelog::open(repo)?;
143 let manifest = Manifest::open(repo)?;
143 let manifest = Manifest::open(repo)?;
144
144 let changelog_entry = changelog.get_rev(rev)?;
145 let changelog_entry = match rev.parse::<Revision>() {
146 Ok(rev) => changelog.get_rev(rev)?,
147 _ => {
148 let changelog_node = NodePrefix::from_hex(&rev)
149 .or(Err(ListRevTrackedFilesErrorKind::InvalidRevision))?;
150 changelog.get_node(changelog_node)?
151 }
152 };
153 let manifest_node = Node::from_hex(&changelog_entry.manifest_node()?)
145 let manifest_node = Node::from_hex(&changelog_entry.manifest_node()?)
154 .or(Err(ListRevTrackedFilesErrorKind::CorruptedRevlog))?;
146 .or(Err(ListRevTrackedFilesErrorKind::CorruptedRevlog))?;
155 let manifest_entry = manifest.get_node(manifest_node.into())?;
147 let manifest_entry = manifest.get_node(manifest_node.into())?;
156 Ok(FilesForRev(manifest_entry))
148 Ok(FilesForRev(manifest_entry))
157 }
149 }
158
150
159 pub struct FilesForRev(ManifestEntry);
151 pub struct FilesForRev(ManifestEntry);
160
152
161 impl FilesForRev {
153 impl FilesForRev {
162 pub fn iter(&self) -> impl Iterator<Item = &HgPath> {
154 pub fn iter(&self) -> impl Iterator<Item = &HgPath> {
163 self.0.files()
155 self.0.files()
164 }
156 }
165 }
157 }
@@ -1,58 +1,58 b''
1 use crate::repo::Repo;
1 use crate::repo::Repo;
2 use crate::revlog::revlog::{Revlog, RevlogError};
2 use crate::revlog::revlog::{Revlog, RevlogError};
3 use crate::revlog::NodePrefix;
3 use crate::revlog::NodePrefix;
4 use crate::revlog::Revision;
4 use crate::revlog::Revision;
5
5
6 /// A specialized `Revlog` to work with `changelog` data format.
6 /// A specialized `Revlog` to work with `changelog` data format.
7 pub struct Changelog {
7 pub struct Changelog {
8 /// The generic `revlog` format.
8 /// The generic `revlog` format.
9 revlog: Revlog,
9 pub(crate) revlog: Revlog,
10 }
10 }
11
11
12 impl Changelog {
12 impl Changelog {
13 /// Open the `changelog` of a repository given by its root.
13 /// Open the `changelog` of a repository given by its root.
14 pub fn open(repo: &Repo) -> Result<Self, RevlogError> {
14 pub fn open(repo: &Repo) -> Result<Self, RevlogError> {
15 let revlog = Revlog::open(repo, "00changelog.i", None)?;
15 let revlog = Revlog::open(repo, "00changelog.i", None)?;
16 Ok(Self { revlog })
16 Ok(Self { revlog })
17 }
17 }
18
18
19 /// Return the `ChangelogEntry` of a given node id.
19 /// Return the `ChangelogEntry` of a given node id.
20 pub fn get_node(
20 pub fn get_node(
21 &self,
21 &self,
22 node: NodePrefix,
22 node: NodePrefix,
23 ) -> Result<ChangelogEntry, RevlogError> {
23 ) -> Result<ChangelogEntry, RevlogError> {
24 let rev = self.revlog.get_node_rev(node)?;
24 let rev = self.revlog.get_node_rev(node)?;
25 self.get_rev(rev)
25 self.get_rev(rev)
26 }
26 }
27
27
28 /// Return the `ChangelogEntry` of a given node revision.
28 /// Return the `ChangelogEntry` of a given node revision.
29 pub fn get_rev(
29 pub fn get_rev(
30 &self,
30 &self,
31 rev: Revision,
31 rev: Revision,
32 ) -> Result<ChangelogEntry, RevlogError> {
32 ) -> Result<ChangelogEntry, RevlogError> {
33 let bytes = self.revlog.get_rev_data(rev)?;
33 let bytes = self.revlog.get_rev_data(rev)?;
34 Ok(ChangelogEntry { bytes })
34 Ok(ChangelogEntry { bytes })
35 }
35 }
36 }
36 }
37
37
38 /// `Changelog` entry which knows how to interpret the `changelog` data bytes.
38 /// `Changelog` entry which knows how to interpret the `changelog` data bytes.
39 #[derive(Debug)]
39 #[derive(Debug)]
40 pub struct ChangelogEntry {
40 pub struct ChangelogEntry {
41 /// The data bytes of the `changelog` entry.
41 /// The data bytes of the `changelog` entry.
42 bytes: Vec<u8>,
42 bytes: Vec<u8>,
43 }
43 }
44
44
45 impl ChangelogEntry {
45 impl ChangelogEntry {
46 /// Return an iterator over the lines of the entry.
46 /// Return an iterator over the lines of the entry.
47 pub fn lines(&self) -> impl Iterator<Item = &[u8]> {
47 pub fn lines(&self) -> impl Iterator<Item = &[u8]> {
48 self.bytes
48 self.bytes
49 .split(|b| b == &b'\n')
49 .split(|b| b == &b'\n')
50 .filter(|line| !line.is_empty())
50 .filter(|line| !line.is_empty())
51 }
51 }
52
52
53 /// Return the node id of the `manifest` referenced by this `changelog`
53 /// Return the node id of the `manifest` referenced by this `changelog`
54 /// entry.
54 /// entry.
55 pub fn manifest_node(&self) -> Result<&[u8], RevlogError> {
55 pub fn manifest_node(&self) -> Result<&[u8], RevlogError> {
56 self.lines().next().ok_or(RevlogError::Corrupted)
56 self.lines().next().ok_or(RevlogError::Corrupted)
57 }
57 }
58 }
58 }
@@ -1,382 +1,387 b''
1 use std::borrow::Cow;
1 use std::borrow::Cow;
2 use std::io::Read;
2 use std::io::Read;
3 use std::ops::Deref;
3 use std::ops::Deref;
4 use std::path::Path;
4 use std::path::Path;
5
5
6 use byteorder::{BigEndian, ByteOrder};
6 use byteorder::{BigEndian, ByteOrder};
7 use crypto::digest::Digest;
7 use crypto::digest::Digest;
8 use crypto::sha1::Sha1;
8 use crypto::sha1::Sha1;
9 use flate2::read::ZlibDecoder;
9 use flate2::read::ZlibDecoder;
10 use micro_timer::timed;
10 use micro_timer::timed;
11 use zstd;
11 use zstd;
12
12
13 use super::index::Index;
13 use super::index::Index;
14 use super::node::{NodePrefix, NODE_BYTES_LENGTH, NULL_NODE};
14 use super::node::{NodePrefix, NODE_BYTES_LENGTH, NULL_NODE};
15 use super::nodemap;
15 use super::nodemap;
16 use super::nodemap::NodeMap;
16 use super::nodemap::NodeMap;
17 use super::nodemap_docket::NodeMapDocket;
17 use super::nodemap_docket::NodeMapDocket;
18 use super::patch;
18 use super::patch;
19 use crate::repo::Repo;
19 use crate::repo::Repo;
20 use crate::revlog::Revision;
20 use crate::revlog::Revision;
21
21
22 pub enum RevlogError {
22 pub enum RevlogError {
23 IoError(std::io::Error),
23 IoError(std::io::Error),
24 UnsuportedVersion(u16),
24 UnsuportedVersion(u16),
25 InvalidRevision,
25 InvalidRevision,
26 /// Found more than one entry whose ID matches the requested prefix
26 /// Found more than one entry whose ID matches the requested prefix
27 AmbiguousPrefix,
27 AmbiguousPrefix,
28 Corrupted,
28 Corrupted,
29 UnknowDataFormat(u8),
29 UnknowDataFormat(u8),
30 }
30 }
31
31
32 impl From<bytes_cast::FromBytesError> for RevlogError {
32 impl From<bytes_cast::FromBytesError> for RevlogError {
33 fn from(_: bytes_cast::FromBytesError) -> Self {
33 fn from(_: bytes_cast::FromBytesError) -> Self {
34 RevlogError::Corrupted
34 RevlogError::Corrupted
35 }
35 }
36 }
36 }
37
37
38 /// Read only implementation of revlog.
38 /// Read only implementation of revlog.
39 pub struct Revlog {
39 pub struct Revlog {
40 /// When index and data are not interleaved: bytes of the revlog index.
40 /// When index and data are not interleaved: bytes of the revlog index.
41 /// When index and data are interleaved: bytes of the revlog index and
41 /// When index and data are interleaved: bytes of the revlog index and
42 /// data.
42 /// data.
43 index: Index,
43 index: Index,
44 /// When index and data are not interleaved: bytes of the revlog data
44 /// When index and data are not interleaved: bytes of the revlog data
45 data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>>,
45 data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>>,
46 /// When present on disk: the persistent nodemap for this revlog
46 /// When present on disk: the persistent nodemap for this revlog
47 nodemap: Option<nodemap::NodeTree>,
47 nodemap: Option<nodemap::NodeTree>,
48 }
48 }
49
49
50 impl Revlog {
50 impl Revlog {
51 /// Open a revlog index file.
51 /// Open a revlog index file.
52 ///
52 ///
53 /// It will also open the associated data file if index and data are not
53 /// It will also open the associated data file if index and data are not
54 /// interleaved.
54 /// interleaved.
55 #[timed]
55 #[timed]
56 pub fn open(
56 pub fn open(
57 repo: &Repo,
57 repo: &Repo,
58 index_path: impl AsRef<Path>,
58 index_path: impl AsRef<Path>,
59 data_path: Option<&Path>,
59 data_path: Option<&Path>,
60 ) -> Result<Self, RevlogError> {
60 ) -> Result<Self, RevlogError> {
61 let index_path = index_path.as_ref();
61 let index_path = index_path.as_ref();
62 let index_mmap = repo
62 let index_mmap = repo
63 .store_vfs()
63 .store_vfs()
64 .mmap_open(&index_path)
64 .mmap_open(&index_path)
65 .map_err(RevlogError::IoError)?;
65 .map_err(RevlogError::IoError)?;
66
66
67 let version = get_version(&index_mmap);
67 let version = get_version(&index_mmap);
68 if version != 1 {
68 if version != 1 {
69 return Err(RevlogError::UnsuportedVersion(version));
69 return Err(RevlogError::UnsuportedVersion(version));
70 }
70 }
71
71
72 let index = Index::new(Box::new(index_mmap))?;
72 let index = Index::new(Box::new(index_mmap))?;
73
73
74 let default_data_path = index_path.with_extension("d");
74 let default_data_path = index_path.with_extension("d");
75
75
76 // type annotation required
76 // type annotation required
77 // won't recognize Mmap as Deref<Target = [u8]>
77 // won't recognize Mmap as Deref<Target = [u8]>
78 let data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>> =
78 let data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>> =
79 if index.is_inline() {
79 if index.is_inline() {
80 None
80 None
81 } else {
81 } else {
82 let data_path = data_path.unwrap_or(&default_data_path);
82 let data_path = data_path.unwrap_or(&default_data_path);
83 let data_mmap = repo
83 let data_mmap = repo
84 .store_vfs()
84 .store_vfs()
85 .mmap_open(data_path)
85 .mmap_open(data_path)
86 .map_err(RevlogError::IoError)?;
86 .map_err(RevlogError::IoError)?;
87 Some(Box::new(data_mmap))
87 Some(Box::new(data_mmap))
88 };
88 };
89
89
90 let nodemap = NodeMapDocket::read_from_file(repo, index_path)?.map(
90 let nodemap = NodeMapDocket::read_from_file(repo, index_path)?.map(
91 |(docket, data)| {
91 |(docket, data)| {
92 nodemap::NodeTree::load_bytes(
92 nodemap::NodeTree::load_bytes(
93 Box::new(data),
93 Box::new(data),
94 docket.data_length,
94 docket.data_length,
95 )
95 )
96 },
96 },
97 );
97 );
98
98
99 Ok(Revlog {
99 Ok(Revlog {
100 index,
100 index,
101 data_bytes,
101 data_bytes,
102 nodemap,
102 nodemap,
103 })
103 })
104 }
104 }
105
105
106 /// Return number of entries of the `Revlog`.
106 /// Return number of entries of the `Revlog`.
107 pub fn len(&self) -> usize {
107 pub fn len(&self) -> usize {
108 self.index.len()
108 self.index.len()
109 }
109 }
110
110
111 /// Returns `true` if the `Revlog` has zero `entries`.
111 /// Returns `true` if the `Revlog` has zero `entries`.
112 pub fn is_empty(&self) -> bool {
112 pub fn is_empty(&self) -> bool {
113 self.index.is_empty()
113 self.index.is_empty()
114 }
114 }
115
115
116 /// Return the revision number associated to a node (or node prefix).
116 /// Return the revision number associated to a node (or node prefix).
117 #[timed]
117 #[timed]
118 pub fn get_node_rev(
118 pub fn get_node_rev(
119 &self,
119 &self,
120 node: NodePrefix,
120 node: NodePrefix,
121 ) -> Result<Revision, RevlogError> {
121 ) -> Result<Revision, RevlogError> {
122 if let Some(nodemap) = &self.nodemap {
122 if let Some(nodemap) = &self.nodemap {
123 return nodemap
123 return nodemap
124 .find_bin(&self.index, node)
124 .find_bin(&self.index, node)
125 // TODO: propagate details of this error:
125 // TODO: propagate details of this error:
126 .map_err(|_| RevlogError::Corrupted)?
126 .map_err(|_| RevlogError::Corrupted)?
127 .ok_or(RevlogError::InvalidRevision);
127 .ok_or(RevlogError::InvalidRevision);
128 }
128 }
129
129
130 // Fallback to linear scan when a persistent nodemap is not present.
130 // Fallback to linear scan when a persistent nodemap is not present.
131 // This happens when the persistent-nodemap experimental feature is not
131 // This happens when the persistent-nodemap experimental feature is not
132 // enabled, or for small revlogs.
132 // enabled, or for small revlogs.
133 //
133 //
134 // TODO: consider building a non-persistent nodemap in memory to
134 // TODO: consider building a non-persistent nodemap in memory to
135 // optimize these cases.
135 // optimize these cases.
136 let mut found_by_prefix = None;
136 let mut found_by_prefix = None;
137 for rev in (0..self.len() as Revision).rev() {
137 for rev in (0..self.len() as Revision).rev() {
138 let index_entry =
138 let index_entry =
139 self.index.get_entry(rev).ok_or(RevlogError::Corrupted)?;
139 self.index.get_entry(rev).ok_or(RevlogError::Corrupted)?;
140 if node == *index_entry.hash() {
140 if node == *index_entry.hash() {
141 return Ok(rev);
141 return Ok(rev);
142 }
142 }
143 if node.is_prefix_of(index_entry.hash()) {
143 if node.is_prefix_of(index_entry.hash()) {
144 if found_by_prefix.is_some() {
144 if found_by_prefix.is_some() {
145 return Err(RevlogError::AmbiguousPrefix);
145 return Err(RevlogError::AmbiguousPrefix);
146 }
146 }
147 found_by_prefix = Some(rev)
147 found_by_prefix = Some(rev)
148 }
148 }
149 }
149 }
150 found_by_prefix.ok_or(RevlogError::InvalidRevision)
150 found_by_prefix.ok_or(RevlogError::InvalidRevision)
151 }
151 }
152
152
153 /// Returns whether the given revision exists in this revlog.
154 pub fn has_rev(&self, rev: Revision) -> bool {
155 self.index.get_entry(rev).is_some()
156 }
157
153 /// Return the full data associated to a revision.
158 /// Return the full data associated to a revision.
154 ///
159 ///
155 /// All entries required to build the final data out of deltas will be
160 /// All entries required to build the final data out of deltas will be
156 /// retrieved as needed, and the deltas will be applied to the initial
161 /// retrieved as needed, and the deltas will be applied to the initial
157 /// snapshot to rebuild the final data.
162 /// snapshot to rebuild the final data.
158 #[timed]
163 #[timed]
159 pub fn get_rev_data(&self, rev: Revision) -> Result<Vec<u8>, RevlogError> {
164 pub fn get_rev_data(&self, rev: Revision) -> Result<Vec<u8>, RevlogError> {
160 // Todo return -> Cow
165 // Todo return -> Cow
161 let mut entry = self.get_entry(rev)?;
166 let mut entry = self.get_entry(rev)?;
162 let mut delta_chain = vec![];
167 let mut delta_chain = vec![];
163 while let Some(base_rev) = entry.base_rev {
168 while let Some(base_rev) = entry.base_rev {
164 delta_chain.push(entry);
169 delta_chain.push(entry);
165 entry =
170 entry =
166 self.get_entry(base_rev).or(Err(RevlogError::Corrupted))?;
171 self.get_entry(base_rev).or(Err(RevlogError::Corrupted))?;
167 }
172 }
168
173
169 // TODO do not look twice in the index
174 // TODO do not look twice in the index
170 let index_entry = self
175 let index_entry = self
171 .index
176 .index
172 .get_entry(rev)
177 .get_entry(rev)
173 .ok_or(RevlogError::InvalidRevision)?;
178 .ok_or(RevlogError::InvalidRevision)?;
174
179
175 let data: Vec<u8> = if delta_chain.is_empty() {
180 let data: Vec<u8> = if delta_chain.is_empty() {
176 entry.data()?.into()
181 entry.data()?.into()
177 } else {
182 } else {
178 Revlog::build_data_from_deltas(entry, &delta_chain)?
183 Revlog::build_data_from_deltas(entry, &delta_chain)?
179 };
184 };
180
185
181 if self.check_hash(
186 if self.check_hash(
182 index_entry.p1(),
187 index_entry.p1(),
183 index_entry.p2(),
188 index_entry.p2(),
184 index_entry.hash().as_bytes(),
189 index_entry.hash().as_bytes(),
185 &data,
190 &data,
186 ) {
191 ) {
187 Ok(data)
192 Ok(data)
188 } else {
193 } else {
189 Err(RevlogError::Corrupted)
194 Err(RevlogError::Corrupted)
190 }
195 }
191 }
196 }
192
197
193 /// Check the hash of some given data against the recorded hash.
198 /// Check the hash of some given data against the recorded hash.
194 pub fn check_hash(
199 pub fn check_hash(
195 &self,
200 &self,
196 p1: Revision,
201 p1: Revision,
197 p2: Revision,
202 p2: Revision,
198 expected: &[u8],
203 expected: &[u8],
199 data: &[u8],
204 data: &[u8],
200 ) -> bool {
205 ) -> bool {
201 let e1 = self.index.get_entry(p1);
206 let e1 = self.index.get_entry(p1);
202 let h1 = match e1 {
207 let h1 = match e1 {
203 Some(ref entry) => entry.hash(),
208 Some(ref entry) => entry.hash(),
204 None => &NULL_NODE,
209 None => &NULL_NODE,
205 };
210 };
206 let e2 = self.index.get_entry(p2);
211 let e2 = self.index.get_entry(p2);
207 let h2 = match e2 {
212 let h2 = match e2 {
208 Some(ref entry) => entry.hash(),
213 Some(ref entry) => entry.hash(),
209 None => &NULL_NODE,
214 None => &NULL_NODE,
210 };
215 };
211
216
212 hash(data, h1.as_bytes(), h2.as_bytes()).as_slice() == expected
217 hash(data, h1.as_bytes(), h2.as_bytes()).as_slice() == expected
213 }
218 }
214
219
215 /// Build the full data of a revision out of its snapshot
220 /// Build the full data of a revision out of its snapshot
216 /// and its deltas.
221 /// and its deltas.
217 #[timed]
222 #[timed]
218 fn build_data_from_deltas(
223 fn build_data_from_deltas(
219 snapshot: RevlogEntry,
224 snapshot: RevlogEntry,
220 deltas: &[RevlogEntry],
225 deltas: &[RevlogEntry],
221 ) -> Result<Vec<u8>, RevlogError> {
226 ) -> Result<Vec<u8>, RevlogError> {
222 let snapshot = snapshot.data()?;
227 let snapshot = snapshot.data()?;
223 let deltas = deltas
228 let deltas = deltas
224 .iter()
229 .iter()
225 .rev()
230 .rev()
226 .map(RevlogEntry::data)
231 .map(RevlogEntry::data)
227 .collect::<Result<Vec<Cow<'_, [u8]>>, RevlogError>>()?;
232 .collect::<Result<Vec<Cow<'_, [u8]>>, RevlogError>>()?;
228 let patches: Vec<_> =
233 let patches: Vec<_> =
229 deltas.iter().map(|d| patch::PatchList::new(d)).collect();
234 deltas.iter().map(|d| patch::PatchList::new(d)).collect();
230 let patch = patch::fold_patch_lists(&patches);
235 let patch = patch::fold_patch_lists(&patches);
231 Ok(patch.apply(&snapshot))
236 Ok(patch.apply(&snapshot))
232 }
237 }
233
238
234 /// Return the revlog data.
239 /// Return the revlog data.
235 fn data(&self) -> &[u8] {
240 fn data(&self) -> &[u8] {
236 match self.data_bytes {
241 match self.data_bytes {
237 Some(ref data_bytes) => &data_bytes,
242 Some(ref data_bytes) => &data_bytes,
238 None => panic!(
243 None => panic!(
239 "forgot to load the data or trying to access inline data"
244 "forgot to load the data or trying to access inline data"
240 ),
245 ),
241 }
246 }
242 }
247 }
243
248
244 /// Get an entry of the revlog.
249 /// Get an entry of the revlog.
245 fn get_entry(&self, rev: Revision) -> Result<RevlogEntry, RevlogError> {
250 fn get_entry(&self, rev: Revision) -> Result<RevlogEntry, RevlogError> {
246 let index_entry = self
251 let index_entry = self
247 .index
252 .index
248 .get_entry(rev)
253 .get_entry(rev)
249 .ok_or(RevlogError::InvalidRevision)?;
254 .ok_or(RevlogError::InvalidRevision)?;
250 let start = index_entry.offset();
255 let start = index_entry.offset();
251 let end = start + index_entry.compressed_len();
256 let end = start + index_entry.compressed_len();
252 let data = if self.index.is_inline() {
257 let data = if self.index.is_inline() {
253 self.index.data(start, end)
258 self.index.data(start, end)
254 } else {
259 } else {
255 &self.data()[start..end]
260 &self.data()[start..end]
256 };
261 };
257 let entry = RevlogEntry {
262 let entry = RevlogEntry {
258 rev,
263 rev,
259 bytes: data,
264 bytes: data,
260 compressed_len: index_entry.compressed_len(),
265 compressed_len: index_entry.compressed_len(),
261 uncompressed_len: index_entry.uncompressed_len(),
266 uncompressed_len: index_entry.uncompressed_len(),
262 base_rev: if index_entry.base_revision() == rev {
267 base_rev: if index_entry.base_revision() == rev {
263 None
268 None
264 } else {
269 } else {
265 Some(index_entry.base_revision())
270 Some(index_entry.base_revision())
266 },
271 },
267 };
272 };
268 Ok(entry)
273 Ok(entry)
269 }
274 }
270 }
275 }
271
276
272 /// The revlog entry's bytes and the necessary information to extract
277 /// The revlog entry's bytes and the necessary information to extract
273 /// the entry's data.
278 /// the entry's data.
274 #[derive(Debug)]
279 #[derive(Debug)]
275 pub struct RevlogEntry<'a> {
280 pub struct RevlogEntry<'a> {
276 rev: Revision,
281 rev: Revision,
277 bytes: &'a [u8],
282 bytes: &'a [u8],
278 compressed_len: usize,
283 compressed_len: usize,
279 uncompressed_len: usize,
284 uncompressed_len: usize,
280 base_rev: Option<Revision>,
285 base_rev: Option<Revision>,
281 }
286 }
282
287
283 impl<'a> RevlogEntry<'a> {
288 impl<'a> RevlogEntry<'a> {
284 /// Extract the data contained in the entry.
289 /// Extract the data contained in the entry.
285 pub fn data(&self) -> Result<Cow<'_, [u8]>, RevlogError> {
290 pub fn data(&self) -> Result<Cow<'_, [u8]>, RevlogError> {
286 if self.bytes.is_empty() {
291 if self.bytes.is_empty() {
287 return Ok(Cow::Borrowed(&[]));
292 return Ok(Cow::Borrowed(&[]));
288 }
293 }
289 match self.bytes[0] {
294 match self.bytes[0] {
290 // Revision data is the entirety of the entry, including this
295 // Revision data is the entirety of the entry, including this
291 // header.
296 // header.
292 b'\0' => Ok(Cow::Borrowed(self.bytes)),
297 b'\0' => Ok(Cow::Borrowed(self.bytes)),
293 // Raw revision data follows.
298 // Raw revision data follows.
294 b'u' => Ok(Cow::Borrowed(&self.bytes[1..])),
299 b'u' => Ok(Cow::Borrowed(&self.bytes[1..])),
295 // zlib (RFC 1950) data.
300 // zlib (RFC 1950) data.
296 b'x' => Ok(Cow::Owned(self.uncompressed_zlib_data()?)),
301 b'x' => Ok(Cow::Owned(self.uncompressed_zlib_data()?)),
297 // zstd data.
302 // zstd data.
298 b'\x28' => Ok(Cow::Owned(self.uncompressed_zstd_data()?)),
303 b'\x28' => Ok(Cow::Owned(self.uncompressed_zstd_data()?)),
299 format_type => Err(RevlogError::UnknowDataFormat(format_type)),
304 format_type => Err(RevlogError::UnknowDataFormat(format_type)),
300 }
305 }
301 }
306 }
302
307
303 fn uncompressed_zlib_data(&self) -> Result<Vec<u8>, RevlogError> {
308 fn uncompressed_zlib_data(&self) -> Result<Vec<u8>, RevlogError> {
304 let mut decoder = ZlibDecoder::new(self.bytes);
309 let mut decoder = ZlibDecoder::new(self.bytes);
305 if self.is_delta() {
310 if self.is_delta() {
306 let mut buf = Vec::with_capacity(self.compressed_len);
311 let mut buf = Vec::with_capacity(self.compressed_len);
307 decoder
312 decoder
308 .read_to_end(&mut buf)
313 .read_to_end(&mut buf)
309 .or(Err(RevlogError::Corrupted))?;
314 .or(Err(RevlogError::Corrupted))?;
310 Ok(buf)
315 Ok(buf)
311 } else {
316 } else {
312 let mut buf = vec![0; self.uncompressed_len];
317 let mut buf = vec![0; self.uncompressed_len];
313 decoder
318 decoder
314 .read_exact(&mut buf)
319 .read_exact(&mut buf)
315 .or(Err(RevlogError::Corrupted))?;
320 .or(Err(RevlogError::Corrupted))?;
316 Ok(buf)
321 Ok(buf)
317 }
322 }
318 }
323 }
319
324
320 fn uncompressed_zstd_data(&self) -> Result<Vec<u8>, RevlogError> {
325 fn uncompressed_zstd_data(&self) -> Result<Vec<u8>, RevlogError> {
321 if self.is_delta() {
326 if self.is_delta() {
322 let mut buf = Vec::with_capacity(self.compressed_len);
327 let mut buf = Vec::with_capacity(self.compressed_len);
323 zstd::stream::copy_decode(self.bytes, &mut buf)
328 zstd::stream::copy_decode(self.bytes, &mut buf)
324 .or(Err(RevlogError::Corrupted))?;
329 .or(Err(RevlogError::Corrupted))?;
325 Ok(buf)
330 Ok(buf)
326 } else {
331 } else {
327 let mut buf = vec![0; self.uncompressed_len];
332 let mut buf = vec![0; self.uncompressed_len];
328 let len = zstd::block::decompress_to_buffer(self.bytes, &mut buf)
333 let len = zstd::block::decompress_to_buffer(self.bytes, &mut buf)
329 .or(Err(RevlogError::Corrupted))?;
334 .or(Err(RevlogError::Corrupted))?;
330 if len != self.uncompressed_len {
335 if len != self.uncompressed_len {
331 Err(RevlogError::Corrupted)
336 Err(RevlogError::Corrupted)
332 } else {
337 } else {
333 Ok(buf)
338 Ok(buf)
334 }
339 }
335 }
340 }
336 }
341 }
337
342
338 /// Tell if the entry is a snapshot or a delta
343 /// Tell if the entry is a snapshot or a delta
339 /// (influences on decompression).
344 /// (influences on decompression).
340 fn is_delta(&self) -> bool {
345 fn is_delta(&self) -> bool {
341 self.base_rev.is_some()
346 self.base_rev.is_some()
342 }
347 }
343 }
348 }
344
349
345 /// Format version of the revlog.
350 /// Format version of the revlog.
346 pub fn get_version(index_bytes: &[u8]) -> u16 {
351 pub fn get_version(index_bytes: &[u8]) -> u16 {
347 BigEndian::read_u16(&index_bytes[2..=3])
352 BigEndian::read_u16(&index_bytes[2..=3])
348 }
353 }
349
354
350 /// Calculate the hash of a revision given its data and its parents.
355 /// Calculate the hash of a revision given its data and its parents.
351 fn hash(data: &[u8], p1_hash: &[u8], p2_hash: &[u8]) -> Vec<u8> {
356 fn hash(data: &[u8], p1_hash: &[u8], p2_hash: &[u8]) -> Vec<u8> {
352 let mut hasher = Sha1::new();
357 let mut hasher = Sha1::new();
353 let (a, b) = (p1_hash, p2_hash);
358 let (a, b) = (p1_hash, p2_hash);
354 if a > b {
359 if a > b {
355 hasher.input(b);
360 hasher.input(b);
356 hasher.input(a);
361 hasher.input(a);
357 } else {
362 } else {
358 hasher.input(a);
363 hasher.input(a);
359 hasher.input(b);
364 hasher.input(b);
360 }
365 }
361 hasher.input(data);
366 hasher.input(data);
362 let mut hash = vec![0; NODE_BYTES_LENGTH];
367 let mut hash = vec![0; NODE_BYTES_LENGTH];
363 hasher.result(&mut hash);
368 hasher.result(&mut hash);
364 hash
369 hash
365 }
370 }
366
371
367 #[cfg(test)]
372 #[cfg(test)]
368 mod tests {
373 mod tests {
369 use super::*;
374 use super::*;
370
375
371 use super::super::index::IndexEntryBuilder;
376 use super::super::index::IndexEntryBuilder;
372
377
373 #[test]
378 #[test]
374 fn version_test() {
379 fn version_test() {
375 let bytes = IndexEntryBuilder::new()
380 let bytes = IndexEntryBuilder::new()
376 .is_first(true)
381 .is_first(true)
377 .with_version(1)
382 .with_version(1)
378 .build();
383 .build();
379
384
380 assert_eq!(get_version(&bytes), 1)
385 assert_eq!(get_version(&bytes), 1)
381 }
386 }
382 }
387 }
General Comments 0
You need to be logged in to leave comments. Login now