##// END OF EJS Templates
rhg: centralize parsing of `--rev` CLI arguments...
Simon Sapin -
r47162:4b381dbb default
parent child Browse files
Show More
@@ -0,0 +1,53 b''
1 //! The revset query language
2 //!
3 //! <https://www.mercurial-scm.org/repo/hg/help/revsets>
4
5 use crate::repo::Repo;
6 use crate::revlog::changelog::Changelog;
7 use crate::revlog::revlog::{Revlog, RevlogError};
8 use crate::revlog::NodePrefix;
9 use crate::revlog::{Revision, NULL_REVISION};
10
11 /// Resolve a query string into a single revision.
12 ///
13 /// Only some of the revset language is implemented yet.
14 pub fn resolve_single(
15 input: &str,
16 repo: &Repo,
17 ) -> Result<Revision, RevlogError> {
18 let changelog = Changelog::open(repo)?;
19
20 match resolve_rev_number_or_hex_prefix(input, &changelog.revlog) {
21 Err(RevlogError::InvalidRevision) => {} // Try other syntax
22 result => return result,
23 }
24
25 if input == "null" {
26 return Ok(NULL_REVISION);
27 }
28
29 // TODO: support for the rest of the language here.
30
31 Err(RevlogError::InvalidRevision)
32 }
33
34 /// Resolve the small subset of the language suitable for revlogs other than
35 /// the changelog, such as in `hg debugdata --manifest` CLI argument.
36 ///
37 /// * A non-negative decimal integer for a revision number, or
38 /// * An hexadecimal string, for the unique node ID that starts with this
39 /// prefix
40 pub fn resolve_rev_number_or_hex_prefix(
41 input: &str,
42 revlog: &Revlog,
43 ) -> Result<Revision, RevlogError> {
44 if let Ok(integer) = input.parse::<i32>() {
45 if integer >= 0 && revlog.has_rev(integer) {
46 return Ok(integer);
47 }
48 }
49 if let Ok(prefix) = NodePrefix::from_hex(input) {
50 return revlog.get_node_rev(prefix);
51 }
52 Err(RevlogError::InvalidRevision)
53 }
@@ -1,188 +1,189 b''
1 1 // Copyright 2018-2020 Georges Racinet <georges.racinet@octobus.net>
2 2 // and Mercurial contributors
3 3 //
4 4 // This software may be used and distributed according to the terms of the
5 5 // GNU General Public License version 2 or any later version.
6 6 mod ancestors;
7 7 pub mod dagops;
8 8 pub use ancestors::{AncestorsIterator, LazyAncestors, MissingAncestors};
9 9 mod dirstate;
10 10 pub mod discovery;
11 11 pub mod requirements;
12 12 pub mod testing; // unconditionally built, for use from integration tests
13 13 pub use dirstate::{
14 14 dirs_multiset::{DirsMultiset, DirsMultisetIter},
15 15 dirstate_map::DirstateMap,
16 16 parsers::{pack_dirstate, parse_dirstate, PARENT_SIZE},
17 17 status::{
18 18 status, BadMatch, BadType, DirstateStatus, StatusError, StatusOptions,
19 19 },
20 20 CopyMap, CopyMapIter, DirstateEntry, DirstateParents, EntryState,
21 21 StateMap, StateMapIter,
22 22 };
23 23 pub mod copy_tracing;
24 24 mod filepatterns;
25 25 pub mod matchers;
26 26 pub mod repo;
27 27 pub mod revlog;
28 28 pub use revlog::*;
29 29 pub mod config;
30 30 pub mod operations;
31 pub mod revset;
31 32 pub mod utils;
32 33
33 34 use crate::utils::hg_path::{HgPathBuf, HgPathError};
34 35 pub use filepatterns::{
35 36 parse_pattern_syntax, read_pattern_file, IgnorePattern,
36 37 PatternFileWarning, PatternSyntax,
37 38 };
38 39 use std::collections::HashMap;
39 40 use twox_hash::RandomXxHashBuilder64;
40 41
41 42 /// This is a contract between the `micro-timer` crate and us, to expose
42 43 /// the `log` crate as `crate::log`.
43 44 use log;
44 45
45 46 pub type LineNumber = usize;
46 47
47 48 /// Rust's default hasher is too slow because it tries to prevent collision
48 49 /// attacks. We are not concerned about those: if an ill-minded person has
49 50 /// write access to your repository, you have other issues.
50 51 pub type FastHashMap<K, V> = HashMap<K, V, RandomXxHashBuilder64>;
51 52
/// Error cases reported for dirstate parsing.
#[derive(Clone, Debug, PartialEq)]
pub enum DirstateParseError {
    TooLittleData,
    Overflow,
    // TODO refactor to use bytes instead of String
    CorruptedEntry(String),
    Damaged,
}

/// An I/O error is recorded as a `CorruptedEntry` carrying the error's
/// message.
impl From<std::io::Error> for DirstateParseError {
    fn from(e: std::io::Error) -> Self {
        DirstateParseError::CorruptedEntry(e.to_string())
    }
}

/// Human-readable message for each variant.
///
/// Implementing `Display` (rather than `ToString` directly) keeps
/// `to_string()` available through the standard blanket implementation and
/// additionally allows the error to be used with `{}` formatting.
impl std::fmt::Display for DirstateParseError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            DirstateParseError::TooLittleData => {
                f.write_str("Too little data for dirstate.")
            }
            DirstateParseError::Overflow => {
                f.write_str("Overflow in dirstate.")
            }
            DirstateParseError::CorruptedEntry(e) => {
                write!(f, "Corrupted entry: {:?}.", e)
            }
            DirstateParseError::Damaged => {
                f.write_str("Dirstate appears to be damaged.")
            }
        }
    }
}
78 79
/// Error cases reported for dirstate packing.
#[derive(Debug, PartialEq)]
pub enum DirstatePackError {
    CorruptedEntry(String),
    CorruptedParent,
    BadSize(usize, usize),
}

/// An I/O error is recorded as a `CorruptedEntry` carrying the error's
/// message.
impl From<std::io::Error> for DirstatePackError {
    fn from(io_error: std::io::Error) -> Self {
        let message = io_error.to_string();
        Self::CorruptedEntry(message)
    }
}
91 92 #[derive(Debug, PartialEq)]
92 93 pub enum DirstateMapError {
93 94 PathNotFound(HgPathBuf),
94 95 EmptyPath,
95 96 InvalidPath(HgPathError),
96 97 }
97 98
98 99 impl ToString for DirstateMapError {
99 100 fn to_string(&self) -> String {
100 101 match self {
101 102 DirstateMapError::PathNotFound(_) => {
102 103 "expected a value, found none".to_string()
103 104 }
104 105 DirstateMapError::EmptyPath => "Overflow in dirstate.".to_string(),
105 106 DirstateMapError::InvalidPath(e) => e.to_string(),
106 107 }
107 108 }
108 109 }
109 110
110 111 #[derive(Debug)]
111 112 pub enum DirstateError {
112 113 Parse(DirstateParseError),
113 114 Pack(DirstatePackError),
114 115 Map(DirstateMapError),
115 116 IO(std::io::Error),
116 117 }
117 118
118 119 impl From<DirstateParseError> for DirstateError {
119 120 fn from(e: DirstateParseError) -> Self {
120 121 DirstateError::Parse(e)
121 122 }
122 123 }
123 124
124 125 impl From<DirstatePackError> for DirstateError {
125 126 fn from(e: DirstatePackError) -> Self {
126 127 DirstateError::Pack(e)
127 128 }
128 129 }
129 130
130 131 #[derive(Debug)]
131 132 pub enum PatternError {
132 133 Path(HgPathError),
133 134 UnsupportedSyntax(String),
134 135 UnsupportedSyntaxInFile(String, String, usize),
135 136 TooLong(usize),
136 137 IO(std::io::Error),
137 138 /// Needed a pattern that can be turned into a regex but got one that
138 139 /// can't. This should only happen through programmer error.
139 140 NonRegexPattern(IgnorePattern),
140 141 }
141 142
142 143 impl ToString for PatternError {
143 144 fn to_string(&self) -> String {
144 145 match self {
145 146 PatternError::UnsupportedSyntax(syntax) => {
146 147 format!("Unsupported syntax {}", syntax)
147 148 }
148 149 PatternError::UnsupportedSyntaxInFile(syntax, file_path, line) => {
149 150 format!(
150 151 "{}:{}: unsupported syntax {}",
151 152 file_path, line, syntax
152 153 )
153 154 }
154 155 PatternError::TooLong(size) => {
155 156 format!("matcher pattern is too long ({} bytes)", size)
156 157 }
157 158 PatternError::IO(e) => e.to_string(),
158 159 PatternError::Path(e) => e.to_string(),
159 160 PatternError::NonRegexPattern(pattern) => {
160 161 format!("'{:?}' cannot be turned into a regex", pattern)
161 162 }
162 163 }
163 164 }
164 165 }
165 166
166 167 impl From<DirstateMapError> for DirstateError {
167 168 fn from(e: DirstateMapError) -> Self {
168 169 DirstateError::Map(e)
169 170 }
170 171 }
171 172
172 173 impl From<std::io::Error> for DirstateError {
173 174 fn from(e: std::io::Error) -> Self {
174 175 DirstateError::IO(e)
175 176 }
176 177 }
177 178
178 179 impl From<std::io::Error> for PatternError {
179 180 fn from(e: std::io::Error) -> Self {
180 181 PatternError::IO(e)
181 182 }
182 183 }
183 184
184 185 impl From<HgPathError> for PatternError {
185 186 fn from(e: HgPathError) -> Self {
186 187 PatternError::Path(e)
187 188 }
188 189 }
@@ -1,135 +1,125 b''
1 1 // list_tracked_files.rs
2 2 //
3 3 // Copyright 2020 Antoine Cezar <antoine.cezar@octobus.net>
4 4 //
5 5 // This software may be used and distributed according to the terms of the
6 6 // GNU General Public License version 2 or any later version.
7 7
8 8 use std::convert::From;
9 9 use std::path::PathBuf;
10 10
11 11 use crate::repo::Repo;
12 12 use crate::revlog::changelog::Changelog;
13 13 use crate::revlog::manifest::Manifest;
14 14 use crate::revlog::path_encode::path_encode;
15 15 use crate::revlog::revlog::Revlog;
16 16 use crate::revlog::revlog::RevlogError;
17 17 use crate::revlog::Node;
18 use crate::revlog::NodePrefix;
19 use crate::revlog::Revision;
20 18 use crate::utils::files::get_path_from_bytes;
21 19 use crate::utils::hg_path::{HgPath, HgPathBuf};
22 20
23 21 const METADATA_DELIMITER: [u8; 2] = [b'\x01', b'\n'];
24 22
25 23 /// Kind of error encountered by `CatRev`
26 24 #[derive(Debug)]
27 25 pub enum CatRevErrorKind {
28 26 /// Error when reading a `revlog` file.
29 27 IoError(std::io::Error),
30 28 /// The revision has not been found.
31 29 InvalidRevision,
32 30 /// Found more than one revision whose ID match the requested prefix
33 31 AmbiguousPrefix,
34 32 /// A `revlog` file is corrupted.
35 33 CorruptedRevlog,
36 34 /// The `revlog` format version is not supported.
37 35 UnsuportedRevlogVersion(u16),
38 36 /// The `revlog` data format is not supported.
39 37 UnknowRevlogDataFormat(u8),
40 38 }
41 39
42 40 /// A `CatRev` error
43 41 #[derive(Debug)]
44 42 pub struct CatRevError {
45 43 /// Kind of error encountered by `CatRev`
46 44 pub kind: CatRevErrorKind,
47 45 }
48 46
49 47 impl From<CatRevErrorKind> for CatRevError {
50 48 fn from(kind: CatRevErrorKind) -> Self {
51 49 CatRevError { kind }
52 50 }
53 51 }
54 52
55 53 impl From<RevlogError> for CatRevError {
56 54 fn from(err: RevlogError) -> Self {
57 55 match err {
58 56 RevlogError::IoError(err) => CatRevErrorKind::IoError(err),
59 57 RevlogError::UnsuportedVersion(version) => {
60 58 CatRevErrorKind::UnsuportedRevlogVersion(version)
61 59 }
62 60 RevlogError::InvalidRevision => CatRevErrorKind::InvalidRevision,
63 61 RevlogError::AmbiguousPrefix => CatRevErrorKind::AmbiguousPrefix,
64 62 RevlogError::Corrupted => CatRevErrorKind::CorruptedRevlog,
65 63 RevlogError::UnknowDataFormat(format) => {
66 64 CatRevErrorKind::UnknowRevlogDataFormat(format)
67 65 }
68 66 }
69 67 .into()
70 68 }
71 69 }
72 70
73 71 /// List files under Mercurial control at a given revision.
74 72 ///
75 73 /// * `root`: Repository root
76 74 /// * `rev`: The revision to cat the files from.
77 75 /// * `files`: The files to output.
78 76 pub fn cat(
79 77 repo: &Repo,
80 rev: &str,
78 revset: &str,
81 79 files: &[HgPathBuf],
82 80 ) -> Result<Vec<u8>, CatRevError> {
81 let rev = crate::revset::resolve_single(revset, repo)?;
83 82 let changelog = Changelog::open(repo)?;
84 83 let manifest = Manifest::open(repo)?;
85
86 let changelog_entry = match rev.parse::<Revision>() {
87 Ok(rev) => changelog.get_rev(rev)?,
88 _ => {
89 let changelog_node = NodePrefix::from_hex(&rev)
90 .map_err(|_| CatRevErrorKind::InvalidRevision)?;
91 changelog.get_node(changelog_node)?
92 }
93 };
84 let changelog_entry = changelog.get_rev(rev)?;
94 85 let manifest_node = Node::from_hex(&changelog_entry.manifest_node()?)
95 86 .map_err(|_| CatRevErrorKind::CorruptedRevlog)?;
96
97 87 let manifest_entry = manifest.get_node(manifest_node.into())?;
98 88 let mut bytes = vec![];
99 89
100 90 for (manifest_file, node_bytes) in manifest_entry.files_with_nodes() {
101 91 for cat_file in files.iter() {
102 92 if cat_file.as_bytes() == manifest_file.as_bytes() {
103 93 let index_path = store_path(manifest_file, b".i");
104 94 let data_path = store_path(manifest_file, b".d");
105 95
106 96 let file_log =
107 97 Revlog::open(repo, &index_path, Some(&data_path))?;
108 98 let file_node = Node::from_hex(node_bytes)
109 99 .map_err(|_| CatRevErrorKind::CorruptedRevlog)?;
110 100 let file_rev = file_log.get_node_rev(file_node.into())?;
111 101 let data = file_log.get_rev_data(file_rev)?;
112 102 if data.starts_with(&METADATA_DELIMITER) {
113 103 let end_delimiter_position = data
114 104 [METADATA_DELIMITER.len()..]
115 105 .windows(METADATA_DELIMITER.len())
116 106 .position(|bytes| bytes == METADATA_DELIMITER);
117 107 if let Some(position) = end_delimiter_position {
118 108 let offset = METADATA_DELIMITER.len() * 2;
119 109 bytes.extend(data[position + offset..].iter());
120 110 }
121 111 } else {
122 112 bytes.extend(data);
123 113 }
124 114 }
125 115 }
126 116 }
127 117
128 118 Ok(bytes)
129 119 }
130 120
131 121 fn store_path(hg_path: &HgPath, suffix: &[u8]) -> PathBuf {
132 122 let encoded_bytes =
133 123 path_encode(&[b"data/", hg_path.as_bytes(), suffix].concat());
134 124 get_path_from_bytes(&encoded_bytes).into()
135 125 }
@@ -1,102 +1,92 b''
1 1 // debugdata.rs
2 2 //
3 3 // Copyright 2020 Antoine Cezar <antoine.cezar@octobus.net>
4 4 //
5 5 // This software may be used and distributed according to the terms of the
6 6 // GNU General Public License version 2 or any later version.
7 7
8 8 use crate::repo::Repo;
9 9 use crate::revlog::revlog::{Revlog, RevlogError};
10 use crate::revlog::NodePrefix;
11 use crate::revlog::Revision;
12 10
13 11 /// Kind of data to debug
14 12 #[derive(Debug, Copy, Clone)]
15 13 pub enum DebugDataKind {
16 14 Changelog,
17 15 Manifest,
18 16 }
19 17
20 18 /// Kind of error encountered by DebugData
21 19 #[derive(Debug)]
22 20 pub enum DebugDataErrorKind {
23 21 /// Error when reading a `revlog` file.
24 22 IoError(std::io::Error),
25 23 /// The revision has not been found.
26 24 InvalidRevision,
27 25 /// Found more than one revision whose ID match the requested prefix
28 26 AmbiguousPrefix,
29 27 /// A `revlog` file is corrupted.
30 28 CorruptedRevlog,
31 29 /// The `revlog` format version is not supported.
32 30 UnsuportedRevlogVersion(u16),
33 31 /// The `revlog` data format is not supported.
34 32 UnknowRevlogDataFormat(u8),
35 33 }
36 34
37 35 /// A DebugData error
38 36 #[derive(Debug)]
39 37 pub struct DebugDataError {
40 38 /// Kind of error encountered by DebugData
41 39 pub kind: DebugDataErrorKind,
42 40 }
43 41
44 42 impl From<DebugDataErrorKind> for DebugDataError {
45 43 fn from(kind: DebugDataErrorKind) -> Self {
46 44 DebugDataError { kind }
47 45 }
48 46 }
49 47
50 48 impl From<std::io::Error> for DebugDataError {
51 49 fn from(err: std::io::Error) -> Self {
52 50 let kind = DebugDataErrorKind::IoError(err);
53 51 DebugDataError { kind }
54 52 }
55 53 }
56 54
57 55 impl From<RevlogError> for DebugDataError {
58 56 fn from(err: RevlogError) -> Self {
59 57 match err {
60 58 RevlogError::IoError(err) => DebugDataErrorKind::IoError(err),
61 59 RevlogError::UnsuportedVersion(version) => {
62 60 DebugDataErrorKind::UnsuportedRevlogVersion(version)
63 61 }
64 62 RevlogError::InvalidRevision => {
65 63 DebugDataErrorKind::InvalidRevision
66 64 }
67 65 RevlogError::AmbiguousPrefix => {
68 66 DebugDataErrorKind::AmbiguousPrefix
69 67 }
70 68 RevlogError::Corrupted => DebugDataErrorKind::CorruptedRevlog,
71 69 RevlogError::UnknowDataFormat(format) => {
72 70 DebugDataErrorKind::UnknowRevlogDataFormat(format)
73 71 }
74 72 }
75 73 .into()
76 74 }
77 75 }
78 76
79 77 /// Dump the contents data of a revision.
80 78 pub fn debug_data(
81 79 repo: &Repo,
82 rev: &str,
80 revset: &str,
83 81 kind: DebugDataKind,
84 82 ) -> Result<Vec<u8>, DebugDataError> {
85 83 let index_file = match kind {
86 84 DebugDataKind::Changelog => "00changelog.i",
87 85 DebugDataKind::Manifest => "00manifest.i",
88 86 };
89 87 let revlog = Revlog::open(repo, index_file, None)?;
90
91 let data = match rev.parse::<Revision>() {
92 Ok(rev) => revlog.get_rev_data(rev)?,
93 _ => {
94 let node = NodePrefix::from_hex(&rev)
95 .map_err(|_| DebugDataErrorKind::InvalidRevision)?;
96 let rev = revlog.get_node_rev(node)?;
97 revlog.get_rev_data(rev)?
98 }
99 };
100
88 let rev =
89 crate::revset::resolve_rev_number_or_hex_prefix(revset, &revlog)?;
90 let data = revlog.get_rev_data(rev)?;
101 91 Ok(data)
102 92 }
@@ -1,165 +1,157 b''
1 1 // list_tracked_files.rs
2 2 //
3 3 // Copyright 2020 Antoine Cezar <antoine.cezar@octobus.net>
4 4 //
5 5 // This software may be used and distributed according to the terms of the
6 6 // GNU General Public License version 2 or any later version.
7 7
8 8 use crate::dirstate::parsers::parse_dirstate;
9 9 use crate::repo::Repo;
10 10 use crate::revlog::changelog::Changelog;
11 11 use crate::revlog::manifest::{Manifest, ManifestEntry};
12 use crate::revlog::node::{Node, NodePrefix};
12 use crate::revlog::node::Node;
13 13 use crate::revlog::revlog::RevlogError;
14 use crate::revlog::Revision;
15 14 use crate::utils::hg_path::HgPath;
16 15 use crate::{DirstateParseError, EntryState};
17 16 use rayon::prelude::*;
18 17 use std::convert::From;
19 18
20 19 /// Kind of error encountered by `ListDirstateTrackedFiles`
21 20 #[derive(Debug)]
22 21 pub enum ListDirstateTrackedFilesErrorKind {
23 22 /// Error when reading the `dirstate` file
24 23 IoError(std::io::Error),
25 24 /// Error when parsing the `dirstate` file
26 25 ParseError(DirstateParseError),
27 26 }
28 27
29 28 /// A `ListDirstateTrackedFiles` error
30 29 #[derive(Debug)]
31 30 pub struct ListDirstateTrackedFilesError {
32 31 /// Kind of error encountered by `ListDirstateTrackedFiles`
33 32 pub kind: ListDirstateTrackedFilesErrorKind,
34 33 }
35 34
36 35 impl From<ListDirstateTrackedFilesErrorKind>
37 36 for ListDirstateTrackedFilesError
38 37 {
39 38 fn from(kind: ListDirstateTrackedFilesErrorKind) -> Self {
40 39 ListDirstateTrackedFilesError { kind }
41 40 }
42 41 }
43 42
44 43 impl From<std::io::Error> for ListDirstateTrackedFilesError {
45 44 fn from(err: std::io::Error) -> Self {
46 45 let kind = ListDirstateTrackedFilesErrorKind::IoError(err);
47 46 ListDirstateTrackedFilesError { kind }
48 47 }
49 48 }
50 49
51 50 /// List files under Mercurial control in the working directory
52 51 /// by reading the dirstate
53 52 pub struct Dirstate {
54 53 /// The `dirstate` content.
55 54 content: Vec<u8>,
56 55 }
57 56
58 57 impl Dirstate {
59 58 pub fn new(repo: &Repo) -> Result<Self, ListDirstateTrackedFilesError> {
60 59 let content = repo.hg_vfs().read("dirstate")?;
61 60 Ok(Self { content })
62 61 }
63 62
64 63 pub fn tracked_files(
65 64 &self,
66 65 ) -> Result<Vec<&HgPath>, ListDirstateTrackedFilesError> {
67 66 let (_, entries, _) = parse_dirstate(&self.content)
68 67 .map_err(ListDirstateTrackedFilesErrorKind::ParseError)?;
69 68 let mut files: Vec<&HgPath> = entries
70 69 .into_iter()
71 70 .filter_map(|(path, entry)| match entry.state {
72 71 EntryState::Removed => None,
73 72 _ => Some(path),
74 73 })
75 74 .collect();
76 75 files.par_sort_unstable();
77 76 Ok(files)
78 77 }
79 78 }
80 79
81 80 /// Kind of error encountered by `ListRevTrackedFiles`
82 81 #[derive(Debug)]
83 82 pub enum ListRevTrackedFilesErrorKind {
84 83 /// Error when reading a `revlog` file.
85 84 IoError(std::io::Error),
86 85 /// The revision has not been found.
87 86 InvalidRevision,
88 87 /// Found more than one revision whose ID match the requested prefix
89 88 AmbiguousPrefix,
90 89 /// A `revlog` file is corrupted.
91 90 CorruptedRevlog,
92 91 /// The `revlog` format version is not supported.
93 92 UnsuportedRevlogVersion(u16),
94 93 /// The `revlog` data format is not supported.
95 94 UnknowRevlogDataFormat(u8),
96 95 }
97 96
98 97 /// A `ListRevTrackedFiles` error
99 98 #[derive(Debug)]
100 99 pub struct ListRevTrackedFilesError {
101 100 /// Kind of error encountered by `ListRevTrackedFiles`
102 101 pub kind: ListRevTrackedFilesErrorKind,
103 102 }
104 103
105 104 impl From<ListRevTrackedFilesErrorKind> for ListRevTrackedFilesError {
106 105 fn from(kind: ListRevTrackedFilesErrorKind) -> Self {
107 106 ListRevTrackedFilesError { kind }
108 107 }
109 108 }
110 109
111 110 impl From<RevlogError> for ListRevTrackedFilesError {
112 111 fn from(err: RevlogError) -> Self {
113 112 match err {
114 113 RevlogError::IoError(err) => {
115 114 ListRevTrackedFilesErrorKind::IoError(err)
116 115 }
117 116 RevlogError::UnsuportedVersion(version) => {
118 117 ListRevTrackedFilesErrorKind::UnsuportedRevlogVersion(version)
119 118 }
120 119 RevlogError::InvalidRevision => {
121 120 ListRevTrackedFilesErrorKind::InvalidRevision
122 121 }
123 122 RevlogError::AmbiguousPrefix => {
124 123 ListRevTrackedFilesErrorKind::AmbiguousPrefix
125 124 }
126 125 RevlogError::Corrupted => {
127 126 ListRevTrackedFilesErrorKind::CorruptedRevlog
128 127 }
129 128 RevlogError::UnknowDataFormat(format) => {
130 129 ListRevTrackedFilesErrorKind::UnknowRevlogDataFormat(format)
131 130 }
132 131 }
133 132 .into()
134 133 }
135 134 }
136 135
137 136 /// List files under Mercurial control at a given revision.
138 137 pub fn list_rev_tracked_files(
139 138 repo: &Repo,
140 rev: &str,
139 revset: &str,
141 140 ) -> Result<FilesForRev, ListRevTrackedFilesError> {
141 let rev = crate::revset::resolve_single(revset, repo)?;
142 142 let changelog = Changelog::open(repo)?;
143 143 let manifest = Manifest::open(repo)?;
144
145 let changelog_entry = match rev.parse::<Revision>() {
146 Ok(rev) => changelog.get_rev(rev)?,
147 _ => {
148 let changelog_node = NodePrefix::from_hex(&rev)
149 .or(Err(ListRevTrackedFilesErrorKind::InvalidRevision))?;
150 changelog.get_node(changelog_node)?
151 }
152 };
144 let changelog_entry = changelog.get_rev(rev)?;
153 145 let manifest_node = Node::from_hex(&changelog_entry.manifest_node()?)
154 146 .or(Err(ListRevTrackedFilesErrorKind::CorruptedRevlog))?;
155 147 let manifest_entry = manifest.get_node(manifest_node.into())?;
156 148 Ok(FilesForRev(manifest_entry))
157 149 }
158 150
159 151 pub struct FilesForRev(ManifestEntry);
160 152
161 153 impl FilesForRev {
162 154 pub fn iter(&self) -> impl Iterator<Item = &HgPath> {
163 155 self.0.files()
164 156 }
165 157 }
@@ -1,58 +1,58 b''
1 1 use crate::repo::Repo;
2 2 use crate::revlog::revlog::{Revlog, RevlogError};
3 3 use crate::revlog::NodePrefix;
4 4 use crate::revlog::Revision;
5 5
6 6 /// A specialized `Revlog` to work with `changelog` data format.
7 7 pub struct Changelog {
8 8 /// The generic `revlog` format.
9 revlog: Revlog,
9 pub(crate) revlog: Revlog,
10 10 }
11 11
12 12 impl Changelog {
13 13 /// Open the `changelog` of a repository given by its root.
14 14 pub fn open(repo: &Repo) -> Result<Self, RevlogError> {
15 15 let revlog = Revlog::open(repo, "00changelog.i", None)?;
16 16 Ok(Self { revlog })
17 17 }
18 18
19 19 /// Return the `ChangelogEntry` a given node id.
20 20 pub fn get_node(
21 21 &self,
22 22 node: NodePrefix,
23 23 ) -> Result<ChangelogEntry, RevlogError> {
24 24 let rev = self.revlog.get_node_rev(node)?;
25 25 self.get_rev(rev)
26 26 }
27 27
28 28 /// Return the `ChangelogEntry` of a given node revision.
29 29 pub fn get_rev(
30 30 &self,
31 31 rev: Revision,
32 32 ) -> Result<ChangelogEntry, RevlogError> {
33 33 let bytes = self.revlog.get_rev_data(rev)?;
34 34 Ok(ChangelogEntry { bytes })
35 35 }
36 36 }
37 37
38 38 /// `Changelog` entry which knows how to interpret the `changelog` data bytes.
39 39 #[derive(Debug)]
40 40 pub struct ChangelogEntry {
41 41 /// The data bytes of the `changelog` entry.
42 42 bytes: Vec<u8>,
43 43 }
44 44
45 45 impl ChangelogEntry {
46 46 /// Return an iterator over the lines of the entry.
47 47 pub fn lines(&self) -> impl Iterator<Item = &[u8]> {
48 48 self.bytes
49 49 .split(|b| b == &b'\n')
50 50 .filter(|line| !line.is_empty())
51 51 }
52 52
53 53 /// Return the node id of the `manifest` referenced by this `changelog`
54 54 /// entry.
55 55 pub fn manifest_node(&self) -> Result<&[u8], RevlogError> {
56 56 self.lines().next().ok_or(RevlogError::Corrupted)
57 57 }
58 58 }
@@ -1,382 +1,387 b''
1 1 use std::borrow::Cow;
2 2 use std::io::Read;
3 3 use std::ops::Deref;
4 4 use std::path::Path;
5 5
6 6 use byteorder::{BigEndian, ByteOrder};
7 7 use crypto::digest::Digest;
8 8 use crypto::sha1::Sha1;
9 9 use flate2::read::ZlibDecoder;
10 10 use micro_timer::timed;
11 11 use zstd;
12 12
13 13 use super::index::Index;
14 14 use super::node::{NodePrefix, NODE_BYTES_LENGTH, NULL_NODE};
15 15 use super::nodemap;
16 16 use super::nodemap::NodeMap;
17 17 use super::nodemap_docket::NodeMapDocket;
18 18 use super::patch;
19 19 use crate::repo::Repo;
20 20 use crate::revlog::Revision;
21 21
22 22 pub enum RevlogError {
23 23 IoError(std::io::Error),
24 24 UnsuportedVersion(u16),
25 25 InvalidRevision,
26 26 /// Found more than one entry whose ID match the requested prefix
27 27 AmbiguousPrefix,
28 28 Corrupted,
29 29 UnknowDataFormat(u8),
30 30 }
31 31
32 32 impl From<bytes_cast::FromBytesError> for RevlogError {
33 33 fn from(_: bytes_cast::FromBytesError) -> Self {
34 34 RevlogError::Corrupted
35 35 }
36 36 }
37 37
38 38 /// Read only implementation of revlog.
39 39 pub struct Revlog {
40 40 /// When index and data are not interleaved: bytes of the revlog index.
41 41 /// When index and data are interleaved: bytes of the revlog index and
42 42 /// data.
43 43 index: Index,
44 44 /// When index and data are not interleaved: bytes of the revlog data
45 45 data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>>,
46 46 /// When present on disk: the persistent nodemap for this revlog
47 47 nodemap: Option<nodemap::NodeTree>,
48 48 }
49 49
50 50 impl Revlog {
51 51 /// Open a revlog index file.
52 52 ///
53 53 /// It will also open the associated data file if index and data are not
54 54 /// interleaved.
55 55 #[timed]
56 56 pub fn open(
57 57 repo: &Repo,
58 58 index_path: impl AsRef<Path>,
59 59 data_path: Option<&Path>,
60 60 ) -> Result<Self, RevlogError> {
61 61 let index_path = index_path.as_ref();
62 62 let index_mmap = repo
63 63 .store_vfs()
64 64 .mmap_open(&index_path)
65 65 .map_err(RevlogError::IoError)?;
66 66
67 67 let version = get_version(&index_mmap);
68 68 if version != 1 {
69 69 return Err(RevlogError::UnsuportedVersion(version));
70 70 }
71 71
72 72 let index = Index::new(Box::new(index_mmap))?;
73 73
74 74 let default_data_path = index_path.with_extension("d");
75 75
76 76 // type annotation required
77 77 // won't recognize Mmap as Deref<Target = [u8]>
78 78 let data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>> =
79 79 if index.is_inline() {
80 80 None
81 81 } else {
82 82 let data_path = data_path.unwrap_or(&default_data_path);
83 83 let data_mmap = repo
84 84 .store_vfs()
85 85 .mmap_open(data_path)
86 86 .map_err(RevlogError::IoError)?;
87 87 Some(Box::new(data_mmap))
88 88 };
89 89
90 90 let nodemap = NodeMapDocket::read_from_file(repo, index_path)?.map(
91 91 |(docket, data)| {
92 92 nodemap::NodeTree::load_bytes(
93 93 Box::new(data),
94 94 docket.data_length,
95 95 )
96 96 },
97 97 );
98 98
99 99 Ok(Revlog {
100 100 index,
101 101 data_bytes,
102 102 nodemap,
103 103 })
104 104 }
105 105
106 106 /// Return number of entries of the `Revlog`.
107 107 pub fn len(&self) -> usize {
108 108 self.index.len()
109 109 }
110 110
111 111 /// Returns `true` if the `Revlog` has zero `entries`.
112 112 pub fn is_empty(&self) -> bool {
113 113 self.index.is_empty()
114 114 }
115 115
116 116 /// Return the full data associated to a node.
117 117 #[timed]
118 118 pub fn get_node_rev(
119 119 &self,
120 120 node: NodePrefix,
121 121 ) -> Result<Revision, RevlogError> {
122 122 if let Some(nodemap) = &self.nodemap {
123 123 return nodemap
124 124 .find_bin(&self.index, node)
125 125 // TODO: propagate details of this error:
126 126 .map_err(|_| RevlogError::Corrupted)?
127 127 .ok_or(RevlogError::InvalidRevision);
128 128 }
129 129
130 130 // Fallback to linear scan when a persistent nodemap is not present.
131 131 // This happens when the persistent-nodemap experimental feature is not
132 132 // enabled, or for small revlogs.
133 133 //
134 134 // TODO: consider building a non-persistent nodemap in memory to
135 135 // optimize these cases.
136 136 let mut found_by_prefix = None;
137 137 for rev in (0..self.len() as Revision).rev() {
138 138 let index_entry =
139 139 self.index.get_entry(rev).ok_or(RevlogError::Corrupted)?;
140 140 if node == *index_entry.hash() {
141 141 return Ok(rev);
142 142 }
143 143 if node.is_prefix_of(index_entry.hash()) {
144 144 if found_by_prefix.is_some() {
145 145 return Err(RevlogError::AmbiguousPrefix);
146 146 }
147 147 found_by_prefix = Some(rev)
148 148 }
149 149 }
150 150 found_by_prefix.ok_or(RevlogError::InvalidRevision)
151 151 }
152 152
153 /// Returns whether the given revision exists in this revlog.
154 pub fn has_rev(&self, rev: Revision) -> bool {
155 self.index.get_entry(rev).is_some()
156 }
157
153 158 /// Return the full data associated to a revision.
154 159 ///
155 160 /// All entries required to build the final data out of deltas will be
156 161 /// retrieved as needed, and the deltas will be applied to the inital
157 162 /// snapshot to rebuild the final data.
158 163 #[timed]
159 164 pub fn get_rev_data(&self, rev: Revision) -> Result<Vec<u8>, RevlogError> {
160 165 // Todo return -> Cow
161 166 let mut entry = self.get_entry(rev)?;
162 167 let mut delta_chain = vec![];
163 168 while let Some(base_rev) = entry.base_rev {
164 169 delta_chain.push(entry);
165 170 entry =
166 171 self.get_entry(base_rev).or(Err(RevlogError::Corrupted))?;
167 172 }
168 173
169 174 // TODO do not look twice in the index
170 175 let index_entry = self
171 176 .index
172 177 .get_entry(rev)
173 178 .ok_or(RevlogError::InvalidRevision)?;
174 179
175 180 let data: Vec<u8> = if delta_chain.is_empty() {
176 181 entry.data()?.into()
177 182 } else {
178 183 Revlog::build_data_from_deltas(entry, &delta_chain)?
179 184 };
180 185
181 186 if self.check_hash(
182 187 index_entry.p1(),
183 188 index_entry.p2(),
184 189 index_entry.hash().as_bytes(),
185 190 &data,
186 191 ) {
187 192 Ok(data)
188 193 } else {
189 194 Err(RevlogError::Corrupted)
190 195 }
191 196 }
192 197
193 198 /// Check the hash of some given data against the recorded hash.
194 199 pub fn check_hash(
195 200 &self,
196 201 p1: Revision,
197 202 p2: Revision,
198 203 expected: &[u8],
199 204 data: &[u8],
200 205 ) -> bool {
201 206 let e1 = self.index.get_entry(p1);
202 207 let h1 = match e1 {
203 208 Some(ref entry) => entry.hash(),
204 209 None => &NULL_NODE,
205 210 };
206 211 let e2 = self.index.get_entry(p2);
207 212 let h2 = match e2 {
208 213 Some(ref entry) => entry.hash(),
209 214 None => &NULL_NODE,
210 215 };
211 216
212 217 hash(data, h1.as_bytes(), h2.as_bytes()).as_slice() == expected
213 218 }
214 219
215 220 /// Build the full data of a revision out its snapshot
216 221 /// and its deltas.
217 222 #[timed]
218 223 fn build_data_from_deltas(
219 224 snapshot: RevlogEntry,
220 225 deltas: &[RevlogEntry],
221 226 ) -> Result<Vec<u8>, RevlogError> {
222 227 let snapshot = snapshot.data()?;
223 228 let deltas = deltas
224 229 .iter()
225 230 .rev()
226 231 .map(RevlogEntry::data)
227 232 .collect::<Result<Vec<Cow<'_, [u8]>>, RevlogError>>()?;
228 233 let patches: Vec<_> =
229 234 deltas.iter().map(|d| patch::PatchList::new(d)).collect();
230 235 let patch = patch::fold_patch_lists(&patches);
231 236 Ok(patch.apply(&snapshot))
232 237 }
233 238
234 239 /// Return the revlog data.
235 240 fn data(&self) -> &[u8] {
236 241 match self.data_bytes {
237 242 Some(ref data_bytes) => &data_bytes,
238 243 None => panic!(
239 244 "forgot to load the data or trying to access inline data"
240 245 ),
241 246 }
242 247 }
243 248
244 249 /// Get an entry of the revlog.
245 250 fn get_entry(&self, rev: Revision) -> Result<RevlogEntry, RevlogError> {
246 251 let index_entry = self
247 252 .index
248 253 .get_entry(rev)
249 254 .ok_or(RevlogError::InvalidRevision)?;
250 255 let start = index_entry.offset();
251 256 let end = start + index_entry.compressed_len();
252 257 let data = if self.index.is_inline() {
253 258 self.index.data(start, end)
254 259 } else {
255 260 &self.data()[start..end]
256 261 };
257 262 let entry = RevlogEntry {
258 263 rev,
259 264 bytes: data,
260 265 compressed_len: index_entry.compressed_len(),
261 266 uncompressed_len: index_entry.uncompressed_len(),
262 267 base_rev: if index_entry.base_revision() == rev {
263 268 None
264 269 } else {
265 270 Some(index_entry.base_revision())
266 271 },
267 272 };
268 273 Ok(entry)
269 274 }
270 275 }
271 276
272 277 /// The revlog entry's bytes and the necessary informations to extract
273 278 /// the entry's data.
274 279 #[derive(Debug)]
275 280 pub struct RevlogEntry<'a> {
276 281 rev: Revision,
277 282 bytes: &'a [u8],
278 283 compressed_len: usize,
279 284 uncompressed_len: usize,
280 285 base_rev: Option<Revision>,
281 286 }
282 287
283 288 impl<'a> RevlogEntry<'a> {
284 289 /// Extract the data contained in the entry.
285 290 pub fn data(&self) -> Result<Cow<'_, [u8]>, RevlogError> {
286 291 if self.bytes.is_empty() {
287 292 return Ok(Cow::Borrowed(&[]));
288 293 }
289 294 match self.bytes[0] {
290 295 // Revision data is the entirety of the entry, including this
291 296 // header.
292 297 b'\0' => Ok(Cow::Borrowed(self.bytes)),
293 298 // Raw revision data follows.
294 299 b'u' => Ok(Cow::Borrowed(&self.bytes[1..])),
295 300 // zlib (RFC 1950) data.
296 301 b'x' => Ok(Cow::Owned(self.uncompressed_zlib_data()?)),
297 302 // zstd data.
298 303 b'\x28' => Ok(Cow::Owned(self.uncompressed_zstd_data()?)),
299 304 format_type => Err(RevlogError::UnknowDataFormat(format_type)),
300 305 }
301 306 }
302 307
303 308 fn uncompressed_zlib_data(&self) -> Result<Vec<u8>, RevlogError> {
304 309 let mut decoder = ZlibDecoder::new(self.bytes);
305 310 if self.is_delta() {
306 311 let mut buf = Vec::with_capacity(self.compressed_len);
307 312 decoder
308 313 .read_to_end(&mut buf)
309 314 .or(Err(RevlogError::Corrupted))?;
310 315 Ok(buf)
311 316 } else {
312 317 let mut buf = vec![0; self.uncompressed_len];
313 318 decoder
314 319 .read_exact(&mut buf)
315 320 .or(Err(RevlogError::Corrupted))?;
316 321 Ok(buf)
317 322 }
318 323 }
319 324
320 325 fn uncompressed_zstd_data(&self) -> Result<Vec<u8>, RevlogError> {
321 326 if self.is_delta() {
322 327 let mut buf = Vec::with_capacity(self.compressed_len);
323 328 zstd::stream::copy_decode(self.bytes, &mut buf)
324 329 .or(Err(RevlogError::Corrupted))?;
325 330 Ok(buf)
326 331 } else {
327 332 let mut buf = vec![0; self.uncompressed_len];
328 333 let len = zstd::block::decompress_to_buffer(self.bytes, &mut buf)
329 334 .or(Err(RevlogError::Corrupted))?;
330 335 if len != self.uncompressed_len {
331 336 Err(RevlogError::Corrupted)
332 337 } else {
333 338 Ok(buf)
334 339 }
335 340 }
336 341 }
337 342
338 343 /// Tell if the entry is a snapshot or a delta
339 344 /// (influences on decompression).
340 345 fn is_delta(&self) -> bool {
341 346 self.base_rev.is_some()
342 347 }
343 348 }
344 349
345 350 /// Format version of the revlog.
346 351 pub fn get_version(index_bytes: &[u8]) -> u16 {
347 352 BigEndian::read_u16(&index_bytes[2..=3])
348 353 }
349 354
350 355 /// Calculate the hash of a revision given its data and its parents.
351 356 fn hash(data: &[u8], p1_hash: &[u8], p2_hash: &[u8]) -> Vec<u8> {
352 357 let mut hasher = Sha1::new();
353 358 let (a, b) = (p1_hash, p2_hash);
354 359 if a > b {
355 360 hasher.input(b);
356 361 hasher.input(a);
357 362 } else {
358 363 hasher.input(a);
359 364 hasher.input(b);
360 365 }
361 366 hasher.input(data);
362 367 let mut hash = vec![0; NODE_BYTES_LENGTH];
363 368 hasher.result(&mut hash);
364 369 hash
365 370 }
366 371
#[cfg(test)]
mod tests {
    use super::*;

    use super::super::index::IndexEntryBuilder;

    /// An index entry built with version 1 must be reported as version 1.
    #[test]
    fn version_test() {
        let index_bytes = IndexEntryBuilder::new()
            .is_first(true)
            .with_version(1)
            .build();

        let version = get_version(&index_bytes);
        assert_eq!(version, 1)
    }
}
General Comments 0
You need to be logged in to leave comments. Login now