##// END OF EJS Templates
rust: introduce Repo and Vfs types for filesystem abstraction...
Simon Sapin -
r46782:8a491439 default
parent child Browse files
Show More
@@ -0,0 +1,92 b''
1 use crate::operations::{find_root, FindRootError};
2 use crate::requirements;
3 use memmap::{Mmap, MmapOptions};
4 use std::path::{Path, PathBuf};
5
6 /// A repository on disk
7 pub struct Repo {
8 working_directory: PathBuf,
9 dot_hg: PathBuf,
10 store: PathBuf,
11 }
12
13 /// Filesystem access abstraction for the contents of a given "base" directory
14 #[derive(Clone, Copy)]
15 pub(crate) struct Vfs<'a> {
16 base: &'a Path,
17 }
18
19 impl Repo {
20 /// Creates a `Repo` rooted at the given working directory path. This does
21 /// not check that the path actually contains a `.hg` sub-directory.
22 pub fn for_path(root: impl Into<PathBuf>) -> Self {
23 let working_directory = root.into();
24 let dot_hg = working_directory.join(".hg");
25 Self {
26 store: dot_hg.join("store"),
27 dot_hg,
28 working_directory,
29 }
30 }
31
32 pub fn find() -> Result<Self, FindRootError> {
33 find_root().map(Self::for_path)
34 }
35
36 pub fn check_requirements(
37 &self,
38 ) -> Result<(), requirements::RequirementsError> {
39 requirements::check(self)
40 }
41
42 pub fn working_directory_path(&self) -> &Path {
43 &self.working_directory
44 }
45
46 /// For accessing repository files (in `.hg`), except for the store
47 /// (`.hg/store`).
48 pub(crate) fn hg_vfs(&self) -> Vfs<'_> {
49 Vfs { base: &self.dot_hg }
50 }
51
52 /// For accessing repository store files (in `.hg/store`)
53 pub(crate) fn store_vfs(&self) -> Vfs<'_> {
54 Vfs { base: &self.store }
55 }
56
57 /// For accessing the working copy
58
59 // The underscore prefix silences the "never used" warning. Remove before
60 // using.
61 pub(crate) fn _working_directory_vfs(&self) -> Vfs<'_> {
62 Vfs {
63 base: &self.working_directory,
64 }
65 }
66 }
67
68 impl Vfs<'_> {
69 pub(crate) fn read(
70 &self,
71 relative_path: impl AsRef<Path>,
72 ) -> std::io::Result<Vec<u8>> {
73 std::fs::read(self.base.join(relative_path))
74 }
75
76 pub(crate) fn open(
77 &self,
78 relative_path: impl AsRef<Path>,
79 ) -> std::io::Result<std::fs::File> {
80 std::fs::File::open(self.base.join(relative_path))
81 }
82
83 pub(crate) fn mmap_open(
84 &self,
85 relative_path: impl AsRef<Path>,
86 ) -> std::io::Result<Mmap> {
87 let file = self.open(relative_path)?;
88 // TODO: what are the safety requirements here?
89 let mmap = unsafe { MmapOptions::new().map(&file) }?;
90 Ok(mmap)
91 }
92 }
@@ -1,194 +1,195 b''
1 1 // Copyright 2018-2020 Georges Racinet <georges.racinet@octobus.net>
2 2 // and Mercurial contributors
3 3 //
4 4 // This software may be used and distributed according to the terms of the
5 5 // GNU General Public License version 2 or any later version.
6 6 mod ancestors;
7 7 pub mod dagops;
8 8 pub use ancestors::{AncestorsIterator, LazyAncestors, MissingAncestors};
9 9 mod dirstate;
10 10 pub mod discovery;
11 11 pub mod requirements;
12 12 pub mod testing; // unconditionally built, for use from integration tests
13 13 pub use dirstate::{
14 14 dirs_multiset::{DirsMultiset, DirsMultisetIter},
15 15 dirstate_map::DirstateMap,
16 16 parsers::{pack_dirstate, parse_dirstate, PARENT_SIZE},
17 17 status::{
18 18 status, BadMatch, BadType, DirstateStatus, StatusError, StatusOptions,
19 19 },
20 20 CopyMap, CopyMapIter, DirstateEntry, DirstateParents, EntryState,
21 21 StateMap, StateMapIter,
22 22 };
23 23 pub mod copy_tracing;
24 24 mod filepatterns;
25 25 pub mod matchers;
26 pub mod repo;
26 27 pub mod revlog;
27 28 pub use revlog::*;
28 29 pub mod operations;
29 30 pub mod utils;
30 31
31 32 // Remove this to see (potential) non-artificial compile failures. MacOS
32 33 // *should* compile, but fails to compile tests for example as of 2020-03-06
33 34 #[cfg(not(target_os = "linux"))]
34 35 compile_error!(
35 36 "`hg-core` has only been tested on Linux and will most \
36 37 likely not behave correctly on other platforms."
37 38 );
38 39
39 40 use crate::utils::hg_path::{HgPathBuf, HgPathError};
40 41 pub use filepatterns::{
41 42 parse_pattern_syntax, read_pattern_file, IgnorePattern,
42 43 PatternFileWarning, PatternSyntax,
43 44 };
44 45 use std::collections::HashMap;
45 46 use twox_hash::RandomXxHashBuilder64;
46 47
47 48 /// This is a contract between the `micro-timer` crate and us, to expose
48 49 /// the `log` crate as `crate::log`.
49 50 use log;
50 51
51 52 pub type LineNumber = usize;
52 53
53 54 /// Rust's default hasher is too slow because it tries to prevent collision
54 55 /// attacks. We are not concerned about those: if an ill-minded person has
55 56 /// write access to your repository, you have other issues.
56 57 pub type FastHashMap<K, V> = HashMap<K, V, RandomXxHashBuilder64>;
57 58
58 59 #[derive(Clone, Debug, PartialEq)]
59 60 pub enum DirstateParseError {
60 61 TooLittleData,
61 62 Overflow,
62 63 // TODO refactor to use bytes instead of String
63 64 CorruptedEntry(String),
64 65 Damaged,
65 66 }
66 67
67 68 impl From<std::io::Error> for DirstateParseError {
68 69 fn from(e: std::io::Error) -> Self {
69 70 DirstateParseError::CorruptedEntry(e.to_string())
70 71 }
71 72 }
72 73
73 74 impl ToString for DirstateParseError {
74 75 fn to_string(&self) -> String {
75 76 use crate::DirstateParseError::*;
76 77 match self {
77 78 TooLittleData => "Too little data for dirstate.".to_string(),
78 79 Overflow => "Overflow in dirstate.".to_string(),
79 80 CorruptedEntry(e) => format!("Corrupted entry: {:?}.", e),
80 81 Damaged => "Dirstate appears to be damaged.".to_string(),
81 82 }
82 83 }
83 84 }
84 85
85 86 #[derive(Debug, PartialEq)]
86 87 pub enum DirstatePackError {
87 88 CorruptedEntry(String),
88 89 CorruptedParent,
89 90 BadSize(usize, usize),
90 91 }
91 92
92 93 impl From<std::io::Error> for DirstatePackError {
93 94 fn from(e: std::io::Error) -> Self {
94 95 DirstatePackError::CorruptedEntry(e.to_string())
95 96 }
96 97 }
97 98 #[derive(Debug, PartialEq)]
98 99 pub enum DirstateMapError {
99 100 PathNotFound(HgPathBuf),
100 101 EmptyPath,
101 102 InvalidPath(HgPathError),
102 103 }
103 104
104 105 impl ToString for DirstateMapError {
105 106 fn to_string(&self) -> String {
106 107 match self {
107 108 DirstateMapError::PathNotFound(_) => {
108 109 "expected a value, found none".to_string()
109 110 }
110 111 DirstateMapError::EmptyPath => "Overflow in dirstate.".to_string(),
111 112 DirstateMapError::InvalidPath(e) => e.to_string(),
112 113 }
113 114 }
114 115 }
115 116
116 117 #[derive(Debug)]
117 118 pub enum DirstateError {
118 119 Parse(DirstateParseError),
119 120 Pack(DirstatePackError),
120 121 Map(DirstateMapError),
121 122 IO(std::io::Error),
122 123 }
123 124
124 125 impl From<DirstateParseError> for DirstateError {
125 126 fn from(e: DirstateParseError) -> Self {
126 127 DirstateError::Parse(e)
127 128 }
128 129 }
129 130
130 131 impl From<DirstatePackError> for DirstateError {
131 132 fn from(e: DirstatePackError) -> Self {
132 133 DirstateError::Pack(e)
133 134 }
134 135 }
135 136
136 137 #[derive(Debug)]
137 138 pub enum PatternError {
138 139 Path(HgPathError),
139 140 UnsupportedSyntax(String),
140 141 UnsupportedSyntaxInFile(String, String, usize),
141 142 TooLong(usize),
142 143 IO(std::io::Error),
143 144 /// Needed a pattern that can be turned into a regex but got one that
144 145 /// can't. This should only happen through programmer error.
145 146 NonRegexPattern(IgnorePattern),
146 147 }
147 148
148 149 impl ToString for PatternError {
149 150 fn to_string(&self) -> String {
150 151 match self {
151 152 PatternError::UnsupportedSyntax(syntax) => {
152 153 format!("Unsupported syntax {}", syntax)
153 154 }
154 155 PatternError::UnsupportedSyntaxInFile(syntax, file_path, line) => {
155 156 format!(
156 157 "{}:{}: unsupported syntax {}",
157 158 file_path, line, syntax
158 159 )
159 160 }
160 161 PatternError::TooLong(size) => {
161 162 format!("matcher pattern is too long ({} bytes)", size)
162 163 }
163 164 PatternError::IO(e) => e.to_string(),
164 165 PatternError::Path(e) => e.to_string(),
165 166 PatternError::NonRegexPattern(pattern) => {
166 167 format!("'{:?}' cannot be turned into a regex", pattern)
167 168 }
168 169 }
169 170 }
170 171 }
171 172
172 173 impl From<DirstateMapError> for DirstateError {
173 174 fn from(e: DirstateMapError) -> Self {
174 175 DirstateError::Map(e)
175 176 }
176 177 }
177 178
178 179 impl From<std::io::Error> for DirstateError {
179 180 fn from(e: std::io::Error) -> Self {
180 181 DirstateError::IO(e)
181 182 }
182 183 }
183 184
184 185 impl From<std::io::Error> for PatternError {
185 186 fn from(e: std::io::Error) -> Self {
186 187 PatternError::IO(e)
187 188 }
188 189 }
189 190
190 191 impl From<HgPathError> for PatternError {
191 192 fn from(e: HgPathError) -> Self {
192 193 PatternError::Path(e)
193 194 }
194 195 }
@@ -1,139 +1,135 b''
1 1 // list_tracked_files.rs
2 2 //
3 3 // Copyright 2020 Antoine Cezar <antoine.cezar@octobus.net>
4 4 //
5 5 // This software may be used and distributed according to the terms of the
6 6 // GNU General Public License version 2 or any later version.
7 7
8 8 use std::convert::From;
9 use std::path::{Path, PathBuf};
9 use std::path::PathBuf;
10 10
11 use crate::repo::Repo;
11 12 use crate::revlog::changelog::Changelog;
12 13 use crate::revlog::manifest::Manifest;
13 14 use crate::revlog::path_encode::path_encode;
14 15 use crate::revlog::revlog::Revlog;
15 16 use crate::revlog::revlog::RevlogError;
16 17 use crate::revlog::Node;
17 18 use crate::revlog::NodePrefix;
18 19 use crate::revlog::Revision;
19 20 use crate::utils::files::get_path_from_bytes;
20 21 use crate::utils::hg_path::{HgPath, HgPathBuf};
21 22
22 23 const METADATA_DELIMITER: [u8; 2] = [b'\x01', b'\n'];
23 24
24 25 /// Kind of error encountered by `CatRev`
25 26 #[derive(Debug)]
26 27 pub enum CatRevErrorKind {
27 28 /// Error when reading a `revlog` file.
28 29 IoError(std::io::Error),
29 30 /// The revision has not been found.
30 31 InvalidRevision,
31 32 /// Found more than one revision whose ID match the requested prefix
32 33 AmbiguousPrefix,
33 34 /// A `revlog` file is corrupted.
34 35 CorruptedRevlog,
35 36 /// The `revlog` format version is not supported.
36 37 UnsuportedRevlogVersion(u16),
37 38 /// The `revlog` data format is not supported.
38 39 UnknowRevlogDataFormat(u8),
39 40 }
40 41
41 42 /// A `CatRev` error
42 43 #[derive(Debug)]
43 44 pub struct CatRevError {
44 45 /// Kind of error encountered by `CatRev`
45 46 pub kind: CatRevErrorKind,
46 47 }
47 48
48 49 impl From<CatRevErrorKind> for CatRevError {
49 50 fn from(kind: CatRevErrorKind) -> Self {
50 51 CatRevError { kind }
51 52 }
52 53 }
53 54
54 55 impl From<RevlogError> for CatRevError {
55 56 fn from(err: RevlogError) -> Self {
56 57 match err {
57 58 RevlogError::IoError(err) => CatRevErrorKind::IoError(err),
58 59 RevlogError::UnsuportedVersion(version) => {
59 60 CatRevErrorKind::UnsuportedRevlogVersion(version)
60 61 }
61 62 RevlogError::InvalidRevision => CatRevErrorKind::InvalidRevision,
62 63 RevlogError::AmbiguousPrefix => CatRevErrorKind::AmbiguousPrefix,
63 64 RevlogError::Corrupted => CatRevErrorKind::CorruptedRevlog,
64 65 RevlogError::UnknowDataFormat(format) => {
65 66 CatRevErrorKind::UnknowRevlogDataFormat(format)
66 67 }
67 68 }
68 69 .into()
69 70 }
70 71 }
71 72
72 73 /// Concatenate the contents of the given files at a given revision.
73 74 ///
74 75 /// * `repo`: The repository to read from.
75 76 /// * `rev`: The revision to cat the files from.
76 77 /// * `files`: The files to output.
77 78 pub fn cat(
78 root: &Path,
79 repo: &Repo,
79 80 rev: &str,
80 81 files: &[HgPathBuf],
81 82 ) -> Result<Vec<u8>, CatRevError> {
82 let changelog = Changelog::open(&root)?;
83 let manifest = Manifest::open(&root)?;
83 let changelog = Changelog::open(repo)?;
84 let manifest = Manifest::open(repo)?;
84 85
85 86 let changelog_entry = match rev.parse::<Revision>() {
86 87 Ok(rev) => changelog.get_rev(rev)?,
87 88 _ => {
88 89 let changelog_node = NodePrefix::from_hex(&rev)
89 90 .map_err(|_| CatRevErrorKind::InvalidRevision)?;
90 91 changelog.get_node(changelog_node.borrow())?
91 92 }
92 93 };
93 94 let manifest_node = Node::from_hex(&changelog_entry.manifest_node()?)
94 95 .map_err(|_| CatRevErrorKind::CorruptedRevlog)?;
95 96
96 97 let manifest_entry = manifest.get_node((&manifest_node).into())?;
97 98 let mut bytes = vec![];
98 99
99 100 for (manifest_file, node_bytes) in manifest_entry.files_with_nodes() {
100 101 for cat_file in files.iter() {
101 102 if cat_file.as_bytes() == manifest_file.as_bytes() {
102 let index_path = store_path(root, manifest_file, b".i");
103 let data_path = store_path(root, manifest_file, b".d");
103 let index_path = store_path(manifest_file, b".i");
104 let data_path = store_path(manifest_file, b".d");
104 105
105 let file_log = Revlog::open(&index_path, Some(&data_path))?;
106 let file_log =
107 Revlog::open(repo, &index_path, Some(&data_path))?;
106 108 let file_node = Node::from_hex(node_bytes)
107 109 .map_err(|_| CatRevErrorKind::CorruptedRevlog)?;
108 110 let file_rev = file_log.get_node_rev((&file_node).into())?;
109 111 let data = file_log.get_rev_data(file_rev)?;
110 112 if data.starts_with(&METADATA_DELIMITER) {
111 113 let end_delimiter_position = data
112 114 [METADATA_DELIMITER.len()..]
113 115 .windows(METADATA_DELIMITER.len())
114 116 .position(|bytes| bytes == METADATA_DELIMITER);
115 117 if let Some(position) = end_delimiter_position {
116 118 let offset = METADATA_DELIMITER.len() * 2;
117 119 bytes.extend(data[position + offset..].iter());
118 120 }
119 121 } else {
120 122 bytes.extend(data);
121 123 }
122 124 }
123 125 }
124 126 }
125 127
126 128 Ok(bytes)
127 129 }
128 130
129 fn store_path(root: &Path, hg_path: &HgPath, suffix: &[u8]) -> PathBuf {
131 fn store_path(hg_path: &HgPath, suffix: &[u8]) -> PathBuf {
130 132 let encoded_bytes =
131 133 path_encode(&[b"data/", hg_path.as_bytes(), suffix].concat());
132 [
133 root,
134 &Path::new(".hg/store/"),
135 get_path_from_bytes(&encoded_bytes),
136 ]
137 .iter()
138 .collect()
134 get_path_from_bytes(&encoded_bytes).into()
139 135 }
@@ -1,103 +1,102 b''
1 1 // debugdata.rs
2 2 //
3 3 // Copyright 2020 Antoine Cezar <antoine.cezar@octobus.net>
4 4 //
5 5 // This software may be used and distributed according to the terms of the
6 6 // GNU General Public License version 2 or any later version.
7 7
8 use std::path::Path;
9
8 use crate::repo::Repo;
10 9 use crate::revlog::revlog::{Revlog, RevlogError};
11 10 use crate::revlog::NodePrefix;
12 11 use crate::revlog::Revision;
13 12
14 13 /// Kind of data to debug
15 14 #[derive(Debug, Copy, Clone)]
16 15 pub enum DebugDataKind {
17 16 Changelog,
18 17 Manifest,
19 18 }
20 19
21 20 /// Kind of error encountered by DebugData
22 21 #[derive(Debug)]
23 22 pub enum DebugDataErrorKind {
24 23 /// Error when reading a `revlog` file.
25 24 IoError(std::io::Error),
26 25 /// The revision has not been found.
27 26 InvalidRevision,
28 27 /// Found more than one revision whose ID match the requested prefix
29 28 AmbiguousPrefix,
30 29 /// A `revlog` file is corrupted.
31 30 CorruptedRevlog,
32 31 /// The `revlog` format version is not supported.
33 32 UnsuportedRevlogVersion(u16),
34 33 /// The `revlog` data format is not supported.
35 34 UnknowRevlogDataFormat(u8),
36 35 }
37 36
38 37 /// A DebugData error
39 38 #[derive(Debug)]
40 39 pub struct DebugDataError {
41 40 /// Kind of error encountered by DebugData
42 41 pub kind: DebugDataErrorKind,
43 42 }
44 43
45 44 impl From<DebugDataErrorKind> for DebugDataError {
46 45 fn from(kind: DebugDataErrorKind) -> Self {
47 46 DebugDataError { kind }
48 47 }
49 48 }
50 49
51 50 impl From<std::io::Error> for DebugDataError {
52 51 fn from(err: std::io::Error) -> Self {
53 52 let kind = DebugDataErrorKind::IoError(err);
54 53 DebugDataError { kind }
55 54 }
56 55 }
57 56
58 57 impl From<RevlogError> for DebugDataError {
59 58 fn from(err: RevlogError) -> Self {
60 59 match err {
61 60 RevlogError::IoError(err) => DebugDataErrorKind::IoError(err),
62 61 RevlogError::UnsuportedVersion(version) => {
63 62 DebugDataErrorKind::UnsuportedRevlogVersion(version)
64 63 }
65 64 RevlogError::InvalidRevision => {
66 65 DebugDataErrorKind::InvalidRevision
67 66 }
68 67 RevlogError::AmbiguousPrefix => {
69 68 DebugDataErrorKind::AmbiguousPrefix
70 69 }
71 70 RevlogError::Corrupted => DebugDataErrorKind::CorruptedRevlog,
72 71 RevlogError::UnknowDataFormat(format) => {
73 72 DebugDataErrorKind::UnknowRevlogDataFormat(format)
74 73 }
75 74 }
76 75 .into()
77 76 }
78 77 }
79 78
80 79 /// Dump the data contents of a revision.
81 80 pub fn debug_data(
82 root: &Path,
81 repo: &Repo,
83 82 rev: &str,
84 83 kind: DebugDataKind,
85 84 ) -> Result<Vec<u8>, DebugDataError> {
86 85 let index_file = match kind {
87 DebugDataKind::Changelog => root.join(".hg/store/00changelog.i"),
88 DebugDataKind::Manifest => root.join(".hg/store/00manifest.i"),
86 DebugDataKind::Changelog => "00changelog.i",
87 DebugDataKind::Manifest => "00manifest.i",
89 88 };
90 let revlog = Revlog::open(&index_file, None)?;
89 let revlog = Revlog::open(repo, index_file, None)?;
91 90
92 91 let data = match rev.parse::<Revision>() {
93 92 Ok(rev) => revlog.get_rev_data(rev)?,
94 93 _ => {
95 94 let node = NodePrefix::from_hex(&rev)
96 95 .map_err(|_| DebugDataErrorKind::InvalidRevision)?;
97 96 let rev = revlog.get_node_rev(node.borrow())?;
98 97 revlog.get_rev_data(rev)?
99 98 }
100 99 };
101 100
102 101 Ok(data)
103 102 }
@@ -1,167 +1,165 b''
1 1 // list_tracked_files.rs
2 2 //
3 3 // Copyright 2020 Antoine Cezar <antoine.cezar@octobus.net>
4 4 //
5 5 // This software may be used and distributed according to the terms of the
6 6 // GNU General Public License version 2 or any later version.
7 7
8 8 use crate::dirstate::parsers::parse_dirstate;
9 use crate::repo::Repo;
9 10 use crate::revlog::changelog::Changelog;
10 11 use crate::revlog::manifest::{Manifest, ManifestEntry};
11 12 use crate::revlog::node::{Node, NodePrefix};
12 13 use crate::revlog::revlog::RevlogError;
13 14 use crate::revlog::Revision;
14 15 use crate::utils::hg_path::HgPath;
15 16 use crate::{DirstateParseError, EntryState};
16 17 use rayon::prelude::*;
17 18 use std::convert::From;
18 use std::fs;
19 use std::path::Path;
20 19
21 20 /// Kind of error encountered by `ListDirstateTrackedFiles`
22 21 #[derive(Debug)]
23 22 pub enum ListDirstateTrackedFilesErrorKind {
24 23 /// Error when reading the `dirstate` file
25 24 IoError(std::io::Error),
26 25 /// Error when parsing the `dirstate` file
27 26 ParseError(DirstateParseError),
28 27 }
29 28
30 29 /// A `ListDirstateTrackedFiles` error
31 30 #[derive(Debug)]
32 31 pub struct ListDirstateTrackedFilesError {
33 32 /// Kind of error encountered by `ListDirstateTrackedFiles`
34 33 pub kind: ListDirstateTrackedFilesErrorKind,
35 34 }
36 35
37 36 impl From<ListDirstateTrackedFilesErrorKind>
38 37 for ListDirstateTrackedFilesError
39 38 {
40 39 fn from(kind: ListDirstateTrackedFilesErrorKind) -> Self {
41 40 ListDirstateTrackedFilesError { kind }
42 41 }
43 42 }
44 43
45 44 impl From<std::io::Error> for ListDirstateTrackedFilesError {
46 45 fn from(err: std::io::Error) -> Self {
47 46 let kind = ListDirstateTrackedFilesErrorKind::IoError(err);
48 47 ListDirstateTrackedFilesError { kind }
49 48 }
50 49 }
51 50
52 51 /// List files under Mercurial control in the working directory
53 52 /// by reading the dirstate
54 53 pub struct Dirstate {
55 54 /// The `dirstate` content.
56 55 content: Vec<u8>,
57 56 }
58 57
59 58 impl Dirstate {
60 pub fn new(root: &Path) -> Result<Self, ListDirstateTrackedFilesError> {
61 let dirstate = root.join(".hg/dirstate");
62 let content = fs::read(&dirstate)?;
59 pub fn new(repo: &Repo) -> Result<Self, ListDirstateTrackedFilesError> {
60 let content = repo.hg_vfs().read("dirstate")?;
63 61 Ok(Self { content })
64 62 }
65 63
66 64 pub fn tracked_files(
67 65 &self,
68 66 ) -> Result<Vec<&HgPath>, ListDirstateTrackedFilesError> {
69 67 let (_, entries, _) = parse_dirstate(&self.content)
70 68 .map_err(ListDirstateTrackedFilesErrorKind::ParseError)?;
71 69 let mut files: Vec<&HgPath> = entries
72 70 .into_iter()
73 71 .filter_map(|(path, entry)| match entry.state {
74 72 EntryState::Removed => None,
75 73 _ => Some(path),
76 74 })
77 75 .collect();
78 76 files.par_sort_unstable();
79 77 Ok(files)
80 78 }
81 79 }
82 80
83 81 /// Kind of error encountered by `ListRevTrackedFiles`
84 82 #[derive(Debug)]
85 83 pub enum ListRevTrackedFilesErrorKind {
86 84 /// Error when reading a `revlog` file.
87 85 IoError(std::io::Error),
88 86 /// The revision has not been found.
89 87 InvalidRevision,
90 88 /// Found more than one revision whose ID match the requested prefix
91 89 AmbiguousPrefix,
92 90 /// A `revlog` file is corrupted.
93 91 CorruptedRevlog,
94 92 /// The `revlog` format version is not supported.
95 93 UnsuportedRevlogVersion(u16),
96 94 /// The `revlog` data format is not supported.
97 95 UnknowRevlogDataFormat(u8),
98 96 }
99 97
100 98 /// A `ListRevTrackedFiles` error
101 99 #[derive(Debug)]
102 100 pub struct ListRevTrackedFilesError {
103 101 /// Kind of error encountered by `ListRevTrackedFiles`
104 102 pub kind: ListRevTrackedFilesErrorKind,
105 103 }
106 104
107 105 impl From<ListRevTrackedFilesErrorKind> for ListRevTrackedFilesError {
108 106 fn from(kind: ListRevTrackedFilesErrorKind) -> Self {
109 107 ListRevTrackedFilesError { kind }
110 108 }
111 109 }
112 110
113 111 impl From<RevlogError> for ListRevTrackedFilesError {
114 112 fn from(err: RevlogError) -> Self {
115 113 match err {
116 114 RevlogError::IoError(err) => {
117 115 ListRevTrackedFilesErrorKind::IoError(err)
118 116 }
119 117 RevlogError::UnsuportedVersion(version) => {
120 118 ListRevTrackedFilesErrorKind::UnsuportedRevlogVersion(version)
121 119 }
122 120 RevlogError::InvalidRevision => {
123 121 ListRevTrackedFilesErrorKind::InvalidRevision
124 122 }
125 123 RevlogError::AmbiguousPrefix => {
126 124 ListRevTrackedFilesErrorKind::AmbiguousPrefix
127 125 }
128 126 RevlogError::Corrupted => {
129 127 ListRevTrackedFilesErrorKind::CorruptedRevlog
130 128 }
131 129 RevlogError::UnknowDataFormat(format) => {
132 130 ListRevTrackedFilesErrorKind::UnknowRevlogDataFormat(format)
133 131 }
134 132 }
135 133 .into()
136 134 }
137 135 }
138 136
139 137 /// List files under Mercurial control at a given revision.
140 138 pub fn list_rev_tracked_files(
141 root: &Path,
139 repo: &Repo,
142 140 rev: &str,
143 141 ) -> Result<FilesForRev, ListRevTrackedFilesError> {
144 let changelog = Changelog::open(root)?;
145 let manifest = Manifest::open(root)?;
142 let changelog = Changelog::open(repo)?;
143 let manifest = Manifest::open(repo)?;
146 144
147 145 let changelog_entry = match rev.parse::<Revision>() {
148 146 Ok(rev) => changelog.get_rev(rev)?,
149 147 _ => {
150 148 let changelog_node = NodePrefix::from_hex(&rev)
151 149 .or(Err(ListRevTrackedFilesErrorKind::InvalidRevision))?;
152 150 changelog.get_node(changelog_node.borrow())?
153 151 }
154 152 };
155 153 let manifest_node = Node::from_hex(&changelog_entry.manifest_node()?)
156 154 .or(Err(ListRevTrackedFilesErrorKind::CorruptedRevlog))?;
157 155 let manifest_entry = manifest.get_node((&manifest_node).into())?;
158 156 Ok(FilesForRev(manifest_entry))
159 157 }
160 158
161 159 pub struct FilesForRev(ManifestEntry);
162 160
163 161 impl FilesForRev {
164 162 pub fn iter(&self) -> impl Iterator<Item = &HgPath> {
165 163 self.0.files()
166 164 }
167 165 }
@@ -1,76 +1,76 b''
1 use crate::repo::Repo;
1 2 use std::io;
2 use std::path::Path;
3 3
4 4 #[derive(Debug)]
5 5 pub enum RequirementsError {
6 6 // TODO: include a path?
7 7 Io(io::Error),
8 8 /// The `requires` file is corrupted
9 9 Corrupted,
10 10 /// The repository requires a feature that we don't support
11 11 Unsupported {
12 12 feature: String,
13 13 },
14 14 }
15 15
16 16 fn parse(bytes: &[u8]) -> Result<Vec<String>, ()> {
17 17 // The Python code reading this file uses `str.splitlines`
18 18 // which looks for a number of line separators (even including a couple of
19 19 // non-ASCII ones), but Python code writing it always uses `\n`.
20 20 let lines = bytes.split(|&byte| byte == b'\n');
21 21
22 22 lines
23 23 .filter(|line| !line.is_empty())
24 24 .map(|line| {
25 25 // Python uses Unicode `str.isalnum` but feature names are all
26 26 // ASCII
27 27 if line[0].is_ascii_alphanumeric() && line.is_ascii() {
28 28 Ok(String::from_utf8(line.into()).unwrap())
29 29 } else {
30 30 Err(())
31 31 }
32 32 })
33 33 .collect()
34 34 }
35 35
36 pub fn load(repo_root: &Path) -> Result<Vec<String>, RequirementsError> {
37 match std::fs::read(repo_root.join(".hg").join("requires")) {
36 pub fn load(repo: &Repo) -> Result<Vec<String>, RequirementsError> {
37 match repo.hg_vfs().read("requires") {
38 38 Ok(bytes) => parse(&bytes).map_err(|()| RequirementsError::Corrupted),
39 39
40 40 // Treat a missing file the same as an empty file.
41 41 // From `mercurial/localrepo.py`:
42 42 // > requires file contains a newline-delimited list of
43 43 // > features/capabilities the opener (us) must have in order to use
44 44 // > the repository. This file was introduced in Mercurial 0.9.2,
45 45 // > which means very old repositories may not have one. We assume
46 46 // > a missing file translates to no requirements.
47 47 Err(error) if error.kind() == std::io::ErrorKind::NotFound => {
48 48 Ok(Vec::new())
49 49 }
50 50
51 51 Err(error) => Err(RequirementsError::Io(error))?,
52 52 }
53 53 }
54 54
55 pub fn check(repo_root: &Path) -> Result<(), RequirementsError> {
56 for feature in load(repo_root)? {
55 pub fn check(repo: &Repo) -> Result<(), RequirementsError> {
56 for feature in load(repo)? {
57 57 if !SUPPORTED.contains(&&*feature) {
58 58 return Err(RequirementsError::Unsupported { feature });
59 59 }
60 60 }
61 61 Ok(())
62 62 }
63 63
64 64 // TODO: set this to actually-supported features
65 65 const SUPPORTED: &[&str] = &[
66 66 "dotencode",
67 67 "fncache",
68 68 "generaldelta",
69 69 "revlogv1",
70 70 "sparserevlog",
71 71 "store",
72 72 // As of this writing everything rhg does is read-only.
73 73 // When it starts writing to the repository, it’ll need to either keep the
74 74 // persistent nodemap up to date or remove this entry:
75 75 "persistent-nodemap",
76 76 ];
@@ -1,59 +1,58 b''
1 use crate::repo::Repo;
1 2 use crate::revlog::revlog::{Revlog, RevlogError};
2 3 use crate::revlog::NodePrefixRef;
3 4 use crate::revlog::Revision;
4 use std::path::Path;
5 5
6 6 /// A specialized `Revlog` to work with `changelog` data format.
7 7 pub struct Changelog {
8 8 /// The generic `revlog` format.
9 9 revlog: Revlog,
10 10 }
11 11
12 12 impl Changelog {
13 13 /// Open the `changelog` of the given repository.
14 pub fn open(root: &Path) -> Result<Self, RevlogError> {
15 let index_file = root.join(".hg/store/00changelog.i");
16 let revlog = Revlog::open(&index_file, None)?;
14 pub fn open(repo: &Repo) -> Result<Self, RevlogError> {
15 let revlog = Revlog::open(repo, "00changelog.i", None)?;
17 16 Ok(Self { revlog })
18 17 }
19 18
20 19 /// Return the `ChangelogEntry` of a given node id.
21 20 pub fn get_node(
22 21 &self,
23 22 node: NodePrefixRef,
24 23 ) -> Result<ChangelogEntry, RevlogError> {
25 24 let rev = self.revlog.get_node_rev(node)?;
26 25 self.get_rev(rev)
27 26 }
28 27
29 28 /// Return the `ChangelogEntry` of a given node revision.
30 29 pub fn get_rev(
31 30 &self,
32 31 rev: Revision,
33 32 ) -> Result<ChangelogEntry, RevlogError> {
34 33 let bytes = self.revlog.get_rev_data(rev)?;
35 34 Ok(ChangelogEntry { bytes })
36 35 }
37 36 }
38 37
39 38 /// `Changelog` entry which knows how to interpret the `changelog` data bytes.
40 39 #[derive(Debug)]
41 40 pub struct ChangelogEntry {
42 41 /// The data bytes of the `changelog` entry.
43 42 bytes: Vec<u8>,
44 43 }
45 44
46 45 impl ChangelogEntry {
47 46 /// Return an iterator over the lines of the entry.
48 47 pub fn lines(&self) -> impl Iterator<Item = &[u8]> {
49 48 self.bytes
50 49 .split(|b| b == &b'\n')
51 50 .filter(|line| !line.is_empty())
52 51 }
53 52
54 53 /// Return the node id of the `manifest` referenced by this `changelog`
55 54 /// entry.
56 55 pub fn manifest_node(&self) -> Result<&[u8], RevlogError> {
57 56 self.lines().next().ok_or(RevlogError::Corrupted)
58 57 }
59 58 }
@@ -1,77 +1,76 b''
1 use crate::repo::Repo;
1 2 use crate::revlog::revlog::{Revlog, RevlogError};
2 3 use crate::revlog::NodePrefixRef;
3 4 use crate::revlog::Revision;
4 5 use crate::utils::hg_path::HgPath;
5 use std::path::Path;
6 6
7 7 /// A specialized `Revlog` to work with `manifest` data format.
8 8 pub struct Manifest {
9 9 /// The generic `revlog` format.
10 10 revlog: Revlog,
11 11 }
12 12
13 13 impl Manifest {
14 14 /// Open the `manifest` of the given repository.
15 pub fn open(root: &Path) -> Result<Self, RevlogError> {
16 let index_file = root.join(".hg/store/00manifest.i");
17 let revlog = Revlog::open(&index_file, None)?;
15 pub fn open(repo: &Repo) -> Result<Self, RevlogError> {
16 let revlog = Revlog::open(repo, "00manifest.i", None)?;
18 17 Ok(Self { revlog })
19 18 }
20 19
21 20 /// Return the `ManifestEntry` of a given node id.
22 21 pub fn get_node(
23 22 &self,
24 23 node: NodePrefixRef,
25 24 ) -> Result<ManifestEntry, RevlogError> {
26 25 let rev = self.revlog.get_node_rev(node)?;
27 26 self.get_rev(rev)
28 27 }
29 28
30 29 /// Return the `ManifestEntry` of a given node revision.
31 30 pub fn get_rev(
32 31 &self,
33 32 rev: Revision,
34 33 ) -> Result<ManifestEntry, RevlogError> {
35 34 let bytes = self.revlog.get_rev_data(rev)?;
36 35 Ok(ManifestEntry { bytes })
37 36 }
38 37 }
39 38
40 39 /// `Manifest` entry which knows how to interpret the `manifest` data bytes.
41 40 #[derive(Debug)]
42 41 pub struct ManifestEntry {
43 42 bytes: Vec<u8>,
44 43 }
45 44
46 45 impl ManifestEntry {
47 46 /// Return an iterator over the lines of the entry.
48 47 pub fn lines(&self) -> impl Iterator<Item = &[u8]> {
49 48 self.bytes
50 49 .split(|b| b == &b'\n')
51 50 .filter(|line| !line.is_empty())
52 51 }
53 52
54 53 /// Return an iterator over the files of the entry.
55 54 pub fn files(&self) -> impl Iterator<Item = &HgPath> {
56 55 self.lines().filter(|line| !line.is_empty()).map(|line| {
57 56 let pos = line
58 57 .iter()
59 58 .position(|x| x == &b'\0')
60 59 .expect("manifest line should contain \\0");
61 60 HgPath::new(&line[..pos])
62 61 })
63 62 }
64 63
65 64 /// Return an iterator over the files of the entry with their node ids.
66 65 pub fn files_with_nodes(&self) -> impl Iterator<Item = (&HgPath, &[u8])> {
67 66 self.lines().filter(|line| !line.is_empty()).map(|line| {
68 67 let pos = line
69 68 .iter()
70 69 .position(|x| x == &b'\0')
71 70 .expect("manifest line should contain \\0");
72 71 let hash_start = pos + 1;
73 72 let hash_end = hash_start + 40;
74 73 (HgPath::new(&line[..pos]), &line[hash_start..hash_end])
75 74 })
76 75 }
77 76 }
@@ -1,119 +1,121 b''
1 1 use memmap::Mmap;
2 2 use std::convert::TryInto;
3 3 use std::path::{Path, PathBuf};
4 4
5 use super::revlog::{mmap_open, RevlogError};
5 use super::revlog::RevlogError;
6 use crate::repo::Repo;
6 7 use crate::utils::strip_suffix;
7 8
8 9 const ONDISK_VERSION: u8 = 1;
9 10
10 11 pub(super) struct NodeMapDocket {
11 12 pub data_length: usize,
12 13 // TODO: keep here more of the data from `parse()` when we need it
13 14 }
14 15
15 16 impl NodeMapDocket {
16 17 /// Return `Ok(None)` when the caller should proceed without a persistent
17 18 /// nodemap:
18 19 ///
19 20 /// * This revlog does not have a `.n` docket file (it is not generated for
20 21 /// small revlogs), or
21 22 /// * The docket has an unsupported version number (repositories created by
22 23 /// later hg, maybe that should be a requirement instead?), or
23 24 /// * The docket file points to a missing (likely deleted) data file (this
24 25 /// can happen in a rare race condition).
25 26 pub fn read_from_file(
27 repo: &Repo,
26 28 index_path: &Path,
27 29 ) -> Result<Option<(Self, Mmap)>, RevlogError> {
28 30 let docket_path = index_path.with_extension("n");
29 let docket_bytes = match std::fs::read(&docket_path) {
31 let docket_bytes = match repo.store_vfs().read(&docket_path) {
30 32 Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
31 33 return Ok(None)
32 34 }
33 35 Err(e) => return Err(RevlogError::IoError(e)),
34 36 Ok(bytes) => bytes,
35 37 };
36 38
37 39 let mut input = if let Some((&ONDISK_VERSION, rest)) =
38 40 docket_bytes.split_first()
39 41 {
40 42 rest
41 43 } else {
42 44 return Ok(None);
43 45 };
44 46 let input = &mut input;
45 47
46 48 let uid_size = read_u8(input)? as usize;
47 49 let _tip_rev = read_be_u64(input)?;
48 50 // TODO: do we care about overflow for 4 GB+ nodemap files on 32-bit
49 51 // systems?
50 52 let data_length = read_be_u64(input)? as usize;
51 53 let _data_unused = read_be_u64(input)?;
52 54 let tip_node_size = read_be_u64(input)? as usize;
53 55 let uid = read_bytes(input, uid_size)?;
54 56 let _tip_node = read_bytes(input, tip_node_size)?;
55 57
56 58 let uid =
57 59 std::str::from_utf8(uid).map_err(|_| RevlogError::Corrupted)?;
58 60 let docket = NodeMapDocket { data_length };
59 61
60 62 let data_path = rawdata_path(&docket_path, uid);
61 63 // TODO: use `std::fs::read` here when the `persistent-nodemap.mmap`
62 64 // config is false?
63 match mmap_open(&data_path) {
65 match repo.store_vfs().mmap_open(&data_path) {
64 66 Ok(mmap) => {
65 67 if mmap.len() >= data_length {
66 68 Ok(Some((docket, mmap)))
67 69 } else {
68 70 Err(RevlogError::Corrupted)
69 71 }
70 72 }
71 73 Err(error) => {
72 74 if error.kind() == std::io::ErrorKind::NotFound {
73 75 Ok(None)
74 76 } else {
75 77 Err(RevlogError::IoError(error))
76 78 }
77 79 }
78 80 }
79 81 }
80 82 }
81 83
82 84 fn read_bytes<'a>(
83 85 input: &mut &'a [u8],
84 86 count: usize,
85 87 ) -> Result<&'a [u8], RevlogError> {
86 88 if let Some(start) = input.get(..count) {
87 89 *input = &input[count..];
88 90 Ok(start)
89 91 } else {
90 92 Err(RevlogError::Corrupted)
91 93 }
92 94 }
93 95
94 96 fn read_u8<'a>(input: &mut &[u8]) -> Result<u8, RevlogError> {
95 97 Ok(read_bytes(input, 1)?[0])
96 98 }
97 99
98 100 fn read_be_u64<'a>(input: &mut &[u8]) -> Result<u64, RevlogError> {
99 101 let array = read_bytes(input, std::mem::size_of::<u64>())?
100 102 .try_into()
101 103 .unwrap();
102 104 Ok(u64::from_be_bytes(array))
103 105 }
104 106
105 107 fn rawdata_path(docket_path: &Path, uid: &str) -> PathBuf {
106 108 let docket_name = docket_path
107 109 .file_name()
108 110 .expect("expected a base name")
109 111 .to_str()
110 112 .expect("expected an ASCII file name in the store");
111 113 let prefix = strip_suffix(docket_name, ".n.a")
112 114 .or_else(|| strip_suffix(docket_name, ".n"))
113 115 .expect("expected docket path in .n or .n.a");
114 116 let name = format!("{}-{}.nd", prefix, uid);
115 117 docket_path
116 118 .parent()
117 119 .expect("expected a non-root path")
118 120 .join(name)
119 121 }
@@ -1,377 +1,376 b''
1 1 use std::borrow::Cow;
2 use std::fs::File;
3 2 use std::io::Read;
4 3 use std::ops::Deref;
5 4 use std::path::Path;
6 5
7 6 use byteorder::{BigEndian, ByteOrder};
8 7 use crypto::digest::Digest;
9 8 use crypto::sha1::Sha1;
10 9 use flate2::read::ZlibDecoder;
11 use memmap::{Mmap, MmapOptions};
12 10 use micro_timer::timed;
13 11 use zstd;
14 12
15 13 use super::index::Index;
16 14 use super::node::{NodePrefixRef, NODE_BYTES_LENGTH, NULL_NODE};
17 15 use super::nodemap;
18 16 use super::nodemap::NodeMap;
19 17 use super::nodemap_docket::NodeMapDocket;
20 18 use super::patch;
19 use crate::repo::Repo;
21 20 use crate::revlog::Revision;
22 21
23 22 pub enum RevlogError {
24 23 IoError(std::io::Error),
25 24 UnsuportedVersion(u16),
26 25 InvalidRevision,
27 26 /// Found more than one entry whose ID match the requested prefix
28 27 AmbiguousPrefix,
29 28 Corrupted,
30 29 UnknowDataFormat(u8),
31 30 }
32 31
33 pub(super) fn mmap_open(path: &Path) -> Result<Mmap, std::io::Error> {
34 let file = File::open(path)?;
35 let mmap = unsafe { MmapOptions::new().map(&file) }?;
36 Ok(mmap)
37 }
38
39 32 /// Read only implementation of revlog.
40 33 pub struct Revlog {
41 34 /// When index and data are not interleaved: bytes of the revlog index.
42 35 /// When index and data are interleaved: bytes of the revlog index and
43 36 /// data.
44 37 index: Index,
45 38 /// When index and data are not interleaved: bytes of the revlog data
46 39 data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>>,
47 40 /// When present on disk: the persistent nodemap for this revlog
48 41 nodemap: Option<nodemap::NodeTree>,
49 42 }
50 43
51 44 impl Revlog {
52 45 /// Open a revlog index file.
53 46 ///
54 47 /// It will also open the associated data file if index and data are not
55 48 /// interleaved.
56 49 #[timed]
57 50 pub fn open(
58 index_path: &Path,
51 repo: &Repo,
52 index_path: impl AsRef<Path>,
59 53 data_path: Option<&Path>,
60 54 ) -> Result<Self, RevlogError> {
61 let index_mmap =
62 mmap_open(&index_path).map_err(RevlogError::IoError)?;
55 let index_path = index_path.as_ref();
56 let index_mmap = repo
57 .store_vfs()
58 .mmap_open(&index_path)
59 .map_err(RevlogError::IoError)?;
63 60
64 61 let version = get_version(&index_mmap);
65 62 if version != 1 {
66 63 return Err(RevlogError::UnsuportedVersion(version));
67 64 }
68 65
69 66 let index = Index::new(Box::new(index_mmap))?;
70 67
71 68 let default_data_path = index_path.with_extension("d");
72 69
73 70 // type annotation required
74 71 // won't recognize Mmap as Deref<Target = [u8]>
75 72 let data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>> =
76 73 if index.is_inline() {
77 74 None
78 75 } else {
79 76 let data_path = data_path.unwrap_or(&default_data_path);
80 let data_mmap =
81 mmap_open(data_path).map_err(RevlogError::IoError)?;
77 let data_mmap = repo
78 .store_vfs()
79 .mmap_open(data_path)
80 .map_err(RevlogError::IoError)?;
82 81 Some(Box::new(data_mmap))
83 82 };
84 83
85 let nodemap = NodeMapDocket::read_from_file(index_path)?.map(
84 let nodemap = NodeMapDocket::read_from_file(repo, index_path)?.map(
86 85 |(docket, data)| {
87 86 nodemap::NodeTree::load_bytes(
88 87 Box::new(data),
89 88 docket.data_length,
90 89 )
91 90 },
92 91 );
93 92
94 93 Ok(Revlog {
95 94 index,
96 95 data_bytes,
97 96 nodemap,
98 97 })
99 98 }
100 99
101 100 /// Return number of entries of the `Revlog`.
102 101 pub fn len(&self) -> usize {
103 102 self.index.len()
104 103 }
105 104
106 105 /// Returns `true` if the `Revlog` has zero `entries`.
107 106 pub fn is_empty(&self) -> bool {
108 107 self.index.is_empty()
109 108 }
110 109
111 110 /// Return the full data associated to a node.
112 111 #[timed]
113 112 pub fn get_node_rev(
114 113 &self,
115 114 node: NodePrefixRef,
116 115 ) -> Result<Revision, RevlogError> {
117 116 if let Some(nodemap) = &self.nodemap {
118 117 return nodemap
119 118 .find_bin(&self.index, node)
120 119 // TODO: propagate details of this error:
121 120 .map_err(|_| RevlogError::Corrupted)?
122 121 .ok_or(RevlogError::InvalidRevision);
123 122 }
124 123
125 124 // Fallback to linear scan when a persistent nodemap is not present.
126 125 // This happens when the persistent-nodemap experimental feature is not
127 126 // enabled, or for small revlogs.
128 127 //
129 128 // TODO: consider building a non-persistent nodemap in memory to
130 129 // optimize these cases.
131 130 let mut found_by_prefix = None;
132 131 for rev in (0..self.len() as Revision).rev() {
133 132 let index_entry =
134 133 self.index.get_entry(rev).ok_or(RevlogError::Corrupted)?;
135 134 if node == *index_entry.hash() {
136 135 return Ok(rev);
137 136 }
138 137 if node.is_prefix_of(index_entry.hash()) {
139 138 if found_by_prefix.is_some() {
140 139 return Err(RevlogError::AmbiguousPrefix);
141 140 }
142 141 found_by_prefix = Some(rev)
143 142 }
144 143 }
145 144 found_by_prefix.ok_or(RevlogError::InvalidRevision)
146 145 }
147 146
148 147 /// Return the full data associated to a revision.
149 148 ///
150 149 /// All entries required to build the final data out of deltas will be
151 150 /// retrieved as needed, and the deltas will be applied to the inital
152 151 /// snapshot to rebuild the final data.
153 152 #[timed]
154 153 pub fn get_rev_data(&self, rev: Revision) -> Result<Vec<u8>, RevlogError> {
155 154 // Todo return -> Cow
156 155 let mut entry = self.get_entry(rev)?;
157 156 let mut delta_chain = vec![];
158 157 while let Some(base_rev) = entry.base_rev {
159 158 delta_chain.push(entry);
160 159 entry =
161 160 self.get_entry(base_rev).or(Err(RevlogError::Corrupted))?;
162 161 }
163 162
164 163 // TODO do not look twice in the index
165 164 let index_entry = self
166 165 .index
167 166 .get_entry(rev)
168 167 .ok_or(RevlogError::InvalidRevision)?;
169 168
170 169 let data: Vec<u8> = if delta_chain.is_empty() {
171 170 entry.data()?.into()
172 171 } else {
173 172 Revlog::build_data_from_deltas(entry, &delta_chain)?
174 173 };
175 174
176 175 if self.check_hash(
177 176 index_entry.p1(),
178 177 index_entry.p2(),
179 178 index_entry.hash().as_bytes(),
180 179 &data,
181 180 ) {
182 181 Ok(data)
183 182 } else {
184 183 Err(RevlogError::Corrupted)
185 184 }
186 185 }
187 186
188 187 /// Check the hash of some given data against the recorded hash.
189 188 pub fn check_hash(
190 189 &self,
191 190 p1: Revision,
192 191 p2: Revision,
193 192 expected: &[u8],
194 193 data: &[u8],
195 194 ) -> bool {
196 195 let e1 = self.index.get_entry(p1);
197 196 let h1 = match e1 {
198 197 Some(ref entry) => entry.hash(),
199 198 None => &NULL_NODE,
200 199 };
201 200 let e2 = self.index.get_entry(p2);
202 201 let h2 = match e2 {
203 202 Some(ref entry) => entry.hash(),
204 203 None => &NULL_NODE,
205 204 };
206 205
207 206 hash(data, h1.as_bytes(), h2.as_bytes()).as_slice() == expected
208 207 }
209 208
210 209 /// Build the full data of a revision out its snapshot
211 210 /// and its deltas.
212 211 #[timed]
213 212 fn build_data_from_deltas(
214 213 snapshot: RevlogEntry,
215 214 deltas: &[RevlogEntry],
216 215 ) -> Result<Vec<u8>, RevlogError> {
217 216 let snapshot = snapshot.data()?;
218 217 let deltas = deltas
219 218 .iter()
220 219 .rev()
221 220 .map(RevlogEntry::data)
222 221 .collect::<Result<Vec<Cow<'_, [u8]>>, RevlogError>>()?;
223 222 let patches: Vec<_> =
224 223 deltas.iter().map(|d| patch::PatchList::new(d)).collect();
225 224 let patch = patch::fold_patch_lists(&patches);
226 225 Ok(patch.apply(&snapshot))
227 226 }
228 227
229 228 /// Return the revlog data.
230 229 fn data(&self) -> &[u8] {
231 230 match self.data_bytes {
232 231 Some(ref data_bytes) => &data_bytes,
233 232 None => panic!(
234 233 "forgot to load the data or trying to access inline data"
235 234 ),
236 235 }
237 236 }
238 237
239 238 /// Get an entry of the revlog.
240 239 fn get_entry(&self, rev: Revision) -> Result<RevlogEntry, RevlogError> {
241 240 let index_entry = self
242 241 .index
243 242 .get_entry(rev)
244 243 .ok_or(RevlogError::InvalidRevision)?;
245 244 let start = index_entry.offset();
246 245 let end = start + index_entry.compressed_len();
247 246 let data = if self.index.is_inline() {
248 247 self.index.data(start, end)
249 248 } else {
250 249 &self.data()[start..end]
251 250 };
252 251 let entry = RevlogEntry {
253 252 rev,
254 253 bytes: data,
255 254 compressed_len: index_entry.compressed_len(),
256 255 uncompressed_len: index_entry.uncompressed_len(),
257 256 base_rev: if index_entry.base_revision() == rev {
258 257 None
259 258 } else {
260 259 Some(index_entry.base_revision())
261 260 },
262 261 };
263 262 Ok(entry)
264 263 }
265 264 }
266 265
267 266 /// The revlog entry's bytes and the necessary informations to extract
268 267 /// the entry's data.
269 268 #[derive(Debug)]
270 269 pub struct RevlogEntry<'a> {
271 270 rev: Revision,
272 271 bytes: &'a [u8],
273 272 compressed_len: usize,
274 273 uncompressed_len: usize,
275 274 base_rev: Option<Revision>,
276 275 }
277 276
278 277 impl<'a> RevlogEntry<'a> {
279 278 /// Extract the data contained in the entry.
280 279 pub fn data(&self) -> Result<Cow<'_, [u8]>, RevlogError> {
281 280 if self.bytes.is_empty() {
282 281 return Ok(Cow::Borrowed(&[]));
283 282 }
284 283 match self.bytes[0] {
285 284 // Revision data is the entirety of the entry, including this
286 285 // header.
287 286 b'\0' => Ok(Cow::Borrowed(self.bytes)),
288 287 // Raw revision data follows.
289 288 b'u' => Ok(Cow::Borrowed(&self.bytes[1..])),
290 289 // zlib (RFC 1950) data.
291 290 b'x' => Ok(Cow::Owned(self.uncompressed_zlib_data()?)),
292 291 // zstd data.
293 292 b'\x28' => Ok(Cow::Owned(self.uncompressed_zstd_data()?)),
294 293 format_type => Err(RevlogError::UnknowDataFormat(format_type)),
295 294 }
296 295 }
297 296
298 297 fn uncompressed_zlib_data(&self) -> Result<Vec<u8>, RevlogError> {
299 298 let mut decoder = ZlibDecoder::new(self.bytes);
300 299 if self.is_delta() {
301 300 let mut buf = Vec::with_capacity(self.compressed_len);
302 301 decoder
303 302 .read_to_end(&mut buf)
304 303 .or(Err(RevlogError::Corrupted))?;
305 304 Ok(buf)
306 305 } else {
307 306 let mut buf = vec![0; self.uncompressed_len];
308 307 decoder
309 308 .read_exact(&mut buf)
310 309 .or(Err(RevlogError::Corrupted))?;
311 310 Ok(buf)
312 311 }
313 312 }
314 313
315 314 fn uncompressed_zstd_data(&self) -> Result<Vec<u8>, RevlogError> {
316 315 if self.is_delta() {
317 316 let mut buf = Vec::with_capacity(self.compressed_len);
318 317 zstd::stream::copy_decode(self.bytes, &mut buf)
319 318 .or(Err(RevlogError::Corrupted))?;
320 319 Ok(buf)
321 320 } else {
322 321 let mut buf = vec![0; self.uncompressed_len];
323 322 let len = zstd::block::decompress_to_buffer(self.bytes, &mut buf)
324 323 .or(Err(RevlogError::Corrupted))?;
325 324 if len != self.uncompressed_len {
326 325 Err(RevlogError::Corrupted)
327 326 } else {
328 327 Ok(buf)
329 328 }
330 329 }
331 330 }
332 331
333 332 /// Tell if the entry is a snapshot or a delta
334 333 /// (influences on decompression).
335 334 fn is_delta(&self) -> bool {
336 335 self.base_rev.is_some()
337 336 }
338 337 }
339 338
340 339 /// Format version of the revlog.
341 340 pub fn get_version(index_bytes: &[u8]) -> u16 {
342 341 BigEndian::read_u16(&index_bytes[2..=3])
343 342 }
344 343
345 344 /// Calculate the hash of a revision given its data and its parents.
346 345 fn hash(data: &[u8], p1_hash: &[u8], p2_hash: &[u8]) -> Vec<u8> {
347 346 let mut hasher = Sha1::new();
348 347 let (a, b) = (p1_hash, p2_hash);
349 348 if a > b {
350 349 hasher.input(b);
351 350 hasher.input(a);
352 351 } else {
353 352 hasher.input(a);
354 353 hasher.input(b);
355 354 }
356 355 hasher.input(data);
357 356 let mut hash = vec![0; NODE_BYTES_LENGTH];
358 357 hasher.result(&mut hash);
359 358 hash
360 359 }
361 360
362 361 #[cfg(test)]
363 362 mod tests {
364 363 use super::*;
365 364
366 365 use super::super::index::IndexEntryBuilder;
367 366
368 367 #[test]
369 368 fn version_test() {
370 369 let bytes = IndexEntryBuilder::new()
371 370 .is_first(true)
372 371 .with_version(1)
373 372 .build();
374 373
375 374 assert_eq!(get_version(&bytes), 1)
376 375 }
377 376 }
@@ -1,106 +1,105 b''
1 1 use crate::commands::Command;
2 2 use crate::error::{CommandError, CommandErrorKind};
3 3 use crate::ui::utf8_to_local;
4 4 use crate::ui::Ui;
5 use hg::operations::find_root;
6 5 use hg::operations::{cat, CatRevError, CatRevErrorKind};
7 use hg::requirements;
6 use hg::repo::Repo;
8 7 use hg::utils::hg_path::HgPathBuf;
9 8 use micro_timer::timed;
10 9 use std::convert::TryFrom;
11 10
12 11 pub const HELP_TEXT: &str = "
13 12 Output the current or given revision of files
14 13 ";
15 14
16 15 pub struct CatCommand<'a> {
17 16 rev: Option<&'a str>,
18 17 files: Vec<&'a str>,
19 18 }
20 19
21 20 impl<'a> CatCommand<'a> {
22 21 pub fn new(rev: Option<&'a str>, files: Vec<&'a str>) -> Self {
23 22 Self { rev, files }
24 23 }
25 24
26 25 fn display(&self, ui: &Ui, data: &[u8]) -> Result<(), CommandError> {
27 26 ui.write_stdout(data)?;
28 27 Ok(())
29 28 }
30 29 }
31 30
32 31 impl<'a> Command for CatCommand<'a> {
33 32 #[timed]
34 33 fn run(&self, ui: &Ui) -> Result<(), CommandError> {
35 let root = find_root()?;
36 requirements::check(&root)?;
34 let repo = Repo::find()?;
35 repo.check_requirements()?;
37 36 let cwd = std::env::current_dir()
38 37 .or_else(|e| Err(CommandErrorKind::CurrentDirNotFound(e)))?;
39 38
40 39 let mut files = vec![];
41 40 for file in self.files.iter() {
42 41 let normalized = cwd.join(&file);
43 42 let stripped = normalized
44 .strip_prefix(&root)
43 .strip_prefix(&repo.working_directory_path())
45 44 .or(Err(CommandErrorKind::Abort(None)))?;
46 45 let hg_file = HgPathBuf::try_from(stripped.to_path_buf())
47 46 .or(Err(CommandErrorKind::Abort(None)))?;
48 47 files.push(hg_file);
49 48 }
50 49
51 50 match self.rev {
52 51 Some(rev) => {
53 let data = cat(&root, rev, &files)
52 let data = cat(&repo, rev, &files)
54 53 .map_err(|e| map_rev_error(rev, e))?;
55 54 self.display(ui, &data)
56 55 }
57 56 None => Err(CommandErrorKind::Unimplemented.into()),
58 57 }
59 58 }
60 59 }
61 60
62 61 /// Convert `CatRevErrorKind` to `CommandError`
63 62 fn map_rev_error(rev: &str, err: CatRevError) -> CommandError {
64 63 CommandError {
65 64 kind: match err.kind {
66 65 CatRevErrorKind::IoError(err) => CommandErrorKind::Abort(Some(
67 66 utf8_to_local(&format!("abort: {}\n", err)).into(),
68 67 )),
69 68 CatRevErrorKind::InvalidRevision => CommandErrorKind::Abort(Some(
70 69 utf8_to_local(&format!(
71 70 "abort: invalid revision identifier {}\n",
72 71 rev
73 72 ))
74 73 .into(),
75 74 )),
76 75 CatRevErrorKind::AmbiguousPrefix => CommandErrorKind::Abort(Some(
77 76 utf8_to_local(&format!(
78 77 "abort: ambiguous revision identifier {}\n",
79 78 rev
80 79 ))
81 80 .into(),
82 81 )),
83 82 CatRevErrorKind::UnsuportedRevlogVersion(version) => {
84 83 CommandErrorKind::Abort(Some(
85 84 utf8_to_local(&format!(
86 85 "abort: unsupported revlog version {}\n",
87 86 version
88 87 ))
89 88 .into(),
90 89 ))
91 90 }
92 91 CatRevErrorKind::CorruptedRevlog => CommandErrorKind::Abort(Some(
93 92 "abort: corrupted revlog\n".into(),
94 93 )),
95 94 CatRevErrorKind::UnknowRevlogDataFormat(format) => {
96 95 CommandErrorKind::Abort(Some(
97 96 utf8_to_local(&format!(
98 97 "abort: unknow revlog dataformat {:?}\n",
99 98 format
100 99 ))
101 100 .into(),
102 101 ))
103 102 }
104 103 },
105 104 }
106 105 }
@@ -1,91 +1,91 b''
1 1 use crate::commands::Command;
2 2 use crate::error::{CommandError, CommandErrorKind};
3 3 use crate::ui::utf8_to_local;
4 4 use crate::ui::Ui;
5 use hg::operations::find_root;
6 5 use hg::operations::{
7 6 debug_data, DebugDataError, DebugDataErrorKind, DebugDataKind,
8 7 };
8 use hg::repo::Repo;
9 9 use micro_timer::timed;
10 10
11 11 pub const HELP_TEXT: &str = "
12 12 Dump the contents of a data file revision
13 13 ";
14 14
15 15 pub struct DebugDataCommand<'a> {
16 16 rev: &'a str,
17 17 kind: DebugDataKind,
18 18 }
19 19
20 20 impl<'a> DebugDataCommand<'a> {
21 21 pub fn new(rev: &'a str, kind: DebugDataKind) -> Self {
22 22 DebugDataCommand { rev, kind }
23 23 }
24 24 }
25 25
26 26 impl<'a> Command for DebugDataCommand<'a> {
27 27 #[timed]
28 28 fn run(&self, ui: &Ui) -> Result<(), CommandError> {
29 let root = find_root()?;
30 let data = debug_data(&root, self.rev, self.kind)
29 let repo = Repo::find()?;
30 let data = debug_data(&repo, self.rev, self.kind)
31 31 .map_err(|e| to_command_error(self.rev, e))?;
32 32
33 33 let mut stdout = ui.stdout_buffer();
34 34 stdout.write_all(&data)?;
35 35 stdout.flush()?;
36 36
37 37 Ok(())
38 38 }
39 39 }
40 40
41 41 /// Convert operation errors to command errors
42 42 fn to_command_error(rev: &str, err: DebugDataError) -> CommandError {
43 43 match err.kind {
44 44 DebugDataErrorKind::IoError(err) => CommandError {
45 45 kind: CommandErrorKind::Abort(Some(
46 46 utf8_to_local(&format!("abort: {}\n", err)).into(),
47 47 )),
48 48 },
49 49 DebugDataErrorKind::InvalidRevision => CommandError {
50 50 kind: CommandErrorKind::Abort(Some(
51 51 utf8_to_local(&format!(
52 52 "abort: invalid revision identifier{}\n",
53 53 rev
54 54 ))
55 55 .into(),
56 56 )),
57 57 },
58 58 DebugDataErrorKind::AmbiguousPrefix => CommandError {
59 59 kind: CommandErrorKind::Abort(Some(
60 60 utf8_to_local(&format!(
61 61 "abort: ambiguous revision identifier{}\n",
62 62 rev
63 63 ))
64 64 .into(),
65 65 )),
66 66 },
67 67 DebugDataErrorKind::UnsuportedRevlogVersion(version) => CommandError {
68 68 kind: CommandErrorKind::Abort(Some(
69 69 utf8_to_local(&format!(
70 70 "abort: unsupported revlog version {}\n",
71 71 version
72 72 ))
73 73 .into(),
74 74 )),
75 75 },
76 76 DebugDataErrorKind::CorruptedRevlog => CommandError {
77 77 kind: CommandErrorKind::Abort(Some(
78 78 "abort: corrupted revlog\n".into(),
79 79 )),
80 80 },
81 81 DebugDataErrorKind::UnknowRevlogDataFormat(format) => CommandError {
82 82 kind: CommandErrorKind::Abort(Some(
83 83 utf8_to_local(&format!(
84 84 "abort: unknow revlog dataformat {:?}\n",
85 85 format
86 86 ))
87 87 .into(),
88 88 )),
89 89 },
90 90 }
91 91 }
@@ -1,30 +1,30 b''
1 1 use crate::commands::Command;
2 2 use crate::error::CommandError;
3 3 use crate::ui::Ui;
4 use hg::operations::find_root;
4 use hg::repo::Repo;
5 5 use hg::requirements;
6 6
7 7 pub const HELP_TEXT: &str = "
8 8 Print the current repo requirements.
9 9 ";
10 10
11 11 pub struct DebugRequirementsCommand {}
12 12
13 13 impl DebugRequirementsCommand {
14 14 pub fn new() -> Self {
15 15 DebugRequirementsCommand {}
16 16 }
17 17 }
18 18
19 19 impl Command for DebugRequirementsCommand {
20 20 fn run(&self, ui: &Ui) -> Result<(), CommandError> {
21 let root = find_root()?;
21 let repo = Repo::find()?;
22 22 let mut output = String::new();
23 for req in requirements::load(&root)? {
23 for req in requirements::load(&repo)? {
24 24 output.push_str(&req);
25 25 output.push('\n');
26 26 }
27 27 ui.write_stdout(output.as_bytes())?;
28 28 Ok(())
29 29 }
30 30 }
@@ -1,144 +1,142 b''
1 1 use crate::commands::Command;
2 2 use crate::error::{CommandError, CommandErrorKind};
3 3 use crate::ui::utf8_to_local;
4 4 use crate::ui::Ui;
5 use hg::operations::find_root;
6 5 use hg::operations::{
7 6 list_rev_tracked_files, ListRevTrackedFilesError,
8 7 ListRevTrackedFilesErrorKind,
9 8 };
10 9 use hg::operations::{
11 10 Dirstate, ListDirstateTrackedFilesError, ListDirstateTrackedFilesErrorKind,
12 11 };
13 use hg::requirements;
12 use hg::repo::Repo;
14 13 use hg::utils::files::{get_bytes_from_path, relativize_path};
15 14 use hg::utils::hg_path::{HgPath, HgPathBuf};
16 use std::path::Path;
17 15
18 16 pub const HELP_TEXT: &str = "
19 17 List tracked files.
20 18
21 19 Returns 0 on success.
22 20 ";
23 21
24 22 pub struct FilesCommand<'a> {
25 23 rev: Option<&'a str>,
26 24 }
27 25
28 26 impl<'a> FilesCommand<'a> {
29 27 pub fn new(rev: Option<&'a str>) -> Self {
30 28 FilesCommand { rev }
31 29 }
32 30
33 31 fn display_files(
34 32 &self,
35 33 ui: &Ui,
36 root: &Path,
34 repo: &Repo,
37 35 files: impl IntoIterator<Item = &'a HgPath>,
38 36 ) -> Result<(), CommandError> {
39 37 let cwd = std::env::current_dir()
40 38 .or_else(|e| Err(CommandErrorKind::CurrentDirNotFound(e)))?;
41 39 let rooted_cwd = cwd
42 .strip_prefix(root)
40 .strip_prefix(repo.working_directory_path())
43 41 .expect("cwd was already checked within the repository");
44 42 let rooted_cwd = HgPathBuf::from(get_bytes_from_path(rooted_cwd));
45 43
46 44 let mut stdout = ui.stdout_buffer();
47 45
48 46 for file in files {
49 47 stdout.write_all(relativize_path(file, &rooted_cwd).as_ref())?;
50 48 stdout.write_all(b"\n")?;
51 49 }
52 50 stdout.flush()?;
53 51 Ok(())
54 52 }
55 53 }
56 54
57 55 impl<'a> Command for FilesCommand<'a> {
58 56 fn run(&self, ui: &Ui) -> Result<(), CommandError> {
59 let root = find_root()?;
60 requirements::check(&root)?;
57 let repo = Repo::find()?;
58 repo.check_requirements()?;
61 59 if let Some(rev) = self.rev {
62 let files = list_rev_tracked_files(&root, rev)
60 let files = list_rev_tracked_files(&repo, rev)
63 61 .map_err(|e| map_rev_error(rev, e))?;
64 self.display_files(ui, &root, files.iter())
62 self.display_files(ui, &repo, files.iter())
65 63 } else {
66 let distate = Dirstate::new(&root).map_err(map_dirstate_error)?;
64 let distate = Dirstate::new(&repo).map_err(map_dirstate_error)?;
67 65 let files = distate.tracked_files().map_err(map_dirstate_error)?;
68 self.display_files(ui, &root, files)
66 self.display_files(ui, &repo, files)
69 67 }
70 68 }
71 69 }
72 70
73 71 /// Convert `ListRevTrackedFilesErrorKind` to `CommandError`
74 72 fn map_rev_error(rev: &str, err: ListRevTrackedFilesError) -> CommandError {
75 73 CommandError {
76 74 kind: match err.kind {
77 75 ListRevTrackedFilesErrorKind::IoError(err) => {
78 76 CommandErrorKind::Abort(Some(
79 77 utf8_to_local(&format!("abort: {}\n", err)).into(),
80 78 ))
81 79 }
82 80 ListRevTrackedFilesErrorKind::InvalidRevision => {
83 81 CommandErrorKind::Abort(Some(
84 82 utf8_to_local(&format!(
85 83 "abort: invalid revision identifier {}\n",
86 84 rev
87 85 ))
88 86 .into(),
89 87 ))
90 88 }
91 89 ListRevTrackedFilesErrorKind::AmbiguousPrefix => {
92 90 CommandErrorKind::Abort(Some(
93 91 utf8_to_local(&format!(
94 92 "abort: ambiguous revision identifier {}\n",
95 93 rev
96 94 ))
97 95 .into(),
98 96 ))
99 97 }
100 98 ListRevTrackedFilesErrorKind::UnsuportedRevlogVersion(version) => {
101 99 CommandErrorKind::Abort(Some(
102 100 utf8_to_local(&format!(
103 101 "abort: unsupported revlog version {}\n",
104 102 version
105 103 ))
106 104 .into(),
107 105 ))
108 106 }
109 107 ListRevTrackedFilesErrorKind::CorruptedRevlog => {
110 108 CommandErrorKind::Abort(Some(
111 109 "abort: corrupted revlog\n".into(),
112 110 ))
113 111 }
114 112 ListRevTrackedFilesErrorKind::UnknowRevlogDataFormat(format) => {
115 113 CommandErrorKind::Abort(Some(
116 114 utf8_to_local(&format!(
117 115 "abort: unknow revlog dataformat {:?}\n",
118 116 format
119 117 ))
120 118 .into(),
121 119 ))
122 120 }
123 121 },
124 122 }
125 123 }
126 124
127 125 /// Convert `ListDirstateTrackedFilesError` to `CommandError`
128 126 fn map_dirstate_error(err: ListDirstateTrackedFilesError) -> CommandError {
129 127 CommandError {
130 128 kind: match err.kind {
131 129 ListDirstateTrackedFilesErrorKind::IoError(err) => {
132 130 CommandErrorKind::Abort(Some(
133 131 utf8_to_local(&format!("abort: {}\n", err)).into(),
134 132 ))
135 133 }
136 134 ListDirstateTrackedFilesErrorKind::ParseError(_) => {
137 135 CommandErrorKind::Abort(Some(
138 136 // TODO find a better error message
139 137 b"abort: parse error\n".to_vec(),
140 138 ))
141 139 }
142 140 },
143 141 }
144 142 }
@@ -1,32 +1,29 b''
1 1 use crate::commands::Command;
2 2 use crate::error::CommandError;
3 3 use crate::ui::Ui;
4 4 use format_bytes::format_bytes;
5 use hg::operations::find_root;
5 use hg::repo::Repo;
6 6 use hg::utils::files::get_bytes_from_path;
7 7
8 8 pub const HELP_TEXT: &str = "
9 9 Print the root directory of the current repository.
10 10
11 11 Returns 0 on success.
12 12 ";
13 13
14 14 pub struct RootCommand {}
15 15
16 16 impl RootCommand {
17 17 pub fn new() -> Self {
18 18 RootCommand {}
19 19 }
20 20 }
21 21
22 22 impl Command for RootCommand {
23 23 fn run(&self, ui: &Ui) -> Result<(), CommandError> {
24 let path_buf = find_root()?;
25
26 let bytes = get_bytes_from_path(path_buf);
27
24 let repo = Repo::find()?;
25 let bytes = get_bytes_from_path(repo.working_directory_path());
28 26 ui.write_stdout(&format_bytes!(b"{}\n", bytes.as_slice()))?;
29
30 27 Ok(())
31 28 }
32 29 }
General Comments 0
You need to be logged in to leave comments. Login now