##// END OF EJS Templates
rhg: `cat` command: print error messages for missing files...
Simon Sapin -
r47478:b1f2c2b3 default
parent child Browse files
Show More
@@ -1,75 +1,105 b''
1 1 // cat.rs
2 2 //
3 3 // Copyright 2020 Antoine Cezar <antoine.cezar@octobus.net>
4 4 //
5 5 // This software may be used and distributed according to the terms of the
6 6 // GNU General Public License version 2 or any later version.
7 7
8 8 use std::path::PathBuf;
9 9
10 10 use crate::repo::Repo;
11 11 use crate::revlog::changelog::Changelog;
12 12 use crate::revlog::manifest::Manifest;
13 13 use crate::revlog::path_encode::path_encode;
14 14 use crate::revlog::revlog::Revlog;
15 15 use crate::revlog::revlog::RevlogError;
16 16 use crate::revlog::Node;
17 17 use crate::utils::files::get_path_from_bytes;
18 18 use crate::utils::hg_path::{HgPath, HgPathBuf};
19 19
20 pub struct CatOutput {
21 /// Whether any file in the manifest matched the paths given as CLI
22 /// arguments
23 pub found_any: bool,
24 /// The contents of matching files, in manifest order
25 pub concatenated: Vec<u8>,
26 /// Which of the CLI arguments did not match any manifest file
27 pub missing: Vec<HgPathBuf>,
28 /// The node ID that the given revset was resolved to
29 pub node: Node,
30 }
31
20 32 const METADATA_DELIMITER: [u8; 2] = [b'\x01', b'\n'];
21 33
22 /// List files under Mercurial control at a given revision.
34 /// Output the given revision of files
23 35 ///
24 36 /// * `repo`: The repository to read from.
25 37 /// * `revset`: Revset that is resolved to the single revision to cat the files from.
26 38 /// * `files`: The files to output.
27 pub fn cat(
39 pub fn cat<'a>(
28 40 repo: &Repo,
29 41 revset: &str,
30 files: &[HgPathBuf],
31 ) -> Result<Vec<u8>, RevlogError> {
42 files: &'a [HgPathBuf],
43 ) -> Result<CatOutput, RevlogError> {
32 44 let rev = crate::revset::resolve_single(revset, repo)?;
33 45 let changelog = Changelog::open(repo)?;
34 46 let manifest = Manifest::open(repo)?;
35 47 let changelog_entry = changelog.get_rev(rev)?;
48 let node = *changelog
49 .node_from_rev(rev)
50 .expect("should succeed when changelog.get_rev did");
36 51 let manifest_node =
37 52 Node::from_hex_for_repo(&changelog_entry.manifest_node()?)?;
38 53 let manifest_entry = manifest.get_node(manifest_node.into())?;
39 54 let mut bytes = vec![];
55 let mut matched = vec![false; files.len()];
56 let mut found_any = false;
40 57
41 58 for (manifest_file, node_bytes) in manifest_entry.files_with_nodes() {
42 for cat_file in files.iter() {
59 for (cat_file, is_matched) in files.iter().zip(&mut matched) {
43 60 if cat_file.as_bytes() == manifest_file.as_bytes() {
61 *is_matched = true;
62 found_any = true;
44 63 let index_path = store_path(manifest_file, b".i");
45 64 let data_path = store_path(manifest_file, b".d");
46 65
47 66 let file_log =
48 67 Revlog::open(repo, &index_path, Some(&data_path))?;
49 68 let file_node = Node::from_hex_for_repo(node_bytes)?;
50 69 let file_rev = file_log.get_node_rev(file_node.into())?;
51 70 let data = file_log.get_rev_data(file_rev)?;
52 71 if data.starts_with(&METADATA_DELIMITER) {
53 72 let end_delimiter_position = data
54 73 [METADATA_DELIMITER.len()..]
55 74 .windows(METADATA_DELIMITER.len())
56 75 .position(|bytes| bytes == METADATA_DELIMITER);
57 76 if let Some(position) = end_delimiter_position {
58 77 let offset = METADATA_DELIMITER.len() * 2;
59 78 bytes.extend(data[position + offset..].iter());
60 79 }
61 80 } else {
62 81 bytes.extend(data);
63 82 }
64 83 }
65 84 }
66 85 }
67 86
68 Ok(bytes)
87 let missing: Vec<_> = files
88 .iter()
89 .zip(&matched)
90 .filter(|pair| !*pair.1)
91 .map(|pair| pair.0.clone())
92 .collect();
93 Ok(CatOutput {
94 found_any,
95 concatenated: bytes,
96 missing,
97 node,
98 })
69 99 }
70 100
71 101 fn store_path(hg_path: &HgPath, suffix: &[u8]) -> PathBuf {
72 102 let encoded_bytes =
73 103 path_encode(&[b"data/", hg_path.as_bytes(), suffix].concat());
74 104 get_path_from_bytes(&encoded_bytes).into()
75 105 }
@@ -1,12 +1,12 b''
1 1 //! A distinction is made between operations and commands.
2 2 //! An operation is what can be done whereas a command is what is exposed by
3 3 //! the cli. A single command can use several operations to achieve its goal.
4 4
5 5 mod cat;
6 6 mod debugdata;
7 7 mod dirstate_status;
8 8 mod list_tracked_files;
9 pub use cat::cat;
9 pub use cat::{cat, CatOutput};
10 10 pub use debugdata::{debug_data, DebugDataKind};
11 11 pub use list_tracked_files::Dirstate;
12 12 pub use list_tracked_files::{list_rev_tracked_files, FilesForRev};
@@ -1,61 +1,65 b''
1 1 use crate::errors::HgError;
2 2 use crate::repo::Repo;
3 3 use crate::revlog::revlog::{Revlog, RevlogError};
4 use crate::revlog::NodePrefix;
5 4 use crate::revlog::Revision;
5 use crate::revlog::{Node, NodePrefix};
6 6
7 7 /// A specialized `Revlog` to work with `changelog` data format.
8 8 pub struct Changelog {
9 9 /// The generic `revlog` format.
10 10 pub(crate) revlog: Revlog,
11 11 }
12 12
13 13 impl Changelog {
14 14 /// Open the `changelog` of a repository given by its root.
15 15 pub fn open(repo: &Repo) -> Result<Self, RevlogError> {
16 16 let revlog = Revlog::open(repo, "00changelog.i", None)?;
17 17 Ok(Self { revlog })
18 18 }
19 19
20 20 /// Return the `ChangelogEntry` for a given node id.
21 21 pub fn get_node(
22 22 &self,
23 23 node: NodePrefix,
24 24 ) -> Result<ChangelogEntry, RevlogError> {
25 25 let rev = self.revlog.get_node_rev(node)?;
26 26 self.get_rev(rev)
27 27 }
28 28
29 29 /// Return the `ChangelogEntry` of a given node revision.
30 30 pub fn get_rev(
31 31 &self,
32 32 rev: Revision,
33 33 ) -> Result<ChangelogEntry, RevlogError> {
34 34 let bytes = self.revlog.get_rev_data(rev)?;
35 35 Ok(ChangelogEntry { bytes })
36 36 }
37
38 pub fn node_from_rev(&self, rev: Revision) -> Option<&Node> {
39 Some(self.revlog.index.get_entry(rev)?.hash())
40 }
37 41 }
38 42
39 43 /// `Changelog` entry which knows how to interpret the `changelog` data bytes.
40 44 #[derive(Debug)]
41 45 pub struct ChangelogEntry {
42 46 /// The data bytes of the `changelog` entry.
43 47 bytes: Vec<u8>,
44 48 }
45 49
46 50 impl ChangelogEntry {
47 51 /// Return an iterator over the lines of the entry.
48 52 pub fn lines(&self) -> impl Iterator<Item = &[u8]> {
49 53 self.bytes
50 54 .split(|b| b == &b'\n')
51 55 .filter(|line| !line.is_empty())
52 56 }
53 57
54 58 /// Return the node id of the `manifest` referenced by this `changelog`
55 59 /// entry.
56 60 pub fn manifest_node(&self) -> Result<&[u8], RevlogError> {
57 61 self.lines()
58 62 .next()
59 63 .ok_or_else(|| HgError::corrupted("empty changelog entry").into())
60 64 }
61 65 }
@@ -1,405 +1,415 b''
1 1 // Copyright 2019-2020 Georges Racinet <georges.racinet@octobus.net>
2 2 //
3 3 // This software may be used and distributed according to the terms of the
4 4 // GNU General Public License version 2 or any later version.
5 5
6 6 //! Definitions and utilities for Revision nodes
7 7 //!
8 8 //! In Mercurial code base, it is customary to call "a node" the binary SHA
9 9 //! of a revision.
10 10
11 11 use crate::errors::HgError;
12 12 use bytes_cast::BytesCast;
13 13 use std::convert::{TryFrom, TryInto};
14 14 use std::fmt;
15 15
16 16 /// The length in bytes of a `Node`
17 17 ///
18 18 /// This constant is meant to ease refactors of this module. Calling code
19 19 /// outside of this module should not expect all nodes to have
20 20 /// the same size, should we support several formats concurrently in
21 21 /// the future.
22 22 pub const NODE_BYTES_LENGTH: usize = 20;
23 23
24 24 /// Id of the null node.
25 25 ///
26 26 /// Used to indicate the absence of node.
27 27 pub const NULL_NODE_ID: [u8; NODE_BYTES_LENGTH] = [0u8; NODE_BYTES_LENGTH];
28 28
29 29 /// The length in nybbles (hexadecimal digits) of a `Node`
30 30 ///
31 31 /// See also `NODE_BYTES_LENGTH` about not assuming a fixed node size.
32 32 const NODE_NYBBLES_LENGTH: usize = 2 * NODE_BYTES_LENGTH;
33 33
34 /// Default for UI presentation
35 const SHORT_PREFIX_DEFAULT_NYBBLES_LENGTH: u8 = 12;
36
34 37 /// Private alias for readability and to ease future change
35 38 type NodeData = [u8; NODE_BYTES_LENGTH];
36 39
37 40 /// Binary revision SHA
38 41 ///
39 42 /// ## Future changes of hash size
40 43 ///
41 44 /// To accommodate future changes of hash size, Rust callers
42 45 /// should use the conversion methods at the boundaries (FFI, actual
43 46 /// computation of hashes and I/O) only, and only if required.
44 47 ///
45 48 /// All other callers outside of unit tests should just handle `Node` values
46 49 /// and never make any assumption on the actual length, using [`nybbles_len`]
47 50 /// if they need a loop boundary.
48 51 ///
49 52 /// All methods that create a `Node` either take a type that enforces
50 53 /// the size or return an error at runtime.
51 54 ///
52 55 /// [`nybbles_len`]: #method.nybbles_len
53 56 #[derive(Copy, Clone, Debug, PartialEq, BytesCast, derive_more::From)]
54 57 #[repr(transparent)]
55 58 pub struct Node {
56 59 data: NodeData,
57 60 }
58 61
59 62 /// The node value for NULL_REVISION
60 63 pub const NULL_NODE: Node = Node {
61 64 data: [0; NODE_BYTES_LENGTH],
62 65 };
63 66
64 67 /// Return an error if the slice has an unexpected length
65 68 impl<'a> TryFrom<&'a [u8]> for &'a Node {
66 69 type Error = ();
67 70
68 71 #[inline]
69 72 fn try_from(bytes: &'a [u8]) -> Result<Self, Self::Error> {
70 73 match Node::from_bytes(bytes) {
71 74 Ok((node, rest)) if rest.is_empty() => Ok(node),
72 75 _ => Err(()),
73 76 }
74 77 }
75 78 }
76 79
77 80 /// Return an error if the slice has an unexpected length
78 81 impl TryFrom<&'_ [u8]> for Node {
79 82 type Error = std::array::TryFromSliceError;
80 83
81 84 #[inline]
82 85 fn try_from(bytes: &'_ [u8]) -> Result<Self, Self::Error> {
83 86 let data = bytes.try_into()?;
84 87 Ok(Self { data })
85 88 }
86 89 }
87 90
88 91 impl From<&'_ NodeData> for Node {
89 92 #[inline]
90 93 fn from(data: &'_ NodeData) -> Self {
91 94 Self { data: *data }
92 95 }
93 96 }
94 97
95 98 impl fmt::LowerHex for Node {
96 99 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
97 100 for &byte in &self.data {
98 101 write!(f, "{:02x}", byte)?
99 102 }
100 103 Ok(())
101 104 }
102 105 }
103 106
104 107 #[derive(Debug)]
105 108 pub struct FromHexError;
106 109
107 110 /// Low level utility function, also for prefixes
108 111 fn get_nybble(s: &[u8], i: usize) -> u8 {
109 112 if i % 2 == 0 {
110 113 s[i / 2] >> 4
111 114 } else {
112 115 s[i / 2] & 0x0f
113 116 }
114 117 }
115 118
116 119 impl Node {
117 120 /// Retrieve the `i`th half-byte of the binary data.
118 121 ///
119 122 /// This is also the `i`th hexadecimal digit in numeric form,
120 123 /// also called a [nybble](https://en.wikipedia.org/wiki/Nibble).
121 124 pub fn get_nybble(&self, i: usize) -> u8 {
122 125 get_nybble(&self.data, i)
123 126 }
124 127
125 128 /// Length of the data, in nybbles
126 129 pub fn nybbles_len(&self) -> usize {
127 130 // public exposure as an instance method only, so that we can
128 131 // easily support several sizes of hashes if needed in the future.
129 132 NODE_NYBBLES_LENGTH
130 133 }
131 134
132 135 /// Convert from hexadecimal string representation
133 136 ///
134 137 /// Exact length is required.
135 138 ///
136 139 /// To be used in FFI and I/O only, in order to facilitate future
137 140 /// changes of hash format.
138 141 pub fn from_hex(hex: impl AsRef<[u8]>) -> Result<Node, FromHexError> {
139 142 let prefix = NodePrefix::from_hex(hex)?;
140 143 if prefix.nybbles_len() == NODE_NYBBLES_LENGTH {
141 144 Ok(Self { data: prefix.data })
142 145 } else {
143 146 Err(FromHexError)
144 147 }
145 148 }
146 149
147 150 /// `from_hex`, but for input from an internal file of the repository such
148 151 /// as a changelog or manifest entry.
149 152 ///
150 153 /// An error is treated as repository corruption.
151 154 pub fn from_hex_for_repo(hex: impl AsRef<[u8]>) -> Result<Node, HgError> {
152 155 Self::from_hex(hex.as_ref()).map_err(|FromHexError| {
153 156 HgError::CorruptedRepository(format!(
154 157 "Expected a full hexadecimal node ID, found {}",
155 158 String::from_utf8_lossy(hex.as_ref())
156 159 ))
157 160 })
158 161 }
159 162
160 163 /// Provide access to binary data
161 164 ///
162 165 /// This is needed by FFI layers, for instance to return expected
163 166 /// binary values to Python.
164 167 pub fn as_bytes(&self) -> &[u8] {
165 168 &self.data
166 169 }
170
171 pub fn short(&self) -> NodePrefix {
172 NodePrefix {
173 nybbles_len: SHORT_PREFIX_DEFAULT_NYBBLES_LENGTH,
174 data: self.data,
175 }
176 }
167 177 }
168 178
169 179 /// The beginning of a binary revision SHA.
170 180 ///
171 181 /// Since it can potentially come from a hexadecimal representation with
172 182 /// odd length, it needs to carry around whether the last 4 bits are relevant
173 183 /// or not.
174 184 #[derive(Debug, PartialEq, Copy, Clone)]
175 185 pub struct NodePrefix {
176 186 /// In `1..=NODE_NYBBLES_LENGTH`
177 187 nybbles_len: u8,
178 188 /// The first `4 * nybbles_len` bits are used (considering bits
179 189 /// within a byte in big-endian: most significant first), the rest
180 190 /// are zero.
181 191 data: NodeData,
182 192 }
183 193
184 194 impl NodePrefix {
185 195 /// Convert from hexadecimal string representation
186 196 ///
187 197 /// Similarly to `hex::decode`, can be used with Unicode string types
188 198 /// (`String`, `&str`) as well as bytes.
189 199 ///
190 200 /// To be used in FFI and I/O only, in order to facilitate future
191 201 /// changes of hash format.
192 202 pub fn from_hex(hex: impl AsRef<[u8]>) -> Result<Self, FromHexError> {
193 203 let hex = hex.as_ref();
194 204 let len = hex.len();
195 205 if len > NODE_NYBBLES_LENGTH || len == 0 {
196 206 return Err(FromHexError);
197 207 }
198 208
199 209 let mut data = [0; NODE_BYTES_LENGTH];
200 210 let mut nybbles_len = 0;
201 211 for &ascii_byte in hex {
202 212 let nybble = match char::from(ascii_byte).to_digit(16) {
203 213 Some(digit) => digit as u8,
204 214 None => return Err(FromHexError),
205 215 };
206 216 // Fill in the upper half of a byte first, then the lower half.
207 217 let shift = if nybbles_len % 2 == 0 { 4 } else { 0 };
208 218 data[nybbles_len as usize / 2] |= nybble << shift;
209 219 nybbles_len += 1;
210 220 }
211 221 Ok(Self { data, nybbles_len })
212 222 }
213 223
214 224 pub fn nybbles_len(&self) -> usize {
215 225 self.nybbles_len as _
216 226 }
217 227
218 228 pub fn is_prefix_of(&self, node: &Node) -> bool {
219 229 let full_bytes = self.nybbles_len() / 2;
220 230 if self.data[..full_bytes] != node.data[..full_bytes] {
221 231 return false;
222 232 }
223 233 if self.nybbles_len() % 2 == 0 {
224 234 return true;
225 235 }
226 236 let last = self.nybbles_len() - 1;
227 237 self.get_nybble(last) == node.get_nybble(last)
228 238 }
229 239
230 240 /// Retrieve the `i`th half-byte from the prefix.
231 241 ///
232 242 /// This is also the `i`th hexadecimal digit in numeric form,
233 243 /// also called a [nybble](https://en.wikipedia.org/wiki/Nibble).
234 244 pub fn get_nybble(&self, i: usize) -> u8 {
235 245 assert!(i < self.nybbles_len());
236 246 get_nybble(&self.data, i)
237 247 }
238 248
239 249 fn iter_nybbles(&self) -> impl Iterator<Item = u8> + '_ {
240 250 (0..self.nybbles_len()).map(move |i| get_nybble(&self.data, i))
241 251 }
242 252
243 253 /// Return the index of the first nybble that's different from `node`
244 254 ///
245 255 /// If the return value is `None` that means that `self` is
246 256 /// a prefix of `node`, but the current method is a bit slower
247 257 /// than `is_prefix_of`.
248 258 ///
249 259 /// Returned index is as in `get_nybble`, i.e., starting at 0.
250 260 pub fn first_different_nybble(&self, node: &Node) -> Option<usize> {
251 261 self.iter_nybbles()
252 262 .zip(NodePrefix::from(*node).iter_nybbles())
253 263 .position(|(a, b)| a != b)
254 264 }
255 265 }
256 266
257 267 impl fmt::LowerHex for NodePrefix {
258 268 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
259 269 let full_bytes = self.nybbles_len() / 2;
260 270 for &byte in &self.data[..full_bytes] {
261 271 write!(f, "{:02x}", byte)?
262 272 }
263 273 if self.nybbles_len() % 2 == 1 {
264 274 let last = self.nybbles_len() - 1;
265 275 write!(f, "{:x}", self.get_nybble(last))?
266 276 }
267 277 Ok(())
268 278 }
269 279 }
270 280
271 281 /// A shortcut for full `Node` references
272 282 impl From<&'_ Node> for NodePrefix {
273 283 fn from(node: &'_ Node) -> Self {
274 284 NodePrefix {
275 285 nybbles_len: node.nybbles_len() as _,
276 286 data: node.data,
277 287 }
278 288 }
279 289 }
280 290
281 291 /// A shortcut for full `Node` references
282 292 impl From<Node> for NodePrefix {
283 293 fn from(node: Node) -> Self {
284 294 NodePrefix {
285 295 nybbles_len: node.nybbles_len() as _,
286 296 data: node.data,
287 297 }
288 298 }
289 299 }
290 300
291 301 impl PartialEq<Node> for NodePrefix {
292 302 fn eq(&self, other: &Node) -> bool {
293 303 Self::from(*other) == *self
294 304 }
295 305 }
296 306
297 307 #[cfg(test)]
298 308 mod tests {
299 309 use super::*;
300 310
301 311 const SAMPLE_NODE_HEX: &str = "0123456789abcdeffedcba9876543210deadbeef";
302 312 const SAMPLE_NODE: Node = Node {
303 313 data: [
304 314 0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0xfe, 0xdc, 0xba,
305 315 0x98, 0x76, 0x54, 0x32, 0x10, 0xde, 0xad, 0xbe, 0xef,
306 316 ],
307 317 };
308 318
309 319 /// Pad a hexadecimal string to reach `NODE_NYBBLES_LENGTH`
310 320 /// The padding is made with zeros.
311 321 pub fn hex_pad_right(hex: &str) -> String {
312 322 let mut res = hex.to_string();
313 323 while res.len() < NODE_NYBBLES_LENGTH {
314 324 res.push('0');
315 325 }
316 326 res
317 327 }
318 328
319 329 #[test]
320 330 fn test_node_from_hex() {
321 331 let not_hex = "012... oops";
322 332 let too_short = "0123";
323 333 let too_long = format!("{}0", SAMPLE_NODE_HEX);
324 334 assert_eq!(Node::from_hex(SAMPLE_NODE_HEX).unwrap(), SAMPLE_NODE);
325 335 assert!(Node::from_hex(not_hex).is_err());
326 336 assert!(Node::from_hex(too_short).is_err());
327 337 assert!(Node::from_hex(&too_long).is_err());
328 338 }
329 339
330 340 #[test]
331 341 fn test_node_encode_hex() {
332 342 assert_eq!(format!("{:x}", SAMPLE_NODE), SAMPLE_NODE_HEX);
333 343 }
334 344
335 345 #[test]
336 346 fn test_prefix_from_to_hex() -> Result<(), FromHexError> {
337 347 assert_eq!(format!("{:x}", NodePrefix::from_hex("0e1")?), "0e1");
338 348 assert_eq!(format!("{:x}", NodePrefix::from_hex("0e1a")?), "0e1a");
339 349 assert_eq!(
340 350 format!("{:x}", NodePrefix::from_hex(SAMPLE_NODE_HEX)?),
341 351 SAMPLE_NODE_HEX
342 352 );
343 353 Ok(())
344 354 }
345 355
346 356 #[test]
347 357 fn test_prefix_from_hex_errors() {
348 358 assert!(NodePrefix::from_hex("testgr").is_err());
349 359 let mut long = format!("{:x}", NULL_NODE);
350 360 long.push('c');
351 361 assert!(NodePrefix::from_hex(&long).is_err())
352 362 }
353 363
354 364 #[test]
355 365 fn test_is_prefix_of() -> Result<(), FromHexError> {
356 366 let mut node_data = [0; NODE_BYTES_LENGTH];
357 367 node_data[0] = 0x12;
358 368 node_data[1] = 0xca;
359 369 let node = Node::from(node_data);
360 370 assert!(NodePrefix::from_hex("12")?.is_prefix_of(&node));
361 371 assert!(!NodePrefix::from_hex("1a")?.is_prefix_of(&node));
362 372 assert!(NodePrefix::from_hex("12c")?.is_prefix_of(&node));
363 373 assert!(!NodePrefix::from_hex("12d")?.is_prefix_of(&node));
364 374 Ok(())
365 375 }
366 376
367 377 #[test]
368 378 fn test_get_nybble() -> Result<(), FromHexError> {
369 379 let prefix = NodePrefix::from_hex("dead6789cafe")?;
370 380 assert_eq!(prefix.get_nybble(0), 13);
371 381 assert_eq!(prefix.get_nybble(7), 9);
372 382 Ok(())
373 383 }
374 384
375 385 #[test]
376 386 fn test_first_different_nybble_even_prefix() {
377 387 let prefix = NodePrefix::from_hex("12ca").unwrap();
378 388 let mut node = Node::from([0; NODE_BYTES_LENGTH]);
379 389 assert_eq!(prefix.first_different_nybble(&node), Some(0));
380 390 node.data[0] = 0x13;
381 391 assert_eq!(prefix.first_different_nybble(&node), Some(1));
382 392 node.data[0] = 0x12;
383 393 assert_eq!(prefix.first_different_nybble(&node), Some(2));
384 394 node.data[1] = 0xca;
385 395 // now it is a prefix
386 396 assert_eq!(prefix.first_different_nybble(&node), None);
387 397 }
388 398
389 399 #[test]
390 400 fn test_first_different_nybble_odd_prefix() {
391 401 let prefix = NodePrefix::from_hex("12c").unwrap();
392 402 let mut node = Node::from([0; NODE_BYTES_LENGTH]);
393 403 assert_eq!(prefix.first_different_nybble(&node), Some(0));
394 404 node.data[0] = 0x13;
395 405 assert_eq!(prefix.first_different_nybble(&node), Some(1));
396 406 node.data[0] = 0x12;
397 407 assert_eq!(prefix.first_different_nybble(&node), Some(2));
398 408 node.data[1] = 0xca;
399 409 // now it is a prefix
400 410 assert_eq!(prefix.first_different_nybble(&node), None);
401 411 }
402 412 }
403 413
404 414 #[cfg(test)]
405 415 pub use tests::hex_pad_right;
@@ -1,393 +1,393 b''
1 1 use std::borrow::Cow;
2 2 use std::io::Read;
3 3 use std::ops::Deref;
4 4 use std::path::Path;
5 5
6 6 use byteorder::{BigEndian, ByteOrder};
7 7 use crypto::digest::Digest;
8 8 use crypto::sha1::Sha1;
9 9 use flate2::read::ZlibDecoder;
10 10 use micro_timer::timed;
11 11 use zstd;
12 12
13 13 use super::index::Index;
14 14 use super::node::{NodePrefix, NODE_BYTES_LENGTH, NULL_NODE};
15 15 use super::nodemap;
16 16 use super::nodemap::{NodeMap, NodeMapError};
17 17 use super::nodemap_docket::NodeMapDocket;
18 18 use super::patch;
19 19 use crate::errors::HgError;
20 20 use crate::repo::Repo;
21 21 use crate::revlog::Revision;
22 22
23 23 #[derive(derive_more::From)]
24 24 pub enum RevlogError {
25 25 InvalidRevision,
26 26 /// Found more than one entry whose ID match the requested prefix
27 27 AmbiguousPrefix,
28 28 #[from]
29 29 Other(HgError),
30 30 }
31 31
32 32 impl From<NodeMapError> for RevlogError {
33 33 fn from(error: NodeMapError) -> Self {
34 34 match error {
35 35 NodeMapError::MultipleResults => RevlogError::AmbiguousPrefix,
36 36 NodeMapError::RevisionNotInIndex(_) => RevlogError::corrupted(),
37 37 }
38 38 }
39 39 }
40 40
41 41 impl RevlogError {
42 42 fn corrupted() -> Self {
43 43 RevlogError::Other(HgError::corrupted("corrupted revlog"))
44 44 }
45 45 }
46 46
47 47 /// Read only implementation of revlog.
48 48 pub struct Revlog {
49 49 /// When index and data are not interleaved: bytes of the revlog index.
50 50 /// When index and data are interleaved: bytes of the revlog index and
51 51 /// data.
52 index: Index,
52 pub(crate) index: Index,
53 53 /// When index and data are not interleaved: bytes of the revlog data
54 54 data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>>,
55 55 /// When present on disk: the persistent nodemap for this revlog
56 56 nodemap: Option<nodemap::NodeTree>,
57 57 }
58 58
59 59 impl Revlog {
60 60 /// Open a revlog index file.
61 61 ///
62 62 /// It will also open the associated data file if index and data are not
63 63 /// interleaved.
64 64 #[timed]
65 65 pub fn open(
66 66 repo: &Repo,
67 67 index_path: impl AsRef<Path>,
68 68 data_path: Option<&Path>,
69 69 ) -> Result<Self, RevlogError> {
70 70 let index_path = index_path.as_ref();
71 71 let index_mmap = repo.store_vfs().mmap_open(&index_path)?;
72 72
73 73 let version = get_version(&index_mmap);
74 74 if version != 1 {
75 75 // A proper new version should have had a repo/store requirement.
76 76 return Err(RevlogError::corrupted());
77 77 }
78 78
79 79 let index = Index::new(Box::new(index_mmap))?;
80 80
81 81 let default_data_path = index_path.with_extension("d");
82 82
83 83 // type annotation required
84 84 // won't recognize Mmap as Deref<Target = [u8]>
85 85 let data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>> =
86 86 if index.is_inline() {
87 87 None
88 88 } else {
89 89 let data_path = data_path.unwrap_or(&default_data_path);
90 90 let data_mmap = repo.store_vfs().mmap_open(data_path)?;
91 91 Some(Box::new(data_mmap))
92 92 };
93 93
94 94 let nodemap = NodeMapDocket::read_from_file(repo, index_path)?.map(
95 95 |(docket, data)| {
96 96 nodemap::NodeTree::load_bytes(
97 97 Box::new(data),
98 98 docket.data_length,
99 99 )
100 100 },
101 101 );
102 102
103 103 Ok(Revlog {
104 104 index,
105 105 data_bytes,
106 106 nodemap,
107 107 })
108 108 }
109 109
110 110 /// Return number of entries of the `Revlog`.
111 111 pub fn len(&self) -> usize {
112 112 self.index.len()
113 113 }
114 114
115 115 /// Returns `true` if the `Revlog` has zero `entries`.
116 116 pub fn is_empty(&self) -> bool {
117 117 self.index.is_empty()
118 118 }
119 119
120 120 /// Return the revision number associated with a node ID or node ID prefix.
121 121 #[timed]
122 122 pub fn get_node_rev(
123 123 &self,
124 124 node: NodePrefix,
125 125 ) -> Result<Revision, RevlogError> {
126 126 if let Some(nodemap) = &self.nodemap {
127 127 return nodemap
128 128 .find_bin(&self.index, node)?
129 129 .ok_or(RevlogError::InvalidRevision);
130 130 }
131 131
132 132 // Fallback to linear scan when a persistent nodemap is not present.
133 133 // This happens when the persistent-nodemap experimental feature is not
134 134 // enabled, or for small revlogs.
135 135 //
136 136 // TODO: consider building a non-persistent nodemap in memory to
137 137 // optimize these cases.
138 138 let mut found_by_prefix = None;
139 139 for rev in (0..self.len() as Revision).rev() {
140 140 let index_entry =
141 141 self.index.get_entry(rev).ok_or(HgError::corrupted(
142 142 "revlog references a revision not in the index",
143 143 ))?;
144 144 if node == *index_entry.hash() {
145 145 return Ok(rev);
146 146 }
147 147 if node.is_prefix_of(index_entry.hash()) {
148 148 if found_by_prefix.is_some() {
149 149 return Err(RevlogError::AmbiguousPrefix);
150 150 }
151 151 found_by_prefix = Some(rev)
152 152 }
153 153 }
154 154 found_by_prefix.ok_or(RevlogError::InvalidRevision)
155 155 }
156 156
157 157 /// Returns whether the given revision exists in this revlog.
158 158 pub fn has_rev(&self, rev: Revision) -> bool {
159 159 self.index.get_entry(rev).is_some()
160 160 }
161 161
162 162 /// Return the full data associated to a revision.
163 163 ///
164 164 /// All entries required to build the final data out of deltas will be
165 165 /// retrieved as needed, and the deltas will be applied to the initial
166 166 /// snapshot to rebuild the final data.
167 167 #[timed]
168 168 pub fn get_rev_data(&self, rev: Revision) -> Result<Vec<u8>, RevlogError> {
169 169 // Todo return -> Cow
170 170 let mut entry = self.get_entry(rev)?;
171 171 let mut delta_chain = vec![];
172 172 while let Some(base_rev) = entry.base_rev {
173 173 delta_chain.push(entry);
174 174 entry = self
175 175 .get_entry(base_rev)
176 176 .map_err(|_| RevlogError::corrupted())?;
177 177 }
178 178
179 179 // TODO do not look twice in the index
180 180 let index_entry = self
181 181 .index
182 182 .get_entry(rev)
183 183 .ok_or(RevlogError::InvalidRevision)?;
184 184
185 185 let data: Vec<u8> = if delta_chain.is_empty() {
186 186 entry.data()?.into()
187 187 } else {
188 188 Revlog::build_data_from_deltas(entry, &delta_chain)?
189 189 };
190 190
191 191 if self.check_hash(
192 192 index_entry.p1(),
193 193 index_entry.p2(),
194 194 index_entry.hash().as_bytes(),
195 195 &data,
196 196 ) {
197 197 Ok(data)
198 198 } else {
199 199 Err(RevlogError::corrupted())
200 200 }
201 201 }
202 202
203 203 /// Check the hash of some given data against the recorded hash.
204 204 pub fn check_hash(
205 205 &self,
206 206 p1: Revision,
207 207 p2: Revision,
208 208 expected: &[u8],
209 209 data: &[u8],
210 210 ) -> bool {
211 211 let e1 = self.index.get_entry(p1);
212 212 let h1 = match e1 {
213 213 Some(ref entry) => entry.hash(),
214 214 None => &NULL_NODE,
215 215 };
216 216 let e2 = self.index.get_entry(p2);
217 217 let h2 = match e2 {
218 218 Some(ref entry) => entry.hash(),
219 219 None => &NULL_NODE,
220 220 };
221 221
222 222 hash(data, h1.as_bytes(), h2.as_bytes()).as_slice() == expected
223 223 }
224 224
225 225 /// Build the full data of a revision out of its snapshot
226 226 /// and its deltas.
227 227 #[timed]
228 228 fn build_data_from_deltas(
229 229 snapshot: RevlogEntry,
230 230 deltas: &[RevlogEntry],
231 231 ) -> Result<Vec<u8>, RevlogError> {
232 232 let snapshot = snapshot.data()?;
233 233 let deltas = deltas
234 234 .iter()
235 235 .rev()
236 236 .map(RevlogEntry::data)
237 237 .collect::<Result<Vec<Cow<'_, [u8]>>, RevlogError>>()?;
238 238 let patches: Vec<_> =
239 239 deltas.iter().map(|d| patch::PatchList::new(d)).collect();
240 240 let patch = patch::fold_patch_lists(&patches);
241 241 Ok(patch.apply(&snapshot))
242 242 }
243 243
244 244 /// Return the revlog data.
245 245 fn data(&self) -> &[u8] {
246 246 match self.data_bytes {
247 247 Some(ref data_bytes) => &data_bytes,
248 248 None => panic!(
249 249 "forgot to load the data or trying to access inline data"
250 250 ),
251 251 }
252 252 }
253 253
254 254 /// Get an entry of the revlog.
255 255 fn get_entry(&self, rev: Revision) -> Result<RevlogEntry, RevlogError> {
256 256 let index_entry = self
257 257 .index
258 258 .get_entry(rev)
259 259 .ok_or(RevlogError::InvalidRevision)?;
260 260 let start = index_entry.offset();
261 261 let end = start + index_entry.compressed_len();
262 262 let data = if self.index.is_inline() {
263 263 self.index.data(start, end)
264 264 } else {
265 265 &self.data()[start..end]
266 266 };
267 267 let entry = RevlogEntry {
268 268 rev,
269 269 bytes: data,
270 270 compressed_len: index_entry.compressed_len(),
271 271 uncompressed_len: index_entry.uncompressed_len(),
272 272 base_rev: if index_entry.base_revision() == rev {
273 273 None
274 274 } else {
275 275 Some(index_entry.base_revision())
276 276 },
277 277 };
278 278 Ok(entry)
279 279 }
280 280 }
281 281
282 282 /// The revlog entry's bytes and the necessary information to extract
283 283 /// the entry's data.
284 284 #[derive(Debug)]
285 285 pub struct RevlogEntry<'a> {
286 286 rev: Revision,
287 287 bytes: &'a [u8],
288 288 compressed_len: usize,
289 289 uncompressed_len: usize,
290 290 base_rev: Option<Revision>,
291 291 }
292 292
293 293 impl<'a> RevlogEntry<'a> {
294 294 /// Extract the data contained in the entry.
295 295 pub fn data(&self) -> Result<Cow<'_, [u8]>, RevlogError> {
296 296 if self.bytes.is_empty() {
297 297 return Ok(Cow::Borrowed(&[]));
298 298 }
299 299 match self.bytes[0] {
300 300 // Revision data is the entirety of the entry, including this
301 301 // header.
302 302 b'\0' => Ok(Cow::Borrowed(self.bytes)),
303 303 // Raw revision data follows.
304 304 b'u' => Ok(Cow::Borrowed(&self.bytes[1..])),
305 305 // zlib (RFC 1950) data.
306 306 b'x' => Ok(Cow::Owned(self.uncompressed_zlib_data()?)),
307 307 // zstd data.
308 308 b'\x28' => Ok(Cow::Owned(self.uncompressed_zstd_data()?)),
309 309 // A proper new format should have had a repo/store requirement.
310 310 _format_type => Err(RevlogError::corrupted()),
311 311 }
312 312 }
313 313
314 314 fn uncompressed_zlib_data(&self) -> Result<Vec<u8>, RevlogError> {
315 315 let mut decoder = ZlibDecoder::new(self.bytes);
316 316 if self.is_delta() {
317 317 let mut buf = Vec::with_capacity(self.compressed_len);
318 318 decoder
319 319 .read_to_end(&mut buf)
320 320 .map_err(|_| RevlogError::corrupted())?;
321 321 Ok(buf)
322 322 } else {
323 323 let mut buf = vec![0; self.uncompressed_len];
324 324 decoder
325 325 .read_exact(&mut buf)
326 326 .map_err(|_| RevlogError::corrupted())?;
327 327 Ok(buf)
328 328 }
329 329 }
330 330
331 331 fn uncompressed_zstd_data(&self) -> Result<Vec<u8>, RevlogError> {
332 332 if self.is_delta() {
333 333 let mut buf = Vec::with_capacity(self.compressed_len);
334 334 zstd::stream::copy_decode(self.bytes, &mut buf)
335 335 .map_err(|_| RevlogError::corrupted())?;
336 336 Ok(buf)
337 337 } else {
338 338 let mut buf = vec![0; self.uncompressed_len];
339 339 let len = zstd::block::decompress_to_buffer(self.bytes, &mut buf)
340 340 .map_err(|_| RevlogError::corrupted())?;
341 341 if len != self.uncompressed_len {
342 342 Err(RevlogError::corrupted())
343 343 } else {
344 344 Ok(buf)
345 345 }
346 346 }
347 347 }
348 348
349 349 /// Tell if the entry is a snapshot or a delta
350 350 /// (influences on decompression).
351 351 fn is_delta(&self) -> bool {
352 352 self.base_rev.is_some()
353 353 }
354 354 }
355 355
356 356 /// Format version of the revlog.
357 357 pub fn get_version(index_bytes: &[u8]) -> u16 {
358 358 BigEndian::read_u16(&index_bytes[2..=3])
359 359 }
360 360
361 361 /// Calculate the hash of a revision given its data and its parents.
362 362 fn hash(data: &[u8], p1_hash: &[u8], p2_hash: &[u8]) -> Vec<u8> {
363 363 let mut hasher = Sha1::new();
364 364 let (a, b) = (p1_hash, p2_hash);
365 365 if a > b {
366 366 hasher.input(b);
367 367 hasher.input(a);
368 368 } else {
369 369 hasher.input(a);
370 370 hasher.input(b);
371 371 }
372 372 hasher.input(data);
373 373 let mut hash = vec![0; NODE_BYTES_LENGTH];
374 374 hasher.result(&mut hash);
375 375 hash
376 376 }
377 377
378 378 #[cfg(test)]
379 379 mod tests {
380 380 use super::*;
381 381
382 382 use super::super::index::IndexEntryBuilder;
383 383
384 384 #[test]
385 385 fn version_test() {
386 386 let bytes = IndexEntryBuilder::new()
387 387 .is_first(true)
388 388 .with_version(1)
389 389 .build();
390 390
391 391 assert_eq!(get_version(&bytes), 1)
392 392 }
393 393 }
@@ -1,69 +1,84 b''
1 1 use crate::error::CommandError;
2 2 use clap::Arg;
3 use format_bytes::format_bytes;
3 4 use hg::operations::cat;
4 5 use hg::utils::hg_path::HgPathBuf;
5 6 use micro_timer::timed;
6 7 use std::convert::TryFrom;
7 8
8 9 pub const HELP_TEXT: &str = "
9 10 Output the current or given revision of files
10 11 ";
11 12
12 13 pub fn args() -> clap::App<'static, 'static> {
13 14 clap::SubCommand::with_name("cat")
14 15 .arg(
15 16 Arg::with_name("rev")
16 17 .help("search the repository as it is in REV")
17 18 .short("-r")
18 19 .long("--revision")
19 20 .value_name("REV")
20 21 .takes_value(true),
21 22 )
22 23 .arg(
23 24 clap::Arg::with_name("files")
24 25 .required(true)
25 26 .multiple(true)
26 27 .empty_values(false)
27 28 .value_name("FILE")
28 29 .help("Activity to start: activity@category"),
29 30 )
30 31 .about(HELP_TEXT)
31 32 }
32 33
33 34 #[timed]
34 35 pub fn run(invocation: &crate::CliInvocation) -> Result<(), CommandError> {
35 36 let rev = invocation.subcommand_args.value_of("rev");
36 37 let file_args = match invocation.subcommand_args.values_of("files") {
37 38 Some(files) => files.collect(),
38 39 None => vec![],
39 40 };
40 41
41 42 let repo = invocation.repo?;
42 43 let cwd = hg::utils::current_dir()?;
43 44 let working_directory = repo.working_directory_path();
44 45 let working_directory = cwd.join(working_directory); // Make it absolute
45 46
46 47 let mut files = vec![];
47 48 for file in file_args.iter() {
48 49 // TODO: actually normalize `..` path segments etc?
49 50 let normalized = cwd.join(&file);
50 51 let stripped = normalized
51 52 .strip_prefix(&working_directory)
52 53 // TODO: error message for path arguments outside of the repo
53 54 .map_err(|_| CommandError::abort(""))?;
54 55 let hg_file = HgPathBuf::try_from(stripped.to_path_buf())
55 56 .map_err(|e| CommandError::abort(e.to_string()))?;
56 57 files.push(hg_file);
57 58 }
58 59
59 60 match rev {
60 61 Some(rev) => {
61 let data = cat(&repo, rev, &files).map_err(|e| (e, rev))?;
62 invocation.ui.write_stdout(&data)?;
63 Ok(())
62 let output = cat(&repo, rev, &files).map_err(|e| (e, rev))?;
63 invocation.ui.write_stdout(&output.concatenated)?;
64 if !output.missing.is_empty() {
65 let short = format!("{:x}", output.node.short()).into_bytes();
66 for path in &output.missing {
67 invocation.ui.write_stderr(&format_bytes!(
68 b"{}: no such file in rev {}\n",
69 path.as_bytes(),
70 short
71 ))?;
72 }
73 }
74 if output.found_any {
75 Ok(())
76 } else {
77 Err(CommandError::Unsuccessful)
78 }
64 79 }
65 80 None => Err(CommandError::unsupported(
66 81 "`rhg cat` without `--rev` / `-r`",
67 82 )),
68 83 }
69 84 }
@@ -1,143 +1,146 b''
1 1 use crate::ui::utf8_to_local;
2 2 use crate::ui::UiError;
3 3 use crate::NoRepoInCwdError;
4 4 use format_bytes::format_bytes;
5 5 use hg::config::{ConfigError, ConfigParseError};
6 6 use hg::errors::HgError;
7 7 use hg::repo::RepoError;
8 8 use hg::revlog::revlog::RevlogError;
9 9 use hg::utils::files::get_bytes_from_path;
10 10 use std::convert::From;
11 11
/// The ways in which an `rhg` command can fail.
#[derive(Debug)]
pub enum CommandError {
    /// Exit with an error message and the "standard" failure exit code.
    Abort { message: Vec<u8> },

    /// Exit with a failure exit code, without printing any message.
    Unsuccessful,

    /// Something (such as a CLI argument, repository layout, …) is not
    /// supported by this version of `rhg`. Depending on configuration,
    /// `rhg` may attempt to silently fall back to Python-based `hg`, which
    /// may or may not support this feature.
    UnsupportedFeature { message: Vec<u8> },
}
24 27
25 28 impl CommandError {
26 29 pub fn abort(message: impl AsRef<str>) -> Self {
27 30 CommandError::Abort {
28 31 // TODO: bytes-based (instead of Unicode-based) formatting
29 32 // of error messages to handle non-UTF-8 filenames etc:
30 33 // https://www.mercurial-scm.org/wiki/EncodingStrategy#Mixing_output
31 34 message: utf8_to_local(message.as_ref()).into(),
32 35 }
33 36 }
34 37
35 38 pub fn unsupported(message: impl AsRef<str>) -> Self {
36 39 CommandError::UnsupportedFeature {
37 40 message: utf8_to_local(message.as_ref()).into(),
38 41 }
39 42 }
40 43 }
41 44
42 45 /// For now we don’t differenciate between invalid CLI args and valid for `hg`
43 46 /// but not supported yet by `rhg`.
44 47 impl From<clap::Error> for CommandError {
45 48 fn from(error: clap::Error) -> Self {
46 49 CommandError::unsupported(error.to_string())
47 50 }
48 51 }
49 52
50 53 impl From<HgError> for CommandError {
51 54 fn from(error: HgError) -> Self {
52 55 match error {
53 56 HgError::UnsupportedFeature(message) => {
54 57 CommandError::unsupported(message)
55 58 }
56 59 _ => CommandError::abort(error.to_string()),
57 60 }
58 61 }
59 62 }
60 63
61 64 impl From<UiError> for CommandError {
62 65 fn from(_error: UiError) -> Self {
63 66 // If we already failed writing to stdout or stderr,
64 67 // writing an error message to stderr about it would be likely to fail
65 68 // too.
66 69 CommandError::abort("")
67 70 }
68 71 }
69 72
70 73 impl From<RepoError> for CommandError {
71 74 fn from(error: RepoError) -> Self {
72 75 match error {
73 76 RepoError::NotFound { at } => CommandError::Abort {
74 77 message: format_bytes!(
75 78 b"abort: repository {} not found",
76 79 get_bytes_from_path(at)
77 80 ),
78 81 },
79 82 RepoError::ConfigParseError(error) => error.into(),
80 83 RepoError::Other(error) => error.into(),
81 84 }
82 85 }
83 86 }
84 87
85 88 impl<'a> From<&'a NoRepoInCwdError> for CommandError {
86 89 fn from(error: &'a NoRepoInCwdError) -> Self {
87 90 let NoRepoInCwdError { cwd } = error;
88 91 CommandError::Abort {
89 92 message: format_bytes!(
90 93 b"abort: no repository found in '{}' (.hg not found)!",
91 94 get_bytes_from_path(cwd)
92 95 ),
93 96 }
94 97 }
95 98 }
96 99
97 100 impl From<ConfigError> for CommandError {
98 101 fn from(error: ConfigError) -> Self {
99 102 match error {
100 103 ConfigError::Parse(error) => error.into(),
101 104 ConfigError::Other(error) => error.into(),
102 105 }
103 106 }
104 107 }
105 108
106 109 impl From<ConfigParseError> for CommandError {
107 110 fn from(error: ConfigParseError) -> Self {
108 111 let ConfigParseError {
109 112 origin,
110 113 line,
111 114 message,
112 115 } = error;
113 116 let line_message = if let Some(line_number) = line {
114 117 format_bytes!(b":{}", line_number.to_string().into_bytes())
115 118 } else {
116 119 Vec::new()
117 120 };
118 121 CommandError::Abort {
119 122 message: format_bytes!(
120 123 b"config error at {}{}: {}",
121 124 origin,
122 125 line_message,
123 126 message
124 127 ),
125 128 }
126 129 }
127 130 }
128 131
129 132 impl From<(RevlogError, &str)> for CommandError {
130 133 fn from((err, rev): (RevlogError, &str)) -> CommandError {
131 134 match err {
132 135 RevlogError::InvalidRevision => CommandError::abort(format!(
133 136 "abort: invalid revision identifier: {}",
134 137 rev
135 138 )),
136 139 RevlogError::AmbiguousPrefix => CommandError::abort(format!(
137 140 "abort: ambiguous revision identifier: {}",
138 141 rev
139 142 )),
140 143 RevlogError::Other(error) => error.into(),
141 144 }
142 145 }
143 146 }
@@ -1,10 +1,13 b''
/// Process exit status, as handed to the operating system.
pub type ExitCode = i32;

/// The command succeeded.
pub const OK: ExitCode = 0;

/// The command aborted (generic failure).
pub const ABORT: ExitCode = 255;

/// The command ran to completion but did not succeed.
pub const UNSUCCESSFUL: ExitCode = 1;

/// The command or feature is not implemented by rhg.
pub const UNIMPLEMENTED: ExitCode = 252;
@@ -1,440 +1,442 b''
1 1 extern crate log;
2 2 use crate::ui::Ui;
3 3 use clap::App;
4 4 use clap::AppSettings;
5 5 use clap::Arg;
6 6 use clap::ArgMatches;
7 7 use format_bytes::{format_bytes, join};
8 8 use hg::config::Config;
9 9 use hg::repo::{Repo, RepoError};
10 10 use hg::utils::files::{get_bytes_from_os_str, get_path_from_bytes};
11 11 use hg::utils::SliceExt;
12 12 use std::ffi::OsString;
13 13 use std::path::PathBuf;
14 14 use std::process::Command;
15 15
16 16 mod blackbox;
17 17 mod error;
18 18 mod exitcode;
19 19 mod ui;
20 20 use error::CommandError;
21 21
22 22 fn main_with_result(
23 23 process_start_time: &blackbox::ProcessStartTime,
24 24 ui: &ui::Ui,
25 25 repo: Result<&Repo, &NoRepoInCwdError>,
26 26 config: &Config,
27 27 ) -> Result<(), CommandError> {
28 28 check_extensions(config)?;
29 29
30 30 let app = App::new("rhg")
31 31 .global_setting(AppSettings::AllowInvalidUtf8)
32 32 .setting(AppSettings::SubcommandRequired)
33 33 .setting(AppSettings::VersionlessSubcommands)
34 34 .arg(
35 35 Arg::with_name("repository")
36 36 .help("repository root directory")
37 37 .short("-R")
38 38 .long("--repository")
39 39 .value_name("REPO")
40 40 .takes_value(true)
41 41 // Both ok: `hg -R ./foo log` or `hg log -R ./foo`
42 42 .global(true),
43 43 )
44 44 .arg(
45 45 Arg::with_name("config")
46 46 .help("set/override config option (use 'section.name=value')")
47 47 .long("--config")
48 48 .value_name("CONFIG")
49 49 .takes_value(true)
50 50 .global(true)
51 51 // Ok: `--config section.key1=val --config section.key2=val2`
52 52 .multiple(true)
53 53 // Not ok: `--config section.key1=val section.key2=val2`
54 54 .number_of_values(1),
55 55 )
56 56 .arg(
57 57 Arg::with_name("cwd")
58 58 .help("change working directory")
59 59 .long("--cwd")
60 60 .value_name("DIR")
61 61 .takes_value(true)
62 62 .global(true),
63 63 )
64 64 .version("0.0.1");
65 65 let app = add_subcommand_args(app);
66 66
67 67 let matches = app.clone().get_matches_safe()?;
68 68
69 69 let (subcommand_name, subcommand_matches) = matches.subcommand();
70 70 let run = subcommand_run_fn(subcommand_name)
71 71 .expect("unknown subcommand name from clap despite AppSettings::SubcommandRequired");
72 72 let subcommand_args = subcommand_matches
73 73 .expect("no subcommand arguments from clap despite AppSettings::SubcommandRequired");
74 74
75 75 let invocation = CliInvocation {
76 76 ui,
77 77 subcommand_args,
78 78 config,
79 79 repo,
80 80 };
81 81 let blackbox = blackbox::Blackbox::new(&invocation, process_start_time)?;
82 82 blackbox.log_command_start();
83 83 let result = run(&invocation);
84 84 blackbox.log_command_end(exit_code(&result));
85 85 result
86 86 }
87 87
// Process entry point. In order: record the start time, set up logging and
// the `Ui`, pre-parse `--cwd` / `--config` / `-R`, change directory if asked
// to, load the non-repository config, look for a repository, then hand over
// to `main_with_result` and finish through `exit`, which never returns.
88 88 fn main() {
89 89 // Run this first, before we find out if the blackbox extension is even
90 90 // enabled, in order to include everything in-between in the duration
91 91 // measurements. Reading config files can be slow if they’re on NFS.
92 92 let process_start_time = blackbox::ProcessStartTime::now();
93 93
94 94 env_logger::init();
95 95 let ui = ui::Ui::new();
96 96
97 97 let early_args = EarlyArgs::parse(std::env::args_os());
98 98
// When `--cwd` was given: remember the directory we started in, then switch
// to the requested one. A failure of either step exits with an abort message.
// The value stays `None` when `--cwd` was not given.
99 99 let initial_current_dir = early_args.cwd.map(|cwd| {
100 100 let cwd = get_path_from_bytes(&cwd);
101 101 std::env::current_dir()
102 102 .and_then(|initial| {
103 103 std::env::set_current_dir(cwd)?;
104 104 Ok(initial)
105 105 })
106 106 .unwrap_or_else(|error| {
107 107 exit(
108 108 &None,
109 109 &ui,
110 110 OnUnsupported::Abort,
111 111 Err(CommandError::abort(format!(
112 112 "abort: {}: '{}'",
113 113 error,
114 114 cwd.display()
115 115 ))),
116 116 )
117 117 })
118 118 });
119 119
// Config built from the `--config` arguments, loaded before any repository
// is known. A load error exits immediately.
120 120 let non_repo_config =
121 121 Config::load(early_args.config).unwrap_or_else(|error| {
122 122 // Normally this is decided based on config, but we don’t have that
123 123 // available. As of this writing config loading never returns an
124 124 // "unsupported" error but that is not enforced by the type system.
125 125 let on_unsupported = OnUnsupported::Abort;
126 126
127 127 exit(&initial_current_dir, &ui, on_unsupported, Err(error.into()))
128 128 });
129 129
// A `-R` / `--repository` value that starts with a URL scheme is reported as
// an unsupported feature.
130 130 if let Some(repo_path_bytes) = &early_args.repo {
131 131 lazy_static::lazy_static! {
132 132 static ref SCHEME_RE: regex::bytes::Regex =
133 133 // Same as `_matchscheme` in `mercurial/util.py`
134 134 regex::bytes::Regex::new("^[a-zA-Z0-9+.\\-]+:").unwrap();
135 135 }
136 136 if SCHEME_RE.is_match(&repo_path_bytes) {
137 137 exit(
138 138 &initial_current_dir,
139 139 &ui,
140 140 OnUnsupported::from_config(&non_repo_config),
141 141 Err(CommandError::UnsupportedFeature {
142 142 message: format_bytes!(
143 143 b"URL-like --repository {}",
144 144 repo_path_bytes
145 145 ),
146 146 }),
147 147 )
148 148 }
149 149 }
// Look for the repository. "Not found" without an explicit `-R` is kept as
// an `Err` value for the subcommand to deal with; every other error exits
// here.
150 150 let repo_path = early_args.repo.as_deref().map(get_path_from_bytes);
151 151 let repo_result = match Repo::find(&non_repo_config, repo_path) {
152 152 Ok(repo) => Ok(repo),
153 153 Err(RepoError::NotFound { at }) if repo_path.is_none() => {
154 154 // Not finding a repo is not fatal yet, if `-R` was not given
155 155 Err(NoRepoInCwdError { cwd: at })
156 156 }
157 157 Err(error) => exit(
158 158 &initial_current_dir,
159 159 &ui,
160 160 OnUnsupported::from_config(&non_repo_config),
161 161 Err(error.into()),
162 162 ),
163 163 };
164 164
// Use the repository's config when a repository was found, otherwise the
// non-repository config loaded above.
165 165 let config = if let Ok(repo) = &repo_result {
166 166 repo.config()
167 167 } else {
168 168 &non_repo_config
169 169 };
170 170
171 171 let result = main_with_result(
172 172 &process_start_time,
173 173 &ui,
174 174 repo_result.as_ref(),
175 175 config,
176 176 );
177 177 exit(
178 178 &initial_current_dir,
179 179 &ui,
180 180 OnUnsupported::from_config(config),
181 181 result,
182 182 )
183 183 }
184 184
185 185 fn exit_code(result: &Result<(), CommandError>) -> i32 {
186 186 match result {
187 187 Ok(()) => exitcode::OK,
188 188 Err(CommandError::Abort { .. }) => exitcode::ABORT,
189 Err(CommandError::Unsuccessful) => exitcode::UNSUCCESSFUL,
189 190
190 191 // Exit with a specific code and no error message to let a potential
191 192 // wrapper script fallback to Python-based Mercurial.
192 193 Err(CommandError::UnsupportedFeature { .. }) => {
193 194 exitcode::UNIMPLEMENTED
194 195 }
195 196 }
196 197 }
197 198
198 199 fn exit(
199 200 initial_current_dir: &Option<PathBuf>,
200 201 ui: &Ui,
201 202 mut on_unsupported: OnUnsupported,
202 203 result: Result<(), CommandError>,
203 204 ) -> ! {
204 205 if let (
205 206 OnUnsupported::Fallback { executable },
206 207 Err(CommandError::UnsupportedFeature { .. }),
207 208 ) = (&on_unsupported, &result)
208 209 {
209 210 let mut args = std::env::args_os();
210 211 let executable_path = get_path_from_bytes(&executable);
211 212 let this_executable = args.next().expect("exepcted argv[0] to exist");
212 213 if executable_path == &PathBuf::from(this_executable) {
213 214 // Avoid spawning infinitely many processes until resource
214 215 // exhaustion.
215 216 let _ = ui.write_stderr(&format_bytes!(
216 217 b"Blocking recursive fallback. The 'rhg.fallback-executable = {}' config \
217 218 points to `rhg` itself.\n",
218 219 executable
219 220 ));
220 221 on_unsupported = OnUnsupported::Abort
221 222 } else {
222 223 // `args` is now `argv[1..]` since we’ve already consumed `argv[0]`
223 224 let mut command = Command::new(executable_path);
224 225 command.args(args);
225 226 if let Some(initial) = initial_current_dir {
226 227 command.current_dir(initial);
227 228 }
228 229 let result = command.status();
229 230 match result {
230 231 Ok(status) => std::process::exit(
231 232 status.code().unwrap_or(exitcode::ABORT),
232 233 ),
233 234 Err(error) => {
234 235 let _ = ui.write_stderr(&format_bytes!(
235 236 b"tried to fall back to a '{}' sub-process but got error {}\n",
236 237 executable, format_bytes::Utf8(error)
237 238 ));
238 239 on_unsupported = OnUnsupported::Abort
239 240 }
240 241 }
241 242 }
242 243 }
243 244 match &result {
244 245 Ok(_) => {}
246 Err(CommandError::Unsuccessful) => {}
245 247 Err(CommandError::Abort { message }) => {
246 248 if !message.is_empty() {
247 249 // Ignore errors when writing to stderr, we’re already exiting
248 250 // with failure code so there’s not much more we can do.
249 251 let _ = ui.write_stderr(&format_bytes!(b"{}\n", message));
250 252 }
251 253 }
252 254 Err(CommandError::UnsupportedFeature { message }) => {
253 255 match on_unsupported {
254 256 OnUnsupported::Abort => {
255 257 let _ = ui.write_stderr(&format_bytes!(
256 258 b"unsupported feature: {}\n",
257 259 message
258 260 ));
259 261 }
260 262 OnUnsupported::AbortSilent => {}
261 263 OnUnsupported::Fallback { .. } => unreachable!(),
262 264 }
263 265 }
264 266 }
265 267 std::process::exit(exit_code(&result))
266 268 }
267 269
// Given a list of subcommand names, this macro generates:
// * a `commands` module containing one public submodule per name;
// * `add_subcommand_args`, which registers each submodule's `args()` on the
//   clap `App`;
// * the `RunFn` type alias for a subcommand entry point;
// * `subcommand_run_fn`, which maps a subcommand name to that submodule's
//   `run` function, or `None` for an unknown name.
268 270 macro_rules! subcommands {
269 271 ($( $command: ident )+) => {
270 272 mod commands {
271 273 $(
272 274 pub mod $command;
273 275 )+
274 276 }
275 277
276 278 fn add_subcommand_args<'a, 'b>(app: App<'a, 'b>) -> App<'a, 'b> {
277 279 app
278 280 $(
279 281 .subcommand(commands::$command::args())
280 282 )+
281 283 }
282 284
283 285 pub type RunFn = fn(&CliInvocation) -> Result<(), CommandError>;
284 286
285 287 fn subcommand_run_fn(name: &str) -> Option<RunFn> {
286 288 match name {
287 289 $(
// `stringify!` turns the identifier into the string literal that is matched
// against the name.
288 290 stringify!($command) => Some(commands::$command::run),
289 291 )+
290 292 _ => None,
291 293 }
292 294 }
293 295 };
294 296 }
295 297
296 298 subcommands! {
297 299 cat
298 300 debugdata
299 301 debugrequirements
300 302 files
301 303 root
302 304 config
303 305 }
304 306 pub struct CliInvocation<'a> {
305 307 ui: &'a Ui,
306 308 subcommand_args: &'a ArgMatches<'a>,
307 309 config: &'a Config,
308 310 /// References inside `Result` is a bit peculiar but allow
309 311 /// `invocation.repo?` to work out with `&CliInvocation` since this
310 312 /// `Result` type is `Copy`.
311 313 repo: Result<&'a Repo, &'a NoRepoInCwdError>,
312 314 }
313 315
/// No repository was found and `-R` / `--repository` was not given.
struct NoRepoInCwdError {
    /// The path that was searched for a repository.
    cwd: PathBuf,
}
317 319
/// CLI arguments that must be parsed "early", before Clap is used, so that
/// configuration can be read first. Ideally Clap would handle these too,
/// see <https://github.com/clap-rs/clap/discussions/2366>.
///
/// The same arguments are still declared when Clap is used later, so that
/// Clap does not return an error for their presence.
struct EarlyArgs {
    /// Values of all `--config` arguments. (Possibly none)
    config: Vec<Vec<u8>>,
    /// Value of the `-R` or `--repository` argument, if any.
    repo: Option<Vec<u8>>,
    /// Value of the `--cwd` argument, if any.
    cwd: Option<Vec<u8>>,
}
332 334
333 335 impl EarlyArgs {
334 336 fn parse(args: impl IntoIterator<Item = OsString>) -> Self {
335 337 let mut args = args.into_iter().map(get_bytes_from_os_str);
336 338 let mut config = Vec::new();
337 339 let mut repo = None;
338 340 let mut cwd = None;
339 341 // Use `while let` instead of `for` so that we can also call
340 342 // `args.next()` inside the loop.
341 343 while let Some(arg) = args.next() {
342 344 if arg == b"--config" {
343 345 if let Some(value) = args.next() {
344 346 config.push(value)
345 347 }
346 348 } else if let Some(value) = arg.drop_prefix(b"--config=") {
347 349 config.push(value.to_owned())
348 350 }
349 351
350 352 if arg == b"--cwd" {
351 353 if let Some(value) = args.next() {
352 354 cwd = Some(value)
353 355 }
354 356 } else if let Some(value) = arg.drop_prefix(b"--cwd=") {
355 357 cwd = Some(value.to_owned())
356 358 }
357 359
358 360 if arg == b"--repository" || arg == b"-R" {
359 361 if let Some(value) = args.next() {
360 362 repo = Some(value)
361 363 }
362 364 } else if let Some(value) = arg.drop_prefix(b"--repository=") {
363 365 repo = Some(value.to_owned())
364 366 } else if let Some(value) = arg.drop_prefix(b"-R") {
365 367 repo = Some(value.to_owned())
366 368 }
367 369 }
368 370 Self { config, repo, cwd }
369 371 }
370 372 }
371 373
/// The policy to apply when an unsupported feature is encountered.
///
/// See `HgError::UnsupportedFeature` and `CommandError::UnsupportedFeature`.
enum OnUnsupported {
    /// Print an error message saying which feature is not supported, then
    /// exit with code 252.
    Abort,
    /// Exit with code 252 without printing anything.
    AbortSilent,
    /// Try running a Python implementation
    Fallback { executable: Vec<u8> },
}
384 386
385 387 impl OnUnsupported {
386 388 const DEFAULT: Self = OnUnsupported::Abort;
387 389 const DEFAULT_FALLBACK_EXECUTABLE: &'static [u8] = b"hg";
388 390
389 391 fn from_config(config: &Config) -> Self {
390 392 match config
391 393 .get(b"rhg", b"on-unsupported")
392 394 .map(|value| value.to_ascii_lowercase())
393 395 .as_deref()
394 396 {
395 397 Some(b"abort") => OnUnsupported::Abort,
396 398 Some(b"abort-silent") => OnUnsupported::AbortSilent,
397 399 Some(b"fallback") => OnUnsupported::Fallback {
398 400 executable: config
399 401 .get(b"rhg", b"fallback-executable")
400 402 .unwrap_or(Self::DEFAULT_FALLBACK_EXECUTABLE)
401 403 .to_owned(),
402 404 },
403 405 None => Self::DEFAULT,
404 406 Some(_) => {
405 407 // TODO: warn about unknown config value
406 408 Self::DEFAULT
407 409 }
408 410 }
409 411 }
410 412 }
411 413
412 414 const SUPPORTED_EXTENSIONS: &[&[u8]] = &[b"blackbox", b"share"];
413 415
414 416 fn check_extensions(config: &Config) -> Result<(), CommandError> {
415 417 let enabled = config.get_section_keys(b"extensions");
416 418
417 419 let mut unsupported = enabled;
418 420 for supported in SUPPORTED_EXTENSIONS {
419 421 unsupported.remove(supported);
420 422 }
421 423
422 424 if let Some(ignored_list) =
423 425 config.get_simple_list(b"rhg", b"ignored-extensions")
424 426 {
425 427 for ignored in ignored_list {
426 428 unsupported.remove(ignored);
427 429 }
428 430 }
429 431
430 432 if unsupported.is_empty() {
431 433 Ok(())
432 434 } else {
433 435 Err(CommandError::UnsupportedFeature {
434 436 message: format_bytes!(
435 437 b"extensions: {} (consider adding them to 'rhg.ignored-extensions' config)",
436 438 join(unsupported, b", ")
437 439 ),
438 440 })
439 441 }
440 442 }
General Comments 0
You need to be logged in to leave comments. Login now