##// END OF EJS Templates
rhg: Expose FilelogEntry that wraps RevlogEntry...
Simon Sapin -
r49374:35c47015 default
parent child Browse files
Show More
@@ -1,89 +1,117
1 1 use crate::errors::HgError;
2 2 use crate::repo::Repo;
3 3 use crate::revlog::path_encode::path_encode;
4 use crate::revlog::revlog::RevlogEntry;
4 5 use crate::revlog::revlog::{Revlog, RevlogError};
5 6 use crate::revlog::NodePrefix;
6 7 use crate::revlog::Revision;
7 8 use crate::utils::files::get_path_from_bytes;
8 9 use crate::utils::hg_path::HgPath;
9 10 use crate::utils::SliceExt;
10 11 use std::path::PathBuf;
11 12
12 13 /// A specialized `Revlog` to work with file data logs.
13 14 pub struct Filelog {
14 15 /// The generic `revlog` format.
15 16 revlog: Revlog,
16 17 }
17 18
18 19 impl Filelog {
19 20 pub fn open(repo: &Repo, file_path: &HgPath) -> Result<Self, HgError> {
20 21 let index_path = store_path(file_path, b".i");
21 22 let data_path = store_path(file_path, b".d");
22 23 let revlog = Revlog::open(repo, index_path, Some(&data_path))?;
23 24 Ok(Self { revlog })
24 25 }
25 26
26 /// The given node ID is that of the file as found in a manifest, not of a
27 /// The given node ID is that of the file as found in a filelog, not of a
27 28 /// changeset.
28 29 pub fn data_for_node(
29 30 &self,
30 31 file_node: impl Into<NodePrefix>,
31 32 ) -> Result<FilelogRevisionData, RevlogError> {
32 33 let file_rev = self.revlog.rev_from_node(file_node.into())?;
33 34 self.data_for_rev(file_rev)
34 35 }
35 36
36 /// The given revision is that of the file as found in a manifest, not of a
37 /// The given revision is that of the file as found in a filelog, not of a
37 38 /// changeset.
38 39 pub fn data_for_rev(
39 40 &self,
40 41 file_rev: Revision,
41 42 ) -> Result<FilelogRevisionData, RevlogError> {
42 43 let data: Vec<u8> = self.revlog.get_rev_data(file_rev)?.into_owned();
43 44 Ok(FilelogRevisionData(data.into()))
44 45 }
46
47 /// The given node ID is that of the file as found in a filelog, not of a
48 /// changeset.
49 pub fn entry_for_node(
50 &self,
51 file_node: impl Into<NodePrefix>,
52 ) -> Result<FilelogEntry, RevlogError> {
53 let file_rev = self.revlog.rev_from_node(file_node.into())?;
54 self.entry_for_rev(file_rev)
55 }
56
57 /// The given revision is that of the file as found in a filelog, not of a
58 /// changeset.
59 pub fn entry_for_rev(
60 &self,
61 file_rev: Revision,
62 ) -> Result<FilelogEntry, RevlogError> {
63 Ok(FilelogEntry(self.revlog.get_entry(file_rev)?))
64 }
45 65 }
46 66
47 67 fn store_path(hg_path: &HgPath, suffix: &[u8]) -> PathBuf {
48 68 let encoded_bytes =
49 69 path_encode(&[b"data/", hg_path.as_bytes(), suffix].concat());
50 70 get_path_from_bytes(&encoded_bytes).into()
51 71 }
52 72
73 pub struct FilelogEntry<'a>(RevlogEntry<'a>);
74
75 impl FilelogEntry<'_> {
76 pub fn data(&self) -> Result<FilelogRevisionData, HgError> {
77 Ok(FilelogRevisionData(self.0.data()?.into_owned()))
78 }
79 }
80
53 81 /// The data for one revision in a filelog, uncompressed and delta-resolved.
54 82 pub struct FilelogRevisionData(Vec<u8>);
55 83
56 84 impl FilelogRevisionData {
57 85 /// Split into metadata and data
58 86 pub fn split(&self) -> Result<(Option<&[u8]>, &[u8]), HgError> {
59 87 const DELIMITER: &[u8; 2] = &[b'\x01', b'\n'];
60 88
61 89 if let Some(rest) = self.0.drop_prefix(DELIMITER) {
62 90 if let Some((metadata, data)) = rest.split_2_by_slice(DELIMITER) {
63 91 Ok((Some(metadata), data))
64 92 } else {
65 93 Err(HgError::corrupted(
66 94 "Missing metadata end delimiter in filelog entry",
67 95 ))
68 96 }
69 97 } else {
70 98 Ok((None, &self.0))
71 99 }
72 100 }
73 101
74 102 /// Returns the file contents at this revision, stripped of any metadata
75 103 pub fn file_data(&self) -> Result<&[u8], HgError> {
76 104 let (_metadata, data) = self.split()?;
77 105 Ok(data)
78 106 }
79 107
80 108 /// Consume the entry, and convert it into data, discarding any metadata,
81 109 /// if present.
82 110 pub fn into_file_data(self) -> Result<Vec<u8>, HgError> {
83 111 if let (Some(_metadata), data) = self.split()? {
84 112 Ok(data.to_owned())
85 113 } else {
86 114 Ok(self.0)
87 115 }
88 116 }
89 117 }
@@ -1,431 +1,434
1 1 use std::borrow::Cow;
2 2 use std::io::Read;
3 3 use std::ops::Deref;
4 4 use std::path::Path;
5 5
6 6 use flate2::read::ZlibDecoder;
7 7 use micro_timer::timed;
8 8 use sha1::{Digest, Sha1};
9 9 use zstd;
10 10
11 11 use super::index::Index;
12 12 use super::node::{NodePrefix, NODE_BYTES_LENGTH, NULL_NODE};
13 13 use super::nodemap;
14 14 use super::nodemap::{NodeMap, NodeMapError};
15 15 use super::nodemap_docket::NodeMapDocket;
16 16 use super::patch;
17 17 use crate::errors::HgError;
18 18 use crate::repo::Repo;
19 19 use crate::revlog::Revision;
20 20 use crate::{Node, NULL_REVISION};
21 21
22 22 #[derive(derive_more::From)]
23 23 pub enum RevlogError {
24 24 InvalidRevision,
25 25 /// Working directory is not supported
26 26 WDirUnsupported,
27 27 /// Found more than one entry whose ID matches the requested prefix
28 28 AmbiguousPrefix,
29 29 #[from]
30 30 Other(HgError),
31 31 }
32 32
33 33 impl From<NodeMapError> for RevlogError {
34 34 fn from(error: NodeMapError) -> Self {
35 35 match error {
36 36 NodeMapError::MultipleResults => RevlogError::AmbiguousPrefix,
37 37 NodeMapError::RevisionNotInIndex(_) => RevlogError::corrupted(),
38 38 }
39 39 }
40 40 }
41 41
42 fn corrupted() -> HgError {
43 HgError::corrupted("corrupted revlog")
44 }
45
42 46 impl RevlogError {
43 47 fn corrupted() -> Self {
44 RevlogError::Other(HgError::corrupted("corrupted revlog"))
48 RevlogError::Other(corrupted())
45 49 }
46 50 }
47 51
48 52 /// Read only implementation of revlog.
49 53 pub struct Revlog {
50 54 /// When index and data are not interleaved: bytes of the revlog index.
51 55 /// When index and data are interleaved: bytes of the revlog index and
52 56 /// data.
53 57 index: Index,
54 58 /// When index and data are not interleaved: bytes of the revlog data
55 59 data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>>,
56 60 /// When present on disk: the persistent nodemap for this revlog
57 61 nodemap: Option<nodemap::NodeTree>,
58 62 }
59 63
60 64 impl Revlog {
61 65 /// Open a revlog index file.
62 66 ///
63 67 /// It will also open the associated data file if index and data are not
64 68 /// interleaved.
65 69 #[timed]
66 70 pub fn open(
67 71 repo: &Repo,
68 72 index_path: impl AsRef<Path>,
69 73 data_path: Option<&Path>,
70 74 ) -> Result<Self, HgError> {
71 75 let index_path = index_path.as_ref();
72 76 let index = {
73 77 match repo.store_vfs().mmap_open_opt(&index_path)? {
74 78 None => Index::new(Box::new(vec![])),
75 79 Some(index_mmap) => {
76 80 let index = Index::new(Box::new(index_mmap))?;
77 81 Ok(index)
78 82 }
79 83 }
80 84 }?;
81 85
82 86 let default_data_path = index_path.with_extension("d");
83 87
84 88 // type annotation required
85 89 // won't recognize Mmap as Deref<Target = [u8]>
86 90 let data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>> =
87 91 if index.is_inline() {
88 92 None
89 93 } else {
90 94 let data_path = data_path.unwrap_or(&default_data_path);
91 95 let data_mmap = repo.store_vfs().mmap_open(data_path)?;
92 96 Some(Box::new(data_mmap))
93 97 };
94 98
95 99 let nodemap = if index.is_inline() {
96 100 None
97 101 } else {
98 102 NodeMapDocket::read_from_file(repo, index_path)?.map(
99 103 |(docket, data)| {
100 104 nodemap::NodeTree::load_bytes(
101 105 Box::new(data),
102 106 docket.data_length,
103 107 )
104 108 },
105 109 )
106 110 };
107 111
108 112 Ok(Revlog {
109 113 index,
110 114 data_bytes,
111 115 nodemap,
112 116 })
113 117 }
114 118
115 119 /// Return number of entries of the `Revlog`.
116 120 pub fn len(&self) -> usize {
117 121 self.index.len()
118 122 }
119 123
120 124 /// Returns `true` if the `Revlog` has zero `entries`.
121 125 pub fn is_empty(&self) -> bool {
122 126 self.index.is_empty()
123 127 }
124 128
125 129 /// Returns the node ID for the given revision number, if it exists in this
126 130 /// revlog
127 131 pub fn node_from_rev(&self, rev: Revision) -> Option<&Node> {
128 132 if rev == NULL_REVISION {
129 133 return Some(&NULL_NODE);
130 134 }
131 135 Some(self.index.get_entry(rev)?.hash())
132 136 }
133 137
134 138 /// Return the revision number for the given node ID, if it exists in this
135 139 /// revlog
136 140 #[timed]
137 141 pub fn rev_from_node(
138 142 &self,
139 143 node: NodePrefix,
140 144 ) -> Result<Revision, RevlogError> {
141 145 if node.is_prefix_of(&NULL_NODE) {
142 146 return Ok(NULL_REVISION);
143 147 }
144 148
145 149 if let Some(nodemap) = &self.nodemap {
146 150 return nodemap
147 151 .find_bin(&self.index, node)?
148 152 .ok_or(RevlogError::InvalidRevision);
149 153 }
150 154
151 155 // Fallback to linear scan when a persistent nodemap is not present.
152 156 // This happens when the persistent-nodemap experimental feature is not
153 157 // enabled, or for small revlogs.
154 158 //
155 159 // TODO: consider building a non-persistent nodemap in memory to
156 160 // optimize these cases.
157 161 let mut found_by_prefix = None;
158 162 for rev in (0..self.len() as Revision).rev() {
159 163 let index_entry =
160 164 self.index.get_entry(rev).ok_or(HgError::corrupted(
161 165 "revlog references a revision not in the index",
162 166 ))?;
163 167 if node == *index_entry.hash() {
164 168 return Ok(rev);
165 169 }
166 170 if node.is_prefix_of(index_entry.hash()) {
167 171 if found_by_prefix.is_some() {
168 172 return Err(RevlogError::AmbiguousPrefix);
169 173 }
170 174 found_by_prefix = Some(rev)
171 175 }
172 176 }
173 177 found_by_prefix.ok_or(RevlogError::InvalidRevision)
174 178 }
175 179
176 180 /// Returns whether the given revision exists in this revlog.
177 181 pub fn has_rev(&self, rev: Revision) -> bool {
178 182 self.index.get_entry(rev).is_some()
179 183 }
180 184
181 185 /// Return the full data associated to a revision.
182 186 ///
183 187 /// All entries required to build the final data out of deltas will be
184 188 /// retrieved as needed, and the deltas will be applied to the initial
185 189 /// snapshot to rebuild the final data.
186 190 #[timed]
187 191 pub fn get_rev_data(
188 192 &self,
189 193 rev: Revision,
190 194 ) -> Result<Cow<[u8]>, RevlogError> {
191 195 if rev == NULL_REVISION {
192 196 return Ok(Cow::Borrowed(&[]));
193 197 };
194 self.get_entry(rev)?.data()
198 Ok(self.get_entry(rev)?.data()?)
195 199 }
196 200
197 201 /// Check the hash of some given data against the recorded hash.
198 202 pub fn check_hash(
199 203 &self,
200 204 p1: Revision,
201 205 p2: Revision,
202 206 expected: &[u8],
203 207 data: &[u8],
204 208 ) -> bool {
205 209 let e1 = self.index.get_entry(p1);
206 210 let h1 = match e1 {
207 211 Some(ref entry) => entry.hash(),
208 212 None => &NULL_NODE,
209 213 };
210 214 let e2 = self.index.get_entry(p2);
211 215 let h2 = match e2 {
212 216 Some(ref entry) => entry.hash(),
213 217 None => &NULL_NODE,
214 218 };
215 219
216 220 &hash(data, h1.as_bytes(), h2.as_bytes()) == expected
217 221 }
218 222
219 223 /// Build the full data of a revision out of its snapshot
220 224 /// and its deltas.
221 225 #[timed]
222 226 fn build_data_from_deltas(
223 227 snapshot: RevlogEntry,
224 228 deltas: &[RevlogEntry],
225 ) -> Result<Vec<u8>, RevlogError> {
229 ) -> Result<Vec<u8>, HgError> {
226 230 let snapshot = snapshot.data_chunk()?;
227 231 let deltas = deltas
228 232 .iter()
229 233 .rev()
230 234 .map(RevlogEntry::data_chunk)
231 .collect::<Result<Vec<Cow<'_, [u8]>>, RevlogError>>()?;
235 .collect::<Result<Vec<_>, _>>()?;
232 236 let patches: Vec<_> =
233 237 deltas.iter().map(|d| patch::PatchList::new(d)).collect();
234 238 let patch = patch::fold_patch_lists(&patches);
235 239 Ok(patch.apply(&snapshot))
236 240 }
237 241
238 242 /// Return the revlog data.
239 243 fn data(&self) -> &[u8] {
240 244 match self.data_bytes {
241 245 Some(ref data_bytes) => &data_bytes,
242 246 None => panic!(
243 247 "forgot to load the data or trying to access inline data"
244 248 ),
245 249 }
246 250 }
247 251
248 252 /// Get an entry of the revlog.
249 fn get_entry(&self, rev: Revision) -> Result<RevlogEntry, RevlogError> {
253 pub fn get_entry(
254 &self,
255 rev: Revision,
256 ) -> Result<RevlogEntry, RevlogError> {
250 257 let index_entry = self
251 258 .index
252 259 .get_entry(rev)
253 260 .ok_or(RevlogError::InvalidRevision)?;
254 261 let start = index_entry.offset();
255 262 let end = start + index_entry.compressed_len();
256 263 let data = if self.index.is_inline() {
257 264 self.index.data(start, end)
258 265 } else {
259 266 &self.data()[start..end]
260 267 };
261 268 let entry = RevlogEntry {
262 269 revlog: self,
263 270 rev,
264 271 bytes: data,
265 272 compressed_len: index_entry.compressed_len(),
266 273 uncompressed_len: index_entry.uncompressed_len(),
267 274 base_rev_or_base_of_delta_chain: if index_entry
268 275 .base_revision_or_base_of_delta_chain()
269 276 == rev
270 277 {
271 278 None
272 279 } else {
273 280 Some(index_entry.base_revision_or_base_of_delta_chain())
274 281 },
275 282 };
276 283 Ok(entry)
277 284 }
278 285
279 286 /// when resolving internal references within revlog, any errors
280 287 /// should be reported as corruption, instead of e.g. "invalid revision"
281 288 fn get_entry_internal(
282 289 &self,
283 290 rev: Revision,
284 ) -> Result<RevlogEntry, RevlogError> {
285 return self.get_entry(rev).map_err(|_| RevlogError::corrupted());
291 ) -> Result<RevlogEntry, HgError> {
292 return self.get_entry(rev).map_err(|_| corrupted());
286 293 }
287 294 }
288 295
289 296 /// The revlog entry's bytes and the necessary information to extract
290 297 /// the entry's data.
291 298 #[derive(Clone)]
292 299 pub struct RevlogEntry<'a> {
293 300 revlog: &'a Revlog,
294 301 rev: Revision,
295 302 bytes: &'a [u8],
296 303 compressed_len: usize,
297 304 uncompressed_len: usize,
298 305 base_rev_or_base_of_delta_chain: Option<Revision>,
299 306 }
300 307
301 308 impl<'a> RevlogEntry<'a> {
302 309 pub fn revision(&self) -> Revision {
303 310 self.rev
304 311 }
305 312
306 313 /// The data for this entry, after resolving deltas if any.
307 pub fn data(&self) -> Result<Cow<'a, [u8]>, RevlogError> {
314 pub fn data(&self) -> Result<Cow<'a, [u8]>, HgError> {
308 315 let mut entry = self.clone();
309 316 let mut delta_chain = vec![];
310 317
311 318 // The meaning of `base_rev_or_base_of_delta_chain` depends on
312 319 // generaldelta. See the doc on `ENTRY_DELTA_BASE` in
313 320 // `mercurial/revlogutils/constants.py` and the code in
314 321 // [_chaininfo] and in [index_deltachain].
315 322 let uses_generaldelta = self.revlog.index.uses_generaldelta();
316 323 while let Some(base_rev) = entry.base_rev_or_base_of_delta_chain {
317 324 let base_rev = if uses_generaldelta {
318 325 base_rev
319 326 } else {
320 327 entry.rev - 1
321 328 };
322 329 delta_chain.push(entry);
323 330 entry = self.revlog.get_entry_internal(base_rev)?;
324 331 }
325 332
326 333 // TODO do not look twice in the index
327 334 let index_entry = self
328 335 .revlog
329 336 .index
330 337 .get_entry(self.rev)
331 .ok_or(RevlogError::InvalidRevision)?;
338 .ok_or_else(corrupted)?;
332 339
333 340 let data = if delta_chain.is_empty() {
334 341 entry.data_chunk()?
335 342 } else {
336 343 Revlog::build_data_from_deltas(entry, &delta_chain)?.into()
337 344 };
338 345
339 346 if self.revlog.check_hash(
340 347 index_entry.p1(),
341 348 index_entry.p2(),
342 349 index_entry.hash().as_bytes(),
343 350 &data,
344 351 ) {
345 352 Ok(data)
346 353 } else {
347 Err(RevlogError::corrupted())
354 Err(corrupted())
348 355 }
349 356 }
350 357
351 358 /// Extract the data contained in the entry.
352 359 /// This may be a delta. (See `is_delta`.)
353 fn data_chunk(&self) -> Result<Cow<'a, [u8]>, RevlogError> {
360 fn data_chunk(&self) -> Result<Cow<'a, [u8]>, HgError> {
354 361 if self.bytes.is_empty() {
355 362 return Ok(Cow::Borrowed(&[]));
356 363 }
357 364 match self.bytes[0] {
358 365 // Revision data is the entirety of the entry, including this
359 366 // header.
360 367 b'\0' => Ok(Cow::Borrowed(self.bytes)),
361 368 // Raw revision data follows.
362 369 b'u' => Ok(Cow::Borrowed(&self.bytes[1..])),
363 370 // zlib (RFC 1950) data.
364 371 b'x' => Ok(Cow::Owned(self.uncompressed_zlib_data()?)),
365 372 // zstd data.
366 373 b'\x28' => Ok(Cow::Owned(self.uncompressed_zstd_data()?)),
367 374 // A proper new format should have had a repo/store requirement.
368 _format_type => Err(RevlogError::corrupted()),
375 _format_type => Err(corrupted()),
369 376 }
370 377 }
371 378
372 fn uncompressed_zlib_data(&self) -> Result<Vec<u8>, RevlogError> {
379 fn uncompressed_zlib_data(&self) -> Result<Vec<u8>, HgError> {
373 380 let mut decoder = ZlibDecoder::new(self.bytes);
374 381 if self.is_delta() {
375 382 let mut buf = Vec::with_capacity(self.compressed_len);
376 decoder
377 .read_to_end(&mut buf)
378 .map_err(|_| RevlogError::corrupted())?;
383 decoder.read_to_end(&mut buf).map_err(|_| corrupted())?;
379 384 Ok(buf)
380 385 } else {
381 386 let mut buf = vec![0; self.uncompressed_len];
382 decoder
383 .read_exact(&mut buf)
384 .map_err(|_| RevlogError::corrupted())?;
387 decoder.read_exact(&mut buf).map_err(|_| corrupted())?;
385 388 Ok(buf)
386 389 }
387 390 }
388 391
389 fn uncompressed_zstd_data(&self) -> Result<Vec<u8>, RevlogError> {
392 fn uncompressed_zstd_data(&self) -> Result<Vec<u8>, HgError> {
390 393 if self.is_delta() {
391 394 let mut buf = Vec::with_capacity(self.compressed_len);
392 395 zstd::stream::copy_decode(self.bytes, &mut buf)
393 .map_err(|_| RevlogError::corrupted())?;
396 .map_err(|_| corrupted())?;
394 397 Ok(buf)
395 398 } else {
396 399 let mut buf = vec![0; self.uncompressed_len];
397 400 let len = zstd::block::decompress_to_buffer(self.bytes, &mut buf)
398 .map_err(|_| RevlogError::corrupted())?;
401 .map_err(|_| corrupted())?;
399 402 if len != self.uncompressed_len {
400 Err(RevlogError::corrupted())
403 Err(corrupted())
401 404 } else {
402 405 Ok(buf)
403 406 }
404 407 }
405 408 }
406 409
407 410 /// Tell if the entry is a snapshot or a delta
408 411 /// (influences decompression).
409 412 fn is_delta(&self) -> bool {
410 413 self.base_rev_or_base_of_delta_chain.is_some()
411 414 }
412 415 }
413 416
414 417 /// Calculate the hash of a revision given its data and its parents.
415 418 fn hash(
416 419 data: &[u8],
417 420 p1_hash: &[u8],
418 421 p2_hash: &[u8],
419 422 ) -> [u8; NODE_BYTES_LENGTH] {
420 423 let mut hasher = Sha1::new();
421 424 let (a, b) = (p1_hash, p2_hash);
422 425 if a > b {
423 426 hasher.update(b);
424 427 hasher.update(a);
425 428 } else {
426 429 hasher.update(a);
427 430 hasher.update(b);
428 431 }
429 432 hasher.update(data);
430 433 *hasher.finalize().as_ref()
431 434 }
@@ -1,538 +1,539
1 1 // status.rs
2 2 //
3 3 // Copyright 2020, Georges Racinet <georges.racinets@octobus.net>
4 4 //
5 5 // This software may be used and distributed according to the terms of the
6 6 // GNU General Public License version 2 or any later version.
7 7
8 8 use crate::error::CommandError;
9 9 use crate::ui::Ui;
10 10 use crate::utils::path_utils::RelativizePaths;
11 11 use clap::{Arg, SubCommand};
12 12 use format_bytes::format_bytes;
13 13 use hg;
14 14 use hg::config::Config;
15 15 use hg::dirstate::has_exec_bit;
16 16 use hg::dirstate::status::StatusPath;
17 17 use hg::dirstate::TruncatedTimestamp;
18 18 use hg::dirstate::RANGE_MASK_31BIT;
19 19 use hg::errors::{HgError, IoResultExt};
20 20 use hg::lock::LockError;
21 21 use hg::manifest::Manifest;
22 22 use hg::matchers::AlwaysMatcher;
23 23 use hg::repo::Repo;
24 24 use hg::utils::files::get_bytes_from_os_string;
25 25 use hg::utils::files::get_bytes_from_path;
26 26 use hg::utils::files::get_path_from_bytes;
27 27 use hg::utils::hg_path::{hg_path_to_path_buf, HgPath};
28 28 use hg::StatusOptions;
29 29 use log::info;
30 30 use std::io;
31 31 use std::path::PathBuf;
32 32
33 33 pub const HELP_TEXT: &str = "
34 34 Show changed files in the working directory
35 35
36 36 This is a pure Rust version of `hg status`.
37 37
38 38 Some options might be missing, check the list below.
39 39 ";
40 40
41 41 pub fn args() -> clap::App<'static, 'static> {
42 42 SubCommand::with_name("status")
43 43 .alias("st")
44 44 .about(HELP_TEXT)
45 45 .arg(
46 46 Arg::with_name("all")
47 47 .help("show status of all files")
48 48 .short("-A")
49 49 .long("--all"),
50 50 )
51 51 .arg(
52 52 Arg::with_name("modified")
53 53 .help("show only modified files")
54 54 .short("-m")
55 55 .long("--modified"),
56 56 )
57 57 .arg(
58 58 Arg::with_name("added")
59 59 .help("show only added files")
60 60 .short("-a")
61 61 .long("--added"),
62 62 )
63 63 .arg(
64 64 Arg::with_name("removed")
65 65 .help("show only removed files")
66 66 .short("-r")
67 67 .long("--removed"),
68 68 )
69 69 .arg(
70 70 Arg::with_name("clean")
71 71 .help("show only clean files")
72 72 .short("-c")
73 73 .long("--clean"),
74 74 )
75 75 .arg(
76 76 Arg::with_name("deleted")
77 77 .help("show only deleted files")
78 78 .short("-d")
79 79 .long("--deleted"),
80 80 )
81 81 .arg(
82 82 Arg::with_name("unknown")
83 83 .help("show only unknown (not tracked) files")
84 84 .short("-u")
85 85 .long("--unknown"),
86 86 )
87 87 .arg(
88 88 Arg::with_name("ignored")
89 89 .help("show only ignored files")
90 90 .short("-i")
91 91 .long("--ignored"),
92 92 )
93 93 .arg(
94 94 Arg::with_name("copies")
95 95 .help("show source of copied files (DEFAULT: ui.statuscopies)")
96 96 .short("-C")
97 97 .long("--copies"),
98 98 )
99 99 .arg(
100 100 Arg::with_name("no-status")
101 101 .help("hide status prefix")
102 102 .short("-n")
103 103 .long("--no-status"),
104 104 )
105 105 }
106 106
107 107 /// Pure data type allowing the caller to specify file states to display
108 108 #[derive(Copy, Clone, Debug)]
109 109 pub struct DisplayStates {
110 110 pub modified: bool,
111 111 pub added: bool,
112 112 pub removed: bool,
113 113 pub clean: bool,
114 114 pub deleted: bool,
115 115 pub unknown: bool,
116 116 pub ignored: bool,
117 117 }
118 118
119 119 pub const DEFAULT_DISPLAY_STATES: DisplayStates = DisplayStates {
120 120 modified: true,
121 121 added: true,
122 122 removed: true,
123 123 clean: false,
124 124 deleted: true,
125 125 unknown: true,
126 126 ignored: false,
127 127 };
128 128
129 129 pub const ALL_DISPLAY_STATES: DisplayStates = DisplayStates {
130 130 modified: true,
131 131 added: true,
132 132 removed: true,
133 133 clean: true,
134 134 deleted: true,
135 135 unknown: true,
136 136 ignored: true,
137 137 };
138 138
139 139 impl DisplayStates {
140 140 pub fn is_empty(&self) -> bool {
141 141 !(self.modified
142 142 || self.added
143 143 || self.removed
144 144 || self.clean
145 145 || self.deleted
146 146 || self.unknown
147 147 || self.ignored)
148 148 }
149 149 }
150 150
151 151 pub fn run(invocation: &crate::CliInvocation) -> Result<(), CommandError> {
152 152 let status_enabled_default = false;
153 153 let status_enabled = invocation.config.get_option(b"rhg", b"status")?;
154 154 if !status_enabled.unwrap_or(status_enabled_default) {
155 155 return Err(CommandError::unsupported(
156 156 "status is experimental in rhg (enable it with 'rhg.status = true' \
157 157 or enable fallback with 'rhg.on-unsupported = fallback')"
158 158 ));
159 159 }
160 160
161 161 // TODO: lift these limitations
162 162 if invocation.config.get_bool(b"ui", b"tweakdefaults")? {
163 163 return Err(CommandError::unsupported(
164 164 "ui.tweakdefaults is not yet supported with rhg status",
165 165 ));
166 166 }
167 167 if invocation.config.get_bool(b"ui", b"statuscopies")? {
168 168 return Err(CommandError::unsupported(
169 169 "ui.statuscopies is not yet supported with rhg status",
170 170 ));
171 171 }
172 172 if invocation
173 173 .config
174 174 .get(b"commands", b"status.terse")
175 175 .is_some()
176 176 {
177 177 return Err(CommandError::unsupported(
178 178 "status.terse is not yet supported with rhg status",
179 179 ));
180 180 }
181 181
182 182 let ui = invocation.ui;
183 183 let config = invocation.config;
184 184 let args = invocation.subcommand_args;
185 185
186 186 let verbose = !ui.plain()
187 187 && !args.is_present("print0")
188 188 && (config.get_bool(b"ui", b"verbose")?
189 189 || config.get_bool(b"commands", b"status.verbose")?);
190 190 if verbose {
191 191 return Err(CommandError::unsupported(
192 192 "verbose status is not supported yet",
193 193 ));
194 194 }
195 195
196 196 let all = args.is_present("all");
197 197 let display_states = if all {
198 198 // TODO when implementing `--quiet`: it excludes clean files
199 199 // from `--all`
200 200 ALL_DISPLAY_STATES
201 201 } else {
202 202 let requested = DisplayStates {
203 203 modified: args.is_present("modified"),
204 204 added: args.is_present("added"),
205 205 removed: args.is_present("removed"),
206 206 clean: args.is_present("clean"),
207 207 deleted: args.is_present("deleted"),
208 208 unknown: args.is_present("unknown"),
209 209 ignored: args.is_present("ignored"),
210 210 };
211 211 if requested.is_empty() {
212 212 DEFAULT_DISPLAY_STATES
213 213 } else {
214 214 requested
215 215 }
216 216 };
217 217 let no_status = args.is_present("no-status");
218 218 let list_copies = all
219 219 || args.is_present("copies")
220 220 || config.get_bool(b"ui", b"statuscopies")?;
221 221
222 222 let repo = invocation.repo?;
223 223
224 224 if repo.has_sparse() || repo.has_narrow() {
225 225 return Err(CommandError::unsupported(
226 226 "rhg status is not supported for sparse checkouts or narrow clones yet"
227 227 ));
228 228 }
229 229
230 230 let mut dmap = repo.dirstate_map_mut()?;
231 231
232 232 let options = StatusOptions {
233 233 // we're currently supporting file systems with exec flags only
234 234 // anyway
235 235 check_exec: true,
236 236 list_clean: display_states.clean,
237 237 list_unknown: display_states.unknown,
238 238 list_ignored: display_states.ignored,
239 239 list_copies,
240 240 collect_traversed_dirs: false,
241 241 };
242 242 let (mut ds_status, pattern_warnings) = dmap.status(
243 243 &AlwaysMatcher,
244 244 repo.working_directory_path().to_owned(),
245 245 ignore_files(repo, config),
246 246 options,
247 247 )?;
248 248 for warning in pattern_warnings {
249 249 match warning {
250 250 hg::PatternFileWarning::InvalidSyntax(path, syntax) => ui
251 251 .write_stderr(&format_bytes!(
252 252 b"{}: ignoring invalid syntax '{}'\n",
253 253 get_bytes_from_path(path),
254 254 &*syntax
255 255 ))?,
256 256 hg::PatternFileWarning::NoSuchFile(path) => {
257 257 let path = if let Ok(relative) =
258 258 path.strip_prefix(repo.working_directory_path())
259 259 {
260 260 relative
261 261 } else {
262 262 &*path
263 263 };
264 264 ui.write_stderr(&format_bytes!(
265 265 b"skipping unreadable pattern file '{}': \
266 266 No such file or directory\n",
267 267 get_bytes_from_path(path),
268 268 ))?
269 269 }
270 270 }
271 271 }
272 272
273 273 for (path, error) in ds_status.bad {
274 274 let error = match error {
275 275 hg::BadMatch::OsError(code) => {
276 276 std::io::Error::from_raw_os_error(code).to_string()
277 277 }
278 278 hg::BadMatch::BadType(ty) => {
279 279 format!("unsupported file type (type is {})", ty)
280 280 }
281 281 };
282 282 ui.write_stderr(&format_bytes!(
283 283 b"{}: {}\n",
284 284 path.as_bytes(),
285 285 error.as_bytes()
286 286 ))?
287 287 }
288 288 if !ds_status.unsure.is_empty() {
289 289 info!(
290 290 "Files to be rechecked by retrieval from filelog: {:?}",
291 291 ds_status.unsure.iter().map(|s| &s.path).collect::<Vec<_>>()
292 292 );
293 293 }
294 294 let mut fixup = Vec::new();
295 295 if !ds_status.unsure.is_empty()
296 296 && (display_states.modified || display_states.clean)
297 297 {
298 298 let p1 = repo.dirstate_parents()?.p1;
299 299 let manifest = repo.manifest_for_node(p1).map_err(|e| {
300 300 CommandError::from((e, &*format!("{:x}", p1.short())))
301 301 })?;
302 302 for to_check in ds_status.unsure {
303 303 if unsure_is_modified(repo, &manifest, &to_check.path)? {
304 304 if display_states.modified {
305 305 ds_status.modified.push(to_check);
306 306 }
307 307 } else {
308 308 if display_states.clean {
309 309 ds_status.clean.push(to_check.clone());
310 310 }
311 311 fixup.push(to_check.path.into_owned())
312 312 }
313 313 }
314 314 }
315 315 let relative_paths = (!ui.plain())
316 316 && config
317 317 .get_option(b"commands", b"status.relative")?
318 318 .unwrap_or(config.get_bool(b"ui", b"relative-paths")?);
319 319 let output = DisplayStatusPaths {
320 320 ui,
321 321 no_status,
322 322 relativize: if relative_paths {
323 323 Some(RelativizePaths::new(repo)?)
324 324 } else {
325 325 None
326 326 },
327 327 };
328 328 if display_states.modified {
329 329 output.display(b"M", ds_status.modified)?;
330 330 }
331 331 if display_states.added {
332 332 output.display(b"A", ds_status.added)?;
333 333 }
334 334 if display_states.removed {
335 335 output.display(b"R", ds_status.removed)?;
336 336 }
337 337 if display_states.deleted {
338 338 output.display(b"!", ds_status.deleted)?;
339 339 }
340 340 if display_states.unknown {
341 341 output.display(b"?", ds_status.unknown)?;
342 342 }
343 343 if display_states.ignored {
344 344 output.display(b"I", ds_status.ignored)?;
345 345 }
346 346 if display_states.clean {
347 347 output.display(b"C", ds_status.clean)?;
348 348 }
349 349
350 350 let mut dirstate_write_needed = ds_status.dirty;
351 351 let filesystem_time_at_status_start =
352 352 ds_status.filesystem_time_at_status_start;
353 353
354 354 if (fixup.is_empty() || filesystem_time_at_status_start.is_none())
355 355 && !dirstate_write_needed
356 356 {
357 357 // Nothing to update
358 358 return Ok(());
359 359 }
360 360
361 361 // Update the dirstate on disk if we can
362 362 let with_lock_result =
363 363 repo.try_with_wlock_no_wait(|| -> Result<(), CommandError> {
364 364 if let Some(mtime_boundary) = filesystem_time_at_status_start {
365 365 for hg_path in fixup {
366 366 use std::os::unix::fs::MetadataExt;
367 367 let fs_path = hg_path_to_path_buf(&hg_path)
368 368 .expect("HgPath conversion");
369 369 // Specifically do not reuse `fs_metadata` from
370 370 // `unsure_is_modified` which was needed before reading
371 371 // contents. Here we access metadata again after reading
372 372 // content, in case it changed in the meantime.
373 373 let fs_metadata = repo
374 374 .working_directory_vfs()
375 375 .symlink_metadata(&fs_path)?;
376 376 if let Some(mtime) =
377 377 TruncatedTimestamp::for_reliable_mtime_of(
378 378 &fs_metadata,
379 379 &mtime_boundary,
380 380 )
381 381 .when_reading_file(&fs_path)?
382 382 {
383 383 let mode = fs_metadata.mode();
384 384 let size = fs_metadata.len() as u32 & RANGE_MASK_31BIT;
385 385 let mut entry = dmap
386 386 .get(&hg_path)?
387 387 .expect("ambiguous file not in dirstate");
388 388 entry.set_clean(mode, size, mtime);
389 389 dmap.add_file(&hg_path, entry)?;
390 390 dirstate_write_needed = true
391 391 }
392 392 }
393 393 }
394 394 drop(dmap); // Avoid "already mutably borrowed" RefCell panics
395 395 if dirstate_write_needed {
396 396 repo.write_dirstate()?
397 397 }
398 398 Ok(())
399 399 });
400 400 match with_lock_result {
401 401 Ok(closure_result) => closure_result?,
402 402 Err(LockError::AlreadyHeld) => {
403 403 // Not updating the dirstate is not ideal but not critical:
404 404 // don’t keep our caller waiting until some other Mercurial
405 405 // process releases the lock.
406 406 }
407 407 Err(LockError::Other(HgError::IoError { error, .. }))
408 408 if error.kind() == io::ErrorKind::PermissionDenied =>
409 409 {
410 410 // `hg status` on a read-only repository is fine
411 411 }
412 412 Err(LockError::Other(error)) => {
413 413 // Report other I/O errors
414 414 Err(error)?
415 415 }
416 416 }
417 417 Ok(())
418 418 }
419 419
420 420 fn ignore_files(repo: &Repo, config: &Config) -> Vec<PathBuf> {
421 421 let mut ignore_files = Vec::new();
422 422 let repo_ignore = repo.working_directory_vfs().join(".hgignore");
423 423 if repo_ignore.exists() {
424 424 ignore_files.push(repo_ignore)
425 425 }
426 426 for (key, value) in config.iter_section(b"ui") {
427 427 if key == b"ignore" || key.starts_with(b"ignore.") {
428 428 let path = get_path_from_bytes(value);
429 429 // TODO: expand "~/" and environment variable here, like Python
430 430 // does with `os.path.expanduser` and `os.path.expandvars`
431 431
432 432 let joined = repo.working_directory_path().join(path);
433 433 ignore_files.push(joined);
434 434 }
435 435 }
436 436 ignore_files
437 437 }
438 438
439 439 struct DisplayStatusPaths<'a> {
440 440 ui: &'a Ui,
441 441 no_status: bool,
442 442 relativize: Option<RelativizePaths>,
443 443 }
444 444
445 445 impl DisplayStatusPaths<'_> {
446 446 // Probably more elegant to use a Deref or Borrow trait rather than
447 447 // harcode HgPathBuf, but probably not really useful at this point
448 448 fn display(
449 449 &self,
450 450 status_prefix: &[u8],
451 451 mut paths: Vec<StatusPath<'_>>,
452 452 ) -> Result<(), CommandError> {
453 453 paths.sort_unstable();
454 454 for StatusPath { path, copy_source } in paths {
455 455 let relative;
456 456 let path = if let Some(relativize) = &self.relativize {
457 457 relative = relativize.relativize(&path);
458 458 &*relative
459 459 } else {
460 460 path.as_bytes()
461 461 };
462 462 // TODO optim, probably lots of unneeded copies here, especially
463 463 // if out stream is buffered
464 464 if self.no_status {
465 465 self.ui.write_stdout(&format_bytes!(b"{}\n", path))?
466 466 } else {
467 467 self.ui.write_stdout(&format_bytes!(
468 468 b"{} {}\n",
469 469 status_prefix,
470 470 path
471 471 ))?
472 472 }
473 473 if let Some(source) = copy_source {
474 474 self.ui.write_stdout(&format_bytes!(
475 475 b" {}\n",
476 476 source.as_bytes()
477 477 ))?
478 478 }
479 479 }
480 480 Ok(())
481 481 }
482 482 }
483 483
484 484 /// Check if a file is modified by comparing actual repo store and file system.
485 485 ///
486 486 /// This meant to be used for those that the dirstate cannot resolve, due
487 487 /// to time resolution limits.
488 488 fn unsure_is_modified(
489 489 repo: &Repo,
490 490 manifest: &Manifest,
491 491 hg_path: &HgPath,
492 492 ) -> Result<bool, HgError> {
493 493 let vfs = repo.working_directory_vfs();
494 494 let fs_path = hg_path_to_path_buf(hg_path).expect("HgPath conversion");
495 495 let fs_metadata = vfs.symlink_metadata(&fs_path)?;
496 496 let is_symlink = fs_metadata.file_type().is_symlink();
497 497 // TODO: Also account for `FALLBACK_SYMLINK` and `FALLBACK_EXEC` from the
498 498 // dirstate
499 499 let fs_flags = if is_symlink {
500 500 Some(b'l')
501 501 } else if has_exec_bit(&fs_metadata) {
502 502 Some(b'x')
503 503 } else {
504 504 None
505 505 };
506 506
507 507 let entry = manifest
508 508 .find_by_path(hg_path)?
509 509 .expect("ambgious file not in p1");
510 510 if entry.flags != fs_flags {
511 511 return Ok(true);
512 512 }
513 513 let filelog = repo.filelog(hg_path)?;
514 514 let fs_len = fs_metadata.len();
515 let filelog_entry =
516 filelog.entry_for_node(entry.node_id()?).map_err(|_| {
517 HgError::corrupted("filelog missing node from manifest")
518 })?;
515 519 // TODO: check `fs_len` here like below, but based on
516 520 // `RevlogEntry::uncompressed_len` without decompressing the full filelog
517 521 // contents where possible. This is only valid if the revlog data does not
518 522 // contain metadata. See how Python’s `revlog.rawsize` calls
519 523 // `storageutil.filerevisioncopied`.
520 524 // (Maybe also check for content-modifying flags? See `revlog.size`.)
521 let filelog_entry =
522 filelog.data_for_node(entry.node_id()?).map_err(|_| {
523 HgError::corrupted("filelog missing node from manifest")
524 })?;
525 let contents_in_p1 = filelog_entry.file_data()?;
525 let filelog_data = filelog_entry.data()?;
526 let contents_in_p1 = filelog_data.file_data()?;
526 527 if contents_in_p1.len() as u64 != fs_len {
527 528 // No need to read the file contents:
528 529 // it cannot be equal if it has a different length.
529 530 return Ok(true);
530 531 }
531 532
532 533 let fs_contents = if is_symlink {
533 534 get_bytes_from_os_string(vfs.read_link(fs_path)?.into_os_string())
534 535 } else {
535 536 vfs.read(fs_path)?
536 537 };
537 538 Ok(contents_in_p1 != &*fs_contents)
538 539 }
General Comments 0
You need to be logged in to leave comments. Login now