##// END OF EJS Templates
rhg: Add support for dirstate-v2...
Simon Sapin -
r48165:bd88b6bf default
parent child Browse files
Show More
@@ -1,141 +1,149 b''
1 1 // dirstate module
2 2 //
3 3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
4 4 //
5 5 // This software may be used and distributed according to the terms of the
6 6 // GNU General Public License version 2 or any later version.
7 7
8 8 use crate::dirstate_tree::on_disk::DirstateV2ParseError;
9 9 use crate::errors::HgError;
10 use crate::revlog::node::NULL_NODE;
10 11 use crate::revlog::Node;
11 12 use crate::utils::hg_path::{HgPath, HgPathBuf};
12 13 use crate::FastHashMap;
13 14 use bytes_cast::{unaligned, BytesCast};
14 15 use std::convert::TryFrom;
15 16
16 17 pub mod dirs_multiset;
17 18 pub mod dirstate_map;
18 19 pub mod parsers;
19 20 pub mod status;
20 21
21 22 #[derive(Debug, PartialEq, Clone, BytesCast)]
22 23 #[repr(C)]
23 24 pub struct DirstateParents {
24 25 pub p1: Node,
25 26 pub p2: Node,
26 27 }
27 28
29 impl DirstateParents {
30 pub const NULL: Self = Self {
31 p1: NULL_NODE,
32 p2: NULL_NODE,
33 };
34 }
35
28 36 /// The C implementation uses all signed types. This will be an issue
29 37 /// either when 4GB+ source files are commonplace or in 2038, whichever
30 38 /// comes first.
31 39 #[derive(Debug, PartialEq, Copy, Clone)]
32 40 pub struct DirstateEntry {
33 41 pub state: EntryState,
34 42 pub mode: i32,
35 43 pub mtime: i32,
36 44 pub size: i32,
37 45 }
38 46
39 47 impl DirstateEntry {
40 48 pub fn is_non_normal(&self) -> bool {
41 49 self.state != EntryState::Normal || self.mtime == MTIME_UNSET
42 50 }
43 51
44 52 pub fn is_from_other_parent(&self) -> bool {
45 53 self.state == EntryState::Normal && self.size == SIZE_FROM_OTHER_PARENT
46 54 }
47 55
48 56 // TODO: other platforms
49 57 #[cfg(unix)]
50 58 pub fn mode_changed(
51 59 &self,
52 60 filesystem_metadata: &std::fs::Metadata,
53 61 ) -> bool {
54 62 use std::os::unix::fs::MetadataExt;
55 63 const EXEC_BIT_MASK: u32 = 0o100;
56 64 let dirstate_exec_bit = (self.mode as u32) & EXEC_BIT_MASK;
57 65 let fs_exec_bit = filesystem_metadata.mode() & EXEC_BIT_MASK;
58 66 dirstate_exec_bit != fs_exec_bit
59 67 }
60 68 }
61 69
62 70 #[derive(BytesCast)]
63 71 #[repr(C)]
64 72 struct RawEntry {
65 73 state: u8,
66 74 mode: unaligned::I32Be,
67 75 size: unaligned::I32Be,
68 76 mtime: unaligned::I32Be,
69 77 length: unaligned::I32Be,
70 78 }
71 79
/// Sentinel mtime meaning "modification time unknown".
const MTIME_UNSET: i32 = -1;

/// A `DirstateEntry` with a size of `-2` means that it was merged from the
/// other parent. This allows revert to pick the right status back during a
/// merge.
pub const SIZE_FROM_OTHER_PARENT: i32 = -2;
79 87 pub type StateMap = FastHashMap<HgPathBuf, DirstateEntry>;
80 88 pub type StateMapIter<'a> = Box<
81 89 dyn Iterator<
82 90 Item = Result<(&'a HgPath, DirstateEntry), DirstateV2ParseError>,
83 91 > + Send
84 92 + 'a,
85 93 >;
86 94
87 95 pub type CopyMap = FastHashMap<HgPathBuf, HgPathBuf>;
88 96 pub type CopyMapIter<'a> = Box<
89 97 dyn Iterator<Item = Result<(&'a HgPath, &'a HgPath), DirstateV2ParseError>>
90 98 + Send
91 99 + 'a,
92 100 >;
93 101
/// The tracking state of a dirstate entry, serialized on disk as one of the
/// ASCII bytes `n`, `a`, `r`, `m`, or `?` (see the `TryFrom<u8>` impl).
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub enum EntryState {
    Normal,
    Added,
    Removed,
    Merged,
    Unknown,
}
102 110
103 111 impl EntryState {
104 112 pub fn is_tracked(self) -> bool {
105 113 use EntryState::*;
106 114 match self {
107 115 Normal | Added | Merged => true,
108 116 Removed | Unknown => false,
109 117 }
110 118 }
111 119 }
112 120
113 121 impl TryFrom<u8> for EntryState {
114 122 type Error = HgError;
115 123
116 124 fn try_from(value: u8) -> Result<Self, Self::Error> {
117 125 match value {
118 126 b'n' => Ok(EntryState::Normal),
119 127 b'a' => Ok(EntryState::Added),
120 128 b'r' => Ok(EntryState::Removed),
121 129 b'm' => Ok(EntryState::Merged),
122 130 b'?' => Ok(EntryState::Unknown),
123 131 _ => Err(HgError::CorruptedRepository(format!(
124 132 "Incorrect dirstate entry state {}",
125 133 value
126 134 ))),
127 135 }
128 136 }
129 137 }
130 138
131 139 impl Into<u8> for EntryState {
132 140 fn into(self) -> u8 {
133 141 match self {
134 142 EntryState::Normal => b'n',
135 143 EntryState::Added => b'a',
136 144 EntryState::Removed => b'r',
137 145 EntryState::Merged => b'm',
138 146 EntryState::Unknown => b'?',
139 147 }
140 148 }
141 149 }
@@ -1,5 +1,5 b''
1 1 pub mod dirstate_map;
2 2 pub mod dispatch;
3 3 pub mod on_disk;
4 4 pub mod path_with_basename;
5 mod status;
5 pub mod status;
@@ -1,543 +1,574 b''
1 1 //! The "version 2" disk representation of the dirstate
2 2 //!
3 3 //! # File format
4 4 //!
5 5 //! The file starts with a fixed-sized header, whose layout is defined by the
6 6 //! `Header` struct. Its `root` field contains the slice (offset and length) to
7 7 //! the nodes representing the files and directories at the root of the
8 8 //! repository. Each node is also fixed-size, defined by the `Node` struct.
9 9 //! Nodes in turn contain slices to variable-size paths, and to their own child
10 10 //! nodes (if any) for nested files and directories.
11 11
12 12 use crate::dirstate_tree::dirstate_map::{self, DirstateMap, NodeRef};
13 13 use crate::dirstate_tree::path_with_basename::WithBasename;
14 14 use crate::errors::HgError;
15 15 use crate::utils::hg_path::HgPath;
16 16 use crate::DirstateEntry;
17 17 use crate::DirstateError;
18 18 use crate::DirstateParents;
19 19 use crate::EntryState;
20 20 use bytes_cast::unaligned::{I32Be, I64Be, U32Be, U64Be};
21 21 use bytes_cast::BytesCast;
22 22 use std::borrow::Cow;
23 23 use std::convert::TryFrom;
24 24 use std::time::{Duration, SystemTime, UNIX_EPOCH};
25 25
/// Added at the start of `.hg/dirstate` when the "v2" format is used.
/// This a redundant sanity check more than an actual "magic number" since
/// `.hg/requires` already governs which format should be used.
pub const V2_FORMAT_MARKER: &[u8; 12] = b"dirstate-v2\n";
30 30
31 31 #[derive(BytesCast)]
32 32 #[repr(C)]
33 33 struct Header {
34 34 marker: [u8; V2_FORMAT_MARKER.len()],
35 35
36 36 /// `dirstatemap.parents()` in `mercurial/dirstate.py` relies on this
37 37 /// `parents` field being at this offset, immediately after `marker`.
38 38 parents: DirstateParents,
39 39
40 40 root: ChildNodes,
41 41 nodes_with_entry_count: Size,
42 42 nodes_with_copy_source_count: Size,
43 43 }
44 44
45 45 #[derive(BytesCast)]
46 46 #[repr(C)]
47 47 pub(super) struct Node {
48 48 full_path: PathSlice,
49 49
50 50 /// In bytes from `self.full_path.start`
51 51 base_name_start: Size,
52 52
53 53 copy_source: OptPathSlice,
54 54 children: ChildNodes,
55 55 pub(super) tracked_descendants_count: Size,
56 56
57 57 /// Dependending on the value of `state`:
58 58 ///
59 59 /// * A null byte: `data` is not used.
60 60 ///
61 61 /// * A `n`, `a`, `r`, or `m` ASCII byte: `state` and `data` together
62 62 /// represent a dirstate entry like in the v1 format.
63 63 ///
64 64 /// * A `d` ASCII byte: the bytes of `data` should instead be interpreted
65 65 /// as the `Timestamp` for the mtime of a cached directory.
66 66 ///
67 67 /// The presence of this state means that at some point, this path in
68 68 /// the working directory was observed:
69 69 ///
70 70 /// - To be a directory
71 71 /// - With the modification time as given by `Timestamp`
72 72 /// - That timestamp was already strictly in the past when observed,
73 73 /// meaning that later changes cannot happen in the same clock tick
74 74 /// and must cause a different modification time (unless the system
75 75 /// clock jumps back and we get unlucky, which is not impossible but
76 76 /// but deemed unlikely enough).
77 77 /// - The directory did not contain any child entry that did not have a
78 78 /// corresponding dirstate node.
79 79 ///
80 80 /// This means that if `std::fs::symlink_metadata` later reports the
81 81 /// same modification time, we don’t need to call `std::fs::read_dir`
82 82 /// again for this directory and can iterate child dirstate nodes
83 83 /// instead.
84 84 state: u8,
85 85 data: Entry,
86 86 }
87 87
88 88 #[derive(BytesCast, Copy, Clone)]
89 89 #[repr(C)]
90 90 struct Entry {
91 91 mode: I32Be,
92 92 mtime: I32Be,
93 93 size: I32Be,
94 94 }
95 95
96 96 /// Duration since the Unix epoch
97 97 #[derive(BytesCast, Copy, Clone, PartialEq)]
98 98 #[repr(C)]
99 99 pub(super) struct Timestamp {
100 100 seconds: I64Be,
101 101
102 102 /// In `0 .. 1_000_000_000`.
103 103 ///
104 104 /// This timestamp is later or earlier than `(seconds, 0)` by this many
105 105 /// nanoseconds, if `seconds` is non-negative or negative, respectively.
106 106 nanoseconds: U32Be,
107 107 }
108 108
109 109 /// Counted in bytes from the start of the file
110 110 ///
111 111 /// NOTE: If we decide to never support `.hg/dirstate` files larger than 4 GiB
112 112 /// we could save space by using `U32Be` instead.
113 113 type Offset = U64Be;
114 114
115 115 /// Counted in number of items
116 116 ///
117 117 /// NOTE: not supporting directories with more than 4 billion direct children,
118 118 /// or filenames more than 4 GiB.
119 119 type Size = U32Be;
120 120
121 121 /// Location of consecutive, fixed-size items.
122 122 ///
123 123 /// An item can be a single byte for paths, or a struct with
124 124 /// `derive(BytesCast)`.
125 125 #[derive(BytesCast, Copy, Clone)]
126 126 #[repr(C)]
127 127 struct Slice {
128 128 start: Offset,
129 129 len: Size,
130 130 }
131 131
132 132 /// A contiguous sequence of `len` times `Node`, representing the child nodes
133 133 /// of either some other node or of the repository root.
134 134 ///
135 135 /// Always sorted by ascending `full_path`, to allow binary search.
136 136 /// Since nodes with the same parent nodes also have the same parent path,
137 137 /// only the `base_name`s need to be compared during binary search.
138 138 type ChildNodes = Slice;
139 139
140 140 /// A `HgPath` of `len` bytes
141 141 type PathSlice = Slice;
142 142
143 143 /// Either nothing if `start == 0`, or a `HgPath` of `len` bytes
144 144 type OptPathSlice = Slice;
145 145
146 146 /// Make sure that size-affecting changes are made knowingly
147 147 fn _static_assert_size_of() {
148 148 let _ = std::mem::transmute::<Header, [u8; 72]>;
149 149 let _ = std::mem::transmute::<Node, [u8; 57]>;
150 150 }
151 151
/// Unexpected file format found in `.hg/dirstate` with the "v2" format.
///
/// This should only happen if Mercurial is buggy or a repository is corrupted.
#[derive(Debug)]
pub struct DirstateV2ParseError;
157 157
158 158 impl From<DirstateV2ParseError> for HgError {
159 159 fn from(_: DirstateV2ParseError) -> Self {
160 160 HgError::corrupted("dirstate-v2 parse error")
161 161 }
162 162 }
163 163
164 164 impl From<DirstateV2ParseError> for crate::DirstateError {
165 165 fn from(error: DirstateV2ParseError) -> Self {
166 166 HgError::from(error).into()
167 167 }
168 168 }
169 169
170 fn read_header(on_disk: &[u8]) -> Result<&Header, DirstateV2ParseError> {
171 let (header, _) =
172 Header::from_bytes(on_disk).map_err(|_| DirstateV2ParseError)?;
173 if header.marker == *V2_FORMAT_MARKER {
174 Ok(header)
175 } else {
176 Err(DirstateV2ParseError)
177 }
178 }
179
170 180 pub(super) fn read<'on_disk>(
171 181 on_disk: &'on_disk [u8],
172 182 ) -> Result<
173 183 (DirstateMap<'on_disk>, Option<DirstateParents>),
174 184 DirstateV2ParseError,
175 185 > {
176 186 if on_disk.is_empty() {
177 187 return Ok((DirstateMap::empty(on_disk), None));
178 188 }
179 let (header, _) =
180 Header::from_bytes(on_disk).map_err(|_| DirstateV2ParseError)?;
181 let Header {
182 marker,
183 parents,
184 root,
185 nodes_with_entry_count,
186 nodes_with_copy_source_count,
187 } = header;
188 if marker != V2_FORMAT_MARKER {
189 return Err(DirstateV2ParseError);
190 }
189 let header = read_header(on_disk)?;
191 190 let dirstate_map = DirstateMap {
192 191 on_disk,
193 192 root: dirstate_map::ChildNodes::OnDisk(read_slice::<Node>(
194 on_disk, *root,
193 on_disk,
194 header.root,
195 195 )?),
196 nodes_with_entry_count: nodes_with_entry_count.get(),
197 nodes_with_copy_source_count: nodes_with_copy_source_count.get(),
196 nodes_with_entry_count: header.nodes_with_entry_count.get(),
197 nodes_with_copy_source_count: header
198 .nodes_with_copy_source_count
199 .get(),
198 200 };
199 let parents = Some(parents.clone());
201 let parents = Some(header.parents.clone());
200 202 Ok((dirstate_map, parents))
201 203 }
202 204
203 205 impl Node {
204 206 pub(super) fn full_path<'on_disk>(
205 207 &self,
206 208 on_disk: &'on_disk [u8],
207 209 ) -> Result<&'on_disk HgPath, DirstateV2ParseError> {
208 210 read_hg_path(on_disk, self.full_path)
209 211 }
210 212
211 213 pub(super) fn base_name_start<'on_disk>(
212 214 &self,
213 215 ) -> Result<usize, DirstateV2ParseError> {
214 216 let start = self.base_name_start.get();
215 217 if start < self.full_path.len.get() {
216 218 let start = usize::try_from(start)
217 219 // u32 -> usize, could only panic on a 16-bit CPU
218 220 .expect("dirstate-v2 base_name_start out of bounds");
219 221 Ok(start)
220 222 } else {
221 223 Err(DirstateV2ParseError)
222 224 }
223 225 }
224 226
225 227 pub(super) fn base_name<'on_disk>(
226 228 &self,
227 229 on_disk: &'on_disk [u8],
228 230 ) -> Result<&'on_disk HgPath, DirstateV2ParseError> {
229 231 let full_path = self.full_path(on_disk)?;
230 232 let base_name_start = self.base_name_start()?;
231 233 Ok(HgPath::new(&full_path.as_bytes()[base_name_start..]))
232 234 }
233 235
234 236 pub(super) fn path<'on_disk>(
235 237 &self,
236 238 on_disk: &'on_disk [u8],
237 239 ) -> Result<dirstate_map::NodeKey<'on_disk>, DirstateV2ParseError> {
238 240 Ok(WithBasename::from_raw_parts(
239 241 Cow::Borrowed(self.full_path(on_disk)?),
240 242 self.base_name_start()?,
241 243 ))
242 244 }
243 245
244 246 pub(super) fn has_copy_source<'on_disk>(&self) -> bool {
245 247 self.copy_source.start.get() != 0
246 248 }
247 249
248 250 pub(super) fn copy_source<'on_disk>(
249 251 &self,
250 252 on_disk: &'on_disk [u8],
251 253 ) -> Result<Option<&'on_disk HgPath>, DirstateV2ParseError> {
252 254 Ok(if self.has_copy_source() {
253 255 Some(read_hg_path(on_disk, self.copy_source)?)
254 256 } else {
255 257 None
256 258 })
257 259 }
258 260
259 261 pub(super) fn node_data(
260 262 &self,
261 263 ) -> Result<dirstate_map::NodeData, DirstateV2ParseError> {
262 264 let entry = |state| {
263 265 dirstate_map::NodeData::Entry(self.entry_with_given_state(state))
264 266 };
265 267
266 268 match self.state {
267 269 b'\0' => Ok(dirstate_map::NodeData::None),
268 270 b'd' => Ok(dirstate_map::NodeData::CachedDirectory {
269 271 mtime: *self.data.as_timestamp(),
270 272 }),
271 273 b'n' => Ok(entry(EntryState::Normal)),
272 274 b'a' => Ok(entry(EntryState::Added)),
273 275 b'r' => Ok(entry(EntryState::Removed)),
274 276 b'm' => Ok(entry(EntryState::Merged)),
275 277 _ => Err(DirstateV2ParseError),
276 278 }
277 279 }
278 280
279 281 pub(super) fn cached_directory_mtime(&self) -> Option<&Timestamp> {
280 282 if self.state == b'd' {
281 283 Some(self.data.as_timestamp())
282 284 } else {
283 285 None
284 286 }
285 287 }
286 288
287 289 pub(super) fn state(
288 290 &self,
289 291 ) -> Result<Option<EntryState>, DirstateV2ParseError> {
290 292 match self.state {
291 293 b'\0' | b'd' => Ok(None),
292 294 b'n' => Ok(Some(EntryState::Normal)),
293 295 b'a' => Ok(Some(EntryState::Added)),
294 296 b'r' => Ok(Some(EntryState::Removed)),
295 297 b'm' => Ok(Some(EntryState::Merged)),
296 298 _ => Err(DirstateV2ParseError),
297 299 }
298 300 }
299 301
300 302 fn entry_with_given_state(&self, state: EntryState) -> DirstateEntry {
301 303 DirstateEntry {
302 304 state,
303 305 mode: self.data.mode.get(),
304 306 mtime: self.data.mtime.get(),
305 307 size: self.data.size.get(),
306 308 }
307 309 }
308 310
309 311 pub(super) fn entry(
310 312 &self,
311 313 ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {
312 314 Ok(self
313 315 .state()?
314 316 .map(|state| self.entry_with_given_state(state)))
315 317 }
316 318
317 319 pub(super) fn children<'on_disk>(
318 320 &self,
319 321 on_disk: &'on_disk [u8],
320 322 ) -> Result<&'on_disk [Node], DirstateV2ParseError> {
321 323 read_slice::<Node>(on_disk, self.children)
322 324 }
323 325
324 326 pub(super) fn to_in_memory_node<'on_disk>(
325 327 &self,
326 328 on_disk: &'on_disk [u8],
327 329 ) -> Result<dirstate_map::Node<'on_disk>, DirstateV2ParseError> {
328 330 Ok(dirstate_map::Node {
329 331 children: dirstate_map::ChildNodes::OnDisk(
330 332 self.children(on_disk)?,
331 333 ),
332 334 copy_source: self.copy_source(on_disk)?.map(Cow::Borrowed),
333 335 data: self.node_data()?,
334 336 tracked_descendants_count: self.tracked_descendants_count.get(),
335 337 })
336 338 }
337 339 }
338 340
339 341 impl Entry {
340 342 fn from_timestamp(timestamp: Timestamp) -> Self {
341 343 // Safety: both types implement the `ByteCast` trait, so we could
342 344 // safely use `as_bytes` and `from_bytes` to do this conversion. Using
343 345 // `transmute` instead makes the compiler check that the two types
344 346 // have the same size, which eliminates the error case of
345 347 // `from_bytes`.
346 348 unsafe { std::mem::transmute::<Timestamp, Entry>(timestamp) }
347 349 }
348 350
349 351 fn as_timestamp(&self) -> &Timestamp {
350 352 // Safety: same as above in `from_timestamp`
351 353 unsafe { &*(self as *const Entry as *const Timestamp) }
352 354 }
353 355 }
354 356
355 357 impl Timestamp {
356 358 pub fn seconds(&self) -> i64 {
357 359 self.seconds.get()
358 360 }
359 361 }
360 362
361 363 impl From<SystemTime> for Timestamp {
362 364 fn from(system_time: SystemTime) -> Self {
363 365 let (secs, nanos) = match system_time.duration_since(UNIX_EPOCH) {
364 366 Ok(duration) => {
365 367 (duration.as_secs() as i64, duration.subsec_nanos())
366 368 }
367 369 Err(error) => {
368 370 let negative = error.duration();
369 371 (-(negative.as_secs() as i64), negative.subsec_nanos())
370 372 }
371 373 };
372 374 Timestamp {
373 375 seconds: secs.into(),
374 376 nanoseconds: nanos.into(),
375 377 }
376 378 }
377 379 }
378 380
379 381 impl From<&'_ Timestamp> for SystemTime {
380 382 fn from(timestamp: &'_ Timestamp) -> Self {
381 383 let secs = timestamp.seconds.get();
382 384 let nanos = timestamp.nanoseconds.get();
383 385 if secs >= 0 {
384 386 UNIX_EPOCH + Duration::new(secs as u64, nanos)
385 387 } else {
386 388 UNIX_EPOCH - Duration::new((-secs) as u64, nanos)
387 389 }
388 390 }
389 391 }
390 392
391 393 fn read_hg_path(
392 394 on_disk: &[u8],
393 395 slice: Slice,
394 396 ) -> Result<&HgPath, DirstateV2ParseError> {
395 397 let bytes = read_slice::<u8>(on_disk, slice)?;
396 398 Ok(HgPath::new(bytes))
397 399 }
398 400
399 401 fn read_slice<T>(
400 402 on_disk: &[u8],
401 403 slice: Slice,
402 404 ) -> Result<&[T], DirstateV2ParseError>
403 405 where
404 406 T: BytesCast,
405 407 {
406 408 // Either `usize::MAX` would result in "out of bounds" error since a single
407 409 // `&[u8]` cannot occupy the entire addess space.
408 410 let start = usize::try_from(slice.start.get()).unwrap_or(std::usize::MAX);
409 411 let len = usize::try_from(slice.len.get()).unwrap_or(std::usize::MAX);
410 412 on_disk
411 413 .get(start..)
412 414 .and_then(|bytes| T::slice_from_bytes(bytes, len).ok())
413 415 .map(|(slice, _rest)| slice)
414 416 .ok_or_else(|| DirstateV2ParseError)
415 417 }
416 418
419 pub(crate) fn parse_dirstate_parents(
420 on_disk: &[u8],
421 ) -> Result<&DirstateParents, HgError> {
422 Ok(&read_header(on_disk)?.parents)
423 }
424
425 pub(crate) fn for_each_tracked_path<'on_disk>(
426 on_disk: &'on_disk [u8],
427 mut f: impl FnMut(&'on_disk HgPath),
428 ) -> Result<(), DirstateV2ParseError> {
429 let header = read_header(on_disk)?;
430 fn recur<'on_disk>(
431 on_disk: &'on_disk [u8],
432 nodes: Slice,
433 f: &mut impl FnMut(&'on_disk HgPath),
434 ) -> Result<(), DirstateV2ParseError> {
435 for node in read_slice::<Node>(on_disk, nodes)? {
436 if let Some(state) = node.state()? {
437 if state.is_tracked() {
438 f(node.full_path(on_disk)?)
439 }
440 }
441 recur(on_disk, node.children, f)?
442 }
443 Ok(())
444 }
445 recur(on_disk, header.root, &mut f)
446 }
447
417 448 pub(super) fn write(
418 449 dirstate_map: &mut DirstateMap,
419 450 parents: DirstateParents,
420 451 ) -> Result<Vec<u8>, DirstateError> {
421 452 let header_len = std::mem::size_of::<Header>();
422 453
423 454 // This ignores the space for paths, and for nodes without an entry.
424 455 // TODO: better estimate? Skip the `Vec` and write to a file directly?
425 456 let size_guess = header_len
426 457 + std::mem::size_of::<Node>()
427 458 * dirstate_map.nodes_with_entry_count as usize;
428 459 let mut out = Vec::with_capacity(size_guess);
429 460
430 461 // Keep space for the header. We’ll fill it out at the end when we know the
431 462 // actual offset for the root nodes.
432 463 out.resize(header_len, 0_u8);
433 464
434 465 let root =
435 466 write_nodes(dirstate_map, dirstate_map.root.as_ref(), &mut out)?;
436 467
437 468 let header = Header {
438 469 marker: *V2_FORMAT_MARKER,
439 470 parents: parents,
440 471 root,
441 472 nodes_with_entry_count: dirstate_map.nodes_with_entry_count.into(),
442 473 nodes_with_copy_source_count: dirstate_map
443 474 .nodes_with_copy_source_count
444 475 .into(),
445 476 };
446 477 out[..header_len].copy_from_slice(header.as_bytes());
447 478 Ok(out)
448 479 }
449 480
450 481 fn write_nodes(
451 482 dirstate_map: &DirstateMap,
452 483 nodes: dirstate_map::ChildNodesRef,
453 484 out: &mut Vec<u8>,
454 485 ) -> Result<ChildNodes, DirstateError> {
455 486 // `dirstate_map::ChildNodes` is a `HashMap` with undefined iteration
456 487 // order. Sort to enable binary search in the written file.
457 488 let nodes = nodes.sorted();
458 489
459 490 // First accumulate serialized nodes in a `Vec`
460 491 let mut on_disk_nodes = Vec::with_capacity(nodes.len());
461 492 for node in nodes {
462 493 let children = write_nodes(
463 494 dirstate_map,
464 495 node.children(dirstate_map.on_disk)?,
465 496 out,
466 497 )?;
467 498 let full_path = node.full_path(dirstate_map.on_disk)?;
468 499 let full_path = write_slice::<u8>(full_path.as_bytes(), out);
469 500 let copy_source =
470 501 if let Some(source) = node.copy_source(dirstate_map.on_disk)? {
471 502 write_slice::<u8>(source.as_bytes(), out)
472 503 } else {
473 504 Slice {
474 505 start: 0.into(),
475 506 len: 0.into(),
476 507 }
477 508 };
478 509 on_disk_nodes.push(match node {
479 510 NodeRef::InMemory(path, node) => {
480 511 let (state, data) = match &node.data {
481 512 dirstate_map::NodeData::Entry(entry) => (
482 513 entry.state.into(),
483 514 Entry {
484 515 mode: entry.mode.into(),
485 516 mtime: entry.mtime.into(),
486 517 size: entry.size.into(),
487 518 },
488 519 ),
489 520 dirstate_map::NodeData::CachedDirectory { mtime } => {
490 521 (b'd', Entry::from_timestamp(*mtime))
491 522 }
492 523 dirstate_map::NodeData::None => (
493 524 b'\0',
494 525 Entry {
495 526 mode: 0.into(),
496 527 mtime: 0.into(),
497 528 size: 0.into(),
498 529 },
499 530 ),
500 531 };
501 532 Node {
502 533 children,
503 534 copy_source,
504 535 full_path,
505 536 base_name_start: u32::try_from(path.base_name_start())
506 537 // Could only panic for paths over 4 GiB
507 538 .expect("dirstate-v2 offset overflow")
508 539 .into(),
509 540 tracked_descendants_count: node
510 541 .tracked_descendants_count
511 542 .into(),
512 543 state,
513 544 data,
514 545 }
515 546 }
516 547 NodeRef::OnDisk(node) => Node {
517 548 children,
518 549 copy_source,
519 550 full_path,
520 551 ..*node
521 552 },
522 553 })
523 554 }
524 555 // … so we can write them contiguously
525 556 Ok(write_slice::<Node>(&on_disk_nodes, out))
526 557 }
527 558
528 559 fn write_slice<T>(slice: &[T], out: &mut Vec<u8>) -> Slice
529 560 where
530 561 T: BytesCast,
531 562 {
532 563 let start = u64::try_from(out.len())
533 564 // Could only panic on a 128-bit CPU with a dirstate over 16 EiB
534 565 .expect("dirstate-v2 offset overflow")
535 566 .into();
536 567 let len = u32::try_from(slice.len())
537 568 // Could only panic for paths over 4 GiB or nodes with over 4 billions
538 569 // child nodes
539 570 .expect("dirstate-v2 offset overflow")
540 571 .into();
541 572 out.extend(slice.as_bytes());
542 573 Slice { start, len }
543 574 }
@@ -1,68 +1,79 b''
1 1 // list_tracked_files.rs
2 2 //
3 3 // Copyright 2020 Antoine Cezar <antoine.cezar@octobus.net>
4 4 //
5 5 // This software may be used and distributed according to the terms of the
6 6 // GNU General Public License version 2 or any later version.
7 7
8 8 use crate::dirstate::parsers::parse_dirstate_entries;
9 use crate::dirstate_tree::on_disk::for_each_tracked_path;
9 10 use crate::errors::HgError;
10 11 use crate::repo::Repo;
11 12 use crate::revlog::changelog::Changelog;
12 13 use crate::revlog::manifest::{Manifest, ManifestEntry};
13 14 use crate::revlog::node::Node;
14 15 use crate::revlog::revlog::RevlogError;
15 16 use crate::utils::hg_path::HgPath;
17 use crate::DirstateError;
16 18 use rayon::prelude::*;
17 19
/// List files under Mercurial control in the working directory
/// by reading the dirstate
pub struct Dirstate {
    /// The `dirstate` content.
    content: Vec<u8>,
    /// Whether the repository uses the "dirstate-v2" format (as determined
    /// from the repo's requirements when this struct was created).
    dirstate_v2: bool,
}
24 27
25 28 impl Dirstate {
26 29 pub fn new(repo: &Repo) -> Result<Self, HgError> {
27 let content = repo.hg_vfs().read("dirstate")?;
28 Ok(Self { content })
30 Ok(Self {
31 content: repo.hg_vfs().read("dirstate")?,
32 dirstate_v2: repo.has_dirstate_v2(),
33 })
29 34 }
30 35
31 pub fn tracked_files(&self) -> Result<Vec<&HgPath>, HgError> {
36 pub fn tracked_files(&self) -> Result<Vec<&HgPath>, DirstateError> {
32 37 let mut files = Vec::new();
33 let _parents = parse_dirstate_entries(
34 &self.content,
35 |path, entry, _copy_source| {
36 if entry.state.is_tracked() {
37 files.push(path)
38 }
39 Ok(())
40 },
41 )?;
38 if !self.content.is_empty() {
39 if self.dirstate_v2 {
40 for_each_tracked_path(&self.content, |path| files.push(path))?
41 } else {
42 let _parents = parse_dirstate_entries(
43 &self.content,
44 |path, entry, _copy_source| {
45 if entry.state.is_tracked() {
46 files.push(path)
47 }
48 Ok(())
49 },
50 )?;
51 }
52 }
42 53 files.par_sort_unstable();
43 54 Ok(files)
44 55 }
45 56 }
46 57
47 58 /// List files under Mercurial control at a given revision.
48 59 pub fn list_rev_tracked_files(
49 60 repo: &Repo,
50 61 revset: &str,
51 62 ) -> Result<FilesForRev, RevlogError> {
52 63 let rev = crate::revset::resolve_single(revset, repo)?;
53 64 let changelog = Changelog::open(repo)?;
54 65 let manifest = Manifest::open(repo)?;
55 66 let changelog_entry = changelog.get_rev(rev)?;
56 67 let manifest_node =
57 68 Node::from_hex_for_repo(&changelog_entry.manifest_node()?)?;
58 69 let manifest_entry = manifest.get_node(manifest_node.into())?;
59 70 Ok(FilesForRev(manifest_entry))
60 71 }
61 72
62 73 pub struct FilesForRev(ManifestEntry);
63 74
64 75 impl FilesForRev {
65 76 pub fn iter(&self) -> impl Iterator<Item = &HgPath> {
66 77 self.0.files()
67 78 }
68 79 }
@@ -1,266 +1,277 b''
1 1 use crate::config::{Config, ConfigError, ConfigParseError};
2 2 use crate::errors::{HgError, IoErrorContext, IoResultExt};
3 3 use crate::requirements;
4 4 use crate::utils::files::get_path_from_bytes;
5 5 use crate::utils::SliceExt;
6 6 use memmap::{Mmap, MmapOptions};
7 7 use std::collections::HashSet;
8 8 use std::path::{Path, PathBuf};
9 9
10 10 /// A repository on disk
11 11 pub struct Repo {
12 12 working_directory: PathBuf,
13 13 dot_hg: PathBuf,
14 14 store: PathBuf,
15 15 requirements: HashSet<String>,
16 16 config: Config,
17 17 }
18 18
19 19 #[derive(Debug, derive_more::From)]
20 20 pub enum RepoError {
21 21 NotFound {
22 22 at: PathBuf,
23 23 },
24 24 #[from]
25 25 ConfigParseError(ConfigParseError),
26 26 #[from]
27 27 Other(HgError),
28 28 }
29 29
30 30 impl From<ConfigError> for RepoError {
31 31 fn from(error: ConfigError) -> Self {
32 32 match error {
33 33 ConfigError::Parse(error) => error.into(),
34 34 ConfigError::Other(error) => error.into(),
35 35 }
36 36 }
37 37 }
38 38
39 39 /// Filesystem access abstraction for the contents of a given "base" directory
40 40 #[derive(Clone, Copy)]
41 41 pub struct Vfs<'a> {
42 42 pub(crate) base: &'a Path,
43 43 }
44 44
45 45 impl Repo {
46 46 /// Find a repository, either at the given path (which must contain a `.hg`
47 47 /// sub-directory) or by searching the current directory and its
48 48 /// ancestors.
49 49 ///
50 50 /// A method with two very different "modes" like this is usually a code smell
51 51 /// to make two methods instead, but in this case an `Option` is what rhg
52 52 /// sub-commands get from Clap for the `-R` / `--repository` CLI argument.
53 53 /// Having two methods would just move that `if` to almost all callers.
54 54 pub fn find(
55 55 config: &Config,
56 56 explicit_path: Option<&Path>,
57 57 ) -> Result<Self, RepoError> {
58 58 if let Some(root) = explicit_path {
59 59 if root.join(".hg").is_dir() {
60 60 Self::new_at_path(root.to_owned(), config)
61 61 } else if root.is_file() {
62 62 Err(HgError::unsupported("bundle repository").into())
63 63 } else {
64 64 Err(RepoError::NotFound {
65 65 at: root.to_owned(),
66 66 })
67 67 }
68 68 } else {
69 69 let current_directory = crate::utils::current_dir()?;
70 70 // ancestors() is inclusive: it first yields `current_directory`
71 71 // as-is.
72 72 for ancestor in current_directory.ancestors() {
73 73 if ancestor.join(".hg").is_dir() {
74 74 return Self::new_at_path(ancestor.to_owned(), config);
75 75 }
76 76 }
77 77 Err(RepoError::NotFound {
78 78 at: current_directory,
79 79 })
80 80 }
81 81 }
82 82
83 83 /// To be called after checking that `.hg` is a sub-directory
84 84 fn new_at_path(
85 85 working_directory: PathBuf,
86 86 config: &Config,
87 87 ) -> Result<Self, RepoError> {
88 88 let dot_hg = working_directory.join(".hg");
89 89
90 90 let mut repo_config_files = Vec::new();
91 91 repo_config_files.push(dot_hg.join("hgrc"));
92 92 repo_config_files.push(dot_hg.join("hgrc-not-shared"));
93 93
94 94 let hg_vfs = Vfs { base: &dot_hg };
95 95 let mut reqs = requirements::load_if_exists(hg_vfs)?;
96 96 let relative =
97 97 reqs.contains(requirements::RELATIVE_SHARED_REQUIREMENT);
98 98 let shared =
99 99 reqs.contains(requirements::SHARED_REQUIREMENT) || relative;
100 100
101 101 // From `mercurial/localrepo.py`:
102 102 //
103 103 // if .hg/requires contains the sharesafe requirement, it means
104 104 // there exists a `.hg/store/requires` too and we should read it
105 105 // NOTE: presence of SHARESAFE_REQUIREMENT imply that store requirement
106 106 // is present. We never write SHARESAFE_REQUIREMENT for a repo if store
107 107 // is not present, refer checkrequirementscompat() for that
108 108 //
109 109 // However, if SHARESAFE_REQUIREMENT is not present, it means that the
110 110 // repository was shared the old way. We check the share source
111 111 // .hg/requires for SHARESAFE_REQUIREMENT to detect whether the
112 112 // current repository needs to be reshared
113 113 let share_safe = reqs.contains(requirements::SHARESAFE_REQUIREMENT);
114 114
115 115 let store_path;
116 116 if !shared {
117 117 store_path = dot_hg.join("store");
118 118 } else {
119 119 let bytes = hg_vfs.read("sharedpath")?;
120 120 let mut shared_path =
121 121 get_path_from_bytes(bytes.trim_end_newlines()).to_owned();
122 122 if relative {
123 123 shared_path = dot_hg.join(shared_path)
124 124 }
125 125 if !shared_path.is_dir() {
126 126 return Err(HgError::corrupted(format!(
127 127 ".hg/sharedpath points to nonexistent directory {}",
128 128 shared_path.display()
129 129 ))
130 130 .into());
131 131 }
132 132
133 133 store_path = shared_path.join("store");
134 134
135 135 let source_is_share_safe =
136 136 requirements::load(Vfs { base: &shared_path })?
137 137 .contains(requirements::SHARESAFE_REQUIREMENT);
138 138
139 139 if share_safe && !source_is_share_safe {
140 140 return Err(match config
141 141 .get(b"share", b"safe-mismatch.source-not-safe")
142 142 {
143 143 Some(b"abort") | None => HgError::abort(
144 144 "abort: share source does not support share-safe requirement\n\
145 145 (see `hg help config.format.use-share-safe` for more information)",
146 146 ),
147 147 _ => HgError::unsupported("share-safe downgrade"),
148 148 }
149 149 .into());
150 150 } else if source_is_share_safe && !share_safe {
151 151 return Err(
152 152 match config.get(b"share", b"safe-mismatch.source-safe") {
153 153 Some(b"abort") | None => HgError::abort(
154 154 "abort: version mismatch: source uses share-safe \
155 155 functionality while the current share does not\n\
156 156 (see `hg help config.format.use-share-safe` for more information)",
157 157 ),
158 158 _ => HgError::unsupported("share-safe upgrade"),
159 159 }
160 160 .into(),
161 161 );
162 162 }
163 163
164 164 if share_safe {
165 165 repo_config_files.insert(0, shared_path.join("hgrc"))
166 166 }
167 167 }
168 168 if share_safe {
169 169 reqs.extend(requirements::load(Vfs { base: &store_path })?);
170 170 }
171 171
172 172 let repo_config = if std::env::var_os("HGRCSKIPREPO").is_none() {
173 173 config.combine_with_repo(&repo_config_files)?
174 174 } else {
175 175 config.clone()
176 176 };
177 177
178 178 let repo = Self {
179 179 requirements: reqs,
180 180 working_directory,
181 181 store: store_path,
182 182 dot_hg,
183 183 config: repo_config,
184 184 };
185 185
186 186 requirements::check(&repo)?;
187 187
188 188 Ok(repo)
189 189 }
190 190
191 191 pub fn working_directory_path(&self) -> &Path {
192 192 &self.working_directory
193 193 }
194 194
195 195 pub fn requirements(&self) -> &HashSet<String> {
196 196 &self.requirements
197 197 }
198 198
199 199 pub fn config(&self) -> &Config {
200 200 &self.config
201 201 }
202 202
203 203 /// For accessing repository files (in `.hg`), except for the store
204 204 /// (`.hg/store`).
205 205 pub fn hg_vfs(&self) -> Vfs<'_> {
206 206 Vfs { base: &self.dot_hg }
207 207 }
208 208
209 209 /// For accessing repository store files (in `.hg/store`)
210 210 pub fn store_vfs(&self) -> Vfs<'_> {
211 211 Vfs { base: &self.store }
212 212 }
213 213
214 214 /// For accessing the working copy
215 215 pub fn working_directory_vfs(&self) -> Vfs<'_> {
216 216 Vfs {
217 217 base: &self.working_directory,
218 218 }
219 219 }
220 220
221 pub fn has_dirstate_v2(&self) -> bool {
222 self.requirements
223 .contains(requirements::DIRSTATE_V2_REQUIREMENT)
224 }
225
221 226 pub fn dirstate_parents(
222 227 &self,
223 228 ) -> Result<crate::dirstate::DirstateParents, HgError> {
224 229 let dirstate = self.hg_vfs().mmap_open("dirstate")?;
225 let parents =
226 crate::dirstate::parsers::parse_dirstate_parents(&dirstate)?;
230 if dirstate.is_empty() {
231 return Ok(crate::dirstate::DirstateParents::NULL);
232 }
233 let parents = if self.has_dirstate_v2() {
234 crate::dirstate_tree::on_disk::parse_dirstate_parents(&dirstate)?
235 } else {
236 crate::dirstate::parsers::parse_dirstate_parents(&dirstate)?
237 };
227 238 Ok(parents.clone())
228 239 }
229 240 }
230 241
231 242 impl Vfs<'_> {
232 243 pub fn join(&self, relative_path: impl AsRef<Path>) -> PathBuf {
233 244 self.base.join(relative_path)
234 245 }
235 246
236 247 pub fn read(
237 248 &self,
238 249 relative_path: impl AsRef<Path>,
239 250 ) -> Result<Vec<u8>, HgError> {
240 251 let path = self.join(relative_path);
241 252 std::fs::read(&path).when_reading_file(&path)
242 253 }
243 254
244 255 pub fn mmap_open(
245 256 &self,
246 257 relative_path: impl AsRef<Path>,
247 258 ) -> Result<Mmap, HgError> {
248 259 let path = self.base.join(relative_path);
249 260 let file = std::fs::File::open(&path).when_reading_file(&path)?;
250 261 // TODO: what are the safety requirements here?
251 262 let mmap = unsafe { MmapOptions::new().map(&file) }
252 263 .when_reading_file(&path)?;
253 264 Ok(mmap)
254 265 }
255 266
256 267 pub fn rename(
257 268 &self,
258 269 relative_from: impl AsRef<Path>,
259 270 relative_to: impl AsRef<Path>,
260 271 ) -> Result<(), HgError> {
261 272 let from = self.join(relative_from);
262 273 let to = self.join(relative_to);
263 274 std::fs::rename(&from, &to)
264 275 .with_context(|| IoErrorContext::RenamingFile { from, to })
265 276 }
266 277 }
@@ -1,153 +1,156 b''
1 1 use crate::errors::{HgError, HgResultExt};
2 2 use crate::repo::{Repo, Vfs};
3 3 use crate::utils::join_display;
4 4 use std::collections::HashSet;
5 5
6 6 fn parse(bytes: &[u8]) -> Result<HashSet<String>, HgError> {
7 7 // The Python code reading this file uses `str.splitlines`
8 8 // which looks for a number of line separators (even including a couple of
9 9 // non-ASCII ones), but Python code writing it always uses `\n`.
10 10 let lines = bytes.split(|&byte| byte == b'\n');
11 11
12 12 lines
13 13 .filter(|line| !line.is_empty())
14 14 .map(|line| {
15 15 // Python uses Unicode `str.isalnum` but feature names are all
16 16 // ASCII
17 17 if line[0].is_ascii_alphanumeric() && line.is_ascii() {
18 18 Ok(String::from_utf8(line.into()).unwrap())
19 19 } else {
20 20 Err(HgError::corrupted("parse error in 'requires' file"))
21 21 }
22 22 })
23 23 .collect()
24 24 }
25 25
26 26 pub(crate) fn load(hg_vfs: Vfs) -> Result<HashSet<String>, HgError> {
27 27 parse(&hg_vfs.read("requires")?)
28 28 }
29 29
30 30 pub(crate) fn load_if_exists(hg_vfs: Vfs) -> Result<HashSet<String>, HgError> {
31 31 if let Some(bytes) = hg_vfs.read("requires").io_not_found_as_none()? {
32 32 parse(&bytes)
33 33 } else {
34 34 // Treat a missing file the same as an empty file.
35 35 // From `mercurial/localrepo.py`:
36 36 // > requires file contains a newline-delimited list of
37 37 // > features/capabilities the opener (us) must have in order to use
38 38 // > the repository. This file was introduced in Mercurial 0.9.2,
39 39 // > which means very old repositories may not have one. We assume
40 40 // > a missing file translates to no requirements.
41 41 Ok(HashSet::new())
42 42 }
43 43 }
44 44
45 45 pub(crate) fn check(repo: &Repo) -> Result<(), HgError> {
46 46 let unknown: Vec<_> = repo
47 47 .requirements()
48 48 .iter()
49 49 .map(String::as_str)
50 50 // .filter(|feature| !ALL_SUPPORTED.contains(feature.as_str()))
51 51 .filter(|feature| {
52 52 !REQUIRED.contains(feature) && !SUPPORTED.contains(feature)
53 53 })
54 54 .collect();
55 55 if !unknown.is_empty() {
56 56 return Err(HgError::unsupported(format!(
57 57 "repository requires feature unknown to this Mercurial: {}",
58 58 join_display(&unknown, ", ")
59 59 )));
60 60 }
61 61 let missing: Vec<_> = REQUIRED
62 62 .iter()
63 63 .filter(|&&feature| !repo.requirements().contains(feature))
64 64 .collect();
65 65 if !missing.is_empty() {
66 66 return Err(HgError::unsupported(format!(
67 67 "repository is missing feature required by this Mercurial: {}",
68 68 join_display(&missing, ", ")
69 69 )));
70 70 }
71 71 Ok(())
72 72 }
73 73
74 74 /// rhg does not support repositories that are *missing* any of these features
75 75 const REQUIRED: &[&str] = &["revlogv1", "store", "fncache", "dotencode"];
76 76
77 77 /// rhg supports repository with or without these
78 78 const SUPPORTED: &[&str] = &[
79 79 "generaldelta",
80 80 SHARED_REQUIREMENT,
81 81 SHARESAFE_REQUIREMENT,
82 82 SPARSEREVLOG_REQUIREMENT,
83 83 RELATIVE_SHARED_REQUIREMENT,
84 84 REVLOG_COMPRESSION_ZSTD,
85 DIRSTATE_V2_REQUIREMENT,
85 86 // As of this writing everything rhg does is read-only.
86 87 // When it starts writing to the repository, it’ll need to either keep the
87 88 // persistent nodemap up to date or remove this entry:
88 89 NODEMAP_REQUIREMENT,
89 90 ];
90 91
91 92 // Copied from mercurial/requirements.py:
92 93
94 pub(crate) const DIRSTATE_V2_REQUIREMENT: &str = "exp-dirstate-v2";
95
93 96 /// When narrowing is finalized and no longer subject to format changes,
94 97 /// we should move this to just "narrow" or similar.
95 98 #[allow(unused)]
96 99 pub(crate) const NARROW_REQUIREMENT: &str = "narrowhg-experimental";
97 100
98 101 /// Enables sparse working directory usage
99 102 #[allow(unused)]
100 103 pub(crate) const SPARSE_REQUIREMENT: &str = "exp-sparse";
101 104
102 105 /// Enables the internal phase which is used to hide changesets instead
103 106 /// of stripping them
104 107 #[allow(unused)]
105 108 pub(crate) const INTERNAL_PHASE_REQUIREMENT: &str = "internal-phase";
106 109
107 110 /// Stores manifest in Tree structure
108 111 #[allow(unused)]
109 112 pub(crate) const TREEMANIFEST_REQUIREMENT: &str = "treemanifest";
110 113
111 114 /// Increment the sub-version when the revlog v2 format changes to lock out old
112 115 /// clients.
113 116 #[allow(unused)]
114 117 pub(crate) const REVLOGV2_REQUIREMENT: &str = "exp-revlogv2.1";
115 118
116 119 /// A repository with the sparserevlog feature will have delta chains that
117 120 /// can spread over a larger span. Sparse reading cuts these large spans into
118 121 /// pieces, so that each piece isn't too big.
119 122 /// Without the sparserevlog capability, reading from the repository could use
120 123 /// huge amounts of memory, because the whole span would be read at once,
121 124 /// including all the intermediate revisions that aren't pertinent for the
122 125 /// chain. This is why once a repository has enabled sparse-read, it becomes
123 126 /// required.
124 127 #[allow(unused)]
125 128 pub(crate) const SPARSEREVLOG_REQUIREMENT: &str = "sparserevlog";
126 129
127 130 /// A repository with the copies-sidedata-changeset requirement will store
128 131 /// copies related information in changeset's sidedata.
129 132 #[allow(unused)]
130 133 pub(crate) const COPIESSDC_REQUIREMENT: &str = "exp-copies-sidedata-changeset";
131 134
132 135 /// The repository use persistent nodemap for the changelog and the manifest.
133 136 #[allow(unused)]
134 137 pub(crate) const NODEMAP_REQUIREMENT: &str = "persistent-nodemap";
135 138
136 139 /// Denotes that the current repository is a share
137 140 #[allow(unused)]
138 141 pub(crate) const SHARED_REQUIREMENT: &str = "shared";
139 142
140 143 /// Denotes that current repository is a share and the shared source path is
141 144 /// relative to the current repository root path
142 145 #[allow(unused)]
143 146 pub(crate) const RELATIVE_SHARED_REQUIREMENT: &str = "relshared";
144 147
145 148 /// A repository with share implemented safely. The repository has different
146 149 /// store and working copy requirements i.e. both `.hg/requires` and
147 150 /// `.hg/store/requires` are present.
148 151 #[allow(unused)]
149 152 pub(crate) const SHARESAFE_REQUIREMENT: &str = "share-safe";
150 153
151 154 /// A repository that use zstd compression inside its revlog
152 155 #[allow(unused)]
153 156 pub(crate) const REVLOG_COMPRESSION_ZSTD: &str = "revlog-compression-zstd";
@@ -1,318 +1,322 b''
1 1 // status.rs
2 2 //
3 3 // Copyright 2020, Georges Racinet <georges.racinets@octobus.net>
4 4 //
5 5 // This software may be used and distributed according to the terms of the
6 6 // GNU General Public License version 2 or any later version.
7 7
8 8 use crate::error::CommandError;
9 9 use crate::ui::Ui;
10 10 use clap::{Arg, SubCommand};
11 11 use hg;
12 use hg::dirstate_tree::dirstate_map::DirstateMap;
12 13 use hg::errors::HgResultExt;
13 14 use hg::errors::IoResultExt;
14 15 use hg::matchers::AlwaysMatcher;
15 16 use hg::operations::cat;
16 17 use hg::repo::Repo;
17 18 use hg::revlog::node::Node;
18 19 use hg::utils::hg_path::{hg_path_to_os_string, HgPath};
19 use hg::{DirstateMap, StatusError};
20 use hg::StatusError;
20 21 use hg::{HgPathCow, StatusOptions};
21 22 use log::{info, warn};
22 23 use std::convert::TryInto;
23 24 use std::fs;
24 25 use std::io::BufReader;
25 26 use std::io::Read;
26 27
27 28 pub const HELP_TEXT: &str = "
28 29 Show changed files in the working directory
29 30
30 31 This is a pure Rust version of `hg status`.
31 32
32 33 Some options might be missing, check the list below.
33 34 ";
34 35
35 36 pub fn args() -> clap::App<'static, 'static> {
36 37 SubCommand::with_name("status")
37 38 .alias("st")
38 39 .about(HELP_TEXT)
39 40 .arg(
40 41 Arg::with_name("all")
41 42 .help("show status of all files")
42 43 .short("-A")
43 44 .long("--all"),
44 45 )
45 46 .arg(
46 47 Arg::with_name("modified")
47 48 .help("show only modified files")
48 49 .short("-m")
49 50 .long("--modified"),
50 51 )
51 52 .arg(
52 53 Arg::with_name("added")
53 54 .help("show only added files")
54 55 .short("-a")
55 56 .long("--added"),
56 57 )
57 58 .arg(
58 59 Arg::with_name("removed")
59 60 .help("show only removed files")
60 61 .short("-r")
61 62 .long("--removed"),
62 63 )
63 64 .arg(
64 65 Arg::with_name("clean")
65 66 .help("show only clean files")
66 67 .short("-c")
67 68 .long("--clean"),
68 69 )
69 70 .arg(
70 71 Arg::with_name("deleted")
71 72 .help("show only deleted files")
72 73 .short("-d")
73 74 .long("--deleted"),
74 75 )
75 76 .arg(
76 77 Arg::with_name("unknown")
77 78 .help("show only unknown (not tracked) files")
78 79 .short("-u")
79 80 .long("--unknown"),
80 81 )
81 82 .arg(
82 83 Arg::with_name("ignored")
83 84 .help("show only ignored files")
84 85 .short("-i")
85 86 .long("--ignored"),
86 87 )
87 88 }
88 89
89 90 /// Pure data type allowing the caller to specify file states to display
90 91 #[derive(Copy, Clone, Debug)]
91 92 pub struct DisplayStates {
92 93 pub modified: bool,
93 94 pub added: bool,
94 95 pub removed: bool,
95 96 pub clean: bool,
96 97 pub deleted: bool,
97 98 pub unknown: bool,
98 99 pub ignored: bool,
99 100 }
100 101
101 102 pub const DEFAULT_DISPLAY_STATES: DisplayStates = DisplayStates {
102 103 modified: true,
103 104 added: true,
104 105 removed: true,
105 106 clean: false,
106 107 deleted: true,
107 108 unknown: true,
108 109 ignored: false,
109 110 };
110 111
111 112 pub const ALL_DISPLAY_STATES: DisplayStates = DisplayStates {
112 113 modified: true,
113 114 added: true,
114 115 removed: true,
115 116 clean: true,
116 117 deleted: true,
117 118 unknown: true,
118 119 ignored: true,
119 120 };
120 121
121 122 impl DisplayStates {
122 123 pub fn is_empty(&self) -> bool {
123 124 !(self.modified
124 125 || self.added
125 126 || self.removed
126 127 || self.clean
127 128 || self.deleted
128 129 || self.unknown
129 130 || self.ignored)
130 131 }
131 132 }
132 133
133 134 pub fn run(invocation: &crate::CliInvocation) -> Result<(), CommandError> {
134 135 let status_enabled_default = false;
135 136 let status_enabled = invocation.config.get_option(b"rhg", b"status")?;
136 137 if !status_enabled.unwrap_or(status_enabled_default) {
137 138 return Err(CommandError::unsupported(
138 139 "status is experimental in rhg (enable it with 'rhg.status = true' \
139 140 or enable fallback with 'rhg.on-unsupported = fallback')"
140 141 ));
141 142 }
142 143
143 144 let ui = invocation.ui;
144 145 let args = invocation.subcommand_args;
145 146 let display_states = if args.is_present("all") {
146 147 // TODO when implementing `--quiet`: it excludes clean files
147 148 // from `--all`
148 149 ALL_DISPLAY_STATES
149 150 } else {
150 151 let requested = DisplayStates {
151 152 modified: args.is_present("modified"),
152 153 added: args.is_present("added"),
153 154 removed: args.is_present("removed"),
154 155 clean: args.is_present("clean"),
155 156 deleted: args.is_present("deleted"),
156 157 unknown: args.is_present("unknown"),
157 158 ignored: args.is_present("ignored"),
158 159 };
159 160 if requested.is_empty() {
160 161 DEFAULT_DISPLAY_STATES
161 162 } else {
162 163 requested
163 164 }
164 165 };
165 166
166 167 let repo = invocation.repo?;
167 let mut dmap = DirstateMap::new();
168 168 let dirstate_data =
169 169 repo.hg_vfs().mmap_open("dirstate").io_not_found_as_none()?;
170 170 let dirstate_data = match &dirstate_data {
171 171 Some(mmap) => &**mmap,
172 172 None => b"",
173 173 };
174 let parents = dmap.read(dirstate_data)?;
174 let (mut dmap, parents) = if repo.has_dirstate_v2() {
175 DirstateMap::new_v2(dirstate_data)?
176 } else {
177 DirstateMap::new_v1(dirstate_data)?
178 };
175 179 let options = StatusOptions {
176 180 // TODO should be provided by the dirstate parsing and
177 181 // hence be stored on dmap. Using a value that assumes we aren't
178 182 // below the time resolution granularity of the FS and the
179 183 // dirstate.
180 184 last_normal_time: 0,
181 185 // we're currently supporting file systems with exec flags only
182 186 // anyway
183 187 check_exec: true,
184 188 list_clean: display_states.clean,
185 189 list_unknown: display_states.unknown,
186 190 list_ignored: display_states.ignored,
187 191 collect_traversed_dirs: false,
188 192 };
189 193 let ignore_file = repo.working_directory_vfs().join(".hgignore"); // TODO hardcoded
190 let (mut ds_status, pattern_warnings) = hg::status(
191 &dmap,
194 let (mut ds_status, pattern_warnings) = hg::dirstate_tree::status::status(
195 &mut dmap,
192 196 &AlwaysMatcher,
193 197 repo.working_directory_path().to_owned(),
194 198 vec![ignore_file],
195 199 options,
196 200 )?;
197 201 if !pattern_warnings.is_empty() {
198 202 warn!("Pattern warnings: {:?}", &pattern_warnings);
199 203 }
200 204
201 205 if !ds_status.bad.is_empty() {
202 206 warn!("Bad matches {:?}", &(ds_status.bad))
203 207 }
204 208 if !ds_status.unsure.is_empty() {
205 209 info!(
206 210 "Files to be rechecked by retrieval from filelog: {:?}",
207 211 &ds_status.unsure
208 212 );
209 213 }
210 214 if !ds_status.unsure.is_empty()
211 215 && (display_states.modified || display_states.clean)
212 216 {
213 217 let p1: Node = parents
214 218 .expect(
215 219 "Dirstate with no parents should not list any file to
216 220 be rechecked for modifications",
217 221 )
218 222 .p1
219 223 .into();
220 224 let p1_hex = format!("{:x}", p1);
221 225 for to_check in ds_status.unsure {
222 226 if cat_file_is_modified(repo, &to_check, &p1_hex)? {
223 227 if display_states.modified {
224 228 ds_status.modified.push(to_check);
225 229 }
226 230 } else {
227 231 if display_states.clean {
228 232 ds_status.clean.push(to_check);
229 233 }
230 234 }
231 235 }
232 236 }
233 237 if display_states.modified {
234 238 display_status_paths(ui, &mut ds_status.modified, b"M")?;
235 239 }
236 240 if display_states.added {
237 241 display_status_paths(ui, &mut ds_status.added, b"A")?;
238 242 }
239 243 if display_states.removed {
240 244 display_status_paths(ui, &mut ds_status.removed, b"R")?;
241 245 }
242 246 if display_states.deleted {
243 247 display_status_paths(ui, &mut ds_status.deleted, b"!")?;
244 248 }
245 249 if display_states.unknown {
246 250 display_status_paths(ui, &mut ds_status.unknown, b"?")?;
247 251 }
248 252 if display_states.ignored {
249 253 display_status_paths(ui, &mut ds_status.ignored, b"I")?;
250 254 }
251 255 if display_states.clean {
252 256 display_status_paths(ui, &mut ds_status.clean, b"C")?;
253 257 }
254 258 Ok(())
255 259 }
256 260
257 261 // Probably more elegant to use a Deref or Borrow trait rather than
258 262 // hardcode HgPathBuf, but probably not really useful at this point
259 263 fn display_status_paths(
260 264 ui: &Ui,
261 265 paths: &mut [HgPathCow],
262 266 status_prefix: &[u8],
263 267 ) -> Result<(), CommandError> {
264 268 paths.sort_unstable();
265 269 for path in paths {
266 270 // Same TODO as in commands::root
267 271 let bytes: &[u8] = path.as_bytes();
268 272 // TODO optim, probably lots of unneeded copies here, especially
269 273 // if out stream is buffered
270 274 ui.write_stdout(&[status_prefix, b" ", bytes, b"\n"].concat())?;
271 275 }
272 276 Ok(())
273 277 }
274 278
275 279 /// Check if a file is modified by comparing actual repo store and file system.
276 280 ///
277 281 /// This is meant to be used for those that the dirstate cannot resolve, due
278 282 /// to time resolution limits.
279 283 ///
280 284 /// TODO: detect permission bits and similar metadata modifications
281 285 fn cat_file_is_modified(
282 286 repo: &Repo,
283 287 hg_path: &HgPath,
284 288 rev: &str,
285 289 ) -> Result<bool, CommandError> {
286 290 // TODO CatRev expects &[HgPathBuf], something like
287 291 // &[impl Deref<HgPath>] would be nicer and should avoid the copy
288 292 let path_bufs = [hg_path.into()];
289 293 // TODO IIUC CatRev returns a simple Vec<u8> for all files
290 294 // being able to tell them apart as (path, bytes) would be nicer
291 295 // and OPTIM would allow manifest resolution just once.
292 296 let output = cat(repo, rev, &path_bufs).map_err(|e| (e, rev))?;
293 297
294 298 let fs_path = repo
295 299 .working_directory_vfs()
296 300 .join(hg_path_to_os_string(hg_path).expect("HgPath conversion"));
297 301 let hg_data_len: u64 = match output.concatenated.len().try_into() {
298 302 Ok(v) => v,
299 303 Err(_) => {
300 304 // conversion of data length to u64 failed,
301 305 // good luck for any file to have this content
302 306 return Ok(true);
303 307 }
304 308 };
305 309 let fobj = fs::File::open(&fs_path).when_reading_file(&fs_path)?;
306 310 if fobj.metadata().map_err(|e| StatusError::from(e))?.len() != hg_data_len
307 311 {
308 312 return Ok(true);
309 313 }
310 314 for (fs_byte, hg_byte) in
311 315 BufReader::new(fobj).bytes().zip(output.concatenated)
312 316 {
313 317 if fs_byte.map_err(|e| StatusError::from(e))? != hg_byte {
314 318 return Ok(true);
315 319 }
316 320 }
317 321 Ok(false)
318 322 }
General Comments 0
You need to be logged in to leave comments. Login now