##// END OF EJS Templates
rhg-files: reuse centralized dirstate logic...
Raphaël Gomès -
r50875:95ffa065 default
parent child Browse files
Show More
@@ -1,875 +1,849
1 1 //! The "version 2" disk representation of the dirstate
2 2 //!
3 3 //! See `mercurial/helptext/internals/dirstate-v2.txt`
4 4
5 5 use crate::dirstate::{DirstateV2Data, TruncatedTimestamp};
6 6 use crate::dirstate_tree::dirstate_map::DirstateVersion;
7 7 use crate::dirstate_tree::dirstate_map::{self, DirstateMap, NodeRef};
8 8 use crate::dirstate_tree::path_with_basename::WithBasename;
9 9 use crate::errors::HgError;
10 10 use crate::utils::hg_path::HgPath;
11 11 use crate::DirstateEntry;
12 12 use crate::DirstateError;
13 13 use crate::DirstateParents;
14 14 use bitflags::bitflags;
15 15 use bytes_cast::unaligned::{U16Be, U32Be};
16 16 use bytes_cast::BytesCast;
17 17 use format_bytes::format_bytes;
18 18 use rand::Rng;
19 19 use std::borrow::Cow;
20 20 use std::fmt::Write;
21 21
22 22 /// Added at the start of `.hg/dirstate` when the "v2" format is used.
23 23 /// This is a redundant sanity check more than an actual "magic number" since
24 24 /// `.hg/requires` already governs which format should be used.
25 25 pub const V2_FORMAT_MARKER: &[u8; 12] = b"dirstate-v2\n";
26 26
27 27 /// Keep space for 256-bit hashes
28 28 const STORED_NODE_ID_BYTES: usize = 32;
29 29
30 30 /// … even though only 160 bits are used for now, with SHA-1
31 31 const USED_NODE_ID_BYTES: usize = 20;
32 32
33 33 pub(super) const IGNORE_PATTERNS_HASH_LEN: usize = 20;
34 34 pub(super) type IgnorePatternsHash = [u8; IGNORE_PATTERNS_HASH_LEN];
35 35
36 36 /// Must match constants of the same names in `mercurial/dirstateutils/v2.py`
37 37 const TREE_METADATA_SIZE: usize = 44;
38 38 const NODE_SIZE: usize = 44;
39 39
40 40 /// Make sure that size-affecting changes are made knowingly
41 41 #[allow(unused)]
42 42 fn static_assert_size_of() {
43 43 let _ = std::mem::transmute::<TreeMetadata, [u8; TREE_METADATA_SIZE]>;
44 44 let _ = std::mem::transmute::<DocketHeader, [u8; TREE_METADATA_SIZE + 81]>;
45 45 let _ = std::mem::transmute::<Node, [u8; NODE_SIZE]>;
46 46 }
47 47
48 48 // Must match `HEADER` in `mercurial/dirstateutils/docket.py`
//
// Fixed-size header at the start of the docket file. `read_docket` casts the
// beginning of the file to this struct and treats the bytes that follow as
// the data-file identifier.
//
// `static_assert_size_of` pins its size to `TREE_METADATA_SIZE + 81` bytes:
// 12 (marker) + 2 * 32 (parents) + 4 (data_size) + 1 (uuid_size) = 81.
49 49 #[derive(BytesCast)]
50 50 #[repr(C)]
51 51 struct DocketHeader {
// Must be equal to `V2_FORMAT_MARKER`; checked by `read_docket`.
52 52 marker: [u8; V2_FORMAT_MARKER.len()],
// Working directory parents, each stored on `STORED_NODE_ID_BYTES` bytes.
// `Docket::parents` only reads the first `USED_NODE_ID_BYTES` of each.
53 53 parent_1: [u8; STORED_NODE_ID_BYTES],
54 54 parent_2: [u8; STORED_NODE_ID_BYTES],
55 55
// Returned as raw bytes by `Docket::tree_metadata`.
56 56 metadata: TreeMetadata,
57 57
58 58 /// Counted in bytes
59 59 data_size: Size,
60 60
// Number of identifier bytes expected after the header; `read_docket`
// rejects the docket if the remaining byte count differs.
61 61 uuid_size: u8,
62 62 }
63 63
/// Borrowed view of a parsed docket file, as produced by `read_docket`.
64 64 pub struct Docket<'on_disk> {
// The fixed-size header, borrowed from the docket bytes.
65 65 header: &'on_disk DocketHeader,
/// Bytes following the header; `data_filename` appends them to
/// `dirstate.` to name the data file.
66 66 pub uuid: &'on_disk [u8],
67 67 }
68 68
69 69 /// Fields are documented in the *Tree metadata in the docket file*
70 70 /// section of `mercurial/helptext/internals/dirstate-v2.txt`
///
/// `static_assert_size_of` pins this struct to `TREE_METADATA_SIZE` bytes.
71 71 #[derive(BytesCast)]
72 72 #[repr(C)]
73 73 pub struct TreeMetadata {
// Location of the top-level nodes; `read` starts parsing from here.
74 74 root_nodes: ChildNodes,
// The three counters below are copied to/from the `DirstateMap` fields of
// the same names by `read` and `write`.
75 75 nodes_with_entry_count: Size,
76 76 nodes_with_copy_source_count: Size,
77 77 unreachable_bytes: Size,
// Always written as zeroes by `write`.
78 78 unused: [u8; 4],
79 79
80 80 /// See *Optional hash of ignore patterns* section of
81 81 /// `mercurial/helptext/internals/dirstate-v2.txt`
82 82 ignore_patterns_hash: IgnorePatternsHash,
83 83 }
84 84
85 85 /// Fields are documented in the *The data file format*
86 86 /// section of `mercurial/helptext/internals/dirstate-v2.txt`
///
/// `static_assert_size_of` pins this struct to `NODE_SIZE` bytes.
87 87 #[derive(BytesCast, Debug)]
88 88 #[repr(C)]
89 89 pub(super) struct Node {
// Location of the node's full path within the data file.
90 90 full_path: PathSlice,
91 91
92 92 /// In bytes from `self.full_path.start`
93 93 base_name_start: PathSize,
94 94
// A `start` of zero means "no copy source" (see `has_copy_source`).
95 95 copy_source: OptPathSlice,
// Location of this node's child nodes within the data file.
96 96 children: ChildNodes,
97 97 pub(super) descendants_with_entry_count: Size,
98 98 pub(super) tracked_descendants_count: Size,
// Raw bits of `Flags`; decoded with `Flags::from_bits_truncate`.
99 99 flags: U16Be,
// Only used for the entry when `HAS_MODE_AND_SIZE` is set.
100 100 size: U32Be,
// Only used when `HAS_MTIME` is set.
101 101 mtime: PackedTruncatedTimestamp,
102 102 }
103 103
// Bit flags stored in `Node::flags`. The bit positions are part of the
// on-disk format.
104 104 bitflags! {
105 105 #[repr(C)]
106 106 struct Flags: u16 {
// A node has a dirstate entry when any of these three bits is set
// (see `Node::has_entry`).
107 107 const WDIR_TRACKED = 1 << 0;
108 108 const P1_TRACKED = 1 << 1;
109 109 const P2_INFO = 1 << 2;
// Used by `Node::synthesize_unix_mode` to rebuild a Unix mode.
110 110 const MODE_EXEC_PERM = 1 << 3;
111 111 const MODE_IS_SYMLINK = 1 << 4;
// The `HAS_FALLBACK_*` bits say whether the matching `FALLBACK_*` bit
// carries a value.
112 112 const HAS_FALLBACK_EXEC = 1 << 5;
113 113 const FALLBACK_EXEC = 1 << 6;
114 114 const HAS_FALLBACK_SYMLINK = 1 << 7;
115 115 const FALLBACK_SYMLINK = 1 << 8;
// When set, `Node::assume_entry` ignores the stored mode, size and mtime.
116 116 const EXPECTED_STATE_IS_MODIFIED = 1 << 9;
117 117 const HAS_MODE_AND_SIZE = 1 <<10;
118 118 const HAS_MTIME = 1 <<11;
119 119 const MTIME_SECOND_AMBIGUOUS = 1 << 12;
// `DIRECTORY`, `HAS_MTIME` and `ALL_UNKNOWN_RECORDED` must all be set for
// `Node::cached_directory_mtime` to return a value.
120 120 const DIRECTORY = 1 <<13;
121 121 const ALL_UNKNOWN_RECORDED = 1 <<14;
// Never set by the writer in this file.
122 122 const ALL_IGNORED_RECORDED = 1 <<15;
123 123 }
124 124 }
125 125
126 126 /// Duration since the Unix epoch
///
/// On-disk form of a `TruncatedTimestamp`; the "second ambiguous" bit is not
/// stored here but in `Flags::MTIME_SECOND_AMBIGUOUS`.
127 127 #[derive(BytesCast, Copy, Clone, Debug)]
128 128 #[repr(C)]
129 129 struct PackedTruncatedTimestamp {
130 130 truncated_seconds: U32Be,
131 131 nanoseconds: U32Be,
132 132 }
133 133
134 134 /// Counted in bytes from the start of the file
135 135 ///
136 136 /// NOTE: not supporting `.hg/dirstate` files larger than 4 GiB.
137 137 type Offset = U32Be;
138 138
139 139 /// Counted in number of items
140 140 ///
141 141 /// NOTE: we choose not to support counting more than 4 billion nodes anywhere.
///
/// This alias is also used for `DocketHeader::data_size`, which is
/// documented as counted in bytes.
142 142 type Size = U32Be;
143 143
144 144 /// Counted in bytes
145 145 ///
146 146 /// NOTE: we choose not to support file names/paths longer than 64 KiB.
147 147 type PathSize = U16Be;
148 148
149 149 /// A contiguous sequence of `len` times `Node`, representing the child nodes
150 150 /// of either some other node or of the repository root.
151 151 ///
152 152 /// Always sorted by ascending `full_path`, to allow binary search.
153 153 /// Since nodes with the same parent nodes also have the same parent path,
154 154 /// only the `base_name`s need to be compared during binary search.
155 155 #[derive(BytesCast, Copy, Clone, Debug)]
156 156 #[repr(C)]
157 157 struct ChildNodes {
// Byte offset of the first node.
158 158 start: Offset,
// Number of nodes (not bytes).
159 159 len: Size,
160 160 }
161 161
162 162 /// A `HgPath` of `len` bytes
163 163 #[derive(BytesCast, Copy, Clone, Debug)]
164 164 #[repr(C)]
165 165 struct PathSlice {
// Byte offset of the first byte of the path.
166 166 start: Offset,
// Length of the path in bytes.
167 167 len: PathSize,
168 168 }
169 169
170 170 /// Either nothing if `start == 0`, or a `HgPath` of `len` bytes
// Same layout as `PathSlice`; only the interpretation of `start == 0` differs.
171 171 type OptPathSlice = PathSlice;
172 172
/// Malformed data was found in `.hg/dirstate` while using the "v2" format.
///
/// Seeing this error means that either Mercurial has a bug or the repository
/// is corrupted.
#[derive(Debug)]
pub struct DirstateV2ParseError {
    message: String,
}

impl DirstateV2ParseError {
    /// Creates an error carrying `message`, which may be anything convertible
    /// into a `String`.
    pub fn new<S: Into<String>>(message: S) -> Self {
        let message = message.into();
        Self { message }
    }
}
188 188
189 189 impl From<DirstateV2ParseError> for HgError {
190 190 fn from(e: DirstateV2ParseError) -> Self {
191 191 HgError::corrupted(format!("dirstate-v2 parse error: {}", e.message))
192 192 }
193 193 }
194 194
195 195 impl From<DirstateV2ParseError> for crate::DirstateError {
196 196 fn from(error: DirstateV2ParseError) -> Self {
197 197 HgError::from(error).into()
198 198 }
199 199 }
200 200
201 201 impl TreeMetadata {
202 202 pub fn as_bytes(&self) -> &[u8] {
203 203 BytesCast::as_bytes(self)
204 204 }
205 205 }
206 206
207 207 impl<'on_disk> Docket<'on_disk> {
208 208 /// Generate the identifier for a new data file
209 209 ///
210 210 /// TODO: support the `HGTEST_UUIDFILE` environment variable.
211 211 /// See `mercurial/revlogutils/docket.py`
212 212 pub fn new_uid() -> String {
213 213 const ID_LENGTH: usize = 8;
214 214 let mut id = String::with_capacity(ID_LENGTH);
215 215 let mut rng = rand::thread_rng();
216 216 for _ in 0..ID_LENGTH {
217 217 // One random hexadecimal digit.
218 218 // `unwrap` never panics because `impl Write for String`
219 219 // never returns an error.
220 220 write!(&mut id, "{:x}", rng.gen_range(0..16)).unwrap();
221 221 }
222 222 id
223 223 }
224 224
225 225 pub fn serialize(
226 226 parents: DirstateParents,
227 227 tree_metadata: TreeMetadata,
228 228 data_size: u64,
229 229 uuid: &[u8],
230 230 ) -> Result<Vec<u8>, std::num::TryFromIntError> {
231 231 let header = DocketHeader {
232 232 marker: *V2_FORMAT_MARKER,
233 233 parent_1: parents.p1.pad_to_256_bits(),
234 234 parent_2: parents.p2.pad_to_256_bits(),
235 235 metadata: tree_metadata,
236 236 data_size: u32::try_from(data_size)?.into(),
237 237 uuid_size: uuid.len().try_into()?,
238 238 };
239 239 let header = header.as_bytes();
240 240 let mut docket = Vec::with_capacity(header.len() + uuid.len());
241 241 docket.extend_from_slice(header);
242 242 docket.extend_from_slice(uuid);
243 243 Ok(docket)
244 244 }
245 245
246 246 pub fn parents(&self) -> DirstateParents {
247 247 use crate::Node;
248 248 let p1 = Node::try_from(&self.header.parent_1[..USED_NODE_ID_BYTES])
249 249 .unwrap();
250 250 let p2 = Node::try_from(&self.header.parent_2[..USED_NODE_ID_BYTES])
251 251 .unwrap();
252 252 DirstateParents { p1, p2 }
253 253 }
254 254
255 255 pub fn tree_metadata(&self) -> &[u8] {
256 256 self.header.metadata.as_bytes()
257 257 }
258 258
259 259 pub fn data_size(&self) -> usize {
260 260 // This `unwrap` could only panic on a 16-bit CPU
261 261 self.header.data_size.get().try_into().unwrap()
262 262 }
263 263
264 264 pub fn data_filename(&self) -> String {
265 265 String::from_utf8(format_bytes!(b"dirstate.{}", self.uuid)).unwrap()
266 266 }
267 267 }
268 268
269 269 pub fn read_docket(
270 270 on_disk: &[u8],
271 271 ) -> Result<Docket<'_>, DirstateV2ParseError> {
272 272 let (header, uuid) = DocketHeader::from_bytes(on_disk).map_err(|e| {
273 273 DirstateV2ParseError::new(format!("when reading docket, {}", e))
274 274 })?;
275 275 let uuid_size = header.uuid_size as usize;
276 276 if header.marker == *V2_FORMAT_MARKER && uuid.len() == uuid_size {
277 277 Ok(Docket { header, uuid })
278 278 } else {
279 279 Err(DirstateV2ParseError::new(
280 280 "invalid format marker or uuid size",
281 281 ))
282 282 }
283 283 }
284 284
285 285 pub(super) fn read<'on_disk>(
286 286 on_disk: &'on_disk [u8],
287 287 metadata: &[u8],
288 288 ) -> Result<DirstateMap<'on_disk>, DirstateV2ParseError> {
289 289 if on_disk.is_empty() {
290 290 let mut map = DirstateMap::empty(on_disk);
291 291 map.dirstate_version = DirstateVersion::V2;
292 292 return Ok(map);
293 293 }
294 294 let (meta, _) = TreeMetadata::from_bytes(metadata).map_err(|e| {
295 295 DirstateV2ParseError::new(format!("when parsing tree metadata, {}", e))
296 296 })?;
297 297 let dirstate_map = DirstateMap {
298 298 on_disk,
299 299 root: dirstate_map::ChildNodes::OnDisk(
300 300 read_nodes(on_disk, meta.root_nodes).map_err(|mut e| {
301 301 e.message = format!("{}, when reading root notes", e.message);
302 302 e
303 303 })?,
304 304 ),
305 305 nodes_with_entry_count: meta.nodes_with_entry_count.get(),
306 306 nodes_with_copy_source_count: meta.nodes_with_copy_source_count.get(),
307 307 ignore_patterns_hash: meta.ignore_patterns_hash,
308 308 unreachable_bytes: meta.unreachable_bytes.get(),
309 309 old_data_size: on_disk.len(),
310 310 dirstate_version: DirstateVersion::V2,
311 311 };
312 312 Ok(dirstate_map)
313 313 }
314 314
315 315 impl Node {
316 316 pub(super) fn full_path<'on_disk>(
317 317 &self,
318 318 on_disk: &'on_disk [u8],
319 319 ) -> Result<&'on_disk HgPath, DirstateV2ParseError> {
320 320 read_hg_path(on_disk, self.full_path)
321 321 }
322 322
323 323 pub(super) fn base_name_start(
324 324 &self,
325 325 ) -> Result<usize, DirstateV2ParseError> {
326 326 let start = self.base_name_start.get();
327 327 if start < self.full_path.len.get() {
328 328 let start = usize::try_from(start)
329 329 // u32 -> usize, could only panic on a 16-bit CPU
330 330 .expect("dirstate-v2 base_name_start out of bounds");
331 331 Ok(start)
332 332 } else {
333 333 Err(DirstateV2ParseError::new("not enough bytes for base name"))
334 334 }
335 335 }
336 336
337 337 pub(super) fn base_name<'on_disk>(
338 338 &self,
339 339 on_disk: &'on_disk [u8],
340 340 ) -> Result<&'on_disk HgPath, DirstateV2ParseError> {
341 341 let full_path = self.full_path(on_disk)?;
342 342 let base_name_start = self.base_name_start()?;
343 343 Ok(HgPath::new(&full_path.as_bytes()[base_name_start..]))
344 344 }
345 345
346 346 pub(super) fn path<'on_disk>(
347 347 &self,
348 348 on_disk: &'on_disk [u8],
349 349 ) -> Result<dirstate_map::NodeKey<'on_disk>, DirstateV2ParseError> {
350 350 Ok(WithBasename::from_raw_parts(
351 351 Cow::Borrowed(self.full_path(on_disk)?),
352 352 self.base_name_start()?,
353 353 ))
354 354 }
355 355
356 356 pub(super) fn has_copy_source(&self) -> bool {
357 357 self.copy_source.start.get() != 0
358 358 }
359 359
360 360 pub(super) fn copy_source<'on_disk>(
361 361 &self,
362 362 on_disk: &'on_disk [u8],
363 363 ) -> Result<Option<&'on_disk HgPath>, DirstateV2ParseError> {
364 364 Ok(if self.has_copy_source() {
365 365 Some(read_hg_path(on_disk, self.copy_source)?)
366 366 } else {
367 367 None
368 368 })
369 369 }
370 370
371 371 fn flags(&self) -> Flags {
372 372 Flags::from_bits_truncate(self.flags.get())
373 373 }
374 374
375 375 fn has_entry(&self) -> bool {
376 376 self.flags().intersects(
377 377 Flags::WDIR_TRACKED | Flags::P1_TRACKED | Flags::P2_INFO,
378 378 )
379 379 }
380 380
381 381 pub(super) fn node_data(
382 382 &self,
383 383 ) -> Result<dirstate_map::NodeData, DirstateV2ParseError> {
384 384 if self.has_entry() {
385 385 Ok(dirstate_map::NodeData::Entry(self.assume_entry()?))
386 386 } else if let Some(mtime) = self.cached_directory_mtime()? {
387 387 Ok(dirstate_map::NodeData::CachedDirectory { mtime })
388 388 } else {
389 389 Ok(dirstate_map::NodeData::None)
390 390 }
391 391 }
392 392
393 393 pub(super) fn cached_directory_mtime(
394 394 &self,
395 395 ) -> Result<Option<TruncatedTimestamp>, DirstateV2ParseError> {
396 396 // For now we do not have code to handle the absence of
397 397 // ALL_UNKNOWN_RECORDED, so we ignore the mtime if the flag is
398 398 // unset.
399 399 if self.flags().contains(Flags::DIRECTORY)
400 400 && self.flags().contains(Flags::HAS_MTIME)
401 401 && self.flags().contains(Flags::ALL_UNKNOWN_RECORDED)
402 402 {
403 403 Ok(Some(self.mtime()?))
404 404 } else {
405 405 Ok(None)
406 406 }
407 407 }
408 408
409 409 fn synthesize_unix_mode(&self) -> u32 {
410 410 let file_type = if self.flags().contains(Flags::MODE_IS_SYMLINK) {
411 411 libc::S_IFLNK
412 412 } else {
413 413 libc::S_IFREG
414 414 };
415 415 let permissions = if self.flags().contains(Flags::MODE_EXEC_PERM) {
416 416 0o755
417 417 } else {
418 418 0o644
419 419 };
420 420 file_type | permissions
421 421 }
422 422
423 423 fn mtime(&self) -> Result<TruncatedTimestamp, DirstateV2ParseError> {
424 424 let mut m: TruncatedTimestamp = self.mtime.try_into()?;
425 425 if self.flags().contains(Flags::MTIME_SECOND_AMBIGUOUS) {
426 426 m.second_ambiguous = true;
427 427 }
428 428 Ok(m)
429 429 }
430 430
431 431 fn assume_entry(&self) -> Result<DirstateEntry, DirstateV2ParseError> {
432 432 // TODO: convert through raw bits instead?
433 433 let wc_tracked = self.flags().contains(Flags::WDIR_TRACKED);
434 434 let p1_tracked = self.flags().contains(Flags::P1_TRACKED);
435 435 let p2_info = self.flags().contains(Flags::P2_INFO);
436 436 let mode_size = if self.flags().contains(Flags::HAS_MODE_AND_SIZE)
437 437 && !self.flags().contains(Flags::EXPECTED_STATE_IS_MODIFIED)
438 438 {
439 439 Some((self.synthesize_unix_mode(), self.size.into()))
440 440 } else {
441 441 None
442 442 };
443 443 let mtime = if self.flags().contains(Flags::HAS_MTIME)
444 444 && !self.flags().contains(Flags::DIRECTORY)
445 445 && !self.flags().contains(Flags::EXPECTED_STATE_IS_MODIFIED)
446 446 {
447 447 Some(self.mtime()?)
448 448 } else {
449 449 None
450 450 };
451 451 let fallback_exec = if self.flags().contains(Flags::HAS_FALLBACK_EXEC)
452 452 {
453 453 Some(self.flags().contains(Flags::FALLBACK_EXEC))
454 454 } else {
455 455 None
456 456 };
457 457 let fallback_symlink =
458 458 if self.flags().contains(Flags::HAS_FALLBACK_SYMLINK) {
459 459 Some(self.flags().contains(Flags::FALLBACK_SYMLINK))
460 460 } else {
461 461 None
462 462 };
463 463 Ok(DirstateEntry::from_v2_data(DirstateV2Data {
464 464 wc_tracked,
465 465 p1_tracked,
466 466 p2_info,
467 467 mode_size,
468 468 mtime,
469 469 fallback_exec,
470 470 fallback_symlink,
471 471 }))
472 472 }
473 473
474 474 pub(super) fn entry(
475 475 &self,
476 476 ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {
477 477 if self.has_entry() {
478 478 Ok(Some(self.assume_entry()?))
479 479 } else {
480 480 Ok(None)
481 481 }
482 482 }
483 483
484 484 pub(super) fn children<'on_disk>(
485 485 &self,
486 486 on_disk: &'on_disk [u8],
487 487 ) -> Result<&'on_disk [Node], DirstateV2ParseError> {
488 488 read_nodes(on_disk, self.children)
489 489 }
490 490
491 491 pub(super) fn to_in_memory_node<'on_disk>(
492 492 &self,
493 493 on_disk: &'on_disk [u8],
494 494 ) -> Result<dirstate_map::Node<'on_disk>, DirstateV2ParseError> {
495 495 Ok(dirstate_map::Node {
496 496 children: dirstate_map::ChildNodes::OnDisk(
497 497 self.children(on_disk)?,
498 498 ),
499 499 copy_source: self.copy_source(on_disk)?.map(Cow::Borrowed),
500 500 data: self.node_data()?,
501 501 descendants_with_entry_count: self
502 502 .descendants_with_entry_count
503 503 .get(),
504 504 tracked_descendants_count: self.tracked_descendants_count.get(),
505 505 })
506 506 }
507 507
508 508 fn from_dirstate_entry(
509 509 entry: &DirstateEntry,
510 510 ) -> (Flags, U32Be, PackedTruncatedTimestamp) {
511 511 let DirstateV2Data {
512 512 wc_tracked,
513 513 p1_tracked,
514 514 p2_info,
515 515 mode_size: mode_size_opt,
516 516 mtime: mtime_opt,
517 517 fallback_exec,
518 518 fallback_symlink,
519 519 } = entry.v2_data();
520 520 // TODO: convert through raw flag bits instead?
521 521 let mut flags = Flags::empty();
522 522 flags.set(Flags::WDIR_TRACKED, wc_tracked);
523 523 flags.set(Flags::P1_TRACKED, p1_tracked);
524 524 flags.set(Flags::P2_INFO, p2_info);
525 525 let size = if let Some((m, s)) = mode_size_opt {
526 526 let exec_perm = m & (libc::S_IXUSR as u32) != 0;
527 527 let is_symlink = m & (libc::S_IFMT as u32) == libc::S_IFLNK as u32;
528 528 flags.set(Flags::MODE_EXEC_PERM, exec_perm);
529 529 flags.set(Flags::MODE_IS_SYMLINK, is_symlink);
530 530 flags.insert(Flags::HAS_MODE_AND_SIZE);
531 531 s.into()
532 532 } else {
533 533 0.into()
534 534 };
535 535 let mtime = if let Some(m) = mtime_opt {
536 536 flags.insert(Flags::HAS_MTIME);
537 537 if m.second_ambiguous {
538 538 flags.insert(Flags::MTIME_SECOND_AMBIGUOUS);
539 539 };
540 540 m.into()
541 541 } else {
542 542 PackedTruncatedTimestamp::null()
543 543 };
544 544 if let Some(f_exec) = fallback_exec {
545 545 flags.insert(Flags::HAS_FALLBACK_EXEC);
546 546 if f_exec {
547 547 flags.insert(Flags::FALLBACK_EXEC);
548 548 }
549 549 }
550 550 if let Some(f_symlink) = fallback_symlink {
551 551 flags.insert(Flags::HAS_FALLBACK_SYMLINK);
552 552 if f_symlink {
553 553 flags.insert(Flags::FALLBACK_SYMLINK);
554 554 }
555 555 }
556 556 (flags, size, mtime)
557 557 }
558 558 }
559 559
560 560 fn read_hg_path(
561 561 on_disk: &[u8],
562 562 slice: PathSlice,
563 563 ) -> Result<&HgPath, DirstateV2ParseError> {
564 564 read_slice(on_disk, slice.start, slice.len.get()).map(HgPath::new)
565 565 }
566 566
567 567 fn read_nodes(
568 568 on_disk: &[u8],
569 569 slice: ChildNodes,
570 570 ) -> Result<&[Node], DirstateV2ParseError> {
571 571 read_slice(on_disk, slice.start, slice.len.get())
572 572 }
573 573
574 574 fn read_slice<T, Len>(
575 575 on_disk: &[u8],
576 576 start: Offset,
577 577 len: Len,
578 578 ) -> Result<&[T], DirstateV2ParseError>
579 579 where
580 580 T: BytesCast,
581 581 Len: TryInto<usize>,
582 582 {
583 583 // Either `usize::MAX` would result in "out of bounds" error since a single
584 584 // `&[u8]` cannot occupy the entire addess space.
585 585 let start = start.get().try_into().unwrap_or(std::usize::MAX);
586 586 let len = len.try_into().unwrap_or(std::usize::MAX);
587 587 let bytes = match on_disk.get(start..) {
588 588 Some(bytes) => bytes,
589 589 None => {
590 590 return Err(DirstateV2ParseError::new(
591 591 "not enough bytes from disk",
592 592 ))
593 593 }
594 594 };
595 595 T::slice_from_bytes(bytes, len)
596 596 .map_err(|e| {
597 597 DirstateV2ParseError::new(format!("when reading a slice, {}", e))
598 598 })
599 599 .map(|(slice, _rest)| slice)
600 600 }
601 601
// NOTE(review): these lines carry a single gutter number, which suggests they
// are the removed side of the diff — confirm before relying on this function.
//
// Calls `f` with the full path of every node whose entry is tracked, walking
// the on-disk tree depth-first from the root nodes named by `metadata`.
// Each node is visited before its children. Fails on the first node, path or
// metadata that cannot be parsed.
602 pub(crate) fn for_each_tracked_path<'on_disk>(
603 on_disk: &'on_disk [u8],
604 metadata: &[u8],
605 mut f: impl FnMut(&'on_disk HgPath),
606 ) -> Result<(), DirstateV2ParseError> {
607 let (meta, _) = TreeMetadata::from_bytes(metadata).map_err(|e| {
608 DirstateV2ParseError::new(format!("when parsing tree metadata, {}", e))
609 })?;
// Recursive helper: handles one sequence of sibling nodes.
610 fn recur<'on_disk>(
611 on_disk: &'on_disk [u8],
612 nodes: ChildNodes,
613 f: &mut impl FnMut(&'on_disk HgPath),
614 ) -> Result<(), DirstateV2ParseError> {
615 for node in read_nodes(on_disk, nodes)? {
616 if let Some(entry) = node.entry()? {
617 if entry.tracked() {
618 f(node.full_path(on_disk)?)
619 }
620 }
// Children are visited even when the node itself has no entry.
621 recur(on_disk, node.children, f)?
622 }
623 Ok(())
624 }
625 recur(on_disk, meta.root_nodes, &mut f)
626 }
627
628 602 /// Returns new data and metadata, together with whether that data should be
629 603 /// appended to the existing data file whose content is at
630 604 /// `dirstate_map.on_disk` (true), instead of written to a new data file
631 605 /// (false), and the previous size of data on disk.
632 606 pub(super) fn write(
633 607 dirstate_map: &DirstateMap,
634 608 can_append: bool,
635 609 ) -> Result<(Vec<u8>, TreeMetadata, bool, usize), DirstateError> {
636 610 let append = can_append && dirstate_map.write_should_append();
637 611
638 612 // This ignores the space for paths, and for nodes without an entry.
639 613 // TODO: better estimate? Skip the `Vec` and write to a file directly?
640 614 let size_guess = std::mem::size_of::<Node>()
641 615 * dirstate_map.nodes_with_entry_count as usize;
642 616
643 617 let mut writer = Writer {
644 618 dirstate_map,
645 619 append,
646 620 out: Vec::with_capacity(size_guess),
647 621 };
648 622
649 623 let root_nodes = writer.write_nodes(dirstate_map.root.as_ref())?;
650 624
651 625 let unreachable_bytes = if append {
652 626 dirstate_map.unreachable_bytes
653 627 } else {
654 628 0
655 629 };
656 630 let meta = TreeMetadata {
657 631 root_nodes,
658 632 nodes_with_entry_count: dirstate_map.nodes_with_entry_count.into(),
659 633 nodes_with_copy_source_count: dirstate_map
660 634 .nodes_with_copy_source_count
661 635 .into(),
662 636 unreachable_bytes: unreachable_bytes.into(),
663 637 unused: [0; 4],
664 638 ignore_patterns_hash: dirstate_map.ignore_patterns_hash,
665 639 };
666 640 Ok((writer.out, meta, append, dirstate_map.old_data_size))
667 641 }
668 642
669 643 struct Writer<'dmap, 'on_disk> {
670 644 dirstate_map: &'dmap DirstateMap<'on_disk>,
671 645 append: bool,
672 646 out: Vec<u8>,
673 647 }
674 648
675 649 impl Writer<'_, '_> {
676 650 fn write_nodes(
677 651 &mut self,
678 652 nodes: dirstate_map::ChildNodesRef,
679 653 ) -> Result<ChildNodes, DirstateError> {
680 654 // Reuse already-written nodes if possible
681 655 if self.append {
682 656 if let dirstate_map::ChildNodesRef::OnDisk(nodes_slice) = nodes {
683 657 let start = self.on_disk_offset_of(nodes_slice).expect(
684 658 "dirstate-v2 OnDisk nodes not found within on_disk",
685 659 );
686 660 let len = child_nodes_len_from_usize(nodes_slice.len());
687 661 return Ok(ChildNodes { start, len });
688 662 }
689 663 }
690 664
691 665 // `dirstate_map::ChildNodes::InMemory` contains a `HashMap` which has
692 666 // undefined iteration order. Sort to enable binary search in the
693 667 // written file.
694 668 let nodes = nodes.sorted();
695 669 let nodes_len = nodes.len();
696 670
697 671 // First accumulate serialized nodes in a `Vec`
698 672 let mut on_disk_nodes = Vec::with_capacity(nodes_len);
699 673 for node in nodes {
700 674 let children =
701 675 self.write_nodes(node.children(self.dirstate_map.on_disk)?)?;
702 676 let full_path = node.full_path(self.dirstate_map.on_disk)?;
703 677 let full_path = self.write_path(full_path.as_bytes());
704 678 let copy_source = if let Some(source) =
705 679 node.copy_source(self.dirstate_map.on_disk)?
706 680 {
707 681 self.write_path(source.as_bytes())
708 682 } else {
709 683 PathSlice {
710 684 start: 0.into(),
711 685 len: 0.into(),
712 686 }
713 687 };
714 688 on_disk_nodes.push(match node {
715 689 NodeRef::InMemory(path, node) => {
716 690 let (flags, size, mtime) = match &node.data {
717 691 dirstate_map::NodeData::Entry(entry) => {
718 692 Node::from_dirstate_entry(entry)
719 693 }
720 694 dirstate_map::NodeData::CachedDirectory { mtime } => {
721 695 // we currently never set a mtime if unknown file
722 696 // are present.
723 697 // So if we have a mtime for a directory, we know
724 698 // they are no unknown
725 699 // files and we
726 700 // blindly set ALL_UNKNOWN_RECORDED.
727 701 //
728 702 // We never set ALL_IGNORED_RECORDED since we
729 703 // don't track that case
730 704 // currently.
731 705 let mut flags = Flags::DIRECTORY
732 706 | Flags::HAS_MTIME
733 707 | Flags::ALL_UNKNOWN_RECORDED;
734 708 if mtime.second_ambiguous {
735 709 flags.insert(Flags::MTIME_SECOND_AMBIGUOUS)
736 710 }
737 711 (flags, 0.into(), (*mtime).into())
738 712 }
739 713 dirstate_map::NodeData::None => (
740 714 Flags::DIRECTORY,
741 715 0.into(),
742 716 PackedTruncatedTimestamp::null(),
743 717 ),
744 718 };
745 719 Node {
746 720 children,
747 721 copy_source,
748 722 full_path,
749 723 base_name_start: u16::try_from(path.base_name_start())
750 724 // Could only panic for paths over 64 KiB
751 725 .expect("dirstate-v2 path length overflow")
752 726 .into(),
753 727 descendants_with_entry_count: node
754 728 .descendants_with_entry_count
755 729 .into(),
756 730 tracked_descendants_count: node
757 731 .tracked_descendants_count
758 732 .into(),
759 733 flags: flags.bits().into(),
760 734 size,
761 735 mtime,
762 736 }
763 737 }
764 738 NodeRef::OnDisk(node) => Node {
765 739 children,
766 740 copy_source,
767 741 full_path,
768 742 ..*node
769 743 },
770 744 })
771 745 }
772 746 // … so we can write them contiguously, after writing everything else
773 747 // they refer to.
774 748 let start = self.current_offset();
775 749 let len = child_nodes_len_from_usize(nodes_len);
776 750 self.out.extend(on_disk_nodes.as_bytes());
777 751 Ok(ChildNodes { start, len })
778 752 }
779 753
780 754 /// If the given slice of items is within `on_disk`, returns its offset
781 755 /// from the start of `on_disk`.
782 756 fn on_disk_offset_of<T>(&self, slice: &[T]) -> Option<Offset>
783 757 where
784 758 T: BytesCast,
785 759 {
786 760 fn address_range(slice: &[u8]) -> std::ops::RangeInclusive<usize> {
787 761 let start = slice.as_ptr() as usize;
788 762 let end = start + slice.len();
789 763 start..=end
790 764 }
791 765 let slice_addresses = address_range(slice.as_bytes());
792 766 let on_disk_addresses = address_range(self.dirstate_map.on_disk);
793 767 if on_disk_addresses.contains(slice_addresses.start())
794 768 && on_disk_addresses.contains(slice_addresses.end())
795 769 {
796 770 let offset = slice_addresses.start() - on_disk_addresses.start();
797 771 Some(offset_from_usize(offset))
798 772 } else {
799 773 None
800 774 }
801 775 }
802 776
803 777 fn current_offset(&mut self) -> Offset {
804 778 let mut offset = self.out.len();
805 779 if self.append {
806 780 offset += self.dirstate_map.on_disk.len()
807 781 }
808 782 offset_from_usize(offset)
809 783 }
810 784
811 785 fn write_path(&mut self, slice: &[u8]) -> PathSlice {
812 786 let len = path_len_from_usize(slice.len());
813 787 // Reuse an already-written path if possible
814 788 if self.append {
815 789 if let Some(start) = self.on_disk_offset_of(slice) {
816 790 return PathSlice { start, len };
817 791 }
818 792 }
819 793 let start = self.current_offset();
820 794 self.out.extend(slice.as_bytes());
821 795 PathSlice { start, len }
822 796 }
823 797 }
824 798
825 799 fn offset_from_usize(x: usize) -> Offset {
826 800 u32::try_from(x)
827 801 // Could only panic for a dirstate file larger than 4 GiB
828 802 .expect("dirstate-v2 offset overflow")
829 803 .into()
830 804 }
831 805
832 806 fn child_nodes_len_from_usize(x: usize) -> Size {
833 807 u32::try_from(x)
834 808 // Could only panic with over 4 billion nodes
835 809 .expect("dirstate-v2 slice length overflow")
836 810 .into()
837 811 }
838 812
839 813 fn path_len_from_usize(x: usize) -> PathSize {
840 814 u16::try_from(x)
841 815 // Could only panic for paths over 64 KiB
842 816 .expect("dirstate-v2 path length overflow")
843 817 .into()
844 818 }
845 819
846 820 impl From<TruncatedTimestamp> for PackedTruncatedTimestamp {
847 821 fn from(timestamp: TruncatedTimestamp) -> Self {
848 822 Self {
849 823 truncated_seconds: timestamp.truncated_seconds().into(),
850 824 nanoseconds: timestamp.nanoseconds().into(),
851 825 }
852 826 }
853 827 }
854 828
855 829 impl TryFrom<PackedTruncatedTimestamp> for TruncatedTimestamp {
856 830 type Error = DirstateV2ParseError;
857 831
858 832 fn try_from(
859 833 timestamp: PackedTruncatedTimestamp,
860 834 ) -> Result<Self, Self::Error> {
861 835 Self::from_already_truncated(
862 836 timestamp.truncated_seconds.get(),
863 837 timestamp.nanoseconds.get(),
864 838 false,
865 839 )
866 840 }
867 841 }
868 842 impl PackedTruncatedTimestamp {
869 843 fn null() -> Self {
870 844 Self {
871 845 truncated_seconds: 0.into(),
872 846 nanoseconds: 0.into(),
873 847 }
874 848 }
875 849 }
@@ -1,82 +1,29
1 1 // list_tracked_files.rs
2 2 //
3 3 // Copyright 2020 Antoine Cezar <antoine.cezar@octobus.net>
4 4 //
5 5 // This software may be used and distributed according to the terms of the
6 6 // GNU General Public License version 2 or any later version.
7 7
8 use crate::dirstate::parsers::parse_dirstate_entries;
9 use crate::dirstate_tree::on_disk::{for_each_tracked_path, read_docket};
10 8 use crate::errors::HgError;
11 9 use crate::repo::Repo;
12 10 use crate::revlog::manifest::Manifest;
13 11 use crate::revlog::RevlogError;
14 12 use crate::utils::hg_path::HgPath;
15 use crate::DirstateError;
16 use rayon::prelude::*;
17
// NOTE(review): these lines carry a single gutter number, which suggests they
// are the removed side of the diff — confirm before relying on this type.
18 /// List files under Mercurial control in the working directory
19 /// by reading the dirstate
20 pub struct Dirstate {
21 /// The `dirstate` content.
// With dirstate-v2 this holds the content of the data file instead.
22 content: Vec<u8>,
// Tree metadata copied from the docket; `None` when the repository does not
// use dirstate-v2.
23 v2_metadata: Option<Vec<u8>>,
24 }
25
26 impl Dirstate {
// Reads `.hg/dirstate` and, for dirstate-v2 repositories, the data file
// named by the docket.
27 pub fn new(repo: &Repo) -> Result<Self, HgError> {
28 let mut content = repo.hg_vfs().read("dirstate")?;
29 let v2_metadata = if repo.has_dirstate_v2() {
30 let docket = read_docket(&content)?;
31 let meta = docket.tree_metadata().to_vec();
// From here on, `content` is the data file rather than the docket.
32 content = repo.hg_vfs().read(docket.data_filename())?;
33 Some(meta)
34 } else {
35 None
36 };
37 Ok(Self {
38 content,
39 v2_metadata,
40 })
41 }
42
// Returns the sorted paths of all tracked files, borrowed from `self`.
// Empty content yields an empty list without any parsing.
43 pub fn tracked_files(&self) -> Result<Vec<&HgPath>, DirstateError> {
44 let mut files = Vec::new();
45 if !self.content.is_empty() {
46 if let Some(meta) = &self.v2_metadata {
47 for_each_tracked_path(&self.content, meta, |path| {
48 files.push(path)
49 })?
50 } else {
51 let _parents = parse_dirstate_entries(
52 &self.content,
53 |path, entry, _copy_source| {
54 if entry.tracked() {
55 files.push(path)
56 }
57 Ok(())
58 },
59 )?;
60 }
61 }
62 files.par_sort_unstable();
63 Ok(files)
64 }
65 }
66 13
67 14 /// List files under Mercurial control at a given revision.
68 15 pub fn list_rev_tracked_files(
69 16 repo: &Repo,
70 17 revset: &str,
71 18 ) -> Result<FilesForRev, RevlogError> {
72 19 let rev = crate::revset::resolve_single(revset, repo)?;
73 20 Ok(FilesForRev(repo.manifest_for_rev(rev)?))
74 21 }
75 22
76 23 pub struct FilesForRev(Manifest);
77 24
78 25 impl FilesForRev {
79 26 pub fn iter(&self) -> impl Iterator<Item = Result<&HgPath, HgError>> {
80 27 self.0.iter().map(|entry| Ok(entry?.path))
81 28 }
82 29 }
@@ -1,11 +1,10
1 1 //! A distinction is made between operations and commands.
2 2 //! An operation is what can be done whereas a command is what is exposed by
3 3 //! the cli. A single command can use several operations to achieve its goal.
4 4
5 5 mod cat;
6 6 mod debugdata;
7 7 mod list_tracked_files;
8 8 pub use cat::{cat, CatOutput};
9 9 pub use debugdata::{debug_data, DebugDataKind};
10 pub use list_tracked_files::Dirstate;
11 10 pub use list_tracked_files::{list_rev_tracked_files, FilesForRev};
@@ -1,101 +1,110
1 1 use crate::error::CommandError;
2 2 use crate::ui::Ui;
3 3 use crate::utils::path_utils::RelativizePaths;
4 4 use clap::Arg;
5 5 use hg::errors::HgError;
6 6 use hg::operations::list_rev_tracked_files;
7 use hg::operations::Dirstate;
8 7 use hg::repo::Repo;
8 use hg::utils::filter_map_results;
9 9 use hg::utils::hg_path::HgPath;
10 use rayon::prelude::*;
10 11
11 12 pub const HELP_TEXT: &str = "
12 13 List tracked files.
13 14
14 15 Returns 0 on success.
15 16 ";
16 17
17 18 pub fn args() -> clap::Command {
18 19 clap::command!("files")
19 20 .arg(
20 21 Arg::new("rev")
21 22 .help("search the repository as it is in REV")
22 23 .short('r')
23 24 .long("revision")
24 25 .value_name("REV"),
25 26 )
26 27 .about(HELP_TEXT)
27 28 }
28 29
// Entry point of the `files` command: prints the tracked files of the given
// revision, or of the working directory when no revision is given.
// Configurations that this implementation does not handle are reported as
// "unsupported".
// NOTE(review): lines with a single gutter number below appear to be the old
// (`Dirstate::new`) and new (`repo.dirstate_map`) sides of the diff.
29 30 pub fn run(invocation: &crate::CliInvocation) -> Result<(), CommandError> {
// Any explicit value for `ui.relative-paths` is unsupported.
30 31 let relative = invocation.config.get(b"ui", b"relative-paths");
31 32 if relative.is_some() {
32 33 return Err(CommandError::unsupported(
33 34 "non-default ui.relative-paths",
34 35 ));
35 36 }
36 37
37 38 let rev = invocation.subcommand_args.get_one::<String>("rev");
38 39
39 40 let repo = invocation.repo?;
40 41
41 42 // It seems better if this check is removed: this would correspond to
42 43 // automatically enabling the extension if the repo requires it.
43 44 // However we need this check to be in sync with vanilla hg so hg tests
44 45 // pass.
45 46 if repo.has_sparse()
46 47 && invocation.config.get(b"extensions", b"sparse").is_none()
47 48 {
48 49 return Err(CommandError::unsupported(
49 50 "repo is using sparse, but sparse extension is not enabled",
50 51 ));
51 52 }
52 53
53 54 if let Some(rev) = rev {
54 55 if repo.has_narrow() {
55 56 return Err(CommandError::unsupported(
56 57 "rhg files -r <rev> is not supported in narrow clones",
57 58 ));
58 59 }
// Files come from the manifest of the requested revision.
59 60 let files = list_rev_tracked_files(repo, rev)
60 61 .map_err(|e| (e, rev.as_ref()))?;
61 62 display_files(invocation.ui, repo, files.iter())
62 63 } else {
63 64 // The dirstate always reflects the sparse narrowspec, so if
64 65 // we only have sparse without narrow all is fine.
65 66 // If we have narrow, then [hg files] needs to check if
66 67 // the store narrowspec is in sync with the one of the dirstate,
67 68 // so we can't support that without explicit code.
68 69 if repo.has_narrow() {
69 70 return Err(CommandError::unsupported(
70 71 "rhg files is not supported in narrow clones",
71 72 ));
72 73 }
73 let dirstate = Dirstate::new(repo)?;
74 let files = dirstate.tracked_files()?;
// Keep only the paths whose entry is tracked; the first error met while
// iterating over the dirstate map aborts the command.
74 let dirstate = repo.dirstate_map()?;
75 let files_res: Result<Vec<_>, _> =
76 filter_map_results(dirstate.iter(), |(path, entry)| {
77 Ok(if entry.tracked() { Some(path) } else { None })
78 })
79 .collect();
80
81 let mut files = files_res?;
// Sort before displaying.
82 files.par_sort_unstable();
83
75 84 display_files(invocation.ui, repo, files.into_iter().map(Ok))
76 85 }
77 86 }
78 87
79 88 fn display_files<'a>(
80 89 ui: &Ui,
81 90 repo: &Repo,
82 91 files: impl IntoIterator<Item = Result<&'a HgPath, HgError>>,
83 92 ) -> Result<(), CommandError> {
84 93 let mut stdout = ui.stdout_buffer();
85 94 let mut any = false;
86 95
87 96 let relativize = RelativizePaths::new(repo)?;
88 97 for result in files {
89 98 let path = result?;
90 99 stdout.write_all(&relativize.relativize(path))?;
91 100 stdout.write_all(b"\n")?;
92 101 any = true;
93 102 }
94 103
95 104 stdout.flush()?;
96 105 if any {
97 106 Ok(())
98 107 } else {
99 108 Err(CommandError::Unsuccessful)
100 109 }
101 110 }
General Comments 0
You need to be logged in to leave comments. Login now