##// END OF EJS Templates
dirstate-v2: Use 32-bit integers instead of 64-bit for offsets...
Simon Sapin -
r48270:f23eafb0 default
parent child Browse files
Show More
@@ -1,603 +1,602 b''
1 1 //! The "version 2" disk representation of the dirstate
2 2 //!
3 3 //! # File format
4 4 //!
5 5 //! The file starts with a fixed-sized header, whose layout is defined by the
6 6 //! `Header` struct. Its `root` field contains the slice (offset and length) to
7 7 //! the nodes representing the files and directories at the root of the
8 8 //! repository. Each node is also fixed-size, defined by the `Node` struct.
9 9 //! Nodes in turn contain slices to variable-size paths, and to their own child
10 10 //! nodes (if any) for nested files and directories.
11 11
12 12 use crate::dirstate_tree::dirstate_map::{self, DirstateMap, NodeRef};
13 13 use crate::dirstate_tree::path_with_basename::WithBasename;
14 14 use crate::errors::HgError;
15 15 use crate::utils::hg_path::HgPath;
16 16 use crate::DirstateEntry;
17 17 use crate::DirstateError;
18 18 use crate::DirstateParents;
19 19 use crate::EntryState;
20 use bytes_cast::unaligned::{I32Be, I64Be, U32Be, U64Be};
20 use bytes_cast::unaligned::{I32Be, I64Be, U32Be};
21 21 use bytes_cast::BytesCast;
22 22 use std::borrow::Cow;
23 23 use std::convert::TryFrom;
24 24 use std::time::{Duration, SystemTime, UNIX_EPOCH};
25 25
26 26 /// Added at the start of `.hg/dirstate` when the "v2" format is used.
27 27 /// This is a redundant sanity check more than an actual "magic number" since
28 28 /// `.hg/requires` already governs which format should be used.
29 29 pub const V2_FORMAT_MARKER: &[u8; 12] = b"dirstate-v2\n";
30 30
31 31 pub(super) const IGNORE_PATTERNS_HASH_LEN: usize = 20;
32 32 pub(super) type IgnorePatternsHash = [u8; IGNORE_PATTERNS_HASH_LEN];
33 33
34 34 #[derive(BytesCast)]
35 35 #[repr(C)]
36 36 struct Header {
37 37 marker: [u8; V2_FORMAT_MARKER.len()],
38 38
39 39 /// `dirstatemap.parents()` in `mercurial/dirstate.py` relies on this
40 40 /// `parents` field being at this offset, immediately after `marker`.
41 41 parents: DirstateParents,
42 42
43 43 root: ChildNodes,
44 44 nodes_with_entry_count: Size,
45 45 nodes_with_copy_source_count: Size,
46 46
47 47 /// If non-zero, a hash of ignore files that were used for some previous
48 48 /// run of the `status` algorithm.
49 49 ///
50 50 /// We define:
51 51 ///
52 52 /// * "Root" ignore files are `.hgignore` at the root of the repository if
53 53 /// it exists, and files from `ui.ignore.*` config. This set of files is
54 54 /// then sorted by the string representation of their path.
55 55 /// * The "expanded contents" of an ignore file is the byte string made
56 56 /// by concatenating its contents with the "expanded contents" of other
57 57 /// files included with `include:` or `subinclude:` files, in inclusion
58 58 /// order. This definition is recursive, as included files can
59 59 /// themselves include more files.
60 60 ///
61 61 /// This hash is defined as the SHA-1 of the concatenation (in sorted
62 62 /// order) of the "expanded contents" of each "root" ignore file.
63 63 /// (Note that computing this does not require actually concatenating byte
64 64 /// strings into contiguous memory, instead SHA-1 hashing can be done
65 65 /// incrementally.)
66 66 ignore_patterns_hash: IgnorePatternsHash,
67 67 }
68 68
69 69 #[derive(BytesCast)]
70 70 #[repr(C)]
71 71 pub(super) struct Node {
72 72 full_path: PathSlice,
73 73
74 74 /// In bytes from `self.full_path.start`
75 75 base_name_start: Size,
76 76
77 77 copy_source: OptPathSlice,
78 78 children: ChildNodes,
79 79 pub(super) tracked_descendants_count: Size,
80 80
81 81 /// Depending on the value of `state`:
82 82 ///
83 83 /// * A null byte: `data` is not used.
84 84 ///
85 85 /// * A `n`, `a`, `r`, or `m` ASCII byte: `state` and `data` together
86 86 /// represent a dirstate entry like in the v1 format.
87 87 ///
88 88 /// * A `d` ASCII byte: the bytes of `data` should instead be interpreted
89 89 /// as the `Timestamp` for the mtime of a cached directory.
90 90 ///
91 91 /// The presence of this state means that at some point, this path in
92 92 /// the working directory was observed:
93 93 ///
94 94 /// - To be a directory
95 95 /// - With the modification time as given by `Timestamp`
96 96 /// - That timestamp was already strictly in the past when observed,
97 97 /// meaning that later changes cannot happen in the same clock tick
98 98 /// and must cause a different modification time (unless the system
99 99 /// clock jumps back and we get unlucky, which is not impossible but
100 100 /// deemed unlikely enough).
101 101 /// - All direct children of this directory (as returned by
102 102 /// `std::fs::read_dir`) either have a corresponding dirstate node, or
103 103 /// are ignored by ignore patterns whose hash is in
104 104 /// `Header::ignore_patterns_hash`.
105 105 ///
106 106 /// This means that if `std::fs::symlink_metadata` later reports the
107 107 /// same modification time and ignored patterns haven’t changed, a run
108 108 /// of status that is not listing ignored files can skip calling
109 109 /// `std::fs::read_dir` again for this directory, and iterate child
110 110 /// dirstate nodes instead.
111 111 state: u8,
112 112 data: Entry,
113 113 }
114 114
115 115 #[derive(BytesCast, Copy, Clone)]
116 116 #[repr(C)]
117 117 struct Entry {
118 118 mode: I32Be,
119 119 mtime: I32Be,
120 120 size: I32Be,
121 121 }
122 122
123 123 /// Duration since the Unix epoch
124 124 #[derive(BytesCast, Copy, Clone, PartialEq)]
125 125 #[repr(C)]
126 126 pub(super) struct Timestamp {
127 127 seconds: I64Be,
128 128
129 129 /// In `0 .. 1_000_000_000`.
130 130 ///
131 131 /// This timestamp is later or earlier than `(seconds, 0)` by this many
132 132 /// nanoseconds, if `seconds` is non-negative or negative, respectively.
133 133 nanoseconds: U32Be,
134 134 }
135 135
136 136 /// Counted in bytes from the start of the file
137 137 ///
138 /// NOTE: If we decide to never support `.hg/dirstate` files larger than 4 GiB
139 /// we could save space by using `U32Be` instead.
140 type Offset = U64Be;
138 /// NOTE: not supporting `.hg/dirstate` files larger than 4 GiB.
139 type Offset = U32Be;
141 140
142 141 /// Counted in number of items
143 142 ///
144 143 /// NOTE: not supporting directories with more than 4 billion direct children,
145 144 /// or filenames more than 4 GiB.
146 145 type Size = U32Be;
147 146
148 147 /// Location of consecutive, fixed-size items.
149 148 ///
150 149 /// An item can be a single byte for paths, or a struct with
151 150 /// `derive(BytesCast)`.
152 151 #[derive(BytesCast, Copy, Clone)]
153 152 #[repr(C)]
154 153 struct Slice {
155 154 start: Offset,
156 155 len: Size,
157 156 }
158 157
159 158 /// A contiguous sequence of `len` times `Node`, representing the child nodes
160 159 /// of either some other node or of the repository root.
161 160 ///
162 161 /// Always sorted by ascending `full_path`, to allow binary search.
163 162 /// Since nodes with the same parent nodes also have the same parent path,
164 163 /// only the `base_name`s need to be compared during binary search.
165 164 type ChildNodes = Slice;
166 165
167 166 /// A `HgPath` of `len` bytes
168 167 type PathSlice = Slice;
169 168
170 169 /// Either nothing if `start == 0`, or a `HgPath` of `len` bytes
171 170 type OptPathSlice = Slice;
172 171
173 172 /// Make sure that size-affecting changes are made knowingly
174 173 fn _static_assert_size_of() {
175 let _ = std::mem::transmute::<Header, [u8; 92]>;
176 let _ = std::mem::transmute::<Node, [u8; 57]>;
174 let _ = std::mem::transmute::<Header, [u8; 88]>;
175 let _ = std::mem::transmute::<Node, [u8; 45]>;
177 176 }
178 177
179 178 /// Unexpected file format found in `.hg/dirstate` with the "v2" format.
180 179 ///
181 180 /// This should only happen if Mercurial is buggy or a repository is corrupted.
182 181 #[derive(Debug)]
183 182 pub struct DirstateV2ParseError;
184 183
185 184 impl From<DirstateV2ParseError> for HgError {
186 185 fn from(_: DirstateV2ParseError) -> Self {
187 186 HgError::corrupted("dirstate-v2 parse error")
188 187 }
189 188 }
190 189
191 190 impl From<DirstateV2ParseError> for crate::DirstateError {
192 191 fn from(error: DirstateV2ParseError) -> Self {
193 192 HgError::from(error).into()
194 193 }
195 194 }
196 195
197 196 fn read_header(on_disk: &[u8]) -> Result<&Header, DirstateV2ParseError> {
198 197 let (header, _) =
199 198 Header::from_bytes(on_disk).map_err(|_| DirstateV2ParseError)?;
200 199 if header.marker == *V2_FORMAT_MARKER {
201 200 Ok(header)
202 201 } else {
203 202 Err(DirstateV2ParseError)
204 203 }
205 204 }
206 205
207 206 pub(super) fn read<'on_disk>(
208 207 on_disk: &'on_disk [u8],
209 208 ) -> Result<
210 209 (DirstateMap<'on_disk>, Option<DirstateParents>),
211 210 DirstateV2ParseError,
212 211 > {
213 212 if on_disk.is_empty() {
214 213 return Ok((DirstateMap::empty(on_disk), None));
215 214 }
216 215 let header = read_header(on_disk)?;
217 216 let dirstate_map = DirstateMap {
218 217 on_disk,
219 218 root: dirstate_map::ChildNodes::OnDisk(read_slice::<Node>(
220 219 on_disk,
221 220 header.root,
222 221 )?),
223 222 nodes_with_entry_count: header.nodes_with_entry_count.get(),
224 223 nodes_with_copy_source_count: header
225 224 .nodes_with_copy_source_count
226 225 .get(),
227 226 ignore_patterns_hash: header.ignore_patterns_hash,
228 227 };
229 228 let parents = Some(header.parents.clone());
230 229 Ok((dirstate_map, parents))
231 230 }
232 231
233 232 impl Node {
234 233 pub(super) fn full_path<'on_disk>(
235 234 &self,
236 235 on_disk: &'on_disk [u8],
237 236 ) -> Result<&'on_disk HgPath, DirstateV2ParseError> {
238 237 read_hg_path(on_disk, self.full_path)
239 238 }
240 239
241 240 pub(super) fn base_name_start<'on_disk>(
242 241 &self,
243 242 ) -> Result<usize, DirstateV2ParseError> {
244 243 let start = self.base_name_start.get();
245 244 if start < self.full_path.len.get() {
246 245 let start = usize::try_from(start)
247 246 // u32 -> usize, could only panic on a 16-bit CPU
248 247 .expect("dirstate-v2 base_name_start out of bounds");
249 248 Ok(start)
250 249 } else {
251 250 Err(DirstateV2ParseError)
252 251 }
253 252 }
254 253
255 254 pub(super) fn base_name<'on_disk>(
256 255 &self,
257 256 on_disk: &'on_disk [u8],
258 257 ) -> Result<&'on_disk HgPath, DirstateV2ParseError> {
259 258 let full_path = self.full_path(on_disk)?;
260 259 let base_name_start = self.base_name_start()?;
261 260 Ok(HgPath::new(&full_path.as_bytes()[base_name_start..]))
262 261 }
263 262
264 263 pub(super) fn path<'on_disk>(
265 264 &self,
266 265 on_disk: &'on_disk [u8],
267 266 ) -> Result<dirstate_map::NodeKey<'on_disk>, DirstateV2ParseError> {
268 267 Ok(WithBasename::from_raw_parts(
269 268 Cow::Borrowed(self.full_path(on_disk)?),
270 269 self.base_name_start()?,
271 270 ))
272 271 }
273 272
274 273 pub(super) fn has_copy_source<'on_disk>(&self) -> bool {
275 274 self.copy_source.start.get() != 0
276 275 }
277 276
278 277 pub(super) fn copy_source<'on_disk>(
279 278 &self,
280 279 on_disk: &'on_disk [u8],
281 280 ) -> Result<Option<&'on_disk HgPath>, DirstateV2ParseError> {
282 281 Ok(if self.has_copy_source() {
283 282 Some(read_hg_path(on_disk, self.copy_source)?)
284 283 } else {
285 284 None
286 285 })
287 286 }
288 287
289 288 pub(super) fn node_data(
290 289 &self,
291 290 ) -> Result<dirstate_map::NodeData, DirstateV2ParseError> {
292 291 let entry = |state| {
293 292 dirstate_map::NodeData::Entry(self.entry_with_given_state(state))
294 293 };
295 294
296 295 match self.state {
297 296 b'\0' => Ok(dirstate_map::NodeData::None),
298 297 b'd' => Ok(dirstate_map::NodeData::CachedDirectory {
299 298 mtime: *self.data.as_timestamp(),
300 299 }),
301 300 b'n' => Ok(entry(EntryState::Normal)),
302 301 b'a' => Ok(entry(EntryState::Added)),
303 302 b'r' => Ok(entry(EntryState::Removed)),
304 303 b'm' => Ok(entry(EntryState::Merged)),
305 304 _ => Err(DirstateV2ParseError),
306 305 }
307 306 }
308 307
309 308 pub(super) fn cached_directory_mtime(&self) -> Option<&Timestamp> {
310 309 if self.state == b'd' {
311 310 Some(self.data.as_timestamp())
312 311 } else {
313 312 None
314 313 }
315 314 }
316 315
317 316 pub(super) fn state(
318 317 &self,
319 318 ) -> Result<Option<EntryState>, DirstateV2ParseError> {
320 319 match self.state {
321 320 b'\0' | b'd' => Ok(None),
322 321 b'n' => Ok(Some(EntryState::Normal)),
323 322 b'a' => Ok(Some(EntryState::Added)),
324 323 b'r' => Ok(Some(EntryState::Removed)),
325 324 b'm' => Ok(Some(EntryState::Merged)),
326 325 _ => Err(DirstateV2ParseError),
327 326 }
328 327 }
329 328
330 329 fn entry_with_given_state(&self, state: EntryState) -> DirstateEntry {
331 330 DirstateEntry {
332 331 state,
333 332 mode: self.data.mode.get(),
334 333 mtime: self.data.mtime.get(),
335 334 size: self.data.size.get(),
336 335 }
337 336 }
338 337
339 338 pub(super) fn entry(
340 339 &self,
341 340 ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {
342 341 Ok(self
343 342 .state()?
344 343 .map(|state| self.entry_with_given_state(state)))
345 344 }
346 345
347 346 pub(super) fn children<'on_disk>(
348 347 &self,
349 348 on_disk: &'on_disk [u8],
350 349 ) -> Result<&'on_disk [Node], DirstateV2ParseError> {
351 350 read_slice::<Node>(on_disk, self.children)
352 351 }
353 352
354 353 pub(super) fn to_in_memory_node<'on_disk>(
355 354 &self,
356 355 on_disk: &'on_disk [u8],
357 356 ) -> Result<dirstate_map::Node<'on_disk>, DirstateV2ParseError> {
358 357 Ok(dirstate_map::Node {
359 358 children: dirstate_map::ChildNodes::OnDisk(
360 359 self.children(on_disk)?,
361 360 ),
362 361 copy_source: self.copy_source(on_disk)?.map(Cow::Borrowed),
363 362 data: self.node_data()?,
364 363 tracked_descendants_count: self.tracked_descendants_count.get(),
365 364 })
366 365 }
367 366 }
368 367
369 368 impl Entry {
370 369 fn from_timestamp(timestamp: Timestamp) -> Self {
371 370 // Safety: both types implement the `BytesCast` trait, so we could
372 371 // safely use `as_bytes` and `from_bytes` to do this conversion. Using
373 372 // `transmute` instead makes the compiler check that the two types
374 373 // have the same size, which eliminates the error case of
375 374 // `from_bytes`.
376 375 unsafe { std::mem::transmute::<Timestamp, Entry>(timestamp) }
377 376 }
378 377
379 378 fn as_timestamp(&self) -> &Timestamp {
380 379 // Safety: same as above in `from_timestamp`
381 380 unsafe { &*(self as *const Entry as *const Timestamp) }
382 381 }
383 382 }
384 383
385 384 impl Timestamp {
386 385 pub fn seconds(&self) -> i64 {
387 386 self.seconds.get()
388 387 }
389 388 }
390 389
391 390 impl From<SystemTime> for Timestamp {
392 391 fn from(system_time: SystemTime) -> Self {
393 392 let (secs, nanos) = match system_time.duration_since(UNIX_EPOCH) {
394 393 Ok(duration) => {
395 394 (duration.as_secs() as i64, duration.subsec_nanos())
396 395 }
397 396 Err(error) => {
398 397 let negative = error.duration();
399 398 (-(negative.as_secs() as i64), negative.subsec_nanos())
400 399 }
401 400 };
402 401 Timestamp {
403 402 seconds: secs.into(),
404 403 nanoseconds: nanos.into(),
405 404 }
406 405 }
407 406 }
408 407
409 408 impl From<&'_ Timestamp> for SystemTime {
410 409 fn from(timestamp: &'_ Timestamp) -> Self {
411 410 let secs = timestamp.seconds.get();
412 411 let nanos = timestamp.nanoseconds.get();
413 412 if secs >= 0 {
414 413 UNIX_EPOCH + Duration::new(secs as u64, nanos)
415 414 } else {
416 415 UNIX_EPOCH - Duration::new((-secs) as u64, nanos)
417 416 }
418 417 }
419 418 }
420 419
421 420 fn read_hg_path(
422 421 on_disk: &[u8],
423 422 slice: Slice,
424 423 ) -> Result<&HgPath, DirstateV2ParseError> {
425 424 let bytes = read_slice::<u8>(on_disk, slice)?;
426 425 Ok(HgPath::new(bytes))
427 426 }
428 427
429 428 fn read_slice<T>(
430 429 on_disk: &[u8],
431 430 slice: Slice,
432 431 ) -> Result<&[T], DirstateV2ParseError>
433 432 where
434 433 T: BytesCast,
435 434 {
436 435 // Either `usize::MAX` would result in an "out of bounds" error since a single
437 436 // `&[u8]` cannot occupy the entire address space.
438 437 let start = usize::try_from(slice.start.get()).unwrap_or(std::usize::MAX);
439 438 let len = usize::try_from(slice.len.get()).unwrap_or(std::usize::MAX);
440 439 on_disk
441 440 .get(start..)
442 441 .and_then(|bytes| T::slice_from_bytes(bytes, len).ok())
443 442 .map(|(slice, _rest)| slice)
444 443 .ok_or_else(|| DirstateV2ParseError)
445 444 }
446 445
447 446 pub(crate) fn parse_dirstate_parents(
448 447 on_disk: &[u8],
449 448 ) -> Result<&DirstateParents, HgError> {
450 449 Ok(&read_header(on_disk)?.parents)
451 450 }
452 451
453 452 pub(crate) fn for_each_tracked_path<'on_disk>(
454 453 on_disk: &'on_disk [u8],
455 454 mut f: impl FnMut(&'on_disk HgPath),
456 455 ) -> Result<(), DirstateV2ParseError> {
457 456 let header = read_header(on_disk)?;
458 457 fn recur<'on_disk>(
459 458 on_disk: &'on_disk [u8],
460 459 nodes: Slice,
461 460 f: &mut impl FnMut(&'on_disk HgPath),
462 461 ) -> Result<(), DirstateV2ParseError> {
463 462 for node in read_slice::<Node>(on_disk, nodes)? {
464 463 if let Some(state) = node.state()? {
465 464 if state.is_tracked() {
466 465 f(node.full_path(on_disk)?)
467 466 }
468 467 }
469 468 recur(on_disk, node.children, f)?
470 469 }
471 470 Ok(())
472 471 }
473 472 recur(on_disk, header.root, &mut f)
474 473 }
475 474
476 475 pub(super) fn write(
477 476 dirstate_map: &mut DirstateMap,
478 477 parents: DirstateParents,
479 478 ) -> Result<Vec<u8>, DirstateError> {
480 479 let header_len = std::mem::size_of::<Header>();
481 480
482 481 // This ignores the space for paths, and for nodes without an entry.
483 482 // TODO: better estimate? Skip the `Vec` and write to a file directly?
484 483 let size_guess = header_len
485 484 + std::mem::size_of::<Node>()
486 485 * dirstate_map.nodes_with_entry_count as usize;
487 486 let mut out = Vec::with_capacity(size_guess);
488 487
489 488 // Keep space for the header. We’ll fill it out at the end when we know the
490 489 // actual offset for the root nodes.
491 490 out.resize(header_len, 0_u8);
492 491
493 492 let root =
494 493 write_nodes(dirstate_map, dirstate_map.root.as_ref(), &mut out)?;
495 494
496 495 let header = Header {
497 496 marker: *V2_FORMAT_MARKER,
498 497 parents: parents,
499 498 root,
500 499 nodes_with_entry_count: dirstate_map.nodes_with_entry_count.into(),
501 500 nodes_with_copy_source_count: dirstate_map
502 501 .nodes_with_copy_source_count
503 502 .into(),
504 503 ignore_patterns_hash: dirstate_map.ignore_patterns_hash,
505 504 };
506 505 out[..header_len].copy_from_slice(header.as_bytes());
507 506 Ok(out)
508 507 }
509 508
510 509 fn write_nodes(
511 510 dirstate_map: &DirstateMap,
512 511 nodes: dirstate_map::ChildNodesRef,
513 512 out: &mut Vec<u8>,
514 513 ) -> Result<ChildNodes, DirstateError> {
515 514 // `dirstate_map::ChildNodes` is a `HashMap` with undefined iteration
516 515 // order. Sort to enable binary search in the written file.
517 516 let nodes = nodes.sorted();
518 517
519 518 // First accumulate serialized nodes in a `Vec`
520 519 let mut on_disk_nodes = Vec::with_capacity(nodes.len());
521 520 for node in nodes {
522 521 let children = write_nodes(
523 522 dirstate_map,
524 523 node.children(dirstate_map.on_disk)?,
525 524 out,
526 525 )?;
527 526 let full_path = node.full_path(dirstate_map.on_disk)?;
528 527 let full_path = write_slice::<u8>(full_path.as_bytes(), out);
529 528 let copy_source =
530 529 if let Some(source) = node.copy_source(dirstate_map.on_disk)? {
531 530 write_slice::<u8>(source.as_bytes(), out)
532 531 } else {
533 532 Slice {
534 533 start: 0.into(),
535 534 len: 0.into(),
536 535 }
537 536 };
538 537 on_disk_nodes.push(match node {
539 538 NodeRef::InMemory(path, node) => {
540 539 let (state, data) = match &node.data {
541 540 dirstate_map::NodeData::Entry(entry) => (
542 541 entry.state.into(),
543 542 Entry {
544 543 mode: entry.mode.into(),
545 544 mtime: entry.mtime.into(),
546 545 size: entry.size.into(),
547 546 },
548 547 ),
549 548 dirstate_map::NodeData::CachedDirectory { mtime } => {
550 549 (b'd', Entry::from_timestamp(*mtime))
551 550 }
552 551 dirstate_map::NodeData::None => (
553 552 b'\0',
554 553 Entry {
555 554 mode: 0.into(),
556 555 mtime: 0.into(),
557 556 size: 0.into(),
558 557 },
559 558 ),
560 559 };
561 560 Node {
562 561 children,
563 562 copy_source,
564 563 full_path,
565 564 base_name_start: u32::try_from(path.base_name_start())
566 565 // Could only panic for paths over 4 GiB
567 566 .expect("dirstate-v2 offset overflow")
568 567 .into(),
569 568 tracked_descendants_count: node
570 569 .tracked_descendants_count
571 570 .into(),
572 571 state,
573 572 data,
574 573 }
575 574 }
576 575 NodeRef::OnDisk(node) => Node {
577 576 children,
578 577 copy_source,
579 578 full_path,
580 579 ..*node
581 580 },
582 581 })
583 582 }
584 583 // … so we can write them contiguously
585 584 Ok(write_slice::<Node>(&on_disk_nodes, out))
586 585 }
587 586
588 587 fn write_slice<T>(slice: &[T], out: &mut Vec<u8>) -> Slice
589 588 where
590 589 T: BytesCast,
591 590 {
592 let start = u64::try_from(out.len())
593 // Could only panic on a 128-bit CPU with a dirstate over 16 EiB
591 let start = u32::try_from(out.len())
592 // Could only panic for a dirstate file larger than 4 GiB
594 593 .expect("dirstate-v2 offset overflow")
595 594 .into();
596 595 let len = u32::try_from(slice.len())
597 596 // Could only panic for paths over 4 GiB or nodes with over 4 billion
598 597 // child nodes
599 598 .expect("dirstate-v2 offset overflow")
600 599 .into();
601 600 out.extend(slice.as_bytes());
602 601 Slice { start, len }
603 602 }
@@ -1,424 +1,424 b''
1 1 #testcases dirstate-v1 dirstate-v1-tree dirstate-v2
2 2
3 3 #if dirstate-v1-tree
4 4 #require rust
5 5 $ echo '[experimental]' >> $HGRCPATH
6 6 $ echo 'dirstate-tree.in-memory=1' >> $HGRCPATH
7 7 #endif
8 8
9 9 #if dirstate-v2
10 10 #require rust
11 11 $ echo '[format]' >> $HGRCPATH
12 12 $ echo 'exp-dirstate-v2=1' >> $HGRCPATH
13 13 #endif
14 14
15 15 $ hg init ignorerepo
16 16 $ cd ignorerepo
17 17
18 18 debugignore with no hgignore should be deterministic:
19 19 $ hg debugignore
20 20 <nevermatcher>
21 21
22 22 Issue562: .hgignore requires newline at end:
23 23
24 24 $ touch foo
25 25 $ touch bar
26 26 $ touch baz
27 27 $ cat > makeignore.py <<EOF
28 28 > f = open(".hgignore", "w")
29 29 > f.write("ignore\n")
30 30 > f.write("foo\n")
31 31 > # No EOL here
32 32 > f.write("bar")
33 33 > f.close()
34 34 > EOF
35 35
36 36 $ "$PYTHON" makeignore.py
37 37
38 38 Should display baz only:
39 39
40 40 $ hg status
41 41 ? baz
42 42
43 43 $ rm foo bar baz .hgignore makeignore.py
44 44
45 45 $ touch a.o
46 46 $ touch a.c
47 47 $ touch syntax
48 48 $ mkdir dir
49 49 $ touch dir/a.o
50 50 $ touch dir/b.o
51 51 $ touch dir/c.o
52 52
53 53 $ hg add dir/a.o
54 54 $ hg commit -m 0
55 55 $ hg add dir/b.o
56 56
57 57 $ hg status
58 58 A dir/b.o
59 59 ? a.c
60 60 ? a.o
61 61 ? dir/c.o
62 62 ? syntax
63 63
64 64 $ echo "*.o" > .hgignore
65 65 $ hg status
66 66 abort: $TESTTMP/ignorerepo/.hgignore: invalid pattern (relre): *.o (glob)
67 67 [255]
68 68
69 69 Ensure given files are relative to cwd
70 70
71 71 $ echo "dir/.*\.o" > .hgignore
72 72 $ hg status -i
73 73 I dir/c.o
74 74
75 75 $ hg debugignore dir/c.o dir/missing.o
76 76 dir/c.o is ignored
77 77 (ignore rule in $TESTTMP/ignorerepo/.hgignore, line 1: 'dir/.*\.o') (glob)
78 78 dir/missing.o is ignored
79 79 (ignore rule in $TESTTMP/ignorerepo/.hgignore, line 1: 'dir/.*\.o') (glob)
80 80 $ cd dir
81 81 $ hg debugignore c.o missing.o
82 82 c.o is ignored
83 83 (ignore rule in $TESTTMP/ignorerepo/.hgignore, line 1: 'dir/.*\.o') (glob)
84 84 missing.o is ignored
85 85 (ignore rule in $TESTTMP/ignorerepo/.hgignore, line 1: 'dir/.*\.o') (glob)
86 86
87 87 For icasefs, inexact matches also work, except for missing files
88 88
89 89 #if icasefs
90 90 $ hg debugignore c.O missing.O
91 91 c.o is ignored
92 92 (ignore rule in $TESTTMP/ignorerepo/.hgignore, line 1: 'dir/.*\.o') (glob)
93 93 missing.O is not ignored
94 94 #endif
95 95
96 96 $ cd ..
97 97
98 98 $ echo ".*\.o" > .hgignore
99 99 $ hg status
100 100 A dir/b.o
101 101 ? .hgignore
102 102 ? a.c
103 103 ? syntax
104 104
105 105 Ensure that comments work:
106 106
107 107 $ touch 'foo#bar' 'quux#' 'quu0#'
108 108 #if no-windows
109 109 $ touch 'baz\' 'baz\wat' 'ba0\#wat' 'ba1\\' 'ba1\\wat' 'quu0\'
110 110 #endif
111 111
112 112 $ cat <<'EOF' >> .hgignore
113 113 > # full-line comment
114 114 > # whitespace-only comment line
115 115 > syntax# pattern, no whitespace, then comment
116 116 > a.c # pattern, then whitespace, then comment
117 117 > baz\\# # (escaped) backslash, then comment
118 118 > ba0\\\#w # (escaped) backslash, escaped comment character, then comment
119 119 > ba1\\\\# # (escaped) backslashes, then comment
120 120 > foo\#b # escaped comment character
121 121 > quux\## escaped comment character at end of name
122 122 > EOF
123 123 $ hg status
124 124 A dir/b.o
125 125 ? .hgignore
126 126 ? quu0#
127 127 ? quu0\ (no-windows !)
128 128
129 129 $ cat <<'EOF' > .hgignore
130 130 > .*\.o
131 131 > syntax: glob
132 132 > syntax# pattern, no whitespace, then comment
133 133 > a.c # pattern, then whitespace, then comment
134 134 > baz\\#* # (escaped) backslash, then comment
135 135 > ba0\\\#w* # (escaped) backslash, escaped comment character, then comment
136 136 > ba1\\\\#* # (escaped) backslashes, then comment
137 137 > foo\#b* # escaped comment character
138 138 > quux\## escaped comment character at end of name
139 139 > quu0[\#]# escaped comment character inside [...]
140 140 > EOF
141 141 $ hg status
142 142 A dir/b.o
143 143 ? .hgignore
144 144 ? ba1\\wat (no-windows !)
145 145 ? baz\wat (no-windows !)
146 146 ? quu0\ (no-windows !)
147 147
148 148 $ rm 'foo#bar' 'quux#' 'quu0#'
149 149 #if no-windows
150 150 $ rm 'baz\' 'baz\wat' 'ba0\#wat' 'ba1\\' 'ba1\\wat' 'quu0\'
151 151 #endif
152 152
153 153 Check that '^\.' does not ignore the root directory:
154 154
155 155 $ echo "^\." > .hgignore
156 156 $ hg status
157 157 A dir/b.o
158 158 ? a.c
159 159 ? a.o
160 160 ? dir/c.o
161 161 ? syntax
162 162
163 163 Test that patterns from ui.ignore options are read:
164 164
165 165 $ echo > .hgignore
166 166 $ cat >> $HGRCPATH << EOF
167 167 > [ui]
168 168 > ignore.other = $TESTTMP/ignorerepo/.hg/testhgignore
169 169 > EOF
170 170 $ echo "glob:**.o" > .hg/testhgignore
171 171 $ hg status
172 172 A dir/b.o
173 173 ? .hgignore
174 174 ? a.c
175 175 ? syntax
176 176
177 177 empty out testhgignore
178 178 $ echo > .hg/testhgignore
179 179
180 180 Test relative ignore path (issue4473):
181 181
182 182 $ cat >> $HGRCPATH << EOF
183 183 > [ui]
184 184 > ignore.relative = .hg/testhgignorerel
185 185 > EOF
186 186 $ echo "glob:*.o" > .hg/testhgignorerel
187 187 $ cd dir
188 188 $ hg status
189 189 A dir/b.o
190 190 ? .hgignore
191 191 ? a.c
192 192 ? syntax
193 193 $ hg debugignore
194 194 <includematcher includes='.*\\.o(?:/|$)'>
195 195
196 196 $ cd ..
197 197 $ echo > .hg/testhgignorerel
198 198 $ echo "syntax: glob" > .hgignore
199 199 $ echo "re:.*\.o" >> .hgignore
200 200 $ hg status
201 201 A dir/b.o
202 202 ? .hgignore
203 203 ? a.c
204 204 ? syntax
205 205
206 206 $ echo "syntax: invalid" > .hgignore
207 207 $ hg status
208 208 $TESTTMP/ignorerepo/.hgignore: ignoring invalid syntax 'invalid'
209 209 A dir/b.o
210 210 ? .hgignore
211 211 ? a.c
212 212 ? a.o
213 213 ? dir/c.o
214 214 ? syntax
215 215
216 216 $ echo "syntax: glob" > .hgignore
217 217 $ echo "*.o" >> .hgignore
218 218 $ hg status
219 219 A dir/b.o
220 220 ? .hgignore
221 221 ? a.c
222 222 ? syntax
223 223
224 224 $ echo "relglob:syntax*" > .hgignore
225 225 $ hg status
226 226 A dir/b.o
227 227 ? .hgignore
228 228 ? a.c
229 229 ? a.o
230 230 ? dir/c.o
231 231
232 232 $ echo "relglob:*" > .hgignore
233 233 $ hg status
234 234 A dir/b.o
235 235
236 236 $ cd dir
237 237 $ hg status .
238 238 A b.o
239 239
240 240 $ hg debugignore
241 241 <includematcher includes='.*(?:/|$)'>
242 242
243 243 $ hg debugignore b.o
244 244 b.o is ignored
245 245 (ignore rule in $TESTTMP/ignorerepo/.hgignore, line 1: '*') (glob)
246 246
247 247 $ cd ..
248 248
249 249 Check patterns that match only the directory
250 250
251 251 "(fsmonitor !)" below assumes that fsmonitor is enabled with
252 252 "walk_on_invalidate = false" (default), which doesn't involve
253 253 re-walking whole repository at detection of .hgignore change.
254 254
255 255 $ echo "^dir\$" > .hgignore
256 256 $ hg status
257 257 A dir/b.o
258 258 ? .hgignore
259 259 ? a.c
260 260 ? a.o
261 261 ? dir/c.o (fsmonitor !)
262 262 ? syntax
263 263
264 264 Check recursive glob pattern matches no directories (dir/**/c.o matches dir/c.o)
265 265
266 266 $ echo "syntax: glob" > .hgignore
267 267 $ echo "dir/**/c.o" >> .hgignore
268 268 $ touch dir/c.o
269 269 $ mkdir dir/subdir
270 270 $ touch dir/subdir/c.o
271 271 $ hg status
272 272 A dir/b.o
273 273 ? .hgignore
274 274 ? a.c
275 275 ? a.o
276 276 ? syntax
277 277 $ hg debugignore a.c
278 278 a.c is not ignored
279 279 $ hg debugignore dir/c.o
280 280 dir/c.o is ignored
281 281 (ignore rule in $TESTTMP/ignorerepo/.hgignore, line 2: 'dir/**/c.o') (glob)
282 282
283 283 Check rooted globs
284 284
285 285 $ hg purge --all --config extensions.purge=
286 286 $ echo "syntax: rootglob" > .hgignore
287 287 $ echo "a/*.ext" >> .hgignore
288 288 $ for p in a b/a aa; do mkdir -p $p; touch $p/b.ext; done
289 289 $ hg status -A 'set:**.ext'
290 290 ? aa/b.ext
291 291 ? b/a/b.ext
292 292 I a/b.ext
293 293
294 294 Check using 'include:' in ignore file
295 295
296 296 $ hg purge --all --config extensions.purge=
297 297 $ touch foo.included
298 298
299 299 $ echo ".*.included" > otherignore
300 300 $ hg status -I "include:otherignore"
301 301 ? foo.included
302 302
303 303 $ echo "include:otherignore" >> .hgignore
304 304 $ hg status
305 305 A dir/b.o
306 306 ? .hgignore
307 307 ? otherignore
308 308
309 309 Check recursive uses of 'include:'
310 310
311 311 $ echo "include:nested/ignore" >> otherignore
312 312 $ mkdir nested nested/more
313 313 $ echo "glob:*ignore" > nested/ignore
314 314 $ echo "rootglob:a" >> nested/ignore
315 315 $ touch a nested/a nested/more/a
316 316 $ hg status
317 317 A dir/b.o
318 318 ? nested/a
319 319 ? nested/more/a
320 320 $ rm a nested/a nested/more/a
321 321
322 322 $ cp otherignore goodignore
323 323 $ echo "include:badignore" >> otherignore
324 324 $ hg status
325 325 skipping unreadable pattern file 'badignore': $ENOENT$
326 326 A dir/b.o
327 327
328 328 $ mv goodignore otherignore
329 329
330 330 Check using 'include:' while in a non-root directory
331 331
332 332 $ cd ..
333 333 $ hg -R ignorerepo status
334 334 A dir/b.o
335 335 $ cd ignorerepo
336 336
337 337 Check including subincludes
338 338
339 339 $ hg revert -q --all
340 340 $ hg purge --all --config extensions.purge=
341 341 $ echo ".hgignore" > .hgignore
342 342 $ mkdir dir1 dir2
343 343 $ touch dir1/file1 dir1/file2 dir2/file1 dir2/file2
344 344 $ echo "subinclude:dir2/.hgignore" >> .hgignore
345 345 $ echo "glob:file*2" > dir2/.hgignore
346 346 $ hg status
347 347 ? dir1/file1
348 348 ? dir1/file2
349 349 ? dir2/file1
350 350
351 351 Check including subincludes with other patterns
352 352
353 353 $ echo "subinclude:dir1/.hgignore" >> .hgignore
354 354
355 355 $ mkdir dir1/subdir
356 356 $ touch dir1/subdir/file1
357 357 $ echo "rootglob:f?le1" > dir1/.hgignore
358 358 $ hg status
359 359 ? dir1/file2
360 360 ? dir1/subdir/file1
361 361 ? dir2/file1
362 362 $ rm dir1/subdir/file1
363 363
364 364 $ echo "regexp:f.le1" > dir1/.hgignore
365 365 $ hg status
366 366 ? dir1/file2
367 367 ? dir2/file1
368 368
369 369 Check multiple levels of sub-ignores
370 370
371 371 $ touch dir1/subdir/subfile1 dir1/subdir/subfile3 dir1/subdir/subfile4
372 372 $ echo "subinclude:subdir/.hgignore" >> dir1/.hgignore
373 373 $ echo "glob:subfil*3" >> dir1/subdir/.hgignore
374 374
375 375 $ hg status
376 376 ? dir1/file2
377 377 ? dir1/subdir/subfile4
378 378 ? dir2/file1
379 379
380 380 Check include subignore at the same level
381 381
382 382 $ mv dir1/subdir/.hgignore dir1/.hgignoretwo
383 383 $ echo "regexp:f.le1" > dir1/.hgignore
384 384 $ echo "subinclude:.hgignoretwo" >> dir1/.hgignore
385 385 $ echo "glob:file*2" > dir1/.hgignoretwo
386 386
387 387 $ hg status | grep file2
388 388 [1]
389 389 $ hg debugignore dir1/file2
390 390 dir1/file2 is ignored
391 391 (ignore rule in dir2/.hgignore, line 1: 'file*2')
392 392
393 393 #if windows
394 394
395 395 Windows paths are accepted on input
396 396
397 397 $ rm dir1/.hgignore
398 398 $ echo "dir1/file*" >> .hgignore
399 399 $ hg debugignore "dir1\file2"
400 400 dir1/file2 is ignored
401 401 (ignore rule in $TESTTMP\ignorerepo\.hgignore, line 4: 'dir1/file*')
402 402 $ hg up -qC .
403 403
404 404 #endif
405 405
406 406 #if dirstate-v2
407 407
408 408 Check the hash of ignore patterns written in the dirstate at offset
409 12 + 20 + 20 + 8 + 4 + 4 + 4 = 72
409 12 + 20 + 20 + 4 + 4 + 4 + 4 = 68
410 410
411 411 $ hg status > /dev/null
412 412 $ cat .hg/testhgignore .hg/testhgignorerel .hgignore dir2/.hgignore dir1/.hgignore dir1/.hgignoretwo | $TESTDIR/f --sha1
413 413 sha1=6e315b60f15fb5dfa02be00f3e2c8f923051f5ff
414 >>> import binascii; print(binascii.hexlify(open(".hg/dirstate", "rb").read()[72:][:20]).decode())
414 >>> import binascii; print(binascii.hexlify(open(".hg/dirstate", "rb").read()[68:][:20]).decode())
415 415 6e315b60f15fb5dfa02be00f3e2c8f923051f5ff
416 416
417 417 $ echo rel > .hg/testhgignorerel
418 418 $ hg status > /dev/null
419 419 $ cat .hg/testhgignore .hg/testhgignorerel .hgignore dir2/.hgignore dir1/.hgignore dir1/.hgignoretwo | $TESTDIR/f --sha1
420 420 sha1=dea19cc7119213f24b6b582a4bae7b0cb063e34e
421 >>> import binascii; print(binascii.hexlify(open(".hg/dirstate", "rb").read()[72:][:20]).decode())
421 >>> import binascii; print(binascii.hexlify(open(".hg/dirstate", "rb").read()[68:][:20]).decode())
422 422 dea19cc7119213f24b6b582a4bae7b0cb063e34e
423 423
424 424 #endif
General Comments 0
You need to be logged in to leave comments. Login now