rhg: Add Repo::write_dirstate...
Simon Sapin
r49249:2097f635 default
rust/hg-core/src/dirstate_tree/dirstate_map.rs
@@ -1,1139 +1,1139 @@
1 1 use bytes_cast::BytesCast;
2 2 use micro_timer::timed;
3 3 use std::borrow::Cow;
4 4 use std::path::PathBuf;
5 5
6 6 use super::on_disk;
7 7 use super::on_disk::DirstateV2ParseError;
8 8 use super::owning::OwningDirstateMap;
9 9 use super::path_with_basename::WithBasename;
10 10 use crate::dirstate::parsers::pack_entry;
11 11 use crate::dirstate::parsers::packed_entry_size;
12 12 use crate::dirstate::parsers::parse_dirstate_entries;
13 13 use crate::dirstate::CopyMapIter;
14 14 use crate::dirstate::StateMapIter;
15 15 use crate::dirstate::TruncatedTimestamp;
16 16 use crate::dirstate::SIZE_FROM_OTHER_PARENT;
17 17 use crate::dirstate::SIZE_NON_NORMAL;
18 18 use crate::matchers::Matcher;
19 19 use crate::utils::hg_path::{HgPath, HgPathBuf};
20 20 use crate::DirstateEntry;
21 21 use crate::DirstateError;
22 22 use crate::DirstateParents;
23 23 use crate::DirstateStatus;
24 24 use crate::EntryState;
25 25 use crate::FastHashMap;
26 26 use crate::PatternFileWarning;
27 27 use crate::StatusError;
28 28 use crate::StatusOptions;
29 29
30 30 /// Append to an existing data file if the amount of unreachable data (not used
31 31 /// anymore) is less than this fraction of the total amount of existing data.
32 32 const ACCEPTABLE_UNREACHABLE_BYTES_RATIO: f32 = 0.5;
33 33
34 34 pub struct DirstateMap<'on_disk> {
35 35 /// Contents of the `.hg/dirstate` file
36 36 pub(super) on_disk: &'on_disk [u8],
37 37
38 38 pub(super) root: ChildNodes<'on_disk>,
39 39
40 40 /// Number of nodes anywhere in the tree that have `.entry.is_some()`.
41 41 pub(super) nodes_with_entry_count: u32,
42 42
43 43 /// Number of nodes anywhere in the tree that have
44 44 /// `.copy_source.is_some()`.
45 45 pub(super) nodes_with_copy_source_count: u32,
46 46
47 47 /// See on_disk::Header
48 48 pub(super) ignore_patterns_hash: on_disk::IgnorePatternsHash,
49 49
50 50 /// How many bytes of `on_disk` are not used anymore
51 51 pub(super) unreachable_bytes: u32,
52 52 }
53 53
54 54 /// Using a plain `HgPathBuf` of the full path from the repository root as a
55 55 /// map key would also work: all paths in a given map have the same parent
56 56 /// path, so comparing full paths gives the same result as comparing base
57 57 /// names. However `HashMap` would waste time always re-hashing the same
58 58 /// string prefix.
59 59 pub(super) type NodeKey<'on_disk> = WithBasename<Cow<'on_disk, HgPath>>;
60 60
61 61 /// Similar to `&'tree Cow<'on_disk, HgPath>`, but can also be returned
62 62 /// for on-disk nodes that don’t actually have a `Cow` to borrow.
63 63 pub(super) enum BorrowedPath<'tree, 'on_disk> {
64 64 InMemory(&'tree HgPathBuf),
65 65 OnDisk(&'on_disk HgPath),
66 66 }
67 67
68 68 pub(super) enum ChildNodes<'on_disk> {
69 69 InMemory(FastHashMap<NodeKey<'on_disk>, Node<'on_disk>>),
70 70 OnDisk(&'on_disk [on_disk::Node]),
71 71 }
72 72
73 73 pub(super) enum ChildNodesRef<'tree, 'on_disk> {
74 74 InMemory(&'tree FastHashMap<NodeKey<'on_disk>, Node<'on_disk>>),
75 75 OnDisk(&'on_disk [on_disk::Node]),
76 76 }
77 77
78 78 pub(super) enum NodeRef<'tree, 'on_disk> {
79 79 InMemory(&'tree NodeKey<'on_disk>, &'tree Node<'on_disk>),
80 80 OnDisk(&'on_disk on_disk::Node),
81 81 }
82 82
83 83 impl<'tree, 'on_disk> BorrowedPath<'tree, 'on_disk> {
84 84 pub fn detach_from_tree(&self) -> Cow<'on_disk, HgPath> {
85 85 match *self {
86 86 BorrowedPath::InMemory(in_memory) => Cow::Owned(in_memory.clone()),
87 87 BorrowedPath::OnDisk(on_disk) => Cow::Borrowed(on_disk),
88 88 }
89 89 }
90 90 }
91 91
92 92 impl<'tree, 'on_disk> std::ops::Deref for BorrowedPath<'tree, 'on_disk> {
93 93 type Target = HgPath;
94 94
95 95 fn deref(&self) -> &HgPath {
96 96 match *self {
97 97 BorrowedPath::InMemory(in_memory) => in_memory,
98 98 BorrowedPath::OnDisk(on_disk) => on_disk,
99 99 }
100 100 }
101 101 }
102 102
103 103 impl Default for ChildNodes<'_> {
104 104 fn default() -> Self {
105 105 ChildNodes::InMemory(Default::default())
106 106 }
107 107 }
108 108
109 109 impl<'on_disk> ChildNodes<'on_disk> {
110 110 pub(super) fn as_ref<'tree>(
111 111 &'tree self,
112 112 ) -> ChildNodesRef<'tree, 'on_disk> {
113 113 match self {
114 114 ChildNodes::InMemory(nodes) => ChildNodesRef::InMemory(nodes),
115 115 ChildNodes::OnDisk(nodes) => ChildNodesRef::OnDisk(nodes),
116 116 }
117 117 }
118 118
119 119 pub(super) fn is_empty(&self) -> bool {
120 120 match self {
121 121 ChildNodes::InMemory(nodes) => nodes.is_empty(),
122 122 ChildNodes::OnDisk(nodes) => nodes.is_empty(),
123 123 }
124 124 }
125 125
126 126 fn make_mut(
127 127 &mut self,
128 128 on_disk: &'on_disk [u8],
129 129 unreachable_bytes: &mut u32,
130 130 ) -> Result<
131 131 &mut FastHashMap<NodeKey<'on_disk>, Node<'on_disk>>,
132 132 DirstateV2ParseError,
133 133 > {
134 134 match self {
135 135 ChildNodes::InMemory(nodes) => Ok(nodes),
136 136 ChildNodes::OnDisk(nodes) => {
137 137 *unreachable_bytes +=
138 138 std::mem::size_of_val::<[on_disk::Node]>(nodes) as u32;
139 139 let nodes = nodes
140 140 .iter()
141 141 .map(|node| {
142 142 Ok((
143 143 node.path(on_disk)?,
144 144 node.to_in_memory_node(on_disk)?,
145 145 ))
146 146 })
147 147 .collect::<Result<_, _>>()?;
148 148 *self = ChildNodes::InMemory(nodes);
149 149 match self {
150 150 ChildNodes::InMemory(nodes) => Ok(nodes),
151 151 ChildNodes::OnDisk(_) => unreachable!(),
152 152 }
153 153 }
154 154 }
155 155 }
156 156 }
157 157
158 158 impl<'tree, 'on_disk> ChildNodesRef<'tree, 'on_disk> {
159 159 pub(super) fn get(
160 160 &self,
161 161 base_name: &HgPath,
162 162 on_disk: &'on_disk [u8],
163 163 ) -> Result<Option<NodeRef<'tree, 'on_disk>>, DirstateV2ParseError> {
164 164 match self {
165 165 ChildNodesRef::InMemory(nodes) => Ok(nodes
166 166 .get_key_value(base_name)
167 167 .map(|(k, v)| NodeRef::InMemory(k, v))),
168 168 ChildNodesRef::OnDisk(nodes) => {
169 169 let mut parse_result = Ok(());
170 170 let search_result = nodes.binary_search_by(|node| {
171 171 match node.base_name(on_disk) {
172 172 Ok(node_base_name) => node_base_name.cmp(base_name),
173 173 Err(e) => {
174 174 parse_result = Err(e);
175 175 // Dummy comparison result, `search_result` won’t
176 176 // be used since `parse_result` is an error
177 177 std::cmp::Ordering::Equal
178 178 }
179 179 }
180 180 });
181 181 parse_result.map(|()| {
182 182 search_result.ok().map(|i| NodeRef::OnDisk(&nodes[i]))
183 183 })
184 184 }
185 185 }
186 186 }
187 187
188 188 /// Iterate in undefined order
189 189 pub(super) fn iter(
190 190 &self,
191 191 ) -> impl Iterator<Item = NodeRef<'tree, 'on_disk>> {
192 192 match self {
193 193 ChildNodesRef::InMemory(nodes) => itertools::Either::Left(
194 194 nodes.iter().map(|(k, v)| NodeRef::InMemory(k, v)),
195 195 ),
196 196 ChildNodesRef::OnDisk(nodes) => {
197 197 itertools::Either::Right(nodes.iter().map(NodeRef::OnDisk))
198 198 }
199 199 }
200 200 }
201 201
202 202 /// Iterate in parallel in undefined order
203 203 pub(super) fn par_iter(
204 204 &self,
205 205 ) -> impl rayon::iter::ParallelIterator<Item = NodeRef<'tree, 'on_disk>>
206 206 {
207 207 use rayon::prelude::*;
208 208 match self {
209 209 ChildNodesRef::InMemory(nodes) => rayon::iter::Either::Left(
210 210 nodes.par_iter().map(|(k, v)| NodeRef::InMemory(k, v)),
211 211 ),
212 212 ChildNodesRef::OnDisk(nodes) => rayon::iter::Either::Right(
213 213 nodes.par_iter().map(NodeRef::OnDisk),
214 214 ),
215 215 }
216 216 }
217 217
218 218 pub(super) fn sorted(&self) -> Vec<NodeRef<'tree, 'on_disk>> {
219 219 match self {
220 220 ChildNodesRef::InMemory(nodes) => {
221 221 let mut vec: Vec<_> = nodes
222 222 .iter()
223 223 .map(|(k, v)| NodeRef::InMemory(k, v))
224 224 .collect();
225 225 fn sort_key<'a>(node: &'a NodeRef) -> &'a HgPath {
226 226 match node {
227 227 NodeRef::InMemory(path, _node) => path.base_name(),
228 228 NodeRef::OnDisk(_) => unreachable!(),
229 229 }
230 230 }
231 231 // `sort_unstable_by_key` doesn’t allow keys borrowing from the
232 232 // value: https://github.com/rust-lang/rust/issues/34162
233 233 vec.sort_unstable_by(|a, b| sort_key(a).cmp(sort_key(b)));
234 234 vec
235 235 }
236 236 ChildNodesRef::OnDisk(nodes) => {
237 237 // Nodes on disk are already sorted
238 238 nodes.iter().map(NodeRef::OnDisk).collect()
239 239 }
240 240 }
241 241 }
242 242 }
243 243
244 244 impl<'tree, 'on_disk> NodeRef<'tree, 'on_disk> {
245 245 pub(super) fn full_path(
246 246 &self,
247 247 on_disk: &'on_disk [u8],
248 248 ) -> Result<&'tree HgPath, DirstateV2ParseError> {
249 249 match self {
250 250 NodeRef::InMemory(path, _node) => Ok(path.full_path()),
251 251 NodeRef::OnDisk(node) => node.full_path(on_disk),
252 252 }
253 253 }
254 254
255 255 /// Returns a `BorrowedPath`, which can be turned into a `Cow<'on_disk,
256 256 /// HgPath>` detached from `'tree`
257 257 pub(super) fn full_path_borrowed(
258 258 &self,
259 259 on_disk: &'on_disk [u8],
260 260 ) -> Result<BorrowedPath<'tree, 'on_disk>, DirstateV2ParseError> {
261 261 match self {
262 262 NodeRef::InMemory(path, _node) => match path.full_path() {
263 263 Cow::Borrowed(on_disk) => Ok(BorrowedPath::OnDisk(on_disk)),
264 264 Cow::Owned(in_memory) => Ok(BorrowedPath::InMemory(in_memory)),
265 265 },
266 266 NodeRef::OnDisk(node) => {
267 267 Ok(BorrowedPath::OnDisk(node.full_path(on_disk)?))
268 268 }
269 269 }
270 270 }
271 271
272 272 pub(super) fn base_name(
273 273 &self,
274 274 on_disk: &'on_disk [u8],
275 275 ) -> Result<&'tree HgPath, DirstateV2ParseError> {
276 276 match self {
277 277 NodeRef::InMemory(path, _node) => Ok(path.base_name()),
278 278 NodeRef::OnDisk(node) => node.base_name(on_disk),
279 279 }
280 280 }
281 281
282 282 pub(super) fn children(
283 283 &self,
284 284 on_disk: &'on_disk [u8],
285 285 ) -> Result<ChildNodesRef<'tree, 'on_disk>, DirstateV2ParseError> {
286 286 match self {
287 287 NodeRef::InMemory(_path, node) => Ok(node.children.as_ref()),
288 288 NodeRef::OnDisk(node) => {
289 289 Ok(ChildNodesRef::OnDisk(node.children(on_disk)?))
290 290 }
291 291 }
292 292 }
293 293
294 294 pub(super) fn has_copy_source(&self) -> bool {
295 295 match self {
296 296 NodeRef::InMemory(_path, node) => node.copy_source.is_some(),
297 297 NodeRef::OnDisk(node) => node.has_copy_source(),
298 298 }
299 299 }
300 300
301 301 pub(super) fn copy_source(
302 302 &self,
303 303 on_disk: &'on_disk [u8],
304 304 ) -> Result<Option<&'tree HgPath>, DirstateV2ParseError> {
305 305 match self {
306 306 NodeRef::InMemory(_path, node) => {
307 307 Ok(node.copy_source.as_ref().map(|s| &**s))
308 308 }
309 309 NodeRef::OnDisk(node) => node.copy_source(on_disk),
310 310 }
311 311 }
312 312
313 313 pub(super) fn entry(
314 314 &self,
315 315 ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {
316 316 match self {
317 317 NodeRef::InMemory(_path, node) => {
318 318 Ok(node.data.as_entry().copied())
319 319 }
320 320 NodeRef::OnDisk(node) => node.entry(),
321 321 }
322 322 }
323 323
324 324 pub(super) fn state(
325 325 &self,
326 326 ) -> Result<Option<EntryState>, DirstateV2ParseError> {
327 327 Ok(self.entry()?.map(|e| e.state()))
328 328 }
329 329
330 330 pub(super) fn cached_directory_mtime(
331 331 &self,
332 332 ) -> Result<Option<TruncatedTimestamp>, DirstateV2ParseError> {
333 333 match self {
334 334 NodeRef::InMemory(_path, node) => Ok(match node.data {
335 335 NodeData::CachedDirectory { mtime } => Some(mtime),
336 336 _ => None,
337 337 }),
338 338 NodeRef::OnDisk(node) => node.cached_directory_mtime(),
339 339 }
340 340 }
341 341
342 342 pub(super) fn descendants_with_entry_count(&self) -> u32 {
343 343 match self {
344 344 NodeRef::InMemory(_path, node) => {
345 345 node.descendants_with_entry_count
346 346 }
347 347 NodeRef::OnDisk(node) => node.descendants_with_entry_count.get(),
348 348 }
349 349 }
350 350
351 351 pub(super) fn tracked_descendants_count(&self) -> u32 {
352 352 match self {
353 353 NodeRef::InMemory(_path, node) => node.tracked_descendants_count,
354 354 NodeRef::OnDisk(node) => node.tracked_descendants_count.get(),
355 355 }
356 356 }
357 357 }
358 358
359 359 /// Represents a file or a directory
360 360 #[derive(Default)]
361 361 pub(super) struct Node<'on_disk> {
362 362 pub(super) data: NodeData,
363 363
364 364 pub(super) copy_source: Option<Cow<'on_disk, HgPath>>,
365 365
366 366 pub(super) children: ChildNodes<'on_disk>,
367 367
368 368 /// How many (non-inclusive) descendants of this node have an entry.
369 369 pub(super) descendants_with_entry_count: u32,
370 370
371 371 /// How many (non-inclusive) descendants of this node have an entry whose
372 372 /// state is "tracked".
373 373 pub(super) tracked_descendants_count: u32,
374 374 }
375 375
376 376 pub(super) enum NodeData {
377 377 Entry(DirstateEntry),
378 378 CachedDirectory { mtime: TruncatedTimestamp },
379 379 None,
380 380 }
381 381
382 382 impl Default for NodeData {
383 383 fn default() -> Self {
384 384 NodeData::None
385 385 }
386 386 }
387 387
388 388 impl NodeData {
389 389 fn has_entry(&self) -> bool {
390 390 match self {
391 391 NodeData::Entry(_) => true,
392 392 _ => false,
393 393 }
394 394 }
395 395
396 396 fn as_entry(&self) -> Option<&DirstateEntry> {
397 397 match self {
398 398 NodeData::Entry(entry) => Some(entry),
399 399 _ => None,
400 400 }
401 401 }
402 402 }
403 403
404 404 impl<'on_disk> DirstateMap<'on_disk> {
405 405 pub(super) fn empty(on_disk: &'on_disk [u8]) -> Self {
406 406 Self {
407 407 on_disk,
408 408 root: ChildNodes::default(),
409 409 nodes_with_entry_count: 0,
410 410 nodes_with_copy_source_count: 0,
411 411 ignore_patterns_hash: [0; on_disk::IGNORE_PATTERNS_HASH_LEN],
412 412 unreachable_bytes: 0,
413 413 }
414 414 }
415 415
416 416 #[timed]
417 417 pub fn new_v2(
418 418 on_disk: &'on_disk [u8],
419 419 data_size: usize,
420 420 metadata: &[u8],
421 421 ) -> Result<Self, DirstateError> {
422 422 if let Some(data) = on_disk.get(..data_size) {
423 423 Ok(on_disk::read(data, metadata)?)
424 424 } else {
425 425 Err(DirstateV2ParseError.into())
426 426 }
427 427 }
428 428
429 429 #[timed]
430 430 pub fn new_v1(
431 431 on_disk: &'on_disk [u8],
432 432 ) -> Result<(Self, Option<DirstateParents>), DirstateError> {
433 433 let mut map = Self::empty(on_disk);
434 434 if map.on_disk.is_empty() {
435 435 return Ok((map, None));
436 436 }
437 437
438 438 let parents = parse_dirstate_entries(
439 439 map.on_disk,
440 440 |path, entry, copy_source| {
441 441 let tracked = entry.state().is_tracked();
442 442 let node = Self::get_or_insert_node(
443 443 map.on_disk,
444 444 &mut map.unreachable_bytes,
445 445 &mut map.root,
446 446 path,
447 447 WithBasename::to_cow_borrowed,
448 448 |ancestor| {
449 449 if tracked {
450 450 ancestor.tracked_descendants_count += 1
451 451 }
452 452 ancestor.descendants_with_entry_count += 1
453 453 },
454 454 )?;
455 455 assert!(
456 456 !node.data.has_entry(),
457 457 "duplicate dirstate entry in read"
458 458 );
459 459 assert!(
460 460 node.copy_source.is_none(),
461 461 "duplicate dirstate entry in read"
462 462 );
463 463 node.data = NodeData::Entry(*entry);
464 464 node.copy_source = copy_source.map(Cow::Borrowed);
465 465 map.nodes_with_entry_count += 1;
466 466 if copy_source.is_some() {
467 467 map.nodes_with_copy_source_count += 1
468 468 }
469 469 Ok(())
470 470 },
471 471 )?;
472 472 let parents = Some(parents.clone());
473 473
474 474 Ok((map, parents))
475 475 }
476 476
477 477 /// Assuming dirstate-v2 format, returns whether the next write should
478 478 /// append to the existing data file that contains `self.on_disk` (true),
479 479 /// or create a new data file from scratch (false).
480 480 pub(super) fn write_should_append(&self) -> bool {
481 481 let ratio = self.unreachable_bytes as f32 / self.on_disk.len() as f32;
482 482 ratio < ACCEPTABLE_UNREACHABLE_BYTES_RATIO
483 483 }
484 484
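As a worked example of the heuristic above (the byte counts are illustrative; only the 0.5 threshold comes from `ACCEPTABLE_UNREACHABLE_BYTES_RATIO`): with a 1,000,000-byte existing data file and 300,000 unreachable bytes the ratio is 0.3, so the next write appends to the same file; with 600,000 unreachable bytes the ratio is 0.6 and a new data file is written from scratch.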
485 485 fn get_node<'tree>(
486 486 &'tree self,
487 487 path: &HgPath,
488 488 ) -> Result<Option<NodeRef<'tree, 'on_disk>>, DirstateV2ParseError> {
489 489 let mut children = self.root.as_ref();
490 490 let mut components = path.components();
491 491 let mut component =
492 492 components.next().expect("expected at least one components");
493 493 loop {
494 494 if let Some(child) = children.get(component, self.on_disk)? {
495 495 if let Some(next_component) = components.next() {
496 496 component = next_component;
497 497 children = child.children(self.on_disk)?;
498 498 } else {
499 499 return Ok(Some(child));
500 500 }
501 501 } else {
502 502 return Ok(None);
503 503 }
504 504 }
505 505 }
506 506
507 507 /// Returns a mutable reference to the node at `path` if it exists
508 508 ///
509 509 /// This takes `root` instead of `&mut self` so that callers can mutate
510 510 /// other fields while the returned borrow is still valid
511 511 fn get_node_mut<'tree>(
512 512 on_disk: &'on_disk [u8],
513 513 unreachable_bytes: &mut u32,
514 514 root: &'tree mut ChildNodes<'on_disk>,
515 515 path: &HgPath,
516 516 ) -> Result<Option<&'tree mut Node<'on_disk>>, DirstateV2ParseError> {
517 517 let mut children = root;
518 518 let mut components = path.components();
519 519 let mut component =
520 520 components.next().expect("expected at least one components");
521 521 loop {
522 522 if let Some(child) = children
523 523 .make_mut(on_disk, unreachable_bytes)?
524 524 .get_mut(component)
525 525 {
526 526 if let Some(next_component) = components.next() {
527 527 component = next_component;
528 528 children = &mut child.children;
529 529 } else {
530 530 return Ok(Some(child));
531 531 }
532 532 } else {
533 533 return Ok(None);
534 534 }
535 535 }
536 536 }
537 537
538 538 pub(super) fn get_or_insert<'tree, 'path>(
539 539 &'tree mut self,
540 540 path: &HgPath,
541 541 ) -> Result<&'tree mut Node<'on_disk>, DirstateV2ParseError> {
542 542 Self::get_or_insert_node(
543 543 self.on_disk,
544 544 &mut self.unreachable_bytes,
545 545 &mut self.root,
546 546 path,
547 547 WithBasename::to_cow_owned,
548 548 |_| {},
549 549 )
550 550 }
551 551
552 552 fn get_or_insert_node<'tree, 'path>(
553 553 on_disk: &'on_disk [u8],
554 554 unreachable_bytes: &mut u32,
555 555 root: &'tree mut ChildNodes<'on_disk>,
556 556 path: &'path HgPath,
557 557 to_cow: impl Fn(
558 558 WithBasename<&'path HgPath>,
559 559 ) -> WithBasename<Cow<'on_disk, HgPath>>,
560 560 mut each_ancestor: impl FnMut(&mut Node),
561 561 ) -> Result<&'tree mut Node<'on_disk>, DirstateV2ParseError> {
562 562 let mut child_nodes = root;
563 563 let mut inclusive_ancestor_paths =
564 564 WithBasename::inclusive_ancestors_of(path);
565 565 let mut ancestor_path = inclusive_ancestor_paths
566 566 .next()
567 567 .expect("expected at least one inclusive ancestor");
568 568 loop {
569 569 // TODO: can we avoid allocating an owned key in cases where the
570 570 // map already contains that key, without introducing double
571 571 // lookup?
572 572 let child_node = child_nodes
573 573 .make_mut(on_disk, unreachable_bytes)?
574 574 .entry(to_cow(ancestor_path))
575 575 .or_default();
576 576 if let Some(next) = inclusive_ancestor_paths.next() {
577 577 each_ancestor(child_node);
578 578 ancestor_path = next;
579 579 child_nodes = &mut child_node.children;
580 580 } else {
581 581 return Ok(child_node);
582 582 }
583 583 }
584 584 }
585 585
586 586 fn add_or_remove_file(
587 587 &mut self,
588 588 path: &HgPath,
589 589 old_state: Option<EntryState>,
590 590 new_entry: DirstateEntry,
591 591 ) -> Result<(), DirstateV2ParseError> {
592 592 let had_entry = old_state.is_some();
593 593 let was_tracked = old_state.map_or(false, |s| s.is_tracked());
594 594 let tracked_count_increment =
595 595 match (was_tracked, new_entry.state().is_tracked()) {
596 596 (false, true) => 1,
597 597 (true, false) => -1,
598 598 _ => 0,
599 599 };
600 600
601 601 let node = Self::get_or_insert_node(
602 602 self.on_disk,
603 603 &mut self.unreachable_bytes,
604 604 &mut self.root,
605 605 path,
606 606 WithBasename::to_cow_owned,
607 607 |ancestor| {
608 608 if !had_entry {
609 609 ancestor.descendants_with_entry_count += 1;
610 610 }
611 611
612 612 // We can’t use `+= increment` because the counter is unsigned,
613 613 // and we want debug builds to detect accidental underflow
614 614 // through zero
615 615 match tracked_count_increment {
616 616 1 => ancestor.tracked_descendants_count += 1,
617 617 -1 => ancestor.tracked_descendants_count -= 1,
618 618 _ => {}
619 619 }
620 620 },
621 621 )?;
622 622 if !had_entry {
623 623 self.nodes_with_entry_count += 1
624 624 }
625 625 node.data = NodeData::Entry(new_entry);
626 626 Ok(())
627 627 }
628 628
629 629 fn iter_nodes<'tree>(
630 630 &'tree self,
631 631 ) -> impl Iterator<
632 632 Item = Result<NodeRef<'tree, 'on_disk>, DirstateV2ParseError>,
633 633 > + 'tree {
634 634 // Depth first tree traversal.
635 635 //
636 636 // If we could afford internal iteration and recursion,
637 637 // this would look like:
638 638 //
639 639 // ```
640 640 // fn traverse_children(
641 641 // children: &ChildNodes,
642 642 // each: &mut impl FnMut(&Node),
643 643 // ) {
644 644 // for child in children.values() {
645 645 // traverse_children(&child.children, each);
646 646 // each(child);
647 647 // }
648 648 // }
649 649 // ```
650 650 //
651 651 // However we want an external iterator and therefore can’t use the
652 652 // call stack. Use an explicit stack instead:
653 653 let mut stack = Vec::new();
654 654 let mut iter = self.root.as_ref().iter();
655 655 std::iter::from_fn(move || {
656 656 while let Some(child_node) = iter.next() {
657 657 let children = match child_node.children(self.on_disk) {
658 658 Ok(children) => children,
659 659 Err(error) => return Some(Err(error)),
660 660 };
661 661 // Pseudo-recursion
662 662 let new_iter = children.iter();
663 663 let old_iter = std::mem::replace(&mut iter, new_iter);
664 664 stack.push((child_node, old_iter));
665 665 }
666 666 // Found the end of a `children.iter()` iterator.
667 667 if let Some((child_node, next_iter)) = stack.pop() {
668 668 // "Return" from pseudo-recursion by restoring state from the
669 669 // explicit stack
670 670 iter = next_iter;
671 671
672 672 Some(Ok(child_node))
673 673 } else {
674 674 // Reached the bottom of the stack, we’re done
675 675 None
676 676 }
677 677 })
678 678 }
679 679
680 680 fn count_dropped_path(unreachable_bytes: &mut u32, path: &Cow<HgPath>) {
681 681 if let Cow::Borrowed(path) = path {
682 682 *unreachable_bytes += path.len() as u32
683 683 }
684 684 }
685 685 }
686 686
687 687 /// Like `Iterator::filter_map`, but over a fallible iterator of `Result`s.
688 688 ///
689 689 /// The callback is only called for incoming `Ok` values. Errors are passed
690 690 /// through as-is. In order to let it use the `?` operator the callback is
691 691 /// expected to return a `Result` of `Option`, instead of an `Option` of
692 692 /// `Result`.
693 693 fn filter_map_results<'a, I, F, A, B, E>(
694 694 iter: I,
695 695 f: F,
696 696 ) -> impl Iterator<Item = Result<B, E>> + 'a
697 697 where
698 698 I: Iterator<Item = Result<A, E>> + 'a,
699 699 F: Fn(A) -> Result<Option<B>, E> + 'a,
700 700 {
701 701 iter.filter_map(move |result| match result {
702 702 Ok(node) => f(node).transpose(),
703 703 Err(e) => Some(Err(e)),
704 704 })
705 705 }
706 706
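A toy illustration of this combinator (not part of this changeset, written as if it were a unit test inside this module): even numbers are kept, odd ones are dropped, and errors pass through unchanged.

```rust
// Hypothetical test; `filter_map_results` is private to this module.
#[test]
fn filter_map_results_toy_example() {
    let input: Vec<Result<u32, ()>> = vec![Ok(1), Ok(2), Err(()), Ok(4)];
    let output: Vec<Result<u32, ()>> =
        filter_map_results(input.into_iter(), |n| {
            // The closure returns `Result<Option<_>, _>` so it could use `?`;
            // this one simply never fails.
            Ok(if n % 2 == 0 { Some(n) } else { None })
        })
        .collect();
    assert_eq!(output, vec![Ok(2), Err(()), Ok(4)]);
}
```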
707 707 impl OwningDirstateMap {
708 708 pub fn clear(&mut self) {
709 709 let map = self.get_map_mut();
710 710 map.root = Default::default();
711 711 map.nodes_with_entry_count = 0;
712 712 map.nodes_with_copy_source_count = 0;
713 713 }
714 714
715 715 pub fn set_entry(
716 716 &mut self,
717 717 filename: &HgPath,
718 718 entry: DirstateEntry,
719 719 ) -> Result<(), DirstateV2ParseError> {
720 720 let map = self.get_map_mut();
721 721 map.get_or_insert(&filename)?.data = NodeData::Entry(entry);
722 722 Ok(())
723 723 }
724 724
725 725 pub fn add_file(
726 726 &mut self,
727 727 filename: &HgPath,
728 728 entry: DirstateEntry,
729 729 ) -> Result<(), DirstateError> {
730 730 let old_state = self.get(filename)?.map(|e| e.state());
731 731 let map = self.get_map_mut();
732 732 Ok(map.add_or_remove_file(filename, old_state, entry)?)
733 733 }
734 734
735 735 pub fn remove_file(
736 736 &mut self,
737 737 filename: &HgPath,
738 738 in_merge: bool,
739 739 ) -> Result<(), DirstateError> {
740 740 let old_entry_opt = self.get(filename)?;
741 741 let old_state = old_entry_opt.map(|e| e.state());
742 742 let mut size = 0;
743 743 if in_merge {
744 744 // XXX we should not be able to have 'm' state and 'FROM_P2' if not
745 745 // during a merge. So I (marmoute) am not sure we need the
746 746 // conditional at all. Double-checking this with an assert
747 747 // would be nice.
748 748 if let Some(old_entry) = old_entry_opt {
749 749 // backup the previous state
750 750 if old_entry.state() == EntryState::Merged {
751 751 size = SIZE_NON_NORMAL;
752 752 } else if old_entry.state() == EntryState::Normal
753 753 && old_entry.size() == SIZE_FROM_OTHER_PARENT
754 754 {
755 755 // other parent
756 756 size = SIZE_FROM_OTHER_PARENT;
757 757 }
758 758 }
759 759 }
760 760 if size == 0 {
761 761 self.copy_map_remove(filename)?;
762 762 }
763 763 let map = self.get_map_mut();
764 764 let entry = DirstateEntry::new_removed(size);
765 765 Ok(map.add_or_remove_file(filename, old_state, entry)?)
766 766 }
767 767
768 768 pub fn drop_entry_and_copy_source(
769 769 &mut self,
770 770 filename: &HgPath,
771 771 ) -> Result<(), DirstateError> {
772 772 let was_tracked = self
773 773 .get(filename)?
774 774 .map_or(false, |e| e.state().is_tracked());
775 775 let map = self.get_map_mut();
776 776 struct Dropped {
777 777 was_tracked: bool,
778 778 had_entry: bool,
779 779 had_copy_source: bool,
780 780 }
781 781
782 782 /// If this returns `Ok(Some((dropped, removed)))`, then
783 783 ///
784 784 /// * `dropped` is about the leaf node that was at `filename`
785 785 /// * `removed` is whether this particular level of recursion just
786 786 /// removed a node in `nodes`.
787 787 fn recur<'on_disk>(
788 788 on_disk: &'on_disk [u8],
789 789 unreachable_bytes: &mut u32,
790 790 nodes: &mut ChildNodes<'on_disk>,
791 791 path: &HgPath,
792 792 ) -> Result<Option<(Dropped, bool)>, DirstateV2ParseError> {
793 793 let (first_path_component, rest_of_path) =
794 794 path.split_first_component();
795 795 let nodes = nodes.make_mut(on_disk, unreachable_bytes)?;
796 796 let node = if let Some(node) = nodes.get_mut(first_path_component)
797 797 {
798 798 node
799 799 } else {
800 800 return Ok(None);
801 801 };
802 802 let dropped;
803 803 if let Some(rest) = rest_of_path {
804 804 if let Some((d, removed)) = recur(
805 805 on_disk,
806 806 unreachable_bytes,
807 807 &mut node.children,
808 808 rest,
809 809 )? {
810 810 dropped = d;
811 811 if dropped.had_entry {
812 812 node.descendants_with_entry_count -= 1;
813 813 }
814 814 if dropped.was_tracked {
815 815 node.tracked_descendants_count -= 1;
816 816 }
817 817
818 818 // Directory caches must be invalidated when removing a
819 819 // child node
820 820 if removed {
821 821 if let NodeData::CachedDirectory { .. } = &node.data {
822 822 node.data = NodeData::None
823 823 }
824 824 }
825 825 } else {
826 826 return Ok(None);
827 827 }
828 828 } else {
829 829 let had_entry = node.data.has_entry();
830 830 if had_entry {
831 831 node.data = NodeData::None
832 832 }
833 833 if let Some(source) = &node.copy_source {
834 834 DirstateMap::count_dropped_path(unreachable_bytes, source);
835 835 node.copy_source = None
836 836 }
837 837 dropped = Dropped {
838 838 was_tracked: node
839 839 .data
840 840 .as_entry()
841 841 .map_or(false, |entry| entry.state().is_tracked()),
842 842 had_entry,
843 843 had_copy_source: node.copy_source.take().is_some(),
844 844 };
845 845 }
846 846 // After recursion, for both leaf (rest_of_path is None) nodes and
847 847 // parent nodes, remove a node if it just became empty.
848 848 let remove = !node.data.has_entry()
849 849 && node.copy_source.is_none()
850 850 && node.children.is_empty();
851 851 if remove {
852 852 let (key, _) =
853 853 nodes.remove_entry(first_path_component).unwrap();
854 854 DirstateMap::count_dropped_path(
855 855 unreachable_bytes,
856 856 key.full_path(),
857 857 )
858 858 }
859 859 Ok(Some((dropped, remove)))
860 860 }
861 861
862 862 if let Some((dropped, _removed)) = recur(
863 863 map.on_disk,
864 864 &mut map.unreachable_bytes,
865 865 &mut map.root,
866 866 filename,
867 867 )? {
868 868 if dropped.had_entry {
869 869 map.nodes_with_entry_count -= 1
870 870 }
871 871 if dropped.had_copy_source {
872 872 map.nodes_with_copy_source_count -= 1
873 873 }
874 874 } else {
875 875 debug_assert!(!was_tracked);
876 876 }
877 877 Ok(())
878 878 }
879 879
880 880 pub fn has_tracked_dir(
881 881 &mut self,
882 882 directory: &HgPath,
883 883 ) -> Result<bool, DirstateError> {
884 884 let map = self.get_map_mut();
885 885 if let Some(node) = map.get_node(directory)? {
886 886 // A node without a `DirstateEntry` was created to hold child
887 887 // nodes, and is therefore a directory.
888 888 let state = node.state()?;
889 889 Ok(state.is_none() && node.tracked_descendants_count() > 0)
890 890 } else {
891 891 Ok(false)
892 892 }
893 893 }
894 894
895 895 pub fn has_dir(
896 896 &mut self,
897 897 directory: &HgPath,
898 898 ) -> Result<bool, DirstateError> {
899 899 let map = self.get_map_mut();
900 900 if let Some(node) = map.get_node(directory)? {
901 901 // A node without a `DirstateEntry` was created to hold child
902 902 // nodes, and is therefore a directory.
903 903 let state = node.state()?;
904 904 Ok(state.is_none() && node.descendants_with_entry_count() > 0)
905 905 } else {
906 906 Ok(false)
907 907 }
908 908 }
909 909
910 910 #[timed]
911 911 pub fn pack_v1(
912 912 &self,
913 913 parents: DirstateParents,
914 914 ) -> Result<Vec<u8>, DirstateError> {
915 915 let map = self.get_map();
916 916 // Optimization (to be measured?): pre-compute size to avoid `Vec`
917 917 // reallocations
918 918 let mut size = parents.as_bytes().len();
919 919 for node in map.iter_nodes() {
920 920 let node = node?;
921 921 if node.entry()?.is_some() {
922 922 size += packed_entry_size(
923 923 node.full_path(map.on_disk)?,
924 924 node.copy_source(map.on_disk)?,
925 925 );
926 926 }
927 927 }
928 928
929 929 let mut packed = Vec::with_capacity(size);
930 930 packed.extend(parents.as_bytes());
931 931
932 932 for node in map.iter_nodes() {
933 933 let node = node?;
934 934 if let Some(entry) = node.entry()? {
935 935 pack_entry(
936 936 node.full_path(map.on_disk)?,
937 937 &entry,
938 938 node.copy_source(map.on_disk)?,
939 939 &mut packed,
940 940 );
941 941 }
942 942 }
943 943 Ok(packed)
944 944 }
945 945
946 946 /// Returns new data and metadata together with whether that data should be
947 947 /// appended to the existing data file whose content is at
948 948 /// `map.on_disk` (true), instead of written to a new data file
949 949 /// (false).
950 950 #[timed]
951 951 pub fn pack_v2(
952 952 &self,
953 953 can_append: bool,
954 ) -> Result<(Vec<u8>, Vec<u8>, bool), DirstateError> {
954 ) -> Result<(Vec<u8>, on_disk::TreeMetadata, bool), DirstateError> {
955 955 let map = self.get_map();
956 956 on_disk::write(map, can_append)
957 957 }
958 958
959 959 pub fn status<'a>(
960 960 &'a mut self,
961 961 matcher: &'a (dyn Matcher + Sync),
962 962 root_dir: PathBuf,
963 963 ignore_files: Vec<PathBuf>,
964 964 options: StatusOptions,
965 965 ) -> Result<(DirstateStatus<'a>, Vec<PatternFileWarning>), StatusError>
966 966 {
967 967 let map = self.get_map_mut();
968 968 super::status::status(map, matcher, root_dir, ignore_files, options)
969 969 }
970 970
971 971 pub fn copy_map_len(&self) -> usize {
972 972 let map = self.get_map();
973 973 map.nodes_with_copy_source_count as usize
974 974 }
975 975
976 976 pub fn copy_map_iter(&self) -> CopyMapIter<'_> {
977 977 let map = self.get_map();
978 978 Box::new(filter_map_results(map.iter_nodes(), move |node| {
979 979 Ok(if let Some(source) = node.copy_source(map.on_disk)? {
980 980 Some((node.full_path(map.on_disk)?, source))
981 981 } else {
982 982 None
983 983 })
984 984 }))
985 985 }
986 986
987 987 pub fn copy_map_contains_key(
988 988 &self,
989 989 key: &HgPath,
990 990 ) -> Result<bool, DirstateV2ParseError> {
991 991 let map = self.get_map();
992 992 Ok(if let Some(node) = map.get_node(key)? {
993 993 node.has_copy_source()
994 994 } else {
995 995 false
996 996 })
997 997 }
998 998
999 999 pub fn copy_map_get(
1000 1000 &self,
1001 1001 key: &HgPath,
1002 1002 ) -> Result<Option<&HgPath>, DirstateV2ParseError> {
1003 1003 let map = self.get_map();
1004 1004 if let Some(node) = map.get_node(key)? {
1005 1005 if let Some(source) = node.copy_source(map.on_disk)? {
1006 1006 return Ok(Some(source));
1007 1007 }
1008 1008 }
1009 1009 Ok(None)
1010 1010 }
1011 1011
1012 1012 pub fn copy_map_remove(
1013 1013 &mut self,
1014 1014 key: &HgPath,
1015 1015 ) -> Result<Option<HgPathBuf>, DirstateV2ParseError> {
1016 1016 let map = self.get_map_mut();
1017 1017 let count = &mut map.nodes_with_copy_source_count;
1018 1018 let unreachable_bytes = &mut map.unreachable_bytes;
1019 1019 Ok(DirstateMap::get_node_mut(
1020 1020 map.on_disk,
1021 1021 unreachable_bytes,
1022 1022 &mut map.root,
1023 1023 key,
1024 1024 )?
1025 1025 .and_then(|node| {
1026 1026 if let Some(source) = &node.copy_source {
1027 1027 *count -= 1;
1028 1028 DirstateMap::count_dropped_path(unreachable_bytes, source);
1029 1029 }
1030 1030 node.copy_source.take().map(Cow::into_owned)
1031 1031 }))
1032 1032 }
1033 1033
1034 1034 pub fn copy_map_insert(
1035 1035 &mut self,
1036 1036 key: HgPathBuf,
1037 1037 value: HgPathBuf,
1038 1038 ) -> Result<Option<HgPathBuf>, DirstateV2ParseError> {
1039 1039 let map = self.get_map_mut();
1040 1040 let node = DirstateMap::get_or_insert_node(
1041 1041 map.on_disk,
1042 1042 &mut map.unreachable_bytes,
1043 1043 &mut map.root,
1044 1044 &key,
1045 1045 WithBasename::to_cow_owned,
1046 1046 |_ancestor| {},
1047 1047 )?;
1048 1048 if node.copy_source.is_none() {
1049 1049 map.nodes_with_copy_source_count += 1
1050 1050 }
1051 1051 Ok(node.copy_source.replace(value.into()).map(Cow::into_owned))
1052 1052 }
1053 1053
1054 1054 pub fn len(&self) -> usize {
1055 1055 let map = self.get_map();
1056 1056 map.nodes_with_entry_count as usize
1057 1057 }
1058 1058
1059 1059 pub fn contains_key(
1060 1060 &self,
1061 1061 key: &HgPath,
1062 1062 ) -> Result<bool, DirstateV2ParseError> {
1063 1063 Ok(self.get(key)?.is_some())
1064 1064 }
1065 1065
1066 1066 pub fn get(
1067 1067 &self,
1068 1068 key: &HgPath,
1069 1069 ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {
1070 1070 let map = self.get_map();
1071 1071 Ok(if let Some(node) = map.get_node(key)? {
1072 1072 node.entry()?
1073 1073 } else {
1074 1074 None
1075 1075 })
1076 1076 }
1077 1077
1078 1078 pub fn iter(&self) -> StateMapIter<'_> {
1079 1079 let map = self.get_map();
1080 1080 Box::new(filter_map_results(map.iter_nodes(), move |node| {
1081 1081 Ok(if let Some(entry) = node.entry()? {
1082 1082 Some((node.full_path(map.on_disk)?, entry))
1083 1083 } else {
1084 1084 None
1085 1085 })
1086 1086 }))
1087 1087 }
1088 1088
1089 1089 pub fn iter_tracked_dirs(
1090 1090 &mut self,
1091 1091 ) -> Result<
1092 1092 Box<
1093 1093 dyn Iterator<Item = Result<&HgPath, DirstateV2ParseError>>
1094 1094 + Send
1095 1095 + '_,
1096 1096 >,
1097 1097 DirstateError,
1098 1098 > {
1099 1099 let map = self.get_map_mut();
1100 1100 let on_disk = map.on_disk;
1101 1101 Ok(Box::new(filter_map_results(
1102 1102 map.iter_nodes(),
1103 1103 move |node| {
1104 1104 Ok(if node.tracked_descendants_count() > 0 {
1105 1105 Some(node.full_path(on_disk)?)
1106 1106 } else {
1107 1107 None
1108 1108 })
1109 1109 },
1110 1110 )))
1111 1111 }
1112 1112
1113 1113 pub fn debug_iter(
1114 1114 &self,
1115 1115 all: bool,
1116 1116 ) -> Box<
1117 1117 dyn Iterator<
1118 1118 Item = Result<
1119 1119 (&HgPath, (u8, i32, i32, i32)),
1120 1120 DirstateV2ParseError,
1121 1121 >,
1122 1122 > + Send
1123 1123 + '_,
1124 1124 > {
1125 1125 let map = self.get_map();
1126 1126 Box::new(filter_map_results(map.iter_nodes(), move |node| {
1127 1127 let debug_tuple = if let Some(entry) = node.entry()? {
1128 1128 entry.debug_tuple()
1129 1129 } else if !all {
1130 1130 return Ok(None);
1131 1131 } else if let Some(mtime) = node.cached_directory_mtime()? {
1132 1132 (b' ', 0, -1, mtime.truncated_seconds() as i32)
1133 1133 } else {
1134 1134 (b' ', 0, -1, -1)
1135 1135 };
1136 1136 Ok(Some((node.full_path(map.on_disk)?, debug_tuple)))
1137 1137 }))
1138 1138 }
1139 1139 }
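The two hunks fit together: `pack_v2` above now returns a typed `on_disk::TreeMetadata` instead of pre-serialized metadata bytes, and the hunk below adds `TreeMetadata::as_bytes`, `Docket::new_uid` and `Docket::serialize` to consume it. Below is a minimal sketch, in the spirit of the `Repo::write_dirstate` change named in the changeset title, of how a caller might wire these pieces together. It assumes the hg-core types (`OwningDirstateMap`, `DirstateParents`, `DirstateError`, `Docket`) are in scope; the function name, the uuid handling and the I/O placeholders are hypothetical and not part of this diff.

```rust
// Illustrative sketch only; the real caller lives in rhg and handles vfs
// access, uuid reuse when appending, and error conversion, all elided here.
fn write_dirstate_v2_sketch(
    map: &OwningDirstateMap,
    parents: DirstateParents,
    can_append: bool,
) -> Result<(), DirstateError> {
    // New `pack_v2` signature: data bytes, typed tree metadata, append flag.
    let (data, tree_metadata, append) = map.pack_v2(can_append)?;

    // A fresh data-file identifier (the append case would reuse the uuid of
    // the existing data file instead).
    let uuid = Docket::new_uid();
    let data_filename = format!("dirstate.{}", uuid); // same shape as Docket::data_filename()

    // ... write or append `data` to `.hg/<data_filename>` here ...
    let _ = (&data_filename, &data, append);

    // The docket written to `.hg/dirstate` records the parents, the tree
    // metadata, the data size and the uuid of the data file.
    let docket_bytes = Docket::serialize(
        parents,
        tree_metadata,
        data.len() as u64, // hypothetical: real code would use the on-disk size
        uuid.as_bytes(),
    )
    .expect("data size and uuid length fit in the docket fields");
    // ... atomically replace `.hg/dirstate` with `docket_bytes` here ...
    let _ = docket_bytes;
    Ok(())
}
```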
rust/hg-core/src/dirstate_tree/on_disk.rs
@@ -1,791 +1,837 @@
1 1 //! The "version 2" disk representation of the dirstate
2 2 //!
3 3 //! See `mercurial/helptext/internals/dirstate-v2.txt`
4 4
5 5 use crate::dirstate::TruncatedTimestamp;
6 6 use crate::dirstate_tree::dirstate_map::{self, DirstateMap, NodeRef};
7 7 use crate::dirstate_tree::path_with_basename::WithBasename;
8 8 use crate::errors::HgError;
9 9 use crate::utils::hg_path::HgPath;
10 10 use crate::DirstateEntry;
11 11 use crate::DirstateError;
12 12 use crate::DirstateParents;
13 13 use bitflags::bitflags;
14 14 use bytes_cast::unaligned::{U16Be, U32Be};
15 15 use bytes_cast::BytesCast;
16 16 use format_bytes::format_bytes;
17 use rand::Rng;
17 18 use std::borrow::Cow;
18 19 use std::convert::{TryFrom, TryInto};
20 use std::fmt::Write;
19 21
20 22 /// Added at the start of `.hg/dirstate` when the "v2" format is used.
21 23 /// This a redundant sanity check more than an actual "magic number" since
22 24 /// `.hg/requires` already governs which format should be used.
23 25 pub const V2_FORMAT_MARKER: &[u8; 12] = b"dirstate-v2\n";
24 26
25 27 /// Keep space for 256-bit hashes
26 28 const STORED_NODE_ID_BYTES: usize = 32;
27 29
28 30 /// … even though only 160 bits are used for now, with SHA-1
29 31 const USED_NODE_ID_BYTES: usize = 20;
30 32
31 33 pub(super) const IGNORE_PATTERNS_HASH_LEN: usize = 20;
32 34 pub(super) type IgnorePatternsHash = [u8; IGNORE_PATTERNS_HASH_LEN];
33 35
34 36 /// Must match constants of the same names in `mercurial/dirstateutils/v2.py`
35 37 const TREE_METADATA_SIZE: usize = 44;
36 38 const NODE_SIZE: usize = 44;
37 39
38 40 /// Make sure that size-affecting changes are made knowingly
39 41 #[allow(unused)]
40 42 fn static_assert_size_of() {
41 43 let _ = std::mem::transmute::<TreeMetadata, [u8; TREE_METADATA_SIZE]>;
42 44 let _ = std::mem::transmute::<DocketHeader, [u8; TREE_METADATA_SIZE + 81]>;
43 45 let _ = std::mem::transmute::<Node, [u8; NODE_SIZE]>;
44 46 }
45 47
46 48 // Must match `HEADER` in `mercurial/dirstateutils/docket.py`
47 49 #[derive(BytesCast)]
48 50 #[repr(C)]
49 51 struct DocketHeader {
50 52 marker: [u8; V2_FORMAT_MARKER.len()],
51 53 parent_1: [u8; STORED_NODE_ID_BYTES],
52 54 parent_2: [u8; STORED_NODE_ID_BYTES],
53 55
54 56 metadata: TreeMetadata,
55 57
56 58 /// Counted in bytes
57 59 data_size: Size,
58 60
59 61 uuid_size: u8,
60 62 }
61 63
62 64 pub struct Docket<'on_disk> {
63 65 header: &'on_disk DocketHeader,
64 66 pub uuid: &'on_disk [u8],
65 67 }
66 68
67 69 /// Fields are documented in the *Tree metadata in the docket file*
68 70 /// section of `mercurial/helptext/internals/dirstate-v2.txt`
69 71 #[derive(BytesCast)]
70 72 #[repr(C)]
71 struct TreeMetadata {
73 pub struct TreeMetadata {
72 74 root_nodes: ChildNodes,
73 75 nodes_with_entry_count: Size,
74 76 nodes_with_copy_source_count: Size,
75 77 unreachable_bytes: Size,
76 78 unused: [u8; 4],
77 79
78 80 /// See *Optional hash of ignore patterns* section of
79 81 /// `mercurial/helptext/internals/dirstate-v2.txt`
80 82 ignore_patterns_hash: IgnorePatternsHash,
81 83 }
82 84
83 85 /// Fields are documented in the *The data file format*
84 86 /// section of `mercurial/helptext/internals/dirstate-v2.txt`
85 87 #[derive(BytesCast)]
86 88 #[repr(C)]
87 89 pub(super) struct Node {
88 90 full_path: PathSlice,
89 91
90 92 /// In bytes from `self.full_path.start`
91 93 base_name_start: PathSize,
92 94
93 95 copy_source: OptPathSlice,
94 96 children: ChildNodes,
95 97 pub(super) descendants_with_entry_count: Size,
96 98 pub(super) tracked_descendants_count: Size,
97 99 flags: U16Be,
98 100 size: U32Be,
99 101 mtime: PackedTruncatedTimestamp,
100 102 }
101 103
102 104 bitflags! {
103 105 #[repr(C)]
104 106 struct Flags: u16 {
105 107 const WDIR_TRACKED = 1 << 0;
106 108 const P1_TRACKED = 1 << 1;
107 109 const P2_INFO = 1 << 2;
108 110 const MODE_EXEC_PERM = 1 << 3;
109 111 const MODE_IS_SYMLINK = 1 << 4;
110 112 const HAS_FALLBACK_EXEC = 1 << 5;
111 113 const FALLBACK_EXEC = 1 << 6;
112 114 const HAS_FALLBACK_SYMLINK = 1 << 7;
113 115 const FALLBACK_SYMLINK = 1 << 8;
114 116 const EXPECTED_STATE_IS_MODIFIED = 1 << 9;
115 117 const HAS_MODE_AND_SIZE = 1 <<10;
116 118 const HAS_MTIME = 1 <<11;
117 119 const MTIME_SECOND_AMBIGUOUS = 1 << 12;
118 120 const DIRECTORY = 1 <<13;
119 121 const ALL_UNKNOWN_RECORDED = 1 <<14;
120 122 const ALL_IGNORED_RECORDED = 1 <<15;
121 123 }
122 124 }
123 125
124 126 /// Duration since the Unix epoch
125 127 #[derive(BytesCast, Copy, Clone)]
126 128 #[repr(C)]
127 129 struct PackedTruncatedTimestamp {
128 130 truncated_seconds: U32Be,
129 131 nanoseconds: U32Be,
130 132 }
131 133
132 134 /// Counted in bytes from the start of the file
133 135 ///
134 136 /// NOTE: not supporting `.hg/dirstate` files larger than 4 GiB.
135 137 type Offset = U32Be;
136 138
137 139 /// Counted in number of items
138 140 ///
139 141 /// NOTE: we choose not to support counting more than 4 billion nodes anywhere.
140 142 type Size = U32Be;
141 143
142 144 /// Counted in bytes
143 145 ///
144 146 /// NOTE: we choose not to support file names/paths longer than 64 KiB.
145 147 type PathSize = U16Be;
146 148
147 149 /// A contiguous sequence of `len` times `Node`, representing the child nodes
148 150 /// of either some other node or of the repository root.
149 151 ///
150 152 /// Always sorted by ascending `full_path`, to allow binary search.
151 153 /// Since nodes with the same parent nodes also have the same parent path,
152 154 /// only the `base_name`s need to be compared during binary search.
153 155 #[derive(BytesCast, Copy, Clone)]
154 156 #[repr(C)]
155 157 struct ChildNodes {
156 158 start: Offset,
157 159 len: Size,
158 160 }
159 161
160 162 /// A `HgPath` of `len` bytes
161 163 #[derive(BytesCast, Copy, Clone)]
162 164 #[repr(C)]
163 165 struct PathSlice {
164 166 start: Offset,
165 167 len: PathSize,
166 168 }
167 169
168 170 /// Either nothing if `start == 0`, or a `HgPath` of `len` bytes
169 171 type OptPathSlice = PathSlice;
170 172
171 173 /// Unexpected file format found in `.hg/dirstate` with the "v2" format.
172 174 ///
173 175 /// This should only happen if Mercurial is buggy or a repository is corrupted.
174 176 #[derive(Debug)]
175 177 pub struct DirstateV2ParseError;
176 178
177 179 impl From<DirstateV2ParseError> for HgError {
178 180 fn from(_: DirstateV2ParseError) -> Self {
179 181 HgError::corrupted("dirstate-v2 parse error")
180 182 }
181 183 }
182 184
183 185 impl From<DirstateV2ParseError> for crate::DirstateError {
184 186 fn from(error: DirstateV2ParseError) -> Self {
185 187 HgError::from(error).into()
186 188 }
187 189 }
188 190
191 impl TreeMetadata {
192 pub fn as_bytes(&self) -> &[u8] {
193 BytesCast::as_bytes(self)
194 }
195 }
196
189 197 impl<'on_disk> Docket<'on_disk> {
198 /// Generate the identifier for a new data file
199 ///
200 /// TODO: support the `HGTEST_UUIDFILE` environment variable.
201 /// See `mercurial/revlogutils/docket.py`
202 pub fn new_uid() -> String {
203 const ID_LENGTH: usize = 8;
204 let mut id = String::with_capacity(ID_LENGTH);
205 let mut rng = rand::thread_rng();
206 for _ in 0..ID_LENGTH {
207 // One random hexadecimal digit.
208 // `unwrap` never panics because `impl Write for String`
209 // never returns an error.
210 write!(&mut id, "{:x}", rng.gen_range(0, 16)).unwrap();
211 }
212 id
213 }
214
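For illustration only: `new_uid()` returns eight random lowercase hexadecimal digits, e.g. a value like `d41f8c02` (made up here), so `data_filename()` below would then yield `dirstate.d41f8c02`.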
215 pub fn serialize(
216 parents: DirstateParents,
217 tree_metadata: TreeMetadata,
218 data_size: u64,
219 uuid: &[u8],
220 ) -> Result<Vec<u8>, std::num::TryFromIntError> {
221 let header = DocketHeader {
222 marker: *V2_FORMAT_MARKER,
223 parent_1: parents.p1.pad_to_256_bits(),
224 parent_2: parents.p2.pad_to_256_bits(),
225 metadata: tree_metadata,
226 data_size: u32::try_from(data_size)?.into(),
227 uuid_size: uuid.len().try_into()?,
228 };
229 let header = header.as_bytes();
230 let mut docket = Vec::with_capacity(header.len() + uuid.len());
231 docket.extend_from_slice(header);
232 docket.extend_from_slice(uuid);
233 Ok(docket)
234 }
235
190 236 pub fn parents(&self) -> DirstateParents {
191 237 use crate::Node;
192 238 let p1 = Node::try_from(&self.header.parent_1[..USED_NODE_ID_BYTES])
193 239 .unwrap()
194 240 .clone();
195 241 let p2 = Node::try_from(&self.header.parent_2[..USED_NODE_ID_BYTES])
196 242 .unwrap()
197 243 .clone();
198 244 DirstateParents { p1, p2 }
199 245 }
200 246
201 247 pub fn tree_metadata(&self) -> &[u8] {
202 248 self.header.metadata.as_bytes()
203 249 }
204 250
205 251 pub fn data_size(&self) -> usize {
206 252 // This `unwrap` could only panic on a 16-bit CPU
207 253 self.header.data_size.get().try_into().unwrap()
208 254 }
209 255
210 256 pub fn data_filename(&self) -> String {
211 257 String::from_utf8(format_bytes!(b"dirstate.{}", self.uuid)).unwrap()
212 258 }
213 259 }
214 260
215 261 pub fn read_docket(
216 262 on_disk: &[u8],
217 263 ) -> Result<Docket<'_>, DirstateV2ParseError> {
218 264 let (header, uuid) =
219 265 DocketHeader::from_bytes(on_disk).map_err(|_| DirstateV2ParseError)?;
220 266 let uuid_size = header.uuid_size as usize;
221 267 if header.marker == *V2_FORMAT_MARKER && uuid.len() == uuid_size {
222 268 Ok(Docket { header, uuid })
223 269 } else {
224 270 Err(DirstateV2ParseError)
225 271 }
226 272 }
227 273
228 274 pub(super) fn read<'on_disk>(
229 275 on_disk: &'on_disk [u8],
230 276 metadata: &[u8],
231 277 ) -> Result<DirstateMap<'on_disk>, DirstateV2ParseError> {
232 278 if on_disk.is_empty() {
233 279 return Ok(DirstateMap::empty(on_disk));
234 280 }
235 281 let (meta, _) = TreeMetadata::from_bytes(metadata)
236 282 .map_err(|_| DirstateV2ParseError)?;
237 283 let dirstate_map = DirstateMap {
238 284 on_disk,
239 285 root: dirstate_map::ChildNodes::OnDisk(read_nodes(
240 286 on_disk,
241 287 meta.root_nodes,
242 288 )?),
243 289 nodes_with_entry_count: meta.nodes_with_entry_count.get(),
244 290 nodes_with_copy_source_count: meta.nodes_with_copy_source_count.get(),
245 291 ignore_patterns_hash: meta.ignore_patterns_hash,
246 292 unreachable_bytes: meta.unreachable_bytes.get(),
247 293 };
248 294 Ok(dirstate_map)
249 295 }
250 296
251 297 impl Node {
252 298 pub(super) fn full_path<'on_disk>(
253 299 &self,
254 300 on_disk: &'on_disk [u8],
255 301 ) -> Result<&'on_disk HgPath, DirstateV2ParseError> {
256 302 read_hg_path(on_disk, self.full_path)
257 303 }
258 304
259 305 pub(super) fn base_name_start<'on_disk>(
260 306 &self,
261 307 ) -> Result<usize, DirstateV2ParseError> {
262 308 let start = self.base_name_start.get();
263 309 if start < self.full_path.len.get() {
264 310 let start = usize::try_from(start)
265 311 // u32 -> usize, could only panic on a 16-bit CPU
266 312 .expect("dirstate-v2 base_name_start out of bounds");
267 313 Ok(start)
268 314 } else {
269 315 Err(DirstateV2ParseError)
270 316 }
271 317 }
272 318
273 319 pub(super) fn base_name<'on_disk>(
274 320 &self,
275 321 on_disk: &'on_disk [u8],
276 322 ) -> Result<&'on_disk HgPath, DirstateV2ParseError> {
277 323 let full_path = self.full_path(on_disk)?;
278 324 let base_name_start = self.base_name_start()?;
279 325 Ok(HgPath::new(&full_path.as_bytes()[base_name_start..]))
280 326 }
281 327
282 328 pub(super) fn path<'on_disk>(
283 329 &self,
284 330 on_disk: &'on_disk [u8],
285 331 ) -> Result<dirstate_map::NodeKey<'on_disk>, DirstateV2ParseError> {
286 332 Ok(WithBasename::from_raw_parts(
287 333 Cow::Borrowed(self.full_path(on_disk)?),
288 334 self.base_name_start()?,
289 335 ))
290 336 }
291 337
292 338 pub(super) fn has_copy_source<'on_disk>(&self) -> bool {
293 339 self.copy_source.start.get() != 0
294 340 }
295 341
296 342 pub(super) fn copy_source<'on_disk>(
297 343 &self,
298 344 on_disk: &'on_disk [u8],
299 345 ) -> Result<Option<&'on_disk HgPath>, DirstateV2ParseError> {
300 346 Ok(if self.has_copy_source() {
301 347 Some(read_hg_path(on_disk, self.copy_source)?)
302 348 } else {
303 349 None
304 350 })
305 351 }
306 352
307 353 fn flags(&self) -> Flags {
308 354 Flags::from_bits_truncate(self.flags.get())
309 355 }
310 356
311 357 fn has_entry(&self) -> bool {
312 358 self.flags().intersects(
313 359 Flags::WDIR_TRACKED | Flags::P1_TRACKED | Flags::P2_INFO,
314 360 )
315 361 }
316 362
317 363 pub(super) fn node_data(
318 364 &self,
319 365 ) -> Result<dirstate_map::NodeData, DirstateV2ParseError> {
320 366 if self.has_entry() {
321 367 Ok(dirstate_map::NodeData::Entry(self.assume_entry()?))
322 368 } else if let Some(mtime) = self.cached_directory_mtime()? {
323 369 Ok(dirstate_map::NodeData::CachedDirectory { mtime })
324 370 } else {
325 371 Ok(dirstate_map::NodeData::None)
326 372 }
327 373 }
328 374
329 375 pub(super) fn cached_directory_mtime(
330 376 &self,
331 377 ) -> Result<Option<TruncatedTimestamp>, DirstateV2ParseError> {
332 378 // For now we do not have code to handle the absence of
333 379 // ALL_UNKNOWN_RECORDED, so we ignore the mtime if the flag is
334 380 // unset.
335 381 if self.flags().contains(Flags::DIRECTORY)
336 382 && self.flags().contains(Flags::HAS_MTIME)
337 383 && self.flags().contains(Flags::ALL_UNKNOWN_RECORDED)
338 384 {
339 385 Ok(Some(self.mtime.try_into()?))
340 386 } else {
341 387 Ok(None)
342 388 }
343 389 }
344 390
345 391 fn synthesize_unix_mode(&self) -> u32 {
346 392 let file_type = if self.flags().contains(Flags::MODE_IS_SYMLINK) {
347 393 libc::S_IFLNK
348 394 } else {
349 395 libc::S_IFREG
350 396 };
351 397 let permisions = if self.flags().contains(Flags::MODE_EXEC_PERM) {
352 398 0o755
353 399 } else {
354 400 0o644
355 401 };
356 402 file_type | permisions
357 403 }
358 404
359 405 fn assume_entry(&self) -> Result<DirstateEntry, DirstateV2ParseError> {
360 406 // TODO: convert through raw bits instead?
361 407 let wdir_tracked = self.flags().contains(Flags::WDIR_TRACKED);
362 408 let p1_tracked = self.flags().contains(Flags::P1_TRACKED);
363 409 let p2_info = self.flags().contains(Flags::P2_INFO);
364 410 let mode_size = if self.flags().contains(Flags::HAS_MODE_AND_SIZE)
365 411 && !self.flags().contains(Flags::EXPECTED_STATE_IS_MODIFIED)
366 412 {
367 413 Some((self.synthesize_unix_mode(), self.size.into()))
368 414 } else {
369 415 None
370 416 };
371 417 let mtime = if self.flags().contains(Flags::HAS_MTIME)
372 418 && !self.flags().contains(Flags::DIRECTORY)
373 419 && !self.flags().contains(Flags::EXPECTED_STATE_IS_MODIFIED)
374 420 {
375 421 let mut m: TruncatedTimestamp = self.mtime.try_into()?;
376 422 if self.flags().contains(Flags::MTIME_SECOND_AMBIGUOUS) {
377 423 m.second_ambiguous = true;
378 424 }
379 425 Some(m)
380 426 } else {
381 427 None
382 428 };
383 429 let fallback_exec = if self.flags().contains(Flags::HAS_FALLBACK_EXEC)
384 430 {
385 431 Some(self.flags().contains(Flags::FALLBACK_EXEC))
386 432 } else {
387 433 None
388 434 };
389 435 let fallback_symlink =
390 436 if self.flags().contains(Flags::HAS_FALLBACK_SYMLINK) {
391 437 Some(self.flags().contains(Flags::FALLBACK_SYMLINK))
392 438 } else {
393 439 None
394 440 };
395 441 Ok(DirstateEntry::from_v2_data(
396 442 wdir_tracked,
397 443 p1_tracked,
398 444 p2_info,
399 445 mode_size,
400 446 mtime,
401 447 fallback_exec,
402 448 fallback_symlink,
403 449 ))
404 450 }
405 451
406 452 pub(super) fn entry(
407 453 &self,
408 454 ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {
409 455 if self.has_entry() {
410 456 Ok(Some(self.assume_entry()?))
411 457 } else {
412 458 Ok(None)
413 459 }
414 460 }
415 461
416 462 pub(super) fn children<'on_disk>(
417 463 &self,
418 464 on_disk: &'on_disk [u8],
419 465 ) -> Result<&'on_disk [Node], DirstateV2ParseError> {
420 466 read_nodes(on_disk, self.children)
421 467 }
422 468
423 469 pub(super) fn to_in_memory_node<'on_disk>(
424 470 &self,
425 471 on_disk: &'on_disk [u8],
426 472 ) -> Result<dirstate_map::Node<'on_disk>, DirstateV2ParseError> {
427 473 Ok(dirstate_map::Node {
428 474 children: dirstate_map::ChildNodes::OnDisk(
429 475 self.children(on_disk)?,
430 476 ),
431 477 copy_source: self.copy_source(on_disk)?.map(Cow::Borrowed),
432 478 data: self.node_data()?,
433 479 descendants_with_entry_count: self
434 480 .descendants_with_entry_count
435 481 .get(),
436 482 tracked_descendants_count: self.tracked_descendants_count.get(),
437 483 })
438 484 }
439 485
440 486 fn from_dirstate_entry(
441 487 entry: &DirstateEntry,
442 488 ) -> (Flags, U32Be, PackedTruncatedTimestamp) {
443 489 let (
444 490 wdir_tracked,
445 491 p1_tracked,
446 492 p2_info,
447 493 mode_size_opt,
448 494 mtime_opt,
449 495 fallback_exec,
450 496 fallback_symlink,
451 497 ) = entry.v2_data();
452 498 // TODO: convert through raw flag bits instead?
453 499 let mut flags = Flags::empty();
454 500 flags.set(Flags::WDIR_TRACKED, wdir_tracked);
455 501 flags.set(Flags::P1_TRACKED, p1_tracked);
456 502 flags.set(Flags::P2_INFO, p2_info);
457 503 let size = if let Some((m, s)) = mode_size_opt {
458 504 let exec_perm = m & libc::S_IXUSR != 0;
459 505 let is_symlink = m & libc::S_IFMT == libc::S_IFLNK;
460 506 flags.set(Flags::MODE_EXEC_PERM, exec_perm);
461 507 flags.set(Flags::MODE_IS_SYMLINK, is_symlink);
462 508 flags.insert(Flags::HAS_MODE_AND_SIZE);
463 509 s.into()
464 510 } else {
465 511 0.into()
466 512 };
467 513 let mtime = if let Some(m) = mtime_opt {
468 514 flags.insert(Flags::HAS_MTIME);
469 515 if m.second_ambiguous {
470 516 flags.insert(Flags::MTIME_SECOND_AMBIGUOUS);
471 517 };
472 518 m.into()
473 519 } else {
474 520 PackedTruncatedTimestamp::null()
475 521 };
476 522 if let Some(f_exec) = fallback_exec {
477 523 flags.insert(Flags::HAS_FALLBACK_EXEC);
478 524 if f_exec {
479 525 flags.insert(Flags::FALLBACK_EXEC);
480 526 }
481 527 }
482 528 if let Some(f_symlink) = fallback_symlink {
483 529 flags.insert(Flags::HAS_FALLBACK_SYMLINK);
484 530 if f_symlink {
485 531 flags.insert(Flags::FALLBACK_SYMLINK);
486 532 }
487 533 }
488 534 (flags, size, mtime)
489 535 }
490 536 }
491 537
492 538 fn read_hg_path(
493 539 on_disk: &[u8],
494 540 slice: PathSlice,
495 541 ) -> Result<&HgPath, DirstateV2ParseError> {
496 542 read_slice(on_disk, slice.start, slice.len.get()).map(HgPath::new)
497 543 }
498 544
499 545 fn read_nodes(
500 546 on_disk: &[u8],
501 547 slice: ChildNodes,
502 548 ) -> Result<&[Node], DirstateV2ParseError> {
503 549 read_slice(on_disk, slice.start, slice.len.get())
504 550 }
505 551
506 552 fn read_slice<T, Len>(
507 553 on_disk: &[u8],
508 554 start: Offset,
509 555 len: Len,
510 556 ) -> Result<&[T], DirstateV2ParseError>
511 557 where
512 558 T: BytesCast,
513 559 Len: TryInto<usize>,
514 560 {
515 561 // Either `usize::MAX` value would result in an "out of bounds" error, since
516 562 // a single `&[u8]` cannot occupy the entire address space.
517 563 let start = start.get().try_into().unwrap_or(std::usize::MAX);
518 564 let len = len.try_into().unwrap_or(std::usize::MAX);
519 565 on_disk
520 566 .get(start..)
521 567 .and_then(|bytes| T::slice_from_bytes(bytes, len).ok())
522 568 .map(|(slice, _rest)| slice)
523 569 .ok_or_else(|| DirstateV2ParseError)
524 570 }
525 571
526 572 pub(crate) fn for_each_tracked_path<'on_disk>(
527 573 on_disk: &'on_disk [u8],
528 574 metadata: &[u8],
529 575 mut f: impl FnMut(&'on_disk HgPath),
530 576 ) -> Result<(), DirstateV2ParseError> {
531 577 let (meta, _) = TreeMetadata::from_bytes(metadata)
532 578 .map_err(|_| DirstateV2ParseError)?;
533 579 fn recur<'on_disk>(
534 580 on_disk: &'on_disk [u8],
535 581 nodes: ChildNodes,
536 582 f: &mut impl FnMut(&'on_disk HgPath),
537 583 ) -> Result<(), DirstateV2ParseError> {
538 584 for node in read_nodes(on_disk, nodes)? {
539 585 if let Some(entry) = node.entry()? {
540 586 if entry.state().is_tracked() {
541 587 f(node.full_path(on_disk)?)
542 588 }
543 589 }
544 590 recur(on_disk, node.children, f)?
545 591 }
546 592 Ok(())
547 593 }
548 594 recur(on_disk, meta.root_nodes, &mut f)
549 595 }
550 596
551 597 /// Returns new data and metadata, together with whether that data should be
552 598 /// appended to the existing data file whose content is at
553 599 /// `dirstate_map.on_disk` (true), instead of written to a new data file
554 600 /// (false).
555 601 pub(super) fn write(
556 602 dirstate_map: &DirstateMap,
557 603 can_append: bool,
558 ) -> Result<(Vec<u8>, Vec<u8>, bool), DirstateError> {
604 ) -> Result<(Vec<u8>, TreeMetadata, bool), DirstateError> {
559 605 let append = can_append && dirstate_map.write_should_append();
560 606
561 607 // This ignores the space for paths, and for nodes without an entry.
562 608 // TODO: better estimate? Skip the `Vec` and write to a file directly?
563 609 let size_guess = std::mem::size_of::<Node>()
564 610 * dirstate_map.nodes_with_entry_count as usize;
565 611
566 612 let mut writer = Writer {
567 613 dirstate_map,
568 614 append,
569 615 out: Vec::with_capacity(size_guess),
570 616 };
571 617
572 618 let root_nodes = writer.write_nodes(dirstate_map.root.as_ref())?;
573 619
574 620 let meta = TreeMetadata {
575 621 root_nodes,
576 622 nodes_with_entry_count: dirstate_map.nodes_with_entry_count.into(),
577 623 nodes_with_copy_source_count: dirstate_map
578 624 .nodes_with_copy_source_count
579 625 .into(),
580 626 unreachable_bytes: dirstate_map.unreachable_bytes.into(),
581 627 unused: [0; 4],
582 628 ignore_patterns_hash: dirstate_map.ignore_patterns_hash,
583 629 };
584 Ok((writer.out, meta.as_bytes().to_vec(), append))
630 Ok((writer.out, meta, append))
585 631 }
586 632
587 633 struct Writer<'dmap, 'on_disk> {
588 634 dirstate_map: &'dmap DirstateMap<'on_disk>,
589 635 append: bool,
590 636 out: Vec<u8>,
591 637 }
592 638
593 639 impl Writer<'_, '_> {
594 640 fn write_nodes(
595 641 &mut self,
596 642 nodes: dirstate_map::ChildNodesRef,
597 643 ) -> Result<ChildNodes, DirstateError> {
598 644 // Reuse already-written nodes if possible
599 645 if self.append {
600 646 if let dirstate_map::ChildNodesRef::OnDisk(nodes_slice) = nodes {
601 647 let start = self.on_disk_offset_of(nodes_slice).expect(
602 648 "dirstate-v2 OnDisk nodes not found within on_disk",
603 649 );
604 650 let len = child_nodes_len_from_usize(nodes_slice.len());
605 651 return Ok(ChildNodes { start, len });
606 652 }
607 653 }
608 654
609 655 // `dirstate_map::ChildNodes::InMemory` contains a `HashMap` which has
610 656 // undefined iteration order. Sort to enable binary search in the
611 657 // written file.
612 658 let nodes = nodes.sorted();
613 659 let nodes_len = nodes.len();
614 660
615 661 // First accumulate serialized nodes in a `Vec`
616 662 let mut on_disk_nodes = Vec::with_capacity(nodes_len);
617 663 for node in nodes {
618 664 let children =
619 665 self.write_nodes(node.children(self.dirstate_map.on_disk)?)?;
620 666 let full_path = node.full_path(self.dirstate_map.on_disk)?;
621 667 let full_path = self.write_path(full_path.as_bytes());
622 668 let copy_source = if let Some(source) =
623 669 node.copy_source(self.dirstate_map.on_disk)?
624 670 {
625 671 self.write_path(source.as_bytes())
626 672 } else {
627 673 PathSlice {
628 674 start: 0.into(),
629 675 len: 0.into(),
630 676 }
631 677 };
632 678 on_disk_nodes.push(match node {
633 679 NodeRef::InMemory(path, node) => {
634 680 let (flags, size, mtime) = match &node.data {
635 681 dirstate_map::NodeData::Entry(entry) => {
636 682 Node::from_dirstate_entry(entry)
637 683 }
638 684 dirstate_map::NodeData::CachedDirectory { mtime } => (
639 685                             // We currently never set an mtime if unknown
640 686                             // files are present.
641 687                             // So if we have an mtime for a directory, we
642 688                             // know there are no unknown
643 689                             // files, and we
644 690                             // blindly set ALL_UNKNOWN_RECORDED.
645 691 //
646 692 // We never set ALL_IGNORED_RECORDED since we
647 693 // don't track that case
648 694 // currently.
649 695 Flags::DIRECTORY
650 696 | Flags::HAS_MTIME
651 697 | Flags::ALL_UNKNOWN_RECORDED,
652 698 0.into(),
653 699 (*mtime).into(),
654 700 ),
655 701 dirstate_map::NodeData::None => (
656 702 Flags::DIRECTORY,
657 703 0.into(),
658 704 PackedTruncatedTimestamp::null(),
659 705 ),
660 706 };
661 707 Node {
662 708 children,
663 709 copy_source,
664 710 full_path,
665 711 base_name_start: u16::try_from(path.base_name_start())
666 712 // Could only panic for paths over 64 KiB
667 713 .expect("dirstate-v2 path length overflow")
668 714 .into(),
669 715 descendants_with_entry_count: node
670 716 .descendants_with_entry_count
671 717 .into(),
672 718 tracked_descendants_count: node
673 719 .tracked_descendants_count
674 720 .into(),
675 721 flags: flags.bits().into(),
676 722 size,
677 723 mtime,
678 724 }
679 725 }
680 726 NodeRef::OnDisk(node) => Node {
681 727 children,
682 728 copy_source,
683 729 full_path,
684 730 ..*node
685 731 },
686 732 })
687 733 }
688 734 // … so we can write them contiguously, after writing everything else
689 735 // they refer to.
690 736 let start = self.current_offset();
691 737 let len = child_nodes_len_from_usize(nodes_len);
692 738 self.out.extend(on_disk_nodes.as_bytes());
693 739 Ok(ChildNodes { start, len })
694 740 }
695 741
696 742 /// If the given slice of items is within `on_disk`, returns its offset
697 743 /// from the start of `on_disk`.
698 744 fn on_disk_offset_of<T>(&self, slice: &[T]) -> Option<Offset>
699 745 where
700 746 T: BytesCast,
701 747 {
702 748 fn address_range(slice: &[u8]) -> std::ops::RangeInclusive<usize> {
703 749 let start = slice.as_ptr() as usize;
704 750 let end = start + slice.len();
705 751 start..=end
706 752 }
707 753 let slice_addresses = address_range(slice.as_bytes());
708 754 let on_disk_addresses = address_range(self.dirstate_map.on_disk);
709 755 if on_disk_addresses.contains(slice_addresses.start())
710 756 && on_disk_addresses.contains(slice_addresses.end())
711 757 {
712 758 let offset = slice_addresses.start() - on_disk_addresses.start();
713 759 Some(offset_from_usize(offset))
714 760 } else {
715 761 None
716 762 }
717 763 }
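
`on_disk_offset_of` compares raw pointer addresses to decide whether a borrowed slice already lives inside the existing `on_disk` buffer, so an appended data file can reference it by offset instead of re-serializing it. A standalone sketch of that address-range trick (the `offset_within` function is illustrative, not part of hg-core):

// Illustrative only: if `sub` points inside `parent`, return its byte offset.
fn offset_within(parent: &[u8], sub: &[u8]) -> Option<usize> {
    let parent_start = parent.as_ptr() as usize;
    let parent_end = parent_start + parent.len();
    let sub_start = sub.as_ptr() as usize;
    let sub_end = sub_start + sub.len();
    if sub_start >= parent_start && sub_end <= parent_end {
        Some(sub_start - parent_start)
    } else {
        None
    }
}

fn main() {
    let on_disk = [0u8; 16];
    assert_eq!(offset_within(&on_disk, &on_disk[4..10]), Some(4));
    let unrelated = [0u8; 4];
    assert_eq!(offset_within(&on_disk, &unrelated), None);
}
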
718 764
719 765 fn current_offset(&mut self) -> Offset {
720 766 let mut offset = self.out.len();
721 767 if self.append {
722 768 offset += self.dirstate_map.on_disk.len()
723 769 }
724 770 offset_from_usize(offset)
725 771 }
726 772
727 773 fn write_path(&mut self, slice: &[u8]) -> PathSlice {
728 774 let len = path_len_from_usize(slice.len());
729 775 // Reuse an already-written path if possible
730 776 if self.append {
731 777 if let Some(start) = self.on_disk_offset_of(slice) {
732 778 return PathSlice { start, len };
733 779 }
734 780 }
735 781 let start = self.current_offset();
736 782 self.out.extend(slice.as_bytes());
737 783 PathSlice { start, len }
738 784 }
739 785 }
740 786
741 787 fn offset_from_usize(x: usize) -> Offset {
742 788 u32::try_from(x)
743 789 // Could only panic for a dirstate file larger than 4 GiB
744 790 .expect("dirstate-v2 offset overflow")
745 791 .into()
746 792 }
747 793
748 794 fn child_nodes_len_from_usize(x: usize) -> Size {
749 795 u32::try_from(x)
750 796 // Could only panic with over 4 billion nodes
751 797 .expect("dirstate-v2 slice length overflow")
752 798 .into()
753 799 }
754 800
755 801 fn path_len_from_usize(x: usize) -> PathSize {
756 802 u16::try_from(x)
757 803 // Could only panic for paths over 64 KiB
758 804 .expect("dirstate-v2 path length overflow")
759 805 .into()
760 806 }
761 807
762 808 impl From<TruncatedTimestamp> for PackedTruncatedTimestamp {
763 809 fn from(timestamp: TruncatedTimestamp) -> Self {
764 810 Self {
765 811 truncated_seconds: timestamp.truncated_seconds().into(),
766 812 nanoseconds: timestamp.nanoseconds().into(),
767 813 }
768 814 }
769 815 }
770 816
771 817 impl TryFrom<PackedTruncatedTimestamp> for TruncatedTimestamp {
772 818 type Error = DirstateV2ParseError;
773 819
774 820 fn try_from(
775 821 timestamp: PackedTruncatedTimestamp,
776 822 ) -> Result<Self, Self::Error> {
777 823 Self::from_already_truncated(
778 824 timestamp.truncated_seconds.get(),
779 825 timestamp.nanoseconds.get(),
780 826 false,
781 827 )
782 828 }
783 829 }
784 830 impl PackedTruncatedTimestamp {
785 831 fn null() -> Self {
786 832 Self {
787 833 truncated_seconds: 0.into(),
788 834 nanoseconds: 0.into(),
789 835 }
790 836 }
791 837 }
@@ -1,464 +1,532 b''
1 1 use crate::changelog::Changelog;
2 2 use crate::config::{Config, ConfigError, ConfigParseError};
3 3 use crate::dirstate::DirstateParents;
4 4 use crate::dirstate_tree::dirstate_map::DirstateMap;
5 use crate::dirstate_tree::on_disk::Docket as DirstateDocket;
5 6 use crate::dirstate_tree::owning::OwningDirstateMap;
6 use crate::errors::HgError;
7 7 use crate::errors::HgResultExt;
8 use crate::errors::{HgError, IoResultExt};
8 9 use crate::exit_codes;
9 10 use crate::lock::{try_with_lock_no_wait, LockError};
10 11 use crate::manifest::{Manifest, Manifestlog};
11 12 use crate::revlog::filelog::Filelog;
12 13 use crate::revlog::revlog::RevlogError;
13 14 use crate::utils::files::get_path_from_bytes;
14 15 use crate::utils::hg_path::HgPath;
15 16 use crate::utils::SliceExt;
16 17 use crate::vfs::{is_dir, is_file, Vfs};
17 18 use crate::{requirements, NodePrefix};
18 19 use crate::{DirstateError, Revision};
19 20 use std::cell::{Ref, RefCell, RefMut};
20 21 use std::collections::HashSet;
22 use std::io::Seek;
23 use std::io::SeekFrom;
24 use std::io::Write as IoWrite;
21 25 use std::path::{Path, PathBuf};
22 26
23 27 /// A repository on disk
24 28 pub struct Repo {
25 29 working_directory: PathBuf,
26 30 dot_hg: PathBuf,
27 31 store: PathBuf,
28 32 requirements: HashSet<String>,
29 33 config: Config,
30 34 dirstate_parents: LazyCell<DirstateParents, HgError>,
31 35 dirstate_data_file_uuid: LazyCell<Option<Vec<u8>>, HgError>,
32 36 dirstate_map: LazyCell<OwningDirstateMap, DirstateError>,
33 37 changelog: LazyCell<Changelog, HgError>,
34 38 manifestlog: LazyCell<Manifestlog, HgError>,
35 39 }
36 40
37 41 #[derive(Debug, derive_more::From)]
38 42 pub enum RepoError {
39 43 NotFound {
40 44 at: PathBuf,
41 45 },
42 46 #[from]
43 47 ConfigParseError(ConfigParseError),
44 48 #[from]
45 49 Other(HgError),
46 50 }
47 51
48 52 impl From<ConfigError> for RepoError {
49 53 fn from(error: ConfigError) -> Self {
50 54 match error {
51 55 ConfigError::Parse(error) => error.into(),
52 56 ConfigError::Other(error) => error.into(),
53 57 }
54 58 }
55 59 }
56 60
57 61 impl Repo {
 58 62     /// Tries to find the nearest repository root in the current working
 59 63     /// directory or its ancestors.
60 64 pub fn find_repo_root() -> Result<PathBuf, RepoError> {
61 65 let current_directory = crate::utils::current_dir()?;
62 66 // ancestors() is inclusive: it first yields `current_directory`
63 67 // as-is.
64 68 for ancestor in current_directory.ancestors() {
65 69 if is_dir(ancestor.join(".hg"))? {
66 70 return Ok(ancestor.to_path_buf());
67 71 }
68 72 }
69 73 return Err(RepoError::NotFound {
70 74 at: current_directory,
71 75 });
72 76 }
73 77
74 78 /// Find a repository, either at the given path (which must contain a `.hg`
75 79 /// sub-directory) or by searching the current directory and its
76 80 /// ancestors.
77 81 ///
 78 82     /// A method with two very different "modes" like this is usually a code
 79 83     /// smell; two methods would be cleaner, but in this case an `Option` is what rhg
80 84 /// sub-commands get from Clap for the `-R` / `--repository` CLI argument.
81 85 /// Having two methods would just move that `if` to almost all callers.
82 86 pub fn find(
83 87 config: &Config,
84 88 explicit_path: Option<PathBuf>,
85 89 ) -> Result<Self, RepoError> {
86 90 if let Some(root) = explicit_path {
87 91 if is_dir(root.join(".hg"))? {
88 92 Self::new_at_path(root.to_owned(), config)
89 93 } else if is_file(&root)? {
90 94 Err(HgError::unsupported("bundle repository").into())
91 95 } else {
92 96 Err(RepoError::NotFound {
93 97 at: root.to_owned(),
94 98 })
95 99 }
96 100 } else {
97 101 let root = Self::find_repo_root()?;
98 102 Self::new_at_path(root, config)
99 103 }
100 104 }
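
The doc comment above explains why `find` takes an `Option<PathBuf>` instead of being split into two methods. A hedged sketch of the resulting call pattern from a sub-command's point of view (the module paths and the `open_repo` helper are assumptions for illustration, not code from this changeset):

use std::path::PathBuf;

// Sketch only: module paths (`hg::config::Config`, `hg::repo::{Repo, RepoError}`)
// are assumed; the call itself matches the `Repo::find` signature above.
fn open_repo(
    config: &hg::config::Config,
    repository_arg: Option<PathBuf>, // e.g. the value of `-R`, if given
) -> Result<hg::repo::Repo, hg::repo::RepoError> {
    // A single call covers both the explicit-path and search-from-cwd modes.
    hg::repo::Repo::find(config, repository_arg)
}
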
101 105
102 106 /// To be called after checking that `.hg` is a sub-directory
103 107 fn new_at_path(
104 108 working_directory: PathBuf,
105 109 config: &Config,
106 110 ) -> Result<Self, RepoError> {
107 111 let dot_hg = working_directory.join(".hg");
108 112
109 113 let mut repo_config_files = Vec::new();
110 114 repo_config_files.push(dot_hg.join("hgrc"));
111 115 repo_config_files.push(dot_hg.join("hgrc-not-shared"));
112 116
113 117 let hg_vfs = Vfs { base: &dot_hg };
114 118 let mut reqs = requirements::load_if_exists(hg_vfs)?;
115 119 let relative =
116 120 reqs.contains(requirements::RELATIVE_SHARED_REQUIREMENT);
117 121 let shared =
118 122 reqs.contains(requirements::SHARED_REQUIREMENT) || relative;
119 123
120 124 // From `mercurial/localrepo.py`:
121 125 //
122 126 // if .hg/requires contains the sharesafe requirement, it means
123 127 // there exists a `.hg/store/requires` too and we should read it
124 128 // NOTE: presence of SHARESAFE_REQUIREMENT imply that store requirement
125 129 // is present. We never write SHARESAFE_REQUIREMENT for a repo if store
126 130 // is not present, refer checkrequirementscompat() for that
127 131 //
128 132 // However, if SHARESAFE_REQUIREMENT is not present, it means that the
129 133 // repository was shared the old way. We check the share source
130 134 // .hg/requires for SHARESAFE_REQUIREMENT to detect whether the
131 135 // current repository needs to be reshared
132 136 let share_safe = reqs.contains(requirements::SHARESAFE_REQUIREMENT);
133 137
134 138 let store_path;
135 139 if !shared {
136 140 store_path = dot_hg.join("store");
137 141 } else {
138 142 let bytes = hg_vfs.read("sharedpath")?;
139 143 let mut shared_path =
140 144 get_path_from_bytes(bytes.trim_end_matches(|b| b == b'\n'))
141 145 .to_owned();
142 146 if relative {
143 147 shared_path = dot_hg.join(shared_path)
144 148 }
145 149 if !is_dir(&shared_path)? {
146 150 return Err(HgError::corrupted(format!(
147 151 ".hg/sharedpath points to nonexistent directory {}",
148 152 shared_path.display()
149 153 ))
150 154 .into());
151 155 }
152 156
153 157 store_path = shared_path.join("store");
154 158
155 159 let source_is_share_safe =
156 160 requirements::load(Vfs { base: &shared_path })?
157 161 .contains(requirements::SHARESAFE_REQUIREMENT);
158 162
159 163 if share_safe && !source_is_share_safe {
160 164 return Err(match config
161 165 .get(b"share", b"safe-mismatch.source-not-safe")
162 166 {
163 167 Some(b"abort") | None => HgError::abort(
164 168 "abort: share source does not support share-safe requirement\n\
165 169 (see `hg help config.format.use-share-safe` for more information)",
166 170 exit_codes::ABORT,
167 171 ),
168 172 _ => HgError::unsupported("share-safe downgrade"),
169 173 }
170 174 .into());
171 175 } else if source_is_share_safe && !share_safe {
172 176 return Err(
173 177 match config.get(b"share", b"safe-mismatch.source-safe") {
174 178 Some(b"abort") | None => HgError::abort(
175 179 "abort: version mismatch: source uses share-safe \
176 180 functionality while the current share does not\n\
177 181 (see `hg help config.format.use-share-safe` for more information)",
178 182 exit_codes::ABORT,
179 183 ),
180 184 _ => HgError::unsupported("share-safe upgrade"),
181 185 }
182 186 .into(),
183 187 );
184 188 }
185 189
186 190 if share_safe {
187 191 repo_config_files.insert(0, shared_path.join("hgrc"))
188 192 }
189 193 }
190 194 if share_safe {
191 195 reqs.extend(requirements::load(Vfs { base: &store_path })?);
192 196 }
193 197
194 198 let repo_config = if std::env::var_os("HGRCSKIPREPO").is_none() {
195 199 config.combine_with_repo(&repo_config_files)?
196 200 } else {
197 201 config.clone()
198 202 };
199 203
200 204 let repo = Self {
201 205 requirements: reqs,
202 206 working_directory,
203 207 store: store_path,
204 208 dot_hg,
205 209 config: repo_config,
206 210 dirstate_parents: LazyCell::new(Self::read_dirstate_parents),
207 211 dirstate_data_file_uuid: LazyCell::new(
208 212 Self::read_dirstate_data_file_uuid,
209 213 ),
210 214 dirstate_map: LazyCell::new(Self::new_dirstate_map),
211 215 changelog: LazyCell::new(Changelog::open),
212 216 manifestlog: LazyCell::new(Manifestlog::open),
213 217 };
214 218
215 219 requirements::check(&repo)?;
216 220
217 221 Ok(repo)
218 222 }
219 223
220 224 pub fn working_directory_path(&self) -> &Path {
221 225 &self.working_directory
222 226 }
223 227
224 228 pub fn requirements(&self) -> &HashSet<String> {
225 229 &self.requirements
226 230 }
227 231
228 232 pub fn config(&self) -> &Config {
229 233 &self.config
230 234 }
231 235
232 236 /// For accessing repository files (in `.hg`), except for the store
233 237 /// (`.hg/store`).
234 238 pub fn hg_vfs(&self) -> Vfs<'_> {
235 239 Vfs { base: &self.dot_hg }
236 240 }
237 241
238 242 /// For accessing repository store files (in `.hg/store`)
239 243 pub fn store_vfs(&self) -> Vfs<'_> {
240 244 Vfs { base: &self.store }
241 245 }
242 246
243 247 /// For accessing the working copy
244 248 pub fn working_directory_vfs(&self) -> Vfs<'_> {
245 249 Vfs {
246 250 base: &self.working_directory,
247 251 }
248 252 }
249 253
250 254 pub fn try_with_wlock_no_wait<R>(
251 255 &self,
252 256 f: impl FnOnce() -> R,
253 257 ) -> Result<R, LockError> {
254 258 try_with_lock_no_wait(self.hg_vfs(), "wlock", f)
255 259 }
256 260
257 261 pub fn has_dirstate_v2(&self) -> bool {
258 262 self.requirements
259 263 .contains(requirements::DIRSTATE_V2_REQUIREMENT)
260 264 }
261 265
262 266 pub fn has_sparse(&self) -> bool {
263 267 self.requirements.contains(requirements::SPARSE_REQUIREMENT)
264 268 }
265 269
266 270 pub fn has_narrow(&self) -> bool {
267 271 self.requirements.contains(requirements::NARROW_REQUIREMENT)
268 272 }
269 273
270 274 fn dirstate_file_contents(&self) -> Result<Vec<u8>, HgError> {
271 275 Ok(self
272 276 .hg_vfs()
273 277 .read("dirstate")
274 278 .io_not_found_as_none()?
275 279 .unwrap_or(Vec::new()))
276 280 }
277 281
278 282 pub fn dirstate_parents(&self) -> Result<DirstateParents, HgError> {
279 283 Ok(*self.dirstate_parents.get_or_init(self)?)
280 284 }
281 285
282 286 fn read_dirstate_parents(&self) -> Result<DirstateParents, HgError> {
283 287 let dirstate = self.dirstate_file_contents()?;
284 288 let parents = if dirstate.is_empty() {
285 289 if self.has_dirstate_v2() {
286 290 self.dirstate_data_file_uuid.set(None);
287 291 }
288 292 DirstateParents::NULL
289 293 } else if self.has_dirstate_v2() {
290 294 let docket =
291 295 crate::dirstate_tree::on_disk::read_docket(&dirstate)?;
292 296 self.dirstate_data_file_uuid
293 297 .set(Some(docket.uuid.to_owned()));
294 298 docket.parents()
295 299 } else {
296 300 crate::dirstate::parsers::parse_dirstate_parents(&dirstate)?
297 301 .clone()
298 302 };
299 303 self.dirstate_parents.set(parents);
300 304 Ok(parents)
301 305 }
302 306
303 307 fn read_dirstate_data_file_uuid(
304 308 &self,
305 309 ) -> Result<Option<Vec<u8>>, HgError> {
306 310 assert!(
307 311 self.has_dirstate_v2(),
308 312 "accessing dirstate data file ID without dirstate-v2"
309 313 );
310 314 let dirstate = self.dirstate_file_contents()?;
311 315 if dirstate.is_empty() {
312 316 self.dirstate_parents.set(DirstateParents::NULL);
313 317 Ok(None)
314 318 } else {
315 319 let docket =
316 320 crate::dirstate_tree::on_disk::read_docket(&dirstate)?;
317 321 self.dirstate_parents.set(docket.parents());
318 322 Ok(Some(docket.uuid.to_owned()))
319 323 }
320 324 }
321 325
322 326 fn new_dirstate_map(&self) -> Result<OwningDirstateMap, DirstateError> {
323 327 let dirstate_file_contents = self.dirstate_file_contents()?;
324 328 if dirstate_file_contents.is_empty() {
325 329 self.dirstate_parents.set(DirstateParents::NULL);
326 330 if self.has_dirstate_v2() {
327 331 self.dirstate_data_file_uuid.set(None);
328 332 }
329 333 Ok(OwningDirstateMap::new_empty(Vec::new()))
330 334 } else if self.has_dirstate_v2() {
331 335 let docket = crate::dirstate_tree::on_disk::read_docket(
332 336 &dirstate_file_contents,
333 337 )?;
334 338 self.dirstate_parents.set(docket.parents());
335 339 self.dirstate_data_file_uuid
336 340 .set(Some(docket.uuid.to_owned()));
337 341 let data_size = docket.data_size();
338 342 let metadata = docket.tree_metadata();
339 343 let mut map = if let Some(data_mmap) = self
340 344 .hg_vfs()
341 345 .mmap_open(docket.data_filename())
342 346 .io_not_found_as_none()?
343 347 {
344 348 OwningDirstateMap::new_empty(data_mmap)
345 349 } else {
346 350 OwningDirstateMap::new_empty(Vec::new())
347 351 };
348 352 let (on_disk, placeholder) = map.get_pair_mut();
349 353 *placeholder = DirstateMap::new_v2(on_disk, data_size, metadata)?;
350 354 Ok(map)
351 355 } else {
352 356 let mut map = OwningDirstateMap::new_empty(dirstate_file_contents);
353 357 let (on_disk, placeholder) = map.get_pair_mut();
354 358 let (inner, parents) = DirstateMap::new_v1(on_disk)?;
355 359 self.dirstate_parents
356 360 .set(parents.unwrap_or(DirstateParents::NULL));
357 361 *placeholder = inner;
358 362 Ok(map)
359 363 }
360 364 }
361 365
362 366 pub fn dirstate_map(
363 367 &self,
364 368 ) -> Result<Ref<OwningDirstateMap>, DirstateError> {
365 369 self.dirstate_map.get_or_init(self)
366 370 }
367 371
368 372 pub fn dirstate_map_mut(
369 373 &self,
370 374 ) -> Result<RefMut<OwningDirstateMap>, DirstateError> {
371 375 self.dirstate_map.get_mut_or_init(self)
372 376 }
373 377
374 378 pub fn changelog(&self) -> Result<Ref<Changelog>, HgError> {
375 379 self.changelog.get_or_init(self)
376 380 }
377 381
378 382 pub fn changelog_mut(&self) -> Result<RefMut<Changelog>, HgError> {
379 383 self.changelog.get_mut_or_init(self)
380 384 }
381 385
382 386 pub fn manifestlog(&self) -> Result<Ref<Manifestlog>, HgError> {
383 387 self.manifestlog.get_or_init(self)
384 388 }
385 389
386 390 pub fn manifestlog_mut(&self) -> Result<RefMut<Manifestlog>, HgError> {
387 391 self.manifestlog.get_mut_or_init(self)
388 392 }
389 393
390 394 /// Returns the manifest of the *changeset* with the given node ID
391 395 pub fn manifest_for_node(
392 396 &self,
393 397 node: impl Into<NodePrefix>,
394 398 ) -> Result<Manifest, RevlogError> {
395 399 self.manifestlog()?.data_for_node(
396 400 self.changelog()?
397 401 .data_for_node(node.into())?
398 402 .manifest_node()?
399 403 .into(),
400 404 )
401 405 }
402 406
403 407 /// Returns the manifest of the *changeset* with the given revision number
404 408 pub fn manifest_for_rev(
405 409 &self,
406 410 revision: Revision,
407 411 ) -> Result<Manifest, RevlogError> {
408 412 self.manifestlog()?.data_for_node(
409 413 self.changelog()?
410 414 .data_for_rev(revision)?
411 415 .manifest_node()?
412 416 .into(),
413 417 )
414 418 }
415 419
416 420 pub fn filelog(&self, path: &HgPath) -> Result<Filelog, HgError> {
417 421 Filelog::open(self, path)
418 422 }
423
424 /// Write to disk any updates that were made through `dirstate_map_mut`.
425 ///
426 /// The "wlock" must be held while calling this.
427 /// See for example `try_with_wlock_no_wait`.
428 ///
429 /// TODO: have a `WritableRepo` type only accessible while holding the
430 /// lock?
431 pub fn write_dirstate(&self) -> Result<(), DirstateError> {
432 let map = self.dirstate_map()?;
433 // TODO: Maintain a `DirstateMap::dirty` flag, and return early here if
434 // it’s unset
435 let parents = self.dirstate_parents()?;
436 let packed_dirstate = if self.has_dirstate_v2() {
437 let uuid = self.dirstate_data_file_uuid.get_or_init(self)?;
438 let mut uuid = uuid.as_ref();
439 let can_append = uuid.is_some();
440 let (data, tree_metadata, append) = map.pack_v2(can_append)?;
441 if !append {
442 uuid = None
443 }
444 let uuid = if let Some(uuid) = uuid {
445 std::str::from_utf8(uuid)
446 .map_err(|_| {
447 HgError::corrupted("non-UTF-8 dirstate data file ID")
448 })?
449 .to_owned()
450 } else {
451 DirstateDocket::new_uid()
452 };
453 let data_filename = format!("dirstate.{}", uuid);
454 let data_filename = self.hg_vfs().join(data_filename);
455 let mut options = std::fs::OpenOptions::new();
456 if append {
457 options.append(true);
458 } else {
459 options.write(true).create_new(true);
460 }
461 let data_size = (|| {
462 // TODO: loop and try another random ID if !append and this
463 // returns `ErrorKind::AlreadyExists`? Collision chance of two
464 // random IDs is one in 2**32
465 let mut file = options.open(&data_filename)?;
466 file.write_all(&data)?;
467 file.flush()?;
468 // TODO: use https://doc.rust-lang.org/std/io/trait.Seek.html#method.stream_position when we require Rust 1.51+
469 file.seek(SeekFrom::Current(0))
470 })()
471 .when_writing_file(&data_filename)?;
472 DirstateDocket::serialize(
473 parents,
474 tree_metadata,
475 data_size,
476 uuid.as_bytes(),
477 )
478 .map_err(|_: std::num::TryFromIntError| {
479 HgError::corrupted("overflow in dirstate docket serialization")
480 })?
481 } else {
482 map.pack_v1(parents)?
483 };
484 self.hg_vfs().atomic_write("dirstate", &packed_dirstate)?;
485 Ok(())
486 }
419 487 }
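
Since `write_dirstate` requires the wlock, a caller is expected to wrap it in `try_with_wlock_no_wait`, as the doc comment suggests. A hedged sketch of such a caller (the module path `hg::repo::Repo` and the error reporting are assumptions, not code from this changeset):

// Sketch only: assumes a `repo` obtained elsewhere (for example via `Repo::find`),
// and that the caller decides how to report errors.
fn persist_dirstate(repo: &hg::repo::Repo) {
    // `write_dirstate` must run while the "wlock" is held.
    let result = repo.try_with_wlock_no_wait(|| {
        // Mutations would go through `repo.dirstate_map_mut()` here, then:
        repo.write_dirstate()
    });
    match result {
        Ok(Ok(())) => {}
        Ok(Err(dirstate_error)) => {
            eprintln!("failed to write dirstate: {:?}", dirstate_error)
        }
        Err(_lock_error) => eprintln!("could not take the wlock"),
    }
}
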
420 488
421 489 /// Lazily-initialized component of `Repo` with interior mutability
422 490 ///
423 491 /// This differs from `OnceCell` in that the value can still be "deinitialized"
424 492 /// later by setting its inner `Option` to `None`.
425 493 struct LazyCell<T, E> {
426 494 value: RefCell<Option<T>>,
427 495 // `Fn`s that don’t capture environment are zero-size, so this box does
428 496 // not allocate:
429 497 init: Box<dyn Fn(&Repo) -> Result<T, E>>,
430 498 }
431 499
432 500 impl<T, E> LazyCell<T, E> {
433 501 fn new(init: impl Fn(&Repo) -> Result<T, E> + 'static) -> Self {
434 502 Self {
435 503 value: RefCell::new(None),
436 504 init: Box::new(init),
437 505 }
438 506 }
439 507
440 508 fn set(&self, value: T) {
441 509 *self.value.borrow_mut() = Some(value)
442 510 }
443 511
444 512 fn get_or_init(&self, repo: &Repo) -> Result<Ref<T>, E> {
445 513 let mut borrowed = self.value.borrow();
446 514 if borrowed.is_none() {
447 515 drop(borrowed);
448 516 // Only use `borrow_mut` if it is really needed to avoid panic in
449 517 // case there is another outstanding borrow but mutation is not
450 518 // needed.
451 519 *self.value.borrow_mut() = Some((self.init)(repo)?);
452 520 borrowed = self.value.borrow()
453 521 }
454 522 Ok(Ref::map(borrowed, |option| option.as_ref().unwrap()))
455 523 }
456 524
457 525 fn get_mut_or_init(&self, repo: &Repo) -> Result<RefMut<T>, E> {
458 526 let mut borrowed = self.value.borrow_mut();
459 527 if borrowed.is_none() {
460 528 *borrowed = Some((self.init)(repo)?);
461 529 }
462 530 Ok(RefMut::map(borrowed, |option| option.as_mut().unwrap()))
463 531 }
464 532 }
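
The `LazyCell` above is essentially a `RefCell<Option<T>>` whose value is computed on first access but can also be set, or replaced, explicitly afterwards (which a plain `OnceCell` does not allow). A simplified standalone sketch of that pattern, without the `Repo`-specific `init` callback (the names here are illustrative):

use std::cell::{Ref, RefCell};

// Illustrative reduction of the `LazyCell` idea: lazily initialized,
// but still overwritable through `set`.
struct Lazy<T> {
    value: RefCell<Option<T>>,
}

impl<T> Lazy<T> {
    fn new() -> Self {
        Lazy {
            value: RefCell::new(None),
        }
    }

    fn set(&self, value: T) {
        *self.value.borrow_mut() = Some(value)
    }

    fn get_or_init(&self, init: impl FnOnce() -> T) -> Ref<'_, T> {
        // Check with a short-lived immutable borrow first, so the
        // `borrow_mut` below cannot conflict with it.
        let needs_init = self.value.borrow().is_none();
        if needs_init {
            *self.value.borrow_mut() = Some(init());
        }
        Ref::map(self.value.borrow(), |option| option.as_ref().unwrap())
    }
}

fn main() {
    let cell = Lazy::new();
    assert_eq!(*cell.get_or_init(|| 41), 41); // computed on first access
    cell.set(42); // can still be replaced later
    assert_eq!(*cell.get_or_init(|| 0), 42); // init closure is not called again
}
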
@@ -1,415 +1,421 b''
1 1 // Copyright 2019-2020 Georges Racinet <georges.racinet@octobus.net>
2 2 //
3 3 // This software may be used and distributed according to the terms of the
4 4 // GNU General Public License version 2 or any later version.
5 5
6 6 //! Definitions and utilities for Revision nodes
7 7 //!
8 8 //! In Mercurial code base, it is customary to call "a node" the binary SHA
9 9 //! of a revision.
10 10
11 11 use crate::errors::HgError;
12 12 use bytes_cast::BytesCast;
13 13 use std::convert::{TryFrom, TryInto};
14 14 use std::fmt;
15 15
16 16 /// The length in bytes of a `Node`
17 17 ///
18 18 /// This constant is meant to ease refactors of this module, and
 19 19 /// is private so that calling code does not expect all nodes to have
20 20 /// the same size, should we support several formats concurrently in
21 21 /// the future.
22 22 pub const NODE_BYTES_LENGTH: usize = 20;
23 23
24 24 /// Id of the null node.
25 25 ///
 26 26 /// Used to indicate the absence of a node.
27 27 pub const NULL_NODE_ID: [u8; NODE_BYTES_LENGTH] = [0u8; NODE_BYTES_LENGTH];
28 28
 29 29 /// The length in nybbles (hexadecimal digits) of a `Node`
 30 30 ///
 31 31 /// See also `NODE_BYTES_LENGTH` about it being private.
32 32 const NODE_NYBBLES_LENGTH: usize = 2 * NODE_BYTES_LENGTH;
33 33
34 34 /// Default for UI presentation
35 35 const SHORT_PREFIX_DEFAULT_NYBBLES_LENGTH: u8 = 12;
36 36
37 37 /// Private alias for readability and to ease future change
38 38 type NodeData = [u8; NODE_BYTES_LENGTH];
39 39
40 40 /// Binary revision SHA
41 41 ///
42 42 /// ## Future changes of hash size
43 43 ///
 44 44 /// To accommodate future changes of hash size, Rust callers
45 45 /// should use the conversion methods at the boundaries (FFI, actual
46 46 /// computation of hashes and I/O) only, and only if required.
47 47 ///
48 48 /// All other callers outside of unit tests should just handle `Node` values
49 49 /// and never make any assumption on the actual length, using [`nybbles_len`]
50 50 /// if they need a loop boundary.
51 51 ///
52 52 /// All methods that create a `Node` either take a type that enforces
53 53 /// the size or return an error at runtime.
54 54 ///
55 55 /// [`nybbles_len`]: #method.nybbles_len
56 56 #[derive(Copy, Clone, Debug, PartialEq, BytesCast, derive_more::From)]
57 57 #[repr(transparent)]
58 58 pub struct Node {
59 59 data: NodeData,
60 60 }
61 61
62 62 /// The node value for NULL_REVISION
63 63 pub const NULL_NODE: Node = Node {
64 64 data: [0; NODE_BYTES_LENGTH],
65 65 };
66 66
67 67 /// Return an error if the slice has an unexpected length
68 68 impl<'a> TryFrom<&'a [u8]> for &'a Node {
69 69 type Error = ();
70 70
71 71 #[inline]
72 72 fn try_from(bytes: &'a [u8]) -> Result<Self, Self::Error> {
73 73 match Node::from_bytes(bytes) {
74 74 Ok((node, rest)) if rest.is_empty() => Ok(node),
75 75 _ => Err(()),
76 76 }
77 77 }
78 78 }
79 79
80 80 /// Return an error if the slice has an unexpected length
81 81 impl TryFrom<&'_ [u8]> for Node {
82 82 type Error = std::array::TryFromSliceError;
83 83
84 84 #[inline]
85 85 fn try_from(bytes: &'_ [u8]) -> Result<Self, Self::Error> {
86 86 let data = bytes.try_into()?;
87 87 Ok(Self { data })
88 88 }
89 89 }
90 90
91 91 impl From<&'_ NodeData> for Node {
92 92 #[inline]
93 93 fn from(data: &'_ NodeData) -> Self {
94 94 Self { data: *data }
95 95 }
96 96 }
97 97
98 98 impl fmt::LowerHex for Node {
99 99 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
100 100 for &byte in &self.data {
101 101 write!(f, "{:02x}", byte)?
102 102 }
103 103 Ok(())
104 104 }
105 105 }
106 106
107 107 #[derive(Debug)]
108 108 pub struct FromHexError;
109 109
110 110 /// Low level utility function, also for prefixes
111 111 fn get_nybble(s: &[u8], i: usize) -> u8 {
112 112 if i % 2 == 0 {
113 113 s[i / 2] >> 4
114 114 } else {
115 115 s[i / 2] & 0x0f
116 116 }
117 117 }
118 118
119 119 impl Node {
120 120 /// Retrieve the `i`th half-byte of the binary data.
121 121 ///
122 122 /// This is also the `i`th hexadecimal digit in numeric form,
123 123 /// also called a [nybble](https://en.wikipedia.org/wiki/Nibble).
124 124 pub fn get_nybble(&self, i: usize) -> u8 {
125 125 get_nybble(&self.data, i)
126 126 }
127 127
128 128 /// Length of the data, in nybbles
129 129 pub fn nybbles_len(&self) -> usize {
130 130 // public exposure as an instance method only, so that we can
131 131 // easily support several sizes of hashes if needed in the future.
132 132 NODE_NYBBLES_LENGTH
133 133 }
134 134
135 135 /// Convert from hexadecimal string representation
136 136 ///
137 137 /// Exact length is required.
138 138 ///
139 139 /// To be used in FFI and I/O only, in order to facilitate future
140 140 /// changes of hash format.
141 141 pub fn from_hex(hex: impl AsRef<[u8]>) -> Result<Node, FromHexError> {
142 142 let prefix = NodePrefix::from_hex(hex)?;
143 143 if prefix.nybbles_len() == NODE_NYBBLES_LENGTH {
144 144 Ok(Self { data: prefix.data })
145 145 } else {
146 146 Err(FromHexError)
147 147 }
148 148 }
149 149
150 150 /// `from_hex`, but for input from an internal file of the repository such
151 151 /// as a changelog or manifest entry.
152 152 ///
153 153 /// An error is treated as repository corruption.
154 154 pub fn from_hex_for_repo(hex: impl AsRef<[u8]>) -> Result<Node, HgError> {
155 155 Self::from_hex(hex.as_ref()).map_err(|FromHexError| {
156 156 HgError::CorruptedRepository(format!(
157 157 "Expected a full hexadecimal node ID, found {}",
158 158 String::from_utf8_lossy(hex.as_ref())
159 159 ))
160 160 })
161 161 }
162 162
163 163 /// Provide access to binary data
164 164 ///
165 165 /// This is needed by FFI layers, for instance to return expected
166 166 /// binary values to Python.
167 167 pub fn as_bytes(&self) -> &[u8] {
168 168 &self.data
169 169 }
170 170
171 171 pub fn short(&self) -> NodePrefix {
172 172 NodePrefix {
173 173 nybbles_len: SHORT_PREFIX_DEFAULT_NYBBLES_LENGTH,
174 174 data: self.data,
175 175 }
176 176 }
177
178 pub fn pad_to_256_bits(&self) -> [u8; 32] {
179 let mut bits = [0; 32];
180 bits[..NODE_BYTES_LENGTH].copy_from_slice(&self.data);
181 bits
182 }
177 183 }
178 184
179 185 /// The beginning of a binary revision SHA.
180 186 ///
 181 187 /// Since it can potentially come from a hexadecimal representation with an
 182 188 /// odd length, it needs to carry around whether the last 4 bits are relevant
183 189 /// or not.
184 190 #[derive(Debug, PartialEq, Copy, Clone)]
185 191 pub struct NodePrefix {
186 192 /// In `1..=NODE_NYBBLES_LENGTH`
187 193 nybbles_len: u8,
188 194 /// The first `4 * length_in_nybbles` bits are used (considering bits
189 195 /// within a bytes in big-endian: most significant first), the rest
190 196 /// are zero.
191 197 data: NodeData,
192 198 }
193 199
194 200 impl NodePrefix {
195 201 /// Convert from hexadecimal string representation
196 202 ///
197 203 /// Similarly to `hex::decode`, can be used with Unicode string types
198 204 /// (`String`, `&str`) as well as bytes.
199 205 ///
200 206 /// To be used in FFI and I/O only, in order to facilitate future
201 207 /// changes of hash format.
202 208 pub fn from_hex(hex: impl AsRef<[u8]>) -> Result<Self, FromHexError> {
203 209 let hex = hex.as_ref();
204 210 let len = hex.len();
205 211 if len > NODE_NYBBLES_LENGTH || len == 0 {
206 212 return Err(FromHexError);
207 213 }
208 214
209 215 let mut data = [0; NODE_BYTES_LENGTH];
210 216 let mut nybbles_len = 0;
211 217 for &ascii_byte in hex {
212 218 let nybble = match char::from(ascii_byte).to_digit(16) {
213 219 Some(digit) => digit as u8,
214 220 None => return Err(FromHexError),
215 221 };
216 222 // Fill in the upper half of a byte first, then the lower half.
217 223 let shift = if nybbles_len % 2 == 0 { 4 } else { 0 };
218 224 data[nybbles_len as usize / 2] |= nybble << shift;
219 225 nybbles_len += 1;
220 226 }
221 227 Ok(Self { data, nybbles_len })
222 228 }
223 229
224 230 pub fn nybbles_len(&self) -> usize {
225 231 self.nybbles_len as _
226 232 }
227 233
228 234 pub fn is_prefix_of(&self, node: &Node) -> bool {
229 235 let full_bytes = self.nybbles_len() / 2;
230 236 if self.data[..full_bytes] != node.data[..full_bytes] {
231 237 return false;
232 238 }
233 239 if self.nybbles_len() % 2 == 0 {
234 240 return true;
235 241 }
236 242 let last = self.nybbles_len() - 1;
237 243 self.get_nybble(last) == node.get_nybble(last)
238 244 }
239 245
240 246 /// Retrieve the `i`th half-byte from the prefix.
241 247 ///
242 248 /// This is also the `i`th hexadecimal digit in numeric form,
243 249 /// also called a [nybble](https://en.wikipedia.org/wiki/Nibble).
244 250 pub fn get_nybble(&self, i: usize) -> u8 {
245 251 assert!(i < self.nybbles_len());
246 252 get_nybble(&self.data, i)
247 253 }
248 254
249 255 fn iter_nybbles(&self) -> impl Iterator<Item = u8> + '_ {
250 256 (0..self.nybbles_len()).map(move |i| get_nybble(&self.data, i))
251 257 }
252 258
 253 259     /// Return the index of the first nybble that's different from `node`
 254 260     ///
 255 261     /// If the return value is `None`, that means that `self` is
 256 262     /// a prefix of `node`, but this method is a bit slower
 257 263     /// than `is_prefix_of`.
258 264 ///
259 265 /// Returned index is as in `get_nybble`, i.e., starting at 0.
260 266 pub fn first_different_nybble(&self, node: &Node) -> Option<usize> {
261 267 self.iter_nybbles()
262 268 .zip(NodePrefix::from(*node).iter_nybbles())
263 269 .position(|(a, b)| a != b)
264 270 }
265 271 }
266 272
267 273 impl fmt::LowerHex for NodePrefix {
268 274 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
269 275 let full_bytes = self.nybbles_len() / 2;
270 276 for &byte in &self.data[..full_bytes] {
271 277 write!(f, "{:02x}", byte)?
272 278 }
273 279 if self.nybbles_len() % 2 == 1 {
274 280 let last = self.nybbles_len() - 1;
275 281 write!(f, "{:x}", self.get_nybble(last))?
276 282 }
277 283 Ok(())
278 284 }
279 285 }
280 286
281 287 /// A shortcut for full `Node` references
282 288 impl From<&'_ Node> for NodePrefix {
283 289 fn from(node: &'_ Node) -> Self {
284 290 NodePrefix {
285 291 nybbles_len: node.nybbles_len() as _,
286 292 data: node.data,
287 293 }
288 294 }
289 295 }
290 296
291 297 /// A shortcut for full `Node` references
292 298 impl From<Node> for NodePrefix {
293 299 fn from(node: Node) -> Self {
294 300 NodePrefix {
295 301 nybbles_len: node.nybbles_len() as _,
296 302 data: node.data,
297 303 }
298 304 }
299 305 }
300 306
301 307 impl PartialEq<Node> for NodePrefix {
302 308 fn eq(&self, other: &Node) -> bool {
303 309 Self::from(*other) == *self
304 310 }
305 311 }
306 312
307 313 #[cfg(test)]
308 314 mod tests {
309 315 use super::*;
310 316
311 317 const SAMPLE_NODE_HEX: &str = "0123456789abcdeffedcba9876543210deadbeef";
312 318 const SAMPLE_NODE: Node = Node {
313 319 data: [
314 320 0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0xfe, 0xdc, 0xba,
315 321 0x98, 0x76, 0x54, 0x32, 0x10, 0xde, 0xad, 0xbe, 0xef,
316 322 ],
317 323 };
318 324
 319 325     /// Pad a hexadecimal string to reach `NODE_NYBBLES_LENGTH`
320 326 /// The padding is made with zeros.
321 327 pub fn hex_pad_right(hex: &str) -> String {
322 328 let mut res = hex.to_string();
323 329 while res.len() < NODE_NYBBLES_LENGTH {
324 330 res.push('0');
325 331 }
326 332 res
327 333 }
328 334
329 335 #[test]
330 336 fn test_node_from_hex() {
331 337 let not_hex = "012... oops";
332 338 let too_short = "0123";
333 339 let too_long = format!("{}0", SAMPLE_NODE_HEX);
334 340 assert_eq!(Node::from_hex(SAMPLE_NODE_HEX).unwrap(), SAMPLE_NODE);
335 341 assert!(Node::from_hex(not_hex).is_err());
336 342 assert!(Node::from_hex(too_short).is_err());
337 343 assert!(Node::from_hex(&too_long).is_err());
338 344 }
339 345
340 346 #[test]
341 347 fn test_node_encode_hex() {
342 348 assert_eq!(format!("{:x}", SAMPLE_NODE), SAMPLE_NODE_HEX);
343 349 }
344 350
345 351 #[test]
346 352 fn test_prefix_from_to_hex() -> Result<(), FromHexError> {
347 353 assert_eq!(format!("{:x}", NodePrefix::from_hex("0e1")?), "0e1");
348 354 assert_eq!(format!("{:x}", NodePrefix::from_hex("0e1a")?), "0e1a");
349 355 assert_eq!(
350 356 format!("{:x}", NodePrefix::from_hex(SAMPLE_NODE_HEX)?),
351 357 SAMPLE_NODE_HEX
352 358 );
353 359 Ok(())
354 360 }
355 361
356 362 #[test]
357 363 fn test_prefix_from_hex_errors() {
358 364 assert!(NodePrefix::from_hex("testgr").is_err());
359 365 let mut long = format!("{:x}", NULL_NODE);
360 366 long.push('c');
361 367 assert!(NodePrefix::from_hex(&long).is_err())
362 368 }
363 369
364 370 #[test]
365 371 fn test_is_prefix_of() -> Result<(), FromHexError> {
366 372 let mut node_data = [0; NODE_BYTES_LENGTH];
367 373 node_data[0] = 0x12;
368 374 node_data[1] = 0xca;
369 375 let node = Node::from(node_data);
370 376 assert!(NodePrefix::from_hex("12")?.is_prefix_of(&node));
371 377 assert!(!NodePrefix::from_hex("1a")?.is_prefix_of(&node));
372 378 assert!(NodePrefix::from_hex("12c")?.is_prefix_of(&node));
373 379 assert!(!NodePrefix::from_hex("12d")?.is_prefix_of(&node));
374 380 Ok(())
375 381 }
376 382
377 383 #[test]
378 384 fn test_get_nybble() -> Result<(), FromHexError> {
379 385 let prefix = NodePrefix::from_hex("dead6789cafe")?;
380 386 assert_eq!(prefix.get_nybble(0), 13);
381 387 assert_eq!(prefix.get_nybble(7), 9);
382 388 Ok(())
383 389 }
384 390
385 391 #[test]
386 392 fn test_first_different_nybble_even_prefix() {
387 393 let prefix = NodePrefix::from_hex("12ca").unwrap();
388 394 let mut node = Node::from([0; NODE_BYTES_LENGTH]);
389 395 assert_eq!(prefix.first_different_nybble(&node), Some(0));
390 396 node.data[0] = 0x13;
391 397 assert_eq!(prefix.first_different_nybble(&node), Some(1));
392 398 node.data[0] = 0x12;
393 399 assert_eq!(prefix.first_different_nybble(&node), Some(2));
394 400 node.data[1] = 0xca;
395 401 // now it is a prefix
396 402 assert_eq!(prefix.first_different_nybble(&node), None);
397 403 }
398 404
399 405 #[test]
400 406 fn test_first_different_nybble_odd_prefix() {
401 407 let prefix = NodePrefix::from_hex("12c").unwrap();
402 408 let mut node = Node::from([0; NODE_BYTES_LENGTH]);
403 409 assert_eq!(prefix.first_different_nybble(&node), Some(0));
404 410 node.data[0] = 0x13;
405 411 assert_eq!(prefix.first_different_nybble(&node), Some(1));
406 412 node.data[0] = 0x12;
407 413 assert_eq!(prefix.first_different_nybble(&node), Some(2));
408 414 node.data[1] = 0xca;
409 415 // now it is a prefix
410 416 assert_eq!(prefix.first_different_nybble(&node), None);
411 417 }
412 418 }
413 419
414 420 #[cfg(test)]
415 421 pub use tests::hex_pad_right;
@@ -1,499 +1,499 b''
1 1 // dirstate_map.rs
2 2 //
3 3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
4 4 //
5 5 // This software may be used and distributed according to the terms of the
6 6 // GNU General Public License version 2 or any later version.
7 7
8 8 //! Bindings for the `hg::dirstate::dirstate_map` file provided by the
9 9 //! `hg-core` package.
10 10
11 11 use std::cell::{RefCell, RefMut};
12 12 use std::convert::TryInto;
13 13
14 14 use cpython::{
15 15 exc, PyBool, PyBytes, PyClone, PyDict, PyErr, PyList, PyNone, PyObject,
16 16 PyResult, Python, PythonObject, ToPyObject, UnsafePyLeaked,
17 17 };
18 18
19 19 use crate::{
20 20 dirstate::copymap::{CopyMap, CopyMapItemsIterator, CopyMapKeysIterator},
21 21 dirstate::item::DirstateItem,
22 22 pybytes_deref::PyBytesDeref,
23 23 };
24 24 use hg::{
25 25 dirstate::StateMapIter,
26 26 dirstate_tree::dirstate_map::DirstateMap as TreeDirstateMap,
27 27 dirstate_tree::on_disk::DirstateV2ParseError,
28 28 dirstate_tree::owning::OwningDirstateMap,
29 29 revlog::Node,
30 30 utils::files::normalize_case,
31 31 utils::hg_path::{HgPath, HgPathBuf},
32 32 DirstateEntry, DirstateError, DirstateParents, EntryState,
33 33 };
34 34
35 35 // TODO
36 36 // This object needs to share references to multiple members of its Rust
37 37 // inner struct, namely `copy_map`, `dirs` and `all_dirs`.
38 38 // Right now `CopyMap` is done, but it needs to have an explicit reference
39 39 // to `RustDirstateMap` which itself needs to have an encapsulation for
40 40 // every method in `CopyMap` (copymapcopy, etc.).
41 41 // This is ugly and hard to maintain.
42 42 // The same logic applies to `dirs` and `all_dirs`, however the `Dirs`
43 43 // `py_class!` is already implemented and does not mention
44 44 // `RustDirstateMap`, rightfully so.
45 45 // All attributes also have to have a separate refcount data attribute for
46 46 // leaks, with all methods that go along for reference sharing.
47 47 py_class!(pub class DirstateMap |py| {
48 48 @shared data inner: OwningDirstateMap;
49 49
50 50 /// Returns a `(dirstate_map, parents)` tuple
51 51 @staticmethod
52 52 def new_v1(
53 53 on_disk: PyBytes,
54 54 ) -> PyResult<PyObject> {
55 55 let on_disk = PyBytesDeref::new(py, on_disk);
56 56 let mut map = OwningDirstateMap::new_empty(on_disk);
57 57 let (on_disk, map_placeholder) = map.get_pair_mut();
58 58
59 59 let (actual_map, parents) = TreeDirstateMap::new_v1(on_disk)
60 60 .map_err(|e| dirstate_error(py, e))?;
61 61 *map_placeholder = actual_map;
62 62 let map = Self::create_instance(py, map)?;
63 63 let parents = parents.map(|p| {
64 64 let p1 = PyBytes::new(py, p.p1.as_bytes());
65 65 let p2 = PyBytes::new(py, p.p2.as_bytes());
66 66 (p1, p2)
67 67 });
68 68 Ok((map, parents).to_py_object(py).into_object())
69 69 }
70 70
71 71 /// Returns a DirstateMap
72 72 @staticmethod
73 73 def new_v2(
74 74 on_disk: PyBytes,
75 75 data_size: usize,
76 76 tree_metadata: PyBytes,
77 77 ) -> PyResult<PyObject> {
78 78 let dirstate_error = |e: DirstateError| {
79 79 PyErr::new::<exc::OSError, _>(py, format!("Dirstate error: {:?}", e))
80 80 };
81 81 let on_disk = PyBytesDeref::new(py, on_disk);
82 82 let mut map = OwningDirstateMap::new_empty(on_disk);
83 83 let (on_disk, map_placeholder) = map.get_pair_mut();
84 84 *map_placeholder = TreeDirstateMap::new_v2(
85 85 on_disk, data_size, tree_metadata.data(py),
86 86 ).map_err(dirstate_error)?;
87 87 let map = Self::create_instance(py, map)?;
88 88 Ok(map.into_object())
89 89 }
90 90
91 91 def clear(&self) -> PyResult<PyObject> {
92 92 self.inner(py).borrow_mut().clear();
93 93 Ok(py.None())
94 94 }
95 95
96 96 def get(
97 97 &self,
98 98 key: PyObject,
99 99 default: Option<PyObject> = None
100 100 ) -> PyResult<Option<PyObject>> {
101 101 let key = key.extract::<PyBytes>(py)?;
102 102 match self
103 103 .inner(py)
104 104 .borrow()
105 105 .get(HgPath::new(key.data(py)))
106 106 .map_err(|e| v2_error(py, e))?
107 107 {
108 108 Some(entry) => {
109 109 Ok(Some(DirstateItem::new_as_pyobject(py, entry)?))
110 110 },
111 111 None => Ok(default)
112 112 }
113 113 }
114 114
115 115 def set_dirstate_item(
116 116 &self,
117 117 path: PyObject,
118 118 item: DirstateItem
119 119 ) -> PyResult<PyObject> {
120 120 let f = path.extract::<PyBytes>(py)?;
121 121 let filename = HgPath::new(f.data(py));
122 122 self.inner(py)
123 123 .borrow_mut()
124 124 .set_entry(filename, item.get_entry(py))
125 125 .map_err(|e| v2_error(py, e))?;
126 126 Ok(py.None())
127 127 }
128 128
129 129 def addfile(
130 130 &self,
131 131 f: PyBytes,
132 132 item: DirstateItem,
133 133 ) -> PyResult<PyNone> {
134 134 let filename = HgPath::new(f.data(py));
135 135 let entry = item.get_entry(py);
136 136 self.inner(py)
137 137 .borrow_mut()
138 138 .add_file(filename, entry)
139 139 .map_err(|e |dirstate_error(py, e))?;
140 140 Ok(PyNone)
141 141 }
142 142
143 143 def removefile(
144 144 &self,
145 145 f: PyObject,
146 146 in_merge: PyObject
147 147 ) -> PyResult<PyObject> {
148 148 self.inner(py).borrow_mut()
149 149 .remove_file(
150 150 HgPath::new(f.extract::<PyBytes>(py)?.data(py)),
151 151 in_merge.extract::<PyBool>(py)?.is_true(),
152 152 )
153 153 .or_else(|_| {
154 154 Err(PyErr::new::<exc::OSError, _>(
155 155 py,
156 156 "Dirstate error".to_string(),
157 157 ))
158 158 })?;
159 159 Ok(py.None())
160 160 }
161 161
162 162 def drop_item_and_copy_source(
163 163 &self,
164 164 f: PyBytes,
165 165 ) -> PyResult<PyNone> {
166 166 self.inner(py)
167 167 .borrow_mut()
168 168 .drop_entry_and_copy_source(HgPath::new(f.data(py)))
169 169 .map_err(|e |dirstate_error(py, e))?;
170 170 Ok(PyNone)
171 171 }
172 172
173 173 def hastrackeddir(&self, d: PyObject) -> PyResult<PyBool> {
174 174 let d = d.extract::<PyBytes>(py)?;
175 175 Ok(self.inner(py).borrow_mut()
176 176 .has_tracked_dir(HgPath::new(d.data(py)))
177 177 .map_err(|e| {
178 178 PyErr::new::<exc::ValueError, _>(py, e.to_string())
179 179 })?
180 180 .to_py_object(py))
181 181 }
182 182
183 183 def hasdir(&self, d: PyObject) -> PyResult<PyBool> {
184 184 let d = d.extract::<PyBytes>(py)?;
185 185 Ok(self.inner(py).borrow_mut()
186 186 .has_dir(HgPath::new(d.data(py)))
187 187 .map_err(|e| {
188 188 PyErr::new::<exc::ValueError, _>(py, e.to_string())
189 189 })?
190 190 .to_py_object(py))
191 191 }
192 192
193 193 def write_v1(
194 194 &self,
195 195 p1: PyObject,
196 196 p2: PyObject,
197 197 ) -> PyResult<PyBytes> {
198 198 let inner = self.inner(py).borrow();
199 199 let parents = DirstateParents {
200 200 p1: extract_node_id(py, &p1)?,
201 201 p2: extract_node_id(py, &p2)?,
202 202 };
203 203 let result = inner.pack_v1(parents);
204 204 match result {
205 205 Ok(packed) => Ok(PyBytes::new(py, &packed)),
206 206 Err(_) => Err(PyErr::new::<exc::OSError, _>(
207 207 py,
208 208 "Dirstate error".to_string(),
209 209 )),
210 210 }
211 211 }
212 212
213 213 /// Returns new data together with whether that data should be appended to
214 214 /// the existing data file whose content is at `self.on_disk` (True),
215 215 /// instead of written to a new data file (False).
216 216 def write_v2(
217 217 &self,
218 218 can_append: bool,
219 219 ) -> PyResult<PyObject> {
220 220 let inner = self.inner(py).borrow();
221 221 let result = inner.pack_v2(can_append);
222 222 match result {
223 223 Ok((packed, tree_metadata, append)) => {
224 224 let packed = PyBytes::new(py, &packed);
225 let tree_metadata = PyBytes::new(py, &tree_metadata);
225 let tree_metadata = PyBytes::new(py, tree_metadata.as_bytes());
226 226 let tuple = (packed, tree_metadata, append);
227 227 Ok(tuple.to_py_object(py).into_object())
228 228 },
229 229 Err(_) => Err(PyErr::new::<exc::OSError, _>(
230 230 py,
231 231 "Dirstate error".to_string(),
232 232 )),
233 233 }
234 234 }
235 235
236 236 def filefoldmapasdict(&self) -> PyResult<PyDict> {
237 237 let dict = PyDict::new(py);
238 238 for item in self.inner(py).borrow_mut().iter() {
239 239 let (path, entry) = item.map_err(|e| v2_error(py, e))?;
240 240 if entry.state() != EntryState::Removed {
241 241 let key = normalize_case(path);
242 242 let value = path;
243 243 dict.set_item(
244 244 py,
245 245 PyBytes::new(py, key.as_bytes()).into_object(),
246 246 PyBytes::new(py, value.as_bytes()).into_object(),
247 247 )?;
248 248 }
249 249 }
250 250 Ok(dict)
251 251 }
252 252
253 253 def __len__(&self) -> PyResult<usize> {
254 254 Ok(self.inner(py).borrow().len())
255 255 }
256 256
257 257 def __contains__(&self, key: PyObject) -> PyResult<bool> {
258 258 let key = key.extract::<PyBytes>(py)?;
259 259 self.inner(py)
260 260 .borrow()
261 261 .contains_key(HgPath::new(key.data(py)))
262 262 .map_err(|e| v2_error(py, e))
263 263 }
264 264
265 265 def __getitem__(&self, key: PyObject) -> PyResult<PyObject> {
266 266 let key = key.extract::<PyBytes>(py)?;
267 267 let key = HgPath::new(key.data(py));
268 268 match self
269 269 .inner(py)
270 270 .borrow()
271 271 .get(key)
272 272 .map_err(|e| v2_error(py, e))?
273 273 {
274 274 Some(entry) => {
275 275 Ok(DirstateItem::new_as_pyobject(py, entry)?)
276 276 },
277 277 None => Err(PyErr::new::<exc::KeyError, _>(
278 278 py,
279 279 String::from_utf8_lossy(key.as_bytes()),
280 280 )),
281 281 }
282 282 }
283 283
284 284 def keys(&self) -> PyResult<DirstateMapKeysIterator> {
285 285 let leaked_ref = self.inner(py).leak_immutable();
286 286 DirstateMapKeysIterator::from_inner(
287 287 py,
288 288 unsafe { leaked_ref.map(py, |o| o.iter()) },
289 289 )
290 290 }
291 291
292 292 def items(&self) -> PyResult<DirstateMapItemsIterator> {
293 293 let leaked_ref = self.inner(py).leak_immutable();
294 294 DirstateMapItemsIterator::from_inner(
295 295 py,
296 296 unsafe { leaked_ref.map(py, |o| o.iter()) },
297 297 )
298 298 }
299 299
300 300 def __iter__(&self) -> PyResult<DirstateMapKeysIterator> {
301 301 let leaked_ref = self.inner(py).leak_immutable();
302 302 DirstateMapKeysIterator::from_inner(
303 303 py,
304 304 unsafe { leaked_ref.map(py, |o| o.iter()) },
305 305 )
306 306 }
307 307
308 308 // TODO all copymap* methods, see docstring above
309 309 def copymapcopy(&self) -> PyResult<PyDict> {
310 310 let dict = PyDict::new(py);
311 311 for item in self.inner(py).borrow().copy_map_iter() {
312 312 let (key, value) = item.map_err(|e| v2_error(py, e))?;
313 313 dict.set_item(
314 314 py,
315 315 PyBytes::new(py, key.as_bytes()),
316 316 PyBytes::new(py, value.as_bytes()),
317 317 )?;
318 318 }
319 319 Ok(dict)
320 320 }
321 321
322 322 def copymapgetitem(&self, key: PyObject) -> PyResult<PyBytes> {
323 323 let key = key.extract::<PyBytes>(py)?;
324 324 match self
325 325 .inner(py)
326 326 .borrow()
327 327 .copy_map_get(HgPath::new(key.data(py)))
328 328 .map_err(|e| v2_error(py, e))?
329 329 {
330 330 Some(copy) => Ok(PyBytes::new(py, copy.as_bytes())),
331 331 None => Err(PyErr::new::<exc::KeyError, _>(
332 332 py,
333 333 String::from_utf8_lossy(key.data(py)),
334 334 )),
335 335 }
336 336 }
337 337 def copymap(&self) -> PyResult<CopyMap> {
338 338 CopyMap::from_inner(py, self.clone_ref(py))
339 339 }
340 340
341 341 def copymaplen(&self) -> PyResult<usize> {
342 342 Ok(self.inner(py).borrow().copy_map_len())
343 343 }
344 344 def copymapcontains(&self, key: PyObject) -> PyResult<bool> {
345 345 let key = key.extract::<PyBytes>(py)?;
346 346 self.inner(py)
347 347 .borrow()
348 348 .copy_map_contains_key(HgPath::new(key.data(py)))
349 349 .map_err(|e| v2_error(py, e))
350 350 }
351 351 def copymapget(
352 352 &self,
353 353 key: PyObject,
354 354 default: Option<PyObject>
355 355 ) -> PyResult<Option<PyObject>> {
356 356 let key = key.extract::<PyBytes>(py)?;
357 357 match self
358 358 .inner(py)
359 359 .borrow()
360 360 .copy_map_get(HgPath::new(key.data(py)))
361 361 .map_err(|e| v2_error(py, e))?
362 362 {
363 363 Some(copy) => Ok(Some(
364 364 PyBytes::new(py, copy.as_bytes()).into_object(),
365 365 )),
366 366 None => Ok(default),
367 367 }
368 368 }
369 369 def copymapsetitem(
370 370 &self,
371 371 key: PyObject,
372 372 value: PyObject
373 373 ) -> PyResult<PyObject> {
374 374 let key = key.extract::<PyBytes>(py)?;
375 375 let value = value.extract::<PyBytes>(py)?;
376 376 self.inner(py)
377 377 .borrow_mut()
378 378 .copy_map_insert(
379 379 HgPathBuf::from_bytes(key.data(py)),
380 380 HgPathBuf::from_bytes(value.data(py)),
381 381 )
382 382 .map_err(|e| v2_error(py, e))?;
383 383 Ok(py.None())
384 384 }
385 385 def copymappop(
386 386 &self,
387 387 key: PyObject,
388 388 default: Option<PyObject>
389 389 ) -> PyResult<Option<PyObject>> {
390 390 let key = key.extract::<PyBytes>(py)?;
391 391 match self
392 392 .inner(py)
393 393 .borrow_mut()
394 394 .copy_map_remove(HgPath::new(key.data(py)))
395 395 .map_err(|e| v2_error(py, e))?
396 396 {
397 397 Some(copy) => Ok(Some(
398 398 PyBytes::new(py, copy.as_bytes()).into_object(),
399 399 )),
400 400 None => Ok(default),
401 401 }
402 402 }
403 403
404 404 def copymapiter(&self) -> PyResult<CopyMapKeysIterator> {
405 405 let leaked_ref = self.inner(py).leak_immutable();
406 406 CopyMapKeysIterator::from_inner(
407 407 py,
408 408 unsafe { leaked_ref.map(py, |o| o.copy_map_iter()) },
409 409 )
410 410 }
411 411
412 412 def copymapitemsiter(&self) -> PyResult<CopyMapItemsIterator> {
413 413 let leaked_ref = self.inner(py).leak_immutable();
414 414 CopyMapItemsIterator::from_inner(
415 415 py,
416 416 unsafe { leaked_ref.map(py, |o| o.copy_map_iter()) },
417 417 )
418 418 }
419 419
420 420 def tracked_dirs(&self) -> PyResult<PyList> {
421 421 let dirs = PyList::new(py, &[]);
422 422 for path in self.inner(py).borrow_mut().iter_tracked_dirs()
423 423 .map_err(|e |dirstate_error(py, e))?
424 424 {
425 425 let path = path.map_err(|e| v2_error(py, e))?;
426 426 let path = PyBytes::new(py, path.as_bytes());
427 427 dirs.append(py, path.into_object())
428 428 }
429 429 Ok(dirs)
430 430 }
431 431
432 432 def debug_iter(&self, all: bool) -> PyResult<PyList> {
433 433 let dirs = PyList::new(py, &[]);
434 434 for item in self.inner(py).borrow().debug_iter(all) {
435 435 let (path, (state, mode, size, mtime)) =
436 436 item.map_err(|e| v2_error(py, e))?;
437 437 let path = PyBytes::new(py, path.as_bytes());
438 438 let item = (path, state, mode, size, mtime);
439 439 dirs.append(py, item.to_py_object(py).into_object())
440 440 }
441 441 Ok(dirs)
442 442 }
443 443 });
444 444
445 445 impl DirstateMap {
446 446 pub fn get_inner_mut<'a>(
447 447 &'a self,
448 448 py: Python<'a>,
449 449 ) -> RefMut<'a, OwningDirstateMap> {
450 450 self.inner(py).borrow_mut()
451 451 }
452 452 fn translate_key(
453 453 py: Python,
454 454 res: Result<(&HgPath, DirstateEntry), DirstateV2ParseError>,
455 455 ) -> PyResult<Option<PyBytes>> {
456 456 let (f, _entry) = res.map_err(|e| v2_error(py, e))?;
457 457 Ok(Some(PyBytes::new(py, f.as_bytes())))
458 458 }
459 459 fn translate_key_value(
460 460 py: Python,
461 461 res: Result<(&HgPath, DirstateEntry), DirstateV2ParseError>,
462 462 ) -> PyResult<Option<(PyBytes, PyObject)>> {
463 463 let (f, entry) = res.map_err(|e| v2_error(py, e))?;
464 464 Ok(Some((
465 465 PyBytes::new(py, f.as_bytes()),
466 466 DirstateItem::new_as_pyobject(py, entry)?,
467 467 )))
468 468 }
469 469 }
470 470
471 471 py_shared_iterator!(
472 472 DirstateMapKeysIterator,
473 473 UnsafePyLeaked<StateMapIter<'static>>,
474 474 DirstateMap::translate_key,
475 475 Option<PyBytes>
476 476 );
477 477
478 478 py_shared_iterator!(
479 479 DirstateMapItemsIterator,
480 480 UnsafePyLeaked<StateMapIter<'static>>,
481 481 DirstateMap::translate_key_value,
482 482 Option<(PyBytes, PyObject)>
483 483 );
484 484
485 485 fn extract_node_id(py: Python, obj: &PyObject) -> PyResult<Node> {
486 486 let bytes = obj.extract::<PyBytes>(py)?;
487 487 match bytes.data(py).try_into() {
488 488 Ok(s) => Ok(s),
489 489 Err(e) => Err(PyErr::new::<exc::ValueError, _>(py, e.to_string())),
490 490 }
491 491 }
492 492
493 493 pub(super) fn v2_error(py: Python<'_>, _: DirstateV2ParseError) -> PyErr {
494 494 PyErr::new::<exc::ValueError, _>(py, "corrupted dirstate-v2")
495 495 }
496 496
497 497 fn dirstate_error(py: Python<'_>, e: DirstateError) -> PyErr {
498 498 PyErr::new::<exc::OSError, _>(py, format!("Dirstate error: {:?}", e))
499 499 }