##// END OF EJS Templates
dirstate: use more than a bool to control append behavior...
marmoute -
r51116:1891086f stable
parent child Browse files
Show More
@@ -1,688 +1,693 b''
1 1 # dirstatemap.py
2 2 #
3 3 # This software may be used and distributed according to the terms of the
4 4 # GNU General Public License version 2 or any later version.
5 5
6 6
7 7 from .i18n import _
8 8
9 9 from . import (
10 10 error,
11 11 pathutil,
12 12 policy,
13 13 txnutil,
14 14 util,
15 15 )
16 16
17 17 from .dirstateutils import (
18 18 docket as docketmod,
19 19 v2,
20 20 )
21 21
22 22 parsers = policy.importmod('parsers')
23 23 rustmod = policy.importrust('dirstate')
24 24
25 25 propertycache = util.propertycache
26 26
27 27 if rustmod is None:
28 28 DirstateItem = parsers.DirstateItem
29 29 else:
30 30 DirstateItem = rustmod.DirstateItem
31 31
32 32 rangemask = 0x7FFFFFFF
33 33
34 WRITE_MODE_AUTO = 0
35 WRITE_MODE_FORCE_NEW = 1
36
34 37
35 38 class _dirstatemapcommon:
36 39 """
37 40 Methods that are identical for both implementations of the dirstatemap
38 41 class, with and without Rust extensions enabled.
39 42 """
40 43
41 44 # please pytype
42 45
43 46 _map = None
44 47 copymap = None
45 48
46 49 def __init__(self, ui, opener, root, nodeconstants, use_dirstate_v2):
47 50 self._use_dirstate_v2 = use_dirstate_v2
48 51 self._nodeconstants = nodeconstants
49 52 self._ui = ui
50 53 self._opener = opener
51 54 self._root = root
52 55 self._filename = b'dirstate'
53 56 self._nodelen = 20 # Also update Rust code when changing this!
54 57 self._parents = None
55 58 self._dirtyparents = False
56 59 self._docket = None
57 60
58 61 # for consistent view between _pl() and _read() invocations
59 62 self._pendingmode = None
60 63
61 64 def preload(self):
62 65 """Loads the underlying data, if it's not already loaded"""
63 66 self._map
64 67
65 68 def get(self, key, default=None):
66 69 return self._map.get(key, default)
67 70
68 71 def __len__(self):
69 72 return len(self._map)
70 73
71 74 def __iter__(self):
72 75 return iter(self._map)
73 76
74 77 def __contains__(self, key):
75 78 return key in self._map
76 79
77 80 def __getitem__(self, item):
78 81 return self._map[item]
79 82
80 83 ### disk interaction
81 84
82 85 def _opendirstatefile(self):
83 86 fp, mode = txnutil.trypending(self._root, self._opener, self._filename)
84 87 if self._pendingmode is not None and self._pendingmode != mode:
85 88 fp.close()
86 89 raise error.Abort(
87 90 _(b'working directory state may be changed parallelly')
88 91 )
89 92 self._pendingmode = mode
90 93 return fp
91 94
92 95 def _readdirstatefile(self, size=-1):
93 96 try:
94 97 with self._opendirstatefile() as fp:
95 98 return fp.read(size)
96 99 except FileNotFoundError:
97 100 # File doesn't exist, so the current state is empty
98 101 return b''
99 102
100 103 @property
101 104 def docket(self):
102 105 if not self._docket:
103 106 if not self._use_dirstate_v2:
104 107 raise error.ProgrammingError(
105 108 b'dirstate only has a docket in v2 format'
106 109 )
107 110 self._docket = docketmod.DirstateDocket.parse(
108 111 self._readdirstatefile(), self._nodeconstants
109 112 )
110 113 return self._docket
111 114
112 115 def write_v2_no_append(self, tr, st, meta, packed):
113 116 old_docket = self.docket
114 117 new_docket = docketmod.DirstateDocket.with_new_uuid(
115 118 self.parents(), len(packed), meta
116 119 )
117 120 if old_docket.uuid == new_docket.uuid:
118 121 raise error.ProgrammingError(b'dirstate docket name collision')
119 122 data_filename = new_docket.data_filename()
120 123 self._opener.write(data_filename, packed)
121 124 # Write the new docket after the new data file has been
122 125 # written. Because `st` was opened with `atomictemp=True`,
123 126 # the actual `.hg/dirstate` file is only affected on close.
124 127 st.write(new_docket.serialize())
125 128 st.close()
126 129 # Remove the old data file after the new docket pointing to
127 130 # the new data file was written.
128 131 if old_docket.uuid:
129 132 data_filename = old_docket.data_filename()
130 133 unlink = lambda _tr=None: self._opener.unlink(data_filename)
131 134 if tr:
132 135 category = b"dirstate-v2-clean-" + old_docket.uuid
133 136 tr.addpostclose(category, unlink)
134 137 else:
135 138 unlink()
136 139 self._docket = new_docket
137 140
138 141 ### reading/setting parents
139 142
140 143 def parents(self):
141 144 if not self._parents:
142 145 if self._use_dirstate_v2:
143 146 self._parents = self.docket.parents
144 147 else:
145 148 read_len = self._nodelen * 2
146 149 st = self._readdirstatefile(read_len)
147 150 l = len(st)
148 151 if l == read_len:
149 152 self._parents = (
150 153 st[: self._nodelen],
151 154 st[self._nodelen : 2 * self._nodelen],
152 155 )
153 156 elif l == 0:
154 157 self._parents = (
155 158 self._nodeconstants.nullid,
156 159 self._nodeconstants.nullid,
157 160 )
158 161 else:
159 162 raise error.Abort(
160 163 _(b'working directory state appears damaged!')
161 164 )
162 165
163 166 return self._parents
164 167
165 168
166 169 class dirstatemap(_dirstatemapcommon):
167 170 """Map encapsulating the dirstate's contents.
168 171
169 172 The dirstate contains the following state:
170 173
171 174 - `identity` is the identity of the dirstate file, which can be used to
172 175 detect when changes have occurred to the dirstate file.
173 176
174 177 - `parents` is a pair containing the parents of the working copy. The
175 178 parents are updated by calling `setparents`.
176 179
177 180 - the state map maps filenames to tuples of (state, mode, size, mtime),
178 181 where state is a single character representing 'normal', 'added',
179 182 'removed', or 'merged'. It is read by treating the dirstate as a
180 183 dict. File state is updated by calling various methods (see each
181 184 documentation for details):
182 185
183 186 - `reset_state`,
184 187 - `set_tracked`
185 188 - `set_untracked`
186 189 - `set_clean`
187 190 - `set_possibly_dirty`
188 191
189 192 - `copymap` maps destination filenames to their source filename.
190 193
191 194 The dirstate also provides the following views onto the state:
192 195
193 196 - `filefoldmap` is a dict mapping normalized filenames to the denormalized
194 197 form that they appear as in the dirstate.
195 198
196 199 - `dirfoldmap` is a dict mapping normalized directory names to the
197 200 denormalized form that they appear as in the dirstate.
198 201 """
199 202
200 203 ### Core data storage and access
201 204
202 205 @propertycache
203 206 def _map(self):
204 207 self._map = {}
205 208 self.read()
206 209 return self._map
207 210
208 211 @propertycache
209 212 def copymap(self):
210 213 self.copymap = {}
211 214 self._map
212 215 return self.copymap
213 216
214 217 def clear(self):
215 218 self._map.clear()
216 219 self.copymap.clear()
217 220 self.setparents(self._nodeconstants.nullid, self._nodeconstants.nullid)
218 221 util.clearcachedproperty(self, b"_dirs")
219 222 util.clearcachedproperty(self, b"_alldirs")
220 223 util.clearcachedproperty(self, b"filefoldmap")
221 224 util.clearcachedproperty(self, b"dirfoldmap")
222 225
223 226 def items(self):
224 227 return self._map.items()
225 228
226 229 # forward for python2,3 compat
227 230 iteritems = items
228 231
229 232 def debug_iter(self, all):
230 233 """
231 234 Return an iterator of (filename, state, mode, size, mtime) tuples
232 235
233 236 `all` is unused when Rust is not enabled
234 237 """
235 238 for (filename, item) in self.items():
236 239 yield (filename, item.state, item.mode, item.size, item.mtime)
237 240
238 241 def keys(self):
239 242 return self._map.keys()
240 243
241 244 ### reading/setting parents
242 245
243 246 def setparents(self, p1, p2, fold_p2=False):
244 247 self._parents = (p1, p2)
245 248 self._dirtyparents = True
246 249 copies = {}
247 250 if fold_p2:
248 251 for f, s in self._map.items():
249 252 # Discard "merged" markers when moving away from a merge state
250 253 if s.p2_info:
251 254 source = self.copymap.pop(f, None)
252 255 if source:
253 256 copies[f] = source
254 257 s.drop_merge_data()
255 258 return copies
256 259
257 260 ### disk interaction
258 261
259 262 def read(self):
260 263 # ignore HG_PENDING because identity is used only for writing
261 264 self.identity = util.filestat.frompath(
262 265 self._opener.join(self._filename)
263 266 )
264 267
265 268 if self._use_dirstate_v2:
266 269 if not self.docket.uuid:
267 270 return
268 271 st = self._opener.read(self.docket.data_filename())
269 272 else:
270 273 st = self._readdirstatefile()
271 274
272 275 if not st:
273 276 return
274 277
275 278 # TODO: adjust this estimate for dirstate-v2
276 279 if util.safehasattr(parsers, b'dict_new_presized'):
277 280 # Make an estimate of the number of files in the dirstate based on
278 281 # its size. This trades wasting some memory for avoiding costly
279 282 # resizes. Each entry have a prefix of 17 bytes followed by one or
280 283 # two path names. Studies on various large-scale real-world repositories
281 284 # found 54 bytes a reasonable upper limit for the average path names.
282 285 # Copy entries are ignored for the sake of this estimate.
283 286 self._map = parsers.dict_new_presized(len(st) // 71)
284 287
285 288 # Python's garbage collector triggers a GC each time a certain number
286 289 # of container objects (the number being defined by
287 290 # gc.get_threshold()) are allocated. parse_dirstate creates a tuple
288 291 # for each file in the dirstate. The C version then immediately marks
289 292 # them as not to be tracked by the collector. However, this has no
290 293 # effect on when GCs are triggered, only on what objects the GC looks
291 294 # into. This means that O(number of files) GCs are unavoidable.
292 295 # Depending on when in the process's lifetime the dirstate is parsed,
293 296 # this can get very expensive. As a workaround, disable GC while
294 297 # parsing the dirstate.
295 298 #
296 299 # (we cannot decorate the function directly since it is in a C module)
297 300 if self._use_dirstate_v2:
298 301 p = self.docket.parents
299 302 meta = self.docket.tree_metadata
300 303 parse_dirstate = util.nogc(v2.parse_dirstate)
301 304 parse_dirstate(self._map, self.copymap, st, meta)
302 305 else:
303 306 parse_dirstate = util.nogc(parsers.parse_dirstate)
304 307 p = parse_dirstate(self._map, self.copymap, st)
305 308 if not self._dirtyparents:
306 309 self.setparents(*p)
307 310
308 311 # Avoid excess attribute lookups by fast pathing certain checks
309 312 self.__contains__ = self._map.__contains__
310 313 self.__getitem__ = self._map.__getitem__
311 314 self.get = self._map.get
312 315
313 316 def write(self, tr, st):
314 317 if self._use_dirstate_v2:
315 318 packed, meta = v2.pack_dirstate(self._map, self.copymap)
316 319 self.write_v2_no_append(tr, st, meta, packed)
317 320 else:
318 321 packed = parsers.pack_dirstate(
319 322 self._map, self.copymap, self.parents()
320 323 )
321 324 st.write(packed)
322 325 st.close()
323 326 self._dirtyparents = False
324 327
325 328 @propertycache
326 329 def identity(self):
327 330 self._map
328 331 return self.identity
329 332
330 333 ### code related to maintaining and accessing "extra" property
331 334 # (e.g. "has_dir")
332 335
333 336 def _dirs_incr(self, filename, old_entry=None):
334 337 """increment the dirstate counter if applicable"""
335 338 if (
336 339 old_entry is None or old_entry.removed
337 340 ) and "_dirs" in self.__dict__:
338 341 self._dirs.addpath(filename)
339 342 if old_entry is None and "_alldirs" in self.__dict__:
340 343 self._alldirs.addpath(filename)
341 344
342 345 def _dirs_decr(self, filename, old_entry=None, remove_variant=False):
343 346 """decrement the dirstate counter if applicable"""
344 347 if old_entry is not None:
345 348 if "_dirs" in self.__dict__ and not old_entry.removed:
346 349 self._dirs.delpath(filename)
347 350 if "_alldirs" in self.__dict__ and not remove_variant:
348 351 self._alldirs.delpath(filename)
349 352 elif remove_variant and "_alldirs" in self.__dict__:
350 353 self._alldirs.addpath(filename)
351 354 if "filefoldmap" in self.__dict__:
352 355 normed = util.normcase(filename)
353 356 self.filefoldmap.pop(normed, None)
354 357
355 358 @propertycache
356 359 def filefoldmap(self):
357 360 """Returns a dictionary mapping normalized case paths to their
358 361 non-normalized versions.
359 362 """
360 363 try:
361 364 makefilefoldmap = parsers.make_file_foldmap
362 365 except AttributeError:
363 366 pass
364 367 else:
365 368 return makefilefoldmap(
366 369 self._map, util.normcasespec, util.normcasefallback
367 370 )
368 371
369 372 f = {}
370 373 normcase = util.normcase
371 374 for name, s in self._map.items():
372 375 if not s.removed:
373 376 f[normcase(name)] = name
374 377 f[b'.'] = b'.' # prevents useless util.fspath() invocation
375 378 return f
376 379
377 380 @propertycache
378 381 def dirfoldmap(self):
379 382 f = {}
380 383 normcase = util.normcase
381 384 for name in self._dirs:
382 385 f[normcase(name)] = name
383 386 return f
384 387
385 388 def hastrackeddir(self, d):
386 389 """
387 390 Returns True if the dirstate contains a tracked (not removed) file
388 391 in this directory.
389 392 """
390 393 return d in self._dirs
391 394
392 395 def hasdir(self, d):
393 396 """
394 397 Returns True if the dirstate contains a file (tracked or removed)
395 398 in this directory.
396 399 """
397 400 return d in self._alldirs
398 401
399 402 @propertycache
400 403 def _dirs(self):
401 404 return pathutil.dirs(self._map, only_tracked=True)
402 405
403 406 @propertycache
404 407 def _alldirs(self):
405 408 return pathutil.dirs(self._map)
406 409
407 410 ### code related to manipulation of entries and copy-sources
408 411
409 412 def reset_state(
410 413 self,
411 414 filename,
412 415 wc_tracked=False,
413 416 p1_tracked=False,
414 417 p2_info=False,
415 418 has_meaningful_mtime=True,
416 419 parentfiledata=None,
417 420 ):
418 421 """Set a entry to a given state, diregarding all previous state
419 422
420 423 This is to be used by the part of the dirstate API dedicated to
421 424 adjusting the dirstate after a update/merge.
422 425
423 426 note: calling this might result to no entry existing at all if the
424 427 dirstate map does not see any point at having one for this file
425 428 anymore.
426 429 """
427 430 # copy information are now outdated
428 431 # (maybe new information should be in directly passed to this function)
429 432 self.copymap.pop(filename, None)
430 433
431 434 if not (p1_tracked or p2_info or wc_tracked):
432 435 old_entry = self._map.get(filename)
433 436 self._drop_entry(filename)
434 437 self._dirs_decr(filename, old_entry=old_entry)
435 438 return
436 439
437 440 old_entry = self._map.get(filename)
438 441 self._dirs_incr(filename, old_entry)
439 442 entry = DirstateItem(
440 443 wc_tracked=wc_tracked,
441 444 p1_tracked=p1_tracked,
442 445 p2_info=p2_info,
443 446 has_meaningful_mtime=has_meaningful_mtime,
444 447 parentfiledata=parentfiledata,
445 448 )
446 449 self._map[filename] = entry
447 450
448 451 def set_tracked(self, filename):
449 452 new = False
450 453 entry = self.get(filename)
451 454 if entry is None:
452 455 self._dirs_incr(filename)
453 456 entry = DirstateItem(
454 457 wc_tracked=True,
455 458 )
456 459
457 460 self._map[filename] = entry
458 461 new = True
459 462 elif not entry.tracked:
460 463 self._dirs_incr(filename, entry)
461 464 entry.set_tracked()
462 465 self._refresh_entry(filename, entry)
463 466 new = True
464 467 else:
465 468 # XXX This is probably overkill for more case, but we need this to
466 469 # fully replace the `normallookup` call with `set_tracked` one.
467 470 # Consider smoothing this in the future.
468 471 entry.set_possibly_dirty()
469 472 self._refresh_entry(filename, entry)
470 473 return new
471 474
472 475 def set_untracked(self, f):
473 476 """Mark a file as no longer tracked in the dirstate map"""
474 477 entry = self.get(f)
475 478 if entry is None:
476 479 return False
477 480 else:
478 481 self._dirs_decr(f, old_entry=entry, remove_variant=not entry.added)
479 482 if not entry.p2_info:
480 483 self.copymap.pop(f, None)
481 484 entry.set_untracked()
482 485 self._refresh_entry(f, entry)
483 486 return True
484 487
485 488 def set_clean(self, filename, mode, size, mtime):
486 489 """mark a file as back to a clean state"""
487 490 entry = self[filename]
488 491 size = size & rangemask
489 492 entry.set_clean(mode, size, mtime)
490 493 self._refresh_entry(filename, entry)
491 494 self.copymap.pop(filename, None)
492 495
493 496 def set_possibly_dirty(self, filename):
494 497 """record that the current state of the file on disk is unknown"""
495 498 entry = self[filename]
496 499 entry.set_possibly_dirty()
497 500 self._refresh_entry(filename, entry)
498 501
499 502 def _refresh_entry(self, f, entry):
500 503 """record updated state of an entry"""
501 504 if not entry.any_tracked:
502 505 self._map.pop(f, None)
503 506
504 507 def _drop_entry(self, f):
505 508 """remove any entry for file f
506 509
507 510 This should also drop associated copy information
508 511
509 512 The fact we actually need to drop it is the responsability of the caller"""
510 513 self._map.pop(f, None)
511 514 self.copymap.pop(f, None)
512 515
513 516
514 517 if rustmod is not None:
515 518
516 519 class dirstatemap(_dirstatemapcommon):
517 520
518 521 ### Core data storage and access
519 522
520 523 @propertycache
521 524 def _map(self):
522 525 """
523 526 Fills the Dirstatemap when called.
524 527 """
525 528 # ignore HG_PENDING because identity is used only for writing
526 529 self.identity = util.filestat.frompath(
527 530 self._opener.join(self._filename)
528 531 )
529 532
530 533 if self._use_dirstate_v2:
531 534 if self.docket.uuid:
532 535 # TODO: use mmap when possible
533 536 data = self._opener.read(self.docket.data_filename())
534 537 else:
535 538 data = b''
536 539 self._map = rustmod.DirstateMap.new_v2(
537 540 data, self.docket.data_size, self.docket.tree_metadata
538 541 )
539 542 parents = self.docket.parents
540 543 else:
541 544 self._map, parents = rustmod.DirstateMap.new_v1(
542 545 self._readdirstatefile()
543 546 )
544 547
545 548 if parents and not self._dirtyparents:
546 549 self.setparents(*parents)
547 550
548 551 self.__contains__ = self._map.__contains__
549 552 self.__getitem__ = self._map.__getitem__
550 553 self.get = self._map.get
551 554 return self._map
552 555
553 556 @property
554 557 def copymap(self):
555 558 return self._map.copymap()
556 559
557 560 def debug_iter(self, all):
558 561 """
559 562 Return an iterator of (filename, state, mode, size, mtime) tuples
560 563
561 564 `all`: also include with `state == b' '` dirstate tree nodes that
562 565 don't have an associated `DirstateItem`.
563 566
564 567 """
565 568 return self._map.debug_iter(all)
566 569
567 570 def clear(self):
568 571 self._map.clear()
569 572 self.setparents(
570 573 self._nodeconstants.nullid, self._nodeconstants.nullid
571 574 )
572 575 util.clearcachedproperty(self, b"_dirs")
573 576 util.clearcachedproperty(self, b"_alldirs")
574 577 util.clearcachedproperty(self, b"dirfoldmap")
575 578
576 579 def items(self):
577 580 return self._map.items()
578 581
579 582 # forward for python2,3 compat
580 583 iteritems = items
581 584
582 585 def keys(self):
583 586 return iter(self._map)
584 587
585 588 ### reading/setting parents
586 589
587 590 def setparents(self, p1, p2, fold_p2=False):
588 591 self._parents = (p1, p2)
589 592 self._dirtyparents = True
590 593 copies = {}
591 594 if fold_p2:
592 595 copies = self._map.setparents_fixup()
593 596 return copies
594 597
595 598 ### disk interaction
596 599
597 600 @propertycache
598 601 def identity(self):
599 602 self._map
600 603 return self.identity
601 604
602 605 def write(self, tr, st):
603 606 if not self._use_dirstate_v2:
604 607 p1, p2 = self.parents()
605 608 packed = self._map.write_v1(p1, p2)
606 609 st.write(packed)
607 610 st.close()
608 611 self._dirtyparents = False
609 612 return
610 613
611 614 # We can only append to an existing data file if there is one
612 can_append = self.docket.uuid is not None
613 packed, meta, append = self._map.write_v2(can_append)
615 write_mode = WRITE_MODE_AUTO
616 if self.docket.uuid is None:
617 write_mode = WRITE_MODE_FORCE_NEW
618 packed, meta, append = self._map.write_v2(write_mode)
614 619 if append:
615 620 docket = self.docket
616 621 data_filename = docket.data_filename()
617 622 with self._opener(data_filename, b'r+b') as fp:
618 623 fp.seek(docket.data_size)
619 624 assert fp.tell() == docket.data_size
620 625 written = fp.write(packed)
621 626 if written is not None: # py2 may return None
622 627 assert written == len(packed), (written, len(packed))
623 628 docket.data_size += len(packed)
624 629 docket.parents = self.parents()
625 630 docket.tree_metadata = meta
626 631 st.write(docket.serialize())
627 632 st.close()
628 633 else:
629 634 self.write_v2_no_append(tr, st, meta, packed)
630 635 # Reload from the newly-written file
631 636 util.clearcachedproperty(self, b"_map")
632 637 self._dirtyparents = False
633 638
634 639 ### code related to maintaining and accessing "extra" property
635 640 # (e.g. "has_dir")
636 641
637 642 @propertycache
638 643 def filefoldmap(self):
639 644 """Returns a dictionary mapping normalized case paths to their
640 645 non-normalized versions.
641 646 """
642 647 return self._map.filefoldmapasdict()
643 648
644 649 def hastrackeddir(self, d):
645 650 return self._map.hastrackeddir(d)
646 651
647 652 def hasdir(self, d):
648 653 return self._map.hasdir(d)
649 654
650 655 @propertycache
651 656 def dirfoldmap(self):
652 657 f = {}
653 658 normcase = util.normcase
654 659 for name in self._map.tracked_dirs():
655 660 f[normcase(name)] = name
656 661 return f
657 662
658 663 ### code related to manipulation of entries and copy-sources
659 664
660 665 def set_tracked(self, f):
661 666 return self._map.set_tracked(f)
662 667
663 668 def set_untracked(self, f):
664 669 return self._map.set_untracked(f)
665 670
666 671 def set_clean(self, filename, mode, size, mtime):
667 672 self._map.set_clean(filename, mode, size, mtime)
668 673
669 674 def set_possibly_dirty(self, f):
670 675 self._map.set_possibly_dirty(f)
671 676
672 677 def reset_state(
673 678 self,
674 679 filename,
675 680 wc_tracked=False,
676 681 p1_tracked=False,
677 682 p2_info=False,
678 683 has_meaningful_mtime=True,
679 684 parentfiledata=None,
680 685 ):
681 686 return self._map.reset_state(
682 687 filename,
683 688 wc_tracked,
684 689 p1_tracked,
685 690 p2_info,
686 691 has_meaningful_mtime,
687 692 parentfiledata,
688 693 )
@@ -1,1907 +1,1913 b''
1 1 use bytes_cast::BytesCast;
2 2 use micro_timer::timed;
3 3 use std::borrow::Cow;
4 4 use std::path::PathBuf;
5 5
6 6 use super::on_disk;
7 7 use super::on_disk::DirstateV2ParseError;
8 8 use super::owning::OwningDirstateMap;
9 9 use super::path_with_basename::WithBasename;
10 10 use crate::dirstate::parsers::pack_entry;
11 11 use crate::dirstate::parsers::packed_entry_size;
12 12 use crate::dirstate::parsers::parse_dirstate_entries;
13 13 use crate::dirstate::CopyMapIter;
14 14 use crate::dirstate::DirstateV2Data;
15 15 use crate::dirstate::ParentFileData;
16 16 use crate::dirstate::StateMapIter;
17 17 use crate::dirstate::TruncatedTimestamp;
18 18 use crate::matchers::Matcher;
19 19 use crate::utils::hg_path::{HgPath, HgPathBuf};
20 20 use crate::DirstateEntry;
21 21 use crate::DirstateError;
22 22 use crate::DirstateMapError;
23 23 use crate::DirstateParents;
24 24 use crate::DirstateStatus;
25 25 use crate::FastHashbrownMap as FastHashMap;
26 26 use crate::PatternFileWarning;
27 27 use crate::StatusError;
28 28 use crate::StatusOptions;
29 29
30 30 /// Append to an existing data file if the amount of unreachable data (not used
31 31 /// anymore) is less than this fraction of the total amount of existing data.
32 32 const ACCEPTABLE_UNREACHABLE_BYTES_RATIO: f32 = 0.5;
33 33
34 34 #[derive(Debug, PartialEq, Eq)]
35 35 /// Version of the on-disk format
36 36 pub enum DirstateVersion {
37 37 V1,
38 38 V2,
39 39 }
40 40
41 #[derive(Debug, PartialEq, Eq)]
42 pub enum DirstateMapWriteMode {
43 Auto,
44 ForceNewDataFile,
45 }
46
41 47 #[derive(Debug)]
42 48 pub struct DirstateMap<'on_disk> {
43 49 /// Contents of the `.hg/dirstate` file
44 50 pub(super) on_disk: &'on_disk [u8],
45 51
46 52 pub(super) root: ChildNodes<'on_disk>,
47 53
48 54 /// Number of nodes anywhere in the tree that have `.entry.is_some()`.
49 55 pub(super) nodes_with_entry_count: u32,
50 56
51 57 /// Number of nodes anywhere in the tree that have
52 58 /// `.copy_source.is_some()`.
53 59 pub(super) nodes_with_copy_source_count: u32,
54 60
55 61 /// See on_disk::Header
56 62 pub(super) ignore_patterns_hash: on_disk::IgnorePatternsHash,
57 63
58 64 /// How many bytes of `on_disk` are not used anymore
59 65 pub(super) unreachable_bytes: u32,
60 66
61 67 /// Size of the data used to first load this `DirstateMap`. Used in case
62 68 /// we need to write some new metadata, but no new data on disk.
63 69 pub(super) old_data_size: usize,
64 70
65 71 pub(super) dirstate_version: DirstateVersion,
66 72 }
67 73
68 74 /// Using a plain `HgPathBuf` of the full path from the repository root as a
69 75 /// map key would also work: all paths in a given map have the same parent
70 76 /// path, so comparing full paths gives the same result as comparing base
71 77 /// names. However `HashMap` would waste time always re-hashing the same
72 78 /// string prefix.
73 79 pub(super) type NodeKey<'on_disk> = WithBasename<Cow<'on_disk, HgPath>>;
74 80
75 81 /// Similar to `&'tree Cow<'on_disk, HgPath>`, but can also be returned
76 82 /// for on-disk nodes that don’t actually have a `Cow` to borrow.
77 83 #[derive(Debug)]
78 84 pub(super) enum BorrowedPath<'tree, 'on_disk> {
79 85 InMemory(&'tree HgPathBuf),
80 86 OnDisk(&'on_disk HgPath),
81 87 }
82 88
83 89 #[derive(Debug)]
84 90 pub(super) enum ChildNodes<'on_disk> {
85 91 InMemory(FastHashMap<NodeKey<'on_disk>, Node<'on_disk>>),
86 92 OnDisk(&'on_disk [on_disk::Node]),
87 93 }
88 94
89 95 #[derive(Debug)]
90 96 pub(super) enum ChildNodesRef<'tree, 'on_disk> {
91 97 InMemory(&'tree FastHashMap<NodeKey<'on_disk>, Node<'on_disk>>),
92 98 OnDisk(&'on_disk [on_disk::Node]),
93 99 }
94 100
95 101 #[derive(Debug)]
96 102 pub(super) enum NodeRef<'tree, 'on_disk> {
97 103 InMemory(&'tree NodeKey<'on_disk>, &'tree Node<'on_disk>),
98 104 OnDisk(&'on_disk on_disk::Node),
99 105 }
100 106
101 107 impl<'tree, 'on_disk> BorrowedPath<'tree, 'on_disk> {
102 108 pub fn detach_from_tree(&self) -> Cow<'on_disk, HgPath> {
103 109 match *self {
104 110 BorrowedPath::InMemory(in_memory) => Cow::Owned(in_memory.clone()),
105 111 BorrowedPath::OnDisk(on_disk) => Cow::Borrowed(on_disk),
106 112 }
107 113 }
108 114 }
109 115
110 116 impl<'tree, 'on_disk> std::ops::Deref for BorrowedPath<'tree, 'on_disk> {
111 117 type Target = HgPath;
112 118
113 119 fn deref(&self) -> &HgPath {
114 120 match *self {
115 121 BorrowedPath::InMemory(in_memory) => in_memory,
116 122 BorrowedPath::OnDisk(on_disk) => on_disk,
117 123 }
118 124 }
119 125 }
120 126
121 127 impl Default for ChildNodes<'_> {
122 128 fn default() -> Self {
123 129 ChildNodes::InMemory(Default::default())
124 130 }
125 131 }
126 132
127 133 impl<'on_disk> ChildNodes<'on_disk> {
128 134 pub(super) fn as_ref<'tree>(
129 135 &'tree self,
130 136 ) -> ChildNodesRef<'tree, 'on_disk> {
131 137 match self {
132 138 ChildNodes::InMemory(nodes) => ChildNodesRef::InMemory(nodes),
133 139 ChildNodes::OnDisk(nodes) => ChildNodesRef::OnDisk(nodes),
134 140 }
135 141 }
136 142
137 143 pub(super) fn is_empty(&self) -> bool {
138 144 match self {
139 145 ChildNodes::InMemory(nodes) => nodes.is_empty(),
140 146 ChildNodes::OnDisk(nodes) => nodes.is_empty(),
141 147 }
142 148 }
143 149
144 150 fn make_mut(
145 151 &mut self,
146 152 on_disk: &'on_disk [u8],
147 153 unreachable_bytes: &mut u32,
148 154 ) -> Result<
149 155 &mut FastHashMap<NodeKey<'on_disk>, Node<'on_disk>>,
150 156 DirstateV2ParseError,
151 157 > {
152 158 match self {
153 159 ChildNodes::InMemory(nodes) => Ok(nodes),
154 160 ChildNodes::OnDisk(nodes) => {
155 161 *unreachable_bytes +=
156 162 std::mem::size_of_val::<[on_disk::Node]>(nodes) as u32;
157 163 let nodes = nodes
158 164 .iter()
159 165 .map(|node| {
160 166 Ok((
161 167 node.path(on_disk)?,
162 168 node.to_in_memory_node(on_disk)?,
163 169 ))
164 170 })
165 171 .collect::<Result<_, _>>()?;
166 172 *self = ChildNodes::InMemory(nodes);
167 173 match self {
168 174 ChildNodes::InMemory(nodes) => Ok(nodes),
169 175 ChildNodes::OnDisk(_) => unreachable!(),
170 176 }
171 177 }
172 178 }
173 179 }
174 180 }
175 181
176 182 impl<'tree, 'on_disk> ChildNodesRef<'tree, 'on_disk> {
177 183 pub(super) fn get(
178 184 &self,
179 185 base_name: &HgPath,
180 186 on_disk: &'on_disk [u8],
181 187 ) -> Result<Option<NodeRef<'tree, 'on_disk>>, DirstateV2ParseError> {
182 188 match self {
183 189 ChildNodesRef::InMemory(nodes) => Ok(nodes
184 190 .get_key_value(base_name)
185 191 .map(|(k, v)| NodeRef::InMemory(k, v))),
186 192 ChildNodesRef::OnDisk(nodes) => {
187 193 let mut parse_result = Ok(());
188 194 let search_result = nodes.binary_search_by(|node| {
189 195 match node.base_name(on_disk) {
190 196 Ok(node_base_name) => node_base_name.cmp(base_name),
191 197 Err(e) => {
192 198 parse_result = Err(e);
193 199 // Dummy comparison result, `search_result` won’t
194 200 // be used since `parse_result` is an error
195 201 std::cmp::Ordering::Equal
196 202 }
197 203 }
198 204 });
199 205 parse_result.map(|()| {
200 206 search_result.ok().map(|i| NodeRef::OnDisk(&nodes[i]))
201 207 })
202 208 }
203 209 }
204 210 }
205 211
206 212 /// Iterate in undefined order
207 213 pub(super) fn iter(
208 214 &self,
209 215 ) -> impl Iterator<Item = NodeRef<'tree, 'on_disk>> {
210 216 match self {
211 217 ChildNodesRef::InMemory(nodes) => itertools::Either::Left(
212 218 nodes.iter().map(|(k, v)| NodeRef::InMemory(k, v)),
213 219 ),
214 220 ChildNodesRef::OnDisk(nodes) => {
215 221 itertools::Either::Right(nodes.iter().map(NodeRef::OnDisk))
216 222 }
217 223 }
218 224 }
219 225
220 226 /// Iterate in parallel in undefined order
221 227 pub(super) fn par_iter(
222 228 &self,
223 229 ) -> impl rayon::iter::ParallelIterator<Item = NodeRef<'tree, 'on_disk>>
224 230 {
225 231 use rayon::prelude::*;
226 232 match self {
227 233 ChildNodesRef::InMemory(nodes) => rayon::iter::Either::Left(
228 234 nodes.par_iter().map(|(k, v)| NodeRef::InMemory(k, v)),
229 235 ),
230 236 ChildNodesRef::OnDisk(nodes) => rayon::iter::Either::Right(
231 237 nodes.par_iter().map(NodeRef::OnDisk),
232 238 ),
233 239 }
234 240 }
235 241
236 242 pub(super) fn sorted(&self) -> Vec<NodeRef<'tree, 'on_disk>> {
237 243 match self {
238 244 ChildNodesRef::InMemory(nodes) => {
239 245 let mut vec: Vec<_> = nodes
240 246 .iter()
241 247 .map(|(k, v)| NodeRef::InMemory(k, v))
242 248 .collect();
243 249 fn sort_key<'a>(node: &'a NodeRef) -> &'a HgPath {
244 250 match node {
245 251 NodeRef::InMemory(path, _node) => path.base_name(),
246 252 NodeRef::OnDisk(_) => unreachable!(),
247 253 }
248 254 }
249 255 // `sort_unstable_by_key` doesn’t allow keys borrowing from the
250 256 // value: https://github.com/rust-lang/rust/issues/34162
251 257 vec.sort_unstable_by(|a, b| sort_key(a).cmp(sort_key(b)));
252 258 vec
253 259 }
254 260 ChildNodesRef::OnDisk(nodes) => {
255 261 // Nodes on disk are already sorted
256 262 nodes.iter().map(NodeRef::OnDisk).collect()
257 263 }
258 264 }
259 265 }
260 266 }
261 267
impl<'tree, 'on_disk> NodeRef<'tree, 'on_disk> {
    /// The full path of this node from the root of the tree.
    pub(super) fn full_path(
        &self,
        on_disk: &'on_disk [u8],
    ) -> Result<&'tree HgPath, DirstateV2ParseError> {
        match self {
            NodeRef::InMemory(path, _node) => Ok(path.full_path()),
            NodeRef::OnDisk(node) => node.full_path(on_disk),
        }
    }

    /// Returns a `BorrowedPath`, which can be turned into a `Cow<'on_disk,
    /// HgPath>` detached from `'tree`
    pub(super) fn full_path_borrowed(
        &self,
        on_disk: &'on_disk [u8],
    ) -> Result<BorrowedPath<'tree, 'on_disk>, DirstateV2ParseError> {
        match self {
            NodeRef::InMemory(path, _node) => match path.full_path() {
                Cow::Borrowed(on_disk) => Ok(BorrowedPath::OnDisk(on_disk)),
                Cow::Owned(in_memory) => Ok(BorrowedPath::InMemory(in_memory)),
            },
            NodeRef::OnDisk(node) => {
                Ok(BorrowedPath::OnDisk(node.full_path(on_disk)?))
            }
        }
    }

    /// The last component of this node's path.
    pub(super) fn base_name(
        &self,
        on_disk: &'on_disk [u8],
    ) -> Result<&'tree HgPath, DirstateV2ParseError> {
        match self {
            NodeRef::InMemory(path, _node) => Ok(path.base_name()),
            NodeRef::OnDisk(node) => node.base_name(on_disk),
        }
    }

    /// This node's children, in whichever representation they are stored.
    pub(super) fn children(
        &self,
        on_disk: &'on_disk [u8],
    ) -> Result<ChildNodesRef<'tree, 'on_disk>, DirstateV2ParseError> {
        match self {
            NodeRef::InMemory(_path, node) => Ok(node.children.as_ref()),
            NodeRef::OnDisk(node) => {
                Ok(ChildNodesRef::OnDisk(node.children(on_disk)?))
            }
        }
    }

    /// Whether a copy source is recorded for this node.
    pub(super) fn has_copy_source(&self) -> bool {
        match self {
            NodeRef::InMemory(_path, node) => node.copy_source.is_some(),
            NodeRef::OnDisk(node) => node.has_copy_source(),
        }
    }

    /// The copy source path for this node, if any.
    pub(super) fn copy_source(
        &self,
        on_disk: &'on_disk [u8],
    ) -> Result<Option<&'tree HgPath>, DirstateV2ParseError> {
        match self {
            NodeRef::InMemory(_path, node) => {
                Ok(node.copy_source.as_ref().map(|s| &**s))
            }
            NodeRef::OnDisk(node) => node.copy_source(on_disk),
        }
    }
    /// Returns a `BorrowedPath`, which can be turned into a `Cow<'on_disk,
    /// HgPath>` detached from `'tree`
    pub(super) fn copy_source_borrowed(
        &self,
        on_disk: &'on_disk [u8],
    ) -> Result<Option<BorrowedPath<'tree, 'on_disk>>, DirstateV2ParseError>
    {
        Ok(match self {
            NodeRef::InMemory(_path, node) => {
                node.copy_source.as_ref().map(|source| match source {
                    Cow::Borrowed(on_disk) => BorrowedPath::OnDisk(on_disk),
                    Cow::Owned(in_memory) => BorrowedPath::InMemory(in_memory),
                })
            }
            NodeRef::OnDisk(node) => node
                .copy_source(on_disk)?
                .map(|source| BorrowedPath::OnDisk(source)),
        })
    }

    /// The `DirstateEntry` for this node, if it has one (i.e. if it is a
    /// file rather than a directory placeholder).
    pub(super) fn entry(
        &self,
    ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {
        match self {
            NodeRef::InMemory(_path, node) => {
                Ok(node.data.as_entry().copied())
            }
            NodeRef::OnDisk(node) => node.entry(),
        }
    }

    /// The cached directory mtime, if this node carries one instead of an
    /// entry.
    pub(super) fn cached_directory_mtime(
        &self,
    ) -> Result<Option<TruncatedTimestamp>, DirstateV2ParseError> {
        match self {
            NodeRef::InMemory(_path, node) => Ok(match node.data {
                NodeData::CachedDirectory { mtime } => Some(mtime),
                _ => None,
            }),
            NodeRef::OnDisk(node) => node.cached_directory_mtime(),
        }
    }

    /// How many descendants of this node (not counting the node itself)
    /// have an entry.
    pub(super) fn descendants_with_entry_count(&self) -> u32 {
        match self {
            NodeRef::InMemory(_path, node) => {
                node.descendants_with_entry_count
            }
            NodeRef::OnDisk(node) => node.descendants_with_entry_count.get(),
        }
    }

    /// How many descendants of this node (not counting the node itself)
    /// have a tracked entry.
    pub(super) fn tracked_descendants_count(&self) -> u32 {
        match self {
            NodeRef::InMemory(_path, node) => node.tracked_descendants_count,
            NodeRef::OnDisk(node) => node.tracked_descendants_count.get(),
        }
    }
}
389 395
/// Represents a file or a directory
#[derive(Default, Debug)]
pub(super) struct Node<'on_disk> {
    /// Entry for a file, cached mtime for a directory, or nothing.
    pub(super) data: NodeData,

    /// Path this file was copied/renamed from, if any.
    pub(super) copy_source: Option<Cow<'on_disk, HgPath>>,

    /// Child nodes, keyed by base name.
    pub(super) children: ChildNodes<'on_disk>,

    /// How many (non-inclusive) descendants of this node have an entry.
    pub(super) descendants_with_entry_count: u32,

    /// How many (non-inclusive) descendants of this node have an entry whose
    /// state is "tracked".
    pub(super) tracked_descendants_count: u32,
}
406 412
/// Payload of a tree `Node`: a file entry, a cached directory mtime, or
/// nothing (a directory that only exists to hold children).
#[derive(Debug)]
pub(super) enum NodeData {
    Entry(DirstateEntry),
    CachedDirectory { mtime: TruncatedTimestamp },
    None,
}
413 419
// New nodes carry no entry and no cached mtime by default.
impl Default for NodeData {
    fn default() -> Self {
        NodeData::None
    }
}
419 425
420 426 impl NodeData {
421 427 fn has_entry(&self) -> bool {
422 428 match self {
423 429 NodeData::Entry(_) => true,
424 430 _ => false,
425 431 }
426 432 }
427 433
428 434 fn as_entry(&self) -> Option<&DirstateEntry> {
429 435 match self {
430 436 NodeData::Entry(entry) => Some(entry),
431 437 _ => None,
432 438 }
433 439 }
434 440
435 441 fn as_entry_mut(&mut self) -> Option<&mut DirstateEntry> {
436 442 match self {
437 443 NodeData::Entry(entry) => Some(entry),
438 444 _ => None,
439 445 }
440 446 }
441 447 }
442 448
443 449 impl<'on_disk> DirstateMap<'on_disk> {
    /// Return an empty map backed by (but not reading) `on_disk`, with all
    /// counters zeroed. The version defaults to dirstate-v1.
    pub(super) fn empty(on_disk: &'on_disk [u8]) -> Self {
        Self {
            on_disk,
            root: ChildNodes::default(),
            nodes_with_entry_count: 0,
            nodes_with_copy_source_count: 0,
            ignore_patterns_hash: [0; on_disk::IGNORE_PATTERNS_HASH_LEN],
            unreachable_bytes: 0,
            old_data_size: 0,
            dirstate_version: DirstateVersion::V1,
        }
    }
456 462
457 463 #[timed]
458 464 pub fn new_v2(
459 465 on_disk: &'on_disk [u8],
460 466 data_size: usize,
461 467 metadata: &[u8],
462 468 ) -> Result<Self, DirstateError> {
463 469 if let Some(data) = on_disk.get(..data_size) {
464 470 Ok(on_disk::read(data, metadata)?)
465 471 } else {
466 472 Err(DirstateV2ParseError::new("not enough bytes on disk").into())
467 473 }
468 474 }
469 475
    /// Parse a dirstate-v1 file into a map.
    ///
    /// Returns the map and the working-directory parents from the file
    /// header, or `None` for the parents if the file is empty.
    #[timed]
    pub fn new_v1(
        on_disk: &'on_disk [u8],
    ) -> Result<(Self, Option<DirstateParents>), DirstateError> {
        let mut map = Self::empty(on_disk);
        if map.on_disk.is_empty() {
            return Ok((map, None));
        }

        let parents = parse_dirstate_entries(
            map.on_disk,
            |path, entry, copy_source| {
                let tracked = entry.tracked();
                let node = Self::get_or_insert_node_inner(
                    map.on_disk,
                    &mut map.unreachable_bytes,
                    &mut map.root,
                    path,
                    // Paths can borrow from the v1 file content directly.
                    WithBasename::to_cow_borrowed,
                    // Keep ancestor counters in sync while descending.
                    |ancestor| {
                        if tracked {
                            ancestor.tracked_descendants_count += 1
                        }
                        ancestor.descendants_with_entry_count += 1
                    },
                )?;
                // A v1 file lists each path at most once; a duplicate means
                // the file (or the parser) is corrupt.
                assert!(
                    !node.data.has_entry(),
                    "duplicate dirstate entry in read"
                );
                assert!(
                    node.copy_source.is_none(),
                    "duplicate dirstate entry in read"
                );
                node.data = NodeData::Entry(*entry);
                node.copy_source = copy_source.map(Cow::Borrowed);
                map.nodes_with_entry_count += 1;
                if copy_source.is_some() {
                    map.nodes_with_copy_source_count += 1
                }
                Ok(())
            },
        )?;
        let parents = Some(parents.clone());

        Ok((map, parents))
    }
517 523
    /// Assuming dirstate-v2 format, returns whether the next write should
    /// append to the existing data file that contains `self.on_disk` (true),
    /// or create a new data file from scratch (false).
    ///
    /// Appends are allowed as long as the fraction of bytes in the file
    /// that are no longer reachable stays below
    /// `ACCEPTABLE_UNREACHABLE_BYTES_RATIO`.
    pub(super) fn write_should_append(&self) -> bool {
        let ratio = self.unreachable_bytes as f32 / self.on_disk.len() as f32;
        ratio < ACCEPTABLE_UNREACHABLE_BYTES_RATIO
    }
525 531
    /// Walk the tree one path component at a time and return the node at
    /// `path`, if any.
    ///
    /// Panics if `path` is empty (it must have at least one component).
    fn get_node<'tree>(
        &'tree self,
        path: &HgPath,
    ) -> Result<Option<NodeRef<'tree, 'on_disk>>, DirstateV2ParseError> {
        let mut children = self.root.as_ref();
        let mut components = path.components();
        let mut component =
            components.next().expect("expected at least one components");
        loop {
            if let Some(child) = children.get(component, self.on_disk)? {
                if let Some(next_component) = components.next() {
                    component = next_component;
                    children = child.children(self.on_disk)?;
                } else {
                    return Ok(Some(child));
                }
            } else {
                return Ok(None);
            }
        }
    }
547 553
    /// Returns a mutable reference to the node at `path` if it exists
    ///
    /// `each_ancestor` is a callback that is called for each ancestor node
    /// when descending the tree. It is used to keep the different counters
    /// of the `DirstateMap` up-to-date.
    ///
    /// The callback runs for strict ancestors only, not for the node at
    /// `path` itself.
    fn get_node_mut<'tree>(
        &'tree mut self,
        path: &HgPath,
        each_ancestor: impl FnMut(&mut Node),
    ) -> Result<Option<&'tree mut Node<'on_disk>>, DirstateV2ParseError> {
        Self::get_node_mut_inner(
            self.on_disk,
            &mut self.unreachable_bytes,
            &mut self.root,
            path,
            each_ancestor,
        )
    }
566 572
    /// Lower-level version of `get_node_mut`.
    ///
    /// This takes `root` instead of `&mut self` so that callers can mutate
    /// other fields while the returned borrow is still valid.
    ///
    /// `each_ancestor` is a callback that is called for each ancestor node
    /// when descending the tree. It is used to keep the different counters
    /// of the `DirstateMap` up-to-date.
    fn get_node_mut_inner<'tree>(
        on_disk: &'on_disk [u8],
        unreachable_bytes: &mut u32,
        root: &'tree mut ChildNodes<'on_disk>,
        path: &HgPath,
        mut each_ancestor: impl FnMut(&mut Node),
    ) -> Result<Option<&'tree mut Node<'on_disk>>, DirstateV2ParseError> {
        let mut children = root;
        let mut components = path.components();
        let mut component =
            components.next().expect("expected at least one components");
        loop {
            // `make_mut` gives a mutable (in-memory) view of this level;
            // it is handed `unreachable_bytes` so that bytes left behind in
            // the on-disk file can be accounted for.
            if let Some(child) = children
                .make_mut(on_disk, unreachable_bytes)?
                .get_mut(component)
            {
                if let Some(next_component) = components.next() {
                    each_ancestor(child);
                    component = next_component;
                    children = &mut child.children;
                } else {
                    return Ok(Some(child));
                }
            } else {
                return Ok(None);
            }
        }
    }
603 609
    /// Get a mutable reference to the node at `path`, creating it if it does
    /// not exist.
    ///
    /// `each_ancestor` is a callback that is called for each ancestor node
    /// when descending the tree. It is used to keep the different counters
    /// of the `DirstateMap` up-to-date.
    fn get_or_insert_node<'tree, 'path>(
        &'tree mut self,
        path: &'path HgPath,
        each_ancestor: impl FnMut(&mut Node),
    ) -> Result<&'tree mut Node<'on_disk>, DirstateV2ParseError> {
        Self::get_or_insert_node_inner(
            self.on_disk,
            &mut self.unreachable_bytes,
            &mut self.root,
            path,
            // Newly created paths must own their components.
            WithBasename::to_cow_owned,
            each_ancestor,
        )
    }
624 630
    /// Lower-level version of `get_or_insert_node`, which is used when
    /// parsing disk data to remove allocations for new nodes.
    fn get_or_insert_node_inner<'tree, 'path>(
        on_disk: &'on_disk [u8],
        unreachable_bytes: &mut u32,
        root: &'tree mut ChildNodes<'on_disk>,
        path: &'path HgPath,
        to_cow: impl Fn(
            WithBasename<&'path HgPath>,
        ) -> WithBasename<Cow<'on_disk, HgPath>>,
        mut each_ancestor: impl FnMut(&mut Node),
    ) -> Result<&'tree mut Node<'on_disk>, DirstateV2ParseError> {
        let mut child_nodes = root;
        let mut inclusive_ancestor_paths =
            WithBasename::inclusive_ancestors_of(path);
        let mut ancestor_path = inclusive_ancestor_paths
            .next()
            .expect("expected at least one inclusive ancestor");
        loop {
            // `raw_entry_mut` lets us look up by base name and only convert
            // the key with `to_cow` when the node has to be inserted.
            let (_, child_node) = child_nodes
                .make_mut(on_disk, unreachable_bytes)?
                .raw_entry_mut()
                .from_key(ancestor_path.base_name())
                .or_insert_with(|| (to_cow(ancestor_path), Node::default()));
            if let Some(next) = inclusive_ancestor_paths.next() {
                each_ancestor(child_node);
                ancestor_path = next;
                child_nodes = &mut child_node.children;
            } else {
                return Ok(child_node);
            }
        }
    }
658 664
    /// Set the state of the node at `filename` from the given flags and
    /// optional parent file data, creating the node if needed.
    ///
    /// `old_entry_opt` is the pre-existing entry (if any); it is used to
    /// keep ancestor entry/tracked counters consistent.
    fn reset_state(
        &mut self,
        filename: &HgPath,
        old_entry_opt: Option<DirstateEntry>,
        wc_tracked: bool,
        p1_tracked: bool,
        p2_info: bool,
        has_meaningful_mtime: bool,
        parent_file_data_opt: Option<ParentFileData>,
    ) -> Result<(), DirstateError> {
        let (had_entry, was_tracked) = match old_entry_opt {
            Some(old_entry) => (true, old_entry.tracked()),
            None => (false, false),
        };
        let node = self.get_or_insert_node(filename, |ancestor| {
            // A brand-new entry adds one to every ancestor's entry count.
            if !had_entry {
                ancestor.descendants_with_entry_count += 1;
            }
            // Adjust tracked counts only when the tracked status flips.
            if was_tracked {
                if !wc_tracked {
                    ancestor.tracked_descendants_count = ancestor
                        .tracked_descendants_count
                        .checked_sub(1)
                        .expect("tracked count to be >= 0");
                }
            } else {
                if wc_tracked {
                    ancestor.tracked_descendants_count += 1;
                }
            }
        })?;

        let v2_data = if let Some(parent_file_data) = parent_file_data_opt {
            DirstateV2Data {
                wc_tracked,
                p1_tracked,
                p2_info,
                mode_size: parent_file_data.mode_size,
                // The mtime is only recorded when the caller says it is
                // reliable.
                mtime: if has_meaningful_mtime {
                    parent_file_data.mtime
                } else {
                    None
                },
                ..Default::default()
            }
        } else {
            DirstateV2Data {
                wc_tracked,
                p1_tracked,
                p2_info,
                ..Default::default()
            }
        };
        node.data = NodeData::Entry(DirstateEntry::from_v2_data(v2_data));
        if !had_entry {
            self.nodes_with_entry_count += 1;
        }
        Ok(())
    }
718 724
    /// Mark the file at `filename` as tracked, creating its node if needed.
    ///
    /// Returns `true` when the file was not tracked before this call.
    fn set_tracked(
        &mut self,
        filename: &HgPath,
        old_entry_opt: Option<DirstateEntry>,
    ) -> Result<bool, DirstateV2ParseError> {
        let was_tracked = old_entry_opt.map_or(false, |e| e.tracked());
        let had_entry = old_entry_opt.is_some();
        let tracked_count_increment = if was_tracked { 0 } else { 1 };
        let mut new = false;

        let node = self.get_or_insert_node(filename, |ancestor| {
            if !had_entry {
                ancestor.descendants_with_entry_count += 1;
            }

            ancestor.tracked_descendants_count += tracked_count_increment;
        })?;
        if let Some(old_entry) = old_entry_opt {
            let mut e = old_entry.clone();
            if e.tracked() {
                // XXX
                // This is probably overkill for most cases, but we need this
                // to fully replace the `normallookup` call with `set_tracked`
                // one. Consider smoothing this in the future.
                e.set_possibly_dirty();
            } else {
                new = true;
                e.set_tracked();
            }
            node.data = NodeData::Entry(e)
        } else {
            node.data = NodeData::Entry(DirstateEntry::new_tracked());
            self.nodes_with_entry_count += 1;
            new = true;
        };
        Ok(new)
    }
756 762
    /// Set a node as untracked in the dirstate.
    ///
    /// It is the responsibility of the caller to remove the copy source and/or
    /// the entry itself if appropriate.
    ///
    /// # Panics
    ///
    /// Panics if the node does not exist.
    fn set_untracked(
        &mut self,
        filename: &HgPath,
        old_entry: DirstateEntry,
    ) -> Result<(), DirstateV2ParseError> {
        // Every ancestor loses one tracked descendant.
        let node = self
            .get_node_mut(filename, |ancestor| {
                ancestor.tracked_descendants_count = ancestor
                    .tracked_descendants_count
                    .checked_sub(1)
                    .expect("tracked_descendants_count should be >= 0");
            })?
            .expect("node should exist");
        let mut new_entry = old_entry.clone();
        new_entry.set_untracked();
        node.data = NodeData::Entry(new_entry);
        Ok(())
    }
783 789
    /// Set a node as clean in the dirstate.
    ///
    /// It is the responsibility of the caller to remove the copy source.
    ///
    /// # Panics
    ///
    /// Panics if the node does not exist.
    fn set_clean(
        &mut self,
        filename: &HgPath,
        old_entry: DirstateEntry,
        mode: u32,
        size: u32,
        mtime: TruncatedTimestamp,
    ) -> Result<(), DirstateError> {
        let node = self
            .get_node_mut(filename, |ancestor| {
                // Marking clean may also newly track the file.
                if !old_entry.tracked() {
                    ancestor.tracked_descendants_count += 1;
                }
            })?
            .expect("node should exist");
        let mut new_entry = old_entry.clone();
        new_entry.set_clean(mode, size, mtime);
        node.data = NodeData::Entry(new_entry);
        Ok(())
    }
811 817
812 818 /// Set a node as possibly dirty in the dirstate.
813 819 ///
814 820 /// # Panics
815 821 ///
816 822 /// Panics if the node does not exist.
817 823 fn set_possibly_dirty(
818 824 &mut self,
819 825 filename: &HgPath,
820 826 ) -> Result<(), DirstateError> {
821 827 let node = self
822 828 .get_node_mut(filename, |_ancestor| {})?
823 829 .expect("node should exist");
824 830 let entry = node.data.as_entry_mut().expect("entry should exist");
825 831 entry.set_possibly_dirty();
826 832 node.data = NodeData::Entry(*entry);
827 833 Ok(())
828 834 }
829 835
830 836 /// Clears the cached mtime for the (potential) folder at `path`.
831 837 pub(super) fn clear_cached_mtime(
832 838 &mut self,
833 839 path: &HgPath,
834 840 ) -> Result<(), DirstateV2ParseError> {
835 841 let node = match self.get_node_mut(path, |_ancestor| {})? {
836 842 Some(node) => node,
837 843 None => return Ok(()),
838 844 };
839 845 if let NodeData::CachedDirectory { .. } = &node.data {
840 846 node.data = NodeData::None
841 847 }
842 848 Ok(())
843 849 }
844 850
845 851 /// Sets the cached mtime for the (potential) folder at `path`.
846 852 pub(super) fn set_cached_mtime(
847 853 &mut self,
848 854 path: &HgPath,
849 855 mtime: TruncatedTimestamp,
850 856 ) -> Result<(), DirstateV2ParseError> {
851 857 let node = match self.get_node_mut(path, |_ancestor| {})? {
852 858 Some(node) => node,
853 859 None => return Ok(()),
854 860 };
855 861 match &node.data {
856 862 NodeData::Entry(_) => {} // Don’t overwrite an entry
857 863 NodeData::CachedDirectory { .. } | NodeData::None => {
858 864 node.data = NodeData::CachedDirectory { mtime }
859 865 }
860 866 }
861 867 Ok(())
862 868 }
863 869
    /// Iterate over every node of the tree in post-order: a node's
    /// descendants are all yielded before the node itself.
    fn iter_nodes<'tree>(
        &'tree self,
    ) -> impl Iterator<
        Item = Result<NodeRef<'tree, 'on_disk>, DirstateV2ParseError>,
    > + 'tree {
        // Depth first tree traversal.
        //
        // If we could afford internal iteration and recursion,
        // this would look like:
        //
        // ```
        // fn traverse_children(
        //     children: &ChildNodes,
        //     each: &mut impl FnMut(&Node),
        // ) {
        //     for child in children.values() {
        //         traverse_children(&child.children, each);
        //         each(child);
        //     }
        // }
        // ```
        //
        // However we want an external iterator and therefore can’t use the
        // call stack. Use an explicit stack instead:
        let mut stack = Vec::new();
        let mut iter = self.root.as_ref().iter();
        std::iter::from_fn(move || {
            while let Some(child_node) = iter.next() {
                let children = match child_node.children(self.on_disk) {
                    Ok(children) => children,
                    Err(error) => return Some(Err(error)),
                };
                // Pseudo-recursion
                let new_iter = children.iter();
                let old_iter = std::mem::replace(&mut iter, new_iter);
                stack.push((child_node, old_iter));
            }
            // Found the end of a `children.iter()` iterator.
            if let Some((child_node, next_iter)) = stack.pop() {
                // "Return" from pseudo-recursion by restoring state from the
                // explicit stack
                iter = next_iter;

                Some(Ok(child_node))
            } else {
                // Reached the bottom of the stack, we’re done
                None
            }
        })
    }
914 920
915 921 fn count_dropped_path(unreachable_bytes: &mut u32, path: &Cow<HgPath>) {
916 922 if let Cow::Borrowed(path) = path {
917 923 *unreachable_bytes += path.len() as u32
918 924 }
919 925 }
920 926 }
921 927
/// Like `Iterator::filter_map`, but over a fallible iterator of `Result`s.
///
/// The callback is only called for incoming `Ok` values. Errors are passed
/// through as-is. In order to let it use the `?` operator the callback is
/// expected to return a `Result` of `Option`, instead of an `Option` of
/// `Result`.
fn filter_map_results<'a, I, F, A, B, E>(
    iter: I,
    f: F,
) -> impl Iterator<Item = Result<B, E>> + 'a
where
    I: Iterator<Item = Result<A, E>> + 'a,
    F: Fn(A) -> Result<Option<B>, E> + 'a,
{
    iter.filter_map(move |item| match item {
        // Incoming errors flow through untouched.
        Err(error) => Some(Err(error)),
        // `Ok(None)` from the callback filters the item out; everything
        // else (mapped value or new error) is yielded.
        Ok(value) => match f(value) {
            Ok(Some(mapped)) => Some(Ok(mapped)),
            Ok(None) => None,
            Err(error) => Some(Err(error)),
        },
    })
}
941 947
942 948 impl OwningDirstateMap {
    /// Drop every node and reset the entry/copy-source counters.
    //
    // NOTE(review): `unreachable_bytes` and `ignore_patterns_hash` are left
    // untouched here — confirm that is intentional.
    pub fn clear(&mut self) {
        self.with_dmap_mut(|map| {
            map.root = Default::default();
            map.nodes_with_entry_count = 0;
            map.nodes_with_copy_source_count = 0;
        });
    }
950 956
    /// Mark `filename` as tracked, creating its entry if needed.
    ///
    /// Returns `true` when the file was not tracked before the call.
    pub fn set_tracked(
        &mut self,
        filename: &HgPath,
    ) -> Result<bool, DirstateV2ParseError> {
        let old_entry_opt = self.get(filename)?;
        self.with_dmap_mut(|map| map.set_tracked(filename, old_entry_opt))
    }
958 964
    /// Stop tracking `filename`.
    ///
    /// Returns `false` when the file had no dirstate entry at all, `true`
    /// otherwise.
    pub fn set_untracked(
        &mut self,
        filename: &HgPath,
    ) -> Result<bool, DirstateError> {
        let old_entry_opt = self.get(filename)?;
        match old_entry_opt {
            None => Ok(false),
            Some(old_entry) => {
                if !old_entry.tracked() {
                    // `DirstateMap::set_untracked` is not a noop if
                    // already not tracked as it will decrement the
                    // tracked counters while going down.
                    return Ok(true);
                }
                if old_entry.added() {
                    // Untracking an "added" entry will just result in a
                    // worthless entry (and other parts of the code will
                    // complain about it), just drop it entirely.
                    self.drop_entry_and_copy_source(filename)?;
                    return Ok(true);
                }
                if !old_entry.p2_info() {
                    self.copy_map_remove(filename)?;
                }

                self.with_dmap_mut(|map| {
                    map.set_untracked(filename, old_entry)?;
                    Ok(true)
                })
            }
        }
    }
991 997
    /// Record `filename` as clean with the given stat data, dropping any
    /// copy source.
    ///
    /// Errors with `PathNotFound` if the file has no dirstate entry.
    pub fn set_clean(
        &mut self,
        filename: &HgPath,
        mode: u32,
        size: u32,
        mtime: TruncatedTimestamp,
    ) -> Result<(), DirstateError> {
        let old_entry = match self.get(filename)? {
            None => {
                return Err(
                    DirstateMapError::PathNotFound(filename.into()).into()
                )
            }
            Some(e) => e,
        };
        self.copy_map_remove(filename)?;
        self.with_dmap_mut(|map| {
            map.set_clean(filename, old_entry, mode, size, mtime)
        })
    }
1012 1018
    /// Mark `filename` as possibly dirty (its cached stat data can no
    /// longer be trusted).
    ///
    /// Errors with `PathNotFound` if the file has no dirstate entry.
    pub fn set_possibly_dirty(
        &mut self,
        filename: &HgPath,
    ) -> Result<(), DirstateError> {
        if self.get(filename)?.is_none() {
            return Err(DirstateMapError::PathNotFound(filename.into()).into());
        }
        self.with_dmap_mut(|map| map.set_possibly_dirty(filename))
    }
1022 1028
    /// Rewrite the state of `filename` from the given flags.
    ///
    /// When the file is tracked nowhere (not in the working copy, p1 or
    /// p2), its entry and copy source are dropped entirely; otherwise the
    /// copy source is cleared and the entry rewritten.
    pub fn reset_state(
        &mut self,
        filename: &HgPath,
        wc_tracked: bool,
        p1_tracked: bool,
        p2_info: bool,
        has_meaningful_mtime: bool,
        parent_file_data_opt: Option<ParentFileData>,
    ) -> Result<(), DirstateError> {
        if !(p1_tracked || p2_info || wc_tracked) {
            self.drop_entry_and_copy_source(filename)?;
            return Ok(());
        }
        self.copy_map_remove(filename)?;
        let old_entry_opt = self.get(filename)?;
        self.with_dmap_mut(|map| {
            map.reset_state(
                filename,
                old_entry_opt,
                wc_tracked,
                p1_tracked,
                p2_info,
                has_meaningful_mtime,
                parent_file_data_opt,
            )
        })
    }
1050 1056
    /// Remove the entry and copy source (if any) for `filename`, pruning
    /// tree nodes that become empty and keeping the map-level and
    /// per-ancestor counters consistent.
    pub fn drop_entry_and_copy_source(
        &mut self,
        filename: &HgPath,
    ) -> Result<(), DirstateError> {
        let was_tracked = self.get(filename)?.map_or(false, |e| e.tracked());
        // Summary of what happened to the leaf node at `filename`.
        struct Dropped {
            was_tracked: bool,
            had_entry: bool,
            had_copy_source: bool,
        }

        /// If this returns `Ok(Some((dropped, removed)))`, then
        ///
        /// * `dropped` is about the leaf node that was at `filename`
        /// * `removed` is whether this particular level of recursion just
        ///   removed a node in `nodes`.
        fn recur<'on_disk>(
            on_disk: &'on_disk [u8],
            unreachable_bytes: &mut u32,
            nodes: &mut ChildNodes<'on_disk>,
            path: &HgPath,
        ) -> Result<Option<(Dropped, bool)>, DirstateV2ParseError> {
            let (first_path_component, rest_of_path) =
                path.split_first_component();
            let nodes = nodes.make_mut(on_disk, unreachable_bytes)?;
            let node = if let Some(node) = nodes.get_mut(first_path_component)
            {
                node
            } else {
                return Ok(None);
            };
            let dropped;
            if let Some(rest) = rest_of_path {
                if let Some((d, removed)) = recur(
                    on_disk,
                    unreachable_bytes,
                    &mut node.children,
                    rest,
                )? {
                    dropped = d;
                    // Keep this ancestor's counters in sync with what was
                    // dropped further down.
                    if dropped.had_entry {
                        node.descendants_with_entry_count = node
                            .descendants_with_entry_count
                            .checked_sub(1)
                            .expect(
                                "descendants_with_entry_count should be >= 0",
                            );
                    }
                    if dropped.was_tracked {
                        node.tracked_descendants_count = node
                            .tracked_descendants_count
                            .checked_sub(1)
                            .expect(
                                "tracked_descendants_count should be >= 0",
                            );
                    }

                    // Directory caches must be invalidated when removing a
                    // child node
                    if removed {
                        if let NodeData::CachedDirectory { .. } = &node.data {
                            node.data = NodeData::None
                        }
                    }
                } else {
                    return Ok(None);
                }
            } else {
                // Reached the leaf: drop its entry and copy source.
                let entry = node.data.as_entry();
                let was_tracked = entry.map_or(false, |entry| entry.tracked());
                let had_entry = entry.is_some();
                if had_entry {
                    node.data = NodeData::None
                }
                let mut had_copy_source = false;
                if let Some(source) = &node.copy_source {
                    DirstateMap::count_dropped_path(unreachable_bytes, source);
                    had_copy_source = true;
                    node.copy_source = None
                }
                dropped = Dropped {
                    was_tracked,
                    had_entry,
                    had_copy_source,
                };
            }
            // After recursion, for both leaf (rest_of_path is None) nodes and
            // parent nodes, remove a node if it just became empty.
            let remove = !node.data.has_entry()
                && node.copy_source.is_none()
                && node.children.is_empty();
            if remove {
                let (key, _) =
                    nodes.remove_entry(first_path_component).unwrap();
                DirstateMap::count_dropped_path(
                    unreachable_bytes,
                    key.full_path(),
                )
            }
            Ok(Some((dropped, remove)))
        }

        self.with_dmap_mut(|map| {
            if let Some((dropped, _removed)) = recur(
                map.on_disk,
                &mut map.unreachable_bytes,
                &mut map.root,
                filename,
            )? {
                if dropped.had_entry {
                    map.nodes_with_entry_count = map
                        .nodes_with_entry_count
                        .checked_sub(1)
                        .expect("nodes_with_entry_count should be >= 0");
                }
                if dropped.had_copy_source {
                    map.nodes_with_copy_source_count = map
                        .nodes_with_copy_source_count
                        .checked_sub(1)
                        .expect("nodes_with_copy_source_count should be >= 0");
                }
            } else {
                // Nothing was found at `filename`: a tracked file would
                // necessarily have had a node.
                debug_assert!(!was_tracked);
            }
            Ok(())
        })
    }
1178 1184
1179 1185 pub fn has_tracked_dir(
1180 1186 &mut self,
1181 1187 directory: &HgPath,
1182 1188 ) -> Result<bool, DirstateError> {
1183 1189 self.with_dmap_mut(|map| {
1184 1190 if let Some(node) = map.get_node(directory)? {
1185 1191 // A node without a `DirstateEntry` was created to hold child
1186 1192 // nodes, and is therefore a directory.
1187 1193 let is_dir = node.entry()?.is_none();
1188 1194 Ok(is_dir && node.tracked_descendants_count() > 0)
1189 1195 } else {
1190 1196 Ok(false)
1191 1197 }
1192 1198 })
1193 1199 }
1194 1200
1195 1201 pub fn has_dir(
1196 1202 &mut self,
1197 1203 directory: &HgPath,
1198 1204 ) -> Result<bool, DirstateError> {
1199 1205 self.with_dmap_mut(|map| {
1200 1206 if let Some(node) = map.get_node(directory)? {
1201 1207 // A node without a `DirstateEntry` was created to hold child
1202 1208 // nodes, and is therefore a directory.
1203 1209 let is_dir = node.entry()?.is_none();
1204 1210 Ok(is_dir && node.descendants_with_entry_count() > 0)
1205 1211 } else {
1206 1212 Ok(false)
1207 1213 }
1208 1214 })
1209 1215 }
1210 1216
    /// Serialize the map (entries and copy sources) plus `parents` into the
    /// dirstate-v1 on-disk format.
    #[timed]
    pub fn pack_v1(
        &self,
        parents: DirstateParents,
    ) -> Result<Vec<u8>, DirstateError> {
        let map = self.get_map();
        // Optimization (to be measured?): pre-compute size to avoid `Vec`
        // reallocations
        let mut size = parents.as_bytes().len();
        for node in map.iter_nodes() {
            let node = node?;
            if node.entry()?.is_some() {
                size += packed_entry_size(
                    node.full_path(map.on_disk)?,
                    node.copy_source(map.on_disk)?,
                );
            }
        }

        let mut packed = Vec::with_capacity(size);
        packed.extend(parents.as_bytes());

        for node in map.iter_nodes() {
            let node = node?;
            if let Some(entry) = node.entry()? {
                pack_entry(
                    node.full_path(map.on_disk)?,
                    &entry,
                    node.copy_source(map.on_disk)?,
                    &mut packed,
                );
            }
        }
        Ok(packed)
    }
1246 1252
    /// Returns new data and metadata together with whether that data should
    /// be appended to the existing data file whose content is at
    /// `map.on_disk` (true), instead of written to a new data file
    /// (false), and the previous size of data on disk.
    ///
    /// `write_mode` controls the append-vs-new-file decision made by
    /// `on_disk::write` (presumably auto / force-new — see
    /// `DirstateMapWriteMode`; TODO confirm the variants' semantics there).
    #[timed]
    pub fn pack_v2(
        &self,
        write_mode: DirstateMapWriteMode,
    ) -> Result<(Vec<u8>, on_disk::TreeMetadata, bool, usize), DirstateError>
    {
        let map = self.get_map();
        on_disk::write(map, write_mode)
    }
1260 1266
1261 1267 /// `callback` allows the caller to process and do something with the
1262 1268 /// results of the status. This is needed to do so efficiently (i.e.
1263 1269 /// without cloning the `DirstateStatus` object with its paths) because
1264 1270 /// we need to borrow from `Self`.
1265 1271 pub fn with_status<R>(
1266 1272 &mut self,
1267 1273 matcher: &(dyn Matcher + Sync),
1268 1274 root_dir: PathBuf,
1269 1275 ignore_files: Vec<PathBuf>,
1270 1276 options: StatusOptions,
1271 1277 callback: impl for<'r> FnOnce(
1272 1278 Result<(DirstateStatus<'r>, Vec<PatternFileWarning>), StatusError>,
1273 1279 ) -> R,
1274 1280 ) -> R {
1275 1281 self.with_dmap_mut(|map| {
1276 1282 callback(super::status::status(
1277 1283 map,
1278 1284 matcher,
1279 1285 root_dir,
1280 1286 ignore_files,
1281 1287 options,
1282 1288 ))
1283 1289 })
1284 1290 }
1285 1291
1286 1292 pub fn copy_map_len(&self) -> usize {
1287 1293 let map = self.get_map();
1288 1294 map.nodes_with_copy_source_count as usize
1289 1295 }
1290 1296
1291 1297 pub fn copy_map_iter(&self) -> CopyMapIter<'_> {
1292 1298 let map = self.get_map();
1293 1299 Box::new(filter_map_results(map.iter_nodes(), move |node| {
1294 1300 Ok(if let Some(source) = node.copy_source(map.on_disk)? {
1295 1301 Some((node.full_path(map.on_disk)?, source))
1296 1302 } else {
1297 1303 None
1298 1304 })
1299 1305 }))
1300 1306 }
1301 1307
1302 1308 pub fn copy_map_contains_key(
1303 1309 &self,
1304 1310 key: &HgPath,
1305 1311 ) -> Result<bool, DirstateV2ParseError> {
1306 1312 let map = self.get_map();
1307 1313 Ok(if let Some(node) = map.get_node(key)? {
1308 1314 node.has_copy_source()
1309 1315 } else {
1310 1316 false
1311 1317 })
1312 1318 }
1313 1319
1314 1320 pub fn copy_map_get(
1315 1321 &self,
1316 1322 key: &HgPath,
1317 1323 ) -> Result<Option<&HgPath>, DirstateV2ParseError> {
1318 1324 let map = self.get_map();
1319 1325 if let Some(node) = map.get_node(key)? {
1320 1326 if let Some(source) = node.copy_source(map.on_disk)? {
1321 1327 return Ok(Some(source));
1322 1328 }
1323 1329 }
1324 1330 Ok(None)
1325 1331 }
1326 1332
1327 1333 pub fn copy_map_remove(
1328 1334 &mut self,
1329 1335 key: &HgPath,
1330 1336 ) -> Result<Option<HgPathBuf>, DirstateV2ParseError> {
1331 1337 self.with_dmap_mut(|map| {
1332 1338 let count = &mut map.nodes_with_copy_source_count;
1333 1339 let unreachable_bytes = &mut map.unreachable_bytes;
1334 1340 Ok(DirstateMap::get_node_mut_inner(
1335 1341 map.on_disk,
1336 1342 unreachable_bytes,
1337 1343 &mut map.root,
1338 1344 key,
1339 1345 |_ancestor| {},
1340 1346 )?
1341 1347 .and_then(|node| {
1342 1348 if let Some(source) = &node.copy_source {
1343 1349 *count = count
1344 1350 .checked_sub(1)
1345 1351 .expect("nodes_with_copy_source_count should be >= 0");
1346 1352 DirstateMap::count_dropped_path(unreachable_bytes, source);
1347 1353 }
1348 1354 node.copy_source.take().map(Cow::into_owned)
1349 1355 }))
1350 1356 })
1351 1357 }
1352 1358
1353 1359 pub fn copy_map_insert(
1354 1360 &mut self,
1355 1361 key: &HgPath,
1356 1362 value: &HgPath,
1357 1363 ) -> Result<Option<HgPathBuf>, DirstateV2ParseError> {
1358 1364 self.with_dmap_mut(|map| {
1359 1365 let node = map.get_or_insert_node(&key, |_ancestor| {})?;
1360 1366 let had_copy_source = node.copy_source.is_none();
1361 1367 let old = node
1362 1368 .copy_source
1363 1369 .replace(value.to_owned().into())
1364 1370 .map(Cow::into_owned);
1365 1371 if had_copy_source {
1366 1372 map.nodes_with_copy_source_count += 1
1367 1373 }
1368 1374 Ok(old)
1369 1375 })
1370 1376 }
1371 1377
1372 1378 pub fn len(&self) -> usize {
1373 1379 let map = self.get_map();
1374 1380 map.nodes_with_entry_count as usize
1375 1381 }
1376 1382
1377 1383 pub fn contains_key(
1378 1384 &self,
1379 1385 key: &HgPath,
1380 1386 ) -> Result<bool, DirstateV2ParseError> {
1381 1387 Ok(self.get(key)?.is_some())
1382 1388 }
1383 1389
1384 1390 pub fn get(
1385 1391 &self,
1386 1392 key: &HgPath,
1387 1393 ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {
1388 1394 let map = self.get_map();
1389 1395 Ok(if let Some(node) = map.get_node(key)? {
1390 1396 node.entry()?
1391 1397 } else {
1392 1398 None
1393 1399 })
1394 1400 }
1395 1401
1396 1402 pub fn iter(&self) -> StateMapIter<'_> {
1397 1403 let map = self.get_map();
1398 1404 Box::new(filter_map_results(map.iter_nodes(), move |node| {
1399 1405 Ok(if let Some(entry) = node.entry()? {
1400 1406 Some((node.full_path(map.on_disk)?, entry))
1401 1407 } else {
1402 1408 None
1403 1409 })
1404 1410 }))
1405 1411 }
1406 1412
1407 1413 pub fn iter_tracked_dirs(
1408 1414 &mut self,
1409 1415 ) -> Result<
1410 1416 Box<
1411 1417 dyn Iterator<Item = Result<&HgPath, DirstateV2ParseError>>
1412 1418 + Send
1413 1419 + '_,
1414 1420 >,
1415 1421 DirstateError,
1416 1422 > {
1417 1423 let map = self.get_map();
1418 1424 let on_disk = map.on_disk;
1419 1425 Ok(Box::new(filter_map_results(
1420 1426 map.iter_nodes(),
1421 1427 move |node| {
1422 1428 Ok(if node.tracked_descendants_count() > 0 {
1423 1429 Some(node.full_path(on_disk)?)
1424 1430 } else {
1425 1431 None
1426 1432 })
1427 1433 },
1428 1434 )))
1429 1435 }
1430 1436
1431 1437 /// Only public because it needs to be exposed to the Python layer.
1432 1438 /// It is not the full `setparents` logic, only the parts that mutate the
1433 1439 /// entries.
1434 1440 pub fn setparents_fixup(
1435 1441 &mut self,
1436 1442 ) -> Result<Vec<(HgPathBuf, HgPathBuf)>, DirstateV2ParseError> {
1437 1443 // XXX
1438 1444 // All the copying and re-querying is quite inefficient, but this is
1439 1445 // still a lot better than doing it from Python.
1440 1446 //
1441 1447 // The better solution is to develop a mechanism for `iter_mut`,
1442 1448 // which will be a lot more involved: we're dealing with a lazy,
1443 1449 // append-mostly, tree-like data structure. This will do for now.
1444 1450 let mut copies = vec![];
1445 1451 let mut files_with_p2_info = vec![];
1446 1452 for res in self.iter() {
1447 1453 let (path, entry) = res?;
1448 1454 if entry.p2_info() {
1449 1455 files_with_p2_info.push(path.to_owned())
1450 1456 }
1451 1457 }
1452 1458 self.with_dmap_mut(|map| {
1453 1459 for path in files_with_p2_info.iter() {
1454 1460 let node = map.get_or_insert_node(path, |_| {})?;
1455 1461 let entry =
1456 1462 node.data.as_entry_mut().expect("entry should exist");
1457 1463 entry.drop_merge_data();
1458 1464 if let Some(source) = node.copy_source.take().as_deref() {
1459 1465 copies.push((path.to_owned(), source.to_owned()));
1460 1466 }
1461 1467 }
1462 1468 Ok(copies)
1463 1469 })
1464 1470 }
1465 1471
1466 1472 pub fn debug_iter(
1467 1473 &self,
1468 1474 all: bool,
1469 1475 ) -> Box<
1470 1476 dyn Iterator<
1471 1477 Item = Result<
1472 1478 (&HgPath, (u8, i32, i32, i32)),
1473 1479 DirstateV2ParseError,
1474 1480 >,
1475 1481 > + Send
1476 1482 + '_,
1477 1483 > {
1478 1484 let map = self.get_map();
1479 1485 Box::new(filter_map_results(map.iter_nodes(), move |node| {
1480 1486 let debug_tuple = if let Some(entry) = node.entry()? {
1481 1487 entry.debug_tuple()
1482 1488 } else if !all {
1483 1489 return Ok(None);
1484 1490 } else if let Some(mtime) = node.cached_directory_mtime()? {
1485 1491 (b' ', 0, -1, mtime.truncated_seconds() as i32)
1486 1492 } else {
1487 1493 (b' ', 0, -1, -1)
1488 1494 };
1489 1495 Ok(Some((node.full_path(map.on_disk)?, debug_tuple)))
1490 1496 }))
1491 1497 }
1492 1498 }
1493 1499 #[cfg(test)]
1494 1500 mod tests {
1495 1501 use super::*;
1496 1502
1497 1503 /// Shortcut to return tracked descendants of a path.
1498 1504 /// Panics if the path does not exist.
1499 1505 fn tracked_descendants(map: &OwningDirstateMap, path: &[u8]) -> u32 {
1500 1506 let path = dbg!(HgPath::new(path));
1501 1507 let node = map.get_map().get_node(path);
1502 1508 node.unwrap().unwrap().tracked_descendants_count()
1503 1509 }
1504 1510
1505 1511 /// Shortcut to return descendants with an entry.
1506 1512 /// Panics if the path does not exist.
1507 1513 fn descendants_with_an_entry(map: &OwningDirstateMap, path: &[u8]) -> u32 {
1508 1514 let path = dbg!(HgPath::new(path));
1509 1515 let node = map.get_map().get_node(path);
1510 1516 node.unwrap().unwrap().descendants_with_entry_count()
1511 1517 }
1512 1518
1513 1519 fn assert_does_not_exist(map: &OwningDirstateMap, path: &[u8]) {
1514 1520 let path = dbg!(HgPath::new(path));
1515 1521 let node = map.get_map().get_node(path);
1516 1522 assert!(node.unwrap().is_none());
1517 1523 }
1518 1524
1519 1525 /// Shortcut for path creation in tests
1520 1526 fn p(b: &[u8]) -> &HgPath {
1521 1527 HgPath::new(b)
1522 1528 }
1523 1529
1524 1530 /// Test the very simple case a single tracked file
1525 1531 #[test]
1526 1532 fn test_tracked_descendants_simple() -> Result<(), DirstateError> {
1527 1533 let mut map = OwningDirstateMap::new_empty(vec![]);
1528 1534 assert_eq!(map.len(), 0);
1529 1535
1530 1536 map.set_tracked(p(b"some/nested/path"))?;
1531 1537
1532 1538 assert_eq!(map.len(), 1);
1533 1539 assert_eq!(tracked_descendants(&map, b"some"), 1);
1534 1540 assert_eq!(tracked_descendants(&map, b"some/nested"), 1);
1535 1541 assert_eq!(tracked_descendants(&map, b"some/nested/path"), 0);
1536 1542
1537 1543 map.set_untracked(p(b"some/nested/path"))?;
1538 1544 assert_eq!(map.len(), 0);
1539 1545 assert!(map.get_map().get_node(p(b"some"))?.is_none());
1540 1546
1541 1547 Ok(())
1542 1548 }
1543 1549
1544 1550 /// Test the simple case of all tracked, but multiple files
1545 1551 #[test]
1546 1552 fn test_tracked_descendants_multiple() -> Result<(), DirstateError> {
1547 1553 let mut map = OwningDirstateMap::new_empty(vec![]);
1548 1554
1549 1555 map.set_tracked(p(b"some/nested/path"))?;
1550 1556 map.set_tracked(p(b"some/nested/file"))?;
1551 1557 // one layer without any files to test deletion cascade
1552 1558 map.set_tracked(p(b"some/other/nested/path"))?;
1553 1559 map.set_tracked(p(b"root_file"))?;
1554 1560 map.set_tracked(p(b"some/file"))?;
1555 1561 map.set_tracked(p(b"some/file2"))?;
1556 1562 map.set_tracked(p(b"some/file3"))?;
1557 1563
1558 1564 assert_eq!(map.len(), 7);
1559 1565 assert_eq!(tracked_descendants(&map, b"some"), 6);
1560 1566 assert_eq!(tracked_descendants(&map, b"some/nested"), 2);
1561 1567 assert_eq!(tracked_descendants(&map, b"some/other"), 1);
1562 1568 assert_eq!(tracked_descendants(&map, b"some/other/nested"), 1);
1563 1569 assert_eq!(tracked_descendants(&map, b"some/nested/path"), 0);
1564 1570
1565 1571 map.set_untracked(p(b"some/nested/path"))?;
1566 1572 assert_eq!(map.len(), 6);
1567 1573 assert_eq!(tracked_descendants(&map, b"some"), 5);
1568 1574 assert_eq!(tracked_descendants(&map, b"some/nested"), 1);
1569 1575 assert_eq!(tracked_descendants(&map, b"some/other"), 1);
1570 1576 assert_eq!(tracked_descendants(&map, b"some/other/nested"), 1);
1571 1577
1572 1578 map.set_untracked(p(b"some/nested/file"))?;
1573 1579 assert_eq!(map.len(), 5);
1574 1580 assert_eq!(tracked_descendants(&map, b"some"), 4);
1575 1581 assert_eq!(tracked_descendants(&map, b"some/other"), 1);
1576 1582 assert_eq!(tracked_descendants(&map, b"some/other/nested"), 1);
1577 1583 assert_does_not_exist(&map, b"some_nested");
1578 1584
1579 1585 map.set_untracked(p(b"some/other/nested/path"))?;
1580 1586 assert_eq!(map.len(), 4);
1581 1587 assert_eq!(tracked_descendants(&map, b"some"), 3);
1582 1588 assert_does_not_exist(&map, b"some/other");
1583 1589
1584 1590 map.set_untracked(p(b"root_file"))?;
1585 1591 assert_eq!(map.len(), 3);
1586 1592 assert_eq!(tracked_descendants(&map, b"some"), 3);
1587 1593 assert_does_not_exist(&map, b"root_file");
1588 1594
1589 1595 map.set_untracked(p(b"some/file"))?;
1590 1596 assert_eq!(map.len(), 2);
1591 1597 assert_eq!(tracked_descendants(&map, b"some"), 2);
1592 1598 assert_does_not_exist(&map, b"some/file");
1593 1599
1594 1600 map.set_untracked(p(b"some/file2"))?;
1595 1601 assert_eq!(map.len(), 1);
1596 1602 assert_eq!(tracked_descendants(&map, b"some"), 1);
1597 1603 assert_does_not_exist(&map, b"some/file2");
1598 1604
1599 1605 map.set_untracked(p(b"some/file3"))?;
1600 1606 assert_eq!(map.len(), 0);
1601 1607 assert_does_not_exist(&map, b"some/file3");
1602 1608
1603 1609 Ok(())
1604 1610 }
1605 1611
1606 1612 /// Check with a mix of tracked and non-tracked items
1607 1613 #[test]
1608 1614 fn test_tracked_descendants_different() -> Result<(), DirstateError> {
1609 1615 let mut map = OwningDirstateMap::new_empty(vec![]);
1610 1616
1611 1617 // A file that was just added
1612 1618 map.set_tracked(p(b"some/nested/path"))?;
1613 1619 // This has no information, the dirstate should ignore it
1614 1620 map.reset_state(p(b"some/file"), false, false, false, false, None)?;
1615 1621 assert_does_not_exist(&map, b"some/file");
1616 1622
1617 1623 // A file that was removed
1618 1624 map.reset_state(
1619 1625 p(b"some/nested/file"),
1620 1626 false,
1621 1627 true,
1622 1628 false,
1623 1629 false,
1624 1630 None,
1625 1631 )?;
1626 1632 assert!(!map.get(p(b"some/nested/file"))?.unwrap().tracked());
1627 1633 // Only present in p2
1628 1634 map.reset_state(p(b"some/file3"), false, false, true, false, None)?;
1629 1635 assert!(!map.get(p(b"some/file3"))?.unwrap().tracked());
1630 1636 // A file that was merged
1631 1637 map.reset_state(p(b"root_file"), true, true, true, false, None)?;
1632 1638 assert!(map.get(p(b"root_file"))?.unwrap().tracked());
1633 1639 // A file that is added, with info from p2
1634 1640 // XXX is that actually possible?
1635 1641 map.reset_state(p(b"some/file2"), true, false, true, false, None)?;
1636 1642 assert!(map.get(p(b"some/file2"))?.unwrap().tracked());
1637 1643 // A clean file
1638 1644 // One layer without any files to test deletion cascade
1639 1645 map.reset_state(
1640 1646 p(b"some/other/nested/path"),
1641 1647 true,
1642 1648 true,
1643 1649 false,
1644 1650 false,
1645 1651 None,
1646 1652 )?;
1647 1653 assert!(map.get(p(b"some/other/nested/path"))?.unwrap().tracked());
1648 1654
1649 1655 assert_eq!(map.len(), 6);
1650 1656 assert_eq!(tracked_descendants(&map, b"some"), 3);
1651 1657 assert_eq!(descendants_with_an_entry(&map, b"some"), 5);
1652 1658 assert_eq!(tracked_descendants(&map, b"some/other/nested"), 1);
1653 1659 assert_eq!(descendants_with_an_entry(&map, b"some/other/nested"), 1);
1654 1660 assert_eq!(tracked_descendants(&map, b"some/other/nested/path"), 0);
1655 1661 assert_eq!(
1656 1662 descendants_with_an_entry(&map, b"some/other/nested/path"),
1657 1663 0
1658 1664 );
1659 1665 assert_eq!(tracked_descendants(&map, b"some/nested"), 1);
1660 1666 assert_eq!(descendants_with_an_entry(&map, b"some/nested"), 2);
1661 1667
1662 1668 // might as well check this
1663 1669 map.set_untracked(p(b"path/does/not/exist"))?;
1664 1670 assert_eq!(map.len(), 6);
1665 1671
1666 1672 map.set_untracked(p(b"some/other/nested/path"))?;
1667 1673 // It is set untracked but not deleted since it held other information
1668 1674 assert_eq!(map.len(), 6);
1669 1675 assert_eq!(tracked_descendants(&map, b"some"), 2);
1670 1676 assert_eq!(descendants_with_an_entry(&map, b"some"), 5);
1671 1677 assert_eq!(descendants_with_an_entry(&map, b"some/other"), 1);
1672 1678 assert_eq!(descendants_with_an_entry(&map, b"some/other/nested"), 1);
1673 1679 assert_eq!(tracked_descendants(&map, b"some/nested"), 1);
1674 1680 assert_eq!(descendants_with_an_entry(&map, b"some/nested"), 2);
1675 1681
1676 1682 map.set_untracked(p(b"some/nested/path"))?;
1677 1683 // It is set untracked *and* deleted since it was only added
1678 1684 assert_eq!(map.len(), 5);
1679 1685 assert_eq!(tracked_descendants(&map, b"some"), 1);
1680 1686 assert_eq!(descendants_with_an_entry(&map, b"some"), 4);
1681 1687 assert_eq!(tracked_descendants(&map, b"some/nested"), 0);
1682 1688 assert_eq!(descendants_with_an_entry(&map, b"some/nested"), 1);
1683 1689 assert_does_not_exist(&map, b"some/nested/path");
1684 1690
1685 1691 map.set_untracked(p(b"root_file"))?;
1686 1692 // Untracked but not deleted
1687 1693 assert_eq!(map.len(), 5);
1688 1694 assert!(map.get(p(b"root_file"))?.is_some());
1689 1695
1690 1696 map.set_untracked(p(b"some/file2"))?;
1691 1697 assert_eq!(map.len(), 5);
1692 1698 assert_eq!(tracked_descendants(&map, b"some"), 0);
1693 1699 assert!(map.get(p(b"some/file2"))?.is_some());
1694 1700
1695 1701 map.set_untracked(p(b"some/file3"))?;
1696 1702 assert_eq!(map.len(), 5);
1697 1703 assert_eq!(tracked_descendants(&map, b"some"), 0);
1698 1704 assert!(map.get(p(b"some/file3"))?.is_some());
1699 1705
1700 1706 Ok(())
1701 1707 }
1702 1708
1703 1709 /// Check that copies counter is correctly updated
1704 1710 #[test]
1705 1711 fn test_copy_source() -> Result<(), DirstateError> {
1706 1712 let mut map = OwningDirstateMap::new_empty(vec![]);
1707 1713
1708 1714 // Clean file
1709 1715 map.reset_state(p(b"files/clean"), true, true, false, false, None)?;
1710 1716 // Merged file
1711 1717 map.reset_state(p(b"files/from_p2"), true, true, true, false, None)?;
1712 1718 // Removed file
1713 1719 map.reset_state(p(b"removed"), false, true, false, false, None)?;
1714 1720 // Added file
1715 1721 map.reset_state(p(b"files/added"), true, false, false, false, None)?;
1716 1722 // Add copy
1717 1723 map.copy_map_insert(p(b"files/clean"), p(b"clean_copy_source"))?;
1718 1724 assert_eq!(map.copy_map_len(), 1);
1719 1725
1720 1726 // Copy override
1721 1727 map.copy_map_insert(p(b"files/clean"), p(b"other_clean_copy_source"))?;
1722 1728 assert_eq!(map.copy_map_len(), 1);
1723 1729
1724 1730 // Multiple copies
1725 1731 map.copy_map_insert(p(b"removed"), p(b"removed_copy_source"))?;
1726 1732 assert_eq!(map.copy_map_len(), 2);
1727 1733
1728 1734 map.copy_map_insert(p(b"files/added"), p(b"added_copy_source"))?;
1729 1735 assert_eq!(map.copy_map_len(), 3);
1730 1736
1731 1737 // Added, so the entry is completely removed
1732 1738 map.set_untracked(p(b"files/added"))?;
1733 1739 assert_does_not_exist(&map, b"files/added");
1734 1740 assert_eq!(map.copy_map_len(), 2);
1735 1741
1736 1742 // Removed, so the entry is kept around, so is its copy
1737 1743 map.set_untracked(p(b"removed"))?;
1738 1744 assert!(map.get(p(b"removed"))?.is_some());
1739 1745 assert_eq!(map.copy_map_len(), 2);
1740 1746
1741 1747 // Clean, so the entry is kept around, but not its copy
1742 1748 map.set_untracked(p(b"files/clean"))?;
1743 1749 assert!(map.get(p(b"files/clean"))?.is_some());
1744 1750 assert_eq!(map.copy_map_len(), 1);
1745 1751
1746 1752 map.copy_map_insert(p(b"files/from_p2"), p(b"from_p2_copy_source"))?;
1747 1753 assert_eq!(map.copy_map_len(), 2);
1748 1754
1749 1755 // Info from p2, so its copy source info is kept around
1750 1756 map.set_untracked(p(b"files/from_p2"))?;
1751 1757 assert!(map.get(p(b"files/from_p2"))?.is_some());
1752 1758 assert_eq!(map.copy_map_len(), 2);
1753 1759
1754 1760 Ok(())
1755 1761 }
1756 1762
1757 1763 /// Test with "on disk" data. For the sake of this test, the "on disk" data
1758 1764 /// does not actually come from the disk, but it's opaque to the code being
1759 1765 /// tested.
1760 1766 #[test]
1761 1767 fn test_on_disk() -> Result<(), DirstateError> {
1762 1768 // First let's create some data to put "on disk"
1763 1769 let mut map = OwningDirstateMap::new_empty(vec![]);
1764 1770
1765 1771 // A file that was just added
1766 1772 map.set_tracked(p(b"some/nested/added"))?;
1767 1773 map.copy_map_insert(p(b"some/nested/added"), p(b"added_copy_source"))?;
1768 1774
1769 1775 // A file that was removed
1770 1776 map.reset_state(
1771 1777 p(b"some/nested/removed"),
1772 1778 false,
1773 1779 true,
1774 1780 false,
1775 1781 false,
1776 1782 None,
1777 1783 )?;
1778 1784 // Only present in p2
1779 1785 map.reset_state(
1780 1786 p(b"other/p2_info_only"),
1781 1787 false,
1782 1788 false,
1783 1789 true,
1784 1790 false,
1785 1791 None,
1786 1792 )?;
1787 1793 map.copy_map_insert(
1788 1794 p(b"other/p2_info_only"),
1789 1795 p(b"other/p2_info_copy_source"),
1790 1796 )?;
1791 1797 // A file that was merged
1792 1798 map.reset_state(p(b"merged"), true, true, true, false, None)?;
1793 1799 // A file that is added, with info from p2
1794 1800 // XXX is that actually possible?
1795 1801 map.reset_state(
1796 1802 p(b"other/added_with_p2"),
1797 1803 true,
1798 1804 false,
1799 1805 true,
1800 1806 false,
1801 1807 None,
1802 1808 )?;
1803 1809 // One layer without any files to test deletion cascade
1804 1810 // A clean file
1805 1811 map.reset_state(
1806 1812 p(b"some/other/nested/clean"),
1807 1813 true,
1808 1814 true,
1809 1815 false,
1810 1816 false,
1811 1817 None,
1812 1818 )?;
1813 1819
1814 1820 let (packed, metadata, _should_append, _old_data_size) =
1815 map.pack_v2(false)?;
1821 map.pack_v2(DirstateMapWriteMode::ForceNewDataFile)?;
1816 1822 let packed_len = packed.len();
1817 1823 assert!(packed_len > 0);
1818 1824
1819 1825 // Recreate "from disk"
1820 1826 let mut map = OwningDirstateMap::new_v2(
1821 1827 packed,
1822 1828 packed_len,
1823 1829 metadata.as_bytes(),
1824 1830 )?;
1825 1831
1826 1832 // Check that everything is accounted for
1827 1833 assert!(map.contains_key(p(b"some/nested/added"))?);
1828 1834 assert!(map.contains_key(p(b"some/nested/removed"))?);
1829 1835 assert!(map.contains_key(p(b"merged"))?);
1830 1836 assert!(map.contains_key(p(b"other/p2_info_only"))?);
1831 1837 assert!(map.contains_key(p(b"other/added_with_p2"))?);
1832 1838 assert!(map.contains_key(p(b"some/other/nested/clean"))?);
1833 1839 assert_eq!(
1834 1840 map.copy_map_get(p(b"some/nested/added"))?,
1835 1841 Some(p(b"added_copy_source"))
1836 1842 );
1837 1843 assert_eq!(
1838 1844 map.copy_map_get(p(b"other/p2_info_only"))?,
1839 1845 Some(p(b"other/p2_info_copy_source"))
1840 1846 );
1841 1847 assert_eq!(tracked_descendants(&map, b"some"), 2);
1842 1848 assert_eq!(descendants_with_an_entry(&map, b"some"), 3);
1843 1849 assert_eq!(tracked_descendants(&map, b"other"), 1);
1844 1850 assert_eq!(descendants_with_an_entry(&map, b"other"), 2);
1845 1851 assert_eq!(tracked_descendants(&map, b"some/other"), 1);
1846 1852 assert_eq!(descendants_with_an_entry(&map, b"some/other"), 1);
1847 1853 assert_eq!(tracked_descendants(&map, b"some/other/nested"), 1);
1848 1854 assert_eq!(descendants_with_an_entry(&map, b"some/other/nested"), 1);
1849 1855 assert_eq!(tracked_descendants(&map, b"some/nested"), 1);
1850 1856 assert_eq!(descendants_with_an_entry(&map, b"some/nested"), 2);
1851 1857 assert_eq!(map.len(), 6);
1852 1858 assert_eq!(map.get_map().unreachable_bytes, 0);
1853 1859 assert_eq!(map.copy_map_len(), 2);
1854 1860
1855 1861 // Shouldn't change anything since it's already not tracked
1856 1862 map.set_untracked(p(b"some/nested/removed"))?;
1857 1863 assert_eq!(map.get_map().unreachable_bytes, 0);
1858 1864
1859 1865 match map.get_map().root {
1860 1866 ChildNodes::InMemory(_) => {
1861 1867 panic!("root should not have been mutated")
1862 1868 }
1863 1869 _ => (),
1864 1870 }
1865 1871 // We haven't mutated enough (nothing, actually), we should still be in
1866 1872 // the append strategy
1867 1873 assert!(map.get_map().write_should_append());
1868 1874
1869 1875 // But this mutates the structure, so there should be unreachable_bytes
1870 1876 assert!(map.set_untracked(p(b"some/nested/added"))?);
1871 1877 let unreachable_bytes = map.get_map().unreachable_bytes;
1872 1878 assert!(unreachable_bytes > 0);
1873 1879
1874 1880 match map.get_map().root {
1875 1881 ChildNodes::OnDisk(_) => panic!("root should have been mutated"),
1876 1882 _ => (),
1877 1883 }
1878 1884
1879 1885 // This should not mutate the structure either, since `root` has
1880 1886 // already been mutated along with its direct children.
1881 1887 map.set_untracked(p(b"merged"))?;
1882 1888 assert_eq!(map.get_map().unreachable_bytes, unreachable_bytes);
1883 1889
1884 1890 match map.get_map().get_node(p(b"other/added_with_p2"))?.unwrap() {
1885 1891 NodeRef::InMemory(_, _) => {
1886 1892 panic!("'other/added_with_p2' should not have been mutated")
1887 1893 }
1888 1894 _ => (),
1889 1895 }
1890 1896 // But this should, since it's in a different path
1891 1897 // than `<root>some/nested/add`
1892 1898 map.set_untracked(p(b"other/added_with_p2"))?;
1893 1899 assert!(map.get_map().unreachable_bytes > unreachable_bytes);
1894 1900
1895 1901 match map.get_map().get_node(p(b"other/added_with_p2"))?.unwrap() {
1896 1902 NodeRef::OnDisk(_) => {
1897 1903 panic!("'other/added_with_p2' should have been mutated")
1898 1904 }
1899 1905 _ => (),
1900 1906 }
1901 1907
1902 1908 // We have rewritten most of the tree, we should create a new file
1903 1909 assert!(!map.get_map().write_should_append());
1904 1910
1905 1911 Ok(())
1906 1912 }
1907 1913 }
@@ -1,883 +1,888 b''
1 1 //! The "version 2" disk representation of the dirstate
2 2 //!
3 3 //! See `mercurial/helptext/internals/dirstate-v2.txt`
4 4
5 5 use crate::dirstate::{DirstateV2Data, TruncatedTimestamp};
6 6 use crate::dirstate_tree::dirstate_map::DirstateVersion;
7 use crate::dirstate_tree::dirstate_map::{self, DirstateMap, NodeRef};
7 use crate::dirstate_tree::dirstate_map::{
8 self, DirstateMap, DirstateMapWriteMode, NodeRef,
9 };
8 10 use crate::dirstate_tree::path_with_basename::WithBasename;
9 11 use crate::errors::HgError;
10 12 use crate::utils::hg_path::HgPath;
11 13 use crate::DirstateEntry;
12 14 use crate::DirstateError;
13 15 use crate::DirstateParents;
14 16 use bitflags::bitflags;
15 17 use bytes_cast::unaligned::{U16Be, U32Be};
16 18 use bytes_cast::BytesCast;
17 19 use format_bytes::format_bytes;
18 20 use rand::Rng;
19 21 use std::borrow::Cow;
20 22 use std::convert::{TryFrom, TryInto};
21 23 use std::fmt::Write;
22 24
23 25 /// Added at the start of `.hg/dirstate` when the "v2" format is used.
24 26 /// This a redundant sanity check more than an actual "magic number" since
25 27 /// `.hg/requires` already governs which format should be used.
26 28 pub const V2_FORMAT_MARKER: &[u8; 12] = b"dirstate-v2\n";
27 29
28 30 /// Keep space for 256-bit hashes
29 31 const STORED_NODE_ID_BYTES: usize = 32;
30 32
31 33 /// … even though only 160 bits are used for now, with SHA-1
32 34 const USED_NODE_ID_BYTES: usize = 20;
33 35
34 36 pub(super) const IGNORE_PATTERNS_HASH_LEN: usize = 20;
35 37 pub(super) type IgnorePatternsHash = [u8; IGNORE_PATTERNS_HASH_LEN];
36 38
37 39 /// Must match constants of the same names in `mercurial/dirstateutils/v2.py`
38 40 const TREE_METADATA_SIZE: usize = 44;
39 41 const NODE_SIZE: usize = 44;
40 42
41 43 /// Make sure that size-affecting changes are made knowingly
42 44 #[allow(unused)]
43 45 fn static_assert_size_of() {
44 46 let _ = std::mem::transmute::<TreeMetadata, [u8; TREE_METADATA_SIZE]>;
45 47 let _ = std::mem::transmute::<DocketHeader, [u8; TREE_METADATA_SIZE + 81]>;
46 48 let _ = std::mem::transmute::<Node, [u8; NODE_SIZE]>;
47 49 }
48 50
49 51 // Must match `HEADER` in `mercurial/dirstateutils/docket.py`
50 52 #[derive(BytesCast)]
51 53 #[repr(C)]
52 54 struct DocketHeader {
53 55 marker: [u8; V2_FORMAT_MARKER.len()],
54 56 parent_1: [u8; STORED_NODE_ID_BYTES],
55 57 parent_2: [u8; STORED_NODE_ID_BYTES],
56 58
57 59 metadata: TreeMetadata,
58 60
59 61 /// Counted in bytes
60 62 data_size: Size,
61 63
62 64 uuid_size: u8,
63 65 }
64 66
65 67 pub struct Docket<'on_disk> {
66 68 header: &'on_disk DocketHeader,
67 69 pub uuid: &'on_disk [u8],
68 70 }
69 71
70 72 /// Fields are documented in the *Tree metadata in the docket file*
71 73 /// section of `mercurial/helptext/internals/dirstate-v2.txt`
72 74 #[derive(BytesCast)]
73 75 #[repr(C)]
74 76 pub struct TreeMetadata {
75 77 root_nodes: ChildNodes,
76 78 nodes_with_entry_count: Size,
77 79 nodes_with_copy_source_count: Size,
78 80 unreachable_bytes: Size,
79 81 unused: [u8; 4],
80 82
81 83 /// See *Optional hash of ignore patterns* section of
82 84 /// `mercurial/helptext/internals/dirstate-v2.txt`
83 85 ignore_patterns_hash: IgnorePatternsHash,
84 86 }
85 87
86 88 /// Fields are documented in the *The data file format*
87 89 /// section of `mercurial/helptext/internals/dirstate-v2.txt`
88 90 #[derive(BytesCast, Debug)]
89 91 #[repr(C)]
90 92 pub(super) struct Node {
91 93 full_path: PathSlice,
92 94
93 95 /// In bytes from `self.full_path.start`
94 96 base_name_start: PathSize,
95 97
96 98 copy_source: OptPathSlice,
97 99 children: ChildNodes,
98 100 pub(super) descendants_with_entry_count: Size,
99 101 pub(super) tracked_descendants_count: Size,
100 102 flags: U16Be,
101 103 size: U32Be,
102 104 mtime: PackedTruncatedTimestamp,
103 105 }
104 106
105 107 bitflags! {
106 108 #[repr(C)]
107 109 struct Flags: u16 {
108 110 const WDIR_TRACKED = 1 << 0;
109 111 const P1_TRACKED = 1 << 1;
110 112 const P2_INFO = 1 << 2;
111 113 const MODE_EXEC_PERM = 1 << 3;
112 114 const MODE_IS_SYMLINK = 1 << 4;
113 115 const HAS_FALLBACK_EXEC = 1 << 5;
114 116 const FALLBACK_EXEC = 1 << 6;
115 117 const HAS_FALLBACK_SYMLINK = 1 << 7;
116 118 const FALLBACK_SYMLINK = 1 << 8;
117 119 const EXPECTED_STATE_IS_MODIFIED = 1 << 9;
118 120 const HAS_MODE_AND_SIZE = 1 <<10;
119 121 const HAS_MTIME = 1 <<11;
120 122 const MTIME_SECOND_AMBIGUOUS = 1 << 12;
121 123 const DIRECTORY = 1 <<13;
122 124 const ALL_UNKNOWN_RECORDED = 1 <<14;
123 125 const ALL_IGNORED_RECORDED = 1 <<15;
124 126 }
125 127 }
126 128
127 129 /// Duration since the Unix epoch
128 130 #[derive(BytesCast, Copy, Clone, Debug)]
129 131 #[repr(C)]
130 132 struct PackedTruncatedTimestamp {
131 133 truncated_seconds: U32Be,
132 134 nanoseconds: U32Be,
133 135 }
134 136
135 137 /// Counted in bytes from the start of the file
136 138 ///
137 139 /// NOTE: not supporting `.hg/dirstate` files larger than 4 GiB.
138 140 type Offset = U32Be;
139 141
140 142 /// Counted in number of items
141 143 ///
142 144 /// NOTE: we choose not to support counting more than 4 billion nodes anywhere.
143 145 type Size = U32Be;
144 146
145 147 /// Counted in bytes
146 148 ///
147 149 /// NOTE: we choose not to support file names/paths longer than 64 KiB.
148 150 type PathSize = U16Be;
149 151
150 152 /// A contiguous sequence of `len` times `Node`, representing the child nodes
151 153 /// of either some other node or of the repository root.
152 154 ///
153 155 /// Always sorted by ascending `full_path`, to allow binary search.
154 156 /// Since nodes with the same parent nodes also have the same parent path,
155 157 /// only the `base_name`s need to be compared during binary search.
156 158 #[derive(BytesCast, Copy, Clone, Debug)]
157 159 #[repr(C)]
158 160 struct ChildNodes {
159 161 start: Offset,
160 162 len: Size,
161 163 }
162 164
163 165 /// A `HgPath` of `len` bytes
164 166 #[derive(BytesCast, Copy, Clone, Debug)]
165 167 #[repr(C)]
166 168 struct PathSlice {
167 169 start: Offset,
168 170 len: PathSize,
169 171 }
170 172
171 173 /// Either nothing if `start == 0`, or a `HgPath` of `len` bytes
172 174 type OptPathSlice = PathSlice;
173 175
174 176 /// Unexpected file format found in `.hg/dirstate` with the "v2" format.
175 177 ///
176 178 /// This should only happen if Mercurial is buggy or a repository is corrupted.
177 179 #[derive(Debug)]
178 180 pub struct DirstateV2ParseError {
179 181 message: String,
180 182 }
181 183
182 184 impl DirstateV2ParseError {
183 185 pub fn new<S: Into<String>>(message: S) -> Self {
184 186 Self {
185 187 message: message.into(),
186 188 }
187 189 }
188 190 }
189 191
190 192 impl From<DirstateV2ParseError> for HgError {
191 193 fn from(e: DirstateV2ParseError) -> Self {
192 194 HgError::corrupted(format!("dirstate-v2 parse error: {}", e.message))
193 195 }
194 196 }
195 197
196 198 impl From<DirstateV2ParseError> for crate::DirstateError {
197 199 fn from(error: DirstateV2ParseError) -> Self {
198 200 HgError::from(error).into()
199 201 }
200 202 }
201 203
202 204 impl TreeMetadata {
203 205 pub fn as_bytes(&self) -> &[u8] {
204 206 BytesCast::as_bytes(self)
205 207 }
206 208 }
207 209
208 210 impl<'on_disk> Docket<'on_disk> {
209 211 /// Generate the identifier for a new data file
210 212 ///
211 213 /// TODO: support the `HGTEST_UUIDFILE` environment variable.
212 214 /// See `mercurial/revlogutils/docket.py`
213 215 pub fn new_uid() -> String {
214 216 const ID_LENGTH: usize = 8;
215 217 let mut id = String::with_capacity(ID_LENGTH);
216 218 let mut rng = rand::thread_rng();
217 219 for _ in 0..ID_LENGTH {
218 220 // One random hexadecimal digit.
219 221 // `unwrap` never panics because `impl Write for String`
220 222 // never returns an error.
221 223 write!(&mut id, "{:x}", rng.gen_range(0..16)).unwrap();
222 224 }
223 225 id
224 226 }
225 227
226 228 pub fn serialize(
227 229 parents: DirstateParents,
228 230 tree_metadata: TreeMetadata,
229 231 data_size: u64,
230 232 uuid: &[u8],
231 233 ) -> Result<Vec<u8>, std::num::TryFromIntError> {
232 234 let header = DocketHeader {
233 235 marker: *V2_FORMAT_MARKER,
234 236 parent_1: parents.p1.pad_to_256_bits(),
235 237 parent_2: parents.p2.pad_to_256_bits(),
236 238 metadata: tree_metadata,
237 239 data_size: u32::try_from(data_size)?.into(),
238 240 uuid_size: uuid.len().try_into()?,
239 241 };
240 242 let header = header.as_bytes();
241 243 let mut docket = Vec::with_capacity(header.len() + uuid.len());
242 244 docket.extend_from_slice(header);
243 245 docket.extend_from_slice(uuid);
244 246 Ok(docket)
245 247 }
246 248
247 249 pub fn parents(&self) -> DirstateParents {
248 250 use crate::Node;
249 251 let p1 = Node::try_from(&self.header.parent_1[..USED_NODE_ID_BYTES])
250 252 .unwrap()
251 253 .clone();
252 254 let p2 = Node::try_from(&self.header.parent_2[..USED_NODE_ID_BYTES])
253 255 .unwrap()
254 256 .clone();
255 257 DirstateParents { p1, p2 }
256 258 }
257 259
258 260 pub fn tree_metadata(&self) -> &[u8] {
259 261 self.header.metadata.as_bytes()
260 262 }
261 263
262 264 pub fn data_size(&self) -> usize {
263 265 // This `unwrap` could only panic on a 16-bit CPU
264 266 self.header.data_size.get().try_into().unwrap()
265 267 }
266 268
267 269 pub fn data_filename(&self) -> String {
268 270 String::from_utf8(format_bytes!(b"dirstate.{}", self.uuid)).unwrap()
269 271 }
270 272 }
271 273
272 274 pub fn read_docket(
273 275 on_disk: &[u8],
274 276 ) -> Result<Docket<'_>, DirstateV2ParseError> {
275 277 let (header, uuid) = DocketHeader::from_bytes(on_disk).map_err(|e| {
276 278 DirstateV2ParseError::new(format!("when reading docket, {}", e))
277 279 })?;
278 280 let uuid_size = header.uuid_size as usize;
279 281 if header.marker == *V2_FORMAT_MARKER && uuid.len() == uuid_size {
280 282 Ok(Docket { header, uuid })
281 283 } else {
282 284 Err(DirstateV2ParseError::new(
283 285 "invalid format marker or uuid size",
284 286 ))
285 287 }
286 288 }
287 289
288 290 pub(super) fn read<'on_disk>(
289 291 on_disk: &'on_disk [u8],
290 292 metadata: &[u8],
291 293 ) -> Result<DirstateMap<'on_disk>, DirstateV2ParseError> {
292 294 if on_disk.is_empty() {
293 295 let mut map = DirstateMap::empty(on_disk);
294 296 map.dirstate_version = DirstateVersion::V2;
295 297 return Ok(map);
296 298 }
297 299 let (meta, _) = TreeMetadata::from_bytes(metadata).map_err(|e| {
298 300 DirstateV2ParseError::new(format!("when parsing tree metadata, {}", e))
299 301 })?;
300 302 let dirstate_map = DirstateMap {
301 303 on_disk,
302 304 root: dirstate_map::ChildNodes::OnDisk(
303 305 read_nodes(on_disk, meta.root_nodes).map_err(|mut e| {
304 306 e.message = format!("{}, when reading root notes", e.message);
305 307 e
306 308 })?,
307 309 ),
308 310 nodes_with_entry_count: meta.nodes_with_entry_count.get(),
309 311 nodes_with_copy_source_count: meta.nodes_with_copy_source_count.get(),
310 312 ignore_patterns_hash: meta.ignore_patterns_hash,
311 313 unreachable_bytes: meta.unreachable_bytes.get(),
312 314 old_data_size: on_disk.len(),
313 315 dirstate_version: DirstateVersion::V2,
314 316 };
315 317 Ok(dirstate_map)
316 318 }
317 319
318 320 impl Node {
319 321 pub(super) fn full_path<'on_disk>(
320 322 &self,
321 323 on_disk: &'on_disk [u8],
322 324 ) -> Result<&'on_disk HgPath, DirstateV2ParseError> {
323 325 read_hg_path(on_disk, self.full_path)
324 326 }
325 327
326 328 pub(super) fn base_name_start<'on_disk>(
327 329 &self,
328 330 ) -> Result<usize, DirstateV2ParseError> {
329 331 let start = self.base_name_start.get();
330 332 if start < self.full_path.len.get() {
331 333 let start = usize::try_from(start)
332 334 // u32 -> usize, could only panic on a 16-bit CPU
333 335 .expect("dirstate-v2 base_name_start out of bounds");
334 336 Ok(start)
335 337 } else {
336 338 Err(DirstateV2ParseError::new("not enough bytes for base name"))
337 339 }
338 340 }
339 341
340 342 pub(super) fn base_name<'on_disk>(
341 343 &self,
342 344 on_disk: &'on_disk [u8],
343 345 ) -> Result<&'on_disk HgPath, DirstateV2ParseError> {
344 346 let full_path = self.full_path(on_disk)?;
345 347 let base_name_start = self.base_name_start()?;
346 348 Ok(HgPath::new(&full_path.as_bytes()[base_name_start..]))
347 349 }
348 350
349 351 pub(super) fn path<'on_disk>(
350 352 &self,
351 353 on_disk: &'on_disk [u8],
352 354 ) -> Result<dirstate_map::NodeKey<'on_disk>, DirstateV2ParseError> {
353 355 Ok(WithBasename::from_raw_parts(
354 356 Cow::Borrowed(self.full_path(on_disk)?),
355 357 self.base_name_start()?,
356 358 ))
357 359 }
358 360
359 361 pub(super) fn has_copy_source<'on_disk>(&self) -> bool {
360 362 self.copy_source.start.get() != 0
361 363 }
362 364
363 365 pub(super) fn copy_source<'on_disk>(
364 366 &self,
365 367 on_disk: &'on_disk [u8],
366 368 ) -> Result<Option<&'on_disk HgPath>, DirstateV2ParseError> {
367 369 Ok(if self.has_copy_source() {
368 370 Some(read_hg_path(on_disk, self.copy_source)?)
369 371 } else {
370 372 None
371 373 })
372 374 }
373 375
374 376 fn flags(&self) -> Flags {
375 377 Flags::from_bits_truncate(self.flags.get())
376 378 }
377 379
378 380 fn has_entry(&self) -> bool {
379 381 self.flags().intersects(
380 382 Flags::WDIR_TRACKED | Flags::P1_TRACKED | Flags::P2_INFO,
381 383 )
382 384 }
383 385
384 386 pub(super) fn node_data(
385 387 &self,
386 388 ) -> Result<dirstate_map::NodeData, DirstateV2ParseError> {
387 389 if self.has_entry() {
388 390 Ok(dirstate_map::NodeData::Entry(self.assume_entry()?))
389 391 } else if let Some(mtime) = self.cached_directory_mtime()? {
390 392 Ok(dirstate_map::NodeData::CachedDirectory { mtime })
391 393 } else {
392 394 Ok(dirstate_map::NodeData::None)
393 395 }
394 396 }
395 397
396 398 pub(super) fn cached_directory_mtime(
397 399 &self,
398 400 ) -> Result<Option<TruncatedTimestamp>, DirstateV2ParseError> {
399 401 // For now we do not have code to handle the absence of
400 402 // ALL_UNKNOWN_RECORDED, so we ignore the mtime if the flag is
401 403 // unset.
402 404 if self.flags().contains(Flags::DIRECTORY)
403 405 && self.flags().contains(Flags::HAS_MTIME)
404 406 && self.flags().contains(Flags::ALL_UNKNOWN_RECORDED)
405 407 {
406 408 Ok(Some(self.mtime()?))
407 409 } else {
408 410 Ok(None)
409 411 }
410 412 }
411 413
412 414 fn synthesize_unix_mode(&self) -> u32 {
413 415 let file_type = if self.flags().contains(Flags::MODE_IS_SYMLINK) {
414 416 libc::S_IFLNK
415 417 } else {
416 418 libc::S_IFREG
417 419 };
418 420 let permisions = if self.flags().contains(Flags::MODE_EXEC_PERM) {
419 421 0o755
420 422 } else {
421 423 0o644
422 424 };
423 425 (file_type | permisions).into()
424 426 }
425 427
426 428 fn mtime(&self) -> Result<TruncatedTimestamp, DirstateV2ParseError> {
427 429 let mut m: TruncatedTimestamp = self.mtime.try_into()?;
428 430 if self.flags().contains(Flags::MTIME_SECOND_AMBIGUOUS) {
429 431 m.second_ambiguous = true;
430 432 }
431 433 Ok(m)
432 434 }
433 435
434 436 fn assume_entry(&self) -> Result<DirstateEntry, DirstateV2ParseError> {
435 437 // TODO: convert through raw bits instead?
436 438 let wc_tracked = self.flags().contains(Flags::WDIR_TRACKED);
437 439 let p1_tracked = self.flags().contains(Flags::P1_TRACKED);
438 440 let p2_info = self.flags().contains(Flags::P2_INFO);
439 441 let mode_size = if self.flags().contains(Flags::HAS_MODE_AND_SIZE)
440 442 && !self.flags().contains(Flags::EXPECTED_STATE_IS_MODIFIED)
441 443 {
442 444 Some((self.synthesize_unix_mode(), self.size.into()))
443 445 } else {
444 446 None
445 447 };
446 448 let mtime = if self.flags().contains(Flags::HAS_MTIME)
447 449 && !self.flags().contains(Flags::DIRECTORY)
448 450 && !self.flags().contains(Flags::EXPECTED_STATE_IS_MODIFIED)
449 451 {
450 452 Some(self.mtime()?)
451 453 } else {
452 454 None
453 455 };
454 456 let fallback_exec = if self.flags().contains(Flags::HAS_FALLBACK_EXEC)
455 457 {
456 458 Some(self.flags().contains(Flags::FALLBACK_EXEC))
457 459 } else {
458 460 None
459 461 };
460 462 let fallback_symlink =
461 463 if self.flags().contains(Flags::HAS_FALLBACK_SYMLINK) {
462 464 Some(self.flags().contains(Flags::FALLBACK_SYMLINK))
463 465 } else {
464 466 None
465 467 };
466 468 Ok(DirstateEntry::from_v2_data(DirstateV2Data {
467 469 wc_tracked,
468 470 p1_tracked,
469 471 p2_info,
470 472 mode_size,
471 473 mtime,
472 474 fallback_exec,
473 475 fallback_symlink,
474 476 }))
475 477 }
476 478
477 479 pub(super) fn entry(
478 480 &self,
479 481 ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {
480 482 if self.has_entry() {
481 483 Ok(Some(self.assume_entry()?))
482 484 } else {
483 485 Ok(None)
484 486 }
485 487 }
486 488
487 489 pub(super) fn children<'on_disk>(
488 490 &self,
489 491 on_disk: &'on_disk [u8],
490 492 ) -> Result<&'on_disk [Node], DirstateV2ParseError> {
491 493 read_nodes(on_disk, self.children)
492 494 }
493 495
494 496 pub(super) fn to_in_memory_node<'on_disk>(
495 497 &self,
496 498 on_disk: &'on_disk [u8],
497 499 ) -> Result<dirstate_map::Node<'on_disk>, DirstateV2ParseError> {
498 500 Ok(dirstate_map::Node {
499 501 children: dirstate_map::ChildNodes::OnDisk(
500 502 self.children(on_disk)?,
501 503 ),
502 504 copy_source: self.copy_source(on_disk)?.map(Cow::Borrowed),
503 505 data: self.node_data()?,
504 506 descendants_with_entry_count: self
505 507 .descendants_with_entry_count
506 508 .get(),
507 509 tracked_descendants_count: self.tracked_descendants_count.get(),
508 510 })
509 511 }
510 512
511 513 fn from_dirstate_entry(
512 514 entry: &DirstateEntry,
513 515 ) -> (Flags, U32Be, PackedTruncatedTimestamp) {
514 516 let DirstateV2Data {
515 517 wc_tracked,
516 518 p1_tracked,
517 519 p2_info,
518 520 mode_size: mode_size_opt,
519 521 mtime: mtime_opt,
520 522 fallback_exec,
521 523 fallback_symlink,
522 524 } = entry.v2_data();
523 525 // TODO: convert through raw flag bits instead?
524 526 let mut flags = Flags::empty();
525 527 flags.set(Flags::WDIR_TRACKED, wc_tracked);
526 528 flags.set(Flags::P1_TRACKED, p1_tracked);
527 529 flags.set(Flags::P2_INFO, p2_info);
528 530 let size = if let Some((m, s)) = mode_size_opt {
529 531 let exec_perm = m & (libc::S_IXUSR as u32) != 0;
530 532 let is_symlink = m & (libc::S_IFMT as u32) == libc::S_IFLNK as u32;
531 533 flags.set(Flags::MODE_EXEC_PERM, exec_perm);
532 534 flags.set(Flags::MODE_IS_SYMLINK, is_symlink);
533 535 flags.insert(Flags::HAS_MODE_AND_SIZE);
534 536 s.into()
535 537 } else {
536 538 0.into()
537 539 };
538 540 let mtime = if let Some(m) = mtime_opt {
539 541 flags.insert(Flags::HAS_MTIME);
540 542 if m.second_ambiguous {
541 543 flags.insert(Flags::MTIME_SECOND_AMBIGUOUS);
542 544 };
543 545 m.into()
544 546 } else {
545 547 PackedTruncatedTimestamp::null()
546 548 };
547 549 if let Some(f_exec) = fallback_exec {
548 550 flags.insert(Flags::HAS_FALLBACK_EXEC);
549 551 if f_exec {
550 552 flags.insert(Flags::FALLBACK_EXEC);
551 553 }
552 554 }
553 555 if let Some(f_symlink) = fallback_symlink {
554 556 flags.insert(Flags::HAS_FALLBACK_SYMLINK);
555 557 if f_symlink {
556 558 flags.insert(Flags::FALLBACK_SYMLINK);
557 559 }
558 560 }
559 561 (flags, size, mtime)
560 562 }
561 563 }
562 564
563 565 fn read_hg_path(
564 566 on_disk: &[u8],
565 567 slice: PathSlice,
566 568 ) -> Result<&HgPath, DirstateV2ParseError> {
567 569 read_slice(on_disk, slice.start, slice.len.get()).map(HgPath::new)
568 570 }
569 571
570 572 fn read_nodes(
571 573 on_disk: &[u8],
572 574 slice: ChildNodes,
573 575 ) -> Result<&[Node], DirstateV2ParseError> {
574 576 read_slice(on_disk, slice.start, slice.len.get())
575 577 }
576 578
577 579 fn read_slice<T, Len>(
578 580 on_disk: &[u8],
579 581 start: Offset,
580 582 len: Len,
581 583 ) -> Result<&[T], DirstateV2ParseError>
582 584 where
583 585 T: BytesCast,
584 586 Len: TryInto<usize>,
585 587 {
586 588 // Either `usize::MAX` would result in "out of bounds" error since a single
587 589 // `&[u8]` cannot occupy the entire address space.
588 590 let start = start.get().try_into().unwrap_or(std::usize::MAX);
589 591 let len = len.try_into().unwrap_or(std::usize::MAX);
590 592 let bytes = match on_disk.get(start..) {
591 593 Some(bytes) => bytes,
592 594 None => {
593 595 return Err(DirstateV2ParseError::new(
594 596 "not enough bytes from disk",
595 597 ))
596 598 }
597 599 };
598 600 T::slice_from_bytes(bytes, len)
599 601 .map_err(|e| {
600 602 DirstateV2ParseError::new(format!("when reading a slice, {}", e))
601 603 })
602 604 .map(|(slice, _rest)| slice)
603 605 }
604 606
605 607 pub(crate) fn for_each_tracked_path<'on_disk>(
606 608 on_disk: &'on_disk [u8],
607 609 metadata: &[u8],
608 610 mut f: impl FnMut(&'on_disk HgPath),
609 611 ) -> Result<(), DirstateV2ParseError> {
610 612 let (meta, _) = TreeMetadata::from_bytes(metadata).map_err(|e| {
611 613 DirstateV2ParseError::new(format!("when parsing tree metadata, {}", e))
612 614 })?;
613 615 fn recur<'on_disk>(
614 616 on_disk: &'on_disk [u8],
615 617 nodes: ChildNodes,
616 618 f: &mut impl FnMut(&'on_disk HgPath),
617 619 ) -> Result<(), DirstateV2ParseError> {
618 620 for node in read_nodes(on_disk, nodes)? {
619 621 if let Some(entry) = node.entry()? {
620 622 if entry.tracked() {
621 623 f(node.full_path(on_disk)?)
622 624 }
623 625 }
624 626 recur(on_disk, node.children, f)?
625 627 }
626 628 Ok(())
627 629 }
628 630 recur(on_disk, meta.root_nodes, &mut f)
629 631 }
630 632
631 633 /// Returns new data and metadata, together with whether that data should be
632 634 /// appended to the existing data file whose content is at
633 635 /// `dirstate_map.on_disk` (true), instead of written to a new data file
634 636 /// (false), and the previous size of data on disk.
635 637 pub(super) fn write(
636 638 dirstate_map: &DirstateMap,
637 can_append: bool,
639 write_mode: DirstateMapWriteMode,
638 640 ) -> Result<(Vec<u8>, TreeMetadata, bool, usize), DirstateError> {
639 let append = can_append && dirstate_map.write_should_append();
641 let append = match write_mode {
642 DirstateMapWriteMode::Auto => dirstate_map.write_should_append(),
643 DirstateMapWriteMode::ForceNewDataFile => false,
644 };
640 645 if append {
641 646 log::trace!("appending to the dirstate data file");
642 647 } else {
643 648 log::trace!("creating new dirstate data file");
644 649 }
645 650
646 651 // This ignores the space for paths, and for nodes without an entry.
647 652 // TODO: better estimate? Skip the `Vec` and write to a file directly?
648 653 let size_guess = std::mem::size_of::<Node>()
649 654 * dirstate_map.nodes_with_entry_count as usize;
650 655
651 656 let mut writer = Writer {
652 657 dirstate_map,
653 658 append,
654 659 out: Vec::with_capacity(size_guess),
655 660 };
656 661
657 662 let root_nodes = writer.write_nodes(dirstate_map.root.as_ref())?;
658 663
659 664 let unreachable_bytes = if append {
660 665 dirstate_map.unreachable_bytes
661 666 } else {
662 667 0
663 668 };
664 669 let meta = TreeMetadata {
665 670 root_nodes,
666 671 nodes_with_entry_count: dirstate_map.nodes_with_entry_count.into(),
667 672 nodes_with_copy_source_count: dirstate_map
668 673 .nodes_with_copy_source_count
669 674 .into(),
670 675 unreachable_bytes: unreachable_bytes.into(),
671 676 unused: [0; 4],
672 677 ignore_patterns_hash: dirstate_map.ignore_patterns_hash,
673 678 };
674 679 Ok((writer.out, meta, append, dirstate_map.old_data_size))
675 680 }
676 681
677 682 struct Writer<'dmap, 'on_disk> {
678 683 dirstate_map: &'dmap DirstateMap<'on_disk>,
679 684 append: bool,
680 685 out: Vec<u8>,
681 686 }
682 687
683 688 impl Writer<'_, '_> {
684 689 fn write_nodes(
685 690 &mut self,
686 691 nodes: dirstate_map::ChildNodesRef,
687 692 ) -> Result<ChildNodes, DirstateError> {
688 693 // Reuse already-written nodes if possible
689 694 if self.append {
690 695 if let dirstate_map::ChildNodesRef::OnDisk(nodes_slice) = nodes {
691 696 let start = self.on_disk_offset_of(nodes_slice).expect(
692 697 "dirstate-v2 OnDisk nodes not found within on_disk",
693 698 );
694 699 let len = child_nodes_len_from_usize(nodes_slice.len());
695 700 return Ok(ChildNodes { start, len });
696 701 }
697 702 }
698 703
699 704 // `dirstate_map::ChildNodes::InMemory` contains a `HashMap` which has
700 705 // undefined iteration order. Sort to enable binary search in the
701 706 // written file.
702 707 let nodes = nodes.sorted();
703 708 let nodes_len = nodes.len();
704 709
705 710 // First accumulate serialized nodes in a `Vec`
706 711 let mut on_disk_nodes = Vec::with_capacity(nodes_len);
707 712 for node in nodes {
708 713 let children =
709 714 self.write_nodes(node.children(self.dirstate_map.on_disk)?)?;
710 715 let full_path = node.full_path(self.dirstate_map.on_disk)?;
711 716 let full_path = self.write_path(full_path.as_bytes());
712 717 let copy_source = if let Some(source) =
713 718 node.copy_source(self.dirstate_map.on_disk)?
714 719 {
715 720 self.write_path(source.as_bytes())
716 721 } else {
717 722 PathSlice {
718 723 start: 0.into(),
719 724 len: 0.into(),
720 725 }
721 726 };
722 727 on_disk_nodes.push(match node {
723 728 NodeRef::InMemory(path, node) => {
724 729 let (flags, size, mtime) = match &node.data {
725 730 dirstate_map::NodeData::Entry(entry) => {
726 731 Node::from_dirstate_entry(entry)
727 732 }
728 733 dirstate_map::NodeData::CachedDirectory { mtime } => {
729 734 // we currently never set a mtime if unknown files
730 735 // are present.
731 736 // So if we have a mtime for a directory, we know
732 737 // there are no unknown
733 738 // files and we
734 739 // blindly set ALL_UNKNOWN_RECORDED.
735 740 //
736 741 // We never set ALL_IGNORED_RECORDED since we
737 742 // don't track that case
738 743 // currently.
739 744 let mut flags = Flags::DIRECTORY
740 745 | Flags::HAS_MTIME
741 746 | Flags::ALL_UNKNOWN_RECORDED;
742 747 if mtime.second_ambiguous {
743 748 flags.insert(Flags::MTIME_SECOND_AMBIGUOUS)
744 749 }
745 750 (flags, 0.into(), (*mtime).into())
746 751 }
747 752 dirstate_map::NodeData::None => (
748 753 Flags::DIRECTORY,
749 754 0.into(),
750 755 PackedTruncatedTimestamp::null(),
751 756 ),
752 757 };
753 758 Node {
754 759 children,
755 760 copy_source,
756 761 full_path,
757 762 base_name_start: u16::try_from(path.base_name_start())
758 763 // Could only panic for paths over 64 KiB
759 764 .expect("dirstate-v2 path length overflow")
760 765 .into(),
761 766 descendants_with_entry_count: node
762 767 .descendants_with_entry_count
763 768 .into(),
764 769 tracked_descendants_count: node
765 770 .tracked_descendants_count
766 771 .into(),
767 772 flags: flags.bits().into(),
768 773 size,
769 774 mtime,
770 775 }
771 776 }
772 777 NodeRef::OnDisk(node) => Node {
773 778 children,
774 779 copy_source,
775 780 full_path,
776 781 ..*node
777 782 },
778 783 })
779 784 }
780 785 // … so we can write them contiguously, after writing everything else
781 786 // they refer to.
782 787 let start = self.current_offset();
783 788 let len = child_nodes_len_from_usize(nodes_len);
784 789 self.out.extend(on_disk_nodes.as_bytes());
785 790 Ok(ChildNodes { start, len })
786 791 }
787 792
788 793 /// If the given slice of items is within `on_disk`, returns its offset
789 794 /// from the start of `on_disk`.
790 795 fn on_disk_offset_of<T>(&self, slice: &[T]) -> Option<Offset>
791 796 where
792 797 T: BytesCast,
793 798 {
794 799 fn address_range(slice: &[u8]) -> std::ops::RangeInclusive<usize> {
795 800 let start = slice.as_ptr() as usize;
796 801 let end = start + slice.len();
797 802 start..=end
798 803 }
799 804 let slice_addresses = address_range(slice.as_bytes());
800 805 let on_disk_addresses = address_range(self.dirstate_map.on_disk);
801 806 if on_disk_addresses.contains(slice_addresses.start())
802 807 && on_disk_addresses.contains(slice_addresses.end())
803 808 {
804 809 let offset = slice_addresses.start() - on_disk_addresses.start();
805 810 Some(offset_from_usize(offset))
806 811 } else {
807 812 None
808 813 }
809 814 }
810 815
811 816 fn current_offset(&mut self) -> Offset {
812 817 let mut offset = self.out.len();
813 818 if self.append {
814 819 offset += self.dirstate_map.on_disk.len()
815 820 }
816 821 offset_from_usize(offset)
817 822 }
818 823
819 824 fn write_path(&mut self, slice: &[u8]) -> PathSlice {
820 825 let len = path_len_from_usize(slice.len());
821 826 // Reuse an already-written path if possible
822 827 if self.append {
823 828 if let Some(start) = self.on_disk_offset_of(slice) {
824 829 return PathSlice { start, len };
825 830 }
826 831 }
827 832 let start = self.current_offset();
828 833 self.out.extend(slice.as_bytes());
829 834 PathSlice { start, len }
830 835 }
831 836 }
832 837
833 838 fn offset_from_usize(x: usize) -> Offset {
834 839 u32::try_from(x)
835 840 // Could only panic for a dirstate file larger than 4 GiB
836 841 .expect("dirstate-v2 offset overflow")
837 842 .into()
838 843 }
839 844
840 845 fn child_nodes_len_from_usize(x: usize) -> Size {
841 846 u32::try_from(x)
842 847 // Could only panic with over 4 billion nodes
843 848 .expect("dirstate-v2 slice length overflow")
844 849 .into()
845 850 }
846 851
847 852 fn path_len_from_usize(x: usize) -> PathSize {
848 853 u16::try_from(x)
849 854 // Could only panic for paths over 64 KiB
850 855 .expect("dirstate-v2 path length overflow")
851 856 .into()
852 857 }
853 858
854 859 impl From<TruncatedTimestamp> for PackedTruncatedTimestamp {
855 860 fn from(timestamp: TruncatedTimestamp) -> Self {
856 861 Self {
857 862 truncated_seconds: timestamp.truncated_seconds().into(),
858 863 nanoseconds: timestamp.nanoseconds().into(),
859 864 }
860 865 }
861 866 }
862 867
863 868 impl TryFrom<PackedTruncatedTimestamp> for TruncatedTimestamp {
864 869 type Error = DirstateV2ParseError;
865 870
866 871 fn try_from(
867 872 timestamp: PackedTruncatedTimestamp,
868 873 ) -> Result<Self, Self::Error> {
869 874 Self::from_already_truncated(
870 875 timestamp.truncated_seconds.get(),
871 876 timestamp.nanoseconds.get(),
872 877 false,
873 878 )
874 879 }
875 880 }
876 881 impl PackedTruncatedTimestamp {
877 882 fn null() -> Self {
878 883 Self {
879 884 truncated_seconds: 0.into(),
880 885 nanoseconds: 0.into(),
881 886 }
882 887 }
883 888 }
@@ -1,577 +1,582 b''
1 1 use crate::changelog::Changelog;
2 2 use crate::config::{Config, ConfigError, ConfigParseError};
3 3 use crate::dirstate::DirstateParents;
4 use crate::dirstate_tree::dirstate_map::DirstateMapWriteMode;
4 5 use crate::dirstate_tree::on_disk::Docket as DirstateDocket;
5 6 use crate::dirstate_tree::owning::OwningDirstateMap;
6 7 use crate::errors::HgResultExt;
7 8 use crate::errors::{HgError, IoResultExt};
8 9 use crate::lock::{try_with_lock_no_wait, LockError};
9 10 use crate::manifest::{Manifest, Manifestlog};
10 11 use crate::revlog::filelog::Filelog;
11 12 use crate::revlog::revlog::RevlogError;
12 13 use crate::utils::files::get_path_from_bytes;
13 14 use crate::utils::hg_path::HgPath;
14 15 use crate::utils::SliceExt;
15 16 use crate::vfs::{is_dir, is_file, Vfs};
16 17 use crate::{requirements, NodePrefix};
17 18 use crate::{DirstateError, Revision};
18 19 use std::cell::{Ref, RefCell, RefMut};
19 20 use std::collections::HashSet;
20 21 use std::io::Seek;
21 22 use std::io::SeekFrom;
22 23 use std::io::Write as IoWrite;
23 24 use std::path::{Path, PathBuf};
24 25
25 26 /// A repository on disk
26 27 pub struct Repo {
27 28 working_directory: PathBuf,
28 29 dot_hg: PathBuf,
29 30 store: PathBuf,
30 31 requirements: HashSet<String>,
31 32 config: Config,
32 33 dirstate_parents: LazyCell<DirstateParents>,
33 34 dirstate_data_file_uuid: LazyCell<Option<Vec<u8>>>,
34 35 dirstate_map: LazyCell<OwningDirstateMap>,
35 36 changelog: LazyCell<Changelog>,
36 37 manifestlog: LazyCell<Manifestlog>,
37 38 }
38 39
39 40 #[derive(Debug, derive_more::From)]
40 41 pub enum RepoError {
41 42 NotFound {
42 43 at: PathBuf,
43 44 },
44 45 #[from]
45 46 ConfigParseError(ConfigParseError),
46 47 #[from]
47 48 Other(HgError),
48 49 }
49 50
50 51 impl From<ConfigError> for RepoError {
51 52 fn from(error: ConfigError) -> Self {
52 53 match error {
53 54 ConfigError::Parse(error) => error.into(),
54 55 ConfigError::Other(error) => error.into(),
55 56 }
56 57 }
57 58 }
58 59
59 60 impl Repo {
60 61 /// tries to find nearest repository root in current working directory or
61 62 /// its ancestors
62 63 pub fn find_repo_root() -> Result<PathBuf, RepoError> {
63 64 let current_directory = crate::utils::current_dir()?;
64 65 // ancestors() is inclusive: it first yields `current_directory`
65 66 // as-is.
66 67 for ancestor in current_directory.ancestors() {
67 68 if is_dir(ancestor.join(".hg"))? {
68 69 return Ok(ancestor.to_path_buf());
69 70 }
70 71 }
71 72 return Err(RepoError::NotFound {
72 73 at: current_directory,
73 74 });
74 75 }
75 76
76 77 /// Find a repository, either at the given path (which must contain a `.hg`
77 78 /// sub-directory) or by searching the current directory and its
78 79 /// ancestors.
79 80 ///
80 81 /// A method with two very different "modes" like this is usually a code smell
81 82 /// to make two methods instead, but in this case an `Option` is what rhg
82 83 /// sub-commands get from Clap for the `-R` / `--repository` CLI argument.
83 84 /// Having two methods would just move that `if` to almost all callers.
84 85 pub fn find(
85 86 config: &Config,
86 87 explicit_path: Option<PathBuf>,
87 88 ) -> Result<Self, RepoError> {
88 89 if let Some(root) = explicit_path {
89 90 if is_dir(root.join(".hg"))? {
90 91 Self::new_at_path(root.to_owned(), config)
91 92 } else if is_file(&root)? {
92 93 Err(HgError::unsupported("bundle repository").into())
93 94 } else {
94 95 Err(RepoError::NotFound {
95 96 at: root.to_owned(),
96 97 })
97 98 }
98 99 } else {
99 100 let root = Self::find_repo_root()?;
100 101 Self::new_at_path(root, config)
101 102 }
102 103 }
103 104
104 105 /// To be called after checking that `.hg` is a sub-directory
105 106 fn new_at_path(
106 107 working_directory: PathBuf,
107 108 config: &Config,
108 109 ) -> Result<Self, RepoError> {
109 110 let dot_hg = working_directory.join(".hg");
110 111
111 112 let mut repo_config_files = Vec::new();
112 113 repo_config_files.push(dot_hg.join("hgrc"));
113 114 repo_config_files.push(dot_hg.join("hgrc-not-shared"));
114 115
115 116 let hg_vfs = Vfs { base: &dot_hg };
116 117 let mut reqs = requirements::load_if_exists(hg_vfs)?;
117 118 let relative =
118 119 reqs.contains(requirements::RELATIVE_SHARED_REQUIREMENT);
119 120 let shared =
120 121 reqs.contains(requirements::SHARED_REQUIREMENT) || relative;
121 122
122 123 // From `mercurial/localrepo.py`:
123 124 //
124 125 // if .hg/requires contains the sharesafe requirement, it means
125 126 // there exists a `.hg/store/requires` too and we should read it
126 127 // NOTE: presence of SHARESAFE_REQUIREMENT imply that store requirement
127 128 // is present. We never write SHARESAFE_REQUIREMENT for a repo if store
128 129 // is not present, refer checkrequirementscompat() for that
129 130 //
130 131 // However, if SHARESAFE_REQUIREMENT is not present, it means that the
131 132 // repository was shared the old way. We check the share source
132 133 // .hg/requires for SHARESAFE_REQUIREMENT to detect whether the
133 134 // current repository needs to be reshared
134 135 let share_safe = reqs.contains(requirements::SHARESAFE_REQUIREMENT);
135 136
136 137 let store_path;
137 138 if !shared {
138 139 store_path = dot_hg.join("store");
139 140 } else {
140 141 let bytes = hg_vfs.read("sharedpath")?;
141 142 let mut shared_path =
142 143 get_path_from_bytes(bytes.trim_end_matches(|b| b == b'\n'))
143 144 .to_owned();
144 145 if relative {
145 146 shared_path = dot_hg.join(shared_path)
146 147 }
147 148 if !is_dir(&shared_path)? {
148 149 return Err(HgError::corrupted(format!(
149 150 ".hg/sharedpath points to nonexistent directory {}",
150 151 shared_path.display()
151 152 ))
152 153 .into());
153 154 }
154 155
155 156 store_path = shared_path.join("store");
156 157
157 158 let source_is_share_safe =
158 159 requirements::load(Vfs { base: &shared_path })?
159 160 .contains(requirements::SHARESAFE_REQUIREMENT);
160 161
161 162 if share_safe != source_is_share_safe {
162 163 return Err(HgError::unsupported("share-safe mismatch").into());
163 164 }
164 165
165 166 if share_safe {
166 167 repo_config_files.insert(0, shared_path.join("hgrc"))
167 168 }
168 169 }
169 170 if share_safe {
170 171 reqs.extend(requirements::load(Vfs { base: &store_path })?);
171 172 }
172 173
173 174 let repo_config = if std::env::var_os("HGRCSKIPREPO").is_none() {
174 175 config.combine_with_repo(&repo_config_files)?
175 176 } else {
176 177 config.clone()
177 178 };
178 179
179 180 let repo = Self {
180 181 requirements: reqs,
181 182 working_directory,
182 183 store: store_path,
183 184 dot_hg,
184 185 config: repo_config,
185 186 dirstate_parents: LazyCell::new(),
186 187 dirstate_data_file_uuid: LazyCell::new(),
187 188 dirstate_map: LazyCell::new(),
188 189 changelog: LazyCell::new(),
189 190 manifestlog: LazyCell::new(),
190 191 };
191 192
192 193 requirements::check(&repo)?;
193 194
194 195 Ok(repo)
195 196 }
196 197
197 198 pub fn working_directory_path(&self) -> &Path {
198 199 &self.working_directory
199 200 }
200 201
201 202 pub fn requirements(&self) -> &HashSet<String> {
202 203 &self.requirements
203 204 }
204 205
205 206 pub fn config(&self) -> &Config {
206 207 &self.config
207 208 }
208 209
209 210 /// For accessing repository files (in `.hg`), except for the store
210 211 /// (`.hg/store`).
211 212 pub fn hg_vfs(&self) -> Vfs<'_> {
212 213 Vfs { base: &self.dot_hg }
213 214 }
214 215
215 216 /// For accessing repository store files (in `.hg/store`)
216 217 pub fn store_vfs(&self) -> Vfs<'_> {
217 218 Vfs { base: &self.store }
218 219 }
219 220
220 221 /// For accessing the working copy
221 222 pub fn working_directory_vfs(&self) -> Vfs<'_> {
222 223 Vfs {
223 224 base: &self.working_directory,
224 225 }
225 226 }
226 227
227 228 pub fn try_with_wlock_no_wait<R>(
228 229 &self,
229 230 f: impl FnOnce() -> R,
230 231 ) -> Result<R, LockError> {
231 232 try_with_lock_no_wait(self.hg_vfs(), "wlock", f)
232 233 }
233 234
234 235 pub fn has_dirstate_v2(&self) -> bool {
235 236 self.requirements
236 237 .contains(requirements::DIRSTATE_V2_REQUIREMENT)
237 238 }
238 239
239 240 pub fn has_sparse(&self) -> bool {
240 241 self.requirements.contains(requirements::SPARSE_REQUIREMENT)
241 242 }
242 243
243 244 pub fn has_narrow(&self) -> bool {
244 245 self.requirements.contains(requirements::NARROW_REQUIREMENT)
245 246 }
246 247
247 248 pub fn has_nodemap(&self) -> bool {
248 249 self.requirements
249 250 .contains(requirements::NODEMAP_REQUIREMENT)
250 251 }
251 252
252 253 fn dirstate_file_contents(&self) -> Result<Vec<u8>, HgError> {
253 254 Ok(self
254 255 .hg_vfs()
255 256 .read("dirstate")
256 257 .io_not_found_as_none()?
257 258 .unwrap_or(Vec::new()))
258 259 }
259 260
260 261 pub fn dirstate_parents(&self) -> Result<DirstateParents, HgError> {
261 262 Ok(*self
262 263 .dirstate_parents
263 264 .get_or_init(|| self.read_dirstate_parents())?)
264 265 }
265 266
266 267 fn read_dirstate_parents(&self) -> Result<DirstateParents, HgError> {
267 268 let dirstate = self.dirstate_file_contents()?;
268 269 let parents = if dirstate.is_empty() {
269 270 if self.has_dirstate_v2() {
270 271 self.dirstate_data_file_uuid.set(None);
271 272 }
272 273 DirstateParents::NULL
273 274 } else if self.has_dirstate_v2() {
274 275 let docket =
275 276 crate::dirstate_tree::on_disk::read_docket(&dirstate)?;
276 277 self.dirstate_data_file_uuid
277 278 .set(Some(docket.uuid.to_owned()));
278 279 docket.parents()
279 280 } else {
280 281 crate::dirstate::parsers::parse_dirstate_parents(&dirstate)?
281 282 .clone()
282 283 };
283 284 self.dirstate_parents.set(parents);
284 285 Ok(parents)
285 286 }
286 287
287 288 fn read_dirstate_data_file_uuid(
288 289 &self,
289 290 ) -> Result<Option<Vec<u8>>, HgError> {
290 291 assert!(
291 292 self.has_dirstate_v2(),
292 293 "accessing dirstate data file ID without dirstate-v2"
293 294 );
294 295 let dirstate = self.dirstate_file_contents()?;
295 296 if dirstate.is_empty() {
296 297 self.dirstate_parents.set(DirstateParents::NULL);
297 298 Ok(None)
298 299 } else {
299 300 let docket =
300 301 crate::dirstate_tree::on_disk::read_docket(&dirstate)?;
301 302 self.dirstate_parents.set(docket.parents());
302 303 Ok(Some(docket.uuid.to_owned()))
303 304 }
304 305 }
305 306
306 307 fn new_dirstate_map(&self) -> Result<OwningDirstateMap, DirstateError> {
307 308 let dirstate_file_contents = self.dirstate_file_contents()?;
308 309 if dirstate_file_contents.is_empty() {
309 310 self.dirstate_parents.set(DirstateParents::NULL);
310 311 if self.has_dirstate_v2() {
311 312 self.dirstate_data_file_uuid.set(None);
312 313 }
313 314 Ok(OwningDirstateMap::new_empty(Vec::new()))
314 315 } else if self.has_dirstate_v2() {
315 316 let docket = crate::dirstate_tree::on_disk::read_docket(
316 317 &dirstate_file_contents,
317 318 )?;
318 319 self.dirstate_parents.set(docket.parents());
319 320 self.dirstate_data_file_uuid
320 321 .set(Some(docket.uuid.to_owned()));
321 322 let data_size = docket.data_size();
322 323 let metadata = docket.tree_metadata();
323 324 if crate::vfs::is_on_nfs_mount(docket.data_filename()) {
324 325 // Don't mmap on NFS to prevent `SIGBUS` error on deletion
325 326 OwningDirstateMap::new_v2(
326 327 self.hg_vfs().read(docket.data_filename())?,
327 328 data_size,
328 329 metadata,
329 330 )
330 331 } else if let Some(data_mmap) = self
331 332 .hg_vfs()
332 333 .mmap_open(docket.data_filename())
333 334 .io_not_found_as_none()?
334 335 {
335 336 OwningDirstateMap::new_v2(data_mmap, data_size, metadata)
336 337 } else {
337 338 OwningDirstateMap::new_v2(Vec::new(), data_size, metadata)
338 339 }
339 340 } else {
340 341 let (map, parents) =
341 342 OwningDirstateMap::new_v1(dirstate_file_contents)?;
342 343 self.dirstate_parents.set(parents);
343 344 Ok(map)
344 345 }
345 346 }
346 347
347 348 pub fn dirstate_map(
348 349 &self,
349 350 ) -> Result<Ref<OwningDirstateMap>, DirstateError> {
350 351 self.dirstate_map.get_or_init(|| self.new_dirstate_map())
351 352 }
352 353
353 354 pub fn dirstate_map_mut(
354 355 &self,
355 356 ) -> Result<RefMut<OwningDirstateMap>, DirstateError> {
356 357 self.dirstate_map
357 358 .get_mut_or_init(|| self.new_dirstate_map())
358 359 }
359 360
360 361 fn new_changelog(&self) -> Result<Changelog, HgError> {
361 362 Changelog::open(&self.store_vfs(), self.has_nodemap())
362 363 }
363 364
364 365 pub fn changelog(&self) -> Result<Ref<Changelog>, HgError> {
365 366 self.changelog.get_or_init(|| self.new_changelog())
366 367 }
367 368
368 369 pub fn changelog_mut(&self) -> Result<RefMut<Changelog>, HgError> {
369 370 self.changelog.get_mut_or_init(|| self.new_changelog())
370 371 }
371 372
372 373 fn new_manifestlog(&self) -> Result<Manifestlog, HgError> {
373 374 Manifestlog::open(&self.store_vfs(), self.has_nodemap())
374 375 }
375 376
376 377 pub fn manifestlog(&self) -> Result<Ref<Manifestlog>, HgError> {
377 378 self.manifestlog.get_or_init(|| self.new_manifestlog())
378 379 }
379 380
380 381 pub fn manifestlog_mut(&self) -> Result<RefMut<Manifestlog>, HgError> {
381 382 self.manifestlog.get_mut_or_init(|| self.new_manifestlog())
382 383 }
383 384
384 385 /// Returns the manifest of the *changeset* with the given node ID
385 386 pub fn manifest_for_node(
386 387 &self,
387 388 node: impl Into<NodePrefix>,
388 389 ) -> Result<Manifest, RevlogError> {
389 390 self.manifestlog()?.data_for_node(
390 391 self.changelog()?
391 392 .data_for_node(node.into())?
392 393 .manifest_node()?
393 394 .into(),
394 395 )
395 396 }
396 397
397 398 /// Returns the manifest of the *changeset* with the given revision number
398 399 pub fn manifest_for_rev(
399 400 &self,
400 401 revision: Revision,
401 402 ) -> Result<Manifest, RevlogError> {
402 403 self.manifestlog()?.data_for_node(
403 404 self.changelog()?
404 405 .data_for_rev(revision)?
405 406 .manifest_node()?
406 407 .into(),
407 408 )
408 409 }
409 410
410 411 pub fn has_subrepos(&self) -> Result<bool, DirstateError> {
411 412 if let Some(entry) = self.dirstate_map()?.get(HgPath::new(".hgsub"))? {
412 413 Ok(entry.tracked())
413 414 } else {
414 415 Ok(false)
415 416 }
416 417 }
417 418
418 419 pub fn filelog(&self, path: &HgPath) -> Result<Filelog, HgError> {
419 420 Filelog::open(self, path)
420 421 }
421 422
422 423 /// Write to disk any updates that were made through `dirstate_map_mut`.
423 424 ///
424 425 /// The "wlock" must be held while calling this.
425 426 /// See for example `try_with_wlock_no_wait`.
426 427 ///
427 428 /// TODO: have a `WritableRepo` type only accessible while holding the
428 429 /// lock?
429 430 pub fn write_dirstate(&self) -> Result<(), DirstateError> {
430 431 let map = self.dirstate_map()?;
431 432 // TODO: Maintain a `DirstateMap::dirty` flag, and return early here if
432 433 // it’s unset
433 434 let parents = self.dirstate_parents()?;
434 435 let (packed_dirstate, old_uuid_to_remove) = if self.has_dirstate_v2() {
435 436 let uuid_opt = self
436 437 .dirstate_data_file_uuid
437 438 .get_or_init(|| self.read_dirstate_data_file_uuid())?;
438 439 let uuid_opt = uuid_opt.as_ref();
439 let can_append = uuid_opt.is_some();
440 let write_mode = if uuid_opt.is_some() {
441 DirstateMapWriteMode::Auto
442 } else {
443 DirstateMapWriteMode::ForceNewDataFile
444 };
440 445 let (data, tree_metadata, append, old_data_size) =
441 map.pack_v2(can_append)?;
446 map.pack_v2(write_mode)?;
442 447
443 448 // Reuse the uuid, or generate a new one, keeping the old for
444 449 // deletion.
445 450 let (uuid, old_uuid) = match uuid_opt {
446 451 Some(uuid) => {
447 452 let as_str = std::str::from_utf8(uuid)
448 453 .map_err(|_| {
449 454 HgError::corrupted(
450 455 "non-UTF-8 dirstate data file ID",
451 456 )
452 457 })?
453 458 .to_owned();
454 459 if append {
455 460 (as_str, None)
456 461 } else {
457 462 (DirstateDocket::new_uid(), Some(as_str))
458 463 }
459 464 }
460 465 None => (DirstateDocket::new_uid(), None),
461 466 };
462 467
463 468 let data_filename = format!("dirstate.{}", uuid);
464 469 let data_filename = self.hg_vfs().join(data_filename);
465 470 let mut options = std::fs::OpenOptions::new();
466 471 options.write(true);
467 472
468 473 // Why are we not using the O_APPEND flag when appending?
469 474 //
470 475 // - O_APPEND makes it trickier to deal with garbage at the end of
471 476 // the file, left by a previous uncommitted transaction. By
472 477 // starting the write at [old_data_size] we make sure we erase
473 478 // all such garbage.
474 479 //
475 480 // - O_APPEND requires to special-case 0-byte writes, whereas we
476 481 // don't need that.
477 482 //
478 483 // - Some OSes have bugs in implementation O_APPEND:
479 484 // revlog.py talks about a Solaris bug, but we also saw some ZFS
480 485 // bug: https://github.com/openzfs/zfs/pull/3124,
481 486 // https://github.com/openzfs/zfs/issues/13370
482 487 //
483 488 if !append {
484 489 log::trace!("creating a new dirstate data file");
485 490 options.create_new(true);
486 491 } else {
487 492 log::trace!("appending to the dirstate data file");
488 493 }
489 494
490 495 let data_size = (|| {
491 496 // TODO: loop and try another random ID if !append and this
492 497 // returns `ErrorKind::AlreadyExists`? Collision chance of two
493 498 // random IDs is one in 2**32
494 499 let mut file = options.open(&data_filename)?;
495 500 if append {
496 501 file.seek(SeekFrom::Start(old_data_size as u64))?;
497 502 }
498 503 file.write_all(&data)?;
499 504 file.flush()?;
500 505 file.seek(SeekFrom::Current(0))
501 506 })()
502 507 .when_writing_file(&data_filename)?;
503 508
504 509 let packed_dirstate = DirstateDocket::serialize(
505 510 parents,
506 511 tree_metadata,
507 512 data_size,
508 513 uuid.as_bytes(),
509 514 )
510 515 .map_err(|_: std::num::TryFromIntError| {
511 516 HgError::corrupted("overflow in dirstate docket serialization")
512 517 })?;
513 518
514 519 (packed_dirstate, old_uuid)
515 520 } else {
516 521 (map.pack_v1(parents)?, None)
517 522 };
518 523
519 524 let vfs = self.hg_vfs();
520 525 vfs.atomic_write("dirstate", &packed_dirstate)?;
521 526 if let Some(uuid) = old_uuid_to_remove {
522 527 // Remove the old data file after the new docket pointing to the
523 528 // new data file was written.
524 529 vfs.remove_file(format!("dirstate.{}", uuid))?;
525 530 }
526 531 Ok(())
527 532 }
528 533 }
529 534
530 535 /// Lazily-initialized component of `Repo` with interior mutability
531 536 ///
532 537 /// This differs from `OnceCell` in that the value can still be "deinitialized"
533 538 /// later by setting its inner `Option` to `None`. It also takes the
534 539 /// initialization function as an argument when the value is requested, not
535 540 /// when the instance is created.
536 541 struct LazyCell<T> {
537 542 value: RefCell<Option<T>>,
538 543 }
539 544
540 545 impl<T> LazyCell<T> {
541 546 fn new() -> Self {
542 547 Self {
543 548 value: RefCell::new(None),
544 549 }
545 550 }
546 551
547 552 fn set(&self, value: T) {
548 553 *self.value.borrow_mut() = Some(value)
549 554 }
550 555
551 556 fn get_or_init<E>(
552 557 &self,
553 558 init: impl Fn() -> Result<T, E>,
554 559 ) -> Result<Ref<T>, E> {
555 560 let mut borrowed = self.value.borrow();
556 561 if borrowed.is_none() {
557 562 drop(borrowed);
558 563 // Only use `borrow_mut` if it is really needed to avoid panic in
559 564 // case there is another outstanding borrow but mutation is not
560 565 // needed.
561 566 *self.value.borrow_mut() = Some(init()?);
562 567 borrowed = self.value.borrow()
563 568 }
564 569 Ok(Ref::map(borrowed, |option| option.as_ref().unwrap()))
565 570 }
566 571
567 572 fn get_mut_or_init<E>(
568 573 &self,
569 574 init: impl Fn() -> Result<T, E>,
570 575 ) -> Result<RefMut<T>, E> {
571 576 let mut borrowed = self.value.borrow_mut();
572 577 if borrowed.is_none() {
573 578 *borrowed = Some(init()?);
574 579 }
575 580 Ok(RefMut::map(borrowed, |option| option.as_mut().unwrap()))
576 581 }
577 582 }
@@ -1,544 +1,550 b''
1 1 // dirstate_map.rs
2 2 //
3 3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
4 4 //
5 5 // This software may be used and distributed according to the terms of the
6 6 // GNU General Public License version 2 or any later version.
7 7
8 8 //! Bindings for the `hg::dirstate::dirstate_map` file provided by the
9 9 //! `hg-core` package.
10 10
11 11 use std::cell::{RefCell, RefMut};
12 12 use std::convert::TryInto;
13 13
14 14 use cpython::{
15 15 exc, PyBool, PyBytes, PyClone, PyDict, PyErr, PyList, PyNone, PyObject,
16 16 PyResult, Python, PythonObject, ToPyObject, UnsafePyLeaked,
17 17 };
18 18 use hg::dirstate::{ParentFileData, TruncatedTimestamp};
19 19
20 20 use crate::{
21 21 dirstate::copymap::{CopyMap, CopyMapItemsIterator, CopyMapKeysIterator},
22 22 dirstate::item::DirstateItem,
23 23 pybytes_deref::PyBytesDeref,
24 24 };
25 25 use hg::{
26 dirstate::StateMapIter, dirstate_tree::on_disk::DirstateV2ParseError,
26 dirstate::StateMapIter, dirstate_tree::dirstate_map::DirstateMapWriteMode,
27 dirstate_tree::on_disk::DirstateV2ParseError,
27 28 dirstate_tree::owning::OwningDirstateMap, revlog::Node,
28 29 utils::files::normalize_case, utils::hg_path::HgPath, DirstateEntry,
29 30 DirstateError, DirstateParents,
30 31 };
31 32
32 33 // TODO
33 34 // This object needs to share references to multiple members of its Rust
34 35 // inner struct, namely `copy_map`, `dirs` and `all_dirs`.
35 36 // Right now `CopyMap` is done, but it needs to have an explicit reference
36 37 // to `RustDirstateMap` which itself needs to have an encapsulation for
37 38 // every method in `CopyMap` (copymapcopy, etc.).
38 39 // This is ugly and hard to maintain.
39 40 // The same logic applies to `dirs` and `all_dirs`, however the `Dirs`
40 41 // `py_class!` is already implemented and does not mention
41 42 // `RustDirstateMap`, rightfully so.
42 43 // All attributes also have to have a separate refcount data attribute for
43 44 // leaks, with all methods that go along for reference sharing.
44 45 py_class!(pub class DirstateMap |py| {
45 46 @shared data inner: OwningDirstateMap;
46 47
47 48 /// Returns a `(dirstate_map, parents)` tuple
48 49 @staticmethod
49 50 def new_v1(
50 51 on_disk: PyBytes,
51 52 ) -> PyResult<PyObject> {
52 53 let on_disk = PyBytesDeref::new(py, on_disk);
53 54 let (map, parents) = OwningDirstateMap::new_v1(on_disk)
54 55 .map_err(|e| dirstate_error(py, e))?;
55 56 let map = Self::create_instance(py, map)?;
56 57 let p1 = PyBytes::new(py, parents.p1.as_bytes());
57 58 let p2 = PyBytes::new(py, parents.p2.as_bytes());
58 59 let parents = (p1, p2);
59 60 Ok((map, parents).to_py_object(py).into_object())
60 61 }
61 62
62 63 /// Returns a DirstateMap
63 64 @staticmethod
64 65 def new_v2(
65 66 on_disk: PyBytes,
66 67 data_size: usize,
67 68 tree_metadata: PyBytes,
68 69 ) -> PyResult<PyObject> {
69 70 let dirstate_error = |e: DirstateError| {
70 71 PyErr::new::<exc::OSError, _>(py, format!("Dirstate error: {:?}", e))
71 72 };
72 73 let on_disk = PyBytesDeref::new(py, on_disk);
73 74 let map = OwningDirstateMap::new_v2(
74 75 on_disk, data_size, tree_metadata.data(py),
75 76 ).map_err(dirstate_error)?;
76 77 let map = Self::create_instance(py, map)?;
77 78 Ok(map.into_object())
78 79 }
79 80
80 81 def clear(&self) -> PyResult<PyObject> {
81 82 self.inner(py).borrow_mut().clear();
82 83 Ok(py.None())
83 84 }
84 85
85 86 def get(
86 87 &self,
87 88 key: PyObject,
88 89 default: Option<PyObject> = None
89 90 ) -> PyResult<Option<PyObject>> {
90 91 let key = key.extract::<PyBytes>(py)?;
91 92 match self
92 93 .inner(py)
93 94 .borrow()
94 95 .get(HgPath::new(key.data(py)))
95 96 .map_err(|e| v2_error(py, e))?
96 97 {
97 98 Some(entry) => {
98 99 Ok(Some(DirstateItem::new_as_pyobject(py, entry)?))
99 100 },
100 101 None => Ok(default)
101 102 }
102 103 }
103 104
104 105 def set_tracked(&self, f: PyObject) -> PyResult<PyBool> {
105 106 let bytes = f.extract::<PyBytes>(py)?;
106 107 let path = HgPath::new(bytes.data(py));
107 108 let res = self.inner(py).borrow_mut().set_tracked(path);
108 109 let was_tracked = res.or_else(|_| {
109 110 Err(PyErr::new::<exc::OSError, _>(py, "Dirstate error".to_string()))
110 111 })?;
111 112 Ok(was_tracked.to_py_object(py))
112 113 }
113 114
114 115 def set_untracked(&self, f: PyObject) -> PyResult<PyBool> {
115 116 let bytes = f.extract::<PyBytes>(py)?;
116 117 let path = HgPath::new(bytes.data(py));
117 118 let res = self.inner(py).borrow_mut().set_untracked(path);
118 119 let was_tracked = res.or_else(|_| {
119 120 Err(PyErr::new::<exc::OSError, _>(py, "Dirstate error".to_string()))
120 121 })?;
121 122 Ok(was_tracked.to_py_object(py))
122 123 }
123 124
124 125 def set_clean(
125 126 &self,
126 127 f: PyObject,
127 128 mode: u32,
128 129 size: u32,
129 130 mtime: (i64, u32, bool)
130 131 ) -> PyResult<PyNone> {
131 132 let (mtime_s, mtime_ns, second_ambiguous) = mtime;
132 133 let timestamp = TruncatedTimestamp::new_truncate(
133 134 mtime_s, mtime_ns, second_ambiguous
134 135 );
135 136 let bytes = f.extract::<PyBytes>(py)?;
136 137 let path = HgPath::new(bytes.data(py));
137 138 let res = self.inner(py).borrow_mut().set_clean(
138 139 path, mode, size, timestamp,
139 140 );
140 141 res.or_else(|_| {
141 142 Err(PyErr::new::<exc::OSError, _>(py, "Dirstate error".to_string()))
142 143 })?;
143 144 Ok(PyNone)
144 145 }
145 146
146 147 def set_possibly_dirty(&self, f: PyObject) -> PyResult<PyNone> {
147 148 let bytes = f.extract::<PyBytes>(py)?;
148 149 let path = HgPath::new(bytes.data(py));
149 150 let res = self.inner(py).borrow_mut().set_possibly_dirty(path);
150 151 res.or_else(|_| {
151 152 Err(PyErr::new::<exc::OSError, _>(py, "Dirstate error".to_string()))
152 153 })?;
153 154 Ok(PyNone)
154 155 }
155 156
156 157 def reset_state(
157 158 &self,
158 159 f: PyObject,
159 160 wc_tracked: bool,
160 161 p1_tracked: bool,
161 162 p2_info: bool,
162 163 has_meaningful_mtime: bool,
163 164 parentfiledata: Option<(u32, u32, Option<(i64, u32, bool)>)>,
164 165 ) -> PyResult<PyNone> {
165 166 let mut has_meaningful_mtime = has_meaningful_mtime;
166 167 let parent_file_data = match parentfiledata {
167 168 None => {
168 169 has_meaningful_mtime = false;
169 170 None
170 171 },
171 172 Some(data) => {
172 173 let (mode, size, mtime_info) = data;
173 174 let mtime = if let Some(mtime_info) = mtime_info {
174 175 let (mtime_s, mtime_ns, second_ambiguous) = mtime_info;
175 176 let timestamp = TruncatedTimestamp::new_truncate(
176 177 mtime_s, mtime_ns, second_ambiguous
177 178 );
178 179 Some(timestamp)
179 180 } else {
180 181 has_meaningful_mtime = false;
181 182 None
182 183 };
183 184 Some(ParentFileData {
184 185 mode_size: Some((mode, size)),
185 186 mtime,
186 187 })
187 188 }
188 189 };
189 190 let bytes = f.extract::<PyBytes>(py)?;
190 191 let path = HgPath::new(bytes.data(py));
191 192 let res = self.inner(py).borrow_mut().reset_state(
192 193 path,
193 194 wc_tracked,
194 195 p1_tracked,
195 196 p2_info,
196 197 has_meaningful_mtime,
197 198 parent_file_data,
198 199 );
199 200 res.or_else(|_| {
200 201 Err(PyErr::new::<exc::OSError, _>(py, "Dirstate error".to_string()))
201 202 })?;
202 203 Ok(PyNone)
203 204 }
204 205
205 206 def hastrackeddir(&self, d: PyObject) -> PyResult<PyBool> {
206 207 let d = d.extract::<PyBytes>(py)?;
207 208 Ok(self.inner(py).borrow_mut()
208 209 .has_tracked_dir(HgPath::new(d.data(py)))
209 210 .map_err(|e| {
210 211 PyErr::new::<exc::ValueError, _>(py, e.to_string())
211 212 })?
212 213 .to_py_object(py))
213 214 }
214 215
215 216 def hasdir(&self, d: PyObject) -> PyResult<PyBool> {
216 217 let d = d.extract::<PyBytes>(py)?;
217 218 Ok(self.inner(py).borrow_mut()
218 219 .has_dir(HgPath::new(d.data(py)))
219 220 .map_err(|e| {
220 221 PyErr::new::<exc::ValueError, _>(py, e.to_string())
221 222 })?
222 223 .to_py_object(py))
223 224 }
224 225
225 226 def write_v1(
226 227 &self,
227 228 p1: PyObject,
228 229 p2: PyObject,
229 230 ) -> PyResult<PyBytes> {
230 231 let inner = self.inner(py).borrow();
231 232 let parents = DirstateParents {
232 233 p1: extract_node_id(py, &p1)?,
233 234 p2: extract_node_id(py, &p2)?,
234 235 };
235 236 let result = inner.pack_v1(parents);
236 237 match result {
237 238 Ok(packed) => Ok(PyBytes::new(py, &packed)),
238 239 Err(_) => Err(PyErr::new::<exc::OSError, _>(
239 240 py,
240 241 "Dirstate error".to_string(),
241 242 )),
242 243 }
243 244 }
244 245
245 246 /// Returns new data together with whether that data should be appended to
246 247 /// the existing data file whose content is at `self.on_disk` (True),
247 248 /// instead of written to a new data file (False).
248 249 def write_v2(
249 250 &self,
250 can_append: bool,
251 write_mode: usize,
251 252 ) -> PyResult<PyObject> {
252 253 let inner = self.inner(py).borrow();
253 let result = inner.pack_v2(can_append);
254 let rust_write_mode = match write_mode {
255 0 => DirstateMapWriteMode::Auto,
256 1 => DirstateMapWriteMode::ForceNewDataFile,
257 _ => DirstateMapWriteMode::Auto, // XXX should we error out?
258 };
259 let result = inner.pack_v2(rust_write_mode);
254 260 match result {
255 261 Ok((packed, tree_metadata, append, _old_data_size)) => {
256 262 let packed = PyBytes::new(py, &packed);
257 263 let tree_metadata = PyBytes::new(py, tree_metadata.as_bytes());
258 264 let tuple = (packed, tree_metadata, append);
259 265 Ok(tuple.to_py_object(py).into_object())
260 266 },
261 267 Err(_) => Err(PyErr::new::<exc::OSError, _>(
262 268 py,
263 269 "Dirstate error".to_string(),
264 270 )),
265 271 }
266 272 }
267 273
268 274 def filefoldmapasdict(&self) -> PyResult<PyDict> {
269 275 let dict = PyDict::new(py);
270 276 for item in self.inner(py).borrow_mut().iter() {
271 277 let (path, entry) = item.map_err(|e| v2_error(py, e))?;
272 278 if !entry.removed() {
273 279 let key = normalize_case(path);
274 280 let value = path;
275 281 dict.set_item(
276 282 py,
277 283 PyBytes::new(py, key.as_bytes()).into_object(),
278 284 PyBytes::new(py, value.as_bytes()).into_object(),
279 285 )?;
280 286 }
281 287 }
282 288 Ok(dict)
283 289 }
284 290
285 291 def __len__(&self) -> PyResult<usize> {
286 292 Ok(self.inner(py).borrow().len())
287 293 }
288 294
289 295 def __contains__(&self, key: PyObject) -> PyResult<bool> {
290 296 let key = key.extract::<PyBytes>(py)?;
291 297 self.inner(py)
292 298 .borrow()
293 299 .contains_key(HgPath::new(key.data(py)))
294 300 .map_err(|e| v2_error(py, e))
295 301 }
296 302
297 303 def __getitem__(&self, key: PyObject) -> PyResult<PyObject> {
298 304 let key = key.extract::<PyBytes>(py)?;
299 305 let key = HgPath::new(key.data(py));
300 306 match self
301 307 .inner(py)
302 308 .borrow()
303 309 .get(key)
304 310 .map_err(|e| v2_error(py, e))?
305 311 {
306 312 Some(entry) => {
307 313 Ok(DirstateItem::new_as_pyobject(py, entry)?)
308 314 },
309 315 None => Err(PyErr::new::<exc::KeyError, _>(
310 316 py,
311 317 String::from_utf8_lossy(key.as_bytes()),
312 318 )),
313 319 }
314 320 }
315 321
316 322 def keys(&self) -> PyResult<DirstateMapKeysIterator> {
317 323 let leaked_ref = self.inner(py).leak_immutable();
318 324 DirstateMapKeysIterator::from_inner(
319 325 py,
320 326 unsafe { leaked_ref.map(py, |o| o.iter()) },
321 327 )
322 328 }
323 329
324 330 def items(&self) -> PyResult<DirstateMapItemsIterator> {
325 331 let leaked_ref = self.inner(py).leak_immutable();
326 332 DirstateMapItemsIterator::from_inner(
327 333 py,
328 334 unsafe { leaked_ref.map(py, |o| o.iter()) },
329 335 )
330 336 }
331 337
332 338 def __iter__(&self) -> PyResult<DirstateMapKeysIterator> {
333 339 let leaked_ref = self.inner(py).leak_immutable();
334 340 DirstateMapKeysIterator::from_inner(
335 341 py,
336 342 unsafe { leaked_ref.map(py, |o| o.iter()) },
337 343 )
338 344 }
339 345
340 346 // TODO all copymap* methods, see docstring above
341 347 def copymapcopy(&self) -> PyResult<PyDict> {
342 348 let dict = PyDict::new(py);
343 349 for item in self.inner(py).borrow().copy_map_iter() {
344 350 let (key, value) = item.map_err(|e| v2_error(py, e))?;
345 351 dict.set_item(
346 352 py,
347 353 PyBytes::new(py, key.as_bytes()),
348 354 PyBytes::new(py, value.as_bytes()),
349 355 )?;
350 356 }
351 357 Ok(dict)
352 358 }
353 359
354 360 def copymapgetitem(&self, key: PyObject) -> PyResult<PyBytes> {
355 361 let key = key.extract::<PyBytes>(py)?;
356 362 match self
357 363 .inner(py)
358 364 .borrow()
359 365 .copy_map_get(HgPath::new(key.data(py)))
360 366 .map_err(|e| v2_error(py, e))?
361 367 {
362 368 Some(copy) => Ok(PyBytes::new(py, copy.as_bytes())),
363 369 None => Err(PyErr::new::<exc::KeyError, _>(
364 370 py,
365 371 String::from_utf8_lossy(key.data(py)),
366 372 )),
367 373 }
368 374 }
369 375 def copymap(&self) -> PyResult<CopyMap> {
370 376 CopyMap::from_inner(py, self.clone_ref(py))
371 377 }
372 378
373 379 def copymaplen(&self) -> PyResult<usize> {
374 380 Ok(self.inner(py).borrow().copy_map_len())
375 381 }
376 382 def copymapcontains(&self, key: PyObject) -> PyResult<bool> {
377 383 let key = key.extract::<PyBytes>(py)?;
378 384 self.inner(py)
379 385 .borrow()
380 386 .copy_map_contains_key(HgPath::new(key.data(py)))
381 387 .map_err(|e| v2_error(py, e))
382 388 }
383 389 def copymapget(
384 390 &self,
385 391 key: PyObject,
386 392 default: Option<PyObject>
387 393 ) -> PyResult<Option<PyObject>> {
388 394 let key = key.extract::<PyBytes>(py)?;
389 395 match self
390 396 .inner(py)
391 397 .borrow()
392 398 .copy_map_get(HgPath::new(key.data(py)))
393 399 .map_err(|e| v2_error(py, e))?
394 400 {
395 401 Some(copy) => Ok(Some(
396 402 PyBytes::new(py, copy.as_bytes()).into_object(),
397 403 )),
398 404 None => Ok(default),
399 405 }
400 406 }
401 407 def copymapsetitem(
402 408 &self,
403 409 key: PyObject,
404 410 value: PyObject
405 411 ) -> PyResult<PyObject> {
406 412 let key = key.extract::<PyBytes>(py)?;
407 413 let value = value.extract::<PyBytes>(py)?;
408 414 self.inner(py)
409 415 .borrow_mut()
410 416 .copy_map_insert(
411 417 HgPath::new(key.data(py)),
412 418 HgPath::new(value.data(py)),
413 419 )
414 420 .map_err(|e| v2_error(py, e))?;
415 421 Ok(py.None())
416 422 }
417 423 def copymappop(
418 424 &self,
419 425 key: PyObject,
420 426 default: Option<PyObject>
421 427 ) -> PyResult<Option<PyObject>> {
422 428 let key = key.extract::<PyBytes>(py)?;
423 429 match self
424 430 .inner(py)
425 431 .borrow_mut()
426 432 .copy_map_remove(HgPath::new(key.data(py)))
427 433 .map_err(|e| v2_error(py, e))?
428 434 {
429 435 Some(copy) => Ok(Some(
430 436 PyBytes::new(py, copy.as_bytes()).into_object(),
431 437 )),
432 438 None => Ok(default),
433 439 }
434 440 }
435 441
436 442 def copymapiter(&self) -> PyResult<CopyMapKeysIterator> {
437 443 let leaked_ref = self.inner(py).leak_immutable();
438 444 CopyMapKeysIterator::from_inner(
439 445 py,
440 446 unsafe { leaked_ref.map(py, |o| o.copy_map_iter()) },
441 447 )
442 448 }
443 449
444 450 def copymapitemsiter(&self) -> PyResult<CopyMapItemsIterator> {
445 451 let leaked_ref = self.inner(py).leak_immutable();
446 452 CopyMapItemsIterator::from_inner(
447 453 py,
448 454 unsafe { leaked_ref.map(py, |o| o.copy_map_iter()) },
449 455 )
450 456 }
451 457
452 458 def tracked_dirs(&self) -> PyResult<PyList> {
453 459 let dirs = PyList::new(py, &[]);
454 460 for path in self.inner(py).borrow_mut().iter_tracked_dirs()
455 461 .map_err(|e |dirstate_error(py, e))?
456 462 {
457 463 let path = path.map_err(|e| v2_error(py, e))?;
458 464 let path = PyBytes::new(py, path.as_bytes());
459 465 dirs.append(py, path.into_object())
460 466 }
461 467 Ok(dirs)
462 468 }
463 469
464 470 def setparents_fixup(&self) -> PyResult<PyDict> {
465 471 let dict = PyDict::new(py);
466 472 let copies = self.inner(py).borrow_mut().setparents_fixup();
467 473 for (key, value) in copies.map_err(|e| v2_error(py, e))? {
468 474 dict.set_item(
469 475 py,
470 476 PyBytes::new(py, key.as_bytes()),
471 477 PyBytes::new(py, value.as_bytes()),
472 478 )?;
473 479 }
474 480 Ok(dict)
475 481 }
476 482
477 483 def debug_iter(&self, all: bool) -> PyResult<PyList> {
478 484 let dirs = PyList::new(py, &[]);
479 485 for item in self.inner(py).borrow().debug_iter(all) {
480 486 let (path, (state, mode, size, mtime)) =
481 487 item.map_err(|e| v2_error(py, e))?;
482 488 let path = PyBytes::new(py, path.as_bytes());
483 489 let item = (path, state, mode, size, mtime);
484 490 dirs.append(py, item.to_py_object(py).into_object())
485 491 }
486 492 Ok(dirs)
487 493 }
488 494 });
489 495
490 496 impl DirstateMap {
491 497 pub fn get_inner_mut<'a>(
492 498 &'a self,
493 499 py: Python<'a>,
494 500 ) -> RefMut<'a, OwningDirstateMap> {
495 501 self.inner(py).borrow_mut()
496 502 }
497 503 fn translate_key(
498 504 py: Python,
499 505 res: Result<(&HgPath, DirstateEntry), DirstateV2ParseError>,
500 506 ) -> PyResult<Option<PyBytes>> {
501 507 let (f, _entry) = res.map_err(|e| v2_error(py, e))?;
502 508 Ok(Some(PyBytes::new(py, f.as_bytes())))
503 509 }
504 510 fn translate_key_value(
505 511 py: Python,
506 512 res: Result<(&HgPath, DirstateEntry), DirstateV2ParseError>,
507 513 ) -> PyResult<Option<(PyBytes, PyObject)>> {
508 514 let (f, entry) = res.map_err(|e| v2_error(py, e))?;
509 515 Ok(Some((
510 516 PyBytes::new(py, f.as_bytes()),
511 517 DirstateItem::new_as_pyobject(py, entry)?,
512 518 )))
513 519 }
514 520 }
515 521
516 522 py_shared_iterator!(
517 523 DirstateMapKeysIterator,
518 524 UnsafePyLeaked<StateMapIter<'static>>,
519 525 DirstateMap::translate_key,
520 526 Option<PyBytes>
521 527 );
522 528
523 529 py_shared_iterator!(
524 530 DirstateMapItemsIterator,
525 531 UnsafePyLeaked<StateMapIter<'static>>,
526 532 DirstateMap::translate_key_value,
527 533 Option<(PyBytes, PyObject)>
528 534 );
529 535
530 536 fn extract_node_id(py: Python, obj: &PyObject) -> PyResult<Node> {
531 537 let bytes = obj.extract::<PyBytes>(py)?;
532 538 match bytes.data(py).try_into() {
533 539 Ok(s) => Ok(s),
534 540 Err(e) => Err(PyErr::new::<exc::ValueError, _>(py, e.to_string())),
535 541 }
536 542 }
537 543
538 544 pub(super) fn v2_error(py: Python<'_>, _: DirstateV2ParseError) -> PyErr {
539 545 PyErr::new::<exc::ValueError, _>(py, "corrupted dirstate-v2")
540 546 }
541 547
542 548 fn dirstate_error(py: Python<'_>, e: DirstateError) -> PyErr {
543 549 PyErr::new::<exc::OSError, _>(py, format!("Dirstate error: {:?}", e))
544 550 }
General Comments 0
You need to be logged in to leave comments. Login now