##// END OF EJS Templates
rhg: remember the inode of .hg/dirstate...
Raphaël Gomès -
r51140:dbe09fb0 stable
parent child Browse files
Show More
@@ -1,737 +1,750 b''
1 1 # dirstatemap.py
2 2 #
3 3 # This software may be used and distributed according to the terms of the
4 4 # GNU General Public License version 2 or any later version.
5 5
6 6
7 7 from .i18n import _
8 8
9 9 from . import (
10 10 error,
11 11 pathutil,
12 12 policy,
13 13 testing,
14 14 txnutil,
15 15 util,
16 16 )
17 17
18 18 from .dirstateutils import (
19 19 docket as docketmod,
20 20 v2,
21 21 )
22 22
23 23 parsers = policy.importmod('parsers')
24 24 rustmod = policy.importrust('dirstate')
25 25
26 26 propertycache = util.propertycache
27 27
28 28 if rustmod is None:
29 29 DirstateItem = parsers.DirstateItem
30 30 else:
31 31 DirstateItem = rustmod.DirstateItem
32 32
33 33 rangemask = 0x7FFFFFFF
34 34
35 35 WRITE_MODE_AUTO = 0
36 36 WRITE_MODE_FORCE_NEW = 1
37 37 WRITE_MODE_FORCE_APPEND = 2
38 38
39 39
40 40 V2_MAX_READ_ATTEMPTS = 5
41 41
42 42
43 43 class _dirstatemapcommon:
44 44 """
45 45 Methods that are identical for both implementations of the dirstatemap
46 46 class, with and without Rust extensions enabled.
47 47 """
48 48
49 49 # please pytype
50 50
51 51 _map = None
52 52 copymap = None
53 53
54 54 def __init__(self, ui, opener, root, nodeconstants, use_dirstate_v2):
55 55 self._use_dirstate_v2 = use_dirstate_v2
56 56 self._nodeconstants = nodeconstants
57 57 self._ui = ui
58 58 self._opener = opener
59 59 self._root = root
60 60 self._filename = b'dirstate'
61 61 self._nodelen = 20 # Also update Rust code when changing this!
62 62 self._parents = None
63 63 self._dirtyparents = False
64 64 self._docket = None
65 65 write_mode = ui.config(b"devel", b"dirstate.v2.data_update_mode")
66 66 if write_mode == b"auto":
67 67 self._write_mode = WRITE_MODE_AUTO
68 68 elif write_mode == b"force-append":
69 69 self._write_mode = WRITE_MODE_FORCE_APPEND
70 70 elif write_mode == b"force-new":
71 71 self._write_mode = WRITE_MODE_FORCE_NEW
72 72 else:
73 73 # unknown value, fallback to default
74 74 self._write_mode = WRITE_MODE_AUTO
75 75
76 76 # for consistent view between _pl() and _read() invocations
77 77 self._pendingmode = None
78 78
79 79 def _set_identity(self):
80 80 # ignore HG_PENDING because identity is used only for writing
81 81 file_path = self._opener.join(self._filename)
82 82 self.identity = util.filestat.frompath(file_path)
83 83
84 84 def preload(self):
85 85 """Loads the underlying data, if it's not already loaded"""
86 86 self._map
87 87
88 88 def get(self, key, default=None):
89 89 return self._map.get(key, default)
90 90
91 91 def __len__(self):
92 92 return len(self._map)
93 93
94 94 def __iter__(self):
95 95 return iter(self._map)
96 96
97 97 def __contains__(self, key):
98 98 return key in self._map
99 99
100 100 def __getitem__(self, item):
101 101 return self._map[item]
102 102
103 103 ### disk interaction
104 104
105 105 def _opendirstatefile(self):
106 106 fp, mode = txnutil.trypending(self._root, self._opener, self._filename)
107 107 if self._pendingmode is not None and self._pendingmode != mode:
108 108 fp.close()
109 109 raise error.Abort(
110 110 _(b'working directory state may be changed parallelly')
111 111 )
112 112 self._pendingmode = mode
113 113 return fp
114 114
115 115 def _readdirstatefile(self, size=-1):
116 116 try:
117 117 with self._opendirstatefile() as fp:
118 118 return fp.read(size)
119 119 except FileNotFoundError:
120 120 # File doesn't exist, so the current state is empty
121 121 return b''
122 122
123 123 @property
124 124 def docket(self):
125 125 if not self._docket:
126 126 if not self._use_dirstate_v2:
127 127 raise error.ProgrammingError(
128 128 b'dirstate only has a docket in v2 format'
129 129 )
130 130 self._set_identity()
131 131 self._docket = docketmod.DirstateDocket.parse(
132 132 self._readdirstatefile(), self._nodeconstants
133 133 )
134 134 return self._docket
135 135
136 136 def _read_v2_data(self):
137 137 data = None
138 138 attempts = 0
139 139 while attempts < V2_MAX_READ_ATTEMPTS:
140 140 attempts += 1
141 141 try:
142 142 # TODO: use mmap when possible
143 143 data = self._opener.read(self.docket.data_filename())
144 144 except FileNotFoundError:
145 145 # read race detected between docket and data file
146 146 # reload the docket and retry
147 147 self._docket = None
148 148 if data is None:
149 149 assert attempts >= V2_MAX_READ_ATTEMPTS
150 150 msg = b"dirstate read race happened %d times in a row"
151 151 msg %= attempts
152 152 raise error.Abort(msg)
153 153 return self._opener.read(self.docket.data_filename())
154 154
155 155 def write_v2_no_append(self, tr, st, meta, packed):
156 156 old_docket = self.docket
157 157 new_docket = docketmod.DirstateDocket.with_new_uuid(
158 158 self.parents(), len(packed), meta
159 159 )
160 160 if old_docket.uuid == new_docket.uuid:
161 161 raise error.ProgrammingError(b'dirstate docket name collision')
162 162 data_filename = new_docket.data_filename()
163 163 self._opener.write(data_filename, packed)
164 164 # Write the new docket after the new data file has been
165 165 # written. Because `st` was opened with `atomictemp=True`,
166 166 # the actual `.hg/dirstate` file is only affected on close.
167 167 st.write(new_docket.serialize())
168 168 st.close()
169 169 # Remove the old data file after the new docket pointing to
170 170 # the new data file was written.
171 171 if old_docket.uuid:
172 172 data_filename = old_docket.data_filename()
173 173 unlink = lambda _tr=None: self._opener.unlink(data_filename)
174 174 if tr:
175 175 category = b"dirstate-v2-clean-" + old_docket.uuid
176 176 tr.addpostclose(category, unlink)
177 177 else:
178 178 unlink()
179 179 self._docket = new_docket
180 180
181 181 ### reading/setting parents
182 182
183 183 def parents(self):
184 184 if not self._parents:
185 185 if self._use_dirstate_v2:
186 186 self._parents = self.docket.parents
187 187 else:
188 188 read_len = self._nodelen * 2
189 189 st = self._readdirstatefile(read_len)
190 190 l = len(st)
191 191 if l == read_len:
192 192 self._parents = (
193 193 st[: self._nodelen],
194 194 st[self._nodelen : 2 * self._nodelen],
195 195 )
196 196 elif l == 0:
197 197 self._parents = (
198 198 self._nodeconstants.nullid,
199 199 self._nodeconstants.nullid,
200 200 )
201 201 else:
202 202 raise error.Abort(
203 203 _(b'working directory state appears damaged!')
204 204 )
205 205
206 206 return self._parents
207 207
208 208
209 209 class dirstatemap(_dirstatemapcommon):
210 210 """Map encapsulating the dirstate's contents.
211 211
212 212 The dirstate contains the following state:
213 213
214 214 - `identity` is the identity of the dirstate file, which can be used to
215 215 detect when changes have occurred to the dirstate file.
216 216
217 217 - `parents` is a pair containing the parents of the working copy. The
218 218 parents are updated by calling `setparents`.
219 219
220 220 - the state map maps filenames to tuples of (state, mode, size, mtime),
221 221 where state is a single character representing 'normal', 'added',
222 222 'removed', or 'merged'. It is read by treating the dirstate as a
223 223 dict. File state is updated by calling various methods (see each
224 224 documentation for details):
225 225
226 226 - `reset_state`,
227 227 - `set_tracked`
228 228 - `set_untracked`
229 229 - `set_clean`
230 230 - `set_possibly_dirty`
231 231
232 232 - `copymap` maps destination filenames to their source filename.
233 233
234 234 The dirstate also provides the following views onto the state:
235 235
236 236 - `filefoldmap` is a dict mapping normalized filenames to the denormalized
237 237 form that they appear as in the dirstate.
238 238
239 239 - `dirfoldmap` is a dict mapping normalized directory names to the
240 240 denormalized form that they appear as in the dirstate.
241 241 """
242 242
243 243 ### Core data storage and access
244 244
245 245 @propertycache
246 246 def _map(self):
247 247 self._map = {}
248 248 self.read()
249 249 return self._map
250 250
251 251 @propertycache
252 252 def copymap(self):
253 253 self.copymap = {}
254 254 self._map
255 255 return self.copymap
256 256
257 257 def clear(self):
258 258 self._map.clear()
259 259 self.copymap.clear()
260 260 self.setparents(self._nodeconstants.nullid, self._nodeconstants.nullid)
261 261 util.clearcachedproperty(self, b"_dirs")
262 262 util.clearcachedproperty(self, b"_alldirs")
263 263 util.clearcachedproperty(self, b"filefoldmap")
264 264 util.clearcachedproperty(self, b"dirfoldmap")
265 265
266 266 def items(self):
267 267 return self._map.items()
268 268
269 269 # forward for python2,3 compat
270 270 iteritems = items
271 271
272 272 def debug_iter(self, all):
273 273 """
274 274 Return an iterator of (filename, state, mode, size, mtime) tuples
275 275
276 276 `all` is unused when Rust is not enabled
277 277 """
278 278 for (filename, item) in self.items():
279 279 yield (filename, item.state, item.mode, item.size, item.mtime)
280 280
281 281 def keys(self):
282 282 return self._map.keys()
283 283
284 284 ### reading/setting parents
285 285
286 286 def setparents(self, p1, p2, fold_p2=False):
287 287 self._parents = (p1, p2)
288 288 self._dirtyparents = True
289 289 copies = {}
290 290 if fold_p2:
291 291 for f, s in self._map.items():
292 292 # Discard "merged" markers when moving away from a merge state
293 293 if s.p2_info:
294 294 source = self.copymap.pop(f, None)
295 295 if source:
296 296 copies[f] = source
297 297 s.drop_merge_data()
298 298 return copies
299 299
300 300 ### disk interaction
301 301
302 302 def read(self):
303 303 testing.wait_on_cfg(self._ui, b'dirstate.pre-read-file')
304 304 if self._use_dirstate_v2:
305 305
306 306 if not self.docket.uuid:
307 307 return
308 308 testing.wait_on_cfg(self._ui, b'dirstate.post-docket-read-file')
309 309 st = self._read_v2_data()
310 310 else:
311 311 self._set_identity()
312 312 st = self._readdirstatefile()
313 313
314 314 if not st:
315 315 return
316 316
317 317 # TODO: adjust this estimate for dirstate-v2
318 318 if util.safehasattr(parsers, b'dict_new_presized'):
319 319 # Make an estimate of the number of files in the dirstate based on
320 320 # its size. This trades wasting some memory for avoiding costly
321 321 # resizes. Each entry have a prefix of 17 bytes followed by one or
322 322 # two path names. Studies on various large-scale real-world repositories
323 323 # found 54 bytes a reasonable upper limit for the average path names.
324 324 # Copy entries are ignored for the sake of this estimate.
325 325 self._map = parsers.dict_new_presized(len(st) // 71)
326 326
327 327 # Python's garbage collector triggers a GC each time a certain number
328 328 # of container objects (the number being defined by
329 329 # gc.get_threshold()) are allocated. parse_dirstate creates a tuple
330 330 # for each file in the dirstate. The C version then immediately marks
331 331 # them as not to be tracked by the collector. However, this has no
332 332 # effect on when GCs are triggered, only on what objects the GC looks
333 333 # into. This means that O(number of files) GCs are unavoidable.
334 334 # Depending on when in the process's lifetime the dirstate is parsed,
335 335 # this can get very expensive. As a workaround, disable GC while
336 336 # parsing the dirstate.
337 337 #
338 338 # (we cannot decorate the function directly since it is in a C module)
339 339 if self._use_dirstate_v2:
340 340 p = self.docket.parents
341 341 meta = self.docket.tree_metadata
342 342 parse_dirstate = util.nogc(v2.parse_dirstate)
343 343 parse_dirstate(self._map, self.copymap, st, meta)
344 344 else:
345 345 parse_dirstate = util.nogc(parsers.parse_dirstate)
346 346 p = parse_dirstate(self._map, self.copymap, st)
347 347 if not self._dirtyparents:
348 348 self.setparents(*p)
349 349
350 350 # Avoid excess attribute lookups by fast pathing certain checks
351 351 self.__contains__ = self._map.__contains__
352 352 self.__getitem__ = self._map.__getitem__
353 353 self.get = self._map.get
354 354
355 355 def write(self, tr, st):
356 356 if self._use_dirstate_v2:
357 357 packed, meta = v2.pack_dirstate(self._map, self.copymap)
358 358 self.write_v2_no_append(tr, st, meta, packed)
359 359 else:
360 360 packed = parsers.pack_dirstate(
361 361 self._map, self.copymap, self.parents()
362 362 )
363 363 st.write(packed)
364 364 st.close()
365 365 self._dirtyparents = False
366 366
367 367 @propertycache
368 368 def identity(self):
369 369 self._map
370 370 return self.identity
371 371
372 372 ### code related to maintaining and accessing "extra" property
373 373 # (e.g. "has_dir")
374 374
375 375 def _dirs_incr(self, filename, old_entry=None):
376 376 """increment the dirstate counter if applicable"""
377 377 if (
378 378 old_entry is None or old_entry.removed
379 379 ) and "_dirs" in self.__dict__:
380 380 self._dirs.addpath(filename)
381 381 if old_entry is None and "_alldirs" in self.__dict__:
382 382 self._alldirs.addpath(filename)
383 383
384 384 def _dirs_decr(self, filename, old_entry=None, remove_variant=False):
385 385 """decrement the dirstate counter if applicable"""
386 386 if old_entry is not None:
387 387 if "_dirs" in self.__dict__ and not old_entry.removed:
388 388 self._dirs.delpath(filename)
389 389 if "_alldirs" in self.__dict__ and not remove_variant:
390 390 self._alldirs.delpath(filename)
391 391 elif remove_variant and "_alldirs" in self.__dict__:
392 392 self._alldirs.addpath(filename)
393 393 if "filefoldmap" in self.__dict__:
394 394 normed = util.normcase(filename)
395 395 self.filefoldmap.pop(normed, None)
396 396
397 397 @propertycache
398 398 def filefoldmap(self):
399 399 """Returns a dictionary mapping normalized case paths to their
400 400 non-normalized versions.
401 401 """
402 402 try:
403 403 makefilefoldmap = parsers.make_file_foldmap
404 404 except AttributeError:
405 405 pass
406 406 else:
407 407 return makefilefoldmap(
408 408 self._map, util.normcasespec, util.normcasefallback
409 409 )
410 410
411 411 f = {}
412 412 normcase = util.normcase
413 413 for name, s in self._map.items():
414 414 if not s.removed:
415 415 f[normcase(name)] = name
416 416 f[b'.'] = b'.' # prevents useless util.fspath() invocation
417 417 return f
418 418
419 419 @propertycache
420 420 def dirfoldmap(self):
421 421 f = {}
422 422 normcase = util.normcase
423 423 for name in self._dirs:
424 424 f[normcase(name)] = name
425 425 return f
426 426
427 427 def hastrackeddir(self, d):
428 428 """
429 429 Returns True if the dirstate contains a tracked (not removed) file
430 430 in this directory.
431 431 """
432 432 return d in self._dirs
433 433
434 434 def hasdir(self, d):
435 435 """
436 436 Returns True if the dirstate contains a file (tracked or removed)
437 437 in this directory.
438 438 """
439 439 return d in self._alldirs
440 440
441 441 @propertycache
442 442 def _dirs(self):
443 443 return pathutil.dirs(self._map, only_tracked=True)
444 444
445 445 @propertycache
446 446 def _alldirs(self):
447 447 return pathutil.dirs(self._map)
448 448
449 449 ### code related to manipulation of entries and copy-sources
450 450
451 451 def reset_state(
452 452 self,
453 453 filename,
454 454 wc_tracked=False,
455 455 p1_tracked=False,
456 456 p2_info=False,
457 457 has_meaningful_mtime=True,
458 458 parentfiledata=None,
459 459 ):
460 460 """Set a entry to a given state, diregarding all previous state
461 461
462 462 This is to be used by the part of the dirstate API dedicated to
463 463 adjusting the dirstate after a update/merge.
464 464
465 465 note: calling this might result to no entry existing at all if the
466 466 dirstate map does not see any point at having one for this file
467 467 anymore.
468 468 """
469 469 # copy information are now outdated
470 470 # (maybe new information should be in directly passed to this function)
471 471 self.copymap.pop(filename, None)
472 472
473 473 if not (p1_tracked or p2_info or wc_tracked):
474 474 old_entry = self._map.get(filename)
475 475 self._drop_entry(filename)
476 476 self._dirs_decr(filename, old_entry=old_entry)
477 477 return
478 478
479 479 old_entry = self._map.get(filename)
480 480 self._dirs_incr(filename, old_entry)
481 481 entry = DirstateItem(
482 482 wc_tracked=wc_tracked,
483 483 p1_tracked=p1_tracked,
484 484 p2_info=p2_info,
485 485 has_meaningful_mtime=has_meaningful_mtime,
486 486 parentfiledata=parentfiledata,
487 487 )
488 488 self._map[filename] = entry
489 489
490 490 def set_tracked(self, filename):
491 491 new = False
492 492 entry = self.get(filename)
493 493 if entry is None:
494 494 self._dirs_incr(filename)
495 495 entry = DirstateItem(
496 496 wc_tracked=True,
497 497 )
498 498
499 499 self._map[filename] = entry
500 500 new = True
501 501 elif not entry.tracked:
502 502 self._dirs_incr(filename, entry)
503 503 entry.set_tracked()
504 504 self._refresh_entry(filename, entry)
505 505 new = True
506 506 else:
507 507 # XXX This is probably overkill for more case, but we need this to
508 508 # fully replace the `normallookup` call with `set_tracked` one.
509 509 # Consider smoothing this in the future.
510 510 entry.set_possibly_dirty()
511 511 self._refresh_entry(filename, entry)
512 512 return new
513 513
514 514 def set_untracked(self, f):
515 515 """Mark a file as no longer tracked in the dirstate map"""
516 516 entry = self.get(f)
517 517 if entry is None:
518 518 return False
519 519 else:
520 520 self._dirs_decr(f, old_entry=entry, remove_variant=not entry.added)
521 521 if not entry.p2_info:
522 522 self.copymap.pop(f, None)
523 523 entry.set_untracked()
524 524 self._refresh_entry(f, entry)
525 525 return True
526 526
527 527 def set_clean(self, filename, mode, size, mtime):
528 528 """mark a file as back to a clean state"""
529 529 entry = self[filename]
530 530 size = size & rangemask
531 531 entry.set_clean(mode, size, mtime)
532 532 self._refresh_entry(filename, entry)
533 533 self.copymap.pop(filename, None)
534 534
535 535 def set_possibly_dirty(self, filename):
536 536 """record that the current state of the file on disk is unknown"""
537 537 entry = self[filename]
538 538 entry.set_possibly_dirty()
539 539 self._refresh_entry(filename, entry)
540 540
541 541 def _refresh_entry(self, f, entry):
542 542 """record updated state of an entry"""
543 543 if not entry.any_tracked:
544 544 self._map.pop(f, None)
545 545
546 546 def _drop_entry(self, f):
547 547 """remove any entry for file f
548 548
549 549 This should also drop associated copy information
550 550
551 551 The fact we actually need to drop it is the responsability of the caller"""
552 552 self._map.pop(f, None)
553 553 self.copymap.pop(f, None)
554 554
555 555
556 556 if rustmod is not None:
557 557
558 558 class dirstatemap(_dirstatemapcommon):
559 559
560 560 ### Core data storage and access
561 561
562 562 @propertycache
563 563 def _map(self):
564 564 """
565 565 Fills the Dirstatemap when called.
566 566 """
567 567 # ignore HG_PENDING because identity is used only for writing
568 568 self._set_identity()
569 569
570 570 testing.wait_on_cfg(self._ui, b'dirstate.pre-read-file')
571 571 if self._use_dirstate_v2:
572 572 self.docket # load the data if needed
573 inode = (
574 self.identity.stat.st_ino
575 if self.identity is not None
576 and self.identity.stat is not None
577 else None
578 )
573 579 testing.wait_on_cfg(self._ui, b'dirstate.post-docket-read-file')
574 580 if not self.docket.uuid:
575 581 data = b''
576 582 self._map = rustmod.DirstateMap.new_empty()
577 583 else:
578 584 data = self._read_v2_data()
579 585 self._map = rustmod.DirstateMap.new_v2(
580 586 data,
581 587 self.docket.data_size,
582 588 self.docket.tree_metadata,
583 589 self.docket.uuid,
590 inode,
584 591 )
585 592 parents = self.docket.parents
586 593 else:
587 594 self._set_identity()
595 inode = (
596 self.identity.stat.st_ino
597 if self.identity is not None
598 and self.identity.stat is not None
599 else None
600 )
588 601 self._map, parents = rustmod.DirstateMap.new_v1(
589 self._readdirstatefile()
602 self._readdirstatefile(), inode
590 603 )
591 604
592 605 if parents and not self._dirtyparents:
593 606 self.setparents(*parents)
594 607
595 608 self.__contains__ = self._map.__contains__
596 609 self.__getitem__ = self._map.__getitem__
597 610 self.get = self._map.get
598 611 return self._map
599 612
600 613 @property
601 614 def copymap(self):
602 615 return self._map.copymap()
603 616
604 617 def debug_iter(self, all):
605 618 """
606 619 Return an iterator of (filename, state, mode, size, mtime) tuples
607 620
608 621 `all`: also include with `state == b' '` dirstate tree nodes that
609 622 don't have an associated `DirstateItem`.
610 623
611 624 """
612 625 return self._map.debug_iter(all)
613 626
614 627 def clear(self):
615 628 self._map.clear()
616 629 self.setparents(
617 630 self._nodeconstants.nullid, self._nodeconstants.nullid
618 631 )
619 632 util.clearcachedproperty(self, b"_dirs")
620 633 util.clearcachedproperty(self, b"_alldirs")
621 634 util.clearcachedproperty(self, b"dirfoldmap")
622 635
623 636 def items(self):
624 637 return self._map.items()
625 638
626 639 # forward for python2,3 compat
627 640 iteritems = items
628 641
629 642 def keys(self):
630 643 return iter(self._map)
631 644
632 645 ### reading/setting parents
633 646
634 647 def setparents(self, p1, p2, fold_p2=False):
635 648 self._parents = (p1, p2)
636 649 self._dirtyparents = True
637 650 copies = {}
638 651 if fold_p2:
639 652 copies = self._map.setparents_fixup()
640 653 return copies
641 654
642 655 ### disk interaction
643 656
644 657 @propertycache
645 658 def identity(self):
646 659 self._map
647 660 return self.identity
648 661
649 662 def write(self, tr, st):
650 663 if not self._use_dirstate_v2:
651 664 p1, p2 = self.parents()
652 665 packed = self._map.write_v1(p1, p2)
653 666 st.write(packed)
654 667 st.close()
655 668 self._dirtyparents = False
656 669 return
657 670
658 671 # We can only append to an existing data file if there is one
659 672 write_mode = self._write_mode
660 673 if self.docket.uuid is None:
661 674 write_mode = WRITE_MODE_FORCE_NEW
662 675 packed, meta, append = self._map.write_v2(write_mode)
663 676 if append:
664 677 docket = self.docket
665 678 data_filename = docket.data_filename()
666 679 with self._opener(data_filename, b'r+b') as fp:
667 680 fp.seek(docket.data_size)
668 681 assert fp.tell() == docket.data_size
669 682 written = fp.write(packed)
670 683 if written is not None: # py2 may return None
671 684 assert written == len(packed), (written, len(packed))
672 685 docket.data_size += len(packed)
673 686 docket.parents = self.parents()
674 687 docket.tree_metadata = meta
675 688 st.write(docket.serialize())
676 689 st.close()
677 690 else:
678 691 self.write_v2_no_append(tr, st, meta, packed)
679 692 # Reload from the newly-written file
680 693 util.clearcachedproperty(self, b"_map")
681 694 self._dirtyparents = False
682 695
683 696 ### code related to maintaining and accessing "extra" property
684 697 # (e.g. "has_dir")
685 698
686 699 @propertycache
687 700 def filefoldmap(self):
688 701 """Returns a dictionary mapping normalized case paths to their
689 702 non-normalized versions.
690 703 """
691 704 return self._map.filefoldmapasdict()
692 705
693 706 def hastrackeddir(self, d):
694 707 return self._map.hastrackeddir(d)
695 708
696 709 def hasdir(self, d):
697 710 return self._map.hasdir(d)
698 711
699 712 @propertycache
700 713 def dirfoldmap(self):
701 714 f = {}
702 715 normcase = util.normcase
703 716 for name in self._map.tracked_dirs():
704 717 f[normcase(name)] = name
705 718 return f
706 719
707 720 ### code related to manipulation of entries and copy-sources
708 721
709 722 def set_tracked(self, f):
710 723 return self._map.set_tracked(f)
711 724
712 725 def set_untracked(self, f):
713 726 return self._map.set_untracked(f)
714 727
715 728 def set_clean(self, filename, mode, size, mtime):
716 729 self._map.set_clean(filename, mode, size, mtime)
717 730
718 731 def set_possibly_dirty(self, f):
719 732 self._map.set_possibly_dirty(f)
720 733
721 734 def reset_state(
722 735 self,
723 736 filename,
724 737 wc_tracked=False,
725 738 p1_tracked=False,
726 739 p2_info=False,
727 740 has_meaningful_mtime=True,
728 741 parentfiledata=None,
729 742 ):
730 743 return self._map.reset_state(
731 744 filename,
732 745 wc_tracked,
733 746 p1_tracked,
734 747 p2_info,
735 748 has_meaningful_mtime,
736 749 parentfiledata,
737 750 )
@@ -1,1939 +1,1952 b''
1 1 use bytes_cast::BytesCast;
2 2 use micro_timer::timed;
3 3 use std::borrow::Cow;
4 4 use std::path::PathBuf;
5 5
6 6 use super::on_disk;
7 7 use super::on_disk::DirstateV2ParseError;
8 8 use super::owning::OwningDirstateMap;
9 9 use super::path_with_basename::WithBasename;
10 10 use crate::dirstate::parsers::pack_entry;
11 11 use crate::dirstate::parsers::packed_entry_size;
12 12 use crate::dirstate::parsers::parse_dirstate_entries;
13 13 use crate::dirstate::CopyMapIter;
14 14 use crate::dirstate::DirstateV2Data;
15 15 use crate::dirstate::ParentFileData;
16 16 use crate::dirstate::StateMapIter;
17 17 use crate::dirstate::TruncatedTimestamp;
18 18 use crate::matchers::Matcher;
19 19 use crate::utils::hg_path::{HgPath, HgPathBuf};
20 20 use crate::DirstateEntry;
21 21 use crate::DirstateError;
22 22 use crate::DirstateMapError;
23 23 use crate::DirstateParents;
24 24 use crate::DirstateStatus;
25 25 use crate::FastHashbrownMap as FastHashMap;
26 26 use crate::PatternFileWarning;
27 27 use crate::StatusError;
28 28 use crate::StatusOptions;
29 29
30 30 /// Append to an existing data file if the amount of unreachable data (not used
31 31 /// anymore) is less than this fraction of the total amount of existing data.
32 32 const ACCEPTABLE_UNREACHABLE_BYTES_RATIO: f32 = 0.5;
33 33
34 34 #[derive(Debug, PartialEq, Eq)]
35 35 /// Version of the on-disk format
36 36 pub enum DirstateVersion {
37 37 V1,
38 38 V2,
39 39 }
40 40
41 41 #[derive(Debug, PartialEq, Eq)]
42 42 pub enum DirstateMapWriteMode {
43 43 Auto,
44 44 ForceNewDataFile,
45 45 ForceAppend,
46 46 }
47 47
48 48 #[derive(Debug)]
49 49 pub struct DirstateMap<'on_disk> {
50 50 /// Contents of the `.hg/dirstate` file
51 51 pub(super) on_disk: &'on_disk [u8],
52 52
53 53 pub(super) root: ChildNodes<'on_disk>,
54 54
55 55 /// Number of nodes anywhere in the tree that have `.entry.is_some()`.
56 56 pub(super) nodes_with_entry_count: u32,
57 57
58 58 /// Number of nodes anywhere in the tree that have
59 59 /// `.copy_source.is_some()`.
60 60 pub(super) nodes_with_copy_source_count: u32,
61 61
62 62 /// See on_disk::Header
63 63 pub(super) ignore_patterns_hash: on_disk::IgnorePatternsHash,
64 64
65 65 /// How many bytes of `on_disk` are not used anymore
66 66 pub(super) unreachable_bytes: u32,
67 67
68 68 /// Size of the data used to first load this `DirstateMap`. Used in case
69 69 /// we need to write some new metadata, but no new data on disk,
70 70 /// as well as to detect writes that have happened in another process
71 71 /// since first read.
72 72 pub(super) old_data_size: usize,
73 73
74 74 /// UUID used when first loading this `DirstateMap`. Used to check if
75 75 /// the UUID has been changed by another process since first read.
76 76 /// Can be `None` if using dirstate v1 or if it's a brand new dirstate.
77 77 pub(super) old_uuid: Option<Vec<u8>>,
78 78
79 /// Identity of the dirstate file (for dirstate-v1) or the docket file
80 /// (v2). Used to detect if the file has changed from another process.
81 /// Since it's always written atomically, we can compare the inode to
82 /// check the file identity.
83 ///
84 /// TODO On non-Unix systems, something like hashing is a possibility?
85 pub(super) identity: Option<u64>,
86
79 87 pub(super) dirstate_version: DirstateVersion,
80 88
81 89 /// Controlled by config option `devel.dirstate.v2.data_update_mode`
82 90 pub(super) write_mode: DirstateMapWriteMode,
83 91 }
84 92
85 93 /// Using a plain `HgPathBuf` of the full path from the repository root as a
86 94 /// map key would also work: all paths in a given map have the same parent
87 95 /// path, so comparing full paths gives the same result as comparing base
88 96 /// names. However `HashMap` would waste time always re-hashing the same
89 97 /// string prefix.
90 98 pub(super) type NodeKey<'on_disk> = WithBasename<Cow<'on_disk, HgPath>>;
91 99
92 100 /// Similar to `&'tree Cow<'on_disk, HgPath>`, but can also be returned
93 101 /// for on-disk nodes that don’t actually have a `Cow` to borrow.
94 102 #[derive(Debug)]
95 103 pub(super) enum BorrowedPath<'tree, 'on_disk> {
96 104 InMemory(&'tree HgPathBuf),
97 105 OnDisk(&'on_disk HgPath),
98 106 }
99 107
100 108 #[derive(Debug)]
101 109 pub(super) enum ChildNodes<'on_disk> {
102 110 InMemory(FastHashMap<NodeKey<'on_disk>, Node<'on_disk>>),
103 111 OnDisk(&'on_disk [on_disk::Node]),
104 112 }
105 113
106 114 #[derive(Debug)]
107 115 pub(super) enum ChildNodesRef<'tree, 'on_disk> {
108 116 InMemory(&'tree FastHashMap<NodeKey<'on_disk>, Node<'on_disk>>),
109 117 OnDisk(&'on_disk [on_disk::Node]),
110 118 }
111 119
112 120 #[derive(Debug)]
113 121 pub(super) enum NodeRef<'tree, 'on_disk> {
114 122 InMemory(&'tree NodeKey<'on_disk>, &'tree Node<'on_disk>),
115 123 OnDisk(&'on_disk on_disk::Node),
116 124 }
117 125
118 126 impl<'tree, 'on_disk> BorrowedPath<'tree, 'on_disk> {
119 127 pub fn detach_from_tree(&self) -> Cow<'on_disk, HgPath> {
120 128 match *self {
121 129 BorrowedPath::InMemory(in_memory) => Cow::Owned(in_memory.clone()),
122 130 BorrowedPath::OnDisk(on_disk) => Cow::Borrowed(on_disk),
123 131 }
124 132 }
125 133 }
126 134
127 135 impl<'tree, 'on_disk> std::ops::Deref for BorrowedPath<'tree, 'on_disk> {
128 136 type Target = HgPath;
129 137
130 138 fn deref(&self) -> &HgPath {
131 139 match *self {
132 140 BorrowedPath::InMemory(in_memory) => in_memory,
133 141 BorrowedPath::OnDisk(on_disk) => on_disk,
134 142 }
135 143 }
136 144 }
137 145
138 146 impl Default for ChildNodes<'_> {
139 147 fn default() -> Self {
140 148 ChildNodes::InMemory(Default::default())
141 149 }
142 150 }
143 151
144 152 impl<'on_disk> ChildNodes<'on_disk> {
145 153 pub(super) fn as_ref<'tree>(
146 154 &'tree self,
147 155 ) -> ChildNodesRef<'tree, 'on_disk> {
148 156 match self {
149 157 ChildNodes::InMemory(nodes) => ChildNodesRef::InMemory(nodes),
150 158 ChildNodes::OnDisk(nodes) => ChildNodesRef::OnDisk(nodes),
151 159 }
152 160 }
153 161
154 162 pub(super) fn is_empty(&self) -> bool {
155 163 match self {
156 164 ChildNodes::InMemory(nodes) => nodes.is_empty(),
157 165 ChildNodes::OnDisk(nodes) => nodes.is_empty(),
158 166 }
159 167 }
160 168
161 169 fn make_mut(
162 170 &mut self,
163 171 on_disk: &'on_disk [u8],
164 172 unreachable_bytes: &mut u32,
165 173 ) -> Result<
166 174 &mut FastHashMap<NodeKey<'on_disk>, Node<'on_disk>>,
167 175 DirstateV2ParseError,
168 176 > {
169 177 match self {
170 178 ChildNodes::InMemory(nodes) => Ok(nodes),
171 179 ChildNodes::OnDisk(nodes) => {
172 180 *unreachable_bytes +=
173 181 std::mem::size_of_val::<[on_disk::Node]>(nodes) as u32;
174 182 let nodes = nodes
175 183 .iter()
176 184 .map(|node| {
177 185 Ok((
178 186 node.path(on_disk)?,
179 187 node.to_in_memory_node(on_disk)?,
180 188 ))
181 189 })
182 190 .collect::<Result<_, _>>()?;
183 191 *self = ChildNodes::InMemory(nodes);
184 192 match self {
185 193 ChildNodes::InMemory(nodes) => Ok(nodes),
186 194 ChildNodes::OnDisk(_) => unreachable!(),
187 195 }
188 196 }
189 197 }
190 198 }
191 199 }
192 200
// Read-only view over child nodes, independent of whether they live
// in memory or in the mmapped on-disk file.
193 201 impl<'tree, 'on_disk> ChildNodesRef<'tree, 'on_disk> {
// Look up a child by its base name. On-disk nodes are stored sorted,
// so binary search is used there; parse errors inside the comparator
// are smuggled out through `parse_result`.
194 202 pub(super) fn get(
195 203 &self,
196 204 base_name: &HgPath,
197 205 on_disk: &'on_disk [u8],
198 206 ) -> Result<Option<NodeRef<'tree, 'on_disk>>, DirstateV2ParseError> {
199 207 match self {
200 208 ChildNodesRef::InMemory(nodes) => Ok(nodes
201 209 .get_key_value(base_name)
202 210 .map(|(k, v)| NodeRef::InMemory(k, v))),
203 211 ChildNodesRef::OnDisk(nodes) => {
204 212 let mut parse_result = Ok(());
205 213 let search_result = nodes.binary_search_by(|node| {
206 214 match node.base_name(on_disk) {
207 215 Ok(node_base_name) => node_base_name.cmp(base_name),
208 216 Err(e) => {
209 217 parse_result = Err(e);
210 218 // Dummy comparison result, `search_result` won’t
211 219 // be used since `parse_result` is an error
212 220 std::cmp::Ordering::Equal
213 221 }
214 222 }
215 223 });
216 224 parse_result.map(|()| {
217 225 search_result.ok().map(|i| NodeRef::OnDisk(&nodes[i]))
218 226 })
219 227 }
220 228 }
221 229 }
222 230
223 231 /// Iterate in undefined order
224 232 pub(super) fn iter(
225 233 &self,
226 234 ) -> impl Iterator<Item = NodeRef<'tree, 'on_disk>> {
// `itertools::Either` unifies the two iterator types into one return type.
227 235 match self {
228 236 ChildNodesRef::InMemory(nodes) => itertools::Either::Left(
229 237 nodes.iter().map(|(k, v)| NodeRef::InMemory(k, v)),
230 238 ),
231 239 ChildNodesRef::OnDisk(nodes) => {
232 240 itertools::Either::Right(nodes.iter().map(NodeRef::OnDisk))
233 241 }
234 242 }
235 243 }
236 244
237 245 /// Iterate in parallel in undefined order
238 246 pub(super) fn par_iter(
239 247 &self,
240 248 ) -> impl rayon::iter::ParallelIterator<Item = NodeRef<'tree, 'on_disk>>
241 249 {
242 250 use rayon::prelude::*;
243 251 match self {
244 252 ChildNodesRef::InMemory(nodes) => rayon::iter::Either::Left(
245 253 nodes.par_iter().map(|(k, v)| NodeRef::InMemory(k, v)),
246 254 ),
247 255 ChildNodesRef::OnDisk(nodes) => rayon::iter::Either::Right(
248 256 nodes.par_iter().map(NodeRef::OnDisk),
249 257 ),
250 258 }
251 259 }
252 260
// Return children sorted by base name (the on-disk order).
253 261 pub(super) fn sorted(&self) -> Vec<NodeRef<'tree, 'on_disk>> {
254 262 match self {
255 263 ChildNodesRef::InMemory(nodes) => {
256 264 let mut vec: Vec<_> = nodes
257 265 .iter()
258 266 .map(|(k, v)| NodeRef::InMemory(k, v))
259 267 .collect();
// Only in-memory refs occur in `vec`, so the OnDisk arm is dead.
260 268 fn sort_key<'a>(node: &'a NodeRef) -> &'a HgPath {
261 269 match node {
262 270 NodeRef::InMemory(path, _node) => path.base_name(),
263 271 NodeRef::OnDisk(_) => unreachable!(),
264 272 }
265 273 }
266 274 // `sort_unstable_by_key` doesn’t allow keys borrowing from the
267 275 // value: https://github.com/rust-lang/rust/issues/34162
268 276 vec.sort_unstable_by(|a, b| sort_key(a).cmp(sort_key(b)));
269 277 vec
270 278 }
271 279 ChildNodesRef::OnDisk(nodes) => {
272 280 // Nodes on disk are already sorted
273 281 nodes.iter().map(NodeRef::OnDisk).collect()
274 282 }
275 283 }
276 284 }
277 285 }
278 286
// Accessors for a single dirstate-tree node, dispatching between the
// in-memory and on-disk representations.
279 287 impl<'tree, 'on_disk> NodeRef<'tree, 'on_disk> {
// Full repository-relative path of this node.
280 288 pub(super) fn full_path(
281 289 &self,
282 290 on_disk: &'on_disk [u8],
283 291 ) -> Result<&'tree HgPath, DirstateV2ParseError> {
284 292 match self {
285 293 NodeRef::InMemory(path, _node) => Ok(path.full_path()),
286 294 NodeRef::OnDisk(node) => node.full_path(on_disk),
287 295 }
288 296 }
289 297
290 298 /// Returns a `BorrowedPath`, which can be turned into a `Cow<'on_disk,
291 299 /// HgPath>` detached from `'tree`
292 300 pub(super) fn full_path_borrowed(
293 301 &self,
294 302 on_disk: &'on_disk [u8],
295 303 ) -> Result<BorrowedPath<'tree, 'on_disk>, DirstateV2ParseError> {
296 304 match self {
297 305 NodeRef::InMemory(path, _node) => match path.full_path() {
298 306 Cow::Borrowed(on_disk) => Ok(BorrowedPath::OnDisk(on_disk)),
299 307 Cow::Owned(in_memory) => Ok(BorrowedPath::InMemory(in_memory)),
300 308 },
301 309 NodeRef::OnDisk(node) => {
302 310 Ok(BorrowedPath::OnDisk(node.full_path(on_disk)?))
303 311 }
304 312 }
305 313 }
306 314
// Last path component only (file or directory name).
307 315 pub(super) fn base_name(
308 316 &self,
309 317 on_disk: &'on_disk [u8],
310 318 ) -> Result<&'tree HgPath, DirstateV2ParseError> {
311 319 match self {
312 320 NodeRef::InMemory(path, _node) => Ok(path.base_name()),
313 321 NodeRef::OnDisk(node) => node.base_name(on_disk),
314 322 }
315 323 }
316 324
317 325 pub(super) fn children(
318 326 &self,
319 327 on_disk: &'on_disk [u8],
320 328 ) -> Result<ChildNodesRef<'tree, 'on_disk>, DirstateV2ParseError> {
321 329 match self {
322 330 NodeRef::InMemory(_path, node) => Ok(node.children.as_ref()),
323 331 NodeRef::OnDisk(node) => {
324 332 Ok(ChildNodesRef::OnDisk(node.children(on_disk)?))
325 333 }
326 334 }
327 335 }
328 336
329 337 pub(super) fn has_copy_source(&self) -> bool {
330 338 match self {
331 339 NodeRef::InMemory(_path, node) => node.copy_source.is_some(),
332 340 NodeRef::OnDisk(node) => node.has_copy_source(),
333 341 }
334 342 }
335 343
336 344 pub(super) fn copy_source(
337 345 &self,
338 346 on_disk: &'on_disk [u8],
339 347 ) -> Result<Option<&'tree HgPath>, DirstateV2ParseError> {
340 348 match self {
341 349 NodeRef::InMemory(_path, node) => {
342 350 Ok(node.copy_source.as_ref().map(|s| &**s))
343 351 }
344 352 NodeRef::OnDisk(node) => node.copy_source(on_disk),
345 353 }
346 354 }
347 355 /// Returns a `BorrowedPath`, which can be turned into a `Cow<'on_disk,
348 356 /// HgPath>` detached from `'tree`
349 357 pub(super) fn copy_source_borrowed(
350 358 &self,
351 359 on_disk: &'on_disk [u8],
352 360 ) -> Result<Option<BorrowedPath<'tree, 'on_disk>>, DirstateV2ParseError>
353 361 {
354 362 Ok(match self {
355 363 NodeRef::InMemory(_path, node) => {
356 364 node.copy_source.as_ref().map(|source| match source {
357 365 Cow::Borrowed(on_disk) => BorrowedPath::OnDisk(on_disk),
358 366 Cow::Owned(in_memory) => BorrowedPath::InMemory(in_memory),
359 367 })
360 368 }
361 369 NodeRef::OnDisk(node) => node
362 370 .copy_source(on_disk)?
363 371 .map(|source| BorrowedPath::OnDisk(source)),
364 372 })
365 373 }
366 374
// The dirstate entry for this node, `None` if it is a directory-only node.
367 375 pub(super) fn entry(
368 376 &self,
369 377 ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {
370 378 match self {
371 379 NodeRef::InMemory(_path, node) => {
372 380 Ok(node.data.as_entry().copied())
373 381 }
374 382 NodeRef::OnDisk(node) => node.entry(),
375 383 }
376 384 }
377 385
378 386 pub(super) fn cached_directory_mtime(
379 387 &self,
380 388 ) -> Result<Option<TruncatedTimestamp>, DirstateV2ParseError> {
381 389 match self {
382 390 NodeRef::InMemory(_path, node) => Ok(match node.data {
383 391 NodeData::CachedDirectory { mtime } => Some(mtime),
384 392 _ => None,
385 393 }),
386 394 NodeRef::OnDisk(node) => node.cached_directory_mtime(),
387 395 }
388 396 }
389 397
390 398 pub(super) fn descendants_with_entry_count(&self) -> u32 {
391 399 match self {
392 400 NodeRef::InMemory(_path, node) => {
393 401 node.descendants_with_entry_count
394 402 }
395 403 NodeRef::OnDisk(node) => node.descendants_with_entry_count.get(),
396 404 }
397 405 }
398 406
399 407 pub(super) fn tracked_descendants_count(&self) -> u32 {
400 408 match self {
401 409 NodeRef::InMemory(_path, node) => node.tracked_descendants_count,
402 410 NodeRef::OnDisk(node) => node.tracked_descendants_count.get(),
403 411 }
404 412 }
405 413 }
406 414
407 415 /// Represents a file or a directory
408 416 #[derive(Default, Debug)]
409 417 pub(super) struct Node<'on_disk> {
// Entry / cached-directory-mtime / nothing (see `NodeData`).
410 418 pub(super) data: NodeData,
411 419
// Source path if this file was recorded as a copy/rename target.
412 420 pub(super) copy_source: Option<Cow<'on_disk, HgPath>>,
413 421
414 422 pub(super) children: ChildNodes<'on_disk>,
415 423
416 424 /// How many (non-inclusive) descendants of this node have an entry.
417 425 pub(super) descendants_with_entry_count: u32,
418 426
419 427 /// How many (non-inclusive) descendants of this node have an entry whose
420 428 /// state is "tracked".
421 429 pub(super) tracked_descendants_count: u32,
422 430 }
423 431
// Payload of a tree node: a real dirstate entry for a file, a cached
// directory mtime, or nothing (a node that only exists to hold children).
424 432 #[derive(Debug)]
425 433 pub(super) enum NodeData {
426 434 Entry(DirstateEntry),
427 435 CachedDirectory { mtime: TruncatedTimestamp },
428 436 None,
429 437 }
430 438
// New nodes carry no data until an entry or cached mtime is assigned.
431 439 impl Default for NodeData {
432 440 fn default() -> Self {
433 441 NodeData::None
434 442 }
435 443 }
436 444
// Convenience accessors over the `NodeData` variants.
437 445 impl NodeData {
// True only for the `Entry` variant (not for cached directories).
438 446 fn has_entry(&self) -> bool {
439 447 match self {
440 448 NodeData::Entry(_) => true,
441 449 _ => false,
442 450 }
443 451 }
444 452
445 453 fn as_entry(&self) -> Option<&DirstateEntry> {
446 454 match self {
447 455 NodeData::Entry(entry) => Some(entry),
448 456 _ => None,
449 457 }
450 458 }
451 459
452 460 fn as_entry_mut(&mut self) -> Option<&mut DirstateEntry> {
453 461 match self {
454 462 NodeData::Entry(entry) => Some(entry),
455 463 _ => None,
456 464 }
457 465 }
458 466 }
459 467
460 468 impl<'on_disk> DirstateMap<'on_disk> {
// Build an empty map over `on_disk` (which may itself be empty).
// `identity` starts as None; it is filled in when the map is actually
// parsed from a dirstate file (presumably the file's inode, per the
// "remember the inode of .hg/dirstate" change — see `new_v1`/`new_v2`).
461 469 pub(super) fn empty(on_disk: &'on_disk [u8]) -> Self {
462 470 Self {
463 471 on_disk,
464 472 root: ChildNodes::default(),
465 473 nodes_with_entry_count: 0,
466 474 nodes_with_copy_source_count: 0,
467 475 ignore_patterns_hash: [0; on_disk::IGNORE_PATTERNS_HASH_LEN],
468 476 unreachable_bytes: 0,
469 477 old_data_size: 0,
470 478 old_uuid: None,
471 479 identity: None,
472 480 dirstate_version: DirstateVersion::V1,
473 481 write_mode: DirstateMapWriteMode::Auto,
474 482 }
475 483 }
475 484
// Parse a dirstate-v2 data file. `data_size` bounds how much of
// `on_disk` is valid; the docket's `uuid` and the file `identity`
// (used later to detect concurrent rewrites) are threaded through to
// the on-disk reader.
476 485 #[timed]
477 486 pub fn new_v2(
478 487 on_disk: &'on_disk [u8],
479 488 data_size: usize,
480 489 metadata: &[u8],
481 490 uuid: Vec<u8>,
491 identity: Option<u64>,
482 492 ) -> Result<Self, DirstateError> {
483 493 if let Some(data) = on_disk.get(..data_size) {
484 Ok(on_disk::read(data, metadata, uuid)?)
494 Ok(on_disk::read(data, metadata, uuid, identity)?)
485 495 } else {
// The docket advertised more data than the file actually contains.
486 496 Err(DirstateV2ParseError::new("not enough bytes on disk").into())
487 497 }
488 498 }
489 499
// Parse a dirstate-v1 file into the tree structure, returning the map
// and the working-copy parents stored in the file header. `identity`
// is remembered on the map for later change detection.
490 500 #[timed]
491 501 pub fn new_v1(
492 502 on_disk: &'on_disk [u8],
503 identity: Option<u64>,
493 504 ) -> Result<(Self, Option<DirstateParents>), DirstateError> {
494 505 let mut map = Self::empty(on_disk);
495 506 if map.on_disk.is_empty() {
496 507 return Ok((map, None));
497 508 }
498 509
// For each entry in the flat v1 file, insert a node (creating ancestor
// directory nodes as needed) while keeping the per-ancestor counters
// up to date via the callback.
499 510 let parents = parse_dirstate_entries(
500 511 map.on_disk,
501 512 |path, entry, copy_source| {
502 513 let tracked = entry.tracked();
503 514 let node = Self::get_or_insert_node_inner(
504 515 map.on_disk,
505 516 &mut map.unreachable_bytes,
506 517 &mut map.root,
507 518 path,
508 519 WithBasename::to_cow_borrowed,
509 520 |ancestor| {
510 521 if tracked {
511 522 ancestor.tracked_descendants_count += 1
512 523 }
513 524 ancestor.descendants_with_entry_count += 1
514 525 },
515 526 )?;
// A well-formed v1 file lists each path at most once.
516 527 assert!(
517 528 !node.data.has_entry(),
518 529 "duplicate dirstate entry in read"
519 530 );
520 531 assert!(
521 532 node.copy_source.is_none(),
522 533 "duplicate dirstate entry in read"
523 534 );
524 535 node.data = NodeData::Entry(*entry);
525 536 node.copy_source = copy_source.map(Cow::Borrowed);
526 537 map.nodes_with_entry_count += 1;
527 538 if copy_source.is_some() {
528 539 map.nodes_with_copy_source_count += 1
529 540 }
530 541 Ok(())
531 542 },
532 543 )?;
533 544 let parents = Some(parents.clone());
545 map.identity = identity;
534 546
535 547 Ok((map, parents))
536 548 }
537 549
538 550 /// Assuming dirstate-v2 format, returns whether the next write should
539 551 /// append to the existing data file that contains `self.on_disk` (true),
540 552 /// or create a new data file from scratch (false).
541 553 pub(super) fn write_should_append(&self) -> bool {
542 554 match self.write_mode {
543 555 DirstateMapWriteMode::ForceAppend => true,
544 556 DirstateMapWriteMode::ForceNewDataFile => false,
// In auto mode, append only while the fraction of dead bytes in the
// data file stays below the acceptable threshold.
545 557 DirstateMapWriteMode::Auto => {
546 558 let ratio =
547 559 self.unreachable_bytes as f32 / self.on_disk.len() as f32;
548 560 ratio < ACCEPTABLE_UNREACHABLE_BYTES_RATIO
549 561 }
550 562 }
551 563 }
552 564
// Walk the tree one path component at a time; returns the node at
// `path` if present. Panics on an empty path (at least one component
// is expected).
553 565 fn get_node<'tree>(
554 566 &'tree self,
555 567 path: &HgPath,
556 568 ) -> Result<Option<NodeRef<'tree, 'on_disk>>, DirstateV2ParseError> {
557 569 let mut children = self.root.as_ref();
558 570 let mut components = path.components();
559 571 let mut component =
560 572 components.next().expect("expected at least one components");
561 573 loop {
562 574 if let Some(child) = children.get(component, self.on_disk)? {
563 575 if let Some(next_component) = components.next() {
564 576 component = next_component;
565 577 children = child.children(self.on_disk)?;
566 578 } else {
567 579 return Ok(Some(child));
568 580 }
569 581 } else {
570 582 return Ok(None);
571 583 }
572 584 }
573 585 }
574 586
575 587 /// Returns a mutable reference to the node at `path` if it exists
576 588 ///
577 589 /// `each_ancestor` is a callback that is called for each ancestor node
578 590 /// when descending the tree. It is used to keep the different counters
579 591 /// of the `DirstateMap` up-to-date.
580 592 fn get_node_mut<'tree>(
581 593 &'tree mut self,
582 594 path: &HgPath,
583 595 each_ancestor: impl FnMut(&mut Node),
584 596 ) -> Result<Option<&'tree mut Node<'on_disk>>, DirstateV2ParseError> {
// Delegates to the `_inner` form so other `self` fields stay borrowable.
585 597 Self::get_node_mut_inner(
586 598 self.on_disk,
587 599 &mut self.unreachable_bytes,
588 600 &mut self.root,
589 601 path,
590 602 each_ancestor,
591 603 )
592 604 }
593 605
594 606 /// Lower-level version of `get_node_mut`.
595 607 ///
596 608 /// This takes `root` instead of `&mut self` so that callers can mutate
597 609 /// other fields while the returned borrow is still valid.
598 610 ///
599 611 /// `each_ancestor` is a callback that is called for each ancestor node
600 612 /// when descending the tree. It is used to keep the different counters
601 613 /// of the `DirstateMap` up-to-date.
602 614 fn get_node_mut_inner<'tree>(
603 615 on_disk: &'on_disk [u8],
604 616 unreachable_bytes: &mut u32,
605 617 root: &'tree mut ChildNodes<'on_disk>,
606 618 path: &HgPath,
607 619 mut each_ancestor: impl FnMut(&mut Node),
608 620 ) -> Result<Option<&'tree mut Node<'on_disk>>, DirstateV2ParseError> {
609 621 let mut children = root;
610 622 let mut components = path.components();
611 623 let mut component =
612 624 components.next().expect("expected at least one components");
613 625 loop {
// `make_mut` converts on-disk children to in-memory on the way down,
// since a mutable reference will be handed out.
614 626 if let Some(child) = children
615 627 .make_mut(on_disk, unreachable_bytes)?
616 628 .get_mut(component)
617 629 {
618 630 if let Some(next_component) = components.next() {
// Only intermediate (ancestor) nodes trigger the callback,
// not the final node itself.
619 631 each_ancestor(child);
620 632 component = next_component;
621 633 children = &mut child.children;
622 634 } else {
623 635 return Ok(Some(child));
624 636 }
625 637 } else {
626 638 return Ok(None);
627 639 }
628 640 }
629 641 }
630 642
631 643 /// Get a mutable reference to the node at `path`, creating it if it does
632 644 /// not exist.
633 645 ///
634 646 /// `each_ancestor` is a callback that is called for each ancestor node
635 647 /// when descending the tree. It is used to keep the different counters
636 648 /// of the `DirstateMap` up-to-date.
637 649 fn get_or_insert_node<'tree, 'path>(
638 650 &'tree mut self,
639 651 path: &'path HgPath,
640 652 each_ancestor: impl FnMut(&mut Node),
641 653 ) -> Result<&'tree mut Node<'on_disk>, DirstateV2ParseError> {
// Owned-Cow conversion here (vs. borrowed in `new_v1`) because the
// inserted path must outlive the caller's `'path` borrow.
642 654 Self::get_or_insert_node_inner(
643 655 self.on_disk,
644 656 &mut self.unreachable_bytes,
645 657 &mut self.root,
646 658 path,
647 659 WithBasename::to_cow_owned,
648 660 each_ancestor,
649 661 )
650 662 }
651 663
652 664 /// Lower-level version of `get_or_insert_node_inner`, which is used when
653 665 /// parsing disk data to remove allocations for new nodes.
654 666 fn get_or_insert_node_inner<'tree, 'path>(
655 667 on_disk: &'on_disk [u8],
656 668 unreachable_bytes: &mut u32,
657 669 root: &'tree mut ChildNodes<'on_disk>,
658 670 path: &'path HgPath,
// `to_cow` decides whether inserted path components borrow from the
// source buffer or are copied into owned storage.
659 671 to_cow: impl Fn(
660 672 WithBasename<&'path HgPath>,
661 673 ) -> WithBasename<Cow<'on_disk, HgPath>>,
662 674 mut each_ancestor: impl FnMut(&mut Node),
663 675 ) -> Result<&'tree mut Node<'on_disk>, DirstateV2ParseError> {
664 676 let mut child_nodes = root;
665 677 let mut inclusive_ancestor_paths =
666 678 WithBasename::inclusive_ancestors_of(path);
667 679 let mut ancestor_path = inclusive_ancestor_paths
668 680 .next()
669 681 .expect("expected at least one inclusive ancestor");
670 682 loop {
// `raw_entry_mut` lets us look up by base name without building a
// `NodeKey`, only allocating (via `to_cow`) when inserting.
671 683 let (_, child_node) = child_nodes
672 684 .make_mut(on_disk, unreachable_bytes)?
673 685 .raw_entry_mut()
674 686 .from_key(ancestor_path.base_name())
675 687 .or_insert_with(|| (to_cow(ancestor_path), Node::default()));
676 688 if let Some(next) = inclusive_ancestor_paths.next() {
677 689 each_ancestor(child_node);
678 690 ancestor_path = next;
679 691 child_nodes = &mut child_node.children;
680 692 } else {
681 693 return Ok(child_node);
682 694 }
683 695 }
684 696 }
685 697
// Unconditionally (re)write the entry for `filename` from the given
// tracked/p1/p2 flags and optional parent file data, adjusting ancestor
// counters for any change in tracked status.
686 698 fn reset_state(
687 699 &mut self,
688 700 filename: &HgPath,
689 701 old_entry_opt: Option<DirstateEntry>,
690 702 wc_tracked: bool,
691 703 p1_tracked: bool,
692 704 p2_info: bool,
693 705 has_meaningful_mtime: bool,
694 706 parent_file_data_opt: Option<ParentFileData>,
695 707 ) -> Result<(), DirstateError> {
696 708 let (had_entry, was_tracked) = match old_entry_opt {
697 709 Some(old_entry) => (true, old_entry.tracked()),
698 710 None => (false, false),
699 711 };
// Adjust ancestor counters for the tracked-state transition
// (tracked -> untracked decrements, untracked -> tracked increments).
700 712 let node = self.get_or_insert_node(filename, |ancestor| {
701 713 if !had_entry {
702 714 ancestor.descendants_with_entry_count += 1;
703 715 }
704 716 if was_tracked {
705 717 if !wc_tracked {
706 718 ancestor.tracked_descendants_count = ancestor
707 719 .tracked_descendants_count
708 720 .checked_sub(1)
709 721 .expect("tracked count to be >= 0");
710 722 }
711 723 } else {
712 724 if wc_tracked {
713 725 ancestor.tracked_descendants_count += 1;
714 726 }
715 727 }
716 728 })?;
717 729
718 730 let v2_data = if let Some(parent_file_data) = parent_file_data_opt {
719 731 DirstateV2Data {
720 732 wc_tracked,
721 733 p1_tracked,
722 734 p2_info,
723 735 mode_size: parent_file_data.mode_size,
// Drop the mtime when it cannot be trusted for "unchanged" checks.
724 736 mtime: if has_meaningful_mtime {
725 737 parent_file_data.mtime
726 738 } else {
727 739 None
728 740 },
729 741 ..Default::default()
730 742 }
731 743 } else {
732 744 DirstateV2Data {
733 745 wc_tracked,
734 746 p1_tracked,
735 747 p2_info,
736 748 ..Default::default()
737 749 }
738 750 };
739 751 node.data = NodeData::Entry(DirstateEntry::from_v2_data(v2_data));
740 752 if !had_entry {
741 753 self.nodes_with_entry_count += 1;
742 754 }
743 755 Ok(())
744 756 }
745 757
// Mark `filename` as tracked, creating its node if needed. Returns
// whether the file was newly tracked (false if already tracked).
746 758 fn set_tracked(
747 759 &mut self,
748 760 filename: &HgPath,
749 761 old_entry_opt: Option<DirstateEntry>,
750 762 ) -> Result<bool, DirstateV2ParseError> {
751 763 let was_tracked = old_entry_opt.map_or(false, |e| e.tracked());
752 764 let had_entry = old_entry_opt.is_some();
753 765 let tracked_count_increment = if was_tracked { 0 } else { 1 };
754 766 let mut new = false;
755 767
756 768 let node = self.get_or_insert_node(filename, |ancestor| {
757 769 if !had_entry {
758 770 ancestor.descendants_with_entry_count += 1;
759 771 }
760 772
761 773 ancestor.tracked_descendants_count += tracked_count_increment;
762 774 })?;
763 775 if let Some(old_entry) = old_entry_opt {
764 776 let mut e = old_entry.clone();
765 777 if e.tracked() {
766 778 // XXX
767 779 // This is probably overkill for more case, but we need this to
768 780 // fully replace the `normallookup` call with `set_tracked`
769 781 // one. Consider smoothing this in the future.
770 782 e.set_possibly_dirty();
771 783 } else {
772 784 new = true;
773 785 e.set_tracked();
774 786 }
775 787 node.data = NodeData::Entry(e)
776 788 } else {
777 789 node.data = NodeData::Entry(DirstateEntry::new_tracked());
778 790 self.nodes_with_entry_count += 1;
779 791 new = true;
780 792 };
781 793 Ok(new)
782 794 }
783 795
784 796 /// Set a node as untracked in the dirstate.
785 797 ///
786 798 /// It is the responsibility of the caller to remove the copy source and/or
787 799 /// the entry itself if appropriate.
788 800 ///
789 801 /// # Panics
790 802 ///
791 803 /// Panics if the node does not exist.
792 804 fn set_untracked(
793 805 &mut self,
794 806 filename: &HgPath,
795 807 old_entry: DirstateEntry,
796 808 ) -> Result<(), DirstateV2ParseError> {
// Every ancestor loses one tracked descendant.
797 809 let node = self
798 810 .get_node_mut(filename, |ancestor| {
799 811 ancestor.tracked_descendants_count = ancestor
800 812 .tracked_descendants_count
801 813 .checked_sub(1)
802 814 .expect("tracked_descendants_count should be >= 0");
803 815 })?
804 816 .expect("node should exist");
805 817 let mut new_entry = old_entry.clone();
806 818 new_entry.set_untracked();
807 819 node.data = NodeData::Entry(new_entry);
808 820 Ok(())
809 821 }
810 822
811 823 /// Set a node as clean in the dirstate.
812 824 ///
813 825 /// It is the responsibility of the caller to remove the copy source.
814 826 ///
815 827 /// # Panics
816 828 ///
817 829 /// Panics if the node does not exist.
818 830 fn set_clean(
819 831 &mut self,
820 832 filename: &HgPath,
821 833 old_entry: DirstateEntry,
822 834 mode: u32,
823 835 size: u32,
824 836 mtime: TruncatedTimestamp,
825 837 ) -> Result<(), DirstateError> {
// A clean file is tracked; if it wasn't before, bump ancestor counters.
826 838 let node = self
827 839 .get_node_mut(filename, |ancestor| {
828 840 if !old_entry.tracked() {
829 841 ancestor.tracked_descendants_count += 1;
830 842 }
831 843 })?
832 844 .expect("node should exist");
833 845 let mut new_entry = old_entry.clone();
834 846 new_entry.set_clean(mode, size, mtime);
835 847 node.data = NodeData::Entry(new_entry);
836 848 Ok(())
837 849 }
838 850
839 851 /// Set a node as possibly dirty in the dirstate.
840 852 ///
841 853 /// # Panics
842 854 ///
843 855 /// Panics if the node does not exist.
844 856 fn set_possibly_dirty(
845 857 &mut self,
846 858 filename: &HgPath,
847 859 ) -> Result<(), DirstateError> {
848 860 let node = self
849 861 .get_node_mut(filename, |_ancestor| {})?
850 862 .expect("node should exist");
851 863 let entry = node.data.as_entry_mut().expect("entry should exist");
852 864 entry.set_possibly_dirty();
// Reassign to release the mutable borrow of `node.data` cleanly.
853 865 node.data = NodeData::Entry(*entry);
854 866 Ok(())
855 867 }
856 868
857 869 /// Clears the cached mtime for the (potential) folder at `path`.
858 870 pub(super) fn clear_cached_mtime(
859 871 &mut self,
860 872 path: &HgPath,
861 873 ) -> Result<(), DirstateV2ParseError> {
// Missing node is fine: nothing cached, nothing to clear.
862 874 let node = match self.get_node_mut(path, |_ancestor| {})? {
863 875 Some(node) => node,
864 876 None => return Ok(()),
865 877 };
866 878 if let NodeData::CachedDirectory { .. } = &node.data {
867 879 node.data = NodeData::None
868 880 }
869 881 Ok(())
870 882 }
871 883
872 884 /// Sets the cached mtime for the (potential) folder at `path`.
873 885 pub(super) fn set_cached_mtime(
874 886 &mut self,
875 887 path: &HgPath,
876 888 mtime: TruncatedTimestamp,
877 889 ) -> Result<(), DirstateV2ParseError> {
878 890 let node = match self.get_node_mut(path, |_ancestor| {})? {
879 891 Some(node) => node,
880 892 None => return Ok(()),
881 893 };
882 894 match &node.data {
883 895 NodeData::Entry(_) => {} // Don’t overwrite an entry
884 896 NodeData::CachedDirectory { .. } | NodeData::None => {
885 897 node.data = NodeData::CachedDirectory { mtime }
886 898 }
887 899 }
888 900 Ok(())
889 901 }
890 902
// External (post-order, depth-first) iterator over all nodes; parse
// errors from the on-disk representation surface as `Err` items.
891 903 fn iter_nodes<'tree>(
892 904 &'tree self,
893 905 ) -> impl Iterator<
894 906 Item = Result<NodeRef<'tree, 'on_disk>, DirstateV2ParseError>,
895 907 > + 'tree {
896 908 // Depth first tree traversal.
897 909 //
898 910 // If we could afford internal iteration and recursion,
899 911 // this would look like:
900 912 //
901 913 // ```
902 914 // fn traverse_children(
903 915 // children: &ChildNodes,
904 916 // each: &mut impl FnMut(&Node),
905 917 // ) {
906 918 // for child in children.values() {
907 919 // traverse_children(&child.children, each);
908 920 // each(child);
909 921 // }
910 922 // }
911 923 // ```
912 924 //
913 925 // However we want an external iterator and therefore can’t use the
914 926 // call stack. Use an explicit stack instead:
915 927 let mut stack = Vec::new();
916 928 let mut iter = self.root.as_ref().iter();
917 929 std::iter::from_fn(move || {
918 930 while let Some(child_node) = iter.next() {
919 931 let children = match child_node.children(self.on_disk) {
920 932 Ok(children) => children,
921 933 Err(error) => return Some(Err(error)),
922 934 };
923 935 // Pseudo-recursion
924 936 let new_iter = children.iter();
925 937 let old_iter = std::mem::replace(&mut iter, new_iter);
926 938 stack.push((child_node, old_iter));
927 939 }
928 940 // Found the end of a `children.iter()` iterator.
929 941 if let Some((child_node, next_iter)) = stack.pop() {
930 942 // "Return" from pseudo-recursion by restoring state from the
931 943 // explicit stack
932 944 iter = next_iter;
933 945
934 946 Some(Ok(child_node))
935 947 } else {
936 948 // Reached the bottom of the stack, we’re done
937 949 None
938 950 }
939 951 })
940 952 }
941 953
// Account a dropped path as unreachable bytes — but only if it was
// borrowed from the on-disk buffer (owned paths free actual memory).
942 954 fn count_dropped_path(unreachable_bytes: &mut u32, path: &Cow<HgPath>) {
943 955 if let Cow::Borrowed(path) = path {
944 956 *unreachable_bytes += path.len() as u32
945 957 }
946 958 }
947 959
// Override the append-vs-rewrite policy used by `write_should_append`.
948 960 pub(crate) fn set_write_mode(&mut self, write_mode: DirstateMapWriteMode) {
949 961 self.write_mode = write_mode;
950 962 }
951 963 }
952 964
953 965 /// Like `Iterator::filter_map`, but over a fallible iterator of `Result`s.
954 966 ///
955 967 /// The callback is only called for incoming `Ok` values. Errors are passed
956 968 /// through as-is. In order to let it use the `?` operator the callback is
957 969 /// expected to return a `Result` of `Option`, instead of an `Option` of
958 970 /// `Result`.
959 971 fn filter_map_results<'a, I, F, A, B, E>(
960 972 iter: I,
961 973 f: F,
962 974 ) -> impl Iterator<Item = Result<B, E>> + 'a
963 975 where
964 976 I: Iterator<Item = Result<A, E>> + 'a,
965 977 F: Fn(A) -> Result<Option<B>, E> + 'a,
966 978 {
// `transpose` turns the callback's `Result<Option<B>>` into the
// `Option<Result<B>>` that `filter_map` needs (Ok(None) filters out).
967 979 iter.filter_map(move |result| match result {
968 980 Ok(node) => f(node).transpose(),
969 981 Err(e) => Some(Err(e)),
970 982 })
971 983 }
972 984
973 985 impl OwningDirstateMap {
// Drop every node and reset the counters.
// NOTE(review): other fields (e.g. unreachable_bytes, copy counts'
// on-disk accounting) are left as-is here — confirm that is intended.
974 986 pub fn clear(&mut self) {
975 987 self.with_dmap_mut(|map| {
976 988 map.root = Default::default();
977 989 map.nodes_with_entry_count = 0;
978 990 map.nodes_with_copy_source_count = 0;
979 991 });
980 992 }
981 993
// Public wrapper: fetch the current entry, then delegate to
// `DirstateMap::set_tracked`. Returns whether the file was newly tracked.
982 994 pub fn set_tracked(
983 995 &mut self,
984 996 filename: &HgPath,
985 997 ) -> Result<bool, DirstateV2ParseError> {
986 998 let old_entry_opt = self.get(filename)?;
987 999 self.with_dmap_mut(|map| map.set_tracked(filename, old_entry_opt))
988 1000 }
989 1001
// Untrack `filename`. Returns false when there was no entry at all;
// true otherwise (including the early-exit special cases below).
990 1002 pub fn set_untracked(
991 1003 &mut self,
992 1004 filename: &HgPath,
993 1005 ) -> Result<bool, DirstateError> {
994 1006 let old_entry_opt = self.get(filename)?;
995 1007 match old_entry_opt {
996 1008 None => Ok(false),
997 1009 Some(old_entry) => {
998 1010 if !old_entry.tracked() {
999 1011 // `DirstateMap::set_untracked` is not a noop if
1000 1012 // already not tracked as it will decrement the
1001 1013 // tracked counters while going down.
1002 1014 return Ok(true);
1003 1015 }
1004 1016 if old_entry.added() {
1005 1017 // Untracking an "added" entry will just result in a
1006 1018 // worthless entry (and other parts of the code will
1007 1019 // complain about it), just drop it entirely.
1008 1020 self.drop_entry_and_copy_source(filename)?;
1009 1021 return Ok(true);
1010 1022 }
// Keep the copy source only for entries that carry p2 info.
1011 1023 if !old_entry.p2_info() {
1012 1024 self.copy_map_remove(filename)?;
1013 1025 }
1014 1026
1015 1027 self.with_dmap_mut(|map| {
1016 1028 map.set_untracked(filename, old_entry)?;
1017 1029 Ok(true)
1018 1030 })
1019 1031 }
1020 1032 }
1021 1033 }
1022 1034
// Mark `filename` as clean with the given stat data; errors with
// `PathNotFound` when there is no entry. Also drops any copy source.
1023 1035 pub fn set_clean(
1024 1036 &mut self,
1025 1037 filename: &HgPath,
1026 1038 mode: u32,
1027 1039 size: u32,
1028 1040 mtime: TruncatedTimestamp,
1029 1041 ) -> Result<(), DirstateError> {
1030 1042 let old_entry = match self.get(filename)? {
1031 1043 None => {
1032 1044 return Err(
1033 1045 DirstateMapError::PathNotFound(filename.into()).into()
1034 1046 )
1035 1047 }
1036 1048 Some(e) => e,
1037 1049 };
1038 1050 self.copy_map_remove(filename)?;
1039 1051 self.with_dmap_mut(|map| {
1040 1052 map.set_clean(filename, old_entry, mode, size, mtime)
1041 1053 })
1042 1054 }
1043 1055
// Mark an existing entry as possibly dirty; `PathNotFound` otherwise.
1044 1056 pub fn set_possibly_dirty(
1045 1057 &mut self,
1046 1058 filename: &HgPath,
1047 1059 ) -> Result<(), DirstateError> {
1048 1060 if self.get(filename)?.is_none() {
1049 1061 return Err(DirstateMapError::PathNotFound(filename.into()).into());
1050 1062 }
1051 1063 self.with_dmap_mut(|map| map.set_possibly_dirty(filename))
1052 1064 }
1053 1065
// Public entry point for rewriting an entry's state. When no flag at
// all is set the entry is dropped instead of stored as an empty shell.
1054 1066 pub fn reset_state(
1055 1067 &mut self,
1056 1068 filename: &HgPath,
1057 1069 wc_tracked: bool,
1058 1070 p1_tracked: bool,
1059 1071 p2_info: bool,
1060 1072 has_meaningful_mtime: bool,
1061 1073 parent_file_data_opt: Option<ParentFileData>,
1062 1074 ) -> Result<(), DirstateError> {
1063 1075 if !(p1_tracked || p2_info || wc_tracked) {
1064 1076 self.drop_entry_and_copy_source(filename)?;
1065 1077 return Ok(());
1066 1078 }
1067 1079 self.copy_map_remove(filename)?;
1068 1080 let old_entry_opt = self.get(filename)?;
1069 1081 self.with_dmap_mut(|map| {
1070 1082 map.reset_state(
1071 1083 filename,
1072 1084 old_entry_opt,
1073 1085 wc_tracked,
1074 1086 p1_tracked,
1075 1087 p2_info,
1076 1088 has_meaningful_mtime,
1077 1089 parent_file_data_opt,
1078 1090 )
1079 1091 })
1080 1092 }
1081 1093
// Remove the entry and copy source for `filename`, pruning any tree
// nodes that become empty on the way back up and keeping the
// ancestor/global counters consistent.
1082 1094 pub fn drop_entry_and_copy_source(
1083 1095 &mut self,
1084 1096 filename: &HgPath,
1085 1097 ) -> Result<(), DirstateError> {
1086 1098 let was_tracked = self.get(filename)?.map_or(false, |e| e.tracked());
// What the recursive removal found at the leaf.
1087 1099 struct Dropped {
1088 1100 was_tracked: bool,
1089 1101 had_entry: bool,
1090 1102 had_copy_source: bool,
1091 1103 }
1092 1104
1093 1105 /// If this returns `Ok(Some((dropped, removed)))`, then
1094 1106 ///
1095 1107 /// * `dropped` is about the leaf node that was at `filename`
1096 1108 /// * `removed` is whether this particular level of recursion just
1097 1109 /// removed a node in `nodes`.
1098 1110 fn recur<'on_disk>(
1099 1111 on_disk: &'on_disk [u8],
1100 1112 unreachable_bytes: &mut u32,
1101 1113 nodes: &mut ChildNodes<'on_disk>,
1102 1114 path: &HgPath,
1103 1115 ) -> Result<Option<(Dropped, bool)>, DirstateV2ParseError> {
1104 1116 let (first_path_component, rest_of_path) =
1105 1117 path.split_first_component();
1106 1118 let nodes = nodes.make_mut(on_disk, unreachable_bytes)?;
1107 1119 let node = if let Some(node) = nodes.get_mut(first_path_component)
1108 1120 {
1109 1121 node
1110 1122 } else {
1111 1123 return Ok(None);
1112 1124 };
1113 1125 let dropped;
1114 1126 if let Some(rest) = rest_of_path {
// Not at the leaf yet: recurse, then fix up this ancestor's
// counters based on what was dropped below.
1115 1127 if let Some((d, removed)) = recur(
1116 1128 on_disk,
1117 1129 unreachable_bytes,
1118 1130 &mut node.children,
1119 1131 rest,
1120 1132 )? {
1121 1133 dropped = d;
1122 1134 if dropped.had_entry {
1123 1135 node.descendants_with_entry_count = node
1124 1136 .descendants_with_entry_count
1125 1137 .checked_sub(1)
1126 1138 .expect(
1127 1139 "descendants_with_entry_count should be >= 0",
1128 1140 );
1129 1141 }
1130 1142 if dropped.was_tracked {
1131 1143 node.tracked_descendants_count = node
1132 1144 .tracked_descendants_count
1133 1145 .checked_sub(1)
1134 1146 .expect(
1135 1147 "tracked_descendants_count should be >= 0",
1136 1148 );
1137 1149 }
1138 1150
1139 1151 // Directory caches must be invalidated when removing a
1140 1152 // child node
1141 1153 if removed {
1142 1154 if let NodeData::CachedDirectory { .. } = &node.data {
1143 1155 node.data = NodeData::None
1144 1156 }
1145 1157 }
1146 1158 } else {
1147 1159 return Ok(None);
1148 1160 }
1149 1161 } else {
// Leaf: strip the entry and copy source, recording what was there.
1150 1162 let entry = node.data.as_entry();
1151 1163 let was_tracked = entry.map_or(false, |entry| entry.tracked());
1152 1164 let had_entry = entry.is_some();
1153 1165 if had_entry {
1154 1166 node.data = NodeData::None
1155 1167 }
1156 1168 let mut had_copy_source = false;
1157 1169 if let Some(source) = &node.copy_source {
1158 1170 DirstateMap::count_dropped_path(unreachable_bytes, source);
1159 1171 had_copy_source = true;
1160 1172 node.copy_source = None
1161 1173 }
1162 1174 dropped = Dropped {
1163 1175 was_tracked,
1164 1176 had_entry,
1165 1177 had_copy_source,
1166 1178 };
1167 1179 }
1168 1180 // After recursion, for both leaf (rest_of_path is None) nodes and
1169 1181 // parent nodes, remove a node if it just became empty.
1170 1182 let remove = !node.data.has_entry()
1171 1183 && node.copy_source.is_none()
1172 1184 && node.children.is_empty();
1173 1185 if remove {
1174 1186 let (key, _) =
1175 1187 nodes.remove_entry(first_path_component).unwrap();
1176 1188 DirstateMap::count_dropped_path(
1177 1189 unreachable_bytes,
1178 1190 key.full_path(),
1179 1191 )
1180 1192 }
1181 1193 Ok(Some((dropped, remove)))
1182 1194 }
1183 1195
1184 1196 self.with_dmap_mut(|map| {
1185 1197 if let Some((dropped, _removed)) = recur(
1186 1198 map.on_disk,
1187 1199 &mut map.unreachable_bytes,
1188 1200 &mut map.root,
1189 1201 filename,
1190 1202 )? {
// Propagate the leaf's state into the map-wide counters.
1191 1203 if dropped.had_entry {
1192 1204 map.nodes_with_entry_count = map
1193 1205 .nodes_with_entry_count
1194 1206 .checked_sub(1)
1195 1207 .expect("nodes_with_entry_count should be >= 0");
1196 1208 }
1197 1209 if dropped.had_copy_source {
1198 1210 map.nodes_with_copy_source_count = map
1199 1211 .nodes_with_copy_source_count
1200 1212 .checked_sub(1)
1201 1213 .expect("nodes_with_copy_source_count should be >= 0");
1202 1214 }
1203 1215 } else {
1204 1216 debug_assert!(!was_tracked);
1205 1217 }
1206 1218 Ok(())
1207 1219 })
1208 1220 }
1209 1221
1210 1222 pub fn has_tracked_dir(
1211 1223 &mut self,
1212 1224 directory: &HgPath,
1213 1225 ) -> Result<bool, DirstateError> {
1214 1226 self.with_dmap_mut(|map| {
1215 1227 if let Some(node) = map.get_node(directory)? {
1216 1228 // A node without a `DirstateEntry` was created to hold child
1217 1229 // nodes, and is therefore a directory.
1218 1230 let is_dir = node.entry()?.is_none();
1219 1231 Ok(is_dir && node.tracked_descendants_count() > 0)
1220 1232 } else {
1221 1233 Ok(false)
1222 1234 }
1223 1235 })
1224 1236 }
1225 1237
1226 1238 pub fn has_dir(
1227 1239 &mut self,
1228 1240 directory: &HgPath,
1229 1241 ) -> Result<bool, DirstateError> {
1230 1242 self.with_dmap_mut(|map| {
1231 1243 if let Some(node) = map.get_node(directory)? {
1232 1244 // A node without a `DirstateEntry` was created to hold child
1233 1245 // nodes, and is therefore a directory.
1234 1246 let is_dir = node.entry()?.is_none();
1235 1247 Ok(is_dir && node.descendants_with_entry_count() > 0)
1236 1248 } else {
1237 1249 Ok(false)
1238 1250 }
1239 1251 })
1240 1252 }
1241 1253
    /// Serialize the map to the "v1" on-disk dirstate format: the two
    /// parent node IDs followed by one packed record per node that has an
    /// entry (directory-only nodes are skipped).
    #[timed]
    pub fn pack_v1(
        &self,
        parents: DirstateParents,
    ) -> Result<Vec<u8>, DirstateError> {
        let map = self.get_map();
        // Optimization (to be measured?): pre-compute size to avoid `Vec`
        // reallocations
        let mut size = parents.as_bytes().len();
        for node in map.iter_nodes() {
            let node = node?;
            if node.entry()?.is_some() {
                size += packed_entry_size(
                    node.full_path(map.on_disk)?,
                    node.copy_source(map.on_disk)?,
                );
            }
        }

        let mut packed = Vec::with_capacity(size);
        packed.extend(parents.as_bytes());

        // Second pass: actually serialize each entry into `packed`.
        for node in map.iter_nodes() {
            let node = node?;
            if let Some(entry) = node.entry()? {
                pack_entry(
                    node.full_path(map.on_disk)?,
                    &entry,
                    node.copy_source(map.on_disk)?,
                    &mut packed,
                );
            }
        }
        Ok(packed)
    }
1277 1289
    /// Returns new data and metadata together with whether that data should be
    /// appended to the existing data file whose content is at
    /// `map.on_disk` (true), instead of written to a new data file
    /// (false), and the previous size of data on disk.
    #[timed]
    pub fn pack_v2(
        &self,
        write_mode: DirstateMapWriteMode,
    ) -> Result<(Vec<u8>, on_disk::TreeMetadata, bool, usize), DirstateError>
    {
        let map = self.get_map();
        // The actual v2 serialization logic lives in the `on_disk` module.
        on_disk::write(map, write_mode)
    }
1291 1303
    /// `callback` allows the caller to process and do something with the
    /// results of the status. This is needed to do so efficiently (i.e.
    /// without cloning the `DirstateStatus` object with its paths) because
    /// we need to borrow from `Self`.
    pub fn with_status<R>(
        &mut self,
        matcher: &(dyn Matcher + Sync),
        root_dir: PathBuf,
        ignore_files: Vec<PathBuf>,
        options: StatusOptions,
        callback: impl for<'r> FnOnce(
            Result<(DirstateStatus<'r>, Vec<PatternFileWarning>), StatusError>,
        ) -> R,
    ) -> R {
        // `status` is given mutable access to the map (via
        // `with_dmap_mut`), hence `&mut self` on this method.
        self.with_dmap_mut(|map| {
            callback(super::status::status(
                map,
                matcher,
                root_dir,
                ignore_files,
                options,
            ))
        })
    }
1316 1328
1317 1329 pub fn copy_map_len(&self) -> usize {
1318 1330 let map = self.get_map();
1319 1331 map.nodes_with_copy_source_count as usize
1320 1332 }
1321 1333
1322 1334 pub fn copy_map_iter(&self) -> CopyMapIter<'_> {
1323 1335 let map = self.get_map();
1324 1336 Box::new(filter_map_results(map.iter_nodes(), move |node| {
1325 1337 Ok(if let Some(source) = node.copy_source(map.on_disk)? {
1326 1338 Some((node.full_path(map.on_disk)?, source))
1327 1339 } else {
1328 1340 None
1329 1341 })
1330 1342 }))
1331 1343 }
1332 1344
1333 1345 pub fn copy_map_contains_key(
1334 1346 &self,
1335 1347 key: &HgPath,
1336 1348 ) -> Result<bool, DirstateV2ParseError> {
1337 1349 let map = self.get_map();
1338 1350 Ok(if let Some(node) = map.get_node(key)? {
1339 1351 node.has_copy_source()
1340 1352 } else {
1341 1353 false
1342 1354 })
1343 1355 }
1344 1356
1345 1357 pub fn copy_map_get(
1346 1358 &self,
1347 1359 key: &HgPath,
1348 1360 ) -> Result<Option<&HgPath>, DirstateV2ParseError> {
1349 1361 let map = self.get_map();
1350 1362 if let Some(node) = map.get_node(key)? {
1351 1363 if let Some(source) = node.copy_source(map.on_disk)? {
1352 1364 return Ok(Some(source));
1353 1365 }
1354 1366 }
1355 1367 Ok(None)
1356 1368 }
1357 1369
    /// Remove and return the copy source of `key`, if any, keeping the
    /// `nodes_with_copy_source_count` and `unreachable_bytes` counters in
    /// sync.
    pub fn copy_map_remove(
        &mut self,
        key: &HgPath,
    ) -> Result<Option<HgPathBuf>, DirstateV2ParseError> {
        self.with_dmap_mut(|map| {
            // Split the borrows up front so the closure below can mutate
            // the counters while `map.root` is mutably borrowed.
            let count = &mut map.nodes_with_copy_source_count;
            let unreachable_bytes = &mut map.unreachable_bytes;
            Ok(DirstateMap::get_node_mut_inner(
                map.on_disk,
                unreachable_bytes,
                &mut map.root,
                key,
                |_ancestor| {},
            )?
            .and_then(|node| {
                if let Some(source) = &node.copy_source {
                    // One fewer node with a copy source; the dropped
                    // source's bytes in the data file become unreachable.
                    *count = count
                        .checked_sub(1)
                        .expect("nodes_with_copy_source_count should be >= 0");
                    DirstateMap::count_dropped_path(unreachable_bytes, source);
                }
                node.copy_source.take().map(Cow::into_owned)
            }))
        })
    }
1383 1395
1384 1396 pub fn copy_map_insert(
1385 1397 &mut self,
1386 1398 key: &HgPath,
1387 1399 value: &HgPath,
1388 1400 ) -> Result<Option<HgPathBuf>, DirstateV2ParseError> {
1389 1401 self.with_dmap_mut(|map| {
1390 1402 let node = map.get_or_insert_node(&key, |_ancestor| {})?;
1391 1403 let had_copy_source = node.copy_source.is_none();
1392 1404 let old = node
1393 1405 .copy_source
1394 1406 .replace(value.to_owned().into())
1395 1407 .map(Cow::into_owned);
1396 1408 if had_copy_source {
1397 1409 map.nodes_with_copy_source_count += 1
1398 1410 }
1399 1411 Ok(old)
1400 1412 })
1401 1413 }
1402 1414
1403 1415 pub fn len(&self) -> usize {
1404 1416 let map = self.get_map();
1405 1417 map.nodes_with_entry_count as usize
1406 1418 }
1407 1419
1408 1420 pub fn contains_key(
1409 1421 &self,
1410 1422 key: &HgPath,
1411 1423 ) -> Result<bool, DirstateV2ParseError> {
1412 1424 Ok(self.get(key)?.is_some())
1413 1425 }
1414 1426
1415 1427 pub fn get(
1416 1428 &self,
1417 1429 key: &HgPath,
1418 1430 ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {
1419 1431 let map = self.get_map();
1420 1432 Ok(if let Some(node) = map.get_node(key)? {
1421 1433 node.entry()?
1422 1434 } else {
1423 1435 None
1424 1436 })
1425 1437 }
1426 1438
1427 1439 pub fn iter(&self) -> StateMapIter<'_> {
1428 1440 let map = self.get_map();
1429 1441 Box::new(filter_map_results(map.iter_nodes(), move |node| {
1430 1442 Ok(if let Some(entry) = node.entry()? {
1431 1443 Some((node.full_path(map.on_disk)?, entry))
1432 1444 } else {
1433 1445 None
1434 1446 })
1435 1447 }))
1436 1448 }
1437 1449
    /// Iterate over the full paths of all nodes that have at least one
    /// tracked descendant, i.e. over all directories containing tracked
    /// files.
    pub fn iter_tracked_dirs(
        &mut self,
    ) -> Result<
        Box<
            dyn Iterator<Item = Result<&HgPath, DirstateV2ParseError>>
                + Send
                + '_,
        >,
        DirstateError,
    > {
        let map = self.get_map();
        // Copy the reference out so the `move` closure does not capture
        // `map` itself.
        let on_disk = map.on_disk;
        Ok(Box::new(filter_map_results(
            map.iter_nodes(),
            move |node| {
                Ok(if node.tracked_descendants_count() > 0 {
                    Some(node.full_path(on_disk)?)
                } else {
                    None
                })
            },
        )))
    }
1461 1473
    /// Only public because it needs to be exposed to the Python layer.
    /// It is not the full `setparents` logic, only the parts that mutate the
    /// entries.
    ///
    /// Drops merge data from every entry that has p2 information, and
    /// returns the `(path, copy source)` pairs whose copy source was
    /// removed in the process.
    pub fn setparents_fixup(
        &mut self,
    ) -> Result<Vec<(HgPathBuf, HgPathBuf)>, DirstateV2ParseError> {
        // XXX
        // All the copying and re-querying is quite inefficient, but this is
        // still a lot better than doing it from Python.
        //
        // The better solution is to develop a mechanism for `iter_mut`,
        // which will be a lot more involved: we're dealing with a lazy,
        // append-mostly, tree-like data structure. This will do for now.
        let mut copies = vec![];
        let mut files_with_p2_info = vec![];
        // First pass (read-only): collect the affected paths.
        for res in self.iter() {
            let (path, entry) = res?;
            if entry.p2_info() {
                files_with_p2_info.push(path.to_owned())
            }
        }
        // Second pass (mutable): drop merge data and harvest copy sources.
        self.with_dmap_mut(|map| {
            for path in files_with_p2_info.iter() {
                let node = map.get_or_insert_node(path, |_| {})?;
                let entry =
                    node.data.as_entry_mut().expect("entry should exist");
                entry.drop_merge_data();
                if let Some(source) = node.copy_source.take().as_deref() {
                    copies.push((path.to_owned(), source.to_owned()));
                }
            }
            Ok(copies)
        })
    }
1496 1508
    /// Iterate over `(path, (state, mode, size, mtime))` debug tuples.
    ///
    /// With `all` set, nodes without an entry (pure directories) are
    /// included as well, using placeholder values (and the cached
    /// directory mtime when one is present).
    pub fn debug_iter(
        &self,
        all: bool,
    ) -> Box<
        dyn Iterator<
                Item = Result<
                    (&HgPath, (u8, i32, i32, i32)),
                    DirstateV2ParseError,
                >,
            > + Send
            + '_,
    > {
        let map = self.get_map();
        Box::new(filter_map_results(map.iter_nodes(), move |node| {
            let debug_tuple = if let Some(entry) = node.entry()? {
                entry.debug_tuple()
            } else if !all {
                // Directory-only node and `all` not requested: skip.
                return Ok(None);
            } else if let Some(mtime) = node.cached_directory_mtime()? {
                (b' ', 0, -1, mtime.truncated_seconds() as i32)
            } else {
                (b' ', 0, -1, -1)
            };
            Ok(Some((node.full_path(map.on_disk)?, debug_tuple)))
        }))
    }
1523 1535 }
1524 1536 #[cfg(test)]
1525 1537 mod tests {
1526 1538 use super::*;
1527 1539
1528 1540 /// Shortcut to return tracked descendants of a path.
1529 1541 /// Panics if the path does not exist.
1530 1542 fn tracked_descendants(map: &OwningDirstateMap, path: &[u8]) -> u32 {
1531 1543 let path = dbg!(HgPath::new(path));
1532 1544 let node = map.get_map().get_node(path);
1533 1545 node.unwrap().unwrap().tracked_descendants_count()
1534 1546 }
1535 1547
1536 1548 /// Shortcut to return descendants with an entry.
1537 1549 /// Panics if the path does not exist.
1538 1550 fn descendants_with_an_entry(map: &OwningDirstateMap, path: &[u8]) -> u32 {
1539 1551 let path = dbg!(HgPath::new(path));
1540 1552 let node = map.get_map().get_node(path);
1541 1553 node.unwrap().unwrap().descendants_with_entry_count()
1542 1554 }
1543 1555
1544 1556 fn assert_does_not_exist(map: &OwningDirstateMap, path: &[u8]) {
1545 1557 let path = dbg!(HgPath::new(path));
1546 1558 let node = map.get_map().get_node(path);
1547 1559 assert!(node.unwrap().is_none());
1548 1560 }
1549 1561
1550 1562 /// Shortcut for path creation in tests
1551 1563 fn p(b: &[u8]) -> &HgPath {
1552 1564 HgPath::new(b)
1553 1565 }
1554 1566
    /// Test the very simple case a single tracked file
    #[test]
    fn test_tracked_descendants_simple() -> Result<(), DirstateError> {
        let mut map = OwningDirstateMap::new_empty(vec![]);
        assert_eq!(map.len(), 0);

        map.set_tracked(p(b"some/nested/path"))?;

        // Every ancestor directory sees exactly one tracked descendant.
        assert_eq!(map.len(), 1);
        assert_eq!(tracked_descendants(&map, b"some"), 1);
        assert_eq!(tracked_descendants(&map, b"some/nested"), 1);
        assert_eq!(tracked_descendants(&map, b"some/nested/path"), 0);

        // Untracking the only file also removes its now-empty ancestors.
        map.set_untracked(p(b"some/nested/path"))?;
        assert_eq!(map.len(), 0);
        assert!(map.get_map().get_node(p(b"some"))?.is_none());

        Ok(())
    }
1574 1586
    /// Test the simple case of all tracked, but multiple files
    #[test]
    fn test_tracked_descendants_multiple() -> Result<(), DirstateError> {
        let mut map = OwningDirstateMap::new_empty(vec![]);

        map.set_tracked(p(b"some/nested/path"))?;
        map.set_tracked(p(b"some/nested/file"))?;
        // one layer without any files to test deletion cascade
        map.set_tracked(p(b"some/other/nested/path"))?;
        map.set_tracked(p(b"root_file"))?;
        map.set_tracked(p(b"some/file"))?;
        map.set_tracked(p(b"some/file2"))?;
        map.set_tracked(p(b"some/file3"))?;

        assert_eq!(map.len(), 7);
        assert_eq!(tracked_descendants(&map, b"some"), 6);
        assert_eq!(tracked_descendants(&map, b"some/nested"), 2);
        assert_eq!(tracked_descendants(&map, b"some/other"), 1);
        assert_eq!(tracked_descendants(&map, b"some/other/nested"), 1);
        assert_eq!(tracked_descendants(&map, b"some/nested/path"), 0);

        // Untrack files one by one, checking the counters cascade.
        map.set_untracked(p(b"some/nested/path"))?;
        assert_eq!(map.len(), 6);
        assert_eq!(tracked_descendants(&map, b"some"), 5);
        assert_eq!(tracked_descendants(&map, b"some/nested"), 1);
        assert_eq!(tracked_descendants(&map, b"some/other"), 1);
        assert_eq!(tracked_descendants(&map, b"some/other/nested"), 1);

        map.set_untracked(p(b"some/nested/file"))?;
        assert_eq!(map.len(), 5);
        assert_eq!(tracked_descendants(&map, b"some"), 4);
        assert_eq!(tracked_descendants(&map, b"some/other"), 1);
        assert_eq!(tracked_descendants(&map, b"some/other/nested"), 1);
        assert_does_not_exist(&map, b"some_nested");

        map.set_untracked(p(b"some/other/nested/path"))?;
        assert_eq!(map.len(), 4);
        assert_eq!(tracked_descendants(&map, b"some"), 3);
        assert_does_not_exist(&map, b"some/other");

        map.set_untracked(p(b"root_file"))?;
        assert_eq!(map.len(), 3);
        assert_eq!(tracked_descendants(&map, b"some"), 3);
        assert_does_not_exist(&map, b"root_file");

        map.set_untracked(p(b"some/file"))?;
        assert_eq!(map.len(), 2);
        assert_eq!(tracked_descendants(&map, b"some"), 2);
        assert_does_not_exist(&map, b"some/file");

        map.set_untracked(p(b"some/file2"))?;
        assert_eq!(map.len(), 1);
        assert_eq!(tracked_descendants(&map, b"some"), 1);
        assert_does_not_exist(&map, b"some/file2");

        map.set_untracked(p(b"some/file3"))?;
        assert_eq!(map.len(), 0);
        assert_does_not_exist(&map, b"some/file3");

        Ok(())
    }
1636 1648
    /// Check with a mix of tracked and non-tracked items
    #[test]
    fn test_tracked_descendants_different() -> Result<(), DirstateError> {
        let mut map = OwningDirstateMap::new_empty(vec![]);

        // A file that was just added
        map.set_tracked(p(b"some/nested/path"))?;
        // This has no information, the dirstate should ignore it
        map.reset_state(p(b"some/file"), false, false, false, false, None)?;
        assert_does_not_exist(&map, b"some/file");

        // A file that was removed
        map.reset_state(
            p(b"some/nested/file"),
            false,
            true,
            false,
            false,
            None,
        )?;
        assert!(!map.get(p(b"some/nested/file"))?.unwrap().tracked());
        // Only present in p2
        map.reset_state(p(b"some/file3"), false, false, true, false, None)?;
        assert!(!map.get(p(b"some/file3"))?.unwrap().tracked());
        // A file that was merged
        map.reset_state(p(b"root_file"), true, true, true, false, None)?;
        assert!(map.get(p(b"root_file"))?.unwrap().tracked());
        // A file that is added, with info from p2
        // XXX is that actually possible?
        map.reset_state(p(b"some/file2"), true, false, true, false, None)?;
        assert!(map.get(p(b"some/file2"))?.unwrap().tracked());
        // A clean file
        // One layer without any files to test deletion cascade
        map.reset_state(
            p(b"some/other/nested/path"),
            true,
            true,
            false,
            false,
            None,
        )?;
        assert!(map.get(p(b"some/other/nested/path"))?.unwrap().tracked());

        assert_eq!(map.len(), 6);
        assert_eq!(tracked_descendants(&map, b"some"), 3);
        assert_eq!(descendants_with_an_entry(&map, b"some"), 5);
        assert_eq!(tracked_descendants(&map, b"some/other/nested"), 1);
        assert_eq!(descendants_with_an_entry(&map, b"some/other/nested"), 1);
        assert_eq!(tracked_descendants(&map, b"some/other/nested/path"), 0);
        assert_eq!(
            descendants_with_an_entry(&map, b"some/other/nested/path"),
            0
        );
        assert_eq!(tracked_descendants(&map, b"some/nested"), 1);
        assert_eq!(descendants_with_an_entry(&map, b"some/nested"), 2);

        // might as well check this
        map.set_untracked(p(b"path/does/not/exist"))?;
        assert_eq!(map.len(), 6);

        map.set_untracked(p(b"some/other/nested/path"))?;
        // It is set untracked but not deleted since it held other information
        assert_eq!(map.len(), 6);
        assert_eq!(tracked_descendants(&map, b"some"), 2);
        assert_eq!(descendants_with_an_entry(&map, b"some"), 5);
        assert_eq!(descendants_with_an_entry(&map, b"some/other"), 1);
        assert_eq!(descendants_with_an_entry(&map, b"some/other/nested"), 1);
        assert_eq!(tracked_descendants(&map, b"some/nested"), 1);
        assert_eq!(descendants_with_an_entry(&map, b"some/nested"), 2);

        map.set_untracked(p(b"some/nested/path"))?;
        // It is set untracked *and* deleted since it was only added
        assert_eq!(map.len(), 5);
        assert_eq!(tracked_descendants(&map, b"some"), 1);
        assert_eq!(descendants_with_an_entry(&map, b"some"), 4);
        assert_eq!(tracked_descendants(&map, b"some/nested"), 0);
        assert_eq!(descendants_with_an_entry(&map, b"some/nested"), 1);
        assert_does_not_exist(&map, b"some/nested/path");

        map.set_untracked(p(b"root_file"))?;
        // Untracked but not deleted
        assert_eq!(map.len(), 5);
        assert!(map.get(p(b"root_file"))?.is_some());

        map.set_untracked(p(b"some/file2"))?;
        assert_eq!(map.len(), 5);
        assert_eq!(tracked_descendants(&map, b"some"), 0);
        assert!(map.get(p(b"some/file2"))?.is_some());

        map.set_untracked(p(b"some/file3"))?;
        assert_eq!(map.len(), 5);
        assert_eq!(tracked_descendants(&map, b"some"), 0);
        assert!(map.get(p(b"some/file3"))?.is_some());

        Ok(())
    }
1733 1745
    /// Check that copies counter is correctly updated
    #[test]
    fn test_copy_source() -> Result<(), DirstateError> {
        let mut map = OwningDirstateMap::new_empty(vec![]);

        // Clean file
        map.reset_state(p(b"files/clean"), true, true, false, false, None)?;
        // Merged file
        map.reset_state(p(b"files/from_p2"), true, true, true, false, None)?;
        // Removed file
        map.reset_state(p(b"removed"), false, true, false, false, None)?;
        // Added file
        map.reset_state(p(b"files/added"), true, false, false, false, None)?;
        // Add copy
        map.copy_map_insert(p(b"files/clean"), p(b"clean_copy_source"))?;
        assert_eq!(map.copy_map_len(), 1);

        // Copy override: replacing a copy source must not bump the counter
        map.copy_map_insert(p(b"files/clean"), p(b"other_clean_copy_source"))?;
        assert_eq!(map.copy_map_len(), 1);

        // Multiple copies
        map.copy_map_insert(p(b"removed"), p(b"removed_copy_source"))?;
        assert_eq!(map.copy_map_len(), 2);

        map.copy_map_insert(p(b"files/added"), p(b"added_copy_source"))?;
        assert_eq!(map.copy_map_len(), 3);

        // Added, so the entry is completely removed
        map.set_untracked(p(b"files/added"))?;
        assert_does_not_exist(&map, b"files/added");
        assert_eq!(map.copy_map_len(), 2);

        // Removed, so the entry is kept around, so is its copy
        map.set_untracked(p(b"removed"))?;
        assert!(map.get(p(b"removed"))?.is_some());
        assert_eq!(map.copy_map_len(), 2);

        // Clean, so the entry is kept around, but not its copy
        map.set_untracked(p(b"files/clean"))?;
        assert!(map.get(p(b"files/clean"))?.is_some());
        assert_eq!(map.copy_map_len(), 1);

        map.copy_map_insert(p(b"files/from_p2"), p(b"from_p2_copy_source"))?;
        assert_eq!(map.copy_map_len(), 2);

        // Info from p2, so its copy source info is kept around
        map.set_untracked(p(b"files/from_p2"))?;
        assert!(map.get(p(b"files/from_p2"))?.is_some());
        assert_eq!(map.copy_map_len(), 2);

        Ok(())
    }
1787 1799
    /// Test with "on disk" data. For the sake of this test, the "on disk" data
    /// does not actually come from the disk, but it's opaque to the code being
    /// tested.
    #[test]
    fn test_on_disk() -> Result<(), DirstateError> {
        // First let's create some data to put "on disk"
        let mut map = OwningDirstateMap::new_empty(vec![]);

        // A file that was just added
        map.set_tracked(p(b"some/nested/added"))?;
        map.copy_map_insert(p(b"some/nested/added"), p(b"added_copy_source"))?;

        // A file that was removed
        map.reset_state(
            p(b"some/nested/removed"),
            false,
            true,
            false,
            false,
            None,
        )?;
        // Only present in p2
        map.reset_state(
            p(b"other/p2_info_only"),
            false,
            false,
            true,
            false,
            None,
        )?;
        map.copy_map_insert(
            p(b"other/p2_info_only"),
            p(b"other/p2_info_copy_source"),
        )?;
        // A file that was merged
        map.reset_state(p(b"merged"), true, true, true, false, None)?;
        // A file that is added, with info from p2
        // XXX is that actually possible?
        map.reset_state(
            p(b"other/added_with_p2"),
            true,
            false,
            true,
            false,
            None,
        )?;
        // One layer without any files to test deletion cascade
        // A clean file
        map.reset_state(
            p(b"some/other/nested/clean"),
            true,
            true,
            false,
            false,
            None,
        )?;

        let (packed, metadata, _should_append, _old_data_size) =
            map.pack_v2(DirstateMapWriteMode::ForceNewDataFile)?;
        let packed_len = packed.len();
        assert!(packed_len > 0);

        // Recreate "from disk"
        // (the trailing `None` is the "identity" of the data file: no
        // known inode here since the data never touched the disk)
        let mut map = OwningDirstateMap::new_v2(
            packed,
            packed_len,
            metadata.as_bytes(),
            vec![],
            None,
        )?;

        // Check that everything is accounted for
        assert!(map.contains_key(p(b"some/nested/added"))?);
        assert!(map.contains_key(p(b"some/nested/removed"))?);
        assert!(map.contains_key(p(b"merged"))?);
        assert!(map.contains_key(p(b"other/p2_info_only"))?);
        assert!(map.contains_key(p(b"other/added_with_p2"))?);
        assert!(map.contains_key(p(b"some/other/nested/clean"))?);
        assert_eq!(
            map.copy_map_get(p(b"some/nested/added"))?,
            Some(p(b"added_copy_source"))
        );
        assert_eq!(
            map.copy_map_get(p(b"other/p2_info_only"))?,
            Some(p(b"other/p2_info_copy_source"))
        );
        assert_eq!(tracked_descendants(&map, b"some"), 2);
        assert_eq!(descendants_with_an_entry(&map, b"some"), 3);
        assert_eq!(tracked_descendants(&map, b"other"), 1);
        assert_eq!(descendants_with_an_entry(&map, b"other"), 2);
        assert_eq!(tracked_descendants(&map, b"some/other"), 1);
        assert_eq!(descendants_with_an_entry(&map, b"some/other"), 1);
        assert_eq!(tracked_descendants(&map, b"some/other/nested"), 1);
        assert_eq!(descendants_with_an_entry(&map, b"some/other/nested"), 1);
        assert_eq!(tracked_descendants(&map, b"some/nested"), 1);
        assert_eq!(descendants_with_an_entry(&map, b"some/nested"), 2);
        assert_eq!(map.len(), 6);
        assert_eq!(map.get_map().unreachable_bytes, 0);
        assert_eq!(map.copy_map_len(), 2);

        // Shouldn't change anything since it's already not tracked
        map.set_untracked(p(b"some/nested/removed"))?;
        assert_eq!(map.get_map().unreachable_bytes, 0);

        match map.get_map().root {
            ChildNodes::InMemory(_) => {
                panic!("root should not have been mutated")
            }
            _ => (),
        }
        // We haven't mutated enough (nothing, actually), we should still be in
        // the append strategy
        assert!(map.get_map().write_should_append());

        // But this mutates the structure, so there should be unreachable_bytes
        assert!(map.set_untracked(p(b"some/nested/added"))?);
        let unreachable_bytes = map.get_map().unreachable_bytes;
        assert!(unreachable_bytes > 0);

        match map.get_map().root {
            ChildNodes::OnDisk(_) => panic!("root should have been mutated"),
            _ => (),
        }

        // This should not mutate the structure either, since `root` has
        // already been mutated along with its direct children.
        map.set_untracked(p(b"merged"))?;
        assert_eq!(map.get_map().unreachable_bytes, unreachable_bytes);

        match map.get_map().get_node(p(b"other/added_with_p2"))?.unwrap() {
            NodeRef::InMemory(_, _) => {
                panic!("'other/added_with_p2' should not have been mutated")
            }
            _ => (),
        }
        // But this should, since it's in a different path
        // than `<root>some/nested/add`
        map.set_untracked(p(b"other/added_with_p2"))?;
        assert!(map.get_map().unreachable_bytes > unreachable_bytes);

        match map.get_map().get_node(p(b"other/added_with_p2"))?.unwrap() {
            NodeRef::OnDisk(_) => {
                panic!("'other/added_with_p2' should have been mutated")
            }
            _ => (),
        }

        // We have rewritten most of the tree, we should create a new file
        assert!(!map.get_map().write_should_append());

        Ok(())
    }
1939 1952 }
@@ -1,892 +1,894 b''
1 1 //! The "version 2" disk representation of the dirstate
2 2 //!
3 3 //! See `mercurial/helptext/internals/dirstate-v2.txt`
4 4
5 5 use crate::dirstate::{DirstateV2Data, TruncatedTimestamp};
6 6 use crate::dirstate_tree::dirstate_map::DirstateVersion;
7 7 use crate::dirstate_tree::dirstate_map::{
8 8 self, DirstateMap, DirstateMapWriteMode, NodeRef,
9 9 };
10 10 use crate::dirstate_tree::path_with_basename::WithBasename;
11 11 use crate::errors::HgError;
12 12 use crate::utils::hg_path::HgPath;
13 13 use crate::DirstateEntry;
14 14 use crate::DirstateError;
15 15 use crate::DirstateParents;
16 16 use bitflags::bitflags;
17 17 use bytes_cast::unaligned::{U16Be, U32Be};
18 18 use bytes_cast::BytesCast;
19 19 use format_bytes::format_bytes;
20 20 use rand::Rng;
21 21 use std::borrow::Cow;
22 22 use std::convert::{TryFrom, TryInto};
23 23 use std::fmt::Write;
24 24
25 25 /// Added at the start of `.hg/dirstate` when the "v2" format is used.
26 26 /// This a redundant sanity check more than an actual "magic number" since
27 27 /// `.hg/requires` already governs which format should be used.
28 28 pub const V2_FORMAT_MARKER: &[u8; 12] = b"dirstate-v2\n";
29 29
30 30 /// Keep space for 256-bit hashes
31 31 const STORED_NODE_ID_BYTES: usize = 32;
32 32
33 33 /// … even though only 160 bits are used for now, with SHA-1
34 34 const USED_NODE_ID_BYTES: usize = 20;
35 35
36 36 pub(super) const IGNORE_PATTERNS_HASH_LEN: usize = 20;
37 37 pub(super) type IgnorePatternsHash = [u8; IGNORE_PATTERNS_HASH_LEN];
38 38
39 39 /// Must match constants of the same names in `mercurial/dirstateutils/v2.py`
40 40 const TREE_METADATA_SIZE: usize = 44;
41 41 const NODE_SIZE: usize = 44;
42 42
43 43 /// Make sure that size-affecting changes are made knowingly
44 44 #[allow(unused)]
45 45 fn static_assert_size_of() {
46 46 let _ = std::mem::transmute::<TreeMetadata, [u8; TREE_METADATA_SIZE]>;
47 47 let _ = std::mem::transmute::<DocketHeader, [u8; TREE_METADATA_SIZE + 81]>;
48 48 let _ = std::mem::transmute::<Node, [u8; NODE_SIZE]>;
49 49 }
50 50
51 51 // Must match `HEADER` in `mercurial/dirstateutils/docket.py`
52 52 #[derive(BytesCast)]
53 53 #[repr(C)]
54 54 struct DocketHeader {
55 55 marker: [u8; V2_FORMAT_MARKER.len()],
56 56 parent_1: [u8; STORED_NODE_ID_BYTES],
57 57 parent_2: [u8; STORED_NODE_ID_BYTES],
58 58
59 59 metadata: TreeMetadata,
60 60
61 61 /// Counted in bytes
62 62 data_size: Size,
63 63
64 64 uuid_size: u8,
65 65 }
66 66
67 67 pub struct Docket<'on_disk> {
68 68 header: &'on_disk DocketHeader,
69 69 pub uuid: &'on_disk [u8],
70 70 }
71 71
72 72 /// Fields are documented in the *Tree metadata in the docket file*
73 73 /// section of `mercurial/helptext/internals/dirstate-v2.txt`
74 74 #[derive(BytesCast)]
75 75 #[repr(C)]
76 76 pub struct TreeMetadata {
77 77 root_nodes: ChildNodes,
78 78 nodes_with_entry_count: Size,
79 79 nodes_with_copy_source_count: Size,
80 80 unreachable_bytes: Size,
81 81 unused: [u8; 4],
82 82
83 83 /// See *Optional hash of ignore patterns* section of
84 84 /// `mercurial/helptext/internals/dirstate-v2.txt`
85 85 ignore_patterns_hash: IgnorePatternsHash,
86 86 }
87 87
88 88 /// Fields are documented in the *The data file format*
89 89 /// section of `mercurial/helptext/internals/dirstate-v2.txt`
90 90 #[derive(BytesCast, Debug)]
91 91 #[repr(C)]
92 92 pub(super) struct Node {
93 93 full_path: PathSlice,
94 94
95 95 /// In bytes from `self.full_path.start`
96 96 base_name_start: PathSize,
97 97
98 98 copy_source: OptPathSlice,
99 99 children: ChildNodes,
100 100 pub(super) descendants_with_entry_count: Size,
101 101 pub(super) tracked_descendants_count: Size,
102 102 flags: U16Be,
103 103 size: U32Be,
104 104 mtime: PackedTruncatedTimestamp,
105 105 }
106 106
107 107 bitflags! {
108 108 #[repr(C)]
109 109 struct Flags: u16 {
110 110 const WDIR_TRACKED = 1 << 0;
111 111 const P1_TRACKED = 1 << 1;
112 112 const P2_INFO = 1 << 2;
113 113 const MODE_EXEC_PERM = 1 << 3;
114 114 const MODE_IS_SYMLINK = 1 << 4;
115 115 const HAS_FALLBACK_EXEC = 1 << 5;
116 116 const FALLBACK_EXEC = 1 << 6;
117 117 const HAS_FALLBACK_SYMLINK = 1 << 7;
118 118 const FALLBACK_SYMLINK = 1 << 8;
119 119 const EXPECTED_STATE_IS_MODIFIED = 1 << 9;
120 120 const HAS_MODE_AND_SIZE = 1 <<10;
121 121 const HAS_MTIME = 1 <<11;
122 122 const MTIME_SECOND_AMBIGUOUS = 1 << 12;
123 123 const DIRECTORY = 1 <<13;
124 124 const ALL_UNKNOWN_RECORDED = 1 <<14;
125 125 const ALL_IGNORED_RECORDED = 1 <<15;
126 126 }
127 127 }
128 128
129 129 /// Duration since the Unix epoch
130 130 #[derive(BytesCast, Copy, Clone, Debug)]
131 131 #[repr(C)]
132 132 struct PackedTruncatedTimestamp {
133 133 truncated_seconds: U32Be,
134 134 nanoseconds: U32Be,
135 135 }
136 136
137 137 /// Counted in bytes from the start of the file
138 138 ///
139 139 /// NOTE: not supporting `.hg/dirstate` files larger than 4 GiB.
140 140 type Offset = U32Be;
141 141
142 142 /// Counted in number of items
143 143 ///
144 144 /// NOTE: we choose not to support counting more than 4 billion nodes anywhere.
145 145 type Size = U32Be;
146 146
147 147 /// Counted in bytes
148 148 ///
149 149 /// NOTE: we choose not to support file names/paths longer than 64 KiB.
150 150 type PathSize = U16Be;
151 151
152 152 /// A contiguous sequence of `len` times `Node`, representing the child nodes
153 153 /// of either some other node or of the repository root.
154 154 ///
155 155 /// Always sorted by ascending `full_path`, to allow binary search.
156 156 /// Since nodes with the same parent nodes also have the same parent path,
157 157 /// only the `base_name`s need to be compared during binary search.
158 158 #[derive(BytesCast, Copy, Clone, Debug)]
159 159 #[repr(C)]
160 160 struct ChildNodes {
161 161 start: Offset,
162 162 len: Size,
163 163 }
164 164
165 165 /// A `HgPath` of `len` bytes
166 166 #[derive(BytesCast, Copy, Clone, Debug)]
167 167 #[repr(C)]
168 168 struct PathSlice {
169 169 start: Offset,
170 170 len: PathSize,
171 171 }
172 172
173 173 /// Either nothing if `start == 0`, or a `HgPath` of `len` bytes
174 174 type OptPathSlice = PathSlice;
175 175
176 176 /// Unexpected file format found in `.hg/dirstate` with the "v2" format.
177 177 ///
178 178 /// This should only happen if Mercurial is buggy or a repository is corrupted.
179 179 #[derive(Debug)]
180 180 pub struct DirstateV2ParseError {
181 181 message: String,
182 182 }
183 183
184 184 impl DirstateV2ParseError {
185 185 pub fn new<S: Into<String>>(message: S) -> Self {
186 186 Self {
187 187 message: message.into(),
188 188 }
189 189 }
190 190 }
191 191
192 192 impl From<DirstateV2ParseError> for HgError {
193 193 fn from(e: DirstateV2ParseError) -> Self {
194 194 HgError::corrupted(format!("dirstate-v2 parse error: {}", e.message))
195 195 }
196 196 }
197 197
198 198 impl From<DirstateV2ParseError> for crate::DirstateError {
199 199 fn from(error: DirstateV2ParseError) -> Self {
200 200 HgError::from(error).into()
201 201 }
202 202 }
203 203
204 204 impl TreeMetadata {
205 205 pub fn as_bytes(&self) -> &[u8] {
206 206 BytesCast::as_bytes(self)
207 207 }
208 208 }
209 209
210 210 impl<'on_disk> Docket<'on_disk> {
211 211 /// Generate the identifier for a new data file
212 212 ///
213 213 /// TODO: support the `HGTEST_UUIDFILE` environment variable.
214 214 /// See `mercurial/revlogutils/docket.py`
215 215 pub fn new_uid() -> String {
216 216 const ID_LENGTH: usize = 8;
217 217 let mut id = String::with_capacity(ID_LENGTH);
218 218 let mut rng = rand::thread_rng();
219 219 for _ in 0..ID_LENGTH {
220 220 // One random hexadecimal digit.
221 221 // `unwrap` never panics because `impl Write for String`
222 222 // never returns an error.
223 223 write!(&mut id, "{:x}", rng.gen_range(0..16)).unwrap();
224 224 }
225 225 id
226 226 }
227 227
228 228 pub fn serialize(
229 229 parents: DirstateParents,
230 230 tree_metadata: TreeMetadata,
231 231 data_size: u64,
232 232 uuid: &[u8],
233 233 ) -> Result<Vec<u8>, std::num::TryFromIntError> {
234 234 let header = DocketHeader {
235 235 marker: *V2_FORMAT_MARKER,
236 236 parent_1: parents.p1.pad_to_256_bits(),
237 237 parent_2: parents.p2.pad_to_256_bits(),
238 238 metadata: tree_metadata,
239 239 data_size: u32::try_from(data_size)?.into(),
240 240 uuid_size: uuid.len().try_into()?,
241 241 };
242 242 let header = header.as_bytes();
243 243 let mut docket = Vec::with_capacity(header.len() + uuid.len());
244 244 docket.extend_from_slice(header);
245 245 docket.extend_from_slice(uuid);
246 246 Ok(docket)
247 247 }
248 248
249 249 pub fn parents(&self) -> DirstateParents {
250 250 use crate::Node;
251 251 let p1 = Node::try_from(&self.header.parent_1[..USED_NODE_ID_BYTES])
252 252 .unwrap()
253 253 .clone();
254 254 let p2 = Node::try_from(&self.header.parent_2[..USED_NODE_ID_BYTES])
255 255 .unwrap()
256 256 .clone();
257 257 DirstateParents { p1, p2 }
258 258 }
259 259
260 260 pub fn tree_metadata(&self) -> &[u8] {
261 261 self.header.metadata.as_bytes()
262 262 }
263 263
264 264 pub fn data_size(&self) -> usize {
265 265 // This `unwrap` could only panic on a 16-bit CPU
266 266 self.header.data_size.get().try_into().unwrap()
267 267 }
268 268
269 269 pub fn data_filename(&self) -> String {
270 270 String::from_utf8(format_bytes!(b"dirstate.{}", self.uuid)).unwrap()
271 271 }
272 272 }
273 273
274 274 pub fn read_docket(
275 275 on_disk: &[u8],
276 276 ) -> Result<Docket<'_>, DirstateV2ParseError> {
277 277 let (header, uuid) = DocketHeader::from_bytes(on_disk).map_err(|e| {
278 278 DirstateV2ParseError::new(format!("when reading docket, {}", e))
279 279 })?;
280 280 let uuid_size = header.uuid_size as usize;
281 281 if header.marker == *V2_FORMAT_MARKER && uuid.len() == uuid_size {
282 282 Ok(Docket { header, uuid })
283 283 } else {
284 284 Err(DirstateV2ParseError::new(
285 285 "invalid format marker or uuid size",
286 286 ))
287 287 }
288 288 }
289 289
290 290 pub(super) fn read<'on_disk>(
291 291 on_disk: &'on_disk [u8],
292 292 metadata: &[u8],
293 293 uuid: Vec<u8>,
294 identity: Option<u64>,
294 295 ) -> Result<DirstateMap<'on_disk>, DirstateV2ParseError> {
295 296 if on_disk.is_empty() {
296 297 let mut map = DirstateMap::empty(on_disk);
297 298 map.dirstate_version = DirstateVersion::V2;
298 299 return Ok(map);
299 300 }
300 301 let (meta, _) = TreeMetadata::from_bytes(metadata).map_err(|e| {
301 302 DirstateV2ParseError::new(format!("when parsing tree metadata, {}", e))
302 303 })?;
303 304 let dirstate_map = DirstateMap {
304 305 on_disk,
305 306 root: dirstate_map::ChildNodes::OnDisk(
306 307 read_nodes(on_disk, meta.root_nodes).map_err(|mut e| {
307 308 e.message = format!("{}, when reading root notes", e.message);
308 309 e
309 310 })?,
310 311 ),
311 312 nodes_with_entry_count: meta.nodes_with_entry_count.get(),
312 313 nodes_with_copy_source_count: meta.nodes_with_copy_source_count.get(),
313 314 ignore_patterns_hash: meta.ignore_patterns_hash,
314 315 unreachable_bytes: meta.unreachable_bytes.get(),
315 316 old_data_size: on_disk.len(),
316 317 old_uuid: Some(uuid),
318 identity,
317 319 dirstate_version: DirstateVersion::V2,
318 320 write_mode: DirstateMapWriteMode::Auto,
319 321 };
320 322 Ok(dirstate_map)
321 323 }
322 324
323 325 impl Node {
324 326 pub(super) fn full_path<'on_disk>(
325 327 &self,
326 328 on_disk: &'on_disk [u8],
327 329 ) -> Result<&'on_disk HgPath, DirstateV2ParseError> {
328 330 read_hg_path(on_disk, self.full_path)
329 331 }
330 332
331 333 pub(super) fn base_name_start<'on_disk>(
332 334 &self,
333 335 ) -> Result<usize, DirstateV2ParseError> {
334 336 let start = self.base_name_start.get();
335 337 if start < self.full_path.len.get() {
336 338 let start = usize::try_from(start)
337 339 // u32 -> usize, could only panic on a 16-bit CPU
338 340 .expect("dirstate-v2 base_name_start out of bounds");
339 341 Ok(start)
340 342 } else {
341 343 Err(DirstateV2ParseError::new("not enough bytes for base name"))
342 344 }
343 345 }
344 346
345 347 pub(super) fn base_name<'on_disk>(
346 348 &self,
347 349 on_disk: &'on_disk [u8],
348 350 ) -> Result<&'on_disk HgPath, DirstateV2ParseError> {
349 351 let full_path = self.full_path(on_disk)?;
350 352 let base_name_start = self.base_name_start()?;
351 353 Ok(HgPath::new(&full_path.as_bytes()[base_name_start..]))
352 354 }
353 355
354 356 pub(super) fn path<'on_disk>(
355 357 &self,
356 358 on_disk: &'on_disk [u8],
357 359 ) -> Result<dirstate_map::NodeKey<'on_disk>, DirstateV2ParseError> {
358 360 Ok(WithBasename::from_raw_parts(
359 361 Cow::Borrowed(self.full_path(on_disk)?),
360 362 self.base_name_start()?,
361 363 ))
362 364 }
363 365
364 366 pub(super) fn has_copy_source<'on_disk>(&self) -> bool {
365 367 self.copy_source.start.get() != 0
366 368 }
367 369
368 370 pub(super) fn copy_source<'on_disk>(
369 371 &self,
370 372 on_disk: &'on_disk [u8],
371 373 ) -> Result<Option<&'on_disk HgPath>, DirstateV2ParseError> {
372 374 Ok(if self.has_copy_source() {
373 375 Some(read_hg_path(on_disk, self.copy_source)?)
374 376 } else {
375 377 None
376 378 })
377 379 }
378 380
379 381 fn flags(&self) -> Flags {
380 382 Flags::from_bits_truncate(self.flags.get())
381 383 }
382 384
383 385 fn has_entry(&self) -> bool {
384 386 self.flags().intersects(
385 387 Flags::WDIR_TRACKED | Flags::P1_TRACKED | Flags::P2_INFO,
386 388 )
387 389 }
388 390
389 391 pub(super) fn node_data(
390 392 &self,
391 393 ) -> Result<dirstate_map::NodeData, DirstateV2ParseError> {
392 394 if self.has_entry() {
393 395 Ok(dirstate_map::NodeData::Entry(self.assume_entry()?))
394 396 } else if let Some(mtime) = self.cached_directory_mtime()? {
395 397 Ok(dirstate_map::NodeData::CachedDirectory { mtime })
396 398 } else {
397 399 Ok(dirstate_map::NodeData::None)
398 400 }
399 401 }
400 402
401 403 pub(super) fn cached_directory_mtime(
402 404 &self,
403 405 ) -> Result<Option<TruncatedTimestamp>, DirstateV2ParseError> {
404 406 // For now we do not have code to handle the absence of
405 407 // ALL_UNKNOWN_RECORDED, so we ignore the mtime if the flag is
406 408 // unset.
407 409 if self.flags().contains(Flags::DIRECTORY)
408 410 && self.flags().contains(Flags::HAS_MTIME)
409 411 && self.flags().contains(Flags::ALL_UNKNOWN_RECORDED)
410 412 {
411 413 Ok(Some(self.mtime()?))
412 414 } else {
413 415 Ok(None)
414 416 }
415 417 }
416 418
417 419 fn synthesize_unix_mode(&self) -> u32 {
418 420 let file_type = if self.flags().contains(Flags::MODE_IS_SYMLINK) {
419 421 libc::S_IFLNK
420 422 } else {
421 423 libc::S_IFREG
422 424 };
423 425 let permisions = if self.flags().contains(Flags::MODE_EXEC_PERM) {
424 426 0o755
425 427 } else {
426 428 0o644
427 429 };
428 430 (file_type | permisions).into()
429 431 }
430 432
431 433 fn mtime(&self) -> Result<TruncatedTimestamp, DirstateV2ParseError> {
432 434 let mut m: TruncatedTimestamp = self.mtime.try_into()?;
433 435 if self.flags().contains(Flags::MTIME_SECOND_AMBIGUOUS) {
434 436 m.second_ambiguous = true;
435 437 }
436 438 Ok(m)
437 439 }
438 440
439 441 fn assume_entry(&self) -> Result<DirstateEntry, DirstateV2ParseError> {
440 442 // TODO: convert through raw bits instead?
441 443 let wc_tracked = self.flags().contains(Flags::WDIR_TRACKED);
442 444 let p1_tracked = self.flags().contains(Flags::P1_TRACKED);
443 445 let p2_info = self.flags().contains(Flags::P2_INFO);
444 446 let mode_size = if self.flags().contains(Flags::HAS_MODE_AND_SIZE)
445 447 && !self.flags().contains(Flags::EXPECTED_STATE_IS_MODIFIED)
446 448 {
447 449 Some((self.synthesize_unix_mode(), self.size.into()))
448 450 } else {
449 451 None
450 452 };
451 453 let mtime = if self.flags().contains(Flags::HAS_MTIME)
452 454 && !self.flags().contains(Flags::DIRECTORY)
453 455 && !self.flags().contains(Flags::EXPECTED_STATE_IS_MODIFIED)
454 456 {
455 457 Some(self.mtime()?)
456 458 } else {
457 459 None
458 460 };
459 461 let fallback_exec = if self.flags().contains(Flags::HAS_FALLBACK_EXEC)
460 462 {
461 463 Some(self.flags().contains(Flags::FALLBACK_EXEC))
462 464 } else {
463 465 None
464 466 };
465 467 let fallback_symlink =
466 468 if self.flags().contains(Flags::HAS_FALLBACK_SYMLINK) {
467 469 Some(self.flags().contains(Flags::FALLBACK_SYMLINK))
468 470 } else {
469 471 None
470 472 };
471 473 Ok(DirstateEntry::from_v2_data(DirstateV2Data {
472 474 wc_tracked,
473 475 p1_tracked,
474 476 p2_info,
475 477 mode_size,
476 478 mtime,
477 479 fallback_exec,
478 480 fallback_symlink,
479 481 }))
480 482 }
481 483
482 484 pub(super) fn entry(
483 485 &self,
484 486 ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {
485 487 if self.has_entry() {
486 488 Ok(Some(self.assume_entry()?))
487 489 } else {
488 490 Ok(None)
489 491 }
490 492 }
491 493
492 494 pub(super) fn children<'on_disk>(
493 495 &self,
494 496 on_disk: &'on_disk [u8],
495 497 ) -> Result<&'on_disk [Node], DirstateV2ParseError> {
496 498 read_nodes(on_disk, self.children)
497 499 }
498 500
499 501 pub(super) fn to_in_memory_node<'on_disk>(
500 502 &self,
501 503 on_disk: &'on_disk [u8],
502 504 ) -> Result<dirstate_map::Node<'on_disk>, DirstateV2ParseError> {
503 505 Ok(dirstate_map::Node {
504 506 children: dirstate_map::ChildNodes::OnDisk(
505 507 self.children(on_disk)?,
506 508 ),
507 509 copy_source: self.copy_source(on_disk)?.map(Cow::Borrowed),
508 510 data: self.node_data()?,
509 511 descendants_with_entry_count: self
510 512 .descendants_with_entry_count
511 513 .get(),
512 514 tracked_descendants_count: self.tracked_descendants_count.get(),
513 515 })
514 516 }
515 517
516 518 fn from_dirstate_entry(
517 519 entry: &DirstateEntry,
518 520 ) -> (Flags, U32Be, PackedTruncatedTimestamp) {
519 521 let DirstateV2Data {
520 522 wc_tracked,
521 523 p1_tracked,
522 524 p2_info,
523 525 mode_size: mode_size_opt,
524 526 mtime: mtime_opt,
525 527 fallback_exec,
526 528 fallback_symlink,
527 529 } = entry.v2_data();
528 530 // TODO: convert through raw flag bits instead?
529 531 let mut flags = Flags::empty();
530 532 flags.set(Flags::WDIR_TRACKED, wc_tracked);
531 533 flags.set(Flags::P1_TRACKED, p1_tracked);
532 534 flags.set(Flags::P2_INFO, p2_info);
533 535 let size = if let Some((m, s)) = mode_size_opt {
534 536 let exec_perm = m & (libc::S_IXUSR as u32) != 0;
535 537 let is_symlink = m & (libc::S_IFMT as u32) == libc::S_IFLNK as u32;
536 538 flags.set(Flags::MODE_EXEC_PERM, exec_perm);
537 539 flags.set(Flags::MODE_IS_SYMLINK, is_symlink);
538 540 flags.insert(Flags::HAS_MODE_AND_SIZE);
539 541 s.into()
540 542 } else {
541 543 0.into()
542 544 };
543 545 let mtime = if let Some(m) = mtime_opt {
544 546 flags.insert(Flags::HAS_MTIME);
545 547 if m.second_ambiguous {
546 548 flags.insert(Flags::MTIME_SECOND_AMBIGUOUS);
547 549 };
548 550 m.into()
549 551 } else {
550 552 PackedTruncatedTimestamp::null()
551 553 };
552 554 if let Some(f_exec) = fallback_exec {
553 555 flags.insert(Flags::HAS_FALLBACK_EXEC);
554 556 if f_exec {
555 557 flags.insert(Flags::FALLBACK_EXEC);
556 558 }
557 559 }
558 560 if let Some(f_symlink) = fallback_symlink {
559 561 flags.insert(Flags::HAS_FALLBACK_SYMLINK);
560 562 if f_symlink {
561 563 flags.insert(Flags::FALLBACK_SYMLINK);
562 564 }
563 565 }
564 566 (flags, size, mtime)
565 567 }
566 568 }
567 569
568 570 fn read_hg_path(
569 571 on_disk: &[u8],
570 572 slice: PathSlice,
571 573 ) -> Result<&HgPath, DirstateV2ParseError> {
572 574 read_slice(on_disk, slice.start, slice.len.get()).map(HgPath::new)
573 575 }
574 576
575 577 fn read_nodes(
576 578 on_disk: &[u8],
577 579 slice: ChildNodes,
578 580 ) -> Result<&[Node], DirstateV2ParseError> {
579 581 read_slice(on_disk, slice.start, slice.len.get())
580 582 }
581 583
582 584 fn read_slice<T, Len>(
583 585 on_disk: &[u8],
584 586 start: Offset,
585 587 len: Len,
586 588 ) -> Result<&[T], DirstateV2ParseError>
587 589 where
588 590 T: BytesCast,
589 591 Len: TryInto<usize>,
590 592 {
591 593 // Either `usize::MAX` would result in "out of bounds" error since a single
592 594 // `&[u8]` cannot occupy the entire addess space.
593 595 let start = start.get().try_into().unwrap_or(std::usize::MAX);
594 596 let len = len.try_into().unwrap_or(std::usize::MAX);
595 597 let bytes = match on_disk.get(start..) {
596 598 Some(bytes) => bytes,
597 599 None => {
598 600 return Err(DirstateV2ParseError::new(
599 601 "not enough bytes from disk",
600 602 ))
601 603 }
602 604 };
603 605 T::slice_from_bytes(bytes, len)
604 606 .map_err(|e| {
605 607 DirstateV2ParseError::new(format!("when reading a slice, {}", e))
606 608 })
607 609 .map(|(slice, _rest)| slice)
608 610 }
609 611
610 612 pub(crate) fn for_each_tracked_path<'on_disk>(
611 613 on_disk: &'on_disk [u8],
612 614 metadata: &[u8],
613 615 mut f: impl FnMut(&'on_disk HgPath),
614 616 ) -> Result<(), DirstateV2ParseError> {
615 617 let (meta, _) = TreeMetadata::from_bytes(metadata).map_err(|e| {
616 618 DirstateV2ParseError::new(format!("when parsing tree metadata, {}", e))
617 619 })?;
618 620 fn recur<'on_disk>(
619 621 on_disk: &'on_disk [u8],
620 622 nodes: ChildNodes,
621 623 f: &mut impl FnMut(&'on_disk HgPath),
622 624 ) -> Result<(), DirstateV2ParseError> {
623 625 for node in read_nodes(on_disk, nodes)? {
624 626 if let Some(entry) = node.entry()? {
625 627 if entry.tracked() {
626 628 f(node.full_path(on_disk)?)
627 629 }
628 630 }
629 631 recur(on_disk, node.children, f)?
630 632 }
631 633 Ok(())
632 634 }
633 635 recur(on_disk, meta.root_nodes, &mut f)
634 636 }
635 637
636 638 /// Returns new data and metadata, together with whether that data should be
637 639 /// appended to the existing data file whose content is at
638 640 /// `dirstate_map.on_disk` (true), instead of written to a new data file
639 641 /// (false), and the previous size of data on disk.
640 642 pub(super) fn write(
641 643 dirstate_map: &DirstateMap,
642 644 write_mode: DirstateMapWriteMode,
643 645 ) -> Result<(Vec<u8>, TreeMetadata, bool, usize), DirstateError> {
644 646 let append = match write_mode {
645 647 DirstateMapWriteMode::Auto => dirstate_map.write_should_append(),
646 648 DirstateMapWriteMode::ForceNewDataFile => false,
647 649 DirstateMapWriteMode::ForceAppend => true,
648 650 };
649 651 if append {
650 652 log::trace!("appending to the dirstate data file");
651 653 } else {
652 654 log::trace!("creating new dirstate data file");
653 655 }
654 656
655 657 // This ignores the space for paths, and for nodes without an entry.
656 658 // TODO: better estimate? Skip the `Vec` and write to a file directly?
657 659 let size_guess = std::mem::size_of::<Node>()
658 660 * dirstate_map.nodes_with_entry_count as usize;
659 661
660 662 let mut writer = Writer {
661 663 dirstate_map,
662 664 append,
663 665 out: Vec::with_capacity(size_guess),
664 666 };
665 667
666 668 let root_nodes = writer.write_nodes(dirstate_map.root.as_ref())?;
667 669
668 670 let unreachable_bytes = if append {
669 671 dirstate_map.unreachable_bytes
670 672 } else {
671 673 0
672 674 };
673 675 let meta = TreeMetadata {
674 676 root_nodes,
675 677 nodes_with_entry_count: dirstate_map.nodes_with_entry_count.into(),
676 678 nodes_with_copy_source_count: dirstate_map
677 679 .nodes_with_copy_source_count
678 680 .into(),
679 681 unreachable_bytes: unreachable_bytes.into(),
680 682 unused: [0; 4],
681 683 ignore_patterns_hash: dirstate_map.ignore_patterns_hash,
682 684 };
683 685 Ok((writer.out, meta, append, dirstate_map.old_data_size))
684 686 }
685 687
686 688 struct Writer<'dmap, 'on_disk> {
687 689 dirstate_map: &'dmap DirstateMap<'on_disk>,
688 690 append: bool,
689 691 out: Vec<u8>,
690 692 }
691 693
692 694 impl Writer<'_, '_> {
693 695 fn write_nodes(
694 696 &mut self,
695 697 nodes: dirstate_map::ChildNodesRef,
696 698 ) -> Result<ChildNodes, DirstateError> {
697 699 // Reuse already-written nodes if possible
698 700 if self.append {
699 701 if let dirstate_map::ChildNodesRef::OnDisk(nodes_slice) = nodes {
700 702 let start = self.on_disk_offset_of(nodes_slice).expect(
701 703 "dirstate-v2 OnDisk nodes not found within on_disk",
702 704 );
703 705 let len = child_nodes_len_from_usize(nodes_slice.len());
704 706 return Ok(ChildNodes { start, len });
705 707 }
706 708 }
707 709
708 710 // `dirstate_map::ChildNodes::InMemory` contains a `HashMap` which has
709 711 // undefined iteration order. Sort to enable binary search in the
710 712 // written file.
711 713 let nodes = nodes.sorted();
712 714 let nodes_len = nodes.len();
713 715
714 716 // First accumulate serialized nodes in a `Vec`
715 717 let mut on_disk_nodes = Vec::with_capacity(nodes_len);
716 718 for node in nodes {
717 719 let children =
718 720 self.write_nodes(node.children(self.dirstate_map.on_disk)?)?;
719 721 let full_path = node.full_path(self.dirstate_map.on_disk)?;
720 722 let full_path = self.write_path(full_path.as_bytes());
721 723 let copy_source = if let Some(source) =
722 724 node.copy_source(self.dirstate_map.on_disk)?
723 725 {
724 726 self.write_path(source.as_bytes())
725 727 } else {
726 728 PathSlice {
727 729 start: 0.into(),
728 730 len: 0.into(),
729 731 }
730 732 };
731 733 on_disk_nodes.push(match node {
732 734 NodeRef::InMemory(path, node) => {
733 735 let (flags, size, mtime) = match &node.data {
734 736 dirstate_map::NodeData::Entry(entry) => {
735 737 Node::from_dirstate_entry(entry)
736 738 }
737 739 dirstate_map::NodeData::CachedDirectory { mtime } => {
738 740 // we currently never set a mtime if unknown file
739 741 // are present.
740 742 // So if we have a mtime for a directory, we know
741 743 // they are no unknown
742 744 // files and we
743 745 // blindly set ALL_UNKNOWN_RECORDED.
744 746 //
745 747 // We never set ALL_IGNORED_RECORDED since we
746 748 // don't track that case
747 749 // currently.
748 750 let mut flags = Flags::DIRECTORY
749 751 | Flags::HAS_MTIME
750 752 | Flags::ALL_UNKNOWN_RECORDED;
751 753 if mtime.second_ambiguous {
752 754 flags.insert(Flags::MTIME_SECOND_AMBIGUOUS)
753 755 }
754 756 (flags, 0.into(), (*mtime).into())
755 757 }
756 758 dirstate_map::NodeData::None => (
757 759 Flags::DIRECTORY,
758 760 0.into(),
759 761 PackedTruncatedTimestamp::null(),
760 762 ),
761 763 };
762 764 Node {
763 765 children,
764 766 copy_source,
765 767 full_path,
766 768 base_name_start: u16::try_from(path.base_name_start())
767 769 // Could only panic for paths over 64 KiB
768 770 .expect("dirstate-v2 path length overflow")
769 771 .into(),
770 772 descendants_with_entry_count: node
771 773 .descendants_with_entry_count
772 774 .into(),
773 775 tracked_descendants_count: node
774 776 .tracked_descendants_count
775 777 .into(),
776 778 flags: flags.bits().into(),
777 779 size,
778 780 mtime,
779 781 }
780 782 }
781 783 NodeRef::OnDisk(node) => Node {
782 784 children,
783 785 copy_source,
784 786 full_path,
785 787 ..*node
786 788 },
787 789 })
788 790 }
789 791 // … so we can write them contiguously, after writing everything else
790 792 // they refer to.
791 793 let start = self.current_offset();
792 794 let len = child_nodes_len_from_usize(nodes_len);
793 795 self.out.extend(on_disk_nodes.as_bytes());
794 796 Ok(ChildNodes { start, len })
795 797 }
796 798
797 799 /// If the given slice of items is within `on_disk`, returns its offset
798 800 /// from the start of `on_disk`.
799 801 fn on_disk_offset_of<T>(&self, slice: &[T]) -> Option<Offset>
800 802 where
801 803 T: BytesCast,
802 804 {
803 805 fn address_range(slice: &[u8]) -> std::ops::RangeInclusive<usize> {
804 806 let start = slice.as_ptr() as usize;
805 807 let end = start + slice.len();
806 808 start..=end
807 809 }
808 810 let slice_addresses = address_range(slice.as_bytes());
809 811 let on_disk_addresses = address_range(self.dirstate_map.on_disk);
810 812 if on_disk_addresses.contains(slice_addresses.start())
811 813 && on_disk_addresses.contains(slice_addresses.end())
812 814 {
813 815 let offset = slice_addresses.start() - on_disk_addresses.start();
814 816 Some(offset_from_usize(offset))
815 817 } else {
816 818 None
817 819 }
818 820 }
819 821
820 822 fn current_offset(&mut self) -> Offset {
821 823 let mut offset = self.out.len();
822 824 if self.append {
823 825 offset += self.dirstate_map.on_disk.len()
824 826 }
825 827 offset_from_usize(offset)
826 828 }
827 829
828 830 fn write_path(&mut self, slice: &[u8]) -> PathSlice {
829 831 let len = path_len_from_usize(slice.len());
830 832 // Reuse an already-written path if possible
831 833 if self.append {
832 834 if let Some(start) = self.on_disk_offset_of(slice) {
833 835 return PathSlice { start, len };
834 836 }
835 837 }
836 838 let start = self.current_offset();
837 839 self.out.extend(slice.as_bytes());
838 840 PathSlice { start, len }
839 841 }
840 842 }
841 843
842 844 fn offset_from_usize(x: usize) -> Offset {
843 845 u32::try_from(x)
844 846 // Could only panic for a dirstate file larger than 4 GiB
845 847 .expect("dirstate-v2 offset overflow")
846 848 .into()
847 849 }
848 850
849 851 fn child_nodes_len_from_usize(x: usize) -> Size {
850 852 u32::try_from(x)
851 853 // Could only panic with over 4 billion nodes
852 854 .expect("dirstate-v2 slice length overflow")
853 855 .into()
854 856 }
855 857
856 858 fn path_len_from_usize(x: usize) -> PathSize {
857 859 u16::try_from(x)
858 860 // Could only panic for paths over 64 KiB
859 861 .expect("dirstate-v2 path length overflow")
860 862 .into()
861 863 }
862 864
863 865 impl From<TruncatedTimestamp> for PackedTruncatedTimestamp {
864 866 fn from(timestamp: TruncatedTimestamp) -> Self {
865 867 Self {
866 868 truncated_seconds: timestamp.truncated_seconds().into(),
867 869 nanoseconds: timestamp.nanoseconds().into(),
868 870 }
869 871 }
870 872 }
871 873
872 874 impl TryFrom<PackedTruncatedTimestamp> for TruncatedTimestamp {
873 875 type Error = DirstateV2ParseError;
874 876
875 877 fn try_from(
876 878 timestamp: PackedTruncatedTimestamp,
877 879 ) -> Result<Self, Self::Error> {
878 880 Self::from_already_truncated(
879 881 timestamp.truncated_seconds.get(),
880 882 timestamp.nanoseconds.get(),
881 883 false,
882 884 )
883 885 }
884 886 }
885 887 impl PackedTruncatedTimestamp {
886 888 fn null() -> Self {
887 889 Self {
888 890 truncated_seconds: 0.into(),
889 891 nanoseconds: 0.into(),
890 892 }
891 893 }
892 894 }
@@ -1,98 +1,106 b''
1 1 use crate::{DirstateError, DirstateParents};
2 2
3 3 use super::dirstate_map::DirstateMap;
4 4 use std::ops::Deref;
5 5
6 6 use ouroboros::self_referencing;
7 7
8 8 /// Keep a `DirstateMap<'on_disk>` next to the `on_disk` buffer that it
9 9 /// borrows.
10 10 #[self_referencing]
11 11 pub struct OwningDirstateMap {
12 12 on_disk: Box<dyn Deref<Target = [u8]> + Send>,
13 13 #[borrows(on_disk)]
14 14 #[covariant]
15 15 map: DirstateMap<'this>,
16 16 }
17 17
18 18 impl OwningDirstateMap {
19 19 pub fn new_empty<OnDisk>(on_disk: OnDisk) -> Self
20 20 where
21 21 OnDisk: Deref<Target = [u8]> + Send + 'static,
22 22 {
23 23 let on_disk = Box::new(on_disk);
24 24
25 25 OwningDirstateMapBuilder {
26 26 on_disk,
27 27 map_builder: |bytes| DirstateMap::empty(&bytes),
28 28 }
29 29 .build()
30 30 }
31 31
32 32 pub fn new_v1<OnDisk>(
33 33 on_disk: OnDisk,
34 identity: Option<u64>,
34 35 ) -> Result<(Self, DirstateParents), DirstateError>
35 36 where
36 37 OnDisk: Deref<Target = [u8]> + Send + 'static,
37 38 {
38 39 let on_disk = Box::new(on_disk);
39 40 let mut parents = DirstateParents::NULL;
40 41
41 42 Ok((
42 43 OwningDirstateMapTryBuilder {
43 44 on_disk,
44 45 map_builder: |bytes| {
45 DirstateMap::new_v1(&bytes).map(|(dmap, p)| {
46 DirstateMap::new_v1(&bytes, identity).map(|(dmap, p)| {
46 47 parents = p.unwrap_or(DirstateParents::NULL);
47 48 dmap
48 49 })
49 50 },
50 51 }
51 52 .try_build()?,
52 53 parents,
53 54 ))
54 55 }
55 56
56 57 pub fn new_v2<OnDisk>(
57 58 on_disk: OnDisk,
58 59 data_size: usize,
59 60 metadata: &[u8],
60 61 uuid: Vec<u8>,
62 identity: Option<u64>,
61 63 ) -> Result<Self, DirstateError>
62 64 where
63 65 OnDisk: Deref<Target = [u8]> + Send + 'static,
64 66 {
65 67 let on_disk = Box::new(on_disk);
66 68
67 69 OwningDirstateMapTryBuilder {
68 70 on_disk,
69 71 map_builder: |bytes| {
70 DirstateMap::new_v2(&bytes, data_size, metadata, uuid)
72 DirstateMap::new_v2(
73 &bytes, data_size, metadata, uuid, identity,
74 )
71 75 },
72 76 }
73 77 .try_build()
74 78 }
75 79
76 80 pub fn with_dmap_mut<R>(
77 81 &mut self,
78 82 f: impl FnOnce(&mut DirstateMap) -> R,
79 83 ) -> R {
80 84 self.with_map_mut(f)
81 85 }
82 86
83 87 pub fn get_map(&self) -> &DirstateMap {
84 88 self.borrow_map()
85 89 }
86 90
87 91 pub fn on_disk(&self) -> &[u8] {
88 92 self.borrow_on_disk()
89 93 }
90 94
91 95 pub fn old_uuid(&self) -> Option<&[u8]> {
92 96 self.get_map().old_uuid.as_deref()
93 97 }
94 98
99 pub fn old_identity(&self) -> Option<u64> {
100 self.get_map().identity
101 }
102
95 103 pub fn old_data_size(&self) -> usize {
96 104 self.get_map().old_data_size
97 105 }
98 106 }
@@ -1,700 +1,738 b''
1 1 use crate::changelog::Changelog;
2 2 use crate::config::{Config, ConfigError, ConfigParseError};
3 3 use crate::dirstate::DirstateParents;
4 4 use crate::dirstate_tree::dirstate_map::DirstateMapWriteMode;
5 5 use crate::dirstate_tree::on_disk::Docket as DirstateDocket;
6 6 use crate::dirstate_tree::owning::OwningDirstateMap;
7 7 use crate::errors::HgResultExt;
8 8 use crate::errors::{HgError, IoResultExt};
9 9 use crate::lock::{try_with_lock_no_wait, LockError};
10 10 use crate::manifest::{Manifest, Manifestlog};
11 11 use crate::revlog::filelog::Filelog;
12 12 use crate::revlog::revlog::RevlogError;
13 13 use crate::utils::debug::debug_wait_for_file_or_print;
14 14 use crate::utils::files::get_path_from_bytes;
15 15 use crate::utils::hg_path::HgPath;
16 16 use crate::utils::SliceExt;
17 17 use crate::vfs::{is_dir, is_file, Vfs};
18 18 use crate::{requirements, NodePrefix};
19 19 use crate::{DirstateError, Revision};
20 20 use std::cell::{Ref, RefCell, RefMut};
21 21 use std::collections::HashSet;
22 22 use std::io::Seek;
23 23 use std::io::SeekFrom;
24 24 use std::io::Write as IoWrite;
25 25 use std::path::{Path, PathBuf};
26 26
27 27 const V2_MAX_READ_ATTEMPTS: usize = 5;
28 28
29 29 /// A repository on disk
30 30 pub struct Repo {
31 31 working_directory: PathBuf,
32 32 dot_hg: PathBuf,
33 33 store: PathBuf,
34 34 requirements: HashSet<String>,
35 35 config: Config,
36 36 dirstate_parents: LazyCell<DirstateParents>,
37 37 dirstate_map: LazyCell<OwningDirstateMap>,
38 38 changelog: LazyCell<Changelog>,
39 39 manifestlog: LazyCell<Manifestlog>,
40 40 }
41 41
42 42 #[derive(Debug, derive_more::From)]
43 43 pub enum RepoError {
44 44 NotFound {
45 45 at: PathBuf,
46 46 },
47 47 #[from]
48 48 ConfigParseError(ConfigParseError),
49 49 #[from]
50 50 Other(HgError),
51 51 }
52 52
53 53 impl From<ConfigError> for RepoError {
54 54 fn from(error: ConfigError) -> Self {
55 55 match error {
56 56 ConfigError::Parse(error) => error.into(),
57 57 ConfigError::Other(error) => error.into(),
58 58 }
59 59 }
60 60 }
61 61
62 62 impl Repo {
63 63 /// tries to find nearest repository root in current working directory or
64 64 /// its ancestors
65 65 pub fn find_repo_root() -> Result<PathBuf, RepoError> {
66 66 let current_directory = crate::utils::current_dir()?;
67 67 // ancestors() is inclusive: it first yields `current_directory`
68 68 // as-is.
69 69 for ancestor in current_directory.ancestors() {
70 70 if is_dir(ancestor.join(".hg"))? {
71 71 return Ok(ancestor.to_path_buf());
72 72 }
73 73 }
74 74 return Err(RepoError::NotFound {
75 75 at: current_directory,
76 76 });
77 77 }
78 78
79 79 /// Find a repository, either at the given path (which must contain a `.hg`
80 80 /// sub-directory) or by searching the current directory and its
81 81 /// ancestors.
82 82 ///
83 83 /// A method with two very different "modes" like this usually a code smell
84 84 /// to make two methods instead, but in this case an `Option` is what rhg
85 85 /// sub-commands get from Clap for the `-R` / `--repository` CLI argument.
86 86 /// Having two methods would just move that `if` to almost all callers.
87 87 pub fn find(
88 88 config: &Config,
89 89 explicit_path: Option<PathBuf>,
90 90 ) -> Result<Self, RepoError> {
91 91 if let Some(root) = explicit_path {
92 92 if is_dir(root.join(".hg"))? {
93 93 Self::new_at_path(root.to_owned(), config)
94 94 } else if is_file(&root)? {
95 95 Err(HgError::unsupported("bundle repository").into())
96 96 } else {
97 97 Err(RepoError::NotFound {
98 98 at: root.to_owned(),
99 99 })
100 100 }
101 101 } else {
102 102 let root = Self::find_repo_root()?;
103 103 Self::new_at_path(root, config)
104 104 }
105 105 }
106 106
107 107 /// To be called after checking that `.hg` is a sub-directory
108 108 fn new_at_path(
109 109 working_directory: PathBuf,
110 110 config: &Config,
111 111 ) -> Result<Self, RepoError> {
112 112 let dot_hg = working_directory.join(".hg");
113 113
114 114 let mut repo_config_files = Vec::new();
115 115 repo_config_files.push(dot_hg.join("hgrc"));
116 116 repo_config_files.push(dot_hg.join("hgrc-not-shared"));
117 117
118 118 let hg_vfs = Vfs { base: &dot_hg };
119 119 let mut reqs = requirements::load_if_exists(hg_vfs)?;
120 120 let relative =
121 121 reqs.contains(requirements::RELATIVE_SHARED_REQUIREMENT);
122 122 let shared =
123 123 reqs.contains(requirements::SHARED_REQUIREMENT) || relative;
124 124
125 125 // From `mercurial/localrepo.py`:
126 126 //
127 127 // if .hg/requires contains the sharesafe requirement, it means
128 128 // there exists a `.hg/store/requires` too and we should read it
129 129 // NOTE: presence of SHARESAFE_REQUIREMENT imply that store requirement
130 130 // is present. We never write SHARESAFE_REQUIREMENT for a repo if store
131 131 // is not present, refer checkrequirementscompat() for that
132 132 //
133 133 // However, if SHARESAFE_REQUIREMENT is not present, it means that the
134 134 // repository was shared the old way. We check the share source
135 135 // .hg/requires for SHARESAFE_REQUIREMENT to detect whether the
136 136 // current repository needs to be reshared
137 137 let share_safe = reqs.contains(requirements::SHARESAFE_REQUIREMENT);
138 138
139 139 let store_path;
140 140 if !shared {
141 141 store_path = dot_hg.join("store");
142 142 } else {
143 143 let bytes = hg_vfs.read("sharedpath")?;
144 144 let mut shared_path =
145 145 get_path_from_bytes(bytes.trim_end_matches(|b| b == b'\n'))
146 146 .to_owned();
147 147 if relative {
148 148 shared_path = dot_hg.join(shared_path)
149 149 }
150 150 if !is_dir(&shared_path)? {
151 151 return Err(HgError::corrupted(format!(
152 152 ".hg/sharedpath points to nonexistent directory {}",
153 153 shared_path.display()
154 154 ))
155 155 .into());
156 156 }
157 157
158 158 store_path = shared_path.join("store");
159 159
160 160 let source_is_share_safe =
161 161 requirements::load(Vfs { base: &shared_path })?
162 162 .contains(requirements::SHARESAFE_REQUIREMENT);
163 163
164 164 if share_safe != source_is_share_safe {
165 165 return Err(HgError::unsupported("share-safe mismatch").into());
166 166 }
167 167
168 168 if share_safe {
169 169 repo_config_files.insert(0, shared_path.join("hgrc"))
170 170 }
171 171 }
172 172 if share_safe {
173 173 reqs.extend(requirements::load(Vfs { base: &store_path })?);
174 174 }
175 175
176 176 let repo_config = if std::env::var_os("HGRCSKIPREPO").is_none() {
177 177 config.combine_with_repo(&repo_config_files)?
178 178 } else {
179 179 config.clone()
180 180 };
181 181
182 182 let repo = Self {
183 183 requirements: reqs,
184 184 working_directory,
185 185 store: store_path,
186 186 dot_hg,
187 187 config: repo_config,
188 188 dirstate_parents: LazyCell::new(),
189 189 dirstate_map: LazyCell::new(),
190 190 changelog: LazyCell::new(),
191 191 manifestlog: LazyCell::new(),
192 192 };
193 193
194 194 requirements::check(&repo)?;
195 195
196 196 Ok(repo)
197 197 }
198 198
199 199 pub fn working_directory_path(&self) -> &Path {
200 200 &self.working_directory
201 201 }
202 202
203 203 pub fn requirements(&self) -> &HashSet<String> {
204 204 &self.requirements
205 205 }
206 206
207 207 pub fn config(&self) -> &Config {
208 208 &self.config
209 209 }
210 210
211 211 /// For accessing repository files (in `.hg`), except for the store
212 212 /// (`.hg/store`).
213 213 pub fn hg_vfs(&self) -> Vfs<'_> {
214 214 Vfs { base: &self.dot_hg }
215 215 }
216 216
217 217 /// For accessing repository store files (in `.hg/store`)
218 218 pub fn store_vfs(&self) -> Vfs<'_> {
219 219 Vfs { base: &self.store }
220 220 }
221 221
222 222 /// For accessing the working copy
223 223 pub fn working_directory_vfs(&self) -> Vfs<'_> {
224 224 Vfs {
225 225 base: &self.working_directory,
226 226 }
227 227 }
228 228
229 229 pub fn try_with_wlock_no_wait<R>(
230 230 &self,
231 231 f: impl FnOnce() -> R,
232 232 ) -> Result<R, LockError> {
233 233 try_with_lock_no_wait(self.hg_vfs(), "wlock", f)
234 234 }
235 235
236 236 pub fn has_dirstate_v2(&self) -> bool {
237 237 self.requirements
238 238 .contains(requirements::DIRSTATE_V2_REQUIREMENT)
239 239 }
240 240
241 241 pub fn has_sparse(&self) -> bool {
242 242 self.requirements.contains(requirements::SPARSE_REQUIREMENT)
243 243 }
244 244
245 245 pub fn has_narrow(&self) -> bool {
246 246 self.requirements.contains(requirements::NARROW_REQUIREMENT)
247 247 }
248 248
249 249 pub fn has_nodemap(&self) -> bool {
250 250 self.requirements
251 251 .contains(requirements::NODEMAP_REQUIREMENT)
252 252 }
253 253
254 254 fn dirstate_file_contents(&self) -> Result<Vec<u8>, HgError> {
255 255 Ok(self
256 256 .hg_vfs()
257 257 .read("dirstate")
258 258 .io_not_found_as_none()?
259 259 .unwrap_or(Vec::new()))
260 260 }
261 261
262 fn dirstate_identity(&self) -> Result<Option<u64>, HgError> {
263 use std::os::unix::fs::MetadataExt;
264 Ok(self
265 .hg_vfs()
266 .symlink_metadata("dirstate")
267 .io_not_found_as_none()?
268 .map(|meta| meta.ino()))
269 }
270
262 271 pub fn dirstate_parents(&self) -> Result<DirstateParents, HgError> {
263 272 Ok(*self
264 273 .dirstate_parents
265 274 .get_or_init(|| self.read_dirstate_parents())?)
266 275 }
267 276
268 277 fn read_dirstate_parents(&self) -> Result<DirstateParents, HgError> {
269 278 let dirstate = self.dirstate_file_contents()?;
270 279 let parents = if dirstate.is_empty() {
271 280 DirstateParents::NULL
272 281 } else if self.has_dirstate_v2() {
273 282 let docket =
274 283 crate::dirstate_tree::on_disk::read_docket(&dirstate)?;
275 284 docket.parents()
276 285 } else {
277 286 crate::dirstate::parsers::parse_dirstate_parents(&dirstate)?
278 287 .clone()
279 288 };
280 289 self.dirstate_parents.set(parents);
281 290 Ok(parents)
282 291 }
283 292
284 293 /// Returns the information read from the dirstate docket necessary to
285 294 /// check if the data file has been updated/deleted by another process
286 295 /// since we last read the dirstate.
287 /// Namely, the data file uuid and the data size.
296 /// Namely, the inode, data file uuid and the data size.
288 297 fn get_dirstate_data_file_integrity(
289 298 &self,
290 ) -> Result<(Option<Vec<u8>>, usize), HgError> {
299 ) -> Result<(Option<u64>, Option<Vec<u8>>, usize), HgError> {
291 300 assert!(
292 301 self.has_dirstate_v2(),
293 302 "accessing dirstate data file ID without dirstate-v2"
294 303 );
304 // Get the identity before the contents since we could have a race
305 // between the two. Having an identity that is too old is fine, but
306 // one that is younger than the content change is bad.
307 let identity = self.dirstate_identity()?;
295 308 let dirstate = self.dirstate_file_contents()?;
296 309 if dirstate.is_empty() {
297 310 self.dirstate_parents.set(DirstateParents::NULL);
298 Ok((None, 0))
311 Ok((identity, None, 0))
299 312 } else {
300 313 let docket =
301 314 crate::dirstate_tree::on_disk::read_docket(&dirstate)?;
302 315 self.dirstate_parents.set(docket.parents());
303 Ok((Some(docket.uuid.to_owned()), docket.data_size()))
316 Ok((identity, Some(docket.uuid.to_owned()), docket.data_size()))
304 317 }
305 318 }
306 319
307 320 fn new_dirstate_map(&self) -> Result<OwningDirstateMap, DirstateError> {
308 321 if self.has_dirstate_v2() {
309 322 // The v2 dirstate is split into a docket and a data file.
310 323 // Since we don't always take the `wlock` to read it
311 324 // (like in `hg status`), it is susceptible to races.
312 325 // A simple retry method should be enough since full rewrites
313 326 // only happen when too much garbage data is present and
314 327 // this race is unlikely.
315 328 let mut tries = 0;
316 329
317 330 while tries < V2_MAX_READ_ATTEMPTS {
318 331 tries += 1;
319 332 match self.read_docket_and_data_file() {
320 333 Ok(m) => {
321 334 return Ok(m);
322 335 }
323 336 Err(e) => match e {
324 337 DirstateError::Common(HgError::RaceDetected(
325 338 context,
326 339 )) => {
327 340 log::info!(
328 341 "dirstate read race detected {} (retry {}/{})",
329 342 context,
330 343 tries,
331 344 V2_MAX_READ_ATTEMPTS,
332 345 );
333 346 continue;
334 347 }
335 348 _ => return Err(e.into()),
336 349 },
337 350 }
338 351 }
339 352 let error = HgError::abort(
340 353 format!("dirstate read race happened {tries} times in a row"),
341 354 255,
342 355 None,
343 356 );
344 357 return Err(DirstateError::Common(error));
345 358 } else {
346 359 debug_wait_for_file_or_print(
347 360 self.config(),
348 361 "dirstate.pre-read-file",
349 362 );
363 let identity = self.dirstate_identity()?;
350 364 let dirstate_file_contents = self.dirstate_file_contents()?;
351 365 return if dirstate_file_contents.is_empty() {
352 366 self.dirstate_parents.set(DirstateParents::NULL);
353 367 Ok(OwningDirstateMap::new_empty(Vec::new()))
354 368 } else {
355 let (map, parents) =
356 OwningDirstateMap::new_v1(dirstate_file_contents)?;
369 let (map, parents) = OwningDirstateMap::new_v1(
370 dirstate_file_contents,
371 identity,
372 )?;
357 373 self.dirstate_parents.set(parents);
358 374 Ok(map)
359 375 };
360 376 }
361 377 }
362 378
363 379 fn read_docket_and_data_file(
364 380 &self,
365 381 ) -> Result<OwningDirstateMap, DirstateError> {
366 382 debug_wait_for_file_or_print(self.config(), "dirstate.pre-read-file");
367 383 let dirstate_file_contents = self.dirstate_file_contents()?;
384 let identity = self.dirstate_identity()?;
368 385 if dirstate_file_contents.is_empty() {
369 386 self.dirstate_parents.set(DirstateParents::NULL);
370 387 return Ok(OwningDirstateMap::new_empty(Vec::new()));
371 388 }
372 389 let docket = crate::dirstate_tree::on_disk::read_docket(
373 390 &dirstate_file_contents,
374 391 )?;
375 392 debug_wait_for_file_or_print(
376 393 self.config(),
377 394 "dirstate.post-docket-read-file",
378 395 );
379 396 self.dirstate_parents.set(docket.parents());
380 397 let uuid = docket.uuid.to_owned();
381 398 let data_size = docket.data_size();
382 399
383 400 let context = "between reading dirstate docket and data file";
384 401 let race_error = HgError::RaceDetected(context.into());
385 402 let metadata = docket.tree_metadata();
386 403
387 404 let mut map = if crate::vfs::is_on_nfs_mount(docket.data_filename()) {
388 405 // Don't mmap on NFS to prevent `SIGBUS` error on deletion
389 406 let contents = self.hg_vfs().read(docket.data_filename());
390 407 let contents = match contents {
391 408 Ok(c) => c,
392 409 Err(HgError::IoError { error, context }) => {
393 410 match error.raw_os_error().expect("real os error") {
394 411 // 2 = ENOENT, No such file or directory
395 412 // 116 = ESTALE, Stale NFS file handle
396 413 //
397 414 // TODO match on `error.kind()` when
398 415 // `ErrorKind::StaleNetworkFileHandle` is stable.
399 416 2 | 116 => {
400 417 // Race where the data file was deleted right after
401 418 // we read the docket, try again
402 419 return Err(race_error.into());
403 420 }
404 421 _ => {
405 422 return Err(
406 423 HgError::IoError { error, context }.into()
407 424 )
408 425 }
409 426 }
410 427 }
411 428 Err(e) => return Err(e.into()),
412 429 };
413 OwningDirstateMap::new_v2(contents, data_size, metadata, uuid)
430 OwningDirstateMap::new_v2(
431 contents, data_size, metadata, uuid, identity,
432 )
414 433 } else {
415 434 match self
416 435 .hg_vfs()
417 436 .mmap_open(docket.data_filename())
418 437 .io_not_found_as_none()
419 438 {
420 439 Ok(Some(data_mmap)) => OwningDirstateMap::new_v2(
421 data_mmap, data_size, metadata, uuid,
440 data_mmap, data_size, metadata, uuid, identity,
422 441 ),
423 442 Ok(None) => {
424 443 // Race where the data file was deleted right after we
425 444 // read the docket, try again
426 445 return Err(race_error.into());
427 446 }
428 447 Err(e) => return Err(e.into()),
429 448 }
430 449 }?;
431 450
432 451 let write_mode_config = self
433 452 .config()
434 453 .get_str(b"devel", b"dirstate.v2.data_update_mode")
435 454 .unwrap_or(Some("auto"))
436 455 .unwrap_or("auto"); // don't bother for devel options
437 456 let write_mode = match write_mode_config {
438 457 "auto" => DirstateMapWriteMode::Auto,
439 458 "force-new" => DirstateMapWriteMode::ForceNewDataFile,
440 459 "force-append" => DirstateMapWriteMode::ForceAppend,
441 460 _ => DirstateMapWriteMode::Auto,
442 461 };
443 462
444 463 map.with_dmap_mut(|m| m.set_write_mode(write_mode));
445 464
446 465 Ok(map)
447 466 }
448 467
449 468 pub fn dirstate_map(
450 469 &self,
451 470 ) -> Result<Ref<OwningDirstateMap>, DirstateError> {
452 471 self.dirstate_map.get_or_init(|| self.new_dirstate_map())
453 472 }
454 473
455 474 pub fn dirstate_map_mut(
456 475 &self,
457 476 ) -> Result<RefMut<OwningDirstateMap>, DirstateError> {
458 477 self.dirstate_map
459 478 .get_mut_or_init(|| self.new_dirstate_map())
460 479 }
461 480
462 481 fn new_changelog(&self) -> Result<Changelog, HgError> {
463 482 Changelog::open(&self.store_vfs(), self.has_nodemap())
464 483 }
465 484
466 485 pub fn changelog(&self) -> Result<Ref<Changelog>, HgError> {
467 486 self.changelog.get_or_init(|| self.new_changelog())
468 487 }
469 488
470 489 pub fn changelog_mut(&self) -> Result<RefMut<Changelog>, HgError> {
471 490 self.changelog.get_mut_or_init(|| self.new_changelog())
472 491 }
473 492
474 493 fn new_manifestlog(&self) -> Result<Manifestlog, HgError> {
475 494 Manifestlog::open(&self.store_vfs(), self.has_nodemap())
476 495 }
477 496
478 497 pub fn manifestlog(&self) -> Result<Ref<Manifestlog>, HgError> {
479 498 self.manifestlog.get_or_init(|| self.new_manifestlog())
480 499 }
481 500
482 501 pub fn manifestlog_mut(&self) -> Result<RefMut<Manifestlog>, HgError> {
483 502 self.manifestlog.get_mut_or_init(|| self.new_manifestlog())
484 503 }
485 504
486 505 /// Returns the manifest of the *changeset* with the given node ID
487 506 pub fn manifest_for_node(
488 507 &self,
489 508 node: impl Into<NodePrefix>,
490 509 ) -> Result<Manifest, RevlogError> {
491 510 self.manifestlog()?.data_for_node(
492 511 self.changelog()?
493 512 .data_for_node(node.into())?
494 513 .manifest_node()?
495 514 .into(),
496 515 )
497 516 }
498 517
499 518 /// Returns the manifest of the *changeset* with the given revision number
500 519 pub fn manifest_for_rev(
501 520 &self,
502 521 revision: Revision,
503 522 ) -> Result<Manifest, RevlogError> {
504 523 self.manifestlog()?.data_for_node(
505 524 self.changelog()?
506 525 .data_for_rev(revision)?
507 526 .manifest_node()?
508 527 .into(),
509 528 )
510 529 }
511 530
512 531 pub fn has_subrepos(&self) -> Result<bool, DirstateError> {
513 532 if let Some(entry) = self.dirstate_map()?.get(HgPath::new(".hgsub"))? {
514 533 Ok(entry.tracked())
515 534 } else {
516 535 Ok(false)
517 536 }
518 537 }
519 538
520 539 pub fn filelog(&self, path: &HgPath) -> Result<Filelog, HgError> {
521 540 Filelog::open(self, path)
522 541 }
523 542
524 543 /// Write to disk any updates that were made through `dirstate_map_mut`.
525 544 ///
526 545 /// The "wlock" must be held while calling this.
527 546 /// See for example `try_with_wlock_no_wait`.
528 547 ///
529 548 /// TODO: have a `WritableRepo` type only accessible while holding the
530 549 /// lock?
531 550 pub fn write_dirstate(&self) -> Result<(), DirstateError> {
532 551 let map = self.dirstate_map()?;
533 552 // TODO: Maintain a `DirstateMap::dirty` flag, and return early here if
534 553 // it’s unset
535 554 let parents = self.dirstate_parents()?;
536 555 let (packed_dirstate, old_uuid_to_remove) = if self.has_dirstate_v2() {
537 let (uuid, data_size) = self.get_dirstate_data_file_integrity()?;
556 let (identity, uuid, data_size) =
557 self.get_dirstate_data_file_integrity()?;
558 let identity_changed = identity != map.old_identity();
538 559 let uuid_changed = uuid.as_deref() != map.old_uuid();
539 560 let data_length_changed = data_size != map.old_data_size();
540 561
541 if uuid_changed || data_length_changed {
542 // If uuid or length changed since last disk read, don't write.
562 if identity_changed || uuid_changed || data_length_changed {
563 // If any of identity, uuid or length have changed since
564 // last disk read, don't write.
543 565 // This is fine because either we're in a command that doesn't
544 566 // write anything too important (like `hg status`), or we're in
545 567 // `hg add` and we're supposed to have taken the lock before
546 568 // reading anyway.
547 569 //
548 570 // TODO complain loudly if we've changed anything important
549 571 // without taking the lock.
550 572 // (see `hg help config.format.use-dirstate-tracked-hint`)
551 573 log::debug!(
552 574 "dirstate has changed since last read, not updating."
553 575 );
554 576 return Ok(());
555 577 }
556 578
557 579 let uuid_opt = map.old_uuid();
558 580 let write_mode = if uuid_opt.is_some() {
559 581 DirstateMapWriteMode::Auto
560 582 } else {
561 583 DirstateMapWriteMode::ForceNewDataFile
562 584 };
563 585 let (data, tree_metadata, append, old_data_size) =
564 586 map.pack_v2(write_mode)?;
565 587
566 588 // Reuse the uuid, or generate a new one, keeping the old for
567 589 // deletion.
568 590 let (uuid, old_uuid) = match uuid_opt {
569 591 Some(uuid) => {
570 592 let as_str = std::str::from_utf8(uuid)
571 593 .map_err(|_| {
572 594 HgError::corrupted(
573 595 "non-UTF-8 dirstate data file ID",
574 596 )
575 597 })?
576 598 .to_owned();
577 599 if append {
578 600 (as_str, None)
579 601 } else {
580 602 (DirstateDocket::new_uid(), Some(as_str))
581 603 }
582 604 }
583 605 None => (DirstateDocket::new_uid(), None),
584 606 };
585 607
586 608 let data_filename = format!("dirstate.{}", uuid);
587 609 let data_filename = self.hg_vfs().join(data_filename);
588 610 let mut options = std::fs::OpenOptions::new();
589 611 options.write(true);
590 612
591 613 // Why are we not using the O_APPEND flag when appending?
592 614 //
593 615 // - O_APPEND makes it trickier to deal with garbage at the end of
594 616 // the file, left by a previous uncommitted transaction. By
595 617 // starting the write at [old_data_size] we make sure we erase
596 618 // all such garbage.
597 619 //
598 620 // - O_APPEND requires to special-case 0-byte writes, whereas we
599 621 // don't need that.
600 622 //
601 623 // - Some OSes have bugs in implementation O_APPEND:
602 624 // revlog.py talks about a Solaris bug, but we also saw some ZFS
603 625 // bug: https://github.com/openzfs/zfs/pull/3124,
604 626 // https://github.com/openzfs/zfs/issues/13370
605 627 //
606 628 if !append {
607 629 log::trace!("creating a new dirstate data file");
608 630 options.create_new(true);
609 631 } else {
610 632 log::trace!("appending to the dirstate data file");
611 633 }
612 634
613 635 let data_size = (|| {
614 636 // TODO: loop and try another random ID if !append and this
615 637 // returns `ErrorKind::AlreadyExists`? Collision chance of two
616 638 // random IDs is one in 2**32
617 639 let mut file = options.open(&data_filename)?;
618 640 if append {
619 641 file.seek(SeekFrom::Start(old_data_size as u64))?;
620 642 }
621 643 file.write_all(&data)?;
622 644 file.flush()?;
623 645 file.seek(SeekFrom::Current(0))
624 646 })()
625 647 .when_writing_file(&data_filename)?;
626 648
627 649 let packed_dirstate = DirstateDocket::serialize(
628 650 parents,
629 651 tree_metadata,
630 652 data_size,
631 653 uuid.as_bytes(),
632 654 )
633 655 .map_err(|_: std::num::TryFromIntError| {
634 656 HgError::corrupted("overflow in dirstate docket serialization")
635 657 })?;
636 658
637 659 (packed_dirstate, old_uuid)
638 660 } else {
661 let identity = self.dirstate_identity()?;
662 if identity != map.old_identity() {
663 // If identity changed since last disk read, don't write.
664 // This is fine because either we're in a command that doesn't
665 // write anything too important (like `hg status`), or we're in
666 // `hg add` and we're supposed to have taken the lock before
667 // reading anyway.
668 //
669 // TODO complain loudly if we've changed anything important
670 // without taking the lock.
671 // (see `hg help config.format.use-dirstate-tracked-hint`)
672 log::debug!(
673 "dirstate has changed since last read, not updating."
674 );
675 return Ok(());
676 }
639 677 (map.pack_v1(parents)?, None)
640 678 };
641 679
642 680 let vfs = self.hg_vfs();
643 681 vfs.atomic_write("dirstate", &packed_dirstate)?;
644 682 if let Some(uuid) = old_uuid_to_remove {
645 683 // Remove the old data file after the new docket pointing to the
646 684 // new data file was written.
647 685 vfs.remove_file(format!("dirstate.{}", uuid))?;
648 686 }
649 687 Ok(())
650 688 }
651 689 }
652 690
653 691 /// Lazily-initialized component of `Repo` with interior mutability
654 692 ///
655 693 /// This differs from `OnceCell` in that the value can still be "deinitialized"
656 694 /// later by setting its inner `Option` to `None`. It also takes the
657 695 /// initialization function as an argument when the value is requested, not
658 696 /// when the instance is created.
659 697 struct LazyCell<T> {
660 698 value: RefCell<Option<T>>,
661 699 }
662 700
663 701 impl<T> LazyCell<T> {
664 702 fn new() -> Self {
665 703 Self {
666 704 value: RefCell::new(None),
667 705 }
668 706 }
669 707
670 708 fn set(&self, value: T) {
671 709 *self.value.borrow_mut() = Some(value)
672 710 }
673 711
674 712 fn get_or_init<E>(
675 713 &self,
676 714 init: impl Fn() -> Result<T, E>,
677 715 ) -> Result<Ref<T>, E> {
678 716 let mut borrowed = self.value.borrow();
679 717 if borrowed.is_none() {
680 718 drop(borrowed);
681 719 // Only use `borrow_mut` if it is really needed to avoid panic in
682 720 // case there is another outstanding borrow but mutation is not
683 721 // needed.
684 722 *self.value.borrow_mut() = Some(init()?);
685 723 borrowed = self.value.borrow()
686 724 }
687 725 Ok(Ref::map(borrowed, |option| option.as_ref().unwrap()))
688 726 }
689 727
690 728 fn get_mut_or_init<E>(
691 729 &self,
692 730 init: impl Fn() -> Result<T, E>,
693 731 ) -> Result<RefMut<T>, E> {
694 732 let mut borrowed = self.value.borrow_mut();
695 733 if borrowed.is_none() {
696 734 *borrowed = Some(init()?);
697 735 }
698 736 Ok(RefMut::map(borrowed, |option| option.as_mut().unwrap()))
699 737 }
700 738 }
@@ -1,561 +1,567 b''
1 1 // dirstate_map.rs
2 2 //
3 3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
4 4 //
5 5 // This software may be used and distributed according to the terms of the
6 6 // GNU General Public License version 2 or any later version.
7 7
8 8 //! Bindings for the `hg::dirstate::dirstate_map` file provided by the
9 9 //! `hg-core` package.
10 10
11 11 use std::cell::{RefCell, RefMut};
12 12 use std::convert::TryInto;
13 13
14 14 use cpython::{
15 15 exc, PyBool, PyBytes, PyClone, PyDict, PyErr, PyList, PyNone, PyObject,
16 16 PyResult, Python, PythonObject, ToPyObject, UnsafePyLeaked,
17 17 };
18 18 use hg::dirstate::{ParentFileData, TruncatedTimestamp};
19 19
20 20 use crate::{
21 21 dirstate::copymap::{CopyMap, CopyMapItemsIterator, CopyMapKeysIterator},
22 22 dirstate::item::DirstateItem,
23 23 pybytes_deref::PyBytesDeref,
24 24 };
25 25 use hg::{
26 26 dirstate::StateMapIter, dirstate_tree::dirstate_map::DirstateMapWriteMode,
27 27 dirstate_tree::on_disk::DirstateV2ParseError,
28 28 dirstate_tree::owning::OwningDirstateMap, revlog::Node,
29 29 utils::files::normalize_case, utils::hg_path::HgPath, DirstateEntry,
30 30 DirstateError, DirstateParents,
31 31 };
32 32
33 33 // TODO
34 34 // This object needs to share references to multiple members of its Rust
35 35 // inner struct, namely `copy_map`, `dirs` and `all_dirs`.
36 36 // Right now `CopyMap` is done, but it needs to have an explicit reference
37 37 // to `RustDirstateMap` which itself needs to have an encapsulation for
38 38 // every method in `CopyMap` (copymapcopy, etc.).
39 39 // This is ugly and hard to maintain.
40 40 // The same logic applies to `dirs` and `all_dirs`, however the `Dirs`
41 41 // `py_class!` is already implemented and does not mention
42 42 // `RustDirstateMap`, rightfully so.
43 43 // All attributes also have to have a separate refcount data attribute for
44 44 // leaks, with all methods that go along for reference sharing.
45 45 py_class!(pub class DirstateMap |py| {
46 46 @shared data inner: OwningDirstateMap;
47 47
48 48 /// Returns a `(dirstate_map, parents)` tuple
49 49 @staticmethod
50 50 def new_v1(
51 51 on_disk: PyBytes,
52 identity: Option<u64>,
52 53 ) -> PyResult<PyObject> {
53 54 let on_disk = PyBytesDeref::new(py, on_disk);
54 let (map, parents) = OwningDirstateMap::new_v1(on_disk)
55 let (map, parents) = OwningDirstateMap::new_v1(on_disk, identity)
55 56 .map_err(|e| dirstate_error(py, e))?;
56 57 let map = Self::create_instance(py, map)?;
57 58 let p1 = PyBytes::new(py, parents.p1.as_bytes());
58 59 let p2 = PyBytes::new(py, parents.p2.as_bytes());
59 60 let parents = (p1, p2);
60 61 Ok((map, parents).to_py_object(py).into_object())
61 62 }
62 63
63 64 /// Returns a DirstateMap
64 65 @staticmethod
65 66 def new_v2(
66 67 on_disk: PyBytes,
67 68 data_size: usize,
68 69 tree_metadata: PyBytes,
69 70 uuid: PyBytes,
71 identity: Option<u64>,
70 72 ) -> PyResult<PyObject> {
71 73 let dirstate_error = |e: DirstateError| {
72 74 PyErr::new::<exc::OSError, _>(py, format!("Dirstate error: {:?}", e))
73 75 };
74 76 let on_disk = PyBytesDeref::new(py, on_disk);
75 77 let uuid = uuid.data(py);
76 78 let map = OwningDirstateMap::new_v2(
77 on_disk, data_size, tree_metadata.data(py), uuid.to_owned(),
79 on_disk,
80 data_size,
81 tree_metadata.data(py),
82 uuid.to_owned(),
83 identity,
78 84 ).map_err(dirstate_error)?;
79 85 let map = Self::create_instance(py, map)?;
80 86 Ok(map.into_object())
81 87 }
82 88
83 89 /// Returns an empty DirstateMap. Only used for a new dirstate.
84 90 @staticmethod
85 91 def new_empty() -> PyResult<PyObject> {
86 92 let map = OwningDirstateMap::new_empty(vec![]);
87 93 let map = Self::create_instance(py, map)?;
88 94 Ok(map.into_object())
89 95 }
90 96
91 97 def clear(&self) -> PyResult<PyObject> {
92 98 self.inner(py).borrow_mut().clear();
93 99 Ok(py.None())
94 100 }
95 101
96 102 def get(
97 103 &self,
98 104 key: PyObject,
99 105 default: Option<PyObject> = None
100 106 ) -> PyResult<Option<PyObject>> {
101 107 let key = key.extract::<PyBytes>(py)?;
102 108 match self
103 109 .inner(py)
104 110 .borrow()
105 111 .get(HgPath::new(key.data(py)))
106 112 .map_err(|e| v2_error(py, e))?
107 113 {
108 114 Some(entry) => {
109 115 Ok(Some(DirstateItem::new_as_pyobject(py, entry)?))
110 116 },
111 117 None => Ok(default)
112 118 }
113 119 }
114 120
115 121 def set_tracked(&self, f: PyObject) -> PyResult<PyBool> {
116 122 let bytes = f.extract::<PyBytes>(py)?;
117 123 let path = HgPath::new(bytes.data(py));
118 124 let res = self.inner(py).borrow_mut().set_tracked(path);
119 125 let was_tracked = res.or_else(|_| {
120 126 Err(PyErr::new::<exc::OSError, _>(py, "Dirstate error".to_string()))
121 127 })?;
122 128 Ok(was_tracked.to_py_object(py))
123 129 }
124 130
125 131 def set_untracked(&self, f: PyObject) -> PyResult<PyBool> {
126 132 let bytes = f.extract::<PyBytes>(py)?;
127 133 let path = HgPath::new(bytes.data(py));
128 134 let res = self.inner(py).borrow_mut().set_untracked(path);
129 135 let was_tracked = res.or_else(|_| {
130 136 Err(PyErr::new::<exc::OSError, _>(py, "Dirstate error".to_string()))
131 137 })?;
132 138 Ok(was_tracked.to_py_object(py))
133 139 }
134 140
135 141 def set_clean(
136 142 &self,
137 143 f: PyObject,
138 144 mode: u32,
139 145 size: u32,
140 146 mtime: (i64, u32, bool)
141 147 ) -> PyResult<PyNone> {
142 148 let (mtime_s, mtime_ns, second_ambiguous) = mtime;
143 149 let timestamp = TruncatedTimestamp::new_truncate(
144 150 mtime_s, mtime_ns, second_ambiguous
145 151 );
146 152 let bytes = f.extract::<PyBytes>(py)?;
147 153 let path = HgPath::new(bytes.data(py));
148 154 let res = self.inner(py).borrow_mut().set_clean(
149 155 path, mode, size, timestamp,
150 156 );
151 157 res.or_else(|_| {
152 158 Err(PyErr::new::<exc::OSError, _>(py, "Dirstate error".to_string()))
153 159 })?;
154 160 Ok(PyNone)
155 161 }
156 162
157 163 def set_possibly_dirty(&self, f: PyObject) -> PyResult<PyNone> {
158 164 let bytes = f.extract::<PyBytes>(py)?;
159 165 let path = HgPath::new(bytes.data(py));
160 166 let res = self.inner(py).borrow_mut().set_possibly_dirty(path);
161 167 res.or_else(|_| {
162 168 Err(PyErr::new::<exc::OSError, _>(py, "Dirstate error".to_string()))
163 169 })?;
164 170 Ok(PyNone)
165 171 }
166 172
167 173 def reset_state(
168 174 &self,
169 175 f: PyObject,
170 176 wc_tracked: bool,
171 177 p1_tracked: bool,
172 178 p2_info: bool,
173 179 has_meaningful_mtime: bool,
174 180 parentfiledata: Option<(u32, u32, Option<(i64, u32, bool)>)>,
175 181 ) -> PyResult<PyNone> {
176 182 let mut has_meaningful_mtime = has_meaningful_mtime;
177 183 let parent_file_data = match parentfiledata {
178 184 None => {
179 185 has_meaningful_mtime = false;
180 186 None
181 187 },
182 188 Some(data) => {
183 189 let (mode, size, mtime_info) = data;
184 190 let mtime = if let Some(mtime_info) = mtime_info {
185 191 let (mtime_s, mtime_ns, second_ambiguous) = mtime_info;
186 192 let timestamp = TruncatedTimestamp::new_truncate(
187 193 mtime_s, mtime_ns, second_ambiguous
188 194 );
189 195 Some(timestamp)
190 196 } else {
191 197 has_meaningful_mtime = false;
192 198 None
193 199 };
194 200 Some(ParentFileData {
195 201 mode_size: Some((mode, size)),
196 202 mtime,
197 203 })
198 204 }
199 205 };
200 206 let bytes = f.extract::<PyBytes>(py)?;
201 207 let path = HgPath::new(bytes.data(py));
202 208 let res = self.inner(py).borrow_mut().reset_state(
203 209 path,
204 210 wc_tracked,
205 211 p1_tracked,
206 212 p2_info,
207 213 has_meaningful_mtime,
208 214 parent_file_data,
209 215 );
210 216 res.or_else(|_| {
211 217 Err(PyErr::new::<exc::OSError, _>(py, "Dirstate error".to_string()))
212 218 })?;
213 219 Ok(PyNone)
214 220 }
215 221
216 222 def hastrackeddir(&self, d: PyObject) -> PyResult<PyBool> {
217 223 let d = d.extract::<PyBytes>(py)?;
218 224 Ok(self.inner(py).borrow_mut()
219 225 .has_tracked_dir(HgPath::new(d.data(py)))
220 226 .map_err(|e| {
221 227 PyErr::new::<exc::ValueError, _>(py, e.to_string())
222 228 })?
223 229 .to_py_object(py))
224 230 }
225 231
226 232 def hasdir(&self, d: PyObject) -> PyResult<PyBool> {
227 233 let d = d.extract::<PyBytes>(py)?;
228 234 Ok(self.inner(py).borrow_mut()
229 235 .has_dir(HgPath::new(d.data(py)))
230 236 .map_err(|e| {
231 237 PyErr::new::<exc::ValueError, _>(py, e.to_string())
232 238 })?
233 239 .to_py_object(py))
234 240 }
235 241
236 242 def write_v1(
237 243 &self,
238 244 p1: PyObject,
239 245 p2: PyObject,
240 246 ) -> PyResult<PyBytes> {
241 247 let inner = self.inner(py).borrow();
242 248 let parents = DirstateParents {
243 249 p1: extract_node_id(py, &p1)?,
244 250 p2: extract_node_id(py, &p2)?,
245 251 };
246 252 let result = inner.pack_v1(parents);
247 253 match result {
248 254 Ok(packed) => Ok(PyBytes::new(py, &packed)),
249 255 Err(_) => Err(PyErr::new::<exc::OSError, _>(
250 256 py,
251 257 "Dirstate error".to_string(),
252 258 )),
253 259 }
254 260 }
255 261
256 262 /// Returns new data together with whether that data should be appended to
257 263 /// the existing data file whose content is at `self.on_disk` (True),
258 264 /// instead of written to a new data file (False).
259 265 def write_v2(
260 266 &self,
261 267 write_mode: usize,
262 268 ) -> PyResult<PyObject> {
263 269 let inner = self.inner(py).borrow();
264 270 let rust_write_mode = match write_mode {
265 271 0 => DirstateMapWriteMode::Auto,
266 272 1 => DirstateMapWriteMode::ForceNewDataFile,
267 273 2 => DirstateMapWriteMode::ForceAppend,
268 274 _ => DirstateMapWriteMode::Auto, // XXX should we error out?
269 275 };
270 276 let result = inner.pack_v2(rust_write_mode);
271 277 match result {
272 278 Ok((packed, tree_metadata, append, _old_data_size)) => {
273 279 let packed = PyBytes::new(py, &packed);
274 280 let tree_metadata = PyBytes::new(py, tree_metadata.as_bytes());
275 281 let tuple = (packed, tree_metadata, append);
276 282 Ok(tuple.to_py_object(py).into_object())
277 283 },
278 284 Err(_) => Err(PyErr::new::<exc::OSError, _>(
279 285 py,
280 286 "Dirstate error".to_string(),
281 287 )),
282 288 }
283 289 }
284 290
285 291 def filefoldmapasdict(&self) -> PyResult<PyDict> {
286 292 let dict = PyDict::new(py);
287 293 for item in self.inner(py).borrow_mut().iter() {
288 294 let (path, entry) = item.map_err(|e| v2_error(py, e))?;
289 295 if !entry.removed() {
290 296 let key = normalize_case(path);
291 297 let value = path;
292 298 dict.set_item(
293 299 py,
294 300 PyBytes::new(py, key.as_bytes()).into_object(),
295 301 PyBytes::new(py, value.as_bytes()).into_object(),
296 302 )?;
297 303 }
298 304 }
299 305 Ok(dict)
300 306 }
301 307
302 308 def __len__(&self) -> PyResult<usize> {
303 309 Ok(self.inner(py).borrow().len())
304 310 }
305 311
306 312 def __contains__(&self, key: PyObject) -> PyResult<bool> {
307 313 let key = key.extract::<PyBytes>(py)?;
308 314 self.inner(py)
309 315 .borrow()
310 316 .contains_key(HgPath::new(key.data(py)))
311 317 .map_err(|e| v2_error(py, e))
312 318 }
313 319
314 320 def __getitem__(&self, key: PyObject) -> PyResult<PyObject> {
315 321 let key = key.extract::<PyBytes>(py)?;
316 322 let key = HgPath::new(key.data(py));
317 323 match self
318 324 .inner(py)
319 325 .borrow()
320 326 .get(key)
321 327 .map_err(|e| v2_error(py, e))?
322 328 {
323 329 Some(entry) => {
324 330 Ok(DirstateItem::new_as_pyobject(py, entry)?)
325 331 },
326 332 None => Err(PyErr::new::<exc::KeyError, _>(
327 333 py,
328 334 String::from_utf8_lossy(key.as_bytes()),
329 335 )),
330 336 }
331 337 }
332 338
333 339 def keys(&self) -> PyResult<DirstateMapKeysIterator> {
334 340 let leaked_ref = self.inner(py).leak_immutable();
335 341 DirstateMapKeysIterator::from_inner(
336 342 py,
337 343 unsafe { leaked_ref.map(py, |o| o.iter()) },
338 344 )
339 345 }
340 346
341 347 def items(&self) -> PyResult<DirstateMapItemsIterator> {
342 348 let leaked_ref = self.inner(py).leak_immutable();
343 349 DirstateMapItemsIterator::from_inner(
344 350 py,
345 351 unsafe { leaked_ref.map(py, |o| o.iter()) },
346 352 )
347 353 }
348 354
349 355 def __iter__(&self) -> PyResult<DirstateMapKeysIterator> {
350 356 let leaked_ref = self.inner(py).leak_immutable();
351 357 DirstateMapKeysIterator::from_inner(
352 358 py,
353 359 unsafe { leaked_ref.map(py, |o| o.iter()) },
354 360 )
355 361 }
356 362
357 363 // TODO all copymap* methods, see docstring above
358 364 def copymapcopy(&self) -> PyResult<PyDict> {
359 365 let dict = PyDict::new(py);
360 366 for item in self.inner(py).borrow().copy_map_iter() {
361 367 let (key, value) = item.map_err(|e| v2_error(py, e))?;
362 368 dict.set_item(
363 369 py,
364 370 PyBytes::new(py, key.as_bytes()),
365 371 PyBytes::new(py, value.as_bytes()),
366 372 )?;
367 373 }
368 374 Ok(dict)
369 375 }
370 376
371 377 def copymapgetitem(&self, key: PyObject) -> PyResult<PyBytes> {
372 378 let key = key.extract::<PyBytes>(py)?;
373 379 match self
374 380 .inner(py)
375 381 .borrow()
376 382 .copy_map_get(HgPath::new(key.data(py)))
377 383 .map_err(|e| v2_error(py, e))?
378 384 {
379 385 Some(copy) => Ok(PyBytes::new(py, copy.as_bytes())),
380 386 None => Err(PyErr::new::<exc::KeyError, _>(
381 387 py,
382 388 String::from_utf8_lossy(key.data(py)),
383 389 )),
384 390 }
385 391 }
386 392 def copymap(&self) -> PyResult<CopyMap> {
387 393 CopyMap::from_inner(py, self.clone_ref(py))
388 394 }
389 395
390 396 def copymaplen(&self) -> PyResult<usize> {
391 397 Ok(self.inner(py).borrow().copy_map_len())
392 398 }
393 399 def copymapcontains(&self, key: PyObject) -> PyResult<bool> {
394 400 let key = key.extract::<PyBytes>(py)?;
395 401 self.inner(py)
396 402 .borrow()
397 403 .copy_map_contains_key(HgPath::new(key.data(py)))
398 404 .map_err(|e| v2_error(py, e))
399 405 }
400 406 def copymapget(
401 407 &self,
402 408 key: PyObject,
403 409 default: Option<PyObject>
404 410 ) -> PyResult<Option<PyObject>> {
405 411 let key = key.extract::<PyBytes>(py)?;
406 412 match self
407 413 .inner(py)
408 414 .borrow()
409 415 .copy_map_get(HgPath::new(key.data(py)))
410 416 .map_err(|e| v2_error(py, e))?
411 417 {
412 418 Some(copy) => Ok(Some(
413 419 PyBytes::new(py, copy.as_bytes()).into_object(),
414 420 )),
415 421 None => Ok(default),
416 422 }
417 423 }
418 424 def copymapsetitem(
419 425 &self,
420 426 key: PyObject,
421 427 value: PyObject
422 428 ) -> PyResult<PyObject> {
423 429 let key = key.extract::<PyBytes>(py)?;
424 430 let value = value.extract::<PyBytes>(py)?;
425 431 self.inner(py)
426 432 .borrow_mut()
427 433 .copy_map_insert(
428 434 HgPath::new(key.data(py)),
429 435 HgPath::new(value.data(py)),
430 436 )
431 437 .map_err(|e| v2_error(py, e))?;
432 438 Ok(py.None())
433 439 }
434 440 def copymappop(
435 441 &self,
436 442 key: PyObject,
437 443 default: Option<PyObject>
438 444 ) -> PyResult<Option<PyObject>> {
439 445 let key = key.extract::<PyBytes>(py)?;
440 446 match self
441 447 .inner(py)
442 448 .borrow_mut()
443 449 .copy_map_remove(HgPath::new(key.data(py)))
444 450 .map_err(|e| v2_error(py, e))?
445 451 {
446 452 Some(copy) => Ok(Some(
447 453 PyBytes::new(py, copy.as_bytes()).into_object(),
448 454 )),
449 455 None => Ok(default),
450 456 }
451 457 }
452 458
453 459 def copymapiter(&self) -> PyResult<CopyMapKeysIterator> {
454 460 let leaked_ref = self.inner(py).leak_immutable();
455 461 CopyMapKeysIterator::from_inner(
456 462 py,
457 463 unsafe { leaked_ref.map(py, |o| o.copy_map_iter()) },
458 464 )
459 465 }
460 466
461 467 def copymapitemsiter(&self) -> PyResult<CopyMapItemsIterator> {
462 468 let leaked_ref = self.inner(py).leak_immutable();
463 469 CopyMapItemsIterator::from_inner(
464 470 py,
465 471 unsafe { leaked_ref.map(py, |o| o.copy_map_iter()) },
466 472 )
467 473 }
468 474
469 475 def tracked_dirs(&self) -> PyResult<PyList> {
470 476 let dirs = PyList::new(py, &[]);
471 477 for path in self.inner(py).borrow_mut().iter_tracked_dirs()
472 478 .map_err(|e |dirstate_error(py, e))?
473 479 {
474 480 let path = path.map_err(|e| v2_error(py, e))?;
475 481 let path = PyBytes::new(py, path.as_bytes());
476 482 dirs.append(py, path.into_object())
477 483 }
478 484 Ok(dirs)
479 485 }
480 486
481 487 def setparents_fixup(&self) -> PyResult<PyDict> {
482 488 let dict = PyDict::new(py);
483 489 let copies = self.inner(py).borrow_mut().setparents_fixup();
484 490 for (key, value) in copies.map_err(|e| v2_error(py, e))? {
485 491 dict.set_item(
486 492 py,
487 493 PyBytes::new(py, key.as_bytes()),
488 494 PyBytes::new(py, value.as_bytes()),
489 495 )?;
490 496 }
491 497 Ok(dict)
492 498 }
493 499
494 500 def debug_iter(&self, all: bool) -> PyResult<PyList> {
495 501 let dirs = PyList::new(py, &[]);
496 502 for item in self.inner(py).borrow().debug_iter(all) {
497 503 let (path, (state, mode, size, mtime)) =
498 504 item.map_err(|e| v2_error(py, e))?;
499 505 let path = PyBytes::new(py, path.as_bytes());
500 506 let item = (path, state, mode, size, mtime);
501 507 dirs.append(py, item.to_py_object(py).into_object())
502 508 }
503 509 Ok(dirs)
504 510 }
505 511 });
506 512
507 513 impl DirstateMap {
508 514 pub fn get_inner_mut<'a>(
509 515 &'a self,
510 516 py: Python<'a>,
511 517 ) -> RefMut<'a, OwningDirstateMap> {
512 518 self.inner(py).borrow_mut()
513 519 }
514 520 fn translate_key(
515 521 py: Python,
516 522 res: Result<(&HgPath, DirstateEntry), DirstateV2ParseError>,
517 523 ) -> PyResult<Option<PyBytes>> {
518 524 let (f, _entry) = res.map_err(|e| v2_error(py, e))?;
519 525 Ok(Some(PyBytes::new(py, f.as_bytes())))
520 526 }
521 527 fn translate_key_value(
522 528 py: Python,
523 529 res: Result<(&HgPath, DirstateEntry), DirstateV2ParseError>,
524 530 ) -> PyResult<Option<(PyBytes, PyObject)>> {
525 531 let (f, entry) = res.map_err(|e| v2_error(py, e))?;
526 532 Ok(Some((
527 533 PyBytes::new(py, f.as_bytes()),
528 534 DirstateItem::new_as_pyobject(py, entry)?,
529 535 )))
530 536 }
531 537 }
532 538
533 539 py_shared_iterator!(
534 540 DirstateMapKeysIterator,
535 541 UnsafePyLeaked<StateMapIter<'static>>,
536 542 DirstateMap::translate_key,
537 543 Option<PyBytes>
538 544 );
539 545
540 546 py_shared_iterator!(
541 547 DirstateMapItemsIterator,
542 548 UnsafePyLeaked<StateMapIter<'static>>,
543 549 DirstateMap::translate_key_value,
544 550 Option<(PyBytes, PyObject)>
545 551 );
546 552
547 553 fn extract_node_id(py: Python, obj: &PyObject) -> PyResult<Node> {
548 554 let bytes = obj.extract::<PyBytes>(py)?;
549 555 match bytes.data(py).try_into() {
550 556 Ok(s) => Ok(s),
551 557 Err(e) => Err(PyErr::new::<exc::ValueError, _>(py, e.to_string())),
552 558 }
553 559 }
554 560
555 561 pub(super) fn v2_error(py: Python<'_>, _: DirstateV2ParseError) -> PyErr {
556 562 PyErr::new::<exc::ValueError, _>(py, "corrupted dirstate-v2")
557 563 }
558 564
559 565 fn dirstate_error(py: Python<'_>, e: DirstateError) -> PyErr {
560 566 PyErr::new::<exc::OSError, _>(py, format!("Dirstate error: {:?}", e))
561 567 }
@@ -1,482 +1,460 b''
1 1 =====================================================================
2 2 Check potential race conditions between a status and other operations
3 3 =====================================================================
4 4
5 5 #testcases dirstate-v1 dirstate-v2-append dirstate-v2-rewrite
6 6
7 7 The `hg status` command can run without the wlock, however it might end up
8 8 having to update the on-disk dirstate files, for example to mark ambiguous
9 9 files as clean, or to update directory caches information with dirstate-v2.
10 10
11 11
12 12 If another process updates the dirstate in the meantime we might run into
13 13 trouble. Especially, commands doing semantic changes like `hg add` or
14 14 `hg commit` should not see their update erased by a concurrent status.
15 15
16 16 Unlike commands like `add` or `commit`, `status` only writes the dirstate
17 17 to update caches, no actual information is lost if we fail to write to disk.
18 18
19 19
20 20 This test file is meant to test various cases where such parallel operations
21 21 between a status with reasons to update the dirstate and another semantic
22 22 changes happen.
23 23
24 24
25 25 Setup
26 26 =====
27 27
28 28 $ cat >> $HGRCPATH << EOF
29 29 > [storage]
30 30 > dirstate-v2.slow-path=allow
31 31 > EOF
32 32
33 33 #if no-dirstate-v1
34 34 $ cat >> $HGRCPATH << EOF
35 35 > [format]
36 36 > use-dirstate-v2=yes
37 37 > EOF
38 38 #else
39 39 $ cat >> $HGRCPATH << EOF
40 40 > [format]
41 41 > use-dirstate-v2=no
42 42 > EOF
43 43 #endif
44 44
45 45 #if dirstate-v2-rewrite
46 46 $ d2args="--config devel.dirstate.v2.data_update_mode=force-new"
47 47 #endif
48 48 #if dirstate-v2-append
49 49 $ d2args="--config devel.dirstate.v2.data_update_mode=force-append"
50 50 #endif
51 51
52 52 $ directories="dir dir/nested dir2"
53 53 $ first_files="dir/nested/a dir/b dir/c dir/d dir2/e f"
54 54 $ second_files="g dir/nested/h dir/i dir/j dir2/k dir2/l dir/nested/m"
55 55 $ extra_files="dir/n dir/o p q"
56 56
57 57 $ hg init reference-repo
58 58 $ cd reference-repo
59 59 $ mkdir -p dir/nested dir2
60 60 $ touch -t 200001010000 $first_files $directories
61 61 $ hg commit -Aqm "recreate a bunch of files to facilitate dirstate-v2 append"
62 62 $ touch -t 200001010010 $second_files $directories
63 63 $ hg commit -Aqm "more files to have two commits"
64 64 $ hg log -G -v
65 65 @ changeset: 1:c349430a1631
66 66 | tag: tip
67 67 | user: test
68 68 | date: Thu Jan 01 00:00:00 1970 +0000
69 69 | files: dir/i dir/j dir/nested/h dir/nested/m dir2/k dir2/l g
70 70 | description:
71 71 | more files to have two commits
72 72 |
73 73 |
74 74 o changeset: 0:4f23db756b09
75 75 user: test
76 76 date: Thu Jan 01 00:00:00 1970 +0000
77 77 files: dir/b dir/c dir/d dir/nested/a dir2/e f
78 78 description:
79 79 recreate a bunch of files to facilitate dirstate-v2 append
80 80
81 81
82 82 $ hg manifest
83 83 dir/b
84 84 dir/c
85 85 dir/d
86 86 dir/i
87 87 dir/j
88 88 dir/nested/a
89 89 dir/nested/h
90 90 dir/nested/m
91 91 dir2/e
92 92 dir2/k
93 93 dir2/l
94 94 f
95 95 g
96 96
97 97 Add some unknown files and refresh the dirstate
98 98
99 99 $ touch -t 200001010020 $extra_files
100 100 $ hg add dir/o
101 101 $ hg remove dir/nested/m
102 102
103 103 $ hg st --config devel.dirstate.v2.data_update_mode=force-new
104 104 A dir/o
105 105 R dir/nested/m
106 106 ? dir/n
107 107 ? p
108 108 ? q
109 109 $ hg debugstate
110 110 n 644 0 2000-01-01 00:00:00 dir/b
111 111 n 644 0 2000-01-01 00:00:00 dir/c
112 112 n 644 0 2000-01-01 00:00:00 dir/d
113 113 n 644 0 2000-01-01 00:10:00 dir/i
114 114 n 644 0 2000-01-01 00:10:00 dir/j
115 115 n 644 0 2000-01-01 00:00:00 dir/nested/a
116 116 n 644 0 2000-01-01 00:10:00 dir/nested/h
117 117 r ?????????????????????????????????? dir/nested/m (glob)
118 118 a ?????????????????????????????????? dir/o (glob)
119 119 n 644 0 2000-01-01 00:00:00 dir2/e
120 120 n 644 0 2000-01-01 00:10:00 dir2/k
121 121 n 644 0 2000-01-01 00:10:00 dir2/l
122 122 n 644 0 2000-01-01 00:00:00 f
123 123 n 644 0 2000-01-01 00:10:00 g
124 124 $ hg debugstate > ../reference
125 125 $ cd ..
126 126
127 127 Explain / verify the test principles
128 128 ------------------------------------
129 129
130 130 First, we can properly copy the reference
131 131
132 132 $ cp -a reference-repo sanity-check
133 133 $ cd sanity-check
134 134 $ hg debugstate
135 135 n 644 0 2000-01-01 00:00:00 dir/b
136 136 n 644 0 2000-01-01 00:00:00 dir/c
137 137 n 644 0 2000-01-01 00:00:00 dir/d
138 138 n 644 0 2000-01-01 00:10:00 dir/i
139 139 n 644 0 2000-01-01 00:10:00 dir/j
140 140 n 644 0 2000-01-01 00:00:00 dir/nested/a
141 141 n 644 0 2000-01-01 00:10:00 dir/nested/h
142 142 r ?????????????????????????????????? dir/nested/m (glob)
143 143 a ?????????????????????????????????? dir/o (glob)
144 144 n 644 0 2000-01-01 00:00:00 dir2/e
145 145 n 644 0 2000-01-01 00:10:00 dir2/k
146 146 n 644 0 2000-01-01 00:10:00 dir2/l
147 147 n 644 0 2000-01-01 00:00:00 f
148 148 n 644 0 2000-01-01 00:10:00 g
149 149 $ hg debugstate > ../post-copy
150 150 $ diff ../reference ../post-copy
151 151
152 152 And status thinks the cache is in a proper state
153 153
154 154 $ hg st
155 155 A dir/o
156 156 R dir/nested/m
157 157 ? dir/n
158 158 ? p
159 159 ? q
160 160 $ hg debugstate
161 161 n 644 0 2000-01-01 00:00:00 dir/b
162 162 n 644 0 2000-01-01 00:00:00 dir/c
163 163 n 644 0 2000-01-01 00:00:00 dir/d
164 164 n 644 0 2000-01-01 00:10:00 dir/i
165 165 n 644 0 2000-01-01 00:10:00 dir/j
166 166 n 644 0 2000-01-01 00:00:00 dir/nested/a
167 167 n 644 0 2000-01-01 00:10:00 dir/nested/h
168 168 r ?????????????????????????????????? dir/nested/m (glob)
169 169 a ?????????????????????????????????? dir/o (glob)
170 170 n 644 0 2000-01-01 00:00:00 dir2/e
171 171 n 644 0 2000-01-01 00:10:00 dir2/k
172 172 n 644 0 2000-01-01 00:10:00 dir2/l
173 173 n 644 0 2000-01-01 00:00:00 f
174 174 n 644 0 2000-01-01 00:10:00 g
175 175 $ hg debugstate > ../post-status
176 176 $ diff ../reference ../post-status
177 177
178 178 Then we can start a status that:
179 179 - has some update to do (the touch call)
180 180 - will wait AFTER running status, but before updating the cache on disk
181 181
182 182 $ touch -t 200001010001 dir/c
183 183 $ hg st >$TESTTMP/status-race-lock.out 2>$TESTTMP/status-race-lock.log \
184 184 > --config rhg.on-unsupported=abort \
185 185 > --config devel.sync.status.pre-dirstate-write-file=$TESTTMP/status-race-lock \
186 186 > &
187 187 $ $RUNTESTDIR/testlib/wait-on-file 5 $TESTTMP/status-race-lock.waiting
188 188
189 189 We check it runs the status first by modifying a file and updating another timestamp
190 190
191 191 $ touch -t 200001010003 dir/i
192 192 $ echo babar > dir/j
193 193 $ touch $TESTTMP/status-race-lock
194 194 $ wait
195 195
196 196 The test process should have reported a status before the change we made,
197 197 and should have missed the timestamp update
198 198
199 199 $ cat $TESTTMP/status-race-lock.out
200 200 A dir/o
201 201 R dir/nested/m
202 202 ? dir/n
203 203 ? p
204 204 ? q
205 205 $ cat $TESTTMP/status-race-lock.log
206 206 $ hg debugstate | grep dir/c
207 207 n 644 0 2000-01-01 00:01:00 dir/c
208 208 $ hg debugstate | grep dir/i
209 209 n 644 0 2000-01-01 00:10:00 dir/i
210 210 $ hg debugstate | grep dir/j
211 211 n 644 0 2000-01-01 00:10:00 dir/j
212 212
213 213 final cleanup
214 214
215 215 $ rm $TESTTMP/status-race-lock $TESTTMP/status-race-lock.waiting
216 216 $ cd ..
217 217
218 218 Actual Testing
219 219 ==============
220 220
221 221 Race with a `hg add`
222 222 -------------------
223 223
224 224 $ cp -a reference-repo race-with-add
225 225 $ cd race-with-add
226 226
227 227 spin a `hg status` with some caches to update
228 228
229 229 $ touch -t 200001020001 f
230 230 $ hg st >$TESTTMP/status-race-lock.out 2>$TESTTMP/status-race-lock.log \
231 231 > --config rhg.on-unsupported=abort \
232 232 > --config devel.sync.status.pre-dirstate-write-file=$TESTTMP/status-race-lock \
233 233 > &
234 234 $ $RUNTESTDIR/testlib/wait-on-file 5 $TESTTMP/status-race-lock.waiting
235 235
236 236 Add a file
237 237
238 238 $ hg $d2args add dir/n
239 239 $ touch $TESTTMP/status-race-lock
240 240 $ wait
241 241
242 242 The file should in a "added" state
243 243
244 244 $ hg status
245 A dir/n (no-rhg dirstate-v1 !)
246 A dir/n (no-dirstate-v1 !)
247 A dir/n (missing-correct-output rhg dirstate-v1 !)
245 A dir/n
248 246 A dir/o
249 247 R dir/nested/m
250 ? dir/n (known-bad-output rhg dirstate-v1 !)
251 248 ? p
252 249 ? q
253 250
254 251 The status process should return a consistent result and not crash.
255 252
256 253 $ cat $TESTTMP/status-race-lock.out
257 254 A dir/o
258 255 R dir/nested/m
259 256 ? dir/n
260 257 ? p
261 258 ? q
262 259 $ cat $TESTTMP/status-race-lock.log
263 260
264 261 final cleanup
265 262
266 263 $ rm $TESTTMP/status-race-lock $TESTTMP/status-race-lock.waiting
267 264 $ cd ..
268 265
269 266 Race with a `hg commit`
270 267 ----------------------
271 268
272 269 $ cp -a reference-repo race-with-commit
273 270 $ cd race-with-commit
274 271
275 272 spin a `hg status` with some caches to update
276 273
277 274 $ touch -t 200001020001 dir/j
278 275 $ hg st >$TESTTMP/status-race-lock.out 2>$TESTTMP/status-race-lock.log \
279 276 > --config rhg.on-unsupported=abort \
280 277 > --config devel.sync.status.pre-dirstate-write-file=$TESTTMP/status-race-lock \
281 278 > &
282 279 $ $RUNTESTDIR/testlib/wait-on-file 5 $TESTTMP/status-race-lock.waiting
283 280
284 281 Add a file and force the data file rewrite
285 282
286 283 $ hg $d2args commit -m created-during-status dir/o
287 284 $ touch $TESTTMP/status-race-lock
288 285 $ wait
289 286
290 287 The parent must change and the status should be clean
291 288
292 # XXX rhg misbehaves here
293 #if rhg dirstate-v1
294 $ hg summary
295 parent: 1:c349430a1631
296 more files to have two commits
297 branch: default
298 commit: 1 added, 1 removed, 3 unknown (new branch head)
299 update: 1 new changesets (update)
300 phases: 3 draft
301 $ hg status
302 A dir/o
303 R dir/nested/m
304 ? dir/n
305 ? p
306 ? q
307 #else
308 289 $ hg summary
309 290 parent: 2:2e3b442a2fd4 tip
310 291 created-during-status
311 292 branch: default
312 293 commit: 1 removed, 3 unknown
313 294 update: (current)
314 295 phases: 3 draft
315 296 $ hg status
316 297 R dir/nested/m
317 298 ? dir/n
318 299 ? p
319 300 ? q
320 #endif
321 301
322 302 The status process should return a consistent result and not crash.
323 303
324 304 $ cat $TESTTMP/status-race-lock.out
325 305 A dir/o
326 306 R dir/nested/m
327 307 ? dir/n
328 308 ? p
329 309 ? q
330 310 $ cat $TESTTMP/status-race-lock.log
331 311
332 312 final cleanup
333 313
334 314 $ rm $TESTTMP/status-race-lock $TESTTMP/status-race-lock.waiting
335 315 $ cd ..
336 316
337 317 Race with a `hg update`
338 318 ----------------------
339 319
340 320 $ cp -a reference-repo race-with-update
341 321 $ cd race-with-update
342 322
343 323 spin a `hg status` with some caches to update
344 324
345 325 $ touch -t 200001020001 dir2/k
346 326 $ hg st >$TESTTMP/status-race-lock.out 2>$TESTTMP/status-race-lock.log \
347 327 > --config rhg.on-unsupported=abort \
348 328 > --config devel.sync.status.pre-dirstate-write-file=$TESTTMP/status-race-lock \
349 329 > &
350 330 $ $RUNTESTDIR/testlib/wait-on-file 5 $TESTTMP/status-race-lock.waiting
351 331
352 332 Add a file and force the data file rewrite
353 333
354 334 $ hg $d2args update ".~1"
355 335 0 files updated, 0 files merged, 6 files removed, 0 files unresolved
356 336 $ touch $TESTTMP/status-race-lock
357 337 $ wait
358 338
359 339 The parent must change and the status should be clean
360 340
361 341 $ hg summary
362 342 parent: 0:4f23db756b09
363 343 recreate a bunch of files to facilitate dirstate-v2 append
364 344 branch: default
365 345 commit: 1 added, 3 unknown (new branch head)
366 346 update: 1 new changesets (update)
367 347 phases: 2 draft
368 348 $ hg status
369 349 A dir/o
370 350 ? dir/n
371 351 ? p
372 352 ? q
373 353
374 354 The status process should return a consistent result and not crash.
375 355
376 356 $ cat $TESTTMP/status-race-lock.out
377 357 A dir/o
378 358 R dir/nested/m
379 359 ? dir/n
380 360 ? p
381 361 ? q
382 362 $ cat $TESTTMP/status-race-lock.log
383 363
384 364 final cleanup
385 365
386 366 $ rm $TESTTMP/status-race-lock $TESTTMP/status-race-lock.waiting
387 367 $ cd ..
388 368
389 369 Race with another status
390 370 ------------------------
391 371
392 372 $ cp -a reference-repo race-with-status
393 373 $ cd race-with-status
394 374
395 375 spin a `hg status` with some caches to update
396 376
397 377 $ touch -t 200001010030 dir/nested/h
398 378 $ hg st >$TESTTMP/status-race-lock.out 2>$TESTTMP/status-race-lock.log \
399 379 > --config rhg.on-unsupported=abort \
400 380 > --config devel.sync.status.pre-dirstate-write-file=$TESTTMP/status-race-lock \
401 381 > &
402 382 $ $RUNTESTDIR/testlib/wait-on-file 5 $TESTTMP/status-race-lock.waiting
403 383
404 384 touch g
405 385
406 386 $ touch -t 200001010025 g
407 387 $ hg $d2args status
408 388 A dir/o
409 389 R dir/nested/m
410 390 ? dir/n
411 391 ? p
412 392 ? q
413 393 $ touch $TESTTMP/status-race-lock
414 394 $ wait
415 395
416 396 the first update should be on disk
417 397
418 398 $ hg debugstate --all | grep "g"
419 n 644 0 2000-01-01 00:10:00 g (known-bad-output rhg dirstate-v1 !)
420 n 644 0 2000-01-01 00:25:00 g (rhg no-dirstate-v1 !)
421 n 644 0 2000-01-01 00:25:00 g (no-rhg !)
399 n 644 0 2000-01-01 00:25:00 g
422 400
423 401 The status process should return a consistent result and not crash.
424 402
425 403 $ cat $TESTTMP/status-race-lock.out
426 404 A dir/o
427 405 R dir/nested/m
428 406 ? dir/n
429 407 ? p
430 408 ? q
431 409 $ cat $TESTTMP/status-race-lock.log
432 410
433 411 final cleanup
434 412
435 413 $ rm $TESTTMP/status-race-lock $TESTTMP/status-race-lock.waiting
436 414 $ cd ..
437 415
438 416 Race with the removal of an ambiguous file
439 417 ------------------------------------------
440 418
441 419 $ cp -a reference-repo race-with-remove
442 420 $ cd race-with-remove
443 421
444 422 spin a `hg status` with some caches to update
445 423
446 424 $ touch -t 200001010035 dir2/l
447 425 $ hg st >$TESTTMP/status-race-lock.out 2>$TESTTMP/status-race-lock.log \
448 426 > --config rhg.on-unsupported=abort \
449 427 > --config devel.sync.status.pre-dirstate-write-file=$TESTTMP/status-race-lock \
450 428 > &
451 429 $ $RUNTESTDIR/testlib/wait-on-file 5 $TESTTMP/status-race-lock.waiting
452 430
453 431 remove that same file
454 432
455 433 $ hg $d2args remove dir2/l
456 434 $ touch $TESTTMP/status-race-lock
457 435 $ wait
458 436
459 437 file should be marked as removed
460 438
461 439 $ hg status
462 440 A dir/o
463 441 R dir/nested/m
464 442 R dir2/l
465 443 ? dir/n
466 444 ? p
467 445 ? q
468 446
469 447 The status process should return a consistent result and not crash.
470 448
471 449 $ cat $TESTTMP/status-race-lock.out
472 450 A dir/o
473 451 R dir/nested/m
474 452 ? dir/n
475 453 ? p
476 454 ? q
477 455 $ cat $TESTTMP/status-race-lock.log
478 456
479 457 final cleanup
480 458
481 459 $ rm $TESTTMP/status-race-lock $TESTTMP/status-race-lock.waiting
482 460 $ cd ..
General Comments 0
You need to be logged in to leave comments. Login now