##// END OF EJS Templates
dirstate: make sure that status does not overlook the fallback flags...
marmoute -
r49112:50026041 default
parent child Browse files
Show More
@@ -1,1522 +1,1533 b''
1 1 # dirstate.py - working directory tracking for mercurial
2 2 #
3 3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import collections
11 11 import contextlib
12 12 import errno
13 13 import os
14 14 import stat
15 15
16 16 from .i18n import _
17 17 from .pycompat import delattr
18 18
19 19 from hgdemandimport import tracing
20 20
21 21 from . import (
22 22 dirstatemap,
23 23 encoding,
24 24 error,
25 25 match as matchmod,
26 26 pathutil,
27 27 policy,
28 28 pycompat,
29 29 scmutil,
30 30 sparse,
31 31 util,
32 32 )
33 33
34 34 from .dirstateutils import (
35 35 timestamp,
36 36 )
37 37
38 38 from .interfaces import (
39 39 dirstate as intdirstate,
40 40 util as interfaceutil,
41 41 )
42 42
43 43 parsers = policy.importmod('parsers')
44 44 rustmod = policy.importrust('dirstate')
45 45
46 46 HAS_FAST_DIRSTATE_V2 = rustmod is not None
47 47
48 48 propertycache = util.propertycache
49 49 filecache = scmutil.filecache
50 50 _rangemask = dirstatemap.rangemask
51 51
52 52 DirstateItem = dirstatemap.DirstateItem
53 53
54 54
class repocache(filecache):
    """filecache specialisation whose paths live under .hg/"""

    def join(self, obj, fname):
        # resolve the name through the owning object's opener
        opener = obj._opener
        return opener.join(fname)
60 60
61 61
class rootcache(filecache):
    """filecache specialisation whose paths live in the repository root"""

    def join(self, obj, fname):
        # delegate to the owning object's own path-joining helper
        joiner = obj._join
        return joiner(fname)
67 67
68 68
def _getfsnow(vfs):
    """Return the filesystem's notion of "now".

    A temporary file is created through ``vfs`` and its stat result is
    converted with ``timestamp.mtime_of``; the file is closed and unlinked
    whether or not the stat succeeds.
    """
    handle, scratch = vfs.mkstemp()
    try:
        st = os.fstat(handle)
        now = timestamp.mtime_of(st)
    finally:
        os.close(handle)
        vfs.unlink(scratch)
    return now
77 77
78 78
def requires_parents_change(func):
    """Decorator restricting a method to `parentchange` contexts.

    The returned wrapper raises ``error.ProgrammingError`` when
    ``self.pendingparentchange()`` is false; otherwise the call is forwarded
    to ``func`` with unchanged arguments and its result is returned.
    """
    # function-scope import so the module-level import list is untouched
    import functools

    # functools.wraps keeps the decorated method's __name__ and docstring;
    # without it every decorated method would introspect as "wrap"
    @functools.wraps(func)
    def wrap(self, *args, **kwargs):
        if not self.pendingparentchange():
            msg = 'calling `%s` outside of a parentchange context'
            msg %= func.__name__
            raise error.ProgrammingError(msg)
        return func(self, *args, **kwargs)

    return wrap
88 88
89 89
def requires_no_parents_change(func):
    """Decorator forbidding a method inside `parentchange` contexts.

    The returned wrapper raises ``error.ProgrammingError`` when
    ``self.pendingparentchange()`` is true; otherwise the call is forwarded
    to ``func`` with unchanged arguments and its result is returned.
    """
    # function-scope import so the module-level import list is untouched
    import functools

    # functools.wraps keeps the decorated method's __name__ and docstring;
    # without it every decorated method would introspect as "wrap"
    @functools.wraps(func)
    def wrap(self, *args, **kwargs):
        if self.pendingparentchange():
            msg = 'calling `%s` inside of a parentchange context'
            msg %= func.__name__
            raise error.ProgrammingError(msg)
        return func(self, *args, **kwargs)

    return wrap
99 99
100 100
101 101 @interfaceutil.implementer(intdirstate.idirstate)
102 102 class dirstate(object):
def __init__(
    self,
    opener,
    ui,
    root,
    validate,
    sparsematchfn,
    nodeconstants,
    use_dirstate_v2,
):
    """Create a new dirstate object.

    opener is an open()-like callable that can be used to open the
    dirstate file; root is the root of the directory tracked by
    the dirstate.
    """
    # collaborators supplied by the caller
    self._opener = opener
    self._ui = ui
    self._root = root
    self._validate = validate
    self._sparsematchfn = sparsematchfn
    self._nodeconstants = nodeconstants
    self._use_dirstate_v2 = use_dirstate_v2
    # ntpath.join(root, '') of Python 2.7.9 does not add sep if root is
    # UNC path pointing to root share (issue4557)
    self._rootdir = pathutil.normasprefix(root)
    # in-memory bookkeeping, all starting from a pristine state
    self._dirty = False
    self._lastnormaltime = timestamp.zero()
    self._filecache = {}
    self._parentwriters = 0
    self._plchangecallbacks = {}
    self._origpl = None
    # on-disk file names
    self._filename = b'dirstate'
    self._pendingfilename = b'%s.pending' % self._filename
    self._mapcls = dirstatemap.dirstatemap
    # Access and cache cwd early, so we don't access it for the first time
    # after a working-copy update caused it to not exist (accessing it then
    # raises an exception).
    self._cwd
142 142
def prefetch_parents(self):
    """Force the parents to be loaded now.

    Used to avoid a race condition.
    """
    # the attribute read is the whole point; the value is discarded
    _ = self._pl
149 149
@contextlib.contextmanager
def parentchange(self):
    """Context manager bracketing a change of the dirstate parents.

    If an exception occurs in the scope of the context manager,
    the incoherent dirstate won't be written when wlock is
    released.
    """
    self._parentwriters = self._parentwriters + 1
    yield
    # Deliberately NOT wrapped in try/finally: the counter must only be
    # decremented when the body of the with statement exits normally, so
    # that a failure leaves the dirstate flagged as mid-change.
    self._parentwriters = self._parentwriters - 1
166 166
def pendingparentchange(self):
    """Tell whether a set of changes modifying the dirstate parent is
    currently in progress (at least one parent writer is registered).
    """
    return 0 < self._parentwriters
172 172
@propertycache
def _map(self):
    """Return the dirstate contents (see documentation for dirstatemap)."""
    factory = self._mapcls
    # store the freshly built map on the instance before returning it
    self._map = factory(
        self._ui,
        self._opener,
        self._root,
        self._nodeconstants,
        self._use_dirstate_v2,
    )
    return self._map
184 184
185 185 @property
186 186 def _sparsematcher(self):
187 187 """The matcher for the sparse checkout.
188 188
189 189 The working directory may not include every file from a manifest. The
190 190 matcher obtained by this property will match a path if it is to be
191 191 included in the working directory.
192 192 """
193 193 # TODO there is potential to cache this property. For now, the matcher
194 194 # is resolved on every access. (But the called function does use a
195 195 # cache to keep the lookup fast.)
196 196 return self._sparsematchfn()
197 197
@repocache(b'branch')
def _branch(self):
    """Read the branch name through the opener.

    Falls back to b"default" when the file is missing or its stripped
    content is empty; any other IOError is re-raised.
    """
    try:
        raw = self._opener.read(b"branch")
    except IOError as inst:
        if inst.errno == errno.ENOENT:
            return b"default"
        raise
    return raw.strip() or b"default"
206 206
207 207 @property
208 208 def _pl(self):
209 209 return self._map.parents()
210 210
def hasdir(self, d):
    """Ask the dirstate map whether ``d`` is a tracked directory."""
    answer = self._map.hastrackeddir(d)
    return answer
213 213
@rootcache(b'.hgignore')
def _ignore(self):
    """Build the ignore matcher from the files listed by _ignorefiles().

    When there is no ignore file at all, a matcher matching nothing is
    returned.
    """
    files = self._ignorefiles()
    if files:
        pats = [b'include:%s' % f for f in files]
        return matchmod.match(
            self._root, b'', [], pats, warn=self._ui.warn
        )
    return matchmod.never()
222 222
@propertycache
def _slash(self):
    """Whether ui.slash is set and the OS separator is not b'/'."""
    wanted = self._ui.configbool(b'ui', b'slash')
    if not wanted:
        # hand back the falsy config value itself, as `and` would
        return wanted
    return pycompat.ossep != b'/'
226 226
@propertycache
def _checklink(self):
    """Cached result of util.checklink() for the repository root."""
    root = self._root
    return util.checklink(root)
230 230
@propertycache
def _checkexec(self):
    """Cached util.checkexec() result for the root, coerced to bool."""
    supported = util.checkexec(self._root)
    return bool(supported)
234 234
@propertycache
def _checkcase(self):
    """True when util.fscasesensitive() reports the .hg path as not
    case sensitive."""
    hgdir = self._join(b'.hg')
    sensitive = util.fscasesensitive(hgdir)
    return not sensitive
238 238
239 239 def _join(self, f):
240 240 # much faster than os.path.join()
241 241 # it's safe because f is always a relative path
242 242 return self._rootdir + f
243 243
def flagfunc(self, buildfallback):
    """build a callable that returns flags associated with a filename

    The information is extracted from three possible layers:
    1. the file system if it supports the information
    2. the "fallback" information stored in the dirstate if any
    3. a more expensive mechanism inferring the flags from the parents.

    ``buildfallback`` is a zero-argument callable; it is invoked at most
    once, and only when layer 3 is actually needed. The returned callable
    takes a filename and returns b'l', b'x' or b''.
    """

    # small hack to cache the result of buildfallback()
    fallback_func = []

    def get_flags(x):
        # entry and fallback_value are computed lazily and shared between
        # the symlink check and the exec check below
        entry = None
        fallback_value = None
        try:
            st = os.lstat(self._join(x))
        except OSError:
            # the file cannot be stat'ed: report no flags
            return b''

        if self._checklink:
            # layer 1: trust the stat result
            if util.statislink(st):
                return b'l'
        else:
            entry = self.get_entry(x)
            if entry.has_fallback_symlink:
                # layer 2: value recorded in the dirstate entry
                if entry.fallback_symlink:
                    return b'l'
            else:
                # layer 3: ask the (lazily built) fallback function
                if not fallback_func:
                    fallback_func.append(buildfallback())
                fallback_value = fallback_func[0](x)
                if b'l' in fallback_value:
                    return b'l'

        if self._checkexec:
            # layer 1: trust the stat result
            if util.statisexec(st):
                return b'x'
        else:
            if entry is None:
                entry = self.get_entry(x)
            if entry.has_fallback_exec:
                # layer 2: value recorded in the dirstate entry
                if entry.fallback_exec:
                    return b'x'
            else:
                # layer 3: reuse the value computed for the symlink check
                # when there is one
                if fallback_value is None:
                    if not fallback_func:
                        fallback_func.append(buildfallback())
                    fallback_value = fallback_func[0](x)
                if b'x' in fallback_value:
                    return b'x'
        return b''

    return get_flags
298 298
@propertycache
def _cwd(self):
    """The configured ui.forcecwd when set, else encoding.getcwd()."""
    # internal config: ui.forcecwd
    forced = self._ui.config(b'ui', b'forcecwd')
    return forced or encoding.getcwd()
306 306
def getcwd(self):
    """Return the path from which a canonical path is calculated.

    This path should be used to resolve file patterns or to convert
    canonical paths back to file paths for display. It shouldn't be
    used to get real file paths. Use vfs functions instead.
    """
    here = self._cwd
    root = self._root
    if here == root:
        return b''
    # self._root ends with a path separator if self._root is '/' or 'C:\'
    if util.endswithsep(root):
        prefix = root
    else:
        prefix = root + pycompat.ossep
    if not here.startswith(prefix):
        # we're outside the repo. return an absolute path.
        return here
    return here[len(prefix) :]
326 326
def pathto(self, f, cwd=None):
    """Compute util.pathto() for ``f`` from ``cwd``.

    ``cwd`` defaults to self.getcwd(); the result goes through
    util.pconvert() when self._slash is set.
    """
    base = self.getcwd() if cwd is None else cwd
    relative = util.pathto(self._root, base, f)
    return util.pconvert(relative) if self._slash else relative
334 334
def __getitem__(self, key):
    """Return the current state of key (a filename) in the dirstate.

    States are:
      n  normal
      m  needs merging
      r  marked for removal
      a  marked for addition
      ?  not tracked

    XXX The "state" is a bit obscure to be in the "public" API. we should
    consider migrating all user of this to going through the dirstate entry
    instead.
    """
    msg = b"don't use dirstate[file], use dirstate.get_entry(file)"
    util.nouideprecwarn(msg, b'6.1', stacklevel=2)
    found = self._map.get(key)
    # unknown files are reported with the "not tracked" marker
    return b'?' if found is None else found.state
355 355
def get_entry(self, path):
    """Return the DirstateItem stored for ``path``.

    A freshly constructed DirstateItem() is returned when the map has no
    entry for it.
    """
    found = self._map.get(path)
    return DirstateItem() if found is None else found
362 362
363 363 def __contains__(self, key):
364 364 return key in self._map
365 365
366 366 def __iter__(self):
367 367 return iter(sorted(self._map))
368 368
def items(self):
    """Return pycompat.iteritems() over the dirstate map."""
    dmap = self._map
    return pycompat.iteritems(dmap)

# alias for callers still using the old name
iteritems = items
373 373
def parents(self):
    """Return the list of parents, each passed through self._validate."""
    validated = []
    for node in self._pl:
        validated.append(self._validate(node))
    return validated
376 376
def p1(self):
    """Return the first parent, passed through self._validate."""
    first = self._pl[0]
    return self._validate(first)
379 379
def p2(self):
    """Return the second parent, passed through self._validate."""
    second = self._pl[1]
    return self._validate(second)
382 382
@property
def in_merge(self):
    """True if a merge is in progress (second parent is not nullid)"""
    second = self._pl[1]
    return second != self._nodeconstants.nullid
387 387
def branch(self):
    """Return the stored branch name converted with encoding.tolocal()."""
    stored = self._branch
    return encoding.tolocal(stored)
390 390
def setparents(self, p1, p2=None):
    """Set dirstate parents to p1 and p2.

    When moving from two parents to one, "merged" entries are
    adjusted to normal and previous copy records discarded and
    returned by the call.

    Raises ValueError when called outside of a parentchange context.

    See localrepo.setparents()
    """
    nullid = self._nodeconstants.nullid
    if p2 is None:
        p2 = nullid
    if self._parentwriters == 0:
        raise ValueError(
            b"cannot set dirstate parent outside of "
            b"dirstate.parentchange context manager"
        )

    self._dirty = True
    previous = self._pl
    if self._origpl is None:
        # remember the parents seen before the first change
        self._origpl = previous
    # True if we need to fold p2 related state back to a linear case
    leaving_merge = previous[1] != nullid and p2 == nullid
    return self._map.setparents(p1, p2, fold_p2=leaving_merge)
416 416
def setbranch(self, branch):
    """Record ``branch`` in the cached value and in the branch file.

    The file is written through an atomic temp file; on any failure the
    temp file is discarded and the exception propagates.
    """
    encoded = encoding.fromlocal(branch)
    self.__class__._branch.set(self, encoded)
    fp = self._opener(b'branch', b'w', atomictemp=True, checkambig=True)
    try:
        fp.write(self._branch + b'\n')
        fp.close()

        # make sure filecache has the correct stat info for _branch after
        # replacing the underlying file
        cache_entry = self._filecache[b'_branch']
        if cache_entry:
            cache_entry.refresh()
    except:  # re-raises
        fp.discard()
        raise
432 432
def invalidate(self):
    """Causes the next access to reread the dirstate.

    This is different from localrepo.invalidatedirstate() because it always
    rereads the dirstate. Use localrepo.invalidatedirstate() if you want to
    check whether the dirstate has changed before rereading it."""

    cached = self.__dict__
    for name in ("_map", "_branch", "_ignore"):
        # only drop values that were actually cached on the instance
        if name in cached:
            delattr(self, name)
    # reset the in-memory bookkeeping
    self._lastnormaltime = timestamp.zero()
    self._dirty = False
    self._parentwriters = 0
    self._origpl = None
447 447
def copy(self, source, dest):
    """Mark dest as a copy of source. Unmark dest if source is None."""
    if source == dest:
        return
    self._dirty = True
    copymap = self._map.copymap
    if source is None:
        copymap.pop(dest, None)
    else:
        copymap[dest] = source
457 457
def copied(self, file):
    """Return the recorded copy source of ``file``, or None."""
    copymap = self._map.copymap
    return copymap.get(file)
460 460
def copies(self):
    """Return the copy map held by the dirstate map."""
    dmap = self._map
    return dmap.copymap
463 463
@requires_no_parents_change
def set_tracked(self, filename):
    """a "public" method for generic code to mark a file as tracked

    This function is to be called outside of "update/merge" case. For
    example by a command like `hg add X`.

    Returns the result of the map's set_tracked(): True if the file was
    previously untracked, False otherwise.
    """
    self._dirty = True
    known = self._map.get(filename)
    already_tracked = known is not None and known.tracked
    if not already_tracked:
        # newly tracked names must pass the name-clash checks first
        self._check_new_tracked_filename(filename)
    return self._map.set_tracked(filename)
478 478
@requires_no_parents_change
def set_untracked(self, filename):
    """a "public" method for generic code to mark a file as untracked

    This function is to be called outside of "update/merge" case. For
    example by a command like `hg remove X`.

    Returns the result of the map's set_untracked(): True if the file was
    previously tracked, False otherwise.
    """
    changed = self._map.set_untracked(filename)
    if changed:
        # only flag the dirstate when something actually changed
        self._dirty = True
    return changed
492 492
@requires_no_parents_change
def set_clean(self, filename, parentfiledata=None):
    """record that the current state of the file on disk is known to be clean

    ``parentfiledata`` is an optional (mode, size, mtime) triple; when it
    is not given the values are read with self._get_filedata().
    """
    self._dirty = True
    filedata = parentfiledata if parentfiledata else self._get_filedata(filename)
    (mode, size, mtime) = filedata
    if not self._map[filename].tracked:
        self._check_new_tracked_filename(filename)
    self._map.set_clean(filename, mode, size, mtime)
    if mtime > self._lastnormaltime:
        # Remember the most recent modification timeslot for status(),
        # to make sure we won't miss future size-preserving file content
        # modifications that happen within the same timeslot.
        self._lastnormaltime = mtime
509 509
@requires_no_parents_change
def set_possibly_dirty(self, filename):
    """record that the current state of the file on disk is unknown"""
    self._dirty = True
    dmap = self._map
    dmap.set_possibly_dirty(filename)
515 515
@requires_parents_change
def update_file_p1(
    self,
    filename,
    p1_tracked,
):
    """Set a file as tracked in the parent (or not)

    This is to be called when adjust the dirstate to a new parent after an history
    rewriting operation.

    It should not be called during a merge (p2 != nullid) and only within
    a `with dirstate.parentchange():` context.

    Raises error.ProgrammingError when a merge is in progress.
    """
    if self.in_merge:
        msg = b'update_file_reference should not be called when merging'
        raise error.ProgrammingError(msg)
    # the working-copy tracking status is taken from the existing entry
    entry = self._map.get(filename)
    if entry is None:
        wc_tracked = False
    else:
        wc_tracked = entry.tracked
    if not (p1_tracked or wc_tracked):
        # the file is no longer relevant to anyone
        if self._map.get(filename) is not None:
            self._map.reset_state(filename)
            self._dirty = True
    elif (not p1_tracked) and wc_tracked:
        if entry is not None and entry.added:
            return  # avoid dropping copy information (maybe?)

    # NOTE(review): every path that did not return above reaches this
    # point, including the "no longer relevant" branch -- confirm that the
    # second reset_state() call below is intended in that case.
    parentfiledata = None
    if wc_tracked and p1_tracked:
        parentfiledata = self._get_filedata(filename)

    self._map.reset_state(
        filename,
        wc_tracked,
        p1_tracked,
        # the underlying reference might have changed, we will have to
        # check it.
        has_meaningful_mtime=False,
        parentfiledata=parentfiledata,
    )
    if (
        parentfiledata is not None
        and parentfiledata[2] > self._lastnormaltime
    ):
        # Remember the most recent modification timeslot for status(),
        # to make sure we won't miss future size-preserving file content
        # modifications that happen within the same timeslot.
        self._lastnormaltime = parentfiledata[2]
568 568
@requires_parents_change
def update_file(
    self,
    filename,
    wc_tracked,
    p1_tracked,
    p2_info=False,
    possibly_dirty=False,
    parentfiledata=None,
):
    """update the information about a file in the dirstate

    This is to be called when the dirstate's parent changes to keep track
    of what is the file situation in regards to the working copy and its parent.

    This function must be called within a `dirstate.parentchange` context.

    note: the API is at an early stage and we might need to adjust it
    depending of what information ends up being relevant and useful to
    other processing.
    """

    # note: I do not think we need to double check name clash here since we
    # are in a update/merge case that should already have taken care of
    # this. The test agrees

    self._dirty = True

    # file data is only needed for entries tracked on both sides that are
    # neither possibly dirty nor carrying p2 information
    wants_filedata = (
        wc_tracked and p1_tracked and not p2_info and not possibly_dirty
    )
    if parentfiledata is None and wants_filedata:
        parentfiledata = self._get_filedata(filename)

    self._map.reset_state(
        filename,
        wc_tracked,
        p1_tracked,
        p2_info=p2_info,
        has_meaningful_mtime=not possibly_dirty,
        parentfiledata=parentfiledata,
    )
    if parentfiledata is not None:
        mtime = parentfiledata[2]
        if mtime > self._lastnormaltime:
            # Remember the most recent modification timeslot for status(),
            # to make sure we won't miss future size-preserving file content
            # modifications that happen within the same timeslot.
            self._lastnormaltime = mtime
620 620
def _check_new_tracked_filename(self, filename):
    """Validate a filename about to become tracked.

    Aborts when ``filename`` is already a tracked directory, or when one of
    its parent paths is a non-removed file entry of the dirstate.
    """
    scmutil.checkfilename(filename)
    if self._map.hastrackeddir(filename):
        msg = _(b'directory %r already in dirstate')
        msg %= pycompat.bytestr(filename)
        raise error.Abort(msg)
    # shadows
    for parent in pathutil.finddirs(filename):
        if self._map.hastrackeddir(parent):
            # a tracked directory ends the search
            break
        shadow = self._map.get(parent)
        if shadow is None or shadow.removed:
            continue
        msg = _(b'file %r in dirstate clashes with %r')
        msg %= (pycompat.bytestr(parent), pycompat.bytestr(filename))
        raise error.Abort(msg)
636 636
def _get_filedata(self, filename):
    """Return (mode, size, mtime) from an lstat() of ``filename``.

    mtime is the value produced by timestamp.mtime_of() for the stat
    result.
    """
    st = os.lstat(self._join(filename))
    return (st.st_mode, st.st_size, timestamp.mtime_of(st))
644 644
def _discoverpath(self, path, normed, ignoremissing, exists, storemap):
    """Work out the folded form of ``path`` and return it.

    ``normed`` is the case-normalized form of ``path``. ``exists`` may be
    None, in which case existence is probed on disk. The result is cached
    in ``storemap`` under ``normed`` only when the path exists.
    """
    if exists is None:
        exists = os.path.lexists(os.path.join(self._root, path))
    if not exists:
        # Maybe a path component exists
        if not ignoremissing and b'/' in path:
            d, f = path.rsplit(b'/', 1)
            # normalize the directory part only; the final component is
            # kept as given
            d = self._normalize(d, False, ignoremissing, None)
            folded = d + b"/" + f
        else:
            # No path components, preserve original case
            folded = path
    else:
        # recursively normalize leading directory components
        # against dirstate
        if b'/' in normed:
            d, f = normed.rsplit(b'/', 1)
            d = self._normalize(d, False, ignoremissing, True)
            r = self._root + b"/" + d
            folded = d + b"/" + util.fspath(f, r)
        else:
            folded = util.fspath(normed, self._root)
        # only results for existing paths are remembered
        storemap[normed] = folded

    return folded
670 670
def _normalizefile(self, path, isknown, ignoremissing=False, exists=None):
    """Fold ``path`` using the file fold map only.

    A cached entry wins; otherwise ``path`` is returned untouched when
    ``isknown`` is true, and discovered via _discoverpath() when not.
    """
    normed = util.normcase(path)
    folded = self._map.filefoldmap.get(normed, None)
    if folded is not None:
        return folded
    if isknown:
        return path
    return self._discoverpath(
        path, normed, ignoremissing, exists, self._map.filefoldmap
    )
682 682
def _normalize(self, path, isknown, ignoremissing=False, exists=None):
    """Fold ``path`` using the file fold map, then the directory fold map.

    When neither map knows the name, ``path`` is returned untouched if
    ``isknown`` is true, and discovered via _discoverpath() otherwise.
    """
    normed = util.normcase(path)
    folded = self._map.filefoldmap.get(normed, None)
    if folded is None:
        folded = self._map.dirfoldmap.get(normed, None)
    if folded is not None:
        return folded
    if isknown:
        return path
    # store discovered result in dirfoldmap so that future
    # normalizefile calls don't start matching directories
    return self._discoverpath(
        path, normed, ignoremissing, exists, self._map.dirfoldmap
    )
698 698
def normalize(self, path, isknown=False, ignoremissing=False):
    """
    normalize the case of a pathname when on a casefolding filesystem

    isknown specifies whether the filename came from walking the
    disk, to avoid extra filesystem access.

    If ignoremissing is True, missing path are returned
    unchanged. Otherwise, we try harder to normalize possibly
    existing path components.

    The normalized case is determined based on the following precedence:

    - version of name already stored in the dirstate
    - version of name stored on disk
    - version provided via command arguments
    """

    if not self._checkcase:
        # nothing to fold when self._checkcase is false
        return path
    return self._normalize(path, isknown, ignoremissing)
720 720
def clear(self):
    """Empty the dirstate map, reset the last-normal time and flag the
    dirstate as dirty."""
    dmap = self._map
    dmap.clear()
    self._lastnormaltime = timestamp.zero()
    self._dirty = True
725 725
def rebuild(self, parent, allfiles, changedfiles=None):
    """Reset dirstate entries against ``parent``.

    With ``changedfiles`` left to None every entry is rebuilt from
    ``allfiles``; otherwise only the changed files are touched: those
    present in ``allfiles`` are reset as tracked, the others are dropped.
    """
    if changedfiles is None:
        # Rebuild entire dirstate
        to_lookup, to_drop = allfiles, []
        # clear() resets the last-normal time; carry it across
        saved = self._lastnormaltime
        self.clear()
        self._lastnormaltime = saved
    elif len(changedfiles) < 10:
        # Avoid turning allfiles into a set, which can be expensive if it's
        # large.
        to_lookup, to_drop = [], []
        for f in changedfiles:
            bucket = to_lookup if f in allfiles else to_drop
            bucket.append(f)
    else:
        wanted = set(changedfiles)
        to_lookup = wanted & set(allfiles)
        to_drop = wanted - to_lookup

    if self._origpl is None:
        self._origpl = self._pl
    self._map.setparents(parent, self._nodeconstants.nullid)

    for f in to_lookup:
        if not self.in_merge:
            self._map.reset_state(
                f,
                wc_tracked=True,
                p1_tracked=True,
            )
        else:
            self.set_tracked(f)
    for f in to_drop:
        self._map.reset_state(f)

    self._dirty = True
767 767
def identity(self):
    """Return identity of dirstate itself to detect changing in storage

    If identity of previous dirstate is equal to this, writing
    changes based on the former dirstate out can keep consistency.
    """
    dmap = self._map
    return dmap.identity
775 775
def write(self, tr):
    """Write the dirstate out if it is dirty.

    Without a transaction the file is written immediately through an
    atomic temp file. With a transaction, a file generator is registered
    on ``tr`` so the write is performed later.
    """
    if not self._dirty:
        return

    if not tr:
        st = self._opener(
            self._filename, b"w", atomictemp=True, checkambig=True
        )
        self._writedirstate(tr, st)
        return

    # 'dirstate.write()' is not only for writing in-memory
    # changes out, but also for dropping ambiguous timestamp.
    # delayed writing re-raise "ambiguous timestamp issue".
    # See also the wiki page below for detail:
    # https://www.mercurial-scm.org/wiki/DirstateTransactionPlan

    # record when mtime start to be ambiguous
    now = _getfsnow(self._opener)

    def _delayed(f):
        return self._writedirstate(tr, f, now=now)

    # delay writing in-memory changes out
    tr.addfilegenerator(
        b'dirstate',
        (self._filename,),
        _delayed,
        location=b'plain',
    )
802 802
def addparentchangecallback(self, category, callback):
    """add a callback to be called when the wd parents are changed

    Callback will be called with the following arguments:
        dirstate, (oldp1, oldp2), (newp1, newp2)

    Category is a unique identifier to allow overwriting an old callback
    with a newer callback.
    """
    registry = self._plchangecallbacks
    registry[category] = callback
813 813
def _writedirstate(self, tr, st, now=None):
    """Notify parent-change callbacks, then write the map to ``st``.

    ``now`` defaults to the mtime of ``st``. Afterwards the last-normal
    time is reset and the dirstate is flagged as clean.
    """
    # notify callbacks about parents change
    if self._origpl is not None and self._origpl != self._pl:
        ordered = sorted(pycompat.iteritems(self._plchangecallbacks))
        for _category, callback in ordered:
            callback(self, self._origpl, self._pl)
        self._origpl = None

    if now is None:
        # use the modification time of the newly created temporary file as the
        # filesystem's notion of 'now'
        now = timestamp.mtime_of(util.fstat(st))

    # enough 'delaywrite' prevents 'pack_dirstate' from dropping
    # timestamp of each entries in dirstate, because of 'now > mtime'
    delaywrite = self._ui.configint(b'debug', b'dirstate.delaywrite')
    # do we have any files to delay for?
    if delaywrite > 0 and any(
        e.need_delay(now) for _f, e in pycompat.iteritems(self._map)
    ):
        import time  # to avoid useless import

        # rather than sleep n seconds, sleep until the next
        # multiple of n seconds
        clock = time.time()
        start = int(clock) - (int(clock) % delaywrite)
        end = start + delaywrite
        time.sleep(end - clock)
        # trust our estimate that the end is near now
        now = timestamp.timestamp((end, 0))

    self._map.write(tr, st, now)
    self._lastnormaltime = timestamp.zero()
    self._dirty = False
850 850
def _dirignore(self, f):
    """True when ``f`` itself, or any path yielded by
    pathutil.finddirs(f), is matched by the ignore matcher."""
    if self._ignore(f):
        return True
    return any(self._ignore(p) for p in pathutil.finddirs(f))
858 858
def _ignorefiles(self):
    """List the ignore files to read.

    That is the root .hgignore when it exists, followed by every path
    configured as ``ui.ignore`` or ``ui.ignore.*``.
    """
    found = []
    hgignore = self._join(b'.hgignore')
    if os.path.exists(hgignore):
        found.append(hgignore)
    for name, path in self._ui.configitems(b"ui"):
        if name != b'ignore' and not name.startswith(b'ignore.'):
            continue
        # we need to use os.path.join here rather than self._join
        # because path is arbitrary and user-specified
        found.append(os.path.join(self._rootdir, util.expandpath(path)))
    return found
869 869
def _ignorefileandline(self, f):
    """Find the ignore pattern matching ``f``.

    Returns (file, lineno, line) for the first matching pattern, following
    subincludes, or (None, -1, b"") when nothing matches.
    """
    pending = collections.deque(self._ignorefiles())
    seen = set()
    while pending:
        current = pending.popleft()
        patterns = matchmod.readpatternfile(
            current, self._ui.warn, sourceinfo=True
        )
        for pattern, lineno, line in patterns:
            kind, p = matchmod._patsplit(pattern, b'glob')
            if kind == b"subinclude":
                # queue the included file unless it was already processed
                if p not in seen:
                    pending.append(p)
                continue
            matcher = matchmod.match(
                self._root, b'', [], [pattern], warn=self._ui.warn
            )
            if matcher(f):
                return (current, lineno, line)
        seen.add(current)
    return (None, -1, b"")
891 891
892 892 def _walkexplicit(self, match, subrepos):
893 893 """Get stat data about the files explicitly specified by match.
894 894
895 895 Return a triple (results, dirsfound, dirsnotfound).
896 896 - results is a mapping from filename to stat result. It also contains
897 897 listings mapping subrepos and .hg to None.
898 898 - dirsfound is a list of files found to be directories.
899 899 - dirsnotfound is a list of files that the dirstate thinks are
900 900 directories and that were not found."""
901 901
902 902 def badtype(mode):
903 903 kind = _(b'unknown')
904 904 if stat.S_ISCHR(mode):
905 905 kind = _(b'character device')
906 906 elif stat.S_ISBLK(mode):
907 907 kind = _(b'block device')
908 908 elif stat.S_ISFIFO(mode):
909 909 kind = _(b'fifo')
910 910 elif stat.S_ISSOCK(mode):
911 911 kind = _(b'socket')
912 912 elif stat.S_ISDIR(mode):
913 913 kind = _(b'directory')
914 914 return _(b'unsupported file type (type is %s)') % kind
915 915
916 916 badfn = match.bad
917 917 dmap = self._map
918 918 lstat = os.lstat
919 919 getkind = stat.S_IFMT
920 920 dirkind = stat.S_IFDIR
921 921 regkind = stat.S_IFREG
922 922 lnkkind = stat.S_IFLNK
923 923 join = self._join
924 924 dirsfound = []
925 925 foundadd = dirsfound.append
926 926 dirsnotfound = []
927 927 notfoundadd = dirsnotfound.append
928 928
929 929 if not match.isexact() and self._checkcase:
930 930 normalize = self._normalize
931 931 else:
932 932 normalize = None
933 933
934 934 files = sorted(match.files())
935 935 subrepos.sort()
936 936 i, j = 0, 0
937 937 while i < len(files) and j < len(subrepos):
938 938 subpath = subrepos[j] + b"/"
939 939 if files[i] < subpath:
940 940 i += 1
941 941 continue
942 942 while i < len(files) and files[i].startswith(subpath):
943 943 del files[i]
944 944 j += 1
945 945
946 946 if not files or b'' in files:
947 947 files = [b'']
948 948 # constructing the foldmap is expensive, so don't do it for the
949 949 # common case where files is ['']
950 950 normalize = None
951 951 results = dict.fromkeys(subrepos)
952 952 results[b'.hg'] = None
953 953
954 954 for ff in files:
955 955 if normalize:
956 956 nf = normalize(ff, False, True)
957 957 else:
958 958 nf = ff
959 959 if nf in results:
960 960 continue
961 961
962 962 try:
963 963 st = lstat(join(nf))
964 964 kind = getkind(st.st_mode)
965 965 if kind == dirkind:
966 966 if nf in dmap:
967 967 # file replaced by dir on disk but still in dirstate
968 968 results[nf] = None
969 969 foundadd((nf, ff))
970 970 elif kind == regkind or kind == lnkkind:
971 971 results[nf] = st
972 972 else:
973 973 badfn(ff, badtype(kind))
974 974 if nf in dmap:
975 975 results[nf] = None
976 976 except OSError as inst: # nf not found on disk - it is dirstate only
977 977 if nf in dmap: # does it exactly match a missing file?
978 978 results[nf] = None
979 979 else: # does it match a missing directory?
980 980 if self._map.hasdir(nf):
981 981 notfoundadd(nf)
982 982 else:
983 983 badfn(ff, encoding.strtolocal(inst.strerror))
984 984
985 985 # match.files() may contain explicitly-specified paths that shouldn't
986 986 # be taken; drop them from the list of files found. dirsfound/notfound
987 987 # aren't filtered here because they will be tested later.
988 988 if match.anypats():
989 989 for f in list(results):
990 990 if f == b'.hg' or f in subrepos:
991 991 # keep sentinel to disable further out-of-repo walks
992 992 continue
993 993 if not match(f):
994 994 del results[f]
995 995
996 996 # Case insensitive filesystems cannot rely on lstat() failing to detect
997 997 # a case-only rename. Prune the stat object for any file that does not
998 998 # match the case in the filesystem, if there are multiple files that
999 999 # normalize to the same path.
1000 1000 if match.isexact() and self._checkcase:
1001 1001 normed = {}
1002 1002
1003 1003 for f, st in pycompat.iteritems(results):
1004 1004 if st is None:
1005 1005 continue
1006 1006
1007 1007 nc = util.normcase(f)
1008 1008 paths = normed.get(nc)
1009 1009
1010 1010 if paths is None:
1011 1011 paths = set()
1012 1012 normed[nc] = paths
1013 1013
1014 1014 paths.add(f)
1015 1015
1016 1016 for norm, paths in pycompat.iteritems(normed):
1017 1017 if len(paths) > 1:
1018 1018 for path in paths:
1019 1019 folded = self._discoverpath(
1020 1020 path, norm, True, None, self._map.dirfoldmap
1021 1021 )
1022 1022 if path != folded:
1023 1023 results[path] = None
1024 1024
1025 1025 return results, dirsfound, dirsnotfound
1026 1026
def walk(self, match, subrepos, unknown, ignored, full=True):
    """
    Walk recursively through the directory tree, finding all files
    matched by match.

    If full is False, maybe skip some known-clean files.

    ``subrepos`` is forwarded to ``_walkexplicit``; the entries it seeds
    for those paths (and for ``.hg``) are deleted from the result before
    step 3.

    ``unknown`` and ``ignored`` select the ignore predicates used while
    traversing: with ``ignored`` nothing is treated as ignored, with only
    ``unknown`` the dirstate's own ignore functions are used, and with
    neither everything is treated as ignored (so no directory recursion).

    Return a dict mapping filename to stat-like object (either
    mercurial.osutil.stat instance or return value of os.stat()).
    Entries may also map to None (e.g. when lstat() fails in step 3).

    """
    # full is a flag that extensions that hook into walk can use -- this
    # implementation doesn't use it at all. This satisfies the contract
    # because we only guarantee a "maybe".

    if ignored:
        ignore = util.never
        dirignore = util.never
    elif unknown:
        ignore = self._ignore
        dirignore = self._dirignore
    else:
        # if not unknown and not ignored, drop dir recursion and step 2
        ignore = util.always
        dirignore = util.always

    # bind frequently used callables/constants to locals for the loops below
    matchfn = match.matchfn
    matchalways = match.always()
    matchtdir = match.traversedir
    dmap = self._map
    listdir = util.listdir
    lstat = os.lstat
    dirkind = stat.S_IFDIR
    regkind = stat.S_IFREG
    lnkkind = stat.S_IFLNK
    join = self._join

    exact = skipstep3 = False
    if match.isexact():  # match.exact
        exact = True
        dirignore = util.always  # skip step 2
    elif match.prefix():  # match.match, no patterns
        skipstep3 = True

    if not exact and self._checkcase:
        normalize = self._normalize
        normalizefile = self._normalizefile
        # step 3 is never skipped when self._checkcase is set
        skipstep3 = False
    else:
        normalize = self._normalize
        normalizefile = None

    # step 1: find all explicit files
    results, work, dirsnotfound = self._walkexplicit(match, subrepos)
    if matchtdir:
        # work holds (nf, ff) pairs; dirsnotfound holds plain paths
        for d in work:
            matchtdir(d[0])
        for d in dirsnotfound:
            matchtdir(d)

    # step 3 can only be skipped if step 1 found no directory at all
    skipstep3 = skipstep3 and not (work or dirsnotfound)
    work = [d for d in work if not dirignore(d[0])]

    # step 2: visit subdirectories
    def traverse(work, alreadynormed):
        # `work` is used as a stack of directories still to be listed;
        # results are accumulated in the enclosing `results` dict.
        wadd = work.append
        while work:
            tracing.counter('dirstate.walk work', len(work))
            nd = work.pop()
            visitentries = match.visitchildrenset(nd)
            if not visitentries:
                continue
            if visitentries == b'this' or visitentries == b'all':
                # no per-entry filtering for this directory
                visitentries = None
            skip = None
            if nd != b'':
                skip = b'.hg'
            try:
                with tracing.log('dirstate.walk.traverse listdir %s', nd):
                    entries = listdir(join(nd), stat=True, skip=skip)
            except OSError as inst:
                # permission/missing errors are reported through match.bad;
                # anything else propagates
                if inst.errno in (errno.EACCES, errno.ENOENT):
                    match.bad(
                        self.pathto(nd), encoding.strtolocal(inst.strerror)
                    )
                    continue
                raise
            for f, kind, st in entries:
                # Some matchers may return files in the visitentries set,
                # instead of 'this', if the matcher explicitly mentions them
                # and is not an exactmatcher. This is acceptable; we do not
                # make any hard assumptions about file-or-directory below
                # based on the presence of `f` in visitentries. If
                # visitchildrenset returned a set, we can always skip the
                # entries *not* in the set it provided regardless of whether
                # they're actually a file or a directory.
                if visitentries and f not in visitentries:
                    continue
                if normalizefile:
                    # even though f might be a directory, we're only
                    # interested in comparing it to files currently in the
                    # dmap -- therefore normalizefile is enough
                    nf = normalizefile(
                        nd and (nd + b"/" + f) or f, True, True
                    )
                else:
                    nf = nd and (nd + b"/" + f) or f
                if nf not in results:
                    if kind == dirkind:
                        if not ignore(nf):
                            if matchtdir:
                                matchtdir(nf)
                            wadd(nf)
                        # a directory on disk whose path is in the dmap
                        # is recorded with no stat
                        if nf in dmap and (matchalways or matchfn(nf)):
                            results[nf] = None
                    elif kind == regkind or kind == lnkkind:
                        if nf in dmap:
                            if matchalways or matchfn(nf):
                                results[nf] = st
                        elif (matchalways or matchfn(nf)) and not ignore(
                            nf
                        ):
                            # unknown file -- normalize if necessary
                            if not alreadynormed:
                                nf = normalize(nf, False, True)
                            results[nf] = st
                    elif nf in dmap and (matchalways or matchfn(nf)):
                        # neither directory, regular file nor symlink
                        results[nf] = None

    for nd, d in work:
        # alreadynormed means that processwork doesn't have to do any
        # expensive directory normalization
        alreadynormed = not normalize or nd == d
        traverse([d], alreadynormed)

    # drop the sentinel entries before looking at the dmap
    for s in subrepos:
        del results[s]
    del results[b'.hg']

    # step 3: visit remaining files from dmap
    if not skipstep3 and not exact:
        # If a dmap file is not in results yet, it was either
        # a) not matching matchfn b) ignored, c) missing, or d) under a
        # symlink directory.
        if not results and matchalways:
            visit = [f for f in dmap]
        else:
            visit = [f for f in dmap if f not in results and matchfn(f)]
        visit.sort()

        if unknown:
            # unknown == True means we walked all dirs under the roots
            # that weren't ignored, and everything that matched was stat'ed
            # and is already in results.
            # The rest must thus be ignored or under a symlink.
            audit_path = pathutil.pathauditor(self._root, cached=True)

            for nf in iter(visit):
                # If a stat for the same file was already added with a
                # different case, don't add one for this, since that would
                # make it appear as if the file exists under both names
                # on disk.
                if (
                    normalizefile
                    and normalizefile(nf, True, True) in results
                ):
                    results[nf] = None
                # Report ignored items in the dmap as long as they are not
                # under a symlink directory.
                elif audit_path.check(nf):
                    try:
                        results[nf] = lstat(join(nf))
                        # file was just ignored, no links, and exists
                    except OSError:
                        # file doesn't exist
                        results[nf] = None
                else:
                    # It's either missing or under a symlink directory
                    # which we in this case report as missing
                    results[nf] = None
        else:
            # We may not have walked the full directory tree above,
            # so stat and check everything we missed.
            iv = iter(visit)
            for st in util.statfiles([join(i) for i in visit]):
                results[next(iv)] = st
    return results
1214 1214
def _rust_status(self, matcher, list_clean, list_ignored, list_unknown):
    """Run the status computation through the Rust extension.

    Returns the same ``(lookup, status)`` pair as ``status()``: the list
    of files needing a content check, and a ``scmutil.status`` built from
    the lists handed back by ``rustmod.status``.  Warnings, bad files and
    traversed directories reported by the Rust side are relayed to the ui
    and to ``matcher``.
    """
    ui = self._ui

    # Force Rayon (Rust parallelism library) to respect the number of
    # workers. This is a temporary workaround until Rust code knows
    # how to read the config file.
    numcpus = ui.configint(b"worker", b"numcpus")
    if numcpus is not None:
        encoding.environ.setdefault(b'RAYON_NUM_THREADS', b'%d' % numcpus)

    # disabling workers overrides any thread count chosen above
    if not ui.configbool(b"worker", b"enabled", True):
        encoding.environ[b"RAYON_NUM_THREADS"] = b"1"

    rust_result = rustmod.status(
        self._map._map,
        matcher,
        self._rootdir,
        self._ignorefiles(),
        self._checkexec,
        self._lastnormaltime,
        bool(list_clean),
        bool(list_ignored),
        bool(list_unknown),
        bool(matcher.traversedir),
    )
    (
        lookup,
        modified,
        added,
        removed,
        deleted,
        clean,
        ignored,
        unknown,
        warnings,
        bad,
        traversed,
        dirty,
    ) = rust_result

    self._dirty |= dirty

    # replay the directory traversal for matchers that care about it
    if matcher.traversedir:
        for traversed_dir in traversed:
            matcher.traversedir(traversed_dir)

    if ui.warn:
        for item in warnings:
            if not isinstance(item, tuple):
                # a bare item is the path of an unreadable pattern file
                template = _(b"skipping unreadable pattern file '%s': %s\n")
                pattern_file = pathutil.canonpath(
                    self._rootdir, self._rootdir, item
                )
                ui.warn(
                    template % (pattern_file, b"No such file or directory")
                )
                continue
            # a tuple is (pattern file, offending syntax)
            file_path, syntax = item
            ui.warn(
                _(b"%s: ignoring invalid syntax '%s'\n") % (file_path, syntax)
            )

    for bad_file, reason in bad:
        matcher.bad(bad_file, encoding.strtolocal(reason))

    return (
        lookup,
        scmutil.status(
            modified=modified,
            added=added,
            removed=removed,
            deleted=deleted,
            unknown=unknown,
            ignored=ignored,
            clean=clean,
        ),
    )
1293 1293
def status(self, match, subrepos, ignored, clean, unknown):
    """Determine the status of the working copy relative to the
    dirstate and return a pair of (unsure, status), where status is of type
    scmutil.status and:

      unsure:
        files that might have been modified since the dirstate was
        written, but need to be read to be sure (size is the same
        but mtime differs, or the recorded mode cannot be trusted
        because only fallback flags are available)
      status.modified:
        files that have definitely been modified since the dirstate
        was written (different size or mode)
      status.clean:
        files that have definitely not been modified since the
        dirstate was written

    ``ignored``, ``clean`` and ``unknown`` are flags: the corresponding
    result lists are only filled in when the flag is true.  ``match`` and
    ``subrepos`` are forwarded to ``walk()``.
    """
    # The flag arguments are renamed so their names can be reused for
    # the result lists.
    listignored, listclean, listunknown = ignored, clean, unknown
    lookup, modified, added, unknown, ignored = [], [], [], [], []
    removed, deleted, clean = [], [], []

    dmap = self._map
    dmap.preload()

    use_rust = True

    allowed_matchers = (
        matchmod.alwaysmatcher,
        matchmod.exactmatcher,
        matchmod.includematcher,
    )

    if rustmod is None:
        use_rust = False
    elif self._checkcase:
        # Case-insensitive filesystems are not handled yet
        use_rust = False
    elif subrepos:
        use_rust = False
    elif sparse.enabled:
        use_rust = False
    elif not isinstance(match, allowed_matchers):
        # Some matchers have yet to be implemented
        use_rust = False

    if use_rust:
        try:
            return self._rust_status(
                match, listclean, listignored, listunknown
            )
        except rustmod.FallbackError:
            # fall through to the pure Python implementation below
            pass

    def noop(f):
        pass

    # Bind lookups to locals for the loop below; lists that were not
    # requested get a no-op adder so they stay empty.
    dcontains = dmap.__contains__
    dget = dmap.__getitem__
    ladd = lookup.append  # aka "unsure"
    madd = modified.append
    aadd = added.append
    uadd = unknown.append if listunknown else noop
    iadd = ignored.append if listignored else noop
    radd = removed.append
    dadd = deleted.append
    cadd = clean.append if listclean else noop
    mexact = match.exact
    dirignore = self._dirignore
    checkexec = self._checkexec
    checklink = self._checklink
    copymap = self._map.copymap
    lastnormaltime = self._lastnormaltime

    # We need to do full walks when either
    # - we're listing all clean files, or
    # - match.traversedir does something, because match.traversedir should
    #   be called for every dir in the working dir
    full = listclean or match.traversedir is not None
    for fn, st in pycompat.iteritems(
        self.walk(match, subrepos, listunknown, listignored, full=full)
    ):
        if not dcontains(fn):
            # not in the dirstate: either ignored or unknown
            if (listignored or mexact(fn)) and dirignore(fn):
                if listignored:
                    iadd(fn)
            else:
                uadd(fn)
            continue

        t = dget(fn)
        mode = t.mode
        size = t.size

        if not st and t.tracked:
            # tracked, but walk() produced no stat for it
            dadd(fn)
        elif t.p2_info:
            madd(fn)
        elif t.added:
            aadd(fn)
        elif t.removed:
            radd(fn)
        elif t.tracked:
            if not checklink and t.has_fallback_symlink:
                # If the file system does not support symlink, the mode
                # might not be correctly stored in the dirstate, so do not
                # trust it.
                ladd(fn)
            elif not checkexec and t.has_fallback_exec:
                # If the file system does not support exec bits, the mode
                # might not be correctly stored in the dirstate, so do not
                # trust it.
                ladd(fn)
            elif (
                size >= 0
                and (
                    (size != st.st_size and size != st.st_size & _rangemask)
                    or ((mode ^ st.st_mode) & 0o100 and checkexec)
                )
                or fn in copymap
            ):
                if stat.S_ISLNK(st.st_mode) and size != st.st_size:
                    # issue6456: Size returned may be longer due to
                    # encryption on EXT-4 fscrypt, undecided.
                    ladd(fn)
                else:
                    madd(fn)
            elif not t.mtime_likely_equal_to(timestamp.mtime_of(st)):
                ladd(fn)
            elif timestamp.mtime_of(st) == lastnormaltime:
                # fn may have just been marked as normal and it may have
                # changed in the same second without changing its size.
                # This can happen if we quickly do multiple commits.
                # Force lookup, so we don't miss such a racy file change.
                ladd(fn)
            elif listclean:
                cadd(fn)
    status = scmutil.status(
        modified, added, removed, deleted, unknown, ignored, clean
    )
    return (lookup, status)
1422 1433
def matches(self, match):
    """
    return files in the dirstate (in whatever state) filtered by match
    """
    # with the Rust extension available, query the inner map directly
    entries = self._map if rustmod is None else self._map._map

    if match.always():
        return entries.keys()

    wanted = match.files()
    if match.isexact():
        # fast path -- filter the other way around, since typically the
        # list of requested files is much smaller than the dirstate
        return [name for name in wanted if name in entries]

    if match.prefix() and all(name in entries for name in wanted):
        # fast path -- all the values are known to be files, so just return
        # that
        return list(wanted)

    return [name for name in entries if match(name)]
1443 1454
1444 1455 def _actualfilename(self, tr):
1445 1456 if tr:
1446 1457 return self._pendingfilename
1447 1458 else:
1448 1459 return self._filename
1449 1460
def savebackup(self, tr, backupname):
    '''Save current dirstate into backup file

    ``tr`` is the running transaction (or a falsy value); ``backupname``
    is the backup file name and must differ from the file being backed
    up.
    '''
    opener = self._opener
    filename = self._actualfilename(tr)
    assert backupname != filename

    # use '_writedirstate' instead of 'write' to write changes certainly,
    # because the latter omits writing out if transaction is running.
    # output file will be used to create backup of dirstate at this point.
    if self._dirty or not opener.exists(filename):
        fp = opener(filename, b"w", atomictemp=True, checkambig=True)
        self._writedirstate(tr, fp)

    if tr:

        def write_pending(f):
            return self._writedirstate(tr, f)

        # ensure that subsequent tr.writepending returns True for
        # changes written out above, even if dirstate is never
        # changed after this
        tr.addfilegenerator(
            b'dirstate',
            (self._filename,),
            write_pending,
            location=b'plain',
        )

        # ensure that pending file written above is unlinked at
        # failure, even if tr.writepending isn't invoked until the
        # end of this transaction
        tr.registertmp(filename, location=b'plain')

    opener.tryunlink(backupname)
    # hardlink backup is okay because _writedirstate is always called
    # with an "atomictemp=True" file.
    source_path = opener.join(filename)
    backup_path = opener.join(backupname)
    util.copyfile(source_path, backup_path, hardlink=True)
1488 1499
def restorebackup(self, tr, backupname):
    '''Restore dirstate by backup file'''
    # this "invalidate()" prevents "wlock.release()" from writing
    # changes of dirstate out after restoring from backup file
    self.invalidate()
    filename = self._actualfilename(tr)
    opener = self._opener
    backup_path = opener.join(backupname)
    current_path = opener.join(filename)
    if not util.samefile(backup_path, current_path):
        opener.rename(backupname, filename, checkambig=True)
    else:
        # both names already refer to the same file: only the extra
        # name has to go
        opener.unlink(backupname)
1500 1511
def clearbackup(self, tr, backupname):
    '''Clear backup file

    ``tr`` is accepted but not used here.
    '''
    opener = self._opener
    opener.unlink(backupname)
1504 1515
def verify(self, m1, m2):
    """Check the dirstate content against the parent manifests.

    Generator yielding one ``(message template, filename, state)`` tuple
    per inconsistency found between the dirstate entries and ``m1`` /
    ``m2``.
    """
    missing_from_p1 = b"%s in state %s, but not in manifest1\n"
    unexpected_in_p1 = b"%s in state %s, but also in manifest1\n"
    missing_from_ps = b"%s in state %s, but not in either manifest\n"
    missing_from_ds = b"%s in manifest1, but listed as state %s\n"

    # first pass: every dirstate entry is checked against the manifests
    for path, item in self.items():
        current = item.state
        if current in b"nr":
            if path not in m1:
                yield (missing_from_p1, path, current)
        if current in b"a":
            if path in m1:
                yield (unexpected_in_p1, path, current)
        if current in b"m":
            if not (path in m1 or path in m2):
                yield (missing_from_ps, path, current)

    # second pass: every file of manifest1 is checked against the dirstate
    for path in m1:
        current = self.get_entry(path).state
        if current in b"nrm":
            continue
        yield (missing_from_ds, path, current)
@@ -1,610 +1,616 b''
1 1 The *dirstate* is what Mercurial uses internally to track
2 2 the state of files in the working directory,
3 3 such as set by commands like `hg add` and `hg rm`.
4 4 It also contains some cached data that help make `hg status` faster.
5 5 The name refers both to `.hg/dirstate` on the filesystem
6 6 and the corresponding data structure in memory while a Mercurial process
7 7 is running.
8 8
9 9 The original file format, retroactively dubbed `dirstate-v1`,
10 10 is described at https://www.mercurial-scm.org/wiki/DirState.
11 11 It is made of a flat sequence of unordered variable-size entries,
12 12 so accessing any information in it requires parsing all of it.
13 13 Similarly, saving changes requires rewriting the entire file.
14 14
15 15 The newer `dirstate-v2` file format is designed to fix these limitations
16 16 and make `hg status` faster.
17 17
18 18 User guide
19 19 ==========
20 20
21 21 Compatibility
22 22 -------------
23 23
24 24 The file format is experimental and may still change.
25 25 Different versions of Mercurial may not be compatible with each other
26 26 when working on a local repository that uses this format.
27 27 When using an incompatible version with the experimental format,
28 28 anything can happen including data corruption.
29 29
30 30 Since the dirstate is entirely local and not relevant to the wire protocol,
31 31 `dirstate-v2` does not affect compatibility with remote Mercurial versions.
32 32
33 33 When `share-safe` is enabled, different repositories sharing the same store
34 34 can use different dirstate formats.
35 35
36 36 Enabling `dirstate-v2` for new local repositories
37 37 -------------------------------------------------
38 38
39 39 When creating a new local repository such as with `hg init` or `hg clone`,
40 40 the `exp-dirstate-v2` boolean in the `format` configuration section
41 41 controls whether to use this file format.
42 42 This is disabled by default as of this writing.
43 43 To enable it for a single repository, run for example::
44 44
45 45 $ hg init my-project --config format.exp-dirstate-v2=1
46 46
47 47 Checking the format of an existing local repository
48 48 ---------------------------------------------------
49 49
50 50 The `debugformat` command prints information about
51 51 which of multiple optional formats are used in the current repository,
52 52 including `dirstate-v2`::
53 53
54 54 $ hg debugformat
55 55 format-variant repo
56 56 fncache: yes
57 57 dirstate-v2: yes
58 58 […]
59 59
60 60 Upgrading or downgrading an existing local repository
61 61 -----------------------------------------------------
62 62
63 63 The `debugupgrade` command does various upgrades or downgrades
64 64 on a local repository
65 65 based on the current Mercurial version and on configuration.
66 66 The same `format.exp-dirstate-v2` configuration is used again.
67 67
68 68 Example to upgrade::
69 69
70 70 $ hg debugupgrade --config format.exp-dirstate-v2=1
71 71
72 72 Example to downgrade to `dirstate-v1`::
73 73
74 74 $ hg debugupgrade --config format.exp-dirstate-v2=0
75 75
76 76 Both of these commands do nothing but print a list of proposed changes,
77 77 which may include changes unrelated to the dirstate.
78 78 Those other changes are controlled by their own configuration keys.
79 79 Add `--run` to a command to actually apply the proposed changes.
80 80
81 81 Backups of `.hg/requires` and `.hg/dirstate` are created
82 82 in a `.hg/upgradebackup.*` directory.
83 83 If something goes wrong, restoring those files should undo the change.
84 84
85 85 Note that upgrading affects compatibility with older versions of Mercurial
86 86 as noted above.
87 87 This can be relevant when a repository’s files are on a USB drive
88 88 or some other removable media, or shared over the network, etc.
89 89
90 90 Internal filesystem representation
91 91 ==================================
92 92
93 93 Requirements file
94 94 -----------------
95 95
96 96 The `.hg/requires` file indicates which of various optional file formats
97 97 are used by a given repository.
98 98 Mercurial aborts when seeing a requirement it does not know about,
99 99 which avoids older versions accidentally messing up a repository
100 100 that uses a format that was introduced later.
101 101 For versions that do support a format, the presence or absence of
102 102 the corresponding requirement indicates whether to use that format.
103 103
104 104 When the file contains an `exp-dirstate-v2` line,
105 105 the `dirstate-v2` format is used.
106 106 With no such line `dirstate-v1` is used.
107 107
108 108 High level description
109 109 ----------------------
110 110
111 111 Whereas `dirstate-v1` uses a single `.hg/dirstate` file,
112 112 in `dirstate-v2` that file is a "docket" file
113 113 that only contains some metadata
114 114 and points to a separate data file named `.hg/dirstate.{ID}`,
115 115 where `{ID}` is a random identifier.
116 116
117 117 This separation allows making data files append-only
118 118 and therefore safer to memory-map.
119 119 Creating a new data file (occasionally to clean up unused data)
120 120 can be done with a different ID
121 121 without disrupting another Mercurial process
122 122 that could still be using the previous data file.
123 123
124 124 Both files have a format designed to reduce the need for parsing,
125 125 by using fixed-size binary components as much as possible.
126 126 For data that is not fixed-size,
127 127 references to other parts of a file can be made by storing "pseudo-pointers":
128 128 integers counted in bytes from the start of a file.
129 129 For read-only access no data structure is needed,
130 130 only a bytes buffer (possibly memory-mapped directly from the filesystem)
131 131 with specific parts read on demand.
132 132
133 133 The data file contains "nodes" organized in a tree.
134 134 Each node represents a file or directory inside the working directory
135 135 or its parent changeset.
136 136 This tree has the same structure as the filesystem,
137 137 so a node representing a directory has child nodes representing
138 138 the files and subdirectories contained directly in that directory.
139 139
140 140 The docket file format
141 141 ----------------------
142 142
143 143 This is implemented in `rust/hg-core/src/dirstate_tree/on_disk.rs`
144 144 and `mercurial/dirstateutils/docket.py`.
145 145
146 146 Components of the docket file are found at fixed offsets,
147 147 counted in bytes from the start of the file:
148 148
149 149 * Offset 0:
150 150 The 12-bytes marker string "dirstate-v2\n" ending with a newline character.
151 151 This makes it easier to tell a dirstate-v2 file from a dirstate-v1 file,
152 152 although it is not strictly necessary
153 153 since `.hg/requires` determines which format to use.
154 154
155 155 * Offset 12:
156 156 The changeset node ID on the first parent of the working directory,
157 157 as up to 32 binary bytes.
158 158 If a node ID is shorter (20 bytes for SHA-1),
159 159 it is start-aligned and the rest of the bytes are set to zero.
160 160
161 161 * Offset 44:
162 162 The changeset node ID on the second parent of the working directory,
163 163 or all zeros if there isn’t one.
164 164 Also 32 binary bytes.
165 165
166 166 * Offset 76:
167 167 Tree metadata on 44 bytes, described below.
168 168 Its separation in this documentation from the rest of the docket
169 169 reflects a detail of the current implementation.
170 170 Since tree metadata is also made of fields at fixed offsets, those could
171 171 be inlined here by adding 76 bytes to each offset.
172 172
173 173 * Offset 120:
174 174 The used size of the data file, as a 32-bit big-endian integer.
175 175 The actual size of the data file may be larger
176 176 (if another Mercurial process is appending to it
177 177 but has not updated the docket yet).
178 178 That extra data must be ignored.
179 179
180 180 * Offset 124:
181 181 The length of the data file identifier, as an 8-bit integer.
182 182
183 183 * Offset 125:
184 184 The data file identifier.
185 185
186 186 * Any additional data is currently ignored, and dropped when updating the file.
187 187
188 188 Tree metadata in the docket file
189 189 --------------------------------
190 190
191 191 Tree metadata is similarly made of components at fixed offsets.
192 192 These offsets are counted in bytes from the start of tree metadata,
193 193 which is 76 bytes after the start of the docket file.
194 194
195 195 This metadata can be thought of as the singular root of the tree
196 196 formed by nodes in the data file.
197 197
198 198 * Offset 0:
199 199 Pseudo-pointer to the start of root nodes,
200 200 counted in bytes from the start of the data file,
201 201 as a 32-bit big-endian integer.
202 202 These nodes describe files and directories found directly
203 203 at the root of the working directory.
204 204
205 205 * Offset 4:
206 206 Number of root nodes, as a 32-bit big-endian integer.
207 207
208 208 * Offset 8:
209 209 Total number of nodes in the entire tree that "have a dirstate entry",
210 210 as a 32-bit big-endian integer.
211 211 Those nodes represent files that would be present at all in `dirstate-v1`.
212 212 This is typically less than the total number of nodes.
213 213 This counter is used to implement `len(dirstatemap)`.
214 214
215 215 * Offset 12:
216 216 Number of nodes in the entire tree that have a copy source,
217 217 as a 32-bit big-endian integer.
218 218 At the next commit, these files are recorded
219 219 as having been copied or moved/renamed from that source.
220 220 (A move is recorded as a copy and separate removal of the source.)
221 221 This counter is used to implement `len(dirstatemap.copymap)`.
222 222
223 223 * Offset 16:
224 224 An estimation of how many bytes of the data file
225 225 (within its used size) are unused, as a 32-bit big-endian integer.
226 226 When appending to an existing data file,
227 227 some existing nodes or paths can be unreachable from the new root
228 228 but they still take up space.
229 229 This counter is used to decide when to write a new data file from scratch
230 230 instead of appending to an existing one,
231 231 in order to get rid of that unreachable data
232 232 and avoid unbounded file size growth.
233 233
234 234 * Offset 20:
235 235 These four bytes are currently ignored
236 236 and reset to zero when updating a docket file.
237 237 This is an attempt at forward compatibility:
238 238 future Mercurial versions could use this as a bit field
239 239 to indicate that a dirstate has additional data or constraints.
240 240 Finding a dirstate file with the relevant bit unset indicates that
241 241 it was written by a then-older version
242 242 which is not aware of that future change.
243 243
244 244 * Offset 24:
245 245 Either 20 zero bytes, or a SHA-1 hash as 20 binary bytes.
246 246 When present, the hash is of ignore patterns
247 247 that were used for some previous run of the `status` algorithm.
248 248
249 249 * (Offset 44: end of tree metadata)
250 250
251 251 Optional hash of ignore patterns
252 252 --------------------------------
253 253
254 254 The implementation of `status` at `rust/hg-core/src/dirstate_tree/status.rs`
255 255 has been optimized such that its run time is dominated by calls
256 256 to `stat` for reading the filesystem metadata of a file or directory,
257 257 and to `readdir` for listing the contents of a directory.
258 258 In some cases the algorithm can skip calls to `readdir`
259 259 (saving significant time)
260 260 because the dirstate already contains enough of the relevant information
261 261 to build the correct `status` results.
262 262
263 263 The default configuration of `hg status` is to list unknown files
264 264 but not ignored files.
265 265 In this case, it matters for the `readdir`-skipping optimization
266 266 if a given file used to be ignored but became unknown
267 267 because `.hgignore` changed.
268 268 To detect the possibility of such a change,
269 269 the tree metadata contains an optional hash of all ignore patterns.
270 270
271 271 We define:
272 272
273 273 * "Root" ignore files as:
274 274
275 275 - `.hgignore` at the root of the repository if it exists
276 276 - And all files from `ui.ignore.*` config.
277 277
278 278 This set of files is sorted by the string representation of their path.
279 279
280 280 * The "expanded contents" of an ignore file is the byte string made
281 281 by the concatenation of its contents followed by the "expanded contents"
282 282 of other files included with `include:` or `subinclude:` directives,
283 283 in inclusion order. This definition is recursive, as included files can
284 284 themselves include more files.
285 285
286 286 This hash is defined as the SHA-1 of the concatenation (in sorted
287 287 order) of the "expanded contents" of each "root" ignore file.
288 288 (Note that computing this does not require actually concatenating
289 289 into a single contiguous byte sequence.
290 290 Instead a SHA-1 hasher object can be created
291 291 and fed separate chunks one by one.)
292 292
293 293 The data file format
294 294 --------------------
295 295
296 296 This is implemented in `rust/hg-core/src/dirstate_tree/on_disk.rs`
297 297 and `mercurial/dirstateutils/v2.py`.
298 298
299 299 The data file contains two types of data: paths and nodes.
300 300
301 301 Paths and nodes can be organized in any order in the file, except that sibling
302 302 nodes must be next to each other and sorted by their path.
303 303 Contiguity lets the parent refer to them all
304 304 by their count and a single pseudo-pointer,
305 305 instead of storing one pseudo-pointer per child node.
306 306 Sorting allows using binary search to find a child node with a given name
307 307 in `O(log(n))` byte sequence comparisons.
308 308
309 309 The current implementation writes paths and child nodes before a given node
310 310 for ease of figuring out the value of pseudo-pointers by the time they are to be
311 311 written, but this is not an obligation and readers must not rely on it.
312 312
313 313 A path is stored as a byte string anywhere in the file, without delimiter.
314 314 It is referred to by one or more nodes by a pseudo-pointer to its start, and its
315 315 length in bytes. Since there is no delimiter,
316 316 when a path is a substring of another the same bytes could be reused,
317 317 although the implementation does not exploit this as of this writing.
318 318
319 319 A node is stored on 44 bytes with components at fixed offsets. Paths and
320 320 child nodes relevant to a node are stored externally and referenced through
321 321 pseudo-pointers.
322 322
323 323 All integers are stored in big-endian. All pseudo-pointers are 32-bit integers
324 324 counting bytes from the start of the data file. Path lengths and positions
325 325 are 16-bit integers, also counted in bytes.
326 326
327 327 Node components are:
328 328
329 329 * Offset 0:
330 330 Pseudo-pointer to the full path of this node,
331 331 from the working directory root.
332 332
333 333 * Offset 4:
334 334 Length of the full path.
335 335
336 336 * Offset 6:
337 337 Position of the last `/` path separator within the full path,
338 338 in bytes from the start of the full path,
339 339 or zero if there isn’t one.
340 340 The part of the full path after this position is the "base name".
341 341 Since sibling nodes have the same parent, only their base names vary
342 342 and need to be considered when doing binary search to find a given path.
343 343
344 344 * Offset 8:
345 345 Pseudo-pointer to the "copy source" path for this node,
346 346 or zero if there is no copy source.
347 347
348 348 * Offset 12:
349 349 Length of the copy source path, or zero if there isn’t one.
350 350
351 351 * Offset 14:
352 352 Pseudo-pointer to the start of child nodes.
353 353
354 354 * Offset 18:
355 355 Number of child nodes, as a 32-bit integer.
356 356 They occupy 44 times this number of bytes
357 357 (not counting space for paths, and further descendants).
358 358
359 359 * Offset 22:
360 360 Number as a 32-bit integer of descendant nodes in this subtree,
361 361 not including this node itself,
362 362 that "have a dirstate entry".
363 363 Those nodes represent files that would be present at all in `dirstate-v1`.
364 364 This is typically less than the total number of descendants.
365 365 This counter is used to implement `has_dir`.
366 366
367 367 * Offset 26:
368 368 Number as a 32-bit integer of descendant nodes in this subtree,
369 369 not including this node itself,
370 370 that represent files tracked in the working directory.
371 371 (For example, `hg rm` makes a file untracked.)
372 372 This counter is used to implement `has_tracked_dir`.
373 373
374 374 * Offset 30:
375 375 A `flags` field that packs some boolean values as bits of a 16-bit integer.
376 376 Starting from least-significant, bit masks are::
377 377
378 378 WDIR_TRACKED = 1 << 0
379 379 P1_TRACKED = 1 << 1
380 380 P2_INFO = 1 << 2
381 381 MODE_EXEC_PERM = 1 << 3
382 382 MODE_IS_SYMLINK = 1 << 4
383 383 HAS_FALLBACK_EXEC = 1 << 5
384 384 FALLBACK_EXEC = 1 << 6
385 385 HAS_FALLBACK_SYMLINK = 1 << 7
386 386 FALLBACK_SYMLINK = 1 << 8
387 387 EXPECTED_STATE_IS_MODIFIED = 1 << 9
388 388 HAS_MODE_AND_SIZE = 1 << 10
389 389 HAS_MTIME = 1 << 11
390 390 MTIME_SECOND_AMBIGUOUS = 1 << 12
391 391 DIRECTORY = 1 << 13
392 392 ALL_UNKNOWN_RECORDED = 1 << 14
393 393 ALL_IGNORED_RECORDED = 1 << 15
394 394
395 395 The meaning of each bit is described below.
396 396
397 397 Other bits are unset.
398 398 They may be assigned meaning in the future,
399 399 with the limitation that Mercurial versions that pre-date such meaning
400 400 will always reset those bits to unset when writing nodes.
401 401 (A new node is written for any mutation in its subtree,
402 402 leaving the bytes of the old node unreachable
403 403 until the data file is rewritten entirely.)
404 404
405 405 * Offset 32:
406 406 A `size` field described below, as a 32-bit integer.
407 407 Unlike in dirstate-v1, negative values are not used.
408 408
409 409 * Offset 36:
410 410 The seconds component of an `mtime` field described below,
411 411 as a 32-bit integer.
412 412 Unlike in dirstate-v1, negative values are not used.
413 413 When `mtime` is used, this is the number of seconds since the Unix epoch
414 414 truncated to its lower 31 bits.
415 415
416 416 * Offset 40:
417 417 The nanoseconds component of an `mtime` field described below,
418 418 as a 32-bit integer.
419 419 When `mtime` is used,
420 420 this is the number of nanoseconds since `mtime.seconds`,
421 421 always strictly less than one billion.
422 422
423 423 This may be zero if more precision is not available.
424 424 (This can happen because of limitations in any of Mercurial, Python,
425 425 libc, the operating system, …)
426 426
427 427 When comparing two mtimes and either has this component set to zero,
428 428 the sub-second precision of both should be ignored.
429 429 False positives when checking mtime equality due to clock resolution
430 430 are always possible and the status algorithm needs to deal with them,
431 431 but having too many false negatives could be harmful too.
432 432
433 433 * (Offset 44: end of this node)
434 434
435 435 The meaning of the boolean values packed in `flags` is:
436 436
437 437 `WDIR_TRACKED`
438 438 Set if the working directory contains a tracked file at this node’s path.
439 439 This is typically set and unset by `hg add` and `hg rm`.
440 440
441 441 `P1_TRACKED`
442 442 Set if the working directory’s first parent changeset
443 443 (whose node identifier is found in tree metadata)
444 444 contains a tracked file at this node’s path.
445 445 This is a cache to reduce manifest lookups.
446 446
447 447 `P2_INFO`
448 448 Set if the file has been involved in some merge operation.
449 449 Either because it was actually merged,
450 450 or because the version in the second parent p2 was ahead,
451 451 or because some rename moved it there.
452 452 In any of these cases `hg status` will want it displayed as modified.
453 453
454 454 Files that would be mentioned at all in the `dirstate-v1` file format
455 455 have a node with at least one of the above three bits set in `dirstate-v2`.
456 456 Let’s call these files "tracked anywhere",
457 457 and "untracked" the nodes with all three of these bits unset.
458 458 Untracked nodes are typically for directories:
459 459 they hold child nodes and form the tree structure.
460 460 Additional untracked nodes may also exist.
461 461 Although implementations should strive to clean up nodes
462 462 that are entirely unused, other untracked nodes may also exist.
463 463 For example, a future version of Mercurial might in some cases
464 464 add nodes for untracked files or/and ignored files in the working directory
465 465 in order to optimize `hg status`
466 466 by enabling it to skip `readdir` in more cases.
467 467
468 468 `HAS_MODE_AND_SIZE`
469 469 Must be unset for untracked nodes.
470 470 For files tracked anywhere, if this is set:
471 471 - The `size` field is the expected file size,
472 472 in bytes, truncated to its lower 31 bits.
473 473 - The expected execute permission for the file’s owner
474 474 is given by `MODE_EXEC_PERM`
475 475 - The expected file type is given by `MODE_IS_SYMLINK`:
476 476 a symbolic link if set, or a normal file if unset.
477 477 If this is unset the expected size, permission, and file type are unknown.
478 478 The `size` field is unused (set to zero).
479 479
480 480 `HAS_MTIME`
481 481 The node contains a "valid" last modification time in the `mtime` field.
482 482
483 483
484 484 It means the `mtime` was already strictly in the past when observed,
485 485 meaning that later changes cannot happen in the same clock tick
486 486 and must cause a different modification time
487 487 (unless the system clock jumps back and we get unlucky,
488 488 which is not impossible but deemed unlikely enough).
489 489
490 490 This means that if `std::fs::symlink_metadata` later reports
491 491 the same modification time
492 492 and ignored patterns haven’t changed,
493 493 we can assume the node to be unchanged on disk.
494 494
495 495 The `mtime` field can then be used to skip more expensive lookup when
496 496 checking the status of "tracked" nodes.
497 497
498 498 It can also be set for nodes where `DIRECTORY` is set.
499 499 See `DIRECTORY` documentation for details.
500 500
501 501 `DIRECTORY`
502 502 When set, this entry will match a directory that exists or existed on the
503 503 file system.
504 504
505 505 * When `HAS_MTIME` is set a directory has been seen on the file system and
506 506 `mtime` matches its last modification time. However, `HAS_MTIME` not being set
507 507 does not indicate the lack of directory on the file system.
508 508
509 509 * When not tracked anywhere, this node does not represent an ignored or
510 510 unknown file on disk.
511 511
512 512 If `HAS_MTIME` is set
513 513 and `mtime` matches the last modification time of the directory on disk,
514 514 the directory is unchanged
515 515 and we can skip calling `std::fs::read_dir` again for this directory,
516 516 and iterate child dirstate nodes instead.
517 517 (as long as `ALL_UNKNOWN_RECORDED` and `ALL_IGNORED_RECORDED` are taken
518 518 into account)
519 519
520 520 `MODE_EXEC_PERM`
521 521 Must be unset if `HAS_MODE_AND_SIZE` is unset.
522 522 If `HAS_MODE_AND_SIZE` is set,
523 523 this indicates whether the file’s owner is expected
524 524 to have execute permission.
525 525
526 Beware that on systems without fs support for this information, the value
527 stored in the dirstate might be wrong and should not be relied on.
528
526 529 `MODE_IS_SYMLINK`
527 530 Must be unset if `HAS_MODE_AND_SIZE` is unset.
528 531 If `HAS_MODE_AND_SIZE` is set,
529 532 this indicates whether the file is expected to be a symlink
530 533 as opposed to a normal file.
531 534
535 Beware that on systems without fs support for this information, the value
536 stored in the dirstate might be wrong and should not be relied on.
537
532 538 `EXPECTED_STATE_IS_MODIFIED`
533 539 Must be unset for untracked nodes.
534 540 For:
535 541 - a file tracked anywhere
536 542 - that has expected metadata (`HAS_MODE_AND_SIZE` and `HAS_MTIME`)
537 543 - if that metadata matches
538 544 metadata found in the working directory with `stat`
539 545 This bit indicates the status of the file.
540 546 If set, the status is modified. If unset, it is clean.
541 547
542 548 In cases where `hg status` needs to read the contents of a file
543 549 because metadata is ambiguous, this bit lets it record the result
544 550 if the result is modified so that a future run of `hg status`
545 551 does not need to do the same again.
546 552 It is valid to never set this bit,
547 553 and consider expected metadata ambiguous if it is set.
548 554
549 555 `ALL_UNKNOWN_RECORDED`
550 556 If set, all "unknown" children existing on disk (at the time of the last
551 557 status) have been recorded and the `mtime` associated with
552 558 `DIRECTORY` can be used for optimization even when "unknown" files
553 559 are listed.
554 560
555 561 Note that the number of recorded "unknown" children can still be zero if none
556 562 were present.
557 563
558 564 Also note that having this flag unset does not imply that no "unknown"
559 565 children have been recorded. Some might be present, but there is no guarantee
560 566 that it will be all of them.
561 567
562 568 `ALL_IGNORED_RECORDED`
563 569 If set, all "ignored" children existing on disk (at the time of the last
564 570 status) have been recorded and the `mtime` associated with
565 571 `DIRECTORY` can be used for optimization even when "ignored" files
566 572 are listed.
567 573
568 574 Note that the number of recorded "ignored" children can still be zero if none
569 575 were present.
570 576
571 577 Also note that having this flag unset does not imply that no "ignored"
572 578 children have been recorded. Some might be present, but there is no guarantee
573 579 that it will be all of them.
574 580
575 581 `HAS_FALLBACK_EXEC`
576 582 If this flag is set, the entry carries "fallback" information for the
577 583 executable bit in the `FALLBACK_EXEC` flag.
578 584
579 585 Fallback information can be stored in the dirstate to keep track of
580 586 filesystem attributes tracked by Mercurial when the underlying file
581 587 system or operating system does not support that property, (e.g.
582 588 Windows).
583 589
584 590 `FALLBACK_EXEC`
585 591 Should be ignored if `HAS_FALLBACK_EXEC` is unset. If set the file for this
586 592 entry should be considered executable if that information cannot be
587 593 extracted from the file system. If unset it should be considered
588 594 non-executable instead.
589 595
590 596 `HAS_FALLBACK_SYMLINK`
591 597 If this flag is set, the entry carries "fallback" information for symbolic
592 598 link status in the `FALLBACK_SYMLINK` flag.
593 599
594 600 Fallback information can be stored in the dirstate to keep track of
595 601 filesystem attributes tracked by Mercurial when the underlying file
596 602 system or operating system does not support that property, (e.g.
597 603 Windows).
598 604
599 605 `FALLBACK_SYMLINK`
600 606 Should be ignored if `HAS_FALLBACK_SYMLINK` is unset. If set the file for
601 607 this entry should be considered a symlink if that information cannot be
602 608 extracted from the file system. If unset it should be considered a normal
603 609 file instead.
604 610
605 611 `MTIME_SECOND_AMBIGUOUS`
606 612 This flag is relevant only when `HAS_MTIME` is set. When set, the
607 613 `mtime` stored in the entry is only valid for comparison with timestamps
608 614 that have nanosecond information. If the available timestamp does not carry
609 615 nanosecond information, the `mtime` should be ignored and no optimisation
610 616 can be applied.
General Comments 0
You need to be logged in to leave comments. Login now