##// END OF EJS Templates
dirstate: remove the `lastnormaltime` mechanism
Raphaël Gomès — changeset r49214:a19d1225 (branch: default, phase: draft)
parent child Browse files
Show More
@@ -1,1515 +1,1491 b''
1 1 # dirstate.py - working directory tracking for mercurial
2 2 #
3 3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import collections
11 11 import contextlib
12 12 import errno
13 13 import os
14 14 import stat
15 15
16 16 from .i18n import _
17 17 from .pycompat import delattr
18 18
19 19 from hgdemandimport import tracing
20 20
21 21 from . import (
22 22 dirstatemap,
23 23 encoding,
24 24 error,
25 25 match as matchmod,
26 26 pathutil,
27 27 policy,
28 28 pycompat,
29 29 scmutil,
30 30 sparse,
31 31 util,
32 32 )
33 33
34 34 from .dirstateutils import (
35 35 timestamp,
36 36 )
37 37
38 38 from .interfaces import (
39 39 dirstate as intdirstate,
40 40 util as interfaceutil,
41 41 )
42 42
43 43 parsers = policy.importmod('parsers')
44 44 rustmod = policy.importrust('dirstate')
45 45
46 46 HAS_FAST_DIRSTATE_V2 = rustmod is not None
47 47
48 48 propertycache = util.propertycache
49 49 filecache = scmutil.filecache
50 50 _rangemask = dirstatemap.rangemask
51 51
52 52 DirstateItem = dirstatemap.DirstateItem
53 53
54 54
class repocache(filecache):
    """A `filecache` variant whose tracked files live under `.hg/`."""

    def join(self, obj, fname):
        # Resolve fname relative to the repository's `.hg` opener.
        return obj._opener.join(fname)
60 60
61 61
class rootcache(filecache):
    """A `filecache` variant whose tracked files live in the repository root."""

    def join(self, obj, fname):
        # Resolve fname relative to the working-directory root.
        return obj._join(fname)
67 67
68 68
def requires_parents_change(func):
    """Decorator: restrict `func` to calls made inside a parentchange context."""

    def wrap(self, *args, **kwargs):
        if self.pendingparentchange():
            return func(self, *args, **kwargs)
        msg = 'calling `%s` outside of a parentchange context' % func.__name__
        raise error.ProgrammingError(msg)

    return wrap
78 78
79 79
def requires_no_parents_change(func):
    """Decorator: forbid calling `func` while a parentchange context is active."""

    def wrap(self, *args, **kwargs):
        if self.pendingparentchange():
            msg = 'calling `%s` inside of a parentchange context' % func.__name__
            raise error.ProgrammingError(msg)
        return func(self, *args, **kwargs)

    return wrap
89 89
90 90
91 91 @interfaceutil.implementer(intdirstate.idirstate)
92 92 class dirstate(object):
    def __init__(
        self,
        opener,
        ui,
        root,
        validate,
        sparsematchfn,
        nodeconstants,
        use_dirstate_v2,
    ):
        """Create a new dirstate object.

        opener is an open()-like callable that can be used to open the
        dirstate file; root is the root of the directory tracked by
        the dirstate.
        """
        self._use_dirstate_v2 = use_dirstate_v2
        self._nodeconstants = nodeconstants
        self._opener = opener
        self._validate = validate
        self._root = root
        self._sparsematchfn = sparsematchfn
        # ntpath.join(root, '') of Python 2.7.9 does not add sep if root is
        # UNC path pointing to root share (issue4557)
        self._rootdir = pathutil.normasprefix(root)
        # True when the in-memory state diverges from what is on disk
        self._dirty = False
        self._ui = ui
        self._filecache = {}
        # depth of nested parentchange() contexts (see parentchange())
        self._parentwriters = 0
        self._filename = b'dirstate'
        self._pendingfilename = b'%s.pending' % self._filename
        # category -> callback to run when the parents change
        # (see addparentchangecallback())
        self._plchangecallbacks = {}
        # parents before the current round of changes; consumed and reset
        # by _writedirstate()
        self._origpl = None
        self._mapcls = dirstatemap.dirstatemap
        # Access and cache cwd early, so we don't access it for the first time
        # after a working-copy update caused it to not exist (accessing it then
        # raises an exception).
        self._cwd
132 131
    def prefetch_parents(self):
        """make sure the parents are loaded

        Used to avoid a race condition.
        """
        # Touching `_pl` forces the dirstate map (and its parents) to load.
        self._pl

    @contextlib.contextmanager
    def parentchange(self):
        """Context manager for handling dirstate parents.

        If an exception occurs in the scope of the context manager,
        the incoherent dirstate won't be written when wlock is
        released.
        """
        self._parentwriters += 1
        yield
        # Typically we want the "undo" step of a context manager in a
        # finally block so it happens even when an exception
        # occurs. In this case, however, we only want to decrement
        # parentwriters if the code in the with statement exits
        # normally, so we don't have a try/finally here on purpose.
        self._parentwriters -= 1

    def pendingparentchange(self):
        """Returns true if the dirstate is in the middle of a set of changes
        that modify the dirstate parent.
        """
        return self._parentwriters > 0
162 161
    @propertycache
    def _map(self):
        """Return the dirstate contents (see documentation for dirstatemap)."""
        # NOTE: assigned to the instance before returning, so the attribute
        # already exists if construction re-enters this property.
        self._map = self._mapcls(
            self._ui,
            self._opener,
            self._root,
            self._nodeconstants,
            self._use_dirstate_v2,
        )
        return self._map

    @property
    def _sparsematcher(self):
        """The matcher for the sparse checkout.

        The working directory may not include every file from a manifest. The
        matcher obtained by this property will match a path if it is to be
        included in the working directory.
        """
        # TODO there is potential to cache this property. For now, the matcher
        # is resolved on every access. (But the called function does use a
        # cache to keep the lookup fast.)
        return self._sparsematchfn()

    @repocache(b'branch')
    def _branch(self):
        # Read the current branch from `.hg/branch`; fall back to b"default"
        # when the file is missing or empty.
        try:
            return self._opener.read(b"branch").strip() or b"default"
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            return b"default"

    @property
    def _pl(self):
        # The working-copy parent nodes, as stored in the dirstate map.
        return self._map.parents()

    def hasdir(self, d):
        # True when `d` is a directory containing at least one tracked file.
        return self._map.hastrackeddir(d)

    @rootcache(b'.hgignore')
    def _ignore(self):
        # Build a single matcher covering all ignore files
        # (.hgignore plus any ui.ignore / ui.ignore.* config entries).
        files = self._ignorefiles()
        if not files:
            return matchmod.never()

        pats = [b'include:%s' % f for f in files]
        return matchmod.match(self._root, b'', [], pats, warn=self._ui.warn)
212 211
    @propertycache
    def _slash(self):
        # True when paths should be shown with '/' even though the OS
        # separator differs (ui.slash option, relevant on e.g. Windows).
        return self._ui.configbool(b'ui', b'slash') and pycompat.ossep != b'/'

    @propertycache
    def _checklink(self):
        # Whether the filesystem at the repo root supports symlinks.
        return util.checklink(self._root)

    @propertycache
    def _checkexec(self):
        # Whether the filesystem at the repo root supports the exec bit.
        return bool(util.checkexec(self._root))

    @propertycache
    def _checkcase(self):
        # True on case-insensitive filesystems (probed against '.hg').
        return not util.fscasesensitive(self._join(b'.hg'))

    def _join(self, f):
        # much faster than os.path.join()
        # it's safe because f is always a relative path
        return self._rootdir + f
233 232
    def flagfunc(self, buildfallback):
        """build a callable that returns flags associated with a filename

        The information is extracted from three possible layers:
        1. the file system if it supports the information
        2. the "fallback" information stored in the dirstate if any
        3. a more expensive mechanism inferring the flags from the parents.
        """

        # small hack to cache the result of buildfallback()
        fallback_func = []

        def get_flags(x):
            entry = None
            fallback_value = None
            try:
                st = os.lstat(self._join(x))
            except OSError:
                # file is missing (or not lstat-able): no flags to report
                return b''

            # symlink flag: filesystem first, then dirstate fallback bits,
            # then the expensive buildfallback() mechanism
            if self._checklink:
                if util.statislink(st):
                    return b'l'
            else:
                entry = self.get_entry(x)
                if entry.has_fallback_symlink:
                    if entry.fallback_symlink:
                        return b'l'
                else:
                    if not fallback_func:
                        fallback_func.append(buildfallback())
                    fallback_value = fallback_func[0](x)
                    if b'l' in fallback_value:
                        return b'l'

            # exec flag: same three-layer lookup as for symlinks above
            if self._checkexec:
                if util.statisexec(st):
                    return b'x'
            else:
                if entry is None:
                    entry = self.get_entry(x)
                if entry.has_fallback_exec:
                    if entry.fallback_exec:
                        return b'x'
                else:
                    if fallback_value is None:
                        if not fallback_func:
                            fallback_func.append(buildfallback())
                        fallback_value = fallback_func[0](x)
                    if b'x' in fallback_value:
                        return b'x'
            return b''

        return get_flags
288 287
    @propertycache
    def _cwd(self):
        # internal config: ui.forcecwd
        forcecwd = self._ui.config(b'ui', b'forcecwd')
        if forcecwd:
            return forcecwd
        return encoding.getcwd()

    def getcwd(self):
        """Return the path from which a canonical path is calculated.

        This path should be used to resolve file patterns or to convert
        canonical paths back to file paths for display. It shouldn't be
        used to get real file paths. Use vfs functions instead.
        """
        cwd = self._cwd
        if cwd == self._root:
            return b''
        # self._root ends with a path separator if self._root is '/' or 'C:\'
        rootsep = self._root
        if not util.endswithsep(rootsep):
            rootsep += pycompat.ossep
        if cwd.startswith(rootsep):
            # inside the repo: return the path relative to the root
            return cwd[len(rootsep) :]
        else:
            # we're outside the repo. return an absolute path.
            return cwd

    def pathto(self, f, cwd=None):
        # Return `f` relative to `cwd` (defaults to self.getcwd()),
        # converted to '/' separators when ui.slash is in effect.
        if cwd is None:
            cwd = self.getcwd()
        path = util.pathto(self._root, cwd, f)
        if self._slash:
            return util.pconvert(path)
        return path
324 323
    def __getitem__(self, key):
        """Return the current state of key (a filename) in the dirstate.

        States are:
        n  normal
        m  needs merging
        r  marked for removal
        a  marked for addition
        ?  not tracked

        XXX The "state" is a bit obscure to be in the "public" API. we should
        consider migrating all user of this to going through the dirstate entry
        instead.
        """
        # deprecated since 6.1: callers should migrate to get_entry()
        msg = b"don't use dirstate[file], use dirstate.get_entry(file)"
        util.nouideprecwarn(msg, b'6.1', stacklevel=2)
        entry = self._map.get(key)
        if entry is not None:
            return entry.state
        return b'?'

    def get_entry(self, path):
        """return a DirstateItem for the associated path"""
        entry = self._map.get(path)
        if entry is None:
            # unknown path: return an empty (untracked) item
            return DirstateItem()
        return entry

    def __contains__(self, key):
        # membership test delegates to the dirstate map
        return key in self._map

    def __iter__(self):
        # iterate over tracked filenames in sorted order
        return iter(sorted(self._map))

    def items(self):
        return pycompat.iteritems(self._map)

    # Python 2 compatibility alias
    iteritems = items
363 362
364 363 def parents(self):
365 364 return [self._validate(p) for p in self._pl]
366 365
367 366 def p1(self):
368 367 return self._validate(self._pl[0])
369 368
370 369 def p2(self):
371 370 return self._validate(self._pl[1])
372 371
373 372 @property
374 373 def in_merge(self):
375 374 """True if a merge is in progress"""
376 375 return self._pl[1] != self._nodeconstants.nullid
377 376
378 377 def branch(self):
379 378 return encoding.tolocal(self._branch)
380 379
    def setparents(self, p1, p2=None):
        """Set dirstate parents to p1 and p2.

        When moving from two parents to one, "merged" entries are
        adjusted to normal and previous copy records discarded and
        returned by the call.

        See localrepo.setparents()
        """
        if p2 is None:
            p2 = self._nodeconstants.nullid
        if self._parentwriters == 0:
            raise ValueError(
                b"cannot set dirstate parent outside of "
                b"dirstate.parentchange context manager"
            )

        self._dirty = True
        oldp2 = self._pl[1]
        if self._origpl is None:
            # remember the pre-change parents for the parent-change callbacks
            self._origpl = self._pl
        nullid = self._nodeconstants.nullid
        # True if we need to fold p2 related state back to a linear case
        fold_p2 = oldp2 != nullid and p2 == nullid
        return self._map.setparents(p1, p2, fold_p2=fold_p2)
406 405
    def setbranch(self, branch):
        """Set the current branch and persist it to `.hg/branch`."""
        self.__class__._branch.set(self, encoding.fromlocal(branch))
        f = self._opener(b'branch', b'w', atomictemp=True, checkambig=True)
        try:
            f.write(self._branch + b'\n')
            f.close()

            # make sure filecache has the correct stat info for _branch after
            # replacing the underlying file
            ce = self._filecache[b'_branch']
            if ce:
                ce.refresh()
        except:  # re-raises
            f.discard()
            raise
422 421
423 422 def invalidate(self):
424 423 """Causes the next access to reread the dirstate.
425 424
426 425 This is different from localrepo.invalidatedirstate() because it always
427 426 rereads the dirstate. Use localrepo.invalidatedirstate() if you want to
428 427 check whether the dirstate has changed before rereading it."""
429 428
430 429 for a in ("_map", "_branch", "_ignore"):
431 430 if a in self.__dict__:
432 431 delattr(self, a)
433 self._lastnormaltime = timestamp.zero()
434 432 self._dirty = False
435 433 self._parentwriters = 0
436 434 self._origpl = None
437 435
438 436 def copy(self, source, dest):
439 437 """Mark dest as a copy of source. Unmark dest if source is None."""
440 438 if source == dest:
441 439 return
442 440 self._dirty = True
443 441 if source is not None:
444 442 self._map.copymap[dest] = source
445 443 else:
446 444 self._map.copymap.pop(dest, None)
447 445
448 446 def copied(self, file):
449 447 return self._map.copymap.get(file, None)
450 448
451 449 def copies(self):
452 450 return self._map.copymap
453 451
    @requires_no_parents_change
    def set_tracked(self, filename, reset_copy=False):
        """a "public" method for generic code to mark a file as tracked

        This function is to be called outside of "update/merge" case. For
        example by a command like `hg add X`.

        if reset_copy is set, any existing copy information will be dropped.

        return True if the file was previously untracked, False otherwise.
        """
        self._dirty = True
        entry = self._map.get(filename)
        if entry is None or not entry.tracked:
            # newly tracked: make sure the name does not clash with a
            # tracked directory or shadow another tracked file
            self._check_new_tracked_filename(filename)
        pre_tracked = self._map.set_tracked(filename)
        if reset_copy:
            self._map.copymap.pop(filename, None)
        return pre_tracked

    @requires_no_parents_change
    def set_untracked(self, filename):
        """a "public" method for generic code to mark a file as untracked

        This function is to be called outside of "update/merge" case. For
        example by a command like `hg remove X`.

        return True if the file was previously tracked, False otherwise.
        """
        ret = self._map.set_untracked(filename)
        if ret:
            # only dirty the dirstate when something actually changed
            self._dirty = True
        return ret
487 485
    @requires_no_parents_change
    def set_clean(self, filename, parentfiledata):
        """record that the current state of the file on disk is known to be clean"""
        self._dirty = True
        if not self._map[filename].tracked:
            self._check_new_tracked_filename(filename)
        # parentfiledata carries the stat data to record for the clean file
        (mode, size, mtime) = parentfiledata
        self._map.set_clean(filename, mode, size, mtime)

    @requires_no_parents_change
    def set_possibly_dirty(self, filename):
        """record that the current state of the file on disk is unknown"""
        self._dirty = True
        self._map.set_possibly_dirty(filename)
507 500
    @requires_parents_change
    def update_file_p1(
        self,
        filename,
        p1_tracked,
    ):
        """Set a file as tracked in the parent (or not)

        This is to be called when adjusting the dirstate to a new parent after
        an history rewriting operation.

        It should not be called during a merge (p2 != nullid) and only within
        a `with dirstate.parentchange():` context.
        """
        if self.in_merge:
            msg = b'update_file_reference should not be called when merging'
            raise error.ProgrammingError(msg)
        entry = self._map.get(filename)
        if entry is None:
            wc_tracked = False
        else:
            wc_tracked = entry.tracked
        if not (p1_tracked or wc_tracked):
            # the file is no longer relevant to anyone
            if self._map.get(filename) is not None:
                self._map.reset_state(filename)
                self._dirty = True
        elif (not p1_tracked) and wc_tracked:
            if entry is not None and entry.added:
                return  # avoid dropping copy information (maybe?)

        self._map.reset_state(
            filename,
            wc_tracked,
            p1_tracked,
            # the underlying reference might have changed, we will have to
            # check it.
            has_meaningful_mtime=False,
        )
547 540
    @requires_parents_change
    def update_file(
        self,
        filename,
        wc_tracked,
        p1_tracked,
        p2_info=False,
        possibly_dirty=False,
        parentfiledata=None,
    ):
        """update the information about a file in the dirstate

        This is to be called when the direstates parent changes to keep track
        of what is the file situation in regards to the working copy and its parent.

        This function must be called within a `dirstate.parentchange` context.

        note: the API is at an early stage and we might need to adjust it
        depending of what information ends up being relevant and useful to
        other processing.
        """

        # note: I do not think we need to double check name clash here since we
        # are in a update/merge case that should already have taken care of
        # this. The test agrees

        self._dirty = True

        self._map.reset_state(
            filename,
            wc_tracked,
            p1_tracked,
            p2_info=p2_info,
            has_meaningful_mtime=not possibly_dirty,
            parentfiledata=parentfiledata,
        )
593 577
    def _check_new_tracked_filename(self, filename):
        """Abort when `filename` cannot be tracked without a name clash.

        Raises error.Abort when the name is already a tracked directory, or
        when one of its ancestor paths is tracked as a (non-removed) file
        ("shadowing").
        """
        scmutil.checkfilename(filename)
        if self._map.hastrackeddir(filename):
            msg = _(b'directory %r already in dirstate')
            msg %= pycompat.bytestr(filename)
            raise error.Abort(msg)
        # shadows
        for d in pathutil.finddirs(filename):
            if self._map.hastrackeddir(d):
                break
            entry = self._map.get(d)
            if entry is not None and not entry.removed:
                msg = _(b'file %r in dirstate clashes with %r')
                msg %= (pycompat.bytestr(d), pycompat.bytestr(filename))
                raise error.Abort(msg)
609 593
    def _get_filedata(self, filename):
        """Return (mode, size, mtime) from an lstat of `filename`."""
        s = os.lstat(self._join(filename))
        mode = s.st_mode
        size = s.st_size
        mtime = timestamp.mtime_of(s)
        return (mode, size, mtime)
617 601
    def _discoverpath(self, path, normed, ignoremissing, exists, storemap):
        """Discover the on-disk case of `path` and cache it in `storemap`.

        `normed` is the case-normalized form of `path`; `exists` may be a
        precomputed existence result (None means "look it up here").
        """
        if exists is None:
            exists = os.path.lexists(os.path.join(self._root, path))
        if not exists:
            # Maybe a path component exists
            if not ignoremissing and b'/' in path:
                d, f = path.rsplit(b'/', 1)
                d = self._normalize(d, False, ignoremissing, None)
                folded = d + b"/" + f
            else:
                # No path components, preserve original case
                folded = path
        else:
            # recursively normalize leading directory components
            # against dirstate
            if b'/' in normed:
                d, f = normed.rsplit(b'/', 1)
                d = self._normalize(d, False, ignoremissing, True)
                r = self._root + b"/" + d
                folded = d + b"/" + util.fspath(f, r)
            else:
                folded = util.fspath(normed, self._root)
        storemap[normed] = folded

        return folded
643 627
    def _normalizefile(self, path, isknown, ignoremissing=False, exists=None):
        """Case-normalize `path` using the file fold map only."""
        normed = util.normcase(path)
        folded = self._map.filefoldmap.get(normed, None)
        if folded is None:
            if isknown:
                # name came from disk: its case is already authoritative
                folded = path
            else:
                folded = self._discoverpath(
                    path, normed, ignoremissing, exists, self._map.filefoldmap
                )
        return folded

    def _normalize(self, path, isknown, ignoremissing=False, exists=None):
        """Case-normalize `path`, consulting file then directory fold maps."""
        normed = util.normcase(path)
        folded = self._map.filefoldmap.get(normed, None)
        if folded is None:
            folded = self._map.dirfoldmap.get(normed, None)
        if folded is None:
            if isknown:
                folded = path
            else:
                # store discovered result in dirfoldmap so that future
                # normalizefile calls don't start matching directories
                folded = self._discoverpath(
                    path, normed, ignoremissing, exists, self._map.dirfoldmap
                )
        return folded
671 655
    def normalize(self, path, isknown=False, ignoremissing=False):
        """
        normalize the case of a pathname when on a casefolding filesystem

        isknown specifies whether the filename came from walking the
        disk, to avoid extra filesystem access.

        If ignoremissing is True, missing path are returned
        unchanged. Otherwise, we try harder to normalize possibly
        existing path components.

        The normalized case is determined based on the following precedence:

        - version of name already stored in the dirstate
        - version of name stored on disk
        - version provided via command arguments
        """

        if self._checkcase:
            return self._normalize(path, isknown, ignoremissing)
        # case-sensitive filesystem: nothing to fold
        return path
693 677
694 678 def clear(self):
695 679 self._map.clear()
696 self._lastnormaltime = timestamp.zero()
697 680 self._dirty = True
698 681
    def rebuild(self, parent, allfiles, changedfiles=None):
        """Reset the dirstate to `parent`, retracking files from `allfiles`.

        When `changedfiles` is None the entire dirstate is rebuilt; otherwise
        only the named files are re-looked-up (if in `allfiles`) or dropped.
        """
        if changedfiles is None:
            # Rebuild entire dirstate
            to_lookup = allfiles
            to_drop = []
            self.clear()
        elif len(changedfiles) < 10:
            # Avoid turning allfiles into a set, which can be expensive if it's
            # large.
            to_lookup = []
            to_drop = []
            for f in changedfiles:
                if f in allfiles:
                    to_lookup.append(f)
                else:
                    to_drop.append(f)
        else:
            changedfilesset = set(changedfiles)
            to_lookup = changedfilesset & set(allfiles)
            to_drop = changedfilesset - to_lookup

        if self._origpl is None:
            self._origpl = self._pl
        self._map.setparents(parent, self._nodeconstants.nullid)

        for f in to_lookup:

            if self.in_merge:
                self.set_tracked(f)
            else:
                self._map.reset_state(
                    f,
                    wc_tracked=True,
                    p1_tracked=True,
                )
        for f in to_drop:
            self._map.reset_state(f)

        self._dirty = True
740 721
    def identity(self):
        """Return identity of dirstate itself to detect changing in storage

        If identity of previous dirstate is equal to this, writing
        changes based on the former dirstate out can keep consistency.
        """
        # identity is recorded by the map when the dirstate file is read
        return self._map.identity
748 729
    def write(self, tr):
        """Write pending changes to disk, or schedule them on transaction `tr`."""
        if not self._dirty:
            return

        filename = self._filename
        if tr:
            # 'dirstate.write()' is not only for writing in-memory
            # changes out, but also for dropping ambiguous timestamp.
            # delayed writing re-raise "ambiguous timestamp issue".
            # See also the wiki page below for detail:
            # https://www.mercurial-scm.org/wiki/DirstateTransactionPlan

            # record when mtime start to be ambiguous
            now = timestamp.get_fs_now(self._opener)

            # delay writing in-memory changes out
            tr.addfilegenerator(
                b'dirstate',
                (self._filename,),
                lambda f: self._writedirstate(tr, f, now=now),
                location=b'plain',
            )
            return

        # no transaction: write synchronously through an atomic temp file
        st = self._opener(filename, b"w", atomictemp=True, checkambig=True)
        self._writedirstate(tr, st)
775 756
776 757 def addparentchangecallback(self, category, callback):
777 758 """add a callback to be called when the wd parents are changed
778 759
779 760 Callback will be called with the following arguments:
780 761 dirstate, (oldp1, oldp2), (newp1, newp2)
781 762
782 763 Category is a unique identifier to allow overwriting an old callback
783 764 with a newer callback.
784 765 """
785 766 self._plchangecallbacks[category] = callback
786 767
    def _writedirstate(self, tr, st, now=None):
        """Serialize the dirstate map to the open file `st`.

        `now` is the filesystem's notion of the current time, used to detect
        entries whose mtime is too recent to be trusted (ambiguous).
        """
        # notify callbacks about parents change
        if self._origpl is not None and self._origpl != self._pl:
            for c, callback in sorted(
                pycompat.iteritems(self._plchangecallbacks)
            ):
                callback(self, self._origpl, self._pl)
            self._origpl = None

        if now is None:
            # use the modification time of the newly created temporary file as the
            # filesystem's notion of 'now'
            now = timestamp.mtime_of(util.fstat(st))

        # enough 'delaywrite' prevents 'pack_dirstate' from dropping
        # timestamp of each entries in dirstate, because of 'now > mtime'
        delaywrite = self._ui.configint(b'debug', b'dirstate.delaywrite')
        if delaywrite > 0:
            # do we have any files to delay for?
            for f, e in pycompat.iteritems(self._map):
                if e.need_delay(now):
                    import time  # to avoid useless import

                    # rather than sleep n seconds, sleep until the next
                    # multiple of n seconds
                    clock = time.time()
                    start = int(clock) - (int(clock) % delaywrite)
                    end = start + delaywrite
                    time.sleep(end - clock)
                    # trust our estimate that the end is near now
                    now = timestamp.timestamp((end, 0))
                    break

        self._map.write(tr, st, now)
        self._dirty = False
823 803
824 804 def _dirignore(self, f):
825 805 if self._ignore(f):
826 806 return True
827 807 for p in pathutil.finddirs(f):
828 808 if self._ignore(p):
829 809 return True
830 810 return False
831 811
832 812 def _ignorefiles(self):
833 813 files = []
834 814 if os.path.exists(self._join(b'.hgignore')):
835 815 files.append(self._join(b'.hgignore'))
836 816 for name, path in self._ui.configitems(b"ui"):
837 817 if name == b'ignore' or name.startswith(b'ignore.'):
838 818 # we need to use os.path.join here rather than self._join
839 819 # because path is arbitrary and user-specified
840 820 files.append(os.path.join(self._rootdir, util.expandpath(path)))
841 821 return files
842 822
    def _ignorefileandline(self, f):
        """Return (ignorefile, lineno, line) for the rule that ignores `f`.

        Returns (None, -1, b"") when no pattern matches.  `subinclude`
        patterns are followed breadth-first, each file visited only once.
        """
        files = collections.deque(self._ignorefiles())
        visited = set()
        while files:
            i = files.popleft()
            patterns = matchmod.readpatternfile(
                i, self._ui.warn, sourceinfo=True
            )
            for pattern, lineno, line in patterns:
                kind, p = matchmod._patsplit(pattern, b'glob')
                if kind == b"subinclude":
                    if p not in visited:
                        files.append(p)
                    continue
                m = matchmod.match(
                    self._root, b'', [], [pattern], warn=self._ui.warn
                )
                if m(f):
                    return (i, lineno, line)
            visited.add(i)
        return (None, -1, b"")
864 844
    def _walkexplicit(self, match, subrepos):
        """Get stat data about the files explicitly specified by match.

        Return a triple (results, dirsfound, dirsnotfound).
        - results is a mapping from filename to stat result. It also contains
          listings mapping subrepos and .hg to None.
        - dirsfound is a list of files found to be directories.
        - dirsnotfound is a list of files that the dirstate thinks are
          directories and that were not found."""

        def badtype(mode):
            # human-readable description of an unsupported file type
            kind = _(b'unknown')
            if stat.S_ISCHR(mode):
                kind = _(b'character device')
            elif stat.S_ISBLK(mode):
                kind = _(b'block device')
            elif stat.S_ISFIFO(mode):
                kind = _(b'fifo')
            elif stat.S_ISSOCK(mode):
                kind = _(b'socket')
            elif stat.S_ISDIR(mode):
                kind = _(b'directory')
            return _(b'unsupported file type (type is %s)') % kind

        badfn = match.bad
        dmap = self._map
        # local aliases to avoid repeated attribute lookups in the loop below
        lstat = os.lstat
        getkind = stat.S_IFMT
        dirkind = stat.S_IFDIR
        regkind = stat.S_IFREG
        lnkkind = stat.S_IFLNK
        join = self._join
        dirsfound = []
        foundadd = dirsfound.append
        dirsnotfound = []
        notfoundadd = dirsnotfound.append

        if not match.isexact() and self._checkcase:
            normalize = self._normalize
        else:
            normalize = None

        # drop the files that live inside one of the listed subrepos
        files = sorted(match.files())
        subrepos.sort()
        i, j = 0, 0
        while i < len(files) and j < len(subrepos):
            subpath = subrepos[j] + b"/"
            if files[i] < subpath:
                i += 1
                continue
            while i < len(files) and files[i].startswith(subpath):
                del files[i]
            j += 1

        if not files or b'' in files:
            files = [b'']
            # constructing the foldmap is expensive, so don't do it for the
            # common case where files is ['']
            normalize = None
        results = dict.fromkeys(subrepos)
        results[b'.hg'] = None

        for ff in files:
            if normalize:
                nf = normalize(ff, False, True)
            else:
                nf = ff
            if nf in results:
                continue

            try:
                st = lstat(join(nf))
                kind = getkind(st.st_mode)
                if kind == dirkind:
                    if nf in dmap:
                        # file replaced by dir on disk but still in dirstate
                        results[nf] = None
                    foundadd((nf, ff))
                elif kind == regkind or kind == lnkkind:
                    results[nf] = st
                else:
                    badfn(ff, badtype(kind))
                    if nf in dmap:
                        results[nf] = None
            except OSError as inst:  # nf not found on disk - it is dirstate only
                if nf in dmap:  # does it exactly match a missing file?
                    results[nf] = None
                else:  # does it match a missing directory?
                    if self._map.hasdir(nf):
                        notfoundadd(nf)
                    else:
                        badfn(ff, encoding.strtolocal(inst.strerror))

        # match.files() may contain explicitly-specified paths that shouldn't
        # be taken; drop them from the list of files found. dirsfound/notfound
        # aren't filtered here because they will be tested later.
        if match.anypats():
            for f in list(results):
                if f == b'.hg' or f in subrepos:
                    # keep sentinel to disable further out-of-repo walks
                    continue
                if not match(f):
                    del results[f]

        # Case insensitive filesystems cannot rely on lstat() failing to detect
        # a case-only rename. Prune the stat object for any file that does not
        # match the case in the filesystem, if there are multiple files that
        # normalize to the same path.
        if match.isexact() and self._checkcase:
            normed = {}

            for f, st in pycompat.iteritems(results):
                if st is None:
                    continue

                nc = util.normcase(f)
                paths = normed.get(nc)

                if paths is None:
                    paths = set()
                    normed[nc] = paths

                paths.add(f)

            for norm, paths in pycompat.iteritems(normed):
                if len(paths) > 1:
                    for path in paths:
                        folded = self._discoverpath(
                            path, norm, True, None, self._map.dirfoldmap
                        )
                        if path != folded:
                            results[path] = None

        return results, dirsfound, dirsnotfound
999 979
1000 980 def walk(self, match, subrepos, unknown, ignored, full=True):
1001 981 """
1002 982 Walk recursively through the directory tree, finding all files
1003 983 matched by match.
1004 984
1005 985 If full is False, maybe skip some known-clean files.
1006 986
1007 987 Return a dict mapping filename to stat-like object (either
1008 988 mercurial.osutil.stat instance or return value of os.stat()).
1009 989
1010 990 """
1011 991 # full is a flag that extensions that hook into walk can use -- this
1012 992 # implementation doesn't use it at all. This satisfies the contract
1013 993 # because we only guarantee a "maybe".
1014 994
1015 995 if ignored:
1016 996 ignore = util.never
1017 997 dirignore = util.never
1018 998 elif unknown:
1019 999 ignore = self._ignore
1020 1000 dirignore = self._dirignore
1021 1001 else:
1022 1002 # if not unknown and not ignored, drop dir recursion and step 2
1023 1003 ignore = util.always
1024 1004 dirignore = util.always
1025 1005
1026 1006 matchfn = match.matchfn
1027 1007 matchalways = match.always()
1028 1008 matchtdir = match.traversedir
1029 1009 dmap = self._map
1030 1010 listdir = util.listdir
1031 1011 lstat = os.lstat
1032 1012 dirkind = stat.S_IFDIR
1033 1013 regkind = stat.S_IFREG
1034 1014 lnkkind = stat.S_IFLNK
1035 1015 join = self._join
1036 1016
1037 1017 exact = skipstep3 = False
1038 1018 if match.isexact(): # match.exact
1039 1019 exact = True
1040 1020 dirignore = util.always # skip step 2
1041 1021 elif match.prefix(): # match.match, no patterns
1042 1022 skipstep3 = True
1043 1023
1044 1024 if not exact and self._checkcase:
1045 1025 normalize = self._normalize
1046 1026 normalizefile = self._normalizefile
1047 1027 skipstep3 = False
1048 1028 else:
1049 1029 normalize = self._normalize
1050 1030 normalizefile = None
1051 1031
1052 1032 # step 1: find all explicit files
1053 1033 results, work, dirsnotfound = self._walkexplicit(match, subrepos)
1054 1034 if matchtdir:
1055 1035 for d in work:
1056 1036 matchtdir(d[0])
1057 1037 for d in dirsnotfound:
1058 1038 matchtdir(d)
1059 1039
1060 1040 skipstep3 = skipstep3 and not (work or dirsnotfound)
1061 1041 work = [d for d in work if not dirignore(d[0])]
1062 1042
1063 1043 # step 2: visit subdirectories
1064 1044 def traverse(work, alreadynormed):
1065 1045 wadd = work.append
1066 1046 while work:
1067 1047 tracing.counter('dirstate.walk work', len(work))
1068 1048 nd = work.pop()
1069 1049 visitentries = match.visitchildrenset(nd)
1070 1050 if not visitentries:
1071 1051 continue
1072 1052 if visitentries == b'this' or visitentries == b'all':
1073 1053 visitentries = None
1074 1054 skip = None
1075 1055 if nd != b'':
1076 1056 skip = b'.hg'
1077 1057 try:
1078 1058 with tracing.log('dirstate.walk.traverse listdir %s', nd):
1079 1059 entries = listdir(join(nd), stat=True, skip=skip)
1080 1060 except OSError as inst:
1081 1061 if inst.errno in (errno.EACCES, errno.ENOENT):
1082 1062 match.bad(
1083 1063 self.pathto(nd), encoding.strtolocal(inst.strerror)
1084 1064 )
1085 1065 continue
1086 1066 raise
1087 1067 for f, kind, st in entries:
1088 1068 # Some matchers may return files in the visitentries set,
1089 1069 # instead of 'this', if the matcher explicitly mentions them
1090 1070 # and is not an exactmatcher. This is acceptable; we do not
1091 1071 # make any hard assumptions about file-or-directory below
1092 1072 # based on the presence of `f` in visitentries. If
1093 1073 # visitchildrenset returned a set, we can always skip the
1094 1074 # entries *not* in the set it provided regardless of whether
1095 1075 # they're actually a file or a directory.
1096 1076 if visitentries and f not in visitentries:
1097 1077 continue
1098 1078 if normalizefile:
1099 1079 # even though f might be a directory, we're only
1100 1080 # interested in comparing it to files currently in the
1101 1081 # dmap -- therefore normalizefile is enough
1102 1082 nf = normalizefile(
1103 1083 nd and (nd + b"/" + f) or f, True, True
1104 1084 )
1105 1085 else:
1106 1086 nf = nd and (nd + b"/" + f) or f
1107 1087 if nf not in results:
1108 1088 if kind == dirkind:
1109 1089 if not ignore(nf):
1110 1090 if matchtdir:
1111 1091 matchtdir(nf)
1112 1092 wadd(nf)
1113 1093 if nf in dmap and (matchalways or matchfn(nf)):
1114 1094 results[nf] = None
1115 1095 elif kind == regkind or kind == lnkkind:
1116 1096 if nf in dmap:
1117 1097 if matchalways or matchfn(nf):
1118 1098 results[nf] = st
1119 1099 elif (matchalways or matchfn(nf)) and not ignore(
1120 1100 nf
1121 1101 ):
1122 1102 # unknown file -- normalize if necessary
1123 1103 if not alreadynormed:
1124 1104 nf = normalize(nf, False, True)
1125 1105 results[nf] = st
1126 1106 elif nf in dmap and (matchalways or matchfn(nf)):
1127 1107 results[nf] = None
1128 1108
1129 1109 for nd, d in work:
1130 1110 # alreadynormed means that processwork doesn't have to do any
1131 1111 # expensive directory normalization
1132 1112 alreadynormed = not normalize or nd == d
1133 1113 traverse([d], alreadynormed)
1134 1114
1135 1115 for s in subrepos:
1136 1116 del results[s]
1137 1117 del results[b'.hg']
1138 1118
1139 1119 # step 3: visit remaining files from dmap
1140 1120 if not skipstep3 and not exact:
1141 1121 # If a dmap file is not in results yet, it was either
1142 1122 # a) not matching matchfn b) ignored, c) missing, or d) under a
1143 1123 # symlink directory.
1144 1124 if not results and matchalways:
1145 1125 visit = [f for f in dmap]
1146 1126 else:
1147 1127 visit = [f for f in dmap if f not in results and matchfn(f)]
1148 1128 visit.sort()
1149 1129
1150 1130 if unknown:
1151 1131 # unknown == True means we walked all dirs under the roots
1152 1132 # that wasn't ignored, and everything that matched was stat'ed
1153 1133 # and is already in results.
1154 1134 # The rest must thus be ignored or under a symlink.
1155 1135 audit_path = pathutil.pathauditor(self._root, cached=True)
1156 1136
1157 1137 for nf in iter(visit):
1158 1138 # If a stat for the same file was already added with a
1159 1139 # different case, don't add one for this, since that would
1160 1140 # make it appear as if the file exists under both names
1161 1141 # on disk.
1162 1142 if (
1163 1143 normalizefile
1164 1144 and normalizefile(nf, True, True) in results
1165 1145 ):
1166 1146 results[nf] = None
1167 1147 # Report ignored items in the dmap as long as they are not
1168 1148 # under a symlink directory.
1169 1149 elif audit_path.check(nf):
1170 1150 try:
1171 1151 results[nf] = lstat(join(nf))
1172 1152 # file was just ignored, no links, and exists
1173 1153 except OSError:
1174 1154 # file doesn't exist
1175 1155 results[nf] = None
1176 1156 else:
1177 1157 # It's either missing or under a symlink directory
1178 1158 # which we in this case report as missing
1179 1159 results[nf] = None
1180 1160 else:
1181 1161 # We may not have walked the full directory tree above,
1182 1162 # so stat and check everything we missed.
1183 1163 iv = iter(visit)
1184 1164 for st in util.statfiles([join(i) for i in visit]):
1185 1165 results[next(iv)] = st
1186 1166 return results
1187 1167
1188 1168 def _rust_status(self, matcher, list_clean, list_ignored, list_unknown):
1189 1169 # Force Rayon (Rust parallelism library) to respect the number of
1190 1170 # workers. This is a temporary workaround until Rust code knows
1191 1171 # how to read the config file.
1192 1172 numcpus = self._ui.configint(b"worker", b"numcpus")
1193 1173 if numcpus is not None:
1194 1174 encoding.environ.setdefault(b'RAYON_NUM_THREADS', b'%d' % numcpus)
1195 1175
1196 1176 workers_enabled = self._ui.configbool(b"worker", b"enabled", True)
1197 1177 if not workers_enabled:
1198 1178 encoding.environ[b"RAYON_NUM_THREADS"] = b"1"
1199 1179
1200 1180 (
1201 1181 lookup,
1202 1182 modified,
1203 1183 added,
1204 1184 removed,
1205 1185 deleted,
1206 1186 clean,
1207 1187 ignored,
1208 1188 unknown,
1209 1189 warnings,
1210 1190 bad,
1211 1191 traversed,
1212 1192 dirty,
1213 1193 ) = rustmod.status(
1214 1194 self._map._map,
1215 1195 matcher,
1216 1196 self._rootdir,
1217 1197 self._ignorefiles(),
1218 1198 self._checkexec,
1219 self._lastnormaltime,
1220 1199 bool(list_clean),
1221 1200 bool(list_ignored),
1222 1201 bool(list_unknown),
1223 1202 bool(matcher.traversedir),
1224 1203 )
1225 1204
1226 1205 self._dirty |= dirty
1227 1206
1228 1207 if matcher.traversedir:
1229 1208 for dir in traversed:
1230 1209 matcher.traversedir(dir)
1231 1210
1232 1211 if self._ui.warn:
1233 1212 for item in warnings:
1234 1213 if isinstance(item, tuple):
1235 1214 file_path, syntax = item
1236 1215 msg = _(b"%s: ignoring invalid syntax '%s'\n") % (
1237 1216 file_path,
1238 1217 syntax,
1239 1218 )
1240 1219 self._ui.warn(msg)
1241 1220 else:
1242 1221 msg = _(b"skipping unreadable pattern file '%s': %s\n")
1243 1222 self._ui.warn(
1244 1223 msg
1245 1224 % (
1246 1225 pathutil.canonpath(
1247 1226 self._rootdir, self._rootdir, item
1248 1227 ),
1249 1228 b"No such file or directory",
1250 1229 )
1251 1230 )
1252 1231
1253 1232 for (fn, message) in bad:
1254 1233 matcher.bad(fn, encoding.strtolocal(message))
1255 1234
1256 1235 status = scmutil.status(
1257 1236 modified=modified,
1258 1237 added=added,
1259 1238 removed=removed,
1260 1239 deleted=deleted,
1261 1240 unknown=unknown,
1262 1241 ignored=ignored,
1263 1242 clean=clean,
1264 1243 )
1265 1244 return (lookup, status)
1266 1245
1267 1246 def status(self, match, subrepos, ignored, clean, unknown):
1268 1247 """Determine the status of the working copy relative to the
1269 1248 dirstate and return a pair of (unsure, status), where status is of type
1270 1249 scmutil.status and:
1271 1250
1272 1251 unsure:
1273 1252 files that might have been modified since the dirstate was
1274 1253 written, but need to be read to be sure (size is the same
1275 1254 but mtime differs)
1276 1255 status.modified:
1277 1256 files that have definitely been modified since the dirstate
1278 1257 was written (different size or mode)
1279 1258 status.clean:
1280 1259 files that have definitely not been modified since the
1281 1260 dirstate was written
1282 1261 """
1283 1262 listignored, listclean, listunknown = ignored, clean, unknown
1284 1263 lookup, modified, added, unknown, ignored = [], [], [], [], []
1285 1264 removed, deleted, clean = [], [], []
1286 1265
1287 1266 dmap = self._map
1288 1267 dmap.preload()
1289 1268
1290 1269 use_rust = True
1291 1270
1292 1271 allowed_matchers = (
1293 1272 matchmod.alwaysmatcher,
1294 1273 matchmod.exactmatcher,
1295 1274 matchmod.includematcher,
1296 1275 )
1297 1276
1298 1277 if rustmod is None:
1299 1278 use_rust = False
1300 1279 elif self._checkcase:
1301 1280 # Case-insensitive filesystems are not handled yet
1302 1281 use_rust = False
1303 1282 elif subrepos:
1304 1283 use_rust = False
1305 1284 elif sparse.enabled:
1306 1285 use_rust = False
1307 1286 elif not isinstance(match, allowed_matchers):
1308 1287 # Some matchers have yet to be implemented
1309 1288 use_rust = False
1310 1289
1311 1290 # Get the time from the filesystem so we can disambiguate files that
1312 1291 # appear modified in the present or future.
1313 1292 try:
1314 1293 mtime_boundary = timestamp.get_fs_now(self._opener)
1315 1294 except OSError:
1316 1295 # In largefiles or readonly context
1317 1296 mtime_boundary = None
1318 1297
1319 1298 if use_rust:
1320 1299 try:
1321 1300 res = self._rust_status(
1322 1301 match, listclean, listignored, listunknown
1323 1302 )
1324 1303 return res + (mtime_boundary,)
1325 1304 except rustmod.FallbackError:
1326 1305 pass
1327 1306
1328 1307 def noop(f):
1329 1308 pass
1330 1309
1331 1310 dcontains = dmap.__contains__
1332 1311 dget = dmap.__getitem__
1333 1312 ladd = lookup.append # aka "unsure"
1334 1313 madd = modified.append
1335 1314 aadd = added.append
1336 1315 uadd = unknown.append if listunknown else noop
1337 1316 iadd = ignored.append if listignored else noop
1338 1317 radd = removed.append
1339 1318 dadd = deleted.append
1340 1319 cadd = clean.append if listclean else noop
1341 1320 mexact = match.exact
1342 1321 dirignore = self._dirignore
1343 1322 checkexec = self._checkexec
1344 1323 checklink = self._checklink
1345 1324 copymap = self._map.copymap
1346 lastnormaltime = self._lastnormaltime
1347 1325
1348 1326 # We need to do full walks when either
1349 1327 # - we're listing all clean files, or
1350 1328 # - match.traversedir does something, because match.traversedir should
1351 1329 # be called for every dir in the working dir
1352 1330 full = listclean or match.traversedir is not None
1353 1331 for fn, st in pycompat.iteritems(
1354 1332 self.walk(match, subrepos, listunknown, listignored, full=full)
1355 1333 ):
1356 1334 if not dcontains(fn):
1357 1335 if (listignored or mexact(fn)) and dirignore(fn):
1358 1336 if listignored:
1359 1337 iadd(fn)
1360 1338 else:
1361 1339 uadd(fn)
1362 1340 continue
1363 1341
1364 1342 t = dget(fn)
1365 1343 mode = t.mode
1366 1344 size = t.size
1367 1345
1368 1346 if not st and t.tracked:
1369 1347 dadd(fn)
1370 1348 elif t.p2_info:
1371 1349 madd(fn)
1372 1350 elif t.added:
1373 1351 aadd(fn)
1374 1352 elif t.removed:
1375 1353 radd(fn)
1376 1354 elif t.tracked:
1377 1355 if not checklink and t.has_fallback_symlink:
1378 1356 # If the file system does not support symlink, the mode
1379 1357 # might not be correctly stored in the dirstate, so do not
1380 1358 # trust it.
1381 1359 ladd(fn)
1382 1360 elif not checkexec and t.has_fallback_exec:
1383 1361 # If the file system does not support exec bits, the mode
1384 1362 # might not be correctly stored in the dirstate, so do not
1385 1363 # trust it.
1386 1364 ladd(fn)
1387 1365 elif (
1388 1366 size >= 0
1389 1367 and (
1390 1368 (size != st.st_size and size != st.st_size & _rangemask)
1391 1369 or ((mode ^ st.st_mode) & 0o100 and checkexec)
1392 1370 )
1393 1371 or fn in copymap
1394 1372 ):
1395 1373 if stat.S_ISLNK(st.st_mode) and size != st.st_size:
1396 1374 # issue6456: Size returned may be longer due to
1397 1375 # encryption on EXT-4 fscrypt, undecided.
1398 1376 ladd(fn)
1399 1377 else:
1400 1378 madd(fn)
1401 1379 elif not t.mtime_likely_equal_to(timestamp.mtime_of(st)):
1402 ladd(fn)
1403 elif timestamp.mtime_of(st) == lastnormaltime:
1404 # fn may have just been marked as normal and it may have
1405 # changed in the same second without changing its size.
1406 # This can happen if we quickly do multiple commits.
1407 # Force lookup, so we don't miss such a racy file change.
1380 # There might be a change in the future if for example the
1381 # internal clock is off, but this is a case where the issues
1382 # the user would face would be a lot worse and there is
1383 # nothing we can really do.
1408 1384 ladd(fn)
1409 1385 elif listclean:
1410 1386 cadd(fn)
1411 1387 status = scmutil.status(
1412 1388 modified, added, removed, deleted, unknown, ignored, clean
1413 1389 )
1414 1390 return (lookup, status, mtime_boundary)
1415 1391
1416 1392 def matches(self, match):
1417 1393 """
1418 1394 return files in the dirstate (in whatever state) filtered by match
1419 1395 """
1420 1396 dmap = self._map
1421 1397 if rustmod is not None:
1422 1398 dmap = self._map._map
1423 1399
1424 1400 if match.always():
1425 1401 return dmap.keys()
1426 1402 files = match.files()
1427 1403 if match.isexact():
1428 1404 # fast path -- filter the other way around, since typically files is
1429 1405 # much smaller than dmap
1430 1406 return [f for f in files if f in dmap]
1431 1407 if match.prefix() and all(fn in dmap for fn in files):
1432 1408 # fast path -- all the values are known to be files, so just return
1433 1409 # that
1434 1410 return list(files)
1435 1411 return [f for f in dmap if match(f)]
1436 1412
1437 1413 def _actualfilename(self, tr):
1438 1414 if tr:
1439 1415 return self._pendingfilename
1440 1416 else:
1441 1417 return self._filename
1442 1418
1443 1419 def savebackup(self, tr, backupname):
1444 1420 '''Save current dirstate into backup file'''
1445 1421 filename = self._actualfilename(tr)
1446 1422 assert backupname != filename
1447 1423
1448 1424 # use '_writedirstate' instead of 'write' to write changes certainly,
1449 1425 # because the latter omits writing out if transaction is running.
1450 1426 # output file will be used to create backup of dirstate at this point.
1451 1427 if self._dirty or not self._opener.exists(filename):
1452 1428 self._writedirstate(
1453 1429 tr,
1454 1430 self._opener(filename, b"w", atomictemp=True, checkambig=True),
1455 1431 )
1456 1432
1457 1433 if tr:
1458 1434 # ensure that subsequent tr.writepending returns True for
1459 1435 # changes written out above, even if dirstate is never
1460 1436 # changed after this
1461 1437 tr.addfilegenerator(
1462 1438 b'dirstate',
1463 1439 (self._filename,),
1464 1440 lambda f: self._writedirstate(tr, f),
1465 1441 location=b'plain',
1466 1442 )
1467 1443
1468 1444 # ensure that pending file written above is unlinked at
1469 1445 # failure, even if tr.writepending isn't invoked until the
1470 1446 # end of this transaction
1471 1447 tr.registertmp(filename, location=b'plain')
1472 1448
1473 1449 self._opener.tryunlink(backupname)
1474 1450 # hardlink backup is okay because _writedirstate is always called
1475 1451 # with an "atomictemp=True" file.
1476 1452 util.copyfile(
1477 1453 self._opener.join(filename),
1478 1454 self._opener.join(backupname),
1479 1455 hardlink=True,
1480 1456 )
1481 1457
1482 1458 def restorebackup(self, tr, backupname):
1483 1459 '''Restore dirstate by backup file'''
1484 1460 # this "invalidate()" prevents "wlock.release()" from writing
1485 1461 # changes of dirstate out after restoring from backup file
1486 1462 self.invalidate()
1487 1463 filename = self._actualfilename(tr)
1488 1464 o = self._opener
1489 1465 if util.samefile(o.join(backupname), o.join(filename)):
1490 1466 o.unlink(backupname)
1491 1467 else:
1492 1468 o.rename(backupname, filename, checkambig=True)
1493 1469
1494 1470 def clearbackup(self, tr, backupname):
1495 1471 '''Clear backup file'''
1496 1472 self._opener.unlink(backupname)
1497 1473
1498 1474 def verify(self, m1, m2):
1499 1475 """check the dirstate content again the parent manifest and yield errors"""
1500 1476 missing_from_p1 = b"%s in state %s, but not in manifest1\n"
1501 1477 unexpected_in_p1 = b"%s in state %s, but also in manifest1\n"
1502 1478 missing_from_ps = b"%s in state %s, but not in either manifest\n"
1503 1479 missing_from_ds = b"%s in manifest1, but listed as state %s\n"
1504 1480 for f, entry in self.items():
1505 1481 state = entry.state
1506 1482 if state in b"nr" and f not in m1:
1507 1483 yield (missing_from_p1, f, state)
1508 1484 if state in b"a" and f in m1:
1509 1485 yield (unexpected_in_p1, f, state)
1510 1486 if state in b"m" and f not in m1 and f not in m2:
1511 1487 yield (missing_from_ps, f, state)
1512 1488 for f in m1:
1513 1489 state = self.get_entry(f).state
1514 1490 if state not in b"nrm":
1515 1491 yield (missing_from_ds, f, state)
@@ -1,147 +1,142 b''
1 1 // status.rs
2 2 //
3 3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
4 4 //
5 5 // This software may be used and distributed according to the terms of the
6 6 // GNU General Public License version 2 or any later version.
7 7
8 8 //! Rust implementation of dirstate.status (dirstate.py).
9 9 //! It is currently missing a lot of functionality compared to the Python one
10 10 //! and will only be triggered in narrow cases.
11 11
12 12 use crate::dirstate_tree::on_disk::DirstateV2ParseError;
13 13
14 14 use crate::{
15 dirstate::TruncatedTimestamp,
16 15 utils::hg_path::{HgPath, HgPathError},
17 16 PatternError,
18 17 };
19 18
20 19 use std::{borrow::Cow, fmt};
21 20
22 21 /// Wrong type of file from a `BadMatch`
23 22 /// Note: a lot of those don't exist on all platforms.
24 23 #[derive(Debug, Copy, Clone)]
25 24 pub enum BadType {
26 25 CharacterDevice,
27 26 BlockDevice,
28 27 FIFO,
29 28 Socket,
30 29 Directory,
31 30 Unknown,
32 31 }
33 32
34 33 impl fmt::Display for BadType {
35 34 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
36 35 f.write_str(match self {
37 36 BadType::CharacterDevice => "character device",
38 37 BadType::BlockDevice => "block device",
39 38 BadType::FIFO => "fifo",
40 39 BadType::Socket => "socket",
41 40 BadType::Directory => "directory",
42 41 BadType::Unknown => "unknown",
43 42 })
44 43 }
45 44 }
46 45
47 46 /// Was explicitly matched but cannot be found/accessed
48 47 #[derive(Debug, Copy, Clone)]
49 48 pub enum BadMatch {
50 49 OsError(i32),
51 50 BadType(BadType),
52 51 }
53 52
54 53 /// `Box<dyn Trait>` is syntactic sugar for `Box<dyn Trait + 'static>`, so add
55 54 /// an explicit lifetime here to not fight `'static` bounds "out of nowhere".
56 55 pub type IgnoreFnType<'a> =
57 56 Box<dyn for<'r> Fn(&'r HgPath) -> bool + Sync + 'a>;
58 57
59 58 /// We have a good mix of owned (from directory traversal) and borrowed (from
60 59 /// the dirstate/explicit) paths, this comes up a lot.
61 60 pub type HgPathCow<'a> = Cow<'a, HgPath>;
62 61
63 62 #[derive(Debug, Copy, Clone)]
64 63 pub struct StatusOptions {
65 /// Remember the most recent modification timeslot for status, to make
66 /// sure we won't miss future size-preserving file content modifications
67 /// that happen within the same timeslot.
68 pub last_normal_time: TruncatedTimestamp,
69 64 /// Whether we are on a filesystem with UNIX-like exec flags
70 65 pub check_exec: bool,
71 66 pub list_clean: bool,
72 67 pub list_unknown: bool,
73 68 pub list_ignored: bool,
74 69 /// Whether to collect traversed dirs for applying a callback later.
75 70 /// Used by `hg purge` for example.
76 71 pub collect_traversed_dirs: bool,
77 72 }
78 73
79 74 #[derive(Debug, Default)]
80 75 pub struct DirstateStatus<'a> {
81 76 /// Tracked files whose contents have changed since the parent revision
82 77 pub modified: Vec<HgPathCow<'a>>,
83 78
84 79 /// Newly-tracked files that were not present in the parent
85 80 pub added: Vec<HgPathCow<'a>>,
86 81
87 82 /// Previously-tracked files that have been (re)moved with an hg command
88 83 pub removed: Vec<HgPathCow<'a>>,
89 84
90 85 /// (Still) tracked files that are missing, (re)moved with an non-hg
91 86 /// command
92 87 pub deleted: Vec<HgPathCow<'a>>,
93 88
94 89 /// Tracked files that are up to date with the parent.
95 90 /// Only pupulated if `StatusOptions::list_clean` is true.
96 91 pub clean: Vec<HgPathCow<'a>>,
97 92
98 93 /// Files in the working directory that are ignored with `.hgignore`.
99 94 /// Only pupulated if `StatusOptions::list_ignored` is true.
100 95 pub ignored: Vec<HgPathCow<'a>>,
101 96
102 97 /// Files in the working directory that are neither tracked nor ignored.
103 98 /// Only pupulated if `StatusOptions::list_unknown` is true.
104 99 pub unknown: Vec<HgPathCow<'a>>,
105 100
106 101 /// Was explicitly matched but cannot be found/accessed
107 102 pub bad: Vec<(HgPathCow<'a>, BadMatch)>,
108 103
109 104 /// Either clean or modified, but we can’t tell from filesystem metadata
110 105 /// alone. The file contents need to be read and compared with that in
111 106 /// the parent.
112 107 pub unsure: Vec<HgPathCow<'a>>,
113 108
114 109 /// Only filled if `collect_traversed_dirs` is `true`
115 110 pub traversed: Vec<HgPathCow<'a>>,
116 111
117 112 /// Whether `status()` made changed to the `DirstateMap` that should be
118 113 /// written back to disk
119 114 pub dirty: bool,
120 115 }
121 116
122 117 #[derive(Debug, derive_more::From)]
123 118 pub enum StatusError {
124 119 /// Generic IO error
125 120 IO(std::io::Error),
126 121 /// An invalid path that cannot be represented in Mercurial was found
127 122 Path(HgPathError),
128 123 /// An invalid "ignore" pattern was found
129 124 Pattern(PatternError),
130 125 /// Corrupted dirstate
131 126 DirstateV2ParseError(DirstateV2ParseError),
132 127 }
133 128
134 129 pub type StatusResult<T> = Result<T, StatusError>;
135 130
136 131 impl fmt::Display for StatusError {
137 132 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
138 133 match self {
139 134 StatusError::IO(error) => error.fmt(f),
140 135 StatusError::Path(error) => error.fmt(f),
141 136 StatusError::Pattern(error) => error.fmt(f),
142 137 StatusError::DirstateV2ParseError(_) => {
143 138 f.write_str("dirstate-v2 parse error")
144 139 }
145 140 }
146 141 }
147 142 }
@@ -1,756 +1,760 b''
1 1 use crate::dirstate::entry::TruncatedTimestamp;
2 2 use crate::dirstate::status::IgnoreFnType;
3 3 use crate::dirstate_tree::dirstate_map::BorrowedPath;
4 4 use crate::dirstate_tree::dirstate_map::ChildNodesRef;
5 5 use crate::dirstate_tree::dirstate_map::DirstateMap;
6 6 use crate::dirstate_tree::dirstate_map::NodeData;
7 7 use crate::dirstate_tree::dirstate_map::NodeRef;
8 8 use crate::dirstate_tree::on_disk::DirstateV2ParseError;
9 9 use crate::matchers::get_ignore_function;
10 10 use crate::matchers::Matcher;
11 11 use crate::utils::files::get_bytes_from_os_string;
12 12 use crate::utils::files::get_path_from_bytes;
13 13 use crate::utils::hg_path::HgPath;
14 14 use crate::BadMatch;
15 15 use crate::DirstateStatus;
16 16 use crate::EntryState;
17 17 use crate::HgPathBuf;
18 18 use crate::PatternFileWarning;
19 19 use crate::StatusError;
20 20 use crate::StatusOptions;
21 21 use micro_timer::timed;
22 22 use rayon::prelude::*;
23 23 use sha1::{Digest, Sha1};
24 24 use std::borrow::Cow;
25 25 use std::io;
26 26 use std::path::Path;
27 27 use std::path::PathBuf;
28 28 use std::sync::Mutex;
29 29 use std::time::SystemTime;
30 30
31 31 /// Returns the status of the working directory compared to its parent
32 32 /// changeset.
33 33 ///
34 34 /// This algorithm is based on traversing the filesystem tree (`fs` in function
35 35 /// and variable names) and dirstate tree at the same time. The core of this
36 36 /// traversal is the recursive `traverse_fs_directory_and_dirstate` function
37 37 /// and its use of `itertools::merge_join_by`. When reaching a path that only
38 38 /// exists in one of the two trees, depending on information requested by
39 39 /// `options` we may need to traverse the remaining subtree.
40 40 #[timed]
41 41 pub fn status<'tree, 'on_disk: 'tree>(
42 42 dmap: &'tree mut DirstateMap<'on_disk>,
43 43 matcher: &(dyn Matcher + Sync),
44 44 root_dir: PathBuf,
45 45 ignore_files: Vec<PathBuf>,
46 46 options: StatusOptions,
47 47 ) -> Result<(DirstateStatus<'on_disk>, Vec<PatternFileWarning>), StatusError> {
48 48 let (ignore_fn, warnings, patterns_changed): (IgnoreFnType, _, _) =
49 49 if options.list_ignored || options.list_unknown {
50 50 let mut hasher = Sha1::new();
51 51 let (ignore_fn, warnings) = get_ignore_function(
52 52 ignore_files,
53 53 &root_dir,
54 54 &mut |pattern_bytes| hasher.update(pattern_bytes),
55 55 )?;
56 56 let new_hash = *hasher.finalize().as_ref();
57 57 let changed = new_hash != dmap.ignore_patterns_hash;
58 58 dmap.ignore_patterns_hash = new_hash;
59 59 (ignore_fn, warnings, Some(changed))
60 60 } else {
61 61 (Box::new(|&_| true), vec![], None)
62 62 };
63 63
64 64 let common = StatusCommon {
65 65 dmap,
66 66 options,
67 67 matcher,
68 68 ignore_fn,
69 69 outcome: Default::default(),
70 70 ignore_patterns_have_changed: patterns_changed,
71 71 new_cachable_directories: Default::default(),
72 72 outated_cached_directories: Default::default(),
73 73 filesystem_time_at_status_start: filesystem_now(&root_dir).ok(),
74 74 };
75 75 let is_at_repo_root = true;
76 76 let hg_path = &BorrowedPath::OnDisk(HgPath::new(""));
77 77 let has_ignored_ancestor = false;
78 78 let root_cached_mtime = None;
79 79 let root_dir_metadata = None;
80 80 // If the path we have for the repository root is a symlink, do follow it.
81 81 // (As opposed to symlinks within the working directory which are not
82 82 // followed, using `std::fs::symlink_metadata`.)
83 83 common.traverse_fs_directory_and_dirstate(
84 84 has_ignored_ancestor,
85 85 dmap.root.as_ref(),
86 86 hg_path,
87 87 &root_dir,
88 88 root_dir_metadata,
89 89 root_cached_mtime,
90 90 is_at_repo_root,
91 91 )?;
92 92 let mut outcome = common.outcome.into_inner().unwrap();
93 93 let new_cachable = common.new_cachable_directories.into_inner().unwrap();
94 94 let outdated = common.outated_cached_directories.into_inner().unwrap();
95 95
96 96 outcome.dirty = common.ignore_patterns_have_changed == Some(true)
97 97 || !outdated.is_empty()
98 98 || !new_cachable.is_empty();
99 99
100 100 // Remove outdated mtimes before adding new mtimes, in case a given
101 101 // directory is both
102 102 for path in &outdated {
103 103 let node = dmap.get_or_insert(path)?;
104 104 if let NodeData::CachedDirectory { .. } = &node.data {
105 105 node.data = NodeData::None
106 106 }
107 107 }
108 108 for (path, mtime) in &new_cachable {
109 109 let node = dmap.get_or_insert(path)?;
110 110 match &node.data {
111 111 NodeData::Entry(_) => {} // Don’t overwrite an entry
112 112 NodeData::CachedDirectory { .. } | NodeData::None => {
113 113 node.data = NodeData::CachedDirectory { mtime: *mtime }
114 114 }
115 115 }
116 116 }
117 117
118 118 Ok((outcome, warnings))
119 119 }
120 120
121 121 /// Bag of random things needed by various parts of the algorithm. Reduces the
122 122 /// number of parameters passed to functions.
123 123 struct StatusCommon<'a, 'tree, 'on_disk: 'tree> {
124 124 dmap: &'tree DirstateMap<'on_disk>,
125 125 options: StatusOptions,
126 126 matcher: &'a (dyn Matcher + Sync),
127 127 ignore_fn: IgnoreFnType<'a>,
128 128 outcome: Mutex<DirstateStatus<'on_disk>>,
129 129 new_cachable_directories:
130 130 Mutex<Vec<(Cow<'on_disk, HgPath>, TruncatedTimestamp)>>,
131 131 outated_cached_directories: Mutex<Vec<Cow<'on_disk, HgPath>>>,
132 132
133 133 /// Whether ignore files like `.hgignore` have changed since the previous
134 134 /// time a `status()` call wrote their hash to the dirstate. `None` means
135 135 /// we don’t know as this run doesn’t list either ignored or uknown files
136 136 /// and therefore isn’t reading `.hgignore`.
137 137 ignore_patterns_have_changed: Option<bool>,
138 138
139 139 /// The current time at the start of the `status()` algorithm, as measured
140 140 /// and possibly truncated by the filesystem.
141 141 filesystem_time_at_status_start: Option<SystemTime>,
142 142 }
143 143
144 144 impl<'a, 'tree, 'on_disk> StatusCommon<'a, 'tree, 'on_disk> {
145 145 fn read_dir(
146 146 &self,
147 147 hg_path: &HgPath,
148 148 fs_path: &Path,
149 149 is_at_repo_root: bool,
150 150 ) -> Result<Vec<DirEntry>, ()> {
151 151 DirEntry::read_dir(fs_path, is_at_repo_root)
152 152 .map_err(|error| self.io_error(error, hg_path))
153 153 }
154 154
155 155 fn io_error(&self, error: std::io::Error, hg_path: &HgPath) {
156 156 let errno = error.raw_os_error().expect("expected real OS error");
157 157 self.outcome
158 158 .lock()
159 159 .unwrap()
160 160 .bad
161 161 .push((hg_path.to_owned().into(), BadMatch::OsError(errno)))
162 162 }
163 163
164 164 fn check_for_outdated_directory_cache(
165 165 &self,
166 166 dirstate_node: &NodeRef<'tree, 'on_disk>,
167 167 ) -> Result<(), DirstateV2ParseError> {
168 168 if self.ignore_patterns_have_changed == Some(true)
169 169 && dirstate_node.cached_directory_mtime()?.is_some()
170 170 {
171 171 self.outated_cached_directories.lock().unwrap().push(
172 172 dirstate_node
173 173 .full_path_borrowed(self.dmap.on_disk)?
174 174 .detach_from_tree(),
175 175 )
176 176 }
177 177 Ok(())
178 178 }
179 179
180 180 /// If this returns true, we can get accurate results by only using
181 181 /// `symlink_metadata` for child nodes that exist in the dirstate and don’t
182 182 /// need to call `read_dir`.
183 183 fn can_skip_fs_readdir(
184 184 &self,
185 185 directory_metadata: Option<&std::fs::Metadata>,
186 186 cached_directory_mtime: Option<TruncatedTimestamp>,
187 187 ) -> bool {
188 188 if !self.options.list_unknown && !self.options.list_ignored {
189 189 // All states that we care about listing have corresponding
190 190 // dirstate entries.
191 191 // This happens for example with `hg status -mard`.
192 192 return true;
193 193 }
194 194 if !self.options.list_ignored
195 195 && self.ignore_patterns_have_changed == Some(false)
196 196 {
197 197 if let Some(cached_mtime) = cached_directory_mtime {
198 198 // The dirstate contains a cached mtime for this directory, set
199 199 // by a previous run of the `status` algorithm which found this
200 200 // directory eligible for `read_dir` caching.
201 201 if let Some(meta) = directory_metadata {
202 202 if cached_mtime
203 203 .likely_equal_to_mtime_of(meta)
204 204 .unwrap_or(false)
205 205 {
206 206 // The mtime of that directory has not changed
207 207 // since then, which means that the results of
208 208 // `read_dir` should also be unchanged.
209 209 return true;
210 210 }
211 211 }
212 212 }
213 213 }
214 214 false
215 215 }
216 216
217 217 /// Returns whether all child entries of the filesystem directory have a
218 218 /// corresponding dirstate node or are ignored.
219 219 fn traverse_fs_directory_and_dirstate(
220 220 &self,
221 221 has_ignored_ancestor: bool,
222 222 dirstate_nodes: ChildNodesRef<'tree, 'on_disk>,
223 223 directory_hg_path: &BorrowedPath<'tree, 'on_disk>,
224 224 directory_fs_path: &Path,
225 225 directory_metadata: Option<&std::fs::Metadata>,
226 226 cached_directory_mtime: Option<TruncatedTimestamp>,
227 227 is_at_repo_root: bool,
228 228 ) -> Result<bool, DirstateV2ParseError> {
229 229 if self.can_skip_fs_readdir(directory_metadata, cached_directory_mtime)
230 230 {
231 231 dirstate_nodes
232 232 .par_iter()
233 233 .map(|dirstate_node| {
234 234 let fs_path = directory_fs_path.join(get_path_from_bytes(
235 235 dirstate_node.base_name(self.dmap.on_disk)?.as_bytes(),
236 236 ));
237 237 match std::fs::symlink_metadata(&fs_path) {
238 238 Ok(fs_metadata) => self.traverse_fs_and_dirstate(
239 239 &fs_path,
240 240 &fs_metadata,
241 241 dirstate_node,
242 242 has_ignored_ancestor,
243 243 ),
244 244 Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
245 245 self.traverse_dirstate_only(dirstate_node)
246 246 }
247 247 Err(error) => {
248 248 let hg_path =
249 249 dirstate_node.full_path(self.dmap.on_disk)?;
250 250 Ok(self.io_error(error, hg_path))
251 251 }
252 252 }
253 253 })
254 254 .collect::<Result<_, _>>()?;
255 255
256 256 // We don’t know, so conservatively say this isn’t the case
257 257 let children_all_have_dirstate_node_or_are_ignored = false;
258 258
259 259 return Ok(children_all_have_dirstate_node_or_are_ignored);
260 260 }
261 261
262 262 let mut fs_entries = if let Ok(entries) = self.read_dir(
263 263 directory_hg_path,
264 264 directory_fs_path,
265 265 is_at_repo_root,
266 266 ) {
267 267 entries
268 268 } else {
269 269 // Treat an unreadable directory (typically because of insufficient
270 270 // permissions) like an empty directory. `self.read_dir` has
271 271 // already called `self.io_error` so a warning will be emitted.
272 272 Vec::new()
273 273 };
274 274
275 275 // `merge_join_by` requires both its input iterators to be sorted:
276 276
277 277 let dirstate_nodes = dirstate_nodes.sorted();
278 278 // `sort_unstable_by_key` doesn’t allow keys borrowing from the value:
279 279 // https://github.com/rust-lang/rust/issues/34162
280 280 fs_entries.sort_unstable_by(|e1, e2| e1.base_name.cmp(&e2.base_name));
281 281
282 282 // Propagate here any error that would happen inside the comparison
283 283 // callback below
284 284 for dirstate_node in &dirstate_nodes {
285 285 dirstate_node.base_name(self.dmap.on_disk)?;
286 286 }
287 287 itertools::merge_join_by(
288 288 dirstate_nodes,
289 289 &fs_entries,
290 290 |dirstate_node, fs_entry| {
291 291 // This `unwrap` never panics because we already propagated
292 292 // those errors above
293 293 dirstate_node
294 294 .base_name(self.dmap.on_disk)
295 295 .unwrap()
296 296 .cmp(&fs_entry.base_name)
297 297 },
298 298 )
299 299 .par_bridge()
300 300 .map(|pair| {
301 301 use itertools::EitherOrBoth::*;
302 302 let has_dirstate_node_or_is_ignored;
303 303 match pair {
304 304 Both(dirstate_node, fs_entry) => {
305 305 self.traverse_fs_and_dirstate(
306 306 &fs_entry.full_path,
307 307 &fs_entry.metadata,
308 308 dirstate_node,
309 309 has_ignored_ancestor,
310 310 )?;
311 311 has_dirstate_node_or_is_ignored = true
312 312 }
313 313 Left(dirstate_node) => {
314 314 self.traverse_dirstate_only(dirstate_node)?;
315 315 has_dirstate_node_or_is_ignored = true;
316 316 }
317 317 Right(fs_entry) => {
318 318 has_dirstate_node_or_is_ignored = self.traverse_fs_only(
319 319 has_ignored_ancestor,
320 320 directory_hg_path,
321 321 fs_entry,
322 322 )
323 323 }
324 324 }
325 325 Ok(has_dirstate_node_or_is_ignored)
326 326 })
327 327 .try_reduce(|| true, |a, b| Ok(a && b))
328 328 }
329 329
330 330 fn traverse_fs_and_dirstate(
331 331 &self,
332 332 fs_path: &Path,
333 333 fs_metadata: &std::fs::Metadata,
334 334 dirstate_node: NodeRef<'tree, 'on_disk>,
335 335 has_ignored_ancestor: bool,
336 336 ) -> Result<(), DirstateV2ParseError> {
337 337 self.check_for_outdated_directory_cache(&dirstate_node)?;
338 338 let hg_path = &dirstate_node.full_path_borrowed(self.dmap.on_disk)?;
339 339 let file_type = fs_metadata.file_type();
340 340 let file_or_symlink = file_type.is_file() || file_type.is_symlink();
341 341 if !file_or_symlink {
342 342 // If we previously had a file here, it was removed (with
343 343 // `hg rm` or similar) or deleted before it could be
344 344 // replaced by a directory or something else.
345 345 self.mark_removed_or_deleted_if_file(
346 346 &hg_path,
347 347 dirstate_node.state()?,
348 348 );
349 349 }
350 350 if file_type.is_dir() {
351 351 if self.options.collect_traversed_dirs {
352 352 self.outcome
353 353 .lock()
354 354 .unwrap()
355 355 .traversed
356 356 .push(hg_path.detach_from_tree())
357 357 }
358 358 let is_ignored = has_ignored_ancestor || (self.ignore_fn)(hg_path);
359 359 let is_at_repo_root = false;
360 360 let children_all_have_dirstate_node_or_are_ignored = self
361 361 .traverse_fs_directory_and_dirstate(
362 362 is_ignored,
363 363 dirstate_node.children(self.dmap.on_disk)?,
364 364 hg_path,
365 365 fs_path,
366 366 Some(fs_metadata),
367 367 dirstate_node.cached_directory_mtime()?,
368 368 is_at_repo_root,
369 369 )?;
370 370 self.maybe_save_directory_mtime(
371 371 children_all_have_dirstate_node_or_are_ignored,
372 372 fs_metadata,
373 373 dirstate_node,
374 374 )?
375 375 } else {
376 376 if file_or_symlink && self.matcher.matches(hg_path) {
377 377 if let Some(state) = dirstate_node.state()? {
378 378 match state {
379 379 EntryState::Added => self
380 380 .outcome
381 381 .lock()
382 382 .unwrap()
383 383 .added
384 384 .push(hg_path.detach_from_tree()),
385 385 EntryState::Removed => self
386 386 .outcome
387 387 .lock()
388 388 .unwrap()
389 389 .removed
390 390 .push(hg_path.detach_from_tree()),
391 391 EntryState::Merged => self
392 392 .outcome
393 393 .lock()
394 394 .unwrap()
395 395 .modified
396 396 .push(hg_path.detach_from_tree()),
397 397 EntryState::Normal => self
398 398 .handle_normal_file(&dirstate_node, fs_metadata)?,
399 399 }
400 400 } else {
401 401 // `node.entry.is_none()` indicates a "directory"
402 402 // node, but the filesystem has a file
403 403 self.mark_unknown_or_ignored(
404 404 has_ignored_ancestor,
405 405 hg_path,
406 406 );
407 407 }
408 408 }
409 409
410 410 for child_node in dirstate_node.children(self.dmap.on_disk)?.iter()
411 411 {
412 412 self.traverse_dirstate_only(child_node)?
413 413 }
414 414 }
415 415 Ok(())
416 416 }
417 417
418 418 fn maybe_save_directory_mtime(
419 419 &self,
420 420 children_all_have_dirstate_node_or_are_ignored: bool,
421 421 directory_metadata: &std::fs::Metadata,
422 422 dirstate_node: NodeRef<'tree, 'on_disk>,
423 423 ) -> Result<(), DirstateV2ParseError> {
424 424 if children_all_have_dirstate_node_or_are_ignored {
425 425 // All filesystem directory entries from `read_dir` have a
426 426 // corresponding node in the dirstate, so we can reconstitute the
427 427 // names of those entries without calling `read_dir` again.
428 428 if let (Some(status_start), Ok(directory_mtime)) = (
429 429 &self.filesystem_time_at_status_start,
430 430 directory_metadata.modified(),
431 431 ) {
432 432 // Although the Rust standard library’s `SystemTime` type
433 433 // has nanosecond precision, the times reported for a
434 434 // directory’s (or file’s) modified time may have lower
435 435 // resolution based on the filesystem (for example ext3
436 436 // only stores integer seconds), kernel (see
437 437 // https://stackoverflow.com/a/14393315/1162888), etc.
438 438 if &directory_mtime >= status_start {
439 439 // The directory was modified too recently, don’t cache its
440 440 // `read_dir` results.
441 441 //
442 442 // A timeline like this is possible:
443 443 //
444 444 // 1. A change to this directory (direct child was
445 445 // added or removed) cause its mtime to be set
446 446 // (possibly truncated) to `directory_mtime`
447 447 // 2. This `status` algorithm calls `read_dir`
448 448 // 3. An other change is made to the same directory is
449 449 // made so that calling `read_dir` agin would give
450 450 // different results, but soon enough after 1. that
451 451 // the mtime stays the same
452 452 //
453 453 // On a system where the time resolution poor, this
454 454 // scenario is not unlikely if all three steps are caused
455 455 // by the same script.
456 456 } else {
457 457 // We’ve observed (through `status_start`) that time has
458 458 // “progressed” since `directory_mtime`, so any further
459 459 // change to this directory is extremely likely to cause a
460 460 // different mtime.
461 461 //
462 462 // Having the same mtime again is not entirely impossible
463 463 // since the system clock is not monotonous. It could jump
464 464 // backward to some point before `directory_mtime`, then a
465 465 // directory change could potentially happen during exactly
466 466 // the wrong tick.
467 467 //
468 468 // We deem this scenario (unlike the previous one) to be
469 469 // unlikely enough in practice.
470 470 let truncated = TruncatedTimestamp::from(directory_mtime);
471 471 let is_up_to_date = if let Some(cached) =
472 472 dirstate_node.cached_directory_mtime()?
473 473 {
474 474 cached.likely_equal(truncated)
475 475 } else {
476 476 false
477 477 };
478 478 if !is_up_to_date {
479 479 let hg_path = dirstate_node
480 480 .full_path_borrowed(self.dmap.on_disk)?
481 481 .detach_from_tree();
482 482 self.new_cachable_directories
483 483 .lock()
484 484 .unwrap()
485 485 .push((hg_path, truncated))
486 486 }
487 487 }
488 488 }
489 489 }
490 490 Ok(())
491 491 }
492 492
493 493 /// A file with `EntryState::Normal` in the dirstate was found in the
494 494 /// filesystem
495 495 fn handle_normal_file(
496 496 &self,
497 497 dirstate_node: &NodeRef<'tree, 'on_disk>,
498 498 fs_metadata: &std::fs::Metadata,
499 499 ) -> Result<(), DirstateV2ParseError> {
500 500 // Keep the low 31 bits
501 501 fn truncate_u64(value: u64) -> i32 {
502 502 (value & 0x7FFF_FFFF) as i32
503 503 }
504 504
505 505 let entry = dirstate_node
506 506 .entry()?
507 507 .expect("handle_normal_file called with entry-less node");
508 508 let hg_path = &dirstate_node.full_path_borrowed(self.dmap.on_disk)?;
509 509 let mode_changed =
510 510 || self.options.check_exec && entry.mode_changed(fs_metadata);
511 511 let size = entry.size();
512 512 let size_changed = size != truncate_u64(fs_metadata.len());
513 513 if size >= 0 && size_changed && fs_metadata.file_type().is_symlink() {
514 514 // issue6456: Size returned may be longer due to encryption
515 515 // on EXT-4 fscrypt. TODO maybe only do it on EXT4?
516 516 self.outcome
517 517 .lock()
518 518 .unwrap()
519 519 .unsure
520 520 .push(hg_path.detach_from_tree())
521 521 } else if dirstate_node.has_copy_source()
522 522 || entry.is_from_other_parent()
523 523 || (size >= 0 && (size_changed || mode_changed()))
524 524 {
525 525 self.outcome
526 526 .lock()
527 527 .unwrap()
528 528 .modified
529 529 .push(hg_path.detach_from_tree())
530 530 } else {
531 531 let mtime_looks_clean;
532 532 if let Some(dirstate_mtime) = entry.truncated_mtime() {
533 533 let fs_mtime = TruncatedTimestamp::for_mtime_of(fs_metadata)
534 534 .expect("OS/libc does not support mtime?");
535 // There might be a change in the future if for example the
536 // internal clock become off while process run, but this is a
537 // case where the issues the user would face
538 // would be a lot worse and there is nothing we
539 // can really do.
535 540 mtime_looks_clean = fs_mtime.likely_equal(dirstate_mtime)
536 && !fs_mtime.likely_equal(self.options.last_normal_time)
537 541 } else {
538 542 // No mtime in the dirstate entry
539 543 mtime_looks_clean = false
540 544 };
541 545 if !mtime_looks_clean {
542 546 self.outcome
543 547 .lock()
544 548 .unwrap()
545 549 .unsure
546 550 .push(hg_path.detach_from_tree())
547 551 } else if self.options.list_clean {
548 552 self.outcome
549 553 .lock()
550 554 .unwrap()
551 555 .clean
552 556 .push(hg_path.detach_from_tree())
553 557 }
554 558 }
555 559 Ok(())
556 560 }
557 561
558 562 /// A node in the dirstate tree has no corresponding filesystem entry
559 563 fn traverse_dirstate_only(
560 564 &self,
561 565 dirstate_node: NodeRef<'tree, 'on_disk>,
562 566 ) -> Result<(), DirstateV2ParseError> {
563 567 self.check_for_outdated_directory_cache(&dirstate_node)?;
564 568 self.mark_removed_or_deleted_if_file(
565 569 &dirstate_node.full_path_borrowed(self.dmap.on_disk)?,
566 570 dirstate_node.state()?,
567 571 );
568 572 dirstate_node
569 573 .children(self.dmap.on_disk)?
570 574 .par_iter()
571 575 .map(|child_node| self.traverse_dirstate_only(child_node))
572 576 .collect()
573 577 }
574 578
575 579 /// A node in the dirstate tree has no corresponding *file* on the
576 580 /// filesystem
577 581 ///
578 582 /// Does nothing on a "directory" node
579 583 fn mark_removed_or_deleted_if_file(
580 584 &self,
581 585 hg_path: &BorrowedPath<'tree, 'on_disk>,
582 586 dirstate_node_state: Option<EntryState>,
583 587 ) {
584 588 if let Some(state) = dirstate_node_state {
585 589 if self.matcher.matches(hg_path) {
586 590 if let EntryState::Removed = state {
587 591 self.outcome
588 592 .lock()
589 593 .unwrap()
590 594 .removed
591 595 .push(hg_path.detach_from_tree())
592 596 } else {
593 597 self.outcome
594 598 .lock()
595 599 .unwrap()
596 600 .deleted
597 601 .push(hg_path.detach_from_tree())
598 602 }
599 603 }
600 604 }
601 605 }
602 606
603 607 /// Something in the filesystem has no corresponding dirstate node
604 608 ///
605 609 /// Returns whether that path is ignored
606 610 fn traverse_fs_only(
607 611 &self,
608 612 has_ignored_ancestor: bool,
609 613 directory_hg_path: &HgPath,
610 614 fs_entry: &DirEntry,
611 615 ) -> bool {
612 616 let hg_path = directory_hg_path.join(&fs_entry.base_name);
613 617 let file_type = fs_entry.metadata.file_type();
614 618 let file_or_symlink = file_type.is_file() || file_type.is_symlink();
615 619 if file_type.is_dir() {
616 620 let is_ignored =
617 621 has_ignored_ancestor || (self.ignore_fn)(&hg_path);
618 622 let traverse_children = if is_ignored {
619 623 // Descendants of an ignored directory are all ignored
620 624 self.options.list_ignored
621 625 } else {
622 626 // Descendants of an unknown directory may be either unknown or
623 627 // ignored
624 628 self.options.list_unknown || self.options.list_ignored
625 629 };
626 630 if traverse_children {
627 631 let is_at_repo_root = false;
628 632 if let Ok(children_fs_entries) = self.read_dir(
629 633 &hg_path,
630 634 &fs_entry.full_path,
631 635 is_at_repo_root,
632 636 ) {
633 637 children_fs_entries.par_iter().for_each(|child_fs_entry| {
634 638 self.traverse_fs_only(
635 639 is_ignored,
636 640 &hg_path,
637 641 child_fs_entry,
638 642 );
639 643 })
640 644 }
641 645 }
642 646 if self.options.collect_traversed_dirs {
643 647 self.outcome.lock().unwrap().traversed.push(hg_path.into())
644 648 }
645 649 is_ignored
646 650 } else {
647 651 if file_or_symlink {
648 652 if self.matcher.matches(&hg_path) {
649 653 self.mark_unknown_or_ignored(
650 654 has_ignored_ancestor,
651 655 &BorrowedPath::InMemory(&hg_path),
652 656 )
653 657 } else {
654 658 // We haven’t computed whether this path is ignored. It
655 659 // might not be, and a future run of status might have a
656 660 // different matcher that matches it. So treat it as not
657 661 // ignored. That is, inhibit readdir caching of the parent
658 662 // directory.
659 663 false
660 664 }
661 665 } else {
662 666 // This is neither a directory, a plain file, or a symlink.
663 667 // Treat it like an ignored file.
664 668 true
665 669 }
666 670 }
667 671 }
668 672
669 673 /// Returns whether that path is ignored
670 674 fn mark_unknown_or_ignored(
671 675 &self,
672 676 has_ignored_ancestor: bool,
673 677 hg_path: &BorrowedPath<'_, 'on_disk>,
674 678 ) -> bool {
675 679 let is_ignored = has_ignored_ancestor || (self.ignore_fn)(&hg_path);
676 680 if is_ignored {
677 681 if self.options.list_ignored {
678 682 self.outcome
679 683 .lock()
680 684 .unwrap()
681 685 .ignored
682 686 .push(hg_path.detach_from_tree())
683 687 }
684 688 } else {
685 689 if self.options.list_unknown {
686 690 self.outcome
687 691 .lock()
688 692 .unwrap()
689 693 .unknown
690 694 .push(hg_path.detach_from_tree())
691 695 }
692 696 }
693 697 is_ignored
694 698 }
695 699 }
696 700
697 701 struct DirEntry {
698 702 base_name: HgPathBuf,
699 703 full_path: PathBuf,
700 704 metadata: std::fs::Metadata,
701 705 }
702 706
703 707 impl DirEntry {
704 708 /// Returns **unsorted** entries in the given directory, with name and
705 709 /// metadata.
706 710 ///
707 711 /// If a `.hg` sub-directory is encountered:
708 712 ///
709 713 /// * At the repository root, ignore that sub-directory
710 714 /// * Elsewhere, we’re listing the content of a sub-repo. Return an empty
711 715 /// list instead.
712 716 fn read_dir(path: &Path, is_at_repo_root: bool) -> io::Result<Vec<Self>> {
713 717 let mut results = Vec::new();
714 718 for entry in path.read_dir()? {
715 719 let entry = entry?;
716 720 let metadata = entry.metadata()?;
717 721 let name = get_bytes_from_os_string(entry.file_name());
718 722 // FIXME don't do this when cached
719 723 if name == b".hg" {
720 724 if is_at_repo_root {
721 725 // Skip the repo’s own .hg (might be a symlink)
722 726 continue;
723 727 } else if metadata.is_dir() {
724 728 // A .hg sub-directory at another location means a subrepo,
725 729 // skip it entirely.
726 730 return Ok(Vec::new());
727 731 }
728 732 }
729 733 results.push(DirEntry {
730 734 base_name: name.into(),
731 735 full_path: entry.path(),
732 736 metadata,
733 737 })
734 738 }
735 739 Ok(results)
736 740 }
737 741 }
738 742
739 743 /// Return the `mtime` of a temporary file newly-created in the `.hg` directory
740 744 /// of the give repository.
741 745 ///
742 746 /// This is similar to `SystemTime::now()`, with the result truncated to the
743 747 /// same time resolution as other files’ modification times. Using `.hg`
744 748 /// instead of the system’s default temporary directory (such as `/tmp`) makes
745 749 /// it more likely the temporary file is in the same disk partition as contents
746 750 /// of the working directory, which can matter since different filesystems may
747 751 /// store timestamps with different resolutions.
748 752 ///
749 753 /// This may fail, typically if we lack write permissions. In that case we
750 754 /// should continue the `status()` algoritm anyway and consider the current
751 755 /// date/time to be unknown.
752 756 fn filesystem_now(repo_root: &Path) -> Result<SystemTime, io::Error> {
753 757 tempfile::tempfile_in(repo_root.join(".hg"))?
754 758 .metadata()?
755 759 .modified()
756 760 }
@@ -1,71 +1,70 b''
1 1 // dirstate.rs
2 2 //
3 3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
4 4 //
5 5 // This software may be used and distributed according to the terms of the
6 6 // GNU General Public License version 2 or any later version.
7 7
8 8 //! Bindings for the `hg::dirstate` module provided by the
9 9 //! `hg-core` package.
10 10 //!
11 11 //! From Python, this will be seen as `mercurial.rustext.dirstate`
12 12 mod copymap;
13 13 mod dirs_multiset;
14 14 mod dirstate_map;
15 15 mod item;
16 16 mod status;
17 17 use self::item::DirstateItem;
18 18 use crate::{
19 19 dirstate::{
20 20 dirs_multiset::Dirs, dirstate_map::DirstateMap, status::status_wrapper,
21 21 },
22 22 exceptions,
23 23 };
24 24 use cpython::{PyBytes, PyDict, PyList, PyModule, PyObject, PyResult, Python};
25 25 use hg::dirstate_tree::on_disk::V2_FORMAT_MARKER;
26 26
27 27 /// Create the module, with `__package__` given from parent
28 28 pub fn init_module(py: Python, package: &str) -> PyResult<PyModule> {
29 29 let dotted_name = &format!("{}.dirstate", package);
30 30 let m = PyModule::new(py, dotted_name)?;
31 31
32 32 env_logger::init();
33 33
34 34 m.add(py, "__package__", package)?;
35 35 m.add(py, "__doc__", "Dirstate - Rust implementation")?;
36 36
37 37 m.add(
38 38 py,
39 39 "FallbackError",
40 40 py.get_type::<exceptions::FallbackError>(),
41 41 )?;
42 42 m.add_class::<Dirs>(py)?;
43 43 m.add_class::<DirstateMap>(py)?;
44 44 m.add_class::<DirstateItem>(py)?;
45 45 m.add(py, "V2_FORMAT_MARKER", PyBytes::new(py, V2_FORMAT_MARKER))?;
46 46 m.add(
47 47 py,
48 48 "status",
49 49 py_fn!(
50 50 py,
51 51 status_wrapper(
52 52 dmap: DirstateMap,
53 53 root_dir: PyObject,
54 54 matcher: PyObject,
55 55 ignorefiles: PyList,
56 56 check_exec: bool,
57 last_normal_time: (u32, u32),
58 57 list_clean: bool,
59 58 list_ignored: bool,
60 59 list_unknown: bool,
61 60 collect_traversed_dirs: bool
62 61 )
63 62 ),
64 63 )?;
65 64
66 65 let sys = PyModule::import(py, "sys")?;
67 66 let sys_modules: PyDict = sys.get(py, "modules")?.extract(py)?;
68 67 sys_modules.set_item(py, dotted_name, &m)?;
69 68
70 69 Ok(m)
71 70 }
@@ -1,301 +1,295 b''
1 1 // status.rs
2 2 //
3 3 // Copyright 2019, Raphaël Gomès <rgomes@octobus.net>
4 4 //
5 5 // This software may be used and distributed according to the terms of the
6 6 // GNU General Public License version 2 or any later version.
7 7
8 8 //! Bindings for the `hg::status` module provided by the
9 9 //! `hg-core` crate. From Python, this will be seen as
10 10 //! `rustext.dirstate.status`.
11 11
12 use crate::dirstate::item::timestamp;
13 12 use crate::{dirstate::DirstateMap, exceptions::FallbackError};
14 13 use cpython::exc::OSError;
15 14 use cpython::{
16 15 exc::ValueError, ObjectProtocol, PyBytes, PyErr, PyList, PyObject,
17 16 PyResult, PyTuple, Python, PythonObject, ToPyObject,
18 17 };
19 18 use hg::{
20 19 matchers::{AlwaysMatcher, FileMatcher, IncludeMatcher},
21 20 parse_pattern_syntax,
22 21 utils::{
23 22 files::{get_bytes_from_path, get_path_from_bytes},
24 23 hg_path::{HgPath, HgPathBuf},
25 24 },
26 25 BadMatch, DirstateStatus, IgnorePattern, PatternFileWarning, StatusError,
27 26 StatusOptions,
28 27 };
29 28 use std::borrow::Borrow;
30 29
31 30 /// This will be useless once trait impls for collection are added to `PyBytes`
32 31 /// upstream.
33 32 fn collect_pybytes_list(
34 33 py: Python,
35 34 collection: &[impl AsRef<HgPath>],
36 35 ) -> PyList {
37 36 let list = PyList::new(py, &[]);
38 37
39 38 for path in collection.iter() {
40 39 list.append(
41 40 py,
42 41 PyBytes::new(py, path.as_ref().as_bytes()).into_object(),
43 42 )
44 43 }
45 44
46 45 list
47 46 }
48 47
49 48 fn collect_bad_matches(
50 49 py: Python,
51 50 collection: &[(impl AsRef<HgPath>, BadMatch)],
52 51 ) -> PyResult<PyList> {
53 52 let list = PyList::new(py, &[]);
54 53
55 54 let os = py.import("os")?;
56 55 let get_error_message = |code: i32| -> PyResult<_> {
57 56 os.call(
58 57 py,
59 58 "strerror",
60 59 PyTuple::new(py, &[code.to_py_object(py).into_object()]),
61 60 None,
62 61 )
63 62 };
64 63
65 64 for (path, bad_match) in collection.iter() {
66 65 let message = match bad_match {
67 66 BadMatch::OsError(code) => get_error_message(*code)?,
68 67 BadMatch::BadType(bad_type) => format!(
69 68 "unsupported file type (type is {})",
70 69 bad_type.to_string()
71 70 )
72 71 .to_py_object(py)
73 72 .into_object(),
74 73 };
75 74 list.append(
76 75 py,
77 76 (PyBytes::new(py, path.as_ref().as_bytes()), message)
78 77 .to_py_object(py)
79 78 .into_object(),
80 79 )
81 80 }
82 81
83 82 Ok(list)
84 83 }
85 84
86 85 fn handle_fallback(py: Python, err: StatusError) -> PyErr {
87 86 match err {
88 87 StatusError::Pattern(e) => {
89 88 let as_string = e.to_string();
90 89 log::trace!("Rust status fallback: `{}`", &as_string);
91 90
92 91 PyErr::new::<FallbackError, _>(py, &as_string)
93 92 }
94 93 StatusError::IO(e) => PyErr::new::<OSError, _>(py, e.to_string()),
95 94 e => PyErr::new::<ValueError, _>(py, e.to_string()),
96 95 }
97 96 }
98 97
99 98 pub fn status_wrapper(
100 99 py: Python,
101 100 dmap: DirstateMap,
102 101 matcher: PyObject,
103 102 root_dir: PyObject,
104 103 ignore_files: PyList,
105 104 check_exec: bool,
106 last_normal_time: (u32, u32),
107 105 list_clean: bool,
108 106 list_ignored: bool,
109 107 list_unknown: bool,
110 108 collect_traversed_dirs: bool,
111 109 ) -> PyResult<PyTuple> {
112 let last_normal_time = timestamp(py, last_normal_time)?;
113 110 let bytes = root_dir.extract::<PyBytes>(py)?;
114 111 let root_dir = get_path_from_bytes(bytes.data(py));
115 112
116 113 let dmap: DirstateMap = dmap.to_py_object(py);
117 114 let mut dmap = dmap.get_inner_mut(py);
118 115
119 116 let ignore_files: PyResult<Vec<_>> = ignore_files
120 117 .iter(py)
121 118 .map(|b| {
122 119 let file = b.extract::<PyBytes>(py)?;
123 120 Ok(get_path_from_bytes(file.data(py)).to_owned())
124 121 })
125 122 .collect();
126 123 let ignore_files = ignore_files?;
127 124
128 125 match matcher.get_type(py).name(py).borrow() {
129 126 "alwaysmatcher" => {
130 127 let matcher = AlwaysMatcher;
131 128 let (status_res, warnings) = dmap
132 129 .status(
133 130 &matcher,
134 131 root_dir.to_path_buf(),
135 132 ignore_files,
136 133 StatusOptions {
137 134 check_exec,
138 last_normal_time,
139 135 list_clean,
140 136 list_ignored,
141 137 list_unknown,
142 138 collect_traversed_dirs,
143 139 },
144 140 )
145 141 .map_err(|e| handle_fallback(py, e))?;
146 142 build_response(py, status_res, warnings)
147 143 }
148 144 "exactmatcher" => {
149 145 let files = matcher.call_method(
150 146 py,
151 147 "files",
152 148 PyTuple::new(py, &[]),
153 149 None,
154 150 )?;
155 151 let files: PyList = files.cast_into(py)?;
156 152 let files: PyResult<Vec<HgPathBuf>> = files
157 153 .iter(py)
158 154 .map(|f| {
159 155 Ok(HgPathBuf::from_bytes(
160 156 f.extract::<PyBytes>(py)?.data(py),
161 157 ))
162 158 })
163 159 .collect();
164 160
165 161 let files = files?;
166 162 let matcher = FileMatcher::new(files.as_ref())
167 163 .map_err(|e| PyErr::new::<ValueError, _>(py, e.to_string()))?;
168 164 let (status_res, warnings) = dmap
169 165 .status(
170 166 &matcher,
171 167 root_dir.to_path_buf(),
172 168 ignore_files,
173 169 StatusOptions {
174 170 check_exec,
175 last_normal_time,
176 171 list_clean,
177 172 list_ignored,
178 173 list_unknown,
179 174 collect_traversed_dirs,
180 175 },
181 176 )
182 177 .map_err(|e| handle_fallback(py, e))?;
183 178 build_response(py, status_res, warnings)
184 179 }
185 180 "includematcher" => {
186 181 // Get the patterns from Python even though most of them are
187 182 // redundant with those we will parse later on, as they include
188 183 // those passed from the command line.
189 184 let ignore_patterns: PyResult<Vec<_>> = matcher
190 185 .getattr(py, "_kindpats")?
191 186 .iter(py)?
192 187 .map(|k| {
193 188 let k = k?;
194 189 let syntax = parse_pattern_syntax(
195 190 &[
196 191 k.get_item(py, 0)?
197 192 .extract::<PyBytes>(py)?
198 193 .data(py),
199 194 &b":"[..],
200 195 ]
201 196 .concat(),
202 197 )
203 198 .map_err(|e| {
204 199 handle_fallback(py, StatusError::Pattern(e))
205 200 })?;
206 201 let pattern = k.get_item(py, 1)?.extract::<PyBytes>(py)?;
207 202 let pattern = pattern.data(py);
208 203 let source = k.get_item(py, 2)?.extract::<PyBytes>(py)?;
209 204 let source = get_path_from_bytes(source.data(py));
210 205 let new = IgnorePattern::new(syntax, pattern, source);
211 206 Ok(new)
212 207 })
213 208 .collect();
214 209
215 210 let ignore_patterns = ignore_patterns?;
216 211
217 212 let matcher = IncludeMatcher::new(ignore_patterns)
218 213 .map_err(|e| handle_fallback(py, e.into()))?;
219 214
220 215 let (status_res, warnings) = dmap
221 216 .status(
222 217 &matcher,
223 218 root_dir.to_path_buf(),
224 219 ignore_files,
225 220 StatusOptions {
226 221 check_exec,
227 last_normal_time,
228 222 list_clean,
229 223 list_ignored,
230 224 list_unknown,
231 225 collect_traversed_dirs,
232 226 },
233 227 )
234 228 .map_err(|e| handle_fallback(py, e))?;
235 229
236 230 build_response(py, status_res, warnings)
237 231 }
238 232 e => Err(PyErr::new::<ValueError, _>(
239 233 py,
240 234 format!("Unsupported matcher {}", e),
241 235 )),
242 236 }
243 237 }
244 238
245 239 fn build_response(
246 240 py: Python,
247 241 status_res: DirstateStatus,
248 242 warnings: Vec<PatternFileWarning>,
249 243 ) -> PyResult<PyTuple> {
250 244 let modified = collect_pybytes_list(py, status_res.modified.as_ref());
251 245 let added = collect_pybytes_list(py, status_res.added.as_ref());
252 246 let removed = collect_pybytes_list(py, status_res.removed.as_ref());
253 247 let deleted = collect_pybytes_list(py, status_res.deleted.as_ref());
254 248 let clean = collect_pybytes_list(py, status_res.clean.as_ref());
255 249 let ignored = collect_pybytes_list(py, status_res.ignored.as_ref());
256 250 let unknown = collect_pybytes_list(py, status_res.unknown.as_ref());
257 251 let unsure = collect_pybytes_list(py, status_res.unsure.as_ref());
258 252 let bad = collect_bad_matches(py, status_res.bad.as_ref())?;
259 253 let traversed = collect_pybytes_list(py, status_res.traversed.as_ref());
260 254 let dirty = status_res.dirty.to_py_object(py);
261 255 let py_warnings = PyList::new(py, &[]);
262 256 for warning in warnings.iter() {
263 257 // We use duck-typing on the Python side for dispatch, good enough for
264 258 // now.
265 259 match warning {
266 260 PatternFileWarning::InvalidSyntax(file, syn) => {
267 261 py_warnings.append(
268 262 py,
269 263 (
270 264 PyBytes::new(py, &get_bytes_from_path(&file)),
271 265 PyBytes::new(py, syn),
272 266 )
273 267 .to_py_object(py)
274 268 .into_object(),
275 269 );
276 270 }
277 271 PatternFileWarning::NoSuchFile(file) => py_warnings.append(
278 272 py,
279 273 PyBytes::new(py, &get_bytes_from_path(&file)).into_object(),
280 274 ),
281 275 }
282 276 }
283 277
284 278 Ok(PyTuple::new(
285 279 py,
286 280 &[
287 281 unsure.into_object(),
288 282 modified.into_object(),
289 283 added.into_object(),
290 284 removed.into_object(),
291 285 deleted.into_object(),
292 286 clean.into_object(),
293 287 ignored.into_object(),
294 288 unknown.into_object(),
295 289 py_warnings.into_object(),
296 290 bad.into_object(),
297 291 traversed.into_object(),
298 292 dirty.into_object(),
299 293 ][..],
300 294 ))
301 295 }
@@ -1,400 +1,396 b''
1 1 // status.rs
2 2 //
3 3 // Copyright 2020, Georges Racinet <georges.racinets@octobus.net>
4 4 //
5 5 // This software may be used and distributed according to the terms of the
6 6 // GNU General Public License version 2 or any later version.
7 7
8 8 use crate::error::CommandError;
9 9 use crate::ui::Ui;
10 10 use crate::utils::path_utils::relativize_paths;
11 11 use clap::{Arg, SubCommand};
12 12 use format_bytes::format_bytes;
13 13 use hg;
14 14 use hg::config::Config;
15 use hg::dirstate::{has_exec_bit, TruncatedTimestamp};
15 use hg::dirstate::has_exec_bit;
16 16 use hg::errors::HgError;
17 17 use hg::manifest::Manifest;
18 18 use hg::matchers::AlwaysMatcher;
19 19 use hg::repo::Repo;
20 20 use hg::utils::files::get_bytes_from_os_string;
21 21 use hg::utils::hg_path::{hg_path_to_os_string, HgPath};
22 22 use hg::{HgPathCow, StatusOptions};
23 23 use log::{info, warn};
24 24
25 25 pub const HELP_TEXT: &str = "
26 26 Show changed files in the working directory
27 27
28 28 This is a pure Rust version of `hg status`.
29 29
30 30 Some options might be missing, check the list below.
31 31 ";
32 32
33 33 pub fn args() -> clap::App<'static, 'static> {
34 34 SubCommand::with_name("status")
35 35 .alias("st")
36 36 .about(HELP_TEXT)
37 37 .arg(
38 38 Arg::with_name("all")
39 39 .help("show status of all files")
40 40 .short("-A")
41 41 .long("--all"),
42 42 )
43 43 .arg(
44 44 Arg::with_name("modified")
45 45 .help("show only modified files")
46 46 .short("-m")
47 47 .long("--modified"),
48 48 )
49 49 .arg(
50 50 Arg::with_name("added")
51 51 .help("show only added files")
52 52 .short("-a")
53 53 .long("--added"),
54 54 )
55 55 .arg(
56 56 Arg::with_name("removed")
57 57 .help("show only removed files")
58 58 .short("-r")
59 59 .long("--removed"),
60 60 )
61 61 .arg(
62 62 Arg::with_name("clean")
63 63 .help("show only clean files")
64 64 .short("-c")
65 65 .long("--clean"),
66 66 )
67 67 .arg(
68 68 Arg::with_name("deleted")
69 69 .help("show only deleted files")
70 70 .short("-d")
71 71 .long("--deleted"),
72 72 )
73 73 .arg(
74 74 Arg::with_name("unknown")
75 75 .help("show only unknown (not tracked) files")
76 76 .short("-u")
77 77 .long("--unknown"),
78 78 )
79 79 .arg(
80 80 Arg::with_name("ignored")
81 81 .help("show only ignored files")
82 82 .short("-i")
83 83 .long("--ignored"),
84 84 )
85 85 .arg(
86 86 Arg::with_name("no-status")
87 87 .help("hide status prefix")
88 88 .short("-n")
89 89 .long("--no-status"),
90 90 )
91 91 }
92 92
93 93 /// Pure data type allowing the caller to specify file states to display
94 94 #[derive(Copy, Clone, Debug)]
95 95 pub struct DisplayStates {
96 96 pub modified: bool,
97 97 pub added: bool,
98 98 pub removed: bool,
99 99 pub clean: bool,
100 100 pub deleted: bool,
101 101 pub unknown: bool,
102 102 pub ignored: bool,
103 103 }
104 104
105 105 pub const DEFAULT_DISPLAY_STATES: DisplayStates = DisplayStates {
106 106 modified: true,
107 107 added: true,
108 108 removed: true,
109 109 clean: false,
110 110 deleted: true,
111 111 unknown: true,
112 112 ignored: false,
113 113 };
114 114
115 115 pub const ALL_DISPLAY_STATES: DisplayStates = DisplayStates {
116 116 modified: true,
117 117 added: true,
118 118 removed: true,
119 119 clean: true,
120 120 deleted: true,
121 121 unknown: true,
122 122 ignored: true,
123 123 };
124 124
125 125 impl DisplayStates {
126 126 pub fn is_empty(&self) -> bool {
127 127 !(self.modified
128 128 || self.added
129 129 || self.removed
130 130 || self.clean
131 131 || self.deleted
132 132 || self.unknown
133 133 || self.ignored)
134 134 }
135 135 }
136 136
137 137 pub fn run(invocation: &crate::CliInvocation) -> Result<(), CommandError> {
138 138 let status_enabled_default = false;
139 139 let status_enabled = invocation.config.get_option(b"rhg", b"status")?;
140 140 if !status_enabled.unwrap_or(status_enabled_default) {
141 141 return Err(CommandError::unsupported(
142 142 "status is experimental in rhg (enable it with 'rhg.status = true' \
143 143 or enable fallback with 'rhg.on-unsupported = fallback')"
144 144 ));
145 145 }
146 146
147 147 // TODO: lift these limitations
148 148 if invocation.config.get_bool(b"ui", b"tweakdefaults")? {
149 149 return Err(CommandError::unsupported(
150 150 "ui.tweakdefaults is not yet supported with rhg status",
151 151 ));
152 152 }
153 153 if invocation.config.get_bool(b"ui", b"statuscopies")? {
154 154 return Err(CommandError::unsupported(
155 155 "ui.statuscopies is not yet supported with rhg status",
156 156 ));
157 157 }
158 158 if invocation
159 159 .config
160 160 .get(b"commands", b"status.terse")
161 161 .is_some()
162 162 {
163 163 return Err(CommandError::unsupported(
164 164 "status.terse is not yet supported with rhg status",
165 165 ));
166 166 }
167 167
168 168 let ui = invocation.ui;
169 169 let config = invocation.config;
170 170 let args = invocation.subcommand_args;
171 171 let display_states = if args.is_present("all") {
172 172 // TODO when implementing `--quiet`: it excludes clean files
173 173 // from `--all`
174 174 ALL_DISPLAY_STATES
175 175 } else {
176 176 let requested = DisplayStates {
177 177 modified: args.is_present("modified"),
178 178 added: args.is_present("added"),
179 179 removed: args.is_present("removed"),
180 180 clean: args.is_present("clean"),
181 181 deleted: args.is_present("deleted"),
182 182 unknown: args.is_present("unknown"),
183 183 ignored: args.is_present("ignored"),
184 184 };
185 185 if requested.is_empty() {
186 186 DEFAULT_DISPLAY_STATES
187 187 } else {
188 188 requested
189 189 }
190 190 };
191 191 let no_status = args.is_present("no-status");
192 192
193 193 let repo = invocation.repo?;
194 194 let mut dmap = repo.dirstate_map_mut()?;
195 195
196 196 let options = StatusOptions {
197 // TODO should be provided by the dirstate parsing and
198 // hence be stored on dmap. Using a value that assumes we aren't
199 // below the time resolution granularity of the FS and the
200 // dirstate.
201 last_normal_time: TruncatedTimestamp::new_truncate(0, 0),
202 197 // we're currently supporting file systems with exec flags only
203 198 // anyway
204 199 check_exec: true,
205 200 list_clean: display_states.clean,
206 201 list_unknown: display_states.unknown,
207 202 list_ignored: display_states.ignored,
208 203 collect_traversed_dirs: false,
209 204 };
210 205 let ignore_file = repo.working_directory_vfs().join(".hgignore"); // TODO hardcoded
211 206 let (mut ds_status, pattern_warnings) = dmap.status(
212 207 &AlwaysMatcher,
213 208 repo.working_directory_path().to_owned(),
214 209 vec![ignore_file],
215 210 options,
216 211 )?;
217 212 if !pattern_warnings.is_empty() {
218 213 warn!("Pattern warnings: {:?}", &pattern_warnings);
219 214 }
220 215
221 216 if !ds_status.bad.is_empty() {
222 217 warn!("Bad matches {:?}", &(ds_status.bad))
223 218 }
224 219 if !ds_status.unsure.is_empty() {
225 220 info!(
226 221 "Files to be rechecked by retrieval from filelog: {:?}",
227 222 &ds_status.unsure
228 223 );
229 224 }
230 225 if !ds_status.unsure.is_empty()
231 226 && (display_states.modified || display_states.clean)
232 227 {
233 228 let p1 = repo.dirstate_parents()?.p1;
234 229 let manifest = repo.manifest_for_node(p1).map_err(|e| {
235 230 CommandError::from((e, &*format!("{:x}", p1.short())))
236 231 })?;
237 232 for to_check in ds_status.unsure {
238 233 if unsure_is_modified(repo, &manifest, &to_check)? {
239 234 if display_states.modified {
240 235 ds_status.modified.push(to_check);
241 236 }
242 237 } else {
243 238 if display_states.clean {
244 239 ds_status.clean.push(to_check);
245 240 }
246 241 }
247 242 }
248 243 }
249 244 if display_states.modified {
250 245 display_status_paths(
251 246 ui,
252 247 repo,
253 248 config,
254 249 no_status,
255 250 &mut ds_status.modified,
256 251 b"M",
257 252 )?;
258 253 }
259 254 if display_states.added {
260 255 display_status_paths(
261 256 ui,
262 257 repo,
263 258 config,
264 259 no_status,
265 260 &mut ds_status.added,
266 261 b"A",
267 262 )?;
268 263 }
269 264 if display_states.removed {
270 265 display_status_paths(
271 266 ui,
272 267 repo,
273 268 config,
274 269 no_status,
275 270 &mut ds_status.removed,
276 271 b"R",
277 272 )?;
278 273 }
279 274 if display_states.deleted {
280 275 display_status_paths(
281 276 ui,
282 277 repo,
283 278 config,
284 279 no_status,
285 280 &mut ds_status.deleted,
286 281 b"!",
287 282 )?;
288 283 }
289 284 if display_states.unknown {
290 285 display_status_paths(
291 286 ui,
292 287 repo,
293 288 config,
294 289 no_status,
295 290 &mut ds_status.unknown,
296 291 b"?",
297 292 )?;
298 293 }
299 294 if display_states.ignored {
300 295 display_status_paths(
301 296 ui,
302 297 repo,
303 298 config,
304 299 no_status,
305 300 &mut ds_status.ignored,
306 301 b"I",
307 302 )?;
308 303 }
309 304 if display_states.clean {
310 305 display_status_paths(
311 306 ui,
312 307 repo,
313 308 config,
314 309 no_status,
315 310 &mut ds_status.clean,
316 311 b"C",
317 312 )?;
318 313 }
319 314 Ok(())
320 315 }
321 316
322 317 // Probably more elegant to use a Deref or Borrow trait rather than
323 318 // harcode HgPathBuf, but probably not really useful at this point
324 319 fn display_status_paths(
325 320 ui: &Ui,
326 321 repo: &Repo,
327 322 config: &Config,
328 323 no_status: bool,
329 324 paths: &mut [HgPathCow],
330 325 status_prefix: &[u8],
331 326 ) -> Result<(), CommandError> {
332 327 paths.sort_unstable();
333 328 let mut relative: bool = config.get_bool(b"ui", b"relative-paths")?;
334 329 relative = config
335 330 .get_option(b"commands", b"status.relative")?
336 331 .unwrap_or(relative);
337 332 let print_path = |path: &[u8]| {
338 333 // TODO optim, probably lots of unneeded copies here, especially
339 334 // if out stream is buffered
340 335 if no_status {
341 336 ui.write_stdout(&format_bytes!(b"{}\n", path))
342 337 } else {
343 338 ui.write_stdout(&format_bytes!(b"{} {}\n", status_prefix, path))
344 339 }
345 340 };
346 341
347 342 if relative && !ui.plain() {
348 343 relativize_paths(repo, paths.iter().map(Ok), |path| {
349 344 print_path(&path)
350 345 })?;
351 346 } else {
352 347 for path in paths {
353 348 print_path(path.as_bytes())?
354 349 }
355 350 }
356 351 Ok(())
357 352 }
358 353
359 354 /// Check if a file is modified by comparing actual repo store and file system.
360 355 ///
361 356 /// This meant to be used for those that the dirstate cannot resolve, due
362 357 /// to time resolution limits.
363 358 fn unsure_is_modified(
364 359 repo: &Repo,
365 360 manifest: &Manifest,
366 361 hg_path: &HgPath,
367 362 ) -> Result<bool, HgError> {
368 363 let vfs = repo.working_directory_vfs();
369 364 let fs_path = hg_path_to_os_string(hg_path).expect("HgPath conversion");
370 365 let fs_metadata = vfs.symlink_metadata(&fs_path)?;
371 366 let is_symlink = fs_metadata.file_type().is_symlink();
372 // TODO: Also account for `FALLBACK_SYMLINK` and `FALLBACK_EXEC` from the dirstate
367 // TODO: Also account for `FALLBACK_SYMLINK` and `FALLBACK_EXEC` from the
368 // dirstate
373 369 let fs_flags = if is_symlink {
374 370 Some(b'l')
375 371 } else if has_exec_bit(&fs_metadata) {
376 372 Some(b'x')
377 373 } else {
378 374 None
379 375 };
380 376
381 377 let entry = manifest
382 378 .find_file(hg_path)?
383 379 .expect("ambgious file not in p1");
384 380 if entry.flags != fs_flags {
385 381 return Ok(true);
386 382 }
387 383 let filelog = repo.filelog(hg_path)?;
388 384 let filelog_entry =
389 385 filelog.data_for_node(entry.node_id()?).map_err(|_| {
390 386 HgError::corrupted("filelog missing node from manifest")
391 387 })?;
392 388 let contents_in_p1 = filelog_entry.data()?;
393 389
394 390 let fs_contents = if is_symlink {
395 391 get_bytes_from_os_string(vfs.read_link(fs_path)?.into_os_string())
396 392 } else {
397 393 vfs.read(fs_path)?
398 394 };
399 395 return Ok(contents_in_p1 != &*fs_contents);
400 396 }
General Comments 0
You need to be logged in to leave comments. Login now