upstream/mercurial-mirror Commit - r49112:50026041

dirstate: make sure that status does not overlook the fallback flags...

marmoute -

r49112:50026041 default

parent child

mercurial/dirstate.py

0 +12 -1

             # dirstate.py - working directory tracking for mercurial
             #
             # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
             #
             # This software may be used and distributed according to the terms of the
             # GNU General Public License version 2 or any later version.
             from __future__ import absolute_import
             import collections
             import contextlib
             import errno
             import os
             import stat
             from .i18n import _
             from .pycompat import delattr
             from hgdemandimport import tracing
             from . import (
                 dirstatemap,
                 encoding,
                 error,
                 match as matchmod,
                 pathutil,
                 policy,
                 pycompat,
                 scmutil,
                 sparse,
                 util,
             )
             from .dirstateutils import (
                 timestamp,
             )
             from .interfaces import (
                 dirstate as intdirstate,
                 util as interfaceutil,
             )
             parsers = policy.importmod('parsers')
             rustmod = policy.importrust('dirstate')
             HAS_FAST_DIRSTATE_V2 = rustmod is not None
             propertycache = util.propertycache
             filecache = scmutil.filecache
             _rangemask = dirstatemap.rangemask
             DirstateItem = dirstatemap.DirstateItem
             class repocache(filecache):
                 """A filecache whose entries are files stored in .hg/."""

                 def join(self, obj, fname):
                     # path resolution is delegated to the owner's opener
                     opener = obj._opener
                     return opener.join(fname)
             class rootcache(filecache):
                 """A filecache whose entries are files in the repository root."""

                 def join(self, obj, fname):
                     # path resolution is delegated to the owner's own _join()
                     resolve = obj._join
                     return resolve(fname)
             def _getfsnow(vfs):
                 """Return the "now" timestamp as seen by the filesystem.

                 A temporary file is created through `vfs`, its fstat() result is
                 converted with timestamp.mtime_of(), and the file is always closed
                 and unlinked afterwards.
                 """
                 fd, name = vfs.mkstemp()
                 try:
                     st = os.fstat(fd)
                     now = timestamp.mtime_of(st)
                 finally:
                     os.close(fd)
                     vfs.unlink(name)
                 return now
             def requires_parents_change(func):
                 """Decorator: only allow `func` inside a parentchange context.

                 The returned wrapper calls ``self.pendingparentchange()``. When that
                 reports false, ``error.ProgrammingError`` is raised; otherwise every
                 argument is forwarded to `func` and its result is returned.

                 The wrapper is built with ``functools.wraps`` so the decorated method
                 keeps its ``__name__`` and docstring for introspection.
                 """
                 # function-scope import keeps this block self-contained
                 import functools

                 @functools.wraps(func)
                 def wrap(self, *args, **kwargs):
                     if not self.pendingparentchange():
                         msg = 'calling `%s` outside of a parentchange context'
                         msg %= func.__name__
                         raise error.ProgrammingError(msg)
                     return func(self, *args, **kwargs)

                 return wrap
             def requires_no_parents_change(func):
                 """Decorator: forbid calling `func` inside a parentchange context.

                 The returned wrapper calls ``self.pendingparentchange()``. When that
                 reports true, ``error.ProgrammingError`` is raised; otherwise every
                 argument is forwarded to `func` and its result is returned.

                 The wrapper is built with ``functools.wraps`` so the decorated method
                 keeps its ``__name__`` and docstring for introspection.
                 """
                 # function-scope import keeps this block self-contained
                 import functools

                 @functools.wraps(func)
                 def wrap(self, *args, **kwargs):
                     if self.pendingparentchange():
                         msg = 'calling `%s` inside of a parentchange context'
                         msg %= func.__name__
                         raise error.ProgrammingError(msg)
                     return func(self, *args, **kwargs)

                 return wrap
             @interfaceutil.implementer(intdirstate.idirstate)
             class dirstate(object):
                 def __init__(
                     self,
                     opener,
                     ui,
                     root,
                     validate,
                     sparsematchfn,
                     nodeconstants,
                     use_dirstate_v2,
                 ):
                     """Build a dirstate bound to one working directory.

                     `opener` is an open()-like callable used to open the dirstate
                     file and `root` is the root of the directory tracked by the
                     dirstate. The remaining arguments are stored for later use.
                     """
                     # collaborators and settings handed over by the caller
                     self._ui = ui
                     self._opener = opener
                     self._validate = validate
                     self._sparsematchfn = sparsematchfn
                     self._nodeconstants = nodeconstants
                     self._use_dirstate_v2 = use_dirstate_v2
                     # location of the tracked directory
                     self._root = root
                     # ntpath.join(root, '') of Python 2.7.9 does not add sep if root is
                     # UNC path pointing to root share (issue4557)
                     self._rootdir = pathutil.normasprefix(root)
                     # names of the files holding the dirstate
                     self._filename = b'dirstate'
                     self._pendingfilename = b'%s.pending' % self._filename
                     # mutable bookkeeping, all starting from a pristine state
                     self._dirty = False
                     self._lastnormaltime = timestamp.zero()
                     self._filecache = {}
                     self._parentwriters = 0
                     self._plchangecallbacks = {}
                     self._origpl = None
                     self._mapcls = dirstatemap.dirstatemap
                     # Access and cache cwd early, so we don't access it for the first time
                     # after a working-copy update caused it to not exist (accessing it then
                     # raises an exception).
                     self._cwd
                 def prefetch_parents(self):
                     """Force the dirstate parents to be loaded right away.

                     Used to avoid a race condition.
                     """
                     # only the attribute access matters; its value is not needed
                     _unused = self._pl
                 @contextlib.contextmanager
                 def parentchange(self):
                     """Context manager for handling dirstate parents.

                     If an exception occurs in the scope of the context manager,
                     the incoherent dirstate won't be written when wlock is
                     released.

                     The `_parentwriters` counter is incremented on entry and only
                     decremented when the managed block exits without raising.
                     """
                     self._parentwriters += 1
                     yield
                     # Typically we want the "undo" step of a context manager in a
                     # finally block so it happens even when an exception
                     # occurs. In this case, however, we only want to decrement
                     # parentwriters if the code in the with statement exits
                     # normally, so we don't have a try/finally here on purpose.
                     self._parentwriters -= 1
                 def pendingparentchange(self):
                     """Tell whether a change of the dirstate parents is in progress.

                     True while the `_parentwriters` counter is above zero.
                     """
                     writers = self._parentwriters
                     return writers > 0
                 @propertycache
                 def _map(self):
                     """Build and cache the dirstate contents (see dirstatemap docs)."""
                     dmap = self._mapcls(
                         self._ui,
                         self._opener,
                         self._root,
                         self._nodeconstants,
                         self._use_dirstate_v2,
                     )
                     # store the instance on self before handing it back
                     self._map = dmap
                     return self._map
                 @property
                 def _sparsematcher(self):
                     """Matcher describing the sparse checkout.

                     The working directory may not include every file from a manifest.
                     The matcher returned here matches a path if it is to be included
                     in the working directory.
                     """
                     # TODO there is potential to cache this property. For now, the matcher
                     # is resolved on every access. (But the called function does use a
                     # cache to keep the lookup fast.)
                     build_matcher = self._sparsematchfn
                     return build_matcher()
                 @repocache(b'branch')
                 def _branch(self):
                     """Read the branch name from the `branch` file.

                     Falls back to b"default" when the file is missing (ENOENT) or
                     contains only whitespace; any other IOError propagates.
                     """
                     fallback = b"default"
                     try:
                         name = self._opener.read(b"branch").strip()
                     except IOError as inst:
                         if inst.errno == errno.ENOENT:
                             return fallback
                         raise
                     return name or fallback
                 @property
                 def _pl(self):
                     """The parents as reported by the dirstate map."""
                     dmap = self._map
                     return dmap.parents()
                 def hasdir(self, d):
                     """Return the dirstate map's hastrackeddir() answer for `d`."""
                     dmap = self._map
                     return dmap.hastrackeddir(d)
                 @rootcache(b'.hgignore')
                 def _ignore(self):
                     """Build the matcher for ignored files.

                     Every path returned by `_ignorefiles()` is turned into an
                     ``include:`` pattern; without any such file a matcher that never
                     matches is returned.
                     """
                     ignorefiles = self._ignorefiles()
                     if ignorefiles:
                         patterns = [b'include:%s' % path for path in ignorefiles]
                         return matchmod.match(
                             self._root, b'', [], patterns, warn=self._ui.warn
                         )
                     return matchmod.never()
                 @propertycache
                 def _slash(self):
                     """Cached flag combining `ui.slash` with the OS path separator.

                     Truthy only when the config is enabled and the separator is not
                     already b'/'.
                     """
                     enabled = self._ui.configbool(b'ui', b'slash')
                     if not enabled:
                         # hand back the falsy config value itself, like `and` does
                         return enabled
                     return pycompat.ossep != b'/'
                 @propertycache
                 def _checklink(self):
                     """Cached result of util.checklink() for the repository root."""
                     root = self._root
                     return util.checklink(root)
                 @propertycache
                 def _checkexec(self):
                     """Cached boolean form of util.checkexec() for the repository root."""
                     supported = util.checkexec(self._root)
                     return bool(supported)
                 @propertycache
                 def _checkcase(self):
                     """Cached flag: true when the `.hg` path is not case sensitive.

                     This is the negation of util.fscasesensitive() for that path.
                     """
                     hgdir = self._join(b'.hg')
                     return not util.fscasesensitive(hgdir)
                 def _join(self, f):
                     """Prefix the relative path `f` with the root directory."""
                     # plain concatenation is much faster than os.path.join(), and
                     # it is safe because f is always a relative path
                     prefix = self._rootdir
                     return prefix + f
                 def flagfunc(self, buildfallback):
                     """build a callable that returns flags associated with a filename

                     The information is extracted from three possible layers:

                     1. the file system if it supports the information
                     2. the "fallback" information stored in the dirstate if any
                     3. a more expensive mechanism inferring the flags from the parents.

                     `buildfallback` is called with no argument, at most once per
                     returned callable, the first time the third layer is needed; its
                     result is then called with the filename.

                     The returned callable gives back b'l', b'x' or b''. The symlink
                     flag is checked before the exec flag, and b'' is also returned
                     when the file cannot be lstat()ed.
                     """
                     # small hack to cache the result of buildfallback()
                     fallback_func = []
                     def get_flags(x):
                         # both are filled lazily and shared by the two checks below
                         entry = None
                         fallback_value = None
                         try:
                             st = os.lstat(self._join(x))
                         except OSError:
                             return b''
                         # symlink flag: file system, then dirstate fallback, then
                         # the function built by buildfallback()
                         if self._checklink:
                             if util.statislink(st):
                                 return b'l'
                         else:
                             entry = self.get_entry(x)
                             if entry.has_fallback_symlink:
                                 if entry.fallback_symlink:
                                     return b'l'
                             else:
                                 if not fallback_func:
                                     fallback_func.append(buildfallback())
                                 fallback_value = fallback_func[0](x)
                                 if b'l' in fallback_value:
                                     return b'l'
                         # exec flag: same three layers, reusing `entry` and
                         # `fallback_value` when the symlink check already got them
                         if self._checkexec:
                             if util.statisexec(st):
                                 return b'x'
                         else:
                             if entry is None:
                                 entry = self.get_entry(x)
                             if entry.has_fallback_exec:
                                 if entry.fallback_exec:
                                     return b'x'
                             else:
                                 if fallback_value is None:
                                     if not fallback_func:
                                         fallback_func.append(buildfallback())
                                     fallback_value = fallback_func[0](x)
                                 if b'x' in fallback_value:
                                     return b'x'
                         return b''
                     return get_flags
                 @propertycache
                 def _cwd(self):
                     """Cached current directory, honoring the `ui.forcecwd` override."""
                     # internal config: ui.forcecwd
                     forced = self._ui.config(b'ui', b'forcecwd')
                     return forced or encoding.getcwd()
                 def getcwd(self):
                     """Return the path from which a canonical path is calculated.

                     This path should be used to resolve file patterns or to convert
                     canonical paths back to file paths for display. It shouldn't be
                     used to get real file paths. Use vfs functions instead.

                     The result is b'' at the root, a root-relative path inside the
                     root, and the unchanged cwd otherwise.
                     """
                     cwd = self._cwd
                     root = self._root
                     if cwd == root:
                         return b''
                     # root already ends with a path separator if it is '/' or 'C:\'
                     if util.endswithsep(root):
                         prefix = root
                     else:
                         prefix = root + pycompat.ossep
                     if not cwd.startswith(prefix):
                         # we're outside the repo. return an absolute path.
                         return cwd
                     return cwd[len(prefix) :]
                 def pathto(self, f, cwd=None):
                     """Return util.pathto() of `f` from `cwd` (default: getcwd()).

                     The result goes through util.pconvert() when `_slash` is set.
                     """
                     if cwd is None:
                         cwd = self.getcwd()
                     relpath = util.pathto(self._root, cwd, f)
                     return util.pconvert(relpath) if self._slash else relpath
                 def __getitem__(self, key):
                     """Return the current state of key (a filename) in the dirstate.

                     States are:
                       n  normal
                       m  needs merging
                       r  marked for removal
                       a  marked for addition
                       ?  not tracked

                     XXX The "state" is a bit obscure to be in the "public" API. we should
                     consider migrating all user of this to going through the dirstate entry
                     instead.
                     """
                     # this access style is deprecated: warn on every call
                     msg = b"don't use dirstate[file], use dirstate.get_entry(file)"
                     util.nouideprecwarn(msg, b'6.1', stacklevel=2)
                     entry = self._map.get(key)
                     if entry is None:
                         return b'?'
                     return entry.state
                 def get_entry(self, path):
                     """Return the DirstateItem for `path`.

                     A fresh, empty DirstateItem is returned when the map has no
                     entry for that path.
                     """
                     found = self._map.get(path)
                     return DirstateItem() if found is None else found
                 def __contains__(self, key):
                     """Membership test, delegated to the dirstate map."""
                     dmap = self._map
                     return key in dmap
                 def __iter__(self):
                     """Iterate over the dirstate map's keys in sorted order."""
                     ordered = sorted(self._map)
                     return iter(ordered)
                 def items(self):
                     """Return pycompat.iteritems() over the dirstate map."""
                     dmap = self._map
                     return pycompat.iteritems(dmap)

                 # alias kept alongside items()
                 iteritems = items
                 def parents(self):
                     """Return the list of parents, each passed through `_validate`."""
                     validated = []
                     for node in self._pl:
                         validated.append(self._validate(node))
                     return validated
                 def p1(self):
                     """Return the first parent, passed through `_validate`."""
                     validate = self._validate
                     return validate(self._pl[0])
                 def p2(self):
                     """Return the second parent, passed through `_validate`."""
                     validate = self._validate
                     return validate(self._pl[1])
                 @property
                 def in_merge(self):
                     """True if a merge is in progress

                     That is, when the second parent differs from the null id.
                     """
                     second_parent = self._pl[1]
                     return second_parent != self._nodeconstants.nullid
                 def branch(self):
                     """Return the cached branch name converted by encoding.tolocal()."""
                     stored = self._branch
                     return encoding.tolocal(stored)
                 def setparents(self, p1, p2=None):
                     """Set dirstate parents to p1 and p2.

                     When moving from two parents to one, "merged" entries are
                     adjusted to normal and previous copy records discarded and
                     returned by the call.

                     Raises ValueError when called outside of a
                     `dirstate.parentchange` context.

                     See localrepo.setparents()
                     """
                     nullid = self._nodeconstants.nullid
                     if p2 is None:
                         p2 = nullid
                     if self._parentwriters == 0:
                         msg = (
                             b"cannot set dirstate parent outside of "
                             b"dirstate.parentchange context manager"
                         )
                         raise ValueError(msg)
                     self._dirty = True
                     previous_p2 = self._pl[1]
                     if self._origpl is None:
                         # remember the parents as they were before the first change
                         self._origpl = self._pl
                     # True if we need to fold p2 related state back to a linear case
                     fold_p2 = previous_p2 != nullid and p2 == nullid
                     return self._map.setparents(p1, p2, fold_p2=fold_p2)
                 def setbranch(self, branch):
                     """Set the branch name and write it to the `branch` file.

                     The cached `_branch` value is replaced first (converted with
                     encoding.fromlocal), then written, followed by a newline, through
                     a file opened with atomictemp=True. If anything in the write step
                     raises, the temporary file is discarded and the exception is
                     re-raised; the cached value set beforehand is not rolled back
                     here.
                     """
                     self.__class__._branch.set(self, encoding.fromlocal(branch))
                     f = self._opener(b'branch', b'w', atomictemp=True, checkambig=True)
                     try:
                         f.write(self._branch + b'\n')
                         f.close()
                         # make sure filecache has the correct stat info for _branch after
                         # replacing the underlying file
                         ce = self._filecache[b'_branch']
                         if ce:
                             ce.refresh()
                     except:  # re-raises
                         f.discard()
                         raise
                 def invalidate(self):
                     """Causes the next access to reread the dirstate.

                     This is different from localrepo.invalidatedirstate() because it always
                     rereads the dirstate. Use localrepo.invalidatedirstate() if you want to
                     check whether the dirstate has changed before rereading it."""
                     cached = self.__dict__
                     for name in ("_map", "_branch", "_ignore"):
                         # only drop the values that were actually cached
                         if name in cached:
                             delattr(self, name)
                     # back to a pristine bookkeeping state
                     self._lastnormaltime = timestamp.zero()
                     self._origpl = None
                     self._parentwriters = 0
                     self._dirty = False
                 def copy(self, source, dest):
                     """Mark dest as a copy of source. Unmark dest if source is None.

                     Nothing happens when `source` and `dest` are equal.
                     """
                     if source == dest:
                         return
                     self._dirty = True
                     copymap = self._map.copymap
                     if source is None:
                         copymap.pop(dest, None)
                     else:
                         copymap[dest] = source
                 def copied(self, file):
                     """Return the copy source recorded for `file`, or None."""
                     copymap = self._map.copymap
                     return copymap.get(file, None)
                 def copies(self):
                     """Return the dirstate map's copymap object itself."""
                     dmap = self._map
                     return dmap.copymap
                 @requires_no_parents_change
                 def set_tracked(self, filename):
                     """a "public" method for generic code to mark a file as tracked

                     This function is to be called outside of "update/merge" case. For
                     example by a command like `hg add X`.

                     return True the file was previously untracked, False otherwise.
                     """
                     self._dirty = True
                     current = self._map.get(filename)
                     already_tracked = current is not None and current.tracked
                     if not already_tracked:
                         # the name is validated only when it becomes tracked
                         self._check_new_tracked_filename(filename)
                     return self._map.set_tracked(filename)
                 @requires_no_parents_change
                 def set_untracked(self, filename):
                     """a "public" method for generic code to mark a file as untracked

                     This function is to be called outside of "update/merge" case. For
                     example by a command like `hg remove X`.

                     return True the file was previously tracked, False otherwise.
                     """
                     was_tracked = self._map.set_untracked(filename)
                     if was_tracked:
                         # only flag the dirstate dirty when the map reports a change
                         self._dirty = True
                     return was_tracked
                 @requires_no_parents_change
                 def set_clean(self, filename, parentfiledata=None):
                     """record that the current state of the file on disk is known to be clean

                     `parentfiledata`, when given and non-empty, is a
                     (mode, size, mtime) triple; otherwise the triple is read through
                     `_get_filedata()`.
                     """
                     self._dirty = True
                     filedata = parentfiledata or self._get_filedata(filename)
                     mode, size, mtime = filedata
                     entry = self._map[filename]
                     if not entry.tracked:
                         self._check_new_tracked_filename(filename)
                     self._map.set_clean(filename, mode, size, mtime)
                     if mtime > self._lastnormaltime:
                         # Remember the most recent modification timeslot for status(),
                         # to make sure we won't miss future size-preserving file content
                         # modifications that happen within the same timeslot.
                         self._lastnormaltime = mtime
                 @requires_no_parents_change
                 def set_possibly_dirty(self, filename):
                     """record that the current state of the file on disk is unknown"""
                     self._dirty = True
                     dmap = self._map
                     dmap.set_possibly_dirty(filename)
                 @requires_parents_change
                 def update_file_p1(
                     self,
                     filename,
                     p1_tracked,
                 ):
                     """Set a file as tracked in the parent (or not)

                     This is to be called when adjusting the dirstate to a new parent
                     after a history rewriting operation.

                     It should not be called during a merge (p2 != nullid) and only within
                     a `with dirstate.parentchange():` context.

                     Whether the file is tracked in the working copy is read from the
                     existing dirstate entry (an absent entry counts as untracked).
                     Raises error.ProgrammingError when a merge is in progress.
                     """
                     if self.in_merge:
                         # NOTE(review): the message names `update_file_reference`,
                         # not this method.
                         msg = b'update_file_reference should not be called when merging'
                         raise error.ProgrammingError(msg)
                     entry = self._map.get(filename)
                     if entry is None:
                         wc_tracked = False
                     else:
                         wc_tracked = entry.tracked
                     if not (p1_tracked or wc_tracked):
                         # the file is no longer relevant to anyone
                         # (no return here: execution continues to the reset_state()
                         # call further down)
                         if self._map.get(filename) is not None:
                             self._map.reset_state(filename)
                             self._dirty = True
                     elif (not p1_tracked) and wc_tracked:
                         if entry is not None and entry.added:
                             return  # avoid dropping copy information (maybe?)
                     # file data is only gathered for files tracked on both sides
                     parentfiledata = None
                     if wc_tracked and p1_tracked:
                         parentfiledata = self._get_filedata(filename)
                     self._map.reset_state(
                         filename,
                         wc_tracked,
                         p1_tracked,
                         # the underlying reference might have changed, we will have to
                         # check it.
                         has_meaningful_mtime=False,
                         parentfiledata=parentfiledata,
                     )
                     if (
                         parentfiledata is not None
                         and parentfiledata[2] > self._lastnormaltime
                     ):
                         # Remember the most recent modification timeslot for status(),
                         # to make sure we won't miss future size-preserving file content
                         # modifications that happen within the same timeslot.
                         self._lastnormaltime = parentfiledata[2]
                 @requires_parents_change
                 def update_file(
                     self,
                     filename,
                     wc_tracked,
                     p1_tracked,
                     p2_info=False,
                     possibly_dirty=False,
                     parentfiledata=None,
                 ):
                     """update the information about a file in the dirstate

                     This is to be called when the dirstate's parent changes, to keep
                     track of the file's situation with regard to the working copy and
                     its parent.

                     This function must be called within a `dirstate.parentchange` context.

                     note: the API is at an early stage and we might need to adjust it
                     depending of what information ends up being relevant and useful to
                     other processing.
                     """
                     # note: I do not think we need to double check name clash here since we
                     # are in a update/merge case that should already have taken care of
                     # this. The test agrees
                     self._dirty = True
                     if parentfiledata is None:
                         # file data is only looked up for a file tracked on both
                         # sides, with no p2 information and not possibly dirty
                         wants_filedata = (
                             wc_tracked
                             and p1_tracked
                             and not (possibly_dirty or p2_info)
                         )
                         if wants_filedata:
                             parentfiledata = self._get_filedata(filename)
                     self._map.reset_state(
                         filename,
                         wc_tracked,
                         p1_tracked,
                         p2_info=p2_info,
                         has_meaningful_mtime=not possibly_dirty,
                         parentfiledata=parentfiledata,
                     )
                     if parentfiledata is None:
                         return
                     mtime = parentfiledata[2]
                     if mtime > self._lastnormaltime:
                         # Remember the most recent modification timeslot for status(),
                         # to make sure we won't miss future size-preserving file content
                         # modifications that happen within the same timeslot.
                         self._lastnormaltime = mtime
                 def _check_new_tracked_filename(self, filename):
                     """Abort when `filename` cannot become a newly tracked file.

                     The name goes through scmutil.checkfilename(), must not be a
                     tracked directory, and none of its parent directories may exist
                     as a non-removed file entry.
                     """
                     scmutil.checkfilename(filename)
                     if self._map.hastrackeddir(filename):
                         msg = _(b'directory %r already in dirstate')
                         msg = msg % pycompat.bytestr(filename)
                         raise error.Abort(msg)
                     # shadows
                     for parent in pathutil.finddirs(filename):
                         if self._map.hastrackeddir(parent):
                             break
                         entry = self._map.get(parent)
                         if entry is None or entry.removed:
                             continue
                         msg = _(b'file %r in dirstate clashes with %r')
                         msg = msg % (
                             pycompat.bytestr(parent),
                             pycompat.bytestr(filename),
                         )
                         raise error.Abort(msg)
                 def _get_filedata(self, filename):
                     """Return (mode, size, mtime) from an lstat() of `filename`.

                     The mtime is the value produced by timestamp.mtime_of().
                     """
                     st = os.lstat(self._join(filename))
                     return (st.st_mode, st.st_size, timestamp.mtime_of(st))
                 def _discoverpath(self, path, normed, ignoremissing, exists, storemap):
                     """Work out the case-folded form of `path` when no map knows it.

                     `normed` is the case-normalized form of `path`. `exists` may be
                     None, in which case the path is checked on disk here. For an
                     existing path the result is recorded in `storemap` under
                     `normed`; for a missing path nothing is stored.
                     """
                     if exists is None:
                         exists = os.path.lexists(os.path.join(self._root, path))
                     if not exists:
                         # Maybe a path component exists
                         if not ignoremissing and b'/' in path:
                             # only the directory part is normalized; the last
                             # component is kept as given
                             d, f = path.rsplit(b'/', 1)
                             d = self._normalize(d, False, ignoremissing, None)
                             folded = d + b"/" + f
                         else:
                             # No path components, preserve original case
                             folded = path
                     else:
                         # recursively normalize leading directory components
                         # against dirstate
                         if b'/' in normed:
                             d, f = normed.rsplit(b'/', 1)
                             d = self._normalize(d, False, ignoremissing, True)
                             r = self._root + b"/" + d
                             folded = d + b"/" + util.fspath(f, r)
                         else:
                             folded = util.fspath(normed, self._root)
                         # remember the answer for this normalized name
                         storemap[normed] = folded
                     return folded
                 def _normalizefile(self, path, isknown, ignoremissing=False, exists=None):
                     """Return the case-folded form of `path`, considering files only.

                     The map's filefoldmap is consulted first. On a miss, `path` is
                     returned untouched when `isknown` is true, otherwise the answer
                     comes from `_discoverpath()`, which is handed the filefoldmap.
                     """
                     normed = util.normcase(path)
                     found = self._map.filefoldmap.get(normed, None)
                     if found is not None:
                         return found
                     if isknown:
                         return path
                     return self._discoverpath(
                         path, normed, ignoremissing, exists, self._map.filefoldmap
                     )
                 def _normalize(self, path, isknown, ignoremissing=False, exists=None):
                     """Return the case-folded form of `path` (file or directory).

                     The map's filefoldmap is consulted first, then its dirfoldmap.
                     On a miss in both, `path` is returned untouched when `isknown`
                     is true, otherwise the answer comes from `_discoverpath()`.
                     """
                     normed = util.normcase(path)
                     found = self._map.filefoldmap.get(normed, None)
                     if found is None:
                         found = self._map.dirfoldmap.get(normed, None)
                     if found is not None:
                         return found
                     if isknown:
                         return path
                     # store discovered result in dirfoldmap so that future
                     # normalizefile calls don't start matching directories
                     return self._discoverpath(
                         path, normed, ignoremissing, exists, self._map.dirfoldmap
                     )
                 def normalize(self, path, isknown=False, ignoremissing=False):
                     """
                     normalize the case of a pathname when on a casefolding filesystem

                     isknown specifies whether the filename came from walking the
                     disk, to avoid extra filesystem access.

                     If ignoremissing is True, missing path are returned
                     unchanged. Otherwise, we try harder to normalize possibly
                     existing path components.

                     The normalized case is determined based on the following precedence:

                     - version of name already stored in the dirstate
                     - version of name stored on disk
                     - version provided via command arguments
                     """
                     if not self._checkcase:
                         # nothing to fold: hand the path back untouched
                         return path
                     return self._normalize(path, isknown, ignoremissing)
                 def clear(self):
                     """Empty the dirstate map and flag the dirstate as dirty.

                     `_lastnormaltime` is reset to timestamp.zero() as well.
                     """
                     dmap = self._map
                     dmap.clear()
                     self._lastnormaltime = timestamp.zero()
                     self._dirty = True
                 def rebuild(self, parent, allfiles, changedfiles=None):
                     """Reset dirstate entries against `parent`.

                     With `changedfiles` left to None the whole dirstate is cleared and
                     every file of `allfiles` is reset. Otherwise only `changedfiles`
                     are touched: those found in `allfiles` are reset, the others have
                     their state dropped. The parents are set to (`parent`, nullid) on
                     the map and the dirstate is flagged dirty.
                     """
                     if changedfiles is None:
                         # Rebuild entire dirstate
                         to_lookup = allfiles
                         to_drop = []
                         # clear() resets _lastnormaltime; put the old value back
                         lastnormaltime = self._lastnormaltime
                         self.clear()
                         self._lastnormaltime = lastnormaltime
                     elif len(changedfiles) < 10:
                         # Avoid turning allfiles into a set, which can be expensive if it's
                         # large.
                         to_lookup = []
                         to_drop = []
                         for f in changedfiles:
                             if f in allfiles:
                                 to_lookup.append(f)
                             else:
                                 to_drop.append(f)
                     else:
                         changedfilesset = set(changedfiles)
                         to_lookup = changedfilesset & set(allfiles)
                         to_drop = changedfilesset - to_lookup
                     # remember the parents as they were before the first change
                     if self._origpl is None:
                         self._origpl = self._pl
                     self._map.setparents(parent, self._nodeconstants.nullid)
                     for f in to_lookup:
                         # NOTE(review): the map was just given nullid as second
                         # parent, so presumably in_merge is false here - confirm
                         if self.in_merge:
                             self.set_tracked(f)
                         else:
                             self._map.reset_state(
                                 f,
                                 wc_tracked=True,
                                 p1_tracked=True,
                             )
                     for f in to_drop:
                         self._map.reset_state(f)
                     self._dirty = True
                 def identity(self):
                     """Expose the identity of the dirstate's backing storage.

                     It is meant for detecting a change of the storage: as long as
                     the identity of a previous dirstate equals this one, writing out
                     changes computed from the former dirstate keeps things
                     consistent.
                     """
                     backing_map = self._map
                     return backing_map.identity
                 def write(self, tr):
                     """Write pending dirstate changes; do nothing when there are none.

                     Without a transaction (``tr`` is falsy) the dirstate file is
                     opened as an atomic temporary file and written right away. With
                     one, the write is registered on the transaction as a file
                     generator and therefore performed later.
                     """
                     if not self._dirty:
                         return
                     filename = self._filename
                     if not tr:
                         target = self._opener(
                             filename, b"w", atomictemp=True, checkambig=True
                         )
                         self._writedirstate(tr, target)
                         return

                     # 'dirstate.write()' does more than flushing in-memory changes:
                     # it is also what drops ambiguous timestamps. Delaying the write
                     # brings the "ambiguous timestamp issue" back. See the wiki page
                     # below for details:
                     # https://www.mercurial-scm.org/wiki/DirstateTransactionPlan
                     # So record right now the moment mtimes start being ambiguous.
                     now = _getfsnow(self._opener)

                     def generate(fp):
                         return self._writedirstate(tr, fp, now=now)

                     # delay writing in-memory changes out
                     tr.addfilegenerator(
                         b'dirstate',
                         (self._filename,),
                         generate,
                         location=b'plain',
                     )
                 def addparentchangecallback(self, category, callback):
                     """Register ``callback`` to run when the working directory
                     parents change.

                     The callback is invoked with these arguments:

                         dirstate, (oldp1, oldp2), (newp1, newp2)

                     ``category`` is a unique identifier: registering again under the
                     same category replaces the callback stored before.
                     """
                     registry = self._plchangecallbacks
                     registry[category] = callback
                 def _writedirstate(self, tr, st, now=None):
                     """Write the dirstate map through ``self._map.write``.

                     Before writing, the registered parent-change callbacks are run if
                     ``self._origpl`` is set and differs from the current parents.

                     ``tr`` and ``st`` are passed on to ``self._map.write`` unchanged.
                     ``now`` is the timestamp given to ``self._map.write``; when None,
                     it is taken from the mtime of ``st``.

                     Afterwards ``_lastnormaltime`` is reset to the zero timestamp and
                     the dirstate is no longer flagged dirty.
                     """
                     # notify callbacks about parents change
                     if self._origpl is not None and self._origpl != self._pl:
                         # callbacks run ordered by their category
                         for c, callback in sorted(
                             pycompat.iteritems(self._plchangecallbacks)
                         ):
                             callback(self, self._origpl, self._pl)
                         # forget the original parents once callbacks were notified
                         self._origpl = None
                     if now is None:
                         # use the modification time of the newly created temporary file as the
                         # filesystem's notion of 'now'
                         now = timestamp.mtime_of(util.fstat(st))
                     # enough 'delaywrite' prevents 'pack_dirstate' from dropping
                     # timestamp of each entries in dirstate, because of 'now > mtime'
                     delaywrite = self._ui.configint(b'debug', b'dirstate.delaywrite')
                     if delaywrite > 0:
                         # do we have any files to delay for?
                         for f, e in pycompat.iteritems(self._map):
                             if e.need_delay(now):
                                 import time  # to avoid useless import
                                 # rather than sleep n seconds, sleep until the next
                                 # multiple of n seconds
                                 clock = time.time()
                                 start = int(clock) - (int(clock) % delaywrite)
                                 end = start + delaywrite
                                 time.sleep(end - clock)
                                 # trust our estimate that the end is near now
                                 now = timestamp.timestamp((end, 0))
                                 # a single sleep is enough, whatever the number of
                                 # entries needing a delay
                                 break
                     self._map.write(tr, st, now)
                     self._lastnormaltime = timestamp.zero()
                     self._dirty = False
                 def _dirignore(self, f):
                     """Tell whether ``f`` itself, or any of the directories yielded
                     for it by ``pathutil.finddirs``, is ignored."""
                     if self._ignore(f):
                         return True
                     return any(self._ignore(parent) for parent in pathutil.finddirs(f))
                 def _ignorefiles(self):
                     """List the ignore files that apply to this repository.

                     The repository's own ``.hgignore`` comes first when it exists,
                     followed by one entry per ``ui.ignore`` / ``ui.ignore.*``
                     configuration item.
                     """
                     found = []
                     hgignore = self._join(b'.hgignore')
                     if os.path.exists(hgignore):
                         found.append(hgignore)
                     for key, value in self._ui.configitems(b"ui"):
                         if not (key == b'ignore' or key.startswith(b'ignore.')):
                             continue
                         # the value is arbitrary and user-specified, hence
                         # os.path.join rather than self._join
                         found.append(os.path.join(self._rootdir, util.expandpath(value)))
                     return found
                 def _ignorefileandline(self, f):
                     """Find the first ignore pattern matching ``f``.

                     Ignore files are read in queue order, starting with the ones
                     from ``self._ignorefiles()``; ``subinclude`` targets are queued
                     as they are met, unless already fully processed.

                     Return ``(ignore file, line number, line)`` for the first match,
                     or ``(None, -1, b"")`` when no pattern matches.
                     """
                     pending = collections.deque(self._ignorefiles())
                     done = set()
                     while pending:
                         source = pending.popleft()
                         entries = matchmod.readpatternfile(
                             source, self._ui.warn, sourceinfo=True
                         )
                         for pattern, lineno, line in entries:
                             kind, target = matchmod._patsplit(pattern, b'glob')
                             if kind != b"subinclude":
                                 matcher = matchmod.match(
                                     self._root, b'', [], [pattern], warn=self._ui.warn
                                 )
                                 if matcher(f):
                                     return (source, lineno, line)
                             elif target not in done:
                                 # read the included file later on
                                 pending.append(target)
                         done.add(source)
                     return (None, -1, b"")
                 def _walkexplicit(self, match, subrepos):
                     """Get stat data about the files explicitly specified by match.

                     ``subrepos`` is a list of subrepo paths. It is sorted in place,
                     and explicit files located under one of them are dropped from
                     the list of files to examine.

                     Return a triple (results, dirsfound, dirsnotfound).

                     - results is a mapping from filename to stat result. It also contains
                       listings mapping subrepos and .hg to None.
                     - dirsfound is a list of (normalized name, name as given by match)
                       pairs for the explicit files found to be directories.
                     - dirsnotfound is a list of files that the dirstate thinks are
                       directories and that were not found."""

                     def badtype(mode):
                         # build the message reported for a file of unsupported type
                         kind = _(b'unknown')
                         if stat.S_ISCHR(mode):
                             kind = _(b'character device')
                         elif stat.S_ISBLK(mode):
                             kind = _(b'block device')
                         elif stat.S_ISFIFO(mode):
                             kind = _(b'fifo')
                         elif stat.S_ISSOCK(mode):
                             kind = _(b'socket')
                         elif stat.S_ISDIR(mode):
                             kind = _(b'directory')
                         return _(b'unsupported file type (type is %s)') % kind

                     # local aliases for names used repeatedly below
                     badfn = match.bad
                     dmap = self._map
                     lstat = os.lstat
                     getkind = stat.S_IFMT
                     dirkind = stat.S_IFDIR
                     regkind = stat.S_IFREG
                     lnkkind = stat.S_IFLNK
                     join = self._join
                     dirsfound = []
                     foundadd = dirsfound.append
                     dirsnotfound = []
                     notfoundadd = dirsnotfound.append
                     # names are only normalized for a non-exact matcher with
                     # self._checkcase set
                     if not match.isexact() and self._checkcase:
                         normalize = self._normalize
                     else:
                         normalize = None
                     files = sorted(match.files())
                     subrepos.sort()
                     # walk both sorted lists in parallel and remove the files that
                     # live under a subrepo
                     i, j = 0, 0
                     while i < len(files) and j < len(subrepos):
                         subpath = subrepos[j] + b"/"
                         if files[i] < subpath:
                             i += 1
                             continue
                         while i < len(files) and files[i].startswith(subpath):
                             del files[i]
                         j += 1
                     if not files or b'' in files:
                         files = [b'']
                         # constructing the foldmap is expensive, so don't do it for the
                         # common case where files is ['']
                         normalize = None
                     # subrepos and .hg are pre-seeded with None
                     results = dict.fromkeys(subrepos)
                     results[b'.hg'] = None
                     for ff in files:
                         if normalize:
                             nf = normalize(ff, False, True)
                         else:
                             nf = ff
                         if nf in results:
                             continue
                         try:
                             st = lstat(join(nf))
                             kind = getkind(st.st_mode)
                             if kind == dirkind:
                                 if nf in dmap:
                                     # file replaced by dir on disk but still in dirstate
                                     results[nf] = None
                                 foundadd((nf, ff))
                             elif kind == regkind or kind == lnkkind:
                                 results[nf] = st
                             else:
                                 # neither directory, regular file nor symlink
                                 badfn(ff, badtype(kind))
                                 if nf in dmap:
                                     results[nf] = None
                         except OSError as inst:  # nf not found on disk - it is dirstate only
                             if nf in dmap:  # does it exactly match a missing file?
                                 results[nf] = None
                             else:  # does it match a missing directory?
                                 if self._map.hasdir(nf):
                                     notfoundadd(nf)
                                 else:
                                     badfn(ff, encoding.strtolocal(inst.strerror))
                     # match.files() may contain explicitly-specified paths that shouldn't
                     # be taken; drop them from the list of files found. dirsfound/notfound
                     # aren't filtered here because they will be tested later.
                     if match.anypats():
                         # iterate over a copy of the keys since entries get deleted
                         for f in list(results):
                             if f == b'.hg' or f in subrepos:
                                 # keep sentinel to disable further out-of-repo walks
                                 continue
                             if not match(f):
                                 del results[f]
                     # Case insensitive filesystems cannot rely on lstat() failing to detect
                     # a case-only rename.  Prune the stat object for any file that does not
                     # match the case in the filesystem, if there are multiple files that
                     # normalize to the same path.
                     if match.isexact() and self._checkcase:
                         # maps a case-normalized name to the set of result names
                         # (with a stat) sharing it
                         normed = {}
                         for f, st in pycompat.iteritems(results):
                             if st is None:
                                 continue
                             nc = util.normcase(f)
                             paths = normed.get(nc)
                             if paths is None:
                                 paths = set()
                                 normed[nc] = paths
                             paths.add(f)
                         for norm, paths in pycompat.iteritems(normed):
                             if len(paths) > 1:
                                 for path in paths:
                                     folded = self._discoverpath(
                                         path, norm, True, None, self._map.dirfoldmap
                                     )
                                     if path != folded:
                                         results[path] = None
                     return results, dirsfound, dirsnotfound
                 def walk(self, match, subrepos, unknown, ignored, full=True):
                     """
                     Walk recursively through the directory tree, finding all files
                     matched by match.

                     ``subrepos`` is forwarded to ``_walkexplicit``; the subrepo
                     entries are removed from the results before returning.

                     ``unknown`` and ``ignored`` select which ignore predicates are
                     used: with ``ignored`` nothing is ignored, with only ``unknown``
                     the regular ignore rules apply, and with neither everything is
                     treated as ignored, which drops directory recursion.

                     If full is False, maybe skip some known-clean files.

                     Return a dict mapping filename to stat-like object (either
                     mercurial.osutil.stat instance or return value of os.stat()).
                     Entries may also map to None.
                     """
                     # full is a flag that extensions that hook into walk can use -- this
                     # implementation doesn't use it at all. This satisfies the contract
                     # because we only guarantee a "maybe".
                     if ignored:
                         ignore = util.never
                         dirignore = util.never
                     elif unknown:
                         ignore = self._ignore
                         dirignore = self._dirignore
                     else:
                         # if not unknown and not ignored, drop dir recursion and step 2
                         ignore = util.always
                         dirignore = util.always
                     # local aliases for names used repeatedly below
                     matchfn = match.matchfn
                     matchalways = match.always()
                     matchtdir = match.traversedir
                     dmap = self._map
                     listdir = util.listdir
                     lstat = os.lstat
                     dirkind = stat.S_IFDIR
                     regkind = stat.S_IFREG
                     lnkkind = stat.S_IFLNK
                     join = self._join
                     exact = skipstep3 = False
                     if match.isexact():  # match.exact
                         exact = True
                         dirignore = util.always  # skip step 2
                     elif match.prefix():  # match.match, no patterns
                         skipstep3 = True
                     # NOTE(review): both branches bind normalize to self._normalize;
                     # only normalizefile and skipstep3 differ between them.
                     if not exact and self._checkcase:
                         normalize = self._normalize
                         normalizefile = self._normalizefile
                         skipstep3 = False
                     else:
                         normalize = self._normalize
                         normalizefile = None
                     # step 1: find all explicit files
                     results, work, dirsnotfound = self._walkexplicit(match, subrepos)
                     if matchtdir:
                         # report explicit directories, found or not, as traversed
                         for d in work:
                             matchtdir(d[0])
                         for d in dirsnotfound:
                             matchtdir(d)
                     # step 3 can only be skipped if step 1 met no directory at all
                     skipstep3 = skipstep3 and not (work or dirsnotfound)
                     work = [d for d in work if not dirignore(d[0])]
                     # step 2: visit subdirectories
                     def traverse(work, alreadynormed):
                         # work is used as a stack of directories left to list;
                         # subdirectories get pushed onto it as they are discovered
                         wadd = work.append
                         while work:
                             tracing.counter('dirstate.walk work', len(work))
                             nd = work.pop()
                             visitentries = match.visitchildrenset(nd)
                             if not visitentries:
                                 continue
                             if visitentries == b'this' or visitentries == b'all':
                                 # no restriction on the entries to look at
                                 visitentries = None
                             skip = None
                             if nd != b'':
                                 skip = b'.hg'
                             try:
                                 with tracing.log('dirstate.walk.traverse listdir %s', nd):
                                     entries = listdir(join(nd), stat=True, skip=skip)
                             except OSError as inst:
                                 # inaccessible or vanished directories are reported
                                 # as bad; any other error propagates
                                 if inst.errno in (errno.EACCES, errno.ENOENT):
                                     match.bad(
                                         self.pathto(nd), encoding.strtolocal(inst.strerror)
                                     )
                                     continue
                                 raise
                             for f, kind, st in entries:
                                 # Some matchers may return files in the visitentries set,
                                 # instead of 'this', if the matcher explicitly mentions them
                                 # and is not an exactmatcher. This is acceptable; we do not
                                 # make any hard assumptions about file-or-directory below
                                 # based on the presence of `f` in visitentries. If
                                 # visitchildrenset returned a set, we can always skip the
                                 # entries *not* in the set it provided regardless of whether
                                 # they're actually a file or a directory.
                                 if visitentries and f not in visitentries:
                                     continue
                                 if normalizefile:
                                     # even though f might be a directory, we're only
                                     # interested in comparing it to files currently in the
                                     # dmap -- therefore normalizefile is enough
                                     nf = normalizefile(
                                         nd and (nd + b"/" + f) or f, True, True
                                     )
                                 else:
                                     nf = nd and (nd + b"/" + f) or f
                                 if nf not in results:
                                     if kind == dirkind:
                                         if not ignore(nf):
                                             if matchtdir:
                                                 matchtdir(nf)
                                             wadd(nf)
                                         # a directory on disk whose name is in the
                                         # dmap gets a None entry
                                         if nf in dmap and (matchalways or matchfn(nf)):
                                             results[nf] = None
                                     elif kind == regkind or kind == lnkkind:
                                         if nf in dmap:
                                             if matchalways or matchfn(nf):
                                                 results[nf] = st
                                         elif (matchalways or matchfn(nf)) and not ignore(
                                             nf
                                         ):
                                             # unknown file -- normalize if necessary
                                             if not alreadynormed:
                                                 nf = normalize(nf, False, True)
                                             results[nf] = st
                                     elif nf in dmap and (matchalways or matchfn(nf)):
                                         # other kinds of entries found in the dmap get
                                         # a None entry
                                         results[nf] = None
                     for nd, d in work:
                         # alreadynormed means that traverse doesn't have to do any
                         # expensive directory normalization
                         alreadynormed = not normalize or nd == d
                         traverse([d], alreadynormed)
                     # remove the entries _walkexplicit pre-seeded for subrepos and .hg
                     for s in subrepos:
                         del results[s]
                     del results[b'.hg']
                     # step 3: visit remaining files from dmap
                     if not skipstep3 and not exact:
                         # If a dmap file is not in results yet, it was either
                         # a) not matching matchfn b) ignored, c) missing, or d) under a
                         # symlink directory.
                         if not results and matchalways:
                             visit = [f for f in dmap]
                         else:
                             visit = [f for f in dmap if f not in results and matchfn(f)]
                         visit.sort()
                         if unknown:
                             # unknown == True means we walked all dirs under the roots
                             # that wasn't ignored, and everything that matched was stat'ed
                             # and is already in results.
                             # The rest must thus be ignored or under a symlink.
                             audit_path = pathutil.pathauditor(self._root, cached=True)
                             for nf in iter(visit):
                                 # If a stat for the same file was already added with a
                                 # different case, don't add one for this, since that would
                                 # make it appear as if the file exists under both names
                                 # on disk.
                                 if (
                                     normalizefile
                                     and normalizefile(nf, True, True) in results
                                 ):
                                     results[nf] = None
                                 # Report ignored items in the dmap as long as they are not
                                 # under a symlink directory.
                                 elif audit_path.check(nf):
                                     try:
                                         results[nf] = lstat(join(nf))
                                         # file was just ignored, no links, and exists
                                     except OSError:
                                         # file doesn't exist
                                         results[nf] = None
                                 else:
                                     # It's either missing or under a symlink directory
                                     # which we in this case report as missing
                                     results[nf] = None
                         else:
                             # We may not have walked the full directory tree above,
                             # so stat and check everything we missed.
                             # iv pairs each stat result with its name, in visit order
                             iv = iter(visit)
                             for st in util.statfiles([join(i) for i in visit]):
                                 results[next(iv)] = st
                     return results
                 def _rust_status(self, matcher, list_clean, list_ignored, list_unknown):
                     """Compute the status by calling ``rustmod.status``.

                     ``list_clean``, ``list_ignored`` and ``list_unknown`` are coerced
                     to booleans before being passed to ``rustmod.status``.

                     Besides computing the result, this method:

                     - may set ``RAYON_NUM_THREADS`` in ``encoding.environ``
                     - ORs the ``dirty`` value returned by ``rustmod.status`` into
                       ``self._dirty``
                     - calls ``matcher.traversedir`` for every traversed directory when
                       the matcher has such a callback
                     - emits the returned warnings through ``self._ui.warn``
                     - reports the returned bad files through ``matcher.bad``

                     Return a pair ``(lookup, status)`` where ``status`` is a
                     ``scmutil.status`` instance.
                     """
                     # Force Rayon (Rust parallelism library) to respect the number of
                     # workers. This is a temporary workaround until Rust code knows
                     # how to read the config file.
                     numcpus = self._ui.configint(b"worker", b"numcpus")
                     if numcpus is not None:
                         # setdefault: a value already in the environment is kept
                         encoding.environ.setdefault(b'RAYON_NUM_THREADS', b'%d' % numcpus)
                     workers_enabled = self._ui.configbool(b"worker", b"enabled", True)
                     if not workers_enabled:
                         # unlike above, this overrides any existing value
                         encoding.environ[b"RAYON_NUM_THREADS"] = b"1"
                     (
                         lookup,
                         modified,
                         added,
                         removed,
                         deleted,
                         clean,
                         ignored,
                         unknown,
                         warnings,
                         bad,
                         traversed,
                         dirty,
                     ) = rustmod.status(
                         self._map._map,
                         matcher,
                         self._rootdir,
                         self._ignorefiles(),
                         self._checkexec,
                         self._lastnormaltime,
                         bool(list_clean),
                         bool(list_ignored),
                         bool(list_unknown),
                         bool(matcher.traversedir),
                     )
                     self._dirty |= dirty
                     if matcher.traversedir:
                         for dir in traversed:
                             matcher.traversedir(dir)
                     # NOTE(review): self._ui.warn is tested for truthiness here, not
                     # called -- confirm this guard is intended.
                     if self._ui.warn:
                         for item in warnings:
                             # a tuple is a (file path, syntax) pair describing an
                             # invalid syntax; anything else is reported as an
                             # unreadable pattern file
                             if isinstance(item, tuple):
                                 file_path, syntax = item
                                 msg = _(b"%s: ignoring invalid syntax '%s'\n") % (
                                     file_path,
                                     syntax,
                                 )
                                 self._ui.warn(msg)
                             else:
                                 msg = _(b"skipping unreadable pattern file '%s': %s\n")
                                 self._ui.warn(
                                     msg
                                     % (
                                         pathutil.canonpath(
                                             self._rootdir, self._rootdir, item
                                         ),
                                         b"No such file or directory",
                                     )
                                 )
                     for (fn, message) in bad:
                         matcher.bad(fn, encoding.strtolocal(message))
                     status = scmutil.status(
                         modified=modified,
                         added=added,
                         removed=removed,
                         deleted=deleted,
                         unknown=unknown,
                         ignored=ignored,
                         clean=clean,
                     )
                     return (lookup, status)
                 def status(self, match, subrepos, ignored, clean, unknown):
                     """Determine the status of the working copy relative to the
                     dirstate and return a pair of (unsure, status), where status is of type
                     scmutil.status and:
                       unsure:
                         files that might have been modified since the dirstate was
                         written, but need to be read to be sure (size is the same
                         but mtime differs)
                       status.modified:
                         files that have definitely been modified since the dirstate
                         was written (different size or mode)
                       status.clean:
                         files that have definitely not been modified since the
                         dirstate was written
                     """
                     listignored, listclean, listunknown = ignored, clean, unknown
                     lookup, modified, added, unknown, ignored = [], [], [], [], []
                     removed, deleted, clean = [], [], []
                     dmap = self._map
                     dmap.preload()
                     use_rust = True
                     allowed_matchers = (
                         matchmod.alwaysmatcher,
                         matchmod.exactmatcher,
                         matchmod.includematcher,
                     )
                     if rustmod is None:
                         use_rust = False
                     elif self._checkcase:
                         # Case-insensitive filesystems are not handled yet
                         use_rust = False
                     elif subrepos:
                         use_rust = False
                     elif sparse.enabled:
                         use_rust = False
                     elif not isinstance(match, allowed_matchers):
                         # Some matchers have yet to be implemented
                         use_rust = False
                     if use_rust:
                         try:
                             return self._rust_status(
                                 match, listclean, listignored, listunknown
                             )
                         except rustmod.FallbackError:
                             pass
                     def noop(f):
                         pass
                     dcontains = dmap.__contains__
                     dget = dmap.__getitem__
                     ladd = lookup.append  # aka "unsure"
                     madd = modified.append
                     aadd = added.append
                     uadd = unknown.append if listunknown else noop
                     iadd = ignored.append if listignored else noop
                     radd = removed.append
                     dadd = deleted.append
                     cadd = clean.append if listclean else noop
                     mexact = match.exact
                     dirignore = self._dirignore
                     checkexec = self._checkexec
+                    checklink = self._checklink
                     copymap = self._map.copymap
                     lastnormaltime = self._lastnormaltime
                     # We need to do full walks when either
                     # - we're listing all clean files, or
                     # - match.traversedir does something, because match.traversedir should
                     #   be called for every dir in the working dir
                     full = listclean or match.traversedir is not None
                     for fn, st in pycompat.iteritems(
                         self.walk(match, subrepos, listunknown, listignored, full=full)
                     ):
                         if not dcontains(fn):
                             if (listignored or mexact(fn)) and dirignore(fn):
                                 if listignored:
                                     iadd(fn)
                             else:
                                 uadd(fn)
                             continue
                         t = dget(fn)
                         mode = t.mode
                         size = t.size
                         if not st and t.tracked:
                             dadd(fn)
                         elif t.p2_info:
                             madd(fn)
                         elif t.added:
                             aadd(fn)
                         elif t.removed:
                             radd(fn)
                         elif t.tracked:
-                            if (
+                            if not checklink and t.has_fallback_symlink:
+                                # If the file system does not support symlink, the mode
+                                # might not be correctly stored in the dirstate, so do not
+                                # trust it.
+                                ladd(fn)
+                            elif not checkexec and t.has_fallback_exec:
+                                # If the file system does not support exec bits, the mode
+                                # might not be correctly stored in the dirstate, so do not
+                                # trust it.
+                                ladd(fn)
+                            elif (
                                 size >= 0
                                 and (
                                     (size != st.st_size and size != st.st_size & _rangemask)
                                     or ((mode ^ st.st_mode) & 0o100 and checkexec)
                                 )
                                 or fn in copymap
                             ):
                                 if stat.S_ISLNK(st.st_mode) and size != st.st_size:
                                     # issue6456: Size returned may be longer due to
                                     # encryption on EXT-4 fscrypt, undecided.
                                     ladd(fn)
                                 else:
                                     madd(fn)
                             elif not t.mtime_likely_equal_to(timestamp.mtime_of(st)):
                                 ladd(fn)
                             elif timestamp.mtime_of(st) == lastnormaltime:
                                 # fn may have just been marked as normal and it may have
                                 # changed in the same second without changing its size.
                                 # This can happen if we quickly do multiple commits.
                                 # Force lookup, so we don't miss such a racy file change.
                                 ladd(fn)
                             elif listclean:
                                 cadd(fn)
                     status = scmutil.status(
                         modified, added, removed, deleted, unknown, ignored, clean
                     )
                     return (lookup, status)
                 def matches(self, match):
                     """
                     List the files known to the dirstate that are selected by ``match``.

                     Entries are considered whatever their state is.  When ``match``
                     matches everything, the ``keys()`` of the underlying map is returned
                     directly; every other path returns a list.
                     """
                     # With the Rust extension available, query the inner map object.
                     if rustmod is None:
                         entries = self._map
                     else:
                         entries = self._map._map
                     if match.always():
                         return entries.keys()
                     wanted = match.files()
                     if match.isexact():
                         # Exact matcher: the explicit file list is typically much
                         # smaller than the dirstate, so filter that list instead.
                         return [name for name in wanted if name in entries]
                     if match.prefix() and all(name in entries for name in wanted):
                         # Prefix matcher whose values are all known files: the answer
                         # is the file list itself.
                         return list(wanted)
                     # General case: run the matcher over every dirstate entry.
                     return [name for name in entries if match(name)]
                 def _actualfilename(self, tr):
                     if tr:
                         return self._pendingfilename
                     else:
                         return self._filename
                 def savebackup(self, tr, backupname):
                     '''Copy the current dirstate into the file named ``backupname``.

                     ``tr`` selects which dirstate file is backed up (see
                     ``_actualfilename``); ``backupname`` must differ from that file.
                     '''
                     source = self._actualfilename(tr)
                     assert backupname != source
                     opener = self._opener

                     # Go through '_writedirstate' rather than 'write': the latter skips
                     # writing while a transaction is running, and the file written here
                     # is the one the backup is created from.
                     if self._dirty or not opener.exists(source):
                         fp = opener(source, b"w", atomictemp=True, checkambig=True)
                         self._writedirstate(tr, fp)

                     if tr:

                         def writepending(f):
                             return self._writedirstate(tr, f)

                         # Make sure a later tr.writepending returns True for the
                         # changes written above, even if the dirstate is never
                         # changed again.
                         tr.addfilegenerator(
                             b'dirstate',
                             (self._filename,),
                             writepending,
                             location=b'plain',
                         )
                         # Make sure the pending file written above is unlinked on
                         # failure, even if tr.writepending is not invoked before the
                         # end of this transaction.
                         tr.registertmp(source, location=b'plain')

                     opener.tryunlink(backupname)
                     # A hardlinked backup is fine: _writedirstate is always called with
                     # an "atomictemp=True" file.
                     source_path = opener.join(source)
                     backup_path = opener.join(backupname)
                     util.copyfile(source_path, backup_path, hardlink=True)
                 def restorebackup(self, tr, backupname):
                     '''Restore the dirstate from the file named ``backupname``.'''
                     # Invalidate first so that "wlock.release()" does not write dirstate
                     # changes out after the backup has been restored.
                     self.invalidate()
                     target = self._actualfilename(tr)
                     opener = self._opener
                     backup_path = opener.join(backupname)
                     target_path = opener.join(target)
                     if not util.samefile(backup_path, target_path):
                         opener.rename(backupname, target, checkambig=True)
                     else:
                         # Backup and target are the same file: only the backup name
                         # has to go away.
                         opener.unlink(backupname)
                 def clearbackup(self, tr, backupname):
                     '''Remove the backup file named ``backupname``.

                     ``tr`` is accepted but not used by this method.
                     '''
                     opener = self._opener
                     opener.unlink(backupname)
                 def verify(self, m1, m2):
                     """Check the dirstate content against the parent manifests.

                     This is a generator.  Each inconsistency found is yielded as a
                     ``(message_format, filename, state)`` tuple, where ``message_format``
                     is a bytes template holding two ``%s`` placeholders.
                     """
                     not_in_p1 = b"%s in state %s, but not in manifest1\n"
                     also_in_p1 = b"%s in state %s, but also in manifest1\n"
                     not_in_parents = b"%s in state %s, but not in either manifest\n"
                     not_in_dirstate = b"%s in manifest1, but listed as state %s\n"

                     # First pass: compare each dirstate entry with the manifests.
                     for path, item in self.items():
                         st = item.state
                         if st in b"nr" and path not in m1:
                             yield (not_in_p1, path, st)
                         if st in b"a" and path in m1:
                             yield (also_in_p1, path, st)
                         if st in b"m" and path not in m1 and path not in m2:
                             yield (not_in_parents, path, st)

                     # Second pass: compare each file of the first manifest with the
                     # state the dirstate reports for it.
                     for path in m1:
                         st = self.get_entry(path).state
                         if st not in b"nrm":
                             yield (not_in_dirstate, path, st)

mercurial/helptext/internals/dirstate-v2.txt

0 +6 0

             The *dirstate* is what Mercurial uses internally to track
             the state of files in the working directory,
             such as set by commands like `hg add` and `hg rm`.
             It also contains some cached data that help make `hg status` faster.
             The name refers both to `.hg/dirstate` on the filesystem
             and the corresponding data structure in memory while a Mercurial process
             is running.
             The original file format, retroactively dubbed `dirstate-v1`,
             is described at https://www.mercurial-scm.org/wiki/DirState.
             It is made of a flat sequence of unordered variable-size entries,
             so accessing any information in it requires parsing all of it.
             Similarly, saving changes requires rewriting the entire file.
             The newer `dirstate-v2` file format is designed to fix these limitations
             and make `hg status` faster.
             User guide
             ==========
             Compatibility
             -------------
             The file format is experimental and may still change.
             Different versions of Mercurial may not be compatible with each other
             when working on a local repository that uses this format.
             When using an incompatible version with the experimental format,
             anything can happen including data corruption.
             Since the dirstate is entirely local and not relevant to the wire protocol,
             `dirstate-v2` does not affect compatibility with remote Mercurial versions.
             When `share-safe` is enabled, different repositories sharing the same store
             can use different dirstate formats.
             Enabling `dirstate-v2` for new local repositories
             -------------------------------------------------
             When creating a new local repository such as with `hg init` or `hg clone`,
             the `exp-dirstate-v2` boolean in the `format` configuration section
             controls whether to use this file format.
             This is disabled by default as of this writing.
             To enable it for a single repository, run for example::
                 $ hg init my-project --config format.exp-dirstate-v2=1
             Checking the format of an existing local repository
             ---------------------------------------------------
             The `debugformat` command prints information about
             which of multiple optional formats are used in the current repository,
             including `dirstate-v2`::
                 $ hg debugformat
                 format-variant     repo
                 fncache:            yes
                 dirstate-v2:        yes
                 […]
             Upgrading or downgrading an existing local repository
             -----------------------------------------------------
             The `debugupgrade` command does various upgrades or downgrades
             on a local repository
             based on the current Mercurial version and on configuration.
             The same `format.exp-dirstate-v2` configuration is used again.
             Example to upgrade::
                 $ hg debugupgrade --config format.exp-dirstate-v2=1
             Example to downgrade to `dirstate-v1`::
                 $ hg debugupgrade --config format.exp-dirstate-v2=0
             Both of these commands do nothing but print a list of proposed changes,
             which may include changes unrelated to the dirstate.
             Those other changes are controlled by their own configuration keys.
             Add `--run` to a command to actually apply the proposed changes.
             Backups of `.hg/requires` and `.hg/dirstate` are created
             in a `.hg/upgradebackup.*` directory.
             If something goes wrong, restoring those files should undo the change.
             Note that upgrading affects compatibility with older versions of Mercurial
             as noted above.
             This can be relevant when a repository’s files are on a USB drive
             or some other removable media, or shared over the network, etc.
             Internal filesystem representation
             ==================================
             Requirements file
             -----------------
             The `.hg/requires` file indicates which of various optional file formats
             are used by a given repository.
             Mercurial aborts when seeing a requirement it does not know about,
             which avoids older versions accidentally messing up a repository
             that uses a format that was introduced later.
             For versions that do support a format, the presence or absence of
             the corresponding requirement indicates whether to use that format.
             When the file contains a `exp-dirstate-v2` line,
             the `dirstate-v2` format is used.
             With no such line `dirstate-v1` is used.
             High level description
             ----------------------
             Whereas `dirstate-v1` uses a single `.hg/dirstate` file,
             in `dirstate-v2` that file is a "docket" file
             that only contains some metadata
             and points to a separate data file named `.hg/dirstate.{ID}`,
             where `{ID}` is a random identifier.
             This separation allows making data files append-only
             and therefore safer to memory-map.
             Creating a new data file (occasionally to clean up unused data)
             can be done with a different ID
             without disrupting another Mercurial process
             that could still be using the previous data file.
             Both files have a format designed to reduce the need for parsing,
             by using fixed-size binary components as much as possible.
             For data that is not fixed-size,
             references to other parts of a file can be made by storing "pseudo-pointers":
             integers counted in bytes from the start of a file.
             For read-only access no data structure is needed,
             only a bytes buffer (possibly memory-mapped directly from the filesystem)
             with specific parts read on demand.
             The data file contains "nodes" organized in a tree.
             Each node represents a file or directory inside the working directory
             or its parent changeset.
             This tree has the same structure as the filesystem,
             so a node representing a directory has child nodes representing
             the files and subdirectories contained directly in that directory.
             The docket file format
             ----------------------
             This is implemented in `rust/hg-core/src/dirstate_tree/on_disk.rs`
             and `mercurial/dirstateutils/docket.py`.
             Components of the docket file are found at fixed offsets,
             counted in bytes from the start of the file:
             * Offset 0:
               The 12-bytes marker string "dirstate-v2\n" ending with a newline character.
               This makes it easier to tell a dirstate-v2 file from a dirstate-v1 file,
               although it is not strictly necessary
               since `.hg/requires` determines which format to use.
             * Offset 12:
               The changeset node ID on the first parent of the working directory,
               as up to 32 binary bytes.
               If a node ID is shorter (20 bytes for SHA-1),
               it is start-aligned and the rest of the bytes are set to zero.
             * Offset 44:
               The changeset node ID on the second parent of the working directory,
               or all zeros if there isn’t one.
               Also 32 binary bytes.
             * Offset 76:
               Tree metadata on 44 bytes, described below.
               Its separation in this documentation from the rest of the docket
               reflects a detail of the current implementation.
               Since tree metadata is also made of fields at fixed offsets, those could
               be inlined here by adding 76 bytes to each offset.
             * Offset 120:
               The used size of the data file, as a 32-bit big-endian integer.
               The actual size of the data file may be larger
               (if another Mercurial process is appending to it
               but has not updated the docket yet).
               That extra data must be ignored.
             * Offset 124:
               The length of the data file identifier, as an 8-bit integer.
             * Offset 125:
               The data file identifier.
             * Any additional data is currently ignored, and dropped when updating the file.
             Tree metadata in the docket file
             --------------------------------
             Tree metadata is similarly made of components at fixed offsets.
             These offsets are counted in bytes from the start of tree metadata,
             which is 76 bytes after the start of the docket file.
             This metadata can be thought of as the singular root of the tree
             formed by nodes in the data file.
             * Offset 0:
               Pseudo-pointer to the start of root nodes,
               counted in bytes from the start of the data file,
               as a 32-bit big-endian integer.
               These nodes describe files and directories found directly
               at the root of the working directory.
             * Offset 4:
               Number of root nodes, as a 32-bit big-endian integer.
             * Offset 8:
               Total number of nodes in the entire tree that "have a dirstate entry",
               as a 32-bit big-endian integer.
               Those nodes represent files that would be present at all in `dirstate-v1`.
               This is typically less than the total number of nodes.
               This counter is used to implement `len(dirstatemap)`.
             * Offset 12:
               Number of nodes in the entire tree that have a copy source,
               as a 32-bit big-endian integer.
               At the next commit, these files are recorded
               as having been copied or moved/renamed from that source.
               (A move is recorded as a copy and separate removal of the source.)
               This counter is used to implement `len(dirstatemap.copymap)`.
             * Offset 16:
               An estimation of how many bytes of the data file
               (within its used size) are unused, as a 32-bit big-endian integer.
               When appending to an existing data file,
               some existing nodes or paths can be unreachable from the new root
               but they still take up space.
               This counter is used to decide when to write a new data file from scratch
               instead of appending to an existing one,
               in order to get rid of that unreachable data
               and avoid unbounded file size growth.
             * Offset 20:
               These four bytes are currently ignored
               and reset to zero when updating a docket file.
               This is an attempt at forward compatibility:
               future Mercurial versions could use this as a bit field
               to indicate that a dirstate has additional data or constraints.
               Finding a dirstate file with the relevant bit unset indicates that
               it was written by a then-older version
               which is not aware of that future change.
             * Offset 24:
               Either 20 zero bytes, or a SHA-1 hash as 20 binary bytes.
               When present, the hash is of ignore patterns
               that were used for some previous run of the `status` algorithm.
             * (Offset 44: end of tree metadata)
             Optional hash of ignore patterns
             --------------------------------
             The implementation of `status` at `rust/hg-core/src/dirstate_tree/status.rs`
             has been optimized such that its run time is dominated by calls
             to `stat` for reading the filesystem metadata of a file or directory,
             and to `readdir` for listing the contents of a directory.
             In some cases the algorithm can skip calls to `readdir`
             (saving significant time)
             because the dirstate already contains enough of the relevant information
             to build the correct `status` results.
             The default configuration of `hg status` is to list unknown files
             but not ignored files.
             In this case, it matters for the `readdir`-skipping optimization
             if a given file used to be ignored but became unknown
             because `.hgignore` changed.
             To detect the possibility of such a change,
             the tree metadata contains an optional hash of all ignore patterns.
             We define:
             * "Root" ignore files as:
               - `.hgignore` at the root of the repository if it exists
               - And all files from `ui.ignore.*` config.
               This set of files is sorted by the string representation of their path.
             * The "expanded contents" of an ignore file is the byte string made
               by the concatenation of its contents followed by the "expanded contents"
               of other files included with `include:` or `subinclude:` directives,
               in inclusion order. This definition is recursive, as included files can
               themselves include more files.
             This hash is defined as the SHA-1 of the concatenation (in sorted
             order) of the "expanded contents" of each "root" ignore file.
             (Note that computing this does not require actually concatenating
             into a single contiguous byte sequence.
             Instead a SHA-1 hasher object can be created
             and fed separate chunks one by one.)
             The data file format
             --------------------
             This is implemented in `rust/hg-core/src/dirstate_tree/on_disk.rs`
             and `mercurial/dirstateutils/v2.py`.
             The data file contains two types of data: paths and nodes.
             Paths and nodes can be organized in any order in the file, except that sibling
             nodes must be next to each other and sorted by their path.
             Contiguity lets the parent refer to them all
             by their count and a single pseudo-pointer,
             instead of storing one pseudo-pointer per child node.
             Sorting allows using binary search to find a child node with a given name
             in `O(log(n))` byte sequence comparisons.
             The current implementation writes paths and child nodes before a given node
             for ease of figuring out the value of pseudo-pointers by the time they are to be
             written, but this is not an obligation and readers must not rely on it.
             A path is stored as a byte string anywhere in the file, without delimiter.
             It is referred to by one or more nodes by a pseudo-pointer to its start, and its
             length in bytes. Since there is no delimiter,
             when a path is a substring of another the same bytes could be reused,
             although the implementation does not exploit this as of this writing.
             A node is stored on 44 bytes with components at fixed offsets. Paths and
             child nodes relevant to a node are stored externally and referenced through
             pseudo-pointers.
             All integers are stored in big-endian. All pseudo-pointers are 32-bit integers
             counting bytes from the start of the data file. Path lengths and positions
             are 16-bit integers, also counted in bytes.
             Node components are:
             * Offset 0:
               Pseudo-pointer to the full path of this node,
               from the working directory root.
             * Offset 4:
               Length of the full path.
             * Offset 6:
               Position of the last `/` path separator within the full path,
               in bytes from the start of the full path,
               or zero if there isn’t one.
               The part of the full path after this position is the "base name".
               Since sibling nodes have the same parent, only their base names vary
               and need to be considered when doing binary search to find a given path.
             * Offset 8:
               Pseudo-pointer to the "copy source" path for this node,
               or zero if there is no copy source.
             * Offset 12:
               Length of the copy source path, or zero if there isn’t one.
             * Offset 14:
               Pseudo-pointer to the start of child nodes.
             * Offset 18:
               Number of child nodes, as a 32-bit integer.
               They occupy 44 times this number of bytes
               (not counting space for paths, and further descendants).
             * Offset 22:
               Number as a 32-bit integer of descendant nodes in this subtree,
               not including this node itself,
               that "have a dirstate entry".
               Those nodes represent files that would be present at all in `dirstate-v1`.
               This is typically less than the total number of descendants.
               This counter is used to implement `has_dir`.
             * Offset 26:
               Number as a 32-bit integer of descendant nodes in this subtree,
               not including this node itself,
               that represent files tracked in the working directory.
               (For example, `hg rm` makes a file untracked.)
               This counter is used to implement `has_tracked_dir`.
             * Offset 30:
               A `flags` field that packs some boolean values as bits of a 16-bit integer.
               Starting from least-significant, bit masks are::
                 WDIR_TRACKED = 1 << 0
                 P1_TRACKED = 1 << 1
                 P2_INFO = 1 << 2
                 MODE_EXEC_PERM = 1 << 3
                 MODE_IS_SYMLINK = 1 << 4
                 HAS_FALLBACK_EXEC = 1 << 5
                 FALLBACK_EXEC = 1 << 6
                 HAS_FALLBACK_SYMLINK = 1 << 7
                 FALLBACK_SYMLINK = 1 << 8
                 EXPECTED_STATE_IS_MODIFIED = 1 << 9
                 HAS_MODE_AND_SIZE = 1 << 10
                 HAS_MTIME = 1 << 11
                 MTIME_SECOND_AMBIGUOUS = 1 << 12
                 DIRECTORY = 1 << 13
                 ALL_UNKNOWN_RECORDED = 1 << 14
                 ALL_IGNORED_RECORDED = 1 << 15
               The meaning of each bit is described below.
               Other bits are unset.
               They may be assigned meaning in the future,
               with the limitation that Mercurial versions that pre-date such meaning
               will always reset those bits to unset when writing nodes.
               (A new node is written for any mutation in its subtree,
               leaving the bytes of the old node unreachable
               until the data file is rewritten entirely.)
             * Offset 32:
               A `size` field described below, as a 32-bit integer.
               Unlike in dirstate-v1, negative values are not used.
             * Offset 36:
               The seconds component of an `mtime` field described below,
               as a 32-bit integer.
               Unlike in dirstate-v1, negative values are not used.
               When `mtime` is used, this is number of seconds since the Unix epoch
               truncated to its lower 31 bits.
             * Offset 40:
               The nanoseconds component of an `mtime` field described below,
               as a 32-bit integer.
               When `mtime` is used,
               this is the number of nanoseconds since `mtime.seconds`,
               always strictly less than one billion.
               This may be zero if more precision is not available.
               (This can happen because of limitations in any of Mercurial, Python,
               libc, the operating system, …)
               When comparing two mtimes and either has this component set to zero,
               the sub-second precision of both should be ignored.
               False positives when checking mtime equality due to clock resolution
               are always possible and the status algorithm needs to deal with them,
               but having too many false negatives could be harmful too.
             * (Offset 44: end of this node)
             The meaning of the boolean values packed in `flags` is:
             `WDIR_TRACKED`
                 Set if the working directory contains a tracked file at this node’s path.
                 This is typically set and unset by `hg add` and `hg rm`.
             `P1_TRACKED`
                 Set if the working directory’s first parent changeset
                 (whose node identifier is found in tree metadata)
                 contains a tracked file at this node’s path.
                 This is a cache to reduce manifest lookups.
             `P2_INFO`
                 Set if the file has been involved in some merge operation.
                 Either because it was actually merged,
                 or because the version in the second parent p2 was ahead,
                 or because some rename moved it there.
                 In either case `hg status` will want it displayed as modified.
             Files that would be mentioned at all in the `dirstate-v1` file format
             have a node with at least one of the above three bits set in `dirstate-v2`.
             Let’s call these files "tracked anywhere",
             and "untracked" the nodes with all three of these bits unset.
             Untracked nodes are typically for directories:
             they hold child nodes and form the tree structure.
             Additional untracked nodes may also exist.
             Although implementations should strive to clean up nodes
             that are entirely unused, other untracked nodes may also exist.
             For example, a future version of Mercurial might in some cases
             add nodes for untracked files or/and ignored files in the working directory
             in order to optimize `hg status`
             by enabling it to skip `readdir` in more cases.
             `HAS_MODE_AND_SIZE`
                 Must be unset for untracked nodes.
                 For files tracked anywhere, if this is set:
                 - The `size` field is the expected file size,
                   in bytes truncated to its lower 31 bits.
                 - The expected execute permission for the file’s owner
                   is given by `MODE_EXEC_PERM`
                 - The expected file type is given by `MODE_IS_SYMLINK`:
                   a symbolic link if set, or a normal file if unset.
                 If this is unset the expected size, permission, and file type are unknown.
                 The `size` field is unused (set to zero).
             `HAS_MTIME`
                 The node contains a "valid" last modification time in the `mtime` field.
                 It means the `mtime` was already strictly in the past when observed,
                 meaning that later changes cannot happen in the same clock tick
                 and must cause a different modification time
                 (unless the system clock jumps back and we get unlucky,
                 which is not impossible but deemed unlikely enough).
                 This means that if `std::fs::symlink_metadata` later reports
                 the same modification time
                 and ignored patterns haven’t changed,
                 we can assume the node to be unchanged on disk.
                 The `mtime` field can then be used to skip more expensive lookup when
                 checking the status of "tracked" nodes.
                 It can also be set for nodes where `DIRECTORY` is set.
                 See `DIRECTORY` documentation for details.
             `DIRECTORY`
                 When set, this entry will match a directory that exists or existed on the
                 file system.
                 * When `HAS_MTIME` is set a directory has been seen on the file system and
                   `mtime` matches its last modification time. However, `HAS_MTIME` not being set
                   does not indicate the lack of directory on the file system.
                 * When not tracked anywhere, this node does not represent an ignored or
                   unknown file on disk.
                 If `HAS_MTIME` is set
                 and `mtime` matches the last modification time of the directory on disk,
                 the directory is unchanged
                 and we can skip calling `std::fs::read_dir` again for this directory,
                 and iterate child dirstate nodes instead.
                 (as long as `ALL_UNKNOWN_RECORDED` and `ALL_IGNORED_RECORDED` are taken
                 into account)
             `MODE_EXEC_PERM`
                 Must be unset if `HAS_MODE_AND_SIZE` is unset.
                 If `HAS_MODE_AND_SIZE` is set,
                 this indicates whether the file’s owner is expected
                 to have execute permission.
+                Beware that on systems without fs support for this information, the value
+                stored in the dirstate might be wrong and should not be relied on.
             `MODE_IS_SYMLINK`
                 Must be unset if `HAS_MODE_AND_SIZE` is unset.
                 If `HAS_MODE_AND_SIZE` is set,
                 this indicates whether the file is expected to be a symlink
                 as opposed to a normal file.
+                Beware that on systems without fs support for this information, the value
+                stored in the dirstate might be wrong and should not be relied on.
             `EXPECTED_STATE_IS_MODIFIED`
                 Must be unset for untracked nodes.
                 For:
                 - a file tracked anywhere
                 - that has expected metadata (`HAS_MODE_AND_SIZE` and `HAS_MTIME`)
                 - if that metadata matches
                   metadata found in the working directory with `stat`
                 This bit indicates the status of the file.
                 If set, the status is modified. If unset, it is clean.
                 In cases where `hg status` needs to read the contents of a file
                 because metadata is ambiguous, this bit lets it record the result
                 if the result is modified so that a future run of `hg status`
                 does not need to do the same again.
                 It is valid to never set this bit,
                 and consider expected metadata ambiguous if it is set.
             `ALL_UNKNOWN_RECORDED`
                 If set, all "unknown" children existing on disk (at the time of the last
                 status) have been recorded and the `mtime` associated with
                 `DIRECTORY` can be used for optimization even when "unknown" files
                 are listed.
                 Note that the number of recorded "unknown" children can still be zero if none
                 were present.
                 Also note that having this flag unset does not imply that no "unknown"
                 children have been recorded. Some might be present, but there is no guarantee
                 that it will be all of them.
             `ALL_IGNORED_RECORDED`
                 If set, all "ignored" children existing on disk (at the time of the last
                 status) have been recorded and the `mtime` associated with
                 `DIRECTORY` can be used for optimization even when "ignored" files
                 are listed.
                 Note that the number of recorded "ignored" children can still be zero if none
                 were present.
                 Also note that having this flag unset does not imply that no "ignored"
                 children have been recorded. Some might be present, but there is no guarantee
                 that it will be all of them.
             `HAS_FALLBACK_EXEC`
                 If this flag is set, the entry carries "fallback" information for the
                 executable bit in the `FALLBACK_EXEC` flag.
                 Fallback information can be stored in the dirstate to keep track of
                 filesystem attributes tracked by Mercurial when the underlying file
                 system or operating system does not support that property (e.g.
                 Windows).
             `FALLBACK_EXEC`
                 Should be ignored if `HAS_FALLBACK_EXEC` is unset. If set the file for this
                 entry should be considered executable if that information cannot be
                 extracted from the file system. If unset it should be considered
                 non-executable instead.
             `HAS_FALLBACK_SYMLINK`
                 If this flag is set, the entry carries "fallback" information for symbolic
                 link status in the `FALLBACK_SYMLINK` flag.
                 Fallback information can be stored in the dirstate to keep track of
                 filesystem attributes tracked by Mercurial when the underlying file
                 system or operating system does not support that property (e.g.
                 Windows).
             `FALLBACK_SYMLINK`
                 Should be ignored if `HAS_FALLBACK_SYMLINK` is unset. If set the file for
                 this entry should be considered a symlink if that information cannot be
                 extracted from the file system. If unset it should be considered a normal
                 file instead.
             `MTIME_SECOND_AMBIGUOUS`
                 This flag is relevant only when `HAS_MTIME` is set.  When set, the
                 `mtime` stored in the entry is only valid for comparison with timestamps
                 that have nanosecond information. If the available timestamp does not carry
                 nanosecond information, the `mtime` should be ignored and no optimisation
                 can be applied.

General Comments 0

Write
Preview

You need to be logged in to leave comments. Login now

No TODOs yet

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages