##// END OF EJS Templates
dirstate: make sure that status does not overlook the fallback flags...
marmoute -
r49112:50026041 default
parent child Browse files
Show More
@@ -1,1522 +1,1533 b''
1 # dirstate.py - working directory tracking for mercurial
1 # dirstate.py - working directory tracking for mercurial
2 #
2 #
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
import collections
import contextlib
import errno
import functools
import os
import stat
15
15
16 from .i18n import _
16 from .i18n import _
17 from .pycompat import delattr
17 from .pycompat import delattr
18
18
19 from hgdemandimport import tracing
19 from hgdemandimport import tracing
20
20
21 from . import (
21 from . import (
22 dirstatemap,
22 dirstatemap,
23 encoding,
23 encoding,
24 error,
24 error,
25 match as matchmod,
25 match as matchmod,
26 pathutil,
26 pathutil,
27 policy,
27 policy,
28 pycompat,
28 pycompat,
29 scmutil,
29 scmutil,
30 sparse,
30 sparse,
31 util,
31 util,
32 )
32 )
33
33
34 from .dirstateutils import (
34 from .dirstateutils import (
35 timestamp,
35 timestamp,
36 )
36 )
37
37
38 from .interfaces import (
38 from .interfaces import (
39 dirstate as intdirstate,
39 dirstate as intdirstate,
40 util as interfaceutil,
40 util as interfaceutil,
41 )
41 )
42
42
43 parsers = policy.importmod('parsers')
43 parsers = policy.importmod('parsers')
44 rustmod = policy.importrust('dirstate')
44 rustmod = policy.importrust('dirstate')
45
45
46 HAS_FAST_DIRSTATE_V2 = rustmod is not None
46 HAS_FAST_DIRSTATE_V2 = rustmod is not None
47
47
48 propertycache = util.propertycache
48 propertycache = util.propertycache
49 filecache = scmutil.filecache
49 filecache = scmutil.filecache
50 _rangemask = dirstatemap.rangemask
50 _rangemask = dirstatemap.rangemask
51
51
52 DirstateItem = dirstatemap.DirstateItem
52 DirstateItem = dirstatemap.DirstateItem
53
53
54
54
class repocache(filecache):
    """filecache for files in .hg/"""

    def join(self, obj, fname):
        # resolve fname relative to the repository's .hg/ directory,
        # using the dirstate's opener
        return obj._opener.join(fname)
60
60
61
61
class rootcache(filecache):
    """filecache for files in the repository root"""

    def join(self, obj, fname):
        # resolve fname relative to the working-directory root
        return obj._join(fname)
67
67
68
68
def _getfsnow(vfs):
    '''Get "now" timestamp on filesystem'''
    # Create a throwaway temporary file and read its mtime: this yields the
    # filesystem's own notion of "now", which may differ from the system
    # clock (e.g. on network filesystems).
    tmpfd, tmpname = vfs.mkstemp()
    try:
        return timestamp.mtime_of(os.fstat(tmpfd))
    finally:
        # always clean up the probe file, even if fstat raised
        os.close(tmpfd)
        vfs.unlink(tmpname)
77
77
78
78
def requires_parents_change(func):
    """Decorator enforcing that *func* runs inside a parent-change context.

    The wrapped method raises ``error.ProgrammingError`` when invoked while
    ``self.pendingparentchange()`` is false (i.e. outside a
    ``dirstate.parentchange()`` context).
    """

    # functools.wraps preserves func.__name__/__doc__ on the wrapper so
    # introspection and error messages elsewhere keep working.
    @functools.wraps(func)
    def wrap(self, *args, **kwargs):
        if not self.pendingparentchange():
            msg = 'calling `%s` outside of a parentchange context'
            msg %= func.__name__
            raise error.ProgrammingError(msg)
        return func(self, *args, **kwargs)

    return wrap
88
88
89
89
def requires_no_parents_change(func):
    """Decorator enforcing that *func* runs outside a parent-change context.

    The wrapped method raises ``error.ProgrammingError`` when invoked while
    ``self.pendingparentchange()`` is true (i.e. inside a
    ``dirstate.parentchange()`` context).
    """

    # functools.wraps preserves func.__name__/__doc__ on the wrapper so
    # introspection and error messages elsewhere keep working.
    @functools.wraps(func)
    def wrap(self, *args, **kwargs):
        if self.pendingparentchange():
            msg = 'calling `%s` inside of a parentchange context'
            msg %= func.__name__
            raise error.ProgrammingError(msg)
        return func(self, *args, **kwargs)

    return wrap
99
99
100
100
101 @interfaceutil.implementer(intdirstate.idirstate)
101 @interfaceutil.implementer(intdirstate.idirstate)
102 class dirstate(object):
102 class dirstate(object):
    def __init__(
        self,
        opener,
        ui,
        root,
        validate,
        sparsematchfn,
        nodeconstants,
        use_dirstate_v2,
    ):
        """Create a new dirstate object.

        opener is an open()-like callable that can be used to open the
        dirstate file; root is the root of the directory tracked by
        the dirstate.

        validate is a callable applied to parent nodes before they are
        returned to callers; sparsematchfn produces the sparse-checkout
        matcher; use_dirstate_v2 selects the on-disk format.
        """
        self._use_dirstate_v2 = use_dirstate_v2
        self._nodeconstants = nodeconstants
        self._opener = opener
        self._validate = validate
        self._root = root
        self._sparsematchfn = sparsematchfn
        # ntpath.join(root, '') of Python 2.7.9 does not add sep if root is
        # UNC path pointing to root share (issue4557)
        self._rootdir = pathutil.normasprefix(root)
        # True when the in-memory state differs from what is on disk
        self._dirty = False
        self._lastnormaltime = timestamp.zero()
        self._ui = ui
        self._filecache = {}
        # number of nested parentchange() contexts currently open
        self._parentwriters = 0
        self._filename = b'dirstate'
        self._pendingfilename = b'%s.pending' % self._filename
        self._plchangecallbacks = {}
        # parents as they were before the first setparents() call, if any
        self._origpl = None
        self._mapcls = dirstatemap.dirstatemap
        # Access and cache cwd early, so we don't access it for the first time
        # after a working-copy update caused it to not exist (accessing it then
        # raises an exception).
        self._cwd
142
142
    def prefetch_parents(self):
        """make sure the parents are loaded

        Used to avoid a race condition.
        """
        # touching self._pl forces the dirstate map (and thus the parents)
        # to be read now rather than lazily later
        self._pl
149
149
    @contextlib.contextmanager
    def parentchange(self):
        """Context manager for handling dirstate parents.

        If an exception occurs in the scope of the context manager,
        the incoherent dirstate won't be written when wlock is
        released.
        """
        self._parentwriters += 1
        yield
        # Typically we want the "undo" step of a context manager in a
        # finally block so it happens even when an exception
        # occurs. In this case, however, we only want to decrement
        # parentwriters if the code in the with statement exits
        # normally, so we don't have a try/finally here on purpose.
        self._parentwriters -= 1
166
166
167 def pendingparentchange(self):
167 def pendingparentchange(self):
168 """Returns true if the dirstate is in the middle of a set of changes
168 """Returns true if the dirstate is in the middle of a set of changes
169 that modify the dirstate parent.
169 that modify the dirstate parent.
170 """
170 """
171 return self._parentwriters > 0
171 return self._parentwriters > 0
172
172
    @propertycache
    def _map(self):
        """Return the dirstate contents (see documentation for dirstatemap)."""
        # Assigning to self._map replaces this propertycache entry in the
        # instance dict, so the map is built only once per dirstate object.
        self._map = self._mapcls(
            self._ui,
            self._opener,
            self._root,
            self._nodeconstants,
            self._use_dirstate_v2,
        )
        return self._map
184
184
    @property
    def _sparsematcher(self):
        """The matcher for the sparse checkout.

        The working directory may not include every file from a manifest. The
        matcher obtained by this property will match a path if it is to be
        included in the working directory.
        """
        # TODO there is potential to cache this property. For now, the matcher
        # is resolved on every access. (But the called function does use a
        # cache to keep the lookup fast.)
        return self._sparsematchfn()
197
197
    @repocache(b'branch')
    def _branch(self):
        """Current branch name read from .hg/branch (b"default" when unset)."""
        try:
            return self._opener.read(b"branch").strip() or b"default"
        except IOError as inst:
            # a missing branch file simply means the default branch;
            # any other I/O error is unexpected and propagated
            if inst.errno != errno.ENOENT:
                raise
            return b"default"
206
206
    @property
    def _pl(self):
        """The raw (p1, p2) parent nodes as stored in the dirstate map."""
        return self._map.parents()
210
210
    def hasdir(self, d):
        """Return whether the dirstate map knows *d* as a tracked directory."""
        return self._map.hastrackeddir(d)
213
213
214 @rootcache(b'.hgignore')
214 @rootcache(b'.hgignore')
215 def _ignore(self):
215 def _ignore(self):
216 files = self._ignorefiles()
216 files = self._ignorefiles()
217 if not files:
217 if not files:
218 return matchmod.never()
218 return matchmod.never()
219
219
220 pats = [b'include:%s' % f for f in files]
220 pats = [b'include:%s' % f for f in files]
221 return matchmod.match(self._root, b'', [], pats, warn=self._ui.warn)
221 return matchmod.match(self._root, b'', [], pats, warn=self._ui.warn)
222
222
223 @propertycache
223 @propertycache
224 def _slash(self):
224 def _slash(self):
225 return self._ui.configbool(b'ui', b'slash') and pycompat.ossep != b'/'
225 return self._ui.configbool(b'ui', b'slash') and pycompat.ossep != b'/'
226
226
    @propertycache
    def _checklink(self):
        # whether the filesystem at the repository root supports symlinks
        return util.checklink(self._root)
230
230
    @propertycache
    def _checkexec(self):
        # whether the filesystem at the repository root supports the exec bit
        return bool(util.checkexec(self._root))
234
234
    @propertycache
    def _checkcase(self):
        # True when the filesystem holding .hg is case-insensitive
        return not util.fscasesensitive(self._join(b'.hg'))
238
238
    def _join(self, f):
        """Return the absolute filesystem path for the repo-relative path *f*."""
        # much faster than os.path.join()
        # it's safe because f is always a relative path
        return self._rootdir + f
243
243
    def flagfunc(self, buildfallback):
        """build a callable that returns flags associated with a filename

        The information is extracted from three possible layers:
        1. the file system if it supports the information
        2. the "fallback" information stored in the dirstate if any
        3. a more expensive mechanism inferring the flags from the parents.
        """

        # small hack to cache the result of buildfallback()
        fallback_func = []

        def get_flags(x):
            # returns b'l', b'x', or b'' for the repo-relative file *x*
            entry = None
            fallback_value = None
            try:
                st = os.lstat(self._join(x))
            except OSError:
                # file is gone (or unreadable): report no flags
                return b''

            if self._checklink:
                if util.statislink(st):
                    return b'l'
            else:
                # the filesystem cannot represent symlinks: consult the
                # dirstate fallback flag (layer 2), then the parents (layer 3)
                entry = self.get_entry(x)
                if entry.has_fallback_symlink:
                    if entry.fallback_symlink:
                        return b'l'
                else:
                    if not fallback_func:
                        fallback_func.append(buildfallback())
                    fallback_value = fallback_func[0](x)
                    if b'l' in fallback_value:
                        return b'l'

            if self._checkexec:
                if util.statisexec(st):
                    return b'x'
            else:
                # the filesystem cannot represent the exec bit: same layered
                # lookup as for symlinks above, reusing any cached values
                if entry is None:
                    entry = self.get_entry(x)
                if entry.has_fallback_exec:
                    if entry.fallback_exec:
                        return b'x'
                else:
                    if fallback_value is None:
                        if not fallback_func:
                            fallback_func.append(buildfallback())
                        fallback_value = fallback_func[0](x)
                    if b'x' in fallback_value:
                        return b'x'
            return b''

        return get_flags
298
298
299 @propertycache
299 @propertycache
300 def _cwd(self):
300 def _cwd(self):
301 # internal config: ui.forcecwd
301 # internal config: ui.forcecwd
302 forcecwd = self._ui.config(b'ui', b'forcecwd')
302 forcecwd = self._ui.config(b'ui', b'forcecwd')
303 if forcecwd:
303 if forcecwd:
304 return forcecwd
304 return forcecwd
305 return encoding.getcwd()
305 return encoding.getcwd()
306
306
307 def getcwd(self):
307 def getcwd(self):
308 """Return the path from which a canonical path is calculated.
308 """Return the path from which a canonical path is calculated.
309
309
310 This path should be used to resolve file patterns or to convert
310 This path should be used to resolve file patterns or to convert
311 canonical paths back to file paths for display. It shouldn't be
311 canonical paths back to file paths for display. It shouldn't be
312 used to get real file paths. Use vfs functions instead.
312 used to get real file paths. Use vfs functions instead.
313 """
313 """
314 cwd = self._cwd
314 cwd = self._cwd
315 if cwd == self._root:
315 if cwd == self._root:
316 return b''
316 return b''
317 # self._root ends with a path separator if self._root is '/' or 'C:\'
317 # self._root ends with a path separator if self._root is '/' or 'C:\'
318 rootsep = self._root
318 rootsep = self._root
319 if not util.endswithsep(rootsep):
319 if not util.endswithsep(rootsep):
320 rootsep += pycompat.ossep
320 rootsep += pycompat.ossep
321 if cwd.startswith(rootsep):
321 if cwd.startswith(rootsep):
322 return cwd[len(rootsep) :]
322 return cwd[len(rootsep) :]
323 else:
323 else:
324 # we're outside the repo. return an absolute path.
324 # we're outside the repo. return an absolute path.
325 return cwd
325 return cwd
326
326
327 def pathto(self, f, cwd=None):
327 def pathto(self, f, cwd=None):
328 if cwd is None:
328 if cwd is None:
329 cwd = self.getcwd()
329 cwd = self.getcwd()
330 path = util.pathto(self._root, cwd, f)
330 path = util.pathto(self._root, cwd, f)
331 if self._slash:
331 if self._slash:
332 return util.pconvert(path)
332 return util.pconvert(path)
333 return path
333 return path
334
334
    def __getitem__(self, key):
        """Return the current state of key (a filename) in the dirstate.

        States are:
        n  normal
        m  needs merging
        r  marked for removal
        a  marked for addition
        ?  not tracked

        XXX The "state" is a bit obscure to be in the "public" API. we should
        consider migrating all user of this to going through the dirstate entry
        instead.
        """
        # this accessor is deprecated in favor of get_entry(); warn callers
        msg = b"don't use dirstate[file], use dirstate.get_entry(file)"
        util.nouideprecwarn(msg, b'6.1', stacklevel=2)
        entry = self._map.get(key)
        if entry is not None:
            return entry.state
        return b'?'
355
355
356 def get_entry(self, path):
356 def get_entry(self, path):
357 """return a DirstateItem for the associated path"""
357 """return a DirstateItem for the associated path"""
358 entry = self._map.get(path)
358 entry = self._map.get(path)
359 if entry is None:
359 if entry is None:
360 return DirstateItem()
360 return DirstateItem()
361 return entry
361 return entry
362
362
    def __contains__(self, key):
        """True if *key* (a filename) has an entry in the dirstate map."""
        return key in self._map
365
365
    def __iter__(self):
        """Iterate over the filenames in the dirstate map, sorted."""
        return iter(sorted(self._map))
368
368
    def items(self):
        """Iterate over (filename, entry) pairs of the dirstate map, unsorted."""
        return pycompat.iteritems(self._map)

    # py2-era alias kept for compatibility with existing callers
    iteritems = items
373
373
    def parents(self):
        """Return both parent nodes, run through the validate callback."""
        return [self._validate(p) for p in self._pl]
376
376
    def p1(self):
        """Return the first (validated) dirstate parent."""
        return self._validate(self._pl[0])
379
379
    def p2(self):
        """Return the second (validated) dirstate parent."""
        return self._validate(self._pl[1])
382
382
    @property
    def in_merge(self):
        """True if a merge is in progress"""
        # a non-null second parent means an uncommitted merge
        return self._pl[1] != self._nodeconstants.nullid
387
387
    def branch(self):
        """Return the current branch name, converted to the local encoding."""
        return encoding.tolocal(self._branch)
390
390
    def setparents(self, p1, p2=None):
        """Set dirstate parents to p1 and p2.

        When moving from two parents to one, "merged" entries a
        adjusted to normal and previous copy records discarded and
        returned by the call.

        See localrepo.setparents()

        Raises ValueError when called outside a parentchange() context.
        """
        if p2 is None:
            p2 = self._nodeconstants.nullid
        if self._parentwriters == 0:
            raise ValueError(
                b"cannot set dirstate parent outside of "
                b"dirstate.parentchange context manager"
            )

        self._dirty = True
        oldp2 = self._pl[1]
        if self._origpl is None:
            # remember the pre-change parents (only for the first change)
            self._origpl = self._pl
        nullid = self._nodeconstants.nullid
        # True if we need to fold p2 related state back to a linear case
        fold_p2 = oldp2 != nullid and p2 == nullid
        return self._map.setparents(p1, p2, fold_p2=fold_p2)
416
416
    def setbranch(self, branch):
        """Persist *branch* (local encoding) to .hg/branch atomically."""
        self.__class__._branch.set(self, encoding.fromlocal(branch))
        f = self._opener(b'branch', b'w', atomictemp=True, checkambig=True)
        try:
            f.write(self._branch + b'\n')
            f.close()

            # make sure filecache has the correct stat info for _branch after
            # replacing the underlying file
            ce = self._filecache[b'_branch']
            if ce:
                ce.refresh()
        except:  # re-raises
            # discard the atomictemp file so a partial write never lands
            f.discard()
            raise
432
432
    def invalidate(self):
        """Causes the next access to reread the dirstate.

        This is different from localrepo.invalidatedirstate() because it always
        rereads the dirstate. Use localrepo.invalidatedirstate() if you want to
        check whether the dirstate has changed before rereading it."""

        # drop the cached properties so they are recomputed on next access
        for a in ("_map", "_branch", "_ignore"):
            if a in self.__dict__:
                delattr(self, a)
        self._lastnormaltime = timestamp.zero()
        self._dirty = False
        self._parentwriters = 0
        self._origpl = None
447
447
448 def copy(self, source, dest):
448 def copy(self, source, dest):
449 """Mark dest as a copy of source. Unmark dest if source is None."""
449 """Mark dest as a copy of source. Unmark dest if source is None."""
450 if source == dest:
450 if source == dest:
451 return
451 return
452 self._dirty = True
452 self._dirty = True
453 if source is not None:
453 if source is not None:
454 self._map.copymap[dest] = source
454 self._map.copymap[dest] = source
455 else:
455 else:
456 self._map.copymap.pop(dest, None)
456 self._map.copymap.pop(dest, None)
457
457
458 def copied(self, file):
458 def copied(self, file):
459 return self._map.copymap.get(file, None)
459 return self._map.copymap.get(file, None)
460
460
    def copies(self):
        """Return the destination -> source mapping of recorded copies."""
        return self._map.copymap
463
463
    @requires_no_parents_change
    def set_tracked(self, filename):
        """a "public" method for generic code to mark a file as tracked

        This function is to be called outside of "update/merge" case. For
        example by a command like `hg add X`.

        return True the file was previously untracked, False otherwise.
        """
        self._dirty = True
        entry = self._map.get(filename)
        if entry is None or not entry.tracked:
            # newly tracked: validate the name before recording it
            self._check_new_tracked_filename(filename)
        return self._map.set_tracked(filename)
478
478
479 @requires_no_parents_change
479 @requires_no_parents_change
480 def set_untracked(self, filename):
480 def set_untracked(self, filename):
481 """a "public" method for generic code to mark a file as untracked
481 """a "public" method for generic code to mark a file as untracked
482
482
483 This function is to be called outside of "update/merge" case. For
483 This function is to be called outside of "update/merge" case. For
484 example by a command like `hg remove X`.
484 example by a command like `hg remove X`.
485
485
486 return True the file was previously tracked, False otherwise.
486 return True the file was previously tracked, False otherwise.
487 """
487 """
488 ret = self._map.set_untracked(filename)
488 ret = self._map.set_untracked(filename)
489 if ret:
489 if ret:
490 self._dirty = True
490 self._dirty = True
491 return ret
491 return ret
492
492
    @requires_no_parents_change
    def set_clean(self, filename, parentfiledata=None):
        """record that the current state of the file on disk is known to be clean"""
        self._dirty = True
        if parentfiledata:
            # caller already stat'ed the file; trust its (mode, size, mtime)
            (mode, size, mtime) = parentfiledata
        else:
            (mode, size, mtime) = self._get_filedata(filename)
        if not self._map[filename].tracked:
            self._check_new_tracked_filename(filename)
        self._map.set_clean(filename, mode, size, mtime)
        if mtime > self._lastnormaltime:
            # Remember the most recent modification timeslot for status(),
            # to make sure we won't miss future size-preserving file content
            # modifications that happen within the same timeslot.
            self._lastnormaltime = mtime
509
509
    @requires_no_parents_change
    def set_possibly_dirty(self, filename):
        """record that the current state of the file on disk is unknown"""
        self._dirty = True
        self._map.set_possibly_dirty(filename)
515
515
    @requires_parents_change
    def update_file_p1(
        self,
        filename,
        p1_tracked,
    ):
        """Set a file as tracked in the parent (or not)

        This is to be called when adjust the dirstate to a new parent after an history
        rewriting operation.

        It should not be called during a merge (p2 != nullid) and only within
        a `with dirstate.parentchange():` context.
        """
        if self.in_merge:
            msg = b'update_file_reference should not be called when merging'
            raise error.ProgrammingError(msg)
        entry = self._map.get(filename)
        if entry is None:
            wc_tracked = False
        else:
            wc_tracked = entry.tracked
        if not (p1_tracked or wc_tracked):
            # the file is no longer relevant to anyone
            if self._map.get(filename) is not None:
                self._map.reset_state(filename)
                self._dirty = True
        elif (not p1_tracked) and wc_tracked:
            if entry is not None and entry.added:
                return  # avoid dropping copy information (maybe?)

        parentfiledata = None
        if wc_tracked and p1_tracked:
            # file exists in both p1 and the working copy: capture current
            # stat data so the map can record it
            parentfiledata = self._get_filedata(filename)

        self._map.reset_state(
            filename,
            wc_tracked,
            p1_tracked,
            # the underlying reference might have changed, we will have to
            # check it.
            has_meaningful_mtime=False,
            parentfiledata=parentfiledata,
        )
        if (
            parentfiledata is not None
            and parentfiledata[2] > self._lastnormaltime
        ):
            # Remember the most recent modification timeslot for status(),
            # to make sure we won't miss future size-preserving file content
            # modifications that happen within the same timeslot.
            self._lastnormaltime = parentfiledata[2]
568
568
569 @requires_parents_change
569 @requires_parents_change
570 def update_file(
570 def update_file(
571 self,
571 self,
572 filename,
572 filename,
573 wc_tracked,
573 wc_tracked,
574 p1_tracked,
574 p1_tracked,
575 p2_info=False,
575 p2_info=False,
576 possibly_dirty=False,
576 possibly_dirty=False,
577 parentfiledata=None,
577 parentfiledata=None,
578 ):
578 ):
579 """update the information about a file in the dirstate
579 """update the information about a file in the dirstate
580
580
581 This is to be called when the direstates parent changes to keep track
581 This is to be called when the direstates parent changes to keep track
582 of what is the file situation in regards to the working copy and its parent.
582 of what is the file situation in regards to the working copy and its parent.
583
583
584 This function must be called within a `dirstate.parentchange` context.
584 This function must be called within a `dirstate.parentchange` context.
585
585
586 note: the API is at an early stage and we might need to adjust it
586 note: the API is at an early stage and we might need to adjust it
587 depending of what information ends up being relevant and useful to
587 depending of what information ends up being relevant and useful to
588 other processing.
588 other processing.
589 """
589 """
590
590
591 # note: I do not think we need to double check name clash here since we
591 # note: I do not think we need to double check name clash here since we
592 # are in a update/merge case that should already have taken care of
592 # are in a update/merge case that should already have taken care of
593 # this. The test agrees
593 # this. The test agrees
594
594
595 self._dirty = True
595 self._dirty = True
596
596
597 need_parent_file_data = (
597 need_parent_file_data = (
598 not possibly_dirty and not p2_info and wc_tracked and p1_tracked
598 not possibly_dirty and not p2_info and wc_tracked and p1_tracked
599 )
599 )
600
600
601 if need_parent_file_data and parentfiledata is None:
601 if need_parent_file_data and parentfiledata is None:
602 parentfiledata = self._get_filedata(filename)
602 parentfiledata = self._get_filedata(filename)
603
603
604 self._map.reset_state(
604 self._map.reset_state(
605 filename,
605 filename,
606 wc_tracked,
606 wc_tracked,
607 p1_tracked,
607 p1_tracked,
608 p2_info=p2_info,
608 p2_info=p2_info,
609 has_meaningful_mtime=not possibly_dirty,
609 has_meaningful_mtime=not possibly_dirty,
610 parentfiledata=parentfiledata,
610 parentfiledata=parentfiledata,
611 )
611 )
612 if (
612 if (
613 parentfiledata is not None
613 parentfiledata is not None
614 and parentfiledata[2] > self._lastnormaltime
614 and parentfiledata[2] > self._lastnormaltime
615 ):
615 ):
616 # Remember the most recent modification timeslot for status(),
616 # Remember the most recent modification timeslot for status(),
617 # to make sure we won't miss future size-preserving file content
617 # to make sure we won't miss future size-preserving file content
618 # modifications that happen within the same timeslot.
618 # modifications that happen within the same timeslot.
619 self._lastnormaltime = parentfiledata[2]
619 self._lastnormaltime = parentfiledata[2]
620
620
621 def _check_new_tracked_filename(self, filename):
621 def _check_new_tracked_filename(self, filename):
622 scmutil.checkfilename(filename)
622 scmutil.checkfilename(filename)
623 if self._map.hastrackeddir(filename):
623 if self._map.hastrackeddir(filename):
624 msg = _(b'directory %r already in dirstate')
624 msg = _(b'directory %r already in dirstate')
625 msg %= pycompat.bytestr(filename)
625 msg %= pycompat.bytestr(filename)
626 raise error.Abort(msg)
626 raise error.Abort(msg)
627 # shadows
627 # shadows
628 for d in pathutil.finddirs(filename):
628 for d in pathutil.finddirs(filename):
629 if self._map.hastrackeddir(d):
629 if self._map.hastrackeddir(d):
630 break
630 break
631 entry = self._map.get(d)
631 entry = self._map.get(d)
632 if entry is not None and not entry.removed:
632 if entry is not None and not entry.removed:
633 msg = _(b'file %r in dirstate clashes with %r')
633 msg = _(b'file %r in dirstate clashes with %r')
634 msg %= (pycompat.bytestr(d), pycompat.bytestr(filename))
634 msg %= (pycompat.bytestr(d), pycompat.bytestr(filename))
635 raise error.Abort(msg)
635 raise error.Abort(msg)
636
636
637 def _get_filedata(self, filename):
637 def _get_filedata(self, filename):
638 """returns"""
638 """returns"""
639 s = os.lstat(self._join(filename))
639 s = os.lstat(self._join(filename))
640 mode = s.st_mode
640 mode = s.st_mode
641 size = s.st_size
641 size = s.st_size
642 mtime = timestamp.mtime_of(s)
642 mtime = timestamp.mtime_of(s)
643 return (mode, size, mtime)
643 return (mode, size, mtime)
644
644
645 def _discoverpath(self, path, normed, ignoremissing, exists, storemap):
645 def _discoverpath(self, path, normed, ignoremissing, exists, storemap):
646 if exists is None:
646 if exists is None:
647 exists = os.path.lexists(os.path.join(self._root, path))
647 exists = os.path.lexists(os.path.join(self._root, path))
648 if not exists:
648 if not exists:
649 # Maybe a path component exists
649 # Maybe a path component exists
650 if not ignoremissing and b'/' in path:
650 if not ignoremissing and b'/' in path:
651 d, f = path.rsplit(b'/', 1)
651 d, f = path.rsplit(b'/', 1)
652 d = self._normalize(d, False, ignoremissing, None)
652 d = self._normalize(d, False, ignoremissing, None)
653 folded = d + b"/" + f
653 folded = d + b"/" + f
654 else:
654 else:
655 # No path components, preserve original case
655 # No path components, preserve original case
656 folded = path
656 folded = path
657 else:
657 else:
658 # recursively normalize leading directory components
658 # recursively normalize leading directory components
659 # against dirstate
659 # against dirstate
660 if b'/' in normed:
660 if b'/' in normed:
661 d, f = normed.rsplit(b'/', 1)
661 d, f = normed.rsplit(b'/', 1)
662 d = self._normalize(d, False, ignoremissing, True)
662 d = self._normalize(d, False, ignoremissing, True)
663 r = self._root + b"/" + d
663 r = self._root + b"/" + d
664 folded = d + b"/" + util.fspath(f, r)
664 folded = d + b"/" + util.fspath(f, r)
665 else:
665 else:
666 folded = util.fspath(normed, self._root)
666 folded = util.fspath(normed, self._root)
667 storemap[normed] = folded
667 storemap[normed] = folded
668
668
669 return folded
669 return folded
670
670
671 def _normalizefile(self, path, isknown, ignoremissing=False, exists=None):
671 def _normalizefile(self, path, isknown, ignoremissing=False, exists=None):
672 normed = util.normcase(path)
672 normed = util.normcase(path)
673 folded = self._map.filefoldmap.get(normed, None)
673 folded = self._map.filefoldmap.get(normed, None)
674 if folded is None:
674 if folded is None:
675 if isknown:
675 if isknown:
676 folded = path
676 folded = path
677 else:
677 else:
678 folded = self._discoverpath(
678 folded = self._discoverpath(
679 path, normed, ignoremissing, exists, self._map.filefoldmap
679 path, normed, ignoremissing, exists, self._map.filefoldmap
680 )
680 )
681 return folded
681 return folded
682
682
683 def _normalize(self, path, isknown, ignoremissing=False, exists=None):
683 def _normalize(self, path, isknown, ignoremissing=False, exists=None):
684 normed = util.normcase(path)
684 normed = util.normcase(path)
685 folded = self._map.filefoldmap.get(normed, None)
685 folded = self._map.filefoldmap.get(normed, None)
686 if folded is None:
686 if folded is None:
687 folded = self._map.dirfoldmap.get(normed, None)
687 folded = self._map.dirfoldmap.get(normed, None)
688 if folded is None:
688 if folded is None:
689 if isknown:
689 if isknown:
690 folded = path
690 folded = path
691 else:
691 else:
692 # store discovered result in dirfoldmap so that future
692 # store discovered result in dirfoldmap so that future
693 # normalizefile calls don't start matching directories
693 # normalizefile calls don't start matching directories
694 folded = self._discoverpath(
694 folded = self._discoverpath(
695 path, normed, ignoremissing, exists, self._map.dirfoldmap
695 path, normed, ignoremissing, exists, self._map.dirfoldmap
696 )
696 )
697 return folded
697 return folded
698
698
699 def normalize(self, path, isknown=False, ignoremissing=False):
699 def normalize(self, path, isknown=False, ignoremissing=False):
700 """
700 """
701 normalize the case of a pathname when on a casefolding filesystem
701 normalize the case of a pathname when on a casefolding filesystem
702
702
703 isknown specifies whether the filename came from walking the
703 isknown specifies whether the filename came from walking the
704 disk, to avoid extra filesystem access.
704 disk, to avoid extra filesystem access.
705
705
706 If ignoremissing is True, missing path are returned
706 If ignoremissing is True, missing path are returned
707 unchanged. Otherwise, we try harder to normalize possibly
707 unchanged. Otherwise, we try harder to normalize possibly
708 existing path components.
708 existing path components.
709
709
710 The normalized case is determined based on the following precedence:
710 The normalized case is determined based on the following precedence:
711
711
712 - version of name already stored in the dirstate
712 - version of name already stored in the dirstate
713 - version of name stored on disk
713 - version of name stored on disk
714 - version provided via command arguments
714 - version provided via command arguments
715 """
715 """
716
716
717 if self._checkcase:
717 if self._checkcase:
718 return self._normalize(path, isknown, ignoremissing)
718 return self._normalize(path, isknown, ignoremissing)
719 return path
719 return path
720
720
721 def clear(self):
721 def clear(self):
722 self._map.clear()
722 self._map.clear()
723 self._lastnormaltime = timestamp.zero()
723 self._lastnormaltime = timestamp.zero()
724 self._dirty = True
724 self._dirty = True
725
725
726 def rebuild(self, parent, allfiles, changedfiles=None):
726 def rebuild(self, parent, allfiles, changedfiles=None):
727 if changedfiles is None:
727 if changedfiles is None:
728 # Rebuild entire dirstate
728 # Rebuild entire dirstate
729 to_lookup = allfiles
729 to_lookup = allfiles
730 to_drop = []
730 to_drop = []
731 lastnormaltime = self._lastnormaltime
731 lastnormaltime = self._lastnormaltime
732 self.clear()
732 self.clear()
733 self._lastnormaltime = lastnormaltime
733 self._lastnormaltime = lastnormaltime
734 elif len(changedfiles) < 10:
734 elif len(changedfiles) < 10:
735 # Avoid turning allfiles into a set, which can be expensive if it's
735 # Avoid turning allfiles into a set, which can be expensive if it's
736 # large.
736 # large.
737 to_lookup = []
737 to_lookup = []
738 to_drop = []
738 to_drop = []
739 for f in changedfiles:
739 for f in changedfiles:
740 if f in allfiles:
740 if f in allfiles:
741 to_lookup.append(f)
741 to_lookup.append(f)
742 else:
742 else:
743 to_drop.append(f)
743 to_drop.append(f)
744 else:
744 else:
745 changedfilesset = set(changedfiles)
745 changedfilesset = set(changedfiles)
746 to_lookup = changedfilesset & set(allfiles)
746 to_lookup = changedfilesset & set(allfiles)
747 to_drop = changedfilesset - to_lookup
747 to_drop = changedfilesset - to_lookup
748
748
749 if self._origpl is None:
749 if self._origpl is None:
750 self._origpl = self._pl
750 self._origpl = self._pl
751 self._map.setparents(parent, self._nodeconstants.nullid)
751 self._map.setparents(parent, self._nodeconstants.nullid)
752
752
753 for f in to_lookup:
753 for f in to_lookup:
754
754
755 if self.in_merge:
755 if self.in_merge:
756 self.set_tracked(f)
756 self.set_tracked(f)
757 else:
757 else:
758 self._map.reset_state(
758 self._map.reset_state(
759 f,
759 f,
760 wc_tracked=True,
760 wc_tracked=True,
761 p1_tracked=True,
761 p1_tracked=True,
762 )
762 )
763 for f in to_drop:
763 for f in to_drop:
764 self._map.reset_state(f)
764 self._map.reset_state(f)
765
765
766 self._dirty = True
766 self._dirty = True
767
767
768 def identity(self):
768 def identity(self):
769 """Return identity of dirstate itself to detect changing in storage
769 """Return identity of dirstate itself to detect changing in storage
770
770
771 If identity of previous dirstate is equal to this, writing
771 If identity of previous dirstate is equal to this, writing
772 changes based on the former dirstate out can keep consistency.
772 changes based on the former dirstate out can keep consistency.
773 """
773 """
774 return self._map.identity
774 return self._map.identity
775
775
776 def write(self, tr):
776 def write(self, tr):
777 if not self._dirty:
777 if not self._dirty:
778 return
778 return
779
779
780 filename = self._filename
780 filename = self._filename
781 if tr:
781 if tr:
782 # 'dirstate.write()' is not only for writing in-memory
782 # 'dirstate.write()' is not only for writing in-memory
783 # changes out, but also for dropping ambiguous timestamp.
783 # changes out, but also for dropping ambiguous timestamp.
784 # delayed writing re-raise "ambiguous timestamp issue".
784 # delayed writing re-raise "ambiguous timestamp issue".
785 # See also the wiki page below for detail:
785 # See also the wiki page below for detail:
786 # https://www.mercurial-scm.org/wiki/DirstateTransactionPlan
786 # https://www.mercurial-scm.org/wiki/DirstateTransactionPlan
787
787
788 # record when mtime start to be ambiguous
788 # record when mtime start to be ambiguous
789 now = _getfsnow(self._opener)
789 now = _getfsnow(self._opener)
790
790
791 # delay writing in-memory changes out
791 # delay writing in-memory changes out
792 tr.addfilegenerator(
792 tr.addfilegenerator(
793 b'dirstate',
793 b'dirstate',
794 (self._filename,),
794 (self._filename,),
795 lambda f: self._writedirstate(tr, f, now=now),
795 lambda f: self._writedirstate(tr, f, now=now),
796 location=b'plain',
796 location=b'plain',
797 )
797 )
798 return
798 return
799
799
800 st = self._opener(filename, b"w", atomictemp=True, checkambig=True)
800 st = self._opener(filename, b"w", atomictemp=True, checkambig=True)
801 self._writedirstate(tr, st)
801 self._writedirstate(tr, st)
802
802
803 def addparentchangecallback(self, category, callback):
803 def addparentchangecallback(self, category, callback):
804 """add a callback to be called when the wd parents are changed
804 """add a callback to be called when the wd parents are changed
805
805
806 Callback will be called with the following arguments:
806 Callback will be called with the following arguments:
807 dirstate, (oldp1, oldp2), (newp1, newp2)
807 dirstate, (oldp1, oldp2), (newp1, newp2)
808
808
809 Category is a unique identifier to allow overwriting an old callback
809 Category is a unique identifier to allow overwriting an old callback
810 with a newer callback.
810 with a newer callback.
811 """
811 """
812 self._plchangecallbacks[category] = callback
812 self._plchangecallbacks[category] = callback
813
813
814 def _writedirstate(self, tr, st, now=None):
814 def _writedirstate(self, tr, st, now=None):
815 # notify callbacks about parents change
815 # notify callbacks about parents change
816 if self._origpl is not None and self._origpl != self._pl:
816 if self._origpl is not None and self._origpl != self._pl:
817 for c, callback in sorted(
817 for c, callback in sorted(
818 pycompat.iteritems(self._plchangecallbacks)
818 pycompat.iteritems(self._plchangecallbacks)
819 ):
819 ):
820 callback(self, self._origpl, self._pl)
820 callback(self, self._origpl, self._pl)
821 self._origpl = None
821 self._origpl = None
822
822
823 if now is None:
823 if now is None:
824 # use the modification time of the newly created temporary file as the
824 # use the modification time of the newly created temporary file as the
825 # filesystem's notion of 'now'
825 # filesystem's notion of 'now'
826 now = timestamp.mtime_of(util.fstat(st))
826 now = timestamp.mtime_of(util.fstat(st))
827
827
828 # enough 'delaywrite' prevents 'pack_dirstate' from dropping
828 # enough 'delaywrite' prevents 'pack_dirstate' from dropping
829 # timestamp of each entries in dirstate, because of 'now > mtime'
829 # timestamp of each entries in dirstate, because of 'now > mtime'
830 delaywrite = self._ui.configint(b'debug', b'dirstate.delaywrite')
830 delaywrite = self._ui.configint(b'debug', b'dirstate.delaywrite')
831 if delaywrite > 0:
831 if delaywrite > 0:
832 # do we have any files to delay for?
832 # do we have any files to delay for?
833 for f, e in pycompat.iteritems(self._map):
833 for f, e in pycompat.iteritems(self._map):
834 if e.need_delay(now):
834 if e.need_delay(now):
835 import time # to avoid useless import
835 import time # to avoid useless import
836
836
837 # rather than sleep n seconds, sleep until the next
837 # rather than sleep n seconds, sleep until the next
838 # multiple of n seconds
838 # multiple of n seconds
839 clock = time.time()
839 clock = time.time()
840 start = int(clock) - (int(clock) % delaywrite)
840 start = int(clock) - (int(clock) % delaywrite)
841 end = start + delaywrite
841 end = start + delaywrite
842 time.sleep(end - clock)
842 time.sleep(end - clock)
843 # trust our estimate that the end is near now
843 # trust our estimate that the end is near now
844 now = timestamp.timestamp((end, 0))
844 now = timestamp.timestamp((end, 0))
845 break
845 break
846
846
847 self._map.write(tr, st, now)
847 self._map.write(tr, st, now)
848 self._lastnormaltime = timestamp.zero()
848 self._lastnormaltime = timestamp.zero()
849 self._dirty = False
849 self._dirty = False
850
850
851 def _dirignore(self, f):
851 def _dirignore(self, f):
852 if self._ignore(f):
852 if self._ignore(f):
853 return True
853 return True
854 for p in pathutil.finddirs(f):
854 for p in pathutil.finddirs(f):
855 if self._ignore(p):
855 if self._ignore(p):
856 return True
856 return True
857 return False
857 return False
858
858
859 def _ignorefiles(self):
859 def _ignorefiles(self):
860 files = []
860 files = []
861 if os.path.exists(self._join(b'.hgignore')):
861 if os.path.exists(self._join(b'.hgignore')):
862 files.append(self._join(b'.hgignore'))
862 files.append(self._join(b'.hgignore'))
863 for name, path in self._ui.configitems(b"ui"):
863 for name, path in self._ui.configitems(b"ui"):
864 if name == b'ignore' or name.startswith(b'ignore.'):
864 if name == b'ignore' or name.startswith(b'ignore.'):
865 # we need to use os.path.join here rather than self._join
865 # we need to use os.path.join here rather than self._join
866 # because path is arbitrary and user-specified
866 # because path is arbitrary and user-specified
867 files.append(os.path.join(self._rootdir, util.expandpath(path)))
867 files.append(os.path.join(self._rootdir, util.expandpath(path)))
868 return files
868 return files
869
869
870 def _ignorefileandline(self, f):
870 def _ignorefileandline(self, f):
871 files = collections.deque(self._ignorefiles())
871 files = collections.deque(self._ignorefiles())
872 visited = set()
872 visited = set()
873 while files:
873 while files:
874 i = files.popleft()
874 i = files.popleft()
875 patterns = matchmod.readpatternfile(
875 patterns = matchmod.readpatternfile(
876 i, self._ui.warn, sourceinfo=True
876 i, self._ui.warn, sourceinfo=True
877 )
877 )
878 for pattern, lineno, line in patterns:
878 for pattern, lineno, line in patterns:
879 kind, p = matchmod._patsplit(pattern, b'glob')
879 kind, p = matchmod._patsplit(pattern, b'glob')
880 if kind == b"subinclude":
880 if kind == b"subinclude":
881 if p not in visited:
881 if p not in visited:
882 files.append(p)
882 files.append(p)
883 continue
883 continue
884 m = matchmod.match(
884 m = matchmod.match(
885 self._root, b'', [], [pattern], warn=self._ui.warn
885 self._root, b'', [], [pattern], warn=self._ui.warn
886 )
886 )
887 if m(f):
887 if m(f):
888 return (i, lineno, line)
888 return (i, lineno, line)
889 visited.add(i)
889 visited.add(i)
890 return (None, -1, b"")
890 return (None, -1, b"")
891
891
892 def _walkexplicit(self, match, subrepos):
892 def _walkexplicit(self, match, subrepos):
893 """Get stat data about the files explicitly specified by match.
893 """Get stat data about the files explicitly specified by match.
894
894
895 Return a triple (results, dirsfound, dirsnotfound).
895 Return a triple (results, dirsfound, dirsnotfound).
896 - results is a mapping from filename to stat result. It also contains
896 - results is a mapping from filename to stat result. It also contains
897 listings mapping subrepos and .hg to None.
897 listings mapping subrepos and .hg to None.
898 - dirsfound is a list of files found to be directories.
898 - dirsfound is a list of files found to be directories.
899 - dirsnotfound is a list of files that the dirstate thinks are
899 - dirsnotfound is a list of files that the dirstate thinks are
900 directories and that were not found."""
900 directories and that were not found."""
901
901
902 def badtype(mode):
902 def badtype(mode):
903 kind = _(b'unknown')
903 kind = _(b'unknown')
904 if stat.S_ISCHR(mode):
904 if stat.S_ISCHR(mode):
905 kind = _(b'character device')
905 kind = _(b'character device')
906 elif stat.S_ISBLK(mode):
906 elif stat.S_ISBLK(mode):
907 kind = _(b'block device')
907 kind = _(b'block device')
908 elif stat.S_ISFIFO(mode):
908 elif stat.S_ISFIFO(mode):
909 kind = _(b'fifo')
909 kind = _(b'fifo')
910 elif stat.S_ISSOCK(mode):
910 elif stat.S_ISSOCK(mode):
911 kind = _(b'socket')
911 kind = _(b'socket')
912 elif stat.S_ISDIR(mode):
912 elif stat.S_ISDIR(mode):
913 kind = _(b'directory')
913 kind = _(b'directory')
914 return _(b'unsupported file type (type is %s)') % kind
914 return _(b'unsupported file type (type is %s)') % kind
915
915
916 badfn = match.bad
916 badfn = match.bad
917 dmap = self._map
917 dmap = self._map
918 lstat = os.lstat
918 lstat = os.lstat
919 getkind = stat.S_IFMT
919 getkind = stat.S_IFMT
920 dirkind = stat.S_IFDIR
920 dirkind = stat.S_IFDIR
921 regkind = stat.S_IFREG
921 regkind = stat.S_IFREG
922 lnkkind = stat.S_IFLNK
922 lnkkind = stat.S_IFLNK
923 join = self._join
923 join = self._join
924 dirsfound = []
924 dirsfound = []
925 foundadd = dirsfound.append
925 foundadd = dirsfound.append
926 dirsnotfound = []
926 dirsnotfound = []
927 notfoundadd = dirsnotfound.append
927 notfoundadd = dirsnotfound.append
928
928
929 if not match.isexact() and self._checkcase:
929 if not match.isexact() and self._checkcase:
930 normalize = self._normalize
930 normalize = self._normalize
931 else:
931 else:
932 normalize = None
932 normalize = None
933
933
934 files = sorted(match.files())
934 files = sorted(match.files())
935 subrepos.sort()
935 subrepos.sort()
936 i, j = 0, 0
936 i, j = 0, 0
937 while i < len(files) and j < len(subrepos):
937 while i < len(files) and j < len(subrepos):
938 subpath = subrepos[j] + b"/"
938 subpath = subrepos[j] + b"/"
939 if files[i] < subpath:
939 if files[i] < subpath:
940 i += 1
940 i += 1
941 continue
941 continue
942 while i < len(files) and files[i].startswith(subpath):
942 while i < len(files) and files[i].startswith(subpath):
943 del files[i]
943 del files[i]
944 j += 1
944 j += 1
945
945
946 if not files or b'' in files:
946 if not files or b'' in files:
947 files = [b'']
947 files = [b'']
948 # constructing the foldmap is expensive, so don't do it for the
948 # constructing the foldmap is expensive, so don't do it for the
949 # common case where files is ['']
949 # common case where files is ['']
950 normalize = None
950 normalize = None
951 results = dict.fromkeys(subrepos)
951 results = dict.fromkeys(subrepos)
952 results[b'.hg'] = None
952 results[b'.hg'] = None
953
953
954 for ff in files:
954 for ff in files:
955 if normalize:
955 if normalize:
956 nf = normalize(ff, False, True)
956 nf = normalize(ff, False, True)
957 else:
957 else:
958 nf = ff
958 nf = ff
959 if nf in results:
959 if nf in results:
960 continue
960 continue
961
961
962 try:
962 try:
963 st = lstat(join(nf))
963 st = lstat(join(nf))
964 kind = getkind(st.st_mode)
964 kind = getkind(st.st_mode)
965 if kind == dirkind:
965 if kind == dirkind:
966 if nf in dmap:
966 if nf in dmap:
967 # file replaced by dir on disk but still in dirstate
967 # file replaced by dir on disk but still in dirstate
968 results[nf] = None
968 results[nf] = None
969 foundadd((nf, ff))
969 foundadd((nf, ff))
970 elif kind == regkind or kind == lnkkind:
970 elif kind == regkind or kind == lnkkind:
971 results[nf] = st
971 results[nf] = st
972 else:
972 else:
973 badfn(ff, badtype(kind))
973 badfn(ff, badtype(kind))
974 if nf in dmap:
974 if nf in dmap:
975 results[nf] = None
975 results[nf] = None
976 except OSError as inst: # nf not found on disk - it is dirstate only
976 except OSError as inst: # nf not found on disk - it is dirstate only
977 if nf in dmap: # does it exactly match a missing file?
977 if nf in dmap: # does it exactly match a missing file?
978 results[nf] = None
978 results[nf] = None
979 else: # does it match a missing directory?
979 else: # does it match a missing directory?
980 if self._map.hasdir(nf):
980 if self._map.hasdir(nf):
981 notfoundadd(nf)
981 notfoundadd(nf)
982 else:
982 else:
983 badfn(ff, encoding.strtolocal(inst.strerror))
983 badfn(ff, encoding.strtolocal(inst.strerror))
984
984
985 # match.files() may contain explicitly-specified paths that shouldn't
985 # match.files() may contain explicitly-specified paths that shouldn't
986 # be taken; drop them from the list of files found. dirsfound/notfound
986 # be taken; drop them from the list of files found. dirsfound/notfound
987 # aren't filtered here because they will be tested later.
987 # aren't filtered here because they will be tested later.
988 if match.anypats():
988 if match.anypats():
989 for f in list(results):
989 for f in list(results):
990 if f == b'.hg' or f in subrepos:
990 if f == b'.hg' or f in subrepos:
991 # keep sentinel to disable further out-of-repo walks
991 # keep sentinel to disable further out-of-repo walks
992 continue
992 continue
993 if not match(f):
993 if not match(f):
994 del results[f]
994 del results[f]
995
995
996 # Case insensitive filesystems cannot rely on lstat() failing to detect
996 # Case insensitive filesystems cannot rely on lstat() failing to detect
997 # a case-only rename. Prune the stat object for any file that does not
997 # a case-only rename. Prune the stat object for any file that does not
998 # match the case in the filesystem, if there are multiple files that
998 # match the case in the filesystem, if there are multiple files that
999 # normalize to the same path.
999 # normalize to the same path.
1000 if match.isexact() and self._checkcase:
1000 if match.isexact() and self._checkcase:
1001 normed = {}
1001 normed = {}
1002
1002
1003 for f, st in pycompat.iteritems(results):
1003 for f, st in pycompat.iteritems(results):
1004 if st is None:
1004 if st is None:
1005 continue
1005 continue
1006
1006
1007 nc = util.normcase(f)
1007 nc = util.normcase(f)
1008 paths = normed.get(nc)
1008 paths = normed.get(nc)
1009
1009
1010 if paths is None:
1010 if paths is None:
1011 paths = set()
1011 paths = set()
1012 normed[nc] = paths
1012 normed[nc] = paths
1013
1013
1014 paths.add(f)
1014 paths.add(f)
1015
1015
1016 for norm, paths in pycompat.iteritems(normed):
1016 for norm, paths in pycompat.iteritems(normed):
1017 if len(paths) > 1:
1017 if len(paths) > 1:
1018 for path in paths:
1018 for path in paths:
1019 folded = self._discoverpath(
1019 folded = self._discoverpath(
1020 path, norm, True, None, self._map.dirfoldmap
1020 path, norm, True, None, self._map.dirfoldmap
1021 )
1021 )
1022 if path != folded:
1022 if path != folded:
1023 results[path] = None
1023 results[path] = None
1024
1024
1025 return results, dirsfound, dirsnotfound
1025 return results, dirsfound, dirsnotfound
1026
1026
1027 def walk(self, match, subrepos, unknown, ignored, full=True):
1027 def walk(self, match, subrepos, unknown, ignored, full=True):
1028 """
1028 """
1029 Walk recursively through the directory tree, finding all files
1029 Walk recursively through the directory tree, finding all files
1030 matched by match.
1030 matched by match.
1031
1031
1032 If full is False, maybe skip some known-clean files.
1032 If full is False, maybe skip some known-clean files.
1033
1033
1034 Return a dict mapping filename to stat-like object (either
1034 Return a dict mapping filename to stat-like object (either
1035 mercurial.osutil.stat instance or return value of os.stat()).
1035 mercurial.osutil.stat instance or return value of os.stat()).
1036
1036
1037 """
1037 """
1038 # full is a flag that extensions that hook into walk can use -- this
1038 # full is a flag that extensions that hook into walk can use -- this
1039 # implementation doesn't use it at all. This satisfies the contract
1039 # implementation doesn't use it at all. This satisfies the contract
1040 # because we only guarantee a "maybe".
1040 # because we only guarantee a "maybe".
1041
1041
1042 if ignored:
1042 if ignored:
1043 ignore = util.never
1043 ignore = util.never
1044 dirignore = util.never
1044 dirignore = util.never
1045 elif unknown:
1045 elif unknown:
1046 ignore = self._ignore
1046 ignore = self._ignore
1047 dirignore = self._dirignore
1047 dirignore = self._dirignore
1048 else:
1048 else:
1049 # if not unknown and not ignored, drop dir recursion and step 2
1049 # if not unknown and not ignored, drop dir recursion and step 2
1050 ignore = util.always
1050 ignore = util.always
1051 dirignore = util.always
1051 dirignore = util.always
1052
1052
1053 matchfn = match.matchfn
1053 matchfn = match.matchfn
1054 matchalways = match.always()
1054 matchalways = match.always()
1055 matchtdir = match.traversedir
1055 matchtdir = match.traversedir
1056 dmap = self._map
1056 dmap = self._map
1057 listdir = util.listdir
1057 listdir = util.listdir
1058 lstat = os.lstat
1058 lstat = os.lstat
1059 dirkind = stat.S_IFDIR
1059 dirkind = stat.S_IFDIR
1060 regkind = stat.S_IFREG
1060 regkind = stat.S_IFREG
1061 lnkkind = stat.S_IFLNK
1061 lnkkind = stat.S_IFLNK
1062 join = self._join
1062 join = self._join
1063
1063
1064 exact = skipstep3 = False
1064 exact = skipstep3 = False
1065 if match.isexact(): # match.exact
1065 if match.isexact(): # match.exact
1066 exact = True
1066 exact = True
1067 dirignore = util.always # skip step 2
1067 dirignore = util.always # skip step 2
1068 elif match.prefix(): # match.match, no patterns
1068 elif match.prefix(): # match.match, no patterns
1069 skipstep3 = True
1069 skipstep3 = True
1070
1070
1071 if not exact and self._checkcase:
1071 if not exact and self._checkcase:
1072 normalize = self._normalize
1072 normalize = self._normalize
1073 normalizefile = self._normalizefile
1073 normalizefile = self._normalizefile
1074 skipstep3 = False
1074 skipstep3 = False
1075 else:
1075 else:
1076 normalize = self._normalize
1076 normalize = self._normalize
1077 normalizefile = None
1077 normalizefile = None
1078
1078
1079 # step 1: find all explicit files
1079 # step 1: find all explicit files
1080 results, work, dirsnotfound = self._walkexplicit(match, subrepos)
1080 results, work, dirsnotfound = self._walkexplicit(match, subrepos)
1081 if matchtdir:
1081 if matchtdir:
1082 for d in work:
1082 for d in work:
1083 matchtdir(d[0])
1083 matchtdir(d[0])
1084 for d in dirsnotfound:
1084 for d in dirsnotfound:
1085 matchtdir(d)
1085 matchtdir(d)
1086
1086
1087 skipstep3 = skipstep3 and not (work or dirsnotfound)
1087 skipstep3 = skipstep3 and not (work or dirsnotfound)
1088 work = [d for d in work if not dirignore(d[0])]
1088 work = [d for d in work if not dirignore(d[0])]
1089
1089
1090 # step 2: visit subdirectories
1090 # step 2: visit subdirectories
1091 def traverse(work, alreadynormed):
1091 def traverse(work, alreadynormed):
1092 wadd = work.append
1092 wadd = work.append
1093 while work:
1093 while work:
1094 tracing.counter('dirstate.walk work', len(work))
1094 tracing.counter('dirstate.walk work', len(work))
1095 nd = work.pop()
1095 nd = work.pop()
1096 visitentries = match.visitchildrenset(nd)
1096 visitentries = match.visitchildrenset(nd)
1097 if not visitentries:
1097 if not visitentries:
1098 continue
1098 continue
1099 if visitentries == b'this' or visitentries == b'all':
1099 if visitentries == b'this' or visitentries == b'all':
1100 visitentries = None
1100 visitentries = None
1101 skip = None
1101 skip = None
1102 if nd != b'':
1102 if nd != b'':
1103 skip = b'.hg'
1103 skip = b'.hg'
1104 try:
1104 try:
1105 with tracing.log('dirstate.walk.traverse listdir %s', nd):
1105 with tracing.log('dirstate.walk.traverse listdir %s', nd):
1106 entries = listdir(join(nd), stat=True, skip=skip)
1106 entries = listdir(join(nd), stat=True, skip=skip)
1107 except OSError as inst:
1107 except OSError as inst:
1108 if inst.errno in (errno.EACCES, errno.ENOENT):
1108 if inst.errno in (errno.EACCES, errno.ENOENT):
1109 match.bad(
1109 match.bad(
1110 self.pathto(nd), encoding.strtolocal(inst.strerror)
1110 self.pathto(nd), encoding.strtolocal(inst.strerror)
1111 )
1111 )
1112 continue
1112 continue
1113 raise
1113 raise
1114 for f, kind, st in entries:
1114 for f, kind, st in entries:
1115 # Some matchers may return files in the visitentries set,
1115 # Some matchers may return files in the visitentries set,
1116 # instead of 'this', if the matcher explicitly mentions them
1116 # instead of 'this', if the matcher explicitly mentions them
1117 # and is not an exactmatcher. This is acceptable; we do not
1117 # and is not an exactmatcher. This is acceptable; we do not
1118 # make any hard assumptions about file-or-directory below
1118 # make any hard assumptions about file-or-directory below
1119 # based on the presence of `f` in visitentries. If
1119 # based on the presence of `f` in visitentries. If
1120 # visitchildrenset returned a set, we can always skip the
1120 # visitchildrenset returned a set, we can always skip the
1121 # entries *not* in the set it provided regardless of whether
1121 # entries *not* in the set it provided regardless of whether
1122 # they're actually a file or a directory.
1122 # they're actually a file or a directory.
1123 if visitentries and f not in visitentries:
1123 if visitentries and f not in visitentries:
1124 continue
1124 continue
1125 if normalizefile:
1125 if normalizefile:
1126 # even though f might be a directory, we're only
1126 # even though f might be a directory, we're only
1127 # interested in comparing it to files currently in the
1127 # interested in comparing it to files currently in the
1128 # dmap -- therefore normalizefile is enough
1128 # dmap -- therefore normalizefile is enough
1129 nf = normalizefile(
1129 nf = normalizefile(
1130 nd and (nd + b"/" + f) or f, True, True
1130 nd and (nd + b"/" + f) or f, True, True
1131 )
1131 )
1132 else:
1132 else:
1133 nf = nd and (nd + b"/" + f) or f
1133 nf = nd and (nd + b"/" + f) or f
1134 if nf not in results:
1134 if nf not in results:
1135 if kind == dirkind:
1135 if kind == dirkind:
1136 if not ignore(nf):
1136 if not ignore(nf):
1137 if matchtdir:
1137 if matchtdir:
1138 matchtdir(nf)
1138 matchtdir(nf)
1139 wadd(nf)
1139 wadd(nf)
1140 if nf in dmap and (matchalways or matchfn(nf)):
1140 if nf in dmap and (matchalways or matchfn(nf)):
1141 results[nf] = None
1141 results[nf] = None
1142 elif kind == regkind or kind == lnkkind:
1142 elif kind == regkind or kind == lnkkind:
1143 if nf in dmap:
1143 if nf in dmap:
1144 if matchalways or matchfn(nf):
1144 if matchalways or matchfn(nf):
1145 results[nf] = st
1145 results[nf] = st
1146 elif (matchalways or matchfn(nf)) and not ignore(
1146 elif (matchalways or matchfn(nf)) and not ignore(
1147 nf
1147 nf
1148 ):
1148 ):
1149 # unknown file -- normalize if necessary
1149 # unknown file -- normalize if necessary
1150 if not alreadynormed:
1150 if not alreadynormed:
1151 nf = normalize(nf, False, True)
1151 nf = normalize(nf, False, True)
1152 results[nf] = st
1152 results[nf] = st
1153 elif nf in dmap and (matchalways or matchfn(nf)):
1153 elif nf in dmap and (matchalways or matchfn(nf)):
1154 results[nf] = None
1154 results[nf] = None
1155
1155
1156 for nd, d in work:
1156 for nd, d in work:
1157 # alreadynormed means that processwork doesn't have to do any
1157 # alreadynormed means that processwork doesn't have to do any
1158 # expensive directory normalization
1158 # expensive directory normalization
1159 alreadynormed = not normalize or nd == d
1159 alreadynormed = not normalize or nd == d
1160 traverse([d], alreadynormed)
1160 traverse([d], alreadynormed)
1161
1161
1162 for s in subrepos:
1162 for s in subrepos:
1163 del results[s]
1163 del results[s]
1164 del results[b'.hg']
1164 del results[b'.hg']
1165
1165
1166 # step 3: visit remaining files from dmap
1166 # step 3: visit remaining files from dmap
1167 if not skipstep3 and not exact:
1167 if not skipstep3 and not exact:
1168 # If a dmap file is not in results yet, it was either
1168 # If a dmap file is not in results yet, it was either
1169 # a) not matching matchfn b) ignored, c) missing, or d) under a
1169 # a) not matching matchfn b) ignored, c) missing, or d) under a
1170 # symlink directory.
1170 # symlink directory.
1171 if not results and matchalways:
1171 if not results and matchalways:
1172 visit = [f for f in dmap]
1172 visit = [f for f in dmap]
1173 else:
1173 else:
1174 visit = [f for f in dmap if f not in results and matchfn(f)]
1174 visit = [f for f in dmap if f not in results and matchfn(f)]
1175 visit.sort()
1175 visit.sort()
1176
1176
1177 if unknown:
1177 if unknown:
1178 # unknown == True means we walked all dirs under the roots
1178 # unknown == True means we walked all dirs under the roots
1179 # that wasn't ignored, and everything that matched was stat'ed
1179 # that wasn't ignored, and everything that matched was stat'ed
1180 # and is already in results.
1180 # and is already in results.
1181 # The rest must thus be ignored or under a symlink.
1181 # The rest must thus be ignored or under a symlink.
1182 audit_path = pathutil.pathauditor(self._root, cached=True)
1182 audit_path = pathutil.pathauditor(self._root, cached=True)
1183
1183
1184 for nf in iter(visit):
1184 for nf in iter(visit):
1185 # If a stat for the same file was already added with a
1185 # If a stat for the same file was already added with a
1186 # different case, don't add one for this, since that would
1186 # different case, don't add one for this, since that would
1187 # make it appear as if the file exists under both names
1187 # make it appear as if the file exists under both names
1188 # on disk.
1188 # on disk.
1189 if (
1189 if (
1190 normalizefile
1190 normalizefile
1191 and normalizefile(nf, True, True) in results
1191 and normalizefile(nf, True, True) in results
1192 ):
1192 ):
1193 results[nf] = None
1193 results[nf] = None
1194 # Report ignored items in the dmap as long as they are not
1194 # Report ignored items in the dmap as long as they are not
1195 # under a symlink directory.
1195 # under a symlink directory.
1196 elif audit_path.check(nf):
1196 elif audit_path.check(nf):
1197 try:
1197 try:
1198 results[nf] = lstat(join(nf))
1198 results[nf] = lstat(join(nf))
1199 # file was just ignored, no links, and exists
1199 # file was just ignored, no links, and exists
1200 except OSError:
1200 except OSError:
1201 # file doesn't exist
1201 # file doesn't exist
1202 results[nf] = None
1202 results[nf] = None
1203 else:
1203 else:
1204 # It's either missing or under a symlink directory
1204 # It's either missing or under a symlink directory
1205 # which we in this case report as missing
1205 # which we in this case report as missing
1206 results[nf] = None
1206 results[nf] = None
1207 else:
1207 else:
1208 # We may not have walked the full directory tree above,
1208 # We may not have walked the full directory tree above,
1209 # so stat and check everything we missed.
1209 # so stat and check everything we missed.
1210 iv = iter(visit)
1210 iv = iter(visit)
1211 for st in util.statfiles([join(i) for i in visit]):
1211 for st in util.statfiles([join(i) for i in visit]):
1212 results[next(iv)] = st
1212 results[next(iv)] = st
1213 return results
1213 return results
1214
1214
1215 def _rust_status(self, matcher, list_clean, list_ignored, list_unknown):
1215 def _rust_status(self, matcher, list_clean, list_ignored, list_unknown):
1216 # Force Rayon (Rust parallelism library) to respect the number of
1216 # Force Rayon (Rust parallelism library) to respect the number of
1217 # workers. This is a temporary workaround until Rust code knows
1217 # workers. This is a temporary workaround until Rust code knows
1218 # how to read the config file.
1218 # how to read the config file.
1219 numcpus = self._ui.configint(b"worker", b"numcpus")
1219 numcpus = self._ui.configint(b"worker", b"numcpus")
1220 if numcpus is not None:
1220 if numcpus is not None:
1221 encoding.environ.setdefault(b'RAYON_NUM_THREADS', b'%d' % numcpus)
1221 encoding.environ.setdefault(b'RAYON_NUM_THREADS', b'%d' % numcpus)
1222
1222
1223 workers_enabled = self._ui.configbool(b"worker", b"enabled", True)
1223 workers_enabled = self._ui.configbool(b"worker", b"enabled", True)
1224 if not workers_enabled:
1224 if not workers_enabled:
1225 encoding.environ[b"RAYON_NUM_THREADS"] = b"1"
1225 encoding.environ[b"RAYON_NUM_THREADS"] = b"1"
1226
1226
1227 (
1227 (
1228 lookup,
1228 lookup,
1229 modified,
1229 modified,
1230 added,
1230 added,
1231 removed,
1231 removed,
1232 deleted,
1232 deleted,
1233 clean,
1233 clean,
1234 ignored,
1234 ignored,
1235 unknown,
1235 unknown,
1236 warnings,
1236 warnings,
1237 bad,
1237 bad,
1238 traversed,
1238 traversed,
1239 dirty,
1239 dirty,
1240 ) = rustmod.status(
1240 ) = rustmod.status(
1241 self._map._map,
1241 self._map._map,
1242 matcher,
1242 matcher,
1243 self._rootdir,
1243 self._rootdir,
1244 self._ignorefiles(),
1244 self._ignorefiles(),
1245 self._checkexec,
1245 self._checkexec,
1246 self._lastnormaltime,
1246 self._lastnormaltime,
1247 bool(list_clean),
1247 bool(list_clean),
1248 bool(list_ignored),
1248 bool(list_ignored),
1249 bool(list_unknown),
1249 bool(list_unknown),
1250 bool(matcher.traversedir),
1250 bool(matcher.traversedir),
1251 )
1251 )
1252
1252
1253 self._dirty |= dirty
1253 self._dirty |= dirty
1254
1254
1255 if matcher.traversedir:
1255 if matcher.traversedir:
1256 for dir in traversed:
1256 for dir in traversed:
1257 matcher.traversedir(dir)
1257 matcher.traversedir(dir)
1258
1258
1259 if self._ui.warn:
1259 if self._ui.warn:
1260 for item in warnings:
1260 for item in warnings:
1261 if isinstance(item, tuple):
1261 if isinstance(item, tuple):
1262 file_path, syntax = item
1262 file_path, syntax = item
1263 msg = _(b"%s: ignoring invalid syntax '%s'\n") % (
1263 msg = _(b"%s: ignoring invalid syntax '%s'\n") % (
1264 file_path,
1264 file_path,
1265 syntax,
1265 syntax,
1266 )
1266 )
1267 self._ui.warn(msg)
1267 self._ui.warn(msg)
1268 else:
1268 else:
1269 msg = _(b"skipping unreadable pattern file '%s': %s\n")
1269 msg = _(b"skipping unreadable pattern file '%s': %s\n")
1270 self._ui.warn(
1270 self._ui.warn(
1271 msg
1271 msg
1272 % (
1272 % (
1273 pathutil.canonpath(
1273 pathutil.canonpath(
1274 self._rootdir, self._rootdir, item
1274 self._rootdir, self._rootdir, item
1275 ),
1275 ),
1276 b"No such file or directory",
1276 b"No such file or directory",
1277 )
1277 )
1278 )
1278 )
1279
1279
1280 for (fn, message) in bad:
1280 for (fn, message) in bad:
1281 matcher.bad(fn, encoding.strtolocal(message))
1281 matcher.bad(fn, encoding.strtolocal(message))
1282
1282
1283 status = scmutil.status(
1283 status = scmutil.status(
1284 modified=modified,
1284 modified=modified,
1285 added=added,
1285 added=added,
1286 removed=removed,
1286 removed=removed,
1287 deleted=deleted,
1287 deleted=deleted,
1288 unknown=unknown,
1288 unknown=unknown,
1289 ignored=ignored,
1289 ignored=ignored,
1290 clean=clean,
1290 clean=clean,
1291 )
1291 )
1292 return (lookup, status)
1292 return (lookup, status)
1293
1293
1294 def status(self, match, subrepos, ignored, clean, unknown):
1294 def status(self, match, subrepos, ignored, clean, unknown):
1295 """Determine the status of the working copy relative to the
1295 """Determine the status of the working copy relative to the
1296 dirstate and return a pair of (unsure, status), where status is of type
1296 dirstate and return a pair of (unsure, status), where status is of type
1297 scmutil.status and:
1297 scmutil.status and:
1298
1298
1299 unsure:
1299 unsure:
1300 files that might have been modified since the dirstate was
1300 files that might have been modified since the dirstate was
1301 written, but need to be read to be sure (size is the same
1301 written, but need to be read to be sure (size is the same
1302 but mtime differs)
1302 but mtime differs)
1303 status.modified:
1303 status.modified:
1304 files that have definitely been modified since the dirstate
1304 files that have definitely been modified since the dirstate
1305 was written (different size or mode)
1305 was written (different size or mode)
1306 status.clean:
1306 status.clean:
1307 files that have definitely not been modified since the
1307 files that have definitely not been modified since the
1308 dirstate was written
1308 dirstate was written
1309 """
1309 """
1310 listignored, listclean, listunknown = ignored, clean, unknown
1310 listignored, listclean, listunknown = ignored, clean, unknown
1311 lookup, modified, added, unknown, ignored = [], [], [], [], []
1311 lookup, modified, added, unknown, ignored = [], [], [], [], []
1312 removed, deleted, clean = [], [], []
1312 removed, deleted, clean = [], [], []
1313
1313
1314 dmap = self._map
1314 dmap = self._map
1315 dmap.preload()
1315 dmap.preload()
1316
1316
1317 use_rust = True
1317 use_rust = True
1318
1318
1319 allowed_matchers = (
1319 allowed_matchers = (
1320 matchmod.alwaysmatcher,
1320 matchmod.alwaysmatcher,
1321 matchmod.exactmatcher,
1321 matchmod.exactmatcher,
1322 matchmod.includematcher,
1322 matchmod.includematcher,
1323 )
1323 )
1324
1324
1325 if rustmod is None:
1325 if rustmod is None:
1326 use_rust = False
1326 use_rust = False
1327 elif self._checkcase:
1327 elif self._checkcase:
1328 # Case-insensitive filesystems are not handled yet
1328 # Case-insensitive filesystems are not handled yet
1329 use_rust = False
1329 use_rust = False
1330 elif subrepos:
1330 elif subrepos:
1331 use_rust = False
1331 use_rust = False
1332 elif sparse.enabled:
1332 elif sparse.enabled:
1333 use_rust = False
1333 use_rust = False
1334 elif not isinstance(match, allowed_matchers):
1334 elif not isinstance(match, allowed_matchers):
1335 # Some matchers have yet to be implemented
1335 # Some matchers have yet to be implemented
1336 use_rust = False
1336 use_rust = False
1337
1337
1338 if use_rust:
1338 if use_rust:
1339 try:
1339 try:
1340 return self._rust_status(
1340 return self._rust_status(
1341 match, listclean, listignored, listunknown
1341 match, listclean, listignored, listunknown
1342 )
1342 )
1343 except rustmod.FallbackError:
1343 except rustmod.FallbackError:
1344 pass
1344 pass
1345
1345
1346 def noop(f):
1346 def noop(f):
1347 pass
1347 pass
1348
1348
1349 dcontains = dmap.__contains__
1349 dcontains = dmap.__contains__
1350 dget = dmap.__getitem__
1350 dget = dmap.__getitem__
1351 ladd = lookup.append # aka "unsure"
1351 ladd = lookup.append # aka "unsure"
1352 madd = modified.append
1352 madd = modified.append
1353 aadd = added.append
1353 aadd = added.append
1354 uadd = unknown.append if listunknown else noop
1354 uadd = unknown.append if listunknown else noop
1355 iadd = ignored.append if listignored else noop
1355 iadd = ignored.append if listignored else noop
1356 radd = removed.append
1356 radd = removed.append
1357 dadd = deleted.append
1357 dadd = deleted.append
1358 cadd = clean.append if listclean else noop
1358 cadd = clean.append if listclean else noop
1359 mexact = match.exact
1359 mexact = match.exact
1360 dirignore = self._dirignore
1360 dirignore = self._dirignore
1361 checkexec = self._checkexec
1361 checkexec = self._checkexec
1362 checklink = self._checklink
1362 copymap = self._map.copymap
1363 copymap = self._map.copymap
1363 lastnormaltime = self._lastnormaltime
1364 lastnormaltime = self._lastnormaltime
1364
1365
1365 # We need to do full walks when either
1366 # We need to do full walks when either
1366 # - we're listing all clean files, or
1367 # - we're listing all clean files, or
1367 # - match.traversedir does something, because match.traversedir should
1368 # - match.traversedir does something, because match.traversedir should
1368 # be called for every dir in the working dir
1369 # be called for every dir in the working dir
1369 full = listclean or match.traversedir is not None
1370 full = listclean or match.traversedir is not None
1370 for fn, st in pycompat.iteritems(
1371 for fn, st in pycompat.iteritems(
1371 self.walk(match, subrepos, listunknown, listignored, full=full)
1372 self.walk(match, subrepos, listunknown, listignored, full=full)
1372 ):
1373 ):
1373 if not dcontains(fn):
1374 if not dcontains(fn):
1374 if (listignored or mexact(fn)) and dirignore(fn):
1375 if (listignored or mexact(fn)) and dirignore(fn):
1375 if listignored:
1376 if listignored:
1376 iadd(fn)
1377 iadd(fn)
1377 else:
1378 else:
1378 uadd(fn)
1379 uadd(fn)
1379 continue
1380 continue
1380
1381
1381 t = dget(fn)
1382 t = dget(fn)
1382 mode = t.mode
1383 mode = t.mode
1383 size = t.size
1384 size = t.size
1384
1385
1385 if not st and t.tracked:
1386 if not st and t.tracked:
1386 dadd(fn)
1387 dadd(fn)
1387 elif t.p2_info:
1388 elif t.p2_info:
1388 madd(fn)
1389 madd(fn)
1389 elif t.added:
1390 elif t.added:
1390 aadd(fn)
1391 aadd(fn)
1391 elif t.removed:
1392 elif t.removed:
1392 radd(fn)
1393 radd(fn)
1393 elif t.tracked:
1394 elif t.tracked:
1394 if (
1395 if not checklink and t.has_fallback_symlink:
1396 # If the file system does not support symlink, the mode
1397 # might not be correctly stored in the dirstate, so do not
1398 # trust it.
1399 ladd(fn)
1400 elif not checkexec and t.has_fallback_exec:
1401 # If the file system does not support exec bits, the mode
1402 # might not be correctly stored in the dirstate, so do not
1403 # trust it.
1404 ladd(fn)
1405 elif (
1395 size >= 0
1406 size >= 0
1396 and (
1407 and (
1397 (size != st.st_size and size != st.st_size & _rangemask)
1408 (size != st.st_size and size != st.st_size & _rangemask)
1398 or ((mode ^ st.st_mode) & 0o100 and checkexec)
1409 or ((mode ^ st.st_mode) & 0o100 and checkexec)
1399 )
1410 )
1400 or fn in copymap
1411 or fn in copymap
1401 ):
1412 ):
1402 if stat.S_ISLNK(st.st_mode) and size != st.st_size:
1413 if stat.S_ISLNK(st.st_mode) and size != st.st_size:
1403 # issue6456: Size returned may be longer due to
1414 # issue6456: Size returned may be longer due to
1404 # encryption on EXT-4 fscrypt, undecided.
1415 # encryption on EXT-4 fscrypt, undecided.
1405 ladd(fn)
1416 ladd(fn)
1406 else:
1417 else:
1407 madd(fn)
1418 madd(fn)
1408 elif not t.mtime_likely_equal_to(timestamp.mtime_of(st)):
1419 elif not t.mtime_likely_equal_to(timestamp.mtime_of(st)):
1409 ladd(fn)
1420 ladd(fn)
1410 elif timestamp.mtime_of(st) == lastnormaltime:
1421 elif timestamp.mtime_of(st) == lastnormaltime:
1411 # fn may have just been marked as normal and it may have
1422 # fn may have just been marked as normal and it may have
1412 # changed in the same second without changing its size.
1423 # changed in the same second without changing its size.
1413 # This can happen if we quickly do multiple commits.
1424 # This can happen if we quickly do multiple commits.
1414 # Force lookup, so we don't miss such a racy file change.
1425 # Force lookup, so we don't miss such a racy file change.
1415 ladd(fn)
1426 ladd(fn)
1416 elif listclean:
1427 elif listclean:
1417 cadd(fn)
1428 cadd(fn)
1418 status = scmutil.status(
1429 status = scmutil.status(
1419 modified, added, removed, deleted, unknown, ignored, clean
1430 modified, added, removed, deleted, unknown, ignored, clean
1420 )
1431 )
1421 return (lookup, status)
1432 return (lookup, status)
1422
1433
1423 def matches(self, match):
1434 def matches(self, match):
1424 """
1435 """
1425 return files in the dirstate (in whatever state) filtered by match
1436 return files in the dirstate (in whatever state) filtered by match
1426 """
1437 """
1427 dmap = self._map
1438 dmap = self._map
1428 if rustmod is not None:
1439 if rustmod is not None:
1429 dmap = self._map._map
1440 dmap = self._map._map
1430
1441
1431 if match.always():
1442 if match.always():
1432 return dmap.keys()
1443 return dmap.keys()
1433 files = match.files()
1444 files = match.files()
1434 if match.isexact():
1445 if match.isexact():
1435 # fast path -- filter the other way around, since typically files is
1446 # fast path -- filter the other way around, since typically files is
1436 # much smaller than dmap
1447 # much smaller than dmap
1437 return [f for f in files if f in dmap]
1448 return [f for f in files if f in dmap]
1438 if match.prefix() and all(fn in dmap for fn in files):
1449 if match.prefix() and all(fn in dmap for fn in files):
1439 # fast path -- all the values are known to be files, so just return
1450 # fast path -- all the values are known to be files, so just return
1440 # that
1451 # that
1441 return list(files)
1452 return list(files)
1442 return [f for f in dmap if match(f)]
1453 return [f for f in dmap if match(f)]
1443
1454
1444 def _actualfilename(self, tr):
1455 def _actualfilename(self, tr):
1445 if tr:
1456 if tr:
1446 return self._pendingfilename
1457 return self._pendingfilename
1447 else:
1458 else:
1448 return self._filename
1459 return self._filename
1449
1460
1450 def savebackup(self, tr, backupname):
1461 def savebackup(self, tr, backupname):
1451 '''Save current dirstate into backup file'''
1462 '''Save current dirstate into backup file'''
1452 filename = self._actualfilename(tr)
1463 filename = self._actualfilename(tr)
1453 assert backupname != filename
1464 assert backupname != filename
1454
1465
1455 # use '_writedirstate' instead of 'write' to write changes certainly,
1466 # use '_writedirstate' instead of 'write' to write changes certainly,
1456 # because the latter omits writing out if transaction is running.
1467 # because the latter omits writing out if transaction is running.
1457 # output file will be used to create backup of dirstate at this point.
1468 # output file will be used to create backup of dirstate at this point.
1458 if self._dirty or not self._opener.exists(filename):
1469 if self._dirty or not self._opener.exists(filename):
1459 self._writedirstate(
1470 self._writedirstate(
1460 tr,
1471 tr,
1461 self._opener(filename, b"w", atomictemp=True, checkambig=True),
1472 self._opener(filename, b"w", atomictemp=True, checkambig=True),
1462 )
1473 )
1463
1474
1464 if tr:
1475 if tr:
1465 # ensure that subsequent tr.writepending returns True for
1476 # ensure that subsequent tr.writepending returns True for
1466 # changes written out above, even if dirstate is never
1477 # changes written out above, even if dirstate is never
1467 # changed after this
1478 # changed after this
1468 tr.addfilegenerator(
1479 tr.addfilegenerator(
1469 b'dirstate',
1480 b'dirstate',
1470 (self._filename,),
1481 (self._filename,),
1471 lambda f: self._writedirstate(tr, f),
1482 lambda f: self._writedirstate(tr, f),
1472 location=b'plain',
1483 location=b'plain',
1473 )
1484 )
1474
1485
1475 # ensure that pending file written above is unlinked at
1486 # ensure that pending file written above is unlinked at
1476 # failure, even if tr.writepending isn't invoked until the
1487 # failure, even if tr.writepending isn't invoked until the
1477 # end of this transaction
1488 # end of this transaction
1478 tr.registertmp(filename, location=b'plain')
1489 tr.registertmp(filename, location=b'plain')
1479
1490
1480 self._opener.tryunlink(backupname)
1491 self._opener.tryunlink(backupname)
1481 # hardlink backup is okay because _writedirstate is always called
1492 # hardlink backup is okay because _writedirstate is always called
1482 # with an "atomictemp=True" file.
1493 # with an "atomictemp=True" file.
1483 util.copyfile(
1494 util.copyfile(
1484 self._opener.join(filename),
1495 self._opener.join(filename),
1485 self._opener.join(backupname),
1496 self._opener.join(backupname),
1486 hardlink=True,
1497 hardlink=True,
1487 )
1498 )
1488
1499
1489 def restorebackup(self, tr, backupname):
1500 def restorebackup(self, tr, backupname):
1490 '''Restore dirstate by backup file'''
1501 '''Restore dirstate by backup file'''
1491 # this "invalidate()" prevents "wlock.release()" from writing
1502 # this "invalidate()" prevents "wlock.release()" from writing
1492 # changes of dirstate out after restoring from backup file
1503 # changes of dirstate out after restoring from backup file
1493 self.invalidate()
1504 self.invalidate()
1494 filename = self._actualfilename(tr)
1505 filename = self._actualfilename(tr)
1495 o = self._opener
1506 o = self._opener
1496 if util.samefile(o.join(backupname), o.join(filename)):
1507 if util.samefile(o.join(backupname), o.join(filename)):
1497 o.unlink(backupname)
1508 o.unlink(backupname)
1498 else:
1509 else:
1499 o.rename(backupname, filename, checkambig=True)
1510 o.rename(backupname, filename, checkambig=True)
1500
1511
1501 def clearbackup(self, tr, backupname):
1512 def clearbackup(self, tr, backupname):
1502 '''Clear backup file'''
1513 '''Clear backup file'''
1503 self._opener.unlink(backupname)
1514 self._opener.unlink(backupname)
1504
1515
1505 def verify(self, m1, m2):
1516 def verify(self, m1, m2):
1506 """check the dirstate content against the parent manifest and yield errors"""
1517 """check the dirstate content against the parent manifest and yield errors"""
1507 missing_from_p1 = b"%s in state %s, but not in manifest1\n"
1518 missing_from_p1 = b"%s in state %s, but not in manifest1\n"
1508 unexpected_in_p1 = b"%s in state %s, but also in manifest1\n"
1519 unexpected_in_p1 = b"%s in state %s, but also in manifest1\n"
1509 missing_from_ps = b"%s in state %s, but not in either manifest\n"
1520 missing_from_ps = b"%s in state %s, but not in either manifest\n"
1510 missing_from_ds = b"%s in manifest1, but listed as state %s\n"
1521 missing_from_ds = b"%s in manifest1, but listed as state %s\n"
1511 for f, entry in self.items():
1522 for f, entry in self.items():
1512 state = entry.state
1523 state = entry.state
1513 if state in b"nr" and f not in m1:
1524 if state in b"nr" and f not in m1:
1514 yield (missing_from_p1, f, state)
1525 yield (missing_from_p1, f, state)
1515 if state in b"a" and f in m1:
1526 if state in b"a" and f in m1:
1516 yield (unexpected_in_p1, f, state)
1527 yield (unexpected_in_p1, f, state)
1517 if state in b"m" and f not in m1 and f not in m2:
1528 if state in b"m" and f not in m1 and f not in m2:
1518 yield (missing_from_ps, f, state)
1529 yield (missing_from_ps, f, state)
1519 for f in m1:
1530 for f in m1:
1520 state = self.get_entry(f).state
1531 state = self.get_entry(f).state
1521 if state not in b"nrm":
1532 if state not in b"nrm":
1522 yield (missing_from_ds, f, state)
1533 yield (missing_from_ds, f, state)
@@ -1,610 +1,616 b''
1 The *dirstate* is what Mercurial uses internally to track
1 The *dirstate* is what Mercurial uses internally to track
2 the state of files in the working directory,
2 the state of files in the working directory,
3 such as set by commands like `hg add` and `hg rm`.
3 such as set by commands like `hg add` and `hg rm`.
4 It also contains some cached data that help make `hg status` faster.
4 It also contains some cached data that help make `hg status` faster.
5 The name refers both to `.hg/dirstate` on the filesystem
5 The name refers both to `.hg/dirstate` on the filesystem
6 and the corresponding data structure in memory while a Mercurial process
6 and the corresponding data structure in memory while a Mercurial process
7 is running.
7 is running.
8
8
9 The original file format, retroactively dubbed `dirstate-v1`,
9 The original file format, retroactively dubbed `dirstate-v1`,
10 is described at https://www.mercurial-scm.org/wiki/DirState.
10 is described at https://www.mercurial-scm.org/wiki/DirState.
11 It is made of a flat sequence of unordered variable-size entries,
11 It is made of a flat sequence of unordered variable-size entries,
12 so accessing any information in it requires parsing all of it.
12 so accessing any information in it requires parsing all of it.
13 Similarly, saving changes requires rewriting the entire file.
13 Similarly, saving changes requires rewriting the entire file.
14
14
15 The newer `dirstate-v2` file format is designed to fix these limitations
15 The newer `dirstate-v2` file format is designed to fix these limitations
16 and make `hg status` faster.
16 and make `hg status` faster.
17
17
18 User guide
18 User guide
19 ==========
19 ==========
20
20
21 Compatibility
21 Compatibility
22 -------------
22 -------------
23
23
24 The file format is experimental and may still change.
24 The file format is experimental and may still change.
25 Different versions of Mercurial may not be compatible with each other
25 Different versions of Mercurial may not be compatible with each other
26 when working on a local repository that uses this format.
26 when working on a local repository that uses this format.
27 When using an incompatible version with the experimental format,
27 When using an incompatible version with the experimental format,
28 anything can happen including data corruption.
28 anything can happen including data corruption.
29
29
30 Since the dirstate is entirely local and not relevant to the wire protocol,
30 Since the dirstate is entirely local and not relevant to the wire protocol,
31 `dirstate-v2` does not affect compatibility with remote Mercurial versions.
31 `dirstate-v2` does not affect compatibility with remote Mercurial versions.
32
32
33 When `share-safe` is enabled, different repositories sharing the same store
33 When `share-safe` is enabled, different repositories sharing the same store
34 can use different dirstate formats.
34 can use different dirstate formats.
35
35
36 Enabling `dirstate-v2` for new local repositories
36 Enabling `dirstate-v2` for new local repositories
37 ------------------------------------------------
37 ------------------------------------------------
38
38
39 When creating a new local repository such as with `hg init` or `hg clone`,
39 When creating a new local repository such as with `hg init` or `hg clone`,
40 the `exp-dirstate-v2` boolean in the `format` configuration section
40 the `exp-dirstate-v2` boolean in the `format` configuration section
41 controls whether to use this file format.
41 controls whether to use this file format.
42 This is disabled by default as of this writing.
42 This is disabled by default as of this writing.
43 To enable it for a single repository, run for example::
43 To enable it for a single repository, run for example::
44
44
45 $ hg init my-project --config format.exp-dirstate-v2=1
45 $ hg init my-project --config format.exp-dirstate-v2=1
46
46
47 Checking the format of an existing local repository
47 Checking the format of an existing local repository
48 --------------------------------------------------
48 --------------------------------------------------
49
49
50 The `debugformat` commands prints information about
50 The `debugformat` commands prints information about
51 which of multiple optional formats are used in the current repository,
51 which of multiple optional formats are used in the current repository,
52 including `dirstate-v2`::
52 including `dirstate-v2`::
53
53
54 $ hg debugformat
54 $ hg debugformat
55 format-variant repo
55 format-variant repo
56 fncache: yes
56 fncache: yes
57 dirstate-v2: yes
57 dirstate-v2: yes
58 […]
58 […]
59
59
60 Upgrading or downgrading an existing local repository
60 Upgrading or downgrading an existing local repository
61 -----------------------------------------------------
61 -----------------------------------------------------
62
62
63 The `debugupgrade` command does various upgrades or downgrades
63 The `debugupgrade` command does various upgrades or downgrades
64 on a local repository
64 on a local repository
65 based on the current Mercurial version and on configuration.
65 based on the current Mercurial version and on configuration.
66 The same `format.exp-dirstate-v2` configuration is used again.
66 The same `format.exp-dirstate-v2` configuration is used again.
67
67
68 Example to upgrade::
68 Example to upgrade::
69
69
70 $ hg debugupgrade --config format.exp-dirstate-v2=1
70 $ hg debugupgrade --config format.exp-dirstate-v2=1
71
71
72 Example to downgrade to `dirstate-v1`::
72 Example to downgrade to `dirstate-v1`::
73
73
74 $ hg debugupgrade --config format.exp-dirstate-v2=0
74 $ hg debugupgrade --config format.exp-dirstate-v2=0
75
75
76 Both of this commands do nothing but print a list of proposed changes,
76 Both of this commands do nothing but print a list of proposed changes,
77 which may include changes unrelated to the dirstate.
77 which may include changes unrelated to the dirstate.
78 Those other changes are controlled by their own configuration keys.
78 Those other changes are controlled by their own configuration keys.
79 Add `--run` to a command to actually apply the proposed changes.
79 Add `--run` to a command to actually apply the proposed changes.
80
80
81 Backups of `.hg/requires` and `.hg/dirstate` are created
81 Backups of `.hg/requires` and `.hg/dirstate` are created
82 in a `.hg/upgradebackup.*` directory.
82 in a `.hg/upgradebackup.*` directory.
83 If something goes wrong, restoring those files should undo the change.
83 If something goes wrong, restoring those files should undo the change.
84
84
85 Note that upgrading affects compatibility with older versions of Mercurial
85 Note that upgrading affects compatibility with older versions of Mercurial
86 as noted above.
86 as noted above.
87 This can be relevant when a repository’s files are on a USB drive
87 This can be relevant when a repository’s files are on a USB drive
88 or some other removable media, or shared over the network, etc.
88 or some other removable media, or shared over the network, etc.
89
89
90 Internal filesystem representation
90 Internal filesystem representation
91 ==================================
91 ==================================
92
92
93 Requirements file
93 Requirements file
94 -----------------
94 -----------------
95
95
96 The `.hg/requires` file indicates which of various optional file formats
96 The `.hg/requires` file indicates which of various optional file formats
97 are used by a given repository.
97 are used by a given repository.
98 Mercurial aborts when seeing a requirement it does not know about,
98 Mercurial aborts when seeing a requirement it does not know about,
99 which avoids older versions accidentally messing up a repository
99 which avoids older versions accidentally messing up a repository
100 that uses a format that was introduced later.
100 that uses a format that was introduced later.
101 For versions that do support a format, the presence or absence of
101 For versions that do support a format, the presence or absence of
102 the corresponding requirement indicates whether to use that format.
102 the corresponding requirement indicates whether to use that format.
103
103
104 When the file contains a `exp-dirstate-v2` line,
104 When the file contains a `exp-dirstate-v2` line,
105 the `dirstate-v2` format is used.
105 the `dirstate-v2` format is used.
106 With no such line `dirstate-v1` is used.
106 With no such line `dirstate-v1` is used.
107
107
108 High level description
108 High level description
109 ----------------------
109 ----------------------
110
110
111 Whereas `dirstate-v1` uses a single `.hg/dirstate` file,
111 Whereas `dirstate-v1` uses a single `.hg/dirstate` file,
112 in `dirstate-v2` that file is a "docket" file
112 in `dirstate-v2` that file is a "docket" file
113 that only contains some metadata
113 that only contains some metadata
114 and points to separate data file named `.hg/dirstate.{ID}`,
114 and points to separate data file named `.hg/dirstate.{ID}`,
115 where `{ID}` is a random identifier.
115 where `{ID}` is a random identifier.
116
116
117 This separation allows making data files append-only
117 This separation allows making data files append-only
118 and therefore safer to memory-map.
118 and therefore safer to memory-map.
119 Creating a new data file (occasionally to clean up unused data)
119 Creating a new data file (occasionally to clean up unused data)
120 can be done with a different ID
120 can be done with a different ID
121 without disrupting another Mercurial process
121 without disrupting another Mercurial process
122 that could still be using the previous data file.
122 that could still be using the previous data file.
123
123
124 Both files have a format designed to reduce the need for parsing,
124 Both files have a format designed to reduce the need for parsing,
125 by using fixed-size binary components as much as possible.
125 by using fixed-size binary components as much as possible.
126 For data that is not fixed-size,
126 For data that is not fixed-size,
127 references to other parts of a file can be made by storing "pseudo-pointers":
127 references to other parts of a file can be made by storing "pseudo-pointers":
128 integers counted in bytes from the start of a file.
128 integers counted in bytes from the start of a file.
129 For read-only access no data structure is needed,
129 For read-only access no data structure is needed,
130 only a bytes buffer (possibly memory-mapped directly from the filesystem)
130 only a bytes buffer (possibly memory-mapped directly from the filesystem)
131 with specific parts read on demand.
131 with specific parts read on demand.
132
132
133 The data file contains "nodes" organized in a tree.
133 The data file contains "nodes" organized in a tree.
134 Each node represents a file or directory inside the working directory
134 Each node represents a file or directory inside the working directory
135 or its parent changeset.
135 or its parent changeset.
136 This tree has the same structure as the filesystem,
136 This tree has the same structure as the filesystem,
137 so a node representing a directory has child nodes representing
137 so a node representing a directory has child nodes representing
138 the files and subdirectories contained directly in that directory.
138 the files and subdirectories contained directly in that directory.
139
139
140 The docket file format
140 The docket file format
141 ----------------------
141 ----------------------
142
142
143 This is implemented in `rust/hg-core/src/dirstate_tree/on_disk.rs`
143 This is implemented in `rust/hg-core/src/dirstate_tree/on_disk.rs`
144 and `mercurial/dirstateutils/docket.py`.
144 and `mercurial/dirstateutils/docket.py`.
145
145
146 Components of the docket file are found at fixed offsets,
146 Components of the docket file are found at fixed offsets,
147 counted in bytes from the start of the file:
147 counted in bytes from the start of the file:
148
148
149 * Offset 0:
149 * Offset 0:
150 The 12-bytes marker string "dirstate-v2\n" ending with a newline character.
150 The 12-bytes marker string "dirstate-v2\n" ending with a newline character.
151 This makes it easier to tell a dirstate-v2 file from a dirstate-v1 file,
151 This makes it easier to tell a dirstate-v2 file from a dirstate-v1 file,
152 although it is not strictly necessary
152 although it is not strictly necessary
153 since `.hg/requires` determines which format to use.
153 since `.hg/requires` determines which format to use.
154
154
155 * Offset 12:
155 * Offset 12:
156 The changeset node ID on the first parent of the working directory,
156 The changeset node ID on the first parent of the working directory,
157 as up to 32 binary bytes.
157 as up to 32 binary bytes.
158 If a node ID is shorter (20 bytes for SHA-1),
158 If a node ID is shorter (20 bytes for SHA-1),
159 it is start-aligned and the rest of the bytes are set to zero.
159 it is start-aligned and the rest of the bytes are set to zero.
160
160
161 * Offset 44:
161 * Offset 44:
162 The changeset node ID on the second parent of the working directory,
162 The changeset node ID on the second parent of the working directory,
163 or all zeros if there isn’t one.
163 or all zeros if there isn’t one.
164 Also 32 binary bytes.
164 Also 32 binary bytes.
165
165
166 * Offset 76:
166 * Offset 76:
167 Tree metadata on 44 bytes, described below.
167 Tree metadata on 44 bytes, described below.
168 Its separation in this documentation from the rest of the docket
168 Its separation in this documentation from the rest of the docket
169 reflects a detail of the current implementation.
169 reflects a detail of the current implementation.
170 Since tree metadata is also made of fields at fixed offsets, those could
170 Since tree metadata is also made of fields at fixed offsets, those could
171 be inlined here by adding 76 bytes to each offset.
171 be inlined here by adding 76 bytes to each offset.
172
172
173 * Offset 120:
173 * Offset 120:
174 The used size of the data file, as a 32-bit big-endian integer.
174 The used size of the data file, as a 32-bit big-endian integer.
175 The actual size of the data file may be larger
175 The actual size of the data file may be larger
176 (if another Mercurial process is appending to it
176 (if another Mercurial process is appending to it
177 but has not updated the docket yet).
177 but has not updated the docket yet).
178 That extra data must be ignored.
178 That extra data must be ignored.
179
179
180 * Offset 124:
180 * Offset 124:
181 The length of the data file identifier, as a 8-bit integer.
181 The length of the data file identifier, as a 8-bit integer.
182
182
183 * Offset 125:
183 * Offset 125:
184 The data file identifier.
184 The data file identifier.
185
185
186 * Any additional data is currently ignored, and dropped when updating the file.
186 * Any additional data is currently ignored, and dropped when updating the file.
187
187
188 Tree metadata in the docket file
188 Tree metadata in the docket file
189 --------------------------------
189 --------------------------------
190
190
191 Tree metadata is similarly made of components at fixed offsets.
191 Tree metadata is similarly made of components at fixed offsets.
192 These offsets are counted in bytes from the start of tree metadata,
192 These offsets are counted in bytes from the start of tree metadata,
193 which is 76 bytes after the start of the docket file.
193 which is 76 bytes after the start of the docket file.
194
194
195 This metadata can be thought of as the singular root of the tree
195 This metadata can be thought of as the singular root of the tree
196 formed by nodes in the data file.
196 formed by nodes in the data file.
197
197
198 * Offset 0:
198 * Offset 0:
199 Pseudo-pointer to the start of root nodes,
199 Pseudo-pointer to the start of root nodes,
200 counted in bytes from the start of the data file,
200 counted in bytes from the start of the data file,
201 as a 32-bit big-endian integer.
201 as a 32-bit big-endian integer.
202 These nodes describe files and directories found directly
202 These nodes describe files and directories found directly
203 at the root of the working directory.
203 at the root of the working directory.
204
204
205 * Offset 4:
205 * Offset 4:
206 Number of root nodes, as a 32-bit big-endian integer.
206 Number of root nodes, as a 32-bit big-endian integer.
207
207
208 * Offset 8:
208 * Offset 8:
209 Total number of nodes in the entire tree that "have a dirstate entry",
209 Total number of nodes in the entire tree that "have a dirstate entry",
210 as a 32-bit big-endian integer.
210 as a 32-bit big-endian integer.
211 Those nodes represent files that would be present at all in `dirstate-v1`.
211 Those nodes represent files that would be present at all in `dirstate-v1`.
212 This is typically less than the total number of nodes.
212 This is typically less than the total number of nodes.
213 This counter is used to implement `len(dirstatemap)`.
213 This counter is used to implement `len(dirstatemap)`.
214
214
215 * Offset 12:
215 * Offset 12:
216 Number of nodes in the entire tree that have a copy source,
216 Number of nodes in the entire tree that have a copy source,
217 as a 32-bit big-endian integer.
217 as a 32-bit big-endian integer.
218 At the next commit, these files are recorded
218 At the next commit, these files are recorded
219 as having been copied or moved/renamed from that source.
219 as having been copied or moved/renamed from that source.
220 (A move is recorded as a copy and separate removal of the source.)
220 (A move is recorded as a copy and separate removal of the source.)
221 This counter is used to implement `len(dirstatemap.copymap)`.
221 This counter is used to implement `len(dirstatemap.copymap)`.
222
222
223 * Offset 16:
223 * Offset 16:
224 An estimation of how many bytes of the data file
224 An estimation of how many bytes of the data file
225 (within its used size) are unused, as a 32-bit big-endian integer.
225 (within its used size) are unused, as a 32-bit big-endian integer.
226 When appending to an existing data file,
226 When appending to an existing data file,
227 some existing nodes or paths can be unreachable from the new root
227 some existing nodes or paths can be unreachable from the new root
228 but they still take up space.
228 but they still take up space.
229 This counter is used to decide when to write a new data file from scratch
229 This counter is used to decide when to write a new data file from scratch
230 instead of appending to an existing one,
230 instead of appending to an existing one,
231 in order to get rid of that unreachable data
231 in order to get rid of that unreachable data
232 and avoid unbounded file size growth.
232 and avoid unbounded file size growth.
233
233
234 * Offset 20:
234 * Offset 20:
235 These four bytes are currently ignored
235 These four bytes are currently ignored
236 and reset to zero when updating a docket file.
236 and reset to zero when updating a docket file.
237 This is an attempt at forward compatibility:
237 This is an attempt at forward compatibility:
238 future Mercurial versions could use this as a bit field
238 future Mercurial versions could use this as a bit field
239 to indicate that a dirstate has additional data or constraints.
239 to indicate that a dirstate has additional data or constraints.
240 Finding a dirstate file with the relevant bit unset indicates that
240 Finding a dirstate file with the relevant bit unset indicates that
241 it was written by a then-older version
241 it was written by a then-older version
242 which is not aware of that future change.
242 which is not aware of that future change.
243
243
244 * Offset 24:
244 * Offset 24:
245 Either 20 zero bytes, or a SHA-1 hash as 20 binary bytes.
245 Either 20 zero bytes, or a SHA-1 hash as 20 binary bytes.
246 When present, the hash is of ignore patterns
246 When present, the hash is of ignore patterns
247 that were used for some previous run of the `status` algorithm.
247 that were used for some previous run of the `status` algorithm.
248
248
249 * (Offset 44: end of tree metadata)
249 * (Offset 44: end of tree metadata)
250
250
251 Optional hash of ignore patterns
251 Optional hash of ignore patterns
252 --------------------------------
252 --------------------------------
253
253
254 The implementation of `status` at `rust/hg-core/src/dirstate_tree/status.rs`
254 The implementation of `status` at `rust/hg-core/src/dirstate_tree/status.rs`
255 has been optimized such that its run time is dominated by calls
255 has been optimized such that its run time is dominated by calls
256 to `stat` for reading the filesystem metadata of a file or directory,
256 to `stat` for reading the filesystem metadata of a file or directory,
257 and to `readdir` for listing the contents of a directory.
257 and to `readdir` for listing the contents of a directory.
258 In some cases the algorithm can skip calls to `readdir`
258 In some cases the algorithm can skip calls to `readdir`
259 (saving significant time)
259 (saving significant time)
260 because the dirstate already contains enough of the relevant information
260 because the dirstate already contains enough of the relevant information
261 to build the correct `status` results.
261 to build the correct `status` results.
262
262
263 The default configuration of `hg status` is to list unknown files
263 The default configuration of `hg status` is to list unknown files
264 but not ignored files.
264 but not ignored files.
265 In this case, it matters for the `readdir`-skipping optimization
265 In this case, it matters for the `readdir`-skipping optimization
266 if a given file used to be ignored but became unknown
266 if a given file used to be ignored but became unknown
267 because `.hgignore` changed.
267 because `.hgignore` changed.
268 To detect the possibility of such a change,
268 To detect the possibility of such a change,
269 the tree metadata contains an optional hash of all ignore patterns.
269 the tree metadata contains an optional hash of all ignore patterns.
270
270
271 We define:
271 We define:
272
272
273 * "Root" ignore files as:
273 * "Root" ignore files as:
274
274
275 - `.hgignore` at the root of the repository if it exists
275 - `.hgignore` at the root of the repository if it exists
276 - And all files from `ui.ignore.*` config.
276 - And all files from `ui.ignore.*` config.
277
277
278 This set of files is sorted by the string representation of their path.
278 This set of files is sorted by the string representation of their path.
279
279
280 * The "expanded contents" of an ignore files is the byte string made
280 * The "expanded contents" of an ignore files is the byte string made
281 by the concatenation of its contents followed by the "expanded contents"
281 by the concatenation of its contents followed by the "expanded contents"
282 of other files included with `include:` or `subinclude:` directives,
282 of other files included with `include:` or `subinclude:` directives,
283 in inclusion order. This definition is recursive, as included files can
283 in inclusion order. This definition is recursive, as included files can
284 themselves include more files.
284 themselves include more files.
285
285
286 This hash is defined as the SHA-1 of the concatenation (in sorted
286 This hash is defined as the SHA-1 of the concatenation (in sorted
287 order) of the "expanded contents" of each "root" ignore file.
287 order) of the "expanded contents" of each "root" ignore file.
288 (Note that computing this does not require actually concatenating
288 (Note that computing this does not require actually concatenating
289 into a single contiguous byte sequence.
289 into a single contiguous byte sequence.
290 Instead a SHA-1 hasher object can be created
290 Instead a SHA-1 hasher object can be created
291 and fed separate chunks one by one.)
291 and fed separate chunks one by one.)
292
292
293 The data file format
293 The data file format
294 --------------------
294 --------------------
295
295
296 This is implemented in `rust/hg-core/src/dirstate_tree/on_disk.rs`
296 This is implemented in `rust/hg-core/src/dirstate_tree/on_disk.rs`
297 and `mercurial/dirstateutils/v2.py`.
297 and `mercurial/dirstateutils/v2.py`.
298
298
299 The data file contains two types of data: paths and nodes.
299 The data file contains two types of data: paths and nodes.
300
300
301 Paths and nodes can be organized in any order in the file, except that sibling
301 Paths and nodes can be organized in any order in the file, except that sibling
302 nodes must be next to each other and sorted by their path.
302 nodes must be next to each other and sorted by their path.
303 Contiguity lets the parent refer to them all
303 Contiguity lets the parent refer to them all
304 by their count and a single pseudo-pointer,
304 by their count and a single pseudo-pointer,
305 instead of storing one pseudo-pointer per child node.
305 instead of storing one pseudo-pointer per child node.
306 Sorting allows using binary search to find a child node with a given name
306 Sorting allows using binary search to find a child node with a given name
307 in `O(log(n))` byte sequence comparisons.
307 in `O(log(n))` byte sequence comparisons.
308
308
309 The current implementation writes paths and child nodes before a given node
309 The current implementation writes paths and child nodes before a given node
310 for ease of figuring out the value of pseudo-pointers by the time they are to be
310 for ease of figuring out the value of pseudo-pointers by the time they are to be
311 written, but this is not an obligation and readers must not rely on it.
311 written, but this is not an obligation and readers must not rely on it.
312
312
313 A path is stored as a byte string anywhere in the file, without delimiter.
313 A path is stored as a byte string anywhere in the file, without delimiter.
314 It is referred to by one or more nodes by a pseudo-pointer to its start, and its
314 It is referred to by one or more nodes by a pseudo-pointer to its start, and its
315 length in bytes. Since there is no delimiter,
315 length in bytes. Since there is no delimiter,
316 when a path is a substring of another the same bytes could be reused,
316 when a path is a substring of another the same bytes could be reused,
317 although the implementation does not exploit this as of this writing.
317 although the implementation does not exploit this as of this writing.
318
318
319 A node is stored on 43 bytes with components at fixed offsets. Paths and
319 A node is stored on 43 bytes with components at fixed offsets. Paths and
320 child nodes relevant to a node are stored externally and referenced though
320 child nodes relevant to a node are stored externally and referenced though
321 pseudo-pointers.
321 pseudo-pointers.
322
322
323 All integers are stored in big-endian. All pseudo-pointers are 32-bit integers
323 All integers are stored in big-endian. All pseudo-pointers are 32-bit integers
324 counting bytes from the start of the data file. Path lengths and positions
324 counting bytes from the start of the data file. Path lengths and positions
325 are 16-bit integers, also counted in bytes.
325 are 16-bit integers, also counted in bytes.
326
326
327 Node components are:
327 Node components are:
328
328
329 * Offset 0:
329 * Offset 0:
330 Pseudo-pointer to the full path of this node,
330 Pseudo-pointer to the full path of this node,
331 from the working directory root.
331 from the working directory root.
332
332
333 * Offset 4:
333 * Offset 4:
334 Length of the full path.
334 Length of the full path.
335
335
336 * Offset 6:
336 * Offset 6:
337 Position of the last `/` path separator within the full path,
337 Position of the last `/` path separator within the full path,
338 in bytes from the start of the full path,
338 in bytes from the start of the full path,
339 or zero if there isn’t one.
339 or zero if there isn’t one.
340 The part of the full path after this position is the "base name".
340 The part of the full path after this position is the "base name".
341 Since sibling nodes have the same parent, only their base names vary
341 Since sibling nodes have the same parent, only their base names vary
342 and needs to be considered when doing binary search to find a given path.
342 and needs to be considered when doing binary search to find a given path.
343
343
344 * Offset 8:
344 * Offset 8:
345 Pseudo-pointer to the "copy source" path for this node,
345 Pseudo-pointer to the "copy source" path for this node,
346 or zero if there is no copy source.
346 or zero if there is no copy source.
347
347
348 * Offset 12:
348 * Offset 12:
349 Length of the copy source path, or zero if there isn’t one.
349 Length of the copy source path, or zero if there isn’t one.
350
350
351 * Offset 14:
351 * Offset 14:
352 Pseudo-pointer to the start of child nodes.
352 Pseudo-pointer to the start of child nodes.
353
353
354 * Offset 18:
354 * Offset 18:
355 Number of child nodes, as a 32-bit integer.
355 Number of child nodes, as a 32-bit integer.
356 They occupy 43 times this number of bytes
356 They occupy 43 times this number of bytes
357 (not counting space for paths, and further descendants).
357 (not counting space for paths, and further descendants).
358
358
359 * Offset 22:
359 * Offset 22:
360 Number as a 32-bit integer of descendant nodes in this subtree,
360 Number as a 32-bit integer of descendant nodes in this subtree,
361 not including this node itself,
361 not including this node itself,
362 that "have a dirstate entry".
362 that "have a dirstate entry".
363 Those nodes represent files that would be present at all in `dirstate-v1`.
363 Those nodes represent files that would be present at all in `dirstate-v1`.
364 This is typically less than the total number of descendants.
364 This is typically less than the total number of descendants.
365 This counter is used to implement `has_dir`.
365 This counter is used to implement `has_dir`.
366
366
367 * Offset 26:
367 * Offset 26:
368 Number as a 32-bit integer of descendant nodes in this subtree,
368 Number as a 32-bit integer of descendant nodes in this subtree,
369 not including this node itself,
369 not including this node itself,
370 that represent files tracked in the working directory.
370 that represent files tracked in the working directory.
371 (For example, `hg rm` makes a file untracked.)
371 (For example, `hg rm` makes a file untracked.)
372 This counter is used to implement `has_tracked_dir`.
372 This counter is used to implement `has_tracked_dir`.
373
373
374 * Offset 30:
374 * Offset 30:
375 A `flags` fields that packs some boolean values as bits of a 16-bit integer.
375 A `flags` fields that packs some boolean values as bits of a 16-bit integer.
376 Starting from least-significant, bit masks are::
376 Starting from least-significant, bit masks are::
377
377
378 WDIR_TRACKED = 1 << 0
378 WDIR_TRACKED = 1 << 0
379 P1_TRACKED = 1 << 1
379 P1_TRACKED = 1 << 1
380 P2_INFO = 1 << 2
380 P2_INFO = 1 << 2
381 MODE_EXEC_PERM = 1 << 3
381 MODE_EXEC_PERM = 1 << 3
382 MODE_IS_SYMLINK = 1 << 4
382 MODE_IS_SYMLINK = 1 << 4
383 HAS_FALLBACK_EXEC = 1 << 5
383 HAS_FALLBACK_EXEC = 1 << 5
384 FALLBACK_EXEC = 1 << 6
384 FALLBACK_EXEC = 1 << 6
385 HAS_FALLBACK_SYMLINK = 1 << 7
385 HAS_FALLBACK_SYMLINK = 1 << 7
386 FALLBACK_SYMLINK = 1 << 8
386 FALLBACK_SYMLINK = 1 << 8
387 EXPECTED_STATE_IS_MODIFIED = 1 << 9
387 EXPECTED_STATE_IS_MODIFIED = 1 << 9
388 HAS_MODE_AND_SIZE = 1 << 10
388 HAS_MODE_AND_SIZE = 1 << 10
389 HAS_MTIME = 1 << 11
389 HAS_MTIME = 1 << 11
390 MTIME_SECOND_AMBIGUOUS = 1 << 12
390 MTIME_SECOND_AMBIGUOUS = 1 << 12
391 DIRECTORY = 1 << 13
391 DIRECTORY = 1 << 13
392 ALL_UNKNOWN_RECORDED = 1 << 14
392 ALL_UNKNOWN_RECORDED = 1 << 14
393 ALL_IGNORED_RECORDED = 1 << 15
393 ALL_IGNORED_RECORDED = 1 << 15
394
394
395 The meaning of each bit is described below.
395 The meaning of each bit is described below.
396
396
397 Other bits are unset.
397 Other bits are unset.
398 They may be assigned meaning in the future,
398 They may be assigned meaning in the future,
399 with the limitation that Mercurial versions that pre-date such meaning
399 with the limitation that Mercurial versions that pre-date such meaning
400 will always reset those bits to unset when writing nodes.
400 will always reset those bits to unset when writing nodes.
401 (A new node is written for any mutation in its subtree,
401 (A new node is written for any mutation in its subtree,
402 leaving the bytes of the old node unreachable
402 leaving the bytes of the old node unreachable
403 until the data file is rewritten entirely.)
403 until the data file is rewritten entirely.)
404
404
405 * Offset 32:
405 * Offset 32:
406 A `size` field described below, as a 32-bit integer.
406 A `size` field described below, as a 32-bit integer.
407 Unlike in dirstate-v1, negative values are not used.
407 Unlike in dirstate-v1, negative values are not used.
408
408
409 * Offset 36:
409 * Offset 36:
410 The seconds component of an `mtime` field described below,
410 The seconds component of an `mtime` field described below,
411 as a 32-bit integer.
411 as a 32-bit integer.
412 Unlike in dirstate-v1, negative values are not used.
412 Unlike in dirstate-v1, negative values are not used.
413 When `mtime` is used, this is number of seconds since the Unix epoch
413 When `mtime` is used, this is number of seconds since the Unix epoch
414 truncated to its lower 31 bits.
414 truncated to its lower 31 bits.
415
415
416 * Offset 40:
416 * Offset 40:
417 The nanoseconds component of an `mtime` field described below,
417 The nanoseconds component of an `mtime` field described below,
418 as a 32-bit integer.
418 as a 32-bit integer.
419 When `mtime` is used,
419 When `mtime` is used,
420 this is the number of nanoseconds since `mtime.seconds`,
420 this is the number of nanoseconds since `mtime.seconds`,
421 always strictly less than one billion.
421 always strictly less than one billion.
422
422
423 This may be zero if more precision is not available.
423 This may be zero if more precision is not available.
424 (This can happen because of limitations in any of Mercurial, Python,
424 (This can happen because of limitations in any of Mercurial, Python,
425 libc, the operating system, …)
425 libc, the operating system, …)
426
426
427 When comparing two mtimes and either has this component set to zero,
427 When comparing two mtimes and either has this component set to zero,
428 the sub-second precision of both should be ignored.
428 the sub-second precision of both should be ignored.
429 False positives when checking mtime equality due to clock resolution
429 False positives when checking mtime equality due to clock resolution
430 are always possible and the status algorithm needs to deal with them,
430 are always possible and the status algorithm needs to deal with them,
431 but having too many false negatives could be harmful too.
431 but having too many false negatives could be harmful too.
432
432
433 * (Offset 44: end of this node)
433 * (Offset 44: end of this node)
434
434
435 The meaning of the boolean values packed in `flags` is:
435 The meaning of the boolean values packed in `flags` is:
436
436
437 `WDIR_TRACKED`
437 `WDIR_TRACKED`
438 Set if the working directory contains a tracked file at this node’s path.
438 Set if the working directory contains a tracked file at this node’s path.
439 This is typically set and unset by `hg add` and `hg rm`.
439 This is typically set and unset by `hg add` and `hg rm`.
440
440
441 `P1_TRACKED`
441 `P1_TRACKED`
442 Set if the working directory’s first parent changeset
442 Set if the working directory’s first parent changeset
443 (whose node identifier is found in tree metadata)
443 (whose node identifier is found in tree metadata)
444 contains a tracked file at this node’s path.
444 contains a tracked file at this node’s path.
445 This is a cache to reduce manifest lookups.
445 This is a cache to reduce manifest lookups.
446
446
447 `P2_INFO`
447 `P2_INFO`
448 Set if the file has been involved in some merge operation.
448 Set if the file has been involved in some merge operation.
449 Either because it was actually merged,
449 Either because it was actually merged,
450 or because the version in the second parent p2 version was ahead,
450 or because the version in the second parent p2 version was ahead,
451 or because some rename moved it there.
451 or because some rename moved it there.
452 In either case `hg status` will want it displayed as modified.
452 In either case `hg status` will want it displayed as modified.
453
453
454 Files that would be mentioned at all in the `dirstate-v1` file format
454 Files that would be mentioned at all in the `dirstate-v1` file format
455 have a node with at least one of the above three bits set in `dirstate-v2`.
455 have a node with at least one of the above three bits set in `dirstate-v2`.
456 Let’s call these files "tracked anywhere",
456 Let’s call these files "tracked anywhere",
457 and "untracked" the nodes with all three of these bits unset.
457 and "untracked" the nodes with all three of these bits unset.
458 Untracked nodes are typically for directories:
458 Untracked nodes are typically for directories:
459 they hold child nodes and form the tree structure.
459 they hold child nodes and form the tree structure.
460 Additional untracked nodes may also exist.
460 Additional untracked nodes may also exist.
461 Although implementations should strive to clean up nodes
461 Although implementations should strive to clean up nodes
462 that are entirely unused, other untracked nodes may also exist.
462 that are entirely unused, other untracked nodes may also exist.
463 For example, a future version of Mercurial might in some cases
463 For example, a future version of Mercurial might in some cases
464 add nodes for untracked files or/and ignored files in the working directory
464 add nodes for untracked files or/and ignored files in the working directory
465 in order to optimize `hg status`
465 in order to optimize `hg status`
466 by enabling it to skip `readdir` in more cases.
466 by enabling it to skip `readdir` in more cases.
467
467
468 `HAS_MODE_AND_SIZE`
468 `HAS_MODE_AND_SIZE`
469 Must be unset for untracked nodes.
469 Must be unset for untracked nodes.
470 For files tracked anywhere, if this is set:
470 For files tracked anywhere, if this is set:
471 - The `size` field is the expected file size,
471 - The `size` field is the expected file size,
472 in bytes truncated to its lower 31 bits.
472 in bytes truncated to its lower 31 bits.
473 - The expected execute permission for the file’s owner
473 - The expected execute permission for the file’s owner
474 is given by `MODE_EXEC_PERM`
474 is given by `MODE_EXEC_PERM`
475 - The expected file type is given by `MODE_IS_SYMLINK`:
475 - The expected file type is given by `MODE_IS_SYMLINK`:
476 a symbolic link if set, or a normal file if unset.
476 a symbolic link if set, or a normal file if unset.
477 If this is unset the expected size, permission, and file type are unknown.
477 If this is unset the expected size, permission, and file type are unknown.
478 The `size` field is unused (set to zero).
478 The `size` field is unused (set to zero).
479
479
480 `HAS_MTIME`
480 `HAS_MTIME`
481 The node contains a "valid" last modification time in the `mtime` field.
481 The node contains a "valid" last modification time in the `mtime` field.
482
482
483
483
484 It means the `mtime` was already strictly in the past when observed,
484 It means the `mtime` was already strictly in the past when observed,
485 meaning that later changes cannot happen in the same clock tick
485 meaning that later changes cannot happen in the same clock tick
486 and must cause a different modification time
486 and must cause a different modification time
487 (unless the system clock jumps back and we get unlucky,
487 (unless the system clock jumps back and we get unlucky,
488 which is not impossible but deemed unlikely enough).
488 which is not impossible but deemed unlikely enough).
489
489
490 This means that if `std::fs::symlink_metadata` later reports
490 This means that if `std::fs::symlink_metadata` later reports
491 the same modification time
491 the same modification time
492 and ignored patterns haven’t changed,
492 and ignored patterns haven’t changed,
493 we can assume the node to be unchanged on disk.
493 we can assume the node to be unchanged on disk.
494
494
495 The `mtime` field can then be used to skip more expensive lookup when
495 The `mtime` field can then be used to skip more expensive lookup when
496 checking the status of "tracked" nodes.
496 checking the status of "tracked" nodes.
497
497
498 It can also be set for nodes where `DIRECTORY` is set.
498 It can also be set for nodes where `DIRECTORY` is set.
499 See `DIRECTORY` documentation for details.
499 See `DIRECTORY` documentation for details.
500
500
501 `DIRECTORY`
501 `DIRECTORY`
502 When set, this entry will match a directory that exists or existed on the
502 When set, this entry will match a directory that exists or existed on the
503 file system.
503 file system.
504
504
505 * When `HAS_MTIME` is set a directory has been seen on the file system and
505 * When `HAS_MTIME` is set a directory has been seen on the file system and
506 `mtime` matches its last modification time. However, `HAS_MTIME` not being set
506 `mtime` matches its last modification time. However, `HAS_MTIME` not being set
507 does not indicate the lack of directory on the file system.
507 does not indicate the lack of directory on the file system.
508
508
509 * When not tracked anywhere, this node does not represent an ignored or
509 * When not tracked anywhere, this node does not represent an ignored or
510 unknown file on disk.
510 unknown file on disk.
511
511
512 If `HAS_MTIME` is set
512 If `HAS_MTIME` is set
513 and `mtime` matches the last modification time of the directory on disk,
513 and `mtime` matches the last modification time of the directory on disk,
514 the directory is unchanged
514 the directory is unchanged
515 and we can skip calling `std::fs::read_dir` again for this directory,
515 and we can skip calling `std::fs::read_dir` again for this directory,
516 and iterate child dirstate nodes instead.
516 and iterate child dirstate nodes instead.
517 (as long as `ALL_UNKNOWN_RECORDED` and `ALL_IGNORED_RECORDED` are taken
517 (as long as `ALL_UNKNOWN_RECORDED` and `ALL_IGNORED_RECORDED` are taken
518 into account)
518 into account)
519
519
520 `MODE_EXEC_PERM`
520 `MODE_EXEC_PERM`
521 Must be unset if `HAS_MODE_AND_SIZE` is unset.
521 Must be unset if `HAS_MODE_AND_SIZE` is unset.
522 If `HAS_MODE_AND_SIZE` is set,
522 If `HAS_MODE_AND_SIZE` is set,
523 this indicates whether the file’s owner is expected
523 this indicates whether the file’s owner is expected
524 to have execute permission.
524 to have execute permission.
525
525
526 Beware that on systems without fs support for this information, the value
527 stored in the dirstate might be wrong and should not be relied on.
528
526 `MODE_IS_SYMLINK`
529 `MODE_IS_SYMLINK`
527 Must be unset if `HAS_MODE_AND_SIZE` is unset.
530 Must be unset if `HAS_MODE_AND_SIZE` is unset.
528 If `HAS_MODE_AND_SIZE` is set,
531 If `HAS_MODE_AND_SIZE` is set,
529 this indicates whether the file is expected to be a symlink
532 this indicates whether the file is expected to be a symlink
530 as opposed to a normal file.
533 as opposed to a normal file.
531
534
535 Beware that on systems without fs support for this information, the value
536 stored in the dirstate might be wrong and should not be relied on.
537
532 `EXPECTED_STATE_IS_MODIFIED`
538 `EXPECTED_STATE_IS_MODIFIED`
533 Must be unset for untracked nodes.
539 Must be unset for untracked nodes.
534 For:
540 For:
535 - a file tracked anywhere
541 - a file tracked anywhere
536 - that has expected metadata (`HAS_MODE_AND_SIZE` and `HAS_MTIME`)
542 - that has expected metadata (`HAS_MODE_AND_SIZE` and `HAS_MTIME`)
537 - if that metadata matches
543 - if that metadata matches
538 metadata found in the working directory with `stat`
544 metadata found in the working directory with `stat`
539 This bit indicates the status of the file.
545 This bit indicates the status of the file.
540 If set, the status is modified. If unset, it is clean.
546 If set, the status is modified. If unset, it is clean.
541
547
542 In cases where `hg status` needs to read the contents of a file
548 In cases where `hg status` needs to read the contents of a file
543 because metadata is ambiguous, this bit lets it record the result
549 because metadata is ambiguous, this bit lets it record the result
544 if the result is modified so that a future run of `hg status`
550 if the result is modified so that a future run of `hg status`
545 does not need to do the same again.
551 does not need to do the same again.
546 It is valid to never set this bit,
552 It is valid to never set this bit,
547 and consider expected metadata ambiguous if it is set.
553 and consider expected metadata ambiguous if it is set.
548
554
549 `ALL_UNKNOWN_RECORDED`
555 `ALL_UNKNOWN_RECORDED`
550 If set, all "unknown" children existing on disk (at the time of the last
556 If set, all "unknown" children existing on disk (at the time of the last
551 status) have been recorded and the `mtime` associated with
557 status) have been recorded and the `mtime` associated with
552 `DIRECTORY` can be used for optimization even when "unknown" file
558 `DIRECTORY` can be used for optimization even when "unknown" file
553 are listed.
559 are listed.
554
560
555 Note that the amount of recorded "unknown" children can still be zero if none
561 Note that the amount of recorded "unknown" children can still be zero if none
556 were present.
562 were present.
557
563
558 Also note that having this flag unset does not imply that no "unknown"
564 Also note that having this flag unset does not imply that no "unknown"
559 children have been recorded. Some might be present, but there is no guarantee
565 children have been recorded. Some might be present, but there is no guarantee
560 that it will be all of them.
566 that it will be all of them.
561
567
562 `ALL_IGNORED_RECORDED`
568 `ALL_IGNORED_RECORDED`
563 If set, all "ignored" children existing on disk (at the time of the last
569 If set, all "ignored" children existing on disk (at the time of the last
564 status) have been recorded and the `mtime` associated with
570 status) have been recorded and the `mtime` associated with
565 `DIRECTORY` can be used for optimization even when "ignored" file
571 `DIRECTORY` can be used for optimization even when "ignored" file
566 are listed.
572 are listed.
567
573
568 Note that the amount of recorded "ignored" children can still be zero if none
574 Note that the amount of recorded "ignored" children can still be zero if none
569 were present.
575 were present.
570
576
571 Also note that having this flag unset does not imply that no "ignored"
577 Also note that having this flag unset does not imply that no "ignored"
572 children have been recorded. Some might be present, but there is no guarantee
578 children have been recorded. Some might be present, but there is no guarantee
573 that it will be all of them.
579 that it will be all of them.
574
580
575 `HAS_FALLBACK_EXEC`
581 `HAS_FALLBACK_EXEC`
576 If this flag is set, the entry carries "fallback" information for the
582 If this flag is set, the entry carries "fallback" information for the
577 executable bit in the `FALLBACK_EXEC` flag.
583 executable bit in the `FALLBACK_EXEC` flag.
578
584
579 Fallback information can be stored in the dirstate to keep track of
585 Fallback information can be stored in the dirstate to keep track of
580 filesystem attribute tracked by Mercurial when the underlying file
586 filesystem attribute tracked by Mercurial when the underlying file
581 system or operating system does not support that property, (e.g.
587 system or operating system does not support that property, (e.g.
582 Windows).
588 Windows).
583
589
584 `FALLBACK_EXEC`
590 `FALLBACK_EXEC`
585 Should be ignored if `HAS_FALLBACK_EXEC` is unset. If set the file for this
591 Should be ignored if `HAS_FALLBACK_EXEC` is unset. If set the file for this
586 entry should be considered executable if that information cannot be
592 entry should be considered executable if that information cannot be
587 extracted from the file system. If unset it should be considered
593 extracted from the file system. If unset it should be considered
588 non-executable instead.
594 non-executable instead.
589
595
590 `HAS_FALLBACK_SYMLINK`
596 `HAS_FALLBACK_SYMLINK`
591 If this flag is set, the entry carries "fallback" information for symbolic
597 If this flag is set, the entry carries "fallback" information for symbolic
592 link status in the `FALLBACK_SYMLINK` flag.
598 link status in the `FALLBACK_SYMLINK` flag.
593
599
594 Fallback information can be stored in the dirstate to keep track of
600 Fallback information can be stored in the dirstate to keep track of
595 filesystem attribute tracked by Mercurial when the underlying file
601 filesystem attribute tracked by Mercurial when the underlying file
596 system or operating system does not support that property, (e.g.
602 system or operating system does not support that property, (e.g.
597 Windows).
603 Windows).
598
604
599 `FALLBACK_SYMLINK`
605 `FALLBACK_SYMLINK`
600 Should be ignored if `HAS_FALLBACK_SYMLINK` is unset. If set the file for
606 Should be ignored if `HAS_FALLBACK_SYMLINK` is unset. If set the file for
601 this entry should be considered a symlink if that information cannot be
607 this entry should be considered a symlink if that information cannot be
602 extracted from the file system. If unset it should be considered a normal
608 extracted from the file system. If unset it should be considered a normal
603 file instead.
609 file instead.
604
610
605 `MTIME_SECOND_AMBIGUOUS`
611 `MTIME_SECOND_AMBIGUOUS`
606 This flag is relevant only when `HAS_MTIME` is set. When set, the
612 This flag is relevant only when `HAS_MTIME` is set. When set, the
607 `mtime` stored in the entry is only valid for comparison with timestamps
613 `mtime` stored in the entry is only valid for comparison with timestamps
608 that have nanosecond information. If the available timestamp does not carry
614 that have nanosecond information. If the available timestamp does not carry
609 nanosecond information, the `mtime` should be ignored and no optimisation
615 nanosecond information, the `mtime` should be ignored and no optimisation
610 can be applied.
616 can be applied.
General Comments 0
You need to be logged in to leave comments. Login now