rust: using policy.importrust from Python callers...
Georges Racinet
changeset r42645:f834ee28 (branch: default, phase: draft)
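
The gist of the change: instead of each caller hand-rolling a try/except around `from . import rustext`, module selection goes through `policy.importrust`, which returns the requested Rust module when the compiled extensions are available and the supplied `default` otherwise. A minimal sketch of what such a helper does, assuming a signature like the `policy.importrust(modname, member=None, default=None)` call seen in the hunk below (the real implementation lives in mercurial/policy.py and also honors the configured module policy):

    # Sketch only, not mercurial/policy.py itself: resolve a submodule of
    # the compiled rustext package, falling back to `default` when the
    # Rust extensions are not built.
    def importrust(modname, member=None, default=None):
        try:
            from mercurial import rustext
            rustext.__name__  # force actual import (see hgdemandimport)
        except ImportError:
            return default  # no Rust build: hand back the fallback module
        mod = getattr(rustext, modname)
        return getattr(mod, member) if member else mod

On the caller side this collapses the old five-line try/except into a single line, with the C/Python `parsers` module kept as the fallback, as the hunk below shows.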
@@ -1,1521 +1,1506
1 # dirstate.py - working directory tracking for mercurial
1 # dirstate.py - working directory tracking for mercurial
2 #
2 #
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import collections
10 import collections
11 import contextlib
11 import contextlib
12 import errno
12 import errno
13 import os
13 import os
14 import stat
14 import stat
15
15
16 from .i18n import _
16 from .i18n import _
17 from .node import nullid
17 from .node import nullid
18 from . import (
18 from . import (
19 encoding,
19 encoding,
20 error,
20 error,
21 match as matchmod,
21 match as matchmod,
22 pathutil,
22 pathutil,
23 policy,
23 policy,
24 pycompat,
24 pycompat,
25 scmutil,
25 scmutil,
26 txnutil,
26 txnutil,
27 util,
27 util,
28 )
28 )
29
29
30 try:
31 from . import rustext
32 rustext.__name__ # force actual import (see hgdemandimport)
33 except ImportError:
34 rustext = None
35
36 parsers = policy.importmod(r'parsers')
30 parsers = policy.importmod(r'parsers')
31 dirstatemod = policy.importrust(r'dirstate', default=parsers)
37
32
38 propertycache = util.propertycache
33 propertycache = util.propertycache
39 filecache = scmutil.filecache
34 filecache = scmutil.filecache
40 _rangemask = 0x7fffffff
35 _rangemask = 0x7fffffff
41
36
42 dirstatetuple = parsers.dirstatetuple
37 dirstatetuple = parsers.dirstatetuple
43
38
44 class repocache(filecache):
39 class repocache(filecache):
45 """filecache for files in .hg/"""
40 """filecache for files in .hg/"""
46 def join(self, obj, fname):
41 def join(self, obj, fname):
47 return obj._opener.join(fname)
42 return obj._opener.join(fname)
48
43
49 class rootcache(filecache):
44 class rootcache(filecache):
50 """filecache for files in the repository root"""
45 """filecache for files in the repository root"""
51 def join(self, obj, fname):
46 def join(self, obj, fname):
52 return obj._join(fname)
47 return obj._join(fname)
53
48
54 def _getfsnow(vfs):
49 def _getfsnow(vfs):
55 '''Get "now" timestamp on filesystem'''
50 '''Get "now" timestamp on filesystem'''
56 tmpfd, tmpname = vfs.mkstemp()
51 tmpfd, tmpname = vfs.mkstemp()
57 try:
52 try:
58 return os.fstat(tmpfd)[stat.ST_MTIME]
53 return os.fstat(tmpfd)[stat.ST_MTIME]
59 finally:
54 finally:
60 os.close(tmpfd)
55 os.close(tmpfd)
61 vfs.unlink(tmpname)
56 vfs.unlink(tmpname)
62
57
63 class dirstate(object):
58 class dirstate(object):
64
59
65 def __init__(self, opener, ui, root, validate, sparsematchfn):
60 def __init__(self, opener, ui, root, validate, sparsematchfn):
66 '''Create a new dirstate object.
61 '''Create a new dirstate object.
67
62
68 opener is an open()-like callable that can be used to open the
63 opener is an open()-like callable that can be used to open the
69 dirstate file; root is the root of the directory tracked by
64 dirstate file; root is the root of the directory tracked by
70 the dirstate.
65 the dirstate.
71 '''
66 '''
72 self._opener = opener
67 self._opener = opener
73 self._validate = validate
68 self._validate = validate
74 self._root = root
69 self._root = root
75 self._sparsematchfn = sparsematchfn
70 self._sparsematchfn = sparsematchfn
76 # ntpath.join(root, '') of Python 2.7.9 does not add sep if root is
71 # ntpath.join(root, '') of Python 2.7.9 does not add sep if root is
77 # UNC path pointing to root share (issue4557)
72 # UNC path pointing to root share (issue4557)
78 self._rootdir = pathutil.normasprefix(root)
73 self._rootdir = pathutil.normasprefix(root)
79 self._dirty = False
74 self._dirty = False
80 self._lastnormaltime = 0
75 self._lastnormaltime = 0
81 self._ui = ui
76 self._ui = ui
82 self._filecache = {}
77 self._filecache = {}
83 self._parentwriters = 0
78 self._parentwriters = 0
84 self._filename = 'dirstate'
79 self._filename = 'dirstate'
85 self._pendingfilename = '%s.pending' % self._filename
80 self._pendingfilename = '%s.pending' % self._filename
86 self._plchangecallbacks = {}
81 self._plchangecallbacks = {}
87 self._origpl = None
82 self._origpl = None
88 self._updatedfiles = set()
83 self._updatedfiles = set()
89 self._mapcls = dirstatemap
84 self._mapcls = dirstatemap
90 # Access and cache cwd early, so we don't access it for the first time
85 # Access and cache cwd early, so we don't access it for the first time
91 # after a working-copy update caused it to not exist (accessing it then
86 # after a working-copy update caused it to not exist (accessing it then
92 # raises an exception).
87 # raises an exception).
93 self._cwd
88 self._cwd
94
89
95 @contextlib.contextmanager
90 @contextlib.contextmanager
96 def parentchange(self):
91 def parentchange(self):
97 '''Context manager for handling dirstate parents.
92 '''Context manager for handling dirstate parents.
98
93
99 If an exception occurs in the scope of the context manager,
94 If an exception occurs in the scope of the context manager,
100 the incoherent dirstate won't be written when wlock is
95 the incoherent dirstate won't be written when wlock is
101 released.
96 released.
102 '''
97 '''
103 self._parentwriters += 1
98 self._parentwriters += 1
104 yield
99 yield
105 # Typically we want the "undo" step of a context manager in a
100 # Typically we want the "undo" step of a context manager in a
106 # finally block so it happens even when an exception
101 # finally block so it happens even when an exception
107 # occurs. In this case, however, we only want to decrement
102 # occurs. In this case, however, we only want to decrement
108 # parentwriters if the code in the with statement exits
103 # parentwriters if the code in the with statement exits
109 # normally, so we don't have a try/finally here on purpose.
104 # normally, so we don't have a try/finally here on purpose.
110 self._parentwriters -= 1
105 self._parentwriters -= 1
111
106
112 def pendingparentchange(self):
107 def pendingparentchange(self):
113 '''Returns true if the dirstate is in the middle of a set of changes
108 '''Returns true if the dirstate is in the middle of a set of changes
114 that modify the dirstate parent.
109 that modify the dirstate parent.
115 '''
110 '''
116 return self._parentwriters > 0
111 return self._parentwriters > 0
117
112
118 @propertycache
113 @propertycache
119 def _map(self):
114 def _map(self):
120 """Return the dirstate contents (see documentation for dirstatemap)."""
115 """Return the dirstate contents (see documentation for dirstatemap)."""
121 self._map = self._mapcls(self._ui, self._opener, self._root)
116 self._map = self._mapcls(self._ui, self._opener, self._root)
122 return self._map
117 return self._map
123
118
124 @property
119 @property
125 def _sparsematcher(self):
120 def _sparsematcher(self):
126 """The matcher for the sparse checkout.
121 """The matcher for the sparse checkout.
127
122
128 The working directory may not include every file from a manifest. The
123 The working directory may not include every file from a manifest. The
129 matcher obtained by this property will match a path if it is to be
124 matcher obtained by this property will match a path if it is to be
130 included in the working directory.
125 included in the working directory.
131 """
126 """
132 # TODO there is potential to cache this property. For now, the matcher
127 # TODO there is potential to cache this property. For now, the matcher
133 # is resolved on every access. (But the called function does use a
128 # is resolved on every access. (But the called function does use a
134 # cache to keep the lookup fast.)
129 # cache to keep the lookup fast.)
135 return self._sparsematchfn()
130 return self._sparsematchfn()
136
131
137 @repocache('branch')
132 @repocache('branch')
138 def _branch(self):
133 def _branch(self):
139 try:
134 try:
140 return self._opener.read("branch").strip() or "default"
135 return self._opener.read("branch").strip() or "default"
141 except IOError as inst:
136 except IOError as inst:
142 if inst.errno != errno.ENOENT:
137 if inst.errno != errno.ENOENT:
143 raise
138 raise
144 return "default"
139 return "default"
145
140
146 @property
141 @property
147 def _pl(self):
142 def _pl(self):
148 return self._map.parents()
143 return self._map.parents()
149
144
150 def hasdir(self, d):
145 def hasdir(self, d):
151 return self._map.hastrackeddir(d)
146 return self._map.hastrackeddir(d)
152
147
153 @rootcache('.hgignore')
148 @rootcache('.hgignore')
154 def _ignore(self):
149 def _ignore(self):
155 files = self._ignorefiles()
150 files = self._ignorefiles()
156 if not files:
151 if not files:
157 return matchmod.never()
152 return matchmod.never()
158
153
159 pats = ['include:%s' % f for f in files]
154 pats = ['include:%s' % f for f in files]
160 return matchmod.match(self._root, '', [], pats, warn=self._ui.warn)
155 return matchmod.match(self._root, '', [], pats, warn=self._ui.warn)
161
156
162 @propertycache
157 @propertycache
163 def _slash(self):
158 def _slash(self):
164 return self._ui.configbool('ui', 'slash') and pycompat.ossep != '/'
159 return self._ui.configbool('ui', 'slash') and pycompat.ossep != '/'
165
160
166 @propertycache
161 @propertycache
167 def _checklink(self):
162 def _checklink(self):
168 return util.checklink(self._root)
163 return util.checklink(self._root)
169
164
170 @propertycache
165 @propertycache
171 def _checkexec(self):
166 def _checkexec(self):
172 return util.checkexec(self._root)
167 return util.checkexec(self._root)
173
168
174 @propertycache
169 @propertycache
175 def _checkcase(self):
170 def _checkcase(self):
176 return not util.fscasesensitive(self._join('.hg'))
171 return not util.fscasesensitive(self._join('.hg'))
177
172
178 def _join(self, f):
173 def _join(self, f):
179 # much faster than os.path.join()
174 # much faster than os.path.join()
180 # it's safe because f is always a relative path
175 # it's safe because f is always a relative path
181 return self._rootdir + f
176 return self._rootdir + f
182
177
183 def flagfunc(self, buildfallback):
178 def flagfunc(self, buildfallback):
184 if self._checklink and self._checkexec:
179 if self._checklink and self._checkexec:
185 def f(x):
180 def f(x):
186 try:
181 try:
187 st = os.lstat(self._join(x))
182 st = os.lstat(self._join(x))
188 if util.statislink(st):
183 if util.statislink(st):
189 return 'l'
184 return 'l'
190 if util.statisexec(st):
185 if util.statisexec(st):
191 return 'x'
186 return 'x'
192 except OSError:
187 except OSError:
193 pass
188 pass
194 return ''
189 return ''
195 return f
190 return f
196
191
197 fallback = buildfallback()
192 fallback = buildfallback()
198 if self._checklink:
193 if self._checklink:
199 def f(x):
194 def f(x):
200 if os.path.islink(self._join(x)):
195 if os.path.islink(self._join(x)):
201 return 'l'
196 return 'l'
202 if 'x' in fallback(x):
197 if 'x' in fallback(x):
203 return 'x'
198 return 'x'
204 return ''
199 return ''
205 return f
200 return f
206 if self._checkexec:
201 if self._checkexec:
207 def f(x):
202 def f(x):
208 if 'l' in fallback(x):
203 if 'l' in fallback(x):
209 return 'l'
204 return 'l'
210 if util.isexec(self._join(x)):
205 if util.isexec(self._join(x)):
211 return 'x'
206 return 'x'
212 return ''
207 return ''
213 return f
208 return f
214 else:
209 else:
215 return fallback
210 return fallback
216
211
217 @propertycache
212 @propertycache
218 def _cwd(self):
213 def _cwd(self):
219 # internal config: ui.forcecwd
214 # internal config: ui.forcecwd
220 forcecwd = self._ui.config('ui', 'forcecwd')
215 forcecwd = self._ui.config('ui', 'forcecwd')
221 if forcecwd:
216 if forcecwd:
222 return forcecwd
217 return forcecwd
223 return encoding.getcwd()
218 return encoding.getcwd()
224
219
225 def getcwd(self):
220 def getcwd(self):
226 '''Return the path from which a canonical path is calculated.
221 '''Return the path from which a canonical path is calculated.
227
222
228 This path should be used to resolve file patterns or to convert
223 This path should be used to resolve file patterns or to convert
229 canonical paths back to file paths for display. It shouldn't be
224 canonical paths back to file paths for display. It shouldn't be
230 used to get real file paths. Use vfs functions instead.
225 used to get real file paths. Use vfs functions instead.
231 '''
226 '''
232 cwd = self._cwd
227 cwd = self._cwd
233 if cwd == self._root:
228 if cwd == self._root:
234 return ''
229 return ''
235 # self._root ends with a path separator if self._root is '/' or 'C:\'
230 # self._root ends with a path separator if self._root is '/' or 'C:\'
236 rootsep = self._root
231 rootsep = self._root
237 if not util.endswithsep(rootsep):
232 if not util.endswithsep(rootsep):
238 rootsep += pycompat.ossep
233 rootsep += pycompat.ossep
239 if cwd.startswith(rootsep):
234 if cwd.startswith(rootsep):
240 return cwd[len(rootsep):]
235 return cwd[len(rootsep):]
241 else:
236 else:
242 # we're outside the repo. return an absolute path.
237 # we're outside the repo. return an absolute path.
243 return cwd
238 return cwd
244
239
245 def pathto(self, f, cwd=None):
240 def pathto(self, f, cwd=None):
246 if cwd is None:
241 if cwd is None:
247 cwd = self.getcwd()
242 cwd = self.getcwd()
248 path = util.pathto(self._root, cwd, f)
243 path = util.pathto(self._root, cwd, f)
249 if self._slash:
244 if self._slash:
250 return util.pconvert(path)
245 return util.pconvert(path)
251 return path
246 return path
252
247
253 def __getitem__(self, key):
248 def __getitem__(self, key):
254 '''Return the current state of key (a filename) in the dirstate.
249 '''Return the current state of key (a filename) in the dirstate.
255
250
256 States are:
251 States are:
257 n normal
252 n normal
258 m needs merging
253 m needs merging
259 r marked for removal
254 r marked for removal
260 a marked for addition
255 a marked for addition
261 ? not tracked
256 ? not tracked
262 '''
257 '''
263 return self._map.get(key, ("?",))[0]
258 return self._map.get(key, ("?",))[0]
264
259
265 def __contains__(self, key):
260 def __contains__(self, key):
266 return key in self._map
261 return key in self._map
267
262
268 def __iter__(self):
263 def __iter__(self):
269 return iter(sorted(self._map))
264 return iter(sorted(self._map))
270
265
271 def items(self):
266 def items(self):
272 return self._map.iteritems()
267 return self._map.iteritems()
273
268
274 iteritems = items
269 iteritems = items
275
270
276 def parents(self):
271 def parents(self):
277 return [self._validate(p) for p in self._pl]
272 return [self._validate(p) for p in self._pl]
278
273
279 def p1(self):
274 def p1(self):
280 return self._validate(self._pl[0])
275 return self._validate(self._pl[0])
281
276
282 def p2(self):
277 def p2(self):
283 return self._validate(self._pl[1])
278 return self._validate(self._pl[1])
284
279
285 def branch(self):
280 def branch(self):
286 return encoding.tolocal(self._branch)
281 return encoding.tolocal(self._branch)
287
282
288 def setparents(self, p1, p2=nullid):
283 def setparents(self, p1, p2=nullid):
289 """Set dirstate parents to p1 and p2.
284 """Set dirstate parents to p1 and p2.
290
285
291 When moving from two parents to one, 'm' merged entries a
286 When moving from two parents to one, 'm' merged entries a
292 adjusted to normal and previous copy records discarded and
287 adjusted to normal and previous copy records discarded and
293 returned by the call.
288 returned by the call.
294
289
295 See localrepo.setparents()
290 See localrepo.setparents()
296 """
291 """
297 if self._parentwriters == 0:
292 if self._parentwriters == 0:
298 raise ValueError("cannot set dirstate parent outside of "
293 raise ValueError("cannot set dirstate parent outside of "
299 "dirstate.parentchange context manager")
294 "dirstate.parentchange context manager")
300
295
301 self._dirty = True
296 self._dirty = True
302 oldp2 = self._pl[1]
297 oldp2 = self._pl[1]
303 if self._origpl is None:
298 if self._origpl is None:
304 self._origpl = self._pl
299 self._origpl = self._pl
305 self._map.setparents(p1, p2)
300 self._map.setparents(p1, p2)
306 copies = {}
301 copies = {}
307 if oldp2 != nullid and p2 == nullid:
302 if oldp2 != nullid and p2 == nullid:
308 candidatefiles = self._map.nonnormalset.union(
303 candidatefiles = self._map.nonnormalset.union(
309 self._map.otherparentset)
304 self._map.otherparentset)
310 for f in candidatefiles:
305 for f in candidatefiles:
311 s = self._map.get(f)
306 s = self._map.get(f)
312 if s is None:
307 if s is None:
313 continue
308 continue
314
309
315 # Discard 'm' markers when moving away from a merge state
310 # Discard 'm' markers when moving away from a merge state
316 if s[0] == 'm':
311 if s[0] == 'm':
317 source = self._map.copymap.get(f)
312 source = self._map.copymap.get(f)
318 if source:
313 if source:
319 copies[f] = source
314 copies[f] = source
320 self.normallookup(f)
315 self.normallookup(f)
321 # Also fix up otherparent markers
316 # Also fix up otherparent markers
322 elif s[0] == 'n' and s[2] == -2:
317 elif s[0] == 'n' and s[2] == -2:
323 source = self._map.copymap.get(f)
318 source = self._map.copymap.get(f)
324 if source:
319 if source:
325 copies[f] = source
320 copies[f] = source
326 self.add(f)
321 self.add(f)
327 return copies
322 return copies
328
323
329 def setbranch(self, branch):
324 def setbranch(self, branch):
330 self.__class__._branch.set(self, encoding.fromlocal(branch))
325 self.__class__._branch.set(self, encoding.fromlocal(branch))
331 f = self._opener('branch', 'w', atomictemp=True, checkambig=True)
326 f = self._opener('branch', 'w', atomictemp=True, checkambig=True)
332 try:
327 try:
333 f.write(self._branch + '\n')
328 f.write(self._branch + '\n')
334 f.close()
329 f.close()
335
330
336 # make sure filecache has the correct stat info for _branch after
331 # make sure filecache has the correct stat info for _branch after
337 # replacing the underlying file
332 # replacing the underlying file
338 ce = self._filecache['_branch']
333 ce = self._filecache['_branch']
339 if ce:
334 if ce:
340 ce.refresh()
335 ce.refresh()
341 except: # re-raises
336 except: # re-raises
342 f.discard()
337 f.discard()
343 raise
338 raise
344
339
345 def invalidate(self):
340 def invalidate(self):
346 '''Causes the next access to reread the dirstate.
341 '''Causes the next access to reread the dirstate.
347
342
348 This is different from localrepo.invalidatedirstate() because it always
343 This is different from localrepo.invalidatedirstate() because it always
349 rereads the dirstate. Use localrepo.invalidatedirstate() if you want to
344 rereads the dirstate. Use localrepo.invalidatedirstate() if you want to
350 check whether the dirstate has changed before rereading it.'''
345 check whether the dirstate has changed before rereading it.'''
351
346
352 for a in (r"_map", r"_branch", r"_ignore"):
347 for a in (r"_map", r"_branch", r"_ignore"):
353 if a in self.__dict__:
348 if a in self.__dict__:
354 delattr(self, a)
349 delattr(self, a)
355 self._lastnormaltime = 0
350 self._lastnormaltime = 0
356 self._dirty = False
351 self._dirty = False
357 self._updatedfiles.clear()
352 self._updatedfiles.clear()
358 self._parentwriters = 0
353 self._parentwriters = 0
359 self._origpl = None
354 self._origpl = None
360
355
361 def copy(self, source, dest):
356 def copy(self, source, dest):
362 """Mark dest as a copy of source. Unmark dest if source is None."""
357 """Mark dest as a copy of source. Unmark dest if source is None."""
363 if source == dest:
358 if source == dest:
364 return
359 return
365 self._dirty = True
360 self._dirty = True
366 if source is not None:
361 if source is not None:
367 self._map.copymap[dest] = source
362 self._map.copymap[dest] = source
368 self._updatedfiles.add(source)
363 self._updatedfiles.add(source)
369 self._updatedfiles.add(dest)
364 self._updatedfiles.add(dest)
370 elif self._map.copymap.pop(dest, None):
365 elif self._map.copymap.pop(dest, None):
371 self._updatedfiles.add(dest)
366 self._updatedfiles.add(dest)
372
367
373 def copied(self, file):
368 def copied(self, file):
374 return self._map.copymap.get(file, None)
369 return self._map.copymap.get(file, None)
375
370
376 def copies(self):
371 def copies(self):
377 return self._map.copymap
372 return self._map.copymap
378
373
379 def _addpath(self, f, state, mode, size, mtime):
374 def _addpath(self, f, state, mode, size, mtime):
380 oldstate = self[f]
375 oldstate = self[f]
381 if state == 'a' or oldstate == 'r':
376 if state == 'a' or oldstate == 'r':
382 scmutil.checkfilename(f)
377 scmutil.checkfilename(f)
383 if self._map.hastrackeddir(f):
378 if self._map.hastrackeddir(f):
384 raise error.Abort(_('directory %r already in dirstate') %
379 raise error.Abort(_('directory %r already in dirstate') %
385 pycompat.bytestr(f))
380 pycompat.bytestr(f))
386 # shadows
381 # shadows
387 for d in util.finddirs(f):
382 for d in util.finddirs(f):
388 if self._map.hastrackeddir(d):
383 if self._map.hastrackeddir(d):
389 break
384 break
390 entry = self._map.get(d)
385 entry = self._map.get(d)
391 if entry is not None and entry[0] != 'r':
386 if entry is not None and entry[0] != 'r':
392 raise error.Abort(
387 raise error.Abort(
393 _('file %r in dirstate clashes with %r') %
388 _('file %r in dirstate clashes with %r') %
394 (pycompat.bytestr(d), pycompat.bytestr(f)))
389 (pycompat.bytestr(d), pycompat.bytestr(f)))
395 self._dirty = True
390 self._dirty = True
396 self._updatedfiles.add(f)
391 self._updatedfiles.add(f)
397 self._map.addfile(f, oldstate, state, mode, size, mtime)
392 self._map.addfile(f, oldstate, state, mode, size, mtime)
398
393
399 def normal(self, f):
394 def normal(self, f):
400 '''Mark a file normal and clean.'''
395 '''Mark a file normal and clean.'''
401 s = os.lstat(self._join(f))
396 s = os.lstat(self._join(f))
402 mtime = s[stat.ST_MTIME]
397 mtime = s[stat.ST_MTIME]
403 self._addpath(f, 'n', s.st_mode,
398 self._addpath(f, 'n', s.st_mode,
404 s.st_size & _rangemask, mtime & _rangemask)
399 s.st_size & _rangemask, mtime & _rangemask)
405 self._map.copymap.pop(f, None)
400 self._map.copymap.pop(f, None)
406 if f in self._map.nonnormalset:
401 if f in self._map.nonnormalset:
407 self._map.nonnormalset.remove(f)
402 self._map.nonnormalset.remove(f)
408 if mtime > self._lastnormaltime:
403 if mtime > self._lastnormaltime:
409 # Remember the most recent modification timeslot for status(),
404 # Remember the most recent modification timeslot for status(),
410 # to make sure we won't miss future size-preserving file content
405 # to make sure we won't miss future size-preserving file content
411 # modifications that happen within the same timeslot.
406 # modifications that happen within the same timeslot.
412 self._lastnormaltime = mtime
407 self._lastnormaltime = mtime
413
408
414 def normallookup(self, f):
409 def normallookup(self, f):
415 '''Mark a file normal, but possibly dirty.'''
410 '''Mark a file normal, but possibly dirty.'''
416 if self._pl[1] != nullid:
411 if self._pl[1] != nullid:
417 # if there is a merge going on and the file was either
412 # if there is a merge going on and the file was either
418 # in state 'm' (-1) or coming from other parent (-2) before
413 # in state 'm' (-1) or coming from other parent (-2) before
419 # being removed, restore that state.
414 # being removed, restore that state.
420 entry = self._map.get(f)
415 entry = self._map.get(f)
421 if entry is not None:
416 if entry is not None:
422 if entry[0] == 'r' and entry[2] in (-1, -2):
417 if entry[0] == 'r' and entry[2] in (-1, -2):
423 source = self._map.copymap.get(f)
418 source = self._map.copymap.get(f)
424 if entry[2] == -1:
419 if entry[2] == -1:
425 self.merge(f)
420 self.merge(f)
426 elif entry[2] == -2:
421 elif entry[2] == -2:
427 self.otherparent(f)
422 self.otherparent(f)
428 if source:
423 if source:
429 self.copy(source, f)
424 self.copy(source, f)
430 return
425 return
431 if entry[0] == 'm' or entry[0] == 'n' and entry[2] == -2:
426 if entry[0] == 'm' or entry[0] == 'n' and entry[2] == -2:
432 return
427 return
433 self._addpath(f, 'n', 0, -1, -1)
428 self._addpath(f, 'n', 0, -1, -1)
434 self._map.copymap.pop(f, None)
429 self._map.copymap.pop(f, None)
435
430
436 def otherparent(self, f):
431 def otherparent(self, f):
437 '''Mark as coming from the other parent, always dirty.'''
432 '''Mark as coming from the other parent, always dirty.'''
438 if self._pl[1] == nullid:
433 if self._pl[1] == nullid:
439 raise error.Abort(_("setting %r to other parent "
434 raise error.Abort(_("setting %r to other parent "
440 "only allowed in merges") % f)
435 "only allowed in merges") % f)
441 if f in self and self[f] == 'n':
436 if f in self and self[f] == 'n':
442 # merge-like
437 # merge-like
443 self._addpath(f, 'm', 0, -2, -1)
438 self._addpath(f, 'm', 0, -2, -1)
444 else:
439 else:
445 # add-like
440 # add-like
446 self._addpath(f, 'n', 0, -2, -1)
441 self._addpath(f, 'n', 0, -2, -1)
447 self._map.copymap.pop(f, None)
442 self._map.copymap.pop(f, None)
448
443
449 def add(self, f):
444 def add(self, f):
450 '''Mark a file added.'''
445 '''Mark a file added.'''
451 self._addpath(f, 'a', 0, -1, -1)
446 self._addpath(f, 'a', 0, -1, -1)
452 self._map.copymap.pop(f, None)
447 self._map.copymap.pop(f, None)
453
448
454 def remove(self, f):
449 def remove(self, f):
455 '''Mark a file removed.'''
450 '''Mark a file removed.'''
456 self._dirty = True
451 self._dirty = True
457 oldstate = self[f]
452 oldstate = self[f]
458 size = 0
453 size = 0
459 if self._pl[1] != nullid:
454 if self._pl[1] != nullid:
460 entry = self._map.get(f)
455 entry = self._map.get(f)
461 if entry is not None:
456 if entry is not None:
462 # backup the previous state
457 # backup the previous state
463 if entry[0] == 'm': # merge
458 if entry[0] == 'm': # merge
464 size = -1
459 size = -1
465 elif entry[0] == 'n' and entry[2] == -2: # other parent
460 elif entry[0] == 'n' and entry[2] == -2: # other parent
466 size = -2
461 size = -2
467 self._map.otherparentset.add(f)
462 self._map.otherparentset.add(f)
468 self._updatedfiles.add(f)
463 self._updatedfiles.add(f)
469 self._map.removefile(f, oldstate, size)
464 self._map.removefile(f, oldstate, size)
470 if size == 0:
465 if size == 0:
471 self._map.copymap.pop(f, None)
466 self._map.copymap.pop(f, None)
472
467
473 def merge(self, f):
468 def merge(self, f):
474 '''Mark a file merged.'''
469 '''Mark a file merged.'''
475 if self._pl[1] == nullid:
470 if self._pl[1] == nullid:
476 return self.normallookup(f)
471 return self.normallookup(f)
477 return self.otherparent(f)
472 return self.otherparent(f)
478
473
479 def drop(self, f):
474 def drop(self, f):
480 '''Drop a file from the dirstate'''
475 '''Drop a file from the dirstate'''
481 oldstate = self[f]
476 oldstate = self[f]
482 if self._map.dropfile(f, oldstate):
477 if self._map.dropfile(f, oldstate):
483 self._dirty = True
478 self._dirty = True
484 self._updatedfiles.add(f)
479 self._updatedfiles.add(f)
485 self._map.copymap.pop(f, None)
480 self._map.copymap.pop(f, None)
486
481
487 def _discoverpath(self, path, normed, ignoremissing, exists, storemap):
482 def _discoverpath(self, path, normed, ignoremissing, exists, storemap):
488 if exists is None:
483 if exists is None:
489 exists = os.path.lexists(os.path.join(self._root, path))
484 exists = os.path.lexists(os.path.join(self._root, path))
490 if not exists:
485 if not exists:
491 # Maybe a path component exists
486 # Maybe a path component exists
492 if not ignoremissing and '/' in path:
487 if not ignoremissing and '/' in path:
493 d, f = path.rsplit('/', 1)
488 d, f = path.rsplit('/', 1)
494 d = self._normalize(d, False, ignoremissing, None)
489 d = self._normalize(d, False, ignoremissing, None)
495 folded = d + "/" + f
490 folded = d + "/" + f
496 else:
491 else:
497 # No path components, preserve original case
492 # No path components, preserve original case
498 folded = path
493 folded = path
499 else:
494 else:
500 # recursively normalize leading directory components
495 # recursively normalize leading directory components
501 # against dirstate
496 # against dirstate
502 if '/' in normed:
497 if '/' in normed:
503 d, f = normed.rsplit('/', 1)
498 d, f = normed.rsplit('/', 1)
504 d = self._normalize(d, False, ignoremissing, True)
499 d = self._normalize(d, False, ignoremissing, True)
505 r = self._root + "/" + d
500 r = self._root + "/" + d
506 folded = d + "/" + util.fspath(f, r)
501 folded = d + "/" + util.fspath(f, r)
507 else:
502 else:
508 folded = util.fspath(normed, self._root)
503 folded = util.fspath(normed, self._root)
509 storemap[normed] = folded
504 storemap[normed] = folded
510
505
511 return folded
506 return folded
512
507
513 def _normalizefile(self, path, isknown, ignoremissing=False, exists=None):
508 def _normalizefile(self, path, isknown, ignoremissing=False, exists=None):
514 normed = util.normcase(path)
509 normed = util.normcase(path)
515 folded = self._map.filefoldmap.get(normed, None)
510 folded = self._map.filefoldmap.get(normed, None)
516 if folded is None:
511 if folded is None:
517 if isknown:
512 if isknown:
518 folded = path
513 folded = path
519 else:
514 else:
520 folded = self._discoverpath(path, normed, ignoremissing, exists,
515 folded = self._discoverpath(path, normed, ignoremissing, exists,
521 self._map.filefoldmap)
516 self._map.filefoldmap)
522 return folded
517 return folded
523
518
524 def _normalize(self, path, isknown, ignoremissing=False, exists=None):
519 def _normalize(self, path, isknown, ignoremissing=False, exists=None):
525 normed = util.normcase(path)
520 normed = util.normcase(path)
526 folded = self._map.filefoldmap.get(normed, None)
521 folded = self._map.filefoldmap.get(normed, None)
527 if folded is None:
522 if folded is None:
528 folded = self._map.dirfoldmap.get(normed, None)
523 folded = self._map.dirfoldmap.get(normed, None)
529 if folded is None:
524 if folded is None:
530 if isknown:
525 if isknown:
531 folded = path
526 folded = path
532 else:
527 else:
533 # store discovered result in dirfoldmap so that future
528 # store discovered result in dirfoldmap so that future
534 # normalizefile calls don't start matching directories
529 # normalizefile calls don't start matching directories
535 folded = self._discoverpath(path, normed, ignoremissing, exists,
530 folded = self._discoverpath(path, normed, ignoremissing, exists,
536 self._map.dirfoldmap)
531 self._map.dirfoldmap)
537 return folded
532 return folded
538
533
539 def normalize(self, path, isknown=False, ignoremissing=False):
534 def normalize(self, path, isknown=False, ignoremissing=False):
540 '''
535 '''
541 normalize the case of a pathname when on a casefolding filesystem
536 normalize the case of a pathname when on a casefolding filesystem
542
537
543 isknown specifies whether the filename came from walking the
538 isknown specifies whether the filename came from walking the
544 disk, to avoid extra filesystem access.
539 disk, to avoid extra filesystem access.
545
540
546 If ignoremissing is True, missing path are returned
541 If ignoremissing is True, missing path are returned
547 unchanged. Otherwise, we try harder to normalize possibly
542 unchanged. Otherwise, we try harder to normalize possibly
548 existing path components.
543 existing path components.
549
544
550 The normalized case is determined based on the following precedence:
545 The normalized case is determined based on the following precedence:
551
546
552 - version of name already stored in the dirstate
547 - version of name already stored in the dirstate
553 - version of name stored on disk
548 - version of name stored on disk
554 - version provided via command arguments
549 - version provided via command arguments
555 '''
550 '''
556
551
557 if self._checkcase:
552 if self._checkcase:
558 return self._normalize(path, isknown, ignoremissing)
553 return self._normalize(path, isknown, ignoremissing)
559 return path
554 return path
560
555
561 def clear(self):
556 def clear(self):
562 self._map.clear()
557 self._map.clear()
563 self._lastnormaltime = 0
558 self._lastnormaltime = 0
564 self._updatedfiles.clear()
559 self._updatedfiles.clear()
565 self._dirty = True
560 self._dirty = True
566
561
567 def rebuild(self, parent, allfiles, changedfiles=None):
562 def rebuild(self, parent, allfiles, changedfiles=None):
568 if changedfiles is None:
563 if changedfiles is None:
569 # Rebuild entire dirstate
564 # Rebuild entire dirstate
570 changedfiles = allfiles
565 changedfiles = allfiles
571 lastnormaltime = self._lastnormaltime
566 lastnormaltime = self._lastnormaltime
572 self.clear()
567 self.clear()
573 self._lastnormaltime = lastnormaltime
568 self._lastnormaltime = lastnormaltime
574
569
575 if self._origpl is None:
570 if self._origpl is None:
576 self._origpl = self._pl
571 self._origpl = self._pl
577 self._map.setparents(parent, nullid)
572 self._map.setparents(parent, nullid)
578 for f in changedfiles:
573 for f in changedfiles:
579 if f in allfiles:
574 if f in allfiles:
580 self.normallookup(f)
575 self.normallookup(f)
581 else:
576 else:
582 self.drop(f)
577 self.drop(f)
583
578
584 self._dirty = True
579 self._dirty = True
585
580
586 def identity(self):
581 def identity(self):
587 '''Return identity of dirstate itself to detect changing in storage
582 '''Return identity of dirstate itself to detect changing in storage
588
583
589 If identity of previous dirstate is equal to this, writing
584 If identity of previous dirstate is equal to this, writing
590 changes based on the former dirstate out can keep consistency.
585 changes based on the former dirstate out can keep consistency.
591 '''
586 '''
592 return self._map.identity
587 return self._map.identity
593
588
594 def write(self, tr):
589 def write(self, tr):
595 if not self._dirty:
590 if not self._dirty:
596 return
591 return
597
592
598 filename = self._filename
593 filename = self._filename
599 if tr:
594 if tr:
600 # 'dirstate.write()' is not only for writing in-memory
595 # 'dirstate.write()' is not only for writing in-memory
601 # changes out, but also for dropping ambiguous timestamp.
596 # changes out, but also for dropping ambiguous timestamp.
602 # delayed writing re-raise "ambiguous timestamp issue".
597 # delayed writing re-raise "ambiguous timestamp issue".
603 # See also the wiki page below for detail:
598 # See also the wiki page below for detail:
604 # https://www.mercurial-scm.org/wiki/DirstateTransactionPlan
599 # https://www.mercurial-scm.org/wiki/DirstateTransactionPlan
605
600
606 # emulate dropping timestamp in 'parsers.pack_dirstate'
601 # emulate dropping timestamp in 'parsers.pack_dirstate'
607 now = _getfsnow(self._opener)
602 now = _getfsnow(self._opener)
608 self._map.clearambiguoustimes(self._updatedfiles, now)
603 self._map.clearambiguoustimes(self._updatedfiles, now)
609
604
610 # emulate that all 'dirstate.normal' results are written out
605 # emulate that all 'dirstate.normal' results are written out
611 self._lastnormaltime = 0
606 self._lastnormaltime = 0
612 self._updatedfiles.clear()
607 self._updatedfiles.clear()
613
608
614 # delay writing in-memory changes out
609 # delay writing in-memory changes out
615 tr.addfilegenerator('dirstate', (self._filename,),
610 tr.addfilegenerator('dirstate', (self._filename,),
616 self._writedirstate, location='plain')
611 self._writedirstate, location='plain')
617 return
612 return
618
613
619 st = self._opener(filename, "w", atomictemp=True, checkambig=True)
614 st = self._opener(filename, "w", atomictemp=True, checkambig=True)
620 self._writedirstate(st)
615 self._writedirstate(st)
621
616
622 def addparentchangecallback(self, category, callback):
617 def addparentchangecallback(self, category, callback):
623 """add a callback to be called when the wd parents are changed
618 """add a callback to be called when the wd parents are changed
624
619
625 Callback will be called with the following arguments:
620 Callback will be called with the following arguments:
626 dirstate, (oldp1, oldp2), (newp1, newp2)
621 dirstate, (oldp1, oldp2), (newp1, newp2)
627
622
628 Category is a unique identifier to allow overwriting an old callback
623 Category is a unique identifier to allow overwriting an old callback
629 with a newer callback.
624 with a newer callback.
630 """
625 """
631 self._plchangecallbacks[category] = callback
626 self._plchangecallbacks[category] = callback
632
627
633 def _writedirstate(self, st):
628 def _writedirstate(self, st):
634 # notify callbacks about parents change
629 # notify callbacks about parents change
635 if self._origpl is not None and self._origpl != self._pl:
630 if self._origpl is not None and self._origpl != self._pl:
636 for c, callback in sorted(self._plchangecallbacks.iteritems()):
631 for c, callback in sorted(self._plchangecallbacks.iteritems()):
637 callback(self, self._origpl, self._pl)
632 callback(self, self._origpl, self._pl)
638 self._origpl = None
633 self._origpl = None
639 # use the modification time of the newly created temporary file as the
634 # use the modification time of the newly created temporary file as the
640 # filesystem's notion of 'now'
635 # filesystem's notion of 'now'
641 now = util.fstat(st)[stat.ST_MTIME] & _rangemask
636 now = util.fstat(st)[stat.ST_MTIME] & _rangemask
642
637
643 # enough 'delaywrite' prevents 'pack_dirstate' from dropping
638 # enough 'delaywrite' prevents 'pack_dirstate' from dropping
644 # timestamp of each entries in dirstate, because of 'now > mtime'
639 # timestamp of each entries in dirstate, because of 'now > mtime'
645 delaywrite = self._ui.configint('debug', 'dirstate.delaywrite')
640 delaywrite = self._ui.configint('debug', 'dirstate.delaywrite')
646 if delaywrite > 0:
641 if delaywrite > 0:
647 # do we have any files to delay for?
642 # do we have any files to delay for?
648 for f, e in self._map.iteritems():
643 for f, e in self._map.iteritems():
649 if e[0] == 'n' and e[3] == now:
644 if e[0] == 'n' and e[3] == now:
650 import time # to avoid useless import
645 import time # to avoid useless import
651 # rather than sleep n seconds, sleep until the next
646 # rather than sleep n seconds, sleep until the next
652 # multiple of n seconds
647 # multiple of n seconds
653 clock = time.time()
648 clock = time.time()
654 start = int(clock) - (int(clock) % delaywrite)
649 start = int(clock) - (int(clock) % delaywrite)
655 end = start + delaywrite
650 end = start + delaywrite
656 time.sleep(end - clock)
651 time.sleep(end - clock)
657 now = end # trust our estimate that the end is near now
652 now = end # trust our estimate that the end is near now
658 break
653 break
659
654
660 self._map.write(st, now)
655 self._map.write(st, now)
661 self._lastnormaltime = 0
656 self._lastnormaltime = 0
662 self._dirty = False
657 self._dirty = False
663
658
664 def _dirignore(self, f):
659 def _dirignore(self, f):
665 if self._ignore(f):
660 if self._ignore(f):
666 return True
661 return True
667 for p in util.finddirs(f):
662 for p in util.finddirs(f):
668 if self._ignore(p):
663 if self._ignore(p):
669 return True
664 return True
670 return False
665 return False
671
666
672 def _ignorefiles(self):
667 def _ignorefiles(self):
673 files = []
668 files = []
674 if os.path.exists(self._join('.hgignore')):
669 if os.path.exists(self._join('.hgignore')):
675 files.append(self._join('.hgignore'))
670 files.append(self._join('.hgignore'))
676 for name, path in self._ui.configitems("ui"):
671 for name, path in self._ui.configitems("ui"):
677 if name == 'ignore' or name.startswith('ignore.'):
672 if name == 'ignore' or name.startswith('ignore.'):
678 # we need to use os.path.join here rather than self._join
673 # we need to use os.path.join here rather than self._join
679 # because path is arbitrary and user-specified
674 # because path is arbitrary and user-specified
680 files.append(os.path.join(self._rootdir, util.expandpath(path)))
675 files.append(os.path.join(self._rootdir, util.expandpath(path)))
681 return files
676 return files
682
677
683 def _ignorefileandline(self, f):
678 def _ignorefileandline(self, f):
684 files = collections.deque(self._ignorefiles())
679 files = collections.deque(self._ignorefiles())
685 visited = set()
680 visited = set()
686 while files:
681 while files:
687 i = files.popleft()
682 i = files.popleft()
688 patterns = matchmod.readpatternfile(i, self._ui.warn,
683 patterns = matchmod.readpatternfile(i, self._ui.warn,
689 sourceinfo=True)
684 sourceinfo=True)
690 for pattern, lineno, line in patterns:
685 for pattern, lineno, line in patterns:
691 kind, p = matchmod._patsplit(pattern, 'glob')
686 kind, p = matchmod._patsplit(pattern, 'glob')
692 if kind == "subinclude":
687 if kind == "subinclude":
693 if p not in visited:
688 if p not in visited:
694 files.append(p)
689 files.append(p)
695 continue
690 continue
696 m = matchmod.match(self._root, '', [], [pattern],
691 m = matchmod.match(self._root, '', [], [pattern],
697 warn=self._ui.warn)
692 warn=self._ui.warn)
698 if m(f):
693 if m(f):
699 return (i, lineno, line)
694 return (i, lineno, line)
700 visited.add(i)
695 visited.add(i)
701 return (None, -1, "")
696 return (None, -1, "")
702
697
703 def _walkexplicit(self, match, subrepos):
698 def _walkexplicit(self, match, subrepos):
704 '''Get stat data about the files explicitly specified by match.
699 '''Get stat data about the files explicitly specified by match.
705
700
706 Return a triple (results, dirsfound, dirsnotfound).
701 Return a triple (results, dirsfound, dirsnotfound).
707 - results is a mapping from filename to stat result. It also contains
702 - results is a mapping from filename to stat result. It also contains
708 listings mapping subrepos and .hg to None.
703 listings mapping subrepos and .hg to None.
709 - dirsfound is a list of files found to be directories.
704 - dirsfound is a list of files found to be directories.
710 - dirsnotfound is a list of files that the dirstate thinks are
705 - dirsnotfound is a list of files that the dirstate thinks are
711 directories and that were not found.'''
706 directories and that were not found.'''
712
707
713 def badtype(mode):
708 def badtype(mode):
714 kind = _('unknown')
709 kind = _('unknown')
715 if stat.S_ISCHR(mode):
710 if stat.S_ISCHR(mode):
716 kind = _('character device')
711 kind = _('character device')
717 elif stat.S_ISBLK(mode):
712 elif stat.S_ISBLK(mode):
718 kind = _('block device')
713 kind = _('block device')
719 elif stat.S_ISFIFO(mode):
714 elif stat.S_ISFIFO(mode):
720 kind = _('fifo')
715 kind = _('fifo')
721 elif stat.S_ISSOCK(mode):
716 elif stat.S_ISSOCK(mode):
722 kind = _('socket')
717 kind = _('socket')
723 elif stat.S_ISDIR(mode):
718 elif stat.S_ISDIR(mode):
724 kind = _('directory')
719 kind = _('directory')
725 return _('unsupported file type (type is %s)') % kind
720 return _('unsupported file type (type is %s)') % kind
726
721
727 matchedir = match.explicitdir
722 matchedir = match.explicitdir
728 badfn = match.bad
723 badfn = match.bad
729 dmap = self._map
724 dmap = self._map
730 lstat = os.lstat
725 lstat = os.lstat
731 getkind = stat.S_IFMT
726 getkind = stat.S_IFMT
732 dirkind = stat.S_IFDIR
727 dirkind = stat.S_IFDIR
733 regkind = stat.S_IFREG
728 regkind = stat.S_IFREG
734 lnkkind = stat.S_IFLNK
729 lnkkind = stat.S_IFLNK
735 join = self._join
730 join = self._join
736 dirsfound = []
731 dirsfound = []
737 foundadd = dirsfound.append
732 foundadd = dirsfound.append
738 dirsnotfound = []
733 dirsnotfound = []
739 notfoundadd = dirsnotfound.append
734 notfoundadd = dirsnotfound.append
740
735
741 if not match.isexact() and self._checkcase:
736 if not match.isexact() and self._checkcase:
742 normalize = self._normalize
737 normalize = self._normalize
743 else:
738 else:
744 normalize = None
739 normalize = None
745
740
746 files = sorted(match.files())
741 files = sorted(match.files())
747 subrepos.sort()
742 subrepos.sort()
748 i, j = 0, 0
743 i, j = 0, 0
749 while i < len(files) and j < len(subrepos):
744 while i < len(files) and j < len(subrepos):
750 subpath = subrepos[j] + "/"
745 subpath = subrepos[j] + "/"
751 if files[i] < subpath:
746 if files[i] < subpath:
752 i += 1
747 i += 1
753 continue
748 continue
754 while i < len(files) and files[i].startswith(subpath):
749 while i < len(files) and files[i].startswith(subpath):
755 del files[i]
750 del files[i]
756 j += 1
751 j += 1
757
752
758 if not files or '' in files:
753 if not files or '' in files:
759 files = ['']
754 files = ['']
760 # constructing the foldmap is expensive, so don't do it for the
755 # constructing the foldmap is expensive, so don't do it for the
761 # common case where files is ['']
756 # common case where files is ['']
762 normalize = None
757 normalize = None
763 results = dict.fromkeys(subrepos)
758 results = dict.fromkeys(subrepos)
764 results['.hg'] = None
759 results['.hg'] = None
765
760
766 for ff in files:
761 for ff in files:
767 if normalize:
762 if normalize:
768 nf = normalize(ff, False, True)
763 nf = normalize(ff, False, True)
769 else:
764 else:
770 nf = ff
765 nf = ff
771 if nf in results:
766 if nf in results:
772 continue
767 continue
773
768
774 try:
769 try:
775 st = lstat(join(nf))
770 st = lstat(join(nf))
776 kind = getkind(st.st_mode)
771 kind = getkind(st.st_mode)
777 if kind == dirkind:
772 if kind == dirkind:
778 if nf in dmap:
773 if nf in dmap:
779 # file replaced by dir on disk but still in dirstate
774 # file replaced by dir on disk but still in dirstate
780 results[nf] = None
775 results[nf] = None
781 if matchedir:
776 if matchedir:
782 matchedir(nf)
777 matchedir(nf)
783 foundadd((nf, ff))
778 foundadd((nf, ff))
784 elif kind == regkind or kind == lnkkind:
779 elif kind == regkind or kind == lnkkind:
785 results[nf] = st
780 results[nf] = st
786 else:
781 else:
787 badfn(ff, badtype(kind))
782 badfn(ff, badtype(kind))
788 if nf in dmap:
783 if nf in dmap:
789 results[nf] = None
784 results[nf] = None
790 except OSError as inst: # nf not found on disk - it is dirstate only
785 except OSError as inst: # nf not found on disk - it is dirstate only
791 if nf in dmap: # does it exactly match a missing file?
786 if nf in dmap: # does it exactly match a missing file?
792 results[nf] = None
787 results[nf] = None
793 else: # does it match a missing directory?
788 else: # does it match a missing directory?
794 if self._map.hasdir(nf):
789 if self._map.hasdir(nf):
795 if matchedir:
790 if matchedir:
796 matchedir(nf)
791 matchedir(nf)
797 notfoundadd(nf)
792 notfoundadd(nf)
798 else:
793 else:
799 badfn(ff, encoding.strtolocal(inst.strerror))
794 badfn(ff, encoding.strtolocal(inst.strerror))
800
795
801 # match.files() may contain explicitly-specified paths that shouldn't
796 # match.files() may contain explicitly-specified paths that shouldn't
802 # be taken; drop them from the list of files found. dirsfound/notfound
797 # be taken; drop them from the list of files found. dirsfound/notfound
803 # aren't filtered here because they will be tested later.
798 # aren't filtered here because they will be tested later.
804 if match.anypats():
799 if match.anypats():
805 for f in list(results):
800 for f in list(results):
806 if f == '.hg' or f in subrepos:
801 if f == '.hg' or f in subrepos:
807 # keep sentinel to disable further out-of-repo walks
802 # keep sentinel to disable further out-of-repo walks
808 continue
803 continue
809 if not match(f):
804 if not match(f):
810 del results[f]
805 del results[f]
811
806
812 # Case insensitive filesystems cannot rely on lstat() failing to detect
807 # Case insensitive filesystems cannot rely on lstat() failing to detect
813 # a case-only rename. Prune the stat object for any file that does not
808 # a case-only rename. Prune the stat object for any file that does not
814 # match the case in the filesystem, if there are multiple files that
809 # match the case in the filesystem, if there are multiple files that
815 # normalize to the same path.
810 # normalize to the same path.
816 if match.isexact() and self._checkcase:
811 if match.isexact() and self._checkcase:
817 normed = {}
812 normed = {}
818
813
819 for f, st in results.iteritems():
814 for f, st in results.iteritems():
820 if st is None:
815 if st is None:
821 continue
816 continue
822
817
823 nc = util.normcase(f)
818 nc = util.normcase(f)
824 paths = normed.get(nc)
819 paths = normed.get(nc)
825
820
826 if paths is None:
821 if paths is None:
827 paths = set()
822 paths = set()
828 normed[nc] = paths
823 normed[nc] = paths
829
824
830 paths.add(f)
825 paths.add(f)
831
826
832 for norm, paths in normed.iteritems():
827 for norm, paths in normed.iteritems():
833 if len(paths) > 1:
828 if len(paths) > 1:
834 for path in paths:
829 for path in paths:
835 folded = self._discoverpath(path, norm, True, None,
830 folded = self._discoverpath(path, norm, True, None,
836 self._map.dirfoldmap)
831 self._map.dirfoldmap)
837 if path != folded:
832 if path != folded:
838 results[path] = None
833 results[path] = None
839
834
840 return results, dirsfound, dirsnotfound
835 return results, dirsfound, dirsnotfound
841
836
842 def walk(self, match, subrepos, unknown, ignored, full=True):
837 def walk(self, match, subrepos, unknown, ignored, full=True):
843 '''
838 '''
844 Walk recursively through the directory tree, finding all files
839 Walk recursively through the directory tree, finding all files
845 matched by match.
840 matched by match.
846
841
847 If full is False, maybe skip some known-clean files.
842 If full is False, maybe skip some known-clean files.
848
843
849 Return a dict mapping filename to stat-like object (either
844 Return a dict mapping filename to stat-like object (either
850 mercurial.osutil.stat instance or return value of os.stat()).
845 mercurial.osutil.stat instance or return value of os.stat()).
851
846
852 '''
847 '''
853 # full is a flag that extensions that hook into walk can use -- this
848 # full is a flag that extensions that hook into walk can use -- this
854 # implementation doesn't use it at all. This satisfies the contract
849 # implementation doesn't use it at all. This satisfies the contract
855 # because we only guarantee a "maybe".
850 # because we only guarantee a "maybe".
856
851
857 if ignored:
852 if ignored:
858 ignore = util.never
853 ignore = util.never
859 dirignore = util.never
854 dirignore = util.never
860 elif unknown:
855 elif unknown:
861 ignore = self._ignore
856 ignore = self._ignore
862 dirignore = self._dirignore
857 dirignore = self._dirignore
863 else:
858 else:
864 # if not unknown and not ignored, drop dir recursion and step 2
859 # if not unknown and not ignored, drop dir recursion and step 2
865 ignore = util.always
860 ignore = util.always
866 dirignore = util.always
861 dirignore = util.always
867
862
868 matchfn = match.matchfn
863 matchfn = match.matchfn
869 matchalways = match.always()
864 matchalways = match.always()
870 matchtdir = match.traversedir
865 matchtdir = match.traversedir
871 dmap = self._map
866 dmap = self._map
872 listdir = util.listdir
867 listdir = util.listdir
873 lstat = os.lstat
868 lstat = os.lstat
874 dirkind = stat.S_IFDIR
869 dirkind = stat.S_IFDIR
875 regkind = stat.S_IFREG
870 regkind = stat.S_IFREG
876 lnkkind = stat.S_IFLNK
871 lnkkind = stat.S_IFLNK
877 join = self._join
872 join = self._join
878
873
879 exact = skipstep3 = False
874 exact = skipstep3 = False
880 if match.isexact(): # match.exact
875 if match.isexact(): # match.exact
881 exact = True
876 exact = True
882 dirignore = util.always # skip step 2
877 dirignore = util.always # skip step 2
883 elif match.prefix(): # match.match, no patterns
878 elif match.prefix(): # match.match, no patterns
884 skipstep3 = True
879 skipstep3 = True
885
880
886 if not exact and self._checkcase:
881 if not exact and self._checkcase:
887 normalize = self._normalize
882 normalize = self._normalize
888 normalizefile = self._normalizefile
883 normalizefile = self._normalizefile
889 skipstep3 = False
884 skipstep3 = False
890 else:
885 else:
891 normalize = self._normalize
886 normalize = self._normalize
892 normalizefile = None
887 normalizefile = None
893
888
894 # step 1: find all explicit files
889 # step 1: find all explicit files
895 results, work, dirsnotfound = self._walkexplicit(match, subrepos)
890 results, work, dirsnotfound = self._walkexplicit(match, subrepos)
896
891
897 skipstep3 = skipstep3 and not (work or dirsnotfound)
892 skipstep3 = skipstep3 and not (work or dirsnotfound)
898 work = [d for d in work if not dirignore(d[0])]
893 work = [d for d in work if not dirignore(d[0])]
899
894
900 # step 2: visit subdirectories
895 # step 2: visit subdirectories
901 def traverse(work, alreadynormed):
896 def traverse(work, alreadynormed):
902 wadd = work.append
897 wadd = work.append
903 while work:
898 while work:
904 nd = work.pop()
899 nd = work.pop()
905 visitentries = match.visitchildrenset(nd)
900 visitentries = match.visitchildrenset(nd)
906 if not visitentries:
901 if not visitentries:
907 continue
902 continue
908 if visitentries == 'this' or visitentries == 'all':
903 if visitentries == 'this' or visitentries == 'all':
909 visitentries = None
904 visitentries = None
910 skip = None
905 skip = None
911 if nd != '':
906 if nd != '':
912 skip = '.hg'
907 skip = '.hg'
913 try:
908 try:
914 entries = listdir(join(nd), stat=True, skip=skip)
909 entries = listdir(join(nd), stat=True, skip=skip)
915 except OSError as inst:
910 except OSError as inst:
916 if inst.errno in (errno.EACCES, errno.ENOENT):
911 if inst.errno in (errno.EACCES, errno.ENOENT):
917 match.bad(self.pathto(nd),
912 match.bad(self.pathto(nd),
918 encoding.strtolocal(inst.strerror))
913 encoding.strtolocal(inst.strerror))
919 continue
914 continue
920 raise
915 raise
921 for f, kind, st in entries:
916 for f, kind, st in entries:
922 # Some matchers may return files in the visitentries set,
917 # Some matchers may return files in the visitentries set,
923 # instead of 'this', if the matcher explicitly mentions them
918 # instead of 'this', if the matcher explicitly mentions them
924 # and is not an exactmatcher. This is acceptable; we do not
919 # and is not an exactmatcher. This is acceptable; we do not
925 # make any hard assumptions about file-or-directory below
920 # make any hard assumptions about file-or-directory below
926 # based on the presence of `f` in visitentries. If
921 # based on the presence of `f` in visitentries. If
927 # visitchildrenset returned a set, we can always skip the
922 # visitchildrenset returned a set, we can always skip the
928 # entries *not* in the set it provided regardless of whether
923 # entries *not* in the set it provided regardless of whether
929 # they're actually a file or a directory.
924 # they're actually a file or a directory.
                    if visitentries and f not in visitentries:
                        continue
                    if normalizefile:
                        # even though f might be a directory, we're only
                        # interested in comparing it to files currently in the
                        # dmap -- therefore normalizefile is enough
                        nf = normalizefile(nd and (nd + "/" + f) or f, True,
                                           True)
                    else:
                        nf = nd and (nd + "/" + f) or f
                    if nf not in results:
                        if kind == dirkind:
                            if not ignore(nf):
                                if matchtdir:
                                    matchtdir(nf)
                                wadd(nf)
                            if nf in dmap and (matchalways or matchfn(nf)):
                                results[nf] = None
                        elif kind == regkind or kind == lnkkind:
                            if nf in dmap:
                                if matchalways or matchfn(nf):
                                    results[nf] = st
                            elif ((matchalways or matchfn(nf))
                                  and not ignore(nf)):
                                # unknown file -- normalize if necessary
                                if not alreadynormed:
                                    nf = normalize(nf, False, True)
                                results[nf] = st
                        elif nf in dmap and (matchalways or matchfn(nf)):
                            results[nf] = None

        for nd, d in work:
            # alreadynormed means that processwork doesn't have to do any
            # expensive directory normalization
            alreadynormed = not normalize or nd == d
            traverse([d], alreadynormed)

        for s in subrepos:
            del results[s]
        del results['.hg']

        # step 3: visit remaining files from dmap
        if not skipstep3 and not exact:
            # If a dmap file is not in results yet, it was either
            # a) not matching matchfn, b) ignored, c) missing, or d) under a
            # symlink directory.
            if not results and matchalways:
                visit = [f for f in dmap]
            else:
                visit = [f for f in dmap if f not in results and matchfn(f)]
            visit.sort()

            if unknown:
                # unknown == True means we walked all dirs under the roots
                # that weren't ignored, and everything that matched was
                # stat'ed and is already in results.
                # The rest must thus be ignored or under a symlink.
                audit_path = pathutil.pathauditor(self._root, cached=True)

                for nf in iter(visit):
                    # If a stat for the same file was already added with a
                    # different case, don't add one for this, since that would
                    # make it appear as if the file exists under both names
                    # on disk.
                    if (normalizefile and
                        normalizefile(nf, True, True) in results):
                        results[nf] = None
                    # Report ignored items in the dmap as long as they are not
                    # under a symlink directory.
                    elif audit_path.check(nf):
                        try:
                            results[nf] = lstat(join(nf))
                            # file was just ignored, no links, and exists
                        except OSError:
                            # file doesn't exist
                            results[nf] = None
                    else:
                        # It's either missing or under a symlink directory,
                        # which we report as missing in this case.
                        results[nf] = None
            else:
                # We may not have walked the full directory tree above,
                # so stat and check everything we missed.
                iv = iter(visit)
                for st in util.statfiles([join(i) for i in visit]):
                    results[next(iv)] = st
        return results

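    # Illustrative sketch of driving walk() directly (hedged; 'dirstate' is
    # an assumed instance and matchmod.always() is the module-level helper
    # shown in match.py below):
    #
    #   m = matchmod.always()
    #   for fn, st in dirstate.walk(m, subrepos=[], unknown=True,
    #                               ignored=False).iteritems():
    #       ...  # st is a stat result for files found on disk, else None
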
    def status(self, match, subrepos, ignored, clean, unknown):
        '''Determine the status of the working copy relative to the
        dirstate and return a pair of (unsure, status), where status is of type
        scmutil.status and:

          unsure:
            files that might have been modified since the dirstate was
            written, but need to be read to be sure (size is the same
            but mtime differs)
          status.modified:
            files that have definitely been modified since the dirstate
            was written (different size or mode)
          status.clean:
            files that have definitely not been modified since the
            dirstate was written
        '''
        listignored, listclean, listunknown = ignored, clean, unknown
        lookup, modified, added, unknown, ignored = [], [], [], [], []
        removed, deleted, clean = [], [], []

        dmap = self._map
        dmap.preload()
        dcontains = dmap.__contains__
        dget = dmap.__getitem__
        ladd = lookup.append # aka "unsure"
        madd = modified.append
        aadd = added.append
        uadd = unknown.append
        iadd = ignored.append
        radd = removed.append
        dadd = deleted.append
        cadd = clean.append
        mexact = match.exact
        dirignore = self._dirignore
        checkexec = self._checkexec
        copymap = self._map.copymap
        lastnormaltime = self._lastnormaltime

        # We need to do full walks when either
        # - we're listing all clean files, or
        # - match.traversedir does something, because match.traversedir should
        #   be called for every dir in the working dir
        full = listclean or match.traversedir is not None
        for fn, st in self.walk(match, subrepos, listunknown, listignored,
                                full=full).iteritems():
            if not dcontains(fn):
                if (listignored or mexact(fn)) and dirignore(fn):
                    if listignored:
                        iadd(fn)
                else:
                    uadd(fn)
                continue

            # This is equivalent to 'state, mode, size, time = dmap[fn]' but
            # not written like that for performance reasons. dmap[fn] is not a
            # Python tuple in compiled builds. The CPython UNPACK_SEQUENCE
            # opcode has fast paths when the value to be unpacked is a tuple or
            # a list, but falls back to creating a full-fledged iterator in
            # general. That is much slower than simply accessing and storing
            # the tuple members one by one.
            t = dget(fn)
            state = t[0]
            mode = t[1]
            size = t[2]
            time = t[3]

            if not st and state in "nma":
                dadd(fn)
            elif state == 'n':
                if (size >= 0 and
                    ((size != st.st_size and size != st.st_size & _rangemask)
                     or ((mode ^ st.st_mode) & 0o100 and checkexec))
                    or size == -2 # other parent
                    or fn in copymap):
                    madd(fn)
                elif (time != st[stat.ST_MTIME]
                      and time != st[stat.ST_MTIME] & _rangemask):
                    ladd(fn)
                elif st[stat.ST_MTIME] == lastnormaltime:
                    # fn may have just been marked as normal and it may have
                    # changed in the same second without changing its size.
                    # This can happen if we quickly do multiple commits.
                    # Force lookup, so we don't miss such a racy file change.
                    ladd(fn)
                elif listclean:
                    cadd(fn)
            elif state == 'm':
                madd(fn)
            elif state == 'a':
                aadd(fn)
            elif state == 'r':
                radd(fn)

        return (lookup, scmutil.status(modified, added, removed, deleted,
                                       unknown, ignored, clean))
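    # Illustrative call (hedged sketch; 'repo' is an assumed repository
    # object):
    #
    #   m = matchmod.always()
    #   unsure, st = repo.dirstate.status(m, subrepos=[], ignored=False,
    #                                     clean=False, unknown=True)
    #   # 'unsure' entries still need a content read to classify, while
    #   # st.modified entries already differ in size or mode.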

    def matches(self, match):
        '''
        return files in the dirstate (in whatever state) filtered by match
        '''
        dmap = self._map
        if match.always():
            return dmap.keys()
        files = match.files()
        if match.isexact():
            # fast path -- filter the other way around, since typically files
            # is much smaller than dmap
            return [f for f in files if f in dmap]
        if match.prefix() and all(fn in dmap for fn in files):
            # fast path -- all the values are known to be files, so just
            # return that
            return list(files)
        return [f for f in dmap if match(f)]

    def _actualfilename(self, tr):
        if tr:
            return self._pendingfilename
        else:
            return self._filename

    def savebackup(self, tr, backupname):
        '''Save current dirstate into backup file'''
        filename = self._actualfilename(tr)
        assert backupname != filename

        # use '_writedirstate' instead of 'write' to make sure changes are
        # written out, because the latter skips writing while a transaction
        # is running. The output file is then used to create the dirstate
        # backup below.
        if self._dirty or not self._opener.exists(filename):
            self._writedirstate(self._opener(filename, "w", atomictemp=True,
                                             checkambig=True))

        if tr:
            # ensure that subsequent tr.writepending returns True for
            # changes written out above, even if dirstate is never
            # changed after this
            tr.addfilegenerator('dirstate', (self._filename,),
                                self._writedirstate, location='plain')

            # ensure that pending file written above is unlinked at
            # failure, even if tr.writepending isn't invoked until the
            # end of this transaction
            tr.registertmp(filename, location='plain')

        self._opener.tryunlink(backupname)
        # hardlink backup is okay because _writedirstate is always called
        # with an "atomictemp=True" file.
        util.copyfile(self._opener.join(filename),
                      self._opener.join(backupname), hardlink=True)

    def restorebackup(self, tr, backupname):
        '''Restore dirstate from a backup file'''
        # this "invalidate()" prevents "wlock.release()" from writing
        # changes of dirstate out after restoring from backup file
        self.invalidate()
        filename = self._actualfilename(tr)
        o = self._opener
        if util.samefile(o.join(backupname), o.join(filename)):
            o.unlink(backupname)
        else:
            o.rename(backupname, filename, checkambig=True)

    def clearbackup(self, tr, backupname):
        '''Clear backup file'''
        self._opener.unlink(backupname)
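    # Illustrative lifecycle of the three backup methods above (hedged
    # sketch; transaction handling is simplified and 'repo' is assumed):
    #
    #   tr = repo.currenttransaction()
    #   repo.dirstate.savebackup(tr, 'dirstate.backup')
    #   try:
    #       ...  # mutate the working copy
    #   except Exception:
    #       repo.dirstate.restorebackup(tr, 'dirstate.backup')
    #       raise
    #   else:
    #       repo.dirstate.clearbackup(tr, 'dirstate.backup')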

class dirstatemap(object):
    """Map encapsulating the dirstate's contents.

    The dirstate contains the following state:

    - `identity` is the identity of the dirstate file, which can be used to
      detect when changes have occurred to the dirstate file.

    - `parents` is a pair containing the parents of the working copy. The
      parents are updated by calling `setparents`.

    - the state map maps filenames to tuples of (state, mode, size, mtime),
      where state is a single character representing 'normal', 'added',
      'removed', or 'merged'. It is read by treating the dirstate as a
      dict. File state is updated by calling the `addfile`, `removefile` and
      `dropfile` methods.

    - `copymap` maps destination filenames to their source filename.

    The dirstate also provides the following views onto the state:

    - `nonnormalset` is a set of the filenames that have state other
      than 'normal', or are normal but have an mtime of -1 ('normallookup').

    - `otherparentset` is a set of the filenames that are marked as coming
      from the second parent when the dirstate is currently being merged.

    - `filefoldmap` is a dict mapping normalized filenames to the denormalized
      form that they appear as in the dirstate.

    - `dirfoldmap` is a dict mapping normalized directory names to the
      denormalized form that they appear as in the dirstate.
    """
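    # A hedged illustration of the state map described above (the concrete
    # values are made up): a clean ("normal") file might be stored as
    #
    #   self._map['a.txt'] = ('n', 0o644, 12, 1560000000)
    #
    # while a file marked for removal is stored as
    #
    #   self._map['gone.txt'] = ('r', 0, 0, 0)
    #
    # and therefore also appears in `nonnormalset`.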

    def __init__(self, ui, opener, root):
        self._ui = ui
        self._opener = opener
        self._root = root
        self._filename = 'dirstate'

        self._parents = None
        self._dirtyparents = False

        # for consistent view between _pl() and _read() invocations
        self._pendingmode = None

    @propertycache
    def _map(self):
        self._map = {}
        self.read()
        return self._map

    @propertycache
    def copymap(self):
        self.copymap = {}
        self._map
        return self.copymap

    def clear(self):
        self._map.clear()
        self.copymap.clear()
        self.setparents(nullid, nullid)
        util.clearcachedproperty(self, "_dirs")
        util.clearcachedproperty(self, "_alldirs")
        util.clearcachedproperty(self, "filefoldmap")
        util.clearcachedproperty(self, "dirfoldmap")
        util.clearcachedproperty(self, "nonnormalset")
        util.clearcachedproperty(self, "otherparentset")

    def items(self):
        return self._map.iteritems()

    # forward for python2,3 compat
    iteritems = items

    def __len__(self):
        return len(self._map)

    def __iter__(self):
        return iter(self._map)

    def get(self, key, default=None):
        return self._map.get(key, default)

    def __contains__(self, key):
        return key in self._map

    def __getitem__(self, key):
        return self._map[key]

    def keys(self):
        return self._map.keys()

    def preload(self):
        """Loads the underlying data, if it's not already loaded"""
        self._map

    def addfile(self, f, oldstate, state, mode, size, mtime):
        """Add a tracked file to the dirstate."""
        if oldstate in "?r" and r"_dirs" in self.__dict__:
            self._dirs.addpath(f)
        if oldstate == "?" and r"_alldirs" in self.__dict__:
            self._alldirs.addpath(f)
        self._map[f] = dirstatetuple(state, mode, size, mtime)
        if state != 'n' or mtime == -1:
            self.nonnormalset.add(f)
        if size == -2:
            self.otherparentset.add(f)

    def removefile(self, f, oldstate, size):
        """
        Mark a file as removed in the dirstate.

        The `size` parameter is used to store sentinel values that indicate
        the file's previous state. In the future, we should refactor this
        to be more explicit about what that state is.
        """
        if oldstate not in "?r" and r"_dirs" in self.__dict__:
            self._dirs.delpath(f)
        if oldstate == "?" and r"_alldirs" in self.__dict__:
            self._alldirs.addpath(f)
        if r"filefoldmap" in self.__dict__:
            normed = util.normcase(f)
            self.filefoldmap.pop(normed, None)
        self._map[f] = dirstatetuple('r', 0, size, 0)
        self.nonnormalset.add(f)

    def dropfile(self, f, oldstate):
        """
        Remove a file from the dirstate. Returns True if the file was
        previously recorded.
        """
        exists = self._map.pop(f, None) is not None
        if exists:
            if oldstate != "r" and r"_dirs" in self.__dict__:
                self._dirs.delpath(f)
            if r"_alldirs" in self.__dict__:
                self._alldirs.delpath(f)
        if r"filefoldmap" in self.__dict__:
            normed = util.normcase(f)
            self.filefoldmap.pop(normed, None)
        self.nonnormalset.discard(f)
        return exists

    def clearambiguoustimes(self, files, now):
        for f in files:
            e = self.get(f)
            if e is not None and e[0] == 'n' and e[3] == now:
                self._map[f] = dirstatetuple(e[0], e[1], e[2], -1)
                self.nonnormalset.add(f)

    def nonnormalentries(self):
        '''Compute the nonnormal dirstate entries from the dmap'''
        try:
            return parsers.nonnormalotherparententries(self._map)
        except AttributeError:
            nonnorm = set()
            otherparent = set()
            for fname, e in self._map.iteritems():
                if e[0] != 'n' or e[3] == -1:
                    nonnorm.add(fname)
                if e[0] == 'n' and e[2] == -2:
                    otherparent.add(fname)
            return nonnorm, otherparent

    @propertycache
    def filefoldmap(self):
        """Returns a dictionary mapping normalized case paths to their
        non-normalized versions.
        """
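        # Hedged example: on a case-insensitive filesystem this lets a
        # lookup like filefoldmap['readme.txt'] recover the tracked
        # spelling 'README.txt'.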
        try:
            makefilefoldmap = parsers.make_file_foldmap
        except AttributeError:
            pass
        else:
            return makefilefoldmap(self._map, util.normcasespec,
                                   util.normcasefallback)

        f = {}
        normcase = util.normcase
        for name, s in self._map.iteritems():
            if s[0] != 'r':
                f[normcase(name)] = name
        f['.'] = '.' # prevents useless util.fspath() invocation
        return f

    def hastrackeddir(self, d):
        """
        Returns True if the dirstate contains a tracked (not removed) file
        in this directory.
        """
        return d in self._dirs

    def hasdir(self, d):
        """
        Returns True if the dirstate contains a file (tracked or removed)
        in this directory.
        """
        return d in self._alldirs

    @propertycache
    def _dirs(self):
        return util.dirs(self._map, 'r')

    @propertycache
    def _alldirs(self):
        return util.dirs(self._map)

    def _opendirstatefile(self):
        fp, mode = txnutil.trypending(self._root, self._opener, self._filename)
        if self._pendingmode is not None and self._pendingmode != mode:
            fp.close()
            raise error.Abort(_('working directory state may be '
                                'changed in parallel'))
        self._pendingmode = mode
        return fp

    def parents(self):
        if not self._parents:
            try:
                fp = self._opendirstatefile()
                st = fp.read(40)
                fp.close()
            except IOError as err:
                if err.errno != errno.ENOENT:
                    raise
                # File doesn't exist, so the current state is empty
                st = ''

            l = len(st)
            if l == 40:
                self._parents = (st[:20], st[20:40])
            elif l == 0:
                self._parents = (nullid, nullid)
            else:
                raise error.Abort(_('working directory state appears '
                                    'damaged!'))

        return self._parents

    def setparents(self, p1, p2):
        self._parents = (p1, p2)
        self._dirtyparents = True

    def read(self):
        # ignore HG_PENDING because identity is used only for writing
        self.identity = util.filestat.frompath(
            self._opener.join(self._filename))

        try:
            fp = self._opendirstatefile()
            try:
                st = fp.read()
            finally:
                fp.close()
        except IOError as err:
            if err.errno != errno.ENOENT:
                raise
            return
        if not st:
            return

        if util.safehasattr(parsers, 'dict_new_presized'):
            # Make an estimate of the number of files in the dirstate based on
            # its size. From a linear regression on a set of real-world repos,
            # all over 10,000 files, the size of a dirstate entry is 85
            # bytes. The cost of resizing is significantly higher than the
            # cost of filling in a larger presized dict, so subtract 20% from
            # the size.
            #
            # This heuristic is imperfect in many ways, so in a future dirstate
            # format update it makes sense to just record the number of entries
            # on write.
            self._map = parsers.dict_new_presized(len(st) // 71)
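            # Worked example (illustrative): a 7,100,000-byte dirstate
            # presizes 7100000 // 71 = 100,000 slots, roughly 20% above the
            # ~83,500 entries implied by 85 bytes per entry.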

        # Python's garbage collector triggers a GC each time a certain number
        # of container objects (the number being defined by
        # gc.get_threshold()) are allocated. parse_dirstate creates a tuple
        # for each file in the dirstate. The C version then immediately marks
        # them as not to be tracked by the collector. However, this has no
        # effect on when GCs are triggered, only on what objects the GC looks
        # into. This means that O(number of files) GCs are unavoidable.
        # Depending on when in the process's lifetime the dirstate is parsed,
        # this can get very expensive. As a workaround, disable GC while
        # parsing the dirstate.
        #
        # (we cannot decorate the function directly since it is in a C module)
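        #
        # A minimal sketch of the idea behind util.nogc (simplified; the
        # real helper in mercurial/util.py is more careful, e.g. about
        # whether GC was enabled to begin with):
        #
        #   import gc
        #   def nogc(func):
        #       def wrapper(*args, **kwargs):
        #           gc.disable()
        #           try:
        #               return func(*args, **kwargs)
        #           finally:
        #               gc.enable()
        #       return wrapper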
-        if rustext is not None:
-            parse_dirstate = rustext.dirstate.parse_dirstate
-        else:
-            parse_dirstate = parsers.parse_dirstate
-
-        parse_dirstate = util.nogc(parse_dirstate)
+        parse_dirstate = util.nogc(dirstatemod.parse_dirstate)
        p = parse_dirstate(self._map, self.copymap, st)
        if not self._dirtyparents:
            self.setparents(*p)

        # Avoid excess attribute lookups by fast pathing certain checks
        self.__contains__ = self._map.__contains__
        self.__getitem__ = self._map.__getitem__
        self.get = self._map.get

    def write(self, st, now):
-        if rustext is not None:
-            pack_dirstate = rustext.dirstate.pack_dirstate
-        else:
-            pack_dirstate = parsers.pack_dirstate
-
-        st.write(pack_dirstate(self._map, self.copymap,
+        st.write(dirstatemod.pack_dirstate(self._map, self.copymap,
                                           self.parents(), now))
        st.close()
        self._dirtyparents = False
        self.nonnormalset, self.otherparentset = self.nonnormalentries()

    @propertycache
    def nonnormalset(self):
        nonnorm, otherparents = self.nonnormalentries()
        self.otherparentset = otherparents
        return nonnorm

    @propertycache
    def otherparentset(self):
        nonnorm, otherparents = self.nonnormalentries()
        self.nonnormalset = nonnorm
        return otherparents

    @propertycache
    def identity(self):
        self._map
        return self.identity

    @propertycache
    def dirfoldmap(self):
        f = {}
        normcase = util.normcase
        for name in self._dirs:
            f[normcase(name)] = name
        return f

@@ -1,1529 +1,1526
# match.py - filename matching
#
# Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from __future__ import absolute_import, print_function

import copy
import itertools
import os
import re

from .i18n import _
from . import (
    encoding,
    error,
    pathutil,
+    policy,
    pycompat,
    util,
)
from .utils import (
    stringutil,
)

-try:
-    from . import rustext
-    rustext.__name__ # force actual import (see hgdemandimport)
-except ImportError:
-    rustext = None
-
+rustmod = policy.importrust('filepatterns')

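# A hedged sketch of the policy.importrust pattern used above (simplified;
# the real logic lives in mercurial/policy.py and also honours the module
# policy, e.g. the HGMODULEPOLICY environment variable):
#
#   def importrust(modname, default=None):
#       try:
#           from mercurial import rustext
#           return getattr(rustext, modname)
#       except (ImportError, AttributeError):
#           return default
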
33 allpatternkinds = ('re', 'glob', 'path', 'relglob', 'relpath', 'relre',
30 allpatternkinds = ('re', 'glob', 'path', 'relglob', 'relpath', 'relre',
34 'rootglob',
31 'rootglob',
35 'listfile', 'listfile0', 'set', 'include', 'subinclude',
32 'listfile', 'listfile0', 'set', 'include', 'subinclude',
36 'rootfilesin')
33 'rootfilesin')
37 cwdrelativepatternkinds = ('relpath', 'glob')
34 cwdrelativepatternkinds = ('relpath', 'glob')
38
35
39 propertycache = util.propertycache
36 propertycache = util.propertycache
40
37
41 def _rematcher(regex):
38 def _rematcher(regex):
42 '''compile the regexp with the best available regexp engine and return a
39 '''compile the regexp with the best available regexp engine and return a
43 matcher function'''
40 matcher function'''
44 m = util.re.compile(regex)
41 m = util.re.compile(regex)
45 try:
42 try:
46 # slightly faster, provided by facebook's re2 bindings
43 # slightly faster, provided by facebook's re2 bindings
47 return m.test_match
44 return m.test_match
48 except AttributeError:
45 except AttributeError:
49 return m.match
46 return m.match
50
47
51 def _expandsets(kindpats, ctx=None, listsubrepos=False, badfn=None):
48 def _expandsets(kindpats, ctx=None, listsubrepos=False, badfn=None):
52 '''Returns the kindpats list with the 'set' patterns expanded to matchers'''
49 '''Returns the kindpats list with the 'set' patterns expanded to matchers'''
53 matchers = []
50 matchers = []
54 other = []
51 other = []
55
52
56 for kind, pat, source in kindpats:
53 for kind, pat, source in kindpats:
57 if kind == 'set':
54 if kind == 'set':
58 if ctx is None:
55 if ctx is None:
59 raise error.ProgrammingError("fileset expression with no "
56 raise error.ProgrammingError("fileset expression with no "
60 "context")
57 "context")
61 matchers.append(ctx.matchfileset(pat, badfn=badfn))
58 matchers.append(ctx.matchfileset(pat, badfn=badfn))
62
59
63 if listsubrepos:
60 if listsubrepos:
64 for subpath in ctx.substate:
61 for subpath in ctx.substate:
65 sm = ctx.sub(subpath).matchfileset(pat, badfn=badfn)
62 sm = ctx.sub(subpath).matchfileset(pat, badfn=badfn)
66 pm = prefixdirmatcher(subpath, sm, badfn=badfn)
63 pm = prefixdirmatcher(subpath, sm, badfn=badfn)
67 matchers.append(pm)
64 matchers.append(pm)
68
65
69 continue
66 continue
70 other.append((kind, pat, source))
67 other.append((kind, pat, source))
71 return matchers, other
68 return matchers, other
72
69
73 def _expandsubinclude(kindpats, root):
70 def _expandsubinclude(kindpats, root):
74 '''Returns the list of subinclude matcher args and the kindpats without the
71 '''Returns the list of subinclude matcher args and the kindpats without the
75 subincludes in it.'''
72 subincludes in it.'''
76 relmatchers = []
73 relmatchers = []
77 other = []
74 other = []
78
75
79 for kind, pat, source in kindpats:
76 for kind, pat, source in kindpats:
80 if kind == 'subinclude':
77 if kind == 'subinclude':
81 sourceroot = pathutil.dirname(util.normpath(source))
78 sourceroot = pathutil.dirname(util.normpath(source))
82 pat = util.pconvert(pat)
79 pat = util.pconvert(pat)
83 path = pathutil.join(sourceroot, pat)
80 path = pathutil.join(sourceroot, pat)
84
81
85 newroot = pathutil.dirname(path)
82 newroot = pathutil.dirname(path)
86 matcherargs = (newroot, '', [], ['include:%s' % path])
83 matcherargs = (newroot, '', [], ['include:%s' % path])
87
84
88 prefix = pathutil.canonpath(root, root, newroot)
85 prefix = pathutil.canonpath(root, root, newroot)
89 if prefix:
86 if prefix:
90 prefix += '/'
87 prefix += '/'
91 relmatchers.append((prefix, matcherargs))
88 relmatchers.append((prefix, matcherargs))
92 else:
89 else:
93 other.append((kind, pat, source))
90 other.append((kind, pat, source))
94
91
95 return relmatchers, other
92 return relmatchers, other
96
93
97 def _kindpatsalwaysmatch(kindpats):
94 def _kindpatsalwaysmatch(kindpats):
98 """"Checks whether the kindspats match everything, as e.g.
95 """"Checks whether the kindspats match everything, as e.g.
99 'relpath:.' does.
96 'relpath:.' does.
100 """
97 """
101 for kind, pat, source in kindpats:
98 for kind, pat, source in kindpats:
102 if pat != '' or kind not in ['relpath', 'glob']:
99 if pat != '' or kind not in ['relpath', 'glob']:
103 return False
100 return False
104 return True
101 return True
105
102
106 def _buildkindpatsmatcher(matchercls, root, kindpats, ctx=None,
103 def _buildkindpatsmatcher(matchercls, root, kindpats, ctx=None,
107 listsubrepos=False, badfn=None):
104 listsubrepos=False, badfn=None):
108 matchers = []
105 matchers = []
109 fms, kindpats = _expandsets(kindpats, ctx=ctx,
106 fms, kindpats = _expandsets(kindpats, ctx=ctx,
110 listsubrepos=listsubrepos, badfn=badfn)
107 listsubrepos=listsubrepos, badfn=badfn)
111 if kindpats:
108 if kindpats:
112 m = matchercls(root, kindpats, badfn=badfn)
109 m = matchercls(root, kindpats, badfn=badfn)
113 matchers.append(m)
110 matchers.append(m)
114 if fms:
111 if fms:
115 matchers.extend(fms)
112 matchers.extend(fms)
116 if not matchers:
113 if not matchers:
117 return nevermatcher(badfn=badfn)
114 return nevermatcher(badfn=badfn)
118 if len(matchers) == 1:
115 if len(matchers) == 1:
119 return matchers[0]
116 return matchers[0]
120 return unionmatcher(matchers)
117 return unionmatcher(matchers)
121
118
122 def match(root, cwd, patterns=None, include=None, exclude=None, default='glob',
119 def match(root, cwd, patterns=None, include=None, exclude=None, default='glob',
123 auditor=None, ctx=None, listsubrepos=False, warn=None,
120 auditor=None, ctx=None, listsubrepos=False, warn=None,
124 badfn=None, icasefs=False):
121 badfn=None, icasefs=False):
125 r"""build an object to match a set of file patterns
122 r"""build an object to match a set of file patterns
126
123
127 arguments:
124 arguments:
128 root - the canonical root of the tree you're matching against
125 root - the canonical root of the tree you're matching against
129 cwd - the current working directory, if relevant
126 cwd - the current working directory, if relevant
130 patterns - patterns to find
127 patterns - patterns to find
131 include - patterns to include (unless they are excluded)
128 include - patterns to include (unless they are excluded)
132 exclude - patterns to exclude (even if they are included)
129 exclude - patterns to exclude (even if they are included)
133 default - if a pattern in patterns has no explicit type, assume this one
130 default - if a pattern in patterns has no explicit type, assume this one
134 auditor - optional path auditor
131 auditor - optional path auditor
135 ctx - optional changecontext
132 ctx - optional changecontext
136 listsubrepos - if True, recurse into subrepositories
133 listsubrepos - if True, recurse into subrepositories
137 warn - optional function used for printing warnings
134 warn - optional function used for printing warnings
138 badfn - optional bad() callback for this matcher instead of the default
135 badfn - optional bad() callback for this matcher instead of the default
139 icasefs - make a matcher for wdir on case insensitive filesystems, which
136 icasefs - make a matcher for wdir on case insensitive filesystems, which
140 normalizes the given patterns to the case in the filesystem
137 normalizes the given patterns to the case in the filesystem
141
138
142 a pattern is one of:
139 a pattern is one of:
143 'glob:<glob>' - a glob relative to cwd
140 'glob:<glob>' - a glob relative to cwd
144 're:<regexp>' - a regular expression
141 're:<regexp>' - a regular expression
145 'path:<path>' - a path relative to repository root, which is matched
142 'path:<path>' - a path relative to repository root, which is matched
146 recursively
143 recursively
147 'rootfilesin:<path>' - a path relative to repository root, which is
144 'rootfilesin:<path>' - a path relative to repository root, which is
148 matched non-recursively (will not match subdirectories)
145 matched non-recursively (will not match subdirectories)
149 'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
146 'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
150 'relpath:<path>' - a path relative to cwd
147 'relpath:<path>' - a path relative to cwd
151 'relre:<regexp>' - a regexp that needn't match the start of a name
148 'relre:<regexp>' - a regexp that needn't match the start of a name
152 'set:<fileset>' - a fileset expression
149 'set:<fileset>' - a fileset expression
153 'include:<path>' - a file of patterns to read and include
150 'include:<path>' - a file of patterns to read and include
154 'subinclude:<path>' - a file of patterns to match against files under
151 'subinclude:<path>' - a file of patterns to match against files under
155 the same directory
152 the same directory
156 '<something>' - a pattern of the specified default type
153 '<something>' - a pattern of the specified default type
157
154
158 Usually a patternmatcher is returned:
155 Usually a patternmatcher is returned:
159 >>> match(b'foo', b'.', [b're:.*\.c$', b'path:foo/a', b'*.py'])
156 >>> match(b'foo', b'.', [b're:.*\.c$', b'path:foo/a', b'*.py'])
160 <patternmatcher patterns='.*\\.c$|foo/a(?:/|$)|[^/]*\\.py$'>
157 <patternmatcher patterns='.*\\.c$|foo/a(?:/|$)|[^/]*\\.py$'>
161
158
162 Combining 'patterns' with 'include' (resp. 'exclude') gives an
159 Combining 'patterns' with 'include' (resp. 'exclude') gives an
163 intersectionmatcher (resp. a differencematcher):
160 intersectionmatcher (resp. a differencematcher):
164 >>> type(match(b'foo', b'.', [b're:.*\.c$'], include=[b'path:lib']))
161 >>> type(match(b'foo', b'.', [b're:.*\.c$'], include=[b'path:lib']))
165 <class 'mercurial.match.intersectionmatcher'>
162 <class 'mercurial.match.intersectionmatcher'>
166 >>> type(match(b'foo', b'.', [b're:.*\.c$'], exclude=[b'path:build']))
163 >>> type(match(b'foo', b'.', [b're:.*\.c$'], exclude=[b'path:build']))
167 <class 'mercurial.match.differencematcher'>
164 <class 'mercurial.match.differencematcher'>
168
165
169 Notice that, if 'patterns' is empty, an alwaysmatcher is returned:
166 Notice that, if 'patterns' is empty, an alwaysmatcher is returned:
170 >>> match(b'foo', b'.', [])
167 >>> match(b'foo', b'.', [])
171 <alwaysmatcher>
168 <alwaysmatcher>
172
169
173 The 'default' argument determines which kind of pattern is assumed if a
170 The 'default' argument determines which kind of pattern is assumed if a
174 pattern has no prefix:
171 pattern has no prefix:
175 >>> match(b'foo', b'.', [b'.*\.c$'], default=b're')
172 >>> match(b'foo', b'.', [b'.*\.c$'], default=b're')
176 <patternmatcher patterns='.*\\.c$'>
173 <patternmatcher patterns='.*\\.c$'>
177 >>> match(b'foo', b'.', [b'main.py'], default=b'relpath')
174 >>> match(b'foo', b'.', [b'main.py'], default=b'relpath')
178 <patternmatcher patterns='main\\.py(?:/|$)'>
175 <patternmatcher patterns='main\\.py(?:/|$)'>
179 >>> match(b'foo', b'.', [b'main.py'], default=b're')
176 >>> match(b'foo', b'.', [b'main.py'], default=b're')
180 <patternmatcher patterns='main.py'>
177 <patternmatcher patterns='main.py'>
181
178
182 The primary use of matchers is to check whether a value (usually a file
179 The primary use of matchers is to check whether a value (usually a file
183 name) matches againset one of the patterns given at initialization. There
180 name) matches againset one of the patterns given at initialization. There
184 are two ways of doing this check.
181 are two ways of doing this check.
185
182
186 >>> m = match(b'foo', b'', [b're:.*\.c$', b'relpath:a'])
183 >>> m = match(b'foo', b'', [b're:.*\.c$', b'relpath:a'])
187
184
188 1. Calling the matcher with a file name returns True if any pattern
185 1. Calling the matcher with a file name returns True if any pattern
189 matches that file name:
186 matches that file name:
190 >>> m(b'a')
187 >>> m(b'a')
191 True
188 True
192 >>> m(b'main.c')
189 >>> m(b'main.c')
193 True
190 True
194 >>> m(b'test.py')
191 >>> m(b'test.py')
195 False
192 False
196
193
197 2. Using the exact() method only returns True if the file name matches one
194 2. Using the exact() method only returns True if the file name matches one
198 of the exact patterns (i.e. not re: or glob: patterns):
195 of the exact patterns (i.e. not re: or glob: patterns):
199 >>> m.exact(b'a')
196 >>> m.exact(b'a')
200 True
197 True
201 >>> m.exact(b'main.c')
198 >>> m.exact(b'main.c')
202 False
199 False
203 """
200 """
204 normalize = _donormalize
201 normalize = _donormalize
205 if icasefs:
202 if icasefs:
206 dirstate = ctx.repo().dirstate
203 dirstate = ctx.repo().dirstate
207 dsnormalize = dirstate.normalize
204 dsnormalize = dirstate.normalize
208
205
209 def normalize(patterns, default, root, cwd, auditor, warn):
206 def normalize(patterns, default, root, cwd, auditor, warn):
210 kp = _donormalize(patterns, default, root, cwd, auditor, warn)
207 kp = _donormalize(patterns, default, root, cwd, auditor, warn)
211 kindpats = []
208 kindpats = []
212 for kind, pats, source in kp:
209 for kind, pats, source in kp:
213 if kind not in ('re', 'relre'): # regex can't be normalized
210 if kind not in ('re', 'relre'): # regex can't be normalized
214 p = pats
211 p = pats
215 pats = dsnormalize(pats)
212 pats = dsnormalize(pats)
216
213
217 # Preserve the original to handle a case only rename.
214 # Preserve the original to handle a case only rename.
218 if p != pats and p in dirstate:
215 if p != pats and p in dirstate:
219 kindpats.append((kind, p, source))
216 kindpats.append((kind, p, source))
220
217
221 kindpats.append((kind, pats, source))
218 kindpats.append((kind, pats, source))
222 return kindpats
219 return kindpats
223
220
224 if patterns:
221 if patterns:
225 kindpats = normalize(patterns, default, root, cwd, auditor, warn)
222 kindpats = normalize(patterns, default, root, cwd, auditor, warn)
226 if _kindpatsalwaysmatch(kindpats):
223 if _kindpatsalwaysmatch(kindpats):
227 m = alwaysmatcher(badfn)
224 m = alwaysmatcher(badfn)
228 else:
225 else:
229 m = _buildkindpatsmatcher(patternmatcher, root, kindpats, ctx=ctx,
226 m = _buildkindpatsmatcher(patternmatcher, root, kindpats, ctx=ctx,
230 listsubrepos=listsubrepos, badfn=badfn)
227 listsubrepos=listsubrepos, badfn=badfn)
231 else:
228 else:
232 # It's a little strange that no patterns means to match everything.
229 # It's a little strange that no patterns means to match everything.
233 # Consider changing this to match nothing (probably using nevermatcher).
230 # Consider changing this to match nothing (probably using nevermatcher).
234 m = alwaysmatcher(badfn)
231 m = alwaysmatcher(badfn)
235
232
236 if include:
233 if include:
237 kindpats = normalize(include, 'glob', root, cwd, auditor, warn)
234 kindpats = normalize(include, 'glob', root, cwd, auditor, warn)
238 im = _buildkindpatsmatcher(includematcher, root, kindpats, ctx=ctx,
235 im = _buildkindpatsmatcher(includematcher, root, kindpats, ctx=ctx,
239 listsubrepos=listsubrepos, badfn=None)
236 listsubrepos=listsubrepos, badfn=None)
240 m = intersectmatchers(m, im)
237 m = intersectmatchers(m, im)
241 if exclude:
238 if exclude:
242 kindpats = normalize(exclude, 'glob', root, cwd, auditor, warn)
239 kindpats = normalize(exclude, 'glob', root, cwd, auditor, warn)
243 em = _buildkindpatsmatcher(includematcher, root, kindpats, ctx=ctx,
240 em = _buildkindpatsmatcher(includematcher, root, kindpats, ctx=ctx,
244 listsubrepos=listsubrepos, badfn=None)
241 listsubrepos=listsubrepos, badfn=None)
245 m = differencematcher(m, em)
242 m = differencematcher(m, em)
246 return m
243 return m
247
244
248 def exact(files, badfn=None):
245 def exact(files, badfn=None):
249 return exactmatcher(files, badfn=badfn)
246 return exactmatcher(files, badfn=badfn)
250
247
251 def always(badfn=None):
248 def always(badfn=None):
252 return alwaysmatcher(badfn)
249 return alwaysmatcher(badfn)
253
250
254 def never(badfn=None):
251 def never(badfn=None):
255 return nevermatcher(badfn)
252 return nevermatcher(badfn)
256
253
257 def badmatch(match, badfn):
254 def badmatch(match, badfn):
258 """Make a copy of the given matcher, replacing its bad method with the given
255 """Make a copy of the given matcher, replacing its bad method with the given
259 one.
256 one.
260 """
257 """
261 m = copy.copy(match)
258 m = copy.copy(match)
262 m.bad = badfn
259 m.bad = badfn
263 return m
260 return m
264
261
265 def _donormalize(patterns, default, root, cwd, auditor=None, warn=None):
262 def _donormalize(patterns, default, root, cwd, auditor=None, warn=None):
266 '''Convert 'kind:pat' from the patterns list to tuples with kind and
263 '''Convert 'kind:pat' from the patterns list to tuples with kind and
267 normalized and rooted patterns and with listfiles expanded.'''
264 normalized and rooted patterns and with listfiles expanded.'''
268 kindpats = []
265 kindpats = []
269 for kind, pat in [_patsplit(p, default) for p in patterns]:
266 for kind, pat in [_patsplit(p, default) for p in patterns]:
270 if kind in cwdrelativepatternkinds:
267 if kind in cwdrelativepatternkinds:
271 pat = pathutil.canonpath(root, cwd, pat, auditor=auditor)
268 pat = pathutil.canonpath(root, cwd, pat, auditor=auditor)
272 elif kind in ('relglob', 'path', 'rootfilesin', 'rootglob'):
269 elif kind in ('relglob', 'path', 'rootfilesin', 'rootglob'):
273 pat = util.normpath(pat)
270 pat = util.normpath(pat)
274 elif kind in ('listfile', 'listfile0'):
271 elif kind in ('listfile', 'listfile0'):
275 try:
272 try:
276 files = util.readfile(pat)
273 files = util.readfile(pat)
277 if kind == 'listfile0':
274 if kind == 'listfile0':
278 files = files.split('\0')
275 files = files.split('\0')
279 else:
276 else:
280 files = files.splitlines()
277 files = files.splitlines()
281 files = [f for f in files if f]
278 files = [f for f in files if f]
282 except EnvironmentError:
279 except EnvironmentError:
283 raise error.Abort(_("unable to read file list (%s)") % pat)
280 raise error.Abort(_("unable to read file list (%s)") % pat)
284 for k, p, source in _donormalize(files, default, root, cwd,
281 for k, p, source in _donormalize(files, default, root, cwd,
285 auditor, warn):
282 auditor, warn):
286 kindpats.append((k, p, pat))
283 kindpats.append((k, p, pat))
287 continue
284 continue
288 elif kind == 'include':
285 elif kind == 'include':
289 try:
286 try:
290 fullpath = os.path.join(root, util.localpath(pat))
287 fullpath = os.path.join(root, util.localpath(pat))
291 includepats = readpatternfile(fullpath, warn)
288 includepats = readpatternfile(fullpath, warn)
292 for k, p, source in _donormalize(includepats, default,
289 for k, p, source in _donormalize(includepats, default,
293 root, cwd, auditor, warn):
290 root, cwd, auditor, warn):
294 kindpats.append((k, p, source or pat))
291 kindpats.append((k, p, source or pat))
295 except error.Abort as inst:
292 except error.Abort as inst:
296 raise error.Abort('%s: %s' % (pat, inst.args[0]))
293 raise error.Abort('%s: %s' % (pat, inst.args[0]))
297 except IOError as inst:
294 except IOError as inst:
298 if warn:
295 if warn:
299 warn(_("skipping unreadable pattern file '%s': %s\n") %
296 warn(_("skipping unreadable pattern file '%s': %s\n") %
300 (pat, stringutil.forcebytestr(inst.strerror)))
297 (pat, stringutil.forcebytestr(inst.strerror)))
301 continue
298 continue
302 # else: re or relre - which cannot be normalized
299 # else: re or relre - which cannot be normalized
303 kindpats.append((kind, pat, ''))
300 kindpats.append((kind, pat, ''))
304 return kindpats
301 return kindpats
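# Editor's sketch (not part of the changeset): a minimal illustration of
# _donormalize's output shape. The `_demo_donormalize` name is hypothetical,
# and root/cwd are unused for these particular pattern kinds.
def _demo_donormalize():
    # A bare pattern picks up the default kind; an explicit 're:' prefix is
    # kept verbatim, since regexes cannot be normalized.
    kindpats = _donormalize([b'*.c', b're:^x'], b'relglob', b'/repo', b'/repo')
    assert kindpats == [(b'relglob', b'*.c', b''), (b're', b'^x', b'')]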
305
302
306 class basematcher(object):
303 class basematcher(object):
307
304
308 def __init__(self, badfn=None):
305 def __init__(self, badfn=None):
309 if badfn is not None:
306 if badfn is not None:
310 self.bad = badfn
307 self.bad = badfn
311
308
312 def __call__(self, fn):
309 def __call__(self, fn):
313 return self.matchfn(fn)
310 return self.matchfn(fn)
314 # Callbacks related to how the matcher is used by dirstate.walk.
311 # Callbacks related to how the matcher is used by dirstate.walk.
315 # Subscribers to these events must monkeypatch the matcher object.
312 # Subscribers to these events must monkeypatch the matcher object.
316 def bad(self, f, msg):
313 def bad(self, f, msg):
317 '''Callback from dirstate.walk for each explicit file that can't be
314 '''Callback from dirstate.walk for each explicit file that can't be
318 found/accessed, with an error message.'''
315 found/accessed, with an error message.'''
319
316
320 # If an explicitdir is set, it will be called when an explicitly listed
317 # If an explicitdir is set, it will be called when an explicitly listed
321 # directory is visited.
318 # directory is visited.
322 explicitdir = None
319 explicitdir = None
323
320
324 # If a traversedir is set, it will be called when a directory discovered
321 # If a traversedir is set, it will be called when a directory discovered
325 # by recursive traversal is visited.
322 # by recursive traversal is visited.
326 traversedir = None
323 traversedir = None
327
324
328 @propertycache
325 @propertycache
329 def _files(self):
326 def _files(self):
330 return []
327 return []
331
328
332 def files(self):
329 def files(self):
333 '''Explicitly listed files or patterns or roots:
330 '''Explicitly listed files or patterns or roots:
334 if no patterns or .always(): empty list,
331 if no patterns or .always(): empty list,
335 if exact: list exact files,
332 if exact: list exact files,
336 if not .anypats(): list all files and dirs,
333 if not .anypats(): list all files and dirs,
337 else: optimal roots'''
334 else: optimal roots'''
338 return self._files
335 return self._files
339
336
340 @propertycache
337 @propertycache
341 def _fileset(self):
338 def _fileset(self):
342 return set(self._files)
339 return set(self._files)
343
340
344 def exact(self, f):
341 def exact(self, f):
345 '''Returns True if f is in .files().'''
342 '''Returns True if f is in .files().'''
346 return f in self._fileset
343 return f in self._fileset
347
344
348 def matchfn(self, f):
345 def matchfn(self, f):
349 return False
346 return False
350
347
351 def visitdir(self, dir):
348 def visitdir(self, dir):
352 '''Decides whether a directory should be visited based on whether it
349 '''Decides whether a directory should be visited based on whether it
353 has potential matches in it or one of its subdirectories. This is
350 has potential matches in it or one of its subdirectories. This is
354 based on the match's primary, included, and excluded patterns.
351 based on the match's primary, included, and excluded patterns.
355
352
356 Returns the string 'all' if the given directory and all subdirectories
353 Returns the string 'all' if the given directory and all subdirectories
357 should be visited. Otherwise returns True or False indicating whether
354 should be visited. Otherwise returns True or False indicating whether
358 the given directory should be visited.
355 the given directory should be visited.
359 '''
356 '''
360 return True
357 return True
361
358
362 def visitchildrenset(self, dir):
359 def visitchildrenset(self, dir):
363 '''Decides whether a directory should be visited based on whether it
360 '''Decides whether a directory should be visited based on whether it
364 has potential matches in it or one of its subdirectories, and
361 has potential matches in it or one of its subdirectories, and
365 potentially lists which subdirectories of that directory should be
362 potentially lists which subdirectories of that directory should be
366 visited. This is based on the match's primary, included, and excluded
363 visited. This is based on the match's primary, included, and excluded
367 patterns.
364 patterns.
368
365
369 This function is very similar to 'visitdir', and the following mapping
366 This function is very similar to 'visitdir', and the following mapping
370 can be applied:
367 can be applied:
371
368
372 visitdir | visitchildrenset
369 visitdir | visitchildrenset
373 ----------+-------------------
370 ----------+-------------------
374 False | set()
371 False | set()
375 'all' | 'all'
372 'all' | 'all'
376 True | 'this' OR non-empty set of subdirs -or files- to visit
373 True | 'this' OR non-empty set of subdirs -or files- to visit
377
374
378 Example:
375 Example:
379 Assuming matchers ['path:foo/bar', 'rootfilesin:qux'], we would return
376 Assuming matchers ['path:foo/bar', 'rootfilesin:qux'], we would return
380 the following values (assuming the implementation of visitchildrenset
377 the following values (assuming the implementation of visitchildrenset
381 is capable of recognizing this; some implementations are not).
378 is capable of recognizing this; some implementations are not).
382
379
383 '' -> {'foo', 'qux'}
380 '' -> {'foo', 'qux'}
384 'baz' -> set()
381 'baz' -> set()
385 'foo' -> {'bar'}
382 'foo' -> {'bar'}
386 # Ideally this would be 'all', but since the prefix nature of matchers
383 # Ideally this would be 'all', but since the prefix nature of matchers
387 # is applied to the entire matcher, we have to downgrade this to
384 # is applied to the entire matcher, we have to downgrade this to
388 # 'this' due to the non-prefix 'rootfilesin'-kind matcher being mixed
385 # 'this' due to the non-prefix 'rootfilesin'-kind matcher being mixed
389 # in.
386 # in.
390 'foo/bar' -> 'this'
387 'foo/bar' -> 'this'
391 'qux' -> 'this'
388 'qux' -> 'this'
392
389
393 Important:
390 Important:
394 Most matchers do not know if they're representing files or
391 Most matchers do not know if they're representing files or
395 directories. They see ['path:dir/f'] and don't know whether 'f' is a
392 directories. They see ['path:dir/f'] and don't know whether 'f' is a
396 file or a directory, so visitchildrenset('dir') for most matchers will
393 file or a directory, so visitchildrenset('dir') for most matchers will
397 return {'f'}, but if the matcher knows it's a file (like exactmatcher
394 return {'f'}, but if the matcher knows it's a file (like exactmatcher
398 does), it may return 'this'. Do not rely on the return being a set
395 does), it may return 'this'. Do not rely on the return being a set
399 indicating that there are no files in this dir to investigate (or
396 indicating that there are no files in this dir to investigate (or
400 equivalently that if there are files to investigate in 'dir' that it
397 equivalently that if there are files to investigate in 'dir' that it
401 will always return 'this').
398 will always return 'this').
402 '''
399 '''
403 return 'this'
400 return 'this'
404
401
405 def always(self):
402 def always(self):
406 '''Matcher will match everything and .files() will be empty --
403 '''Matcher will match everything and .files() will be empty --
407 optimization might be possible.'''
404 optimization might be possible.'''
408 return False
405 return False
409
406
410 def isexact(self):
407 def isexact(self):
411 '''Matcher will match exactly the list of files in .files() --
408 '''Matcher will match exactly the list of files in .files() --
412 optimization might be possible.'''
409 optimization might be possible.'''
413 return False
410 return False
414
411
415 def prefix(self):
412 def prefix(self):
416 '''Matcher will match the paths in .files() recursively --
413 '''Matcher will match the paths in .files() recursively --
417 optimization might be possible.'''
414 optimization might be possible.'''
418 return False
415 return False
419
416
420 def anypats(self):
417 def anypats(self):
421 '''None of .always(), .isexact(), and .prefix() is true --
418 '''None of .always(), .isexact(), and .prefix() is true --
422 optimizations will be difficult.'''
419 optimizations will be difficult.'''
423 return not self.always() and not self.isexact() and not self.prefix()
420 return not self.always() and not self.isexact() and not self.prefix()
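# Editor's sketch (not part of the changeset): the base class defaults are
# maximally conservative -- match nothing, but visit every directory one
# level at a time. `_demo_basematcher` is a hypothetical name.
def _demo_basematcher():
    m = basematcher()
    assert m.matchfn(b'any/file') is False
    assert m.visitdir(b'any') is True
    assert m.visitchildrenset(b'any') == 'this'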
424
421
425 class alwaysmatcher(basematcher):
422 class alwaysmatcher(basematcher):
426 '''Matches everything.'''
423 '''Matches everything.'''
427
424
428 def __init__(self, badfn=None):
425 def __init__(self, badfn=None):
429 super(alwaysmatcher, self).__init__(badfn)
426 super(alwaysmatcher, self).__init__(badfn)
430
427
431 def always(self):
428 def always(self):
432 return True
429 return True
433
430
434 def matchfn(self, f):
431 def matchfn(self, f):
435 return True
432 return True
436
433
437 def visitdir(self, dir):
434 def visitdir(self, dir):
438 return 'all'
435 return 'all'
439
436
440 def visitchildrenset(self, dir):
437 def visitchildrenset(self, dir):
441 return 'all'
438 return 'all'
442
439
443 def __repr__(self):
440 def __repr__(self):
444 return r'<alwaysmatcher>'
441 return r'<alwaysmatcher>'
445
442
446 class nevermatcher(basematcher):
443 class nevermatcher(basematcher):
447 '''Matches nothing.'''
444 '''Matches nothing.'''
448
445
449 def __init__(self, badfn=None):
446 def __init__(self, badfn=None):
450 super(nevermatcher, self).__init__(badfn)
447 super(nevermatcher, self).__init__(badfn)
451
448
452 # It's a little weird to say that the nevermatcher is an exact matcher
449 # It's a little weird to say that the nevermatcher is an exact matcher
453 # or a prefix matcher, but it seems to make sense to let callers take
450 # or a prefix matcher, but it seems to make sense to let callers take
454 # fast paths based on either. There will be no exact matches, nor any
451 # fast paths based on either. There will be no exact matches, nor any
455 # prefixes (files() returns []), so fast paths iterating over them should
452 # prefixes (files() returns []), so fast paths iterating over them should
456 # be efficient (and correct).
453 # be efficient (and correct).
457 def isexact(self):
454 def isexact(self):
458 return True
455 return True
459
456
460 def prefix(self):
457 def prefix(self):
461 return True
458 return True
462
459
463 def visitdir(self, dir):
460 def visitdir(self, dir):
464 return False
461 return False
465
462
466 def visitchildrenset(self, dir):
463 def visitchildrenset(self, dir):
467 return set()
464 return set()
468
465
469 def __repr__(self):
466 def __repr__(self):
470 return r'<nevermatcher>'
467 return r'<nevermatcher>'
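# Editor's sketch (not part of the changeset): the two trivial matchers sit
# at opposite ends of the visitdir()/visitchildrenset() spectrum, which is
# what lets walkers either take whole trees or prune them outright.
def _demo_always_never():
    assert alwaysmatcher().visitdir(b'any') == 'all'
    assert alwaysmatcher().visitchildrenset(b'any') == 'all'
    assert nevermatcher().visitdir(b'any') is False
    assert nevermatcher().visitchildrenset(b'any') == set()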
471
468
472 class predicatematcher(basematcher):
469 class predicatematcher(basematcher):
473 """A matcher adapter for a simple boolean function"""
470 """A matcher adapter for a simple boolean function"""
474
471
475 def __init__(self, predfn, predrepr=None, badfn=None):
472 def __init__(self, predfn, predrepr=None, badfn=None):
476 super(predicatematcher, self).__init__(badfn)
473 super(predicatematcher, self).__init__(badfn)
477 self.matchfn = predfn
474 self.matchfn = predfn
478 self._predrepr = predrepr
475 self._predrepr = predrepr
479
476
480 @encoding.strmethod
477 @encoding.strmethod
481 def __repr__(self):
478 def __repr__(self):
482 s = (stringutil.buildrepr(self._predrepr)
479 s = (stringutil.buildrepr(self._predrepr)
483 or pycompat.byterepr(self.matchfn))
480 or pycompat.byterepr(self.matchfn))
484 return '<predicatematcher pred=%s>' % s
481 return '<predicatematcher pred=%s>' % s
485
482
486 def normalizerootdir(dir, funcname):
483 def normalizerootdir(dir, funcname):
487 if dir == '.':
484 if dir == '.':
488 util.nouideprecwarn("match.%s() no longer accepts "
485 util.nouideprecwarn("match.%s() no longer accepts "
489 "'.', use '' instead." % funcname, '5.1')
486 "'.', use '' instead." % funcname, '5.1')
490 return ''
487 return ''
491 return dir
488 return dir
492
489
493
490
494 class patternmatcher(basematcher):
491 class patternmatcher(basematcher):
495 """Matches a set of (kind, pat, source) against a 'root' directory.
492 """Matches a set of (kind, pat, source) against a 'root' directory.
496
493
497 >>> kindpats = [
494 >>> kindpats = [
498 ... (b're', br'.*\.c$', b''),
495 ... (b're', br'.*\.c$', b''),
499 ... (b'path', b'foo/a', b''),
496 ... (b'path', b'foo/a', b''),
500 ... (b'relpath', b'b', b''),
497 ... (b'relpath', b'b', b''),
501 ... (b'glob', b'*.h', b''),
498 ... (b'glob', b'*.h', b''),
502 ... ]
499 ... ]
503 >>> m = patternmatcher(b'foo', kindpats)
500 >>> m = patternmatcher(b'foo', kindpats)
504 >>> m(b'main.c') # matches re:.*\.c$
501 >>> m(b'main.c') # matches re:.*\.c$
505 True
502 True
506 >>> m(b'b.txt')
503 >>> m(b'b.txt')
507 False
504 False
508 >>> m(b'foo/a') # matches path:foo/a
505 >>> m(b'foo/a') # matches path:foo/a
509 True
506 True
510 >>> m(b'a') # does not match path:b, since 'root' is 'foo'
507 >>> m(b'a') # does not match path:b, since 'root' is 'foo'
511 False
508 False
512 >>> m(b'b') # matches relpath:b, since 'root' is 'foo'
509 >>> m(b'b') # matches relpath:b, since 'root' is 'foo'
513 True
510 True
514 >>> m(b'lib.h') # matches glob:*.h
511 >>> m(b'lib.h') # matches glob:*.h
515 True
512 True
516
513
517 >>> m.files()
514 >>> m.files()
518 ['', 'foo/a', 'b', '']
515 ['', 'foo/a', 'b', '']
519 >>> m.exact(b'foo/a')
516 >>> m.exact(b'foo/a')
520 True
517 True
521 >>> m.exact(b'b')
518 >>> m.exact(b'b')
522 True
519 True
523 >>> m.exact(b'lib.h') # exact matches are for (rel)path kinds
520 >>> m.exact(b'lib.h') # exact matches are for (rel)path kinds
524 False
521 False
525 """
522 """
526
523
527 def __init__(self, root, kindpats, badfn=None):
524 def __init__(self, root, kindpats, badfn=None):
528 super(patternmatcher, self).__init__(badfn)
525 super(patternmatcher, self).__init__(badfn)
529
526
530 self._files = _explicitfiles(kindpats)
527 self._files = _explicitfiles(kindpats)
531 self._prefix = _prefix(kindpats)
528 self._prefix = _prefix(kindpats)
532 self._pats, self.matchfn = _buildmatch(kindpats, '$', root)
529 self._pats, self.matchfn = _buildmatch(kindpats, '$', root)
533
530
534 @propertycache
531 @propertycache
535 def _dirs(self):
532 def _dirs(self):
536 return set(util.dirs(self._fileset))
533 return set(util.dirs(self._fileset))
537
534
538 def visitdir(self, dir):
535 def visitdir(self, dir):
539 dir = normalizerootdir(dir, 'visitdir')
536 dir = normalizerootdir(dir, 'visitdir')
540 if self._prefix and dir in self._fileset:
537 if self._prefix and dir in self._fileset:
541 return 'all'
538 return 'all'
542 return (dir in self._fileset or
539 return (dir in self._fileset or
543 dir in self._dirs or
540 dir in self._dirs or
544 any(parentdir in self._fileset
541 any(parentdir in self._fileset
545 for parentdir in util.finddirs(dir)))
542 for parentdir in util.finddirs(dir)))
546
543
547 def visitchildrenset(self, dir):
544 def visitchildrenset(self, dir):
548 ret = self.visitdir(dir)
545 ret = self.visitdir(dir)
549 if ret is True:
546 if ret is True:
550 return 'this'
547 return 'this'
551 elif not ret:
548 elif not ret:
552 return set()
549 return set()
553 assert ret == 'all'
550 assert ret == 'all'
554 return 'all'
551 return 'all'
555
552
556 def prefix(self):
553 def prefix(self):
557 return self._prefix
554 return self._prefix
558
555
559 @encoding.strmethod
556 @encoding.strmethod
560 def __repr__(self):
557 def __repr__(self):
561 return ('<patternmatcher patterns=%r>' % pycompat.bytestr(self._pats))
558 return ('<patternmatcher patterns=%r>' % pycompat.bytestr(self._pats))
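# Editor's sketch (not part of the changeset): when every pattern is of a
# prefix kind ('path'/'relpath'), patternmatcher can answer 'all' for the
# matched directory and False for unrelated siblings. `_demo_*` is a
# hypothetical name.
def _demo_patternmatcher_prefix():
    m = patternmatcher(b'', [(b'path', b'foo/bar', b'')])
    assert m.prefix()
    assert m.visitdir(b'foo/bar') == 'all'
    assert m.visitdir(b'baz') is False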
562
559
563 # This is basically a reimplementation of util.dirs that stores the children
560 # This is basically a reimplementation of util.dirs that stores the children
564 # instead of just a count of them, plus a small optional optimization to avoid
561 # instead of just a count of them, plus a small optional optimization to avoid
565 # some directories we don't need.
562 # some directories we don't need.
566 class _dirchildren(object):
563 class _dirchildren(object):
567 def __init__(self, paths, onlyinclude=None):
564 def __init__(self, paths, onlyinclude=None):
568 self._dirs = {}
565 self._dirs = {}
569 self._onlyinclude = onlyinclude or []
566 self._onlyinclude = onlyinclude or []
570 addpath = self.addpath
567 addpath = self.addpath
571 for f in paths:
568 for f in paths:
572 addpath(f)
569 addpath(f)
573
570
574 def addpath(self, path):
571 def addpath(self, path):
575 if path == '':
572 if path == '':
576 return
573 return
577 dirs = self._dirs
574 dirs = self._dirs
578 findsplitdirs = _dirchildren._findsplitdirs
575 findsplitdirs = _dirchildren._findsplitdirs
579 for d, b in findsplitdirs(path):
576 for d, b in findsplitdirs(path):
580 if d not in self._onlyinclude:
577 if d not in self._onlyinclude:
581 continue
578 continue
582 dirs.setdefault(d, set()).add(b)
579 dirs.setdefault(d, set()).add(b)
583
580
584 @staticmethod
581 @staticmethod
585 def _findsplitdirs(path):
582 def _findsplitdirs(path):
586 # yields (dirname, basename) tuples, walking back to the root. This is
583 # yields (dirname, basename) tuples, walking back to the root. This is
587 # very similar to util.finddirs, except:
584 # very similar to util.finddirs, except:
588 # - produces a (dirname, basename) tuple, not just 'dirname'
585 # - produces a (dirname, basename) tuple, not just 'dirname'
589 # Unlike manifest._splittopdir, this does not suffix `dirname` with a
586 # Unlike manifest._splittopdir, this does not suffix `dirname` with a
590 # slash.
587 # slash.
591 oldpos = len(path)
588 oldpos = len(path)
592 pos = path.rfind('/')
589 pos = path.rfind('/')
593 while pos != -1:
590 while pos != -1:
594 yield path[:pos], path[pos + 1:oldpos]
591 yield path[:pos], path[pos + 1:oldpos]
595 oldpos = pos
592 oldpos = pos
596 pos = path.rfind('/', 0, pos)
593 pos = path.rfind('/', 0, pos)
597 yield '', path[:oldpos]
594 yield '', path[:oldpos]
598
595
599 def get(self, path):
596 def get(self, path):
600 return self._dirs.get(path, set())
597 return self._dirs.get(path, set())
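# Editor's sketch (not part of the changeset): _findsplitdirs yields
# (dirname, basename) pairs walking back to the root, which addpath() uses
# to record each directory's immediate children.
def _demo_findsplitdirs():
    got = list(_dirchildren._findsplitdirs(b'a/b/c'))
    assert got == [(b'a/b', b'c'), (b'a', b'b'), (b'', b'a')]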
601
598
602 class includematcher(basematcher):
599 class includematcher(basematcher):
603
600
604 def __init__(self, root, kindpats, badfn=None):
601 def __init__(self, root, kindpats, badfn=None):
605 super(includematcher, self).__init__(badfn)
602 super(includematcher, self).__init__(badfn)
606
603
607 self._pats, self.matchfn = _buildmatch(kindpats, '(?:/|$)', root)
604 self._pats, self.matchfn = _buildmatch(kindpats, '(?:/|$)', root)
608 self._prefix = _prefix(kindpats)
605 self._prefix = _prefix(kindpats)
609 roots, dirs, parents = _rootsdirsandparents(kindpats)
606 roots, dirs, parents = _rootsdirsandparents(kindpats)
610 # roots are directories which are recursively included.
607 # roots are directories which are recursively included.
611 self._roots = set(roots)
608 self._roots = set(roots)
612 # dirs are directories which are non-recursively included.
609 # dirs are directories which are non-recursively included.
613 self._dirs = set(dirs)
610 self._dirs = set(dirs)
614 # parents are directories which are non-recursively included because
611 # parents are directories which are non-recursively included because
615 # they are needed to get to items in _dirs or _roots.
612 # they are needed to get to items in _dirs or _roots.
616 self._parents = parents
613 self._parents = parents
617
614
618 def visitdir(self, dir):
615 def visitdir(self, dir):
619 dir = normalizerootdir(dir, 'visitdir')
616 dir = normalizerootdir(dir, 'visitdir')
620 if self._prefix and dir in self._roots:
617 if self._prefix and dir in self._roots:
621 return 'all'
618 return 'all'
622 return (dir in self._roots or
619 return (dir in self._roots or
623 dir in self._dirs or
620 dir in self._dirs or
624 dir in self._parents or
621 dir in self._parents or
625 any(parentdir in self._roots
622 any(parentdir in self._roots
626 for parentdir in util.finddirs(dir)))
623 for parentdir in util.finddirs(dir)))
627
624
628 @propertycache
625 @propertycache
629 def _allparentschildren(self):
626 def _allparentschildren(self):
630 # It may seem odd that we add dirs, roots, and parents, and then
627 # It may seem odd that we add dirs, roots, and parents, and then
631 # restrict to only parents. This is to catch the case of:
628 # restrict to only parents. This is to catch the case of:
632 # dirs = ['foo/bar']
629 # dirs = ['foo/bar']
633 # parents = ['foo']
630 # parents = ['foo']
634 # if we asked for the children of 'foo', but had only added
631 # if we asked for the children of 'foo', but had only added
635 # self._parents, we wouldn't be able to respond ['bar'].
632 # self._parents, we wouldn't be able to respond ['bar'].
636 return _dirchildren(
633 return _dirchildren(
637 itertools.chain(self._dirs, self._roots, self._parents),
634 itertools.chain(self._dirs, self._roots, self._parents),
638 onlyinclude=self._parents)
635 onlyinclude=self._parents)
639
636
640 def visitchildrenset(self, dir):
637 def visitchildrenset(self, dir):
641 if self._prefix and dir in self._roots:
638 if self._prefix and dir in self._roots:
642 return 'all'
639 return 'all'
643 # Note: this does *not* include the 'dir in self._parents' case from
640 # Note: this does *not* include the 'dir in self._parents' case from
644 # visitdir, that's handled below.
641 # visitdir, that's handled below.
645 if ('' in self._roots or
642 if ('' in self._roots or
646 dir in self._roots or
643 dir in self._roots or
647 dir in self._dirs or
644 dir in self._dirs or
648 any(parentdir in self._roots
645 any(parentdir in self._roots
649 for parentdir in util.finddirs(dir))):
646 for parentdir in util.finddirs(dir))):
650 return 'this'
647 return 'this'
651
648
652 if dir in self._parents:
649 if dir in self._parents:
653 return self._allparentschildren.get(dir) or set()
650 return self._allparentschildren.get(dir) or set()
654 return set()
651 return set()
655
652
656 @encoding.strmethod
653 @encoding.strmethod
657 def __repr__(self):
654 def __repr__(self):
658 return ('<includematcher includes=%r>' % pycompat.bytestr(self._pats))
655 return ('<includematcher includes=%r>' % pycompat.bytestr(self._pats))
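# Editor's sketch (not part of the changeset): for a recursive 'path:'
# include, the included root reports 'all', while its parents only name the
# children leading down to it. `_demo_includematcher` is a hypothetical name.
def _demo_includematcher():
    m = includematcher(b'', [(b'path', b'foo/bar', b'')])
    assert m.visitdir(b'foo/bar') == 'all'
    assert m.visitchildrenset(b'') == {b'foo'}
    assert m.visitchildrenset(b'foo') == {b'bar'}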
659
656
660 class exactmatcher(basematcher):
657 class exactmatcher(basematcher):
661 r'''Matches the input files exactly. They are interpreted as paths, not
658 r'''Matches the input files exactly. They are interpreted as paths, not
662 patterns (so no kind-prefixes).
659 patterns (so no kind-prefixes).
663
660
664 >>> m = exactmatcher([b'a.txt', br're:.*\.c$'])
661 >>> m = exactmatcher([b'a.txt', br're:.*\.c$'])
665 >>> m(b'a.txt')
662 >>> m(b'a.txt')
666 True
663 True
667 >>> m(b'b.txt')
664 >>> m(b'b.txt')
668 False
665 False
669
666
670 Input files that would be matched are exactly those returned by .files()
667 Input files that would be matched are exactly those returned by .files()
671 >>> m.files()
668 >>> m.files()
672 ['a.txt', 're:.*\\.c$']
669 ['a.txt', 're:.*\\.c$']
673
670
674 So pattern 're:.*\.c$' is not considered as a regex, but as a file name
671 So pattern 're:.*\.c$' is not considered as a regex, but as a file name
675 >>> m(b'main.c')
672 >>> m(b'main.c')
676 False
673 False
677 >>> m(br're:.*\.c$')
674 >>> m(br're:.*\.c$')
678 True
675 True
679 '''
676 '''
680
677
681 def __init__(self, files, badfn=None):
678 def __init__(self, files, badfn=None):
682 super(exactmatcher, self).__init__(badfn)
679 super(exactmatcher, self).__init__(badfn)
683
680
684 if isinstance(files, list):
681 if isinstance(files, list):
685 self._files = files
682 self._files = files
686 else:
683 else:
687 self._files = list(files)
684 self._files = list(files)
688
685
689 matchfn = basematcher.exact
686 matchfn = basematcher.exact
690
687
691 @propertycache
688 @propertycache
692 def _dirs(self):
689 def _dirs(self):
693 return set(util.dirs(self._fileset))
690 return set(util.dirs(self._fileset))
694
691
695 def visitdir(self, dir):
692 def visitdir(self, dir):
696 dir = normalizerootdir(dir, 'visitdir')
693 dir = normalizerootdir(dir, 'visitdir')
697 return dir in self._dirs
694 return dir in self._dirs
698
695
699 def visitchildrenset(self, dir):
696 def visitchildrenset(self, dir):
700 dir = normalizerootdir(dir, 'visitchildrenset')
697 dir = normalizerootdir(dir, 'visitchildrenset')
701
698
702 if not self._fileset or dir not in self._dirs:
699 if not self._fileset or dir not in self._dirs:
703 return set()
700 return set()
704
701
705 candidates = self._fileset | self._dirs - {''}
702 candidates = self._fileset | self._dirs - {''}
706 if dir != '':
703 if dir != '':
707 d = dir + '/'
704 d = dir + '/'
708 candidates = set(c[len(d):] for c in candidates if
705 candidates = set(c[len(d):] for c in candidates if
709 c.startswith(d))
706 c.startswith(d))
710 # self._dirs includes all of the directories, recursively, so if
707 # self._dirs includes all of the directories, recursively, so if
711 # we're attempting to match foo/bar/baz.txt, it'll have '', 'foo',
708 # we're attempting to match foo/bar/baz.txt, it'll have '', 'foo',
712 # 'foo/bar' in it. Thus we can safely ignore a candidate that has a
709 # 'foo/bar' in it. Thus we can safely ignore a candidate that has a
713 # '/' in it, indicating it's for a subdir-of-a-subdir; the
710 # '/' in it, indicating it's for a subdir-of-a-subdir; the
714 # immediate subdir will be in there without a slash.
711 # immediate subdir will be in there without a slash.
715 ret = {c for c in candidates if '/' not in c}
712 ret = {c for c in candidates if '/' not in c}
716 # We really do not expect ret to be empty, since that would imply that
713 # We really do not expect ret to be empty, since that would imply that
717 # there's something in _dirs that didn't have a file in _fileset.
714 # there's something in _dirs that didn't have a file in _fileset.
718 assert ret
715 assert ret
719 return ret
716 return ret
720
717
721 def isexact(self):
718 def isexact(self):
722 return True
719 return True
723
720
724 @encoding.strmethod
721 @encoding.strmethod
725 def __repr__(self):
722 def __repr__(self):
726 return ('<exactmatcher files=%r>' % self._files)
723 return ('<exactmatcher files=%r>' % self._files)
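# Editor's sketch (not part of the changeset): because exactmatcher knows
# the complete file list, visitchildrenset() can name the exact children
# worth descending into instead of a generic 'this'.
def _demo_exactmatcher_children():
    m = exactmatcher([b'foo/bar/baz.txt', b'foo/qux.txt'])
    assert m.visitchildrenset(b'foo') == {b'bar', b'qux.txt'}
    assert m.visitchildrenset(b'unrelated') == set()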
727
724
728 class differencematcher(basematcher):
725 class differencematcher(basematcher):
729 '''Composes two matchers by matching if the first matches and the second
726 '''Composes two matchers by matching if the first matches and the second
730 does not.
727 does not.
731
728
732 The second matcher's non-matching-attributes (bad, explicitdir,
729 The second matcher's non-matching-attributes (bad, explicitdir,
733 traversedir) are ignored.
730 traversedir) are ignored.
734 '''
731 '''
735 def __init__(self, m1, m2):
732 def __init__(self, m1, m2):
736 super(differencematcher, self).__init__()
733 super(differencematcher, self).__init__()
737 self._m1 = m1
734 self._m1 = m1
738 self._m2 = m2
735 self._m2 = m2
739 self.bad = m1.bad
736 self.bad = m1.bad
740 self.explicitdir = m1.explicitdir
737 self.explicitdir = m1.explicitdir
741 self.traversedir = m1.traversedir
738 self.traversedir = m1.traversedir
742
739
743 def matchfn(self, f):
740 def matchfn(self, f):
744 return self._m1(f) and not self._m2(f)
741 return self._m1(f) and not self._m2(f)
745
742
746 @propertycache
743 @propertycache
747 def _files(self):
744 def _files(self):
748 if self.isexact():
745 if self.isexact():
749 return [f for f in self._m1.files() if self(f)]
746 return [f for f in self._m1.files() if self(f)]
750 # If m1 is not an exact matcher, we can't easily figure out the set of
747 # If m1 is not an exact matcher, we can't easily figure out the set of
751 # files, because its files() are not always files. For example, if
748 # files, because its files() are not always files. For example, if
752 # m1 is "path:dir" and m2 is "rootfilesin:.", we don't
749 # m1 is "path:dir" and m2 is "rootfilesin:.", we don't
753 # want to remove "dir" from the set even though it would match m2,
750 # want to remove "dir" from the set even though it would match m2,
754 # because the "dir" in m1 may not be a file.
751 # because the "dir" in m1 may not be a file.
755 return self._m1.files()
752 return self._m1.files()
756
753
757 def visitdir(self, dir):
754 def visitdir(self, dir):
758 if self._m2.visitdir(dir) == 'all':
755 if self._m2.visitdir(dir) == 'all':
759 return False
756 return False
760 elif not self._m2.visitdir(dir):
757 elif not self._m2.visitdir(dir):
761 # m2 does not match dir, we can return 'all' here if possible
758 # m2 does not match dir, we can return 'all' here if possible
762 return self._m1.visitdir(dir)
759 return self._m1.visitdir(dir)
763 return bool(self._m1.visitdir(dir))
760 return bool(self._m1.visitdir(dir))
764
761
765 def visitchildrenset(self, dir):
762 def visitchildrenset(self, dir):
766 m2_set = self._m2.visitchildrenset(dir)
763 m2_set = self._m2.visitchildrenset(dir)
767 if m2_set == 'all':
764 if m2_set == 'all':
768 return set()
765 return set()
769 m1_set = self._m1.visitchildrenset(dir)
766 m1_set = self._m1.visitchildrenset(dir)
770 # Possible values for m1: 'all', 'this', set(...), set()
767 # Possible values for m1: 'all', 'this', set(...), set()
771 # Possible values for m2: 'this', set(...), set()
768 # Possible values for m2: 'this', set(...), set()
772 # If m2 has nothing under here that we care about, return m1, even if
769 # If m2 has nothing under here that we care about, return m1, even if
773 # it's 'all'. This is a change in behavior from visitdir, which would
770 # it's 'all'. This is a change in behavior from visitdir, which would
774 # return True, not 'all', for some reason.
771 # return True, not 'all', for some reason.
775 if not m2_set:
772 if not m2_set:
776 return m1_set
773 return m1_set
777 if m1_set in ['all', 'this']:
774 if m1_set in ['all', 'this']:
778 # Never return 'all' here if m2_set is any kind of non-empty (either
775 # Never return 'all' here if m2_set is any kind of non-empty (either
779 # 'this' or set(foo)), since m2 might return set() for a
776 # 'this' or set(foo)), since m2 might return set() for a
780 # subdirectory.
777 # subdirectory.
781 return 'this'
778 return 'this'
782 # Possible values for m1: set(...), set()
779 # Possible values for m1: set(...), set()
783 # Possible values for m2: 'this', set(...)
780 # Possible values for m2: 'this', set(...)
784 # We ignore m2's set results. They're possibly incorrect:
781 # We ignore m2's set results. They're possibly incorrect:
785 # m1 = path:dir/subdir, m2=rootfilesin:dir, visitchildrenset(''):
782 # m1 = path:dir/subdir, m2=rootfilesin:dir, visitchildrenset(''):
786 # m1 returns {'dir'}, m2 returns {'dir'}, if we subtracted we'd
783 # m1 returns {'dir'}, m2 returns {'dir'}, if we subtracted we'd
787 # return set(), which is *not* correct, we still need to visit 'dir'!
784 # return set(), which is *not* correct, we still need to visit 'dir'!
788 return m1_set
785 return m1_set
789
786
790 def isexact(self):
787 def isexact(self):
791 return self._m1.isexact()
788 return self._m1.isexact()
792
789
793 @encoding.strmethod
790 @encoding.strmethod
794 def __repr__(self):
791 def __repr__(self):
795 return ('<differencematcher m1=%r, m2=%r>' % (self._m1, self._m2))
792 return ('<differencematcher m1=%r, m2=%r>' % (self._m1, self._m2))
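# Editor's sketch (not part of the changeset): the include-minus-exclude
# composition used by match() above. Note how visitdir() prunes the excluded
# subtree entirely. `_demo_differencematcher` is a hypothetical name.
def _demo_differencematcher():
    m = differencematcher(patternmatcher(b'', [(b'path', b'foo', b'')]),
                          patternmatcher(b'', [(b'path', b'foo/tmp', b'')]))
    assert m(b'foo/a.txt')
    assert not m(b'foo/tmp/b.txt')
    assert m.visitdir(b'foo/tmp') is False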
796
793
797 def intersectmatchers(m1, m2):
794 def intersectmatchers(m1, m2):
798 '''Composes two matchers by matching if both of them match.
795 '''Composes two matchers by matching if both of them match.
799
796
800 The second matcher's non-matching-attributes (bad, explicitdir,
797 The second matcher's non-matching-attributes (bad, explicitdir,
801 traversedir) are ignored.
798 traversedir) are ignored.
802 '''
799 '''
803 if m1 is None or m2 is None:
800 if m1 is None or m2 is None:
804 return m1 or m2
801 return m1 or m2
805 if m1.always():
802 if m1.always():
806 m = copy.copy(m2)
803 m = copy.copy(m2)
807 # TODO: Consider encapsulating these things in a class so there's only
804 # TODO: Consider encapsulating these things in a class so there's only
808 # one thing to copy from m1.
805 # one thing to copy from m1.
809 m.bad = m1.bad
806 m.bad = m1.bad
810 m.explicitdir = m1.explicitdir
807 m.explicitdir = m1.explicitdir
811 m.traversedir = m1.traversedir
808 m.traversedir = m1.traversedir
812 return m
809 return m
813 if m2.always():
810 if m2.always():
814 m = copy.copy(m1)
811 m = copy.copy(m1)
815 return m
812 return m
816 return intersectionmatcher(m1, m2)
813 return intersectionmatcher(m1, m2)
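# Editor's sketch (not part of the changeset): intersecting with an
# alwaysmatcher short-circuits to a copy of the other matcher, so no
# intersectionmatcher indirection is paid on the fast path.
def _demo_intersectmatchers():
    m = intersectmatchers(alwaysmatcher(), exactmatcher([b'a.txt']))
    assert m.isexact()
    assert m.files() == [b'a.txt']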
817
814
818 class intersectionmatcher(basematcher):
815 class intersectionmatcher(basematcher):
819 def __init__(self, m1, m2):
816 def __init__(self, m1, m2):
820 super(intersectionmatcher, self).__init__()
817 super(intersectionmatcher, self).__init__()
821 self._m1 = m1
818 self._m1 = m1
822 self._m2 = m2
819 self._m2 = m2
823 self.bad = m1.bad
820 self.bad = m1.bad
824 self.explicitdir = m1.explicitdir
821 self.explicitdir = m1.explicitdir
825 self.traversedir = m1.traversedir
822 self.traversedir = m1.traversedir
826
823
827 @propertycache
824 @propertycache
828 def _files(self):
825 def _files(self):
829 if self.isexact():
826 if self.isexact():
830 m1, m2 = self._m1, self._m2
827 m1, m2 = self._m1, self._m2
831 if not m1.isexact():
828 if not m1.isexact():
832 m1, m2 = m2, m1
829 m1, m2 = m2, m1
833 return [f for f in m1.files() if m2(f)]
830 return [f for f in m1.files() if m2(f)]
834 # If neither m1 nor m2 is an exact matcher, we can't easily intersect
831 # If neither m1 nor m2 is an exact matcher, we can't easily intersect
835 # the set of files, because their files() are not always files. For
832 # the set of files, because their files() are not always files. For
836 # example, if intersecting a matcher "-I glob:foo.txt" with matcher of
833 # example, if intersecting a matcher "-I glob:foo.txt" with matcher of
837 # "path:dir2", we don't want to remove "dir2" from the set.
834 # "path:dir2", we don't want to remove "dir2" from the set.
838 return self._m1.files() + self._m2.files()
835 return self._m1.files() + self._m2.files()
839
836
840 def matchfn(self, f):
837 def matchfn(self, f):
841 return self._m1(f) and self._m2(f)
838 return self._m1(f) and self._m2(f)
842
839
843 def visitdir(self, dir):
840 def visitdir(self, dir):
844 visit1 = self._m1.visitdir(dir)
841 visit1 = self._m1.visitdir(dir)
845 if visit1 == 'all':
842 if visit1 == 'all':
846 return self._m2.visitdir(dir)
843 return self._m2.visitdir(dir)
847 # bool() because visit1=True + visit2='all' should not be 'all'
844 # bool() because visit1=True + visit2='all' should not be 'all'
848 return bool(visit1 and self._m2.visitdir(dir))
845 return bool(visit1 and self._m2.visitdir(dir))
849
846
850 def visitchildrenset(self, dir):
847 def visitchildrenset(self, dir):
851 m1_set = self._m1.visitchildrenset(dir)
848 m1_set = self._m1.visitchildrenset(dir)
852 if not m1_set:
849 if not m1_set:
853 return set()
850 return set()
854 m2_set = self._m2.visitchildrenset(dir)
851 m2_set = self._m2.visitchildrenset(dir)
855 if not m2_set:
852 if not m2_set:
856 return set()
853 return set()
857
854
858 if m1_set == 'all':
855 if m1_set == 'all':
859 return m2_set
856 return m2_set
860 elif m2_set == 'all':
857 elif m2_set == 'all':
861 return m1_set
858 return m1_set
862
859
863 if m1_set == 'this' or m2_set == 'this':
860 if m1_set == 'this' or m2_set == 'this':
864 return 'this'
861 return 'this'
865
862
866 assert isinstance(m1_set, set) and isinstance(m2_set, set)
863 assert isinstance(m1_set, set) and isinstance(m2_set, set)
867 return m1_set.intersection(m2_set)
864 return m1_set.intersection(m2_set)
868
865
869 def always(self):
866 def always(self):
870 return self._m1.always() and self._m2.always()
867 return self._m1.always() and self._m2.always()
871
868
872 def isexact(self):
869 def isexact(self):
873 return self._m1.isexact() or self._m2.isexact()
870 return self._m1.isexact() or self._m2.isexact()
874
871
875 @encoding.strmethod
872 @encoding.strmethod
876 def __repr__(self):
873 def __repr__(self):
877 return ('<intersectionmatcher m1=%r, m2=%r>' % (self._m1, self._m2))
874 return ('<intersectionmatcher m1=%r, m2=%r>' % (self._m1, self._m2))
878
875
879 class subdirmatcher(basematcher):
876 class subdirmatcher(basematcher):
880 """Adapt a matcher to work on a subdirectory only.
877 """Adapt a matcher to work on a subdirectory only.
881
878
882 The paths are remapped to remove/insert the path as needed:
879 The paths are remapped to remove/insert the path as needed:
883
880
884 >>> from . import pycompat
881 >>> from . import pycompat
885 >>> m1 = match(b'root', b'', [b'a.txt', b'sub/b.txt'])
882 >>> m1 = match(b'root', b'', [b'a.txt', b'sub/b.txt'])
886 >>> m2 = subdirmatcher(b'sub', m1)
883 >>> m2 = subdirmatcher(b'sub', m1)
887 >>> m2(b'a.txt')
884 >>> m2(b'a.txt')
888 False
885 False
889 >>> m2(b'b.txt')
886 >>> m2(b'b.txt')
890 True
887 True
891 >>> m2.matchfn(b'a.txt')
888 >>> m2.matchfn(b'a.txt')
892 False
889 False
893 >>> m2.matchfn(b'b.txt')
890 >>> m2.matchfn(b'b.txt')
894 True
891 True
895 >>> m2.files()
892 >>> m2.files()
896 ['b.txt']
893 ['b.txt']
897 >>> m2.exact(b'b.txt')
894 >>> m2.exact(b'b.txt')
898 True
895 True
899 >>> def bad(f, msg):
896 >>> def bad(f, msg):
900 ... print(pycompat.sysstr(b"%s: %s" % (f, msg)))
897 ... print(pycompat.sysstr(b"%s: %s" % (f, msg)))
901 >>> m1.bad = bad
898 >>> m1.bad = bad
902 >>> m2.bad(b'x.txt', b'No such file')
899 >>> m2.bad(b'x.txt', b'No such file')
903 sub/x.txt: No such file
900 sub/x.txt: No such file
904 """
901 """
905
902
906 def __init__(self, path, matcher):
903 def __init__(self, path, matcher):
907 super(subdirmatcher, self).__init__()
904 super(subdirmatcher, self).__init__()
908 self._path = path
905 self._path = path
909 self._matcher = matcher
906 self._matcher = matcher
910 self._always = matcher.always()
907 self._always = matcher.always()
911
908
912 self._files = [f[len(path) + 1:] for f in matcher._files
909 self._files = [f[len(path) + 1:] for f in matcher._files
913 if f.startswith(path + "/")]
910 if f.startswith(path + "/")]
914
911
915 # If the parent repo had a path to this subrepo and the matcher is
912 # If the parent repo had a path to this subrepo and the matcher is
916 # a prefix matcher, this submatcher always matches.
913 # a prefix matcher, this submatcher always matches.
917 if matcher.prefix():
914 if matcher.prefix():
918 self._always = any(f == path for f in matcher._files)
915 self._always = any(f == path for f in matcher._files)
919
916
920 def bad(self, f, msg):
917 def bad(self, f, msg):
921 self._matcher.bad(self._path + "/" + f, msg)
918 self._matcher.bad(self._path + "/" + f, msg)
922
919
923 def matchfn(self, f):
920 def matchfn(self, f):
924 # Some information is lost in the superclass's constructor, so we
921 # Some information is lost in the superclass's constructor, so we
925 # cannot accurately create the matching function for the subdirectory
922 # cannot accurately create the matching function for the subdirectory
926 # from the inputs. Instead, we override matchfn() and visitdir() to
923 # from the inputs. Instead, we override matchfn() and visitdir() to
927 # call the original matcher with the subdirectory path prepended.
924 # call the original matcher with the subdirectory path prepended.
928 return self._matcher.matchfn(self._path + "/" + f)
925 return self._matcher.matchfn(self._path + "/" + f)
929
926
930 def visitdir(self, dir):
927 def visitdir(self, dir):
931 dir = normalizerootdir(dir, 'visitdir')
928 dir = normalizerootdir(dir, 'visitdir')
932 if dir == '':
929 if dir == '':
933 dir = self._path
930 dir = self._path
934 else:
931 else:
935 dir = self._path + "/" + dir
932 dir = self._path + "/" + dir
936 return self._matcher.visitdir(dir)
933 return self._matcher.visitdir(dir)
937
934
938 def visitchildrenset(self, dir):
935 def visitchildrenset(self, dir):
939 dir = normalizerootdir(dir, 'visitchildrenset')
936 dir = normalizerootdir(dir, 'visitchildrenset')
940 if dir == '':
937 if dir == '':
941 dir = self._path
938 dir = self._path
942 else:
939 else:
943 dir = self._path + "/" + dir
940 dir = self._path + "/" + dir
944 return self._matcher.visitchildrenset(dir)
941 return self._matcher.visitchildrenset(dir)
945
942
946 def always(self):
943 def always(self):
947 return self._always
944 return self._always
948
945
949 def prefix(self):
946 def prefix(self):
950 return self._matcher.prefix() and not self._always
947 return self._matcher.prefix() and not self._always
951
948
952 @encoding.strmethod
949 @encoding.strmethod
953 def __repr__(self):
950 def __repr__(self):
954 return ('<subdirmatcher path=%r, matcher=%r>' %
951 return ('<subdirmatcher path=%r, matcher=%r>' %
955 (self._path, self._matcher))
952 (self._path, self._matcher))
956
953
957 class prefixdirmatcher(basematcher):
954 class prefixdirmatcher(basematcher):
958 """Adapt a matcher to work on a parent directory.
955 """Adapt a matcher to work on a parent directory.
959
956
960 The matcher's non-matching-attributes (bad, explicitdir, traversedir) are
957 The matcher's non-matching-attributes (bad, explicitdir, traversedir) are
961 ignored.
958 ignored.
962
959
963 The prefix path should usually be the relative path from the root of
960 The prefix path should usually be the relative path from the root of
964 this matcher to the root of the wrapped matcher.
961 this matcher to the root of the wrapped matcher.
965
962
966 >>> m1 = match(util.localpath(b'root/d/e'), b'f', [b'../a.txt', b'b.txt'])
963 >>> m1 = match(util.localpath(b'root/d/e'), b'f', [b'../a.txt', b'b.txt'])
967 >>> m2 = prefixdirmatcher(b'd/e', m1)
964 >>> m2 = prefixdirmatcher(b'd/e', m1)
968 >>> m2(b'a.txt')
965 >>> m2(b'a.txt')
969 False
966 False
970 >>> m2(b'd/e/a.txt')
967 >>> m2(b'd/e/a.txt')
971 True
968 True
972 >>> m2(b'd/e/b.txt')
969 >>> m2(b'd/e/b.txt')
973 False
970 False
974 >>> m2.files()
971 >>> m2.files()
975 ['d/e/a.txt', 'd/e/f/b.txt']
972 ['d/e/a.txt', 'd/e/f/b.txt']
976 >>> m2.exact(b'd/e/a.txt')
973 >>> m2.exact(b'd/e/a.txt')
977 True
974 True
978 >>> m2.visitdir(b'd')
975 >>> m2.visitdir(b'd')
979 True
976 True
980 >>> m2.visitdir(b'd/e')
977 >>> m2.visitdir(b'd/e')
981 True
978 True
982 >>> m2.visitdir(b'd/e/f')
979 >>> m2.visitdir(b'd/e/f')
983 True
980 True
984 >>> m2.visitdir(b'd/e/g')
981 >>> m2.visitdir(b'd/e/g')
985 False
982 False
986 >>> m2.visitdir(b'd/ef')
983 >>> m2.visitdir(b'd/ef')
987 False
984 False
988 """
985 """
989
986
990 def __init__(self, path, matcher, badfn=None):
987 def __init__(self, path, matcher, badfn=None):
991 super(prefixdirmatcher, self).__init__(badfn)
988 super(prefixdirmatcher, self).__init__(badfn)
992 if not path:
989 if not path:
993 raise error.ProgrammingError('prefix path must not be empty')
990 raise error.ProgrammingError('prefix path must not be empty')
994 self._path = path
991 self._path = path
995 self._pathprefix = path + '/'
992 self._pathprefix = path + '/'
996 self._matcher = matcher
993 self._matcher = matcher
997
994
998 @propertycache
995 @propertycache
999 def _files(self):
996 def _files(self):
1000 return [self._pathprefix + f for f in self._matcher._files]
997 return [self._pathprefix + f for f in self._matcher._files]
1001
998
1002 def matchfn(self, f):
999 def matchfn(self, f):
1003 if not f.startswith(self._pathprefix):
1000 if not f.startswith(self._pathprefix):
1004 return False
1001 return False
1005 return self._matcher.matchfn(f[len(self._pathprefix):])
1002 return self._matcher.matchfn(f[len(self._pathprefix):])
1006
1003
1007 @propertycache
1004 @propertycache
1008 def _pathdirs(self):
1005 def _pathdirs(self):
1009 return set(util.finddirs(self._path))
1006 return set(util.finddirs(self._path))
1010
1007
1011 def visitdir(self, dir):
1008 def visitdir(self, dir):
1012 if dir == self._path:
1009 if dir == self._path:
1013 return self._matcher.visitdir('')
1010 return self._matcher.visitdir('')
1014 if dir.startswith(self._pathprefix):
1011 if dir.startswith(self._pathprefix):
1015 return self._matcher.visitdir(dir[len(self._pathprefix):])
1012 return self._matcher.visitdir(dir[len(self._pathprefix):])
1016 return dir in self._pathdirs
1013 return dir in self._pathdirs
1017
1014
1018 def visitchildrenset(self, dir):
1015 def visitchildrenset(self, dir):
1019 if dir == self._path:
1016 if dir == self._path:
1020 return self._matcher.visitchildrenset('')
1017 return self._matcher.visitchildrenset('')
1021 if dir.startswith(self._pathprefix):
1018 if dir.startswith(self._pathprefix):
1022 return self._matcher.visitchildrenset(dir[len(self._pathprefix):])
1019 return self._matcher.visitchildrenset(dir[len(self._pathprefix):])
1023 if dir in self._pathdirs:
1020 if dir in self._pathdirs:
1024 return 'this'
1021 return 'this'
1025 return set()
1022 return set()
1026
1023
1027 def isexact(self):
1024 def isexact(self):
1028 return self._matcher.isexact()
1025 return self._matcher.isexact()
1029
1026
1030 def prefix(self):
1027 def prefix(self):
1031 return self._matcher.prefix()
1028 return self._matcher.prefix()
1032
1029
1033 @encoding.strmethod
1030 @encoding.strmethod
1034 def __repr__(self):
1031 def __repr__(self):
1035 return ('<prefixdirmatcher path=%r, matcher=%r>'
1032 return ('<prefixdirmatcher path=%r, matcher=%r>'
1036 % (pycompat.bytestr(self._path), self._matcher))
1033 % (pycompat.bytestr(self._path), self._matcher))
1037
1034
1038 class unionmatcher(basematcher):
1035 class unionmatcher(basematcher):
1039 """A matcher that is the union of several matchers.
1036 """A matcher that is the union of several matchers.
1040
1037
1041 The non-matching-attributes (bad, explicitdir, traversedir) are taken from
1038 The non-matching-attributes (bad, explicitdir, traversedir) are taken from
1042 the first matcher.
1039 the first matcher.
1043 """
1040 """
1044
1041
1045 def __init__(self, matchers):
1042 def __init__(self, matchers):
1046 m1 = matchers[0]
1043 m1 = matchers[0]
1047 super(unionmatcher, self).__init__()
1044 super(unionmatcher, self).__init__()
1048 self.explicitdir = m1.explicitdir
1045 self.explicitdir = m1.explicitdir
1049 self.traversedir = m1.traversedir
1046 self.traversedir = m1.traversedir
1050 self._matchers = matchers
1047 self._matchers = matchers
1051
1048
1052 def matchfn(self, f):
1049 def matchfn(self, f):
1053 for match in self._matchers:
1050 for match in self._matchers:
1054 if match(f):
1051 if match(f):
1055 return True
1052 return True
1056 return False
1053 return False
1057
1054
1058 def visitdir(self, dir):
1055 def visitdir(self, dir):
1059 r = False
1056 r = False
1060 for m in self._matchers:
1057 for m in self._matchers:
1061 v = m.visitdir(dir)
1058 v = m.visitdir(dir)
1062 if v == 'all':
1059 if v == 'all':
1063 return v
1060 return v
1064 r |= v
1061 r |= v
1065 return r
1062 return r
1066
1063
1067 def visitchildrenset(self, dir):
1064 def visitchildrenset(self, dir):
1068 r = set()
1065 r = set()
1069 this = False
1066 this = False
1070 for m in self._matchers:
1067 for m in self._matchers:
1071 v = m.visitchildrenset(dir)
1068 v = m.visitchildrenset(dir)
1072 if not v:
1069 if not v:
1073 continue
1070 continue
1074 if v == 'all':
1071 if v == 'all':
1075 return v
1072 return v
1076 if this or v == 'this':
1073 if this or v == 'this':
1077 this = True
1074 this = True
1078 # don't break, we might have an 'all' in here.
1075 # don't break, we might have an 'all' in here.
1079 continue
1076 continue
1080 assert isinstance(v, set)
1077 assert isinstance(v, set)
1081 r = r.union(v)
1078 r = r.union(v)
1082 if this:
1079 if this:
1083 return 'this'
1080 return 'this'
1084 return r
1081 return r
1085
1082
1086 @encoding.strmethod
1083 @encoding.strmethod
1087 def __repr__(self):
1084 def __repr__(self):
1088 return ('<unionmatcher matchers=%r>' % self._matchers)
1085 return ('<unionmatcher matchers=%r>' % self._matchers)
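# Editor's sketch (not part of the changeset): visitchildrenset() unions the
# per-matcher child sets, with 'all' from any component winning.
def _demo_unionmatcher():
    m = unionmatcher([exactmatcher([b'a/x']), exactmatcher([b'b/y'])])
    assert m(b'a/x') and not m(b'a/y')
    assert m.visitchildrenset(b'') == {b'a', b'b'}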
1089
1086
1090 def patkind(pattern, default=None):
1087 def patkind(pattern, default=None):
1091 '''If pattern is 'kind:pat' with a known kind, return kind.
1088 '''If pattern is 'kind:pat' with a known kind, return kind.
1092
1089
1093 >>> patkind(br're:.*\.c$')
1090 >>> patkind(br're:.*\.c$')
1094 're'
1091 're'
1095 >>> patkind(b'glob:*.c')
1092 >>> patkind(b'glob:*.c')
1096 'glob'
1093 'glob'
1097 >>> patkind(b'relpath:test.py')
1094 >>> patkind(b'relpath:test.py')
1098 'relpath'
1095 'relpath'
1099 >>> patkind(b'main.py')
1096 >>> patkind(b'main.py')
1100 >>> patkind(b'main.py', default=b're')
1097 >>> patkind(b'main.py', default=b're')
1101 're'
1098 're'
1102 '''
1099 '''
1103 return _patsplit(pattern, default)[0]
1100 return _patsplit(pattern, default)[0]
1104
1101
1105 def _patsplit(pattern, default):
1102 def _patsplit(pattern, default):
1106 """Split a string into the optional pattern kind prefix and the actual
1103 """Split a string into the optional pattern kind prefix and the actual
1107 pattern."""
1104 pattern."""
1108 if ':' in pattern:
1105 if ':' in pattern:
1109 kind, pat = pattern.split(':', 1)
1106 kind, pat = pattern.split(':', 1)
1110 if kind in allpatternkinds:
1107 if kind in allpatternkinds:
1111 return kind, pat
1108 return kind, pat
1112 return default, pattern
1109 return default, pattern
1113
1110
1114 def _globre(pat):
1111 def _globre(pat):
1115 r'''Convert an extended glob string to a regexp string.
1112 r'''Convert an extended glob string to a regexp string.
1116
1113
1117 >>> from . import pycompat
1114 >>> from . import pycompat
1118 >>> def bprint(s):
1115 >>> def bprint(s):
1119 ... print(pycompat.sysstr(s))
1116 ... print(pycompat.sysstr(s))
1120 >>> bprint(_globre(br'?'))
1117 >>> bprint(_globre(br'?'))
1121 .
1118 .
1122 >>> bprint(_globre(br'*'))
1119 >>> bprint(_globre(br'*'))
1123 [^/]*
1120 [^/]*
1124 >>> bprint(_globre(br'**'))
1121 >>> bprint(_globre(br'**'))
1125 .*
1122 .*
1126 >>> bprint(_globre(br'**/a'))
1123 >>> bprint(_globre(br'**/a'))
1127 (?:.*/)?a
1124 (?:.*/)?a
1128 >>> bprint(_globre(br'a/**/b'))
1125 >>> bprint(_globre(br'a/**/b'))
1129 a/(?:.*/)?b
1126 a/(?:.*/)?b
1130 >>> bprint(_globre(br'[a*?!^][^b][!c]'))
1127 >>> bprint(_globre(br'[a*?!^][^b][!c]'))
1131 [a*?!^][\^b][^c]
1128 [a*?!^][\^b][^c]
1132 >>> bprint(_globre(br'{a,b}'))
1129 >>> bprint(_globre(br'{a,b}'))
1133 (?:a|b)
1130 (?:a|b)
1134 >>> bprint(_globre(br'.\*\?'))
1131 >>> bprint(_globre(br'.\*\?'))
1135 \.\*\?
1132 \.\*\?
1136 '''
1133 '''
1137 i, n = 0, len(pat)
1134 i, n = 0, len(pat)
1138 res = ''
1135 res = ''
1139 group = 0
1136 group = 0
1140 escape = util.stringutil.regexbytesescapemap.get
1137 escape = util.stringutil.regexbytesescapemap.get
1141 def peek():
1138 def peek():
1142 return i < n and pat[i:i + 1]
1139 return i < n and pat[i:i + 1]
1143 while i < n:
1140 while i < n:
1144 c = pat[i:i + 1]
1141 c = pat[i:i + 1]
1145 i += 1
1142 i += 1
1146 if c not in '*?[{},\\':
1143 if c not in '*?[{},\\':
1147 res += escape(c, c)
1144 res += escape(c, c)
1148 elif c == '*':
1145 elif c == '*':
1149 if peek() == '*':
1146 if peek() == '*':
1150 i += 1
1147 i += 1
1151 if peek() == '/':
1148 if peek() == '/':
1152 i += 1
1149 i += 1
1153 res += '(?:.*/)?'
1150 res += '(?:.*/)?'
1154 else:
1151 else:
1155 res += '.*'
1152 res += '.*'
1156 else:
1153 else:
1157 res += '[^/]*'
1154 res += '[^/]*'
1158 elif c == '?':
1155 elif c == '?':
1159 res += '.'
1156 res += '.'
1160 elif c == '[':
1157 elif c == '[':
1161 j = i
1158 j = i
1162 if j < n and pat[j:j + 1] in '!]':
1159 if j < n and pat[j:j + 1] in '!]':
1163 j += 1
1160 j += 1
1164 while j < n and pat[j:j + 1] != ']':
1161 while j < n and pat[j:j + 1] != ']':
1165 j += 1
1162 j += 1
1166 if j >= n:
1163 if j >= n:
1167 res += '\\['
1164 res += '\\['
1168 else:
1165 else:
1169 stuff = pat[i:j].replace('\\','\\\\')
1166 stuff = pat[i:j].replace('\\','\\\\')
1170 i = j + 1
1167 i = j + 1
1171 if stuff[0:1] == '!':
1168 if stuff[0:1] == '!':
1172 stuff = '^' + stuff[1:]
1169 stuff = '^' + stuff[1:]
1173 elif stuff[0:1] == '^':
1170 elif stuff[0:1] == '^':
1174 stuff = '\\' + stuff
1171 stuff = '\\' + stuff
1175 res = '%s[%s]' % (res, stuff)
1172 res = '%s[%s]' % (res, stuff)
1176 elif c == '{':
1173 elif c == '{':
1177 group += 1
1174 group += 1
1178 res += '(?:'
1175 res += '(?:'
1179 elif c == '}' and group:
1176 elif c == '}' and group:
1180 res += ')'
1177 res += ')'
1181 group -= 1
1178 group -= 1
1182 elif c == ',' and group:
1179 elif c == ',' and group:
1183 res += '|'
1180 res += '|'
1184 elif c == '\\':
1181 elif c == '\\':
1185 p = peek()
1182 p = peek()
1186 if p:
1183 if p:
1187 i += 1
1184 i += 1
1188 res += escape(p, p)
1185 res += escape(p, p)
1189 else:
1186 else:
1190 res += escape(c, c)
1187 res += escape(c, c)
1191 else:
1188 else:
1192 res += escape(c, c)
1189 res += escape(c, c)
1193 return res
1190 return res
1194
1191
1195 def _regex(kind, pat, globsuffix):
1192 def _regex(kind, pat, globsuffix):
1196 '''Convert a (normalized) pattern of any kind into a
1193 '''Convert a (normalized) pattern of any kind into a
1197 regular expression.
1194 regular expression.
1198 globsuffix is appended to the regexp of globs.'''
1195 globsuffix is appended to the regexp of globs.'''
1199
1196
1200 if rustext is not None:
1197 if rustmod is not None:
1201 try:
1198 try:
1202 return rustext.filepatterns.build_single_regex(
1199 return rustmod.build_single_regex(
1203 kind,
1200 kind,
1204 pat,
1201 pat,
1205 globsuffix
1202 globsuffix
1206 )
1203 )
1207 except rustext.filepatterns.PatternError:
1204 except rustmod.PatternError:
1208 raise error.ProgrammingError(
1205 raise error.ProgrammingError(
1209 'not a regex pattern: %s:%s' % (kind, pat)
1206 'not a regex pattern: %s:%s' % (kind, pat)
1210 )
1207 )
1211
1208
1212 if not pat and kind in ('glob', 'relpath'):
1209 if not pat and kind in ('glob', 'relpath'):
1213 return ''
1210 return ''
1214 if kind == 're':
1211 if kind == 're':
1215 return pat
1212 return pat
1216 if kind in ('path', 'relpath'):
1213 if kind in ('path', 'relpath'):
1217 if pat == '.':
1214 if pat == '.':
1218 return ''
1215 return ''
1219 return util.stringutil.reescape(pat) + '(?:/|$)'
1216 return util.stringutil.reescape(pat) + '(?:/|$)'
1220 if kind == 'rootfilesin':
1217 if kind == 'rootfilesin':
1221 if pat == '.':
1218 if pat == '.':
1222 escaped = ''
1219 escaped = ''
1223 else:
1220 else:
1224 # Pattern is a directory name.
1221 # Pattern is a directory name.
1225 escaped = util.stringutil.reescape(pat) + '/'
1222 escaped = util.stringutil.reescape(pat) + '/'
1226 # Anything after the pattern must be a non-directory.
1223 # Anything after the pattern must be a non-directory.
1227 return escaped + '[^/]+$'
1224 return escaped + '[^/]+$'
1228 if kind == 'relglob':
1225 if kind == 'relglob':
1229 return '(?:|.*/)' + _globre(pat) + globsuffix
1226 return '(?:|.*/)' + _globre(pat) + globsuffix
1230 if kind == 'relre':
1227 if kind == 'relre':
1231 if pat.startswith('^'):
1228 if pat.startswith('^'):
1232 return pat
1229 return pat
1233 return '.*' + pat
1230 return '.*' + pat
1234 if kind in ('glob', 'rootglob'):
1231 if kind in ('glob', 'rootglob'):
1235 return _globre(pat) + globsuffix
1232 return _globre(pat) + globsuffix
1236 raise error.ProgrammingError('not a regex pattern: %s:%s' % (kind, pat))
1233 raise error.ProgrammingError('not a regex pattern: %s:%s' % (kind, pat))
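# Editor's sketch (not part of the changeset): how a few normalized kinds
# translate into regexp fragments, assuming the pure-Python fallback (the
# Rust build_single_regex path is expected to produce equivalent patterns).
def _demo_regex():
    assert _regex(b'path', b'foo', b'$') == b'foo(?:/|$)'
    assert _regex(b'rootfilesin', b'foo', b'$') == b'foo/[^/]+$'
    assert _regex(b'glob', b'*.c', b'$') == b'[^/]*\\.c$'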
1237
1234
1238 def _buildmatch(kindpats, globsuffix, root):
1235 def _buildmatch(kindpats, globsuffix, root):
1239 '''Return regexp string and a matcher function for kindpats.
1236 '''Return regexp string and a matcher function for kindpats.
1240 globsuffix is appended to the regexp of globs.'''
1237 globsuffix is appended to the regexp of globs.'''
1241 matchfuncs = []
1238 matchfuncs = []
1242
1239
1243 subincludes, kindpats = _expandsubinclude(kindpats, root)
1240 subincludes, kindpats = _expandsubinclude(kindpats, root)
1244 if subincludes:
1241 if subincludes:
1245 submatchers = {}
1242 submatchers = {}
1246 def matchsubinclude(f):
1243 def matchsubinclude(f):
1247 for prefix, matcherargs in subincludes:
1244 for prefix, matcherargs in subincludes:
1248 if f.startswith(prefix):
1245 if f.startswith(prefix):
1249 mf = submatchers.get(prefix)
1246 mf = submatchers.get(prefix)
1250 if mf is None:
1247 if mf is None:
1251 mf = match(*matcherargs)
1248 mf = match(*matcherargs)
1252 submatchers[prefix] = mf
1249 submatchers[prefix] = mf
1253
1250
1254 if mf(f[len(prefix):]):
1251 if mf(f[len(prefix):]):
1255 return True
1252 return True
1256 return False
1253 return False
1257 matchfuncs.append(matchsubinclude)
1254 matchfuncs.append(matchsubinclude)
1258
1255
1259 regex = ''
1256 regex = ''
1260 if kindpats:
1257 if kindpats:
1261 if all(k == 'rootfilesin' for k, p, s in kindpats):
1258 if all(k == 'rootfilesin' for k, p, s in kindpats):
1262 dirs = {p for k, p, s in kindpats}
1259 dirs = {p for k, p, s in kindpats}
1263 def mf(f):
1260 def mf(f):
1264 i = f.rfind('/')
1261 i = f.rfind('/')
1265 if i >= 0:
1262 if i >= 0:
1266 dir = f[:i]
1263 dir = f[:i]
1267 else:
1264 else:
1268 dir = '.'
1265 dir = '.'
1269 return dir in dirs
1266 return dir in dirs
1270 regex = b'rootfilesin: %s' % stringutil.pprint(list(sorted(dirs)))
1267 regex = b'rootfilesin: %s' % stringutil.pprint(list(sorted(dirs)))
1271 matchfuncs.append(mf)
1268 matchfuncs.append(mf)
1272 else:
1269 else:
1273 regex, mf = _buildregexmatch(kindpats, globsuffix)
1270 regex, mf = _buildregexmatch(kindpats, globsuffix)
1274 matchfuncs.append(mf)
1271 matchfuncs.append(mf)
1275
1272
1276 if len(matchfuncs) == 1:
1273 if len(matchfuncs) == 1:
1277 return regex, matchfuncs[0]
1274 return regex, matchfuncs[0]
1278 else:
1275 else:
1279 return regex, lambda f: any(mf(f) for mf in matchfuncs)
1276 return regex, lambda f: any(mf(f) for mf in matchfuncs)
1280
1277
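
The shape of _buildmatch's return value is worth noting: when several matcher functions are in play (subinclude matchers plus a regex matcher), they are simply OR-ed into one predicate. A minimal sketch of that composition:

    matchfuncs = [lambda f: f.endswith('.py'),
                  lambda f: f.startswith('tests/')]
    combined = lambda f: any(mf(f) for mf in matchfuncs)
    assert combined('mercurial/match.py')
    assert combined('tests/fixture.dat')
    assert not combined('README')
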
1281 MAX_RE_SIZE = 20000
1278 MAX_RE_SIZE = 20000
1282
1279
1283 def _joinregexes(regexps):
1280 def _joinregexes(regexps):
1284 """gather multiple regular expressions into a single one"""
1281 """gather multiple regular expressions into a single one"""
1285 return '|'.join(regexps)
1282 return '|'.join(regexps)
1286
1283
1287 def _buildregexmatch(kindpats, globsuffix):
1284 def _buildregexmatch(kindpats, globsuffix):
1288 """Build a match function from a list of kinds and kindpats,
1285 """Build a match function from a list of kinds and kindpats,
1289 return regexp string and a matcher function.
1286 return regexp string and a matcher function.
1290
1287
1291 Test too large input
1288 Test too large input
1292 >>> _buildregexmatch([
1289 >>> _buildregexmatch([
1293 ... (b'relglob', b'?' * MAX_RE_SIZE, b'')
1290 ... (b'relglob', b'?' * MAX_RE_SIZE, b'')
1294 ... ], b'$')
1291 ... ], b'$')
1295 Traceback (most recent call last):
1292 Traceback (most recent call last):
1296 ...
1293 ...
1297 Abort: matcher pattern is too long (20009 bytes)
1294 Abort: matcher pattern is too long (20009 bytes)
1298 """
1295 """
1299 try:
1296 try:
1300 allgroups = []
1297 allgroups = []
1301 regexps = [_regex(k, p, globsuffix) for (k, p, s) in kindpats]
1298 regexps = [_regex(k, p, globsuffix) for (k, p, s) in kindpats]
1302 fullregexp = _joinregexes(regexps)
1299 fullregexp = _joinregexes(regexps)
1303
1300
1304 startidx = 0
1301 startidx = 0
1305 groupsize = 0
1302 groupsize = 0
1306 for idx, r in enumerate(regexps):
1303 for idx, r in enumerate(regexps):
1307 piecesize = len(r)
1304 piecesize = len(r)
1308 if piecesize > MAX_RE_SIZE:
1305 if piecesize > MAX_RE_SIZE:
1309 msg = _("matcher pattern is too long (%d bytes)") % piecesize
1306 msg = _("matcher pattern is too long (%d bytes)") % piecesize
1310 raise error.Abort(msg)
1307 raise error.Abort(msg)
1311 elif (groupsize + piecesize) > MAX_RE_SIZE:
1308 elif (groupsize + piecesize) > MAX_RE_SIZE:
1312 group = regexps[startidx:idx]
1309 group = regexps[startidx:idx]
1313 allgroups.append(_joinregexes(group))
1310 allgroups.append(_joinregexes(group))
1314 startidx = idx
1311 startidx = idx
1315 groupsize = 0
1312 groupsize = 0
1316 groupsize += piecesize + 1
1313 groupsize += piecesize + 1
1317
1314
1318 if startidx == 0:
1315 if startidx == 0:
1319 matcher = _rematcher(fullregexp)
1316 matcher = _rematcher(fullregexp)
1320 func = lambda s: bool(matcher(s))
1317 func = lambda s: bool(matcher(s))
1321 else:
1318 else:
1322 group = regexps[startidx:]
1319 group = regexps[startidx:]
1323 allgroups.append(_joinregexes(group))
1320 allgroups.append(_joinregexes(group))
1324 allmatchers = [_rematcher(g) for g in allgroups]
1321 allmatchers = [_rematcher(g) for g in allgroups]
1325 func = lambda s: any(m(s) for m in allmatchers)
1322 func = lambda s: any(m(s) for m in allmatchers)
1326 return fullregexp, func
1323 return fullregexp, func
1327 except re.error:
1324 except re.error:
1328 for k, p, s in kindpats:
1325 for k, p, s in kindpats:
1329 try:
1326 try:
1330 _rematcher(_regex(k, p, globsuffix))
1327 _rematcher(_regex(k, p, globsuffix))
1331 except re.error:
1328 except re.error:
1332 if s:
1329 if s:
1333 raise error.Abort(_("%s: invalid pattern (%s): %s") %
1330 raise error.Abort(_("%s: invalid pattern (%s): %s") %
1334 (s, k, p))
1331 (s, k, p))
1335 else:
1332 else:
1336 raise error.Abort(_("invalid pattern (%s): %s") % (k, p))
1333 raise error.Abort(_("invalid pattern (%s): %s") % (k, p))
1337 raise error.Abort(_("invalid pattern"))
1334 raise error.Abort(_("invalid pattern"))
1338
1335
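
The grouping logic in _buildregexmatch exists because a single compiled pattern larger than MAX_RE_SIZE can overflow the underlying regex engine. A standalone sketch of the same greedy packing (oversized single pieces, which the real code rejects with Abort, are omitted here for brevity):

    import re

    MAX_RE_SIZE = 20000

    def grouped_matchers(regexps, limit=MAX_RE_SIZE):
        # Greedily pack alternatives into '|'-joined groups under the limit.
        groups, start, size = [], 0, 0
        for idx, r in enumerate(regexps):
            if size + len(r) > limit:
                groups.append('|'.join(regexps[start:idx]))
                start, size = idx, 0
            size += len(r) + 1  # +1 accounts for the '|' separator
        groups.append('|'.join(regexps[start:]))
        return [re.compile(g).match for g in groups]

    matchers = grouped_matchers(['foo/.*', 'bar/[^/]+$'])
    assert any(m('foo/x.txt') for m in matchers)
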
1339 def _patternrootsanddirs(kindpats):
1336 def _patternrootsanddirs(kindpats):
1340 '''Returns roots and directories corresponding to each pattern.
1337 '''Returns roots and directories corresponding to each pattern.
1341
1338
1342 This calculates the roots and directories exactly matching the patterns and
1339 This calculates the roots and directories exactly matching the patterns and
1343 returns a tuple of (roots, dirs) for each. It does not return other
1340 returns a tuple of (roots, dirs) for each. It does not return other
1344 directories which may also need to be considered, like the parent
1341 directories which may also need to be considered, like the parent
1345 directories.
1342 directories.
1346 '''
1343 '''
1347 r = []
1344 r = []
1348 d = []
1345 d = []
1349 for kind, pat, source in kindpats:
1346 for kind, pat, source in kindpats:
1350 if kind in ('glob', 'rootglob'): # find the non-glob prefix
1347 if kind in ('glob', 'rootglob'): # find the non-glob prefix
1351 root = []
1348 root = []
1352 for p in pat.split('/'):
1349 for p in pat.split('/'):
1353 if '[' in p or '{' in p or '*' in p or '?' in p:
1350 if '[' in p or '{' in p or '*' in p or '?' in p:
1354 break
1351 break
1355 root.append(p)
1352 root.append(p)
1356 r.append('/'.join(root))
1353 r.append('/'.join(root))
1357 elif kind in ('relpath', 'path'):
1354 elif kind in ('relpath', 'path'):
1358 if pat == '.':
1355 if pat == '.':
1359 pat = ''
1356 pat = ''
1360 r.append(pat)
1357 r.append(pat)
1361 elif kind in ('rootfilesin',):
1358 elif kind in ('rootfilesin',):
1362 if pat == '.':
1359 if pat == '.':
1363 pat = ''
1360 pat = ''
1364 d.append(pat)
1361 d.append(pat)
1365 else: # relglob, re, relre
1362 else: # relglob, re, relre
1366 r.append('')
1363 r.append('')
1367 return r, d
1364 return r, d
1368
1365
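
A quick standalone check of the non-glob-prefix extraction used for the 'glob' kinds above (glob_root is a hypothetical name for the inlined loop):

    def glob_root(pat):
        # Walk path components until the first one containing glob syntax.
        root = []
        for p in pat.split('/'):
            if '[' in p or '{' in p or '*' in p or '?' in p:
                break
            root.append(p)
        return '/'.join(root)

    assert glob_root('g/h/*') == 'g/h'
    assert glob_root('g*') == ''
    assert glob_root('docs/api') == 'docs/api'
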
1369 def _roots(kindpats):
1366 def _roots(kindpats):
1370 '''Returns root directories to match recursively from the given patterns.'''
1367 '''Returns root directories to match recursively from the given patterns.'''
1371 roots, dirs = _patternrootsanddirs(kindpats)
1368 roots, dirs = _patternrootsanddirs(kindpats)
1372 return roots
1369 return roots
1373
1370
1374 def _rootsdirsandparents(kindpats):
1371 def _rootsdirsandparents(kindpats):
1375 '''Returns roots and exact directories from patterns.
1372 '''Returns roots and exact directories from patterns.
1376
1373
1377 `roots` are directories to match recursively, `dirs` should
1374 `roots` are directories to match recursively, `dirs` should
1378 be matched non-recursively, and `parents` are the implicitly required
1375 be matched non-recursively, and `parents` are the implicitly required
1379 directories to walk to items in either roots or dirs.
1376 directories to walk to items in either roots or dirs.
1380
1377
1381 Returns a tuple of (roots, dirs, parents).
1378 Returns a tuple of (roots, dirs, parents).
1382
1379
1383 >>> r = _rootsdirsandparents(
1380 >>> r = _rootsdirsandparents(
1384 ... [(b'glob', b'g/h/*', b''), (b'glob', b'g/h', b''),
1381 ... [(b'glob', b'g/h/*', b''), (b'glob', b'g/h', b''),
1385 ... (b'glob', b'g*', b'')])
1382 ... (b'glob', b'g*', b'')])
1386 >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
1383 >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
1387 (['g/h', 'g/h', ''], []) ['', 'g']
1384 (['g/h', 'g/h', ''], []) ['', 'g']
1388 >>> r = _rootsdirsandparents(
1385 >>> r = _rootsdirsandparents(
1389 ... [(b'rootfilesin', b'g/h', b''), (b'rootfilesin', b'', b'')])
1386 ... [(b'rootfilesin', b'g/h', b''), (b'rootfilesin', b'', b'')])
1390 >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
1387 >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
1391 ([], ['g/h', '']) ['', 'g']
1388 ([], ['g/h', '']) ['', 'g']
1392 >>> r = _rootsdirsandparents(
1389 >>> r = _rootsdirsandparents(
1393 ... [(b'relpath', b'r', b''), (b'path', b'p/p', b''),
1390 ... [(b'relpath', b'r', b''), (b'path', b'p/p', b''),
1394 ... (b'path', b'', b'')])
1391 ... (b'path', b'', b'')])
1395 >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
1392 >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
1396 (['r', 'p/p', ''], []) ['', 'p']
1393 (['r', 'p/p', ''], []) ['', 'p']
1397 >>> r = _rootsdirsandparents(
1394 >>> r = _rootsdirsandparents(
1398 ... [(b'relglob', b'rg*', b''), (b're', b're/', b''),
1395 ... [(b'relglob', b'rg*', b''), (b're', b're/', b''),
1399 ... (b'relre', b'rr', b'')])
1396 ... (b'relre', b'rr', b'')])
1400 >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
1397 >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
1401 (['', '', ''], []) ['']
1398 (['', '', ''], []) ['']
1402 '''
1399 '''
1403 r, d = _patternrootsanddirs(kindpats)
1400 r, d = _patternrootsanddirs(kindpats)
1404
1401
1405 p = set()
1402 p = set()
1406 # Add the parents as non-recursive/exact directories, since they must be
1403 # Add the parents as non-recursive/exact directories, since they must be
1407 # scanned to get to either the roots or the other exact directories.
1404 # scanned to get to either the roots or the other exact directories.
1408 p.update(util.dirs(d))
1405 p.update(util.dirs(d))
1409 p.update(util.dirs(r))
1406 p.update(util.dirs(r))
1410
1407
1411 # FIXME: all uses of this function convert these to sets, do so before
1408 # FIXME: all uses of this function convert these to sets, do so before
1412 # returning.
1409 # returning.
1413 # FIXME: all uses of this function do not need anything in 'roots' and
1410 # FIXME: all uses of this function do not need anything in 'roots' and
1414 # 'dirs' to also be in 'parents', consider removing them before returning.
1411 # 'dirs' to also be in 'parents', consider removing them before returning.
1415 return r, d, p
1412 return r, d, p
1416
1413
1417 def _explicitfiles(kindpats):
1414 def _explicitfiles(kindpats):
1418 '''Returns the potential explicit filenames from the patterns.
1415 '''Returns the potential explicit filenames from the patterns.
1419
1416
1420 >>> _explicitfiles([(b'path', b'foo/bar', b'')])
1417 >>> _explicitfiles([(b'path', b'foo/bar', b'')])
1421 ['foo/bar']
1418 ['foo/bar']
1422 >>> _explicitfiles([(b'rootfilesin', b'foo/bar', b'')])
1419 >>> _explicitfiles([(b'rootfilesin', b'foo/bar', b'')])
1423 []
1420 []
1424 '''
1421 '''
1425 # Keep only the pattern kinds where one can specify filenames (vs only
1422 # Keep only the pattern kinds where one can specify filenames (vs only
1426 # directory names).
1423 # directory names).
1427 filable = [kp for kp in kindpats if kp[0] not in ('rootfilesin',)]
1424 filable = [kp for kp in kindpats if kp[0] not in ('rootfilesin',)]
1428 return _roots(filable)
1425 return _roots(filable)
1429
1426
1430 def _prefix(kindpats):
1427 def _prefix(kindpats):
1431 '''Whether all the patterns match a prefix (i.e. recursively)'''
1428 '''Whether all the patterns match a prefix (i.e. recursively)'''
1432 for kind, pat, source in kindpats:
1429 for kind, pat, source in kindpats:
1433 if kind not in ('path', 'relpath'):
1430 if kind not in ('path', 'relpath'):
1434 return False
1431 return False
1435 return True
1432 return True
1436
1433
1437 _commentre = None
1434 _commentre = None
1438
1435
1439 def readpatternfile(filepath, warn, sourceinfo=False):
1436 def readpatternfile(filepath, warn, sourceinfo=False):
1440 '''parse a pattern file, returning a list of
1437 '''parse a pattern file, returning a list of
1441 patterns. These patterns should be given to compile()
1438 patterns. These patterns should be given to compile()
1442 to be validated and converted into a match function.
1439 to be validated and converted into a match function.
1443
1440
1444 trailing white space is dropped.
1441 trailing white space is dropped.
1445 the escape character is backslash.
1442 the escape character is backslash.
1446 comments start with #.
1443 comments start with #.
1447 empty lines are skipped.
1444 empty lines are skipped.
1448
1445
1449 lines can be of the following formats:
1446 lines can be of the following formats:
1450
1447
1451 syntax: regexp # defaults following lines to non-rooted regexps
1448 syntax: regexp # defaults following lines to non-rooted regexps
1452 syntax: glob # defaults following lines to non-rooted globs
1449 syntax: glob # defaults following lines to non-rooted globs
1453 re:pattern # non-rooted regular expression
1450 re:pattern # non-rooted regular expression
1454 glob:pattern # non-rooted glob
1451 glob:pattern # non-rooted glob
1455 rootglob:pat # rooted glob (same root as ^ in regexps)
1452 rootglob:pat # rooted glob (same root as ^ in regexps)
1456 pattern # pattern of the current default type
1453 pattern # pattern of the current default type
1457
1454
1458 if sourceinfo is set, returns a list of tuples:
1455 if sourceinfo is set, returns a list of tuples:
1459 (pattern, lineno, originalline).
1456 (pattern, lineno, originalline).
1460 This is useful to debug ignore patterns.
1457 This is useful to debug ignore patterns.
1461 '''
1458 '''
1462
1459
1463 if rustext is not None:
1460 if rustmod is not None:
1464 result, warnings = rustext.filepatterns.read_pattern_file(
1461 result, warnings = rustmod.read_pattern_file(
1465 filepath,
1462 filepath,
1466 bool(warn),
1463 bool(warn),
1467 sourceinfo,
1464 sourceinfo,
1468 )
1465 )
1469
1466
1470 for warning_params in warnings:
1467 for warning_params in warnings:
1471 # Can't be easily emitted from Rust, because it would require
1468 # Can't be easily emitted from Rust, because it would require
1472 # a mechanism for both gettext and calling the `warn` function.
1469 # a mechanism for both gettext and calling the `warn` function.
1473 warn(_("%s: ignoring invalid syntax '%s'\n") % warning_params)
1470 warn(_("%s: ignoring invalid syntax '%s'\n") % warning_params)
1474
1471
1475 return result
1472 return result
1476
1473
1477 syntaxes = {
1474 syntaxes = {
1478 're': 'relre:',
1475 're': 'relre:',
1479 'regexp': 'relre:',
1476 'regexp': 'relre:',
1480 'glob': 'relglob:',
1477 'glob': 'relglob:',
1481 'rootglob': 'rootglob:',
1478 'rootglob': 'rootglob:',
1482 'include': 'include',
1479 'include': 'include',
1483 'subinclude': 'subinclude',
1480 'subinclude': 'subinclude',
1484 }
1481 }
1485 syntax = 'relre:'
1482 syntax = 'relre:'
1486 patterns = []
1483 patterns = []
1487
1484
1488 fp = open(filepath, 'rb')
1485 fp = open(filepath, 'rb')
1489 for lineno, line in enumerate(util.iterfile(fp), start=1):
1486 for lineno, line in enumerate(util.iterfile(fp), start=1):
1490 if "#" in line:
1487 if "#" in line:
1491 global _commentre
1488 global _commentre
1492 if not _commentre:
1489 if not _commentre:
1493 _commentre = util.re.compile(br'((?:^|[^\\])(?:\\\\)*)#.*')
1490 _commentre = util.re.compile(br'((?:^|[^\\])(?:\\\\)*)#.*')
1494 # remove comments prefixed by an even number of escapes
1491 # remove comments prefixed by an even number of escapes
1495 m = _commentre.search(line)
1492 m = _commentre.search(line)
1496 if m:
1493 if m:
1497 line = line[:m.end(1)]
1494 line = line[:m.end(1)]
1498 # fixup properly escaped comments that survived the above
1495 # fixup properly escaped comments that survived the above
1499 line = line.replace("\\#", "#")
1496 line = line.replace("\\#", "#")
1500 line = line.rstrip()
1497 line = line.rstrip()
1501 if not line:
1498 if not line:
1502 continue
1499 continue
1503
1500
1504 if line.startswith('syntax:'):
1501 if line.startswith('syntax:'):
1505 s = line[7:].strip()
1502 s = line[7:].strip()
1506 try:
1503 try:
1507 syntax = syntaxes[s]
1504 syntax = syntaxes[s]
1508 except KeyError:
1505 except KeyError:
1509 if warn:
1506 if warn:
1510 warn(_("%s: ignoring invalid syntax '%s'\n") %
1507 warn(_("%s: ignoring invalid syntax '%s'\n") %
1511 (filepath, s))
1508 (filepath, s))
1512 continue
1509 continue
1513
1510
1514 linesyntax = syntax
1511 linesyntax = syntax
1515 for s, rels in syntaxes.iteritems():
1512 for s, rels in syntaxes.iteritems():
1516 if line.startswith(rels):
1513 if line.startswith(rels):
1517 linesyntax = rels
1514 linesyntax = rels
1518 line = line[len(rels):]
1515 line = line[len(rels):]
1519 break
1516 break
1520 elif line.startswith(s+':'):
1517 elif line.startswith(s+':'):
1521 linesyntax = rels
1518 linesyntax = rels
1522 line = line[len(s) + 1:]
1519 line = line[len(s) + 1:]
1523 break
1520 break
1524 if sourceinfo:
1521 if sourceinfo:
1525 patterns.append((linesyntax + line, lineno, line))
1522 patterns.append((linesyntax + line, lineno, line))
1526 else:
1523 else:
1527 patterns.append(linesyntax + line)
1524 patterns.append(linesyntax + line)
1528 fp.close()
1525 fp.close()
1529 return patterns
1526 return patterns
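
To make the accepted syntax concrete, here is a small example pattern file (hypothetical contents) that readpatternfile would parse without warnings:

    # comments run to end of line; blank lines are skipped

    syntax: glob
    *.pyc
    build/*

    rootglob:dist/*

    syntax: regexp
    \.orig$
    re:^tmp/

Each line is returned with its effective kind prefixed (e.g. 'relglob:*.pyc', 'relre:\.orig$'), so later stages need no knowledge of the 'syntax:' state.
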
@@ -1,2687 +1,2684
1 # revlog.py - storage back-end for mercurial
1 # revlog.py - storage back-end for mercurial
2 #
2 #
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 """Storage back-end for Mercurial.
8 """Storage back-end for Mercurial.
9
9
10 This provides efficient delta storage with O(1) retrieve and append
10 This provides efficient delta storage with O(1) retrieve and append
11 and O(changes) merge between branches.
11 and O(changes) merge between branches.
12 """
12 """
13
13
14 from __future__ import absolute_import
14 from __future__ import absolute_import
15
15
16 import collections
16 import collections
17 import contextlib
17 import contextlib
18 import errno
18 import errno
19 import os
19 import os
20 import struct
20 import struct
21 import zlib
21 import zlib
22
22
23 # import stuff from node for others to import from revlog
23 # import stuff from node for others to import from revlog
24 from .node import (
24 from .node import (
25 bin,
25 bin,
26 hex,
26 hex,
27 nullhex,
27 nullhex,
28 nullid,
28 nullid,
29 nullrev,
29 nullrev,
30 short,
30 short,
31 wdirfilenodeids,
31 wdirfilenodeids,
32 wdirhex,
32 wdirhex,
33 wdirid,
33 wdirid,
34 wdirrev,
34 wdirrev,
35 )
35 )
36 from .i18n import _
36 from .i18n import _
37 from .revlogutils.constants import (
37 from .revlogutils.constants import (
38 FLAG_GENERALDELTA,
38 FLAG_GENERALDELTA,
39 FLAG_INLINE_DATA,
39 FLAG_INLINE_DATA,
40 REVIDX_DEFAULT_FLAGS,
40 REVIDX_DEFAULT_FLAGS,
41 REVIDX_ELLIPSIS,
41 REVIDX_ELLIPSIS,
42 REVIDX_EXTSTORED,
42 REVIDX_EXTSTORED,
43 REVIDX_FLAGS_ORDER,
43 REVIDX_FLAGS_ORDER,
44 REVIDX_ISCENSORED,
44 REVIDX_ISCENSORED,
45 REVIDX_KNOWN_FLAGS,
45 REVIDX_KNOWN_FLAGS,
46 REVIDX_RAWTEXT_CHANGING_FLAGS,
46 REVIDX_RAWTEXT_CHANGING_FLAGS,
47 REVLOGV0,
47 REVLOGV0,
48 REVLOGV1,
48 REVLOGV1,
49 REVLOGV1_FLAGS,
49 REVLOGV1_FLAGS,
50 REVLOGV2,
50 REVLOGV2,
51 REVLOGV2_FLAGS,
51 REVLOGV2_FLAGS,
52 REVLOG_DEFAULT_FLAGS,
52 REVLOG_DEFAULT_FLAGS,
53 REVLOG_DEFAULT_FORMAT,
53 REVLOG_DEFAULT_FORMAT,
54 REVLOG_DEFAULT_VERSION,
54 REVLOG_DEFAULT_VERSION,
55 )
55 )
56 from .thirdparty import (
56 from .thirdparty import (
57 attr,
57 attr,
58 )
58 )
59 from . import (
59 from . import (
60 ancestor,
60 ancestor,
61 dagop,
61 dagop,
62 error,
62 error,
63 mdiff,
63 mdiff,
64 policy,
64 policy,
65 pycompat,
65 pycompat,
66 repository,
66 repository,
67 templatefilters,
67 templatefilters,
68 util,
68 util,
69 )
69 )
70 from .revlogutils import (
70 from .revlogutils import (
71 deltas as deltautil,
71 deltas as deltautil,
72 )
72 )
73 from .utils import (
73 from .utils import (
74 interfaceutil,
74 interfaceutil,
75 storageutil,
75 storageutil,
76 stringutil,
76 stringutil,
77 )
77 )
78
78
79 # blanket usage of all the names to prevent pyflakes complaints
79 # blanket usage of all the names to prevent pyflakes complaints
80 # We need these names available in the module for extensions.
80 # We need these names available in the module for extensions.
81 REVLOGV0
81 REVLOGV0
82 REVLOGV1
82 REVLOGV1
83 REVLOGV2
83 REVLOGV2
84 FLAG_INLINE_DATA
84 FLAG_INLINE_DATA
85 FLAG_GENERALDELTA
85 FLAG_GENERALDELTA
86 REVLOG_DEFAULT_FLAGS
86 REVLOG_DEFAULT_FLAGS
87 REVLOG_DEFAULT_FORMAT
87 REVLOG_DEFAULT_FORMAT
88 REVLOG_DEFAULT_VERSION
88 REVLOG_DEFAULT_VERSION
89 REVLOGV1_FLAGS
89 REVLOGV1_FLAGS
90 REVLOGV2_FLAGS
90 REVLOGV2_FLAGS
91 REVIDX_ISCENSORED
91 REVIDX_ISCENSORED
92 REVIDX_ELLIPSIS
92 REVIDX_ELLIPSIS
93 REVIDX_EXTSTORED
93 REVIDX_EXTSTORED
94 REVIDX_DEFAULT_FLAGS
94 REVIDX_DEFAULT_FLAGS
95 REVIDX_FLAGS_ORDER
95 REVIDX_FLAGS_ORDER
96 REVIDX_KNOWN_FLAGS
96 REVIDX_KNOWN_FLAGS
97 REVIDX_RAWTEXT_CHANGING_FLAGS
97 REVIDX_RAWTEXT_CHANGING_FLAGS
98
98
99 parsers = policy.importmod(r'parsers')
99 parsers = policy.importmod(r'parsers')
100 try:
100 rustancestor = policy.importrust(r'ancestor')
101 from . import rustext
101 rustdagop = policy.importrust(r'dagop')
102 rustext.__name__ # force actual import (see hgdemandimport)
103 except ImportError:
104 rustext = None
105
102
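
The pattern on the new side is the point of this changeset: policy.importrust returns the requested Rust module (or a member of it) when the Rust extensions are available, and the given default otherwise. A hedged sketch of the idea, ignoring the policy-mode check the real helper also performs:

    def importrust(modname, member=None, default=None):
        # Sketch only; not the actual mercurial.policy implementation.
        try:
            from mercurial import rustext
            rustext.__name__  # force the demand-import to resolve
        except ImportError:
            return default
        module = getattr(rustext, modname, None)
        if module is None:
            return default
        return getattr(module, member) if member is not None else module

This lets callers write `rustancestor = policy.importrust(r'ancestor')` and test `if rustancestor is not None:` instead of repeating the try/except import dance at every call site.
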
106 # Aliased for performance.
103 # Aliased for performance.
107 _zlibdecompress = zlib.decompress
104 _zlibdecompress = zlib.decompress
108
105
109 # max size of revlog with inline data
106 # max size of revlog with inline data
110 _maxinline = 131072
107 _maxinline = 131072
111 _chunksize = 1048576
108 _chunksize = 1048576
112
109
113 # Store flag processors (cf. 'addflagprocessor()' to register)
110 # Store flag processors (cf. 'addflagprocessor()' to register)
114 _flagprocessors = {
111 _flagprocessors = {
115 REVIDX_ISCENSORED: None,
112 REVIDX_ISCENSORED: None,
116 }
113 }
117
114
118 # Flag processors for REVIDX_ELLIPSIS.
115 # Flag processors for REVIDX_ELLIPSIS.
119 def ellipsisreadprocessor(rl, text):
116 def ellipsisreadprocessor(rl, text):
120 return text, False
117 return text, False
121
118
122 def ellipsiswriteprocessor(rl, text):
119 def ellipsiswriteprocessor(rl, text):
123 return text, False
120 return text, False
124
121
125 def ellipsisrawprocessor(rl, text):
122 def ellipsisrawprocessor(rl, text):
126 return False
123 return False
127
124
128 ellipsisprocessor = (
125 ellipsisprocessor = (
129 ellipsisreadprocessor,
126 ellipsisreadprocessor,
130 ellipsiswriteprocessor,
127 ellipsiswriteprocessor,
131 ellipsisrawprocessor,
128 ellipsisrawprocessor,
132 )
129 )
133
130
134 def addflagprocessor(flag, processor):
131 def addflagprocessor(flag, processor):
135 """Register a flag processor on a revision data flag.
132 """Register a flag processor on a revision data flag.
136
133
137 Invariant:
134 Invariant:
138 - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
135 - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
139 and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
136 and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
140 - Only one flag processor can be registered on a specific flag.
137 - Only one flag processor can be registered on a specific flag.
141 - flagprocessors must be 3-tuples of functions (read, write, raw) with the
138 - flagprocessors must be 3-tuples of functions (read, write, raw) with the
142 following signatures:
139 following signatures:
143 - (read) f(self, rawtext) -> text, bool
140 - (read) f(self, rawtext) -> text, bool
144 - (write) f(self, text) -> rawtext, bool
141 - (write) f(self, text) -> rawtext, bool
145 - (raw) f(self, rawtext) -> bool
142 - (raw) f(self, rawtext) -> bool
146 "text" is presented to the user. "rawtext" is stored in revlog data, not
143 "text" is presented to the user. "rawtext" is stored in revlog data, not
147 directly visible to the user.
144 directly visible to the user.
148 The boolean returned by these transforms is used to determine whether
145 The boolean returned by these transforms is used to determine whether
149 the returned text can be used for hash integrity checking. For example,
146 the returned text can be used for hash integrity checking. For example,
150 if "write" returns False, then "text" is used to generate hash. If
147 if "write" returns False, then "text" is used to generate hash. If
151 "write" returns True, that basically means "rawtext" returned by "write"
148 "write" returns True, that basically means "rawtext" returned by "write"
152 should be used to generate hash. Usually, "write" and "read" return
149 should be used to generate hash. Usually, "write" and "read" return
153 different booleans. And "raw" returns a same boolean as "write".
150 different booleans. And "raw" returns a same boolean as "write".
154
151
155 Note: The 'raw' transform is used for changegroup generation and in some
152 Note: The 'raw' transform is used for changegroup generation and in some
156 debug commands. In this case the transform only indicates whether the
153 debug commands. In this case the transform only indicates whether the
157 contents can be used for hash integrity checks.
154 contents can be used for hash integrity checks.
158 """
155 """
159 _insertflagprocessor(flag, processor, _flagprocessors)
156 _insertflagprocessor(flag, processor, _flagprocessors)
160
157
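
A hedged sketch of a conforming processor triple per the documented (read, write, raw) signatures. The transform shown is made up, and REVIDX_EXTSTORED is borrowed purely for illustration (it is normally claimed by extensions such as LFS):

    def _readtransform(self, rawtext):
        # stored form -> user-visible form; False: "text" feeds the hash
        return rawtext.replace(b'$SECRET$', b''), False

    def _writetransform(self, text):
        # user-visible form -> stored form; True: "rawtext" feeds the hash
        return b'$SECRET$' + text, True

    def _rawtransform(self, rawtext):
        # same boolean as the write transform, per the invariant above
        return True

    # addflagprocessor(REVIDX_EXTSTORED,
    #                  (_readtransform, _writetransform, _rawtransform))
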
161 def _insertflagprocessor(flag, processor, flagprocessors):
158 def _insertflagprocessor(flag, processor, flagprocessors):
162 if not flag & REVIDX_KNOWN_FLAGS:
159 if not flag & REVIDX_KNOWN_FLAGS:
163 msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
160 msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
164 raise error.ProgrammingError(msg)
161 raise error.ProgrammingError(msg)
165 if flag not in REVIDX_FLAGS_ORDER:
162 if flag not in REVIDX_FLAGS_ORDER:
166 msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
163 msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
167 raise error.ProgrammingError(msg)
164 raise error.ProgrammingError(msg)
168 if flag in flagprocessors:
165 if flag in flagprocessors:
169 msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
166 msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
170 raise error.Abort(msg)
167 raise error.Abort(msg)
171 flagprocessors[flag] = processor
168 flagprocessors[flag] = processor
172
169
173 def getoffset(q):
170 def getoffset(q):
174 return int(q >> 16)
171 return int(q >> 16)
175
172
176 def gettype(q):
173 def gettype(q):
177 return int(q & 0xFFFF)
174 return int(q & 0xFFFF)
178
175
179 def offset_type(offset, type):
176 def offset_type(offset, type):
180 if (type & ~REVIDX_KNOWN_FLAGS) != 0:
177 if (type & ~REVIDX_KNOWN_FLAGS) != 0:
181 raise ValueError('unknown revlog index flags')
178 raise ValueError('unknown revlog index flags')
182 return int(int(offset) << 16 | type)
179 return int(int(offset) << 16 | type)
183
180
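
A worked example of the packing helpers just defined (flags left at zero so offset_type's unknown-flag check passes):

    q = offset_type(1024, 0)      # 48-bit offset, 16-bit flag field
    assert q == 1024 << 16
    assert getoffset(q) == 1024   # q >> 16
    assert gettype(q) == 0        # q & 0xFFFF
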
184 @attr.s(slots=True, frozen=True)
181 @attr.s(slots=True, frozen=True)
185 class _revisioninfo(object):
182 class _revisioninfo(object):
186 """Information about a revision that allows building its fulltext
183 """Information about a revision that allows building its fulltext
187 node: expected hash of the revision
184 node: expected hash of the revision
188 p1, p2: parent revs of the revision
185 p1, p2: parent revs of the revision
189 btext: built text cache consisting of a one-element list
186 btext: built text cache consisting of a one-element list
190 cachedelta: (baserev, uncompressed_delta) or None
187 cachedelta: (baserev, uncompressed_delta) or None
191 flags: flags associated to the revision storage
188 flags: flags associated to the revision storage
192
189
193 One of btext[0] or cachedelta must be set.
190 One of btext[0] or cachedelta must be set.
194 """
191 """
195 node = attr.ib()
192 node = attr.ib()
196 p1 = attr.ib()
193 p1 = attr.ib()
197 p2 = attr.ib()
194 p2 = attr.ib()
198 btext = attr.ib()
195 btext = attr.ib()
199 textlen = attr.ib()
196 textlen = attr.ib()
200 cachedelta = attr.ib()
197 cachedelta = attr.ib()
201 flags = attr.ib()
198 flags = attr.ib()
202
199
203 @interfaceutil.implementer(repository.irevisiondelta)
200 @interfaceutil.implementer(repository.irevisiondelta)
204 @attr.s(slots=True)
201 @attr.s(slots=True)
205 class revlogrevisiondelta(object):
202 class revlogrevisiondelta(object):
206 node = attr.ib()
203 node = attr.ib()
207 p1node = attr.ib()
204 p1node = attr.ib()
208 p2node = attr.ib()
205 p2node = attr.ib()
209 basenode = attr.ib()
206 basenode = attr.ib()
210 flags = attr.ib()
207 flags = attr.ib()
211 baserevisionsize = attr.ib()
208 baserevisionsize = attr.ib()
212 revision = attr.ib()
209 revision = attr.ib()
213 delta = attr.ib()
210 delta = attr.ib()
214 linknode = attr.ib(default=None)
211 linknode = attr.ib(default=None)
215
212
216 @interfaceutil.implementer(repository.iverifyproblem)
213 @interfaceutil.implementer(repository.iverifyproblem)
217 @attr.s(frozen=True)
214 @attr.s(frozen=True)
218 class revlogproblem(object):
215 class revlogproblem(object):
219 warning = attr.ib(default=None)
216 warning = attr.ib(default=None)
220 error = attr.ib(default=None)
217 error = attr.ib(default=None)
221 node = attr.ib(default=None)
218 node = attr.ib(default=None)
222
219
223 # index v0:
220 # index v0:
224 # 4 bytes: offset
221 # 4 bytes: offset
225 # 4 bytes: compressed length
222 # 4 bytes: compressed length
226 # 4 bytes: base rev
223 # 4 bytes: base rev
227 # 4 bytes: link rev
224 # 4 bytes: link rev
228 # 20 bytes: parent 1 nodeid
225 # 20 bytes: parent 1 nodeid
229 # 20 bytes: parent 2 nodeid
226 # 20 bytes: parent 2 nodeid
230 # 20 bytes: nodeid
227 # 20 bytes: nodeid
231 indexformatv0 = struct.Struct(">4l20s20s20s")
228 indexformatv0 = struct.Struct(">4l20s20s20s")
232 indexformatv0_pack = indexformatv0.pack
229 indexformatv0_pack = indexformatv0.pack
233 indexformatv0_unpack = indexformatv0.unpack
230 indexformatv0_unpack = indexformatv0.unpack
234
231
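
A quick sanity check of the v0 layout described above, packing one synthetic entry (all values are made up):

    entry = indexformatv0_pack(0, 42, -1, 7,
                               b'\x11' * 20, b'\x22' * 20, b'\x33' * 20)
    assert len(entry) == indexformatv0.size        # 4*4 + 3*20 = 76 bytes
    assert indexformatv0_unpack(entry)[1] == 42    # compressed length
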
235 class revlogoldindex(list):
232 class revlogoldindex(list):
236 def __getitem__(self, i):
233 def __getitem__(self, i):
237 if i == -1:
234 if i == -1:
238 return (0, 0, 0, -1, -1, -1, -1, nullid)
235 return (0, 0, 0, -1, -1, -1, -1, nullid)
239 return list.__getitem__(self, i)
236 return list.__getitem__(self, i)
240
237
241 class revlogoldio(object):
238 class revlogoldio(object):
242 def __init__(self):
239 def __init__(self):
243 self.size = indexformatv0.size
240 self.size = indexformatv0.size
244
241
245 def parseindex(self, data, inline):
242 def parseindex(self, data, inline):
246 s = self.size
243 s = self.size
247 index = []
244 index = []
248 nodemap = {nullid: nullrev}
245 nodemap = {nullid: nullrev}
249 n = off = 0
246 n = off = 0
250 l = len(data)
247 l = len(data)
251 while off + s <= l:
248 while off + s <= l:
252 cur = data[off:off + s]
249 cur = data[off:off + s]
253 off += s
250 off += s
254 e = indexformatv0_unpack(cur)
251 e = indexformatv0_unpack(cur)
255 # transform to revlogv1 format
252 # transform to revlogv1 format
256 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
253 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
257 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
254 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
258 index.append(e2)
255 index.append(e2)
259 nodemap[e[6]] = n
256 nodemap[e[6]] = n
260 n += 1
257 n += 1
261
258
262 return revlogoldindex(index), nodemap, None
259 return revlogoldindex(index), nodemap, None
263
260
264 def packentry(self, entry, node, version, rev):
261 def packentry(self, entry, node, version, rev):
265 if gettype(entry[0]):
262 if gettype(entry[0]):
266 raise error.RevlogError(_('index entry flags need revlog '
263 raise error.RevlogError(_('index entry flags need revlog '
267 'version 1'))
264 'version 1'))
268 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
265 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
269 node(entry[5]), node(entry[6]), entry[7])
266 node(entry[5]), node(entry[6]), entry[7])
270 return indexformatv0_pack(*e2)
267 return indexformatv0_pack(*e2)
271
268
272 # index ng:
269 # index ng:
273 # 6 bytes: offset
270 # 6 bytes: offset
274 # 2 bytes: flags
271 # 2 bytes: flags
275 # 4 bytes: compressed length
272 # 4 bytes: compressed length
276 # 4 bytes: uncompressed length
273 # 4 bytes: uncompressed length
277 # 4 bytes: base rev
274 # 4 bytes: base rev
278 # 4 bytes: link rev
275 # 4 bytes: link rev
279 # 4 bytes: parent 1 rev
276 # 4 bytes: parent 1 rev
280 # 4 bytes: parent 2 rev
277 # 4 bytes: parent 2 rev
281 # 32 bytes: nodeid
278 # 32 bytes: nodeid
282 indexformatng = struct.Struct(">Qiiiiii20s12x")
279 indexformatng = struct.Struct(">Qiiiiii20s12x")
283 indexformatng_pack = indexformatng.pack
280 indexformatng_pack = indexformatng.pack
284 versionformat = struct.Struct(">I")
281 versionformat = struct.Struct(">I")
285 versionformat_pack = versionformat.pack
282 versionformat_pack = versionformat.pack
286 versionformat_unpack = versionformat.unpack
283 versionformat_unpack = versionformat.unpack
287
284
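
And the v1 ('ng') layout: one entry is a fixed 64 bytes (8 + 6*4 + 20, plus 12 bytes of padding). For example, with made-up values:

    e = (offset_type(0, 0), 30, 60, -1, 0, -1, -1, b'\x00' * 20)
    packed = indexformatng_pack(*e)
    assert len(packed) == indexformatng.size == 64

Note that for rev 0, revlogio.packentry overwrites the first 4 bytes with the version header.
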
288 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
285 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
289 # signed integer)
286 # signed integer)
290 _maxentrysize = 0x7fffffff
287 _maxentrysize = 0x7fffffff
291
288
292 class revlogio(object):
289 class revlogio(object):
293 def __init__(self):
290 def __init__(self):
294 self.size = indexformatng.size
291 self.size = indexformatng.size
295
292
296 def parseindex(self, data, inline):
293 def parseindex(self, data, inline):
297 # call the C implementation to parse the index data
294 # call the C implementation to parse the index data
298 index, cache = parsers.parse_index2(data, inline)
295 index, cache = parsers.parse_index2(data, inline)
299 return index, getattr(index, 'nodemap', None), cache
296 return index, getattr(index, 'nodemap', None), cache
300
297
301 def packentry(self, entry, node, version, rev):
298 def packentry(self, entry, node, version, rev):
302 p = indexformatng_pack(*entry)
299 p = indexformatng_pack(*entry)
303 if rev == 0:
300 if rev == 0:
304 p = versionformat_pack(version) + p[4:]
301 p = versionformat_pack(version) + p[4:]
305 return p
302 return p
306
303
307 class revlog(object):
304 class revlog(object):
308 """
305 """
309 the underlying revision storage object
306 the underlying revision storage object
310
307
311 A revlog consists of two parts, an index and the revision data.
308 A revlog consists of two parts, an index and the revision data.
312
309
313 The index is a file with a fixed record size containing
310 The index is a file with a fixed record size containing
314 information on each revision, including its nodeid (hash), the
311 information on each revision, including its nodeid (hash), the
315 nodeids of its parents, the position and offset of its data within
312 nodeids of its parents, the position and offset of its data within
316 the data file, and the revision it's based on. Finally, each entry
313 the data file, and the revision it's based on. Finally, each entry
317 contains a linkrev entry that can serve as a pointer to external
314 contains a linkrev entry that can serve as a pointer to external
318 data.
315 data.
319
316
320 The revision data itself is a linear collection of data chunks.
317 The revision data itself is a linear collection of data chunks.
321 Each chunk represents a revision and is usually represented as a
318 Each chunk represents a revision and is usually represented as a
322 delta against the previous chunk. To bound lookup time, runs of
319 delta against the previous chunk. To bound lookup time, runs of
323 deltas are limited to about 2 times the length of the original
320 deltas are limited to about 2 times the length of the original
324 version data. This makes retrieval of a version proportional to
321 version data. This makes retrieval of a version proportional to
325 its size, or O(1) relative to the number of revisions.
322 its size, or O(1) relative to the number of revisions.
326
323
327 Both pieces of the revlog are written to in an append-only
324 Both pieces of the revlog are written to in an append-only
328 fashion, which means we never need to rewrite a file to insert or
325 fashion, which means we never need to rewrite a file to insert or
329 remove data, and can use some simple techniques to avoid the need
326 remove data, and can use some simple techniques to avoid the need
330 for locking while reading.
327 for locking while reading.
331
328
332 If checkambig, indexfile is opened with checkambig=True at
329 If checkambig, indexfile is opened with checkambig=True at
333 writing, to avoid file stat ambiguity.
330 writing, to avoid file stat ambiguity.
334
331
335 If mmaplargeindex is True, and an mmapindexthreshold is set, the
332 If mmaplargeindex is True, and an mmapindexthreshold is set, the
336 index will be mmapped rather than read if it is larger than the
333 index will be mmapped rather than read if it is larger than the
337 configured threshold.
334 configured threshold.
338
335
339 If censorable is True, the revlog can have censored revisions.
336 If censorable is True, the revlog can have censored revisions.
340 """
337 """
341 def __init__(self, opener, indexfile, datafile=None, checkambig=False,
338 def __init__(self, opener, indexfile, datafile=None, checkambig=False,
342 mmaplargeindex=False, censorable=False):
339 mmaplargeindex=False, censorable=False):
343 """
340 """
344 create a revlog object
341 create a revlog object
345
342
346 opener is a function that abstracts the file opening operation
343 opener is a function that abstracts the file opening operation
347 and can be used to implement COW semantics or the like.
344 and can be used to implement COW semantics or the like.
348 """
345 """
349 self.indexfile = indexfile
346 self.indexfile = indexfile
350 self.datafile = datafile or (indexfile[:-2] + ".d")
347 self.datafile = datafile or (indexfile[:-2] + ".d")
351 self.opener = opener
348 self.opener = opener
352 # When True, indexfile is opened with checkambig=True at writing, to
349 # When True, indexfile is opened with checkambig=True at writing, to
353 # avoid file stat ambiguity.
350 # avoid file stat ambiguity.
354 self._checkambig = checkambig
351 self._checkambig = checkambig
355 self._mmaplargeindex = mmaplargeindex
352 self._mmaplargeindex = mmaplargeindex
356 self._censorable = censorable
353 self._censorable = censorable
357 # 3-tuple of (node, rev, text) for a raw revision.
354 # 3-tuple of (node, rev, text) for a raw revision.
358 self._revisioncache = None
355 self._revisioncache = None
359 # Maps rev to chain base rev.
356 # Maps rev to chain base rev.
360 self._chainbasecache = util.lrucachedict(100)
357 self._chainbasecache = util.lrucachedict(100)
361 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
358 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
362 self._chunkcache = (0, '')
359 self._chunkcache = (0, '')
363 # How much data to read and cache into the raw revlog data cache.
360 # How much data to read and cache into the raw revlog data cache.
364 self._chunkcachesize = 65536
361 self._chunkcachesize = 65536
365 self._maxchainlen = None
362 self._maxchainlen = None
366 self._deltabothparents = True
363 self._deltabothparents = True
367 self.index = []
364 self.index = []
368 # Mapping of partial identifiers to full nodes.
365 # Mapping of partial identifiers to full nodes.
369 self._pcache = {}
366 self._pcache = {}
370 # Mapping of revision integer to full node.
367 # Mapping of revision integer to full node.
371 self._nodecache = {nullid: nullrev}
368 self._nodecache = {nullid: nullrev}
372 self._nodepos = None
369 self._nodepos = None
373 self._compengine = 'zlib'
370 self._compengine = 'zlib'
374 self._compengineopts = {}
371 self._compengineopts = {}
375 self._maxdeltachainspan = -1
372 self._maxdeltachainspan = -1
376 self._withsparseread = False
373 self._withsparseread = False
377 self._sparserevlog = False
374 self._sparserevlog = False
378 self._srdensitythreshold = 0.50
375 self._srdensitythreshold = 0.50
379 self._srmingapsize = 262144
376 self._srmingapsize = 262144
380
377
381 # Make a copy of the flag processors so each revlog instance can
378 # Make a copy of the flag processors so each revlog instance can
382 # support custom flags.
379 # support custom flags.
383 self._flagprocessors = dict(_flagprocessors)
380 self._flagprocessors = dict(_flagprocessors)
384
381
385 # 2-tuple of file handles being used for active writing.
382 # 2-tuple of file handles being used for active writing.
386 self._writinghandles = None
383 self._writinghandles = None
387
384
388 self._loadindex()
385 self._loadindex()
389
386
390 def _loadindex(self):
387 def _loadindex(self):
391 mmapindexthreshold = None
388 mmapindexthreshold = None
392 opts = getattr(self.opener, 'options', {}) or {}
389 opts = getattr(self.opener, 'options', {}) or {}
393
390
394 if 'revlogv2' in opts:
391 if 'revlogv2' in opts:
395 newversionflags = REVLOGV2 | FLAG_INLINE_DATA
392 newversionflags = REVLOGV2 | FLAG_INLINE_DATA
396 elif 'revlogv1' in opts:
393 elif 'revlogv1' in opts:
397 newversionflags = REVLOGV1 | FLAG_INLINE_DATA
394 newversionflags = REVLOGV1 | FLAG_INLINE_DATA
398 if 'generaldelta' in opts:
395 if 'generaldelta' in opts:
399 newversionflags |= FLAG_GENERALDELTA
396 newversionflags |= FLAG_GENERALDELTA
400 elif getattr(self.opener, 'options', None) is not None:
397 elif getattr(self.opener, 'options', None) is not None:
401 # If options are provided but no 'revlog*' key is found, the repository
398 # If options are provided but no 'revlog*' key is found, the repository
402 # would have no 'requires' file in it, which means we have to
399 # would have no 'requires' file in it, which means we have to
403 # stick to the old format.
400 # stick to the old format.
404 newversionflags = REVLOGV0
401 newversionflags = REVLOGV0
405 else:
402 else:
406 newversionflags = REVLOG_DEFAULT_VERSION
403 newversionflags = REVLOG_DEFAULT_VERSION
407
404
408 if 'chunkcachesize' in opts:
405 if 'chunkcachesize' in opts:
409 self._chunkcachesize = opts['chunkcachesize']
406 self._chunkcachesize = opts['chunkcachesize']
410 if 'maxchainlen' in opts:
407 if 'maxchainlen' in opts:
411 self._maxchainlen = opts['maxchainlen']
408 self._maxchainlen = opts['maxchainlen']
412 if 'deltabothparents' in opts:
409 if 'deltabothparents' in opts:
413 self._deltabothparents = opts['deltabothparents']
410 self._deltabothparents = opts['deltabothparents']
414 self._lazydelta = bool(opts.get('lazydelta', True))
411 self._lazydelta = bool(opts.get('lazydelta', True))
415 self._lazydeltabase = False
412 self._lazydeltabase = False
416 if self._lazydelta:
413 if self._lazydelta:
417 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
414 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
418 if 'compengine' in opts:
415 if 'compengine' in opts:
419 self._compengine = opts['compengine']
416 self._compengine = opts['compengine']
420 if 'zlib.level' in opts:
417 if 'zlib.level' in opts:
421 self._compengineopts['zlib.level'] = opts['zlib.level']
418 self._compengineopts['zlib.level'] = opts['zlib.level']
422 if 'zstd.level' in opts:
419 if 'zstd.level' in opts:
423 self._compengineopts['zstd.level'] = opts['zstd.level']
420 self._compengineopts['zstd.level'] = opts['zstd.level']
424 if 'maxdeltachainspan' in opts:
421 if 'maxdeltachainspan' in opts:
425 self._maxdeltachainspan = opts['maxdeltachainspan']
422 self._maxdeltachainspan = opts['maxdeltachainspan']
426 if self._mmaplargeindex and 'mmapindexthreshold' in opts:
423 if self._mmaplargeindex and 'mmapindexthreshold' in opts:
427 mmapindexthreshold = opts['mmapindexthreshold']
424 mmapindexthreshold = opts['mmapindexthreshold']
428 self._sparserevlog = bool(opts.get('sparse-revlog', False))
425 self._sparserevlog = bool(opts.get('sparse-revlog', False))
429 withsparseread = bool(opts.get('with-sparse-read', False))
426 withsparseread = bool(opts.get('with-sparse-read', False))
430 # sparse-revlog forces sparse-read
427 # sparse-revlog forces sparse-read
431 self._withsparseread = self._sparserevlog or withsparseread
428 self._withsparseread = self._sparserevlog or withsparseread
432 if 'sparse-read-density-threshold' in opts:
429 if 'sparse-read-density-threshold' in opts:
433 self._srdensitythreshold = opts['sparse-read-density-threshold']
430 self._srdensitythreshold = opts['sparse-read-density-threshold']
434 if 'sparse-read-min-gap-size' in opts:
431 if 'sparse-read-min-gap-size' in opts:
435 self._srmingapsize = opts['sparse-read-min-gap-size']
432 self._srmingapsize = opts['sparse-read-min-gap-size']
436 if opts.get('enableellipsis'):
433 if opts.get('enableellipsis'):
437 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
434 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
438
435
439 # revlog v0 doesn't have flag processors
436 # revlog v0 doesn't have flag processors
440 for flag, processor in opts.get(b'flagprocessors', {}).iteritems():
437 for flag, processor in opts.get(b'flagprocessors', {}).iteritems():
441 _insertflagprocessor(flag, processor, self._flagprocessors)
438 _insertflagprocessor(flag, processor, self._flagprocessors)
442
439
443 if self._chunkcachesize <= 0:
440 if self._chunkcachesize <= 0:
444 raise error.RevlogError(_('revlog chunk cache size %r is not '
441 raise error.RevlogError(_('revlog chunk cache size %r is not '
445 'greater than 0') % self._chunkcachesize)
442 'greater than 0') % self._chunkcachesize)
446 elif self._chunkcachesize & (self._chunkcachesize - 1):
443 elif self._chunkcachesize & (self._chunkcachesize - 1):
447 raise error.RevlogError(_('revlog chunk cache size %r is not a '
444 raise error.RevlogError(_('revlog chunk cache size %r is not a '
448 'power of 2') % self._chunkcachesize)
445 'power of 2') % self._chunkcachesize)
449
446
450 indexdata = ''
447 indexdata = ''
451 self._initempty = True
448 self._initempty = True
452 try:
449 try:
453 with self._indexfp() as f:
450 with self._indexfp() as f:
454 if (mmapindexthreshold is not None and
451 if (mmapindexthreshold is not None and
455 self.opener.fstat(f).st_size >= mmapindexthreshold):
452 self.opener.fstat(f).st_size >= mmapindexthreshold):
456 # TODO: should .close() to release resources without
453 # TODO: should .close() to release resources without
457 # relying on Python GC
454 # relying on Python GC
458 indexdata = util.buffer(util.mmapread(f))
455 indexdata = util.buffer(util.mmapread(f))
459 else:
456 else:
460 indexdata = f.read()
457 indexdata = f.read()
461 if len(indexdata) > 0:
458 if len(indexdata) > 0:
462 versionflags = versionformat_unpack(indexdata[:4])[0]
459 versionflags = versionformat_unpack(indexdata[:4])[0]
463 self._initempty = False
460 self._initempty = False
464 else:
461 else:
465 versionflags = newversionflags
462 versionflags = newversionflags
466 except IOError as inst:
463 except IOError as inst:
467 if inst.errno != errno.ENOENT:
464 if inst.errno != errno.ENOENT:
468 raise
465 raise
469
466
470 versionflags = newversionflags
467 versionflags = newversionflags
471
468
472 self.version = versionflags
469 self.version = versionflags
473
470
474 flags = versionflags & ~0xFFFF
471 flags = versionflags & ~0xFFFF
475 fmt = versionflags & 0xFFFF
472 fmt = versionflags & 0xFFFF
476
473
477 if fmt == REVLOGV0:
474 if fmt == REVLOGV0:
478 if flags:
475 if flags:
479 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
476 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
480 'revlog %s') %
477 'revlog %s') %
481 (flags >> 16, fmt, self.indexfile))
478 (flags >> 16, fmt, self.indexfile))
482
479
483 self._inline = False
480 self._inline = False
484 self._generaldelta = False
481 self._generaldelta = False
485
482
486 elif fmt == REVLOGV1:
483 elif fmt == REVLOGV1:
487 if flags & ~REVLOGV1_FLAGS:
484 if flags & ~REVLOGV1_FLAGS:
488 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
485 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
489 'revlog %s') %
486 'revlog %s') %
490 (flags >> 16, fmt, self.indexfile))
487 (flags >> 16, fmt, self.indexfile))
491
488
492 self._inline = versionflags & FLAG_INLINE_DATA
489 self._inline = versionflags & FLAG_INLINE_DATA
493 self._generaldelta = versionflags & FLAG_GENERALDELTA
490 self._generaldelta = versionflags & FLAG_GENERALDELTA
494
491
495 elif fmt == REVLOGV2:
492 elif fmt == REVLOGV2:
496 if flags & ~REVLOGV2_FLAGS:
493 if flags & ~REVLOGV2_FLAGS:
497 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
494 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
498 'revlog %s') %
495 'revlog %s') %
499 (flags >> 16, fmt, self.indexfile))
496 (flags >> 16, fmt, self.indexfile))
500
497
501 self._inline = versionflags & FLAG_INLINE_DATA
498 self._inline = versionflags & FLAG_INLINE_DATA
502 # generaldelta implied by version 2 revlogs.
499 # generaldelta implied by version 2 revlogs.
503 self._generaldelta = True
500 self._generaldelta = True
504
501
505 else:
502 else:
506 raise error.RevlogError(_('unknown version (%d) in revlog %s') %
503 raise error.RevlogError(_('unknown version (%d) in revlog %s') %
507 (fmt, self.indexfile))
504 (fmt, self.indexfile))
508 # sparse-revlog can't be on without general-delta (issue6056)
505 # sparse-revlog can't be on without general-delta (issue6056)
509 if not self._generaldelta:
506 if not self._generaldelta:
510 self._sparserevlog = False
507 self._sparserevlog = False
511
508
512 self._storedeltachains = True
509 self._storedeltachains = True
513
510
514 self._io = revlogio()
511 self._io = revlogio()
515 if self.version == REVLOGV0:
512 if self.version == REVLOGV0:
516 self._io = revlogoldio()
513 self._io = revlogoldio()
517 try:
514 try:
518 d = self._io.parseindex(indexdata, self._inline)
515 d = self._io.parseindex(indexdata, self._inline)
519 except (ValueError, IndexError):
516 except (ValueError, IndexError):
520 raise error.RevlogError(_("index %s is corrupted") %
517 raise error.RevlogError(_("index %s is corrupted") %
521 self.indexfile)
518 self.indexfile)
522 self.index, nodemap, self._chunkcache = d
519 self.index, nodemap, self._chunkcache = d
523 if nodemap is not None:
520 if nodemap is not None:
524 self.nodemap = self._nodecache = nodemap
521 self.nodemap = self._nodecache = nodemap
525 if not self._chunkcache:
522 if not self._chunkcache:
526 self._chunkclear()
523 self._chunkclear()
527 # revnum -> (chain-length, sum-delta-length)
524 # revnum -> (chain-length, sum-delta-length)
528 self._chaininfocache = {}
525 self._chaininfocache = {}
529 # revlog header -> revlog compressor
526 # revlog header -> revlog compressor
530 self._decompressors = {}
527 self._decompressors = {}
531
528
532 @util.propertycache
529 @util.propertycache
533 def _compressor(self):
530 def _compressor(self):
534 engine = util.compengines[self._compengine]
531 engine = util.compengines[self._compengine]
535 return engine.revlogcompressor(self._compengineopts)
532 return engine.revlogcompressor(self._compengineopts)
536
533
537 def _indexfp(self, mode='r'):
534 def _indexfp(self, mode='r'):
538 """file object for the revlog's index file"""
535 """file object for the revlog's index file"""
539 args = {r'mode': mode}
536 args = {r'mode': mode}
540 if mode != 'r':
537 if mode != 'r':
541 args[r'checkambig'] = self._checkambig
538 args[r'checkambig'] = self._checkambig
542 if mode == 'w':
539 if mode == 'w':
543 args[r'atomictemp'] = True
540 args[r'atomictemp'] = True
544 return self.opener(self.indexfile, **args)
541 return self.opener(self.indexfile, **args)
545
542
546 def _datafp(self, mode='r'):
543 def _datafp(self, mode='r'):
547 """file object for the revlog's data file"""
544 """file object for the revlog's data file"""
548 return self.opener(self.datafile, mode=mode)
545 return self.opener(self.datafile, mode=mode)
549
546
550 @contextlib.contextmanager
547 @contextlib.contextmanager
551 def _datareadfp(self, existingfp=None):
548 def _datareadfp(self, existingfp=None):
552 """file object suitable to read data"""
549 """file object suitable to read data"""
553 # Use explicit file handle, if given.
550 # Use explicit file handle, if given.
554 if existingfp is not None:
551 if existingfp is not None:
555 yield existingfp
552 yield existingfp
556
553
557 # Use a file handle being actively used for writes, if available.
554 # Use a file handle being actively used for writes, if available.
558 # There is some danger to doing this because reads will seek the
555 # There is some danger to doing this because reads will seek the
559 # file. However, _writeentry() performs a SEEK_END before all writes,
556 # file. However, _writeentry() performs a SEEK_END before all writes,
560 # so we should be safe.
557 # so we should be safe.
561 elif self._writinghandles:
558 elif self._writinghandles:
562 if self._inline:
559 if self._inline:
563 yield self._writinghandles[0]
560 yield self._writinghandles[0]
564 else:
561 else:
565 yield self._writinghandles[1]
562 yield self._writinghandles[1]
566
563
567 # Otherwise open a new file handle.
564 # Otherwise open a new file handle.
568 else:
565 else:
569 if self._inline:
566 if self._inline:
570 func = self._indexfp
567 func = self._indexfp
571 else:
568 else:
572 func = self._datafp
569 func = self._datafp
573 with func() as fp:
570 with func() as fp:
574 yield fp
571 yield fp
575
572
    def tip(self):
        return self.node(len(self.index) - 1)
    def __contains__(self, rev):
        return 0 <= rev < len(self)
    def __len__(self):
        return len(self.index)
    def __iter__(self):
        return iter(pycompat.xrange(len(self)))
    def revs(self, start=0, stop=None):
        """iterate over all revs in this revlog (from start to stop)"""
        return storageutil.iterrevs(len(self), start=start, stop=stop)

    @util.propertycache
    def nodemap(self):
        if self.index:
            # populate mapping down to the initial node
            node0 = self.index[0][7] # get around changelog filtering
            self.rev(node0)
        return self._nodecache

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def candelta(self, baserev, rev):
        """whether two revisions (baserev, rev) can be delta-ed or not"""
        # Disable delta if either rev requires a content-changing flag
        # processor (ex. LFS). This is because such a flag processor can
        # alter the rawtext content that the delta will be based on, and two
        # clients could have the same revlog node with different flags (i.e.
        # different rawtext contents) and the delta could be incompatible.
        if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
            or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
            return False
        return True

    def clearcaches(self):
        self._revisioncache = None
        self._chainbasecache.clear()
        self._chunkcache = (0, '')
        self._pcache = {}

        try:
            # If we are using the native C version, you are in a fun case
            # where self.index, self.nodemap and self._nodecache are all the
            # same object.
            self._nodecache.clearcaches()
        except AttributeError:
            self._nodecache = {nullid: nullrev}
            self._nodepos = None

    def rev(self, node):
        try:
            return self._nodecache[node]
        except TypeError:
            raise
        except error.RevlogError:
            # parsers.c radix tree lookup failed
            if node == wdirid or node in wdirfilenodeids:
                raise error.WdirUnsupported
            raise error.LookupError(node, self.indexfile, _('no node'))
        except KeyError:
            # pure python cache lookup failed
            n = self._nodecache
            i = self.index
            p = self._nodepos
            if p is None:
                p = len(i) - 1
            else:
                assert p < len(i)
            for r in pycompat.xrange(p, -1, -1):
                v = i[r][7]
                n[v] = r
                if v == node:
                    self._nodepos = r - 1
                    return r
            if node == wdirid or node in wdirfilenodeids:
                raise error.WdirUnsupported
            raise error.LookupError(node, self.indexfile, _('no node'))

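    # Illustration (not in the original source): ``rev()`` and ``node()`` are
    # inverse lookups between 20-byte binary node ids and integer revision
    # numbers; a sketch, with ``rl`` a hypothetical open revlog:
    #
    #     r = rl.rev(some_node)      # raises error.LookupError if unknown
    #     assert rl.node(r) == some_node
    #
    # The KeyError branch above is the pure-Python fallback: it scans the
    # index backwards from the last cached position, memoizing every node it
    # passes so later lookups get cheaper.
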
    # Accessors for index entries.

    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
    # are flags.
    def start(self, rev):
        return int(self.index[rev][0] >> 16)

    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF

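    # Illustration (not in the original source): the first index field packs
    # a 48-bit data-file offset and 16 bits of flags into one integer, so
    # start() and flags() are just shifts and masks. For example, an entry
    # whose data starts at byte 1000 with flag 0x2000 stores
    # (1000 << 16) | 0x2000; then (value >> 16) == 1000 recovers the offset
    # and (value & 0xFFFF) == 0x2000 recovers the flags.
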
    def length(self, rev):
        return self.index[rev][1]

    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.revision(rev, raw=True)
        return len(t)

    def size(self, rev):
        """length of non-raw text (processed by a "read" flag processor)"""
        # fast path: if no "read" flag processor could change the content,
        # size is rawsize. note: ELLIPSIS is known to not change the content.
        flags = self.flags(rev)
        if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
            return self.rawsize(rev)

        return len(self.revision(rev, raw=False))

    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

        index = self.index
        iterrev = rev
        base = index[iterrev][3]
        while base != iterrev:
            iterrev = base
            base = index[iterrev][3]

        self._chainbasecache[rev] = base
        return base

    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        try:
            entry = self.index[rev]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

        return entry[5], entry[6]

    # fast parentrevs(rev) where rev isn't filtered
    _uncheckedparentrevs = parentrevs

    def node(self, rev):
        try:
            return self.index[rev][7]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

    # Derived from index values.

    def end(self, rev):
        return self.start(rev) + self.length(rev)

    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, self._generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped

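    # Illustration (not in the original source): with generaldelta, each
    # index entry's fourth field (e[3]) names its delta base, and a chain
    # ends where an entry is its own base. A hand-rolled walk over a tiny
    # hypothetical DAG of delta bases:
    #
    #     # three revisions: 0 is a full text, 1 and 2 delta against 0 and 1
    #     bases = {0: 0, 1: 0, 2: 1}
    #     rev, chain = 2, []
    #     while bases[rev] != rev:
    #         chain.append(rev)
    #         rev = bases[rev]
    #     chain.append(rev)
    #     chain.reverse()   # -> [0, 1, 2], bases first, as _deltachain returns
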
    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse revision order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        # first, make sure start revisions aren't filtered
        revs = list(revs)
        checkrev = self.node
        for r in revs:
            checkrev(r)
        # and we're sure ancestors aren't filtered as well

        if rustancestor is not None:
            lazyancestors = rustancestor.LazyAncestors
            arg = self.index
        elif util.safehasattr(parsers, 'rustlazyancestors'):
            lazyancestors = ancestor.rustlazyancestors
            arg = self.index
        else:
            lazyancestors = ancestor.lazyancestors
            arg = self._uncheckedparentrevs
        return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)

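    # Illustration (not in the original source): all three lazyancestors
    # implementations expose the same lazy-iteration contract; a usage
    # sketch, with ``rl`` a hypothetical open revlog:
    #
    #     for r in rl.ancestors([tiprev], stoprev=0, inclusive=True):
    #         pass   # yields revs in reverse revision order, tiprev included
    #
    # The Rust path (rustancestor.LazyAncestors) and the C helper both work
    # directly on the shared index object rather than calling back into
    # Python for every parent edge.
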
    def descendants(self, revs):
        return dagop.descendantrevs(revs, self.revs, self.parentrevs)

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

          ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset(object):
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if r not in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]

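    # Illustration (not in the original source): in revset terms the return
    # value pairs ``::common`` (as a lazy membership set) with the sorted
    # revs of ``(::heads) - (::common)`` mapped back to nodes. A sketch,
    # with ``rl``, ``c`` and ``h`` hypothetical:
    #
    #     has, missing = rl.findcommonmissing(common=[c], heads=[h])
    #     # 'missing' is roughly what an exchange would need to transfer
    #     # to make 'h' known on a side that already has 'c'.
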
    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        if rustancestor is not None:
            return rustancestor.MissingAncestors(self.index, common)
        return ancestor.incrementalmissingancestors(self.parentrevs, common)

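    # Illustration (not in the original source): the returned object is
    # consumed incrementally during discovery; a sketch of the calls the
    # callers rely on (method names as in ancestor.py, mirrored by the Rust
    # MissingAncestors binding):
    #
    #     inc = rl.incrementalmissingrevs(common=[c])
    #     inc.addbases([other])               # grow the 'common' side later
    #     revs = inc.missingancestors(heads)  # ancestors of heads not in bases
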
    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullrev."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid] # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in self], [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n) # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update([p for p in self.parents(n) if
                                           p != nullid])
                    elif n in heads: # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev: # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the dependents list with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in heads.iteritems() if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

    def headrevs(self, revs=None):
        if revs is None:
            try:
                return self.index.headrevs()
            except AttributeError:
                return self._headrevs()
        if rustdagop is not None:
            return rustdagop.headrevs(self.index, revs)
        return dagop.headrevs(revs, self._uncheckedparentrevs)

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iter over filtered rev so nobody is a head at start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1 # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
        return [r for r, val in enumerate(ishead) if val]

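    # Illustration (not in the original source): _headrevs marks every rev
    # as a candidate head, then clears the mark on each rev that is named as
    # a parent. Worked example on a three-rev chain 0 <- 1 <- 2:
    #
    #     parents = {0: (-1, -1), 1: (0, -1), 2: (1, -1)}
    #     ishead = [1, 1, 1, 0]    # one spare slot; index -1 absorbs nullrev
    #     for r in range(3):
    #         p1, p2 = parents[r]
    #         ishead[p1] = ishead[p2] = 0
    #     heads = [r for r, v in enumerate(ishead[:3]) if v]   # -> [2]
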
    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullrev
        else:
            start = self.rev(start)

        stoprevs = set(self.rev(n) for n in stop or [])

        revs = dagop.headrevssubset(self.revs, self.parentrevs, startrev=start,
                                    stoprevs=stoprevs)

        return [self.node(rev) for rev in revs]

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError): # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        A revision is considered an ancestor of itself."""
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

    def isancestorrev(self, a, b):
        """return True if revision a is an ancestor of revision b

        A revision is considered an ancestor of itself.

        The implementation of this is trivial but the use of
        reachableroots is not."""
        if a == nullrev:
            return True
        elif a == b:
            return True
        elif a > b:
            return False
        return bool(self.reachableroots(a, [b], [a], includepath=False))

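    # Illustration (not in the original source): the early exits above
    # exploit the revlog invariant that a parent's revision number is always
    # smaller than its child's, so ``a > b`` can never be an ancestor
    # relation. A sketch of the final check, with ``rl`` hypothetical:
    #
    #     rl.isancestorrev(2, 7)
    #     # -> bool(rl.reachableroots(2, [7], [2], includepath=False)),
    #     # i.e. "is rev 2 among the reachable roots of rev 7, looking no
    #     # further down than rev 2?"
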
    def reachableroots(self, minroot, heads, roots, includepath=False):
        """return (heads(::<roots> and <roots>::<heads>))

        If includepath is True, return (<roots>::<heads>)."""
        try:
            return self.index.reachableroots2(minroot, heads, roots,
                                              includepath)
        except AttributeError:
            return dagop._reachablerootspure(self.parentrevs,
                                             minroot, roots, heads, includepath)

    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return nullid

    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node) # quick search the index
                return node
            except error.LookupError:
                pass # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if "%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, error.LookupError):
                pass

    def _partialmatch(self, id):
        # we don't care about wdirfilenodeids as they should always be full
        # hashes
        maybewdir = wdirhex.startswith(id)
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    raise error.RevlogError
                return partial
            if maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            return None
        except error.RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                raise error.AmbiguousPrefixLookupError(
                    id, self.indexfile, _('ambiguous identifier'))
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2 # grab an even number of digits
                prefix = bin(id[:l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [n for n in nl if hex(n).startswith(id) and
                      self.hasnode(n)]
                if nullhex.startswith(id):
                    nl.append(nullid)
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise error.AmbiguousPrefixLookupError(
                        id, self.indexfile, _('ambiguous identifier'))
                if maybewdir:
                    raise error.WdirUnsupported
                return None
            except TypeError:
                pass

    def lookup(self, id):
        """locate a node based on:
            - revision number or str(revision number)
            - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise error.LookupError(id, self.indexfile, _('no match found'))

    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""
        def isvalid(prefix):
            try:
                node = self._partialmatch(prefix)
            except error.AmbiguousPrefixLookupError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if node is None:
                raise error.LookupError(node, self.indexfile, _('no node'))
            return True

        def maybewdir(prefix):
            return all(c == 'f' for c in pycompat.iterbytestr(prefix))

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except error.RevlogError:
                if node != wdirid:
                    raise error.LookupError(node, self.indexfile, _('no node'))
            except AttributeError:
                # Fall through to pure code
                pass

        if node == wdirid:
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, 41):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)

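    # Illustration (not in the original source): shortest() backs template
    # helpers such as ``hg log -T '{shortest(node)}'``. A usage sketch, with
    # ``rl`` a hypothetical open revlog:
    #
    #     prefix = rl.shortest(some_node)
    #     assert rl.lookup(prefix) == some_node
    #
    # The all-'f' special case exists because a prefix of ffff... could also
    # mean the virtual working-directory id, so such prefixes need an extra
    # digit to disambiguate.
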
    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different from what is stored.
        """
        p1, p2 = self.parents(node)
        return storageutil.hashrevisionsha1(text, p1, p2) != node

    def _cachesegment(self, offset, data):
        """Add a segment to the revlog cache.

        Accepts an absolute offset and the data that is at that location.
        """
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

    def _readsegment(self, offset, length, df=None):
        """Load a segment of raw data from the revlog.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, it will be seeked and the
        original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.

        Raises if the requested number of bytes could not be read.
        """
        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        realoffset = offset & ~(cachesize - 1)
        reallength = (((offset + length + cachesize) & ~(cachesize - 1))
                      - realoffset)
        with self._datareadfp(df) as df:
            df.seek(realoffset)
            d = df.read(reallength)

        self._cachesegment(realoffset, d)
        if offset != realoffset or reallength != length:
            startoffset = offset - realoffset
            if len(d) - startoffset < length:
                raise error.RevlogError(
                    _('partial read of revlog %s; expected %d bytes from '
                      'offset %d, got %d') %
                    (self.indexfile if self._inline else self.datafile,
                     length, realoffset, len(d) - startoffset))

            return util.buffer(d, startoffset, length)

        if len(d) < length:
            raise error.RevlogError(
                _('partial read of revlog %s; expected %d bytes from offset '
                  '%d, got %d') %
                (self.indexfile if self._inline else self.datafile,
                 length, offset, len(d)))

        return d

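    # Illustration (not in the original source): the window arithmetic only
    # works because _chunkcachesize is a power of two, so ``& ~(size - 1)``
    # rounds down to a window boundary. Worked example with a 65536-byte
    # window:
    #
    #     cachesize = 65536
    #     offset, length = 70000, 1000
    #     realoffset = offset & ~(cachesize - 1)              # 65536
    #     reallength = (((offset + length + cachesize)
    #                    & ~(cachesize - 1)) - realoffset)    # 65536
    #
    # so one aligned 64k read serves the request and pre-fills the cache
    # around it.
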
    def _getsegment(self, offset, length, df=None):
        """Obtain a segment of raw data from the revlog.

        Accepts an absolute offset, length of bytes to obtain, and an
        optional file handle to the already-opened revlog. If the file
        handle is used, its original seek position will not be preserved.

        Requests for data may be returned from a cache.

        Returns a str or a buffer instance of raw byte data.
        """
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._readsegment(offset, length, df=df)

1487 def _getsegmentforrevs(self, startrev, endrev, df=None):
1484 def _getsegmentforrevs(self, startrev, endrev, df=None):
1488 """Obtain a segment of raw data corresponding to a range of revisions.
1485 """Obtain a segment of raw data corresponding to a range of revisions.
1489
1486
1490 Accepts the start and end revisions and an optional already-open
1487 Accepts the start and end revisions and an optional already-open
1491 file handle to be used for reading. If the file handle is read, its
1488 file handle to be used for reading. If the file handle is read, its
1492 seek position will not be preserved.
1489 seek position will not be preserved.
1493
1490
1494 Requests for data may be satisfied by a cache.
1491 Requests for data may be satisfied by a cache.
1495
1492
1496 Returns a 2-tuple of (offset, data) for the requested range of
1493 Returns a 2-tuple of (offset, data) for the requested range of
1497 revisions. Offset is the integer offset from the beginning of the
1494 revisions. Offset is the integer offset from the beginning of the
1498 revlog and data is a str or buffer of the raw byte data.
1495 revlog and data is a str or buffer of the raw byte data.
1499
1496
1500 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1497 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1501 to determine where each revision's data begins and ends.
1498 to determine where each revision's data begins and ends.
1502 """
1499 """
1503 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1500 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1504 # (functions are expensive).
1501 # (functions are expensive).
1505 index = self.index
1502 index = self.index
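        # the first field of an index entry packs the data offset in its
        # high bits and the revision flags in its low 16 bits (see
        # offset_type() further down), hence the >> 16 below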
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self._io.size
            end += (endrev + 1) * self._io.size
        length = end - start

        return start, self._getsegment(start, length, df=df)

    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])

    def _chunks(self, revs, df=None, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self._io.size
        buffer = util.buffer

        l = []
        ladd = l.append

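        # with sparse reads enabled, the requested revisions are sliced into
        # reasonably dense groups so that a single read does not span large
        # gaps of unrelated data; otherwise everything is fetched in one
        # segment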
        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = deltautil.slicechunk(self, revs,
                                                targetsize=targetsize)

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                ladd(decomp(buffer(data, chunkstart - offset, chunklength)))

        return l

    def _chunkclear(self):
        """Clear the raw chunk cache."""
        self._chunkcache = (0, '')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
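        # index field 3 is the stored delta base; with generaldelta it can
        # name any earlier revision, while without it a delta always applies
        # against the previous revision, so the field only distinguishes
        # full snapshots (base == rev) from deltas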
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

    def issnapshot(self, rev):
        """tells whether rev is a snapshot
        """
        if not self._sparserevlog:
            return self.deltaparent(rev) == nullrev
        elif util.safehasattr(self.index, 'issnapshot'):
            # directly assign the method to cache the testing and access
            self.issnapshot = self.index.issnapshot
            return self.issnapshot(rev)
        if rev == nullrev:
            return True
        entry = self.index[rev]
        base = entry[3]
        if base == rev:
            return True
        if base == nullrev:
            return True
        p1 = entry[5]
        p2 = entry[6]
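        # a delta based on one of the revision's own parents is a regular
        # delta; only a delta based on another snapshot keeps the snapshot
        # chain going (an intermediate snapshot)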
        if base == p1 or base == p2:
            return False
        return self.issnapshot(base)

    def snapshotdepth(self, rev):
1624 """number of snapshot in the chain before this one"""
1621 """number of snapshot in the chain before this one"""
1625 if not self.issnapshot(rev):
1622 if not self.issnapshot(rev):
1626 raise error.ProgrammingError('revision %d not a snapshot')
1623 raise error.ProgrammingError('revision %d not a snapshot')
        return len(self._deltachain(rev)[0]) - 1

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.revision(rev1, raw=True),
                              self.revision(rev2, raw=True))

    def revision(self, nodeorrev, _df=None, raw=False):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        raw - an optional argument specifying if the revision data is to be
        treated as raw data when applying flag transforms. 'raw' should be set
        to True when generating changegroups or in debug commands.
        """
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        cachedrev = None
        flags = None
        rawtext = None
        if node == nullid:
            return ""
        if self._revisioncache:
            if self._revisioncache[0] == node:
                # _cache only stores rawtext
                if raw:
                    return self._revisioncache[2]
                # duplicated, but good for perf
                if rev is None:
                    rev = self.rev(node)
                if flags is None:
                    flags = self.flags(rev)
                # no extra flags set, no flag processor runs, text = rawtext
                if flags == REVIDX_DEFAULT_FLAGS:
                    return self._revisioncache[2]
                # rawtext is reusable. need to run flag processor
                rawtext = self._revisioncache[2]

            cachedrev = self._revisioncache[1]

        # look up what we need to read
        if rawtext is None:
            if rev is None:
                rev = self.rev(node)

            chain, stopped = self._deltachain(rev, stoprev=cachedrev)
            if stopped:
                rawtext = self._revisioncache[2]

            # drop cache to save memory
            self._revisioncache = None

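            # cap how much data a sparse read may fetch at once: reading
            # much more than the reconstructed text size is assumed to cost
            # more than issuing additional reads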
            targetsize = None
            rawsize = self.index[rev][2]
            if 0 <= rawsize:
                targetsize = 4 * rawsize

            bins = self._chunks(chain, df=_df, targetsize=targetsize)
            if rawtext is None:
                rawtext = bytes(bins[0])
                bins = bins[1:]

            rawtext = mdiff.patches(rawtext, bins)
            self._revisioncache = (node, rev, rawtext)

        if flags is None:
            if rev is None:
                rev = self.rev(node)
            flags = self.flags(rev)

        text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
        if validatehash:
            self.checkhash(text, node, rev=rev)

        return text

    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return storageutil.hashrevisionsha1(text, p1, p2)

    def _processflags(self, text, flags, operation, raw=False):
1725 """Inspect revision data flags and applies transforms defined by
1722 """Inspect revision data flags and applies transforms defined by
        registered flag processors.

        ``text`` - the revision data to process
        ``flags`` - the revision flags
        ``operation`` - the operation being performed (read or write)
        ``raw`` - an optional argument describing if the raw transform should be
        applied.

        This method processes the flags in the order (or reverse order if
        ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
        flag processors registered for present flags. The order of flags defined
        in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.

        Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
        processed text and ``validatehash`` is a bool indicating whether the
        returned text should be checked for hash integrity.

        Note: If the ``raw`` argument is set, it has precedence over the
        operation and will only update the value of ``validatehash``.
        """
        # fast path: no flag processors will run
        if flags == 0:
            return text, True
        if operation not in ('read', 'write'):
            raise error.ProgrammingError(_("invalid '%s' operation") %
                                         operation)
        # Check all flags are known.
        if flags & ~REVIDX_KNOWN_FLAGS:
            raise error.RevlogError(_("incompatible revision flag '%#x'") %
                                    (flags & ~REVIDX_KNOWN_FLAGS))
        validatehash = True
        # Depending on the operation (read or write), the order might be
        # reversed due to non-commutative transforms.
        orderedflags = REVIDX_FLAGS_ORDER
        if operation == 'write':
            orderedflags = reversed(orderedflags)

        for flag in orderedflags:
            # If a flagprocessor has been registered for a known flag, apply the
            # related operation transform and update result tuple.
            if flag & flags:
                vhash = True

                if flag not in self._flagprocessors:
                    message = _("missing processor for flag '%#x'") % (flag)
                    raise error.RevlogError(message)

                processor = self._flagprocessors[flag]
                if processor is not None:
                    readtransform, writetransform, rawtransform = processor

                    if raw:
                        vhash = rawtransform(self, text)
                    elif operation == 'read':
                        text, vhash = readtransform(self, text)
                    else: # write operation
                        text, vhash = writetransform(self, text)
                    validatehash = validatehash and vhash

        return text, validatehash

    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                # Clear the revision cache on hash failure. The revision cache
                # only stores the raw revision and clearing the cache does have
                # the side-effect that we won't have a cache hit when the raw
                # revision data is accessed. But this case should be rare and
                # it is extra work to teach the cache about the hash
                # verification state.
                if self._revisioncache and self._revisioncache[0] == node:
                    self._revisioncache = None

                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise error.RevlogError(_("integrity check failed on %s:%s")
                    % (self.indexfile, pycompat.bytestr(revornode)))
        except error.RevlogError:
            if self._censorable and storageutil.iscensoredtext(text):
                raise error.CensoredNodeError(self.indexfile, node, text)
            raise

    def _enforceinlinesize(self, tr, fp=None):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        if (not self._inline or
            (self.start(tiprev) + self.length(tiprev)) < _maxinline):
            return

        trinfo = tr.find(self.indexfile)
        if trinfo is None:
            raise error.RevlogError(_("%s not found in the transaction")
                                    % self.indexfile)

        trindex = trinfo[2]
        if trindex is not None:
            dataoff = self.start(trindex)
        else:
            # revlog was stripped at start of transaction, use all leftover data
            trindex = len(self) - 1
            dataoff = self.end(tiprev)

        tr.add(self.datafile, dataoff)

        if fp:
            fp.flush()
            fp.close()
            # We can't use the cached file handle after close(). So prevent
            # its usage.
            self._writinghandles = None

        with self._indexfp('r') as ifh, self._datafp('w') as dfh:
            for r in self:
                dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])

        with self._indexfp('w') as fp:
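            # re-pack every entry so the on-disk version flags (which share
            # bytes with the first index entry) stop advertising inline data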
            self.version &= ~FLAG_INLINE_DATA
            self._inline = False
            io = self._io
            for i in self:
                e = io.packentry(self.index[i], self.node, self.version, i)
                fp.write(e)

            # the temp file replaces the real index when we exit the context
            # manager

        tr.replace(self.indexfile, trindex * self._io.size)
        self._chunkclear()

    def _nodeduplicatecallback(self, transaction, node):
        """called when trying to add a node already stored.
        """

    def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
                    node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
            computed by default as hash(text, p1, p2), however subclasses might
            use different hashing method (and override checkhash() in such case)
        flags - the known flags to set on the revision
        deltacomputer - an optional deltacomputer instance shared between
            multiple calls
        """
        if link == nullrev:
            raise error.RevlogError(_("attempted to add linkrev -1 to %s")
                                    % self.indexfile)

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = self._processflags(text, flags, 'write')

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise error.RevlogError(
                _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
                % (self.indexfile, len(rawtext)))

        node = node or self.hash(rawtext, p1, p2)
        if node in self.nodemap:
            return node

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
                                   flags, cachedelta=cachedelta,
                                   deltacomputer=deltacomputer)

    def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
                       cachedelta=None, deltacomputer=None):
        """add a raw revision with known flags, node and parents
        useful when reusing a revision not stored in this revlog (e.g.
        received over the wire, or read from an external bundle).
1923 """
1920 """
1924 dfh = None
1921 dfh = None
1925 if not self._inline:
1922 if not self._inline:
1926 dfh = self._datafp("a+")
1923 dfh = self._datafp("a+")
1927 ifh = self._indexfp("a+")
1924 ifh = self._indexfp("a+")
1928 try:
1925 try:
1929 return self._addrevision(node, rawtext, transaction, link, p1, p2,
1926 return self._addrevision(node, rawtext, transaction, link, p1, p2,
1930 flags, cachedelta, ifh, dfh,
1927 flags, cachedelta, ifh, dfh,
1931 deltacomputer=deltacomputer)
1928 deltacomputer=deltacomputer)
1932 finally:
1929 finally:
1933 if dfh:
1930 if dfh:
1934 dfh.close()
1931 dfh.close()
1935 ifh.close()
1932 ifh.close()
1936
1933
1937 def compress(self, data):
1934 def compress(self, data):
1938 """Generate a possibly-compressed representation of data."""
1935 """Generate a possibly-compressed representation of data."""
        if not data:
            return '', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return '', compressed

        if data[0:1] == '\0':
            return '', data
        return 'u', data

    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for "decompress compressed data
        # when encoded with common and officially supported compression engines"
        # case over "raw data" and "data encoded by less common or non-official
        # compression engines." That is why we have the inline lookup first
        # followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == 'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise error.RevlogError(_('revlog decompress error: %s') %
                                        stringutil.forcebytestr(e))
        # '\0' is more common than 'u' so it goes first.
        elif t == '\0':
            return data
        elif t == 'u':
            return util.buffer(data, 1)

        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor(self._compengineopts)
                self._decompressors[t] = compressor
            except KeyError:
                raise error.RevlogError(_('unknown compression type %r') % t)

        return compressor.decompress(data)

    def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
                     cachedelta, ifh, dfh, alwayscache=False,
                     deltacomputer=None):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == nullid:
            raise error.RevlogError(_("%s: attempt to add null revision") %
                                    self.indexfile)
        if node == wdirid or node in wdirfilenodeids:
            raise error.RevlogError(_("%s: attempt to add wdir revision") %
                                    self.indexfile)

        if self._inline:
            fh = ifh
        else:
            fh = dfh

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1
        offset = self.end(prev)
        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need rawtext size, before changed by flag processors, which is
            # the non-raw size. use revlog explicitly to avoid filelog's extra
            # logic that might remove metadata size.
            textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
                                        cachedelta[1])
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            deltacomputer = deltautil.deltacomputer(self)

        revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)

        deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

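        # index entry layout: (packed offset/flags, compressed length,
        # raw text length, delta base rev, linkrev, p1 rev, p2 rev, node)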
        e = (offset_type(offset, flags), deltainfo.deltalen, textlen,
             deltainfo.base, link, p1r, p2r, node)
        self.index.append(e)
        self.nodemap[node] = curr

        # Reset the pure node cache start lookup offset to account for new
        # revision.
        if self._nodepos is not None:
            self._nodepos = curr

        entry = self._io.packentry(e, self.node, self.version, curr)
        self._writeentry(transaction, ifh, dfh, entry, deltainfo.data,
                         link, offset)

        rawtext = btext[0]

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo, fh)

        if type(rawtext) == bytes: # only accept immutable objects
            self._revisioncache = (node, curr, rawtext)
        self._chainbasecache[curr] = deltainfo.chainbase
        return node

    def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3. However, because
        # the file handle is reused for reads and may be seeked there, we need
        # to be careful before changing this.
        ifh.seek(0, os.SEEK_END)
        if dfh:
            dfh.seek(0, os.SEEK_END)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            self._enforceinlinesize(transaction, ifh)

    def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        if self._writinghandles:
            raise error.ProgrammingError('cannot nest addgroup() calls')

        nodes = []

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        ifh = self._indexfp("a+")
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self._datafp("a+")
        def flush():
            if dfh:
                dfh.flush()
            ifh.flush()

        self._writinghandles = (ifh, dfh)

        try:
            deltacomputer = deltautil.deltacomputer(self)
            # loop through our set of deltas
            for data in deltas:
                node, p1, p2, linknode, deltabase, delta, flags = data
                link = linkmapper(linknode)
                flags = flags or REVIDX_DEFAULT_FLAGS

                nodes.append(node)

                if node in self.nodemap:
                    self._nodeduplicatecallback(transaction, node)
                    # this can happen if two branches make the same change
                    continue

                for p in (p1, p2):
                    if p not in self.nodemap:
                        raise error.LookupError(p, self.indexfile,
                                                _('unknown parent'))

                if deltabase not in self.nodemap:
                    raise error.LookupError(deltabase, self.indexfile,
                                            _('unknown delta base'))

                baserev = self.rev(deltabase)

                if baserev != nullrev and self.iscensored(baserev):
                    # if base is censored, delta must be full replacement in a
                    # single patch operation
                    hlen = struct.calcsize(">lll")
                    oldlen = self.rawsize(baserev)
                    newlen = len(delta) - hlen
                    if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                        raise error.CensoredBaseError(self.indexfile,
                                                      self.node(baserev))

                if not flags and self._peek_iscensored(baserev, delta, flush):
                    flags |= REVIDX_ISCENSORED

                # We assume consumers of addrevisioncb will want to retrieve
                # the added revision, which will require a call to
                # revision(). revision() will fast path if there is a cache
                # hit. So, we tell _addrevision() to always cache in this case.
                # We're only using addgroup() in the context of changegroup
                # generation so the revision data can always be handled as raw
                # by the flagprocessor.
                self._addrevision(node, None, transaction, link,
                                  p1, p2, flags, (baserev, delta),
                                  ifh, dfh,
                                  alwayscache=bool(addrevisioncb),
                                  deltacomputer=deltacomputer)

                if addrevisioncb:
                    addrevisioncb(self, node)

                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    ifh.close()
                    dfh = self._datafp("a+")
                    ifh = self._indexfp("a+")
                    self._writinghandles = (ifh, dfh)
        finally:
            self._writinghandles = None

            if dfh:
                dfh.close()
            ifh.close()

        return nodes

    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self._censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta, flush):
        """Quickly check if a delta produces a censored revision."""
        if not self._censorable:
            return False

        return storageutil.deltaiscensored(delta, baserev, self.rawsize)

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        return storageutil.resolvestripinfo(minlink, len(self) - 1,
                                            self.headrevs(),
                                            self.linkrev, self.parentrevs)

    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        end = self.start(rev)
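        # inline revlogs have no separate data file and interleave an index
        # entry before each chunk, so the truncation offset must also count
        # rev index entries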
2273 if not self._inline:
2270 if not self._inline:
2274 transaction.add(self.datafile, end)
2271 transaction.add(self.datafile, end)
2275 end = rev * self._io.size
2272 end = rev * self._io.size
2276 else:
2273 else:
2277 end += rev * self._io.size
2274 end += rev * self._io.size
2278
2275
2279 transaction.add(self.indexfile, end)
2276 transaction.add(self.indexfile, end)
2280
2277
2281 # then reset internal state in memory to forget those revisions
2278 # then reset internal state in memory to forget those revisions
2282 self._revisioncache = None
2279 self._revisioncache = None
2283 self._chaininfocache = {}
2280 self._chaininfocache = {}
2284 self._chunkclear()
2281 self._chunkclear()
2285 for x in pycompat.xrange(rev, len(self)):
2282 for x in pycompat.xrange(rev, len(self)):
2286 del self.nodemap[self.node(x)]
2283 del self.nodemap[self.node(x)]
2287
2284
2288 del self.index[rev:-1]
2285 del self.index[rev:-1]
2289 self._nodepos = None
2286 self._nodepos = None
2290
2287
2291 def checksize(self):
2288 def checksize(self):
2292 """Check size of index and data files
2289 """Check size of index and data files
2293
2290
2294 return a (dd, di) tuple.
2291 return a (dd, di) tuple.
2295 - dd: extra bytes for the "data" file
2292 - dd: extra bytes for the "data" file
2296 - di: extra bytes for the "index" file
2293 - di: extra bytes for the "index" file
2297
2294
2298 A healthy revlog will return (0, 0).
2295 A healthy revlog will return (0, 0).
2299 """
2296 """
2300 expected = 0
2297 expected = 0
2301 if len(self):
2298 if len(self):
2302 expected = max(0, self.end(len(self) - 1))
2299 expected = max(0, self.end(len(self) - 1))
2303
2300
2304 try:
2301 try:
2305 with self._datafp() as f:
2302 with self._datafp() as f:
2306 f.seek(0, 2)
2303 f.seek(0, 2)
2307 actual = f.tell()
2304 actual = f.tell()
2308 dd = actual - expected
2305 dd = actual - expected
2309 except IOError as inst:
2306 except IOError as inst:
2310 if inst.errno != errno.ENOENT:
2307 if inst.errno != errno.ENOENT:
2311 raise
2308 raise
2312 dd = 0
2309 dd = 0
2313
2310
2314 try:
2311 try:
2315 f = self.opener(self.indexfile)
2312 f = self.opener(self.indexfile)
2316 f.seek(0, 2)
2313 f.seek(0, 2)
2317 actual = f.tell()
2314 actual = f.tell()
2318 f.close()
2315 f.close()
2319 s = self._io.size
2316 s = self._io.size
2320 i = max(0, actual // s)
2317 i = max(0, actual // s)
2321 di = actual - (i * s)
2318 di = actual - (i * s)
2322 if self._inline:
2319 if self._inline:
2323 databytes = 0
2320 databytes = 0
2324 for r in self:
2321 for r in self:
2325 databytes += max(0, self.length(r))
2322 databytes += max(0, self.length(r))
2326 dd = 0
2323 dd = 0
2327 di = actual - len(self) * s - databytes
2324 di = actual - len(self) * s - databytes
2328 except IOError as inst:
2325 except IOError as inst:
2329 if inst.errno != errno.ENOENT:
2326 if inst.errno != errno.ENOENT:
2330 raise
2327 raise
2331 di = 0
2328 di = 0
2332
2329
2333 return (dd, di)
2330 return (dd, di)
2334
2331
2335 def files(self):
2332 def files(self):
2336 res = [self.indexfile]
2333 res = [self.indexfile]
2337 if not self._inline:
2334 if not self._inline:
2338 res.append(self.datafile)
2335 res.append(self.datafile)
2339 return res
2336 return res
2340
2337
    def emitrevisions(self, nodes, nodesorder=None, revisiondata=False,
                      assumehaveparentrevisions=False,
                      deltamode=repository.CG_DELTAMODE_STD):
        if nodesorder not in ('nodes', 'storage', 'linear', None):
            raise error.ProgrammingError('unhandled value for nodesorder: %s' %
                                         nodesorder)

        if nodesorder is None and not self._generaldelta:
            nodesorder = 'storage'

        if (not self._storedeltachains and
                deltamode != repository.CG_DELTAMODE_PREV):
            deltamode = repository.CG_DELTAMODE_FULL

        return storageutil.emitrevisions(
            self, nodes, nodesorder, revlogrevisiondelta,
            deltaparentfn=self.deltaparent,
            candeltafn=self.candelta,
            rawsizefn=self.rawsize,
            revdifffn=self.revdiff,
            flagsfn=self.flags,
            deltamode=deltamode,
            revisiondata=revisiondata,
            assumehaveparentrevisions=assumehaveparentrevisions)

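    # A hedged consumption sketch for emitrevisions() (``rl``, ``nodes`` and
    # ``process`` are hypothetical; each emitted object is a
    # revlogrevisiondelta, as passed to storageutil.emitrevisions above):
    #
    #   for delta in rl.emitrevisions(nodes, nodesorder='storage',
    #                                 revisiondata=True):
    #       process(delta.node, delta.basenode, delta.delta)
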
    DELTAREUSEALWAYS = 'always'
    DELTAREUSESAMEREVS = 'samerevs'
    DELTAREUSENEVER = 'never'

    DELTAREUSEFULLADD = 'fulladd'

    DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}

    def clone(self, tr, destrevlog, addrevisioncb=None,
              deltareuse=DELTAREUSESAMEREVS, forcedeltabothparents=None):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta encoding
        differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
          Deltas will always be reused (if possible), even if the destination
          revlog would not select the same revisions for the delta. This is the
          fastest mode of operation.
        DELTAREUSESAMEREVS
          Deltas will be reused if the destination revlog would pick the same
          revisions for the delta. This mode strikes a balance between speed
          and optimization.
        DELTAREUSENEVER
          Deltas will never be reused. This is the slowest mode of execution.
          This mode can be used to recompute deltas (e.g. if the diff/delta
          algorithm changes).

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if the
        delta could choose a better revision, it will do so. This means if you
        are converting a non-generaldelta revlog to a generaldelta revlog,
        deltas will be recomputed if the delta's parent isn't a parent of the
        revision.

        In addition to the delta policy, the ``forcedeltabothparents`` argument
        controls whether to force compute deltas against both parents for
        merges. When it is None, the destination revlog's existing setting is
        kept.
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)

        if len(destrevlog):
            raise ValueError(_('destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_('source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_('destination revlog has filtered revisions'))

        # lazydelta and lazydeltabase control whether to reuse a cached delta,
        # if possible.
        oldlazydelta = destrevlog._lazydelta
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._deltabothparents

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSENEVER:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = False

            destrevlog._deltabothparents = forcedeltabothparents or oldamd

            deltacomputer = deltautil.deltacomputer(destrevlog)
            index = self.index
            for rev in self:
                entry = index[rev]

                # Some classes override linkrev to take filtered revs into
                # account. Use raw entry from index.
                flags = entry[0] & 0xffff
                linkrev = entry[4]
                p1 = index[entry[5]][7]
                p2 = index[entry[6]][7]
                node = entry[7]

                # (Possibly) reuse the delta from the revlog if allowed and
                # the revlog chunk is a delta.
                cachedelta = None
                rawtext = None
                if destrevlog._lazydelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))

                if not cachedelta:
                    rawtext = self.revision(rev, raw=True)

                if deltareuse == self.DELTAREUSEFULLADD:
                    destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
                                           cachedelta=cachedelta,
                                           node=node, flags=flags,
                                           deltacomputer=deltacomputer)
                else:
                    ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
                                            checkambig=False)
                    dfh = None
                    if not destrevlog._inline:
                        dfh = destrevlog.opener(destrevlog.datafile, 'a+')
                    try:
                        destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
                                                p2, flags, cachedelta, ifh, dfh,
                                                deltacomputer=deltacomputer)
                    finally:
                        if dfh:
                            dfh.close()
                        ifh.close()

                if addrevisioncb:
                    addrevisioncb(self, rev, node)
        finally:
            destrevlog._lazydelta = oldlazydelta
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._deltabothparents = oldamd

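    # A minimal cloning sketch (illustrative; ``src``, ``dst`` and ``tr`` are
    # hypothetical: an existing revlog, an empty destination revlog, and an
    # open transaction). DELTAREUSENEVER forces every delta to be recomputed,
    # e.g. when converting to a generaldelta revlog:
    #
    #   src.clone(tr, dst, deltareuse=src.DELTAREUSENEVER)
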
    def censorrevision(self, tr, censornode, tombstone=b''):
        if (self.version & 0xFFFF) == REVLOGV0:
            raise error.RevlogError(_('cannot censor with version %d revlogs') %
                                    self.version)

        censorrev = self.rev(censornode)
        tombstone = storageutil.packmeta({b'censored': tombstone}, b'')

        if len(tombstone) > self.rawsize(censorrev):
            raise error.Abort(_('censor tombstone must be no longer than '
                                'censored data'))

        # Rewriting the revlog in place is hard. Our strategy for censoring is
        # to create a new revlog, copy all revisions to it, then replace the
        # revlogs on transaction close.

        newindexfile = self.indexfile + b'.tmpcensored'
        newdatafile = self.datafile + b'.tmpcensored'

        # This is a bit dangerous. We could easily have a mismatch of state.
        newrl = revlog(self.opener, newindexfile, newdatafile,
                       censorable=True)
        newrl.version = self.version
        newrl._generaldelta = self._generaldelta
        newrl._io = self._io

        for rev in self.revs():
            node = self.node(rev)
            p1, p2 = self.parents(node)

            if rev == censorrev:
                newrl.addrawrevision(tombstone, tr, self.linkrev(censorrev),
                                     p1, p2, censornode, REVIDX_ISCENSORED)

                if newrl.deltaparent(rev) != nullrev:
                    raise error.Abort(_('censored revision stored as delta; '
                                        'cannot censor'),
                                      hint=_('censoring of revlogs is not '
                                             'fully implemented; please report '
                                             'this bug'))
                continue

            if self.iscensored(rev):
                if self.deltaparent(rev) != nullrev:
                    raise error.Abort(_('cannot censor due to censored '
                                        'revision having delta stored'))
                rawtext = self._chunk(rev)
            else:
                rawtext = self.revision(rev, raw=True)

            newrl.addrawrevision(rawtext, tr, self.linkrev(rev), p1, p2, node,
                                 self.flags(rev))

        tr.addbackup(self.indexfile, location='store')
        if not self._inline:
            tr.addbackup(self.datafile, location='store')

        self.opener.rename(newrl.indexfile, self.indexfile)
        if not self._inline:
            self.opener.rename(newrl.datafile, self.datafile)

        self.clearcaches()
        self._loadindex()

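    # A hedged usage sketch for censorrevision() (``rl``, ``tr`` and ``node``
    # are hypothetical; censoring rewrites the whole revlog, so it must run
    # inside a transaction that can back up and roll back the replacement):
    #
    #   rl.censorrevision(tr, node, tombstone=b'removed for legal reasons')
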
    def verifyintegrity(self, state):
        """Verifies the integrity of the revlog.

        Yields ``revlogproblem`` instances describing problems that are
        found.
        """
        dd, di = self.checksize()
        if dd:
            yield revlogproblem(error=_('data length off by %d bytes') % dd)
        if di:
            yield revlogproblem(error=_('index contains %d extra bytes') % di)

        version = self.version & 0xFFFF

        # The verifier tells us what version revlog we should be.
        if version != state['expectedversion']:
            yield revlogproblem(
                warning=_("warning: '%s' uses revlog format %d; expected %d") %
                        (self.indexfile, version, state['expectedversion']))

        state['skipread'] = set()

        for rev in self:
            node = self.node(rev)

            # Verify contents. 4 cases to care about:
            #
            #   common: the most common case
            #   rename: with a rename
            #   meta: file content starts with b'\1\n', the metadata
            #         header defined in filelog.py, but without a rename
            #   ext: content stored externally
            #
            # More formally, their differences are shown below:
            #
            #                       | common | rename | meta  | ext
            #  -------------------------------------------------
            #  flags()             | 0      | 0      | 0     | not 0
            #  renamed()           | False  | True   | False | ?
            #  rawtext[0:2]=='\1\n'| False  | True   | True  | ?
            #
            # "rawtext" means the raw text stored in revlog data, which
            # could be retrieved by "revision(rev, raw=True)". "text"
            # mentioned below is "revision(rev, raw=False)".
            #
            # There are 3 different lengths stored physically:
            #  1. L1: rawsize, stored in revlog index
            #  2. L2: len(rawtext), stored in revlog data
            #  3. L3: len(text), stored in revlog data if flags==0, or
            #     possibly somewhere else if flags!=0
            #
            # L1 should be equal to L2. L3 could be different from them.
            # "text" may or may not affect commit hash depending on flag
            # processors (see revlog.addflagprocessor).
            #
            #              | common  | rename | meta  | ext
            # -------------------------------------------------
            # rawsize()    | L1      | L1     | L1    | L1
            # size()       | L1      | L2-LM  | L1(*) | L1 (?)
            # len(rawtext) | L2      | L2     | L2    | L2
            # len(text)    | L2      | L2     | L2    | L3
            # len(read())  | L2      | L2-LM  | L2-LM | L3 (?)
            #
            # LM:  length of metadata, depending on rawtext
            # (*): not ideal, see comment in filelog.size
            # (?): could be "- len(meta)" if the resolved content has
            #      rename metadata
            #
            # Checks needed to be done:
            #  1. length check: L1 == L2, in all cases.
            #  2. hash check: depending on flag processor, we may need to
            #     use either "text" (external), or "rawtext" (in revlog).

            try:
                skipflags = state.get('skipflags', 0)
                if skipflags:
                    skipflags &= self.flags(rev)

                if skipflags:
                    state['skipread'].add(node)
                else:
                    # Side-effect: read content and verify hash.
                    self.revision(node)

                l1 = self.rawsize(rev)
                l2 = len(self.revision(node, raw=True))

                if l1 != l2:
                    yield revlogproblem(
                        error=_('unpacked size is %d, %d expected') % (l2, l1),
                        node=node)

            except error.CensoredNodeError:
                if state['erroroncensored']:
                    yield revlogproblem(error=_('censored file data'),
                                        node=node)
                state['skipread'].add(node)
            except Exception as e:
                yield revlogproblem(
                    error=_('unpacking %s: %s') % (short(node),
                                                   stringutil.forcebytestr(e)),
                    node=node)
                state['skipread'].add(node)

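    # A minimal verification loop (illustrative; ``rl``, ``ui`` and ``state``
    # are hypothetical, and ``state`` must at least carry 'expectedversion'
    # and 'erroroncensored', as read above):
    #
    #   for problem in rl.verifyintegrity(state):
    #       if problem.error:
    #           ui.warn(problem.error + b'\n')
    #       elif problem.warning:
    #           ui.warn(problem.warning + b'\n')
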
    def storageinfo(self, exclusivefiles=False, sharedfiles=False,
                    revisionscount=False, trackedsize=False,
                    storedsize=False):
        d = {}

        if exclusivefiles:
            d['exclusivefiles'] = [(self.opener, self.indexfile)]
            if not self._inline:
                d['exclusivefiles'].append((self.opener, self.datafile))

        if sharedfiles:
            d['sharedfiles'] = []

        if revisionscount:
            d['revisionscount'] = len(self)

        if trackedsize:
            d['trackedsize'] = sum(map(self.rawsize, iter(self)))

        if storedsize:
            d['storedsize'] = sum(self.opener.stat(path).st_size
                                  for path in self.files())

        return d
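
    # A hedged example of what a caller might request from storageinfo()
    # (``rl`` is a hypothetical revlog instance; only the requested keys are
    # present in the result):
    #
    #   info = rl.storageinfo(revisionscount=True, storedsize=True)
    #   # e.g. {'revisionscount': 42, 'storedsize': 12345}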