rust: using policy.importrust from Python callers...
Georges Racinet -
r42645:f834ee28 default draft
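This changeset replaces the per-module "try: from . import rustext ... except ImportError:" boilerplate with calls to policy.importrust, which resolves the Rust implementation of a module according to the active module policy and falls back to a caller-supplied default. Below is a minimal sketch of the mechanism, assuming only that a compiled extension package named rustext may or may not be importable; the real mercurial.policy helper additionally consults the configured module policy and supports looking up a single member, so treat this as an illustration rather than the exact implementation.

def importrust(modname, default=None):
    """Return the Rust implementation of modname, or default (sketch)."""
    try:
        import rustext  # compiled Rust extension package (may be absent)
        rustext.__name__  # force actual import under demandimport
    except ImportError:
        return default
    return getattr(rustext, modname, default)

The two calling conventions used in the hunks below follow from the default argument: dirstatemod = policy.importrust(r'dirstate', default=parsers) always yields a usable module, while rustmod = policy.importrust('filepatterns') yields None when Rust is unavailable, leaving the guard to the caller.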
@@ -1,1521 +1,1506
1 1 # dirstate.py - working directory tracking for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import collections
11 11 import contextlib
12 12 import errno
13 13 import os
14 14 import stat
15 15
16 16 from .i18n import _
17 17 from .node import nullid
18 18 from . import (
19 19 encoding,
20 20 error,
21 21 match as matchmod,
22 22 pathutil,
23 23 policy,
24 24 pycompat,
25 25 scmutil,
26 26 txnutil,
27 27 util,
28 28 )
29 29
30 try:
31 from . import rustext
32 rustext.__name__ # force actual import (see hgdemandimport)
33 except ImportError:
34 rustext = None
35
36 30 parsers = policy.importmod(r'parsers')
31 dirstatemod = policy.importrust(r'dirstate', default=parsers)
37 32
38 33 propertycache = util.propertycache
39 34 filecache = scmutil.filecache
40 35 _rangemask = 0x7fffffff
41 36
42 37 dirstatetuple = parsers.dirstatetuple
43 38
44 39 class repocache(filecache):
45 40 """filecache for files in .hg/"""
46 41 def join(self, obj, fname):
47 42 return obj._opener.join(fname)
48 43
49 44 class rootcache(filecache):
50 45 """filecache for files in the repository root"""
51 46 def join(self, obj, fname):
52 47 return obj._join(fname)
53 48
54 49 def _getfsnow(vfs):
55 50 '''Get "now" timestamp on filesystem'''
56 51 tmpfd, tmpname = vfs.mkstemp()
57 52 try:
58 53 return os.fstat(tmpfd)[stat.ST_MTIME]
59 54 finally:
60 55 os.close(tmpfd)
61 56 vfs.unlink(tmpname)
62 57
63 58 class dirstate(object):
64 59
65 60 def __init__(self, opener, ui, root, validate, sparsematchfn):
66 61 '''Create a new dirstate object.
67 62
68 63 opener is an open()-like callable that can be used to open the
69 64 dirstate file; root is the root of the directory tracked by
70 65 the dirstate.
71 66 '''
72 67 self._opener = opener
73 68 self._validate = validate
74 69 self._root = root
75 70 self._sparsematchfn = sparsematchfn
76 71 # ntpath.join(root, '') of Python 2.7.9 does not add sep if root is
77 72 # a UNC path pointing to root share (issue4557)
78 73 self._rootdir = pathutil.normasprefix(root)
79 74 self._dirty = False
80 75 self._lastnormaltime = 0
81 76 self._ui = ui
82 77 self._filecache = {}
83 78 self._parentwriters = 0
84 79 self._filename = 'dirstate'
85 80 self._pendingfilename = '%s.pending' % self._filename
86 81 self._plchangecallbacks = {}
87 82 self._origpl = None
88 83 self._updatedfiles = set()
89 84 self._mapcls = dirstatemap
90 85 # Access and cache cwd early, so we don't access it for the first time
91 86 # after a working-copy update caused it to not exist (accessing it then
92 87 # raises an exception).
93 88 self._cwd
94 89
95 90 @contextlib.contextmanager
96 91 def parentchange(self):
97 92 '''Context manager for handling dirstate parents.
98 93
99 94 If an exception occurs in the scope of the context manager,
100 95 the incoherent dirstate won't be written when wlock is
101 96 released.
102 97 '''
103 98 self._parentwriters += 1
104 99 yield
105 100 # Typically we want the "undo" step of a context manager in a
106 101 # finally block so it happens even when an exception
107 102 # occurs. In this case, however, we only want to decrement
108 103 # parentwriters if the code in the with statement exits
109 104 # normally, so we don't have a try/finally here on purpose.
110 105 self._parentwriters -= 1
111 106
112 107 def pendingparentchange(self):
113 108 '''Returns true if the dirstate is in the middle of a set of changes
114 109 that modify the dirstate parent.
115 110 '''
116 111 return self._parentwriters > 0
117 112
118 113 @propertycache
119 114 def _map(self):
120 115 """Return the dirstate contents (see documentation for dirstatemap)."""
121 116 self._map = self._mapcls(self._ui, self._opener, self._root)
122 117 return self._map
123 118
124 119 @property
125 120 def _sparsematcher(self):
126 121 """The matcher for the sparse checkout.
127 122
128 123 The working directory may not include every file from a manifest. The
129 124 matcher obtained by this property will match a path if it is to be
130 125 included in the working directory.
131 126 """
132 127 # TODO there is potential to cache this property. For now, the matcher
133 128 # is resolved on every access. (But the called function does use a
134 129 # cache to keep the lookup fast.)
135 130 return self._sparsematchfn()
136 131
137 132 @repocache('branch')
138 133 def _branch(self):
139 134 try:
140 135 return self._opener.read("branch").strip() or "default"
141 136 except IOError as inst:
142 137 if inst.errno != errno.ENOENT:
143 138 raise
144 139 return "default"
145 140
146 141 @property
147 142 def _pl(self):
148 143 return self._map.parents()
149 144
150 145 def hasdir(self, d):
151 146 return self._map.hastrackeddir(d)
152 147
153 148 @rootcache('.hgignore')
154 149 def _ignore(self):
155 150 files = self._ignorefiles()
156 151 if not files:
157 152 return matchmod.never()
158 153
159 154 pats = ['include:%s' % f for f in files]
160 155 return matchmod.match(self._root, '', [], pats, warn=self._ui.warn)
161 156
162 157 @propertycache
163 158 def _slash(self):
164 159 return self._ui.configbool('ui', 'slash') and pycompat.ossep != '/'
165 160
166 161 @propertycache
167 162 def _checklink(self):
168 163 return util.checklink(self._root)
169 164
170 165 @propertycache
171 166 def _checkexec(self):
172 167 return util.checkexec(self._root)
173 168
174 169 @propertycache
175 170 def _checkcase(self):
176 171 return not util.fscasesensitive(self._join('.hg'))
177 172
178 173 def _join(self, f):
179 174 # much faster than os.path.join()
180 175 # it's safe because f is always a relative path
181 176 return self._rootdir + f
182 177
183 178 def flagfunc(self, buildfallback):
184 179 if self._checklink and self._checkexec:
185 180 def f(x):
186 181 try:
187 182 st = os.lstat(self._join(x))
188 183 if util.statislink(st):
189 184 return 'l'
190 185 if util.statisexec(st):
191 186 return 'x'
192 187 except OSError:
193 188 pass
194 189 return ''
195 190 return f
196 191
197 192 fallback = buildfallback()
198 193 if self._checklink:
199 194 def f(x):
200 195 if os.path.islink(self._join(x)):
201 196 return 'l'
202 197 if 'x' in fallback(x):
203 198 return 'x'
204 199 return ''
205 200 return f
206 201 if self._checkexec:
207 202 def f(x):
208 203 if 'l' in fallback(x):
209 204 return 'l'
210 205 if util.isexec(self._join(x)):
211 206 return 'x'
212 207 return ''
213 208 return f
214 209 else:
215 210 return fallback
216 211
217 212 @propertycache
218 213 def _cwd(self):
219 214 # internal config: ui.forcecwd
220 215 forcecwd = self._ui.config('ui', 'forcecwd')
221 216 if forcecwd:
222 217 return forcecwd
223 218 return encoding.getcwd()
224 219
225 220 def getcwd(self):
226 221 '''Return the path from which a canonical path is calculated.
227 222
228 223 This path should be used to resolve file patterns or to convert
229 224 canonical paths back to file paths for display. It shouldn't be
230 225 used to get real file paths. Use vfs functions instead.
231 226 '''
232 227 cwd = self._cwd
233 228 if cwd == self._root:
234 229 return ''
235 230 # self._root ends with a path separator if self._root is '/' or 'C:\'
236 231 rootsep = self._root
237 232 if not util.endswithsep(rootsep):
238 233 rootsep += pycompat.ossep
239 234 if cwd.startswith(rootsep):
240 235 return cwd[len(rootsep):]
241 236 else:
242 237 # we're outside the repo. return an absolute path.
243 238 return cwd
244 239
245 240 def pathto(self, f, cwd=None):
246 241 if cwd is None:
247 242 cwd = self.getcwd()
248 243 path = util.pathto(self._root, cwd, f)
249 244 if self._slash:
250 245 return util.pconvert(path)
251 246 return path
252 247
253 248 def __getitem__(self, key):
254 249 '''Return the current state of key (a filename) in the dirstate.
255 250
256 251 States are:
257 252 n normal
258 253 m needs merging
259 254 r marked for removal
260 255 a marked for addition
261 256 ? not tracked
262 257 '''
263 258 return self._map.get(key, ("?",))[0]
264 259
265 260 def __contains__(self, key):
266 261 return key in self._map
267 262
268 263 def __iter__(self):
269 264 return iter(sorted(self._map))
270 265
271 266 def items(self):
272 267 return self._map.iteritems()
273 268
274 269 iteritems = items
275 270
276 271 def parents(self):
277 272 return [self._validate(p) for p in self._pl]
278 273
279 274 def p1(self):
280 275 return self._validate(self._pl[0])
281 276
282 277 def p2(self):
283 278 return self._validate(self._pl[1])
284 279
285 280 def branch(self):
286 281 return encoding.tolocal(self._branch)
287 282
288 283 def setparents(self, p1, p2=nullid):
289 284 """Set dirstate parents to p1 and p2.
290 285
291 286 When moving from two parents to one, 'm' merged entries are
292 287 adjusted to normal and previous copy records are discarded and
293 288 returned by the call.
294 289
295 290 See localrepo.setparents()
296 291 """
297 292 if self._parentwriters == 0:
298 293 raise ValueError("cannot set dirstate parent outside of "
299 294 "dirstate.parentchange context manager")
300 295
301 296 self._dirty = True
302 297 oldp2 = self._pl[1]
303 298 if self._origpl is None:
304 299 self._origpl = self._pl
305 300 self._map.setparents(p1, p2)
306 301 copies = {}
307 302 if oldp2 != nullid and p2 == nullid:
308 303 candidatefiles = self._map.nonnormalset.union(
309 304 self._map.otherparentset)
310 305 for f in candidatefiles:
311 306 s = self._map.get(f)
312 307 if s is None:
313 308 continue
314 309
315 310 # Discard 'm' markers when moving away from a merge state
316 311 if s[0] == 'm':
317 312 source = self._map.copymap.get(f)
318 313 if source:
319 314 copies[f] = source
320 315 self.normallookup(f)
321 316 # Also fix up otherparent markers
322 317 elif s[0] == 'n' and s[2] == -2:
323 318 source = self._map.copymap.get(f)
324 319 if source:
325 320 copies[f] = source
326 321 self.add(f)
327 322 return copies
328 323
329 324 def setbranch(self, branch):
330 325 self.__class__._branch.set(self, encoding.fromlocal(branch))
331 326 f = self._opener('branch', 'w', atomictemp=True, checkambig=True)
332 327 try:
333 328 f.write(self._branch + '\n')
334 329 f.close()
335 330
336 331 # make sure filecache has the correct stat info for _branch after
337 332 # replacing the underlying file
338 333 ce = self._filecache['_branch']
339 334 if ce:
340 335 ce.refresh()
341 336 except: # re-raises
342 337 f.discard()
343 338 raise
344 339
345 340 def invalidate(self):
346 341 '''Causes the next access to reread the dirstate.
347 342
348 343 This is different from localrepo.invalidatedirstate() because it always
349 344 rereads the dirstate. Use localrepo.invalidatedirstate() if you want to
350 345 check whether the dirstate has changed before rereading it.'''
351 346
352 347 for a in (r"_map", r"_branch", r"_ignore"):
353 348 if a in self.__dict__:
354 349 delattr(self, a)
355 350 self._lastnormaltime = 0
356 351 self._dirty = False
357 352 self._updatedfiles.clear()
358 353 self._parentwriters = 0
359 354 self._origpl = None
360 355
361 356 def copy(self, source, dest):
362 357 """Mark dest as a copy of source. Unmark dest if source is None."""
363 358 if source == dest:
364 359 return
365 360 self._dirty = True
366 361 if source is not None:
367 362 self._map.copymap[dest] = source
368 363 self._updatedfiles.add(source)
369 364 self._updatedfiles.add(dest)
370 365 elif self._map.copymap.pop(dest, None):
371 366 self._updatedfiles.add(dest)
372 367
373 368 def copied(self, file):
374 369 return self._map.copymap.get(file, None)
375 370
376 371 def copies(self):
377 372 return self._map.copymap
378 373
379 374 def _addpath(self, f, state, mode, size, mtime):
380 375 oldstate = self[f]
381 376 if state == 'a' or oldstate == 'r':
382 377 scmutil.checkfilename(f)
383 378 if self._map.hastrackeddir(f):
384 379 raise error.Abort(_('directory %r already in dirstate') %
385 380 pycompat.bytestr(f))
386 381 # shadows
387 382 for d in util.finddirs(f):
388 383 if self._map.hastrackeddir(d):
389 384 break
390 385 entry = self._map.get(d)
391 386 if entry is not None and entry[0] != 'r':
392 387 raise error.Abort(
393 388 _('file %r in dirstate clashes with %r') %
394 389 (pycompat.bytestr(d), pycompat.bytestr(f)))
395 390 self._dirty = True
396 391 self._updatedfiles.add(f)
397 392 self._map.addfile(f, oldstate, state, mode, size, mtime)
398 393
399 394 def normal(self, f):
400 395 '''Mark a file normal and clean.'''
401 396 s = os.lstat(self._join(f))
402 397 mtime = s[stat.ST_MTIME]
403 398 self._addpath(f, 'n', s.st_mode,
404 399 s.st_size & _rangemask, mtime & _rangemask)
405 400 self._map.copymap.pop(f, None)
406 401 if f in self._map.nonnormalset:
407 402 self._map.nonnormalset.remove(f)
408 403 if mtime > self._lastnormaltime:
409 404 # Remember the most recent modification timeslot for status(),
410 405 # to make sure we won't miss future size-preserving file content
411 406 # modifications that happen within the same timeslot.
412 407 self._lastnormaltime = mtime
413 408
414 409 def normallookup(self, f):
415 410 '''Mark a file normal, but possibly dirty.'''
416 411 if self._pl[1] != nullid:
417 412 # if there is a merge going on and the file was either
418 413 # in state 'm' (-1) or coming from other parent (-2) before
419 414 # being removed, restore that state.
420 415 entry = self._map.get(f)
421 416 if entry is not None:
422 417 if entry[0] == 'r' and entry[2] in (-1, -2):
423 418 source = self._map.copymap.get(f)
424 419 if entry[2] == -1:
425 420 self.merge(f)
426 421 elif entry[2] == -2:
427 422 self.otherparent(f)
428 423 if source:
429 424 self.copy(source, f)
430 425 return
431 426 if entry[0] == 'm' or entry[0] == 'n' and entry[2] == -2:
432 427 return
433 428 self._addpath(f, 'n', 0, -1, -1)
434 429 self._map.copymap.pop(f, None)
435 430
436 431 def otherparent(self, f):
437 432 '''Mark as coming from the other parent, always dirty.'''
438 433 if self._pl[1] == nullid:
439 434 raise error.Abort(_("setting %r to other parent "
440 435 "only allowed in merges") % f)
441 436 if f in self and self[f] == 'n':
442 437 # merge-like
443 438 self._addpath(f, 'm', 0, -2, -1)
444 439 else:
445 440 # add-like
446 441 self._addpath(f, 'n', 0, -2, -1)
447 442 self._map.copymap.pop(f, None)
448 443
449 444 def add(self, f):
450 445 '''Mark a file added.'''
451 446 self._addpath(f, 'a', 0, -1, -1)
452 447 self._map.copymap.pop(f, None)
453 448
454 449 def remove(self, f):
455 450 '''Mark a file removed.'''
456 451 self._dirty = True
457 452 oldstate = self[f]
458 453 size = 0
459 454 if self._pl[1] != nullid:
460 455 entry = self._map.get(f)
461 456 if entry is not None:
462 457 # backup the previous state
463 458 if entry[0] == 'm': # merge
464 459 size = -1
465 460 elif entry[0] == 'n' and entry[2] == -2: # other parent
466 461 size = -2
467 462 self._map.otherparentset.add(f)
468 463 self._updatedfiles.add(f)
469 464 self._map.removefile(f, oldstate, size)
470 465 if size == 0:
471 466 self._map.copymap.pop(f, None)
472 467
473 468 def merge(self, f):
474 469 '''Mark a file merged.'''
475 470 if self._pl[1] == nullid:
476 471 return self.normallookup(f)
477 472 return self.otherparent(f)
478 473
479 474 def drop(self, f):
480 475 '''Drop a file from the dirstate'''
481 476 oldstate = self[f]
482 477 if self._map.dropfile(f, oldstate):
483 478 self._dirty = True
484 479 self._updatedfiles.add(f)
485 480 self._map.copymap.pop(f, None)
486 481
487 482 def _discoverpath(self, path, normed, ignoremissing, exists, storemap):
488 483 if exists is None:
489 484 exists = os.path.lexists(os.path.join(self._root, path))
490 485 if not exists:
491 486 # Maybe a path component exists
492 487 if not ignoremissing and '/' in path:
493 488 d, f = path.rsplit('/', 1)
494 489 d = self._normalize(d, False, ignoremissing, None)
495 490 folded = d + "/" + f
496 491 else:
497 492 # No path components, preserve original case
498 493 folded = path
499 494 else:
500 495 # recursively normalize leading directory components
501 496 # against dirstate
502 497 if '/' in normed:
503 498 d, f = normed.rsplit('/', 1)
504 499 d = self._normalize(d, False, ignoremissing, True)
505 500 r = self._root + "/" + d
506 501 folded = d + "/" + util.fspath(f, r)
507 502 else:
508 503 folded = util.fspath(normed, self._root)
509 504 storemap[normed] = folded
510 505
511 506 return folded
512 507
513 508 def _normalizefile(self, path, isknown, ignoremissing=False, exists=None):
514 509 normed = util.normcase(path)
515 510 folded = self._map.filefoldmap.get(normed, None)
516 511 if folded is None:
517 512 if isknown:
518 513 folded = path
519 514 else:
520 515 folded = self._discoverpath(path, normed, ignoremissing, exists,
521 516 self._map.filefoldmap)
522 517 return folded
523 518
524 519 def _normalize(self, path, isknown, ignoremissing=False, exists=None):
525 520 normed = util.normcase(path)
526 521 folded = self._map.filefoldmap.get(normed, None)
527 522 if folded is None:
528 523 folded = self._map.dirfoldmap.get(normed, None)
529 524 if folded is None:
530 525 if isknown:
531 526 folded = path
532 527 else:
533 528 # store discovered result in dirfoldmap so that future
534 529 # normalizefile calls don't start matching directories
535 530 folded = self._discoverpath(path, normed, ignoremissing, exists,
536 531 self._map.dirfoldmap)
537 532 return folded
538 533
539 534 def normalize(self, path, isknown=False, ignoremissing=False):
540 535 '''
541 536 normalize the case of a pathname when on a casefolding filesystem
542 537
543 538 isknown specifies whether the filename came from walking the
544 539 disk, to avoid extra filesystem access.
545 540
546 541 If ignoremissing is True, missing paths are returned
547 542 unchanged. Otherwise, we try harder to normalize possibly
548 543 existing path components.
549 544
550 545 The normalized case is determined based on the following precedence:
551 546
552 547 - version of name already stored in the dirstate
553 548 - version of name stored on disk
554 549 - version provided via command arguments
555 550 '''
556 551
557 552 if self._checkcase:
558 553 return self._normalize(path, isknown, ignoremissing)
559 554 return path
560 555
561 556 def clear(self):
562 557 self._map.clear()
563 558 self._lastnormaltime = 0
564 559 self._updatedfiles.clear()
565 560 self._dirty = True
566 561
567 562 def rebuild(self, parent, allfiles, changedfiles=None):
568 563 if changedfiles is None:
569 564 # Rebuild entire dirstate
570 565 changedfiles = allfiles
571 566 lastnormaltime = self._lastnormaltime
572 567 self.clear()
573 568 self._lastnormaltime = lastnormaltime
574 569
575 570 if self._origpl is None:
576 571 self._origpl = self._pl
577 572 self._map.setparents(parent, nullid)
578 573 for f in changedfiles:
579 574 if f in allfiles:
580 575 self.normallookup(f)
581 576 else:
582 577 self.drop(f)
583 578
584 579 self._dirty = True
585 580
586 581 def identity(self):
587 582 '''Return identity of dirstate itself to detect changing in storage
588 583
589 584 If identity of previous dirstate is equal to this, writing
590 585 changes based on the former dirstate out can keep consistency.
591 586 '''
592 587 return self._map.identity
593 588
594 589 def write(self, tr):
595 590 if not self._dirty:
596 591 return
597 592
598 593 filename = self._filename
599 594 if tr:
600 595 # 'dirstate.write()' is not only for writing in-memory
601 596 # changes out, but also for dropping ambiguous timestamps.
602 597 # delayed writing re-raises the "ambiguous timestamp issue".
603 598 # See also the wiki page below for detail:
604 599 # https://www.mercurial-scm.org/wiki/DirstateTransactionPlan
605 600
606 601 # emulate dropping timestamp in 'parsers.pack_dirstate'
607 602 now = _getfsnow(self._opener)
608 603 self._map.clearambiguoustimes(self._updatedfiles, now)
609 604
610 605 # emulate that all 'dirstate.normal' results are written out
611 606 self._lastnormaltime = 0
612 607 self._updatedfiles.clear()
613 608
614 609 # delay writing in-memory changes out
615 610 tr.addfilegenerator('dirstate', (self._filename,),
616 611 self._writedirstate, location='plain')
617 612 return
618 613
619 614 st = self._opener(filename, "w", atomictemp=True, checkambig=True)
620 615 self._writedirstate(st)
621 616
622 617 def addparentchangecallback(self, category, callback):
623 618 """add a callback to be called when the wd parents are changed
624 619
625 620 Callback will be called with the following arguments:
626 621 dirstate, (oldp1, oldp2), (newp1, newp2)
627 622
628 623 Category is a unique identifier to allow overwriting an old callback
629 624 with a newer callback.
630 625 """
631 626 self._plchangecallbacks[category] = callback
632 627
633 628 def _writedirstate(self, st):
634 629 # notify callbacks about parents change
635 630 if self._origpl is not None and self._origpl != self._pl:
636 631 for c, callback in sorted(self._plchangecallbacks.iteritems()):
637 632 callback(self, self._origpl, self._pl)
638 633 self._origpl = None
639 634 # use the modification time of the newly created temporary file as the
640 635 # filesystem's notion of 'now'
641 636 now = util.fstat(st)[stat.ST_MTIME] & _rangemask
642 637
643 638 # a large enough 'delaywrite' prevents 'pack_dirstate' from dropping
644 639 # the timestamp of each entry in the dirstate, because of 'now > mtime'
645 640 delaywrite = self._ui.configint('debug', 'dirstate.delaywrite')
646 641 if delaywrite > 0:
647 642 # do we have any files to delay for?
648 643 for f, e in self._map.iteritems():
649 644 if e[0] == 'n' and e[3] == now:
650 645 import time # to avoid useless import
651 646 # rather than sleep n seconds, sleep until the next
652 647 # multiple of n seconds
653 648 clock = time.time()
654 649 start = int(clock) - (int(clock) % delaywrite)
655 650 end = start + delaywrite
656 651 time.sleep(end - clock)
657 652 now = end # trust our estimate that the end is near now
658 653 break
659 654
660 655 self._map.write(st, now)
661 656 self._lastnormaltime = 0
662 657 self._dirty = False
663 658
664 659 def _dirignore(self, f):
665 660 if self._ignore(f):
666 661 return True
667 662 for p in util.finddirs(f):
668 663 if self._ignore(p):
669 664 return True
670 665 return False
671 666
672 667 def _ignorefiles(self):
673 668 files = []
674 669 if os.path.exists(self._join('.hgignore')):
675 670 files.append(self._join('.hgignore'))
676 671 for name, path in self._ui.configitems("ui"):
677 672 if name == 'ignore' or name.startswith('ignore.'):
678 673 # we need to use os.path.join here rather than self._join
679 674 # because path is arbitrary and user-specified
680 675 files.append(os.path.join(self._rootdir, util.expandpath(path)))
681 676 return files
682 677
683 678 def _ignorefileandline(self, f):
684 679 files = collections.deque(self._ignorefiles())
685 680 visited = set()
686 681 while files:
687 682 i = files.popleft()
688 683 patterns = matchmod.readpatternfile(i, self._ui.warn,
689 684 sourceinfo=True)
690 685 for pattern, lineno, line in patterns:
691 686 kind, p = matchmod._patsplit(pattern, 'glob')
692 687 if kind == "subinclude":
693 688 if p not in visited:
694 689 files.append(p)
695 690 continue
696 691 m = matchmod.match(self._root, '', [], [pattern],
697 692 warn=self._ui.warn)
698 693 if m(f):
699 694 return (i, lineno, line)
700 695 visited.add(i)
701 696 return (None, -1, "")
702 697
703 698 def _walkexplicit(self, match, subrepos):
704 699 '''Get stat data about the files explicitly specified by match.
705 700
706 701 Return a triple (results, dirsfound, dirsnotfound).
707 702 - results is a mapping from filename to stat result. It also contains
708 703 listings mapping subrepos and .hg to None.
709 704 - dirsfound is a list of files found to be directories.
710 705 - dirsnotfound is a list of files that the dirstate thinks are
711 706 directories and that were not found.'''
712 707
713 708 def badtype(mode):
714 709 kind = _('unknown')
715 710 if stat.S_ISCHR(mode):
716 711 kind = _('character device')
717 712 elif stat.S_ISBLK(mode):
718 713 kind = _('block device')
719 714 elif stat.S_ISFIFO(mode):
720 715 kind = _('fifo')
721 716 elif stat.S_ISSOCK(mode):
722 717 kind = _('socket')
723 718 elif stat.S_ISDIR(mode):
724 719 kind = _('directory')
725 720 return _('unsupported file type (type is %s)') % kind
726 721
727 722 matchedir = match.explicitdir
728 723 badfn = match.bad
729 724 dmap = self._map
730 725 lstat = os.lstat
731 726 getkind = stat.S_IFMT
732 727 dirkind = stat.S_IFDIR
733 728 regkind = stat.S_IFREG
734 729 lnkkind = stat.S_IFLNK
735 730 join = self._join
736 731 dirsfound = []
737 732 foundadd = dirsfound.append
738 733 dirsnotfound = []
739 734 notfoundadd = dirsnotfound.append
740 735
741 736 if not match.isexact() and self._checkcase:
742 737 normalize = self._normalize
743 738 else:
744 739 normalize = None
745 740
746 741 files = sorted(match.files())
747 742 subrepos.sort()
748 743 i, j = 0, 0
749 744 while i < len(files) and j < len(subrepos):
750 745 subpath = subrepos[j] + "/"
751 746 if files[i] < subpath:
752 747 i += 1
753 748 continue
754 749 while i < len(files) and files[i].startswith(subpath):
755 750 del files[i]
756 751 j += 1
757 752
758 753 if not files or '' in files:
759 754 files = ['']
760 755 # constructing the foldmap is expensive, so don't do it for the
761 756 # common case where files is ['']
762 757 normalize = None
763 758 results = dict.fromkeys(subrepos)
764 759 results['.hg'] = None
765 760
766 761 for ff in files:
767 762 if normalize:
768 763 nf = normalize(ff, False, True)
769 764 else:
770 765 nf = ff
771 766 if nf in results:
772 767 continue
773 768
774 769 try:
775 770 st = lstat(join(nf))
776 771 kind = getkind(st.st_mode)
777 772 if kind == dirkind:
778 773 if nf in dmap:
779 774 # file replaced by dir on disk but still in dirstate
780 775 results[nf] = None
781 776 if matchedir:
782 777 matchedir(nf)
783 778 foundadd((nf, ff))
784 779 elif kind == regkind or kind == lnkkind:
785 780 results[nf] = st
786 781 else:
787 782 badfn(ff, badtype(kind))
788 783 if nf in dmap:
789 784 results[nf] = None
790 785 except OSError as inst: # nf not found on disk - it is dirstate only
791 786 if nf in dmap: # does it exactly match a missing file?
792 787 results[nf] = None
793 788 else: # does it match a missing directory?
794 789 if self._map.hasdir(nf):
795 790 if matchedir:
796 791 matchedir(nf)
797 792 notfoundadd(nf)
798 793 else:
799 794 badfn(ff, encoding.strtolocal(inst.strerror))
800 795
801 796 # match.files() may contain explicitly-specified paths that shouldn't
802 797 # be taken; drop them from the list of files found. dirsfound/notfound
803 798 # aren't filtered here because they will be tested later.
804 799 if match.anypats():
805 800 for f in list(results):
806 801 if f == '.hg' or f in subrepos:
807 802 # keep sentinel to disable further out-of-repo walks
808 803 continue
809 804 if not match(f):
810 805 del results[f]
811 806
812 807 # Case insensitive filesystems cannot rely on lstat() failing to detect
813 808 # a case-only rename. Prune the stat object for any file that does not
814 809 # match the case in the filesystem, if there are multiple files that
815 810 # normalize to the same path.
816 811 if match.isexact() and self._checkcase:
817 812 normed = {}
818 813
819 814 for f, st in results.iteritems():
820 815 if st is None:
821 816 continue
822 817
823 818 nc = util.normcase(f)
824 819 paths = normed.get(nc)
825 820
826 821 if paths is None:
827 822 paths = set()
828 823 normed[nc] = paths
829 824
830 825 paths.add(f)
831 826
832 827 for norm, paths in normed.iteritems():
833 828 if len(paths) > 1:
834 829 for path in paths:
835 830 folded = self._discoverpath(path, norm, True, None,
836 831 self._map.dirfoldmap)
837 832 if path != folded:
838 833 results[path] = None
839 834
840 835 return results, dirsfound, dirsnotfound
841 836
842 837 def walk(self, match, subrepos, unknown, ignored, full=True):
843 838 '''
844 839 Walk recursively through the directory tree, finding all files
845 840 matched by match.
846 841
847 842 If full is False, maybe skip some known-clean files.
848 843
849 844 Return a dict mapping filename to stat-like object (either
850 845 mercurial.osutil.stat instance or return value of os.stat()).
851 846
852 847 '''
853 848 # full is a flag that extensions that hook into walk can use -- this
854 849 # implementation doesn't use it at all. This satisfies the contract
855 850 # because we only guarantee a "maybe".
856 851
857 852 if ignored:
858 853 ignore = util.never
859 854 dirignore = util.never
860 855 elif unknown:
861 856 ignore = self._ignore
862 857 dirignore = self._dirignore
863 858 else:
864 859 # if not unknown and not ignored, drop dir recursion and step 2
865 860 ignore = util.always
866 861 dirignore = util.always
867 862
868 863 matchfn = match.matchfn
869 864 matchalways = match.always()
870 865 matchtdir = match.traversedir
871 866 dmap = self._map
872 867 listdir = util.listdir
873 868 lstat = os.lstat
874 869 dirkind = stat.S_IFDIR
875 870 regkind = stat.S_IFREG
876 871 lnkkind = stat.S_IFLNK
877 872 join = self._join
878 873
879 874 exact = skipstep3 = False
880 875 if match.isexact(): # match.exact
881 876 exact = True
882 877 dirignore = util.always # skip step 2
883 878 elif match.prefix(): # match.match, no patterns
884 879 skipstep3 = True
885 880
886 881 if not exact and self._checkcase:
887 882 normalize = self._normalize
888 883 normalizefile = self._normalizefile
889 884 skipstep3 = False
890 885 else:
891 886 normalize = self._normalize
892 887 normalizefile = None
893 888
894 889 # step 1: find all explicit files
895 890 results, work, dirsnotfound = self._walkexplicit(match, subrepos)
896 891
897 892 skipstep3 = skipstep3 and not (work or dirsnotfound)
898 893 work = [d for d in work if not dirignore(d[0])]
899 894
900 895 # step 2: visit subdirectories
901 896 def traverse(work, alreadynormed):
902 897 wadd = work.append
903 898 while work:
904 899 nd = work.pop()
905 900 visitentries = match.visitchildrenset(nd)
906 901 if not visitentries:
907 902 continue
908 903 if visitentries == 'this' or visitentries == 'all':
909 904 visitentries = None
910 905 skip = None
911 906 if nd != '':
912 907 skip = '.hg'
913 908 try:
914 909 entries = listdir(join(nd), stat=True, skip=skip)
915 910 except OSError as inst:
916 911 if inst.errno in (errno.EACCES, errno.ENOENT):
917 912 match.bad(self.pathto(nd),
918 913 encoding.strtolocal(inst.strerror))
919 914 continue
920 915 raise
921 916 for f, kind, st in entries:
922 917 # Some matchers may return files in the visitentries set,
923 918 # instead of 'this', if the matcher explicitly mentions them
924 919 # and is not an exactmatcher. This is acceptable; we do not
925 920 # make any hard assumptions about file-or-directory below
926 921 # based on the presence of `f` in visitentries. If
927 922 # visitchildrenset returned a set, we can always skip the
928 923 # entries *not* in the set it provided regardless of whether
929 924 # they're actually a file or a directory.
930 925 if visitentries and f not in visitentries:
931 926 continue
932 927 if normalizefile:
933 928 # even though f might be a directory, we're only
934 929 # interested in comparing it to files currently in the
935 930 # dmap -- therefore normalizefile is enough
936 931 nf = normalizefile(nd and (nd + "/" + f) or f, True,
937 932 True)
938 933 else:
939 934 nf = nd and (nd + "/" + f) or f
940 935 if nf not in results:
941 936 if kind == dirkind:
942 937 if not ignore(nf):
943 938 if matchtdir:
944 939 matchtdir(nf)
945 940 wadd(nf)
946 941 if nf in dmap and (matchalways or matchfn(nf)):
947 942 results[nf] = None
948 943 elif kind == regkind or kind == lnkkind:
949 944 if nf in dmap:
950 945 if matchalways or matchfn(nf):
951 946 results[nf] = st
952 947 elif ((matchalways or matchfn(nf))
953 948 and not ignore(nf)):
954 949 # unknown file -- normalize if necessary
955 950 if not alreadynormed:
956 951 nf = normalize(nf, False, True)
957 952 results[nf] = st
958 953 elif nf in dmap and (matchalways or matchfn(nf)):
959 954 results[nf] = None
960 955
961 956 for nd, d in work:
962 957 # alreadynormed means that processwork doesn't have to do any
963 958 # expensive directory normalization
964 959 alreadynormed = not normalize or nd == d
965 960 traverse([d], alreadynormed)
966 961
967 962 for s in subrepos:
968 963 del results[s]
969 964 del results['.hg']
970 965
971 966 # step 3: visit remaining files from dmap
972 967 if not skipstep3 and not exact:
973 968 # If a dmap file is not in results yet, it was either
974 969 # a) not matching matchfn, b) ignored, c) missing, or d) under a
975 970 # symlink directory.
976 971 if not results and matchalways:
977 972 visit = [f for f in dmap]
978 973 else:
979 974 visit = [f for f in dmap if f not in results and matchfn(f)]
980 975 visit.sort()
981 976
982 977 if unknown:
983 978 # unknown == True means we walked all dirs under the roots
984 979 # that wasn't ignored, and everything that matched was stat'ed
985 980 # and is already in results.
986 981 # The rest must thus be ignored or under a symlink.
987 982 audit_path = pathutil.pathauditor(self._root, cached=True)
988 983
989 984 for nf in iter(visit):
990 985 # If a stat for the same file was already added with a
991 986 # different case, don't add one for this, since that would
992 987 # make it appear as if the file exists under both names
993 988 # on disk.
994 989 if (normalizefile and
995 990 normalizefile(nf, True, True) in results):
996 991 results[nf] = None
997 992 # Report ignored items in the dmap as long as they are not
998 993 # under a symlink directory.
999 994 elif audit_path.check(nf):
1000 995 try:
1001 996 results[nf] = lstat(join(nf))
1002 997 # file was just ignored, no links, and exists
1003 998 except OSError:
1004 999 # file doesn't exist
1005 1000 results[nf] = None
1006 1001 else:
1007 1002 # It's either missing or under a symlink directory
1008 1003 # which we in this case report as missing
1009 1004 results[nf] = None
1010 1005 else:
1011 1006 # We may not have walked the full directory tree above,
1012 1007 # so stat and check everything we missed.
1013 1008 iv = iter(visit)
1014 1009 for st in util.statfiles([join(i) for i in visit]):
1015 1010 results[next(iv)] = st
1016 1011 return results
1017 1012
1018 1013 def status(self, match, subrepos, ignored, clean, unknown):
1019 1014 '''Determine the status of the working copy relative to the
1020 1015 dirstate and return a pair of (unsure, status), where status is of type
1021 1016 scmutil.status and:
1022 1017
1023 1018 unsure:
1024 1019 files that might have been modified since the dirstate was
1025 1020 written, but need to be read to be sure (size is the same
1026 1021 but mtime differs)
1027 1022 status.modified:
1028 1023 files that have definitely been modified since the dirstate
1029 1024 was written (different size or mode)
1030 1025 status.clean:
1031 1026 files that have definitely not been modified since the
1032 1027 dirstate was written
1033 1028 '''
1034 1029 listignored, listclean, listunknown = ignored, clean, unknown
1035 1030 lookup, modified, added, unknown, ignored = [], [], [], [], []
1036 1031 removed, deleted, clean = [], [], []
1037 1032
1038 1033 dmap = self._map
1039 1034 dmap.preload()
1040 1035 dcontains = dmap.__contains__
1041 1036 dget = dmap.__getitem__
1042 1037 ladd = lookup.append # aka "unsure"
1043 1038 madd = modified.append
1044 1039 aadd = added.append
1045 1040 uadd = unknown.append
1046 1041 iadd = ignored.append
1047 1042 radd = removed.append
1048 1043 dadd = deleted.append
1049 1044 cadd = clean.append
1050 1045 mexact = match.exact
1051 1046 dirignore = self._dirignore
1052 1047 checkexec = self._checkexec
1053 1048 copymap = self._map.copymap
1054 1049 lastnormaltime = self._lastnormaltime
1055 1050
1056 1051 # We need to do full walks when either
1057 1052 # - we're listing all clean files, or
1058 1053 # - match.traversedir does something, because match.traversedir should
1059 1054 # be called for every dir in the working dir
1060 1055 full = listclean or match.traversedir is not None
1061 1056 for fn, st in self.walk(match, subrepos, listunknown, listignored,
1062 1057 full=full).iteritems():
1063 1058 if not dcontains(fn):
1064 1059 if (listignored or mexact(fn)) and dirignore(fn):
1065 1060 if listignored:
1066 1061 iadd(fn)
1067 1062 else:
1068 1063 uadd(fn)
1069 1064 continue
1070 1065
1071 1066 # This is equivalent to 'state, mode, size, time = dmap[fn]' but not
1072 1067 # written like that for performance reasons. dmap[fn] is not a
1073 1068 # Python tuple in compiled builds. The CPython UNPACK_SEQUENCE
1074 1069 # opcode has fast paths when the value to be unpacked is a tuple or
1075 1070 # a list, but falls back to creating a full-fledged iterator in
1076 1071 # general. That is much slower than simply accessing and storing the
1077 1072 # tuple members one by one.
1078 1073 t = dget(fn)
1079 1074 state = t[0]
1080 1075 mode = t[1]
1081 1076 size = t[2]
1082 1077 time = t[3]
1083 1078
1084 1079 if not st and state in "nma":
1085 1080 dadd(fn)
1086 1081 elif state == 'n':
1087 1082 if (size >= 0 and
1088 1083 ((size != st.st_size and size != st.st_size & _rangemask)
1089 1084 or ((mode ^ st.st_mode) & 0o100 and checkexec))
1090 1085 or size == -2 # other parent
1091 1086 or fn in copymap):
1092 1087 madd(fn)
1093 1088 elif (time != st[stat.ST_MTIME]
1094 1089 and time != st[stat.ST_MTIME] & _rangemask):
1095 1090 ladd(fn)
1096 1091 elif st[stat.ST_MTIME] == lastnormaltime:
1097 1092 # fn may have just been marked as normal and it may have
1098 1093 # changed in the same second without changing its size.
1099 1094 # This can happen if we quickly do multiple commits.
1100 1095 # Force lookup, so we don't miss such a racy file change.
1101 1096 ladd(fn)
1102 1097 elif listclean:
1103 1098 cadd(fn)
1104 1099 elif state == 'm':
1105 1100 madd(fn)
1106 1101 elif state == 'a':
1107 1102 aadd(fn)
1108 1103 elif state == 'r':
1109 1104 radd(fn)
1110 1105
1111 1106 return (lookup, scmutil.status(modified, added, removed, deleted,
1112 1107 unknown, ignored, clean))
1113 1108
1114 1109 def matches(self, match):
1115 1110 '''
1116 1111 return files in the dirstate (in whatever state) filtered by match
1117 1112 '''
1118 1113 dmap = self._map
1119 1114 if match.always():
1120 1115 return dmap.keys()
1121 1116 files = match.files()
1122 1117 if match.isexact():
1123 1118 # fast path -- filter the other way around, since typically files is
1124 1119 # much smaller than dmap
1125 1120 return [f for f in files if f in dmap]
1126 1121 if match.prefix() and all(fn in dmap for fn in files):
1127 1122 # fast path -- all the values are known to be files, so just return
1128 1123 # that
1129 1124 return list(files)
1130 1125 return [f for f in dmap if match(f)]
1131 1126
1132 1127 def _actualfilename(self, tr):
1133 1128 if tr:
1134 1129 return self._pendingfilename
1135 1130 else:
1136 1131 return self._filename
1137 1132
1138 1133 def savebackup(self, tr, backupname):
1139 1134 '''Save current dirstate into backup file'''
1140 1135 filename = self._actualfilename(tr)
1141 1136 assert backupname != filename
1142 1137
1143 1138 # use '_writedirstate' instead of 'write' to write changes certainly,
1144 1139 # because the latter omits writing out if transaction is running.
1145 1140 # output file will be used to create backup of dirstate at this point.
1146 1141 if self._dirty or not self._opener.exists(filename):
1147 1142 self._writedirstate(self._opener(filename, "w", atomictemp=True,
1148 1143 checkambig=True))
1149 1144
1150 1145 if tr:
1151 1146 # ensure that subsequent tr.writepending returns True for
1152 1147 # changes written out above, even if dirstate is never
1153 1148 # changed after this
1154 1149 tr.addfilegenerator('dirstate', (self._filename,),
1155 1150 self._writedirstate, location='plain')
1156 1151
1157 1152 # ensure that pending file written above is unlinked at
1158 1153 # failure, even if tr.writepending isn't invoked until the
1159 1154 # end of this transaction
1160 1155 tr.registertmp(filename, location='plain')
1161 1156
1162 1157 self._opener.tryunlink(backupname)
1163 1158 # hardlink backup is okay because _writedirstate is always called
1164 1159 # with an "atomictemp=True" file.
1165 1160 util.copyfile(self._opener.join(filename),
1166 1161 self._opener.join(backupname), hardlink=True)
1167 1162
1168 1163 def restorebackup(self, tr, backupname):
1169 1164 '''Restore dirstate by backup file'''
1170 1165 # this "invalidate()" prevents "wlock.release()" from writing
1171 1166 # changes of dirstate out after restoring from backup file
1172 1167 self.invalidate()
1173 1168 filename = self._actualfilename(tr)
1174 1169 o = self._opener
1175 1170 if util.samefile(o.join(backupname), o.join(filename)):
1176 1171 o.unlink(backupname)
1177 1172 else:
1178 1173 o.rename(backupname, filename, checkambig=True)
1179 1174
1180 1175 def clearbackup(self, tr, backupname):
1181 1176 '''Clear backup file'''
1182 1177 self._opener.unlink(backupname)
1183 1178
1184 1179 class dirstatemap(object):
1185 1180 """Map encapsulating the dirstate's contents.
1186 1181
1187 1182 The dirstate contains the following state:
1188 1183
1189 1184 - `identity` is the identity of the dirstate file, which can be used to
1190 1185 detect when changes have occurred to the dirstate file.
1191 1186
1192 1187 - `parents` is a pair containing the parents of the working copy. The
1193 1188 parents are updated by calling `setparents`.
1194 1189
1195 1190 - the state map maps filenames to tuples of (state, mode, size, mtime),
1196 1191 where state is a single character representing 'normal', 'added',
1197 1192 'removed', or 'merged'. It is read by treating the dirstate as a
1198 1193 dict. File state is updated by calling the `addfile`, `removefile` and
1199 1194 `dropfile` methods.
1200 1195
1201 1196 - `copymap` maps destination filenames to their source filename.
1202 1197
1203 1198 The dirstate also provides the following views onto the state:
1204 1199
1205 1200 - `nonnormalset` is a set of the filenames that have state other
1206 1201 than 'normal', or are normal but have an mtime of -1 ('normallookup').
1207 1202
1208 1203 - `otherparentset` is a set of the filenames that are marked as coming
1209 1204 from the second parent when the dirstate is currently being merged.
1210 1205
1211 1206 - `filefoldmap` is a dict mapping normalized filenames to the denormalized
1212 1207 form that they appear as in the dirstate.
1213 1208
1214 1209 - `dirfoldmap` is a dict mapping normalized directory names to the
1215 1210 denormalized form that they appear as in the dirstate.
1216 1211 """
1217 1212
1218 1213 def __init__(self, ui, opener, root):
1219 1214 self._ui = ui
1220 1215 self._opener = opener
1221 1216 self._root = root
1222 1217 self._filename = 'dirstate'
1223 1218
1224 1219 self._parents = None
1225 1220 self._dirtyparents = False
1226 1221
1227 1222 # for consistent view between _pl() and _read() invocations
1228 1223 self._pendingmode = None
1229 1224
1230 1225 @propertycache
1231 1226 def _map(self):
1232 1227 self._map = {}
1233 1228 self.read()
1234 1229 return self._map
1235 1230
1236 1231 @propertycache
1237 1232 def copymap(self):
1238 1233 self.copymap = {}
1239 1234 self._map
1240 1235 return self.copymap
1241 1236
1242 1237 def clear(self):
1243 1238 self._map.clear()
1244 1239 self.copymap.clear()
1245 1240 self.setparents(nullid, nullid)
1246 1241 util.clearcachedproperty(self, "_dirs")
1247 1242 util.clearcachedproperty(self, "_alldirs")
1248 1243 util.clearcachedproperty(self, "filefoldmap")
1249 1244 util.clearcachedproperty(self, "dirfoldmap")
1250 1245 util.clearcachedproperty(self, "nonnormalset")
1251 1246 util.clearcachedproperty(self, "otherparentset")
1252 1247
1253 1248 def items(self):
1254 1249 return self._map.iteritems()
1255 1250
1256 1251 # forward for python2,3 compat
1257 1252 iteritems = items
1258 1253
1259 1254 def __len__(self):
1260 1255 return len(self._map)
1261 1256
1262 1257 def __iter__(self):
1263 1258 return iter(self._map)
1264 1259
1265 1260 def get(self, key, default=None):
1266 1261 return self._map.get(key, default)
1267 1262
1268 1263 def __contains__(self, key):
1269 1264 return key in self._map
1270 1265
1271 1266 def __getitem__(self, key):
1272 1267 return self._map[key]
1273 1268
1274 1269 def keys(self):
1275 1270 return self._map.keys()
1276 1271
1277 1272 def preload(self):
1278 1273 """Loads the underlying data, if it's not already loaded"""
1279 1274 self._map
1280 1275
1281 1276 def addfile(self, f, oldstate, state, mode, size, mtime):
1282 1277 """Add a tracked file to the dirstate."""
1283 1278 if oldstate in "?r" and r"_dirs" in self.__dict__:
1284 1279 self._dirs.addpath(f)
1285 1280 if oldstate == "?" and r"_alldirs" in self.__dict__:
1286 1281 self._alldirs.addpath(f)
1287 1282 self._map[f] = dirstatetuple(state, mode, size, mtime)
1288 1283 if state != 'n' or mtime == -1:
1289 1284 self.nonnormalset.add(f)
1290 1285 if size == -2:
1291 1286 self.otherparentset.add(f)
1292 1287
1293 1288 def removefile(self, f, oldstate, size):
1294 1289 """
1295 1290 Mark a file as removed in the dirstate.
1296 1291
1297 1292 The `size` parameter is used to store sentinel values that indicate
1298 1293 the file's previous state. In the future, we should refactor this
1299 1294 to be more explicit about what that state is.
1300 1295 """
1301 1296 if oldstate not in "?r" and r"_dirs" in self.__dict__:
1302 1297 self._dirs.delpath(f)
1303 1298 if oldstate == "?" and r"_alldirs" in self.__dict__:
1304 1299 self._alldirs.addpath(f)
1305 1300 if r"filefoldmap" in self.__dict__:
1306 1301 normed = util.normcase(f)
1307 1302 self.filefoldmap.pop(normed, None)
1308 1303 self._map[f] = dirstatetuple('r', 0, size, 0)
1309 1304 self.nonnormalset.add(f)
1310 1305
1311 1306 def dropfile(self, f, oldstate):
1312 1307 """
1313 1308 Remove a file from the dirstate. Returns True if the file was
1314 1309 previously recorded.
1315 1310 """
1316 1311 exists = self._map.pop(f, None) is not None
1317 1312 if exists:
1318 1313 if oldstate != "r" and r"_dirs" in self.__dict__:
1319 1314 self._dirs.delpath(f)
1320 1315 if r"_alldirs" in self.__dict__:
1321 1316 self._alldirs.delpath(f)
1322 1317 if r"filefoldmap" in self.__dict__:
1323 1318 normed = util.normcase(f)
1324 1319 self.filefoldmap.pop(normed, None)
1325 1320 self.nonnormalset.discard(f)
1326 1321 return exists
1327 1322
1328 1323 def clearambiguoustimes(self, files, now):
1329 1324 for f in files:
1330 1325 e = self.get(f)
1331 1326 if e is not None and e[0] == 'n' and e[3] == now:
1332 1327 self._map[f] = dirstatetuple(e[0], e[1], e[2], -1)
1333 1328 self.nonnormalset.add(f)
1334 1329
1335 1330 def nonnormalentries(self):
1336 1331 '''Compute the nonnormal dirstate entries from the dmap'''
1337 1332 try:
1338 1333 return parsers.nonnormalotherparententries(self._map)
1339 1334 except AttributeError:
1340 1335 nonnorm = set()
1341 1336 otherparent = set()
1342 1337 for fname, e in self._map.iteritems():
1343 1338 if e[0] != 'n' or e[3] == -1:
1344 1339 nonnorm.add(fname)
1345 1340 if e[0] == 'n' and e[2] == -2:
1346 1341 otherparent.add(fname)
1347 1342 return nonnorm, otherparent
1348 1343
1349 1344 @propertycache
1350 1345 def filefoldmap(self):
1351 1346 """Returns a dictionary mapping normalized case paths to their
1352 1347 non-normalized versions.
1353 1348 """
1354 1349 try:
1355 1350 makefilefoldmap = parsers.make_file_foldmap
1356 1351 except AttributeError:
1357 1352 pass
1358 1353 else:
1359 1354 return makefilefoldmap(self._map, util.normcasespec,
1360 1355 util.normcasefallback)
1361 1356
1362 1357 f = {}
1363 1358 normcase = util.normcase
1364 1359 for name, s in self._map.iteritems():
1365 1360 if s[0] != 'r':
1366 1361 f[normcase(name)] = name
1367 1362 f['.'] = '.' # prevents useless util.fspath() invocation
1368 1363 return f
1369 1364
1370 1365 def hastrackeddir(self, d):
1371 1366 """
1372 1367 Returns True if the dirstate contains a tracked (not removed) file
1373 1368 in this directory.
1374 1369 """
1375 1370 return d in self._dirs
1376 1371
1377 1372 def hasdir(self, d):
1378 1373 """
1379 1374 Returns True if the dirstate contains a file (tracked or removed)
1380 1375 in this directory.
1381 1376 """
1382 1377 return d in self._alldirs
1383 1378
1384 1379 @propertycache
1385 1380 def _dirs(self):
1386 1381 return util.dirs(self._map, 'r')
1387 1382
1388 1383 @propertycache
1389 1384 def _alldirs(self):
1390 1385 return util.dirs(self._map)
1391 1386
1392 1387 def _opendirstatefile(self):
1393 1388 fp, mode = txnutil.trypending(self._root, self._opener, self._filename)
1394 1389 if self._pendingmode is not None and self._pendingmode != mode:
1395 1390 fp.close()
1396 1391 raise error.Abort(_('working directory state may be '
1397 1392 'changed parallelly'))
1398 1393 self._pendingmode = mode
1399 1394 return fp
1400 1395
1401 1396 def parents(self):
1402 1397 if not self._parents:
1403 1398 try:
1404 1399 fp = self._opendirstatefile()
1405 1400 st = fp.read(40)
1406 1401 fp.close()
1407 1402 except IOError as err:
1408 1403 if err.errno != errno.ENOENT:
1409 1404 raise
1410 1405 # File doesn't exist, so the current state is empty
1411 1406 st = ''
1412 1407
1413 1408 l = len(st)
1414 1409 if l == 40:
1415 1410 self._parents = (st[:20], st[20:40])
1416 1411 elif l == 0:
1417 1412 self._parents = (nullid, nullid)
1418 1413 else:
1419 1414 raise error.Abort(_('working directory state appears '
1420 1415 'damaged!'))
1421 1416
1422 1417 return self._parents
1423 1418
1424 1419 def setparents(self, p1, p2):
1425 1420 self._parents = (p1, p2)
1426 1421 self._dirtyparents = True
1427 1422
1428 1423 def read(self):
1429 1424 # ignore HG_PENDING because identity is used only for writing
1430 1425 self.identity = util.filestat.frompath(
1431 1426 self._opener.join(self._filename))
1432 1427
1433 1428 try:
1434 1429 fp = self._opendirstatefile()
1435 1430 try:
1436 1431 st = fp.read()
1437 1432 finally:
1438 1433 fp.close()
1439 1434 except IOError as err:
1440 1435 if err.errno != errno.ENOENT:
1441 1436 raise
1442 1437 return
1443 1438 if not st:
1444 1439 return
1445 1440
1446 1441 if util.safehasattr(parsers, 'dict_new_presized'):
1447 1442 # Make an estimate of the number of files in the dirstate based on
1448 1443 # its size. From a linear regression on a set of real-world repos,
1449 1444 # all over 10,000 files, the size of a dirstate entry is 85
1450 1445 # bytes. The cost of resizing is significantly higher than the cost
1451 1446 # of filling in a larger presized dict, so subtract 20% from the
1452 1447 # size.
1453 1448 #
1454 1449 # This heuristic is imperfect in many ways, so in a future dirstate
1455 1450 # format update it makes sense to just record the number of entries
1456 1451 # on write.
1457 1452 self._map = parsers.dict_new_presized(len(st) // 71)
1458 1453
1459 1454 # Python's garbage collector triggers a GC each time a certain number
1460 1455 # of container objects (the number being defined by
1461 1456 # gc.get_threshold()) are allocated. parse_dirstate creates a tuple
1462 1457 # for each file in the dirstate. The C version then immediately marks
1463 1458 # them as not to be tracked by the collector. However, this has no
1464 1459 # effect on when GCs are triggered, only on what objects the GC looks
1465 1460 # into. This means that O(number of files) GCs are unavoidable.
1466 1461 # Depending on when in the process's lifetime the dirstate is parsed,
1467 1462 # this can get very expensive. As a workaround, disable GC while
1468 1463 # parsing the dirstate.
1469 1464 #
1470 1465 # (we cannot decorate the function directly since it is in a C module)
1471 if rustext is not None:
1472 parse_dirstate = rustext.dirstate.parse_dirstate
1473 else:
1474 parse_dirstate = parsers.parse_dirstate
1475
1476 parse_dirstate = util.nogc(parse_dirstate)
1466 parse_dirstate = util.nogc(dirstatemod.parse_dirstate)
1477 1467 p = parse_dirstate(self._map, self.copymap, st)
1478 1468 if not self._dirtyparents:
1479 1469 self.setparents(*p)
1480 1470
1481 1471 # Avoid excess attribute lookups by fast pathing certain checks
1482 1472 self.__contains__ = self._map.__contains__
1483 1473 self.__getitem__ = self._map.__getitem__
1484 1474 self.get = self._map.get
1485 1475
1486 1476 def write(self, st, now):
1487 if rustext is not None:
1488 pack_dirstate = rustext.dirstate.pack_dirstate
1489 else:
1490 pack_dirstate = parsers.pack_dirstate
1491
1492 st.write(pack_dirstate(self._map, self.copymap,
1477 st.write(dirstatemod.pack_dirstate(self._map, self.copymap,
1493 1478 self.parents(), now))
1494 1479 st.close()
1495 1480 self._dirtyparents = False
1496 1481 self.nonnormalset, self.otherparentset = self.nonnormalentries()
1497 1482
1498 1483 @propertycache
1499 1484 def nonnormalset(self):
1500 1485 nonnorm, otherparents = self.nonnormalentries()
1501 1486 self.otherparentset = otherparents
1502 1487 return nonnorm
1503 1488
1504 1489 @propertycache
1505 1490 def otherparentset(self):
1506 1491 nonnorm, otherparents = self.nonnormalentries()
1507 1492 self.nonnormalset = nonnorm
1508 1493 return otherparents
1509 1494
1510 1495 @propertycache
1511 1496 def identity(self):
1512 1497 self._map
1513 1498 return self.identity
1514 1499
1515 1500 @propertycache
1516 1501 def dirfoldmap(self):
1517 1502 f = {}
1518 1503 normcase = util.normcase
1519 1504 for name in self._dirs:
1520 1505 f[normcase(name)] = name
1521 1506 return f
@@ -1,1529 +1,1526
1 1 # match.py - filename matching
2 2 #
3 3 # Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import, print_function
9 9
10 10 import copy
11 11 import itertools
12 12 import os
13 13 import re
14 14
15 15 from .i18n import _
16 16 from . import (
17 17 encoding,
18 18 error,
19 19 pathutil,
20 policy,
20 21 pycompat,
21 22 util,
22 23 )
23 24 from .utils import (
24 25 stringutil,
25 26 )
26 27
27 try:
28 from . import rustext
29 rustext.__name__ # force actual import (see hgdemandimport)
30 except ImportError:
31 rustext = None
28 rustmod = policy.importrust('filepatterns')
32 29
33 30 allpatternkinds = ('re', 'glob', 'path', 'relglob', 'relpath', 'relre',
34 31 'rootglob',
35 32 'listfile', 'listfile0', 'set', 'include', 'subinclude',
36 33 'rootfilesin')
37 34 cwdrelativepatternkinds = ('relpath', 'glob')
38 35
39 36 propertycache = util.propertycache
40 37
41 38 def _rematcher(regex):
42 39 '''compile the regexp with the best available regexp engine and return a
43 40 matcher function'''
44 41 m = util.re.compile(regex)
45 42 try:
46 43 # slightly faster, provided by facebook's re2 bindings
47 44 return m.test_match
48 45 except AttributeError:
49 46 return m.match
50 47
51 48 def _expandsets(kindpats, ctx=None, listsubrepos=False, badfn=None):
52 49 '''Returns the kindpats list with the 'set' patterns expanded to matchers'''
53 50 matchers = []
54 51 other = []
55 52
56 53 for kind, pat, source in kindpats:
57 54 if kind == 'set':
58 55 if ctx is None:
59 56 raise error.ProgrammingError("fileset expression with no "
60 57 "context")
61 58 matchers.append(ctx.matchfileset(pat, badfn=badfn))
62 59
63 60 if listsubrepos:
64 61 for subpath in ctx.substate:
65 62 sm = ctx.sub(subpath).matchfileset(pat, badfn=badfn)
66 63 pm = prefixdirmatcher(subpath, sm, badfn=badfn)
67 64 matchers.append(pm)
68 65
69 66 continue
70 67 other.append((kind, pat, source))
71 68 return matchers, other
72 69
73 70 def _expandsubinclude(kindpats, root):
74 71 '''Returns the list of subinclude matcher args and the kindpats without the
75 72 subincludes in it.'''
76 73 relmatchers = []
77 74 other = []
78 75
79 76 for kind, pat, source in kindpats:
80 77 if kind == 'subinclude':
81 78 sourceroot = pathutil.dirname(util.normpath(source))
82 79 pat = util.pconvert(pat)
83 80 path = pathutil.join(sourceroot, pat)
84 81
85 82 newroot = pathutil.dirname(path)
86 83 matcherargs = (newroot, '', [], ['include:%s' % path])
87 84
88 85 prefix = pathutil.canonpath(root, root, newroot)
89 86 if prefix:
90 87 prefix += '/'
91 88 relmatchers.append((prefix, matcherargs))
92 89 else:
93 90 other.append((kind, pat, source))
94 91
95 92 return relmatchers, other
96 93
97 94 def _kindpatsalwaysmatch(kindpats):
98 95 """"Checks whether the kindspats match everything, as e.g.
99 96 'relpath:.' does.
100 97 """
101 98 for kind, pat, source in kindpats:
102 99 if pat != '' or kind not in ['relpath', 'glob']:
103 100 return False
104 101 return True
105 102
106 103 def _buildkindpatsmatcher(matchercls, root, kindpats, ctx=None,
107 104 listsubrepos=False, badfn=None):
108 105 matchers = []
109 106 fms, kindpats = _expandsets(kindpats, ctx=ctx,
110 107 listsubrepos=listsubrepos, badfn=badfn)
111 108 if kindpats:
112 109 m = matchercls(root, kindpats, badfn=badfn)
113 110 matchers.append(m)
114 111 if fms:
115 112 matchers.extend(fms)
116 113 if not matchers:
117 114 return nevermatcher(badfn=badfn)
118 115 if len(matchers) == 1:
119 116 return matchers[0]
120 117 return unionmatcher(matchers)
121 118
122 119 def match(root, cwd, patterns=None, include=None, exclude=None, default='glob',
123 120 auditor=None, ctx=None, listsubrepos=False, warn=None,
124 121 badfn=None, icasefs=False):
125 122 r"""build an object to match a set of file patterns
126 123
127 124 arguments:
128 125 root - the canonical root of the tree you're matching against
129 126 cwd - the current working directory, if relevant
130 127 patterns - patterns to find
131 128 include - patterns to include (unless they are excluded)
132 129 exclude - patterns to exclude (even if they are included)
133 130 default - if a pattern in patterns has no explicit type, assume this one
134 131 auditor - optional path auditor
135 132 ctx - optional changecontext
136 133 listsubrepos - if True, recurse into subrepositories
137 134 warn - optional function used for printing warnings
138 135 badfn - optional bad() callback for this matcher instead of the default
139 136 icasefs - make a matcher for wdir on case insensitive filesystems, which
140 137 normalizes the given patterns to the case in the filesystem
141 138
142 139 a pattern is one of:
143 140 'glob:<glob>' - a glob relative to cwd
144 141 're:<regexp>' - a regular expression
145 142 'path:<path>' - a path relative to repository root, which is matched
146 143 recursively
147 144 'rootfilesin:<path>' - a path relative to repository root, which is
148 145 matched non-recursively (will not match subdirectories)
149 146 'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
150 147 'relpath:<path>' - a path relative to cwd
151 148 'relre:<regexp>' - a regexp that needn't match the start of a name
152 149 'set:<fileset>' - a fileset expression
153 150 'include:<path>' - a file of patterns to read and include
154 151 'subinclude:<path>' - a file of patterns to match against files under
155 152 the same directory
156 153 '<something>' - a pattern of the specified default type
157 154
158 155 Usually a patternmatcher is returned:
159 156 >>> match(b'foo', b'.', [b're:.*\.c$', b'path:foo/a', b'*.py'])
160 157 <patternmatcher patterns='.*\\.c$|foo/a(?:/|$)|[^/]*\\.py$'>
161 158
162 159 Combining 'patterns' with 'include' (resp. 'exclude') gives an
163 160 intersectionmatcher (resp. a differencematcher):
164 161 >>> type(match(b'foo', b'.', [b're:.*\.c$'], include=[b'path:lib']))
165 162 <class 'mercurial.match.intersectionmatcher'>
166 163 >>> type(match(b'foo', b'.', [b're:.*\.c$'], exclude=[b'path:build']))
167 164 <class 'mercurial.match.differencematcher'>
168 165
169 166 Notice that, if 'patterns' is empty, an alwaysmatcher is returned:
170 167 >>> match(b'foo', b'.', [])
171 168 <alwaysmatcher>
172 169
173 170 The 'default' argument determines which kind of pattern is assumed if a
174 171 pattern has no prefix:
175 172 >>> match(b'foo', b'.', [b'.*\.c$'], default=b're')
176 173 <patternmatcher patterns='.*\\.c$'>
177 174 >>> match(b'foo', b'.', [b'main.py'], default=b'relpath')
178 175 <patternmatcher patterns='main\\.py(?:/|$)'>
179 176 >>> match(b'foo', b'.', [b'main.py'], default=b're')
180 177 <patternmatcher patterns='main.py'>
181 178
182 179 The primary use of matchers is to check whether a value (usually a file
183 180 name) matches against one of the patterns given at initialization. There
184 181 are two ways of doing this check.
185 182
186 183 >>> m = match(b'foo', b'', [b're:.*\.c$', b'relpath:a'])
187 184
188 185 1. Calling the matcher with a file name returns True if any pattern
189 186 matches that file name:
190 187 >>> m(b'a')
191 188 True
192 189 >>> m(b'main.c')
193 190 True
194 191 >>> m(b'test.py')
195 192 False
196 193
197 194 2. Using the exact() method only returns True if the file name matches one
198 195 of the exact patterns (i.e. not re: or glob: patterns):
199 196 >>> m.exact(b'a')
200 197 True
201 198 >>> m.exact(b'main.c')
202 199 False
203 200 """
204 201 normalize = _donormalize
205 202 if icasefs:
206 203 dirstate = ctx.repo().dirstate
207 204 dsnormalize = dirstate.normalize
208 205
209 206 def normalize(patterns, default, root, cwd, auditor, warn):
210 207 kp = _donormalize(patterns, default, root, cwd, auditor, warn)
211 208 kindpats = []
212 209 for kind, pats, source in kp:
213 210 if kind not in ('re', 'relre'): # regex can't be normalized
214 211 p = pats
215 212 pats = dsnormalize(pats)
216 213
217 214 # Preserve the original to handle a case only rename.
218 215 if p != pats and p in dirstate:
219 216 kindpats.append((kind, p, source))
220 217
221 218 kindpats.append((kind, pats, source))
222 219 return kindpats
223 220
224 221 if patterns:
225 222 kindpats = normalize(patterns, default, root, cwd, auditor, warn)
226 223 if _kindpatsalwaysmatch(kindpats):
227 224 m = alwaysmatcher(badfn)
228 225 else:
229 226 m = _buildkindpatsmatcher(patternmatcher, root, kindpats, ctx=ctx,
230 227 listsubrepos=listsubrepos, badfn=badfn)
231 228 else:
232 229 # It's a little strange that no patterns means to match everything.
233 230 # Consider changing this to match nothing (probably using nevermatcher).
234 231 m = alwaysmatcher(badfn)
235 232
236 233 if include:
237 234 kindpats = normalize(include, 'glob', root, cwd, auditor, warn)
238 235 im = _buildkindpatsmatcher(includematcher, root, kindpats, ctx=ctx,
239 236 listsubrepos=listsubrepos, badfn=None)
240 237 m = intersectmatchers(m, im)
241 238 if exclude:
242 239 kindpats = normalize(exclude, 'glob', root, cwd, auditor, warn)
243 240 em = _buildkindpatsmatcher(includematcher, root, kindpats, ctx=ctx,
244 241 listsubrepos=listsubrepos, badfn=None)
245 242 m = differencematcher(m, em)
246 243 return m
247 244
248 245 def exact(files, badfn=None):
249 246 return exactmatcher(files, badfn=badfn)
250 247
251 248 def always(badfn=None):
252 249 return alwaysmatcher(badfn)
253 250
254 251 def never(badfn=None):
255 252 return nevermatcher(badfn)
256 253
257 254 def badmatch(match, badfn):
258 255 """Make a copy of the given matcher, replacing its bad method with the given
259 256 one.
260 257 """
261 258 m = copy.copy(match)
262 259 m.bad = badfn
263 260 return m
264 261
265 262 def _donormalize(patterns, default, root, cwd, auditor=None, warn=None):
266 263 '''Convert 'kind:pat' from the patterns list to tuples with kind and
267 264 normalized and rooted patterns and with listfiles expanded.'''
268 265 kindpats = []
269 266 for kind, pat in [_patsplit(p, default) for p in patterns]:
270 267 if kind in cwdrelativepatternkinds:
271 268 pat = pathutil.canonpath(root, cwd, pat, auditor=auditor)
272 269 elif kind in ('relglob', 'path', 'rootfilesin', 'rootglob'):
273 270 pat = util.normpath(pat)
274 271 elif kind in ('listfile', 'listfile0'):
275 272 try:
276 273 files = util.readfile(pat)
277 274 if kind == 'listfile0':
278 275 files = files.split('\0')
279 276 else:
280 277 files = files.splitlines()
281 278 files = [f for f in files if f]
282 279 except EnvironmentError:
283 280 raise error.Abort(_("unable to read file list (%s)") % pat)
284 281 for k, p, source in _donormalize(files, default, root, cwd,
285 282 auditor, warn):
286 283 kindpats.append((k, p, pat))
287 284 continue
288 285 elif kind == 'include':
289 286 try:
290 287 fullpath = os.path.join(root, util.localpath(pat))
291 288 includepats = readpatternfile(fullpath, warn)
292 289 for k, p, source in _donormalize(includepats, default,
293 290 root, cwd, auditor, warn):
294 291 kindpats.append((k, p, source or pat))
295 292 except error.Abort as inst:
296 293 raise error.Abort('%s: %s' % (pat, inst[0]))
297 294 except IOError as inst:
298 295 if warn:
299 296 warn(_("skipping unreadable pattern file '%s': %s\n") %
300 297 (pat, stringutil.forcebytestr(inst.strerror)))
301 298 continue
302 299 # else: re or relre - which cannot be normalized
303 300 kindpats.append((kind, pat, ''))
304 301 return kindpats
305 302
306 303 class basematcher(object):
307 304
308 305 def __init__(self, badfn=None):
309 306 if badfn is not None:
310 307 self.bad = badfn
311 308
312 309 def __call__(self, fn):
313 310 return self.matchfn(fn)
314 311 # Callbacks related to how the matcher is used by dirstate.walk.
315 312 # Subscribers to these events must monkeypatch the matcher object.
316 313 def bad(self, f, msg):
317 314 '''Callback from dirstate.walk for each explicit file that can't be
318 315 found/accessed, with an error message.'''
319 316
320 317 # If an explicitdir is set, it will be called when an explicitly listed
321 318 # directory is visited.
322 319 explicitdir = None
323 320
325 322 # If a traversedir is set, it will be called when a directory discovered
325 322 # by recursive traversal is visited.
326 323 traversedir = None
327 324
328 325 @propertycache
329 326 def _files(self):
330 327 return []
331 328
332 329 def files(self):
333 330 '''Explicitly listed files or patterns or roots:
334 331 if no patterns or .always(): empty list,
335 332 if exact: list exact files,
336 333 if not .anypats(): list all files and dirs,
337 334 else: optimal roots'''
338 335 return self._files
339 336
340 337 @propertycache
341 338 def _fileset(self):
342 339 return set(self._files)
343 340
344 341 def exact(self, f):
345 342 '''Returns True if f is in .files().'''
346 343 return f in self._fileset
347 344
348 345 def matchfn(self, f):
349 346 return False
350 347
351 348 def visitdir(self, dir):
352 349 '''Decides whether a directory should be visited based on whether it
353 350 has potential matches in it or one of its subdirectories. This is
354 351 based on the match's primary, included, and excluded patterns.
355 352
356 353 Returns the string 'all' if the given directory and all subdirectories
357 354 should be visited. Otherwise returns True or False indicating whether
358 355 the given directory should be visited.
359 356 '''
360 357 return True
361 358
362 359 def visitchildrenset(self, dir):
363 360 '''Decides whether a directory should be visited based on whether it
364 361 has potential matches in it or one of its subdirectories, and
365 362 potentially lists which subdirectories of that directory should be
366 363 visited. This is based on the match's primary, included, and excluded
367 364 patterns.
368 365
369 366 This function is very similar to 'visitdir', and the following mapping
370 367 can be applied:
371 368
372 369 visitdir | visitchildrenset
373 370 ----------+-------------------
374 371 False | set()
375 372 'all' | 'all'
376 373 True | 'this' OR non-empty set of subdirs -or files- to visit
377 374
378 375 Example:
379 376 Assume matchers ['path:foo/bar', 'rootfilesin:qux'], we would return
380 377 the following values (assuming the implementation of visitchildrenset
381 378 is capable of recognizing this; some implementations are not).
382 379
383 380 '' -> {'foo', 'qux'}
384 381 'baz' -> set()
385 382 'foo' -> {'bar'}
386 383 # Ideally this would be 'all', but since the prefix nature of matchers
387 384 # is applied to the entire matcher, we have to downgrade this to
388 385 # 'this' due to the non-prefix 'rootfilesin'-kind matcher being mixed
389 386 # in.
390 387 'foo/bar' -> 'this'
391 388 'qux' -> 'this'
392 389
393 390 Important:
394 391 Most matchers do not know if they're representing files or
395 392 directories. They see ['path:dir/f'] and don't know whether 'f' is a
396 393 file or a directory, so visitchildrenset('dir') for most matchers will
397 394 return {'f'}, but if the matcher knows it's a file (like exactmatcher
398 395 does), it may return 'this'. Do not rely on the return being a set
399 396 indicating that there are no files in this dir to investigate (or
400 397 equivalently that if there are files to investigate in 'dir' that it
401 398 will always return 'this').
402 399 '''
403 400 return 'this'
404 401
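Editor's illustration, not part of match.py: one way a caller could consume the 'all' / 'this' / set protocol described above to prune a recursive walk. listdir is a hypothetical helper returning the entry names of a directory:

    def walk_pruned(matcher, listdir, dir=''):
        visit = matcher.visitchildrenset(dir)
        if not visit:                  # empty set: nothing matches below here
            return
        for name in listdir(dir):
            if visit not in ('all', 'this') and name not in visit:
                continue               # the matcher ruled this child out
            path = dir + '/' + name if dir else name
            yield path
            for sub in walk_pruned(matcher, listdir, path):
                yield sub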
405 402 def always(self):
406 403 '''Matcher will match everything and .files() will be empty --
407 404 optimization might be possible.'''
408 405 return False
409 406
410 407 def isexact(self):
411 408 '''Matcher will match exactly the list of files in .files() --
412 409 optimization might be possible.'''
413 410 return False
414 411
415 412 def prefix(self):
416 413 '''Matcher will match the paths in .files() recursively --
417 414 optimization might be possible.'''
418 415 return False
419 416
420 417 def anypats(self):
421 418 '''None of .always(), .isexact(), and .prefix() is true --
422 419 optimizations will be difficult.'''
423 420 return not self.always() and not self.isexact() and not self.prefix()
424 421
425 422 class alwaysmatcher(basematcher):
426 423 '''Matches everything.'''
427 424
428 425 def __init__(self, badfn=None):
429 426 super(alwaysmatcher, self).__init__(badfn)
430 427
431 428 def always(self):
432 429 return True
433 430
434 431 def matchfn(self, f):
435 432 return True
436 433
437 434 def visitdir(self, dir):
438 435 return 'all'
439 436
440 437 def visitchildrenset(self, dir):
441 438 return 'all'
442 439
443 440 def __repr__(self):
444 441 return r'<alwaysmatcher>'
445 442
446 443 class nevermatcher(basematcher):
447 444 '''Matches nothing.'''
448 445
449 446 def __init__(self, badfn=None):
450 447 super(nevermatcher, self).__init__(badfn)
451 448
452 449 # It's a little weird to say that the nevermatcher is an exact matcher
453 450 # or a prefix matcher, but it seems to make sense to let callers take
454 451 # fast paths based on either. There will be no exact matches, nor any
455 452 # prefixes (files() returns []), so fast paths iterating over them should
456 453 # be efficient (and correct).
457 454 def isexact(self):
458 455 return True
459 456
460 457 def prefix(self):
461 458 return True
462 459
463 460 def visitdir(self, dir):
464 461 return False
465 462
466 463 def visitchildrenset(self, dir):
467 464 return set()
468 465
469 466 def __repr__(self):
470 467 return r'<nevermatcher>'
471 468
472 469 class predicatematcher(basematcher):
473 470 """A matcher adapter for a simple boolean function"""
474 471
475 472 def __init__(self, predfn, predrepr=None, badfn=None):
476 473 super(predicatematcher, self).__init__(badfn)
477 474 self.matchfn = predfn
478 475 self._predrepr = predrepr
479 476
480 477 @encoding.strmethod
481 478 def __repr__(self):
482 479 s = (stringutil.buildrepr(self._predrepr)
483 480 or pycompat.byterepr(self.matchfn))
484 481 return '<predicatenmatcher pred=%s>' % s
485 482
486 483 def normalizerootdir(dir, funcname):
487 484 if dir == '.':
488 485 util.nouideprecwarn("match.%s() no longer accepts "
489 486 "'.', use '' instead." % funcname, '5.1')
490 487 return ''
491 488 return dir
492 489
493 490
494 491 class patternmatcher(basematcher):
495 492 """Matches a set of (kind, pat, source) against a 'root' directory.
496 493
497 494 >>> kindpats = [
498 495 ... (b're', br'.*\.c$', b''),
499 496 ... (b'path', b'foo/a', b''),
500 497 ... (b'relpath', b'b', b''),
501 498 ... (b'glob', b'*.h', b''),
502 499 ... ]
503 500 >>> m = patternmatcher(b'foo', kindpats)
504 501 >>> m(b'main.c') # matches re:.*\.c$
505 502 True
506 503 >>> m(b'b.txt')
507 504 False
508 505 >>> m(b'foo/a') # matches path:foo/a
509 506 True
510 507 >>> m(b'a') # does not match path:b, since 'root' is 'foo'
511 508 False
512 509 >>> m(b'b') # matches relpath:b, since 'root' is 'foo'
513 510 True
514 511 >>> m(b'lib.h') # matches glob:*.h
515 512 True
516 513
517 514 >>> m.files()
518 515 ['', 'foo/a', 'b', '']
519 516 >>> m.exact(b'foo/a')
520 517 True
521 518 >>> m.exact(b'b')
522 519 True
523 520 >>> m.exact(b'lib.h') # exact matches are for (rel)path kinds
524 521 False
525 522 """
526 523
527 524 def __init__(self, root, kindpats, badfn=None):
528 525 super(patternmatcher, self).__init__(badfn)
529 526
530 527 self._files = _explicitfiles(kindpats)
531 528 self._prefix = _prefix(kindpats)
532 529 self._pats, self.matchfn = _buildmatch(kindpats, '$', root)
533 530
534 531 @propertycache
535 532 def _dirs(self):
536 533 return set(util.dirs(self._fileset))
537 534
538 535 def visitdir(self, dir):
539 536 dir = normalizerootdir(dir, 'visitdir')
540 537 if self._prefix and dir in self._fileset:
541 538 return 'all'
542 539 return (dir in self._fileset or
543 540 dir in self._dirs or
544 541 any(parentdir in self._fileset
545 542 for parentdir in util.finddirs(dir)))
546 543
547 544 def visitchildrenset(self, dir):
548 545 ret = self.visitdir(dir)
549 546 if ret is True:
550 547 return 'this'
551 548 elif not ret:
552 549 return set()
553 550 assert ret == 'all'
554 551 return 'all'
555 552
556 553 def prefix(self):
557 554 return self._prefix
558 555
559 556 @encoding.strmethod
560 557 def __repr__(self):
561 558 return ('<patternmatcher patterns=%r>' % pycompat.bytestr(self._pats))
562 559
563 560 # This is basically a reimplementation of util.dirs that stores the children
564 561 # instead of just a count of them, plus a small optional optimization to avoid
565 562 # some directories we don't need.
566 563 class _dirchildren(object):
567 564 def __init__(self, paths, onlyinclude=None):
568 565 self._dirs = {}
569 566 self._onlyinclude = onlyinclude or []
570 567 addpath = self.addpath
571 568 for f in paths:
572 569 addpath(f)
573 570
574 571 def addpath(self, path):
575 572 if path == '':
576 573 return
577 574 dirs = self._dirs
578 575 findsplitdirs = _dirchildren._findsplitdirs
579 576 for d, b in findsplitdirs(path):
580 577 if d not in self._onlyinclude:
581 578 continue
582 579 dirs.setdefault(d, set()).add(b)
583 580
584 581 @staticmethod
585 582 def _findsplitdirs(path):
586 583 # yields (dirname, basename) tuples, walking back to the root. This is
587 584 # very similar to util.finddirs, except:
588 585 # - produces a (dirname, basename) tuple, not just 'dirname'
589 586 # Unlike manifest._splittopdir, this does not suffix `dirname` with a
590 587 # slash.
591 588 oldpos = len(path)
592 589 pos = path.rfind('/')
593 590 while pos != -1:
594 591 yield path[:pos], path[pos + 1:oldpos]
595 592 oldpos = pos
596 593 pos = path.rfind('/', 0, pos)
597 594 yield '', path[:oldpos]
598 595
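Editor's illustration, not in the source: the generator above walks back toward the root, e.g.

    >>> list(_dirchildren._findsplitdirs('a/b/c'))
    [('a/b', 'c'), ('a', 'b'), ('', 'a')]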
599 596 def get(self, path):
600 597 return self._dirs.get(path, set())
601 598
602 599 class includematcher(basematcher):
603 600
604 601 def __init__(self, root, kindpats, badfn=None):
605 602 super(includematcher, self).__init__(badfn)
606 603
607 604 self._pats, self.matchfn = _buildmatch(kindpats, '(?:/|$)', root)
608 605 self._prefix = _prefix(kindpats)
609 606 roots, dirs, parents = _rootsdirsandparents(kindpats)
610 607 # roots are directories which are recursively included.
611 608 self._roots = set(roots)
612 609 # dirs are directories which are non-recursively included.
613 610 self._dirs = set(dirs)
614 611 # parents are directories which are non-recursively included because
615 612 # they are needed to get to items in _dirs or _roots.
616 613 self._parents = parents
617 614
618 615 def visitdir(self, dir):
619 616 dir = normalizerootdir(dir, 'visitdir')
620 617 if self._prefix and dir in self._roots:
621 618 return 'all'
622 619 return (dir in self._roots or
623 620 dir in self._dirs or
624 621 dir in self._parents or
625 622 any(parentdir in self._roots
626 623 for parentdir in util.finddirs(dir)))
627 624
628 625 @propertycache
629 626 def _allparentschildren(self):
630 627 # It may seem odd that we add dirs, roots, and parents, and then
631 628 # restrict to only parents. This is to catch the case of:
632 629 # dirs = ['foo/bar']
633 630 # parents = ['foo']
634 631 # if we asked for the children of 'foo', but had only added
635 632 # self._parents, we wouldn't be able to respond ['bar'].
636 633 return _dirchildren(
637 634 itertools.chain(self._dirs, self._roots, self._parents),
638 635 onlyinclude=self._parents)
639 636
640 637 def visitchildrenset(self, dir):
641 638 if self._prefix and dir in self._roots:
642 639 return 'all'
643 640 # Note: this does *not* include the 'dir in self._parents' case from
644 641 # visitdir, that's handled below.
645 642 if ('' in self._roots or
646 643 dir in self._roots or
647 644 dir in self._dirs or
648 645 any(parentdir in self._roots
649 646 for parentdir in util.finddirs(dir))):
650 647 return 'this'
651 648
652 649 if dir in self._parents:
653 650 return self._allparentschildren.get(dir) or set()
654 651 return set()
655 652
656 653 @encoding.strmethod
657 654 def __repr__(self):
658 655 return ('<includematcher includes=%r>' % pycompat.bytestr(self._pats))
659 656
660 657 class exactmatcher(basematcher):
661 658 r'''Matches the input files exactly. They are interpreted as paths, not
662 659 patterns (so no kind-prefixes).
663 660
664 661 >>> m = exactmatcher([b'a.txt', br're:.*\.c$'])
665 662 >>> m(b'a.txt')
666 663 True
667 664 >>> m(b'b.txt')
668 665 False
669 666
670 667 Input files that would be matched are exactly those returned by .files()
671 668 >>> m.files()
672 669 ['a.txt', 're:.*\\.c$']
673 670
674 671 So pattern 're:.*\.c$' is not considered as a regex, but as a file name
675 672 >>> m(b'main.c')
676 673 False
677 674 >>> m(br're:.*\.c$')
678 675 True
679 676 '''
680 677
681 678 def __init__(self, files, badfn=None):
682 679 super(exactmatcher, self).__init__(badfn)
683 680
684 681 if isinstance(files, list):
685 682 self._files = files
686 683 else:
687 684 self._files = list(files)
688 685
689 686 matchfn = basematcher.exact
690 687
691 688 @propertycache
692 689 def _dirs(self):
693 690 return set(util.dirs(self._fileset))
694 691
695 692 def visitdir(self, dir):
696 693 dir = normalizerootdir(dir, 'visitdir')
697 694 return dir in self._dirs
698 695
699 696 def visitchildrenset(self, dir):
700 697 dir = normalizerootdir(dir, 'visitchildrenset')
701 698
702 699 if not self._fileset or dir not in self._dirs:
703 700 return set()
704 701
705 702 candidates = self._fileset | self._dirs - {''}
706 703 if dir != '':
707 704 d = dir + '/'
708 705 candidates = set(c[len(d):] for c in candidates if
709 706 c.startswith(d))
710 707 # self._dirs includes all of the directories, recursively, so if
711 708 # we're attempting to match foo/bar/baz.txt, it'll have '', 'foo',
712 709 # 'foo/bar' in it. Thus we can safely ignore a candidate that has a
714 711 # '/' in it, indicating it's for a subdir-of-a-subdir; the
714 711 # immediate subdir will be in there without a slash.
715 712 ret = {c for c in candidates if '/' not in c}
716 713 # We really do not expect ret to be empty, since that would imply that
717 714 # there's something in _dirs that didn't have a file in _fileset.
718 715 assert ret
719 716 return ret
720 717
721 718 def isexact(self):
722 719 return True
723 720
724 721 @encoding.strmethod
725 722 def __repr__(self):
726 723 return ('<exactmatcher files=%r>' % self._files)
727 724
728 725 class differencematcher(basematcher):
729 726 '''Composes two matchers by matching if the first matches and the second
730 727 does not.
731 728
732 729 The second matcher's non-matching-attributes (bad, explicitdir,
733 730 traversedir) are ignored.
734 731 '''
735 732 def __init__(self, m1, m2):
736 733 super(differencematcher, self).__init__()
737 734 self._m1 = m1
738 735 self._m2 = m2
739 736 self.bad = m1.bad
740 737 self.explicitdir = m1.explicitdir
741 738 self.traversedir = m1.traversedir
742 739
743 740 def matchfn(self, f):
744 741 return self._m1(f) and not self._m2(f)
745 742
746 743 @propertycache
747 744 def _files(self):
748 745 if self.isexact():
749 746 return [f for f in self._m1.files() if self(f)]
750 747 # If m1 is not an exact matcher, we can't easily figure out the set of
751 748 # files, because its files() are not always files. For example, if
752 749 # m1 is "path:dir" and m2 is "rootfilesin:.", we don't
753 750 # want to remove "dir" from the set even though it would match m2,
754 751 # because the "dir" in m1 may not be a file.
755 752 return self._m1.files()
756 753
757 754 def visitdir(self, dir):
758 755 if self._m2.visitdir(dir) == 'all':
759 756 return False
760 757 elif not self._m2.visitdir(dir):
761 758 # m2 does not match dir, we can return 'all' here if possible
762 759 return self._m1.visitdir(dir)
763 760 return bool(self._m1.visitdir(dir))
764 761
765 762 def visitchildrenset(self, dir):
766 763 m2_set = self._m2.visitchildrenset(dir)
767 764 if m2_set == 'all':
768 765 return set()
769 766 m1_set = self._m1.visitchildrenset(dir)
770 767 # Possible values for m1: 'all', 'this', set(...), set()
771 768 # Possible values for m2: 'this', set(...), set()
772 769 # If m2 has nothing under here that we care about, return m1, even if
773 770 # it's 'all'. This is a change in behavior from visitdir, which would
774 771 # return True, not 'all', for some reason.
775 772 if not m2_set:
776 773 return m1_set
777 774 if m1_set in ['all', 'this']:
778 775 # Never return 'all' here if m2_set is any kind of non-empty (either
779 776 # 'this' or set(foo)), since m2 might return set() for a
780 777 # subdirectory.
781 778 return 'this'
782 779 # Possible values for m1: set(...), set()
783 780 # Possible values for m2: 'this', set(...)
784 781 # We ignore m2's set results. They're possibly incorrect:
785 782 # m1 = path:dir/subdir, m2=rootfilesin:dir, visitchildrenset(''):
786 783 # m1 returns {'dir'}, m2 returns {'dir'}, if we subtracted we'd
787 784 # return set(), which is *not* correct, we still need to visit 'dir'!
788 785 return m1_set
789 786
790 787 def isexact(self):
791 788 return self._m1.isexact()
792 789
793 790 @encoding.strmethod
794 791 def __repr__(self):
795 792 return ('<differencematcher m1=%r, m2=%r>' % (self._m1, self._m2))
796 793
797 794 def intersectmatchers(m1, m2):
798 795 '''Composes two matchers by matching if both of them match.
799 796
800 797 The second matcher's non-matching-attributes (bad, explicitdir,
801 798 traversedir) are ignored.
802 799 '''
803 800 if m1 is None or m2 is None:
804 801 return m1 or m2
805 802 if m1.always():
806 803 m = copy.copy(m2)
807 804 # TODO: Consider encapsulating these things in a class so there's only
808 805 # one thing to copy from m1.
809 806 m.bad = m1.bad
810 807 m.explicitdir = m1.explicitdir
811 808 m.traversedir = m1.traversedir
812 809 return m
813 810 if m2.always():
814 811 m = copy.copy(m1)
815 812 return m
816 813 return intersectionmatcher(m1, m2)
817 814
818 815 class intersectionmatcher(basematcher):
819 816 def __init__(self, m1, m2):
820 817 super(intersectionmatcher, self).__init__()
821 818 self._m1 = m1
822 819 self._m2 = m2
823 820 self.bad = m1.bad
824 821 self.explicitdir = m1.explicitdir
825 822 self.traversedir = m1.traversedir
826 823
827 824 @propertycache
828 825 def _files(self):
829 826 if self.isexact():
830 827 m1, m2 = self._m1, self._m2
831 828 if not m1.isexact():
832 829 m1, m2 = m2, m1
833 830 return [f for f in m1.files() if m2(f)]
834 831 # If neither m1 nor m2 is an exact matcher, we can't easily intersect
835 832 # the set of files, because their files() are not always files. For
836 833 # example, if intersecting a matcher "-I glob:foo.txt" with matcher of
837 834 # "path:dir2", we don't want to remove "dir2" from the set.
838 835 return self._m1.files() + self._m2.files()
839 836
840 837 def matchfn(self, f):
841 838 return self._m1(f) and self._m2(f)
842 839
843 840 def visitdir(self, dir):
844 841 visit1 = self._m1.visitdir(dir)
845 842 if visit1 == 'all':
846 843 return self._m2.visitdir(dir)
847 844 # bool() because visit1=True + visit2='all' should not be 'all'
848 845 return bool(visit1 and self._m2.visitdir(dir))
849 846
850 847 def visitchildrenset(self, dir):
851 848 m1_set = self._m1.visitchildrenset(dir)
852 849 if not m1_set:
853 850 return set()
854 851 m2_set = self._m2.visitchildrenset(dir)
855 852 if not m2_set:
856 853 return set()
857 854
858 855 if m1_set == 'all':
859 856 return m2_set
860 857 elif m2_set == 'all':
861 858 return m1_set
862 859
863 860 if m1_set == 'this' or m2_set == 'this':
864 861 return 'this'
865 862
866 863 assert isinstance(m1_set, set) and isinstance(m2_set, set)
867 864 return m1_set.intersection(m2_set)
868 865
869 866 def always(self):
870 867 return self._m1.always() and self._m2.always()
871 868
872 869 def isexact(self):
873 870 return self._m1.isexact() or self._m2.isexact()
874 871
875 872 @encoding.strmethod
876 873 def __repr__(self):
877 874 return ('<intersectionmatcher m1=%r, m2=%r>' % (self._m1, self._m2))
878 875
879 876 class subdirmatcher(basematcher):
880 877 """Adapt a matcher to work on a subdirectory only.
881 878
882 879 The paths are remapped to remove/insert the path as needed:
883 880
884 881 >>> from . import pycompat
885 882 >>> m1 = match(b'root', b'', [b'a.txt', b'sub/b.txt'])
886 883 >>> m2 = subdirmatcher(b'sub', m1)
887 884 >>> m2(b'a.txt')
888 885 False
889 886 >>> m2(b'b.txt')
890 887 True
891 888 >>> m2.matchfn(b'a.txt')
892 889 False
893 890 >>> m2.matchfn(b'b.txt')
894 891 True
895 892 >>> m2.files()
896 893 ['b.txt']
897 894 >>> m2.exact(b'b.txt')
898 895 True
899 896 >>> def bad(f, msg):
900 897 ... print(pycompat.sysstr(b"%s: %s" % (f, msg)))
901 898 >>> m1.bad = bad
902 899 >>> m2.bad(b'x.txt', b'No such file')
903 900 sub/x.txt: No such file
904 901 """
905 902
906 903 def __init__(self, path, matcher):
907 904 super(subdirmatcher, self).__init__()
908 905 self._path = path
909 906 self._matcher = matcher
910 907 self._always = matcher.always()
911 908
912 909 self._files = [f[len(path) + 1:] for f in matcher._files
913 910 if f.startswith(path + "/")]
914 911
915 912 # If the parent repo had a path to this subrepo and the matcher is
916 913 # a prefix matcher, this submatcher always matches.
917 914 if matcher.prefix():
918 915 self._always = any(f == path for f in matcher._files)
919 916
920 917 def bad(self, f, msg):
921 918 self._matcher.bad(self._path + "/" + f, msg)
922 919
923 920 def matchfn(self, f):
924 921 # Some information is lost in the superclass's constructor, so we
925 922 # can not accurately create the matching function for the subdirectory
926 923 # from the inputs. Instead, we override matchfn() and visitdir() to
927 924 # call the original matcher with the subdirectory path prepended.
928 925 return self._matcher.matchfn(self._path + "/" + f)
929 926
930 927 def visitdir(self, dir):
931 928 dir = normalizerootdir(dir, 'visitdir')
932 929 if dir == '':
933 930 dir = self._path
934 931 else:
935 932 dir = self._path + "/" + dir
936 933 return self._matcher.visitdir(dir)
937 934
938 935 def visitchildrenset(self, dir):
939 936 dir = normalizerootdir(dir, 'visitchildrenset')
940 937 if dir == '':
941 938 dir = self._path
942 939 else:
943 940 dir = self._path + "/" + dir
944 941 return self._matcher.visitchildrenset(dir)
945 942
946 943 def always(self):
947 944 return self._always
948 945
949 946 def prefix(self):
950 947 return self._matcher.prefix() and not self._always
951 948
952 949 @encoding.strmethod
953 950 def __repr__(self):
954 951 return ('<subdirmatcher path=%r, matcher=%r>' %
955 952 (self._path, self._matcher))
956 953
957 954 class prefixdirmatcher(basematcher):
958 955 """Adapt a matcher to work on a parent directory.
959 956
960 957 The matcher's non-matching-attributes (bad, explicitdir, traversedir) are
961 958 ignored.
962 959
963 960 The prefix path should usually be the relative path from the root of
964 961 this matcher to the root of the wrapped matcher.
965 962
966 963 >>> m1 = match(util.localpath(b'root/d/e'), b'f', [b'../a.txt', b'b.txt'])
967 964 >>> m2 = prefixdirmatcher(b'd/e', m1)
968 965 >>> m2(b'a.txt')
969 966 False
970 967 >>> m2(b'd/e/a.txt')
971 968 True
972 969 >>> m2(b'd/e/b.txt')
973 970 False
974 971 >>> m2.files()
975 972 ['d/e/a.txt', 'd/e/f/b.txt']
976 973 >>> m2.exact(b'd/e/a.txt')
977 974 True
978 975 >>> m2.visitdir(b'd')
979 976 True
980 977 >>> m2.visitdir(b'd/e')
981 978 True
982 979 >>> m2.visitdir(b'd/e/f')
983 980 True
984 981 >>> m2.visitdir(b'd/e/g')
985 982 False
986 983 >>> m2.visitdir(b'd/ef')
987 984 False
988 985 """
989 986
990 987 def __init__(self, path, matcher, badfn=None):
991 988 super(prefixdirmatcher, self).__init__(badfn)
992 989 if not path:
993 990 raise error.ProgrammingError('prefix path must not be empty')
994 991 self._path = path
995 992 self._pathprefix = path + '/'
996 993 self._matcher = matcher
997 994
998 995 @propertycache
999 996 def _files(self):
1000 997 return [self._pathprefix + f for f in self._matcher._files]
1001 998
1002 999 def matchfn(self, f):
1003 1000 if not f.startswith(self._pathprefix):
1004 1001 return False
1005 1002 return self._matcher.matchfn(f[len(self._pathprefix):])
1006 1003
1007 1004 @propertycache
1008 1005 def _pathdirs(self):
1009 1006 return set(util.finddirs(self._path))
1010 1007
1011 1008 def visitdir(self, dir):
1012 1009 if dir == self._path:
1013 1010 return self._matcher.visitdir('')
1014 1011 if dir.startswith(self._pathprefix):
1015 1012 return self._matcher.visitdir(dir[len(self._pathprefix):])
1016 1013 return dir in self._pathdirs
1017 1014
1018 1015 def visitchildrenset(self, dir):
1019 1016 if dir == self._path:
1020 1017 return self._matcher.visitchildrenset('')
1021 1018 if dir.startswith(self._pathprefix):
1022 1019 return self._matcher.visitchildrenset(dir[len(self._pathprefix):])
1023 1020 if dir in self._pathdirs:
1024 1021 return 'this'
1025 1022 return set()
1026 1023
1027 1024 def isexact(self):
1028 1025 return self._matcher.isexact()
1029 1026
1030 1027 def prefix(self):
1031 1028 return self._matcher.prefix()
1032 1029
1033 1030 @encoding.strmethod
1034 1031 def __repr__(self):
1035 1032 return ('<prefixdirmatcher path=%r, matcher=%r>'
1036 1033 % (pycompat.bytestr(self._path), self._matcher))
1037 1034
1038 1035 class unionmatcher(basematcher):
1039 1036 """A matcher that is the union of several matchers.
1040 1037
1041 1038 The non-matching-attributes (bad, explicitdir, traversedir) are taken from
1042 1039 the first matcher.
1043 1040 """
1044 1041
1045 1042 def __init__(self, matchers):
1046 1043 m1 = matchers[0]
1047 1044 super(unionmatcher, self).__init__()
1048 1045 self.explicitdir = m1.explicitdir
1049 1046 self.traversedir = m1.traversedir
1050 1047 self._matchers = matchers
1051 1048
1052 1049 def matchfn(self, f):
1053 1050 for match in self._matchers:
1054 1051 if match(f):
1055 1052 return True
1056 1053 return False
1057 1054
1058 1055 def visitdir(self, dir):
1059 1056 r = False
1060 1057 for m in self._matchers:
1061 1058 v = m.visitdir(dir)
1062 1059 if v == 'all':
1063 1060 return v
1064 1061 r |= v
1065 1062 return r
1066 1063
1067 1064 def visitchildrenset(self, dir):
1068 1065 r = set()
1069 1066 this = False
1070 1067 for m in self._matchers:
1071 1068 v = m.visitchildrenset(dir)
1072 1069 if not v:
1073 1070 continue
1074 1071 if v == 'all':
1075 1072 return v
1076 1073 if this or v == 'this':
1077 1074 this = True
1078 1075 # don't break, we might have an 'all' in here.
1079 1076 continue
1080 1077 assert isinstance(v, set)
1081 1078 r = r.union(v)
1082 1079 if this:
1083 1080 return 'this'
1084 1081 return r
1085 1082
1086 1083 @encoding.strmethod
1087 1084 def __repr__(self):
1088 1085 return ('<unionmatcher matchers=%r>' % self._matchers)
1089 1086
1090 1087 def patkind(pattern, default=None):
1091 1088 '''If pattern is 'kind:pat' with a known kind, return kind.
1092 1089
1093 1090 >>> patkind(br're:.*\.c$')
1094 1091 're'
1095 1092 >>> patkind(b'glob:*.c')
1096 1093 'glob'
1097 1094 >>> patkind(b'relpath:test.py')
1098 1095 'relpath'
1099 1096 >>> patkind(b'main.py')
1100 1097 >>> patkind(b'main.py', default=b're')
1101 1098 're'
1102 1099 '''
1103 1100 return _patsplit(pattern, default)[0]
1104 1101
1105 1102 def _patsplit(pattern, default):
1106 1103 """Split a string into the optional pattern kind prefix and the actual
1107 1104 pattern."""
1108 1105 if ':' in pattern:
1109 1106 kind, pat = pattern.split(':', 1)
1110 1107 if kind in allpatternkinds:
1111 1108 return kind, pat
1112 1109 return default, pattern
1113 1110
1114 1111 def _globre(pat):
1115 1112 r'''Convert an extended glob string to a regexp string.
1116 1113
1117 1114 >>> from . import pycompat
1118 1115 >>> def bprint(s):
1119 1116 ... print(pycompat.sysstr(s))
1120 1117 >>> bprint(_globre(br'?'))
1121 1118 .
1122 1119 >>> bprint(_globre(br'*'))
1123 1120 [^/]*
1124 1121 >>> bprint(_globre(br'**'))
1125 1122 .*
1126 1123 >>> bprint(_globre(br'**/a'))
1127 1124 (?:.*/)?a
1128 1125 >>> bprint(_globre(br'a/**/b'))
1129 1126 a/(?:.*/)?b
1130 1127 >>> bprint(_globre(br'[a*?!^][^b][!c]'))
1131 1128 [a*?!^][\^b][^c]
1132 1129 >>> bprint(_globre(br'{a,b}'))
1133 1130 (?:a|b)
1134 1131 >>> bprint(_globre(br'.\*\?'))
1135 1132 \.\*\?
1136 1133 '''
1137 1134 i, n = 0, len(pat)
1138 1135 res = ''
1139 1136 group = 0
1140 1137 escape = util.stringutil.regexbytesescapemap.get
1141 1138 def peek():
1142 1139 return i < n and pat[i:i + 1]
1143 1140 while i < n:
1144 1141 c = pat[i:i + 1]
1145 1142 i += 1
1146 1143 if c not in '*?[{},\\':
1147 1144 res += escape(c, c)
1148 1145 elif c == '*':
1149 1146 if peek() == '*':
1150 1147 i += 1
1151 1148 if peek() == '/':
1152 1149 i += 1
1153 1150 res += '(?:.*/)?'
1154 1151 else:
1155 1152 res += '.*'
1156 1153 else:
1157 1154 res += '[^/]*'
1158 1155 elif c == '?':
1159 1156 res += '.'
1160 1157 elif c == '[':
1161 1158 j = i
1162 1159 if j < n and pat[j:j + 1] in '!]':
1163 1160 j += 1
1164 1161 while j < n and pat[j:j + 1] != ']':
1165 1162 j += 1
1166 1163 if j >= n:
1167 1164 res += '\\['
1168 1165 else:
1169 1166 stuff = pat[i:j].replace('\\','\\\\')
1170 1167 i = j + 1
1171 1168 if stuff[0:1] == '!':
1172 1169 stuff = '^' + stuff[1:]
1173 1170 elif stuff[0:1] == '^':
1174 1171 stuff = '\\' + stuff
1175 1172 res = '%s[%s]' % (res, stuff)
1176 1173 elif c == '{':
1177 1174 group += 1
1178 1175 res += '(?:'
1179 1176 elif c == '}' and group:
1180 1177 res += ')'
1181 1178 group -= 1
1182 1179 elif c == ',' and group:
1183 1180 res += '|'
1184 1181 elif c == '\\':
1185 1182 p = peek()
1186 1183 if p:
1187 1184 i += 1
1188 1185 res += escape(p, p)
1189 1186 else:
1190 1187 res += escape(c, c)
1191 1188 else:
1192 1189 res += escape(c, c)
1193 1190 return res
1194 1191
1195 1192 def _regex(kind, pat, globsuffix):
1196 1193 '''Convert a (normalized) pattern of any kind into a
1197 1194 regular expression.
1198 1195 globsuffix is appended to the regexp of globs.'''
1199 1196
1200 if rustext is not None:
1197 if rustmod is not None:
1201 1198 try:
1202 return rustext.filepatterns.build_single_regex(
1199 return rustmod.build_single_regex(
1203 1200 kind,
1204 1201 pat,
1205 1202 globsuffix
1206 1203 )
1207 except rustext.filepatterns.PatternError:
1204 except rustmod.PatternError:
1208 1205 raise error.ProgrammingError(
1209 1206 'not a regex pattern: %s:%s' % (kind, pat)
1210 1207 )
1211 1208
1212 1209 if not pat and kind in ('glob', 'relpath'):
1213 1210 return ''
1214 1211 if kind == 're':
1215 1212 return pat
1216 1213 if kind in ('path', 'relpath'):
1217 1214 if pat == '.':
1218 1215 return ''
1219 1216 return util.stringutil.reescape(pat) + '(?:/|$)'
1220 1217 if kind == 'rootfilesin':
1221 1218 if pat == '.':
1222 1219 escaped = ''
1223 1220 else:
1224 1221 # Pattern is a directory name.
1225 1222 escaped = util.stringutil.reescape(pat) + '/'
1226 1223 # Anything after the pattern must be a non-directory.
1227 1224 return escaped + '[^/]+$'
1228 1225 if kind == 'relglob':
1229 1226 return '(?:|.*/)' + _globre(pat) + globsuffix
1230 1227 if kind == 'relre':
1231 1228 if pat.startswith('^'):
1232 1229 return pat
1233 1230 return '.*' + pat
1234 1231 if kind in ('glob', 'rootglob'):
1235 1232 return _globre(pat) + globsuffix
1236 1233 raise error.ProgrammingError('not a regex pattern: %s:%s' % (kind, pat))
1237 1234
1238 1235 def _buildmatch(kindpats, globsuffix, root):
1239 1236 '''Return regexp string and a matcher function for kindpats.
1240 1237 globsuffix is appended to the regexp of globs.'''
1241 1238 matchfuncs = []
1242 1239
1243 1240 subincludes, kindpats = _expandsubinclude(kindpats, root)
1244 1241 if subincludes:
1245 1242 submatchers = {}
1246 1243 def matchsubinclude(f):
1247 1244 for prefix, matcherargs in subincludes:
1248 1245 if f.startswith(prefix):
1249 1246 mf = submatchers.get(prefix)
1250 1247 if mf is None:
1251 1248 mf = match(*matcherargs)
1252 1249 submatchers[prefix] = mf
1253 1250
1254 1251 if mf(f[len(prefix):]):
1255 1252 return True
1256 1253 return False
1257 1254 matchfuncs.append(matchsubinclude)
1258 1255
1259 1256 regex = ''
1260 1257 if kindpats:
1261 1258 if all(k == 'rootfilesin' for k, p, s in kindpats):
1262 1259 dirs = {p for k, p, s in kindpats}
1263 1260 def mf(f):
1264 1261 i = f.rfind('/')
1265 1262 if i >= 0:
1266 1263 dir = f[:i]
1267 1264 else:
1268 1265 dir = '.'
1269 1266 return dir in dirs
1270 1267 regex = b'rootfilesin: %s' % stringutil.pprint(list(sorted(dirs)))
1271 1268 matchfuncs.append(mf)
1272 1269 else:
1273 1270 regex, mf = _buildregexmatch(kindpats, globsuffix)
1274 1271 matchfuncs.append(mf)
1275 1272
1276 1273 if len(matchfuncs) == 1:
1277 1274 return regex, matchfuncs[0]
1278 1275 else:
1279 1276 return regex, lambda f: any(mf(f) for mf in matchfuncs)
1280 1277
1281 1278 MAX_RE_SIZE = 20000
1282 1279
1283 1280 def _joinregexes(regexps):
1284 1281 """gather multiple regular expressions into a single one"""
1285 1282 return '|'.join(regexps)
1286 1283
1287 1284 def _buildregexmatch(kindpats, globsuffix):
1288 1285 """Build a match function from a list of kinds and kindpats,
1289 1286 return regexp string and a matcher function.
1290 1287
1291 1288 Test too large input
1292 1289 >>> _buildregexmatch([
1293 1290 ... (b'relglob', b'?' * MAX_RE_SIZE, b'')
1294 1291 ... ], b'$')
1295 1292 Traceback (most recent call last):
1296 1293 ...
1297 1294 Abort: matcher pattern is too long (20009 bytes)
1298 1295 """
1299 1296 try:
1300 1297 allgroups = []
1301 1298 regexps = [_regex(k, p, globsuffix) for (k, p, s) in kindpats]
1302 1299 fullregexp = _joinregexes(regexps)
1303 1300
1304 1301 startidx = 0
1305 1302 groupsize = 0
1306 1303 for idx, r in enumerate(regexps):
1307 1304 piecesize = len(r)
1308 1305 if piecesize > MAX_RE_SIZE:
1309 1306 msg = _("matcher pattern is too long (%d bytes)") % piecesize
1310 1307 raise error.Abort(msg)
1311 1308 elif (groupsize + piecesize) > MAX_RE_SIZE:
1312 1309 group = regexps[startidx:idx]
1313 1310 allgroups.append(_joinregexes(group))
1314 1311 startidx = idx
1315 1312 groupsize = 0
1316 1313 groupsize += piecesize + 1
1317 1314
1318 1315 if startidx == 0:
1319 1316 matcher = _rematcher(fullregexp)
1320 1317 func = lambda s: bool(matcher(s))
1321 1318 else:
1322 1319 group = regexps[startidx:]
1323 1320 allgroups.append(_joinregexes(group))
1324 1321 allmatchers = [_rematcher(g) for g in allgroups]
1325 1322 func = lambda s: any(m(s) for m in allmatchers)
1326 1323 return fullregexp, func
1327 1324 except re.error:
1328 1325 for k, p, s in kindpats:
1329 1326 try:
1330 1327 _rematcher(_regex(k, p, globsuffix))
1331 1328 except re.error:
1332 1329 if s:
1333 1330 raise error.Abort(_("%s: invalid pattern (%s): %s") %
1334 1331 (s, k, p))
1335 1332 else:
1336 1333 raise error.Abort(_("invalid pattern (%s): %s") % (k, p))
1337 1334 raise error.Abort(_("invalid pattern"))
1338 1335
1339 1336 def _patternrootsanddirs(kindpats):
1340 1337 '''Returns roots and directories corresponding to each pattern.
1341 1338
1342 1339 This calculates the roots and directories exactly matching the patterns and
1343 1340 returns a tuple of (roots, dirs) for each. It does not return other
1344 1341 directories which may also need to be considered, like the parent
1345 1342 directories.
1346 1343 '''
1347 1344 r = []
1348 1345 d = []
1349 1346 for kind, pat, source in kindpats:
1350 1347 if kind in ('glob', 'rootglob'): # find the non-glob prefix
1351 1348 root = []
1352 1349 for p in pat.split('/'):
1353 1350 if '[' in p or '{' in p or '*' in p or '?' in p:
1354 1351 break
1355 1352 root.append(p)
1356 1353 r.append('/'.join(root))
1357 1354 elif kind in ('relpath', 'path'):
1358 1355 if pat == '.':
1359 1356 pat = ''
1360 1357 r.append(pat)
1361 1358 elif kind in ('rootfilesin',):
1362 1359 if pat == '.':
1363 1360 pat = ''
1364 1361 d.append(pat)
1365 1362 else: # relglob, re, relre
1366 1363 r.append('')
1367 1364 return r, d
1368 1365
1369 1366 def _roots(kindpats):
1370 1367 '''Returns root directories to match recursively from the given patterns.'''
1371 1368 roots, dirs = _patternrootsanddirs(kindpats)
1372 1369 return roots
1373 1370
1374 1371 def _rootsdirsandparents(kindpats):
1375 1372 '''Returns roots and exact directories from patterns.
1376 1373
1377 1374 `roots` are directories to match recursively, `dirs` should
1378 1375 be matched non-recursively, and `parents` are the implicitly required
1379 1376 directories to walk to items in either roots or dirs.
1380 1377
1381 1378 Returns a tuple of (roots, dirs, parents).
1382 1379
1383 1380 >>> r = _rootsdirsandparents(
1384 1381 ... [(b'glob', b'g/h/*', b''), (b'glob', b'g/h', b''),
1385 1382 ... (b'glob', b'g*', b'')])
1386 1383 >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
1387 1384 (['g/h', 'g/h', ''], []) ['', 'g']
1388 1385 >>> r = _rootsdirsandparents(
1389 1386 ... [(b'rootfilesin', b'g/h', b''), (b'rootfilesin', b'', b'')])
1390 1387 >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
1391 1388 ([], ['g/h', '']) ['', 'g']
1392 1389 >>> r = _rootsdirsandparents(
1393 1390 ... [(b'relpath', b'r', b''), (b'path', b'p/p', b''),
1394 1391 ... (b'path', b'', b'')])
1395 1392 >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
1396 1393 (['r', 'p/p', ''], []) ['', 'p']
1397 1394 >>> r = _rootsdirsandparents(
1398 1395 ... [(b'relglob', b'rg*', b''), (b're', b're/', b''),
1399 1396 ... (b'relre', b'rr', b'')])
1400 1397 >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
1401 1398 (['', '', ''], []) ['']
1402 1399 '''
1403 1400 r, d = _patternrootsanddirs(kindpats)
1404 1401
1405 1402 p = set()
1406 1403 # Add the parents as non-recursive/exact directories, since they must be
1407 1404 # scanned to get to either the roots or the other exact directories.
1408 1405 p.update(util.dirs(d))
1409 1406 p.update(util.dirs(r))
1410 1407
1411 1408 # FIXME: all uses of this function convert these to sets, do so before
1412 1409 # returning.
1413 1410 # FIXME: all uses of this function do not need anything in 'roots' and
1414 1411 # 'dirs' to also be in 'parents', consider removing them before returning.
1415 1412 return r, d, p
1416 1413
1417 1414 def _explicitfiles(kindpats):
1418 1415 '''Returns the potential explicit filenames from the patterns.
1419 1416
1420 1417 >>> _explicitfiles([(b'path', b'foo/bar', b'')])
1421 1418 ['foo/bar']
1422 1419 >>> _explicitfiles([(b'rootfilesin', b'foo/bar', b'')])
1423 1420 []
1424 1421 '''
1425 1422 # Keep only the pattern kinds where one can specify filenames (vs only
1426 1423 # directory names).
1427 1424 filable = [kp for kp in kindpats if kp[0] not in ('rootfilesin',)]
1428 1425 return _roots(filable)
1429 1426
1430 1427 def _prefix(kindpats):
1431 1428 '''Whether all the patterns match a prefix (i.e. recursively)'''
1432 1429 for kind, pat, source in kindpats:
1433 1430 if kind not in ('path', 'relpath'):
1434 1431 return False
1435 1432 return True
1436 1433
1437 1434 _commentre = None
1438 1435
1439 1436 def readpatternfile(filepath, warn, sourceinfo=False):
1440 1437 '''parse a pattern file, returning a list of
1441 1438 patterns. These patterns should be given to compile()
1442 1439 to be validated and converted into a match function.
1443 1440
1444 1441 trailing white space is dropped.
1445 1442 the escape character is backslash.
1446 1443 comments start with #.
1447 1444 empty lines are skipped.
1448 1445
1449 1446 lines can be of the following formats:
1450 1447
1451 1448 syntax: regexp # defaults following lines to non-rooted regexps
1452 1449 syntax: glob # defaults following lines to non-rooted globs
1453 1450 re:pattern # non-rooted regular expression
1454 1451 glob:pattern # non-rooted glob
1455 1452 rootglob:pat # rooted glob (same root as ^ in regexps)
1456 1453 pattern # pattern of the current default type
1457 1454
1458 1455 if sourceinfo is set, returns a list of tuples:
1459 1456 (pattern, lineno, originalline).
1460 1457 This is useful to debug ignore patterns.
1461 1458 '''
1462 1459
1463 if rustext is not None:
1464 result, warnings = rustext.filepatterns.read_pattern_file(
1460 if rustmod is not None:
1461 result, warnings = rustmod.read_pattern_file(
1465 1462 filepath,
1466 1463 bool(warn),
1467 1464 sourceinfo,
1468 1465 )
1469 1466
1470 1467 for warning_params in warnings:
1471 1468 # Can't be easily emitted from Rust, because it would require
1472 1469 # a mechanism for both gettext and calling the `warn` function.
1473 1470 warn(_("%s: ignoring invalid syntax '%s'\n") % warning_params)
1474 1471
1475 1472 return result
1476 1473
1477 1474 syntaxes = {
1478 1475 're': 'relre:',
1479 1476 'regexp': 'relre:',
1480 1477 'glob': 'relglob:',
1481 1478 'rootglob': 'rootglob:',
1482 1479 'include': 'include',
1483 1480 'subinclude': 'subinclude',
1484 1481 }
1485 1482 syntax = 'relre:'
1486 1483 patterns = []
1487 1484
1488 1485 fp = open(filepath, 'rb')
1489 1486 for lineno, line in enumerate(util.iterfile(fp), start=1):
1490 1487 if "#" in line:
1491 1488 global _commentre
1492 1489 if not _commentre:
1493 1490 _commentre = util.re.compile(br'((?:^|[^\\])(?:\\\\)*)#.*')
1494 1491 # remove comments prefixed by an even number of escapes
1495 1492 m = _commentre.search(line)
1496 1493 if m:
1497 1494 line = line[:m.end(1)]
1498 1495 # fixup properly escaped comments that survived the above
1499 1496 line = line.replace("\\#", "#")
1500 1497 line = line.rstrip()
1501 1498 if not line:
1502 1499 continue
1503 1500
1504 1501 if line.startswith('syntax:'):
1505 1502 s = line[7:].strip()
1506 1503 try:
1507 1504 syntax = syntaxes[s]
1508 1505 except KeyError:
1509 1506 if warn:
1510 1507 warn(_("%s: ignoring invalid syntax '%s'\n") %
1511 1508 (filepath, s))
1512 1509 continue
1513 1510
1514 1511 linesyntax = syntax
1515 1512 for s, rels in syntaxes.iteritems():
1516 1513 if line.startswith(rels):
1517 1514 linesyntax = rels
1518 1515 line = line[len(rels):]
1519 1516 break
1520 1517 elif line.startswith(s+':'):
1521 1518 linesyntax = rels
1522 1519 line = line[len(s) + 1:]
1523 1520 break
1524 1521 if sourceinfo:
1525 1522 patterns.append((linesyntax + line, lineno, line))
1526 1523 else:
1527 1524 patterns.append(linesyntax + line)
1528 1525 fp.close()
1529 1526 return patterns
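Editor's illustration, not part of the changeset: given a pattern file containing

    syntax: glob
    *.pyc
    re:^build/

the pure-Python path above would return something like ['relglob:*.pyc', 'relre:^build/']. The 'syntax:' line only changes the default kind for the lines that follow it, and every surviving line comes back normalized to an explicit 'kind:pattern' string for the matcher machinery.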
@@ -1,2687 +1,2684
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import collections
17 17 import contextlib
18 18 import errno
19 19 import os
20 20 import struct
21 21 import zlib
22 22
23 23 # import stuff from node for others to import from revlog
24 24 from .node import (
25 25 bin,
26 26 hex,
27 27 nullhex,
28 28 nullid,
29 29 nullrev,
30 30 short,
31 31 wdirfilenodeids,
32 32 wdirhex,
33 33 wdirid,
34 34 wdirrev,
35 35 )
36 36 from .i18n import _
37 37 from .revlogutils.constants import (
38 38 FLAG_GENERALDELTA,
39 39 FLAG_INLINE_DATA,
40 40 REVIDX_DEFAULT_FLAGS,
41 41 REVIDX_ELLIPSIS,
42 42 REVIDX_EXTSTORED,
43 43 REVIDX_FLAGS_ORDER,
44 44 REVIDX_ISCENSORED,
45 45 REVIDX_KNOWN_FLAGS,
46 46 REVIDX_RAWTEXT_CHANGING_FLAGS,
47 47 REVLOGV0,
48 48 REVLOGV1,
49 49 REVLOGV1_FLAGS,
50 50 REVLOGV2,
51 51 REVLOGV2_FLAGS,
52 52 REVLOG_DEFAULT_FLAGS,
53 53 REVLOG_DEFAULT_FORMAT,
54 54 REVLOG_DEFAULT_VERSION,
55 55 )
56 56 from .thirdparty import (
57 57 attr,
58 58 )
59 59 from . import (
60 60 ancestor,
61 61 dagop,
62 62 error,
63 63 mdiff,
64 64 policy,
65 65 pycompat,
66 66 repository,
67 67 templatefilters,
68 68 util,
69 69 )
70 70 from .revlogutils import (
71 71 deltas as deltautil,
72 72 )
73 73 from .utils import (
74 74 interfaceutil,
75 75 storageutil,
76 76 stringutil,
77 77 )
78 78
79 79 # blanked usage of all the names to prevent pyflakes constraints
80 80 # We need these names available in the module for extensions.
81 81 REVLOGV0
82 82 REVLOGV1
83 83 REVLOGV2
84 84 FLAG_INLINE_DATA
85 85 FLAG_GENERALDELTA
86 86 REVLOG_DEFAULT_FLAGS
87 87 REVLOG_DEFAULT_FORMAT
88 88 REVLOG_DEFAULT_VERSION
89 89 REVLOGV1_FLAGS
90 90 REVLOGV2_FLAGS
91 91 REVIDX_ISCENSORED
92 92 REVIDX_ELLIPSIS
93 93 REVIDX_EXTSTORED
94 94 REVIDX_DEFAULT_FLAGS
95 95 REVIDX_FLAGS_ORDER
96 96 REVIDX_KNOWN_FLAGS
97 97 REVIDX_RAWTEXT_CHANGING_FLAGS
98 98
99 99 parsers = policy.importmod(r'parsers')
100 try:
101 from . import rustext
102 rustext.__name__ # force actual import (see hgdemandimport)
103 except ImportError:
104 rustext = None
100 rustancestor = policy.importrust(r'ancestor')
101 rustdagop = policy.importrust(r'dagop')
105 102
106 103 # Aliased for performance.
107 104 _zlibdecompress = zlib.decompress
108 105
109 106 # max size of revlog with inline data
110 107 _maxinline = 131072
111 108 _chunksize = 1048576
112 109
113 110 # Store flag processors (cf. 'addflagprocessor()' to register)
114 111 _flagprocessors = {
115 112 REVIDX_ISCENSORED: None,
116 113 }
117 114
118 115 # Flag processors for REVIDX_ELLIPSIS.
119 116 def ellipsisreadprocessor(rl, text):
120 117 return text, False
121 118
122 119 def ellipsiswriteprocessor(rl, text):
123 120 return text, False
124 121
125 122 def ellipsisrawprocessor(rl, text):
126 123 return False
127 124
128 125 ellipsisprocessor = (
129 126 ellipsisreadprocessor,
130 127 ellipsiswriteprocessor,
131 128 ellipsisrawprocessor,
132 129 )
133 130
134 131 def addflagprocessor(flag, processor):
135 132 """Register a flag processor on a revision data flag.
136 133
137 134 Invariant:
138 135 - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
139 136 and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
140 137 - Only one flag processor can be registered on a specific flag.
141 138 - flagprocessors must be 3-tuples of functions (read, write, raw) with the
142 139 following signatures:
143 140 - (read) f(self, rawtext) -> text, bool
144 141 - (write) f(self, text) -> rawtext, bool
145 142 - (raw) f(self, rawtext) -> bool
146 143 "text" is presented to the user. "rawtext" is stored in revlog data, not
147 144 directly visible to the user.
148 145 The boolean returned by these transforms is used to determine whether
149 146 the returned text can be used for hash integrity checking. For example,
150 147 if "write" returns False, then "text" is used to generate hash. If
151 148 "write" returns True, that basically means "rawtext" returned by "write"
152 149 should be used to generate hash. Usually, "write" and "read" return
153 150 different booleans. And "raw" returns a same boolean as "write".
154 151
155 152 Note: The 'raw' transform is used for changegroup generation and in some
156 153 debug commands. In this case the transform only indicates whether the
157 154 contents can be used for hash integrity checks.
158 155 """
159 156 _insertflagprocessor(flag, processor, _flagprocessors)
160 157
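Editor's illustration, not part of revlog.py: a registration following the (read, write, raw) contract documented above, reusing the ellipsis transforms defined earlier in this file; extensions would register their own tuples the same way.

    # each element follows the (read, write, raw) signatures listed above
    addflagprocessor(REVIDX_ELLIPSIS, ellipsisprocessor)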
161 158 def _insertflagprocessor(flag, processor, flagprocessors):
162 159 if not flag & REVIDX_KNOWN_FLAGS:
163 160 msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
164 161 raise error.ProgrammingError(msg)
165 162 if flag not in REVIDX_FLAGS_ORDER:
166 163 msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
167 164 raise error.ProgrammingError(msg)
168 165 if flag in flagprocessors:
169 166 msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
170 167 raise error.Abort(msg)
171 168 flagprocessors[flag] = processor
172 169
173 170 def getoffset(q):
174 171 return int(q >> 16)
175 172
176 173 def gettype(q):
177 174 return int(q & 0xFFFF)
178 175
179 176 def offset_type(offset, type):
180 177 if (type & ~REVIDX_KNOWN_FLAGS) != 0:
181 178 raise ValueError('unknown revlog index flags')
182 179 return int(int(offset) << 16 | type)
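# Editor's note (illustrative, not part of the original source): the packed
# field round-trips as follows; the high 48 bits hold the byte offset and the
# low 16 bits hold the flags.
#
#   q = offset_type(4096, 0)   # 4096 << 16
#   getoffset(q)               # -> 4096
#   gettype(q)                 # -> 0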
183 180
184 181 @attr.s(slots=True, frozen=True)
185 182 class _revisioninfo(object):
186 183 """Information about a revision that allows building its fulltext
187 184 node: expected hash of the revision
188 185 p1, p2: parent revs of the revision
189 186 btext: built text cache consisting of a one-element list
190 187 cachedelta: (baserev, uncompressed_delta) or None
191 188 flags: flags associated to the revision storage
192 189
193 190 One of btext[0] or cachedelta must be set.
194 191 """
195 192 node = attr.ib()
196 193 p1 = attr.ib()
197 194 p2 = attr.ib()
198 195 btext = attr.ib()
199 196 textlen = attr.ib()
200 197 cachedelta = attr.ib()
201 198 flags = attr.ib()
202 199
203 200 @interfaceutil.implementer(repository.irevisiondelta)
204 201 @attr.s(slots=True)
205 202 class revlogrevisiondelta(object):
206 203 node = attr.ib()
207 204 p1node = attr.ib()
208 205 p2node = attr.ib()
209 206 basenode = attr.ib()
210 207 flags = attr.ib()
211 208 baserevisionsize = attr.ib()
212 209 revision = attr.ib()
213 210 delta = attr.ib()
214 211 linknode = attr.ib(default=None)
215 212
216 213 @interfaceutil.implementer(repository.iverifyproblem)
217 214 @attr.s(frozen=True)
218 215 class revlogproblem(object):
219 216 warning = attr.ib(default=None)
220 217 error = attr.ib(default=None)
221 218 node = attr.ib(default=None)
222 219
223 220 # index v0:
224 221 # 4 bytes: offset
225 222 # 4 bytes: compressed length
226 223 # 4 bytes: base rev
227 224 # 4 bytes: link rev
228 225 # 20 bytes: parent 1 nodeid
229 226 # 20 bytes: parent 2 nodeid
230 227 # 20 bytes: nodeid
231 228 indexformatv0 = struct.Struct(">4l20s20s20s")
232 229 indexformatv0_pack = indexformatv0.pack
233 230 indexformatv0_unpack = indexformatv0.unpack
234 231
235 232 class revlogoldindex(list):
236 233 def __getitem__(self, i):
237 234 if i == -1:
238 235 return (0, 0, 0, -1, -1, -1, -1, nullid)
239 236 return list.__getitem__(self, i)
240 237
241 238 class revlogoldio(object):
242 239 def __init__(self):
243 240 self.size = indexformatv0.size
244 241
245 242 def parseindex(self, data, inline):
246 243 s = self.size
247 244 index = []
248 245 nodemap = {nullid: nullrev}
249 246 n = off = 0
250 247 l = len(data)
251 248 while off + s <= l:
252 249 cur = data[off:off + s]
253 250 off += s
254 251 e = indexformatv0_unpack(cur)
255 252 # transform to revlogv1 format
256 253 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
257 254 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
258 255 index.append(e2)
259 256 nodemap[e[6]] = n
260 257 n += 1
261 258
262 259 return revlogoldindex(index), nodemap, None
263 260
264 261 def packentry(self, entry, node, version, rev):
265 262 if gettype(entry[0]):
266 263 raise error.RevlogError(_('index entry flags need revlog '
267 264 'version 1'))
268 265 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
269 266 node(entry[5]), node(entry[6]), entry[7])
270 267 return indexformatv0_pack(*e2)
271 268
272 269 # index ng:
273 270 # 6 bytes: offset
274 271 # 2 bytes: flags
275 272 # 4 bytes: compressed length
276 273 # 4 bytes: uncompressed length
277 274 # 4 bytes: base rev
278 275 # 4 bytes: link rev
279 276 # 4 bytes: parent 1 rev
280 277 # 4 bytes: parent 2 rev
281 278 # 32 bytes: nodeid
282 279 indexformatng = struct.Struct(">Qiiiiii20s12x")
283 280 indexformatng_pack = indexformatng.pack
284 281 versionformat = struct.Struct(">I")
285 282 versionformat_pack = versionformat.pack
286 283 versionformat_unpack = versionformat.unpack
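# Editor's note (illustrative, not original): ">Qiiiiii20s12x" packs an 8-byte
# offset/flags field, six 4-byte integers, a 20-byte nodeid and 12 bytes of
# padding, so indexformatng.size is 8 + 24 + 20 + 12 == 64 bytes per entry.
# For revision 0, revlogio.packentry below overwrites the first 4 bytes with
# the version header packed by versionformat.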
287 284
288 285 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
289 286 # signed integer)
290 287 _maxentrysize = 0x7fffffff
291 288
292 289 class revlogio(object):
293 290 def __init__(self):
294 291 self.size = indexformatng.size
295 292
296 293 def parseindex(self, data, inline):
297 294 # call the C implementation to parse the index data
298 295 index, cache = parsers.parse_index2(data, inline)
299 296 return index, getattr(index, 'nodemap', None), cache
300 297
301 298 def packentry(self, entry, node, version, rev):
302 299 p = indexformatng_pack(*entry)
303 300 if rev == 0:
304 301 p = versionformat_pack(version) + p[4:]
305 302 return p
306 303
307 304 class revlog(object):
308 305 """
309 306 the underlying revision storage object
310 307
311 308 A revlog consists of two parts, an index and the revision data.
312 309
313 310 The index is a file with a fixed record size containing
314 311 information on each revision, including its nodeid (hash), the
315 312 nodeids of its parents, the position and offset of its data within
316 313 the data file, and the revision it's based on. Finally, each entry
317 314 contains a linkrev entry that can serve as a pointer to external
318 315 data.
319 316
320 317 The revision data itself is a linear collection of data chunks.
321 318 Each chunk represents a revision and is usually represented as a
322 319 delta against the previous chunk. To bound lookup time, runs of
323 320 deltas are limited to about 2 times the length of the original
324 321 version data. This makes retrieval of a version proportional to
325 322 its size, or O(1) relative to the number of revisions.
326 323
327 324 Both pieces of the revlog are written to in an append-only
328 325 fashion, which means we never need to rewrite a file to insert or
329 326 remove data, and can use some simple techniques to avoid the need
330 327 for locking while reading.
331 328
332 329 If checkambig, indexfile is opened with checkambig=True at
333 330 writing, to avoid file stat ambiguity.
334 331
335 332 If mmaplargeindex is True, and an mmapindexthreshold is set, the
336 333 index will be mmapped rather than read if it is larger than the
337 334 configured threshold.
338 335
339 336 If censorable is True, the revlog can have censored revisions.
340 337 """
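# Editor's note: a minimal usage sketch (illustrative only, assuming a
# vfs-style ``opener``); not part of the original source:
#
#   rl = revlog(opener, indexfile='data/foo.i')
#   tipnode = rl.tip()
#   text = rl.revision(tipnode)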
341 338 def __init__(self, opener, indexfile, datafile=None, checkambig=False,
342 339 mmaplargeindex=False, censorable=False):
343 340 """
344 341 create a revlog object
345 342
346 343 opener is a function that abstracts the file opening operation
347 344 and can be used to implement COW semantics or the like.
348 345 """
349 346 self.indexfile = indexfile
350 347 self.datafile = datafile or (indexfile[:-2] + ".d")
351 348 self.opener = opener
352 349 # When True, indexfile is opened with checkambig=True at writing, to
353 350 # avoid file stat ambiguity.
354 351 self._checkambig = checkambig
355 352 self._mmaplargeindex = mmaplargeindex
356 353 self._censorable = censorable
357 354 # 3-tuple of (node, rev, text) for a raw revision.
358 355 self._revisioncache = None
359 356 # Maps rev to chain base rev.
360 357 self._chainbasecache = util.lrucachedict(100)
361 358 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
362 359 self._chunkcache = (0, '')
363 360 # How much data to read and cache into the raw revlog data cache.
364 361 self._chunkcachesize = 65536
365 362 self._maxchainlen = None
366 363 self._deltabothparents = True
367 364 self.index = []
368 365 # Mapping of partial identifiers to full nodes.
369 366 self._pcache = {}
370 367 # Mapping of revision integer to full node.
371 368 self._nodecache = {nullid: nullrev}
372 369 self._nodepos = None
373 370 self._compengine = 'zlib'
374 371 self._compengineopts = {}
375 372 self._maxdeltachainspan = -1
376 373 self._withsparseread = False
377 374 self._sparserevlog = False
378 375 self._srdensitythreshold = 0.50
379 376 self._srmingapsize = 262144
380 377
381 378 # Make copy of flag processors so each revlog instance can support
382 379 # custom flags.
383 380 self._flagprocessors = dict(_flagprocessors)
384 381
385 382 # 2-tuple of file handles being used for active writing.
386 383 self._writinghandles = None
387 384
388 385 self._loadindex()
389 386
390 387 def _loadindex(self):
391 388 mmapindexthreshold = None
392 389 opts = getattr(self.opener, 'options', {}) or {}
393 390
394 391 if 'revlogv2' in opts:
395 392 newversionflags = REVLOGV2 | FLAG_INLINE_DATA
396 393 elif 'revlogv1' in opts:
397 394 newversionflags = REVLOGV1 | FLAG_INLINE_DATA
398 395 if 'generaldelta' in opts:
399 396 newversionflags |= FLAG_GENERALDELTA
400 397 elif getattr(self.opener, 'options', None) is not None:
401 398 # If options provided but no 'revlog*' found, the repository
402 399 # would have no 'requires' file in it, which means we have to
403 400 # stick to the old format.
404 401 newversionflags = REVLOGV0
405 402 else:
406 403 newversionflags = REVLOG_DEFAULT_VERSION
407 404
408 405 if 'chunkcachesize' in opts:
409 406 self._chunkcachesize = opts['chunkcachesize']
410 407 if 'maxchainlen' in opts:
411 408 self._maxchainlen = opts['maxchainlen']
412 409 if 'deltabothparents' in opts:
413 410 self._deltabothparents = opts['deltabothparents']
414 411 self._lazydelta = bool(opts.get('lazydelta', True))
415 412 self._lazydeltabase = False
416 413 if self._lazydelta:
417 414 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
418 415 if 'compengine' in opts:
419 416 self._compengine = opts['compengine']
420 417 if 'zlib.level' in opts:
421 418 self._compengineopts['zlib.level'] = opts['zlib.level']
422 419 if 'zstd.level' in opts:
423 420 self._compengineopts['zstd.level'] = opts['zstd.level']
424 421 if 'maxdeltachainspan' in opts:
425 422 self._maxdeltachainspan = opts['maxdeltachainspan']
426 423 if self._mmaplargeindex and 'mmapindexthreshold' in opts:
427 424 mmapindexthreshold = opts['mmapindexthreshold']
428 425 self._sparserevlog = bool(opts.get('sparse-revlog', False))
429 426 withsparseread = bool(opts.get('with-sparse-read', False))
430 427 # sparse-revlog forces sparse-read
431 428 self._withsparseread = self._sparserevlog or withsparseread
432 429 if 'sparse-read-density-threshold' in opts:
433 430 self._srdensitythreshold = opts['sparse-read-density-threshold']
434 431 if 'sparse-read-min-gap-size' in opts:
435 432 self._srmingapsize = opts['sparse-read-min-gap-size']
436 433 if opts.get('enableellipsis'):
437 434 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
438 435
439 436 # revlog v0 doesn't have flag processors
440 437 for flag, processor in opts.get(b'flagprocessors', {}).iteritems():
441 438 _insertflagprocessor(flag, processor, self._flagprocessors)
442 439
443 440 if self._chunkcachesize <= 0:
444 441 raise error.RevlogError(_('revlog chunk cache size %r is not '
445 442 'greater than 0') % self._chunkcachesize)
446 443 elif self._chunkcachesize & (self._chunkcachesize - 1):
447 444 raise error.RevlogError(_('revlog chunk cache size %r is not a '
448 445 'power of 2') % self._chunkcachesize)
449 446
450 447 indexdata = ''
451 448 self._initempty = True
452 449 try:
453 450 with self._indexfp() as f:
454 451 if (mmapindexthreshold is not None and
455 452 self.opener.fstat(f).st_size >= mmapindexthreshold):
456 453 # TODO: should .close() to release resources without
457 454 # relying on Python GC
458 455 indexdata = util.buffer(util.mmapread(f))
459 456 else:
460 457 indexdata = f.read()
461 458 if len(indexdata) > 0:
462 459 versionflags = versionformat_unpack(indexdata[:4])[0]
463 460 self._initempty = False
464 461 else:
465 462 versionflags = newversionflags
466 463 except IOError as inst:
467 464 if inst.errno != errno.ENOENT:
468 465 raise
469 466
470 467 versionflags = newversionflags
471 468
472 469 self.version = versionflags
473 470
474 471 flags = versionflags & ~0xFFFF
475 472 fmt = versionflags & 0xFFFF
476 473
477 474 if fmt == REVLOGV0:
478 475 if flags:
479 476 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
480 477 'revlog %s') %
481 478 (flags >> 16, fmt, self.indexfile))
482 479
483 480 self._inline = False
484 481 self._generaldelta = False
485 482
486 483 elif fmt == REVLOGV1:
487 484 if flags & ~REVLOGV1_FLAGS:
488 485 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
489 486 'revlog %s') %
490 487 (flags >> 16, fmt, self.indexfile))
491 488
492 489 self._inline = versionflags & FLAG_INLINE_DATA
493 490 self._generaldelta = versionflags & FLAG_GENERALDELTA
494 491
495 492 elif fmt == REVLOGV2:
496 493 if flags & ~REVLOGV2_FLAGS:
497 494 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
498 495 'revlog %s') %
499 496 (flags >> 16, fmt, self.indexfile))
500 497
501 498 self._inline = versionflags & FLAG_INLINE_DATA
502 499 # generaldelta implied by version 2 revlogs.
503 500 self._generaldelta = True
504 501
505 502 else:
506 503 raise error.RevlogError(_('unknown version (%d) in revlog %s') %
507 504 (fmt, self.indexfile))
508 505 # sparse-revlog can't be on without general-delta (issue6056)
509 506 if not self._generaldelta:
510 507 self._sparserevlog = False
511 508
512 509 self._storedeltachains = True
513 510
514 511 self._io = revlogio()
515 512 if self.version == REVLOGV0:
516 513 self._io = revlogoldio()
517 514 try:
518 515 d = self._io.parseindex(indexdata, self._inline)
519 516 except (ValueError, IndexError):
520 517 raise error.RevlogError(_("index %s is corrupted") %
521 518 self.indexfile)
522 519 self.index, nodemap, self._chunkcache = d
523 520 if nodemap is not None:
524 521 self.nodemap = self._nodecache = nodemap
525 522 if not self._chunkcache:
526 523 self._chunkclear()
527 524 # revnum -> (chain-length, sum-delta-length)
528 525 self._chaininfocache = {}
529 526 # revlog header -> revlog compressor
530 527 self._decompressors = {}
531 528
532 529 @util.propertycache
533 530 def _compressor(self):
534 531 engine = util.compengines[self._compengine]
535 532 return engine.revlogcompressor(self._compengineopts)
536 533
537 534 def _indexfp(self, mode='r'):
538 535 """file object for the revlog's index file"""
539 536 args = {r'mode': mode}
540 537 if mode != 'r':
541 538 args[r'checkambig'] = self._checkambig
542 539 if mode == 'w':
543 540 args[r'atomictemp'] = True
544 541 return self.opener(self.indexfile, **args)
545 542
546 543 def _datafp(self, mode='r'):
547 544 """file object for the revlog's data file"""
548 545 return self.opener(self.datafile, mode=mode)
549 546
550 547 @contextlib.contextmanager
551 548 def _datareadfp(self, existingfp=None):
552 549 """file object suitable to read data"""
553 550 # Use explicit file handle, if given.
554 551 if existingfp is not None:
555 552 yield existingfp
556 553
557 554 # Use a file handle being actively used for writes, if available.
558 555 # There is some danger to doing this because reads will seek the
559 556 # file. However, _writeentry() performs a SEEK_END before all writes,
560 557 # so we should be safe.
561 558 elif self._writinghandles:
562 559 if self._inline:
563 560 yield self._writinghandles[0]
564 561 else:
565 562 yield self._writinghandles[1]
566 563
567 564 # Otherwise open a new file handle.
568 565 else:
569 566 if self._inline:
570 567 func = self._indexfp
571 568 else:
572 569 func = self._datafp
573 570 with func() as fp:
574 571 yield fp
575 572
576 573 def tip(self):
577 574 return self.node(len(self.index) - 1)
578 575 def __contains__(self, rev):
579 576 return 0 <= rev < len(self)
580 577 def __len__(self):
581 578 return len(self.index)
582 579 def __iter__(self):
583 580 return iter(pycompat.xrange(len(self)))
584 581 def revs(self, start=0, stop=None):
585 582 """iterate over all rev in this revlog (from start to stop)"""
586 583 return storageutil.iterrevs(len(self), start=start, stop=stop)
587 584
588 585 @util.propertycache
589 586 def nodemap(self):
590 587 if self.index:
591 588 # populate mapping down to the initial node
592 589 node0 = self.index[0][7] # get around changelog filtering
593 590 self.rev(node0)
594 591 return self._nodecache
595 592
596 593 def hasnode(self, node):
597 594 try:
598 595 self.rev(node)
599 596 return True
600 597 except KeyError:
601 598 return False
602 599
603 600 def candelta(self, baserev, rev):
604 601 """whether two revisions (baserev, rev) can be delta-ed or not"""
605 602 # Disable delta if either rev requires a content-changing flag
606 603 # processor (ex. LFS). This is because such flag processor can alter
607 604 # the rawtext content that the delta will be based on, and two clients
608 605 # could have a same revlog node with different flags (i.e. different
609 606 # rawtext contents) and the delta could be incompatible.
610 607 if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
611 608 or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
612 609 return False
613 610 return True
614 611
615 612 def clearcaches(self):
616 613 self._revisioncache = None
617 614 self._chainbasecache.clear()
618 615 self._chunkcache = (0, '')
619 616 self._pcache = {}
620 617
621 618 try:
622 619 # If we are using the native C version, we are in a case
623 620 # where self.index, self.nodemap and self._nodecache are the same
624 621 # object.
625 622 self._nodecache.clearcaches()
626 623 except AttributeError:
627 624 self._nodecache = {nullid: nullrev}
628 625 self._nodepos = None
629 626
630 627 def rev(self, node):
631 628 try:
632 629 return self._nodecache[node]
633 630 except TypeError:
634 631 raise
635 632 except error.RevlogError:
636 633 # parsers.c radix tree lookup failed
637 634 if node == wdirid or node in wdirfilenodeids:
638 635 raise error.WdirUnsupported
639 636 raise error.LookupError(node, self.indexfile, _('no node'))
640 637 except KeyError:
641 638 # pure python cache lookup failed
642 639 n = self._nodecache
643 640 i = self.index
644 641 p = self._nodepos
645 642 if p is None:
646 643 p = len(i) - 1
647 644 else:
648 645 assert p < len(i)
649 646 for r in pycompat.xrange(p, -1, -1):
650 647 v = i[r][7]
651 648 n[v] = r
652 649 if v == node:
653 650 self._nodepos = r - 1
654 651 return r
655 652 if node == wdirid or node in wdirfilenodeids:
656 653 raise error.WdirUnsupported
657 654 raise error.LookupError(node, self.indexfile, _('no node'))
658 655
659 656 # Accessors for index entries.
660 657
661 658 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
662 659 # are flags.
663 660 def start(self, rev):
664 661 return int(self.index[rev][0] >> 16)
665 662
666 663 def flags(self, rev):
667 664 return self.index[rev][0] & 0xFFFF
668 665
669 666 def length(self, rev):
670 667 return self.index[rev][1]
671 668
672 669 def rawsize(self, rev):
673 670 """return the length of the uncompressed text for a given revision"""
674 671 l = self.index[rev][2]
675 672 if l >= 0:
676 673 return l
677 674
678 675 t = self.revision(rev, raw=True)
679 676 return len(t)
680 677
681 678 def size(self, rev):
682 679 """length of non-raw text (processed by a "read" flag processor)"""
683 680 # fast path: if no "read" flag processor could change the content,
684 681 # size is rawsize. note: ELLIPSIS is known to not change the content.
685 682 flags = self.flags(rev)
686 683 if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
687 684 return self.rawsize(rev)
688 685
689 686 return len(self.revision(rev, raw=False))
690 687
691 688 def chainbase(self, rev):
692 689 base = self._chainbasecache.get(rev)
693 690 if base is not None:
694 691 return base
695 692
696 693 index = self.index
697 694 iterrev = rev
698 695 base = index[iterrev][3]
699 696 while base != iterrev:
700 697 iterrev = base
701 698 base = index[iterrev][3]
702 699
703 700 self._chainbasecache[rev] = base
704 701 return base
705 702
706 703 def linkrev(self, rev):
707 704 return self.index[rev][4]
708 705
709 706 def parentrevs(self, rev):
710 707 try:
711 708 entry = self.index[rev]
712 709 except IndexError:
713 710 if rev == wdirrev:
714 711 raise error.WdirUnsupported
715 712 raise
716 713
717 714 return entry[5], entry[6]
718 715
719 716 # fast parentrevs(rev) where rev isn't filtered
720 717 _uncheckedparentrevs = parentrevs
721 718
722 719 def node(self, rev):
723 720 try:
724 721 return self.index[rev][7]
725 722 except IndexError:
726 723 if rev == wdirrev:
727 724 raise error.WdirUnsupported
728 725 raise
729 726
730 727 # Derived from index values.
731 728
732 729 def end(self, rev):
733 730 return self.start(rev) + self.length(rev)
734 731
735 732 def parents(self, node):
736 733 i = self.index
737 734 d = i[self.rev(node)]
738 735 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
739 736
740 737 def chainlen(self, rev):
741 738 return self._chaininfo(rev)[0]
742 739
743 740 def _chaininfo(self, rev):
744 741 chaininfocache = self._chaininfocache
745 742 if rev in chaininfocache:
746 743 return chaininfocache[rev]
747 744 index = self.index
748 745 generaldelta = self._generaldelta
749 746 iterrev = rev
750 747 e = index[iterrev]
751 748 clen = 0
752 749 compresseddeltalen = 0
753 750 while iterrev != e[3]:
754 751 clen += 1
755 752 compresseddeltalen += e[1]
756 753 if generaldelta:
757 754 iterrev = e[3]
758 755 else:
759 756 iterrev -= 1
760 757 if iterrev in chaininfocache:
761 758 t = chaininfocache[iterrev]
762 759 clen += t[0]
763 760 compresseddeltalen += t[1]
764 761 break
765 762 e = index[iterrev]
766 763 else:
767 764 # Add text length of base since decompressing that also takes
768 765 # work. For cache hits the length is already included.
769 766 compresseddeltalen += e[1]
770 767 r = (clen, compresseddeltalen)
771 768 chaininfocache[rev] = r
772 769 return r
773 770
774 771 def _deltachain(self, rev, stoprev=None):
775 772 """Obtain the delta chain for a revision.
776 773
777 774 ``stoprev`` specifies a revision to stop at. If not specified, we
778 775 stop at the base of the chain.
779 776
780 777 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
781 778 revs in ascending order and ``stopped`` is a bool indicating whether
782 779 ``stoprev`` was hit.
783 780 """
784 781 # Try C implementation.
785 782 try:
786 783 return self.index.deltachain(rev, stoprev, self._generaldelta)
787 784 except AttributeError:
788 785 pass
789 786
790 787 chain = []
791 788
792 789 # Alias to prevent attribute lookup in tight loop.
793 790 index = self.index
794 791 generaldelta = self._generaldelta
795 792
796 793 iterrev = rev
797 794 e = index[iterrev]
798 795 while iterrev != e[3] and iterrev != stoprev:
799 796 chain.append(iterrev)
800 797 if generaldelta:
801 798 iterrev = e[3]
802 799 else:
803 800 iterrev -= 1
804 801 e = index[iterrev]
805 802
806 803 if iterrev == stoprev:
807 804 stopped = True
808 805 else:
809 806 chain.append(iterrev)
810 807 stopped = False
811 808
812 809 chain.reverse()
813 810 return chain, stopped
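# Editor's note (not original): the chain returned here is consumed by
# revision() further down, which fetches the chunks for these revs via
# _chunks() and rebuilds the target text with mdiff.patches(), starting from
# either the chain base or the cached rawtext.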
814 811
815 812 def ancestors(self, revs, stoprev=0, inclusive=False):
816 813 """Generate the ancestors of 'revs' in reverse revision order.
817 814 Does not generate revs lower than stoprev.
818 815
819 816 See the documentation for ancestor.lazyancestors for more details."""
820 817
821 818 # first, make sure start revisions aren't filtered
822 819 revs = list(revs)
823 820 checkrev = self.node
824 821 for r in revs:
825 822 checkrev(r)
826 823 # and we're sure ancestors aren't filtered as well
827 824
828 if rustext is not None:
829 lazyancestors = rustext.ancestor.LazyAncestors
825 if rustancestor is not None:
826 lazyancestors = rustancestor.LazyAncestors
830 827 arg = self.index
831 828 elif util.safehasattr(parsers, 'rustlazyancestors'):
832 829 lazyancestors = ancestor.rustlazyancestors
833 830 arg = self.index
834 831 else:
835 832 lazyancestors = ancestor.lazyancestors
836 833 arg = self._uncheckedparentrevs
837 834 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
838 835
839 836 def descendants(self, revs):
840 837 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
841 838
842 839 def findcommonmissing(self, common=None, heads=None):
843 840 """Return a tuple of the ancestors of common and the ancestors of heads
844 841 that are not ancestors of common. In revset terminology, we return the
845 842 tuple:
846 843
847 844 ::common, (::heads) - (::common)
848 845
849 846 The list is sorted by revision number, meaning it is
850 847 topologically sorted.
851 848
852 849 'heads' and 'common' are both lists of node IDs. If heads is
853 850 not supplied, uses all of the revlog's heads. If common is not
854 851 supplied, uses nullid."""
855 852 if common is None:
856 853 common = [nullid]
857 854 if heads is None:
858 855 heads = self.heads()
859 856
860 857 common = [self.rev(n) for n in common]
861 858 heads = [self.rev(n) for n in heads]
862 859
863 860 # we want the ancestors, but inclusive
864 861 class lazyset(object):
865 862 def __init__(self, lazyvalues):
866 863 self.addedvalues = set()
867 864 self.lazyvalues = lazyvalues
868 865
869 866 def __contains__(self, value):
870 867 return value in self.addedvalues or value in self.lazyvalues
871 868
872 869 def __iter__(self):
873 870 added = self.addedvalues
874 871 for r in added:
875 872 yield r
876 873 for r in self.lazyvalues:
877 874 if r not in added:
878 875 yield r
879 876
880 877 def add(self, value):
881 878 self.addedvalues.add(value)
882 879
883 880 def update(self, values):
884 881 self.addedvalues.update(values)
885 882
886 883 has = lazyset(self.ancestors(common))
887 884 has.add(nullrev)
888 885 has.update(common)
889 886
890 887 # take all ancestors from heads that aren't in has
891 888 missing = set()
892 889 visit = collections.deque(r for r in heads if r not in has)
893 890 while visit:
894 891 r = visit.popleft()
895 892 if r in missing:
896 893 continue
897 894 else:
898 895 missing.add(r)
899 896 for p in self.parentrevs(r):
900 897 if p not in has:
901 898 visit.append(p)
902 899 missing = list(missing)
903 900 missing.sort()
904 901 return has, [self.node(miss) for miss in missing]
905 902
906 903 def incrementalmissingrevs(self, common=None):
907 904 """Return an object that can be used to incrementally compute the
908 905 revision numbers of the ancestors of arbitrary sets that are not
909 906 ancestors of common. This is an ancestor.incrementalmissingancestors
910 907 object.
911 908
912 909 'common' is a list of revision numbers. If common is not supplied, uses
913 910 nullrev.
914 911 """
915 912 if common is None:
916 913 common = [nullrev]
917 914
918 if rustext is not None:
919 return rustext.ancestor.MissingAncestors(self.index, common)
915 if rustancestor is not None:
916 return rustancestor.MissingAncestors(self.index, common)
920 917 return ancestor.incrementalmissingancestors(self.parentrevs, common)
921 918
922 919 def findmissingrevs(self, common=None, heads=None):
923 920 """Return the revision numbers of the ancestors of heads that
924 921 are not ancestors of common.
925 922
926 923 More specifically, return a list of revision numbers corresponding to
927 924 nodes N such that every N satisfies the following constraints:
928 925
929 926 1. N is an ancestor of some node in 'heads'
930 927 2. N is not an ancestor of any node in 'common'
931 928
932 929 The list is sorted by revision number, meaning it is
933 930 topologically sorted.
934 931
935 932 'heads' and 'common' are both lists of revision numbers. If heads is
936 933 not supplied, uses all of the revlog's heads. If common is not
937 934 supplied, uses nullid."""
938 935 if common is None:
939 936 common = [nullrev]
940 937 if heads is None:
941 938 heads = self.headrevs()
942 939
943 940 inc = self.incrementalmissingrevs(common=common)
944 941 return inc.missingancestors(heads)
945 942
946 943 def findmissing(self, common=None, heads=None):
947 944 """Return the ancestors of heads that are not ancestors of common.
948 945
949 946 More specifically, return a list of nodes N such that every N
950 947 satisfies the following constraints:
951 948
952 949 1. N is an ancestor of some node in 'heads'
953 950 2. N is not an ancestor of any node in 'common'
954 951
955 952 The list is sorted by revision number, meaning it is
956 953 topologically sorted.
957 954
958 955 'heads' and 'common' are both lists of node IDs. If heads is
959 956 not supplied, uses all of the revlog's heads. If common is not
960 957 supplied, uses nullid."""
961 958 if common is None:
962 959 common = [nullid]
963 960 if heads is None:
964 961 heads = self.heads()
965 962
966 963 common = [self.rev(n) for n in common]
967 964 heads = [self.rev(n) for n in heads]
968 965
969 966 inc = self.incrementalmissingrevs(common=common)
970 967 return [self.node(r) for r in inc.missingancestors(heads)]
971 968
972 969 def nodesbetween(self, roots=None, heads=None):
973 970 """Return a topological path from 'roots' to 'heads'.
974 971
975 972 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
976 973 topologically sorted list of all nodes N that satisfy both of
977 974 these constraints:
978 975
979 976 1. N is a descendant of some node in 'roots'
980 977 2. N is an ancestor of some node in 'heads'
981 978
982 979 Every node is considered to be both a descendant and an ancestor
983 980 of itself, so every reachable node in 'roots' and 'heads' will be
984 981 included in 'nodes'.
985 982
986 983 'outroots' is the list of reachable nodes in 'roots', i.e., the
987 984 subset of 'roots' that is returned in 'nodes'. Likewise,
988 985 'outheads' is the subset of 'heads' that is also in 'nodes'.
989 986
990 987 'roots' and 'heads' are both lists of node IDs. If 'roots' is
991 988 unspecified, uses nullid as the only root. If 'heads' is
992 989 unspecified, uses list of all of the revlog's heads."""
993 990 nonodes = ([], [], [])
994 991 if roots is not None:
995 992 roots = list(roots)
996 993 if not roots:
997 994 return nonodes
998 995 lowestrev = min([self.rev(n) for n in roots])
999 996 else:
1000 997 roots = [nullid] # Everybody's a descendant of nullid
1001 998 lowestrev = nullrev
1002 999 if (lowestrev == nullrev) and (heads is None):
1003 1000 # We want _all_ the nodes!
1004 1001 return ([self.node(r) for r in self], [nullid], list(self.heads()))
1005 1002 if heads is None:
1006 1003 # All nodes are ancestors, so the latest ancestor is the last
1007 1004 # node.
1008 1005 highestrev = len(self) - 1
1009 1006 # Set ancestors to None to signal that every node is an ancestor.
1010 1007 ancestors = None
1011 1008 # Set heads to an empty dictionary for later discovery of heads
1012 1009 heads = {}
1013 1010 else:
1014 1011 heads = list(heads)
1015 1012 if not heads:
1016 1013 return nonodes
1017 1014 ancestors = set()
1018 1015 # Turn heads into a dictionary so we can remove 'fake' heads.
1019 1016 # Also, later we will be using it to filter out the heads we can't
1020 1017 # find from roots.
1021 1018 heads = dict.fromkeys(heads, False)
1022 1019 # Start at the top and keep marking parents until we're done.
1023 1020 nodestotag = set(heads)
1024 1021 # Remember where the top was so we can use it as a limit later.
1025 1022 highestrev = max([self.rev(n) for n in nodestotag])
1026 1023 while nodestotag:
1027 1024 # grab a node to tag
1028 1025 n = nodestotag.pop()
1029 1026 # Never tag nullid
1030 1027 if n == nullid:
1031 1028 continue
1032 1029 # A node's revision number represents its place in a
1033 1030 # topologically sorted list of nodes.
1034 1031 r = self.rev(n)
1035 1032 if r >= lowestrev:
1036 1033 if n not in ancestors:
1037 1034 # If we are possibly a descendant of one of the roots
1038 1035 # and we haven't already been marked as an ancestor
1039 1036 ancestors.add(n) # Mark as ancestor
1040 1037 # Add non-nullid parents to list of nodes to tag.
1041 1038 nodestotag.update([p for p in self.parents(n) if
1042 1039 p != nullid])
1043 1040 elif n in heads: # We've seen it before, is it a fake head?
1044 1041 # So it is, real heads should not be the ancestors of
1045 1042 # any other heads.
1046 1043 heads.pop(n)
1047 1044 if not ancestors:
1048 1045 return nonodes
1049 1046 # Now that we have our set of ancestors, we want to remove any
1050 1047 # roots that are not ancestors.
1051 1048
1052 1049 # If one of the roots was nullid, everything is included anyway.
1053 1050 if lowestrev > nullrev:
1054 1051 # But, since we weren't, let's recompute the lowest rev to not
1055 1052 # include roots that aren't ancestors.
1056 1053
1057 1054 # Filter out roots that aren't ancestors of heads
1058 1055 roots = [root for root in roots if root in ancestors]
1059 1056 # Recompute the lowest revision
1060 1057 if roots:
1061 1058 lowestrev = min([self.rev(root) for root in roots])
1062 1059 else:
1063 1060 # No more roots? Return empty list
1064 1061 return nonodes
1065 1062 else:
1066 1063 # We are descending from nullid, and don't need to care about
1067 1064 # any other roots.
1068 1065 lowestrev = nullrev
1069 1066 roots = [nullid]
1070 1067 # Transform our roots list into a set.
1071 1068 descendants = set(roots)
1072 1069 # Also, keep the original roots so we can filter out roots that aren't
1073 1070 # 'real' roots (i.e. are descended from other roots).
1074 1071 roots = descendants.copy()
1075 1072 # Our topologically sorted list of output nodes.
1076 1073 orderedout = []
1077 1074 # Don't start at nullid since we don't want nullid in our output list,
1078 1075 # and if nullid shows up in descendants, empty parents will look like
1079 1076 # they're descendants.
1080 1077 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1081 1078 n = self.node(r)
1082 1079 isdescendant = False
1083 1080 if lowestrev == nullrev: # Everybody is a descendant of nullid
1084 1081 isdescendant = True
1085 1082 elif n in descendants:
1086 1083 # n is already a descendant
1087 1084 isdescendant = True
1088 1085 # This check only needs to be done here because all the roots
1089 1086 # will start being marked as descendants before the loop.
1090 1087 if n in roots:
1091 1088 # If n was a root, check if it's a 'real' root.
1092 1089 p = tuple(self.parents(n))
1093 1090 # If any of its parents are descendants, it's not a root.
1094 1091 if (p[0] in descendants) or (p[1] in descendants):
1095 1092 roots.remove(n)
1096 1093 else:
1097 1094 p = tuple(self.parents(n))
1098 1095 # A node is a descendant if either of its parents are
1099 1096 # descendants. (We seeded the descendants set with the roots
1100 1097 # up there, remember?)
1101 1098 if (p[0] in descendants) or (p[1] in descendants):
1102 1099 descendants.add(n)
1103 1100 isdescendant = True
1104 1101 if isdescendant and ((ancestors is None) or (n in ancestors)):
1105 1102 # Only include nodes that are both descendants and ancestors.
1106 1103 orderedout.append(n)
1107 1104 if (ancestors is not None) and (n in heads):
1108 1105 # We're trying to figure out which heads are reachable
1109 1106 # from roots.
1110 1107 # Mark this head as having been reached
1111 1108 heads[n] = True
1112 1109 elif ancestors is None:
1113 1110 # Otherwise, we're trying to discover the heads.
1114 1111 # Assume this is a head because if it isn't, the next step
1115 1112 # will eventually remove it.
1116 1113 heads[n] = True
1117 1114 # But, obviously its parents aren't.
1118 1115 for p in self.parents(n):
1119 1116 heads.pop(p, None)
1120 1117 heads = [head for head, flag in heads.iteritems() if flag]
1121 1118 roots = list(roots)
1122 1119 assert orderedout
1123 1120 assert roots
1124 1121 assert heads
1125 1122 return (orderedout, roots, heads)
1126 1123
1127 1124 def headrevs(self, revs=None):
1128 1125 if revs is None:
1129 1126 try:
1130 1127 return self.index.headrevs()
1131 1128 except AttributeError:
1132 1129 return self._headrevs()
1133 if rustext is not None:
1134 return rustext.dagop.headrevs(self.index, revs)
1130 if rustdagop is not None:
1131 return rustdagop.headrevs(self.index, revs)
1135 1132 return dagop.headrevs(revs, self._uncheckedparentrevs)
1136 1133
1137 1134 def computephases(self, roots):
1138 1135 return self.index.computephasesmapsets(roots)
1139 1136
1140 1137 def _headrevs(self):
1141 1138 count = len(self)
1142 1139 if not count:
1143 1140 return [nullrev]
1144 1141 # we won't iter over filtered rev so nobody is a head at start
1145 1142 ishead = [0] * (count + 1)
1146 1143 index = self.index
1147 1144 for r in self:
1148 1145 ishead[r] = 1 # I may be a head
1149 1146 e = index[r]
1150 1147 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1151 1148 return [r for r, val in enumerate(ishead) if val]
1152 1149
1153 1150 def heads(self, start=None, stop=None):
1154 1151 """return the list of all nodes that have no children
1155 1152
1156 1153 if start is specified, only heads that are descendants of
1157 1154 start will be returned
1158 1155 if stop is specified, it will consider all the revs from stop
1159 1156 as if they had no children
1160 1157 """
1161 1158 if start is None and stop is None:
1162 1159 if not len(self):
1163 1160 return [nullid]
1164 1161 return [self.node(r) for r in self.headrevs()]
1165 1162
1166 1163 if start is None:
1167 1164 start = nullrev
1168 1165 else:
1169 1166 start = self.rev(start)
1170 1167
1171 1168 stoprevs = set(self.rev(n) for n in stop or [])
1172 1169
1173 1170 revs = dagop.headrevssubset(self.revs, self.parentrevs, startrev=start,
1174 1171 stoprevs=stoprevs)
1175 1172
1176 1173 return [self.node(rev) for rev in revs]
1177 1174
1178 1175 def children(self, node):
1179 1176 """find the children of a given node"""
1180 1177 c = []
1181 1178 p = self.rev(node)
1182 1179 for r in self.revs(start=p + 1):
1183 1180 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1184 1181 if prevs:
1185 1182 for pr in prevs:
1186 1183 if pr == p:
1187 1184 c.append(self.node(r))
1188 1185 elif p == nullrev:
1189 1186 c.append(self.node(r))
1190 1187 return c
1191 1188
1192 1189 def commonancestorsheads(self, a, b):
1193 1190 """calculate all the heads of the common ancestors of nodes a and b"""
1194 1191 a, b = self.rev(a), self.rev(b)
1195 1192 ancs = self._commonancestorsheads(a, b)
1196 1193 return pycompat.maplist(self.node, ancs)
1197 1194
1198 1195 def _commonancestorsheads(self, *revs):
1199 1196 """calculate all the heads of the common ancestors of revs"""
1200 1197 try:
1201 1198 ancs = self.index.commonancestorsheads(*revs)
1202 1199 except (AttributeError, OverflowError): # C implementation failed
1203 1200 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1204 1201 return ancs
1205 1202
1206 1203 def isancestor(self, a, b):
1207 1204 """return True if node a is an ancestor of node b
1208 1205
1209 1206 A revision is considered an ancestor of itself."""
1210 1207 a, b = self.rev(a), self.rev(b)
1211 1208 return self.isancestorrev(a, b)
1212 1209
1213 1210 def isancestorrev(self, a, b):
1214 1211 """return True if revision a is an ancestor of revision b
1215 1212
1216 1213 A revision is considered an ancestor of itself.
1217 1214
1218 1215 The implementation of this is trivial but the use of
1219 1216 reachableroots is not."""
1220 1217 if a == nullrev:
1221 1218 return True
1222 1219 elif a == b:
1223 1220 return True
1224 1221 elif a > b:
1225 1222 return False
1226 1223 return bool(self.reachableroots(a, [b], [a], includepath=False))
1227 1224
1228 1225 def reachableroots(self, minroot, heads, roots, includepath=False):
1229 1226 """return (heads(::<roots> and <roots>::<heads>))
1230 1227
1231 1228 If includepath is True, return (<roots>::<heads>)."""
1232 1229 try:
1233 1230 return self.index.reachableroots2(minroot, heads, roots,
1234 1231 includepath)
1235 1232 except AttributeError:
1236 1233 return dagop._reachablerootspure(self.parentrevs,
1237 1234 minroot, roots, heads, includepath)
1238 1235
1239 1236 def ancestor(self, a, b):
1240 1237 """calculate the "best" common ancestor of nodes a and b"""
1241 1238
1242 1239 a, b = self.rev(a), self.rev(b)
1243 1240 try:
1244 1241 ancs = self.index.ancestors(a, b)
1245 1242 except (AttributeError, OverflowError):
1246 1243 ancs = ancestor.ancestors(self.parentrevs, a, b)
1247 1244 if ancs:
1248 1245 # choose a consistent winner when there's a tie
1249 1246 return min(map(self.node, ancs))
1250 1247 return nullid
1251 1248
1252 1249 def _match(self, id):
1253 1250 if isinstance(id, int):
1254 1251 # rev
1255 1252 return self.node(id)
1256 1253 if len(id) == 20:
1257 1254 # possibly a binary node
1258 1255 # odds of a binary node being all hex in ASCII are 1 in 10**25
1259 1256 try:
1260 1257 node = id
1261 1258 self.rev(node) # quick search the index
1262 1259 return node
1263 1260 except error.LookupError:
1264 1261 pass # may be partial hex id
1265 1262 try:
1266 1263 # str(rev)
1267 1264 rev = int(id)
1268 1265 if "%d" % rev != id:
1269 1266 raise ValueError
1270 1267 if rev < 0:
1271 1268 rev = len(self) + rev
1272 1269 if rev < 0 or rev >= len(self):
1273 1270 raise ValueError
1274 1271 return self.node(rev)
1275 1272 except (ValueError, OverflowError):
1276 1273 pass
1277 1274 if len(id) == 40:
1278 1275 try:
1279 1276 # a full hex nodeid?
1280 1277 node = bin(id)
1281 1278 self.rev(node)
1282 1279 return node
1283 1280 except (TypeError, error.LookupError):
1284 1281 pass
1285 1282
1286 1283 def _partialmatch(self, id):
1287 1284 # we don't care about wdirfilenodeids as they should always be full hashes
1288 1285 maybewdir = wdirhex.startswith(id)
1289 1286 try:
1290 1287 partial = self.index.partialmatch(id)
1291 1288 if partial and self.hasnode(partial):
1292 1289 if maybewdir:
1293 1290 # single 'ff...' match in radix tree, ambiguous with wdir
1294 1291 raise error.RevlogError
1295 1292 return partial
1296 1293 if maybewdir:
1297 1294 # no 'ff...' match in radix tree, wdir identified
1298 1295 raise error.WdirUnsupported
1299 1296 return None
1300 1297 except error.RevlogError:
1301 1298 # parsers.c radix tree lookup gave multiple matches
1302 1299 # fast path: for unfiltered changelog, radix tree is accurate
1303 1300 if not getattr(self, 'filteredrevs', None):
1304 1301 raise error.AmbiguousPrefixLookupError(
1305 1302 id, self.indexfile, _('ambiguous identifier'))
1306 1303 # fall through to slow path that filters hidden revisions
1307 1304 except (AttributeError, ValueError):
1308 1305 # we are pure python, or key was too short to search radix tree
1309 1306 pass
1310 1307
1311 1308 if id in self._pcache:
1312 1309 return self._pcache[id]
1313 1310
1314 1311 if len(id) <= 40:
1315 1312 try:
1316 1313 # hex(node)[:...]
1317 1314 l = len(id) // 2 # grab an even number of digits
1318 1315 prefix = bin(id[:l * 2])
1319 1316 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1320 1317 nl = [n for n in nl if hex(n).startswith(id) and
1321 1318 self.hasnode(n)]
1322 1319 if nullhex.startswith(id):
1323 1320 nl.append(nullid)
1324 1321 if len(nl) > 0:
1325 1322 if len(nl) == 1 and not maybewdir:
1326 1323 self._pcache[id] = nl[0]
1327 1324 return nl[0]
1328 1325 raise error.AmbiguousPrefixLookupError(
1329 1326 id, self.indexfile, _('ambiguous identifier'))
1330 1327 if maybewdir:
1331 1328 raise error.WdirUnsupported
1332 1329 return None
1333 1330 except TypeError:
1334 1331 pass
1335 1332
1336 1333 def lookup(self, id):
1337 1334 """locate a node based on:
1338 1335 - revision number or str(revision number)
1339 1336 - nodeid or subset of hex nodeid
1340 1337 """
1341 1338 n = self._match(id)
1342 1339 if n is not None:
1343 1340 return n
1344 1341 n = self._partialmatch(id)
1345 1342 if n:
1346 1343 return n
1347 1344
1348 1345 raise error.LookupError(id, self.indexfile, _('no match found'))
1349 1346
1350 1347 def shortest(self, node, minlength=1):
1351 1348 """Find the shortest unambiguous prefix that matches node."""
1352 1349 def isvalid(prefix):
1353 1350 try:
1354 1351 node = self._partialmatch(prefix)
1355 1352 except error.AmbiguousPrefixLookupError:
1356 1353 return False
1357 1354 except error.WdirUnsupported:
1358 1355 # single 'ff...' match
1359 1356 return True
1360 1357 if node is None:
1361 1358 raise error.LookupError(node, self.indexfile, _('no node'))
1362 1359 return True
1363 1360
1364 1361 def maybewdir(prefix):
1365 1362 return all(c == 'f' for c in pycompat.iterbytestr(prefix))
1366 1363
1367 1364 hexnode = hex(node)
1368 1365
1369 1366 def disambiguate(hexnode, minlength):
1370 1367 """Disambiguate against wdirid."""
1371 1368 for length in range(minlength, 41):
1372 1369 prefix = hexnode[:length]
1373 1370 if not maybewdir(prefix):
1374 1371 return prefix
1375 1372
1376 1373 if not getattr(self, 'filteredrevs', None):
1377 1374 try:
1378 1375 length = max(self.index.shortest(node), minlength)
1379 1376 return disambiguate(hexnode, length)
1380 1377 except error.RevlogError:
1381 1378 if node != wdirid:
1382 1379 raise error.LookupError(node, self.indexfile, _('no node'))
1383 1380 except AttributeError:
1384 1381 # Fall through to pure code
1385 1382 pass
1386 1383
1387 1384 if node == wdirid:
1388 1385 for length in range(minlength, 41):
1389 1386 prefix = hexnode[:length]
1390 1387 if isvalid(prefix):
1391 1388 return prefix
1392 1389
1393 1390 for length in range(minlength, 41):
1394 1391 prefix = hexnode[:length]
1395 1392 if isvalid(prefix):
1396 1393 return disambiguate(hexnode, length)
1397 1394
1398 1395 def cmp(self, node, text):
1399 1396 """compare text with a given file revision
1400 1397
1401 1398 returns True if text is different than what is stored.
1402 1399 """
1403 1400 p1, p2 = self.parents(node)
1404 1401 return storageutil.hashrevisionsha1(text, p1, p2) != node
1405 1402
1406 1403 def _cachesegment(self, offset, data):
1407 1404 """Add a segment to the revlog cache.
1408 1405
1409 1406 Accepts an absolute offset and the data that is at that location.
1410 1407 """
1411 1408 o, d = self._chunkcache
1412 1409 # try to add to existing cache
1413 1410 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1414 1411 self._chunkcache = o, d + data
1415 1412 else:
1416 1413 self._chunkcache = offset, data
1417 1414
1418 1415 def _readsegment(self, offset, length, df=None):
1419 1416 """Load a segment of raw data from the revlog.
1420 1417
1421 1418 Accepts an absolute offset, length to read, and an optional existing
1422 1419 file handle to read from.
1423 1420
1424 1421 If an existing file handle is passed, it will be seeked and the
1425 1422 original seek position will NOT be restored.
1426 1423
1427 1424 Returns a str or buffer of raw byte data.
1428 1425
1429 1426 Raises if the requested number of bytes could not be read.
1430 1427 """
1431 1428 # Cache data both forward and backward around the requested
1432 1429 # data, in a fixed size window. This helps speed up operations
1433 1430 # involving reading the revlog backwards.
1434 1431 cachesize = self._chunkcachesize
1435 1432 realoffset = offset & ~(cachesize - 1)
1436 1433 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
1437 1434 - realoffset)
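# Editor's note (illustrative, not original): with the default cachesize of
# 65536, a request for offset=70000, length=100 yields realoffset=65536 and
# reallength=65536, i.e. the 64KiB window [65536, 131072) containing the
# requested bytes is read and cached.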
1438 1435 with self._datareadfp(df) as df:
1439 1436 df.seek(realoffset)
1440 1437 d = df.read(reallength)
1441 1438
1442 1439 self._cachesegment(realoffset, d)
1443 1440 if offset != realoffset or reallength != length:
1444 1441 startoffset = offset - realoffset
1445 1442 if len(d) - startoffset < length:
1446 1443 raise error.RevlogError(
1447 1444 _('partial read of revlog %s; expected %d bytes from '
1448 1445 'offset %d, got %d') %
1449 1446 (self.indexfile if self._inline else self.datafile,
1450 1447 length, realoffset, len(d) - startoffset))
1451 1448
1452 1449 return util.buffer(d, startoffset, length)
1453 1450
1454 1451 if len(d) < length:
1455 1452 raise error.RevlogError(
1456 1453 _('partial read of revlog %s; expected %d bytes from offset '
1457 1454 '%d, got %d') %
1458 1455 (self.indexfile if self._inline else self.datafile,
1459 1456 length, offset, len(d)))
1460 1457
1461 1458 return d
1462 1459
1463 1460 def _getsegment(self, offset, length, df=None):
1464 1461 """Obtain a segment of raw data from the revlog.
1465 1462
1466 1463 Accepts an absolute offset, length of bytes to obtain, and an
1467 1464 optional file handle to the already-opened revlog. If the file
1468 1465 handle is used, its original seek position will not be preserved.
1469 1466
1470 1467 Requests for data may be returned from a cache.
1471 1468
1472 1469 Returns a str or a buffer instance of raw byte data.
1473 1470 """
1474 1471 o, d = self._chunkcache
1475 1472 l = len(d)
1476 1473
1477 1474 # is it in the cache?
1478 1475 cachestart = offset - o
1479 1476 cacheend = cachestart + length
1480 1477 if cachestart >= 0 and cacheend <= l:
1481 1478 if cachestart == 0 and cacheend == l:
1482 1479 return d # avoid a copy
1483 1480 return util.buffer(d, cachestart, cacheend - cachestart)
1484 1481
1485 1482 return self._readsegment(offset, length, df=df)
1486 1483
1487 1484 def _getsegmentforrevs(self, startrev, endrev, df=None):
1488 1485 """Obtain a segment of raw data corresponding to a range of revisions.
1489 1486
1490 1487 Accepts the start and end revisions and an optional already-open
1491 1488 file handle to be used for reading. If the file handle is read, its
1492 1489 seek position will not be preserved.
1493 1490
1494 1491 Requests for data may be satisfied by a cache.
1495 1492
1496 1493 Returns a 2-tuple of (offset, data) for the requested range of
1497 1494 revisions. Offset is the integer offset from the beginning of the
1498 1495 revlog and data is a str or buffer of the raw byte data.
1499 1496
1500 1497 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1501 1498 to determine where each revision's data begins and ends.
1502 1499 """
1503 1500 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1504 1501 # (functions are expensive).
1505 1502 index = self.index
1506 1503 istart = index[startrev]
1507 1504 start = int(istart[0] >> 16)
1508 1505 if startrev == endrev:
1509 1506 end = start + istart[1]
1510 1507 else:
1511 1508 iend = index[endrev]
1512 1509 end = int(iend[0] >> 16) + iend[1]
1513 1510
1514 1511 if self._inline:
1515 1512 start += (startrev + 1) * self._io.size
1516 1513 end += (endrev + 1) * self._io.size
1517 1514 length = end - start
1518 1515
1519 1516 return start, self._getsegment(start, length, df=df)
1520 1517
1521 1518 def _chunk(self, rev, df=None):
1522 1519 """Obtain a single decompressed chunk for a revision.
1523 1520
1524 1521 Accepts an integer revision and an optional already-open file handle
1525 1522 to be used for reading. If used, the seek position of the file will not
1526 1523 be preserved.
1527 1524
1528 1525 Returns a str holding uncompressed data for the requested revision.
1529 1526 """
1530 1527 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1531 1528
1532 1529 def _chunks(self, revs, df=None, targetsize=None):
1533 1530 """Obtain decompressed chunks for the specified revisions.
1534 1531
1535 1532 Accepts an iterable of numeric revisions that are assumed to be in
1536 1533 ascending order. Also accepts an optional already-open file handle
1537 1534 to be used for reading. If used, the seek position of the file will
1538 1535 not be preserved.
1539 1536
1540 1537 This function is similar to calling ``self._chunk()`` multiple times,
1541 1538 but is faster.
1542 1539
1543 1540 Returns a list with decompressed data for each requested revision.
1544 1541 """
1545 1542 if not revs:
1546 1543 return []
1547 1544 start = self.start
1548 1545 length = self.length
1549 1546 inline = self._inline
1550 1547 iosize = self._io.size
1551 1548 buffer = util.buffer
1552 1549
1553 1550 l = []
1554 1551 ladd = l.append
1555 1552
1556 1553 if not self._withsparseread:
1557 1554 slicedchunks = (revs,)
1558 1555 else:
1559 1556 slicedchunks = deltautil.slicechunk(self, revs,
1560 1557 targetsize=targetsize)
1561 1558
1562 1559 for revschunk in slicedchunks:
1563 1560 firstrev = revschunk[0]
1564 1561 # Skip trailing revisions with empty diff
1565 1562 for lastrev in revschunk[::-1]:
1566 1563 if length(lastrev) != 0:
1567 1564 break
1568 1565
1569 1566 try:
1570 1567 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1571 1568 except OverflowError:
1572 1569 # issue4215 - we can't cache a run of chunks greater than
1573 1570 # 2G on Windows
1574 1571 return [self._chunk(rev, df=df) for rev in revschunk]
1575 1572
1576 1573 decomp = self.decompress
1577 1574 for rev in revschunk:
1578 1575 chunkstart = start(rev)
1579 1576 if inline:
1580 1577 chunkstart += (rev + 1) * iosize
1581 1578 chunklength = length(rev)
1582 1579 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1583 1580
1584 1581 return l
1585 1582
1586 1583 def _chunkclear(self):
1587 1584 """Clear the raw chunk cache."""
1588 1585 self._chunkcache = (0, '')
1589 1586
1590 1587 def deltaparent(self, rev):
1591 1588 """return deltaparent of the given revision"""
1592 1589 base = self.index[rev][3]
1593 1590 if base == rev:
1594 1591 return nullrev
1595 1592 elif self._generaldelta:
1596 1593 return base
1597 1594 else:
1598 1595 return rev - 1
1599 1596
1600 1597 def issnapshot(self, rev):
1601 1598 """tells whether rev is a snapshot
1602 1599 """
1603 1600 if not self._sparserevlog:
1604 1601 return self.deltaparent(rev) == nullrev
1605 1602 elif util.safehasattr(self.index, 'issnapshot'):
1606 1603 # directly assign the method to cache the testing and access
1607 1604 self.issnapshot = self.index.issnapshot
1608 1605 return self.issnapshot(rev)
1609 1606 if rev == nullrev:
1610 1607 return True
1611 1608 entry = self.index[rev]
1612 1609 base = entry[3]
1613 1610 if base == rev:
1614 1611 return True
1615 1612 if base == nullrev:
1616 1613 return True
1617 1614 p1 = entry[5]
1618 1615 p2 = entry[6]
1619 1616 if base == p1 or base == p2:
1620 1617 return False
1621 1618 return self.issnapshot(base)
1622 1619
1623 1620 def snapshotdepth(self, rev):
1624 1621 """number of snapshots in the chain before this one"""
1625 1622 if not self.issnapshot(rev):
1626 1623 raise error.ProgrammingError('revision %d not a snapshot' % rev)
1627 1624 return len(self._deltachain(rev)[0]) - 1
1628 1625
1629 1626 def revdiff(self, rev1, rev2):
1630 1627 """return or calculate a delta between two revisions
1631 1628
1632 1629 The delta calculated is in binary form and is intended to be written to
1633 1630 revlog data directly. So this function needs raw revision data.
1634 1631 """
1635 1632 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1636 1633 return bytes(self._chunk(rev2))
1637 1634
1638 1635 return mdiff.textdiff(self.revision(rev1, raw=True),
1639 1636 self.revision(rev2, raw=True))
1640 1637
1641 1638 def revision(self, nodeorrev, _df=None, raw=False):
1642 1639 """return an uncompressed revision of a given node or revision
1643 1640 number.
1644 1641
1645 1642 _df - an existing file handle to read from. (internal-only)
1646 1643 raw - an optional argument specifying if the revision data is to be
1647 1644 treated as raw data when applying flag transforms. 'raw' should be set
1648 1645 to True when generating changegroups or in debug commands.
1649 1646 """
1650 1647 if isinstance(nodeorrev, int):
1651 1648 rev = nodeorrev
1652 1649 node = self.node(rev)
1653 1650 else:
1654 1651 node = nodeorrev
1655 1652 rev = None
1656 1653
1657 1654 cachedrev = None
1658 1655 flags = None
1659 1656 rawtext = None
1660 1657 if node == nullid:
1661 1658 return ""
1662 1659 if self._revisioncache:
1663 1660 if self._revisioncache[0] == node:
1664 1661 # _cache only stores rawtext
1665 1662 if raw:
1666 1663 return self._revisioncache[2]
1667 1664 # duplicated, but good for perf
1668 1665 if rev is None:
1669 1666 rev = self.rev(node)
1670 1667 if flags is None:
1671 1668 flags = self.flags(rev)
1672 1669 # no extra flags set, no flag processor runs, text = rawtext
1673 1670 if flags == REVIDX_DEFAULT_FLAGS:
1674 1671 return self._revisioncache[2]
1675 1672 # rawtext is reusable. need to run flag processor
1676 1673 rawtext = self._revisioncache[2]
1677 1674
1678 1675 cachedrev = self._revisioncache[1]
1679 1676
1680 1677 # look up what we need to read
1681 1678 if rawtext is None:
1682 1679 if rev is None:
1683 1680 rev = self.rev(node)
1684 1681
1685 1682 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1686 1683 if stopped:
1687 1684 rawtext = self._revisioncache[2]
1688 1685
1689 1686 # drop cache to save memory
1690 1687 self._revisioncache = None
1691 1688
1692 1689 targetsize = None
1693 1690 rawsize = self.index[rev][2]
1694 1691 if 0 <= rawsize:
1695 1692 targetsize = 4 * rawsize
1696 1693
1697 1694 bins = self._chunks(chain, df=_df, targetsize=targetsize)
1698 1695 if rawtext is None:
1699 1696 rawtext = bytes(bins[0])
1700 1697 bins = bins[1:]
1701 1698
1702 1699 rawtext = mdiff.patches(rawtext, bins)
1703 1700 self._revisioncache = (node, rev, rawtext)
1704 1701
1705 1702 if flags is None:
1706 1703 if rev is None:
1707 1704 rev = self.rev(node)
1708 1705 flags = self.flags(rev)
1709 1706
1710 1707 text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
1711 1708 if validatehash:
1712 1709 self.checkhash(text, node, rev=rev)
1713 1710
1714 1711 return text
1715 1712
1716 1713 def hash(self, text, p1, p2):
1717 1714 """Compute a node hash.
1718 1715
1719 1716 Available as a function so that subclasses can replace the hash
1720 1717 as needed.
1721 1718 """
1722 1719 return storageutil.hashrevisionsha1(text, p1, p2)
1723 1720
1724 1721 def _processflags(self, text, flags, operation, raw=False):
1725 1722         """Inspect revision data flags and apply transforms defined by
1726 1723 registered flag processors.
1727 1724
1728 1725 ``text`` - the revision data to process
1729 1726 ``flags`` - the revision flags
1730 1727 ``operation`` - the operation being performed (read or write)
1731 1728         ``raw`` - an optional argument indicating whether the raw transform should be
1732 1729 applied.
1733 1730
1734 1731 This method processes the flags in the order (or reverse order if
1735 1732 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
1736 1733         flag processors registered for present flags. The order of flags defined
1737 1734         in REVIDX_FLAGS_ORDER must be stable because the transforms are not commutative.
1738 1735
1739 1736 Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
1740 1737 processed text and ``validatehash`` is a bool indicating whether the
1741 1738 returned text should be checked for hash integrity.
1742 1739
1743 1740 Note: If the ``raw`` argument is set, it has precedence over the
1744 1741 operation and will only update the value of ``validatehash``.
1745 1742 """
1746 1743 # fast path: no flag processors will run
1747 1744 if flags == 0:
1748 1745 return text, True
1749 1746         if operation not in ('read', 'write'):
1750 1747 raise error.ProgrammingError(_("invalid '%s' operation") %
1751 1748 operation)
1752 1749 # Check all flags are known.
1753 1750 if flags & ~REVIDX_KNOWN_FLAGS:
1754 1751 raise error.RevlogError(_("incompatible revision flag '%#x'") %
1755 1752 (flags & ~REVIDX_KNOWN_FLAGS))
1756 1753 validatehash = True
1757 1754 # Depending on the operation (read or write), the order might be
1758 1755 # reversed due to non-commutative transforms.
1759 1756 orderedflags = REVIDX_FLAGS_ORDER
1760 1757 if operation == 'write':
1761 1758 orderedflags = reversed(orderedflags)
1762 1759
1763 1760 for flag in orderedflags:
1764 1761 # If a flagprocessor has been registered for a known flag, apply the
1765 1762 # related operation transform and update result tuple.
1766 1763 if flag & flags:
1767 1764 vhash = True
1768 1765
1769 1766 if flag not in self._flagprocessors:
1770 1767 message = _("missing processor for flag '%#x'") % (flag)
1771 1768 raise error.RevlogError(message)
1772 1769
1773 1770 processor = self._flagprocessors[flag]
1774 1771 if processor is not None:
1775 1772 readtransform, writetransform, rawtransform = processor
1776 1773
1777 1774 if raw:
1778 1775 vhash = rawtransform(self, text)
1779 1776 elif operation == 'read':
1780 1777 text, vhash = readtransform(self, text)
1781 1778 else: # write operation
1782 1779 text, vhash = writetransform(self, text)
1783 1780 validatehash = validatehash and vhash
1784 1781
1785 1782 return text, validatehash
1786 1783
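    # Illustrative sketch (not part of the original source): a flag processor is
    # a (readtransform, writetransform, rawtransform) triple.  The read/write
    # transforms take (revlog, text) and return (newtext, validatehash); the raw
    # transform only returns a validatehash boolean.  A hypothetical no-op
    # processor could look like:
    #
    #   def _noopread(rl, text):
    #       return text, True
    #
    #   def _noopwrite(rl, text):
    #       return text, True
    #
    #   def _noopraw(rl, text):
    #       return True
    #
    #   # registered for some flag bit, e.g.:
    #   # addflagprocessor(REVIDX_SOMEFLAG, (_noopread, _noopwrite, _noopraw))
    #
    # REVIDX_SOMEFLAG is a placeholder; the actual flag constants and the
    # registration helper live elsewhere in this module.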
1787 1784 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1788 1785 """Check node hash integrity.
1789 1786
1790 1787 Available as a function so that subclasses can extend hash mismatch
1791 1788 behaviors as needed.
1792 1789 """
1793 1790 try:
1794 1791 if p1 is None and p2 is None:
1795 1792 p1, p2 = self.parents(node)
1796 1793 if node != self.hash(text, p1, p2):
1797 1794 # Clear the revision cache on hash failure. The revision cache
1798 1795 # only stores the raw revision and clearing the cache does have
1799 1796 # the side-effect that we won't have a cache hit when the raw
1800 1797 # revision data is accessed. But this case should be rare and
1801 1798 # it is extra work to teach the cache about the hash
1802 1799 # verification state.
1803 1800 if self._revisioncache and self._revisioncache[0] == node:
1804 1801 self._revisioncache = None
1805 1802
1806 1803 revornode = rev
1807 1804 if revornode is None:
1808 1805 revornode = templatefilters.short(hex(node))
1809 1806 raise error.RevlogError(_("integrity check failed on %s:%s")
1810 1807 % (self.indexfile, pycompat.bytestr(revornode)))
1811 1808 except error.RevlogError:
1812 1809 if self._censorable and storageutil.iscensoredtext(text):
1813 1810 raise error.CensoredNodeError(self.indexfile, node, text)
1814 1811 raise
1815 1812
1816 1813 def _enforceinlinesize(self, tr, fp=None):
1817 1814 """Check if the revlog is too big for inline and convert if so.
1818 1815
1819 1816 This should be called after revisions are added to the revlog. If the
1820 1817 revlog has grown too large to be an inline revlog, it will convert it
1821 1818 to use multiple index and data files.
1822 1819 """
1823 1820 tiprev = len(self) - 1
1824 1821 if (not self._inline or
1825 1822 (self.start(tiprev) + self.length(tiprev)) < _maxinline):
1826 1823 return
1827 1824
1828 1825 trinfo = tr.find(self.indexfile)
1829 1826 if trinfo is None:
1830 1827 raise error.RevlogError(_("%s not found in the transaction")
1831 1828 % self.indexfile)
1832 1829
1833 1830 trindex = trinfo[2]
1834 1831 if trindex is not None:
1835 1832 dataoff = self.start(trindex)
1836 1833 else:
1837 1834 # revlog was stripped at start of transaction, use all leftover data
1838 1835 trindex = len(self) - 1
1839 1836 dataoff = self.end(tiprev)
1840 1837
1841 1838 tr.add(self.datafile, dataoff)
1842 1839
1843 1840 if fp:
1844 1841 fp.flush()
1845 1842 fp.close()
1846 1843 # We can't use the cached file handle after close(). So prevent
1847 1844 # its usage.
1848 1845 self._writinghandles = None
1849 1846
1850 1847 with self._indexfp('r') as ifh, self._datafp('w') as dfh:
1851 1848 for r in self:
1852 1849 dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])
1853 1850
1854 1851 with self._indexfp('w') as fp:
1855 1852 self.version &= ~FLAG_INLINE_DATA
1856 1853 self._inline = False
1857 1854 io = self._io
1858 1855 for i in self:
1859 1856 e = io.packentry(self.index[i], self.node, self.version, i)
1860 1857 fp.write(e)
1861 1858
1862 1859             # the temp file replaces the real index when we exit the context
1863 1860 # manager
1864 1861
1865 1862 tr.replace(self.indexfile, trindex * self._io.size)
1866 1863 self._chunkclear()
1867 1864
1868 1865 def _nodeduplicatecallback(self, transaction, node):
1869 1866         """called when trying to add a node that is already stored.
1870 1867 """
1871 1868
1872 1869 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1873 1870 node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
1874 1871 """add a revision to the log
1875 1872
1876 1873 text - the revision data to add
1877 1874 transaction - the transaction object used for rollback
1878 1875 link - the linkrev data to add
1879 1876 p1, p2 - the parent nodeids of the revision
1880 1877 cachedelta - an optional precomputed delta
1881 1878 node - nodeid of revision; typically node is not specified, and it is
1882 1879         computed by default as hash(text, p1, p2); however, subclasses might
1883 1880         use a different hashing method (and override checkhash() in that case)
1884 1881 flags - the known flags to set on the revision
1885 1882 deltacomputer - an optional deltacomputer instance shared between
1886 1883 multiple calls
1887 1884 """
1888 1885 if link == nullrev:
1889 1886 raise error.RevlogError(_("attempted to add linkrev -1 to %s")
1890 1887 % self.indexfile)
1891 1888
1892 1889 if flags:
1893 1890 node = node or self.hash(text, p1, p2)
1894 1891
1895 1892 rawtext, validatehash = self._processflags(text, flags, 'write')
1896 1893
1897 1894 # If the flag processor modifies the revision data, ignore any provided
1898 1895 # cachedelta.
1899 1896 if rawtext != text:
1900 1897 cachedelta = None
1901 1898
1902 1899 if len(rawtext) > _maxentrysize:
1903 1900 raise error.RevlogError(
1904 1901 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
1905 1902 % (self.indexfile, len(rawtext)))
1906 1903
1907 1904 node = node or self.hash(rawtext, p1, p2)
1908 1905 if node in self.nodemap:
1909 1906 return node
1910 1907
1911 1908 if validatehash:
1912 1909 self.checkhash(rawtext, node, p1=p1, p2=p2)
1913 1910
1914 1911 return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
1915 1912 flags, cachedelta=cachedelta,
1916 1913 deltacomputer=deltacomputer)
1917 1914
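    # Illustrative usage sketch (not part of the original source): a caller
    # holding a transaction ``tr`` would typically do something like
    #
    #   node = rlog.addrevision(text, tr, linkrev, p1node, p2node)
    #
    # letting the node default to hash(text, p1, p2); ``rlog``, ``linkrev``,
    # ``p1node`` and ``p2node`` are hypothetical names for this example.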
1918 1915 def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
1919 1916 cachedelta=None, deltacomputer=None):
1920 1917 """add a raw revision with known flags, node and parents
1921 1918 useful when reusing a revision not stored in this revlog (ex: received
1922 1919         over the wire, or read from an external bundle).
1923 1920 """
1924 1921 dfh = None
1925 1922 if not self._inline:
1926 1923 dfh = self._datafp("a+")
1927 1924 ifh = self._indexfp("a+")
1928 1925 try:
1929 1926 return self._addrevision(node, rawtext, transaction, link, p1, p2,
1930 1927 flags, cachedelta, ifh, dfh,
1931 1928 deltacomputer=deltacomputer)
1932 1929 finally:
1933 1930 if dfh:
1934 1931 dfh.close()
1935 1932 ifh.close()
1936 1933
1937 1934 def compress(self, data):
1938 1935 """Generate a possibly-compressed representation of data."""
1939 1936 if not data:
1940 1937 return '', data
1941 1938
1942 1939 compressed = self._compressor.compress(data)
1943 1940
1944 1941 if compressed:
1945 1942 # The revlog compressor added the header in the returned data.
1946 1943 return '', compressed
1947 1944
1948 1945 if data[0:1] == '\0':
1949 1946 return '', data
1950 1947 return 'u', data
1951 1948
1952 1949 def decompress(self, data):
1953 1950 """Decompress a revlog chunk.
1954 1951
1955 1952 The chunk is expected to begin with a header identifying the
1956 1953 format type so it can be routed to an appropriate decompressor.
1957 1954 """
1958 1955 if not data:
1959 1956 return data
1960 1957
1961 1958 # Revlogs are read much more frequently than they are written and many
1962 1959 # chunks only take microseconds to decompress, so performance is
1963 1960 # important here.
1964 1961 #
1965 1962 # We can make a few assumptions about revlogs:
1966 1963 #
1967 1964 # 1) the majority of chunks will be compressed (as opposed to inline
1968 1965 # raw data).
1969 1966         # 2) decompressing *any* data will likely be at least 10x slower than
1970 1967 # returning raw inline data.
1971 1968 # 3) we want to prioritize common and officially supported compression
1972 1969 # engines
1973 1970 #
1974 1971         # It follows that we want to optimize for the "decompress compressed data
1975 1972 # when encoded with common and officially supported compression engines"
1976 1973 # case over "raw data" and "data encoded by less common or non-official
1977 1974 # compression engines." That is why we have the inline lookup first
1978 1975 # followed by the compengines lookup.
1979 1976 #
1980 1977 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
1981 1978 # compressed chunks. And this matters for changelog and manifest reads.
1982 1979 t = data[0:1]
1983 1980
1984 1981 if t == 'x':
1985 1982 try:
1986 1983 return _zlibdecompress(data)
1987 1984 except zlib.error as e:
1988 1985 raise error.RevlogError(_('revlog decompress error: %s') %
1989 1986 stringutil.forcebytestr(e))
1990 1987 # '\0' is more common than 'u' so it goes first.
1991 1988 elif t == '\0':
1992 1989 return data
1993 1990 elif t == 'u':
1994 1991 return util.buffer(data, 1)
1995 1992
1996 1993 try:
1997 1994 compressor = self._decompressors[t]
1998 1995 except KeyError:
1999 1996 try:
2000 1997 engine = util.compengines.forrevlogheader(t)
2001 1998 compressor = engine.revlogcompressor(self._compengineopts)
2002 1999 self._decompressors[t] = compressor
2003 2000 except KeyError:
2004 2001 raise error.RevlogError(_('unknown compression type %r') % t)
2005 2002
2006 2003 return compressor.decompress(data)
2007 2004
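    # Illustrative sketch (not part of the original source) of the chunk header
    # convention shared by compress() and decompress() above:
    #
    #   header, data = rlog.compress(rawtext)
    #   # header == 'u': data is stored verbatim and prefixed with 'u' on disk
    #   # header == '':  data is either compressor output (e.g. 'x' for zlib)
    #   #                or raw text that already starts with '\0'
    #   restored = rlog.decompress(header + data)
    #
    # ``rlog`` is a hypothetical revlog instance; empty input short-circuits in
    # both directions.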
2008 2005 def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
2009 2006 cachedelta, ifh, dfh, alwayscache=False,
2010 2007 deltacomputer=None):
2011 2008 """internal function to add revisions to the log
2012 2009
2013 2010 see addrevision for argument descriptions.
2014 2011
2015 2012 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2016 2013
2017 2014         if "deltacomputer" is not provided or None, a default deltacomputer will
2018 2015 be used.
2019 2016
2020 2017 invariants:
2021 2018 - rawtext is optional (can be None); if not set, cachedelta must be set.
2022 2019 if both are set, they must correspond to each other.
2023 2020 """
2024 2021 if node == nullid:
2025 2022 raise error.RevlogError(_("%s: attempt to add null revision") %
2026 2023 self.indexfile)
2027 2024 if node == wdirid or node in wdirfilenodeids:
2028 2025 raise error.RevlogError(_("%s: attempt to add wdir revision") %
2029 2026 self.indexfile)
2030 2027
2031 2028 if self._inline:
2032 2029 fh = ifh
2033 2030 else:
2034 2031 fh = dfh
2035 2032
2036 2033 btext = [rawtext]
2037 2034
2038 2035 curr = len(self)
2039 2036 prev = curr - 1
2040 2037 offset = self.end(prev)
2041 2038 p1r, p2r = self.rev(p1), self.rev(p2)
2042 2039
2043 2040 # full versions are inserted when the needed deltas
2044 2041 # become comparable to the uncompressed text
2045 2042 if rawtext is None:
2046 2043             # we need the rawtext size before it was changed by flag processors,
2047 2044             # i.e. the non-raw size. use revlog explicitly to avoid filelog's extra
2048 2045 # logic that might remove metadata size.
2049 2046 textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
2050 2047 cachedelta[1])
2051 2048 else:
2052 2049 textlen = len(rawtext)
2053 2050
2054 2051 if deltacomputer is None:
2055 2052 deltacomputer = deltautil.deltacomputer(self)
2056 2053
2057 2054 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2058 2055
2059 2056 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2060 2057
2061 2058 e = (offset_type(offset, flags), deltainfo.deltalen, textlen,
2062 2059 deltainfo.base, link, p1r, p2r, node)
2063 2060 self.index.append(e)
2064 2061 self.nodemap[node] = curr
2065 2062
2066 2063 # Reset the pure node cache start lookup offset to account for new
2067 2064 # revision.
2068 2065 if self._nodepos is not None:
2069 2066 self._nodepos = curr
2070 2067
2071 2068 entry = self._io.packentry(e, self.node, self.version, curr)
2072 2069 self._writeentry(transaction, ifh, dfh, entry, deltainfo.data,
2073 2070 link, offset)
2074 2071
2075 2072 rawtext = btext[0]
2076 2073
2077 2074 if alwayscache and rawtext is None:
2078 2075 rawtext = deltacomputer.buildtext(revinfo, fh)
2079 2076
2080 2077 if type(rawtext) == bytes: # only accept immutable objects
2081 2078 self._revisioncache = (node, curr, rawtext)
2082 2079 self._chainbasecache[curr] = deltainfo.chainbase
2083 2080 return node
2084 2081
2085 2082 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
2086 2083 # Files opened in a+ mode have inconsistent behavior on various
2087 2084 # platforms. Windows requires that a file positioning call be made
2088 2085 # when the file handle transitions between reads and writes. See
2089 2086 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2090 2087 # platforms, Python or the platform itself can be buggy. Some versions
2091 2088 # of Solaris have been observed to not append at the end of the file
2092 2089 # if the file was seeked to before the end. See issue4943 for more.
2093 2090 #
2094 2091 # We work around this issue by inserting a seek() before writing.
2095 2092 # Note: This is likely not necessary on Python 3. However, because
2096 2093 # the file handle is reused for reads and may be seeked there, we need
2097 2094 # to be careful before changing this.
2098 2095 ifh.seek(0, os.SEEK_END)
2099 2096 if dfh:
2100 2097 dfh.seek(0, os.SEEK_END)
2101 2098
2102 2099 curr = len(self) - 1
2103 2100 if not self._inline:
2104 2101 transaction.add(self.datafile, offset)
2105 2102 transaction.add(self.indexfile, curr * len(entry))
2106 2103 if data[0]:
2107 2104 dfh.write(data[0])
2108 2105 dfh.write(data[1])
2109 2106 ifh.write(entry)
2110 2107 else:
2111 2108 offset += curr * self._io.size
2112 2109 transaction.add(self.indexfile, offset, curr)
2113 2110 ifh.write(entry)
2114 2111 ifh.write(data[0])
2115 2112 ifh.write(data[1])
2116 2113 self._enforceinlinesize(transaction, ifh)
2117 2114
2118 2115 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
2119 2116 """
2120 2117 add a delta group
2121 2118
2122 2119         Given a set of deltas, add them to the revision log. The
2123 2120         first delta is against its parent, which should be in our
2124 2121         log; the rest are against the previous delta.
2125 2122
2126 2123         If ``addrevisioncb`` is defined, it will be called with this revlog and
2127 2124         the node that was added as arguments.
2128 2125 """
2129 2126
2130 2127 if self._writinghandles:
2131 2128 raise error.ProgrammingError('cannot nest addgroup() calls')
2132 2129
2133 2130 nodes = []
2134 2131
2135 2132 r = len(self)
2136 2133 end = 0
2137 2134 if r:
2138 2135 end = self.end(r - 1)
2139 2136 ifh = self._indexfp("a+")
2140 2137 isize = r * self._io.size
2141 2138 if self._inline:
2142 2139 transaction.add(self.indexfile, end + isize, r)
2143 2140 dfh = None
2144 2141 else:
2145 2142 transaction.add(self.indexfile, isize, r)
2146 2143 transaction.add(self.datafile, end)
2147 2144 dfh = self._datafp("a+")
2148 2145 def flush():
2149 2146 if dfh:
2150 2147 dfh.flush()
2151 2148 ifh.flush()
2152 2149
2153 2150 self._writinghandles = (ifh, dfh)
2154 2151
2155 2152 try:
2156 2153 deltacomputer = deltautil.deltacomputer(self)
2157 2154 # loop through our set of deltas
2158 2155 for data in deltas:
2159 2156 node, p1, p2, linknode, deltabase, delta, flags = data
2160 2157 link = linkmapper(linknode)
2161 2158 flags = flags or REVIDX_DEFAULT_FLAGS
2162 2159
2163 2160 nodes.append(node)
2164 2161
2165 2162 if node in self.nodemap:
2166 2163 self._nodeduplicatecallback(transaction, node)
2167 2164 # this can happen if two branches make the same change
2168 2165 continue
2169 2166
2170 2167 for p in (p1, p2):
2171 2168 if p not in self.nodemap:
2172 2169 raise error.LookupError(p, self.indexfile,
2173 2170 _('unknown parent'))
2174 2171
2175 2172 if deltabase not in self.nodemap:
2176 2173 raise error.LookupError(deltabase, self.indexfile,
2177 2174 _('unknown delta base'))
2178 2175
2179 2176 baserev = self.rev(deltabase)
2180 2177
2181 2178 if baserev != nullrev and self.iscensored(baserev):
2182 2179 # if base is censored, delta must be full replacement in a
2183 2180 # single patch operation
2184 2181 hlen = struct.calcsize(">lll")
2185 2182 oldlen = self.rawsize(baserev)
2186 2183 newlen = len(delta) - hlen
2187 2184 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2188 2185 raise error.CensoredBaseError(self.indexfile,
2189 2186 self.node(baserev))
2190 2187
2191 2188 if not flags and self._peek_iscensored(baserev, delta, flush):
2192 2189 flags |= REVIDX_ISCENSORED
2193 2190
2194 2191 # We assume consumers of addrevisioncb will want to retrieve
2195 2192 # the added revision, which will require a call to
2196 2193 # revision(). revision() will fast path if there is a cache
2197 2194 # hit. So, we tell _addrevision() to always cache in this case.
2198 2195 # We're only using addgroup() in the context of changegroup
2199 2196 # generation so the revision data can always be handled as raw
2200 2197 # by the flagprocessor.
2201 2198 self._addrevision(node, None, transaction, link,
2202 2199 p1, p2, flags, (baserev, delta),
2203 2200 ifh, dfh,
2204 2201 alwayscache=bool(addrevisioncb),
2205 2202 deltacomputer=deltacomputer)
2206 2203
2207 2204 if addrevisioncb:
2208 2205 addrevisioncb(self, node)
2209 2206
2210 2207 if not dfh and not self._inline:
2211 2208 # addrevision switched from inline to conventional
2212 2209 # reopen the index
2213 2210 ifh.close()
2214 2211 dfh = self._datafp("a+")
2215 2212 ifh = self._indexfp("a+")
2216 2213 self._writinghandles = (ifh, dfh)
2217 2214 finally:
2218 2215 self._writinghandles = None
2219 2216
2220 2217 if dfh:
2221 2218 dfh.close()
2222 2219 ifh.close()
2223 2220
2224 2221 return nodes
2225 2222
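    # Illustrative sketch (not part of the original source): each item of
    # ``deltas`` is expected to be a 7-tuple, exactly as unpacked in the loop
    # above:
    #
    #   (node, p1, p2, linknode, deltabase, delta, flags)
    #
    # and ``linkmapper(linknode)`` translates the linked node into the linkrev
    # to store.  A hypothetical caller could look like:
    #
    #   added = rlog.addgroup(deltas, linkmapper, tr,
    #                         addrevisioncb=lambda rl, node: seen.append(node))
    #
    # where ``rlog``, ``linkmapper``, ``tr`` and ``seen`` are assumptions made
    # for this example.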
2226 2223 def iscensored(self, rev):
2227 2224 """Check if a file revision is censored."""
2228 2225 if not self._censorable:
2229 2226 return False
2230 2227
2231 2228 return self.flags(rev) & REVIDX_ISCENSORED
2232 2229
2233 2230 def _peek_iscensored(self, baserev, delta, flush):
2234 2231 """Quickly check if a delta produces a censored revision."""
2235 2232 if not self._censorable:
2236 2233 return False
2237 2234
2238 2235 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2239 2236
2240 2237 def getstrippoint(self, minlink):
2241 2238 """find the minimum rev that must be stripped to strip the linkrev
2242 2239
2243 2240 Returns a tuple containing the minimum rev and a set of all revs that
2244 2241 have linkrevs that will be broken by this strip.
2245 2242 """
2246 2243 return storageutil.resolvestripinfo(minlink, len(self) - 1,
2247 2244 self.headrevs(),
2248 2245 self.linkrev, self.parentrevs)
2249 2246
2250 2247 def strip(self, minlink, transaction):
2251 2248 """truncate the revlog on the first revision with a linkrev >= minlink
2252 2249
2253 2250 This function is called when we're stripping revision minlink and
2254 2251 its descendants from the repository.
2255 2252
2256 2253 We have to remove all revisions with linkrev >= minlink, because
2257 2254 the equivalent changelog revisions will be renumbered after the
2258 2255 strip.
2259 2256
2260 2257 So we truncate the revlog on the first of these revisions, and
2261 2258 trust that the caller has saved the revisions that shouldn't be
2262 2259 removed and that it'll re-add them after this truncation.
2263 2260 """
2264 2261 if len(self) == 0:
2265 2262 return
2266 2263
2267 2264 rev, _ = self.getstrippoint(minlink)
2268 2265 if rev == len(self):
2269 2266 return
2270 2267
2271 2268 # first truncate the files on disk
2272 2269 end = self.start(rev)
2273 2270 if not self._inline:
2274 2271 transaction.add(self.datafile, end)
2275 2272 end = rev * self._io.size
2276 2273 else:
2277 2274 end += rev * self._io.size
2278 2275
2279 2276 transaction.add(self.indexfile, end)
2280 2277
2281 2278 # then reset internal state in memory to forget those revisions
2282 2279 self._revisioncache = None
2283 2280 self._chaininfocache = {}
2284 2281 self._chunkclear()
2285 2282 for x in pycompat.xrange(rev, len(self)):
2286 2283 del self.nodemap[self.node(x)]
2287 2284
2288 2285 del self.index[rev:-1]
2289 2286 self._nodepos = None
2290 2287
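    # Illustrative usage sketch (not part of the original source): stripping is
    # usually a two-step operation for callers:
    #
    #   strippoint, brokenrevs = rlog.getstrippoint(minlink)
    #   rlog.strip(minlink, tr)   # truncates from ``strippoint`` onwards
    #
    # ``rlog``, ``minlink`` and ``tr`` are hypothetical names for this example.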
2291 2288 def checksize(self):
2292 2289 """Check size of index and data files
2293 2290
2294 2291 return a (dd, di) tuple.
2295 2292 - dd: extra bytes for the "data" file
2296 2293 - di: extra bytes for the "index" file
2297 2294
2298 2295 A healthy revlog will return (0, 0).
2299 2296 """
2300 2297 expected = 0
2301 2298 if len(self):
2302 2299 expected = max(0, self.end(len(self) - 1))
2303 2300
2304 2301 try:
2305 2302 with self._datafp() as f:
2306 2303 f.seek(0, 2)
2307 2304 actual = f.tell()
2308 2305 dd = actual - expected
2309 2306 except IOError as inst:
2310 2307 if inst.errno != errno.ENOENT:
2311 2308 raise
2312 2309 dd = 0
2313 2310
2314 2311 try:
2315 2312 f = self.opener(self.indexfile)
2316 2313 f.seek(0, 2)
2317 2314 actual = f.tell()
2318 2315 f.close()
2319 2316 s = self._io.size
2320 2317 i = max(0, actual // s)
2321 2318 di = actual - (i * s)
2322 2319 if self._inline:
2323 2320 databytes = 0
2324 2321 for r in self:
2325 2322 databytes += max(0, self.length(r))
2326 2323 dd = 0
2327 2324 di = actual - len(self) * s - databytes
2328 2325 except IOError as inst:
2329 2326 if inst.errno != errno.ENOENT:
2330 2327 raise
2331 2328 di = 0
2332 2329
2333 2330 return (dd, di)
2334 2331
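    # Illustrative usage sketch (not part of the original source):
    #
    #   dd, di = rlog.checksize()
    #   if (dd, di) != (0, 0):
    #       pass  # the data/index files carry unexpected trailing bytes
    #
    # ``rlog`` is a hypothetical revlog instance.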
2335 2332 def files(self):
2336 2333 res = [self.indexfile]
2337 2334 if not self._inline:
2338 2335 res.append(self.datafile)
2339 2336 return res
2340 2337
2341 2338 def emitrevisions(self, nodes, nodesorder=None, revisiondata=False,
2342 2339 assumehaveparentrevisions=False,
2343 2340 deltamode=repository.CG_DELTAMODE_STD):
2344 2341 if nodesorder not in ('nodes', 'storage', 'linear', None):
2345 2342 raise error.ProgrammingError('unhandled value for nodesorder: %s' %
2346 2343 nodesorder)
2347 2344
2348 2345 if nodesorder is None and not self._generaldelta:
2349 2346 nodesorder = 'storage'
2350 2347
2351 2348 if (not self._storedeltachains and
2352 2349 deltamode != repository.CG_DELTAMODE_PREV):
2353 2350 deltamode = repository.CG_DELTAMODE_FULL
2354 2351
2355 2352 return storageutil.emitrevisions(
2356 2353 self, nodes, nodesorder, revlogrevisiondelta,
2357 2354 deltaparentfn=self.deltaparent,
2358 2355 candeltafn=self.candelta,
2359 2356 rawsizefn=self.rawsize,
2360 2357 revdifffn=self.revdiff,
2361 2358 flagsfn=self.flags,
2362 2359 deltamode=deltamode,
2363 2360 revisiondata=revisiondata,
2364 2361 assumehaveparentrevisions=assumehaveparentrevisions)
2365 2362
2366 2363 DELTAREUSEALWAYS = 'always'
2367 2364 DELTAREUSESAMEREVS = 'samerevs'
2368 2365 DELTAREUSENEVER = 'never'
2369 2366
2370 2367 DELTAREUSEFULLADD = 'fulladd'
2371 2368
2372 2369 DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}
2373 2370
2374 2371 def clone(self, tr, destrevlog, addrevisioncb=None,
2375 2372 deltareuse=DELTAREUSESAMEREVS, forcedeltabothparents=None):
2376 2373 """Copy this revlog to another, possibly with format changes.
2377 2374
2378 2375 The destination revlog will contain the same revisions and nodes.
2379 2376 However, it may not be bit-for-bit identical due to e.g. delta encoding
2380 2377 differences.
2381 2378
2382 2379         The ``deltareuse`` argument controls how deltas from the existing revlog
2383 2380 are preserved in the destination revlog. The argument can have the
2384 2381 following values:
2385 2382
2386 2383 DELTAREUSEALWAYS
2387 2384 Deltas will always be reused (if possible), even if the destination
2388 2385 revlog would not select the same revisions for the delta. This is the
2389 2386 fastest mode of operation.
2390 2387 DELTAREUSESAMEREVS
2391 2388 Deltas will be reused if the destination revlog would pick the same
2392 2389 revisions for the delta. This mode strikes a balance between speed
2393 2390 and optimization.
2394 2391 DELTAREUSENEVER
2395 2392 Deltas will never be reused. This is the slowest mode of execution.
2396 2393 This mode can be used to recompute deltas (e.g. if the diff/delta
2397 2394 algorithm changes).
2398 2395
2399 2396 Delta computation can be slow, so the choice of delta reuse policy can
2400 2397 significantly affect run time.
2401 2398
2402 2399 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2403 2400 two extremes. Deltas will be reused if they are appropriate. But if the
2404 2401 delta could choose a better revision, it will do so. This means if you
2405 2402 are converting a non-generaldelta revlog to a generaldelta revlog,
2406 2403 deltas will be recomputed if the delta's parent isn't a parent of the
2407 2404 revision.
2408 2405
2409 2406 In addition to the delta policy, the ``forcedeltabothparents``
2410 2407         argument controls whether to force computing deltas against both parents
2411 2408         for merges. When not set, the destination's existing setting is used.
2412 2409 """
2413 2410 if deltareuse not in self.DELTAREUSEALL:
2414 2411 raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)
2415 2412
2416 2413 if len(destrevlog):
2417 2414 raise ValueError(_('destination revlog is not empty'))
2418 2415
2419 2416 if getattr(self, 'filteredrevs', None):
2420 2417 raise ValueError(_('source revlog has filtered revisions'))
2421 2418 if getattr(destrevlog, 'filteredrevs', None):
2422 2419 raise ValueError(_('destination revlog has filtered revisions'))
2423 2420
2424 2421 # lazydelta and lazydeltabase controls whether to reuse a cached delta,
2425 2422 # if possible.
2426 2423 oldlazydelta = destrevlog._lazydelta
2427 2424 oldlazydeltabase = destrevlog._lazydeltabase
2428 2425 oldamd = destrevlog._deltabothparents
2429 2426
2430 2427 try:
2431 2428 if deltareuse == self.DELTAREUSEALWAYS:
2432 2429 destrevlog._lazydeltabase = True
2433 2430 destrevlog._lazydelta = True
2434 2431 elif deltareuse == self.DELTAREUSESAMEREVS:
2435 2432 destrevlog._lazydeltabase = False
2436 2433 destrevlog._lazydelta = True
2437 2434 elif deltareuse == self.DELTAREUSENEVER:
2438 2435 destrevlog._lazydeltabase = False
2439 2436 destrevlog._lazydelta = False
2440 2437
2441 2438 destrevlog._deltabothparents = forcedeltabothparents or oldamd
2442 2439
2443 2440 deltacomputer = deltautil.deltacomputer(destrevlog)
2444 2441 index = self.index
2445 2442 for rev in self:
2446 2443 entry = index[rev]
2447 2444
2448 2445 # Some classes override linkrev to take filtered revs into
2449 2446 # account. Use raw entry from index.
2450 2447 flags = entry[0] & 0xffff
2451 2448 linkrev = entry[4]
2452 2449 p1 = index[entry[5]][7]
2453 2450 p2 = index[entry[6]][7]
2454 2451 node = entry[7]
2455 2452
2456 2453 # (Possibly) reuse the delta from the revlog if allowed and
2457 2454 # the revlog chunk is a delta.
2458 2455 cachedelta = None
2459 2456 rawtext = None
2460 2457 if destrevlog._lazydelta:
2461 2458 dp = self.deltaparent(rev)
2462 2459 if dp != nullrev:
2463 2460 cachedelta = (dp, bytes(self._chunk(rev)))
2464 2461
2465 2462 if not cachedelta:
2466 2463 rawtext = self.revision(rev, raw=True)
2467 2464
2468 2465
2469 2466 if deltareuse == self.DELTAREUSEFULLADD:
2470 2467 destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
2471 2468 cachedelta=cachedelta,
2472 2469 node=node, flags=flags,
2473 2470 deltacomputer=deltacomputer)
2474 2471 else:
2475 2472 ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
2476 2473 checkambig=False)
2477 2474 dfh = None
2478 2475 if not destrevlog._inline:
2479 2476 dfh = destrevlog.opener(destrevlog.datafile, 'a+')
2480 2477 try:
2481 2478 destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
2482 2479 p2, flags, cachedelta, ifh, dfh,
2483 2480 deltacomputer=deltacomputer)
2484 2481 finally:
2485 2482 if dfh:
2486 2483 dfh.close()
2487 2484 ifh.close()
2488 2485
2489 2486 if addrevisioncb:
2490 2487 addrevisioncb(self, rev, node)
2491 2488 finally:
2492 2489 destrevlog._lazydelta = oldlazydelta
2493 2490 destrevlog._lazydeltabase = oldlazydeltabase
2494 2491 destrevlog._deltabothparents = oldamd
2495 2492
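    # Illustrative usage sketch (not part of the original source): recomputing
    # every delta while copying into a fresh destination revlog could look like
    #
    #   srcrevlog.clone(tr, destrevlog, deltareuse=srcrevlog.DELTAREUSENEVER)
    #
    # ``srcrevlog``, ``destrevlog`` and ``tr`` are hypothetical names; the
    # destination must be empty, as enforced above.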
2496 2493 def censorrevision(self, tr, censornode, tombstone=b''):
2497 2494 if (self.version & 0xFFFF) == REVLOGV0:
2498 2495 raise error.RevlogError(_('cannot censor with version %d revlogs') %
2499 2496 self.version)
2500 2497
2501 2498 censorrev = self.rev(censornode)
2502 2499 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
2503 2500
2504 2501 if len(tombstone) > self.rawsize(censorrev):
2505 2502 raise error.Abort(_('censor tombstone must be no longer than '
2506 2503 'censored data'))
2507 2504
2508 2505 # Rewriting the revlog in place is hard. Our strategy for censoring is
2509 2506 # to create a new revlog, copy all revisions to it, then replace the
2510 2507 # revlogs on transaction close.
2511 2508
2512 2509 newindexfile = self.indexfile + b'.tmpcensored'
2513 2510 newdatafile = self.datafile + b'.tmpcensored'
2514 2511
2515 2512 # This is a bit dangerous. We could easily have a mismatch of state.
2516 2513 newrl = revlog(self.opener, newindexfile, newdatafile,
2517 2514 censorable=True)
2518 2515 newrl.version = self.version
2519 2516 newrl._generaldelta = self._generaldelta
2520 2517 newrl._io = self._io
2521 2518
2522 2519 for rev in self.revs():
2523 2520 node = self.node(rev)
2524 2521 p1, p2 = self.parents(node)
2525 2522
2526 2523 if rev == censorrev:
2527 2524 newrl.addrawrevision(tombstone, tr, self.linkrev(censorrev),
2528 2525 p1, p2, censornode, REVIDX_ISCENSORED)
2529 2526
2530 2527 if newrl.deltaparent(rev) != nullrev:
2531 2528 raise error.Abort(_('censored revision stored as delta; '
2532 2529 'cannot censor'),
2533 2530 hint=_('censoring of revlogs is not '
2534 2531 'fully implemented; please report '
2535 2532 'this bug'))
2536 2533 continue
2537 2534
2538 2535 if self.iscensored(rev):
2539 2536 if self.deltaparent(rev) != nullrev:
2540 2537 raise error.Abort(_('cannot censor due to censored '
2541 2538 'revision having delta stored'))
2542 2539 rawtext = self._chunk(rev)
2543 2540 else:
2544 2541 rawtext = self.revision(rev, raw=True)
2545 2542
2546 2543 newrl.addrawrevision(rawtext, tr, self.linkrev(rev), p1, p2, node,
2547 2544 self.flags(rev))
2548 2545
2549 2546 tr.addbackup(self.indexfile, location='store')
2550 2547 if not self._inline:
2551 2548 tr.addbackup(self.datafile, location='store')
2552 2549
2553 2550 self.opener.rename(newrl.indexfile, self.indexfile)
2554 2551 if not self._inline:
2555 2552 self.opener.rename(newrl.datafile, self.datafile)
2556 2553
2557 2554 self.clearcaches()
2558 2555 self._loadindex()
2559 2556
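    # Illustrative usage sketch (not part of the original source): censoring
    # replaces one revision's content with a tombstone while copying everything
    # else into a temporary revlog that is swapped in on transaction close:
    #
    #   rlog.censorrevision(tr, badnode, tombstone=b'removed for legal reasons')
    #
    # ``rlog``, ``tr`` and ``badnode`` are hypothetical names for this example.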
2560 2557 def verifyintegrity(self, state):
2561 2558 """Verifies the integrity of the revlog.
2562 2559
2563 2560 Yields ``revlogproblem`` instances describing problems that are
2564 2561 found.
2565 2562 """
2566 2563 dd, di = self.checksize()
2567 2564 if dd:
2568 2565 yield revlogproblem(error=_('data length off by %d bytes') % dd)
2569 2566 if di:
2570 2567 yield revlogproblem(error=_('index contains %d extra bytes') % di)
2571 2568
2572 2569 version = self.version & 0xFFFF
2573 2570
2574 2571 # The verifier tells us what version revlog we should be.
2575 2572 if version != state['expectedversion']:
2576 2573 yield revlogproblem(
2577 2574 warning=_("warning: '%s' uses revlog format %d; expected %d") %
2578 2575 (self.indexfile, version, state['expectedversion']))
2579 2576
2580 2577 state['skipread'] = set()
2581 2578
2582 2579 for rev in self:
2583 2580 node = self.node(rev)
2584 2581
2585 2582 # Verify contents. 4 cases to care about:
2586 2583 #
2587 2584 # common: the most common case
2588 2585 # rename: with a rename
2589 2586 # meta: file content starts with b'\1\n', the metadata
2590 2587 # header defined in filelog.py, but without a rename
2591 2588 # ext: content stored externally
2592 2589 #
2593 2590 # More formally, their differences are shown below:
2594 2591 #
2595 2592 # | common | rename | meta | ext
2596 2593 # -------------------------------------------------------
2597 2594 # flags() | 0 | 0 | 0 | not 0
2598 2595 # renamed() | False | True | False | ?
2599 2596 # rawtext[0:2]=='\1\n'| False | True | True | ?
2600 2597 #
2601 2598 # "rawtext" means the raw text stored in revlog data, which
2602 2599 # could be retrieved by "revision(rev, raw=True)". "text"
2603 2600 # mentioned below is "revision(rev, raw=False)".
2604 2601 #
2605 2602 # There are 3 different lengths stored physically:
2606 2603 # 1. L1: rawsize, stored in revlog index
2607 2604 # 2. L2: len(rawtext), stored in revlog data
2608 2605 # 3. L3: len(text), stored in revlog data if flags==0, or
2609 2606 # possibly somewhere else if flags!=0
2610 2607 #
2611 2608 # L1 should be equal to L2. L3 could be different from them.
2612 2609 # "text" may or may not affect commit hash depending on flag
2613 2610 # processors (see revlog.addflagprocessor).
2614 2611 #
2615 2612 # | common | rename | meta | ext
2616 2613 # -------------------------------------------------
2617 2614 # rawsize() | L1 | L1 | L1 | L1
2618 2615 # size() | L1 | L2-LM | L1(*) | L1 (?)
2619 2616 # len(rawtext) | L2 | L2 | L2 | L2
2620 2617 # len(text) | L2 | L2 | L2 | L3
2621 2618 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
2622 2619 #
2623 2620 # LM: length of metadata, depending on rawtext
2624 2621 # (*): not ideal, see comment in filelog.size
2625 2622 # (?): could be "- len(meta)" if the resolved content has
2626 2623 # rename metadata
2627 2624 #
2628 2625 # Checks needed to be done:
2629 2626 # 1. length check: L1 == L2, in all cases.
2630 2627 # 2. hash check: depending on flag processor, we may need to
2631 2628 # use either "text" (external), or "rawtext" (in revlog).
2632 2629
2633 2630 try:
2634 2631 skipflags = state.get('skipflags', 0)
2635 2632 if skipflags:
2636 2633 skipflags &= self.flags(rev)
2637 2634
2638 2635 if skipflags:
2639 2636 state['skipread'].add(node)
2640 2637 else:
2641 2638 # Side-effect: read content and verify hash.
2642 2639 self.revision(node)
2643 2640
2644 2641 l1 = self.rawsize(rev)
2645 2642 l2 = len(self.revision(node, raw=True))
2646 2643
2647 2644 if l1 != l2:
2648 2645 yield revlogproblem(
2649 2646 error=_('unpacked size is %d, %d expected') % (l2, l1),
2650 2647 node=node)
2651 2648
2652 2649 except error.CensoredNodeError:
2653 2650 if state['erroroncensored']:
2654 2651 yield revlogproblem(error=_('censored file data'),
2655 2652 node=node)
2656 2653 state['skipread'].add(node)
2657 2654 except Exception as e:
2658 2655 yield revlogproblem(
2659 2656 error=_('unpacking %s: %s') % (short(node),
2660 2657 stringutil.forcebytestr(e)),
2661 2658 node=node)
2662 2659 state['skipread'].add(node)
2663 2660
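    # Illustrative usage sketch (not part of the original source): a verifier
    # drives this generator with a state dict like the one read above
    # ('expectedversion', optional 'skipflags', 'erroroncensored') and reports
    # each yielded problem:
    #
    #   for problem in rlog.verifyintegrity(state):
    #       ui.warn(problem.error or problem.warning)
    #
    # ``rlog``, ``state`` and ``ui`` are hypothetical names, and the attribute
    # access on ``problem`` assumes the revlogproblem fields used above.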
2664 2661 def storageinfo(self, exclusivefiles=False, sharedfiles=False,
2665 2662 revisionscount=False, trackedsize=False,
2666 2663 storedsize=False):
2667 2664 d = {}
2668 2665
2669 2666 if exclusivefiles:
2670 2667 d['exclusivefiles'] = [(self.opener, self.indexfile)]
2671 2668 if not self._inline:
2672 2669 d['exclusivefiles'].append((self.opener, self.datafile))
2673 2670
2674 2671 if sharedfiles:
2675 2672 d['sharedfiles'] = []
2676 2673
2677 2674 if revisionscount:
2678 2675 d['revisionscount'] = len(self)
2679 2676
2680 2677 if trackedsize:
2681 2678 d['trackedsize'] = sum(map(self.rawsize, iter(self)))
2682 2679
2683 2680 if storedsize:
2684 2681 d['storedsize'] = sum(self.opener.stat(path).st_size
2685 2682 for path in self.files())
2686 2683
2687 2684 return d