match: document that visitchildrenset might return files...
Kyle Lippincott
r39296:27946fca default
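
The hunk below updates a comment in dirstate.walk()'s traverse loop to spell out that match.visitchildrenset() may put file names, not only directory names, into the set it returns. As a minimal sketch of the calling convention being documented (assumptions: 'm' is any matcher from mercurial.match with the visitchildrenset() method used in this changeset, and listdir() is a hypothetical helper that returns the entry names of a directory):

    def visible_entries(m, d, listdir):
        # visitchildrenset() returns 'all', 'this', a set of names, or an
        # empty/falsy value meaning nothing under d can match
        visitentries = m.visitchildrenset(d)
        if not visitentries:
            return []
        if visitentries == 'this' or visitentries == 'all':
            # every entry in d has to be inspected individually
            return listdir(d)
        # the set may name files as well as directories; entries not in the
        # set can be skipped regardless of their type
        return [e for e in listdir(d) if e in visitentries]
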
@@ -1,1501 +1,1504 @@
1 1 # dirstate.py - working directory tracking for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import collections
11 11 import contextlib
12 12 import errno
13 13 import os
14 14 import stat
15 15
16 16 from .i18n import _
17 17 from .node import nullid
18 18 from . import (
19 19 encoding,
20 20 error,
21 21 match as matchmod,
22 22 pathutil,
23 23 policy,
24 24 pycompat,
25 25 scmutil,
26 26 txnutil,
27 27 util,
28 28 )
29 29
30 30 parsers = policy.importmod(r'parsers')
31 31
32 32 propertycache = util.propertycache
33 33 filecache = scmutil.filecache
34 34 _rangemask = 0x7fffffff
35 35
36 36 dirstatetuple = parsers.dirstatetuple
37 37
38 38 class repocache(filecache):
39 39 """filecache for files in .hg/"""
40 40 def join(self, obj, fname):
41 41 return obj._opener.join(fname)
42 42
43 43 class rootcache(filecache):
44 44 """filecache for files in the repository root"""
45 45 def join(self, obj, fname):
46 46 return obj._join(fname)
47 47
48 48 def _getfsnow(vfs):
49 49 '''Get "now" timestamp on filesystem'''
50 50 tmpfd, tmpname = vfs.mkstemp()
51 51 try:
52 52 return os.fstat(tmpfd)[stat.ST_MTIME]
53 53 finally:
54 54 os.close(tmpfd)
55 55 vfs.unlink(tmpname)
56 56
57 57 class dirstate(object):
58 58
59 59 def __init__(self, opener, ui, root, validate, sparsematchfn):
60 60 '''Create a new dirstate object.
61 61
62 62 opener is an open()-like callable that can be used to open the
63 63 dirstate file; root is the root of the directory tracked by
64 64 the dirstate.
65 65 '''
66 66 self._opener = opener
67 67 self._validate = validate
68 68 self._root = root
69 69 self._sparsematchfn = sparsematchfn
70 70 # ntpath.join(root, '') of Python 2.7.9 does not add sep if root is
71 71 # UNC path pointing to root share (issue4557)
72 72 self._rootdir = pathutil.normasprefix(root)
73 73 self._dirty = False
74 74 self._lastnormaltime = 0
75 75 self._ui = ui
76 76 self._filecache = {}
77 77 self._parentwriters = 0
78 78 self._filename = 'dirstate'
79 79 self._pendingfilename = '%s.pending' % self._filename
80 80 self._plchangecallbacks = {}
81 81 self._origpl = None
82 82 self._updatedfiles = set()
83 83 self._mapcls = dirstatemap
84 84
85 85 @contextlib.contextmanager
86 86 def parentchange(self):
87 87 '''Context manager for handling dirstate parents.
88 88
89 89 If an exception occurs in the scope of the context manager,
90 90 the incoherent dirstate won't be written when wlock is
91 91 released.
92 92 '''
93 93 self._parentwriters += 1
94 94 yield
95 95 # Typically we want the "undo" step of a context manager in a
96 96 # finally block so it happens even when an exception
97 97 # occurs. In this case, however, we only want to decrement
98 98 # parentwriters if the code in the with statement exits
99 99 # normally, so we don't have a try/finally here on purpose.
100 100 self._parentwriters -= 1
101 101
102 102 def pendingparentchange(self):
103 103 '''Returns true if the dirstate is in the middle of a set of changes
104 104 that modify the dirstate parent.
105 105 '''
106 106 return self._parentwriters > 0
107 107
108 108 @propertycache
109 109 def _map(self):
110 110 """Return the dirstate contents (see documentation for dirstatemap)."""
111 111 self._map = self._mapcls(self._ui, self._opener, self._root)
112 112 return self._map
113 113
114 114 @property
115 115 def _sparsematcher(self):
116 116 """The matcher for the sparse checkout.
117 117
118 118 The working directory may not include every file from a manifest. The
119 119 matcher obtained by this property will match a path if it is to be
120 120 included in the working directory.
121 121 """
122 122 # TODO there is potential to cache this property. For now, the matcher
123 123 # is resolved on every access. (But the called function does use a
124 124 # cache to keep the lookup fast.)
125 125 return self._sparsematchfn()
126 126
127 127 @repocache('branch')
128 128 def _branch(self):
129 129 try:
130 130 return self._opener.read("branch").strip() or "default"
131 131 except IOError as inst:
132 132 if inst.errno != errno.ENOENT:
133 133 raise
134 134 return "default"
135 135
136 136 @property
137 137 def _pl(self):
138 138 return self._map.parents()
139 139
140 140 def hasdir(self, d):
141 141 return self._map.hastrackeddir(d)
142 142
143 143 @rootcache('.hgignore')
144 144 def _ignore(self):
145 145 files = self._ignorefiles()
146 146 if not files:
147 147 return matchmod.never(self._root, '')
148 148
149 149 pats = ['include:%s' % f for f in files]
150 150 return matchmod.match(self._root, '', [], pats, warn=self._ui.warn)
151 151
152 152 @propertycache
153 153 def _slash(self):
154 154 return self._ui.configbool('ui', 'slash') and pycompat.ossep != '/'
155 155
156 156 @propertycache
157 157 def _checklink(self):
158 158 return util.checklink(self._root)
159 159
160 160 @propertycache
161 161 def _checkexec(self):
162 162 return util.checkexec(self._root)
163 163
164 164 @propertycache
165 165 def _checkcase(self):
166 166 return not util.fscasesensitive(self._join('.hg'))
167 167
168 168 def _join(self, f):
169 169 # much faster than os.path.join()
170 170 # it's safe because f is always a relative path
171 171 return self._rootdir + f
172 172
173 173 def flagfunc(self, buildfallback):
174 174 if self._checklink and self._checkexec:
175 175 def f(x):
176 176 try:
177 177 st = os.lstat(self._join(x))
178 178 if util.statislink(st):
179 179 return 'l'
180 180 if util.statisexec(st):
181 181 return 'x'
182 182 except OSError:
183 183 pass
184 184 return ''
185 185 return f
186 186
187 187 fallback = buildfallback()
188 188 if self._checklink:
189 189 def f(x):
190 190 if os.path.islink(self._join(x)):
191 191 return 'l'
192 192 if 'x' in fallback(x):
193 193 return 'x'
194 194 return ''
195 195 return f
196 196 if self._checkexec:
197 197 def f(x):
198 198 if 'l' in fallback(x):
199 199 return 'l'
200 200 if util.isexec(self._join(x)):
201 201 return 'x'
202 202 return ''
203 203 return f
204 204 else:
205 205 return fallback
206 206
207 207 @propertycache
208 208 def _cwd(self):
209 209 # internal config: ui.forcecwd
210 210 forcecwd = self._ui.config('ui', 'forcecwd')
211 211 if forcecwd:
212 212 return forcecwd
213 213 return pycompat.getcwd()
214 214
215 215 def getcwd(self):
216 216 '''Return the path from which a canonical path is calculated.
217 217
218 218 This path should be used to resolve file patterns or to convert
219 219 canonical paths back to file paths for display. It shouldn't be
220 220 used to get real file paths. Use vfs functions instead.
221 221 '''
222 222 cwd = self._cwd
223 223 if cwd == self._root:
224 224 return ''
225 225 # self._root ends with a path separator if self._root is '/' or 'C:\'
226 226 rootsep = self._root
227 227 if not util.endswithsep(rootsep):
228 228 rootsep += pycompat.ossep
229 229 if cwd.startswith(rootsep):
230 230 return cwd[len(rootsep):]
231 231 else:
232 232 # we're outside the repo. return an absolute path.
233 233 return cwd
234 234
235 235 def pathto(self, f, cwd=None):
236 236 if cwd is None:
237 237 cwd = self.getcwd()
238 238 path = util.pathto(self._root, cwd, f)
239 239 if self._slash:
240 240 return util.pconvert(path)
241 241 return path
242 242
243 243 def __getitem__(self, key):
244 244 '''Return the current state of key (a filename) in the dirstate.
245 245
246 246 States are:
247 247 n normal
248 248 m needs merging
249 249 r marked for removal
250 250 a marked for addition
251 251 ? not tracked
252 252 '''
253 253 return self._map.get(key, ("?",))[0]
254 254
255 255 def __contains__(self, key):
256 256 return key in self._map
257 257
258 258 def __iter__(self):
259 259 return iter(sorted(self._map))
260 260
261 261 def items(self):
262 262 return self._map.iteritems()
263 263
264 264 iteritems = items
265 265
266 266 def parents(self):
267 267 return [self._validate(p) for p in self._pl]
268 268
269 269 def p1(self):
270 270 return self._validate(self._pl[0])
271 271
272 272 def p2(self):
273 273 return self._validate(self._pl[1])
274 274
275 275 def branch(self):
276 276 return encoding.tolocal(self._branch)
277 277
278 278 def setparents(self, p1, p2=nullid):
279 279 """Set dirstate parents to p1 and p2.
280 280
281 281 When moving from two parents to one, 'm' merged entries are
282 282 adjusted to normal, and previous copy records are discarded and
283 283 returned by the call.
284 284
285 285 See localrepo.setparents()
286 286 """
287 287 if self._parentwriters == 0:
288 288 raise ValueError("cannot set dirstate parent without "
289 289 "calling dirstate.beginparentchange")
290 290
291 291 self._dirty = True
292 292 oldp2 = self._pl[1]
293 293 if self._origpl is None:
294 294 self._origpl = self._pl
295 295 self._map.setparents(p1, p2)
296 296 copies = {}
297 297 if oldp2 != nullid and p2 == nullid:
298 298 candidatefiles = self._map.nonnormalset.union(
299 299 self._map.otherparentset)
300 300 for f in candidatefiles:
301 301 s = self._map.get(f)
302 302 if s is None:
303 303 continue
304 304
305 305 # Discard 'm' markers when moving away from a merge state
306 306 if s[0] == 'm':
307 307 source = self._map.copymap.get(f)
308 308 if source:
309 309 copies[f] = source
310 310 self.normallookup(f)
311 311 # Also fix up otherparent markers
312 312 elif s[0] == 'n' and s[2] == -2:
313 313 source = self._map.copymap.get(f)
314 314 if source:
315 315 copies[f] = source
316 316 self.add(f)
317 317 return copies
318 318
319 319 def setbranch(self, branch):
320 320 self._branch = encoding.fromlocal(branch)
321 321 f = self._opener('branch', 'w', atomictemp=True, checkambig=True)
322 322 try:
323 323 f.write(self._branch + '\n')
324 324 f.close()
325 325
326 326 # make sure filecache has the correct stat info for _branch after
327 327 # replacing the underlying file
328 328 ce = self._filecache['_branch']
329 329 if ce:
330 330 ce.refresh()
331 331 except: # re-raises
332 332 f.discard()
333 333 raise
334 334
335 335 def invalidate(self):
336 336 '''Causes the next access to reread the dirstate.
337 337
338 338 This is different from localrepo.invalidatedirstate() because it always
339 339 rereads the dirstate. Use localrepo.invalidatedirstate() if you want to
340 340 check whether the dirstate has changed before rereading it.'''
341 341
342 342 for a in (r"_map", r"_branch", r"_ignore"):
343 343 if a in self.__dict__:
344 344 delattr(self, a)
345 345 self._lastnormaltime = 0
346 346 self._dirty = False
347 347 self._updatedfiles.clear()
348 348 self._parentwriters = 0
349 349 self._origpl = None
350 350
351 351 def copy(self, source, dest):
352 352 """Mark dest as a copy of source. Unmark dest if source is None."""
353 353 if source == dest:
354 354 return
355 355 self._dirty = True
356 356 if source is not None:
357 357 self._map.copymap[dest] = source
358 358 self._updatedfiles.add(source)
359 359 self._updatedfiles.add(dest)
360 360 elif self._map.copymap.pop(dest, None):
361 361 self._updatedfiles.add(dest)
362 362
363 363 def copied(self, file):
364 364 return self._map.copymap.get(file, None)
365 365
366 366 def copies(self):
367 367 return self._map.copymap
368 368
369 369 def _addpath(self, f, state, mode, size, mtime):
370 370 oldstate = self[f]
371 371 if state == 'a' or oldstate == 'r':
372 372 scmutil.checkfilename(f)
373 373 if self._map.hastrackeddir(f):
374 374 raise error.Abort(_('directory %r already in dirstate') %
375 375 pycompat.bytestr(f))
376 376 # shadows
377 377 for d in util.finddirs(f):
378 378 if self._map.hastrackeddir(d):
379 379 break
380 380 entry = self._map.get(d)
381 381 if entry is not None and entry[0] != 'r':
382 382 raise error.Abort(
383 383 _('file %r in dirstate clashes with %r') %
384 384 (pycompat.bytestr(d), pycompat.bytestr(f)))
385 385 self._dirty = True
386 386 self._updatedfiles.add(f)
387 387 self._map.addfile(f, oldstate, state, mode, size, mtime)
388 388
389 389 def normal(self, f):
390 390 '''Mark a file normal and clean.'''
391 391 s = os.lstat(self._join(f))
392 392 mtime = s[stat.ST_MTIME]
393 393 self._addpath(f, 'n', s.st_mode,
394 394 s.st_size & _rangemask, mtime & _rangemask)
395 395 self._map.copymap.pop(f, None)
396 396 if f in self._map.nonnormalset:
397 397 self._map.nonnormalset.remove(f)
398 398 if mtime > self._lastnormaltime:
399 399 # Remember the most recent modification timeslot for status(),
400 400 # to make sure we won't miss future size-preserving file content
401 401 # modifications that happen within the same timeslot.
402 402 self._lastnormaltime = mtime
403 403
404 404 def normallookup(self, f):
405 405 '''Mark a file normal, but possibly dirty.'''
406 406 if self._pl[1] != nullid:
407 407 # if there is a merge going on and the file was either
408 408 # in state 'm' (-1) or coming from other parent (-2) before
409 409 # being removed, restore that state.
410 410 entry = self._map.get(f)
411 411 if entry is not None:
412 412 if entry[0] == 'r' and entry[2] in (-1, -2):
413 413 source = self._map.copymap.get(f)
414 414 if entry[2] == -1:
415 415 self.merge(f)
416 416 elif entry[2] == -2:
417 417 self.otherparent(f)
418 418 if source:
419 419 self.copy(source, f)
420 420 return
421 421 if entry[0] == 'm' or entry[0] == 'n' and entry[2] == -2:
422 422 return
423 423 self._addpath(f, 'n', 0, -1, -1)
424 424 self._map.copymap.pop(f, None)
425 425
426 426 def otherparent(self, f):
427 427 '''Mark as coming from the other parent, always dirty.'''
428 428 if self._pl[1] == nullid:
429 429 raise error.Abort(_("setting %r to other parent "
430 430 "only allowed in merges") % f)
431 431 if f in self and self[f] == 'n':
432 432 # merge-like
433 433 self._addpath(f, 'm', 0, -2, -1)
434 434 else:
435 435 # add-like
436 436 self._addpath(f, 'n', 0, -2, -1)
437 437 self._map.copymap.pop(f, None)
438 438
439 439 def add(self, f):
440 440 '''Mark a file added.'''
441 441 self._addpath(f, 'a', 0, -1, -1)
442 442 self._map.copymap.pop(f, None)
443 443
444 444 def remove(self, f):
445 445 '''Mark a file removed.'''
446 446 self._dirty = True
447 447 oldstate = self[f]
448 448 size = 0
449 449 if self._pl[1] != nullid:
450 450 entry = self._map.get(f)
451 451 if entry is not None:
452 452 # backup the previous state
453 453 if entry[0] == 'm': # merge
454 454 size = -1
455 455 elif entry[0] == 'n' and entry[2] == -2: # other parent
456 456 size = -2
457 457 self._map.otherparentset.add(f)
458 458 self._updatedfiles.add(f)
459 459 self._map.removefile(f, oldstate, size)
460 460 if size == 0:
461 461 self._map.copymap.pop(f, None)
462 462
463 463 def merge(self, f):
464 464 '''Mark a file merged.'''
465 465 if self._pl[1] == nullid:
466 466 return self.normallookup(f)
467 467 return self.otherparent(f)
468 468
469 469 def drop(self, f):
470 470 '''Drop a file from the dirstate'''
471 471 oldstate = self[f]
472 472 if self._map.dropfile(f, oldstate):
473 473 self._dirty = True
474 474 self._updatedfiles.add(f)
475 475 self._map.copymap.pop(f, None)
476 476
477 477 def _discoverpath(self, path, normed, ignoremissing, exists, storemap):
478 478 if exists is None:
479 479 exists = os.path.lexists(os.path.join(self._root, path))
480 480 if not exists:
481 481 # Maybe a path component exists
482 482 if not ignoremissing and '/' in path:
483 483 d, f = path.rsplit('/', 1)
484 484 d = self._normalize(d, False, ignoremissing, None)
485 485 folded = d + "/" + f
486 486 else:
487 487 # No path components, preserve original case
488 488 folded = path
489 489 else:
490 490 # recursively normalize leading directory components
491 491 # against dirstate
492 492 if '/' in normed:
493 493 d, f = normed.rsplit('/', 1)
494 494 d = self._normalize(d, False, ignoremissing, True)
495 495 r = self._root + "/" + d
496 496 folded = d + "/" + util.fspath(f, r)
497 497 else:
498 498 folded = util.fspath(normed, self._root)
499 499 storemap[normed] = folded
500 500
501 501 return folded
502 502
503 503 def _normalizefile(self, path, isknown, ignoremissing=False, exists=None):
504 504 normed = util.normcase(path)
505 505 folded = self._map.filefoldmap.get(normed, None)
506 506 if folded is None:
507 507 if isknown:
508 508 folded = path
509 509 else:
510 510 folded = self._discoverpath(path, normed, ignoremissing, exists,
511 511 self._map.filefoldmap)
512 512 return folded
513 513
514 514 def _normalize(self, path, isknown, ignoremissing=False, exists=None):
515 515 normed = util.normcase(path)
516 516 folded = self._map.filefoldmap.get(normed, None)
517 517 if folded is None:
518 518 folded = self._map.dirfoldmap.get(normed, None)
519 519 if folded is None:
520 520 if isknown:
521 521 folded = path
522 522 else:
523 523 # store discovered result in dirfoldmap so that future
524 524 # normalizefile calls don't start matching directories
525 525 folded = self._discoverpath(path, normed, ignoremissing, exists,
526 526 self._map.dirfoldmap)
527 527 return folded
528 528
529 529 def normalize(self, path, isknown=False, ignoremissing=False):
530 530 '''
531 531 normalize the case of a pathname when on a casefolding filesystem
532 532
533 533 isknown specifies whether the filename came from walking the
534 534 disk, to avoid extra filesystem access.
535 535
536 536 If ignoremissing is True, missing paths are returned
537 537 unchanged. Otherwise, we try harder to normalize possibly
538 538 existing path components.
539 539
540 540 The normalized case is determined based on the following precedence:
541 541
542 542 - version of name already stored in the dirstate
543 543 - version of name stored on disk
544 544 - version provided via command arguments
545 545 '''
546 546
547 547 if self._checkcase:
548 548 return self._normalize(path, isknown, ignoremissing)
549 549 return path
550 550
551 551 def clear(self):
552 552 self._map.clear()
553 553 self._lastnormaltime = 0
554 554 self._updatedfiles.clear()
555 555 self._dirty = True
556 556
557 557 def rebuild(self, parent, allfiles, changedfiles=None):
558 558 if changedfiles is None:
559 559 # Rebuild entire dirstate
560 560 changedfiles = allfiles
561 561 lastnormaltime = self._lastnormaltime
562 562 self.clear()
563 563 self._lastnormaltime = lastnormaltime
564 564
565 565 if self._origpl is None:
566 566 self._origpl = self._pl
567 567 self._map.setparents(parent, nullid)
568 568 for f in changedfiles:
569 569 if f in allfiles:
570 570 self.normallookup(f)
571 571 else:
572 572 self.drop(f)
573 573
574 574 self._dirty = True
575 575
576 576 def identity(self):
577 577 '''Return identity of dirstate itself to detect changing in storage
578 578
579 579 If identity of previous dirstate is equal to this, writing
580 580 changes based on the former dirstate out can keep consistency.
581 581 '''
582 582 return self._map.identity
583 583
584 584 def write(self, tr):
585 585 if not self._dirty:
586 586 return
587 587
588 588 filename = self._filename
589 589 if tr:
590 590 # 'dirstate.write()' is not only for writing in-memory
591 591 # changes out, but also for dropping ambiguous timestamps.
592 592 # Delayed writing can re-raise the "ambiguous timestamp issue".
593 593 # See also the wiki page below for detail:
594 594 # https://www.mercurial-scm.org/wiki/DirstateTransactionPlan
595 595
596 596 # emulate dropping timestamp in 'parsers.pack_dirstate'
597 597 now = _getfsnow(self._opener)
598 598 self._map.clearambiguoustimes(self._updatedfiles, now)
599 599
600 600 # emulate that all 'dirstate.normal' results are written out
601 601 self._lastnormaltime = 0
602 602 self._updatedfiles.clear()
603 603
604 604 # delay writing in-memory changes out
605 605 tr.addfilegenerator('dirstate', (self._filename,),
606 606 self._writedirstate, location='plain')
607 607 return
608 608
609 609 st = self._opener(filename, "w", atomictemp=True, checkambig=True)
610 610 self._writedirstate(st)
611 611
612 612 def addparentchangecallback(self, category, callback):
613 613 """add a callback to be called when the wd parents are changed
614 614
615 615 Callback will be called with the following arguments:
616 616 dirstate, (oldp1, oldp2), (newp1, newp2)
617 617
618 618 Category is a unique identifier to allow overwriting an old callback
619 619 with a newer callback.
620 620 """
621 621 self._plchangecallbacks[category] = callback
622 622
623 623 def _writedirstate(self, st):
624 624 # notify callbacks about parents change
625 625 if self._origpl is not None and self._origpl != self._pl:
626 626 for c, callback in sorted(self._plchangecallbacks.iteritems()):
627 627 callback(self, self._origpl, self._pl)
628 628 self._origpl = None
629 629 # use the modification time of the newly created temporary file as the
630 630 # filesystem's notion of 'now'
631 631 now = util.fstat(st)[stat.ST_MTIME] & _rangemask
632 632
633 633 # a large enough 'delaywrite' prevents 'pack_dirstate' from dropping
634 634 # the timestamp of each entry in the dirstate, because of 'now > mtime'
635 635 delaywrite = self._ui.configint('debug', 'dirstate.delaywrite')
636 636 if delaywrite > 0:
637 637 # do we have any files to delay for?
638 638 for f, e in self._map.iteritems():
639 639 if e[0] == 'n' and e[3] == now:
640 640 import time # to avoid useless import
641 641 # rather than sleep n seconds, sleep until the next
642 642 # multiple of n seconds
643 643 clock = time.time()
644 644 start = int(clock) - (int(clock) % delaywrite)
645 645 end = start + delaywrite
646 646 time.sleep(end - clock)
647 647 now = end # trust our estimate that the end is near now
648 648 break
649 649
650 650 self._map.write(st, now)
651 651 self._lastnormaltime = 0
652 652 self._dirty = False
653 653
654 654 def _dirignore(self, f):
655 655 if f == '.':
656 656 return False
657 657 if self._ignore(f):
658 658 return True
659 659 for p in util.finddirs(f):
660 660 if self._ignore(p):
661 661 return True
662 662 return False
663 663
664 664 def _ignorefiles(self):
665 665 files = []
666 666 if os.path.exists(self._join('.hgignore')):
667 667 files.append(self._join('.hgignore'))
668 668 for name, path in self._ui.configitems("ui"):
669 669 if name == 'ignore' or name.startswith('ignore.'):
670 670 # we need to use os.path.join here rather than self._join
671 671 # because path is arbitrary and user-specified
672 672 files.append(os.path.join(self._rootdir, util.expandpath(path)))
673 673 return files
674 674
675 675 def _ignorefileandline(self, f):
676 676 files = collections.deque(self._ignorefiles())
677 677 visited = set()
678 678 while files:
679 679 i = files.popleft()
680 680 patterns = matchmod.readpatternfile(i, self._ui.warn,
681 681 sourceinfo=True)
682 682 for pattern, lineno, line in patterns:
683 683 kind, p = matchmod._patsplit(pattern, 'glob')
684 684 if kind == "subinclude":
685 685 if p not in visited:
686 686 files.append(p)
687 687 continue
688 688 m = matchmod.match(self._root, '', [], [pattern],
689 689 warn=self._ui.warn)
690 690 if m(f):
691 691 return (i, lineno, line)
692 692 visited.add(i)
693 693 return (None, -1, "")
694 694
695 695 def _walkexplicit(self, match, subrepos):
696 696 '''Get stat data about the files explicitly specified by match.
697 697
698 698 Return a triple (results, dirsfound, dirsnotfound).
699 699 - results is a mapping from filename to stat result. It also contains
700 700 listings mapping subrepos and .hg to None.
701 701 - dirsfound is a list of files found to be directories.
702 702 - dirsnotfound is a list of files that the dirstate thinks are
703 703 directories and that were not found.'''
704 704
705 705 def badtype(mode):
706 706 kind = _('unknown')
707 707 if stat.S_ISCHR(mode):
708 708 kind = _('character device')
709 709 elif stat.S_ISBLK(mode):
710 710 kind = _('block device')
711 711 elif stat.S_ISFIFO(mode):
712 712 kind = _('fifo')
713 713 elif stat.S_ISSOCK(mode):
714 714 kind = _('socket')
715 715 elif stat.S_ISDIR(mode):
716 716 kind = _('directory')
717 717 return _('unsupported file type (type is %s)') % kind
718 718
719 719 matchedir = match.explicitdir
720 720 badfn = match.bad
721 721 dmap = self._map
722 722 lstat = os.lstat
723 723 getkind = stat.S_IFMT
724 724 dirkind = stat.S_IFDIR
725 725 regkind = stat.S_IFREG
726 726 lnkkind = stat.S_IFLNK
727 727 join = self._join
728 728 dirsfound = []
729 729 foundadd = dirsfound.append
730 730 dirsnotfound = []
731 731 notfoundadd = dirsnotfound.append
732 732
733 733 if not match.isexact() and self._checkcase:
734 734 normalize = self._normalize
735 735 else:
736 736 normalize = None
737 737
738 738 files = sorted(match.files())
739 739 subrepos.sort()
740 740 i, j = 0, 0
741 741 while i < len(files) and j < len(subrepos):
742 742 subpath = subrepos[j] + "/"
743 743 if files[i] < subpath:
744 744 i += 1
745 745 continue
746 746 while i < len(files) and files[i].startswith(subpath):
747 747 del files[i]
748 748 j += 1
749 749
750 750 if not files or '.' in files:
751 751 files = ['.']
752 752 results = dict.fromkeys(subrepos)
753 753 results['.hg'] = None
754 754
755 755 for ff in files:
756 756 # constructing the foldmap is expensive, so don't do it for the
757 757 # common case where files is ['.']
758 758 if normalize and ff != '.':
759 759 nf = normalize(ff, False, True)
760 760 else:
761 761 nf = ff
762 762 if nf in results:
763 763 continue
764 764
765 765 try:
766 766 st = lstat(join(nf))
767 767 kind = getkind(st.st_mode)
768 768 if kind == dirkind:
769 769 if nf in dmap:
770 770 # file replaced by dir on disk but still in dirstate
771 771 results[nf] = None
772 772 if matchedir:
773 773 matchedir(nf)
774 774 foundadd((nf, ff))
775 775 elif kind == regkind or kind == lnkkind:
776 776 results[nf] = st
777 777 else:
778 778 badfn(ff, badtype(kind))
779 779 if nf in dmap:
780 780 results[nf] = None
781 781 except OSError as inst: # nf not found on disk - it is dirstate only
782 782 if nf in dmap: # does it exactly match a missing file?
783 783 results[nf] = None
784 784 else: # does it match a missing directory?
785 785 if self._map.hasdir(nf):
786 786 if matchedir:
787 787 matchedir(nf)
788 788 notfoundadd(nf)
789 789 else:
790 790 badfn(ff, encoding.strtolocal(inst.strerror))
791 791
792 792 # match.files() may contain explicitly-specified paths that shouldn't
793 793 # be taken; drop them from the list of files found. dirsfound/notfound
794 794 # aren't filtered here because they will be tested later.
795 795 if match.anypats():
796 796 for f in list(results):
797 797 if f == '.hg' or f in subrepos:
798 798 # keep sentinel to disable further out-of-repo walks
799 799 continue
800 800 if not match(f):
801 801 del results[f]
802 802
803 803 # Case insensitive filesystems cannot rely on lstat() failing to detect
804 804 # a case-only rename. Prune the stat object for any file that does not
805 805 # match the case in the filesystem, if there are multiple files that
806 806 # normalize to the same path.
807 807 if match.isexact() and self._checkcase:
808 808 normed = {}
809 809
810 810 for f, st in results.iteritems():
811 811 if st is None:
812 812 continue
813 813
814 814 nc = util.normcase(f)
815 815 paths = normed.get(nc)
816 816
817 817 if paths is None:
818 818 paths = set()
819 819 normed[nc] = paths
820 820
821 821 paths.add(f)
822 822
823 823 for norm, paths in normed.iteritems():
824 824 if len(paths) > 1:
825 825 for path in paths:
826 826 folded = self._discoverpath(path, norm, True, None,
827 827 self._map.dirfoldmap)
828 828 if path != folded:
829 829 results[path] = None
830 830
831 831 return results, dirsfound, dirsnotfound
832 832
833 833 def walk(self, match, subrepos, unknown, ignored, full=True):
834 834 '''
835 835 Walk recursively through the directory tree, finding all files
836 836 matched by match.
837 837
838 838 If full is False, maybe skip some known-clean files.
839 839
840 840 Return a dict mapping filename to stat-like object (either
841 841 mercurial.osutil.stat instance or return value of os.stat()).
842 842
843 843 '''
844 844 # full is a flag that extensions that hook into walk can use -- this
845 845 # implementation doesn't use it at all. This satisfies the contract
846 846 # because we only guarantee a "maybe".
847 847
848 848 if ignored:
849 849 ignore = util.never
850 850 dirignore = util.never
851 851 elif unknown:
852 852 ignore = self._ignore
853 853 dirignore = self._dirignore
854 854 else:
855 855 # if not unknown and not ignored, drop dir recursion and step 2
856 856 ignore = util.always
857 857 dirignore = util.always
858 858
859 859 matchfn = match.matchfn
860 860 matchalways = match.always()
861 861 matchtdir = match.traversedir
862 862 dmap = self._map
863 863 listdir = util.listdir
864 864 lstat = os.lstat
865 865 dirkind = stat.S_IFDIR
866 866 regkind = stat.S_IFREG
867 867 lnkkind = stat.S_IFLNK
868 868 join = self._join
869 869
870 870 exact = skipstep3 = False
871 871 if match.isexact(): # match.exact
872 872 exact = True
873 873 dirignore = util.always # skip step 2
874 874 elif match.prefix(): # match.match, no patterns
875 875 skipstep3 = True
876 876
877 877 if not exact and self._checkcase:
878 878 normalize = self._normalize
879 879 normalizefile = self._normalizefile
880 880 skipstep3 = False
881 881 else:
882 882 normalize = self._normalize
883 883 normalizefile = None
884 884
885 885 # step 1: find all explicit files
886 886 results, work, dirsnotfound = self._walkexplicit(match, subrepos)
887 887
888 888 skipstep3 = skipstep3 and not (work or dirsnotfound)
889 889 work = [d for d in work if not dirignore(d[0])]
890 890
891 891 # step 2: visit subdirectories
892 892 def traverse(work, alreadynormed):
893 893 wadd = work.append
894 894 while work:
895 895 nd = work.pop()
896 896 visitentries = match.visitchildrenset(nd)
897 897 if not visitentries:
898 898 continue
899 899 if visitentries == 'this' or visitentries == 'all':
900 900 visitentries = None
901 901 skip = None
902 902 if nd == '.':
903 903 nd = ''
904 904 else:
905 905 skip = '.hg'
906 906 try:
907 907 entries = listdir(join(nd), stat=True, skip=skip)
908 908 except OSError as inst:
909 909 if inst.errno in (errno.EACCES, errno.ENOENT):
910 910 match.bad(self.pathto(nd),
911 911 encoding.strtolocal(inst.strerror))
912 912 continue
913 913 raise
914 914 for f, kind, st in entries:
915 # If we needed to inspect any files, visitentries would have
916 # been 'this' or 'all', and we would have set it to None
917 # above. If we have visitentries populated here, we don't
918 # care about any files in this directory, so no need to
919 # check the type of `f`.
915 # Some matchers may return files in the visitentries set,
916 # instead of 'this', if the matcher explicitly mentions them
917 # and is not an exactmatcher. This is acceptable; we do not
918 # make any hard assumptions about file-or-directory below
919 # based on the presence of `f` in visitentries. If
920 # visitchildrenset returned a set, we can always skip the
921 # entries *not* in the set it provided regardless of whether
922 # they're actually a file or a directory.
920 923 if visitentries and f not in visitentries:
921 924 continue
922 925 if normalizefile:
923 926 # even though f might be a directory, we're only
924 927 # interested in comparing it to files currently in the
925 928 # dmap -- therefore normalizefile is enough
926 929 nf = normalizefile(nd and (nd + "/" + f) or f, True,
927 930 True)
928 931 else:
929 932 nf = nd and (nd + "/" + f) or f
930 933 if nf not in results:
931 934 if kind == dirkind:
932 935 if not ignore(nf):
933 936 if matchtdir:
934 937 matchtdir(nf)
935 938 wadd(nf)
936 939 if nf in dmap and (matchalways or matchfn(nf)):
937 940 results[nf] = None
938 941 elif kind == regkind or kind == lnkkind:
939 942 if nf in dmap:
940 943 if matchalways or matchfn(nf):
941 944 results[nf] = st
942 945 elif ((matchalways or matchfn(nf))
943 946 and not ignore(nf)):
944 947 # unknown file -- normalize if necessary
945 948 if not alreadynormed:
946 949 nf = normalize(nf, False, True)
947 950 results[nf] = st
948 951 elif nf in dmap and (matchalways or matchfn(nf)):
949 952 results[nf] = None
950 953
951 954 for nd, d in work:
952 955 # alreadynormed means that traverse() doesn't have to do any
953 956 # expensive directory normalization
954 957 alreadynormed = not normalize or nd == d
955 958 traverse([d], alreadynormed)
956 959
957 960 for s in subrepos:
958 961 del results[s]
959 962 del results['.hg']
960 963
961 964 # step 3: visit remaining files from dmap
962 965 if not skipstep3 and not exact:
963 966 # If a dmap file is not in results yet, it was either
964 967 # a) not matching matchfn b) ignored, c) missing, or d) under a
965 968 # symlink directory.
966 969 if not results and matchalways:
967 970 visit = [f for f in dmap]
968 971 else:
969 972 visit = [f for f in dmap if f not in results and matchfn(f)]
970 973 visit.sort()
971 974
972 975 if unknown:
973 976 # unknown == True means we walked all dirs under the roots
974 977 # that weren't ignored, and everything that matched was stat'ed
975 978 # and is already in results.
976 979 # The rest must thus be ignored or under a symlink.
977 980 audit_path = pathutil.pathauditor(self._root, cached=True)
978 981
979 982 for nf in iter(visit):
980 983 # If a stat for the same file was already added with a
981 984 # different case, don't add one for this, since that would
982 985 # make it appear as if the file exists under both names
983 986 # on disk.
984 987 if (normalizefile and
985 988 normalizefile(nf, True, True) in results):
986 989 results[nf] = None
987 990 # Report ignored items in the dmap as long as they are not
988 991 # under a symlink directory.
989 992 elif audit_path.check(nf):
990 993 try:
991 994 results[nf] = lstat(join(nf))
992 995 # file was just ignored, no links, and exists
993 996 except OSError:
994 997 # file doesn't exist
995 998 results[nf] = None
996 999 else:
997 1000 # It's either missing or under a symlink directory
998 1001 # which we in this case report as missing
999 1002 results[nf] = None
1000 1003 else:
1001 1004 # We may not have walked the full directory tree above,
1002 1005 # so stat and check everything we missed.
1003 1006 iv = iter(visit)
1004 1007 for st in util.statfiles([join(i) for i in visit]):
1005 1008 results[next(iv)] = st
1006 1009 return results
1007 1010
1008 1011 def status(self, match, subrepos, ignored, clean, unknown):
1009 1012 '''Determine the status of the working copy relative to the
1010 1013 dirstate and return a pair of (unsure, status), where status is of type
1011 1014 scmutil.status and:
1012 1015
1013 1016 unsure:
1014 1017 files that might have been modified since the dirstate was
1015 1018 written, but need to be read to be sure (size is the same
1016 1019 but mtime differs)
1017 1020 status.modified:
1018 1021 files that have definitely been modified since the dirstate
1019 1022 was written (different size or mode)
1020 1023 status.clean:
1021 1024 files that have definitely not been modified since the
1022 1025 dirstate was written
1023 1026 '''
1024 1027 listignored, listclean, listunknown = ignored, clean, unknown
1025 1028 lookup, modified, added, unknown, ignored = [], [], [], [], []
1026 1029 removed, deleted, clean = [], [], []
1027 1030
1028 1031 dmap = self._map
1029 1032 dmap.preload()
1030 1033 dcontains = dmap.__contains__
1031 1034 dget = dmap.__getitem__
1032 1035 ladd = lookup.append # aka "unsure"
1033 1036 madd = modified.append
1034 1037 aadd = added.append
1035 1038 uadd = unknown.append
1036 1039 iadd = ignored.append
1037 1040 radd = removed.append
1038 1041 dadd = deleted.append
1039 1042 cadd = clean.append
1040 1043 mexact = match.exact
1041 1044 dirignore = self._dirignore
1042 1045 checkexec = self._checkexec
1043 1046 copymap = self._map.copymap
1044 1047 lastnormaltime = self._lastnormaltime
1045 1048
1046 1049 # We need to do full walks when either
1047 1050 # - we're listing all clean files, or
1048 1051 # - match.traversedir does something, because match.traversedir should
1049 1052 # be called for every dir in the working dir
1050 1053 full = listclean or match.traversedir is not None
1051 1054 for fn, st in self.walk(match, subrepos, listunknown, listignored,
1052 1055 full=full).iteritems():
1053 1056 if not dcontains(fn):
1054 1057 if (listignored or mexact(fn)) and dirignore(fn):
1055 1058 if listignored:
1056 1059 iadd(fn)
1057 1060 else:
1058 1061 uadd(fn)
1059 1062 continue
1060 1063
1061 1064 # This is equivalent to 'state, mode, size, time = dmap[fn]' but not
1062 1065 # written like that for performance reasons. dmap[fn] is not a
1063 1066 # Python tuple in compiled builds. The CPython UNPACK_SEQUENCE
1064 1067 # opcode has fast paths when the value to be unpacked is a tuple or
1065 1068 # a list, but falls back to creating a full-fledged iterator in
1066 1069 # general. That is much slower than simply accessing and storing the
1067 1070 # tuple members one by one.
1068 1071 t = dget(fn)
1069 1072 state = t[0]
1070 1073 mode = t[1]
1071 1074 size = t[2]
1072 1075 time = t[3]
1073 1076
1074 1077 if not st and state in "nma":
1075 1078 dadd(fn)
1076 1079 elif state == 'n':
1077 1080 if (size >= 0 and
1078 1081 ((size != st.st_size and size != st.st_size & _rangemask)
1079 1082 or ((mode ^ st.st_mode) & 0o100 and checkexec))
1080 1083 or size == -2 # other parent
1081 1084 or fn in copymap):
1082 1085 madd(fn)
1083 1086 elif (time != st[stat.ST_MTIME]
1084 1087 and time != st[stat.ST_MTIME] & _rangemask):
1085 1088 ladd(fn)
1086 1089 elif st[stat.ST_MTIME] == lastnormaltime:
1087 1090 # fn may have just been marked as normal and it may have
1088 1091 # changed in the same second without changing its size.
1089 1092 # This can happen if we quickly do multiple commits.
1090 1093 # Force lookup, so we don't miss such a racy file change.
1091 1094 ladd(fn)
1092 1095 elif listclean:
1093 1096 cadd(fn)
1094 1097 elif state == 'm':
1095 1098 madd(fn)
1096 1099 elif state == 'a':
1097 1100 aadd(fn)
1098 1101 elif state == 'r':
1099 1102 radd(fn)
1100 1103
1101 1104 return (lookup, scmutil.status(modified, added, removed, deleted,
1102 1105 unknown, ignored, clean))
1103 1106
1104 1107 def matches(self, match):
1105 1108 '''
1106 1109 return files in the dirstate (in whatever state) filtered by match
1107 1110 '''
1108 1111 dmap = self._map
1109 1112 if match.always():
1110 1113 return dmap.keys()
1111 1114 files = match.files()
1112 1115 if match.isexact():
1113 1116 # fast path -- filter the other way around, since typically files is
1114 1117 # much smaller than dmap
1115 1118 return [f for f in files if f in dmap]
1116 1119 if match.prefix() and all(fn in dmap for fn in files):
1117 1120 # fast path -- all the values are known to be files, so just return
1118 1121 # that
1119 1122 return list(files)
1120 1123 return [f for f in dmap if match(f)]
1121 1124
1122 1125 def _actualfilename(self, tr):
1123 1126 if tr:
1124 1127 return self._pendingfilename
1125 1128 else:
1126 1129 return self._filename
1127 1130
1128 1131 def savebackup(self, tr, backupname):
1129 1132 '''Save current dirstate into backup file'''
1130 1133 filename = self._actualfilename(tr)
1131 1134 assert backupname != filename
1132 1135
1133 1136 # use '_writedirstate' instead of 'write' to write changes out unconditionally,
1134 1137 # because the latter omits writing out if transaction is running.
1135 1138 # output file will be used to create backup of dirstate at this point.
1136 1139 if self._dirty or not self._opener.exists(filename):
1137 1140 self._writedirstate(self._opener(filename, "w", atomictemp=True,
1138 1141 checkambig=True))
1139 1142
1140 1143 if tr:
1141 1144 # ensure that subsequent tr.writepending returns True for
1142 1145 # changes written out above, even if dirstate is never
1143 1146 # changed after this
1144 1147 tr.addfilegenerator('dirstate', (self._filename,),
1145 1148 self._writedirstate, location='plain')
1146 1149
1147 1150 # ensure that pending file written above is unlinked at
1148 1151 # failure, even if tr.writepending isn't invoked until the
1149 1152 # end of this transaction
1150 1153 tr.registertmp(filename, location='plain')
1151 1154
1152 1155 self._opener.tryunlink(backupname)
1153 1156 # hardlink backup is okay because _writedirstate is always called
1154 1157 # with an "atomictemp=True" file.
1155 1158 util.copyfile(self._opener.join(filename),
1156 1159 self._opener.join(backupname), hardlink=True)
1157 1160
1158 1161 def restorebackup(self, tr, backupname):
1159 1162 '''Restore dirstate by backup file'''
1160 1163 # this "invalidate()" prevents "wlock.release()" from writing
1161 1164 # changes of dirstate out after restoring from backup file
1162 1165 self.invalidate()
1163 1166 filename = self._actualfilename(tr)
1164 1167 o = self._opener
1165 1168 if util.samefile(o.join(backupname), o.join(filename)):
1166 1169 o.unlink(backupname)
1167 1170 else:
1168 1171 o.rename(backupname, filename, checkambig=True)
1169 1172
1170 1173 def clearbackup(self, tr, backupname):
1171 1174 '''Clear backup file'''
1172 1175 self._opener.unlink(backupname)
1173 1176
1174 1177 class dirstatemap(object):
1175 1178 """Map encapsulating the dirstate's contents.
1176 1179
1177 1180 The dirstate contains the following state:
1178 1181
1179 1182 - `identity` is the identity of the dirstate file, which can be used to
1180 1183 detect when changes have occurred to the dirstate file.
1181 1184
1182 1185 - `parents` is a pair containing the parents of the working copy. The
1183 1186 parents are updated by calling `setparents`.
1184 1187
1185 1188 - the state map maps filenames to tuples of (state, mode, size, mtime),
1186 1189 where state is a single character representing 'normal', 'added',
1187 1190 'removed', or 'merged'. It is read by treating the dirstate as a
1188 1191 dict. File state is updated by calling the `addfile`, `removefile` and
1189 1192 `dropfile` methods.
1190 1193
1191 1194 - `copymap` maps destination filenames to their source filename.
1192 1195
1193 1196 The dirstate also provides the following views onto the state:
1194 1197
1195 1198 - `nonnormalset` is a set of the filenames that have state other
1196 1199 than 'normal', or are normal but have an mtime of -1 ('normallookup').
1197 1200
1198 1201 - `otherparentset` is a set of the filenames that are marked as coming
1199 1202 from the second parent when the dirstate is currently being merged.
1200 1203
1201 1204 - `filefoldmap` is a dict mapping normalized filenames to the denormalized
1202 1205 form that they appear as in the dirstate.
1203 1206
1204 1207 - `dirfoldmap` is a dict mapping normalized directory names to the
1205 1208 denormalized form that they appear as in the dirstate.
1206 1209 """
1207 1210
1208 1211 def __init__(self, ui, opener, root):
1209 1212 self._ui = ui
1210 1213 self._opener = opener
1211 1214 self._root = root
1212 1215 self._filename = 'dirstate'
1213 1216
1214 1217 self._parents = None
1215 1218 self._dirtyparents = False
1216 1219
1217 1220 # for consistent view between _pl() and _read() invocations
1218 1221 self._pendingmode = None
1219 1222
1220 1223 @propertycache
1221 1224 def _map(self):
1222 1225 self._map = {}
1223 1226 self.read()
1224 1227 return self._map
1225 1228
1226 1229 @propertycache
1227 1230 def copymap(self):
1228 1231 self.copymap = {}
1229 1232 self._map
1230 1233 return self.copymap
1231 1234
1232 1235 def clear(self):
1233 1236 self._map.clear()
1234 1237 self.copymap.clear()
1235 1238 self.setparents(nullid, nullid)
1236 1239 util.clearcachedproperty(self, "_dirs")
1237 1240 util.clearcachedproperty(self, "_alldirs")
1238 1241 util.clearcachedproperty(self, "filefoldmap")
1239 1242 util.clearcachedproperty(self, "dirfoldmap")
1240 1243 util.clearcachedproperty(self, "nonnormalset")
1241 1244 util.clearcachedproperty(self, "otherparentset")
1242 1245
1243 1246 def items(self):
1244 1247 return self._map.iteritems()
1245 1248
1246 1249 # forward for python2,3 compat
1247 1250 iteritems = items
1248 1251
1249 1252 def __len__(self):
1250 1253 return len(self._map)
1251 1254
1252 1255 def __iter__(self):
1253 1256 return iter(self._map)
1254 1257
1255 1258 def get(self, key, default=None):
1256 1259 return self._map.get(key, default)
1257 1260
1258 1261 def __contains__(self, key):
1259 1262 return key in self._map
1260 1263
1261 1264 def __getitem__(self, key):
1262 1265 return self._map[key]
1263 1266
1264 1267 def keys(self):
1265 1268 return self._map.keys()
1266 1269
1267 1270 def preload(self):
1268 1271 """Loads the underlying data, if it's not already loaded"""
1269 1272 self._map
1270 1273
1271 1274 def addfile(self, f, oldstate, state, mode, size, mtime):
1272 1275 """Add a tracked file to the dirstate."""
1273 1276 if oldstate in "?r" and r"_dirs" in self.__dict__:
1274 1277 self._dirs.addpath(f)
1275 1278 if oldstate == "?" and r"_alldirs" in self.__dict__:
1276 1279 self._alldirs.addpath(f)
1277 1280 self._map[f] = dirstatetuple(state, mode, size, mtime)
1278 1281 if state != 'n' or mtime == -1:
1279 1282 self.nonnormalset.add(f)
1280 1283 if size == -2:
1281 1284 self.otherparentset.add(f)
1282 1285
1283 1286 def removefile(self, f, oldstate, size):
1284 1287 """
1285 1288 Mark a file as removed in the dirstate.
1286 1289
1287 1290 The `size` parameter is used to store sentinel values that indicate
1288 1291 the file's previous state. In the future, we should refactor this
1289 1292 to be more explicit about what that state is.
1290 1293 """
1291 1294 if oldstate not in "?r" and r"_dirs" in self.__dict__:
1292 1295 self._dirs.delpath(f)
1293 1296 if oldstate == "?" and r"_alldirs" in self.__dict__:
1294 1297 self._alldirs.addpath(f)
1295 1298 if r"filefoldmap" in self.__dict__:
1296 1299 normed = util.normcase(f)
1297 1300 self.filefoldmap.pop(normed, None)
1298 1301 self._map[f] = dirstatetuple('r', 0, size, 0)
1299 1302 self.nonnormalset.add(f)
1300 1303
1301 1304 def dropfile(self, f, oldstate):
1302 1305 """
1303 1306 Remove a file from the dirstate. Returns True if the file was
1304 1307 previously recorded.
1305 1308 """
1306 1309 exists = self._map.pop(f, None) is not None
1307 1310 if exists:
1308 1311 if oldstate != "r" and r"_dirs" in self.__dict__:
1309 1312 self._dirs.delpath(f)
1310 1313 if r"_alldirs" in self.__dict__:
1311 1314 self._alldirs.delpath(f)
1312 1315 if r"filefoldmap" in self.__dict__:
1313 1316 normed = util.normcase(f)
1314 1317 self.filefoldmap.pop(normed, None)
1315 1318 self.nonnormalset.discard(f)
1316 1319 return exists
1317 1320
1318 1321 def clearambiguoustimes(self, files, now):
1319 1322 for f in files:
1320 1323 e = self.get(f)
1321 1324 if e is not None and e[0] == 'n' and e[3] == now:
1322 1325 self._map[f] = dirstatetuple(e[0], e[1], e[2], -1)
1323 1326 self.nonnormalset.add(f)
1324 1327
1325 1328 def nonnormalentries(self):
1326 1329 '''Compute the nonnormal dirstate entries from the dmap'''
1327 1330 try:
1328 1331 return parsers.nonnormalotherparententries(self._map)
1329 1332 except AttributeError:
1330 1333 nonnorm = set()
1331 1334 otherparent = set()
1332 1335 for fname, e in self._map.iteritems():
1333 1336 if e[0] != 'n' or e[3] == -1:
1334 1337 nonnorm.add(fname)
1335 1338 if e[0] == 'n' and e[2] == -2:
1336 1339 otherparent.add(fname)
1337 1340 return nonnorm, otherparent
1338 1341
1339 1342 @propertycache
1340 1343 def filefoldmap(self):
1341 1344 """Returns a dictionary mapping normalized case paths to their
1342 1345 non-normalized versions.
1343 1346 """
1344 1347 try:
1345 1348 makefilefoldmap = parsers.make_file_foldmap
1346 1349 except AttributeError:
1347 1350 pass
1348 1351 else:
1349 1352 return makefilefoldmap(self._map, util.normcasespec,
1350 1353 util.normcasefallback)
1351 1354
1352 1355 f = {}
1353 1356 normcase = util.normcase
1354 1357 for name, s in self._map.iteritems():
1355 1358 if s[0] != 'r':
1356 1359 f[normcase(name)] = name
1357 1360 f['.'] = '.' # prevents useless util.fspath() invocation
1358 1361 return f
1359 1362
1360 1363 def hastrackeddir(self, d):
1361 1364 """
1362 1365 Returns True if the dirstate contains a tracked (not removed) file
1363 1366 in this directory.
1364 1367 """
1365 1368 return d in self._dirs
1366 1369
1367 1370 def hasdir(self, d):
1368 1371 """
1369 1372 Returns True if the dirstate contains a file (tracked or removed)
1370 1373 in this directory.
1371 1374 """
1372 1375 return d in self._alldirs
1373 1376
1374 1377 @propertycache
1375 1378 def _dirs(self):
1376 1379 return util.dirs(self._map, 'r')
1377 1380
1378 1381 @propertycache
1379 1382 def _alldirs(self):
1380 1383 return util.dirs(self._map)
1381 1384
1382 1385 def _opendirstatefile(self):
1383 1386 fp, mode = txnutil.trypending(self._root, self._opener, self._filename)
1384 1387 if self._pendingmode is not None and self._pendingmode != mode:
1385 1388 fp.close()
1386 1389 raise error.Abort(_('working directory state may be '
1387 1390 'changed parallelly'))
1388 1391 self._pendingmode = mode
1389 1392 return fp
1390 1393
1391 1394 def parents(self):
1392 1395 if not self._parents:
1393 1396 try:
1394 1397 fp = self._opendirstatefile()
1395 1398 st = fp.read(40)
1396 1399 fp.close()
1397 1400 except IOError as err:
1398 1401 if err.errno != errno.ENOENT:
1399 1402 raise
1400 1403 # File doesn't exist, so the current state is empty
1401 1404 st = ''
1402 1405
1403 1406 l = len(st)
1404 1407 if l == 40:
1405 1408 self._parents = st[:20], st[20:40]
1406 1409 elif l == 0:
1407 1410 self._parents = [nullid, nullid]
1408 1411 else:
1409 1412 raise error.Abort(_('working directory state appears '
1410 1413 'damaged!'))
1411 1414
1412 1415 return self._parents
1413 1416
1414 1417 def setparents(self, p1, p2):
1415 1418 self._parents = (p1, p2)
1416 1419 self._dirtyparents = True
1417 1420
1418 1421 def read(self):
1419 1422 # ignore HG_PENDING because identity is used only for writing
1420 1423 self.identity = util.filestat.frompath(
1421 1424 self._opener.join(self._filename))
1422 1425
1423 1426 try:
1424 1427 fp = self._opendirstatefile()
1425 1428 try:
1426 1429 st = fp.read()
1427 1430 finally:
1428 1431 fp.close()
1429 1432 except IOError as err:
1430 1433 if err.errno != errno.ENOENT:
1431 1434 raise
1432 1435 return
1433 1436 if not st:
1434 1437 return
1435 1438
1436 1439 if util.safehasattr(parsers, 'dict_new_presized'):
1437 1440 # Make an estimate of the number of files in the dirstate based on
1438 1441 # its size. From a linear regression on a set of real-world repos,
1439 1442 # all over 10,000 files, the size of a dirstate entry is 85
1440 1443 # bytes. The cost of resizing is significantly higher than the cost
1441 1444 # of filling in a larger presized dict, so subtract 20% from the
1442 1445 # size.
1443 1446 #
1444 1447 # This heuristic is imperfect in many ways, so in a future dirstate
1445 1448 # format update it makes sense to just record the number of entries
1446 1449 # on write.
1447 1450 self._map = parsers.dict_new_presized(len(st) // 71)
1448 1451
1449 1452 # Python's garbage collector triggers a GC each time a certain number
1450 1453 # of container objects (the number being defined by
1451 1454 # gc.get_threshold()) are allocated. parse_dirstate creates a tuple
1452 1455 # for each file in the dirstate. The C version then immediately marks
1453 1456 # them as not to be tracked by the collector. However, this has no
1454 1457 # effect on when GCs are triggered, only on what objects the GC looks
1455 1458 # into. This means that O(number of files) GCs are unavoidable.
1456 1459 # Depending on when in the process's lifetime the dirstate is parsed,
1457 1460 # this can get very expensive. As a workaround, disable GC while
1458 1461 # parsing the dirstate.
1459 1462 #
1460 1463 # (we cannot decorate the function directly since it is in a C module)
1461 1464 parse_dirstate = util.nogc(parsers.parse_dirstate)
1462 1465 p = parse_dirstate(self._map, self.copymap, st)
1463 1466 if not self._dirtyparents:
1464 1467 self.setparents(*p)
1465 1468
1466 1469 # Avoid excess attribute lookups by fast pathing certain checks
1467 1470 self.__contains__ = self._map.__contains__
1468 1471 self.__getitem__ = self._map.__getitem__
1469 1472 self.get = self._map.get
1470 1473
1471 1474 def write(self, st, now):
1472 1475 st.write(parsers.pack_dirstate(self._map, self.copymap,
1473 1476 self.parents(), now))
1474 1477 st.close()
1475 1478 self._dirtyparents = False
1476 1479 self.nonnormalset, self.otherparentset = self.nonnormalentries()
1477 1480
1478 1481 @propertycache
1479 1482 def nonnormalset(self):
1480 1483 nonnorm, otherparents = self.nonnormalentries()
1481 1484 self.otherparentset = otherparents
1482 1485 return nonnorm
1483 1486
1484 1487 @propertycache
1485 1488 def otherparentset(self):
1486 1489 nonnorm, otherparents = self.nonnormalentries()
1487 1490 self.nonnormalset = nonnorm
1488 1491 return otherparents
1489 1492
1490 1493 @propertycache
1491 1494 def identity(self):
1492 1495 self._map
1493 1496 return self.identity
1494 1497
1495 1498 @propertycache
1496 1499 def dirfoldmap(self):
1497 1500 f = {}
1498 1501 normcase = util.normcase
1499 1502 for name in self._dirs:
1500 1503 f[normcase(name)] = name
1501 1504 return f
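
The dirstate.status() and dirstate.walk() methods defined above consume a matcher built by the match module, whose hunk follows. A rough consumer-side sketch, only an illustration under assumptions: 'repo' stands for an already-open localrepository object, path arguments are the bytestrings Mercurial expects, and the call signatures follow the code shown above.

    from mercurial import match as matchmod

    def unknown_files_under(repo, subdir):
        # build a matcher rooted at the repository that matches everything
        # under subdir ('path:' patterns are relative to the repo root)
        m = matchmod.match(repo.root, '', [], ['path:%s' % subdir])
        # status() returns (unsure, scmutil.status(...)); the walk it drives
        # uses the matcher's visitchildrenset() as documented in this change
        unsure, st = repo.dirstate.status(m, subrepos=[], ignored=False,
                                          clean=False, unknown=True)
        return st.unknown
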
@@ -1,1321 +1,1332 @@
1 1 # match.py - filename matching
2 2 #
3 3 # Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import, print_function
9 9
10 10 import copy
11 11 import itertools
12 12 import os
13 13 import re
14 14
15 15 from .i18n import _
16 16 from . import (
17 17 encoding,
18 18 error,
19 19 pathutil,
20 20 pycompat,
21 21 util,
22 22 )
23 23 from .utils import (
24 24 stringutil,
25 25 )
26 26
27 27 allpatternkinds = ('re', 'glob', 'path', 'relglob', 'relpath', 'relre',
28 28 'listfile', 'listfile0', 'set', 'include', 'subinclude',
29 29 'rootfilesin')
30 30 cwdrelativepatternkinds = ('relpath', 'glob')
31 31
32 32 propertycache = util.propertycache
33 33
34 34 def _rematcher(regex):
35 35 '''compile the regexp with the best available regexp engine and return a
36 36 matcher function'''
37 37 m = util.re.compile(regex)
38 38 try:
39 39 # slightly faster, provided by facebook's re2 bindings
40 40 return m.test_match
41 41 except AttributeError:
42 42 return m.match
43 43
44 44 def _expandsets(root, cwd, kindpats, ctx, listsubrepos, badfn):
45 45 '''Returns the kindpats list with the 'set' patterns expanded to matchers'''
46 46 matchers = []
47 47 other = []
48 48
49 49 for kind, pat, source in kindpats:
50 50 if kind == 'set':
51 51 if not ctx:
52 52 raise error.ProgrammingError("fileset expression with no "
53 53 "context")
54 54 matchers.append(ctx.matchfileset(pat, badfn=badfn))
55 55
56 56 if listsubrepos:
57 57 for subpath in ctx.substate:
58 58 sm = ctx.sub(subpath).matchfileset(pat, badfn=badfn)
59 59 pm = prefixdirmatcher(root, cwd, subpath, sm, badfn=badfn)
60 60 matchers.append(pm)
61 61
62 62 continue
63 63 other.append((kind, pat, source))
64 64 return matchers, other
65 65
66 66 def _expandsubinclude(kindpats, root):
67 67 '''Returns the list of subinclude matcher args and the kindpats without the
68 68 subincludes in it.'''
69 69 relmatchers = []
70 70 other = []
71 71
72 72 for kind, pat, source in kindpats:
73 73 if kind == 'subinclude':
74 74 sourceroot = pathutil.dirname(util.normpath(source))
75 75 pat = util.pconvert(pat)
76 76 path = pathutil.join(sourceroot, pat)
77 77
78 78 newroot = pathutil.dirname(path)
79 79 matcherargs = (newroot, '', [], ['include:%s' % path])
80 80
81 81 prefix = pathutil.canonpath(root, root, newroot)
82 82 if prefix:
83 83 prefix += '/'
84 84 relmatchers.append((prefix, matcherargs))
85 85 else:
86 86 other.append((kind, pat, source))
87 87
88 88 return relmatchers, other
89 89
90 90 def _kindpatsalwaysmatch(kindpats):
91 91 """Checks whether the kindpats match everything, as e.g.
92 92 'relpath:.' does.
93 93 """
94 94 for kind, pat, source in kindpats:
95 95 if pat != '' or kind not in ['relpath', 'glob']:
96 96 return False
97 97 return True
98 98
99 99 def _buildkindpatsmatcher(matchercls, root, cwd, kindpats, ctx=None,
100 100 listsubrepos=False, badfn=None):
101 101 matchers = []
102 102 fms, kindpats = _expandsets(root, cwd, kindpats, ctx=ctx,
103 103 listsubrepos=listsubrepos, badfn=badfn)
104 104 if kindpats:
105 105 m = matchercls(root, cwd, kindpats, listsubrepos=listsubrepos,
106 106 badfn=badfn)
107 107 matchers.append(m)
108 108 if fms:
109 109 matchers.extend(fms)
110 110 if not matchers:
111 111 return nevermatcher(root, cwd, badfn=badfn)
112 112 if len(matchers) == 1:
113 113 return matchers[0]
114 114 return unionmatcher(matchers)
115 115
116 116 def match(root, cwd, patterns=None, include=None, exclude=None, default='glob',
117 117 exact=False, auditor=None, ctx=None, listsubrepos=False, warn=None,
118 118 badfn=None, icasefs=False):
119 119 """build an object to match a set of file patterns
120 120
121 121 arguments:
122 122 root - the canonical root of the tree you're matching against
123 123 cwd - the current working directory, if relevant
124 124 patterns - patterns to find
125 125 include - patterns to include (unless they are excluded)
126 126 exclude - patterns to exclude (even if they are included)
127 127 default - if a pattern in patterns has no explicit type, assume this one
128 128 exact - patterns are actually filenames (include/exclude still apply)
129 129 warn - optional function used for printing warnings
130 130 badfn - optional bad() callback for this matcher instead of the default
131 131 icasefs - make a matcher for wdir on case insensitive filesystems, which
132 132 normalizes the given patterns to the case in the filesystem
133 133
134 134 a pattern is one of:
135 135 'glob:<glob>' - a glob relative to cwd
136 136 're:<regexp>' - a regular expression
137 137 'path:<path>' - a path relative to repository root, which is matched
138 138 recursively
139 139 'rootfilesin:<path>' - a path relative to repository root, which is
140 140 matched non-recursively (will not match subdirectories)
141 141 'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
142 142 'relpath:<path>' - a path relative to cwd
143 143 'relre:<regexp>' - a regexp that needn't match the start of a name
144 144 'set:<fileset>' - a fileset expression
145 145 'include:<path>' - a file of patterns to read and include
146 146 'subinclude:<path>' - a file of patterns to match against files under
147 147 the same directory
148 148 '<something>' - a pattern of the specified default type
149 149 """
150 150 normalize = _donormalize
151 151 if icasefs:
152 152 if exact:
153 153 raise error.ProgrammingError("a case-insensitive exact matcher "
154 154 "doesn't make sense")
155 155 dirstate = ctx.repo().dirstate
156 156 dsnormalize = dirstate.normalize
157 157
158 158 def normalize(patterns, default, root, cwd, auditor, warn):
159 159 kp = _donormalize(patterns, default, root, cwd, auditor, warn)
160 160 kindpats = []
161 161 for kind, pats, source in kp:
162 162 if kind not in ('re', 'relre'): # regex can't be normalized
163 163 p = pats
164 164 pats = dsnormalize(pats)
165 165
166 166 # Preserve the original to handle a case only rename.
167 167 if p != pats and p in dirstate:
168 168 kindpats.append((kind, p, source))
169 169
170 170 kindpats.append((kind, pats, source))
171 171 return kindpats
172 172
173 173 if exact:
174 174 m = exactmatcher(root, cwd, patterns, badfn)
175 175 elif patterns:
176 176 kindpats = normalize(patterns, default, root, cwd, auditor, warn)
177 177 if _kindpatsalwaysmatch(kindpats):
178 178 m = alwaysmatcher(root, cwd, badfn, relativeuipath=True)
179 179 else:
180 180 m = _buildkindpatsmatcher(patternmatcher, root, cwd, kindpats,
181 181 ctx=ctx, listsubrepos=listsubrepos,
182 182 badfn=badfn)
183 183 else:
184 184 # It's a little strange that no patterns means to match everything.
185 185 # Consider changing this to match nothing (probably using nevermatcher).
186 186 m = alwaysmatcher(root, cwd, badfn)
187 187
188 188 if include:
189 189 kindpats = normalize(include, 'glob', root, cwd, auditor, warn)
190 190 im = _buildkindpatsmatcher(includematcher, root, cwd, kindpats, ctx=ctx,
191 191 listsubrepos=listsubrepos, badfn=None)
192 192 m = intersectmatchers(m, im)
193 193 if exclude:
194 194 kindpats = normalize(exclude, 'glob', root, cwd, auditor, warn)
195 195 em = _buildkindpatsmatcher(includematcher, root, cwd, kindpats, ctx=ctx,
196 196 listsubrepos=listsubrepos, badfn=None)
197 197 m = differencematcher(m, em)
198 198 return m
199 199
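# Illustrative sketch, not part of this changeset: building a matcher from two
# of the pattern kinds documented above. The repo root and the file names are
# invented for the example.
from mercurial import match as matchmod

m = matchmod.match(b'/repo', b'', [b'path:foo/bar', b'rootfilesin:qux'])
m(b'foo/bar/baz.c')     # expected: True -- 'path:' matches recursively
m(b'qux/file.txt')      # expected: True -- 'rootfilesin:' matches direct children
m(b'qux/sub/file.txt')  # expected: False -- 'rootfilesin:' does not recurse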
200 200 def exact(root, cwd, files, badfn=None):
201 201 return exactmatcher(root, cwd, files, badfn=badfn)
202 202
203 203 def always(root, cwd):
204 204 return alwaysmatcher(root, cwd)
205 205
206 206 def never(root, cwd):
207 207 return nevermatcher(root, cwd)
208 208
209 209 def badmatch(match, badfn):
210 210 """Make a copy of the given matcher, replacing its bad method with the given
211 211 one.
212 212 """
213 213 m = copy.copy(match)
214 214 m.bad = badfn
215 215 return m
216 216
217 217 def _donormalize(patterns, default, root, cwd, auditor, warn):
218 218 '''Convert 'kind:pat' from the patterns list to tuples with kind and
219 219 normalized and rooted patterns and with listfiles expanded.'''
220 220 kindpats = []
221 221 for kind, pat in [_patsplit(p, default) for p in patterns]:
222 222 if kind in cwdrelativepatternkinds:
223 223 pat = pathutil.canonpath(root, cwd, pat, auditor)
224 224 elif kind in ('relglob', 'path', 'rootfilesin'):
225 225 pat = util.normpath(pat)
226 226 elif kind in ('listfile', 'listfile0'):
227 227 try:
228 228 files = util.readfile(pat)
229 229 if kind == 'listfile0':
230 230 files = files.split('\0')
231 231 else:
232 232 files = files.splitlines()
233 233 files = [f for f in files if f]
234 234 except EnvironmentError:
235 235 raise error.Abort(_("unable to read file list (%s)") % pat)
236 236 for k, p, source in _donormalize(files, default, root, cwd,
237 237 auditor, warn):
238 238 kindpats.append((k, p, pat))
239 239 continue
240 240 elif kind == 'include':
241 241 try:
242 242 fullpath = os.path.join(root, util.localpath(pat))
243 243 includepats = readpatternfile(fullpath, warn)
244 244 for k, p, source in _donormalize(includepats, default,
245 245 root, cwd, auditor, warn):
246 246 kindpats.append((k, p, source or pat))
247 247 except error.Abort as inst:
248 248 raise error.Abort('%s: %s' % (pat, inst[0]))
249 249 except IOError as inst:
250 250 if warn:
251 251 warn(_("skipping unreadable pattern file '%s': %s\n") %
252 252 (pat, stringutil.forcebytestr(inst.strerror)))
253 253 continue
254 254 # else: re or relre - which cannot be normalized
255 255 kindpats.append((kind, pat, ''))
256 256 return kindpats
257 257
258 258 class basematcher(object):
259 259
260 260 def __init__(self, root, cwd, badfn=None, relativeuipath=True):
261 261 self._root = root
262 262 self._cwd = cwd
263 263 if badfn is not None:
264 264 self.bad = badfn
265 265 self._relativeuipath = relativeuipath
266 266
267 267 def __call__(self, fn):
268 268 return self.matchfn(fn)
269 269 def __iter__(self):
270 270 for f in self._files:
271 271 yield f
272 272 # Callbacks related to how the matcher is used by dirstate.walk.
273 273 # Subscribers to these events must monkeypatch the matcher object.
274 274 def bad(self, f, msg):
275 275 '''Callback from dirstate.walk for each explicit file that can't be
276 276 found/accessed, with an error message.'''
277 277
278 278 # If an explicitdir is set, it will be called when an explicitly listed
279 279 # directory is visited.
280 280 explicitdir = None
281 281
282 282 # If a traversedir is set, it will be called when a directory discovered
283 283 # by recursive traversal is visited.
284 284 traversedir = None
285 285
286 286 def abs(self, f):
287 287 '''Convert a repo path back to path that is relative to the root of the
288 288 matcher.'''
289 289 return f
290 290
291 291 def rel(self, f):
292 292 '''Convert repo path back to path that is relative to cwd of matcher.'''
293 293 return util.pathto(self._root, self._cwd, f)
294 294
295 295 def uipath(self, f):
296 296 '''Convert repo path to a display path. If patterns or -I/-X were used
297 297 to create this matcher, the display path will be relative to cwd.
298 298 Otherwise it is relative to the root of the repo.'''
299 299 return (self._relativeuipath and self.rel(f)) or self.abs(f)
300 300
301 301 @propertycache
302 302 def _files(self):
303 303 return []
304 304
305 305 def files(self):
306 306 '''Explicitly listed files or patterns or roots:
307 307 if no patterns or .always(): empty list,
308 308 if exact: list exact files,
309 309 if not .anypats(): list all files and dirs,
310 310 else: optimal roots'''
311 311 return self._files
312 312
313 313 @propertycache
314 314 def _fileset(self):
315 315 return set(self._files)
316 316
317 317 def exact(self, f):
318 318 '''Returns True if f is in .files().'''
319 319 return f in self._fileset
320 320
321 321 def matchfn(self, f):
322 322 return False
323 323
324 324 def visitdir(self, dir):
325 325 '''Decides whether a directory should be visited based on whether it
326 326 has potential matches in it or one of its subdirectories. This is
327 327 based on the match's primary, included, and excluded patterns.
328 328
329 329 Returns the string 'all' if the given directory and all subdirectories
330 330 should be visited. Otherwise returns True or False indicating whether
331 331 the given directory should be visited.
332 332 '''
333 333 return True
334 334
335 335 def visitchildrenset(self, dir):
336 336 '''Decides whether a directory should be visited based on whether it
337 337 has potential matches in it or one of its subdirectories, and
338 338 potentially lists which subdirectories of that directory should be
339 339 visited. This is based on the match's primary, included, and excluded
340 340 patterns.
341 341
342 342 This function is very similar to 'visitdir', and the following mapping
343 343 can be applied:
344 344
345 345 visitdir | visitchildrenset
346 346 ----------+-------------------
347 347 False | set()
348 348 'all' | 'all'
349 True | 'this' OR non-empty set of subdirs to visit
349 True | 'this' OR non-empty set of subdirs -or files- to visit
350 350
351 351 Example:
352 352 Assume matchers ['path:foo/bar', 'rootfilesin:qux'], we would return
353 353 the following values (assuming the implementation of visitchildrenset
354 354 is capable of recognizing this; some implementations are not).
355 355
356 356 '.' -> {'foo', 'qux'}
357 357 'baz' -> set()
358 358 'foo' -> {'bar'}
359 359 # Ideally this would be 'all', but since the prefix nature of matchers
360 # is applied to the entire matcher, we have to downgrade to this
361 # 'this' due to the non-prefix 'rootfilesin'-kind matcher.
360 # is applied to the entire matcher, we have to downgrade this to
361 # 'this' due to the non-prefix 'rootfilesin'-kind matcher being mixed
362 # in.
362 363 'foo/bar' -> 'this'
363 364 'qux' -> 'this'
365
366 Important:
367 Most matchers do not know if they're representing files or
368 directories. They see ['path:dir/f'] and don't know whether 'f' is a
369 file or a directory, so visitchildrenset('dir') for most matchers will
370 return {'f'}, but if the matcher knows it's a file (like exactmatcher
371 does), it may return 'this'. Do not rely on a set being returned to mean
372 that there are no files in this dir to investigate (or, equivalently, on
373 'this' always being returned when there are files in 'dir' to
374 investigate).
364 375 '''
365 376 return 'this'
366 377
367 378 def always(self):
368 379 '''Matcher will match everything and .files() will be empty --
369 380 optimization might be possible.'''
370 381 return False
371 382
372 383 def isexact(self):
373 384 '''Matcher will match exactly the list of files in .files() --
374 385 optimization might be possible.'''
375 386 return False
376 387
377 388 def prefix(self):
378 389 '''Matcher will match the paths in .files() recursively --
379 390 optimization might be possible.'''
380 391 return False
381 392
382 393 def anypats(self):
383 394 '''None of .always(), .isexact(), and .prefix() is true --
384 395 optimizations will be difficult.'''
385 396 return not self.always() and not self.isexact() and not self.prefix()
386 397
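# Illustrative sketch, not part of this changeset: how a caller might consume
# the visitchildrenset() contract documented above. 'walkfn' and 'listdir' are
# hypothetical stand-ins for whatever a real walker uses to enumerate and act
# on directory entries.
def walkfn(matcher, dir, listdir):
    visit = matcher.visitchildrenset(dir)
    if not visit:
        return                    # set(): nothing at all to do below 'dir'
    for name, isdir in listdir(dir):
        if visit not in ('all', 'this') and name not in visit:
            continue              # a concrete set lets us prune entries early
        path = name if dir == '.' else dir + '/' + name
        if isdir:
            walkfn(matcher, path, listdir)
        elif matcher(path):
            pass                  # a real walker would yield/process 'path'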
387 398 class alwaysmatcher(basematcher):
388 399 '''Matches everything.'''
389 400
390 401 def __init__(self, root, cwd, badfn=None, relativeuipath=False):
391 402 super(alwaysmatcher, self).__init__(root, cwd, badfn,
392 403 relativeuipath=relativeuipath)
393 404
394 405 def always(self):
395 406 return True
396 407
397 408 def matchfn(self, f):
398 409 return True
399 410
400 411 def visitdir(self, dir):
401 412 return 'all'
402 413
403 414 def visitchildrenset(self, dir):
404 415 return 'all'
405 416
406 417 def __repr__(self):
407 418 return r'<alwaysmatcher>'
408 419
409 420 class nevermatcher(basematcher):
410 421 '''Matches nothing.'''
411 422
412 423 def __init__(self, root, cwd, badfn=None):
413 424 super(nevermatcher, self).__init__(root, cwd, badfn)
414 425
415 426 # It's a little weird to say that the nevermatcher is an exact matcher
416 427 # or a prefix matcher, but it seems to make sense to let callers take
417 428 # fast paths based on either. There will be no exact matches, nor any
418 429 # prefixes (files() returns []), so fast paths iterating over them should
419 430 # be efficient (and correct).
420 431 def isexact(self):
421 432 return True
422 433
423 434 def prefix(self):
424 435 return True
425 436
426 437 def visitdir(self, dir):
427 438 return False
428 439
429 440 def visitchildrenset(self, dir):
430 441 return set()
431 442
432 443 def __repr__(self):
433 444 return r'<nevermatcher>'
434 445
435 446 class predicatematcher(basematcher):
436 447 """A matcher adapter for a simple boolean function"""
437 448
438 449 def __init__(self, root, cwd, predfn, predrepr=None, badfn=None):
439 450 super(predicatematcher, self).__init__(root, cwd, badfn)
440 451 self.matchfn = predfn
441 452 self._predrepr = predrepr
442 453
443 454 @encoding.strmethod
444 455 def __repr__(self):
445 456 s = (stringutil.buildrepr(self._predrepr)
446 457 or pycompat.byterepr(self.matchfn))
447 458 return '<predicatenmatcher pred=%s>' % s
448 459
449 460 class patternmatcher(basematcher):
450 461
451 462 def __init__(self, root, cwd, kindpats, listsubrepos=False, badfn=None):
452 463 super(patternmatcher, self).__init__(root, cwd, badfn)
453 464
454 465 self._files = _explicitfiles(kindpats)
455 466 self._prefix = _prefix(kindpats)
456 467 self._pats, self.matchfn = _buildmatch(kindpats, '$', listsubrepos,
457 468 root)
458 469
459 470 @propertycache
460 471 def _dirs(self):
461 472 return set(util.dirs(self._fileset)) | {'.'}
462 473
463 474 def visitdir(self, dir):
464 475 if self._prefix and dir in self._fileset:
465 476 return 'all'
466 477 return ('.' in self._fileset or
467 478 dir in self._fileset or
468 479 dir in self._dirs or
469 480 any(parentdir in self._fileset
470 481 for parentdir in util.finddirs(dir)))
471 482
472 483 def visitchildrenset(self, dir):
473 484 ret = self.visitdir(dir)
474 485 if ret is True:
475 486 return 'this'
476 487 elif not ret:
477 488 return set()
478 489 assert ret == 'all'
479 490 return 'all'
480 491
481 492 def prefix(self):
482 493 return self._prefix
483 494
484 495 @encoding.strmethod
485 496 def __repr__(self):
486 497 return ('<patternmatcher patterns=%r>' % pycompat.bytestr(self._pats))
487 498
488 499 class includematcher(basematcher):
489 500
490 501 def __init__(self, root, cwd, kindpats, listsubrepos=False, badfn=None):
491 502 super(includematcher, self).__init__(root, cwd, badfn)
492 503
493 504 self._pats, self.matchfn = _buildmatch(kindpats, '(?:/|$)',
494 505 listsubrepos, root)
495 506 self._prefix = _prefix(kindpats)
496 507 roots, dirs, parents = _rootsdirsandparents(kindpats)
497 508 # roots are directories which are recursively included.
498 509 self._roots = set(roots)
499 510 # dirs are directories which are non-recursively included.
500 511 self._dirs = set(dirs)
501 512 # parents are directories which are non-recursively included because
502 513 # they are needed to get to items in _dirs or _roots.
503 514 self._parents = set(parents)
504 515
505 516 def visitdir(self, dir):
506 517 if self._prefix and dir in self._roots:
507 518 return 'all'
508 519 return ('.' in self._roots or
509 520 dir in self._roots or
510 521 dir in self._dirs or
511 522 dir in self._parents or
512 523 any(parentdir in self._roots
513 524 for parentdir in util.finddirs(dir)))
514 525
515 526 def visitchildrenset(self, dir):
516 527 if self._prefix and dir in self._roots:
517 528 return 'all'
518 529 # Note: this does *not* include the 'dir in self._parents' case from
519 530 # visitdir, that's handled below.
520 531 if ('.' in self._roots or
521 532 dir in self._roots or
522 533 dir in self._dirs or
523 534 any(parentdir in self._roots
524 535 for parentdir in util.finddirs(dir))):
525 536 return 'this'
526 537
527 538 ret = set()
528 539 if dir in self._parents:
529 540 # We add a '/' on to `dir` so that we don't return items that are
530 541 # prefixed by `dir` but are actually siblings of `dir`.
531 542 suffixeddir = dir + '/' if dir != '.' else ''
532 543 # Look in all _roots, _dirs, and _parents for things that start with
533 544 # 'suffixeddir'.
534 545 for d in [q for q in
535 546 itertools.chain(self._roots, self._dirs, self._parents) if
536 547 q.startswith(suffixeddir)]:
537 548 # Don't emit '.' in the response for the root directory
538 549 if not suffixeddir and d == '.':
539 550 continue
540 551
541 552 # We return the item name without the `suffixeddir` prefix or a
542 553 # slash suffix
543 554 d = d[len(suffixeddir):]
544 555 if '/' in d:
545 556 # This is a subdirectory-of-a-subdirectory, i.e.
546 557 # suffixeddir='foo/', d was 'foo/bar/baz' before removing
547 558 # 'foo/'.
548 559 d = d[:d.index('/')]
549 560 ret.add(d)
550 561 return ret
551 562
552 563 @encoding.strmethod
553 564 def __repr__(self):
554 565 return ('<includematcher includes=%r>' % pycompat.bytestr(self._pats))
555 566
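# Illustrative sketch, not part of this changeset: what the roots/dirs/parents
# bookkeeping above produces for a hypothetical include. The kindpats tuple is
# hand-built here; normally it comes from _donormalize().
from mercurial import match as matchmod

im = matchmod.includematcher(b'/repo', b'', [(b'glob', b'foo/bar/*.txt', b'')])
im.visitchildrenset(b'.')        # expected: {'foo'} -- only the parent chain
im.visitchildrenset(b'foo')      # expected: {'bar'}
im.visitchildrenset(b'foo/bar')  # expected: 'this' -- cannot narrow further
im.visitchildrenset(b'baz')      # expected: set()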
556 567 class exactmatcher(basematcher):
557 568 '''Matches the input files exactly. They are interpreted as paths, not
558 569 patterns (so no kind-prefixes).
559 570 '''
560 571
561 572 def __init__(self, root, cwd, files, badfn=None):
562 573 super(exactmatcher, self).__init__(root, cwd, badfn)
563 574
564 575 if isinstance(files, list):
565 576 self._files = files
566 577 else:
567 578 self._files = list(files)
568 579
569 580 matchfn = basematcher.exact
570 581
571 582 @propertycache
572 583 def _dirs(self):
573 584 return set(util.dirs(self._fileset)) | {'.'}
574 585
575 586 def visitdir(self, dir):
576 587 return dir in self._dirs
577 588
578 589 def visitchildrenset(self, dir):
579 590 if dir in self._dirs:
580 591 candidates = self._dirs - {'.'}
581 592 if dir != '.':
582 593 d = dir + '/'
583 594 candidates = set(c[len(d):] for c in candidates if
584 595 c.startswith(d))
585 596 # self._dirs includes all of the directories, recursively, so if
586 597 # we're attempting to match foo/bar/baz.txt, it'll have '.', 'foo',
587 598 # 'foo/bar' in it. Thus we can safely ignore a candidate that has a
588 599 # '/' in it, indicating it's for a subdir-of-a-subdir; the
589 600 # immediate subdir will be in there without a slash.
590 601 ret = set(c for c in candidates if '/' not in c)
591 602 # We need to emit 'this' for foo/bar, not set(), not {'baz.txt'}.
592 603 if not ret:
593 604 return 'this'
594 605 return ret
595 606 return set()
596 607
597 608 def isexact(self):
598 609 return True
599 610
600 611 @encoding.strmethod
601 612 def __repr__(self):
602 613 return ('<exactmatcher files=%r>' % self._files)
603 614
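# Illustrative sketch, not part of this changeset: exactmatcher knows its
# arguments are files, so the deepest directory yields 'this' rather than a
# set naming the files (see the 'Important' note on visitchildrenset above).
# The paths are invented.
from mercurial import match as matchmod

em = matchmod.exactmatcher(b'/repo', b'', [b'a/b/c.txt', b'a/d.txt'])
em.visitchildrenset(b'.')    # expected: {'a'}
em.visitchildrenset(b'a')    # expected: {'b'} -- d.txt is a file, not listed
em.visitchildrenset(b'a/b')  # expected: 'this' -- only files remain below
em.visitchildrenset(b'x')    # expected: set()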
604 615 class differencematcher(basematcher):
605 616 '''Composes two matchers by matching if the first matches and the second
606 617 does not.
607 618
608 619 The second matcher's non-matching-attributes (root, cwd, bad, explicitdir,
609 620 traversedir) are ignored.
610 621 '''
611 622 def __init__(self, m1, m2):
612 623 super(differencematcher, self).__init__(m1._root, m1._cwd)
613 624 self._m1 = m1
614 625 self._m2 = m2
615 626 self.bad = m1.bad
616 627 self.explicitdir = m1.explicitdir
617 628 self.traversedir = m1.traversedir
618 629
619 630 def matchfn(self, f):
620 631 return self._m1(f) and not self._m2(f)
621 632
622 633 @propertycache
623 634 def _files(self):
624 635 if self.isexact():
625 636 return [f for f in self._m1.files() if self(f)]
626 637 # If m1 is not an exact matcher, we can't easily figure out the set of
627 638 # files, because its files() are not always files. For example, if
628 639 # m1 is "path:dir" and m2 is "rootfilesin:.", we don't
629 640 # want to remove "dir" from the set even though it would match m2,
630 641 # because the "dir" in m1 may not be a file.
631 642 return self._m1.files()
632 643
633 644 def visitdir(self, dir):
634 645 if self._m2.visitdir(dir) == 'all':
635 646 return False
636 647 return bool(self._m1.visitdir(dir))
637 648
638 649 def visitchildrenset(self, dir):
639 650 m2_set = self._m2.visitchildrenset(dir)
640 651 if m2_set == 'all':
641 652 return set()
642 653 m1_set = self._m1.visitchildrenset(dir)
643 654 # Possible values for m1: 'all', 'this', set(...), set()
644 655 # Possible values for m2: 'this', set(...), set()
645 656 # If m2 has nothing under here that we care about, return m1, even if
646 657 # it's 'all'. This is a change in behavior from visitdir, which would
647 658 # return True, not 'all', for some reason.
648 659 if not m2_set:
649 660 return m1_set
650 661 if m1_set in ['all', 'this']:
651 662 # Never return 'all' here if m2_set is any kind of non-empty (either
652 663 # 'this' or set(foo)), since m2 might return set() for a
653 664 # subdirectory.
654 665 return 'this'
655 666 # Possible values for m1: set(...), set()
656 667 # Possible values for m2: 'this', set(...)
657 668 # We ignore m2's set results. They're possibly incorrect:
658 669 # m1 = path:dir/subdir, m2=rootfilesin:dir, visitchildrenset('.'):
659 670 # m1 returns {'dir'}, m2 returns {'dir'}, if we subtracted we'd
660 671 # return set(), which is *not* correct, we still need to visit 'dir'!
661 672 return m1_set
662 673
663 674 def isexact(self):
664 675 return self._m1.isexact()
665 676
666 677 @encoding.strmethod
667 678 def __repr__(self):
668 679 return ('<differencematcher m1=%r, m2=%r>' % (self._m1, self._m2))
669 680
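# Illustrative sketch, not part of this changeset: difference ("m1 and not m2")
# semantics with invented patterns.
from mercurial import match as matchmod

m1 = matchmod.match(b'/repo', b'', [b'path:dir'])
m2 = matchmod.match(b'/repo', b'', [b'rootfilesin:dir'])
dm = matchmod.differencematcher(m1, m2)
dm(b'dir/subdir/f')  # expected: True -- matched by m1, not matched by m2
dm(b'dir/f')         # expected: False -- 'rootfilesin:dir' subtracts it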
670 681 def intersectmatchers(m1, m2):
671 682 '''Composes two matchers by matching if both of them match.
672 683
673 684 The second matcher's non-matching-attributes (root, cwd, bad, explicitdir,
674 685 traversedir) are ignored.
675 686 '''
676 687 if m1 is None or m2 is None:
677 688 return m1 or m2
678 689 if m1.always():
679 690 m = copy.copy(m2)
680 691 # TODO: Consider encapsulating these things in a class so there's only
681 692 # one thing to copy from m1.
682 693 m.bad = m1.bad
683 694 m.explicitdir = m1.explicitdir
684 695 m.traversedir = m1.traversedir
685 696 m.abs = m1.abs
686 697 m.rel = m1.rel
687 698 m._relativeuipath |= m1._relativeuipath
688 699 return m
689 700 if m2.always():
690 701 m = copy.copy(m1)
691 702 m._relativeuipath |= m2._relativeuipath
692 703 return m
693 704 return intersectionmatcher(m1, m2)
694 705
695 706 class intersectionmatcher(basematcher):
696 707 def __init__(self, m1, m2):
697 708 super(intersectionmatcher, self).__init__(m1._root, m1._cwd)
698 709 self._m1 = m1
699 710 self._m2 = m2
700 711 self.bad = m1.bad
701 712 self.explicitdir = m1.explicitdir
702 713 self.traversedir = m1.traversedir
703 714
704 715 @propertycache
705 716 def _files(self):
706 717 if self.isexact():
707 718 m1, m2 = self._m1, self._m2
708 719 if not m1.isexact():
709 720 m1, m2 = m2, m1
710 721 return [f for f in m1.files() if m2(f)]
711 722 # If neither m1 nor m2 is an exact matcher, we can't easily intersect
712 723 # the set of files, because their files() are not always files. For
713 724 # example, if intersecting a matcher "-I glob:foo.txt" with matcher of
714 725 # "path:dir2", we don't want to remove "dir2" from the set.
715 726 return self._m1.files() + self._m2.files()
716 727
717 728 def matchfn(self, f):
718 729 return self._m1(f) and self._m2(f)
719 730
720 731 def visitdir(self, dir):
721 732 visit1 = self._m1.visitdir(dir)
722 733 if visit1 == 'all':
723 734 return self._m2.visitdir(dir)
724 735 # bool() because visit1=True + visit2='all' should not be 'all'
725 736 return bool(visit1 and self._m2.visitdir(dir))
726 737
727 738 def visitchildrenset(self, dir):
728 739 m1_set = self._m1.visitchildrenset(dir)
729 740 if not m1_set:
730 741 return set()
731 742 m2_set = self._m2.visitchildrenset(dir)
732 743 if not m2_set:
733 744 return set()
734 745
735 746 if m1_set == 'all':
736 747 return m2_set
737 748 elif m2_set == 'all':
738 749 return m1_set
739 750
740 751 if m1_set == 'this' or m2_set == 'this':
741 752 return 'this'
742 753
743 754 assert isinstance(m1_set, set) and isinstance(m2_set, set)
744 755 return m1_set.intersection(m2_set)
745 756
746 757 def always(self):
747 758 return self._m1.always() and self._m2.always()
748 759
749 760 def isexact(self):
750 761 return self._m1.isexact() or self._m2.isexact()
751 762
752 763 @encoding.strmethod
753 764 def __repr__(self):
754 765 return ('<intersectionmatcher m1=%r, m2=%r>' % (self._m1, self._m2))
755 766
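# Illustrative sketch, not part of this changeset: intersection via the
# intersectmatchers() helper above, with invented patterns.
from mercurial import match as matchmod

m1 = matchmod.match(b'/repo', b'', [b'path:foo'])
m2 = matchmod.match(b'/repo', b'', [b'path:foo/bar'])
im = matchmod.intersectmatchers(m1, m2)
im(b'foo/bar/x.c')  # expected: True -- inside both 'path:' prefixes
im(b'foo/y.c')      # expected: False -- only m1 matches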
756 767 class subdirmatcher(basematcher):
757 768 """Adapt a matcher to work on a subdirectory only.
758 769
759 770 The paths are remapped to remove/insert the path as needed:
760 771
761 772 >>> from . import pycompat
762 773 >>> m1 = match(b'root', b'', [b'a.txt', b'sub/b.txt'])
763 774 >>> m2 = subdirmatcher(b'sub', m1)
764 775 >>> bool(m2(b'a.txt'))
765 776 False
766 777 >>> bool(m2(b'b.txt'))
767 778 True
768 779 >>> bool(m2.matchfn(b'a.txt'))
769 780 False
770 781 >>> bool(m2.matchfn(b'b.txt'))
771 782 True
772 783 >>> m2.files()
773 784 ['b.txt']
774 785 >>> m2.exact(b'b.txt')
775 786 True
776 787 >>> util.pconvert(m2.rel(b'b.txt'))
777 788 'sub/b.txt'
778 789 >>> def bad(f, msg):
779 790 ... print(pycompat.sysstr(b"%s: %s" % (f, msg)))
780 791 >>> m1.bad = bad
781 792 >>> m2.bad(b'x.txt', b'No such file')
782 793 sub/x.txt: No such file
783 794 >>> m2.abs(b'c.txt')
784 795 'sub/c.txt'
785 796 """
786 797
787 798 def __init__(self, path, matcher):
788 799 super(subdirmatcher, self).__init__(matcher._root, matcher._cwd)
789 800 self._path = path
790 801 self._matcher = matcher
791 802 self._always = matcher.always()
792 803
793 804 self._files = [f[len(path) + 1:] for f in matcher._files
794 805 if f.startswith(path + "/")]
795 806
796 807 # If the parent repo had a path to this subrepo and the matcher is
797 808 # a prefix matcher, this submatcher always matches.
798 809 if matcher.prefix():
799 810 self._always = any(f == path for f in matcher._files)
800 811
801 812 def bad(self, f, msg):
802 813 self._matcher.bad(self._path + "/" + f, msg)
803 814
804 815 def abs(self, f):
805 816 return self._matcher.abs(self._path + "/" + f)
806 817
807 818 def rel(self, f):
808 819 return self._matcher.rel(self._path + "/" + f)
809 820
810 821 def uipath(self, f):
811 822 return self._matcher.uipath(self._path + "/" + f)
812 823
813 824 def matchfn(self, f):
814 825 # Some information is lost in the superclass's constructor, so we
815 826 # can not accurately create the matching function for the subdirectory
816 827 # from the inputs. Instead, we override matchfn() and visitdir() to
817 828 # call the original matcher with the subdirectory path prepended.
818 829 return self._matcher.matchfn(self._path + "/" + f)
819 830
820 831 def visitdir(self, dir):
821 832 if dir == '.':
822 833 dir = self._path
823 834 else:
824 835 dir = self._path + "/" + dir
825 836 return self._matcher.visitdir(dir)
826 837
827 838 def visitchildrenset(self, dir):
828 839 if dir == '.':
829 840 dir = self._path
830 841 else:
831 842 dir = self._path + "/" + dir
832 843 return self._matcher.visitchildrenset(dir)
833 844
834 845 def always(self):
835 846 return self._always
836 847
837 848 def prefix(self):
838 849 return self._matcher.prefix() and not self._always
839 850
840 851 @encoding.strmethod
841 852 def __repr__(self):
842 853 return ('<subdirmatcher path=%r, matcher=%r>' %
843 854 (self._path, self._matcher))
844 855
845 856 class prefixdirmatcher(basematcher):
846 857 """Adapt a matcher to work on a parent directory.
847 858
848 859 The matcher's non-matching-attributes (root, cwd, bad, explicitdir,
849 860 traversedir) are ignored.
850 861
851 862 The prefix path should usually be the relative path from the root of
852 863 this matcher to the root of the wrapped matcher.
853 864
854 865 >>> m1 = match(util.localpath(b'root/d/e'), b'f', [b'../a.txt', b'b.txt'])
855 866 >>> m2 = prefixdirmatcher(b'root', b'd/e/f', b'd/e', m1)
856 867 >>> bool(m2(b'a.txt'))
857 868 False
858 869 >>> bool(m2(b'd/e/a.txt'))
859 870 True
860 871 >>> bool(m2(b'd/e/b.txt'))
861 872 False
862 873 >>> m2.files()
863 874 ['d/e/a.txt', 'd/e/f/b.txt']
864 875 >>> m2.exact(b'd/e/a.txt')
865 876 True
866 877 >>> m2.visitdir(b'd')
867 878 True
868 879 >>> m2.visitdir(b'd/e')
869 880 True
870 881 >>> m2.visitdir(b'd/e/f')
871 882 True
872 883 >>> m2.visitdir(b'd/e/g')
873 884 False
874 885 >>> m2.visitdir(b'd/ef')
875 886 False
876 887 """
877 888
878 889 def __init__(self, root, cwd, path, matcher, badfn=None):
879 890 super(prefixdirmatcher, self).__init__(root, cwd, badfn)
880 891 if not path:
881 892 raise error.ProgrammingError('prefix path must not be empty')
882 893 self._path = path
883 894 self._pathprefix = path + '/'
884 895 self._matcher = matcher
885 896
886 897 @propertycache
887 898 def _files(self):
888 899 return [self._pathprefix + f for f in self._matcher._files]
889 900
890 901 def matchfn(self, f):
891 902 if not f.startswith(self._pathprefix):
892 903 return False
893 904 return self._matcher.matchfn(f[len(self._pathprefix):])
894 905
895 906 @propertycache
896 907 def _pathdirs(self):
897 908 return set(util.finddirs(self._path)) | {'.'}
898 909
899 910 def visitdir(self, dir):
900 911 if dir == self._path:
901 912 return self._matcher.visitdir('.')
902 913 if dir.startswith(self._pathprefix):
903 914 return self._matcher.visitdir(dir[len(self._pathprefix):])
904 915 return dir in self._pathdirs
905 916
906 917 def visitchildrenset(self, dir):
907 918 if dir == self._path:
908 919 return self._matcher.visitchildrenset('.')
909 920 if dir.startswith(self._pathprefix):
910 921 return self._matcher.visitchildrenset(dir[len(self._pathprefix):])
911 922 if dir in self._pathdirs:
912 923 return 'this'
913 924 return set()
914 925
915 926 def isexact(self):
916 927 return self._matcher.isexact()
917 928
918 929 def prefix(self):
919 930 return self._matcher.prefix()
920 931
921 932 @encoding.strmethod
922 933 def __repr__(self):
923 934 return ('<prefixdirmatcher path=%r, matcher=%r>'
924 935 % (pycompat.bytestr(self._path), self._matcher))
925 936
926 937 class unionmatcher(basematcher):
927 938 """A matcher that is the union of several matchers.
928 939
929 940 The non-matching-attributes (root, cwd, bad, explicitdir, traversedir) are
930 941 taken from the first matcher.
931 942 """
932 943
933 944 def __init__(self, matchers):
934 945 m1 = matchers[0]
935 946 super(unionmatcher, self).__init__(m1._root, m1._cwd)
936 947 self.explicitdir = m1.explicitdir
937 948 self.traversedir = m1.traversedir
938 949 self._matchers = matchers
939 950
940 951 def matchfn(self, f):
941 952 for match in self._matchers:
942 953 if match(f):
943 954 return True
944 955 return False
945 956
946 957 def visitdir(self, dir):
947 958 r = False
948 959 for m in self._matchers:
949 960 v = m.visitdir(dir)
950 961 if v == 'all':
951 962 return v
952 963 r |= v
953 964 return r
954 965
955 966 def visitchildrenset(self, dir):
956 967 r = set()
957 968 this = False
958 969 for m in self._matchers:
959 970 v = m.visitchildrenset(dir)
960 971 if not v:
961 972 continue
962 973 if v == 'all':
963 974 return v
964 975 if this or v == 'this':
965 976 this = True
966 977 # don't break, we might have an 'all' in here.
967 978 continue
968 979 assert isinstance(v, set)
969 980 r = r.union(v)
970 981 if this:
971 982 return 'this'
972 983 return r
973 984
974 985 @encoding.strmethod
975 986 def __repr__(self):
976 987 return ('<unionmatcher matchers=%r>' % self._matchers)
977 988
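# Illustrative sketch, not part of this changeset: union semantics for
# visitchildrenset() with two hand-built exact matchers on invented paths.
from mercurial import match as matchmod

ma = matchmod.exactmatcher(b'/repo', b'', [b'foo/x.txt'])
mb = matchmod.exactmatcher(b'/repo', b'', [b'bar/y.txt'])
um = matchmod.unionmatcher([ma, mb])
um.visitchildrenset(b'.')    # expected: {'foo', 'bar'} -- concrete sets union
um.visitchildrenset(b'foo')  # expected: 'this' (from ma); mb contributes set()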
978 989 def patkind(pattern, default=None):
979 990 '''If pattern is 'kind:pat' with a known kind, return kind.'''
980 991 return _patsplit(pattern, default)[0]
981 992
982 993 def _patsplit(pattern, default):
983 994 """Split a string into the optional pattern kind prefix and the actual
984 995 pattern."""
985 996 if ':' in pattern:
986 997 kind, pat = pattern.split(':', 1)
987 998 if kind in allpatternkinds:
988 999 return kind, pat
989 1000 return default, pattern
990 1001
991 1002 def _globre(pat):
992 1003 r'''Convert an extended glob string to a regexp string.
993 1004
994 1005 >>> from . import pycompat
995 1006 >>> def bprint(s):
996 1007 ... print(pycompat.sysstr(s))
997 1008 >>> bprint(_globre(br'?'))
998 1009 .
999 1010 >>> bprint(_globre(br'*'))
1000 1011 [^/]*
1001 1012 >>> bprint(_globre(br'**'))
1002 1013 .*
1003 1014 >>> bprint(_globre(br'**/a'))
1004 1015 (?:.*/)?a
1005 1016 >>> bprint(_globre(br'a/**/b'))
1006 1017 a/(?:.*/)?b
1007 1018 >>> bprint(_globre(br'[a*?!^][^b][!c]'))
1008 1019 [a*?!^][\^b][^c]
1009 1020 >>> bprint(_globre(br'{a,b}'))
1010 1021 (?:a|b)
1011 1022 >>> bprint(_globre(br'.\*\?'))
1012 1023 \.\*\?
1013 1024 '''
1014 1025 i, n = 0, len(pat)
1015 1026 res = ''
1016 1027 group = 0
1017 1028 escape = util.stringutil.reescape
1018 1029 def peek():
1019 1030 return i < n and pat[i:i + 1]
1020 1031 while i < n:
1021 1032 c = pat[i:i + 1]
1022 1033 i += 1
1023 1034 if c not in '*?[{},\\':
1024 1035 res += escape(c)
1025 1036 elif c == '*':
1026 1037 if peek() == '*':
1027 1038 i += 1
1028 1039 if peek() == '/':
1029 1040 i += 1
1030 1041 res += '(?:.*/)?'
1031 1042 else:
1032 1043 res += '.*'
1033 1044 else:
1034 1045 res += '[^/]*'
1035 1046 elif c == '?':
1036 1047 res += '.'
1037 1048 elif c == '[':
1038 1049 j = i
1039 1050 if j < n and pat[j:j + 1] in '!]':
1040 1051 j += 1
1041 1052 while j < n and pat[j:j + 1] != ']':
1042 1053 j += 1
1043 1054 if j >= n:
1044 1055 res += '\\['
1045 1056 else:
1046 1057 stuff = pat[i:j].replace('\\','\\\\')
1047 1058 i = j + 1
1048 1059 if stuff[0:1] == '!':
1049 1060 stuff = '^' + stuff[1:]
1050 1061 elif stuff[0:1] == '^':
1051 1062 stuff = '\\' + stuff
1052 1063 res = '%s[%s]' % (res, stuff)
1053 1064 elif c == '{':
1054 1065 group += 1
1055 1066 res += '(?:'
1056 1067 elif c == '}' and group:
1057 1068 res += ')'
1058 1069 group -= 1
1059 1070 elif c == ',' and group:
1060 1071 res += '|'
1061 1072 elif c == '\\':
1062 1073 p = peek()
1063 1074 if p:
1064 1075 i += 1
1065 1076 res += escape(p)
1066 1077 else:
1067 1078 res += escape(c)
1068 1079 else:
1069 1080 res += escape(c)
1070 1081 return res
1071 1082
1072 1083 def _regex(kind, pat, globsuffix):
1073 1084 '''Convert a (normalized) pattern of any kind into a regular expression.
1074 1085 globsuffix is appended to the regexp of globs.'''
1075 1086 if not pat:
1076 1087 return ''
1077 1088 if kind == 're':
1078 1089 return pat
1079 1090 if kind in ('path', 'relpath'):
1080 1091 if pat == '.':
1081 1092 return ''
1082 1093 return util.stringutil.reescape(pat) + '(?:/|$)'
1083 1094 if kind == 'rootfilesin':
1084 1095 if pat == '.':
1085 1096 escaped = ''
1086 1097 else:
1087 1098 # Pattern is a directory name.
1088 1099 escaped = util.stringutil.reescape(pat) + '/'
1089 1100 # Anything after the pattern must be a non-directory.
1090 1101 return escaped + '[^/]+$'
1091 1102 if kind == 'relglob':
1092 1103 return '(?:|.*/)' + _globre(pat) + globsuffix
1093 1104 if kind == 'relre':
1094 1105 if pat.startswith('^'):
1095 1106 return pat
1096 1107 return '.*' + pat
1097 1108 if kind == 'glob':
1098 1109 return _globre(pat) + globsuffix
1099 1110 raise error.ProgrammingError('not a regex pattern: %s:%s' % (kind, pat))
1100 1111
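# Illustrative sketch, not part of this changeset: the regexp fragments
# _regex() produces for a few kinds ('$' is the globsuffix patternmatcher
# uses; includematcher uses '(?:/|$)').
from mercurial import match as matchmod

matchmod._regex(b'path', b'foo/bar', b'$')     # -> 'foo/bar(?:/|$)'
matchmod._regex(b'rootfilesin', b'qux', b'$')  # -> 'qux/[^/]+$'
matchmod._regex(b'glob', b'*.py', b'$')        # -> '[^/]*\.py$'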
1101 1112 def _buildmatch(kindpats, globsuffix, listsubrepos, root):
1102 1113 '''Return regexp string and a matcher function for kindpats.
1103 1114 globsuffix is appended to the regexp of globs.'''
1104 1115 matchfuncs = []
1105 1116
1106 1117 subincludes, kindpats = _expandsubinclude(kindpats, root)
1107 1118 if subincludes:
1108 1119 submatchers = {}
1109 1120 def matchsubinclude(f):
1110 1121 for prefix, matcherargs in subincludes:
1111 1122 if f.startswith(prefix):
1112 1123 mf = submatchers.get(prefix)
1113 1124 if mf is None:
1114 1125 mf = match(*matcherargs)
1115 1126 submatchers[prefix] = mf
1116 1127
1117 1128 if mf(f[len(prefix):]):
1118 1129 return True
1119 1130 return False
1120 1131 matchfuncs.append(matchsubinclude)
1121 1132
1122 1133 regex = ''
1123 1134 if kindpats:
1124 1135 regex, mf = _buildregexmatch(kindpats, globsuffix)
1125 1136 matchfuncs.append(mf)
1126 1137
1127 1138 if len(matchfuncs) == 1:
1128 1139 return regex, matchfuncs[0]
1129 1140 else:
1130 1141 return regex, lambda f: any(mf(f) for mf in matchfuncs)
1131 1142
1132 1143 def _buildregexmatch(kindpats, globsuffix):
1133 1144 """Build a match function from a list of kinds and kindpats,
1134 1145 return regexp string and a matcher function."""
1135 1146 try:
1136 1147 regex = '(?:%s)' % '|'.join([_regex(k, p, globsuffix)
1137 1148 for (k, p, s) in kindpats])
1138 1149 if len(regex) > 20000:
1139 1150 raise OverflowError
1140 1151 return regex, _rematcher(regex)
1141 1152 except OverflowError:
1142 1153 # We're using a Python with a tiny regex engine and we
1143 1154 # made it explode, so we'll divide the pattern list in two
1144 1155 # until it works
1145 1156 l = len(kindpats)
1146 1157 if l < 2:
1147 1158 raise
1148 1159 regexa, a = _buildregexmatch(kindpats[:l//2], globsuffix)
1149 1160 regexb, b = _buildregexmatch(kindpats[l//2:], globsuffix)
1150 1161 return regex, lambda s: a(s) or b(s)
1151 1162 except re.error:
1152 1163 for k, p, s in kindpats:
1153 1164 try:
1154 1165 _rematcher('(?:%s)' % _regex(k, p, globsuffix))
1155 1166 except re.error:
1156 1167 if s:
1157 1168 raise error.Abort(_("%s: invalid pattern (%s): %s") %
1158 1169 (s, k, p))
1159 1170 else:
1160 1171 raise error.Abort(_("invalid pattern (%s): %s") % (k, p))
1161 1172 raise error.Abort(_("invalid pattern"))
1162 1173
1163 1174 def _patternrootsanddirs(kindpats):
1164 1175 '''Returns roots and directories corresponding to each pattern.
1165 1176
1166 1177 This calculates the roots and directories exactly matching the patterns and
1167 1178 returns a tuple of (roots, dirs) for each. It does not return other
1168 1179 directories which may also need to be considered, like the parent
1169 1180 directories.
1170 1181 '''
1171 1182 r = []
1172 1183 d = []
1173 1184 for kind, pat, source in kindpats:
1174 1185 if kind == 'glob': # find the non-glob prefix
1175 1186 root = []
1176 1187 for p in pat.split('/'):
1177 1188 if '[' in p or '{' in p or '*' in p or '?' in p:
1178 1189 break
1179 1190 root.append(p)
1180 1191 r.append('/'.join(root) or '.')
1181 1192 elif kind in ('relpath', 'path'):
1182 1193 r.append(pat or '.')
1183 1194 elif kind in ('rootfilesin',):
1184 1195 d.append(pat or '.')
1185 1196 else: # relglob, re, relre
1186 1197 r.append('.')
1187 1198 return r, d
1188 1199
1189 1200 def _roots(kindpats):
1190 1201 '''Returns root directories to match recursively from the given patterns.'''
1191 1202 roots, dirs = _patternrootsanddirs(kindpats)
1192 1203 return roots
1193 1204
1194 1205 def _rootsdirsandparents(kindpats):
1195 1206 '''Returns roots and exact directories from patterns.
1196 1207
1197 1208 `roots` are directories to match recursively, `dirs` should
1198 1209 be matched non-recursively, and `parents` are the implicitly required
1199 1210 directories to walk to items in either roots or dirs.
1200 1211
1201 1212 Returns a tuple of (roots, dirs, parents).
1202 1213
1203 1214 >>> _rootsdirsandparents(
1204 1215 ... [(b'glob', b'g/h/*', b''), (b'glob', b'g/h', b''),
1205 1216 ... (b'glob', b'g*', b'')])
1206 1217 (['g/h', 'g/h', '.'], [], ['g', '.'])
1207 1218 >>> _rootsdirsandparents(
1208 1219 ... [(b'rootfilesin', b'g/h', b''), (b'rootfilesin', b'', b'')])
1209 1220 ([], ['g/h', '.'], ['g', '.'])
1210 1221 >>> _rootsdirsandparents(
1211 1222 ... [(b'relpath', b'r', b''), (b'path', b'p/p', b''),
1212 1223 ... (b'path', b'', b'')])
1213 1224 (['r', 'p/p', '.'], [], ['p', '.'])
1214 1225 >>> _rootsdirsandparents(
1215 1226 ... [(b'relglob', b'rg*', b''), (b're', b're/', b''),
1216 1227 ... (b'relre', b'rr', b'')])
1217 1228 (['.', '.', '.'], [], ['.'])
1218 1229 '''
1219 1230 r, d = _patternrootsanddirs(kindpats)
1220 1231
1221 1232 p = []
1222 1233 # Append the parents as non-recursive/exact directories, since they must be
1223 1234 # scanned to get to either the roots or the other exact directories.
1224 1235 p.extend(util.dirs(d))
1225 1236 p.extend(util.dirs(r))
1226 1237 # util.dirs() does not include the root directory, so add it manually
1227 1238 p.append('.')
1228 1239
1229 1240 return r, d, p
1230 1241
1231 1242 def _explicitfiles(kindpats):
1232 1243 '''Returns the potential explicit filenames from the patterns.
1233 1244
1234 1245 >>> _explicitfiles([(b'path', b'foo/bar', b'')])
1235 1246 ['foo/bar']
1236 1247 >>> _explicitfiles([(b'rootfilesin', b'foo/bar', b'')])
1237 1248 []
1238 1249 '''
1239 1250 # Keep only the pattern kinds where one can specify filenames (vs only
1240 1251 # directory names).
1241 1252 filable = [kp for kp in kindpats if kp[0] not in ('rootfilesin',)]
1242 1253 return _roots(filable)
1243 1254
1244 1255 def _prefix(kindpats):
1245 1256 '''Whether all the patterns match a prefix (i.e. recursively)'''
1246 1257 for kind, pat, source in kindpats:
1247 1258 if kind not in ('path', 'relpath'):
1248 1259 return False
1249 1260 return True
1250 1261
1251 1262 _commentre = None
1252 1263
1253 1264 def readpatternfile(filepath, warn, sourceinfo=False):
1254 1265 '''parse a pattern file, returning a list of
1255 1266 patterns. These patterns should be given to compile()
1256 1267 to be validated and converted into a match function.
1257 1268
1258 1269 trailing white space is dropped.
1259 1270 the escape character is backslash.
1260 1271 comments start with #.
1261 1272 empty lines are skipped.
1262 1273
1263 1274 lines can be of the following formats:
1264 1275
1265 1276 syntax: regexp # defaults following lines to non-rooted regexps
1266 1277 syntax: glob # defaults following lines to non-rooted globs
1267 1278 re:pattern # non-rooted regular expression
1268 1279 glob:pattern # non-rooted glob
1269 1280 pattern # pattern of the current default type
1270 1281
1271 1282 if sourceinfo is set, returns a list of tuples:
1272 1283 (pattern, lineno, originalline). This is useful to debug ignore patterns.
1273 1284 '''
1274 1285
1275 1286 syntaxes = {'re': 'relre:', 'regexp': 'relre:', 'glob': 'relglob:',
1276 1287 'include': 'include', 'subinclude': 'subinclude'}
1277 1288 syntax = 'relre:'
1278 1289 patterns = []
1279 1290
1280 1291 fp = open(filepath, 'rb')
1281 1292 for lineno, line in enumerate(util.iterfile(fp), start=1):
1282 1293 if "#" in line:
1283 1294 global _commentre
1284 1295 if not _commentre:
1285 1296 _commentre = util.re.compile(br'((?:^|[^\\])(?:\\\\)*)#.*')
1286 1297 # remove comments prefixed by an even number of escapes
1287 1298 m = _commentre.search(line)
1288 1299 if m:
1289 1300 line = line[:m.end(1)]
1290 1301 # fixup properly escaped comments that survived the above
1291 1302 line = line.replace("\\#", "#")
1292 1303 line = line.rstrip()
1293 1304 if not line:
1294 1305 continue
1295 1306
1296 1307 if line.startswith('syntax:'):
1297 1308 s = line[7:].strip()
1298 1309 try:
1299 1310 syntax = syntaxes[s]
1300 1311 except KeyError:
1301 1312 if warn:
1302 1313 warn(_("%s: ignoring invalid syntax '%s'\n") %
1303 1314 (filepath, s))
1304 1315 continue
1305 1316
1306 1317 linesyntax = syntax
1307 1318 for s, rels in syntaxes.iteritems():
1308 1319 if line.startswith(rels):
1309 1320 linesyntax = rels
1310 1321 line = line[len(rels):]
1311 1322 break
1312 1323 elif line.startswith(s+':'):
1313 1324 linesyntax = rels
1314 1325 line = line[len(s) + 1:]
1315 1326 break
1316 1327 if sourceinfo:
1317 1328 patterns.append((linesyntax + line, lineno, line))
1318 1329 else:
1319 1330 patterns.append(linesyntax + line)
1320 1331 fp.close()
1321 1332 return patterns
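# Illustrative sketch, not part of this changeset: parsing a hypothetical
# pattern file with readpatternfile(). File name and contents are invented;
# the expected result follows the parsing rules documented above.
from mercurial import match as matchmod

with open('example-ignore', 'wb') as fp:
    fp.write(b'syntax: glob\n'  # default the following lines to relglob
             b'*.pyc\n'         # -> b'relglob:*.pyc'
             b're:^build/\n')   # explicit kind -> b'relre:^build/'

pats = matchmod.readpatternfile(b'example-ignore', warn=None)
# expected: [b'relglob:*.pyc', b'relre:^build/']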