match: introduce nevermatcher for when no ignore files are present...
Siddharth Agarwal
r32600:e6ff007e default
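This changeset adds a matcher that matches nothing (nevermatcher, with a never() constructor) to match.py and has dirstate return it from _ignore when a repository has no ignore files. Below is a minimal sketch, not part of the diff, of how the new helper behaves; it assumes mercurial is importable and uses a placeholder repository root.

from mercurial import match as matchmod

m = matchmod.never('/path/to/repo', '')   # cwd passed as '', as dirstate does below
print(m('foo/bar.c'))      # False: matchfn rejects every file
print(m.visitdir('foo'))   # False: directory traversal can be pruned immediately
print(m.always())          # False: the mirror image of alwaysmatcher
print(repr(m))             # '<nevermatcher>'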
diff --git a/mercurial/dirstate.py b/mercurial/dirstate.py
@@ -1,1313 +1,1313 @@
1 1 # dirstate.py - working directory tracking for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import collections
11 11 import contextlib
12 12 import errno
13 13 import os
14 14 import stat
15 15
16 16 from .i18n import _
17 17 from .node import nullid
18 18 from . import (
19 19 encoding,
20 20 error,
21 21 match as matchmod,
22 22 pathutil,
23 23 policy,
24 24 pycompat,
25 25 scmutil,
26 26 txnutil,
27 27 util,
28 28 )
29 29
30 30 parsers = policy.importmod(r'parsers')
31 31
32 32 propertycache = util.propertycache
33 33 filecache = scmutil.filecache
34 34 _rangemask = 0x7fffffff
35 35
36 36 dirstatetuple = parsers.dirstatetuple
37 37
38 38 class repocache(filecache):
39 39 """filecache for files in .hg/"""
40 40 def join(self, obj, fname):
41 41 return obj._opener.join(fname)
42 42
43 43 class rootcache(filecache):
44 44 """filecache for files in the repository root"""
45 45 def join(self, obj, fname):
46 46 return obj._join(fname)
47 47
48 48 def _getfsnow(vfs):
49 49 '''Get "now" timestamp on filesystem'''
50 50 tmpfd, tmpname = vfs.mkstemp()
51 51 try:
52 52 return os.fstat(tmpfd).st_mtime
53 53 finally:
54 54 os.close(tmpfd)
55 55 vfs.unlink(tmpname)
56 56
57 57 def nonnormalentries(dmap):
58 58 '''Compute the nonnormal dirstate entries from the dmap'''
59 59 try:
60 60 return parsers.nonnormalotherparententries(dmap)
61 61 except AttributeError:
62 62 nonnorm = set()
63 63 otherparent = set()
64 64 for fname, e in dmap.iteritems():
65 65 if e[0] != 'n' or e[3] == -1:
66 66 nonnorm.add(fname)
67 67 if e[0] == 'n' and e[2] == -2:
68 68 otherparent.add(fname)
69 69 return nonnorm, otherparent
70 70
71 71 class dirstate(object):
72 72
73 73 def __init__(self, opener, ui, root, validate):
74 74 '''Create a new dirstate object.
75 75
76 76 opener is an open()-like callable that can be used to open the
77 77 dirstate file; root is the root of the directory tracked by
78 78 the dirstate.
79 79 '''
80 80 self._opener = opener
81 81 self._validate = validate
82 82 self._root = root
83 83 # ntpath.join(root, '') of Python 2.7.9 does not add sep if root is
84 84 # UNC path pointing to root share (issue4557)
85 85 self._rootdir = pathutil.normasprefix(root)
86 86 # internal config: ui.forcecwd
87 87 forcecwd = ui.config('ui', 'forcecwd')
88 88 if forcecwd:
89 89 self._cwd = forcecwd
90 90 self._dirty = False
91 91 self._dirtypl = False
92 92 self._lastnormaltime = 0
93 93 self._ui = ui
94 94 self._filecache = {}
95 95 self._parentwriters = 0
96 96 self._filename = 'dirstate'
97 97 self._pendingfilename = '%s.pending' % self._filename
98 98 self._plchangecallbacks = {}
99 99 self._origpl = None
100 100 self._updatedfiles = set()
101 101
102 102 # for consistent view between _pl() and _read() invocations
103 103 self._pendingmode = None
104 104
105 105 @contextlib.contextmanager
106 106 def parentchange(self):
107 107 '''Context manager for handling dirstate parents.
108 108
109 109 If an exception occurs in the scope of the context manager,
110 110 the incoherent dirstate won't be written when wlock is
111 111 released.
112 112 '''
113 113 self._parentwriters += 1
114 114 yield
115 115 # Typically we want the "undo" step of a context manager in a
116 116 # finally block so it happens even when an exception
117 117 # occurs. In this case, however, we only want to decrement
118 118 # parentwriters if the code in the with statement exits
119 119 # normally, so we don't have a try/finally here on purpose.
120 120 self._parentwriters -= 1
121 121
122 122 def beginparentchange(self):
123 123 '''Marks the beginning of a set of changes that involve changing
124 124 the dirstate parents. If there is an exception during this time,
125 125 the dirstate will not be written when the wlock is released. This
126 126 prevents writing an incoherent dirstate where the parent doesn't
127 127 match the contents.
128 128 '''
129 129 self._ui.deprecwarn('beginparentchange is obsoleted by the '
130 130 'parentchange context manager.', '4.3')
131 131 self._parentwriters += 1
132 132
133 133 def endparentchange(self):
134 134 '''Marks the end of a set of changes that involve changing the
135 135 dirstate parents. Once all parent changes have been marked done,
136 136 the wlock will be free to write the dirstate on release.
137 137 '''
138 138 self._ui.deprecwarn('endparentchange is obsoleted by the '
139 139 'parentchange context manager.', '4.3')
140 140 if self._parentwriters > 0:
141 141 self._parentwriters -= 1
142 142
143 143 def pendingparentchange(self):
144 144 '''Returns true if the dirstate is in the middle of a set of changes
145 145 that modify the dirstate parent.
146 146 '''
147 147 return self._parentwriters > 0
148 148
149 149 @propertycache
150 150 def _map(self):
151 151 '''Return the dirstate contents as a map from filename to
152 152 (state, mode, size, time).'''
153 153 self._read()
154 154 return self._map
155 155
156 156 @propertycache
157 157 def _copymap(self):
158 158 self._read()
159 159 return self._copymap
160 160
161 161 @propertycache
162 162 def _nonnormalset(self):
163 163 nonnorm, otherparents = nonnormalentries(self._map)
164 164 self._otherparentset = otherparents
165 165 return nonnorm
166 166
167 167 @propertycache
168 168 def _otherparentset(self):
169 169 nonnorm, otherparents = nonnormalentries(self._map)
170 170 self._nonnormalset = nonnorm
171 171 return otherparents
172 172
173 173 @propertycache
174 174 def _filefoldmap(self):
175 175 try:
176 176 makefilefoldmap = parsers.make_file_foldmap
177 177 except AttributeError:
178 178 pass
179 179 else:
180 180 return makefilefoldmap(self._map, util.normcasespec,
181 181 util.normcasefallback)
182 182
183 183 f = {}
184 184 normcase = util.normcase
185 185 for name, s in self._map.iteritems():
186 186 if s[0] != 'r':
187 187 f[normcase(name)] = name
188 188 f['.'] = '.' # prevents useless util.fspath() invocation
189 189 return f
190 190
191 191 @propertycache
192 192 def _dirfoldmap(self):
193 193 f = {}
194 194 normcase = util.normcase
195 195 for name in self._dirs:
196 196 f[normcase(name)] = name
197 197 return f
198 198
199 199 @repocache('branch')
200 200 def _branch(self):
201 201 try:
202 202 return self._opener.read("branch").strip() or "default"
203 203 except IOError as inst:
204 204 if inst.errno != errno.ENOENT:
205 205 raise
206 206 return "default"
207 207
208 208 @propertycache
209 209 def _pl(self):
210 210 try:
211 211 fp = self._opendirstatefile()
212 212 st = fp.read(40)
213 213 fp.close()
214 214 l = len(st)
215 215 if l == 40:
216 216 return st[:20], st[20:40]
217 217 elif l > 0 and l < 40:
218 218 raise error.Abort(_('working directory state appears damaged!'))
219 219 except IOError as err:
220 220 if err.errno != errno.ENOENT:
221 221 raise
222 222 return [nullid, nullid]
223 223
224 224 @propertycache
225 225 def _dirs(self):
226 226 return util.dirs(self._map, 'r')
227 227
228 228 def dirs(self):
229 229 return self._dirs
230 230
231 231 @rootcache('.hgignore')
232 232 def _ignore(self):
233 233 files = self._ignorefiles()
234 234 if not files:
235 return util.never
235 return matchmod.never(self._root, '')
236 236
237 237 pats = ['include:%s' % f for f in files]
238 238 return matchmod.match(self._root, '', [], pats, warn=self._ui.warn)
239 239
240 240 @propertycache
241 241 def _slash(self):
242 242 return self._ui.configbool('ui', 'slash') and pycompat.ossep != '/'
243 243
244 244 @propertycache
245 245 def _checklink(self):
246 246 return util.checklink(self._root)
247 247
248 248 @propertycache
249 249 def _checkexec(self):
250 250 return util.checkexec(self._root)
251 251
252 252 @propertycache
253 253 def _checkcase(self):
254 254 return not util.fscasesensitive(self._join('.hg'))
255 255
256 256 def _join(self, f):
257 257 # much faster than os.path.join()
258 258 # it's safe because f is always a relative path
259 259 return self._rootdir + f
260 260
261 261 def flagfunc(self, buildfallback):
262 262 if self._checklink and self._checkexec:
263 263 def f(x):
264 264 try:
265 265 st = os.lstat(self._join(x))
266 266 if util.statislink(st):
267 267 return 'l'
268 268 if util.statisexec(st):
269 269 return 'x'
270 270 except OSError:
271 271 pass
272 272 return ''
273 273 return f
274 274
275 275 fallback = buildfallback()
276 276 if self._checklink:
277 277 def f(x):
278 278 if os.path.islink(self._join(x)):
279 279 return 'l'
280 280 if 'x' in fallback(x):
281 281 return 'x'
282 282 return ''
283 283 return f
284 284 if self._checkexec:
285 285 def f(x):
286 286 if 'l' in fallback(x):
287 287 return 'l'
288 288 if util.isexec(self._join(x)):
289 289 return 'x'
290 290 return ''
291 291 return f
292 292 else:
293 293 return fallback
294 294
295 295 @propertycache
296 296 def _cwd(self):
297 297 return pycompat.getcwd()
298 298
299 299 def getcwd(self):
300 300 '''Return the path from which a canonical path is calculated.
301 301
302 302 This path should be used to resolve file patterns or to convert
303 303 canonical paths back to file paths for display. It shouldn't be
304 304 used to get real file paths. Use vfs functions instead.
305 305 '''
306 306 cwd = self._cwd
307 307 if cwd == self._root:
308 308 return ''
309 309 # self._root ends with a path separator if self._root is '/' or 'C:\'
310 310 rootsep = self._root
311 311 if not util.endswithsep(rootsep):
312 312 rootsep += pycompat.ossep
313 313 if cwd.startswith(rootsep):
314 314 return cwd[len(rootsep):]
315 315 else:
316 316 # we're outside the repo. return an absolute path.
317 317 return cwd
318 318
319 319 def pathto(self, f, cwd=None):
320 320 if cwd is None:
321 321 cwd = self.getcwd()
322 322 path = util.pathto(self._root, cwd, f)
323 323 if self._slash:
324 324 return util.pconvert(path)
325 325 return path
326 326
327 327 def __getitem__(self, key):
328 328 '''Return the current state of key (a filename) in the dirstate.
329 329
330 330 States are:
331 331 n normal
332 332 m needs merging
333 333 r marked for removal
334 334 a marked for addition
335 335 ? not tracked
336 336 '''
337 337 return self._map.get(key, ("?",))[0]
338 338
339 339 def __contains__(self, key):
340 340 return key in self._map
341 341
342 342 def __iter__(self):
343 343 for x in sorted(self._map):
344 344 yield x
345 345
346 346 def items(self):
347 347 return self._map.iteritems()
348 348
349 349 iteritems = items
350 350
351 351 def parents(self):
352 352 return [self._validate(p) for p in self._pl]
353 353
354 354 def p1(self):
355 355 return self._validate(self._pl[0])
356 356
357 357 def p2(self):
358 358 return self._validate(self._pl[1])
359 359
360 360 def branch(self):
361 361 return encoding.tolocal(self._branch)
362 362
363 363 def setparents(self, p1, p2=nullid):
364 364 """Set dirstate parents to p1 and p2.
365 365
366 366 When moving from two parents to one, 'm' merged entries are
367 367 adjusted to normal and previous copy records are discarded and
368 368 returned by the call.
369 369
370 370 See localrepo.setparents()
371 371 """
372 372 if self._parentwriters == 0:
373 373 raise ValueError("cannot set dirstate parent without "
374 374 "calling dirstate.beginparentchange")
375 375
376 376 self._dirty = self._dirtypl = True
377 377 oldp2 = self._pl[1]
378 378 if self._origpl is None:
379 379 self._origpl = self._pl
380 380 self._pl = p1, p2
381 381 copies = {}
382 382 if oldp2 != nullid and p2 == nullid:
383 383 candidatefiles = self._nonnormalset.union(self._otherparentset)
384 384 for f in candidatefiles:
385 385 s = self._map.get(f)
386 386 if s is None:
387 387 continue
388 388
389 389 # Discard 'm' markers when moving away from a merge state
390 390 if s[0] == 'm':
391 391 if f in self._copymap:
392 392 copies[f] = self._copymap[f]
393 393 self.normallookup(f)
394 394 # Also fix up otherparent markers
395 395 elif s[0] == 'n' and s[2] == -2:
396 396 if f in self._copymap:
397 397 copies[f] = self._copymap[f]
398 398 self.add(f)
399 399 return copies
400 400
401 401 def setbranch(self, branch):
402 402 self._branch = encoding.fromlocal(branch)
403 403 f = self._opener('branch', 'w', atomictemp=True, checkambig=True)
404 404 try:
405 405 f.write(self._branch + '\n')
406 406 f.close()
407 407
408 408 # make sure filecache has the correct stat info for _branch after
409 409 # replacing the underlying file
410 410 ce = self._filecache['_branch']
411 411 if ce:
412 412 ce.refresh()
413 413 except: # re-raises
414 414 f.discard()
415 415 raise
416 416
417 417 def _opendirstatefile(self):
418 418 fp, mode = txnutil.trypending(self._root, self._opener, self._filename)
419 419 if self._pendingmode is not None and self._pendingmode != mode:
420 420 fp.close()
421 421 raise error.Abort(_('working directory state may be '
422 422 'changed parallelly'))
423 423 self._pendingmode = mode
424 424 return fp
425 425
426 426 def _read(self):
427 427 self._map = {}
428 428 self._copymap = {}
429 429 try:
430 430 fp = self._opendirstatefile()
431 431 try:
432 432 st = fp.read()
433 433 finally:
434 434 fp.close()
435 435 except IOError as err:
436 436 if err.errno != errno.ENOENT:
437 437 raise
438 438 return
439 439 if not st:
440 440 return
441 441
442 442 if util.safehasattr(parsers, 'dict_new_presized'):
443 443 # Make an estimate of the number of files in the dirstate based on
444 444 # its size. From a linear regression on a set of real-world repos,
445 445 # all over 10,000 files, the size of a dirstate entry is 85
446 446 # bytes. The cost of resizing is significantly higher than the cost
447 447 # of filling in a larger presized dict, so subtract 20% from the
448 448 # size.
449 449 #
450 450 # This heuristic is imperfect in many ways, so in a future dirstate
451 451 # format update it makes sense to just record the number of entries
452 452 # on write.
453 453 self._map = parsers.dict_new_presized(len(st) / 71)
454 454
455 455 # Python's garbage collector triggers a GC each time a certain number
456 456 # of container objects (the number being defined by
457 457 # gc.get_threshold()) are allocated. parse_dirstate creates a tuple
458 458 # for each file in the dirstate. The C version then immediately marks
459 459 # them as not to be tracked by the collector. However, this has no
460 460 # effect on when GCs are triggered, only on what objects the GC looks
461 461 # into. This means that O(number of files) GCs are unavoidable.
462 462 # Depending on when in the process's lifetime the dirstate is parsed,
463 463 # this can get very expensive. As a workaround, disable GC while
464 464 # parsing the dirstate.
465 465 #
466 466 # (we cannot decorate the function directly since it is in a C module)
467 467 parse_dirstate = util.nogc(parsers.parse_dirstate)
468 468 p = parse_dirstate(self._map, self._copymap, st)
469 469 if not self._dirtypl:
470 470 self._pl = p
471 471
472 472 def invalidate(self):
473 473 for a in ("_map", "_copymap", "_filefoldmap", "_dirfoldmap", "_branch",
474 474 "_pl", "_dirs", "_ignore", "_nonnormalset",
475 475 "_otherparentset"):
476 476 if a in self.__dict__:
477 477 delattr(self, a)
478 478 self._lastnormaltime = 0
479 479 self._dirty = False
480 480 self._updatedfiles.clear()
481 481 self._parentwriters = 0
482 482 self._origpl = None
483 483
484 484 def copy(self, source, dest):
485 485 """Mark dest as a copy of source. Unmark dest if source is None."""
486 486 if source == dest:
487 487 return
488 488 self._dirty = True
489 489 if source is not None:
490 490 self._copymap[dest] = source
491 491 self._updatedfiles.add(source)
492 492 self._updatedfiles.add(dest)
493 493 elif dest in self._copymap:
494 494 del self._copymap[dest]
495 495 self._updatedfiles.add(dest)
496 496
497 497 def copied(self, file):
498 498 return self._copymap.get(file, None)
499 499
500 500 def copies(self):
501 501 return self._copymap
502 502
503 503 def _droppath(self, f):
504 504 if self[f] not in "?r" and "_dirs" in self.__dict__:
505 505 self._dirs.delpath(f)
506 506
507 507 if "_filefoldmap" in self.__dict__:
508 508 normed = util.normcase(f)
509 509 if normed in self._filefoldmap:
510 510 del self._filefoldmap[normed]
511 511
512 512 self._updatedfiles.add(f)
513 513
514 514 def _addpath(self, f, state, mode, size, mtime):
515 515 oldstate = self[f]
516 516 if state == 'a' or oldstate == 'r':
517 517 scmutil.checkfilename(f)
518 518 if f in self._dirs:
519 519 raise error.Abort(_('directory %r already in dirstate') % f)
520 520 # shadows
521 521 for d in util.finddirs(f):
522 522 if d in self._dirs:
523 523 break
524 524 if d in self._map and self[d] != 'r':
525 525 raise error.Abort(
526 526 _('file %r in dirstate clashes with %r') % (d, f))
527 527 if oldstate in "?r" and "_dirs" in self.__dict__:
528 528 self._dirs.addpath(f)
529 529 self._dirty = True
530 530 self._updatedfiles.add(f)
531 531 self._map[f] = dirstatetuple(state, mode, size, mtime)
532 532 if state != 'n' or mtime == -1:
533 533 self._nonnormalset.add(f)
534 534 if size == -2:
535 535 self._otherparentset.add(f)
536 536
537 537 def normal(self, f):
538 538 '''Mark a file normal and clean.'''
539 539 s = os.lstat(self._join(f))
540 540 mtime = s.st_mtime
541 541 self._addpath(f, 'n', s.st_mode,
542 542 s.st_size & _rangemask, mtime & _rangemask)
543 543 if f in self._copymap:
544 544 del self._copymap[f]
545 545 if f in self._nonnormalset:
546 546 self._nonnormalset.remove(f)
547 547 if mtime > self._lastnormaltime:
548 548 # Remember the most recent modification timeslot for status(),
549 549 # to make sure we won't miss future size-preserving file content
550 550 # modifications that happen within the same timeslot.
551 551 self._lastnormaltime = mtime
552 552
553 553 def normallookup(self, f):
554 554 '''Mark a file normal, but possibly dirty.'''
555 555 if self._pl[1] != nullid and f in self._map:
556 556 # if there is a merge going on and the file was either
557 557 # in state 'm' (-1) or coming from other parent (-2) before
558 558 # being removed, restore that state.
559 559 entry = self._map[f]
560 560 if entry[0] == 'r' and entry[2] in (-1, -2):
561 561 source = self._copymap.get(f)
562 562 if entry[2] == -1:
563 563 self.merge(f)
564 564 elif entry[2] == -2:
565 565 self.otherparent(f)
566 566 if source:
567 567 self.copy(source, f)
568 568 return
569 569 if entry[0] == 'm' or entry[0] == 'n' and entry[2] == -2:
570 570 return
571 571 self._addpath(f, 'n', 0, -1, -1)
572 572 if f in self._copymap:
573 573 del self._copymap[f]
574 574 if f in self._nonnormalset:
575 575 self._nonnormalset.remove(f)
576 576
577 577 def otherparent(self, f):
578 578 '''Mark as coming from the other parent, always dirty.'''
579 579 if self._pl[1] == nullid:
580 580 raise error.Abort(_("setting %r to other parent "
581 581 "only allowed in merges") % f)
582 582 if f in self and self[f] == 'n':
583 583 # merge-like
584 584 self._addpath(f, 'm', 0, -2, -1)
585 585 else:
586 586 # add-like
587 587 self._addpath(f, 'n', 0, -2, -1)
588 588
589 589 if f in self._copymap:
590 590 del self._copymap[f]
591 591
592 592 def add(self, f):
593 593 '''Mark a file added.'''
594 594 self._addpath(f, 'a', 0, -1, -1)
595 595 if f in self._copymap:
596 596 del self._copymap[f]
597 597
598 598 def remove(self, f):
599 599 '''Mark a file removed.'''
600 600 self._dirty = True
601 601 self._droppath(f)
602 602 size = 0
603 603 if self._pl[1] != nullid and f in self._map:
604 604 # backup the previous state
605 605 entry = self._map[f]
606 606 if entry[0] == 'm': # merge
607 607 size = -1
608 608 elif entry[0] == 'n' and entry[2] == -2: # other parent
609 609 size = -2
610 610 self._otherparentset.add(f)
611 611 self._map[f] = dirstatetuple('r', 0, size, 0)
612 612 self._nonnormalset.add(f)
613 613 if size == 0 and f in self._copymap:
614 614 del self._copymap[f]
615 615
616 616 def merge(self, f):
617 617 '''Mark a file merged.'''
618 618 if self._pl[1] == nullid:
619 619 return self.normallookup(f)
620 620 return self.otherparent(f)
621 621
622 622 def drop(self, f):
623 623 '''Drop a file from the dirstate'''
624 624 if f in self._map:
625 625 self._dirty = True
626 626 self._droppath(f)
627 627 del self._map[f]
628 628 if f in self._nonnormalset:
629 629 self._nonnormalset.remove(f)
630 630 if f in self._copymap:
631 631 del self._copymap[f]
632 632
633 633 def _discoverpath(self, path, normed, ignoremissing, exists, storemap):
634 634 if exists is None:
635 635 exists = os.path.lexists(os.path.join(self._root, path))
636 636 if not exists:
637 637 # Maybe a path component exists
638 638 if not ignoremissing and '/' in path:
639 639 d, f = path.rsplit('/', 1)
640 640 d = self._normalize(d, False, ignoremissing, None)
641 641 folded = d + "/" + f
642 642 else:
643 643 # No path components, preserve original case
644 644 folded = path
645 645 else:
646 646 # recursively normalize leading directory components
647 647 # against dirstate
648 648 if '/' in normed:
649 649 d, f = normed.rsplit('/', 1)
650 650 d = self._normalize(d, False, ignoremissing, True)
651 651 r = self._root + "/" + d
652 652 folded = d + "/" + util.fspath(f, r)
653 653 else:
654 654 folded = util.fspath(normed, self._root)
655 655 storemap[normed] = folded
656 656
657 657 return folded
658 658
659 659 def _normalizefile(self, path, isknown, ignoremissing=False, exists=None):
660 660 normed = util.normcase(path)
661 661 folded = self._filefoldmap.get(normed, None)
662 662 if folded is None:
663 663 if isknown:
664 664 folded = path
665 665 else:
666 666 folded = self._discoverpath(path, normed, ignoremissing, exists,
667 667 self._filefoldmap)
668 668 return folded
669 669
670 670 def _normalize(self, path, isknown, ignoremissing=False, exists=None):
671 671 normed = util.normcase(path)
672 672 folded = self._filefoldmap.get(normed, None)
673 673 if folded is None:
674 674 folded = self._dirfoldmap.get(normed, None)
675 675 if folded is None:
676 676 if isknown:
677 677 folded = path
678 678 else:
679 679 # store discovered result in dirfoldmap so that future
680 680 # normalizefile calls don't start matching directories
681 681 folded = self._discoverpath(path, normed, ignoremissing, exists,
682 682 self._dirfoldmap)
683 683 return folded
684 684
685 685 def normalize(self, path, isknown=False, ignoremissing=False):
686 686 '''
687 687 normalize the case of a pathname when on a casefolding filesystem
688 688
689 689 isknown specifies whether the filename came from walking the
690 690 disk, to avoid extra filesystem access.
691 691
692 692 If ignoremissing is True, missing paths are returned
693 693 unchanged. Otherwise, we try harder to normalize possibly
694 694 existing path components.
695 695
696 696 The normalized case is determined based on the following precedence:
697 697
698 698 - version of name already stored in the dirstate
699 699 - version of name stored on disk
700 700 - version provided via command arguments
701 701 '''
702 702
703 703 if self._checkcase:
704 704 return self._normalize(path, isknown, ignoremissing)
705 705 return path
706 706
707 707 def clear(self):
708 708 self._map = {}
709 709 self._nonnormalset = set()
710 710 self._otherparentset = set()
711 711 if "_dirs" in self.__dict__:
712 712 delattr(self, "_dirs")
713 713 self._copymap = {}
714 714 self._pl = [nullid, nullid]
715 715 self._lastnormaltime = 0
716 716 self._updatedfiles.clear()
717 717 self._dirty = True
718 718
719 719 def rebuild(self, parent, allfiles, changedfiles=None):
720 720 if changedfiles is None:
721 721 # Rebuild entire dirstate
722 722 changedfiles = allfiles
723 723 lastnormaltime = self._lastnormaltime
724 724 self.clear()
725 725 self._lastnormaltime = lastnormaltime
726 726
727 727 if self._origpl is None:
728 728 self._origpl = self._pl
729 729 self._pl = (parent, nullid)
730 730 for f in changedfiles:
731 731 if f in allfiles:
732 732 self.normallookup(f)
733 733 else:
734 734 self.drop(f)
735 735
736 736 self._dirty = True
737 737
738 738 def write(self, tr):
739 739 if not self._dirty:
740 740 return
741 741
742 742 filename = self._filename
743 743 if tr:
744 744 # 'dirstate.write()' is not only for writing in-memory
745 745 # changes out, but also for dropping ambiguous timestamp.
746 746 # delayed writing re-raise "ambiguous timestamp issue".
747 747 # See also the wiki page below for detail:
748 748 # https://www.mercurial-scm.org/wiki/DirstateTransactionPlan
749 749
750 750 # emulate dropping timestamp in 'parsers.pack_dirstate'
751 751 now = _getfsnow(self._opener)
752 752 dmap = self._map
753 753 for f in self._updatedfiles:
754 754 e = dmap.get(f)
755 755 if e is not None and e[0] == 'n' and e[3] == now:
756 756 dmap[f] = dirstatetuple(e[0], e[1], e[2], -1)
757 757 self._nonnormalset.add(f)
758 758
759 759 # emulate that all 'dirstate.normal' results are written out
760 760 self._lastnormaltime = 0
761 761 self._updatedfiles.clear()
762 762
763 763 # delay writing in-memory changes out
764 764 tr.addfilegenerator('dirstate', (self._filename,),
765 765 self._writedirstate, location='plain')
766 766 return
767 767
768 768 st = self._opener(filename, "w", atomictemp=True, checkambig=True)
769 769 self._writedirstate(st)
770 770
771 771 def addparentchangecallback(self, category, callback):
772 772 """add a callback to be called when the wd parents are changed
773 773
774 774 Callback will be called with the following arguments:
775 775 dirstate, (oldp1, oldp2), (newp1, newp2)
776 776
777 777 Category is a unique identifier to allow overwriting an old callback
778 778 with a newer callback.
779 779 """
780 780 self._plchangecallbacks[category] = callback
781 781
782 782 def _writedirstate(self, st):
783 783 # notify callbacks about parents change
784 784 if self._origpl is not None and self._origpl != self._pl:
785 785 for c, callback in sorted(self._plchangecallbacks.iteritems()):
786 786 callback(self, self._origpl, self._pl)
787 787 self._origpl = None
788 788 # use the modification time of the newly created temporary file as the
789 789 # filesystem's notion of 'now'
790 790 now = util.fstat(st).st_mtime & _rangemask
791 791
792 792 # enough 'delaywrite' prevents 'pack_dirstate' from dropping
793 793 # timestamp of each entries in dirstate, because of 'now > mtime'
794 794 delaywrite = self._ui.configint('debug', 'dirstate.delaywrite', 0)
795 795 if delaywrite > 0:
796 796 # do we have any files to delay for?
797 797 for f, e in self._map.iteritems():
798 798 if e[0] == 'n' and e[3] == now:
799 799 import time # to avoid useless import
800 800 # rather than sleep n seconds, sleep until the next
801 801 # multiple of n seconds
802 802 clock = time.time()
803 803 start = int(clock) - (int(clock) % delaywrite)
804 804 end = start + delaywrite
805 805 time.sleep(end - clock)
806 806 now = end # trust our estimate that the end is near now
807 807 break
808 808
809 809 st.write(parsers.pack_dirstate(self._map, self._copymap, self._pl, now))
810 810 self._nonnormalset, self._otherparentset = nonnormalentries(self._map)
811 811 st.close()
812 812 self._lastnormaltime = 0
813 813 self._dirty = self._dirtypl = False
814 814
815 815 def _dirignore(self, f):
816 816 if f == '.':
817 817 return False
818 818 if self._ignore(f):
819 819 return True
820 820 for p in util.finddirs(f):
821 821 if self._ignore(p):
822 822 return True
823 823 return False
824 824
825 825 def _ignorefiles(self):
826 826 files = []
827 827 if os.path.exists(self._join('.hgignore')):
828 828 files.append(self._join('.hgignore'))
829 829 for name, path in self._ui.configitems("ui"):
830 830 if name == 'ignore' or name.startswith('ignore.'):
831 831 # we need to use os.path.join here rather than self._join
832 832 # because path is arbitrary and user-specified
833 833 files.append(os.path.join(self._rootdir, util.expandpath(path)))
834 834 return files
835 835
836 836 def _ignorefileandline(self, f):
837 837 files = collections.deque(self._ignorefiles())
838 838 visited = set()
839 839 while files:
840 840 i = files.popleft()
841 841 patterns = matchmod.readpatternfile(i, self._ui.warn,
842 842 sourceinfo=True)
843 843 for pattern, lineno, line in patterns:
844 844 kind, p = matchmod._patsplit(pattern, 'glob')
845 845 if kind == "subinclude":
846 846 if p not in visited:
847 847 files.append(p)
848 848 continue
849 849 m = matchmod.match(self._root, '', [], [pattern],
850 850 warn=self._ui.warn)
851 851 if m(f):
852 852 return (i, lineno, line)
853 853 visited.add(i)
854 854 return (None, -1, "")
855 855
856 856 def _walkexplicit(self, match, subrepos):
857 857 '''Get stat data about the files explicitly specified by match.
858 858
859 859 Return a triple (results, dirsfound, dirsnotfound).
860 860 - results is a mapping from filename to stat result. It also contains
861 861 listings mapping subrepos and .hg to None.
862 862 - dirsfound is a list of files found to be directories.
863 863 - dirsnotfound is a list of files that the dirstate thinks are
864 864 directories and that were not found.'''
865 865
866 866 def badtype(mode):
867 867 kind = _('unknown')
868 868 if stat.S_ISCHR(mode):
869 869 kind = _('character device')
870 870 elif stat.S_ISBLK(mode):
871 871 kind = _('block device')
872 872 elif stat.S_ISFIFO(mode):
873 873 kind = _('fifo')
874 874 elif stat.S_ISSOCK(mode):
875 875 kind = _('socket')
876 876 elif stat.S_ISDIR(mode):
877 877 kind = _('directory')
878 878 return _('unsupported file type (type is %s)') % kind
879 879
880 880 matchedir = match.explicitdir
881 881 badfn = match.bad
882 882 dmap = self._map
883 883 lstat = os.lstat
884 884 getkind = stat.S_IFMT
885 885 dirkind = stat.S_IFDIR
886 886 regkind = stat.S_IFREG
887 887 lnkkind = stat.S_IFLNK
888 888 join = self._join
889 889 dirsfound = []
890 890 foundadd = dirsfound.append
891 891 dirsnotfound = []
892 892 notfoundadd = dirsnotfound.append
893 893
894 894 if not match.isexact() and self._checkcase:
895 895 normalize = self._normalize
896 896 else:
897 897 normalize = None
898 898
899 899 files = sorted(match.files())
900 900 subrepos.sort()
901 901 i, j = 0, 0
902 902 while i < len(files) and j < len(subrepos):
903 903 subpath = subrepos[j] + "/"
904 904 if files[i] < subpath:
905 905 i += 1
906 906 continue
907 907 while i < len(files) and files[i].startswith(subpath):
908 908 del files[i]
909 909 j += 1
910 910
911 911 if not files or '.' in files:
912 912 files = ['.']
913 913 results = dict.fromkeys(subrepos)
914 914 results['.hg'] = None
915 915
916 916 alldirs = None
917 917 for ff in files:
918 918 # constructing the foldmap is expensive, so don't do it for the
919 919 # common case where files is ['.']
920 920 if normalize and ff != '.':
921 921 nf = normalize(ff, False, True)
922 922 else:
923 923 nf = ff
924 924 if nf in results:
925 925 continue
926 926
927 927 try:
928 928 st = lstat(join(nf))
929 929 kind = getkind(st.st_mode)
930 930 if kind == dirkind:
931 931 if nf in dmap:
932 932 # file replaced by dir on disk but still in dirstate
933 933 results[nf] = None
934 934 if matchedir:
935 935 matchedir(nf)
936 936 foundadd((nf, ff))
937 937 elif kind == regkind or kind == lnkkind:
938 938 results[nf] = st
939 939 else:
940 940 badfn(ff, badtype(kind))
941 941 if nf in dmap:
942 942 results[nf] = None
943 943 except OSError as inst: # nf not found on disk - it is dirstate only
944 944 if nf in dmap: # does it exactly match a missing file?
945 945 results[nf] = None
946 946 else: # does it match a missing directory?
947 947 if alldirs is None:
948 948 alldirs = util.dirs(dmap)
949 949 if nf in alldirs:
950 950 if matchedir:
951 951 matchedir(nf)
952 952 notfoundadd(nf)
953 953 else:
954 954 badfn(ff, inst.strerror)
955 955
956 956 # Case insensitive filesystems cannot rely on lstat() failing to detect
957 957 # a case-only rename. Prune the stat object for any file that does not
958 958 # match the case in the filesystem, if there are multiple files that
959 959 # normalize to the same path.
960 960 if match.isexact() and self._checkcase:
961 961 normed = {}
962 962
963 963 for f, st in results.iteritems():
964 964 if st is None:
965 965 continue
966 966
967 967 nc = util.normcase(f)
968 968 paths = normed.get(nc)
969 969
970 970 if paths is None:
971 971 paths = set()
972 972 normed[nc] = paths
973 973
974 974 paths.add(f)
975 975
976 976 for norm, paths in normed.iteritems():
977 977 if len(paths) > 1:
978 978 for path in paths:
979 979 folded = self._discoverpath(path, norm, True, None,
980 980 self._dirfoldmap)
981 981 if path != folded:
982 982 results[path] = None
983 983
984 984 return results, dirsfound, dirsnotfound
985 985
986 986 def walk(self, match, subrepos, unknown, ignored, full=True):
987 987 '''
988 988 Walk recursively through the directory tree, finding all files
989 989 matched by match.
990 990
991 991 If full is False, maybe skip some known-clean files.
992 992
993 993 Return a dict mapping filename to stat-like object (either
994 994 mercurial.osutil.stat instance or return value of os.stat()).
995 995
996 996 '''
997 997 # full is a flag that extensions that hook into walk can use -- this
998 998 # implementation doesn't use it at all. This satisfies the contract
999 999 # because we only guarantee a "maybe".
1000 1000
1001 1001 if ignored:
1002 1002 ignore = util.never
1003 1003 dirignore = util.never
1004 1004 elif unknown:
1005 1005 ignore = self._ignore
1006 1006 dirignore = self._dirignore
1007 1007 else:
1008 1008 # if not unknown and not ignored, drop dir recursion and step 2
1009 1009 ignore = util.always
1010 1010 dirignore = util.always
1011 1011
1012 1012 matchfn = match.matchfn
1013 1013 matchalways = match.always()
1014 1014 matchtdir = match.traversedir
1015 1015 dmap = self._map
1016 1016 listdir = util.listdir
1017 1017 lstat = os.lstat
1018 1018 dirkind = stat.S_IFDIR
1019 1019 regkind = stat.S_IFREG
1020 1020 lnkkind = stat.S_IFLNK
1021 1021 join = self._join
1022 1022
1023 1023 exact = skipstep3 = False
1024 1024 if match.isexact(): # match.exact
1025 1025 exact = True
1026 1026 dirignore = util.always # skip step 2
1027 1027 elif match.prefix(): # match.match, no patterns
1028 1028 skipstep3 = True
1029 1029
1030 1030 if not exact and self._checkcase:
1031 1031 normalize = self._normalize
1032 1032 normalizefile = self._normalizefile
1033 1033 skipstep3 = False
1034 1034 else:
1035 1035 normalize = self._normalize
1036 1036 normalizefile = None
1037 1037
1038 1038 # step 1: find all explicit files
1039 1039 results, work, dirsnotfound = self._walkexplicit(match, subrepos)
1040 1040
1041 1041 skipstep3 = skipstep3 and not (work or dirsnotfound)
1042 1042 work = [d for d in work if not dirignore(d[0])]
1043 1043
1044 1044 # step 2: visit subdirectories
1045 1045 def traverse(work, alreadynormed):
1046 1046 wadd = work.append
1047 1047 while work:
1048 1048 nd = work.pop()
1049 1049 if not match.visitdir(nd):
1050 1050 continue
1051 1051 skip = None
1052 1052 if nd == '.':
1053 1053 nd = ''
1054 1054 else:
1055 1055 skip = '.hg'
1056 1056 try:
1057 1057 entries = listdir(join(nd), stat=True, skip=skip)
1058 1058 except OSError as inst:
1059 1059 if inst.errno in (errno.EACCES, errno.ENOENT):
1060 1060 match.bad(self.pathto(nd), inst.strerror)
1061 1061 continue
1062 1062 raise
1063 1063 for f, kind, st in entries:
1064 1064 if normalizefile:
1065 1065 # even though f might be a directory, we're only
1066 1066 # interested in comparing it to files currently in the
1067 1067 # dmap -- therefore normalizefile is enough
1068 1068 nf = normalizefile(nd and (nd + "/" + f) or f, True,
1069 1069 True)
1070 1070 else:
1071 1071 nf = nd and (nd + "/" + f) or f
1072 1072 if nf not in results:
1073 1073 if kind == dirkind:
1074 1074 if not ignore(nf):
1075 1075 if matchtdir:
1076 1076 matchtdir(nf)
1077 1077 wadd(nf)
1078 1078 if nf in dmap and (matchalways or matchfn(nf)):
1079 1079 results[nf] = None
1080 1080 elif kind == regkind or kind == lnkkind:
1081 1081 if nf in dmap:
1082 1082 if matchalways or matchfn(nf):
1083 1083 results[nf] = st
1084 1084 elif ((matchalways or matchfn(nf))
1085 1085 and not ignore(nf)):
1086 1086 # unknown file -- normalize if necessary
1087 1087 if not alreadynormed:
1088 1088 nf = normalize(nf, False, True)
1089 1089 results[nf] = st
1090 1090 elif nf in dmap and (matchalways or matchfn(nf)):
1091 1091 results[nf] = None
1092 1092
1093 1093 for nd, d in work:
1094 1094 # alreadynormed means that processwork doesn't have to do any
1095 1095 # expensive directory normalization
1096 1096 alreadynormed = not normalize or nd == d
1097 1097 traverse([d], alreadynormed)
1098 1098
1099 1099 for s in subrepos:
1100 1100 del results[s]
1101 1101 del results['.hg']
1102 1102
1103 1103 # step 3: visit remaining files from dmap
1104 1104 if not skipstep3 and not exact:
1105 1105 # If a dmap file is not in results yet, it was either
1106 1106 # a) not matching matchfn b) ignored, c) missing, or d) under a
1107 1107 # symlink directory.
1108 1108 if not results and matchalways:
1109 1109 visit = [f for f in dmap]
1110 1110 else:
1111 1111 visit = [f for f in dmap if f not in results and matchfn(f)]
1112 1112 visit.sort()
1113 1113
1114 1114 if unknown:
1115 1115 # unknown == True means we walked all dirs under the roots
1116 1116 # that wasn't ignored, and everything that matched was stat'ed
1117 1117 # and is already in results.
1118 1118 # The rest must thus be ignored or under a symlink.
1119 1119 audit_path = pathutil.pathauditor(self._root)
1120 1120
1121 1121 for nf in iter(visit):
1122 1122 # If a stat for the same file was already added with a
1123 1123 # different case, don't add one for this, since that would
1124 1124 # make it appear as if the file exists under both names
1125 1125 # on disk.
1126 1126 if (normalizefile and
1127 1127 normalizefile(nf, True, True) in results):
1128 1128 results[nf] = None
1129 1129 # Report ignored items in the dmap as long as they are not
1130 1130 # under a symlink directory.
1131 1131 elif audit_path.check(nf):
1132 1132 try:
1133 1133 results[nf] = lstat(join(nf))
1134 1134 # file was just ignored, no links, and exists
1135 1135 except OSError:
1136 1136 # file doesn't exist
1137 1137 results[nf] = None
1138 1138 else:
1139 1139 # It's either missing or under a symlink directory
1140 1140 # which we in this case report as missing
1141 1141 results[nf] = None
1142 1142 else:
1143 1143 # We may not have walked the full directory tree above,
1144 1144 # so stat and check everything we missed.
1145 1145 iv = iter(visit)
1146 1146 for st in util.statfiles([join(i) for i in visit]):
1147 1147 results[next(iv)] = st
1148 1148 return results
1149 1149
1150 1150 def status(self, match, subrepos, ignored, clean, unknown):
1151 1151 '''Determine the status of the working copy relative to the
1152 1152 dirstate and return a pair of (unsure, status), where status is of type
1153 1153 scmutil.status and:
1154 1154
1155 1155 unsure:
1156 1156 files that might have been modified since the dirstate was
1157 1157 written, but need to be read to be sure (size is the same
1158 1158 but mtime differs)
1159 1159 status.modified:
1160 1160 files that have definitely been modified since the dirstate
1161 1161 was written (different size or mode)
1162 1162 status.clean:
1163 1163 files that have definitely not been modified since the
1164 1164 dirstate was written
1165 1165 '''
1166 1166 listignored, listclean, listunknown = ignored, clean, unknown
1167 1167 lookup, modified, added, unknown, ignored = [], [], [], [], []
1168 1168 removed, deleted, clean = [], [], []
1169 1169
1170 1170 dmap = self._map
1171 1171 ladd = lookup.append # aka "unsure"
1172 1172 madd = modified.append
1173 1173 aadd = added.append
1174 1174 uadd = unknown.append
1175 1175 iadd = ignored.append
1176 1176 radd = removed.append
1177 1177 dadd = deleted.append
1178 1178 cadd = clean.append
1179 1179 mexact = match.exact
1180 1180 dirignore = self._dirignore
1181 1181 checkexec = self._checkexec
1182 1182 copymap = self._copymap
1183 1183 lastnormaltime = self._lastnormaltime
1184 1184
1185 1185 # We need to do full walks when either
1186 1186 # - we're listing all clean files, or
1187 1187 # - match.traversedir does something, because match.traversedir should
1188 1188 # be called for every dir in the working dir
1189 1189 full = listclean or match.traversedir is not None
1190 1190 for fn, st in self.walk(match, subrepos, listunknown, listignored,
1191 1191 full=full).iteritems():
1192 1192 if fn not in dmap:
1193 1193 if (listignored or mexact(fn)) and dirignore(fn):
1194 1194 if listignored:
1195 1195 iadd(fn)
1196 1196 else:
1197 1197 uadd(fn)
1198 1198 continue
1199 1199
1200 1200 # This is equivalent to 'state, mode, size, time = dmap[fn]' but not
1201 1201 # written like that for performance reasons. dmap[fn] is not a
1202 1202 # Python tuple in compiled builds. The CPython UNPACK_SEQUENCE
1203 1203 # opcode has fast paths when the value to be unpacked is a tuple or
1204 1204 # a list, but falls back to creating a full-fledged iterator in
1205 1205 # general. That is much slower than simply accessing and storing the
1206 1206 # tuple members one by one.
1207 1207 t = dmap[fn]
1208 1208 state = t[0]
1209 1209 mode = t[1]
1210 1210 size = t[2]
1211 1211 time = t[3]
1212 1212
1213 1213 if not st and state in "nma":
1214 1214 dadd(fn)
1215 1215 elif state == 'n':
1216 1216 if (size >= 0 and
1217 1217 ((size != st.st_size and size != st.st_size & _rangemask)
1218 1218 or ((mode ^ st.st_mode) & 0o100 and checkexec))
1219 1219 or size == -2 # other parent
1220 1220 or fn in copymap):
1221 1221 madd(fn)
1222 1222 elif time != st.st_mtime and time != st.st_mtime & _rangemask:
1223 1223 ladd(fn)
1224 1224 elif st.st_mtime == lastnormaltime:
1225 1225 # fn may have just been marked as normal and it may have
1226 1226 # changed in the same second without changing its size.
1227 1227 # This can happen if we quickly do multiple commits.
1228 1228 # Force lookup, so we don't miss such a racy file change.
1229 1229 ladd(fn)
1230 1230 elif listclean:
1231 1231 cadd(fn)
1232 1232 elif state == 'm':
1233 1233 madd(fn)
1234 1234 elif state == 'a':
1235 1235 aadd(fn)
1236 1236 elif state == 'r':
1237 1237 radd(fn)
1238 1238
1239 1239 return (lookup, scmutil.status(modified, added, removed, deleted,
1240 1240 unknown, ignored, clean))
1241 1241
1242 1242 def matches(self, match):
1243 1243 '''
1244 1244 return files in the dirstate (in whatever state) filtered by match
1245 1245 '''
1246 1246 dmap = self._map
1247 1247 if match.always():
1248 1248 return dmap.keys()
1249 1249 files = match.files()
1250 1250 if match.isexact():
1251 1251 # fast path -- filter the other way around, since typically files is
1252 1252 # much smaller than dmap
1253 1253 return [f for f in files if f in dmap]
1254 1254 if match.prefix() and all(fn in dmap for fn in files):
1255 1255 # fast path -- all the values are known to be files, so just return
1256 1256 # that
1257 1257 return list(files)
1258 1258 return [f for f in dmap if match(f)]
1259 1259
1260 1260 def _actualfilename(self, tr):
1261 1261 if tr:
1262 1262 return self._pendingfilename
1263 1263 else:
1264 1264 return self._filename
1265 1265
1266 1266 def savebackup(self, tr, suffix='', prefix=''):
1267 1267 '''Save current dirstate into backup file with suffix'''
1268 1268 assert len(suffix) > 0 or len(prefix) > 0
1269 1269 filename = self._actualfilename(tr)
1270 1270
1271 1271 # use '_writedirstate' instead of 'write' to write changes certainly,
1272 1272 # because the latter omits writing out if transaction is running.
1273 1273 # output file will be used to create backup of dirstate at this point.
1274 1274 if self._dirty or not self._opener.exists(filename):
1275 1275 self._writedirstate(self._opener(filename, "w", atomictemp=True,
1276 1276 checkambig=True))
1277 1277
1278 1278 if tr:
1279 1279 # ensure that subsequent tr.writepending returns True for
1280 1280 # changes written out above, even if dirstate is never
1281 1281 # changed after this
1282 1282 tr.addfilegenerator('dirstate', (self._filename,),
1283 1283 self._writedirstate, location='plain')
1284 1284
1285 1285 # ensure that pending file written above is unlinked at
1286 1286 # failure, even if tr.writepending isn't invoked until the
1287 1287 # end of this transaction
1288 1288 tr.registertmp(filename, location='plain')
1289 1289
1290 1290 backupname = prefix + self._filename + suffix
1291 1291 assert backupname != filename
1292 1292 self._opener.tryunlink(backupname)
1293 1293 # hardlink backup is okay because _writedirstate is always called
1294 1294 # with an "atomictemp=True" file.
1295 1295 util.copyfile(self._opener.join(filename),
1296 1296 self._opener.join(backupname), hardlink=True)
1297 1297
1298 1298 def restorebackup(self, tr, suffix='', prefix=''):
1299 1299 '''Restore dirstate by backup file with suffix'''
1300 1300 assert len(suffix) > 0 or len(prefix) > 0
1301 1301 # this "invalidate()" prevents "wlock.release()" from writing
1302 1302 # changes of dirstate out after restoring from backup file
1303 1303 self.invalidate()
1304 1304 filename = self._actualfilename(tr)
1305 1305 # using self._filename to avoid having "pending" in the backup filename
1306 1306 self._opener.rename(prefix + self._filename + suffix, filename,
1307 1307 checkambig=True)
1308 1308
1309 1309 def clearbackup(self, tr, suffix='', prefix=''):
1310 1310 '''Clear backup file with suffix'''
1311 1311 assert len(suffix) > 0 or len(prefix) > 0
1312 1312 # using self._filename to avoid having "pending" in the backup filename
1313 1313 self._opener.unlink(prefix + self._filename + suffix)
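The only functional change to dirstate.py above is in the _ignore property: with no ignore files it now returns matchmod.never(self._root, '') instead of the bare util.never function. A hedged sketch of the difference for callers follows; util.never is assumed to be a plain always-False function, while the object from matchmod.never() exposes the matcher interface defined in the match.py hunk below.

from mercurial import match as matchmod

ignore = matchmod.never('/path/to/repo', '')
ignore('foo/bar.c')         # False, the same answer the old util.never gave
ignore.files()              # [] - it is a real matcher object, not a bare function
ignore.exact('foo/bar.c')   # False
# A bare function answers the first call identically but has no files(),
# exact() or visitdir(), so later code that queries the ignore matcher
# would need special-casing without this change.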
diff --git a/mercurial/match.py b/mercurial/match.py
@@ -1,974 +1,996 @@
1 1 # match.py - filename matching
2 2 #
3 3 # Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import copy
11 11 import os
12 12 import re
13 13
14 14 from .i18n import _
15 15 from . import (
16 16 error,
17 17 pathutil,
18 18 util,
19 19 )
20 20
21 21 propertycache = util.propertycache
22 22
23 23 def _rematcher(regex):
24 24 '''compile the regexp with the best available regexp engine and return a
25 25 matcher function'''
26 26 m = util.re.compile(regex)
27 27 try:
28 28 # slightly faster, provided by facebook's re2 bindings
29 29 return m.test_match
30 30 except AttributeError:
31 31 return m.match
32 32
33 33 def _expandsets(kindpats, ctx, listsubrepos):
34 34 '''Returns the kindpats list with the 'set' patterns expanded.'''
35 35 fset = set()
36 36 other = []
37 37
38 38 for kind, pat, source in kindpats:
39 39 if kind == 'set':
40 40 if not ctx:
41 41 raise error.ProgrammingError("fileset expression with no "
42 42 "context")
43 43 s = ctx.getfileset(pat)
44 44 fset.update(s)
45 45
46 46 if listsubrepos:
47 47 for subpath in ctx.substate:
48 48 s = ctx.sub(subpath).getfileset(pat)
49 49 fset.update(subpath + '/' + f for f in s)
50 50
51 51 continue
52 52 other.append((kind, pat, source))
53 53 return fset, other
54 54
55 55 def _expandsubinclude(kindpats, root):
56 56 '''Returns the list of subinclude matcher args and the kindpats without the
57 57 subincludes in it.'''
58 58 relmatchers = []
59 59 other = []
60 60
61 61 for kind, pat, source in kindpats:
62 62 if kind == 'subinclude':
63 63 sourceroot = pathutil.dirname(util.normpath(source))
64 64 pat = util.pconvert(pat)
65 65 path = pathutil.join(sourceroot, pat)
66 66
67 67 newroot = pathutil.dirname(path)
68 68 matcherargs = (newroot, '', [], ['include:%s' % path])
69 69
70 70 prefix = pathutil.canonpath(root, root, newroot)
71 71 if prefix:
72 72 prefix += '/'
73 73 relmatchers.append((prefix, matcherargs))
74 74 else:
75 75 other.append((kind, pat, source))
76 76
77 77 return relmatchers, other
78 78
79 79 def _kindpatsalwaysmatch(kindpats):
80 80 """"Checks whether the kindspats match everything, as e.g.
81 81 'relpath:.' does.
82 82 """
83 83 for kind, pat, source in kindpats:
84 84 if pat != '' or kind not in ['relpath', 'glob']:
85 85 return False
86 86 return True
87 87
88 88 def match(root, cwd, patterns, include=None, exclude=None, default='glob',
89 89 exact=False, auditor=None, ctx=None, listsubrepos=False, warn=None,
90 90 badfn=None, icasefs=False):
91 91 """build an object to match a set of file patterns
92 92
93 93 arguments:
94 94 root - the canonical root of the tree you're matching against
95 95 cwd - the current working directory, if relevant
96 96 patterns - patterns to find
97 97 include - patterns to include (unless they are excluded)
98 98 exclude - patterns to exclude (even if they are included)
99 99 default - if a pattern in patterns has no explicit type, assume this one
100 100 exact - patterns are actually filenames (include/exclude still apply)
101 101 warn - optional function used for printing warnings
102 102 badfn - optional bad() callback for this matcher instead of the default
103 103 icasefs - make a matcher for wdir on case insensitive filesystems, which
104 104 normalizes the given patterns to the case in the filesystem
105 105
106 106 a pattern is one of:
107 107 'glob:<glob>' - a glob relative to cwd
108 108 're:<regexp>' - a regular expression
109 109 'path:<path>' - a path relative to repository root, which is matched
110 110 recursively
111 111 'rootfilesin:<path>' - a path relative to repository root, which is
112 112 matched non-recursively (will not match subdirectories)
113 113 'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
114 114 'relpath:<path>' - a path relative to cwd
115 115 'relre:<regexp>' - a regexp that needn't match the start of a name
116 116 'set:<fileset>' - a fileset expression
117 117 'include:<path>' - a file of patterns to read and include
118 118 'subinclude:<path>' - a file of patterns to match against files under
119 119 the same directory
120 120 '<something>' - a pattern of the specified default type
121 121 """
122 122 normalize = _donormalize
123 123 if icasefs:
124 124 if exact:
125 125 raise error.ProgrammingError("a case-insensitive exact matcher "
126 126 "doesn't make sense")
127 127 dirstate = ctx.repo().dirstate
128 128 dsnormalize = dirstate.normalize
129 129
130 130 def normalize(patterns, default, root, cwd, auditor, warn):
131 131 kp = _donormalize(patterns, default, root, cwd, auditor, warn)
132 132 kindpats = []
133 133 for kind, pats, source in kp:
134 134 if kind not in ('re', 'relre'): # regex can't be normalized
135 135 p = pats
136 136 pats = dsnormalize(pats)
137 137
138 138 # Preserve the original to handle a case only rename.
139 139 if p != pats and p in dirstate:
140 140 kindpats.append((kind, p, source))
141 141
142 142 kindpats.append((kind, pats, source))
143 143 return kindpats
144 144
145 145 if exact:
146 146 m = exactmatcher(root, cwd, patterns, badfn)
147 147 elif patterns:
148 148 kindpats = normalize(patterns, default, root, cwd, auditor, warn)
149 149 if _kindpatsalwaysmatch(kindpats):
150 150 m = alwaysmatcher(root, cwd, badfn, relativeuipath=True)
151 151 else:
152 152 m = patternmatcher(root, cwd, kindpats, ctx=ctx,
153 153 listsubrepos=listsubrepos, badfn=badfn)
154 154 else:
155 155 # It's a little strange that no patterns means to match everything.
156 156 # Consider changing this to match nothing (probably adding a
157 157 # "nevermatcher").
158 158 m = alwaysmatcher(root, cwd, badfn)
159 159
160 160 if include:
161 161 kindpats = normalize(include, 'glob', root, cwd, auditor, warn)
162 162 im = includematcher(root, cwd, kindpats, ctx=ctx,
163 163 listsubrepos=listsubrepos, badfn=None)
164 164 m = intersectmatchers(m, im)
165 165 if exclude:
166 166 kindpats = normalize(exclude, 'glob', root, cwd, auditor, warn)
167 167 em = includematcher(root, cwd, kindpats, ctx=ctx,
168 168 listsubrepos=listsubrepos, badfn=None)
169 169 m = differencematcher(m, em)
170 170 return m
171 171
172 172 def exact(root, cwd, files, badfn=None):
173 173 return exactmatcher(root, cwd, files, badfn=badfn)
174 174
175 175 def always(root, cwd):
176 176 return alwaysmatcher(root, cwd)
177 177
178 def never(root, cwd):
179 return nevermatcher(root, cwd)
180
178 181 def badmatch(match, badfn):
179 182 """Make a copy of the given matcher, replacing its bad method with the given
180 183 one.
181 184 """
182 185 m = copy.copy(match)
183 186 m.bad = badfn
184 187 return m
185 188
186 189 def _donormalize(patterns, default, root, cwd, auditor, warn):
187 190 '''Convert 'kind:pat' from the patterns list to tuples with kind and
188 191 normalized and rooted patterns and with listfiles expanded.'''
189 192 kindpats = []
190 193 for kind, pat in [_patsplit(p, default) for p in patterns]:
191 194 if kind in ('glob', 'relpath'):
192 195 pat = pathutil.canonpath(root, cwd, pat, auditor)
193 196 elif kind in ('relglob', 'path', 'rootfilesin'):
194 197 pat = util.normpath(pat)
195 198 elif kind in ('listfile', 'listfile0'):
196 199 try:
197 200 files = util.readfile(pat)
198 201 if kind == 'listfile0':
199 202 files = files.split('\0')
200 203 else:
201 204 files = files.splitlines()
202 205 files = [f for f in files if f]
203 206 except EnvironmentError:
204 207 raise error.Abort(_("unable to read file list (%s)") % pat)
205 208 for k, p, source in _donormalize(files, default, root, cwd,
206 209 auditor, warn):
207 210 kindpats.append((k, p, pat))
208 211 continue
209 212 elif kind == 'include':
210 213 try:
211 214 fullpath = os.path.join(root, util.localpath(pat))
212 215 includepats = readpatternfile(fullpath, warn)
213 216 for k, p, source in _donormalize(includepats, default,
214 217 root, cwd, auditor, warn):
215 218 kindpats.append((k, p, source or pat))
216 219 except error.Abort as inst:
217 220 raise error.Abort('%s: %s' % (pat, inst[0]))
218 221 except IOError as inst:
219 222 if warn:
220 223 warn(_("skipping unreadable pattern file '%s': %s\n") %
221 224 (pat, inst.strerror))
222 225 continue
223 226 # else: re or relre - which cannot be normalized
224 227 kindpats.append((kind, pat, ''))
225 228 return kindpats
226 229
227 230 class basematcher(object):
228 231
229 232 def __init__(self, root, cwd, badfn=None, relativeuipath=True):
230 233 self._root = root
231 234 self._cwd = cwd
232 235 if badfn is not None:
233 236 self.bad = badfn
234 237 self._relativeuipath = relativeuipath
235 238
236 239 def __call__(self, fn):
237 240 return self.matchfn(fn)
238 241 def __iter__(self):
239 242 for f in self._files:
240 243 yield f
241 244 # Callbacks related to how the matcher is used by dirstate.walk.
242 245 # Subscribers to these events must monkeypatch the matcher object.
243 246 def bad(self, f, msg):
244 247 '''Callback from dirstate.walk for each explicit file that can't be
245 248 found/accessed, with an error message.'''
246 249 pass
247 250
248 251 # If an explicitdir is set, it will be called when an explicitly listed
249 252 # directory is visited.
250 253 explicitdir = None
251 254
252 255 # If a traversedir is set, it will be called when a directory discovered
253 256 # by recursive traversal is visited.
254 257 traversedir = None
255 258
256 259 def abs(self, f):
257 260 '''Convert a repo path back to path that is relative to the root of the
258 261 matcher.'''
259 262 return f
260 263
261 264 def rel(self, f):
262 265 '''Convert repo path back to path that is relative to cwd of matcher.'''
263 266 return util.pathto(self._root, self._cwd, f)
264 267
265 268 def uipath(self, f):
266 269 '''Convert repo path to a display path. If patterns or -I/-X were used
267 270 to create this matcher, the display path will be relative to cwd.
268 271 Otherwise it is relative to the root of the repo.'''
269 272 return (self._relativeuipath and self.rel(f)) or self.abs(f)
270 273
271 274 @propertycache
272 275 def _files(self):
273 276 return []
274 277
275 278 def files(self):
276 279 '''Explicitly listed files or patterns or roots:
277 280 if no patterns or .always(): empty list,
278 281 if exact: list exact files,
279 282 if not .anypats(): list all files and dirs,
280 283 else: optimal roots'''
281 284 return self._files
282 285
283 286 @propertycache
284 287 def _fileset(self):
285 288 return set(self._files)
286 289
287 290 def exact(self, f):
288 291 '''Returns True if f is in .files().'''
289 292 return f in self._fileset
290 293
291 294 def matchfn(self, f):
292 295 return False
293 296
294 297 def visitdir(self, dir):
295 298 '''Decides whether a directory should be visited based on whether it
296 299 has potential matches in it or one of its subdirectories. This is
297 300 based on the match's primary, included, and excluded patterns.
298 301
299 302 Returns the string 'all' if the given directory and all subdirectories
300 303 should be visited. Otherwise returns True or False indicating whether
301 304 the given directory should be visited.
302 305
303 306 This function's behavior is undefined if it has returned False for
304 307 one of the dir's parent directories.
305 308 '''
306 309 return False
307 310
308 311 def anypats(self):
309 312 '''Matcher uses patterns or include/exclude.'''
310 313 return False
311 314
312 315 def always(self):
313 316 '''Matcher will match everything and .files() will be empty
314 317 - optimization might be possible and necessary.'''
315 318 return False
316 319
317 320 def isexact(self):
318 321 return False
319 322
320 323 def prefix(self):
321 324 return not self.always() and not self.isexact() and not self.anypats()
322 325
323 326 class alwaysmatcher(basematcher):
324 327 '''Matches everything.'''
325 328
326 329 def __init__(self, root, cwd, badfn=None, relativeuipath=False):
327 330 super(alwaysmatcher, self).__init__(root, cwd, badfn,
328 331 relativeuipath=relativeuipath)
329 332
330 333 def always(self):
331 334 return True
332 335
333 336 def matchfn(self, f):
334 337 return True
335 338
336 339 def visitdir(self, dir):
337 340 return 'all'
338 341
339 342 def __repr__(self):
340 343 return '<alwaysmatcher>'
341 344
345 class nevermatcher(basematcher):
346 '''Matches nothing.'''
347
348 def __init__(self, root, cwd, badfn=None, relativeuipath=False):
349 super(nevermatcher, self).__init__(root, cwd, badfn,
350 relativeuipath=relativeuipath)
351
352 def always(self):
353 return False
354
355 def matchfn(self, f):
356 return False
357
358 def visitdir(self, dir):
359 return False
360
361 def __repr__(self):
362 return '<nevermatcher>'
363
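The nevermatcher introduced above is the counterpart of alwaysmatcher: it rejects every file, never asks the walker to enter a directory, and reports no explicit files. A minimal sketch of its behaviour, assuming this revision of the module is importable as mercurial.match; the root and cwd arguments below are placeholder values, not paths from this change:

# Sketch only; 'repo' and '' stand in for a real root/cwd pair.
from mercurial import match as matchmod

m = matchmod.nevermatcher('repo', '')
print(m('a.txt'))         # False - no file ever matches
print(m.visitdir('dir'))  # False - no directory needs to be walked
print(m.files())          # []    - no explicit files either
print(m)                  # <nevermatcher>, the repr hg debugignore now shows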
342 364 class patternmatcher(basematcher):
343 365
344 366 def __init__(self, root, cwd, kindpats, ctx=None, listsubrepos=False,
345 367 badfn=None):
346 368 super(patternmatcher, self).__init__(root, cwd, badfn)
347 369
348 370 self._files = _explicitfiles(kindpats)
349 371 self._anypats = _anypats(kindpats)
350 372 self.patternspat, pm = _buildmatch(ctx, kindpats, '$', listsubrepos,
351 373 root)
352 374 self.matchfn = pm
353 375
354 376 @propertycache
355 377 def _dirs(self):
356 378 return set(util.dirs(self._fileset)) | {'.'}
357 379
358 380 def visitdir(self, dir):
359 381 if self.prefix() and dir in self._fileset:
360 382 return 'all'
361 383 return ('.' in self._fileset or
362 384 dir in self._fileset or
363 385 dir in self._dirs or
364 386 any(parentdir in self._fileset
365 387 for parentdir in util.finddirs(dir)))
366 388
367 389 def anypats(self):
368 390 return self._anypats
369 391
370 392 def __repr__(self):
371 393 return ('<patternmatcher patterns=%r>' % self.patternspat)
372 394
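A hedged sketch of patternmatcher used directly (in practice it is constructed through the match() factory earlier in this file); the root, cwd and kindpats values are invented for illustration:

from mercurial import match as matchmod

kindpats = [('glob', 'src/*.py', '')]        # (kind, pattern, source)
m = matchmod.patternmatcher('repo', '', kindpats)
print(m('src/a.py'))   # True  - the glob matches
print(m('doc/a.txt'))  # False
print(m.anypats())     # True  - glob patterns count as wildcard patterns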
373 395 class includematcher(basematcher):
374 396
375 397 def __init__(self, root, cwd, kindpats, ctx=None, listsubrepos=False,
376 398 badfn=None):
377 399 super(includematcher, self).__init__(root, cwd, badfn)
378 400
379 401 self.includepat, im = _buildmatch(ctx, kindpats, '(?:/|$)',
380 402 listsubrepos, root)
381 403 self._anypats = _anypats(kindpats)
382 404 roots, dirs = _rootsanddirs(kindpats)
383 405 # roots are directories which are recursively included.
384 406 self._roots = set(roots)
385 407 # dirs are directories which are non-recursively included.
386 408 self._dirs = set(dirs)
387 409 self.matchfn = im
388 410
389 411 def visitdir(self, dir):
390 412 if not self._anypats and dir in self._roots:
391 413 # The condition above is essentially self.prefix() for includes
392 414 return 'all'
393 415 return ('.' in self._roots or
394 416 dir in self._roots or
395 417 dir in self._dirs or
396 418 any(parentdir in self._roots
397 419 for parentdir in util.finddirs(dir)))
398 420
399 421 def anypats(self):
400 422 return True
401 423
402 424 def __repr__(self):
403 425 return ('<includematcher includes=%r>' % self.includepat)
404 426
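includematcher differs from patternmatcher mostly in visitdir(): a recursively included root with no wildcard patterns lets it answer 'all', so the walker can stop consulting the matcher below that directory. A rough sketch under the same placeholder assumptions as above:

from mercurial import match as matchmod

m = matchmod.includematcher('repo', '', [('path', 'src', '')])
print(m('src/main.py'))   # True  - inside the included root
print(m.visitdir('src'))  # 'all' - everything under src is included
print(m.visitdir('doc'))  # False - the walk can skip it entirely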
405 427 class exactmatcher(basematcher):
406 428 '''Matches the input files exactly. They are interpreted as paths, not
407 429 patterns (so no kind-prefixes).
408 430 '''
409 431
410 432 def __init__(self, root, cwd, files, badfn=None):
411 433 super(exactmatcher, self).__init__(root, cwd, badfn)
412 434
413 435 if isinstance(files, list):
414 436 self._files = files
415 437 else:
416 438 self._files = list(files)
417 439
418 440 matchfn = basematcher.exact
419 441
420 442 @propertycache
421 443 def _dirs(self):
422 444 return set(util.dirs(self._fileset)) | {'.'}
423 445
424 446 def visitdir(self, dir):
425 447 return dir in self._dirs
426 448
427 449 def isexact(self):
428 450 return True
429 451
430 452 def __repr__(self):
431 453 return ('<exactmatcher files=%r>' % self._files)
432 454
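exactmatcher is used when a caller already has a literal list of file names rather than patterns. A short sketch with made-up paths:

from mercurial import match as matchmod

m = matchmod.exactmatcher('repo', '', ['a.txt', 'sub/b.txt'])
print(m('a.txt'))           # True  - listed explicitly
print(m('sub/c.txt'))       # False - only the exact names match
print(m.visitdir('sub'))    # True  - parent directory of a listed file
print(m.visitdir('other'))  # False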
433 455 class differencematcher(basematcher):
434 456 '''Composes two matchers by matching if the first matches and the second
435 457 does not. Well, almost... If the user provides a pattern like "-X foo foo",
436 458 Mercurial actually does match "foo" against that. That's because exact
437 459 matches are treated specially. So, since this differencematcher is used for
438 460 excludes, it needs to special-case exact matching.
439 461
440 462 The second matcher's non-matching-attributes (root, cwd, bad, explicitdir,
441 463 traversedir) are ignored.
442 464
443 465 TODO: If we want to keep the behavior described above for exact matches, we
444 466 should consider instead treating the above case something like this:
445 467 union(exact(foo), difference(pattern(foo), include(foo)))
446 468 '''
447 469 def __init__(self, m1, m2):
448 470 super(differencematcher, self).__init__(m1._root, m1._cwd)
449 471 self._m1 = m1
450 472 self._m2 = m2
451 473 self.bad = m1.bad
452 474 self.explicitdir = m1.explicitdir
453 475 self.traversedir = m1.traversedir
454 476
455 477 def matchfn(self, f):
456 478 return self._m1(f) and (not self._m2(f) or self._m1.exact(f))
457 479
458 480 @propertycache
459 481 def _files(self):
460 482 if self.isexact():
461 483 return [f for f in self._m1.files() if self(f)]
462 484 # If m1 is not an exact matcher, we can't easily figure out the set of
463 485 # files, because its files() are not always files. For example, if
464 486 # m1 is "path:dir" and m2 is "rootfilesin:.", we don't
465 487 # want to remove "dir" from the set even though it would match m2,
466 488 # because the "dir" in m1 may not be a file.
467 489 return self._m1.files()
468 490
469 491 def visitdir(self, dir):
470 492 if self._m2.visitdir(dir) == 'all':
471 493 # There's a bug here: If m1 matches file 'dir/file' and m2 excludes
472 494 # 'dir' (recursively), we should still visit 'dir' due to the
473 495 # exception we have for exact matches.
474 496 return False
475 497 return bool(self._m1.visitdir(dir))
476 498
477 499 def isexact(self):
478 500 return self._m1.isexact()
479 501
480 502 def anypats(self):
481 503 return self._m1.anypats() or self._m2.anypats()
482 504
483 505 def __repr__(self):
484 506 return ('<differencematcher m1=%r, m2=%r>' % (self._m1, self._m2))
485 507
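differencematcher is the combinator behind excludes (-X). A sketch composing two of the matchers defined above, again with placeholder arguments:

from mercurial import match as matchmod

base = matchmod.alwaysmatcher('repo', '')
excluded = matchmod.patternmatcher('repo', '', [('glob', '*.o', '')])
m = matchmod.differencematcher(base, excluded)
print(m('main.c'))  # True  - matched by m1 and not excluded by m2
print(m('main.o'))  # False - excluded by m2 and not an exact match of m1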
486 508 def intersectmatchers(m1, m2):
487 509 '''Composes two matchers by matching if both of them match.
488 510
489 511 The second matcher's non-matching-attributes (root, cwd, bad, explicitdir,
490 512 traversedir) are ignored.
491 513 '''
492 514 if m1 is None or m2 is None:
493 515 return m1 or m2
494 516 if m1.always():
495 517 m = copy.copy(m2)
496 518 # TODO: Consider encapsulating these things in a class so there's only
497 519 # one thing to copy from m1.
498 520 m.bad = m1.bad
499 521 m.explicitdir = m1.explicitdir
500 522 m.traversedir = m1.traversedir
501 523 m.abs = m1.abs
502 524 m.rel = m1.rel
503 525 m._relativeuipath |= m1._relativeuipath
504 526 return m
505 527 if m2.always():
506 528 m = copy.copy(m1)
507 529 m._relativeuipath |= m2._relativeuipath
508 530 return m
509 531 return intersectionmatcher(m1, m2)
510 532
511 533 class intersectionmatcher(basematcher):
512 534 def __init__(self, m1, m2):
513 535 super(intersectionmatcher, self).__init__(m1._root, m1._cwd)
514 536 self._m1 = m1
515 537 self._m2 = m2
516 538 self.bad = m1.bad
517 539 self.explicitdir = m1.explicitdir
518 540 self.traversedir = m1.traversedir
519 541
520 542 @propertycache
521 543 def _files(self):
522 544 if self.isexact():
523 545 m1, m2 = self._m1, self._m2
524 546 if not m1.isexact():
525 547 m1, m2 = m2, m1
526 548 return [f for f in m1.files() if m2(f)]
527 549 # If neither m1 nor m2 is an exact matcher, we can't easily intersect
528 550 # the set of files, because their files() are not always files. For
529 551 # example, if intersecting a matcher "-I glob:foo.txt" with matcher of
530 552 # "path:dir2", we don't want to remove "dir2" from the set.
531 553 return self._m1.files() + self._m2.files()
532 554
533 555 def matchfn(self, f):
534 556 return self._m1(f) and self._m2(f)
535 557
536 558 def visitdir(self, dir):
537 559 visit1 = self._m1.visitdir(dir)
538 560 if visit1 == 'all':
539 561 return self._m2.visitdir(dir)
540 562 # bool() because visit1=True + visit2='all' should not be 'all'
541 563 return bool(visit1 and self._m2.visitdir(dir))
542 564
543 565 def always(self):
544 566 return self._m1.always() and self._m2.always()
545 567
546 568 def isexact(self):
547 569 return self._m1.isexact() or self._m2.isexact()
548 570
549 571 def anypats(self):
550 572 return self._m1.anypats() or self._m2.anypats()
551 573
552 574 def __repr__(self):
553 575 return ('<intersectionmatcher m1=%r, m2=%r>' % (self._m1, self._m2))
554 576
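intersectmatchers() is the usual entry point; it short-circuits the None and always() cases and only builds an intersectionmatcher when both sides actually restrict something. A sketch with placeholder values:

from mercurial import match as matchmod

a = matchmod.alwaysmatcher('repo', '')
e = matchmod.exactmatcher('repo', '', ['a.txt'])
print(matchmod.intersectmatchers(a, None) is a)  # True - None operands pass through
m = matchmod.intersectmatchers(a, e)             # a copy of e, since a.always()
print(m('a.txt'))  # True
print(m('b.txt'))  # False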
555 577 class subdirmatcher(basematcher):
556 578 """Adapt a matcher to work on a subdirectory only.
557 579
558 580 The paths are remapped to remove/insert the path as needed:
559 581
560 582 >>> m1 = match('root', '', ['a.txt', 'sub/b.txt'])
561 583 >>> m2 = subdirmatcher('sub', m1)
562 584 >>> bool(m2('a.txt'))
563 585 False
564 586 >>> bool(m2('b.txt'))
565 587 True
566 588 >>> bool(m2.matchfn('a.txt'))
567 589 False
568 590 >>> bool(m2.matchfn('b.txt'))
569 591 True
570 592 >>> m2.files()
571 593 ['b.txt']
572 594 >>> m2.exact('b.txt')
573 595 True
574 596 >>> util.pconvert(m2.rel('b.txt'))
575 597 'sub/b.txt'
576 598 >>> def bad(f, msg):
577 599 ... print "%s: %s" % (f, msg)
578 600 >>> m1.bad = bad
579 601 >>> m2.bad('x.txt', 'No such file')
580 602 sub/x.txt: No such file
581 603 >>> m2.abs('c.txt')
582 604 'sub/c.txt'
583 605 """
584 606
585 607 def __init__(self, path, matcher):
586 608 super(subdirmatcher, self).__init__(matcher._root, matcher._cwd)
587 609 self._path = path
588 610 self._matcher = matcher
589 611 self._always = matcher.always()
590 612
591 613 self._files = [f[len(path) + 1:] for f in matcher._files
592 614 if f.startswith(path + "/")]
593 615
594 616 # If the parent repo had a path to this subrepo and the matcher is
595 617 # a prefix matcher, this submatcher always matches.
596 618 if matcher.prefix():
597 619 self._always = any(f == path for f in matcher._files)
598 620
599 621 def bad(self, f, msg):
600 622 self._matcher.bad(self._path + "/" + f, msg)
601 623
602 624 def abs(self, f):
603 625 return self._matcher.abs(self._path + "/" + f)
604 626
605 627 def rel(self, f):
606 628 return self._matcher.rel(self._path + "/" + f)
607 629
608 630 def uipath(self, f):
609 631 return self._matcher.uipath(self._path + "/" + f)
610 632
611 633 def matchfn(self, f):
612 634 # Some information is lost in the superclass's constructor, so we
613 635 # cannot accurately create the matching function for the subdirectory
614 636 # from the inputs. Instead, we override matchfn() and visitdir() to
615 637 # call the original matcher with the subdirectory path prepended.
616 638 return self._matcher.matchfn(self._path + "/" + f)
617 639
618 640 def visitdir(self, dir):
619 641 if dir == '.':
620 642 dir = self._path
621 643 else:
622 644 dir = self._path + "/" + dir
623 645 return self._matcher.visitdir(dir)
624 646
625 647 def always(self):
626 648 return self._always
627 649
628 650 def anypats(self):
629 651 return self._matcher.anypats()
630 652
631 653 def __repr__(self):
632 654 return ('<subdirmatcher path=%r, matcher=%r>' %
633 655 (self._path, self._matcher))
634 656
635 657 def patkind(pattern, default=None):
636 658 '''If pattern is 'kind:pat' with a known kind, return kind.'''
637 659 return _patsplit(pattern, default)[0]
638 660
639 661 def _patsplit(pattern, default):
640 662 """Split a string into the optional pattern kind prefix and the actual
641 663 pattern."""
642 664 if ':' in pattern:
643 665 kind, pat = pattern.split(':', 1)
644 666 if kind in ('re', 'glob', 'path', 'relglob', 'relpath', 'relre',
645 667 'listfile', 'listfile0', 'set', 'include', 'subinclude',
646 668 'rootfilesin'):
647 669 return kind, pat
648 670 return default, pattern
649 671
650 672 def _globre(pat):
651 673 r'''Convert an extended glob string to a regexp string.
652 674
653 675 >>> print _globre(r'?')
654 676 .
655 677 >>> print _globre(r'*')
656 678 [^/]*
657 679 >>> print _globre(r'**')
658 680 .*
659 681 >>> print _globre(r'**/a')
660 682 (?:.*/)?a
661 683 >>> print _globre(r'a/**/b')
662 684 a\/(?:.*/)?b
663 685 >>> print _globre(r'[a*?!^][^b][!c]')
664 686 [a*?!^][\^b][^c]
665 687 >>> print _globre(r'{a,b}')
666 688 (?:a|b)
667 689 >>> print _globre(r'.\*\?')
668 690 \.\*\?
669 691 '''
670 692 i, n = 0, len(pat)
671 693 res = ''
672 694 group = 0
673 695 escape = util.re.escape
674 696 def peek():
675 697 return i < n and pat[i:i + 1]
676 698 while i < n:
677 699 c = pat[i:i + 1]
678 700 i += 1
679 701 if c not in '*?[{},\\':
680 702 res += escape(c)
681 703 elif c == '*':
682 704 if peek() == '*':
683 705 i += 1
684 706 if peek() == '/':
685 707 i += 1
686 708 res += '(?:.*/)?'
687 709 else:
688 710 res += '.*'
689 711 else:
690 712 res += '[^/]*'
691 713 elif c == '?':
692 714 res += '.'
693 715 elif c == '[':
694 716 j = i
695 717 if j < n and pat[j:j + 1] in '!]':
696 718 j += 1
697 719 while j < n and pat[j:j + 1] != ']':
698 720 j += 1
699 721 if j >= n:
700 722 res += '\\['
701 723 else:
702 724 stuff = pat[i:j].replace('\\','\\\\')
703 725 i = j + 1
704 726 if stuff[0:1] == '!':
705 727 stuff = '^' + stuff[1:]
706 728 elif stuff[0:1] == '^':
707 729 stuff = '\\' + stuff
708 730 res = '%s[%s]' % (res, stuff)
709 731 elif c == '{':
710 732 group += 1
711 733 res += '(?:'
712 734 elif c == '}' and group:
713 735 res += ')'
714 736 group -= 1
715 737 elif c == ',' and group:
716 738 res += '|'
717 739 elif c == '\\':
718 740 p = peek()
719 741 if p:
720 742 i += 1
721 743 res += escape(p)
722 744 else:
723 745 res += escape(c)
724 746 else:
725 747 res += escape(c)
726 748 return res
727 749
728 750 def _regex(kind, pat, globsuffix):
729 751 '''Convert a (normalized) pattern of any kind into a regular expression.
730 752 globsuffix is appended to the regexp of globs.'''
731 753 if not pat:
732 754 return ''
733 755 if kind == 're':
734 756 return pat
735 757 if kind == 'path':
736 758 if pat == '.':
737 759 return ''
738 760 return '^' + util.re.escape(pat) + '(?:/|$)'
739 761 if kind == 'rootfilesin':
740 762 if pat == '.':
741 763 escaped = ''
742 764 else:
743 765 # Pattern is a directory name.
744 766 escaped = util.re.escape(pat) + '/'
745 767 # Anything after the pattern must be a non-directory.
746 768 return '^' + escaped + '[^/]+$'
747 769 if kind == 'relglob':
748 770 return '(?:|.*/)' + _globre(pat) + globsuffix
749 771 if kind == 'relpath':
750 772 return util.re.escape(pat) + '(?:/|$)'
751 773 if kind == 'relre':
752 774 if pat.startswith('^'):
753 775 return pat
754 776 return '.*' + pat
755 777 return _globre(pat) + globsuffix
756 778
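Each kind therefore maps to a plain regular expression string; a quick way to see one of them at work with the standard re module (the pattern and paths are illustrative):

import re
from mercurial import match as matchmod

r = matchmod._regex('rootfilesin', 'docs', '$')         # globsuffix unused for this kind
print(re.match(r, 'docs/readme.txt') is not None)       # True  - direct child of docs
print(re.match(r, 'docs/sub/readme.txt') is not None)   # False - one level too deep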
757 779 def _buildmatch(ctx, kindpats, globsuffix, listsubrepos, root):
758 780 '''Return regexp string and a matcher function for kindpats.
759 781 globsuffix is appended to the regexp of globs.'''
760 782 matchfuncs = []
761 783
762 784 subincludes, kindpats = _expandsubinclude(kindpats, root)
763 785 if subincludes:
764 786 submatchers = {}
765 787 def matchsubinclude(f):
766 788 for prefix, matcherargs in subincludes:
767 789 if f.startswith(prefix):
768 790 mf = submatchers.get(prefix)
769 791 if mf is None:
770 792 mf = match(*matcherargs)
771 793 submatchers[prefix] = mf
772 794
773 795 if mf(f[len(prefix):]):
774 796 return True
775 797 return False
776 798 matchfuncs.append(matchsubinclude)
777 799
778 800 fset, kindpats = _expandsets(kindpats, ctx, listsubrepos)
779 801 if fset:
780 802 matchfuncs.append(fset.__contains__)
781 803
782 804 regex = ''
783 805 if kindpats:
784 806 regex, mf = _buildregexmatch(kindpats, globsuffix)
785 807 matchfuncs.append(mf)
786 808
787 809 if len(matchfuncs) == 1:
788 810 return regex, matchfuncs[0]
789 811 else:
790 812 return regex, lambda f: any(mf(f) for mf in matchfuncs)
791 813
792 814 def _buildregexmatch(kindpats, globsuffix):
793 815 """Build a match function from a list of kinds and kindpats,
794 816 return regexp string and a matcher function."""
795 817 try:
796 818 regex = '(?:%s)' % '|'.join([_regex(k, p, globsuffix)
797 819 for (k, p, s) in kindpats])
798 820 if len(regex) > 20000:
799 821 raise OverflowError
800 822 return regex, _rematcher(regex)
801 823 except OverflowError:
802 824 # We're using a Python with a tiny regex engine and we
803 825 # made it explode, so we'll divide the pattern list in two
804 826 # until it works
805 827 l = len(kindpats)
806 828 if l < 2:
807 829 raise
808 830 regexa, a = _buildregexmatch(kindpats[:l//2], globsuffix)
809 831 regexb, b = _buildregexmatch(kindpats[l//2:], globsuffix)
810 832 return regex, lambda s: a(s) or b(s)
811 833 except re.error:
812 834 for k, p, s in kindpats:
813 835 try:
814 836 _rematcher('(?:%s)' % _regex(k, p, globsuffix))
815 837 except re.error:
816 838 if s:
817 839 raise error.Abort(_("%s: invalid pattern (%s): %s") %
818 840 (s, k, p))
819 841 else:
820 842 raise error.Abort(_("invalid pattern (%s): %s") % (k, p))
821 843 raise error.Abort(_("invalid pattern"))
822 844
823 845 def _patternrootsanddirs(kindpats):
824 846 '''Returns roots and directories corresponding to each pattern.
825 847
826 848 This calculates the roots and directories exactly matching the patterns and
827 849 returns a tuple of (roots, dirs) for each. It does not return other
828 850 directories which may also need to be considered, like the parent
829 851 directories.
830 852 '''
831 853 r = []
832 854 d = []
833 855 for kind, pat, source in kindpats:
834 856 if kind == 'glob': # find the non-glob prefix
835 857 root = []
836 858 for p in pat.split('/'):
837 859 if '[' in p or '{' in p or '*' in p or '?' in p:
838 860 break
839 861 root.append(p)
840 862 r.append('/'.join(root) or '.')
841 863 elif kind in ('relpath', 'path'):
842 864 r.append(pat or '.')
843 865 elif kind in ('rootfilesin',):
844 866 d.append(pat or '.')
845 867 else: # relglob, re, relre
846 868 r.append('.')
847 869 return r, d
848 870
849 871 def _roots(kindpats):
850 872 '''Returns root directories to match recursively from the given patterns.'''
851 873 roots, dirs = _patternrootsanddirs(kindpats)
852 874 return roots
853 875
854 876 def _rootsanddirs(kindpats):
855 877 '''Returns roots and exact directories from patterns.
856 878
857 879 roots are directories to match recursively, whereas exact directories should
858 880 be matched non-recursively. The returned (roots, dirs) tuple will also
859 881 include directories that need to be implicitly considered as either, such as
860 882 parent directories.
861 883
862 884 >>> _rootsanddirs(\
863 885 [('glob', 'g/h/*', ''), ('glob', 'g/h', ''), ('glob', 'g*', '')])
864 886 (['g/h', 'g/h', '.'], ['g', '.'])
865 887 >>> _rootsanddirs(\
866 888 [('rootfilesin', 'g/h', ''), ('rootfilesin', '', '')])
867 889 ([], ['g/h', '.', 'g', '.'])
868 890 >>> _rootsanddirs(\
869 891 [('relpath', 'r', ''), ('path', 'p/p', ''), ('path', '', '')])
870 892 (['r', 'p/p', '.'], ['p', '.'])
871 893 >>> _rootsanddirs(\
872 894 [('relglob', 'rg*', ''), ('re', 're/', ''), ('relre', 'rr', '')])
873 895 (['.', '.', '.'], ['.'])
874 896 '''
875 897 r, d = _patternrootsanddirs(kindpats)
876 898
877 899 # Append the parents as non-recursive/exact directories, since they must be
878 900 # scanned to get to either the roots or the other exact directories.
879 901 d.extend(util.dirs(d))
880 902 d.extend(util.dirs(r))
881 903 # util.dirs() does not include the root directory, so add it manually
882 904 d.append('.')
883 905
884 906 return r, d
885 907
886 908 def _explicitfiles(kindpats):
887 909 '''Returns the potential explicit filenames from the patterns.
888 910
889 911 >>> _explicitfiles([('path', 'foo/bar', '')])
890 912 ['foo/bar']
891 913 >>> _explicitfiles([('rootfilesin', 'foo/bar', '')])
892 914 []
893 915 '''
894 916 # Keep only the pattern kinds where one can specify filenames (vs only
895 917 # directory names).
896 918 filable = [kp for kp in kindpats if kp[0] not in ('rootfilesin',)]
897 919 return _roots(filable)
898 920
899 921 def _anypats(kindpats):
900 922 for kind, pat, source in kindpats:
901 923 if kind in ('glob', 're', 'relglob', 'relre', 'set', 'rootfilesin'):
902 924 return True
903 925
904 926 _commentre = None
905 927
906 928 def readpatternfile(filepath, warn, sourceinfo=False):
907 929 '''parse a pattern file, returning a list of
908 930 patterns. These patterns should be given to compile()
909 931 to be validated and converted into a match function.
910 932
911 933 trailing white space is dropped.
912 934 the escape character is backslash.
913 935 comments start with #.
914 936 empty lines are skipped.
915 937
916 938 lines can be of the following formats:
917 939
918 940 syntax: regexp # defaults following lines to non-rooted regexps
919 941 syntax: glob # defaults following lines to non-rooted globs
920 942 re:pattern # non-rooted regular expression
921 943 glob:pattern # non-rooted glob
922 944 pattern # pattern of the current default type
923 945
924 946 if sourceinfo is set, returns a list of tuples:
925 947 (pattern, lineno, originalline). This is useful to debug ignore patterns.
926 948 '''
927 949
928 950 syntaxes = {'re': 'relre:', 'regexp': 'relre:', 'glob': 'relglob:',
929 951 'include': 'include', 'subinclude': 'subinclude'}
930 952 syntax = 'relre:'
931 953 patterns = []
932 954
933 955 fp = open(filepath, 'rb')
934 956 for lineno, line in enumerate(util.iterfile(fp), start=1):
935 957 if "#" in line:
936 958 global _commentre
937 959 if not _commentre:
938 960 _commentre = util.re.compile(br'((?:^|[^\\])(?:\\\\)*)#.*')
939 961 # remove comments prefixed by an even number of escapes
940 962 m = _commentre.search(line)
941 963 if m:
942 964 line = line[:m.end(1)]
943 965 # fixup properly escaped comments that survived the above
944 966 line = line.replace("\\#", "#")
945 967 line = line.rstrip()
946 968 if not line:
947 969 continue
948 970
949 971 if line.startswith('syntax:'):
950 972 s = line[7:].strip()
951 973 try:
952 974 syntax = syntaxes[s]
953 975 except KeyError:
954 976 if warn:
955 977 warn(_("%s: ignoring invalid syntax '%s'\n") %
956 978 (filepath, s))
957 979 continue
958 980
959 981 linesyntax = syntax
960 982 for s, rels in syntaxes.iteritems():
961 983 if line.startswith(rels):
962 984 linesyntax = rels
963 985 line = line[len(rels):]
964 986 break
965 987 elif line.startswith(s+':'):
966 988 linesyntax = rels
967 989 line = line[len(s) + 1:]
968 990 break
969 991 if sourceinfo:
970 992 patterns.append((linesyntax + line, lineno, line))
971 993 else:
972 994 patterns.append(linesyntax + line)
973 995 fp.close()
974 996 return patterns
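A sketch of feeding a small ignore-style file through readpatternfile; the temporary file and its contents are invented for illustration, and warn=None simply skips the invalid-syntax warning path:

import tempfile
from mercurial import match as matchmod

with tempfile.NamedTemporaryFile(delete=False) as f:
    f.write(b'syntax: glob\n'
            b'*.o\n'
            b're:^build/\n')
print(matchmod.readpatternfile(f.name, warn=None))
# roughly: ['relglob:*.o', 'relre:^build/']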
@@ -1,301 +1,305 b''
1 1 $ hg init ignorerepo
2 2 $ cd ignorerepo
3 3
4 debugignore with no hgignore should be deterministic:
5 $ hg debugignore
6 <nevermatcher>
7
4 8 Issue562: .hgignore requires newline at end:
5 9
6 10 $ touch foo
7 11 $ touch bar
8 12 $ touch baz
9 13 $ cat > makeignore.py <<EOF
10 14 > f = open(".hgignore", "w")
11 15 > f.write("ignore\n")
12 16 > f.write("foo\n")
13 17 > # No EOL here
14 18 > f.write("bar")
15 19 > f.close()
16 20 > EOF
17 21
18 22 $ python makeignore.py
19 23
20 24 Should display baz only:
21 25
22 26 $ hg status
23 27 ? baz
24 28
25 29 $ rm foo bar baz .hgignore makeignore.py
26 30
27 31 $ touch a.o
28 32 $ touch a.c
29 33 $ touch syntax
30 34 $ mkdir dir
31 35 $ touch dir/a.o
32 36 $ touch dir/b.o
33 37 $ touch dir/c.o
34 38
35 39 $ hg add dir/a.o
36 40 $ hg commit -m 0
37 41 $ hg add dir/b.o
38 42
39 43 $ hg status
40 44 A dir/b.o
41 45 ? a.c
42 46 ? a.o
43 47 ? dir/c.o
44 48 ? syntax
45 49
46 50 $ echo "*.o" > .hgignore
47 51 $ hg status
48 52 abort: $TESTTMP/ignorerepo/.hgignore: invalid pattern (relre): *.o (glob)
49 53 [255]
50 54
51 55 $ echo ".*\.o" > .hgignore
52 56 $ hg status
53 57 A dir/b.o
54 58 ? .hgignore
55 59 ? a.c
56 60 ? syntax
57 61
58 62 Ensure that comments work:
59 63
60 64 $ touch 'foo#bar' 'quux#'
61 65 #if no-windows
62 66 $ touch 'baz\#wat'
63 67 #endif
64 68 $ cat <<'EOF' >> .hgignore
65 69 > # full-line comment
66 70 > # whitespace-only comment line
67 71 > syntax# pattern, no whitespace, then comment
68 72 > a.c # pattern, then whitespace, then comment
69 73 > baz\\# # escaped comment character
70 74 > foo\#b # escaped comment character
71 75 > quux\## escaped comment character at end of name
72 76 > EOF
73 77 $ hg status
74 78 A dir/b.o
75 79 ? .hgignore
76 80 $ rm 'foo#bar' 'quux#'
77 81 #if no-windows
78 82 $ rm 'baz\#wat'
79 83 #endif
80 84
81 85 Check it does not ignore the current directory '.':
82 86
83 87 $ echo "^\." > .hgignore
84 88 $ hg status
85 89 A dir/b.o
86 90 ? a.c
87 91 ? a.o
88 92 ? dir/c.o
89 93 ? syntax
90 94
91 95 Test that patterns from ui.ignore options are read:
92 96
93 97 $ echo > .hgignore
94 98 $ cat >> $HGRCPATH << EOF
95 99 > [ui]
96 100 > ignore.other = $TESTTMP/ignorerepo/.hg/testhgignore
97 101 > EOF
98 102 $ echo "glob:**.o" > .hg/testhgignore
99 103 $ hg status
100 104 A dir/b.o
101 105 ? .hgignore
102 106 ? a.c
103 107 ? syntax
104 108
105 109 empty out testhgignore
106 110 $ echo > .hg/testhgignore
107 111
108 112 Test relative ignore path (issue4473):
109 113
110 114 $ cat >> $HGRCPATH << EOF
111 115 > [ui]
112 116 > ignore.relative = .hg/testhgignorerel
113 117 > EOF
114 118 $ echo "glob:*.o" > .hg/testhgignorerel
115 119 $ cd dir
116 120 $ hg status
117 121 A dir/b.o
118 122 ? .hgignore
119 123 ? a.c
120 124 ? syntax
121 125
122 126 $ cd ..
123 127 $ echo > .hg/testhgignorerel
124 128 $ echo "syntax: glob" > .hgignore
125 129 $ echo "re:.*\.o" >> .hgignore
126 130 $ hg status
127 131 A dir/b.o
128 132 ? .hgignore
129 133 ? a.c
130 134 ? syntax
131 135
132 136 $ echo "syntax: invalid" > .hgignore
133 137 $ hg status
134 138 $TESTTMP/ignorerepo/.hgignore: ignoring invalid syntax 'invalid' (glob)
135 139 A dir/b.o
136 140 ? .hgignore
137 141 ? a.c
138 142 ? a.o
139 143 ? dir/c.o
140 144 ? syntax
141 145
142 146 $ echo "syntax: glob" > .hgignore
143 147 $ echo "*.o" >> .hgignore
144 148 $ hg status
145 149 A dir/b.o
146 150 ? .hgignore
147 151 ? a.c
148 152 ? syntax
149 153
150 154 $ echo "relglob:syntax*" > .hgignore
151 155 $ hg status
152 156 A dir/b.o
153 157 ? .hgignore
154 158 ? a.c
155 159 ? a.o
156 160 ? dir/c.o
157 161
158 162 $ echo "relglob:*" > .hgignore
159 163 $ hg status
160 164 A dir/b.o
161 165
162 166 $ cd dir
163 167 $ hg status .
164 168 A b.o
165 169
166 170 $ hg debugignore
167 171 <includematcher includes='(?:(?:|.*/)[^/]*(?:/|$))'>
168 172
169 173 $ hg debugignore b.o
170 174 b.o is ignored
171 175 (ignore rule in $TESTTMP/ignorerepo/.hgignore, line 1: '*') (glob)
172 176
173 177 $ cd ..
174 178
175 179 Check patterns that match only the directory
176 180
177 181 $ echo "^dir\$" > .hgignore
178 182 $ hg status
179 183 A dir/b.o
180 184 ? .hgignore
181 185 ? a.c
182 186 ? a.o
183 187 ? syntax
184 188
185 189 Check that a recursive glob pattern can match across zero directories (dir/**/c.o matches dir/c.o)
186 190
187 191 $ echo "syntax: glob" > .hgignore
188 192 $ echo "dir/**/c.o" >> .hgignore
189 193 $ touch dir/c.o
190 194 $ mkdir dir/subdir
191 195 $ touch dir/subdir/c.o
192 196 $ hg status
193 197 A dir/b.o
194 198 ? .hgignore
195 199 ? a.c
196 200 ? a.o
197 201 ? syntax
198 202 $ hg debugignore a.c
199 203 a.c is not ignored
200 204 $ hg debugignore dir/c.o
201 205 dir/c.o is ignored
202 206 (ignore rule in $TESTTMP/ignorerepo/.hgignore, line 2: 'dir/**/c.o') (glob)
203 207
204 208 Check using 'include:' in ignore file
205 209
206 210 $ hg purge --all --config extensions.purge=
207 211 $ touch foo.included
208 212
209 213 $ echo ".*.included" > otherignore
210 214 $ hg status -I "include:otherignore"
211 215 ? foo.included
212 216
213 217 $ echo "include:otherignore" >> .hgignore
214 218 $ hg status
215 219 A dir/b.o
216 220 ? .hgignore
217 221 ? otherignore
218 222
219 223 Check recursive uses of 'include:'
220 224
221 225 $ echo "include:nested/ignore" >> otherignore
222 226 $ mkdir nested
223 227 $ echo "glob:*ignore" > nested/ignore
224 228 $ hg status
225 229 A dir/b.o
226 230
227 231 $ cp otherignore goodignore
228 232 $ echo "include:badignore" >> otherignore
229 233 $ hg status
230 234 skipping unreadable pattern file 'badignore': No such file or directory
231 235 A dir/b.o
232 236
233 237 $ mv goodignore otherignore
234 238
235 239 Check using 'include:' while in a non-root directory
236 240
237 241 $ cd ..
238 242 $ hg -R ignorerepo status
239 243 A dir/b.o
240 244 $ cd ignorerepo
241 245
242 246 Check including subincludes
243 247
244 248 $ hg revert -q --all
245 249 $ hg purge --all --config extensions.purge=
246 250 $ echo ".hgignore" > .hgignore
247 251 $ mkdir dir1 dir2
248 252 $ touch dir1/file1 dir1/file2 dir2/file1 dir2/file2
249 253 $ echo "subinclude:dir2/.hgignore" >> .hgignore
250 254 $ echo "glob:file*2" > dir2/.hgignore
251 255 $ hg status
252 256 ? dir1/file1
253 257 ? dir1/file2
254 258 ? dir2/file1
255 259
256 260 Check including subincludes with regexs
257 261
258 262 $ echo "subinclude:dir1/.hgignore" >> .hgignore
259 263 $ echo "regexp:f.le1" > dir1/.hgignore
260 264
261 265 $ hg status
262 266 ? dir1/file2
263 267 ? dir2/file1
264 268
265 269 Check multiple levels of sub-ignores
266 270
267 271 $ mkdir dir1/subdir
268 272 $ touch dir1/subdir/subfile1 dir1/subdir/subfile3 dir1/subdir/subfile4
269 273 $ echo "subinclude:subdir/.hgignore" >> dir1/.hgignore
270 274 $ echo "glob:subfil*3" >> dir1/subdir/.hgignore
271 275
272 276 $ hg status
273 277 ? dir1/file2
274 278 ? dir1/subdir/subfile4
275 279 ? dir2/file1
276 280
277 281 Check include subignore at the same level
278 282
279 283 $ mv dir1/subdir/.hgignore dir1/.hgignoretwo
280 284 $ echo "regexp:f.le1" > dir1/.hgignore
281 285 $ echo "subinclude:.hgignoretwo" >> dir1/.hgignore
282 286 $ echo "glob:file*2" > dir1/.hgignoretwo
283 287
284 288 $ hg status | grep file2
285 289 [1]
286 290 $ hg debugignore dir1/file2
287 291 dir1/file2 is ignored
288 292 (ignore rule in dir2/.hgignore, line 1: 'file*2')
289 293
290 294 #if windows
291 295
292 296 Windows paths are accepted on input
293 297
294 298 $ rm dir1/.hgignore
295 299 $ echo "dir1/file*" >> .hgignore
296 300 $ hg debugignore "dir1\file2"
297 301 dir1\file2 is ignored
298 302 (ignore rule in $TESTTMP\ignorerepo\.hgignore, line 4: 'dir1/file*')
299 303 $ hg up -qC .
300 304
301 305 #endif