match: add isexact() method to hide internals...
Martin von Zweigbergk
r24448:55c44934 default
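This changeset adds a small isexact() accessor to the matcher and converts the call sites in dirstate.py and manifest.py from comparing matcher internals (match.matchfn == match.exact) to calling the new method, so the internal representation stays hidden. A minimal sketch of the pattern, taken from the diff below (m stands for any match object):

    # new accessor on the match class (mercurial/match.py)
    def isexact(self):
        # True when this matcher was built with exact=True, i.e. its
        # match function is the exact-filename predicate
        return self.matchfn == self.exact

    # call sites change from poking at internals
    #     if m.matchfn == m.exact: ...
    # to asking the matcher directly
    #     if m.isexact(): ...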
@@ -1,923 +1,923 b''
1 1 # dirstate.py - working directory tracking for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from node import nullid
9 9 from i18n import _
10 10 import scmutil, util, ignore, osutil, parsers, encoding, pathutil
11 11 import os, stat, errno
12 12
13 13 propertycache = util.propertycache
14 14 filecache = scmutil.filecache
15 15 _rangemask = 0x7fffffff
16 16
17 17 dirstatetuple = parsers.dirstatetuple
18 18
19 19 class repocache(filecache):
20 20 """filecache for files in .hg/"""
21 21 def join(self, obj, fname):
22 22 return obj._opener.join(fname)
23 23
24 24 class rootcache(filecache):
25 25 """filecache for files in the repository root"""
26 26 def join(self, obj, fname):
27 27 return obj._join(fname)
28 28
29 29 class dirstate(object):
30 30
31 31 def __init__(self, opener, ui, root, validate):
32 32 '''Create a new dirstate object.
33 33
34 34 opener is an open()-like callable that can be used to open the
35 35 dirstate file; root is the root of the directory tracked by
36 36 the dirstate.
37 37 '''
38 38 self._opener = opener
39 39 self._validate = validate
40 40 self._root = root
41 41 # ntpath.join(root, '') of Python 2.7.9 does not add sep if root is
42 42 # a UNC path pointing to root share (issue4557)
43 43 if root.endswith(os.sep):
44 44 self._rootdir = root
45 45 else:
46 46 self._rootdir = root + os.sep
47 47 self._dirty = False
48 48 self._dirtypl = False
49 49 self._lastnormaltime = 0
50 50 self._ui = ui
51 51 self._filecache = {}
52 52 self._parentwriters = 0
53 53
54 54 def beginparentchange(self):
55 55 '''Marks the beginning of a set of changes that involve changing
56 56 the dirstate parents. If there is an exception during this time,
57 57 the dirstate will not be written when the wlock is released. This
58 58 prevents writing an incoherent dirstate where the parent doesn't
59 59 match the contents.
60 60 '''
61 61 self._parentwriters += 1
62 62
63 63 def endparentchange(self):
64 64 '''Marks the end of a set of changes that involve changing the
65 65 dirstate parents. Once all parent changes have been marked done,
66 66 the wlock will be free to write the dirstate on release.
67 67 '''
68 68 if self._parentwriters > 0:
69 69 self._parentwriters -= 1
70 70
71 71 def pendingparentchange(self):
72 72 '''Returns true if the dirstate is in the middle of a set of changes
73 73 that modify the dirstate parent.
74 74 '''
75 75 return self._parentwriters > 0
76 76
77 77 @propertycache
78 78 def _map(self):
79 79 '''Return the dirstate contents as a map from filename to
80 80 (state, mode, size, time).'''
81 81 self._read()
82 82 return self._map
83 83
84 84 @propertycache
85 85 def _copymap(self):
86 86 self._read()
87 87 return self._copymap
88 88
89 89 @propertycache
90 90 def _foldmap(self):
91 91 f = {}
92 92 normcase = util.normcase
93 93 for name, s in self._map.iteritems():
94 94 if s[0] != 'r':
95 95 f[normcase(name)] = name
96 96 for name in self._dirs:
97 97 f[normcase(name)] = name
98 98 f['.'] = '.' # prevents useless util.fspath() invocation
99 99 return f
100 100
101 101 @repocache('branch')
102 102 def _branch(self):
103 103 try:
104 104 return self._opener.read("branch").strip() or "default"
105 105 except IOError, inst:
106 106 if inst.errno != errno.ENOENT:
107 107 raise
108 108 return "default"
109 109
110 110 @propertycache
111 111 def _pl(self):
112 112 try:
113 113 fp = self._opener("dirstate")
114 114 st = fp.read(40)
115 115 fp.close()
116 116 l = len(st)
117 117 if l == 40:
118 118 return st[:20], st[20:40]
119 119 elif l > 0 and l < 40:
120 120 raise util.Abort(_('working directory state appears damaged!'))
121 121 except IOError, err:
122 122 if err.errno != errno.ENOENT:
123 123 raise
124 124 return [nullid, nullid]
125 125
126 126 @propertycache
127 127 def _dirs(self):
128 128 return scmutil.dirs(self._map, 'r')
129 129
130 130 def dirs(self):
131 131 return self._dirs
132 132
133 133 @rootcache('.hgignore')
134 134 def _ignore(self):
135 135 files = [self._join('.hgignore')]
136 136 for name, path in self._ui.configitems("ui"):
137 137 if name == 'ignore' or name.startswith('ignore.'):
138 138 # we need to use os.path.join here rather than self._join
139 139 # because path is arbitrary and user-specified
140 140 files.append(os.path.join(self._rootdir, util.expandpath(path)))
141 141 return ignore.ignore(self._root, files, self._ui.warn)
142 142
143 143 @propertycache
144 144 def _slash(self):
145 145 return self._ui.configbool('ui', 'slash') and os.sep != '/'
146 146
147 147 @propertycache
148 148 def _checklink(self):
149 149 return util.checklink(self._root)
150 150
151 151 @propertycache
152 152 def _checkexec(self):
153 153 return util.checkexec(self._root)
154 154
155 155 @propertycache
156 156 def _checkcase(self):
157 157 return not util.checkcase(self._join('.hg'))
158 158
159 159 def _join(self, f):
160 160 # much faster than os.path.join()
161 161 # it's safe because f is always a relative path
162 162 return self._rootdir + f
163 163
164 164 def flagfunc(self, buildfallback):
165 165 if self._checklink and self._checkexec:
166 166 def f(x):
167 167 try:
168 168 st = os.lstat(self._join(x))
169 169 if util.statislink(st):
170 170 return 'l'
171 171 if util.statisexec(st):
172 172 return 'x'
173 173 except OSError:
174 174 pass
175 175 return ''
176 176 return f
177 177
178 178 fallback = buildfallback()
179 179 if self._checklink:
180 180 def f(x):
181 181 if os.path.islink(self._join(x)):
182 182 return 'l'
183 183 if 'x' in fallback(x):
184 184 return 'x'
185 185 return ''
186 186 return f
187 187 if self._checkexec:
188 188 def f(x):
189 189 if 'l' in fallback(x):
190 190 return 'l'
191 191 if util.isexec(self._join(x)):
192 192 return 'x'
193 193 return ''
194 194 return f
195 195 else:
196 196 return fallback
197 197
198 198 @propertycache
199 199 def _cwd(self):
200 200 return os.getcwd()
201 201
202 202 def getcwd(self):
203 203 cwd = self._cwd
204 204 if cwd == self._root:
205 205 return ''
206 206 # self._root ends with a path separator if self._root is '/' or 'C:\'
207 207 rootsep = self._root
208 208 if not util.endswithsep(rootsep):
209 209 rootsep += os.sep
210 210 if cwd.startswith(rootsep):
211 211 return cwd[len(rootsep):]
212 212 else:
213 213 # we're outside the repo. return an absolute path.
214 214 return cwd
215 215
216 216 def pathto(self, f, cwd=None):
217 217 if cwd is None:
218 218 cwd = self.getcwd()
219 219 path = util.pathto(self._root, cwd, f)
220 220 if self._slash:
221 221 return util.pconvert(path)
222 222 return path
223 223
224 224 def __getitem__(self, key):
225 225 '''Return the current state of key (a filename) in the dirstate.
226 226
227 227 States are:
228 228 n normal
229 229 m needs merging
230 230 r marked for removal
231 231 a marked for addition
232 232 ? not tracked
233 233 '''
234 234 return self._map.get(key, ("?",))[0]
235 235
236 236 def __contains__(self, key):
237 237 return key in self._map
238 238
239 239 def __iter__(self):
240 240 for x in sorted(self._map):
241 241 yield x
242 242
243 243 def iteritems(self):
244 244 return self._map.iteritems()
245 245
246 246 def parents(self):
247 247 return [self._validate(p) for p in self._pl]
248 248
249 249 def p1(self):
250 250 return self._validate(self._pl[0])
251 251
252 252 def p2(self):
253 253 return self._validate(self._pl[1])
254 254
255 255 def branch(self):
256 256 return encoding.tolocal(self._branch)
257 257
258 258 def setparents(self, p1, p2=nullid):
259 259 """Set dirstate parents to p1 and p2.
260 260
261 261 When moving from two parents to one, 'm' merged entries are
262 262 adjusted to normal and previous copy records are discarded and
263 263 returned by the call.
264 264
265 265 See localrepo.setparents()
266 266 """
267 267 if self._parentwriters == 0:
268 268 raise ValueError("cannot set dirstate parent without "
269 269 "calling dirstate.beginparentchange")
270 270
271 271 self._dirty = self._dirtypl = True
272 272 oldp2 = self._pl[1]
273 273 self._pl = p1, p2
274 274 copies = {}
275 275 if oldp2 != nullid and p2 == nullid:
276 276 for f, s in self._map.iteritems():
277 277 # Discard 'm' markers when moving away from a merge state
278 278 if s[0] == 'm':
279 279 if f in self._copymap:
280 280 copies[f] = self._copymap[f]
281 281 self.normallookup(f)
282 282 # Also fix up otherparent markers
283 283 elif s[0] == 'n' and s[2] == -2:
284 284 if f in self._copymap:
285 285 copies[f] = self._copymap[f]
286 286 self.add(f)
287 287 return copies
288 288
289 289 def setbranch(self, branch):
290 290 self._branch = encoding.fromlocal(branch)
291 291 f = self._opener('branch', 'w', atomictemp=True)
292 292 try:
293 293 f.write(self._branch + '\n')
294 294 f.close()
295 295
296 296 # make sure filecache has the correct stat info for _branch after
297 297 # replacing the underlying file
298 298 ce = self._filecache['_branch']
299 299 if ce:
300 300 ce.refresh()
301 301 except: # re-raises
302 302 f.discard()
303 303 raise
304 304
305 305 def _read(self):
306 306 self._map = {}
307 307 self._copymap = {}
308 308 try:
309 309 st = self._opener.read("dirstate")
310 310 except IOError, err:
311 311 if err.errno != errno.ENOENT:
312 312 raise
313 313 return
314 314 if not st:
315 315 return
316 316
317 317 # Python's garbage collector triggers a GC each time a certain number
318 318 # of container objects (the number being defined by
319 319 # gc.get_threshold()) are allocated. parse_dirstate creates a tuple
320 320 # for each file in the dirstate. The C version then immediately marks
321 321 # them as not to be tracked by the collector. However, this has no
322 322 # effect on when GCs are triggered, only on what objects the GC looks
323 323 # into. This means that O(number of files) GCs are unavoidable.
324 324 # Depending on when in the process's lifetime the dirstate is parsed,
325 325 # this can get very expensive. As a workaround, disable GC while
326 326 # parsing the dirstate.
327 327 #
328 328 # (we cannot decorate the function directly since it is in a C module)
329 329 parse_dirstate = util.nogc(parsers.parse_dirstate)
330 330 p = parse_dirstate(self._map, self._copymap, st)
331 331 if not self._dirtypl:
332 332 self._pl = p
333 333
334 334 def invalidate(self):
335 335 for a in ("_map", "_copymap", "_foldmap", "_branch", "_pl", "_dirs",
336 336 "_ignore"):
337 337 if a in self.__dict__:
338 338 delattr(self, a)
339 339 self._lastnormaltime = 0
340 340 self._dirty = False
341 341 self._parentwriters = 0
342 342
343 343 def copy(self, source, dest):
344 344 """Mark dest as a copy of source. Unmark dest if source is None."""
345 345 if source == dest:
346 346 return
347 347 self._dirty = True
348 348 if source is not None:
349 349 self._copymap[dest] = source
350 350 elif dest in self._copymap:
351 351 del self._copymap[dest]
352 352
353 353 def copied(self, file):
354 354 return self._copymap.get(file, None)
355 355
356 356 def copies(self):
357 357 return self._copymap
358 358
359 359 def _droppath(self, f):
360 360 if self[f] not in "?r" and "_dirs" in self.__dict__:
361 361 self._dirs.delpath(f)
362 362
363 363 def _addpath(self, f, state, mode, size, mtime):
364 364 oldstate = self[f]
365 365 if state == 'a' or oldstate == 'r':
366 366 scmutil.checkfilename(f)
367 367 if f in self._dirs:
368 368 raise util.Abort(_('directory %r already in dirstate') % f)
369 369 # shadows
370 370 for d in scmutil.finddirs(f):
371 371 if d in self._dirs:
372 372 break
373 373 if d in self._map and self[d] != 'r':
374 374 raise util.Abort(
375 375 _('file %r in dirstate clashes with %r') % (d, f))
376 376 if oldstate in "?r" and "_dirs" in self.__dict__:
377 377 self._dirs.addpath(f)
378 378 self._dirty = True
379 379 self._map[f] = dirstatetuple(state, mode, size, mtime)
380 380
381 381 def normal(self, f):
382 382 '''Mark a file normal and clean.'''
383 383 s = os.lstat(self._join(f))
384 384 mtime = int(s.st_mtime)
385 385 self._addpath(f, 'n', s.st_mode,
386 386 s.st_size & _rangemask, mtime & _rangemask)
387 387 if f in self._copymap:
388 388 del self._copymap[f]
389 389 if mtime > self._lastnormaltime:
390 390 # Remember the most recent modification timeslot for status(),
391 391 # to make sure we won't miss future size-preserving file content
392 392 # modifications that happen within the same timeslot.
393 393 self._lastnormaltime = mtime
394 394
395 395 def normallookup(self, f):
396 396 '''Mark a file normal, but possibly dirty.'''
397 397 if self._pl[1] != nullid and f in self._map:
398 398 # if there is a merge going on and the file was either
399 399 # in state 'm' (-1) or coming from other parent (-2) before
400 400 # being removed, restore that state.
401 401 entry = self._map[f]
402 402 if entry[0] == 'r' and entry[2] in (-1, -2):
403 403 source = self._copymap.get(f)
404 404 if entry[2] == -1:
405 405 self.merge(f)
406 406 elif entry[2] == -2:
407 407 self.otherparent(f)
408 408 if source:
409 409 self.copy(source, f)
410 410 return
411 411 if entry[0] == 'm' or entry[0] == 'n' and entry[2] == -2:
412 412 return
413 413 self._addpath(f, 'n', 0, -1, -1)
414 414 if f in self._copymap:
415 415 del self._copymap[f]
416 416
417 417 def otherparent(self, f):
418 418 '''Mark as coming from the other parent, always dirty.'''
419 419 if self._pl[1] == nullid:
420 420 raise util.Abort(_("setting %r to other parent "
421 421 "only allowed in merges") % f)
422 422 if f in self and self[f] == 'n':
423 423 # merge-like
424 424 self._addpath(f, 'm', 0, -2, -1)
425 425 else:
426 426 # add-like
427 427 self._addpath(f, 'n', 0, -2, -1)
428 428
429 429 if f in self._copymap:
430 430 del self._copymap[f]
431 431
432 432 def add(self, f):
433 433 '''Mark a file added.'''
434 434 self._addpath(f, 'a', 0, -1, -1)
435 435 if f in self._copymap:
436 436 del self._copymap[f]
437 437
438 438 def remove(self, f):
439 439 '''Mark a file removed.'''
440 440 self._dirty = True
441 441 self._droppath(f)
442 442 size = 0
443 443 if self._pl[1] != nullid and f in self._map:
444 444 # backup the previous state
445 445 entry = self._map[f]
446 446 if entry[0] == 'm': # merge
447 447 size = -1
448 448 elif entry[0] == 'n' and entry[2] == -2: # other parent
449 449 size = -2
450 450 self._map[f] = dirstatetuple('r', 0, size, 0)
451 451 if size == 0 and f in self._copymap:
452 452 del self._copymap[f]
453 453
454 454 def merge(self, f):
455 455 '''Mark a file merged.'''
456 456 if self._pl[1] == nullid:
457 457 return self.normallookup(f)
458 458 return self.otherparent(f)
459 459
460 460 def drop(self, f):
461 461 '''Drop a file from the dirstate'''
462 462 if f in self._map:
463 463 self._dirty = True
464 464 self._droppath(f)
465 465 del self._map[f]
466 466
467 467 def _normalize(self, path, isknown, ignoremissing=False, exists=None):
468 468 normed = util.normcase(path)
469 469 folded = self._foldmap.get(normed, None)
470 470 if folded is None:
471 471 if isknown:
472 472 folded = path
473 473 else:
474 474 if exists is None:
475 475 exists = os.path.lexists(os.path.join(self._root, path))
476 476 if not exists:
477 477 # Maybe a path component exists
478 478 if not ignoremissing and '/' in path:
479 479 d, f = path.rsplit('/', 1)
480 480 d = self._normalize(d, isknown, ignoremissing, None)
481 481 folded = d + "/" + f
482 482 else:
483 483 # No path components, preserve original case
484 484 folded = path
485 485 else:
486 486 # recursively normalize leading directory components
487 487 # against dirstate
488 488 if '/' in normed:
489 489 d, f = normed.rsplit('/', 1)
490 490 d = self._normalize(d, isknown, ignoremissing, True)
491 491 r = self._root + "/" + d
492 492 folded = d + "/" + util.fspath(f, r)
493 493 else:
494 494 folded = util.fspath(normed, self._root)
495 495 self._foldmap[normed] = folded
496 496
497 497 return folded
498 498
499 499 def normalize(self, path, isknown=False, ignoremissing=False):
500 500 '''
501 501 normalize the case of a pathname when on a casefolding filesystem
502 502
503 503 isknown specifies whether the filename came from walking the
504 504 disk, to avoid extra filesystem access.
505 505
506 506 If ignoremissing is True, missing paths are returned
507 507 unchanged. Otherwise, we try harder to normalize possibly
508 508 existing path components.
509 509
510 510 The normalized case is determined based on the following precedence:
511 511
512 512 - version of name already stored in the dirstate
513 513 - version of name stored on disk
514 514 - version provided via command arguments
515 515 '''
516 516
517 517 if self._checkcase:
518 518 return self._normalize(path, isknown, ignoremissing)
519 519 return path
520 520
521 521 def clear(self):
522 522 self._map = {}
523 523 if "_dirs" in self.__dict__:
524 524 delattr(self, "_dirs")
525 525 self._copymap = {}
526 526 self._pl = [nullid, nullid]
527 527 self._lastnormaltime = 0
528 528 self._dirty = True
529 529
530 530 def rebuild(self, parent, allfiles, changedfiles=None):
531 531 changedfiles = changedfiles or allfiles
532 532 oldmap = self._map
533 533 self.clear()
534 534 for f in allfiles:
535 535 if f not in changedfiles:
536 536 self._map[f] = oldmap[f]
537 537 else:
538 538 if 'x' in allfiles.flags(f):
539 539 self._map[f] = dirstatetuple('n', 0777, -1, 0)
540 540 else:
541 541 self._map[f] = dirstatetuple('n', 0666, -1, 0)
542 542 self._pl = (parent, nullid)
543 543 self._dirty = True
544 544
545 545 def write(self):
546 546 if not self._dirty:
547 547 return
548 548
549 549 # a large enough 'delaywrite' prevents 'pack_dirstate' from dropping
550 550 # the timestamp of each entry in the dirstate, because of 'now > mtime'
551 551 delaywrite = self._ui.configint('debug', 'dirstate.delaywrite', 0)
552 552 if delaywrite > 0:
553 553 import time # to avoid useless import
554 554 time.sleep(delaywrite)
555 555
556 556 st = self._opener("dirstate", "w", atomictemp=True)
557 557 # use the modification time of the newly created temporary file as the
558 558 # filesystem's notion of 'now'
559 559 now = util.fstat(st).st_mtime
560 560 st.write(parsers.pack_dirstate(self._map, self._copymap, self._pl, now))
561 561 st.close()
562 562 self._lastnormaltime = 0
563 563 self._dirty = self._dirtypl = False
564 564
565 565 def _dirignore(self, f):
566 566 if f == '.':
567 567 return False
568 568 if self._ignore(f):
569 569 return True
570 570 for p in scmutil.finddirs(f):
571 571 if self._ignore(p):
572 572 return True
573 573 return False
574 574
575 575 def _walkexplicit(self, match, subrepos):
576 576 '''Get stat data about the files explicitly specified by match.
577 577
578 578 Return a triple (results, dirsfound, dirsnotfound).
579 579 - results is a mapping from filename to stat result. It also contains
580 580 listings mapping subrepos and .hg to None.
581 581 - dirsfound is a list of files found to be directories.
582 582 - dirsnotfound is a list of files that the dirstate thinks are
583 583 directories and that were not found.'''
584 584
585 585 def badtype(mode):
586 586 kind = _('unknown')
587 587 if stat.S_ISCHR(mode):
588 588 kind = _('character device')
589 589 elif stat.S_ISBLK(mode):
590 590 kind = _('block device')
591 591 elif stat.S_ISFIFO(mode):
592 592 kind = _('fifo')
593 593 elif stat.S_ISSOCK(mode):
594 594 kind = _('socket')
595 595 elif stat.S_ISDIR(mode):
596 596 kind = _('directory')
597 597 return _('unsupported file type (type is %s)') % kind
598 598
599 599 matchedir = match.explicitdir
600 600 badfn = match.bad
601 601 dmap = self._map
602 602 normpath = util.normpath
603 603 lstat = os.lstat
604 604 getkind = stat.S_IFMT
605 605 dirkind = stat.S_IFDIR
606 606 regkind = stat.S_IFREG
607 607 lnkkind = stat.S_IFLNK
608 608 join = self._join
609 609 dirsfound = []
610 610 foundadd = dirsfound.append
611 611 dirsnotfound = []
612 612 notfoundadd = dirsnotfound.append
613 613
614 if match.matchfn != match.exact and self._checkcase:
614 if not match.isexact() and self._checkcase:
615 615 normalize = self._normalize
616 616 else:
617 617 normalize = None
618 618
619 619 files = sorted(match.files())
620 620 subrepos.sort()
621 621 i, j = 0, 0
622 622 while i < len(files) and j < len(subrepos):
623 623 subpath = subrepos[j] + "/"
624 624 if files[i] < subpath:
625 625 i += 1
626 626 continue
627 627 while i < len(files) and files[i].startswith(subpath):
628 628 del files[i]
629 629 j += 1
630 630
631 631 if not files or '.' in files:
632 632 files = ['']
633 633 results = dict.fromkeys(subrepos)
634 634 results['.hg'] = None
635 635
636 636 alldirs = None
637 637 for ff in files:
638 638 if normalize:
639 639 nf = normalize(normpath(ff), False, True)
640 640 else:
641 641 nf = normpath(ff)
642 642 if nf in results:
643 643 continue
644 644
645 645 try:
646 646 st = lstat(join(nf))
647 647 kind = getkind(st.st_mode)
648 648 if kind == dirkind:
649 649 if nf in dmap:
650 650 # file replaced by dir on disk but still in dirstate
651 651 results[nf] = None
652 652 if matchedir:
653 653 matchedir(nf)
654 654 foundadd(nf)
655 655 elif kind == regkind or kind == lnkkind:
656 656 results[nf] = st
657 657 else:
658 658 badfn(ff, badtype(kind))
659 659 if nf in dmap:
660 660 results[nf] = None
661 661 except OSError, inst: # nf not found on disk - it is dirstate only
662 662 if nf in dmap: # does it exactly match a missing file?
663 663 results[nf] = None
664 664 else: # does it match a missing directory?
665 665 if alldirs is None:
666 666 alldirs = scmutil.dirs(dmap)
667 667 if nf in alldirs:
668 668 if matchedir:
669 669 matchedir(nf)
670 670 notfoundadd(nf)
671 671 else:
672 672 badfn(ff, inst.strerror)
673 673
674 674 return results, dirsfound, dirsnotfound
675 675
676 676 def walk(self, match, subrepos, unknown, ignored, full=True):
677 677 '''
678 678 Walk recursively through the directory tree, finding all files
679 679 matched by match.
680 680
681 681 If full is False, maybe skip some known-clean files.
682 682
683 683 Return a dict mapping filename to stat-like object (either
684 684 mercurial.osutil.stat instance or return value of os.stat()).
685 685
686 686 '''
687 687 # full is a flag that extensions that hook into walk can use -- this
688 688 # implementation doesn't use it at all. This satisfies the contract
689 689 # because we only guarantee a "maybe".
690 690
691 691 if ignored:
692 692 ignore = util.never
693 693 dirignore = util.never
694 694 elif unknown:
695 695 ignore = self._ignore
696 696 dirignore = self._dirignore
697 697 else:
698 698 # if not unknown and not ignored, drop dir recursion and step 2
699 699 ignore = util.always
700 700 dirignore = util.always
701 701
702 702 matchfn = match.matchfn
703 703 matchalways = match.always()
704 704 matchtdir = match.traversedir
705 705 dmap = self._map
706 706 listdir = osutil.listdir
707 707 lstat = os.lstat
708 708 dirkind = stat.S_IFDIR
709 709 regkind = stat.S_IFREG
710 710 lnkkind = stat.S_IFLNK
711 711 join = self._join
712 712
713 713 exact = skipstep3 = False
714 if matchfn == match.exact: # match.exact
714 if match.isexact(): # match.exact
715 715 exact = True
716 716 dirignore = util.always # skip step 2
717 717 elif match.files() and not match.anypats(): # match.match, no patterns
718 718 skipstep3 = True
719 719
720 720 if not exact and self._checkcase:
721 721 normalize = self._normalize
722 722 skipstep3 = False
723 723 else:
724 724 normalize = None
725 725
726 726 # step 1: find all explicit files
727 727 results, work, dirsnotfound = self._walkexplicit(match, subrepos)
728 728
729 729 skipstep3 = skipstep3 and not (work or dirsnotfound)
730 730 work = [d for d in work if not dirignore(d)]
731 731 wadd = work.append
732 732
733 733 # step 2: visit subdirectories
734 734 while work:
735 735 nd = work.pop()
736 736 skip = None
737 737 if nd == '.':
738 738 nd = ''
739 739 else:
740 740 skip = '.hg'
741 741 try:
742 742 entries = listdir(join(nd), stat=True, skip=skip)
743 743 except OSError, inst:
744 744 if inst.errno in (errno.EACCES, errno.ENOENT):
745 745 match.bad(self.pathto(nd), inst.strerror)
746 746 continue
747 747 raise
748 748 for f, kind, st in entries:
749 749 if normalize:
750 750 nf = normalize(nd and (nd + "/" + f) or f, True, True)
751 751 else:
752 752 nf = nd and (nd + "/" + f) or f
753 753 if nf not in results:
754 754 if kind == dirkind:
755 755 if not ignore(nf):
756 756 if matchtdir:
757 757 matchtdir(nf)
758 758 wadd(nf)
759 759 if nf in dmap and (matchalways or matchfn(nf)):
760 760 results[nf] = None
761 761 elif kind == regkind or kind == lnkkind:
762 762 if nf in dmap:
763 763 if matchalways or matchfn(nf):
764 764 results[nf] = st
765 765 elif (matchalways or matchfn(nf)) and not ignore(nf):
766 766 results[nf] = st
767 767 elif nf in dmap and (matchalways or matchfn(nf)):
768 768 results[nf] = None
769 769
770 770 for s in subrepos:
771 771 del results[s]
772 772 del results['.hg']
773 773
774 774 # step 3: visit remaining files from dmap
775 775 if not skipstep3 and not exact:
776 776 # If a dmap file is not in results yet, it was either
777 777 # a) not matching matchfn b) ignored, c) missing, or d) under a
778 778 # symlink directory.
779 779 if not results and matchalways:
780 780 visit = dmap.keys()
781 781 else:
782 782 visit = [f for f in dmap if f not in results and matchfn(f)]
783 783 visit.sort()
784 784
785 785 if unknown:
786 786 # unknown == True means we walked all dirs under the roots
787 787 # that weren't ignored, and everything that matched was stat'ed
788 788 # and is already in results.
789 789 # The rest must thus be ignored or under a symlink.
790 790 audit_path = pathutil.pathauditor(self._root)
791 791
792 792 for nf in iter(visit):
793 793 # Report ignored items in the dmap as long as they are not
794 794 # under a symlink directory.
795 795 if audit_path.check(nf):
796 796 try:
797 797 results[nf] = lstat(join(nf))
798 798 # file was just ignored, no links, and exists
799 799 except OSError:
800 800 # file doesn't exist
801 801 results[nf] = None
802 802 else:
803 803 # It's either missing or under a symlink directory
804 804 # which we in this case report as missing
805 805 results[nf] = None
806 806 else:
807 807 # We may not have walked the full directory tree above,
808 808 # so stat and check everything we missed.
809 809 nf = iter(visit).next
810 810 for st in util.statfiles([join(i) for i in visit]):
811 811 results[nf()] = st
812 812 return results
813 813
814 814 def status(self, match, subrepos, ignored, clean, unknown):
815 815 '''Determine the status of the working copy relative to the
816 816 dirstate and return a pair of (unsure, status), where status is of type
817 817 scmutil.status and:
818 818
819 819 unsure:
820 820 files that might have been modified since the dirstate was
821 821 written, but need to be read to be sure (size is the same
822 822 but mtime differs)
823 823 status.modified:
824 824 files that have definitely been modified since the dirstate
825 825 was written (different size or mode)
826 826 status.clean:
827 827 files that have definitely not been modified since the
828 828 dirstate was written
829 829 '''
830 830 listignored, listclean, listunknown = ignored, clean, unknown
831 831 lookup, modified, added, unknown, ignored = [], [], [], [], []
832 832 removed, deleted, clean = [], [], []
833 833
834 834 dmap = self._map
835 835 ladd = lookup.append # aka "unsure"
836 836 madd = modified.append
837 837 aadd = added.append
838 838 uadd = unknown.append
839 839 iadd = ignored.append
840 840 radd = removed.append
841 841 dadd = deleted.append
842 842 cadd = clean.append
843 843 mexact = match.exact
844 844 dirignore = self._dirignore
845 845 checkexec = self._checkexec
846 846 copymap = self._copymap
847 847 lastnormaltime = self._lastnormaltime
848 848
849 849 # We need to do full walks when either
850 850 # - we're listing all clean files, or
851 851 # - match.traversedir does something, because match.traversedir should
852 852 # be called for every dir in the working dir
853 853 full = listclean or match.traversedir is not None
854 854 for fn, st in self.walk(match, subrepos, listunknown, listignored,
855 855 full=full).iteritems():
856 856 if fn not in dmap:
857 857 if (listignored or mexact(fn)) and dirignore(fn):
858 858 if listignored:
859 859 iadd(fn)
860 860 else:
861 861 uadd(fn)
862 862 continue
863 863
864 864 # This is equivalent to 'state, mode, size, time = dmap[fn]' but not
865 865 # written like that for performance reasons. dmap[fn] is not a
866 866 # Python tuple in compiled builds. The CPython UNPACK_SEQUENCE
867 867 # opcode has fast paths when the value to be unpacked is a tuple or
868 868 # a list, but falls back to creating a full-fledged iterator in
869 869 # general. That is much slower than simply accessing and storing the
870 870 # tuple members one by one.
871 871 t = dmap[fn]
872 872 state = t[0]
873 873 mode = t[1]
874 874 size = t[2]
875 875 time = t[3]
876 876
877 877 if not st and state in "nma":
878 878 dadd(fn)
879 879 elif state == 'n':
880 880 mtime = int(st.st_mtime)
881 881 if (size >= 0 and
882 882 ((size != st.st_size and size != st.st_size & _rangemask)
883 883 or ((mode ^ st.st_mode) & 0100 and checkexec))
884 884 or size == -2 # other parent
885 885 or fn in copymap):
886 886 madd(fn)
887 887 elif time != mtime and time != mtime & _rangemask:
888 888 ladd(fn)
889 889 elif mtime == lastnormaltime:
890 890 # fn may have just been marked as normal and it may have
891 891 # changed in the same second without changing its size.
892 892 # This can happen if we quickly do multiple commits.
893 893 # Force lookup, so we don't miss such a racy file change.
894 894 ladd(fn)
895 895 elif listclean:
896 896 cadd(fn)
897 897 elif state == 'm':
898 898 madd(fn)
899 899 elif state == 'a':
900 900 aadd(fn)
901 901 elif state == 'r':
902 902 radd(fn)
903 903
904 904 return (lookup, scmutil.status(modified, added, removed, deleted,
905 905 unknown, ignored, clean))
906 906
907 907 def matches(self, match):
908 908 '''
909 909 return files in the dirstate (in whatever state) filtered by match
910 910 '''
911 911 dmap = self._map
912 912 if match.always():
913 913 return dmap.keys()
914 914 files = match.files()
915 if match.matchfn == match.exact:
915 if match.isexact():
916 916 # fast path -- filter the other way around, since typically files is
917 917 # much smaller than dmap
918 918 return [f for f in files if f in dmap]
919 919 if not match.anypats() and util.all(fn in dmap for fn in files):
920 920 # fast path -- all the values are known to be files, so just return
921 921 # that
922 922 return list(files)
923 923 return [f for f in dmap if match(f)]
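The same substitution appears again in manifest.py and match.py below. A short usage sketch (an illustration, not part of the commit) of how a caller can now query the matcher without touching matchfn, using the module-level exact() helper shown in the match.py hunk:

    from mercurial import match as matchmod

    # build an exact matcher over an explicit file list
    m = matchmod.exact('/repo', '', ['a.txt', 'sub/b.txt'])
    if m.isexact():
        # the file list is authoritative, so no pattern expansion is needed
        candidates = m.files()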
@@ -1,668 +1,668 b''
1 1 # manifest.py - manifest revision class for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from i18n import _
9 9 import mdiff, parsers, error, revlog, util, scmutil
10 10 import array, struct
11 11
12 12 propertycache = util.propertycache
13 13
14 14 class _lazymanifest(dict):
15 15 """This is the pure implementation of lazymanifest.
16 16
17 17 It has not been optimized *at all* and is not lazy.
18 18 """
19 19
20 20 def __init__(self, data):
21 21 # This init method does a little bit of excessive-looking
22 22 # precondition checking. This is so that the behavior of this
23 23 # class exactly matches its C counterpart to try and help
24 24 # prevent surprise breakage for anyone that develops against
25 25 # the pure version.
26 26 if data and data[-1] != '\n':
27 27 raise ValueError('Manifest did not end in a newline.')
28 28 dict.__init__(self)
29 29 prev = None
30 30 for l in data.splitlines():
31 31 if prev is not None and prev > l:
32 32 raise ValueError('Manifest lines not in sorted order.')
33 33 prev = l
34 34 f, n = l.split('\0')
35 35 if len(n) > 40:
36 36 self[f] = revlog.bin(n[:40]), n[40:]
37 37 else:
38 38 self[f] = revlog.bin(n), ''
39 39
40 40 def __setitem__(self, k, v):
41 41 node, flag = v
42 42 assert node is not None
43 43 if len(node) > 21:
44 44 node = node[:21] # match c implementation behavior
45 45 dict.__setitem__(self, k, (node, flag))
46 46
47 47 def __iter__(self):
48 48 return iter(sorted(dict.keys(self)))
49 49
50 50 def iterkeys(self):
51 51 return iter(sorted(dict.keys(self)))
52 52
53 53 def iterentries(self):
54 54 return ((f, e[0], e[1]) for f, e in sorted(self.iteritems()))
55 55
56 56 def copy(self):
57 57 c = _lazymanifest('')
58 58 c.update(self)
59 59 return c
60 60
61 61 def diff(self, m2, clean=False):
62 62 '''Finds changes between the current manifest and m2.'''
63 63 diff = {}
64 64
65 65 for fn, e1 in self.iteritems():
66 66 if fn not in m2:
67 67 diff[fn] = e1, (None, '')
68 68 else:
69 69 e2 = m2[fn]
70 70 if e1 != e2:
71 71 diff[fn] = e1, e2
72 72 elif clean:
73 73 diff[fn] = None
74 74
75 75 for fn, e2 in m2.iteritems():
76 76 if fn not in self:
77 77 diff[fn] = (None, ''), e2
78 78
79 79 return diff
80 80
81 81 def filtercopy(self, filterfn):
82 82 c = _lazymanifest('')
83 83 for f, n, fl in self.iterentries():
84 84 if filterfn(f):
85 85 c[f] = n, fl
86 86 return c
87 87
88 88 def text(self):
89 89 """Get the full data of this manifest as a bytestring."""
90 90 fl = sorted(self.iterentries())
91 91
92 92 _hex = revlog.hex
93 93 # if this is changed to support newlines in filenames,
94 94 # be sure to check the templates/ dir again (especially *-raw.tmpl)
95 95 return ''.join("%s\0%s%s\n" % (
96 96 f, _hex(n[:20]), flag) for f, n, flag in fl)
97 97
98 98 try:
99 99 _lazymanifest = parsers.lazymanifest
100 100 except AttributeError:
101 101 pass
102 102
103 103 class manifestdict(object):
104 104 def __init__(self, data=''):
105 105 self._lm = _lazymanifest(data)
106 106
107 107 def __getitem__(self, key):
108 108 return self._lm[key][0]
109 109
110 110 def find(self, key):
111 111 return self._lm[key]
112 112
113 113 def __len__(self):
114 114 return len(self._lm)
115 115
116 116 def __setitem__(self, key, node):
117 117 self._lm[key] = node, self.flags(key, '')
118 118
119 119 def __contains__(self, key):
120 120 return key in self._lm
121 121
122 122 def __delitem__(self, key):
123 123 del self._lm[key]
124 124
125 125 def __iter__(self):
126 126 return self._lm.__iter__()
127 127
128 128 def iterkeys(self):
129 129 return self._lm.iterkeys()
130 130
131 131 def keys(self):
132 132 return list(self.iterkeys())
133 133
134 134 def intersectfiles(self, files):
135 135 '''make a new lazymanifest with the intersection of self with files
136 136
137 137 The algorithm assumes that files is much smaller than self.'''
138 138 ret = manifestdict()
139 139 lm = self._lm
140 140 for fn in files:
141 141 if fn in lm:
142 142 ret._lm[fn] = self._lm[fn]
143 143 return ret
144 144
145 145 def filesnotin(self, m2):
146 146 '''Set of files in this manifest that are not in the other'''
147 147 files = set(self)
148 148 files.difference_update(m2)
149 149 return files
150 150
151 151 @propertycache
152 152 def _dirs(self):
153 153 return scmutil.dirs(self)
154 154
155 155 def dirs(self):
156 156 return self._dirs
157 157
158 158 def hasdir(self, dir):
159 159 return dir in self._dirs
160 160
161 161 def matches(self, match):
162 162 '''generate a new manifest filtered by the match argument'''
163 163 if match.always():
164 164 return self.copy()
165 165
166 166 files = match.files()
167 if (len(files) < 100 and (match.matchfn == match.exact or
167 if (len(files) < 100 and (match.isexact() or
168 168 (not match.anypats() and util.all(fn in self for fn in files)))):
169 169 return self.intersectfiles(files)
170 170
171 171 lm = manifestdict('')
172 172 lm._lm = self._lm.filtercopy(match)
173 173 return lm
174 174
175 175 def diff(self, m2, clean=False):
176 176 '''Finds changes between the current manifest and m2.
177 177
178 178 Args:
179 179 m2: the manifest to which this manifest should be compared.
180 180 clean: if true, include files unchanged between these manifests
181 181 with a None value in the returned dictionary.
182 182
183 183 The result is returned as a dict with filename as key and
184 184 values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
185 185 nodeid in the current/other manifest and fl1/fl2 is the flag
186 186 in the current/other manifest. Where the file does not exist,
187 187 the nodeid will be None and the flags will be the empty
188 188 string.
189 189 '''
190 190 return self._lm.diff(m2._lm, clean)
191 191
192 192 def setflag(self, key, flag):
193 193 self._lm[key] = self[key], flag
194 194
195 195 def get(self, key, default=None):
196 196 try:
197 197 return self._lm[key][0]
198 198 except KeyError:
199 199 return default
200 200
201 201 def flags(self, key, default=''):
202 202 try:
203 203 return self._lm[key][1]
204 204 except KeyError:
205 205 return default
206 206
207 207 def copy(self):
208 208 c = manifestdict('')
209 209 c._lm = self._lm.copy()
210 210 return c
211 211
212 212 def iteritems(self):
213 213 return (x[:2] for x in self._lm.iterentries())
214 214
215 215 def text(self):
216 216 return self._lm.text()
217 217
218 218 def fastdelta(self, base, changes):
219 219 """Given a base manifest text as an array.array and a list of changes
220 220 relative to that text, compute a delta that can be used by revlog.
221 221 """
222 222 delta = []
223 223 dstart = None
224 224 dend = None
225 225 dline = [""]
226 226 start = 0
227 227 # zero copy representation of base as a buffer
228 228 addbuf = util.buffer(base)
229 229
230 230 # start with a readonly loop that finds the offset of
231 231 # each line and creates the deltas
232 232 for f, todelete in changes:
233 233 # bs will either be the index of the item or the insert point
234 234 start, end = _msearch(addbuf, f, start)
235 235 if not todelete:
236 236 h, fl = self._lm[f]
237 237 l = "%s\0%s%s\n" % (f, revlog.hex(h), fl)
238 238 else:
239 239 if start == end:
240 240 # item we want to delete was not found, error out
241 241 raise AssertionError(
242 242 _("failed to remove %s from manifest") % f)
243 243 l = ""
244 244 if dstart is not None and dstart <= start and dend >= start:
245 245 if dend < end:
246 246 dend = end
247 247 if l:
248 248 dline.append(l)
249 249 else:
250 250 if dstart is not None:
251 251 delta.append([dstart, dend, "".join(dline)])
252 252 dstart = start
253 253 dend = end
254 254 dline = [l]
255 255
256 256 if dstart is not None:
257 257 delta.append([dstart, dend, "".join(dline)])
258 258 # apply the delta to the base, and get a delta for addrevision
259 259 deltatext, arraytext = _addlistdelta(base, delta)
260 260 return arraytext, deltatext
261 261
262 262 def _msearch(m, s, lo=0, hi=None):
263 263 '''return a tuple (start, end) that says where to find s within m.
264 264
265 265 If the string is found m[start:end] are the line containing
266 266 that string. If start == end the string was not found and
267 267 they indicate the proper sorted insertion point.
268 268
269 269 m should be a buffer or a string
270 270 s is a string'''
271 271 def advance(i, c):
272 272 while i < lenm and m[i] != c:
273 273 i += 1
274 274 return i
275 275 if not s:
276 276 return (lo, lo)
277 277 lenm = len(m)
278 278 if not hi:
279 279 hi = lenm
280 280 while lo < hi:
281 281 mid = (lo + hi) // 2
282 282 start = mid
283 283 while start > 0 and m[start - 1] != '\n':
284 284 start -= 1
285 285 end = advance(start, '\0')
286 286 if m[start:end] < s:
287 287 # we know that after the null there are 40 bytes of sha1
288 288 # this translates to the bisect lo = mid + 1
289 289 lo = advance(end + 40, '\n') + 1
290 290 else:
291 291 # this translates to the bisect hi = mid
292 292 hi = start
293 293 end = advance(lo, '\0')
294 294 found = m[lo:end]
295 295 if s == found:
296 296 # we know that after the null there are 40 bytes of sha1
297 297 end = advance(end + 40, '\n')
298 298 return (lo, end + 1)
299 299 else:
300 300 return (lo, lo)
301 301
302 302 def _checkforbidden(l):
303 303 """Check filenames for illegal characters."""
304 304 for f in l:
305 305 if '\n' in f or '\r' in f:
306 306 raise error.RevlogError(
307 307 _("'\\n' and '\\r' disallowed in filenames: %r") % f)
308 308
309 309
310 310 # apply the changes collected during the bisect loop to our addlist
311 311 # return a delta suitable for addrevision
312 312 def _addlistdelta(addlist, x):
313 313 # for large addlist arrays, building a new array is cheaper
314 314 # than repeatedly modifying the existing one
315 315 currentposition = 0
316 316 newaddlist = array.array('c')
317 317
318 318 for start, end, content in x:
319 319 newaddlist += addlist[currentposition:start]
320 320 if content:
321 321 newaddlist += array.array('c', content)
322 322
323 323 currentposition = end
324 324
325 325 newaddlist += addlist[currentposition:]
326 326
327 327 deltatext = "".join(struct.pack(">lll", start, end, len(content))
328 328 + content for start, end, content in x)
329 329 return deltatext, newaddlist
330 330
331 331 def _splittopdir(f):
332 332 if '/' in f:
333 333 dir, subpath = f.split('/', 1)
334 334 return dir + '/', subpath
335 335 else:
336 336 return '', f
337 337
338 338 class treemanifest(object):
339 339 def __init__(self, dir='', text=''):
340 340 self._dir = dir
341 341 self._dirs = {}
342 342 # Using _lazymanifest here is a little slower than plain old dicts
343 343 self._files = {}
344 344 self._flags = {}
345 345 lm = _lazymanifest(text)
346 346 for f, n, fl in lm.iterentries():
347 347 self[f] = n
348 348 if fl:
349 349 self.setflag(f, fl)
350 350
351 351 def _subpath(self, path):
352 352 return self._dir + path
353 353
354 354 def __len__(self):
355 355 size = len(self._files)
356 356 for m in self._dirs.values():
357 357 size += m.__len__()
358 358 return size
359 359
360 360 def __str__(self):
361 361 return '<treemanifest dir=%s>' % self._dir
362 362
363 363 def iteritems(self):
364 364 for p, n in sorted(self._dirs.items() + self._files.items()):
365 365 if p in self._files:
366 366 yield self._subpath(p), n
367 367 else:
368 368 for f, sn in n.iteritems():
369 369 yield f, sn
370 370
371 371 def iterkeys(self):
372 372 for p in sorted(self._dirs.keys() + self._files.keys()):
373 373 if p in self._files:
374 374 yield self._subpath(p)
375 375 else:
376 376 for f in self._dirs[p].iterkeys():
377 377 yield f
378 378
379 379 def keys(self):
380 380 return list(self.iterkeys())
381 381
382 382 def __iter__(self):
383 383 return self.iterkeys()
384 384
385 385 def __contains__(self, f):
386 386 if f is None:
387 387 return False
388 388 dir, subpath = _splittopdir(f)
389 389 if dir:
390 390 if dir not in self._dirs:
391 391 return False
392 392 return self._dirs[dir].__contains__(subpath)
393 393 else:
394 394 return f in self._files
395 395
396 396 def get(self, f, default=None):
397 397 dir, subpath = _splittopdir(f)
398 398 if dir:
399 399 if dir not in self._dirs:
400 400 return default
401 401 return self._dirs[dir].get(subpath, default)
402 402 else:
403 403 return self._files.get(f, default)
404 404
405 405 def __getitem__(self, f):
406 406 dir, subpath = _splittopdir(f)
407 407 if dir:
408 408 return self._dirs[dir].__getitem__(subpath)
409 409 else:
410 410 return self._files[f]
411 411
412 412 def flags(self, f):
413 413 dir, subpath = _splittopdir(f)
414 414 if dir:
415 415 if dir not in self._dirs:
416 416 return ''
417 417 return self._dirs[dir].flags(subpath)
418 418 else:
419 419 if f in self._dirs:
420 420 return ''
421 421 return self._flags.get(f, '')
422 422
423 423 def find(self, f):
424 424 dir, subpath = _splittopdir(f)
425 425 if dir:
426 426 return self._dirs[dir].find(subpath)
427 427 else:
428 428 return self._files[f], self._flags.get(f, '')
429 429
430 430 def __delitem__(self, f):
431 431 dir, subpath = _splittopdir(f)
432 432 if dir:
433 433 self._dirs[dir].__delitem__(subpath)
434 434 # If the directory is now empty, remove it
435 435 if not self._dirs[dir]._dirs and not self._dirs[dir]._files:
436 436 del self._dirs[dir]
437 437 else:
438 438 del self._files[f]
439 439 if f in self._flags:
440 440 del self._flags[f]
441 441
442 442 def __setitem__(self, f, n):
443 443 assert n is not None
444 444 dir, subpath = _splittopdir(f)
445 445 if dir:
446 446 if dir not in self._dirs:
447 447 self._dirs[dir] = treemanifest(self._subpath(dir))
448 448 self._dirs[dir].__setitem__(subpath, n)
449 449 else:
450 450 self._files[f] = n
451 451
452 452 def setflag(self, f, flags):
453 453 """Set the flags (symlink, executable) for path f."""
454 454 dir, subpath = _splittopdir(f)
455 455 if dir:
456 456 if dir not in self._dirs:
457 457 self._dirs[dir] = treemanifest(self._subpath(dir))
458 458 self._dirs[dir].setflag(subpath, flags)
459 459 else:
460 460 self._flags[f] = flags
461 461
462 462 def copy(self):
463 463 copy = treemanifest(self._dir)
464 464 for d in self._dirs:
465 465 copy._dirs[d] = self._dirs[d].copy()
466 466 copy._files = dict.copy(self._files)
467 467 copy._flags = dict.copy(self._flags)
468 468 return copy
469 469
470 470 def intersectfiles(self, files):
471 471 '''make a new treemanifest with the intersection of self with files
472 472
473 473 The algorithm assumes that files is much smaller than self.'''
474 474 ret = treemanifest()
475 475 for fn in files:
476 476 if fn in self:
477 477 ret[fn] = self[fn]
478 478 flags = self.flags(fn)
479 479 if flags:
480 480 ret.setflag(fn, flags)
481 481 return ret
482 482
483 483 def filesnotin(self, m2):
484 484 '''Set of files in this manifest that are not in the other'''
485 485 files = set()
486 486 def _filesnotin(t1, t2):
487 487 for d, m1 in t1._dirs.iteritems():
488 488 if d in t2._dirs:
489 489 m2 = t2._dirs[d]
490 490 _filesnotin(m1, m2)
491 491 else:
492 492 files.update(m1.iterkeys())
493 493
494 494 for fn in t1._files.iterkeys():
495 495 if fn not in t2._files:
496 496 files.add(t1._subpath(fn))
497 497
498 498 _filesnotin(self, m2)
499 499 return files
500 500
501 501 @propertycache
502 502 def _alldirs(self):
503 503 return scmutil.dirs(self)
504 504
505 505 def dirs(self):
506 506 return self._alldirs
507 507
508 508 def hasdir(self, dir):
509 509 topdir, subdir = _splittopdir(dir)
510 510 if topdir:
511 511 if topdir in self._dirs:
512 512 return self._dirs[topdir].hasdir(subdir)
513 513 return False
514 514 return (dir + '/') in self._dirs
515 515
516 516 def matches(self, match):
517 517 '''generate a new manifest filtered by the match argument'''
518 518 if match.always():
519 519 return self.copy()
520 520
521 521 files = match.files()
522 if (match.matchfn == match.exact or
522 if (match.isexact() or
523 523 (not match.anypats() and util.all(fn in self for fn in files))):
524 524 return self.intersectfiles(files)
525 525
526 526 m = self.copy()
527 527 for fn in m.keys():
528 528 if not match(fn):
529 529 del m[fn]
530 530 return m
531 531
532 532 def diff(self, m2, clean=False):
533 533 '''Finds changes between the current manifest and m2.
534 534
535 535 Args:
536 536 m2: the manifest to which this manifest should be compared.
537 537 clean: if true, include files unchanged between these manifests
538 538 with a None value in the returned dictionary.
539 539
540 540 The result is returned as a dict with filename as key and
541 541 values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
542 542 nodeid in the current/other manifest and fl1/fl2 is the flag
543 543 in the current/other manifest. Where the file does not exist,
544 544 the nodeid will be None and the flags will be the empty
545 545 string.
546 546 '''
547 547 result = {}
548 548 emptytree = treemanifest()
549 549 def _diff(t1, t2):
550 550 for d, m1 in t1._dirs.iteritems():
551 551 m2 = t2._dirs.get(d, emptytree)
552 552 _diff(m1, m2)
553 553
554 554 for d, m2 in t2._dirs.iteritems():
555 555 if d not in t1._dirs:
556 556 _diff(emptytree, m2)
557 557
558 558 for fn, n1 in t1._files.iteritems():
559 559 fl1 = t1._flags.get(fn, '')
560 560 n2 = t2._files.get(fn, None)
561 561 fl2 = t2._flags.get(fn, '')
562 562 if n1 != n2 or fl1 != fl2:
563 563 result[t1._subpath(fn)] = ((n1, fl1), (n2, fl2))
564 564 elif clean:
565 565 result[t1._subpath(fn)] = None
566 566
567 567 for fn, n2 in t2._files.iteritems():
568 568 if fn not in t1._files:
569 569 fl2 = t2._flags.get(fn, '')
570 570 result[t2._subpath(fn)] = ((None, ''), (n2, fl2))
571 571
572 572 _diff(self, m2)
573 573 return result
574 574
575 575 def text(self):
576 576 """Get the full data of this manifest as a bytestring."""
577 577 fl = self.keys()
578 578 _checkforbidden(fl)
579 579
580 580 hex, flags = revlog.hex, self.flags
581 581 # if this is changed to support newlines in filenames,
582 582 # be sure to check the templates/ dir again (especially *-raw.tmpl)
583 583 return ''.join("%s\0%s%s\n" % (f, hex(self[f]), flags(f)) for f in fl)
584 584
585 585 class manifest(revlog.revlog):
586 586 def __init__(self, opener):
587 587 # During normal operations, we expect to deal with not more than four
588 588 # revs at a time (such as during commit --amend). When rebasing large
589 589 # stacks of commits, the number can go up, hence the config knob below.
590 590 cachesize = 4
591 591 usetreemanifest = False
592 592 opts = getattr(opener, 'options', None)
593 593 if opts is not None:
594 594 cachesize = opts.get('manifestcachesize', cachesize)
595 595 usetreemanifest = opts.get('usetreemanifest', usetreemanifest)
596 596 self._mancache = util.lrucachedict(cachesize)
597 597 revlog.revlog.__init__(self, opener, "00manifest.i")
598 598 self._usetreemanifest = usetreemanifest
599 599
600 600 def _newmanifest(self, data=''):
601 601 if self._usetreemanifest:
602 602 return treemanifest('', data)
603 603 return manifestdict(data)
604 604
605 605 def readdelta(self, node):
606 606 r = self.rev(node)
607 607 d = mdiff.patchtext(self.revdiff(self.deltaparent(r), r))
608 608 return self._newmanifest(d)
609 609
610 610 def readfast(self, node):
611 611 '''use the faster of readdelta or read'''
612 612 r = self.rev(node)
613 613 deltaparent = self.deltaparent(r)
614 614 if deltaparent != revlog.nullrev and deltaparent in self.parentrevs(r):
615 615 return self.readdelta(node)
616 616 return self.read(node)
617 617
618 618 def read(self, node):
619 619 if node == revlog.nullid:
620 620 return self._newmanifest() # don't upset local cache
621 621 if node in self._mancache:
622 622 return self._mancache[node][0]
623 623 text = self.revision(node)
624 624 arraytext = array.array('c', text)
625 625 m = self._newmanifest(text)
626 626 self._mancache[node] = (m, arraytext)
627 627 return m
628 628
629 629 def find(self, node, f):
630 630 '''look up entry for a single file efficiently.
631 631 return (node, flags) pair if found, (None, None) if not.'''
632 632 m = self.read(node)
633 633 try:
634 634 return m.find(f)
635 635 except KeyError:
636 636 return None, None
637 637
638 638 def add(self, m, transaction, link, p1, p2, added, removed):
639 639 if p1 in self._mancache and not self._usetreemanifest:
640 640 # If our first parent is in the manifest cache, we can
641 641 # compute a delta here using properties we know about the
642 642 # manifest up-front, which may save time later for the
643 643 # revlog layer.
644 644
645 645 _checkforbidden(added)
646 646 # combine the changed lists into one list for sorting
647 647 work = [(x, False) for x in added]
648 648 work.extend((x, True) for x in removed)
649 649 # this could use heapq.merge() (from Python 2.6+) or equivalent
650 650 # since the lists are already sorted
651 651 work.sort()
652 652
653 653 arraytext, deltatext = m.fastdelta(self._mancache[p1][1], work)
654 654 cachedelta = self.rev(p1), deltatext
655 655 text = util.buffer(arraytext)
656 656 else:
657 657 # The first parent manifest isn't already loaded, so we'll
658 658 # just encode a fulltext of the manifest and pass that
659 659 # through to the revlog layer, and let it handle the delta
660 660 # process.
661 661 text = m.text()
662 662 arraytext = array.array('c', text)
663 663 cachedelta = None
664 664
665 665 n = self.addrevision(text, transaction, link, p1, p2, cachedelta)
666 666 self._mancache[n] = (m, arraytext)
667 667
668 668 return n
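match.py below holds the new accessor itself. One subtlety worth noting (an observation about the code as shown, not something the commit states): narrowmatcher replaces matchfn with a wrapping lambda, so a narrowed view of an exact matcher reports isexact() as False:

    from mercurial import match as matchmod

    m1 = matchmod.exact('/repo', '', ['sub/b.txt'])
    m2 = matchmod.narrowmatcher('sub', m1)
    m1.isexact()   # True: matchfn is the exact-filename predicate
    m2.isexact()   # False: matchfn is a lambda that forwards to m1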
@@ -1,436 +1,439 b''
1 1 # match.py - filename matching
2 2 #
3 3 # Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 import re
9 9 import util, pathutil
10 10 from i18n import _
11 11
12 12 def _rematcher(regex):
13 13 '''compile the regexp with the best available regexp engine and return a
14 14 matcher function'''
15 15 m = util.re.compile(regex)
16 16 try:
17 17 # slightly faster, provided by facebook's re2 bindings
18 18 return m.test_match
19 19 except AttributeError:
20 20 return m.match
21 21
22 22 def _expandsets(kindpats, ctx):
23 23 '''Returns the kindpats list with the 'set' patterns expanded.'''
24 24 fset = set()
25 25 other = []
26 26
27 27 for kind, pat in kindpats:
28 28 if kind == 'set':
29 29 if not ctx:
30 30 raise util.Abort("fileset expression with no context")
31 31 s = ctx.getfileset(pat)
32 32 fset.update(s)
33 33 continue
34 34 other.append((kind, pat))
35 35 return fset, other
36 36
37 37 def _kindpatsalwaysmatch(kindpats):
38 38 """"Checks whether the kindspats match everything, as e.g.
39 39 'relpath:.' does.
40 40 """
41 41 for kind, pat in kindpats:
42 42 if pat != '' or kind not in ['relpath', 'glob']:
43 43 return False
44 44 return True
45 45
46 46 class match(object):
47 47 def __init__(self, root, cwd, patterns, include=[], exclude=[],
48 48 default='glob', exact=False, auditor=None, ctx=None):
49 49 """build an object to match a set of file patterns
50 50
51 51 arguments:
52 52 root - the canonical root of the tree you're matching against
53 53 cwd - the current working directory, if relevant
54 54 patterns - patterns to find
55 55 include - patterns to include (unless they are excluded)
56 56 exclude - patterns to exclude (even if they are included)
57 57 default - if a pattern in patterns has no explicit type, assume this one
58 58 exact - patterns are actually filenames (include/exclude still apply)
59 59
60 60 a pattern is one of:
61 61 'glob:<glob>' - a glob relative to cwd
62 62 're:<regexp>' - a regular expression
63 63 'path:<path>' - a path relative to repository root
64 64 'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
65 65 'relpath:<path>' - a path relative to cwd
66 66 'relre:<regexp>' - a regexp that needn't match the start of a name
67 67 'set:<fileset>' - a fileset expression
68 68 '<something>' - a pattern of the specified default type
69 69 """
70 70
71 71 self._root = root
72 72 self._cwd = cwd
73 73 self._files = [] # exact files and roots of patterns
74 74 self._anypats = bool(include or exclude)
75 75 self._ctx = ctx
76 76 self._always = False
77 77 self._pathrestricted = bool(include or exclude or patterns)
78 78
79 79 matchfns = []
80 80 if include:
81 81 kindpats = _normalize(include, 'glob', root, cwd, auditor)
82 82 self.includepat, im = _buildmatch(ctx, kindpats, '(?:/|$)')
83 83 matchfns.append(im)
84 84 if exclude:
85 85 kindpats = _normalize(exclude, 'glob', root, cwd, auditor)
86 86 self.excludepat, em = _buildmatch(ctx, kindpats, '(?:/|$)')
87 87 matchfns.append(lambda f: not em(f))
88 88 if exact:
89 89 if isinstance(patterns, list):
90 90 self._files = patterns
91 91 else:
92 92 self._files = list(patterns)
93 93 matchfns.append(self.exact)
94 94 elif patterns:
95 95 kindpats = _normalize(patterns, default, root, cwd, auditor)
96 96 if not _kindpatsalwaysmatch(kindpats):
97 97 self._files = _roots(kindpats)
98 98 self._anypats = self._anypats or _anypats(kindpats)
99 99 self.patternspat, pm = _buildmatch(ctx, kindpats, '$')
100 100 matchfns.append(pm)
101 101
102 102 if not matchfns:
103 103 m = util.always
104 104 self._always = True
105 105 elif len(matchfns) == 1:
106 106 m = matchfns[0]
107 107 else:
108 108 def m(f):
109 109 for matchfn in matchfns:
110 110 if not matchfn(f):
111 111 return False
112 112 return True
113 113
114 114 self.matchfn = m
115 115 self._fmap = set(self._files)
116 116
117 117 def __call__(self, fn):
118 118 return self.matchfn(fn)
119 119 def __iter__(self):
120 120 for f in self._files:
121 121 yield f
122 122
123 123 # Callbacks related to how the matcher is used by dirstate.walk.
124 124 # Subscribers to these events must monkeypatch the matcher object.
125 125 def bad(self, f, msg):
126 126 '''Callback from dirstate.walk for each explicit file that can't be
127 127 found/accessed, with an error message.'''
128 128 pass
129 129
130 130 # If explicitdir is set, it will be called when an explicitly listed
131 131 # directory is visited.
132 132 explicitdir = None
133 133
134 134 # If traversedir is set, it will be called when a directory discovered
135 135 # by recursive traversal is visited.
136 136 traversedir = None
137 137
138 138 def abs(self, f):
139 139 '''Convert a repo path back to path that is relative to the root of the
140 140 matcher.'''
141 141 return f
142 142
143 143 def rel(self, f):
144 144 '''Convert repo path back to path that is relative to cwd of matcher.'''
145 145 return util.pathto(self._root, self._cwd, f)
146 146
147 147 def uipath(self, f):
148 148 '''Convert repo path to a display path. If patterns or -I/-X were used
149 149 to create this matcher, the display path will be relative to cwd.
150 150 Otherwise it is relative to the root of the repo.'''
151 151 return (self._pathrestricted and self.rel(f)) or self.abs(f)
152 152
153 153 def files(self):
154 154 '''Explicitly listed files or patterns or roots:
155 155 if no patterns or .always(): empty list,
156 156 if exact: list exact files,
157 157 if not .anypats(): list all files and dirs,
158 158 else: optimal roots'''
159 159 return self._files
160 160
161 161 def exact(self, f):
162 162 '''Returns True if f is in .files().'''
163 163 return f in self._fmap
164 164
165 165 def anypats(self):
166 166 '''Matcher uses patterns or include/exclude.'''
167 167 return self._anypats
168 168
169 169 def always(self):
170 170 '''Matcher will match everything and .files() will be empty
171 171 - callers may use this to optimize the "match everything" case.'''
172 172 return self._always
173 173
174 def isexact(self):
175 return self.matchfn == self.exact
176
174 177 def exact(root, cwd, files):
175 178 return match(root, cwd, files, exact=True)
176 179
177 180 def always(root, cwd):
178 181 return match(root, cwd, [])
179 182
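# Illustrative sketch (paths are made up): the new isexact() accessor lets
# callers ask whether the matcher is a plain file list, instead of comparing
# m.matchfn with m.exact themselves.
m = exact('/repo', '/repo', ['a.txt', 'sub/b.txt'])
assert m.isexact() and m('a.txt') and not m('c.txt')
assert always('/repo', '/repo').always()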
180 183 class narrowmatcher(match):
181 184 """Adapt a matcher to work on a subdirectory only.
182 185
183 186 The paths are remapped to remove/insert the path as needed:
184 187
185 188 >>> m1 = match('root', '', ['a.txt', 'sub/b.txt'])
186 189 >>> m2 = narrowmatcher('sub', m1)
187 190 >>> bool(m2('a.txt'))
188 191 False
189 192 >>> bool(m2('b.txt'))
190 193 True
191 194 >>> bool(m2.matchfn('a.txt'))
192 195 False
193 196 >>> bool(m2.matchfn('b.txt'))
194 197 True
195 198 >>> m2.files()
196 199 ['b.txt']
197 200 >>> m2.exact('b.txt')
198 201 True
199 202 >>> util.pconvert(m2.rel('b.txt'))
200 203 'sub/b.txt'
201 204 >>> def bad(f, msg):
202 205 ... print "%s: %s" % (f, msg)
203 206 >>> m1.bad = bad
204 207 >>> m2.bad('x.txt', 'No such file')
205 208 sub/x.txt: No such file
206 209 >>> m2.abs('c.txt')
207 210 'sub/c.txt'
208 211 """
209 212
210 213 def __init__(self, path, matcher):
211 214 self._root = matcher._root
212 215 self._cwd = matcher._cwd
213 216 self._path = path
214 217 self._matcher = matcher
215 218 self._always = matcher._always
216 219 self._pathrestricted = matcher._pathrestricted
217 220
218 221 self._files = [f[len(path) + 1:] for f in matcher._files
219 222 if f.startswith(path + "/")]
220 223 self._anypats = matcher._anypats
221 224 self.matchfn = lambda fn: matcher.matchfn(self._path + "/" + fn)
222 225 self._fmap = set(self._files)
223 226
224 227 def abs(self, f):
225 228 return self._matcher.abs(self._path + "/" + f)
226 229
227 230 def bad(self, f, msg):
228 231 self._matcher.bad(self._path + "/" + f, msg)
229 232
230 233 def rel(self, f):
231 234 return self._matcher.rel(self._path + "/" + f)
232 235
233 236 def patkind(pattern, default=None):
234 237 '''If pattern is 'kind:pat' with a known kind, return kind.'''
235 238 return _patsplit(pattern, default)[0]
236 239
237 240 def _patsplit(pattern, default):
238 241 """Split a string into the optional pattern kind prefix and the actual
239 242 pattern."""
240 243 if ':' in pattern:
241 244 kind, pat = pattern.split(':', 1)
242 245 if kind in ('re', 'glob', 'path', 'relglob', 'relpath', 'relre',
243 246 'listfile', 'listfile0', 'set'):
244 247 return kind, pat
245 248 return default, pattern
246 249
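# Illustrative sketch (runnable here): the kind prefix is only honoured for
# known kinds; anything else falls back to the caller-supplied default.
assert _patsplit('re:.*\\.c$', 'glob') == ('re', '.*\\.c$')
assert _patsplit('foo/bar', 'glob') == ('glob', 'foo/bar')
assert _patsplit('unknown:pat', 'glob') == ('glob', 'unknown:pat')
assert patkind('path:docs') == 'path'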
247 250 def _globre(pat):
248 251 r'''Convert an extended glob string to a regexp string.
249 252
250 253 >>> print _globre(r'?')
251 254 .
252 255 >>> print _globre(r'*')
253 256 [^/]*
254 257 >>> print _globre(r'**')
255 258 .*
256 259 >>> print _globre(r'**/a')
257 260 (?:.*/)?a
258 261 >>> print _globre(r'a/**/b')
259 262 a\/(?:.*/)?b
260 263 >>> print _globre(r'[a*?!^][^b][!c]')
261 264 [a*?!^][\^b][^c]
262 265 >>> print _globre(r'{a,b}')
263 266 (?:a|b)
264 267 >>> print _globre(r'.\*\?')
265 268 \.\*\?
266 269 '''
267 270 i, n = 0, len(pat)
268 271 res = ''
269 272 group = 0
270 273 escape = util.re.escape
271 274 def peek():
272 275 return i < n and pat[i]
273 276 while i < n:
274 277 c = pat[i]
275 278 i += 1
276 279 if c not in '*?[{},\\':
277 280 res += escape(c)
278 281 elif c == '*':
279 282 if peek() == '*':
280 283 i += 1
281 284 if peek() == '/':
282 285 i += 1
283 286 res += '(?:.*/)?'
284 287 else:
285 288 res += '.*'
286 289 else:
287 290 res += '[^/]*'
288 291 elif c == '?':
289 292 res += '.'
290 293 elif c == '[':
291 294 j = i
292 295 if j < n and pat[j] in '!]':
293 296 j += 1
294 297 while j < n and pat[j] != ']':
295 298 j += 1
296 299 if j >= n:
297 300 res += '\\['
298 301 else:
299 302 stuff = pat[i:j].replace('\\','\\\\')
300 303 i = j + 1
301 304 if stuff[0] == '!':
302 305 stuff = '^' + stuff[1:]
303 306 elif stuff[0] == '^':
304 307 stuff = '\\' + stuff
305 308 res = '%s[%s]' % (res, stuff)
306 309 elif c == '{':
307 310 group += 1
308 311 res += '(?:'
309 312 elif c == '}' and group:
310 313 res += ')'
311 314 group -= 1
312 315 elif c == ',' and group:
313 316 res += '|'
314 317 elif c == '\\':
315 318 p = peek()
316 319 if p:
317 320 i += 1
318 321 res += escape(p)
319 322 else:
320 323 res += escape(c)
321 324 else:
322 325 res += escape(c)
323 326 return res
324 327
325 328 def _regex(kind, pat, globsuffix):
326 329 '''Convert a (normalized) pattern of any kind into a regular expression.
327 330 globsuffix is appended to the regexp of globs.'''
328 331 if not pat:
329 332 return ''
330 333 if kind == 're':
331 334 return pat
332 335 if kind == 'path':
333 336 return '^' + util.re.escape(pat) + '(?:/|$)'
334 337 if kind == 'relglob':
335 338 return '(?:|.*/)' + _globre(pat) + globsuffix
336 339 if kind == 'relpath':
337 340 return util.re.escape(pat) + '(?:/|$)'
338 341 if kind == 'relre':
339 342 if pat.startswith('^'):
340 343 return pat
341 344 return '.*' + pat
342 345 return _globre(pat) + globsuffix
343 346
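# Illustrative sketch of the per-kind translation, using the globsuffix values
# the match constructor passes ('$' for patterns, '(?:/|$)' for -I/-X); the
# expected strings assume the stock Python 2 re.escape behind util.re.escape.
assert _regex('path', 'docs', '(?:/|$)') == '^docs(?:/|$)'
assert _regex('relpath', 'docs', '(?:/|$)') == 'docs(?:/|$)'
assert _regex('relglob', '*.c', '$') == '(?:|.*/)[^/]*\\.c$'
assert _regex('relre', 'foo', '$') == '.*foo'   # relre is left unanchored
assert _regex('glob', 'src/*.c', '$') == 'src\\/[^/]*\\.c$'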
344 347 def _buildmatch(ctx, kindpats, globsuffix):
345 348 '''Return regexp string and a matcher function for kindpats.
346 349 globsuffix is appended to the regexp of globs.'''
347 350 fset, kindpats = _expandsets(kindpats, ctx)
348 351 if not kindpats:
349 352 return "", fset.__contains__
350 353
351 354 regex, mf = _buildregexmatch(kindpats, globsuffix)
352 355 if fset:
353 356 return regex, lambda f: f in fset or mf(f)
354 357 return regex, mf
355 358
356 359 def _buildregexmatch(kindpats, globsuffix):
357 360 """Build a match function from a list of kinds and kindpats,
358 361 return regexp string and a matcher function."""
359 362 try:
360 363 regex = '(?:%s)' % '|'.join([_regex(k, p, globsuffix)
361 364 for (k, p) in kindpats])
362 365 if len(regex) > 20000:
363 366 raise OverflowError
364 367 return regex, _rematcher(regex)
365 368 except OverflowError:
366 369 # We're using a Python with a tiny regex engine and we
367 370 # made it explode, so we'll divide the pattern list in two
368 371 # until it works
369 372 l = len(kindpats)
370 373 if l < 2:
371 374 raise
372 375 regexa, a = _buildregexmatch(kindpats[:l//2], globsuffix)
373 376 regexb, b = _buildregexmatch(kindpats[l//2:], globsuffix)
374 377 return regex, lambda s: a(s) or b(s)
375 378 except re.error:
376 379 for k, p in kindpats:
377 380 try:
378 381 _rematcher('(?:%s)' % _regex(k, p, globsuffix))
379 382 except re.error:
380 383 raise util.Abort(_("invalid pattern (%s): %s") % (k, p))
381 384 raise util.Abort(_("invalid pattern"))
382 385
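# Illustrative sketch (runnable here): kindpats are OR-ed into one regex; a
# 'path' pattern matches its whole subtree while a plain glob stays within a
# single directory level.
regex, m = _buildregexmatch([('glob', '*.py'), ('path', 'docs')], '$')
assert regex == '(?:[^/]*\\.py$|^docs(?:/|$))'
assert m('setup.py') and m('docs/index.txt')
assert not m('src/setup.py')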
383 386 def _normalize(patterns, default, root, cwd, auditor):
384 387 '''Convert 'kind:pat' from the patterns list to tuples with kind and
385 388 normalized and rooted patterns and with listfiles expanded.'''
386 389 kindpats = []
387 390 for kind, pat in [_patsplit(p, default) for p in patterns]:
388 391 if kind in ('glob', 'relpath'):
389 392 pat = pathutil.canonpath(root, cwd, pat, auditor)
390 393 elif kind in ('relglob', 'path'):
391 394 pat = util.normpath(pat)
392 395 elif kind in ('listfile', 'listfile0'):
393 396 try:
394 397 files = util.readfile(pat)
395 398 if kind == 'listfile0':
396 399 files = files.split('\0')
397 400 else:
398 401 files = files.splitlines()
399 402 files = [f for f in files if f]
400 403 except EnvironmentError:
401 404 raise util.Abort(_("unable to read file list (%s)") % pat)
402 405 kindpats += _normalize(files, default, root, cwd, auditor)
403 406 continue
404 407 # else: re or relre - which cannot be normalized
405 408 kindpats.append((kind, pat))
406 409 return kindpats
407 410
408 411 def _roots(kindpats):
409 412 '''return roots and exact explicitly listed files from patterns
410 413
411 414 >>> _roots([('glob', 'g/*'), ('glob', 'g'), ('glob', 'g*')])
412 415 ['g', 'g', '.']
413 416 >>> _roots([('relpath', 'r'), ('path', 'p/p'), ('path', '')])
414 417 ['r', 'p/p', '.']
415 418 >>> _roots([('relglob', 'rg*'), ('re', 're/'), ('relre', 'rr')])
416 419 ['.', '.', '.']
417 420 '''
418 421 r = []
419 422 for kind, pat in kindpats:
420 423 if kind == 'glob': # find the non-glob prefix
421 424 root = []
422 425 for p in pat.split('/'):
423 426 if '[' in p or '{' in p or '*' in p or '?' in p:
424 427 break
425 428 root.append(p)
426 429 r.append('/'.join(root) or '.')
427 430 elif kind in ('relpath', 'path'):
428 431 r.append(pat or '.')
429 432 else: # relglob, re, relre
430 433 r.append('.')
431 434 return r
432 435
433 436 def _anypats(kindpats):
434 437 for kind, pat in kindpats:
435 438 if kind in ('glob', 're', 'relglob', 'relre', 'set'):
436 439 return True