##// END OF EJS Templates
dirstate: use a presized dict for the dirstate...
Siddharth Agarwal -
r25585:868b7ee8 default
parent child Browse files
Show More
@@ -1,993 +1,1006 b''
1 1 # dirstate.py - working directory tracking for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from node import nullid
9 9 from i18n import _
10 10 import scmutil, util, osutil, parsers, encoding, pathutil
11 11 import os, stat, errno
12 12 import match as matchmod
13 13
14 14 propertycache = util.propertycache
15 15 filecache = scmutil.filecache
16 16 _rangemask = 0x7fffffff
17 17
18 18 dirstatetuple = parsers.dirstatetuple
19 19
class repocache(filecache):
    """filecache variant whose tracked files are resolved inside .hg/"""

    def join(self, obj, fname):
        # the owning object's opener knows how to build the path
        opener = obj._opener
        return opener.join(fname)
24 24
class rootcache(filecache):
    """filecache variant whose tracked files live in the repository root"""

    def join(self, obj, fname):
        # the owning object's _join() builds the path
        joiner = obj._join
        return joiner(fname)
29 29
30 30 class dirstate(object):
31 31
def __init__(self, opener, ui, root, validate):
    '''Set up a clean dirstate object with nothing loaded yet.

    opener is an open()-like callable that can be used to open the
    dirstate file; ui is the object configuration is read from; root
    is the root of the directory tracked by the dirstate; validate is
    the callable applied to parent nodes before they are returned.
    '''
    # collaborators handed in by the caller
    self._opener = opener
    self._ui = ui
    self._validate = validate
    self._root = root
    # ntpath.join(root, '') of Python 2.7.9 does not add sep if root is
    # UNC path pointing to root share (issue4557)
    self._rootdir = pathutil.normasprefix(root)
    self._filename = 'dirstate'
    # bookkeeping flags and counters
    self._filecache = {}
    self._dirty = False
    self._dirtypl = False
    self._lastnormaltime = 0
    self._parentwriters = 0
52 52
def beginparentchange(self):
    '''Open a (possibly nested) section that changes the dirstate parents.

    While such a section is open, an exception prevents the dirstate
    from being written when the wlock is released, so an incoherent
    dirstate whose parent does not match its contents is never stored.
    '''
    self._parentwriters = self._parentwriters + 1
61 61
def endparentchange(self):
    '''Close one section opened by beginparentchange().

    Once every open section has been closed, the wlock is free to
    write the dirstate on release. Closing with no section open is a
    no-op.
    '''
    if self._parentwriters <= 0:
        return
    self._parentwriters = self._parentwriters - 1
69 69
def pendingparentchange(self):
    '''Tell whether a parent-changing section is currently open, i.e.
    beginparentchange() has been called more often than
    endparentchange().
    '''
    return 0 < self._parentwriters
75 75
@propertycache
def _map(self):
    '''Load the dirstate and hand back its contents: a map from
    filename to (state, mode, size, time).'''
    # _read() assigns self._map, so the lookup below sees the loaded map
    self._read()
    contents = self._map
    return contents
82 82
@propertycache
def _copymap(self):
    '''Load the dirstate and hand back the copy map filled by _read().'''
    # _read() assigns self._copymap before parsing into it
    self._read()
    copymap = self._copymap
    return copymap
87 87
@propertycache
def _filefoldmap(self):
    '''Build the map from case-normalized file name to the name stored
    in the dirstate, leaving out entries in state 'r'.'''
    # use parsers.make_file_foldmap when the parsers module offers it
    makefilefoldmap = getattr(parsers, 'make_file_foldmap', None)
    if makefilefoldmap is not None:
        return makefilefoldmap(self._map, util.normcasespec,
                               util.normcasefallback)

    # fallback: fold every non-removed entry here
    normcase = util.normcase
    foldmap = dict((normcase(fname), fname)
                   for fname, entry in self._map.iteritems()
                   if entry[0] != 'r')
    foldmap['.'] = '.' # prevents useless util.fspath() invocation
    return foldmap
105 105
@propertycache
def _dirfoldmap(self):
    '''Build the map from case-normalized directory name to the
    directory name found in self._dirs.'''
    normcase = util.normcase
    return dict((normcase(dname), dname) for dname in self._dirs)
113 113
114 114 @repocache('branch')
115 115 def _branch(self):
116 116 try:
117 117 return self._opener.read("branch").strip() or "default"
118 118 except IOError, inst:
119 119 if inst.errno != errno.ENOENT:
120 120 raise
121 121 return "default"
122 122
123 123 @propertycache
124 124 def _pl(self):
125 125 try:
126 126 fp = self._opener(self._filename)
127 127 st = fp.read(40)
128 128 fp.close()
129 129 l = len(st)
130 130 if l == 40:
131 131 return st[:20], st[20:40]
132 132 elif l > 0 and l < 40:
133 133 raise util.Abort(_('working directory state appears damaged!'))
134 134 except IOError, err:
135 135 if err.errno != errno.ENOENT:
136 136 raise
137 137 return [nullid, nullid]
138 138
@propertycache
def _dirs(self):
    '''Directory collection built by util.dirs() from the dirstate
    map, called with 'r' as its second argument.'''
    dirs = util.dirs(self._map, 'r')
    return dirs
142 142
def dirs(self):
    '''Public accessor for the cached self._dirs collection.'''
    known = self._dirs
    return known
145 145
@rootcache('.hgignore')
def _ignore(self):
    '''Build the ignore matcher.

    Patterns come from the repository's .hgignore (when it exists)
    and from every "ignore" / "ignore.*" item of the [ui] config
    section. Returns util.never when there is no ignore file at all.
    '''
    files = []
    hgignore = self._join('.hgignore')
    if os.path.exists(hgignore):
        files.append(hgignore)
    for name, path in self._ui.configitems("ui"):
        if not (name == 'ignore' or name.startswith('ignore.')):
            continue
        # we need to use os.path.join here rather than self._join
        # because path is arbitrary and user-specified
        files.append(os.path.join(self._rootdir, util.expandpath(path)))

    if not files:
        return util.never

    pats = ['include:%s' % ignorefile for ignorefile in files]
    return matchmod.match(self._root, '', [], pats, warn=self._ui.warn)
162 162
@propertycache
def _slash(self):
    '''True-ish when ui.slash is set and the OS separator is not '/'.'''
    enabled = self._ui.configbool('ui', 'slash')
    if not enabled:
        return enabled
    return os.sep != '/'
166 166
@propertycache
def _checklink(self):
    '''Cached result of util.checklink() for the repository root.'''
    root = self._root
    return util.checklink(root)
170 170
@propertycache
def _checkexec(self):
    '''Cached result of util.checkexec() for the repository root.'''
    root = self._root
    return util.checkexec(root)
174 174
@propertycache
def _checkcase(self):
    '''Cached negation of util.checkcase() probed on the '.hg' path.'''
    hgdir = self._join('.hg')
    return not util.checkcase(hgdir)
178 178
179 179 def _join(self, f):
180 180 # much faster than os.path.join()
181 181 # it's safe because f is always a relative path
182 182 return self._rootdir + f
183 183
def flagfunc(self, buildfallback):
    '''Return a function mapping a file name to its flags.

    The returned function yields 'l', 'x' or ''. Which flags are
    taken from disk depends on self._checklink and self._checkexec;
    the others come from the function returned by buildfallback(),
    which is only called when at least one of the two is false.
    '''
    if self._checklink and self._checkexec:
        # both flags can be read straight from one lstat()
        def fromdisk(x):
            try:
                st = os.lstat(self._join(x))
                if util.statislink(st):
                    return 'l'
                if util.statisexec(st):
                    return 'x'
            except OSError:
                pass
            return ''
        return fromdisk

    fallback = buildfallback()
    if self._checklink:
        # links from disk, exec bit from the fallback
        def linkfromdisk(x):
            if os.path.islink(self._join(x)):
                return 'l'
            if 'x' in fallback(x):
                return 'x'
            return ''
        return linkfromdisk
    if not self._checkexec:
        # nothing can be read from disk
        return fallback

    # exec bit from disk, links from the fallback
    def execfromdisk(x):
        if 'l' in fallback(x):
            return 'l'
        if util.isexec(self._join(x)):
            return 'x'
        return ''
    return execfromdisk
217 217
@propertycache
def _cwd(self):
    '''Cached os.getcwd() result.'''
    here = os.getcwd()
    return here
221 221
def getcwd(self):
    '''Return the working directory relative to the repository root.

    The result is '' at the root itself, the remainder of the path
    when inside the repository, and the unchanged absolute path when
    outside of it.
    '''
    cwd = self._cwd
    if cwd == self._root:
        return ''
    # self._root ends with a path separator if self._root is '/' or 'C:\'
    prefix = self._root
    if not util.endswithsep(prefix):
        prefix += os.sep
    if not cwd.startswith(prefix):
        # we're outside the repo. return an absolute path.
        return cwd
    return cwd[len(prefix):]
235 235
def pathto(self, f, cwd=None):
    '''Return util.pathto() for f, computed from cwd (default:
    self.getcwd()), passed through util.pconvert() when self._slash
    is set.'''
    if cwd is None:
        cwd = self.getcwd()
    result = util.pathto(self._root, cwd, f)
    if not self._slash:
        return result
    return util.pconvert(result)
243 243
244 244 def __getitem__(self, key):
245 245 '''Return the current state of key (a filename) in the dirstate.
246 246
247 247 States are:
248 248 n normal
249 249 m needs merging
250 250 r marked for removal
251 251 a marked for addition
252 252 ? not tracked
253 253 '''
254 254 return self._map.get(key, ("?",))[0]
255 255
256 256 def __contains__(self, key):
257 257 return key in self._map
258 258
259 259 def __iter__(self):
260 260 for x in sorted(self._map):
261 261 yield x
262 262
def iteritems(self):
    '''Return the iteritems() iterator of the dirstate map.'''
    entries = self._map
    return entries.iteritems()
265 265
def parents(self):
    '''Return the list of parents, each passed through self._validate.'''
    validated = []
    for node in self._pl:
        validated.append(self._validate(node))
    return validated
268 268
def p1(self):
    '''Return the first parent, passed through self._validate.'''
    first = self._pl[0]
    return self._validate(first)
271 271
def p2(self):
    '''Return the second parent, passed through self._validate.'''
    second = self._pl[1]
    return self._validate(second)
274 274
def branch(self):
    '''Return the stored branch name converted by encoding.tolocal().'''
    stored = self._branch
    return encoding.tolocal(stored)
277 277
def setparents(self, p1, p2=nullid):
    """Set dirstate parents to p1 and p2.

    When moving from two parents to one, 'm' merged entries are
    adjusted to normal and previous copy records are discarded and
    returned by the call.

    Raises ValueError when called outside a
    beginparentchange()/endparentchange() section.

    See localrepo.setparents()
    """
    if self._parentwriters == 0:
        raise ValueError("cannot set dirstate parent without "
                         "calling dirstate.beginparentchange")

    self._dirty = True
    self._dirtypl = True
    oldp2 = self._pl[1]
    self._pl = p1, p2
    copies = {}
    if oldp2 == nullid or p2 != nullid:
        # not leaving a merge: nothing to fix up
        return copies

    for fname, entry in self._map.iteritems():
        state = entry[0]
        if state == 'm':
            # Discard 'm' markers when moving away from a merge state
            fixup = self.normallookup
        elif state == 'n' and entry[2] == -2:
            # Also fix up otherparent markers
            fixup = self.add
        else:
            continue
        # remember the copy record before the fixup discards it
        if fname in self._copymap:
            copies[fname] = self._copymap[fname]
        fixup(fname)
    return copies
308 308
def setbranch(self, branch):
    '''Record branch as the current branch and store it in the
    'branch' file, which is opened with atomictemp=True.

    On any failure the pending write is discarded and the exception
    is re-raised.
    '''
    self._branch = encoding.fromlocal(branch)
    fp = self._opener('branch', 'w', atomictemp=True)
    try:
        fp.write(self._branch + '\n')
        fp.close()

        # make sure filecache has the correct stat info for _branch after
        # replacing the underlying file
        cacheentry = self._filecache['_branch']
        if cacheentry:
            cacheentry.refresh()
    except: # re-raises
        fp.discard()
        raise
324 324
325 325 def _read(self):
326 326 self._map = {}
327 327 self._copymap = {}
328 328 try:
329 329 fp = self._opener.open(self._filename)
330 330 try:
331 331 st = fp.read()
332 332 finally:
333 333 fp.close()
334 334 except IOError, err:
335 335 if err.errno != errno.ENOENT:
336 336 raise
337 337 return
338 338 if not st:
339 339 return
340 340
341 if util.safehasattr(parsers, 'dict_new_presized'):
342 # Make an estimate of the number of files in the dirstate based on
343 # its size. From a linear regression on a set of real-world repos,
344 # all over 10,000 files, the size of a dirstate entry is 85
345 # bytes. The cost of resizing is significantly higher than the cost
346 # of filling in a larger presized dict, so subtract 20% from the
347 # size.
348 #
349 # This heuristic is imperfect in many ways, so in a future dirstate
350 # format update it makes sense to just record the number of entries
351 # on write.
352 self._map = parsers.dict_new_presized(len(st) / 71)
353
341 354 # Python's garbage collector triggers a GC each time a certain number
342 355 # of container objects (the number being defined by
343 356 # gc.get_threshold()) are allocated. parse_dirstate creates a tuple
344 357 # for each file in the dirstate. The C version then immediately marks
345 358 # them as not to be tracked by the collector. However, this has no
346 359 # effect on when GCs are triggered, only on what objects the GC looks
347 360 # into. This means that O(number of files) GCs are unavoidable.
348 361 # Depending on when in the process's lifetime the dirstate is parsed,
349 362 # this can get very expensive. As a workaround, disable GC while
350 363 # parsing the dirstate.
351 364 #
352 365 # (we cannot decorate the function directly since it is in a C module)
353 366 parse_dirstate = util.nogc(parsers.parse_dirstate)
354 367 p = parse_dirstate(self._map, self._copymap, st)
355 368 if not self._dirtypl:
356 369 self._pl = p
357 370
def invalidate(self):
    '''Forget everything loaded or cached and reset the bookkeeping.

    The cached attributes are dropped from the instance and the
    dirty flag, the last-normal timestamp and the parent-writer
    counter are reset.
    '''
    cached = ("_map", "_copymap", "_filefoldmap", "_dirfoldmap", "_branch",
              "_pl", "_dirs", "_ignore")
    for attr in cached:
        # only attributes actually present on the instance can be deleted
        if attr in self.__dict__:
            delattr(self, attr)
    self._lastnormaltime = 0
    self._dirty = False
    self._parentwriters = 0
366 379
def copy(self, source, dest):
    """Record that dest was copied from source.

    A source of None removes any copy record for dest. Nothing
    happens when source equals dest.
    """
    if source == dest:
        return
    self._dirty = True
    if source is None:
        if dest in self._copymap:
            del self._copymap[dest]
    else:
        self._copymap[dest] = source
376 389
def copied(self, file):
    '''Return the recorded copy source of file, or None if there is none.'''
    copymap = self._copymap
    return copymap.get(file)
379 392
def copies(self):
    '''Return the copy map itself (not a copy of it).'''
    copymap = self._copymap
    return copymap
382 395
383 396 def _droppath(self, f):
384 397 if self[f] not in "?r" and "_dirs" in self.__dict__:
385 398 self._dirs.delpath(f)
386 399
def _addpath(self, f, state, mode, size, mtime):
    '''Store a dirstate entry for f and mark the dirstate dirty.

    When f is being added, or was marked removed before, the name is
    checked first: util.Abort is raised if f is a known directory or
    if one of its parent directories is a tracked, non-removed file.
    '''
    previous = self[f]
    if state == 'a' or previous == 'r':
        scmutil.checkfilename(f)
        if f in self._dirs:
            raise util.Abort(_('directory %r already in dirstate') % f)
        # shadows
        for parent in util.finddirs(f):
            if parent in self._dirs:
                break
            if parent in self._map and self[parent] != 'r':
                raise util.Abort(
                    _('file %r in dirstate clashes with %r') % (parent, f))
    # only update the directory collection when it has been built
    if previous in "?r" and "_dirs" in self.__dict__:
        self._dirs.addpath(f)
    self._dirty = True
    self._map[f] = dirstatetuple(state, mode, size, mtime)
404 417
def normal(self, f):
    '''Record f in state 'n' using its current lstat() data and drop
    any copy record for it.'''
    st = os.lstat(self._join(f))
    mtime = int(st.st_mtime)
    self._addpath(f, 'n', st.st_mode,
                  st.st_size & _rangemask, mtime & _rangemask)
    if f in self._copymap:
        del self._copymap[f]
    if self._lastnormaltime < mtime:
        # Remember the most recent modification timeslot for status(),
        # to make sure we won't miss future size-preserving file content
        # modifications that happen within the same timeslot.
        self._lastnormaltime = mtime
418 431
def normallookup(self, f):
    '''Record f in state 'n' with size and mtime -1, or restore or
    keep its merge-related state while a merge is in progress.'''
    if self._pl[1] != nullid and f in self._map:
        # if there is a merge going on and the file was either
        # in state 'm' (-1) or coming from other parent (-2) before
        # being removed, restore that state.
        entry = self._map[f]
        state, size = entry[0], entry[2]
        if state == 'r' and size in (-1, -2):
            source = self._copymap.get(f)
            if size == -1:
                self.merge(f)
            elif size == -2:
                self.otherparent(f)
            # put back the copy record taken before the call above
            if source:
                self.copy(source, f)
            return
        if state == 'm' or (state == 'n' and size == -2):
            # entry already carries the merge information; keep it
            return
    self._addpath(f, 'n', 0, -1, -1)
    if f in self._copymap:
        del self._copymap[f]
440 453
def otherparent(self, f):
    '''Record f with size -2 (coming from the other parent).

    Raises util.Abort when the second parent is nullid.
    '''
    if self._pl[1] == nullid:
        raise util.Abort(_("setting %r to other parent "
                           "only allowed in merges") % f)
    if f in self and self[f] == 'n':
        # merge-like
        newstate = 'm'
    else:
        # add-like
        newstate = 'n'
    self._addpath(f, newstate, 0, -2, -1)

    if f in self._copymap:
        del self._copymap[f]
455 468
def add(self, f):
    '''Record f in state 'a' and drop any copy record for it.'''
    self._addpath(f, 'a', 0, -1, -1)
    copymap = self._copymap
    if f in copymap:
        del copymap[f]
461 474
def remove(self, f):
    '''Record f in state 'r'.

    While a merge is in progress the previous merge state is kept in
    the size field: -1 for 'm', -2 for other-parent entries. The copy
    record is dropped only when no such state was saved.
    '''
    self._dirty = True
    self._droppath(f)
    saved = 0
    if self._pl[1] != nullid and f in self._map:
        # backup the previous state
        entry = self._map[f]
        state = entry[0]
        if state == 'm': # merge
            saved = -1
        elif state == 'n' and entry[2] == -2: # other parent
            saved = -2
    self._map[f] = dirstatetuple('r', 0, saved, 0)
    if saved == 0 and f in self._copymap:
        del self._copymap[f]
477 490
def merge(self, f):
    '''Record f as merged: delegates to otherparent() when there is a
    second parent and to normallookup() otherwise.'''
    if self._pl[1] != nullid:
        return self.otherparent(f)
    return self.normallookup(f)
483 496
def drop(self, f):
    '''Forget f entirely; does nothing when f has no entry.'''
    if f not in self._map:
        return
    self._dirty = True
    self._droppath(f)
    del self._map[f]
490 503
491 504 def _discoverpath(self, path, normed, ignoremissing, exists, storemap):
492 505 if exists is None:
493 506 exists = os.path.lexists(os.path.join(self._root, path))
494 507 if not exists:
495 508 # Maybe a path component exists
496 509 if not ignoremissing and '/' in path:
497 510 d, f = path.rsplit('/', 1)
498 511 d = self._normalize(d, False, ignoremissing, None)
499 512 folded = d + "/" + f
500 513 else:
501 514 # No path components, preserve original case
502 515 folded = path
503 516 else:
504 517 # recursively normalize leading directory components
505 518 # against dirstate
506 519 if '/' in normed:
507 520 d, f = normed.rsplit('/', 1)
508 521 d = self._normalize(d, False, ignoremissing, True)
509 522 r = self._root + "/" + d
510 523 folded = d + "/" + util.fspath(f, r)
511 524 else:
512 525 folded = util.fspath(normed, self._root)
513 526 storemap[normed] = folded
514 527
515 528 return folded
516 529
def _normalizefile(self, path, isknown, ignoremissing=False, exists=None):
    '''Case-normalize path against the file fold map only.

    Unknown names are returned unchanged when isknown is set and are
    otherwise resolved via _discoverpath(), which stores its findings
    in the file fold map.
    '''
    normed = util.normcase(path)
    folded = self._filefoldmap.get(normed, None)
    if folded is not None:
        return folded
    if isknown:
        return path
    return self._discoverpath(path, normed, ignoremissing, exists,
                              self._filefoldmap)
527 540
def _normalize(self, path, isknown, ignoremissing=False, exists=None):
    '''Case-normalize path against the file fold map, then the
    directory fold map.

    Unknown names are returned unchanged when isknown is set and are
    otherwise resolved via _discoverpath().
    '''
    normed = util.normcase(path)
    folded = self._filefoldmap.get(normed, None)
    if folded is None:
        folded = self._dirfoldmap.get(normed, None)
    if folded is not None:
        return folded
    if isknown:
        return path
    # store discovered result in dirfoldmap so that future
    # normalizefile calls don't start matching directories
    return self._discoverpath(path, normed, ignoremissing, exists,
                              self._dirfoldmap)
542 555
def normalize(self, path, isknown=False, ignoremissing=False):
    '''
    normalize the case of a pathname when on a casefolding filesystem

    isknown specifies whether the filename came from walking the
    disk, to avoid extra filesystem access.

    If ignoremissing is True, missing paths are returned
    unchanged. Otherwise, we try harder to normalize possibly
    existing path components.

    The normalized case is determined based on the following precedence:

    - version of name already stored in the dirstate
    - version of name stored on disk
    - version provided via command arguments
    '''

    if not self._checkcase:
        # nothing to fold: hand the path back untouched
        return path
    return self._normalize(path, isknown, ignoremissing)
564 577
def clear(self):
    '''Empty the dirstate in memory: no entries, no copy records,
    null parents. The dirstate is marked dirty.'''
    self._map = {}
    # the cached directory collection no longer matches the empty map
    if "_dirs" in self.__dict__:
        delattr(self, "_dirs")
    self._copymap = {}
    self._pl = [nullid, nullid]
    self._lastnormaltime = 0
    self._dirty = True
573 586
574 587 def rebuild(self, parent, allfiles, changedfiles=None):
575 588 if changedfiles is None:
576 589 changedfiles = allfiles
577 590 oldmap = self._map
578 591 self.clear()
579 592 for f in allfiles:
580 593 if f not in changedfiles:
581 594 self._map[f] = oldmap[f]
582 595 else:
583 596 if 'x' in allfiles.flags(f):
584 597 self._map[f] = dirstatetuple('n', 0777, -1, 0)
585 598 else:
586 599 self._map[f] = dirstatetuple('n', 0666, -1, 0)
587 600 self._pl = (parent, nullid)
588 601 self._dirty = True
589 602
def write(self):
    '''Write the dirstate file if there are unsaved changes, then
    reset the dirty flags and the last-normal timestamp.'''
    if not self._dirty:
        return

    # enough 'delaywrite' prevents 'pack_dirstate' from dropping
    # timestamp of each entries in dirstate, because of 'now > mtime'
    delay = self._ui.configint('debug', 'dirstate.delaywrite', 0)
    if delay > 0:
        import time # to avoid useless import
        time.sleep(delay)

    fp = self._opener(self._filename, "w", atomictemp=True)
    # use the modification time of the newly created temporary file as the
    # filesystem's notion of 'now'
    now = util.fstat(fp).st_mtime
    packed = parsers.pack_dirstate(self._map, self._copymap, self._pl, now)
    fp.write(packed)
    fp.close()
    self._lastnormaltime = 0
    self._dirty = False
    self._dirtypl = False
609 622
610 623 def _dirignore(self, f):
611 624 if f == '.':
612 625 return False
613 626 if self._ignore(f):
614 627 return True
615 628 for p in util.finddirs(f):
616 629 if self._ignore(p):
617 630 return True
618 631 return False
619 632
620 633 def _walkexplicit(self, match, subrepos):
621 634 '''Get stat data about the files explicitly specified by match.
622 635
623 636 Return a triple (results, dirsfound, dirsnotfound).
624 637 - results is a mapping from filename to stat result. It also contains
625 638 listings mapping subrepos and .hg to None.
626 639 - dirsfound is a list of files found to be directories.
627 640 - dirsnotfound is a list of files that the dirstate thinks are
628 641 directories and that were not found.'''
629 642
# helper: build the "unsupported file type" message for a file kind
630 643 def badtype(mode):
631 644 kind = _('unknown')
632 645 if stat.S_ISCHR(mode):
633 646 kind = _('character device')
634 647 elif stat.S_ISBLK(mode):
635 648 kind = _('block device')
636 649 elif stat.S_ISFIFO(mode):
637 650 kind = _('fifo')
638 651 elif stat.S_ISSOCK(mode):
639 652 kind = _('socket')
640 653 elif stat.S_ISDIR(mode):
641 654 kind = _('directory')
642 655 return _('unsupported file type (type is %s)') % kind
643 656
# bind frequently used attributes and functions to local names
644 657 matchedir = match.explicitdir
645 658 badfn = match.bad
646 659 dmap = self._map
647 660 lstat = os.lstat
648 661 getkind = stat.S_IFMT
649 662 dirkind = stat.S_IFDIR
650 663 regkind = stat.S_IFREG
651 664 lnkkind = stat.S_IFLNK
652 665 join = self._join
653 666 dirsfound = []
654 667 foundadd = dirsfound.append
655 668 dirsnotfound = []
656 669 notfoundadd = dirsnotfound.append
657 670
# names are only case-normalized for non-exact matchers when
# self._checkcase is set
658 671 if not match.isexact() and self._checkcase:
659 672 normalize = self._normalize
660 673 else:
661 674 normalize = None
662 675
# drop explicitly listed files that live under a subrepo path; both
# lists are sorted (subrepos is sorted in place), so they are walked
# in lockstep
663 676 files = sorted(match.files())
664 677 subrepos.sort()
665 678 i, j = 0, 0
666 679 while i < len(files) and j < len(subrepos):
667 680 subpath = subrepos[j] + "/"
668 681 if files[i] < subpath:
669 682 i += 1
670 683 continue
671 684 while i < len(files) and files[i].startswith(subpath):
672 685 del files[i]
673 686 j += 1
674 687
# with no explicit files left, or with '.' among them, only '.' is
# looked at
675 688 if not files or '.' in files:
676 689 files = ['.']
# seed results with None placeholders for every subrepo and for '.hg';
# names already in results are skipped by the loop below
677 690 results = dict.fromkeys(subrepos)
678 691 results['.hg'] = None
679 692
# alldirs is built lazily, the first time a name is found neither on
# disk nor in dmap
680 693 alldirs = None
681 694 for ff in files:
682 695 # constructing the foldmap is expensive, so don't do it for the
683 696 # common case where files is ['.']
684 697 if normalize and ff != '.':
685 698 nf = normalize(ff, False, True)
686 699 else:
687 700 nf = ff
688 701 if nf in results:
689 702 continue
690 703
691 704 try:
692 705 st = lstat(join(nf))
693 706 kind = getkind(st.st_mode)
694 707 if kind == dirkind:
695 708 if nf in dmap:
696 709 # file replaced by dir on disk but still in dirstate
697 710 results[nf] = None
698 711 if matchedir:
699 712 matchedir(nf)
# directories are recorded as (normalized name, name as given)
700 713 foundadd((nf, ff))
701 714 elif kind == regkind or kind == lnkkind:
702 715 results[nf] = st
703 716 else:
# neither directory, regular file nor symlink: report it through
# match.bad
704 717 badfn(ff, badtype(kind))
705 718 if nf in dmap:
706 719 results[nf] = None
707 720 except OSError, inst: # nf not found on disk - it is dirstate only
708 721 if nf in dmap: # does it exactly match a missing file?
709 722 results[nf] = None
710 723 else: # does it match a missing directory?
711 724 if alldirs is None:
712 725 alldirs = util.dirs(dmap)
713 726 if nf in alldirs:
714 727 if matchedir:
715 728 matchedir(nf)
716 729 notfoundadd(nf)
717 730 else:
718 731 badfn(ff, inst.strerror)
719 732
720 733 return results, dirsfound, dirsnotfound
721 734
722 735 def walk(self, match, subrepos, unknown, ignored, full=True):
723 736 '''
724 737 Walk recursively through the directory tree, finding all files
725 738 matched by match.
726 739
727 740 If full is False, maybe skip some known-clean files.
728 741
729 742 Return a dict mapping filename to stat-like object (either
730 743 mercurial.osutil.stat instance or return value of os.stat()).
731 744
732 745 '''
733 746 # full is a flag that extensions that hook into walk can use -- this
734 747 # implementation doesn't use it at all. This satisfies the contract
735 748 # because we only guarantee a "maybe".
736 749
# pick the ignore predicates used for files (ignore) and for
# directories (dirignore) from the ignored/unknown flags
737 750 if ignored:
738 751 ignore = util.never
739 752 dirignore = util.never
740 753 elif unknown:
741 754 ignore = self._ignore
742 755 dirignore = self._dirignore
743 756 else:
744 757 # if not unknown and not ignored, drop dir recursion and step 2
745 758 ignore = util.always
746 759 dirignore = util.always
747 760
# bind frequently used attributes and functions to local names
748 761 matchfn = match.matchfn
749 762 matchalways = match.always()
750 763 matchtdir = match.traversedir
751 764 dmap = self._map
752 765 listdir = osutil.listdir
753 766 lstat = os.lstat
754 767 dirkind = stat.S_IFDIR
755 768 regkind = stat.S_IFREG
756 769 lnkkind = stat.S_IFLNK
757 770 join = self._join
758 771
759 772 exact = skipstep3 = False
760 773 if match.isexact(): # match.exact
761 774 exact = True
762 775 dirignore = util.always # skip step 2
763 776 elif match.prefix(): # match.match, no patterns
764 777 skipstep3 = True
765 778
# NOTE(review): normalize is bound to self._normalize in both branches,
# so the "not normalize" test used for alreadynormed below is never
# true; only normalizefile differs between the branches.
766 779 if not exact and self._checkcase:
767 780 normalize = self._normalize
768 781 normalizefile = self._normalizefile
769 782 skipstep3 = False
770 783 else:
771 784 normalize = self._normalize
772 785 normalizefile = None
773 786
774 787 # step 1: find all explicit files
775 788 results, work, dirsnotfound = self._walkexplicit(match, subrepos)
776 789
# step 3 can only be skipped when step 1 found no directories at all;
# ignored directories are then dropped from the work list
777 790 skipstep3 = skipstep3 and not (work or dirsnotfound)
778 791 work = [d for d in work if not dirignore(d[0])]
779 792
780 793 # step 2: visit subdirectories
781 794 def traverse(work, alreadynormed):
782 795 wadd = work.append
783 796 while work:
784 797 nd = work.pop()
785 798 skip = None
# '.' is listed as the empty relative path, with no skip value; every
# other directory is listed with skip='.hg'
786 799 if nd == '.':
787 800 nd = ''
788 801 else:
789 802 skip = '.hg'
790 803 try:
791 804 entries = listdir(join(nd), stat=True, skip=skip)
792 805 except OSError, inst:
793 806 if inst.errno in (errno.EACCES, errno.ENOENT):
794 807 match.bad(self.pathto(nd), inst.strerror)
795 808 continue
796 809 raise
797 810 for f, kind, st in entries:
798 811 if normalizefile:
799 812 # even though f might be a directory, we're only
800 813 # interested in comparing it to files currently in the
801 814 # dmap -- therefore normalizefile is enough
802 815 nf = normalizefile(nd and (nd + "/" + f) or f, True,
803 816 True)
804 817 else:
805 818 nf = nd and (nd + "/" + f) or f
806 819 if nf not in results:
807 820 if kind == dirkind:
808 821 if not ignore(nf):
809 822 if matchtdir:
810 823 matchtdir(nf)
811 824 wadd(nf)
# a directory on disk whose name is tracked in dmap gets a None result
812 825 if nf in dmap and (matchalways or matchfn(nf)):
813 826 results[nf] = None
814 827 elif kind == regkind or kind == lnkkind:
815 828 if nf in dmap:
816 829 if matchalways or matchfn(nf):
817 830 results[nf] = st
818 831 elif ((matchalways or matchfn(nf))
819 832 and not ignore(nf)):
820 833 # unknown file -- normalize if necessary
821 834 if not alreadynormed:
822 835 nf = normalize(nf, False, True)
823 836 results[nf] = st
# any other file kind: tracked names get a None result
824 837 elif nf in dmap and (matchalways or matchfn(nf)):
825 838 results[nf] = None
826 839
827 840 for nd, d in work:
828 841 # alreadynormed means that processwork doesn't have to do any
829 842 # expensive directory normalization
830 843 alreadynormed = not normalize or nd == d
831 844 traverse([d], alreadynormed)
832 845
# remove the None placeholders for subrepos and '.hg' again
833 846 for s in subrepos:
834 847 del results[s]
835 848 del results['.hg']
836 849
837 850 # step 3: visit remaining files from dmap
838 851 if not skipstep3 and not exact:
839 852 # If a dmap file is not in results yet, it was either
840 853 # a) not matching matchfn b) ignored, c) missing, or d) under a
841 854 # symlink directory.
842 855 if not results and matchalways:
843 856 visit = dmap.keys()
844 857 else:
845 858 visit = [f for f in dmap if f not in results and matchfn(f)]
846 859 visit.sort()
847 860
848 861 if unknown:
849 862 # unknown == True means we walked all dirs under the roots
850 863 # that wasn't ignored, and everything that matched was stat'ed
851 864 # and is already in results.
852 865 # The rest must thus be ignored or under a symlink.
853 866 audit_path = pathutil.pathauditor(self._root)
854 867
855 868 for nf in iter(visit):
856 869 # If a stat for the same file was already added with a
857 870 # different case, don't add one for this, since that would
858 871 # make it appear as if the file exists under both names
859 872 # on disk.
860 873 if (normalizefile and
861 874 normalizefile(nf, True, True) in results):
862 875 results[nf] = None
863 876 # Report ignored items in the dmap as long as they are not
864 877 # under a symlink directory.
865 878 elif audit_path.check(nf):
866 879 try:
867 880 results[nf] = lstat(join(nf))
868 881 # file was just ignored, no links, and exists
869 882 except OSError:
870 883 # file doesn't exist
871 884 results[nf] = None
872 885 else:
873 886 # It's either missing or under a symlink directory
874 887 # which we in this case report as missing
875 888 results[nf] = None
876 889 else:
877 890 # We may not have walked the full directory tree above,
878 891 # so stat and check everything we missed.
# nf() advances an iterator over visit, so each result from
# util.statfiles() is stored under the next name of visit in turn
879 892 nf = iter(visit).next
880 893 for st in util.statfiles([join(i) for i in visit]):
881 894 results[nf()] = st
882 895 return results
883 896
884 897 def status(self, match, subrepos, ignored, clean, unknown):
885 898 '''Determine the status of the working copy relative to the
886 899 dirstate and return a pair of (unsure, status), where status is of type
887 900 scmutil.status and:
888 901
889 902 unsure:
890 903 files that might have been modified since the dirstate was
891 904 written, but need to be read to be sure (size is the same
892 905 but mtime differs)
893 906 status.modified:
894 907 files that have definitely been modified since the dirstate
895 908 was written (different size or mode)
896 909 status.clean:
897 910 files that have definitely not been modified since the
898 911 dirstate was written
899 912 '''
# the flag arguments are saved under list* names first because the names
# ignored, clean and unknown are rebound to the result lists right below
900 913 listignored, listclean, listunknown = ignored, clean, unknown
901 914 lookup, modified, added, unknown, ignored = [], [], [], [], []
902 915 removed, deleted, clean = [], [], []
903 916
# bind the append methods and frequently used attributes to local names
904 917 dmap = self._map
905 918 ladd = lookup.append # aka "unsure"
906 919 madd = modified.append
907 920 aadd = added.append
908 921 uadd = unknown.append
909 922 iadd = ignored.append
910 923 radd = removed.append
911 924 dadd = deleted.append
912 925 cadd = clean.append
913 926 mexact = match.exact
914 927 dirignore = self._dirignore
915 928 checkexec = self._checkexec
916 929 copymap = self._copymap
917 930 lastnormaltime = self._lastnormaltime
918 931
919 932 # We need to do full walks when either
920 933 # - we're listing all clean files, or
921 934 # - match.traversedir does something, because match.traversedir should
922 935 # be called for every dir in the working dir
923 936 full = listclean or match.traversedir is not None
924 937 for fn, st in self.walk(match, subrepos, listunknown, listignored,
925 938 full=full).iteritems():
# fn has no dirstate entry: classify it as ignored or unknown and move on
926 939 if fn not in dmap:
927 940 if (listignored or mexact(fn)) and dirignore(fn):
928 941 if listignored:
929 942 iadd(fn)
930 943 else:
931 944 uadd(fn)
932 945 continue
933 946
934 947 # This is equivalent to 'state, mode, size, time = dmap[fn]' but not
935 948 # written like that for performance reasons. dmap[fn] is not a
936 949 # Python tuple in compiled builds. The CPython UNPACK_SEQUENCE
937 950 # opcode has fast paths when the value to be unpacked is a tuple or
938 951 # a list, but falls back to creating a full-fledged iterator in
939 952 # general. That is much slower than simply accessing and storing the
940 953 # tuple members one by one.
941 954 t = dmap[fn]
942 955 state = t[0]
943 956 mode = t[1]
944 957 size = t[2]
945 958 time = t[3]
946 959
# entry in state n, m or a for which walk() returned no stat: deleted
947 960 if not st and state in "nma":
948 961 dadd(fn)
949 962 elif state == 'n':
950 963 mtime = int(st.st_mtime)
# modified when the recorded size is valid and the size or (if
# checkexec) the 0100 mode bit differs, when the entry is marked as
# other-parent, or when it has a copy record
951 964 if (size >= 0 and
952 965 ((size != st.st_size and size != st.st_size & _rangemask)
953 966 or ((mode ^ st.st_mode) & 0100 and checkexec))
954 967 or size == -2 # other parent
955 968 or fn in copymap):
956 969 madd(fn)
# recorded time differs from the file's mtime: unsure
957 970 elif time != mtime and time != mtime & _rangemask:
958 971 ladd(fn)
959 972 elif mtime == lastnormaltime:
960 973 # fn may have just been marked as normal and it may have
961 974 # changed in the same second without changing its size.
962 975 # This can happen if we quickly do multiple commits.
963 976 # Force lookup, so we don't miss such a racy file change.
964 977 ladd(fn)
965 978 elif listclean:
966 979 cadd(fn)
967 980 elif state == 'm':
968 981 madd(fn)
969 982 elif state == 'a':
970 983 aadd(fn)
971 984 elif state == 'r':
972 985 radd(fn)
973 986
974 987 return (lookup, scmutil.status(modified, added, removed, deleted,
975 988 unknown, ignored, clean))
976 989
def matches(self, match):
    '''
    return files in the dirstate (in whatever state) filtered by match
    '''
    dmap = self._map
    if match.always():
        return dmap.keys()
    files = match.files()
    if match.isexact():
        # fast path -- filter the other way around, since typically files is
        # much smaller than dmap
        tracked = []
        for name in files:
            if name in dmap:
                tracked.append(name)
        return tracked
    if match.prefix():
        for name in files:
            if name not in dmap:
                break
        else:
            # fast path -- all the values are known to be files, so just
            # return that
            return list(files)
    selected = []
    for name in dmap:
        if match(name):
            selected.append(name)
    return selected
General Comments 0
You need to be logged in to leave comments. Login now