##// END OF EJS Templates
dirstate: batch calls to statfiles (issue4878)...
Matt Mackall -
r26592:502b56a9 default
parent child Browse files
Show More
@@ -1,1047 +1,1053 b''
1 1 # dirstate.py - working directory tracking for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from node import nullid
9 9 from i18n import _
10 10 import scmutil, util, osutil, parsers, encoding, pathutil, error
11 11 import os, stat, errno
12 12 import match as matchmod
13 13
14 14 propertycache = util.propertycache
15 15 filecache = scmutil.filecache
16 16 _rangemask = 0x7fffffff
17 17
18 18 dirstatetuple = parsers.dirstatetuple
19 19
class repocache(filecache):
    """filecache variant for files that live inside .hg/"""

    def join(self, obj, fname):
        # Resolve fname through the owning object's opener.
        opener = obj._opener
        return opener.join(fname)
24 24
class rootcache(filecache):
    """filecache variant for files that live in the repository root"""

    def join(self, obj, fname):
        # Delegate to the owning object's own path-joining helper.
        joiner = obj._join
        return joiner(fname)
29 29
30 30 class dirstate(object):
31 31
def __init__(self, opener, ui, root, validate):
    '''Create a new dirstate object.

    opener is an open()-like callable used to open the dirstate file,
    ui is the configuration/output object, root is the root of the
    directory tracked by the dirstate, and validate is a callable that
    is applied to parent nodes before they are handed to callers.
    '''
    self._ui = ui
    self._opener = opener
    self._validate = validate
    self._root = root
    # ntpath.join(root, '') of Python 2.7.9 does not add sep if root is
    # UNC path pointing to root share (issue4557)
    self._rootdir = pathutil.normasprefix(root)

    # bookkeeping: nothing loaded, nothing modified yet
    self._filename = 'dirstate'
    self._filecache = {}
    self._dirty = False
    self._dirtypl = False
    self._lastnormaltime = 0
    self._parentwriters = 0

    # internal config: ui.forcecwd
    # (when set, it takes the place of the lazily computed _cwd)
    forcedcwd = ui.config('ui', 'forcecwd')
    if forcedcwd:
        self._cwd = forcedcwd
56 56
def beginparentchange(self):
    '''Open a (nestable) section that changes the dirstate parents.

    If an exception happens before the matching endparentchange(),
    the dirstate will not be written when the wlock is released, so
    that no incoherent dirstate (parents not matching the contents)
    ends up on disk.
    '''
    self._parentwriters = self._parentwriters + 1
65 65
def endparentchange(self):
    '''Close one section opened by beginparentchange().

    Once every open section has been closed, the wlock is free to
    write the dirstate on release.  Calling this with no section open
    is a no-op.
    '''
    if self._parentwriters <= 0:
        return
    self._parentwriters -= 1
73 73
def pendingparentchange(self):
    '''Tell whether a beginparentchange() section is still open, i.e.
    whether the dirstate parents are in the middle of being modified.
    '''
    return 0 < self._parentwriters
79 79
@propertycache
def _map(self):
    '''Lazily loaded dirstate contents: a map from filename to
    (state, mode, size, time).'''
    # _read() assigns self._map on the instance, so the attribute
    # lookup below yields the freshly loaded mapping.
    self._read()
    loaded = self._map
    return loaded
86 86
@propertycache
def _copymap(self):
    '''Lazily loaded copy records (see copy(): dest -> source).'''
    # _read() assigns self._copymap on the instance as a side effect.
    self._read()
    loaded = self._copymap
    return loaded
91 91
@propertycache
def _filefoldmap(self):
    '''Map from case-normalized filename to the spelling stored in the
    dirstate, skipping entries in state 'r'.'''
    try:
        makefilefoldmap = parsers.make_file_foldmap
    except AttributeError:
        # parsers does not provide make_file_foldmap: build the map
        # here instead
        foldmap = {}
        normcase = util.normcase
        for fname, entry in self._map.iteritems():
            if entry[0] == 'r':
                continue
            foldmap[normcase(fname)] = fname
        foldmap['.'] = '.' # prevents useless util.fspath() invocation
        return foldmap

    return makefilefoldmap(self._map, util.normcasespec,
                           util.normcasefallback)
109 109
@propertycache
def _dirfoldmap(self):
    '''Map from case-normalized directory name to the spelling found
    in self._dirs.'''
    normcase = util.normcase
    return dict((normcase(dirname), dirname) for dirname in self._dirs)
117 117
@repocache('branch')
def _branch(self):
    '''Branch name read from the "branch" file, or "default" when the
    file is missing or blank.'''
    try:
        name = self._opener.read("branch").strip()
    except IOError as inst:
        # only a missing file is tolerated; anything else propagates
        if inst.errno == errno.ENOENT:
            return "default"
        raise
    if not name:
        return "default"
    return name
126 126
@propertycache
def _pl(self):
    '''Parents of the working directory, read from the first 40 bytes of
    the dirstate file.

    Returns a (p1, p2) pair of 20-byte strings when the header is
    complete, and [nullid, nullid] when the file is missing or empty.
    Raises error.Abort when the header is truncated (1..39 bytes);
    IOErrors other than ENOENT propagate.
    '''
    try:
        fp = self._opener(self._filename)
        try:
            st = fp.read(40)
        finally:
            # close the handle even if the read fails, like _read() does
            fp.close()
        l = len(st)
        if l == 40:
            return st[:20], st[20:40]
        elif l > 0 and l < 40:
            raise error.Abort(_('working directory state appears damaged!'))
    except IOError as err:
        if err.errno != errno.ENOENT:
            raise
    return [nullid, nullid]
142 142
@propertycache
def _dirs(self):
    '''Directory collection built by util.dirs() from the dirstate map.'''
    # NOTE(review): the 'r' argument presumably makes util.dirs skip
    # entries in state 'r' (removed) -- confirm against util.dirs
    skipstate = 'r'
    return util.dirs(self._map, skipstate)
146 146
def dirs(self):
    '''Public accessor for the cached directory collection (_dirs).'''
    tracked = self._dirs
    return tracked
149 149
@rootcache('.hgignore')
def _ignore(self):
    '''Matcher built from the ignore files: the root .hgignore (when it
    exists) plus every ui.ignore / ui.ignore.* config entry.  When no
    ignore file is found, util.never is returned instead.'''
    pats = []
    hgignore = self._join('.hgignore')
    if os.path.exists(hgignore):
        pats.append('include:%s' % hgignore)
    for name, path in self._ui.configitems("ui"):
        if name != 'ignore' and not name.startswith('ignore.'):
            continue
        # we need to use os.path.join here rather than self._join
        # because path is arbitrary and user-specified
        extra = os.path.join(self._rootdir, util.expandpath(path))
        pats.append('include:%s' % extra)

    if not pats:
        return util.never

    return matchmod.match(self._root, '', [], pats, warn=self._ui.warn)
166 166
@propertycache
def _slash(self):
    '''Truthy when ui.slash is enabled and the native separator is not
    already '/' (used by pathto() to decide on util.pconvert).'''
    wantslash = self._ui.configbool('ui', 'slash')
    if not wantslash:
        return wantslash
    return os.sep != '/'
170 170
@propertycache
def _checklink(self):
    '''Cached result of util.checklink() for the repository root.'''
    # NOTE(review): presumably "does this filesystem support
    # symlinks?" -- confirm against util.checklink
    root = self._root
    return util.checklink(root)
174 174
@propertycache
def _checkexec(self):
    '''Cached result of util.checkexec() for the repository root.'''
    # NOTE(review): presumably "does this filesystem track the exec
    # bit?" -- confirm against util.checkexec
    root = self._root
    return util.checkexec(root)
178 178
@propertycache
def _checkcase(self):
    '''Negation of util.checkcase() on the .hg directory; when true,
    normalize() folds path case.'''
    hgdir = self._join('.hg')
    result = util.checkcase(hgdir)
    return not result
182 182
def _join(self, f):
    '''Return f prefixed with the repository root directory.'''
    # much faster than os.path.join()
    # it's safe because f is always a relative path
    prefix = self._rootdir
    return prefix + f
187 187
def flagfunc(self, buildfallback):
    '''Return a function mapping a filename to its flags: 'l', 'x'
    or ''.

    What can be read from the working directory depends on
    self._checklink and self._checkexec; anything that cannot is
    answered by the function that buildfallback() returns.
    buildfallback is only called when at least one of the two is
    false.
    '''
    if self._checklink and self._checkexec:
        # both flags can be read from a single lstat()
        def statflags(path):
            try:
                st = os.lstat(self._join(path))
                if util.statislink(st):
                    return 'l'
                if util.statisexec(st):
                    return 'x'
                return ''
            except OSError:
                return ''
        return statflags

    fallback = buildfallback()

    if self._checklink:
        # link flag from disk, exec flag from the fallback
        def linkflags(path):
            if os.path.islink(self._join(path)):
                return 'l'
            if 'x' in fallback(path):
                return 'x'
            return ''
        return linkflags

    if not self._checkexec:
        # nothing can be read from disk
        return fallback

    # exec flag from disk, link flag from the fallback
    def execflags(path):
        if 'l' in fallback(path):
            return 'l'
        if util.isexec(self._join(path)):
            return 'x'
        return ''
    return execflags
221 221
@propertycache
def _cwd(self):
    '''Current working directory of the process, looked up once.'''
    here = os.getcwd()
    return here
225 225
def getcwd(self):
    '''Return the path from which a canonical path is calculated.

    The result is '' when the cwd is the repository root, the
    root-relative path when the cwd is below the root, and the
    absolute cwd otherwise.

    This path should be used to resolve file patterns or to convert
    canonical paths back to file paths for display. It shouldn't be
    used to get real file paths. Use vfs functions instead.
    '''
    cwd = self._cwd
    root = self._root
    if cwd == root:
        return ''
    # self._root ends with a path separator if self._root is '/' or 'C:\'
    if util.endswithsep(root):
        prefix = root
    else:
        prefix = root + os.sep
    if not cwd.startswith(prefix):
        # we're outside the repo. return an absolute path.
        return cwd
    return cwd[len(prefix):]
245 245
def pathto(self, f, cwd=None):
    '''Return the result of util.pathto() for f, computed from cwd
    (self.getcwd() when cwd is None) and passed through
    util.pconvert() when self._slash is set.'''
    if cwd is None:
        cwd = self.getcwd()
    relpath = util.pathto(self._root, cwd, f)
    if not self._slash:
        return relpath
    return util.pconvert(relpath)
253 253
def __getitem__(self, key):
    '''Return the current state of key (a filename) in the dirstate.

    States are:
      n  normal
      m  needs merging
      r  marked for removal
      a  marked for addition
      ?  not tracked
    '''
    untracked = ("?",)
    entry = self._map.get(key, untracked)
    return entry[0]
265 265
def __contains__(self, key):
    '''True when key has an entry in the dirstate map, whatever its
    state.'''
    entries = self._map
    return key in entries
268 268
def __iter__(self):
    '''Yield the filenames of the dirstate map in sorted order.'''
    names = sorted(self._map)
    for name in names:
        yield name
272 272
def iteritems(self):
    '''Iterate over the (filename, entry) pairs of the dirstate map.'''
    entries = self._map
    return entries.iteritems()
275 275
def parents(self):
    '''Return the dirstate parents as a list, each passed through the
    validate callable given to the constructor.'''
    validated = []
    for node in self._pl:
        validated.append(self._validate(node))
    return validated
278 278
def p1(self):
    '''Return the validated first parent of the dirstate.'''
    validate = self._validate
    return validate(self._pl[0])
281 281
def p2(self):
    '''Return the validated second parent of the dirstate.'''
    validate = self._validate
    return validate(self._pl[1])
284 284
def branch(self):
    '''Return the stored branch name converted by encoding.tolocal().'''
    stored = self._branch
    return encoding.tolocal(stored)
287 287
def setparents(self, p1, p2=nullid):
    """Set dirstate parents to p1 and p2.

    When moving from two parents to one, 'm' merged entries are
    adjusted to normal and previous copy records discarded and
    returned by the call.

    Must be called between beginparentchange() and endparentchange();
    ValueError is raised otherwise.

    See localrepo.setparents()
    """
    if self._parentwriters == 0:
        raise ValueError("cannot set dirstate parent without "
                         "calling dirstate.beginparentchange")

    self._dirty = self._dirtypl = True
    oldp2 = self._pl[1]
    self._pl = p1, p2
    copies = {}
    leavingmerge = oldp2 != nullid and p2 == nullid
    if not leavingmerge:
        return copies

    for fname, entry in self._map.iteritems():
        state = entry[0]
        if state == 'm':
            # Discard 'm' markers when moving away from a merge state
            fixup = self.normallookup
        elif state == 'n' and entry[2] == -2:
            # Also fix up otherparent markers
            fixup = self.add
        else:
            continue
        # remember the copy record before the fixup drops it
        if fname in self._copymap:
            copies[fname] = self._copymap[fname]
        fixup(fname)
    return copies
318 318
def setbranch(self, branch):
    '''Record branch as the current branch: update the cached value
    and rewrite the "branch" file through an atomictemp file.  On any
    failure the temporary file is discarded and the error re-raised.'''
    self._branch = encoding.fromlocal(branch)
    fp = self._opener('branch', 'w', atomictemp=True)
    try:
        fp.write(self._branch + '\n')
        fp.close()

        # make sure filecache has the correct stat info for _branch after
        # replacing the underlying file
        cacheentry = self._filecache['_branch']
        if cacheentry:
            cacheentry.refresh()
    except: # re-raises
        fp.discard()
        raise
334 334
def _read(self):
    '''Load the dirstate file into self._map and self._copymap.

    Both maps are reset to empty first and stay empty when the file is
    missing or empty.  The parents returned by the parser are stored
    in self._pl unless parents were changed in memory (_dirtypl).
    '''
    self._map = {}
    self._copymap = {}
    try:
        fp = self._opener.open(self._filename)
        try:
            data = fp.read()
        finally:
            fp.close()
    except IOError as err:
        if err.errno == errno.ENOENT:
            # no dirstate file yet
            return
        raise
    if not data:
        return

    if util.safehasattr(parsers, 'dict_new_presized'):
        # Make an estimate of the number of files in the dirstate based on
        # its size. From a linear regression on a set of real-world repos,
        # all over 10,000 files, the size of a dirstate entry is 85
        # bytes. The cost of resizing is significantly higher than the cost
        # of filling in a larger presized dict, so subtract 20% from the
        # size.
        #
        # This heuristic is imperfect in many ways, so in a future dirstate
        # format update it makes sense to just record the number of entries
        # on write.
        self._map = parsers.dict_new_presized(len(data) / 71)

    # Python's garbage collector triggers a GC each time a certain number
    # of container objects (the number being defined by
    # gc.get_threshold()) are allocated. parse_dirstate creates a tuple
    # for each file in the dirstate. The C version then immediately marks
    # them as not to be tracked by the collector. However, this has no
    # effect on when GCs are triggered, only on what objects the GC looks
    # into. This means that O(number of files) GCs are unavoidable.
    # Depending on when in the process's lifetime the dirstate is parsed,
    # this can get very expensive. As a workaround, disable GC while
    # parsing the dirstate.
    #
    # (we cannot decorate the function directly since it is in a C module)
    parse_dirstate = util.nogc(parsers.parse_dirstate)
    parents = parse_dirstate(self._map, self._copymap, data)
    if not self._dirtypl:
        self._pl = parents
380 380
def invalidate(self):
    '''Forget every cached attribute so it is recomputed on next use,
    and reset the dirty flag, the last-normal timestamp and the
    parent-change counter.'''
    cached = self.__dict__
    for attr in ("_map", "_copymap", "_filefoldmap", "_dirfoldmap",
                 "_branch", "_pl", "_dirs", "_ignore"):
        if attr in cached:
            delattr(self, attr)
    self._lastnormaltime = 0
    self._dirty = False
    self._parentwriters = 0
389 389
def copy(self, source, dest):
    """Mark dest as a copy of source. Unmark dest if source is None.

    Nothing happens (not even marking the dirstate dirty) when source
    and dest are equal."""
    if source == dest:
        return
    self._dirty = True
    copymap = self._copymap
    if source is None:
        copymap.pop(dest, None)
    else:
        copymap[dest] = source
399 399
def copied(self, file):
    '''Return the recorded copy source of file, or None if there is
    none.'''
    copymap = self._copymap
    return copymap.get(file)
402 402
def copies(self):
    '''Return the copy map itself (dest -> source), not a copy of it.'''
    copymap = self._copymap
    return copymap
405 405
def _droppath(self, f):
    '''Remove f from the cached directory collection, if that cache
    exists and f is currently in a state other than '?' or 'r'.'''
    state = self[f]
    if state in "?r":
        return
    # only touch _dirs when it has already been computed
    if "_dirs" in self.__dict__:
        self._dirs.delpath(f)
409 409
def _addpath(self, f, state, mode, size, mtime):
    '''Store the entry (state, mode, size, mtime) for f and mark the
    dirstate dirty.

    When f is being added (state 'a') or was marked removed, its name
    is checked first: error.Abort is raised if f is already a
    directory in the dirstate, or if one of its parent directories is
    a file in a state other than 'r'.
    '''
    oldstate = self[f]
    if state == 'a' or oldstate == 'r':
        scmutil.checkfilename(f)
        if f in self._dirs:
            raise error.Abort(_('directory %r already in dirstate') % f)
        # shadows
        for parent in util.finddirs(f):
            if parent in self._dirs:
                break
            if parent in self._map and self[parent] != 'r':
                raise error.Abort(
                    _('file %r in dirstate clashes with %r') % (parent, f))
    newlylisted = oldstate in "?r"
    if newlylisted and "_dirs" in self.__dict__:
        self._dirs.addpath(f)
    self._dirty = True
    self._map[f] = dirstatetuple(state, mode, size, mtime)
427 427
def normal(self, f):
    '''Mark a file normal and clean.

    The entry records the mode, size and mtime obtained from lstat()
    (size and mtime masked with _rangemask); any copy record for f is
    dropped.
    '''
    st = os.lstat(self._join(f))
    mtime = util.statmtimesec(st)
    size = st.st_size & _rangemask
    self._addpath(f, 'n', st.st_mode, size, mtime & _rangemask)
    self._copymap.pop(f, None)
    if mtime > self._lastnormaltime:
        # Remember the most recent modification timeslot for status(),
        # to make sure we won't miss future size-preserving file content
        # modifications that happen within the same timeslot.
        self._lastnormaltime = mtime
441 441
def normallookup(self, f):
    '''Mark a file normal, but possibly dirty.'''
    inmerge = self._pl[1] != nullid
    if inmerge and f in self._map:
        # if there is a merge going on and the file was either
        # in state 'm' (-1) or coming from other parent (-2) before
        # being removed, restore that state.
        entry = self._map[f]
        state, size = entry[0], entry[2]
        if state == 'r' and size in (-1, -2):
            source = self._copymap.get(f)
            if size == -1:
                self.merge(f)
            else:
                self.otherparent(f)
            if source:
                self.copy(source, f)
            return
        # already merged / from the other parent: leave untouched
        if state == 'm' or (state == 'n' and size == -2):
            return
    self._addpath(f, 'n', 0, -1, -1)
    self._copymap.pop(f, None)
463 463
def otherparent(self, f):
    '''Mark as coming from the other parent, always dirty.

    Raises error.Abort when there is no second parent.'''
    if self._pl[1] == nullid:
        raise error.Abort(_("setting %r to other parent "
                           "only allowed in merges") % f)
    if f in self and self[f] == 'n':
        # merge-like
        newstate = 'm'
    else:
        # add-like
        newstate = 'n'
    self._addpath(f, newstate, 0, -2, -1)

    self._copymap.pop(f, None)
478 478
def add(self, f):
    '''Mark a file added.'''
    self._addpath(f, 'a', 0, -1, -1)
    # a freshly added file carries no copy record
    self._copymap.pop(f, None)
484 484
def remove(self, f):
    '''Mark a file removed.

    During a merge the previous state is remembered in the size field
    of the new entry: -1 for a merged file, -2 for a file from the
    other parent, 0 otherwise.  The copy record is only dropped in the
    0 case.
    '''
    self._dirty = True
    self._droppath(f)
    marker = 0
    if self._pl[1] != nullid and f in self._map:
        # backup the previous state
        previous = self._map[f]
        if previous[0] == 'm': # merge
            marker = -1
        elif previous[0] == 'n' and previous[2] == -2: # other parent
            marker = -2
    self._map[f] = dirstatetuple('r', 0, marker, 0)
    if marker == 0:
        self._copymap.pop(f, None)
500 500
def merge(self, f):
    '''Mark a file merged.

    Delegates to otherparent() when there is a second parent and to
    normallookup() otherwise.'''
    inmerge = self._pl[1] != nullid
    if inmerge:
        return self.otherparent(f)
    return self.normallookup(f)
506 506
def drop(self, f):
    '''Drop a file from the dirstate (no-op if it has no entry).'''
    if f not in self._map:
        return
    self._dirty = True
    self._droppath(f)
    del self._map[f]
513 513
def _discoverpath(self, path, normed, ignoremissing, exists, storemap):
    '''Work out the case-folded spelling of path.

    normed is the case-normalized form of path.  exists tells whether
    path is on disk (None means "check with lexists").  For an
    existing path the result is also stored in storemap under normed;
    for a missing one nothing is stored.
    '''
    if exists is None:
        exists = os.path.lexists(os.path.join(self._root, path))

    if not exists:
        if ignoremissing or '/' not in path:
            # No path components, preserve original case
            return path
        # Maybe a path component exists
        parent, leaf = path.rsplit('/', 1)
        parent = self._normalize(parent, False, ignoremissing, None)
        return parent + "/" + leaf

    if '/' in normed:
        # recursively normalize leading directory components
        # against dirstate
        parent, leaf = normed.rsplit('/', 1)
        parent = self._normalize(parent, False, ignoremissing, True)
        absparent = self._root + "/" + parent
        folded = parent + "/" + util.fspath(leaf, absparent)
    else:
        folded = util.fspath(normed, self._root)
    storemap[normed] = folded
    return folded
539 539
def _normalizefile(self, path, isknown, ignoremissing=False, exists=None):
    '''Fold the case of path against the files of the dirstate only.

    A hit in _filefoldmap wins; otherwise path is returned as is when
    isknown is true, else it is discovered (and the discovery cached
    in _filefoldmap).
    '''
    normed = util.normcase(path)
    folded = self._filefoldmap.get(normed, None)
    if folded is not None:
        return folded
    if isknown:
        return path
    return self._discoverpath(path, normed, ignoremissing, exists,
                              self._filefoldmap)
550 550
def _normalize(self, path, isknown, ignoremissing=False, exists=None):
    '''Fold the case of path against both the files and the
    directories of the dirstate.

    Lookup order: _filefoldmap, then _dirfoldmap; when neither knows
    the name, path is returned as is if isknown is true, else it is
    discovered.
    '''
    normed = util.normcase(path)
    folded = self._filefoldmap.get(normed, None)
    if folded is None:
        folded = self._dirfoldmap.get(normed, None)
    if folded is not None:
        return folded
    if isknown:
        return path
    # store discovered result in dirfoldmap so that future
    # normalizefile calls don't start matching directories
    return self._discoverpath(path, normed, ignoremissing, exists,
                              self._dirfoldmap)
565 565
def normalize(self, path, isknown=False, ignoremissing=False):
    '''
    normalize the case of a pathname when on a casefolding filesystem

    isknown specifies whether the filename came from walking the
    disk, to avoid extra filesystem access.

    If ignoremissing is True, missing path are returned
    unchanged. Otherwise, we try harder to normalize possibly
    existing path components.

    The normalized case is determined based on the following precedence:

    - version of name already stored in the dirstate
    - version of name stored on disk
    - version provided via command arguments
    '''
    if not self._checkcase:
        # no folding requested: the path is already canonical
        return path
    return self._normalize(path, isknown, ignoremissing)
587 587
def clear(self):
    '''Empty the dirstate in memory: no entries, no copy records, null
    parents.  The dirstate is left marked dirty.'''
    self._map = {}
    # drop the cached directory collection, if computed
    if "_dirs" in self.__dict__:
        del self._dirs
    self._copymap = {}
    self._pl = [nullid, nullid]
    self._lastnormaltime = 0
    self._dirty = True
596 596
def rebuild(self, parent, allfiles, changedfiles=None):
    '''Rebuild the dirstate with parent as its only parent.

    Every file of allfiles gets an entry: files not in changedfiles
    keep their previous entry, the others get a fresh 'n' entry with
    size -1 and mtime 0, whose mode depends on the 'x' flag reported
    by allfiles.flags().  changedfiles defaults to allfiles.
    '''
    if changedfiles is None:
        changedfiles = allfiles
    oldmap = self._map
    self.clear()
    for fname in allfiles:
        if fname not in changedfiles:
            self._map[fname] = oldmap[fname]
            continue
        if 'x' in allfiles.flags(fname):
            mode = 0o777
        else:
            mode = 0o666
        self._map[fname] = dirstatetuple('n', mode, -1, 0)
    self._pl = (parent, nullid)
    self._dirty = True
612 612
def write(self):
    '''Write the dirstate to its file if it has been modified.'''
    if not self._dirty:
        return

    # enough 'delaywrite' prevents 'pack_dirstate' from dropping
    # timestamp of each entries in dirstate, because of 'now > mtime'
    delay = self._ui.configint('debug', 'dirstate.delaywrite', 0)
    if delay > 0:
        import time # to avoid useless import
        time.sleep(delay)

    fp = self._opener(self._filename, "w", atomictemp=True)
    self._writedirstate(fp)
626 626
def _writedirstate(self, st):
    '''Serialize map, copy map and parents into the open file st,
    close it, and reset the dirty flags and the last-normal
    timestamp.'''
    # use the modification time of the newly created temporary file as the
    # filesystem's notion of 'now'
    now = util.fstat(st).st_mtime
    packed = parsers.pack_dirstate(self._map, self._copymap, self._pl, now)
    st.write(packed)
    st.close()
    self._lastnormaltime = 0
    self._dirty = self._dirtypl = False
635 635
def _dirignore(self, f):
    '''True when f itself or any of its parent directories is matched
    by the ignore matcher.  '.' is never ignored.'''
    if f == '.':
        return False
    ignore = self._ignore
    if ignore(f):
        return True
    return any(ignore(parent) for parent in util.finddirs(f))
645 645
def _walkexplicit(self, match, subrepos):
    '''Get stat data about the files explicitly specified by match.

    Return a triple (results, dirsfound, dirsnotfound).
    - results is a mapping from filename to stat result. It also contains
      listings mapping subrepos and .hg to None.
    - dirsfound is a list of files found to be directories.
    - dirsnotfound is a list of files that the dirstate thinks are
      directories and that were not found.'''

    # Build the message reported through match.bad for an entry that is
    # neither a directory, a regular file nor a symlink.
    def badtype(mode):
        kind = _('unknown')
        if stat.S_ISCHR(mode):
            kind = _('character device')
        elif stat.S_ISBLK(mode):
            kind = _('block device')
        elif stat.S_ISFIFO(mode):
            kind = _('fifo')
        elif stat.S_ISSOCK(mode):
            kind = _('socket')
        elif stat.S_ISDIR(mode):
            kind = _('directory')
        return _('unsupported file type (type is %s)') % kind

    # local aliases for everything used inside the loop below
    matchedir = match.explicitdir
    badfn = match.bad
    dmap = self._map
    lstat = os.lstat
    getkind = stat.S_IFMT
    dirkind = stat.S_IFDIR
    regkind = stat.S_IFREG
    lnkkind = stat.S_IFLNK
    join = self._join
    dirsfound = []
    foundadd = dirsfound.append
    dirsnotfound = []
    notfoundadd = dirsnotfound.append

    # case folding is only applied for non-exact matchers when
    # self._checkcase is set
    if not match.isexact() and self._checkcase:
        normalize = self._normalize
    else:
        normalize = None

    # Drop every explicit file that lives below a subrepo.  Both lists
    # are sorted, so they are scanned in step.  Note that subrepos is
    # sorted in place.
    files = sorted(match.files())
    subrepos.sort()
    i, j = 0, 0
    while i < len(files) and j < len(subrepos):
        subpath = subrepos[j] + "/"
        if files[i] < subpath:
            i += 1
            continue
        while i < len(files) and files[i].startswith(subpath):
            del files[i]
        j += 1

    if not files or '.' in files:
        files = ['.']
    # subrepos and .hg are pre-seeded with None so that the
    # "nf in results" test below skips them
    results = dict.fromkeys(subrepos)
    results['.hg'] = None

    alldirs = None
    for ff in files:
        # constructing the foldmap is expensive, so don't do it for the
        # common case where files is ['.']
        if normalize and ff != '.':
            nf = normalize(ff, False, True)
        else:
            nf = ff
        if nf in results:
            continue

        try:
            st = lstat(join(nf))
            kind = getkind(st.st_mode)
            if kind == dirkind:
                if nf in dmap:
                    # file replaced by dir on disk but still in dirstate
                    results[nf] = None
                if matchedir:
                    matchedir(nf)
                # keep both the normalized and the original spelling
                foundadd((nf, ff))
            elif kind == regkind or kind == lnkkind:
                results[nf] = st
            else:
                badfn(ff, badtype(kind))
                if nf in dmap:
                    results[nf] = None
        except OSError as inst: # nf not found on disk - it is dirstate only
            if nf in dmap: # does it exactly match a missing file?
                results[nf] = None
            else: # does it match a missing directory?
                # built lazily, at most once per call
                if alldirs is None:
                    alldirs = util.dirs(dmap)
                if nf in alldirs:
                    if matchedir:
                        matchedir(nf)
                    notfoundadd(nf)
                else:
                    badfn(ff, inst.strerror)

    # Case insensitive filesystems cannot rely on lstat() failing to detect
    # a case-only rename.  Prune the stat object for any file that does not
    # match the case in the filesystem, if there are multiple files that
    # normalize to the same path.
    if match.isexact() and self._checkcase:
        normed = {}

        # group the stat'ed names by their case-normalized form
        for f, st in results.iteritems():
            if st is None:
                continue

            nc = util.normcase(f)
            paths = normed.get(nc)

            if paths is None:
                paths = set()
                normed[nc] = paths

            paths.add(f)

        for norm, paths in normed.iteritems():
            if len(paths) > 1:
                for path in paths:
                    folded = self._discoverpath(path, norm, True, None,
                                                self._dirfoldmap)
                    if path != folded:
                        results[path] = None

    return results, dirsfound, dirsnotfound
775 775
def walk(self, match, subrepos, unknown, ignored, full=True):
    '''
    Walk recursively through the directory tree, finding all files
    matched by match.

    subrepos is a list of subrepo paths to leave out; unknown and
    ignored tell whether unknown and ignored files are wanted.

    If full is False, maybe skip some known-clean files.

    Return a dict mapping filename to stat-like object (either
    mercurial.osutil.stat instance or return value of os.stat()).

    '''
    # full is a flag that extensions that hook into walk can use -- this
    # implementation doesn't use it at all. This satisfies the contract
    # because we only guarantee a "maybe".

    if ignored:
        ignore = util.never
        dirignore = util.never
    elif unknown:
        ignore = self._ignore
        dirignore = self._dirignore
    else:
        # if not unknown and not ignored, drop dir recursion and step 2
        ignore = util.always
        dirignore = util.always

    # local aliases for everything used in the loops below
    matchfn = match.matchfn
    matchalways = match.always()
    matchtdir = match.traversedir
    dmap = self._map
    listdir = osutil.listdir
    lstat = os.lstat
    dirkind = stat.S_IFDIR
    regkind = stat.S_IFREG
    lnkkind = stat.S_IFLNK
    join = self._join

    exact = skipstep3 = False
    if match.isexact(): # match.exact
        exact = True
        dirignore = util.always # skip step 2
    elif match.prefix(): # match.match, no patterns
        skipstep3 = True

    if not exact and self._checkcase:
        normalize = self._normalize
        normalizefile = self._normalizefile
        skipstep3 = False
    else:
        normalize = self._normalize
        normalizefile = None

    # step 1: find all explicit files
    results, work, dirsnotfound = self._walkexplicit(match, subrepos)

    skipstep3 = skipstep3 and not (work or dirsnotfound)
    work = [d for d in work if not dirignore(d[0])]

    # step 2: visit subdirectories
    def traverse(work, alreadynormed):
        wadd = work.append
        while work:
            nd = work.pop()
            skip = None
            if nd == '.':
                nd = ''
            else:
                skip = '.hg'
            try:
                entries = listdir(join(nd), stat=True, skip=skip)
            except OSError as inst:
                if inst.errno in (errno.EACCES, errno.ENOENT):
                    match.bad(self.pathto(nd), inst.strerror)
                    continue
                raise
            for f, kind, st in entries:
                if normalizefile:
                    # even though f might be a directory, we're only
                    # interested in comparing it to files currently in the
                    # dmap -- therefore normalizefile is enough
                    nf = normalizefile(nd and (nd + "/" + f) or f, True,
                                       True)
                else:
                    nf = nd and (nd + "/" + f) or f
                if nf not in results:
                    if kind == dirkind:
                        if not ignore(nf):
                            if matchtdir:
                                matchtdir(nf)
                            wadd(nf)
                        if nf in dmap and (matchalways or matchfn(nf)):
                            results[nf] = None
                    elif kind == regkind or kind == lnkkind:
                        if nf in dmap:
                            if matchalways or matchfn(nf):
                                results[nf] = st
                        elif ((matchalways or matchfn(nf))
                              and not ignore(nf)):
                            # unknown file -- normalize if necessary
                            if not alreadynormed:
                                nf = normalize(nf, False, True)
                            results[nf] = st
                    elif nf in dmap and (matchalways or matchfn(nf)):
                        results[nf] = None

    for nd, d in work:
        # alreadynormed means that processwork doesn't have to do any
        # expensive directory normalization
        alreadynormed = not normalize or nd == d
        traverse([d], alreadynormed)

    # the placeholders seeded by _walkexplicit are not part of the result
    for s in subrepos:
        del results[s]
    del results['.hg']

    # step 3: visit remaining files from dmap
    if not skipstep3 and not exact:
        # If a dmap file is not in results yet, it was either
        # a) not matching matchfn b) ignored, c) missing, or d) under a
        # symlink directory.
        if not results and matchalways:
            visit = dmap.keys()
        else:
            visit = [f for f in dmap if f not in results and matchfn(f)]
        visit.sort()

        if unknown:
            # unknown == True means we walked all dirs under the roots
            # that wasn't ignored, and everything that matched was stat'ed
            # and is already in results.
            # The rest must thus be ignored or under a symlink.
            audit_path = pathutil.pathauditor(self._root)

            for nf in iter(visit):
                # If a stat for the same file was already added with a
                # different case, don't add one for this, since that would
                # make it appear as if the file exists under both names
                # on disk.
                if (normalizefile and
                    normalizefile(nf, True, True) in results):
                    results[nf] = None
                # Report ignored items in the dmap as long as they are not
                # under a symlink directory.
                elif audit_path.check(nf):
                    try:
                        results[nf] = lstat(join(nf))
                        # file was just ignored, no links, and exists
                    except OSError:
                        # file doesn't exist
                        results[nf] = None
                else:
                    # It's either missing or under a symlink directory
                    # which we in this case report as missing
                    results[nf] = None
        else:
            # We may not have walked the full directory tree above,
            # so stat and check everything we missed.
            #
            # nf() hands out the names of visit one by one, in the same
            # order as the paths given to util.statfiles below, so each
            # stat result is stored under its own name.  Only the
            # batched loop may consume it: a second pass over the same
            # iterator would run out of names.
            nf = iter(visit).next
            batchsize = 1000
            pos = 0
            while pos < len(visit):
                # visit in mid-sized batches so that we don't
                # block signals indefinitely
                xr = xrange(pos, min(len(visit), pos + batchsize))
                for st in util.statfiles([join(visit[n]) for n in xr]):
                    results[nf()] = st
                pos += batchsize
    return results
937 943
def status(self, match, subrepos, ignored, clean, unknown):
    '''Determine the status of the working copy relative to the
    dirstate and return a pair of (unsure, status), where status is of type
    scmutil.status and:

      unsure:
        files that might have been modified since the dirstate was
        written, but need to be read to be sure (size is the same
        but mtime differs)
      status.modified:
        files that have definitely been modified since the dirstate
        was written (different size or mode)
      status.clean:
        files that have definitely not been modified since the
        dirstate was written

    ignored, clean and unknown are flags telling whether files of
    that kind should be listed; match and subrepos are passed on to
    walk().
    '''
    # the flag arguments are renamed first because the names ignored,
    # clean and unknown are reused for the result lists right below
    listignored, listclean, listunknown = ignored, clean, unknown
    lookup, modified, added, unknown, ignored = [], [], [], [], []
    removed, deleted, clean = [], [], []

    # local aliases for everything used inside the loop below
    dmap = self._map
    ladd = lookup.append            # aka "unsure"
    madd = modified.append
    aadd = added.append
    uadd = unknown.append
    iadd = ignored.append
    radd = removed.append
    dadd = deleted.append
    cadd = clean.append
    mexact = match.exact
    dirignore = self._dirignore
    checkexec = self._checkexec
    copymap = self._copymap
    lastnormaltime = self._lastnormaltime

    # We need to do full walks when either
    # - we're listing all clean files, or
    # - match.traversedir does something, because match.traversedir should
    #   be called for every dir in the working dir
    full = listclean or match.traversedir is not None
    for fn, st in self.walk(match, subrepos, listunknown, listignored,
                            full=full).iteritems():
        if fn not in dmap:
            # not in the dirstate: ignored or unknown
            if (listignored or mexact(fn)) and dirignore(fn):
                if listignored:
                    iadd(fn)
            else:
                uadd(fn)
            continue

        # This is equivalent to 'state, mode, size, time = dmap[fn]' but not
        # written like that for performance reasons. dmap[fn] is not a
        # Python tuple in compiled builds. The CPython UNPACK_SEQUENCE
        # opcode has fast paths when the value to be unpacked is a tuple or
        # a list, but falls back to creating a full-fledged iterator in
        # general. That is much slower than simply accessing and storing the
        # tuple members one by one.
        t = dmap[fn]
        state = t[0]
        mode = t[1]
        size = t[2]
        time = t[3]

        # a falsy st means walk() found no file for this name
        if not st and state in "nma":
            dadd(fn)
        elif state == 'n':
            mtime = util.statmtimesec(st)
            # modified: size differs (also after masking), or the 0o100
            # mode bit differs while checkexec is set, or the entry is
            # marked as other-parent, or it has a copy record
            if (size >= 0 and
                ((size != st.st_size and size != st.st_size & _rangemask)
                 or ((mode ^ st.st_mode) & 0o100 and checkexec))
                or size == -2 # other parent
                or fn in copymap):
                madd(fn)
            elif time != mtime and time != mtime & _rangemask:
                ladd(fn)
            elif mtime == lastnormaltime:
                # fn may have just been marked as normal and it may have
                # changed in the same second without changing its size.
                # This can happen if we quickly do multiple commits.
                # Force lookup, so we don't miss such a racy file change.
                ladd(fn)
            elif listclean:
                cadd(fn)
        elif state == 'm':
            madd(fn)
        elif state == 'a':
            aadd(fn)
        elif state == 'r':
            radd(fn)

    return (lookup, scmutil.status(modified, added, removed, deleted,
                                   unknown, ignored, clean))
1030 1036
def matches(self, match):
    '''
    return files in the dirstate (in whatever state) filtered by match
    '''
    entries = self._map
    if match.always():
        return entries.keys()
    wanted = match.files()
    if match.isexact():
        # fast path -- filter the other way around, since typically files is
        # much smaller than dmap
        return [name for name in wanted if name in entries]
    if match.prefix() and all(name in entries for name in wanted):
        # fast path -- all the values are known to be files, so just return
        # that
        return list(wanted)
    # general case: test every dirstate entry against the matcher
    return [name for name in entries if match(name)]
General Comments 0
You need to be logged in to leave comments. Login now