##// END OF EJS Templates
dirstate.walk: speed up calling match function
Matt Mackall -
r6834:cbdfd08e default
parent child Browse files
Show More
@@ -1,595 +1,596 b''
1 1 """
2 2 dirstate.py - working directory tracking for mercurial
3 3
4 4 Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
5 5
6 6 This software may be used and distributed according to the terms
7 7 of the GNU General Public License, incorporated herein by reference.
8 8 """
9 9
10 10 from node import nullid
11 11 from i18n import _
12 12 import struct, os, bisect, stat, util, errno, ignore
13 13 import cStringIO, osutil, sys
14 14
# Entry tuple shaped like a dirstate entry but carrying the '?' (not tracked)
# state and zeroed fields.  NOTE(review): not referenced in the visible code.
_unknown = ('?', 0, 0, 0)
# struct layout of the fixed-size header of one on-disk dirstate entry: a
# one-character state followed by four big-endian longs.  write() packs
# e[0]..e[3] plus the length of the filename field that follows the header.
_format = ">cllll"
17 17
18 18 def _finddirs(path):
19 19 pos = len(path)
20 20 while 1:
21 21 pos = path.rfind('/', 0, pos)
22 22 if pos == -1:
23 23 break
24 24 yield path[:pos]
25 25
26 26 class dirstate(object):
27 27
28 28 def __init__(self, opener, ui, root):
29 29 self._opener = opener
30 30 self._root = root
31 31 self._dirty = False
32 32 self._dirtypl = False
33 33 self._ui = ui
34 34
35 35 def __getattr__(self, name):
36 36 if name == '_map':
37 37 self._read()
38 38 return self._map
39 39 elif name == '_copymap':
40 40 self._read()
41 41 return self._copymap
42 42 elif name == '_foldmap':
43 43 _foldmap = {}
44 44 for name in self._map:
45 45 norm = os.path.normcase(os.path.normpath(name))
46 46 _foldmap[norm] = name
47 47 self._foldmap = _foldmap
48 48 return self._foldmap
49 49 elif name == '_branch':
50 50 try:
51 51 self._branch = (self._opener("branch").read().strip()
52 52 or "default")
53 53 except IOError:
54 54 self._branch = "default"
55 55 return self._branch
56 56 elif name == '_pl':
57 57 self._pl = [nullid, nullid]
58 58 try:
59 59 st = self._opener("dirstate").read(40)
60 60 if len(st) == 40:
61 61 self._pl = st[:20], st[20:40]
62 62 except IOError, err:
63 63 if err.errno != errno.ENOENT: raise
64 64 return self._pl
65 65 elif name == '_dirs':
66 66 dirs = {}
67 67 for f,s in self._map.items():
68 68 if s[0] != 'r':
69 69 for base in _finddirs(f):
70 70 dirs[base] = dirs.get(base, 0) + 1
71 71 self._dirs = dirs
72 72 return self._dirs
73 73 elif name == '_ignore':
74 74 files = [self._join('.hgignore')]
75 75 for name, path in self._ui.configitems("ui"):
76 76 if name == 'ignore' or name.startswith('ignore.'):
77 77 files.append(os.path.expanduser(path))
78 78 self._ignore = ignore.ignore(self._root, files, self._ui.warn)
79 79 return self._ignore
80 80 elif name == '_slash':
81 81 self._slash = self._ui.configbool('ui', 'slash') and os.sep != '/'
82 82 return self._slash
83 83 elif name == '_checklink':
84 84 self._checklink = util.checklink(self._root)
85 85 return self._checklink
86 86 elif name == '_checkexec':
87 87 self._checkexec = util.checkexec(self._root)
88 88 return self._checkexec
89 89 elif name == '_checkcase':
90 90 self._checkcase = not util.checkcase(self._join('.hg'))
91 91 return self._checkcase
92 92 elif name == 'normalize':
93 93 if self._checkcase:
94 94 self.normalize = self._normalize
95 95 else:
96 96 self.normalize = lambda x: x
97 97 return self.normalize
98 98 else:
99 99 raise AttributeError, name
100 100
101 101 def _join(self, f):
102 102 return os.path.join(self._root, f)
103 103
104 104 def flagfunc(self, fallback):
105 105 if self._checklink:
106 106 if self._checkexec:
107 107 def f(x):
108 108 p = os.path.join(self._root, x)
109 109 if os.path.islink(p):
110 110 return 'l'
111 111 if util.is_exec(p):
112 112 return 'x'
113 113 return ''
114 114 return f
115 115 def f(x):
116 116 if os.path.islink(os.path.join(self._root, x)):
117 117 return 'l'
118 118 if 'x' in fallback(x):
119 119 return 'x'
120 120 return ''
121 121 return f
122 122 if self._checkexec:
123 123 def f(x):
124 124 if 'l' in fallback(x):
125 125 return 'l'
126 126 if util.is_exec(os.path.join(self._root, x)):
127 127 return 'x'
128 128 return ''
129 129 return f
130 130 return fallback
131 131
132 132 def getcwd(self):
133 133 cwd = os.getcwd()
134 134 if cwd == self._root: return ''
135 135 # self._root ends with a path separator if self._root is '/' or 'C:\'
136 136 rootsep = self._root
137 137 if not util.endswithsep(rootsep):
138 138 rootsep += os.sep
139 139 if cwd.startswith(rootsep):
140 140 return cwd[len(rootsep):]
141 141 else:
142 142 # we're outside the repo. return an absolute path.
143 143 return cwd
144 144
145 145 def pathto(self, f, cwd=None):
146 146 if cwd is None:
147 147 cwd = self.getcwd()
148 148 path = util.pathto(self._root, cwd, f)
149 149 if self._slash:
150 150 return util.normpath(path)
151 151 return path
152 152
153 153 def __getitem__(self, key):
154 154 ''' current states:
155 155 n normal
156 156 m needs merging
157 157 r marked for removal
158 158 a marked for addition
159 159 ? not tracked'''
160 160 return self._map.get(key, ("?",))[0]
161 161
162 162 def __contains__(self, key):
163 163 return key in self._map
164 164
165 165 def __iter__(self):
166 166 for x in util.sort(self._map):
167 167 yield x
168 168
169 169 def parents(self):
170 170 return self._pl
171 171
172 172 def branch(self):
173 173 return self._branch
174 174
175 175 def setparents(self, p1, p2=nullid):
176 176 self._dirty = self._dirtypl = True
177 177 self._pl = p1, p2
178 178
179 179 def setbranch(self, branch):
180 180 self._branch = branch
181 181 self._opener("branch", "w").write(branch + '\n')
182 182
183 183 def _read(self):
184 184 self._map = {}
185 185 self._copymap = {}
186 186 if not self._dirtypl:
187 187 self._pl = [nullid, nullid]
188 188 try:
189 189 st = self._opener("dirstate").read()
190 190 except IOError, err:
191 191 if err.errno != errno.ENOENT: raise
192 192 return
193 193 if not st:
194 194 return
195 195
196 196 if not self._dirtypl:
197 197 self._pl = [st[:20], st[20: 40]]
198 198
199 199 # deref fields so they will be local in loop
200 200 dmap = self._map
201 201 copymap = self._copymap
202 202 unpack = struct.unpack
203 203 e_size = struct.calcsize(_format)
204 204 pos1 = 40
205 205 l = len(st)
206 206
207 207 # the inner loop
208 208 while pos1 < l:
209 209 pos2 = pos1 + e_size
210 210 e = unpack(">cllll", st[pos1:pos2]) # a literal here is faster
211 211 pos1 = pos2 + e[4]
212 212 f = st[pos2:pos1]
213 213 if '\0' in f:
214 214 f, c = f.split('\0')
215 215 copymap[f] = c
216 216 dmap[f] = e # we hold onto e[4] because making a subtuple is slow
217 217
218 218 def invalidate(self):
219 219 for a in "_map _copymap _foldmap _branch _pl _dirs _ignore".split():
220 220 if a in self.__dict__:
221 221 delattr(self, a)
222 222 self._dirty = False
223 223
224 224 def copy(self, source, dest):
225 225 if source == dest:
226 226 return
227 227 self._dirty = True
228 228 self._copymap[dest] = source
229 229
230 230 def copied(self, file):
231 231 return self._copymap.get(file, None)
232 232
233 233 def copies(self):
234 234 return self._copymap
235 235
236 236 def _droppath(self, f):
237 237 if self[f] not in "?r" and "_dirs" in self.__dict__:
238 238 dirs = self._dirs
239 239 for base in _finddirs(f):
240 240 if dirs[base] == 1:
241 241 del dirs[base]
242 242 else:
243 243 dirs[base] -= 1
244 244
245 245 def _addpath(self, f, check=False):
246 246 oldstate = self[f]
247 247 if check or oldstate == "r":
248 248 if '\r' in f or '\n' in f:
249 249 raise util.Abort(
250 250 _("'\\n' and '\\r' disallowed in filenames: %r") % f)
251 251 if f in self._dirs:
252 252 raise util.Abort(_('directory %r already in dirstate') % f)
253 253 # shadows
254 254 for d in _finddirs(f):
255 255 if d in self._dirs:
256 256 break
257 257 if d in self._map and self[d] != 'r':
258 258 raise util.Abort(
259 259 _('file %r in dirstate clashes with %r') % (d, f))
260 260 if oldstate in "?r" and "_dirs" in self.__dict__:
261 261 dirs = self._dirs
262 262 for base in _finddirs(f):
263 263 dirs[base] = dirs.get(base, 0) + 1
264 264
265 265 def normal(self, f):
266 266 'mark a file normal and clean'
267 267 self._dirty = True
268 268 self._addpath(f)
269 269 s = os.lstat(self._join(f))
270 270 self._map[f] = ('n', s.st_mode, s.st_size, s.st_mtime, 0)
271 271 if f in self._copymap:
272 272 del self._copymap[f]
273 273
274 274 def normallookup(self, f):
275 275 'mark a file normal, but possibly dirty'
276 276 if self._pl[1] != nullid and f in self._map:
277 277 # if there is a merge going on and the file was either
278 278 # in state 'm' or dirty before being removed, restore that state.
279 279 entry = self._map[f]
280 280 if entry[0] == 'r' and entry[2] in (-1, -2):
281 281 source = self._copymap.get(f)
282 282 if entry[2] == -1:
283 283 self.merge(f)
284 284 elif entry[2] == -2:
285 285 self.normaldirty(f)
286 286 if source:
287 287 self.copy(source, f)
288 288 return
289 289 if entry[0] == 'm' or entry[0] == 'n' and entry[2] == -2:
290 290 return
291 291 self._dirty = True
292 292 self._addpath(f)
293 293 self._map[f] = ('n', 0, -1, -1, 0)
294 294 if f in self._copymap:
295 295 del self._copymap[f]
296 296
297 297 def normaldirty(self, f):
298 298 'mark a file normal, but dirty'
299 299 self._dirty = True
300 300 self._addpath(f)
301 301 self._map[f] = ('n', 0, -2, -1, 0)
302 302 if f in self._copymap:
303 303 del self._copymap[f]
304 304
305 305 def add(self, f):
306 306 'mark a file added'
307 307 self._dirty = True
308 308 self._addpath(f, True)
309 309 self._map[f] = ('a', 0, -1, -1, 0)
310 310 if f in self._copymap:
311 311 del self._copymap[f]
312 312
313 313 def remove(self, f):
314 314 'mark a file removed'
315 315 self._dirty = True
316 316 self._droppath(f)
317 317 size = 0
318 318 if self._pl[1] != nullid and f in self._map:
319 319 entry = self._map[f]
320 320 if entry[0] == 'm':
321 321 size = -1
322 322 elif entry[0] == 'n' and entry[2] == -2:
323 323 size = -2
324 324 self._map[f] = ('r', 0, size, 0, 0)
325 325 if size == 0 and f in self._copymap:
326 326 del self._copymap[f]
327 327
328 328 def merge(self, f):
329 329 'mark a file merged'
330 330 self._dirty = True
331 331 s = os.lstat(self._join(f))
332 332 self._addpath(f)
333 333 self._map[f] = ('m', s.st_mode, s.st_size, s.st_mtime, 0)
334 334 if f in self._copymap:
335 335 del self._copymap[f]
336 336
337 337 def forget(self, f):
338 338 'forget a file'
339 339 self._dirty = True
340 340 try:
341 341 self._droppath(f)
342 342 del self._map[f]
343 343 except KeyError:
344 344 self._ui.warn(_("not in dirstate: %s\n") % f)
345 345
346 346 def _normalize(self, path):
347 347 if path not in self._foldmap:
348 348 if not os.path.exists(path):
349 349 return path
350 350 self._foldmap[path] = util.fspath(path, self._root)
351 351 return self._foldmap[path]
352 352
353 353 def clear(self):
354 354 self._map = {}
355 355 if "_dirs" in self.__dict__:
356 356 delattr(self, "_dirs");
357 357 self._copymap = {}
358 358 self._pl = [nullid, nullid]
359 359 self._dirty = True
360 360
361 361 def rebuild(self, parent, files):
362 362 self.clear()
363 363 for f in files:
364 364 if 'x' in files.flags(f):
365 365 self._map[f] = ('n', 0777, -1, 0, 0)
366 366 else:
367 367 self._map[f] = ('n', 0666, -1, 0, 0)
368 368 self._pl = (parent, nullid)
369 369 self._dirty = True
370 370
371 371 def write(self):
372 372 if not self._dirty:
373 373 return
374 374 st = self._opener("dirstate", "w", atomictemp=True)
375 375
376 376 try:
377 377 gran = int(self._ui.config('dirstate', 'granularity', 1))
378 378 except ValueError:
379 379 gran = 1
380 380 limit = sys.maxint
381 381 if gran > 0:
382 382 limit = util.fstat(st).st_mtime - gran
383 383
384 384 cs = cStringIO.StringIO()
385 385 copymap = self._copymap
386 386 pack = struct.pack
387 387 write = cs.write
388 388 write("".join(self._pl))
389 389 for f, e in self._map.iteritems():
390 390 if f in copymap:
391 391 f = "%s\0%s" % (f, copymap[f])
392 392 if e[3] > limit and e[0] == 'n':
393 393 e = (e[0], 0, -1, -1, 0)
394 394 e = pack(_format, e[0], e[1], e[2], e[3], len(f))
395 395 write(e)
396 396 write(f)
397 397 st.write(cs.getvalue())
398 398 st.rename()
399 399 self._dirty = self._dirtypl = False
400 400
401 401 def _dirignore(self, f):
402 402 if f == '.':
403 403 return False
404 404 if self._ignore(f):
405 405 return True
406 406 for p in _finddirs(f):
407 407 if self._ignore(p):
408 408 return True
409 409 return False
410 410
411 411 def walk(self, match, unknown, ignored):
412 412 '''
413 413 walk recursively through the directory tree, finding all files
414 414 matched by the match function
415 415
416 416 results are yielded in a tuple (filename, stat), where stat
417 417 and st is the stat result if the file was found in the directory.
418 418 '''
419 419
420 420 def fwarn(f, msg):
421 421 self._ui.warn('%s: %s\n' % (self.pathto(ff), msg))
422 422 return False
423 423 badfn = fwarn
424 424 if hasattr(match, 'bad'):
425 425 badfn = match.bad
426 426
427 427 def badtype(f, mode):
428 428 kind = 'unknown'
429 429 if stat.S_ISCHR(mode): kind = _('character device')
430 430 elif stat.S_ISBLK(mode): kind = _('block device')
431 431 elif stat.S_ISFIFO(mode): kind = _('fifo')
432 432 elif stat.S_ISSOCK(mode): kind = _('socket')
433 433 elif stat.S_ISDIR(mode): kind = _('directory')
434 434 self._ui.warn(_('%s: unsupported file type (type is %s)\n')
435 435 % (self.pathto(f), kind))
436 436
437 437 # TODO: don't walk unknown directories if unknown and ignored are False
438 438 ignore = self._ignore
439 439 dirignore = self._dirignore
440 440 if ignored:
441 441 ignore = util.never
442 442 dirignore = util.never
443 443
444 matchfn = match.matchfn
444 445 dmap = self._map
445 446 normpath = util.normpath
446 447 normalize = self.normalize
447 448 listdir = osutil.listdir
448 449 lstat = os.lstat
449 450 bisect_left = bisect.bisect_left
450 451 pconvert = util.pconvert
451 452 getkind = stat.S_IFMT
452 453 dirkind = stat.S_IFDIR
453 454 regkind = stat.S_IFREG
454 455 lnkkind = stat.S_IFLNK
455 456 join = self._join
456 457 work = []
457 458 wadd = work.append
458 459
459 460 files = util.unique(match.files())
460 461 if not files or '.' in files:
461 462 files = ['']
462 463 results = {'.hg': None}
463 464
464 465 # step 1: find all explicit files
465 466 for ff in util.sort(files):
466 467 nf = normalize(normpath(ff))
467 468 if nf in results:
468 469 continue
469 470
470 471 try:
471 472 st = lstat(join(nf))
472 473 kind = getkind(st.st_mode)
473 474 if kind == dirkind:
474 475 if not dirignore(nf):
475 476 wadd(nf)
476 477 elif kind == regkind or kind == lnkkind:
477 478 results[nf] = st
478 479 else:
479 480 badtype(ff, kind)
480 481 if nf in dmap:
481 482 results[nf] = None
482 483 except OSError, inst:
483 484 keep = False
484 485 prefix = nf + "/"
485 486 for fn in dmap:
486 487 if nf == fn or fn.startswith(prefix):
487 488 keep = True
488 489 break
489 490 if not keep:
490 491 if inst.errno != errno.ENOENT:
491 492 fwarn(ff, inst.strerror)
492 493 elif badfn(ff, inst.strerror):
493 if (nf in dmap or not ignore(nf)) and match(nf):
494 if (nf in dmap or not ignore(nf)) and matchfn(nf):
494 495 results[nf] = None
495 496
496 497 # step 2: visit subdirectories
497 498 while work:
498 499 nd = work.pop()
499 500 if hasattr(match, 'dir'):
500 501 match.dir(nd)
501 502 entries = listdir(join(nd), stat=True)
502 503 if nd == '.':
503 504 nd = ''
504 505 else:
505 506 # do not recurse into a repo contained in this
506 507 # one. use bisect to find .hg directory so speed
507 508 # is good on big directory.
508 509 hg = bisect_left(entries, ('.hg'))
509 510 if hg < len(entries) and entries[hg][0] == '.hg' \
510 511 and entries[hg][1] == dirkind:
511 512 continue
512 513 for f, kind, st in entries:
513 514 nf = normalize(nd and (nd + "/" + f) or f)
514 515 if nf not in results:
515 516 if kind == dirkind:
516 517 if not ignore(nf):
517 518 wadd(nf)
518 if nf in dmap and match(nf):
519 if nf in dmap and matchfn(nf):
519 520 results[nf] = None
520 521 elif kind == regkind or kind == lnkkind:
521 522 if nf in dmap:
522 if match(nf):
523 if matchfn(nf):
523 524 results[nf] = st
524 elif match(nf) and not ignore(nf):
525 elif matchfn(nf) and not ignore(nf):
525 526 results[nf] = st
526 elif nf in dmap and match(nf):
527 elif nf in dmap and matchfn(nf):
527 528 results[nf] = None
528 529
529 530 # step 3: report unseen items in the dmap hash
530 531 visit = [f for f in dmap if f not in results and match(f)]
531 532 for nf in util.sort(visit):
532 533 results[nf] = None
533 534 try:
534 535 st = lstat(join(nf))
535 536 kind = getkind(st.st_mode)
536 537 if kind == regkind or kind == lnkkind:
537 538 results[nf] = st
538 539 except OSError, inst:
539 540 if inst.errno not in (errno.ENOENT, errno.ENOTDIR):
540 541 raise
541 542
542 543 del results['.hg']
543 544 return results
544 545
545 546 def status(self, match, ignored, clean, unknown):
546 547 listignored, listclean, listunknown = ignored, clean, unknown
547 548 lookup, modified, added, unknown, ignored = [], [], [], [], []
548 549 removed, deleted, clean = [], [], []
549 550
550 551 _join = self._join
551 552 lstat = os.lstat
552 553 cmap = self._copymap
553 554 dmap = self._map
554 555 ladd = lookup.append
555 556 madd = modified.append
556 557 aadd = added.append
557 558 uadd = unknown.append
558 559 iadd = ignored.append
559 560 radd = removed.append
560 561 dadd = deleted.append
561 562 cadd = clean.append
562 563
563 564 for fn, st in self.walk(match, listunknown, listignored).iteritems():
564 565 if fn not in dmap:
565 566 if (listignored or match.exact(fn)) and self._dirignore(fn):
566 567 if listignored:
567 568 iadd(fn)
568 569 elif listunknown:
569 570 uadd(fn)
570 571 continue
571 572
572 573 state, mode, size, time, foo = dmap[fn]
573 574
574 575 if not st and state in "nma":
575 576 dadd(fn)
576 577 elif state == 'n':
577 578 if (size >= 0 and
578 579 (size != st.st_size
579 580 or ((mode ^ st.st_mode) & 0100 and self._checkexec))
580 581 or size == -2
581 582 or fn in self._copymap):
582 583 madd(fn)
583 584 elif time != int(st.st_mtime):
584 585 ladd(fn)
585 586 elif listclean:
586 587 cadd(fn)
587 588 elif state == 'm':
588 589 madd(fn)
589 590 elif state == 'a':
590 591 aadd(fn)
591 592 elif state == 'r':
592 593 radd(fn)
593 594
594 595 return (lookup, modified, added, removed, deleted, unknown, ignored,
595 596 clean)
@@ -1,47 +1,47 b''
1 1 import util
2 2
3 3 class _match(object):
4 4 def __init__(self, root, cwd, files, mf, ap):
5 5 self._root = root
6 6 self._cwd = cwd
7 7 self._files = files
8 8 self._fmap = dict.fromkeys(files)
9 self._matchfn = mf
9 self.matchfn = mf
10 10 self._anypats = ap
11 11 def __call__(self, fn):
12 return self._matchfn(fn)
12 return self.matchfn(fn)
13 13 def __iter__(self):
14 14 for f in self._files:
15 15 yield f
16 16 def bad(self, f, msg):
17 17 return True
18 18 def dir(self, f):
19 19 pass
20 20 def missing(self, f):
21 21 pass
22 22 def exact(self, f):
23 23 return f in self._fmap
24 24 def rel(self, f):
25 25 return util.pathto(self._root, self._cwd, f)
26 26 def files(self):
27 27 return self._files
28 28 def anypats(self):
29 29 return self._anypats
30 30
class always(_match):
    '''matcher with no explicit files that accepts every filename'''
    def __init__(self, root, cwd):
        accept = lambda f: True
        _match.__init__(self, root, cwd, [], accept, False)
34 34
class never(_match):
    '''matcher with no explicit files that rejects every filename'''
    def __init__(self, root, cwd):
        reject = lambda f: False
        _match.__init__(self, root, cwd, [], reject, False)
38 38
class exact(_match):
    '''matcher accepting exactly the filenames listed in files'''
    def __init__(self, root, cwd, files):
        # self.exact looks names up in the _fmap dictionary built by
        # _match.__init__, avoiding a linear scan of the files list on
        # every call
        _match.__init__(self, root, cwd, files, self.exact, False)
42 42
class match(_match):
    '''matcher built by util.matcher from patterns and include/exclude'''
    def __init__(self, root, cwd, patterns, include, exclude, default):
        matched = util.matcher(root, cwd, patterns, include, exclude,
                               None, default)
        # util.matcher returns (files, match function, anypats flag)
        files, matchfn, anypats = matched
        _match.__init__(self, root, cwd, files, matchfn, anypats)
General Comments 0
You need to be logged in to leave comments. Login now