##// END OF EJS Templates
dirstate: improve performance for building _dirs
Matt Mackall -
r7032:7dfac37c default
parent child Browse files
Show More
@@ -1,603 +1,607 b''
1 1 """
2 2 dirstate.py - working directory tracking for mercurial
3 3
4 4 Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
5 5
6 6 This software may be used and distributed according to the terms
7 7 of the GNU General Public License, incorporated herein by reference.
8 8 """
9 9
10 10 from node import nullid
11 11 from i18n import _
12 12 import struct, os, bisect, stat, util, errno, ignore
13 13 import cStringIO, osutil, sys
14 14
15 15 _unknown = ('?', 0, 0, 0)
16 16 _format = ">cllll"
17 17
18 18 def _finddirs(path):
19 pos = len(path)
20 while 1:
19 pos = path.rfind('/')
20 while pos != -1:
21 yield path[:pos]
21 22 pos = path.rfind('/', 0, pos)
22 if pos == -1:
23 break
24 yield path[:pos]
25 23
26 24 class dirstate(object):
27 25
28 26 def __init__(self, opener, ui, root):
29 27 self._opener = opener
30 28 self._root = root
31 29 self._rootdir = os.path.join(root, '')
32 30 self._dirty = False
33 31 self._dirtypl = False
34 32 self._ui = ui
35 33
36 34 def __getattr__(self, name):
37 35 if name == '_map':
38 36 self._read()
39 37 return self._map
40 38 elif name == '_copymap':
41 39 self._read()
42 40 return self._copymap
43 41 elif name == '_foldmap':
44 42 _foldmap = {}
45 43 for name in self._map:
46 44 norm = os.path.normcase(os.path.normpath(name))
47 45 _foldmap[norm] = name
48 46 self._foldmap = _foldmap
49 47 return self._foldmap
50 48 elif name == '_branch':
51 49 try:
52 50 self._branch = (self._opener("branch").read().strip()
53 51 or "default")
54 52 except IOError:
55 53 self._branch = "default"
56 54 return self._branch
57 55 elif name == '_pl':
58 56 self._pl = [nullid, nullid]
59 57 try:
60 58 st = self._opener("dirstate").read(40)
61 59 if len(st) == 40:
62 60 self._pl = st[:20], st[20:40]
63 61 except IOError, err:
64 62 if err.errno != errno.ENOENT: raise
65 63 return self._pl
66 64 elif name == '_dirs':
67 65 dirs = {}
68 for f,s in self._map.items():
66 for f,s in self._map.iteritems():
69 67 if s[0] != 'r':
70 for base in _finddirs(f):
71 dirs[base] = dirs.get(base, 0) + 1
68 pos = f.rfind('/')
69 while pos != -1:
70 f = f[:pos]
71 if f in dirs:
72 dirs[f] += 1
73 break
74 dirs[f] = 1
75 pos = f.rfind('/')
72 76 self._dirs = dirs
73 77 return self._dirs
74 78 elif name == '_ignore':
75 79 files = [self._join('.hgignore')]
76 80 for name, path in self._ui.configitems("ui"):
77 81 if name == 'ignore' or name.startswith('ignore.'):
78 82 files.append(os.path.expanduser(path))
79 83 self._ignore = ignore.ignore(self._root, files, self._ui.warn)
80 84 return self._ignore
81 85 elif name == '_slash':
82 86 self._slash = self._ui.configbool('ui', 'slash') and os.sep != '/'
83 87 return self._slash
84 88 elif name == '_checklink':
85 89 self._checklink = util.checklink(self._root)
86 90 return self._checklink
87 91 elif name == '_checkexec':
88 92 self._checkexec = util.checkexec(self._root)
89 93 return self._checkexec
90 94 elif name == '_checkcase':
91 95 self._checkcase = not util.checkcase(self._join('.hg'))
92 96 return self._checkcase
93 97 elif name == 'normalize':
94 98 if self._checkcase:
95 99 self.normalize = self._normalize
96 100 else:
97 101 self.normalize = lambda x: x
98 102 return self.normalize
99 103 else:
100 104 raise AttributeError(name)
101 105
102 106 def _join(self, f):
103 107 # much faster than os.path.join()
104 108 # it's safe because f is always a relative path
105 109 return self._rootdir + f
106 110
107 111 def flagfunc(self, fallback):
108 112 if self._checklink:
109 113 if self._checkexec:
110 114 def f(x):
111 115 p = self._join(x)
112 116 if os.path.islink(p):
113 117 return 'l'
114 118 if util.is_exec(p):
115 119 return 'x'
116 120 return ''
117 121 return f
118 122 def f(x):
119 123 if os.path.islink(self._join(x)):
120 124 return 'l'
121 125 if 'x' in fallback(x):
122 126 return 'x'
123 127 return ''
124 128 return f
125 129 if self._checkexec:
126 130 def f(x):
127 131 if 'l' in fallback(x):
128 132 return 'l'
129 133 if util.is_exec(self._join(x)):
130 134 return 'x'
131 135 return ''
132 136 return f
133 137 return fallback
134 138
135 139 def getcwd(self):
136 140 cwd = os.getcwd()
137 141 if cwd == self._root: return ''
138 142 # self._root ends with a path separator if self._root is '/' or 'C:\'
139 143 rootsep = self._root
140 144 if not util.endswithsep(rootsep):
141 145 rootsep += os.sep
142 146 if cwd.startswith(rootsep):
143 147 return cwd[len(rootsep):]
144 148 else:
145 149 # we're outside the repo. return an absolute path.
146 150 return cwd
147 151
148 152 def pathto(self, f, cwd=None):
149 153 if cwd is None:
150 154 cwd = self.getcwd()
151 155 path = util.pathto(self._root, cwd, f)
152 156 if self._slash:
153 157 return util.normpath(path)
154 158 return path
155 159
156 160 def __getitem__(self, key):
157 161 ''' current states:
158 162 n normal
159 163 m needs merging
160 164 r marked for removal
161 165 a marked for addition
162 166 ? not tracked'''
163 167 return self._map.get(key, ("?",))[0]
164 168
165 169 def __contains__(self, key):
166 170 return key in self._map
167 171
168 172 def __iter__(self):
169 173 for x in util.sort(self._map):
170 174 yield x
171 175
172 176 def parents(self):
173 177 return self._pl
174 178
175 179 def branch(self):
176 180 return self._branch
177 181
178 182 def setparents(self, p1, p2=nullid):
179 183 self._dirty = self._dirtypl = True
180 184 self._pl = p1, p2
181 185
182 186 def setbranch(self, branch):
183 187 self._branch = branch
184 188 self._opener("branch", "w").write(branch + '\n')
185 189
186 190 def _read(self):
187 191 self._map = {}
188 192 self._copymap = {}
189 193 if not self._dirtypl:
190 194 self._pl = [nullid, nullid]
191 195 try:
192 196 st = self._opener("dirstate").read()
193 197 except IOError, err:
194 198 if err.errno != errno.ENOENT: raise
195 199 return
196 200 if not st:
197 201 return
198 202
199 203 if not self._dirtypl:
200 204 self._pl = [st[:20], st[20: 40]]
201 205
202 206 # deref fields so they will be local in loop
203 207 dmap = self._map
204 208 copymap = self._copymap
205 209 unpack = struct.unpack
206 210 e_size = struct.calcsize(_format)
207 211 pos1 = 40
208 212 l = len(st)
209 213
210 214 # the inner loop
211 215 while pos1 < l:
212 216 pos2 = pos1 + e_size
213 217 e = unpack(">cllll", st[pos1:pos2]) # a literal here is faster
214 218 pos1 = pos2 + e[4]
215 219 f = st[pos2:pos1]
216 220 if '\0' in f:
217 221 f, c = f.split('\0')
218 222 copymap[f] = c
219 223 dmap[f] = e # we hold onto e[4] because making a subtuple is slow
220 224
221 225 def invalidate(self):
222 226 for a in "_map _copymap _foldmap _branch _pl _dirs _ignore".split():
223 227 if a in self.__dict__:
224 228 delattr(self, a)
225 229 self._dirty = False
226 230
227 231 def copy(self, source, dest):
228 232 if source == dest:
229 233 return
230 234 self._dirty = True
231 235 self._copymap[dest] = source
232 236
233 237 def copied(self, file):
234 238 return self._copymap.get(file, None)
235 239
236 240 def copies(self):
237 241 return self._copymap
238 242
239 243 def _droppath(self, f):
240 244 if self[f] not in "?r" and "_dirs" in self.__dict__:
241 245 dirs = self._dirs
242 246 for base in _finddirs(f):
243 247 if dirs[base] == 1:
244 248 del dirs[base]
245 else:
246 dirs[base] -= 1
249 return
250 dirs[base] -= 1
247 251
248 252 def _addpath(self, f, check=False):
249 253 oldstate = self[f]
250 254 if check or oldstate == "r":
251 255 if '\r' in f or '\n' in f:
252 256 raise util.Abort(
253 257 _("'\\n' and '\\r' disallowed in filenames: %r") % f)
254 258 if f in self._dirs:
255 259 raise util.Abort(_('directory %r already in dirstate') % f)
256 260 # shadows
257 261 for d in _finddirs(f):
258 262 if d in self._dirs:
259 263 break
260 264 if d in self._map and self[d] != 'r':
261 265 raise util.Abort(
262 266 _('file %r in dirstate clashes with %r') % (d, f))
263 267 if oldstate in "?r" and "_dirs" in self.__dict__:
264 268 dirs = self._dirs
265 269 for base in _finddirs(f):
266 270 dirs[base] = dirs.get(base, 0) + 1
267 271
268 272 def normal(self, f):
269 273 'mark a file normal and clean'
270 274 self._dirty = True
271 275 self._addpath(f)
272 276 s = os.lstat(self._join(f))
273 277 self._map[f] = ('n', s.st_mode, s.st_size, s.st_mtime, 0)
274 278 if f in self._copymap:
275 279 del self._copymap[f]
276 280
277 281 def normallookup(self, f):
278 282 'mark a file normal, but possibly dirty'
279 283 if self._pl[1] != nullid and f in self._map:
280 284 # if there is a merge going on and the file was either
281 285 # in state 'm' or dirty before being removed, restore that state.
282 286 entry = self._map[f]
283 287 if entry[0] == 'r' and entry[2] in (-1, -2):
284 288 source = self._copymap.get(f)
285 289 if entry[2] == -1:
286 290 self.merge(f)
287 291 elif entry[2] == -2:
288 292 self.normaldirty(f)
289 293 if source:
290 294 self.copy(source, f)
291 295 return
292 296 if entry[0] == 'm' or entry[0] == 'n' and entry[2] == -2:
293 297 return
294 298 self._dirty = True
295 299 self._addpath(f)
296 300 self._map[f] = ('n', 0, -1, -1, 0)
297 301 if f in self._copymap:
298 302 del self._copymap[f]
299 303
300 304 def normaldirty(self, f):
301 305 'mark a file normal, but dirty'
302 306 self._dirty = True
303 307 self._addpath(f)
304 308 self._map[f] = ('n', 0, -2, -1, 0)
305 309 if f in self._copymap:
306 310 del self._copymap[f]
307 311
308 312 def add(self, f):
309 313 'mark a file added'
310 314 self._dirty = True
311 315 self._addpath(f, True)
312 316 self._map[f] = ('a', 0, -1, -1, 0)
313 317 if f in self._copymap:
314 318 del self._copymap[f]
315 319
316 320 def remove(self, f):
317 321 'mark a file removed'
318 322 self._dirty = True
319 323 self._droppath(f)
320 324 size = 0
321 325 if self._pl[1] != nullid and f in self._map:
322 326 entry = self._map[f]
323 327 if entry[0] == 'm':
324 328 size = -1
325 329 elif entry[0] == 'n' and entry[2] == -2:
326 330 size = -2
327 331 self._map[f] = ('r', 0, size, 0, 0)
328 332 if size == 0 and f in self._copymap:
329 333 del self._copymap[f]
330 334
331 335 def merge(self, f):
332 336 'mark a file merged'
333 337 self._dirty = True
334 338 s = os.lstat(self._join(f))
335 339 self._addpath(f)
336 340 self._map[f] = ('m', s.st_mode, s.st_size, s.st_mtime, 0)
337 341 if f in self._copymap:
338 342 del self._copymap[f]
339 343
340 344 def forget(self, f):
341 345 'forget a file'
342 346 self._dirty = True
343 347 try:
344 348 self._droppath(f)
345 349 del self._map[f]
346 350 except KeyError:
347 351 self._ui.warn(_("not in dirstate: %s\n") % f)
348 352
349 353 def _normalize(self, path):
350 354 norm_path = os.path.normcase(os.path.normpath(path))
351 355 if norm_path not in self._foldmap:
352 356 if not os.path.exists(os.path.join(self._root, path)):
353 357 return path
354 358 self._foldmap[norm_path] = util.fspath(path, self._root)
355 359 return self._foldmap[norm_path]
356 360
357 361 def clear(self):
358 362 self._map = {}
359 363 if "_dirs" in self.__dict__:
360 364 delattr(self, "_dirs");
361 365 self._copymap = {}
362 366 self._pl = [nullid, nullid]
363 367 self._dirty = True
364 368
365 369 def rebuild(self, parent, files):
366 370 self.clear()
367 371 for f in files:
368 372 if 'x' in files.flags(f):
369 373 self._map[f] = ('n', 0777, -1, 0, 0)
370 374 else:
371 375 self._map[f] = ('n', 0666, -1, 0, 0)
372 376 self._pl = (parent, nullid)
373 377 self._dirty = True
374 378
375 379 def write(self):
376 380 if not self._dirty:
377 381 return
378 382 st = self._opener("dirstate", "w", atomictemp=True)
379 383
380 384 try:
381 385 gran = int(self._ui.config('dirstate', 'granularity', 1))
382 386 except ValueError:
383 387 gran = 1
384 388 limit = sys.maxint
385 389 if gran > 0:
386 390 limit = util.fstat(st).st_mtime - gran
387 391
388 392 cs = cStringIO.StringIO()
389 393 copymap = self._copymap
390 394 pack = struct.pack
391 395 write = cs.write
392 396 write("".join(self._pl))
393 397 for f, e in self._map.iteritems():
394 398 if f in copymap:
395 399 f = "%s\0%s" % (f, copymap[f])
396 400 if e[3] > limit and e[0] == 'n':
397 401 e = (e[0], 0, -1, -1, 0)
398 402 e = pack(_format, e[0], e[1], e[2], e[3], len(f))
399 403 write(e)
400 404 write(f)
401 405 st.write(cs.getvalue())
402 406 st.rename()
403 407 self._dirty = self._dirtypl = False
404 408
405 409 def _dirignore(self, f):
406 410 if f == '.':
407 411 return False
408 412 if self._ignore(f):
409 413 return True
410 414 for p in _finddirs(f):
411 415 if self._ignore(p):
412 416 return True
413 417 return False
414 418
415 419 def walk(self, match, unknown, ignored):
416 420 '''
417 421 walk recursively through the directory tree, finding all files
418 422 matched by the match function
419 423
420 424 results are yielded in a tuple (filename, stat), where stat
421 425 and st is the stat result if the file was found in the directory.
422 426 '''
423 427
424 428 def fwarn(f, msg):
425 429 self._ui.warn('%s: %s\n' % (self.pathto(f), msg))
426 430 return False
427 431 badfn = fwarn
428 432 if hasattr(match, 'bad'):
429 433 badfn = match.bad
430 434
431 435 def badtype(f, mode):
432 436 kind = 'unknown'
433 437 if stat.S_ISCHR(mode): kind = _('character device')
434 438 elif stat.S_ISBLK(mode): kind = _('block device')
435 439 elif stat.S_ISFIFO(mode): kind = _('fifo')
436 440 elif stat.S_ISSOCK(mode): kind = _('socket')
437 441 elif stat.S_ISDIR(mode): kind = _('directory')
438 442 self._ui.warn(_('%s: unsupported file type (type is %s)\n')
439 443 % (self.pathto(f), kind))
440 444
441 445 ignore = self._ignore
442 446 dirignore = self._dirignore
443 447 if ignored:
444 448 ignore = util.never
445 449 dirignore = util.never
446 450 elif not unknown:
447 451 # if unknown and ignored are False, skip step 2
448 452 ignore = util.always
449 453 dirignore = util.always
450 454
451 455 matchfn = match.matchfn
452 456 dmap = self._map
453 457 normpath = util.normpath
454 458 normalize = self.normalize
455 459 listdir = osutil.listdir
456 460 lstat = os.lstat
457 461 bisect_left = bisect.bisect_left
458 462 pconvert = util.pconvert
459 463 getkind = stat.S_IFMT
460 464 dirkind = stat.S_IFDIR
461 465 regkind = stat.S_IFREG
462 466 lnkkind = stat.S_IFLNK
463 467 join = self._join
464 468 work = []
465 469 wadd = work.append
466 470
467 471 files = util.unique(match.files())
468 472 if not files or '.' in files:
469 473 files = ['']
470 474 results = {'.hg': None}
471 475
472 476 # step 1: find all explicit files
473 477 for ff in util.sort(files):
474 478 nf = normalize(normpath(ff))
475 479 if nf in results:
476 480 continue
477 481
478 482 try:
479 483 st = lstat(join(nf))
480 484 kind = getkind(st.st_mode)
481 485 if kind == dirkind:
482 486 if not dirignore(nf):
483 487 wadd(nf)
484 488 elif kind == regkind or kind == lnkkind:
485 489 results[nf] = st
486 490 else:
487 491 badtype(ff, kind)
488 492 if nf in dmap:
489 493 results[nf] = None
490 494 except OSError, inst:
491 495 keep = False
492 496 prefix = nf + "/"
493 497 for fn in dmap:
494 498 if nf == fn or fn.startswith(prefix):
495 499 keep = True
496 500 break
497 501 if not keep:
498 502 if inst.errno != errno.ENOENT:
499 503 fwarn(ff, inst.strerror)
500 504 elif badfn(ff, inst.strerror):
501 505 if (nf in dmap or not ignore(nf)) and matchfn(nf):
502 506 results[nf] = None
503 507
504 508 # step 2: visit subdirectories
505 509 while work:
506 510 nd = work.pop()
507 511 if hasattr(match, 'dir'):
508 512 match.dir(nd)
509 513 entries = listdir(join(nd), stat=True)
510 514 if nd == '.':
511 515 nd = ''
512 516 else:
513 517 # do not recurse into a repo contained in this
514 518 # one. use bisect to find .hg directory so speed
515 519 # is good on big directory.
516 520 hg = bisect_left(entries, ('.hg'))
517 521 if hg < len(entries) and entries[hg][0] == '.hg' \
518 522 and entries[hg][1] == dirkind:
519 523 continue
520 524 for f, kind, st in entries:
521 525 nf = normalize(nd and (nd + "/" + f) or f)
522 526 if nf not in results:
523 527 if kind == dirkind:
524 528 if not ignore(nf):
525 529 wadd(nf)
526 530 if nf in dmap and matchfn(nf):
527 531 results[nf] = None
528 532 elif kind == regkind or kind == lnkkind:
529 533 if nf in dmap:
530 534 if matchfn(nf):
531 535 results[nf] = st
532 536 elif matchfn(nf) and not ignore(nf):
533 537 results[nf] = st
534 538 elif nf in dmap and matchfn(nf):
535 539 results[nf] = None
536 540
537 541 # step 3: report unseen items in the dmap hash
538 542 visit = [f for f in dmap if f not in results and match(f)]
539 543 for nf in util.sort(visit):
540 544 results[nf] = None
541 545 try:
542 546 st = lstat(join(nf))
543 547 kind = getkind(st.st_mode)
544 548 if kind == regkind or kind == lnkkind:
545 549 results[nf] = st
546 550 except OSError, inst:
547 551 if inst.errno not in (errno.ENOENT, errno.ENOTDIR):
548 552 raise
549 553
550 554 del results['.hg']
551 555 return results
552 556
553 557 def status(self, match, ignored, clean, unknown):
554 558 listignored, listclean, listunknown = ignored, clean, unknown
555 559 lookup, modified, added, unknown, ignored = [], [], [], [], []
556 560 removed, deleted, clean = [], [], []
557 561
558 562 _join = self._join
559 563 lstat = os.lstat
560 564 cmap = self._copymap
561 565 dmap = self._map
562 566 ladd = lookup.append
563 567 madd = modified.append
564 568 aadd = added.append
565 569 uadd = unknown.append
566 570 iadd = ignored.append
567 571 radd = removed.append
568 572 dadd = deleted.append
569 573 cadd = clean.append
570 574
571 575 for fn, st in self.walk(match, listunknown, listignored).iteritems():
572 576 if fn not in dmap:
573 577 if (listignored or match.exact(fn)) and self._dirignore(fn):
574 578 if listignored:
575 579 iadd(fn)
576 580 elif listunknown:
577 581 uadd(fn)
578 582 continue
579 583
580 584 state, mode, size, time, foo = dmap[fn]
581 585
582 586 if not st and state in "nma":
583 587 dadd(fn)
584 588 elif state == 'n':
585 589 if (size >= 0 and
586 590 (size != st.st_size
587 591 or ((mode ^ st.st_mode) & 0100 and self._checkexec))
588 592 or size == -2
589 593 or fn in self._copymap):
590 594 madd(fn)
591 595 elif time != int(st.st_mtime):
592 596 ladd(fn)
593 597 elif listclean:
594 598 cadd(fn)
595 599 elif state == 'm':
596 600 madd(fn)
597 601 elif state == 'a':
598 602 aadd(fn)
599 603 elif state == 'r':
600 604 radd(fn)
601 605
602 606 return (lookup, modified, added, removed, deleted, unknown, ignored,
603 607 clean)
General Comments 0
You need to be logged in to leave comments. Login now