##// END OF EJS Templates
dirstate: speed up sorting in findfiles
Matt Mackall -
r5002:4d079df2 default
parent child Browse files
Show More
@@ -1,518 +1,520 b''
1 1 """
2 2 dirstate.py - working directory tracking for mercurial
3 3
4 4 Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
5 5
6 6 This software may be used and distributed according to the terms
7 7 of the GNU General Public License, incorporated herein by reference.
8 8 """
9 9
10 10 from node import *
11 11 from i18n import _
12 12 import struct, os, time, bisect, stat, strutil, util, re, errno, ignore
13 13 import cStringIO
14 14
15 15 _unknown = ('?', 0, 0, 0)
16 16 _format = ">cllll"
17 17
18 18 class dirstate(object):
19 19
20 20 def __init__(self, opener, ui, root):
21 21 self._opener = opener
22 22 self._root = root
23 23 self._dirty = False
24 24 self._dirtypl = False
25 25 self._ui = ui
26 26
27 27 def __getattr__(self, name):
28 28 if name == '_map':
29 29 self._read()
30 30 return self._map
31 31 elif name == '_copymap':
32 32 self._read()
33 33 return self._copymap
34 34 elif name == '_branch':
35 35 try:
36 36 self._branch = (self._opener("branch").read().strip()
37 37 or "default")
38 38 except IOError:
39 39 self._branch = "default"
40 40 return self._branch
41 41 elif name == '_pl':
42 42 self._pl = [nullid, nullid]
43 43 try:
44 44 st = self._opener("dirstate").read(40)
45 45 if len(st) == 40:
46 46 self._pl = st[:20], st[20:40]
47 47 except IOError, err:
48 48 if err.errno != errno.ENOENT: raise
49 49 return self._pl
50 50 elif name == '_dirs':
51 51 self._dirs = {}
52 52 for f in self._map:
53 53 self._incpath(f)
54 54 return self._dirs
55 55 elif name == '_ignore':
56 56 files = [self._join('.hgignore')]
57 57 for name, path in self._ui.configitems("ui"):
58 58 if name == 'ignore' or name.startswith('ignore.'):
59 59 files.append(os.path.expanduser(path))
60 60 self._ignore = ignore.ignore(self._root, files, self._ui.warn)
61 61 return self._ignore
62 62 elif name == '_slash':
63 63 self._slash = self._ui.configbool('ui', 'slash') and os.sep != '/'
64 64 return self._slash
65 65 else:
66 66 raise AttributeError, name
67 67
68 68 def _join(self, f):
69 69 return os.path.join(self._root, f)
70 70
71 71 def getcwd(self):
72 72 cwd = os.getcwd()
73 73 if cwd == self._root: return ''
74 74 # self._root ends with a path separator if self._root is '/' or 'C:\'
75 75 rootsep = self._root
76 76 if not rootsep.endswith(os.sep):
77 77 rootsep += os.sep
78 78 if cwd.startswith(rootsep):
79 79 return cwd[len(rootsep):]
80 80 else:
81 81 # we're outside the repo. return an absolute path.
82 82 return cwd
83 83
84 84 def pathto(self, f, cwd=None):
85 85 if cwd is None:
86 86 cwd = self.getcwd()
87 87 path = util.pathto(self._root, cwd, f)
88 88 if self._slash:
89 89 return path.replace(os.sep, '/')
90 90 return path
91 91
92 92 def __getitem__(self, key):
93 93 ''' current states:
94 94 n normal
95 95 m needs merging
96 96 r marked for removal
97 97 a marked for addition
98 98 ? not tracked'''
99 99 return self._map.get(key, ("?",))[0]
100 100
101 101 def __contains__(self, key):
102 102 return key in self._map
103 103
104 104 def __iter__(self):
105 105 a = self._map.keys()
106 106 a.sort()
107 107 for x in a:
108 108 yield x
109 109
110 110 def parents(self):
111 111 return self._pl
112 112
113 113 def branch(self):
114 114 return self._branch
115 115
116 116 def setparents(self, p1, p2=nullid):
117 117 self._dirty = self._dirtypl = True
118 118 self._pl = p1, p2
119 119
120 120 def setbranch(self, branch):
121 121 self._branch = branch
122 122 self._opener("branch", "w").write(branch + '\n')
123 123
124 124 def _read(self):
125 125 self._map = {}
126 126 self._copymap = {}
127 127 if not self._dirtypl:
128 128 self._pl = [nullid, nullid]
129 129 try:
130 130 st = self._opener("dirstate").read()
131 131 except IOError, err:
132 132 if err.errno != errno.ENOENT: raise
133 133 return
134 134 if not st:
135 135 return
136 136
137 137 if not self._dirtypl:
138 138 self._pl = [st[:20], st[20: 40]]
139 139
140 140 # deref fields so they will be local in loop
141 141 dmap = self._map
142 142 copymap = self._copymap
143 143 unpack = struct.unpack
144 144
145 145 pos = 40
146 146 e_size = struct.calcsize(_format)
147 147
148 148 while pos < len(st):
149 149 newpos = pos + e_size
150 150 e = unpack(_format, st[pos:newpos])
151 151 l = e[4]
152 152 pos = newpos
153 153 newpos = pos + l
154 154 f = st[pos:newpos]
155 155 if '\0' in f:
156 156 f, c = f.split('\0')
157 157 copymap[f] = c
158 158 dmap[f] = e[:4]
159 159 pos = newpos
160 160
161 161 def invalidate(self):
162 162 for a in "_map _copymap _branch _pl _dirs _ignore".split():
163 163 if a in self.__dict__:
164 164 delattr(self, a)
165 165 self._dirty = False
166 166
167 167 def copy(self, source, dest):
168 168 self._dirty = True
169 169 self._copymap[dest] = source
170 170
171 171 def copied(self, file):
172 172 return self._copymap.get(file, None)
173 173
174 174 def copies(self):
175 175 return self._copymap
176 176
177 177 def _incpath(self, path):
178 178 for c in strutil.findall(path, '/'):
179 179 pc = path[:c]
180 180 self._dirs.setdefault(pc, 0)
181 181 self._dirs[pc] += 1
182 182
183 183 def _decpath(self, path):
184 184 for c in strutil.findall(path, '/'):
185 185 pc = path[:c]
186 186 self._dirs.setdefault(pc, 0)
187 187 self._dirs[pc] -= 1
188 188
189 189 def _incpathcheck(self, f):
190 190 if '\r' in f or '\n' in f:
191 191 raise util.Abort(_("'\\n' and '\\r' disallowed in filenames"))
192 192 # shadows
193 193 if f in self._dirs:
194 194 raise util.Abort(_('directory named %r already in dirstate') % f)
195 195 for c in strutil.rfindall(f, '/'):
196 196 d = f[:c]
197 197 if d in self._dirs:
198 198 break
199 199 if d in self._map:
200 200 raise util.Abort(_('file named %r already in dirstate') % d)
201 201 self._incpath(f)
202 202
203 203 def normal(self, f):
204 204 'mark a file normal'
205 205 self._dirty = True
206 206 s = os.lstat(self._join(f))
207 207 self._map[f] = ('n', s.st_mode, s.st_size, s.st_mtime)
208 208 if self._copymap.has_key(f):
209 209 del self._copymap[f]
210 210
211 211 def normaldirty(self, f):
212 212 'mark a file normal, but possibly dirty'
213 213 self._dirty = True
214 214 s = os.lstat(self._join(f))
215 215 self._map[f] = ('n', s.st_mode, -1, -1)
216 216 if f in self._copymap:
217 217 del self._copymap[f]
218 218
219 219 def add(self, f):
220 220 'mark a file added'
221 221 self._dirty = True
222 222 self._incpathcheck(f)
223 223 self._map[f] = ('a', 0, -1, -1)
224 224 if f in self._copymap:
225 225 del self._copymap[f]
226 226
227 227 def remove(self, f):
228 228 'mark a file removed'
229 229 self._dirty = True
230 230 self._map[f] = ('r', 0, 0, 0)
231 231 self._decpath(f)
232 232 if f in self._copymap:
233 233 del self._copymap[f]
234 234
235 235 def merge(self, f):
236 236 'mark a file merged'
237 237 self._dirty = True
238 238 s = os.lstat(self._join(f))
239 239 self._map[f] = ('m', s.st_mode, s.st_size, s.st_mtime)
240 240 if f in self._copymap:
241 241 del self._copymap[f]
242 242
243 243 def forget(self, f):
244 244 'forget a file'
245 245 self._dirty = True
246 246 try:
247 247 del self._map[f]
248 248 self._decpath(f)
249 249 except KeyError:
250 250 self._ui.warn(_("not in dirstate: %s!\n") % f)
251 251
252 252 def rebuild(self, parent, files):
253 253 self.invalidate()
254 254 for f in files:
255 255 if files.execf(f):
256 256 self._map[f] = ('n', 0777, -1, 0)
257 257 else:
258 258 self._map[f] = ('n', 0666, -1, 0)
259 259 self._pl = (parent, nullid)
260 260 self._dirty = True
261 261
262 262 def write(self):
263 263 if not self._dirty:
264 264 return
265 265 cs = cStringIO.StringIO()
266 266 cs.write("".join(self._pl))
267 267 for f, e in self._map.iteritems():
268 268 c = self.copied(f)
269 269 if c:
270 270 f = f + "\0" + c
271 271 e = struct.pack(_format, e[0], e[1], e[2], e[3], len(f))
272 272 cs.write(e)
273 273 cs.write(f)
274 274 st = self._opener("dirstate", "w", atomictemp=True)
275 275 st.write(cs.getvalue())
276 276 st.rename()
277 277 self._dirty = self._dirtypl = False
278 278
279 279 def _filter(self, files):
280 280 ret = {}
281 281 unknown = []
282 282
283 283 for x in files:
284 284 if x == '.':
285 285 return self._map.copy()
286 286 if x not in self._map:
287 287 unknown.append(x)
288 288 else:
289 289 ret[x] = self._map[x]
290 290
291 291 if not unknown:
292 292 return ret
293 293
294 294 b = self._map.keys()
295 295 b.sort()
296 296 blen = len(b)
297 297
298 298 for x in unknown:
299 299 bs = bisect.bisect(b, "%s%s" % (x, '/'))
300 300 while bs < blen:
301 301 s = b[bs]
302 302 if len(s) > len(x) and s.startswith(x):
303 303 ret[s] = self._map[s]
304 304 else:
305 305 break
306 306 bs += 1
307 307 return ret
308 308
309 309 def _supported(self, f, mode, verbose=False):
310 310 if stat.S_ISREG(mode) or stat.S_ISLNK(mode):
311 311 return True
312 312 if verbose:
313 313 kind = 'unknown'
314 314 if stat.S_ISCHR(mode): kind = _('character device')
315 315 elif stat.S_ISBLK(mode): kind = _('block device')
316 316 elif stat.S_ISFIFO(mode): kind = _('fifo')
317 317 elif stat.S_ISSOCK(mode): kind = _('socket')
318 318 elif stat.S_ISDIR(mode): kind = _('directory')
319 319 self._ui.warn(_('%s: unsupported file type (type is %s)\n')
320 320 % (self.pathto(f), kind))
321 321 return False
322 322
323 323 def walk(self, files=None, match=util.always, badmatch=None):
324 324 # filter out the stat
325 325 for src, f, st in self.statwalk(files, match, badmatch=badmatch):
326 326 yield src, f
327 327
328 328 def statwalk(self, files=None, match=util.always, ignored=False,
329 329 badmatch=None, directories=False):
330 330 '''
331 331 walk recursively through the directory tree, finding all files
332 332 matched by the match function
333 333
334 334 results are yielded in a tuple (src, filename, st), where src
335 335 is one of:
336 336 'f' the file was found in the directory tree
337 337 'd' the file is a directory of the tree
338 338 'm' the file was only in the dirstate and not in the tree
339 339 'b' file was not found and matched badmatch
340 340
341 341 and st is the stat result if the file was found in the directory.
342 342 '''
343 343
344 344 # walk all files by default
345 345 if not files:
346 346 files = ['.']
347 347 dc = self._map.copy()
348 348 else:
349 349 files = util.unique(files)
350 350 dc = self._filter(files)
351 351
352 352 def imatch(file_):
353 353 if file_ not in dc and self._ignore(file_):
354 354 return False
355 355 return match(file_)
356 356
357 357 ignore = self._ignore
358 358 if ignored:
359 359 imatch = match
360 360 ignore = util.never
361 361
362 362 # self._root may end with a path separator when self._root == '/'
363 363 common_prefix_len = len(self._root)
364 364 if not self._root.endswith(os.sep):
365 365 common_prefix_len += 1
366 366
367 367 normpath = util.normpath
368 368 listdir = os.listdir
369 369 lstat = os.lstat
370 370 bisect_left = bisect.bisect_left
371 371 isdir = os.path.isdir
372 372 pconvert = util.pconvert
373 373 join = os.path.join
374 374 s_isdir = stat.S_ISDIR
375 375 supported = self._supported
376 376 _join = self._join
377 377 known = {'.hg': 1}
378 378
379 379 # recursion free walker, faster than os.walk.
380 380 def findfiles(s):
381 381 work = [s]
382 wadd = work.append
383 found = []
384 add = found.append
382 385 if directories:
383 yield 'd', normpath(s[common_prefix_len:]), lstat(s)
386 add((normpath(s[common_prefix_len:]), 'd', lstat(s)))
384 387 while work:
385 388 top = work.pop()
386 389 names = listdir(top)
387 390 names.sort()
388 391 # nd is the top of the repository dir tree
389 392 nd = normpath(top[common_prefix_len:])
390 393 if nd == '.':
391 394 nd = ''
392 395 else:
393 396 # do not recurse into a repo contained in this
394 397 # one. use bisect to find .hg directory so speed
395 398 # is good on big directory.
396 399 hg = bisect_left(names, '.hg')
397 400 if hg < len(names) and names[hg] == '.hg':
398 401 if isdir(join(top, '.hg')):
399 402 continue
400 403 for f in names:
401 404 np = pconvert(join(nd, f))
402 405 if np in known:
403 406 continue
404 407 known[np] = 1
405 408 p = join(top, f)
406 409 # don't trip over symlinks
407 410 st = lstat(p)
408 411 if s_isdir(st.st_mode):
409 412 if not ignore(np):
410 work.append(p)
413 wadd(p)
411 414 if directories:
412 yield 'd', np, st
415 add((np, 'd', st))
413 416 if np in dc and match(np):
414 yield 'm', np, st
417 add((np, 'm', st))
415 418 elif imatch(np):
416 419 if supported(np, st.st_mode):
417 yield 'f', np, st
420 add((np, 'f', st))
418 421 elif np in dc:
419 yield 'm', np, st
422 add((np, 'm', st))
423 found.sort()
424 return found
420 425
421 426 # step one, find all files that match our criteria
422 427 files.sort()
423 428 for ff in files:
424 429 nf = normpath(ff)
425 430 f = _join(ff)
426 431 try:
427 432 st = lstat(f)
428 433 except OSError, inst:
429 434 found = False
430 435 for fn in dc:
431 436 if nf == fn or (fn.startswith(nf) and fn[len(nf)] == '/'):
432 437 found = True
433 438 break
434 439 if not found:
435 440 if inst.errno != errno.ENOENT or not badmatch:
436 441 self._ui.warn('%s: %s\n' %
437 442 (self.pathto(ff), inst.strerror))
438 443 elif badmatch and badmatch(ff) and imatch(nf):
439 444 yield 'b', ff, None
440 445 continue
441 446 if s_isdir(st.st_mode):
442 cmp1 = (lambda x, y: cmp(x[1], y[1]))
443 sorted_ = [ x for x in findfiles(f) ]
444 sorted_.sort(cmp1)
445 for e in sorted_:
446 yield e
447 for f, src, st in findfiles(f):
448 yield src, f, st
447 449 else:
448 450 if nf in known:
449 451 continue
450 452 known[nf] = 1
451 453 if match(nf):
452 454 if supported(ff, st.st_mode, verbose=True):
453 455 yield 'f', nf, st
454 456 elif ff in dc:
455 457 yield 'm', nf, st
456 458
457 459 # step two run through anything left in the dc hash and yield
458 460 # if we haven't already seen it
459 461 ks = dc.keys()
460 462 ks.sort()
461 463 for k in ks:
462 464 if k in known:
463 465 continue
464 466 known[k] = 1
465 467 if imatch(k):
466 468 yield 'm', k, None
467 469
468 470 def status(self, files, match, list_ignored, list_clean):
469 471 lookup, modified, added, unknown, ignored = [], [], [], [], []
470 472 removed, deleted, clean = [], [], []
471 473
472 474 for src, fn, st in self.statwalk(files, match, ignored=list_ignored):
473 475 try:
474 476 type_, mode, size, time = self._map[fn]
475 477 except KeyError:
476 478 if list_ignored and self._ignore(fn):
477 479 ignored.append(fn)
478 480 else:
479 481 unknown.append(fn)
480 482 continue
481 483 if src == 'm':
482 484 nonexistent = True
483 485 if not st:
484 486 try:
485 487 st = os.lstat(self._join(fn))
486 488 except OSError, inst:
487 489 if inst.errno != errno.ENOENT:
488 490 raise
489 491 st = None
490 492 # We need to re-check that it is a valid file
491 493 if st and self._supported(fn, st.st_mode):
492 494 nonexistent = False
493 495 # XXX: what to do with file no longer present in the fs
494 496 # who are not removed in the dirstate ?
495 497 if nonexistent and type_ in "nm":
496 498 deleted.append(fn)
497 499 continue
498 500 # check the common case first
499 501 if type_ == 'n':
500 502 if not st:
501 503 st = os.lstat(self._join(fn))
502 504 if (size >= 0 and (size != st.st_size
503 505 or (mode ^ st.st_mode) & 0100)
504 506 or fn in self._copymap):
505 507 modified.append(fn)
506 508 elif time != int(st.st_mtime):
507 509 lookup.append(fn)
508 510 elif list_clean:
509 511 clean.append(fn)
510 512 elif type_ == 'm':
511 513 modified.append(fn)
512 514 elif type_ == 'a':
513 515 added.append(fn)
514 516 elif type_ == 'r':
515 517 removed.append(fn)
516 518
517 519 return (lookup, modified, added, removed, deleted, unknown, ignored,
518 520 clean)
General Comments 0
You need to be logged in to leave comments. Login now