##// END OF EJS Templates
dirstate: speed up write by 50%.
Bryan O'Sullivan -
r4374:9edc2d6f default
parent child Browse files
Show More
@@ -1,577 +1,580 b''
1 1 """
2 2 dirstate.py - working directory tracking for mercurial
3 3
4 4 Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
5 5
6 6 This software may be used and distributed according to the terms
7 7 of the GNU General Public License, incorporated herein by reference.
8 8 """
9 9
10 10 from node import *
11 11 from i18n import _
12 12 import struct, os, time, bisect, stat, strutil, util, re, errno
13 import cStringIO
13 14
class dirstate(object):
    '''track the state of the files in the working directory.

    self.map maps a filename to a (state, mode, size, mtime) tuple and
    is loaded lazily from the "dirstate" file; self.copymap maps a
    copy destination to its source; self.pl holds the two parent
    nodes.  changes are flushed by write() once the object has been
    marked dirty.'''

    # on-disk header of one entry: state char, then mode, size, mtime
    # and the length of the name that follows, as big-endian longs
    format = ">cllll"

    def __init__(self, opener, ui, root):
        '''remember the opener, ui and repository root. nothing is
        read from disk until it is first needed.'''
        self.opener = opener
        self.root = root
        self.dirty = 0
        self.ui = ui
        self.map = None
        self.fp = None
        self.pl = None
        self.dirs = None
        self.copymap = {}
        self.ignorefunc = None
        self._branch = None

    def wjoin(self, f):
        '''return f joined onto the repository root.'''
        return os.path.join(self.root, f)

    def getcwd(self):
        '''return the current directory relative to the repository
        root ('' at the root itself), or the absolute current
        directory when it lies outside the repository.'''
        cwd = os.getcwd()
        if cwd == self.root:
            return ''
        # self.root ends with a path separator if self.root is '/' or 'C:\'
        rootsep = self.root
        if not rootsep.endswith(os.sep):
            rootsep += os.sep
        if cwd.startswith(rootsep):
            return cwd[len(rootsep):]
        # we're outside the repo. return an absolute path.
        return cwd

    def hgignore(self):
        '''return the contents of .hgignore files as a dict mapping
        each file name to its list of patterns.

        the files parsed for patterns include:
        .hgignore in the repository root
        any additional files specified in the [ui] section of ~/.hgrc

        trailing white space is dropped.
        the escape character is backslash.
        comments start with #.
        empty lines are skipped.

        lines can be of the following formats:

        syntax: regexp # defaults following lines to non-rooted regexps
        syntax: glob   # defaults following lines to non-rooted globs
        re:pattern     # non-rooted regular expression
        regexp:pattern # same as re:pattern
        glob:pattern   # non-rooted glob
        pattern        # pattern of the current default type

        a file that cannot be read contributes an empty pattern list;
        a warning is printed unless it is the repository .hgignore.'''
        syntaxes = {'re': 'relre:', 'regexp': 'relre:', 'glob': 'relglob:'}

        def parselines(fp):
            for line in fp:
                escape = False
                # cut at the first unescaped '#'; without one keep the
                # whole line, so that a last line lacking a trailing
                # newline does not lose its final character
                end = len(line)
                for i, ch in enumerate(line):
                    if escape:
                        escape = False
                    elif ch == '\\':
                        escape = True
                    elif ch == '#':
                        end = i
                        break
                line = line[:end].rstrip()
                if line:
                    yield line

        repoignore = self.wjoin('.hgignore')
        files = [repoignore]
        files.extend(self.ui.hgignorefiles())
        pats = {}
        for f in files:
            try:
                pats[f] = []
                fp = open(f)
                try:
                    syntax = 'relre:'
                    for line in parselines(fp):
                        if line.startswith('syntax:'):
                            s = line[7:].strip()
                            try:
                                syntax = syntaxes[s]
                            except KeyError:
                                self.ui.warn(_("%s: ignoring invalid "
                                               "syntax '%s'\n") % (f, s))
                            continue
                        pat = syntax + line
                        for prefix, rels in syntaxes.items():
                            if line.startswith(rels):
                                # already carries an internal prefix
                                pat = line
                                break
                            elif line.startswith(prefix + ':'):
                                # documented per-line prefix: swap it
                                # for the internal one
                                pat = rels + line[len(prefix) + 1:]
                                break
                        pats[f].append(pat)
                finally:
                    fp.close()
            except IOError as inst:
                if f != repoignore:
                    self.ui.warn(_("skipping unreadable ignore file"
                                   " '%s': %s\n") % (f, inst.strerror))
        return pats

    def ignore(self, fn):
        '''default match function used by dirstate and
        localrepository. this honours the repository .hgignore file
        and any other files specified in the [ui] section of .hgrc.

        the matcher is built on first use and cached in
        self.ignorefunc.'''
        if not self.ignorefunc:
            patsbyfile = self.hgignore()
            allpats = []
            for patlist in patsbyfile.values():
                allpats.extend(patlist)
            if allpats:
                try:
                    files, self.ignorefunc, anypats = (
                        util.matcher(self.root, inc=allpats, src='.hgignore'))
                except util.Abort:
                    # Re-raise an exception where the src is the right file
                    for f, patlist in patsbyfile.items():
                        files, self.ignorefunc, anypats = (
                            util.matcher(self.root, inc=patlist, src=f))
            else:
                self.ignorefunc = util.never
        return self.ignorefunc(fn)

    def __del__(self):
        '''write out pending changes when the object goes away.'''
        if self.dirty:
            self.write()

    def __getitem__(self, key):
        '''return the entry for key, reading the dirstate first if it
        has not been loaded yet. raises KeyError for unknown files.'''
        try:
            return self.map[key]
        except TypeError:
            # self.map is still None: load it, then retry
            self.lazyread()
            return self[key]

    # entry reported by get() for files that are not tracked
    _unknown = ('?', 0, 0, 0)

    def get(self, key):
        '''return the entry for key, or self._unknown if key is not
        in the dirstate.'''
        try:
            return self[key]
        except KeyError:
            return self._unknown

    def __contains__(self, key):
        '''return whether key has an entry in the dirstate.'''
        self.lazyread()
        return key in self.map

    def parents(self):
        '''return the two parent nodes. on first use they are read
        from the first 40 bytes of the dirstate file; a missing or
        short file leaves them at nullid.'''
        if self.pl is None:
            self.pl = [nullid, nullid]
            try:
                # keep the file object so read() can reuse it
                self.fp = self.opener('dirstate')
                st = self.fp.read(40)
                if len(st) == 40:
                    self.pl = st[:20], st[20:40]
            except IOError as err:
                if err.errno != errno.ENOENT:
                    raise
        return self.pl

    def branch(self):
        '''return the branch name stored in the "branch" file, or
        "default" when that file is empty or cannot be read. the
        result is cached.'''
        if not self._branch:
            try:
                name = self.opener("branch").read().strip()
                self._branch = name or "default"
            except IOError:
                self._branch = "default"
        return self._branch

    def markdirty(self):
        '''flag the dirstate as needing to be written.'''
        if not self.dirty:
            self.dirty = 1

    def setparents(self, p1, p2=nullid):
        '''set the parent nodes and mark the dirstate dirty.'''
        self.lazyread()
        self.markdirty()
        self.pl = p1, p2

    def setbranch(self, branch):
        '''cache branch and store it, newline-terminated, in the
        "branch" file.'''
        self._branch = branch
        self.opener("branch", "w").write(branch + '\n')

    def state(self, key):
        '''return the state character of key, or "?" if key is not in
        the dirstate.'''
        try:
            return self[key][0]
        except KeyError:
            return "?"

    def lazyread(self):
        '''load the dirstate from disk unless already loaded.'''
        if self.map is None:
            self.read()

    def parse(self, st):
        '''fill self.pl, self.map and self.copymap from st, the raw
        contents of a dirstate file.

        the first 40 bytes are the two parents. each entry that
        follows is a header packed with self.format and then a name
        of the length given in the header; a name containing a NUL is
        "dest\\0source" and also records a copy.'''
        self.pl = [st[:20], st[20: 40]]

        # deref fields so they will be local in loop
        dmap = self.map
        copymap = self.copymap
        fmt = self.format
        unpack = struct.unpack

        pos = 40
        e_size = struct.calcsize(fmt)

        while pos < len(st):
            newpos = pos + e_size
            e = unpack(fmt, st[pos:newpos])
            l = e[4]
            pos = newpos
            newpos = pos + l
            f = st[pos:newpos]
            if '\0' in f:
                f, c = f.split('\0')
                copymap[f] = c
            dmap[f] = e[:4]
            pos = newpos

    def read(self):
        '''reset the map and parents, then load them from the
        dirstate file. a missing file leaves the dirstate empty.'''
        self.map = {}
        self.pl = [nullid, nullid]
        try:
            if self.fp:
                # reuse the file object left open by parents()
                self.fp.seek(0)
                st = self.fp.read()
                self.fp = None
            else:
                st = self.opener("dirstate").read()
            if st:
                self.parse(st)
        except IOError as err:
            if err.errno != errno.ENOENT:
                raise

    def copy(self, source, dest):
        '''record that dest was copied from source.'''
        self.lazyread()
        self.markdirty()
        self.copymap[dest] = source

    def copied(self, file):
        '''return the copy source recorded for file, or None.'''
        return self.copymap.get(file, None)

    def copies(self):
        '''return the dict mapping copy destinations to sources.'''
        return self.copymap

    def initdirs(self):
        '''build self.dirs, the per-directory entry counts, from the
        current map unless it has already been built.'''
        if self.dirs is None:
            self.dirs = {}
            for f in self.map:
                self.updatedirs(f, 1)

    def updatedirs(self, path, delta):
        '''add delta to the count of every directory prefix of path.
        does nothing while self.dirs has not been initialised.'''
        if self.dirs is not None:
            for c in strutil.findall(path, '/'):
                pc = path[:c]
                self.dirs.setdefault(pc, 0)
                self.dirs[pc] += delta

    def checkinterfering(self, files):
        '''raise util.Abort if a name in files clashes with the
        dirstate: it names a directory that holds tracked files, one
        of its directory prefixes is itself a tracked file, or it
        contains a carriage return or newline.'''
        def prefixes(f):
            for c in strutil.rfindall(f, '/'):
                yield f[:c]
        self.lazyread()
        self.initdirs()
        seendirs = {}
        for f in files:
            # shadows
            if self.dirs.get(f):
                raise util.Abort(_('directory named %r already in dirstate') %
                                 f)
            for d in prefixes(f):
                # prefixes already checked for an earlier file
                if d in seendirs:
                    break
                if d in self.map:
                    raise util.Abort(_('file named %r already in dirstate') %
                                     d)
                seendirs[d] = True
            # disallowed
            if '\r' in f or '\n' in f:
                raise util.Abort(_("'\\n' and '\\r' disallowed in filenames"))

    def update(self, files, state, **kw):
        ''' current states:
        n  normal
        m  needs merging
        r  marked for removal
        a  marked for addition

        set every file in files to state. for states other than "r"
        the file is lstat'ed to fill in mode, size and mtime; st_size
        and st_mtime keyword arguments override the stat values. any
        copy record for an updated file is dropped.'''

        if not files:
            return
        self.lazyread()
        self.markdirty()
        if state == "a":
            self.initdirs()
            self.checkinterfering(files)
        for f in files:
            if state == "r":
                self.map[f] = ('r', 0, 0, 0)
                self.updatedirs(f, -1)
            else:
                if state == "a":
                    self.updatedirs(f, 1)
                s = os.lstat(self.wjoin(f))
                st_size = kw.get('st_size', s.st_size)
                st_mtime = kw.get('st_mtime', s.st_mtime)
                self.map[f] = (state, s.st_mode, st_size, st_mtime)
            if f in self.copymap:
                del self.copymap[f]

    def forget(self, files):
        '''drop files from the dirstate, warning about each one that
        is not tracked.'''
        if not files:
            return
        self.lazyread()
        self.markdirty()
        self.initdirs()
        for f in files:
            try:
                del self.map[f]
                self.updatedirs(f, -1)
            except KeyError:
                self.ui.warn(_("not in dirstate: %s!\n") % f)

    def clear(self):
        '''empty the map, copy records and directory counts and mark
        the dirstate dirty.'''
        self.map = {}
        self.copymap = {}
        self.dirs = None
        self.markdirty()

    def rebuild(self, parent, files):
        '''replace the dirstate with a normal entry for every name in
        files and make parent the only parent.

        entries get size -1 and mtime 0, and mode 0777 or 0666
        depending on files.execf(name).'''
        self.clear()
        for f in files:
            if files.execf(f):
                self.map[f] = ('n', 0o777, -1, 0)
            else:
                self.map[f] = ('n', 0o666, -1, 0)
        self.pl = (parent, nullid)
        self.markdirty()

    def write(self):
        '''write the dirstate to the "dirstate" file if it is dirty.

        the parents and all entries are assembled in memory first and
        handed to the file in a single write.'''
        if not self.dirty:
            return
        cs = cStringIO.StringIO()
        cs.write("".join(self.pl))
        for f, e in self.map.iteritems():
            c = self.copied(f)
            if c:
                # copies are stored as "dest\0source"
                f = f + "\0" + c
            e = struct.pack(self.format, e[0], e[1], e[2], e[3], len(f))
            cs.write(e)
            cs.write(f)
        # NOTE(review): st is never explicitly closed or renamed here;
        # this presumably relies on the opener's atomic file committing
        # itself when it is released -- confirm against the opener.
        st = self.opener("dirstate", "w", atomic=True)
        st.write(cs.getvalue())
        self.dirty = 0

    def filterfiles(self, files):
        '''return the part of the map selected by files: each name
        that is itself tracked, plus every tracked file found below a
        name that is not. "." selects a copy of the whole map.'''
        ret = {}
        unknown = []

        for x in files:
            if x == '.':
                return self.map.copy()
            if x not in self.map:
                unknown.append(x)
            else:
                ret[x] = self.map[x]

        if not unknown:
            return ret

        b = sorted(self.map)
        blen = len(b)

        for x in unknown:
            # entries under directory x sort right after "x/"
            bs = bisect.bisect(b, "%s%s" % (x, '/'))
            while bs < blen:
                s = b[bs]
                if len(s) > len(x) and s.startswith(x):
                    ret[s] = self.map[s]
                else:
                    break
                bs += 1
        return ret

    def supported_type(self, f, st, verbose=False):
        '''return True if the stat result st describes a regular file
        or a symlink. otherwise return False, first warning about the
        kind of file found when verbose is set.'''
        if stat.S_ISREG(st.st_mode) or stat.S_ISLNK(st.st_mode):
            return True
        if verbose:
            kind = 'unknown'
            if stat.S_ISCHR(st.st_mode):
                kind = _('character device')
            elif stat.S_ISBLK(st.st_mode):
                kind = _('block device')
            elif stat.S_ISFIFO(st.st_mode):
                kind = _('fifo')
            elif stat.S_ISSOCK(st.st_mode):
                kind = _('socket')
            elif stat.S_ISDIR(st.st_mode):
                kind = _('directory')
            self.ui.warn(_('%s: unsupported file type (type is %s)\n') % (
                util.pathto(self.root, self.getcwd(), f),
                kind))
        return False

    def walk(self, files=None, match=util.always, badmatch=None):
        '''like statwalk, but yield only (src, filename) pairs.'''
        # filter out the stat
        for src, f, st in self.statwalk(files, match, badmatch=badmatch):
            yield src, f

    def statwalk(self, files=None, match=util.always, ignored=False,
                 badmatch=None, directories=False):
        '''
        walk recursively through the directory tree, finding all files
        matched by the match function

        results are yielded in a tuple (src, filename, st), where src
        is one of:
        'f' the file was found in the directory tree
        'd' the file is a directory of the tree
        'm' the file was only in the dirstate and not in the tree
        'b' file was not found and matched badmatch

        and st is the stat result if the file was found in the directory.

        ignored files are skipped unless they are in the dirstate or
        ignored is set; 'd' results are only produced when directories
        is set.
        '''
        self.lazyread()

        # walk all files by default
        if not files:
            files = ['.']
            dc = self.map.copy()
        else:
            files = util.unique(files)
            dc = self.filterfiles(files)

        def imatch(file_):
            if file_ not in dc and self.ignore(file_):
                return False
            return match(file_)

        ignore = self.ignore
        if ignored:
            imatch = match
            ignore = util.never

        # self.root may end with a path separator when self.root == '/'
        common_prefix_len = len(self.root)
        if not self.root.endswith(os.sep):
            common_prefix_len += 1
        # recursion free walker, faster than os.walk.
        def findfiles(s):
            work = [s]
            if directories:
                yield 'd', util.normpath(s[common_prefix_len:]), os.lstat(s)
            while work:
                top = work.pop()
                names = os.listdir(top)
                names.sort()
                # nd is the top of the repository dir tree
                nd = util.normpath(top[common_prefix_len:])
                if nd == '.':
                    nd = ''
                else:
                    # do not recurse into a repo contained in this
                    # one. use bisect to find .hg directory so speed
                    # is good on big directory.
                    hg = bisect.bisect_left(names, '.hg')
                    if hg < len(names) and names[hg] == '.hg':
                        if os.path.isdir(os.path.join(top, '.hg')):
                            continue
                for f in names:
                    np = util.pconvert(os.path.join(nd, f))
                    if seen(np):
                        continue
                    p = os.path.join(top, f)
                    # don't trip over symlinks
                    st = os.lstat(p)
                    if stat.S_ISDIR(st.st_mode):
                        if not ignore(np):
                            work.append(p)
                            if directories:
                                yield 'd', np, st
                        if imatch(np) and np in dc:
                            yield 'm', np, st
                    elif imatch(np):
                        if self.supported_type(np, st):
                            yield 'f', np, st
                        elif np in dc:
                            yield 'm', np, st

        known = {'.hg': 1}
        def seen(fn):
            # true if fn was reported before; otherwise remember it
            if fn in known:
                return True
            known[fn] = 1

        # step one, find all files that match our criteria
        files.sort()
        for ff in files:
            nf = util.normpath(ff)
            f = self.wjoin(ff)
            try:
                st = os.lstat(f)
            except OSError as inst:
                # not on disk: stay quiet if it is a tracked file or a
                # directory holding tracked files (step two reports
                # those), else warn or report it as a bad match
                found = False
                for fn in dc:
                    if nf == fn or (fn.startswith(nf) and fn[len(nf)] == '/'):
                        found = True
                        break
                if not found:
                    if inst.errno != errno.ENOENT or not badmatch:
                        self.ui.warn('%s: %s\n' % (
                            util.pathto(self.root, self.getcwd(), ff),
                            inst.strerror))
                    elif badmatch and badmatch(ff) and imatch(nf):
                        yield 'b', ff, None
                continue
            if stat.S_ISDIR(st.st_mode):
                # report a directory's contents ordered by file name
                sorted_ = list(findfiles(f))
                sorted_.sort(key=lambda entry: entry[1])
                for e in sorted_:
                    yield e
            else:
                if not seen(nf) and match(nf):
                    if self.supported_type(ff, st, verbose=True):
                        yield 'f', nf, st
                    elif ff in dc:
                        yield 'm', nf, st

        # step two run through anything left in the dc hash and yield
        # if we haven't already seen it
        for k in sorted(dc):
            if not seen(k) and imatch(k):
                yield 'm', k, None

    def status(self, files=None, match=util.always, list_ignored=False,
               list_clean=False):
        '''compare the dirstate with the working directory.

        return a tuple of file name lists (lookup, modified, added,
        removed, deleted, unknown, ignored, clean). lookup holds
        normal files whose size and exec bit match the dirstate but
        whose mtime does not. ignored and clean are only filled in
        when list_ignored and list_clean are set.'''
        lookup, modified, added, unknown, ignored = [], [], [], [], []
        removed, deleted, clean = [], [], []

        for src, fn, st in self.statwalk(files, match, ignored=list_ignored):
            try:
                type_, mode, size, mtime = self[fn]
            except KeyError:
                if list_ignored and self.ignore(fn):
                    ignored.append(fn)
                else:
                    unknown.append(fn)
                continue
            if src == 'm':
                nonexistent = True
                if not st:
                    try:
                        st = os.lstat(self.wjoin(fn))
                    except OSError as inst:
                        if inst.errno != errno.ENOENT:
                            raise
                        st = None
                    # We need to re-check that it is a valid file
                    if st and self.supported_type(fn, st):
                        nonexistent = False
                # XXX: what to do with file no longer present in the fs
                # who are not removed in the dirstate ?
                if nonexistent and type_ in "nm":
                    deleted.append(fn)
                    continue
            # check the common case first
            if type_ == 'n':
                if not st:
                    st = os.lstat(self.wjoin(fn))
                # a recorded size of -1 means "unknown": skip the
                # size/exec-bit test and fall through to the mtime test
                if size >= 0 and (size != st.st_size
                                  or (mode ^ st.st_mode) & 0o100):
                    modified.append(fn)
                elif mtime != int(st.st_mtime):
                    lookup.append(fn)
                elif list_clean:
                    clean.append(fn)
            elif type_ == 'm':
                modified.append(fn)
            elif type_ == 'a':
                added.append(fn)
            elif type_ == 'r':
                removed.append(fn)

        return (lookup, modified, added, removed, deleted, unknown, ignored,
                clean)
General Comments 0
You need to be logged in to leave comments. Login now