##// END OF EJS Templates
dirstate: refactor the dirstate binary format, remove magic numbers
Benoit Boissinot -
r2393:5083cba2 default
parent child Browse files
Show More
@@ -1,473 +1,476 b''
1 1 """
2 2 dirstate.py - working directory tracking for mercurial
3 3
4 4 Copyright 2005 Matt Mackall <mpm@selenic.com>
5 5
6 6 This software may be used and distributed according to the terms
7 7 of the GNU General Public License, incorporated herein by reference.
8 8 """
9 9
10 10 import struct, os
11 11 from node import *
12 12 from i18n import gettext as _
13 13 from demandload import *
14 14 demandload(globals(), "time bisect stat util re errno")
15 15
16 16 class dirstate(object):
17 format = ">cllll"
18
def __init__(self, opener, ui, root):
    '''set up an empty dirstate that has not been read from disk yet.

    opener, ui and root are stored exactly as passed in.  map and pl
    start out as None; lazyread() treats a None map as "not loaded".'''
    # collaborators supplied by the caller
    self.ui = ui
    self.root = root
    self.opener = opener
    # contents, filled in later by read()
    self.pl = None
    self.map = None
    self.copies = {}
    # bookkeeping flags
    self.dirty = 0
    self.blockignore = False
    self.ignorefunc = None
27 29
def wjoin(self, f):
    '''return f joined onto the repository root directory.'''
    root = self.root
    return os.path.join(root, f)
30 32
def getcwd(self):
    '''return the current working directory relative to the repository root.

    returns '' when the cwd is the root itself.  when the cwd does not
    lie inside the root there is no meaningful relative path, so the
    absolute cwd is returned unchanged.'''
    cwd = os.getcwd()
    if cwd == self.root:
        return ''
    # a root such as '/' already ends with a separator; blindly adding
    # one to its length would chop the first character off the result
    rootsep = self.root
    if not rootsep.endswith(os.sep):
        rootsep += os.sep
    if cwd.startswith(rootsep):
        return cwd[len(rootsep):]
    # outside the repository: slicing here would yield an arbitrary tail
    return cwd
35 37
def hgignore(self):
    '''return the patterns found in the ignore files.

    the result is a dict mapping each ignore file name to the list of
    patterns read from it (an empty list if the file is unreadable).

    the files parsed for patterns include:
    .hgignore in the repository root
    any additional files returned by ui.hgignorefiles()

    trailing white space is dropped.
    the escape character is backslash.
    comments start with #.
    empty lines are skipped.

    lines can be of the following formats:

    syntax: regexp # defaults following lines to non-rooted regexps
    syntax: glob   # defaults following lines to non-rooted globs
    relre:pattern  # non-rooted regular expression
    relglob:pattern # non-rooted glob
    pattern        # pattern of the current default type'''
    # local import: only used to fetch the exception being handled,
    # which keeps the handler independent of the "except X, e" spelling
    import sys

    syntaxes = {'re': 'relre:', 'regexp': 'relre:', 'glob': 'relglob:'}
    # NOTE(review): only lines already starting with 'relre:' or
    # 'relglob:' are taken verbatim; a line written as 're:...' or
    # 'glob:...' gets the current default prefix prepended - confirm
    # that is intended.
    prefixes = syntaxes.values()

    def parselines(fp):
        # yield each line stripped of its comment and trailing blanks
        for line in fp:
            escape = False
            # cut at the first unescaped '#'; without one keep the whole
            # line (a final line lacking a newline must not lose its
            # last character)
            end = len(line)
            for i, ch in enumerate(line):
                if escape:
                    escape = False
                elif ch == '\\':
                    escape = True
                elif ch == '#':
                    end = i
                    break
            line = line[:end].rstrip()
            if line:
                yield line

    repoignore = self.wjoin('.hgignore')
    files = [repoignore]
    files.extend(self.ui.hgignorefiles())
    pats = {}
    for f in files:
        pats[f] = []
        try:
            fp = open(f)
            try:
                # every file starts out in regexp mode
                syntax = 'relre:'
                for line in parselines(fp):
                    if line.startswith('syntax:'):
                        s = line[7:].strip()
                        try:
                            syntax = syntaxes[s]
                        except KeyError:
                            self.ui.warn(_("%s: ignoring invalid "
                                           "syntax '%s'\n") % (f, s))
                        continue
                    pat = syntax + line
                    for s in prefixes:
                        if line.startswith(s):
                            pat = line
                            break
                    pats[f].append(pat)
            finally:
                # do not leak the handle, even if reading fails midway
                fp.close()
        except IOError:
            inst = sys.exc_info()[1]
            # a missing repository .hgignore is normal; only extra
            # ignore files are worth a warning
            if f != repoignore:
                self.ui.warn(_("skipping unreadable ignore file"
                               " '%s': %s\n") % (f, inst.strerror))
    return pats
94 96
def ignore(self, fn):
    '''default match function used by dirstate and localrepository.

    returns False unconditionally while self.blockignore is set.
    otherwise the patterns returned by hgignore() are compiled into a
    matcher on first use, cached in self.ignorefunc, and applied to fn.'''
    if self.blockignore:
        return False
    if not self.ignorefunc:
        bysource = self.hgignore()
        combined = []
        for patlist in bysource.values():
            combined.extend(patlist)
        if not combined:
            self.ignorefunc = util.never
        else:
            try:
                unused_files, self.ignorefunc, unused_any = (
                    util.matcher(self.root, inc=combined, src='.hgignore'))
            except util.Abort:
                # compile file by file so that the exception raised
                # names the ignore file that holds the bad pattern
                for f, patlist in bysource.items():
                    unused_files, self.ignorefunc, unused_any = (
                        util.matcher(self.root, inc=patlist, src=f))
    return self.ignorefunc(fn)
117 119
def __del__(self):
    '''write out pending changes, if any, when the object is destroyed.'''
    if not self.dirty:
        return
    self.write()
121 123
def __getitem__(self, key):
    '''return the dirstate entry stored for key, loading the map on demand.

    raises KeyError when key has no entry.'''
    try:
        # fast path: map already loaded
        return self.map[key]
    except TypeError:
        # map is still None (not read yet): load it and look up once
        # more.  the retry goes straight to the map instead of back
        # through self[key], so a TypeError with another cause (e.g. an
        # unhashable key) propagates instead of recursing forever.
        self.lazyread()
        return self.map[key]
128 130
def __contains__(self, key):
    '''tell whether key has an entry, loading the map first if needed.'''
    self.lazyread()
    entries = self.map
    return key in entries
132 134
def parents(self):
    '''return the stored parents (self.pl), loading the dirstate first.'''
    self.lazyread()
    current = self.pl
    return current
136 138
def markdirty(self):
    '''flag the dirstate as modified; an already-set flag is left alone.'''
    if self.dirty:
        return
    self.dirty = 1
140 142
def setparents(self, p1, p2=nullid):
    '''record (p1, p2) as the parents and mark the dirstate dirty.

    p2 defaults to nullid.'''
    # load first so a later lazy read cannot overwrite the new parents
    self.lazyread()
    self.markdirty()
    newparents = (p1, p2)
    self.pl = newparents
145 147
def state(self, key):
    '''return the state character stored for key, or "?" if key has no entry.'''
    try:
        entry = self[key]
        return entry[0]
    except KeyError:
        return "?"
151 153
def lazyread(self):
    '''read the dirstate unless it has been loaded already.

    "loaded" means self.map is not None; an empty map counts as loaded.'''
    if self.map is not None:
        return
    self.read()
155 157
def read(self):
    '''load the parents, file entries and copy records from "dirstate".

    layout handled here: two 20-byte parent ids, then one record per
    file.  each record is a header packed with self.format (state
    character, three integers stored by update() as mode, size and
    mtime, and the length of the name field) followed by the name
    field.  a name field containing a NUL byte is "name NUL source"
    and records that name was copied from source.

    a dirstate file that is missing, unreadable or empty leaves the map
    empty and both parents set to nullid.'''
    self.map = {}
    self.pl = [nullid, nullid]
    try:
        st = self.opener("dirstate").read()
    except (IOError, OSError):
        # no usable dirstate file: keep the empty defaults.  only I/O
        # failures are tolerated; anything else (interrupts, programming
        # errors) must not be silently discarded.
        return
    if not st:
        return

    self.pl = [st[:20], st[20:40]]

    pos = 40
    e_size = struct.calcsize(self.format)
    while pos < len(st):
        e = struct.unpack(self.format, st[pos:pos + e_size])
        # last header field is the byte length of the name that follows
        l = e[4]
        pos += e_size
        f = st[pos:pos + l]
        if '\0' in f:
            f, c = f.split('\0')
            self.copies[f] = c
        self.map[f] = e[:4]
        pos += l
177 180
def copy(self, source, dest):
    '''record that dest was copied from source and mark the dirstate dirty.'''
    self.lazyread()
    self.markdirty()
    copies = self.copies
    copies[dest] = source
182 185
def copied(self, file):
    '''return the recorded copy source of file, or None if there is none.'''
    return self.copies.get(file)
185 188
def update(self, files, state, **kw):
    ''' set the entry of every name in files to the given state.

    current states:
    n  normal
    m  needs merging
    r  marked for removal
    a  marked for addition

    for "r" a zeroed entry is stored without touching the disk.  for
    any other state the file is lstat'ed; the keyword arguments st_size
    and st_mtime, when given, replace the values taken from the stat.
    any copy record for a touched file is dropped.'''
    if not files:
        return
    self.lazyread()
    self.markdirty()
    removing = state == "r"
    for f in files:
        if removing:
            entry = ('r', 0, 0, 0)
        else:
            s = os.lstat(self.wjoin(f))
            size = kw.get('st_size', s.st_size)
            mtime = kw.get('st_mtime', s.st_mtime)
            entry = (state, s.st_mode, size, mtime)
        self.map[f] = entry
        if f in self.copies:
            del self.copies[f]
206 209
def forget(self, files):
    '''drop the entries of the given files from the dirstate.

    a name without an entry only produces a warning through the ui.'''
    if not files:
        return
    self.lazyread()
    self.markdirty()
    for f in files:
        if f in self.map:
            del self.map[f]
        else:
            self.ui.warn(_("not in dirstate: %s!\n") % f)
217 220
def clear(self):
    '''forget every file entry and copy record, and mark the dirstate dirty.

    the parents (self.pl) are not touched here.'''
    self.map, self.copies = {}, {}
    self.markdirty()
222 225
223 226 def rebuild(self, parent, files):
224 227 self.clear()
225 228 umask = os.umask(0)
226 229 os.umask(umask)
227 230 for f, mode in files:
228 231 if mode:
229 232 self.map[f] = ('n', ~umask, -1, 0)
230 233 else:
231 234 self.map[f] = ('n', ~umask & 0666, -1, 0)
232 235 self.pl = (parent, nullid)
233 236 self.markdirty()
234 237
def write(self):
    '''write parents and all entries to "dirstate" if there are changes.

    does nothing unless self.dirty is set.  the file is opened through
    the opener with atomic=True; the two parents are written first,
    then one record per file: a header packed with self.format followed
    by the name, with "\\0" and the copy source appended when a copy is
    recorded.  the dirty flag is cleared afterwards.'''
    if not self.dirty:
        return
    out = self.opener("dirstate", "w", atomic=True)
    out.write("".join(self.pl))
    for name, entry in self.map.items():
        source = self.copied(name)
        if source:
            name = name + "\0" + source
        # the length field covers the name plus any copy source
        header = struct.pack(self.format, entry[0], entry[1], entry[2],
                             entry[3], len(name))
        out.write(header + name)
    self.dirty = 0
247 250
def filterfiles(self, files):
    '''return the subset of the dirstate map selected by files.

    each name in files selects its own entry when it has one; a name
    without an entry is treated as a directory and selects every entry
    stored below it ("name/...").  the name '.' selects everything and
    returns a copy of the whole map.  names matching nothing are
    ignored.  the result is a new dict of name -> entry.'''
    ret = {}
    unknown = []

    for x in files:
        if x == '.':
            return self.map.copy()
        if x not in self.map:
            unknown.append(x)
        else:
            ret[x] = self.map[x]

    if not unknown:
        return ret

    b = list(self.map)
    b.sort()
    blen = len(b)

    for x in unknown:
        # entries below directory x all start with x + '/' and are
        # contiguous in sorted order.  the search has to start at that
        # prefix, not at x: siblings such as 'x.txt' or 'x-1' sort
        # between 'x' and 'x/' and would otherwise end the scan early.
        prefix = x + '/'
        bs = bisect.bisect_left(b, prefix)
        while bs < blen and b[bs].startswith(prefix):
            s = b[bs]
            ret[s] = self.map[s]
            bs += 1
    return ret
280 283
def supported_type(self, f, st, verbose=False):
    '''tell whether the stat result st describes a regular file.

    returns True for regular files and False for everything else.  with
    verbose set, a rejected file also triggers a ui warning that names
    f (relative to the current directory) and the kind of file found.'''
    mode = st.st_mode
    if stat.S_ISREG(mode):
        return True
    if not verbose:
        return False
    # work out a human-readable name for the offending type
    if stat.S_ISCHR(mode):
        kind = _('character device')
    elif stat.S_ISBLK(mode):
        kind = _('block device')
    elif stat.S_ISFIFO(mode):
        kind = _('fifo')
    elif stat.S_ISLNK(mode):
        kind = _('symbolic link')
    elif stat.S_ISSOCK(mode):
        kind = _('socket')
    elif stat.S_ISDIR(mode):
        kind = _('directory')
    else:
        kind = 'unknown'
    self.ui.warn(_('%s: unsupported file type (type is %s)\n') % (
        util.pathto(self.getcwd(), f),
        kind))
    return False
296 299
def statwalk(self, files=None, match=util.always, dc=None, ignored=False,
             badmatch=None):
    '''walk the given files (default: the whole repository root).

    returns the generator produced by walkhelper(), which yields
    (src, filename, st) tuples.  a name is accepted when match() accepts
    it; unless ignored is set, names that are not in dc and are matched
    by self.ignore() are rejected first.

    when dc is not supplied it is the full dirstate map for a full walk
    and filterfiles(files) for a restricted one.'''
    self.lazyread()

    if files:
        if not dc:
            dc = self.filterfiles(files)
    else:
        # walk all files by default
        files = [self.root]
        if not dc:
            dc = self.map.copy()

    def statmatch(file_, st):
        # st is part of the calling convention used by walkhelper but
        # plays no role in the decision
        path = util.pconvert(file_)
        if not ignored and path not in dc and self.ignore(path):
            return False
        return match(path)

    return self.walkhelper(files=files, statmatch=statmatch, dc=dc,
                           badmatch=badmatch)
317 320
def walk(self, files=None, match=util.always, dc=None, badmatch=None):
    '''like statwalk(), but yield only (src, filename) pairs.'''
    results = self.statwalk(files, match, dc, badmatch=badmatch)
    for src, fn, unused_st in results:
        # drop the stat result
        yield src, fn
322 325
323 326 # walk recursively through the directory tree, finding all files
324 327 # matched by the statmatch function
325 328 #
326 329 # results are yielded in a tuple (src, filename, st), where src
327 330 # is one of:
328 331 # 'f' the file was found in the directory tree
329 332 # 'm' the file was only in the dirstate and not in the tree
330 333 # and st is the stat result if the file was found in the directory.
331 334 #
332 335 # dc is an optional arg for the current dirstate. dc is not modified
333 336 # directly by this function, but might be modified by your statmatch call.
334 337 #
335 338 def walkhelper(self, files, statmatch, dc, badmatch=None):
336 339 # recursion free walker, faster than os.walk.
337 340 def findfiles(s):
338 341 work = [s]
339 342 while work:
340 343 top = work.pop()
341 344 names = os.listdir(top)
342 345 names.sort()
343 346 # nd is the top of the repository dir tree
344 347 nd = util.normpath(top[len(self.root) + 1:])
345 348 if nd == '.':
346 349 nd = ''
347 350 else:
348 351 # do not recurse into a repo contained in this
349 352 # one. use bisect to find .hg directory so speed
350 353 # is good on big directory.
351 354 hg = bisect.bisect_left(names, '.hg')
352 355 if hg < len(names) and names[hg] == '.hg':
353 356 if os.path.isdir(os.path.join(top, '.hg')):
354 357 continue
355 358 for f in names:
356 359 np = util.pconvert(os.path.join(nd, f))
357 360 if seen(np):
358 361 continue
359 362 p = os.path.join(top, f)
360 363 # don't trip over symlinks
361 364 st = os.lstat(p)
362 365 if stat.S_ISDIR(st.st_mode):
363 366 ds = os.path.join(nd, f +'/')
364 367 if statmatch(ds, st):
365 368 work.append(p)
366 369 if statmatch(np, st) and np in dc:
367 370 yield 'm', np, st
368 371 elif statmatch(np, st):
369 372 if self.supported_type(np, st):
370 373 yield 'f', np, st
371 374 elif np in dc:
372 375 yield 'm', np, st
373 376
374 377 known = {'.hg': 1}
375 378 def seen(fn):
376 379 if fn in known: return True
377 380 known[fn] = 1
378 381
379 382 # step one, find all files that match our criteria
380 383 files.sort()
381 384 for ff in util.unique(files):
382 385 f = self.wjoin(ff)
383 386 try:
384 387 st = os.lstat(f)
385 388 except OSError, inst:
386 389 nf = util.normpath(ff)
387 390 found = False
388 391 for fn in dc:
389 392 if nf == fn or (fn.startswith(nf) and fn[len(nf)] == '/'):
390 393 found = True
391 394 break
392 395 if not found:
393 396 if inst.errno != errno.ENOENT or not badmatch:
394 397 self.ui.warn('%s: %s\n' % (
395 398 util.pathto(self.getcwd(), ff),
396 399 inst.strerror))
397 400 elif badmatch and badmatch(ff) and statmatch(ff, None):
398 401 yield 'b', ff, None
399 402 continue
400 403 if stat.S_ISDIR(st.st_mode):
401 404 cmp1 = (lambda x, y: cmp(x[1], y[1]))
402 405 sorted_ = [ x for x in findfiles(f) ]
403 406 sorted_.sort(cmp1)
404 407 for e in sorted_:
405 408 yield e
406 409 else:
407 410 ff = util.normpath(ff)
408 411 if seen(ff):
409 412 continue
410 413 self.blockignore = True
411 414 if statmatch(ff, st):
412 415 if self.supported_type(ff, st, verbose=True):
413 416 yield 'f', ff, st
414 417 elif ff in dc:
415 418 yield 'm', ff, st
416 419 self.blockignore = False
417 420
418 421 # step two run through anything left in the dc hash and yield
419 422 # if we haven't already seen it
420 423 ks = dc.keys()
421 424 ks.sort()
422 425 for k in ks:
423 426 if not seen(k) and (statmatch(k, None)):
424 427 yield 'm', k, None
425 428
426 429 def changes(self, files=None, match=util.always, show_ignored=None):
427 430 lookup, modified, added, unknown, ignored = [], [], [], [], []
428 431 removed, deleted = [], []
429 432
430 433 for src, fn, st in self.statwalk(files, match, ignored=show_ignored):
431 434 try:
432 435 type_, mode, size, time = self[fn]
433 436 except KeyError:
434 437 if show_ignored and self.ignore(fn):
435 438 ignored.append(fn)
436 439 else:
437 440 unknown.append(fn)
438 441 continue
439 442 if src == 'm':
440 443 nonexistent = True
441 444 if not st:
442 445 try:
443 446 f = self.wjoin(fn)
444 447 st = os.lstat(f)
445 448 except OSError, inst:
446 449 if inst.errno != errno.ENOENT:
447 450 raise
448 451 st = None
449 452 # We need to re-check that it is a valid file
450 453 if st and self.supported_type(fn, st):
451 454 nonexistent = False
452 455 # XXX: what to do with file no longer present in the fs
453 456 # who are not removed in the dirstate ?
454 457 if nonexistent and type_ in "nm":
455 458 deleted.append(fn)
456 459 continue
457 460 # check the common case first
458 461 if type_ == 'n':
459 462 if not st:
460 463 st = os.stat(fn)
461 464 if size >= 0 and (size != st.st_size
462 465 or (mode ^ st.st_mode) & 0100):
463 466 modified.append(fn)
464 467 elif time != st.st_mtime:
465 468 lookup.append(fn)
466 469 elif type_ == 'm':
467 470 modified.append(fn)
468 471 elif type_ == 'a':
469 472 added.append(fn)
470 473 elif type_ == 'r':
471 474 removed.append(fn)
472 475
473 476 return (lookup, modified, added, removed, deleted, unknown, ignored)
General Comments 0
You need to be logged in to leave comments. Login now