##// END OF EJS Templates
Optimize dirstate walking...
mason@suse.com -
r1183:d9e85a75 default
parent child Browse files
Show More
@@ -1,314 +1,372 b''
1 """
1 """
2 dirstate.py - working directory tracking for mercurial
2 dirstate.py - working directory tracking for mercurial
3
3
4 Copyright 2005 Matt Mackall <mpm@selenic.com>
4 Copyright 2005 Matt Mackall <mpm@selenic.com>
5
5
6 This software may be used and distributed according to the terms
6 This software may be used and distributed according to the terms
7 of the GNU General Public License, incorporated herein by reference.
7 of the GNU General Public License, incorporated herein by reference.
8 """
8 """
9
9
10 import struct, os
10 import struct, os
11 from node import *
11 from node import *
12 from demandload import *
12 from demandload import *
13 demandload(globals(), "time bisect stat util re")
13 demandload(globals(), "time bisect stat util re")
14
14
15 class dirstate:
15 class dirstate:
16 def __init__(self, opener, ui, root):
16 def __init__(self, opener, ui, root):
17 self.opener = opener
17 self.opener = opener
18 self.root = root
18 self.root = root
19 self.dirty = 0
19 self.dirty = 0
20 self.ui = ui
20 self.ui = ui
21 self.map = None
21 self.map = None
22 self.pl = None
22 self.pl = None
23 self.copies = {}
23 self.copies = {}
24 self.ignorefunc = None
24 self.ignorefunc = None
25 self.blockignore = False
25
26
26 def wjoin(self, f):
27 def wjoin(self, f):
27 return os.path.join(self.root, f)
28 return os.path.join(self.root, f)
28
29
29 def getcwd(self):
30 def getcwd(self):
30 cwd = os.getcwd()
31 cwd = os.getcwd()
31 if cwd == self.root: return ''
32 if cwd == self.root: return ''
32 return cwd[len(self.root) + 1:]
33 return cwd[len(self.root) + 1:]
33
34
34 def ignore(self, f):
35 def ignore(self, f):
36 if self.blockignore:
37 return False
35 if not self.ignorefunc:
38 if not self.ignorefunc:
36 bigpat = []
39 bigpat = []
37 try:
40 try:
38 l = file(self.wjoin(".hgignore"))
41 l = file(self.wjoin(".hgignore"))
39 for pat in l:
42 for pat in l:
40 p = pat.rstrip()
43 p = pat.rstrip()
41 if p:
44 if p:
42 try:
45 try:
43 re.compile(p)
46 re.compile(p)
44 except:
47 except:
45 self.ui.warn("ignoring invalid ignore"
48 self.ui.warn("ignoring invalid ignore"
46 + " regular expression '%s'\n" % p)
49 + " regular expression '%s'\n" % p)
47 else:
50 else:
48 bigpat.append(p)
51 bigpat.append(p)
49 except IOError: pass
52 except IOError: pass
50
53
51 if bigpat:
54 if bigpat:
52 s = "(?:%s)" % (")|(?:".join(bigpat))
55 s = "(?:%s)" % (")|(?:".join(bigpat))
53 r = re.compile(s)
56 r = re.compile(s)
54 self.ignorefunc = r.search
57 self.ignorefunc = r.search
55 else:
58 else:
56 self.ignorefunc = util.never
59 self.ignorefunc = util.never
57
60
58 return self.ignorefunc(f)
61 return self.ignorefunc(f)
59
62
60 def __del__(self):
63 def __del__(self):
61 if self.dirty:
64 if self.dirty:
62 self.write()
65 self.write()
63
66
64 def __getitem__(self, key):
67 def __getitem__(self, key):
65 try:
68 try:
66 return self.map[key]
69 return self.map[key]
67 except TypeError:
70 except TypeError:
68 self.read()
71 self.read()
69 return self[key]
72 return self[key]
70
73
71 def __contains__(self, key):
74 def __contains__(self, key):
72 if not self.map: self.read()
75 if not self.map: self.read()
73 return key in self.map
76 return key in self.map
74
77
75 def parents(self):
78 def parents(self):
76 if not self.pl:
79 if not self.pl:
77 self.read()
80 self.read()
78 return self.pl
81 return self.pl
79
82
80 def markdirty(self):
83 def markdirty(self):
81 if not self.dirty:
84 if not self.dirty:
82 self.dirty = 1
85 self.dirty = 1
83
86
84 def setparents(self, p1, p2=nullid):
87 def setparents(self, p1, p2=nullid):
85 self.markdirty()
88 self.markdirty()
86 self.pl = p1, p2
89 self.pl = p1, p2
87
90
88 def state(self, key):
91 def state(self, key):
89 try:
92 try:
90 return self[key][0]
93 return self[key][0]
91 except KeyError:
94 except KeyError:
92 return "?"
95 return "?"
93
96
94 def read(self):
97 def read(self):
95 if self.map is not None: return self.map
98 if self.map is not None: return self.map
96
99
97 self.map = {}
100 self.map = {}
98 self.pl = [nullid, nullid]
101 self.pl = [nullid, nullid]
99 try:
102 try:
100 st = self.opener("dirstate").read()
103 st = self.opener("dirstate").read()
101 if not st: return
104 if not st: return
102 except: return
105 except: return
103
106
104 self.pl = [st[:20], st[20: 40]]
107 self.pl = [st[:20], st[20: 40]]
105
108
106 pos = 40
109 pos = 40
107 while pos < len(st):
110 while pos < len(st):
108 e = struct.unpack(">cllll", st[pos:pos+17])
111 e = struct.unpack(">cllll", st[pos:pos+17])
109 l = e[4]
112 l = e[4]
110 pos += 17
113 pos += 17
111 f = st[pos:pos + l]
114 f = st[pos:pos + l]
112 if '\0' in f:
115 if '\0' in f:
113 f, c = f.split('\0')
116 f, c = f.split('\0')
114 self.copies[f] = c
117 self.copies[f] = c
115 self.map[f] = e[:4]
118 self.map[f] = e[:4]
116 pos += l
119 pos += l
117
120
118 def copy(self, source, dest):
121 def copy(self, source, dest):
119 self.read()
122 self.read()
120 self.markdirty()
123 self.markdirty()
121 self.copies[dest] = source
124 self.copies[dest] = source
122
125
123 def copied(self, file):
126 def copied(self, file):
124 return self.copies.get(file, None)
127 return self.copies.get(file, None)
125
128
126 def update(self, files, state, **kw):
129 def update(self, files, state, **kw):
127 ''' current states:
130 ''' current states:
128 n normal
131 n normal
129 m needs merging
132 m needs merging
130 r marked for removal
133 r marked for removal
131 a marked for addition'''
134 a marked for addition'''
132
135
133 if not files: return
136 if not files: return
134 self.read()
137 self.read()
135 self.markdirty()
138 self.markdirty()
136 for f in files:
139 for f in files:
137 if state == "r":
140 if state == "r":
138 self.map[f] = ('r', 0, 0, 0)
141 self.map[f] = ('r', 0, 0, 0)
139 else:
142 else:
140 s = os.stat(os.path.join(self.root, f))
143 s = os.stat(os.path.join(self.root, f))
141 st_size = kw.get('st_size', s.st_size)
144 st_size = kw.get('st_size', s.st_size)
142 st_mtime = kw.get('st_mtime', s.st_mtime)
145 st_mtime = kw.get('st_mtime', s.st_mtime)
143 self.map[f] = (state, s.st_mode, st_size, st_mtime)
146 self.map[f] = (state, s.st_mode, st_size, st_mtime)
144 if self.copies.has_key(f):
147 if self.copies.has_key(f):
145 del self.copies[f]
148 del self.copies[f]
146
149
147 def forget(self, files):
150 def forget(self, files):
148 if not files: return
151 if not files: return
149 self.read()
152 self.read()
150 self.markdirty()
153 self.markdirty()
151 for f in files:
154 for f in files:
152 try:
155 try:
153 del self.map[f]
156 del self.map[f]
154 except KeyError:
157 except KeyError:
155 self.ui.warn("not in dirstate: %s!\n" % f)
158 self.ui.warn("not in dirstate: %s!\n" % f)
156 pass
159 pass
157
160
158 def clear(self):
161 def clear(self):
159 self.map = {}
162 self.map = {}
160 self.markdirty()
163 self.markdirty()
161
164
162 def write(self):
165 def write(self):
163 st = self.opener("dirstate", "w")
166 st = self.opener("dirstate", "w")
164 st.write("".join(self.pl))
167 st.write("".join(self.pl))
165 for f, e in self.map.items():
168 for f, e in self.map.items():
166 c = self.copied(f)
169 c = self.copied(f)
167 if c:
170 if c:
168 f = f + "\0" + c
171 f = f + "\0" + c
169 e = struct.pack(">cllll", e[0], e[1], e[2], e[3], len(f))
172 e = struct.pack(">cllll", e[0], e[1], e[2], e[3], len(f))
170 st.write(e + f)
173 st.write(e + f)
171 self.dirty = 0
174 self.dirty = 0
172
175
173 def filterfiles(self, files):
176 def filterfiles(self, files):
174 ret = {}
177 ret = {}
175 unknown = []
178 unknown = []
176
179
177 for x in files:
180 for x in files:
178 if x is '.':
181 if x is '.':
179 return self.map.copy()
182 return self.map.copy()
180 if x not in self.map:
183 if x not in self.map:
181 unknown.append(x)
184 unknown.append(x)
182 else:
185 else:
183 ret[x] = self.map[x]
186 ret[x] = self.map[x]
184
187
185 if not unknown:
188 if not unknown:
186 return ret
189 return ret
187
190
188 b = self.map.keys()
191 b = self.map.keys()
189 b.sort()
192 b.sort()
190 blen = len(b)
193 blen = len(b)
191
194
192 for x in unknown:
195 for x in unknown:
193 bs = bisect.bisect(b, x)
196 bs = bisect.bisect(b, x)
194 if bs != 0 and b[bs-1] == x:
197 if bs != 0 and b[bs-1] == x:
195 ret[x] = self.map[x]
198 ret[x] = self.map[x]
196 continue
199 continue
197 while bs < blen:
200 while bs < blen:
198 s = b[bs]
201 s = b[bs]
199 if len(s) > len(x) and s.startswith(x) and s[len(x)] == '/':
202 if len(s) > len(x) and s.startswith(x) and s[len(x)] == '/':
200 ret[s] = self.map[s]
203 ret[s] = self.map[s]
201 else:
204 else:
202 break
205 break
203 bs += 1
206 bs += 1
204 return ret
207 return ret
205
208
206 def walk(self, files=None, match=util.always, dc=None):
209 def walk(self, files=None, match=util.always, dc=None):
207 self.read()
210 self.read()
208
211
209 # walk all files by default
212 # walk all files by default
210 if not files:
213 if not files:
211 files = [self.root]
214 files = [self.root]
212 if not dc:
215 if not dc:
213 dc = self.map.copy()
216 dc = self.map.copy()
214 elif not dc:
217 elif not dc:
215 dc = self.filterfiles(files)
218 dc = self.filterfiles(files)
216
219
220 def statmatch(file, stat):
221 if file not in dc and self.ignore(file):
222 return False
223 return match(file)
224 return self.walkhelper(files=files, statmatch=statmatch, dc=dc)
225
226 # walk recursively through the directory tree, finding all files
227 # matched by the statmatch function
228 #
229 # results are yielded in a tuple (src, filename), where src is one of:
230 # 'f' the file was found in the directory tree
231 # 'm' the file was only in the dirstate and not in the tree
232 #
233 # dc is an optional arg for the current dirstate. dc is not modified
234 # directly by this function, but might be modified by your statmatch call.
235 #
236 def walkhelper(self, files, statmatch, dc):
237 # recursion free walker, faster than os.walk.
238 def findfiles(s):
239 retfiles = []
240 work = [s]
241 while work:
242 top = work.pop()
243 names = os.listdir(top)
244 names.sort()
245 # nd is the top of the repository dir tree
246 nd = util.normpath(top[len(self.root) + 1:])
247 if nd == '.': nd = ''
248 for f in names:
249 np = os.path.join(nd, f)
250 if seen(np):
251 continue
252 p = os.path.join(top, f)
253 st = os.stat(p)
254 if stat.S_ISDIR(st.st_mode):
255 ds = os.path.join(nd, f +'/')
256 if statmatch(ds, st):
257 work.append(p)
258 else:
259 if statmatch(np, st):
260 yield np
261
217 known = {'.hg': 1}
262 known = {'.hg': 1}
218 def seen(fn):
263 def seen(fn):
219 if fn in known: return True
264 if fn in known: return True
220 known[fn] = 1
265 known[fn] = 1
221 def traverse():
266
222 for ff in util.unique(files):
267 # step one, find all files that match our criteria
223 f = os.path.join(self.root, ff)
268 files.sort()
224 try:
269 for ff in util.unique(files):
225 st = os.stat(f)
270 f = os.path.join(self.root, ff)
226 except OSError, inst:
271 try:
227 if ff not in dc: self.ui.warn('%s: %s\n' % (
272 st = os.stat(f)
228 util.pathto(self.getcwd(), ff),
273 except OSError, inst:
229 inst.strerror))
274 if ff not in dc: self.ui.warn('%s: %s\n' % (
275 util.pathto(self.getcwd(), ff),
276 inst.strerror))
277 continue
278 if stat.S_ISDIR(st.st_mode):
279 sorted = [ x for x in findfiles(f) ]
280 sorted.sort()
281 for fl in sorted:
282 yield 'f', fl
283 elif stat.S_ISREG(st.st_mode):
284 ff = util.normpath(ff)
285 if seen(ff):
230 continue
286 continue
231 if stat.S_ISDIR(st.st_mode):
287 found = False
232 for dir, subdirs, fl in os.walk(f):
288 self.blockignore = True
233 d = dir[len(self.root) + 1:]
289 if statmatch(ff, st):
234 nd = util.normpath(d)
290 found = True
235 if nd == '.': nd = ''
291 self.blockignore = False
236 if seen(nd):
292 if found:
237 subdirs[:] = []
238 continue
239 for sd in subdirs:
240 ds = os.path.join(nd, sd +'/')
241 if self.ignore(ds) or not match(ds):
242 subdirs.remove(sd)
243 subdirs.sort()
244 fl.sort()
245 for fn in fl:
246 fn = util.pconvert(os.path.join(d, fn))
247 yield 'f', fn
248 elif stat.S_ISREG(st.st_mode):
249 yield 'f', ff
293 yield 'f', ff
250 else:
294 else:
251 kind = 'unknown'
295 kind = 'unknown'
252 if stat.S_ISCHR(st.st_mode): kind = 'character device'
296 if stat.S_ISCHR(st.st_mode): kind = 'character device'
253 elif stat.S_ISBLK(st.st_mode): kind = 'block device'
297 elif stat.S_ISBLK(st.st_mode): kind = 'block device'
254 elif stat.S_ISFIFO(st.st_mode): kind = 'fifo'
298 elif stat.S_ISFIFO(st.st_mode): kind = 'fifo'
255 elif stat.S_ISLNK(st.st_mode): kind = 'symbolic link'
299 elif stat.S_ISLNK(st.st_mode): kind = 'symbolic link'
256 elif stat.S_ISSOCK(st.st_mode): kind = 'socket'
300 elif stat.S_ISSOCK(st.st_mode): kind = 'socket'
257 self.ui.warn('%s: unsupported file type (type is %s)\n' % (
301 self.ui.warn('%s: unsupported file type (type is %s)\n' % (
258 util.pathto(self.getcwd(), ff),
302 util.pathto(self.getcwd(), ff),
259 kind))
303 kind))
260
304
261 ks = dc.keys()
305 # step two run through anything left in the dc hash and yield
262 ks.sort()
306 # if we haven't already seen it
263 for k in ks:
307 ks = dc.keys()
308 ks.sort()
309 for k in ks:
310 if not seen(k) and (statmatch(k, None)):
264 yield 'm', k
311 yield 'm', k
265
312
266 # yield only files that match: all in dirstate, others only if
267 # not in .hgignore
268
269 for src, fn in util.unique(traverse()):
270 fn = util.normpath(fn)
271 if seen(fn): continue
272 if fn not in dc and self.ignore(fn):
273 continue
274 if match(fn):
275 yield src, fn
276
277 def changes(self, files=None, match=util.always):
313 def changes(self, files=None, match=util.always):
278 self.read()
314 self.read()
279 if not files:
315 if not files:
316 files = [self.root]
280 dc = self.map.copy()
317 dc = self.map.copy()
281 else:
318 else:
282 dc = self.filterfiles(files)
319 dc = self.filterfiles(files)
283 lookup, modified, added, unknown = [], [], [], []
320 lookup, modified, added, unknown = [], [], [], []
284 removed, deleted = [], []
321 removed, deleted = [], []
285
322
286 for src, fn in self.walk(files, match, dc=dc):
323 # statmatch function to eliminate entries from the dirstate copy
287 try:
324 # and put files into the appropriate array. This gets passed
288 s = os.stat(os.path.join(self.root, fn))
325 # to the walking code
289 except OSError:
326 def statmatch(fn, s):
290 continue
327 def checkappend(l, fn):
328 if match is util.always or match(fn):
329 l.append(fn)
330
331 if not s or stat.S_ISDIR(s.st_mode):
332 return self.ignore(fn) and False or match(fn)
333
291 if not stat.S_ISREG(s.st_mode):
334 if not stat.S_ISREG(s.st_mode):
292 continue
335 return False
293 c = dc.get(fn)
336 c = dc.pop(fn, None)
294 if c:
337 if c:
295 del dc[fn]
338 type, mode, size, time = c
296 if c[0] == 'm':
339 # check the common case first
297 modified.append(fn)
340 if type == 'n':
298 elif c[0] == 'a':
341 if size != s.st_size or (mode ^ s.st_mode) & 0100:
299 added.append(fn)
342 checkappend(modified, fn)
300 elif c[0] == 'r':
343 elif time != s.st_mtime:
344 checkappend(lookup, fn)
345 elif type == 'm':
346 checkappend(modified, fn)
347 elif type == 'a':
348 checkappend(added, fn)
349 elif type == 'r':
350 checkappend(unknown, fn)
351 else:
352 if not self.ignore(fn) and match(fn):
301 unknown.append(fn)
353 unknown.append(fn)
302 elif c[2] != s.st_size or (c[1] ^ s.st_mode) & 0100:
354 # return false because we've already handled all cases above.
303 modified.append(fn)
355 # there's no need for the walking code to process the file
304 elif c[3] != s.st_mtime:
356 # any further.
305 lookup.append(fn)
357 return False
306 else:
307 unknown.append(fn)
308
358
359 # because our statmatch always returns false, self.walk will only
360 # return files in the dirstate map that are not present in the FS.
361 # But, we still need to iterate through the results to force the
362 # walk to complete
363 for src, fn in self.walkhelper(files, statmatch, dc):
364 pass
365
366 # anything left in dc didn't exist in the filesystem
309 for fn, c in [(fn, c) for fn, c in dc.items() if match(fn)]:
367 for fn, c in [(fn, c) for fn, c in dc.items() if match(fn)]:
310 if c[0] == 'r':
368 if c[0] == 'r':
311 removed.append(fn)
369 removed.append(fn)
312 else:
370 else:
313 deleted.append(fn)
371 deleted.append(fn)
314 return (lookup, modified, added, removed + deleted, unknown)
372 return (lookup, modified, added, removed + deleted, unknown)
General Comments 0
You need to be logged in to leave comments. Login now