##// END OF EJS Templates
Optimize dirstate walking...
mason@suse.com -
r1183:d9e85a75 default
parent child Browse files
Show More
@@ -1,314 +1,372 b''
1 1 """
2 2 dirstate.py - working directory tracking for mercurial
3 3
4 4 Copyright 2005 Matt Mackall <mpm@selenic.com>
5 5
6 6 This software may be used and distributed according to the terms
7 7 of the GNU General Public License, incorporated herein by reference.
8 8 """
9 9
10 10 import struct, os
11 11 from node import *
12 12 from demandload import *
13 13 demandload(globals(), "time bisect stat util re")
14 14
15 15 class dirstate:
16 16 def __init__(self, opener, ui, root):
17 17 self.opener = opener
18 18 self.root = root
19 19 self.dirty = 0
20 20 self.ui = ui
21 21 self.map = None
22 22 self.pl = None
23 23 self.copies = {}
24 24 self.ignorefunc = None
25 self.blockignore = False
25 26
26 27 def wjoin(self, f):
27 28 return os.path.join(self.root, f)
28 29
29 30 def getcwd(self):
30 31 cwd = os.getcwd()
31 32 if cwd == self.root: return ''
32 33 return cwd[len(self.root) + 1:]
33 34
34 35 def ignore(self, f):
36 if self.blockignore:
37 return False
35 38 if not self.ignorefunc:
36 39 bigpat = []
37 40 try:
38 41 l = file(self.wjoin(".hgignore"))
39 42 for pat in l:
40 43 p = pat.rstrip()
41 44 if p:
42 45 try:
43 46 re.compile(p)
44 47 except:
45 48 self.ui.warn("ignoring invalid ignore"
46 49 + " regular expression '%s'\n" % p)
47 50 else:
48 51 bigpat.append(p)
49 52 except IOError: pass
50 53
51 54 if bigpat:
52 55 s = "(?:%s)" % (")|(?:".join(bigpat))
53 56 r = re.compile(s)
54 57 self.ignorefunc = r.search
55 58 else:
56 59 self.ignorefunc = util.never
57 60
58 61 return self.ignorefunc(f)
59 62
60 63 def __del__(self):
61 64 if self.dirty:
62 65 self.write()
63 66
64 67 def __getitem__(self, key):
65 68 try:
66 69 return self.map[key]
67 70 except TypeError:
68 71 self.read()
69 72 return self[key]
70 73
71 74 def __contains__(self, key):
72 75 if not self.map: self.read()
73 76 return key in self.map
74 77
75 78 def parents(self):
76 79 if not self.pl:
77 80 self.read()
78 81 return self.pl
79 82
80 83 def markdirty(self):
81 84 if not self.dirty:
82 85 self.dirty = 1
83 86
84 87 def setparents(self, p1, p2=nullid):
85 88 self.markdirty()
86 89 self.pl = p1, p2
87 90
88 91 def state(self, key):
89 92 try:
90 93 return self[key][0]
91 94 except KeyError:
92 95 return "?"
93 96
94 97 def read(self):
95 98 if self.map is not None: return self.map
96 99
97 100 self.map = {}
98 101 self.pl = [nullid, nullid]
99 102 try:
100 103 st = self.opener("dirstate").read()
101 104 if not st: return
102 105 except: return
103 106
104 107 self.pl = [st[:20], st[20: 40]]
105 108
106 109 pos = 40
107 110 while pos < len(st):
108 111 e = struct.unpack(">cllll", st[pos:pos+17])
109 112 l = e[4]
110 113 pos += 17
111 114 f = st[pos:pos + l]
112 115 if '\0' in f:
113 116 f, c = f.split('\0')
114 117 self.copies[f] = c
115 118 self.map[f] = e[:4]
116 119 pos += l
117 120
118 121 def copy(self, source, dest):
119 122 self.read()
120 123 self.markdirty()
121 124 self.copies[dest] = source
122 125
123 126 def copied(self, file):
124 127 return self.copies.get(file, None)
125 128
126 129 def update(self, files, state, **kw):
127 130 ''' current states:
128 131 n normal
129 132 m needs merging
130 133 r marked for removal
131 134 a marked for addition'''
132 135
133 136 if not files: return
134 137 self.read()
135 138 self.markdirty()
136 139 for f in files:
137 140 if state == "r":
138 141 self.map[f] = ('r', 0, 0, 0)
139 142 else:
140 143 s = os.stat(os.path.join(self.root, f))
141 144 st_size = kw.get('st_size', s.st_size)
142 145 st_mtime = kw.get('st_mtime', s.st_mtime)
143 146 self.map[f] = (state, s.st_mode, st_size, st_mtime)
144 147 if self.copies.has_key(f):
145 148 del self.copies[f]
146 149
147 150 def forget(self, files):
148 151 if not files: return
149 152 self.read()
150 153 self.markdirty()
151 154 for f in files:
152 155 try:
153 156 del self.map[f]
154 157 except KeyError:
155 158 self.ui.warn("not in dirstate: %s!\n" % f)
156 159 pass
157 160
158 161 def clear(self):
159 162 self.map = {}
160 163 self.markdirty()
161 164
162 165 def write(self):
163 166 st = self.opener("dirstate", "w")
164 167 st.write("".join(self.pl))
165 168 for f, e in self.map.items():
166 169 c = self.copied(f)
167 170 if c:
168 171 f = f + "\0" + c
169 172 e = struct.pack(">cllll", e[0], e[1], e[2], e[3], len(f))
170 173 st.write(e + f)
171 174 self.dirty = 0
172 175
173 176 def filterfiles(self, files):
174 177 ret = {}
175 178 unknown = []
176 179
177 180 for x in files:
178 181 if x is '.':
179 182 return self.map.copy()
180 183 if x not in self.map:
181 184 unknown.append(x)
182 185 else:
183 186 ret[x] = self.map[x]
184 187
185 188 if not unknown:
186 189 return ret
187 190
188 191 b = self.map.keys()
189 192 b.sort()
190 193 blen = len(b)
191 194
192 195 for x in unknown:
193 196 bs = bisect.bisect(b, x)
194 197 if bs != 0 and b[bs-1] == x:
195 198 ret[x] = self.map[x]
196 199 continue
197 200 while bs < blen:
198 201 s = b[bs]
199 202 if len(s) > len(x) and s.startswith(x) and s[len(x)] == '/':
200 203 ret[s] = self.map[s]
201 204 else:
202 205 break
203 206 bs += 1
204 207 return ret
205 208
206 209 def walk(self, files=None, match=util.always, dc=None):
207 210 self.read()
208 211
209 212 # walk all files by default
210 213 if not files:
211 214 files = [self.root]
212 215 if not dc:
213 216 dc = self.map.copy()
214 217 elif not dc:
215 218 dc = self.filterfiles(files)
216 219
220 def statmatch(file, stat):
221 if file not in dc and self.ignore(file):
222 return False
223 return match(file)
224 return self.walkhelper(files=files, statmatch=statmatch, dc=dc)
225
226 # walk recursively through the directory tree, finding all files
227 # matched by the statmatch function
228 #
229 # results are yielded in a tuple (src, filename), where src is one of:
230 # 'f' the file was found in the directory tree
231 # 'm' the file was only in the dirstate and not in the tree
232 #
233 # dc is an optional arg for the current dirstate. dc is not modified
234 # directly by this function, but might be modified by your statmatch call.
235 #
236 def walkhelper(self, files, statmatch, dc):
237 # recursion free walker, faster than os.walk.
238 def findfiles(s):
239 retfiles = []
240 work = [s]
241 while work:
242 top = work.pop()
243 names = os.listdir(top)
244 names.sort()
245 # nd is the top of the repository dir tree
246 nd = util.normpath(top[len(self.root) + 1:])
247 if nd == '.': nd = ''
248 for f in names:
249 np = os.path.join(nd, f)
250 if seen(np):
251 continue
252 p = os.path.join(top, f)
253 st = os.stat(p)
254 if stat.S_ISDIR(st.st_mode):
255 ds = os.path.join(nd, f +'/')
256 if statmatch(ds, st):
257 work.append(p)
258 else:
259 if statmatch(np, st):
260 yield np
261
217 262 known = {'.hg': 1}
218 263 def seen(fn):
219 264 if fn in known: return True
220 265 known[fn] = 1
221 def traverse():
222 for ff in util.unique(files):
223 f = os.path.join(self.root, ff)
224 try:
225 st = os.stat(f)
226 except OSError, inst:
227 if ff not in dc: self.ui.warn('%s: %s\n' % (
228 util.pathto(self.getcwd(), ff),
229 inst.strerror))
266
267 # step one, find all files that match our criteria
268 files.sort()
269 for ff in util.unique(files):
270 f = os.path.join(self.root, ff)
271 try:
272 st = os.stat(f)
273 except OSError, inst:
274 if ff not in dc: self.ui.warn('%s: %s\n' % (
275 util.pathto(self.getcwd(), ff),
276 inst.strerror))
277 continue
278 if stat.S_ISDIR(st.st_mode):
279 sorted = [ x for x in findfiles(f) ]
280 sorted.sort()
281 for fl in sorted:
282 yield 'f', fl
283 elif stat.S_ISREG(st.st_mode):
284 ff = util.normpath(ff)
285 if seen(ff):
230 286 continue
231 if stat.S_ISDIR(st.st_mode):
232 for dir, subdirs, fl in os.walk(f):
233 d = dir[len(self.root) + 1:]
234 nd = util.normpath(d)
235 if nd == '.': nd = ''
236 if seen(nd):
237 subdirs[:] = []
238 continue
239 for sd in subdirs:
240 ds = os.path.join(nd, sd +'/')
241 if self.ignore(ds) or not match(ds):
242 subdirs.remove(sd)
243 subdirs.sort()
244 fl.sort()
245 for fn in fl:
246 fn = util.pconvert(os.path.join(d, fn))
247 yield 'f', fn
248 elif stat.S_ISREG(st.st_mode):
287 found = False
288 self.blockignore = True
289 if statmatch(ff, st):
290 found = True
291 self.blockignore = False
292 if found:
249 293 yield 'f', ff
250 else:
251 kind = 'unknown'
252 if stat.S_ISCHR(st.st_mode): kind = 'character device'
253 elif stat.S_ISBLK(st.st_mode): kind = 'block device'
254 elif stat.S_ISFIFO(st.st_mode): kind = 'fifo'
255 elif stat.S_ISLNK(st.st_mode): kind = 'symbolic link'
256 elif stat.S_ISSOCK(st.st_mode): kind = 'socket'
257 self.ui.warn('%s: unsupported file type (type is %s)\n' % (
258 util.pathto(self.getcwd(), ff),
259 kind))
294 else:
295 kind = 'unknown'
296 if stat.S_ISCHR(st.st_mode): kind = 'character device'
297 elif stat.S_ISBLK(st.st_mode): kind = 'block device'
298 elif stat.S_ISFIFO(st.st_mode): kind = 'fifo'
299 elif stat.S_ISLNK(st.st_mode): kind = 'symbolic link'
300 elif stat.S_ISSOCK(st.st_mode): kind = 'socket'
301 self.ui.warn('%s: unsupported file type (type is %s)\n' % (
302 util.pathto(self.getcwd(), ff),
303 kind))
260 304
261 ks = dc.keys()
262 ks.sort()
263 for k in ks:
305 # step two run through anything left in the dc hash and yield
306 # if we haven't already seen it
307 ks = dc.keys()
308 ks.sort()
309 for k in ks:
310 if not seen(k) and (statmatch(k, None)):
264 311 yield 'm', k
265 312
266 # yield only files that match: all in dirstate, others only if
267 # not in .hgignore
268
269 for src, fn in util.unique(traverse()):
270 fn = util.normpath(fn)
271 if seen(fn): continue
272 if fn not in dc and self.ignore(fn):
273 continue
274 if match(fn):
275 yield src, fn
276
277 313 def changes(self, files=None, match=util.always):
278 314 self.read()
279 315 if not files:
316 files = [self.root]
280 317 dc = self.map.copy()
281 318 else:
282 319 dc = self.filterfiles(files)
283 320 lookup, modified, added, unknown = [], [], [], []
284 321 removed, deleted = [], []
285 322
286 for src, fn in self.walk(files, match, dc=dc):
287 try:
288 s = os.stat(os.path.join(self.root, fn))
289 except OSError:
290 continue
323 # statmatch function to eliminate entries from the dirstate copy
324 # and put files into the appropriate array. This gets passed
325 # to the walking code
326 def statmatch(fn, s):
327 def checkappend(l, fn):
328 if match is util.always or match(fn):
329 l.append(fn)
330
331 if not s or stat.S_ISDIR(s.st_mode):
332 return self.ignore(fn) and False or match(fn)
333
291 334 if not stat.S_ISREG(s.st_mode):
292 continue
293 c = dc.get(fn)
335 return False
336 c = dc.pop(fn, None)
294 337 if c:
295 del dc[fn]
296 if c[0] == 'm':
297 modified.append(fn)
298 elif c[0] == 'a':
299 added.append(fn)
300 elif c[0] == 'r':
338 type, mode, size, time = c
339 # check the common case first
340 if type == 'n':
341 if size != s.st_size or (mode ^ s.st_mode) & 0100:
342 checkappend(modified, fn)
343 elif time != s.st_mtime:
344 checkappend(lookup, fn)
345 elif type == 'm':
346 checkappend(modified, fn)
347 elif type == 'a':
348 checkappend(added, fn)
349 elif type == 'r':
350 checkappend(unknown, fn)
351 else:
352 if not self.ignore(fn) and match(fn):
301 353 unknown.append(fn)
302 elif c[2] != s.st_size or (c[1] ^ s.st_mode) & 0100:
303 modified.append(fn)
304 elif c[3] != s.st_mtime:
305 lookup.append(fn)
306 else:
307 unknown.append(fn)
354 # return false because we've already handled all cases above.
355 # there's no need for the walking code to process the file
356 # any further.
357 return False
308 358
359 # because our statmatch always returns false, self.walk will only
360 # return files in the dirstate map that are not present in the FS.
361 # But, we still need to iterate through the results to force the
362 # walk to complete
363 for src, fn in self.walkhelper(files, statmatch, dc):
364 pass
365
366 # anything left in dc didn't exist in the filesystem
309 367 for fn, c in [(fn, c) for fn, c in dc.items() if match(fn)]:
310 368 if c[0] == 'r':
311 369 removed.append(fn)
312 370 else:
313 371 deleted.append(fn)
314 372 return (lookup, modified, added, removed + deleted, unknown)
General Comments 0
You need to be logged in to leave comments. Login now