##// END OF EJS Templates
Fix performance of dirstate.changes with ignored directories.
Bryan O'Sullivan -
r1268:c631f263 default
parent child Browse files
Show More
@@ -1,376 +1,377 b''
1 1 """
2 2 dirstate.py - working directory tracking for mercurial
3 3
4 4 Copyright 2005 Matt Mackall <mpm@selenic.com>
5 5
6 6 This software may be used and distributed according to the terms
7 7 of the GNU General Public License, incorporated herein by reference.
8 8 """
9 9
10 10 import struct, os
11 11 from node import *
12 12 from demandload import *
13 13 demandload(globals(), "time bisect stat util re")
14 14
15 15 class dirstate:
16 16 def __init__(self, opener, ui, root):
17 17 self.opener = opener
18 18 self.root = root
19 19 self.dirty = 0
20 20 self.ui = ui
21 21 self.map = None
22 22 self.pl = None
23 23 self.copies = {}
24 24 self.ignorefunc = None
25 25 self.blockignore = False
26 26
27 27 def wjoin(self, f):
28 28 return os.path.join(self.root, f)
29 29
30 30 def getcwd(self):
31 31 cwd = os.getcwd()
32 32 if cwd == self.root: return ''
33 33 return cwd[len(self.root) + 1:]
34 34
35 35 def ignore(self, f):
36 36 if self.blockignore:
37 37 return False
38 38 if not self.ignorefunc:
39 39 bigpat = []
40 40 try:
41 41 l = file(self.wjoin(".hgignore"))
42 42 for pat in l:
43 43 p = pat.rstrip()
44 44 if p:
45 45 try:
46 46 re.compile(p)
47 47 except:
48 48 self.ui.warn("ignoring invalid ignore"
49 49 + " regular expression '%s'\n" % p)
50 50 else:
51 51 bigpat.append(p)
52 52 except IOError: pass
53 53
54 54 if bigpat:
55 55 s = "(?:%s)" % (")|(?:".join(bigpat))
56 56 r = re.compile(s)
57 57 self.ignorefunc = r.search
58 58 else:
59 59 self.ignorefunc = util.never
60 60
61 61 return self.ignorefunc(f)
62 62
63 63 def __del__(self):
64 64 if self.dirty:
65 65 self.write()
66 66
67 67 def __getitem__(self, key):
68 68 try:
69 69 return self.map[key]
70 70 except TypeError:
71 71 self.read()
72 72 return self[key]
73 73
74 74 def __contains__(self, key):
75 75 if not self.map: self.read()
76 76 return key in self.map
77 77
78 78 def parents(self):
79 79 if not self.pl:
80 80 self.read()
81 81 return self.pl
82 82
83 83 def markdirty(self):
84 84 if not self.dirty:
85 85 self.dirty = 1
86 86
87 87 def setparents(self, p1, p2=nullid):
88 88 self.markdirty()
89 89 self.pl = p1, p2
90 90
91 91 def state(self, key):
92 92 try:
93 93 return self[key][0]
94 94 except KeyError:
95 95 return "?"
96 96
97 97 def read(self):
98 98 if self.map is not None: return self.map
99 99
100 100 self.map = {}
101 101 self.pl = [nullid, nullid]
102 102 try:
103 103 st = self.opener("dirstate").read()
104 104 if not st: return
105 105 except: return
106 106
107 107 self.pl = [st[:20], st[20: 40]]
108 108
109 109 pos = 40
110 110 while pos < len(st):
111 111 e = struct.unpack(">cllll", st[pos:pos+17])
112 112 l = e[4]
113 113 pos += 17
114 114 f = st[pos:pos + l]
115 115 if '\0' in f:
116 116 f, c = f.split('\0')
117 117 self.copies[f] = c
118 118 self.map[f] = e[:4]
119 119 pos += l
120 120
121 121 def copy(self, source, dest):
122 122 self.read()
123 123 self.markdirty()
124 124 self.copies[dest] = source
125 125
126 126 def copied(self, file):
127 127 return self.copies.get(file, None)
128 128
129 129 def update(self, files, state, **kw):
130 130 ''' current states:
131 131 n normal
132 132 m needs merging
133 133 r marked for removal
134 134 a marked for addition'''
135 135
136 136 if not files: return
137 137 self.read()
138 138 self.markdirty()
139 139 for f in files:
140 140 if state == "r":
141 141 self.map[f] = ('r', 0, 0, 0)
142 142 else:
143 143 s = os.lstat(os.path.join(self.root, f))
144 144 st_size = kw.get('st_size', s.st_size)
145 145 st_mtime = kw.get('st_mtime', s.st_mtime)
146 146 self.map[f] = (state, s.st_mode, st_size, st_mtime)
147 147 if self.copies.has_key(f):
148 148 del self.copies[f]
149 149
150 150 def forget(self, files):
151 151 if not files: return
152 152 self.read()
153 153 self.markdirty()
154 154 for f in files:
155 155 try:
156 156 del self.map[f]
157 157 except KeyError:
158 158 self.ui.warn("not in dirstate: %s!\n" % f)
159 159 pass
160 160
161 161 def clear(self):
162 162 self.map = {}
163 163 self.markdirty()
164 164
165 165 def write(self):
166 166 st = self.opener("dirstate", "w")
167 167 st.write("".join(self.pl))
168 168 for f, e in self.map.items():
169 169 c = self.copied(f)
170 170 if c:
171 171 f = f + "\0" + c
172 172 e = struct.pack(">cllll", e[0], e[1], e[2], e[3], len(f))
173 173 st.write(e + f)
174 174 self.dirty = 0
175 175
176 176 def filterfiles(self, files):
177 177 ret = {}
178 178 unknown = []
179 179
180 180 for x in files:
181 181 if x is '.':
182 182 return self.map.copy()
183 183 if x not in self.map:
184 184 unknown.append(x)
185 185 else:
186 186 ret[x] = self.map[x]
187 187
188 188 if not unknown:
189 189 return ret
190 190
191 191 b = self.map.keys()
192 192 b.sort()
193 193 blen = len(b)
194 194
195 195 for x in unknown:
196 196 bs = bisect.bisect(b, x)
197 197 if bs != 0 and b[bs-1] == x:
198 198 ret[x] = self.map[x]
199 199 continue
200 200 while bs < blen:
201 201 s = b[bs]
202 202 if len(s) > len(x) and s.startswith(x) and s[len(x)] == '/':
203 203 ret[s] = self.map[s]
204 204 else:
205 205 break
206 206 bs += 1
207 207 return ret
208 208
209 209 def walk(self, files=None, match=util.always, dc=None):
210 210 self.read()
211 211
212 212 # walk all files by default
213 213 if not files:
214 214 files = [self.root]
215 215 if not dc:
216 216 dc = self.map.copy()
217 217 elif not dc:
218 218 dc = self.filterfiles(files)
219 219
220 220 def statmatch(file, stat):
221 221 file = util.pconvert(file)
222 222 if file not in dc and self.ignore(file):
223 223 return False
224 224 return match(file)
225 225
226 226 return self.walkhelper(files=files, statmatch=statmatch, dc=dc)
227 227
228 228 # walk recursively through the directory tree, finding all files
229 229 # matched by the statmatch function
230 230 #
231 231 # results are yielded in a tuple (src, filename), where src is one of:
232 232 # 'f' the file was found in the directory tree
233 233 # 'm' the file was only in the dirstate and not in the tree
234 234 #
235 235 # dc is an optional arg for the current dirstate. dc is not modified
236 236 # directly by this function, but might be modified by your statmatch call.
237 237 #
238 238 def walkhelper(self, files, statmatch, dc):
239 239 # recursion free walker, faster than os.walk.
240 240 def findfiles(s):
241 241 retfiles = []
242 242 work = [s]
243 243 while work:
244 244 top = work.pop()
245 245 names = os.listdir(top)
246 246 names.sort()
247 247 # nd is the top of the repository dir tree
248 248 nd = util.normpath(top[len(self.root) + 1:])
249 249 if nd == '.': nd = ''
250 250 for f in names:
251 251 np = os.path.join(nd, f)
252 252 if seen(np):
253 253 continue
254 254 p = os.path.join(top, f)
255 255 # don't trip over symlinks
256 256 st = os.lstat(p)
257 257 if stat.S_ISDIR(st.st_mode):
258 258 ds = os.path.join(nd, f +'/')
259 259 if statmatch(ds, st):
260 260 work.append(p)
261 261 else:
262 262 if statmatch(np, st):
263 263 yield util.pconvert(np)
264 264
265 265 known = {'.hg': 1}
266 266 def seen(fn):
267 267 if fn in known: return True
268 268 known[fn] = 1
269 269
270 270 # step one, find all files that match our criteria
271 271 files.sort()
272 272 for ff in util.unique(files):
273 273 f = os.path.join(self.root, ff)
274 274 try:
275 275 st = os.lstat(f)
276 276 except OSError, inst:
277 277 if ff not in dc: self.ui.warn('%s: %s\n' % (
278 278 util.pathto(self.getcwd(), ff),
279 279 inst.strerror))
280 280 continue
281 281 if stat.S_ISDIR(st.st_mode):
282 282 sorted = [ x for x in findfiles(f) ]
283 283 sorted.sort()
284 284 for fl in sorted:
285 285 yield 'f', fl
286 286 elif stat.S_ISREG(st.st_mode):
287 287 ff = util.normpath(ff)
288 288 if seen(ff):
289 289 continue
290 290 found = False
291 291 self.blockignore = True
292 292 if statmatch(ff, st):
293 293 found = True
294 294 self.blockignore = False
295 295 if found:
296 296 yield 'f', ff
297 297 else:
298 298 kind = 'unknown'
299 299 if stat.S_ISCHR(st.st_mode): kind = 'character device'
300 300 elif stat.S_ISBLK(st.st_mode): kind = 'block device'
301 301 elif stat.S_ISFIFO(st.st_mode): kind = 'fifo'
302 302 elif stat.S_ISLNK(st.st_mode): kind = 'symbolic link'
303 303 elif stat.S_ISSOCK(st.st_mode): kind = 'socket'
304 304 self.ui.warn('%s: unsupported file type (type is %s)\n' % (
305 305 util.pathto(self.getcwd(), ff),
306 306 kind))
307 307
308 308 # step two run through anything left in the dc hash and yield
309 309 # if we haven't already seen it
310 310 ks = dc.keys()
311 311 ks.sort()
312 312 for k in ks:
313 313 if not seen(k) and (statmatch(k, None)):
314 314 yield 'm', k
315 315
316 316 def changes(self, files=None, match=util.always):
317 317 self.read()
318 318 if not files:
319 319 files = [self.root]
320 320 dc = self.map.copy()
321 321 else:
322 322 dc = self.filterfiles(files)
323 323 lookup, modified, added, unknown = [], [], [], []
324 324 removed, deleted = [], []
325 325
326 326 # statmatch function to eliminate entries from the dirstate copy
327 327 # and put files into the appropriate array. This gets passed
328 328 # to the walking code
329 329 def statmatch(fn, s):
330 330 fn = util.pconvert(fn)
331 331 def checkappend(l, fn):
332 332 if match is util.always or match(fn):
333 333 l.append(fn)
334 334
335 335 if not s or stat.S_ISDIR(s.st_mode):
336 return self.ignore(fn) and False or match(fn)
336 if self.ignore(fn): return False
337 return match(fn)
337 338
338 339 if not stat.S_ISREG(s.st_mode):
339 340 return False
340 341 c = dc.pop(fn, None)
341 342 if c:
342 343 type, mode, size, time = c
343 344 # check the common case first
344 345 if type == 'n':
345 346 if size != s.st_size or (mode ^ s.st_mode) & 0100:
346 347 checkappend(modified, fn)
347 348 elif time != s.st_mtime:
348 349 checkappend(lookup, fn)
349 350 elif type == 'm':
350 351 checkappend(modified, fn)
351 352 elif type == 'a':
352 353 checkappend(added, fn)
353 354 elif type == 'r':
354 355 checkappend(unknown, fn)
355 356 else:
356 357 if not self.ignore(fn) and match(fn):
357 358 unknown.append(fn)
358 359 # return false because we've already handled all cases above.
359 360 # there's no need for the walking code to process the file
360 361 # any further.
361 362 return False
362 363
363 364 # because our statmatch always returns false, self.walk will only
364 365 # return files in the dirstate map that are not present in the FS.
365 366 # But, we still need to iterate through the results to force the
366 367 # walk to complete
367 368 for src, fn in self.walkhelper(files, statmatch, dc):
368 369 pass
369 370
370 371 # anything left in dc didn't exist in the filesystem
371 372 for fn, c in [(fn, c) for fn, c in dc.items() if match(fn)]:
372 373 if c[0] == 'r':
373 374 removed.append(fn)
374 375 else:
375 376 deleted.append(fn)
376 377 return (lookup, modified, added, removed + deleted, unknown)
General Comments 0
You need to be logged in to leave comments. Login now