##// END OF EJS Templates
revbranchcache: populate cache incrementally...
Durham Goode -
r24376:203a078d default
parent child Browse files
Show More
@@ -1,454 +1,459
1 1 # branchmap.py - logic to computes, maintain and stores branchmap for local repo
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from node import bin, hex, nullid, nullrev
9 9 import encoding
10 10 import util
11 11 import time
12 12 from array import array
13 13 from struct import calcsize, pack, unpack
14 14
15 15 def _filename(repo):
16 16 """name of a branchcache file for a given repo or repoview"""
17 17 filename = "cache/branch2"
18 18 if repo.filtername:
19 19 filename = '%s-%s' % (filename, repo.filtername)
20 20 return filename
21 21
22 22 def read(repo):
23 23 try:
24 24 f = repo.vfs(_filename(repo))
25 25 lines = f.read().split('\n')
26 26 f.close()
27 27 except (IOError, OSError):
28 28 return None
29 29
30 30 try:
31 31 cachekey = lines.pop(0).split(" ", 2)
32 32 last, lrev = cachekey[:2]
33 33 last, lrev = bin(last), int(lrev)
34 34 filteredhash = None
35 35 if len(cachekey) > 2:
36 36 filteredhash = bin(cachekey[2])
37 37 partial = branchcache(tipnode=last, tiprev=lrev,
38 38 filteredhash=filteredhash)
39 39 if not partial.validfor(repo):
40 40 # invalidate the cache
41 41 raise ValueError('tip differs')
42 42 for l in lines:
43 43 if not l:
44 44 continue
45 45 node, state, label = l.split(" ", 2)
46 46 if state not in 'oc':
47 47 raise ValueError('invalid branch state')
48 48 label = encoding.tolocal(label.strip())
49 49 if not node in repo:
50 50 raise ValueError('node %s does not exist' % node)
51 51 node = bin(node)
52 52 partial.setdefault(label, []).append(node)
53 53 if state == 'c':
54 54 partial._closednodes.add(node)
55 55 except KeyboardInterrupt:
56 56 raise
57 57 except Exception, inst:
58 58 if repo.ui.debugflag:
59 59 msg = 'invalid branchheads cache'
60 60 if repo.filtername is not None:
61 61 msg += ' (%s)' % repo.filtername
62 62 msg += ': %s\n'
63 63 repo.ui.debug(msg % inst)
64 64 partial = None
65 65 return partial
66 66
67 67 ### Nearest subset relation
68 68 # Nearest subset of filter X is a filter Y so that:
69 69 # * Y is included in X,
70 70 # * X - Y is as small as possible.
71 71 # This create and ordering used for branchmap purpose.
72 72 # the ordering may be partial
73 73 subsettable = {None: 'visible',
74 74 'visible': 'served',
75 75 'served': 'immutable',
76 76 'immutable': 'base'}
77 77
78 78 def updatecache(repo):
79 79 cl = repo.changelog
80 80 filtername = repo.filtername
81 81 partial = repo._branchcaches.get(filtername)
82 82
83 83 revs = []
84 84 if partial is None or not partial.validfor(repo):
85 85 partial = read(repo)
86 86 if partial is None:
87 87 subsetname = subsettable.get(filtername)
88 88 if subsetname is None:
89 89 partial = branchcache()
90 90 else:
91 91 subset = repo.filtered(subsetname)
92 92 partial = subset.branchmap().copy()
93 93 extrarevs = subset.changelog.filteredrevs - cl.filteredrevs
94 94 revs.extend(r for r in extrarevs if r <= partial.tiprev)
95 95 revs.extend(cl.revs(start=partial.tiprev + 1))
96 96 if revs:
97 97 partial.update(repo, revs)
98 98 partial.write(repo)
99 99
100 100 if repo._revbranchcache is not None:
101 101 repo._revbranchcache.write()
102 102
103 103 assert partial.validfor(repo), filtername
104 104 repo._branchcaches[repo.filtername] = partial
105 105
106 106 class branchcache(dict):
107 107 """A dict like object that hold branches heads cache.
108 108
109 109 This cache is used to avoid costly computations to determine all the
110 110 branch heads of a repo.
111 111
112 112 The cache is serialized on disk in the following format:
113 113
114 114 <tip hex node> <tip rev number> [optional filtered repo hex hash]
115 115 <branch head hex node> <open/closed state> <branch name>
116 116 <branch head hex node> <open/closed state> <branch name>
117 117 ...
118 118
119 119 The first line is used to check if the cache is still valid. If the
120 120 branch cache is for a filtered repo view, an optional third hash is
121 121 included that hashes the hashes of all filtered revisions.
122 122
123 123 The open/closed state is represented by a single letter 'o' or 'c'.
124 124 This field can be used to avoid changelog reads when determining if a
125 125 branch head closes a branch or not.
126 126 """
127 127
128 128 def __init__(self, entries=(), tipnode=nullid, tiprev=nullrev,
129 129 filteredhash=None, closednodes=None):
130 130 super(branchcache, self).__init__(entries)
131 131 self.tipnode = tipnode
132 132 self.tiprev = tiprev
133 133 self.filteredhash = filteredhash
134 134 # closednodes is a set of nodes that close their branch. If the branch
135 135 # cache has been updated, it may contain nodes that are no longer
136 136 # heads.
137 137 if closednodes is None:
138 138 self._closednodes = set()
139 139 else:
140 140 self._closednodes = closednodes
141 141
142 142 def _hashfiltered(self, repo):
143 143 """build hash of revision filtered in the current cache
144 144
145 145 Tracking tipnode and tiprev is not enough to ensure validity of the
146 146 cache as they do not help to distinct cache that ignored various
147 147 revision bellow tiprev.
148 148
149 149 To detect such difference, we build a cache of all ignored revisions.
150 150 """
151 151 cl = repo.changelog
152 152 if not cl.filteredrevs:
153 153 return None
154 154 key = None
155 155 revs = sorted(r for r in cl.filteredrevs if r <= self.tiprev)
156 156 if revs:
157 157 s = util.sha1()
158 158 for rev in revs:
159 159 s.update('%s;' % rev)
160 160 key = s.digest()
161 161 return key
162 162
163 163 def validfor(self, repo):
164 164 """Is the cache content valid regarding a repo
165 165
166 166 - False when cached tipnode is unknown or if we detect a strip.
167 167 - True when cache is up to date or a subset of current repo."""
168 168 try:
169 169 return ((self.tipnode == repo.changelog.node(self.tiprev))
170 170 and (self.filteredhash == self._hashfiltered(repo)))
171 171 except IndexError:
172 172 return False
173 173
174 174 def _branchtip(self, heads):
175 175 '''Return tuple with last open head in heads and false,
176 176 otherwise return last closed head and true.'''
177 177 tip = heads[-1]
178 178 closed = True
179 179 for h in reversed(heads):
180 180 if h not in self._closednodes:
181 181 tip = h
182 182 closed = False
183 183 break
184 184 return tip, closed
185 185
186 186 def branchtip(self, branch):
187 187 '''Return the tipmost open head on branch head, otherwise return the
188 188 tipmost closed head on branch.
189 189 Raise KeyError for unknown branch.'''
190 190 return self._branchtip(self[branch])[0]
191 191
192 192 def branchheads(self, branch, closed=False):
193 193 heads = self[branch]
194 194 if not closed:
195 195 heads = [h for h in heads if h not in self._closednodes]
196 196 return heads
197 197
198 198 def iterbranches(self):
199 199 for bn, heads in self.iteritems():
200 200 yield (bn, heads) + self._branchtip(heads)
201 201
202 202 def copy(self):
203 203 """return an deep copy of the branchcache object"""
204 204 return branchcache(self, self.tipnode, self.tiprev, self.filteredhash,
205 205 self._closednodes)
206 206
207 207 def write(self, repo):
208 208 try:
209 209 f = repo.vfs(_filename(repo), "w", atomictemp=True)
210 210 cachekey = [hex(self.tipnode), str(self.tiprev)]
211 211 if self.filteredhash is not None:
212 212 cachekey.append(hex(self.filteredhash))
213 213 f.write(" ".join(cachekey) + '\n')
214 214 nodecount = 0
215 215 for label, nodes in sorted(self.iteritems()):
216 216 for node in nodes:
217 217 nodecount += 1
218 218 if node in self._closednodes:
219 219 state = 'c'
220 220 else:
221 221 state = 'o'
222 222 f.write("%s %s %s\n" % (hex(node), state,
223 223 encoding.fromlocal(label)))
224 224 f.close()
225 225 repo.ui.log('branchcache',
226 226 'wrote %s branch cache with %d labels and %d nodes\n',
227 227 repo.filtername, len(self), nodecount)
228 228 except (IOError, OSError, util.Abort), inst:
229 229 repo.ui.debug("couldn't write branch cache: %s\n" % inst)
230 230 # Abort may be raise by read only opener
231 231 pass
232 232
233 233 def update(self, repo, revgen):
234 234 """Given a branchhead cache, self, that may have extra nodes or be
235 235 missing heads, and a generator of nodes that are strictly a superset of
236 236 heads missing, this function updates self to be correct.
237 237 """
238 238 starttime = time.time()
239 239 cl = repo.changelog
240 240 # collect new branch entries
241 241 newbranches = {}
242 242 getbranchinfo = repo.revbranchcache().branchinfo
243 243 for r in revgen:
244 244 branch, closesbranch = getbranchinfo(r)
245 245 newbranches.setdefault(branch, []).append(r)
246 246 if closesbranch:
247 247 self._closednodes.add(cl.node(r))
248 248
249 249 # fetch current topological heads to speed up filtering
250 250 topoheads = set(cl.headrevs())
251 251
252 252 # if older branchheads are reachable from new ones, they aren't
253 253 # really branchheads. Note checking parents is insufficient:
254 254 # 1 (branch a) -> 2 (branch b) -> 3 (branch a)
255 255 for branch, newheadrevs in newbranches.iteritems():
256 256 bheads = self.setdefault(branch, [])
257 257 bheadset = set(cl.rev(node) for node in bheads)
258 258
259 259 # This have been tested True on all internal usage of this function.
260 260 # run it again in case of doubt
261 261 # assert not (set(bheadrevs) & set(newheadrevs))
262 262 newheadrevs.sort()
263 263 bheadset.update(newheadrevs)
264 264
265 265 # This prunes out two kinds of heads - heads that are superseded by
266 266 # a head in newheadrevs, and newheadrevs that are not heads because
267 267 # an existing head is their descendant.
268 268 uncertain = bheadset - topoheads
269 269 if uncertain:
270 270 floorrev = min(uncertain)
271 271 ancestors = set(cl.ancestors(newheadrevs, floorrev))
272 272 bheadset -= ancestors
273 273 bheadrevs = sorted(bheadset)
274 274 self[branch] = [cl.node(rev) for rev in bheadrevs]
275 275 tiprev = bheadrevs[-1]
276 276 if tiprev > self.tiprev:
277 277 self.tipnode = cl.node(tiprev)
278 278 self.tiprev = tiprev
279 279
280 280 if not self.validfor(repo):
281 281 # cache key are not valid anymore
282 282 self.tipnode = nullid
283 283 self.tiprev = nullrev
284 284 for heads in self.values():
285 285 tiprev = max(cl.rev(node) for node in heads)
286 286 if tiprev > self.tiprev:
287 287 self.tipnode = cl.node(tiprev)
288 288 self.tiprev = tiprev
289 289 self.filteredhash = self._hashfiltered(repo)
290 290
291 291 duration = time.time() - starttime
292 292 repo.ui.log('branchcache', 'updated %s branch cache in %.4f seconds\n',
293 293 repo.filtername, duration)
294 294
295 295 # Revision branch info cache
296 296
297 297 _rbcversion = '-v1'
298 298 _rbcnames = 'cache/rbc-names' + _rbcversion
299 299 _rbcrevs = 'cache/rbc-revs' + _rbcversion
300 300 # [4 byte hash prefix][4 byte branch name number with sign bit indicating open]
301 301 _rbcrecfmt = '>4sI'
302 302 _rbcrecsize = calcsize(_rbcrecfmt)
303 303 _rbcnodelen = 4
304 304 _rbcbranchidxmask = 0x7fffffff
305 305 _rbccloseflag = 0x80000000
306 306
307 307 class revbranchcache(object):
308 308 """Persistent cache, mapping from revision number to branch name and close.
309 309 This is a low level cache, independent of filtering.
310 310
311 311 Branch names are stored in rbc-names in internal encoding separated by 0.
312 312 rbc-names is append-only, and each branch name is only stored once and will
313 313 thus have a unique index.
314 314
315 315 The branch info for each revision is stored in rbc-revs as constant size
316 316 records. The whole file is read into memory, but it is only 'parsed' on
317 317 demand. The file is usually append-only but will be truncated if repo
318 318 modification is detected.
319 319 The record for each revision contains the first 4 bytes of the
320 320 corresponding node hash, and the record is only used if it still matches.
321 321 Even a completely trashed rbc-revs fill thus still give the right result
322 322 while converging towards full recovery ... assuming no incorrectly matching
323 323 node hashes.
324 324 The record also contains 4 bytes where 31 bits contains the index of the
325 325 branch and the last bit indicate that it is a branch close commit.
326 326 The usage pattern for rbc-revs is thus somewhat similar to 00changelog.i
327 327 and will grow with it but be 1/8th of its size.
328 328 """
329 329
330 330 def __init__(self, repo, readonly=True):
331 331 assert repo.filtername is None
332 332 self._repo = repo
333 333 self._names = [] # branch names in local encoding with static index
334 334 self._rbcrevs = array('c') # structs of type _rbcrecfmt
335 335 self._rbcsnameslen = 0
336 336 try:
337 337 bndata = repo.vfs.read(_rbcnames)
338 338 self._rbcsnameslen = len(bndata) # for verification before writing
339 339 self._names = [encoding.tolocal(bn) for bn in bndata.split('\0')]
340 340 except (IOError, OSError), inst:
341 341 repo.ui.debug("couldn't read revision branch cache names: %s\n" %
342 342 inst)
343 343 if readonly:
344 344 # don't try to use cache - fall back to the slow path
345 345 self.branchinfo = self._branchinfo
346 346
347 347 if self._names:
348 348 try:
349 349 data = repo.vfs.read(_rbcrevs)
350 350 self._rbcrevs.fromstring(data)
351 351 except (IOError, OSError), inst:
352 352 repo.ui.debug("couldn't read revision branch cache: %s\n" %
353 353 inst)
354 354 # remember number of good records on disk
355 355 self._rbcrevslen = min(len(self._rbcrevs) // _rbcrecsize,
356 356 len(repo.changelog))
357 357 if self._rbcrevslen == 0:
358 358 self._names = []
359 359 self._rbcnamescount = len(self._names) # number of good names on disk
360 360 self._namesreverse = dict((b, r) for r, b in enumerate(self._names))
361 361
362 362 def branchinfo(self, rev):
363 363 """Return branch name and close flag for rev, using and updating
364 364 persistent cache."""
365 365 changelog = self._repo.changelog
366 366 rbcrevidx = rev * _rbcrecsize
367 367
368 368 # if requested rev is missing, add and populate all missing revs
369 369 if len(self._rbcrevs) < rbcrevidx + _rbcrecsize:
370 first = len(self._rbcrevs) // _rbcrecsize
371 370 self._rbcrevs.extend('\0' * (len(changelog) * _rbcrecsize -
372 371 len(self._rbcrevs)))
373 for r in xrange(first, len(changelog)):
374 self._branchinfo(r)
375 372
376 373 # fast path: extract data from cache, use it if node is matching
377 374 reponode = changelog.node(rev)[:_rbcnodelen]
378 375 cachenode, branchidx = unpack(
379 376 _rbcrecfmt, buffer(self._rbcrevs, rbcrevidx, _rbcrecsize))
380 377 close = bool(branchidx & _rbccloseflag)
381 378 if close:
382 379 branchidx &= _rbcbranchidxmask
383 if cachenode == reponode:
380 if cachenode == '\0\0\0\0':
381 pass
382 elif cachenode == reponode:
384 383 return self._names[branchidx], close
384 else:
385 # rev/node map has changed, invalidate the cache from here up
386 truncate = rbcrevidx + _rbcrecsize
387 del self._rbcrevs[truncate:]
388 self._rbcrevslen = min(self._rbcrevslen, truncate)
389
385 390 # fall back to slow path and make sure it will be written to disk
386 self._rbcrevslen = min(self._rbcrevslen, rev)
387 391 return self._branchinfo(rev)
388 392
389 393 def _branchinfo(self, rev):
390 394 """Retrieve branch info from changelog and update _rbcrevs"""
391 395 changelog = self._repo.changelog
392 396 b, close = changelog.branchinfo(rev)
393 397 if b in self._namesreverse:
394 398 branchidx = self._namesreverse[b]
395 399 else:
396 400 branchidx = len(self._names)
397 401 self._names.append(b)
398 402 self._namesreverse[b] = branchidx
399 403 reponode = changelog.node(rev)
400 404 if close:
401 405 branchidx |= _rbccloseflag
402 406 self._setcachedata(rev, reponode, branchidx)
403 407 return b, close
404 408
405 409 def _setcachedata(self, rev, node, branchidx):
406 410 """Writes the node's branch data to the in-memory cache data."""
407 411 rbcrevidx = rev * _rbcrecsize
408 412 rec = array('c')
409 413 rec.fromstring(pack(_rbcrecfmt, node, branchidx))
410 414 self._rbcrevs[rbcrevidx:rbcrevidx + _rbcrecsize] = rec
415 self._rbcrevslen = min(self._rbcrevslen, rev)
411 416
412 417 def write(self):
413 418 """Save branch cache if it is dirty."""
414 419 repo = self._repo
415 420 if self._rbcnamescount < len(self._names):
416 421 try:
417 422 if self._rbcnamescount != 0:
418 423 f = repo.vfs.open(_rbcnames, 'ab')
419 424 if f.tell() == self._rbcsnameslen:
420 425 f.write('\0')
421 426 else:
422 427 f.close()
423 428 repo.ui.debug("%s changed - rewriting it\n" % _rbcnames)
424 429 self._rbcnamescount = 0
425 430 self._rbcrevslen = 0
426 431 if self._rbcnamescount == 0:
427 432 f = repo.vfs.open(_rbcnames, 'wb')
428 433 f.write('\0'.join(encoding.fromlocal(b)
429 434 for b in self._names[self._rbcnamescount:]))
430 435 self._rbcsnameslen = f.tell()
431 436 f.close()
432 437 except (IOError, OSError, util.Abort), inst:
433 438 repo.ui.debug("couldn't write revision branch cache names: "
434 439 "%s\n" % inst)
435 440 return
436 441 self._rbcnamescount = len(self._names)
437 442
438 443 start = self._rbcrevslen * _rbcrecsize
439 444 if start != len(self._rbcrevs):
440 445 revs = min(len(repo.changelog), len(self._rbcrevs) // _rbcrecsize)
441 446 try:
442 447 f = repo.vfs.open(_rbcrevs, 'ab')
443 448 if f.tell() != start:
444 449 repo.ui.debug("truncating %s to %s\n" % (_rbcrevs, start))
445 450 f.seek(start)
446 451 f.truncate()
447 452 end = revs * _rbcrecsize
448 453 f.write(self._rbcrevs[start:end])
449 454 f.close()
450 455 except (IOError, OSError, util.Abort), inst:
451 456 repo.ui.debug("couldn't write revision branch cache: %s\n" %
452 457 inst)
453 458 return
454 459 self._rbcrevslen = revs
General Comments 0
You need to be logged in to leave comments. Login now