##// END OF EJS Templates
revbranchcache: move entry writing to a separate function...
Durham Goode -
r24375:fe255b25 default
parent child Browse files
Show More
@@ -1,450 +1,454 b''
1 1 # branchmap.py - logic to computes, maintain and stores branchmap for local repo
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from node import bin, hex, nullid, nullrev
9 9 import encoding
10 10 import util
11 11 import time
12 12 from array import array
13 13 from struct import calcsize, pack, unpack
14 14
15 15 def _filename(repo):
16 16 """name of a branchcache file for a given repo or repoview"""
17 17 filename = "cache/branch2"
18 18 if repo.filtername:
19 19 filename = '%s-%s' % (filename, repo.filtername)
20 20 return filename
21 21
22 22 def read(repo):
23 23 try:
24 24 f = repo.vfs(_filename(repo))
25 25 lines = f.read().split('\n')
26 26 f.close()
27 27 except (IOError, OSError):
28 28 return None
29 29
30 30 try:
31 31 cachekey = lines.pop(0).split(" ", 2)
32 32 last, lrev = cachekey[:2]
33 33 last, lrev = bin(last), int(lrev)
34 34 filteredhash = None
35 35 if len(cachekey) > 2:
36 36 filteredhash = bin(cachekey[2])
37 37 partial = branchcache(tipnode=last, tiprev=lrev,
38 38 filteredhash=filteredhash)
39 39 if not partial.validfor(repo):
40 40 # invalidate the cache
41 41 raise ValueError('tip differs')
42 42 for l in lines:
43 43 if not l:
44 44 continue
45 45 node, state, label = l.split(" ", 2)
46 46 if state not in 'oc':
47 47 raise ValueError('invalid branch state')
48 48 label = encoding.tolocal(label.strip())
49 49 if not node in repo:
50 50 raise ValueError('node %s does not exist' % node)
51 51 node = bin(node)
52 52 partial.setdefault(label, []).append(node)
53 53 if state == 'c':
54 54 partial._closednodes.add(node)
55 55 except KeyboardInterrupt:
56 56 raise
57 57 except Exception, inst:
58 58 if repo.ui.debugflag:
59 59 msg = 'invalid branchheads cache'
60 60 if repo.filtername is not None:
61 61 msg += ' (%s)' % repo.filtername
62 62 msg += ': %s\n'
63 63 repo.ui.debug(msg % inst)
64 64 partial = None
65 65 return partial
66 66
67 67 ### Nearest subset relation
68 68 # Nearest subset of filter X is a filter Y so that:
69 69 # * Y is included in X,
70 70 # * X - Y is as small as possible.
71 71 # This create and ordering used for branchmap purpose.
72 72 # the ordering may be partial
73 73 subsettable = {None: 'visible',
74 74 'visible': 'served',
75 75 'served': 'immutable',
76 76 'immutable': 'base'}
77 77
78 78 def updatecache(repo):
79 79 cl = repo.changelog
80 80 filtername = repo.filtername
81 81 partial = repo._branchcaches.get(filtername)
82 82
83 83 revs = []
84 84 if partial is None or not partial.validfor(repo):
85 85 partial = read(repo)
86 86 if partial is None:
87 87 subsetname = subsettable.get(filtername)
88 88 if subsetname is None:
89 89 partial = branchcache()
90 90 else:
91 91 subset = repo.filtered(subsetname)
92 92 partial = subset.branchmap().copy()
93 93 extrarevs = subset.changelog.filteredrevs - cl.filteredrevs
94 94 revs.extend(r for r in extrarevs if r <= partial.tiprev)
95 95 revs.extend(cl.revs(start=partial.tiprev + 1))
96 96 if revs:
97 97 partial.update(repo, revs)
98 98 partial.write(repo)
99 99
100 100 if repo._revbranchcache is not None:
101 101 repo._revbranchcache.write()
102 102
103 103 assert partial.validfor(repo), filtername
104 104 repo._branchcaches[repo.filtername] = partial
105 105
106 106 class branchcache(dict):
107 107 """A dict like object that hold branches heads cache.
108 108
109 109 This cache is used to avoid costly computations to determine all the
110 110 branch heads of a repo.
111 111
112 112 The cache is serialized on disk in the following format:
113 113
114 114 <tip hex node> <tip rev number> [optional filtered repo hex hash]
115 115 <branch head hex node> <open/closed state> <branch name>
116 116 <branch head hex node> <open/closed state> <branch name>
117 117 ...
118 118
119 119 The first line is used to check if the cache is still valid. If the
120 120 branch cache is for a filtered repo view, an optional third hash is
121 121 included that hashes the hashes of all filtered revisions.
122 122
123 123 The open/closed state is represented by a single letter 'o' or 'c'.
124 124 This field can be used to avoid changelog reads when determining if a
125 125 branch head closes a branch or not.
126 126 """
127 127
128 128 def __init__(self, entries=(), tipnode=nullid, tiprev=nullrev,
129 129 filteredhash=None, closednodes=None):
130 130 super(branchcache, self).__init__(entries)
131 131 self.tipnode = tipnode
132 132 self.tiprev = tiprev
133 133 self.filteredhash = filteredhash
134 134 # closednodes is a set of nodes that close their branch. If the branch
135 135 # cache has been updated, it may contain nodes that are no longer
136 136 # heads.
137 137 if closednodes is None:
138 138 self._closednodes = set()
139 139 else:
140 140 self._closednodes = closednodes
141 141
142 142 def _hashfiltered(self, repo):
143 143 """build hash of revision filtered in the current cache
144 144
145 145 Tracking tipnode and tiprev is not enough to ensure validity of the
146 146 cache as they do not help to distinct cache that ignored various
147 147 revision bellow tiprev.
148 148
149 149 To detect such difference, we build a cache of all ignored revisions.
150 150 """
151 151 cl = repo.changelog
152 152 if not cl.filteredrevs:
153 153 return None
154 154 key = None
155 155 revs = sorted(r for r in cl.filteredrevs if r <= self.tiprev)
156 156 if revs:
157 157 s = util.sha1()
158 158 for rev in revs:
159 159 s.update('%s;' % rev)
160 160 key = s.digest()
161 161 return key
162 162
163 163 def validfor(self, repo):
164 164 """Is the cache content valid regarding a repo
165 165
166 166 - False when cached tipnode is unknown or if we detect a strip.
167 167 - True when cache is up to date or a subset of current repo."""
168 168 try:
169 169 return ((self.tipnode == repo.changelog.node(self.tiprev))
170 170 and (self.filteredhash == self._hashfiltered(repo)))
171 171 except IndexError:
172 172 return False
173 173
174 174 def _branchtip(self, heads):
175 175 '''Return tuple with last open head in heads and false,
176 176 otherwise return last closed head and true.'''
177 177 tip = heads[-1]
178 178 closed = True
179 179 for h in reversed(heads):
180 180 if h not in self._closednodes:
181 181 tip = h
182 182 closed = False
183 183 break
184 184 return tip, closed
185 185
186 186 def branchtip(self, branch):
187 187 '''Return the tipmost open head on branch head, otherwise return the
188 188 tipmost closed head on branch.
189 189 Raise KeyError for unknown branch.'''
190 190 return self._branchtip(self[branch])[0]
191 191
192 192 def branchheads(self, branch, closed=False):
193 193 heads = self[branch]
194 194 if not closed:
195 195 heads = [h for h in heads if h not in self._closednodes]
196 196 return heads
197 197
198 198 def iterbranches(self):
199 199 for bn, heads in self.iteritems():
200 200 yield (bn, heads) + self._branchtip(heads)
201 201
202 202 def copy(self):
203 203 """return an deep copy of the branchcache object"""
204 204 return branchcache(self, self.tipnode, self.tiprev, self.filteredhash,
205 205 self._closednodes)
206 206
207 207 def write(self, repo):
208 208 try:
209 209 f = repo.vfs(_filename(repo), "w", atomictemp=True)
210 210 cachekey = [hex(self.tipnode), str(self.tiprev)]
211 211 if self.filteredhash is not None:
212 212 cachekey.append(hex(self.filteredhash))
213 213 f.write(" ".join(cachekey) + '\n')
214 214 nodecount = 0
215 215 for label, nodes in sorted(self.iteritems()):
216 216 for node in nodes:
217 217 nodecount += 1
218 218 if node in self._closednodes:
219 219 state = 'c'
220 220 else:
221 221 state = 'o'
222 222 f.write("%s %s %s\n" % (hex(node), state,
223 223 encoding.fromlocal(label)))
224 224 f.close()
225 225 repo.ui.log('branchcache',
226 226 'wrote %s branch cache with %d labels and %d nodes\n',
227 227 repo.filtername, len(self), nodecount)
228 228 except (IOError, OSError, util.Abort), inst:
229 229 repo.ui.debug("couldn't write branch cache: %s\n" % inst)
230 230 # Abort may be raise by read only opener
231 231 pass
232 232
233 233 def update(self, repo, revgen):
234 234 """Given a branchhead cache, self, that may have extra nodes or be
235 235 missing heads, and a generator of nodes that are strictly a superset of
236 236 heads missing, this function updates self to be correct.
237 237 """
238 238 starttime = time.time()
239 239 cl = repo.changelog
240 240 # collect new branch entries
241 241 newbranches = {}
242 242 getbranchinfo = repo.revbranchcache().branchinfo
243 243 for r in revgen:
244 244 branch, closesbranch = getbranchinfo(r)
245 245 newbranches.setdefault(branch, []).append(r)
246 246 if closesbranch:
247 247 self._closednodes.add(cl.node(r))
248 248
249 249 # fetch current topological heads to speed up filtering
250 250 topoheads = set(cl.headrevs())
251 251
252 252 # if older branchheads are reachable from new ones, they aren't
253 253 # really branchheads. Note checking parents is insufficient:
254 254 # 1 (branch a) -> 2 (branch b) -> 3 (branch a)
255 255 for branch, newheadrevs in newbranches.iteritems():
256 256 bheads = self.setdefault(branch, [])
257 257 bheadset = set(cl.rev(node) for node in bheads)
258 258
259 259 # This have been tested True on all internal usage of this function.
260 260 # run it again in case of doubt
261 261 # assert not (set(bheadrevs) & set(newheadrevs))
262 262 newheadrevs.sort()
263 263 bheadset.update(newheadrevs)
264 264
265 265 # This prunes out two kinds of heads - heads that are superseded by
266 266 # a head in newheadrevs, and newheadrevs that are not heads because
267 267 # an existing head is their descendant.
268 268 uncertain = bheadset - topoheads
269 269 if uncertain:
270 270 floorrev = min(uncertain)
271 271 ancestors = set(cl.ancestors(newheadrevs, floorrev))
272 272 bheadset -= ancestors
273 273 bheadrevs = sorted(bheadset)
274 274 self[branch] = [cl.node(rev) for rev in bheadrevs]
275 275 tiprev = bheadrevs[-1]
276 276 if tiprev > self.tiprev:
277 277 self.tipnode = cl.node(tiprev)
278 278 self.tiprev = tiprev
279 279
280 280 if not self.validfor(repo):
281 281 # cache key are not valid anymore
282 282 self.tipnode = nullid
283 283 self.tiprev = nullrev
284 284 for heads in self.values():
285 285 tiprev = max(cl.rev(node) for node in heads)
286 286 if tiprev > self.tiprev:
287 287 self.tipnode = cl.node(tiprev)
288 288 self.tiprev = tiprev
289 289 self.filteredhash = self._hashfiltered(repo)
290 290
291 291 duration = time.time() - starttime
292 292 repo.ui.log('branchcache', 'updated %s branch cache in %.4f seconds\n',
293 293 repo.filtername, duration)
294 294
295 295 # Revision branch info cache
296 296
297 297 _rbcversion = '-v1'
298 298 _rbcnames = 'cache/rbc-names' + _rbcversion
299 299 _rbcrevs = 'cache/rbc-revs' + _rbcversion
300 300 # [4 byte hash prefix][4 byte branch name number with sign bit indicating open]
301 301 _rbcrecfmt = '>4sI'
302 302 _rbcrecsize = calcsize(_rbcrecfmt)
303 303 _rbcnodelen = 4
304 304 _rbcbranchidxmask = 0x7fffffff
305 305 _rbccloseflag = 0x80000000
306 306
307 307 class revbranchcache(object):
308 308 """Persistent cache, mapping from revision number to branch name and close.
309 309 This is a low level cache, independent of filtering.
310 310
311 311 Branch names are stored in rbc-names in internal encoding separated by 0.
312 312 rbc-names is append-only, and each branch name is only stored once and will
313 313 thus have a unique index.
314 314
315 315 The branch info for each revision is stored in rbc-revs as constant size
316 316 records. The whole file is read into memory, but it is only 'parsed' on
317 317 demand. The file is usually append-only but will be truncated if repo
318 318 modification is detected.
319 319 The record for each revision contains the first 4 bytes of the
320 320 corresponding node hash, and the record is only used if it still matches.
321 321 Even a completely trashed rbc-revs fill thus still give the right result
322 322 while converging towards full recovery ... assuming no incorrectly matching
323 323 node hashes.
324 324 The record also contains 4 bytes where 31 bits contains the index of the
325 325 branch and the last bit indicate that it is a branch close commit.
326 326 The usage pattern for rbc-revs is thus somewhat similar to 00changelog.i
327 327 and will grow with it but be 1/8th of its size.
328 328 """
329 329
330 330 def __init__(self, repo, readonly=True):
331 331 assert repo.filtername is None
332 332 self._repo = repo
333 333 self._names = [] # branch names in local encoding with static index
334 334 self._rbcrevs = array('c') # structs of type _rbcrecfmt
335 335 self._rbcsnameslen = 0
336 336 try:
337 337 bndata = repo.vfs.read(_rbcnames)
338 338 self._rbcsnameslen = len(bndata) # for verification before writing
339 339 self._names = [encoding.tolocal(bn) for bn in bndata.split('\0')]
340 340 except (IOError, OSError), inst:
341 341 repo.ui.debug("couldn't read revision branch cache names: %s\n" %
342 342 inst)
343 343 if readonly:
344 344 # don't try to use cache - fall back to the slow path
345 345 self.branchinfo = self._branchinfo
346 346
347 347 if self._names:
348 348 try:
349 349 data = repo.vfs.read(_rbcrevs)
350 350 self._rbcrevs.fromstring(data)
351 351 except (IOError, OSError), inst:
352 352 repo.ui.debug("couldn't read revision branch cache: %s\n" %
353 353 inst)
354 354 # remember number of good records on disk
355 355 self._rbcrevslen = min(len(self._rbcrevs) // _rbcrecsize,
356 356 len(repo.changelog))
357 357 if self._rbcrevslen == 0:
358 358 self._names = []
359 359 self._rbcnamescount = len(self._names) # number of good names on disk
360 360 self._namesreverse = dict((b, r) for r, b in enumerate(self._names))
361 361
362 362 def branchinfo(self, rev):
363 363 """Return branch name and close flag for rev, using and updating
364 364 persistent cache."""
365 365 changelog = self._repo.changelog
366 366 rbcrevidx = rev * _rbcrecsize
367 367
368 368 # if requested rev is missing, add and populate all missing revs
369 369 if len(self._rbcrevs) < rbcrevidx + _rbcrecsize:
370 370 first = len(self._rbcrevs) // _rbcrecsize
371 371 self._rbcrevs.extend('\0' * (len(changelog) * _rbcrecsize -
372 372 len(self._rbcrevs)))
373 373 for r in xrange(first, len(changelog)):
374 374 self._branchinfo(r)
375 375
376 376 # fast path: extract data from cache, use it if node is matching
377 377 reponode = changelog.node(rev)[:_rbcnodelen]
378 378 cachenode, branchidx = unpack(
379 379 _rbcrecfmt, buffer(self._rbcrevs, rbcrevidx, _rbcrecsize))
380 380 close = bool(branchidx & _rbccloseflag)
381 381 if close:
382 382 branchidx &= _rbcbranchidxmask
383 383 if cachenode == reponode:
384 384 return self._names[branchidx], close
385 385 # fall back to slow path and make sure it will be written to disk
386 386 self._rbcrevslen = min(self._rbcrevslen, rev)
387 387 return self._branchinfo(rev)
388 388
389 389 def _branchinfo(self, rev):
390 390 """Retrieve branch info from changelog and update _rbcrevs"""
391 391 changelog = self._repo.changelog
392 392 b, close = changelog.branchinfo(rev)
393 393 if b in self._namesreverse:
394 394 branchidx = self._namesreverse[b]
395 395 else:
396 396 branchidx = len(self._names)
397 397 self._names.append(b)
398 398 self._namesreverse[b] = branchidx
399 399 reponode = changelog.node(rev)
400 400 if close:
401 401 branchidx |= _rbccloseflag
402 self._setcachedata(rev, reponode, branchidx)
403 return b, close
404
405 def _setcachedata(self, rev, node, branchidx):
406 """Writes the node's branch data to the in-memory cache data."""
402 407 rbcrevidx = rev * _rbcrecsize
403 408 rec = array('c')
404 rec.fromstring(pack(_rbcrecfmt, reponode, branchidx))
409 rec.fromstring(pack(_rbcrecfmt, node, branchidx))
405 410 self._rbcrevs[rbcrevidx:rbcrevidx + _rbcrecsize] = rec
406 return b, close
407 411
408 412 def write(self):
409 413 """Save branch cache if it is dirty."""
410 414 repo = self._repo
411 415 if self._rbcnamescount < len(self._names):
412 416 try:
413 417 if self._rbcnamescount != 0:
414 418 f = repo.vfs.open(_rbcnames, 'ab')
415 419 if f.tell() == self._rbcsnameslen:
416 420 f.write('\0')
417 421 else:
418 422 f.close()
419 423 repo.ui.debug("%s changed - rewriting it\n" % _rbcnames)
420 424 self._rbcnamescount = 0
421 425 self._rbcrevslen = 0
422 426 if self._rbcnamescount == 0:
423 427 f = repo.vfs.open(_rbcnames, 'wb')
424 428 f.write('\0'.join(encoding.fromlocal(b)
425 429 for b in self._names[self._rbcnamescount:]))
426 430 self._rbcsnameslen = f.tell()
427 431 f.close()
428 432 except (IOError, OSError, util.Abort), inst:
429 433 repo.ui.debug("couldn't write revision branch cache names: "
430 434 "%s\n" % inst)
431 435 return
432 436 self._rbcnamescount = len(self._names)
433 437
434 438 start = self._rbcrevslen * _rbcrecsize
435 439 if start != len(self._rbcrevs):
436 440 revs = min(len(repo.changelog), len(self._rbcrevs) // _rbcrecsize)
437 441 try:
438 442 f = repo.vfs.open(_rbcrevs, 'ab')
439 443 if f.tell() != start:
440 444 repo.ui.debug("truncating %s to %s\n" % (_rbcrevs, start))
441 445 f.seek(start)
442 446 f.truncate()
443 447 end = revs * _rbcrecsize
444 448 f.write(self._rbcrevs[start:end])
445 449 f.close()
446 450 except (IOError, OSError, util.Abort), inst:
447 451 repo.ui.debug("couldn't write revision branch cache: %s\n" %
448 452 inst)
449 453 return
450 454 self._rbcrevslen = revs
General Comments 0
You need to be logged in to leave comments. Login now