##// END OF EJS Templates
branchmap: close cache file properly...
Yuya Nishihara -
r39173:dd161490 default
parent child Browse files
Show More
@@ -1,548 +1,554 b''
1 1 # branchmap.py - logic to compute, maintain and store branchmap for local repo
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import struct
11 11
12 12 from .node import (
13 13 bin,
14 14 hex,
15 15 nullid,
16 16 nullrev,
17 17 )
18 18 from . import (
19 19 encoding,
20 20 error,
21 21 pycompat,
22 22 scmutil,
23 23 util,
24 24 )
25 25 from .utils import (
26 26 stringutil,
27 27 )
28 28
29 29 calcsize = struct.calcsize
30 30 pack_into = struct.pack_into
31 31 unpack_from = struct.unpack_from
32 32
33 33 def _filename(repo):
34 34 """name of a branchcache file for a given repo or repoview"""
35 35 filename = "branch2"
36 36 if repo.filtername:
37 37 filename = '%s-%s' % (filename, repo.filtername)
38 38 return filename
39 39
def read(repo):
    """Read the on-disk branch cache for *repo*, returning a branchcache.

    Returns None when the cache file is missing/unreadable (IOError/OSError)
    or when its content is stale or malformed (any other exception; logged
    only under --debug).  The file handle is closed in all cases.
    """
    f = None
    try:
        f = repo.cachevfs(_filename(repo))
        # first line: "<tip hex> <tip rev> [filtered hash]"
        cachekey = next(f).rstrip('\n').split(" ", 2)
        last, lrev = cachekey[:2]
        last, lrev = bin(last), int(lrev)
        filteredhash = None
        if len(cachekey) > 2:
            filteredhash = bin(cachekey[2])
        partial = branchcache(tipnode=last, tiprev=lrev,
                              filteredhash=filteredhash)
        if not partial.validfor(repo):
            # invalidate the cache
            raise ValueError(r'tip differs')
        cl = repo.changelog
        # remaining lines: "<head hex> <o|c> <branch name>"
        for l in f:
            l = l.rstrip('\n')
            if not l:
                continue
            node, state, label = l.split(" ", 2)
            if state not in 'oc':
                raise ValueError(r'invalid branch state')
            label = encoding.tolocal(label.strip())
            node = bin(node)
            if not cl.hasnode(node):
                raise ValueError(
                    r'node %s does not exist' % pycompat.sysstr(hex(node)))
            partial.setdefault(label, []).append(node)
            if state == 'c':
                partial._closednodes.add(node)

    except (IOError, OSError):
        # no cache file (or unreadable): caller will rebuild from scratch
        return None

    except Exception as inst:
        # any parse/validation failure: treat the cache as invalid
        if repo.ui.debugflag:
            msg = 'invalid branchheads cache'
            if repo.filtername is not None:
                msg += ' (%s)' % repo.filtername
            msg += ': %s\n'
            repo.ui.debug(msg % pycompat.bytestr(inst))
        partial = None

    finally:
        # close the handle whether parsing succeeded or not
        if f:
            f.close()

    return partial
83 89
### Nearest subset relation
# Nearest subset of filter X is a filter Y so that:
# * Y is included in X,
# * X - Y is as small as possible.
# This creates an ordering used for branchmap purposes.
# The ordering may be partial.
subsettable = {None: 'visible',
               'visible-hidden': 'visible',
               'visible': 'served',
               'served': 'immutable',
               'immutable': 'base'}
95 101
def updatecache(repo):
    """Bring repo._branchcaches up to date for repo's current filter.

    Reuses the in-memory cache when still valid, otherwise falls back to
    the on-disk cache, then to the branchmap of the nearest subset view,
    and finally to an empty cache; any missing revisions are folded in
    and the result is written back to disk.
    """
    cl = repo.changelog
    filtername = repo.filtername
    cache = repo._branchcaches.get(filtername)

    missingrevs = []
    if cache is None or not cache.validfor(repo):
        cache = read(repo)
        if cache is None:
            subsetname = subsettable.get(filtername)
            if subsetname is None:
                cache = branchcache()
            else:
                # seed from the nearest subset's branchmap; revisions
                # filtered there but visible here must be replayed
                subset = repo.filtered(subsetname)
                cache = subset.branchmap().copy()
                extrarevs = subset.changelog.filteredrevs - cl.filteredrevs
                missingrevs.extend(r for r in extrarevs
                                   if r <= cache.tiprev)
    missingrevs.extend(cl.revs(start=cache.tiprev + 1))
    if missingrevs:
        cache.update(repo, missingrevs)
        cache.write(repo)

    assert cache.validfor(repo), filtername
    repo._branchcaches[repo.filtername] = cache
def replacecache(repo, bm):
    """Replace the branchmap cache for a repo with a branch mapping.

    This is likely only called during clone with a branch map from a remote.
    """
    allheads = []
    closedheads = []
    for heads in bm.itervalues():
        allheads.extend(heads)
        for head in heads:
            headrev = repo.changelog.rev(head)
            _branch, isclosed = repo.changelog.branchinfo(headrev)
            if isclosed:
                closedheads.append(head)

    if not allheads:
        return

    rtiprev = max(int(repo.changelog.rev(node)) for node in allheads)
    cache = branchcache(bm,
                        repo[rtiprev].node(),
                        rtiprev,
                        closednodes=closedheads)

    # Try to stick it as low as possible
    # filter above served are unlikely to be fetch from a clone
    for candidate in ('base', 'immutable', 'served'):
        rview = repo.filtered(candidate)
        if cache.validfor(rview):
            repo._branchcaches[candidate] = cache
            cache.write(rview)
            return
class branchcache(dict):
    """A dict like object that hold branches heads cache.

    This cache is used to avoid costly computations to determine all the
    branch heads of a repo.

    The cache is serialized on disk in the following format:

    <tip hex node> <tip rev number> [optional filtered repo hex hash]
    <branch head hex node> <open/closed state> <branch name>
    <branch head hex node> <open/closed state> <branch name>
    ...

    The first line is used to check if the cache is still valid. If the
    branch cache is for a filtered repo view, an optional third hash is
    included that hashes the hashes of all filtered revisions.

    The open/closed state is represented by a single letter 'o' or 'c'.
    This field can be used to avoid changelog reads when determining if a
    branch head closes a branch or not.
    """

    def __init__(self, entries=(), tipnode=nullid, tiprev=nullrev,
                 filteredhash=None, closednodes=None):
        super(branchcache, self).__init__(entries)
        self.tipnode = tipnode
        self.tiprev = tiprev
        self.filteredhash = filteredhash
        # closednodes is a set of nodes that close their branch. If the branch
        # cache has been updated, it may contain nodes that are no longer
        # heads.
        if closednodes is None:
            self._closednodes = set()
        else:
            self._closednodes = closednodes

    def validfor(self, repo):
        """Is the cache content valid regarding a repo

        - False when cached tipnode is unknown or if we detect a strip.
        - True when cache is up to date or a subset of current repo."""
        try:
            return ((self.tipnode == repo.changelog.node(self.tiprev))
                    and (self.filteredhash == \
                         scmutil.filteredhash(repo, self.tiprev)))
        except IndexError:
            # tiprev no longer exists (e.g. after a strip)
            return False

    def _branchtip(self, heads):
        '''Return tuple with last open head in heads and false,
        otherwise return last closed head and true.'''
        tip = heads[-1]
        closed = True
        # scan from the tipmost head downward for the first open one
        for h in reversed(heads):
            if h not in self._closednodes:
                tip = h
                closed = False
                break
        return tip, closed

    def branchtip(self, branch):
        '''Return the tipmost open head on branch head, otherwise return the
        tipmost closed head on branch.
        Raise KeyError for unknown branch.'''
        return self._branchtip(self[branch])[0]

    def iteropen(self, nodes):
        # lazily filter out heads that close their branch
        return (n for n in nodes if n not in self._closednodes)

    def branchheads(self, branch, closed=False):
        # heads of *branch*; closed heads included only when closed=True
        heads = self[branch]
        if not closed:
            heads = list(self.iteropen(heads))
        return heads

    def iterbranches(self):
        # yields (branchname, heads, tipnode, isclosed) tuples
        for bn, heads in self.iteritems():
            yield (bn, heads) + self._branchtip(heads)

    def copy(self):
        """return an deep copy of the branchcache object"""
        return branchcache(self, self.tipnode, self.tiprev, self.filteredhash,
                           self._closednodes)

    def write(self, repo):
        """Serialize the cache to disk (best-effort; failures are logged)."""
        # NOTE(review): if an error occurs after opening, f is not closed
        # here — presumably the atomictemp file is discarded on gc rather
        # than committed; confirm cachevfs/atomictempfile semantics.
        try:
            f = repo.cachevfs(_filename(repo), "w", atomictemp=True)
            cachekey = [hex(self.tipnode), '%d' % self.tiprev]
            if self.filteredhash is not None:
                cachekey.append(hex(self.filteredhash))
            f.write(" ".join(cachekey) + '\n')
            nodecount = 0
            for label, nodes in sorted(self.iteritems()):
                for node in nodes:
                    nodecount += 1
                    if node in self._closednodes:
                        state = 'c'
                    else:
                        state = 'o'
                    f.write("%s %s %s\n" % (hex(node), state,
                                            encoding.fromlocal(label)))
            f.close()
            repo.ui.log('branchcache',
                        'wrote %s branch cache with %d labels and %d nodes\n',
                        repo.filtername, len(self), nodecount)
        except (IOError, OSError, error.Abort) as inst:
            # Abort may be raised by read only opener, so log and continue
            repo.ui.debug("couldn't write branch cache: %s\n" %
                          stringutil.forcebytestr(inst))

    def update(self, repo, revgen):
        """Given a branchhead cache, self, that may have extra nodes or be
        missing heads, and a generator of nodes that are strictly a superset of
        heads missing, this function updates self to be correct.
        """
        starttime = util.timer()
        cl = repo.changelog
        # collect new branch entries
        newbranches = {}
        getbranchinfo = repo.revbranchcache().branchinfo
        for r in revgen:
            branch, closesbranch = getbranchinfo(r)
            newbranches.setdefault(branch, []).append(r)
            if closesbranch:
                self._closednodes.add(cl.node(r))

        # fetch current topological heads to speed up filtering
        topoheads = set(cl.headrevs())

        # if older branchheads are reachable from new ones, they aren't
        # really branchheads. Note checking parents is insufficient:
        # 1 (branch a) -> 2 (branch b) -> 3 (branch a)
        for branch, newheadrevs in newbranches.iteritems():
            bheads = self.setdefault(branch, [])
            bheadset = set(cl.rev(node) for node in bheads)

            # This has been tested True on all internal usage of this function.
            # run it again in case of doubt
            # assert not (set(bheadrevs) & set(newheadrevs))
            newheadrevs.sort()
            bheadset.update(newheadrevs)

            # This prunes out two kinds of heads - heads that are superseded by
            # a head in newheadrevs, and newheadrevs that are not heads because
            # an existing head is their descendant.
            uncertain = bheadset - topoheads
            if uncertain:
                floorrev = min(uncertain)
                ancestors = set(cl.ancestors(newheadrevs, floorrev))
                bheadset -= ancestors
            bheadrevs = sorted(bheadset)
            self[branch] = [cl.node(rev) for rev in bheadrevs]
            tiprev = bheadrevs[-1]
            if tiprev > self.tiprev:
                self.tipnode = cl.node(tiprev)
                self.tiprev = tiprev

        if not self.validfor(repo):
            # cache key are not valid anymore
            # recompute the tip from scratch across all cached heads
            self.tipnode = nullid
            self.tiprev = nullrev
            for heads in self.values():
                tiprev = max(cl.rev(node) for node in heads)
                if tiprev > self.tiprev:
                    self.tipnode = cl.node(tiprev)
                    self.tiprev = tiprev
        self.filteredhash = scmutil.filteredhash(repo, self.tiprev)

        duration = util.timer() - starttime
        repo.ui.log('branchcache', 'updated %s branch cache in %.4f seconds\n',
                    repo.filtername, duration)
# Revision branch info cache

_rbcversion = '-v1'
_rbcnames = 'rbc-names' + _rbcversion
_rbcrevs = 'rbc-revs' + _rbcversion
# [4 byte hash prefix][4 byte branch name number with sign bit indicating open]
_rbcrecfmt = '>4sI'  # big-endian: 4-byte node prefix + unsigned 32-bit index
_rbcrecsize = calcsize(_rbcrecfmt)
_rbcnodelen = 4  # number of node-hash bytes stored per record
_rbcbranchidxmask = 0x7fffffff  # low 31 bits: index into the names list
_rbccloseflag = 0x80000000  # high bit: the revision closes its branch
class revbranchcache(object):
    """Persistent cache, mapping from revision number to branch name and close.
    This is a low level cache, independent of filtering.

    Branch names are stored in rbc-names in internal encoding separated by 0.
    rbc-names is append-only, and each branch name is only stored once and will
    thus have a unique index.

    The branch info for each revision is stored in rbc-revs as constant size
    records. The whole file is read into memory, but it is only 'parsed' on
    demand. The file is usually append-only but will be truncated if repo
    modification is detected.
    The record for each revision contains the first 4 bytes of the
    corresponding node hash, and the record is only used if it still matches.
    Even a completely trashed rbc-revs file will thus still give the right
    result while converging towards full recovery ... assuming no incorrectly
    matching node hashes.
    The record also contains 4 bytes where 31 bits contains the index of the
    branch and the last bit indicate that it is a branch close commit.
    The usage pattern for rbc-revs is thus somewhat similar to 00changelog.i
    and will grow with it but be 1/8th of its size.
    """

    def __init__(self, repo, readonly=True):
        # this cache is filter-independent; only build it on the unfiltered repo
        assert repo.filtername is None
        self._repo = repo
        self._names = [] # branch names in local encoding with static index
        self._rbcrevs = bytearray()
        self._rbcsnameslen = 0 # length of names read at _rbcsnameslen
        try:
            bndata = repo.cachevfs.read(_rbcnames)
            self._rbcsnameslen = len(bndata) # for verification before writing
            if bndata:
                self._names = [encoding.tolocal(bn)
                               for bn in bndata.split('\0')]
        except (IOError, OSError):
            if readonly:
                # don't try to use cache - fall back to the slow path
                self.branchinfo = self._branchinfo

        if self._names:
            try:
                data = repo.cachevfs.read(_rbcrevs)
                self._rbcrevs[:] = data
            except (IOError, OSError) as inst:
                repo.ui.debug("couldn't read revision branch cache: %s\n" %
                              stringutil.forcebytestr(inst))
        # remember number of good records on disk
        self._rbcrevslen = min(len(self._rbcrevs) // _rbcrecsize,
                               len(repo.changelog))
        if self._rbcrevslen == 0:
            # no usable records: any names on disk are unreferenced
            self._names = []
        self._rbcnamescount = len(self._names) # number of names read at
                                               # _rbcsnameslen
        self._namesreverse = dict((b, r) for r, b in enumerate(self._names))

    def _clear(self):
        # reset the in-memory state and schedule a full rewrite of both files
        self._rbcsnameslen = 0
        del self._names[:]
        self._rbcnamescount = 0
        self._namesreverse.clear()
        self._rbcrevslen = len(self._repo.changelog)
        self._rbcrevs = bytearray(self._rbcrevslen * _rbcrecsize)

    def branchinfo(self, rev):
        """Return branch name and close flag for rev, using and updating
        persistent cache."""
        changelog = self._repo.changelog
        rbcrevidx = rev * _rbcrecsize

        # avoid negative index, changelog.read(nullrev) is fast without cache
        if rev == nullrev:
            return changelog.branchinfo(rev)

        # if requested rev isn't allocated, grow and cache the rev info
        if len(self._rbcrevs) < rbcrevidx + _rbcrecsize:
            return self._branchinfo(rev)

        # fast path: extract data from cache, use it if node is matching
        reponode = changelog.node(rev)[:_rbcnodelen]
        cachenode, branchidx = unpack_from(
            _rbcrecfmt, util.buffer(self._rbcrevs), rbcrevidx)
        close = bool(branchidx & _rbccloseflag)
        if close:
            branchidx &= _rbcbranchidxmask
        if cachenode == '\0\0\0\0':
            # record was never written: fall through to the slow path
            pass
        elif cachenode == reponode:
            try:
                return self._names[branchidx], close
            except IndexError:
                # recover from invalid reference to unknown branch
                self._repo.ui.debug("referenced branch names not found"
                    " - rebuilding revision branch cache from scratch\n")
                self._clear()
        else:
            # rev/node map has changed, invalidate the cache from here up
            self._repo.ui.debug("history modification detected - truncating "
                "revision branch cache to revision %d\n" % rev)
            truncate = rbcrevidx + _rbcrecsize
            del self._rbcrevs[truncate:]
            self._rbcrevslen = min(self._rbcrevslen, truncate)

        # fall back to slow path and make sure it will be written to disk
        return self._branchinfo(rev)

    def _branchinfo(self, rev):
        """Retrieve branch info from changelog and update _rbcrevs"""
        changelog = self._repo.changelog
        b, close = changelog.branchinfo(rev)
        if b in self._namesreverse:
            branchidx = self._namesreverse[b]
        else:
            # first time we see this branch name: assign the next index
            branchidx = len(self._names)
            self._names.append(b)
            self._namesreverse[b] = branchidx
        reponode = changelog.node(rev)
        if close:
            branchidx |= _rbccloseflag
        self._setcachedata(rev, reponode, branchidx)
        return b, close

    def setdata(self, branch, rev, node, close):
        """add new data information to the cache"""
        if branch in self._namesreverse:
            branchidx = self._namesreverse[branch]
        else:
            branchidx = len(self._names)
            self._names.append(branch)
            self._namesreverse[branch] = branchidx
        if close:
            branchidx |= _rbccloseflag
        self._setcachedata(rev, node, branchidx)
        # If no cache data were readable (non exists, bad permission, etc)
        # the cache was bypassing itself by setting:
        #
        #   self.branchinfo = self._branchinfo
        #
        # Since we now have data in the cache, we need to drop this bypassing.
        if 'branchinfo' in vars(self):
            del self.branchinfo

    def _setcachedata(self, rev, node, branchidx):
        """Writes the node's branch data to the in-memory cache data."""
        if rev == nullrev:
            return
        rbcrevidx = rev * _rbcrecsize
        if len(self._rbcrevs) < rbcrevidx + _rbcrecsize:
            # grow the buffer with zero records up to the changelog length
            self._rbcrevs.extend('\0' *
                                 (len(self._repo.changelog) * _rbcrecsize -
                                  len(self._rbcrevs)))
        pack_into(_rbcrecfmt, self._rbcrevs, rbcrevidx, node, branchidx)
        # anything at or beyond rev is now dirty relative to disk
        self._rbcrevslen = min(self._rbcrevslen, rev)

        tr = self._repo.currenttransaction()
        if tr:
            tr.addfinalize('write-revbranchcache', self.write)

    def write(self, tr=None):
        """Save branch cache if it is dirty."""
        repo = self._repo
        wlock = None
        step = ''
        try:
            if self._rbcnamescount < len(self._names):
                # new branch names were added since the last write
                step = ' names'
                wlock = repo.wlock(wait=False)
                if self._rbcnamescount != 0:
                    f = repo.cachevfs.open(_rbcnames, 'ab')
                    if f.tell() == self._rbcsnameslen:
                        f.write('\0')
                    else:
                        # file changed underneath us: rewrite from scratch
                        f.close()
                        repo.ui.debug("%s changed - rewriting it\n" % _rbcnames)
                        self._rbcnamescount = 0
                        self._rbcrevslen = 0
                if self._rbcnamescount == 0:
                    # before rewriting names, make sure references are removed
                    repo.cachevfs.unlinkpath(_rbcrevs, ignoremissing=True)
                    f = repo.cachevfs.open(_rbcnames, 'wb')
                f.write('\0'.join(encoding.fromlocal(b)
                                  for b in self._names[self._rbcnamescount:]))
                self._rbcsnameslen = f.tell()
                f.close()
                self._rbcnamescount = len(self._names)

            start = self._rbcrevslen * _rbcrecsize
            if start != len(self._rbcrevs):
                # records past `start` are dirty and must be (re)written
                step = ''
                if wlock is None:
                    wlock = repo.wlock(wait=False)
                revs = min(len(repo.changelog),
                           len(self._rbcrevs) // _rbcrecsize)
                f = repo.cachevfs.open(_rbcrevs, 'ab')
                if f.tell() != start:
                    repo.ui.debug("truncating cache/%s to %d\n"
                                  % (_rbcrevs, start))
                    f.seek(start)
                    if f.tell() != start:
                        # seek failed (e.g. file shorter than start): rewrite
                        start = 0
                        f.seek(start)
                    f.truncate()
                end = revs * _rbcrecsize
                f.write(self._rbcrevs[start:end])
                f.close()
                self._rbcrevslen = revs
        except (IOError, OSError, error.Abort, error.LockError) as inst:
            # best-effort cache: failure to write is only a debug message
            repo.ui.debug("couldn't write revision branch cache%s: %s\n"
                          % (step, stringutil.forcebytestr(inst)))
        finally:
            if wlock is not None:
                wlock.release()
General Comments 0
You need to be logged in to leave comments. Login now