revbranchcache: add a public function to update the data...
Boris Feld
r36980:95f4f1bf default
@@ -1,526 +1,546 @@
1 1 # branchmap.py - logic to compute, maintain and store branchmap for local repo
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import struct
11 11
12 12 from .node import (
13 13 bin,
14 14 hex,
15 15 nullid,
16 16 nullrev,
17 17 )
18 18 from . import (
19 19 encoding,
20 20 error,
21 21 pycompat,
22 22 scmutil,
23 23 util,
24 24 )
25 25
26 26 calcsize = struct.calcsize
27 27 pack_into = struct.pack_into
28 28 unpack_from = struct.unpack_from
29 29
30 30 def _filename(repo):
31 31 """name of a branchcache file for a given repo or repoview"""
32 32 filename = "branch2"
33 33 if repo.filtername:
34 34 filename = '%s-%s' % (filename, repo.filtername)
35 35 return filename
36 36
37 37 def read(repo):
38 38 try:
39 39 f = repo.cachevfs(_filename(repo))
40 40 lines = f.read().split('\n')
41 41 f.close()
42 42 except (IOError, OSError):
43 43 return None
44 44
45 45 try:
46 46 cachekey = lines.pop(0).split(" ", 2)
47 47 last, lrev = cachekey[:2]
48 48 last, lrev = bin(last), int(lrev)
49 49 filteredhash = None
50 50 if len(cachekey) > 2:
51 51 filteredhash = bin(cachekey[2])
52 52 partial = branchcache(tipnode=last, tiprev=lrev,
53 53 filteredhash=filteredhash)
54 54 if not partial.validfor(repo):
55 55 # invalidate the cache
56 56 raise ValueError(r'tip differs')
57 57 cl = repo.changelog
58 58 for l in lines:
59 59 if not l:
60 60 continue
61 61 node, state, label = l.split(" ", 2)
62 62 if state not in 'oc':
63 63 raise ValueError(r'invalid branch state')
64 64 label = encoding.tolocal(label.strip())
65 65 node = bin(node)
66 66 if not cl.hasnode(node):
67 67 raise ValueError(
68 68 r'node %s does not exist' % pycompat.sysstr(hex(node)))
69 69 partial.setdefault(label, []).append(node)
70 70 if state == 'c':
71 71 partial._closednodes.add(node)
72 72 except Exception as inst:
73 73 if repo.ui.debugflag:
74 74 msg = 'invalid branchheads cache'
75 75 if repo.filtername is not None:
76 76 msg += ' (%s)' % repo.filtername
77 77 msg += ': %s\n'
78 78 repo.ui.debug(msg % pycompat.bytestr(inst))
79 79 partial = None
80 80 return partial
81 81
82 82 ### Nearest subset relation
83 83 # Nearest subset of filter X is a filter Y so that:
84 84 # * Y is included in X,
85 85 # * X - Y is as small as possible.
86 86 # This creates an ordering used for branchmap purposes.
87 87 # The ordering may be partial.
88 88 subsettable = {None: 'visible',
89 89 'visible-hidden': 'visible',
90 90 'visible': 'served',
91 91 'served': 'immutable',
92 92 'immutable': 'base'}
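# For example (illustrative only, not exercised by this module): computing
# the branchmap for the 'visible' filter looks up its nearest subset,
#
#   subsettable.get('visible')   # -> 'served'
#
# so updatecache() below can seed the 'visible' cache from a copy of the
# 'served' branchmap and only walk the revisions that 'served' leaves out
# (such as secret changesets), instead of recomputing every branch head.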
93 93
94 94 def updatecache(repo):
95 95 cl = repo.changelog
96 96 filtername = repo.filtername
97 97 partial = repo._branchcaches.get(filtername)
98 98
99 99 revs = []
100 100 if partial is None or not partial.validfor(repo):
101 101 partial = read(repo)
102 102 if partial is None:
103 103 subsetname = subsettable.get(filtername)
104 104 if subsetname is None:
105 105 partial = branchcache()
106 106 else:
107 107 subset = repo.filtered(subsetname)
108 108 partial = subset.branchmap().copy()
109 109 extrarevs = subset.changelog.filteredrevs - cl.filteredrevs
110 110 revs.extend(r for r in extrarevs if r <= partial.tiprev)
111 111 revs.extend(cl.revs(start=partial.tiprev + 1))
112 112 if revs:
113 113 partial.update(repo, revs)
114 114 partial.write(repo)
115 115
116 116 assert partial.validfor(repo), filtername
117 117 repo._branchcaches[repo.filtername] = partial
118 118
119 119 def replacecache(repo, bm):
120 120 """Replace the branchmap cache for a repo with a branch mapping.
121 121
122 122 This is likely only called during clone with a branch map from a remote.
123 123 """
124 124 rbheads = []
125 125 closed = []
126 126 for bheads in bm.itervalues():
127 127 rbheads.extend(bheads)
128 128 for h in bheads:
129 129 r = repo.changelog.rev(h)
130 130 b, c = repo.changelog.branchinfo(r)
131 131 if c:
132 132 closed.append(h)
133 133
134 134 if rbheads:
135 135 rtiprev = max((int(repo.changelog.rev(node))
136 136 for node in rbheads))
137 137 cache = branchcache(bm,
138 138 repo[rtiprev].node(),
139 139 rtiprev,
140 140 closednodes=closed)
141 141
142 142 # Try to stick it as low as possible
143 143 # filters above served are unlikely to be fetched from a clone
144 144 for candidate in ('base', 'immutable', 'served'):
145 145 rview = repo.filtered(candidate)
146 146 if cache.validfor(rview):
147 147 repo._branchcaches[candidate] = cache
148 148 cache.write(rview)
149 149 break
150 150
151 151 class branchcache(dict):
152 152 """A dict like object that hold branches heads cache.
153 153
154 154 This cache is used to avoid costly computations to determine all the
155 155 branch heads of a repo.
156 156
157 157 The cache is serialized on disk in the following format:
158 158
159 159 <tip hex node> <tip rev number> [optional filtered repo hex hash]
160 160 <branch head hex node> <open/closed state> <branch name>
161 161 <branch head hex node> <open/closed state> <branch name>
162 162 ...
163 163
164 164 The first line is used to check if the cache is still valid. If the
165 165 branch cache is for a filtered repo view, an optional third hash is
166 166 included that hashes the hashes of all filtered revisions.
167 167
168 168 The open/closed state is represented by a single letter 'o' or 'c'.
169 169 This field can be used to avoid changelog reads when determining if a
170 170 branch head closes a branch or not.
171 171 """
172 172
173 173 def __init__(self, entries=(), tipnode=nullid, tiprev=nullrev,
174 174 filteredhash=None, closednodes=None):
175 175 super(branchcache, self).__init__(entries)
176 176 self.tipnode = tipnode
177 177 self.tiprev = tiprev
178 178 self.filteredhash = filteredhash
179 179 # closednodes is a set of nodes that close their branch. If the branch
180 180 # cache has been updated, it may contain nodes that are no longer
181 181 # heads.
182 182 if closednodes is None:
183 183 self._closednodes = set()
184 184 else:
185 185 self._closednodes = closednodes
186 186
187 187 def validfor(self, repo):
188 188 """Is the cache content valid regarding a repo
189 189
190 190 - False when cached tipnode is unknown or if we detect a strip.
191 191 - True when cache is up to date or a subset of current repo."""
192 192 try:
193 193 return ((self.tipnode == repo.changelog.node(self.tiprev))
194 194 and (self.filteredhash == \
195 195 scmutil.filteredhash(repo, self.tiprev)))
196 196 except IndexError:
197 197 return False
198 198
199 199 def _branchtip(self, heads):
200 200 '''Return tuple with last open head in heads and False,
201 201 otherwise return last closed head and True.'''
202 202 tip = heads[-1]
203 203 closed = True
204 204 for h in reversed(heads):
205 205 if h not in self._closednodes:
206 206 tip = h
207 207 closed = False
208 208 break
209 209 return tip, closed
210 210
211 211 def branchtip(self, branch):
212 212 '''Return the tipmost open head on branch, otherwise return the
213 213 tipmost closed head on branch.
214 214 Raise KeyError for unknown branch.'''
215 215 return self._branchtip(self[branch])[0]
216 216
217 217 def iteropen(self, nodes):
218 218 return (n for n in nodes if n not in self._closednodes)
219 219
220 220 def branchheads(self, branch, closed=False):
221 221 heads = self[branch]
222 222 if not closed:
223 223 heads = list(self.iteropen(heads))
224 224 return heads
225 225
226 226 def iterbranches(self):
227 227 for bn, heads in self.iteritems():
228 228 yield (bn, heads) + self._branchtip(heads)
229 229
230 230 def copy(self):
231 231 """return an deep copy of the branchcache object"""
232 232 return branchcache(self, self.tipnode, self.tiprev, self.filteredhash,
233 233 self._closednodes)
234 234
235 235 def write(self, repo):
236 236 try:
237 237 f = repo.cachevfs(_filename(repo), "w", atomictemp=True)
238 238 cachekey = [hex(self.tipnode), '%d' % self.tiprev]
239 239 if self.filteredhash is not None:
240 240 cachekey.append(hex(self.filteredhash))
241 241 f.write(" ".join(cachekey) + '\n')
242 242 nodecount = 0
243 243 for label, nodes in sorted(self.iteritems()):
244 244 for node in nodes:
245 245 nodecount += 1
246 246 if node in self._closednodes:
247 247 state = 'c'
248 248 else:
249 249 state = 'o'
250 250 f.write("%s %s %s\n" % (hex(node), state,
251 251 encoding.fromlocal(label)))
252 252 f.close()
253 253 repo.ui.log('branchcache',
254 254 'wrote %s branch cache with %d labels and %d nodes\n',
255 255 repo.filtername, len(self), nodecount)
256 256 except (IOError, OSError, error.Abort) as inst:
257 257 # Abort may be raised by read only opener, so log and continue
258 258 repo.ui.debug("couldn't write branch cache: %s\n" %
259 259 util.forcebytestr(inst))
260 260
261 261 def update(self, repo, revgen):
262 262 """Given a branchhead cache, self, that may have extra nodes or be
263 263 missing heads, and a generator of nodes that are strictly a superset of
264 264 heads missing, this function updates self to be correct.
265 265 """
266 266 starttime = util.timer()
267 267 cl = repo.changelog
268 268 # collect new branch entries
269 269 newbranches = {}
270 270 getbranchinfo = repo.revbranchcache().branchinfo
271 271 for r in revgen:
272 272 branch, closesbranch = getbranchinfo(r)
273 273 newbranches.setdefault(branch, []).append(r)
274 274 if closesbranch:
275 275 self._closednodes.add(cl.node(r))
276 276
277 277 # fetch current topological heads to speed up filtering
278 278 topoheads = set(cl.headrevs())
279 279
280 280 # if older branchheads are reachable from new ones, they aren't
281 281 # really branchheads. Note checking parents is insufficient:
282 282 # 1 (branch a) -> 2 (branch b) -> 3 (branch a)
283 283 for branch, newheadrevs in newbranches.iteritems():
284 284 bheads = self.setdefault(branch, [])
285 285 bheadset = set(cl.rev(node) for node in bheads)
286 286
287 287 # This has been tested True on all internal usages of this function.
288 288 # Run it again in case of doubt:
289 289 # assert not (set(bheadrevs) & set(newheadrevs))
290 290 newheadrevs.sort()
291 291 bheadset.update(newheadrevs)
292 292
293 293 # This prunes out two kinds of heads - heads that are superseded by
294 294 # a head in newheadrevs, and newheadrevs that are not heads because
295 295 # an existing head is their descendant.
296 296 uncertain = bheadset - topoheads
297 297 if uncertain:
298 298 floorrev = min(uncertain)
299 299 ancestors = set(cl.ancestors(newheadrevs, floorrev))
300 300 bheadset -= ancestors
301 301 bheadrevs = sorted(bheadset)
302 302 self[branch] = [cl.node(rev) for rev in bheadrevs]
303 303 tiprev = bheadrevs[-1]
304 304 if tiprev > self.tiprev:
305 305 self.tipnode = cl.node(tiprev)
306 306 self.tiprev = tiprev
307 307
308 308 if not self.validfor(repo):
309 309 # cache keys are not valid anymore
310 310 self.tipnode = nullid
311 311 self.tiprev = nullrev
312 312 for heads in self.values():
313 313 tiprev = max(cl.rev(node) for node in heads)
314 314 if tiprev > self.tiprev:
315 315 self.tipnode = cl.node(tiprev)
316 316 self.tiprev = tiprev
317 317 self.filteredhash = scmutil.filteredhash(repo, self.tiprev)
318 318
319 319 duration = util.timer() - starttime
320 320 repo.ui.log('branchcache', 'updated %s branch cache in %.4f seconds\n',
321 321 repo.filtername, duration)
322 322
323 323 # Revision branch info cache
324 324
325 325 _rbcversion = '-v1'
326 326 _rbcnames = 'rbc-names' + _rbcversion
327 327 _rbcrevs = 'rbc-revs' + _rbcversion
328 328 # [4 byte hash prefix][4 byte branch name number with sign bit indicating closed]
329 329 _rbcrecfmt = '>4sI'
330 330 _rbcrecsize = calcsize(_rbcrecfmt)
331 331 _rbcnodelen = 4
332 332 _rbcbranchidxmask = 0x7fffffff
333 333 _rbccloseflag = 0x80000000
334 334
335 335 class revbranchcache(object):
336 336 """Persistent cache, mapping from revision number to branch name and close.
337 337 This is a low level cache, independent of filtering.
338 338
339 339 Branch names are stored in rbc-names in internal encoding separated by 0.
340 340 rbc-names is append-only, and each branch name is only stored once and will
341 341 thus have a unique index.
342 342
343 343 The branch info for each revision is stored in rbc-revs as constant size
344 344 records. The whole file is read into memory, but it is only 'parsed' on
345 345 demand. The file is usually append-only but will be truncated if repo
346 346 modification is detected.
347 347 The record for each revision contains the first 4 bytes of the
348 348 corresponding node hash, and the record is only used if it still matches.
349 349 Even a completely trashed rbc-revs file will thus still give the right result
350 350 while converging towards full recovery ... assuming no incorrectly matching
351 351 node hashes.
352 352 The record also contains 4 bytes where 31 bits contain the index of the
353 353 branch and the last bit indicates that it is a branch close commit.
354 354 The usage pattern for rbc-revs is thus somewhat similar to 00changelog.i
355 355 and will grow with it but be 1/8th of its size.
356 356 """
357 357
358 358 def __init__(self, repo, readonly=True):
359 359 assert repo.filtername is None
360 360 self._repo = repo
361 361 self._names = [] # branch names in local encoding with static index
362 362 self._rbcrevs = bytearray()
363 363 self._rbcsnameslen = 0 # length of the rbc-names data read from disk
364 364 try:
365 365 bndata = repo.cachevfs.read(_rbcnames)
366 366 self._rbcsnameslen = len(bndata) # for verification before writing
367 367 if bndata:
368 368 self._names = [encoding.tolocal(bn)
369 369 for bn in bndata.split('\0')]
370 370 except (IOError, OSError):
371 371 if readonly:
372 372 # don't try to use cache - fall back to the slow path
373 373 self.branchinfo = self._branchinfo
374 374
375 375 if self._names:
376 376 try:
377 377 data = repo.cachevfs.read(_rbcrevs)
378 378 self._rbcrevs[:] = data
379 379 except (IOError, OSError) as inst:
380 380 repo.ui.debug("couldn't read revision branch cache: %s\n" %
381 381 util.forcebytestr(inst))
382 382 # remember number of good records on disk
383 383 self._rbcrevslen = min(len(self._rbcrevs) // _rbcrecsize,
384 384 len(repo.changelog))
385 385 if self._rbcrevslen == 0:
386 386 self._names = []
387 387 self._rbcnamescount = len(self._names) # number of names read at
388 388 # _rbcsnameslen
389 389 self._namesreverse = dict((b, r) for r, b in enumerate(self._names))
390 390
391 391 def _clear(self):
392 392 self._rbcsnameslen = 0
393 393 del self._names[:]
394 394 self._rbcnamescount = 0
395 395 self._namesreverse.clear()
396 396 self._rbcrevslen = len(self._repo.changelog)
397 397 self._rbcrevs = bytearray(self._rbcrevslen * _rbcrecsize)
398 398
399 399 def branchinfo(self, rev):
400 400 """Return branch name and close flag for rev, using and updating
401 401 persistent cache."""
402 402 changelog = self._repo.changelog
403 403 rbcrevidx = rev * _rbcrecsize
404 404
405 405 # avoid negative index, changelog.read(nullrev) is fast without cache
406 406 if rev == nullrev:
407 407 return changelog.branchinfo(rev)
408 408
409 409 # if requested rev isn't allocated, grow and cache the rev info
410 410 if len(self._rbcrevs) < rbcrevidx + _rbcrecsize:
411 411 return self._branchinfo(rev)
412 412
413 413 # fast path: extract data from cache, use it if node is matching
414 414 reponode = changelog.node(rev)[:_rbcnodelen]
415 415 cachenode, branchidx = unpack_from(
416 416 _rbcrecfmt, util.buffer(self._rbcrevs), rbcrevidx)
417 417 close = bool(branchidx & _rbccloseflag)
418 418 if close:
419 419 branchidx &= _rbcbranchidxmask
420 420 if cachenode == '\0\0\0\0':
421 421 pass
422 422 elif cachenode == reponode:
423 423 try:
424 424 return self._names[branchidx], close
425 425 except IndexError:
426 426 # recover from invalid reference to unknown branch
427 427 self._repo.ui.debug("referenced branch names not found"
428 428 " - rebuilding revision branch cache from scratch\n")
429 429 self._clear()
430 430 else:
431 431 # rev/node map has changed, invalidate the cache from here up
432 432 self._repo.ui.debug("history modification detected - truncating "
433 433 "revision branch cache to revision %d\n" % rev)
434 434 truncate = rbcrevidx + _rbcrecsize
435 435 del self._rbcrevs[truncate:]
436 436 self._rbcrevslen = min(self._rbcrevslen, truncate)
437 437
438 438 # fall back to slow path and make sure it will be written to disk
439 439 return self._branchinfo(rev)
440 440
441 441 def _branchinfo(self, rev):
442 442 """Retrieve branch info from changelog and update _rbcrevs"""
443 443 changelog = self._repo.changelog
444 444 b, close = changelog.branchinfo(rev)
445 445 if b in self._namesreverse:
446 446 branchidx = self._namesreverse[b]
447 447 else:
448 448 branchidx = len(self._names)
449 449 self._names.append(b)
450 450 self._namesreverse[b] = branchidx
451 451 reponode = changelog.node(rev)
452 452 if close:
453 453 branchidx |= _rbccloseflag
454 454 self._setcachedata(rev, reponode, branchidx)
455 455 return b, close
456 456
457 def setdata(self, branch, rev, node, close):
458 """add new data information to the cache"""
459 if branch in self._namesreverse:
460 branchidx = self._namesreverse[branch]
461 else:
462 branchidx = len(self._names)
463 self._names.append(branch)
464 self._namesreverse[branch] = branchidx
465 if close:
466 branchidx |= _rbccloseflag
467 self._setcachedata(rev, node, branchidx)
468 # If no cache data were readable (file missing, bad permissions, etc.),
469 # the cache was bypassing itself by setting:
470 #
471 # self.branchinfo = self._branchinfo
472 #
473 # Since we now have data in the cache, we need to drop this bypass.
474 if 'branchinfo' in vars(self):
475 del self.branchinfo
476
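# A minimal usage sketch for setdata() above (hypothetical caller, not part
# of this module):
#
#   rbc = repo.revbranchcache()
#   rbc.setdata('default', rev, repo.changelog.node(rev), False)
#
# i.e. a caller that already knows a revision's branch and close state can
# record it directly instead of making branchinfo() re-read the changelog.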
457 477 def _setcachedata(self, rev, node, branchidx):
458 478 """Writes the node's branch data to the in-memory cache data."""
459 479 if rev == nullrev:
460 480 return
461 481 rbcrevidx = rev * _rbcrecsize
462 482 if len(self._rbcrevs) < rbcrevidx + _rbcrecsize:
463 483 self._rbcrevs.extend('\0' *
464 484 (len(self._repo.changelog) * _rbcrecsize -
465 485 len(self._rbcrevs)))
466 486 pack_into(_rbcrecfmt, self._rbcrevs, rbcrevidx, node, branchidx)
467 487 self._rbcrevslen = min(self._rbcrevslen, rev)
468 488
469 489 tr = self._repo.currenttransaction()
470 490 if tr:
471 491 tr.addfinalize('write-revbranchcache', self.write)
472 492
473 493 def write(self, tr=None):
474 494 """Save branch cache if it is dirty."""
475 495 repo = self._repo
476 496 wlock = None
477 497 step = ''
478 498 try:
479 499 if self._rbcnamescount < len(self._names):
480 500 step = ' names'
481 501 wlock = repo.wlock(wait=False)
482 502 if self._rbcnamescount != 0:
483 503 f = repo.cachevfs.open(_rbcnames, 'ab')
484 504 if f.tell() == self._rbcsnameslen:
485 505 f.write('\0')
486 506 else:
487 507 f.close()
488 508 repo.ui.debug("%s changed - rewriting it\n" % _rbcnames)
489 509 self._rbcnamescount = 0
490 510 self._rbcrevslen = 0
491 511 if self._rbcnamescount == 0:
492 512 # before rewriting names, make sure references are removed
493 513 repo.cachevfs.unlinkpath(_rbcrevs, ignoremissing=True)
494 514 f = repo.cachevfs.open(_rbcnames, 'wb')
495 515 f.write('\0'.join(encoding.fromlocal(b)
496 516 for b in self._names[self._rbcnamescount:]))
497 517 self._rbcsnameslen = f.tell()
498 518 f.close()
499 519 self._rbcnamescount = len(self._names)
500 520
501 521 start = self._rbcrevslen * _rbcrecsize
502 522 if start != len(self._rbcrevs):
503 523 step = ''
504 524 if wlock is None:
505 525 wlock = repo.wlock(wait=False)
506 526 revs = min(len(repo.changelog),
507 527 len(self._rbcrevs) // _rbcrecsize)
508 528 f = repo.cachevfs.open(_rbcrevs, 'ab')
509 529 if f.tell() != start:
510 530 repo.ui.debug("truncating cache/%s to %d\n"
511 531 % (_rbcrevs, start))
512 532 f.seek(start)
513 533 if f.tell() != start:
514 534 start = 0
515 535 f.seek(start)
516 536 f.truncate()
517 537 end = revs * _rbcrecsize
518 538 f.write(self._rbcrevs[start:end])
519 539 f.close()
520 540 self._rbcrevslen = revs
521 541 except (IOError, OSError, error.Abort, error.LockError) as inst:
522 542 repo.ui.debug("couldn't write revision branch cache%s: %s\n"
523 543 % (step, util.forcebytestr(inst)))
524 544 finally:
525 545 if wlock is not None:
526 546 wlock.release()