##// END OF EJS Templates
branchmap: load branchmap as an iterable...
Martijn Pieters -
r39151:2a4bfbb5 default
parent child Browse files
Show More
@@ -1,549 +1,547 b''
1 1 # branchmap.py - logic to computes, maintain and stores branchmap for local repo
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import struct
11 11
12 12 from .node import (
13 13 bin,
14 14 hex,
15 15 nullid,
16 16 nullrev,
17 17 )
18 18 from . import (
19 19 encoding,
20 20 error,
21 21 pycompat,
22 22 scmutil,
23 23 util,
24 24 )
25 25 from .utils import (
26 26 stringutil,
27 27 )
28 28
29 29 calcsize = struct.calcsize
30 30 pack_into = struct.pack_into
31 31 unpack_from = struct.unpack_from
32 32
33 33 def _filename(repo):
34 34 """name of a branchcache file for a given repo or repoview"""
35 35 filename = "branch2"
36 36 if repo.filtername:
37 37 filename = '%s-%s' % (filename, repo.filtername)
38 38 return filename
39 39
def read(repo):
    """Read the on-disk branch cache for *repo* and return a branchcache.

    Returns None when the cache file is missing or unreadable, and also
    when its content is invalid (stale tip, unknown node, corrupt line) —
    callers then rebuild the cache from scratch.
    """
    try:
        f = repo.cachevfs(_filename(repo))
        try:
            # First line is the cache key:
            # "<tip hex node> <tip rev> [optional filtered hash]"
            cachekey = next(f).split(" ", 2)
            last, lrev = cachekey[:2]
            last, lrev = bin(last), int(lrev)
            filteredhash = None
            if len(cachekey) > 2:
                filteredhash = bin(cachekey[2])
            partial = branchcache(tipnode=last, tiprev=lrev,
                                  filteredhash=filteredhash)
            if not partial.validfor(repo):
                # invalidate the cache
                raise ValueError(r'tip differs')
            cl = repo.changelog
            # remaining lines: "<head hex node> <o|c> <branch name>"
            for l in f:
                if not l:
                    continue
                node, state, label = l.split(" ", 2)
                if state not in 'oc':
                    raise ValueError(r'invalid branch state')
                label = encoding.tolocal(label.strip())
                node = bin(node)
                if not cl.hasnode(node):
                    raise ValueError(
                        r'node %s does not exist' % pycompat.sysstr(hex(node)))
                partial.setdefault(label, []).append(node)
                if state == 'c':
                    partial._closednodes.add(node)
        finally:
            # iterating the file directly previously leaked the handle;
            # always close it, whatever the parse outcome
            f.close()

    except (IOError, OSError):
        return None

    except Exception as inst:
        if repo.ui.debugflag:
            msg = 'invalid branchheads cache'
            if repo.filtername is not None:
                msg += ' (%s)' % repo.filtername
            msg += ': %s\n'
            repo.ui.debug(msg % pycompat.bytestr(inst))
        partial = None
    return partial
84 82
### Nearest subset relation
# Nearest subset of filter X is a filter Y so that:
# * Y is included in X,
# * X - Y is as small as possible.
# This creates an ordering used for branchmap purposes;
# the ordering may be partial.
# Maps a filter name to the name of its nearest subset filter, so a
# branchmap for X can be seeded from the (cheaper) branchmap of Y.
subsettable = {None: 'visible',
               'visible-hidden': 'visible',
               'visible': 'served',
               'served': 'immutable',
               'immutable': 'base'}
96 94
def updatecache(repo):
    """Ensure ``repo._branchcaches`` holds a valid cache for repo's filter.

    The cache is taken from memory when still valid, otherwise from disk,
    otherwise seeded from the nearest subset view's branchmap (or built
    empty), and finally brought up to date with any newer revisions.
    """
    cl = repo.changelog
    filtername = repo.filtername
    cache = repo._branchcaches.get(filtername)

    newrevs = []
    if cache is None or not cache.validfor(repo):
        # in-memory cache unusable: try disk, then derive from a subset view
        cache = read(repo)
        if cache is None:
            subsetname = subsettable.get(filtername)
            if subsetname is not None:
                subset = repo.filtered(subsetname)
                cache = subset.branchmap().copy()
                # revisions visible here but filtered in the subset must be
                # (re)considered, as long as they predate the cached tip
                extrarevs = subset.changelog.filteredrevs - cl.filteredrevs
                newrevs.extend(r for r in extrarevs if r <= cache.tiprev)
            else:
                cache = branchcache()
    newrevs.extend(cl.revs(start=cache.tiprev + 1))
    if newrevs:
        cache.update(repo, newrevs)
        cache.write(repo)

    assert cache.validfor(repo), filtername
    repo._branchcaches[repo.filtername] = cache
121 119
def replacecache(repo, bm):
    """Replace the branchmap cache for a repo with a branch mapping.

    This is likely only called during clone with a branch map from a remote.
    """
    allheads = []
    closednodes = []
    changelog = repo.changelog
    for heads in bm.itervalues():
        allheads.extend(heads)
        for head in heads:
            # branchinfo tells us whether this head closes its branch
            _branch, isclosed = changelog.branchinfo(changelog.rev(head))
            if isclosed:
                closednodes.append(head)

    if allheads:
        rtiprev = max(int(changelog.rev(node)) for node in allheads)
        cache = branchcache(bm,
                            repo[rtiprev].node(),
                            rtiprev,
                            closednodes=closednodes)

        # Try to stick it as low as possible
        # filter above served are unlikely to be fetch from a clone
        for candidate in ('base', 'immutable', 'served'):
            rview = repo.filtered(candidate)
            if cache.validfor(rview):
                repo._branchcaches[candidate] = cache
                cache.write(rview)
                break
153 151
class branchcache(dict):
    """A dict like object that hold branches heads cache.

    This cache is used to avoid costly computations to determine all the
    branch heads of a repo.

    The cache is serialized on disk in the following format:

    <tip hex node> <tip rev number> [optional filtered repo hex hash]
    <branch head hex node> <open/closed state> <branch name>
    <branch head hex node> <open/closed state> <branch name>
    ...

    The first line is used to check if the cache is still valid. If the
    branch cache is for a filtered repo view, an optional third hash is
    included that hashes the hashes of all filtered revisions.

    The open/closed state is represented by a single letter 'o' or 'c'.
    This field can be used to avoid changelog reads when determining if a
    branch head closes a branch or not.

    Mapping: branch name (local encoding) -> list of head nodes, in
    ascending revision order (so ``heads[-1]`` is the tip-most head).
    """

    def __init__(self, entries=(), tipnode=nullid, tiprev=nullrev,
                 filteredhash=None, closednodes=None):
        super(branchcache, self).__init__(entries)
        # tipnode/tiprev: the repository tip this cache was computed against;
        # used by validfor() as the cache key
        self.tipnode = tipnode
        self.tiprev = tiprev
        # filteredhash: hash over the filtered revisions of a repoview, or
        # None for an unfiltered repo
        self.filteredhash = filteredhash
        # closednodes is a set of nodes that close their branch. If the branch
        # cache has been updated, it may contain nodes that are no longer
        # heads.
        if closednodes is None:
            self._closednodes = set()
        else:
            self._closednodes = closednodes

    def validfor(self, repo):
        """Is the cache content valid regarding a repo

        - False when cached tipnode is unknown or if we detect a strip.
        - True when cache is up to date or a subset of current repo."""
        try:
            return ((self.tipnode == repo.changelog.node(self.tiprev))
                    and (self.filteredhash == \
                         scmutil.filteredhash(repo, self.tiprev)))
        except IndexError:
            # tiprev no longer exists in the changelog (e.g. after a strip)
            return False

    def _branchtip(self, heads):
        '''Return tuple with last open head in heads and false,
        otherwise return last closed head and true.'''
        tip = heads[-1]
        closed = True
        # scan from the tip-most head backwards; first open head wins
        for h in reversed(heads):
            if h not in self._closednodes:
                tip = h
                closed = False
                break
        return tip, closed

    def branchtip(self, branch):
        '''Return the tipmost open head on branch head, otherwise return the
        tipmost closed head on branch.
        Raise KeyError for unknown branch.'''
        return self._branchtip(self[branch])[0]

    def iteropen(self, nodes):
        # generator over nodes that do NOT close their branch
        return (n for n in nodes if n not in self._closednodes)

    def branchheads(self, branch, closed=False):
        """Return the heads of *branch*; closed heads are filtered out
        unless ``closed`` is True. Raises KeyError for unknown branch."""
        heads = self[branch]
        if not closed:
            heads = list(self.iteropen(heads))
        return heads

    def iterbranches(self):
        # yield (branchname, heads, tipnode, isclosed) for every branch
        for bn, heads in self.iteritems():
            yield (bn, heads) + self._branchtip(heads)

    def copy(self):
        """return a deep copy of the branchcache object"""
        return branchcache(self, self.tipnode, self.tiprev, self.filteredhash,
                           self._closednodes)

    def write(self, repo):
        """Serialize the cache to disk (format in the class docstring).

        Write errors are logged and swallowed so that read-only
        repositories keep working."""
        try:
            f = repo.cachevfs(_filename(repo), "w", atomictemp=True)
            cachekey = [hex(self.tipnode), '%d' % self.tiprev]
            if self.filteredhash is not None:
                cachekey.append(hex(self.filteredhash))
            f.write(" ".join(cachekey) + '\n')
            nodecount = 0
            for label, nodes in sorted(self.iteritems()):
                for node in nodes:
                    nodecount += 1
                    if node in self._closednodes:
                        state = 'c'
                    else:
                        state = 'o'
                    f.write("%s %s %s\n" % (hex(node), state,
                                            encoding.fromlocal(label)))
            f.close()
            repo.ui.log('branchcache',
                        'wrote %s branch cache with %d labels and %d nodes\n',
                        repo.filtername, len(self), nodecount)
        except (IOError, OSError, error.Abort) as inst:
            # Abort may be raised by read only opener, so log and continue
            repo.ui.debug("couldn't write branch cache: %s\n" %
                          stringutil.forcebytestr(inst))

    def update(self, repo, revgen):
        """Given a branchhead cache, self, that may have extra nodes or be
        missing heads, and a generator of nodes that are strictly a superset of
        heads missing, this function updates self to be correct.
        """
        starttime = util.timer()
        cl = repo.changelog
        # collect new branch entries: branch name -> new revs on that branch
        newbranches = {}
        getbranchinfo = repo.revbranchcache().branchinfo
        for r in revgen:
            branch, closesbranch = getbranchinfo(r)
            newbranches.setdefault(branch, []).append(r)
            if closesbranch:
                self._closednodes.add(cl.node(r))

        # fetch current topological heads to speed up filtering
        topoheads = set(cl.headrevs())

        # if older branchheads are reachable from new ones, they aren't
        # really branchheads. Note checking parents is insufficient:
        # 1 (branch a) -> 2 (branch b) -> 3 (branch a)
        for branch, newheadrevs in newbranches.iteritems():
            bheads = self.setdefault(branch, [])
            bheadset = set(cl.rev(node) for node in bheads)

            # This have been tested True on all internal usage of this function.
            # run it again in case of doubt
            # assert not (set(bheadrevs) & set(newheadrevs))
            newheadrevs.sort()
            bheadset.update(newheadrevs)

            # This prunes out two kinds of heads - heads that are superseded by
            # a head in newheadrevs, and newheadrevs that are not heads because
            # an existing head is their descendant.
            uncertain = bheadset - topoheads
            if uncertain:
                floorrev = min(uncertain)
                ancestors = set(cl.ancestors(newheadrevs, floorrev))
                bheadset -= ancestors
            # store heads back in ascending revision order (tip-most last)
            bheadrevs = sorted(bheadset)
            self[branch] = [cl.node(rev) for rev in bheadrevs]
            tiprev = bheadrevs[-1]
            if tiprev > self.tiprev:
                self.tipnode = cl.node(tiprev)
                self.tiprev = tiprev

        if not self.validfor(repo):
            # cache key are not valid anymore: recompute tip from the
            # branch heads we actually hold
            self.tipnode = nullid
            self.tiprev = nullrev
            for heads in self.values():
                tiprev = max(cl.rev(node) for node in heads)
                if tiprev > self.tiprev:
                    self.tipnode = cl.node(tiprev)
                    self.tiprev = tiprev
        self.filteredhash = scmutil.filteredhash(repo, self.tiprev)

        duration = util.timer() - starttime
        repo.ui.log('branchcache', 'updated %s branch cache in %.4f seconds\n',
                    repo.filtername, duration)
325 323
# Revision branch info cache

# bump this version to invalidate all existing rbc files on upgrade
_rbcversion = '-v1'
# cache file names (under the repo's cache vfs)
_rbcnames = 'rbc-names' + _rbcversion
_rbcrevs = 'rbc-revs' + _rbcversion
# [4 byte hash prefix][4 byte branch name number with sign bit indicating open]
_rbcrecfmt = '>4sI'
_rbcrecsize = calcsize(_rbcrecfmt)
# number of node-hash bytes stored per record for validation
_rbcnodelen = 4
# low 31 bits: index into the branch-name list
_rbcbranchidxmask = 0x7fffffff
# high bit: the revision closes its branch
_rbccloseflag = 0x80000000
337 335
class revbranchcache(object):
    """Persistent cache, mapping from revision number to branch name and close.
    This is a low level cache, independent of filtering.

    Branch names are stored in rbc-names in internal encoding separated by 0.
    rbc-names is append-only, and each branch name is only stored once and will
    thus have a unique index.

    The branch info for each revision is stored in rbc-revs as constant size
    records. The whole file is read into memory, but it is only 'parsed' on
    demand. The file is usually append-only but will be truncated if repo
    modification is detected.
    The record for each revision contains the first 4 bytes of the
    corresponding node hash, and the record is only used if it still matches.
    Even a completely trashed rbc-revs fill thus still give the right result
    while converging towards full recovery ... assuming no incorrectly matching
    node hashes.
    The record also contains 4 bytes where 31 bits contains the index of the
    branch and the last bit indicate that it is a branch close commit.
    The usage pattern for rbc-revs is thus somewhat similar to 00changelog.i
    and will grow with it but be 1/8th of its size.
    """

    def __init__(self, repo, readonly=True):
        # the cache is filter-independent, so only an unfiltered repo may own it
        assert repo.filtername is None
        self._repo = repo
        self._names = [] # branch names in local encoding with static index
        self._rbcrevs = bytearray()
        self._rbcsnameslen = 0 # length of names read at _rbcsnameslen
        try:
            bndata = repo.cachevfs.read(_rbcnames)
            self._rbcsnameslen = len(bndata) # for verification before writing
            if bndata:
                self._names = [encoding.tolocal(bn)
                               for bn in bndata.split('\0')]
        except (IOError, OSError):
            if readonly:
                # don't try to use cache - fall back to the slow path
                self.branchinfo = self._branchinfo

        # rbc-revs is only meaningful if we could read the name table
        if self._names:
            try:
                data = repo.cachevfs.read(_rbcrevs)
                self._rbcrevs[:] = data
            except (IOError, OSError) as inst:
                repo.ui.debug("couldn't read revision branch cache: %s\n" %
                              stringutil.forcebytestr(inst))
        # remember number of good records on disk
        self._rbcrevslen = min(len(self._rbcrevs) // _rbcrecsize,
                               len(repo.changelog))
        if self._rbcrevslen == 0:
            self._names = []
        self._rbcnamescount = len(self._names) # number of names read at
                                               # _rbcsnameslen
        self._namesreverse = dict((b, r) for r, b in enumerate(self._names))

    def _clear(self):
        # drop all cached state and pre-size rbc-revs for the full changelog
        self._rbcsnameslen = 0
        del self._names[:]
        self._rbcnamescount = 0
        self._namesreverse.clear()
        self._rbcrevslen = len(self._repo.changelog)
        self._rbcrevs = bytearray(self._rbcrevslen * _rbcrecsize)

    def branchinfo(self, rev):
        """Return branch name and close flag for rev, using and updating
        persistent cache."""
        changelog = self._repo.changelog
        rbcrevidx = rev * _rbcrecsize

        # avoid negative index, changelog.read(nullrev) is fast without cache
        if rev == nullrev:
            return changelog.branchinfo(rev)

        # if requested rev isn't allocated, grow and cache the rev info
        if len(self._rbcrevs) < rbcrevidx + _rbcrecsize:
            return self._branchinfo(rev)

        # fast path: extract data from cache, use it if node is matching
        reponode = changelog.node(rev)[:_rbcnodelen]
        cachenode, branchidx = unpack_from(
            _rbcrecfmt, util.buffer(self._rbcrevs), rbcrevidx)
        close = bool(branchidx & _rbccloseflag)
        if close:
            branchidx &= _rbcbranchidxmask
        if cachenode == '\0\0\0\0':
            # record never written: fall through to the slow path below
            pass
        elif cachenode == reponode:
            try:
                return self._names[branchidx], close
            except IndexError:
                # recover from invalid reference to unknown branch
                self._repo.ui.debug("referenced branch names not found"
                    " - rebuilding revision branch cache from scratch\n")
                self._clear()
        else:
            # rev/node map has changed, invalidate the cache from here up
            self._repo.ui.debug("history modification detected - truncating "
                "revision branch cache to revision %d\n" % rev)
            truncate = rbcrevidx + _rbcrecsize
            del self._rbcrevs[truncate:]
            self._rbcrevslen = min(self._rbcrevslen, truncate)

        # fall back to slow path and make sure it will be written to disk
        return self._branchinfo(rev)

    def _branchinfo(self, rev):
        """Retrieve branch info from changelog and update _rbcrevs"""
        changelog = self._repo.changelog
        b, close = changelog.branchinfo(rev)
        if b in self._namesreverse:
            branchidx = self._namesreverse[b]
        else:
            # first time we see this branch: append to the name table
            branchidx = len(self._names)
            self._names.append(b)
            self._namesreverse[b] = branchidx
        reponode = changelog.node(rev)
        if close:
            branchidx |= _rbccloseflag
        self._setcachedata(rev, reponode, branchidx)
        return b, close

    def setdata(self, branch, rev, node, close):
        """add new data information to the cache"""
        if branch in self._namesreverse:
            branchidx = self._namesreverse[branch]
        else:
            branchidx = len(self._names)
            self._names.append(branch)
            self._namesreverse[branch] = branchidx
        if close:
            branchidx |= _rbccloseflag
        self._setcachedata(rev, node, branchidx)
        # If no cache data were readable (non exists, bad permission, etc)
        # the cache was bypassing itself by setting:
        #
        #   self.branchinfo = self._branchinfo
        #
        # Since we now have data in the cache, we need to drop this bypassing.
        if 'branchinfo' in vars(self):
            del self.branchinfo

    def _setcachedata(self, rev, node, branchidx):
        """Writes the node's branch data to the in-memory cache data."""
        if rev == nullrev:
            return
        rbcrevidx = rev * _rbcrecsize
        if len(self._rbcrevs) < rbcrevidx + _rbcrecsize:
            # grow the buffer with zero records up to the changelog length
            self._rbcrevs.extend('\0' *
                                 (len(self._repo.changelog) * _rbcrecsize -
                                  len(self._rbcrevs)))
        pack_into(_rbcrecfmt, self._rbcrevs, rbcrevidx, node, branchidx)
        # everything from rev upwards must be (re)written on the next flush
        self._rbcrevslen = min(self._rbcrevslen, rev)

        tr = self._repo.currenttransaction()
        if tr:
            tr.addfinalize('write-revbranchcache', self.write)

    def write(self, tr=None):
        """Save branch cache if it is dirty."""
        repo = self._repo
        wlock = None
        step = ''
        try:
            if self._rbcnamescount < len(self._names):
                # new branch names were added since the last write
                step = ' names'
                wlock = repo.wlock(wait=False)
                if self._rbcnamescount != 0:
                    # append-only: new names go after a '\0' separator
                    f = repo.cachevfs.open(_rbcnames, 'ab')
                    if f.tell() == self._rbcsnameslen:
                        f.write('\0')
                    else:
                        # on-disk file diverged from what we read: rewrite all
                        f.close()
                        repo.ui.debug("%s changed - rewriting it\n" % _rbcnames)
                        self._rbcnamescount = 0
                        self._rbcrevslen = 0
                if self._rbcnamescount == 0:
                    # before rewriting names, make sure references are removed
                    repo.cachevfs.unlinkpath(_rbcrevs, ignoremissing=True)
                    f = repo.cachevfs.open(_rbcnames, 'wb')
                f.write('\0'.join(encoding.fromlocal(b)
                                  for b in self._names[self._rbcnamescount:]))
                self._rbcsnameslen = f.tell()
                f.close()
                self._rbcnamescount = len(self._names)

            start = self._rbcrevslen * _rbcrecsize
            if start != len(self._rbcrevs):
                # some records past `start` are dirty and must be flushed
                step = ''
                if wlock is None:
                    wlock = repo.wlock(wait=False)
                revs = min(len(repo.changelog),
                           len(self._rbcrevs) // _rbcrecsize)
                f = repo.cachevfs.open(_rbcrevs, 'ab')
                if f.tell() != start:
                    repo.ui.debug("truncating cache/%s to %d\n"
                                  % (_rbcrevs, start))
                    f.seek(start)
                    if f.tell() != start:
                        # seek failed to land where expected: rewrite fully
                        start = 0
                        f.seek(start)
                    f.truncate()
                end = revs * _rbcrecsize
                f.write(self._rbcrevs[start:end])
                f.close()
                self._rbcrevslen = revs
        except (IOError, OSError, error.Abort, error.LockError) as inst:
            # best effort: a locked or read-only repo just skips the write
            repo.ui.debug("couldn't write revision branch cache%s: %s\n"
                          % (step, stringutil.forcebytestr(inst)))
        finally:
            if wlock is not None:
                wlock.release()
General Comments 0
You need to be logged in to leave comments. Login now