branchmap: prevent reading the file twice through different iterators...
Pulkit Goyal
r41974:8ad46ac6 default
@@ -1,600 +1,600 @@
1 1 # branchmap.py - logic to compute, maintain and store the branchmap for a local repo
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import struct
11 11
12 12 from .node import (
13 13 bin,
14 14 hex,
15 15 nullid,
16 16 nullrev,
17 17 )
18 18 from . import (
19 19 encoding,
20 20 error,
21 21 pycompat,
22 22 scmutil,
23 23 util,
24 24 )
25 25 from .utils import (
26 26 stringutil,
27 27 )
28 28
29 29 calcsize = struct.calcsize
30 30 pack_into = struct.pack_into
31 31 unpack_from = struct.unpack_from
32 32
33 33
34 34 ### Nearest subset relation
35 35 # Nearest subset of filter X is a filter Y such that:
36 36 # * Y is included in X,
37 37 # * X - Y is as small as possible.
38 38 # This creates an ordering used for branchmap purposes.
39 39 # The ordering may be partial.
40 40 subsettable = {None: 'visible',
41 41 'visible-hidden': 'visible',
42 42 'visible': 'served',
43 43 'served': 'immutable',
44 44 'immutable': 'base'}
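# A minimal sketch (hypothetical helper, not part of this module) of how
# the mapping above is meant to be walked: follow each filter name to its
# nearest subset until the chain bottoms out at 'base'.
def _example_subset_chain(filtername):
    # _example_subset_chain('visible') -> ['served', 'immutable', 'base']
    chain = []
    while filtername in subsettable:
        filtername = subsettable[filtername]
        chain.append(filtername)
    return chain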
45 45
46 46
47 47 class BranchMapCache(object):
48 48 """mapping of filtered views of repo with their branchcache"""
49 49 def __init__(self):
50 50 self._per_filter = {}
51 51
52 52 def __getitem__(self, repo):
53 53 self.updatecache(repo)
54 54 return self._per_filter[repo.filtername]
55 55
56 56 def updatecache(self, repo):
57 57 """Update the cache for the given filtered view on a repository"""
58 58 # This can trigger updates for the caches for subsets of the filtered
59 59 # view, e.g. when there is no cache for this filtered view or the cache
60 60 # is stale.
61 61
62 62 cl = repo.changelog
63 63 filtername = repo.filtername
64 64 bcache = self._per_filter.get(filtername)
65 65 if bcache is None or not bcache.validfor(repo):
66 66 # cache object missing or cache object stale? Read from disk
67 67 bcache = branchcache.fromfile(repo)
68 68
69 69 revs = []
70 70 if bcache is None:
71 71 # no (fresh) cache available anymore, perhaps we can re-use
72 72 # the cache for a subset, then extend that to add info on missing
73 73 # revisions.
74 74 subsetname = subsettable.get(filtername)
75 75 if subsetname is not None:
76 76 subset = repo.filtered(subsetname)
77 77 bcache = self[subset].copy()
78 78 extrarevs = subset.changelog.filteredrevs - cl.filteredrevs
79 79 revs.extend(r for r in extrarevs if r <= bcache.tiprev)
80 80 else:
81 81 # nothing to fall back on, start empty.
82 82 bcache = branchcache()
83 83
84 84 revs.extend(cl.revs(start=bcache.tiprev + 1))
85 85 if revs:
86 86 bcache.update(repo, revs)
87 87
88 88 assert bcache.validfor(repo), filtername
89 89 self._per_filter[repo.filtername] = bcache
90 90
91 91 def replace(self, repo, remotebranchmap):
92 92 """Replace the branchmap cache for a repo with a branch mapping.
93 93
94 94 This is likely only called during clone with a branch map from a
95 95 remote.
96 96
97 97 """
98 98 cl = repo.changelog
99 99 clrev = cl.rev
100 100 clbranchinfo = cl.branchinfo
101 101 rbheads = []
102 102 closed = []
103 103 for bheads in remotebranchmap.itervalues():
104 104 rbheads += bheads
105 105 for h in bheads:
106 106 r = clrev(h)
107 107 b, c = clbranchinfo(r)
108 108 if c:
109 109 closed.append(h)
110 110
111 111 if rbheads:
112 112 rtiprev = max((int(clrev(node)) for node in rbheads))
113 113 cache = branchcache(
114 114 remotebranchmap, repo[rtiprev].node(), rtiprev,
115 115 closednodes=closed)
116 116
117 117 # Try to stick it as low as possible
118 118 # filters above served are unlikely to be fetched from a clone
119 119 for candidate in ('base', 'immutable', 'served'):
120 120 rview = repo.filtered(candidate)
121 121 if cache.validfor(rview):
122 122 self._per_filter[candidate] = cache
123 123 cache.write(rview)
124 124 return
125 125
126 126 def clear(self):
127 127 self._per_filter.clear()
128 128
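# Hedged usage sketch: assuming the local repository exposes a shared
# instance of this class as repo._branchcaches (as this version of
# Mercurial appears to), a plain indexing call does all the work:
#
#     bcache = repo._branchcaches[repo]
#
# validating the cached view, falling back to the nearest subset from
# subsettable when needed, and updating the cache before returning it.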
129 129
130 130 class branchcache(dict):
131 131 """A dict like object that hold branches heads cache.
132 132
133 133 This cache is used to avoid costly computations to determine all the
134 134 branch heads of a repo.
135 135
136 136 The cache is serialized on disk in the following format:
137 137
138 138 <tip hex node> <tip rev number> [optional filtered repo hex hash]
139 139 <branch head hex node> <open/closed state> <branch name>
140 140 <branch head hex node> <open/closed state> <branch name>
141 141 ...
142 142
143 143 The first line is used to check if the cache is still valid. If the
144 144 branch cache is for a filtered repo view, an optional third hash is
145 145 included that hashes the hashes of all filtered revisions.
146 146
147 147 The open/closed state is represented by a single letter 'o' or 'c'.
148 148 This field can be used to avoid changelog reads when determining if a
149 149 branch head closes a branch or not.
150 150 """
151 151
152 152 def __init__(self, entries=(), tipnode=nullid, tiprev=nullrev,
153 153 filteredhash=None, closednodes=None):
154 154 super(branchcache, self).__init__(entries)
155 155 self.tipnode = tipnode
156 156 self.tiprev = tiprev
157 157 self.filteredhash = filteredhash
158 158 # closednodes is a set of nodes that close their branch. If the branch
159 159 # cache has been updated, it may contain nodes that are no longer
160 160 # heads.
161 161 if closednodes is None:
162 162 self._closednodes = set()
163 163 else:
164 164 self._closednodes = closednodes
165 165
166 166 @classmethod
167 167 def fromfile(cls, repo):
168 168 f = None
169 169 try:
170 170 f = repo.cachevfs(cls._filename(repo))
171 171 lineiter = iter(f)
172 172 cachekey = next(lineiter).rstrip('\n').split(" ", 2)
173 173 last, lrev = cachekey[:2]
174 174 last, lrev = bin(last), int(lrev)
175 175 filteredhash = None
176 176 if len(cachekey) > 2:
177 177 filteredhash = bin(cachekey[2])
178 178 bcache = cls(tipnode=last, tiprev=lrev, filteredhash=filteredhash)
179 179 if not bcache.validfor(repo):
180 180 # invalidate the cache
181 181 raise ValueError(r'tip differs')
182 bcache.load(repo, f)
182 bcache.load(repo, lineiter)
183 183 except (IOError, OSError):
184 184 return None
185 185
186 186 except Exception as inst:
187 187 if repo.ui.debugflag:
188 188 msg = 'invalid branchheads cache'
189 189 if repo.filtername is not None:
190 190 msg += ' (%s)' % repo.filtername
191 191 msg += ': %s\n'
192 192 repo.ui.debug(msg % pycompat.bytestr(inst))
193 193 bcache = None
194 194
195 195 finally:
196 196 if f:
197 197 f.close()
198 198
199 199 return bcache
200 200
201 def load(self, repo, f):
202 """ fully loads the branchcache by reading from the file f """
201 def load(self, repo, lineiter):
202 """ fully loads the branchcache by reading from the file using the line
203 iterator passed"""
203 204 cl = repo.changelog
204 lineiter = iter(f)
205 205 for line in lineiter:
206 206 line = line.rstrip('\n')
207 207 if not line:
208 208 continue
209 209 node, state, label = line.split(" ", 2)
210 210 if state not in 'oc':
211 211 raise ValueError(r'invalid branch state')
212 212 label = encoding.tolocal(label.strip())
213 213 node = bin(node)
214 214 if not cl.hasnode(node):
215 215 raise ValueError(
216 216 r'node %s does not exist' % pycompat.sysstr(hex(node)))
217 217 self.setdefault(label, []).append(node)
218 218 if state == 'c':
219 219 self._closednodes.add(node)
220 220
221 221 @staticmethod
222 222 def _filename(repo):
223 223 """name of a branchcache file for a given repo or repoview"""
224 224 filename = "branch2"
225 225 if repo.filtername:
226 226 filename = '%s-%s' % (filename, repo.filtername)
227 227 return filename
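    # E.g. the unfiltered repo stores its cache as "branch2" while the
    # 'served' repoview uses "branch2-served".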
228 228
229 229 def validfor(self, repo):
230 230 """Is the cache content valid regarding a repo
231 231
232 232 - False when cached tipnode is unknown or if we detect a strip.
233 233 - True when cache is up to date or a subset of current repo."""
234 234 try:
235 235 return ((self.tipnode == repo.changelog.node(self.tiprev))
236 236 and (self.filteredhash ==
237 237 scmutil.filteredhash(repo, self.tiprev)))
238 238 except IndexError:
239 239 return False
240 240
241 241 def _branchtip(self, heads):
242 242 '''Return a tuple with the last open head in heads and False;
243 243 otherwise return the last closed head and True.'''
244 244 tip = heads[-1]
245 245 closed = True
246 246 for h in reversed(heads):
247 247 if h not in self._closednodes:
248 248 tip = h
249 249 closed = False
250 250 break
251 251 return tip, closed
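    # Worked example (hypothetical nodes): with heads == [n1, n2, n3] and
    # _closednodes == {n2, n3}, scanning from the end finds n1 as the
    # first open head, giving (n1, False); if all three were closed, the
    # result would be (n3, True).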
252 252
253 253 def branchtip(self, branch):
254 254 '''Return the tipmost open head on the branch, otherwise return the
255 255 tipmost closed head on the branch.
256 256 Raise KeyError for unknown branch.'''
257 257 return self._branchtip(self[branch])[0]
258 258
259 259 def iteropen(self, nodes):
260 260 return (n for n in nodes if n not in self._closednodes)
261 261
262 262 def branchheads(self, branch, closed=False):
263 263 heads = self[branch]
264 264 if not closed:
265 265 heads = list(self.iteropen(heads))
266 266 return heads
267 267
268 268 def iterbranches(self):
269 269 for bn, heads in self.iteritems():
270 270 yield (bn, heads) + self._branchtip(heads)
271 271
272 272 def copy(self):
273 273 """return an deep copy of the branchcache object"""
274 274 return type(self)(
275 275 self, self.tipnode, self.tiprev, self.filteredhash,
276 276 self._closednodes)
277 277
278 278 def write(self, repo):
279 279 try:
280 280 f = repo.cachevfs(self._filename(repo), "w", atomictemp=True)
281 281 cachekey = [hex(self.tipnode), '%d' % self.tiprev]
282 282 if self.filteredhash is not None:
283 283 cachekey.append(hex(self.filteredhash))
284 284 f.write(" ".join(cachekey) + '\n')
285 285 nodecount = 0
286 286 for label, nodes in sorted(self.iteritems()):
287 287 label = encoding.fromlocal(label)
288 288 for node in nodes:
289 289 nodecount += 1
290 290 if node in self._closednodes:
291 291 state = 'c'
292 292 else:
293 293 state = 'o'
294 294 f.write("%s %s %s\n" % (hex(node), state, label))
295 295 f.close()
296 296 repo.ui.log('branchcache',
297 297 'wrote %s branch cache with %d labels and %d nodes\n',
298 298 repo.filtername, len(self), nodecount)
299 299 except (IOError, OSError, error.Abort) as inst:
300 300 # Abort may be raised by a read-only opener, so log and continue
301 301 repo.ui.debug("couldn't write branch cache: %s\n" %
302 302 stringutil.forcebytestr(inst))
303 303
304 304 def update(self, repo, revgen):
305 305 """Given a branchhead cache, self, that may have extra nodes or be
306 306 missing heads, and a generator of nodes that are strictly a superset of
307 307 heads missing, this function updates self to be correct.
308 308 """
309 309 starttime = util.timer()
310 310 cl = repo.changelog
311 311 # collect new branch entries
312 312 newbranches = {}
313 313 getbranchinfo = repo.revbranchcache().branchinfo
314 314 for r in revgen:
315 315 branch, closesbranch = getbranchinfo(r)
316 316 newbranches.setdefault(branch, []).append(r)
317 317 if closesbranch:
318 318 self._closednodes.add(cl.node(r))
319 319
320 320 # fetch current topological heads to speed up filtering
321 321 topoheads = set(cl.headrevs())
322 322
323 323 # if older branchheads are reachable from new ones, they aren't
324 324 # really branchheads. Note checking parents is insufficient:
325 325 # 1 (branch a) -> 2 (branch b) -> 3 (branch a)
326 326 for branch, newheadrevs in newbranches.iteritems():
327 327 bheads = self.setdefault(branch, [])
328 328 bheadset = set(cl.rev(node) for node in bheads)
329 329
330 330 # This has been tested True on all internal usages of this function.
331 331 # Run it again in case of doubt:
332 332 # assert not (set(bheadrevs) & set(newheadrevs))
333 333 bheadset.update(newheadrevs)
334 334
335 335 # This prunes out two kinds of heads - heads that are superseded by
336 336 # a head in newheadrevs, and newheadrevs that are not heads because
337 337 # an existing head is their descendant.
338 338 uncertain = bheadset - topoheads
339 339 if uncertain:
340 340 floorrev = min(uncertain)
341 341 ancestors = set(cl.ancestors(newheadrevs, floorrev))
342 342 bheadset -= ancestors
343 343 bheadrevs = sorted(bheadset)
344 344 self[branch] = [cl.node(rev) for rev in bheadrevs]
345 345 tiprev = bheadrevs[-1]
346 346 if tiprev > self.tiprev:
347 347 self.tipnode = cl.node(tiprev)
348 348 self.tiprev = tiprev
349 349
350 350 if not self.validfor(repo):
351 351 # cache key is not valid anymore
352 352 self.tipnode = nullid
353 353 self.tiprev = nullrev
354 354 for heads in self.values():
355 355 tiprev = max(cl.rev(node) for node in heads)
356 356 if tiprev > self.tiprev:
357 357 self.tipnode = cl.node(tiprev)
358 358 self.tiprev = tiprev
359 359 self.filteredhash = scmutil.filteredhash(repo, self.tiprev)
360 360
361 361 duration = util.timer() - starttime
362 362 repo.ui.log('branchcache', 'updated %s branch cache in %.4f seconds\n',
363 363 repo.filtername, duration)
364 364
365 365 self.write(repo)
366 366
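# Worked example of the head pruning in update() above, reusing the linear
# history from its comment -- 1 (branch a) -> 2 (branch b) -> 3 (branch a):
# after rev 3 arrives, bheadset for branch 'a' is {1, 3} while the only
# topological head is 3, so uncertain == {1}; rev 1 is an ancestor of the
# new head 3 and is pruned, leaving rev 3 as the sole head of branch 'a'.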
367 367
368 368 class remotebranchcache(branchcache):
369 369 """Branchmap info for a remote connection, should not write locally"""
370 370 def write(self, repo):
371 371 pass
372 372
373 373
374 374 # Revision branch info cache
375 375
376 376 _rbcversion = '-v1'
377 377 _rbcnames = 'rbc-names' + _rbcversion
378 378 _rbcrevs = 'rbc-revs' + _rbcversion
379 379 # [4 byte hash prefix][4 byte branch name number with sign bit indicating open]
380 380 _rbcrecfmt = '>4sI'
381 381 _rbcrecsize = calcsize(_rbcrecfmt)
382 382 _rbcnodelen = 4
383 383 _rbcbranchidxmask = 0x7fffffff
384 384 _rbccloseflag = 0x80000000
385 385
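# A standalone sketch of one rbc-revs record using the format defined
# above (node prefix and values are made up for illustration):
#
#     import struct
#     rec = struct.pack(_rbcrecfmt, b'\xde\xad\xbe\xef', 5 | _rbccloseflag)
#     node4, field = unpack_from(_rbcrecfmt, rec)  # aliased from struct above
#     branchidx = field & _rbcbranchidxmask        # -> 5
#     closed = bool(field & _rbccloseflag)         # -> True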
386 386 class revbranchcache(object):
387 387 """Persistent cache, mapping from revision number to branch name and close.
388 388 This is a low level cache, independent of filtering.
389 389
390 390 Branch names are stored in rbc-names in internal encoding separated by 0.
391 391 rbc-names is append-only, and each branch name is only stored once and will
392 392 thus have a unique index.
393 393
394 394 The branch info for each revision is stored in rbc-revs as constant size
395 395 records. The whole file is read into memory, but it is only 'parsed' on
396 396 demand. The file is usually append-only but will be truncated if repo
397 397 modification is detected.
398 398 The record for each revision contains the first 4 bytes of the
399 399 corresponding node hash, and the record is only used if it still matches.
400 400 Even a completely trashed rbc-revs will thus still give the right result
401 401 while converging towards full recovery ... assuming no incorrectly matching
402 402 node hashes.
403 403 The record also contains 4 bytes where 31 bits contain the index of the
404 404 branch and the last bit indicates that it is a branch close commit.
405 405 The usage pattern for rbc-revs is thus somewhat similar to 00changelog.i
406 406 and will grow with it but be 1/8th of its size.
407 407 """
408 408
409 409 def __init__(self, repo, readonly=True):
410 410 assert repo.filtername is None
411 411 self._repo = repo
412 412 self._names = [] # branch names in local encoding with static index
413 413 self._rbcrevs = bytearray()
414 414 self._rbcsnameslen = 0 # length of names read at _rbcsnameslen
415 415 try:
416 416 bndata = repo.cachevfs.read(_rbcnames)
417 417 self._rbcsnameslen = len(bndata) # for verification before writing
418 418 if bndata:
419 419 self._names = [encoding.tolocal(bn)
420 420 for bn in bndata.split('\0')]
421 421 except (IOError, OSError):
422 422 if readonly:
423 423 # don't try to use cache - fall back to the slow path
424 424 self.branchinfo = self._branchinfo
425 425
426 426 if self._names:
427 427 try:
428 428 data = repo.cachevfs.read(_rbcrevs)
429 429 self._rbcrevs[:] = data
430 430 except (IOError, OSError) as inst:
431 431 repo.ui.debug("couldn't read revision branch cache: %s\n" %
432 432 stringutil.forcebytestr(inst))
433 433 # remember number of good records on disk
434 434 self._rbcrevslen = min(len(self._rbcrevs) // _rbcrecsize,
435 435 len(repo.changelog))
436 436 if self._rbcrevslen == 0:
437 437 self._names = []
438 438 self._rbcnamescount = len(self._names) # number of names read at
439 439 # _rbcsnameslen
440 440
441 441 def _clear(self):
442 442 self._rbcsnameslen = 0
443 443 del self._names[:]
444 444 self._rbcnamescount = 0
445 445 self._rbcrevslen = len(self._repo.changelog)
446 446 self._rbcrevs = bytearray(self._rbcrevslen * _rbcrecsize)
447 447 util.clearcachedproperty(self, '_namesreverse')
448 448
449 449 @util.propertycache
450 450 def _namesreverse(self):
451 451 return dict((b, r) for r, b in enumerate(self._names))
452 452
453 453 def branchinfo(self, rev):
454 454 """Return branch name and close flag for rev, using and updating
455 455 persistent cache."""
456 456 changelog = self._repo.changelog
457 457 rbcrevidx = rev * _rbcrecsize
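        # e.g. rev 10 lives at byte offset 10 * _rbcrecsize == 80 and its
        # record occupies bytes [80, 88) of self._rbcrevs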
458 458
459 459 # avoid negative index, changelog.read(nullrev) is fast without cache
460 460 if rev == nullrev:
461 461 return changelog.branchinfo(rev)
462 462
463 463 # if requested rev isn't allocated, grow and cache the rev info
464 464 if len(self._rbcrevs) < rbcrevidx + _rbcrecsize:
465 465 return self._branchinfo(rev)
466 466
467 467 # fast path: extract data from cache, use it if node is matching
468 468 reponode = changelog.node(rev)[:_rbcnodelen]
469 469 cachenode, branchidx = unpack_from(
470 470 _rbcrecfmt, util.buffer(self._rbcrevs), rbcrevidx)
471 471 close = bool(branchidx & _rbccloseflag)
472 472 if close:
473 473 branchidx &= _rbcbranchidxmask
474 474 if cachenode == '\0\0\0\0':
475 475 pass
476 476 elif cachenode == reponode:
477 477 try:
478 478 return self._names[branchidx], close
479 479 except IndexError:
480 480 # recover from invalid reference to unknown branch
481 481 self._repo.ui.debug("referenced branch names not found"
482 482 " - rebuilding revision branch cache from scratch\n")
483 483 self._clear()
484 484 else:
485 485 # rev/node map has changed, invalidate the cache from here up
486 486 self._repo.ui.debug("history modification detected - truncating "
487 487 "revision branch cache to revision %d\n" % rev)
488 488 truncate = rbcrevidx + _rbcrecsize
489 489 del self._rbcrevs[truncate:]
490 490 self._rbcrevslen = min(self._rbcrevslen, truncate)
491 491
492 492 # fall back to slow path and make sure it will be written to disk
493 493 return self._branchinfo(rev)
494 494
495 495 def _branchinfo(self, rev):
496 496 """Retrieve branch info from changelog and update _rbcrevs"""
497 497 changelog = self._repo.changelog
498 498 b, close = changelog.branchinfo(rev)
499 499 if b in self._namesreverse:
500 500 branchidx = self._namesreverse[b]
501 501 else:
502 502 branchidx = len(self._names)
503 503 self._names.append(b)
504 504 self._namesreverse[b] = branchidx
505 505 reponode = changelog.node(rev)
506 506 if close:
507 507 branchidx |= _rbccloseflag
508 508 self._setcachedata(rev, reponode, branchidx)
509 509 return b, close
510 510
511 511 def setdata(self, branch, rev, node, close):
512 512 """add new data information to the cache"""
513 513 if branch in self._namesreverse:
514 514 branchidx = self._namesreverse[branch]
515 515 else:
516 516 branchidx = len(self._names)
517 517 self._names.append(branch)
518 518 self._namesreverse[branch] = branchidx
519 519 if close:
520 520 branchidx |= _rbccloseflag
521 521 self._setcachedata(rev, node, branchidx)
522 522 # If no cache data was readable (nonexistent file, bad permissions, etc.)
523 523 # the cache was bypassing itself by setting:
524 524 #
525 525 # self.branchinfo = self._branchinfo
526 526 #
527 527 # Since we now have data in the cache, we need to drop this bypassing.
528 528 if r'branchinfo' in vars(self):
529 529 del self.branchinfo
530 530
531 531 def _setcachedata(self, rev, node, branchidx):
532 532 """Writes the node's branch data to the in-memory cache data."""
533 533 if rev == nullrev:
534 534 return
535 535 rbcrevidx = rev * _rbcrecsize
536 536 if len(self._rbcrevs) < rbcrevidx + _rbcrecsize:
537 537 self._rbcrevs.extend('\0' *
538 538 (len(self._repo.changelog) * _rbcrecsize -
539 539 len(self._rbcrevs)))
540 540 pack_into(_rbcrecfmt, self._rbcrevs, rbcrevidx, node, branchidx)
541 541 self._rbcrevslen = min(self._rbcrevslen, rev)
542 542
543 543 tr = self._repo.currenttransaction()
544 544 if tr:
545 545 tr.addfinalize('write-revbranchcache', self.write)
546 546
547 547 def write(self, tr=None):
548 548 """Save branch cache if it is dirty."""
549 549 repo = self._repo
550 550 wlock = None
551 551 step = ''
552 552 try:
553 553 if self._rbcnamescount < len(self._names):
554 554 step = ' names'
555 555 wlock = repo.wlock(wait=False)
556 556 if self._rbcnamescount != 0:
557 557 f = repo.cachevfs.open(_rbcnames, 'ab')
558 558 if f.tell() == self._rbcsnameslen:
559 559 f.write('\0')
560 560 else:
561 561 f.close()
562 562 repo.ui.debug("%s changed - rewriting it\n" % _rbcnames)
563 563 self._rbcnamescount = 0
564 564 self._rbcrevslen = 0
565 565 if self._rbcnamescount == 0:
566 566 # before rewriting names, make sure references are removed
567 567 repo.cachevfs.unlinkpath(_rbcrevs, ignoremissing=True)
568 568 f = repo.cachevfs.open(_rbcnames, 'wb')
569 569 f.write('\0'.join(encoding.fromlocal(b)
570 570 for b in self._names[self._rbcnamescount:]))
571 571 self._rbcsnameslen = f.tell()
572 572 f.close()
573 573 self._rbcnamescount = len(self._names)
574 574
575 575 start = self._rbcrevslen * _rbcrecsize
576 576 if start != len(self._rbcrevs):
577 577 step = ''
578 578 if wlock is None:
579 579 wlock = repo.wlock(wait=False)
580 580 revs = min(len(repo.changelog),
581 581 len(self._rbcrevs) // _rbcrecsize)
582 582 f = repo.cachevfs.open(_rbcrevs, 'ab')
583 583 if f.tell() != start:
584 584 repo.ui.debug("truncating cache/%s to %d\n"
585 585 % (_rbcrevs, start))
586 586 f.seek(start)
587 587 if f.tell() != start:
588 588 start = 0
589 589 f.seek(start)
590 590 f.truncate()
591 591 end = revs * _rbcrecsize
592 592 f.write(self._rbcrevs[start:end])
593 593 f.close()
594 594 self._rbcrevslen = revs
595 595 except (IOError, OSError, error.Abort, error.LockError) as inst:
596 596 repo.ui.debug("couldn't write revision branch cache%s: %s\n"
597 597 % (step, stringutil.forcebytestr(inst)))
598 598 finally:
599 599 if wlock is not None:
600 600 wlock.release()
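# Hedged end-to-end sketch: the main consumer of revbranchcache is
# branchcache.update() above, roughly:
#
#     rbc = repo.revbranchcache()
#     branch, closed = rbc.branchinfo(rev)
#     rbc.write()  # normally deferred to a transaction finalizer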