branchmap: explicitly convert file into iterator...
Yuya Nishihara
r39174:8547c859 default
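The change below takes the header line of the cache file with next() and the remaining lines with a for loop, so both now visibly consume one explicitly created iterator rather than the file object itself. A minimal standalone sketch of that pattern, using a hypothetical file name:

    # sketch only: 'branch2-sample' is a made-up path; Mercurial's actual
    # reader is the read() function in the diff below
    with open('branch2-sample', 'rb') as f:
        lineiter = iter(f)                        # explicit iterator over the file
        cachekey = next(lineiter).rstrip(b'\n')   # first line: the cache key
        for line in lineiter:                     # remaining lines: one branch head each
            line = line.rstrip(b'\n')
            if not line:
                continue
            print(line)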
@@ -1,554 +1,555 @@
1 1 # branchmap.py - logic to compute, maintain and store branchmap for local repo
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import struct
11 11
12 12 from .node import (
13 13 bin,
14 14 hex,
15 15 nullid,
16 16 nullrev,
17 17 )
18 18 from . import (
19 19 encoding,
20 20 error,
21 21 pycompat,
22 22 scmutil,
23 23 util,
24 24 )
25 25 from .utils import (
26 26 stringutil,
27 27 )
28 28
29 29 calcsize = struct.calcsize
30 30 pack_into = struct.pack_into
31 31 unpack_from = struct.unpack_from
32 32
33 33 def _filename(repo):
34 34 """name of a branchcache file for a given repo or repoview"""
35 35 filename = "branch2"
36 36 if repo.filtername:
37 37 filename = '%s-%s' % (filename, repo.filtername)
38 38 return filename
39 39
40 40 def read(repo):
41 41 f = None
42 42 try:
43 43 f = repo.cachevfs(_filename(repo))
44 cachekey = next(f).rstrip('\n').split(" ", 2)
44 lineiter = iter(f)
45 cachekey = next(lineiter).rstrip('\n').split(" ", 2)
45 46 last, lrev = cachekey[:2]
46 47 last, lrev = bin(last), int(lrev)
47 48 filteredhash = None
48 49 if len(cachekey) > 2:
49 50 filteredhash = bin(cachekey[2])
50 51 partial = branchcache(tipnode=last, tiprev=lrev,
51 52 filteredhash=filteredhash)
52 53 if not partial.validfor(repo):
53 54 # invalidate the cache
54 55 raise ValueError(r'tip differs')
55 56 cl = repo.changelog
56 for l in f:
57 for l in lineiter:
57 58 l = l.rstrip('\n')
58 59 if not l:
59 60 continue
60 61 node, state, label = l.split(" ", 2)
61 62 if state not in 'oc':
62 63 raise ValueError(r'invalid branch state')
63 64 label = encoding.tolocal(label.strip())
64 65 node = bin(node)
65 66 if not cl.hasnode(node):
66 67 raise ValueError(
67 68 r'node %s does not exist' % pycompat.sysstr(hex(node)))
68 69 partial.setdefault(label, []).append(node)
69 70 if state == 'c':
70 71 partial._closednodes.add(node)
71 72
72 73 except (IOError, OSError):
73 74 return None
74 75
75 76 except Exception as inst:
76 77 if repo.ui.debugflag:
77 78 msg = 'invalid branchheads cache'
78 79 if repo.filtername is not None:
79 80 msg += ' (%s)' % repo.filtername
80 81 msg += ': %s\n'
81 82 repo.ui.debug(msg % pycompat.bytestr(inst))
82 83 partial = None
83 84
84 85 finally:
85 86 if f:
86 87 f.close()
87 88
88 89 return partial
89 90
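The read() function above parses the branch2 cache file: a header line with the tip node, the tip revision and an optional filtered-view hash, followed by one line per branch head. A hypothetical sample and a minimal parsing sketch (hashes and values are made up, and plain strings stand in for the bin()/hex() and encoding helpers of the real code):

    sample = ('28ad74487de9599d00d81085be739c61fc340652 127\n'
              '28ad74487de9599d00d81085be739c61fc340652 o default\n'
              'c9374db4bbe70a1b5c0b07458fa932e66d1f90d1 c stable\n')

    heads = {}
    closed = set()
    lines = iter(sample.splitlines())
    last, lrev = next(lines).split(' ', 2)[:2]
    for line in lines:
        node, state, label = line.split(' ', 2)
        heads.setdefault(label, []).append(node)
        if state == 'c':
            closed.add(node)
    print(last, int(lrev), sorted(heads), sorted(closed))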
90 91 ### Nearest subset relation
91 92 # Nearest subset of filter X is a filter Y so that:
92 93 # * Y is included in X,
93 94 # * X - Y is as small as possible.
94 95 # This creates an ordering used for branchmap purposes.
95 96 # The ordering may be partial.
96 97 subsettable = {None: 'visible',
97 98 'visible-hidden': 'visible',
98 99 'visible': 'served',
99 100 'served': 'immutable',
100 101 'immutable': 'base'}
101 102
102 103 def updatecache(repo):
103 104 cl = repo.changelog
104 105 filtername = repo.filtername
105 106 partial = repo._branchcaches.get(filtername)
106 107
107 108 revs = []
108 109 if partial is None or not partial.validfor(repo):
109 110 partial = read(repo)
110 111 if partial is None:
111 112 subsetname = subsettable.get(filtername)
112 113 if subsetname is None:
113 114 partial = branchcache()
114 115 else:
115 116 subset = repo.filtered(subsetname)
116 117 partial = subset.branchmap().copy()
117 118 extrarevs = subset.changelog.filteredrevs - cl.filteredrevs
118 119 revs.extend(r for r in extrarevs if r <= partial.tiprev)
119 120 revs.extend(cl.revs(start=partial.tiprev + 1))
120 121 if revs:
121 122 partial.update(repo, revs)
122 123 partial.write(repo)
123 124
124 125 assert partial.validfor(repo), filtername
125 126 repo._branchcaches[repo.filtername] = partial
126 127
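updatecache() above seeds a missing branchmap from the nearest less filtered view named in subsettable and then brings it up to date incrementally; because subset.branchmap() applies the same logic recursively, the lookups effectively walk a chain of filter names. A small standalone sketch of that chain (fallbackchain is a hypothetical helper, not part of branchmap.py):

    subsettable = {None: 'visible',
                   'visible-hidden': 'visible',
                   'visible': 'served',
                   'served': 'immutable',
                   'immutable': 'base'}

    def fallbackchain(filtername):
        """Yield the progressively less filtered views the cache may be seeded from."""
        name = filtername
        while name in subsettable:
            name = subsettable[name]
            yield name

    print(list(fallbackchain(None)))  # ['visible', 'served', 'immutable', 'base']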
127 128 def replacecache(repo, bm):
128 129 """Replace the branchmap cache for a repo with a branch mapping.
129 130
130 131 This is likely only called during clone with a branch map from a remote.
131 132 """
132 133 rbheads = []
133 134 closed = []
134 135 for bheads in bm.itervalues():
135 136 rbheads.extend(bheads)
136 137 for h in bheads:
137 138 r = repo.changelog.rev(h)
138 139 b, c = repo.changelog.branchinfo(r)
139 140 if c:
140 141 closed.append(h)
141 142
142 143 if rbheads:
143 144 rtiprev = max((int(repo.changelog.rev(node))
144 145 for node in rbheads))
145 146 cache = branchcache(bm,
146 147 repo[rtiprev].node(),
147 148 rtiprev,
148 149 closednodes=closed)
149 150
150 151 # Try to stick it as low as possible
151 152 # filters above served are unlikely to be fetched from a clone
152 153 for candidate in ('base', 'immutable', 'served'):
153 154 rview = repo.filtered(candidate)
154 155 if cache.validfor(rview):
155 156 repo._branchcaches[candidate] = cache
156 157 cache.write(rview)
157 158 break
158 159
159 160 class branchcache(dict):
160 161 """A dict like object that hold branches heads cache.
161 162
162 163 This cache is used to avoid costly computations to determine all the
163 164 branch heads of a repo.
164 165
165 166 The cache is serialized on disk in the following format:
166 167
167 168 <tip hex node> <tip rev number> [optional filtered repo hex hash]
168 169 <branch head hex node> <open/closed state> <branch name>
169 170 <branch head hex node> <open/closed state> <branch name>
170 171 ...
171 172
172 173 The first line is used to check if the cache is still valid. If the
173 174 branch cache is for a filtered repo view, an optional third hash is
174 175 included that hashes the hashes of all filtered revisions.
175 176
176 177 The open/closed state is represented by a single letter 'o' or 'c'.
177 178 This field can be used to avoid changelog reads when determining if a
178 179 branch head closes a branch or not.
179 180 """
180 181
181 182 def __init__(self, entries=(), tipnode=nullid, tiprev=nullrev,
182 183 filteredhash=None, closednodes=None):
183 184 super(branchcache, self).__init__(entries)
184 185 self.tipnode = tipnode
185 186 self.tiprev = tiprev
186 187 self.filteredhash = filteredhash
187 188 # closednodes is a set of nodes that close their branch. If the branch
188 189 # cache has been updated, it may contain nodes that are no longer
189 190 # heads.
190 191 if closednodes is None:
191 192 self._closednodes = set()
192 193 else:
193 194 self._closednodes = closednodes
194 195
195 196 def validfor(self, repo):
196 197 """Is the cache content valid regarding a repo
197 198
198 199 - False when cached tipnode is unknown or if we detect a strip.
199 200 - True when cache is up to date or a subset of current repo."""
200 201 try:
201 202 return ((self.tipnode == repo.changelog.node(self.tiprev))
202 203 and (self.filteredhash == \
203 204 scmutil.filteredhash(repo, self.tiprev)))
204 205 except IndexError:
205 206 return False
206 207
207 208 def _branchtip(self, heads):
208 209 '''Return a tuple with the last open head in heads and False,
209 210 otherwise return the last closed head and True.'''
210 211 tip = heads[-1]
211 212 closed = True
212 213 for h in reversed(heads):
213 214 if h not in self._closednodes:
214 215 tip = h
215 216 closed = False
216 217 break
217 218 return tip, closed
218 219
219 220 def branchtip(self, branch):
220 221 '''Return the tipmost open head on branch, otherwise return the
221 222 tipmost closed head on branch.
222 223 Raise KeyError for unknown branch.'''
223 224 return self._branchtip(self[branch])[0]
224 225
225 226 def iteropen(self, nodes):
226 227 return (n for n in nodes if n not in self._closednodes)
227 228
228 229 def branchheads(self, branch, closed=False):
229 230 heads = self[branch]
230 231 if not closed:
231 232 heads = list(self.iteropen(heads))
232 233 return heads
233 234
234 235 def iterbranches(self):
235 236 for bn, heads in self.iteritems():
236 237 yield (bn, heads) + self._branchtip(heads)
237 238
238 239 def copy(self):
239 240 """return an deep copy of the branchcache object"""
240 241 return branchcache(self, self.tipnode, self.tiprev, self.filteredhash,
241 242 self._closednodes)
242 243
243 244 def write(self, repo):
244 245 try:
245 246 f = repo.cachevfs(_filename(repo), "w", atomictemp=True)
246 247 cachekey = [hex(self.tipnode), '%d' % self.tiprev]
247 248 if self.filteredhash is not None:
248 249 cachekey.append(hex(self.filteredhash))
249 250 f.write(" ".join(cachekey) + '\n')
250 251 nodecount = 0
251 252 for label, nodes in sorted(self.iteritems()):
252 253 for node in nodes:
253 254 nodecount += 1
254 255 if node in self._closednodes:
255 256 state = 'c'
256 257 else:
257 258 state = 'o'
258 259 f.write("%s %s %s\n" % (hex(node), state,
259 260 encoding.fromlocal(label)))
260 261 f.close()
261 262 repo.ui.log('branchcache',
262 263 'wrote %s branch cache with %d labels and %d nodes\n',
263 264 repo.filtername, len(self), nodecount)
264 265 except (IOError, OSError, error.Abort) as inst:
265 266 # Abort may be raised by read only opener, so log and continue
266 267 repo.ui.debug("couldn't write branch cache: %s\n" %
267 268 stringutil.forcebytestr(inst))
268 269
269 270 def update(self, repo, revgen):
270 271 """Given a branchhead cache, self, that may have extra nodes or be
271 272 missing heads, and a generator of revisions that are strictly a superset of
272 273 the missing heads, this function updates self to be correct.
273 274 """
274 275 starttime = util.timer()
275 276 cl = repo.changelog
276 277 # collect new branch entries
277 278 newbranches = {}
278 279 getbranchinfo = repo.revbranchcache().branchinfo
279 280 for r in revgen:
280 281 branch, closesbranch = getbranchinfo(r)
281 282 newbranches.setdefault(branch, []).append(r)
282 283 if closesbranch:
283 284 self._closednodes.add(cl.node(r))
284 285
285 286 # fetch current topological heads to speed up filtering
286 287 topoheads = set(cl.headrevs())
287 288
288 289 # if older branchheads are reachable from new ones, they aren't
289 290 # really branchheads. Note checking parents is insufficient:
290 291 # 1 (branch a) -> 2 (branch b) -> 3 (branch a)
291 292 for branch, newheadrevs in newbranches.iteritems():
292 293 bheads = self.setdefault(branch, [])
293 294 bheadset = set(cl.rev(node) for node in bheads)
294 295
295 296 # This has been tested True on all internal usages of this function.
296 297 # Run it again in case of doubt:
297 298 # assert not (set(bheadrevs) & set(newheadrevs))
298 299 newheadrevs.sort()
299 300 bheadset.update(newheadrevs)
300 301
301 302 # This prunes out two kinds of heads - heads that are superseded by
302 303 # a head in newheadrevs, and newheadrevs that are not heads because
303 304 # an existing head is their descendant.
304 305 uncertain = bheadset - topoheads
305 306 if uncertain:
306 307 floorrev = min(uncertain)
307 308 ancestors = set(cl.ancestors(newheadrevs, floorrev))
308 309 bheadset -= ancestors
309 310 bheadrevs = sorted(bheadset)
310 311 self[branch] = [cl.node(rev) for rev in bheadrevs]
311 312 tiprev = bheadrevs[-1]
312 313 if tiprev > self.tiprev:
313 314 self.tipnode = cl.node(tiprev)
314 315 self.tiprev = tiprev
315 316
316 317 if not self.validfor(repo):
317 318 # cache key is not valid anymore
318 319 self.tipnode = nullid
319 320 self.tiprev = nullrev
320 321 for heads in self.values():
321 322 tiprev = max(cl.rev(node) for node in heads)
322 323 if tiprev > self.tiprev:
323 324 self.tipnode = cl.node(tiprev)
324 325 self.tiprev = tiprev
325 326 self.filteredhash = scmutil.filteredhash(repo, self.tiprev)
326 327
327 328 duration = util.timer() - starttime
328 329 repo.ui.log('branchcache', 'updated %s branch cache in %.4f seconds\n',
329 330 repo.filtername, duration)
330 331
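The pruning step in update() above drops candidate heads that are ancestors of newly added heads. A toy standalone illustration of that idea on a hand-written DAG (the parents dict and the ancestors() helper are invented for this sketch; the real code walks the changelog and limits the search with topoheads and floorrev for speed):

    parents = {1: [], 2: [1], 3: [2], 4: [2]}   # revs 3 and 4 both descend from 2

    def ancestors(revs):
        """Return all strict ancestors of the given revisions."""
        seen = set()
        stack = list(revs)
        while stack:
            for p in parents[stack.pop()]:
                if p not in seen:
                    seen.add(p)
                    stack.append(p)
        return seen

    bheadset = {2, 3}                    # previously cached heads of a branch
    newheadrevs = [4]                    # newly seen revision on the same branch
    bheadset.update(newheadrevs)
    bheadset -= ancestors(newheadrevs)   # 2 is an ancestor of 4, so it is not a head
    print(sorted(bheadset))              # [3, 4]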
331 332 # Revision branch info cache
332 333
333 334 _rbcversion = '-v1'
334 335 _rbcnames = 'rbc-names' + _rbcversion
335 336 _rbcrevs = 'rbc-revs' + _rbcversion
336 337 # [4 byte hash prefix][4 byte branch name number with sign bit indicating open]
337 338 _rbcrecfmt = '>4sI'
338 339 _rbcrecsize = calcsize(_rbcrecfmt)
339 340 _rbcnodelen = 4
340 341 _rbcbranchidxmask = 0x7fffffff
341 342 _rbccloseflag = 0x80000000
342 343
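The constants above define the fixed-size rbc-revs record: the first 4 bytes of the node hash, then a big-endian 32-bit value whose low 31 bits index into rbc-names and whose high bit marks a branch-closing commit. A standalone sketch of packing and unpacking one such record (the node bytes and index are made up):

    import struct

    _rbcrecfmt = '>4sI'
    _rbccloseflag = 0x80000000
    _rbcbranchidxmask = 0x7fffffff

    node4 = b'\x28\xad\x74\x48'       # first 4 bytes of some node hash (made up)
    branchidx = 5                     # index of the branch name in rbc-names
    record = struct.pack(_rbcrecfmt, node4, branchidx | _rbccloseflag)

    prefix, value = struct.unpack(_rbcrecfmt, record)
    close = bool(value & _rbccloseflag)
    idx = value & _rbcbranchidxmask
    print(len(record), prefix == node4, close, idx)   # 8 True True 5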
343 344 class revbranchcache(object):
344 345 """Persistent cache, mapping from revision number to branch name and close.
345 346 This is a low level cache, independent of filtering.
346 347
347 348 Branch names are stored in rbc-names in internal encoding separated by 0.
348 349 rbc-names is append-only, and each branch name is only stored once and will
349 350 thus have a unique index.
350 351
351 352 The branch info for each revision is stored in rbc-revs as constant size
352 353 records. The whole file is read into memory, but it is only 'parsed' on
353 354 demand. The file is usually append-only but will be truncated if repo
354 355 modification is detected.
355 356 The record for each revision contains the first 4 bytes of the
356 357 corresponding node hash, and the record is only used if it still matches.
357 358 Even a completely trashed rbc-revs file will thus still give the right result
358 359 while converging towards full recovery ... assuming no incorrectly matching
359 360 node hashes.
360 361 The record also contains 4 bytes where 31 bits contain the index of the
361 362 branch and the last bit indicates that it is a branch close commit.
362 363 The usage pattern for rbc-revs is thus somewhat similar to 00changelog.i
363 364 and will grow with it but be 1/8th of its size.
364 365 """
365 366
366 367 def __init__(self, repo, readonly=True):
367 368 assert repo.filtername is None
368 369 self._repo = repo
369 370 self._names = [] # branch names in local encoding with static index
370 371 self._rbcrevs = bytearray()
371 372 self._rbcsnameslen = 0 # length of names read at _rbcsnameslen
372 373 try:
373 374 bndata = repo.cachevfs.read(_rbcnames)
374 375 self._rbcsnameslen = len(bndata) # for verification before writing
375 376 if bndata:
376 377 self._names = [encoding.tolocal(bn)
377 378 for bn in bndata.split('\0')]
378 379 except (IOError, OSError):
379 380 if readonly:
380 381 # don't try to use cache - fall back to the slow path
381 382 self.branchinfo = self._branchinfo
382 383
383 384 if self._names:
384 385 try:
385 386 data = repo.cachevfs.read(_rbcrevs)
386 387 self._rbcrevs[:] = data
387 388 except (IOError, OSError) as inst:
388 389 repo.ui.debug("couldn't read revision branch cache: %s\n" %
389 390 stringutil.forcebytestr(inst))
390 391 # remember number of good records on disk
391 392 self._rbcrevslen = min(len(self._rbcrevs) // _rbcrecsize,
392 393 len(repo.changelog))
393 394 if self._rbcrevslen == 0:
394 395 self._names = []
395 396 self._rbcnamescount = len(self._names) # number of names read at
396 397 # _rbcsnameslen
397 398 self._namesreverse = dict((b, r) for r, b in enumerate(self._names))
398 399
399 400 def _clear(self):
400 401 self._rbcsnameslen = 0
401 402 del self._names[:]
402 403 self._rbcnamescount = 0
403 404 self._namesreverse.clear()
404 405 self._rbcrevslen = len(self._repo.changelog)
405 406 self._rbcrevs = bytearray(self._rbcrevslen * _rbcrecsize)
406 407
407 408 def branchinfo(self, rev):
408 409 """Return branch name and close flag for rev, using and updating
409 410 persistent cache."""
410 411 changelog = self._repo.changelog
411 412 rbcrevidx = rev * _rbcrecsize
412 413
413 414 # avoid negative index, changelog.read(nullrev) is fast without cache
414 415 if rev == nullrev:
415 416 return changelog.branchinfo(rev)
416 417
417 418 # if requested rev isn't allocated, grow and cache the rev info
418 419 if len(self._rbcrevs) < rbcrevidx + _rbcrecsize:
419 420 return self._branchinfo(rev)
420 421
421 422 # fast path: extract data from cache, use it if node is matching
422 423 reponode = changelog.node(rev)[:_rbcnodelen]
423 424 cachenode, branchidx = unpack_from(
424 425 _rbcrecfmt, util.buffer(self._rbcrevs), rbcrevidx)
425 426 close = bool(branchidx & _rbccloseflag)
426 427 if close:
427 428 branchidx &= _rbcbranchidxmask
428 429 if cachenode == '\0\0\0\0':
429 430 pass
430 431 elif cachenode == reponode:
431 432 try:
432 433 return self._names[branchidx], close
433 434 except IndexError:
434 435 # recover from invalid reference to unknown branch
435 436 self._repo.ui.debug("referenced branch names not found"
436 437 " - rebuilding revision branch cache from scratch\n")
437 438 self._clear()
438 439 else:
439 440 # rev/node map has changed, invalidate the cache from here up
440 441 self._repo.ui.debug("history modification detected - truncating "
441 442 "revision branch cache to revision %d\n" % rev)
442 443 truncate = rbcrevidx + _rbcrecsize
443 444 del self._rbcrevs[truncate:]
444 445 self._rbcrevslen = min(self._rbcrevslen, truncate)
445 446
446 447 # fall back to slow path and make sure it will be written to disk
447 448 return self._branchinfo(rev)
448 449
449 450 def _branchinfo(self, rev):
450 451 """Retrieve branch info from changelog and update _rbcrevs"""
451 452 changelog = self._repo.changelog
452 453 b, close = changelog.branchinfo(rev)
453 454 if b in self._namesreverse:
454 455 branchidx = self._namesreverse[b]
455 456 else:
456 457 branchidx = len(self._names)
457 458 self._names.append(b)
458 459 self._namesreverse[b] = branchidx
459 460 reponode = changelog.node(rev)
460 461 if close:
461 462 branchidx |= _rbccloseflag
462 463 self._setcachedata(rev, reponode, branchidx)
463 464 return b, close
464 465
465 466 def setdata(self, branch, rev, node, close):
466 467 """add new data information to the cache"""
467 468 if branch in self._namesreverse:
468 469 branchidx = self._namesreverse[branch]
469 470 else:
470 471 branchidx = len(self._names)
471 472 self._names.append(branch)
472 473 self._namesreverse[branch] = branchidx
473 474 if close:
474 475 branchidx |= _rbccloseflag
475 476 self._setcachedata(rev, node, branchidx)
476 477 # If no cache data were readable (none exists, bad permissions, etc.)
477 478 # the cache was bypassing itself by setting:
478 479 #
479 480 # self.branchinfo = self._branchinfo
480 481 #
481 482 # Since we now have data in the cache, we need to drop this bypassing.
482 483 if 'branchinfo' in vars(self):
483 484 del self.branchinfo
484 485
485 486 def _setcachedata(self, rev, node, branchidx):
486 487 """Writes the node's branch data to the in-memory cache data."""
487 488 if rev == nullrev:
488 489 return
489 490 rbcrevidx = rev * _rbcrecsize
490 491 if len(self._rbcrevs) < rbcrevidx + _rbcrecsize:
491 492 self._rbcrevs.extend('\0' *
492 493 (len(self._repo.changelog) * _rbcrecsize -
493 494 len(self._rbcrevs)))
494 495 pack_into(_rbcrecfmt, self._rbcrevs, rbcrevidx, node, branchidx)
495 496 self._rbcrevslen = min(self._rbcrevslen, rev)
496 497
497 498 tr = self._repo.currenttransaction()
498 499 if tr:
499 500 tr.addfinalize('write-revbranchcache', self.write)
500 501
501 502 def write(self, tr=None):
502 503 """Save branch cache if it is dirty."""
503 504 repo = self._repo
504 505 wlock = None
505 506 step = ''
506 507 try:
507 508 if self._rbcnamescount < len(self._names):
508 509 step = ' names'
509 510 wlock = repo.wlock(wait=False)
510 511 if self._rbcnamescount != 0:
511 512 f = repo.cachevfs.open(_rbcnames, 'ab')
512 513 if f.tell() == self._rbcsnameslen:
513 514 f.write('\0')
514 515 else:
515 516 f.close()
516 517 repo.ui.debug("%s changed - rewriting it\n" % _rbcnames)
517 518 self._rbcnamescount = 0
518 519 self._rbcrevslen = 0
519 520 if self._rbcnamescount == 0:
520 521 # before rewriting names, make sure references are removed
521 522 repo.cachevfs.unlinkpath(_rbcrevs, ignoremissing=True)
522 523 f = repo.cachevfs.open(_rbcnames, 'wb')
523 524 f.write('\0'.join(encoding.fromlocal(b)
524 525 for b in self._names[self._rbcnamescount:]))
525 526 self._rbcsnameslen = f.tell()
526 527 f.close()
527 528 self._rbcnamescount = len(self._names)
528 529
529 530 start = self._rbcrevslen * _rbcrecsize
530 531 if start != len(self._rbcrevs):
531 532 step = ''
532 533 if wlock is None:
533 534 wlock = repo.wlock(wait=False)
534 535 revs = min(len(repo.changelog),
535 536 len(self._rbcrevs) // _rbcrecsize)
536 537 f = repo.cachevfs.open(_rbcrevs, 'ab')
537 538 if f.tell() != start:
538 539 repo.ui.debug("truncating cache/%s to %d\n"
539 540 % (_rbcrevs, start))
540 541 f.seek(start)
541 542 if f.tell() != start:
542 543 start = 0
543 544 f.seek(start)
544 545 f.truncate()
545 546 end = revs * _rbcrecsize
546 547 f.write(self._rbcrevs[start:end])
547 548 f.close()
548 549 self._rbcrevslen = revs
549 550 except (IOError, OSError, error.Abort, error.LockError) as inst:
550 551 repo.ui.debug("couldn't write revision branch cache%s: %s\n"
551 552 % (step, stringutil.forcebytestr(inst)))
552 553 finally:
553 554 if wlock is not None:
554 555 wlock.release()