##// END OF EJS Templates
branchmap: remove redundant sort...
Martijn Pieters -
r40348:5644f7c8 default
parent child Browse files
Show More
@@ -1,555 +1,554
1 1 # branchmap.py - logic to computes, maintain and stores branchmap for local repo
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import struct
11 11
12 12 from .node import (
13 13 bin,
14 14 hex,
15 15 nullid,
16 16 nullrev,
17 17 )
18 18 from . import (
19 19 encoding,
20 20 error,
21 21 pycompat,
22 22 scmutil,
23 23 util,
24 24 )
25 25 from .utils import (
26 26 stringutil,
27 27 )
28 28
29 29 calcsize = struct.calcsize
30 30 pack_into = struct.pack_into
31 31 unpack_from = struct.unpack_from
32 32
def _filename(repo):
    """Return the name of the branchcache file for a repo or repoview.

    The unfiltered repo uses plain ``branch2``; a filtered view gets a
    ``branch2-<filtername>`` suffix so each view keeps its own cache file.
    """
    base = "branch2"
    if not repo.filtername:
        return base
    return '%s-%s' % (base, repo.filtername)
39 39
def read(repo):
    """Load the on-disk branch cache for ``repo``.

    Returns a ``branchcache`` instance, or None when the cache file is
    missing/unreadable or its content fails validation against the repo.
    """
    cachefile = None
    try:
        cachefile = repo.cachevfs(_filename(repo))
        lines = iter(cachefile)
        # header line: "<tip hex node> <tip rev> [filtered repo hash]"
        cachekey = next(lines).rstrip('\n').split(" ", 2)
        tipnode, tiprev = bin(cachekey[0]), int(cachekey[1])
        filteredhash = bin(cachekey[2]) if len(cachekey) > 2 else None
        bcache = branchcache(tipnode=tipnode, tiprev=tiprev,
                             filteredhash=filteredhash)
        if not bcache.validfor(repo):
            # invalidate the cache
            raise ValueError(r'tip differs')
        cl = repo.changelog
        # body lines: "<head hex node> <'o'|'c'> <branch name>"
        for line in lines:
            line = line.rstrip('\n')
            if not line:
                continue
            node, state, label = line.split(" ", 2)
            if state not in 'oc':
                raise ValueError(r'invalid branch state')
            label = encoding.tolocal(label.strip())
            node = bin(node)
            if not cl.hasnode(node):
                raise ValueError(
                    r'node %s does not exist' % pycompat.sysstr(hex(node)))
            bcache.setdefault(label, []).append(node)
            if state == 'c':
                bcache._closednodes.add(node)

    except (IOError, OSError):
        # no usable cache on disk
        return None

    except Exception as inst:
        # any parse/validation error: report when debugging, rebuild later
        if repo.ui.debugflag:
            msg = 'invalid branchheads cache'
            if repo.filtername is not None:
                msg += ' (%s)' % repo.filtername
            msg += ': %s\n'
            repo.ui.debug(msg % pycompat.bytestr(inst))
        bcache = None

    finally:
        if cachefile:
            cachefile.close()

    return bcache
90 90
### Nearest subset relation
# The nearest subset of filter X is a filter Y so that:
# * Y is included in X,
# * X - Y is as small as possible.
# This creates an ordering used for branchmap purposes.
# The ordering may be partial.
#
# Maps each filter name to the name of its nearest subset; updatecache()
# uses this to seed a filtered view's branchmap from the subset's cache
# instead of recomputing it from scratch.
subsettable = {None: 'visible',
               'visible-hidden': 'visible',
               'visible': 'served',
               'served': 'immutable',
               'immutable': 'base'}
102 102
def updatecache(repo):
    """Ensure ``repo._branchcaches`` holds a valid branchcache for repo's
    current filter level, updating it (and writing it to disk) if needed.
    """
    cl = repo.changelog
    filtername = repo.filtername
    bcache = repo._branchcaches.get(filtername)

    revs = []
    if bcache is None or not bcache.validfor(repo):
        # in-memory cache unusable: try disk, then the nearest subset's
        # cache, and finally fall back to an empty cache
        bcache = read(repo)
        if bcache is None:
            subsetname = subsettable.get(filtername)
            if subsetname is None:
                bcache = branchcache()
            else:
                subset = repo.filtered(subsetname)
                bcache = subset.branchmap().copy()
                # revisions visible here but filtered in the subset still
                # need to be folded into the copied cache
                extrarevs = subset.changelog.filteredrevs - cl.filteredrevs
                revs.extend(r for r in extrarevs if r <= bcache.tiprev)
    revs.extend(cl.revs(start=bcache.tiprev + 1))
    if revs:
        bcache.update(repo, revs)
        bcache.write(repo)

    assert bcache.validfor(repo), filtername
    repo._branchcaches[repo.filtername] = bcache
127 127
def replacecache(repo, bm):
    """Replace the branchmap cache for a repo with a branch mapping.

    This is likely only called during clone with a branch map from a remote.
    """
    cl = repo.changelog
    allheads = []
    closed = []
    for branchheads in bm.itervalues():
        allheads.extend(branchheads)
        # record which of these heads close their branch
        closed.extend(h for h in branchheads
                      if cl.branchinfo(cl.rev(h))[1])

    if not allheads:
        return
    rtiprev = max(int(cl.rev(node)) for node in allheads)
    cache = branchcache(bm,
                        repo[rtiprev].node(),
                        rtiprev,
                        closednodes=closed)

    # Try to stick it as low as possible
    # filters above served are unlikely to be fetched from a clone
    for candidate in ('base', 'immutable', 'served'):
        rview = repo.filtered(candidate)
        if cache.validfor(rview):
            repo._branchcaches[candidate] = cache
            cache.write(rview)
            break
159 159
class branchcache(dict):
    """A dict like object that hold branches heads cache.

    This cache is used to avoid costly computations to determine all the
    branch heads of a repo.

    The cache is serialized on disk in the following format:

    <tip hex node> <tip rev number> [optional filtered repo hex hash]
    <branch head hex node> <open/closed state> <branch name>
    <branch head hex node> <open/closed state> <branch name>
    ...

    The first line is used to check if the cache is still valid. If the
    branch cache is for a filtered repo view, an optional third hash is
    included that hashes the hashes of all filtered revisions.

    The open/closed state is represented by a single letter 'o' or 'c'.
    This field can be used to avoid changelog reads when determining if a
    branch head closes a branch or not.
    """

    def __init__(self, entries=(), tipnode=nullid, tiprev=nullrev,
                 filteredhash=None, closednodes=None):
        super(branchcache, self).__init__(entries)
        self.tipnode = tipnode
        self.tiprev = tiprev
        self.filteredhash = filteredhash
        # closednodes is a set of nodes that close their branch. If the branch
        # cache has been updated, it may contain nodes that are no longer
        # heads.
        if closednodes is None:
            self._closednodes = set()
        else:
            self._closednodes = closednodes

    def validfor(self, repo):
        """Is the cache content valid regarding a repo

        - False when cached tipnode is unknown or if we detect a strip.
        - True when cache is up to date or a subset of current repo."""
        try:
            return ((self.tipnode == repo.changelog.node(self.tiprev))
                    and (self.filteredhash ==
                         scmutil.filteredhash(repo, self.tiprev)))
        except IndexError:
            return False

    def _branchtip(self, heads):
        '''Return tuple with last open head in heads and false,
        otherwise return last closed head and true.'''
        tip = heads[-1]
        closed = True
        for h in reversed(heads):
            if h not in self._closednodes:
                tip = h
                closed = False
                break
        return tip, closed

    def branchtip(self, branch):
        '''Return the tipmost open head on branch head, otherwise return the
        tipmost closed head on branch.
        Raise KeyError for unknown branch.'''
        return self._branchtip(self[branch])[0]

    def iteropen(self, nodes):
        # generator of the nodes that do not close their branch
        return (n for n in nodes if n not in self._closednodes)

    def branchheads(self, branch, closed=False):
        '''Return the list of heads of branch, excluding closed heads
        unless closed=True. Raise KeyError for unknown branch.'''
        heads = self[branch]
        if not closed:
            heads = list(self.iteropen(heads))
        return heads

    def iterbranches(self):
        # yields (branchname, heads, tipnode, isclosed) tuples
        for bn, heads in self.iteritems():
            yield (bn, heads) + self._branchtip(heads)

    def copy(self):
        """return an deep copy of the branchcache object"""
        return branchcache(self, self.tipnode, self.tiprev, self.filteredhash,
                           self._closednodes)

    def write(self, repo):
        '''Serialize the cache to disk (see the class docstring for the
        format). Write errors are logged and swallowed: the cache is an
        optimization, not a requirement.'''
        try:
            f = repo.cachevfs(_filename(repo), "w", atomictemp=True)
            cachekey = [hex(self.tipnode), '%d' % self.tiprev]
            if self.filteredhash is not None:
                cachekey.append(hex(self.filteredhash))
            f.write(" ".join(cachekey) + '\n')
            nodecount = 0
            for label, nodes in sorted(self.iteritems()):
                for node in nodes:
                    nodecount += 1
                    if node in self._closednodes:
                        state = 'c'
                    else:
                        state = 'o'
                    f.write("%s %s %s\n" % (hex(node), state,
                                            encoding.fromlocal(label)))
            f.close()
            repo.ui.log('branchcache',
                        'wrote %s branch cache with %d labels and %d nodes\n',
                        repo.filtername, len(self), nodecount)
        except (IOError, OSError, error.Abort) as inst:
            # Abort may be raised by read only opener, so log and continue
            repo.ui.debug("couldn't write branch cache: %s\n" %
                          stringutil.forcebytestr(inst))

    def update(self, repo, revgen):
        """Given a branchhead cache, self, that may have extra nodes or be
        missing heads, and a generator of nodes that are strictly a superset of
        heads missing, this function updates self to be correct.
        """
        starttime = util.timer()
        cl = repo.changelog
        # collect new branch entries
        newbranches = {}
        getbranchinfo = repo.revbranchcache().branchinfo
        for r in revgen:
            branch, closesbranch = getbranchinfo(r)
            newbranches.setdefault(branch, []).append(r)
            if closesbranch:
                self._closednodes.add(cl.node(r))

        # fetch current topological heads to speed up filtering
        topoheads = set(cl.headrevs())

        # if older branchheads are reachable from new ones, they aren't
        # really branchheads. Note checking parents is insufficient:
        # 1 (branch a) -> 2 (branch b) -> 3 (branch a)
        for branch, newheadrevs in newbranches.iteritems():
            bheads = self.setdefault(branch, [])
            bheadset = set(cl.rev(node) for node in bheads)

            # This have been tested True on all internal usage of this
            # function. run it again in case of doubt
            # assert not (set(bheadrevs) & set(newheadrevs))
            # No need to sort newheadrevs first: set membership is
            # order-insensitive and the head list is rebuilt from
            # sorted(bheadset) below anyway.
            bheadset.update(newheadrevs)

            # This prunes out two kinds of heads - heads that are superseded by
            # a head in newheadrevs, and newheadrevs that are not heads because
            # an existing head is their descendant.
            uncertain = bheadset - topoheads
            if uncertain:
                floorrev = min(uncertain)
                ancestors = set(cl.ancestors(newheadrevs, floorrev))
                bheadset -= ancestors
            bheadrevs = sorted(bheadset)
            self[branch] = [cl.node(rev) for rev in bheadrevs]
            tiprev = bheadrevs[-1]
            if tiprev > self.tiprev:
                self.tipnode = cl.node(tiprev)
                self.tiprev = tiprev

        if not self.validfor(repo):
            # cache key are not valid anymore
            self.tipnode = nullid
            self.tiprev = nullrev
            for heads in self.values():
                tiprev = max(cl.rev(node) for node in heads)
                if tiprev > self.tiprev:
                    self.tipnode = cl.node(tiprev)
                    self.tiprev = tiprev
        self.filteredhash = scmutil.filteredhash(repo, self.tiprev)

        duration = util.timer() - starttime
        repo.ui.log('branchcache', 'updated %s branch cache in %.4f seconds\n',
                    repo.filtername, duration)
331 330
# Revision branch info cache

_rbcversion = '-v1'
_rbcnames = 'rbc-names' + _rbcversion
_rbcrevs = 'rbc-revs' + _rbcversion
# record layout:
# [4 byte hash prefix][4 byte branch name number with sign bit indicating open]
_rbcrecfmt = '>4sI'
_rbcrecsize = calcsize(_rbcrecfmt)
# number of node-hash bytes stored per record for validation
_rbcnodelen = 4
# low 31 bits of the second field: index into the rbc-names list
_rbcbranchidxmask = 0x7fffffff
# high bit of the second field: set when the commit closes its branch
_rbccloseflag = 0x80000000
343 342
class revbranchcache(object):
    """Persistent cache, mapping from revision number to branch name and close.
    This is a low level cache, independent of filtering.

    Branch names are stored in rbc-names in internal encoding separated by 0.
    rbc-names is append-only, and each branch name is only stored once and will
    thus have a unique index.

    The branch info for each revision is stored in rbc-revs as constant size
    records. The whole file is read into memory, but it is only 'parsed' on
    demand. The file is usually append-only but will be truncated if repo
    modification is detected.
    The record for each revision contains the first 4 bytes of the
    corresponding node hash, and the record is only used if it still matches.
    Even a completely trashed rbc-revs fill thus still give the right result
    while converging towards full recovery ... assuming no incorrectly matching
    node hashes.
    The record also contains 4 bytes where 31 bits contains the index of the
    branch and the last bit indicate that it is a branch close commit.
    The usage pattern for rbc-revs is thus somewhat similar to 00changelog.i
    and will grow with it but be 1/8th of its size.
    """

    def __init__(self, repo, readonly=True):
        assert repo.filtername is None
        self._repo = repo
        self._names = [] # branch names in local encoding with static index
        self._rbcrevs = bytearray()
        self._rbcsnameslen = 0 # length of names read at _rbcsnameslen
        try:
            bndata = repo.cachevfs.read(_rbcnames)
            self._rbcsnameslen = len(bndata) # for verification before writing
            if bndata:
                self._names = [encoding.tolocal(bn)
                               for bn in bndata.split('\0')]
        except (IOError, OSError):
            if readonly:
                # don't try to use cache - fall back to the slow path
                self.branchinfo = self._branchinfo

        if self._names:
            try:
                data = repo.cachevfs.read(_rbcrevs)
                self._rbcrevs[:] = data
            except (IOError, OSError) as inst:
                repo.ui.debug("couldn't read revision branch cache: %s\n" %
                              stringutil.forcebytestr(inst))
        # remember number of good records on disk
        self._rbcrevslen = min(len(self._rbcrevs) // _rbcrecsize,
                               len(repo.changelog))
        if self._rbcrevslen == 0:
            self._names = []
        self._rbcnamescount = len(self._names) # number of names read at
                                               # _rbcsnameslen
        self._namesreverse = dict((b, r) for r, b in enumerate(self._names))

    def _clear(self):
        """Reset the in-memory cache state, forcing a full rewrite."""
        self._rbcsnameslen = 0
        del self._names[:]
        self._rbcnamescount = 0
        self._namesreverse.clear()
        self._rbcrevslen = len(self._repo.changelog)
        self._rbcrevs = bytearray(self._rbcrevslen * _rbcrecsize)

    def branchinfo(self, rev):
        """Return branch name and close flag for rev, using and updating
        persistent cache."""
        changelog = self._repo.changelog
        rbcrevidx = rev * _rbcrecsize

        # avoid negative index, changelog.read(nullrev) is fast without cache
        if rev == nullrev:
            return changelog.branchinfo(rev)

        # if requested rev isn't allocated, grow and cache the rev info
        if len(self._rbcrevs) < rbcrevidx + _rbcrecsize:
            return self._branchinfo(rev)

        # fast path: extract data from cache, use it if node is matching
        reponode = changelog.node(rev)[:_rbcnodelen]
        cachenode, branchidx = unpack_from(
            _rbcrecfmt, util.buffer(self._rbcrevs), rbcrevidx)
        close = bool(branchidx & _rbccloseflag)
        if close:
            branchidx &= _rbcbranchidxmask
        if cachenode == '\0\0\0\0':
            # never-written record: fall through to the slow path
            pass
        elif cachenode == reponode:
            try:
                return self._names[branchidx], close
            except IndexError:
                # recover from invalid reference to unknown branch
                self._repo.ui.debug("referenced branch names not found"
                    " - rebuilding revision branch cache from scratch\n")
                self._clear()
        else:
            # rev/node map has changed, invalidate the cache from here up
            self._repo.ui.debug("history modification detected - truncating "
                "revision branch cache to revision %d\n" % rev)
            truncate = rbcrevidx + _rbcrecsize
            del self._rbcrevs[truncate:]
            # _rbcrevslen counts *records*, not bytes, so clamp it with the
            # revision number rather than the byte offset 'truncate' (which
            # would almost always leave it unchanged); the stale record for
            # 'rev' itself is rewritten by the _branchinfo call below.
            self._rbcrevslen = min(self._rbcrevslen, rev)

        # fall back to slow path and make sure it will be written to disk
        return self._branchinfo(rev)

    def _branchinfo(self, rev):
        """Retrieve branch info from changelog and update _rbcrevs"""
        changelog = self._repo.changelog
        b, close = changelog.branchinfo(rev)
        if b in self._namesreverse:
            branchidx = self._namesreverse[b]
        else:
            # first time we see this branch name: assign the next index
            branchidx = len(self._names)
            self._names.append(b)
            self._namesreverse[b] = branchidx
        reponode = changelog.node(rev)
        if close:
            branchidx |= _rbccloseflag
        self._setcachedata(rev, reponode, branchidx)
        return b, close

    def setdata(self, branch, rev, node, close):
        """add new data information to the cache"""
        if branch in self._namesreverse:
            branchidx = self._namesreverse[branch]
        else:
            branchidx = len(self._names)
            self._names.append(branch)
            self._namesreverse[branch] = branchidx
        if close:
            branchidx |= _rbccloseflag
        self._setcachedata(rev, node, branchidx)
        # If no cache data were readable (non exists, bad permission, etc)
        # the cache was bypassing itself by setting:
        #
        #   self.branchinfo = self._branchinfo
        #
        # Since we now have data in the cache, we need to drop this bypassing.
        if r'branchinfo' in vars(self):
            del self.branchinfo

    def _setcachedata(self, rev, node, branchidx):
        """Writes the node's branch data to the in-memory cache data."""
        if rev == nullrev:
            return
        rbcrevidx = rev * _rbcrecsize
        if len(self._rbcrevs) < rbcrevidx + _rbcrecsize:
            # grow the buffer with zero records up to the changelog length
            self._rbcrevs.extend('\0' *
                                 (len(self._repo.changelog) * _rbcrecsize -
                                  len(self._rbcrevs)))
        pack_into(_rbcrecfmt, self._rbcrevs, rbcrevidx, node, branchidx)
        # mark everything from this record on as needing a (re)write
        self._rbcrevslen = min(self._rbcrevslen, rev)

        tr = self._repo.currenttransaction()
        if tr:
            tr.addfinalize('write-revbranchcache', self.write)

    def write(self, tr=None):
        """Save branch cache if it is dirty."""
        repo = self._repo
        wlock = None
        step = ''
        try:
            if self._rbcnamescount < len(self._names):
                # new branch names were added: append (or rewrite) rbc-names
                step = ' names'
                wlock = repo.wlock(wait=False)
                if self._rbcnamescount != 0:
                    f = repo.cachevfs.open(_rbcnames, 'ab')
                    if f.tell() == self._rbcsnameslen:
                        f.write('\0')
                    else:
                        # on-disk file changed behind our back: rewrite all
                        f.close()
                        repo.ui.debug("%s changed - rewriting it\n" % _rbcnames)
                        self._rbcnamescount = 0
                        self._rbcrevslen = 0
                if self._rbcnamescount == 0:
                    # before rewriting names, make sure references are removed
                    repo.cachevfs.unlinkpath(_rbcrevs, ignoremissing=True)
                    f = repo.cachevfs.open(_rbcnames, 'wb')
                f.write('\0'.join(encoding.fromlocal(b)
                                  for b in self._names[self._rbcnamescount:]))
                self._rbcsnameslen = f.tell()
                f.close()
                self._rbcnamescount = len(self._names)

            start = self._rbcrevslen * _rbcrecsize
            if start != len(self._rbcrevs):
                # dirty records: append from the first invalid position
                step = ''
                if wlock is None:
                    wlock = repo.wlock(wait=False)
                revs = min(len(repo.changelog),
                           len(self._rbcrevs) // _rbcrecsize)
                f = repo.cachevfs.open(_rbcrevs, 'ab')
                if f.tell() != start:
                    repo.ui.debug("truncating cache/%s to %d\n"
                                  % (_rbcrevs, start))
                    f.seek(start)
                    if f.tell() != start:
                        # seek failed: rewrite the whole file
                        start = 0
                        f.seek(start)
                    f.truncate()
                end = revs * _rbcrecsize
                f.write(self._rbcrevs[start:end])
                f.close()
                self._rbcrevslen = revs
        except (IOError, OSError, error.Abort, error.LockError) as inst:
            # best effort: the cache is an optimization only
            repo.ui.debug("couldn't write revision branch cache%s: %s\n"
                          % (step, stringutil.forcebytestr(inst)))
        finally:
            if wlock is not None:
                wlock.release()
General Comments 0
You need to be logged in to leave comments. Login now