##// END OF EJS Templates
branchmap: wrap builtin exception in bytes for logging...
Augie Fackler -
r36168:fa4d333c default
parent child Browse files
Show More
@@ -1,525 +1,525 b''
# branchmap.py - logic to compute, maintain and store the branchmap for a local repo
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import struct
11 11
12 12 from .node import (
13 13 bin,
14 14 hex,
15 15 nullid,
16 16 nullrev,
17 17 )
18 18 from . import (
19 19 encoding,
20 20 error,
21 21 pycompat,
22 22 scmutil,
23 23 util,
24 24 )
25 25
26 26 calcsize = struct.calcsize
27 27 pack_into = struct.pack_into
28 28 unpack_from = struct.unpack_from
29 29
def _filename(repo):
    """Name of the branch cache file for a given repo or repoview."""
    base = "branch2"
    if not repo.filtername:
        return base
    return '%s-%s' % (base, repo.filtername)
def read(repo):
    """Read the on-disk branch cache for repo and rebuild a branchcache.

    Returns None when the cache file is missing, unreadable, stale, or
    malformed; parse errors are reported via ui.debug only.
    """
    try:
        f = repo.cachevfs(_filename(repo))
        lines = f.read().split('\n')
        f.close()
    except (IOError, OSError):
        return None

    try:
        # first line: "<tip hex node> <tip rev> [filtered hash]"
        cachekey = lines.pop(0).split(" ", 2)
        tipnode, tiprev = cachekey[:2]
        tipnode, tiprev = bin(tipnode), int(tiprev)
        filteredhash = bin(cachekey[2]) if len(cachekey) > 2 else None
        cache = branchcache(tipnode=tipnode, tiprev=tiprev,
                            filteredhash=filteredhash)
        if not cache.validfor(repo):
            # invalidate the cache
            raise ValueError(r'tip differs')
        cl = repo.changelog
        # remaining lines: "<head hex node> <o|c> <branch name>"
        for line in lines:
            if not line:
                continue
            node, state, label = line.split(" ", 2)
            if state not in 'oc':
                raise ValueError(r'invalid branch state')
            label = encoding.tolocal(label.strip())
            node = bin(node)
            if not cl.hasnode(node):
                raise ValueError(
                    r'node %s does not exist' % pycompat.sysstr(hex(node)))
            cache.setdefault(label, []).append(node)
            if state == 'c':
                cache._closednodes.add(node)
    except Exception as inst:
        if repo.ui.debugflag:
            msg = 'invalid branchheads cache'
            if repo.filtername is not None:
                msg += ' (%s)' % repo.filtername
            msg += ': %s\n'
            repo.ui.debug(msg % pycompat.bytestr(inst))
        cache = None
    return cache
81 81
### Nearest subset relation
# The nearest subset of a filter X is the filter Y such that:
# * Y is included in X,
# * X - Y is as small as possible.
# This creates an ordering used for branchmap purposes.
# The ordering may be partial.
subsettable = {
    None: 'visible',
    'visible-hidden': 'visible',
    'visible': 'served',
    'served': 'immutable',
    'immutable': 'base',
}
93 93
def updatecache(repo):
    """Ensure repo._branchcaches holds an up-to-date branchcache for the
    repo's current filter level, updating and persisting it if needed."""
    cl = repo.changelog
    filtername = repo.filtername
    bcache = repo._branchcaches.get(filtername)

    revs = []
    if bcache is None or not bcache.validfor(repo):
        # in-memory cache unusable: try disk, then derive from the nearest
        # subset's branchmap
        bcache = read(repo)
        if bcache is None:
            subsetname = subsettable.get(filtername)
            if subsetname is None:
                bcache = branchcache()
            else:
                subset = repo.filtered(subsetname)
                bcache = subset.branchmap().copy()
                extrarevs = subset.changelog.filteredrevs - cl.filteredrevs
                revs.extend(r for r in extrarevs if r <= bcache.tiprev)
    revs.extend(cl.revs(start=bcache.tiprev + 1))
    if revs:
        bcache.update(repo, revs)
        bcache.write(repo)

    assert bcache.validfor(repo), filtername
    repo._branchcaches[repo.filtername] = bcache
118 118
def replacecache(repo, bm):
    """Replace the branchmap cache for a repo with a branch mapping.

    This is likely only called during clone with a branch map from a remote.
    """
    allheads = []
    closedheads = []
    for heads in bm.itervalues():
        allheads.extend(heads)
        for head in heads:
            rev = repo.changelog.rev(head)
            _branch, isclosed = repo.changelog.branchinfo(rev)
            if isclosed:
                closedheads.append(head)

    if allheads:
        tiprev = max(int(repo.changelog.rev(node)) for node in allheads)
        cache = branchcache(bm,
                            repo[tiprev].node(),
                            tiprev,
                            closednodes=closedheads)

        # Try to stick it as low as possible
        # filter above served are unlikely to be fetch from a clone
        for candidate in ('base', 'immutable', 'served'):
            rview = repo.filtered(candidate)
            if cache.validfor(rview):
                repo._branchcaches[candidate] = cache
                cache.write(rview)
                break
150 150
class branchcache(dict):
    """A dict like object that hold branches heads cache.

    This cache is used to avoid costly computations to determine all the
    branch heads of a repo.

    The cache is serialized on disk in the following format:

    <tip hex node> <tip rev number> [optional filtered repo hex hash]
    <branch head hex node> <open/closed state> <branch name>
    <branch head hex node> <open/closed state> <branch name>
    ...

    The first line is used to check if the cache is still valid. If the
    branch cache is for a filtered repo view, an optional third hash is
    included that hashes the hashes of all filtered revisions.

    The open/closed state is represented by a single letter 'o' or 'c'.
    This field can be used to avoid changelog reads when determining if a
    branch head closes a branch or not.
    """

    def __init__(self, entries=(), tipnode=nullid, tiprev=nullrev,
                 filteredhash=None, closednodes=None):
        # entries: initial mapping of branch name -> list of head nodes
        super(branchcache, self).__init__(entries)
        self.tipnode = tipnode
        self.tiprev = tiprev
        self.filteredhash = filteredhash
        # closednodes is a set of nodes that close their branch. If the branch
        # cache has been updated, it may contain nodes that are no longer
        # heads.
        if closednodes is None:
            self._closednodes = set()
        else:
            self._closednodes = closednodes

    def validfor(self, repo):
        """Is the cache content valid regarding a repo

        - False when cached tipnode is unknown or if we detect a strip.
        - True when cache is up to date or a subset of current repo."""
        try:
            return ((self.tipnode == repo.changelog.node(self.tiprev))
                    and (self.filteredhash == \
                         scmutil.filteredhash(repo, self.tiprev)))
        except IndexError:
            # tiprev is no longer a valid revision (e.g. after a strip)
            return False

    def _branchtip(self, heads):
        '''Return tuple with last open head in heads and false,
        otherwise return last closed head and true.'''
        tip = heads[-1]
        closed = True
        for h in reversed(heads):
            if h not in self._closednodes:
                tip = h
                closed = False
                break
        return tip, closed

    def branchtip(self, branch):
        '''Return the tipmost open head on branch head, otherwise return the
        tipmost closed head on branch.
        Raise KeyError for unknown branch.'''
        return self._branchtip(self[branch])[0]

    def iteropen(self, nodes):
        # filter out heads that close their branch
        return (n for n in nodes if n not in self._closednodes)

    def branchheads(self, branch, closed=False):
        # heads of 'branch'; closed heads are included only when closed=True
        heads = self[branch]
        if not closed:
            heads = list(self.iteropen(heads))
        return heads

    def iterbranches(self):
        # yield (branchname, heads, branchtip, isclosed) tuples
        for bn, heads in self.iteritems():
            yield (bn, heads) + self._branchtip(heads)

    def copy(self):
        """Return a new branchcache with the same content.

        Note: the per-branch head lists and the _closednodes set are shared
        with the copy, not duplicated."""
        return branchcache(self, self.tipnode, self.tiprev, self.filteredhash,
                           self._closednodes)

    def write(self, repo):
        """Serialize the cache to disk under repo's cache vfs (best effort;
        failures are logged via ui.debug and otherwise ignored)."""
        try:
            f = repo.cachevfs(_filename(repo), "w", atomictemp=True)
            cachekey = [hex(self.tipnode), '%d' % self.tiprev]
            if self.filteredhash is not None:
                cachekey.append(hex(self.filteredhash))
            f.write(" ".join(cachekey) + '\n')
            nodecount = 0
            for label, nodes in sorted(self.iteritems()):
                for node in nodes:
                    nodecount += 1
                    if node in self._closednodes:
                        state = 'c'
                    else:
                        state = 'o'
                    f.write("%s %s %s\n" % (hex(node), state,
                                            encoding.fromlocal(label)))
            f.close()
            repo.ui.log('branchcache',
                        'wrote %s branch cache with %d labels and %d nodes\n',
                        repo.filtername, len(self), nodecount)
        except (IOError, OSError, error.Abort) as inst:
            # Abort may be raised by read only opener, so log and continue
            repo.ui.debug("couldn't write branch cache: %s\n" % inst)

    def update(self, repo, revgen):
        """Given a branchhead cache, self, that may have extra nodes or be
        missing heads, and a generator of nodes that are strictly a superset of
        heads missing, this function updates self to be correct.
        """
        starttime = util.timer()
        cl = repo.changelog
        # collect new branch entries
        newbranches = {}
        getbranchinfo = repo.revbranchcache().branchinfo
        for r in revgen:
            branch, closesbranch = getbranchinfo(r)
            newbranches.setdefault(branch, []).append(r)
            if closesbranch:
                self._closednodes.add(cl.node(r))

        # fetch current topological heads to speed up filtering
        topoheads = set(cl.headrevs())

        # if older branchheads are reachable from new ones, they aren't
        # really branchheads. Note checking parents is insufficient:
        # 1 (branch a) -> 2 (branch b) -> 3 (branch a)
        for branch, newheadrevs in newbranches.iteritems():
            bheads = self.setdefault(branch, [])
            bheadset = set(cl.rev(node) for node in bheads)

            # This has been tested True on all internal usage of this function.
            # run it again in case of doubt
            # assert not (set(bheadrevs) & set(newheadrevs))
            newheadrevs.sort()
            bheadset.update(newheadrevs)

            # This prunes out two kinds of heads - heads that are superseded by
            # a head in newheadrevs, and newheadrevs that are not heads because
            # an existing head is their descendant.
            uncertain = bheadset - topoheads
            if uncertain:
                floorrev = min(uncertain)
                ancestors = set(cl.ancestors(newheadrevs, floorrev))
                bheadset -= ancestors
            bheadrevs = sorted(bheadset)
            self[branch] = [cl.node(rev) for rev in bheadrevs]
            tiprev = bheadrevs[-1]
            if tiprev > self.tiprev:
                self.tipnode = cl.node(tiprev)
                self.tiprev = tiprev

        if not self.validfor(repo):
            # cache key are not valid anymore
            self.tipnode = nullid
            self.tiprev = nullrev
            # recompute the tip from the surviving heads
            for heads in self.values():
                tiprev = max(cl.rev(node) for node in heads)
                if tiprev > self.tiprev:
                    self.tipnode = cl.node(tiprev)
                    self.tiprev = tiprev
        self.filteredhash = scmutil.filteredhash(repo, self.tiprev)

        duration = util.timer() - starttime
        repo.ui.log('branchcache', 'updated %s branch cache in %.4f seconds\n',
                    repo.filtername, duration)
321 321
# Revision branch info cache

_rbcversion = '-v1'
_rbcnames = 'rbc-names' + _rbcversion
_rbcrevs = 'rbc-revs' + _rbcversion
# record layout:
# [4 byte hash prefix][4 byte branch name number with sign bit indicating open]
_rbcrecfmt = '>4sI'
_rbcrecsize = struct.calcsize(_rbcrecfmt)
_rbcnodelen = 4
_rbcbranchidxmask = 0x7fffffff
_rbccloseflag = 0x80000000
333 333
class revbranchcache(object):
    """Persistent cache, mapping from revision number to branch name and close.
    This is a low level cache, independent of filtering.

    Branch names are stored in rbc-names in internal encoding separated by 0.
    rbc-names is append-only, and each branch name is only stored once and will
    thus have a unique index.

    The branch info for each revision is stored in rbc-revs as constant size
    records. The whole file is read into memory, but it is only 'parsed' on
    demand. The file is usually append-only but will be truncated if repo
    modification is detected.
    The record for each revision contains the first 4 bytes of the
    corresponding node hash, and the record is only used if it still matches.
    Even a completely trashed rbc-revs file thus still give the right result
    while converging towards full recovery ... assuming no incorrectly matching
    node hashes.
    The record also contains 4 bytes where 31 bits contains the index of the
    branch and the last bit indicate that it is a branch close commit.
    The usage pattern for rbc-revs is thus somewhat similar to 00changelog.i
    and will grow with it but be 1/8th of its size.
    """

    def __init__(self, repo, readonly=True):
        # this cache works on the unfiltered repo only
        assert repo.filtername is None
        self._repo = repo
        self._names = [] # branch names in local encoding with static index
        self._rbcrevs = bytearray()
        self._rbcsnameslen = 0 # length of names read at _rbcsnameslen
        try:
            bndata = repo.cachevfs.read(_rbcnames)
            self._rbcsnameslen = len(bndata) # for verification before writing
            if bndata:
                self._names = [encoding.tolocal(bn)
                               for bn in bndata.split('\0')]
        except (IOError, OSError):
            if readonly:
                # don't try to use cache - fall back to the slow path
                self.branchinfo = self._branchinfo

        # only bother loading records if we have names to resolve them to
        if self._names:
            try:
                data = repo.cachevfs.read(_rbcrevs)
                self._rbcrevs[:] = data
            except (IOError, OSError) as inst:
                repo.ui.debug("couldn't read revision branch cache: %s\n" %
                              pycompat.bytestr(inst))
        # remember number of good records on disk
        self._rbcrevslen = min(len(self._rbcrevs) // _rbcrecsize,
                               len(repo.changelog))
        if self._rbcrevslen == 0:
            self._names = []
        self._rbcnamescount = len(self._names) # number of names read at
                                               # _rbcsnameslen
        self._namesreverse = dict((b, r) for r, b in enumerate(self._names))

    def _clear(self):
        # reset in-memory state; the record buffer is re-sized for the whole
        # changelog so records can be rebuilt lazily from scratch
        self._rbcsnameslen = 0
        del self._names[:]
        self._rbcnamescount = 0
        self._namesreverse.clear()
        self._rbcrevslen = len(self._repo.changelog)
        self._rbcrevs = bytearray(self._rbcrevslen * _rbcrecsize)

    def branchinfo(self, rev):
        """Return branch name and close flag for rev, using and updating
        persistent cache."""
        changelog = self._repo.changelog
        rbcrevidx = rev * _rbcrecsize

        # avoid negative index, changelog.read(nullrev) is fast without cache
        if rev == nullrev:
            return changelog.branchinfo(rev)

        # if requested rev isn't allocated, grow and cache the rev info
        if len(self._rbcrevs) < rbcrevidx + _rbcrecsize:
            return self._branchinfo(rev)

        # fast path: extract data from cache, use it if node is matching
        reponode = changelog.node(rev)[:_rbcnodelen]
        cachenode, branchidx = unpack_from(
            _rbcrecfmt, util.buffer(self._rbcrevs), rbcrevidx)
        close = bool(branchidx & _rbccloseflag)
        if close:
            branchidx &= _rbcbranchidxmask
        if cachenode == '\0\0\0\0':
            # all-zero hash prefix marks an unpopulated record
            pass
        elif cachenode == reponode:
            try:
                return self._names[branchidx], close
            except IndexError:
                # recover from invalid reference to unknown branch
                self._repo.ui.debug("referenced branch names not found"
                    " - rebuilding revision branch cache from scratch\n")
                self._clear()
        else:
            # rev/node map has changed, invalidate the cache from here up
            self._repo.ui.debug("history modification detected - truncating "
                "revision branch cache to revision %d\n" % rev)
            truncate = rbcrevidx + _rbcrecsize
            del self._rbcrevs[truncate:]
            self._rbcrevslen = min(self._rbcrevslen, truncate)

        # fall back to slow path and make sure it will be written to disk
        return self._branchinfo(rev)

    def _branchinfo(self, rev):
        """Retrieve branch info from changelog and update _rbcrevs"""
        changelog = self._repo.changelog
        b, close = changelog.branchinfo(rev)
        if b in self._namesreverse:
            branchidx = self._namesreverse[b]
        else:
            # first time this branch name is seen: assign it the next index
            branchidx = len(self._names)
            self._names.append(b)
            self._namesreverse[b] = branchidx
        reponode = changelog.node(rev)
        if close:
            branchidx |= _rbccloseflag
        self._setcachedata(rev, reponode, branchidx)
        return b, close

    def _setcachedata(self, rev, node, branchidx):
        """Writes the node's branch data to the in-memory cache data."""
        if rev == nullrev:
            return
        rbcrevidx = rev * _rbcrecsize
        if len(self._rbcrevs) < rbcrevidx + _rbcrecsize:
            # grow the buffer with zero records up to the changelog length
            self._rbcrevs.extend('\0' *
                                 (len(self._repo.changelog) * _rbcrecsize -
                                  len(self._rbcrevs)))
        pack_into(_rbcrecfmt, self._rbcrevs, rbcrevidx, node, branchidx)
        # records from this rev onward are no longer known-good on disk
        self._rbcrevslen = min(self._rbcrevslen, rev)

        tr = self._repo.currenttransaction()
        if tr:
            tr.addfinalize('write-revbranchcache', self.write)

    def write(self, tr=None):
        """Save branch cache if it is dirty."""
        repo = self._repo
        wlock = None
        step = ''
        try:
            # first persist any branch names added since the last write
            if self._rbcnamescount < len(self._names):
                step = ' names'
                wlock = repo.wlock(wait=False)
                if self._rbcnamescount != 0:
                    f = repo.cachevfs.open(_rbcnames, 'ab')
                    if f.tell() == self._rbcsnameslen:
                        f.write('\0')
                    else:
                        # on-disk names diverged from what we read; rewrite all
                        f.close()
                        repo.ui.debug("%s changed - rewriting it\n" % _rbcnames)
                        self._rbcnamescount = 0
                        self._rbcrevslen = 0
                if self._rbcnamescount == 0:
                    # before rewriting names, make sure references are removed
                    repo.cachevfs.unlinkpath(_rbcrevs, ignoremissing=True)
                    f = repo.cachevfs.open(_rbcnames, 'wb')
                f.write('\0'.join(encoding.fromlocal(b)
                                  for b in self._names[self._rbcnamescount:]))
                self._rbcsnameslen = f.tell()
                f.close()
                self._rbcnamescount = len(self._names)

            # then append (or truncate-and-append) the dirty revision records
            start = self._rbcrevslen * _rbcrecsize
            if start != len(self._rbcrevs):
                step = ''
                if wlock is None:
                    wlock = repo.wlock(wait=False)
                revs = min(len(repo.changelog),
                           len(self._rbcrevs) // _rbcrecsize)
                f = repo.cachevfs.open(_rbcrevs, 'ab')
                if f.tell() != start:
                    repo.ui.debug("truncating cache/%s to %d\n"
                                  % (_rbcrevs, start))
                    f.seek(start)
                    if f.tell() != start:
                        # seek past EOF failed; rewrite the whole file
                        start = 0
                        f.seek(start)
                    f.truncate()
                end = revs * _rbcrecsize
                f.write(self._rbcrevs[start:end])
                f.close()
                self._rbcrevslen = revs
        except (IOError, OSError, error.Abort, error.LockError) as inst:
            # best effort: a missing lock or read-only store just skips the write
            repo.ui.debug("couldn't write revision branch cache%s: %s\n"
                          % (step, inst))
        finally:
            if wlock is not None:
                wlock.release()
General Comments 0
You need to be logged in to leave comments. Login now