##// END OF EJS Templates
branchmap: strip '\n' read from cache file as before...
Yuya Nishihara -
r39172:b892df07 default
parent child Browse files
Show More
@@ -1,547 +1,548 b''
1 1 # branchmap.py - logic to compute, maintain and store branchmap for local repo
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import struct
11 11
12 12 from .node import (
13 13 bin,
14 14 hex,
15 15 nullid,
16 16 nullrev,
17 17 )
18 18 from . import (
19 19 encoding,
20 20 error,
21 21 pycompat,
22 22 scmutil,
23 23 util,
24 24 )
25 25 from .utils import (
26 26 stringutil,
27 27 )
28 28
29 29 calcsize = struct.calcsize
30 30 pack_into = struct.pack_into
31 31 unpack_from = struct.unpack_from
32 32
33 33 def _filename(repo):
34 34 """name of a branchcache file for a given repo or repoview"""
35 35 filename = "branch2"
36 36 if repo.filtername:
37 37 filename = '%s-%s' % (filename, repo.filtername)
38 38 return filename
39 39
def read(repo):
    """Read the on-disk branch cache for *repo*.

    Returns a ``branchcache`` instance, or ``None`` when the cache file is
    missing/unreadable or its content is stale or malformed.
    """
    try:
        f = repo.cachevfs(_filename(repo))
        # First line is the cache key written by branchcache.write():
        # "<tip hex node> <tip rev> [optional filtered-repo hash]".
        # Strip the trailing newline before splitting so the last field
        # does not carry a stray '\n'.
        cachekey = next(f).rstrip('\n').split(" ", 2)
        last, lrev = cachekey[:2]
        last, lrev = bin(last), int(lrev)
        filteredhash = None
        if len(cachekey) > 2:
            filteredhash = bin(cachekey[2])
        partial = branchcache(tipnode=last, tiprev=lrev,
                              filteredhash=filteredhash)
        if not partial.validfor(repo):
            # invalidate the cache
            raise ValueError(r'tip differs')
        cl = repo.changelog
        # Remaining lines: "<head hex node> <o|c> <branch name>".
        for l in f:
            l = l.rstrip('\n')
            if not l:
                continue
            node, state, label = l.split(" ", 2)
            if state not in 'oc':
                raise ValueError(r'invalid branch state')
            label = encoding.tolocal(label.strip())
            node = bin(node)
            if not cl.hasnode(node):
                raise ValueError(
                    r'node %s does not exist' % pycompat.sysstr(hex(node)))
            partial.setdefault(label, []).append(node)
            if state == 'c':
                partial._closednodes.add(node)

    except (IOError, OSError):
        # no cache file (or unreadable): caller will rebuild from scratch
        return None

    except Exception as inst:
        # any parse/validation failure invalidates the whole cache
        if repo.ui.debugflag:
            msg = 'invalid branchheads cache'
            if repo.filtername is not None:
                msg += ' (%s)' % repo.filtername
            msg += ': %s\n'
            repo.ui.debug(msg % pycompat.bytestr(inst))
        partial = None
    return partial
82 83
### Nearest subset relation
# The nearest subset of filter X is a filter Y such that:
# * Y is included in X,
# * X - Y is as small as possible.
# This defines an ordering used for branchmap purposes; the ordering may
# be partial.
subsettable = {
    None: 'visible',
    'visible-hidden': 'visible',
    'visible': 'served',
    'served': 'immutable',
    'immutable': 'base',
}
94 95
def updatecache(repo):
    """Ensure ``repo._branchcaches`` holds a valid, up-to-date branchcache
    for the repo's current filter.

    Tries, in order: the in-memory cache, the on-disk cache, a copy of the
    nearest subset view's branchmap, and finally an empty cache; then brings
    the chosen cache up to the current tip and persists it if it changed.
    """
    cl = repo.changelog
    filtername = repo.filtername
    cache = repo._branchcaches.get(filtername)

    missing = []
    if cache is None or not cache.validfor(repo):
        # in-memory cache unusable: fall back to the on-disk one
        cache = read(repo)
        if cache is None:
            subsetname = subsettable.get(filtername)
            if subsetname is not None:
                # seed from the nearest subset view; revisions filtered
                # there but visible here must be (re)computed
                subset = repo.filtered(subsetname)
                cache = subset.branchmap().copy()
                extrarevs = subset.changelog.filteredrevs - cl.filteredrevs
                missing.extend(r for r in extrarevs if r <= cache.tiprev)
            else:
                cache = branchcache()
    # everything past the cached tip always needs to be folded in
    missing.extend(cl.revs(start=cache.tiprev + 1))
    if missing:
        cache.update(repo, missing)
        cache.write(repo)

    assert cache.validfor(repo), filtername
    repo._branchcaches[repo.filtername] = cache
119 120
def replacecache(repo, bm):
    """Replace the branchmap cache for a repo with a branch mapping.

    This is likely only called during clone with a branch map from a remote.
    """
    allheads = []
    closedheads = []
    for heads in bm.itervalues():
        allheads.extend(heads)
        for head in heads:
            headrev = repo.changelog.rev(head)
            branch, isclosed = repo.changelog.branchinfo(headrev)
            if isclosed:
                closedheads.append(head)

    if allheads:
        # cache tip is the highest revision among all received heads
        rtiprev = max(int(repo.changelog.rev(node)) for node in allheads)
        cache = branchcache(bm,
                            repo[rtiprev].node(),
                            rtiprev,
                            closednodes=closedheads)

        # Try to stick it as low as possible
        # filter above served are unlikely to be fetch from a clone
        for candidate in ('base', 'immutable', 'served'):
            rview = repo.filtered(candidate)
            if cache.validfor(rview):
                repo._branchcaches[candidate] = cache
                cache.write(rview)
                break
151 152
class branchcache(dict):
    """A dict like object that holds branches heads cache.

    This cache is used to avoid costly computations to determine all the
    branch heads of a repo.

    The cache is serialized on disk in the following format:

    <tip hex node> <tip rev number> [optional filtered repo hex hash]
    <branch head hex node> <open/closed state> <branch name>
    <branch head hex node> <open/closed state> <branch name>
    ...

    The first line is used to check if the cache is still valid. If the
    branch cache is for a filtered repo view, an optional third hash is
    included that hashes the hashes of all filtered revisions.

    The open/closed state is represented by a single letter 'o' or 'c'.
    This field can be used to avoid changelog reads when determining if a
    branch head closes a branch or not.
    """

    def __init__(self, entries=(), tipnode=nullid, tiprev=nullrev,
                 filteredhash=None, closednodes=None):
        # the dict payload maps branch name -> list of head nodes
        super(branchcache, self).__init__(entries)
        self.tipnode = tipnode
        self.tiprev = tiprev
        self.filteredhash = filteredhash
        # closednodes is a set of nodes that close their branch. If the branch
        # cache has been updated, it may contain nodes that are no longer
        # heads.
        if closednodes is None:
            self._closednodes = set()
        else:
            self._closednodes = closednodes

    def validfor(self, repo):
        """Is the cache content valid regarding a repo

        - False when cached tipnode is unknown or if we detect a strip.
        - True when cache is up to date or a subset of current repo."""
        try:
            # tip node must still resolve to the same rev, and the filtered
            # revision set must not have changed since the cache was built
            return ((self.tipnode == repo.changelog.node(self.tiprev))
                    and (self.filteredhash == \
                         scmutil.filteredhash(repo, self.tiprev)))
        except IndexError:
            # tiprev no longer exists (e.g. after a strip)
            return False

    def _branchtip(self, heads):
        '''Return tuple with last open head in heads and false,
        otherwise return last closed head and true.'''
        tip = heads[-1]
        closed = True
        # scan from the tipmost head backwards for the first open one
        for h in reversed(heads):
            if h not in self._closednodes:
                tip = h
                closed = False
                break
        return tip, closed

    def branchtip(self, branch):
        '''Return the tipmost open head on branch head, otherwise return the
        tipmost closed head on branch.
        Raise KeyError for unknown branch.'''
        return self._branchtip(self[branch])[0]

    def iteropen(self, nodes):
        # generator of the given nodes that do not close their branch
        return (n for n in nodes if n not in self._closednodes)

    def branchheads(self, branch, closed=False):
        """Return the heads of *branch*; include closed heads only when
        *closed* is true. Raises KeyError for an unknown branch."""
        heads = self[branch]
        if not closed:
            heads = list(self.iteropen(heads))
        return heads

    def iterbranches(self):
        # yields (branchname, heads, tipnode, isclosed) tuples
        for bn, heads in self.iteritems():
            yield (bn, heads) + self._branchtip(heads)

    def copy(self):
        """return a deep copy of the branchcache object"""
        return branchcache(self, self.tipnode, self.tiprev, self.filteredhash,
                           self._closednodes)

    def write(self, repo):
        """Serialize this cache to disk in the format documented on the
        class; write failures are logged and otherwise ignored."""
        try:
            f = repo.cachevfs(_filename(repo), "w", atomictemp=True)
            # cache key line: tip node, tip rev, optional filtered hash
            cachekey = [hex(self.tipnode), '%d' % self.tiprev]
            if self.filteredhash is not None:
                cachekey.append(hex(self.filteredhash))
            f.write(" ".join(cachekey) + '\n')
            nodecount = 0
            for label, nodes in sorted(self.iteritems()):
                for node in nodes:
                    nodecount += 1
                    if node in self._closednodes:
                        state = 'c'
                    else:
                        state = 'o'
                    f.write("%s %s %s\n" % (hex(node), state,
                                            encoding.fromlocal(label)))
            f.close()
            repo.ui.log('branchcache',
                        'wrote %s branch cache with %d labels and %d nodes\n',
                        repo.filtername, len(self), nodecount)
        except (IOError, OSError, error.Abort) as inst:
            # Abort may be raised by read only opener, so log and continue
            repo.ui.debug("couldn't write branch cache: %s\n" %
                          stringutil.forcebytestr(inst))

    def update(self, repo, revgen):
        """Given a branchhead cache, self, that may have extra nodes or be
        missing heads, and a generator of nodes that are strictly a superset of
        heads missing, this function updates self to be correct.
        """
        starttime = util.timer()
        cl = repo.changelog
        # collect new branch entries (branch name -> list of new revs)
        newbranches = {}
        getbranchinfo = repo.revbranchcache().branchinfo
        for r in revgen:
            branch, closesbranch = getbranchinfo(r)
            newbranches.setdefault(branch, []).append(r)
            if closesbranch:
                self._closednodes.add(cl.node(r))

        # fetch current topological heads to speed up filtering
        topoheads = set(cl.headrevs())

        # if older branchheads are reachable from new ones, they aren't
        # really branchheads. Note checking parents is insufficient:
        # 1 (branch a) -> 2 (branch b) -> 3 (branch a)
        for branch, newheadrevs in newbranches.iteritems():
            bheads = self.setdefault(branch, [])
            bheadset = set(cl.rev(node) for node in bheads)

            # This has been tested True on all internal usages of this
            # function. Run it again in case of doubt.
            # assert not (set(bheadrevs) & set(newheadrevs))
            newheadrevs.sort()
            bheadset.update(newheadrevs)

            # This prunes out two kinds of heads - heads that are superseded by
            # a head in newheadrevs, and newheadrevs that are not heads because
            # an existing head is their descendant.
            uncertain = bheadset - topoheads
            if uncertain:
                floorrev = min(uncertain)
                ancestors = set(cl.ancestors(newheadrevs, floorrev))
                bheadset -= ancestors
            bheadrevs = sorted(bheadset)
            self[branch] = [cl.node(rev) for rev in bheadrevs]
            tiprev = bheadrevs[-1]
            if tiprev > self.tiprev:
                self.tipnode = cl.node(tiprev)
                self.tiprev = tiprev

        if not self.validfor(repo):
            # cache key is not valid anymore; recompute tip from the
            # surviving heads
            self.tipnode = nullid
            self.tiprev = nullrev
            for heads in self.values():
                tiprev = max(cl.rev(node) for node in heads)
                if tiprev > self.tiprev:
                    self.tipnode = cl.node(tiprev)
                    self.tiprev = tiprev
        self.filteredhash = scmutil.filteredhash(repo, self.tiprev)

        duration = util.timer() - starttime
        repo.ui.log('branchcache', 'updated %s branch cache in %.4f seconds\n',
                    repo.filtername, duration)
323 324
324 325 # Revision branch info cache
325 326
326 327 _rbcversion = '-v1'
327 328 _rbcnames = 'rbc-names' + _rbcversion
328 329 _rbcrevs = 'rbc-revs' + _rbcversion
329 330 # [4 byte hash prefix][4 byte branch name number with sign bit indicating open]
330 331 _rbcrecfmt = '>4sI'
331 332 _rbcrecsize = calcsize(_rbcrecfmt)
332 333 _rbcnodelen = 4
333 334 _rbcbranchidxmask = 0x7fffffff
334 335 _rbccloseflag = 0x80000000
335 336
class revbranchcache(object):
    """Persistent cache, mapping from revision number to branch name and close.
    This is a low level cache, independent of filtering.

    Branch names are stored in rbc-names in internal encoding separated by 0.
    rbc-names is append-only, and each branch name is only stored once and will
    thus have a unique index.

    The branch info for each revision is stored in rbc-revs as constant size
    records. The whole file is read into memory, but it is only 'parsed' on
    demand. The file is usually append-only but will be truncated if repo
    modification is detected.
    The record for each revision contains the first 4 bytes of the
    corresponding node hash, and the record is only used if it still matches.
    Even a completely trashed rbc-revs file will thus still give the right
    result while converging towards full recovery ... assuming no incorrectly
    matching node hashes.
    The record also contains 4 bytes where 31 bits contains the index of the
    branch and the last bit indicates that it is a branch close commit.
    The usage pattern for rbc-revs is thus somewhat similar to 00changelog.i
    and will grow with it but be 1/8th of its size.
    """

    def __init__(self, repo, readonly=True):
        # only ever instantiated on the unfiltered repo
        assert repo.filtername is None
        self._repo = repo
        self._names = [] # branch names in local encoding with static index
        self._rbcrevs = bytearray()
        self._rbcsnameslen = 0 # length of names read at _rbcsnameslen
        try:
            bndata = repo.cachevfs.read(_rbcnames)
            self._rbcsnameslen = len(bndata) # for verification before writing
            if bndata:
                self._names = [encoding.tolocal(bn)
                               for bn in bndata.split('\0')]
        except (IOError, OSError):
            if readonly:
                # don't try to use cache - fall back to the slow path
                self.branchinfo = self._branchinfo

        # rbc-revs is only meaningful if the names table could be read
        if self._names:
            try:
                data = repo.cachevfs.read(_rbcrevs)
                self._rbcrevs[:] = data
            except (IOError, OSError) as inst:
                repo.ui.debug("couldn't read revision branch cache: %s\n" %
                              stringutil.forcebytestr(inst))
        # remember number of good records on disk
        self._rbcrevslen = min(len(self._rbcrevs) // _rbcrecsize,
                               len(repo.changelog))
        if self._rbcrevslen == 0:
            self._names = []
        self._rbcnamescount = len(self._names) # number of names read at
                                               # _rbcsnameslen
        self._namesreverse = dict((b, r) for r, b in enumerate(self._names))

    def _clear(self):
        """Reset the in-memory cache to empty, sized for the current repo."""
        self._rbcsnameslen = 0
        del self._names[:]
        self._rbcnamescount = 0
        self._namesreverse.clear()
        self._rbcrevslen = len(self._repo.changelog)
        self._rbcrevs = bytearray(self._rbcrevslen * _rbcrecsize)

    def branchinfo(self, rev):
        """Return branch name and close flag for rev, using and updating
        persistent cache."""
        changelog = self._repo.changelog
        rbcrevidx = rev * _rbcrecsize

        # avoid negative index, changelog.read(nullrev) is fast without cache
        if rev == nullrev:
            return changelog.branchinfo(rev)

        # if requested rev isn't allocated, grow and cache the rev info
        if len(self._rbcrevs) < rbcrevidx + _rbcrecsize:
            return self._branchinfo(rev)

        # fast path: extract data from cache, use it if node is matching
        reponode = changelog.node(rev)[:_rbcnodelen]
        cachenode, branchidx = unpack_from(
            _rbcrecfmt, util.buffer(self._rbcrevs), rbcrevidx)
        close = bool(branchidx & _rbccloseflag)
        if close:
            branchidx &= _rbcbranchidxmask
        if cachenode == '\0\0\0\0':
            # record never written; fall through to the slow path
            pass
        elif cachenode == reponode:
            try:
                return self._names[branchidx], close
            except IndexError:
                # recover from invalid reference to unknown branch
                self._repo.ui.debug("referenced branch names not found"
                    " - rebuilding revision branch cache from scratch\n")
                self._clear()
        else:
            # rev/node map has changed, invalidate the cache from here up
            self._repo.ui.debug("history modification detected - truncating "
                                "revision branch cache to revision %d\n" % rev)
            truncate = rbcrevidx + _rbcrecsize
            del self._rbcrevs[truncate:]
            # NOTE(review): 'truncate' is a byte offset while _rbcrevslen
            # counts records, so this min() mixes units; the _branchinfo()
            # call below clamps _rbcrevslen to 'rev' via _setcachedata, but
            # verify this is intended.
            self._rbcrevslen = min(self._rbcrevslen, truncate)

        # fall back to slow path and make sure it will be written to disk
        return self._branchinfo(rev)

    def _branchinfo(self, rev):
        """Retrieve branch info from changelog and update _rbcrevs"""
        changelog = self._repo.changelog
        b, close = changelog.branchinfo(rev)
        if b in self._namesreverse:
            branchidx = self._namesreverse[b]
        else:
            # first time we see this branch name: append to the name table
            branchidx = len(self._names)
            self._names.append(b)
            self._namesreverse[b] = branchidx
        reponode = changelog.node(rev)
        if close:
            branchidx |= _rbccloseflag
        self._setcachedata(rev, reponode, branchidx)
        return b, close

    def setdata(self, branch, rev, node, close):
        """add new data information to the cache"""
        if branch in self._namesreverse:
            branchidx = self._namesreverse[branch]
        else:
            branchidx = len(self._names)
            self._names.append(branch)
            self._namesreverse[branch] = branchidx
        if close:
            branchidx |= _rbccloseflag
        self._setcachedata(rev, node, branchidx)
        # If no cache data were readable (non exists, bad permission, etc)
        # the cache was bypassing itself by setting:
        #
        #   self.branchinfo = self._branchinfo
        #
        # Since we now have data in the cache, we need to drop this bypassing.
        if 'branchinfo' in vars(self):
            del self.branchinfo

    def _setcachedata(self, rev, node, branchidx):
        """Writes the node's branch data to the in-memory cache data."""
        if rev == nullrev:
            return
        rbcrevidx = rev * _rbcrecsize
        if len(self._rbcrevs) < rbcrevidx + _rbcrecsize:
            # grow the buffer with zero records up to the changelog length
            self._rbcrevs.extend('\0' *
                                 (len(self._repo.changelog) * _rbcrecsize -
                                  len(self._rbcrevs)))
        pack_into(_rbcrecfmt, self._rbcrevs, rbcrevidx, node, branchidx)
        # everything from this rev on is dirty and must be rewritten
        self._rbcrevslen = min(self._rbcrevslen, rev)

        tr = self._repo.currenttransaction()
        if tr:
            tr.addfinalize('write-revbranchcache', self.write)

    def write(self, tr=None):
        """Save branch cache if it is dirty."""
        repo = self._repo
        wlock = None
        step = ''
        try:
            # step 1: append any new branch names to rbc-names
            if self._rbcnamescount < len(self._names):
                step = ' names'
                wlock = repo.wlock(wait=False)
                if self._rbcnamescount != 0:
                    f = repo.cachevfs.open(_rbcnames, 'ab')
                    if f.tell() == self._rbcsnameslen:
                        f.write('\0')
                    else:
                        # on-disk file no longer matches what we read;
                        # rewrite it from scratch
                        f.close()
                        repo.ui.debug("%s changed - rewriting it\n" % _rbcnames)
                        self._rbcnamescount = 0
                        self._rbcrevslen = 0
                if self._rbcnamescount == 0:
                    # before rewriting names, make sure references are removed
                    repo.cachevfs.unlinkpath(_rbcrevs, ignoremissing=True)
                    f = repo.cachevfs.open(_rbcnames, 'wb')
                f.write('\0'.join(encoding.fromlocal(b)
                                  for b in self._names[self._rbcnamescount:]))
                self._rbcsnameslen = f.tell()
                f.close()
                self._rbcnamescount = len(self._names)

            # step 2: append/rewrite dirty records in rbc-revs
            start = self._rbcrevslen * _rbcrecsize
            if start != len(self._rbcrevs):
                step = ''
                if wlock is None:
                    wlock = repo.wlock(wait=False)
                revs = min(len(repo.changelog),
                           len(self._rbcrevs) // _rbcrecsize)
                f = repo.cachevfs.open(_rbcrevs, 'ab')
                if f.tell() != start:
                    repo.ui.debug("truncating cache/%s to %d\n"
                                  % (_rbcrevs, start))
                    f.seek(start)
                    if f.tell() != start:
                        # seek failed to reach 'start': rewrite everything
                        start = 0
                        f.seek(start)
                    f.truncate()
                end = revs * _rbcrecsize
                f.write(self._rbcrevs[start:end])
                f.close()
                self._rbcrevslen = revs
        except (IOError, OSError, error.Abort, error.LockError) as inst:
            # best effort: a cache write failure must never abort the caller
            repo.ui.debug("couldn't write revision branch cache%s: %s\n"
                          % (step, stringutil.forcebytestr(inst)))
        finally:
            if wlock is not None:
                wlock.release()
General Comments 0
You need to be logged in to leave comments. Login now