py3: pass in system string to vars(branchmap).__contains__()
Yuya Nishihara
r40268:d99a588d default
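The change below touches a single membership test in revbranchcache.setdata(): vars(self) is just self.__dict__, whose keys are native str objects on Python 3, while Mercurial's Python 3 source transformer rewrites unprefixed string literals to bytes. Prefixing the literal with r'' opts out of that rewriting, so the lookup is performed with a system string again. A minimal standalone sketch of the mismatch (the class and attribute here are invented for illustration, they are not Mercurial code):

# Illustration only: __dict__ keys are native str on Python 3, so a bytes
# key (what the source transformer would make of an unprefixed literal)
# can never match.
class C(object):
    pass

obj = C()
obj.branchinfo = 'slow path'           # instance attribute, str key in __dict__
print(b'branchinfo' in vars(obj))      # False on Python 3: bytes != str
print(r'branchinfo' in vars(obj))      # True: r'...' stays a native str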
@@ -1,555 +1,555 @@
1 1 # branchmap.py - logic to compute, maintain and store the branchmap for a local repo
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import struct
11 11
12 12 from .node import (
13 13 bin,
14 14 hex,
15 15 nullid,
16 16 nullrev,
17 17 )
18 18 from . import (
19 19 encoding,
20 20 error,
21 21 pycompat,
22 22 scmutil,
23 23 util,
24 24 )
25 25 from .utils import (
26 26 stringutil,
27 27 )
28 28
29 29 calcsize = struct.calcsize
30 30 pack_into = struct.pack_into
31 31 unpack_from = struct.unpack_from
32 32
33 33 def _filename(repo):
34 34 """name of a branchcache file for a given repo or repoview"""
35 35 filename = "branch2"
36 36 if repo.filtername:
37 37 filename = '%s-%s' % (filename, repo.filtername)
38 38 return filename
39 39
40 40 def read(repo):
41 41 f = None
42 42 try:
43 43 f = repo.cachevfs(_filename(repo))
44 44 lineiter = iter(f)
45 45 cachekey = next(lineiter).rstrip('\n').split(" ", 2)
46 46 last, lrev = cachekey[:2]
47 47 last, lrev = bin(last), int(lrev)
48 48 filteredhash = None
49 49 if len(cachekey) > 2:
50 50 filteredhash = bin(cachekey[2])
51 51 partial = branchcache(tipnode=last, tiprev=lrev,
52 52 filteredhash=filteredhash)
53 53 if not partial.validfor(repo):
54 54 # invalidate the cache
55 55 raise ValueError(r'tip differs')
56 56 cl = repo.changelog
57 57 for l in lineiter:
58 58 l = l.rstrip('\n')
59 59 if not l:
60 60 continue
61 61 node, state, label = l.split(" ", 2)
62 62 if state not in 'oc':
63 63 raise ValueError(r'invalid branch state')
64 64 label = encoding.tolocal(label.strip())
65 65 node = bin(node)
66 66 if not cl.hasnode(node):
67 67 raise ValueError(
68 68 r'node %s does not exist' % pycompat.sysstr(hex(node)))
69 69 partial.setdefault(label, []).append(node)
70 70 if state == 'c':
71 71 partial._closednodes.add(node)
72 72
73 73 except (IOError, OSError):
74 74 return None
75 75
76 76 except Exception as inst:
77 77 if repo.ui.debugflag:
78 78 msg = 'invalid branchheads cache'
79 79 if repo.filtername is not None:
80 80 msg += ' (%s)' % repo.filtername
81 81 msg += ': %s\n'
82 82 repo.ui.debug(msg % pycompat.bytestr(inst))
83 83 partial = None
84 84
85 85 finally:
86 86 if f:
87 87 f.close()
88 88
89 89 return partial
90 90
91 91 ### Nearest subset relation
92 92 # Nearest subset of filter X is a filter Y so that:
93 93 # * Y is included in X,
94 94 # * X - Y is as small as possible.
95 95 # This creates an ordering used for branchmap purposes.
96 96 # The ordering may be partial.
97 97 subsettable = {None: 'visible',
98 98 'visible-hidden': 'visible',
99 99 'visible': 'served',
100 100 'served': 'immutable',
101 101 'immutable': 'base'}
102 102
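For reference, updatecache() below walks this nearest-subset chain when no cache is usable for a filter level: it seeds the branchcache from the branchmap of the nearest subset and then only scans the extra revisions. A rough, hedged sketch of the chain walk (the fallback_chain helper is hypothetical; only the subsettable dict mirrors the code above):

subsettable = {None: 'visible',
               'visible-hidden': 'visible',
               'visible': 'served',
               'served': 'immutable',
               'immutable': 'base'}

def fallback_chain(filtername):
    # yield successively smaller repo views whose branchmap could seed
    # the cache for the given filter level
    name = subsettable.get(filtername)
    while name is not None:
        yield name
        name = subsettable.get(name)

print(list(fallback_chain(None)))   # ['visible', 'served', 'immutable', 'base']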
103 103 def updatecache(repo):
104 104 cl = repo.changelog
105 105 filtername = repo.filtername
106 106 partial = repo._branchcaches.get(filtername)
107 107
108 108 revs = []
109 109 if partial is None or not partial.validfor(repo):
110 110 partial = read(repo)
111 111 if partial is None:
112 112 subsetname = subsettable.get(filtername)
113 113 if subsetname is None:
114 114 partial = branchcache()
115 115 else:
116 116 subset = repo.filtered(subsetname)
117 117 partial = subset.branchmap().copy()
118 118 extrarevs = subset.changelog.filteredrevs - cl.filteredrevs
119 119 revs.extend(r for r in extrarevs if r <= partial.tiprev)
120 120 revs.extend(cl.revs(start=partial.tiprev + 1))
121 121 if revs:
122 122 partial.update(repo, revs)
123 123 partial.write(repo)
124 124
125 125 assert partial.validfor(repo), filtername
126 126 repo._branchcaches[repo.filtername] = partial
127 127
128 128 def replacecache(repo, bm):
129 129 """Replace the branchmap cache for a repo with a branch mapping.
130 130
131 131 This is likely only called during clone with a branch map from a remote.
132 132 """
133 133 rbheads = []
134 134 closed = []
135 135 for bheads in bm.itervalues():
136 136 rbheads.extend(bheads)
137 137 for h in bheads:
138 138 r = repo.changelog.rev(h)
139 139 b, c = repo.changelog.branchinfo(r)
140 140 if c:
141 141 closed.append(h)
142 142
143 143 if rbheads:
144 144 rtiprev = max((int(repo.changelog.rev(node))
145 145 for node in rbheads))
146 146 cache = branchcache(bm,
147 147 repo[rtiprev].node(),
148 148 rtiprev,
149 149 closednodes=closed)
150 150
151 151 # Try to stick it as low as possible
152 152 # filters above served are unlikely to be fetched from a clone
153 153 for candidate in ('base', 'immutable', 'served'):
154 154 rview = repo.filtered(candidate)
155 155 if cache.validfor(rview):
156 156 repo._branchcaches[candidate] = cache
157 157 cache.write(rview)
158 158 break
159 159
160 160 class branchcache(dict):
161 161 """A dict like object that hold branches heads cache.
162 162
163 163 This cache is used to avoid costly computations to determine all the
164 164 branch heads of a repo.
165 165
166 166 The cache is serialized on disk in the following format:
167 167
168 168 <tip hex node> <tip rev number> [optional filtered repo hex hash]
169 169 <branch head hex node> <open/closed state> <branch name>
170 170 <branch head hex node> <open/closed state> <branch name>
171 171 ...
172 172
173 173 The first line is used to check if the cache is still valid. If the
174 174 branch cache is for a filtered repo view, an optional third hash is
175 175 included that hashes the hashes of all filtered revisions.
176 176
177 177 The open/closed state is represented by a single letter 'o' or 'c'.
178 178 This field can be used to avoid changelog reads when determining if a
179 179 branch head closes a branch or not.
180 180 """
181 181
182 182 def __init__(self, entries=(), tipnode=nullid, tiprev=nullrev,
183 183 filteredhash=None, closednodes=None):
184 184 super(branchcache, self).__init__(entries)
185 185 self.tipnode = tipnode
186 186 self.tiprev = tiprev
187 187 self.filteredhash = filteredhash
188 188 # closednodes is a set of nodes that close their branch. If the branch
189 189 # cache has been updated, it may contain nodes that are no longer
190 190 # heads.
191 191 if closednodes is None:
192 192 self._closednodes = set()
193 193 else:
194 194 self._closednodes = closednodes
195 195
196 196 def validfor(self, repo):
197 197 """Is the cache content valid regarding a repo
198 198
199 199 - False when cached tipnode is unknown or if we detect a strip.
200 200 - True when cache is up to date or a subset of current repo."""
201 201 try:
202 202 return ((self.tipnode == repo.changelog.node(self.tiprev))
203 203 and (self.filteredhash == \
204 204 scmutil.filteredhash(repo, self.tiprev)))
205 205 except IndexError:
206 206 return False
207 207
208 208 def _branchtip(self, heads):
209 209 '''Return a tuple with the last open head in heads and False;
210 210 otherwise return the last closed head and True.'''
211 211 tip = heads[-1]
212 212 closed = True
213 213 for h in reversed(heads):
214 214 if h not in self._closednodes:
215 215 tip = h
216 216 closed = False
217 217 break
218 218 return tip, closed
219 219
220 220 def branchtip(self, branch):
221 221 '''Return the tipmost open head on branch, otherwise return the
222 222 tipmost closed head on branch.
223 223 Raise KeyError for unknown branch.'''
224 224 return self._branchtip(self[branch])[0]
225 225
226 226 def iteropen(self, nodes):
227 227 return (n for n in nodes if n not in self._closednodes)
228 228
229 229 def branchheads(self, branch, closed=False):
230 230 heads = self[branch]
231 231 if not closed:
232 232 heads = list(self.iteropen(heads))
233 233 return heads
234 234
235 235 def iterbranches(self):
236 236 for bn, heads in self.iteritems():
237 237 yield (bn, heads) + self._branchtip(heads)
238 238
239 239 def copy(self):
240 240 """return an deep copy of the branchcache object"""
241 241 return branchcache(self, self.tipnode, self.tiprev, self.filteredhash,
242 242 self._closednodes)
243 243
244 244 def write(self, repo):
245 245 try:
246 246 f = repo.cachevfs(_filename(repo), "w", atomictemp=True)
247 247 cachekey = [hex(self.tipnode), '%d' % self.tiprev]
248 248 if self.filteredhash is not None:
249 249 cachekey.append(hex(self.filteredhash))
250 250 f.write(" ".join(cachekey) + '\n')
251 251 nodecount = 0
252 252 for label, nodes in sorted(self.iteritems()):
253 253 for node in nodes:
254 254 nodecount += 1
255 255 if node in self._closednodes:
256 256 state = 'c'
257 257 else:
258 258 state = 'o'
259 259 f.write("%s %s %s\n" % (hex(node), state,
260 260 encoding.fromlocal(label)))
261 261 f.close()
262 262 repo.ui.log('branchcache',
263 263 'wrote %s branch cache with %d labels and %d nodes\n',
264 264 repo.filtername, len(self), nodecount)
265 265 except (IOError, OSError, error.Abort) as inst:
266 266 # Abort may be raised by a read-only opener, so log and continue
267 267 repo.ui.debug("couldn't write branch cache: %s\n" %
268 268 stringutil.forcebytestr(inst))
269 269
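To make the serialization implemented by write() above (and parsed by read() at the top of the module) concrete, here is a hedged parsing sketch. The sample uses invented, truncated hashes purely to keep the lines short; the real format carries full 40-character hex nodes:

sample = ("1450a5af0828 4217\n"        # cache key: tip node, tip rev
          "9fe3a8e0c4aa o default\n"   # open head on branch 'default'
          "77c1a9b5e0d2 c stable\n")   # closed head on branch 'stable'

lines = iter(sample.splitlines())
tipnode, tiprev = next(lines).split(" ", 2)[:2]
heads, closed = {}, set()
for line in lines:
    node, state, label = line.split(" ", 2)
    heads.setdefault(label, []).append(node)
    if state == 'c':
        closed.add(node)
print(tipnode, int(tiprev), heads, sorted(closed))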
270 270 def update(self, repo, revgen):
271 271 """Given a branchhead cache, self, that may have extra nodes or be
272 272 missing heads, and a generator of nodes that are strictly a superset of
273 273 heads missing, this function updates self to be correct.
274 274 """
275 275 starttime = util.timer()
276 276 cl = repo.changelog
277 277 # collect new branch entries
278 278 newbranches = {}
279 279 getbranchinfo = repo.revbranchcache().branchinfo
280 280 for r in revgen:
281 281 branch, closesbranch = getbranchinfo(r)
282 282 newbranches.setdefault(branch, []).append(r)
283 283 if closesbranch:
284 284 self._closednodes.add(cl.node(r))
285 285
286 286 # fetch current topological heads to speed up filtering
287 287 topoheads = set(cl.headrevs())
288 288
289 289 # if older branchheads are reachable from new ones, they aren't
290 290 # really branchheads. Note checking parents is insufficient:
291 291 # 1 (branch a) -> 2 (branch b) -> 3 (branch a)
292 292 for branch, newheadrevs in newbranches.iteritems():
293 293 bheads = self.setdefault(branch, [])
294 294 bheadset = set(cl.rev(node) for node in bheads)
295 295
296 296 # This has been tested True on all internal usages of this function.
297 297 # Run it again in case of doubt:
298 298 # assert not (set(bheadrevs) & set(newheadrevs))
299 299 newheadrevs.sort()
300 300 bheadset.update(newheadrevs)
301 301
302 302 # This prunes out two kinds of heads - heads that are superseded by
303 303 # a head in newheadrevs, and newheadrevs that are not heads because
304 304 # an existing head is their descendant.
305 305 uncertain = bheadset - topoheads
306 306 if uncertain:
307 307 floorrev = min(uncertain)
308 308 ancestors = set(cl.ancestors(newheadrevs, floorrev))
309 309 bheadset -= ancestors
310 310 bheadrevs = sorted(bheadset)
311 311 self[branch] = [cl.node(rev) for rev in bheadrevs]
312 312 tiprev = bheadrevs[-1]
313 313 if tiprev > self.tiprev:
314 314 self.tipnode = cl.node(tiprev)
315 315 self.tiprev = tiprev
316 316
317 317 if not self.validfor(repo):
318 318 # cache key is not valid anymore
319 319 self.tipnode = nullid
320 320 self.tiprev = nullrev
321 321 for heads in self.values():
322 322 tiprev = max(cl.rev(node) for node in heads)
323 323 if tiprev > self.tiprev:
324 324 self.tipnode = cl.node(tiprev)
325 325 self.tiprev = tiprev
326 326 self.filteredhash = scmutil.filteredhash(repo, self.tiprev)
327 327
328 328 duration = util.timer() - starttime
329 329 repo.ui.log('branchcache', 'updated %s branch cache in %.4f seconds\n',
330 330 repo.filtername, duration)
331 331
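The pruning step in update() above is the subtle part: an old cached head that is now an ancestor of a new revision must stop being a head, and a new revision that has a descendant head must never become one. A hedged, self-contained sketch of that idea; the tiny DAG and the ancestors() helper are invented stand-ins for the changelog API:

parents = {0: [], 1: [0], 2: [1], 3: [1]}      # revs 2 and 3 both branch off 1

def ancestors(revs, floor):
    # ancestors of revs (excluding revs themselves) down to floor
    seen, stack = set(), list(revs)
    while stack:
        r = stack.pop()
        for p in parents[r]:
            if p >= floor and p not in seen:
                seen.add(p)
                stack.append(p)
    return seen

topoheads = {2, 3}
bheadset = {1}            # previously cached head for the branch
newheadrevs = [2, 3]      # new revs on the same branch
bheadset.update(newheadrevs)

uncertain = bheadset - topoheads          # {1}: might no longer be a head
if uncertain:
    floorrev = min(uncertain)
    bheadset -= ancestors(newheadrevs, floorrev)
print(sorted(bheadset))   # [2, 3]: rev 1 was pruned, it is now an ancestor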
332 332 # Revision branch info cache
333 333
334 334 _rbcversion = '-v1'
335 335 _rbcnames = 'rbc-names' + _rbcversion
336 336 _rbcrevs = 'rbc-revs' + _rbcversion
337 337 # [4 byte hash prefix][4 byte branch name number, with the most significant bit indicating a branch close]
338 338 _rbcrecfmt = '>4sI'
339 339 _rbcrecsize = calcsize(_rbcrecfmt)
340 340 _rbcnodelen = 4
341 341 _rbcbranchidxmask = 0x7fffffff
342 342 _rbccloseflag = 0x80000000
343 343
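As a quick sanity check of the record layout defined above, a small sketch that packs and unpacks one rbc-revs record with the same struct format and flag constants; the node prefix and branch index values are invented:

import struct

_rbcrecfmt = '>4sI'
_rbcrecsize = struct.calcsize(_rbcrecfmt)   # 8 bytes per revision
_rbcbranchidxmask = 0x7fffffff
_rbccloseflag = 0x80000000

# pack: 4-byte node hash prefix plus branch index with the close bit set
record = struct.pack(_rbcrecfmt, b'\xca\xfe\xba\xbe', 5 | _rbccloseflag)
assert len(record) == _rbcrecsize

# unpack: recover the prefix, then split the close flag from the index
prefix, branchidx = struct.unpack_from(_rbcrecfmt, record, 0)
close = bool(branchidx & _rbccloseflag)
branchidx &= _rbcbranchidxmask
print(prefix, branchidx, close)             # b'\xca\xfe\xba\xbe' 5 True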
344 344 class revbranchcache(object):
345 345 """Persistent cache, mapping from revision number to branch name and close.
346 346 This is a low level cache, independent of filtering.
347 347
348 348 Branch names are stored in rbc-names in internal encoding separated by 0.
349 349 rbc-names is append-only, and each branch name is only stored once and will
350 350 thus have a unique index.
351 351
352 352 The branch info for each revision is stored in rbc-revs as constant size
353 353 records. The whole file is read into memory, but it is only 'parsed' on
354 354 demand. The file is usually append-only but will be truncated if repo
355 355 modification is detected.
356 356 The record for each revision contains the first 4 bytes of the
357 357 corresponding node hash, and the record is only used if it still matches.
358 358 Even a completely trashed rbc-revs will thus still give the right result
359 359 while converging towards full recovery ... assuming no incorrectly matching
360 360 node hashes.
361 361 The record also contains 4 bytes where 31 bits contain the index of the
362 362 branch and the last bit indicates that it is a branch close commit.
363 363 The usage pattern for rbc-revs is thus somewhat similar to 00changelog.i
364 364 and will grow with it but be 1/8th of its size.
365 365 """
366 366
367 367 def __init__(self, repo, readonly=True):
368 368 assert repo.filtername is None
369 369 self._repo = repo
370 370 self._names = [] # branch names in local encoding with static index
371 371 self._rbcrevs = bytearray()
372 372 self._rbcsnameslen = 0 # length of rbc-names data read so far
373 373 try:
374 374 bndata = repo.cachevfs.read(_rbcnames)
375 375 self._rbcsnameslen = len(bndata) # for verification before writing
376 376 if bndata:
377 377 self._names = [encoding.tolocal(bn)
378 378 for bn in bndata.split('\0')]
379 379 except (IOError, OSError):
380 380 if readonly:
381 381 # don't try to use cache - fall back to the slow path
382 382 self.branchinfo = self._branchinfo
383 383
384 384 if self._names:
385 385 try:
386 386 data = repo.cachevfs.read(_rbcrevs)
387 387 self._rbcrevs[:] = data
388 388 except (IOError, OSError) as inst:
389 389 repo.ui.debug("couldn't read revision branch cache: %s\n" %
390 390 stringutil.forcebytestr(inst))
391 391 # remember number of good records on disk
392 392 self._rbcrevslen = min(len(self._rbcrevs) // _rbcrecsize,
393 393 len(repo.changelog))
394 394 if self._rbcrevslen == 0:
395 395 self._names = []
396 396 self._rbcnamescount = len(self._names) # number of names read at
397 397 # _rbcsnameslen bytes
398 398 self._namesreverse = dict((b, r) for r, b in enumerate(self._names))
399 399
400 400 def _clear(self):
401 401 self._rbcsnameslen = 0
402 402 del self._names[:]
403 403 self._rbcnamescount = 0
404 404 self._namesreverse.clear()
405 405 self._rbcrevslen = len(self._repo.changelog)
406 406 self._rbcrevs = bytearray(self._rbcrevslen * _rbcrecsize)
407 407
408 408 def branchinfo(self, rev):
409 409 """Return branch name and close flag for rev, using and updating
410 410 persistent cache."""
411 411 changelog = self._repo.changelog
412 412 rbcrevidx = rev * _rbcrecsize
413 413
414 414 # avoid negative index, changelog.read(nullrev) is fast without cache
415 415 if rev == nullrev:
416 416 return changelog.branchinfo(rev)
417 417
418 418 # if requested rev isn't allocated, grow and cache the rev info
419 419 if len(self._rbcrevs) < rbcrevidx + _rbcrecsize:
420 420 return self._branchinfo(rev)
421 421
422 422 # fast path: extract data from cache, use it if node is matching
423 423 reponode = changelog.node(rev)[:_rbcnodelen]
424 424 cachenode, branchidx = unpack_from(
425 425 _rbcrecfmt, util.buffer(self._rbcrevs), rbcrevidx)
426 426 close = bool(branchidx & _rbccloseflag)
427 427 if close:
428 428 branchidx &= _rbcbranchidxmask
429 429 if cachenode == '\0\0\0\0':
430 430 pass
431 431 elif cachenode == reponode:
432 432 try:
433 433 return self._names[branchidx], close
434 434 except IndexError:
435 435 # recover from invalid reference to unknown branch
436 436 self._repo.ui.debug("referenced branch names not found"
437 437 " - rebuilding revision branch cache from scratch\n")
438 438 self._clear()
439 439 else:
440 440 # rev/node map has changed, invalidate the cache from here up
441 441 self._repo.ui.debug("history modification detected - truncating "
442 442 "revision branch cache to revision %d\n" % rev)
443 443 truncate = rbcrevidx + _rbcrecsize
444 444 del self._rbcrevs[truncate:]
445 445 self._rbcrevslen = min(self._rbcrevslen, truncate)
446 446
447 447 # fall back to slow path and make sure it will be written to disk
448 448 return self._branchinfo(rev)
449 449
450 450 def _branchinfo(self, rev):
451 451 """Retrieve branch info from changelog and update _rbcrevs"""
452 452 changelog = self._repo.changelog
453 453 b, close = changelog.branchinfo(rev)
454 454 if b in self._namesreverse:
455 455 branchidx = self._namesreverse[b]
456 456 else:
457 457 branchidx = len(self._names)
458 458 self._names.append(b)
459 459 self._namesreverse[b] = branchidx
460 460 reponode = changelog.node(rev)
461 461 if close:
462 462 branchidx |= _rbccloseflag
463 463 self._setcachedata(rev, reponode, branchidx)
464 464 return b, close
465 465
466 466 def setdata(self, branch, rev, node, close):
467 467 """add new data information to the cache"""
468 468 if branch in self._namesreverse:
469 469 branchidx = self._namesreverse[branch]
470 470 else:
471 471 branchidx = len(self._names)
472 472 self._names.append(branch)
473 473 self._namesreverse[branch] = branchidx
474 474 if close:
475 475 branchidx |= _rbccloseflag
476 476 self._setcachedata(rev, node, branchidx)
477 477 # If no cache data were readable (it does not exist, bad permissions, etc.)
478 478 # the cache was bypassing itself by setting:
479 479 #
480 480 # self.branchinfo = self._branchinfo
481 481 #
482 482 # Since we now have data in the cache, we need to drop this bypassing.
483 if 'branchinfo' in vars(self):
483 if r'branchinfo' in vars(self):
484 484 del self.branchinfo
485 485
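The del self.branchinfo above relies on ordinary Python attribute lookup: an instance attribute shadows the class method of the same name, and deleting the instance attribute makes the class method visible again. This is the bypass that __init__ installs when rbc-names cannot be read. A tiny standalone sketch of the pattern (the Cache class is illustrative, not Mercurial's):

class Cache(object):
    def branchinfo(self, rev):
        return 'fast path', rev

    def _branchinfo(self, rev):
        return 'slow path', rev

    def disable(self):
        # bypass the fast path by shadowing the method on the instance
        self.branchinfo = self._branchinfo

    def enable(self):
        # drop the shadow so the class method is reachable again;
        # the key must be a native str, hence r'...' in the patch
        if r'branchinfo' in vars(self):
            del self.branchinfo

c = Cache()
c.disable()
print(c.branchinfo(1))   # ('slow path', 1)
c.enable()
print(c.branchinfo(1))   # ('fast path', 1)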
486 486 def _setcachedata(self, rev, node, branchidx):
487 487 """Writes the node's branch data to the in-memory cache data."""
488 488 if rev == nullrev:
489 489 return
490 490 rbcrevidx = rev * _rbcrecsize
491 491 if len(self._rbcrevs) < rbcrevidx + _rbcrecsize:
492 492 self._rbcrevs.extend('\0' *
493 493 (len(self._repo.changelog) * _rbcrecsize -
494 494 len(self._rbcrevs)))
495 495 pack_into(_rbcrecfmt, self._rbcrevs, rbcrevidx, node, branchidx)
496 496 self._rbcrevslen = min(self._rbcrevslen, rev)
497 497
498 498 tr = self._repo.currenttransaction()
499 499 if tr:
500 500 tr.addfinalize('write-revbranchcache', self.write)
501 501
502 502 def write(self, tr=None):
503 503 """Save branch cache if it is dirty."""
504 504 repo = self._repo
505 505 wlock = None
506 506 step = ''
507 507 try:
508 508 if self._rbcnamescount < len(self._names):
509 509 step = ' names'
510 510 wlock = repo.wlock(wait=False)
511 511 if self._rbcnamescount != 0:
512 512 f = repo.cachevfs.open(_rbcnames, 'ab')
513 513 if f.tell() == self._rbcsnameslen:
514 514 f.write('\0')
515 515 else:
516 516 f.close()
517 517 repo.ui.debug("%s changed - rewriting it\n" % _rbcnames)
518 518 self._rbcnamescount = 0
519 519 self._rbcrevslen = 0
520 520 if self._rbcnamescount == 0:
521 521 # before rewriting names, make sure references are removed
522 522 repo.cachevfs.unlinkpath(_rbcrevs, ignoremissing=True)
523 523 f = repo.cachevfs.open(_rbcnames, 'wb')
524 524 f.write('\0'.join(encoding.fromlocal(b)
525 525 for b in self._names[self._rbcnamescount:]))
526 526 self._rbcsnameslen = f.tell()
527 527 f.close()
528 528 self._rbcnamescount = len(self._names)
529 529
530 530 start = self._rbcrevslen * _rbcrecsize
531 531 if start != len(self._rbcrevs):
532 532 step = ''
533 533 if wlock is None:
534 534 wlock = repo.wlock(wait=False)
535 535 revs = min(len(repo.changelog),
536 536 len(self._rbcrevs) // _rbcrecsize)
537 537 f = repo.cachevfs.open(_rbcrevs, 'ab')
538 538 if f.tell() != start:
539 539 repo.ui.debug("truncating cache/%s to %d\n"
540 540 % (_rbcrevs, start))
541 541 f.seek(start)
542 542 if f.tell() != start:
543 543 start = 0
544 544 f.seek(start)
545 545 f.truncate()
546 546 end = revs * _rbcrecsize
547 547 f.write(self._rbcrevs[start:end])
548 548 f.close()
549 549 self._rbcrevslen = revs
550 550 except (IOError, OSError, error.Abort, error.LockError) as inst:
551 551 repo.ui.debug("couldn't write revision branch cache%s: %s\n"
552 552 % (step, stringutil.forcebytestr(inst)))
553 553 finally:
554 554 if wlock is not None:
555 555 wlock.release()