##// END OF EJS Templates
branchmap: revert c34532365b38 for Python 2.7 compatibility...
Mike Hommey -
r33737:1814ca41 stable
parent child Browse files
Show More
@@ -1,519 +1,520 b''
1 1 # branchmap.py - logic to computes, maintain and stores branchmap for local repo
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import struct
11 11
12 12 from .node import (
13 13 bin,
14 14 hex,
15 15 nullid,
16 16 nullrev,
17 17 )
18 18 from . import (
19 19 encoding,
20 20 error,
21 21 scmutil,
22 22 util,
23 23 )
24 24
25 25 calcsize = struct.calcsize
26 26 pack_into = struct.pack_into
27 27 unpack_from = struct.unpack_from
28 28
29 29 def _filename(repo):
30 30 """name of a branchcache file for a given repo or repoview"""
31 31 filename = "branch2"
32 32 if repo.filtername:
33 33 filename = '%s-%s' % (filename, repo.filtername)
34 34 return filename
35 35
def read(repo):
    """Read the on-disk branch cache for repo and return a branchcache.

    Returns None when the cache file is missing/unreadable, or when any
    parsing or validation error shows the cached content is stale for the
    current repository state.
    """
    try:
        f = repo.cachevfs(_filename(repo))
        lines = f.read().split('\n')
        f.close()
    except (IOError, OSError):
        # no readable cache file: caller will rebuild from scratch
        return None

    try:
        # first line is the cache key: "<tip hex> <tip rev> [filtered hash]"
        cachekey = lines.pop(0).split(" ", 2)
        last, lrev = cachekey[:2]
        last, lrev = bin(last), int(lrev)
        filteredhash = None
        if len(cachekey) > 2:
            filteredhash = bin(cachekey[2])
        partial = branchcache(tipnode=last, tiprev=lrev,
                              filteredhash=filteredhash)
        if not partial.validfor(repo):
            # invalidate the cache
            raise ValueError('tip differs')
        cl = repo.changelog
        # remaining lines: "<head hex> <o|c> <branch name>"
        for l in lines:
            if not l:
                continue
            node, state, label = l.split(" ", 2)
            if state not in 'oc':
                raise ValueError('invalid branch state')
            label = encoding.tolocal(label.strip())
            node = bin(node)
            if not cl.hasnode(node):
                raise ValueError('node %s does not exist' % hex(node))
            partial.setdefault(label, []).append(node)
            if state == 'c':
                partial._closednodes.add(node)
    except Exception as inst:
        # any failure above discards the whole cache; only report it when
        # debugging is enabled
        if repo.ui.debugflag:
            msg = 'invalid branchheads cache'
            if repo.filtername is not None:
                msg += ' (%s)' % repo.filtername
            msg += ': %s\n'
            repo.ui.debug(msg % inst)
        partial = None
    return partial
79 79
### Nearest subset relation
# Nearest subset of filter X is a filter Y so that:
# * Y is included in X,
# * X - Y is as small as possible.
# This creates an ordering used for branchmap purposes;
# the ordering may be partial.
subsettable = {None: 'visible',
               'visible': 'served',
               'served': 'immutable',
               'immutable': 'base'}
90 90
def updatecache(repo):
    """Bring the branch cache for repo's current filter level up to date.

    Seeds the cache, in order of preference, from: the in-memory cache, the
    on-disk cache, or a copy of the nearest subset filter's cache (per
    subsettable). It is then updated with any revisions not yet covered,
    written back to disk, and stored in repo._branchcaches.
    """
    cl = repo.changelog
    filtername = repo.filtername
    partial = repo._branchcaches.get(filtername)

    revs = []
    if partial is None or not partial.validfor(repo):
        partial = read(repo)
        if partial is None:
            subsetname = subsettable.get(filtername)
            if subsetname is None:
                # no nearest subset defined: compute from scratch
                partial = branchcache()
            else:
                # start from the nearest subset's cache; revisions filtered
                # there but visible here must be (re)processed
                subset = repo.filtered(subsetname)
                partial = subset.branchmap().copy()
                extrarevs = subset.changelog.filteredrevs - cl.filteredrevs
                revs.extend(r for r in extrarevs if r <= partial.tiprev)
    revs.extend(cl.revs(start=partial.tiprev + 1))
    if revs:
        partial.update(repo, revs)
        partial.write(repo)

    assert partial.validfor(repo), filtername
    repo._branchcaches[repo.filtername] = partial
115 115
def replacecache(repo, bm):
    """Replace the branchmap cache for a repo with a branch mapping.

    This is likely only called during clone with a branch map from a remote.
    """
    rbheads = []
    closed = []
    for bheads in bm.itervalues():
        rbheads.extend(bheads)
        for h in bheads:
            r = repo.changelog.rev(h)
            b, c = repo.changelog.branchinfo(r)
            if c:
                # remember heads that close their branch
                closed.append(h)

    if rbheads:
        # cache tip is the highest revision among all received heads
        rtiprev = max((int(repo.changelog.rev(node))
                       for node in rbheads))
        cache = branchcache(bm,
                            repo[rtiprev].node(),
                            rtiprev,
                            closednodes=closed)

        # Try to stick it as low as possible
        # filter above served are unlikely to be fetch from a clone
        for candidate in ('base', 'immutable', 'served'):
            rview = repo.filtered(candidate)
            if cache.validfor(rview):
                repo._branchcaches[candidate] = cache
                cache.write(rview)
                break
147 147
class branchcache(dict):
    """A dict like object that hold branches heads cache.

    This cache is used to avoid costly computations to determine all the
    branch heads of a repo.

    The cache is serialized on disk in the following format:

    <tip hex node> <tip rev number> [optional filtered repo hex hash]
    <branch head hex node> <open/closed state> <branch name>
    <branch head hex node> <open/closed state> <branch name>
    ...

    The first line is used to check if the cache is still valid. If the
    branch cache is for a filtered repo view, an optional third hash is
    included that hashes the hashes of all filtered revisions.

    The open/closed state is represented by a single letter 'o' or 'c'.
    This field can be used to avoid changelog reads when determining if a
    branch head closes a branch or not.
    """

    def __init__(self, entries=(), tipnode=nullid, tiprev=nullrev,
                 filteredhash=None, closednodes=None):
        super(branchcache, self).__init__(entries)
        # cache key: node/rev of the tip at the time the cache was computed
        self.tipnode = tipnode
        self.tiprev = tiprev
        self.filteredhash = filteredhash
        # closednodes is a set of nodes that close their branch. If the branch
        # cache has been updated, it may contain nodes that are no longer
        # heads.
        if closednodes is None:
            self._closednodes = set()
        else:
            self._closednodes = closednodes

    def validfor(self, repo):
        """Is the cache content valid regarding a repo

        - False when cached tipnode is unknown or if we detect a strip.
        - True when cache is up to date or a subset of current repo."""
        try:
            return ((self.tipnode == repo.changelog.node(self.tiprev))
                    and (self.filteredhash == \
                         scmutil.filteredhash(repo, self.tiprev)))
        except IndexError:
            # tiprev no longer exists (e.g. after a strip)
            return False

    def _branchtip(self, heads):
        '''Return tuple with last open head in heads and false,
        otherwise return last closed head and true.'''
        tip = heads[-1]
        closed = True
        # scan from the tipmost head backwards for the first open one
        for h in reversed(heads):
            if h not in self._closednodes:
                tip = h
                closed = False
                break
        return tip, closed

    def branchtip(self, branch):
        '''Return the tipmost open head on branch head, otherwise return the
        tipmost closed head on branch.
        Raise KeyError for unknown branch.'''
        return self._branchtip(self[branch])[0]

    def branchheads(self, branch, closed=False):
        '''Return the heads of branch; closed heads are filtered out unless
        closed=True. Raise KeyError for unknown branch.'''
        heads = self[branch]
        if not closed:
            heads = [h for h in heads if h not in self._closednodes]
        return heads

    def iterbranches(self):
        '''Yield (branchname, heads, tipnode, isclosed) for every branch.'''
        for bn, heads in self.iteritems():
            yield (bn, heads) + self._branchtip(heads)

    def copy(self):
        """return a deep copy of the branchcache object"""
        return branchcache(self, self.tipnode, self.tiprev, self.filteredhash,
                           self._closednodes)

    def write(self, repo):
        """Serialize the cache to disk (best effort; failures are logged
        at debug level and otherwise ignored)."""
        try:
            f = repo.cachevfs(_filename(repo), "w", atomictemp=True)
            # cache key line: tip node, tip rev, optional filtered hash
            cachekey = [hex(self.tipnode), '%d' % self.tiprev]
            if self.filteredhash is not None:
                cachekey.append(hex(self.filteredhash))
            f.write(" ".join(cachekey) + '\n')
            nodecount = 0
            for label, nodes in sorted(self.iteritems()):
                for node in nodes:
                    nodecount += 1
                    if node in self._closednodes:
                        state = 'c'
                    else:
                        state = 'o'
                    f.write("%s %s %s\n" % (hex(node), state,
                                            encoding.fromlocal(label)))
            f.close()
            repo.ui.log('branchcache',
                        'wrote %s branch cache with %d labels and %d nodes\n',
                        repo.filtername, len(self), nodecount)
        except (IOError, OSError, error.Abort) as inst:
            repo.ui.debug("couldn't write branch cache: %s\n" % inst)
            # Abort may be raised by a read-only opener
            pass

    def update(self, repo, revgen):
        """Given a branchhead cache, self, that may have extra nodes or be
        missing heads, and a generator of nodes that are strictly a superset of
        heads missing, this function updates self to be correct.
        """
        starttime = util.timer()
        cl = repo.changelog
        # collect new branch entries
        newbranches = {}
        getbranchinfo = repo.revbranchcache().branchinfo
        for r in revgen:
            branch, closesbranch = getbranchinfo(r)
            newbranches.setdefault(branch, []).append(r)
            if closesbranch:
                self._closednodes.add(cl.node(r))

        # fetch current topological heads to speed up filtering
        topoheads = set(cl.headrevs())

        # if older branchheads are reachable from new ones, they aren't
        # really branchheads. Note checking parents is insufficient:
        # 1 (branch a) -> 2 (branch b) -> 3 (branch a)
        for branch, newheadrevs in newbranches.iteritems():
            bheads = self.setdefault(branch, [])
            bheadset = set(cl.rev(node) for node in bheads)

            # This has been tested True on all internal usage of this function.
            # run it again in case of doubt
            # assert not (set(bheadrevs) & set(newheadrevs))
            newheadrevs.sort()
            bheadset.update(newheadrevs)

            # This prunes out two kinds of heads - heads that are superseded by
            # a head in newheadrevs, and newheadrevs that are not heads because
            # an existing head is their descendant.
            uncertain = bheadset - topoheads
            if uncertain:
                floorrev = min(uncertain)
                ancestors = set(cl.ancestors(newheadrevs, floorrev))
                bheadset -= ancestors
            bheadrevs = sorted(bheadset)
            self[branch] = [cl.node(rev) for rev in bheadrevs]
            tiprev = bheadrevs[-1]
            if tiprev > self.tiprev:
                self.tipnode = cl.node(tiprev)
                self.tiprev = tiprev

        if not self.validfor(repo):
            # the cache key is not valid anymore: recompute the tip from the
            # heads we actually hold
            self.tipnode = nullid
            self.tiprev = nullrev
            for heads in self.values():
                tiprev = max(cl.rev(node) for node in heads)
                if tiprev > self.tiprev:
                    self.tipnode = cl.node(tiprev)
                    self.tiprev = tiprev
        self.filteredhash = scmutil.filteredhash(repo, self.tiprev)

        duration = util.timer() - starttime
        repo.ui.log('branchcache', 'updated %s branch cache in %.4f seconds\n',
                    repo.filtername, duration)
316 316
# Revision branch info cache

_rbcversion = '-v1'
# cache file holding the branch names, '\0'-separated, in internal encoding
_rbcnames = 'rbc-names' + _rbcversion
# cache file holding one fixed-size record per revision
_rbcrevs = 'rbc-revs' + _rbcversion
# [4 byte hash prefix][4 byte branch name number with sign bit indicating open]
_rbcrecfmt = '>4sI'
_rbcrecsize = calcsize(_rbcrecfmt)
# number of node-hash bytes stored per record for validation
_rbcnodelen = 4
# low 31 bits of the second field: index into the rbc-names list
_rbcbranchidxmask = 0x7fffffff
# high bit of the second field: revision closes its branch
_rbccloseflag = 0x80000000
328 328
class revbranchcache(object):
    """Persistent cache, mapping from revision number to branch name and close.
    This is a low level cache, independent of filtering.

    Branch names are stored in rbc-names in internal encoding separated by 0.
    rbc-names is append-only, and each branch name is only stored once and will
    thus have a unique index.

    The branch info for each revision is stored in rbc-revs as constant size
    records. The whole file is read into memory, but it is only 'parsed' on
    demand. The file is usually append-only but will be truncated if repo
    modification is detected.
    The record for each revision contains the first 4 bytes of the
    corresponding node hash, and the record is only used if it still matches.
    Even a completely trashed rbc-revs will thus still give the right result
    while converging towards full recovery ... assuming no incorrectly matching
    node hashes.
    The record also contains 4 bytes where 31 bits contains the index of the
    branch and the last bit indicate that it is a branch close commit.
    The usage pattern for rbc-revs is thus somewhat similar to 00changelog.i
    and will grow with it but be 1/8th of its size.
    """

    def __init__(self, repo, readonly=True):
        assert repo.filtername is None
        self._repo = repo
        self._names = [] # branch names in local encoding with static index
        self._rbcrevs = bytearray()
        self._rbcsnameslen = 0 # length of names read at _rbcsnameslen
        try:
            bndata = repo.cachevfs.read(_rbcnames)
            self._rbcsnameslen = len(bndata) # for verification before writing
            if bndata:
                self._names = [encoding.tolocal(bn)
                               for bn in bndata.split('\0')]
        except (IOError, OSError):
            if readonly:
                # don't try to use cache - fall back to the slow path
                self.branchinfo = self._branchinfo

        if self._names:
            try:
                data = repo.cachevfs.read(_rbcrevs)
                self._rbcrevs[:] = data
            except (IOError, OSError) as inst:
                repo.ui.debug("couldn't read revision branch cache: %s\n" %
                              inst)
        # remember number of good records on disk
        self._rbcrevslen = min(len(self._rbcrevs) // _rbcrecsize,
                               len(repo.changelog))
        if self._rbcrevslen == 0:
            # no usable records: the name list cannot be trusted either
            self._names = []
        self._rbcnamescount = len(self._names) # number of names read at
                                               # _rbcsnameslen
        self._namesreverse = dict((b, r) for r, b in enumerate(self._names))

    def _clear(self):
        """Reset the in-memory cache to an empty, zero-filled state."""
        self._rbcsnameslen = 0
        del self._names[:]
        self._rbcnamescount = 0
        self._namesreverse.clear()
        self._rbcrevslen = len(self._repo.changelog)
        self._rbcrevs = bytearray(self._rbcrevslen * _rbcrecsize)

    def branchinfo(self, rev):
        """Return branch name and close flag for rev, using and updating
        persistent cache."""
        changelog = self._repo.changelog
        rbcrevidx = rev * _rbcrecsize

        # avoid negative index, changelog.read(nullrev) is fast without cache
        if rev == nullrev:
            return changelog.branchinfo(rev)

        # if requested rev isn't allocated, grow and cache the rev info
        if len(self._rbcrevs) < rbcrevidx + _rbcrecsize:
            return self._branchinfo(rev)

        # fast path: extract data from cache, use it if node is matching
        reponode = changelog.node(rev)[:_rbcnodelen]
        # NOTE: the bytearray must be wrapped in util.buffer here -
        # struct.unpack_from does not accept a bytearray on Python 2.7
        cachenode, branchidx = unpack_from(
            _rbcrecfmt, util.buffer(self._rbcrevs), rbcrevidx)
        close = bool(branchidx & _rbccloseflag)
        if close:
            branchidx &= _rbcbranchidxmask
        if cachenode == '\0\0\0\0':
            # record was never written - fall through to the slow path
            pass
        elif cachenode == reponode:
            try:
                return self._names[branchidx], close
            except IndexError:
                # recover from invalid reference to unknown branch
                self._repo.ui.debug("referenced branch names not found"
                    " - rebuilding revision branch cache from scratch\n")
                self._clear()
        else:
            # rev/node map has changed, invalidate the cache from here up
            self._repo.ui.debug("history modification detected - truncating "
                "revision branch cache to revision %d\n" % rev)
            truncate = rbcrevidx + _rbcrecsize
            del self._rbcrevs[truncate:]
            self._rbcrevslen = min(self._rbcrevslen, truncate)

        # fall back to slow path and make sure it will be written to disk
        return self._branchinfo(rev)

    def _branchinfo(self, rev):
        """Retrieve branch info from changelog and update _rbcrevs"""
        changelog = self._repo.changelog
        b, close = changelog.branchinfo(rev)
        if b in self._namesreverse:
            branchidx = self._namesreverse[b]
        else:
            # first occurrence of this branch name: assign the next index
            branchidx = len(self._names)
            self._names.append(b)
            self._namesreverse[b] = branchidx
        reponode = changelog.node(rev)
        if close:
            branchidx |= _rbccloseflag
        self._setcachedata(rev, reponode, branchidx)
        return b, close

    def _setcachedata(self, rev, node, branchidx):
        """Writes the node's branch data to the in-memory cache data."""
        if rev == nullrev:
            return
        rbcrevidx = rev * _rbcrecsize
        if len(self._rbcrevs) < rbcrevidx + _rbcrecsize:
            # grow the buffer so it covers every revision in the changelog
            self._rbcrevs.extend('\0' *
                                 (len(self._repo.changelog) * _rbcrecsize -
                                  len(self._rbcrevs)))
        pack_into(_rbcrecfmt, self._rbcrevs, rbcrevidx, node, branchidx)
        # the record at rev (and possibly later ones) is now dirty
        self._rbcrevslen = min(self._rbcrevslen, rev)

        tr = self._repo.currenttransaction()
        if tr:
            tr.addfinalize('write-revbranchcache', self.write)

    def write(self, tr=None):
        """Save branch cache if it is dirty."""
        repo = self._repo
        wlock = None
        step = ''
        try:
            if self._rbcnamescount < len(self._names):
                step = ' names'
                wlock = repo.wlock(wait=False)
                if self._rbcnamescount != 0:
                    f = repo.cachevfs.open(_rbcnames, 'ab')
                    if f.tell() == self._rbcsnameslen:
                        f.write('\0')
                    else:
                        # on-disk names changed behind our back: rewrite all
                        f.close()
                        repo.ui.debug("%s changed - rewriting it\n" % _rbcnames)
                        self._rbcnamescount = 0
                        self._rbcrevslen = 0
                if self._rbcnamescount == 0:
                    # before rewriting names, make sure references are removed
                    repo.cachevfs.unlinkpath(_rbcrevs, ignoremissing=True)
                    f = repo.cachevfs.open(_rbcnames, 'wb')
                f.write('\0'.join(encoding.fromlocal(b)
                                  for b in self._names[self._rbcnamescount:]))
                self._rbcsnameslen = f.tell()
                f.close()
                self._rbcnamescount = len(self._names)

            start = self._rbcrevslen * _rbcrecsize
            if start != len(self._rbcrevs):
                step = ''
                if wlock is None:
                    wlock = repo.wlock(wait=False)
                revs = min(len(repo.changelog),
                           len(self._rbcrevs) // _rbcrecsize)
                f = repo.cachevfs.open(_rbcrevs, 'ab')
                if f.tell() != start:
                    repo.ui.debug("truncating cache/%s to %d\n"
                                  % (_rbcrevs, start))
                    f.seek(start)
                    if f.tell() != start:
                        # seek failed (e.g. non-seekable file): rewrite all
                        start = 0
                        f.seek(start)
                    f.truncate()
                end = revs * _rbcrecsize
                f.write(self._rbcrevs[start:end])
                f.close()
                self._rbcrevslen = revs
        except (IOError, OSError, error.Abort, error.LockError) as inst:
            # best effort: a failed cache write must never abort the operation
            repo.ui.debug("couldn't write revision branch cache%s: %s\n"
                          % (step, inst))
        finally:
            if wlock is not None:
                wlock.release()
General Comments 0
You need to be logged in to leave comments. Login now