branchmap: check node against changelog instead of repo...
Durham Goode -
r28364:f1460af1 default
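The change itself: while parsing the on-disk branch cache in read(), each head is now validated against the changelog with a binary node instead of a hex-string membership test against the repo. A minimal before/after sketch, reusing the names from the read() loop in the diff below:

    # before: hex-string membership test against the repo object
    if not node in repo:
        raise ValueError('node %s does not exist' % node)
    node = bin(node)

    # after: convert to a binary node first, then ask the changelog directly
    node = bin(node)
    if not cl.hasnode(node):
        raise ValueError('node %s does not exist' % hex(node))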
@@ -1,490 +1,491 b''
1 1 # branchmap.py - logic to compute, maintain and store the branchmap for a local repo
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import array
11 11 import struct
12 12 import time
13 13
14 14 from .node import (
15 15 bin,
16 16 hex,
17 17 nullid,
18 18 nullrev,
19 19 )
20 20 from . import (
21 21 encoding,
22 22 error,
23 23 scmutil,
24 24 )
25 25
26 26 array = array.array
27 27 calcsize = struct.calcsize
28 28 pack = struct.pack
29 29 unpack = struct.unpack
30 30
31 31 def _filename(repo):
32 32 """name of a branchcache file for a given repo or repoview"""
33 33 filename = "cache/branch2"
34 34 if repo.filtername:
35 35 filename = '%s-%s' % (filename, repo.filtername)
36 36 return filename
37 37
38 38 def read(repo):
39 39 try:
40 40 f = repo.vfs(_filename(repo))
41 41 lines = f.read().split('\n')
42 42 f.close()
43 43 except (IOError, OSError):
44 44 return None
45 45
46 46 try:
47 47 cachekey = lines.pop(0).split(" ", 2)
48 48 last, lrev = cachekey[:2]
49 49 last, lrev = bin(last), int(lrev)
50 50 filteredhash = None
51 51 if len(cachekey) > 2:
52 52 filteredhash = bin(cachekey[2])
53 53 partial = branchcache(tipnode=last, tiprev=lrev,
54 54 filteredhash=filteredhash)
55 55 if not partial.validfor(repo):
56 56 # invalidate the cache
57 57 raise ValueError('tip differs')
58 cl = repo.changelog
58 59 for l in lines:
59 60 if not l:
60 61 continue
61 62 node, state, label = l.split(" ", 2)
62 63 if state not in 'oc':
63 64 raise ValueError('invalid branch state')
64 65 label = encoding.tolocal(label.strip())
65 if not node in repo:
66 raise ValueError('node %s does not exist' % node)
67 66 node = bin(node)
67 if not cl.hasnode(node):
68 raise ValueError('node %s does not exist' % hex(node))
68 69 partial.setdefault(label, []).append(node)
69 70 if state == 'c':
70 71 partial._closednodes.add(node)
71 72 except KeyboardInterrupt:
72 73 raise
73 74 except Exception as inst:
74 75 if repo.ui.debugflag:
75 76 msg = 'invalid branchheads cache'
76 77 if repo.filtername is not None:
77 78 msg += ' (%s)' % repo.filtername
78 79 msg += ': %s\n'
79 80 repo.ui.debug(msg % inst)
80 81 partial = None
81 82 return partial
82 83
83 84 ### Nearest subset relation
84 85 # Nearest subset of filter X is a filter Y so that:
85 86 # * Y is included in X,
86 87 # * X - Y is as small as possible.
87 88 # This creates an ordering used for branchmap purposes.
88 89 # The ordering may be partial.
89 90 subsettable = {None: 'visible',
90 91 'visible': 'served',
91 92 'served': 'immutable',
92 93 'immutable': 'base'}
93 94
94 95 def updatecache(repo):
95 96 cl = repo.changelog
96 97 filtername = repo.filtername
97 98 partial = repo._branchcaches.get(filtername)
98 99
99 100 revs = []
100 101 if partial is None or not partial.validfor(repo):
101 102 partial = read(repo)
102 103 if partial is None:
103 104 subsetname = subsettable.get(filtername)
104 105 if subsetname is None:
105 106 partial = branchcache()
106 107 else:
107 108 subset = repo.filtered(subsetname)
108 109 partial = subset.branchmap().copy()
109 110 extrarevs = subset.changelog.filteredrevs - cl.filteredrevs
110 111 revs.extend(r for r in extrarevs if r <= partial.tiprev)
111 112 revs.extend(cl.revs(start=partial.tiprev + 1))
112 113 if revs:
113 114 partial.update(repo, revs)
114 115 partial.write(repo)
115 116
116 117 assert partial.validfor(repo), filtername
117 118 repo._branchcaches[repo.filtername] = partial
118 119
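# A rough usage sketch of the subsettable chain together with updatecache()
# (illustrative only): when the unfiltered repo (filtername None) has no valid
# cache, it falls back to its nearest subset, 'visible', copies that branchmap,
# and only walks the revisions past the cached tip:
#
#   subsetname = subsettable.get(None)            # -> 'visible'
#   subset = repo.filtered(subsetname)
#   partial = subset.branchmap().copy()
#   partial.update(repo, cl.revs(start=partial.tiprev + 1))
#   partial.write(repo)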
119 120 def replacecache(repo, bm):
120 121 """Replace the branchmap cache for a repo with a branch mapping.
121 122
122 123 This is likely only called during clone with a branch map from a remote.
123 124 """
124 125 rbheads = []
125 126 closed = []
126 127 for bheads in bm.itervalues():
127 128 rbheads.extend(bheads)
128 129 for h in bheads:
129 130 r = repo.changelog.rev(h)
130 131 b, c = repo.changelog.branchinfo(r)
131 132 if c:
132 133 closed.append(h)
133 134
134 135 if rbheads:
135 136 rtiprev = max((int(repo.changelog.rev(node))
136 137 for node in rbheads))
137 138 cache = branchcache(bm,
138 139 repo[rtiprev].node(),
139 140 rtiprev,
140 141 closednodes=closed)
141 142
142 143 # Try to stick it as low as possible
143 144 # filters above served are unlikely to be fetched from a clone
144 145 for candidate in ('base', 'immutable', 'served'):
145 146 rview = repo.filtered(candidate)
146 147 if cache.validfor(rview):
147 148 repo._branchcaches[candidate] = cache
148 149 cache.write(rview)
149 150 break
150 151
151 152 class branchcache(dict):
152 153 """A dict like object that hold branches heads cache.
153 154
154 155 This cache is used to avoid costly computations to determine all the
155 156 branch heads of a repo.
156 157
157 158 The cache is serialized on disk in the following format:
158 159
159 160 <tip hex node> <tip rev number> [optional filtered repo hex hash]
160 161 <branch head hex node> <open/closed state> <branch name>
161 162 <branch head hex node> <open/closed state> <branch name>
162 163 ...
163 164
164 165 The first line is used to check if the cache is still valid. If the
165 166 branch cache is for a filtered repo view, an optional third hash is
166 167 included that hashes the hashes of all filtered revisions.
167 168
168 169 The open/closed state is represented by a single letter 'o' or 'c'.
169 170 This field can be used to avoid changelog reads when determining if a
170 171 branch head closes a branch or not.
171 172 """
172 173
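# A hypothetical cache file in the format described above (hex nodes
# abbreviated here; real entries carry full 40-character hashes):
#
#   aa1122... 1050 bb3344...           <- tip hex node, tip rev, optional filtered hash
#   cc5566... o default                <- an open head on branch 'default'
#   dd7788... c stable                 <- a closed head on branch 'stable'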
173 174 def __init__(self, entries=(), tipnode=nullid, tiprev=nullrev,
174 175 filteredhash=None, closednodes=None):
175 176 super(branchcache, self).__init__(entries)
176 177 self.tipnode = tipnode
177 178 self.tiprev = tiprev
178 179 self.filteredhash = filteredhash
179 180 # closednodes is a set of nodes that close their branch. If the branch
180 181 # cache has been updated, it may contain nodes that are no longer
181 182 # heads.
182 183 if closednodes is None:
183 184 self._closednodes = set()
184 185 else:
185 186 self._closednodes = closednodes
186 187
187 188 def validfor(self, repo):
188 189 """Is the cache content valid regarding a repo
189 190
190 191 - False when cached tipnode is unknown or if we detect a strip.
191 192 - True when cache is up to date or a subset of current repo."""
192 193 try:
193 194 return ((self.tipnode == repo.changelog.node(self.tiprev))
194 195 and (self.filteredhash == \
195 196 scmutil.filteredhash(repo, self.tiprev)))
196 197 except IndexError:
197 198 return False
198 199
199 200 def _branchtip(self, heads):
200 201 '''Return a tuple with the last open head in heads and False,
201 202 otherwise return the last closed head and True.'''
202 203 tip = heads[-1]
203 204 closed = True
204 205 for h in reversed(heads):
205 206 if h not in self._closednodes:
206 207 tip = h
207 208 closed = False
208 209 break
209 210 return tip, closed
210 211
211 212 def branchtip(self, branch):
212 213 '''Return the tipmost open head on branch, otherwise return the
213 214 tipmost closed head on branch.
214 215 Raise KeyError for unknown branch.'''
215 216 return self._branchtip(self[branch])[0]
216 217
217 218 def branchheads(self, branch, closed=False):
218 219 heads = self[branch]
219 220 if not closed:
220 221 heads = [h for h in heads if h not in self._closednodes]
221 222 return heads
222 223
223 224 def iterbranches(self):
224 225 for bn, heads in self.iteritems():
225 226 yield (bn, heads) + self._branchtip(heads)
226 227
227 228 def copy(self):
228 229 """return an deep copy of the branchcache object"""
229 230 return branchcache(self, self.tipnode, self.tiprev, self.filteredhash,
230 231 self._closednodes)
231 232
232 233 def write(self, repo):
233 234 try:
234 235 f = repo.vfs(_filename(repo), "w", atomictemp=True)
235 236 cachekey = [hex(self.tipnode), str(self.tiprev)]
236 237 if self.filteredhash is not None:
237 238 cachekey.append(hex(self.filteredhash))
238 239 f.write(" ".join(cachekey) + '\n')
239 240 nodecount = 0
240 241 for label, nodes in sorted(self.iteritems()):
241 242 for node in nodes:
242 243 nodecount += 1
243 244 if node in self._closednodes:
244 245 state = 'c'
245 246 else:
246 247 state = 'o'
247 248 f.write("%s %s %s\n" % (hex(node), state,
248 249 encoding.fromlocal(label)))
249 250 f.close()
250 251 repo.ui.log('branchcache',
251 252 'wrote %s branch cache with %d labels and %d nodes\n',
252 253 repo.filtername, len(self), nodecount)
253 254 except (IOError, OSError, error.Abort) as inst:
254 255 repo.ui.debug("couldn't write branch cache: %s\n" % inst)
255 256 # Abort may be raised by a read-only opener
256 257 pass
257 258
258 259 def update(self, repo, revgen):
259 260 """Given a branchhead cache, self, that may have extra nodes or be
260 261 missing heads, and a generator of revisions that is strictly a superset of
261 262 the missing heads, this function updates self to be correct.
262 263 """
263 264 starttime = time.time()
264 265 cl = repo.changelog
265 266 # collect new branch entries
266 267 newbranches = {}
267 268 getbranchinfo = repo.revbranchcache().branchinfo
268 269 for r in revgen:
269 270 branch, closesbranch = getbranchinfo(r)
270 271 newbranches.setdefault(branch, []).append(r)
271 272 if closesbranch:
272 273 self._closednodes.add(cl.node(r))
273 274
274 275 # fetch current topological heads to speed up filtering
275 276 topoheads = set(cl.headrevs())
276 277
277 278 # if older branchheads are reachable from new ones, they aren't
278 279 # really branchheads. Note checking parents is insufficient:
279 280 # 1 (branch a) -> 2 (branch b) -> 3 (branch a)
280 281 for branch, newheadrevs in newbranches.iteritems():
281 282 bheads = self.setdefault(branch, [])
282 283 bheadset = set(cl.rev(node) for node in bheads)
283 284
284 285 # This has been tested True on all internal usage of this function.
285 286 # run it again in case of doubt
286 287 # assert not (set(bheadrevs) & set(newheadrevs))
287 288 newheadrevs.sort()
288 289 bheadset.update(newheadrevs)
289 290
290 291 # This prunes out two kinds of heads - heads that are superseded by
291 292 # a head in newheadrevs, and newheadrevs that are not heads because
292 293 # an existing head is their descendant.
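# A worked instance of the pruning (sketch): with the history
# 1 (branch a) -> 2 (branch b) -> 3 (branch a), a cached head 1 for branch
# 'a' is no longer a topological head once 3 is added, so it lands in
# 'uncertain' below and is removed because it is an ancestor of the new head 3.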
293 294 uncertain = bheadset - topoheads
294 295 if uncertain:
295 296 floorrev = min(uncertain)
296 297 ancestors = set(cl.ancestors(newheadrevs, floorrev))
297 298 bheadset -= ancestors
298 299 bheadrevs = sorted(bheadset)
299 300 self[branch] = [cl.node(rev) for rev in bheadrevs]
300 301 tiprev = bheadrevs[-1]
301 302 if tiprev > self.tiprev:
302 303 self.tipnode = cl.node(tiprev)
303 304 self.tiprev = tiprev
304 305
305 306 if not self.validfor(repo):
306 307 # cache key is not valid anymore
307 308 self.tipnode = nullid
308 309 self.tiprev = nullrev
309 310 for heads in self.values():
310 311 tiprev = max(cl.rev(node) for node in heads)
311 312 if tiprev > self.tiprev:
312 313 self.tipnode = cl.node(tiprev)
313 314 self.tiprev = tiprev
314 315 self.filteredhash = scmutil.filteredhash(repo, self.tiprev)
315 316
316 317 duration = time.time() - starttime
317 318 repo.ui.log('branchcache', 'updated %s branch cache in %.4f seconds\n',
318 319 repo.filtername, duration)
319 320
320 321 # Revision branch info cache
321 322
322 323 _rbcversion = '-v1'
323 324 _rbcnames = 'cache/rbc-names' + _rbcversion
324 325 _rbcrevs = 'cache/rbc-revs' + _rbcversion
325 326 # [4 byte hash prefix][4 byte branch name index with high bit indicating a branch close]
326 327 _rbcrecfmt = '>4sI'
327 328 _rbcrecsize = calcsize(_rbcrecfmt)
328 329 _rbcnodelen = 4
329 330 _rbcbranchidxmask = 0x7fffffff
330 331 _rbccloseflag = 0x80000000
331 332
332 333 class revbranchcache(object):
333 334 """Persistent cache, mapping from revision number to branch name and close.
334 335 This is a low level cache, independent of filtering.
335 336
336 337 Branch names are stored in rbc-names in internal encoding separated by 0.
337 338 rbc-names is append-only, and each branch name is only stored once and will
338 339 thus have a unique index.
339 340
340 341 The branch info for each revision is stored in rbc-revs as constant size
341 342 records. The whole file is read into memory, but it is only 'parsed' on
342 343 demand. The file is usually append-only but will be truncated if repo
343 344 modification is detected.
344 345 The record for each revision contains the first 4 bytes of the
345 346 corresponding node hash, and the record is only used if it still matches.
346 347 Even a completely trashed rbc-revs file will thus still give the right result
347 348 while converging towards full recovery ... assuming no incorrectly matching
348 349 node hashes.
349 350 The record also contains 4 bytes where 31 bits contain the index of the
350 351 branch and the last bit indicates that it is a branch close commit.
351 352 The usage pattern for rbc-revs is thus somewhat similar to 00changelog.i
352 353 and will grow with it but be 1/8th of its size.
353 354 """
354 355
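# A sketch of a single rbc-revs record built with the module-level constants
# above (hypothetical node, branch name stored at index 3 in rbc-names,
# marked as a branch-closing commit):
#
#   rec = pack(_rbcrecfmt, node[:_rbcnodelen], 3 | _rbccloseflag)
#   prefix, field = unpack(_rbcrecfmt, rec)
#   branchidx = field & _rbcbranchidxmask     # -> 3
#   close = bool(field & _rbccloseflag)       # -> True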
355 356 def __init__(self, repo, readonly=True):
356 357 assert repo.filtername is None
357 358 self._repo = repo
358 359 self._names = [] # branch names in local encoding with static index
359 360 self._rbcrevs = array('c') # structs of type _rbcrecfmt
360 361 self._rbcsnameslen = 0
361 362 try:
362 363 bndata = repo.vfs.read(_rbcnames)
363 364 self._rbcsnameslen = len(bndata) # for verification before writing
364 365 self._names = [encoding.tolocal(bn) for bn in bndata.split('\0')]
365 366 except (IOError, OSError) as inst:
366 367 if readonly:
367 368 # don't try to use cache - fall back to the slow path
368 369 self.branchinfo = self._branchinfo
369 370
370 371 if self._names:
371 372 try:
372 373 data = repo.vfs.read(_rbcrevs)
373 374 self._rbcrevs.fromstring(data)
374 375 except (IOError, OSError) as inst:
375 376 repo.ui.debug("couldn't read revision branch cache: %s\n" %
376 377 inst)
377 378 # remember number of good records on disk
378 379 self._rbcrevslen = min(len(self._rbcrevs) // _rbcrecsize,
379 380 len(repo.changelog))
380 381 if self._rbcrevslen == 0:
381 382 self._names = []
382 383 self._rbcnamescount = len(self._names) # number of good names on disk
383 384 self._namesreverse = dict((b, r) for r, b in enumerate(self._names))
384 385
385 386 def branchinfo(self, rev):
386 387 """Return branch name and close flag for rev, using and updating
387 388 persistent cache."""
388 389 changelog = self._repo.changelog
389 390 rbcrevidx = rev * _rbcrecsize
390 391
391 392 # avoid negative index, changelog.read(nullrev) is fast without cache
392 393 if rev == nullrev:
393 394 return changelog.branchinfo(rev)
394 395
395 396 # if requested rev is missing, add and populate all missing revs
396 397 if len(self._rbcrevs) < rbcrevidx + _rbcrecsize:
397 398 self._rbcrevs.extend('\0' * (len(changelog) * _rbcrecsize -
398 399 len(self._rbcrevs)))
399 400
400 401 # fast path: extract data from cache, use it if node is matching
401 402 reponode = changelog.node(rev)[:_rbcnodelen]
402 403 cachenode, branchidx = unpack(
403 404 _rbcrecfmt, buffer(self._rbcrevs, rbcrevidx, _rbcrecsize))
404 405 close = bool(branchidx & _rbccloseflag)
405 406 if close:
406 407 branchidx &= _rbcbranchidxmask
407 408 if cachenode == '\0\0\0\0':
408 409 pass
409 410 elif cachenode == reponode:
410 411 return self._names[branchidx], close
411 412 else:
412 413 # rev/node map has changed, invalidate the cache from here up
413 414 truncate = rbcrevidx + _rbcrecsize
414 415 del self._rbcrevs[truncate:]
415 416 self._rbcrevslen = min(self._rbcrevslen, truncate)
416 417
417 418 # fall back to slow path and make sure it will be written to disk
418 419 return self._branchinfo(rev)
419 420
420 421 def _branchinfo(self, rev):
421 422 """Retrieve branch info from changelog and update _rbcrevs"""
422 423 changelog = self._repo.changelog
423 424 b, close = changelog.branchinfo(rev)
424 425 if b in self._namesreverse:
425 426 branchidx = self._namesreverse[b]
426 427 else:
427 428 branchidx = len(self._names)
428 429 self._names.append(b)
429 430 self._namesreverse[b] = branchidx
430 431 reponode = changelog.node(rev)
431 432 if close:
432 433 branchidx |= _rbccloseflag
433 434 self._setcachedata(rev, reponode, branchidx)
434 435 return b, close
435 436
436 437 def _setcachedata(self, rev, node, branchidx):
437 438 """Writes the node's branch data to the in-memory cache data."""
438 439 rbcrevidx = rev * _rbcrecsize
439 440 rec = array('c')
440 441 rec.fromstring(pack(_rbcrecfmt, node, branchidx))
441 442 self._rbcrevs[rbcrevidx:rbcrevidx + _rbcrecsize] = rec
442 443 self._rbcrevslen = min(self._rbcrevslen, rev)
443 444
444 445 tr = self._repo.currenttransaction()
445 446 if tr:
446 447 tr.addfinalize('write-revbranchcache', self.write)
447 448
448 449 def write(self, tr=None):
449 450 """Save branch cache if it is dirty."""
450 451 repo = self._repo
451 452 if self._rbcnamescount < len(self._names):
452 453 try:
453 454 if self._rbcnamescount != 0:
454 455 f = repo.vfs.open(_rbcnames, 'ab')
455 456 if f.tell() == self._rbcsnameslen:
456 457 f.write('\0')
457 458 else:
458 459 f.close()
459 460 repo.ui.debug("%s changed - rewriting it\n" % _rbcnames)
460 461 self._rbcnamescount = 0
461 462 self._rbcrevslen = 0
462 463 if self._rbcnamescount == 0:
463 464 f = repo.vfs.open(_rbcnames, 'wb')
464 465 f.write('\0'.join(encoding.fromlocal(b)
465 466 for b in self._names[self._rbcnamescount:]))
466 467 self._rbcsnameslen = f.tell()
467 468 f.close()
468 469 except (IOError, OSError, error.Abort) as inst:
469 470 repo.ui.debug("couldn't write revision branch cache names: "
470 471 "%s\n" % inst)
471 472 return
472 473 self._rbcnamescount = len(self._names)
473 474
474 475 start = self._rbcrevslen * _rbcrecsize
475 476 if start != len(self._rbcrevs):
476 477 revs = min(len(repo.changelog), len(self._rbcrevs) // _rbcrecsize)
477 478 try:
478 479 f = repo.vfs.open(_rbcrevs, 'ab')
479 480 if f.tell() != start:
480 481 repo.ui.debug("truncating %s to %s\n" % (_rbcrevs, start))
481 482 f.seek(start)
482 483 f.truncate()
483 484 end = revs * _rbcrecsize
484 485 f.write(self._rbcrevs[start:end])
485 486 f.close()
486 487 except (IOError, OSError, error.Abort) as inst:
487 488 repo.ui.debug("couldn't write revision branch cache: %s\n" %
488 489 inst)
489 490 return
490 491 self._rbcrevslen = revs
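Taken together, the two caches serve different lookups: branchcache maps each branch name to its head nodes for a given repo view, while revbranchcache maps a revision number to its branch name and close flag without a changelog read. A minimal consumer-side sketch, assuming an existing local repo object (illustrative only):

    bm = repo.branchmap()                     # branchcache for repo.filtername
    heads = bm.branchheads('default')         # open heads only
    tip = bm.branchtip('default')             # tipmost open (or closed) head; KeyError if unknown
    for name, bheads, btip, closed in bm.iterbranches():
        pass                                  # (branch, heads, tip node, closed flag)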