##// END OF EJS Templates
branchmap: preparatory indent of indent the branch rev writing code...
Pierre-Yves David -
r29743:9f3c49ee default
parent child Browse files
Show More
@@ -1,517 +1,523 b''
1 1 # branchmap.py - logic to computes, maintain and stores branchmap for local repo
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import array
11 11 import struct
12 12 import time
13 13
14 14 from .node import (
15 15 bin,
16 16 hex,
17 17 nullid,
18 18 nullrev,
19 19 )
20 20 from . import (
21 21 encoding,
22 22 error,
23 23 scmutil,
24 24 )
25 25
26 26 array = array.array
27 27 calcsize = struct.calcsize
28 28 pack = struct.pack
29 29 unpack = struct.unpack
30 30
31 31 def _filename(repo):
32 32 """name of a branchcache file for a given repo or repoview"""
33 33 filename = "cache/branch2"
34 34 if repo.filtername:
35 35 filename = '%s-%s' % (filename, repo.filtername)
36 36 return filename
37 37
38 38 def read(repo):
39 39 try:
40 40 f = repo.vfs(_filename(repo))
41 41 lines = f.read().split('\n')
42 42 f.close()
43 43 except (IOError, OSError):
44 44 return None
45 45
46 46 try:
47 47 cachekey = lines.pop(0).split(" ", 2)
48 48 last, lrev = cachekey[:2]
49 49 last, lrev = bin(last), int(lrev)
50 50 filteredhash = None
51 51 if len(cachekey) > 2:
52 52 filteredhash = bin(cachekey[2])
53 53 partial = branchcache(tipnode=last, tiprev=lrev,
54 54 filteredhash=filteredhash)
55 55 if not partial.validfor(repo):
56 56 # invalidate the cache
57 57 raise ValueError('tip differs')
58 58 cl = repo.changelog
59 59 for l in lines:
60 60 if not l:
61 61 continue
62 62 node, state, label = l.split(" ", 2)
63 63 if state not in 'oc':
64 64 raise ValueError('invalid branch state')
65 65 label = encoding.tolocal(label.strip())
66 66 node = bin(node)
67 67 if not cl.hasnode(node):
68 68 raise ValueError('node %s does not exist' % hex(node))
69 69 partial.setdefault(label, []).append(node)
70 70 if state == 'c':
71 71 partial._closednodes.add(node)
72 72 except KeyboardInterrupt:
73 73 raise
74 74 except Exception as inst:
75 75 if repo.ui.debugflag:
76 76 msg = 'invalid branchheads cache'
77 77 if repo.filtername is not None:
78 78 msg += ' (%s)' % repo.filtername
79 79 msg += ': %s\n'
80 80 repo.ui.debug(msg % inst)
81 81 partial = None
82 82 return partial
83 83
84 84 ### Nearest subset relation
85 85 # Nearest subset of filter X is a filter Y so that:
86 86 # * Y is included in X,
87 87 # * X - Y is as small as possible.
88 88 # This create and ordering used for branchmap purpose.
89 89 # the ordering may be partial
90 90 subsettable = {None: 'visible',
91 91 'visible': 'served',
92 92 'served': 'immutable',
93 93 'immutable': 'base'}
94 94
95 95 def updatecache(repo):
96 96 cl = repo.changelog
97 97 filtername = repo.filtername
98 98 partial = repo._branchcaches.get(filtername)
99 99
100 100 revs = []
101 101 if partial is None or not partial.validfor(repo):
102 102 partial = read(repo)
103 103 if partial is None:
104 104 subsetname = subsettable.get(filtername)
105 105 if subsetname is None:
106 106 partial = branchcache()
107 107 else:
108 108 subset = repo.filtered(subsetname)
109 109 partial = subset.branchmap().copy()
110 110 extrarevs = subset.changelog.filteredrevs - cl.filteredrevs
111 111 revs.extend(r for r in extrarevs if r <= partial.tiprev)
112 112 revs.extend(cl.revs(start=partial.tiprev + 1))
113 113 if revs:
114 114 partial.update(repo, revs)
115 115 partial.write(repo)
116 116
117 117 assert partial.validfor(repo), filtername
118 118 repo._branchcaches[repo.filtername] = partial
119 119
120 120 def replacecache(repo, bm):
121 121 """Replace the branchmap cache for a repo with a branch mapping.
122 122
123 123 This is likely only called during clone with a branch map from a remote.
124 124 """
125 125 rbheads = []
126 126 closed = []
127 127 for bheads in bm.itervalues():
128 128 rbheads.extend(bheads)
129 129 for h in bheads:
130 130 r = repo.changelog.rev(h)
131 131 b, c = repo.changelog.branchinfo(r)
132 132 if c:
133 133 closed.append(h)
134 134
135 135 if rbheads:
136 136 rtiprev = max((int(repo.changelog.rev(node))
137 137 for node in rbheads))
138 138 cache = branchcache(bm,
139 139 repo[rtiprev].node(),
140 140 rtiprev,
141 141 closednodes=closed)
142 142
143 143 # Try to stick it as low as possible
144 144 # filter above served are unlikely to be fetch from a clone
145 145 for candidate in ('base', 'immutable', 'served'):
146 146 rview = repo.filtered(candidate)
147 147 if cache.validfor(rview):
148 148 repo._branchcaches[candidate] = cache
149 149 cache.write(rview)
150 150 break
151 151
152 152 class branchcache(dict):
153 153 """A dict like object that hold branches heads cache.
154 154
155 155 This cache is used to avoid costly computations to determine all the
156 156 branch heads of a repo.
157 157
158 158 The cache is serialized on disk in the following format:
159 159
160 160 <tip hex node> <tip rev number> [optional filtered repo hex hash]
161 161 <branch head hex node> <open/closed state> <branch name>
162 162 <branch head hex node> <open/closed state> <branch name>
163 163 ...
164 164
165 165 The first line is used to check if the cache is still valid. If the
166 166 branch cache is for a filtered repo view, an optional third hash is
167 167 included that hashes the hashes of all filtered revisions.
168 168
169 169 The open/closed state is represented by a single letter 'o' or 'c'.
170 170 This field can be used to avoid changelog reads when determining if a
171 171 branch head closes a branch or not.
172 172 """
173 173
174 174 def __init__(self, entries=(), tipnode=nullid, tiprev=nullrev,
175 175 filteredhash=None, closednodes=None):
176 176 super(branchcache, self).__init__(entries)
177 177 self.tipnode = tipnode
178 178 self.tiprev = tiprev
179 179 self.filteredhash = filteredhash
180 180 # closednodes is a set of nodes that close their branch. If the branch
181 181 # cache has been updated, it may contain nodes that are no longer
182 182 # heads.
183 183 if closednodes is None:
184 184 self._closednodes = set()
185 185 else:
186 186 self._closednodes = closednodes
187 187
188 188 def validfor(self, repo):
189 189 """Is the cache content valid regarding a repo
190 190
191 191 - False when cached tipnode is unknown or if we detect a strip.
192 192 - True when cache is up to date or a subset of current repo."""
193 193 try:
194 194 return ((self.tipnode == repo.changelog.node(self.tiprev))
195 195 and (self.filteredhash == \
196 196 scmutil.filteredhash(repo, self.tiprev)))
197 197 except IndexError:
198 198 return False
199 199
200 200 def _branchtip(self, heads):
201 201 '''Return tuple with last open head in heads and false,
202 202 otherwise return last closed head and true.'''
203 203 tip = heads[-1]
204 204 closed = True
205 205 for h in reversed(heads):
206 206 if h not in self._closednodes:
207 207 tip = h
208 208 closed = False
209 209 break
210 210 return tip, closed
211 211
212 212 def branchtip(self, branch):
213 213 '''Return the tipmost open head on branch head, otherwise return the
214 214 tipmost closed head on branch.
215 215 Raise KeyError for unknown branch.'''
216 216 return self._branchtip(self[branch])[0]
217 217
218 218 def branchheads(self, branch, closed=False):
219 219 heads = self[branch]
220 220 if not closed:
221 221 heads = [h for h in heads if h not in self._closednodes]
222 222 return heads
223 223
224 224 def iterbranches(self):
225 225 for bn, heads in self.iteritems():
226 226 yield (bn, heads) + self._branchtip(heads)
227 227
228 228 def copy(self):
229 229 """return an deep copy of the branchcache object"""
230 230 return branchcache(self, self.tipnode, self.tiprev, self.filteredhash,
231 231 self._closednodes)
232 232
233 233 def write(self, repo):
234 234 try:
235 235 f = repo.vfs(_filename(repo), "w", atomictemp=True)
236 236 cachekey = [hex(self.tipnode), str(self.tiprev)]
237 237 if self.filteredhash is not None:
238 238 cachekey.append(hex(self.filteredhash))
239 239 f.write(" ".join(cachekey) + '\n')
240 240 nodecount = 0
241 241 for label, nodes in sorted(self.iteritems()):
242 242 for node in nodes:
243 243 nodecount += 1
244 244 if node in self._closednodes:
245 245 state = 'c'
246 246 else:
247 247 state = 'o'
248 248 f.write("%s %s %s\n" % (hex(node), state,
249 249 encoding.fromlocal(label)))
250 250 f.close()
251 251 repo.ui.log('branchcache',
252 252 'wrote %s branch cache with %d labels and %d nodes\n',
253 253 repo.filtername, len(self), nodecount)
254 254 except (IOError, OSError, error.Abort) as inst:
255 255 repo.ui.debug("couldn't write branch cache: %s\n" % inst)
256 256 # Abort may be raise by read only opener
257 257 pass
258 258
259 259 def update(self, repo, revgen):
260 260 """Given a branchhead cache, self, that may have extra nodes or be
261 261 missing heads, and a generator of nodes that are strictly a superset of
262 262 heads missing, this function updates self to be correct.
263 263 """
264 264 starttime = time.time()
265 265 cl = repo.changelog
266 266 # collect new branch entries
267 267 newbranches = {}
268 268 getbranchinfo = repo.revbranchcache().branchinfo
269 269 for r in revgen:
270 270 branch, closesbranch = getbranchinfo(r)
271 271 newbranches.setdefault(branch, []).append(r)
272 272 if closesbranch:
273 273 self._closednodes.add(cl.node(r))
274 274
275 275 # fetch current topological heads to speed up filtering
276 276 topoheads = set(cl.headrevs())
277 277
278 278 # if older branchheads are reachable from new ones, they aren't
279 279 # really branchheads. Note checking parents is insufficient:
280 280 # 1 (branch a) -> 2 (branch b) -> 3 (branch a)
281 281 for branch, newheadrevs in newbranches.iteritems():
282 282 bheads = self.setdefault(branch, [])
283 283 bheadset = set(cl.rev(node) for node in bheads)
284 284
285 285 # This have been tested True on all internal usage of this function.
286 286 # run it again in case of doubt
287 287 # assert not (set(bheadrevs) & set(newheadrevs))
288 288 newheadrevs.sort()
289 289 bheadset.update(newheadrevs)
290 290
291 291 # This prunes out two kinds of heads - heads that are superseded by
292 292 # a head in newheadrevs, and newheadrevs that are not heads because
293 293 # an existing head is their descendant.
294 294 uncertain = bheadset - topoheads
295 295 if uncertain:
296 296 floorrev = min(uncertain)
297 297 ancestors = set(cl.ancestors(newheadrevs, floorrev))
298 298 bheadset -= ancestors
299 299 bheadrevs = sorted(bheadset)
300 300 self[branch] = [cl.node(rev) for rev in bheadrevs]
301 301 tiprev = bheadrevs[-1]
302 302 if tiprev > self.tiprev:
303 303 self.tipnode = cl.node(tiprev)
304 304 self.tiprev = tiprev
305 305
306 306 if not self.validfor(repo):
307 307 # cache key are not valid anymore
308 308 self.tipnode = nullid
309 309 self.tiprev = nullrev
310 310 for heads in self.values():
311 311 tiprev = max(cl.rev(node) for node in heads)
312 312 if tiprev > self.tiprev:
313 313 self.tipnode = cl.node(tiprev)
314 314 self.tiprev = tiprev
315 315 self.filteredhash = scmutil.filteredhash(repo, self.tiprev)
316 316
317 317 duration = time.time() - starttime
318 318 repo.ui.log('branchcache', 'updated %s branch cache in %.4f seconds\n',
319 319 repo.filtername, duration)
320 320
321 321 # Revision branch info cache
322 322
323 323 _rbcversion = '-v1'
324 324 _rbcnames = 'cache/rbc-names' + _rbcversion
325 325 _rbcrevs = 'cache/rbc-revs' + _rbcversion
326 326 # [4 byte hash prefix][4 byte branch name number with sign bit indicating open]
327 327 _rbcrecfmt = '>4sI'
328 328 _rbcrecsize = calcsize(_rbcrecfmt)
329 329 _rbcnodelen = 4
330 330 _rbcbranchidxmask = 0x7fffffff
331 331 _rbccloseflag = 0x80000000
332 332
333 333 class revbranchcache(object):
334 334 """Persistent cache, mapping from revision number to branch name and close.
335 335 This is a low level cache, independent of filtering.
336 336
337 337 Branch names are stored in rbc-names in internal encoding separated by 0.
338 338 rbc-names is append-only, and each branch name is only stored once and will
339 339 thus have a unique index.
340 340
341 341 The branch info for each revision is stored in rbc-revs as constant size
342 342 records. The whole file is read into memory, but it is only 'parsed' on
343 343 demand. The file is usually append-only but will be truncated if repo
344 344 modification is detected.
345 345 The record for each revision contains the first 4 bytes of the
346 346 corresponding node hash, and the record is only used if it still matches.
347 347 Even a completely trashed rbc-revs fill thus still give the right result
348 348 while converging towards full recovery ... assuming no incorrectly matching
349 349 node hashes.
350 350 The record also contains 4 bytes where 31 bits contains the index of the
351 351 branch and the last bit indicate that it is a branch close commit.
352 352 The usage pattern for rbc-revs is thus somewhat similar to 00changelog.i
353 353 and will grow with it but be 1/8th of its size.
354 354 """
355 355
356 356 def __init__(self, repo, readonly=True):
357 357 assert repo.filtername is None
358 358 self._repo = repo
359 359 self._names = [] # branch names in local encoding with static index
360 360 self._rbcrevs = array('c') # structs of type _rbcrecfmt
361 361 self._rbcsnameslen = 0 # length of names read at _rbcsnameslen
362 362 try:
363 363 bndata = repo.vfs.read(_rbcnames)
364 364 self._rbcsnameslen = len(bndata) # for verification before writing
365 365 self._names = [encoding.tolocal(bn) for bn in bndata.split('\0')]
366 366 except (IOError, OSError):
367 367 if readonly:
368 368 # don't try to use cache - fall back to the slow path
369 369 self.branchinfo = self._branchinfo
370 370
371 371 if self._names:
372 372 try:
373 373 data = repo.vfs.read(_rbcrevs)
374 374 self._rbcrevs.fromstring(data)
375 375 except (IOError, OSError) as inst:
376 376 repo.ui.debug("couldn't read revision branch cache: %s\n" %
377 377 inst)
378 378 # remember number of good records on disk
379 379 self._rbcrevslen = min(len(self._rbcrevs) // _rbcrecsize,
380 380 len(repo.changelog))
381 381 if self._rbcrevslen == 0:
382 382 self._names = []
383 383 self._rbcnamescount = len(self._names) # number of names read at
384 384 # _rbcsnameslen
385 385 self._namesreverse = dict((b, r) for r, b in enumerate(self._names))
386 386
387 387 def _clear(self):
388 388 self._rbcsnameslen = 0
389 389 del self._names[:]
390 390 self._rbcnamescount = 0
391 391 self._namesreverse.clear()
392 392 self._rbcrevslen = len(self._repo.changelog)
393 393 self._rbcrevs = array('c')
394 394 self._rbcrevs.fromstring('\0' * (self._rbcrevslen * _rbcrecsize))
395 395
396 396 def branchinfo(self, rev):
397 397 """Return branch name and close flag for rev, using and updating
398 398 persistent cache."""
399 399 changelog = self._repo.changelog
400 400 rbcrevidx = rev * _rbcrecsize
401 401
402 402 # avoid negative index, changelog.read(nullrev) is fast without cache
403 403 if rev == nullrev:
404 404 return changelog.branchinfo(rev)
405 405
406 406 # if requested rev isn't allocated, grow and cache the rev info
407 407 if len(self._rbcrevs) < rbcrevidx + _rbcrecsize:
408 408 return self._branchinfo(rev)
409 409
410 410 # fast path: extract data from cache, use it if node is matching
411 411 reponode = changelog.node(rev)[:_rbcnodelen]
412 412 cachenode, branchidx = unpack(
413 413 _rbcrecfmt, buffer(self._rbcrevs, rbcrevidx, _rbcrecsize))
414 414 close = bool(branchidx & _rbccloseflag)
415 415 if close:
416 416 branchidx &= _rbcbranchidxmask
417 417 if cachenode == '\0\0\0\0':
418 418 pass
419 419 elif cachenode == reponode:
420 420 try:
421 421 return self._names[branchidx], close
422 422 except IndexError:
423 423 # recover from invalid reference to unknown branch
424 424 self._repo.ui.debug("referenced branch names not found"
425 425 " - rebuilding revision branch cache from scratch\n")
426 426 self._clear()
427 427 else:
428 428 # rev/node map has changed, invalidate the cache from here up
429 429 self._repo.ui.debug("history modification detected - truncating "
430 430 "revision branch cache to revision %s\n" % rev)
431 431 truncate = rbcrevidx + _rbcrecsize
432 432 del self._rbcrevs[truncate:]
433 433 self._rbcrevslen = min(self._rbcrevslen, truncate)
434 434
435 435 # fall back to slow path and make sure it will be written to disk
436 436 return self._branchinfo(rev)
437 437
438 438 def _branchinfo(self, rev):
439 439 """Retrieve branch info from changelog and update _rbcrevs"""
440 440 changelog = self._repo.changelog
441 441 b, close = changelog.branchinfo(rev)
442 442 if b in self._namesreverse:
443 443 branchidx = self._namesreverse[b]
444 444 else:
445 445 branchidx = len(self._names)
446 446 self._names.append(b)
447 447 self._namesreverse[b] = branchidx
448 448 reponode = changelog.node(rev)
449 449 if close:
450 450 branchidx |= _rbccloseflag
451 451 self._setcachedata(rev, reponode, branchidx)
452 452 return b, close
453 453
454 454 def _setcachedata(self, rev, node, branchidx):
455 455 """Writes the node's branch data to the in-memory cache data."""
456 456 rbcrevidx = rev * _rbcrecsize
457 457 rec = array('c')
458 458 rec.fromstring(pack(_rbcrecfmt, node, branchidx))
459 459 if len(self._rbcrevs) < rbcrevidx + _rbcrecsize:
460 460 self._rbcrevs.extend('\0' *
461 461 (len(self._repo.changelog) * _rbcrecsize -
462 462 len(self._rbcrevs)))
463 463 self._rbcrevs[rbcrevidx:rbcrevidx + _rbcrecsize] = rec
464 464 self._rbcrevslen = min(self._rbcrevslen, rev)
465 465
466 466 tr = self._repo.currenttransaction()
467 467 if tr:
468 468 tr.addfinalize('write-revbranchcache', self.write)
469 469
470 470 def write(self, tr=None):
471 471 """Save branch cache if it is dirty."""
472 472 repo = self._repo
473 if self._rbcnamescount < len(self._names):
474 try:
475 if self._rbcnamescount != 0:
476 f = repo.vfs.open(_rbcnames, 'ab')
477 if f.tell() == self._rbcsnameslen:
478 f.write('\0')
479 else:
480 f.close()
481 repo.ui.debug("%s changed - rewriting it\n" % _rbcnames)
482 self._rbcnamescount = 0
483 self._rbcrevslen = 0
484 if self._rbcnamescount == 0:
485 # before rewriting names, make sure references are removed
486 repo.vfs.unlinkpath(_rbcrevs, ignoremissing=True)
487 f = repo.vfs.open(_rbcnames, 'wb')
488 f.write('\0'.join(encoding.fromlocal(b)
489 for b in self._names[self._rbcnamescount:]))
490 self._rbcsnameslen = f.tell()
491 f.close()
492 except (IOError, OSError, error.Abort) as inst:
493 repo.ui.debug("couldn't write revision branch cache names: "
494 "%s\n" % inst)
495 return
496 self._rbcnamescount = len(self._names)
473 if True:
474 if self._rbcnamescount < len(self._names):
475 try:
476 if self._rbcnamescount != 0:
477 f = repo.vfs.open(_rbcnames, 'ab')
478 if f.tell() == self._rbcsnameslen:
479 f.write('\0')
480 else:
481 f.close()
482 repo.ui.debug("%s changed - rewriting it\n"
483 % _rbcnames)
484 self._rbcnamescount = 0
485 self._rbcrevslen = 0
486 if self._rbcnamescount == 0:
487 # before rewriting names, make sure references are
488 # removed
489 repo.vfs.unlinkpath(_rbcrevs, ignoremissing=True)
490 f = repo.vfs.open(_rbcnames, 'wb')
491 f.write('\0'.join(encoding.fromlocal(b)
492 for b in self._names[self._rbcnamescount:]
493 ))
494 self._rbcsnameslen = f.tell()
495 f.close()
496 except (IOError, OSError, error.Abort) as inst:
497 repo.ui.debug("couldn't write revision branch cache names: "
498 "%s\n" % inst)
499 return
500 self._rbcnamescount = len(self._names)
497 501
498 start = self._rbcrevslen * _rbcrecsize
499 if start != len(self._rbcrevs):
500 revs = min(len(repo.changelog), len(self._rbcrevs) // _rbcrecsize)
501 try:
502 f = repo.vfs.open(_rbcrevs, 'ab')
503 if f.tell() != start:
504 repo.ui.debug("truncating %s to %s\n" % (_rbcrevs, start))
505 f.seek(start)
502 start = self._rbcrevslen * _rbcrecsize
503 if start != len(self._rbcrevs):
504 revs = min(len(repo.changelog),
505 len(self._rbcrevs) // _rbcrecsize)
506 try:
507 f = repo.vfs.open(_rbcrevs, 'ab')
506 508 if f.tell() != start:
507 start = 0
509 repo.ui.debug("truncating %s to %s\n"
510 % (_rbcrevs, start))
508 511 f.seek(start)
509 f.truncate()
510 end = revs * _rbcrecsize
511 f.write(self._rbcrevs[start:end])
512 f.close()
513 except (IOError, OSError, error.Abort) as inst:
514 repo.ui.debug("couldn't write revision branch cache: %s\n" %
515 inst)
516 return
517 self._rbcrevslen = revs
512 if f.tell() != start:
513 start = 0
514 f.seek(start)
515 f.truncate()
516 end = revs * _rbcrecsize
517 f.write(self._rbcrevs[start:end])
518 f.close()
519 except (IOError, OSError, error.Abort) as inst:
520 repo.ui.debug("couldn't write revision branch cache: %s\n" %
521 inst)
522 return
523 self._rbcrevslen = revs
General Comments 0
You need to be logged in to leave comments. Login now