##// END OF EJS Templates
rbc: use struct unpack_from and pack_into instead of unpack and pack...
Mads Kiilerich -
r31370:906be869 default
parent child Browse files
Show More
@@ -1,518 +1,516 b''
1 1 # branchmap.py - logic to compute, maintain and store branchmaps for a local repo
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import struct
11 11
12 12 from .node import (
13 13 bin,
14 14 hex,
15 15 nullid,
16 16 nullrev,
17 17 )
18 18 from . import (
19 19 encoding,
20 20 error,
21 21 scmutil,
22 22 util,
23 23 )
24 24
25 25 calcsize = struct.calcsize
26 pack = struct.pack
27 unpack = struct.unpack
26 pack_into = struct.pack_into
27 unpack_from = struct.unpack_from
28 28
29 29 def _filename(repo):
30 30 """name of a branchcache file for a given repo or repoview"""
31 31 filename = "cache/branch2"
32 32 if repo.filtername:
33 33 filename = '%s-%s' % (filename, repo.filtername)
34 34 return filename
35 35
def read(repo):
    """Read the on-disk branch cache of *repo*.

    Returns a ``branchcache`` instance, or None when the cache file is
    missing, unreadable, stale, or corrupt in any way.
    """
    try:
        fp = repo.vfs(_filename(repo))
        cachelines = fp.read().split('\n')
        fp.close()
    except (IOError, OSError):
        return None

    try:
        # first line: "<tip hex> <tip rev> [filtered hash]"
        header = cachelines.pop(0).split(" ", 2)
        tipnode, tiprev = bin(header[0]), int(header[1])
        filteredhash = bin(header[2]) if len(header) > 2 else None
        partial = branchcache(tipnode=tipnode, tiprev=tiprev,
                              filteredhash=filteredhash)
        if not partial.validfor(repo):
            # invalidate the cache
            raise ValueError('tip differs')
        cl = repo.changelog
        # remaining lines: "<head hex> <o|c> <branch name>"
        for line in cachelines:
            if not line:
                continue
            node, state, label = line.split(" ", 2)
            if state not in 'oc':
                raise ValueError('invalid branch state')
            label = encoding.tolocal(label.strip())
            node = bin(node)
            if not cl.hasnode(node):
                raise ValueError('node %s does not exist' % hex(node))
            partial.setdefault(label, []).append(node)
            if state == 'c':
                partial._closednodes.add(node)
    except KeyboardInterrupt:
        raise
    except Exception as inst:
        # any parse/validation failure just means "no usable cache"
        if repo.ui.debugflag:
            msg = 'invalid branchheads cache'
            if repo.filtername is not None:
                msg += ' (%s)' % repo.filtername
            msg += ': %s\n'
            repo.ui.debug(msg % inst)
        partial = None
    return partial
81 81
### Nearest subset relation
# Nearest subset of filter X is a filter Y so that:
# * Y is included in X,
# * X - Y is as small as possible.
# This creates an ordering used for branchmap purposes;
# the ordering may be partial.
subsettable = {None: 'visible',
               'visible': 'served',
               'served': 'immutable',
               'immutable': 'base'}
92 92
def updatecache(repo):
    """Ensure ``repo._branchcaches`` holds a valid cache for repo's filter.

    Reuses the in-memory cache when still valid, otherwise falls back to the
    on-disk cache, then to a copy of the nearest subset view's branchmap, and
    finally to an empty cache; any revisions not yet covered are folded in
    and the result is written back to disk.
    """
    cl = repo.changelog
    filtername = repo.filtername
    cache = repo._branchcaches.get(filtername)

    newrevs = []
    if cache is None or not cache.validfor(repo):
        cache = read(repo)
        if cache is None:
            basename = subsettable.get(filtername)
            if basename is None:
                # no subset to seed from - start empty
                cache = branchcache()
            else:
                baseview = repo.filtered(basename)
                cache = baseview.branchmap().copy()
                # revisions visible here but filtered in the subset view
                extra = baseview.changelog.filteredrevs - cl.filteredrevs
                newrevs.extend(r for r in extra if r <= cache.tiprev)
    newrevs.extend(cl.revs(start=cache.tiprev + 1))
    if newrevs:
        cache.update(repo, newrevs)
        cache.write(repo)

    assert cache.validfor(repo), filtername
    repo._branchcaches[repo.filtername] = cache
117 117
def replacecache(repo, bm):
    """Replace the branchmap cache for a repo with a branch mapping.

    This is likely only called during clone with a branch map from a remote.
    """
    allheads = []
    closed = []
    for branchheads in bm.itervalues():
        allheads.extend(branchheads)
        for head in branchheads:
            rev = repo.changelog.rev(head)
            branch, isclosed = repo.changelog.branchinfo(rev)
            if isclosed:
                closed.append(head)

    if allheads:
        rtiprev = max(int(repo.changelog.rev(node)) for node in allheads)
        cache = branchcache(bm,
                            repo[rtiprev].node(),
                            rtiprev,
                            closednodes=closed)

        # Try to stick it as low as possible
        # filters above 'served' are unlikely to be fetched from a clone
        for candidate in ('base', 'immutable', 'served'):
            rview = repo.filtered(candidate)
            if cache.validfor(rview):
                repo._branchcaches[candidate] = cache
                cache.write(rview)
                break
149 149
class branchcache(dict):
    """A dict like object that hold branches heads cache.

    This cache is used to avoid costly computations to determine all the
    branch heads of a repo.

    Maps branch name -> list of head nodes.

    The cache is serialized on disk in the following format:

    <tip hex node> <tip rev number> [optional filtered repo hex hash]
    <branch head hex node> <open/closed state> <branch name>
    <branch head hex node> <open/closed state> <branch name>
    ...

    The first line is used to check if the cache is still valid. If the
    branch cache is for a filtered repo view, an optional third hash is
    included that hashes the hashes of all filtered revisions.

    The open/closed state is represented by a single letter 'o' or 'c'.
    This field can be used to avoid changelog reads when determining if a
    branch head closes a branch or not.
    """

    def __init__(self, entries=(), tipnode=nullid, tiprev=nullrev,
                 filteredhash=None, closednodes=None):
        super(branchcache, self).__init__(entries)
        self.tipnode = tipnode
        self.tiprev = tiprev
        self.filteredhash = filteredhash
        # closednodes is a set of nodes that close their branch. If the branch
        # cache has been updated, it may contain nodes that are no longer
        # heads.
        if closednodes is None:
            self._closednodes = set()
        else:
            self._closednodes = closednodes

    def validfor(self, repo):
        """Is the cache content valid regarding a repo

        - False when cached tipnode is unknown or if we detect a strip.
        - True when cache is up to date or a subset of current repo."""
        try:
            return ((self.tipnode == repo.changelog.node(self.tiprev))
                    and (self.filteredhash == \
                         scmutil.filteredhash(repo, self.tiprev)))
        except IndexError:
            # tiprev is no longer a valid revision (e.g. after a strip)
            return False

    def _branchtip(self, heads):
        '''Return tuple with last open head in heads and false,
        otherwise return last closed head and true.'''
        tip = heads[-1]
        closed = True
        # scan from newest to oldest; the first open head wins
        for h in reversed(heads):
            if h not in self._closednodes:
                tip = h
                closed = False
                break
        return tip, closed

    def branchtip(self, branch):
        '''Return the tipmost open head on branch head, otherwise return the
        tipmost closed head on branch.
        Raise KeyError for unknown branch.'''
        return self._branchtip(self[branch])[0]

    def branchheads(self, branch, closed=False):
        '''Return the list of heads of branch; closed heads are excluded
        unless *closed* is True. Raise KeyError for unknown branch.'''
        heads = self[branch]
        if not closed:
            heads = [h for h in heads if h not in self._closednodes]
        return heads

    def iterbranches(self):
        # yields (branchname, heads, tipnode, isclosed) 4-tuples
        for bn, heads in self.iteritems():
            yield (bn, heads) + self._branchtip(heads)

    def copy(self):
        """return a deep copy of the branchcache object"""
        return branchcache(self, self.tipnode, self.tiprev, self.filteredhash,
                           self._closednodes)

    def write(self, repo):
        """Serialize the cache to disk in the format documented on the class.

        Write errors are logged and swallowed; the cache file is best-effort.
        """
        try:
            f = repo.vfs(_filename(repo), "w", atomictemp=True)
            # header line: tip node, tip rev, optional filtered hash
            cachekey = [hex(self.tipnode), '%d' % self.tiprev]
            if self.filteredhash is not None:
                cachekey.append(hex(self.filteredhash))
            f.write(" ".join(cachekey) + '\n')
            nodecount = 0
            for label, nodes in sorted(self.iteritems()):
                for node in nodes:
                    nodecount += 1
                    if node in self._closednodes:
                        state = 'c'
                    else:
                        state = 'o'
                    f.write("%s %s %s\n" % (hex(node), state,
                                            encoding.fromlocal(label)))
            f.close()
            repo.ui.log('branchcache',
                        'wrote %s branch cache with %d labels and %d nodes\n',
                        repo.filtername, len(self), nodecount)
        except (IOError, OSError, error.Abort) as inst:
            repo.ui.debug("couldn't write branch cache: %s\n" % inst)
            # Abort may be raise by read only opener
            pass

    def update(self, repo, revgen):
        """Given a branchhead cache, self, that may have extra nodes or be
        missing heads, and a generator of nodes that are strictly a superset of
        heads missing, this function updates self to be correct.
        """
        starttime = util.timer()
        cl = repo.changelog
        # collect new branch entries
        newbranches = {}
        getbranchinfo = repo.revbranchcache().branchinfo
        for r in revgen:
            branch, closesbranch = getbranchinfo(r)
            newbranches.setdefault(branch, []).append(r)
            if closesbranch:
                self._closednodes.add(cl.node(r))

        # fetch current topological heads to speed up filtering
        topoheads = set(cl.headrevs())

        # if older branchheads are reachable from new ones, they aren't
        # really branchheads. Note checking parents is insufficient:
        # 1 (branch a) -> 2 (branch b) -> 3 (branch a)
        for branch, newheadrevs in newbranches.iteritems():
            bheads = self.setdefault(branch, [])
            bheadset = set(cl.rev(node) for node in bheads)

            # This have been tested True on all internal usage of this function.
            # run it again in case of doubt
            # assert not (set(bheadrevs) & set(newheadrevs))
            newheadrevs.sort()
            bheadset.update(newheadrevs)

            # This prunes out two kinds of heads - heads that are superseded by
            # a head in newheadrevs, and newheadrevs that are not heads because
            # an existing head is their descendant.
            uncertain = bheadset - topoheads
            if uncertain:
                # anything in 'uncertain' is a head only if no new head
                # descends from it; cl.ancestors tells us which ones do
                floorrev = min(uncertain)
                ancestors = set(cl.ancestors(newheadrevs, floorrev))
                bheadset -= ancestors
            bheadrevs = sorted(bheadset)
            self[branch] = [cl.node(rev) for rev in bheadrevs]
            tiprev = bheadrevs[-1]
            if tiprev > self.tiprev:
                self.tipnode = cl.node(tiprev)
                self.tiprev = tiprev

        if not self.validfor(repo):
            # cache key are not valid anymore
            self.tipnode = nullid
            self.tiprev = nullrev
            # recompute the tip from the heads we actually hold
            for heads in self.values():
                tiprev = max(cl.rev(node) for node in heads)
                if tiprev > self.tiprev:
                    self.tipnode = cl.node(tiprev)
                    self.tiprev = tiprev
        self.filteredhash = scmutil.filteredhash(repo, self.tiprev)

        duration = util.timer() - starttime
        repo.ui.log('branchcache', 'updated %s branch cache in %.4f seconds\n',
                    repo.filtername, duration)
318 318
# Revision branch info cache

_rbcversion = '-v1'
_rbcnames = 'cache/rbc-names' + _rbcversion
_rbcrevs = 'cache/rbc-revs' + _rbcversion
# [4 byte hash prefix][4 byte branch name number with sign bit indicating open]
_rbcrecfmt = '>4sI'  # big-endian: 4-byte node prefix + unsigned 32-bit int
_rbcrecsize = calcsize(_rbcrecfmt)
_rbcnodelen = 4
_rbcbranchidxmask = 0x7fffffff  # low 31 bits: index into the names list
_rbccloseflag = 0x80000000      # high bit: revision closes its branch
330 330
class revbranchcache(object):
    """Persistent cache, mapping from revision number to branch name and close.
    This is a low level cache, independent of filtering.

    Branch names are stored in rbc-names in internal encoding separated by 0.
    rbc-names is append-only, and each branch name is only stored once and will
    thus have a unique index.

    The branch info for each revision is stored in rbc-revs as constant size
    records. The whole file is read into memory, but it is only 'parsed' on
    demand. The file is usually append-only but will be truncated if repo
    modification is detected.
    The record for each revision contains the first 4 bytes of the
    corresponding node hash, and the record is only used if it still matches.
    Even a completely trashed rbc-revs file will thus still give the right
    result while converging towards full recovery ... assuming no incorrectly
    matching node hashes.
    The record also contains 4 bytes where 31 bits contains the index of the
    branch and the last bit indicates that it is a branch close commit.
    The usage pattern for rbc-revs is thus somewhat similar to 00changelog.i
    and will grow with it but be 1/8th of its size.
    """

    def __init__(self, repo, readonly=True):
        assert repo.filtername is None
        self._repo = repo
        self._names = [] # branch names in local encoding with static index
        self._rbcrevs = bytearray()
        self._rbcsnameslen = 0 # length of names read at _rbcsnameslen
        try:
            bndata = repo.vfs.read(_rbcnames)
            self._rbcsnameslen = len(bndata) # for verification before writing
            self._names = [encoding.tolocal(bn) for bn in bndata.split('\0')]
        except (IOError, OSError):
            if readonly:
                # don't try to use cache - fall back to the slow path
                self.branchinfo = self._branchinfo

        # only bother loading records when we have names to resolve them to
        if self._names:
            try:
                data = repo.vfs.read(_rbcrevs)
                self._rbcrevs[:] = data
            except (IOError, OSError) as inst:
                repo.ui.debug("couldn't read revision branch cache: %s\n" %
                              inst)
        # remember number of good records on disk
        self._rbcrevslen = min(len(self._rbcrevs) // _rbcrecsize,
                               len(repo.changelog))
        if self._rbcrevslen == 0:
            self._names = []
        self._rbcnamescount = len(self._names) # number of names read at
                                               # _rbcsnameslen
        self._namesreverse = dict((b, r) for r, b in enumerate(self._names))

    def _clear(self):
        """Reset in-memory state to an empty cache sized for the changelog."""
        self._rbcsnameslen = 0
        del self._names[:]
        self._rbcnamescount = 0
        self._namesreverse.clear()
        self._rbcrevslen = len(self._repo.changelog)
        self._rbcrevs = bytearray(self._rbcrevslen * _rbcrecsize)

    def branchinfo(self, rev):
        """Return branch name and close flag for rev, using and updating
        persistent cache."""
        changelog = self._repo.changelog
        rbcrevidx = rev * _rbcrecsize

        # avoid negative index, changelog.read(nullrev) is fast without cache
        if rev == nullrev:
            return changelog.branchinfo(rev)

        # if requested rev isn't allocated, grow and cache the rev info
        if len(self._rbcrevs) < rbcrevidx + _rbcrecsize:
            return self._branchinfo(rev)

        # fast path: extract data from cache, use it if node is matching
        reponode = changelog.node(rev)[:_rbcnodelen]
        cachenode, branchidx = unpack_from(_rbcrecfmt, self._rbcrevs, rbcrevidx)
        close = bool(branchidx & _rbccloseflag)
        if close:
            branchidx &= _rbcbranchidxmask
        if cachenode == '\0\0\0\0':
            # record never written - fall through to the slow path
            pass
        elif cachenode == reponode:
            try:
                return self._names[branchidx], close
            except IndexError:
                # recover from invalid reference to unknown branch
                self._repo.ui.debug("referenced branch names not found"
                    " - rebuilding revision branch cache from scratch\n")
                self._clear()
        else:
            # rev/node map has changed, invalidate the cache from here up
            self._repo.ui.debug("history modification detected - truncating "
                "revision branch cache to revision %s\n" % rev)
            truncate = rbcrevidx + _rbcrecsize
            del self._rbcrevs[truncate:]
            # NOTE(review): 'truncate' is a byte offset while _rbcrevslen
            # counts records - this looks like it should be
            # min(self._rbcrevslen, rev); confirm against upstream history.
            self._rbcrevslen = min(self._rbcrevslen, truncate)

        # fall back to slow path and make sure it will be written to disk
        return self._branchinfo(rev)

    def _branchinfo(self, rev):
        """Retrieve branch info from changelog and update _rbcrevs"""
        changelog = self._repo.changelog
        b, close = changelog.branchinfo(rev)
        if b in self._namesreverse:
            branchidx = self._namesreverse[b]
        else:
            # first time we see this branch name: assign the next free index
            branchidx = len(self._names)
            self._names.append(b)
            self._namesreverse[b] = branchidx
        reponode = changelog.node(rev)
        if close:
            branchidx |= _rbccloseflag
        self._setcachedata(rev, reponode, branchidx)
        return b, close

    def _setcachedata(self, rev, node, branchidx):
        """Writes the node's branch data to the in-memory cache data."""
        rbcrevidx = rev * _rbcrecsize
        # grow the buffer with zero records up to the changelog length so the
        # record for rev can be written in place
        if len(self._rbcrevs) < rbcrevidx + _rbcrecsize:
            self._rbcrevs.extend('\0' *
                                 (len(self._repo.changelog) * _rbcrecsize -
                                  len(self._rbcrevs)))
        pack_into(_rbcrecfmt, self._rbcrevs, rbcrevidx, node, branchidx)
        # everything from rev onwards is now dirty and must be (re)written
        self._rbcrevslen = min(self._rbcrevslen, rev)

        tr = self._repo.currenttransaction()
        if tr:
            tr.addfinalize('write-revbranchcache', self.write)

    def write(self, tr=None):
        """Save branch cache if it is dirty."""
        repo = self._repo
        wlock = None
        step = ''
        try:
            # step 1: append (or rewrite) new branch names to rbc-names
            if self._rbcnamescount < len(self._names):
                step = ' names'
                wlock = repo.wlock(wait=False)
                if self._rbcnamescount != 0:
                    f = repo.vfs.open(_rbcnames, 'ab')
                    if f.tell() == self._rbcsnameslen:
                        f.write('\0')
                    else:
                        # on-disk file changed behind our back - start over
                        f.close()
                        repo.ui.debug("%s changed - rewriting it\n" % _rbcnames)
                        self._rbcnamescount = 0
                        self._rbcrevslen = 0
                if self._rbcnamescount == 0:
                    # before rewriting names, make sure references are removed
                    repo.vfs.unlinkpath(_rbcrevs, ignoremissing=True)
                    f = repo.vfs.open(_rbcnames, 'wb')
                f.write('\0'.join(encoding.fromlocal(b)
                                  for b in self._names[self._rbcnamescount:]))
                self._rbcsnameslen = f.tell()
                f.close()
                self._rbcnamescount = len(self._names)

            # step 2: append dirty records to rbc-revs, truncating first if
            # the on-disk file is not where we expect it
            start = self._rbcrevslen * _rbcrecsize
            if start != len(self._rbcrevs):
                step = ''
                if wlock is None:
                    wlock = repo.wlock(wait=False)
                revs = min(len(repo.changelog),
                           len(self._rbcrevs) // _rbcrecsize)
                f = repo.vfs.open(_rbcrevs, 'ab')
                if f.tell() != start:
                    repo.ui.debug("truncating %s to %s\n" % (_rbcrevs, start))
                    f.seek(start)
                    if f.tell() != start:
                        # seek past EOF failed - rewrite from scratch
                        start = 0
                        f.seek(start)
                    f.truncate()
                end = revs * _rbcrecsize
                f.write(self._rbcrevs[start:end])
                f.close()
                self._rbcrevslen = revs
        except (IOError, OSError, error.Abort, error.LockError) as inst:
            # cache writing is best-effort; never let it break the caller
            repo.ui.debug("couldn't write revision branch cache%s: %s\n"
                          % (step, inst))
        finally:
            if wlock is not None:
                wlock.release()
General Comments 0
You need to be logged in to leave comments. Login now