##// END OF EJS Templates
cache: safer handling of failing seek when writing revision branch cache...
Mads Kiilerich -
r28557:4b83507b default
parent child Browse files
Show More
@@ -1,493 +1,496 b''
1 1 # branchmap.py - logic to compute, maintain and store the branchmap for a local repo
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import array
11 11 import struct
12 12 import time
13 13
14 14 from .node import (
15 15 bin,
16 16 hex,
17 17 nullid,
18 18 nullrev,
19 19 )
20 20 from . import (
21 21 encoding,
22 22 error,
23 23 scmutil,
24 24 )
25 25
26 26 array = array.array
27 27 calcsize = struct.calcsize
28 28 pack = struct.pack
29 29 unpack = struct.unpack
30 30
31 31 def _filename(repo):
32 32 """name of a branchcache file for a given repo or repoview"""
33 33 filename = "cache/branch2"
34 34 if repo.filtername:
35 35 filename = '%s-%s' % (filename, repo.filtername)
36 36 return filename
37 37
def read(repo):
    """Read the on-disk branch cache for repo and return a branchcache.

    Returns None when the cache file is missing or unreadable, or when its
    content is stale/corrupt (in which case a debug message is emitted when
    debugging is enabled).
    """
    try:
        f = repo.vfs(_filename(repo))
        try:
            lines = f.read().split('\n')
        finally:
            # always release the file handle, even when read() raises
            f.close()
    except (IOError, OSError):
        return None

    try:
        # first line is the cache key: "<tipnode> <tiprev> [filteredhash]"
        cachekey = lines.pop(0).split(" ", 2)
        last, lrev = cachekey[:2]
        last, lrev = bin(last), int(lrev)
        filteredhash = None
        if len(cachekey) > 2:
            filteredhash = bin(cachekey[2])
        partial = branchcache(tipnode=last, tiprev=lrev,
                              filteredhash=filteredhash)
        if not partial.validfor(repo):
            # invalidate the cache
            raise ValueError('tip differs')
        cl = repo.changelog
        # remaining lines: "<headnode> <o|c> <branchname>"
        for l in lines:
            if not l:
                continue
            node, state, label = l.split(" ", 2)
            if state not in 'oc':
                raise ValueError('invalid branch state')
            label = encoding.tolocal(label.strip())
            node = bin(node)
            if not cl.hasnode(node):
                raise ValueError('node %s does not exist' % hex(node))
            partial.setdefault(label, []).append(node)
            if state == 'c':
                partial._closednodes.add(node)
    except KeyboardInterrupt:
        raise
    except Exception as inst:
        # any parse/validation failure just invalidates the cache
        if repo.ui.debugflag:
            msg = 'invalid branchheads cache'
            if repo.filtername is not None:
                msg += ' (%s)' % repo.filtername
            msg += ': %s\n'
            repo.ui.debug(msg % inst)
        partial = None
    return partial
83 83
### Nearest subset relation
# Nearest subset of filter X is a filter Y so that:
# * Y is included in X,
# * X - Y is as small as possible.
# This creates an ordering used for branchmap purposes.
# The ordering may be partial.
subsettable = {
    None: 'visible',
    'visible': 'served',
    'served': 'immutable',
    'immutable': 'base',
}
94 94
def updatecache(repo):
    """Bring the branchmap cache for repo's current filter level up to date.

    Reuses the in-memory cache when still valid; otherwise falls back to the
    on-disk cache, or seeds from the nearest subset repoview's branchmap.
    Any revisions not yet covered are then folded in, the cache is written
    back to disk, and the result is stored in repo._branchcaches.
    """
    cl = repo.changelog
    filtername = repo.filtername
    partial = repo._branchcaches.get(filtername)

    revs = []
    if partial is None or not partial.validfor(repo):
        partial = read(repo)
        if partial is None:
            subsetname = subsettable.get(filtername)
            if subsetname is None:
                # no subset to seed from: start empty
                partial = branchcache()
            else:
                subset = repo.filtered(subsetname)
                partial = subset.branchmap().copy()
                # revisions visible here but filtered in the subset must be
                # (re)processed if they fall below the copied cache's tip
                extrarevs = subset.changelog.filteredrevs - cl.filteredrevs
                revs.extend(r for r in extrarevs if r <= partial.tiprev)
    revs.extend(cl.revs(start=partial.tiprev + 1))
    if revs:
        partial.update(repo, revs)
        partial.write(repo)

    assert partial.validfor(repo), filtername
    repo._branchcaches[repo.filtername] = partial
def replacecache(repo, bm):
    """Replace the branchmap cache for a repo with a branch mapping.

    This is likely only called during clone with a branch map from a remote.
    """
    rbheads = []
    closed = []
    for bheads in bm.itervalues():
        rbheads.extend(bheads)
        for h in bheads:
            r = repo.changelog.rev(h)
            # branchinfo() -> (branchname, closesbranch); only the close
            # flag matters here
            if repo.changelog.branchinfo(r)[1]:
                closed.append(h)

    if rbheads:
        # tip of the new cache is the highest revision among all heads
        rtiprev = max(int(repo.changelog.rev(node))
                      for node in rbheads)
        cache = branchcache(bm,
                            repo[rtiprev].node(),
                            rtiprev,
                            closednodes=closed)

        # Try to stick it as low as possible
        # filter above served are unlikely to be fetch from a clone
        for candidate in ('base', 'immutable', 'served'):
            rview = repo.filtered(candidate)
            if cache.validfor(rview):
                repo._branchcaches[candidate] = cache
                cache.write(rview)
                break
class branchcache(dict):
    """A dict like object that holds a branches-to-heads cache.

    This cache is used to avoid costly computations to determine all the
    branch heads of a repo.

    The cache is serialized on disk in the following format:

    <tip hex node> <tip rev number> [optional filtered repo hex hash]
    <branch head hex node> <open/closed state> <branch name>
    <branch head hex node> <open/closed state> <branch name>
    ...

    The first line is used to check if the cache is still valid. If the
    branch cache is for a filtered repo view, an optional third hash is
    included that hashes the hashes of all filtered revisions.

    The open/closed state is represented by a single letter 'o' or 'c'.
    This field can be used to avoid changelog reads when determining if a
    branch head closes a branch or not.
    """

    def __init__(self, entries=(), tipnode=nullid, tiprev=nullrev,
                 filteredhash=None, closednodes=None):
        super(branchcache, self).__init__(entries)
        self.tipnode = tipnode
        self.tiprev = tiprev
        self.filteredhash = filteredhash
        # closednodes is a set of nodes that close their branch. If the
        # branch cache has been updated, it may contain nodes that are no
        # longer heads.
        self._closednodes = set() if closednodes is None else closednodes

    def validfor(self, repo):
        """Is the cache content valid regarding a repo

        - False when cached tipnode is unknown or if we detect a strip.
        - True when cache is up to date or a subset of current repo."""
        try:
            if self.tipnode != repo.changelog.node(self.tiprev):
                return False
            return self.filteredhash == scmutil.filteredhash(repo,
                                                             self.tiprev)
        except IndexError:
            # tiprev no longer exists (e.g. after a strip)
            return False

    def _branchtip(self, heads):
        '''Return tuple with last open head in heads and false,
        otherwise return last closed head and true.'''
        for h in reversed(heads):
            if h not in self._closednodes:
                return h, False
        # all heads are closed: report the last (tipmost) one as closed
        return heads[-1], True

    def branchtip(self, branch):
        '''Return the tipmost open head on branch head, otherwise return the
        tipmost closed head on branch.
        Raise KeyError for unknown branch.'''
        return self._branchtip(self[branch])[0]

    def branchheads(self, branch, closed=False):
        """Return heads of branch; closed heads are filtered out unless
        closed=True. Raise KeyError for unknown branch."""
        heads = self[branch]
        if not closed:
            heads = [h for h in heads if h not in self._closednodes]
        return heads

    def iterbranches(self):
        """Yield (branchname, heads, tipnode, isclosed) for each branch."""
        for bn, heads in self.iteritems():
            yield (bn, heads) + self._branchtip(heads)

    def copy(self):
        """return a deep copy of the branchcache object"""
        return branchcache(self, self.tipnode, self.tiprev, self.filteredhash,
                           self._closednodes)

    def write(self, repo):
        """Serialize the cache to disk; failures are logged, not raised."""
        try:
            f = repo.vfs(_filename(repo), "w", atomictemp=True)
            cachekey = [hex(self.tipnode), str(self.tiprev)]
            if self.filteredhash is not None:
                cachekey.append(hex(self.filteredhash))
            f.write(" ".join(cachekey) + '\n')
            nodecount = 0
            for label, nodes in sorted(self.iteritems()):
                for node in nodes:
                    nodecount += 1
                    state = 'c' if node in self._closednodes else 'o'
                    f.write("%s %s %s\n" % (hex(node), state,
                                            encoding.fromlocal(label)))
            f.close()
            repo.ui.log('branchcache',
                        'wrote %s branch cache with %d labels and %d nodes\n',
                        repo.filtername, len(self), nodecount)
        except (IOError, OSError, error.Abort) as inst:
            # Abort may be raised by a read-only opener; the cache write is
            # best-effort, so swallow and only report in debug mode
            repo.ui.debug("couldn't write branch cache: %s\n" % inst)

    def update(self, repo, revgen):
        """Given a branchhead cache, self, that may have extra nodes or be
        missing heads, and a generator of nodes that are strictly a superset of
        heads missing, this function updates self to be correct.
        """
        starttime = time.time()
        cl = repo.changelog

        # collect new branch entries
        newbranches = {}
        getbranchinfo = repo.revbranchcache().branchinfo
        for r in revgen:
            branch, closesbranch = getbranchinfo(r)
            newbranches.setdefault(branch, []).append(r)
            if closesbranch:
                self._closednodes.add(cl.node(r))

        # fetch current topological heads to speed up filtering
        topoheads = set(cl.headrevs())

        # if older branchheads are reachable from new ones, they aren't
        # really branchheads. Note checking parents is insufficient:
        # 1 (branch a) -> 2 (branch b) -> 3 (branch a)
        for branch, newheadrevs in newbranches.iteritems():
            bheads = self.setdefault(branch, [])
            bheadset = set(cl.rev(node) for node in bheads)

            newheadrevs.sort()
            bheadset.update(newheadrevs)

            # This prunes out two kinds of heads - heads that are superseded
            # by a head in newheadrevs, and newheadrevs that are not heads
            # because an existing head is their descendant. Only revs that
            # are not topological heads can possibly be pruned.
            uncertain = bheadset - topoheads
            if uncertain:
                floorrev = min(uncertain)
                ancestors = set(cl.ancestors(newheadrevs, floorrev))
                bheadset -= ancestors
            bheadrevs = sorted(bheadset)
            self[branch] = [cl.node(rev) for rev in bheadrevs]
            tiprev = bheadrevs[-1]
            if tiprev > self.tiprev:
                self.tipnode = cl.node(tiprev)
                self.tiprev = tiprev

        if not self.validfor(repo):
            # cache key is not valid anymore: recompute the tip from the
            # heads we actually hold
            self.tipnode = nullid
            self.tiprev = nullrev
            for heads in self.values():
                tiprev = max(cl.rev(node) for node in heads)
                if tiprev > self.tiprev:
                    self.tipnode = cl.node(tiprev)
                    self.tiprev = tiprev
        self.filteredhash = scmutil.filteredhash(repo, self.tiprev)

        duration = time.time() - starttime
        repo.ui.log('branchcache', 'updated %s branch cache in %.4f seconds\n',
                    repo.filtername, duration)
320 320
321 321 # Revision branch info cache
322 322
323 323 _rbcversion = '-v1'
324 324 _rbcnames = 'cache/rbc-names' + _rbcversion
325 325 _rbcrevs = 'cache/rbc-revs' + _rbcversion
326 326 # [4 byte hash prefix][4 byte branch name number with sign bit indicating open]
327 327 _rbcrecfmt = '>4sI'
328 328 _rbcrecsize = calcsize(_rbcrecfmt)
329 329 _rbcnodelen = 4
330 330 _rbcbranchidxmask = 0x7fffffff
331 331 _rbccloseflag = 0x80000000
332 332
class revbranchcache(object):
    """Persistent cache, mapping from revision number to branch name and close.
    This is a low level cache, independent of filtering.

    Branch names are stored in rbc-names in internal encoding separated by 0.
    rbc-names is append-only, and each branch name is only stored once and will
    thus have a unique index.

    The branch info for each revision is stored in rbc-revs as constant size
    records. The whole file is read into memory, but it is only 'parsed' on
    demand. The file is usually append-only but will be truncated if repo
    modification is detected.
    The record for each revision contains the first 4 bytes of the
    corresponding node hash, and the record is only used if it still matches.
    Even a completely trashed rbc-revs will thus still give the right result
    while converging towards full recovery ... assuming no incorrectly matching
    node hashes.
    The record also contains 4 bytes where 31 bits contains the index of the
    branch and the last bit indicate that it is a branch close commit.
    The usage pattern for rbc-revs is thus somewhat similar to 00changelog.i
    and will grow with it but be 1/8th of its size.
    """

    def __init__(self, repo, readonly=True):
        assert repo.filtername is None
        self._repo = repo
        self._names = []  # branch names in local encoding with static index
        self._rbcrevs = array('c')  # structs of type _rbcrecfmt
        self._rbcsnameslen = 0
        try:
            bndata = repo.vfs.read(_rbcnames)
            self._rbcsnameslen = len(bndata)  # for verification before writing
            self._names = [encoding.tolocal(bn) for bn in bndata.split('\0')]
        except (IOError, OSError):
            if readonly:
                # don't try to use cache - fall back to the slow path
                self.branchinfo = self._branchinfo

        if self._names:
            try:
                data = repo.vfs.read(_rbcrevs)
                self._rbcrevs.fromstring(data)
            except (IOError, OSError) as inst:
                repo.ui.debug("couldn't read revision branch cache: %s\n" %
                              inst)
        # remember number of good records on disk
        self._rbcrevslen = min(len(self._rbcrevs) // _rbcrecsize,
                               len(repo.changelog))
        if self._rbcrevslen == 0:
            # no usable records implies no usable names either
            self._names = []
        self._rbcnamescount = len(self._names)  # number of good names on disk
        self._namesreverse = dict((b, r) for r, b in enumerate(self._names))

    def branchinfo(self, rev):
        """Return branch name and close flag for rev, using and updating
        persistent cache."""
        changelog = self._repo.changelog
        rbcrevidx = rev * _rbcrecsize

        # avoid negative index, changelog.read(nullrev) is fast without cache
        if rev == nullrev:
            return changelog.branchinfo(rev)

        # if requested rev is missing, add and populate all missing revs
        if len(self._rbcrevs) < rbcrevidx + _rbcrecsize:
            self._rbcrevs.extend('\0' * (len(changelog) * _rbcrecsize -
                                         len(self._rbcrevs)))

        # fast path: extract data from cache, use it if node is matching
        reponode = changelog.node(rev)[:_rbcnodelen]
        cachenode, branchidx = unpack(
            _rbcrecfmt, buffer(self._rbcrevs, rbcrevidx, _rbcrecsize))
        close = bool(branchidx & _rbccloseflag)
        if close:
            branchidx &= _rbcbranchidxmask
        if cachenode == '\0\0\0\0':
            # all-zero hash prefix means "not cached yet"
            pass
        elif cachenode == reponode:
            return self._names[branchidx], close
        else:
            # rev/node map has changed, invalidate the cache from here up
            truncate = rbcrevidx + _rbcrecsize
            del self._rbcrevs[truncate:]
            # NOTE(review): truncate is a byte offset while _rbcrevslen counts
            # records; the min() is conservative and _setcachedata will set
            # the proper record count — confirm against later upstream fixes
            self._rbcrevslen = min(self._rbcrevslen, truncate)

        # fall back to slow path and make sure it will be written to disk
        return self._branchinfo(rev)

    def _branchinfo(self, rev):
        """Retrieve branch info from changelog and update _rbcrevs"""
        changelog = self._repo.changelog
        b, close = changelog.branchinfo(rev)
        if b in self._namesreverse:
            branchidx = self._namesreverse[b]
        else:
            # first time we see this branch: assign it the next index
            branchidx = len(self._names)
            self._names.append(b)
            self._namesreverse[b] = branchidx
        reponode = changelog.node(rev)
        if close:
            branchidx |= _rbccloseflag
        self._setcachedata(rev, reponode, branchidx)
        return b, close

    def _setcachedata(self, rev, node, branchidx):
        """Writes the node's branch data to the in-memory cache data."""
        rbcrevidx = rev * _rbcrecsize
        rec = array('c')
        rec.fromstring(pack(_rbcrecfmt, node, branchidx))
        self._rbcrevs[rbcrevidx:rbcrevidx + _rbcrecsize] = rec
        # everything from rev on is dirty and must be written out again
        self._rbcrevslen = min(self._rbcrevslen, rev)

        tr = self._repo.currenttransaction()
        if tr:
            tr.addfinalize('write-revbranchcache', self.write)

    def write(self, tr=None):
        """Save branch cache if it is dirty."""
        repo = self._repo
        if self._rbcnamescount < len(self._names):
            # new names were added since the last write
            try:
                if self._rbcnamescount != 0:
                    f = repo.vfs.open(_rbcnames, 'ab')
                    if f.tell() == self._rbcsnameslen:
                        f.write('\0')
                    else:
                        # on-disk names no longer match what we wrote: start
                        # over and also drop rbc-revs, which indexes into them
                        f.close()
                        repo.ui.debug("%s changed - rewriting it\n" % _rbcnames)
                        self._rbcnamescount = 0
                        self._rbcrevslen = 0
                if self._rbcnamescount == 0:
                    # before rewriting names, make sure references are removed
                    repo.vfs.unlinkpath(_rbcrevs, ignoremissing=True)
                    f = repo.vfs.open(_rbcnames, 'wb')
                f.write('\0'.join(encoding.fromlocal(b)
                                  for b in self._names[self._rbcnamescount:]))
                self._rbcsnameslen = f.tell()
                f.close()
            except (IOError, OSError, error.Abort) as inst:
                repo.ui.debug("couldn't write revision branch cache names: "
                              "%s\n" % inst)
                return
            self._rbcnamescount = len(self._names)

        start = self._rbcrevslen * _rbcrecsize
        if start != len(self._rbcrevs):
            revs = min(len(repo.changelog), len(self._rbcrevs) // _rbcrecsize)
            try:
                f = repo.vfs.open(_rbcrevs, 'ab')
                if f.tell() != start:
                    repo.ui.debug("truncating %s to %s\n" % (_rbcrevs, start))
                    f.seek(start)
                    if f.tell() != start:
                        # seek didn't stick (e.g. append-only opener):
                        # rewrite the whole file from the beginning instead
                        start = 0
                        f.seek(start)
                    f.truncate()
                end = revs * _rbcrecsize
                f.write(self._rbcrevs[start:end])
                f.close()
            except (IOError, OSError, error.Abort) as inst:
                repo.ui.debug("couldn't write revision branch cache: %s\n" %
                              inst)
                return
            self._rbcrevslen = revs
General Comments 0
You need to be logged in to leave comments. Login now