##// END OF EJS Templates
branchmap: add seek() to end of file before calling tell() on append open()...
Matt Harbison -
r23819:6bf93440 default
parent child Browse files
Show More
@@ -1,443 +1,449
1 1 # branchmap.py - logic to computes, maintain and stores branchmap for local repo
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from node import bin, hex, nullid, nullrev
9 9 import encoding
10 10 import util
11 11 import time
12 12 from array import array
13 13 from struct import calcsize, pack, unpack
14 14
15 15 def _filename(repo):
16 16 """name of a branchcache file for a given repo or repoview"""
17 17 filename = "cache/branch2"
18 18 if repo.filtername:
19 19 filename = '%s-%s' % (filename, repo.filtername)
20 20 return filename
21 21
22 22 def read(repo):
23 23 try:
24 24 f = repo.opener(_filename(repo))
25 25 lines = f.read().split('\n')
26 26 f.close()
27 27 except (IOError, OSError):
28 28 return None
29 29
30 30 try:
31 31 cachekey = lines.pop(0).split(" ", 2)
32 32 last, lrev = cachekey[:2]
33 33 last, lrev = bin(last), int(lrev)
34 34 filteredhash = None
35 35 if len(cachekey) > 2:
36 36 filteredhash = bin(cachekey[2])
37 37 partial = branchcache(tipnode=last, tiprev=lrev,
38 38 filteredhash=filteredhash)
39 39 if not partial.validfor(repo):
40 40 # invalidate the cache
41 41 raise ValueError('tip differs')
42 42 for l in lines:
43 43 if not l:
44 44 continue
45 45 node, state, label = l.split(" ", 2)
46 46 if state not in 'oc':
47 47 raise ValueError('invalid branch state')
48 48 label = encoding.tolocal(label.strip())
49 49 if not node in repo:
50 50 raise ValueError('node %s does not exist' % node)
51 51 node = bin(node)
52 52 partial.setdefault(label, []).append(node)
53 53 if state == 'c':
54 54 partial._closednodes.add(node)
55 55 except KeyboardInterrupt:
56 56 raise
57 57 except Exception, inst:
58 58 if repo.ui.debugflag:
59 59 msg = 'invalid branchheads cache'
60 60 if repo.filtername is not None:
61 61 msg += ' (%s)' % repo.filtername
62 62 msg += ': %s\n'
63 63 repo.ui.debug(msg % inst)
64 64 partial = None
65 65 return partial
66 66
67 67 ### Nearest subset relation
68 68 # Nearest subset of filter X is a filter Y so that:
69 69 # * Y is included in X,
70 70 # * X - Y is as small as possible.
71 71 # This create and ordering used for branchmap purpose.
72 72 # the ordering may be partial
73 73 subsettable = {None: 'visible',
74 74 'visible': 'served',
75 75 'served': 'immutable',
76 76 'immutable': 'base'}
77 77
78 78 def updatecache(repo):
79 79 cl = repo.changelog
80 80 filtername = repo.filtername
81 81 partial = repo._branchcaches.get(filtername)
82 82
83 83 revs = []
84 84 if partial is None or not partial.validfor(repo):
85 85 partial = read(repo)
86 86 if partial is None:
87 87 subsetname = subsettable.get(filtername)
88 88 if subsetname is None:
89 89 partial = branchcache()
90 90 else:
91 91 subset = repo.filtered(subsetname)
92 92 partial = subset.branchmap().copy()
93 93 extrarevs = subset.changelog.filteredrevs - cl.filteredrevs
94 94 revs.extend(r for r in extrarevs if r <= partial.tiprev)
95 95 revs.extend(cl.revs(start=partial.tiprev + 1))
96 96 if revs:
97 97 partial.update(repo, revs)
98 98 partial.write(repo)
99 99 assert partial.validfor(repo), filtername
100 100 repo._branchcaches[repo.filtername] = partial
101 101
102 102 class branchcache(dict):
103 103 """A dict like object that hold branches heads cache.
104 104
105 105 This cache is used to avoid costly computations to determine all the
106 106 branch heads of a repo.
107 107
108 108 The cache is serialized on disk in the following format:
109 109
110 110 <tip hex node> <tip rev number> [optional filtered repo hex hash]
111 111 <branch head hex node> <open/closed state> <branch name>
112 112 <branch head hex node> <open/closed state> <branch name>
113 113 ...
114 114
115 115 The first line is used to check if the cache is still valid. If the
116 116 branch cache is for a filtered repo view, an optional third hash is
117 117 included that hashes the hashes of all filtered revisions.
118 118
119 119 The open/closed state is represented by a single letter 'o' or 'c'.
120 120 This field can be used to avoid changelog reads when determining if a
121 121 branch head closes a branch or not.
122 122 """
123 123
124 124 def __init__(self, entries=(), tipnode=nullid, tiprev=nullrev,
125 125 filteredhash=None, closednodes=None):
126 126 super(branchcache, self).__init__(entries)
127 127 self.tipnode = tipnode
128 128 self.tiprev = tiprev
129 129 self.filteredhash = filteredhash
130 130 # closednodes is a set of nodes that close their branch. If the branch
131 131 # cache has been updated, it may contain nodes that are no longer
132 132 # heads.
133 133 if closednodes is None:
134 134 self._closednodes = set()
135 135 else:
136 136 self._closednodes = closednodes
137 137 self._revbranchcache = None
138 138
139 139 def _hashfiltered(self, repo):
140 140 """build hash of revision filtered in the current cache
141 141
142 142 Tracking tipnode and tiprev is not enough to ensure validity of the
143 143 cache as they do not help to distinct cache that ignored various
144 144 revision bellow tiprev.
145 145
146 146 To detect such difference, we build a cache of all ignored revisions.
147 147 """
148 148 cl = repo.changelog
149 149 if not cl.filteredrevs:
150 150 return None
151 151 key = None
152 152 revs = sorted(r for r in cl.filteredrevs if r <= self.tiprev)
153 153 if revs:
154 154 s = util.sha1()
155 155 for rev in revs:
156 156 s.update('%s;' % rev)
157 157 key = s.digest()
158 158 return key
159 159
160 160 def validfor(self, repo):
161 161 """Is the cache content valid regarding a repo
162 162
163 163 - False when cached tipnode is unknown or if we detect a strip.
164 164 - True when cache is up to date or a subset of current repo."""
165 165 try:
166 166 return ((self.tipnode == repo.changelog.node(self.tiprev))
167 167 and (self.filteredhash == self._hashfiltered(repo)))
168 168 except IndexError:
169 169 return False
170 170
171 171 def _branchtip(self, heads):
172 172 '''Return tuple with last open head in heads and false,
173 173 otherwise return last closed head and true.'''
174 174 tip = heads[-1]
175 175 closed = True
176 176 for h in reversed(heads):
177 177 if h not in self._closednodes:
178 178 tip = h
179 179 closed = False
180 180 break
181 181 return tip, closed
182 182
183 183 def branchtip(self, branch):
184 184 '''Return the tipmost open head on branch head, otherwise return the
185 185 tipmost closed head on branch.
186 186 Raise KeyError for unknown branch.'''
187 187 return self._branchtip(self[branch])[0]
188 188
189 189 def branchheads(self, branch, closed=False):
190 190 heads = self[branch]
191 191 if not closed:
192 192 heads = [h for h in heads if h not in self._closednodes]
193 193 return heads
194 194
195 195 def iterbranches(self):
196 196 for bn, heads in self.iteritems():
197 197 yield (bn, heads) + self._branchtip(heads)
198 198
199 199 def copy(self):
200 200 """return an deep copy of the branchcache object"""
201 201 return branchcache(self, self.tipnode, self.tiprev, self.filteredhash,
202 202 self._closednodes)
203 203
204 204 def write(self, repo):
205 205 try:
206 206 f = repo.opener(_filename(repo), "w", atomictemp=True)
207 207 cachekey = [hex(self.tipnode), str(self.tiprev)]
208 208 if self.filteredhash is not None:
209 209 cachekey.append(hex(self.filteredhash))
210 210 f.write(" ".join(cachekey) + '\n')
211 211 nodecount = 0
212 212 for label, nodes in sorted(self.iteritems()):
213 213 for node in nodes:
214 214 nodecount += 1
215 215 if node in self._closednodes:
216 216 state = 'c'
217 217 else:
218 218 state = 'o'
219 219 f.write("%s %s %s\n" % (hex(node), state,
220 220 encoding.fromlocal(label)))
221 221 f.close()
222 222 repo.ui.log('branchcache',
223 223 'wrote %s branch cache with %d labels and %d nodes\n',
224 224 repo.filtername, len(self), nodecount)
225 225 except (IOError, OSError, util.Abort), inst:
226 226 repo.ui.debug("couldn't write branch cache: %s\n" % inst)
227 227 # Abort may be raise by read only opener
228 228 pass
229 229 if self._revbranchcache:
230 230 self._revbranchcache.write(repo.unfiltered())
231 231 self._revbranchcache = None
232 232
233 233 def update(self, repo, revgen):
234 234 """Given a branchhead cache, self, that may have extra nodes or be
235 235 missing heads, and a generator of nodes that are strictly a superset of
236 236 heads missing, this function updates self to be correct.
237 237 """
238 238 starttime = time.time()
239 239 cl = repo.changelog
240 240 # collect new branch entries
241 241 newbranches = {}
242 242 urepo = repo.unfiltered()
243 243 self._revbranchcache = revbranchcache(urepo)
244 244 getbranchinfo = self._revbranchcache.branchinfo
245 245 ucl = urepo.changelog
246 246 for r in revgen:
247 247 branch, closesbranch = getbranchinfo(ucl, r)
248 248 newbranches.setdefault(branch, []).append(r)
249 249 if closesbranch:
250 250 self._closednodes.add(cl.node(r))
251 251
252 252 # fetch current topological heads to speed up filtering
253 253 topoheads = set(cl.headrevs())
254 254
255 255 # if older branchheads are reachable from new ones, they aren't
256 256 # really branchheads. Note checking parents is insufficient:
257 257 # 1 (branch a) -> 2 (branch b) -> 3 (branch a)
258 258 for branch, newheadrevs in newbranches.iteritems():
259 259 bheads = self.setdefault(branch, [])
260 260 bheadset = set(cl.rev(node) for node in bheads)
261 261
262 262 # This have been tested True on all internal usage of this function.
263 263 # run it again in case of doubt
264 264 # assert not (set(bheadrevs) & set(newheadrevs))
265 265 newheadrevs.sort()
266 266 bheadset.update(newheadrevs)
267 267
268 268 # This prunes out two kinds of heads - heads that are superseded by
269 269 # a head in newheadrevs, and newheadrevs that are not heads because
270 270 # an existing head is their descendant.
271 271 uncertain = bheadset - topoheads
272 272 if uncertain:
273 273 floorrev = min(uncertain)
274 274 ancestors = set(cl.ancestors(newheadrevs, floorrev))
275 275 bheadset -= ancestors
276 276 bheadrevs = sorted(bheadset)
277 277 self[branch] = [cl.node(rev) for rev in bheadrevs]
278 278 tiprev = bheadrevs[-1]
279 279 if tiprev > self.tiprev:
280 280 self.tipnode = cl.node(tiprev)
281 281 self.tiprev = tiprev
282 282
283 283 if not self.validfor(repo):
284 284 # cache key are not valid anymore
285 285 self.tipnode = nullid
286 286 self.tiprev = nullrev
287 287 for heads in self.values():
288 288 tiprev = max(cl.rev(node) for node in heads)
289 289 if tiprev > self.tiprev:
290 290 self.tipnode = cl.node(tiprev)
291 291 self.tiprev = tiprev
292 292 self.filteredhash = self._hashfiltered(repo)
293 293
294 294 duration = time.time() - starttime
295 295 repo.ui.log('branchcache', 'updated %s branch cache in %.4f seconds\n',
296 296 repo.filtername, duration)
297 297
298 298 # Revision branch info cache
299 299
300 300 _rbcversion = '-v1'
301 301 _rbcnames = 'cache/rbc-names' + _rbcversion
302 302 _rbcrevs = 'cache/rbc-revs' + _rbcversion
303 303 # [4 byte hash prefix][4 byte branch name number with sign bit indicating open]
304 304 _rbcrecfmt = '>4sI'
305 305 _rbcrecsize = calcsize(_rbcrecfmt)
306 306 _rbcnodelen = 4
307 307 _rbcbranchidxmask = 0x7fffffff
308 308 _rbccloseflag = 0x80000000
309 309
310 310 class revbranchcache(object):
311 311 """Persistent cache, mapping from revision number to branch name and close.
312 312 This is a low level cache, independent of filtering.
313 313
314 314 Branch names are stored in rbc-names in internal encoding separated by 0.
315 315 rbc-names is append-only, and each branch name is only stored once and will
316 316 thus have a unique index.
317 317
318 318 The branch info for each revision is stored in rbc-revs as constant size
319 319 records. The whole file is read into memory, but it is only 'parsed' on
320 320 demand. The file is usually append-only but will be truncated if repo
321 321 modification is detected.
322 322 The record for each revision contains the first 4 bytes of the
323 323 corresponding node hash, and the record is only used if it still matches.
324 324 Even a completely trashed rbc-revs fill thus still give the right result
325 325 while converging towards full recovery ... assuming no incorrectly matching
326 326 node hashes.
327 327 The record also contains 4 bytes where 31 bits contains the index of the
328 328 branch and the last bit indicate that it is a branch close commit.
329 329 The usage pattern for rbc-revs is thus somewhat similar to 00changelog.i
330 330 and will grow with it but be 1/8th of its size.
331 331 """
332 332
333 333 def __init__(self, repo):
334 334 assert repo.filtername is None
335 335 self._names = [] # branch names in local encoding with static index
336 336 self._rbcrevs = array('c') # structs of type _rbcrecfmt
337 337 self._rbcsnameslen = 0
338 338 try:
339 339 bndata = repo.vfs.read(_rbcnames)
340 340 self._rbcsnameslen = len(bndata) # for verification before writing
341 341 self._names = [encoding.tolocal(bn) for bn in bndata.split('\0')]
342 342 except (IOError, OSError), inst:
343 343 repo.ui.debug("couldn't read revision branch cache names: %s\n" %
344 344 inst)
345 345 if self._names:
346 346 try:
347 347 data = repo.vfs.read(_rbcrevs)
348 348 self._rbcrevs.fromstring(data)
349 349 except (IOError, OSError), inst:
350 350 repo.ui.debug("couldn't read revision branch cache: %s\n" %
351 351 inst)
352 352 # remember number of good records on disk
353 353 self._rbcrevslen = min(len(self._rbcrevs) // _rbcrecsize,
354 354 len(repo.changelog))
355 355 if self._rbcrevslen == 0:
356 356 self._names = []
357 357 self._rbcnamescount = len(self._names) # number of good names on disk
358 358 self._namesreverse = dict((b, r) for r, b in enumerate(self._names))
359 359
360 360 def branchinfo(self, changelog, rev):
361 361 """Return branch name and close flag for rev, using and updating
362 362 persistent cache."""
363 363 rbcrevidx = rev * _rbcrecsize
364 364
365 365 # if requested rev is missing, add and populate all missing revs
366 366 if len(self._rbcrevs) < rbcrevidx + _rbcrecsize:
367 367 first = len(self._rbcrevs) // _rbcrecsize
368 368 self._rbcrevs.extend('\0' * (len(changelog) * _rbcrecsize -
369 369 len(self._rbcrevs)))
370 370 for r in xrange(first, len(changelog)):
371 371 self._branchinfo(changelog, r)
372 372
373 373 # fast path: extract data from cache, use it if node is matching
374 374 reponode = changelog.node(rev)[:_rbcnodelen]
375 375 cachenode, branchidx = unpack(
376 376 _rbcrecfmt, buffer(self._rbcrevs, rbcrevidx, _rbcrecsize))
377 377 close = bool(branchidx & _rbccloseflag)
378 378 if close:
379 379 branchidx &= _rbcbranchidxmask
380 380 if cachenode == reponode:
381 381 return self._names[branchidx], close
382 382 # fall back to slow path and make sure it will be written to disk
383 383 self._rbcrevslen = min(self._rbcrevslen, rev)
384 384 return self._branchinfo(changelog, rev)
385 385
386 386 def _branchinfo(self, changelog, rev):
387 387 """Retrieve branch info from changelog and update _rbcrevs"""
388 388 b, close = changelog.branchinfo(rev)
389 389 if b in self._namesreverse:
390 390 branchidx = self._namesreverse[b]
391 391 else:
392 392 branchidx = len(self._names)
393 393 self._names.append(b)
394 394 self._namesreverse[b] = branchidx
395 395 reponode = changelog.node(rev)
396 396 if close:
397 397 branchidx |= _rbccloseflag
398 398 rbcrevidx = rev * _rbcrecsize
399 399 rec = array('c')
400 400 rec.fromstring(pack(_rbcrecfmt, reponode, branchidx))
401 401 self._rbcrevs[rbcrevidx:rbcrevidx + _rbcrecsize] = rec
402 402 return b, close
403 403
404 404 def write(self, repo):
405 405 """Save branch cache if it is dirty."""
406 406 if self._rbcnamescount < len(self._names):
407 407 try:
408 408 if self._rbcnamescount != 0:
409 409 f = repo.vfs.open(_rbcnames, 'ab')
410 # The position after open(x, 'a') is implementation defined-
411 # see issue3543. SEEK_END was added in 2.5
412 f.seek(0, 2) #os.SEEK_END
410 413 if f.tell() == self._rbcsnameslen:
411 414 f.write('\0')
412 415 else:
413 416 f.close()
414 417 self._rbcnamescount = 0
415 418 self._rbcrevslen = 0
416 419 if self._rbcnamescount == 0:
417 420 f = repo.vfs.open(_rbcnames, 'wb')
418 421 f.write('\0'.join(encoding.fromlocal(b)
419 422 for b in self._names[self._rbcnamescount:]))
420 423 self._rbcsnameslen = f.tell()
421 424 f.close()
422 425 except (IOError, OSError, util.Abort), inst:
423 426 repo.ui.debug("couldn't write revision branch cache names: "
424 427 "%s\n" % inst)
425 428 return
426 429 self._rbcnamescount = len(self._names)
427 430
428 431 start = self._rbcrevslen * _rbcrecsize
429 432 if start != len(self._rbcrevs):
430 433 self._rbcrevslen = min(len(repo.changelog),
431 434 len(self._rbcrevs) // _rbcrecsize)
432 435 try:
433 436 f = repo.vfs.open(_rbcrevs, 'ab')
437 # The position after open(x, 'a') is implementation defined-
438 # see issue3543. SEEK_END was added in 2.5
439 f.seek(0, 2) #os.SEEK_END
434 440 if f.tell() != start:
435 441 f.seek(start)
436 442 f.truncate()
437 443 end = self._rbcrevslen * _rbcrecsize
438 444 f.write(self._rbcrevs[start:end])
439 445 f.close()
440 446 except (IOError, OSError, util.Abort), inst:
441 447 repo.ui.debug("couldn't write revision branch cache: %s\n" %
442 448 inst)
443 449 return
General Comments 0
You need to be logged in to leave comments. Login now