tags: silence hgtagsfnodes reading failures...
Matt Mackall
r29039:e3055b46 stable
@@ -1,568 +1,571 @@
1 1 # tags.py - read tag info from local repository
2 2 #
3 3 # Copyright 2009 Matt Mackall <mpm@selenic.com>
4 4 # Copyright 2009 Greg Ward <greg@gerg.ca>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 # Currently this module only deals with reading and caching tags.
10 10 # Eventually, it could take care of updating (adding/removing/moving)
11 11 # tags too.
12 12
13 13 from __future__ import absolute_import
14 14
15 15 import array
16 16 import errno
17 17 import time
18 18
19 19 from .node import (
20 20 bin,
21 21 hex,
22 22 nullid,
23 23 short,
24 24 )
25 25 from . import (
26 26 encoding,
27 27 error,
28 28 util,
29 29 )
30 30
31 31 array = array.array
32 32
33 33 # Tags computation can be expensive and caches exist to make it fast in
34 34 # the common case.
35 35 #
36 36 # The "hgtagsfnodes1" cache file caches the .hgtags filenode values for
37 37 # each revision in the repository. The file is effectively an array of
38 38 # fixed length records. Read the docs for "hgtagsfnodescache" for technical
39 39 # details.
40 40 #
41 41 # The .hgtags filenode cache grows in proportion to the length of the
42 42 # changelog. The file is truncated when the changelog is stripped.
43 43 #
44 44 # The purpose of the filenode cache is to avoid the most expensive part
45 45 # of finding global tags, which is looking up the .hgtags filenode in the
46 46 # manifest for each head. This can take dozens of milliseconds, or
47 47 # over 100ms, for repositories with very large manifests. Multiplied across
48 48 # dozens or even hundreds of heads, this is a significant performance concern.
49 49 #
50 50 # There also exists a separate cache file for each repository filter.
51 51 # These "tags-*" files store information about the history of tags.
52 52 #
53 53 # Each tags cache file consists of a cache validation line followed by
54 54 # a history of tags.
55 55 #
56 56 # The cache validation line has the format:
57 57 #
58 58 # <tiprev> <tipnode> [<filteredhash>]
59 59 #
60 60 # <tiprev> is an integer revision and <tipnode> is a 40 character hex
61 61 # node for that changeset. These redundantly identify the repository
62 62 # tip from the time the cache was written. In addition, <filteredhash>,
63 63 # if present, is a 40 character hex hash of the contents of the filtered
64 64 # revisions for this filter. If the set of filtered revs changes, the
65 65 # hash will change and invalidate the cache.
66 66 #
67 67 # The history part of the tags cache consists of lines of the form:
68 68 #
69 69 # <node> <tag>
70 70 #
71 71 # (This format is identical to that of .hgtags files.)
72 72 #
73 73 # <tag> is the tag name and <node> is the 40 character hex changeset
74 74 # the tag is associated with.
75 75 #
76 76 # Tags are written sorted by tag name.
77 77 #
78 78 # Tags associated with multiple changesets have an entry for each changeset.
79 79 # The most recent changeset (in terms of revlog ordering for the head
80 80 # setting it) for each tag is last.
81 81
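# For illustration, a minimal sketch of parsing the cache validation line
# described above. The helper name is hypothetical and it is not called
# anywhere; the real parsing is inlined in _readtagcache() below.
def _sketch_parsevalidline(line):
    # "<tiprev> <tipnode> [<filteredhash>]", whitespace-separated
    fields = line.split()
    tiprev = int(fields[0])
    tipnode = bin(fields[1])
    filteredhash = bin(fields[2]) if len(fields) > 2 else None
    return tiprev, tipnode, filteredhash
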
82 82 def findglobaltags(ui, repo, alltags, tagtypes):
83 83 '''Find global tags in a repo.
84 84
85 85 "alltags" maps tag name to (node, hist) 2-tuples.
86 86
87 87 "tagtypes" maps tag name to tag type. Global tags always have the
88 88 "global" tag type.
89 89
90 90 The "alltags" and "tagtypes" dicts are updated in place. Empty dicts
91 91 should be passed in.
92 92
93 93 The tags cache is read and updated as a side-effect of calling this function.
94 94 '''
95 95 # This is so we can be lazy and assume alltags contains only global
96 96 # tags when we pass it to _writetagcache().
97 97 assert len(alltags) == len(tagtypes) == 0, \
98 98 "findglobaltags() should be called first"
99 99
100 100 (heads, tagfnode, valid, cachetags, shouldwrite) = _readtagcache(ui, repo)
101 101 if cachetags is not None:
102 102 assert not shouldwrite
103 103 # XXX is this really 100% correct? are there oddball special
104 104 # cases where a global tag should outrank a local tag but won't,
105 105 # because cachetags does not contain rank info?
106 106 _updatetags(cachetags, 'global', alltags, tagtypes)
107 107 return
108 108
109 109 seen = set() # set of fnode
110 110 fctx = None
111 111 for head in reversed(heads): # oldest to newest
112 112 assert head in repo.changelog.nodemap, \
113 113 "tag cache returned bogus head %s" % short(head)
114 114
115 115 fnode = tagfnode.get(head)
116 116 if fnode and fnode not in seen:
117 117 seen.add(fnode)
118 118 if not fctx:
119 119 fctx = repo.filectx('.hgtags', fileid=fnode)
120 120 else:
121 121 fctx = fctx.filectx(fnode)
122 122
123 123 filetags = _readtags(ui, repo, fctx.data().splitlines(), fctx)
124 124 _updatetags(filetags, 'global', alltags, tagtypes)
125 125
126 126 # and update the cache (if necessary)
127 127 if shouldwrite:
128 128 _writetagcache(ui, repo, valid, alltags)
129 129
130 130 def readlocaltags(ui, repo, alltags, tagtypes):
131 131 '''Read local tags in repo. Update alltags and tagtypes.'''
132 132 try:
133 133 data = repo.vfs.read("localtags")
134 134 except IOError as inst:
135 135 if inst.errno != errno.ENOENT:
136 136 raise
137 137 return
138 138
139 139 # localtags is in the local encoding; re-encode to UTF-8 on
140 140 # input for consistency with the rest of this module.
141 141 filetags = _readtags(
142 142 ui, repo, data.splitlines(), "localtags",
143 143 recode=encoding.fromlocal)
144 144
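    # (.hg/localtags shares the "<hexnode> <tag>" line format with
    # .hgtags but is stored in the local encoding, hence the fromlocal
    # recode above.)
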
145 145 # remove tags pointing to invalid nodes
146 146 cl = repo.changelog
147 147 for t in filetags.keys():
148 148 try:
149 149 cl.rev(filetags[t][0])
150 150 except (LookupError, ValueError):
151 151 del filetags[t]
152 152
153 153 _updatetags(filetags, "local", alltags, tagtypes)
154 154
155 155 def _readtaghist(ui, repo, lines, fn, recode=None, calcnodelines=False):
156 156 '''Read tag definitions from a file (or any source of lines).
157 157
158 158 This function returns two sortdicts with similar information:
159 159
160 160 - the first dict, bintaghist, contains the tag information as expected by
161 161 the _readtags function, i.e. a mapping from tag name to (node, hist):
162 162 - node is the node id from the last line read for that name,
163 163 - hist is the list of node ids previously associated with it (in file
164 164 order). All node ids are binary, not hex.
165 165
166 166 - the second dict, hextaglines, is a mapping from tag name to a list of
167 167 [hexnode, line number] pairs, ordered from the oldest to the newest node.
168 168
169 169 When calcnodelines is False the hextaglines dict is not calculated (an
170 170 empty dict is returned). This is done to improve this function's
171 171 performance in cases where the line numbers are not needed.
172 172 '''
173 173
174 174 bintaghist = util.sortdict()
175 175 hextaglines = util.sortdict()
176 176 count = 0
177 177
178 178 def dbg(msg):
179 179 ui.debug("%s, line %s: %s\n" % (fn, count, msg))
180 180
181 181 for nline, line in enumerate(lines):
182 182 count += 1
183 183 if not line:
184 184 continue
185 185 try:
186 186 (nodehex, name) = line.split(" ", 1)
187 187 except ValueError:
188 188 dbg("cannot parse entry")
189 189 continue
190 190 name = name.strip()
191 191 if recode:
192 192 name = recode(name)
193 193 try:
194 194 nodebin = bin(nodehex)
195 195 except TypeError:
196 196 dbg("node '%s' is not well formed" % nodehex)
197 197 continue
198 198
199 199 # update filetags
200 200 if calcnodelines:
201 201 # map tag name to a list of line numbers
202 202 if name not in hextaglines:
203 203 hextaglines[name] = []
204 204 hextaglines[name].append([nodehex, nline])
205 205 continue
206 206 # map tag name to (node, hist)
207 207 if name not in bintaghist:
208 208 bintaghist[name] = []
209 209 bintaghist[name].append(nodebin)
210 210 return bintaghist, hextaglines
211 211
212 212 def _readtags(ui, repo, lines, fn, recode=None, calcnodelines=False):
213 213 '''Read tag definitions from a file (or any source of lines).
214 214
215 215 Returns a mapping from tag name to (node, hist).
216 216
217 217 "node" is the node id from the last line read for that name. "hist"
218 218 is the list of node ids previously associated with it (in file order).
219 219 All node ids are binary, not hex.
220 220 '''
221 221 filetags, nodelines = _readtaghist(ui, repo, lines, fn, recode=recode,
222 222 calcnodelines=calcnodelines)
223 223 # util.sortdict().__setitem__ is much slower at replacing than inserting
224 224 # new entries. The difference can matter if there are thousands of tags.
225 225 # Create a new sortdict to avoid the performance penalty.
226 226 newtags = util.sortdict()
227 227 for tag, taghist in filetags.items():
228 228 newtags[tag] = (taghist[-1], taghist[:-1])
229 229 return newtags
230 230
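# Hypothetical usage sketch (not called anywhere): _readtags() never
# touches "repo" and only consults "ui" for malformed lines, so its
# output shape is easy to demonstrate with fake nodes.
def _sketch_readtags_demo(ui):
    a, b = '11' * 20, '22' * 20             # two fake 40-char hex nodes
    lines = [a + ' t', b + ' t']            # tag "t" moved from a to b
    tags = _readtags(ui, None, lines, 'example')
    assert tags['t'] == (bin(b), [bin(a)])  # newest node, older history
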
231 231 def _updatetags(filetags, tagtype, alltags, tagtypes):
232 232 '''Incorporate the tag info read from one file into the two
233 233 dictionaries, alltags and tagtypes, that contain all tag
234 234 info (global across all heads plus local).'''
235 235
236 236 for name, nodehist in filetags.iteritems():
237 237 if name not in alltags:
238 238 alltags[name] = nodehist
239 239 tagtypes[name] = tagtype
240 240 continue
241 241
242 242 # we prefer alltags[name] if:
243 243 # it supersedes us OR
244 244 # mutual supersedes and it has a higher rank
245 245 # otherwise we win because we're tip-most
246 246 anode, ahist = nodehist
247 247 bnode, bhist = alltags[name]
248 248 if (bnode != anode and anode in bhist and
249 249 (bnode not in ahist or len(bhist) > len(ahist))):
250 250 anode = bnode
251 251 else:
252 252 tagtypes[name] = tagtype
253 253 ahist.extend([n for n in bhist if n not in ahist])
254 254 alltags[name] = anode, ahist
255 255
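# A hypothetical walk-through of the precedence rule above: node "b"
# carries "a" in its history, so "b" supersedes "a" and survives even
# when "a" arrives later from a more tip-most head.
def _sketch_updatetags_demo():
    alltags, tagtypes = {}, {}
    a, b = 'a' * 20, 'b' * 20               # stand-in binary node ids
    _updatetags({'t': (b, [a])}, 'global', alltags, tagtypes)
    _updatetags({'t': (a, [])}, 'global', alltags, tagtypes)
    assert alltags['t'] == (b, [a])         # "b" still wins
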
256 256 def _filename(repo):
257 257 """name of a tagcache file for a given repo or repoview"""
258 258 filename = 'cache/tags2'
259 259 if repo.filtername:
260 260 filename = '%s-%s' % (filename, repo.filtername)
261 261 return filename
262 262
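# For instance, the unfiltered repo uses '.hg/cache/tags2' while the
# "visible" repoview uses '.hg/cache/tags2-visible'.
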
263 263 def _readtagcache(ui, repo):
264 264 '''Read the tag cache.
265 265
266 266 Returns a tuple (heads, fnodes, validinfo, cachetags, shouldwrite).
267 267
268 268 If the cache is completely up-to-date, "cachetags" is a dict of the
269 269 form returned by _readtags() and "heads", "fnodes", and "validinfo" are
270 270 None and "shouldwrite" is False.
271 271
272 272 If the cache is not up to date, "cachetags" is None. "heads" is a list
273 273 of all heads currently in the repository, ordered from tip to oldest.
274 274 "validinfo" is a tuple describing cache validation info. This is used
275 275 when writing the tags cache. "fnodes" is a mapping from head to .hgtags
276 276 filenode. "shouldwrite" is True.
277 277
278 278 If the cache is not up to date, the caller is responsible for reading tag
279 279 info from each returned head. (See findglobaltags().)
280 280 '''
281 281 from . import scmutil # avoid cycle
282 282
283 283 try:
284 284 cachefile = repo.vfs(_filename(repo), 'r')
285 285 # force reading the file for static-http
286 286 cachelines = iter(cachefile)
287 287 except IOError:
288 288 cachefile = None
289 289
290 290 cacherev = None
291 291 cachenode = None
292 292 cachehash = None
293 293 if cachefile:
294 294 try:
295 295 validline = cachelines.next()
296 296 validline = validline.split()
297 297 cacherev = int(validline[0])
298 298 cachenode = bin(validline[1])
299 299 if len(validline) > 2:
300 300 cachehash = bin(validline[2])
301 301 except Exception:
302 302 # corruption of the cache, just recompute it.
303 303 pass
304 304
305 305 tipnode = repo.changelog.tip()
306 306 tiprev = len(repo.changelog) - 1
307 307
308 308 # Case 1 (common): tip is the same, so nothing has changed.
309 309 # (Unchanged tip trivially means no changesets have been added.
310 310 # But, thanks to localrepository.destroyed(), it also means none
311 311 # have been destroyed by strip or rollback.)
312 312 if (cacherev == tiprev
313 313 and cachenode == tipnode
314 314 and cachehash == scmutil.filteredhash(repo, tiprev)):
315 315 tags = _readtags(ui, repo, cachelines, cachefile.name)
316 316 cachefile.close()
317 317 return (None, None, None, tags, False)
318 318 if cachefile:
319 319 cachefile.close() # ignore rest of file
320 320
321 321 valid = (tiprev, tipnode, scmutil.filteredhash(repo, tiprev))
322 322
323 323 repoheads = repo.heads()
324 324 # Case 2 (uncommon): empty repo; get out quickly and don't bother
325 325 # writing an empty cache.
326 326 if repoheads == [nullid]:
327 327 return ([], {}, valid, {}, False)
328 328
329 329 # Case 3 (uncommon): cache file missing or empty.
330 330
331 331 # Case 4 (uncommon): tip rev decreased. This should only happen
332 332 # when we're called from localrepository.destroyed(). Refresh the
333 333 # cache so future invocations will not see disappeared heads in the
334 334 # cache.
335 335
336 336 # Case 5 (common): tip has changed, so we've added/replaced heads.
337 337
338 338 # As it happens, the code to handle cases 3, 4, 5 is the same.
339 339
340 340 # N.B. in case 4 (nodes destroyed), "new head" really means "newly
341 341 # exposed".
342 342 if not len(repo.file('.hgtags')):
343 343 # No tags have ever been committed, so we can avoid a
344 344 # potentially expensive search.
345 345 return ([], {}, valid, None, True)
346 346
347 347 starttime = time.time()
348 348
349 349 # Now we have to lookup the .hgtags filenode for every new head.
350 350 # This is the most expensive part of finding tags, so performance
351 351 # depends primarily on the size of newheads. Worst case: no cache
352 352 # file, so newheads == repoheads.
353 353 fnodescache = hgtagsfnodescache(repo.unfiltered())
354 354 cachefnode = {}
355 355 for head in reversed(repoheads):
356 356 fnode = fnodescache.getfnode(head)
357 357 if fnode != nullid:
358 358 cachefnode[head] = fnode
359 359
360 360 fnodescache.write()
361 361
362 362 duration = time.time() - starttime
363 363 ui.log('tagscache',
364 364 '%d/%d cache hits/lookups in %0.4f '
365 365 'seconds\n',
366 366 fnodescache.hitcount, fnodescache.lookupcount, duration)
367 367
368 368 # Caller has to iterate over all heads, but can use the filenodes in
369 369 # cachefnode to get to each .hgtags revision quickly.
370 370 return (repoheads, cachefnode, valid, None, True)
371 371
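# For intuition, a simplified and hypothetical rendering of what
# scmutil.filteredhash() supplies: one digest summarizing which
# revisions the current repoview hides, so the validation line changes
# whenever the filtered set changes.
def _sketch_filteredhash(filteredrevs):
    import hashlib
    if not filteredrevs:
        return None
    s = hashlib.sha1()
    for rev in sorted(filteredrevs):
        s.update('%s;' % rev)
    return s.digest()
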
372 372 def _writetagcache(ui, repo, valid, cachetags):
373 373 filename = _filename(repo)
374 374 try:
375 375 cachefile = repo.vfs(filename, 'w', atomictemp=True)
376 376 except (OSError, IOError):
377 377 return
378 378
379 379 ui.log('tagscache', 'writing .hg/%s with %d tags\n',
380 380 filename, len(cachetags))
381 381
382 382 if valid[2]:
383 383 cachefile.write('%d %s %s\n' % (valid[0], hex(valid[1]), hex(valid[2])))
384 384 else:
385 385 cachefile.write('%d %s\n' % (valid[0], hex(valid[1])))
386 386
387 387 # Tag names in the cache are in UTF-8 -- which is the whole reason
388 388 # we keep them in UTF-8 throughout this module. If we converted
389 389 # them to the local encoding on input, we would lose info writing them to
390 390 # the cache.
391 391 for (name, (node, hist)) in sorted(cachetags.iteritems()):
392 392 for n in hist:
393 393 cachefile.write("%s %s\n" % (hex(n), name))
394 394 cachefile.write("%s %s\n" % (hex(node), name))
395 395
396 396 try:
397 397 cachefile.close()
398 398 except (OSError, IOError):
399 399 pass
400 400
401 401 _fnodescachefile = 'cache/hgtagsfnodes1'
402 402 _fnodesrecsize = 4 + 20 # changeset fragment + filenode
403 403 _fnodesmissingrec = '\xff' * 24
404 404
405 405 class hgtagsfnodescache(object):
406 406 """Persistent cache mapping revisions to .hgtags filenodes.
407 407
408 408 The cache is an array of records. Each item in the array corresponds to
409 409 a changelog revision. Values in the array contain the first 4 bytes of
410 410 the node hash and the 20-byte .hgtags filenode for that revision.
411 411
412 412 The first 4 bytes are present as a form of verification. Repository
413 413 stripping and rewriting may change the node at a numeric revision in the
414 414 changelog. The changeset fragment serves as a verifier to detect
415 415 rewriting. This logic is shared with the rev branch cache (see
416 416 branchmap.py).
417 417
418 418 The instance holds in memory the full cache content but entries are
419 419 only parsed on read.
420 420
421 421 Entries are accessed through getfnode() and setfnode(), which take a
422 422 changeset node; missing entries are computed and populated on access.
423 423 """
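    # An illustrative (hypothetical) decoding of one 24-byte record,
    # mirroring the validation that getfnode() performs below.
    @staticmethod
    def _sketch_decoderecord(record, node):
        if record == _fnodesmissingrec:
            return None             # slot was never populated
        if record[0:4] != node[0:4]:
            return None             # rev was rewritten (e.g. by strip)
        return record[4:]           # the cached 20-byte filenode
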
424 424 def __init__(self, repo):
425 425 assert repo.filtername is None
426 426
427 427 self._repo = repo
428 428
429 429 # Only for reporting purposes.
430 430 self.lookupcount = 0
431 431 self.hitcount = 0
432 432
433 433 self._raw = array('c')
434 434
435 data = repo.vfs.tryread(_fnodescachefile)
435 try:
436 data = repo.vfs.read(_fnodescachefile)
437 except (OSError, IOError):
438 data = ""
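        # vfs.tryread() gracefully handles only missing files (ENOENT);
        # catching OSError/IOError here also silences other read
        # failures, which is safe for a purely optional cache.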
436 439 self._raw.fromstring(data)
437 440
438 441 # The end state of self._raw is an array that is of the exact length
439 442 # required to hold a record for every revision in the repository.
440 443 # We truncate or extend the array as necessary. self._dirtyoffset is
441 444 # defined to be the start offset at which we need to write the output
442 445 # file. This offset is also adjusted when new entries are calculated
443 446 # for array members.
444 447 cllen = len(repo.changelog)
445 448 wantedlen = cllen * _fnodesrecsize
446 449 rawlen = len(self._raw)
447 450
448 451 self._dirtyoffset = None
449 452
450 453 if rawlen < wantedlen:
451 454 self._dirtyoffset = rawlen
452 455 self._raw.extend('\xff' * (wantedlen - rawlen))
453 456 elif rawlen > wantedlen:
454 457 # There's no easy way to truncate array instances. This seems
455 458 # slightly less evil than copying a potentially large array slice.
456 459 for i in range(rawlen - wantedlen):
457 460 self._raw.pop()
458 461 self._dirtyoffset = len(self._raw)
459 462
460 463 def getfnode(self, node, computemissing=True):
461 464 """Obtain the filenode of the .hgtags file at a specified revision.
462 465
463 466 If the value is in the cache, the entry will be validated and returned.
464 467 Otherwise, the filenode will be computed and returned unless
465 468 "computemissing" is False, in which case None will be returned without
466 469 any potentially expensive computation being performed.
467 470
468 471 If an .hgtags does not exist at the specified revision, nullid is
469 472 returned.
470 473 """
471 474 ctx = self._repo[node]
472 475 rev = ctx.rev()
473 476
474 477 self.lookupcount += 1
475 478
476 479 offset = rev * _fnodesrecsize
477 480 record = self._raw[offset:offset + _fnodesrecsize].tostring()
478 481 properprefix = node[0:4]
479 482
480 483 # Validate and return existing entry.
481 484 if record != _fnodesmissingrec:
482 485 fileprefix = record[0:4]
483 486
484 487 if fileprefix == properprefix:
485 488 self.hitcount += 1
486 489 return record[4:]
487 490
488 491 # Fall through.
489 492
490 493 # If we get here, the entry is either missing or invalid.
491 494
492 495 if not computemissing:
493 496 return None
494 497
495 498 # Populate missing entry.
496 499 try:
497 500 fnode = ctx.filenode('.hgtags')
498 501 except error.LookupError:
499 502 # No .hgtags file on this revision.
500 503 fnode = nullid
501 504
502 505 self._writeentry(offset, properprefix, fnode)
503 506 return fnode
504 507
505 508 def setfnode(self, node, fnode):
506 509 """Set the .hgtags filenode for a given changeset."""
507 510 assert len(fnode) == 20
508 511 ctx = self._repo[node]
509 512
510 513 # Do a lookup first to avoid writing if nothing has changed.
511 514 if self.getfnode(ctx.node(), computemissing=False) == fnode:
512 515 return
513 516
514 517 self._writeentry(ctx.rev() * _fnodesrecsize, node[0:4], fnode)
515 518
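    # Hypothetical usage sketch of the API above: compute and persist
    # fnodes for a batch of revisions (mirrors _readtagcache()).
    @staticmethod
    def _sketch_warmcache(repo, revs):
        cache = hgtagsfnodescache(repo.unfiltered())
        for rev in revs:
            cache.getfnode(repo.changelog.node(rev))
        cache.write()
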
516 519 def _writeentry(self, offset, prefix, fnode):
517 520 # Slices on array instances only accept other array.
518 521 entry = array('c', prefix + fnode)
519 522 self._raw[offset:offset + _fnodesrecsize] = entry
520 523 # self._dirtyoffset could be None: min() then yields None and "or 0" dirties from offset 0.
521 524 self._dirtyoffset = min(self._dirtyoffset, offset) or 0
522 525
523 526 def write(self):
524 527 """Perform all necessary writes to cache file.
525 528
526 529 This may no-op if no writes are needed or if a write lock could
527 530 not be obtained.
528 531 """
529 532 if self._dirtyoffset is None:
530 533 return
531 534
532 535 data = self._raw[self._dirtyoffset:]
533 536 if not data:
534 537 return
535 538
536 539 repo = self._repo
537 540
538 541 try:
539 542 lock = repo.wlock(wait=False)
540 543 except error.LockError:
541 544 repo.ui.log('tagscache',
542 545 'not writing .hg/%s because lock cannot be acquired\n' %
543 546 (_fnodescachefile))
544 547 return
545 548
546 549 try:
547 550 f = repo.vfs.open(_fnodescachefile, 'ab')
548 551 try:
549 552 # if the file has been truncated since we read it, write from its actual end
550 553 actualoffset = f.tell()
551 554 if actualoffset < self._dirtyoffset:
552 555 self._dirtyoffset = actualoffset
553 556 data = self._raw[self._dirtyoffset:]
554 557 f.seek(self._dirtyoffset)
555 558 f.truncate()
556 559 repo.ui.log('tagscache',
557 560 'writing %d bytes to %s\n' % (
558 561 len(data), _fnodescachefile))
559 562 f.write(data)
560 563 self._dirtyoffset = None
561 564 finally:
562 565 f.close()
563 566 except (IOError, OSError) as inst:
564 567 repo.ui.log('tagscache',
565 568 "couldn't write %s: %s\n" % (
566 569 _fnodescachefile, inst))
567 570 finally:
568 571 lock.release()
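
# End-to-end sketch: a hypothetical driver approximating what
# localrepository does when computing repo.tags(); global tags are read
# first, then local tags are merged via the same _updatetags() rules.
def _sketch_findalltags(ui, repo):
    alltags, tagtypes = {}, {}
    findglobaltags(ui, repo, alltags, tagtypes)
    readlocaltags(ui, repo, alltags, tagtypes)
    return alltags, tagtypes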