##// END OF EJS Templates
tags: support setting hgtags fnodes cache entries...
Gregory Szorc -
r25381:47edeff1 default
parent child Browse files
Show More
@@ -1,540 +1,553 b''
1 1 # tags.py - read tag info from local repository
2 2 #
3 3 # Copyright 2009 Matt Mackall <mpm@selenic.com>
4 4 # Copyright 2009 Greg Ward <greg@gerg.ca>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 # Currently this module only deals with reading and caching tags.
10 10 # Eventually, it could take care of updating (adding/removing/moving)
11 11 # tags too.
12 12
13 13 from node import nullid, bin, hex, short
14 14 from i18n import _
15 15 import util
16 16 import encoding
17 17 import error
18 18 from array import array
19 19 import errno
20 20 import time
21 21
22 22 # Tags computation can be expensive and caches exist to make it fast in
23 23 # the common case.
24 24 #
25 25 # The "hgtagsfnodes1" cache file caches the .hgtags filenode values for
26 26 # each revision in the repository. The file is effectively an array of
27 27 # fixed length records. Read the docs for "hgtagsfnodescache" for technical
28 28 # details.
29 29 #
30 30 # The .hgtags filenode cache grows in proportion to the length of the
31 31 # changelog. The file is truncated when the # changelog is stripped.
32 32 #
33 33 # The purpose of the filenode cache is to avoid the most expensive part
34 34 # of finding global tags, which is looking up the .hgtags filenode in the
35 35 # manifest for each head. This can take dozens or over 100ms for
36 36 # repositories with very large manifests. Multiplied by dozens or even
37 37 # hundreds of heads and there is a significant performance concern.
38 38 #
39 39 # There also exist a separate cache file for each repository filter.
40 40 # These "tags-*" files store information about the history of tags.
41 41 #
42 42 # The tags cache files consists of a cache validation line followed by
43 43 # a history of tags.
44 44 #
45 45 # The cache validation line has the format:
46 46 #
47 47 # <tiprev> <tipnode> [<filteredhash>]
48 48 #
49 49 # <tiprev> is an integer revision and <tipnode> is a 40 character hex
50 50 # node for that changeset. These redundantly identify the repository
51 51 # tip from the time the cache was written. In addition, <filteredhash>,
52 52 # if present, is a 40 character hex hash of the contents of the filtered
53 53 # revisions for this filter. If the set of filtered revs changes, the
54 54 # hash will change and invalidate the cache.
55 55 #
56 56 # The history part of the tags cache consists of lines of the form:
57 57 #
58 58 # <node> <tag>
59 59 #
60 60 # (This format is identical to that of .hgtags files.)
61 61 #
62 62 # <tag> is the tag name and <node> is the 40 character hex changeset
63 63 # the tag is associated with.
64 64 #
65 65 # Tags are written sorted by tag name.
66 66 #
67 67 # Tags associated with multiple changesets have an entry for each changeset.
68 68 # The most recent changeset (in terms of revlog ordering for the head
69 69 # setting it) for each tag is last.
70 70
71 71 def findglobaltags(ui, repo, alltags, tagtypes):
72 72 '''Find global tags in a repo.
73 73
74 74 "alltags" maps tag name to (node, hist) 2-tuples.
75 75
76 76 "tagtypes" maps tag name to tag type. Global tags always have the
77 77 "global" tag type.
78 78
79 79 The "alltags" and "tagtypes" dicts are updated in place. Empty dicts
80 80 should be passed in.
81 81
82 82 The tags cache is read and updated as a side-effect of calling.
83 83 '''
84 84 # This is so we can be lazy and assume alltags contains only global
85 85 # tags when we pass it to _writetagcache().
86 86 assert len(alltags) == len(tagtypes) == 0, \
87 87 "findglobaltags() should be called first"
88 88
89 89 (heads, tagfnode, valid, cachetags, shouldwrite) = _readtagcache(ui, repo)
90 90 if cachetags is not None:
91 91 assert not shouldwrite
92 92 # XXX is this really 100% correct? are there oddball special
93 93 # cases where a global tag should outrank a local tag but won't,
94 94 # because cachetags does not contain rank info?
95 95 _updatetags(cachetags, 'global', alltags, tagtypes)
96 96 return
97 97
98 98 seen = set() # set of fnode
99 99 fctx = None
100 100 for head in reversed(heads): # oldest to newest
101 101 assert head in repo.changelog.nodemap, \
102 102 "tag cache returned bogus head %s" % short(head)
103 103
104 104 fnode = tagfnode.get(head)
105 105 if fnode and fnode not in seen:
106 106 seen.add(fnode)
107 107 if not fctx:
108 108 fctx = repo.filectx('.hgtags', fileid=fnode)
109 109 else:
110 110 fctx = fctx.filectx(fnode)
111 111
112 112 filetags = _readtags(ui, repo, fctx.data().splitlines(), fctx)
113 113 _updatetags(filetags, 'global', alltags, tagtypes)
114 114
115 115 # and update the cache (if necessary)
116 116 if shouldwrite:
117 117 _writetagcache(ui, repo, valid, alltags)
118 118
119 119 def readlocaltags(ui, repo, alltags, tagtypes):
120 120 '''Read local tags in repo. Update alltags and tagtypes.'''
121 121 try:
122 122 data = repo.vfs.read("localtags")
123 123 except IOError, inst:
124 124 if inst.errno != errno.ENOENT:
125 125 raise
126 126 return
127 127
128 128 # localtags is in the local encoding; re-encode to UTF-8 on
129 129 # input for consistency with the rest of this module.
130 130 filetags = _readtags(
131 131 ui, repo, data.splitlines(), "localtags",
132 132 recode=encoding.fromlocal)
133 133
134 134 # remove tags pointing to invalid nodes
135 135 cl = repo.changelog
136 136 for t in filetags.keys():
137 137 try:
138 138 cl.rev(filetags[t][0])
139 139 except (LookupError, ValueError):
140 140 del filetags[t]
141 141
142 142 _updatetags(filetags, "local", alltags, tagtypes)
143 143
144 144 def _readtaghist(ui, repo, lines, fn, recode=None, calcnodelines=False):
145 145 '''Read tag definitions from a file (or any source of lines).
146 146
147 147 This function returns two sortdicts with similar information:
148 148
149 149 - the first dict, bintaghist, contains the tag information as expected by
150 150 the _readtags function, i.e. a mapping from tag name to (node, hist):
151 151 - node is the node id from the last line read for that name,
152 152 - hist is the list of node ids previously associated with it (in file
153 153 order). All node ids are binary, not hex.
154 154
155 155 - the second dict, hextaglines, is a mapping from tag name to a list of
156 156 [hexnode, line number] pairs, ordered from the oldest to the newest node.
157 157
158 158 When calcnodelines is False the hextaglines dict is not calculated (an
159 159 empty dict is returned). This is done to improve this function's
160 160 performance in cases where the line numbers are not needed.
161 161 '''
162 162
163 163 bintaghist = util.sortdict()
164 164 hextaglines = util.sortdict()
165 165 count = 0
166 166
167 167 def warn(msg):
168 168 ui.warn(_("%s, line %s: %s\n") % (fn, count, msg))
169 169
170 170 for nline, line in enumerate(lines):
171 171 count += 1
172 172 if not line:
173 173 continue
174 174 try:
175 175 (nodehex, name) = line.split(" ", 1)
176 176 except ValueError:
177 177 warn(_("cannot parse entry"))
178 178 continue
179 179 name = name.strip()
180 180 if recode:
181 181 name = recode(name)
182 182 try:
183 183 nodebin = bin(nodehex)
184 184 except TypeError:
185 185 warn(_("node '%s' is not well formed") % nodehex)
186 186 continue
187 187
188 188 # update filetags
189 189 if calcnodelines:
190 190 # map tag name to a list of line numbers
191 191 if name not in hextaglines:
192 192 hextaglines[name] = []
193 193 hextaglines[name].append([nodehex, nline])
194 194 continue
195 195 # map tag name to (node, hist)
196 196 if name not in bintaghist:
197 197 bintaghist[name] = []
198 198 bintaghist[name].append(nodebin)
199 199 return bintaghist, hextaglines
200 200
201 201 def _readtags(ui, repo, lines, fn, recode=None, calcnodelines=False):
202 202 '''Read tag definitions from a file (or any source of lines).
203 203
204 204 Returns a mapping from tag name to (node, hist).
205 205
206 206 "node" is the node id from the last line read for that name. "hist"
207 207 is the list of node ids previously associated with it (in file order).
208 208 All node ids are binary, not hex.
209 209 '''
210 210 filetags, nodelines = _readtaghist(ui, repo, lines, fn, recode=recode,
211 211 calcnodelines=calcnodelines)
212 212 for tag, taghist in filetags.items():
213 213 filetags[tag] = (taghist[-1], taghist[:-1])
214 214 return filetags
215 215
216 216 def _updatetags(filetags, tagtype, alltags, tagtypes):
217 217 '''Incorporate the tag info read from one file into the two
218 218 dictionaries, alltags and tagtypes, that contain all tag
219 219 info (global across all heads plus local).'''
220 220
221 221 for name, nodehist in filetags.iteritems():
222 222 if name not in alltags:
223 223 alltags[name] = nodehist
224 224 tagtypes[name] = tagtype
225 225 continue
226 226
227 227 # we prefer alltags[name] if:
228 228 # it supersedes us OR
229 229 # mutual supersedes and it has a higher rank
230 230 # otherwise we win because we're tip-most
231 231 anode, ahist = nodehist
232 232 bnode, bhist = alltags[name]
233 233 if (bnode != anode and anode in bhist and
234 234 (bnode not in ahist or len(bhist) > len(ahist))):
235 235 anode = bnode
236 236 else:
237 237 tagtypes[name] = tagtype
238 238 ahist.extend([n for n in bhist if n not in ahist])
239 239 alltags[name] = anode, ahist
240 240
241 241 def _filename(repo):
242 242 """name of a tagcache file for a given repo or repoview"""
243 243 filename = 'cache/tags2'
244 244 if repo.filtername:
245 245 filename = '%s-%s' % (filename, repo.filtername)
246 246 return filename
247 247
248 248 def _readtagcache(ui, repo):
249 249 '''Read the tag cache.
250 250
251 251 Returns a tuple (heads, fnodes, validinfo, cachetags, shouldwrite).
252 252
253 253 If the cache is completely up-to-date, "cachetags" is a dict of the
254 254 form returned by _readtags() and "heads", "fnodes", and "validinfo" are
255 255 None and "shouldwrite" is False.
256 256
257 257 If the cache is not up to date, "cachetags" is None. "heads" is a list
258 258 of all heads currently in the repository, ordered from tip to oldest.
259 259 "validinfo" is a tuple describing cache validation info. This is used
260 260 when writing the tags cache. "fnodes" is a mapping from head to .hgtags
261 261 filenode. "shouldwrite" is True.
262 262
263 263 If the cache is not up to date, the caller is responsible for reading tag
264 264 info from each returned head. (See findglobaltags().)
265 265 '''
266 266 import scmutil # avoid cycle
267 267
268 268 try:
269 269 cachefile = repo.vfs(_filename(repo), 'r')
270 270 # force reading the file for static-http
271 271 cachelines = iter(cachefile)
272 272 except IOError:
273 273 cachefile = None
274 274
275 275 cacherev = None
276 276 cachenode = None
277 277 cachehash = None
278 278 if cachefile:
279 279 try:
280 280 validline = cachelines.next()
281 281 validline = validline.split()
282 282 cacherev = int(validline[0])
283 283 cachenode = bin(validline[1])
284 284 if len(validline) > 2:
285 285 cachehash = bin(validline[2])
286 286 except Exception:
287 287 # corruption of the cache, just recompute it.
288 288 pass
289 289
290 290 tipnode = repo.changelog.tip()
291 291 tiprev = len(repo.changelog) - 1
292 292
293 293 # Case 1 (common): tip is the same, so nothing has changed.
294 294 # (Unchanged tip trivially means no changesets have been added.
295 295 # But, thanks to localrepository.destroyed(), it also means none
296 296 # have been destroyed by strip or rollback.)
297 297 if (cacherev == tiprev
298 298 and cachenode == tipnode
299 299 and cachehash == scmutil.filteredhash(repo, tiprev)):
300 300 tags = _readtags(ui, repo, cachelines, cachefile.name)
301 301 cachefile.close()
302 302 return (None, None, None, tags, False)
303 303 if cachefile:
304 304 cachefile.close() # ignore rest of file
305 305
306 306 valid = (tiprev, tipnode, scmutil.filteredhash(repo, tiprev))
307 307
308 308 repoheads = repo.heads()
309 309 # Case 2 (uncommon): empty repo; get out quickly and don't bother
310 310 # writing an empty cache.
311 311 if repoheads == [nullid]:
312 312 return ([], {}, valid, {}, False)
313 313
314 314 # Case 3 (uncommon): cache file missing or empty.
315 315
316 316 # Case 4 (uncommon): tip rev decreased. This should only happen
317 317 # when we're called from localrepository.destroyed(). Refresh the
318 318 # cache so future invocations will not see disappeared heads in the
319 319 # cache.
320 320
321 321 # Case 5 (common): tip has changed, so we've added/replaced heads.
322 322
323 323 # As it happens, the code to handle cases 3, 4, 5 is the same.
324 324
325 325 # N.B. in case 4 (nodes destroyed), "new head" really means "newly
326 326 # exposed".
327 327 if not len(repo.file('.hgtags')):
328 328 # No tags have ever been committed, so we can avoid a
329 329 # potentially expensive search.
330 330 return ([], {}, valid, None, True)
331 331
332 332 starttime = time.time()
333 333
334 334 # Now we have to lookup the .hgtags filenode for every new head.
335 335 # This is the most expensive part of finding tags, so performance
336 336 # depends primarily on the size of newheads. Worst case: no cache
337 337 # file, so newheads == repoheads.
338 338 fnodescache = hgtagsfnodescache(repo.unfiltered())
339 339 cachefnode = {}
340 340 for head in reversed(repoheads):
341 341 fnode = fnodescache.getfnode(head)
342 342 if fnode != nullid:
343 343 cachefnode[head] = fnode
344 344
345 345 fnodescache.write()
346 346
347 347 duration = time.time() - starttime
348 348 ui.log('tagscache',
349 349 '%d/%d cache hits/lookups in %0.4f '
350 350 'seconds\n',
351 351 fnodescache.hitcount, fnodescache.lookupcount, duration)
352 352
353 353 # Caller has to iterate over all heads, but can use the filenodes in
354 354 # cachefnode to get to each .hgtags revision quickly.
355 355 return (repoheads, cachefnode, valid, None, True)
356 356
357 357 def _writetagcache(ui, repo, valid, cachetags):
358 358 filename = _filename(repo)
359 359 try:
360 360 cachefile = repo.vfs(filename, 'w', atomictemp=True)
361 361 except (OSError, IOError):
362 362 return
363 363
364 364 ui.log('tagscache', 'writing .hg/%s with %d tags\n',
365 365 filename, len(cachetags))
366 366
367 367 if valid[2]:
368 368 cachefile.write('%d %s %s\n' % (valid[0], hex(valid[1]), hex(valid[2])))
369 369 else:
370 370 cachefile.write('%d %s\n' % (valid[0], hex(valid[1])))
371 371
372 372 # Tag names in the cache are in UTF-8 -- which is the whole reason
373 373 # we keep them in UTF-8 throughout this module. If we converted
374 374 # them local encoding on input, we would lose info writing them to
375 375 # the cache.
376 376 for (name, (node, hist)) in sorted(cachetags.iteritems()):
377 377 for n in hist:
378 378 cachefile.write("%s %s\n" % (hex(n), name))
379 379 cachefile.write("%s %s\n" % (hex(node), name))
380 380
381 381 try:
382 382 cachefile.close()
383 383 except (OSError, IOError):
384 384 pass
385 385
386 386 _fnodescachefile = 'cache/hgtagsfnodes1'
387 387 _fnodesrecsize = 4 + 20 # changeset fragment + filenode
388 388 _fnodesmissingrec = '\xff' * 24
389 389
390 390 class hgtagsfnodescache(object):
391 391 """Persistent cache mapping revisions to .hgtags filenodes.
392 392
393 393 The cache is an array of records. Each item in the array corresponds to
394 394 a changelog revision. Values in the array contain the first 4 bytes of
395 395 the node hash and the 20 bytes .hgtags filenode for that revision.
396 396
397 397 The first 4 bytes are present as a form of verification. Repository
398 398 stripping and rewriting may change the node at a numeric revision in the
399 399 changelog. The changeset fragment serves as a verifier to detect
400 400 rewriting. This logic is shared with the rev branch cache (see
401 401 branchmap.py).
402 402
403 403 The instance holds in memory the full cache content but entries are
404 404 only parsed on read.
405 405
406 406 Instances behave like lists. ``c[i]`` works where i is a rev or
407 407 changeset node. Missing indexes are populated automatically on access.
408 408 """
409 409 def __init__(self, repo):
410 410 assert repo.filtername is None
411 411
412 412 self._repo = repo
413 413
414 414 # Only for reporting purposes.
415 415 self.lookupcount = 0
416 416 self.hitcount = 0
417 417
418 418 self._raw = array('c')
419 419
420 420 data = repo.vfs.tryread(_fnodescachefile)
421 421 self._raw.fromstring(data)
422 422
423 423 # The end state of self._raw is an array that is of the exact length
424 424 # required to hold a record for every revision in the repository.
425 425 # We truncate or extend the array as necessary. self._dirtyoffset is
426 426 # defined to be the start offset at which we need to write the output
427 427 # file. This offset is also adjusted when new entries are calculated
428 428 # for array members.
429 429 cllen = len(repo.changelog)
430 430 wantedlen = cllen * _fnodesrecsize
431 431 rawlen = len(self._raw)
432 432
433 433 self._dirtyoffset = None
434 434
435 435 if rawlen < wantedlen:
436 436 self._dirtyoffset = rawlen
437 437 self._raw.extend('\xff' * (wantedlen - rawlen))
438 438 elif rawlen > wantedlen:
439 439 # There's no easy way to truncate array instances. This seems
440 440 # slightly less evil than copying a potentially large array slice.
441 441 for i in range(rawlen - wantedlen):
442 442 self._raw.pop()
443 443 self._dirtyoffset = len(self._raw)
444 444
445 445 def getfnode(self, node, computemissing=True):
446 446 """Obtain the filenode of the .hgtags file at a specified revision.
447 447
448 448 If the value is in the cache, the entry will be validated and returned.
449 449 Otherwise, the filenode will be computed and returned unless
450 450 "computemissing" is False, in which case None will be returned without
451 451 any potentially expensive computation being performed.
452 452
453 453 If an .hgtags does not exist at the specified revision, nullid is
454 454 returned.
455 455 """
456 456 ctx = self._repo[node]
457 457 rev = ctx.rev()
458 458
459 459 self.lookupcount += 1
460 460
461 461 offset = rev * _fnodesrecsize
462 462 record = self._raw[offset:offset + _fnodesrecsize].tostring()
463 463 properprefix = node[0:4]
464 464
465 465 # Validate and return existing entry.
466 466 if record != _fnodesmissingrec:
467 467 fileprefix = record[0:4]
468 468
469 469 if fileprefix == properprefix:
470 470 self.hitcount += 1
471 471 return record[4:]
472 472
473 473 # Fall through.
474 474
475 475 # If we get here, the entry is either missing or invalid.
476 476
477 477 if not computemissing:
478 478 return None
479 479
480 480 # Populate missing entry.
481 481 try:
482 482 fnode = ctx.filenode('.hgtags')
483 483 except error.LookupError:
484 484 # No .hgtags file on this revision.
485 485 fnode = nullid
486 486
487 self._writeentry(offset, properprefix, fnode)
488 return fnode
489
490 def setfnode(self, node, fnode):
491 """Set the .hgtags filenode for a given changeset."""
492 assert len(fnode) == 20
493 ctx = self._repo[node]
494
495 # Do a lookup first to avoid writing if nothing has changed.
496 if self.getfnode(ctx.node(), computemissing=False) == fnode:
497 return
498
499 self._writeentry(ctx.rev() * _fnodesrecsize, node[0:4], fnode)
500
501 def _writeentry(self, offset, prefix, fnode):
487 502 # Slices on array instances only accept other array.
488 entry = array('c', properprefix + fnode)
503 entry = array('c', prefix + fnode)
489 504 self._raw[offset:offset + _fnodesrecsize] = entry
490 505 # self._dirtyoffset could be None.
491 506 self._dirtyoffset = min(self._dirtyoffset, offset) or 0
492 507
493 return fnode
494
495 508 def write(self):
496 509 """Perform all necessary writes to cache file.
497 510
498 511 This may no-op if no writes are needed or if a write lock could
499 512 not be obtained.
500 513 """
501 514 if self._dirtyoffset is None:
502 515 return
503 516
504 517 data = self._raw[self._dirtyoffset:]
505 518 if not data:
506 519 return
507 520
508 521 repo = self._repo
509 522
510 523 try:
511 524 lock = repo.wlock(wait=False)
512 525 except error.LockError:
513 526 repo.ui.log('tagscache',
514 527 'not writing .hg/%s because lock cannot be acquired\n' %
515 528 (_fnodescachefile))
516 529 return
517 530
518 531 try:
519 532 f = repo.vfs.open(_fnodescachefile, 'ab')
520 533 try:
521 534 # if the file has been truncated
522 535 actualoffset = f.tell()
523 536 if actualoffset < self._dirtyoffset:
524 537 self._dirtyoffset = actualoffset
525 538 data = self._raw[self._dirtyoffset:]
526 539 f.seek(self._dirtyoffset)
527 540 f.truncate()
528 541 repo.ui.log('tagscache',
529 542 'writing %d bytes to %s\n' % (
530 543 len(data), _fnodescachefile))
531 544 f.write(data)
532 545 self._dirtyoffset = None
533 546 finally:
534 547 f.close()
535 548 except (IOError, OSError), inst:
536 549 repo.ui.log('tagscache',
537 550 "couldn't write %s: %s\n" % (
538 551 _fnodescachefile, inst))
539 552 finally:
540 553 lock.release()
General Comments 0
You need to be logged in to leave comments. Login now