##// END OF EJS Templates
tags: use absolute_import
Gregory Szorc -
r25982:b2f3f185 default
parent child Browse files
Show More
@@ -1,553 +1,565
1 1 # tags.py - read tag info from local repository
2 2 #
3 3 # Copyright 2009 Matt Mackall <mpm@selenic.com>
4 4 # Copyright 2009 Greg Ward <greg@gerg.ca>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 # Currently this module only deals with reading and caching tags.
10 10 # Eventually, it could take care of updating (adding/removing/moving)
11 11 # tags too.
12 12
13 from node import nullid, bin, hex, short
14 from i18n import _
15 import util
16 import encoding
17 import error
18 from array import array
13 from __future__ import absolute_import
14
15 import array
19 16 import errno
20 17 import time
21 18
19 from .i18n import _
20 from .node import (
21 bin,
22 hex,
23 nullid,
24 short,
25 )
26 from . import (
27 encoding,
28 error,
29 util,
30 )
31
32 array = array.array
33
22 34 # Tags computation can be expensive and caches exist to make it fast in
23 35 # the common case.
24 36 #
25 37 # The "hgtagsfnodes1" cache file caches the .hgtags filenode values for
26 38 # each revision in the repository. The file is effectively an array of
27 39 # fixed length records. Read the docs for "hgtagsfnodescache" for technical
28 40 # details.
29 41 #
30 42 # The .hgtags filenode cache grows in proportion to the length of the
31 43 # changelog. The file is truncated when the changelog is stripped.
32 44 #
33 45 # The purpose of the filenode cache is to avoid the most expensive part
34 46 # of finding global tags, which is looking up the .hgtags filenode in the
35 47 # manifest for each head. This can take dozens or over 100ms for
36 48 # repositories with very large manifests. Multiplied by dozens or even
37 49 # hundreds of heads and there is a significant performance concern.
38 50 #
39 51 # There also exists a separate cache file for each repository filter.
40 52 # These "tags-*" files store information about the history of tags.
41 53 #
42 54 # The tags cache files consist of a cache validation line followed by
43 55 # a history of tags.
44 56 #
45 57 # The cache validation line has the format:
46 58 #
47 59 # <tiprev> <tipnode> [<filteredhash>]
48 60 #
49 61 # <tiprev> is an integer revision and <tipnode> is a 40 character hex
50 62 # node for that changeset. These redundantly identify the repository
51 63 # tip from the time the cache was written. In addition, <filteredhash>,
52 64 # if present, is a 40 character hex hash of the contents of the filtered
53 65 # revisions for this filter. If the set of filtered revs changes, the
54 66 # hash will change and invalidate the cache.
55 67 #
56 68 # The history part of the tags cache consists of lines of the form:
57 69 #
58 70 # <node> <tag>
59 71 #
60 72 # (This format is identical to that of .hgtags files.)
61 73 #
62 74 # <tag> is the tag name and <node> is the 40 character hex changeset
63 75 # the tag is associated with.
64 76 #
65 77 # Tags are written sorted by tag name.
66 78 #
67 79 # Tags associated with multiple changesets have an entry for each changeset.
68 80 # The most recent changeset (in terms of revlog ordering for the head
69 81 # setting it) for each tag is last.
70 82
def findglobaltags(ui, repo, alltags, tagtypes):
    '''Find global tags in a repo.

    "alltags" maps tag name to (node, hist) 2-tuples.

    "tagtypes" maps tag name to tag type. Global tags always have the
    "global" tag type.

    The "alltags" and "tagtypes" dicts are updated in place. Empty dicts
    should be passed in.

    The tags cache is read and updated as a side-effect of calling.
    '''
    # We rely on alltags holding only global tags when it reaches
    # _writetagcache(), so both dicts must start out empty.
    assert len(alltags) == len(tagtypes) == 0, \
        "findglobaltags() should be called first"

    heads, tagfnode, valid, cachetags, shouldwrite = _readtagcache(ui, repo)
    if cachetags is not None:
        assert not shouldwrite
        # XXX is this really 100% correct?  are there oddball special
        # cases where a global tag should outrank a local tag but won't,
        # because cachetags does not contain rank info?
        _updatetags(cachetags, 'global', alltags, tagtypes)
        return

    visited = set()                 # .hgtags filenodes already read
    hgtagsfctx = None
    for head in reversed(heads):    # oldest to newest
        assert head in repo.changelog.nodemap, \
            "tag cache returned bogus head %s" % short(head)

        fnode = tagfnode.get(head)
        if not fnode or fnode in visited:
            continue
        visited.add(fnode)
        # Reuse the previous filectx where possible: constructing from an
        # existing one is cheaper than a fresh repo.filectx() lookup.
        if not hgtagsfctx:
            hgtagsfctx = repo.filectx('.hgtags', fileid=fnode)
        else:
            hgtagsfctx = hgtagsfctx.filectx(fnode)

        filetags = _readtags(ui, repo, hgtagsfctx.data().splitlines(),
                             hgtagsfctx)
        _updatetags(filetags, 'global', alltags, tagtypes)

    # and update the cache (if necessary)
    if shouldwrite:
        _writetagcache(ui, repo, valid, alltags)
118 130
def readlocaltags(ui, repo, alltags, tagtypes):
    '''Read local tags in repo. Update alltags and tagtypes.

    Local tags come from the "localtags" file in the repository's .hg
    directory. A missing file is treated as "no local tags"; any other
    read error propagates.
    '''
    try:
        data = repo.vfs.read("localtags")
    except IOError as inst:
        if inst.errno != errno.ENOENT:
            raise
        return

    # localtags is in the local encoding; re-encode to UTF-8 on
    # input for consistency with the rest of this module.
    filetags = _readtags(
        ui, repo, data.splitlines(), "localtags",
        recode=encoding.fromlocal)

    # remove tags pointing to invalid nodes
    cl = repo.changelog
    # Iterate over a snapshot of the keys: entries are deleted while
    # walking, which would break iteration over a live keys() view.
    for t in list(filetags):
        try:
            cl.rev(filetags[t][0])
        except (LookupError, ValueError):
            del filetags[t]

    _updatetags(filetags, "local", alltags, tagtypes)
143 155
def _readtaghist(ui, repo, lines, fn, recode=None, calcnodelines=False):
    '''Read tag definitions from a file (or any source of lines).

    This function returns two sortdicts with similar information:

    - the first dict, bintaghist, contains the tag information as expected by
      the _readtags function, i.e. a mapping from tag name to (node, hist):
        - node is the node id from the last line read for that name,
        - hist is the list of node ids previously associated with it (in file
          order). All node ids are binary, not hex.

    - the second dict, hextaglines, is a mapping from tag name to a list of
      [hexnode, line number] pairs, ordered from the oldest to the newest node.

    When calcnodelines is False the hextaglines dict is not calculated (an
    empty dict is returned). This is done to improve this function's
    performance in cases where the line numbers are not needed.
    '''

    bintaghist = util.sortdict()
    hextaglines = util.sortdict()
    count = 0

    def warn(msg):
        # "count" is 1-based and includes blank lines, matching the
        # numbering a user would see in an editor.
        ui.warn(_("%s, line %s: %s\n") % (fn, count, msg))

    for lineno, rawline in enumerate(lines):
        count += 1
        if not rawline:
            # Blank lines advance the counter but carry no entry.
            continue
        try:
            hexnode, name = rawline.split(" ", 1)
        except ValueError:
            warn(_("cannot parse entry"))
            continue
        name = name.strip()
        if recode:
            name = recode(name)
        try:
            binnode = bin(hexnode)
        except TypeError:
            warn(_("node '%s' is not well formed") % hexnode)
            continue

        # update filetags
        if calcnodelines:
            # map tag name to a list of [hexnode, 0-based line index] pairs
            if name not in hextaglines:
                hextaglines[name] = []
            hextaglines[name].append([hexnode, lineno])
            continue
        # map tag name to the ordered list of binary nodes seen for it
        if name not in bintaghist:
            bintaghist[name] = []
        bintaghist[name].append(binnode)
    return bintaghist, hextaglines
200 212
def _readtags(ui, repo, lines, fn, recode=None, calcnodelines=False):
    '''Read tag definitions from a file (or any source of lines).

    Returns a mapping from tag name to (node, hist).

    "node" is the node id from the last line read for that name. "hist"
    is the list of node ids previously associated with it (in file order).
    All node ids are binary, not hex.
    '''
    hist, _unused = _readtaghist(ui, repo, lines, fn, recode=recode,
                                 calcnodelines=calcnodelines)
    # Collapse each node list into (most recent node, all older nodes).
    for name, nodes in hist.items():
        hist[name] = (nodes[-1], nodes[:-1])
    return hist
215 227
216 228 def _updatetags(filetags, tagtype, alltags, tagtypes):
217 229 '''Incorporate the tag info read from one file into the two
218 230 dictionaries, alltags and tagtypes, that contain all tag
219 231 info (global across all heads plus local).'''
220 232
221 233 for name, nodehist in filetags.iteritems():
222 234 if name not in alltags:
223 235 alltags[name] = nodehist
224 236 tagtypes[name] = tagtype
225 237 continue
226 238
227 239 # we prefer alltags[name] if:
228 240 # it supersedes us OR
229 241 # mutual supersedes and it has a higher rank
230 242 # otherwise we win because we're tip-most
231 243 anode, ahist = nodehist
232 244 bnode, bhist = alltags[name]
233 245 if (bnode != anode and anode in bhist and
234 246 (bnode not in ahist or len(bhist) > len(ahist))):
235 247 anode = bnode
236 248 else:
237 249 tagtypes[name] = tagtype
238 250 ahist.extend([n for n in bhist if n not in ahist])
239 251 alltags[name] = anode, ahist
240 252
241 253 def _filename(repo):
242 254 """name of a tagcache file for a given repo or repoview"""
243 255 filename = 'cache/tags2'
244 256 if repo.filtername:
245 257 filename = '%s-%s' % (filename, repo.filtername)
246 258 return filename
247 259
def _readtagcache(ui, repo):
    '''Read the tag cache.

    Returns a tuple (heads, fnodes, validinfo, cachetags, shouldwrite).

    If the cache is completely up-to-date, "cachetags" is a dict of the
    form returned by _readtags() and "heads", "fnodes", and "validinfo" are
    None and "shouldwrite" is False.

    If the cache is not up to date, "cachetags" is None. "heads" is a list
    of all heads currently in the repository, ordered from tip to oldest.
    "validinfo" is a tuple describing cache validation info. This is used
    when writing the tags cache. "fnodes" is a mapping from head to .hgtags
    filenode. "shouldwrite" is True.

    If the cache is not up to date, the caller is responsible for reading tag
    info from each returned head. (See findglobaltags().)
    '''
    from . import scmutil # avoid cycle

    try:
        cachefile = repo.vfs(_filename(repo), 'r')
        # force reading the file for static-http
        cachelines = iter(cachefile)
    except IOError:
        cachefile = None

    cacherev = None
    cachenode = None
    cachehash = None
    if cachefile:
        try:
            # Use the builtin next() rather than the iterator's .next()
            # method: the latter exists only on Python 2.
            validline = next(cachelines)
            validline = validline.split()
            cacherev = int(validline[0])
            cachenode = bin(validline[1])
            if len(validline) > 2:
                cachehash = bin(validline[2])
        except Exception:
            # corruption of the cache, just recompute it.
            pass

    tipnode = repo.changelog.tip()
    tiprev = len(repo.changelog) - 1

    # Case 1 (common): tip is the same, so nothing has changed.
    # (Unchanged tip trivially means no changesets have been added.
    # But, thanks to localrepository.destroyed(), it also means none
    # have been destroyed by strip or rollback.)
    if (cacherev == tiprev
            and cachenode == tipnode
            and cachehash == scmutil.filteredhash(repo, tiprev)):
        tags = _readtags(ui, repo, cachelines, cachefile.name)
        cachefile.close()
        return (None, None, None, tags, False)
    if cachefile:
        cachefile.close() # ignore rest of file

    valid = (tiprev, tipnode, scmutil.filteredhash(repo, tiprev))

    repoheads = repo.heads()
    # Case 2 (uncommon): empty repo; get out quickly and don't bother
    # writing an empty cache.
    if repoheads == [nullid]:
        return ([], {}, valid, {}, False)

    # Case 3 (uncommon): cache file missing or empty.

    # Case 4 (uncommon): tip rev decreased. This should only happen
    # when we're called from localrepository.destroyed(). Refresh the
    # cache so future invocations will not see disappeared heads in the
    # cache.

    # Case 5 (common): tip has changed, so we've added/replaced heads.

    # As it happens, the code to handle cases 3, 4, 5 is the same.

    # N.B. in case 4 (nodes destroyed), "new head" really means "newly
    # exposed".
    if not len(repo.file('.hgtags')):
        # No tags have ever been committed, so we can avoid a
        # potentially expensive search.
        return ([], {}, valid, None, True)

    starttime = time.time()

    # Now we have to lookup the .hgtags filenode for every new head.
    # This is the most expensive part of finding tags, so performance
    # depends primarily on the size of newheads. Worst case: no cache
    # file, so newheads == repoheads.
    fnodescache = hgtagsfnodescache(repo.unfiltered())
    cachefnode = {}
    for head in reversed(repoheads):
        fnode = fnodescache.getfnode(head)
        if fnode != nullid:
            cachefnode[head] = fnode

    fnodescache.write()

    duration = time.time() - starttime
    ui.log('tagscache',
           '%d/%d cache hits/lookups in %0.4f '
           'seconds\n',
           fnodescache.hitcount, fnodescache.lookupcount, duration)

    # Caller has to iterate over all heads, but can use the filenodes in
    # cachefnode to get to each .hgtags revision quickly.
    return (repoheads, cachefnode, valid, None, True)
356 368
def _writetagcache(ui, repo, valid, cachetags):
    '''Write the tags cache for repo's current filter.

    "valid" is the (tiprev, tipnode, filteredhash) validation tuple as
    produced by _readtagcache(). "cachetags" maps tag name to (node, hist)
    and is expected to contain only global tags.

    Writing is best-effort: failure to open or finalize the cache file is
    silently ignored, since the cache is only an optimization.
    '''
    filename = _filename(repo)
    try:
        cachefile = repo.vfs(filename, 'w', atomictemp=True)
    except (OSError, IOError):
        return

    ui.log('tagscache', 'writing .hg/%s with %d tags\n',
           filename, len(cachetags))

    # Validation line first; the filtered hash is only present when the
    # current filter actually hides revisions.
    if valid[2]:
        cachefile.write('%d %s %s\n' % (valid[0], hex(valid[1]), hex(valid[2])))
    else:
        cachefile.write('%d %s\n' % (valid[0], hex(valid[1])))

    # Tag names in the cache are in UTF-8 -- which is the whole reason
    # we keep them in UTF-8 throughout this module. If we converted
    # them local encoding on input, we would lose info writing them to
    # the cache.
    # items() behaves identically to iteritems() here on Python 2 and,
    # unlike iteritems(), also works on Python 3.
    for (name, (node, hist)) in sorted(cachetags.items()):
        for n in hist:
            cachefile.write("%s %s\n" % (hex(n), name))
        cachefile.write("%s %s\n" % (hex(node), name))

    try:
        cachefile.close()
    except (OSError, IOError):
        pass
385 397
# Name of the file (relative to .hg) backing the rev -> .hgtags filenode
# cache used by hgtagsfnodescache.
_fnodescachefile = 'cache/hgtagsfnodes1'
# Fixed record size: 4 byte changeset node prefix + 20 byte filenode.
_fnodesrecsize = 4 + 20 # changeset fragment + filenode
# Sentinel record meaning "no entry computed yet for this revision".
_fnodesmissingrec = '\xff' * 24
389 401
class hgtagsfnodescache(object):
    """Persistent cache mapping revisions to .hgtags filenodes.

    The cache is an array of records. Each item in the array corresponds to
    a changelog revision. Values in the array contain the first 4 bytes of
    the node hash and the 20 bytes .hgtags filenode for that revision.

    The first 4 bytes are present as a form of verification. Repository
    stripping and rewriting may change the node at a numeric revision in the
    changelog. The changeset fragment serves as a verifier to detect
    rewriting. This logic is shared with the rev branch cache (see
    branchmap.py).

    The instance holds in memory the full cache content but entries are
    only parsed on read.

    Entries are read and populated via getfnode(), updated via setfnode(),
    and flushed back to disk via write().
    """
    def __init__(self, repo):
        # The cache file is keyed by rev number over the full changelog,
        # so only the unfiltered repo may own an instance.
        assert repo.filtername is None

        self._repo = repo

        # Only for reporting purposes.
        self.lookupcount = 0
        self.hitcount = 0

        # Raw cache bytes, one _fnodesrecsize record per revision.
        # NOTE(review): array('c') plus fromstring()/tostring() below are
        # Python 2 idioms (both were removed in Python 3).
        self._raw = array('c')

        data = repo.vfs.tryread(_fnodescachefile)
        self._raw.fromstring(data)

        # The end state of self._raw is an array that is of the exact length
        # required to hold a record for every revision in the repository.
        # We truncate or extend the array as necessary. self._dirtyoffset is
        # defined to be the start offset at which we need to write the output
        # file. This offset is also adjusted when new entries are calculated
        # for array members.
        cllen = len(repo.changelog)
        wantedlen = cllen * _fnodesrecsize
        rawlen = len(self._raw)

        self._dirtyoffset = None

        if rawlen < wantedlen:
            # Cache shorter than the changelog: pad with the "missing"
            # marker and remember where the padding (dirty data) starts.
            self._dirtyoffset = rawlen
            self._raw.extend('\xff' * (wantedlen - rawlen))
        elif rawlen > wantedlen:
            # There's no easy way to truncate array instances. This seems
            # slightly less evil than copying a potentially large array slice.
            for i in range(rawlen - wantedlen):
                self._raw.pop()
            self._dirtyoffset = len(self._raw)

    def getfnode(self, node, computemissing=True):
        """Obtain the filenode of the .hgtags file at a specified revision.

        If the value is in the cache, the entry will be validated and returned.
        Otherwise, the filenode will be computed and returned unless
        "computemissing" is False, in which case None will be returned without
        any potentially expensive computation being performed.

        If an .hgtags does not exist at the specified revision, nullid is
        returned.
        """
        ctx = self._repo[node]
        rev = ctx.rev()

        self.lookupcount += 1

        # Records live at fixed offsets derived from the rev number.
        offset = rev * _fnodesrecsize
        record = self._raw[offset:offset + _fnodesrecsize].tostring()
        properprefix = node[0:4]

        # Validate and return existing entry.
        if record != _fnodesmissingrec:
            fileprefix = record[0:4]

            # The stored changeset fragment must match the requested node;
            # a mismatch means the rev was stripped/rewritten since caching.
            if fileprefix == properprefix:
                self.hitcount += 1
                return record[4:]

            # Fall through.

        # If we get here, the entry is either missing or invalid.

        if not computemissing:
            return None

        # Populate missing entry.
        try:
            fnode = ctx.filenode('.hgtags')
        except error.LookupError:
            # No .hgtags file on this revision.
            fnode = nullid

        self._writeentry(offset, properprefix, fnode)
        return fnode

    def setfnode(self, node, fnode):
        """Set the .hgtags filenode for a given changeset."""
        assert len(fnode) == 20
        ctx = self._repo[node]

        # Do a lookup first to avoid writing if nothing has changed.
        if self.getfnode(ctx.node(), computemissing=False) == fnode:
            return

        self._writeentry(ctx.rev() * _fnodesrecsize, node[0:4], fnode)

    def _writeentry(self, offset, prefix, fnode):
        # Store one record (4 byte prefix + 20 byte filenode) at "offset"
        # and mark the cache dirty from that point.
        # Slices on array instances only accept other array.
        entry = array('c', prefix + fnode)
        self._raw[offset:offset + _fnodesrecsize] = entry
        # self._dirtyoffset could be None.
        # NOTE(review): on Python 2, min(None, offset) is None, so the
        # trailing "or 0" marks a previously clean cache dirty from offset
        # 0 rather than from "offset" -- conservative; confirm intended.
        self._dirtyoffset = min(self._dirtyoffset, offset) or 0

    def write(self):
        """Perform all necessary writes to cache file.

        This may no-op if no writes are needed or if a write lock could
        not be obtained.
        """
        if self._dirtyoffset is None:
            return

        data = self._raw[self._dirtyoffset:]
        if not data:
            return

        repo = self._repo

        try:
            # Non-blocking acquisition: a cache flush is best-effort and
            # not worth waiting on the write lock for.
            lock = repo.wlock(wait=False)
        except error.LockError:
            repo.ui.log('tagscache',
                'not writing .hg/%s because lock cannot be acquired\n' %
                (_fnodescachefile))
            return

        try:
            f = repo.vfs.open(_fnodescachefile, 'ab')
            try:
                # if the file has been truncated
                actualoffset = f.tell()
                if actualoffset < self._dirtyoffset:
                    self._dirtyoffset = actualoffset
                    data = self._raw[self._dirtyoffset:]
                f.seek(self._dirtyoffset)
                f.truncate()
                repo.ui.log('tagscache',
                    'writing %d bytes to %s\n' % (
                    len(data), _fnodescachefile))
                f.write(data)
                self._dirtyoffset = None
            finally:
                f.close()
        except (IOError, OSError) as inst:
            repo.ui.log('tagscache',
                "couldn't write %s: %s\n" % (
                _fnodescachefile, inst))
        finally:
            lock.release()
General Comments 0
You need to be logged in to leave comments. Login now