##// END OF EJS Templates
tags: avoid expensive access to repo.changelog in a loop...
marmoute -
r51835:75d3306f stable
parent child Browse files
Show More
@@ -1,933 +1,932 b''
1 1 # tags.py - read tag info from local repository
2 2 #
3 3 # Copyright 2009 Olivia Mackall <olivia@selenic.com>
4 4 # Copyright 2009 Greg Ward <greg@gerg.ca>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 # Currently this module only deals with reading and caching tags.
10 10 # Eventually, it could take care of updating (adding/removing/moving)
11 11 # tags too.
12 12
13 13
14 14 import binascii
15 15 import io
16 16
17 17 from .node import (
18 18 bin,
19 19 hex,
20 20 nullrev,
21 21 short,
22 22 )
23 23 from .i18n import _
24 24 from . import (
25 25 encoding,
26 26 error,
27 27 match as matchmod,
28 28 scmutil,
29 29 util,
30 30 )
31 31 from .utils import stringutil
32 32
33 33 # Tags computation can be expensive and caches exist to make it fast in
34 34 # the common case.
35 35 #
36 36 # The "hgtagsfnodes1" cache file caches the .hgtags filenode values for
37 37 # each revision in the repository. The file is effectively an array of
38 38 # fixed length records. Read the docs for "hgtagsfnodescache" for technical
39 39 # details.
40 40 #
41 41 # The .hgtags filenode cache grows in proportion to the length of the
42 42 # changelog. The file is truncated when the # changelog is stripped.
43 43 #
44 44 # The purpose of the filenode cache is to avoid the most expensive part
45 45 # of finding global tags, which is looking up the .hgtags filenode in the
46 46 # manifest for each head. This can take dozens or over 100ms for
47 47 # repositories with very large manifests. Multiplied by dozens or even
48 48 # hundreds of heads and there is a significant performance concern.
49 49 #
50 50 # There also exist a separate cache file for each repository filter.
51 51 # These "tags-*" files store information about the history of tags.
52 52 #
53 53 # The tags cache files consists of a cache validation line followed by
54 54 # a history of tags.
55 55 #
56 56 # The cache validation line has the format:
57 57 #
58 58 # <tiprev> <tipnode> [<filteredhash>]
59 59 #
60 60 # <tiprev> is an integer revision and <tipnode> is a 40 character hex
61 61 # node for that changeset. These redundantly identify the repository
62 62 # tip from the time the cache was written. In addition, <filteredhash>,
63 63 # if present, is a 40 character hex hash of the contents of the filtered
64 64 # revisions for this filter. If the set of filtered revs changes, the
65 65 # hash will change and invalidate the cache.
66 66 #
67 67 # The history part of the tags cache consists of lines of the form:
68 68 #
69 69 # <node> <tag>
70 70 #
71 71 # (This format is identical to that of .hgtags files.)
72 72 #
73 73 # <tag> is the tag name and <node> is the 40 character hex changeset
74 74 # the tag is associated with.
75 75 #
76 76 # Tags are written sorted by tag name.
77 77 #
78 78 # Tags associated with multiple changesets have an entry for each changeset.
79 79 # The most recent changeset (in terms of revlog ordering for the head
80 80 # setting it) for each tag is last.
81 81
82 82
def fnoderevs(ui, repo, revs):
    """Return the list of '.hgtags' fnodes used in a set of revisions.

    A list (rather than a set) of unique fnodes is returned because the
    order in which tag files are read is significant.
    """
    node_of = repo.unfiltered().changelog.node
    nodes = [node_of(rev) for rev in revs]
    return _filterfnodes(_getfnodes(ui, repo, nodes), nodes)
94 94
95 95
96 96 def _nulltonone(repo, value):
97 97 """convert nullid to None
98 98
99 99 For tag value, nullid means "deleted". This small utility function helps
100 100 translating that to None."""
101 101 if value == repo.nullid:
102 102 return None
103 103 return value
104 104
105 105
def difftags(ui, repo, oldfnodes, newfnodes):
    """List differences between tags expressed in two sets of file-nodes.

    Entries have the form (tagname, oldvalue, newvalue), where None
    stands for a missing value:
        ('foo', None, 'abcd') is a new tag,
        ('bar', 'ef01', None) is a deletion,
        ('baz', 'abcd', 'ef01') is a tag movement.
    """
    if oldfnodes == newfnodes:
        return []
    previous = _tagsfromfnodes(ui, repo, oldfnodes)
    current = _tagsfromfnodes(ui, repo, newfnodes)

    entries = []
    # tags present in the new state: additions and movements
    for tag, (newnode, _hist) in current.items():
        newnode = _nulltonone(repo, newnode)
        oldnode, _hist = previous.pop(tag, (None, None))
        oldnode = _nulltonone(repo, oldnode)
        if oldnode != newnode:
            entries.append((tag, oldnode, newnode))
    # tags left in `previous` only exist in the old state: deletions
    for tag, (oldnode, _hist) in previous.items():
        oldnode = _nulltonone(repo, oldnode)
        if oldnode is not None:
            entries.append((tag, oldnode, None))
    entries.sort()
    return entries
135 135
136 136
def writediff(fp, difflist):
    """Write tags diff information to a file.

    Data are stored with a line based format:

        <action> <hex-node> <tag-name>\n

    Actions are defined as follow:
      -R tag is removed,
      +A tag is added,
      -M tag is moved (old value),
      +M tag is moved (new value),

    Example:

         +A 875517b4806a848f942811a315a5bce30804ae85 t5

    See documentation of difftags output for details about the input.
    """
    for tag, old, new in difflist:
        # translate to hex before formatting
        if old is None:
            fp.write(b'+A %s %s\n' % (hex(new), tag))
        elif new is None:
            fp.write(b'-R %s %s\n' % (hex(old), tag))
        else:
            # a movement is recorded as old value then new value
            fp.write(b'-M %s %s\n' % (hex(old), tag))
            fp.write(b'+M %s %s\n' % (hex(new), tag))
174 174
175 175
def findglobaltags(ui, repo):
    """Find global tags in a repo: return a tagsmap

    tagsmap: tag name to (node, hist) 2-tuples.

    The tags cache is read and updated as a side-effect of calling.
    """
    (heads, tagfnode, valid, cachetags, shouldwrite) = _readtagcache(ui, repo)
    if cachetags is not None:
        assert not shouldwrite
        # XXX is this really 100% correct? are there oddball special
        # cases where a global tag should outrank a local tag but won't,
        # because cachetags does not contain rank info?
        alltags = {}
        _updatetags(cachetags, alltags)
        return alltags

    # Hoist the attribute lookup out of the loop: accessing
    # repo.changelog repeatedly can be expensive (it may trigger
    # changelog recomputation on filtered repositories).
    has_node = repo.changelog.index.has_node
    for head in reversed(heads):  # oldest to newest
        assert has_node(head), b"tag cache returned bogus head %s" % short(head)
    fnodes = _filterfnodes(tagfnode, reversed(heads))
    alltags = _tagsfromfnodes(ui, repo, fnodes)

    # and update the cache (if necessary)
    if shouldwrite:
        _writetagcache(ui, repo, valid, alltags)
    return alltags
204 203
205 204
206 205 def _filterfnodes(tagfnode, nodes):
207 206 """return a list of unique fnodes
208 207
209 208 The order of this list matches the order of "nodes". Preserving this order
210 209 is important as reading tags in different order provides different
211 210 results."""
212 211 seen = set() # set of fnode
213 212 fnodes = []
214 213 for no in nodes: # oldest to newest
215 214 fnode = tagfnode.get(no)
216 215 if fnode and fnode not in seen:
217 216 seen.add(fnode)
218 217 fnodes.append(fnode)
219 218 return fnodes
220 219
221 220
def _tagsfromfnodes(ui, repo, fnodes):
    """Return a tagsmap built from a list of '.hgtags' file-nodes.

    tagsmap: tag name to (node, hist) 2-tuples.

    The order of `fnodes` matters: later entries override earlier ones.
    """
    alltags = {}
    fctx = None
    for fnode in fnodes:
        # reuse the previous filectx when available; deriving a sibling
        # filectx is cheaper than building a fresh one from the repo
        fctx = (
            repo.filectx(b'.hgtags', fileid=fnode)
            if fctx is None
            else fctx.filectx(fnode)
        )
        filetags = _readtags(ui, repo, fctx.data().splitlines(), fctx)
        _updatetags(filetags, alltags)
    return alltags
238 237
239 238
def readlocaltags(ui, repo, alltags, tagtypes):
    """Read local tags in repo; update alltags and tagtypes in place."""
    try:
        raw = repo.vfs.read(b"localtags")
    except FileNotFoundError:
        return

    # localtags is in the local encoding; re-encode to UTF-8 on
    # input for consistency with the rest of this module.
    localtags = _readtags(
        ui, repo, raw.splitlines(), b"localtags", recode=encoding.fromlocal
    )

    # drop tags pointing at nodes the changelog does not know about
    torev = repo.changelog.rev
    for name in list(localtags):
        try:
            torev(localtags[name][0])
        except (LookupError, ValueError):
            del localtags[name]

    _updatetags(localtags, alltags, b'local', tagtypes)
262 261
263 262
def _readtaghist(ui, repo, lines, fn, recode=None, calcnodelines=False):
    """Read tag definitions from a file (or any source of lines).

    This function returns two sortdicts with similar information:

    - the first dict, bintaghist, contains the tag information as expected by
      the _readtags function, i.e. a mapping from tag name to (node, hist):
        - node is the node id from the last line read for that name,
        - hist is the list of node ids previously associated with it (in file
          order). All node ids are binary, not hex.

    - the second dict, hextaglines, is a mapping from tag name to a list of
      [hexnode, line number] pairs, ordered from the oldest to the newest node.

    When calcnodelines is False the hextaglines dict is not calculated (an
    empty dict is returned). This is done to improve this function's
    performance in cases where the line numbers are not needed.

    NOTE: the two outputs are mutually exclusive: when calcnodelines is
    True only hextaglines is populated and bintaghist stays empty (see
    the ``continue`` in the loop below).
    """

    bintaghist = util.sortdict()
    hextaglines = util.sortdict()
    # 1-based line counter, used only for debug messages
    count = 0

    def dbg(msg):
        # report parse problems with file name and line number context
        ui.debug(b"%s, line %d: %s\n" % (fn, count, msg))

    for nline, line in enumerate(lines):
        count += 1
        if not line:
            continue
        try:
            # each entry is "<hex-node> <tag name>"; the name may contain
            # spaces, so split only on the first one
            (nodehex, name) = line.split(b" ", 1)
        except ValueError:
            dbg(b"cannot parse entry")
            continue
        name = name.strip()
        if recode:
            name = recode(name)
        try:
            nodebin = bin(nodehex)
        except binascii.Error:
            dbg(b"node '%s' is not well formed" % nodehex)
            continue

        # update filetags
        if calcnodelines:
            # map tag name to a list of line numbers
            if name not in hextaglines:
                hextaglines[name] = []
            hextaglines[name].append([nodehex, nline])
            continue
        # map tag name to (node, hist)
        if name not in bintaghist:
            bintaghist[name] = []
        bintaghist[name].append(nodebin)
    return bintaghist, hextaglines
320 319
321 320
def _readtags(ui, repo, lines, fn, recode=None, calcnodelines=False):
    """Read tag definitions from a file (or any source of lines).

    Returns a mapping from tag name to (node, hist).  "node" is the node
    id from the last line read for that name; "hist" is the list of node
    ids previously associated with it (in file order).  All node ids are
    binary, not hex.
    """
    filetags, _nodelines = _readtaghist(
        ui, repo, lines, fn, recode=recode, calcnodelines=calcnodelines
    )
    # Replacing entries via util.sortdict.__setitem__ is much slower than
    # inserting fresh ones, which matters with thousands of tags; build a
    # brand-new sortdict rather than updating filetags in place.
    newtags = util.sortdict()
    for name, hist in filetags.items():
        newtags[name] = (hist[-1], hist[:-1])
    return newtags
341 340
342 341
343 342 def _updatetags(filetags, alltags, tagtype=None, tagtypes=None):
344 343 """Incorporate the tag info read from one file into dictionnaries
345 344
346 345 The first one, 'alltags', is a "tagmaps" (see 'findglobaltags' for details).
347 346
348 347 The second one, 'tagtypes', is optional and will be updated to track the
349 348 "tagtype" of entries in the tagmaps. When set, the 'tagtype' argument also
350 349 needs to be set."""
351 350 if tagtype is None:
352 351 assert tagtypes is None
353 352
354 353 for name, nodehist in filetags.items():
355 354 if name not in alltags:
356 355 alltags[name] = nodehist
357 356 if tagtype is not None:
358 357 tagtypes[name] = tagtype
359 358 continue
360 359
361 360 # we prefer alltags[name] if:
362 361 # it supersedes us OR
363 362 # mutual supersedes and it has a higher rank
364 363 # otherwise we win because we're tip-most
365 364 anode, ahist = nodehist
366 365 bnode, bhist = alltags[name]
367 366 if (
368 367 bnode != anode
369 368 and anode in bhist
370 369 and (bnode not in ahist or len(bhist) > len(ahist))
371 370 ):
372 371 anode = bnode
373 372 elif tagtype is not None:
374 373 tagtypes[name] = tagtype
375 374 ahist.extend([n for n in bhist if n not in ahist])
376 375 alltags[name] = anode, ahist
377 376
378 377
379 378 def _filename(repo):
380 379 """name of a tagcache file for a given repo or repoview"""
381 380 filename = b'tags2'
382 381 if repo.filtername:
383 382 filename = b'%s-%s' % (filename, repo.filtername)
384 383 return filename
385 384
386 385
def _readtagcache(ui, repo):
    """Read the tag cache.

    Returns a tuple (heads, fnodes, validinfo, cachetags, shouldwrite).

    If the cache is completely up-to-date, "cachetags" is a dict of the
    form returned by _readtags() and "heads", "fnodes", and "validinfo" are
    None and "shouldwrite" is False.

    If the cache is not up to date, "cachetags" is None. "heads" is a list
    of all heads currently in the repository, ordered from tip to oldest.
    "validinfo" is a tuple describing cache validation info. This is used
    when writing the tags cache. "fnodes" is a mapping from head to .hgtags
    filenode. "shouldwrite" is True.

    If the cache is not up to date, the caller is responsible for reading tag
    info from each returned head. (See findglobaltags().)
    """
    try:
        cachefile = repo.cachevfs(_filename(repo), b'r')
        # force reading the file for static-http
        cachelines = iter(cachefile)
    except IOError:
        # no cache file: fall through and recompute everything
        cachefile = None

    # parse the validation line: "<tiprev> <tipnode> [<filteredhash>]"
    cacherev = None
    cachenode = None
    cachehash = None
    if cachefile:
        try:
            validline = next(cachelines)
            validline = validline.split()
            cacherev = int(validline[0])
            cachenode = bin(validline[1])
            if len(validline) > 2:
                cachehash = bin(validline[2])
        except Exception:
            # corruption of the cache, just recompute it.
            pass

    tipnode = repo.changelog.tip()
    tiprev = len(repo.changelog) - 1

    # Case 1 (common): tip is the same, so nothing has changed.
    # (Unchanged tip trivially means no changesets have been added.
    # But, thanks to localrepository.destroyed(), it also means none
    # have been destroyed by strip or rollback.)
    if (
        cacherev == tiprev
        and cachenode == tipnode
        and cachehash == scmutil.filteredhash(repo, tiprev)
    ):
        tags = _readtags(ui, repo, cachelines, cachefile.name)
        cachefile.close()
        return (None, None, None, tags, False)
    if cachefile:
        cachefile.close()  # ignore rest of file

    # validation info to be written back once tags are recomputed
    valid = (tiprev, tipnode, scmutil.filteredhash(repo, tiprev))

    repoheads = repo.heads()
    # Case 2 (uncommon): empty repo; get out quickly and don't bother
    # writing an empty cache.
    if repoheads == [repo.nullid]:
        return ([], {}, valid, {}, False)

    # Case 3 (uncommon): cache file missing or empty.

    # Case 4 (uncommon): tip rev decreased. This should only happen
    # when we're called from localrepository.destroyed(). Refresh the
    # cache so future invocations will not see disappeared heads in the
    # cache.

    # Case 5 (common): tip has changed, so we've added/replaced heads.

    # As it happens, the code to handle cases 3, 4, 5 is the same.

    # N.B. in case 4 (nodes destroyed), "new head" really means "newly
    # exposed".
    if not len(repo.file(b'.hgtags')):
        # No tags have ever been committed, so we can avoid a
        # potentially expensive search.
        return ([], {}, valid, None, True)

    # Now we have to lookup the .hgtags filenode for every new head.
    # This is the most expensive part of finding tags, so performance
    # depends primarily on the size of newheads. Worst case: no cache
    # file, so newheads == repoheads.
    # Reversed order helps the cache ('repoheads' is in descending order)
    cachefnode = _getfnodes(ui, repo, reversed(repoheads))

    # Caller has to iterate over all heads, but can use the filenodes in
    # cachefnode to get to each .hgtags revision quickly.
    return (repoheads, cachefnode, valid, None, True)
481 480
482 481
def _getfnodes(ui, repo, nodes):
    """return .hgtags fnodes for a list of changeset nodes

    Return value is a {node: fnode} mapping. There will be no entry for nodes
    without a '.hgtags' file.

    Lookups go through the persistent hgtagsfnodescache; cached fnodes are
    validated against the '.hgtags' filelog, and entries pointing at
    filenodes the filelog does not know are recomputed. The cache is
    written back (best-effort) before returning.
    """
    starttime = util.timer()
    fnodescache = hgtagsfnodescache(repo.unfiltered())
    cachefnode = {}
    validated_fnodes = set()
    unknown_entries = set()

    # the .hgtags filelog is opened lazily: when every node hits the
    # validated set we never need it
    flog = None
    for node in nodes:
        fnode = fnodescache.getfnode(node)
        if fnode != repo.nullid:
            if fnode not in validated_fnodes:
                if flog is None:
                    flog = repo.file(b'.hgtags')
                if flog.hasnode(fnode):
                    validated_fnodes.add(fnode)
                else:
                    unknown_entries.add(node)
            cachefnode[node] = fnode

    if unknown_entries:
        # recompute stale cache entries and overwrite the bogus values
        fixed_nodemap = fnodescache.refresh_invalid_nodes(unknown_entries)
        for node, fnode in fixed_nodemap.items():
            if fnode != repo.nullid:
                cachefnode[node] = fnode

    fnodescache.write()

    duration = util.timer() - starttime
    ui.log(
        b'tagscache',
        b'%d/%d cache hits/lookups in %0.4f seconds\n',
        fnodescache.hitcount,
        fnodescache.lookupcount,
        duration,
    )
    return cachefnode
525 524
526 525
def _writetagcache(ui, repo, valid, cachetags):
    """Write the computed tags to the per-filter cache file.

    ``valid`` is the (tiprev, tipnode, filteredhash) triple produced by
    _readtagcache and becomes the cache validation line; ``cachetags`` is
    a tagname -> (node, hist) mapping.  I/O failures are silently
    ignored: the cache is an optimization, not required for correctness.
    """
    filename = _filename(repo)
    try:
        cachefile = repo.cachevfs(filename, b'w', atomictemp=True)
    except (OSError, IOError):
        return

    ui.log(
        b'tagscache',
        b'writing .hg/cache/%s with %d tags\n',
        filename,
        len(cachetags),
    )

    # validation line: "<tiprev> <tipnode> [<filteredhash>]"
    if valid[2]:
        cachefile.write(
            b'%d %s %s\n' % (valid[0], hex(valid[1]), hex(valid[2]))
        )
    else:
        cachefile.write(b'%d %s\n' % (valid[0], hex(valid[1])))

    # Tag names in the cache are in UTF-8 -- which is the whole reason
    # we keep them in UTF-8 throughout this module. If we converted
    # them local encoding on input, we would lose info writing them to
    # the cache.
    for (name, (node, hist)) in sorted(cachetags.items()):
        # history entries first, current value last (same layout as .hgtags)
        for n in hist:
            cachefile.write(b"%s %s\n" % (hex(n), name))
        cachefile.write(b"%s %s\n" % (hex(node), name))

    try:
        cachefile.close()
    except (OSError, IOError):
        pass
561 560
562 561
def tag(repo, names, node, message, local, user, date, editor=False):
    """tag a revision with one or more symbolic names.

    names is a list of strings or, when adding a single tag, names may be a
    string.

    if local is True, the tags are stored in a per-repository file.
    otherwise, they are stored in the .hgtags file, and a new
    changeset is committed with the change.

    keyword arguments:

    local: whether to store tags in non-version-controlled file
    (default False)

    message: commit message to use if committing

    user: name of user to use if committing

    date: date tuple to use if committing

    Raises error.Abort when a non-local tag is requested while the
    working copy of .hgtags has pending changes.
    """

    if not local:
        # committing .hgtags requires a pristine working copy of it, so
        # abort early if it is modified/added/removed/... in any way
        m = matchmod.exact([b'.hgtags'])
        st = repo.status(match=m, unknown=True, ignored=True)
        if any(
            (
                st.modified,
                st.added,
                st.removed,
                st.deleted,
                st.unknown,
                st.ignored,
            )
        ):
            raise error.Abort(
                _(b'working copy of .hgtags is changed'),
                hint=_(b'please commit .hgtags manually'),
            )

    with repo.wlock():
        repo.tags()  # instantiate the cache
        _tag(repo, names, node, message, local, user, date, editor=editor)
605 604
606 605
def _tag(
    repo, names, node, message, local, user, date, extra=None, editor=False
):
    """Record one or more tags for ``node`` (see ``tag()`` for the API).

    When ``local`` is true the tags are appended to ``.hg/localtags``;
    otherwise they are appended to ``.hgtags`` and a changeset committing
    that change is created and returned.  ``pretag`` and ``tag`` hooks
    are run for every name.  The wlock is taken by the ``tag()`` wrapper.
    """
    if isinstance(names, bytes):
        names = (names,)

    branches = repo.branchmap()
    for name in names:
        repo.hook(b'pretag', throw=True, node=hex(node), tag=name, local=local)
        if name in branches:
            repo.ui.warn(
                _(b"warning: tag %s conflicts with existing branch name\n")
                % name
            )

    def writetags(fp, names, munge, prevtags):
        # append the new entries; make sure the file ends with a newline
        # before we add anything
        fp.seek(0, io.SEEK_END)
        if prevtags and not prevtags.endswith(b'\n'):
            fp.write(b'\n')
        for name in names:
            if munge:
                m = munge(name)
            else:
                m = name

            if repo._tagscache.tagtypes and name in repo._tagscache.tagtypes:
                # tag already exists: first record its previous value so
                # the history in the file is preserved
                old = repo.tags().get(name, repo.nullid)
                fp.write(b'%s %s\n' % (hex(old), m))
            fp.write(b'%s %s\n' % (hex(node), m))
        fp.close()

    prevtags = b''
    if local:
        try:
            fp = repo.vfs(b'localtags', b'r+')
        except IOError:
            fp = repo.vfs(b'localtags', b'a')
        else:
            prevtags = fp.read()

        # local tags are stored in the current charset
        writetags(fp, names, None, prevtags)
        for name in names:
            repo.hook(b'tag', node=hex(node), tag=name, local=local)
        return

    try:
        fp = repo.wvfs(b'.hgtags', b'rb+')
    except FileNotFoundError:
        fp = repo.wvfs(b'.hgtags', b'ab')
    else:
        prevtags = fp.read()

    # committed tags are stored in UTF-8
    writetags(fp, names, encoding.fromlocal, prevtags)

    fp.close()

    repo.invalidatecaches()

    # make sure .hgtags is tracked before committing it
    with repo.dirstate.changing_files(repo):
        if b'.hgtags' not in repo.dirstate:
            repo[None].add([b'.hgtags'])

    m = matchmod.exact([b'.hgtags'])
    tagnode = repo.commit(
        message, user, date, extra=extra, match=m, editor=editor
    )

    for name in names:
        repo.hook(b'tag', node=hex(node), tag=name, local=local)

    return tagnode
680 679
681 680
# On-disk name of the persistent rev -> .hgtags filenode cache
# (see hgtagsfnodescache below).
_fnodescachefile = b'hgtagsfnodes1'
_fnodesrecsize = 4 + 20 # changeset fragment + filenode
# sentinel record value meaning "entry missing"
_fnodesmissingrec = b'\xff' * 24
685 684
686 685
class hgtagsfnodescache:
    """Persistent cache mapping revisions to .hgtags filenodes.

    The cache is an array of records. Each item in the array corresponds to
    a changelog revision. Values in the array contain the first 4 bytes of
    the node hash and the 20 bytes .hgtags filenode for that revision.

    The first 4 bytes are present as a form of verification. Repository
    stripping and rewriting may change the node at a numeric revision in the
    changelog. The changeset fragment serves as a verifier to detect
    rewriting. This logic is shared with the rev branch cache (see
    branchmap.py).

    The instance holds in memory the full cache content but entries are
    only parsed on read.

    Instances behave like lists. ``c[i]`` works where i is a rev or
    changeset node. Missing indexes are populated automatically on access.
    """

    def __init__(self, repo):
        # the cache is indexed by unfiltered revision numbers, so only an
        # unfiltered repo makes sense here
        assert repo.filtername is None

        self._repo = repo

        # Only for reporting purposes.
        self.lookupcount = 0
        self.hitcount = 0

        try:
            data = repo.cachevfs.read(_fnodescachefile)
        except (OSError, IOError):
            # missing/unreadable cache file: start from an empty cache
            data = b""
        # raw backing store; mutated in place by _writeentry()
        self._raw = bytearray(data)

        # The end state of self._raw is an array that is of the exact length
        # required to hold a record for every revision in the repository.
        # We truncate or extend the array as necessary. self._dirtyoffset is
        # defined to be the start offset at which we need to write the output
        # file. This offset is also adjusted when new entries are calculated
        # for array members.
        cllen = len(repo.changelog)
        wantedlen = cllen * _fnodesrecsize
        rawlen = len(self._raw)

        self._dirtyoffset = None

        # keep only whole records, and no more than the changelog needs
        rawlentokeep = min(
            wantedlen, (rawlen // _fnodesrecsize) * _fnodesrecsize
        )
        if rawlen > rawlentokeep:
            # There's no easy way to truncate array instances. This seems
            # slightly less evil than copying a potentially large array slice.
            for i in range(rawlen - rawlentokeep):
                self._raw.pop()
            rawlen = len(self._raw)
            self._dirtyoffset = rawlen
        if rawlen < wantedlen:
            if self._dirtyoffset is None:
                self._dirtyoffset = rawlen
            # TODO: zero fill entire record, because it's invalid not missing?
            self._raw.extend(b'\xff' * (wantedlen - rawlen))

    def getfnode(self, node, computemissing=True):
        """Obtain the filenode of the .hgtags file at a specified revision.

        If the value is in the cache, the entry will be validated and returned.
        Otherwise, the filenode will be computed and returned unless
        "computemissing" is False. In that case, None will be returned if
        the entry is missing or False if the entry is invalid without
        any potentially expensive computation being performed.

        If an .hgtags does not exist at the specified revision, nullid is
        returned.
        """
        if node == self._repo.nullid:
            return node

        rev = self._repo.changelog.rev(node)

        self.lookupcount += 1

        offset = rev * _fnodesrecsize
        # b'%s' % bytearray-slice yields an immutable bytes copy of the record
        record = b'%s' % self._raw[offset : offset + _fnodesrecsize]
        properprefix = node[0:4]

        # Validate and return existing entry.
        if record != _fnodesmissingrec and len(record) == _fnodesrecsize:
            fileprefix = record[0:4]

            if fileprefix == properprefix:
                self.hitcount += 1
                return record[4:]

            # Fall through.

        # If we get here, the entry is either missing or invalid.

        if not computemissing:
            if record != _fnodesmissingrec:
                return False
            return None

        fnode = self._computefnode(node)
        self._writeentry(offset, properprefix, fnode)
        return fnode

    def _computefnode(self, node):
        """Finds the tag filenode for a node which is missing or invalid
        in cache"""
        ctx = self._repo[node]
        rev = ctx.rev()
        fnode = None
        cl = self._repo.changelog
        p1rev, p2rev = cl._uncheckedparentrevs(rev)
        p1node = cl.node(p1rev)
        # cheap cache-only lookup for the first parent's fnode
        p1fnode = self.getfnode(p1node, computemissing=False)
        if p2rev != nullrev:
            # There is some no-merge changeset where p1 is null and p2 is set
            # Processing them as merge is just slower, but still gives a good
            # result.
            p2node = cl.node(p2rev)
            p2fnode = self.getfnode(p2node, computemissing=False)
            if p1fnode != p2fnode:
                # we cannot rely on readfast because we don't know against what
                # parent the readfast delta is computed
                p1fnode = None
        if p1fnode:
            mctx = ctx.manifestctx()
            fnode = mctx.readfast().get(b'.hgtags')
            if fnode is None:
                # .hgtags unchanged relative to p1: inherit its fnode
                fnode = p1fnode
        if fnode is None:
            # Populate missing entry.
            try:
                fnode = ctx.filenode(b'.hgtags')
            except error.LookupError:
                # No .hgtags file on this revision.
                fnode = self._repo.nullid
        return fnode

    def setfnode(self, node, fnode):
        """Set the .hgtags filenode for a given changeset."""
        assert len(fnode) == 20
        ctx = self._repo[node]

        # Do a lookup first to avoid writing if nothing has changed.
        if self.getfnode(ctx.node(), computemissing=False) == fnode:
            return

        self._writeentry(ctx.rev() * _fnodesrecsize, node[0:4], fnode)

    def refresh_invalid_nodes(self, nodes):
        """recomputes file nodes for a given set of nodes which has unknown
        filenodes for them in the cache
        Also updates the in-memory cache with the correct filenode.
        Caller needs to take care about calling `.write()` so that updates are
        persisted.
        Returns a map {node: recomputed fnode}
        """
        fixed_nodemap = {}
        for node in nodes:
            fnode = self._computefnode(node)
            fixed_nodemap[node] = fnode
            self.setfnode(node, fnode)
        return fixed_nodemap

    def _writeentry(self, offset, prefix, fnode):
        # Slices on array instances only accept other array.
        entry = bytearray(prefix + fnode)
        self._raw[offset : offset + _fnodesrecsize] = entry
        # Track the lowest dirty offset so write() appends from there.
        # self._dirtyoffset could be None.
        self._dirtyoffset = min(self._dirtyoffset or 0, offset or 0)

    def write(self):
        """Perform all necessary writes to cache file.

        This may no-op if no writes are needed or if a write lock could
        not be obtained.
        """
        if self._dirtyoffset is None:
            return

        data = self._raw[self._dirtyoffset :]
        if not data:
            return

        repo = self._repo

        # the cache write is best-effort: bail out rather than block
        try:
            lock = repo.lock(wait=False)
        except error.LockError:
            repo.ui.log(
                b'tagscache',
                b'not writing .hg/cache/%s because '
                b'lock cannot be acquired\n' % _fnodescachefile,
            )
            return

        try:
            f = repo.cachevfs.open(_fnodescachefile, b'ab')
            try:
                # if the file has been truncated
                actualoffset = f.tell()
                if actualoffset < self._dirtyoffset:
                    self._dirtyoffset = actualoffset
                    data = self._raw[self._dirtyoffset :]
                f.seek(self._dirtyoffset)
                f.truncate()
                repo.ui.log(
                    b'tagscache',
                    b'writing %d bytes to cache/%s\n'
                    % (len(data), _fnodescachefile),
                )
                f.write(data)
                self._dirtyoffset = None
            finally:
                f.close()
        except (IOError, OSError) as inst:
            repo.ui.log(
                b'tagscache',
                b"couldn't write cache/%s: %s\n"
                % (_fnodescachefile, stringutil.forcebytestr(inst)),
            )
        finally:
            lock.release()
913 912
914 913
def clear_cache_on_disk(repo):
    """function used by the perf extension to clear the "tags" cache"""
    repo.cachevfs.tryunlink(_filename(repo))
918 917
919 918
def clear_cache_fnodes(repo):
    """function used by the perf extension to clear the "file node cache"

    The fnodes cache lives in the ``hgtagsfnodes1`` file; removing the
    ``tags2*`` file returned by ``_filename(repo)`` is the job of
    ``clear_cache_on_disk`` above.
    """
    # Bug fix: this previously unlinked _filename(repo) (the tags cache),
    # duplicating clear_cache_on_disk and leaving the fnodes cache intact.
    repo.cachevfs.tryunlink(_fnodescachefile)
923 922
924 923
def forget_fnodes(repo, revs):
    """Prune entries for ``revs`` from the fnodes cache (perf extension helper)."""
    # overwrite each record with the "missing" sentinel (prefix + filenode)
    missing_prefix = b'\xff' * 4
    missing_fnode = b'\xff' * 20
    cache = hgtagsfnodescache(repo.unfiltered())
    for rev in revs:
        cache._writeentry(rev * _fnodesrecsize, missing_prefix, missing_fnode)
    cache.write()
General Comments 0
You need to be logged in to leave comments. Login now