tags: improve documentation...
Gregory Szorc
r24445:c71edbaf default
@@ -1,348 +1,391
1 1 # tags.py - read tag info from local repository
2 2 #
3 3 # Copyright 2009 Matt Mackall <mpm@selenic.com>
4 4 # Copyright 2009 Greg Ward <greg@gerg.ca>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 # Currently this module only deals with reading and caching tags.
10 10 # Eventually, it could take care of updating (adding/removing/moving)
11 11 # tags too.
12 12
13 13 from node import nullid, bin, hex, short
14 14 from i18n import _
15 15 import util
16 16 import encoding
17 17 import error
18 18 import errno
19 19 import time
20 20
21 # The tags cache stores information about heads and the history of tags.
22 #
23 # The cache file consists of two parts. The first part maps head nodes
24 # to .hgtags filenodes. The second part is a history of tags. The two
25 # parts are separated by an empty line.
26 #
27 # The first part consists of lines of the form:
28 #
29 # <headrev> <headnode> [<hgtagsnode>]
30 #
31 # <headrev> is an integer revision and <headnode> is a 40 character hex
32 # node for that changeset. These redundantly identify a repository
33 # head from the time the cache was written.
34 #
35 # <hgtagsnode> is the filenode of .hgtags on that head. Heads with no .hgtags
36 # file will have no <hgtagsnode> (just 2 values per line).
37 #
38 # The filenode cache is ordered from tip to oldest (which is part of why
39 # <headrev> is there: a quick check of the tip from when the cache was
40 # written against the current tip is all that is needed to check whether
41 # the cache is up to date).
42 #
43 # The purpose of the filenode cache is to avoid the most expensive part
44 # of finding global tags, which is looking up the .hgtags filenode in the
45 # manifest for each head. This can take over a minute on repositories
46 # that have large manifests and many heads.
47 #
48 # The second part of the tags cache consists of lines of the form:
49 #
50 # <node> <tag>
51 #
52 # (This format is identical to that of .hgtags files.)
53 #
54 # <tag> is the tag name and <node> is the 40 character hex changeset
55 # the tag is associated with.
56 #
57 # Tags are written sorted by tag name.
58 #
59 # Tags associated with multiple changesets have an entry for each changeset.
60 # The most recent changeset (in terms of revlog ordering for the head
61 # setting it) for each tag is last.
62
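# For illustration only (all revisions, nodes, and tag names below are made
# up, and the 40-character hex nodes are abbreviated), a cache file might
# look like:
#
#   4 1f0e9d8c7b6a 5a4b3c2d1e0f
#   3 9c8b7a6f5e4d
#
#   aaaaaaaaaaaa v1.0
#   bbbbbbbbbbbb v1.0
#
# The first head (rev 4) carries a .hgtags filenode while the second does
# not, the empty line separates the two parts, and tag "v1.0" has moved
# once, so its older changeset is listed before the current one.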
21 63 def findglobaltags(ui, repo, alltags, tagtypes):
22 '''Find global tags in repo by reading .hgtags from every head that
23 has a distinct version of it, using a cache to avoid excess work.
24 Updates the dicts alltags, tagtypes in place: alltags maps tag name
25 to (node, hist) pair (see _readtags() below), and tagtypes maps tag
26 name to tag type ("global" in this case).'''
64 '''Find global tags in a repo.
65
66 "alltags" maps tag name to (node, hist) 2-tuples.
67
68 "tagtypes" maps tag name to tag type. Global tags always have the
69 "global" tag type.
70
71 The "alltags" and "tagtypes" dicts are updated in place. Empty dicts
72 should be passed in.
73
74 The tags cache is read and updated as a side-effect of calling.
75 '''
27 76 # This is so we can be lazy and assume alltags contains only global
28 77 # tags when we pass it to _writetagcache().
29 78 assert len(alltags) == len(tagtypes) == 0, \
30 79 "findglobaltags() should be called first"
31 80
32 81 (heads, tagfnode, cachetags, shouldwrite) = _readtagcache(ui, repo)
33 82 if cachetags is not None:
34 83 assert not shouldwrite
35 84 # XXX is this really 100% correct? are there oddball special
36 85 # cases where a global tag should outrank a local tag but won't,
37 86 # because cachetags does not contain rank info?
38 87 _updatetags(cachetags, 'global', alltags, tagtypes)
39 88 return
40 89
41 seen = set() # set of fnode
90 seen = set() # set of fnode
42 91 fctx = None
43 for head in reversed(heads): # oldest to newest
92 for head in reversed(heads): # oldest to newest
44 93 assert head in repo.changelog.nodemap, \
45 94 "tag cache returned bogus head %s" % short(head)
46 95
47 96 fnode = tagfnode.get(head)
48 97 if fnode and fnode not in seen:
49 98 seen.add(fnode)
50 99 if not fctx:
51 100 fctx = repo.filectx('.hgtags', fileid=fnode)
52 101 else:
53 102 fctx = fctx.filectx(fnode)
54 103
55 104 filetags = _readtags(ui, repo, fctx.data().splitlines(), fctx)
56 105 _updatetags(filetags, 'global', alltags, tagtypes)
57 106
58 107 # and update the cache (if necessary)
59 108 if shouldwrite:
60 109 _writetagcache(ui, repo, heads, tagfnode, alltags)
61 110
62 111 def readlocaltags(ui, repo, alltags, tagtypes):
63 '''Read local tags in repo. Update alltags and tagtypes.'''
112 '''Read local tags in repo. Update alltags and tagtypes.'''
64 113 try:
65 114 data = repo.vfs.read("localtags")
66 115 except IOError, inst:
67 116 if inst.errno != errno.ENOENT:
68 117 raise
69 118 return
70 119
71 120 # localtags is in the local encoding; re-encode to UTF-8 on
72 121 # input for consistency with the rest of this module.
73 122 filetags = _readtags(
74 123 ui, repo, data.splitlines(), "localtags",
75 124 recode=encoding.fromlocal)
76 125
77 126 # remove tags pointing to invalid nodes
78 127 cl = repo.changelog
79 128 for t in filetags.keys():
80 129 try:
81 130 cl.rev(filetags[t][0])
82 131 except (LookupError, ValueError):
83 132 del filetags[t]
84 133
85 134 _updatetags(filetags, "local", alltags, tagtypes)
86 135
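# A minimal usage sketch, not part of the Mercurial source: callers (for
# example, the local repository's tag computation) combine the two readers
# above roughly like this; 'mytag' is a hypothetical tag name.
#
#   alltags, tagtypes = {}, {}
#   findglobaltags(ui, repo, alltags, tagtypes)   # must run first (empty dicts)
#   readlocaltags(ui, repo, alltags, tagtypes)
#   node, hist = alltags['mytag']    # binary node, plus older nodes in hist
#   kind = tagtypes['mytag']         # 'global' or 'local'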
87 136 def _readtaghist(ui, repo, lines, fn, recode=None, calcnodelines=False):
88 137 '''Read tag definitions from a file (or any source of lines).
138
89 139 This function returns two sortdicts with similar information:
140
90 141 - the first dict, bintaghist, contains the tag information as expected by
91 142 the _readtags function, i.e. a mapping from tag name to (node, hist):
92 143 - node is the node id from the last line read for that name,
93 144 - hist is the list of node ids previously associated with it (in file
94 order). All node ids are binary, not hex.
145 order). All node ids are binary, not hex.
146
95 147 - the second dict, hextaglines, is a mapping from tag name to a list of
96 148 [hexnode, line number] pairs, ordered from the oldest to the newest node.
149
97 150 When calcnodelines is False the hextaglines dict is not calculated (an
98 151 empty dict is returned). This is done to improve this function's
99 152 performance in cases where the line numbers are not needed.
100 153 '''
101 154
102 155 bintaghist = util.sortdict()
103 156 hextaglines = util.sortdict()
104 157 count = 0
105 158
106 159 def warn(msg):
107 160 ui.warn(_("%s, line %s: %s\n") % (fn, count, msg))
108 161
109 162 for nline, line in enumerate(lines):
110 163 count += 1
111 164 if not line:
112 165 continue
113 166 try:
114 167 (nodehex, name) = line.split(" ", 1)
115 168 except ValueError:
116 169 warn(_("cannot parse entry"))
117 170 continue
118 171 name = name.strip()
119 172 if recode:
120 173 name = recode(name)
121 174 try:
122 175 nodebin = bin(nodehex)
123 176 except TypeError:
124 177 warn(_("node '%s' is not well formed") % nodehex)
125 178 continue
126 179
127 180 # update filetags
128 181 if calcnodelines:
129 182 # map tag name to a list of line numbers
130 183 if name not in hextaglines:
131 184 hextaglines[name] = []
132 185 hextaglines[name].append([nodehex, nline])
133 186 continue
134 187 # map tag name to (node, hist)
135 188 if name not in bintaghist:
136 189 bintaghist[name] = []
137 190 bintaghist[name].append(nodebin)
138 191 return bintaghist, hextaglines
139 192
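# For example (hypothetical input, node hashes abbreviated): given the two
# lines "1111... v1.0" and "2222... v1.0", a call with calcnodelines=False
# fills in bintaghist as
#   {'v1.0': [<binary 1111...>, <binary 2222...>]}
# while calcnodelines=True instead records the per-line history in
# hextaglines as
#   {'v1.0': [['1111...', 0], ['2222...', 1]]}
# (line indexes are 0-based, as produced by enumerate()).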
140 193 def _readtags(ui, repo, lines, fn, recode=None, calcnodelines=False):
141 194 '''Read tag definitions from a file (or any source of lines).
142 Return a mapping from tag name to (node, hist): node is the node id
143 from the last line read for that name, and hist is the list of node
144 ids previously associated with it (in file order). All node ids are
145 binary, not hex.'''
195
196 Returns a mapping from tag name to (node, hist).
197
198 "node" is the node id from the last line read for that name. "hist"
199 is the list of node ids previously associated with it (in file order).
200 All node ids are binary, not hex.
201 '''
146 202 filetags, nodelines = _readtaghist(ui, repo, lines, fn, recode=recode,
147 203 calcnodelines=calcnodelines)
148 204 for tag, taghist in filetags.items():
149 205 filetags[tag] = (taghist[-1], taghist[:-1])
150 206 return filetags
151 207
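# Continuing the hypothetical example above: _readtags() collapses each
# node list so that the last entry read wins and the earlier entries become
# its history, e.g. {'v1.0': (<binary 2222...>, [<binary 1111...>])}.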
152 208 def _updatetags(filetags, tagtype, alltags, tagtypes):
153 209 '''Incorporate the tag info read from one file into the two
154 210 dictionaries, alltags and tagtypes, that contain all tag
155 211 info (global across all heads plus local).'''
156 212
157 213 for name, nodehist in filetags.iteritems():
158 214 if name not in alltags:
159 215 alltags[name] = nodehist
160 216 tagtypes[name] = tagtype
161 217 continue
162 218
163 219 # we prefer alltags[name] if:
164 220 # it supersedes us OR
165 221 # mutual supersedes and it has a higher rank
166 222 # otherwise we win because we're tip-most
167 223 anode, ahist = nodehist
168 224 bnode, bhist = alltags[name]
169 225 if (bnode != anode and anode in bhist and
170 226 (bnode not in ahist or len(bhist) > len(ahist))):
171 227 anode = bnode
172 228 else:
173 229 tagtypes[name] = tagtype
174 230 ahist.extend([n for n in bhist if n not in ahist])
175 231 alltags[name] = anode, ahist
176 232
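# A worked example of the ranking above, using hypothetical nodes A and B:
# if alltags already holds 'mytag' -> (B, [A]) from a global .hgtags and a
# later source reports 'mytag' -> (A, []), then A appears in B's history,
# so B still wins and the tag type is left unchanged; the merged result is
# alltags['mytag'] == (B, [A]).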
233 def _readtagcache(ui, repo):
234 '''Read the tag cache.
177 235
178 # The tag cache only stores info about heads, not the tag contents
179 # from each head. I.e. it doesn't try to squeeze out the maximum
180 # performance, but is simpler and has a better chance of actually
181 # working correctly. And this gives the biggest performance win: it
182 # avoids looking up .hgtags in the manifest for every head, and it
183 # can avoid calling heads() at all if there have been no changes to
184 # the repo.
236 Returns a tuple (heads, fnodes, cachetags, shouldwrite).
237
238 If the cache is completely up-to-date, "cachetags" is a dict of the
239 form returned by _readtags() and "heads" and "fnodes" are None and
240 "shouldwrite" is False.
185 241
186 def _readtagcache(ui, repo):
187 '''Read the tag cache and return a tuple (heads, fnodes, cachetags,
188 shouldwrite). If the cache is completely up-to-date, cachetags is a
189 dict of the form returned by _readtags(); otherwise, it is None and
190 heads and fnodes are set. In that case, heads is the list of all
191 heads currently in the repository (ordered from tip to oldest) and
192 fnodes is a mapping from head to .hgtags filenode. If those two are
193 set, caller is responsible for reading tag info from each head.'''
242 If the cache is not up to date, "cachetags" is None. "heads" is a list
243 of all heads currently in the repository, ordered from tip to oldest.
244 "fnodes" is a mapping from head to .hgtags filenode. "shouldwrite" is
245 True.
246
247 If the cache is not up to date, the caller is responsible for reading tag
248 info from each returned head. (See findglobaltags().)
249 '''
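    # For illustration (hypothetical values): with an up-to-date cache this
    # returns (None, None, {'v1.0': (node, [])}, False); with a missing or
    # stale cache it returns (heads, {head: hgtags_filenode, ...}, None,
    # True) and the caller reads .hgtags from each returned head.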
194 250
195 251 try:
196 252 cachefile = repo.vfs('cache/tags', 'r')
197 253 # force reading the file for static-http
198 254 cachelines = iter(cachefile)
199 255 except IOError:
200 256 cachefile = None
201 257
202 # The cache file consists of lines like
203 # <headrev> <headnode> [<tagnode>]
204 # where <headrev> and <headnode> redundantly identify a repository
205 # head from the time the cache was written, and <tagnode> is the
206 # filenode of .hgtags on that head. Heads with no .hgtags file will
207 # have no <tagnode>. The cache is ordered from tip to oldest (which
208 # is part of why <headrev> is there: a quick visual check is all
209 # that's required to ensure correct order).
210 #
211 # This information is enough to let us avoid the most expensive part
212 # of finding global tags, which is looking up <tagnode> in the
213 # manifest for each head.
214 cacherevs = [] # list of headrev
215 cacheheads = [] # list of headnode
216 cachefnode = {} # map headnode to filenode
258 cacherevs = [] # list of headrev
259 cacheheads = [] # list of headnode
260 cachefnode = {} # map headnode to filenode
217 261 if cachefile:
218 262 try:
219 263 for line in cachelines:
220 264 if line == "\n":
221 265 break
222 266 line = line.split()
223 267 cacherevs.append(int(line[0]))
224 268 headnode = bin(line[1])
225 269 cacheheads.append(headnode)
226 270 if len(line) == 3:
227 271 fnode = bin(line[2])
228 272 cachefnode[headnode] = fnode
229 273 except Exception:
230 274 # corruption of the tags cache, just recompute it
231 275 ui.warn(_('.hg/cache/tags is corrupt, rebuilding it\n'))
232 276 cacheheads = []
233 277 cacherevs = []
234 278 cachefnode = {}
235 279
236 280 tipnode = repo.changelog.tip()
237 281 tiprev = len(repo.changelog) - 1
238 282
239 283 # Case 1 (common): tip is the same, so nothing has changed.
240 284 # (Unchanged tip trivially means no changesets have been added.
241 285 # But, thanks to localrepository.destroyed(), it also means none
242 286 # have been destroyed by strip or rollback.)
243 287 if cacheheads and cacheheads[0] == tipnode and cacherevs[0] == tiprev:
244 288 tags = _readtags(ui, repo, cachelines, cachefile.name)
245 289 cachefile.close()
246 290 return (None, None, tags, False)
247 291 if cachefile:
248 292 cachefile.close() # ignore rest of file
249 293
250 294 repoheads = repo.heads()
251 295 # Case 2 (uncommon): empty repo; get out quickly and don't bother
252 296 # writing an empty cache.
253 297 if repoheads == [nullid]:
254 298 return ([], {}, {}, False)
255 299
256 300 # Case 3 (uncommon): cache file missing or empty.
257 301
258 302 # Case 4 (uncommon): tip rev decreased. This should only happen
259 303 # when we're called from localrepository.destroyed(). Refresh the
260 304 # cache so future invocations will not see disappeared heads in the
261 305 # cache.
262 306
263 307 # Case 5 (common): tip has changed, so we've added/replaced heads.
264 308
265 309 # As it happens, the code to handle cases 3, 4, 5 is the same.
266 310
267 311 # N.B. in case 4 (nodes destroyed), "new head" really means "newly
268 312 # exposed".
269 313 if not len(repo.file('.hgtags')):
270 314 # No tags have ever been committed, so we can avoid a
271 315 # potentially expensive search.
272 316 return (repoheads, cachefnode, None, True)
273 317
274 318 starttime = time.time()
275 319
276 320 newheads = [head
277 321 for head in repoheads
278 322 if head not in set(cacheheads)]
279 323
280 324 # Now we have to lookup the .hgtags filenode for every new head.
281 325 # This is the most expensive part of finding tags, so performance
282 326 # depends primarily on the size of newheads. Worst case: no cache
283 327 # file, so newheads == repoheads.
284 328 for head in reversed(newheads):
285 329 cctx = repo[head]
286 330 try:
287 331 fnode = cctx.filenode('.hgtags')
288 332 cachefnode[head] = fnode
289 333 except error.LookupError:
290 334 # no .hgtags file on this head
291 335 pass
292 336
293 337 duration = time.time() - starttime
294 338 ui.log('tagscache',
295 339 'resolved %d tags cache entries from %d manifests in %0.4f '
296 340 'seconds\n',
297 341 len(cachefnode), len(newheads), duration)
298 342
299 343 # Caller has to iterate over all heads, but can use the filenodes in
300 344 # cachefnode to get to each .hgtags revision quickly.
301 345 return (repoheads, cachefnode, None, True)
302 346
303 347 def _writetagcache(ui, repo, heads, tagfnode, cachetags):
304
305 348 try:
306 349 cachefile = repo.vfs('cache/tags', 'w', atomictemp=True)
307 350 except (OSError, IOError):
308 351 return
309 352
310 353 ui.log('tagscache', 'writing tags cache file with %d heads and %d tags\n',
311 354 len(heads), len(cachetags))
312 355
313 356 realheads = repo.heads() # for sanity checks below
314 357 for head in heads:
315 358 # temporary sanity checks; these can probably be removed
316 359 # once this code has been in crew for a few weeks
317 360 assert head in repo.changelog.nodemap, \
318 361 'trying to write non-existent node %s to tag cache' % short(head)
319 362 assert head in realheads, \
320 363 'trying to write non-head %s to tag cache' % short(head)
321 364 assert head != nullid, \
322 365 'trying to write nullid to tag cache'
323 366
324 367 # This can't fail because of the first assert above. When/if we
325 368 # remove that assert, we might want to catch LookupError here
326 369 # and downgrade it to a warning.
327 370 rev = repo.changelog.rev(head)
328 371
329 372 fnode = tagfnode.get(head)
330 373 if fnode:
331 374 cachefile.write('%d %s %s\n' % (rev, hex(head), hex(fnode)))
332 375 else:
333 376 cachefile.write('%d %s\n' % (rev, hex(head)))
334 377
335 378 # Tag names in the cache are in UTF-8 -- which is the whole reason
336 379 # we keep them in UTF-8 throughout this module. If we converted
337 380 # them to local encoding on input, we would lose info writing them to
338 381 # the cache.
339 382 cachefile.write('\n')
340 383 for (name, (node, hist)) in sorted(cachetags.iteritems()):
341 384 for n in hist:
342 385 cachefile.write("%s %s\n" % (hex(n), name))
343 386 cachefile.write("%s %s\n" % (hex(node), name))
344 387
345 388 try:
346 389 cachefile.close()
347 390 except (OSError, IOError):
348 391 pass