tags: write tags cache deterministically...
Gregory Szorc
r24143:7b09dbbb default
@@ -1,348 +1,348 @@
1 1 # tags.py - read tag info from local repository
2 2 #
3 3 # Copyright 2009 Matt Mackall <mpm@selenic.com>
4 4 # Copyright 2009 Greg Ward <greg@gerg.ca>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 # Currently this module only deals with reading and caching tags.
10 10 # Eventually, it could take care of updating (adding/removing/moving)
11 11 # tags too.
12 12
13 13 from node import nullid, bin, hex, short
14 14 from i18n import _
15 15 import util
16 16 import encoding
17 17 import error
18 18 import errno
19 19 import time
20 20
21 21 def findglobaltags(ui, repo, alltags, tagtypes):
22 22 '''Find global tags in repo by reading .hgtags from every head that
23 23 has a distinct version of it, using a cache to avoid excess work.
24 24 Updates the dicts alltags, tagtypes in place: alltags maps tag name
25 25 to (node, hist) pair (see _readtags() below), and tagtypes maps tag
26 26 name to tag type ("global" in this case).'''
27 27 # This is so we can be lazy and assume alltags contains only global
28 28 # tags when we pass it to _writetagcache().
29 29 assert len(alltags) == len(tagtypes) == 0, \
30 30 "findglobaltags() should be called first"
31 31
32 32 (heads, tagfnode, cachetags, shouldwrite) = _readtagcache(ui, repo)
33 33 if cachetags is not None:
34 34 assert not shouldwrite
35 35 # XXX is this really 100% correct? are there oddball special
36 36 # cases where a global tag should outrank a local tag but won't,
37 37 # because cachetags does not contain rank info?
38 38 _updatetags(cachetags, 'global', alltags, tagtypes)
39 39 return
40 40
41 41 seen = set() # set of fnode
42 42 fctx = None
43 43 for head in reversed(heads): # oldest to newest
44 44 assert head in repo.changelog.nodemap, \
45 45 "tag cache returned bogus head %s" % short(head)
46 46
47 47 fnode = tagfnode.get(head)
48 48 if fnode and fnode not in seen:
49 49 seen.add(fnode)
50 50 if not fctx:
51 51 fctx = repo.filectx('.hgtags', fileid=fnode)
52 52 else:
53 53 fctx = fctx.filectx(fnode)
54 54
55 55 filetags = _readtags(ui, repo, fctx.data().splitlines(), fctx)
56 56 _updatetags(filetags, 'global', alltags, tagtypes)
57 57
58 58 # and update the cache (if necessary)
59 59 if shouldwrite:
60 60 _writetagcache(ui, repo, heads, tagfnode, alltags)
61 61
62 62 def readlocaltags(ui, repo, alltags, tagtypes):
63 63 '''Read local tags in repo. Update alltags and tagtypes.'''
64 64 try:
65 65 data = repo.vfs.read("localtags")
66 66 except IOError, inst:
67 67 if inst.errno != errno.ENOENT:
68 68 raise
69 69 return
70 70
71 71 # localtags is in the local encoding; re-encode to UTF-8 on
72 72 # input for consistency with the rest of this module.
73 73 filetags = _readtags(
74 74 ui, repo, data.splitlines(), "localtags",
75 75 recode=encoding.fromlocal)
76 76
77 77 # remove tags pointing to invalid nodes
78 78 cl = repo.changelog
79 79 for t in filetags.keys():
80 80 try:
81 81 cl.rev(filetags[t][0])
82 82 except (LookupError, ValueError):
83 83 del filetags[t]
84 84
85 85 _updatetags(filetags, "local", alltags, tagtypes)
86 86
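# Hypothetical caller sketch (editorial illustration, not part of this file):
# a caller such as localrepository._findtags() builds its tag map roughly as
#
#     alltags, tagtypes = {}, {}
#     findglobaltags(ui, repo, alltags, tagtypes)   # must come first (see assert)
#     readlocaltags(ui, repo, alltags, tagtypes)
#
# after which alltags maps each tag name to (node, hist) and tagtypes maps it
# to "global" or "local".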
87 87 def _readtaghist(ui, repo, lines, fn, recode=None, calcnodelines=False):
88 88 '''Read tag definitions from a file (or any source of lines).
89 89 This function returns two sortdicts with similar information:
90 90 - the first dict, bintaghist, contains the tag information as expected by
91 91 the _readtags function, i.e. a mapping from tag name to (node, hist):
92 92 - node is the node id from the last line read for that name,
93 93 - hist is the list of node ids previously associated with it (in file
94 94 order). All node ids are binary, not hex.
95 95 - the second dict, hextaglines, is a mapping from tag name to a list of
96 96 [hexnode, line number] pairs, ordered from the oldest to the newest node.
97 97 When calcnodelines is False the hextaglines dict is not calculated (an
98 98 empty dict is returned). This is done to improve this function's
99 99 performance in cases where the line numbers are not needed.
100 100 '''
101 101
102 102 bintaghist = util.sortdict()
103 103 hextaglines = util.sortdict()
104 104 count = 0
105 105
106 106 def warn(msg):
107 107 ui.warn(_("%s, line %s: %s\n") % (fn, count, msg))
108 108
109 109 for nline, line in enumerate(lines):
110 110 count += 1
111 111 if not line:
112 112 continue
113 113 try:
114 114 (nodehex, name) = line.split(" ", 1)
115 115 except ValueError:
116 116 warn(_("cannot parse entry"))
117 117 continue
118 118 name = name.strip()
119 119 if recode:
120 120 name = recode(name)
121 121 try:
122 122 nodebin = bin(nodehex)
123 123 except TypeError:
124 124 warn(_("node '%s' is not well formed") % nodehex)
125 125 continue
126 126
127 127 # update filetags
128 128 if calcnodelines:
129 129 # map tag name to a list of line numbers
130 130 if name not in hextaglines:
131 131 hextaglines[name] = []
132 132 hextaglines[name].append([nodehex, nline])
133 133 continue
134 134 # map tag name to (node, hist)
135 135 if name not in bintaghist:
136 136 bintaghist[name] = []
137 137 bintaghist[name].append(nodebin)
138 138 return bintaghist, hextaglines
139 139
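# Editorial illustration with hypothetical data: given two .hgtags lines that
# both name the tag "release-1.0",
#
#     aaaa... release-1.0        (each node a full 40-digit hex hash)
#     bbbb... release-1.0
#
# _readtaghist() with calcnodelines=False returns
#     bintaghist == {'release-1.0': [bin('aaaa...'), bin('bbbb...')]}
# (nodes in file order) plus an empty hextaglines dict, while with
# calcnodelines=True it instead fills
#     hextaglines == {'release-1.0': [['aaaa...', 0], ['bbbb...', 1]]}
# and leaves bintaghist empty.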
140 140 def _readtags(ui, repo, lines, fn, recode=None, calcnodelines=False):
141 141 '''Read tag definitions from a file (or any source of lines).
142 142 Return a mapping from tag name to (node, hist): node is the node id
143 143 from the last line read for that name, and hist is the list of node
144 144 ids previously associated with it (in file order). All node ids are
145 145 binary, not hex.'''
146 146 filetags, nodelines = _readtaghist(ui, repo, lines, fn, recode=recode,
147 147 calcnodelines=calcnodelines)
148 148 for tag, taghist in filetags.items():
149 149 filetags[tag] = (taghist[-1], taghist[:-1])
150 150 return filetags
151 151
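# Continuing the illustration above: _readtags() folds each node list into a
# (node, hist) pair, so the two "release-1.0" lines become
#     {'release-1.0': (bin('bbbb...'), [bin('aaaa...')])}
# i.e. the last node read for a name wins and earlier nodes become its history.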
152 152 def _updatetags(filetags, tagtype, alltags, tagtypes):
153 153 '''Incorporate the tag info read from one file into the two
154 154 dictionaries, alltags and tagtypes, that contain all tag
155 155 info (global across all heads plus local).'''
156 156
157 157 for name, nodehist in filetags.iteritems():
158 158 if name not in alltags:
159 159 alltags[name] = nodehist
160 160 tagtypes[name] = tagtype
161 161 continue
162 162
163 163 # we prefer alltags[name] if:
164 164 # it supersedes us OR
165 165 # mutual supersedes and it has a higher rank
166 166 # otherwise we win because we're tip-most
167 167 anode, ahist = nodehist
168 168 bnode, bhist = alltags[name]
169 169 if (bnode != anode and anode in bhist and
170 170 (bnode not in ahist or len(bhist) > len(ahist))):
171 171 anode = bnode
172 172 else:
173 173 tagtypes[name] = tagtype
174 174 ahist.extend([n for n in bhist if n not in ahist])
175 175 alltags[name] = anode, ahist
176 176
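# Worked example of the ranking rule above, with hypothetical nodes A and B:
# if alltags['t'] is already (B, [A]) (some head moved 't' from A to B) and a
# second .hgtags copy then contributes (A, []), the existing node B is kept
# because A appears in the existing history and B is not in the incoming one;
# the merged entry stays (B, [A]). If instead the incoming entry were (B, [A])
# and the existing one (A, []), the tip-most incoming node B would win and
# tagtypes['t'] would be refreshed.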
177 177
178 178 # The tag cache only stores info about heads, not the tag contents
179 179 # from each head. I.e. it doesn't try to squeeze out the maximum
180 180 # performance, but is simpler has a better chance of actually
181 181 # working correctly. And this gives the biggest performance win: it
182 182 # avoids looking up .hgtags in the manifest for every head, and it
183 183 # can avoid calling heads() at all if there have been no changes to
184 184 # the repo.
185 185
186 186 def _readtagcache(ui, repo):
187 187 '''Read the tag cache and return a tuple (heads, fnodes, cachetags,
188 188 shouldwrite). If the cache is completely up-to-date, cachetags is a
189 189 dict of the form returned by _readtags(); otherwise, it is None and
190 190 heads and fnodes are set. In that case, heads is the list of all
191 191 heads currently in the repository (ordered from tip to oldest) and
192 192 fnodes is a mapping from head to .hgtags filenode. If those two are
193 193 set, caller is responsible for reading tag info from each head.'''
194 194
195 195 try:
196 196 cachefile = repo.vfs('cache/tags', 'r')
197 197 # force reading the file for static-http
198 198 cachelines = iter(cachefile)
199 199 except IOError:
200 200 cachefile = None
201 201
202 202 # The cache file consists of lines like
203 203 # <headrev> <headnode> [<tagnode>]
204 204 # where <headrev> and <headnode> redundantly identify a repository
205 205 # head from the time the cache was written, and <tagnode> is the
206 206 # filenode of .hgtags on that head. Heads with no .hgtags file will
207 207 # have no <tagnode>. The cache is ordered from tip to oldest (which
208 208 # is part of why <headrev> is there: a quick visual check is all
209 209 # that's required to ensure correct order).
210 210 #
211 211 # This information is enough to let us avoid the most expensive part
212 212 # of finding global tags, which is looking up <tagnode> in the
213 213 # manifest for each head.
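# For illustration only (hypothetical revisions, abbreviated nodes and a
# hypothetical tag name), a complete cache file might look like:
#
#     4388 aaaa... bbbb...      <- head whose .hgtags filenode is bbbb...
#     4200 cccc...              <- head with no .hgtags file
#                               <- blank separator line
#     dddd... release-1.0       <- tag entries appended by _writetagcache()
#
# with every node spelled out as a full 40-digit hex hash in the real file.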
214 214 cacherevs = [] # list of headrev
215 215 cacheheads = [] # list of headnode
216 216 cachefnode = {} # map headnode to filenode
217 217 if cachefile:
218 218 try:
219 219 for line in cachelines:
220 220 if line == "\n":
221 221 break
222 222 line = line.split()
223 223 cacherevs.append(int(line[0]))
224 224 headnode = bin(line[1])
225 225 cacheheads.append(headnode)
226 226 if len(line) == 3:
227 227 fnode = bin(line[2])
228 228 cachefnode[headnode] = fnode
229 229 except Exception:
230 230 # corruption of the tags cache, just recompute it
231 231 ui.warn(_('.hg/cache/tags is corrupt, rebuilding it\n'))
232 232 cacheheads = []
233 233 cacherevs = []
234 234 cachefnode = {}
235 235
236 236 tipnode = repo.changelog.tip()
237 237 tiprev = len(repo.changelog) - 1
238 238
239 239 # Case 1 (common): tip is the same, so nothing has changed.
240 240 # (Unchanged tip trivially means no changesets have been added.
241 241 # But, thanks to localrepository.destroyed(), it also means none
242 242 # have been destroyed by strip or rollback.)
243 243 if cacheheads and cacheheads[0] == tipnode and cacherevs[0] == tiprev:
244 244 tags = _readtags(ui, repo, cachelines, cachefile.name)
245 245 cachefile.close()
246 246 return (None, None, tags, False)
247 247 if cachefile:
248 248 cachefile.close() # ignore rest of file
249 249
250 250 repoheads = repo.heads()
251 251 # Case 2 (uncommon): empty repo; get out quickly and don't bother
252 252 # writing an empty cache.
253 253 if repoheads == [nullid]:
254 254 return ([], {}, {}, False)
255 255
256 256 # Case 3 (uncommon): cache file missing or empty.
257 257
258 258 # Case 4 (uncommon): tip rev decreased. This should only happen
259 259 # when we're called from localrepository.destroyed(). Refresh the
260 260 # cache so future invocations will not see disappeared heads in the
261 261 # cache.
262 262
263 263 # Case 5 (common): tip has changed, so we've added/replaced heads.
264 264
265 265 # As it happens, the code to handle cases 3, 4, 5 is the same.
266 266
267 267 # N.B. in case 4 (nodes destroyed), "new head" really means "newly
268 268 # exposed".
269 269 if not len(repo.file('.hgtags')):
270 270 # No tags have ever been committed, so we can avoid a
271 271 # potentially expensive search.
272 272 return (repoheads, cachefnode, None, True)
273 273
274 274 starttime = time.time()
275 275
276 276 newheads = [head
277 277 for head in repoheads
278 278 if head not in set(cacheheads)]
279 279
280 280 # Now we have to lookup the .hgtags filenode for every new head.
281 281 # This is the most expensive part of finding tags, so performance
282 282 # depends primarily on the size of newheads. Worst case: no cache
283 283 # file, so newheads == repoheads.
284 284 for head in reversed(newheads):
285 285 cctx = repo[head]
286 286 try:
287 287 fnode = cctx.filenode('.hgtags')
288 288 cachefnode[head] = fnode
289 289 except error.LookupError:
290 290 # no .hgtags file on this head
291 291 pass
292 292
293 293 duration = time.time() - starttime
294 294 ui.log('tagscache',
295 295 'resolved %d tags cache entries from %d manifests in %0.4f '
296 296 'seconds\n',
297 297 len(cachefnode), len(newheads), duration)
298 298
299 299 # Caller has to iterate over all heads, but can use the filenodes in
300 300 # cachefnode to get to each .hgtags revision quickly.
301 301 return (repoheads, cachefnode, None, True)
302 302
303 303 def _writetagcache(ui, repo, heads, tagfnode, cachetags):
304 304
305 305 try:
306 306 cachefile = repo.vfs('cache/tags', 'w', atomictemp=True)
307 307 except (OSError, IOError):
308 308 return
309 309
310 310 ui.log('tagscache', 'writing tags cache file with %d heads and %d tags\n',
311 311 len(heads), len(cachetags))
312 312
313 313 realheads = repo.heads() # for sanity checks below
314 314 for head in heads:
315 315 # temporary sanity checks; these can probably be removed
316 316 # once this code has been in crew for a few weeks
317 317 assert head in repo.changelog.nodemap, \
318 318 'trying to write non-existent node %s to tag cache' % short(head)
319 319 assert head in realheads, \
320 320 'trying to write non-head %s to tag cache' % short(head)
321 321 assert head != nullid, \
322 322 'trying to write nullid to tag cache'
323 323
324 324 # This can't fail because of the first assert above. When/if we
325 325 # remove that assert, we might want to catch LookupError here
326 326 # and downgrade it to a warning.
327 327 rev = repo.changelog.rev(head)
328 328
329 329 fnode = tagfnode.get(head)
330 330 if fnode:
331 331 cachefile.write('%d %s %s\n' % (rev, hex(head), hex(fnode)))
332 332 else:
333 333 cachefile.write('%d %s\n' % (rev, hex(head)))
334 334
335 335 # Tag names in the cache are in UTF-8 -- which is the whole reason
336 336 # we keep them in UTF-8 throughout this module. If we converted
338 338 # them to local encoding on input, we would lose info writing them to
338 338 # the cache.
339 339 cachefile.write('\n')
340 for (name, (node, hist)) in cachetags.iteritems():
340 for (name, (node, hist)) in sorted(cachetags.iteritems()):
341 341 for n in hist:
342 342 cachefile.write("%s %s\n" % (hex(n), name))
343 343 cachefile.write("%s %s\n" % (hex(node), name))
344 344
345 345 try:
346 346 cachefile.close()
347 347 except (OSError, IOError):
348 348 pass