##// END OF EJS Templates
tags: read tag info into a sorted dict (rather than into a regular dict)...
Angel Ezquerra -
r21814:5125856a default
parent child Browse files
Show More
@@ -1,312 +1,313 b''
1 # tags.py - read tag info from local repository
1 # tags.py - read tag info from local repository
2 #
2 #
3 # Copyright 2009 Matt Mackall <mpm@selenic.com>
3 # Copyright 2009 Matt Mackall <mpm@selenic.com>
4 # Copyright 2009 Greg Ward <greg@gerg.ca>
4 # Copyright 2009 Greg Ward <greg@gerg.ca>
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8
8
9 # Currently this module only deals with reading and caching tags.
9 # Currently this module only deals with reading and caching tags.
10 # Eventually, it could take care of updating (adding/removing/moving)
10 # Eventually, it could take care of updating (adding/removing/moving)
11 # tags too.
11 # tags too.
12
12
13 from node import nullid, bin, hex, short
13 from node import nullid, bin, hex, short
14 from i18n import _
14 from i18n import _
15 import util
15 import encoding
16 import encoding
16 import error
17 import error
17 import errno
18 import errno
18 import time
19 import time
19
20
def findglobaltags(ui, repo, alltags, tagtypes):
    '''Find global tags in repo by reading .hgtags from every head that
    has a distinct version of it, using a cache to avoid excess work.
    Updates the dicts alltags, tagtypes in place: alltags maps tag name
    to (node, hist) pair (see _readtags() below), and tagtypes maps tag
    name to tag type ("global" in this case).'''
    # Both dicts must start out empty so that _writetagcache() below can
    # safely assume alltags holds nothing but global tags.
    assert len(alltags) == len(tagtypes) == 0, \
           "findglobaltags() should be called first"

    (heads, tagfnode, cachetags, shouldwrite) = _readtagcache(ui, repo)
    if cachetags is not None:
        assert not shouldwrite
        # XXX is this really 100% correct?  are there oddball special
        # cases where a global tag should outrank a local tag but won't,
        # because cachetags does not contain rank info?
        _updatetags(cachetags, 'global', alltags, tagtypes)
        return

    seenfnodes = set()            # .hgtags filenodes already processed
    fctx = None
    for head in reversed(heads):  # walk oldest to newest
        assert head in repo.changelog.nodemap, \
               "tag cache returned bogus head %s" % short(head)

        fnode = tagfnode.get(head)
        if not fnode or fnode in seenfnodes:
            # head has no .hgtags, or an .hgtags version we already read
            continue
        seenfnodes.add(fnode)
        # reuse the previous filectx where possible instead of doing a
        # fresh repo.filectx() lookup each time
        if not fctx:
            fctx = repo.filectx('.hgtags', fileid=fnode)
        else:
            fctx = fctx.filectx(fnode)

        filetags = _readtags(ui, repo, fctx.data().splitlines(), fctx)
        _updatetags(filetags, 'global', alltags, tagtypes)

    # and update the cache (if necessary)
    if shouldwrite:
        _writetagcache(ui, repo, heads, tagfnode, alltags)
def readlocaltags(ui, repo, alltags, tagtypes):
    '''Read local tags in repo. Update alltags and tagtypes.

    Tags come from the .hg/localtags file; a missing file simply means
    there are no local tags, any other I/O error is re-raised.'''
    try:
        data = repo.opener.read("localtags")
    except IOError as inst:
        # was "except IOError, inst" -- the comma form is Python-2-only
        # legacy syntax; "as" is equivalent and forward-compatible
        if inst.errno != errno.ENOENT:
            raise
        return

    # localtags is in the local encoding; re-encode to UTF-8 on
    # input for consistency with the rest of this module.
    filetags = _readtags(
        ui, repo, data.splitlines(), "localtags",
        recode=encoding.fromlocal)
    _updatetags(filetags, "local", alltags, tagtypes)
def _readtags(ui, repo, lines, fn, recode=None):
    '''Read tag definitions from a file (or any source of lines).
    Return a mapping from tag name to (node, hist): node is the node id
    from the last line read for that name, and hist is the list of node
    ids previously associated with it (in file order). All node ids are
    binary, not hex.'''

    # sortdict preserves the order in which tags were first seen
    filetags = util.sortdict()  # map tag name to (node, hist)

    def warn(lineno, msg):
        ui.warn(_("%s, line %s: %s\n") % (fn, lineno, msg))

    for lineno, line in enumerate(lines, 1):
        if not line:
            continue
        try:
            nodehex, name = line.split(" ", 1)
        except ValueError:
            warn(lineno, _("cannot parse entry"))
            continue
        name = name.strip()
        if recode:
            name = recode(name)
        try:
            nodebin = bin(nodehex)
        except TypeError:
            warn(lineno, _("node '%s' is not well formed") % nodehex)
            continue

        # record the tag; any node previously bound to this name is
        # pushed onto its history list
        hist = []
        if name in filetags:
            prevnode, hist = filetags[name]
            hist.append(prevnode)
        filetags[name] = (nodebin, hist)
    return filetags
116 def _updatetags(filetags, tagtype, alltags, tagtypes):
117 def _updatetags(filetags, tagtype, alltags, tagtypes):
117 '''Incorporate the tag info read from one file into the two
118 '''Incorporate the tag info read from one file into the two
118 dictionaries, alltags and tagtypes, that contain all tag
119 dictionaries, alltags and tagtypes, that contain all tag
119 info (global across all heads plus local).'''
120 info (global across all heads plus local).'''
120
121
121 for name, nodehist in filetags.iteritems():
122 for name, nodehist in filetags.iteritems():
122 if name not in alltags:
123 if name not in alltags:
123 alltags[name] = nodehist
124 alltags[name] = nodehist
124 tagtypes[name] = tagtype
125 tagtypes[name] = tagtype
125 continue
126 continue
126
127
127 # we prefer alltags[name] if:
128 # we prefer alltags[name] if:
128 # it supersedes us OR
129 # it supersedes us OR
129 # mutual supersedes and it has a higher rank
130 # mutual supersedes and it has a higher rank
130 # otherwise we win because we're tip-most
131 # otherwise we win because we're tip-most
131 anode, ahist = nodehist
132 anode, ahist = nodehist
132 bnode, bhist = alltags[name]
133 bnode, bhist = alltags[name]
133 if (bnode != anode and anode in bhist and
134 if (bnode != anode and anode in bhist and
134 (bnode not in ahist or len(bhist) > len(ahist))):
135 (bnode not in ahist or len(bhist) > len(ahist))):
135 anode = bnode
136 anode = bnode
136 else:
137 else:
137 tagtypes[name] = tagtype
138 tagtypes[name] = tagtype
138 ahist.extend([n for n in bhist if n not in ahist])
139 ahist.extend([n for n in bhist if n not in ahist])
139 alltags[name] = anode, ahist
140 alltags[name] = anode, ahist
140
141
141
142
142 # The tag cache only stores info about heads, not the tag contents
143 # The tag cache only stores info about heads, not the tag contents
143 # from each head. I.e. it doesn't try to squeeze out the maximum
144 # from each head. I.e. it doesn't try to squeeze out the maximum
144 # performance, but is simpler has a better chance of actually
145 # performance, but is simpler has a better chance of actually
145 # working correctly. And this gives the biggest performance win: it
146 # working correctly. And this gives the biggest performance win: it
146 # avoids looking up .hgtags in the manifest for every head, and it
147 # avoids looking up .hgtags in the manifest for every head, and it
147 # can avoid calling heads() at all if there have been no changes to
148 # can avoid calling heads() at all if there have been no changes to
148 # the repo.
149 # the repo.
149
150
def _readtagcache(ui, repo):
    '''Read the tag cache and return a tuple (heads, fnodes, cachetags,
    shouldwrite). If the cache is completely up-to-date, cachetags is a
    dict of the form returned by _readtags(); otherwise, it is None and
    heads and fnodes are set. In that case, heads is the list of all
    heads currently in the repository (ordered from tip to oldest) and
    fnodes is a mapping from head to .hgtags filenode. If those two are
    set, caller is responsible for reading tag info from each head.'''

    try:
        cachefile = repo.opener('cache/tags', 'r')
        # force reading the file for static-http
        cachelines = iter(cachefile)
    except IOError:
        cachefile = None

    # The cache file consists of lines like
    #   <headrev> <headnode> [<tagnode>]
    # where <headrev> and <headnode> redundantly identify a repository
    # head from the time the cache was written, and <tagnode> is the
    # filenode of .hgtags on that head. Heads with no .hgtags file will
    # have no <tagnode>. The cache is ordered from tip to oldest (which
    # is part of why <headrev> is there: a quick visual check is all
    # that's required to ensure correct order).
    #
    # This information is enough to let us avoid the most expensive part
    # of finding global tags, which is looking up <tagnode> in the
    # manifest for each head.
    cacherevs = []   # list of headrev
    cacheheads = []  # list of headnode
    cachefnode = {}  # map headnode to filenode
    if cachefile:
        try:
            for line in cachelines:
                if line == "\n":
                    break
                line = line.split()
                cacherevs.append(int(line[0]))
                headnode = bin(line[1])
                cacheheads.append(headnode)
                if len(line) == 3:
                    fnode = bin(line[2])
                    cachefnode[headnode] = fnode
        except Exception:
            # corruption of the tags cache, just recompute it
            ui.warn(_('.hg/cache/tags is corrupt, rebuilding it\n'))
            cacheheads = []
            cacherevs = []
            cachefnode = {}

    tipnode = repo.changelog.tip()
    tiprev = len(repo.changelog) - 1

    # Case 1 (common): tip is the same, so nothing has changed.
    # (Unchanged tip trivially means no changesets have been added.
    # But, thanks to localrepository.destroyed(), it also means none
    # have been destroyed by strip or rollback.)
    if cacheheads and cacheheads[0] == tipnode and cacherevs[0] == tiprev:
        tags = _readtags(ui, repo, cachelines, cachefile.name)
        cachefile.close()
        return (None, None, tags, False)
    if cachefile:
        cachefile.close()  # ignore rest of file

    repoheads = repo.heads()
    # Case 2 (uncommon): empty repo; get out quickly and don't bother
    # writing an empty cache.
    if repoheads == [nullid]:
        return ([], {}, {}, False)

    # Case 3 (uncommon): cache file missing or empty.

    # Case 4 (uncommon): tip rev decreased. This should only happen
    # when we're called from localrepository.destroyed(). Refresh the
    # cache so future invocations will not see disappeared heads in the
    # cache.

    # Case 5 (common): tip has changed, so we've added/replaced heads.

    # As it happens, the code to handle cases 3, 4, 5 is the same.

    # N.B. in case 4 (nodes destroyed), "new head" really means "newly
    # exposed".
    if not len(repo.file('.hgtags')):
        # No tags have ever been committed, so we can avoid a
        # potentially expensive search.
        return (repoheads, cachefnode, None, True)

    starttime = time.time()

    # Build the membership set once: the original evaluated
    # set(cacheheads) inside the comprehension, rebuilding it for every
    # head (accidentally O(heads * cacheheads)).
    cacheheadset = set(cacheheads)
    newheads = [head for head in repoheads if head not in cacheheadset]

    # Now we have to lookup the .hgtags filenode for every new head.
    # This is the most expensive part of finding tags, so performance
    # depends primarily on the size of newheads. Worst case: no cache
    # file, so newheads == repoheads.
    for head in reversed(newheads):
        cctx = repo[head]
        try:
            fnode = cctx.filenode('.hgtags')
            cachefnode[head] = fnode
        except error.LookupError:
            # no .hgtags file on this head
            pass

    duration = time.time() - starttime
    ui.log('tagscache',
           'resolved %d tags cache entries from %d manifests in %0.4f '
           'seconds\n',
           len(cachefnode), len(newheads), duration)

    # Caller has to iterate over all heads, but can use the filenodes in
    # cachefnode to get to each .hgtags revision quickly.
    return (repoheads, cachefnode, None, True)
def _writetagcache(ui, repo, heads, tagfnode, cachetags):
    # Persist the head -> .hgtags-filenode mapping plus the resolved
    # tags to .hg/cache/tags. The cache is a pure optimization, so any
    # failure to open or close the file is silently ignored.
    try:
        cachefile = repo.opener('cache/tags', 'w', atomictemp=True)
    except (OSError, IOError):
        return

    ui.log('tagscache', 'writing tags cache file with %d heads and %d tags\n',
           len(heads), len(cachetags))

    realheads = repo.heads()  # for sanity checks below
    for head in heads:
        # temporary sanity checks; these can probably be removed
        # once this code has been in crew for a few weeks
        assert head in repo.changelog.nodemap, \
               'trying to write non-existent node %s to tag cache' % short(head)
        assert head in realheads, \
               'trying to write non-head %s to tag cache' % short(head)
        assert head != nullid, \
               'trying to write nullid to tag cache'

        # This can't fail because of the first assert above. When/if we
        # remove that assert, we might want to catch LookupError here
        # and downgrade it to a warning.
        rev = repo.changelog.rev(head)

        # one "<rev> <headnode> [<tagfnode>]" line per head
        fnode = tagfnode.get(head)
        if fnode:
            entry = '%d %s %s\n' % (rev, hex(head), hex(fnode))
        else:
            entry = '%d %s\n' % (rev, hex(head))
        cachefile.write(entry)

    # Tag names in the cache are in UTF-8 -- which is the whole reason
    # we keep them in UTF-8 throughout this module. If we converted
    # them local encoding on input, we would lose info writing them to
    # the cache.
    cachefile.write('\n')
    for name, (node, hist) in cachetags.items():
        for n in hist:
            cachefile.write("%s %s\n" % (hex(n), name))
        cachefile.write("%s %s\n" % (hex(node), name))

    try:
        cachefile.close()
    except (OSError, IOError):
        pass
General Comments 0
You need to be logged in to leave comments. Login now