tags: read tag info into a sorted dict (rather than into a regular dict)...
Angel Ezquerra
r21814:5125856a default
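
The patch below replaces the plain dict used in _readtags() with util.sortdict, so tags come back in the order they appear in the source file. As a minimal sketch of the behavioral difference, the snippet uses collections.OrderedDict as a stand-in for util.sortdict (assuming util.sortdict behaves as an insertion-ordered mapping); the node hashes and tag names are made up for illustration.

from collections import OrderedDict  # stand-in for util.sortdict (assumed insertion-ordered)

lines = [
    "0123456789abcdef0123456789abcdef01234567 v1.0",
    "89abcdef0123456789abcdef0123456789abcdef v1.1",
    "456789abcdef0123456789abcdef0123456789ab v2.0",
]

filetags = OrderedDict()  # the patch swaps {} for util.sortdict() here
for line in lines:
    nodehex, name = line.split(" ", 1)
    filetags[name.strip()] = (nodehex, [])

# With a regular dict the iteration order below would be arbitrary; with an
# insertion-ordered mapping it follows the order of the .hgtags lines.
for name, (node, hist) in filetags.items():
    print("%s -> %s" % (name, node))
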
@@ -1,312 +1,313 @@
1 1 # tags.py - read tag info from local repository
2 2 #
3 3 # Copyright 2009 Matt Mackall <mpm@selenic.com>
4 4 # Copyright 2009 Greg Ward <greg@gerg.ca>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 # Currently this module only deals with reading and caching tags.
10 10 # Eventually, it could take care of updating (adding/removing/moving)
11 11 # tags too.
12 12
13 13 from node import nullid, bin, hex, short
14 14 from i18n import _
15 import util
15 16 import encoding
16 17 import error
17 18 import errno
18 19 import time
19 20
20 21 def findglobaltags(ui, repo, alltags, tagtypes):
21 22 '''Find global tags in repo by reading .hgtags from every head that
22 23 has a distinct version of it, using a cache to avoid excess work.
23 24 Updates the dicts alltags, tagtypes in place: alltags maps tag name
24 25 to (node, hist) pair (see _readtags() below), and tagtypes maps tag
25 26 name to tag type ("global" in this case).'''
26 27 # This is so we can be lazy and assume alltags contains only global
27 28 # tags when we pass it to _writetagcache().
28 29 assert len(alltags) == len(tagtypes) == 0, \
29 30 "findglobaltags() should be called first"
30 31
31 32 (heads, tagfnode, cachetags, shouldwrite) = _readtagcache(ui, repo)
32 33 if cachetags is not None:
33 34 assert not shouldwrite
34 35 # XXX is this really 100% correct? are there oddball special
35 36 # cases where a global tag should outrank a local tag but won't,
36 37 # because cachetags does not contain rank info?
37 38 _updatetags(cachetags, 'global', alltags, tagtypes)
38 39 return
39 40
40 41 seen = set() # set of fnode
41 42 fctx = None
42 43 for head in reversed(heads): # oldest to newest
43 44 assert head in repo.changelog.nodemap, \
44 45 "tag cache returned bogus head %s" % short(head)
45 46
46 47 fnode = tagfnode.get(head)
47 48 if fnode and fnode not in seen:
48 49 seen.add(fnode)
49 50 if not fctx:
50 51 fctx = repo.filectx('.hgtags', fileid=fnode)
51 52 else:
52 53 fctx = fctx.filectx(fnode)
53 54
54 55 filetags = _readtags(ui, repo, fctx.data().splitlines(), fctx)
55 56 _updatetags(filetags, 'global', alltags, tagtypes)
56 57
57 58 # and update the cache (if necessary)
58 59 if shouldwrite:
59 60 _writetagcache(ui, repo, heads, tagfnode, alltags)
60 61
61 62 def readlocaltags(ui, repo, alltags, tagtypes):
62 63 '''Read local tags in repo. Update alltags and tagtypes.'''
63 64 try:
64 65 data = repo.opener.read("localtags")
65 66 except IOError, inst:
66 67 if inst.errno != errno.ENOENT:
67 68 raise
68 69 return
69 70
70 71 # localtags is in the local encoding; re-encode to UTF-8 on
71 72 # input for consistency with the rest of this module.
72 73 filetags = _readtags(
73 74 ui, repo, data.splitlines(), "localtags",
74 75 recode=encoding.fromlocal)
75 76 _updatetags(filetags, "local", alltags, tagtypes)
76 77
77 78 def _readtags(ui, repo, lines, fn, recode=None):
78 79 '''Read tag definitions from a file (or any source of lines).
79 80 Return a mapping from tag name to (node, hist): node is the node id
80 81 from the last line read for that name, and hist is the list of node
81 82 ids previously associated with it (in file order). All node ids are
82 83 binary, not hex.'''
83 84
84 filetags = {} # map tag name to (node, hist)
85 filetags = util.sortdict() # map tag name to (node, hist)
85 86 count = 0
86 87
87 88 def warn(msg):
88 89 ui.warn(_("%s, line %s: %s\n") % (fn, count, msg))
89 90
90 91 for line in lines:
91 92 count += 1
92 93 if not line:
93 94 continue
94 95 try:
95 96 (nodehex, name) = line.split(" ", 1)
96 97 except ValueError:
97 98 warn(_("cannot parse entry"))
98 99 continue
99 100 name = name.strip()
100 101 if recode:
101 102 name = recode(name)
102 103 try:
103 104 nodebin = bin(nodehex)
104 105 except TypeError:
105 106 warn(_("node '%s' is not well formed") % nodehex)
106 107 continue
107 108
108 109 # update filetags
109 110 hist = []
110 111 if name in filetags:
111 112 n, hist = filetags[name]
112 113 hist.append(n)
113 114 filetags[name] = (nodebin, hist)
114 115 return filetags
115 116
116 117 def _updatetags(filetags, tagtype, alltags, tagtypes):
117 118 '''Incorporate the tag info read from one file into the two
118 119 dictionaries, alltags and tagtypes, that contain all tag
119 120 info (global across all heads plus local).'''
120 121
121 122 for name, nodehist in filetags.iteritems():
122 123 if name not in alltags:
123 124 alltags[name] = nodehist
124 125 tagtypes[name] = tagtype
125 126 continue
126 127
127 128 # we prefer alltags[name] if:
128 129 # it supersedes us OR
129 130 # mutual supersedes and it has a higher rank
130 131 # otherwise we win because we're tip-most
131 132 anode, ahist = nodehist
132 133 bnode, bhist = alltags[name]
133 134 if (bnode != anode and anode in bhist and
134 135 (bnode not in ahist or len(bhist) > len(ahist))):
135 136 anode = bnode
136 137 else:
137 138 tagtypes[name] = tagtype
138 139 ahist.extend([n for n in bhist if n not in ahist])
139 140 alltags[name] = anode, ahist
140 141
141 142
142 143 # The tag cache only stores info about heads, not the tag contents
143 144 # from each head. I.e. it doesn't try to squeeze out the maximum
144 145 # performance, but is simpler and has a better chance of actually
145 146 # working correctly. And this gives the biggest performance win: it
146 147 # avoids looking up .hgtags in the manifest for every head, and it
147 148 # can avoid calling heads() at all if there have been no changes to
148 149 # the repo.
149 150
150 151 def _readtagcache(ui, repo):
151 152 '''Read the tag cache and return a tuple (heads, fnodes, cachetags,
152 153 shouldwrite). If the cache is completely up-to-date, cachetags is a
153 154 dict of the form returned by _readtags(); otherwise, it is None and
154 155 heads and fnodes are set. In that case, heads is the list of all
155 156 heads currently in the repository (ordered from tip to oldest) and
156 157 fnodes is a mapping from head to .hgtags filenode. If those two are
157 158 set, caller is responsible for reading tag info from each head.'''
158 159
159 160 try:
160 161 cachefile = repo.opener('cache/tags', 'r')
161 162 # force reading the file for static-http
162 163 cachelines = iter(cachefile)
163 164 except IOError:
164 165 cachefile = None
165 166
166 167 # The cache file consists of lines like
167 168 # <headrev> <headnode> [<tagnode>]
168 169 # where <headrev> and <headnode> redundantly identify a repository
169 170 # head from the time the cache was written, and <tagnode> is the
170 171 # filenode of .hgtags on that head. Heads with no .hgtags file will
171 172 # have no <tagnode>. The cache is ordered from tip to oldest (which
172 173 # is part of why <headrev> is there: a quick visual check is all
173 174 # that's required to ensure correct order).
174 175 #
175 176 # This information is enough to let us avoid the most expensive part
176 177 # of finding global tags, which is looking up <tagnode> in the
177 178 # manifest for each head.
178 179 cacherevs = [] # list of headrev
179 180 cacheheads = [] # list of headnode
180 181 cachefnode = {} # map headnode to filenode
181 182 if cachefile:
182 183 try:
183 184 for line in cachelines:
184 185 if line == "\n":
185 186 break
186 187 line = line.split()
187 188 cacherevs.append(int(line[0]))
188 189 headnode = bin(line[1])
189 190 cacheheads.append(headnode)
190 191 if len(line) == 3:
191 192 fnode = bin(line[2])
192 193 cachefnode[headnode] = fnode
193 194 except Exception:
194 195 # corruption of the tags cache, just recompute it
195 196 ui.warn(_('.hg/cache/tags is corrupt, rebuilding it\n'))
196 197 cacheheads = []
197 198 cacherevs = []
198 199 cachefnode = {}
199 200
200 201 tipnode = repo.changelog.tip()
201 202 tiprev = len(repo.changelog) - 1
202 203
203 204 # Case 1 (common): tip is the same, so nothing has changed.
204 205 # (Unchanged tip trivially means no changesets have been added.
205 206 # But, thanks to localrepository.destroyed(), it also means none
206 207 # have been destroyed by strip or rollback.)
207 208 if cacheheads and cacheheads[0] == tipnode and cacherevs[0] == tiprev:
208 209 tags = _readtags(ui, repo, cachelines, cachefile.name)
209 210 cachefile.close()
210 211 return (None, None, tags, False)
211 212 if cachefile:
212 213 cachefile.close() # ignore rest of file
213 214
214 215 repoheads = repo.heads()
215 216 # Case 2 (uncommon): empty repo; get out quickly and don't bother
216 217 # writing an empty cache.
217 218 if repoheads == [nullid]:
218 219 return ([], {}, {}, False)
219 220
220 221 # Case 3 (uncommon): cache file missing or empty.
221 222
222 223 # Case 4 (uncommon): tip rev decreased. This should only happen
223 224 # when we're called from localrepository.destroyed(). Refresh the
224 225 # cache so future invocations will not see disappeared heads in the
225 226 # cache.
226 227
227 228 # Case 5 (common): tip has changed, so we've added/replaced heads.
228 229
229 230 # As it happens, the code to handle cases 3, 4, 5 is the same.
230 231
231 232 # N.B. in case 4 (nodes destroyed), "new head" really means "newly
232 233 # exposed".
233 234 if not len(repo.file('.hgtags')):
234 235 # No tags have ever been committed, so we can avoid a
235 236 # potentially expensive search.
236 237 return (repoheads, cachefnode, None, True)
237 238
238 239 starttime = time.time()
239 240
240 241 newheads = [head
241 242 for head in repoheads
242 243 if head not in set(cacheheads)]
243 244
244 245 # Now we have to lookup the .hgtags filenode for every new head.
245 246 # This is the most expensive part of finding tags, so performance
246 247 # depends primarily on the size of newheads. Worst case: no cache
247 248 # file, so newheads == repoheads.
248 249 for head in reversed(newheads):
249 250 cctx = repo[head]
250 251 try:
251 252 fnode = cctx.filenode('.hgtags')
252 253 cachefnode[head] = fnode
253 254 except error.LookupError:
254 255 # no .hgtags file on this head
255 256 pass
256 257
257 258 duration = time.time() - starttime
258 259 ui.log('tagscache',
259 260 'resolved %d tags cache entries from %d manifests in %0.4f '
260 261 'seconds\n',
261 262 len(cachefnode), len(newheads), duration)
262 263
263 264 # Caller has to iterate over all heads, but can use the filenodes in
264 265 # cachefnode to get to each .hgtags revision quickly.
265 266 return (repoheads, cachefnode, None, True)
266 267
267 268 def _writetagcache(ui, repo, heads, tagfnode, cachetags):
268 269
269 270 try:
270 271 cachefile = repo.opener('cache/tags', 'w', atomictemp=True)
271 272 except (OSError, IOError):
272 273 return
273 274
274 275 ui.log('tagscache', 'writing tags cache file with %d heads and %d tags\n',
275 276 len(heads), len(cachetags))
276 277
277 278 realheads = repo.heads() # for sanity checks below
278 279 for head in heads:
279 280 # temporary sanity checks; these can probably be removed
280 281 # once this code has been in crew for a few weeks
281 282 assert head in repo.changelog.nodemap, \
282 283 'trying to write non-existent node %s to tag cache' % short(head)
283 284 assert head in realheads, \
284 285 'trying to write non-head %s to tag cache' % short(head)
285 286 assert head != nullid, \
286 287 'trying to write nullid to tag cache'
287 288
288 289 # This can't fail because of the first assert above. When/if we
289 290 # remove that assert, we might want to catch LookupError here
290 291 # and downgrade it to a warning.
291 292 rev = repo.changelog.rev(head)
292 293
293 294 fnode = tagfnode.get(head)
294 295 if fnode:
295 296 cachefile.write('%d %s %s\n' % (rev, hex(head), hex(fnode)))
296 297 else:
297 298 cachefile.write('%d %s\n' % (rev, hex(head)))
298 299
299 300 # Tag names in the cache are in UTF-8 -- which is the whole reason
300 301 # we keep them in UTF-8 throughout this module. If we converted
301 302 # them to local encoding on input, we would lose info writing them to
302 303 # the cache.
303 304 cachefile.write('\n')
304 305 for (name, (node, hist)) in cachetags.iteritems():
305 306 for n in hist:
306 307 cachefile.write("%s %s\n" % (hex(n), name))
307 308 cachefile.write("%s %s\n" % (hex(node), name))
308 309
309 310 try:
310 311 cachefile.close()
311 312 except (OSError, IOError):
312 313 pass
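
For readers unfamiliar with the cache layout documented in _readtagcache() above, here is a standalone, illustrative reader for that format: a head section of "<headrev> <headnode> [<tagnode>]" lines, a blank separator line, then "<node-hex> <tagname>" entries where later entries for a name supersede earlier ones. The function name parse_tags_cache and the hex-string return values are illustrative only; this is a sketch, not part of Mercurial's API.

def parse_tags_cache(lines):
    """Sketch of a reader for the .hg/cache/tags layout described above.

    Returns (heads, fnodes, tags): heads is a list of (rev, headnode) from
    tip to oldest, fnodes maps headnode -> .hgtags filenode, and tags maps
    tag name -> (node, history), with all node ids kept as hex strings.
    """
    heads, fnodes, tags = [], {}, {}
    it = iter(lines)
    for line in it:                      # head section, ends at the blank line
        if line.strip() == "":
            break
        fields = line.split()
        rev, headnode = int(fields[0]), fields[1]
        heads.append((rev, headnode))
        if len(fields) == 3:             # this head has a .hgtags file
            fnodes[headnode] = fields[2]
    for line in it:                      # tag section: "<node-hex> <tagname>"
        nodehex, name = line.rstrip("\n").split(" ", 1)
        node, hist = tags.get(name, (None, []))
        if node is not None:             # older entry becomes history
            hist.append(node)
        tags[name] = (nodehex, hist)
    return heads, fnodes, tags

Mirroring _readtags(), the last line seen for a given tag name wins and any earlier nodes are kept as that tag's history.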