##// END OF EJS Templates
tags: support setting hgtags fnodes cache entries...
Gregory Szorc -
r25381:47edeff1 default
parent child Browse files
Show More
@@ -1,540 +1,553 b''
1 # tags.py - read tag info from local repository
1 # tags.py - read tag info from local repository
2 #
2 #
3 # Copyright 2009 Matt Mackall <mpm@selenic.com>
3 # Copyright 2009 Matt Mackall <mpm@selenic.com>
4 # Copyright 2009 Greg Ward <greg@gerg.ca>
4 # Copyright 2009 Greg Ward <greg@gerg.ca>
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8
8
9 # Currently this module only deals with reading and caching tags.
9 # Currently this module only deals with reading and caching tags.
10 # Eventually, it could take care of updating (adding/removing/moving)
10 # Eventually, it could take care of updating (adding/removing/moving)
11 # tags too.
11 # tags too.
12
12
13 from node import nullid, bin, hex, short
13 from node import nullid, bin, hex, short
14 from i18n import _
14 from i18n import _
15 import util
15 import util
16 import encoding
16 import encoding
17 import error
17 import error
18 from array import array
18 from array import array
19 import errno
19 import errno
20 import time
20 import time
21
21
22 # Tags computation can be expensive and caches exist to make it fast in
22 # Tags computation can be expensive and caches exist to make it fast in
23 # the common case.
23 # the common case.
24 #
24 #
25 # The "hgtagsfnodes1" cache file caches the .hgtags filenode values for
25 # The "hgtagsfnodes1" cache file caches the .hgtags filenode values for
26 # each revision in the repository. The file is effectively an array of
26 # each revision in the repository. The file is effectively an array of
27 # fixed length records. Read the docs for "hgtagsfnodescache" for technical
27 # fixed length records. Read the docs for "hgtagsfnodescache" for technical
28 # details.
28 # details.
29 #
29 #
30 # The .hgtags filenode cache grows in proportion to the length of the
30 # The .hgtags filenode cache grows in proportion to the length of the
31 # changelog. The file is truncated when the # changelog is stripped.
31 # changelog. The file is truncated when the # changelog is stripped.
32 #
32 #
33 # The purpose of the filenode cache is to avoid the most expensive part
33 # The purpose of the filenode cache is to avoid the most expensive part
34 # of finding global tags, which is looking up the .hgtags filenode in the
34 # of finding global tags, which is looking up the .hgtags filenode in the
35 # manifest for each head. This can take dozens or over 100ms for
35 # manifest for each head. This can take dozens or over 100ms for
36 # repositories with very large manifests. Multiplied by dozens or even
36 # repositories with very large manifests. Multiplied by dozens or even
37 # hundreds of heads and there is a significant performance concern.
37 # hundreds of heads and there is a significant performance concern.
38 #
38 #
39 # There also exist a separate cache file for each repository filter.
39 # There also exist a separate cache file for each repository filter.
40 # These "tags-*" files store information about the history of tags.
40 # These "tags-*" files store information about the history of tags.
41 #
41 #
42 # The tags cache files consists of a cache validation line followed by
42 # The tags cache files consists of a cache validation line followed by
43 # a history of tags.
43 # a history of tags.
44 #
44 #
45 # The cache validation line has the format:
45 # The cache validation line has the format:
46 #
46 #
47 # <tiprev> <tipnode> [<filteredhash>]
47 # <tiprev> <tipnode> [<filteredhash>]
48 #
48 #
49 # <tiprev> is an integer revision and <tipnode> is a 40 character hex
49 # <tiprev> is an integer revision and <tipnode> is a 40 character hex
50 # node for that changeset. These redundantly identify the repository
50 # node for that changeset. These redundantly identify the repository
51 # tip from the time the cache was written. In addition, <filteredhash>,
51 # tip from the time the cache was written. In addition, <filteredhash>,
52 # if present, is a 40 character hex hash of the contents of the filtered
52 # if present, is a 40 character hex hash of the contents of the filtered
53 # revisions for this filter. If the set of filtered revs changes, the
53 # revisions for this filter. If the set of filtered revs changes, the
54 # hash will change and invalidate the cache.
54 # hash will change and invalidate the cache.
55 #
55 #
56 # The history part of the tags cache consists of lines of the form:
56 # The history part of the tags cache consists of lines of the form:
57 #
57 #
58 # <node> <tag>
58 # <node> <tag>
59 #
59 #
60 # (This format is identical to that of .hgtags files.)
60 # (This format is identical to that of .hgtags files.)
61 #
61 #
62 # <tag> is the tag name and <node> is the 40 character hex changeset
62 # <tag> is the tag name and <node> is the 40 character hex changeset
63 # the tag is associated with.
63 # the tag is associated with.
64 #
64 #
65 # Tags are written sorted by tag name.
65 # Tags are written sorted by tag name.
66 #
66 #
67 # Tags associated with multiple changesets have an entry for each changeset.
67 # Tags associated with multiple changesets have an entry for each changeset.
68 # The most recent changeset (in terms of revlog ordering for the head
68 # The most recent changeset (in terms of revlog ordering for the head
69 # setting it) for each tag is last.
69 # setting it) for each tag is last.
70
70
def findglobaltags(ui, repo, alltags, tagtypes):
    '''Find global tags in a repo.

    "alltags" maps tag name to (node, hist) 2-tuples.

    "tagtypes" maps tag name to tag type. Global tags always have the
    "global" tag type.

    The "alltags" and "tagtypes" dicts are updated in place. Empty dicts
    should be passed in.

    The tags cache is read and updated as a side-effect of calling.
    '''
    # This is so we can be lazy and assume alltags contains only global
    # tags when we pass it to _writetagcache().
    assert len(alltags) == len(tagtypes) == 0, \
           "findglobaltags() should be called first"

    (heads, tagfnode, valid, cachetags, shouldwrite) = _readtagcache(ui, repo)
    if cachetags is not None:
        # Cache was fully up to date: use it directly and skip the
        # per-head .hgtags walk entirely.
        assert not shouldwrite
        # XXX is this really 100% correct?  are there oddball special
        # cases where a global tag should outrank a local tag but won't,
        # because cachetags does not contain rank info?
        _updatetags(cachetags, 'global', alltags, tagtypes)
        return

    seen = set()  # set of fnode
    fctx = None
    for head in reversed(heads):  # oldest to newest
        assert head in repo.changelog.nodemap, \
               "tag cache returned bogus head %s" % short(head)

        fnode = tagfnode.get(head)
        # Multiple heads can share an .hgtags filenode; read each
        # distinct filenode only once.
        if fnode and fnode not in seen:
            seen.add(fnode)
            if not fctx:
                fctx = repo.filectx('.hgtags', fileid=fnode)
            else:
                # Reuse the existing file context; only the fileid
                # changes between heads.
                fctx = fctx.filectx(fnode)

            filetags = _readtags(ui, repo, fctx.data().splitlines(), fctx)
            _updatetags(filetags, 'global', alltags, tagtypes)

    # and update the cache (if necessary)
    if shouldwrite:
        _writetagcache(ui, repo, valid, alltags)
118
118
def readlocaltags(ui, repo, alltags, tagtypes):
    '''Read local tags in repo. Update alltags and tagtypes.

    Local tags live in .hg/localtags. A missing file simply means no
    local tags; any other I/O error is re-raised.
    '''
    try:
        data = repo.vfs.read("localtags")
    except IOError as inst:
        # "except E, v" is deprecated syntax; "as" works on 2.6+ and 3.x.
        if inst.errno != errno.ENOENT:
            raise
        return

    # localtags is in the local encoding; re-encode to UTF-8 on
    # input for consistency with the rest of this module.
    filetags = _readtags(
        ui, repo, data.splitlines(), "localtags",
        recode=encoding.fromlocal)

    # remove tags pointing to invalid nodes; iterate a snapshot of the
    # keys because we delete from the dict inside the loop
    cl = repo.changelog
    for t in list(filetags):
        try:
            cl.rev(filetags[t][0])
        except (LookupError, ValueError):
            del filetags[t]

    _updatetags(filetags, "local", alltags, tagtypes)
143
143
def _readtaghist(ui, repo, lines, fn, recode=None, calcnodelines=False):
    '''Parse tag definitions from a sequence of lines.

    Each valid line has the form "<hexnode> <tagname>".  Two sortdicts
    are returned:

    - bintaghist maps tag name to the list of binary node ids seen for
      that name, in file order (the last entry is the most recent).

    - hextaglines maps tag name to [hexnode, line number] pairs, ordered
      from the oldest to the newest node.  It is only populated when
      calcnodelines is True; otherwise an empty dict is returned so the
      common case stays fast.

    Malformed lines are reported through ui.warn() and skipped.
    '''
    bintaghist = util.sortdict()
    hextaglines = util.sortdict()
    lineno = 0

    def warn(msg):
        ui.warn(_("%s, line %s: %s\n") % (fn, lineno, msg))

    for nline, line in enumerate(lines):
        lineno += 1
        if not line:
            continue
        if " " not in line:
            # No separator, so the line cannot be split into node + name.
            warn(_("cannot parse entry"))
            continue
        nodehex, name = line.split(" ", 1)
        name = name.strip()
        if recode:
            name = recode(name)
        try:
            nodebin = bin(nodehex)
        except TypeError:
            warn(_("node '%s' is not well formed") % nodehex)
            continue

        if calcnodelines:
            # Record [hexnode, line number] pairs per tag name.
            entries = hextaglines.get(name)
            if entries is None:
                entries = []
                hextaglines[name] = entries
            entries.append([nodehex, nline])
        else:
            # Record binary nodes per tag name.
            nodes = bintaghist.get(name)
            if nodes is None:
                nodes = []
                bintaghist[name] = nodes
            nodes.append(nodebin)
    return bintaghist, hextaglines
200
200
def _readtags(ui, repo, lines, fn, recode=None, calcnodelines=False):
    '''Parse tag definitions and collapse each history to (node, hist).

    Returns a mapping from tag name to a (node, hist) pair, where "node"
    is the binary node id from the last line read for that name and
    "hist" is the list of binary node ids previously associated with it,
    in file order.
    '''
    filetags, nodelines = _readtaghist(ui, repo, lines, fn, recode=recode,
                                       calcnodelines=calcnodelines)
    # Take a snapshot with items(): we rewrite values while walking the
    # dict, and the last accumulated node becomes the current one.
    for name, history in filetags.items():
        filetags[name] = (history[-1], history[:-1])
    return filetags
215
215
216 def _updatetags(filetags, tagtype, alltags, tagtypes):
216 def _updatetags(filetags, tagtype, alltags, tagtypes):
217 '''Incorporate the tag info read from one file into the two
217 '''Incorporate the tag info read from one file into the two
218 dictionaries, alltags and tagtypes, that contain all tag
218 dictionaries, alltags and tagtypes, that contain all tag
219 info (global across all heads plus local).'''
219 info (global across all heads plus local).'''
220
220
221 for name, nodehist in filetags.iteritems():
221 for name, nodehist in filetags.iteritems():
222 if name not in alltags:
222 if name not in alltags:
223 alltags[name] = nodehist
223 alltags[name] = nodehist
224 tagtypes[name] = tagtype
224 tagtypes[name] = tagtype
225 continue
225 continue
226
226
227 # we prefer alltags[name] if:
227 # we prefer alltags[name] if:
228 # it supersedes us OR
228 # it supersedes us OR
229 # mutual supersedes and it has a higher rank
229 # mutual supersedes and it has a higher rank
230 # otherwise we win because we're tip-most
230 # otherwise we win because we're tip-most
231 anode, ahist = nodehist
231 anode, ahist = nodehist
232 bnode, bhist = alltags[name]
232 bnode, bhist = alltags[name]
233 if (bnode != anode and anode in bhist and
233 if (bnode != anode and anode in bhist and
234 (bnode not in ahist or len(bhist) > len(ahist))):
234 (bnode not in ahist or len(bhist) > len(ahist))):
235 anode = bnode
235 anode = bnode
236 else:
236 else:
237 tagtypes[name] = tagtype
237 tagtypes[name] = tagtype
238 ahist.extend([n for n in bhist if n not in ahist])
238 ahist.extend([n for n in bhist if n not in ahist])
239 alltags[name] = anode, ahist
239 alltags[name] = anode, ahist
240
240
241 def _filename(repo):
241 def _filename(repo):
242 """name of a tagcache file for a given repo or repoview"""
242 """name of a tagcache file for a given repo or repoview"""
243 filename = 'cache/tags2'
243 filename = 'cache/tags2'
244 if repo.filtername:
244 if repo.filtername:
245 filename = '%s-%s' % (filename, repo.filtername)
245 filename = '%s-%s' % (filename, repo.filtername)
246 return filename
246 return filename
247
247
def _readtagcache(ui, repo):
    '''Read the tag cache.

    Returns a tuple (heads, fnodes, validinfo, cachetags, shouldwrite).

    If the cache is completely up-to-date, "cachetags" is a dict of the
    form returned by _readtags() and "heads", "fnodes", and "validinfo" are
    None and "shouldwrite" is False.

    If the cache is not up to date, "cachetags" is None. "heads" is a list
    of all heads currently in the repository, ordered from tip to oldest.
    "validinfo" is a tuple describing cache validation info. This is used
    when writing the tags cache. "fnodes" is a mapping from head to .hgtags
    filenode. "shouldwrite" is True.

    If the cache is not up to date, the caller is responsible for reading tag
    info from each returned head. (See findglobaltags().)
    '''
    import scmutil  # avoid cycle

    try:
        cachefile = repo.vfs(_filename(repo), 'r')
        # force reading the file for static-http
        cachelines = iter(cachefile)
    except IOError:
        cachefile = None

    cacherev = None
    cachenode = None
    cachehash = None
    if cachefile:
        try:
            # Validation line format: "<tiprev> <tipnode> [<filteredhash>]"
            # (see the module docstring above).
            validline = cachelines.next()
            validline = validline.split()
            cacherev = int(validline[0])
            cachenode = bin(validline[1])
            if len(validline) > 2:
                cachehash = bin(validline[2])
        except Exception:
            # corruption of the cache, just recompute it.
            pass

    tipnode = repo.changelog.tip()
    tiprev = len(repo.changelog) - 1

    # Case 1 (common): tip is the same, so nothing has changed.
    # (Unchanged tip trivially means no changesets have been added.
    # But, thanks to localrepository.destroyed(), it also means none
    # have been destroyed by strip or rollback.)
    if (cacherev == tiprev
            and cachenode == tipnode
            and cachehash == scmutil.filteredhash(repo, tiprev)):
        # Remaining cache lines are the tag entries themselves.
        tags = _readtags(ui, repo, cachelines, cachefile.name)
        cachefile.close()
        return (None, None, None, tags, False)
    if cachefile:
        cachefile.close()  # ignore rest of file

    valid = (tiprev, tipnode, scmutil.filteredhash(repo, tiprev))

    repoheads = repo.heads()
    # Case 2 (uncommon): empty repo; get out quickly and don't bother
    # writing an empty cache.
    if repoheads == [nullid]:
        return ([], {}, valid, {}, False)

    # Case 3 (uncommon): cache file missing or empty.

    # Case 4 (uncommon): tip rev decreased. This should only happen
    # when we're called from localrepository.destroyed(). Refresh the
    # cache so future invocations will not see disappeared heads in the
    # cache.

    # Case 5 (common): tip has changed, so we've added/replaced heads.

    # As it happens, the code to handle cases 3, 4, 5 is the same.

    # N.B. in case 4 (nodes destroyed), "new head" really means "newly
    # exposed".
    if not len(repo.file('.hgtags')):
        # No tags have ever been committed, so we can avoid a
        # potentially expensive search.
        return ([], {}, valid, None, True)

    starttime = time.time()

    # Now we have to lookup the .hgtags filenode for every new head.
    # This is the most expensive part of finding tags, so performance
    # depends primarily on the size of newheads. Worst case: no cache
    # file, so newheads == repoheads.
    fnodescache = hgtagsfnodescache(repo.unfiltered())
    cachefnode = {}
    for head in reversed(repoheads):
        fnode = fnodescache.getfnode(head)
        # nullid from the fnodes cache means this head has no .hgtags.
        if fnode != nullid:
            cachefnode[head] = fnode

    fnodescache.write()

    duration = time.time() - starttime
    ui.log('tagscache',
           '%d/%d cache hits/lookups in %0.4f '
           'seconds\n',
           fnodescache.hitcount, fnodescache.lookupcount, duration)

    # Caller has to iterate over all heads, but can use the filenodes in
    # cachefnode to get to each .hgtags revision quickly.
    return (repoheads, cachefnode, valid, None, True)
356
356
def _writetagcache(ui, repo, valid, cachetags):
    """Write the tags cache for this repo/filter.

    "valid" is the (tiprev, tipnode, filteredhash) validation tuple and
    "cachetags" maps tag name to (node, hist).  Write failures are
    silently ignored: the cache is an optimization only.
    """
    filename = _filename(repo)
    try:
        cachefile = repo.vfs(filename, 'w', atomictemp=True)
    except (OSError, IOError):
        return

    ui.log('tagscache', 'writing .hg/%s with %d tags\n',
           filename, len(cachetags))

    tiprev, tipnode, filteredhash = valid
    if filteredhash:
        cachefile.write('%d %s %s\n' % (tiprev, hex(tipnode),
                                        hex(filteredhash)))
    else:
        cachefile.write('%d %s\n' % (tiprev, hex(tipnode)))

    # Tag names in the cache are in UTF-8 -- which is the whole reason
    # we keep them in UTF-8 throughout this module. If we converted
    # them local encoding on input, we would lose info writing them to
    # the cache.
    for name in sorted(cachetags):
        node, hist = cachetags[name]
        # Older nodes first, current node last -- same layout as .hgtags.
        for n in hist:
            cachefile.write("%s %s\n" % (hex(n), name))
        cachefile.write("%s %s\n" % (hex(node), name))

    try:
        cachefile.close()
    except (OSError, IOError):
        pass
385
385
# Name of the file (under .hg) holding the revision -> .hgtags filenode cache.
_fnodescachefile = 'cache/hgtagsfnodes1'
# Each cache record is a 4-byte changeset node fragment followed by the
# 20-byte .hgtags filenode for that revision.
_fnodesrecsize = 4 + 20 # changeset fragment + filenode
# A record of all 0xff bytes means "no entry computed for this revision".
_fnodesmissingrec = '\xff' * 24
389
389
390 class hgtagsfnodescache(object):
390 class hgtagsfnodescache(object):
391 """Persistent cache mapping revisions to .hgtags filenodes.
391 """Persistent cache mapping revisions to .hgtags filenodes.
392
392
393 The cache is an array of records. Each item in the array corresponds to
393 The cache is an array of records. Each item in the array corresponds to
394 a changelog revision. Values in the array contain the first 4 bytes of
394 a changelog revision. Values in the array contain the first 4 bytes of
395 the node hash and the 20 bytes .hgtags filenode for that revision.
395 the node hash and the 20 bytes .hgtags filenode for that revision.
396
396
397 The first 4 bytes are present as a form of verification. Repository
397 The first 4 bytes are present as a form of verification. Repository
398 stripping and rewriting may change the node at a numeric revision in the
398 stripping and rewriting may change the node at a numeric revision in the
399 changelog. The changeset fragment serves as a verifier to detect
399 changelog. The changeset fragment serves as a verifier to detect
400 rewriting. This logic is shared with the rev branch cache (see
400 rewriting. This logic is shared with the rev branch cache (see
401 branchmap.py).
401 branchmap.py).
402
402
403 The instance holds in memory the full cache content but entries are
403 The instance holds in memory the full cache content but entries are
404 only parsed on read.
404 only parsed on read.
405
405
406 Instances behave like lists. ``c[i]`` works where i is a rev or
406 Instances behave like lists. ``c[i]`` works where i is a rev or
407 changeset node. Missing indexes are populated automatically on access.
407 changeset node. Missing indexes are populated automatically on access.
408 """
408 """
409 def __init__(self, repo):
409 def __init__(self, repo):
410 assert repo.filtername is None
410 assert repo.filtername is None
411
411
412 self._repo = repo
412 self._repo = repo
413
413
414 # Only for reporting purposes.
414 # Only for reporting purposes.
415 self.lookupcount = 0
415 self.lookupcount = 0
416 self.hitcount = 0
416 self.hitcount = 0
417
417
418 self._raw = array('c')
418 self._raw = array('c')
419
419
420 data = repo.vfs.tryread(_fnodescachefile)
420 data = repo.vfs.tryread(_fnodescachefile)
421 self._raw.fromstring(data)
421 self._raw.fromstring(data)
422
422
423 # The end state of self._raw is an array that is of the exact length
423 # The end state of self._raw is an array that is of the exact length
424 # required to hold a record for every revision in the repository.
424 # required to hold a record for every revision in the repository.
425 # We truncate or extend the array as necessary. self._dirtyoffset is
425 # We truncate or extend the array as necessary. self._dirtyoffset is
426 # defined to be the start offset at which we need to write the output
426 # defined to be the start offset at which we need to write the output
427 # file. This offset is also adjusted when new entries are calculated
427 # file. This offset is also adjusted when new entries are calculated
428 # for array members.
428 # for array members.
429 cllen = len(repo.changelog)
429 cllen = len(repo.changelog)
430 wantedlen = cllen * _fnodesrecsize
430 wantedlen = cllen * _fnodesrecsize
431 rawlen = len(self._raw)
431 rawlen = len(self._raw)
432
432
433 self._dirtyoffset = None
433 self._dirtyoffset = None
434
434
435 if rawlen < wantedlen:
435 if rawlen < wantedlen:
436 self._dirtyoffset = rawlen
436 self._dirtyoffset = rawlen
437 self._raw.extend('\xff' * (wantedlen - rawlen))
437 self._raw.extend('\xff' * (wantedlen - rawlen))
438 elif rawlen > wantedlen:
438 elif rawlen > wantedlen:
439 # There's no easy way to truncate array instances. This seems
439 # There's no easy way to truncate array instances. This seems
440 # slightly less evil than copying a potentially large array slice.
440 # slightly less evil than copying a potentially large array slice.
441 for i in range(rawlen - wantedlen):
441 for i in range(rawlen - wantedlen):
442 self._raw.pop()
442 self._raw.pop()
443 self._dirtyoffset = len(self._raw)
443 self._dirtyoffset = len(self._raw)
444
444
445 def getfnode(self, node, computemissing=True):
445 def getfnode(self, node, computemissing=True):
446 """Obtain the filenode of the .hgtags file at a specified revision.
446 """Obtain the filenode of the .hgtags file at a specified revision.
447
447
448 If the value is in the cache, the entry will be validated and returned.
448 If the value is in the cache, the entry will be validated and returned.
449 Otherwise, the filenode will be computed and returned unless
449 Otherwise, the filenode will be computed and returned unless
450 "computemissing" is False, in which case None will be returned without
450 "computemissing" is False, in which case None will be returned without
451 any potentially expensive computation being performed.
451 any potentially expensive computation being performed.
452
452
453 If an .hgtags does not exist at the specified revision, nullid is
453 If an .hgtags does not exist at the specified revision, nullid is
454 returned.
454 returned.
455 """
455 """
456 ctx = self._repo[node]
456 ctx = self._repo[node]
457 rev = ctx.rev()
457 rev = ctx.rev()
458
458
459 self.lookupcount += 1
459 self.lookupcount += 1
460
460
461 offset = rev * _fnodesrecsize
461 offset = rev * _fnodesrecsize
462 record = self._raw[offset:offset + _fnodesrecsize].tostring()
462 record = self._raw[offset:offset + _fnodesrecsize].tostring()
463 properprefix = node[0:4]
463 properprefix = node[0:4]
464
464
465 # Validate and return existing entry.
465 # Validate and return existing entry.
466 if record != _fnodesmissingrec:
466 if record != _fnodesmissingrec:
467 fileprefix = record[0:4]
467 fileprefix = record[0:4]
468
468
469 if fileprefix == properprefix:
469 if fileprefix == properprefix:
470 self.hitcount += 1
470 self.hitcount += 1
471 return record[4:]
471 return record[4:]
472
472
473 # Fall through.
473 # Fall through.
474
474
475 # If we get here, the entry is either missing or invalid.
475 # If we get here, the entry is either missing or invalid.
476
476
477 if not computemissing:
477 if not computemissing:
478 return None
478 return None
479
479
480 # Populate missing entry.
480 # Populate missing entry.
481 try:
481 try:
482 fnode = ctx.filenode('.hgtags')
482 fnode = ctx.filenode('.hgtags')
483 except error.LookupError:
483 except error.LookupError:
484 # No .hgtags file on this revision.
484 # No .hgtags file on this revision.
485 fnode = nullid
485 fnode = nullid
486
486
487 self._writeentry(offset, properprefix, fnode)
488 return fnode
489
490 def setfnode(self, node, fnode):
491 """Set the .hgtags filenode for a given changeset."""
492 assert len(fnode) == 20
493 ctx = self._repo[node]
494
495 # Do a lookup first to avoid writing if nothing has changed.
496 if self.getfnode(ctx.node(), computemissing=False) == fnode:
497 return
498
499 self._writeentry(ctx.rev() * _fnodesrecsize, node[0:4], fnode)
500
501 def _writeentry(self, offset, prefix, fnode):
487 # Slices on array instances only accept other array.
502 # Slices on array instances only accept other array.
488 entry = array('c', properprefix + fnode)
503 entry = array('c', prefix + fnode)
489 self._raw[offset:offset + _fnodesrecsize] = entry
504 self._raw[offset:offset + _fnodesrecsize] = entry
490 # self._dirtyoffset could be None.
505 # self._dirtyoffset could be None.
491 self._dirtyoffset = min(self._dirtyoffset, offset) or 0
506 self._dirtyoffset = min(self._dirtyoffset, offset) or 0
492
507
493 return fnode
494
495 def write(self):
508 def write(self):
496 """Perform all necessary writes to cache file.
509 """Perform all necessary writes to cache file.
497
510
498 This may no-op if no writes are needed or if a write lock could
511 This may no-op if no writes are needed or if a write lock could
499 not be obtained.
512 not be obtained.
500 """
513 """
501 if self._dirtyoffset is None:
514 if self._dirtyoffset is None:
502 return
515 return
503
516
504 data = self._raw[self._dirtyoffset:]
517 data = self._raw[self._dirtyoffset:]
505 if not data:
518 if not data:
506 return
519 return
507
520
508 repo = self._repo
521 repo = self._repo
509
522
510 try:
523 try:
511 lock = repo.wlock(wait=False)
524 lock = repo.wlock(wait=False)
512 except error.LockError:
525 except error.LockError:
513 repo.ui.log('tagscache',
526 repo.ui.log('tagscache',
514 'not writing .hg/%s because lock cannot be acquired\n' %
527 'not writing .hg/%s because lock cannot be acquired\n' %
515 (_fnodescachefile))
528 (_fnodescachefile))
516 return
529 return
517
530
518 try:
531 try:
519 f = repo.vfs.open(_fnodescachefile, 'ab')
532 f = repo.vfs.open(_fnodescachefile, 'ab')
520 try:
533 try:
521 # if the file has been truncated
534 # if the file has been truncated
522 actualoffset = f.tell()
535 actualoffset = f.tell()
523 if actualoffset < self._dirtyoffset:
536 if actualoffset < self._dirtyoffset:
524 self._dirtyoffset = actualoffset
537 self._dirtyoffset = actualoffset
525 data = self._raw[self._dirtyoffset:]
538 data = self._raw[self._dirtyoffset:]
526 f.seek(self._dirtyoffset)
539 f.seek(self._dirtyoffset)
527 f.truncate()
540 f.truncate()
528 repo.ui.log('tagscache',
541 repo.ui.log('tagscache',
529 'writing %d bytes to %s\n' % (
542 'writing %d bytes to %s\n' % (
530 len(data), _fnodescachefile))
543 len(data), _fnodescachefile))
531 f.write(data)
544 f.write(data)
532 self._dirtyoffset = None
545 self._dirtyoffset = None
533 finally:
546 finally:
534 f.close()
547 f.close()
535 except (IOError, OSError), inst:
548 except (IOError, OSError), inst:
536 repo.ui.log('tagscache',
549 repo.ui.log('tagscache',
537 "couldn't write %s: %s\n" % (
550 "couldn't write %s: %s\n" % (
538 _fnodescachefile, inst))
551 _fnodescachefile, inst))
539 finally:
552 finally:
540 lock.release()
553 lock.release()
General Comments 0
You need to be logged in to leave comments. Login now