tags: silence hgtagsfnodes reading failures...
Matt Mackall
r29039:e3055b46 stable
@@ -1,568 +1,571 @@
# tags.py - read tag info from local repository
#
# Copyright 2009 Matt Mackall <mpm@selenic.com>
# Copyright 2009 Greg Ward <greg@gerg.ca>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

# Currently this module only deals with reading and caching tags.
# Eventually, it could take care of updating (adding/removing/moving)
# tags too.

from __future__ import absolute_import

import array
import errno
import time

from .node import (
    bin,
    hex,
    nullid,
    short,
)
from . import (
    encoding,
    error,
    util,
)

array = array.array

# Tags computation can be expensive and caches exist to make it fast in
# the common case.
#
# The "hgtagsfnodes1" cache file caches the .hgtags filenode values for
# each revision in the repository. The file is effectively an array of
# fixed length records. Read the docs for "hgtagsfnodescache" for technical
# details.
#
# The .hgtags filenode cache grows in proportion to the length of the
# changelog. The file is truncated when the changelog is stripped.
#
# The purpose of the filenode cache is to avoid the most expensive part
# of finding global tags, which is looking up the .hgtags filenode in the
# manifest for each head. This can take tens of milliseconds, or over
# 100ms, for repositories with very large manifests. Multiplied by dozens
# or even hundreds of heads, this is a significant performance concern.
#
# There also exists a separate cache file for each repository filter.
# These "tags-*" files store information about the history of tags.
#
# Each tags cache file consists of a cache validation line followed by
# a history of tags.
#
# The cache validation line has the format:
#
#   <tiprev> <tipnode> [<filteredhash>]
#
# <tiprev> is an integer revision and <tipnode> is a 40 character hex
# node for that changeset. These redundantly identify the repository
# tip from the time the cache was written. In addition, <filteredhash>,
# if present, is a 40 character hex hash of the contents of the filtered
# revisions for this filter. If the set of filtered revs changes, the
# hash will change and invalidate the cache.
#
# The history part of the tags cache consists of lines of the form:
#
#   <node> <tag>
#
# (This format is identical to that of .hgtags files.)
#
# <tag> is the tag name and <node> is the 40 character hex changeset
# the tag is associated with.
#
# Tags are written sorted by tag name.
#
# Tags associated with multiple changesets have an entry for each changeset.
# The most recent changeset (in terms of revlog ordering for the head
# setting it) for each tag is last.

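# As an illustration of the validation-line format above, a minimal
# parsing sketch (the helper name "parsevalidline" is hypothetical; the
# real parsing is done inline in _readtagcache below):
#
#     def parsevalidline(line):
#         parts = line.split()
#         tiprev = int(parts[0])            # integer tip revision
#         tipnode = bin(parts[1])           # 40-char hex -> 20-byte node
#         # optional hash of the filtered revision set
#         filteredhash = bin(parts[2]) if len(parts) > 2 else None
#         return (tiprev, tipnode, filteredhash)
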
def findglobaltags(ui, repo, alltags, tagtypes):
    '''Find global tags in a repo.

    "alltags" maps tag name to (node, hist) 2-tuples.

    "tagtypes" maps tag name to tag type. Global tags always have the
    "global" tag type.

    The "alltags" and "tagtypes" dicts are updated in place. Empty dicts
    should be passed in.

    The tags cache is read and updated as a side-effect of calling.
    '''
    # This is so we can be lazy and assume alltags contains only global
    # tags when we pass it to _writetagcache().
    assert len(alltags) == len(tagtypes) == 0, \
           "findglobaltags() should be called first"

    (heads, tagfnode, valid, cachetags, shouldwrite) = _readtagcache(ui, repo)
    if cachetags is not None:
        assert not shouldwrite
        # XXX is this really 100% correct? are there oddball special
        # cases where a global tag should outrank a local tag but won't,
        # because cachetags does not contain rank info?
        _updatetags(cachetags, 'global', alltags, tagtypes)
        return

    seen = set() # set of fnode
    fctx = None
    for head in reversed(heads): # oldest to newest
        assert head in repo.changelog.nodemap, \
               "tag cache returned bogus head %s" % short(head)

        fnode = tagfnode.get(head)
        if fnode and fnode not in seen:
            seen.add(fnode)
            if not fctx:
                fctx = repo.filectx('.hgtags', fileid=fnode)
            else:
                fctx = fctx.filectx(fnode)

            filetags = _readtags(ui, repo, fctx.data().splitlines(), fctx)
            _updatetags(filetags, 'global', alltags, tagtypes)

    # and update the cache (if necessary)
    if shouldwrite:
        _writetagcache(ui, repo, valid, alltags)

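# A minimal usage sketch (hypothetical caller; "ui" and "repo" are assumed
# to be live ui and localrepository objects). The assert above requires
# that findglobaltags() runs before any local tags are mixed in:
#
#     alltags, tagtypes = {}, {}
#     findglobaltags(ui, repo, alltags, tagtypes)
#     readlocaltags(ui, repo, alltags, tagtypes)
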
def readlocaltags(ui, repo, alltags, tagtypes):
    '''Read local tags in repo. Update alltags and tagtypes.'''
    try:
        data = repo.vfs.read("localtags")
    except IOError as inst:
        if inst.errno != errno.ENOENT:
            raise
        return

    # localtags is in the local encoding; re-encode to UTF-8 on
    # input for consistency with the rest of this module.
    filetags = _readtags(
        ui, repo, data.splitlines(), "localtags",
        recode=encoding.fromlocal)

    # remove tags pointing to invalid nodes
    cl = repo.changelog
    for t in filetags.keys():
        try:
            cl.rev(filetags[t][0])
        except (LookupError, ValueError):
            del filetags[t]

    _updatetags(filetags, "local", alltags, tagtypes)

def _readtaghist(ui, repo, lines, fn, recode=None, calcnodelines=False):
    '''Read tag definitions from a file (or any source of lines).

    This function returns two sortdicts with similar information:

    - the first dict, bintaghist, contains the tag information as expected by
      the _readtags function, i.e. a mapping from tag name to (node, hist):
        - node is the node id from the last line read for that name,
        - hist is the list of node ids previously associated with it (in file
          order). All node ids are binary, not hex.

    - the second dict, hextaglines, is a mapping from tag name to a list of
      [hexnode, line number] pairs, ordered from the oldest to the newest node.

    When calcnodelines is False the hextaglines dict is not calculated (an
    empty dict is returned). This is done to improve this function's
    performance in cases where the line numbers are not needed.
    '''

    bintaghist = util.sortdict()
    hextaglines = util.sortdict()
    count = 0

    def dbg(msg):
        ui.debug("%s, line %s: %s\n" % (fn, count, msg))

    for nline, line in enumerate(lines):
        count += 1
        if not line:
            continue
        try:
            (nodehex, name) = line.split(" ", 1)
        except ValueError:
            dbg("cannot parse entry")
            continue
        name = name.strip()
        if recode:
            name = recode(name)
        try:
            nodebin = bin(nodehex)
        except TypeError:
            dbg("node '%s' is not well formed" % nodehex)
            continue

        # update filetags
        if calcnodelines:
            # map tag name to a list of line numbers
            if name not in hextaglines:
                hextaglines[name] = []
            hextaglines[name].append([nodehex, nline])
            continue
        # map tag name to (node, hist)
        if name not in bintaghist:
            bintaghist[name] = []
        bintaghist[name].append(nodebin)
    return bintaghist, hextaglines

def _readtags(ui, repo, lines, fn, recode=None, calcnodelines=False):
    '''Read tag definitions from a file (or any source of lines).

    Returns a mapping from tag name to (node, hist).

    "node" is the node id from the last line read for that name. "hist"
    is the list of node ids previously associated with it (in file order).
    All node ids are binary, not hex.
    '''
    filetags, nodelines = _readtaghist(ui, repo, lines, fn, recode=recode,
                                       calcnodelines=calcnodelines)
    # util.sortdict().__setitem__ is much slower at replacing than inserting
    # new entries. The difference can matter if there are thousands of tags.
    # Create a new sortdict to avoid the performance penalty.
    newtags = util.sortdict()
    for tag, taghist in filetags.items():
        newtags[tag] = (taghist[-1], taghist[:-1])
    return newtags

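# For illustration (hypothetical input; node ids abbreviated here, real
# ones are 40 hex characters), given these two .hgtags lines for one tag:
#
#     aaaa... v1
#     bbbb... v1
#
# _readtags returns {'v1': (bin('bbbb...'), [bin('aaaa...')])}: the node
# from the last line read wins, and earlier nodes become the history.
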
def _updatetags(filetags, tagtype, alltags, tagtypes):
    '''Incorporate the tag info read from one file into the two
    dictionaries, alltags and tagtypes, that contain all tag
    info (global across all heads plus local).'''

    for name, nodehist in filetags.iteritems():
        if name not in alltags:
            alltags[name] = nodehist
            tagtypes[name] = tagtype
            continue

        # we prefer alltags[name] if:
        #  it supersedes us OR
        #  mutual supersedes and it has a higher rank
        # otherwise we win because we're tip-most
        anode, ahist = nodehist
        bnode, bhist = alltags[name]
        if (bnode != anode and anode in bhist and
            (bnode not in ahist or len(bhist) > len(ahist))):
            anode = bnode
        else:
            tagtypes[name] = tagtype
        ahist.extend([n for n in bhist if n not in ahist])
        alltags[name] = anode, ahist

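# A worked example of the preference rule above (toy string values; real
# entries are binary node ids): suppose alltags already has
# 'v1' -> ('n1', []) and a newer head's .hgtags yields
# filetags['v1'] = ('n2', ['n1']). Then bnode ('n1') differs from anode
# ('n2') but anode is not in bhist, so the existing entry does not
# supersede the new one: 'n2' wins and the merged history is ['n1'].
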
def _filename(repo):
    """name of a tagcache file for a given repo or repoview"""
    filename = 'cache/tags2'
    if repo.filtername:
        filename = '%s-%s' % (filename, repo.filtername)
    return filename

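# For example, with the standard repoview filters: an unfiltered repo uses
# 'cache/tags2', while e.g. the "visible" repoview reads and writes
# 'cache/tags2-visible'.
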
def _readtagcache(ui, repo):
    '''Read the tag cache.

    Returns a tuple (heads, fnodes, validinfo, cachetags, shouldwrite).

    If the cache is completely up-to-date, "cachetags" is a dict of the
    form returned by _readtags() and "heads", "fnodes", and "validinfo" are
    None and "shouldwrite" is False.

    If the cache is not up to date, "cachetags" is None. "heads" is a list
    of all heads currently in the repository, ordered from tip to oldest.
    "validinfo" is a tuple describing cache validation info. This is used
    when writing the tags cache. "fnodes" is a mapping from head to .hgtags
    filenode. "shouldwrite" is True.

    If the cache is not up to date, the caller is responsible for reading tag
    info from each returned head. (See findglobaltags().)
    '''
    from . import scmutil  # avoid cycle

    try:
        cachefile = repo.vfs(_filename(repo), 'r')
        # force reading the file for static-http
        cachelines = iter(cachefile)
    except IOError:
        cachefile = None

    cacherev = None
    cachenode = None
    cachehash = None
    if cachefile:
        try:
            validline = cachelines.next()
            validline = validline.split()
            cacherev = int(validline[0])
            cachenode = bin(validline[1])
            if len(validline) > 2:
                cachehash = bin(validline[2])
        except Exception:
            # corruption of the cache, just recompute it.
            pass

    tipnode = repo.changelog.tip()
    tiprev = len(repo.changelog) - 1

    # Case 1 (common): tip is the same, so nothing has changed.
    # (Unchanged tip trivially means no changesets have been added.
    # But, thanks to localrepository.destroyed(), it also means none
    # have been destroyed by strip or rollback.)
    if (cacherev == tiprev
            and cachenode == tipnode
            and cachehash == scmutil.filteredhash(repo, tiprev)):
        tags = _readtags(ui, repo, cachelines, cachefile.name)
        cachefile.close()
        return (None, None, None, tags, False)
    if cachefile:
        cachefile.close()  # ignore rest of file

    valid = (tiprev, tipnode, scmutil.filteredhash(repo, tiprev))

    repoheads = repo.heads()
    # Case 2 (uncommon): empty repo; get out quickly and don't bother
    # writing an empty cache.
    if repoheads == [nullid]:
        return ([], {}, valid, {}, False)

    # Case 3 (uncommon): cache file missing or empty.

    # Case 4 (uncommon): tip rev decreased. This should only happen
    # when we're called from localrepository.destroyed(). Refresh the
    # cache so future invocations will not see disappeared heads in the
    # cache.

    # Case 5 (common): tip has changed, so we've added/replaced heads.

    # As it happens, the code to handle cases 3, 4, 5 is the same.

    # N.B. in case 4 (nodes destroyed), "new head" really means "newly
    # exposed".
    if not len(repo.file('.hgtags')):
        # No tags have ever been committed, so we can avoid a
        # potentially expensive search.
        return ([], {}, valid, None, True)

    starttime = time.time()

    # Now we have to lookup the .hgtags filenode for every new head.
    # This is the most expensive part of finding tags, so performance
    # depends primarily on the size of newheads. Worst case: no cache
    # file, so newheads == repoheads.
    fnodescache = hgtagsfnodescache(repo.unfiltered())
    cachefnode = {}
    for head in reversed(repoheads):
        fnode = fnodescache.getfnode(head)
        if fnode != nullid:
            cachefnode[head] = fnode

    fnodescache.write()

    duration = time.time() - starttime
    ui.log('tagscache',
           '%d/%d cache hits/lookups in %0.4f '
           'seconds\n',
           fnodescache.hitcount, fnodescache.lookupcount, duration)

    # Caller has to iterate over all heads, but can use the filenodes in
    # cachefnode to get to each .hgtags revision quickly.
    return (repoheads, cachefnode, valid, None, True)

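# Restating the return shapes above for quick reference (values
# illustrative):
#
#     case 1 (cache fresh):  (None, None, None, tags, False)
#     case 2 (empty repo):   ([], {}, valid, {}, False)
#     no .hgtags committed:  ([], {}, valid, None, True)
#     cases 3-5 otherwise:   (repoheads, cachefnode, valid, None, True)
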
def _writetagcache(ui, repo, valid, cachetags):
    filename = _filename(repo)
    try:
        cachefile = repo.vfs(filename, 'w', atomictemp=True)
    except (OSError, IOError):
        return

    ui.log('tagscache', 'writing .hg/%s with %d tags\n',
           filename, len(cachetags))

    if valid[2]:
        cachefile.write('%d %s %s\n' % (valid[0], hex(valid[1]), hex(valid[2])))
    else:
        cachefile.write('%d %s\n' % (valid[0], hex(valid[1])))

    # Tag names in the cache are in UTF-8 -- which is the whole reason
    # we keep them in UTF-8 throughout this module. If we converted
    # them to local encoding on input, we would lose info writing them to
    # the cache.
    for (name, (node, hist)) in sorted(cachetags.iteritems()):
        for n in hist:
            cachefile.write("%s %s\n" % (hex(n), name))
        cachefile.write("%s %s\n" % (hex(node), name))

    try:
        cachefile.close()
    except (OSError, IOError):
        pass

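# Illustrative contents of a file written by the function above
# (hypothetical, abbreviated hex values; real nodes are 40 hex characters):
#
#     42 52d5a4e3... 2fba2f27...
#     1fa9a4c3... v1
#     d0dcb98d... v2
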
_fnodescachefile = 'cache/hgtagsfnodes1'
_fnodesrecsize = 4 + 20 # changeset fragment + filenode
_fnodesmissingrec = '\xff' * 24

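# For illustration (the helper name "unpackrecord" is hypothetical): a
# 24-byte record splits back into its two fields the same way getfnode()
# below does it:
#
#     def unpackrecord(raw, rev):
#         offset = rev * _fnodesrecsize
#         record = raw[offset:offset + _fnodesrecsize]
#         # 4-byte changeset node prefix, then 20-byte .hgtags filenode
#         return record[0:4], record[4:]
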
class hgtagsfnodescache(object):
    """Persistent cache mapping revisions to .hgtags filenodes.

    The cache is an array of records. Each item in the array corresponds to
    a changelog revision. Values in the array contain the first 4 bytes of
    the node hash and the 20-byte .hgtags filenode for that revision.

    The first 4 bytes are present as a form of verification. Repository
    stripping and rewriting may change the node at a numeric revision in the
    changelog. The changeset fragment serves as a verifier to detect
    rewriting. This logic is shared with the rev branch cache (see
    branchmap.py).

    The instance holds in memory the full cache content but entries are
    only parsed on read.

    Instances behave like lists. ``c[i]`` works where i is a rev or
    changeset node. Missing indexes are populated automatically on access.
    """
    def __init__(self, repo):
        assert repo.filtername is None

        self._repo = repo

        # Only for reporting purposes.
        self.lookupcount = 0
        self.hitcount = 0

        self._raw = array('c')

-        data = repo.vfs.tryread(_fnodescachefile)
+        try:
+            data = repo.vfs.read(_fnodescachefile)
+        except (OSError, IOError):
+            data = ""
         self._raw.fromstring(data)

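        # Note on the change above: vfs.tryread() re-raises any IOError
        # other than ENOENT, so e.g. a permission error would abort the
        # whole operation. Catching OSError/IOError outright treats any
        # unreadable cache file as empty, which is the "silence
        # hgtagsfnodes reading failures" behavior this commit introduces.
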
        # The end state of self._raw is an array that is of the exact length
        # required to hold a record for every revision in the repository.
        # We truncate or extend the array as necessary. self._dirtyoffset is
        # defined to be the start offset at which we need to write the output
        # file. This offset is also adjusted when new entries are calculated
        # for array members.
        cllen = len(repo.changelog)
        wantedlen = cllen * _fnodesrecsize
        rawlen = len(self._raw)

        self._dirtyoffset = None

        if rawlen < wantedlen:
            self._dirtyoffset = rawlen
            self._raw.extend('\xff' * (wantedlen - rawlen))
        elif rawlen > wantedlen:
            # There's no easy way to truncate array instances. This seems
            # slightly less evil than copying a potentially large array slice.
            for i in range(rawlen - wantedlen):
                self._raw.pop()
            self._dirtyoffset = len(self._raw)

    def getfnode(self, node, computemissing=True):
        """Obtain the filenode of the .hgtags file at a specified revision.

        If the value is in the cache, the entry will be validated and returned.
        Otherwise, the filenode will be computed and returned unless
        "computemissing" is False, in which case None will be returned without
        any potentially expensive computation being performed.

        If an .hgtags does not exist at the specified revision, nullid is
        returned.
        """
        ctx = self._repo[node]
        rev = ctx.rev()

        self.lookupcount += 1

        offset = rev * _fnodesrecsize
        record = self._raw[offset:offset + _fnodesrecsize].tostring()
        properprefix = node[0:4]

        # Validate and return existing entry.
        if record != _fnodesmissingrec:
            fileprefix = record[0:4]

            if fileprefix == properprefix:
                self.hitcount += 1
                return record[4:]

            # Fall through.

        # If we get here, the entry is either missing or invalid.

        if not computemissing:
            return None

        # Populate missing entry.
        try:
            fnode = ctx.filenode('.hgtags')
        except error.LookupError:
            # No .hgtags file on this revision.
            fnode = nullid

        self._writeentry(offset, properprefix, fnode)
        return fnode

    def setfnode(self, node, fnode):
        """Set the .hgtags filenode for a given changeset."""
        assert len(fnode) == 20
        ctx = self._repo[node]

        # Do a lookup first to avoid writing if nothing has changed.
        if self.getfnode(ctx.node(), computemissing=False) == fnode:
            return

        self._writeentry(ctx.rev() * _fnodesrecsize, node[0:4], fnode)

    def _writeentry(self, offset, prefix, fnode):
        # Slices on array instances only accept other array.
        entry = array('c', prefix + fnode)
        self._raw[offset:offset + _fnodesrecsize] = entry
        # self._dirtyoffset could be None.
        self._dirtyoffset = min(self._dirtyoffset, offset) or 0

    def write(self):
        """Perform all necessary writes to cache file.

        This may no-op if no writes are needed or if a write lock could
        not be obtained.
        """
        if self._dirtyoffset is None:
            return

        data = self._raw[self._dirtyoffset:]
        if not data:
            return

        repo = self._repo

        try:
            lock = repo.wlock(wait=False)
        except error.LockError:
            repo.ui.log('tagscache',
                        'not writing .hg/%s because lock cannot be acquired\n' %
                        (_fnodescachefile))
            return

        try:
            f = repo.vfs.open(_fnodescachefile, 'ab')
            try:
                # if the file has been truncated
                actualoffset = f.tell()
                if actualoffset < self._dirtyoffset:
                    self._dirtyoffset = actualoffset
                    data = self._raw[self._dirtyoffset:]
                f.seek(self._dirtyoffset)
                f.truncate()
                repo.ui.log('tagscache',
                            'writing %d bytes to %s\n' % (
                            len(data), _fnodescachefile))
                f.write(data)
                self._dirtyoffset = None
            finally:
                f.close()
        except (IOError, OSError) as inst:
            repo.ui.log('tagscache',
                        "couldn't write %s: %s\n" % (
                        _fnodescachefile, inst))
        finally:
            lock.release()
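
# A minimal usage sketch of the class above (hypothetical caller; assumes
# an unfiltered localrepository object "repo"), mirroring how
# _readtagcache drives it:
#
#     cache = hgtagsfnodescache(repo.unfiltered())
#     fnode = cache.getfnode(repo.changelog.tip())  # validates or computes
#     cache.write()                                 # best-effort persist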