##// END OF EJS Templates
fastannotate: remove support for flock() locking...
Augie Fackler -
r43217:0152a907 default
parent child Browse files
Show More
@@ -1,193 +1,170
1 1 # Copyright 2016-present Facebook. All Rights Reserved.
2 2 #
3 3 # fastannotate: faster annotate implementation using linelog
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7 """yet another annotate implementation that might be faster (EXPERIMENTAL)
8 8
9 9 The fastannotate extension provides a 'fastannotate' command that makes
10 10 use of the linelog data structure as a cache layer and is expected to
11 11 be faster than the vanilla 'annotate' if the cache is present.
12 12
13 13 In most cases, fastannotate requires a setup that mainbranch is some pointer
14 14 that always moves forward, to be most efficient.
15 15
16 16 Using fastannotate together with linkrevcache would speed up building the
17 17 annotate cache greatly. Run "debugbuildlinkrevcache" before
18 18 "debugbuildannotatecache".
19 19
20 20 ::
21 21
22 22 [fastannotate]
23 23 # specify the main branch head. the internal linelog will only contain
24 24 # the linear (ignoring p2) "mainbranch". since linelog cannot move
25 25 # backwards without a rebuild, this should be something that always moves
26 26 # forward, usually it is "master" or "@".
27 27 mainbranch = master
28 28
29 29 # fastannotate supports different modes to expose its feature.
30 30 # a list of combination:
31 31 # - fastannotate: expose the feature via the "fastannotate" command which
32 32 # deals with everything in a most efficient way, and provides extra
33 33 # features like --deleted etc.
34 34 # - fctx: replace fctx.annotate implementation. note:
35 35 # a. it is less efficient than the "fastannotate" command
36 36 # b. it will make it practically impossible to access the old (disk
37 37 # side-effect free) annotate implementation
38 38 # c. it implies "hgweb".
39 39 # - hgweb: replace hgweb's annotate implementation. conflict with "fctx".
40 40 # (default: fastannotate)
41 41 modes = fastannotate
42 42
43 43 # default format when no format flags are used (default: number)
44 44 defaultformat = changeset, user, date
45 45
46 46 # serve the annotate cache via wire protocol (default: False)
47 47 # tip: the .hg/fastannotate directory is portable - can be rsynced
48 48 server = True
49 49
50 50 # build annotate cache on demand for every client request (default: True)
51 51 # disabling it could make server response faster, useful when there is a
52 52 # cronjob building the cache.
53 53 serverbuildondemand = True
54 54
55 55 # update local annotate cache from remote on demand
56 56 client = False
57 57
58 58 # path to use when connecting to the remote server (default: default)
59 59 remotepath = default
60 60
61 61 # minimal length of the history of a file required to fetch linelog from
62 62 # the server. (default: 10)
63 63 clientfetchthreshold = 10
64 64
65 # use flock instead of the file existence lock
66 # flock may not work well on some network filesystems, but they avoid
67 # creating and deleting files frequently, which is faster when updating
68 # the annotate cache in batch. if you have issues with this option, set it
69 # to False. (default: True if flock is supported, False otherwise)
70 useflock = True
71
72 65 # for "fctx" mode, always follow renames regardless of command line option.
73 66 # this is a BC with the original command but will reduce the space needed
74 67 # for annotate cache, and is useful for client-server setup since the
75 68 # server will only provide annotate cache with default options (i.e. with
76 69 # follow). do not affect "fastannotate" mode. (default: True)
77 70 forcefollow = True
78 71
79 72 # for "fctx" mode, always treat file as text files, to skip the "isbinary"
80 73 # check. this is consistent with the "fastannotate" command and could help
81 74 # to avoid a file fetch if remotefilelog is used. (default: True)
82 75 forcetext = True
83 76
84 77 # use unfiltered repo for better performance.
85 78 unfilteredrepo = True
86 79
87 80 # sacrifice correctness in some corner cases for performance. it does not
88 81 # affect the correctness of the annotate cache being built. the option
89 82 # is experimental and may disappear in the future (default: False)
90 83 perfhack = True
91 84 """
92 85
93 86 # TODO from import:
94 87 # * `branch` is probably the wrong term, throughout the code.
95 88 #
96 89 # * replace the fastannotate `modes` configuration with a collection
97 90 # of booleans.
98 91 #
99 92 # * Use the templater instead of bespoke formatting
100 93 #
101 94 # * rename the config knob for updating the local cache from a remote server
102 95 #
103 # * move `flock` based locking to a common area
104 #
105 96 # * revise wireprotocol for sharing annotate files
106 97 #
107 98 # * figure out a sensible default for `mainbranch` (with the caveat
108 99 # that we probably also want to figure out a better term than
109 100 # `branch`, see above)
110 101 #
111 102 # * format changes to the revmap file (maybe use length-encoding
112 103 # instead of null-terminated file paths at least?)
113 104 from __future__ import absolute_import
114 105
115 106 from mercurial.i18n import _
116 107 from mercurial import (
117 configitems,
118 108 error as hgerror,
119 109 localrepo,
120 110 registrar,
121 111 )
122 112
123 113 from . import (
124 114 commands,
125 context,
126 115 protocol,
127 116 )
128 117
129 118 # Note for extension authors: ONLY specify testedwith = 'ships-with-hg-core' for
130 119 # extensions which SHIP WITH MERCURIAL. Non-mainline extensions should
131 120 # be specifying the version(s) of Mercurial they are tested with, or
132 121 # leave the attribute unspecified.
testedwith = 'ships-with-hg-core'

# expose the command table built by the commands module as this extension's
# command table
cmdtable = commands.cmdtable

# registry of the config knobs declared by this extension; "configitem"
# records each declaration into "configtable"
configtable = {}
configitem = registrar.configitem(configtable)

configitem('fastannotate', 'modes', default=['fastannotate'])
configitem('fastannotate', 'server', default=False)
# the effective default of "useflock" is computed at read time (see the
# configbool call in uisetup), hence the dynamic default marker
configitem('fastannotate', 'useflock', default=configitems.dynamicdefault)
configitem('fastannotate', 'client', default=False)
configitem('fastannotate', 'unfilteredrepo', default=True)
configitem('fastannotate', 'defaultformat', default=['number'])
configitem('fastannotate', 'perfhack', default=False)
# no explicit default is passed for "mainbranch"
configitem('fastannotate', 'mainbranch')
configitem('fastannotate', 'forcetext', default=True)
configitem('fastannotate', 'forcefollow', default=True)
configitem('fastannotate', 'clientfetchthreshold', default=10)
configitem('fastannotate', 'serverbuildondemand', default=True)
configitem('fastannotate', 'remotepath', default='default')
153 141
154 def _flockavailable():
155 try:
156 import fcntl
157 fcntl.flock
158 except (AttributeError, ImportError):
159 return False
160 else:
161 return True
162 142
def uisetup(ui):
    """enable the configured fastannotate modes and optional features

    reads fastannotate.modes, fastannotate.server and fastannotate.useflock
    from ui. aborts if an unknown mode name is configured.
    """

    def enablehgweb():
        # imported lazily, only when the mode is actually requested
        from . import support
        support.replacehgwebannotate()

    def enablefctx():
        # imported lazily, only when the mode is actually requested
        from . import support
        support.replacefctxannotate()
        commands.wrapdefault()

    handlers = {
        'fastannotate': commands.registercommand,
        'hgweb': enablehgweb,
        'fctx': enablefctx,
    }

    requested = set(ui.configlist('fastannotate', 'modes'))
    # when both are requested, "fctx" wins and "hgweb" is dropped
    if 'fctx' in requested:
        requested.discard('hgweb')
    for mode in requested:
        handler = handlers.get(mode)
        if handler is None:
            raise hgerror.Abort(_('fastannotate: invalid mode: %s') % mode)
        handler()

    serving = ui.configbool('fastannotate', 'server')
    if serving:
        protocol.serveruisetup(ui)

    # the default for "useflock" depends on whether flock() is available
    if ui.configbool('fastannotate', 'useflock', _flockavailable()):
        context.pathhelper.lock = context.pathhelper._lockflock
185
def extsetup(ui):
    """register the 'fastannotate/' prefix in localrepository._wlockfreeprefix"""
    # fastannotate has its own locking, without depending on repo lock
    # TODO: avoid mutating this unless the specific repo has it enabled
    lockfreeprefixes = localrepo.localrepository._wlockfreeprefix
    lockfreeprefixes.add('fastannotate/')
190 167
def reposetup(ui, repo):
    """run the client-side repo setup when fastannotate.client is enabled"""
    if not ui.configbool('fastannotate', 'client'):
        return
    protocol.clientreposetup(ui, repo)
@@ -1,826 +1,811
1 1 # Copyright 2016-present Facebook. All Rights Reserved.
2 2 #
3 3 # context: context needed to annotate a file
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import collections
11 11 import contextlib
12 12 import hashlib
13 13 import os
14 14
15 15 from mercurial.i18n import _
16 16 from mercurial import (
17 17 error,
18 18 linelog as linelogmod,
19 19 lock as lockmod,
20 20 mdiff,
21 21 node,
22 22 pycompat,
23 23 scmutil,
24 24 util,
25 25 )
26 26 from mercurial.utils import (
27 27 stringutil,
28 28 )
29 29
30 30 from . import (
31 31 error as faerror,
32 32 revmap as revmapmod,
33 33 )
34 34
35 35 # given path, get filelog, cached
@util.lrucachefunc
def _getflog(repo, path):
    """(repo, str) -> filelog. results are cached by util.lrucachefunc"""
    filelog = repo.file(path)
    return filelog
39 39
40 40 # extracted from mercurial.context.basefilectx.annotate
def _parents(f, follow=True):
    """(fctx, bool) -> [fctx]. return the parents of f

    if follow is False, parents living at a different path (renames) are
    dropped. parents without a '_filelog' entry in their __dict__ get one
    assigned from the _getflog cache.
    """
    # Cut _descendantrev here to mitigate the penalty of lazy linkrev
    # adjustment. Otherwise, p._adjustlinkrev() would walk changelog
    # from the topmost introrev (= srcrev) down to p.linkrev() if it
    # isn't an ancestor of the srcrev.
    f._changeid
    parents = f.parents()

    if not follow:
        # Don't return renamed parents if we aren't following.
        parents = [parent for parent in parents
                   if parent.path() == f.path()]

    # renamed filectx won't have a filelog yet, so set it
    # from the cache to save time
    for parent in parents:
        if '_filelog' in parent.__dict__:
            continue
        parent._filelog = _getflog(f._repo, parent.path())

    return parents
60 60
61 61 # extracted from mercurial.context.basefilectx.annotate. slightly modified
62 62 # so it takes a fctx instead of a pair of text and fctx.
def _decorate(fctx):
    """(fctx) -> ([(fctx, linenum)], text)

    pair every line of fctx's content with (fctx, 0-based line number). a
    final line without a trailing newline still counts as a line.
    """
    content = fctx.data()
    nlines = content.count('\n')
    unterminated = bool(content) and not content.endswith('\n')
    if unterminated:
        nlines += 1
    annotated = []
    for lineno in pycompat.xrange(nlines):
        annotated.append((fctx, lineno))
    return (annotated, content)
69 69
70 70 # extracted from mercurial.context.basefilectx.annotate. slightly modified
71 71 # so it takes an extra "blocks" parameter calculated elsewhere, instead of
72 72 # calculating diff here.
73 73 def _pair(parent, child, blocks):
74 74 for (a1, a2, b1, b2), t in blocks:
75 75 # Changed blocks ('!') or blocks made only of blank lines ('~')
76 76 # belong to the child.
77 77 if t == '=':
78 78 child[0][b1:b2] = parent[0][a1:a2]
79 79 return child
80 80
81 81 # like scmutil.revsingle, but with lru cache, so their states (like manifests)
82 82 # could be reused
83 83 _revsingle = util.lrucachefunc(scmutil.revsingle)
84 84
def resolvefctx(repo, rev, path, resolverev=False, adjustctx=None):
    """(repo, str, str) -> fctx

    get the filectx object from repo, rev, path, in an efficient way.

    if resolverev is True, "rev" is a revision specified by the revset
    language, otherwise "rev" is a nodeid, or a revision number that can
    be consumed by repo.__getitem__.

    if adjustctx is not None, the returned fctx will point to a changeset
    that introduces the change (last modified the file). if adjustctx
    is 'linkrev', trust the linkrev and do not adjust it. this is noticeably
    faster for big repos but is incorrect for some cases.
    """
    needsrevset = (resolverev and rev is not None
                   and not isinstance(rev, int))
    if needsrevset:
        ctx = _revsingle(repo, rev)
    else:
        ctx = repo[rev]

    # If we don't need to adjust the linkrev, create the filectx using the
    # changectx instead of using ctx[path]. This means it already has the
    # changectx information, so blame -u will be able to look directly at the
    # commitctx object instead of having to resolve it by going through the
    # manifest. In a lazy-manifest world this can prevent us from downloading a
    # lot of data.
    if adjustctx is None:
        # ctx.rev() is None means it's the working copy, which is a special
        # case.
        if ctx.rev() is None:
            return ctx[path]
        return repo.filectx(path, changeid=ctx.rev())

    fctx = ctx[path]
    introrev = fctx.linkrev() if adjustctx == 'linkrev' else fctx.introrev()
    if introrev != ctx.rev():
        # re-point the fctx at the introducing changeset
        fctx._changeid = introrev
        fctx._changectx = repo[introrev]
    return fctx
127 127
128 128 # like mercurial.store.encodedir, but use linelog suffixes: .m, .l, .lock
def encodedir(path):
    """escape directory components ending in .hg, .l, .m or .lock

    every occurrence of such a suffix followed by '/' gets an extra '.hg'
    inserted before the slash.
    """
    # applied in this exact order
    suffixes = ('.hg/', '.l/', '.m/', '.lock/')
    encoded = path
    for suffix in suffixes:
        encoded = encoded.replace(suffix, suffix[:-1] + '.hg/')
    return encoded
135 135
def hashdiffopts(diffopts):
    """return the first 6 hex digits of a sha1 over the diffopts values

    the hash covers every option named in mdiff.diffopts.defaults, sorted
    by option name.
    """
    pairs = sorted((name, getattr(diffopts, name))
                   for name in mdiff.diffopts.defaults)
    rendered = stringutil.pprint(pairs)
    digest = hashlib.sha1(rendered).digest()
    return node.hex(digest)[:6]
142 142
143 143 _defaultdiffopthash = hashdiffopts(mdiff.defaultopts)
144 144
class annotateopts(object):
    """like mercurial.mdiff.diffopts, but is for annotate

    followrename: follow renames, like "hg annotate -f"
    followmerge: follow p2 of a merge changeset, otherwise p2 is ignored
    """

    defaults = {
        'diffopts': None,
        'followrename': True,
        'followmerge': True,
    }

    def __init__(self, **opts):
        # options not passed by the caller fall back to "defaults"
        opts = pycompat.byteskwargs(opts)
        for name, fallback in self.defaults.iteritems():
            setattr(self, name, opts.get(name, fallback))

    @util.propertycache
    def shortstr(self):
        """represent opts in a short string, suitable for a directory name"""
        parts = []
        if not self.followrename:
            parts.append('r0')
        if not self.followmerge:
            parts.append('m0')
        if self.diffopts is not None:
            assert isinstance(self.diffopts, mdiff.diffopts)
            diffopthash = hashdiffopts(self.diffopts)
            # only non-default diff options contribute to the name
            if diffopthash != _defaultdiffopthash:
                parts.append('i' + diffopthash)
        return ''.join(parts) or 'default'
177 177
178 178 defaultopts = annotateopts()
179 179
180 180 class _annotatecontext(object):
181 181 """do not use this class directly as it does not use lock to protect
182 182 writes. use "with annotatecontext(...)" instead.
183 183 """
184 184
def __init__(self, repo, path, linelogpath, revmappath, opts):
    """remember the repo, the file path, the on-disk locations and opts.

    neither the linelog nor the revmap is loaded here.
    """
    self.repo = repo
    self.ui = repo.ui
    self.path = path
    self.opts = opts
    # on-disk locations
    self.linelogpath = linelogpath
    self.revmappath = revmappath
    # lazily loaded state, see the linelog and revmap properties
    self._linelog = None
    self._revmap = None
    self._node2path = {} # {str: str}
195 195
@property
def linelog(self):
    """the linelog object, loaded from self.linelogpath on first access.

    an empty linelog is used if the file does not exist or fails to parse.
    """
    if self._linelog is not None:
        return self._linelog
    if not os.path.exists(self.linelogpath):
        self._linelog = linelogmod.linelog()
        return self._linelog
    with open(self.linelogpath, 'rb') as fp:
        try:
            self._linelog = linelogmod.linelog.fromdata(fp.read())
        except linelogmod.LineLogError:
            # unreadable content: start over with an empty linelog
            self._linelog = linelogmod.linelog()
    return self._linelog
208 208
@property
def revmap(self):
    """the revmap object, created from self.revmappath on first access"""
    if self._revmap is not None:
        return self._revmap
    self._revmap = revmapmod.revmap(self.revmappath)
    return self._revmap
214 214
def close(self):
    """flush the revmap and write the linelog back, if they were loaded.

    both are dropped afterwards.
    """
    loadedrevmap = self._revmap
    if loadedrevmap is not None:
        loadedrevmap.flush()
        self._revmap = None
    loadedlinelog = self._linelog
    if loadedlinelog is not None:
        with open(self.linelogpath, 'wb') as fp:
            fp.write(loadedlinelog.encode())
        self._linelog = None
223 223
224 224 __del__ = close
225 225
def rebuild(self):
    """delete linelog and revmap, useful for rebuilding"""
    # write out and drop in-memory state before removing the files
    self.close()
    self._node2path.clear()
    stalepaths = [self.revmappath, self.linelogpath]
    _unlinkpaths(stalepaths)
231 231
@property
def lastnode(self):
    """return last node in revmap, or None if revmap is empty"""
    loaded = self._revmap
    if loaded is not None:
        return loaded.rev2hsh(loaded.maxrev)
    # fast path, read revmap without loading its full content
    return revmapmod.getlastnode(self.revmappath)
240 240
def isuptodate(self, master, strict=True):
    """return True if the revmap / linelog is up-to-date, or the file
    does not exist in the master revision. False otherwise.

    it tries to be fast and could return false negatives, because of the
    use of linkrev instead of introrev.

    useful for both server and client to decide whether to update
    fastannotate cache or not.

    if strict is True, even if fctx exists in the revmap, but is not the
    last node, isuptodate will return False. it's good for performance - no
    expensive check was done.

    if strict is False, if fctx exists in the revmap, this function may
    return True. this is useful for the client to skip downloading the
    cache if the client's master is behind the server's.
    """
    cachedtip = self.lastnode
    try:
        masterfctx = self._resolvefctx(master, resolverev=True)
        # choose linkrev instead of introrev as the check is meant to be
        # *fast*.
        linknode = self.repo.changelog.node(masterfctx.linkrev())
        if linknode == cachedtip:
            return True
        # avoid resolving old manifest, or slow adjustlinkrev to be fast,
        # false negatives are acceptable in this case.
        if strict or not cachedtip:
            return False
        # check if linknode is in the revmap. note: this loads the
        # revmap and can be slow.
        return self.revmap.hsh2rev(linknode) is not None
    except LookupError:
        # master does not have the file, or the revmap is ahead
        return True
275 275
def annotate(self, rev, master=None, showpath=False, showlines=False):
    """incrementally update the cache so it includes revisions in the main
    branch till 'master'. and run annotate on 'rev', which may or may not be
    included in the main branch.

    if master is None, do not update linelog.

    the first value returned is the annotate result, it is [(node, linenum)]
    by default. [(node, linenum, path)] if showpath is True.

    if showlines is True, a second value will be returned, it is a list of
    corresponding line contents.

    raises error.Abort if master resolves to the working directory.
    """

    # the fast path test requires commit hash, convert rev number to hash,
    # so it may hit the fast path. note: in the "fctx" mode, the "annotate"
    # command could give us a revision number even if the user passes a
    # commit hash.
    if isinstance(rev, int):
        rev = node.hex(self.repo.changelog.node(rev))

    # fast path: if rev is in the main branch already
    directly, revfctx = self.canannotatedirectly(rev)
    if directly:
        if self.ui.debugflag:
            self.ui.debug('fastannotate: %s: using fast path '
                          '(resolved fctx: %s)\n'
                          % (self.path,
                             stringutil.pprint(util.safehasattr(revfctx,
                                                                'node'))))
        return self.annotatedirectly(revfctx, showpath, showlines)

    # resolve master
    masterfctx = None
    if master:
        try:
            masterfctx = self._resolvefctx(master, resolverev=True,
                                           adjustctx=True)
        except LookupError: # master does not have the file
            pass
        else:
            if masterfctx in self.revmap: # no need to update linelog
                masterfctx = None

    #                  ... - @ <- rev (can be an arbitrary changeset,
    #                 /                not necessarily a descendant
    #      master -> o                 of master)
    #                |
    #     a merge -> o         'o': new changesets in the main branch
    #                |\        '#': revisions in the main branch that
    #                o *            exist in linelog / revmap
    #                | .       '*': changesets in side branches, or
    # last master -> # .            descendants of master
    #                | .
    #                # *       joint: '#', and is a parent of a '*'
    #                |/
    #     a joint -> # ^^^^ --- side branches
    #                |
    #                ^ --- main branch (in linelog)

    # these DFSes are similar to the traditional annotate algorithm.
    # we cannot really reuse the code for perf reason.

    # 1st DFS calculates merges, joint points, and needed.
    # "needed" is a simple reference counting dict to free items in
    # "hist", reducing its memory usage otherwise could be huge.
    initvisit = [revfctx]
    if masterfctx:
        if masterfctx.rev() is None:
            raise error.Abort(_('cannot update linelog to wdir()'),
                              hint=_('set fastannotate.mainbranch'))
        initvisit.append(masterfctx)
    visit = initvisit[:]
    pcache = {}
    needed = {revfctx: 1}
    hist = {} # {fctx: ([(llrev or fctx, linenum)], text)}
    while visit:
        f = visit.pop()
        if f in pcache or f in hist:
            continue
        if f in self.revmap: # in the old main branch, it's a joint
            llrev = self.revmap.hsh2rev(f.node())
            self.linelog.annotate(llrev)
            result = self.linelog.annotateresult
            hist[f] = (result, f.data())
            continue
        pl = self._parentfunc(f)
        pcache[f] = pl
        for p in pl:
            needed[p] = needed.get(p, 0) + 1
            if p not in pcache:
                visit.append(p)

    # 2nd (simple) DFS calculates new changesets in the main branch
    # ('o' nodes in the above graph), so we know when to update linelog.
    newmainbranch = set()
    f = masterfctx
    while f and f not in self.revmap:
        newmainbranch.add(f)
        pl = pcache[f]
        if pl:
            f = pl[0]
        else:
            f = None
            break

    # f, if present, is the position where the last build stopped at, and
    # should be the "master" last time. check to see if we can continue
    # building the linelog incrementally. (we cannot if diverged)
    if masterfctx is not None:
        self._checklastmasterhead(f)

    if self.ui.debugflag:
        if newmainbranch:
            self.ui.debug('fastannotate: %s: %d new changesets in the main'
                          ' branch\n' % (self.path, len(newmainbranch)))
        elif not hist: # no joints, no updates
            self.ui.debug('fastannotate: %s: linelog cannot help in '
                          'annotating this revision\n' % self.path)

    # prepare annotateresult so we can update linelog incrementally
    self.linelog.annotate(self.linelog.maxrev)

    # 3rd DFS does the actual annotate
    visit = initvisit[:]
    # the progress topic is user-visible, so mark it for translation like
    # the other user-visible messages in this method
    progress = self.ui.makeprogress(_('building cache'),
                                    total=len(newmainbranch))
    while visit:
        f = visit[-1]
        if f in hist:
            visit.pop()
            continue

        # a revision can only be processed once all its parents are in hist
        ready = True
        pl = pcache[f]
        for p in pl:
            if p not in hist:
                ready = False
                visit.append(p)
        if not ready:
            continue

        visit.pop()
        blocks = None # mdiff blocks, used for appending linelog
        ismainbranch = (f in newmainbranch)
        # curr is the same as the traditional annotate algorithm,
        # if we only care about linear history (do not follow merge),
        # then curr is not actually used.
        assert f not in hist
        curr = _decorate(f)
        for i, p in enumerate(pl):
            bs = list(self._diffblocks(hist[p][1], curr[1]))
            # only the diff against the first parent is written to linelog
            if i == 0 and ismainbranch:
                blocks = bs
            curr = _pair(hist[p], curr, bs)
            # drop the parent's entry once its last user has consumed it
            if needed[p] == 1:
                del hist[p]
                del needed[p]
            else:
                needed[p] -= 1

        hist[f] = curr
        del pcache[f]

        if ismainbranch: # need to write to linelog
            progress.increment()
            bannotated = None
            if len(pl) == 2 and self.opts.followmerge: # merge
                bannotated = curr[0]
            if blocks is None: # no parents, add an empty one
                blocks = list(self._diffblocks('', curr[1]))
            self._appendrev(f, blocks, bannotated)
        elif showpath: # not append linelog, but we need to record path
            self._node2path[f.node()] = f.path()

    progress.complete()

    result = [
        ((self.revmap.rev2hsh(fr) if isinstance(fr, int) else fr.node()), l)
        for fr, l in hist[revfctx][0]] # [(node, linenumber)]
    return self._refineannotateresult(result, revfctx, showpath, showlines)
457 457
def canannotatedirectly(self, rev):
    """(str) -> bool, fctx or node.
    return (True, f) if we can annotate without updating the linelog, pass
    f to annotatedirectly.
    return (False, f) if we need extra calculation. f is the fctx resolved
    from rev.
    """
    if rev is not None and not isinstance(rev, int):
        # a 20-byte value is taken as a binary node, a 40-byte value as a
        # hex node; any other length is not treated as a hash
        converters = {20: bytes, 40: node.bin}
        convert = converters.get(len(rev))
        hsh = convert(rev) if convert is not None else None
        if hsh is not None and (hsh, self.path) in self.revmap:
            return True, hsh

    perfhack = self._perfhack
    adjustctx = 'linkrev' if perfhack else True
    fctx = self._resolvefctx(rev, adjustctx=adjustctx, resolverev=True)
    known = fctx in self.revmap
    if perfhack and not known:
        # redo the resolution without perfhack - as we are going to
        # do write operations, we need a correct fctx.
        fctx = self._resolvefctx(rev, adjustctx=True, resolverev=True)
    return known, fctx
480 480
def annotatealllines(self, rev, showpath=False, showlines=False):
    """(rev : str) -> [(node : str, linenum : int, path : str)]

    the result has the same format with annotate, but include all (including
    deleted) lines up to rev. call this after calling annotate(rev, ...) for
    better performance and accuracy.

    if showlines is True, a (result, linecontents) pair is returned instead.
    """
    revfctx = self._resolvefctx(rev, resolverev=True, adjustctx=True)

    # find a chain from rev to anything in the mainbranch
    if revfctx not in self.revmap:
        chain = [revfctx]
        # "a" is the content the oldest entry of the chain is diffed
        # against; it stays empty if no first-parent ancestor is in revmap
        a = ''
        while True:
            f = chain[-1]
            pl = self._parentfunc(f)
            if not pl:
                break
            if pl[0] in self.revmap:
                a = pl[0].data()
                break
            chain.append(pl[0])

        # both self.linelog and self.revmap are backed by the filesystem.
        # now we want to modify them but do not want to write changes back
        # to files. so we create in-memory objects and copy them. it's like
        # a "fork".
        linelog = linelogmod.linelog()
        linelog.copyfrom(self.linelog)
        linelog.annotate(linelog.maxrev)
        revmap = revmapmod.revmap()
        revmap.copyfrom(self.revmap)

        # replay the chain oldest-first onto the in-memory copies
        for f in reversed(chain):
            b = f.data()
            blocks = list(self._diffblocks(a, b))
            self._doappendrev(linelog, revmap, f, blocks)
            a = b
    else:
        # fastpath: use existing linelog, revmap as we don't write to them
        linelog = self.linelog
        revmap = self.revmap

    lines = linelog.getalllines()
    hsh = revfctx.node()
    llrev = revmap.hsh2rev(hsh)
    # keep only lines whose linelog revision is not newer than rev's
    result = [(revmap.rev2hsh(r), l) for r, l in lines if r <= llrev]
    # cannot use _refineannotateresult since we need custom logic for
    # resolving line contents
    if showpath:
        result = self._addpathtoresult(result, revmap)
    if showlines:
        linecontents = self._resolvelines(result, revmap, linelog)
        result = (result, linecontents)
    return result
536 536
def _resolvelines(self, annotateresult, revmap, linelog):
    """(annotateresult) -> [line]. designed for annotatealllines.
    this is probably the most inefficient code in the whole fastannotate
    directory. but we have made a decision that the linelog does not
    store line contents. so getting them requires random accesses to
    the revlog data, since they can be many, it can be very slow.

    raises faerror.CorruptedFileError if an annotate result and the file
    content disagree on the number of lines.
    """
    # [llrev]
    revs = [revmap.hsh2rev(l[0]) for l in annotateresult]
    result = [None] * len(annotateresult)
    # {(rev, linenum): [lineindex]}
    key2idxs = collections.defaultdict(list)
    for i in pycompat.xrange(len(result)):
        key2idxs[(revs[i], annotateresult[i][1])].append(i)
    # each iteration resolves at least the lines of one revision and removes
    # them from key2idxs
    while key2idxs:
        # find an unresolved line and its linelog rev to annotate
        hsh = None
        try:
            for (rev, _linenum), idxs in key2idxs.iteritems():
                # skip revisions flagged as side branch
                if revmap.rev2flag(rev) & revmapmod.sidebranchflag:
                    continue
                hsh = annotateresult[idxs[0]][0]
                break
        # NOTE(review): a for loop consumes StopIteration itself, so this
        # handler looks unreachable - confirm before relying on it
        except StopIteration: # no more unresolved lines
            return result
        if hsh is None:
            # the remaining key2idxs are not in main branch, resolving them
            # using the hard way...
            revlines = {}
            for (rev, linenum), idxs in key2idxs.iteritems():
                if rev not in revlines:
                    hsh = annotateresult[idxs[0]][0]
                    if self.ui.debugflag:
                        self.ui.debug('fastannotate: reading %s line #%d '
                                      'to resolve lines %r\n'
                                      % (node.short(hsh), linenum, idxs))
                    fctx = self._resolvefctx(hsh, revmap.rev2path(rev))
                    lines = mdiff.splitnewlines(fctx.data())
                    revlines[rev] = lines
                for idx in idxs:
                    result[idx] = revlines[rev][linenum]
            assert all(x is not None for x in result)
            return result

        # run the annotate and the lines should match to the file content
        self.ui.debug('fastannotate: annotate %s to resolve lines\n'
                      % node.short(hsh))
        # "rev" is the revision the search loop above stopped at
        linelog.annotate(rev)
        fctx = self._resolvefctx(hsh, revmap.rev2path(rev))
        annotated = linelog.annotateresult
        lines = mdiff.splitnewlines(fctx.data())
        if len(lines) != len(annotated):
            raise faerror.CorruptedFileError('unexpected annotated lines')
        # resolve lines from the annotate result
        for i, line in enumerate(lines):
            k = annotated[i]
            if k in key2idxs:
                for idx in key2idxs[k]:
                    result[idx] = line
                del key2idxs[k]
    return result
598 598
def annotatedirectly(self, f, showpath, showlines):
    """like annotate, but when we know that f is in linelog.
    f can be either a 20-char str (node) or a fctx. this is for perf - in
    the best case, the user provides a node and we don't need to read the
    filelog or construct any filecontext.

    raises faerror.CorruptedFileError if f is not in the revmap, or is
    flagged as a side branch revision there.
    """
    hsh = f if isinstance(f, bytes) else f.node()
    llrev = self.revmap.hsh2rev(hsh)
    if not llrev:
        raise faerror.CorruptedFileError('%s is not in revmap'
                                         % node.hex(hsh))
    flag = self.revmap.rev2flag(llrev)
    if (flag & revmapmod.sidebranchflag) != 0:
        raise faerror.CorruptedFileError('%s is not in revmap mainbranch'
                                         % node.hex(hsh))
    self.linelog.annotate(llrev)
    result = []
    for annotatedrev, linenum in self.linelog.annotateresult:
        # translate linelog revisions back to nodes
        result.append((self.revmap.rev2hsh(annotatedrev), linenum))
    return self._refineannotateresult(result, f, showpath, showlines)
620 620
621 621 def _refineannotateresult(self, result, f, showpath, showlines):
622 622 """add the missing path or line contents, they can be expensive.
623 623 f could be either node or fctx.
624 624 """
625 625 if showpath:
626 626 result = self._addpathtoresult(result)
627 627 if showlines:
628 628 if isinstance(f, bytes): # f: node or fctx
629 629 llrev = self.revmap.hsh2rev(f)
630 630 fctx = self._resolvefctx(f, self.revmap.rev2path(llrev))
631 631 else:
632 632 fctx = f
633 633 lines = mdiff.splitnewlines(fctx.data())
634 634 if len(lines) != len(result): # linelog is probably corrupted
635 635 raise faerror.CorruptedFileError()
636 636 result = (result, lines)
637 637 return result
638 638
639 639 def _appendrev(self, fctx, blocks, bannotated=None):
640 640 self._doappendrev(self.linelog, self.revmap, fctx, blocks, bannotated)
641 641
def _diffblocks(self, a, b):
    """diff a against b with mdiff.allblocks, using self.opts.diffopts"""
    diffopts = self.opts.diffopts
    return mdiff.allblocks(a, b, diffopts)
644 644
645 645 @staticmethod
646 646 def _doappendrev(linelog, revmap, fctx, blocks, bannotated=None):
647 647 """append a revision to linelog and revmap"""
648 648
649 649 def getllrev(f):
650 650 """(fctx) -> int"""
651 651 # f should not be a linelog revision
652 652 if isinstance(f, int):
653 653 raise error.ProgrammingError('f should not be an int')
654 654 # f is a fctx, allocate linelog rev on demand
655 655 hsh = f.node()
656 656 rev = revmap.hsh2rev(hsh)
657 657 if rev is None:
658 658 rev = revmap.append(hsh, sidebranch=True, path=f.path())
659 659 return rev
660 660
661 661 # append sidebranch revisions to revmap
662 662 siderevs = []
663 663 siderevmap = {} # node: int
664 664 if bannotated is not None:
665 665 for (a1, a2, b1, b2), op in blocks:
666 666 if op != '=':
667 667 # f could be either linelong rev, or fctx.
668 668 siderevs += [f for f, l in bannotated[b1:b2]
669 669 if not isinstance(f, int)]
670 670 siderevs = set(siderevs)
671 671 if fctx in siderevs: # mainnode must be appended seperately
672 672 siderevs.remove(fctx)
673 673 for f in siderevs:
674 674 siderevmap[f] = getllrev(f)
675 675
676 676 # the changeset in the main branch, could be a merge
677 677 llrev = revmap.append(fctx.node(), path=fctx.path())
678 678 siderevmap[fctx] = llrev
679 679
680 680 for (a1, a2, b1, b2), op in reversed(blocks):
681 681 if op == '=':
682 682 continue
683 683 if bannotated is None:
684 684 linelog.replacelines(llrev, a1, a2, b1, b2)
685 685 else:
686 686 blines = [((r if isinstance(r, int) else siderevmap[r]), l)
687 687 for r, l in bannotated[b1:b2]]
688 688 linelog.replacelines_vec(llrev, a1, a2, blines)
689 689
690 690 def _addpathtoresult(self, annotateresult, revmap=None):
691 691 """(revmap, [(node, linenum)]) -> [(node, linenum, path)]"""
692 692 if revmap is None:
693 693 revmap = self.revmap
694 694
695 695 def _getpath(nodeid):
696 696 path = self._node2path.get(nodeid)
697 697 if path is None:
698 698 path = revmap.rev2path(revmap.hsh2rev(nodeid))
699 699 self._node2path[nodeid] = path
700 700 return path
701 701
702 702 return [(n, l, _getpath(n)) for n, l in annotateresult]
703 703
704 704 def _checklastmasterhead(self, fctx):
705 705 """check if fctx is the master's head last time, raise if not"""
706 706 if fctx is None:
707 707 llrev = 0
708 708 else:
709 709 llrev = self.revmap.hsh2rev(fctx.node())
710 710 if not llrev:
711 711 raise faerror.CannotReuseError()
712 712 if self.linelog.maxrev != llrev:
713 713 raise faerror.CannotReuseError()
714 714
@util.propertycache
def _parentfunc(self):
    """-> (fctx) -> [fctx]

    build the parent-lookup function according to self.opts: renames are
    followed only with opts.followrename, and without opts.followmerge
    only the first parent is kept.
    """
    opts = self.opts
    followrename = opts.followrename
    followmerge = opts.followmerge

    def parents(f):
        pl = _parents(f, follow=followrename)
        return pl if followmerge else pl[:1]

    return parents
726 726
@util.propertycache
def _perfhack(self):
    """the boolean value of the fastannotate.perfhack config option"""
    ui = self.ui
    return ui.configbool('fastannotate', 'perfhack')
730 730
def _resolvefctx(self, rev, path=None, **kwds):
    """resolvefctx bound to self.repo; path falls back to self.path"""
    target = path or self.path
    return resolvefctx(self.repo, rev, target, **kwds)
733 733
734 734 def _unlinkpaths(paths):
735 735 """silent, best-effort unlink"""
736 736 for path in paths:
737 737 try:
738 738 util.unlink(path)
739 739 except OSError:
740 740 pass
741 741
class pathhelper(object):
    """helper for getting paths for lockfile, linelog and revmap

    all paths derive from one vfs-relative base,
    fastannotate/<opts.shortstr>/<encoded path>, with a per-file suffix:
    '.l' for the linelog, '.m' for the revmap and '.lock' for the lock.
    """

    def __init__(self, repo, path, opts=defaultopts):
        # different options use different directories
        self._vfspath = os.path.join('fastannotate',
                                     opts.shortstr, encodedir(path))
        self._repo = repo

    @property
    def dirname(self):
        """directory that holds the cache files for this path"""
        return os.path.dirname(self._repo.vfs.join(self._vfspath))

    @property
    def linelogpath(self):
        """path of the linelog file"""
        return self._repo.vfs.join(self._vfspath + '.l')

    def lock(self):
        """return a lockmod.lock on the '.lock' file in the repo vfs"""
        return lockmod.lock(self._repo.vfs, self._vfspath + '.lock')

    @contextlib.contextmanager
    def _lockflock(self):
        """the same as 'lock' but use flock instead of lockmod.lock, to avoid
        creating temporary symlinks.

        the linelog file itself serves as the lock file; it is created if
        missing. the descriptor is closed on every exit path, including
        when acquiring the lock fails.
        """
        # NOTE(review): imported locally, presumably because fcntl is not
        # available on every platform - confirm
        import fcntl
        lockpath = self.linelogpath
        util.makedirs(os.path.dirname(lockpath))
        lockfd = os.open(lockpath, os.O_RDONLY | os.O_CREAT, 0o664)
        try:
            # acquire inside the outer try so that a failing flock() does
            # not leak lockfd
            fcntl.flock(lockfd, fcntl.LOCK_EX)
            try:
                yield
            finally:
                fcntl.flock(lockfd, fcntl.LOCK_UN)
        finally:
            os.close(lockfd)

    @property
    def revmappath(self):
        """path of the revmap file"""
        return self._repo.vfs.join(self._vfspath + '.m')
780 765
@contextlib.contextmanager
def annotatecontext(repo, path, opts=defaultopts, rebuild=False):
    """context needed to perform (fast) annotate on a file

    an annotatecontext of a single file consists of two structures: the
    linelog and the revmap. locking is handled here: only 1 process is
    allowed to write that file's linelog and revmap at a time.

    if anything goes wrong, the linelog and the revmap are assumed to be
    in a bad state and are removed from disk; the error is re-raised.

    with rebuild=True, the context is rebuilt before being handed out.

    use this function in the following way:

        with annotatecontext(...) as actx:
            actx. ....
    """
    paths = pathhelper(repo, path, opts)
    util.makedirs(paths.dirname)
    revmapfile = paths.revmappath
    linelogfile = paths.linelogpath
    actx = None
    try:
        try:
            with paths.lock():
                actx = _annotatecontext(repo, path, linelogfile,
                                        revmapfile, opts)
                if rebuild:
                    actx.rebuild()
                yield actx
        except Exception:
            # do not trust whatever is on disk after a failure
            if actx is not None:
                actx.rebuild()
            repo.ui.debug('fastannotate: %s: cache broken and deleted\n'
                          % path)
            raise
    finally:
        if actx is not None:
            actx.close()
816 801
def fctxannotatecontext(fctx, follow=True, diffopts=None, rebuild=False):
    """like annotatecontext but get the context from a fctx. convenient when
    used in fctx.annotate

    the repo and path are taken from the fctx. when the
    fastannotate.forcefollow config is true, follow is forced to True.
    """
    repo, path = fctx._repo, fctx._path
    forced = repo.ui.configbool('fastannotate', 'forcefollow', True)
    followrename = True if forced else follow
    aopts = annotateopts(diffopts=diffopts, followrename=followrename)
    return annotatecontext(repo, path, aopts, rebuild)
General Comments 0
You need to be logged in to leave comments. Login now