##// END OF EJS Templates
diff-option: move attributes handling to sysstr...
marmoute -
r51807:e586a7eb default
parent child Browse files
Show More
@@ -1,858 +1,861 b''
1 # Copyright 2016-present Facebook. All Rights Reserved.
1 # Copyright 2016-present Facebook. All Rights Reserved.
2 #
2 #
3 # context: context needed to annotate a file
3 # context: context needed to annotate a file
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8
8
9 import collections
9 import collections
10 import contextlib
10 import contextlib
11 import os
11 import os
12
12
13 from mercurial.i18n import _
13 from mercurial.i18n import _
14 from mercurial.pycompat import (
14 from mercurial.pycompat import (
15 getattr,
15 getattr,
16 open,
16 open,
17 setattr,
17 setattr,
18 )
18 )
19 from mercurial.node import (
19 from mercurial.node import (
20 bin,
20 bin,
21 hex,
21 hex,
22 short,
22 short,
23 )
23 )
24 from mercurial import (
24 from mercurial import (
25 error,
25 error,
26 linelog as linelogmod,
26 linelog as linelogmod,
27 lock as lockmod,
27 lock as lockmod,
28 mdiff,
28 mdiff,
29 pycompat,
29 pycompat,
30 scmutil,
30 scmutil,
31 util,
31 util,
32 )
32 )
33 from mercurial.utils import (
33 from mercurial.utils import (
34 hashutil,
34 hashutil,
35 stringutil,
35 stringutil,
36 )
36 )
37
37
38 from . import (
38 from . import (
39 error as faerror,
39 error as faerror,
40 revmap as revmapmod,
40 revmap as revmapmod,
41 )
41 )
42
42
# given path, get filelog, cached
@util.lrucachefunc
def _getflog(repo, path):
    """Return the filelog for ``path`` in ``repo`` (memoized by lrucachefunc)."""
    return repo.file(path)
47
47
48
48
49 # extracted from mercurial.context.basefilectx.annotate
49 # extracted from mercurial.context.basefilectx.annotate
50 def _parents(f, follow=True):
50 def _parents(f, follow=True):
51 # Cut _descendantrev here to mitigate the penalty of lazy linkrev
51 # Cut _descendantrev here to mitigate the penalty of lazy linkrev
52 # adjustment. Otherwise, p._adjustlinkrev() would walk changelog
52 # adjustment. Otherwise, p._adjustlinkrev() would walk changelog
53 # from the topmost introrev (= srcrev) down to p.linkrev() if it
53 # from the topmost introrev (= srcrev) down to p.linkrev() if it
54 # isn't an ancestor of the srcrev.
54 # isn't an ancestor of the srcrev.
55 f._changeid
55 f._changeid
56 pl = f.parents()
56 pl = f.parents()
57
57
58 # Don't return renamed parents if we aren't following.
58 # Don't return renamed parents if we aren't following.
59 if not follow:
59 if not follow:
60 pl = [p for p in pl if p.path() == f.path()]
60 pl = [p for p in pl if p.path() == f.path()]
61
61
62 # renamed filectx won't have a filelog yet, so set it
62 # renamed filectx won't have a filelog yet, so set it
63 # from the cache to save time
63 # from the cache to save time
64 for p in pl:
64 for p in pl:
65 if not '_filelog' in p.__dict__:
65 if not '_filelog' in p.__dict__:
66 p._filelog = _getflog(f._repo, p.path())
66 p._filelog = _getflog(f._repo, p.path())
67
67
68 return pl
68 return pl
69
69
70
70
71 # extracted from mercurial.context.basefilectx.annotate. slightly modified
71 # extracted from mercurial.context.basefilectx.annotate. slightly modified
72 # so it takes a fctx instead of a pair of text and fctx.
72 # so it takes a fctx instead of a pair of text and fctx.
73 def _decorate(fctx):
73 def _decorate(fctx):
74 text = fctx.data()
74 text = fctx.data()
75 linecount = text.count(b'\n')
75 linecount = text.count(b'\n')
76 if text and not text.endswith(b'\n'):
76 if text and not text.endswith(b'\n'):
77 linecount += 1
77 linecount += 1
78 return ([(fctx, i) for i in range(linecount)], text)
78 return ([(fctx, i) for i in range(linecount)], text)
79
79
80
80
81 # extracted from mercurial.context.basefilectx.annotate. slightly modified
81 # extracted from mercurial.context.basefilectx.annotate. slightly modified
82 # so it takes an extra "blocks" parameter calculated elsewhere, instead of
82 # so it takes an extra "blocks" parameter calculated elsewhere, instead of
83 # calculating diff here.
83 # calculating diff here.
84 def _pair(parent, child, blocks):
84 def _pair(parent, child, blocks):
85 for (a1, a2, b1, b2), t in blocks:
85 for (a1, a2, b1, b2), t in blocks:
86 # Changed blocks ('!') or blocks made only of blank lines ('~')
86 # Changed blocks ('!') or blocks made only of blank lines ('~')
87 # belong to the child.
87 # belong to the child.
88 if t == b'=':
88 if t == b'=':
89 child[0][b1:b2] = parent[0][a1:a2]
89 child[0][b1:b2] = parent[0][a1:a2]
90 return child
90 return child
91
91
92
92
# like scmutil.revsingle, but with lru cache, so their states (like manifests)
# could be reused across repeated revset resolutions
_revsingle = util.lrucachefunc(scmutil.revsingle)
96
96
97
97
def resolvefctx(repo, rev, path, resolverev=False, adjustctx=None):
    """(repo, str, str) -> fctx

    get the filectx object from repo, rev, path, in an efficient way.

    if resolverev is True, "rev" is a revision specified by the revset
    language, otherwise "rev" is a nodeid, or a revision number that can
    be consumed by repo.__getitem__.

    if adjustctx is not None, the returned fctx will point to a changeset
    that introduces the change (last modified the file). if adjustctx
    is 'linkrev', trust the linkrev and do not adjust it. this is noticeably
    faster for big repos but is incorrect for some cases.
    """
    if resolverev and not isinstance(rev, int) and rev is not None:
        # revset expression: use the lru-cached revsingle so repeated
        # resolutions of the same spec reuse the changectx state
        ctx = _revsingle(repo, rev)
    else:
        ctx = repo[rev]

    # If we don't need to adjust the linkrev, create the filectx using the
    # changectx instead of using ctx[path]. This means it already has the
    # changectx information, so blame -u will be able to look directly at the
    # commitctx object instead of having to resolve it by going through the
    # manifest. In a lazy-manifest world this can prevent us from downloading a
    # lot of data.
    if adjustctx is None:
        # ctx.rev() is None means it's the working copy, which is a special
        # case.
        if ctx.rev() is None:
            fctx = ctx[path]
        else:
            fctx = repo.filectx(path, changeid=ctx.rev())
    else:
        fctx = ctx[path]
        if adjustctx == b'linkrev':
            # trust the linkrev: fast, but can be wrong in some cases (see
            # docstring above)
            introrev = fctx.linkrev()
        else:
            introrev = fctx.introrev()
        if introrev != ctx.rev():
            # rebind the fctx to the changeset that introduced the change
            fctx._changeid = introrev
            fctx._changectx = repo[introrev]
    return fctx
140
140
141
141
# like mercurial.store.encodedir, but use linelog suffixes: .m, .l, .lock
def encodedir(path):
    """Escape directory names colliding with linelog suffixes.

    Any path component ending in ``.hg``, ``.l``, ``.m`` or ``.lock`` is
    suffixed with ``.hg`` so it cannot clash with the cache's own files.
    """
    encoded = path
    for needle, replacement in (
        (b'.hg/', b'.hg.hg/'),
        (b'.l/', b'.l.hg/'),
        (b'.m/', b'.m.hg/'),
        (b'.lock/', b'.lock.hg/'),
    ):
        encoded = encoded.replace(needle, replacement)
    return encoded
150
150
151
151
def hashdiffopts(diffopts):
    """Return a short (6 hex chars) stable hash identifying ``diffopts``.

    Only the keys listed in ``mdiff.diffopts.defaults`` participate, and they
    are sorted so the result does not depend on iteration order.
    """
    diffoptstr = stringutil.pprint(
        sorted(
            # defaults keys are bytes while attribute names are native str,
            # hence the pycompat.sysstr conversion before getattr
            (k, getattr(diffopts, pycompat.sysstr(k)))
            for k in mdiff.diffopts.defaults
        )
    )
    return hex(hashutil.sha1(diffoptstr).digest())[:6]
157
160
158
161
# hash of the stock diff options; used to detect customized diffopts
_defaultdiffopthash = hashdiffopts(mdiff.defaultopts)
160
163
161
164
class annotateopts:
    """like mercurial.mdiff.diffopts, but is for annotate

    followrename: follow renames, like "hg annotate -f"
    followmerge: follow p2 of a merge changeset, otherwise p2 is ignored
    """

    # option name (bytes) -> default value
    defaults = {
        b'diffopts': None,
        b'followrename': True,
        b'followmerge': True,
    }

    def __init__(self, **opts):
        opts = pycompat.byteskwargs(opts)
        for k, v in self.defaults.items():
            # NOTE: setattr here is the mercurial.pycompat wrapper imported
            # at the top of this file, not the builtin
            setattr(self, k, opts.get(k, v))

    @util.propertycache
    def shortstr(self):
        """represent opts in a short string, suitable for a directory name"""
        result = b''
        if not self.followrename:
            result += b'r0'
        if not self.followmerge:
            result += b'm0'
        if self.diffopts is not None:
            assert isinstance(self.diffopts, mdiff.diffopts)
            diffopthash = hashdiffopts(self.diffopts)
            # only non-default diffopts contribute to the directory name
            if diffopthash != _defaultdiffopthash:
                result += b'i' + diffopthash
        return result or b'default'
194
197
195
198
# shared instance with all-default options
defaultopts = annotateopts()
197
200
198
201
199 class _annotatecontext:
202 class _annotatecontext:
200 """do not use this class directly as it does not use lock to protect
203 """do not use this class directly as it does not use lock to protect
201 writes. use "with annotatecontext(...)" instead.
204 writes. use "with annotatecontext(...)" instead.
202 """
205 """
203
206
    def __init__(self, repo, path, linelogpath, revmappath, opts):
        self.repo = repo
        self.ui = repo.ui
        self.path = path  # path of the tracked file inside the repo
        self.opts = opts  # an annotateopts instance
        self.linelogpath = linelogpath  # on-disk location of the linelog
        self.revmappath = revmappath  # on-disk location of the revmap
        # backing objects, loaded lazily by the linelog/revmap properties
        self._linelog = None
        self._revmap = None
        self._node2path = {}  # {str: str}
214
217
    @property
    def linelog(self):
        # Lazily load the linelog from disk. A missing file, or one that
        # fails to parse (LineLogError), yields a fresh empty linelog.
        if self._linelog is None:
            if os.path.exists(self.linelogpath):
                with open(self.linelogpath, b'rb') as f:
                    try:
                        self._linelog = linelogmod.linelog.fromdata(f.read())
                    except linelogmod.LineLogError:
                        # corrupted cache: start over with an empty linelog
                        self._linelog = linelogmod.linelog()
            else:
                self._linelog = linelogmod.linelog()
        return self._linelog
227
230
    @property
    def revmap(self):
        # Lazily open the revmap backing file on first access.
        if self._revmap is None:
            self._revmap = revmapmod.revmap(self.revmappath)
        return self._revmap
233
236
    def close(self):
        # Flush pending revmap and linelog state to disk, then drop the
        # in-memory objects so they are lazily reloaded on next access.
        if self._revmap is not None:
            self._revmap.flush()
            self._revmap = None
        if self._linelog is not None:
            with open(self.linelogpath, b'wb') as f:
                f.write(self._linelog.encode())
            self._linelog = None

    # flush on garbage collection so pending writes are not lost
    __del__ = close
244
247
    def rebuild(self):
        """delete linelog and revmap, useful for rebuilding"""
        # close() first so any open handles are flushed and released
        self.close()
        self._node2path.clear()
        _unlinkpaths([self.revmappath, self.linelogpath])
250
253
    @property
    def lastnode(self):
        """return last node in revmap, or None if revmap is empty"""
        if self._revmap is None:
            # fast path, read revmap without loading its full content
            return revmapmod.getlastnode(self.revmappath)
        else:
            return self._revmap.rev2hsh(self._revmap.maxrev)
259
262
    def isuptodate(self, master, strict=True):
        """return True if the revmap / linelog is up-to-date, or the file
        does not exist in the master revision. False otherwise.

        it tries to be fast and could return false negatives, because of the
        use of linkrev instead of introrev.

        useful for both server and client to decide whether to update
        fastannotate cache or not.

        if strict is True, even if fctx exists in the revmap, but is not the
        last node, isuptodate will return False. it's good for performance - no
        expensive check was done.

        if strict is False, if fctx exists in the revmap, this function may
        return True. this is useful for the client to skip downloading the
        cache if the client's master is behind the server's.
        """
        lastnode = self.lastnode
        try:
            f = self._resolvefctx(master, resolverev=True)
            # choose linkrev instead of introrev as the check is meant to be
            # *fast*.
            linknode = self.repo.changelog.node(f.linkrev())
            if not strict and lastnode and linknode != lastnode:
                # check if f.node() is in the revmap. note: this loads the
                # revmap and can be slow.
                return self.revmap.hsh2rev(linknode) is not None
            # avoid resolving old manifest, or slow adjustlinkrev to be fast,
            # false negatives are acceptable in this case.
            return linknode == lastnode
        except LookupError:
            # master does not have the file, or the revmap is ahead
            return True
294
297
    def annotate(self, rev, master=None, showpath=False, showlines=False):
        """incrementally update the cache so it includes revisions in the main
        branch till 'master'. and run annotate on 'rev', which may or may not be
        included in the main branch.

        if master is None, do not update linelog.

        the first value returned is the annotate result, it is [(node, linenum)]
        by default. [(node, linenum, path)] if showpath is True.

        if showlines is True, a second value will be returned, it is a list of
        corresponding line contents.
        """

        # the fast path test requires commit hash, convert rev number to hash,
        # so it may hit the fast path. note: in the "fctx" mode, the "annotate"
        # command could give us a revision number even if the user passes a
        # commit hash.
        if isinstance(rev, int):
            rev = hex(self.repo.changelog.node(rev))

        # fast path: if rev is in the main branch already
        directly, revfctx = self.canannotatedirectly(rev)
        if directly:
            if self.ui.debugflag:
                self.ui.debug(
                    b'fastannotate: %s: using fast path '
                    b'(resolved fctx: %s)\n'
                    % (
                        self.path,
                        stringutil.pprint(util.safehasattr(revfctx, b'node')),
                    )
                )
            return self.annotatedirectly(revfctx, showpath, showlines)

        # resolve master
        masterfctx = None
        if master:
            try:
                masterfctx = self._resolvefctx(
                    master, resolverev=True, adjustctx=True
                )
            except LookupError:  # master does not have the file
                pass
            else:
                if masterfctx in self.revmap:  # no need to update linelog
                    masterfctx = None

        #  ... - @ <- rev (can be an arbitrary changeset,
        #   /          not necessarily a descendant
        #  master -> o    of master)
        #            |
        #  a merge -> o   'o': new changesets in the main branch
        #            |\   '#': revisions in the main branch that
        #            o *       exist in linelog / revmap
        #            | .  '*': changesets in side branches, or
        # last master -> # .   descendants of master
        #            | .
        #            # *  joint: '#', and is a parent of a '*'
        #            |/
        #  a joint -> #   ^^^^ --- side branches
        #            |
        #            ^ --- main branch (in linelog)

        # these DFSes are similar to the traditional annotate algorithm.
        # we cannot really reuse the code for perf reason.

        # 1st DFS calculates merges, joint points, and needed.
        # "needed" is a simple reference counting dict to free items in
        # "hist", reducing its memory usage otherwise could be huge.
        initvisit = [revfctx]
        if masterfctx:
            if masterfctx.rev() is None:
                # linelog cannot track the (uncommitted) working directory
                raise error.Abort(
                    _(b'cannot update linelog to wdir()'),
                    hint=_(b'set fastannotate.mainbranch'),
                )
            initvisit.append(masterfctx)
        visit = initvisit[:]
        pcache = {}
        needed = {revfctx: 1}
        hist = {}  # {fctx: ([(llrev or fctx, linenum)], text)}
        while visit:
            f = visit.pop()
            if f in pcache or f in hist:
                continue
            if f in self.revmap:  # in the old main branch, it's a joint
                llrev = self.revmap.hsh2rev(f.node())
                self.linelog.annotate(llrev)
                result = self.linelog.annotateresult
                hist[f] = (result, f.data())
                continue
            pl = self._parentfunc(f)
            pcache[f] = pl
            for p in pl:
                # reference-count parents so hist entries can be freed early
                needed[p] = needed.get(p, 0) + 1
                if p not in pcache:
                    visit.append(p)

        # 2nd (simple) DFS calculates new changesets in the main branch
        # ('o' nodes in # the above graph), so we know when to update linelog.
        newmainbranch = set()
        f = masterfctx
        while f and f not in self.revmap:
            newmainbranch.add(f)
            pl = pcache[f]
            if pl:
                # walk first parents only: the main branch is linear
                f = pl[0]
            else:
                f = None
                break

        # f, if present, is the position where the last build stopped at, and
        # should be the "master" last time. check to see if we can continue
        # building the linelog incrementally. (we cannot if diverged)
        if masterfctx is not None:
            self._checklastmasterhead(f)

        if self.ui.debugflag:
            if newmainbranch:
                self.ui.debug(
                    b'fastannotate: %s: %d new changesets in the main'
                    b' branch\n' % (self.path, len(newmainbranch))
                )
            elif not hist:  # no joints, no updates
                self.ui.debug(
                    b'fastannotate: %s: linelog cannot help in '
                    b'annotating this revision\n' % self.path
                )

        # prepare annotateresult so we can update linelog incrementally
        self.linelog.annotate(self.linelog.maxrev)

        # 3rd DFS does the actual annotate
        visit = initvisit[:]
        progress = self.ui.makeprogress(
            b'building cache', total=len(newmainbranch)
        )
        while visit:
            f = visit[-1]
            if f in hist:
                visit.pop()
                continue

            # only process f when all its parents have been annotated
            ready = True
            pl = pcache[f]
            for p in pl:
                if p not in hist:
                    ready = False
                    visit.append(p)
            if not ready:
                continue

            visit.pop()
            blocks = None  # mdiff blocks, used for appending linelog
            ismainbranch = f in newmainbranch
            # curr is the same as the traditional annotate algorithm,
            # if we only care about linear history (do not follow merge),
            # then curr is not actually used.
            assert f not in hist
            curr = _decorate(f)
            for i, p in enumerate(pl):
                bs = list(self._diffblocks(hist[p][1], curr[1]))
                if i == 0 and ismainbranch:
                    blocks = bs
                curr = _pair(hist[p], curr, bs)
                if needed[p] == 1:
                    # last user of this parent's history: free the memory
                    del hist[p]
                    del needed[p]
                else:
                    needed[p] -= 1

            hist[f] = curr
            del pcache[f]

            if ismainbranch:  # need to write to linelog
                progress.increment()
                bannotated = None
                if len(pl) == 2 and self.opts.followmerge:  # merge
                    bannotated = curr[0]
                if blocks is None:  # no parents, add an empty one
                    blocks = list(self._diffblocks(b'', curr[1]))
                self._appendrev(f, blocks, bannotated)
            elif showpath:  # not append linelog, but we need to record path
                self._node2path[f.node()] = f.path()

        progress.complete()

        result = [
            ((self.revmap.rev2hsh(fr) if isinstance(fr, int) else fr.node()), l)
            for fr, l in hist[revfctx][0]
        ]  # [(node, linenumber)]
        return self._refineannotateresult(result, revfctx, showpath, showlines)
488
491
    def canannotatedirectly(self, rev):
        """(str) -> bool, fctx or node.
        return (True, f) if we can annotate without updating the linelog, pass
        f to annotatedirectly.
        return (False, f) if we need extra calculation. f is the fctx resolved
        from rev.
        """
        result = True
        f = None
        if not isinstance(rev, int) and rev is not None:
            # a 20-byte value is taken as a binary node as-is; a 40-char hex
            # node is converted with bin(); any other length maps to None
            hsh = {20: bytes, 40: bin}.get(len(rev), lambda x: None)(rev)
            if hsh is not None and (hsh, self.path) in self.revmap:
                f = hsh
        if f is None:
            # perfhack trusts linkrev (fast, but possibly inaccurate — see
            # resolvefctx)
            adjustctx = b'linkrev' if self._perfhack else True
            f = self._resolvefctx(rev, adjustctx=adjustctx, resolverev=True)
            result = f in self.revmap
            if not result and self._perfhack:
                # redo the resolution without perfhack - as we are going to
                # do write operations, we need a correct fctx.
                f = self._resolvefctx(rev, adjustctx=True, resolverev=True)
        return result, f
511
514
512 def annotatealllines(self, rev, showpath=False, showlines=False):
515 def annotatealllines(self, rev, showpath=False, showlines=False):
513 """(rev : str) -> [(node : str, linenum : int, path : str)]
516 """(rev : str) -> [(node : str, linenum : int, path : str)]
514
517
515 the result has the same format with annotate, but include all (including
518 the result has the same format with annotate, but include all (including
516 deleted) lines up to rev. call this after calling annotate(rev, ...) for
519 deleted) lines up to rev. call this after calling annotate(rev, ...) for
517 better performance and accuracy.
520 better performance and accuracy.
518 """
521 """
519 revfctx = self._resolvefctx(rev, resolverev=True, adjustctx=True)
522 revfctx = self._resolvefctx(rev, resolverev=True, adjustctx=True)
520
523
521 # find a chain from rev to anything in the mainbranch
524 # find a chain from rev to anything in the mainbranch
522 if revfctx not in self.revmap:
525 if revfctx not in self.revmap:
523 chain = [revfctx]
526 chain = [revfctx]
524 a = b''
527 a = b''
525 while True:
528 while True:
526 f = chain[-1]
529 f = chain[-1]
527 pl = self._parentfunc(f)
530 pl = self._parentfunc(f)
528 if not pl:
531 if not pl:
529 break
532 break
530 if pl[0] in self.revmap:
533 if pl[0] in self.revmap:
531 a = pl[0].data()
534 a = pl[0].data()
532 break
535 break
533 chain.append(pl[0])
536 chain.append(pl[0])
534
537
535 # both self.linelog and self.revmap is backed by filesystem. now
538 # both self.linelog and self.revmap is backed by filesystem. now
536 # we want to modify them but do not want to write changes back to
539 # we want to modify them but do not want to write changes back to
537 # files. so we create in-memory objects and copy them. it's like
540 # files. so we create in-memory objects and copy them. it's like
538 # a "fork".
541 # a "fork".
539 linelog = linelogmod.linelog()
542 linelog = linelogmod.linelog()
540 linelog.copyfrom(self.linelog)
543 linelog.copyfrom(self.linelog)
541 linelog.annotate(linelog.maxrev)
544 linelog.annotate(linelog.maxrev)
542 revmap = revmapmod.revmap()
545 revmap = revmapmod.revmap()
543 revmap.copyfrom(self.revmap)
546 revmap.copyfrom(self.revmap)
544
547
545 for f in reversed(chain):
548 for f in reversed(chain):
546 b = f.data()
549 b = f.data()
547 blocks = list(self._diffblocks(a, b))
550 blocks = list(self._diffblocks(a, b))
548 self._doappendrev(linelog, revmap, f, blocks)
551 self._doappendrev(linelog, revmap, f, blocks)
549 a = b
552 a = b
550 else:
553 else:
551 # fastpath: use existing linelog, revmap as we don't write to them
554 # fastpath: use existing linelog, revmap as we don't write to them
552 linelog = self.linelog
555 linelog = self.linelog
553 revmap = self.revmap
556 revmap = self.revmap
554
557
555 lines = linelog.getalllines()
558 lines = linelog.getalllines()
556 hsh = revfctx.node()
559 hsh = revfctx.node()
557 llrev = revmap.hsh2rev(hsh)
560 llrev = revmap.hsh2rev(hsh)
558 result = [(revmap.rev2hsh(r), l) for r, l in lines if r <= llrev]
561 result = [(revmap.rev2hsh(r), l) for r, l in lines if r <= llrev]
559 # cannot use _refineannotateresult since we need custom logic for
562 # cannot use _refineannotateresult since we need custom logic for
560 # resolving line contents
563 # resolving line contents
561 if showpath:
564 if showpath:
562 result = self._addpathtoresult(result, revmap)
565 result = self._addpathtoresult(result, revmap)
563 if showlines:
566 if showlines:
564 linecontents = self._resolvelines(result, revmap, linelog)
567 linecontents = self._resolvelines(result, revmap, linelog)
565 result = (result, linecontents)
568 result = (result, linecontents)
566 return result
569 return result
567
570
568 def _resolvelines(self, annotateresult, revmap, linelog):
571 def _resolvelines(self, annotateresult, revmap, linelog):
569 """(annotateresult) -> [line]. designed for annotatealllines.
572 """(annotateresult) -> [line]. designed for annotatealllines.
570 this is probably the most inefficient code in the whole fastannotate
573 this is probably the most inefficient code in the whole fastannotate
571 directory. but we have made a decision that the linelog does not
574 directory. but we have made a decision that the linelog does not
572 store line contents. so getting them requires random accesses to
575 store line contents. so getting them requires random accesses to
573 the revlog data, since they can be many, it can be very slow.
576 the revlog data, since they can be many, it can be very slow.
574 """
577 """
575 # [llrev]
578 # [llrev]
576 revs = [revmap.hsh2rev(l[0]) for l in annotateresult]
579 revs = [revmap.hsh2rev(l[0]) for l in annotateresult]
577 result = [None] * len(annotateresult)
580 result = [None] * len(annotateresult)
578 # {(rev, linenum): [lineindex]}
581 # {(rev, linenum): [lineindex]}
579 key2idxs = collections.defaultdict(list)
582 key2idxs = collections.defaultdict(list)
580 for i in range(len(result)):
583 for i in range(len(result)):
581 key2idxs[(revs[i], annotateresult[i][1])].append(i)
584 key2idxs[(revs[i], annotateresult[i][1])].append(i)
582 while key2idxs:
585 while key2idxs:
583 # find an unresolved line and its linelog rev to annotate
586 # find an unresolved line and its linelog rev to annotate
584 hsh = None
587 hsh = None
585 try:
588 try:
586 for (rev, _linenum), idxs in key2idxs.items():
589 for (rev, _linenum), idxs in key2idxs.items():
587 if revmap.rev2flag(rev) & revmapmod.sidebranchflag:
590 if revmap.rev2flag(rev) & revmapmod.sidebranchflag:
588 continue
591 continue
589 hsh = annotateresult[idxs[0]][0]
592 hsh = annotateresult[idxs[0]][0]
590 break
593 break
591 except StopIteration: # no more unresolved lines
594 except StopIteration: # no more unresolved lines
592 return result
595 return result
593 if hsh is None:
596 if hsh is None:
594 # the remaining key2idxs are not in main branch, resolving them
597 # the remaining key2idxs are not in main branch, resolving them
595 # using the hard way...
598 # using the hard way...
596 revlines = {}
599 revlines = {}
597 for (rev, linenum), idxs in key2idxs.items():
600 for (rev, linenum), idxs in key2idxs.items():
598 if rev not in revlines:
601 if rev not in revlines:
599 hsh = annotateresult[idxs[0]][0]
602 hsh = annotateresult[idxs[0]][0]
600 if self.ui.debugflag:
603 if self.ui.debugflag:
601 self.ui.debug(
604 self.ui.debug(
602 b'fastannotate: reading %s line #%d '
605 b'fastannotate: reading %s line #%d '
603 b'to resolve lines %r\n'
606 b'to resolve lines %r\n'
604 % (short(hsh), linenum, idxs)
607 % (short(hsh), linenum, idxs)
605 )
608 )
606 fctx = self._resolvefctx(hsh, revmap.rev2path(rev))
609 fctx = self._resolvefctx(hsh, revmap.rev2path(rev))
607 lines = mdiff.splitnewlines(fctx.data())
610 lines = mdiff.splitnewlines(fctx.data())
608 revlines[rev] = lines
611 revlines[rev] = lines
609 for idx in idxs:
612 for idx in idxs:
610 result[idx] = revlines[rev][linenum]
613 result[idx] = revlines[rev][linenum]
611 assert all(x is not None for x in result)
614 assert all(x is not None for x in result)
612 return result
615 return result
613
616
614 # run the annotate and the lines should match to the file content
617 # run the annotate and the lines should match to the file content
615 self.ui.debug(
618 self.ui.debug(
616 b'fastannotate: annotate %s to resolve lines\n' % short(hsh)
619 b'fastannotate: annotate %s to resolve lines\n' % short(hsh)
617 )
620 )
618 linelog.annotate(rev)
621 linelog.annotate(rev)
619 fctx = self._resolvefctx(hsh, revmap.rev2path(rev))
622 fctx = self._resolvefctx(hsh, revmap.rev2path(rev))
620 annotated = linelog.annotateresult
623 annotated = linelog.annotateresult
621 lines = mdiff.splitnewlines(fctx.data())
624 lines = mdiff.splitnewlines(fctx.data())
622 if len(lines) != len(annotated):
625 if len(lines) != len(annotated):
623 raise faerror.CorruptedFileError(b'unexpected annotated lines')
626 raise faerror.CorruptedFileError(b'unexpected annotated lines')
624 # resolve lines from the annotate result
627 # resolve lines from the annotate result
625 for i, line in enumerate(lines):
628 for i, line in enumerate(lines):
626 k = annotated[i]
629 k = annotated[i]
627 if k in key2idxs:
630 if k in key2idxs:
628 for idx in key2idxs[k]:
631 for idx in key2idxs[k]:
629 result[idx] = line
632 result[idx] = line
630 del key2idxs[k]
633 del key2idxs[k]
631 return result
634 return result
632
635
633 def annotatedirectly(self, f, showpath, showlines):
636 def annotatedirectly(self, f, showpath, showlines):
634 """like annotate, but when we know that f is in linelog.
637 """like annotate, but when we know that f is in linelog.
635 f can be either a 20-char str (node) or a fctx. this is for perf - in
638 f can be either a 20-char str (node) or a fctx. this is for perf - in
636 the best case, the user provides a node and we don't need to read the
639 the best case, the user provides a node and we don't need to read the
637 filelog or construct any filecontext.
640 filelog or construct any filecontext.
638 """
641 """
639 if isinstance(f, bytes):
642 if isinstance(f, bytes):
640 hsh = f
643 hsh = f
641 else:
644 else:
642 hsh = f.node()
645 hsh = f.node()
643 llrev = self.revmap.hsh2rev(hsh)
646 llrev = self.revmap.hsh2rev(hsh)
644 if not llrev:
647 if not llrev:
645 raise faerror.CorruptedFileError(b'%s is not in revmap' % hex(hsh))
648 raise faerror.CorruptedFileError(b'%s is not in revmap' % hex(hsh))
646 if (self.revmap.rev2flag(llrev) & revmapmod.sidebranchflag) != 0:
649 if (self.revmap.rev2flag(llrev) & revmapmod.sidebranchflag) != 0:
647 raise faerror.CorruptedFileError(
650 raise faerror.CorruptedFileError(
648 b'%s is not in revmap mainbranch' % hex(hsh)
651 b'%s is not in revmap mainbranch' % hex(hsh)
649 )
652 )
650 self.linelog.annotate(llrev)
653 self.linelog.annotate(llrev)
651 result = [
654 result = [
652 (self.revmap.rev2hsh(r), l) for r, l in self.linelog.annotateresult
655 (self.revmap.rev2hsh(r), l) for r, l in self.linelog.annotateresult
653 ]
656 ]
654 return self._refineannotateresult(result, f, showpath, showlines)
657 return self._refineannotateresult(result, f, showpath, showlines)
655
658
656 def _refineannotateresult(self, result, f, showpath, showlines):
659 def _refineannotateresult(self, result, f, showpath, showlines):
657 """add the missing path or line contents, they can be expensive.
660 """add the missing path or line contents, they can be expensive.
658 f could be either node or fctx.
661 f could be either node or fctx.
659 """
662 """
660 if showpath:
663 if showpath:
661 result = self._addpathtoresult(result)
664 result = self._addpathtoresult(result)
662 if showlines:
665 if showlines:
663 if isinstance(f, bytes): # f: node or fctx
666 if isinstance(f, bytes): # f: node or fctx
664 llrev = self.revmap.hsh2rev(f)
667 llrev = self.revmap.hsh2rev(f)
665 fctx = self._resolvefctx(f, self.revmap.rev2path(llrev))
668 fctx = self._resolvefctx(f, self.revmap.rev2path(llrev))
666 else:
669 else:
667 fctx = f
670 fctx = f
668 lines = mdiff.splitnewlines(fctx.data())
671 lines = mdiff.splitnewlines(fctx.data())
669 if len(lines) != len(result): # linelog is probably corrupted
672 if len(lines) != len(result): # linelog is probably corrupted
670 raise faerror.CorruptedFileError()
673 raise faerror.CorruptedFileError()
671 result = (result, lines)
674 result = (result, lines)
672 return result
675 return result
673
676
674 def _appendrev(self, fctx, blocks, bannotated=None):
677 def _appendrev(self, fctx, blocks, bannotated=None):
675 self._doappendrev(self.linelog, self.revmap, fctx, blocks, bannotated)
678 self._doappendrev(self.linelog, self.revmap, fctx, blocks, bannotated)
676
679
677 def _diffblocks(self, a, b):
680 def _diffblocks(self, a, b):
678 return mdiff.allblocks(a, b, self.opts.diffopts)
681 return mdiff.allblocks(a, b, self.opts.diffopts)
679
682
680 @staticmethod
683 @staticmethod
681 def _doappendrev(linelog, revmap, fctx, blocks, bannotated=None):
684 def _doappendrev(linelog, revmap, fctx, blocks, bannotated=None):
682 """append a revision to linelog and revmap"""
685 """append a revision to linelog and revmap"""
683
686
684 def getllrev(f):
687 def getllrev(f):
685 """(fctx) -> int"""
688 """(fctx) -> int"""
686 # f should not be a linelog revision
689 # f should not be a linelog revision
687 if isinstance(f, int):
690 if isinstance(f, int):
688 raise error.ProgrammingError(b'f should not be an int')
691 raise error.ProgrammingError(b'f should not be an int')
689 # f is a fctx, allocate linelog rev on demand
692 # f is a fctx, allocate linelog rev on demand
690 hsh = f.node()
693 hsh = f.node()
691 rev = revmap.hsh2rev(hsh)
694 rev = revmap.hsh2rev(hsh)
692 if rev is None:
695 if rev is None:
693 rev = revmap.append(hsh, sidebranch=True, path=f.path())
696 rev = revmap.append(hsh, sidebranch=True, path=f.path())
694 return rev
697 return rev
695
698
696 # append sidebranch revisions to revmap
699 # append sidebranch revisions to revmap
697 siderevs = []
700 siderevs = []
698 siderevmap = {} # node: int
701 siderevmap = {} # node: int
699 if bannotated is not None:
702 if bannotated is not None:
700 for (a1, a2, b1, b2), op in blocks:
703 for (a1, a2, b1, b2), op in blocks:
701 if op != b'=':
704 if op != b'=':
702 # f could be either linelong rev, or fctx.
705 # f could be either linelong rev, or fctx.
703 siderevs += [
706 siderevs += [
704 f
707 f
705 for f, l in bannotated[b1:b2]
708 for f, l in bannotated[b1:b2]
706 if not isinstance(f, int)
709 if not isinstance(f, int)
707 ]
710 ]
708 siderevs = set(siderevs)
711 siderevs = set(siderevs)
709 if fctx in siderevs: # mainnode must be appended seperately
712 if fctx in siderevs: # mainnode must be appended seperately
710 siderevs.remove(fctx)
713 siderevs.remove(fctx)
711 for f in siderevs:
714 for f in siderevs:
712 siderevmap[f] = getllrev(f)
715 siderevmap[f] = getllrev(f)
713
716
714 # the changeset in the main branch, could be a merge
717 # the changeset in the main branch, could be a merge
715 llrev = revmap.append(fctx.node(), path=fctx.path())
718 llrev = revmap.append(fctx.node(), path=fctx.path())
716 siderevmap[fctx] = llrev
719 siderevmap[fctx] = llrev
717
720
718 for (a1, a2, b1, b2), op in reversed(blocks):
721 for (a1, a2, b1, b2), op in reversed(blocks):
719 if op == b'=':
722 if op == b'=':
720 continue
723 continue
721 if bannotated is None:
724 if bannotated is None:
722 linelog.replacelines(llrev, a1, a2, b1, b2)
725 linelog.replacelines(llrev, a1, a2, b1, b2)
723 else:
726 else:
724 blines = [
727 blines = [
725 ((r if isinstance(r, int) else siderevmap[r]), l)
728 ((r if isinstance(r, int) else siderevmap[r]), l)
726 for r, l in bannotated[b1:b2]
729 for r, l in bannotated[b1:b2]
727 ]
730 ]
728 linelog.replacelines_vec(llrev, a1, a2, blines)
731 linelog.replacelines_vec(llrev, a1, a2, blines)
729
732
730 def _addpathtoresult(self, annotateresult, revmap=None):
733 def _addpathtoresult(self, annotateresult, revmap=None):
731 """(revmap, [(node, linenum)]) -> [(node, linenum, path)]"""
734 """(revmap, [(node, linenum)]) -> [(node, linenum, path)]"""
732 if revmap is None:
735 if revmap is None:
733 revmap = self.revmap
736 revmap = self.revmap
734
737
735 def _getpath(nodeid):
738 def _getpath(nodeid):
736 path = self._node2path.get(nodeid)
739 path = self._node2path.get(nodeid)
737 if path is None:
740 if path is None:
738 path = revmap.rev2path(revmap.hsh2rev(nodeid))
741 path = revmap.rev2path(revmap.hsh2rev(nodeid))
739 self._node2path[nodeid] = path
742 self._node2path[nodeid] = path
740 return path
743 return path
741
744
742 return [(n, l, _getpath(n)) for n, l in annotateresult]
745 return [(n, l, _getpath(n)) for n, l in annotateresult]
743
746
744 def _checklastmasterhead(self, fctx):
747 def _checklastmasterhead(self, fctx):
745 """check if fctx is the master's head last time, raise if not"""
748 """check if fctx is the master's head last time, raise if not"""
746 if fctx is None:
749 if fctx is None:
747 llrev = 0
750 llrev = 0
748 else:
751 else:
749 llrev = self.revmap.hsh2rev(fctx.node())
752 llrev = self.revmap.hsh2rev(fctx.node())
750 if not llrev:
753 if not llrev:
751 raise faerror.CannotReuseError()
754 raise faerror.CannotReuseError()
752 if self.linelog.maxrev != llrev:
755 if self.linelog.maxrev != llrev:
753 raise faerror.CannotReuseError()
756 raise faerror.CannotReuseError()
754
757
755 @util.propertycache
758 @util.propertycache
756 def _parentfunc(self):
759 def _parentfunc(self):
757 """-> (fctx) -> [fctx]"""
760 """-> (fctx) -> [fctx]"""
758 followrename = self.opts.followrename
761 followrename = self.opts.followrename
759 followmerge = self.opts.followmerge
762 followmerge = self.opts.followmerge
760
763
761 def parents(f):
764 def parents(f):
762 pl = _parents(f, follow=followrename)
765 pl = _parents(f, follow=followrename)
763 if not followmerge:
766 if not followmerge:
764 pl = pl[:1]
767 pl = pl[:1]
765 return pl
768 return pl
766
769
767 return parents
770 return parents
768
771
769 @util.propertycache
772 @util.propertycache
770 def _perfhack(self):
773 def _perfhack(self):
771 return self.ui.configbool(b'fastannotate', b'perfhack')
774 return self.ui.configbool(b'fastannotate', b'perfhack')
772
775
773 def _resolvefctx(self, rev, path=None, **kwds):
776 def _resolvefctx(self, rev, path=None, **kwds):
774 return resolvefctx(self.repo, rev, (path or self.path), **kwds)
777 return resolvefctx(self.repo, rev, (path or self.path), **kwds)
775
778
776
779
777 def _unlinkpaths(paths):
780 def _unlinkpaths(paths):
778 """silent, best-effort unlink"""
781 """silent, best-effort unlink"""
779 for path in paths:
782 for path in paths:
780 try:
783 try:
781 util.unlink(path)
784 util.unlink(path)
782 except OSError:
785 except OSError:
783 pass
786 pass
784
787
785
788
786 class pathhelper:
789 class pathhelper:
787 """helper for getting paths for lockfile, linelog and revmap"""
790 """helper for getting paths for lockfile, linelog and revmap"""
788
791
789 def __init__(self, repo, path, opts=defaultopts):
792 def __init__(self, repo, path, opts=defaultopts):
790 # different options use different directories
793 # different options use different directories
791 self._vfspath = os.path.join(
794 self._vfspath = os.path.join(
792 b'fastannotate', opts.shortstr, encodedir(path)
795 b'fastannotate', opts.shortstr, encodedir(path)
793 )
796 )
794 self._repo = repo
797 self._repo = repo
795
798
796 @property
799 @property
797 def dirname(self):
800 def dirname(self):
798 return os.path.dirname(self._repo.vfs.join(self._vfspath))
801 return os.path.dirname(self._repo.vfs.join(self._vfspath))
799
802
800 @property
803 @property
801 def linelogpath(self):
804 def linelogpath(self):
802 return self._repo.vfs.join(self._vfspath + b'.l')
805 return self._repo.vfs.join(self._vfspath + b'.l')
803
806
804 def lock(self):
807 def lock(self):
805 return lockmod.lock(self._repo.vfs, self._vfspath + b'.lock')
808 return lockmod.lock(self._repo.vfs, self._vfspath + b'.lock')
806
809
807 @property
810 @property
808 def revmappath(self):
811 def revmappath(self):
809 return self._repo.vfs.join(self._vfspath + b'.m')
812 return self._repo.vfs.join(self._vfspath + b'.m')
810
813
811
814
812 @contextlib.contextmanager
815 @contextlib.contextmanager
813 def annotatecontext(repo, path, opts=defaultopts, rebuild=False):
816 def annotatecontext(repo, path, opts=defaultopts, rebuild=False):
814 """context needed to perform (fast) annotate on a file
817 """context needed to perform (fast) annotate on a file
815
818
816 an annotatecontext of a single file consists of two structures: the
819 an annotatecontext of a single file consists of two structures: the
817 linelog and the revmap. this function takes care of locking. only 1
820 linelog and the revmap. this function takes care of locking. only 1
818 process is allowed to write that file's linelog and revmap at a time.
821 process is allowed to write that file's linelog and revmap at a time.
819
822
820 when something goes wrong, this function will assume the linelog and the
823 when something goes wrong, this function will assume the linelog and the
821 revmap are in a bad state, and remove them from disk.
824 revmap are in a bad state, and remove them from disk.
822
825
823 use this function in the following way:
826 use this function in the following way:
824
827
825 with annotatecontext(...) as actx:
828 with annotatecontext(...) as actx:
826 actx. ....
829 actx. ....
827 """
830 """
828 helper = pathhelper(repo, path, opts)
831 helper = pathhelper(repo, path, opts)
829 util.makedirs(helper.dirname)
832 util.makedirs(helper.dirname)
830 revmappath = helper.revmappath
833 revmappath = helper.revmappath
831 linelogpath = helper.linelogpath
834 linelogpath = helper.linelogpath
832 actx = None
835 actx = None
833 try:
836 try:
834 with helper.lock():
837 with helper.lock():
835 actx = _annotatecontext(repo, path, linelogpath, revmappath, opts)
838 actx = _annotatecontext(repo, path, linelogpath, revmappath, opts)
836 if rebuild:
839 if rebuild:
837 actx.rebuild()
840 actx.rebuild()
838 yield actx
841 yield actx
839 except Exception:
842 except Exception:
840 if actx is not None:
843 if actx is not None:
841 actx.rebuild()
844 actx.rebuild()
842 repo.ui.debug(b'fastannotate: %s: cache broken and deleted\n' % path)
845 repo.ui.debug(b'fastannotate: %s: cache broken and deleted\n' % path)
843 raise
846 raise
844 finally:
847 finally:
845 if actx is not None:
848 if actx is not None:
846 actx.close()
849 actx.close()
847
850
848
851
849 def fctxannotatecontext(fctx, follow=True, diffopts=None, rebuild=False):
852 def fctxannotatecontext(fctx, follow=True, diffopts=None, rebuild=False):
850 """like annotatecontext but get the context from a fctx. convenient when
853 """like annotatecontext but get the context from a fctx. convenient when
851 used in fctx.annotate
854 used in fctx.annotate
852 """
855 """
853 repo = fctx._repo
856 repo = fctx._repo
854 path = fctx._path
857 path = fctx._path
855 if repo.ui.configbool(b'fastannotate', b'forcefollow', True):
858 if repo.ui.configbool(b'fastannotate', b'forcefollow', True):
856 follow = True
859 follow = True
857 aopts = annotateopts(diffopts=diffopts, followrename=follow)
860 aopts = annotateopts(diffopts=diffopts, followrename=follow)
858 return annotatecontext(repo, path, aopts, rebuild)
861 return annotatecontext(repo, path, aopts, rebuild)
@@ -1,562 +1,563 b''
1 # mdiff.py - diff and patch routines for mercurial
1 # mdiff.py - diff and patch routines for mercurial
2 #
2 #
3 # Copyright 2005, 2006 Olivia Mackall <olivia@selenic.com>
3 # Copyright 2005, 2006 Olivia Mackall <olivia@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8
8
9 import re
9 import re
10 import struct
10 import struct
11 import zlib
11 import zlib
12
12
13 from .i18n import _
13 from .i18n import _
14 from .pycompat import (
14 from .pycompat import (
15 getattr,
15 getattr,
16 setattr,
16 setattr,
17 )
17 )
18 from . import (
18 from . import (
19 diffhelper,
19 diffhelper,
20 encoding,
20 encoding,
21 error,
21 error,
22 policy,
22 policy,
23 pycompat,
23 pycompat,
24 util,
24 util,
25 )
25 )
26 from .utils import dateutil
26 from .utils import dateutil
27
27
28 bdiff = policy.importmod('bdiff')
28 bdiff = policy.importmod('bdiff')
29 mpatch = policy.importmod('mpatch')
29 mpatch = policy.importmod('mpatch')
30
30
31 blocks = bdiff.blocks
31 blocks = bdiff.blocks
32 fixws = bdiff.fixws
32 fixws = bdiff.fixws
33 patches = mpatch.patches
33 patches = mpatch.patches
34 patchedsize = mpatch.patchedsize
34 patchedsize = mpatch.patchedsize
35 textdiff = bdiff.bdiff
35 textdiff = bdiff.bdiff
36 splitnewlines = bdiff.splitnewlines
36 splitnewlines = bdiff.splitnewlines
37
37
38
38
39 # TODO: this looks like it could be an attrs, which might help pytype
39 # TODO: this looks like it could be an attrs, which might help pytype
40 class diffopts:
40 class diffopts:
41 """context is the number of context lines
41 """context is the number of context lines
42 text treats all files as text
42 text treats all files as text
43 showfunc enables diff -p output
43 showfunc enables diff -p output
44 git enables the git extended patch format
44 git enables the git extended patch format
45 nodates removes dates from diff headers
45 nodates removes dates from diff headers
46 nobinary ignores binary files
46 nobinary ignores binary files
47 noprefix disables the 'a/' and 'b/' prefixes (ignored in plain mode)
47 noprefix disables the 'a/' and 'b/' prefixes (ignored in plain mode)
48 ignorews ignores all whitespace changes in the diff
48 ignorews ignores all whitespace changes in the diff
49 ignorewsamount ignores changes in the amount of whitespace
49 ignorewsamount ignores changes in the amount of whitespace
50 ignoreblanklines ignores changes whose lines are all blank
50 ignoreblanklines ignores changes whose lines are all blank
51 upgrade generates git diffs to avoid data loss
51 upgrade generates git diffs to avoid data loss
52 """
52 """
53
53
54 _HAS_DYNAMIC_ATTRIBUTES = True
54 _HAS_DYNAMIC_ATTRIBUTES = True
55
55
56 defaults = {
56 defaults = {
57 b'context': 3,
57 b'context': 3,
58 b'text': False,
58 b'text': False,
59 b'showfunc': False,
59 b'showfunc': False,
60 b'git': False,
60 b'git': False,
61 b'nodates': False,
61 b'nodates': False,
62 b'nobinary': False,
62 b'nobinary': False,
63 b'noprefix': False,
63 b'noprefix': False,
64 b'index': 0,
64 b'index': 0,
65 b'ignorews': False,
65 b'ignorews': False,
66 b'ignorewsamount': False,
66 b'ignorewsamount': False,
67 b'ignorewseol': False,
67 b'ignorewseol': False,
68 b'ignoreblanklines': False,
68 b'ignoreblanklines': False,
69 b'upgrade': False,
69 b'upgrade': False,
70 b'showsimilarity': False,
70 b'showsimilarity': False,
71 b'worddiff': False,
71 b'worddiff': False,
72 b'xdiff': False,
72 b'xdiff': False,
73 }
73 }
74
74
75 def __init__(self, **opts):
75 def __init__(self, **opts):
76 opts = pycompat.byteskwargs(opts)
76 opts = pycompat.byteskwargs(opts)
77 for k in self.defaults.keys():
77 for k in self.defaults.keys():
78 v = opts.get(k)
78 v = opts.get(k)
79 if v is None:
79 if v is None:
80 v = self.defaults[k]
80 v = self.defaults[k]
81 setattr(self, k, v)
81 setattr(self, pycompat.sysstr(k), v)
82
82
83 try:
83 try:
84 self.context = int(self.context)
84 self.context = int(self.context)
85 except ValueError:
85 except ValueError:
86 raise error.InputError(
86 raise error.InputError(
87 _(b'diff context lines count must be an integer, not %r')
87 _(b'diff context lines count must be an integer, not %r')
88 % pycompat.bytestr(self.context)
88 % pycompat.bytestr(self.context)
89 )
89 )
90
90
91 def copy(self, **kwargs):
91 def copy(self, **kwargs):
92 opts = {k: getattr(self, k) for k in self.defaults}
92 opts = {k: getattr(self, pycompat.sysstr(k)) for k in self.defaults}
93 opts = pycompat.strkwargs(opts)
93 opts = pycompat.strkwargs(opts)
94 opts.update(kwargs)
94 opts.update(kwargs)
95 return diffopts(**opts)
95 return diffopts(**opts)
96
96
97 def __bytes__(self):
97 def __bytes__(self):
98 return b", ".join(
98 return b", ".join(
99 b"%s: %r" % (k, getattr(self, k)) for k in self.defaults
99 b"%s: %r" % (k, getattr(self, pycompat.sysstr(k)))
100 for k in self.defaults
100 )
101 )
101
102
102 __str__ = encoding.strmethod(__bytes__)
103 __str__ = encoding.strmethod(__bytes__)
103
104
104
105
105 defaultopts = diffopts()
106 defaultopts = diffopts()
106
107
107
108
108 def wsclean(opts, text, blank=True):
109 def wsclean(opts, text, blank=True):
109 if opts.ignorews:
110 if opts.ignorews:
110 text = bdiff.fixws(text, 1)
111 text = bdiff.fixws(text, 1)
111 elif opts.ignorewsamount:
112 elif opts.ignorewsamount:
112 text = bdiff.fixws(text, 0)
113 text = bdiff.fixws(text, 0)
113 if blank and opts.ignoreblanklines:
114 if blank and opts.ignoreblanklines:
114 text = re.sub(b'\n+', b'\n', text).strip(b'\n')
115 text = re.sub(b'\n+', b'\n', text).strip(b'\n')
115 if opts.ignorewseol:
116 if opts.ignorewseol:
116 text = re.sub(br'[ \t\r\f]+\n', br'\n', text)
117 text = re.sub(br'[ \t\r\f]+\n', br'\n', text)
117 return text
118 return text
118
119
119
120
120 def splitblock(base1, lines1, base2, lines2, opts):
121 def splitblock(base1, lines1, base2, lines2, opts):
121 # The input lines matches except for interwoven blank lines. We
122 # The input lines matches except for interwoven blank lines. We
122 # transform it into a sequence of matching blocks and blank blocks.
123 # transform it into a sequence of matching blocks and blank blocks.
123 lines1 = [(wsclean(opts, l) and 1 or 0) for l in lines1]
124 lines1 = [(wsclean(opts, l) and 1 or 0) for l in lines1]
124 lines2 = [(wsclean(opts, l) and 1 or 0) for l in lines2]
125 lines2 = [(wsclean(opts, l) and 1 or 0) for l in lines2]
125 s1, e1 = 0, len(lines1)
126 s1, e1 = 0, len(lines1)
126 s2, e2 = 0, len(lines2)
127 s2, e2 = 0, len(lines2)
127 while s1 < e1 or s2 < e2:
128 while s1 < e1 or s2 < e2:
128 i1, i2, btype = s1, s2, b'='
129 i1, i2, btype = s1, s2, b'='
129 if i1 >= e1 or lines1[i1] == 0 or i2 >= e2 or lines2[i2] == 0:
130 if i1 >= e1 or lines1[i1] == 0 or i2 >= e2 or lines2[i2] == 0:
130 # Consume the block of blank lines
131 # Consume the block of blank lines
131 btype = b'~'
132 btype = b'~'
132 while i1 < e1 and lines1[i1] == 0:
133 while i1 < e1 and lines1[i1] == 0:
133 i1 += 1
134 i1 += 1
134 while i2 < e2 and lines2[i2] == 0:
135 while i2 < e2 and lines2[i2] == 0:
135 i2 += 1
136 i2 += 1
136 else:
137 else:
137 # Consume the matching lines
138 # Consume the matching lines
138 while i1 < e1 and lines1[i1] == 1 and lines2[i2] == 1:
139 while i1 < e1 and lines1[i1] == 1 and lines2[i2] == 1:
139 i1 += 1
140 i1 += 1
140 i2 += 1
141 i2 += 1
141 yield [base1 + s1, base1 + i1, base2 + s2, base2 + i2], btype
142 yield [base1 + s1, base1 + i1, base2 + s2, base2 + i2], btype
142 s1 = i1
143 s1 = i1
143 s2 = i2
144 s2 = i2
144
145
145
146
146 def hunkinrange(hunk, linerange):
147 def hunkinrange(hunk, linerange):
147 """Return True if `hunk` defined as (start, length) is in `linerange`
148 """Return True if `hunk` defined as (start, length) is in `linerange`
148 defined as (lowerbound, upperbound).
149 defined as (lowerbound, upperbound).
149
150
150 >>> hunkinrange((5, 10), (2, 7))
151 >>> hunkinrange((5, 10), (2, 7))
151 True
152 True
152 >>> hunkinrange((5, 10), (6, 12))
153 >>> hunkinrange((5, 10), (6, 12))
153 True
154 True
154 >>> hunkinrange((5, 10), (13, 17))
155 >>> hunkinrange((5, 10), (13, 17))
155 True
156 True
156 >>> hunkinrange((5, 10), (3, 17))
157 >>> hunkinrange((5, 10), (3, 17))
157 True
158 True
158 >>> hunkinrange((5, 10), (1, 3))
159 >>> hunkinrange((5, 10), (1, 3))
159 False
160 False
160 >>> hunkinrange((5, 10), (18, 20))
161 >>> hunkinrange((5, 10), (18, 20))
161 False
162 False
162 >>> hunkinrange((5, 10), (1, 5))
163 >>> hunkinrange((5, 10), (1, 5))
163 False
164 False
164 >>> hunkinrange((5, 10), (15, 27))
165 >>> hunkinrange((5, 10), (15, 27))
165 False
166 False
166 """
167 """
167 start, length = hunk
168 start, length = hunk
168 lowerbound, upperbound = linerange
169 lowerbound, upperbound = linerange
169 return lowerbound < start + length and start < upperbound
170 return lowerbound < start + length and start < upperbound
170
171
171
172
def blocksinrange(blocks, rangeb):
    """filter `blocks` like (a1, a2, b1, b2) from items outside line range
    `rangeb` from ``(b1, b2)`` point of view.

    Return `filteredblocks, rangea` where:

    * `filteredblocks` is list of ``block = (a1, a2, b1, b2), stype`` items of
      `blocks` that are inside `rangeb` from ``(b1, b2)`` point of view; a
      block ``(b1, b2)`` being inside `rangeb` if
      ``rangeb[0] < b2 and b1 < rangeb[1]``;
    * `rangea` is the line range w.r.t. to ``(a1, a2)`` parts of `blocks`.
    """
    lbb, ubb = rangeb
    lba = uba = None
    filteredblocks = []
    for block in blocks:
        (a1, a2, b1, b2), stype = block
        if lbb >= b1 and ubb <= b2 and stype == b'=':
            # rangeb sits entirely inside one "=" hunk: translate both
            # bounds across by the hunk's b->a offset.
            lba = lbb - b1 + a1
            uba = ubb - b1 + a1
        else:
            if b1 <= lbb < b2:
                # lower bound falls within this hunk
                lba = a2 - (b2 - lbb) if stype == b'=' else a1
            if b1 < ubb <= b2:
                # upper bound falls within this hunk
                uba = a1 + (ubb - b1) if stype == b'=' else a2
        # keep the block if (b1, b2) overlaps rangeb (inlined hunkinrange)
        if lbb < b2 and b1 < ubb:
            filteredblocks.append(block)
    if lba is None or uba is None or uba < lba:
        raise error.InputError(_(b'line range exceeds file size'))
    return filteredblocks, (lba, uba)
210
211
211
212
def chooseblocksfunc(opts=None):
    """Pick the block-matching function implied by *opts*.

    Returns ``bdiff.xdiffblocks`` when the xdiff algorithm is requested
    and available in this build of bdiff; otherwise the default
    ``bdiff.blocks``.
    """
    use_xdiff = (
        opts is not None
        and opts.xdiff
        and util.safehasattr(bdiff, 'xdiffblocks')
    )
    if use_xdiff:
        return bdiff.xdiffblocks
    return bdiff.blocks
221
222
222
223
def allblocks(text1, text2, opts=None, lines1=None, lines2=None):
    """Return (block, type) tuples, where block is an mdiff.blocks
    line entry. type is '=' for blocks matching exactly one another
    (bdiff blocks), '!' for non-matching blocks and '~' for blocks
    matching only after having filtered blank lines.
    line1 and line2 are text1 and text2 split with splitnewlines() if
    they are already available.
    """
    if opts is None:
        opts = defaultopts
    if opts.ignorews or opts.ignorewsamount or opts.ignorewseol:
        text1 = wsclean(opts, text1, False)
        text2 = wsclean(opts, text2, False)
    diff = chooseblocksfunc(opts)(text1, text2)
    for i, s1 in enumerate(diff):
        # The first match is special.
        # we've either found a match starting at line 0 or a match later
        # in the file. If it starts later, old and new below will both be
        # empty and we'll continue to the next match.
        if i > 0:
            s = diff[i - 1]
        else:
            s = [0, 0, 0, 0]
        # the gap between the previous matching block and this one
        s = [s[1], s1[0], s[3], s1[2]]

        # bdiff sometimes gives huge matches past eof, this check eats them,
        # and deals with the special first match case described above
        if s[0] != s[1] or s[2] != s[3]:
            # 'btype' instead of 'type': don't shadow the builtin, and stay
            # consistent with the naming used by the other block generators
            # in this module.
            btype = b'!'
            if opts.ignoreblanklines:
                if lines1 is None:
                    lines1 = splitnewlines(text1)
                if lines2 is None:
                    lines2 = splitnewlines(text2)
                old = wsclean(opts, b"".join(lines1[s[0] : s[1]]))
                new = wsclean(opts, b"".join(lines2[s[2] : s[3]]))
                if old == new:
                    btype = b'~'
            yield s, btype
        yield s1, b'='
263
264
264
265
def unidiff(a, ad, b, bd, fn1, fn2, binary, opts=defaultopts):
    """Return a unified diff as a (headers, hunks) tuple.

    If the diff is not null, `headers` is a list with unified diff header
    lines "--- <original>" and "+++ <new>" and `hunks` is a generator yielding
    (hunkrange, hunklines) coming from _unidiff().
    Otherwise, `headers` and `hunks` are empty.

    Parameters:
      a, b   -- old/new file contents as bytes (None means "file absent")
      ad, bd -- old/new date strings used in the header date tags
      fn1, fn2 -- old/new file names
      binary -- set True if either a or b should be taken as a binary file
      opts   -- diff options (git mode, nodates, noprefix, context, ...)
    """

    def datetag(date, fn=None):
        # classic diffs carry a tab + date; git-style diffs don't, but a
        # lone tab is still emitted after names containing spaces so
        # parsers can find the end of the filename
        if not opts.git and not opts.nodates:
            return b'\t%s' % date
        if fn and b' ' in fn:
            return b'\t'
        return b''

    # the "no diff" return value: empty headers, empty hunks
    sentinel = [], ()
    if not a and not b:
        return sentinel

    if opts.noprefix:
        aprefix = bprefix = b''
    else:
        aprefix = b'a/'
        bprefix = b'b/'

    # epoch date is used for the /dev/null side of creations/deletions
    epoch = dateutil.datestr((0, 0))

    # normalize path separators to '/' for the diff headers
    fn1 = util.pconvert(fn1)
    fn2 = util.pconvert(fn2)

    if binary:
        if a and b and len(a) == len(b) and a == b:
            return sentinel
        headerlines = []
        hunks = ((None, [b'Binary file %s has changed\n' % fn1]),)
    elif not a:
        # file was created (a is None) or emptied (a == b'')
        without_newline = not b.endswith(b'\n')
        b = splitnewlines(b)
        if a is None:
            l1 = b'--- /dev/null%s' % datetag(epoch)
        else:
            l1 = b"--- %s%s%s" % (aprefix, fn1, datetag(ad, fn1))
        l2 = b"+++ %s%s" % (bprefix + fn2, datetag(bd, fn2))
        headerlines = [l1, l2]
        size = len(b)
        hunkrange = (0, 0, 1, size)
        # single hunk adding every line of b
        hunklines = [b"@@ -0,0 +1,%d @@\n" % size] + [b"+" + e for e in b]
        if without_newline:
            hunklines[-1] += b'\n'
            hunklines.append(diffhelper.MISSING_NEWLINE_MARKER)
        hunks = ((hunkrange, hunklines),)
    elif not b:
        # file was removed (b is None) or emptied (b == b'')
        without_newline = not a.endswith(b'\n')
        a = splitnewlines(a)
        l1 = b"--- %s%s%s" % (aprefix, fn1, datetag(ad, fn1))
        if b is None:
            l2 = b'+++ /dev/null%s' % datetag(epoch)
        else:
            l2 = b"+++ %s%s%s" % (bprefix, fn2, datetag(bd, fn2))
        headerlines = [l1, l2]
        size = len(a)
        hunkrange = (1, size, 0, 0)
        # single hunk removing every line of a
        hunklines = [b"@@ -1,%d +0,0 @@\n" % size] + [b"-" + e for e in a]
        if without_newline:
            hunklines[-1] += b'\n'
            hunklines.append(diffhelper.MISSING_NEWLINE_MARKER)
        hunks = ((hunkrange, hunklines),)
    else:
        hunks = _unidiff(a, b, opts=opts)
        # _unidiff prefixes its hunk stream with a bool telling whether
        # there is any hunk at all; consume it here
        if not next(hunks):
            return sentinel

        headerlines = [
            b"--- %s%s%s" % (aprefix, fn1, datetag(ad, fn1)),
            b"+++ %s%s%s" % (bprefix, fn2, datetag(bd, fn2)),
        ]

    return headerlines, hunks
346
347
347
348
def _unidiff(t1, t2, opts=defaultopts):
    """Yield hunks of a headerless unified diff from t1 and t2 texts.

    Each hunk consists of a (hunkrange, hunklines) tuple where `hunkrange` is a
    tuple (s1, l1, s2, l2) representing the range information of the hunk to
    form the '@@ -s1,l1 +s2,l2 @@' header and `hunklines` is a list of lines
    of the hunk combining said header followed by line additions and
    deletions.

    The hunks are prefixed with a bool: True is yielded once before the
    first hunk, or a single False is yielded if there are no hunks at all.
    """
    l1 = splitnewlines(t1)
    l2 = splitnewlines(t2)

    # NOTE: 'l' and 'len' shadow builtins inside these tiny helpers;
    # 'len' here is the total line count used as a clamp.
    def contextend(l, len):
        # clamp line index l + context to the end of the file
        ret = l + opts.context
        if ret > len:
            ret = len
        return ret

    def contextstart(l):
        # clamp line index l - context to the start of the file
        ret = l - opts.context
        if ret < 0:
            return 0
        return ret

    # mutable [last-scanned-position, last-seen-function-line] shared
    # across yieldhunk calls so each source line is scanned at most once
    lastfunc = [0, b'']

    def yieldhunk(hunk):
        (astart, a2, bstart, b2, delta) = hunk
        aend = contextend(a2, len(l1))
        alen = aend - astart
        blen = b2 - bstart + aend - a2

        func = b""
        if opts.showfunc:
            lastpos, func = lastfunc
            # walk backwards from the start of the context up to the start of
            # the previous hunk context until we find a line starting with an
            # alphanumeric char.
            for i in range(astart - 1, lastpos - 1, -1):
                if l1[i][0:1].isalnum():
                    func = b' ' + l1[i].rstrip()
                    # split long function name if ASCII. otherwise we have no
                    # idea where the multi-byte boundary is, so just leave it.
                    if encoding.isasciistr(func):
                        func = func[:41]
                    lastfunc[1] = func
                    break
            # by recording this hunk's starting point as the next place to
            # start looking for function lines, we avoid reading any line in
            # the file more than once.
            lastfunc[0] = astart

        # zero-length hunk ranges report their start line as one less
        if alen:
            astart += 1
        if blen:
            bstart += 1

        hunkrange = astart, alen, bstart, blen
        # header line, then the collected -/+/context delta, then the
        # trailing context lines after the change
        hunklines = (
            [b"@@ -%d,%d +%d,%d @@%s\n" % (hunkrange + (func,))]
            + delta
            + [b' ' + l1[x] for x in range(a2, aend)]
        )
        # If either file ends without a newline and the last line of
        # that file is part of a hunk, a marker is printed. If the
        # last line of both files is identical and neither ends in
        # a newline, print only one marker. That's the only case in
        # which the hunk can end in a shared line without a newline.
        skip = False
        if not t1.endswith(b'\n') and astart + alen == len(l1) + 1:
            for i in range(len(hunklines) - 1, -1, -1):
                if hunklines[i].startswith((b'-', b' ')):
                    if hunklines[i].startswith(b' '):
                        skip = True
                    hunklines[i] += b'\n'
                    hunklines.insert(i + 1, diffhelper.MISSING_NEWLINE_MARKER)
                    break
        if not skip and not t2.endswith(b'\n') and bstart + blen == len(l2) + 1:
            for i in range(len(hunklines) - 1, -1, -1):
                if hunklines[i].startswith(b'+'):
                    hunklines[i] += b'\n'
                    hunklines.insert(i + 1, diffhelper.MISSING_NEWLINE_MARKER)
                    break
        yield hunkrange, hunklines

    # bdiff.blocks gives us the matching sequences in the files. The loop
    # below finds the spaces between those matching sequences and translates
    # them into diff output.
    #
    hunk = None
    ignoredlines = 0
    has_hunks = False
    for s, stype in allblocks(t1, t2, opts, l1, l2):
        a1, a2, b1, b2 = s
        if stype != b'!':
            if stype == b'~':
                # The diff context lines are based on t1 content. When
                # blank lines are ignored, the new lines offsets must
                # be adjusted as if equivalent blocks ('~') had the
                # same sizes on both sides.
                ignoredlines += (b2 - b1) - (a2 - a1)
            continue
        delta = []
        old = l1[a1:a2]
        new = l2[b1:b2]

        b1 -= ignoredlines
        b2 -= ignoredlines
        astart = contextstart(a1)
        bstart = contextstart(b1)
        prev = None
        if hunk:
            # join with the previous hunk if it falls inside the context
            if astart < hunk[1] + opts.context + 1:
                prev = hunk
                astart = hunk[1]
                bstart = hunk[3]
            else:
                # flush the previous hunk; the True prefix is emitted
                # lazily the first time we have anything to yield
                if not has_hunks:
                    has_hunks = True
                    yield True
                for x in yieldhunk(hunk):
                    yield x
        if prev:
            # we've joined the previous hunk, record the new ending points.
            hunk[1] = a2
            hunk[3] = b2
            delta = hunk[4]
        else:
            # create a new hunk
            hunk = [astart, a2, bstart, b2, delta]

        # leading context, then removed lines, then added lines
        delta[len(delta) :] = [b' ' + x for x in l1[astart:a1]]
        delta[len(delta) :] = [b'-' + x for x in old]
        delta[len(delta) :] = [b'+' + x for x in new]

    if hunk:
        # flush the final pending hunk
        if not has_hunks:
            has_hunks = True
            yield True
        for x in yieldhunk(hunk):
            yield x
    elif not has_hunks:
        # no hunks at all: signal the caller with a single False
        yield False
495
496
496
497
def b85diff(to, tn):
    '''print base85-encoded binary diff'''

    def fmtline(line):
        # git's binary-patch length byte: 'A'..'Z' encode 1..26,
        # 'a'..'z' encode 27..52
        linelen = len(line)
        if linelen <= 26:
            code = pycompat.bytechr(ord(b'A') + linelen - 1)
        else:
            code = pycompat.bytechr(linelen - 26 + ord(b'a') - 1)
        return b'%c%s\n' % (code, util.b85encode(line, True))

    def chunk(text, csize=52):
        # slice text into csize-byte pieces (last piece may be shorter)
        for start in range(0, len(text), csize):
            yield text[start : start + csize]

    if to is None:
        to = b''
    if tn is None:
        tn = b''

    if to == tn:
        return b''

    # TODO: deltas
    lines = [
        b'GIT binary patch\n',
        b'literal %d\n' % len(tn),
    ]
    for piece in chunk(zlib.compress(tn)):
        lines.append(fmtline(piece))
    lines.append(b'\n')

    return b''.join(lines)
532
533
533
534
def patchtext(bin):
    """Return the concatenated replacement fragments of binary patch *bin*.

    The patch is a sequence of (start, end, length) big-endian headers,
    each followed by `length` bytes of replacement text; only the text
    parts are collected.
    """
    offset = 0
    total = len(bin)
    fragments = []
    while offset < total:
        _start, _end, fraglen = struct.unpack(b">lll", bin[offset : offset + 12])
        offset += 12
        fragments.append(bin[offset : offset + fraglen])
        offset += fraglen
    return b"".join(fragments)
543
544
544
545
def patch(a, bin):
    """Apply binary patch *bin* to text *a* and return the patched text."""
    if len(a) > 0:
        return mpatch.patches(a, [bin])
    # empty base: the patch is a trivial 12-byte delta header followed
    # by the full new text, so just skip over the header
    return util.buffer(bin, 12)
550
551
551
552
# similar to difflib.SequenceMatcher.get_matching_blocks
def get_matching_blocks(a, b):
    """Return bdiff's matches as (a-start, b-start, length) triples."""
    matches = []
    for a1, a2, b1, _b2 in bdiff.blocks(a, b):
        matches.append((a1, b1, a2 - a1))
    return matches
555
556
556
557
def trivialdiffheader(length):
    """Return a delta header inserting `length` bytes at offset 0.

    Returns empty bytes for a zero length, matching an empty patch.
    """
    if not length:
        return b''
    return struct.pack(b">lll", 0, 0, length)
559
560
560
561
def replacediffheader(oldlen, newlen):
    """Return a delta header replacing the first `oldlen` bytes with
    `newlen` bytes of new content."""
    header = struct.pack(b">lll", 0, oldlen, newlen)
    return header
General Comments 0
You need to be logged in to leave comments. Login now