##// END OF EJS Templates
diffs: limit the file context to ~1mln lines. Fixes #4184...
marcink -
r679:df6d63d7 stable
parent child Browse files
Show More
@@ -1,877 +1,885 b''
1 # -*- coding: utf-8 -*-
1 # -*- coding: utf-8 -*-
2
2
3 # Copyright (C) 2011-2016 RhodeCode GmbH
3 # Copyright (C) 2011-2016 RhodeCode GmbH
4 #
4 #
5 # This program is free software: you can redistribute it and/or modify
5 # This program is free software: you can redistribute it and/or modify
6 # it under the terms of the GNU Affero General Public License, version 3
6 # it under the terms of the GNU Affero General Public License, version 3
7 # (only), as published by the Free Software Foundation.
7 # (only), as published by the Free Software Foundation.
8 #
8 #
9 # This program is distributed in the hope that it will be useful,
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU General Public License for more details.
12 # GNU General Public License for more details.
13 #
13 #
14 # You should have received a copy of the GNU Affero General Public License
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
16 #
16 #
17 # This program is dual-licensed. If you wish to learn more about the
17 # This program is dual-licensed. If you wish to learn more about the
18 # RhodeCode Enterprise Edition, including its added features, Support services,
18 # RhodeCode Enterprise Edition, including its added features, Support services,
19 # and proprietary license terms, please see https://rhodecode.com/licenses/
19 # and proprietary license terms, please see https://rhodecode.com/licenses/
20
20
21
21
22 """
22 """
23 Set of diffing helpers, previously part of vcs
23 Set of diffing helpers, previously part of vcs
24 """
24 """
25
25
26 import collections
26 import collections
27 import re
27 import re
28 import difflib
28 import difflib
29 import logging
29 import logging
30
30
31 from itertools import tee, imap
31 from itertools import tee, imap
32
32
33 from pylons.i18n.translation import _
33 from pylons.i18n.translation import _
34
34
35 from rhodecode.lib.vcs.exceptions import VCSError
35 from rhodecode.lib.vcs.exceptions import VCSError
36 from rhodecode.lib.vcs.nodes import FileNode, SubModuleNode
36 from rhodecode.lib.vcs.nodes import FileNode, SubModuleNode
37 from rhodecode.lib.vcs.backends.base import EmptyCommit
37 from rhodecode.lib.vcs.backends.base import EmptyCommit
38 from rhodecode.lib.helpers import escape
38 from rhodecode.lib.helpers import escape
39 from rhodecode.lib.utils2 import safe_unicode
39 from rhodecode.lib.utils2 import safe_unicode
40
40
41 log = logging.getLogger(__name__)
41 log = logging.getLogger(__name__)
42
42
# Upper bound for the number of context lines requested for a diff. A file
# with more lines than this is unusable in a browser anyway; `get_gitdiff`
# clamps the requested context to this value.
MAX_CONTEXT = 1024 * 1024
46
43
47
class OPS(object):
    """
    Single-letter codes for the operation performed on a file in a diff.

    `DiffProcessor._parse_gitdiff` stores one of these under the
    ``operation`` key of every parsed file entry.
    """

    ADD = 'A'  # file was added
    MOD = 'M'  # file was modified (also used for rename, copy and chmod)
    DEL = 'D'  # file was deleted
48
52
49
53
def wrap_to_table(str_):
    """
    Wraps ``str_`` into a single-row ``code-difftable`` HTML table.

    :param str_: content placed inside the ``<pre>`` cell; it is inserted
        as-is, no escaping is performed here
    :returns: the HTML markup of the table as a string
    """
    template = '''<table class="code-difftable">
<tr class="line no-comment">
<td class="add-comment-line tooltip" title="%s"><span class="add-comment-content"></span></td>
<td class="lineno new"></td>
<td class="code no-comment"><pre>%s</pre></td>
</tr>
</table>'''
    tooltip = _('Click to comment')
    return template % (tooltip, str_)
58
62
59
63
def wrapped_diff(filenode_old, filenode_new, diff_limit=None, file_limit=None,
                 show_full_diff=False, ignore_whitespace=True, line_context=3,
                 enable_comments=False):
    """
    returns a wrapped diff into a table, checks for cut_off_limit for file and
    whole diff and presents proper message

    :param filenode_old: old version of the file; ``None`` is replaced by an
        empty `FileNode` at the path of ``filenode_new``
    :param filenode_new: new version of the file
    :param diff_limit: ``-1`` always produces the "too big" message; ``None``
        skips the node size check; otherwise both nodes must be smaller than
        this value for the diff to be generated
    :param file_limit: passed through to `DiffProcessor`
    :param show_full_diff: passed through to `DiffProcessor`
    :param ignore_whitespace: passed through to `get_gitdiff`
    :param line_context: passed to `get_gitdiff` as ``context``
    :param enable_comments: passed through to ``DiffProcessor.as_html``
    :returns: tuple ``(size, cs1, cs2, diff, stats, data)`` where ``cs1`` and
        ``cs2`` are the raw ids of the old and new commit
    """
    if filenode_old is None:
        filenode_old = FileNode(filenode_new.path, '', EmptyCommit())

    # values reported by every branch that does not render a real diff
    stats = None
    size = 0
    data = None

    if filenode_old.is_binary or filenode_new.is_binary:
        diff = wrap_to_table(_('Binary file'))

    elif diff_limit != -1 and (
            diff_limit is None or
            (filenode_old.size < diff_limit and
             filenode_new.size < diff_limit)):

        f_gitdiff = get_gitdiff(filenode_old, filenode_new,
                                ignore_whitespace=ignore_whitespace,
                                context=line_context)
        diff_processor = DiffProcessor(
            f_gitdiff, format='gitdiff', diff_limit=diff_limit,
            file_limit=file_limit, show_full_diff=show_full_diff)
        parsed = diff_processor.prepare()

        diff = diff_processor.as_html(enable_comments=enable_comments)
        size = len(diff or '')
        if parsed:
            # only the first parsed file entry is reported
            data = parsed[0]
            stats = data['stats']
    else:
        diff = wrap_to_table(_('Changeset was too big and was cut off, use '
                               'diff menu to display this diff'))

    if not diff:
        # an empty diff is either a submodule change or no change at all
        submodules = [node for node in (filenode_new, filenode_old)
                      if isinstance(node, SubModuleNode)]
        if submodules:
            diff = wrap_to_table(escape('Submodule %r' % submodules[0]))
        else:
            diff = wrap_to_table(_('No changes detected'))

    cs1 = filenode_old.commit.raw_id
    cs2 = filenode_new.commit.raw_id

    return size, cs1, cs2, diff, stats, data
110
114
111
115
def get_gitdiff(filenode_old, filenode_new, ignore_whitespace=True, context=3):
    """
    Returns git style diff between given ``filenode_old`` and ``filenode_new``.

    An empty string is returned when either node is a `SubModuleNode`.

    :param filenode_old: old version of the file
    :param filenode_new: new version of the file; its commit's repository is
        the one asked for the diff
    :param ignore_whitespace: ignore whitespaces in diff
    :param context: number of context lines; a falsy value falls back to 3
        and anything above ``MAX_CONTEXT`` is clamped to ``MAX_CONTEXT``
    :raises VCSError: if one of the nodes is not a `FileNode`
    """
    # make sure we pass in default context, and protect against IntOverflow
    # when passing HUGE context
    context = min(context or 3, MAX_CONTEXT)

    nodes = (filenode_old, filenode_new)
    if any(isinstance(node, SubModuleNode) for node in nodes):
        return ''

    for node in nodes:
        if not isinstance(node, FileNode):
            raise VCSError(
                "Given object should be FileNode object, not %s"
                % node.__class__)

    new_commit = filenode_new.commit
    repo = new_commit.repository
    old_commit = filenode_old.commit or repo.EMPTY_COMMIT

    return repo.get_diff(
        old_commit, new_commit, filenode_new.path,
        ignore_whitespace, context, path1=filenode_old.path)
139
147
# Keys of the ``stats['ops']`` dict that `DiffProcessor._parse_gitdiff` fills
# in for every file; each key maps to a human readable message.
NEW_FILENODE = 1      # file was added
DEL_FILENODE = 2      # file was deleted
MOD_FILENODE = 3      # file content was modified
RENAMED_FILENODE = 4  # file was renamed
COPIED_FILENODE = 5   # file was copied
CHMOD_FILENODE = 6    # file mode was changed
BIN_FILENODE = 7      # binary patch, diff is hidden
147
155
148
156
class LimitedDiffContainer(object):
    """
    Wrapper around a parsed diff that was cut off because a limit was hit.

    Indexing and iteration are delegated to the wrapped ``diff``; the limit
    and the diff size reached so far are kept as plain attributes.
    """

    def __init__(self, diff_limit, cur_diff_size, diff):
        """
        :param diff_limit: the limit that was configured
        :param cur_diff_size: size of the diff counted so far
        :param diff: the wrapped parsed diff
        """
        self.diff_limit = diff_limit
        self.cur_diff_size = cur_diff_size
        self.diff = diff

    def __getitem__(self, key):
        return self.diff[key]

    def __iter__(self):
        for entry in self.diff:
            yield entry
162
170
163
171
class Action(object):
    """
    Contains constants for the action value of the lines in a parsed diff.
    """

    # line exists only in the new version
    ADD = 'add'
    # line exists only in the old version
    DELETE = 'del'
    # line is present in both versions
    UNMODIFIED = 'unmod'

    # informational line (hunk header, operation message, newline marker)
    CONTEXT = 'context'
174
182
175
183
176 class DiffProcessor(object):
184 class DiffProcessor(object):
177 """
185 """
178 Give it a unified or git diff and it returns a list of the files that were
186 Give it a unified or git diff and it returns a list of the files that were
179 mentioned in the diff together with a dict of meta information that
187 mentioned in the diff together with a dict of meta information that
180 can be used to render it in a HTML template.
188 can be used to render it in a HTML template.
181
189
182 .. note:: Unicode handling
190 .. note:: Unicode handling
183
191
184 The original diffs are a byte sequence and can contain filenames
192 The original diffs are a byte sequence and can contain filenames
185 in mixed encodings. This class generally returns `unicode` objects
193 in mixed encodings. This class generally returns `unicode` objects
186 since the result is intended for presentation to the user.
194 since the result is intended for presentation to the user.
187
195
188 """
196 """
189 _chunk_re = re.compile(r'^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(.*)')
197 _chunk_re = re.compile(r'^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(.*)')
190 _newline_marker = re.compile(r'^\\ No newline at end of file')
198 _newline_marker = re.compile(r'^\\ No newline at end of file')
191
199
192 # used for inline highlighter word split
200 # used for inline highlighter word split
193 _token_re = re.compile(r'()(&gt;|&lt;|&amp;|\W+?)')
201 _token_re = re.compile(r'()(&gt;|&lt;|&amp;|\W+?)')
194
202
195 def __init__(self, diff, format='gitdiff', diff_limit=None,
203 def __init__(self, diff, format='gitdiff', diff_limit=None,
196 file_limit=None, show_full_diff=True):
204 file_limit=None, show_full_diff=True):
197 """
205 """
198 :param diff: A `Diff` object representing a diff from a vcs backend
206 :param diff: A `Diff` object representing a diff from a vcs backend
199 :param format: format of diff passed, `udiff` or `gitdiff`
207 :param format: format of diff passed, `udiff` or `gitdiff`
200 :param diff_limit: define the size of diff that is considered "big"
208 :param diff_limit: define the size of diff that is considered "big"
201 based on that parameter cut off will be triggered, set to None
209 based on that parameter cut off will be triggered, set to None
202 to show full diff
210 to show full diff
203 """
211 """
204 self._diff = diff
212 self._diff = diff
205 self._format = format
213 self._format = format
206 self.adds = 0
214 self.adds = 0
207 self.removes = 0
215 self.removes = 0
208 # calculate diff size
216 # calculate diff size
209 self.diff_limit = diff_limit
217 self.diff_limit = diff_limit
210 self.file_limit = file_limit
218 self.file_limit = file_limit
211 self.show_full_diff = show_full_diff
219 self.show_full_diff = show_full_diff
212 self.cur_diff_size = 0
220 self.cur_diff_size = 0
213 self.parsed = False
221 self.parsed = False
214 self.parsed_diff = []
222 self.parsed_diff = []
215
223
216 if format == 'gitdiff':
224 if format == 'gitdiff':
217 self.differ = self._highlight_line_difflib
225 self.differ = self._highlight_line_difflib
218 self._parser = self._parse_gitdiff
226 self._parser = self._parse_gitdiff
219 else:
227 else:
220 self.differ = self._highlight_line_udiff
228 self.differ = self._highlight_line_udiff
221 self._parser = self._parse_udiff
229 self._parser = self._parse_udiff
222
230
223 def _copy_iterator(self):
231 def _copy_iterator(self):
224 """
232 """
225 make a fresh copy of generator, we should not iterate thru
233 make a fresh copy of generator, we should not iterate thru
226 an original as it's needed for repeating operations on
234 an original as it's needed for repeating operations on
227 this instance of DiffProcessor
235 this instance of DiffProcessor
228 """
236 """
229 self.__udiff, iterator_copy = tee(self.__udiff)
237 self.__udiff, iterator_copy = tee(self.__udiff)
230 return iterator_copy
238 return iterator_copy
231
239
def _escaper(self, string):
    """
    Escaper for diff escapes special chars and checks the diff limit

    The length of ``string`` is added to ``self.cur_diff_size`` before the
    limit is checked.

    :param string: a piece of the raw diff
    :returns: unicode version of ``string`` with ``&``, ``<`` and ``>``
        replaced by their HTML entities
    :raises DiffLimitExceeded: if ``show_full_diff`` is off and the
        accumulated size is bigger than ``diff_limit``
    """

    self.cur_diff_size += len(string)

    # ``diff_limit=None`` is documented as "show full diff". Comparing an
    # int against None is always true on Python 2 (and a TypeError on
    # Python 3), so None has to be excluded explicitly.
    limit_exceeded = (
        self.diff_limit is not None and
        self.cur_diff_size > self.diff_limit)

    if not self.show_full_diff and limit_exceeded:
        raise DiffLimitExceeded('Diff Limit Exceeded')

    # '&' must be replaced first so the entities added below stay intact
    return safe_unicode(string)\
        .replace('&', '&amp;')\
        .replace('<', '&lt;')\
        .replace('>', '&gt;')
248
256
def _line_counter(self, l):
    """
    Checks each line and bumps total adds/removes for this diff

    ``+++`` and ``---`` file header lines are not counted.

    :param l: a single diff line
    :returns: unicode version of ``l``
    """
    is_added = l.startswith('+') and not l.startswith('+++')
    is_removed = l.startswith('-') and not l.startswith('---')

    if is_added:
        self.adds += 1
    elif is_removed:
        self.removes += 1
    return safe_unicode(l)
260
268
def _highlight_line_difflib(self, line, next_):
    """
    Highlight inline changes in both lines.

    Both lines are split into tokens with ``_token_re`` and compared with
    `difflib.SequenceMatcher`; differing fragments are wrapped in ``<del>``
    (old line) or ``<ins>`` (new line). The ``line`` values of both dicts
    are replaced in place.

    :param line: parsed line dict
    :param next_: parsed line dict following ``line``
    """
    # whichever of the two is the deletion is the "old" side
    if line['action'] == Action.DELETE:
        removed, added = line, next_
    else:
        removed, added = next_, line

    removed_tokens = self._token_re.split(removed['line'])
    added_tokens = self._token_re.split(added['line'])
    matcher = difflib.SequenceMatcher(None, removed_tokens, added_tokens)

    removed_parts = []
    added_parts = []
    for tag, i1, i2, j1, j2 in matcher.get_opcodes():
        removed_piece = ''.join(removed_tokens[i1:i2])
        added_piece = ''.join(added_tokens[j1:j2])
        changed = tag != 'equal'
        # empty fragments are never wrapped
        if changed and removed_piece:
            removed_piece = '<del>%s</del>' % removed_piece
        if changed and added_piece:
            added_piece = '<ins>%s</ins>' % added_piece
        removed_parts.append(removed_piece)
        added_parts.append(added_piece)

    removed['line'] = "".join(removed_parts)
    added['line'] = "".join(added_parts)
289
297
290 def _highlight_line_udiff(self, line, next_):
298 def _highlight_line_udiff(self, line, next_):
291 """
299 """
292 Highlight inline changes in both lines.
300 Highlight inline changes in both lines.
293 """
301 """
294 start = 0
302 start = 0
295 limit = min(len(line['line']), len(next_['line']))
303 limit = min(len(line['line']), len(next_['line']))
296 while start < limit and line['line'][start] == next_['line'][start]:
304 while start < limit and line['line'][start] == next_['line'][start]:
297 start += 1
305 start += 1
298 end = -1
306 end = -1
299 limit -= start
307 limit -= start
300 while -end <= limit and line['line'][end] == next_['line'][end]:
308 while -end <= limit and line['line'][end] == next_['line'][end]:
301 end -= 1
309 end -= 1
302 end += 1
310 end += 1
303 if start or end:
311 if start or end:
304 def do(l):
312 def do(l):
305 last = end + len(l['line'])
313 last = end + len(l['line'])
306 if l['action'] == Action.ADD:
314 if l['action'] == Action.ADD:
307 tag = 'ins'
315 tag = 'ins'
308 else:
316 else:
309 tag = 'del'
317 tag = 'del'
310 l['line'] = '%s<%s>%s</%s>%s' % (
318 l['line'] = '%s<%s>%s</%s>%s' % (
311 l['line'][:start],
319 l['line'][:start],
312 tag,
320 tag,
313 l['line'][start:last],
321 l['line'][start:last],
314 tag,
322 tag,
315 l['line'][last:]
323 l['line'][last:]
316 )
324 )
317 do(line)
325 do(line)
318 do(next_)
326 do(next_)
319
327
320 def _clean_line(self, line, command):
328 def _clean_line(self, line, command):
321 if command in ['+', '-', ' ']:
329 if command in ['+', '-', ' ']:
322 # only modify the line if it's actually a diff thing
330 # only modify the line if it's actually a diff thing
323 line = line[1:]
331 line = line[1:]
324 return line
332 return line
325
333
def _parse_gitdiff(self, inline_diff=True):
    """
    Parse every chunk of ``self._diff`` into a per-file dict.

    Each dict carries ``filename``, ``old_revision``, ``new_revision``,
    ``chunks``, ``raw_diff``, ``operation`` (one of `OPS`), ``stats``,
    ``exceeds_limit`` and ``is_limited_diff``. The result is sorted so that
    added files come first, then modified, then deleted ones.

    :param inline_diff: when true, neighbouring changed lines of different
        action get their inline differences highlighted via ``self.differ``
    :returns: the sorted list of file dicts; wrapped in a
        `LimitedDiffContainer` if a limit was hit for any file
    """
    _files = []
    diff_container = lambda arg: arg

    for chunk in self._diff.chunks():
        head = chunk.header

        # lazy: `_escaper` enforces the diff limit while `_parse_lines`
        # consumes the lines
        diff = imap(self._escaper, chunk.diff.splitlines(1))
        raw_diff = chunk.raw
        limited_diff = False
        exceeds_limit = False

        op = None
        # every operation starts out flagged as binary; the flag is cleared
        # below once real diff lines have been parsed
        stats = {
            'added': 0,
            'deleted': 0,
            'binary': False,
            'ops': {},
        }

        if head['deleted_file_mode']:
            op = OPS.DEL
            stats['binary'] = True
            stats['ops'][DEL_FILENODE] = 'deleted file'

        elif head['new_file_mode']:
            op = OPS.ADD
            stats['binary'] = True
            stats['ops'][NEW_FILENODE] = 'new file %s' % head['new_file_mode']
        else:  # modify operation, can be copy, rename or chmod

            # CHMOD
            if head['new_mode'] and head['old_mode']:
                op = OPS.MOD
                stats['binary'] = True
                stats['ops'][CHMOD_FILENODE] = (
                    'modified file chmod %s => %s' % (
                        head['old_mode'], head['new_mode']))
            # RENAME
            if head['rename_from'] != head['rename_to']:
                op = OPS.MOD
                stats['binary'] = True
                stats['ops'][RENAMED_FILENODE] = (
                    'file renamed from %s to %s' % (
                        head['rename_from'], head['rename_to']))
            # COPY
            if head.get('copy_from') and head.get('copy_to'):
                op = OPS.MOD
                stats['binary'] = True
                stats['ops'][COPIED_FILENODE] = (
                    'file copied from %s to %s' % (
                        head['copy_from'], head['copy_to']))

        # If our new parsed headers didn't match anything fallback to
        # old style detection
        if op is None:
            if not head['a_file'] and head['b_file']:
                op = OPS.ADD
                stats['binary'] = True
                stats['ops'][NEW_FILENODE] = 'new file'

            elif head['a_file'] and not head['b_file']:
                op = OPS.DEL
                stats['binary'] = True
                stats['ops'][DEL_FILENODE] = 'deleted file'

        # it's not ADD not DELETE
        if op is None:
            op = OPS.MOD
            stats['binary'] = True
            stats['ops'][MOD_FILENODE] = 'modified file'

        # a real non-binary diff
        if head['a_file'] or head['b_file']:
            try:
                raw_diff, chunks, _stats = self._parse_lines(diff)
                stats['binary'] = False
                stats['added'] = _stats[0]
                stats['deleted'] = _stats[1]
                # explicit mark that it's a modified file
                if op == OPS.MOD:
                    stats['ops'][MOD_FILENODE] = 'modified file'
                # a missing file limit means "no limit"; comparing against
                # None is always true on Python 2 and an error on Python 3
                exceeds_limit = (
                    self.file_limit is not None and
                    len(raw_diff) > self.file_limit)

                # changed from _escaper function so we validate size of
                # each file instead of the whole diff
                # diff will hide big files but still show small ones
                # from my tests, big files are fairly safe to be parsed
                # but the browser is the bottleneck
                if not self.show_full_diff and exceeds_limit:
                    raise DiffLimitExceeded('File Limit Exceeded')

            except DiffLimitExceeded:
                diff_container = lambda _diff: \
                    LimitedDiffContainer(
                        self.diff_limit, self.cur_diff_size, _diff)

                # if `_parse_lines` itself raised, ``raw_diff`` is still
                # the unparsed ``chunk.raw``
                exceeds_limit = (
                    self.file_limit is not None and
                    len(raw_diff) > self.file_limit)
                limited_diff = True
                chunks = []

        else:  # GIT format binary patch, or possibly empty diff
            if head['bin_patch']:
                # we have operation already extracted, but we mark simply
                # it's a diff we wont show for binary files
                stats['ops'][BIN_FILENODE] = 'binary diff hidden'
            chunks = []

        if chunks and not self.show_full_diff and op == OPS.DEL:
            # if not full diff mode show deleted file contents
            # TODO: anderson: if the view is not too big, there is no way
            # to see the content of the file
            chunks = []

        # first chunk: one context line per operation message, the plain
        # "modified file" message is left out
        chunks.insert(0, [{
            'old_lineno': '',
            'new_lineno': '',
            'action': Action.CONTEXT,
            'line': msg,
        } for _op, msg in stats['ops'].iteritems()
            if _op not in [MOD_FILENODE]])

        _files.append({
            'filename': safe_unicode(head['b_path']),
            'old_revision': head['a_blob_id'],
            'new_revision': head['b_blob_id'],
            'chunks': chunks,
            'raw_diff': safe_unicode(raw_diff),
            'operation': op,
            'stats': stats,
            'exceeds_limit': exceeds_limit,
            'is_limited_diff': limited_diff,
        })

    sorter = lambda info: {OPS.ADD: 0, OPS.MOD: 1,
                           OPS.DEL: 2}.get(info['operation'])

    if not inline_diff:
        return diff_container(sorted(_files, key=sorter))

    # highlight inline changes
    unchanged = (Action.UNMODIFIED, Action.CONTEXT)
    for diff_data in _files:
        for chunk in diff_data['chunks']:
            lineiter = iter(chunk)
            try:
                while 1:
                    line = next(lineiter)
                    if line['action'] not in unchanged:
                        # the following line is consumed here and is not
                        # looked at again as a potential first line
                        nextline = next(lineiter)
                        if nextline['action'] in unchanged or \
                                nextline['action'] == line['action']:
                            continue
                        self.differ(line, nextline)
            except StopIteration:
                pass

    return diff_container(sorted(_files, key=sorter))
484
492
485 def _parse_udiff(self, inline_diff=True):
493 def _parse_udiff(self, inline_diff=True):
486 raise NotImplementedError()
494 raise NotImplementedError()
487
495
488 def _parse_lines(self, diff):
496 def _parse_lines(self, diff):
489 """
497 """
490 Parse the diff an return data for the template.
498 Parse the diff an return data for the template.
491 """
499 """
492
500
493 lineiter = iter(diff)
501 lineiter = iter(diff)
494 stats = [0, 0]
502 stats = [0, 0]
495 chunks = []
503 chunks = []
496 raw_diff = []
504 raw_diff = []
497
505
498 try:
506 try:
499 line = lineiter.next()
507 line = lineiter.next()
500
508
501 while line:
509 while line:
502 raw_diff.append(line)
510 raw_diff.append(line)
503 lines = []
511 lines = []
504 chunks.append(lines)
512 chunks.append(lines)
505
513
506 match = self._chunk_re.match(line)
514 match = self._chunk_re.match(line)
507
515
508 if not match:
516 if not match:
509 break
517 break
510
518
511 gr = match.groups()
519 gr = match.groups()
512 (old_line, old_end,
520 (old_line, old_end,
513 new_line, new_end) = [int(x or 1) for x in gr[:-1]]
521 new_line, new_end) = [int(x or 1) for x in gr[:-1]]
514 old_line -= 1
522 old_line -= 1
515 new_line -= 1
523 new_line -= 1
516
524
517 context = len(gr) == 5
525 context = len(gr) == 5
518 old_end += old_line
526 old_end += old_line
519 new_end += new_line
527 new_end += new_line
520
528
521 if context:
529 if context:
522 # skip context only if it's first line
530 # skip context only if it's first line
523 if int(gr[0]) > 1:
531 if int(gr[0]) > 1:
524 lines.append({
532 lines.append({
525 'old_lineno': '...',
533 'old_lineno': '...',
526 'new_lineno': '...',
534 'new_lineno': '...',
527 'action': Action.CONTEXT,
535 'action': Action.CONTEXT,
528 'line': line,
536 'line': line,
529 })
537 })
530
538
531 line = lineiter.next()
539 line = lineiter.next()
532
540
533 while old_line < old_end or new_line < new_end:
541 while old_line < old_end or new_line < new_end:
534 command = ' '
542 command = ' '
535 if line:
543 if line:
536 command = line[0]
544 command = line[0]
537
545
538 affects_old = affects_new = False
546 affects_old = affects_new = False
539
547
540 # ignore those if we don't expect them
548 # ignore those if we don't expect them
541 if command in '#@':
549 if command in '#@':
542 continue
550 continue
543 elif command == '+':
551 elif command == '+':
544 affects_new = True
552 affects_new = True
545 action = Action.ADD
553 action = Action.ADD
546 stats[0] += 1
554 stats[0] += 1
547 elif command == '-':
555 elif command == '-':
548 affects_old = True
556 affects_old = True
549 action = Action.DELETE
557 action = Action.DELETE
550 stats[1] += 1
558 stats[1] += 1
551 else:
559 else:
552 affects_old = affects_new = True
560 affects_old = affects_new = True
553 action = Action.UNMODIFIED
561 action = Action.UNMODIFIED
554
562
555 if not self._newline_marker.match(line):
563 if not self._newline_marker.match(line):
556 old_line += affects_old
564 old_line += affects_old
557 new_line += affects_new
565 new_line += affects_new
558 lines.append({
566 lines.append({
559 'old_lineno': affects_old and old_line or '',
567 'old_lineno': affects_old and old_line or '',
560 'new_lineno': affects_new and new_line or '',
568 'new_lineno': affects_new and new_line or '',
561 'action': action,
569 'action': action,
562 'line': self._clean_line(line, command)
570 'line': self._clean_line(line, command)
563 })
571 })
564 raw_diff.append(line)
572 raw_diff.append(line)
565
573
566 line = lineiter.next()
574 line = lineiter.next()
567
575
568 if self._newline_marker.match(line):
576 if self._newline_marker.match(line):
569 # we need to append to lines, since this is not
577 # we need to append to lines, since this is not
570 # counted in the line specs of diff
578 # counted in the line specs of diff
571 lines.append({
579 lines.append({
572 'old_lineno': '...',
580 'old_lineno': '...',
573 'new_lineno': '...',
581 'new_lineno': '...',
574 'action': Action.CONTEXT,
582 'action': Action.CONTEXT,
575 'line': self._clean_line(line, command)
583 'line': self._clean_line(line, command)
576 })
584 })
577
585
578 except StopIteration:
586 except StopIteration:
579 pass
587 pass
580 return ''.join(raw_diff), chunks, stats
588 return ''.join(raw_diff), chunks, stats
581
589
582 def _safe_id(self, idstring):
590 def _safe_id(self, idstring):
583 """Make a string safe for including in an id attribute.
591 """Make a string safe for including in an id attribute.
584
592
585 The HTML spec says that id attributes 'must begin with
593 The HTML spec says that id attributes 'must begin with
586 a letter ([A-Za-z]) and may be followed by any number
594 a letter ([A-Za-z]) and may be followed by any number
587 of letters, digits ([0-9]), hyphens ("-"), underscores
595 of letters, digits ([0-9]), hyphens ("-"), underscores
588 ("_"), colons (":"), and periods (".")'. These regexps
596 ("_"), colons (":"), and periods (".")'. These regexps
589 are slightly over-zealous, in that they remove colons
597 are slightly over-zealous, in that they remove colons
590 and periods unnecessarily.
598 and periods unnecessarily.
591
599
592 Whitespace is transformed into underscores, and then
600 Whitespace is transformed into underscores, and then
593 anything which is not a hyphen or a character that
601 anything which is not a hyphen or a character that
594 matches \w (alphanumerics and underscore) is removed.
602 matches \w (alphanumerics and underscore) is removed.
595
603
596 """
604 """
597 # Transform all whitespace to underscore
605 # Transform all whitespace to underscore
598 idstring = re.sub(r'\s', "_", '%s' % idstring)
606 idstring = re.sub(r'\s', "_", '%s' % idstring)
599 # Remove everything that is not a hyphen or a member of \w
607 # Remove everything that is not a hyphen or a member of \w
600 idstring = re.sub(r'(?!-)\W', "", idstring).lower()
608 idstring = re.sub(r'(?!-)\W', "", idstring).lower()
601 return idstring
609 return idstring
602
610
603 def prepare(self, inline_diff=True):
611 def prepare(self, inline_diff=True):
604 """
612 """
605 Prepare the passed udiff for HTML rendering.
613 Prepare the passed udiff for HTML rendering.
606
614
607 :return: A list of dicts with diff information.
615 :return: A list of dicts with diff information.
608 """
616 """
609 parsed = self._parser(inline_diff=inline_diff)
617 parsed = self._parser(inline_diff=inline_diff)
610 self.parsed = True
618 self.parsed = True
611 self.parsed_diff = parsed
619 self.parsed_diff = parsed
612 return parsed
620 return parsed
613
621
614 def as_raw(self, diff_lines=None):
622 def as_raw(self, diff_lines=None):
615 """
623 """
616 Returns raw diff as a byte string
624 Returns raw diff as a byte string
617 """
625 """
618 return self._diff.raw
626 return self._diff.raw
619
627
620 def as_html(self, table_class='code-difftable', line_class='line',
628 def as_html(self, table_class='code-difftable', line_class='line',
621 old_lineno_class='lineno old', new_lineno_class='lineno new',
629 old_lineno_class='lineno old', new_lineno_class='lineno new',
622 code_class='code', enable_comments=False, parsed_lines=None):
630 code_class='code', enable_comments=False, parsed_lines=None):
623 """
631 """
624 Return given diff as html table with customized css classes
632 Return given diff as html table with customized css classes
625 """
633 """
626 def _link_to_if(condition, label, url):
634 def _link_to_if(condition, label, url):
627 """
635 """
628 Generates a link if condition is met or just the label if not.
636 Generates a link if condition is met or just the label if not.
629 """
637 """
630
638
631 if condition:
639 if condition:
632 return '''<a href="%(url)s" class="tooltip"
640 return '''<a href="%(url)s" class="tooltip"
633 title="%(title)s">%(label)s</a>''' % {
641 title="%(title)s">%(label)s</a>''' % {
634 'title': _('Click to select line'),
642 'title': _('Click to select line'),
635 'url': url,
643 'url': url,
636 'label': label
644 'label': label
637 }
645 }
638 else:
646 else:
639 return label
647 return label
640 if not self.parsed:
648 if not self.parsed:
641 self.prepare()
649 self.prepare()
642
650
643 diff_lines = self.parsed_diff
651 diff_lines = self.parsed_diff
644 if parsed_lines:
652 if parsed_lines:
645 diff_lines = parsed_lines
653 diff_lines = parsed_lines
646
654
647 _html_empty = True
655 _html_empty = True
648 _html = []
656 _html = []
649 _html.append('''<table class="%(table_class)s">\n''' % {
657 _html.append('''<table class="%(table_class)s">\n''' % {
650 'table_class': table_class
658 'table_class': table_class
651 })
659 })
652
660
653 for diff in diff_lines:
661 for diff in diff_lines:
654 for line in diff['chunks']:
662 for line in diff['chunks']:
655 _html_empty = False
663 _html_empty = False
656 for change in line:
664 for change in line:
657 _html.append('''<tr class="%(lc)s %(action)s">\n''' % {
665 _html.append('''<tr class="%(lc)s %(action)s">\n''' % {
658 'lc': line_class,
666 'lc': line_class,
659 'action': change['action']
667 'action': change['action']
660 })
668 })
661 anchor_old_id = ''
669 anchor_old_id = ''
662 anchor_new_id = ''
670 anchor_new_id = ''
663 anchor_old = "%(filename)s_o%(oldline_no)s" % {
671 anchor_old = "%(filename)s_o%(oldline_no)s" % {
664 'filename': self._safe_id(diff['filename']),
672 'filename': self._safe_id(diff['filename']),
665 'oldline_no': change['old_lineno']
673 'oldline_no': change['old_lineno']
666 }
674 }
667 anchor_new = "%(filename)s_n%(oldline_no)s" % {
675 anchor_new = "%(filename)s_n%(oldline_no)s" % {
668 'filename': self._safe_id(diff['filename']),
676 'filename': self._safe_id(diff['filename']),
669 'oldline_no': change['new_lineno']
677 'oldline_no': change['new_lineno']
670 }
678 }
671 cond_old = (change['old_lineno'] != '...' and
679 cond_old = (change['old_lineno'] != '...' and
672 change['old_lineno'])
680 change['old_lineno'])
673 cond_new = (change['new_lineno'] != '...' and
681 cond_new = (change['new_lineno'] != '...' and
674 change['new_lineno'])
682 change['new_lineno'])
675 if cond_old:
683 if cond_old:
676 anchor_old_id = 'id="%s"' % anchor_old
684 anchor_old_id = 'id="%s"' % anchor_old
677 if cond_new:
685 if cond_new:
678 anchor_new_id = 'id="%s"' % anchor_new
686 anchor_new_id = 'id="%s"' % anchor_new
679
687
680 if change['action'] != Action.CONTEXT:
688 if change['action'] != Action.CONTEXT:
681 anchor_link = True
689 anchor_link = True
682 else:
690 else:
683 anchor_link = False
691 anchor_link = False
684
692
685 ###########################################################
693 ###########################################################
686 # COMMENT ICON
694 # COMMENT ICON
687 ###########################################################
695 ###########################################################
688 _html.append('''\t<td class="add-comment-line"><span class="add-comment-content">''')
696 _html.append('''\t<td class="add-comment-line"><span class="add-comment-content">''')
689
697
690 if enable_comments and change['action'] != Action.CONTEXT:
698 if enable_comments and change['action'] != Action.CONTEXT:
691 _html.append('''<a href="#"><span class="icon-comment-add"></span></a>''')
699 _html.append('''<a href="#"><span class="icon-comment-add"></span></a>''')
692
700
693 _html.append('''</span></td>\n''')
701 _html.append('''</span></td>\n''')
694
702
695 ###########################################################
703 ###########################################################
696 # OLD LINE NUMBER
704 # OLD LINE NUMBER
697 ###########################################################
705 ###########################################################
698 _html.append('''\t<td %(a_id)s class="%(olc)s">''' % {
706 _html.append('''\t<td %(a_id)s class="%(olc)s">''' % {
699 'a_id': anchor_old_id,
707 'a_id': anchor_old_id,
700 'olc': old_lineno_class
708 'olc': old_lineno_class
701 })
709 })
702
710
703 _html.append('''%(link)s''' % {
711 _html.append('''%(link)s''' % {
704 'link': _link_to_if(anchor_link, change['old_lineno'],
712 'link': _link_to_if(anchor_link, change['old_lineno'],
705 '#%s' % anchor_old)
713 '#%s' % anchor_old)
706 })
714 })
707 _html.append('''</td>\n''')
715 _html.append('''</td>\n''')
708 ###########################################################
716 ###########################################################
709 # NEW LINE NUMBER
717 # NEW LINE NUMBER
710 ###########################################################
718 ###########################################################
711
719
712 _html.append('''\t<td %(a_id)s class="%(nlc)s">''' % {
720 _html.append('''\t<td %(a_id)s class="%(nlc)s">''' % {
713 'a_id': anchor_new_id,
721 'a_id': anchor_new_id,
714 'nlc': new_lineno_class
722 'nlc': new_lineno_class
715 })
723 })
716
724
717 _html.append('''%(link)s''' % {
725 _html.append('''%(link)s''' % {
718 'link': _link_to_if(anchor_link, change['new_lineno'],
726 'link': _link_to_if(anchor_link, change['new_lineno'],
719 '#%s' % anchor_new)
727 '#%s' % anchor_new)
720 })
728 })
721 _html.append('''</td>\n''')
729 _html.append('''</td>\n''')
722 ###########################################################
730 ###########################################################
723 # CODE
731 # CODE
724 ###########################################################
732 ###########################################################
725 code_classes = [code_class]
733 code_classes = [code_class]
726 if (not enable_comments or
734 if (not enable_comments or
727 change['action'] == Action.CONTEXT):
735 change['action'] == Action.CONTEXT):
728 code_classes.append('no-comment')
736 code_classes.append('no-comment')
729 _html.append('\t<td class="%s">' % ' '.join(code_classes))
737 _html.append('\t<td class="%s">' % ' '.join(code_classes))
730 _html.append('''\n\t\t<pre>%(code)s</pre>\n''' % {
738 _html.append('''\n\t\t<pre>%(code)s</pre>\n''' % {
731 'code': change['line']
739 'code': change['line']
732 })
740 })
733
741
734 _html.append('''\t</td>''')
742 _html.append('''\t</td>''')
735 _html.append('''\n</tr>\n''')
743 _html.append('''\n</tr>\n''')
736 _html.append('''</table>''')
744 _html.append('''</table>''')
737 if _html_empty:
745 if _html_empty:
738 return None
746 return None
739 return ''.join(_html)
747 return ''.join(_html)
740
748
741 def stat(self):
749 def stat(self):
742 """
750 """
743 Returns tuple of added, and removed lines for this instance
751 Returns tuple of added, and removed lines for this instance
744 """
752 """
745 return self.adds, self.removes
753 return self.adds, self.removes
746
754
747 def get_context_of_line(
755 def get_context_of_line(
748 self, path, diff_line=None, context_before=3, context_after=3):
756 self, path, diff_line=None, context_before=3, context_after=3):
749 """
757 """
750 Returns the context lines for the specified diff line.
758 Returns the context lines for the specified diff line.
751
759
752 :type diff_line: :class:`DiffLineNumber`
760 :type diff_line: :class:`DiffLineNumber`
753 """
761 """
754 assert self.parsed, "DiffProcessor is not initialized."
762 assert self.parsed, "DiffProcessor is not initialized."
755
763
756 if None not in diff_line:
764 if None not in diff_line:
757 raise ValueError(
765 raise ValueError(
758 "Cannot specify both line numbers: {}".format(diff_line))
766 "Cannot specify both line numbers: {}".format(diff_line))
759
767
760 file_diff = self._get_file_diff(path)
768 file_diff = self._get_file_diff(path)
761 chunk, idx = self._find_chunk_line_index(file_diff, diff_line)
769 chunk, idx = self._find_chunk_line_index(file_diff, diff_line)
762
770
763 first_line_to_include = max(idx - context_before, 0)
771 first_line_to_include = max(idx - context_before, 0)
764 first_line_after_context = idx + context_after + 1
772 first_line_after_context = idx + context_after + 1
765 context_lines = chunk[first_line_to_include:first_line_after_context]
773 context_lines = chunk[first_line_to_include:first_line_after_context]
766
774
767 line_contents = [
775 line_contents = [
768 _context_line(line) for line in context_lines
776 _context_line(line) for line in context_lines
769 if _is_diff_content(line)]
777 if _is_diff_content(line)]
770 # TODO: johbo: Interim fixup, the diff chunks drop the final newline.
778 # TODO: johbo: Interim fixup, the diff chunks drop the final newline.
771 # Once they are fixed, we can drop this line here.
779 # Once they are fixed, we can drop this line here.
772 if line_contents:
780 if line_contents:
773 line_contents[-1] = (
781 line_contents[-1] = (
774 line_contents[-1][0], line_contents[-1][1].rstrip('\n') + '\n')
782 line_contents[-1][0], line_contents[-1][1].rstrip('\n') + '\n')
775 return line_contents
783 return line_contents
776
784
777 def find_context(self, path, context, offset=0):
785 def find_context(self, path, context, offset=0):
778 """
786 """
779 Finds the given `context` inside of the diff.
787 Finds the given `context` inside of the diff.
780
788
781 Use the parameter `offset` to specify which offset the target line has
789 Use the parameter `offset` to specify which offset the target line has
782 inside of the given `context`. This way the correct diff line will be
790 inside of the given `context`. This way the correct diff line will be
783 returned.
791 returned.
784
792
785 :param offset: Shall be used to specify the offset of the main line
793 :param offset: Shall be used to specify the offset of the main line
786 within the given `context`.
794 within the given `context`.
787 """
795 """
788 if offset < 0 or offset >= len(context):
796 if offset < 0 or offset >= len(context):
789 raise ValueError(
797 raise ValueError(
790 "Only positive values up to the length of the context "
798 "Only positive values up to the length of the context "
791 "minus one are allowed.")
799 "minus one are allowed.")
792
800
793 matches = []
801 matches = []
794 file_diff = self._get_file_diff(path)
802 file_diff = self._get_file_diff(path)
795
803
796 for chunk in file_diff['chunks']:
804 for chunk in file_diff['chunks']:
797 context_iter = iter(context)
805 context_iter = iter(context)
798 for line_idx, line in enumerate(chunk):
806 for line_idx, line in enumerate(chunk):
799 try:
807 try:
800 if _context_line(line) == context_iter.next():
808 if _context_line(line) == context_iter.next():
801 continue
809 continue
802 except StopIteration:
810 except StopIteration:
803 matches.append((line_idx, chunk))
811 matches.append((line_idx, chunk))
804 context_iter = iter(context)
812 context_iter = iter(context)
805
813
806 # Increment position and trigger StopIteration
814 # Increment position and trigger StopIteration
807 # if we had a match at the end
815 # if we had a match at the end
808 line_idx += 1
816 line_idx += 1
809 try:
817 try:
810 context_iter.next()
818 context_iter.next()
811 except StopIteration:
819 except StopIteration:
812 matches.append((line_idx, chunk))
820 matches.append((line_idx, chunk))
813
821
814 effective_offset = len(context) - offset
822 effective_offset = len(context) - offset
815 found_at_diff_lines = [
823 found_at_diff_lines = [
816 _line_to_diff_line_number(chunk[idx - effective_offset])
824 _line_to_diff_line_number(chunk[idx - effective_offset])
817 for idx, chunk in matches]
825 for idx, chunk in matches]
818
826
819 return found_at_diff_lines
827 return found_at_diff_lines
820
828
821 def _get_file_diff(self, path):
829 def _get_file_diff(self, path):
822 for file_diff in self.parsed_diff:
830 for file_diff in self.parsed_diff:
823 if file_diff['filename'] == path:
831 if file_diff['filename'] == path:
824 break
832 break
825 else:
833 else:
826 raise FileNotInDiffException("File {} not in diff".format(path))
834 raise FileNotInDiffException("File {} not in diff".format(path))
827 return file_diff
835 return file_diff
828
836
829 def _find_chunk_line_index(self, file_diff, diff_line):
837 def _find_chunk_line_index(self, file_diff, diff_line):
830 for chunk in file_diff['chunks']:
838 for chunk in file_diff['chunks']:
831 for idx, line in enumerate(chunk):
839 for idx, line in enumerate(chunk):
832 if line['old_lineno'] == diff_line.old:
840 if line['old_lineno'] == diff_line.old:
833 return chunk, idx
841 return chunk, idx
834 if line['new_lineno'] == diff_line.new:
842 if line['new_lineno'] == diff_line.new:
835 return chunk, idx
843 return chunk, idx
836 raise LineNotInDiffException(
844 raise LineNotInDiffException(
837 "The line {} is not part of the diff.".format(diff_line))
845 "The line {} is not part of the diff.".format(diff_line))
838
846
839
847
840 def _is_diff_content(line):
848 def _is_diff_content(line):
841 return line['action'] in (
849 return line['action'] in (
842 Action.UNMODIFIED, Action.ADD, Action.DELETE)
850 Action.UNMODIFIED, Action.ADD, Action.DELETE)
843
851
844
852
845 def _context_line(line):
853 def _context_line(line):
846 return (line['action'], line['line'])
854 return (line['action'], line['line'])
847
855
848
856
849 DiffLineNumber = collections.namedtuple('DiffLineNumber', ['old', 'new'])
857 DiffLineNumber = collections.namedtuple('DiffLineNumber', ['old', 'new'])
850
858
851
859
852 def _line_to_diff_line_number(line):
860 def _line_to_diff_line_number(line):
853 new_line_no = line['new_lineno'] or None
861 new_line_no = line['new_lineno'] or None
854 old_line_no = line['old_lineno'] or None
862 old_line_no = line['old_lineno'] or None
855 return DiffLineNumber(old=old_line_no, new=new_line_no)
863 return DiffLineNumber(old=old_line_no, new=new_line_no)
856
864
857
865
858 class FileNotInDiffException(Exception):
866 class FileNotInDiffException(Exception):
859 """
867 """
860 Raised when the context for a missing file is requested.
868 Raised when the context for a missing file is requested.
861
869
862 If you request the context for a line in a file which is not part of the
870 If you request the context for a line in a file which is not part of the
863 given diff, then this exception is raised.
871 given diff, then this exception is raised.
864 """
872 """
865
873
866
874
867 class LineNotInDiffException(Exception):
875 class LineNotInDiffException(Exception):
868 """
876 """
869 Raised when the context for a missing line is requested.
877 Raised when the context for a missing line is requested.
870
878
871 If you request the context for a line in a file and this line is not
879 If you request the context for a line in a file and this line is not
872 part of the given diff, then this exception is raised.
880 part of the given diff, then this exception is raised.
873 """
881 """
874
882
875
883
876 class DiffLimitExceeded(Exception):
884 class DiffLimitExceeded(Exception):
877 pass
885 pass
General Comments 0
You need to be logged in to leave comments. Login now