##// END OF EJS Templates
diff parser: more correct detection and reporting of binary git diffs...
Mads Kiilerich -
r3818:0d22458b beta
parent child Browse files
Show More
@@ -1,684 +1,688 b''
1 # -*- coding: utf-8 -*-
1 # -*- coding: utf-8 -*-
2 """
2 """
3 rhodecode.lib.diffs
3 rhodecode.lib.diffs
4 ~~~~~~~~~~~~~~~~~~~
4 ~~~~~~~~~~~~~~~~~~~
5
5
6 Set of diffing helpers, previously part of vcs
6 Set of diffing helpers, previously part of vcs
7
7
8
8
9 :created_on: Dec 4, 2011
9 :created_on: Dec 4, 2011
10 :author: marcink
10 :author: marcink
11 :copyright: (C) 2010-2012 Marcin Kuzminski <marcin@python-works.com>
11 :copyright: (C) 2010-2012 Marcin Kuzminski <marcin@python-works.com>
12 :original copyright: 2007-2008 by Armin Ronacher
12 :original copyright: 2007-2008 by Armin Ronacher
13 :license: GPLv3, see COPYING for more details.
13 :license: GPLv3, see COPYING for more details.
14 """
14 """
15 # This program is free software: you can redistribute it and/or modify
15 # This program is free software: you can redistribute it and/or modify
16 # it under the terms of the GNU General Public License as published by
16 # it under the terms of the GNU General Public License as published by
17 # the Free Software Foundation, either version 3 of the License, or
17 # the Free Software Foundation, either version 3 of the License, or
18 # (at your option) any later version.
18 # (at your option) any later version.
19 #
19 #
20 # This program is distributed in the hope that it will be useful,
20 # This program is distributed in the hope that it will be useful,
21 # but WITHOUT ANY WARRANTY; without even the implied warranty of
21 # but WITHOUT ANY WARRANTY; without even the implied warranty of
22 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23 # GNU General Public License for more details.
23 # GNU General Public License for more details.
24 #
24 #
25 # You should have received a copy of the GNU General Public License
25 # You should have received a copy of the GNU General Public License
26 # along with this program. If not, see <http://www.gnu.org/licenses/>.
26 # along with this program. If not, see <http://www.gnu.org/licenses/>.
27
27
28 import re
28 import re
29 import difflib
29 import difflib
30 import logging
30 import logging
31
31
32 from itertools import tee, imap
32 from itertools import tee, imap
33
33
34 from pylons.i18n.translation import _
34 from pylons.i18n.translation import _
35
35
36 from rhodecode.lib.vcs.exceptions import VCSError
36 from rhodecode.lib.vcs.exceptions import VCSError
37 from rhodecode.lib.vcs.nodes import FileNode, SubModuleNode
37 from rhodecode.lib.vcs.nodes import FileNode, SubModuleNode
38 from rhodecode.lib.vcs.backends.base import EmptyChangeset
38 from rhodecode.lib.vcs.backends.base import EmptyChangeset
39 from rhodecode.lib.helpers import escape
39 from rhodecode.lib.helpers import escape
40 from rhodecode.lib.utils2 import safe_unicode, safe_str
40 from rhodecode.lib.utils2 import safe_unicode, safe_str
41
41
42 log = logging.getLogger(__name__)
42 log = logging.getLogger(__name__)
43
43
44
44
45 def wrap_to_table(str_):
45 def wrap_to_table(str_):
46 return '''<table class="code-difftable">
46 return '''<table class="code-difftable">
47 <tr class="line no-comment">
47 <tr class="line no-comment">
48 <td class="lineno new"></td>
48 <td class="lineno new"></td>
49 <td class="code no-comment"><pre>%s</pre></td>
49 <td class="code no-comment"><pre>%s</pre></td>
50 </tr>
50 </tr>
51 </table>''' % str_
51 </table>''' % str_
52
52
53
53
54 def wrapped_diff(filenode_old, filenode_new, cut_off_limit=None,
54 def wrapped_diff(filenode_old, filenode_new, cut_off_limit=None,
55 ignore_whitespace=True, line_context=3,
55 ignore_whitespace=True, line_context=3,
56 enable_comments=False):
56 enable_comments=False):
57 """
57 """
58 returns a wrapped diff into a table, checks for cut_off_limit and presents
58 returns a wrapped diff into a table, checks for cut_off_limit and presents
59 proper message
59 proper message
60 """
60 """
61
61
62 if filenode_old is None:
62 if filenode_old is None:
63 filenode_old = FileNode(filenode_new.path, '', EmptyChangeset())
63 filenode_old = FileNode(filenode_new.path, '', EmptyChangeset())
64
64
65 if filenode_old.is_binary or filenode_new.is_binary:
65 if filenode_old.is_binary or filenode_new.is_binary:
66 diff = wrap_to_table(_('Binary file'))
66 diff = wrap_to_table(_('Binary file'))
67 stats = (0, 0)
67 stats = (0, 0)
68 size = 0
68 size = 0
69
69
70 elif cut_off_limit != -1 and (cut_off_limit is None or
70 elif cut_off_limit != -1 and (cut_off_limit is None or
71 (filenode_old.size < cut_off_limit and filenode_new.size < cut_off_limit)):
71 (filenode_old.size < cut_off_limit and filenode_new.size < cut_off_limit)):
72
72
73 f_gitdiff = get_gitdiff(filenode_old, filenode_new,
73 f_gitdiff = get_gitdiff(filenode_old, filenode_new,
74 ignore_whitespace=ignore_whitespace,
74 ignore_whitespace=ignore_whitespace,
75 context=line_context)
75 context=line_context)
76 diff_processor = DiffProcessor(f_gitdiff, format='gitdiff')
76 diff_processor = DiffProcessor(f_gitdiff, format='gitdiff')
77
77
78 diff = diff_processor.as_html(enable_comments=enable_comments)
78 diff = diff_processor.as_html(enable_comments=enable_comments)
79 stats = diff_processor.stat()
79 stats = diff_processor.stat()
80 size = len(diff or '')
80 size = len(diff or '')
81 else:
81 else:
82 diff = wrap_to_table(_('Changeset was too big and was cut off, use '
82 diff = wrap_to_table(_('Changeset was too big and was cut off, use '
83 'diff menu to display this diff'))
83 'diff menu to display this diff'))
84 stats = (0, 0)
84 stats = (0, 0)
85 size = 0
85 size = 0
86 if not diff:
86 if not diff:
87 submodules = filter(lambda o: isinstance(o, SubModuleNode),
87 submodules = filter(lambda o: isinstance(o, SubModuleNode),
88 [filenode_new, filenode_old])
88 [filenode_new, filenode_old])
89 if submodules:
89 if submodules:
90 diff = wrap_to_table(escape('Submodule %r' % submodules[0]))
90 diff = wrap_to_table(escape('Submodule %r' % submodules[0]))
91 else:
91 else:
92 diff = wrap_to_table(_('No changes detected'))
92 diff = wrap_to_table(_('No changes detected'))
93
93
94 cs1 = filenode_old.changeset.raw_id
94 cs1 = filenode_old.changeset.raw_id
95 cs2 = filenode_new.changeset.raw_id
95 cs2 = filenode_new.changeset.raw_id
96
96
97 return size, cs1, cs2, diff, stats
97 return size, cs1, cs2, diff, stats
98
98
99
99
100 def get_gitdiff(filenode_old, filenode_new, ignore_whitespace=True, context=3):
100 def get_gitdiff(filenode_old, filenode_new, ignore_whitespace=True, context=3):
101 """
101 """
102 Returns git style diff between given ``filenode_old`` and ``filenode_new``.
102 Returns git style diff between given ``filenode_old`` and ``filenode_new``.
103
103
104 :param ignore_whitespace: ignore whitespaces in diff
104 :param ignore_whitespace: ignore whitespaces in diff
105 """
105 """
106 # make sure we pass in default context
106 # make sure we pass in default context
107 context = context or 3
107 context = context or 3
108 submodules = filter(lambda o: isinstance(o, SubModuleNode),
108 submodules = filter(lambda o: isinstance(o, SubModuleNode),
109 [filenode_new, filenode_old])
109 [filenode_new, filenode_old])
110 if submodules:
110 if submodules:
111 return ''
111 return ''
112
112
113 for filenode in (filenode_old, filenode_new):
113 for filenode in (filenode_old, filenode_new):
114 if not isinstance(filenode, FileNode):
114 if not isinstance(filenode, FileNode):
115 raise VCSError("Given object should be FileNode object, not %s"
115 raise VCSError("Given object should be FileNode object, not %s"
116 % filenode.__class__)
116 % filenode.__class__)
117
117
118 repo = filenode_new.changeset.repository
118 repo = filenode_new.changeset.repository
119 old_raw_id = getattr(filenode_old.changeset, 'raw_id', repo.EMPTY_CHANGESET)
119 old_raw_id = getattr(filenode_old.changeset, 'raw_id', repo.EMPTY_CHANGESET)
120 new_raw_id = getattr(filenode_new.changeset, 'raw_id', repo.EMPTY_CHANGESET)
120 new_raw_id = getattr(filenode_new.changeset, 'raw_id', repo.EMPTY_CHANGESET)
121
121
122 vcs_gitdiff = repo.get_diff(old_raw_id, new_raw_id, filenode_new.path,
122 vcs_gitdiff = repo.get_diff(old_raw_id, new_raw_id, filenode_new.path,
123 ignore_whitespace, context)
123 ignore_whitespace, context)
124 return vcs_gitdiff
124 return vcs_gitdiff
125
125
126 NEW_FILENODE = 1
126 NEW_FILENODE = 1
127 DEL_FILENODE = 2
127 DEL_FILENODE = 2
128 MOD_FILENODE = 3
128 MOD_FILENODE = 3
129 RENAMED_FILENODE = 4
129 RENAMED_FILENODE = 4
130 CHMOD_FILENODE = 5
130 CHMOD_FILENODE = 5
131
131
132
132
133 class DiffLimitExceeded(Exception):
133 class DiffLimitExceeded(Exception):
134 pass
134 pass
135
135
136
136
137 class LimitedDiffContainer(object):
137 class LimitedDiffContainer(object):
138
138
139 def __init__(self, diff_limit, cur_diff_size, diff):
139 def __init__(self, diff_limit, cur_diff_size, diff):
140 self.diff = diff
140 self.diff = diff
141 self.diff_limit = diff_limit
141 self.diff_limit = diff_limit
142 self.cur_diff_size = cur_diff_size
142 self.cur_diff_size = cur_diff_size
143
143
144 def __iter__(self):
144 def __iter__(self):
145 for l in self.diff:
145 for l in self.diff:
146 yield l
146 yield l
147
147
148
148
149 class DiffProcessor(object):
149 class DiffProcessor(object):
150 """
150 """
151 Give it a unified or git diff and it returns a list of the files that were
151 Give it a unified or git diff and it returns a list of the files that were
152 mentioned in the diff together with a dict of meta information that
152 mentioned in the diff together with a dict of meta information that
153 can be used to render it in a HTML template.
153 can be used to render it in a HTML template.
154 """
154 """
155 _chunk_re = re.compile(r'^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(.*)')
155 _chunk_re = re.compile(r'^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(.*)')
156 _newline_marker = re.compile(r'^\\ No newline at end of file')
156 _newline_marker = re.compile(r'^\\ No newline at end of file')
157 _git_header_re = re.compile(r"""
157 _git_header_re = re.compile(r"""
158 #^diff[ ]--git
158 #^diff[ ]--git
159 [ ]a/(?P<a_path>.+?)[ ]b/(?P<b_path>.+?)\n
159 [ ]a/(?P<a_path>.+?)[ ]b/(?P<b_path>.+?)\n
160 (?:^similarity[ ]index[ ](?P<similarity_index>\d+)%\n
160 (?:^similarity[ ]index[ ](?P<similarity_index>\d+)%\n
161 ^rename[ ]from[ ](?P<rename_from>\S+)\n
161 ^rename[ ]from[ ](?P<rename_from>\S+)\n
162 ^rename[ ]to[ ](?P<rename_to>\S+)(?:\n|$))?
162 ^rename[ ]to[ ](?P<rename_to>\S+)(?:\n|$))?
163 (?:^old[ ]mode[ ](?P<old_mode>\d+)\n
163 (?:^old[ ]mode[ ](?P<old_mode>\d+)\n
164 ^new[ ]mode[ ](?P<new_mode>\d+)(?:\n|$))?
164 ^new[ ]mode[ ](?P<new_mode>\d+)(?:\n|$))?
165 (?:^new[ ]file[ ]mode[ ](?P<new_file_mode>.+)(?:\n|$))?
165 (?:^new[ ]file[ ]mode[ ](?P<new_file_mode>.+)(?:\n|$))?
166 (?:^deleted[ ]file[ ]mode[ ](?P<deleted_file_mode>.+)(?:\n|$))?
166 (?:^deleted[ ]file[ ]mode[ ](?P<deleted_file_mode>.+)(?:\n|$))?
167 (?:^index[ ](?P<a_blob_id>[0-9A-Fa-f]+)
167 (?:^index[ ](?P<a_blob_id>[0-9A-Fa-f]+)
168 \.\.(?P<b_blob_id>[0-9A-Fa-f]+)[ ]?(?P<b_mode>.+)?(?:\n|$))?
168 \.\.(?P<b_blob_id>[0-9A-Fa-f]+)[ ]?(?P<b_mode>.+)?(?:\n|$))?
169 (?:^---[ ](a/(?P<a_file>.+)|/dev/null)(?:\n|$))?
169 (?:^---[ ](a/(?P<a_file>.+)|/dev/null)(?:\n|$))?
170 (?:^\+\+\+[ ](b/(?P<b_file>.+)|/dev/null)(?:\n|$))?
170 (?:^\+\+\+[ ](b/(?P<b_file>.+)|/dev/null)(?:\n|$))?
171 """, re.VERBOSE | re.MULTILINE)
171 """, re.VERBOSE | re.MULTILINE)
172 _hg_header_re = re.compile(r"""
172 _hg_header_re = re.compile(r"""
173 #^diff[ ]--git
173 #^diff[ ]--git
174 [ ]a/(?P<a_path>.+?)[ ]b/(?P<b_path>.+?)\n
174 [ ]a/(?P<a_path>.+?)[ ]b/(?P<b_path>.+?)\n
175 (?:^old[ ]mode[ ](?P<old_mode>\d+)\n
175 (?:^old[ ]mode[ ](?P<old_mode>\d+)\n
176 ^new[ ]mode[ ](?P<new_mode>\d+)(?:\n|$))?
176 ^new[ ]mode[ ](?P<new_mode>\d+)(?:\n|$))?
177 (?:^similarity[ ]index[ ](?P<similarity_index>\d+)%(?:\n|$))?
177 (?:^similarity[ ]index[ ](?P<similarity_index>\d+)%(?:\n|$))?
178 (?:^rename[ ]from[ ](?P<rename_from>\S+)\n
178 (?:^rename[ ]from[ ](?P<rename_from>\S+)\n
179 ^rename[ ]to[ ](?P<rename_to>\S+)(?:\n|$))?
179 ^rename[ ]to[ ](?P<rename_to>\S+)(?:\n|$))?
180 (?:^new[ ]file[ ]mode[ ](?P<new_file_mode>.+)(?:\n|$))?
180 (?:^new[ ]file[ ]mode[ ](?P<new_file_mode>.+)(?:\n|$))?
181 (?:^deleted[ ]file[ ]mode[ ](?P<deleted_file_mode>.+)(?:\n|$))?
181 (?:^deleted[ ]file[ ]mode[ ](?P<deleted_file_mode>.+)(?:\n|$))?
182 (?:^index[ ](?P<a_blob_id>[0-9A-Fa-f]+)
182 (?:^index[ ](?P<a_blob_id>[0-9A-Fa-f]+)
183 \.\.(?P<b_blob_id>[0-9A-Fa-f]+)[ ]?(?P<b_mode>.+)?(?:\n|$))?
183 \.\.(?P<b_blob_id>[0-9A-Fa-f]+)[ ]?(?P<b_mode>.+)?(?:\n|$))?
184 (?:^---[ ](a/(?P<a_file>.+)|/dev/null)(?:\n|$))?
184 (?:^---[ ](a/(?P<a_file>.+)|/dev/null)(?:\n|$))?
185 (?:^\+\+\+[ ](b/(?P<b_file>.+)|/dev/null)(?:\n|$))?
185 (?:^\+\+\+[ ](b/(?P<b_file>.+)|/dev/null)(?:\n|$))?
186 """, re.VERBOSE | re.MULTILINE)
186 """, re.VERBOSE | re.MULTILINE)
187
187
188 #used for inline highlighter word split
188 #used for inline highlighter word split
189 _token_re = re.compile(r'()(&gt;|&lt;|&amp;|\W+?)')
189 _token_re = re.compile(r'()(&gt;|&lt;|&amp;|\W+?)')
190
190
191 def __init__(self, diff, vcs='hg', format='gitdiff', diff_limit=None):
191 def __init__(self, diff, vcs='hg', format='gitdiff', diff_limit=None):
192 """
192 """
193 :param diff: a text in diff format
193 :param diff: a text in diff format
194 :param vcs: type of version control, hg or git
194 :param vcs: type of version control, hg or git
195 :param format: format of diff passed, `udiff` or `gitdiff`
195 :param format: format of diff passed, `udiff` or `gitdiff`
196 :param diff_limit: define the size of diff that is considered "big"
196 :param diff_limit: define the size of diff that is considered "big"
197 based on that parameter cut off will be triggered, set to None
197 based on that parameter cut off will be triggered, set to None
198 to show full diff
198 to show full diff
199 """
199 """
200 if not isinstance(diff, basestring):
200 if not isinstance(diff, basestring):
201 raise Exception('Diff must be a basestring got %s instead' % type(diff))
201 raise Exception('Diff must be a basestring got %s instead' % type(diff))
202
202
203 self._diff = diff
203 self._diff = diff
204 self._format = format
204 self._format = format
205 self.adds = 0
205 self.adds = 0
206 self.removes = 0
206 self.removes = 0
207 # calculate diff size
207 # calculate diff size
208 self.diff_size = len(diff)
208 self.diff_size = len(diff)
209 self.diff_limit = diff_limit
209 self.diff_limit = diff_limit
210 self.cur_diff_size = 0
210 self.cur_diff_size = 0
211 self.parsed = False
211 self.parsed = False
212 self.parsed_diff = []
212 self.parsed_diff = []
213 self.vcs = vcs
213 self.vcs = vcs
214
214
215 if format == 'gitdiff':
215 if format == 'gitdiff':
216 self.differ = self._highlight_line_difflib
216 self.differ = self._highlight_line_difflib
217 self._parser = self._parse_gitdiff
217 self._parser = self._parse_gitdiff
218 else:
218 else:
219 self.differ = self._highlight_line_udiff
219 self.differ = self._highlight_line_udiff
220 self._parser = self._parse_udiff
220 self._parser = self._parse_udiff
221
221
222 def _copy_iterator(self):
222 def _copy_iterator(self):
223 """
223 """
224 make a fresh copy of generator, we should not iterate through
224 make a fresh copy of generator, we should not iterate through
225 an original as it's needed for repeating operations on
225 an original as it's needed for repeating operations on
226 this instance of DiffProcessor
226 this instance of DiffProcessor
227 """
227 """
228 self.__udiff, iterator_copy = tee(self.__udiff)
228 self.__udiff, iterator_copy = tee(self.__udiff)
229 return iterator_copy
229 return iterator_copy
230
230
231 def _escaper(self, string):
231 def _escaper(self, string):
232 """
232 """
233 Escaper for diff escapes special chars and checks the diff limit
233 Escaper for diff escapes special chars and checks the diff limit
234
234
235 :param string:
235 :param string:
236 :type string:
236 :type string:
237 """
237 """
238
238
239 self.cur_diff_size += len(string)
239 self.cur_diff_size += len(string)
240
240
241 # escaper gets iterated on each .next() call and it checks if each
241 # escaper gets iterated on each .next() call and it checks if each
242 # parsed line doesn't exceed the diff limit
242 # parsed line doesn't exceed the diff limit
243 if self.diff_limit is not None and self.cur_diff_size > self.diff_limit:
243 if self.diff_limit is not None and self.cur_diff_size > self.diff_limit:
244 raise DiffLimitExceeded('Diff Limit Exceeded')
244 raise DiffLimitExceeded('Diff Limit Exceeded')
245
245
246 return safe_unicode(string).replace('&', '&amp;')\
246 return safe_unicode(string).replace('&', '&amp;')\
247 .replace('<', '&lt;')\
247 .replace('<', '&lt;')\
248 .replace('>', '&gt;')
248 .replace('>', '&gt;')
249
249
250 def _line_counter(self, l):
250 def _line_counter(self, l):
251 """
251 """
252 Checks each line and bumps total adds/removes for this diff
252 Checks each line and bumps total adds/removes for this diff
253
253
254 :param l:
254 :param l:
255 """
255 """
256 if l.startswith('+') and not l.startswith('+++'):
256 if l.startswith('+') and not l.startswith('+++'):
257 self.adds += 1
257 self.adds += 1
258 elif l.startswith('-') and not l.startswith('---'):
258 elif l.startswith('-') and not l.startswith('---'):
259 self.removes += 1
259 self.removes += 1
260 return safe_unicode(l)
260 return safe_unicode(l)
261
261
262 def _highlight_line_difflib(self, line, next_):
262 def _highlight_line_difflib(self, line, next_):
263 """
263 """
264 Highlight inline changes in both lines.
264 Highlight inline changes in both lines.
265 """
265 """
266
266
267 if line['action'] == 'del':
267 if line['action'] == 'del':
268 old, new = line, next_
268 old, new = line, next_
269 else:
269 else:
270 old, new = next_, line
270 old, new = next_, line
271
271
272 oldwords = self._token_re.split(old['line'])
272 oldwords = self._token_re.split(old['line'])
273 newwords = self._token_re.split(new['line'])
273 newwords = self._token_re.split(new['line'])
274 sequence = difflib.SequenceMatcher(None, oldwords, newwords)
274 sequence = difflib.SequenceMatcher(None, oldwords, newwords)
275
275
276 oldfragments, newfragments = [], []
276 oldfragments, newfragments = [], []
277 for tag, i1, i2, j1, j2 in sequence.get_opcodes():
277 for tag, i1, i2, j1, j2 in sequence.get_opcodes():
278 oldfrag = ''.join(oldwords[i1:i2])
278 oldfrag = ''.join(oldwords[i1:i2])
279 newfrag = ''.join(newwords[j1:j2])
279 newfrag = ''.join(newwords[j1:j2])
280 if tag != 'equal':
280 if tag != 'equal':
281 if oldfrag:
281 if oldfrag:
282 oldfrag = '<del>%s</del>' % oldfrag
282 oldfrag = '<del>%s</del>' % oldfrag
283 if newfrag:
283 if newfrag:
284 newfrag = '<ins>%s</ins>' % newfrag
284 newfrag = '<ins>%s</ins>' % newfrag
285 oldfragments.append(oldfrag)
285 oldfragments.append(oldfrag)
286 newfragments.append(newfrag)
286 newfragments.append(newfrag)
287
287
288 old['line'] = "".join(oldfragments)
288 old['line'] = "".join(oldfragments)
289 new['line'] = "".join(newfragments)
289 new['line'] = "".join(newfragments)
290
290
291 def _highlight_line_udiff(self, line, next_):
291 def _highlight_line_udiff(self, line, next_):
292 """
292 """
293 Highlight inline changes in both lines.
293 Highlight inline changes in both lines.
294 """
294 """
295 start = 0
295 start = 0
296 limit = min(len(line['line']), len(next_['line']))
296 limit = min(len(line['line']), len(next_['line']))
297 while start < limit and line['line'][start] == next_['line'][start]:
297 while start < limit and line['line'][start] == next_['line'][start]:
298 start += 1
298 start += 1
299 end = -1
299 end = -1
300 limit -= start
300 limit -= start
301 while -end <= limit and line['line'][end] == next_['line'][end]:
301 while -end <= limit and line['line'][end] == next_['line'][end]:
302 end -= 1
302 end -= 1
303 end += 1
303 end += 1
304 if start or end:
304 if start or end:
305 def do(l):
305 def do(l):
306 last = end + len(l['line'])
306 last = end + len(l['line'])
307 if l['action'] == 'add':
307 if l['action'] == 'add':
308 tag = 'ins'
308 tag = 'ins'
309 else:
309 else:
310 tag = 'del'
310 tag = 'del'
311 l['line'] = '%s<%s>%s</%s>%s' % (
311 l['line'] = '%s<%s>%s</%s>%s' % (
312 l['line'][:start],
312 l['line'][:start],
313 tag,
313 tag,
314 l['line'][start:last],
314 l['line'][start:last],
315 tag,
315 tag,
316 l['line'][last:]
316 l['line'][last:]
317 )
317 )
318 do(line)
318 do(line)
319 do(next_)
319 do(next_)
320
320
321 def _get_header(self, diff_chunk):
321 def _get_header(self, diff_chunk):
322 """
322 """
323 parses the diff header, and returns parts, and leftover diff
323 parses the diff header, and returns parts, and leftover diff
324 parts consists of 14 elements::
324 parts consists of 14 elements::
325
325
326 a_path, b_path, similarity_index, rename_from, rename_to,
326 a_path, b_path, similarity_index, rename_from, rename_to,
327 old_mode, new_mode, new_file_mode, deleted_file_mode,
327 old_mode, new_mode, new_file_mode, deleted_file_mode,
328 a_blob_id, b_blob_id, b_mode, a_file, b_file
328 a_blob_id, b_blob_id, b_mode, a_file, b_file
329
329
330 :param diff_chunk:
330 :param diff_chunk:
331 :type diff_chunk:
331 :type diff_chunk:
332 """
332 """
333
333
334 if self.vcs == 'git':
334 if self.vcs == 'git':
335 match = self._git_header_re.match(diff_chunk)
335 match = self._git_header_re.match(diff_chunk)
336 diff = diff_chunk[match.end():]
336 diff = diff_chunk[match.end():]
337 return match.groupdict(), imap(self._escaper, diff.splitlines(1))
337 return match.groupdict(), imap(self._escaper, diff.splitlines(1))
338 elif self.vcs == 'hg':
338 elif self.vcs == 'hg':
339 match = self._hg_header_re.match(diff_chunk)
339 match = self._hg_header_re.match(diff_chunk)
340 diff = diff_chunk[match.end():]
340 diff = diff_chunk[match.end():]
341 return match.groupdict(), imap(self._escaper, diff.splitlines(1))
341 return match.groupdict(), imap(self._escaper, diff.splitlines(1))
342 else:
342 else:
343 raise Exception('VCS type %s is not supported' % self.vcs)
343 raise Exception('VCS type %s is not supported' % self.vcs)
344
344
345 def _clean_line(self, line, command):
345 def _clean_line(self, line, command):
346 if command in ['+', '-', ' ']:
346 if command in ['+', '-', ' ']:
347 #only modify the line if it's actually a diff thing
347 #only modify the line if it's actually a diff thing
348 line = line[1:]
348 line = line[1:]
349 return line
349 return line
350
350
351 def _parse_gitdiff(self, inline_diff=True):
351 def _parse_gitdiff(self, inline_diff=True):
352 _files = []
352 _files = []
353 diff_container = lambda arg: arg
353 diff_container = lambda arg: arg
354
354
355 ##split the diff in chunks of separate --git a/file b/file chunks
355 ##split the diff in chunks of separate --git a/file b/file chunks
356 for raw_diff in ('\n' + self._diff).split('\ndiff --git')[1:]:
356 for raw_diff in ('\n' + self._diff).split('\ndiff --git')[1:]:
357 binary = False
358 binary_msg = 'unknown binary'
359 head, diff = self._get_header(raw_diff)
357 head, diff = self._get_header(raw_diff)
360
358
359 op = None
360 stats = None
361 msg = None
362
361 if not head['a_file'] and head['b_file']:
363 if not head['a_file'] and head['b_file']:
362 op = 'A'
364 op = 'A'
363 elif head['a_file'] and head['b_file']:
365 elif head['a_file'] and head['b_file']:
364 op = 'M'
366 op = 'M'
365 elif head['a_file'] and not head['b_file']:
367 elif head['a_file'] and not head['b_file']:
366 op = 'D'
368 op = 'D'
367 else:
369 else:
368 #probably we're dealing with a binary file 1
369 binary = True
370 if head['deleted_file_mode']:
370 if head['deleted_file_mode']:
371 op = 'D'
371 op = 'D'
372 stats = ['b', DEL_FILENODE]
372 stats = ['b', DEL_FILENODE]
373 binary_msg = 'deleted binary file'
373 msg = 'deleted file'
374 elif head['new_file_mode']:
374 elif head['new_file_mode']:
375 op = 'A'
375 op = 'A'
376 stats = ['b', NEW_FILENODE]
376 stats = ['b', NEW_FILENODE]
377 binary_msg = 'new binary file %s' % head['new_file_mode']
377 msg = 'new file %s' % head['new_file_mode']
378 else:
378 else:
379 if head['new_mode'] and head['old_mode']:
379 if head['new_mode'] and head['old_mode']:
380 stats = ['b', CHMOD_FILENODE]
380 stats = ['b', CHMOD_FILENODE]
381 op = 'M'
381 op = 'M'
382 binary_msg = ('modified binary file chmod %s => %s'
382 msg = ('modified file chmod %s => %s'
383 % (head['old_mode'], head['new_mode']))
383 % (head['old_mode'], head['new_mode']))
384 elif (head['rename_from'] and head['rename_to']
384 elif (head['rename_from'] and head['rename_to']
385 and head['rename_from'] != head['rename_to']):
385 and head['rename_from'] != head['rename_to']):
386 stats = ['b', RENAMED_FILENODE]
386 stats = ['b', RENAMED_FILENODE]
387 op = 'M'
387 op = 'M'
388 binary_msg = ('file renamed from %s to %s'
388 msg = ('file renamed from %s to %s'
389 % (head['rename_from'], head['rename_to']))
389 % (head['rename_from'], head['rename_to']))
390 else:
390 else:
391 stats = ['b', MOD_FILENODE]
391 stats = ['b', MOD_FILENODE]
392 op = 'M'
392 op = 'M'
393 binary_msg = 'modified binary file'
393 msg = 'modified file'
394
394
395 if not binary:
395 if head['a_file'] or head['b_file']: # a real diff
396 try:
396 try:
397 chunks, stats = self._parse_lines(diff)
397 chunks, stats = self._parse_lines(diff)
398 except DiffLimitExceeded:
398 except DiffLimitExceeded:
399 diff_container = lambda _diff: LimitedDiffContainer(
399 diff_container = lambda _diff: LimitedDiffContainer(
400 self.diff_limit,
400 self.diff_limit,
401 self.cur_diff_size,
401 self.cur_diff_size,
402 _diff)
402 _diff)
403 break
403 break
404 else:
404 else: # GIT binary patch (or empty diff)
405 chunks = []
405 chunks = []
406 chunks.append([{
406 if not msg: # don't overwrite more important message
407 msg = 'binary diff not shown'
408
409 if msg:
410 chunks.insert(0, [{
407 'old_lineno': '',
411 'old_lineno': '',
408 'new_lineno': '',
412 'new_lineno': '',
409 'action': 'binary',
413 'action': 'binary',
410 'line': binary_msg,
414 'line': msg,
411 }])
415 }])
412
416
413 _files.append({
417 _files.append({
414 'filename': head['b_path'],
418 'filename': head['b_path'],
415 'old_revision': head['a_blob_id'],
419 'old_revision': head['a_blob_id'],
416 'new_revision': head['b_blob_id'],
420 'new_revision': head['b_blob_id'],
417 'chunks': chunks,
421 'chunks': chunks,
418 'operation': op,
422 'operation': op,
419 'stats': stats,
423 'stats': stats,
420 })
424 })
421
425
422 sorter = lambda info: {'A': 0, 'M': 1, 'D': 2}.get(info['operation'])
426 sorter = lambda info: {'A': 0, 'M': 1, 'D': 2}.get(info['operation'])
423
427
424 if not inline_diff:
428 if not inline_diff:
425 return diff_container(sorted(_files, key=sorter))
429 return diff_container(sorted(_files, key=sorter))
426
430
427 # highlight inline changes
431 # highlight inline changes
428 for diff_data in _files:
432 for diff_data in _files:
429 for chunk in diff_data['chunks']:
433 for chunk in diff_data['chunks']:
430 lineiter = iter(chunk)
434 lineiter = iter(chunk)
431 try:
435 try:
432 while 1:
436 while 1:
433 line = lineiter.next()
437 line = lineiter.next()
434 if line['action'] not in ['unmod', 'context']:
438 if line['action'] not in ['unmod', 'context']:
435 nextline = lineiter.next()
439 nextline = lineiter.next()
436 if nextline['action'] in ['unmod', 'context'] or \
440 if nextline['action'] in ['unmod', 'context'] or \
437 nextline['action'] == line['action']:
441 nextline['action'] == line['action']:
438 continue
442 continue
439 self.differ(line, nextline)
443 self.differ(line, nextline)
440 except StopIteration:
444 except StopIteration:
441 pass
445 pass
442
446
443 return diff_container(sorted(_files, key=sorter))
447 return diff_container(sorted(_files, key=sorter))
444
448
445 def _parse_udiff(self, inline_diff=True):
449 def _parse_udiff(self, inline_diff=True):
446 raise NotImplementedError()
450 raise NotImplementedError()
447
451
448 def _parse_lines(self, diff):
452 def _parse_lines(self, diff):
449 """
453 """
450 Parse the diff and return data for the template.
454 Parse the diff and return data for the template.
451 """
455 """
452
456
453 lineiter = iter(diff)
457 lineiter = iter(diff)
454 stats = [0, 0]
458 stats = [0, 0]
455
459
456 try:
460 try:
457 chunks = []
461 chunks = []
458 line = lineiter.next()
462 line = lineiter.next()
459
463
460 while line:
464 while line:
461 lines = []
465 lines = []
462 chunks.append(lines)
466 chunks.append(lines)
463
467
464 match = self._chunk_re.match(line)
468 match = self._chunk_re.match(line)
465
469
466 if not match:
470 if not match:
467 break
471 break
468
472
469 gr = match.groups()
473 gr = match.groups()
470 (old_line, old_end,
474 (old_line, old_end,
471 new_line, new_end) = [int(x or 1) for x in gr[:-1]]
475 new_line, new_end) = [int(x or 1) for x in gr[:-1]]
472 old_line -= 1
476 old_line -= 1
473 new_line -= 1
477 new_line -= 1
474
478
475 context = len(gr) == 5
479 context = len(gr) == 5
476 old_end += old_line
480 old_end += old_line
477 new_end += new_line
481 new_end += new_line
478
482
479 if context:
483 if context:
480 # skip context only if it's first line
484 # skip context only if it's first line
481 if int(gr[0]) > 1:
485 if int(gr[0]) > 1:
482 lines.append({
486 lines.append({
483 'old_lineno': '...',
487 'old_lineno': '...',
484 'new_lineno': '...',
488 'new_lineno': '...',
485 'action': 'context',
489 'action': 'context',
486 'line': line,
490 'line': line,
487 })
491 })
488
492
489 line = lineiter.next()
493 line = lineiter.next()
490
494
491 while old_line < old_end or new_line < new_end:
495 while old_line < old_end or new_line < new_end:
492 command = ' '
496 command = ' '
493 if line:
497 if line:
494 command = line[0]
498 command = line[0]
495
499
496 affects_old = affects_new = False
500 affects_old = affects_new = False
497
501
498 # ignore those if we don't expect them
502 # ignore those if we don't expect them
499 if command in '#@':
503 if command in '#@':
500 continue
504 continue
501 elif command == '+':
505 elif command == '+':
502 affects_new = True
506 affects_new = True
503 action = 'add'
507 action = 'add'
504 stats[0] += 1
508 stats[0] += 1
505 elif command == '-':
509 elif command == '-':
506 affects_old = True
510 affects_old = True
507 action = 'del'
511 action = 'del'
508 stats[1] += 1
512 stats[1] += 1
509 else:
513 else:
510 affects_old = affects_new = True
514 affects_old = affects_new = True
511 action = 'unmod'
515 action = 'unmod'
512
516
513 if not self._newline_marker.match(line):
517 if not self._newline_marker.match(line):
514 old_line += affects_old
518 old_line += affects_old
515 new_line += affects_new
519 new_line += affects_new
516 lines.append({
520 lines.append({
517 'old_lineno': affects_old and old_line or '',
521 'old_lineno': affects_old and old_line or '',
518 'new_lineno': affects_new and new_line or '',
522 'new_lineno': affects_new and new_line or '',
519 'action': action,
523 'action': action,
520 'line': self._clean_line(line, command)
524 'line': self._clean_line(line, command)
521 })
525 })
522
526
523 line = lineiter.next()
527 line = lineiter.next()
524
528
525 if self._newline_marker.match(line):
529 if self._newline_marker.match(line):
526 # we need to append to lines, since this is not
530 # we need to append to lines, since this is not
527 # counted in the line specs of diff
531 # counted in the line specs of diff
528 lines.append({
532 lines.append({
529 'old_lineno': '...',
533 'old_lineno': '...',
530 'new_lineno': '...',
534 'new_lineno': '...',
531 'action': 'context',
535 'action': 'context',
532 'line': self._clean_line(line, command)
536 'line': self._clean_line(line, command)
533 })
537 })
534
538
535 except StopIteration:
539 except StopIteration:
536 pass
540 pass
537 return chunks, stats
541 return chunks, stats
538
542
def _safe_id(self, idstring):
    r"""Reduce ``idstring`` to characters usable inside an HTML id.

    The value is first formatted to a string, then:

    1. every whitespace character is replaced with an underscore,
    2. every character that is neither a hyphen nor matched by ``\w``
       (alphanumerics and underscore) is dropped,
    3. the result is lower-cased.

    This is stricter than the HTML rules require: colons and periods
    would be legal in an id but are removed as well. Nothing here
    forces the result to start with a letter.

    :param idstring: value to sanitize (formatted with ``'%s'`` first)
    :returns: the sanitized, lower-cased string
    """
    text = '%s' % idstring
    # Whitespace becomes "_" before the \W pass so it is not removed.
    underscored = re.sub(r'\s', "_", text)
    # The negative lookahead keeps hyphens, which \W would otherwise match.
    stripped = re.sub(r'(?!-)\W', "", underscored)
    return stripped.lower()
559
563
def prepare(self, inline_diff=True):
    """
    Parse the diff held by this instance and cache the result.

    Calls ``self._parser`` with ``inline_diff`` passed through, marks
    the instance as parsed and stores the outcome in
    ``self.parsed_diff`` so that it can be reused for HTML rendering.

    :param inline_diff: forwarded unchanged to ``self._parser``
    :returns: whatever ``self._parser`` returned (presumably a list of
        dicts with per-file diff information -- confirm against
        ``_parser``)
    """
    result = self._parser(inline_diff=inline_diff)
    # Flag first, then the payload -- same order as attribute readers expect.
    self.parsed, self.parsed_diff = True, result
    return result
569
573
def as_raw(self, diff_lines=None):
    """
    Return the raw diff exactly as stored in ``self._diff``.

    :param diff_lines: accepted for interface compatibility; it is not
        used by this method
    """
    raw = self._diff
    return raw
576
580
def as_html(self, table_class='code-difftable', line_class='line',
            old_lineno_class='lineno old', new_lineno_class='lineno new',
            code_class='code', enable_comments=False, parsed_lines=None):
    """
    Render the parsed diff as an HTML table with configurable css classes.

    If the instance has not been parsed yet, ``self.prepare()`` is
    called first. Every change line becomes one ``<tr>`` with three
    cells: old line number, new line number and the code itself.
    Line-number cells get an ``id`` anchor only when the line number is
    non-empty and is not the ``'...'`` placeholder.

    :param table_class: css class of the ``<table>`` element
    :param line_class: css class put on every ``<tr>`` (followed by the
        change's action)
    :param old_lineno_class: css class of the old line number cell
    :param new_lineno_class: css class of the new line number cell
    :param code_class: css class of the code cell
    :param enable_comments: when false, the code cell additionally gets
        the ``no-comment`` class
    :param parsed_lines: if truthy, rendered instead of
        ``self.parsed_diff``
    :returns: the HTML string, or ``None`` when no chunk was found

    NOTE(review): ``change['line']`` is inserted into ``<pre>`` without
    escaping here -- presumably it was escaped while parsing; confirm.
    """
    def _link_to_if(condition, label, url):
        """
        Wrap ``label`` in a link to ``url`` if ``condition`` holds,
        otherwise hand back the bare label.
        """
        if not condition:
            return label
        return '''<a href="%(url)s">%(label)s</a>''' % {
            'url': url,
            'label': label
        }

    def _is_real_lineno(value):
        # Falsy for '' (side not affected) and for the '...' marker.
        return value != '...' and value

    if not self.parsed:
        self.prepare()

    diff_lines = self.parsed_diff
    if parsed_lines:
        diff_lines = parsed_lines

    # Same for every row, so work it out once.
    comments = '' if enable_comments else 'no-comment'

    seen_chunk = False
    pieces = ['''<table class="%(table_class)s">\n''' % {
        'table_class': table_class
    }]

    for diff in diff_lines:
        for chunk in diff['chunks']:
            seen_chunk = True
            for change in chunk:
                row = ['''<tr class="%(lc)s %(action)s">\n''' % {
                    'lc': line_class,
                    'action': change['action']
                }]

                file_id = self._safe_id(diff['filename'])
                old_no = change['old_lineno']
                new_no = change['new_lineno']
                anchor_old = "%(filename)s_o%(oldline_no)s" % {
                    'filename': file_id,
                    'oldline_no': old_no
                }
                anchor_new = "%(filename)s_n%(oldline_no)s" % {
                    'filename': file_id,
                    'oldline_no': new_no
                }
                anchor_old_id = (
                    'id="%s"' % anchor_old if _is_real_lineno(old_no) else ''
                )
                anchor_new_id = (
                    'id="%s"' % anchor_new if _is_real_lineno(new_no) else ''
                )

                # ---- old line number cell ----
                row.append('''\t<td %(a_id)s class="%(olc)s">''' % {
                    'a_id': anchor_old_id,
                    'olc': old_lineno_class
                })
                row.append('''%(link)s''' % {
                    'link': _link_to_if(True, old_no, '#%s' % anchor_old)
                })
                row.append('''</td>\n''')

                # ---- new line number cell ----
                row.append('''\t<td %(a_id)s class="%(nlc)s">''' % {
                    'a_id': anchor_new_id,
                    'nlc': new_lineno_class
                })
                row.append('''%(link)s''' % {
                    'link': _link_to_if(True, new_no, '#%s' % anchor_new)
                })
                row.append('''</td>\n''')

                # ---- code cell ----
                row.append('''\t<td class="%(cc)s %(inc)s">''' % {
                    'cc': code_class,
                    'inc': comments
                })
                row.append('''\n\t\t<pre>%(code)s</pre>\n''' % {
                    'code': change['line']
                })
                row.append('''\t</td>''')
                row.append('''\n</tr>\n''')

                pieces.extend(row)

    pieces.append('''</table>''')
    if not seen_chunk:
        return None
    return ''.join(pieces)
679
683
def stat(self):
    """
    Return the ``(adds, removes)`` pair stored on this instance, i.e.
    the counts of added and removed lines.
    """
    added, removed = self.adds, self.removes
    return added, removed
General Comments 0
You need to be logged in to leave comments. Login now