##// END OF EJS Templates
diff parser: prefer git headers over old unified diff headers...
Mads Kiilerich -
r3820:8df1bc51 beta
parent child Browse files
Show More
@@ -1,691 +1,688 b''
1 # -*- coding: utf-8 -*-
1 # -*- coding: utf-8 -*-
2 """
2 """
3 rhodecode.lib.diffs
3 rhodecode.lib.diffs
4 ~~~~~~~~~~~~~~~~~~~
4 ~~~~~~~~~~~~~~~~~~~
5
5
6 Set of diffing helpers, previously part of vcs
6 Set of diffing helpers, previously part of vcs
7
7
8
8
9 :created_on: Dec 4, 2011
9 :created_on: Dec 4, 2011
10 :author: marcink
10 :author: marcink
11 :copyright: (C) 2010-2012 Marcin Kuzminski <marcin@python-works.com>
11 :copyright: (C) 2010-2012 Marcin Kuzminski <marcin@python-works.com>
12 :original copyright: 2007-2008 by Armin Ronacher
12 :original copyright: 2007-2008 by Armin Ronacher
13 :license: GPLv3, see COPYING for more details.
13 :license: GPLv3, see COPYING for more details.
14 """
14 """
15 # This program is free software: you can redistribute it and/or modify
15 # This program is free software: you can redistribute it and/or modify
16 # it under the terms of the GNU General Public License as published by
16 # it under the terms of the GNU General Public License as published by
17 # the Free Software Foundation, either version 3 of the License, or
17 # the Free Software Foundation, either version 3 of the License, or
18 # (at your option) any later version.
18 # (at your option) any later version.
19 #
19 #
20 # This program is distributed in the hope that it will be useful,
20 # This program is distributed in the hope that it will be useful,
21 # but WITHOUT ANY WARRANTY; without even the implied warranty of
21 # but WITHOUT ANY WARRANTY; without even the implied warranty of
22 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23 # GNU General Public License for more details.
23 # GNU General Public License for more details.
24 #
24 #
25 # You should have received a copy of the GNU General Public License
25 # You should have received a copy of the GNU General Public License
26 # along with this program. If not, see <http://www.gnu.org/licenses/>.
26 # along with this program. If not, see <http://www.gnu.org/licenses/>.
27
27
28 import re
28 import re
29 import difflib
29 import difflib
30 import logging
30 import logging
31
31
32 from itertools import tee, imap
32 from itertools import tee, imap
33
33
34 from pylons.i18n.translation import _
34 from pylons.i18n.translation import _
35
35
36 from rhodecode.lib.vcs.exceptions import VCSError
36 from rhodecode.lib.vcs.exceptions import VCSError
37 from rhodecode.lib.vcs.nodes import FileNode, SubModuleNode
37 from rhodecode.lib.vcs.nodes import FileNode, SubModuleNode
38 from rhodecode.lib.vcs.backends.base import EmptyChangeset
38 from rhodecode.lib.vcs.backends.base import EmptyChangeset
39 from rhodecode.lib.helpers import escape
39 from rhodecode.lib.helpers import escape
40 from rhodecode.lib.utils2 import safe_unicode, safe_str
40 from rhodecode.lib.utils2 import safe_unicode, safe_str
41
41
42 log = logging.getLogger(__name__)
42 log = logging.getLogger(__name__)
43
43
44
44
45 def wrap_to_table(str_):
45 def wrap_to_table(str_):
46 return '''<table class="code-difftable">
46 return '''<table class="code-difftable">
47 <tr class="line no-comment">
47 <tr class="line no-comment">
48 <td class="lineno new"></td>
48 <td class="lineno new"></td>
49 <td class="code no-comment"><pre>%s</pre></td>
49 <td class="code no-comment"><pre>%s</pre></td>
50 </tr>
50 </tr>
51 </table>''' % str_
51 </table>''' % str_
52
52
53
53
54 def wrapped_diff(filenode_old, filenode_new, cut_off_limit=None,
54 def wrapped_diff(filenode_old, filenode_new, cut_off_limit=None,
55 ignore_whitespace=True, line_context=3,
55 ignore_whitespace=True, line_context=3,
56 enable_comments=False):
56 enable_comments=False):
57 """
57 """
58 returns a wrapped diff into a table, checks for cut_off_limit and presents
58 returns a wrapped diff into a table, checks for cut_off_limit and presents
59 proper message
59 proper message
60 """
60 """
61
61
62 if filenode_old is None:
62 if filenode_old is None:
63 filenode_old = FileNode(filenode_new.path, '', EmptyChangeset())
63 filenode_old = FileNode(filenode_new.path, '', EmptyChangeset())
64
64
65 if filenode_old.is_binary or filenode_new.is_binary:
65 if filenode_old.is_binary or filenode_new.is_binary:
66 diff = wrap_to_table(_('Binary file'))
66 diff = wrap_to_table(_('Binary file'))
67 stats = (0, 0)
67 stats = (0, 0)
68 size = 0
68 size = 0
69
69
70 elif cut_off_limit != -1 and (cut_off_limit is None or
70 elif cut_off_limit != -1 and (cut_off_limit is None or
71 (filenode_old.size < cut_off_limit and filenode_new.size < cut_off_limit)):
71 (filenode_old.size < cut_off_limit and filenode_new.size < cut_off_limit)):
72
72
73 f_gitdiff = get_gitdiff(filenode_old, filenode_new,
73 f_gitdiff = get_gitdiff(filenode_old, filenode_new,
74 ignore_whitespace=ignore_whitespace,
74 ignore_whitespace=ignore_whitespace,
75 context=line_context)
75 context=line_context)
76 diff_processor = DiffProcessor(f_gitdiff, format='gitdiff')
76 diff_processor = DiffProcessor(f_gitdiff, format='gitdiff')
77
77
78 diff = diff_processor.as_html(enable_comments=enable_comments)
78 diff = diff_processor.as_html(enable_comments=enable_comments)
79 stats = diff_processor.stat()
79 stats = diff_processor.stat()
80 size = len(diff or '')
80 size = len(diff or '')
81 else:
81 else:
82 diff = wrap_to_table(_('Changeset was too big and was cut off, use '
82 diff = wrap_to_table(_('Changeset was too big and was cut off, use '
83 'diff menu to display this diff'))
83 'diff menu to display this diff'))
84 stats = (0, 0)
84 stats = (0, 0)
85 size = 0
85 size = 0
86 if not diff:
86 if not diff:
87 submodules = filter(lambda o: isinstance(o, SubModuleNode),
87 submodules = filter(lambda o: isinstance(o, SubModuleNode),
88 [filenode_new, filenode_old])
88 [filenode_new, filenode_old])
89 if submodules:
89 if submodules:
90 diff = wrap_to_table(escape('Submodule %r' % submodules[0]))
90 diff = wrap_to_table(escape('Submodule %r' % submodules[0]))
91 else:
91 else:
92 diff = wrap_to_table(_('No changes detected'))
92 diff = wrap_to_table(_('No changes detected'))
93
93
94 cs1 = filenode_old.changeset.raw_id
94 cs1 = filenode_old.changeset.raw_id
95 cs2 = filenode_new.changeset.raw_id
95 cs2 = filenode_new.changeset.raw_id
96
96
97 return size, cs1, cs2, diff, stats
97 return size, cs1, cs2, diff, stats
98
98
99
99
100 def get_gitdiff(filenode_old, filenode_new, ignore_whitespace=True, context=3):
100 def get_gitdiff(filenode_old, filenode_new, ignore_whitespace=True, context=3):
101 """
101 """
102 Returns git style diff between given ``filenode_old`` and ``filenode_new``.
102 Returns git style diff between given ``filenode_old`` and ``filenode_new``.
103
103
104 :param ignore_whitespace: ignore whitespaces in diff
104 :param ignore_whitespace: ignore whitespaces in diff
105 """
105 """
106 # make sure we pass in default context
106 # make sure we pass in default context
107 context = context or 3
107 context = context or 3
108 submodules = filter(lambda o: isinstance(o, SubModuleNode),
108 submodules = filter(lambda o: isinstance(o, SubModuleNode),
109 [filenode_new, filenode_old])
109 [filenode_new, filenode_old])
110 if submodules:
110 if submodules:
111 return ''
111 return ''
112
112
113 for filenode in (filenode_old, filenode_new):
113 for filenode in (filenode_old, filenode_new):
114 if not isinstance(filenode, FileNode):
114 if not isinstance(filenode, FileNode):
115 raise VCSError("Given object should be FileNode object, not %s"
115 raise VCSError("Given object should be FileNode object, not %s"
116 % filenode.__class__)
116 % filenode.__class__)
117
117
118 repo = filenode_new.changeset.repository
118 repo = filenode_new.changeset.repository
119 old_raw_id = getattr(filenode_old.changeset, 'raw_id', repo.EMPTY_CHANGESET)
119 old_raw_id = getattr(filenode_old.changeset, 'raw_id', repo.EMPTY_CHANGESET)
120 new_raw_id = getattr(filenode_new.changeset, 'raw_id', repo.EMPTY_CHANGESET)
120 new_raw_id = getattr(filenode_new.changeset, 'raw_id', repo.EMPTY_CHANGESET)
121
121
122 vcs_gitdiff = repo.get_diff(old_raw_id, new_raw_id, filenode_new.path,
122 vcs_gitdiff = repo.get_diff(old_raw_id, new_raw_id, filenode_new.path,
123 ignore_whitespace, context)
123 ignore_whitespace, context)
124 return vcs_gitdiff
124 return vcs_gitdiff
125
125
126 NEW_FILENODE = 1
126 NEW_FILENODE = 1
127 DEL_FILENODE = 2
127 DEL_FILENODE = 2
128 MOD_FILENODE = 3
128 MOD_FILENODE = 3
129 RENAMED_FILENODE = 4
129 RENAMED_FILENODE = 4
130 CHMOD_FILENODE = 5
130 CHMOD_FILENODE = 5
131
131
132
132
133 class DiffLimitExceeded(Exception):
133 class DiffLimitExceeded(Exception):
134 pass
134 pass
135
135
136
136
137 class LimitedDiffContainer(object):
137 class LimitedDiffContainer(object):
138
138
139 def __init__(self, diff_limit, cur_diff_size, diff):
139 def __init__(self, diff_limit, cur_diff_size, diff):
140 self.diff = diff
140 self.diff = diff
141 self.diff_limit = diff_limit
141 self.diff_limit = diff_limit
142 self.cur_diff_size = cur_diff_size
142 self.cur_diff_size = cur_diff_size
143
143
144 def __iter__(self):
144 def __iter__(self):
145 for l in self.diff:
145 for l in self.diff:
146 yield l
146 yield l
147
147
148
148
149 class DiffProcessor(object):
149 class DiffProcessor(object):
150 """
150 """
151 Give it a unified or git diff and it returns a list of the files that were
151 Give it a unified or git diff and it returns a list of the files that were
152 mentioned in the diff together with a dict of meta information that
152 mentioned in the diff together with a dict of meta information that
153 can be used to render it in a HTML template.
153 can be used to render it in a HTML template.
154 """
154 """
155 _chunk_re = re.compile(r'^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(.*)')
155 _chunk_re = re.compile(r'^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(.*)')
156 _newline_marker = re.compile(r'^\\ No newline at end of file')
156 _newline_marker = re.compile(r'^\\ No newline at end of file')
157 _git_header_re = re.compile(r"""
157 _git_header_re = re.compile(r"""
158 #^diff[ ]--git
158 #^diff[ ]--git
159 [ ]a/(?P<a_path>.+?)[ ]b/(?P<b_path>.+?)\n
159 [ ]a/(?P<a_path>.+?)[ ]b/(?P<b_path>.+?)\n
160 (?:^similarity[ ]index[ ](?P<similarity_index>\d+)%\n
160 (?:^similarity[ ]index[ ](?P<similarity_index>\d+)%\n
161 ^rename[ ]from[ ](?P<rename_from>\S+)\n
161 ^rename[ ]from[ ](?P<rename_from>\S+)\n
162 ^rename[ ]to[ ](?P<rename_to>\S+)(?:\n|$))?
162 ^rename[ ]to[ ](?P<rename_to>\S+)(?:\n|$))?
163 (?:^old[ ]mode[ ](?P<old_mode>\d+)\n
163 (?:^old[ ]mode[ ](?P<old_mode>\d+)\n
164 ^new[ ]mode[ ](?P<new_mode>\d+)(?:\n|$))?
164 ^new[ ]mode[ ](?P<new_mode>\d+)(?:\n|$))?
165 (?:^new[ ]file[ ]mode[ ](?P<new_file_mode>.+)(?:\n|$))?
165 (?:^new[ ]file[ ]mode[ ](?P<new_file_mode>.+)(?:\n|$))?
166 (?:^deleted[ ]file[ ]mode[ ](?P<deleted_file_mode>.+)(?:\n|$))?
166 (?:^deleted[ ]file[ ]mode[ ](?P<deleted_file_mode>.+)(?:\n|$))?
167 (?:^index[ ](?P<a_blob_id>[0-9A-Fa-f]+)
167 (?:^index[ ](?P<a_blob_id>[0-9A-Fa-f]+)
168 \.\.(?P<b_blob_id>[0-9A-Fa-f]+)[ ]?(?P<b_mode>.+)?(?:\n|$))?
168 \.\.(?P<b_blob_id>[0-9A-Fa-f]+)[ ]?(?P<b_mode>.+)?(?:\n|$))?
169 (?:^---[ ](a/(?P<a_file>.+)|/dev/null)(?:\n|$))?
169 (?:^---[ ](a/(?P<a_file>.+)|/dev/null)(?:\n|$))?
170 (?:^\+\+\+[ ](b/(?P<b_file>.+)|/dev/null)(?:\n|$))?
170 (?:^\+\+\+[ ](b/(?P<b_file>.+)|/dev/null)(?:\n|$))?
171 """, re.VERBOSE | re.MULTILINE)
171 """, re.VERBOSE | re.MULTILINE)
172 _hg_header_re = re.compile(r"""
172 _hg_header_re = re.compile(r"""
173 #^diff[ ]--git
173 #^diff[ ]--git
174 [ ]a/(?P<a_path>.+?)[ ]b/(?P<b_path>.+?)\n
174 [ ]a/(?P<a_path>.+?)[ ]b/(?P<b_path>.+?)\n
175 (?:^old[ ]mode[ ](?P<old_mode>\d+)\n
175 (?:^old[ ]mode[ ](?P<old_mode>\d+)\n
176 ^new[ ]mode[ ](?P<new_mode>\d+)(?:\n|$))?
176 ^new[ ]mode[ ](?P<new_mode>\d+)(?:\n|$))?
177 (?:^similarity[ ]index[ ](?P<similarity_index>\d+)%(?:\n|$))?
177 (?:^similarity[ ]index[ ](?P<similarity_index>\d+)%(?:\n|$))?
178 (?:^rename[ ]from[ ](?P<rename_from>\S+)\n
178 (?:^rename[ ]from[ ](?P<rename_from>\S+)\n
179 ^rename[ ]to[ ](?P<rename_to>\S+)(?:\n|$))?
179 ^rename[ ]to[ ](?P<rename_to>\S+)(?:\n|$))?
180 (?:^new[ ]file[ ]mode[ ](?P<new_file_mode>.+)(?:\n|$))?
180 (?:^new[ ]file[ ]mode[ ](?P<new_file_mode>.+)(?:\n|$))?
181 (?:^deleted[ ]file[ ]mode[ ](?P<deleted_file_mode>.+)(?:\n|$))?
181 (?:^deleted[ ]file[ ]mode[ ](?P<deleted_file_mode>.+)(?:\n|$))?
182 (?:^index[ ](?P<a_blob_id>[0-9A-Fa-f]+)
182 (?:^index[ ](?P<a_blob_id>[0-9A-Fa-f]+)
183 \.\.(?P<b_blob_id>[0-9A-Fa-f]+)[ ]?(?P<b_mode>.+)?(?:\n|$))?
183 \.\.(?P<b_blob_id>[0-9A-Fa-f]+)[ ]?(?P<b_mode>.+)?(?:\n|$))?
184 (?:^---[ ](a/(?P<a_file>.+)|/dev/null)(?:\n|$))?
184 (?:^---[ ](a/(?P<a_file>.+)|/dev/null)(?:\n|$))?
185 (?:^\+\+\+[ ](b/(?P<b_file>.+)|/dev/null)(?:\n|$))?
185 (?:^\+\+\+[ ](b/(?P<b_file>.+)|/dev/null)(?:\n|$))?
186 """, re.VERBOSE | re.MULTILINE)
186 """, re.VERBOSE | re.MULTILINE)
187
187
188 #used for inline highlighter word split
188 #used for inline highlighter word split
189 _token_re = re.compile(r'()(&gt;|&lt;|&amp;|\W+?)')
189 _token_re = re.compile(r'()(&gt;|&lt;|&amp;|\W+?)')
190
190
191 def __init__(self, diff, vcs='hg', format='gitdiff', diff_limit=None):
191 def __init__(self, diff, vcs='hg', format='gitdiff', diff_limit=None):
192 """
192 """
193 :param diff: a text in diff format
193 :param diff: a text in diff format
194 :param vcs: type of version controll hg or git
194 :param vcs: type of version controll hg or git
195 :param format: format of diff passed, `udiff` or `gitdiff`
195 :param format: format of diff passed, `udiff` or `gitdiff`
196 :param diff_limit: define the size of diff that is considered "big"
196 :param diff_limit: define the size of diff that is considered "big"
197 based on that parameter cut off will be triggered, set to None
197 based on that parameter cut off will be triggered, set to None
198 to show full diff
198 to show full diff
199 """
199 """
200 if not isinstance(diff, basestring):
200 if not isinstance(diff, basestring):
201 raise Exception('Diff must be a basestring got %s instead' % type(diff))
201 raise Exception('Diff must be a basestring got %s instead' % type(diff))
202
202
203 self._diff = diff
203 self._diff = diff
204 self._format = format
204 self._format = format
205 self.adds = 0
205 self.adds = 0
206 self.removes = 0
206 self.removes = 0
207 # calculate diff size
207 # calculate diff size
208 self.diff_size = len(diff)
208 self.diff_size = len(diff)
209 self.diff_limit = diff_limit
209 self.diff_limit = diff_limit
210 self.cur_diff_size = 0
210 self.cur_diff_size = 0
211 self.parsed = False
211 self.parsed = False
212 self.parsed_diff = []
212 self.parsed_diff = []
213 self.vcs = vcs
213 self.vcs = vcs
214
214
215 if format == 'gitdiff':
215 if format == 'gitdiff':
216 self.differ = self._highlight_line_difflib
216 self.differ = self._highlight_line_difflib
217 self._parser = self._parse_gitdiff
217 self._parser = self._parse_gitdiff
218 else:
218 else:
219 self.differ = self._highlight_line_udiff
219 self.differ = self._highlight_line_udiff
220 self._parser = self._parse_udiff
220 self._parser = self._parse_udiff
221
221
222 def _copy_iterator(self):
222 def _copy_iterator(self):
223 """
223 """
224 make a fresh copy of generator, we should not iterate thru
224 make a fresh copy of generator, we should not iterate thru
225 an original as it's needed for repeating operations on
225 an original as it's needed for repeating operations on
226 this instance of DiffProcessor
226 this instance of DiffProcessor
227 """
227 """
228 self.__udiff, iterator_copy = tee(self.__udiff)
228 self.__udiff, iterator_copy = tee(self.__udiff)
229 return iterator_copy
229 return iterator_copy
230
230
231 def _escaper(self, string):
231 def _escaper(self, string):
232 """
232 """
233 Escaper for diff escapes special chars and checks the diff limit
233 Escaper for diff escapes special chars and checks the diff limit
234
234
235 :param string:
235 :param string:
236 :type string:
236 :type string:
237 """
237 """
238
238
239 self.cur_diff_size += len(string)
239 self.cur_diff_size += len(string)
240
240
241 # escaper get's iterated on each .next() call and it checks if each
241 # escaper get's iterated on each .next() call and it checks if each
242 # parsed line doesn't exceed the diff limit
242 # parsed line doesn't exceed the diff limit
243 if self.diff_limit is not None and self.cur_diff_size > self.diff_limit:
243 if self.diff_limit is not None and self.cur_diff_size > self.diff_limit:
244 raise DiffLimitExceeded('Diff Limit Exceeded')
244 raise DiffLimitExceeded('Diff Limit Exceeded')
245
245
246 return safe_unicode(string).replace('&', '&amp;')\
246 return safe_unicode(string).replace('&', '&amp;')\
247 .replace('<', '&lt;')\
247 .replace('<', '&lt;')\
248 .replace('>', '&gt;')
248 .replace('>', '&gt;')
249
249
250 def _line_counter(self, l):
250 def _line_counter(self, l):
251 """
251 """
252 Checks each line and bumps total adds/removes for this diff
252 Checks each line and bumps total adds/removes for this diff
253
253
254 :param l:
254 :param l:
255 """
255 """
256 if l.startswith('+') and not l.startswith('+++'):
256 if l.startswith('+') and not l.startswith('+++'):
257 self.adds += 1
257 self.adds += 1
258 elif l.startswith('-') and not l.startswith('---'):
258 elif l.startswith('-') and not l.startswith('---'):
259 self.removes += 1
259 self.removes += 1
260 return safe_unicode(l)
260 return safe_unicode(l)
261
261
262 def _highlight_line_difflib(self, line, next_):
262 def _highlight_line_difflib(self, line, next_):
263 """
263 """
264 Highlight inline changes in both lines.
264 Highlight inline changes in both lines.
265 """
265 """
266
266
267 if line['action'] == 'del':
267 if line['action'] == 'del':
268 old, new = line, next_
268 old, new = line, next_
269 else:
269 else:
270 old, new = next_, line
270 old, new = next_, line
271
271
272 oldwords = self._token_re.split(old['line'])
272 oldwords = self._token_re.split(old['line'])
273 newwords = self._token_re.split(new['line'])
273 newwords = self._token_re.split(new['line'])
274 sequence = difflib.SequenceMatcher(None, oldwords, newwords)
274 sequence = difflib.SequenceMatcher(None, oldwords, newwords)
275
275
276 oldfragments, newfragments = [], []
276 oldfragments, newfragments = [], []
277 for tag, i1, i2, j1, j2 in sequence.get_opcodes():
277 for tag, i1, i2, j1, j2 in sequence.get_opcodes():
278 oldfrag = ''.join(oldwords[i1:i2])
278 oldfrag = ''.join(oldwords[i1:i2])
279 newfrag = ''.join(newwords[j1:j2])
279 newfrag = ''.join(newwords[j1:j2])
280 if tag != 'equal':
280 if tag != 'equal':
281 if oldfrag:
281 if oldfrag:
282 oldfrag = '<del>%s</del>' % oldfrag
282 oldfrag = '<del>%s</del>' % oldfrag
283 if newfrag:
283 if newfrag:
284 newfrag = '<ins>%s</ins>' % newfrag
284 newfrag = '<ins>%s</ins>' % newfrag
285 oldfragments.append(oldfrag)
285 oldfragments.append(oldfrag)
286 newfragments.append(newfrag)
286 newfragments.append(newfrag)
287
287
288 old['line'] = "".join(oldfragments)
288 old['line'] = "".join(oldfragments)
289 new['line'] = "".join(newfragments)
289 new['line'] = "".join(newfragments)
290
290
291 def _highlight_line_udiff(self, line, next_):
291 def _highlight_line_udiff(self, line, next_):
292 """
292 """
293 Highlight inline changes in both lines.
293 Highlight inline changes in both lines.
294 """
294 """
295 start = 0
295 start = 0
296 limit = min(len(line['line']), len(next_['line']))
296 limit = min(len(line['line']), len(next_['line']))
297 while start < limit and line['line'][start] == next_['line'][start]:
297 while start < limit and line['line'][start] == next_['line'][start]:
298 start += 1
298 start += 1
299 end = -1
299 end = -1
300 limit -= start
300 limit -= start
301 while -end <= limit and line['line'][end] == next_['line'][end]:
301 while -end <= limit and line['line'][end] == next_['line'][end]:
302 end -= 1
302 end -= 1
303 end += 1
303 end += 1
304 if start or end:
304 if start or end:
305 def do(l):
305 def do(l):
306 last = end + len(l['line'])
306 last = end + len(l['line'])
307 if l['action'] == 'add':
307 if l['action'] == 'add':
308 tag = 'ins'
308 tag = 'ins'
309 else:
309 else:
310 tag = 'del'
310 tag = 'del'
311 l['line'] = '%s<%s>%s</%s>%s' % (
311 l['line'] = '%s<%s>%s</%s>%s' % (
312 l['line'][:start],
312 l['line'][:start],
313 tag,
313 tag,
314 l['line'][start:last],
314 l['line'][start:last],
315 tag,
315 tag,
316 l['line'][last:]
316 l['line'][last:]
317 )
317 )
318 do(line)
318 do(line)
319 do(next_)
319 do(next_)
320
320
321 def _get_header(self, diff_chunk):
321 def _get_header(self, diff_chunk):
322 """
322 """
323 parses the diff header, and returns parts, and leftover diff
323 parses the diff header, and returns parts, and leftover diff
324 parts consists of 14 elements::
324 parts consists of 14 elements::
325
325
326 a_path, b_path, similarity_index, rename_from, rename_to,
326 a_path, b_path, similarity_index, rename_from, rename_to,
327 old_mode, new_mode, new_file_mode, deleted_file_mode,
327 old_mode, new_mode, new_file_mode, deleted_file_mode,
328 a_blob_id, b_blob_id, b_mode, a_file, b_file
328 a_blob_id, b_blob_id, b_mode, a_file, b_file
329
329
330 :param diff_chunk:
330 :param diff_chunk:
331 :type diff_chunk:
331 :type diff_chunk:
332 """
332 """
333
333
334 if self.vcs == 'git':
334 if self.vcs == 'git':
335 match = self._git_header_re.match(diff_chunk)
335 match = self._git_header_re.match(diff_chunk)
336 diff = diff_chunk[match.end():]
336 diff = diff_chunk[match.end():]
337 return match.groupdict(), imap(self._escaper, diff.splitlines(1))
337 return match.groupdict(), imap(self._escaper, diff.splitlines(1))
338 elif self.vcs == 'hg':
338 elif self.vcs == 'hg':
339 match = self._hg_header_re.match(diff_chunk)
339 match = self._hg_header_re.match(diff_chunk)
340 diff = diff_chunk[match.end():]
340 diff = diff_chunk[match.end():]
341 return match.groupdict(), imap(self._escaper, diff.splitlines(1))
341 return match.groupdict(), imap(self._escaper, diff.splitlines(1))
342 else:
342 else:
343 raise Exception('VCS type %s is not supported' % self.vcs)
343 raise Exception('VCS type %s is not supported' % self.vcs)
344
344
345 def _clean_line(self, line, command):
345 def _clean_line(self, line, command):
346 if command in ['+', '-', ' ']:
346 if command in ['+', '-', ' ']:
347 #only modify the line if it's actually a diff thing
347 #only modify the line if it's actually a diff thing
348 line = line[1:]
348 line = line[1:]
349 return line
349 return line
350
350
351 def _parse_gitdiff(self, inline_diff=True):
351 def _parse_gitdiff(self, inline_diff=True):
352 _files = []
352 _files = []
353 diff_container = lambda arg: arg
353 diff_container = lambda arg: arg
354
354
355 ##split the diff in chunks of separate --git a/file b/file chunks
355 ##split the diff in chunks of separate --git a/file b/file chunks
356 for raw_diff in ('\n' + self._diff).split('\ndiff --git')[1:]:
356 for raw_diff in ('\n' + self._diff).split('\ndiff --git')[1:]:
357 head, diff = self._get_header(raw_diff)
357 head, diff = self._get_header(raw_diff)
358
358
359 op = None
359 op = None
360 stats = None
360 stats = None
361 msgs = []
361 msgs = []
362
362
363 if not head['a_file'] and head['b_file']:
364 op = 'A'
365 stats = ['b', NEW_FILENODE]
366 msgs.append('new file')
367 elif head['a_file'] and head['b_file']:
368 op = 'M'
369 stats = ['b', MOD_FILENODE]
370 elif head['a_file'] and not head['b_file']:
371 op = 'D'
372 stats = ['b', DEL_FILENODE]
373 msgs.append('deleted file')
374 else:
375 if head['deleted_file_mode']:
363 if head['deleted_file_mode']:
376 op = 'D'
364 op = 'D'
377 stats = ['b', DEL_FILENODE]
365 stats = ['b', DEL_FILENODE]
378 msgs.append('deleted file')
366 msgs.append('deleted file')
379 elif head['new_file_mode']:
367 elif head['new_file_mode']:
380 op = 'A'
368 op = 'A'
381 stats = ['b', NEW_FILENODE]
369 stats = ['b', NEW_FILENODE]
382 msgs.append('new file %s' % head['new_file_mode'])
370 msgs.append('new file %s' % head['new_file_mode'])
383 else:
371 else:
384 if head['new_mode'] and head['old_mode']:
372 if head['new_mode'] and head['old_mode']:
385 op = 'M'
373 op = 'M'
386 stats = ['b', CHMOD_FILENODE]
374 stats = ['b', CHMOD_FILENODE]
387 msgs.append('modified file chmod %s => %s'
375 msgs.append('modified file chmod %s => %s'
388 % (head['old_mode'], head['new_mode']))
376 % (head['old_mode'], head['new_mode']))
389 if (head['rename_from'] and head['rename_to']
377 if (head['rename_from'] and head['rename_to']
390 and head['rename_from'] != head['rename_to']):
378 and head['rename_from'] != head['rename_to']):
391 op = 'M'
379 op = 'M'
392 stats = ['b', RENAMED_FILENODE] # might overwrite CHMOD_FILENODE
380 stats = ['b', RENAMED_FILENODE] # might overwrite CHMOD_FILENODE
393 msgs.append('file renamed from %s to %s'
381 msgs.append('file renamed from %s to %s'
394 % (head['rename_from'], head['rename_to']))
382 % (head['rename_from'], head['rename_to']))
383 if op is None: # fall back: detect missed old style add or remove
384 if not head['a_file'] and head['b_file']:
385 op = 'A'
386 stats = ['b', NEW_FILENODE]
387 msgs.append('new file')
388 elif head['a_file'] and not head['b_file']:
389 op = 'D'
390 stats = ['b', DEL_FILENODE]
391 msgs.append('deleted file')
395 if op is None:
392 if op is None:
396 op = 'M'
393 op = 'M'
397 stats = ['b', MOD_FILENODE]
394 stats = ['b', MOD_FILENODE]
398
395
399 if head['a_file'] or head['b_file']: # a real diff
396 if head['a_file'] or head['b_file']: # a real diff
400 try:
397 try:
401 chunks, stats = self._parse_lines(diff)
398 chunks, stats = self._parse_lines(diff)
402 except DiffLimitExceeded:
399 except DiffLimitExceeded:
403 diff_container = lambda _diff: LimitedDiffContainer(
400 diff_container = lambda _diff: LimitedDiffContainer(
404 self.diff_limit,
401 self.diff_limit,
405 self.cur_diff_size,
402 self.cur_diff_size,
406 _diff)
403 _diff)
407 break
404 break
408 else: # GIT binary patch (or empty diff)
405 else: # GIT binary patch (or empty diff)
409 chunks = []
406 chunks = []
410 msgs.append('binary diff not shown') # or no diff because it was a rename or chmod or add/remove of empty file
407 msgs.append('binary diff not shown') # or no diff because it was a rename or chmod or add/remove of empty file
411
408
412 if msgs:
409 if msgs:
413 chunks.insert(0, [{
410 chunks.insert(0, [{
414 'old_lineno': '',
411 'old_lineno': '',
415 'new_lineno': '',
412 'new_lineno': '',
416 'action': 'binary',
413 'action': 'binary',
417 'line': msg,
414 'line': msg,
418 } for msg in msgs])
415 } for msg in msgs])
419
416
420 _files.append({
417 _files.append({
421 'filename': head['b_path'],
418 'filename': head['b_path'],
422 'old_revision': head['a_blob_id'],
419 'old_revision': head['a_blob_id'],
423 'new_revision': head['b_blob_id'],
420 'new_revision': head['b_blob_id'],
424 'chunks': chunks,
421 'chunks': chunks,
425 'operation': op,
422 'operation': op,
426 'stats': stats,
423 'stats': stats,
427 })
424 })
428
425
429 sorter = lambda info: {'A': 0, 'M': 1, 'D': 2}.get(info['operation'])
426 sorter = lambda info: {'A': 0, 'M': 1, 'D': 2}.get(info['operation'])
430
427
431 if not inline_diff:
428 if not inline_diff:
432 return diff_container(sorted(_files, key=sorter))
429 return diff_container(sorted(_files, key=sorter))
433
430
434 # highlight inline changes
431 # highlight inline changes
435 for diff_data in _files:
432 for diff_data in _files:
436 for chunk in diff_data['chunks']:
433 for chunk in diff_data['chunks']:
437 lineiter = iter(chunk)
434 lineiter = iter(chunk)
438 try:
435 try:
439 while 1:
436 while 1:
440 line = lineiter.next()
437 line = lineiter.next()
441 if line['action'] not in ['unmod', 'context']:
438 if line['action'] not in ['unmod', 'context']:
442 nextline = lineiter.next()
439 nextline = lineiter.next()
443 if nextline['action'] in ['unmod', 'context'] or \
440 if nextline['action'] in ['unmod', 'context'] or \
444 nextline['action'] == line['action']:
441 nextline['action'] == line['action']:
445 continue
442 continue
446 self.differ(line, nextline)
443 self.differ(line, nextline)
447 except StopIteration:
444 except StopIteration:
448 pass
445 pass
449
446
450 return diff_container(sorted(_files, key=sorter))
447 return diff_container(sorted(_files, key=sorter))
451
448
452 def _parse_udiff(self, inline_diff=True):
449 def _parse_udiff(self, inline_diff=True):
453 raise NotImplementedError()
450 raise NotImplementedError()
454
451
455 def _parse_lines(self, diff):
452 def _parse_lines(self, diff):
456 """
453 """
457 Parse the diff an return data for the template.
454 Parse the diff an return data for the template.
458 """
455 """
459
456
460 lineiter = iter(diff)
457 lineiter = iter(diff)
461 stats = [0, 0]
458 stats = [0, 0]
462
459
463 try:
460 try:
464 chunks = []
461 chunks = []
465 line = lineiter.next()
462 line = lineiter.next()
466
463
467 while line:
464 while line:
468 lines = []
465 lines = []
469 chunks.append(lines)
466 chunks.append(lines)
470
467
471 match = self._chunk_re.match(line)
468 match = self._chunk_re.match(line)
472
469
473 if not match:
470 if not match:
474 break
471 break
475
472
476 gr = match.groups()
473 gr = match.groups()
477 (old_line, old_end,
474 (old_line, old_end,
478 new_line, new_end) = [int(x or 1) for x in gr[:-1]]
475 new_line, new_end) = [int(x or 1) for x in gr[:-1]]
479 old_line -= 1
476 old_line -= 1
480 new_line -= 1
477 new_line -= 1
481
478
482 context = len(gr) == 5
479 context = len(gr) == 5
483 old_end += old_line
480 old_end += old_line
484 new_end += new_line
481 new_end += new_line
485
482
486 if context:
483 if context:
487 # skip context only if it's first line
484 # skip context only if it's first line
488 if int(gr[0]) > 1:
485 if int(gr[0]) > 1:
489 lines.append({
486 lines.append({
490 'old_lineno': '...',
487 'old_lineno': '...',
491 'new_lineno': '...',
488 'new_lineno': '...',
492 'action': 'context',
489 'action': 'context',
493 'line': line,
490 'line': line,
494 })
491 })
495
492
496 line = lineiter.next()
493 line = lineiter.next()
497
494
498 while old_line < old_end or new_line < new_end:
495 while old_line < old_end or new_line < new_end:
499 command = ' '
496 command = ' '
500 if line:
497 if line:
501 command = line[0]
498 command = line[0]
502
499
503 affects_old = affects_new = False
500 affects_old = affects_new = False
504
501
505 # ignore those if we don't expect them
502 # ignore those if we don't expect them
506 if command in '#@':
503 if command in '#@':
507 continue
504 continue
508 elif command == '+':
505 elif command == '+':
509 affects_new = True
506 affects_new = True
510 action = 'add'
507 action = 'add'
511 stats[0] += 1
508 stats[0] += 1
512 elif command == '-':
509 elif command == '-':
513 affects_old = True
510 affects_old = True
514 action = 'del'
511 action = 'del'
515 stats[1] += 1
512 stats[1] += 1
516 else:
513 else:
517 affects_old = affects_new = True
514 affects_old = affects_new = True
518 action = 'unmod'
515 action = 'unmod'
519
516
520 if not self._newline_marker.match(line):
517 if not self._newline_marker.match(line):
521 old_line += affects_old
518 old_line += affects_old
522 new_line += affects_new
519 new_line += affects_new
523 lines.append({
520 lines.append({
524 'old_lineno': affects_old and old_line or '',
521 'old_lineno': affects_old and old_line or '',
525 'new_lineno': affects_new and new_line or '',
522 'new_lineno': affects_new and new_line or '',
526 'action': action,
523 'action': action,
527 'line': self._clean_line(line, command)
524 'line': self._clean_line(line, command)
528 })
525 })
529
526
530 line = lineiter.next()
527 line = lineiter.next()
531
528
532 if self._newline_marker.match(line):
529 if self._newline_marker.match(line):
533 # we need to append to lines, since this is not
530 # we need to append to lines, since this is not
534 # counted in the line specs of diff
531 # counted in the line specs of diff
535 lines.append({
532 lines.append({
536 'old_lineno': '...',
533 'old_lineno': '...',
537 'new_lineno': '...',
534 'new_lineno': '...',
538 'action': 'context',
535 'action': 'context',
539 'line': self._clean_line(line, command)
536 'line': self._clean_line(line, command)
540 })
537 })
541
538
542 except StopIteration:
539 except StopIteration:
543 pass
540 pass
544 return chunks, stats
541 return chunks, stats
545
542
546 def _safe_id(self, idstring):
543 def _safe_id(self, idstring):
547 """Make a string safe for including in an id attribute.
544 """Make a string safe for including in an id attribute.
548
545
549 The HTML spec says that id attributes 'must begin with
546 The HTML spec says that id attributes 'must begin with
550 a letter ([A-Za-z]) and may be followed by any number
547 a letter ([A-Za-z]) and may be followed by any number
551 of letters, digits ([0-9]), hyphens ("-"), underscores
548 of letters, digits ([0-9]), hyphens ("-"), underscores
552 ("_"), colons (":"), and periods (".")'. These regexps
549 ("_"), colons (":"), and periods (".")'. These regexps
553 are slightly over-zealous, in that they remove colons
550 are slightly over-zealous, in that they remove colons
554 and periods unnecessarily.
551 and periods unnecessarily.
555
552
556 Whitespace is transformed into underscores, and then
553 Whitespace is transformed into underscores, and then
557 anything which is not a hyphen or a character that
554 anything which is not a hyphen or a character that
558 matches \w (alphanumerics and underscore) is removed.
555 matches \w (alphanumerics and underscore) is removed.
559
556
560 """
557 """
561 # Transform all whitespace to underscore
558 # Transform all whitespace to underscore
562 idstring = re.sub(r'\s', "_", '%s' % idstring)
559 idstring = re.sub(r'\s', "_", '%s' % idstring)
563 # Remove everything that is not a hyphen or a member of \w
560 # Remove everything that is not a hyphen or a member of \w
564 idstring = re.sub(r'(?!-)\W', "", idstring).lower()
561 idstring = re.sub(r'(?!-)\W', "", idstring).lower()
565 return idstring
562 return idstring
566
563
def prepare(self, inline_diff=True):
    """
    Parse the diff held by this instance and cache the outcome.

    Runs ``self._parser`` with the given ``inline_diff`` flag, marks the
    instance as parsed, stores the result in ``self.parsed_diff`` and
    returns that same result.
    """
    result = self._parser(inline_diff=inline_diff)
    # assigned left to right: the flag first, then the parsed data
    self.parsed, self.parsed_diff = True, result
    return result
576
573
def as_raw(self, diff_lines=None):
    """
    Return the diff stored in ``self._diff`` unchanged.

    ``diff_lines`` is accepted but not used.
    """
    raw_diff = self._diff
    return raw_diff
583
580
def as_html(self, table_class='code-difftable', line_class='line',
            old_lineno_class='lineno old', new_lineno_class='lineno new',
            code_class='code', enable_comments=False, parsed_lines=None):
    """
    Return given diff as html table with customized css classes

    :param table_class: css class of the wrapping ``<table>``
    :param line_class: css class put on every ``<tr>``, next to the
        action of the rendered line
    :param old_lineno_class: css class of the old line number cell
    :param new_lineno_class: css class of the new line number cell
    :param code_class: css class of the code cell
    :param enable_comments: when false the code cell additionally gets
        the ``no-comment`` css class
    :param parsed_lines: optional parsed diff rendered instead of
        ``self.parsed_diff`` (ignored when empty)
    :returns: the html as a string, or ``None`` when no chunk was found
    """
    def _link_to_if(condition, label, url):
        """
        Generates a link if condition is met or just the label if not.
        """
        if condition:
            return '''<a href="%(url)s">%(label)s</a>''' % {
                'url': url,
                'label': label
            }
        else:
            return label

    # parsing happens even when parsed_lines is given, so that
    # self.parsed / self.parsed_diff end up populated as before
    if not self.parsed:
        self.prepare()

    diff_lines = self.parsed_diff
    if parsed_lines:
        diff_lines = parsed_lines

    # does not depend on the rendered line, so resolve it only once
    comments = '' if enable_comments else 'no-comment'

    _html_empty = True
    _html = []
    _html.append('''<table class="%(table_class)s">\n''' % {
        'table_class': table_class
    })

    for diff in diff_lines:
        # All anchors of one file share the same sanitized prefix.
        # Resolve it once per file (lazily, so a file without any
        # rendered line never needs its 'filename') instead of running
        # the regexps of _safe_id twice for every rendered line.
        safe_filename = None
        for line in diff['chunks']:
            _html_empty = False
            for change in line:
                if safe_filename is None:
                    safe_filename = self._safe_id(diff['filename'])

                _html.append('''<tr class="%(lc)s %(action)s">\n''' % {
                    'lc': line_class,
                    'action': change['action']
                })

                old_lineno = change['old_lineno']
                new_lineno = change['new_lineno']
                anchor_old = "%(filename)s_o%(oldline_no)s" % {
                    'filename': safe_filename,
                    'oldline_no': old_lineno
                }
                anchor_new = "%(filename)s_n%(oldline_no)s" % {
                    'filename': safe_filename,
                    'oldline_no': new_lineno
                }

                # only real line numbers get an id; the '...' marker and
                # the empty value of the untouched side do not
                anchor_old_id = ''
                anchor_new_id = ''
                if old_lineno != '...' and old_lineno:
                    anchor_old_id = 'id="%s"' % anchor_old
                if new_lineno != '...' and new_lineno:
                    anchor_new_id = 'id="%s"' % anchor_new

                ###########################################################
                # OLD LINE NUMBER
                ###########################################################
                _html.append('''\t<td %(a_id)s class="%(olc)s">''' % {
                    'a_id': anchor_old_id,
                    'olc': old_lineno_class
                })
                # NOTE: the link is emitted unconditionally, also for
                # '...' and empty line numbers
                _html.append(_link_to_if(True, old_lineno,
                                         '#%s' % anchor_old))
                _html.append('''</td>\n''')

                ###########################################################
                # NEW LINE NUMBER
                ###########################################################
                _html.append('''\t<td %(a_id)s class="%(nlc)s">''' % {
                    'a_id': anchor_new_id,
                    'nlc': new_lineno_class
                })
                _html.append(_link_to_if(True, new_lineno,
                                         '#%s' % anchor_new))
                _html.append('''</td>\n''')

                ###########################################################
                # CODE
                ###########################################################
                _html.append('''\t<td class="%(cc)s %(inc)s">''' % {
                    'cc': code_class,
                    'inc': comments
                })
                _html.append('''\n\t\t<pre>%(code)s</pre>\n''' % {
                    'code': change['line']
                })

                _html.append('''\t</td>''')
                _html.append('''\n</tr>\n''')
    _html.append('''</table>''')
    if _html_empty:
        return None
    return ''.join(_html)
686
683
def stat(self):
    """
    Return the ``(adds, removes)`` pair stored on this instance
    """
    added = self.adds
    removed = self.removes
    return added, removed
General Comments 0
You need to be logged in to leave comments. Login now