##// END OF EJS Templates
fix some small unicode issues with logging on differ
marcink -
r3366:c72dbcad beta
parent child Browse files
Show More
@@ -1,716 +1,717 b''
1 # -*- coding: utf-8 -*-
1 # -*- coding: utf-8 -*-
2 """
2 """
3 rhodecode.lib.diffs
3 rhodecode.lib.diffs
4 ~~~~~~~~~~~~~~~~~~~
4 ~~~~~~~~~~~~~~~~~~~
5
5
6 Set of diffing helpers, previously part of vcs
6 Set of diffing helpers, previously part of vcs
7
7
8
8
9 :created_on: Dec 4, 2011
9 :created_on: Dec 4, 2011
10 :author: marcink
10 :author: marcink
11 :copyright: (C) 2010-2012 Marcin Kuzminski <marcin@python-works.com>
11 :copyright: (C) 2010-2012 Marcin Kuzminski <marcin@python-works.com>
12 :original copyright: 2007-2008 by Armin Ronacher
12 :original copyright: 2007-2008 by Armin Ronacher
13 :license: GPLv3, see COPYING for more details.
13 :license: GPLv3, see COPYING for more details.
14 """
14 """
15 # This program is free software: you can redistribute it and/or modify
15 # This program is free software: you can redistribute it and/or modify
16 # it under the terms of the GNU General Public License as published by
16 # it under the terms of the GNU General Public License as published by
17 # the Free Software Foundation, either version 3 of the License, or
17 # the Free Software Foundation, either version 3 of the License, or
18 # (at your option) any later version.
18 # (at your option) any later version.
19 #
19 #
20 # This program is distributed in the hope that it will be useful,
20 # This program is distributed in the hope that it will be useful,
21 # but WITHOUT ANY WARRANTY; without even the implied warranty of
21 # but WITHOUT ANY WARRANTY; without even the implied warranty of
22 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23 # GNU General Public License for more details.
23 # GNU General Public License for more details.
24 #
24 #
25 # You should have received a copy of the GNU General Public License
25 # You should have received a copy of the GNU General Public License
26 # along with this program. If not, see <http://www.gnu.org/licenses/>.
26 # along with this program. If not, see <http://www.gnu.org/licenses/>.
27
27
28 import re
28 import re
29 import difflib
29 import difflib
30 import logging
30 import logging
31
31
32 from itertools import tee, imap
32 from itertools import tee, imap
33
33
34 from pylons.i18n.translation import _
34 from pylons.i18n.translation import _
35
35
36 from rhodecode.lib.vcs.exceptions import VCSError
36 from rhodecode.lib.vcs.exceptions import VCSError
37 from rhodecode.lib.vcs.nodes import FileNode, SubModuleNode
37 from rhodecode.lib.vcs.nodes import FileNode, SubModuleNode
38 from rhodecode.lib.vcs.backends.base import EmptyChangeset
38 from rhodecode.lib.vcs.backends.base import EmptyChangeset
39 from rhodecode.lib.helpers import escape
39 from rhodecode.lib.helpers import escape
40 from rhodecode.lib.utils2 import safe_unicode
40 from rhodecode.lib.utils2 import safe_unicode, safe_str
41
41
42 log = logging.getLogger(__name__)
42 log = logging.getLogger(__name__)
43
43
44
44
def wrap_to_table(str_):
    """
    Wrap ``str_`` into a one-row ``code-difftable`` HTML table.

    :param str_: text or markup placed as-is inside the ``<pre>`` cell;
        no escaping is done here
    :returns: the HTML table markup as a string
    """
    template = '''<table class="code-difftable">
<tr class="line no-comment">
<td class="lineno new"></td>
<td class="code no-comment"><pre>%s</pre></td>
</tr>
</table>'''
    return template % str_
52
52
53
53
def wrapped_diff(filenode_old, filenode_new, cut_off_limit=None,
                 ignore_whitespace=True, line_context=3,
                 enable_comments=False):
    """
    Render the diff between two file nodes as an HTML table.

    Binary files are never diffed. A text diff is produced when
    ``cut_off_limit`` is ``None`` or when both nodes are smaller than it;
    ``-1`` or a bigger file yields the "too big" message instead.

    :param filenode_old: old node; ``None`` is replaced by an empty node at
        ``filenode_new.path``
    :param filenode_new: new node
    :param cut_off_limit: size limit, see above
    :param ignore_whitespace: passed to :func:`get_gitdiff`
    :param line_context: number of context lines passed to
        :func:`get_gitdiff`
    :param enable_comments: passed to ``DiffProcessor.as_html``
    :returns: tuple ``(size, cs1, cs2, diff, stats)`` where ``cs1``/``cs2``
        are the raw ids of the old/new changesets
    """
    if filenode_old is None:
        filenode_old = FileNode(filenode_new.path, '', EmptyChangeset())

    # values used by every branch that does not run a real diff
    stats = (0, 0)
    size = 0

    if filenode_old.is_binary or filenode_new.is_binary:
        diff = wrap_to_table(_('binary file'))

    elif cut_off_limit != -1 and (
            cut_off_limit is None or
            (filenode_old.size < cut_off_limit and
             filenode_new.size < cut_off_limit)):
        raw_gitdiff = get_gitdiff(filenode_old, filenode_new,
                                  ignore_whitespace=ignore_whitespace,
                                  context=line_context)
        processor = DiffProcessor(raw_gitdiff, format='gitdiff')

        diff = processor.as_html(enable_comments=enable_comments)
        stats = processor.stat()
        size = len(diff or '')
    else:
        diff = wrap_to_table(_('Changeset was too big and was cut off, use '
                               'diff menu to display this diff'))

    if not diff:
        # an empty diff is either a submodule or an unchanged file
        submodules = [node for node in (filenode_new, filenode_old)
                      if isinstance(node, SubModuleNode)]
        if submodules:
            diff = wrap_to_table(escape('Submodule %r' % submodules[0]))
        else:
            diff = wrap_to_table(_('No changes detected'))

    return (size, filenode_old.changeset.raw_id,
            filenode_new.changeset.raw_id, diff, stats)
98
98
99
99
def get_gitdiff(filenode_old, filenode_new, ignore_whitespace=True, context=3):
    """
    Return the git style diff between ``filenode_old`` and ``filenode_new``.

    :param filenode_old: old ``FileNode``
    :param filenode_new: new ``FileNode``
    :param ignore_whitespace: ignore whitespaces in diff
    :param context: number of context lines; a falsy value means 3
    :returns: whatever ``repo.get_diff`` returns, or ``''`` when either
        node is a ``SubModuleNode``
    :raises VCSError: when a node is not a ``FileNode``
    """
    # make sure we pass in default context
    if not context:
        context = 3

    nodes = (filenode_old, filenode_new)
    if any(isinstance(node, SubModuleNode) for node in nodes):
        return ''

    for node in nodes:
        if not isinstance(node, FileNode):
            raise VCSError("Given object should be FileNode object, not %s"
                           % node.__class__)

    repo = filenode_new.changeset.repository
    # changesets without a raw_id are diffed as the empty changeset
    fallback_id = repo.EMPTY_CHANGESET
    old_raw_id = getattr(filenode_old.changeset, 'raw_id', fallback_id)
    new_raw_id = getattr(filenode_new.changeset, 'raw_id', fallback_id)

    return repo.get_diff(old_raw_id, new_raw_id, filenode_new.path,
                         ignore_whitespace, context)
125
125
# Markers for what happened to a file. DiffProcessor._parse_gitdiff stores
# one of them next to 'b' in the ``stats`` of a binary file.
NEW_FILENODE = 1
DEL_FILENODE = 2
MOD_FILENODE = 3
RENAMED_FILENODE = 4
CHMOD_FILENODE = 5
131
131
132
132
class DiffLimitExceeded(Exception):
    """Raised when the processed diff grows past the configured limit."""
135
135
136
136
class LimitedDiffContainer(object):
    """
    Iterable wrapper around a parsed diff that also records the diff limit
    and the diff size that had been processed.
    """

    def __init__(self, diff_limit, cur_diff_size, diff):
        self.diff_limit = diff_limit
        self.cur_diff_size = cur_diff_size
        self.diff = diff

    def __iter__(self):
        # iterate straight over the wrapped diff
        for entry in self.diff:
            yield entry
147
147
148
148
149 class DiffProcessor(object):
149 class DiffProcessor(object):
150 """
150 """
151 Give it a unified or git diff and it returns a list of the files that were
151 Give it a unified or git diff and it returns a list of the files that were
152 mentioned in the diff together with a dict of meta information that
152 mentioned in the diff together with a dict of meta information that
153 can be used to render it in a HTML template.
153 can be used to render it in a HTML template.
154 """
154 """
155 _chunk_re = re.compile(r'^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(.*)')
155 _chunk_re = re.compile(r'^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(.*)')
156 _newline_marker = re.compile(r'^\\ No newline at end of file')
156 _newline_marker = re.compile(r'^\\ No newline at end of file')
157 _git_header_re = re.compile(r"""
157 _git_header_re = re.compile(r"""
158 #^diff[ ]--git
158 #^diff[ ]--git
159 [ ]a/(?P<a_path>.+?)[ ]b/(?P<b_path>.+?)\n
159 [ ]a/(?P<a_path>.+?)[ ]b/(?P<b_path>.+?)\n
160 (?:^similarity[ ]index[ ](?P<similarity_index>\d+)%\n
160 (?:^similarity[ ]index[ ](?P<similarity_index>\d+)%\n
161 ^rename[ ]from[ ](?P<rename_from>\S+)\n
161 ^rename[ ]from[ ](?P<rename_from>\S+)\n
162 ^rename[ ]to[ ](?P<rename_to>\S+)(?:\n|$))?
162 ^rename[ ]to[ ](?P<rename_to>\S+)(?:\n|$))?
163 (?:^old[ ]mode[ ](?P<old_mode>\d+)\n
163 (?:^old[ ]mode[ ](?P<old_mode>\d+)\n
164 ^new[ ]mode[ ](?P<new_mode>\d+)(?:\n|$))?
164 ^new[ ]mode[ ](?P<new_mode>\d+)(?:\n|$))?
165 (?:^new[ ]file[ ]mode[ ](?P<new_file_mode>.+)(?:\n|$))?
165 (?:^new[ ]file[ ]mode[ ](?P<new_file_mode>.+)(?:\n|$))?
166 (?:^deleted[ ]file[ ]mode[ ](?P<deleted_file_mode>.+)(?:\n|$))?
166 (?:^deleted[ ]file[ ]mode[ ](?P<deleted_file_mode>.+)(?:\n|$))?
167 (?:^index[ ](?P<a_blob_id>[0-9A-Fa-f]+)
167 (?:^index[ ](?P<a_blob_id>[0-9A-Fa-f]+)
168 \.\.(?P<b_blob_id>[0-9A-Fa-f]+)[ ]?(?P<b_mode>.+)?(?:\n|$))?
168 \.\.(?P<b_blob_id>[0-9A-Fa-f]+)[ ]?(?P<b_mode>.+)?(?:\n|$))?
169 (?:^---[ ](a/(?P<a_file>.+)|/dev/null)(?:\n|$))?
169 (?:^---[ ](a/(?P<a_file>.+)|/dev/null)(?:\n|$))?
170 (?:^\+\+\+[ ](b/(?P<b_file>.+)|/dev/null)(?:\n|$))?
170 (?:^\+\+\+[ ](b/(?P<b_file>.+)|/dev/null)(?:\n|$))?
171 """, re.VERBOSE | re.MULTILINE)
171 """, re.VERBOSE | re.MULTILINE)
172 _hg_header_re = re.compile(r"""
172 _hg_header_re = re.compile(r"""
173 #^diff[ ]--git
173 #^diff[ ]--git
174 [ ]a/(?P<a_path>.+?)[ ]b/(?P<b_path>.+?)\n
174 [ ]a/(?P<a_path>.+?)[ ]b/(?P<b_path>.+?)\n
175 (?:^similarity[ ]index[ ](?P<similarity_index>\d+)%(?:\n|$))?
175 (?:^similarity[ ]index[ ](?P<similarity_index>\d+)%(?:\n|$))?
176 (?:^rename[ ]from[ ](?P<rename_from>\S+)\n
176 (?:^rename[ ]from[ ](?P<rename_from>\S+)\n
177 ^rename[ ]to[ ](?P<rename_to>\S+)(?:\n|$))?
177 ^rename[ ]to[ ](?P<rename_to>\S+)(?:\n|$))?
178 (?:^old[ ]mode[ ](?P<old_mode>\d+)\n
178 (?:^old[ ]mode[ ](?P<old_mode>\d+)\n
179 ^new[ ]mode[ ](?P<new_mode>\d+)(?:\n|$))?
179 ^new[ ]mode[ ](?P<new_mode>\d+)(?:\n|$))?
180 (?:^new[ ]file[ ]mode[ ](?P<new_file_mode>.+)(?:\n|$))?
180 (?:^new[ ]file[ ]mode[ ](?P<new_file_mode>.+)(?:\n|$))?
181 (?:^deleted[ ]file[ ]mode[ ](?P<deleted_file_mode>.+)(?:\n|$))?
181 (?:^deleted[ ]file[ ]mode[ ](?P<deleted_file_mode>.+)(?:\n|$))?
182 (?:^index[ ](?P<a_blob_id>[0-9A-Fa-f]+)
182 (?:^index[ ](?P<a_blob_id>[0-9A-Fa-f]+)
183 \.\.(?P<b_blob_id>[0-9A-Fa-f]+)[ ]?(?P<b_mode>.+)?(?:\n|$))?
183 \.\.(?P<b_blob_id>[0-9A-Fa-f]+)[ ]?(?P<b_mode>.+)?(?:\n|$))?
184 (?:^---[ ](a/(?P<a_file>.+)|/dev/null)(?:\n|$))?
184 (?:^---[ ](a/(?P<a_file>.+)|/dev/null)(?:\n|$))?
185 (?:^\+\+\+[ ](b/(?P<b_file>.+)|/dev/null)(?:\n|$))?
185 (?:^\+\+\+[ ](b/(?P<b_file>.+)|/dev/null)(?:\n|$))?
186 """, re.VERBOSE | re.MULTILINE)
186 """, re.VERBOSE | re.MULTILINE)
187
187
188 #used for inline highlighter word split
188 #used for inline highlighter word split
189 _token_re = re.compile(r'()(&gt;|&lt;|&amp;|\W+?)')
189 _token_re = re.compile(r'()(&gt;|&lt;|&amp;|\W+?)')
190
190
def __init__(self, diff, vcs='hg', format='gitdiff', diff_limit=None):
    """
    :param diff: a text in diff format
    :param vcs: type of version control, ``hg`` or ``git``
    :param format: format of diff passed, ``udiff`` or ``gitdiff``; it
        selects both the inline highlighter and the parser
    :param diff_limit: define the size of diff that is considered "big";
        based on that parameter cut off will be triggered, set to None
        to show full diff
    :raises Exception: when ``diff`` is not a string
    """
    if not isinstance(diff, basestring):
        raise Exception('Diff must be a basestring got %s instead' % type(diff))

    self._diff = diff
    self._format = format
    self.vcs = vcs

    # running totals of added / removed lines
    self.adds = 0
    self.removes = 0

    # size of the whole diff, and of the part processed so far, which is
    # what gets compared against the limit
    self.diff_size = len(diff)
    self.diff_limit = diff_limit
    self.cur_diff_size = 0

    self.parsed = False
    self.parsed_diff = []

    if format == 'gitdiff':
        differ, parser = self._highlight_line_difflib, self._parse_gitdiff
    else:
        differ, parser = self._highlight_line_udiff, self._parse_udiff
    self.differ = differ
    self._parser = parser
221
221
def _copy_iterator(self):
    """
    Make a fresh copy of the stored diff iterator. The original must not
    be iterated directly, as it is needed for repeating operations on
    this instance of DiffProcessor.

    :returns: an independent iterator over the stored diff
    """
    # NOTE(review): ``self.__udiff`` is not assigned anywhere in the visible
    # part of this class (``__init__`` stores the text as ``self._diff``);
    # confirm it is set elsewhere, otherwise this raises AttributeError.
    # tee() yields two independent iterators: one replaces the stored one,
    # the other is handed to the caller.
    self.__udiff, iterator_copy = tee(self.__udiff)
    return iterator_copy
230
230
def _escaper(self, string):
    """
    Escape HTML special chars in a diff line and enforce the diff limit.

    Each call adds ``len(string)`` to ``self.cur_diff_size``.

    :param string: one line of the diff
    :returns: the line as unicode with ``&``, ``<`` and ``>`` escaped
    :raises DiffLimitExceeded: when the accumulated size passes
        ``self.diff_limit``
    """
    self.cur_diff_size += len(string)

    # called once per line while the escaped diff is consumed, so the
    # limit is checked line by line
    limit = self.diff_limit
    if limit is not None and self.cur_diff_size > limit:
        raise DiffLimitExceeded('Diff Limit Exceeded')

    escaped = safe_unicode(string)
    # '&' goes first so the entities added afterwards are not re-escaped
    for raw, entity in (('&', '&amp;'), ('<', '&lt;'), ('>', '&gt;')):
        escaped = escaped.replace(raw, entity)
    return escaped
249
249
def _line_counter(self, l):
    """
    Bump ``self.adds`` / ``self.removes`` for an added / removed line.

    Lines starting with ``+++`` or ``---`` are not counted.

    :param l: one line of the diff
    :returns: the line passed through ``safe_unicode``
    """
    if l.startswith('+'):
        if not l.startswith('+++'):
            self.adds += 1
    elif l.startswith('-'):
        if not l.startswith('---'):
            self.removes += 1
    return safe_unicode(l)
261
261
262 def _highlight_line_difflib(self, line, next_):
262 def _highlight_line_difflib(self, line, next_):
263 """
263 """
264 Highlight inline changes in both lines.
264 Highlight inline changes in both lines.
265 """
265 """
266
266
267 if line['action'] == 'del':
267 if line['action'] == 'del':
268 old, new = line, next_
268 old, new = line, next_
269 else:
269 else:
270 old, new = next_, line
270 old, new = next_, line
271
271
272 oldwords = self._token_re.split(old['line'])
272 oldwords = self._token_re.split(old['line'])
273 newwords = self._token_re.split(new['line'])
273 newwords = self._token_re.split(new['line'])
274 sequence = difflib.SequenceMatcher(None, oldwords, newwords)
274 sequence = difflib.SequenceMatcher(None, oldwords, newwords)
275
275
276 oldfragments, newfragments = [], []
276 oldfragments, newfragments = [], []
277 for tag, i1, i2, j1, j2 in sequence.get_opcodes():
277 for tag, i1, i2, j1, j2 in sequence.get_opcodes():
278 oldfrag = ''.join(oldwords[i1:i2])
278 oldfrag = ''.join(oldwords[i1:i2])
279 newfrag = ''.join(newwords[j1:j2])
279 newfrag = ''.join(newwords[j1:j2])
280 if tag != 'equal':
280 if tag != 'equal':
281 if oldfrag:
281 if oldfrag:
282 oldfrag = '<del>%s</del>' % oldfrag
282 oldfrag = '<del>%s</del>' % oldfrag
283 if newfrag:
283 if newfrag:
284 newfrag = '<ins>%s</ins>' % newfrag
284 newfrag = '<ins>%s</ins>' % newfrag
285 oldfragments.append(oldfrag)
285 oldfragments.append(oldfrag)
286 newfragments.append(newfrag)
286 newfragments.append(newfrag)
287
287
288 old['line'] = "".join(oldfragments)
288 old['line'] = "".join(oldfragments)
289 new['line'] = "".join(newfragments)
289 new['line'] = "".join(newfragments)
290
290
291 def _highlight_line_udiff(self, line, next_):
291 def _highlight_line_udiff(self, line, next_):
292 """
292 """
293 Highlight inline changes in both lines.
293 Highlight inline changes in both lines.
294 """
294 """
295 start = 0
295 start = 0
296 limit = min(len(line['line']), len(next_['line']))
296 limit = min(len(line['line']), len(next_['line']))
297 while start < limit and line['line'][start] == next_['line'][start]:
297 while start < limit and line['line'][start] == next_['line'][start]:
298 start += 1
298 start += 1
299 end = -1
299 end = -1
300 limit -= start
300 limit -= start
301 while -end <= limit and line['line'][end] == next_['line'][end]:
301 while -end <= limit and line['line'][end] == next_['line'][end]:
302 end -= 1
302 end -= 1
303 end += 1
303 end += 1
304 if start or end:
304 if start or end:
305 def do(l):
305 def do(l):
306 last = end + len(l['line'])
306 last = end + len(l['line'])
307 if l['action'] == 'add':
307 if l['action'] == 'add':
308 tag = 'ins'
308 tag = 'ins'
309 else:
309 else:
310 tag = 'del'
310 tag = 'del'
311 l['line'] = '%s<%s>%s</%s>%s' % (
311 l['line'] = '%s<%s>%s</%s>%s' % (
312 l['line'][:start],
312 l['line'][:start],
313 tag,
313 tag,
314 l['line'][start:last],
314 l['line'][start:last],
315 tag,
315 tag,
316 l['line'][last:]
316 l['line'][last:]
317 )
317 )
318 do(line)
318 do(line)
319 do(next_)
319 do(next_)
320
320
def _get_header(self, diff_chunk):
    """
    Parse the header of a single-file diff chunk.

    The header parts consist of 14 elements::

        a_path, b_path, similarity_index, rename_from, rename_to,
        old_mode, new_mode, new_file_mode, deleted_file_mode,
        a_blob_id, b_blob_id, b_mode, a_file, b_file

    :param diff_chunk: diff text of one file with the leading
        ``diff --git`` already stripped
    :returns: tuple ``(head, diff)``: ``head`` is the dict of header parts
        (``None`` for parts that are absent), ``diff`` a lazy iterator over
        the remaining lines, each passed through ``self._escaper``
    :raises Exception: when ``self.vcs`` is not supported or the header
        cannot be parsed
    """
    if self.vcs == 'git':
        header_re = self._git_header_re
    elif self.vcs == 'hg':
        header_re = self._hg_header_re
    else:
        raise Exception('VCS type %s is not supported' % self.vcs)

    match = header_re.match(diff_chunk)
    if match is None:
        # fail with a readable message instead of an AttributeError on None
        raise Exception('Unable to parse %s diff header: %r'
                        % (self.vcs, diff_chunk[:80]))

    leftover = diff_chunk[match.end():]
    # splitlines(1) keeps the line endings on every line
    return match.groupdict(), imap(self._escaper, leftover.splitlines(1))
344
344
345 def _clean_line(self, line, command):
345 def _clean_line(self, line, command):
346 if command in ['+', '-', ' ']:
346 if command in ['+', '-', ' ']:
347 #only modify the line if it's actually a diff thing
347 #only modify the line if it's actually a diff thing
348 line = line[1:]
348 line = line[1:]
349 return line
349 return line
350
350
def _parse_gitdiff(self, inline_diff=True):
    """
    Parse the git style diff stored in ``self._diff``.

    The diff is split per file; for each file a dict with the keys
    ``filename``, ``old_revision``, ``new_revision``, ``chunks``,
    ``operation`` (``A``, ``M`` or ``D``) and ``stats`` is built.

    :param inline_diff: when ``False`` the inline highlighting of changed
        line pairs is skipped
    :returns: the file dicts sorted by operation (added, modified,
        deleted); wrapped in a ``LimitedDiffContainer`` when the diff
        limit was hit, in which case the file being parsed at that moment
        and all files after it are left out
    """
    _files = []
    # identity unless the diff limit is hit, see the except branch below
    diff_container = lambda arg: arg

    ##split the diff in chunks of separate --git a/file b/file chunks
    for raw_diff in ('\n' + self._diff).split('\ndiff --git')[1:]:
        binary = False
        binary_msg = 'unknown binary'
        head, diff = self._get_header(raw_diff)

        # the operation follows from which of the ---/+++ file names exist
        if not head['a_file'] and head['b_file']:
            op = 'A'
        elif head['a_file'] and head['b_file']:
            op = 'M'
        elif head['a_file'] and not head['b_file']:
            op = 'D'
        else:
            # neither file name is present: probably we're dealing with
            # a binary file, so use the other header parts instead
            binary = True
            if head['deleted_file_mode']:
                op = 'D'
                stats = ['b', DEL_FILENODE]
                binary_msg = 'deleted binary file'
            elif head['new_file_mode']:
                op = 'A'
                stats = ['b', NEW_FILENODE]
                binary_msg = 'new binary file %s' % head['new_file_mode']
            else:
                if head['new_mode'] and head['old_mode']:
                    stats = ['b', CHMOD_FILENODE]
                    op = 'M'
                    binary_msg = ('modified binary file chmod %s => %s'
                                  % (head['old_mode'], head['new_mode']))
                elif (head['rename_from'] and head['rename_to']
                      and head['rename_from'] != head['rename_to']):
                    stats = ['b', RENAMED_FILENODE]
                    op = 'M'
                    binary_msg = ('file renamed from %s to %s'
                                  % (head['rename_from'], head['rename_to']))
                else:
                    stats = ['b', MOD_FILENODE]
                    op = 'M'
                    binary_msg = 'modified binary file'

        if not binary:
            try:
                chunks, stats = self._parse_lines(diff)
            except DiffLimitExceeded:
                # stop parsing; the files collected so far get wrapped so
                # the result carries the limit and the processed size
                diff_container = lambda _diff: LimitedDiffContainer(
                                                    self.diff_limit,
                                                    self.cur_diff_size,
                                                    _diff)
                break
        else:
            # binary files get a single chunk holding only the message
            chunks = []
            chunks.append([{
                'old_lineno': '',
                'new_lineno': '',
                'action': 'binary',
                'line': binary_msg,
            }])

        _files.append({
            'filename': head['b_path'],
            'old_revision': head['a_blob_id'],
            'new_revision': head['b_blob_id'],
            'chunks': chunks,
            'operation': op,
            'stats': stats,
        })

    # order: added, modified, deleted
    sorter = lambda info: {'A': 0, 'M': 1, 'D': 2}.get(info['operation'])

    if inline_diff is False:
        return diff_container(sorted(_files, key=sorter))

    # highlight inline changes
    for diff_data in _files:
        for chunk in diff_data['chunks']:
            lineiter = iter(chunk)
            try:
                while 1:
                    line = lineiter.next()
                    if line['action'] not in ['unmod', 'context']:
                        # a changed line also consumes the line after it;
                        # the pair is highlighted only when that line is
                        # changed too, with a different action
                        nextline = lineiter.next()
                        if nextline['action'] in ['unmod', 'context'] or \
                           nextline['action'] == line['action']:
                            continue
                        self.differ(line, nextline)
            except StopIteration:
                # end of the chunk
                pass

    return diff_container(sorted(_files, key=sorter))
444
444
445 def _parse_udiff(self, inline_diff=True):
445 def _parse_udiff(self, inline_diff=True):
446 raise NotImplementedError()
446 raise NotImplementedError()
447
447
448 def _parse_lines(self, diff):
448 def _parse_lines(self, diff):
449 """
449 """
450 Parse the diff an return data for the template.
450 Parse the diff an return data for the template.
451 """
451 """
452
452
453 lineiter = iter(diff)
453 lineiter = iter(diff)
454 stats = [0, 0]
454 stats = [0, 0]
455
455
456 try:
456 try:
457 chunks = []
457 chunks = []
458 line = lineiter.next()
458 line = lineiter.next()
459
459
460 while line:
460 while line:
461 lines = []
461 lines = []
462 chunks.append(lines)
462 chunks.append(lines)
463
463
464 match = self._chunk_re.match(line)
464 match = self._chunk_re.match(line)
465
465
466 if not match:
466 if not match:
467 break
467 break
468
468
469 gr = match.groups()
469 gr = match.groups()
470 (old_line, old_end,
470 (old_line, old_end,
471 new_line, new_end) = [int(x or 1) for x in gr[:-1]]
471 new_line, new_end) = [int(x or 1) for x in gr[:-1]]
472 old_line -= 1
472 old_line -= 1
473 new_line -= 1
473 new_line -= 1
474
474
475 context = len(gr) == 5
475 context = len(gr) == 5
476 old_end += old_line
476 old_end += old_line
477 new_end += new_line
477 new_end += new_line
478
478
479 if context:
479 if context:
480 # skip context only if it's first line
480 # skip context only if it's first line
481 if int(gr[0]) > 1:
481 if int(gr[0]) > 1:
482 lines.append({
482 lines.append({
483 'old_lineno': '...',
483 'old_lineno': '...',
484 'new_lineno': '...',
484 'new_lineno': '...',
485 'action': 'context',
485 'action': 'context',
486 'line': line,
486 'line': line,
487 })
487 })
488
488
489 line = lineiter.next()
489 line = lineiter.next()
490
490
491 while old_line < old_end or new_line < new_end:
491 while old_line < old_end or new_line < new_end:
492 command = ' '
492 command = ' '
493 if line:
493 if line:
494 command = line[0]
494 command = line[0]
495
495
496 affects_old = affects_new = False
496 affects_old = affects_new = False
497
497
498 # ignore those if we don't expect them
498 # ignore those if we don't expect them
499 if command in '#@':
499 if command in '#@':
500 continue
500 continue
501 elif command == '+':
501 elif command == '+':
502 affects_new = True
502 affects_new = True
503 action = 'add'
503 action = 'add'
504 stats[0] += 1
504 stats[0] += 1
505 elif command == '-':
505 elif command == '-':
506 affects_old = True
506 affects_old = True
507 action = 'del'
507 action = 'del'
508 stats[1] += 1
508 stats[1] += 1
509 else:
509 else:
510 affects_old = affects_new = True
510 affects_old = affects_new = True
511 action = 'unmod'
511 action = 'unmod'
512
512
513 if not self._newline_marker.match(line):
513 if not self._newline_marker.match(line):
514 old_line += affects_old
514 old_line += affects_old
515 new_line += affects_new
515 new_line += affects_new
516 lines.append({
516 lines.append({
517 'old_lineno': affects_old and old_line or '',
517 'old_lineno': affects_old and old_line or '',
518 'new_lineno': affects_new and new_line or '',
518 'new_lineno': affects_new and new_line or '',
519 'action': action,
519 'action': action,
520 'line': self._clean_line(line, command)
520 'line': self._clean_line(line, command)
521 })
521 })
522
522
523 line = lineiter.next()
523 line = lineiter.next()
524
524
525 if self._newline_marker.match(line):
525 if self._newline_marker.match(line):
526 # we need to append to lines, since this is not
526 # we need to append to lines, since this is not
527 # counted in the line specs of diff
527 # counted in the line specs of diff
528 lines.append({
528 lines.append({
529 'old_lineno': '...',
529 'old_lineno': '...',
530 'new_lineno': '...',
530 'new_lineno': '...',
531 'action': 'context',
531 'action': 'context',
532 'line': self._clean_line(line, command)
532 'line': self._clean_line(line, command)
533 })
533 })
534
534
535 except StopIteration:
535 except StopIteration:
536 pass
536 pass
537 return chunks, stats
537 return chunks, stats
538
538
539 def _safe_id(self, idstring):
539 def _safe_id(self, idstring):
540 """Make a string safe for including in an id attribute.
540 """Make a string safe for including in an id attribute.
541
541
542 The HTML spec says that id attributes 'must begin with
542 The HTML spec says that id attributes 'must begin with
543 a letter ([A-Za-z]) and may be followed by any number
543 a letter ([A-Za-z]) and may be followed by any number
544 of letters, digits ([0-9]), hyphens ("-"), underscores
544 of letters, digits ([0-9]), hyphens ("-"), underscores
545 ("_"), colons (":"), and periods (".")'. These regexps
545 ("_"), colons (":"), and periods (".")'. These regexps
546 are slightly over-zealous, in that they remove colons
546 are slightly over-zealous, in that they remove colons
547 and periods unnecessarily.
547 and periods unnecessarily.
548
548
549 Whitespace is transformed into underscores, and then
549 Whitespace is transformed into underscores, and then
550 anything which is not a hyphen or a character that
550 anything which is not a hyphen or a character that
551 matches \w (alphanumerics and underscore) is removed.
551 matches \w (alphanumerics and underscore) is removed.
552
552
553 """
553 """
554 # Transform all whitespace to underscore
554 # Transform all whitespace to underscore
555 idstring = re.sub(r'\s', "_", '%s' % idstring)
555 idstring = re.sub(r'\s', "_", '%s' % idstring)
556 # Remove everything that is not a hyphen or a member of \w
556 # Remove everything that is not a hyphen or a member of \w
557 idstring = re.sub(r'(?!-)\W', "", idstring).lower()
557 idstring = re.sub(r'(?!-)\W', "", idstring).lower()
558 return idstring
558 return idstring
559
559
def prepare(self, inline_diff=True):
    """
    Prepare the passed udiff for HTML rendering. It'll return a list
    of dicts with diff information

    The parser result is also stored on the instance as ``parsed_diff``
    and the ``parsed`` flag is set to ``True``.

    :param inline_diff: passed through unchanged to ``self._parser``
    :returns: the value returned by ``self._parser``
    """
    parsed = self._parser(inline_diff=inline_diff)
    self.parsed = True
    self.parsed_diff = parsed
    return parsed
569
569
def as_raw(self, diff_lines=None):
    """
    Returns raw string diff

    :param diff_lines: not used by this method
    :returns: ``self._diff`` unchanged
    """
    # NOTE: an earlier variant rebuilt the text line by line through
    # ``self._line_counter``; the stored diff is now returned as-is.
    return self._diff
576
576
def as_html(self, table_class='code-difftable', line_class='line',
            old_lineno_class='lineno old', new_lineno_class='lineno new',
            code_class='code', enable_comments=False, parsed_lines=None):
    """
    Return given diff as html table with customized css classes

    Triggers ``self.prepare()`` first when the diff has not been parsed yet.

    :param table_class: css class of the ``<table>`` element
    :param line_class: css class put on every ``<tr>`` (next to the
        change action)
    :param old_lineno_class: css class of the old line number cell
    :param new_lineno_class: css class of the new line number cell
    :param code_class: css class of the code cell
    :param enable_comments: when false the code cell additionally gets
        the ``no-comment`` css class
    :param parsed_lines: when truthy, rendered instead of
        ``self.parsed_diff``
    :returns: the html as a single string, or ``None`` when no chunk
        was found in any of the rendered files
    """
    def _link_to_if(condition, label, url):
        """
        Generates a link if condition is met or just the label if not.
        """
        if condition:
            return '''<a href="%(url)s">%(label)s</a>''' % {
                'url': url,
                'label': label
            }
        else:
            return label

    if not self.parsed:
        self.prepare()

    diff_lines = self.parsed_diff
    if parsed_lines:
        diff_lines = parsed_lines

    # Does not depend on the rendered line, so compute it only once
    comments = '' if enable_comments else 'no-comment'

    _html_empty = True
    _html = []
    _html.append('''<table class="%(table_class)s">\n''' % {
        'table_class': table_class
    })

    for diff in diff_lines:
        for line in diff['chunks']:
            _html_empty = False
            for change in line:
                _html.append('''<tr class="%(lc)s %(action)s">\n''' % {
                    'lc': line_class,
                    'action': change['action']
                })
                anchor_old_id = ''
                anchor_new_id = ''
                # Sanitize the filename once per row; both anchors share it
                file_id = self._safe_id(diff['filename'])
                anchor_old = "%(filename)s_o%(oldline_no)s" % {
                    'filename': file_id,
                    'oldline_no': change['old_lineno']
                }
                anchor_new = "%(filename)s_n%(newline_no)s" % {
                    'filename': file_id,
                    'newline_no': change['new_lineno']
                }
                # Only real line numbers get an id attribute; '...' marker
                # rows and empty line numbers do not.
                cond_old = (change['old_lineno'] != '...' and
                            change['old_lineno'])
                cond_new = (change['new_lineno'] != '...' and
                            change['new_lineno'])
                if cond_old:
                    anchor_old_id = 'id="%s"' % anchor_old
                if cond_new:
                    anchor_new_id = 'id="%s"' % anchor_new
                ###########################################################
                # OLD LINE NUMBER
                ###########################################################
                _html.append('''\t<td %(a_id)s class="%(olc)s">''' % {
                    'a_id': anchor_old_id,
                    'olc': old_lineno_class
                })

                # The link is always emitted, even for rows without an id
                _html.append('''%(link)s''' % {
                    'link': _link_to_if(True, change['old_lineno'],
                                        '#%s' % anchor_old)
                })
                _html.append('''</td>\n''')
                ###########################################################
                # NEW LINE NUMBER
                ###########################################################

                _html.append('''\t<td %(a_id)s class="%(nlc)s">''' % {
                    'a_id': anchor_new_id,
                    'nlc': new_lineno_class
                })

                _html.append('''%(link)s''' % {
                    'link': _link_to_if(True, change['new_lineno'],
                                        '#%s' % anchor_new)
                })
                _html.append('''</td>\n''')
                ###########################################################
                # CODE
                ###########################################################
                _html.append('''\t<td class="%(cc)s %(inc)s">''' % {
                    'cc': code_class,
                    'inc': comments
                })
                # NOTE(review): the line is inserted verbatim; presumably
                # it was already escaped while parsing - confirm.
                _html.append('''\n\t\t<pre>%(code)s</pre>\n''' % {
                    'code': change['line']
                })

                _html.append('''\t</td>''')
                _html.append('''\n</tr>\n''')
    _html.append('''</table>''')
    if _html_empty:
        return None
    return ''.join(_html)
679
679
def stat(self):
    """
    Returns tuple of added, and removed lines for this instance

    :returns: ``(self.adds, self.removes)``
    """
    return self.adds, self.removes
685
685
686
686
def differ(org_repo, org_ref, other_repo, other_ref,
           context=3, ignore_whitespace=False):
    """
    General differ between branches, bookmarks, revisions of two remote or
    local but related repositories

    :param org_repo: repository object exposing ``scm_instance``
    :param org_ref: indexable reference; its element ``[1]`` is used as
        the first revision of the diff
    :param other_repo: repository object exposing ``scm_instance``
    :param other_ref: indexable reference; its element ``[1]`` is used as
        the second revision of the diff
    :param context: passed through to ``get_diff``
    :param ignore_whitespace: passed through to ``get_diff``
    :returns: the result of ``get_diff`` when both repositories resolve to
        equal scm instances, otherwise an empty string
    """

    org_repo_scm = org_repo.scm_instance
    other_repo_scm = other_repo.scm_instance

    org_repo = org_repo_scm._repo
    other_repo = other_repo_scm._repo

    org_ref = safe_str(org_ref[1])
    other_ref = safe_str(other_ref[1])

    if org_repo_scm == other_repo_scm:
        # Hand the arguments to the logger instead of %-formatting here:
        # the message is only built when debug logging is enabled, and a
        # formatting/encoding problem is handled by the logging machinery
        # rather than raised from this function.
        log.debug('running diff between %s@%s and %s@%s',
                  org_repo.path, org_ref, other_repo.path, other_ref)
        _diff = org_repo_scm.get_diff(rev1=org_ref, rev2=other_ref,
                                      ignore_whitespace=ignore_whitespace,
                                      context=context)
        return _diff

    return ''  # FIXME: when is it ever relevant to return nothing?
General Comments 0
You need to be logged in to leave comments. Login now