##// END OF EJS Templates
Always show initial context on html diffs.
marcink -
r2359:a264d898 beta
parent child Browse files
Show More
@@ -1,524 +1,523 b''
1 1 # -*- coding: utf-8 -*-
2 2 """
3 3 rhodecode.lib.diffs
4 4 ~~~~~~~~~~~~~~~~~~~
5 5
6 6 Set of diffing helpers, previously part of vcs
7 7
8 8
9 9 :created_on: Dec 4, 2011
10 10 :author: marcink
11 11 :copyright: (C) 2010-2012 Marcin Kuzminski <marcin@python-works.com>
12 12 :original copyright: 2007-2008 by Armin Ronacher
13 13 :license: GPLv3, see COPYING for more details.
14 14 """
15 15 # This program is free software: you can redistribute it and/or modify
16 16 # it under the terms of the GNU General Public License as published by
17 17 # the Free Software Foundation, either version 3 of the License, or
18 18 # (at your option) any later version.
19 19 #
20 20 # This program is distributed in the hope that it will be useful,
21 21 # but WITHOUT ANY WARRANTY; without even the implied warranty of
22 22 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23 23 # GNU General Public License for more details.
24 24 #
25 25 # You should have received a copy of the GNU General Public License
26 26 # along with this program. If not, see <http://www.gnu.org/licenses/>.
27 27
28 28 import re
29 29 import difflib
30 30 import markupsafe
31 31 from itertools import tee, imap
32 32
33 33 from pylons.i18n.translation import _
34 34
35 35 from rhodecode.lib.vcs.exceptions import VCSError
36 36 from rhodecode.lib.vcs.nodes import FileNode, SubModuleNode
37 37 from rhodecode.lib.helpers import escape
38 38 from rhodecode.lib.utils import EmptyChangeset
39 39
40 40
def wrap_to_table(str_):
    """
    Wrap ``str_`` into a single-row ``code-difftable`` HTML table.

    The text is interpolated as-is; no escaping is performed here, so the
    caller is responsible for passing markup-safe content.

    :param str_: text placed inside the ``<pre>`` element of the code cell
    :returns: HTML snippet (string)
    """
    return '''<table class="code-difftable">
<tr class="line no-comment">
<td class="lineno new"></td>
<td class="code no-comment"><pre>%s</pre></td>
</tr>
</table>''' % str_
48 48
49 49
def wrapped_diff(filenode_old, filenode_new, cut_off_limit=None,
                 ignore_whitespace=True, line_context=3,
                 enable_comments=False):
    """
    Returns a diff wrapped into a html table, checks for cut_off_limit and
    presents a proper message when the diff is not rendered.

    :param filenode_old: old version of the file; when ``None`` an empty
        ``FileNode`` with the path of ``filenode_new`` is used instead
    :param filenode_new: new version of the file
    :param cut_off_limit: ``None`` disables the size check, ``-1`` always
        shows the "cut off" message, any other value requires the size of
        both nodes to be below it for the diff to be rendered
    :param ignore_whitespace: passed to :func:`get_gitdiff`
    :param line_context: number of context lines passed to
        :func:`get_gitdiff`
    :param enable_comments: passed to ``DiffProcessor.as_html``
    :returns: tuple ``(size, cs1, cs2, diff, stats)`` where ``size`` is the
        length of the rendered html (0 when nothing was rendered), ``cs1``
        and ``cs2`` are the raw ids of the old and new changesets, ``diff``
        is the html and ``stats`` is an ``(added, removed)`` tuple
    """

    if filenode_old is None:
        filenode_old = FileNode(filenode_new.path, '', EmptyChangeset())

    if filenode_old.is_binary or filenode_new.is_binary:
        diff = wrap_to_table(_('binary file'))
        stats = (0, 0)
        size = 0

    elif cut_off_limit != -1 and (
            cut_off_limit is None or
            (filenode_old.size < cut_off_limit and
             filenode_new.size < cut_off_limit)):

        f_gitdiff = get_gitdiff(filenode_old, filenode_new,
                                ignore_whitespace=ignore_whitespace,
                                context=line_context)
        diff_processor = DiffProcessor(f_gitdiff, format='gitdiff')

        # as_html() returns None when there is nothing to show
        diff = diff_processor.as_html(enable_comments=enable_comments)
        stats = diff_processor.stat()
        size = len(diff or '')
    else:
        diff = wrap_to_table(_('Changeset was too big and was cut off, use '
                               'diff menu to display this diff'))
        stats = (0, 0)
        size = 0

    if not diff:
        # a real list (not a lazy filter object) so that the emptiness check
        # and the indexing below work regardless of the Python version
        submodules = [node for node in (filenode_new, filenode_old)
                      if isinstance(node, SubModuleNode)]
        if submodules:
            diff = wrap_to_table(escape('Submodule %r' % (submodules[0],)))
        else:
            diff = wrap_to_table(_('No changes detected'))

    cs1 = filenode_old.changeset.raw_id
    cs2 = filenode_new.changeset.raw_id

    return size, cs1, cs2, diff, stats
94 94
95 95
def get_gitdiff(filenode_old, filenode_new, ignore_whitespace=True, context=3):
    """
    Returns git style diff between given ``filenode_old`` and ``filenode_new``.

    :param filenode_old: ``FileNode`` with the old content
    :param filenode_new: ``FileNode`` with the new content; its changeset's
        repository is the one asked for the diff
    :param ignore_whitespace: ignore whitespaces in diff
    :param context: number of context lines; any falsy value (``None``,
        ``0``) falls back to 3
    :returns: whatever ``repo.get_diff`` returns, or an empty string when
        either node is a ``SubModuleNode``
    :raises VCSError: when one of the nodes is not a ``FileNode``
    """
    # make sure we pass in default context
    context = context or 3

    # submodules have no textual diff; checked before the FileNode test so
    # that they yield an empty diff instead of an error
    if any(isinstance(node, SubModuleNode)
           for node in (filenode_new, filenode_old)):
        return ''

    for filenode in (filenode_old, filenode_new):
        if not isinstance(filenode, FileNode):
            raise VCSError("Given object should be FileNode object, not %s"
                           % filenode.__class__)

    repo = filenode_new.changeset.repository
    # changesets without a ``raw_id`` attribute are treated as the empty one
    old_raw_id = getattr(filenode_old.changeset, 'raw_id',
                         repo.EMPTY_CHANGESET)
    new_raw_id = getattr(filenode_new.changeset, 'raw_id',
                         repo.EMPTY_CHANGESET)

    vcs_gitdiff = repo.get_diff(old_raw_id, new_raw_id, filenode_new.path,
                                ignore_whitespace, context)
    return vcs_gitdiff
121 121
122 122
class DiffProcessor(object):
    """
    Give it a unified diff and it returns a list of the files that were
    mentioned in the diff together with a dict of meta information that
    can be used to render it in a HTML template.
    """
    # hunk header: old start, optional old count, new start, optional new
    # count and the free text following the closing ``@@``
    _chunk_re = re.compile(r'@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(.*)')

    def __init__(self, diff, differ='diff', format='udiff'):
        """
        :param diff: a text in diff format or generator
        :param differ: ``'difflib'`` selects word based inline highlighting,
            any other value selects the prefix/suffix based one
        :param format: format of diff passed, `udiff` or `gitdiff`
        """
        if isinstance(diff, basestring):
            diff = [diff]

        self.__udiff = diff
        self.__format = format
        self.adds = 0
        self.removes = 0

        # NOTE(review): a plain string passed with format='udiff' is handed
        # on as a single item (it is not split into lines here); only the
        # 'gitdiff' path splits its input.
        if self.__format == 'gitdiff':
            udiff_copy = self.copy_iterator()
            self.lines = imap(self.escaper, self._parse_gitdiff(udiff_copy))
        else:
            udiff_copy = self.copy_iterator()
            self.lines = imap(self.escaper, udiff_copy)

        # Select a differ.
        if differ == 'difflib':
            self.differ = self._highlight_line_difflib
        else:
            self.differ = self._highlight_line_udiff

    def escaper(self, string):
        """
        Return ``string`` html-escaped with ``markupsafe.escape``.
        """
        return markupsafe.escape(string)

    def copy_iterator(self):
        """
        make a fresh copy of generator, we should not iterate thru
        an original as it's needed for repeating operations on
        this instance of DiffProcessor
        """
        self.__udiff, iterator_copy = tee(self.__udiff)
        return iterator_copy

    def _extract_rev(self, line1, line2):
        """
        Extract the filename and revision hint from the ``---``/``+++``
        header pair.

        :param line1: the ``--- `` line
        :param line2: the ``+++ `` line
        :returns: tuple ``(filename, new_rev, old_rev)`` -- note the order;
            revisions default to ``'new'``/``'old'`` when the header carries
            none. ``(None, None, None)`` when the lines are not a header.
        """

        def _strip_prefix(name, prefix):
            # only a *leading* ``a/`` / ``b/`` is a diff prefix; removing the
            # first occurrence anywhere would mangle paths like ``data/x``
            if name.startswith(prefix):
                return name[len(prefix):]
            return name

        try:
            if line1.startswith('--- ') and line2.startswith('+++ '):
                l1 = line1[4:].split(None, 1)
                old_filename = (_strip_prefix(l1[0], 'a/')
                                if len(l1) >= 1 else None)
                old_rev = l1[1] if len(l1) == 2 else 'old'

                l2 = line2[4:].split(None, 1)
                new_filename = (_strip_prefix(l2[0], 'b/')
                                if len(l2) >= 1 else None)
                new_rev = l2[1] if len(l2) == 2 else 'new'

                # for added files the old side is /dev/null
                filename = (old_filename
                            if old_filename != '/dev/null' else new_filename)

                return filename, new_rev, old_rev
        except (ValueError, IndexError):
            pass

        return None, None, None

    def _parse_gitdiff(self, diffiterator):
        """
        Split the items of ``diffiterator`` into decoded diff lines and
        record the number of added/removed lines on the instance.

        The iterator may yield nothing, a single item (the whole diff) or
        two items (a first line followed by the rest of the diff).

        :returns: list of unicode lines (line endings kept)
        :raises Exception: when the iterator yields more than two items
        """
        output = list(diffiterator)
        size = len(output)

        if size == 2:
            raw_lines = [output[0]] + output[1].splitlines(1)
        elif size == 1:
            raw_lines = output[0].splitlines(1)
        elif size == 0:
            raw_lines = []
        else:
            raise Exception('wrong size of diff %s' % size)

        adds = removes = 0
        decoded = []
        for l in raw_lines:
            # '+++' / '---' are file headers, not content changes
            if l.startswith('+') and not l.startswith('+++'):
                adds += 1
            elif l.startswith('-') and not l.startswith('---'):
                removes += 1
            decoded.append(l.decode('utf8', 'replace'))

        # assigned, not incremented: this method runs again from raw_diff()
        # and must not inflate the numbers reported by stat()
        self.adds = adds
        self.removes = removes
        return decoded

    def _highlight_line_difflib(self, line, next_):
        """
        Highlight inline changes in both lines, word by word.

        Both dicts are modified in place: their ``'line'`` values get the
        differing fragments wrapped in ``<del>`` (old line) / ``<ins>``
        (new line).
        """

        if line['action'] == 'del':
            old, new = line, next_
        else:
            old, new = next_, line

        # the capturing group keeps the separators so that joining the
        # pieces reproduces the line
        oldwords = re.split(r'(\W)', old['line'])
        newwords = re.split(r'(\W)', new['line'])

        sequence = difflib.SequenceMatcher(None, oldwords, newwords)

        oldfragments, newfragments = [], []
        for tag, i1, i2, j1, j2 in sequence.get_opcodes():
            oldfrag = ''.join(oldwords[i1:i2])
            newfrag = ''.join(newwords[j1:j2])
            if tag != 'equal':
                if oldfrag:
                    oldfrag = '<del>%s</del>' % oldfrag
                if newfrag:
                    newfrag = '<ins>%s</ins>' % newfrag
            oldfragments.append(oldfrag)
            newfragments.append(newfrag)

        old['line'] = "".join(oldfragments)
        new['line'] = "".join(newfragments)

    def _highlight_line_udiff(self, line, next_):
        """
        Highlight inline changes in both lines.

        The common prefix and common suffix of the two lines are kept and
        the part in between is wrapped in ``<ins>`` (for ``'add'`` lines) or
        ``<del>`` (anything else). Both dicts are modified in place.
        """
        # length of the common prefix
        start = 0
        limit = min(len(line['line']), len(next_['line']))
        while start < limit and line['line'][start] == next_['line'][start]:
            start += 1
        # common suffix as a negative offset; it may not run into the prefix
        end = -1
        limit -= start
        while -end <= limit and line['line'][end] == next_['line'][end]:
            end -= 1
        end += 1
        if start or end:
            def do(l):
                last = end + len(l['line'])
                if l['action'] == 'add':
                    tag = 'ins'
                else:
                    tag = 'del'
                l['line'] = '%s<%s>%s</%s>%s' % (
                    l['line'][:start],
                    tag,
                    l['line'][start:last],
                    tag,
                    l['line'][last:]
                )
            do(line)
            do(next_)

    def _highlight_chunks(self, files):
        """
        Run the selected differ over every changed line pair of every
        chunk of every file in ``files`` (modifies the line dicts in place).

        A line that is not ``'unmod'`` is paired with the line following it;
        the pair is highlighted only when that following line is a change of
        a different kind. Both lines of a pair are consumed either way.
        """
        for diff_file in files:
            for chunk in diff_file['chunks']:
                pos, total = 0, len(chunk)
                while pos < total:
                    line = chunk[pos]
                    pos += 1
                    if line['action'] == 'unmod':
                        continue
                    if pos >= total:
                        break
                    nextline = chunk[pos]
                    pos += 1
                    if nextline['action'] == 'unmod' or \
                            nextline['action'] == line['action']:
                        continue
                    self.differ(line, nextline)

    def _parse_udiff(self):
        """
        Parse the diff and return data for the template.

        Consumes ``self.lines``.

        :returns: list of dicts with the keys ``filename``,
            ``old_revision``, ``new_revision`` and ``chunks``; every chunk
            is a list of dicts with the keys ``old_lineno``, ``new_lineno``,
            ``action`` (``context``, ``add``, ``del`` or ``unmod``) and
            ``line``
        """
        lineiter = self.lines
        files = []
        try:
            line = lineiter.next()
            while 1:
                # continue until we found the old file
                if not line.startswith('--- '):
                    line = lineiter.next()
                    continue

                chunks = []
                # _extract_rev returns the new revision before the old one
                filename, new_rev, old_rev = \
                    self._extract_rev(line, lineiter.next())
                files.append({
                    'filename': filename,
                    'old_revision': old_rev,
                    'new_revision': new_rev,
                    'chunks': chunks
                })

                line = lineiter.next()
                while line:
                    match = self._chunk_re.match(line)
                    if not match:
                        break

                    lines = []
                    chunks.append(lines)

                    gr = match.groups()
                    # a missing count in the hunk header means 1
                    old_line, old_end, new_line, new_end = \
                        [int(x or 1) for x in gr[:-1]]
                    old_line -= 1
                    new_line -= 1
                    old_end += old_line
                    new_end += new_line

                    # skip context only if it's first line
                    if int(gr[0]) > 1:
                        lines.append({
                            'old_lineno': '...',
                            'new_lineno': '...',
                            'action': 'context',
                            'line': line,
                        })

                    line = lineiter.next()

                    while old_line < old_end or new_line < new_end:
                        if line:
                            command, line = line[0], line[1:]
                        else:
                            command = ' '
                        affects_old = affects_new = False

                        # ignore those if we don't expect them
                        # NOTE(review): ``continue`` re-examines the rest of
                        # the same line (first character already removed), it
                        # does not fetch the next line -- confirm intended
                        if command in '#@':
                            continue
                        elif command == '+':
                            affects_new = True
                            action = 'add'
                        elif command == '-':
                            affects_old = True
                            action = 'del'
                        else:
                            affects_old = affects_new = True
                            action = 'unmod'

                        old_line += affects_old
                        new_line += affects_new
                        lines.append({
                            'old_lineno': affects_old and old_line or '',
                            'new_lineno': affects_new and new_line or '',
                            'action': action,
                            'line': line
                        })
                        line = lineiter.next()

        except StopIteration:
            # end of input is the regular way out of the loops above
            pass

        # highlight inline changes
        self._highlight_chunks(files)

        return files

    def prepare(self):
        """
        Prepare the passed udiff for HTML rendering. It'll return a list
        of dicts
        """
        return self._parse_udiff()

    def _safe_id(self, idstring):
        r"""Make a string safe for including in an id attribute.

        The HTML spec says that id attributes 'must begin with
        a letter ([A-Za-z]) and may be followed by any number
        of letters, digits ([0-9]), hyphens ("-"), underscores
        ("_"), colons (":"), and periods (".")'. These regexps
        are slightly over-zealous, in that they remove colons
        and periods unnecessarily.

        Whitespace is transformed into underscores, and then
        anything which is not a hyphen or a character that
        matches \w (alphanumerics and underscore) is removed.
        The result is lower-cased.

        """
        # Transform all whitespace to underscore
        idstring = re.sub(r'\s', "_", '%s' % idstring)
        # Remove everything that is not a hyphen or a member of \w
        idstring = re.sub(r'(?!-)\W', "", idstring).lower()
        return idstring

    def raw_diff(self):
        """
        Returns raw string as udiff
        """
        udiff_copy = self.copy_iterator()
        if self.__format == 'gitdiff':
            udiff_copy = self._parse_gitdiff(udiff_copy)
        return u''.join(udiff_copy)

    def as_html(self, table_class='code-difftable', line_class='line',
                new_lineno_class='lineno old', old_lineno_class='lineno new',
                code_class='code', enable_comments=False):
        """
        Return udiff as html table with customized css classes

        :param table_class: css class of the ``<table>``
        :param line_class: css class of every ``<tr>`` (the line's action is
            appended to it)
        :param new_lineno_class: css class of the new line number cell
        :param old_lineno_class: css class of the old line number cell
        :param code_class: css class of the code cell
        :param enable_comments: when false the code cell additionally gets
            the ``no-comment`` class
        :returns: html string, or ``None`` when the diff has no chunks

        NOTE(review): the defaults of ``new_lineno_class`` and
        ``old_lineno_class`` look swapped relative to the parameter names;
        they are kept because the generated markup depends on them --
        confirm against the stylesheet before changing.
        """
        def _link_to_if(condition, label, url):
            """
            Generates a link if condition is met or just the label if not.
            """

            if condition:
                return '''<a href="%(url)s">%(label)s</a>''' % {
                    'url': url,
                    'label': label
                }
            else:
                return label

        diff_lines = self.prepare()
        _html_empty = True
        _html = []
        _html.append('''<table class="%(table_class)s">\n''' % {
            'table_class': table_class
        })
        for diff in diff_lines:
            for chunk in diff['chunks']:
                _html_empty = False
                for change in chunk:
                    _html.append('''<tr class="%(lc)s %(action)s">\n''' % {
                        'lc': line_class,
                        'action': change['action']
                    })
                    anchor_old_id = ''
                    anchor_new_id = ''
                    anchor_old = "%(filename)s_o%(oldline_no)s" % {
                        'filename': self._safe_id(diff['filename']),
                        'oldline_no': change['old_lineno']
                    }
                    anchor_new = "%(filename)s_n%(oldline_no)s" % {
                        'filename': self._safe_id(diff['filename']),
                        'oldline_no': change['new_lineno']
                    }
                    # only real line numbers get an id; '...' (hunk header)
                    # and '' (line absent on that side) do not
                    cond_old = (change['old_lineno'] != '...' and
                                change['old_lineno'])
                    cond_new = (change['new_lineno'] != '...' and
                                change['new_lineno'])
                    if cond_old:
                        anchor_old_id = 'id="%s"' % anchor_old
                    if cond_new:
                        anchor_new_id = 'id="%s"' % anchor_new
                    ###########################################################
                    # OLD LINE NUMBER
                    ###########################################################
                    _html.append('''\t<td %(a_id)s class="%(olc)s">''' % {
                        'a_id': anchor_old_id,
                        'olc': old_lineno_class
                    })

                    _html.append('''%(link)s''' % {
                        'link': _link_to_if(True, change['old_lineno'],
                                            '#%s' % anchor_old)
                    })
                    _html.append('''</td>\n''')
                    ###########################################################
                    # NEW LINE NUMBER
                    ###########################################################

                    _html.append('''\t<td %(a_id)s class="%(nlc)s">''' % {
                        'a_id': anchor_new_id,
                        'nlc': new_lineno_class
                    })

                    _html.append('''%(link)s''' % {
                        'link': _link_to_if(True, change['new_lineno'],
                                            '#%s' % anchor_new)
                    })
                    _html.append('''</td>\n''')
                    ###########################################################
                    # CODE
                    ###########################################################
                    comments = '' if enable_comments else 'no-comment'
                    _html.append('''\t<td class="%(cc)s %(inc)s">''' % {
                        'cc': code_class,
                        'inc': comments
                    })
                    _html.append('''\n\t\t<pre>%(code)s</pre>\n''' % {
                        'code': change['line']
                    })
                    _html.append('''\t</td>''')
                    _html.append('''\n</tr>\n''')
        _html.append('''</table>''')
        if _html_empty:
            return None
        return ''.join(_html)

    def stat(self):
        """
        Returns tuple of added, and removed lines for this instance
        """
        return self.adds, self.removes
General Comments 0
You need to be logged in to leave comments. Login now