##// END OF EJS Templates
fixed bug with inline changes highlighter.
marcink -
r2349:0edbffa9 codereview
parent child Browse files
Show More
@@ -1,562 +1,565 b''
1 1 # -*- coding: utf-8 -*-
2 2 """
3 3 rhodecode.lib.diffs
4 4 ~~~~~~~~~~~~~~~~~~~
5 5
6 6 Set of diffing helpers, previously part of vcs
7 7
8 8
9 9 :created_on: Dec 4, 2011
10 10 :author: marcink
11 11 :copyright: (C) 2010-2012 Marcin Kuzminski <marcin@python-works.com>
12 12 :original copyright: 2007-2008 by Armin Ronacher
13 13 :license: GPLv3, see COPYING for more details.
14 14 """
15 15 # This program is free software: you can redistribute it and/or modify
16 16 # it under the terms of the GNU General Public License as published by
17 17 # the Free Software Foundation, either version 3 of the License, or
18 18 # (at your option) any later version.
19 19 #
20 20 # This program is distributed in the hope that it will be useful,
21 21 # but WITHOUT ANY WARRANTY; without even the implied warranty of
22 22 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23 23 # GNU General Public License for more details.
24 24 #
25 25 # You should have received a copy of the GNU General Public License
26 26 # along with this program. If not, see <http://www.gnu.org/licenses/>.
27 27
28 28 import re
29 29 import difflib
30 30 import markupsafe
31 31 from itertools import tee, imap
32 32
33 33 from pylons.i18n.translation import _
34 34
35 35 from rhodecode.lib.vcs.exceptions import VCSError
36 36 from rhodecode.lib.vcs.nodes import FileNode, SubModuleNode
37 37 from rhodecode.lib.helpers import escape
38 38 from rhodecode.lib.utils import EmptyChangeset
39 39
40 40
41 41 def wrap_to_table(str_):
42 42 return '''<table class="code-difftable">
43 43 <tr class="line no-comment">
44 44 <td class="lineno new"></td>
45 45 <td class="code no-comment"><pre>%s</pre></td>
46 46 </tr>
47 47 </table>''' % str_
48 48
49 49
50 50 def wrapped_diff(filenode_old, filenode_new, cut_off_limit=None,
51 51 ignore_whitespace=True, line_context=3,
52 52 enable_comments=False):
53 53 """
54 54 returns a wrapped diff into a table, checks for cut_off_limit and presents
55 55 proper message
56 56 """
57 57
58 58 if filenode_old is None:
59 59 filenode_old = FileNode(filenode_new.path, '', EmptyChangeset())
60 60
61 61 if filenode_old.is_binary or filenode_new.is_binary:
62 62 diff = wrap_to_table(_('binary file'))
63 63 stats = (0, 0)
64 64 size = 0
65 65
66 66 elif cut_off_limit != -1 and (cut_off_limit is None or
67 67 (filenode_old.size < cut_off_limit and filenode_new.size < cut_off_limit)):
68 68
69 69 f_gitdiff = get_gitdiff(filenode_old, filenode_new,
70 70 ignore_whitespace=ignore_whitespace,
71 71 context=line_context)
72 72 diff_processor = DiffProcessor(f_gitdiff, format='gitdiff')
73 73
74 74 diff = diff_processor.as_html(enable_comments=enable_comments)
75 75 stats = diff_processor.stat()
76 76 size = len(diff or '')
77 77 else:
78 78 diff = wrap_to_table(_('Changeset was too big and was cut off, use '
79 79 'diff menu to display this diff'))
80 80 stats = (0, 0)
81 81 size = 0
82 82 if not diff:
83 83 submodules = filter(lambda o: isinstance(o, SubModuleNode),
84 84 [filenode_new, filenode_old])
85 85 if submodules:
86 86 diff = wrap_to_table(escape('Submodule %r' % submodules[0]))
87 87 else:
88 88 diff = wrap_to_table(_('No changes detected'))
89 89
90 90 cs1 = filenode_old.changeset.raw_id
91 91 cs2 = filenode_new.changeset.raw_id
92 92
93 93 return size, cs1, cs2, diff, stats
94 94
95 95
96 96 def get_gitdiff(filenode_old, filenode_new, ignore_whitespace=True, context=3):
97 97 """
98 98 Returns git style diff between given ``filenode_old`` and ``filenode_new``.
99 99
100 100 :param ignore_whitespace: ignore whitespaces in diff
101 101 """
102 102 # make sure we pass in default context
103 103 context = context or 3
104 104 submodules = filter(lambda o: isinstance(o, SubModuleNode),
105 105 [filenode_new, filenode_old])
106 106 if submodules:
107 107 return ''
108 108
109 109 for filenode in (filenode_old, filenode_new):
110 110 if not isinstance(filenode, FileNode):
111 111 raise VCSError("Given object should be FileNode object, not %s"
112 112 % filenode.__class__)
113 113
114 114 repo = filenode_new.changeset.repository
115 115 old_raw_id = getattr(filenode_old.changeset, 'raw_id', repo.EMPTY_CHANGESET)
116 116 new_raw_id = getattr(filenode_new.changeset, 'raw_id', repo.EMPTY_CHANGESET)
117 117
118 118 vcs_gitdiff = repo.get_diff(old_raw_id, new_raw_id, filenode_new.path,
119 119 ignore_whitespace, context)
120 120 return vcs_gitdiff
121 121
122 122
123 123 class DiffProcessor(object):
124 124 """
125 125 Give it a unified diff and it returns a list of the files that were
126 126 mentioned in the diff together with a dict of meta information that
127 127 can be used to render it in a HTML template.
128 128 """
129 129 _chunk_re = re.compile(r'@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(.*)')
130 130
131 131 def __init__(self, diff, differ='diff', format='udiff'):
132 132 """
133 133 :param diff: a text in diff format or generator
134 134 :param format: format of diff passed, `udiff` or `gitdiff`
135 135 """
136 136 if isinstance(diff, basestring):
137 137 diff = [diff]
138 138
139 139 self.__udiff = diff
140 140 self.__format = format
141 141 self.adds = 0
142 142 self.removes = 0
143 143
144 144 if isinstance(self.__udiff, basestring):
145 145 self.lines = iter(self.__udiff.splitlines(1))
146 146
147 147 elif self.__format == 'gitdiff':
148 148 udiff_copy = self.copy_iterator()
149 149 self.lines = imap(self.escaper, self._parse_gitdiff(udiff_copy))
150 150 else:
151 151 udiff_copy = self.copy_iterator()
152 152 self.lines = imap(self.escaper, udiff_copy)
153 153
154 154 # Select a differ.
155 155 if differ == 'difflib':
156 156 self.differ = self._highlight_line_difflib
157 157 else:
158 158 self.differ = self._highlight_line_udiff
159 159
160 160 def escaper(self, string):
161 161 return markupsafe.escape(string)
162 162
163 163 def copy_iterator(self):
164 164 """
165 165 make a fresh copy of generator, we should not iterate thru
166 166 an original as it's needed for repeating operations on
167 167 this instance of DiffProcessor
168 168 """
169 169 self.__udiff, iterator_copy = tee(self.__udiff)
170 170 return iterator_copy
171 171
172 172 def _extract_rev(self, line1, line2):
173 173 """
174 174 Extract the operation (A/M/D), filename and revision hint from a line.
175 175 """
176 176
177 177 try:
178 178 if line1.startswith('--- ') and line2.startswith('+++ '):
179 179 l1 = line1[4:].split(None, 1)
180 180 old_filename = (l1[0].replace('a/', '', 1)
181 181 if len(l1) >= 1 else None)
182 182 old_rev = l1[1] if len(l1) == 2 else 'old'
183 183
184 184 l2 = line2[4:].split(None, 1)
185 185 new_filename = (l2[0].replace('b/', '', 1)
186 186 if len(l1) >= 1 else None)
187 187 new_rev = l2[1] if len(l2) == 2 else 'new'
188 188
189 189 filename = (old_filename
190 190 if old_filename != '/dev/null' else new_filename)
191 191
192 192 operation = 'D' if new_filename == '/dev/null' else None
193 193 if not operation:
194 194 operation = 'M' if old_filename != '/dev/null' else 'A'
195 195
196 196 return operation, filename, new_rev, old_rev
197 197 except (ValueError, IndexError):
198 198 pass
199 199
200 200 return None, None, None, None
201 201
202 202 def _parse_gitdiff(self, diffiterator):
203 203 def line_decoder(l):
204 204 if l.startswith('+') and not l.startswith('+++'):
205 205 self.adds += 1
206 206 elif l.startswith('-') and not l.startswith('---'):
207 207 self.removes += 1
208 208 return l.decode('utf8', 'replace')
209 209
210 210 output = list(diffiterator)
211 211 size = len(output)
212 212
213 213 if size == 2:
214 214 l = []
215 215 l.extend([output[0]])
216 216 l.extend(output[1].splitlines(1))
217 217 return map(line_decoder, l)
218 218 elif size == 1:
219 219 return map(line_decoder, output[0].splitlines(1))
220 220 elif size == 0:
221 221 return []
222 222
223 223 raise Exception('wrong size of diff %s' % size)
224 224
225 225 def _highlight_line_difflib(self, line, next_):
226 226 """
227 227 Highlight inline changes in both lines.
228 228 """
229 229
230 230 if line['action'] == 'del':
231 231 old, new = line, next_
232 232 else:
233 233 old, new = next_, line
234 234
235 235 oldwords = re.split(r'(\W)', old['line'])
236 236 newwords = re.split(r'(\W)', new['line'])
237 237
238 238 sequence = difflib.SequenceMatcher(None, oldwords, newwords)
239 239
240 240 oldfragments, newfragments = [], []
241 241 for tag, i1, i2, j1, j2 in sequence.get_opcodes():
242 242 oldfrag = ''.join(oldwords[i1:i2])
243 243 newfrag = ''.join(newwords[j1:j2])
244 244 if tag != 'equal':
245 245 if oldfrag:
246 246 oldfrag = '<del>%s</del>' % oldfrag
247 247 if newfrag:
248 248 newfrag = '<ins>%s</ins>' % newfrag
249 249 oldfragments.append(oldfrag)
250 250 newfragments.append(newfrag)
251 251
252 252 old['line'] = "".join(oldfragments)
253 253 new['line'] = "".join(newfragments)
254 254
255 255 def _highlight_line_udiff(self, line, next_):
256 256 """
257 257 Highlight inline changes in both lines.
258 258 """
259 259 start = 0
260 260 limit = min(len(line['line']), len(next_['line']))
261 261 while start < limit and line['line'][start] == next_['line'][start]:
262 262 start += 1
263 263 end = -1
264 264 limit -= start
265 265 while -end <= limit and line['line'][end] == next_['line'][end]:
266 266 end -= 1
267 267 end += 1
268 268 if start or end:
269 269 def do(l):
270 270 last = end + len(l['line'])
271 271 if l['action'] == 'add':
272 272 tag = 'ins'
273 273 else:
274 274 tag = 'del'
275 275 l['line'] = '%s<%s>%s</%s>%s' % (
276 276 l['line'][:start],
277 277 tag,
278 278 l['line'][start:last],
279 279 tag,
280 280 l['line'][last:]
281 281 )
282
282 283 do(line)
283 284 do(next_)
284 285
285 286 def _parse_udiff(self):
286 287 """
287 288 Parse the diff and return data for the template.
288 289 """
289 290 lineiter = self.lines
290 291 files = []
291 292 try:
292 293 line = lineiter.next()
293 294 # skip first context
294 295 skipfirst = True
295 296
296 297 while 1:
297 298 # continue until we found the old file
298 299 if not line.startswith('--- '):
299 300 line = lineiter.next()
300 301 continue
301 302
302 303 chunks = []
303 304 stats = [0, 0]
304 305 operation, filename, old_rev, new_rev = \
305 306 self._extract_rev(line, lineiter.next())
306 307 files.append({
307 308 'filename': filename,
308 309 'old_revision': old_rev,
309 310 'new_revision': new_rev,
310 311 'chunks': chunks,
311 312 'operation': operation,
312 313 'stats': stats,
313 314 })
314 315
315 316 line = lineiter.next()
316 317 while line:
317 318
318 319 match = self._chunk_re.match(line)
319 320 if not match:
320 321 break
321 322
322 323 lines = []
323 324 chunks.append(lines)
324 325
325 326 old_line, old_end, new_line, new_end = \
326 327 [int(x or 1) for x in match.groups()[:-1]]
327 328 old_line -= 1
328 329 new_line -= 1
329 330 context = len(match.groups()) == 5
330 331 old_end += old_line
331 332 new_end += new_line
332 333
333 334 if context:
334 335 if not skipfirst:
335 336 lines.append({
336 337 'old_lineno': '...',
337 338 'new_lineno': '...',
338 339 'action': 'context',
339 340 'line': line,
340 341 })
341 342 else:
342 343 skipfirst = False
343 344
344 345 line = lineiter.next()
345 346 while old_line < old_end or new_line < new_end:
346 347 if line:
347 348 command, line = line[0], line[1:]
348 349 else:
349 350 command = ' '
350 351 affects_old = affects_new = False
351 352
352 353 # ignore those if we don't expect them
353 354 if command in '#@':
354 355 continue
355 356 elif command == '+':
356 357 affects_new = True
357 358 action = 'add'
358 359 stats[0] += 1
359 360 elif command == '-':
360 361 affects_old = True
361 362 action = 'del'
362 363 stats[1] += 1
363 364 else:
364 365 affects_old = affects_new = True
365 366 action = 'unmod'
366 367
367 368 old_line += affects_old
368 369 new_line += affects_new
369 370 lines.append({
370 371 'old_lineno': affects_old and old_line or '',
371 372 'new_lineno': affects_new and new_line or '',
372 373 'action': action,
373 374 'line': line
374 375 })
375 376 line = lineiter.next()
376 377 except StopIteration:
377 378 pass
378 379
379 380 # highlight inline changes
380 for _ in files:
381 for chunk in chunks:
381 for diff_data in files:
382 for chunk in diff_data['chunks']:
382 383 lineiter = iter(chunk)
383 384 try:
384 385 while 1:
385 386 line = lineiter.next()
386 387 if line['action'] != 'unmod':
387 388 nextline = lineiter.next()
388 389 if nextline['action'] == 'unmod' or \
389 390 nextline['action'] == line['action']:
390 391 continue
391 392 self.differ(line, nextline)
392 393 except StopIteration:
393 394 pass
394 395 return files
395 396
396 397 def prepare(self):
397 398 """
398 399 Prepare the passed udiff for HTML rendering. It'll return a list
399 400 of dicts
400 401 """
401 402 return self._parse_udiff()
402 403
403 404 def _safe_id(self, idstring):
404 405 """Make a string safe for including in an id attribute.
405 406
406 407 The HTML spec says that id attributes 'must begin with
407 408 a letter ([A-Za-z]) and may be followed by any number
408 409 of letters, digits ([0-9]), hyphens ("-"), underscores
409 410 ("_"), colons (":"), and periods (".")'. These regexps
410 411 are slightly over-zealous, in that they remove colons
411 412 and periods unnecessarily.
412 413
413 414 Whitespace is transformed into underscores, and then
414 415 anything which is not a hyphen or a character that
415 416 matches \w (alphanumerics and underscore) is removed.
416 417
417 418 """
418 419 # Transform all whitespace to underscore
419 420 idstring = re.sub(r'\s', "_", '%s' % idstring)
420 421 # Remove everything that is not a hyphen or a member of \w
421 422 idstring = re.sub(r'(?!-)\W', "", idstring).lower()
422 423 return idstring
423 424
424 425 def raw_diff(self):
425 426 """
426 427 Returns raw string as udiff
427 428 """
428 429 udiff_copy = self.copy_iterator()
429 430 if self.__format == 'gitdiff':
430 431 udiff_copy = self._parse_gitdiff(udiff_copy)
431 432 return u''.join(udiff_copy)
432 433
433 434 def as_html(self, table_class='code-difftable', line_class='line',
434 435 new_lineno_class='lineno old', old_lineno_class='lineno new',
435 436 code_class='code', enable_comments=False, diff_lines=None):
436 437 """
437 Return udiff as html table with customized css classes
438 Return given diff as html table with customized css classes
438 439 """
439 440 def _link_to_if(condition, label, url):
440 441 """
441 442 Generates a link if condition is met or just the label if not.
442 443 """
443 444
444 445 if condition:
445 446 return '''<a href="%(url)s">%(label)s</a>''' % {
446 447 'url': url,
447 448 'label': label
448 449 }
449 450 else:
450 451 return label
451 452 if diff_lines is None:
452 453 diff_lines = self.prepare()
453 454 _html_empty = True
454 455 _html = []
455 456 _html.append('''<table class="%(table_class)s">\n''' % {
456 457 'table_class': table_class
457 458 })
458 459 for diff in diff_lines:
459 460 for line in diff['chunks']:
460 461 _html_empty = False
461 462 for change in line:
462 463 _html.append('''<tr class="%(lc)s %(action)s">\n''' % {
463 464 'lc': line_class,
464 465 'action': change['action']
465 466 })
466 467 anchor_old_id = ''
467 468 anchor_new_id = ''
468 469 anchor_old = "%(filename)s_o%(oldline_no)s" % {
469 470 'filename': self._safe_id(diff['filename']),
470 471 'oldline_no': change['old_lineno']
471 472 }
472 473 anchor_new = "%(filename)s_n%(oldline_no)s" % {
473 474 'filename': self._safe_id(diff['filename']),
474 475 'oldline_no': change['new_lineno']
475 476 }
476 477 cond_old = (change['old_lineno'] != '...' and
477 478 change['old_lineno'])
478 479 cond_new = (change['new_lineno'] != '...' and
479 480 change['new_lineno'])
480 481 if cond_old:
481 482 anchor_old_id = 'id="%s"' % anchor_old
482 483 if cond_new:
483 484 anchor_new_id = 'id="%s"' % anchor_new
484 485 ###########################################################
485 486 # OLD LINE NUMBER
486 487 ###########################################################
487 488 _html.append('''\t<td %(a_id)s class="%(olc)s">''' % {
488 489 'a_id': anchor_old_id,
489 490 'olc': old_lineno_class
490 491 })
491 492
492 493 _html.append('''%(link)s''' % {
493 494 'link': _link_to_if(True, change['old_lineno'],
494 495 '#%s' % anchor_old)
495 496 })
496 497 _html.append('''</td>\n''')
497 498 ###########################################################
498 499 # NEW LINE NUMBER
499 500 ###########################################################
500 501
501 502 _html.append('''\t<td %(a_id)s class="%(nlc)s">''' % {
502 503 'a_id': anchor_new_id,
503 504 'nlc': new_lineno_class
504 505 })
505 506
506 507 _html.append('''%(link)s''' % {
507 508 'link': _link_to_if(True, change['new_lineno'],
508 509 '#%s' % anchor_new)
509 510 })
510 511 _html.append('''</td>\n''')
511 512 ###########################################################
512 513 # CODE
513 514 ###########################################################
514 515 comments = '' if enable_comments else 'no-comment'
515 516 _html.append('''\t<td class="%(cc)s %(inc)s">''' % {
516 517 'cc': code_class,
517 518 'inc': comments
518 519 })
519 520 _html.append('''\n\t\t<pre>%(code)s</pre>\n''' % {
520 521 'code': change['line']
521 522 })
522 523 _html.append('''\t</td>''')
523 524 _html.append('''\n</tr>\n''')
524 525 _html.append('''</table>''')
525 526 if _html_empty:
526 527 return None
527 528 return ''.join(_html)
528 529
529 530 def stat(self):
530 531 """
531 532 Returns tuple of added, and removed lines for this instance
532 533 """
533 534 return self.adds, self.removes
534 535
535 536
536 537 def differ(org_repo, org_ref, other_repo, other_ref):
537 538 """
539 General differ between branches, bookmarks or separate but related
540 repositories
538 541
539 542 :param org_repo:
540 543 :type org_repo:
541 544 :param org_ref:
542 545 :type org_ref:
543 546 :param other_repo:
544 547 :type other_repo:
545 548 :param other_ref:
546 549 :type other_ref:
547 550 """
548 551 ignore_whitespace = False
549 552 context = 3
550 553 from mercurial import patch
551 554 from mercurial.mdiff import diffopts
552 555
553 556 org_repo = org_repo.scm_instance._repo
554 557 other_repo = other_repo.scm_instance._repo
555 558
556 559 org_ref = org_ref[1]
557 560 other_ref = other_ref[1]
558 561
559 562 opts = diffopts(git=True, ignorews=ignore_whitespace, context=context)
560 563
561 564 return ''.join(patch.diff(org_repo, node1=org_ref, node2=other_ref,
562 565 opts=opts))
General Comments 0
You need to be logged in to leave comments. Login now