##// END OF EJS Templates
added stats of line changes and operation (A/M/D) into diffs lib
marcink -
r2347:58bcaf1b codereview
parent child Browse files
Show More
@@ -1,553 +1,562 b''
1 1 # -*- coding: utf-8 -*-
2 2 """
3 3 rhodecode.lib.diffs
4 4 ~~~~~~~~~~~~~~~~~~~
5 5
6 6 Set of diffing helpers, previously part of vcs
7 7
8 8
9 9 :created_on: Dec 4, 2011
10 10 :author: marcink
11 11 :copyright: (C) 2010-2012 Marcin Kuzminski <marcin@python-works.com>
12 12 :original copyright: 2007-2008 by Armin Ronacher
13 13 :license: GPLv3, see COPYING for more details.
14 14 """
15 15 # This program is free software: you can redistribute it and/or modify
16 16 # it under the terms of the GNU General Public License as published by
17 17 # the Free Software Foundation, either version 3 of the License, or
18 18 # (at your option) any later version.
19 19 #
20 20 # This program is distributed in the hope that it will be useful,
21 21 # but WITHOUT ANY WARRANTY; without even the implied warranty of
22 22 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23 23 # GNU General Public License for more details.
24 24 #
25 25 # You should have received a copy of the GNU General Public License
26 26 # along with this program. If not, see <http://www.gnu.org/licenses/>.
27 27
28 28 import re
29 29 import difflib
30 30 import markupsafe
31 31 from itertools import tee, imap
32 32
33 33 from pylons.i18n.translation import _
34 34
35 35 from rhodecode.lib.vcs.exceptions import VCSError
36 36 from rhodecode.lib.vcs.nodes import FileNode, SubModuleNode
37 37 from rhodecode.lib.helpers import escape
38 38 from rhodecode.lib.utils import EmptyChangeset
39 39
40 40
def wrap_to_table(str_):
    """
    Wrap ``str_`` into a one-row ``code-difftable`` HTML table.

    The text is interpolated as-is (no escaping is done here), so callers
    are expected to pass markup that is already safe to render.

    :param str_: text or markup placed inside the ``<pre>`` cell
    :returns: HTML snippet as a string
    """
    template = '''<table class="code-difftable">
<tr class="line no-comment">
<td class="lineno new"></td>
<td class="code no-comment"><pre>%s</pre></td>
</tr>
</table>'''
    return template % str_
48 48
49 49
def wrapped_diff(filenode_old, filenode_new, cut_off_limit=None,
                 ignore_whitespace=True, line_context=3,
                 enable_comments=False):
    """
    Build an HTML table diff between two file nodes.

    Binary files and files rejected by ``cut_off_limit`` get a short
    message wrapped in a table instead of a real diff.

    :param filenode_old: old node, or None to diff against an empty file
    :param filenode_new: new node
    :param cut_off_limit: None disables the size check, -1 always cuts the
        diff off, any other value is the size both nodes must stay below
    :param ignore_whitespace: passed through to ``get_gitdiff``
    :param line_context: number of context lines passed to ``get_gitdiff``
    :param enable_comments: passed through to ``DiffProcessor.as_html``
    :returns: tuple ``(size, cs1, cs2, diff, stats)`` where ``cs1``/``cs2``
        are the raw ids of the old/new changesets and ``stats`` is
        ``(added, removed)``
    """
    if filenode_old is None:
        # nothing to compare against - use an empty file at the same path
        filenode_old = FileNode(filenode_new.path, '', EmptyChangeset())

    if filenode_old.is_binary or filenode_new.is_binary:
        diff, stats, size = wrap_to_table(_('binary file')), (0, 0), 0

    elif cut_off_limit == -1 or (
            cut_off_limit is not None and
            not (filenode_old.size < cut_off_limit and
                 filenode_new.size < cut_off_limit)):
        diff = wrap_to_table(_('Changeset was too big and was cut off, use '
                               'diff menu to display this diff'))
        stats, size = (0, 0), 0

    else:
        f_gitdiff = get_gitdiff(filenode_old, filenode_new,
                                ignore_whitespace=ignore_whitespace,
                                context=line_context)
        diff_processor = DiffProcessor(f_gitdiff, format='gitdiff')

        diff = diff_processor.as_html(enable_comments=enable_comments)
        stats = diff_processor.stat()
        size = len(diff or '')

    if not diff:
        # as_html() returned nothing: either a submodule or no changes
        submodules = [node for node in (filenode_new, filenode_old)
                      if isinstance(node, SubModuleNode)]
        if submodules:
            diff = wrap_to_table(escape('Submodule %r' % submodules[0]))
        else:
            diff = wrap_to_table(_('No changes detected'))

    cs1 = filenode_old.changeset.raw_id
    cs2 = filenode_new.changeset.raw_id

    return size, cs1, cs2, diff, stats
94 94
95 95
def get_gitdiff(filenode_old, filenode_new, ignore_whitespace=True, context=3):
    """
    Returns git style diff between given ``filenode_old`` and ``filenode_new``.

    :param filenode_old: old ``FileNode``
    :param filenode_new: new ``FileNode``; its changeset's repository
        produces the diff
    :param ignore_whitespace: ignore whitespaces in diff
    :param context: number of context lines; any falsy value falls back to 3
    :returns: whatever ``repo.get_diff`` returns, or ``''`` when either node
        is a ``SubModuleNode``
    :raises VCSError: if either node is not a ``FileNode``
    """
    # make sure we pass in default context
    if not context:
        context = 3

    # submodules have no textual diff
    for node in (filenode_new, filenode_old):
        if isinstance(node, SubModuleNode):
            return ''

    for node in (filenode_old, filenode_new):
        if not isinstance(node, FileNode):
            raise VCSError("Given object should be FileNode object, not %s"
                           % node.__class__)

    repo = filenode_new.changeset.repository
    old_raw_id, new_raw_id = [
        getattr(node.changeset, 'raw_id', repo.EMPTY_CHANGESET)
        for node in (filenode_old, filenode_new)
    ]

    return repo.get_diff(old_raw_id, new_raw_id, filenode_new.path,
                         ignore_whitespace, context)
121 121
122 122
class DiffProcessor(object):
    """
    Give it a unified diff and it returns a list of the files that were
    mentioned in the diff together with a dict of meta information that
    can be used to render it in a HTML template.
    """
    _chunk_re = re.compile(r'@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(.*)')

    def __init__(self, diff, differ='diff', format='udiff'):
        """
        :param diff: a text in diff format or generator
        :param differ: ``'difflib'`` selects word based inline highlighting,
            any other value selects the common prefix/suffix highlighter
        :param format: format of diff passed, `udiff` or `gitdiff`
        """
        if isinstance(diff, basestring):
            diff = [diff]

        self.__udiff = diff
        self.__format = format
        self.adds = 0
        self.removes = 0

        # NOTE(review): a plain string passed with format='udiff' is wrapped
        # in a one element list above and is therefore consumed as a single
        # "line"; only the gitdiff path splits text into lines.
        udiff_copy = self.copy_iterator()
        if self.__format == 'gitdiff':
            source_lines = self._parse_gitdiff(udiff_copy)
        else:
            source_lines = udiff_copy
        # escaping is lazy, it happens while the parser consumes the lines
        self.lines = (self.escaper(l) for l in source_lines)

        # Select a differ.
        if differ == 'difflib':
            self.differ = self._highlight_line_difflib
        else:
            self.differ = self._highlight_line_udiff

    def escaper(self, string):
        """
        HTML-escape ``string`` using markupsafe.
        """
        return markupsafe.escape(string)

    def copy_iterator(self):
        """
        make a fresh copy of generator, we should not iterate thru
        an original as it's needed for repeating operations on
        this instance of DiffProcessor
        """
        self.__udiff, iterator_copy = tee(self.__udiff)
        return iterator_copy

    @staticmethod
    def _strip_prefix(name, prefix):
        """
        Remove ``prefix`` from ``name`` only when ``name`` starts with it.
        """
        if name.startswith(prefix):
            return name[len(prefix):]
        return name

    def _extract_rev(self, line1, line2):
        """
        Extract the operation (A/M/D), filename and revision hint from a line.

        :param line1: the ``--- `` header line (old file)
        :param line2: the ``+++ `` header line (new file)
        :returns: tuple ``(operation, filename, new_rev, old_rev)``; all four
            items are None when the lines are not a ``---``/``+++`` pair
        """
        try:
            if line1.startswith('--- ') and line2.startswith('+++ '):
                l1 = line1[4:].split(None, 1)
                # only strip a leading git ``a/`` prefix; replacing the first
                # ``a/`` anywhere would mangle paths such as ``data/x.py``
                old_filename = (self._strip_prefix(l1[0], 'a/')
                                if len(l1) >= 1 else None)
                old_rev = l1[1] if len(l1) == 2 else 'old'

                l2 = line2[4:].split(None, 1)
                # guard on l2 itself (the original checked len(l1) here)
                new_filename = (self._strip_prefix(l2[0], 'b/')
                                if len(l2) >= 1 else None)
                new_rev = l2[1] if len(l2) == 2 else 'new'

                filename = (old_filename
                            if old_filename != '/dev/null' else new_filename)

                if new_filename == '/dev/null':
                    operation = 'D'
                elif old_filename != '/dev/null':
                    operation = 'M'
                else:
                    operation = 'A'

                return operation, filename, new_rev, old_rev
        except (ValueError, IndexError):
            pass

        return None, None, None, None

    def _parse_gitdiff(self, diffiterator):
        """
        Split a git diff into decoded lines and count added/removed lines.

        ``diffiterator`` must yield at most two items: either the whole diff
        text, or a first item kept as-is followed by the text to split.

        The counters are assigned rather than incremented, so calling this
        again for the same diff (``raw_diff`` does) does not inflate
        ``stat()``.

        :returns: list of lines decoded as utf8 (errors replaced)
        :raises Exception: when the iterator yields more than two items
        """
        output = list(diffiterator)
        size = len(output)

        if size == 2:
            raw_lines = [output[0]] + output[1].splitlines(1)
        elif size == 1:
            raw_lines = output[0].splitlines(1)
        elif size == 0:
            raw_lines = []
        else:
            raise Exception('wrong size of diff %s' % size)

        adds = removes = 0
        decoded = []
        for l in raw_lines:
            # ``+++``/``---`` are file headers, not content changes
            if l.startswith('+') and not l.startswith('+++'):
                adds += 1
            elif l.startswith('-') and not l.startswith('---'):
                removes += 1
            decoded.append(l.decode('utf8', 'replace'))

        self.adds = adds
        self.removes = removes
        return decoded

    def _highlight_line_difflib(self, line, next_):
        """
        Highlight inline changes in both lines.

        Word level comparison done with ``difflib.SequenceMatcher``; changed
        fragments are wrapped in ``<del>`` (old line) / ``<ins>`` (new line).
        Both dicts are modified in place.
        """
        if line['action'] == 'del':
            old, new = line, next_
        else:
            old, new = next_, line

        oldwords = re.split(r'(\W)', old['line'])
        newwords = re.split(r'(\W)', new['line'])

        sequence = difflib.SequenceMatcher(None, oldwords, newwords)

        oldfragments, newfragments = [], []
        for tag, i1, i2, j1, j2 in sequence.get_opcodes():
            oldfrag = ''.join(oldwords[i1:i2])
            newfrag = ''.join(newwords[j1:j2])
            if tag != 'equal':
                if oldfrag:
                    oldfrag = '<del>%s</del>' % oldfrag
                if newfrag:
                    newfrag = '<ins>%s</ins>' % newfrag
            oldfragments.append(oldfrag)
            newfragments.append(newfrag)

        old['line'] = "".join(oldfragments)
        new['line'] = "".join(newfragments)

    def _highlight_line_udiff(self, line, next_):
        """
        Highlight inline changes in both lines.

        Finds the common prefix and suffix of the two lines and wraps what
        is left in between in ``<ins>``/``<del>``, depending on each line's
        action. Both dicts are modified in place; nothing is changed when
        the lines share neither a prefix nor a suffix.
        """
        start = 0
        limit = min(len(line['line']), len(next_['line']))
        while start < limit and line['line'][start] == next_['line'][start]:
            start += 1
        end = -1
        # the suffix must not overlap the prefix found above
        limit -= start
        while -end <= limit and line['line'][end] == next_['line'][end]:
            end -= 1
        end += 1
        if start or end:
            def _wrap(l):
                last = end + len(l['line'])
                if l['action'] == 'add':
                    tag = 'ins'
                else:
                    tag = 'del'
                l['line'] = '%s<%s>%s</%s>%s' % (
                    l['line'][:start],
                    tag,
                    l['line'][start:last],
                    tag,
                    l['line'][last:]
                )
            _wrap(line)
            _wrap(next_)

    def _parse_udiff(self):
        """
        Parse the diff and return data for the template.

        :returns: list with one dict per file, with keys ``filename``,
            ``old_revision``, ``new_revision``, ``operation`` (A/M/D),
            ``stats`` (``[added, removed]``) and ``chunks``; every chunk is
            a list of line dicts with keys ``old_lineno``, ``new_lineno``,
            ``action`` and ``line``
        """
        lineiter = self.lines
        files = []
        try:
            line = next(lineiter)
            # skip first context
            skipfirst = True

            while 1:
                # continue until we found the old file
                if not line.startswith('--- '):
                    line = next(lineiter)
                    continue

                chunks = []
                stats = [0, 0]
                # unpack in the order _extract_rev returns its values
                operation, filename, new_rev, old_rev = \
                    self._extract_rev(line, next(lineiter))
                files.append({
                    'filename': filename,
                    'old_revision': old_rev,
                    'new_revision': new_rev,
                    'chunks': chunks,
                    'operation': operation,
                    'stats': stats,
                })

                line = next(lineiter)
                while line:

                    match = self._chunk_re.match(line)
                    if not match:
                        break

                    lines = []
                    chunks.append(lines)

                    # a missing length in the hunk header means 1
                    old_line, old_end, new_line, new_end = \
                        [int(x or 1) for x in match.groups()[:-1]]
                    old_line -= 1
                    new_line -= 1
                    context = len(match.groups()) == 5
                    old_end += old_line
                    new_end += new_line

                    if context:
                        if not skipfirst:
                            lines.append({
                                'old_lineno': '...',
                                'new_lineno': '...',
                                'action': 'context',
                                'line': line,
                            })
                        else:
                            skipfirst = False

                    line = next(lineiter)
                    while old_line < old_end or new_line < new_end:
                        if line:
                            command, line = line[0], line[1:]
                        else:
                            command = ' '
                        affects_old = affects_new = False

                        # ignore those if we don't expect them
                        if command in '#@':
                            continue
                        elif command == '+':
                            affects_new = True
                            action = 'add'
                            stats[0] += 1
                        elif command == '-':
                            affects_old = True
                            action = 'del'
                            stats[1] += 1
                        else:
                            affects_old = affects_new = True
                            action = 'unmod'

                        old_line += affects_old
                        new_line += affects_new
                        lines.append({
                            'old_lineno': affects_old and old_line or '',
                            'new_lineno': affects_new and new_line or '',
                            'action': action,
                            'line': line
                        })
                        line = next(lineiter)
        except StopIteration:
            # ran out of input, whatever was collected so far is the result
            pass

        # highlight inline changes, every file's own chunks exactly once
        for file_info in files:
            for chunk in file_info['chunks']:
                rows = iter(chunk)
                for row in rows:
                    # hunk header rows and unchanged rows never start a pair
                    if row['action'] in ('unmod', 'context'):
                        continue
                    following = next(rows, None)
                    if following is None:
                        break
                    if following['action'] in ('unmod', row['action']):
                        continue
                    self.differ(row, following)

        return files

    def prepare(self):
        """
        Prepare the passed udiff for HTML rendering. It'll return a list
        of dicts
        """
        return self._parse_udiff()

    def _safe_id(self, idstring):
        r"""Make a string safe for including in an id attribute.

        The HTML spec says that id attributes 'must begin with
        a letter ([A-Za-z]) and may be followed by any number
        of letters, digits ([0-9]), hyphens ("-"), underscores
        ("_"), colons (":"), and periods (".")'. These regexps
        are slightly over-zealous, in that they remove colons
        and periods unnecessarily.

        Whitespace is transformed into underscores, and then
        anything which is not a hyphen or a character that
        matches \w (alphanumerics and underscore) is removed.

        """
        # Transform all whitespace to underscore
        idstring = re.sub(r'\s', "_", '%s' % idstring)
        # Remove everything that is not a hyphen or a member of \w
        idstring = re.sub(r'(?!-)\W', "", idstring).lower()
        return idstring

    def raw_diff(self):
        """
        Returns raw string as udiff
        """
        udiff_copy = self.copy_iterator()
        if self.__format == 'gitdiff':
            udiff_copy = self._parse_gitdiff(udiff_copy)
        return u''.join(udiff_copy)

    def as_html(self, table_class='code-difftable', line_class='line',
                new_lineno_class='lineno old', old_lineno_class='lineno new',
                code_class='code', enable_comments=False, diff_lines=None):
        """
        Return udiff as html table with customized css classes

        :param table_class: css class of the ``<table>``
        :param line_class: css class put on every ``<tr>`` next to the action
        :param new_lineno_class: css class of the new line number cell
        :param old_lineno_class: css class of the old line number cell
        :param code_class: css class of the code cell
        :param enable_comments: when False the code cell also gets the
            ``no-comment`` class
        :param diff_lines: already parsed result of ``prepare()``; parsed
            from this instance when None
        :returns: html string, or None when there were no chunks to render

        NOTE(review): the defaults of ``new_lineno_class`` and
        ``old_lineno_class`` look swapped ('lineno old' / 'lineno new');
        they are kept because the rendered markup depends on them.
        """
        def _link_to_if(condition, label, url):
            """
            Generates a link if condition is meet or just the label if not.
            """

            if condition:
                return '''<a href="%(url)s">%(label)s</a>''' % {
                    'url': url,
                    'label': label
                }
            else:
                return label

        if diff_lines is None:
            diff_lines = self.prepare()
        _html_empty = True
        _html = []
        _html.append('''<table class="%(table_class)s">\n''' % {
            'table_class': table_class
        })
        for diff in diff_lines:
            for line in diff['chunks']:
                _html_empty = False
                for change in line:
                    _html.append('''<tr class="%(lc)s %(action)s">\n''' % {
                        'lc': line_class,
                        'action': change['action']
                    })
                    anchor_old_id = ''
                    anchor_new_id = ''
                    anchor_old = "%(filename)s_o%(oldline_no)s" % {
                        'filename': self._safe_id(diff['filename']),
                        'oldline_no': change['old_lineno']
                    }
                    anchor_new = "%(filename)s_n%(oldline_no)s" % {
                        'filename': self._safe_id(diff['filename']),
                        'oldline_no': change['new_lineno']
                    }
                    # only real line numbers get an id, '...' and '' do not
                    cond_old = (change['old_lineno'] != '...' and
                                change['old_lineno'])
                    cond_new = (change['new_lineno'] != '...' and
                                change['new_lineno'])
                    if cond_old:
                        anchor_old_id = 'id="%s"' % anchor_old
                    if cond_new:
                        anchor_new_id = 'id="%s"' % anchor_new
                    ###########################################################
                    # OLD LINE NUMBER
                    ###########################################################
                    _html.append('''\t<td %(a_id)s class="%(olc)s">''' % {
                        'a_id': anchor_old_id,
                        'olc': old_lineno_class
                    })

                    _html.append('''%(link)s''' % {
                        'link': _link_to_if(True, change['old_lineno'],
                                            '#%s' % anchor_old)
                    })
                    _html.append('''</td>\n''')
                    ###########################################################
                    # NEW LINE NUMBER
                    ###########################################################

                    _html.append('''\t<td %(a_id)s class="%(nlc)s">''' % {
                        'a_id': anchor_new_id,
                        'nlc': new_lineno_class
                    })

                    _html.append('''%(link)s''' % {
                        'link': _link_to_if(True, change['new_lineno'],
                                            '#%s' % anchor_new)
                    })
                    _html.append('''</td>\n''')
                    ###########################################################
                    # CODE
                    ###########################################################
                    comments = '' if enable_comments else 'no-comment'
                    _html.append('''\t<td class="%(cc)s %(inc)s">''' % {
                        'cc': code_class,
                        'inc': comments
                    })
                    _html.append('''\n\t\t<pre>%(code)s</pre>\n''' % {
                        'code': change['line']
                    })
                    _html.append('''\t</td>''')
                    _html.append('''\n</tr>\n''')
        _html.append('''</table>''')
        if _html_empty:
            return None
        return ''.join(_html)

    def stat(self):
        """
        Returns tuple of added, and removed lines for this instance

        The counters are only filled in by the gitdiff parser, for other
        formats they stay at their initial value of 0.
        """
        return self.adds, self.removes
525 534
526 535
def differ(org_repo, org_ref, other_repo, other_ref):
    """
    Return a git style diff between two refs as a single string.

    The diff is produced by mercurial's ``patch.diff`` with whitespace
    changes included and 3 lines of context.

    NOTE(review): ``other_repo`` is resolved but never passed to
    ``patch.diff``; both revisions are looked up in ``org_repo`` - confirm
    this is intended.

    :param org_repo: object exposing ``scm_instance._repo``
    :type org_repo:
    :param org_ref: sequence whose second item is used as ``node1``
    :type org_ref:
    :param other_repo: object exposing ``scm_instance._repo``
    :type other_repo:
    :param other_ref: sequence whose second item is used as ``node2``
    :type other_ref:
    """
    from mercurial import patch
    from mercurial.mdiff import diffopts

    ignore_whitespace = False
    context = 3

    org_repo = org_repo.scm_instance._repo
    other_repo = other_repo.scm_instance._repo

    node1, node2 = org_ref[1], other_ref[1]

    opts = diffopts(git=True, ignorews=ignore_whitespace, context=context)
    diff_chunks = patch.diff(org_repo, node1=node1, node2=node2, opts=opts)

    return ''.join(diff_chunks)
General Comments 0
You need to be logged in to leave comments. Login now