##// END OF EJS Templates
code garden
marcink -
r2364:9d61aad8 codereview
parent child Browse files
Show More
@@ -1,615 +1,618 b''
1 1 # -*- coding: utf-8 -*-
2 2 """
3 3 rhodecode.lib.diffs
4 4 ~~~~~~~~~~~~~~~~~~~
5 5
6 6 Set of diffing helpers, previously part of vcs
7 7
8 8
9 9 :created_on: Dec 4, 2011
10 10 :author: marcink
11 11 :copyright: (C) 2010-2012 Marcin Kuzminski <marcin@python-works.com>
12 12 :original copyright: 2007-2008 by Armin Ronacher
13 13 :license: GPLv3, see COPYING for more details.
14 14 """
15 15 # This program is free software: you can redistribute it and/or modify
16 16 # it under the terms of the GNU General Public License as published by
17 17 # the Free Software Foundation, either version 3 of the License, or
18 18 # (at your option) any later version.
19 19 #
20 20 # This program is distributed in the hope that it will be useful,
21 21 # but WITHOUT ANY WARRANTY; without even the implied warranty of
22 22 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23 23 # GNU General Public License for more details.
24 24 #
25 25 # You should have received a copy of the GNU General Public License
26 26 # along with this program. If not, see <http://www.gnu.org/licenses/>.
27 27
28 28 import re
29 29 import io
30 30 import difflib
31 31 import markupsafe
32 32
33 33 from itertools import tee, imap
34 34
35 35 from mercurial import patch
36 36 from mercurial.mdiff import diffopts
37 37 from mercurial.bundlerepo import bundlerepository
38 38 from mercurial import localrepo
39 39
40 40 from pylons.i18n.translation import _
41 41
42 42 from rhodecode.lib.vcs.exceptions import VCSError
43 43 from rhodecode.lib.vcs.nodes import FileNode, SubModuleNode
44 44 from rhodecode.lib.helpers import escape
45 45 from rhodecode.lib.utils import EmptyChangeset, make_ui
46 46
47 47
def wrap_to_table(str_):
    """
    Wrap ``str_`` in a single-row ``code-difftable`` HTML table.

    The value is interpolated as-is (no escaping is performed here), so
    callers must pass markup-safe text.

    :param str_: text/markup placed inside the ``<pre>`` element
    :returns: the HTML table as a string
    """
    rows = (
        '<table class="code-difftable">',
        '<tr class="line no-comment">',
        '<td class="lineno new"></td>',
        '<td class="code no-comment"><pre>%s</pre></td>',
        '</tr>',
        '</table>',
    )
    template = '\n'.join(rows)
    return template % str_
55 55
56 56
def wrapped_diff(filenode_old, filenode_new, cut_off_limit=None,
                 ignore_whitespace=True, line_context=3,
                 enable_comments=False):
    """
    Render the diff between two file nodes as an HTML table, replacing it
    with a short message for binary files, over-sized files, submodules
    and empty diffs.

    :param filenode_old: old file node; ``None`` is replaced by an empty
        ``FileNode`` at ``filenode_new.path`` on an ``EmptyChangeset``
    :param filenode_new: new file node
    :param cut_off_limit: ``None`` disables the size check, ``-1`` always
        shows the "too big" message, any other value is compared against
        both nodes' ``size``
    :param ignore_whitespace: passed through to :func:`get_gitdiff`
    :param line_context: passed to :func:`get_gitdiff` as ``context``
    :param enable_comments: passed through to ``DiffProcessor.as_html``
    :returns: tuple ``(size, cs1, cs2, diff, stats)`` where ``cs1``/``cs2``
        are the ``raw_id`` of the old/new changesets
    """
    if filenode_old is None:
        filenode_old = FileNode(filenode_new.path, '', EmptyChangeset())

    if filenode_old.is_binary or filenode_new.is_binary:
        size = 0
        stats = (0, 0)
        diff = wrap_to_table(_('binary file'))

    elif cut_off_limit != -1 and (
            cut_off_limit is None or
            (filenode_old.size < cut_off_limit and
             filenode_new.size < cut_off_limit)):
        # small enough (or no limit at all): compute and render the diff
        processor = DiffProcessor(
            get_gitdiff(filenode_old, filenode_new,
                        ignore_whitespace=ignore_whitespace,
                        context=line_context),
            format='gitdiff'
        )
        diff = processor.as_html(enable_comments=enable_comments)
        stats = processor.stat()
        size = len(diff or '')

    else:
        size = 0
        stats = (0, 0)
        diff = wrap_to_table(_('Changeset was too big and was cut off, use '
                               'diff menu to display this diff'))

    if not diff:
        # nothing rendered: either one side is a submodule or nothing changed
        submodules = [node for node in [filenode_new, filenode_old]
                      if isinstance(node, SubModuleNode)]
        if submodules:
            diff = wrap_to_table(escape('Submodule %r' % submodules[0]))
        else:
            diff = wrap_to_table(_('No changes detected'))

    cs1 = filenode_old.changeset.raw_id
    cs2 = filenode_new.changeset.raw_id

    return size, cs1, cs2, diff, stats
101 101
102 102
def get_gitdiff(filenode_old, filenode_new, ignore_whitespace=True, context=3):
    """
    Return the git style diff between ``filenode_old`` and ``filenode_new``
    as produced by the repository's ``get_diff``.

    :param filenode_old: old ``FileNode``
    :param filenode_new: new ``FileNode``; its path and repository are used
    :param ignore_whitespace: ignore whitespaces in diff
    :param context: number of context lines; any falsy value falls back to 3
    :returns: ``''`` when either node is a ``SubModuleNode``, otherwise the
        value returned by ``repo.get_diff``
    :raises VCSError: if either node is not a ``FileNode``
    """
    # make sure we pass in default context
    if not context:
        context = 3

    nodes = (filenode_old, filenode_new)

    # submodules cannot be diffed here
    if any(isinstance(node, SubModuleNode) for node in nodes):
        return ''

    for node in nodes:
        if not isinstance(node, FileNode):
            raise VCSError("Given object should be FileNode object, not %s"
                           % node.__class__)

    repo = filenode_new.changeset.repository
    empty_id = repo.EMPTY_CHANGESET
    old_raw_id = getattr(filenode_old.changeset, 'raw_id', empty_id)
    new_raw_id = getattr(filenode_new.changeset, 'raw_id', empty_id)

    return repo.get_diff(old_raw_id, new_raw_id, filenode_new.path,
                         ignore_whitespace, context)
128 128
129 129
class DiffProcessor(object):
    """
    Give it a unified diff and it returns a list of the files that were
    mentioned in the diff together with a dict of meta information that
    can be used to render it in a HTML template.

    ``adds`` / ``removes`` hold the added/removed line counts; they are
    only computed when the diff is parsed as ``gitdiff``.
    """
    _chunk_re = re.compile(r'@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(.*)')

    def __init__(self, diff, differ='diff', format='udiff'):
        """
        :param diff: a text in diff format or generator
        :param differ: ``'difflib'`` selects word based inline highlighting,
            any other value the common prefix/suffix based one
        :param format: format of diff passed, `udiff` or `gitdiff`
        """
        if isinstance(diff, basestring):
            if format == 'gitdiff':
                # the gitdiff parser expects the whole blob as one item
                diff = [diff]
            else:
                # the udiff parser consumes the diff line by line
                diff = diff.splitlines(True)

        self.__udiff = diff
        self.__format = format
        self.adds = 0
        self.removes = 0

        udiff_copy = self.copy_iterator()
        if self.__format == 'gitdiff':
            udiff_copy = self._parse_gitdiff(udiff_copy)
        # every line is HTML-escaped lazily, before it reaches the parser
        self.lines = (self.escaper(l) for l in udiff_copy)

        # Select a differ.
        if differ == 'difflib':
            self.differ = self._highlight_line_difflib
        else:
            self.differ = self._highlight_line_udiff

    def escaper(self, string):
        """
        HTML-escape ``string`` using ``markupsafe.escape``.
        """
        return markupsafe.escape(string)

    def copy_iterator(self):
        """
        make a fresh copy of generator, we should not iterate thru
        an original as it's needed for repeating operations on
        this instance of DiffProcessor
        """
        self.__udiff, iterator_copy = tee(self.__udiff)
        return iterator_copy

    def _extract_rev(self, line1, line2):
        """
        Extract the operation (A/M/D), filename and revision hint from
        the ``---`` / ``+++`` header line pair.

        :returns: tuple ``(operation, filename, new_rev, old_rev)``, or four
            ``None`` values when the lines are not a file header pair
        """
        if not (line1.startswith('--- ') and line2.startswith('+++ ')):
            return None, None, None, None

        def _split(header, prefix, default_rev):
            # header is '<name>[ <revision hint>]'; only a leading git
            # style 'a/' or 'b/' is stripped from the name
            parts = header[4:].split(None, 1)
            name = parts[0] if len(parts) >= 1 else None
            if name is not None and name.startswith(prefix):
                name = name[len(prefix):]
            rev = parts[1] if len(parts) == 2 else default_rev
            return name, rev

        old_filename, old_rev = _split(line1, 'a/', 'old')
        new_filename, new_rev = _split(line2, 'b/', 'new')

        if old_filename != '/dev/null':
            filename = old_filename
        else:
            filename = new_filename

        if new_filename == '/dev/null':
            operation = 'D'
        elif old_filename != '/dev/null':
            operation = 'M'
        else:
            operation = 'A'

        return operation, filename, new_rev, old_rev

    def _parse_gitdiff(self, diffiterator):
        """
        Split the raw git diff into a list of decoded lines and (re)compute
        ``self.adds`` / ``self.removes`` for it.

        :param diffiterator: iterable yielding zero, one or two items; with
            two items the first one is kept as a single line and the second
            one is split into lines
        :raises Exception: when more than two items are yielded
        """
        output = list(diffiterator)
        size = len(output)

        if size == 2:
            raw_lines = [output[0]]
            raw_lines.extend(output[1].splitlines(True))
        elif size == 1:
            raw_lines = output[0].splitlines(True)
        elif size == 0:
            raw_lines = []
        else:
            raise Exception('wrong size of diff %s' % size)

        adds = removes = 0
        decoded = []
        for l in raw_lines:
            if isinstance(l, bytes):
                l = l.decode('utf8', 'replace')
            if l.startswith('+') and not l.startswith('+++'):
                adds += 1
            elif l.startswith('-') and not l.startswith('---'):
                removes += 1
            decoded.append(l)

        # assign instead of accumulate: parsing the same diff again (as
        # raw_diff() does) must not inflate the statistics
        self.adds, self.removes = adds, removes
        return decoded

    def _highlight_line_difflib(self, line, next_):
        """
        Highlight inline changes in both lines, word by word, using
        ``difflib.SequenceMatcher``. Both line dicts are modified in place.
        """
        if line['action'] == 'del':
            old, new = line, next_
        else:
            old, new = next_, line

        oldwords = re.split(r'(\W)', old['line'])
        newwords = re.split(r'(\W)', new['line'])

        sequence = difflib.SequenceMatcher(None, oldwords, newwords)

        oldfragments, newfragments = [], []
        for tag, i1, i2, j1, j2 in sequence.get_opcodes():
            oldfrag = ''.join(oldwords[i1:i2])
            newfrag = ''.join(newwords[j1:j2])
            if tag != 'equal':
                if oldfrag:
                    oldfrag = '<del>%s</del>' % oldfrag
                if newfrag:
                    newfrag = '<ins>%s</ins>' % newfrag
            oldfragments.append(oldfrag)
            newfragments.append(newfrag)

        old['line'] = "".join(oldfragments)
        new['line'] = "".join(newfragments)

    def _highlight_line_udiff(self, line, next_):
        """
        Highlight inline changes in both lines by wrapping everything
        between their common prefix and common suffix in ``<ins>`` or
        ``<del>``. Both line dicts are modified in place.
        """
        start = 0
        limit = min(len(line['line']), len(next_['line']))
        # length of the common prefix
        while start < limit and line['line'][start] == next_['line'][start]:
            start += 1
        # negative offset of the common suffix (never overlapping the prefix)
        end = -1
        limit -= start
        while -end <= limit and line['line'][end] == next_['line'][end]:
            end -= 1
        end += 1
        if start or end:
            def do(l):
                last = end + len(l['line'])
                if l['action'] == 'add':
                    tag = 'ins'
                else:
                    tag = 'del'
                l['line'] = '%s<%s>%s</%s>%s' % (
                    l['line'][:start],
                    tag,
                    l['line'][start:last],
                    tag,
                    l['line'][last:]
                )
            do(line)
            do(next_)

    def _parse_udiff(self):
        """
        Parse the diff and return data for the template.

        Consumes ``self.lines``.

        :returns: list of dicts, one per file, with the keys ``filename``,
            ``old_revision``, ``new_revision``, ``chunks``, ``operation``
            and ``stats`` (``[added, removed]``); every chunk is a list of
            line dicts with ``old_lineno``, ``new_lineno``, ``action``
            and ``line``
        """
        lineiter = self.lines
        files = []
        try:
            line = next(lineiter)
            while 1:
                # continue until we found the old file
                if not line.startswith('--- '):
                    line = next(lineiter)
                    continue

                chunks = []
                stats = [0, 0]
                # order matches the return value of _extract_rev
                operation, filename, new_rev, old_rev = \
                    self._extract_rev(line, next(lineiter))
                files.append({
                    'filename': filename,
                    'old_revision': old_rev,
                    'new_revision': new_rev,
                    'chunks': chunks,
                    'operation': operation,
                    'stats': stats,
                })

                line = next(lineiter)
                while line:
                    match = self._chunk_re.match(line)
                    if not match:
                        break

                    lines = []
                    chunks.append(lines)

                    gr = match.groups()
                    # a missing hunk length defaults to 1
                    old_line, old_end, new_line, new_end = \
                        [int(x or 1) for x in gr[:-1]]
                    old_line -= 1
                    new_line -= 1
                    old_end += old_line
                    new_end += new_line

                    # show the hunk header as a separator row unless the
                    # hunk starts at the first line
                    if int(gr[0]) > 1:
                        lines.append({
                            'old_lineno': '...',
                            'new_lineno': '...',
                            'action': 'context',
                            'line': line,
                        })

                    line = next(lineiter)
                    while old_line < old_end or new_line < new_end:
                        if line:
                            command, line = line[0], line[1:]
                        else:
                            command = ' '
                        affects_old = affects_new = False

                        # ignore those if we don't expect them
                        # NOTE(review): this does not advance to the next
                        # line, it only drops the leading character and
                        # re-examines the remainder - confirm the intent
                        if command in '#@':
                            continue
                        elif command == '+':
                            affects_new = True
                            action = 'add'
                            stats[0] += 1
                        elif command == '-':
                            affects_old = True
                            action = 'del'
                            stats[1] += 1
                        else:
                            affects_old = affects_new = True
                            action = 'unmod'

                        if line.find('No newline at end of file') != -1:
                            lines.append({
                                'old_lineno': '...',
                                'new_lineno': '...',
                                'action': 'context',
                                'line': line
                            })

                        else:
                            old_line += affects_old
                            new_line += affects_new
                            lines.append({
                                'old_lineno': affects_old and old_line or '',
                                'new_lineno': affects_new and new_line or '',
                                'action': action,
                                'line': line
                            })

                        line = next(lineiter)
        except StopIteration:
            # end of the diff
            pass

        # highlight inline changes
        unchanged = ('unmod', 'context')
        for diff_data in files:
            for chunk in diff_data['chunks']:
                lineiter = iter(chunk)
                try:
                    while 1:
                        line = next(lineiter)
                        # separator rows are not changes, never pair them
                        if line['action'] in unchanged:
                            continue
                        nextline = next(lineiter)
                        if nextline['action'] in unchanged or \
                           nextline['action'] == line['action']:
                            continue
                        self.differ(line, nextline)
                except StopIteration:
                    pass
        return files

    def prepare(self):
        """
        Prepare the passed udiff for HTML rendering. It'll return a list
        of dicts
        """
        return self._parse_udiff()

    def _safe_id(self, idstring):
        r"""Make a string safe for including in an id attribute.

        The HTML spec says that id attributes 'must begin with
        a letter ([A-Za-z]) and may be followed by any number
        of letters, digits ([0-9]), hyphens ("-"), underscores
        ("_"), colons (":"), and periods (".")'. These regexps
        are slightly over-zealous, in that they remove colons
        and periods unnecessarily.

        Whitespace is transformed into underscores, and then
        anything which is not a hyphen or a character that
        matches \w (alphanumerics and underscore) is removed.
        The result is lower-cased.

        """
        # Transform all whitespace to underscore
        idstring = re.sub(r'\s', "_", '%s' % idstring)
        # Remove everything that is not a hyphen or a member of \w
        idstring = re.sub(r'(?!-)\W', "", idstring).lower()
        return idstring

    def raw_diff(self):
        """
        Returns raw string as udiff
        """
        udiff_copy = self.copy_iterator()
        if self.__format == 'gitdiff':
            udiff_copy = self._parse_gitdiff(udiff_copy)
        return u''.join(udiff_copy)

    def as_html(self, table_class='code-difftable', line_class='line',
                new_lineno_class='lineno old', old_lineno_class='lineno new',
                code_class='code', enable_comments=False, diff_lines=None):
        """
        Return given diff as html table with customized css classes

        :param diff_lines: already parsed diff (as returned by
            :meth:`prepare`); parsed from this instance when ``None``
        :param enable_comments: when false the code cells additionally get
            the ``no-comment`` css class
        :returns: html string, or ``None`` when the diff has no chunks
        """
        def _link_to_if(condition, label, url):
            """
            Generates a link if condition is meet or just the label if not.
            """

            if condition:
                return '''<a href="%(url)s">%(label)s</a>''' % {
                    'url': url,
                    'label': label
                }
            else:
                return label

        if diff_lines is None:
            diff_lines = self.prepare()

        comments = '' if enable_comments else 'no-comment'
        _html_empty = True
        _html = []
        _html.append('''<table class="%(table_class)s">\n''' % {
            'table_class': table_class
        })
        for diff in diff_lines:
            for line in diff['chunks']:
                _html_empty = False
                for change in line:
                    _html.append('''<tr class="%(lc)s %(action)s">\n''' % {
                        'lc': line_class,
                        'action': change['action']
                    })
                    anchor_old_id = ''
                    anchor_new_id = ''
                    anchor_old = "%(filename)s_o%(oldline_no)s" % {
                        'filename': self._safe_id(diff['filename']),
                        'oldline_no': change['old_lineno']
                    }
                    anchor_new = "%(filename)s_n%(oldline_no)s" % {
                        'filename': self._safe_id(diff['filename']),
                        'oldline_no': change['new_lineno']
                    }
                    # only rows carrying a real line number get an id
                    cond_old = (change['old_lineno'] != '...' and
                                change['old_lineno'])
                    cond_new = (change['new_lineno'] != '...' and
                                change['new_lineno'])
                    if cond_old:
                        anchor_old_id = 'id="%s"' % anchor_old
                    if cond_new:
                        anchor_new_id = 'id="%s"' % anchor_new
                    ###########################################################
                    # OLD LINE NUMBER
                    ###########################################################
                    _html.append('''\t<td %(a_id)s class="%(olc)s">''' % {
                        'a_id': anchor_old_id,
                        'olc': old_lineno_class
                    })

                    _html.append('''%(link)s''' % {
                        'link': _link_to_if(True, change['old_lineno'],
                                            '#%s' % anchor_old)
                    })
                    _html.append('''</td>\n''')
                    ###########################################################
                    # NEW LINE NUMBER
                    ###########################################################

                    _html.append('''\t<td %(a_id)s class="%(nlc)s">''' % {
                        'a_id': anchor_new_id,
                        'nlc': new_lineno_class
                    })

                    _html.append('''%(link)s''' % {
                        'link': _link_to_if(True, change['new_lineno'],
                                            '#%s' % anchor_new)
                    })
                    _html.append('''</td>\n''')
                    ###########################################################
                    # CODE
                    ###########################################################
                    _html.append('''\t<td class="%(cc)s %(inc)s">''' % {
                        'cc': code_class,
                        'inc': comments
                    })
                    _html.append('''\n\t\t<pre>%(code)s</pre>\n''' % {
                        'code': change['line']
                    })
                    _html.append('''\t</td>''')
                    _html.append('''\n</tr>\n''')
        _html.append('''</table>''')
        if _html_empty:
            return None
        return ''.join(_html)

    def stat(self):
        """
        Returns tuple of added, and removed lines for this instance
        """
        return self.adds, self.removes
547 547
548 548
class InMemoryBundleRepo(bundlerepository):
    """
    ``bundlerepository`` variant fed from an already opened bundle stream.

    The constructor calls ``localrepository.__init__`` directly (not the
    ``bundlerepository`` one) and then stores the given stream itself.
    """

    def __init__(self, ui, path, bundlestream):
        """
        :param ui: ui object handed to ``localrepository.__init__``
        :param path: repository path handed to ``localrepository.__init__``
        :param bundlestream: bundle object stored as ``self.bundle``
        """
        self._tempparent = None
        localrepo.localrepository.__init__(self, ui, path)
        self.ui.setconfig('phases', 'publish', False)

        # dict with the mapping 'filename' -> position in the bundle
        self.bundlefilespos = dict()

        self.bundle = bundlestream
559 559
560 560
def differ(org_repo, org_ref, other_repo, other_ref, discovery_data=None):
    """
    General differ between branches, bookmarks or separate but related
    repositories. Returns the git style diff as a single string.

    Within one repository ``org_ref`` is diffed against ``other_ref``.
    For two different repositories the incoming changes of ``other_repo``
    are fetched as a bundle, buffered in memory and diffed through an
    ``InMemoryBundleRepo`` (or through ``org_repo`` when no bundle was
    created).

    :param org_repo: object exposing the repository as ``scm_instance._repo``
    :param org_ref: sequence whose item ``[1]`` is the revision to diff from
    :param other_repo: object exposing the repository as
        ``scm_instance._repo``
    :param other_ref: sequence whose item ``[1]`` is the revision to diff to
    :param discovery_data: ``(common, incoming, rheads)`` triple, unpacked
        only when the two repositories differ
    """
    bundlerepo = None
    ignore_whitespace = False
    context = 3
    org_repo = org_repo.scm_instance._repo
    other_repo = other_repo.scm_instance._repo
    opts = diffopts(git=True, ignorews=ignore_whitespace, context=context)
    org_ref = org_ref[1]
    other_ref = other_ref[1]

    if org_repo != other_repo:

        common, incoming, rheads = discovery_data

        # create a bundle (uncompressed if other repo is not local)
        if other_repo.capable('getbundle') and incoming:
            # disable repo hooks here since it's just bundle !
            # patch and reset hooks section of UI config to not run any
            # hooks on fetching archives with subrepos
            for hook_name, _hook in other_repo.ui.configitems('hooks'):
                other_repo.ui.setconfig('hooks', hook_name, None)

            unbundle = other_repo.getbundle('incoming', common=common,
                                            heads=rheads)

            # drain the bundle stream into memory, 4kB at a time
            buf = io.BytesIO()
            chunk = unbundle._stream.read(1024 * 4)
            while chunk:
                buf.write(chunk)
                chunk = unbundle._stream.read(1024 * 4)

            buf.seek(0)
            # replace chunked _stream with data that can do tell() and seek()
            unbundle._stream = buf

            ui = make_ui('db')
            bundlerepo = InMemoryBundleRepo(ui, path=org_repo.root,
                                            bundlestream=unbundle)

        diff_chunks = patch.diff(bundlerepo or org_repo, node2=other_ref,
                                 opts=opts)
    else:
        diff_chunks = patch.diff(org_repo, node1=org_ref, node2=other_ref,
                                 opts=opts)

    return ''.join(diff_chunks)
General Comments 0
You need to be logged in to leave comments. Login now