##// END OF EJS Templates
Improved cross-repo diff using bundlerepo
marcink -
r2431:60dfc369 codereview
parent child Browse files
Show More
@@ -1,624 +1,627
1 1 # -*- coding: utf-8 -*-
2 2 """
3 3 rhodecode.lib.diffs
4 4 ~~~~~~~~~~~~~~~~~~~
5 5
6 6 Set of diffing helpers, previously part of vcs
7 7
8 8
9 9 :created_on: Dec 4, 2011
10 10 :author: marcink
11 11 :copyright: (C) 2010-2012 Marcin Kuzminski <marcin@python-works.com>
12 12 :original copyright: 2007-2008 by Armin Ronacher
13 13 :license: GPLv3, see COPYING for more details.
14 14 """
15 15 # This program is free software: you can redistribute it and/or modify
16 16 # it under the terms of the GNU General Public License as published by
17 17 # the Free Software Foundation, either version 3 of the License, or
18 18 # (at your option) any later version.
19 19 #
20 20 # This program is distributed in the hope that it will be useful,
21 21 # but WITHOUT ANY WARRANTY; without even the implied warranty of
22 22 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23 23 # GNU General Public License for more details.
24 24 #
25 25 # You should have received a copy of the GNU General Public License
26 26 # along with this program. If not, see <http://www.gnu.org/licenses/>.
27 27
28 28 import re
29 29 import io
30 30 import difflib
31 31 import markupsafe
32 32
33 33 from itertools import tee, imap
34 34
35 35 from mercurial import patch
36 36 from mercurial.mdiff import diffopts
37 37 from mercurial.bundlerepo import bundlerepository
38 38 from mercurial import localrepo
39 39
40 40 from pylons.i18n.translation import _
41 41
42 42 from rhodecode.lib.vcs.exceptions import VCSError
43 43 from rhodecode.lib.vcs.nodes import FileNode, SubModuleNode
44 44 from rhodecode.lib.helpers import escape
45 45 from rhodecode.lib.utils import EmptyChangeset, make_ui
46 46
47 47
def wrap_to_table(str_):
    """
    Wrap ``str_`` into a single-row ``code-difftable`` HTML table.

    :param str_: text (or markup) placed inside the ``<pre>`` element; it is
        interpolated as-is, no escaping is done here
    :returns: the HTML snippet as a string
    """
    rows = (
        '<table class="code-difftable">',
        '<tr class="line no-comment">',
        '<td class="lineno new"></td>',
        '<td class="code no-comment"><pre>%s</pre></td>',
        '</tr>',
        '</table>',
    )
    template = '\n'.join(rows)
    return template % str_
55 55
56 56
def wrapped_diff(filenode_old, filenode_new, cut_off_limit=None,
                 ignore_whitespace=True, line_context=3,
                 enable_comments=False):
    """
    Build the HTML table with the diff between two file nodes.

    Binary files and files exceeding ``cut_off_limit`` are not diffed; an
    informational table is returned for them instead.

    :param filenode_old: old version of the file, ``None`` means the file
        did not exist and an empty node is used in its place
    :param filenode_new: new version of the file
    :param cut_off_limit: ``None`` always diffs, ``-1`` never diffs,
        otherwise both node sizes have to be below this value
    :param ignore_whitespace: passed to :func:`get_gitdiff`
    :param line_context: number of context lines passed to
        :func:`get_gitdiff`
    :param enable_comments: passed to ``DiffProcessor.as_html``
    :returns: tuple of ``(size, cs1, cs2, diff, stats)`` where ``cs1`` and
        ``cs2`` are raw ids of the old and new changeset
    """
    if filenode_old is None:
        filenode_old = FileNode(filenode_new.path, '', EmptyChangeset())

    if filenode_old.is_binary or filenode_new.is_binary:
        html = wrap_to_table(_('binary file'))
        stats = (0, 0)
        size = 0

    elif cut_off_limit != -1 and (
            cut_off_limit is None or
            (filenode_old.size < cut_off_limit and
             filenode_new.size < cut_off_limit)):

        raw_gitdiff = get_gitdiff(filenode_old, filenode_new,
                                  ignore_whitespace=ignore_whitespace,
                                  context=line_context)
        processor = DiffProcessor(raw_gitdiff, format='gitdiff')

        html = processor.as_html(enable_comments=enable_comments)
        stats = processor.stat()
        size = len(html or '')
    else:
        html = wrap_to_table(_('Changeset was too big and was cut off, use '
                               'diff menu to display this diff'))
        stats = (0, 0)
        size = 0

    if not html:
        # nothing was rendered: either a submodule or no textual change
        submodule_nodes = [node for node in [filenode_new, filenode_old]
                           if isinstance(node, SubModuleNode)]
        if submodule_nodes:
            html = wrap_to_table(escape('Submodule %r' % submodule_nodes[0]))
        else:
            html = wrap_to_table(_('No changes detected'))

    cs1 = filenode_old.changeset.raw_id
    cs2 = filenode_new.changeset.raw_id

    return size, cs1, cs2, html, stats
101 101
102 102
def get_gitdiff(filenode_old, filenode_new, ignore_whitespace=True, context=3):
    """
    Returns git style diff between given ``filenode_old`` and ``filenode_new``.

    :param filenode_old: old version of the file
    :param filenode_new: new version of the file; its changeset's repository
        is the one asked for the diff
    :param ignore_whitespace: ignore whitespaces in diff
    :param context: number of context lines, falsy values fall back to 3
    :returns: whatever ``repo.get_diff`` returns, or an empty string when
        either node is a submodule
    :raises VCSError: if one of the nodes is not a ``FileNode``
    """
    # make sure we pass in default context
    if not context:
        context = 3

    # submodules cannot be diffed as files
    if any(isinstance(node, SubModuleNode)
           for node in (filenode_new, filenode_old)):
        return ''

    for node in (filenode_old, filenode_new):
        if isinstance(node, FileNode):
            continue
        raise VCSError("Given object should be FileNode object, not %s"
                       % node.__class__)

    repo = filenode_new.changeset.repository
    empty_id = repo.EMPTY_CHANGESET
    old_raw_id = getattr(filenode_old.changeset, 'raw_id', empty_id)
    new_raw_id = getattr(filenode_new.changeset, 'raw_id', empty_id)

    return repo.get_diff(old_raw_id, new_raw_id, filenode_new.path,
                         ignore_whitespace, context)
128 128
129 129
class DiffProcessor(object):
    """
    Give it a unified diff and it returns a list of the files that were
    mentioned in the diff together with a dict of meta information that
    can be used to render it in a HTML template.
    """
    # hunk header, e.g. ``@@ -1,3 +1,4 @@ optional trailing text``
    _chunk_re = re.compile(r'@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(.*)')

    def __init__(self, diff, differ='diff', format='gitdiff'):
        """
        :param diff: a text in diff format or generator
        :param differ: ``'difflib'`` selects the word based inline
            highlighter, any other value the prefix/suffix based one
        :param format: format of diff passed, `udiff` or `gitdiff`
        """
        if isinstance(diff, basestring):
            diff = [diff]

        self.__udiff = diff
        self.__format = format
        self.adds = 0
        self.removes = 0

        # a plain string has been wrapped into a list above, so the source
        # is always an iterable of chunks here; work on a copy of it to keep
        # the original reusable (see copy_iterator)
        udiff_copy = self.copy_iterator()
        if self.__format == 'gitdiff':
            self.lines = imap(self.escaper, self._parse_gitdiff(udiff_copy))
        else:
            self.lines = imap(self.escaper, udiff_copy)

        # Select a differ.
        if differ == 'difflib':
            self.differ = self._highlight_line_difflib
        else:
            self.differ = self._highlight_line_udiff

    def escaper(self, string):
        """
        Return ``string`` escaped with ``markupsafe.escape``.
        """
        return markupsafe.escape(string)

    def copy_iterator(self):
        """
        make a fresh copy of generator, we should not iterate thru
        an original as it's needed for repeating operations on
        this instance of DiffProcessor
        """
        self.__udiff, iterator_copy = tee(self.__udiff)
        return iterator_copy

    def _extract_rev(self, line1, line2):
        """
        Extract the operation (A/M/D), filename and revision hint from
        the ``---``/``+++`` header lines.

        :param line1: the ``--- `` line of a file header
        :param line2: the ``+++ `` line of a file header
        :returns: tuple ``(operation, filename, new_rev, old_rev)``; all
            four items are ``None`` when the lines are not a file header
        """

        try:
            if line1.startswith('--- ') and line2.startswith('+++ '):
                l1 = line1[4:].split(None, 1)
                old_filename = (l1[0].replace('a/', '', 1)
                                if len(l1) >= 1 else None)
                old_rev = l1[1] if len(l1) == 2 else 'old'

                l2 = line2[4:].split(None, 1)
                # guard on l2 itself, it is l2 that gets indexed here
                new_filename = (l2[0].replace('b/', '', 1)
                                if len(l2) >= 1 else None)
                new_rev = l2[1] if len(l2) == 2 else 'new'

                filename = (old_filename
                            if old_filename != '/dev/null' else new_filename)

                operation = 'D' if new_filename == '/dev/null' else None
                if not operation:
                    operation = 'M' if old_filename != '/dev/null' else 'A'

                return operation, filename, new_rev, old_rev
        except (ValueError, IndexError):
            pass

        return None, None, None, None

    def _parse_gitdiff(self, diffiterator):
        """
        Split the chunks of ``diffiterator`` into decoded lines and count
        added/removed lines into ``self.adds``/``self.removes``.

        :param diffiterator: iterable yielding at most two chunks
        :returns: list of unicode lines
        :raises Exception: if the iterable yields more than two chunks
        """
        def line_decoder(l):
            # ``+++``/``---`` are file headers, not changed lines
            if l.startswith('+') and not l.startswith('+++'):
                self.adds += 1
            elif l.startswith('-') and not l.startswith('---'):
                self.removes += 1
            return l.decode('utf8', 'replace')

        output = list(diffiterator)
        size = len(output)

        if size == 2:
            # the first chunk is kept as one line, only the second is split
            l = []
            l.extend([output[0]])
            l.extend(output[1].splitlines(1))
            return map(line_decoder, l)
        elif size == 1:
            return map(line_decoder, output[0].splitlines(1))
        elif size == 0:
            return []

        raise Exception('wrong size of diff %s' % size)

    def _highlight_line_difflib(self, line, next_):
        """
        Highlight inline changes in both lines.

        Both line dicts are modified in place: changed word runs get wrapped
        in ``<del>`` (old line) or ``<ins>`` (new line).
        """

        if line['action'] == 'del':
            old, new = line, next_
        else:
            old, new = next_, line

        oldwords = re.split(r'(\W)', old['line'])
        newwords = re.split(r'(\W)', new['line'])

        sequence = difflib.SequenceMatcher(None, oldwords, newwords)

        oldfragments, newfragments = [], []
        for tag, i1, i2, j1, j2 in sequence.get_opcodes():
            oldfrag = ''.join(oldwords[i1:i2])
            newfrag = ''.join(newwords[j1:j2])
            if tag != 'equal':
                if oldfrag:
                    oldfrag = '<del>%s</del>' % oldfrag
                if newfrag:
                    newfrag = '<ins>%s</ins>' % newfrag
            oldfragments.append(oldfrag)
            newfragments.append(newfrag)

        old['line'] = "".join(oldfragments)
        new['line'] = "".join(newfragments)

    def _highlight_line_udiff(self, line, next_):
        """
        Highlight inline changes in both lines.

        The common prefix and suffix of the two lines are left alone, the
        differing middle part is wrapped in ``<ins>``/``<del>`` depending
        on the action of each line. Both dicts are modified in place.
        """
        start = 0
        limit = min(len(line['line']), len(next_['line']))
        # length of the common prefix
        while start < limit and line['line'][start] == next_['line'][start]:
            start += 1
        # common suffix as a negative index; it must not overlap the prefix
        end = -1
        limit -= start
        while -end <= limit and line['line'][end] == next_['line'][end]:
            end -= 1
        end += 1
        if start or end:
            def do(l):
                last = end + len(l['line'])
                if l['action'] == 'add':
                    tag = 'ins'
                else:
                    tag = 'del'
                l['line'] = '%s<%s>%s</%s>%s' % (
                    l['line'][:start],
                    tag,
                    l['line'][start:last],
                    tag,
                    l['line'][last:]
                )
            do(line)
            do(next_)

    def _parse_udiff(self, inline_diff=True):
        """
        Parse the diff and return data for the template.

        :param inline_diff: when ``False`` the inline highlighting pass is
            skipped
        :returns: list of per-file dicts with ``filename``,
            ``old_revision``, ``new_revision``, ``chunks``, ``operation``
            and ``stats`` (``[added, removed]``) keys, sorted so that added
            files come first, then modified, then deleted
        """
        lineiter = self.lines
        files = []
        try:
            line = next(lineiter)
            while True:
                # continue until we found the old file
                if not line.startswith('--- '):
                    line = next(lineiter)
                    continue

                chunks = []
                stats = [0, 0]
                # _extract_rev returns the new revision before the old one
                operation, filename, new_rev, old_rev = \
                    self._extract_rev(line, next(lineiter))
                files.append({
                    'filename': filename,
                    'old_revision': old_rev,
                    'new_revision': new_rev,
                    'chunks': chunks,
                    'operation': operation,
                    'stats': stats,
                })

                line = next(lineiter)
                while line:
                    match = self._chunk_re.match(line)
                    if not match:
                        break

                    lines = []
                    chunks.append(lines)

                    old_line, old_end, new_line, new_end = \
                        [int(x or 1) for x in match.groups()[:-1]]
                    old_line -= 1
                    new_line -= 1
                    gr = match.groups()
                    context = len(gr) == 5
                    old_end += old_line
                    new_end += new_line

                    if context:
                        # skip context only if it's first line
                        if int(gr[0]) > 1:
                            lines.append({
                                'old_lineno': '...',
                                'new_lineno': '...',
                                'action': 'context',
                                'line': line,
                            })

                    line = next(lineiter)
                    while old_line < old_end or new_line < new_end:
                        if line:
                            command, line = line[0], line[1:]
                        else:
                            command = ' '
                        affects_old = affects_new = False

                        # ignore those if we don't expect them
                        if command in '#@':
                            continue
                        elif command == '+':
                            affects_new = True
                            action = 'add'
                            stats[0] += 1
                        elif command == '-':
                            affects_old = True
                            action = 'del'
                            stats[1] += 1
                        else:
                            affects_old = affects_new = True
                            action = 'unmod'

                        if line.find('No newline at end of file') != -1:
                            lines.append({
                                'old_lineno': '...',
                                'new_lineno': '...',
                                'action': 'context',
                                'line': line
                            })

                        else:
                            old_line += affects_old
                            new_line += affects_new
                            lines.append({
                                'old_lineno': affects_old and old_line or '',
                                'new_lineno': affects_new and new_line or '',
                                'action': action,
                                'line': line
                            })

                        line = next(lineiter)

        except StopIteration:
            # running out of input is the regular way out of the loops above
            pass

        sorter = lambda info: {'A': 0, 'M': 1, 'D': 2}.get(info['operation'])
        if inline_diff is False:
            return sorted(files, key=sorter)

        # highlight inline changes
        for diff_data in files:
            for chunk in diff_data['chunks']:
                lineiter = iter(chunk)
                try:
                    while True:
                        line = next(lineiter)
                        if line['action'] != 'unmod':
                            nextline = next(lineiter)
                            if nextline['action'] in ['unmod', 'context'] or \
                               nextline['action'] == line['action']:
                                continue
                            self.differ(line, nextline)
                except StopIteration:
                    pass

        return sorted(files, key=sorter)

    def prepare(self, inline_diff=True):
        """
        Prepare the passed udiff for HTML rendering. It'll return a list
        of dicts
        """
        return self._parse_udiff(inline_diff=inline_diff)

    def _safe_id(self, idstring):
        """Make a string safe for including in an id attribute.

        The HTML spec says that id attributes 'must begin with
        a letter ([A-Za-z]) and may be followed by any number
        of letters, digits ([0-9]), hyphens ("-"), underscores
        ("_"), colons (":"), and periods (".")'. These regexps
        are slightly over-zealous, in that they remove colons
        and periods unnecessarily.

        Whitespace is transformed into underscores, and then
        anything which is not a hyphen or a character that
        matches \\w (alphanumerics and underscore) is removed.

        """
        # Transform all whitespace to underscore
        idstring = re.sub(r'\s', "_", '%s' % idstring)
        # Remove everything that is not a hyphen or a member of \w
        idstring = re.sub(r'(?!-)\W', "", idstring).lower()
        return idstring

    def raw_diff(self):
        """
        Returns raw string as udiff
        """
        udiff_copy = self.copy_iterator()
        if self.__format == 'gitdiff':
            udiff_copy = self._parse_gitdiff(udiff_copy)
        return u''.join(udiff_copy)

    def as_html(self, table_class='code-difftable', line_class='line',
                new_lineno_class='lineno old', old_lineno_class='lineno new',
                code_class='code', enable_comments=False, diff_lines=None):
        """
        Return given diff as html table with customized css classes

        :param table_class: css class of the ``<table>`` element
        :param line_class: css class put on every ``<tr>`` next to the
            action of the line
        :param new_lineno_class: css class of the new line number cell
        :param old_lineno_class: css class of the old line number cell
        :param code_class: css class of the code cell
        :param enable_comments: when false the code cell additionally gets
            the ``no-comment`` class
        :param diff_lines: already parsed diff as returned by
            :meth:`prepare`; parsed from this instance when ``None``
        :returns: html string, or ``None`` if the diff has no chunks
        """
        def _link_to_if(condition, label, url):
            """
            Generates a link if condition is meet or just the label if not.
            """

            if condition:
                return '''<a href="%(url)s">%(label)s</a>''' % {
                    'url': url,
                    'label': label
                }
            else:
                return label
        if diff_lines is None:
            diff_lines = self.prepare()
        _html_empty = True
        _html = []
        _html.append('''<table class="%(table_class)s">\n''' % {
            'table_class': table_class
        })
        for diff in diff_lines:
            for line in diff['chunks']:
                _html_empty = False
                for change in line:
                    _html.append('''<tr class="%(lc)s %(action)s">\n''' % {
                        'lc': line_class,
                        'action': change['action']
                    })
                    anchor_old_id = ''
                    anchor_new_id = ''
                    anchor_old = "%(filename)s_o%(oldline_no)s" % {
                        'filename': self._safe_id(diff['filename']),
                        'oldline_no': change['old_lineno']
                    }
                    anchor_new = "%(filename)s_n%(oldline_no)s" % {
                        'filename': self._safe_id(diff['filename']),
                        'oldline_no': change['new_lineno']
                    }
                    # only real line numbers get an id, not '...' or ''
                    cond_old = (change['old_lineno'] != '...' and
                                change['old_lineno'])
                    cond_new = (change['new_lineno'] != '...' and
                                change['new_lineno'])
                    if cond_old:
                        anchor_old_id = 'id="%s"' % anchor_old
                    if cond_new:
                        anchor_new_id = 'id="%s"' % anchor_new
                    ###########################################################
                    # OLD LINE NUMBER
                    ###########################################################
                    _html.append('''\t<td %(a_id)s class="%(olc)s">''' % {
                        'a_id': anchor_old_id,
                        'olc': old_lineno_class
                    })

                    _html.append('''%(link)s''' % {
                        'link': _link_to_if(True, change['old_lineno'],
                                            '#%s' % anchor_old)
                    })
                    _html.append('''</td>\n''')
                    ###########################################################
                    # NEW LINE NUMBER
                    ###########################################################

                    _html.append('''\t<td %(a_id)s class="%(nlc)s">''' % {
                        'a_id': anchor_new_id,
                        'nlc': new_lineno_class
                    })

                    _html.append('''%(link)s''' % {
                        'link': _link_to_if(True, change['new_lineno'],
                                            '#%s' % anchor_new)
                    })
                    _html.append('''</td>\n''')
                    ###########################################################
                    # CODE
                    ###########################################################
                    comments = '' if enable_comments else 'no-comment'
                    _html.append('''\t<td class="%(cc)s %(inc)s">''' % {
                        'cc': code_class,
                        'inc': comments
                    })
                    _html.append('''\n\t\t<pre>%(code)s</pre>\n''' % {
                        'code': change['line']
                    })
                    _html.append('''\t</td>''')
                    _html.append('''\n</tr>\n''')
        _html.append('''</table>''')
        if _html_empty:
            return None
        return ''.join(_html)

    def stat(self):
        """
        Returns tuple of added, and removed lines for this instance
        """
        return self.adds, self.removes
553 553
554 554
class InMemoryBundleRepo(bundlerepository):
    """
    Bundle repository fed from an already opened bundle stream object
    instead of a bundle file.
    """
    def __init__(self, ui, path, bundlestream):
        """
        :param ui: ui object handed over to the local repository initialiser
        :param path: path handed over to the local repository initialiser
        :param bundlestream: bundle object, stored as ``self.bundle``
        """
        # NOTE(review): set before the base initialiser runs, presumably
        # because bundlerepository expects the attribute to exist - confirm
        self._tempparent = None
        # the local repository initialiser is called directly, the
        # ``bundlerepository`` one is deliberately not invoked here
        localrepo.localrepository.__init__(self, ui, path)
        self.ui.setconfig('phases', 'publish', False)

        self.bundle = bundlestream

        # dict with the mapping 'filename' -> position in the bundle
        self.bundlefilespos = {}
565 565
566 566
def differ(org_repo, org_ref, other_repo, other_ref, discovery_data=None):
    """
    General differ between branches, bookmarks or separate but related
    repositories

    :param org_repo: origin repository, an object exposing the mercurial
        repository as ``scm_instance._repo``
    :param org_ref: origin reference, a sequence whose second item is the
        revision to diff from
    :param other_repo: other repository, same kind of object as
        ``org_repo``
    :param other_ref: other reference, a sequence whose second item is the
        revision to diff against
    :param discovery_data: ``(common, incoming, rheads)`` triple, required
        whenever the two repositories differ
    :returns: the git style diff as a single string
    """

    bundlerepo = None
    ignore_whitespace = False
    context = 3
    org_repo = org_repo.scm_instance._repo
    other_repo = other_repo.scm_instance._repo
    opts = diffopts(git=True, ignorews=ignore_whitespace, context=context)
    org_ref = org_ref[1]
    other_ref = other_ref[1]

    if org_repo != other_repo:

        common, incoming, rheads = discovery_data

        # create a bundle (uncompressed if other repo is not local)
        if other_repo.capable('getbundle') and incoming:
            # disable repo hooks here since it's just bundle !
            # patch and reset hooks section of UI config to not run any
            # hooks on fetching archives with subrepos
            # (the value is unused; it is not bound to ``_`` so the i18n
            # function imported by this module does not get shadowed)
            for hook_name, _hook_value in other_repo.ui.configitems('hooks'):
                other_repo.ui.setconfig('hooks', hook_name, None)

            unbundle = other_repo.getbundle('incoming', common=common,
                                            heads=rheads)

            # read the whole bundle into memory
            buf = io.BytesIO()
            while True:
                chunk = unbundle._stream.read(1024 * 4)
                if not chunk:
                    break
                buf.write(chunk)

            buf.seek(0)
            # replace chunked _stream with data that can do tell() and seek()
            unbundle._stream = buf

            ui = make_ui('db')
            bundlerepo = InMemoryBundleRepo(ui, path=org_repo.root,
                                            bundlestream=unbundle)

        # without incoming changes no bundle was built and the origin
        # repository itself is diffed
        return ''.join(patch.diff(bundlerepo or org_repo,
                                  node1=org_repo[org_ref].node(),
                                  node2=other_repo[other_ref].node(),
                                  opts=opts))
    else:
        return ''.join(patch.diff(org_repo, node1=org_ref, node2=other_ref,
                                  opts=opts))
General Comments 0
You need to be logged in to leave comments. Login now