diffs.py
515 lines
| 18.2 KiB
| text/x-python
|
PythonLexer
r1753 | # -*- coding: utf-8 -*- | |||
""" | ||||
rhodecode.lib.diffs | ||||
~~~~~~~~~~~~~~~~~~~ | ||||
Set of diffing helpers, previously part of vcs | ||||
r1781 | ||||
r1753 | :created_on: Dec 4, 2011 | |||
:author: marcink | ||||
:copyright: (C) 2009-2011 Marcin Kuzminski <marcin@python-works.com> | ||||
r1781 | :original copyright: 2007-2008 by Armin Ronacher | |||
r1753 | :license: GPLv3, see COPYING for more details. | |||
""" | ||||
# This program is free software: you can redistribute it and/or modify | ||||
# it under the terms of the GNU General Public License as published by | ||||
# the Free Software Foundation, either version 3 of the License, or | ||||
# (at your option) any later version. | ||||
# | ||||
# This program is distributed in the hope that it will be useful, | ||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||||
# GNU General Public License for more details. | ||||
# | ||||
# You should have received a copy of the GNU General Public License | ||||
# along with this program. If not, see <http://www.gnu.org/licenses/>. | ||||
import re | ||||
import difflib | ||||
r1789 | import markupsafe | |||
from itertools import tee, imap | ||||
r1753 | ||||
r1789 | from pylons.i18n.translation import _ | |||
r1753 | ||||
from vcs.exceptions import VCSError | ||||
from vcs.nodes import FileNode | ||||
r1789 | ||||
from rhodecode.lib.utils import EmptyChangeset | ||||
def wrap_to_table(str_):
    """
    Wrap ``str_`` into a one-row ``code-difftable`` HTML table.

    Used to present a plain message (e.g. "binary file") in the same
    markup that a rendered diff uses.

    :param str_: text placed inside the ``<pre>`` element; it is inserted
        as given, without any escaping
    :returns: HTML markup of the table as a string
    """
    template = '''<table class="code-difftable">
<tr class="line no-comment">
<td class="lineno new"></td>
<td class="code no-comment"><pre>%s</pre></td>
</tr>
</table>'''
    return template % str_
def wrapped_diff(filenode_old, filenode_new, cut_off_limit=None,
                 ignore_whitespace=True, line_context=3,
                 enable_comments=False):
    """
    Build an HTML table diff between two file nodes.

    Binary files and files exceeding ``cut_off_limit`` are not diffed; a
    short message wrapped into the diff table is returned for them instead.

    :param filenode_old: old version of the file; ``None`` is replaced by an
        empty ``FileNode`` with the path of ``filenode_new``
    :param filenode_new: new version of the file
    :param cut_off_limit: ``None`` always renders the diff, ``-1`` never
        renders it, any other value renders it only when the size of both
        nodes is below that value
    :param ignore_whitespace: passed through to :func:`get_gitdiff`
    :param line_context: number of context lines passed to
        :func:`get_gitdiff`
    :param enable_comments: passed through to ``DiffProcessor.as_html``
    :returns: tuple ``(size, cs1, cs2, diff, stats)`` - length of the
        rendered diff, raw ids of the last changesets of the old and the new
        node, the HTML itself and an ``(added, removed)`` tuple
    """
    if filenode_old is None:
        filenode_old = FileNode(filenode_new.path, '', EmptyChangeset())

    # values reported whenever no real diff gets rendered
    stats = (0, 0)
    size = 0

    if filenode_old.is_binary or filenode_new.is_binary:
        diff = wrap_to_table(_('binary file'))
    else:
        if cut_off_limit is None:
            renderable = True
        elif cut_off_limit == -1:
            renderable = False
        else:
            renderable = (filenode_old.size < cut_off_limit and
                          filenode_new.size < cut_off_limit)

        if renderable:
            f_gitdiff = get_gitdiff(filenode_old, filenode_new,
                                    ignore_whitespace=ignore_whitespace,
                                    context=line_context)
            processor = DiffProcessor(f_gitdiff, format='gitdiff')
            diff = processor.as_html(enable_comments=enable_comments)
            stats = processor.stat()
            # as_html() gives None when the diff contains no hunks
            size = len(diff or '')
        else:
            diff = wrap_to_table(_('Changeset was to big and was cut off, use '
                                   'diff menu to display this diff'))

    if not diff:
        diff = wrap_to_table(_('No changes detected'))

    cs1 = filenode_old.last_changeset.raw_id
    cs2 = filenode_new.last_changeset.raw_id

    return size, cs1, cs2, diff, stats
r1781 | ||||
r1753 | ||||
def get_gitdiff(filenode_old, filenode_new, ignore_whitespace=True, context=3):
    """
    Returns git style diff between given ``filenode_old`` and ``filenode_new``.

    :param filenode_old: ``FileNode`` with the old content
    :param filenode_new: ``FileNode`` with the new content; its changeset
        supplies the repository that computes the diff
    :param ignore_whitespace: ignore whitespaces in diff
    :param context: number of context lines handed to the repository
    :raises VCSError: if any of the two nodes is not a ``FileNode``
    """
    for candidate in (filenode_old, filenode_new):
        if isinstance(candidate, FileNode):
            continue
        raise VCSError("Given object should be FileNode object, not %s"
                       % candidate.__class__)

    # changesets without ``raw_id`` are represented by the all-zero id
    null_id = '0' * 40
    old_raw_id = getattr(filenode_old.changeset, 'raw_id', null_id)
    new_raw_id = getattr(filenode_new.changeset, 'raw_id', null_id)

    repository = filenode_new.changeset.repository
    return repository._get_diff(old_raw_id, new_raw_id, filenode_new.path,
                                ignore_whitespace, context)
class DiffProcessor(object):
    """
    Give it a unified diff and it returns a list of the files that were
    mentioned in the diff together with a dict of meta information that
    can be used to render it in a HTML template.
    """
    # hunk header: ``@@ -old_start[,old_len] +new_start[,new_len] @@ section``
    _chunk_re = re.compile(r'@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(.*)')

    def __init__(self, diff, differ='diff', format='udiff'):
        """
        :param diff: a text in diff format or generator
        :param differ: ``'difflib'`` selects word based inline highlighting,
            any other value selects the common prefix/suffix highlighter
        :param format: format of diff passed, `udiff` or `gitdiff`
        """
        if isinstance(diff, basestring):
            if format == 'gitdiff':
                # _parse_gitdiff() expects the whole text as a single item
                diff = [diff]
            else:
                # the udiff parser works line by line, so a text blob has to
                # be split first; joining the lines again (raw_diff) gives
                # back the very same text
                diff = diff.splitlines(1)

        self.__udiff = diff
        self.__format = format
        self.adds = 0
        self.removes = 0

        udiff_copy = self.copy_iterator()
        if self.__format == 'gitdiff':
            udiff_copy = self._parse_gitdiff(udiff_copy)
        # every line is HTML-escaped before the parser sees it
        self.lines = imap(self.escaper, udiff_copy)

        # Select a differ.
        if differ == 'difflib':
            self.differ = self._highlight_line_difflib
        else:
            self.differ = self._highlight_line_udiff

    def escaper(self, string):
        """
        Return ``string`` escaped with ``markupsafe.escape``.
        """
        return markupsafe.escape(string)

    def copy_iterator(self):
        """
        make a fresh copy of generator, we should not iterate thru
        an original as it's needed for repeating operations on
        this instance of DiffProcessor
        """
        self.__udiff, iterator_copy = tee(self.__udiff)
        return iterator_copy

    def _extract_rev(self, line1, line2):
        """
        Extract the filename and revision hint from a line.

        :param line1: the ``--- `` header line of a file
        :param line2: the ``+++ `` header line of a file
        :returns: tuple ``(filename, new_rev, old_rev)`` - note that the new
            revision comes first - or ``(None, None, None)`` when the two
            lines are not a ``---``/``+++`` pair
        """
        def strip_prefix(name, prefix):
            # drop only a *leading* ``a/`` or ``b/``; removing the first
            # occurrence anywhere would damage paths like ``data/x.py``
            if name.startswith(prefix):
                return name[len(prefix):]
            return name

        if not (line1.startswith('--- ') and line2.startswith('+++ ')):
            return None, None, None

        l1 = line1[4:].split(None, 1)
        l2 = line2[4:].split(None, 1)

        old_filename = strip_prefix(l1[0], 'a/') if l1 else None
        new_filename = strip_prefix(l2[0], 'b/') if l2 else None
        old_rev = l1[1] if len(l1) == 2 else 'old'
        new_rev = l2[1] if len(l2) == 2 else 'new'

        # an added file has ``/dev/null`` as its old name
        filename = (old_filename
                    if old_filename != '/dev/null' else new_filename)

        return filename, new_rev, old_rev

    def _parse_gitdiff(self, diffiterator):
        """
        Turn the items of a git diff into a list of decoded lines and
        refresh the added/removed line counters.

        The iterator may yield no item, one item (the diff text) or two
        items (a first item kept as a single line, then the diff text).

        :param diffiterator: iterable with the pieces of the git diff
        :returns: list of lines decoded as utf8 (undecodable bytes replaced)
        :raises Exception: if the iterator yields more than two items
        """
        output = list(diffiterator)
        size = len(output)

        if size == 2:
            raw_lines = [output[0]] + output[1].splitlines(1)
        elif size == 1:
            raw_lines = output[0].splitlines(1)
        elif size == 0:
            raw_lines = []
        else:
            raise Exception('wrong size of diff %s' % size)

        adds = removes = 0
        decoded = []
        for l in raw_lines:
            if l.startswith('+') and not l.startswith('+++'):
                adds += 1
            elif l.startswith('-') and not l.startswith('---'):
                removes += 1
            decoded.append(l.decode('utf8', 'replace'))

        # every call parses the complete diff, so the counters are assigned
        # rather than accumulated; otherwise each raw_diff() call would
        # inflate the numbers reported by stat()
        self.adds = adds
        self.removes = removes
        return decoded

    def _highlight_line_difflib(self, line, next_):
        """
        Highlight inline changes in both lines.

        The lines are split into words, compared with ``difflib`` and every
        differing fragment is wrapped in ``<del>`` (old line) or ``<ins>``
        (new line). Both line dicts are modified in place.
        """
        if line['action'] == 'del':
            old, new = line, next_
        else:
            old, new = next_, line

        oldwords = re.split(r'(\W)', old['line'])
        newwords = re.split(r'(\W)', new['line'])

        sequence = difflib.SequenceMatcher(None, oldwords, newwords)

        oldfragments, newfragments = [], []
        for tag, i1, i2, j1, j2 in sequence.get_opcodes():
            oldfrag = ''.join(oldwords[i1:i2])
            newfrag = ''.join(newwords[j1:j2])
            if tag != 'equal':
                if oldfrag:
                    oldfrag = '<del>%s</del>' % oldfrag
                if newfrag:
                    newfrag = '<ins>%s</ins>' % newfrag
            oldfragments.append(oldfrag)
            newfragments.append(newfrag)

        old['line'] = "".join(oldfragments)
        new['line'] = "".join(newfragments)

    def _highlight_line_udiff(self, line, next_):
        """
        Highlight inline changes in both lines.

        Everything between the common prefix and the common suffix of the
        two lines is wrapped in ``<ins>`` (for an ``add`` line) or ``<del>``
        (for any other line). Lines sharing neither a prefix nor a suffix
        are left untouched. Both line dicts are modified in place.
        """
        # length of the common prefix
        start = 0
        limit = min(len(line['line']), len(next_['line']))
        while start < limit and line['line'][start] == next_['line'][start]:
            start += 1
        # ``end`` becomes the negative offset of the common suffix; the
        # suffix must not overlap the prefix, hence the reduced limit
        end = -1
        limit -= start
        while -end <= limit and line['line'][end] == next_['line'][end]:
            end -= 1
        end += 1
        if start or end:
            def do(l):
                last = end + len(l['line'])
                if l['action'] == 'add':
                    tag = 'ins'
                else:
                    tag = 'del'
                l['line'] = '%s<%s>%s</%s>%s' % (
                    l['line'][:start],
                    tag,
                    l['line'][start:last],
                    tag,
                    l['line'][last:]
                )
            do(line)
            do(next_)

    def _highlight_chunk(self, chunk):
        """
        Run the selected differ over the changed line pairs of one hunk.

        Lines are consumed pairwise: a changed line together with its
        successor is handed to ``self.differ`` when the successor is a
        change of the other kind. Unmodified lines and the hunk header row
        (``context``) are never highlighted.
        """
        idx = 0
        total = len(chunk)
        # a line without a successor can never form a pair
        while idx + 1 < total:
            line = chunk[idx]
            idx += 1
            if line['action'] in ('unmod', 'context'):
                continue
            nextline = chunk[idx]
            idx += 1
            if nextline['action'] == 'unmod' or \
               nextline['action'] == line['action']:
                continue
            self.differ(line, nextline)

    def _parse_udiff(self):
        """
        Parse the diff and return data for the template.

        :returns: list with one dict per file holding ``filename``,
            ``old_revision``, ``new_revision`` and ``chunks``; a chunk is a
            list of line dicts with the keys ``old_lineno``, ``new_lineno``,
            ``action`` (``context``, ``add``, ``del`` or ``unmod``) and
            ``line``
        """
        lineiter = self.lines
        files = []
        try:
            line = lineiter.next()
            # skip first context
            skipfirst = True
            while 1:
                # continue until we found the old file
                if not line.startswith('--- '):
                    line = lineiter.next()
                    continue

                chunks = []
                # _extract_rev() hands back the new revision before the old
                filename, new_rev, old_rev = \
                    self._extract_rev(line, lineiter.next())
                files.append({
                    'filename': filename,
                    'old_revision': old_rev,
                    'new_revision': new_rev,
                    'chunks': chunks
                })

                line = lineiter.next()
                while line:
                    match = self._chunk_re.match(line)
                    if not match:
                        break

                    lines = []
                    chunks.append(lines)

                    # a missing length in the hunk header means one line
                    old_line, old_end, new_line, new_end = \
                        [int(x or 1) for x in match.groups()[:-1]]
                    old_line -= 1
                    new_line -= 1
                    old_end += old_line
                    new_end += new_line

                    # every hunk header but the very first one of the diff
                    # is emitted as a separator row
                    if not skipfirst:
                        lines.append({
                            'old_lineno': '...',
                            'new_lineno': '...',
                            'action': 'context',
                            'line': line,
                        })
                    else:
                        skipfirst = False

                    line = lineiter.next()
                    while old_line < old_end or new_line < new_end:
                        if line:
                            command, line = line[0], line[1:]
                        else:
                            command = ' '
                        affects_old = affects_new = False

                        # ignore those if we don't expect them
                        if command in '#@':
                            # move on to the next line; without this the
                            # rest of the ignored line would be re-read
                            # character by character
                            line = lineiter.next()
                            continue
                        elif command == '+':
                            affects_new = True
                            action = 'add'
                        elif command == '-':
                            affects_old = True
                            action = 'del'
                        else:
                            affects_old = affects_new = True
                            action = 'unmod'

                        old_line += affects_old
                        new_line += affects_new
                        lines.append({
                            'old_lineno': affects_old and old_line or '',
                            'new_lineno': affects_new and new_line or '',
                            'action': action,
                            'line': line
                        })
                        line = lineiter.next()

        except StopIteration:
            # end of input is the regular way out of the loops above
            pass

        # highlight inline changes, hunk by hunk, for every parsed file
        for diff_file in files:
            for chunk in diff_file['chunks']:
                self._highlight_chunk(chunk)

        return files

    def prepare(self):
        """
        Prepare the passed udiff for HTML rendering. It'll return a list
        of dicts
        """
        return self._parse_udiff()

    def _safe_id(self, idstring):
        r"""Make a string safe for including in an id attribute.

        The HTML spec says that id attributes 'must begin with
        a letter ([A-Za-z]) and may be followed by any number
        of letters, digits ([0-9]), hyphens ("-"), underscores
        ("_"), colons (":"), and periods (".")'. These regexps
        are slightly over-zealous, in that they remove colons
        and periods unnecessarily.

        Whitespace is transformed into underscores, and then
        anything which is not a hyphen or a character that
        matches \w (alphanumerics and underscore) is removed.
        The result is lower-cased.
        """
        # Transform all whitespace to underscore
        idstring = re.sub(r'\s', "_", '%s' % idstring)
        # Remove everything that is not a hyphen or a member of \w
        idstring = re.sub(r'(?!-)\W', "", idstring).lower()
        return idstring

    def raw_diff(self):
        """
        Returns raw string as udiff
        """
        udiff_copy = self.copy_iterator()
        if self.__format == 'gitdiff':
            udiff_copy = self._parse_gitdiff(udiff_copy)
        return u''.join(udiff_copy)

    def as_html(self, table_class='code-difftable', line_class='line',
                new_lineno_class='lineno old', old_lineno_class='lineno new',
                code_class='code', enable_comments=False):
        """
        Return udiff as html table with customized css classes

        :param table_class: css class of the ``<table>`` element
        :param line_class: css class of every row, the action of the line
            is added as a second class
        :param new_lineno_class: css class of the new line number cell
        :param old_lineno_class: css class of the old line number cell
        :param code_class: css class of the cell holding the line content
        :param enable_comments: when false the code cell additionally gets
            the ``no-comment`` class
        :returns: html as string or ``None`` if the diff has no hunks
        """
        # NOTE(review): the defaults of ``new_lineno_class`` and
        # ``old_lineno_class`` look swapped; they are kept because the
        # rendered markup depends on them - confirm against the stylesheet

        def _link_to_if(condition, label, url):
            """
            Generates a link if condition is meet or just the label if not.
            """
            if condition:
                return '''<a href="%(url)s">%(label)s</a>''' % {
                    'url': url,
                    'label': label
                }
            else:
                return label

        diff_lines = self.prepare()
        _html_empty = True
        # does not depend on the line, computed once
        comments = '' if enable_comments else 'no-comment'

        _html = []
        _html.append('''<table class="%(table_class)s">\n''' % {
            'table_class': table_class
        })
        for diff in diff_lines:
            # anchors of all lines of a file share this prefix
            safe_filename = self._safe_id(diff['filename'])
            for chunk in diff['chunks']:
                _html_empty = False
                for change in chunk:
                    _html.append('''<tr class="%(lc)s %(action)s">\n''' % {
                        'lc': line_class,
                        'action': change['action']
                    })
                    anchor_old_id = ''
                    anchor_new_id = ''
                    anchor_old = "%(filename)s_o%(oldline_no)s" % {
                        'filename': safe_filename,
                        'oldline_no': change['old_lineno']
                    }
                    anchor_new = "%(filename)s_n%(oldline_no)s" % {
                        'filename': safe_filename,
                        'oldline_no': change['new_lineno']
                    }
                    # only cells showing a real line number get an id
                    cond_old = (change['old_lineno'] != '...' and
                                change['old_lineno'])
                    cond_new = (change['new_lineno'] != '...' and
                                change['new_lineno'])
                    if cond_old:
                        anchor_old_id = 'id="%s"' % anchor_old
                    if cond_new:
                        anchor_new_id = 'id="%s"' % anchor_new
                    ###########################################################
                    # OLD LINE NUMBER
                    ###########################################################
                    _html.append('''\t<td %(a_id)s class="%(olc)s">''' % {
                        'a_id': anchor_old_id,
                        'olc': old_lineno_class
                    })

                    _html.append('''%(link)s''' % {
                        'link': _link_to_if(True, change['old_lineno'],
                                            '#%s' % anchor_old)
                    })
                    _html.append('''</td>\n''')
                    ###########################################################
                    # NEW LINE NUMBER
                    ###########################################################
                    _html.append('''\t<td %(a_id)s class="%(nlc)s">''' % {
                        'a_id': anchor_new_id,
                        'nlc': new_lineno_class
                    })

                    _html.append('''%(link)s''' % {
                        'link': _link_to_if(True, change['new_lineno'],
                                            '#%s' % anchor_new)
                    })
                    _html.append('''</td>\n''')
                    ###########################################################
                    # CODE
                    ###########################################################
                    _html.append('''\t<td class="%(cc)s %(inc)s">''' % {
                        'cc': code_class,
                        'inc': comments
                    })
                    _html.append('''\n\t\t<pre>%(code)s</pre>\n''' % {
                        'code': change['line']
                    })
                    _html.append('''\t</td>''')
                    _html.append('''\n</tr>\n''')
        _html.append('''</table>''')
        if _html_empty:
            return None
        return ''.join(_html)

    def stat(self):
        """
        Returns tuple of added, and removed lines for this instance
        """
        return self.adds, self.removes