diffs.py
627 lines
| 22.6 KiB
| text/x-python
|
PythonLexer
r1753 | # -*- coding: utf-8 -*- | |||
""" | ||||
rhodecode.lib.diffs | ||||
~~~~~~~~~~~~~~~~~~~ | ||||
Set of diffing helpers, previously part of vcs | ||||
r1781 | ||||
r1753 | :created_on: Dec 4, 2011 | |||
:author: marcink | ||||
r1824 | :copyright: (C) 2010-2012 Marcin Kuzminski <marcin@python-works.com> | |||
r1781 | :original copyright: 2007-2008 by Armin Ronacher | |||
r1753 | :license: GPLv3, see COPYING for more details. | |||
""" | ||||
# This program is free software: you can redistribute it and/or modify | ||||
# it under the terms of the GNU General Public License as published by | ||||
# the Free Software Foundation, either version 3 of the License, or | ||||
# (at your option) any later version. | ||||
# | ||||
# This program is distributed in the hope that it will be useful, | ||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||||
# GNU General Public License for more details. | ||||
# | ||||
# You should have received a copy of the GNU General Public License | ||||
# along with this program. If not, see <http://www.gnu.org/licenses/>. | ||||
import re | ||||
import difflib | ||||
r1789 | import markupsafe | |||
r2355 | ||||
r1789 | from itertools import tee, imap | |||
r1753 | ||||
r2355 | from mercurial import patch | |||
from mercurial.mdiff import diffopts | ||||
from mercurial.bundlerepo import bundlerepository | ||||
from mercurial import localrepo | ||||
r1789 | from pylons.i18n.translation import _ | |||
r1753 | ||||
r2552 | from rhodecode.lib.compat import BytesIO | |||
r2007 | from rhodecode.lib.vcs.exceptions import VCSError | |||
r2233 | from rhodecode.lib.vcs.nodes import FileNode, SubModuleNode | |||
from rhodecode.lib.helpers import escape | ||||
r2355 | from rhodecode.lib.utils import EmptyChangeset, make_ui | |||
r1789 | ||||
def wrap_to_table(str_):
    """
    Wrap ``str_`` into a single-row ``code-difftable`` HTML table.

    The text is interpolated as-is, so callers are responsible for passing
    markup that is already safe to embed.

    :param str_: text (or markup) placed inside the ``<pre>`` element
    :returns: the HTML table as a string
    """
    template = (
        '<table class="code-difftable">\n'
        '<tr class="line no-comment">\n'
        '<td class="lineno new"></td>\n'
        '<td class="code no-comment"><pre>%s</pre></td>\n'
        '</tr>\n'
        '</table>'
    )
    return template % str_
def wrapped_diff(filenode_old, filenode_new, cut_off_limit=None,
                 ignore_whitespace=True, line_context=3,
                 enable_comments=False):
    """
    Render the diff between two file nodes as an HTML table.

    Binary files and files at or above ``cut_off_limit`` are not diffed; a
    short message wrapped in a table is returned for them instead.

    :param filenode_old: old file node, ``None`` means "no previous version"
    :param filenode_new: new file node
    :param cut_off_limit: size limit applied to both nodes; ``None`` disables
        the check and ``-1`` always suppresses the diff
    :param ignore_whitespace: passed through to ``get_gitdiff``
    :param line_context: number of context lines passed to ``get_gitdiff``
    :param enable_comments: passed through to ``DiffProcessor.as_html``
    :returns: tuple ``(size, cs1, cs2, diff, stats)`` where ``cs1``/``cs2``
        are the raw ids of the old/new changesets
    """
    if filenode_old is None:
        # diff against an empty file living in an empty changeset
        filenode_old = FileNode(filenode_new.path, '', EmptyChangeset())

    # defaults shared by every branch that does not compute a real diff
    stats = (0, 0)
    size = 0

    if filenode_old.is_binary or filenode_new.is_binary:
        diff = wrap_to_table(_('binary file'))
    elif cut_off_limit != -1 and (
            cut_off_limit is None or
            (filenode_old.size < cut_off_limit and
             filenode_new.size < cut_off_limit)):
        processor = DiffProcessor(
            get_gitdiff(filenode_old, filenode_new,
                        ignore_whitespace=ignore_whitespace,
                        context=line_context),
            format='gitdiff'
        )
        diff = processor.as_html(enable_comments=enable_comments)
        stats = processor.stat()
        size = len(diff or '')
    else:
        diff = wrap_to_table(_('Changeset was too big and was cut off, use '
                               'diff menu to display this diff'))

    if not diff:
        # nothing rendered: either a submodule or a diff without changes
        submodules = [node for node in [filenode_new, filenode_old]
                      if isinstance(node, SubModuleNode)]
        if submodules:
            diff = wrap_to_table(escape('Submodule %r' % submodules[0]))
        else:
            diff = wrap_to_table(_('No changes detected'))

    return (size, filenode_old.changeset.raw_id,
            filenode_new.changeset.raw_id, diff, stats)
r1781 | ||||
r1753 | ||||
def get_gitdiff(filenode_old, filenode_new, ignore_whitespace=True, context=3):
    """
    Returns git style diff between given ``filenode_old`` and ``filenode_new``.

    :param filenode_old: old version of the file
    :param filenode_new: new version of the file
    :param ignore_whitespace: ignore whitespaces in diff
    :param context: number of context lines; falsy values fall back to 3
    :returns: whatever ``repo.get_diff`` returns, or an empty string when
        either node is a submodule
    :raises VCSError: if one of the nodes is not a ``FileNode``
    """
    # make sure we pass in default context
    if not context:
        context = 3

    nodes = (filenode_old, filenode_new)

    # submodules cannot be diffed, report "no diff" for them
    if any(isinstance(node, SubModuleNode) for node in nodes):
        return ''

    for node in nodes:
        if not isinstance(node, FileNode):
            raise VCSError("Given object should be FileNode object, not %s"
                           % node.__class__)

    repo = filenode_new.changeset.repository
    empty_id = repo.EMPTY_CHANGESET
    # changesets without ``raw_id`` are treated as the empty changeset
    old_raw_id = getattr(filenode_old.changeset, 'raw_id', empty_id)
    new_raw_id = getattr(filenode_new.changeset, 'raw_id', empty_id)

    return repo.get_diff(old_raw_id, new_raw_id, filenode_new.path,
                         ignore_whitespace, context)
class DiffProcessor(object):
    """
    Give it a unified diff and it returns a list of the files that were
    mentioned in the diff together with a dict of meta information that
    can be used to render it in a HTML template.
    """
    _chunk_re = re.compile(r'@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(.*)')

    # order in which parsed files are returned: added, modified, deleted
    _operation_order = {'A': 0, 'M': 1, 'D': 2}

    def __init__(self, diff, differ='diff', format='gitdiff'):
        """
        :param diff: a text in diff format or generator
        :param differ: ``'difflib'`` selects word based inline highlighting,
            any other value selects the prefix/suffix based one
        :param format: format of diff passed, `udiff` or `gitdiff`
        """
        is_text = isinstance(diff, basestring)
        if is_text:
            diff = [diff]

        self.__udiff = diff
        self.__format = format
        self.adds = 0
        self.removes = 0

        if self.__format == 'gitdiff':
            raw_lines = self._parse_gitdiff(self.copy_iterator())
        elif is_text:
            # a plain string has to be split, otherwise the parser would see
            # the whole diff as one single line
            raw_lines = diff[0].splitlines(1)
        else:
            raw_lines = self.copy_iterator()
        self.lines = imap(self.escaper, raw_lines)

        # Select a differ.
        if differ == 'difflib':
            self.differ = self._highlight_line_difflib
        else:
            self.differ = self._highlight_line_udiff

    def escaper(self, string):
        """
        Escape ``string`` with ``markupsafe.escape`` so it can be embedded
        in the generated HTML.
        """
        return markupsafe.escape(string)

    def copy_iterator(self):
        """
        make a fresh copy of generator, we should not iterate thru
        an original as it's needed for repeating operations on
        this instance of DiffProcessor
        """
        self.__udiff, iterator_copy = tee(self.__udiff)
        return iterator_copy

    def _extract_rev(self, line1, line2):
        """
        Extract the operation (A/M/D), filename and revision hint from the
        ``---``/``+++`` header pair.

        :param line1: the ``--- `` line
        :param line2: the ``+++ `` line
        :returns: tuple ``(operation, filename, new_rev, old_rev)``, or four
            ``None`` values when the lines are not a file header
        """
        try:
            if line1.startswith('--- ') and line2.startswith('+++ '):
                l1 = line1[4:].split(None, 1)
                old_filename = (l1[0].replace('a/', '', 1)
                                if len(l1) >= 1 else None)
                old_rev = l1[1] if len(l1) == 2 else 'old'

                l2 = line2[4:].split(None, 1)
                # guard on l2 itself; checking l1 here made an empty ``+++``
                # line blow up on l2[0]
                new_filename = (l2[0].replace('b/', '', 1)
                                if len(l2) >= 1 else None)
                new_rev = l2[1] if len(l2) == 2 else 'new'

                filename = (old_filename
                            if old_filename != '/dev/null' else new_filename)

                operation = 'D' if new_filename == '/dev/null' else None
                if not operation:
                    operation = 'M' if old_filename != '/dev/null' else 'A'

                return operation, filename, new_rev, old_rev
        except (ValueError, IndexError):
            pass

        return None, None, None, None

    def _parse_gitdiff(self, diffiterator):
        """
        Turn the chunks of a git style diff into a list of decoded lines and
        record the number of added/removed lines on the instance.

        The counters are assigned rather than incremented, so calling this
        more than once (e.g. via ``raw_diff``) does not inflate ``stat()``.

        :param diffiterator: iterable yielding zero, one or two diff chunks;
            with two chunks the first one is kept as a single line
        :raises Exception: if more than two chunks are given
        """
        output = list(diffiterator)
        size = len(output)

        if size == 0:
            return []
        elif size == 1:
            raw_lines = output[0].splitlines(1)
        elif size == 2:
            raw_lines = [output[0]]
            raw_lines.extend(output[1].splitlines(1))
        else:
            raise Exception('wrong size of diff %s' % size)

        adds = removes = 0
        decoded = []
        for l in raw_lines:
            if l.startswith('+') and not l.startswith('+++'):
                adds += 1
            elif l.startswith('-') and not l.startswith('---'):
                removes += 1
            decoded.append(l.decode('utf8', 'replace'))

        self.adds = adds
        self.removes = removes
        return decoded

    def _highlight_line_difflib(self, line, next_):
        """
        Highlight inline changes in both lines, word by word, using
        ``difflib.SequenceMatcher``. Both line dicts are modified in place.
        """
        if line['action'] == 'del':
            old, new = line, next_
        else:
            old, new = next_, line

        oldwords = re.split(r'(\W)', old['line'])
        newwords = re.split(r'(\W)', new['line'])

        sequence = difflib.SequenceMatcher(None, oldwords, newwords)

        oldfragments, newfragments = [], []
        for tag, i1, i2, j1, j2 in sequence.get_opcodes():
            oldfrag = ''.join(oldwords[i1:i2])
            newfrag = ''.join(newwords[j1:j2])
            if tag != 'equal':
                if oldfrag:
                    oldfrag = '<del>%s</del>' % oldfrag
                if newfrag:
                    newfrag = '<ins>%s</ins>' % newfrag
            oldfragments.append(oldfrag)
            newfragments.append(newfrag)

        old['line'] = "".join(oldfragments)
        new['line'] = "".join(newfragments)

    def _highlight_line_udiff(self, line, next_):
        """
        Highlight inline changes in both lines by wrapping everything
        between their common prefix and common suffix in ``<ins>``/``<del>``.
        Both line dicts are modified in place.
        """
        start = 0
        limit = min(len(line['line']), len(next_['line']))
        while start < limit and line['line'][start] == next_['line'][start]:
            start += 1
        end = -1
        # the suffix scan must not run into the already matched prefix
        limit -= start
        while -end <= limit and line['line'][end] == next_['line'][end]:
            end -= 1
        end += 1
        if start or end:
            def do(l):
                last = end + len(l['line'])
                if l['action'] == 'add':
                    tag = 'ins'
                else:
                    tag = 'del'
                l['line'] = '%s<%s>%s</%s>%s' % (
                    l['line'][:start],
                    tag,
                    l['line'][start:last],
                    tag,
                    l['line'][last:]
                )
            do(line)
            do(next_)

    def _highlight_chunk(self, chunk):
        """Run the selected differ over adjacent changed line pairs of one chunk."""
        passive = ['unmod', 'context']
        lineiter = iter(chunk)
        try:
            while 1:
                line = lineiter.next()
                # context rows are not changes and must never be paired
                if line['action'] in passive:
                    continue
                nextline = lineiter.next()
                if nextline['action'] in passive or \
                   nextline['action'] == line['action']:
                    continue
                self.differ(line, nextline)
        except StopIteration:
            pass

    def _parse_udiff(self, inline_diff=True):
        """
        Parse the diff an return data for the template.

        :param inline_diff: when ``False`` the inline highlighting pass is
            skipped
        :returns: list of dicts with the keys ``filename``, ``old_revision``,
            ``new_revision``, ``chunks``, ``operation`` and ``stats``
            (``[added, removed]``), sorted added, modified, deleted
        """
        lineiter = self.lines
        files = []
        try:
            line = lineiter.next()
            while 1:
                # continue until we found the old file
                if not line.startswith('--- '):
                    line = lineiter.next()
                    continue

                chunks = []
                stats = [0, 0]
                # _extract_rev returns the new revision before the old one
                operation, filename, new_rev, old_rev = \
                    self._extract_rev(line, lineiter.next())
                files.append({
                    'filename': filename,
                    'old_revision': old_rev,
                    'new_revision': new_rev,
                    'chunks': chunks,
                    'operation': operation,
                    'stats': stats,
                })

                line = lineiter.next()
                while line:
                    match = self._chunk_re.match(line)
                    if not match:
                        break

                    lines = []
                    chunks.append(lines)

                    gr = match.groups()
                    # a missing length in the hunk header means 1
                    old_line, old_end, new_line, new_end = \
                        [int(x or 1) for x in gr[:-1]]
                    old_line -= 1
                    new_line -= 1
                    old_end += old_line
                    new_end += new_line

                    # the hunk header is shown as a separator row, except
                    # for a hunk starting at the very first line
                    if int(gr[0]) > 1:
                        lines.append({
                            'old_lineno': '...',
                            'new_lineno': '...',
                            'action': 'context',
                            'line': line,
                        })

                    line = lineiter.next()
                    while old_line < old_end or new_line < new_end:
                        if line:
                            command, line = line[0], line[1:]
                        else:
                            command = ' '
                        affects_old = affects_new = False

                        # ignore those if we don't expect them
                        if command in '#@':
                            # move on to the next line, otherwise the same
                            # line is re-read one character at a time
                            line = lineiter.next()
                            continue
                        elif command == '+':
                            affects_new = True
                            action = 'add'
                            stats[0] += 1
                        elif command == '-':
                            affects_old = True
                            action = 'del'
                            stats[1] += 1
                        else:
                            affects_old = affects_new = True
                            action = 'unmod'

                        if line.find('No newline at end of file') != -1:
                            # marker row, does not advance line numbers
                            lines.append({
                                'old_lineno': '...',
                                'new_lineno': '...',
                                'action': 'context',
                                'line': line
                            })
                        else:
                            old_line += affects_old
                            new_line += affects_new
                            lines.append({
                                'old_lineno': affects_old and old_line or '',
                                'new_lineno': affects_new and new_line or '',
                                'action': action,
                                'line': line
                            })

                        line = lineiter.next()

        except StopIteration:
            # running out of lines is the regular way to finish parsing
            pass

        order = self._operation_order
        sorter = lambda info: order.get(info['operation'])

        if inline_diff is False:
            return sorted(files, key=sorter)

        # highlight inline changes
        for diff_data in files:
            for chunk in diff_data['chunks']:
                self._highlight_chunk(chunk)

        return sorted(files, key=sorter)

    def prepare(self, inline_diff=True):
        """
        Prepare the passed udiff for HTML rendering. It'l return a list
        of dicts
        """
        return self._parse_udiff(inline_diff=inline_diff)

    def _safe_id(self, idstring):
        r"""Make a string safe for including in an id attribute.

        The HTML spec says that id attributes 'must begin with
        a letter ([A-Za-z]) and may be followed by any number
        of letters, digits ([0-9]), hyphens ("-"), underscores
        ("_"), colons (":"), and periods (".")'. These regexps
        are slightly over-zealous, in that they remove colons
        and periods unnecessarily.

        Whitespace is transformed into underscores, and then
        anything which is not a hyphen or a character that
        matches \w (alphanumerics and underscore) is removed.
        The result is lower-cased.
        """
        # Transform all whitespace to underscore
        idstring = re.sub(r'\s', "_", '%s' % idstring)
        # Remove everything that is not a hyphen or a member of \w
        idstring = re.sub(r'(?!-)\W', "", idstring).lower()
        return idstring

    def raw_diff(self):
        """
        Returns raw string as udiff
        """
        udiff_copy = self.copy_iterator()
        if self.__format == 'gitdiff':
            udiff_copy = self._parse_gitdiff(udiff_copy)
        return u''.join(udiff_copy)

    def as_html(self, table_class='code-difftable', line_class='line',
                new_lineno_class='lineno old', old_lineno_class='lineno new',
                code_class='code', enable_comments=False, diff_lines=None):
        """
        Return given diff as html table with customized css classes

        :param table_class: css class of the ``<table>``
        :param line_class: css class put on every ``<tr>`` next to the action
        :param new_lineno_class: css class of the new line number cell
        :param old_lineno_class: css class of the old line number cell
        :param code_class: css class of the code cell
        :param enable_comments: when false the code cell also gets the
            ``no-comment`` class
        :param diff_lines: already parsed diff (as returned by ``prepare``);
            parsed on demand when ``None``
        :returns: html string, or ``None`` if the diff contains no chunks
        """
        # NOTE(review): the defaults of new_lineno_class/old_lineno_class
        # look swapped (the old number cell is rendered with "lineno new");
        # kept as-is because existing stylesheets may rely on it.
        if diff_lines is None:
            diff_lines = self.prepare()
        _html_empty = True
        _html = []
        _html.append('''<table class="%(table_class)s">\n''' % {
            'table_class': table_class
        })
        comments = '' if enable_comments else 'no-comment'

        for diff in diff_lines:
            for chunk in diff['chunks']:
                _html_empty = False
                for change in chunk:
                    _html.append('''<tr class="%(lc)s %(action)s">\n''' % {
                        'lc': line_class,
                        'action': change['action']
                    })
                    file_id = self._safe_id(diff['filename'])
                    anchor_old = "%(filename)s_o%(oldline_no)s" % {
                        'filename': file_id,
                        'oldline_no': change['old_lineno']
                    }
                    anchor_new = "%(filename)s_n%(oldline_no)s" % {
                        'filename': file_id,
                        'oldline_no': change['new_lineno']
                    }
                    # only rows carrying a real line number get an id
                    anchor_old_id = ''
                    anchor_new_id = ''
                    if change['old_lineno'] != '...' and change['old_lineno']:
                        anchor_old_id = 'id="%s"' % anchor_old
                    if change['new_lineno'] != '...' and change['new_lineno']:
                        anchor_new_id = 'id="%s"' % anchor_new

                    ###########################################################
                    # OLD LINE NUMBER
                    ###########################################################
                    _html.append('''\t<td %(a_id)s class="%(olc)s">''' % {
                        'a_id': anchor_old_id,
                        'olc': old_lineno_class
                    })
                    _html.append('''<a href="%(url)s">%(label)s</a>''' % {
                        'url': '#%s' % anchor_old,
                        'label': change['old_lineno']
                    })
                    _html.append('''</td>\n''')
                    ###########################################################
                    # NEW LINE NUMBER
                    ###########################################################
                    _html.append('''\t<td %(a_id)s class="%(nlc)s">''' % {
                        'a_id': anchor_new_id,
                        'nlc': new_lineno_class
                    })
                    _html.append('''<a href="%(url)s">%(label)s</a>''' % {
                        'url': '#%s' % anchor_new,
                        'label': change['new_lineno']
                    })
                    _html.append('''</td>\n''')
                    ###########################################################
                    # CODE
                    ###########################################################
                    _html.append('''\t<td class="%(cc)s %(inc)s">''' % {
                        'cc': code_class,
                        'inc': comments
                    })
                    # the line is inserted verbatim; lines produced by this
                    # class were already passed through ``escaper``
                    _html.append('''\n\t\t<pre>%(code)s</pre>\n''' % {
                        'code': change['line']
                    })
                    _html.append('''\t</td>''')
                    _html.append('''\n</tr>\n''')
        _html.append('''</table>''')
        if _html_empty:
            return None
        return ''.join(_html)

    def stat(self):
        """
        Returns tuple of added, and removed lines for this instance
        """
        return self.adds, self.removes
r2337 | ||||
class InMemoryBundleRepo(bundlerepository):
    """
    ``bundlerepository`` subclass that is handed an already opened bundle
    stream instead of locating and opening a bundle on its own.
    """

    def __init__(self, ui, path, bundlestream):
        """
        :param ui: ui object passed to the ``localrepository`` initializer
        :param path: repository path passed to the ``localrepository``
            initializer
        :param bundlestream: bundle object stored as ``self.bundle``
        """
        # NOTE(review): presumably this and the attributes below stand in for
        # state the regular bundlerepository initializer would create --
        # confirm against the Mercurial version in use
        self._tempparent = None
        # only the localrepository initializer is invoked here;
        # bundlerepository.__init__ itself is not called
        localrepo.localrepository.__init__(self, ui, path)
        self.ui.setconfig('phases', 'publish', False)

        self.bundle = bundlestream

        # dict with the mapping 'filename' -> position in the bundle
        self.bundlefilespos = {}
def differ(org_repo, org_ref, other_repo, other_ref, discovery_data=None):
    """
    General differ between branches, bookmarks or separate but related
    repositories

    :param org_repo: object exposing the origin repo as
        ``scm_instance._repo``
    :param org_ref: sequence whose second item identifies the origin revision
    :param other_repo: object exposing the other repo as
        ``scm_instance._repo``
    :param other_ref: sequence whose second item identifies the other
        revision
    :param discovery_data: ``(common, incoming, rheads)`` triple, unpacked
        only when the two repositories differ
    :returns: git style diff as a single string
    """
    bundlerepo = None
    ignore_whitespace = False
    context = 3

    org_repo = org_repo.scm_instance._repo
    other_repo = other_repo.scm_instance._repo
    opts = diffopts(git=True, ignorews=ignore_whitespace, context=context)
    org_ref = org_ref[1]
    other_ref = other_ref[1]

    if not (org_repo != other_repo):
        # same repository, diff the two references directly
        return ''.join(patch.diff(org_repo, node1=org_ref, node2=other_ref,
                                  opts=opts))

    common, incoming, rheads = discovery_data

    # create a bundle (uncompressed if other repo is not local)
    if other_repo.capable('getbundle') and incoming:
        # disable repo hooks here since it's just bundle !
        # patch and reset hooks section of UI config to not run any
        # hooks on fetching archives with subrepos
        for hook_name, _hook in other_repo.ui.configitems('hooks'):
            other_repo.ui.setconfig('hooks', hook_name, None)

        unbundle = other_repo.getbundle('incoming', common=common,
                                        heads=rheads)

        # slurp the whole bundle into memory, 4 KiB at a time
        buf = BytesIO()
        while True:
            chunk = unbundle._stream.read(1024 * 4)
            if not chunk:
                break
            buf.write(chunk)
        buf.seek(0)

        # replace chunked _stream with data that can do tell() and seek()
        unbundle._stream = buf

        ui = make_ui('db')
        bundlerepo = InMemoryBundleRepo(ui, path=org_repo.root,
                                        bundlestream=unbundle)

    node_old = org_repo[org_ref].node()
    node_new = other_repo[other_ref].node()
    return ''.join(patch.diff(bundlerepo or org_repo,
                              node1=node_old,
                              node2=node_new,
                              opts=opts))