##// END OF EJS Templates
use whoosh latest branch to fix py25 tests
use whoosh latest branch to fix py25 tests

File last commit:

r2567:5fe0f744 beta
r2582:d01a01ec beta
Show More
diffs.py
635 lines | 23.0 KiB | text/x-python | PythonLexer
# -*- coding: utf-8 -*-
"""
rhodecode.lib.diffs
~~~~~~~~~~~~~~~~~~~
Set of diffing helpers, previously part of vcs
:created_on: Dec 4, 2011
:author: marcink
:copyright: (C) 2010-2012 Marcin Kuzminski <marcin@python-works.com>
:original copyright: 2007-2008 by Armin Ronacher
:license: GPLv3, see COPYING for more details.
"""
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import re
import difflib
import markupsafe
from itertools import tee, imap
from mercurial import patch
from mercurial.mdiff import diffopts
from mercurial.bundlerepo import bundlerepository
from mercurial import localrepo
from pylons.i18n.translation import _
from rhodecode.lib.compat import BytesIO
from rhodecode.lib.vcs.exceptions import VCSError
from rhodecode.lib.vcs.nodes import FileNode, SubModuleNode
from rhodecode.lib.helpers import escape
from rhodecode.lib.utils import EmptyChangeset, make_ui
def wrap_to_table(str_):
    """
    Wrap ``str_`` into a single-row ``code-difftable`` HTML table.

    The value is interpolated verbatim (no escaping is done here), so the
    caller is responsible for passing markup-safe content.

    :param str_: message (or pre-rendered markup) placed inside ``<pre>``
    :returns: HTML snippet with the table
    """
    table_template = '''<table class="code-difftable">
<tr class="line no-comment">
<td class="lineno new"></td>
<td class="code no-comment"><pre>%s</pre></td>
</tr>
</table>'''
    return table_template % str_
def wrapped_diff(filenode_old, filenode_new, cut_off_limit=None,
                 ignore_whitespace=True, line_context=3,
                 enable_comments=False):
    """
    Render the diff between two file nodes as an HTML table.

    Binary files and files exceeding ``cut_off_limit`` are not diffed; an
    explanatory message wrapped into a table is returned instead.

    :param filenode_old: old file node, ``None`` means "compare against an
        empty file at the same path"
    :param filenode_new: new file node
    :param cut_off_limit: size limit checked against both nodes; ``None``
        disables the limit, ``-1`` always cuts the diff off
    :param ignore_whitespace: passed through to :func:`get_gitdiff`
    :param line_context: number of context lines passed to
        :func:`get_gitdiff`
    :param enable_comments: passed through to ``DiffProcessor.as_html``
    :returns: tuple ``(size, cs1, cs2, diff, stats)``
    """
    if filenode_old is None:
        filenode_old = FileNode(filenode_new.path, '', EmptyChangeset())

    # values used by every branch that does not compute a real diff
    stats = (0, 0)
    size = 0

    if filenode_old.is_binary or filenode_new.is_binary:
        diff = wrap_to_table(_('binary file'))
    else:
        within_limit = cut_off_limit != -1 and (
            cut_off_limit is None or
            (filenode_old.size < cut_off_limit and
             filenode_new.size < cut_off_limit))
        if within_limit:
            f_gitdiff = get_gitdiff(filenode_old, filenode_new,
                                    ignore_whitespace=ignore_whitespace,
                                    context=line_context)
            processor = DiffProcessor(f_gitdiff, format='gitdiff')
            diff = processor.as_html(enable_comments=enable_comments)
            stats = processor.stat()
            size = len(diff or '')
        else:
            diff = wrap_to_table(_('Changeset was too big and was cut off, use '
                                   'diff menu to display this diff'))

    if not diff:
        # nothing was rendered: either a submodule or really no changes
        submodules = [node for node in [filenode_new, filenode_old]
                      if isinstance(node, SubModuleNode)]
        if submodules:
            diff = wrap_to_table(escape('Submodule %r' % submodules[0]))
        else:
            diff = wrap_to_table(_('No changes detected'))

    cs1 = filenode_old.changeset.raw_id
    cs2 = filenode_new.changeset.raw_id

    return size, cs1, cs2, diff, stats
def get_gitdiff(filenode_old, filenode_new, ignore_whitespace=True, context=3):
    """
    Returns git style diff between given ``filenode_old`` and ``filenode_new``.

    An empty string is returned when either node is a submodule.

    :param filenode_old: old file node
    :param filenode_new: new file node; its changeset's repository
        produces the diff
    :param ignore_whitespace: ignore whitespaces in diff
    :param context: number of context lines, any falsy value means 3
    :raises VCSError: if one of the nodes is not a ``FileNode``
    """
    # make sure we pass in default context
    context = context or 3

    nodes = (filenode_old, filenode_new)
    if any(isinstance(node, SubModuleNode) for node in nodes):
        return ''

    for filenode in nodes:
        if not isinstance(filenode, FileNode):
            raise VCSError("Given object should be FileNode object, not %s"
                           % filenode.__class__)

    repo = filenode_new.changeset.repository
    empty_id = repo.EMPTY_CHANGESET
    old_raw_id = getattr(filenode_old.changeset, 'raw_id', empty_id)
    new_raw_id = getattr(filenode_new.changeset, 'raw_id', empty_id)

    return repo.get_diff(old_raw_id, new_raw_id, filenode_new.path,
                         ignore_whitespace, context)
class DiffProcessor(object):
    """
    Give it a unified diff and it returns a list of the files that were
    mentioned in the diff together with a dict of meta information that
    can be used to render it in a HTML template.
    """
    # hunk header: "@@ -old_start[,old_len] +new_start[,new_len] @@ text"
    _chunk_re = re.compile(r'@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(.*)')
    _newline_marker = '\\ No newline at end of file\n'

    def __init__(self, diff, differ='diff', format='gitdiff'):
        """
        :param diff: a text in diff format or generator
        :param differ: inline highlighter; ``difflib`` selects the word
            based one, any other value the common prefix/suffix based one
        :param format: format of diff passed, `udiff` or `gitdiff`
        """
        is_text = isinstance(diff, basestring)
        if is_text:
            diff = [diff]

        self.__udiff = diff
        self.__format = format
        self.adds = 0
        self.removes = 0

        if self.__format == 'gitdiff':
            udiff_copy = self.copy_iterator()
            self.lines = imap(self.escaper, self._parse_gitdiff(udiff_copy))
        elif is_text:
            # a plain text udiff has to be split into lines, otherwise the
            # parser would see the whole diff as one single line
            self.lines = imap(self.escaper, diff[0].splitlines(1))
        else:
            udiff_copy = self.copy_iterator()
            self.lines = imap(self.escaper, udiff_copy)

        # Select a differ.
        if differ == 'difflib':
            self.differ = self._highlight_line_difflib
        else:
            self.differ = self._highlight_line_udiff

    def escaper(self, string):
        """
        HTML-escape a single diff line using ``markupsafe.escape``.
        """
        return markupsafe.escape(string)

    def copy_iterator(self):
        """
        make a fresh copy of generator, we should not iterate thru
        an original as it's needed for repeating operations on
        this instance of DiffProcessor
        """
        self.__udiff, iterator_copy = tee(self.__udiff)
        return iterator_copy

    @staticmethod
    def _strip_prefix(filename, prefix):
        """
        Return ``filename`` without a leading ``prefix`` (``a/`` or ``b/``).
        """
        if filename.startswith(prefix):
            return filename[len(prefix):]
        return filename

    def _extract_rev(self, line1, line2):
        """
        Extract the operation (A/M/D), filename and revision hint from the
        ``---`` / ``+++`` header lines.

        :param line1: the ``--- `` line
        :param line2: the ``+++ `` line
        :returns: tuple ``(operation, filename, new_rev, old_rev)``; all
            four items are ``None`` if the lines are not a file header
        """
        try:
            if line1.startswith('--- ') and line2.startswith('+++ '):
                l1 = line1[4:].split(None, 1)
                old_filename = (self._strip_prefix(l1[0], 'a/')
                                if len(l1) >= 1 else None)
                old_rev = l1[1] if len(l1) == 2 else 'old'

                l2 = line2[4:].split(None, 1)
                # check l2 here (not l1), it is l2 that gets indexed
                new_filename = (self._strip_prefix(l2[0], 'b/')
                                if len(l2) >= 1 else None)
                new_rev = l2[1] if len(l2) == 2 else 'new'

                filename = (old_filename
                            if old_filename != '/dev/null' else new_filename)

                operation = 'D' if new_filename == '/dev/null' else None
                if not operation:
                    operation = 'M' if old_filename != '/dev/null' else 'A'

                return operation, filename, new_rev, old_rev
        except (ValueError, IndexError):
            pass

        return None, None, None, None

    def _parse_gitdiff(self, diffiterator):
        """
        Turn the chunks of a git style diff into a list of decoded lines
        and record the number of added/removed lines on the instance.

        The iterator may yield zero, one or two items. With two items the
        first one is kept as a single line and the second one is split
        into lines; a single item is split into lines.

        :raises Exception: if the iterator yields more than two items
        """
        output = list(diffiterator)
        size = len(output)

        if size == 2:
            raw_lines = [output[0]]
            raw_lines.extend(output[1].splitlines(1))
        elif size == 1:
            raw_lines = output[0].splitlines(1)
        elif size == 0:
            return []
        else:
            raise Exception('wrong size of diff %s' % size)

        adds = removes = 0
        decoded = []
        for l in raw_lines:
            if l.startswith('+') and not l.startswith('+++'):
                adds += 1
            elif l.startswith('-') and not l.startswith('---'):
                removes += 1
            decoded.append(l.decode('utf8', 'replace'))

        # assign instead of accumulating: this method runs again on every
        # raw_diff() call and must not inflate what stat() reports
        self.adds = adds
        self.removes = removes
        return decoded

    def _highlight_line_difflib(self, line, next_):
        """
        Highlight inline changes in both lines.

        Both dicts are modified in place: word level differences found by
        ``difflib.SequenceMatcher`` are wrapped in ``<del>`` / ``<ins>``.
        """
        if line['action'] == 'del':
            old, new = line, next_
        else:
            old, new = next_, line

        oldwords = re.split(r'(\W)', old['line'])
        newwords = re.split(r'(\W)', new['line'])

        sequence = difflib.SequenceMatcher(None, oldwords, newwords)

        oldfragments, newfragments = [], []
        for tag, i1, i2, j1, j2 in sequence.get_opcodes():
            oldfrag = ''.join(oldwords[i1:i2])
            newfrag = ''.join(newwords[j1:j2])
            if tag != 'equal':
                if oldfrag:
                    oldfrag = '<del>%s</del>' % oldfrag
                if newfrag:
                    newfrag = '<ins>%s</ins>' % newfrag
            oldfragments.append(oldfrag)
            newfragments.append(newfrag)

        old['line'] = "".join(oldfragments)
        new['line'] = "".join(newfragments)

    def _highlight_line_udiff(self, line, next_):
        """
        Highlight inline changes in both lines.

        Both dicts are modified in place: the part between the common
        prefix and the common suffix of the two lines is wrapped in
        ``<ins>`` (for ``add`` lines) or ``<del>`` (for all other lines).
        """
        # length of the common prefix
        start = 0
        limit = min(len(line['line']), len(next_['line']))
        while start < limit and line['line'][start] == next_['line'][start]:
            start += 1

        # common suffix, expressed as a negative index; it must not run
        # into the already matched prefix
        end = -1
        limit -= start
        while -end <= limit and line['line'][end] == next_['line'][end]:
            end -= 1
        end += 1

        if start or end:
            def do(l):
                last = end + len(l['line'])
                if l['action'] == 'add':
                    tag = 'ins'
                else:
                    tag = 'del'
                l['line'] = '%s<%s>%s</%s>%s' % (
                    l['line'][:start],
                    tag,
                    l['line'][start:last],
                    tag,
                    l['line'][last:]
                )
            do(line)
            do(next_)

    def _parse_udiff(self, inline_diff=True):
        """
        Parse the diff an return data for the template.

        Consumes ``self.lines``.

        :param inline_diff: when not ``False``, adjacent add/del line pairs
            get their differences highlighted by ``self.differ``
        :returns: list of dicts (``filename``, ``old_revision``,
            ``new_revision``, ``chunks``, ``operation``, ``stats``) sorted
            by operation: added, modified, deleted
        """
        lineiter = self.lines

        files = []
        try:
            line = lineiter.next()
            while 1:
                # continue until we found the old file
                if not line.startswith('--- '):
                    line = lineiter.next()
                    continue

                chunks = []
                stats = [0, 0]
                # unpack in the order _extract_rev returns its values
                operation, filename, new_rev, old_rev = \
                    self._extract_rev(line, lineiter.next())
                files.append({
                    'filename': filename,
                    'old_revision': old_rev,
                    'new_revision': new_rev,
                    'chunks': chunks,
                    'operation': operation,
                    'stats': stats,
                })

                line = lineiter.next()
                while line:
                    match = self._chunk_re.match(line)
                    if not match:
                        break

                    lines = []
                    chunks.append(lines)

                    gr = match.groups()
                    # a missing length in the hunk header means 1
                    old_line, old_end, new_line, new_end = \
                        [int(x or 1) for x in gr[:-1]]
                    old_line -= 1
                    new_line -= 1
                    old_end += old_line
                    new_end += new_line

                    # skip context only if it's first line
                    if int(gr[0]) > 1:
                        lines.append({
                            'old_lineno': '...',
                            'new_lineno': '...',
                            'action': 'context',
                            'line': line,
                        })

                    line = lineiter.next()

                    while old_line < old_end or new_line < new_end:
                        if line:
                            command = line[0]
                            if command in ['+', '-', ' ']:
                                # only modify the line if it's actually a
                                # diff thing
                                line = line[1:]
                        else:
                            command = ' '

                        affects_old = affects_new = False

                        # ignore those if we don't expect them
                        if command in '#@':
                            # advance first, otherwise the very same line
                            # would be inspected forever
                            line = lineiter.next()
                            continue
                        elif command == '+':
                            affects_new = True
                            action = 'add'
                            stats[0] += 1
                        elif command == '-':
                            affects_old = True
                            action = 'del'
                            stats[1] += 1
                        else:
                            affects_old = affects_new = True
                            action = 'unmod'

                        if line != self._newline_marker:
                            old_line += affects_old
                            new_line += affects_new
                            lines.append({
                                'old_lineno': affects_old and old_line or '',
                                'new_lineno': affects_new and new_line or '',
                                'action': action,
                                'line': line
                            })

                        line = lineiter.next()

                        if line == self._newline_marker:
                            # we need to append to lines, since this is not
                            # counted in the line specs of diff
                            lines.append({
                                'old_lineno': '...',
                                'new_lineno': '...',
                                'action': 'context',
                                'line': line
                            })

        except StopIteration:
            # end of diff reached
            pass

        sorter = lambda info: {'A': 0, 'M': 1, 'D': 2}.get(info['operation'])

        if inline_diff is False:
            return sorted(files, key=sorter)

        # highlight inline changes
        for diff_data in files:
            for chunk in diff_data['chunks']:
                lineiter = iter(chunk)
                try:
                    while 1:
                        line = lineiter.next()
                        if line['action'] not in ['unmod', 'context']:
                            nextline = lineiter.next()
                            # only a del/add (or add/del) pair is compared
                            if nextline['action'] in ['unmod', 'context'] or \
                               nextline['action'] == line['action']:
                                continue
                            self.differ(line, nextline)
                except StopIteration:
                    pass

        return sorted(files, key=sorter)

    def prepare(self, inline_diff=True):
        """
        Prepare the passed udiff for HTML rendering. It'l return a list
        of dicts
        """
        return self._parse_udiff(inline_diff=inline_diff)

    def _safe_id(self, idstring):
        r"""Make a string safe for including in an id attribute.

        The HTML spec says that id attributes 'must begin with
        a letter ([A-Za-z]) and may be followed by any number
        of letters, digits ([0-9]), hyphens ("-"), underscores
        ("_"), colons (":"), and periods (".")'. These regexps
        are slightly over-zealous, in that they remove colons
        and periods unnecessarily.

        Whitespace is transformed into underscores, and then
        anything which is not a hyphen or a character that
        matches \w (alphanumerics and underscore) is removed.
        The result is lowercased.
        """
        # Transform all whitespace to underscore
        idstring = re.sub(r'\s', "_", '%s' % idstring)
        # Remove everything that is not a hyphen or a member of \w
        idstring = re.sub(r'(?!-)\W', "", idstring).lower()
        return idstring

    def raw_diff(self):
        """
        Returns raw string as udiff
        """
        udiff_copy = self.copy_iterator()
        if self.__format == 'gitdiff':
            udiff_copy = self._parse_gitdiff(udiff_copy)
        return u''.join(udiff_copy)

    def as_html(self, table_class='code-difftable', line_class='line',
                new_lineno_class='lineno old', old_lineno_class='lineno new',
                code_class='code', enable_comments=False, diff_lines=None):
        """
        Return given diff as html table with customized css classes

        :param diff_lines: already parsed diff as returned by
            :meth:`prepare`; parsed on demand when ``None``
        :param enable_comments: when false the code cell additionally gets
            the ``no-comment`` css class
        :returns: html string or ``None`` if the diff contains no chunks
        """
        # NOTE(review): the defaults of new_lineno_class/old_lineno_class
        # look swapped ('lineno old' for the *new* column). Left untouched
        # since stylesheets/scripts may rely on the emitted class names -
        # confirm before changing.

        def _link_to_if(condition, label, url):
            """
            Generates a link if condition is meet or just the label if not.
            """
            if condition:
                return '''<a href="%(url)s">%(label)s</a>''' % {
                    'url': url,
                    'label': label
                }
            else:
                return label

        if diff_lines is None:
            diff_lines = self.prepare()

        # identical for every row, so computed once
        comments = '' if enable_comments else 'no-comment'

        _html_empty = True
        _html = []
        _html.append('''<table class="%(table_class)s">\n''' % {
            'table_class': table_class
        })
        for diff in diff_lines:
            for line in diff['chunks']:
                _html_empty = False
                for change in line:
                    _html.append('''<tr class="%(lc)s %(action)s">\n''' % {
                        'lc': line_class,
                        'action': change['action']
                    })
                    anchor_old_id = ''
                    anchor_new_id = ''
                    safe_filename = self._safe_id(diff['filename'])
                    anchor_old = "%(filename)s_o%(oldline_no)s" % {
                        'filename': safe_filename,
                        'oldline_no': change['old_lineno']
                    }
                    anchor_new = "%(filename)s_n%(oldline_no)s" % {
                        'filename': safe_filename,
                        'oldline_no': change['new_lineno']
                    }
                    # only real line numbers get an id attribute
                    cond_old = (change['old_lineno'] != '...' and
                                change['old_lineno'])
                    cond_new = (change['new_lineno'] != '...' and
                                change['new_lineno'])
                    if cond_old:
                        anchor_old_id = 'id="%s"' % anchor_old
                    if cond_new:
                        anchor_new_id = 'id="%s"' % anchor_new
                    ###########################################################
                    # OLD LINE NUMBER
                    ###########################################################
                    _html.append('''\t<td %(a_id)s class="%(olc)s">''' % {
                        'a_id': anchor_old_id,
                        'olc': old_lineno_class
                    })

                    _html.append('''%(link)s''' % {
                        'link': _link_to_if(True, change['old_lineno'],
                                            '#%s' % anchor_old)
                    })
                    _html.append('''</td>\n''')
                    ###########################################################
                    # NEW LINE NUMBER
                    ###########################################################

                    _html.append('''\t<td %(a_id)s class="%(nlc)s">''' % {
                        'a_id': anchor_new_id,
                        'nlc': new_lineno_class
                    })

                    _html.append('''%(link)s''' % {
                        'link': _link_to_if(True, change['new_lineno'],
                                            '#%s' % anchor_new)
                    })
                    _html.append('''</td>\n''')
                    ###########################################################
                    # CODE
                    ###########################################################
                    _html.append('''\t<td class="%(cc)s %(inc)s">''' % {
                        'cc': code_class,
                        'inc': comments
                    })
                    _html.append('''\n\t\t<pre>%(code)s</pre>\n''' % {
                        'code': change['line']
                    })
                    _html.append('''\t</td>''')
                    _html.append('''\n</tr>\n''')
        _html.append('''</table>''')
        if _html_empty:
            return None
        return ''.join(_html)

    def stat(self):
        """
        Returns tuple of added, and removed lines for this instance
        """
        return self.adds, self.removes
class InMemoryBundleRepo(bundlerepository):
    """
    Bundle repository that reads its bundle from an already opened stream
    object passed to the constructor.
    """

    def __init__(self, ui, path, bundlestream):
        # only ``localrepository.__init__`` is run here; the constructor of
        # ``bundlerepository`` itself is not called
        self._tempparent = None
        localrepo.localrepository.__init__(self, ui, path)
        self.ui.setconfig('phases', 'publish', False)

        # dict with the mapping 'filename' -> position in the bundle
        self.bundlefilespos = {}
        self.bundle = bundlestream
def differ(org_repo, org_ref, other_repo, other_ref, discovery_data=None):
    """
    General differ between branches, bookmarks or separate but related
    repositories

    :param org_repo: origin repository; the mercurial repo is read from
        its ``scm_instance._repo``
    :param org_ref: origin reference, a sequence whose second item is the
        revision to diff from
    :param other_repo: other repository, accessed the same way as
        ``org_repo``
    :param other_ref: other reference, a sequence whose second item is the
        revision to diff to
    :param discovery_data: ``(common, incoming, rheads)`` triple; it is
        unpacked whenever the two repositories differ
    :returns: git style diff as a single string
    """
    org_repo = org_repo.scm_instance._repo
    other_repo = other_repo.scm_instance._repo
    # whitespace is not ignored and 3 lines of context are used
    opts = diffopts(git=True, ignorews=False, context=3)
    org_ref = org_ref[1]
    other_ref = other_ref[1]

    if org_repo != other_repo:
        bundlerepo = None
        common, incoming, rheads = discovery_data

        # create a bundle (uncompressed if other repo is not local)
        if other_repo.capable('getbundle') and incoming:
            # disable repo hooks here since it's just bundle !
            # patch and reset hooks section of UI config to not run any
            # hooks on fetching archives with subrepos
            for hook_name, _hook_value in other_repo.ui.configitems('hooks'):
                other_repo.ui.setconfig('hooks', hook_name, None)

            unbundle = other_repo.getbundle('incoming', common=common,
                                            heads=rheads)

            # slurp the whole bundle into memory, 4 KiB at a time
            read_size = 1024 * 4
            buf = BytesIO()
            chunk = unbundle._stream.read(read_size)
            while chunk:
                buf.write(chunk)
                chunk = unbundle._stream.read(read_size)
            buf.seek(0)

            # replace chunked _stream with data that can do tell() and seek()
            unbundle._stream = buf

            ui = make_ui('db')
            bundlerepo = InMemoryBundleRepo(ui, path=org_repo.root,
                                            bundlestream=unbundle)

        diff_chunks = patch.diff(bundlerepo or org_repo,
                                 node1=org_repo[org_ref].node(),
                                 node2=other_repo[other_ref].node(),
                                 opts=opts)
    else:
        diff_chunks = patch.diff(org_repo, node1=org_ref, node2=other_ref,
                                 opts=opts)

    return ''.join(diff_chunks)