diffs.py
679 lines
| 25.4 KiB
| text/x-python
|
PythonLexer
Bradley M. Kuhn
|
r4187 | # -*- coding: utf-8 -*- | ||
# This program is free software: you can redistribute it and/or modify | ||||
# it under the terms of the GNU General Public License as published by | ||||
# the Free Software Foundation, either version 3 of the License, or | ||||
# (at your option) any later version. | ||||
# | ||||
# This program is distributed in the hope that it will be useful, | ||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||||
# GNU General Public License for more details. | ||||
# | ||||
# You should have received a copy of the GNU General Public License | ||||
# along with this program. If not, see <http://www.gnu.org/licenses/>. | ||||
""" | ||||
kallithea.lib.diffs | ||||
~~~~~~~~~~~~~~~~~~~ | ||||
Set of diffing helpers, previously part of vcs | ||||
Bradley M. Kuhn
|
r4211 | This file was forked by the Kallithea project in July 2014. | ||
Original author and date, and relevant copyright and licensing information is below: | ||||
Bradley M. Kuhn
|
r4187 | :created_on: Dec 4, 2011 | ||
:author: marcink | ||||
Bradley M. Kuhn
|
r4211 | :copyright: (c) 2013 RhodeCode GmbH, and others. | ||
Bradley M. Kuhn
|
r4208 | :license: GPLv3, see LICENSE.md for more details. | ||
Bradley M. Kuhn
|
r4187 | """ | ||
import difflib | ||||
import logging | ||||
Mads Kiilerich
|
r7718 | import re | ||
Bradley M. Kuhn
|
r4187 | |||
Mads Kiilerich
|
r6508 | from tg.i18n import ugettext as _ | ||
Bradley M. Kuhn
|
r4187 | |||
domruf
|
r6864 | from kallithea.lib import helpers as h | ||
Mads Kiilerich
|
r8078 | from kallithea.lib.utils2 import safe_str | ||
Mads Kiilerich
|
r7718 | from kallithea.lib.vcs.backends.base import EmptyChangeset | ||
Bradley M. Kuhn
|
r4187 | from kallithea.lib.vcs.exceptions import VCSError | ||
from kallithea.lib.vcs.nodes import FileNode, SubModuleNode | ||||
Mads Kiilerich
|
r7718 | |||
Bradley M. Kuhn
|
r4187 | |||
log = logging.getLogger(__name__) | ||||
Mads Kiilerich
|
r6841 | def _safe_id(idstring): | ||
Mads Kiilerich
|
r7720 | r"""Make a string safe for including in an id attribute. | ||
Mads Kiilerich
|
r6841 | |||
The HTML spec says that id attributes 'must begin with | ||||
a letter ([A-Za-z]) and may be followed by any number | ||||
of letters, digits ([0-9]), hyphens ("-"), underscores | ||||
("_"), colons (":"), and periods (".")'. These regexps | ||||
are slightly over-zealous, in that they remove colons | ||||
and periods unnecessarily. | ||||
Whitespace is transformed into underscores, and then | ||||
anything which is not a hyphen or a character that | ||||
matches \w (alphanumerics and underscore) is removed. | ||||
""" | ||||
# Transform all whitespace to underscore | ||||
idstring = re.sub(r'\s', "_", idstring) | ||||
# Remove everything that is not a hyphen or a member of \w | ||||
idstring = re.sub(r'(?!-)\W', "", idstring).lower() | ||||
return idstring | ||||
def as_html(table_class='code-difftable', line_class='line',
            old_lineno_class='lineno old', new_lineno_class='lineno new',
            no_lineno_class='lineno',
            code_class='code', enable_comments=False, parsed_lines=None):
    """
    Return given diff as html table with customized css classes

    :param table_class: css class of the ``<table>`` element
    :param line_class: css class of every ``<tr>``; the line's 'action' is
        added as a second class
    :param old_lineno_class: css class of the old line number cell
    :param new_lineno_class: css class of the new line number cell
    :param no_lineno_class: css class of the single, two column wide, cell
        used when both line numbers are '...'
    :param code_class: css class of the code cell
    :param enable_comments: when false, code cells also get the
        'no-comment' class
    :param parsed_lines: iterable of per-file dicts with a 'filename' and a
        'chunks' list; each chunk is a list of dicts with the keys
        'old_lineno', 'new_lineno', 'action' and 'line'. The 'line' value is
        inserted verbatim, so it must already be HTML safe.
    :returns: the table markup as a string, or None if there is no chunk to
        show (including when ``parsed_lines`` is None)
    """
    def _link_to_if(condition, label, url):
        """
        Generates a link if condition is met or just the label if not.
        """
        if condition:
            return '''<a href="%(url)s" data-pseudo-content="%(label)s"></a>''' % {
                'url': url,
                'label': label
            }
        return label

    if parsed_lines is None:
        # The default value must not blow up in the loop below; treat it
        # like a diff without any chunks.
        return None

    # Same for every row, so decide once.
    comments = '' if enable_comments else 'no-comment'

    _html_empty = True
    _html = []
    _html.append('''<table class="%(table_class)s">\n''' % {
        'table_class': table_class
    })

    for diff in parsed_lines:
        # Anchor prefix for this file; computed on first use only so that
        # files without any lines never need a 'filename'.
        file_id = None
        for line in diff['chunks']:
            _html_empty = False
            for change in line:
                if file_id is None:
                    file_id = _safe_id(diff['filename'])

                _html.append('''<tr class="%(lc)s %(action)s">\n''' % {
                    'lc': line_class,
                    'action': change['action']
                })
                anchor_old = "%s_o%s" % (file_id, change['old_lineno'])
                anchor_new = "%s_n%s" % (file_id, change['new_lineno'])

                # A line number is "real" if it is neither the '...'
                # placeholder nor empty.
                cond_old = (change['old_lineno'] != '...' and
                            change['old_lineno'])
                cond_new = (change['new_lineno'] != '...' and
                            change['new_lineno'])
                no_lineno = (change['old_lineno'] == '...' and
                             change['new_lineno'] == '...')

                anchor_old_id = 'id="%s"' % anchor_old if cond_old else ''
                anchor_new_id = 'id="%s"' % anchor_new if cond_new else ''

                ###########################################################
                # OLD LINE NUMBER
                ###########################################################
                _html.append('''\t<td %(a_id)s class="%(olc)s" %(colspan)s>''' % {
                    'a_id': anchor_old_id,
                    'olc': no_lineno_class if no_lineno else old_lineno_class,
                    'colspan': 'colspan="2"' if no_lineno else ''
                })
                _html.append('''%(link)s''' % {
                    'link': _link_to_if(not no_lineno, change['old_lineno'],
                                        '#%s' % anchor_old)
                })
                _html.append('''</td>\n''')

                ###########################################################
                # NEW LINE NUMBER
                ###########################################################
                # The old line number cell spans both columns when there
                # are no line numbers at all.
                if not no_lineno:
                    _html.append('''\t<td %(a_id)s class="%(nlc)s">''' % {
                        'a_id': anchor_new_id,
                        'nlc': new_lineno_class
                    })
                    _html.append('''%(link)s''' % {
                        'link': _link_to_if(True, change['new_lineno'],
                                            '#%s' % anchor_new)
                    })
                    _html.append('''</td>\n''')

                ###########################################################
                # CODE
                ###########################################################
                _html.append('''\t<td class="%(cc)s %(inc)s">''' % {
                    'cc': code_class,
                    'inc': comments
                })
                _html.append('''\n\t\t<div class="add-bubble"><div>&nbsp;</div></div><pre>%(code)s</pre>\n''' % {
                    'code': change['line']
                })
                _html.append('''\t</td>''')
                _html.append('''\n</tr>\n''')

    _html.append('''</table>''')
    if _html_empty:
        return None
    return ''.join(_html)
Mads Kiilerich
|
def wrap_to_table(html):
    """Return ``html`` wrapped in a one row table that mimics the markup
    DiffProcessor output is rendered with.

    ``html`` is inserted as is inside the ``<pre>`` element, so it must
    already be HTML safe.
    """
    template = '''\
              <table class="code-difftable">
                <tr class="line no-comment">
                <td class="lineno new"></td>
                <td class="code no-comment"><pre>%s</pre></td>
                </tr>
              </table>'''
    return template % html
Bradley M. Kuhn
|
r4187 | |||
Mads Kiilerich
|
def wrapped_diff(filenode_old, filenode_new, diff_limit=None,
                 ignore_whitespace=True, line_context=3,
                 enable_comments=False):
    """
    Build the HTML table for the diff between two file nodes.

    A short message is shown instead of a diff if either node is binary,
    if the diff is considered too big, or if nothing can be shown.

    :param filenode_old: old node, or None to diff against an empty file
        at the path of ``filenode_new``
    :param filenode_new: new node
    :param diff_limit: None to always diff, -1 to never diff, otherwise
        both node sizes must be below this value for the diff to be made
    :param ignore_whitespace: passed on to get_gitdiff
    :param line_context: passed on to get_gitdiff as ``context``
    :param enable_comments: passed on to as_html
    :returns: tuple of (old changeset raw_id, new changeset raw_id, old path,
        html, stats, operation); operation is None unless a diff was parsed
    """
    if filenode_old is None:
        filenode_old = FileNode(filenode_new.path, '', EmptyChangeset())

    def _may_diff():
        # Sizes are only looked at when a real limit is given.
        if diff_limit == -1:
            return False
        if diff_limit is None:
            return True
        return (filenode_old.size < diff_limit and
                filenode_new.size < diff_limit)

    op = None
    a_path = filenode_old.path  # default, might be overridden by actual rename in diff
    stats = (0, 0)

    if filenode_old.is_binary or filenode_new.is_binary:
        html_diff = wrap_to_table(_('Binary file'))
    elif _may_diff():
        processor = DiffProcessor(
            get_gitdiff(filenode_old, filenode_new,
                        ignore_whitespace=ignore_whitespace,
                        context=line_context))
        if processor.parsed:
            # there should be exactly one element, for the specified file
            file_info = processor.parsed[0]
            op = file_info['operation']
            a_path = file_info['old_filename']

        html_diff = as_html(parsed_lines=processor.parsed,
                            enable_comments=enable_comments)
        stats = processor.stat()
    else:
        html_diff = wrap_to_table(_('Changeset was too big and was cut off, use '
                                    'diff menu to display this diff'))

    if not html_diff:
        # as_html had nothing to render - explain why
        submodule = next((node for node in (filenode_new, filenode_old)
                          if isinstance(node, SubModuleNode)), None)
        if submodule is not None:
            html_diff = wrap_to_table(h.escape('Submodule %r' % submodule))
        else:
            html_diff = wrap_to_table(_('No changes detected'))

    return (filenode_old.changeset.raw_id, filenode_new.changeset.raw_id,
            a_path, html_diff, stats, op)
Bradley M. Kuhn
|
r4187 | |||
def get_gitdiff(filenode_old, filenode_new, ignore_whitespace=True, context=3):
    """
    Returns git style diff between given ``filenode_old`` and ``filenode_new``.

    An empty bytes string is returned if either node is a SubModuleNode.

    :raises VCSError: if a node is not a FileNode
    """
    if not context:
        # make sure we pass in default context
        context = 3

    nodes = (filenode_old, filenode_new)
    if any(isinstance(node, SubModuleNode) for node in nodes):
        return b''

    for node in nodes:
        if not isinstance(node, FileNode):
            raise VCSError("Given object should be FileNode object, not %s"
                           % node.__class__)

    repo = filenode_new.changeset.repository
    # changesets without a raw_id are diffed as the empty changeset
    old_raw_id, new_raw_id = (
        getattr(node.changeset, 'raw_id', repo.EMPTY_CHANGESET)
        for node in nodes)

    return get_diff(repo, old_raw_id, new_raw_id, filenode_new.path,
                    ignore_whitespace, context)
Mads Kiilerich
|
r6863 | |||
def get_diff(scm_instance, rev1, rev2, path=None, ignore_whitespace=False, context=3):
    """
    A thin wrapper around vcs lib get_diff.

    Returns whatever ``scm_instance.get_diff`` returns. If that call runs
    out of memory, an error is flashed and empty bytes are returned.
    """
    try:
        raw_diff = scm_instance.get_diff(rev1, rev2, path=path,
                                         ignore_whitespace=ignore_whitespace,
                                         context=context)
    except MemoryError:
        h.flash('MemoryError: Diff is too big', category='error')
        raw_diff = b''
    return raw_diff
Mads Kiilerich
|
r6863 | |||
Bradley M. Kuhn
|
# Kinds of operations DiffProcessor can detect for a file in a diff. They are
# used as keys in the per-file stats['ops'] dict, where each maps to a short
# human readable description of the operation.
NEW_FILENODE = 1  # file was added
DEL_FILENODE = 2  # file was deleted
MOD_FILENODE = 3  # file was modified
RENAMED_FILENODE = 4  # file was renamed
COPIED_FILENODE = 5  # file was copied
CHMOD_FILENODE = 6  # file mode was changed
BIN_FILENODE = 7  # Git binary patch, content is not shown
class DiffProcessor(object):
    """
    Give it a unified or git diff and it returns a list of the files that were
    mentioned in the diff together with a dict of meta information that
    can be used to render it in a HTML template.

    The result is available as ``parsed`` right after construction;
    ``limited_diff`` tells whether files were left out due to ``diff_limit``.
    """
    _diff_git_re = re.compile(b'^diff --git', re.MULTILINE)

    def __init__(self, diff, vcs='hg', diff_limit=None, inline_diff=True):
        """
        :param diff: the diff, as bytes
        :param vcs: type of version control hg or git
        :param diff_limit: define the size of diff that is considered "big"
            based on that parameter cut off will be triggered, set to None
            to show full diff
        :param inline_diff: also mark up the changed words when one removed
            line is followed by exactly one added line
        :raises Exception: if ``diff`` is not bytes
        """
        if not isinstance(diff, bytes):
            raise Exception('Diff must be bytes - got %s' % type(diff))

        # a memoryview makes the per-file slices below copy free
        self._diff = memoryview(diff)
        self.adds = 0
        self.removes = 0
        self.diff_limit = diff_limit
        self.limited_diff = False
        self.vcs = vcs
        self.parsed = self._parse_gitdiff(inline_diff=inline_diff)

    def _parse_gitdiff(self, inline_diff):
        """Parse self._diff and return a list of dicts with meta info and chunks for each file.
        Might set limited_diff, and adds up the added/removed line totals
        reported by stat().
        Optionally, do an extra pass and do extra markup of one-liner changes.
        """
        _files = []  # list of dicts with meta info and chunks

        # every 'diff --git' line starts a new file; the end of the data
        # terminates the last one
        starts = [m.start() for m in self._diff_git_re.finditer(self._diff)]
        starts.append(len(self._diff))
        for start, end in zip(starts, starts[1:]):
            if self.diff_limit and end > self.diff_limit:
                # this file ends beyond the limit - leave it out
                self.limited_diff = True
                continue

            head, diff_lines = _get_header(self.vcs, self._diff[start:end])

            op = None
            stats = {
                'added': 0,
                'deleted': 0,
                'binary': False,
                'ops': {},
            }

            if head['deleted_file_mode']:
                op = 'removed'
                stats['binary'] = True
                stats['ops'][DEL_FILENODE] = 'deleted file'

            elif head['new_file_mode']:
                op = 'added'
                stats['binary'] = True
                stats['ops'][NEW_FILENODE] = 'new file %s' % head['new_file_mode']
            else:  # modify operation, can be cp, rename, chmod
                # CHMOD
                if head['new_mode'] and head['old_mode']:
                    op = 'modified'
                    stats['binary'] = True
                    stats['ops'][CHMOD_FILENODE] = ('modified file chmod %s => %s'
                                                    % (head['old_mode'], head['new_mode']))
                # RENAME
                if (head['rename_from'] and head['rename_to']
                        and head['rename_from'] != head['rename_to']):
                    op = 'renamed'
                    stats['binary'] = True
                    stats['ops'][RENAMED_FILENODE] = ('file renamed from %s to %s'
                                                      % (head['rename_from'], head['rename_to']))
                # COPY - .get() since not every header format has these keys
                if head.get('copy_from') and head.get('copy_to'):
                    op = 'modified'
                    stats['binary'] = True
                    stats['ops'][COPIED_FILENODE] = ('file copied from %s to %s'
                                                     % (head['copy_from'], head['copy_to']))
                # FALL BACK: detect missed old style add or remove
                if op is None:
                    if not head['a_file'] and head['b_file']:
                        op = 'added'
                        stats['binary'] = True
                        stats['ops'][NEW_FILENODE] = 'new file'

                    elif head['a_file'] and not head['b_file']:
                        op = 'removed'
                        stats['binary'] = True
                        stats['ops'][DEL_FILENODE] = 'deleted file'

                # it's not ADD not DELETE
                if op is None:
                    op = 'modified'
                    stats['binary'] = True
                    stats['ops'][MOD_FILENODE] = 'modified file'

            # a real non-binary diff
            if head['a_file'] or head['b_file']:
                chunks, added, deleted = _parse_lines(diff_lines)
                stats['binary'] = False
                stats['added'] = added
                stats['deleted'] = deleted
                # Keep the totals for the whole diff up to date - without
                # this, stat() would always report (0, 0).
                self.adds += added
                self.removes += deleted
                # explicit mark that it's a modified file
                if op == 'modified':
                    stats['ops'][MOD_FILENODE] = 'modified file'
            else:  # Git binary patch (or empty diff)
                # Git binary patch
                if head['bin_patch']:
                    stats['ops'][BIN_FILENODE] = 'binary diff not shown'
                chunks = []

            if op == 'removed' and chunks:
                # a way of seeing deleted content could perhaps be nice - but
                # not with the current UI
                chunks = []

            # First chunk: one pseudo line per operation (plain modification
            # is not worth mentioning). The messages can contain file names
            # taken from the diff header, and 'line' values are emitted as
            # HTML as is, so they must be escaped. stats['ops'] keeps the raw
            # text.
            chunks.insert(0, [{
                'old_lineno': '',
                'new_lineno': '',
                'action': 'context',
                'line': str(h.escape(msg)),
            } for _op, msg in stats['ops'].items()
                if _op not in [MOD_FILENODE]])

            _files.append({
                'old_filename': head['a_path'],
                'filename': head['b_path'],
                'old_revision': head['a_blob_id'],
                'new_revision': head['b_blob_id'],
                'chunks': chunks,
                'operation': op,
                'stats': stats,
            })

        if not inline_diff:
            return _files

        # highlight inline changes when one del is followed by one add
        for diff_data in _files:
            for chunk in diff_data['chunks']:
                lineiter = iter(chunk)
                # running out of lines at any point ends the scan of this
                # chunk, hence the single StopIteration handler around it all
                try:
                    peekline = next(lineiter)
                    while True:
                        # find a first del line
                        while peekline['action'] != 'del':
                            peekline = next(lineiter)
                        delline = peekline
                        peekline = next(lineiter)
                        # if not followed by add, eat all following del lines
                        if peekline['action'] != 'add':
                            while peekline['action'] == 'del':
                                peekline = next(lineiter)
                            continue
                        # found an add - make sure it is the only one
                        addline = peekline
                        try:
                            peekline = next(lineiter)
                        except StopIteration:
                            # add was last line - ok
                            _highlight_inline_diff(delline, addline)
                            raise
                        if peekline['action'] != 'add':
                            # there was only one add line - ok
                            _highlight_inline_diff(delline, addline)
                except StopIteration:
                    pass

        return _files

    def stat(self):
        """
        Returns tuple of added, and removed lines for this instance
        """
        return self.adds, self.removes
Mads Kiilerich
|
r6842 | |||
Mads Kiilerich
|
# One capture group per kind of text that needs markup: the three HTML special
# characters, tab, carriage return, and a trailing space (before a newline or
# at the end of the text, but only if something precedes it).
_escape_re = re.compile(r'(&)|(<)|(>)|(\t)|(\r)|(?<=.)( \n| $)')

# Replacement for each capture group of _escape_re, in group order.
# _token_re lists the same strings so they are kept together as single words.
_escape_replacements = (
    '&amp;',
    '&lt;',
    '&gt;',
    '<u>\t</u>',
    '<u class="cr"></u>',
    ' <i></i>',
)


def _escaper(string):
    """
    Do HTML escaping/markup

    ``&``, ``<`` and ``>`` are replaced with character references so the
    text can not be interpreted as markup. Tabs, carriage returns and
    trailing spaces are wrapped in / replaced by ``<u>`` and ``<i>`` elements.

    ``string`` is passed through safe_str first; the result is a str.
    """
    def substitute(m):
        # The groups are alternatives, so exactly one of them took part in
        # the match and lastindex (1-based) is that group.
        return _escape_replacements[m.lastindex - 1]

    return _escape_re.sub(substitute, safe_str(string))
Mads Kiilerich
|
r6843 | |||
Mads Kiilerich
|
# Header of one file in a diff as produced by Git. All parts after the
# 'diff --git' line are optional.
_git_header_re = re.compile(br"""
    ^diff[ ]--git[ ]a/(?P<a_path>.+?)[ ]b/(?P<b_path>.+?)\n
    (?:^old[ ]mode[ ](?P<old_mode>\d+)\n
       ^new[ ]mode[ ](?P<new_mode>\d+)(?:\n|$))?
    (?:^similarity[ ]index[ ](?P<similarity_index>\d+)%\n
       ^rename[ ]from[ ](?P<rename_from>.+)\n
       ^rename[ ]to[ ](?P<rename_to>.+)(?:\n|$))?
    (?:^new[ ]file[ ]mode[ ](?P<new_file_mode>.+)(?:\n|$))?
    (?:^deleted[ ]file[ ]mode[ ](?P<deleted_file_mode>.+)(?:\n|$))?
    (?:^index[ ](?P<a_blob_id>[0-9A-Fa-f]+)
        \.\.(?P<b_blob_id>[0-9A-Fa-f]+)[ ]?(?P<b_mode>.+)?(?:\n|$))?
    (?:^(?P<bin_patch>GIT[ ]binary[ ]patch)(?:\n|$))?
    (?:^---[ ](a/(?P<a_file>.+?)|/dev/null)\t?(?:\n|$))?
    (?:^\+\+\+[ ](b/(?P<b_file>.+?)|/dev/null)\t?(?:\n|$))?
""", re.VERBOSE | re.MULTILINE)

# Same for Mercurial: the similarity index is optional on its own, and there
# is an additional 'copy from' / 'copy to' pair.
_hg_header_re = re.compile(br"""
    ^diff[ ]--git[ ]a/(?P<a_path>.+?)[ ]b/(?P<b_path>.+?)\n
    (?:^old[ ]mode[ ](?P<old_mode>\d+)\n
       ^new[ ]mode[ ](?P<new_mode>\d+)(?:\n|$))?
    (?:^similarity[ ]index[ ](?P<similarity_index>\d+)%(?:\n|$))?
    (?:^rename[ ]from[ ](?P<rename_from>.+)\n
       ^rename[ ]to[ ](?P<rename_to>.+)(?:\n|$))?
    (?:^copy[ ]from[ ](?P<copy_from>.+)\n
       ^copy[ ]to[ ](?P<copy_to>.+)(?:\n|$))?
    (?:^new[ ]file[ ]mode[ ](?P<new_file_mode>.+)(?:\n|$))?
    (?:^deleted[ ]file[ ]mode[ ](?P<deleted_file_mode>.+)(?:\n|$))?
    (?:^index[ ](?P<a_blob_id>[0-9A-Fa-f]+)
        \.\.(?P<b_blob_id>[0-9A-Fa-f]+)[ ]?(?P<b_mode>.+)?(?:\n|$))?
    (?:^(?P<bin_patch>GIT[ ]binary[ ]patch)(?:\n|$))?
    (?:^---[ ](a/(?P<a_file>.+?)|/dev/null)\t?(?:\n|$))?
    (?:^\+\+\+[ ](b/(?P<b_file>.+?)|/dev/null)\t?(?:\n|$))?
""", re.VERBOSE | re.MULTILINE)

# Matches (the empty string) unless what follows starts like a hunk ('@') or
# like binary patch data ('literal ' / 'delta ').
_header_next_check = re.compile(br'''(?!@)(?!literal )(?!delta )''')


def _get_header(vcs, diff_chunk):
    """
    Split the diff of a single file into its header and its body.

    Returns a tuple with:
        1. A dict with meta info (str values, or None for parts that are
           not present):

            a_path, b_path, similarity_index, rename_from, rename_to,
            old_mode, new_mode, new_file_mode, deleted_file_mode,
            a_blob_id, b_blob_id, b_mode, a_file, b_file

           plus bin_patch, and for hg also copy_from and copy_to.
        2. An iterator yielding the body lines, passed through _escaper.

    An Exception is raised if the header is not recognized (which is always
    the case if ``vcs`` is neither 'git' nor 'hg'), or if it is followed by
    something that is neither a hunk nor binary patch data.
    """
    header_re = {'git': _git_header_re, 'hg': _hg_header_re}.get(vcs)
    match = header_re.match(diff_chunk) if header_re is not None else None
    if match is None:
        raise Exception('diff not recognized as valid %s diff' % vcs)

    meta_info = {}
    for key, value in match.groupdict().items():
        meta_info[key] = None if value is None else safe_str(value)

    header_end = match.end()
    rest = diff_chunk[header_end:]
    if rest and _header_next_check.match(rest):
        raise Exception('cannot parse %s diff header: %r followed by %r' % (
            vcs,
            safe_str(bytes(diff_chunk[:header_end])),
            safe_str(bytes(rest[:1000]))))

    # don't split on \r as str.splitlines do
    diff_lines = (_escaper(found.group(0))
                  for found in re.finditer(br'.*\n|.+$', rest))
    return meta_info, diff_lines
Mads Kiilerich
|
r6845 | _chunk_re = re.compile(r'^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(.*)') | ||
_newline_marker = re.compile(r'^\\ No newline at end of file') | ||||
def _parse_lines(diff_lines): | ||||
""" | ||||
Given an iterator of diff body lines, parse them and return a dict per | ||||
line and added/removed totals. | ||||
""" | ||||
added = deleted = 0 | ||||
old_line = old_end = new_line = new_end = None | ||||
Mads Kiilerich
|
r8032 | chunks = [] | ||
Mads Kiilerich
|
r6845 | try: | ||
Mads Kiilerich
|
r8057 | line = next(diff_lines) | ||
Mads Kiilerich
|
r6845 | |||
while True: | ||||
lines = [] | ||||
chunks.append(lines) | ||||
match = _chunk_re.match(line) | ||||
if not match: | ||||
raise Exception('error parsing diff @@ line %r' % line) | ||||
gr = match.groups() | ||||
(old_line, old_end, | ||||
new_line, new_end) = [int(x or 1) for x in gr[:-1]] | ||||
old_line -= 1 | ||||
new_line -= 1 | ||||
context = len(gr) == 5 | ||||
old_end += old_line | ||||
new_end += new_line | ||||
if context: | ||||
# skip context only if it's first line | ||||
if int(gr[0]) > 1: | ||||
lines.append({ | ||||
'old_lineno': '...', | ||||
'new_lineno': '...', | ||||
'action': 'context', | ||||
'line': line, | ||||
}) | ||||
Mads Kiilerich
|
r8057 | line = next(diff_lines) | ||
Mads Kiilerich
|
r6845 | |||
while old_line < old_end or new_line < new_end: | ||||
if not line: | ||||
raise Exception('error parsing diff - empty line at -%s+%s' % (old_line, new_line)) | ||||
affects_old = affects_new = False | ||||
command = line[0] | ||||
if command == '+': | ||||
affects_new = True | ||||
action = 'add' | ||||
added += 1 | ||||
elif command == '-': | ||||
affects_old = True | ||||
action = 'del' | ||||
deleted += 1 | ||||
elif command == ' ': | ||||
affects_old = affects_new = True | ||||
action = 'unmod' | ||||
else: | ||||
raise Exception('error parsing diff - unknown command in line %r at -%s+%s' % (line, old_line, new_line)) | ||||
if not _newline_marker.match(line): | ||||
old_line += affects_old | ||||
new_line += affects_new | ||||
lines.append({ | ||||
'old_lineno': affects_old and old_line or '', | ||||
'new_lineno': affects_new and new_line or '', | ||||
'action': action, | ||||
'line': line[1:], | ||||
}) | ||||
Mads Kiilerich
|
r8057 | line = next(diff_lines) | ||
Mads Kiilerich
|
r6845 | |||
if _newline_marker.match(line): | ||||
# we need to append to lines, since this is not | ||||
# counted in the line specs of diff | ||||
lines.append({ | ||||
'old_lineno': '...', | ||||
'new_lineno': '...', | ||||
'action': 'context', | ||||
'line': line, | ||||
}) | ||||
Mads Kiilerich
|
r8057 | line = next(diff_lines) | ||
Mads Kiilerich
|
r6845 | if old_line > old_end: | ||
raise Exception('error parsing diff - more than %s "-" lines at -%s+%s' % (old_end, old_line, new_line)) | ||||
if new_line > new_end: | ||||
raise Exception('error parsing diff - more than %s "+" lines at -%s+%s' % (new_end, old_line, new_line)) | ||||
except StopIteration: | ||||
pass | ||||
if old_line != old_end or new_line != new_end: | ||||
raise Exception('diff processing broken when old %s<>%s or new %s<>%s line %r' % (old_line, old_end, new_line, new_end, line)) | ||||
return chunks, added, deleted | ||||
Mads Kiilerich
|
r6842 | # Used for inline highlighter word split, must match the substitutions in _escaper | ||
_token_re = re.compile(r'()(&|<|>|<u>\t</u>|<u class="cr"></u>| <i></i>|\W+?)') | ||||
def _highlight_inline_diff(old, new): | ||||
""" | ||||
Highlight simple add/remove in two lines given as info dicts. They are | ||||
modified in place and given markup with <del>/<ins>. | ||||
""" | ||||
assert old['action'] == 'del' | ||||
assert new['action'] == 'add' | ||||
oldwords = _token_re.split(old['line']) | ||||
newwords = _token_re.split(new['line']) | ||||
sequence = difflib.SequenceMatcher(None, oldwords, newwords) | ||||
oldfragments, newfragments = [], [] | ||||
for tag, i1, i2, j1, j2 in sequence.get_opcodes(): | ||||
oldfrag = ''.join(oldwords[i1:i2]) | ||||
newfrag = ''.join(newwords[j1:j2]) | ||||
if tag != 'equal': | ||||
if oldfrag: | ||||
oldfrag = '<del>%s</del>' % oldfrag | ||||
if newfrag: | ||||
newfrag = '<ins>%s</ins>' % newfrag | ||||
oldfragments.append(oldfrag) | ||||
newfragments.append(newfrag) | ||||
old['line'] = "".join(oldfragments) | ||||
new['line'] = "".join(newfragments) | ||||