# -*- coding: utf-8 -*-
# Copyright (C) 2011-2019 RhodeCode GmbH
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License, version 3
# (only), as published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see %s' % oldfrag
if newfrag:
newfrag = '%s' % newfrag
oldfragments.append(oldfrag)
newfragments.append(newfrag)
old['line'] = "".join(oldfragments)
new['line'] = "".join(newfragments)
def _highlight_line_udiff(self, line, next_):
    """
    Highlight inline changes in both lines.

    The common prefix and common suffix of the two ``'line'`` values are
    located, and the differing middle part of each line is wrapped in an
    ``<ins>`` tag (when the line's action is ``Action.ADD``) or a ``<del>``
    tag (any other action). Both dicts are modified in place.

    :param line: line dict with at least ``'line'`` and ``'action'`` keys
    :param next_: the line dict to compare ``line`` against
    """
    # length of the common prefix
    start = 0
    limit = min(len(line['line']), len(next_['line']))
    while start < limit and line['line'][start] == next_['line'][start]:
        start += 1

    # length of the common suffix, kept as a negative offset; the prefix
    # is excluded from the budget so prefix and suffix never overlap
    end = -1
    limit -= start
    while -end <= limit and line['line'][end] == next_['line'][end]:
        end -= 1
    end += 1

    if start or end:
        def do(l):
            # absolute index where the common suffix begins in this line
            last = end + len(l['line'])
            if l['action'] == Action.ADD:
                tag = 'ins'
            else:
                tag = 'del'
            # the closing tag must be ``</tag>``; without the ``</`` the
            # generated markup is left unterminated
            l['line'] = '%s<%s>%s</%s>%s' % (
                l['line'][:start],
                tag,
                l['line'][start:last],
                tag,
                l['line'][last:]
            )

        do(line)
        do(next_)
def _clean_line(self, line, command):
if command in ['+', '-', ' ']:
# only modify the line if it's actually a diff thing
line = line[1:]
return line
def _parse_gitdiff(self, inline_diff=True):
_files = []
diff_container = lambda arg: arg
for chunk in self._diff.chunks():
head = chunk.header
diff = imap(self._escaper, self.diff_splitter(chunk.diff))
raw_diff = chunk.raw
limited_diff = False
exceeds_limit = False
op = None
stats = {
'added': 0,
'deleted': 0,
'binary': False,
'ops': {},
}
if head['deleted_file_mode']:
op = OPS.DEL
stats['binary'] = True
stats['ops'][DEL_FILENODE] = 'deleted file'
elif head['new_file_mode']:
op = OPS.ADD
stats['binary'] = True
stats['ops'][NEW_FILENODE] = 'new file %s' % head['new_file_mode']
else: # modify operation, can be copy, rename or chmod
# CHMOD
if head['new_mode'] and head['old_mode']:
op = OPS.MOD
stats['binary'] = True
stats['ops'][CHMOD_FILENODE] = (
'modified file chmod %s => %s' % (
head['old_mode'], head['new_mode']))
# RENAME
if head['rename_from'] != head['rename_to']:
op = OPS.MOD
stats['binary'] = True
stats['ops'][RENAMED_FILENODE] = (
'file renamed from %s to %s' % (
head['rename_from'], head['rename_to']))
# COPY
if head.get('copy_from') and head.get('copy_to'):
op = OPS.MOD
stats['binary'] = True
stats['ops'][COPIED_FILENODE] = (
'file copied from %s to %s' % (
head['copy_from'], head['copy_to']))
# If our new parsed headers didn't match anything fallback to
# old style detection
if op is None:
if not head['a_file'] and head['b_file']:
op = OPS.ADD
stats['binary'] = True
stats['ops'][NEW_FILENODE] = 'new file'
elif head['a_file'] and not head['b_file']:
op = OPS.DEL
stats['binary'] = True
stats['ops'][DEL_FILENODE] = 'deleted file'
# it's not ADD not DELETE
if op is None:
op = OPS.MOD
stats['binary'] = True
stats['ops'][MOD_FILENODE] = 'modified file'
# a real non-binary diff
if head['a_file'] or head['b_file']:
try:
raw_diff, chunks, _stats = self._parse_lines(diff)
stats['binary'] = False
stats['added'] = _stats[0]
stats['deleted'] = _stats[1]
# explicit mark that it's a modified file
if op == OPS.MOD:
stats['ops'][MOD_FILENODE] = 'modified file'
exceeds_limit = len(raw_diff) > self.file_limit
# changed from _escaper function so we validate size of
# each file instead of the whole diff
# diff will hide big files but still show small ones
# from my tests, big files are fairly safe to be parsed
# but the browser is the bottleneck
if not self.show_full_diff and exceeds_limit:
raise DiffLimitExceeded('File Limit Exceeded')
except DiffLimitExceeded:
diff_container = lambda _diff: \
LimitedDiffContainer(
self.diff_limit, self.cur_diff_size, _diff)
exceeds_limit = len(raw_diff) > self.file_limit
limited_diff = True
chunks = []
else: # GIT format binary patch, or possibly empty diff
if head['bin_patch']:
# we have operation already extracted, but we mark simply
# it's a diff we wont show for binary files
stats['ops'][BIN_FILENODE] = 'binary diff hidden'
chunks = []
if chunks and not self.show_full_diff and op == OPS.DEL:
# if not full diff mode show deleted file contents
# TODO: anderson: if the view is not too big, there is no way
# to see the content of the file
chunks = []
chunks.insert(0, [{
'old_lineno': '',
'new_lineno': '',
'action': Action.CONTEXT,
'line': msg,
} for _op, msg in stats['ops'].iteritems()
if _op not in [MOD_FILENODE]])
_files.append({
'filename': safe_unicode(head['b_path']),
'old_revision': head['a_blob_id'],
'new_revision': head['b_blob_id'],
'chunks': chunks,
'raw_diff': safe_unicode(raw_diff),
'operation': op,
'stats': stats,
'exceeds_limit': exceeds_limit,
'is_limited_diff': limited_diff,
})
sorter = lambda info: {OPS.ADD: 0, OPS.MOD: 1,
OPS.DEL: 2}.get(info['operation'])
if not inline_diff:
return diff_container(sorted(_files, key=sorter))
# highlight inline changes
for diff_data in _files:
for chunk in diff_data['chunks']:
lineiter = iter(chunk)
try:
while 1:
line = lineiter.next()
if line['action'] not in (
Action.UNMODIFIED, Action.CONTEXT):
nextline = lineiter.next()
if nextline['action'] in ['unmod', 'context'] or \
nextline['action'] == line['action']:
continue
self.differ(line, nextline)
except StopIteration:
pass
return diff_container(sorted(_files, key=sorter))
def _check_large_diff(self):
log.debug('Diff exceeds current diff_limit of %s', self.diff_limit)
if not self.show_full_diff and (self.cur_diff_size > self.diff_limit):
raise DiffLimitExceeded('Diff Limit `%s` Exceeded', self.diff_limit)
# FIXME: NEWDIFFS: dan: this replaces _parse_gitdiff
def _new_parse_gitdiff(self, inline_diff=True):
_files = []
# this can be overriden later to a LimitedDiffContainer type
diff_container = lambda arg: arg
for chunk in self._diff.chunks():
head = chunk.header
log.debug('parsing diff %r', head)
raw_diff = chunk.raw
limited_diff = False
exceeds_limit = False
op = None
stats = {
'added': 0,
'deleted': 0,
'binary': False,
'old_mode': None,
'new_mode': None,
'ops': {},
}
if head['old_mode']:
stats['old_mode'] = head['old_mode']
if head['new_mode']:
stats['new_mode'] = head['new_mode']
if head['b_mode']:
stats['new_mode'] = head['b_mode']
# delete file
if head['deleted_file_mode']:
op = OPS.DEL
stats['binary'] = True
stats['ops'][DEL_FILENODE] = 'deleted file'
# new file
elif head['new_file_mode']:
op = OPS.ADD
stats['binary'] = True
stats['old_mode'] = None
stats['new_mode'] = head['new_file_mode']
stats['ops'][NEW_FILENODE] = 'new file %s' % head['new_file_mode']
# modify operation, can be copy, rename or chmod
else:
# CHMOD
if head['new_mode'] and head['old_mode']:
op = OPS.MOD
stats['binary'] = True
stats['ops'][CHMOD_FILENODE] = (
'modified file chmod %s => %s' % (
head['old_mode'], head['new_mode']))
# RENAME
if head['rename_from'] != head['rename_to']:
op = OPS.MOD
stats['binary'] = True
stats['renamed'] = (head['rename_from'], head['rename_to'])
stats['ops'][RENAMED_FILENODE] = (
'file renamed from %s to %s' % (
head['rename_from'], head['rename_to']))
# COPY
if head.get('copy_from') and head.get('copy_to'):
op = OPS.MOD
stats['binary'] = True
stats['copied'] = (head['copy_from'], head['copy_to'])
stats['ops'][COPIED_FILENODE] = (
'file copied from %s to %s' % (
head['copy_from'], head['copy_to']))
# If our new parsed headers didn't match anything fallback to
# old style detection
if op is None:
if not head['a_file'] and head['b_file']:
op = OPS.ADD
stats['binary'] = True
stats['new_file'] = True
stats['ops'][NEW_FILENODE] = 'new file'
elif head['a_file'] and not head['b_file']:
op = OPS.DEL
stats['binary'] = True
stats['ops'][DEL_FILENODE] = 'deleted file'
# it's not ADD not DELETE
if op is None:
op = OPS.MOD
stats['binary'] = True
stats['ops'][MOD_FILENODE] = 'modified file'
# a real non-binary diff
if head['a_file'] or head['b_file']:
# simulate splitlines, so we keep the line end part
diff = self.diff_splitter(chunk.diff)
# append each file to the diff size
raw_chunk_size = len(raw_diff)
exceeds_limit = raw_chunk_size > self.file_limit
self.cur_diff_size += raw_chunk_size
try:
# Check each file instead of the whole diff.
# Diff will hide big files but still show small ones.
# From the tests big files are fairly safe to be parsed
# but the browser is the bottleneck.
if not self.show_full_diff and exceeds_limit:
log.debug('File `%s` exceeds current file_limit of %s',
safe_unicode(head['b_path']), self.file_limit)
raise DiffLimitExceeded(
'File Limit %s Exceeded', self.file_limit)
self._check_large_diff()
raw_diff, chunks, _stats = self._new_parse_lines(diff)
stats['binary'] = False
stats['added'] = _stats[0]
stats['deleted'] = _stats[1]
# explicit mark that it's a modified file
if op == OPS.MOD:
stats['ops'][MOD_FILENODE] = 'modified file'
except DiffLimitExceeded:
diff_container = lambda _diff: \
LimitedDiffContainer(
self.diff_limit, self.cur_diff_size, _diff)
limited_diff = True
chunks = []
else: # GIT format binary patch, or possibly empty diff
if head['bin_patch']:
# we have operation already extracted, but we mark simply
# it's a diff we wont show for binary files
stats['ops'][BIN_FILENODE] = 'binary diff hidden'
chunks = []
# Hide content of deleted node by setting empty chunks
if chunks and not self.show_full_diff and op == OPS.DEL:
# if not full diff mode show deleted file contents
# TODO: anderson: if the view is not too big, there is no way
# to see the content of the file
chunks = []
chunks.insert(
0, [{'old_lineno': '',
'new_lineno': '',
'action': Action.CONTEXT,
'line': msg,
} for _op, msg in stats['ops'].iteritems()
if _op not in [MOD_FILENODE]])
original_filename = safe_unicode(head['a_path'])
_files.append({
'original_filename': original_filename,
'filename': safe_unicode(head['b_path']),
'old_revision': head['a_blob_id'],
'new_revision': head['b_blob_id'],
'chunks': chunks,
'raw_diff': safe_unicode(raw_diff),
'operation': op,
'stats': stats,
'exceeds_limit': exceeds_limit,
'is_limited_diff': limited_diff,
})
sorter = lambda info: {OPS.ADD: 0, OPS.MOD: 1,
OPS.DEL: 2}.get(info['operation'])
return diff_container(sorted(_files, key=sorter))
# FIXME: NEWDIFFS: dan: this gets replaced by _new_parse_lines
def _parse_lines(self, diff_iter):
"""
Parse the diff an return data for the template.
"""
stats = [0, 0]
chunks = []
raw_diff = []
try:
line = diff_iter.next()
while line:
raw_diff.append(line)
lines = []
chunks.append(lines)
match = self._chunk_re.match(line)
if not match:
break
gr = match.groups()
(old_line, old_end,
new_line, new_end) = [int(x or 1) for x in gr[:-1]]
old_line -= 1
new_line -= 1
context = len(gr) == 5
old_end += old_line
new_end += new_line
if context:
# skip context only if it's first line
if int(gr[0]) > 1:
lines.append({
'old_lineno': '...',
'new_lineno': '...',
'action': Action.CONTEXT,
'line': line,
})
line = diff_iter.next()
while old_line < old_end or new_line < new_end:
command = ' '
if line:
command = line[0]
affects_old = affects_new = False
# ignore those if we don't expect them
if command in '#@':
continue
elif command == '+':
affects_new = True
action = Action.ADD
stats[0] += 1
elif command == '-':
affects_old = True
action = Action.DELETE
stats[1] += 1
else:
affects_old = affects_new = True
action = Action.UNMODIFIED
if not self._newline_marker.match(line):
old_line += affects_old
new_line += affects_new
lines.append({
'old_lineno': affects_old and old_line or '',
'new_lineno': affects_new and new_line or '',
'action': action,
'line': self._clean_line(line, command)
})
raw_diff.append(line)
line = diff_iter.next()
if self._newline_marker.match(line):
# we need to append to lines, since this is not
# counted in the line specs of diff
lines.append({
'old_lineno': '...',
'new_lineno': '...',
'action': Action.CONTEXT,
'line': self._clean_line(line, command)
})
except StopIteration:
pass
return ''.join(raw_diff), chunks, stats
# FIXME: NEWDIFFS: dan: this replaces _parse_lines
def _new_parse_lines(self, diff_iter):
"""
Parse the diff an return data for the template.
"""
stats = [0, 0]
chunks = []
raw_diff = []
try:
line = diff_iter.next()
while line:
raw_diff.append(line)
# match header e.g @@ -0,0 +1 @@\n'
match = self._chunk_re.match(line)
if not match:
break
gr = match.groups()
(old_line, old_end,
new_line, new_end) = [int(x or 1) for x in gr[:-1]]
lines = []
hunk = {
'section_header': gr[-1],
'source_start': old_line,
'source_length': old_end,
'target_start': new_line,
'target_length': new_end,
'lines': lines,
}
chunks.append(hunk)
old_line -= 1
new_line -= 1
context = len(gr) == 5
old_end += old_line
new_end += new_line
line = diff_iter.next()
while old_line < old_end or new_line < new_end:
command = ' '
if line:
command = line[0]
affects_old = affects_new = False
# ignore those if we don't expect them
if command in '#@':
continue
elif command == '+':
affects_new = True
action = Action.ADD
stats[0] += 1
elif command == '-':
affects_old = True
action = Action.DELETE
stats[1] += 1
else:
affects_old = affects_new = True
action = Action.UNMODIFIED
if not self._newline_marker.match(line):
old_line += affects_old
new_line += affects_new
lines.append({
'old_lineno': affects_old and old_line or '',
'new_lineno': affects_new and new_line or '',
'action': action,
'line': self._clean_line(line, command)
})
raw_diff.append(line)
line = diff_iter.next()
if self._newline_marker.match(line):
# we need to append to lines, since this is not
# counted in the line specs of diff
if affects_old:
action = Action.OLD_NO_NL
elif affects_new:
action = Action.NEW_NO_NL
else:
raise Exception('invalid context for no newline')
lines.append({
'old_lineno': None,
'new_lineno': None,
'action': action,
'line': self._clean_line(line, command)
})
except StopIteration:
pass
return ''.join(raw_diff), chunks, stats
def _safe_id(self, idstring):
"""Make a string safe for including in an id attribute.
The HTML spec says that id attributes 'must begin with
a letter ([A-Za-z]) and may be followed by any number
of letters, digits ([0-9]), hyphens ("-"), underscores
("_"), colons (":"), and periods (".")'. These regexps
are slightly over-zealous, in that they remove colons
and periods unnecessarily.
Whitespace is transformed into underscores, and then
anything which is not a hyphen or a character that
matches \w (alphanumerics and underscore) is removed.
"""
# Transform all whitespace to underscore
idstring = re.sub(r'\s', "_", '%s' % idstring)
# Remove everything that is not a hyphen or a member of \w
idstring = re.sub(r'(?!-)\W', "", idstring).lower()
return idstring
@classmethod
def diff_splitter(cls, string):
    """
    Generator that splits ``string`` into lines on the line feed character
    only, keeping the line feed at the end of every line it terminated
    (similar to ``.splitlines(True)``, but for a single separator).

    An empty or falsy ``string`` yields nothing.
    """
    if not string:
        return

    if string == '\n':
        yield u'\n'
        return

    ends_with_newline = string.endswith('\n')
    pieces = string.split('\n')
    if ends_with_newline:
        # the trailing separator leaves an empty last element; drop it
        pieces = pieces[:-1]

    final_index = len(pieces) - 1
    for index, piece in enumerate(pieces):
        unterminated = index == final_index and not ends_with_newline
        if unterminated:
            yield safe_unicode(piece)
        else:
            yield safe_unicode(piece) + '\n'
def prepare(self, inline_diff=True):
    """
    Prepare the passed udiff for HTML rendering.

    Runs ``self._parser`` and stores its result on the instance as
    ``parsed_diff``, flagging ``parsed`` as true.

    :param inline_diff: forwarded to the parser as a keyword argument.
    :return: A list of dicts with diff information.
    """
    result = self._parser(inline_diff=inline_diff)
    self.parsed, self.parsed_diff = True, result
    return result
def as_raw(self, diff_lines=None):
    """
    Returns raw diff as a byte string

    :param diff_lines: accepted but not used.
    """
    raw_diff = self._diff.raw
    return raw_diff
def as_html(self, table_class='code-difftable', line_class='line',
old_lineno_class='lineno old', new_lineno_class='lineno new',
code_class='code', enable_comments=False, parsed_lines=None):
"""
Return given diff as html table with customized css classes
"""
# TODO(marcink): not sure how to pass in translator
# here in an efficient way, leave the _ for proper gettext extraction
_ = lambda s: s
def _link_to_if(condition, label, url):
"""
Generates a link if condition is meet or just the label if not.
"""
if condition:
return '''%(label)s''' % {
'title': _('Click to select line'),
'url': url,
'label': label
}
else:
return label
if not self.parsed:
self.prepare()
diff_lines = self.parsed_diff
if parsed_lines:
diff_lines = parsed_lines
_html_empty = True
_html = []
_html.append('''