##// END OF EJS Templates
fix(encoding for file): fixed support of non utf-8 files in all backends
fix(encoding for file): fixed support of non utf-8 files in all backends

File last commit:

r5647:8333bc7b default
r5647:8333bc7b default
Show More
codeblocks.py
819 lines | 31.3 KiB | text/x-python | PythonLexer
core: updated copyright to 2024
r5608 # Copyright (C) 2011-2024 RhodeCode GmbH
dan
annotations: replace annotated source code viewer with renderer...
r986 #
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License, version 3
# (only), as published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# This program is dual-licensed. If you wish to learn more about the
# RhodeCode Enterprise Edition, including its added features, Support services,
# and proprietary license terms, please see https://rhodecode.com/licenses/
dan
codeblocks: add new code token rendering function that...
r1025 import logging
dan
diffs: replace compare controller with new html based diffs:...
r1030 import difflib
libs: major refactor for python3
r5085 import itertools
dan
annotations: replace annotated source code viewer with renderer...
r986
from pygments import lex
dan
codeblocks: add new code token rendering function that...
r1025 from pygments.formatters.html import _get_ttype_class as pygment_token_class
diffs: in case of text lexers don't do any HL because of pygments newline...
r2546 from pygments.lexers.special import TextLexer, Token
diffs: don't use highlite on the new ops lines
r3082 from pygments.lexers import get_lexer_by_name
diffs: in case of text lexers don't do any HL because of pygments newline...
r2546
dan
diffs: replace compare controller with new html based diffs:...
r1030 from rhodecode.lib.helpers import (
diffs: use custom lexer extraction in diffs to so it behaves consistently with...
r1591 get_lexer_for_filenode, html_escape, get_custom_lexer)
libs: major refactor for python3
r5085 from rhodecode.lib.str_utils import safe_str
from rhodecode.lib.utils2 import AttributeDict, StrictAttributeDict
dan
codeblocks: add new code token rendering function that...
r1025 from rhodecode.lib.vcs.nodes import FileNode
libs: major refactor for python3
r5085 from rhodecode.lib.vcs.exceptions import NodeDoesNotExistError
dan
diffs: replace compare controller with new html based diffs:...
r1030 from rhodecode.lib.diff_match_patch import diff_match_patch
comments: allow commenting on empty files without content.
r3081 from rhodecode.lib.diffs import LimitedDiffContainer, DEL_FILENODE, BIN_FILENODE
diffs: don't use highlite on the new ops lines
r3082
dan
codeblocks: add new code token rendering function that...
r1025
# Plain-text pygments lexer configured so that it never strips or appends
# whitespace/newlines; used as the fallback lexer when no highlighting is done.
_PLAIN_LEXER_OPTIONS = dict(stripall=False, stripnl=False, ensurenl=False)
plain_text_lexer = get_lexer_by_name('text', **_PLAIN_LEXER_OPTIONS)

# Module-scoped logger (deliberately not the root logger).
log = logging.getLogger(__name__)
dan
annotations: replace annotated source code viewer with renderer...
r986
dan
codeblocks: add new code token rendering function that...
def filenode_as_lines_tokens(filenode, lexer=None):
    """
    Tokenize the string content of ``filenode`` and return it split per line.

    :param filenode: node whose ``str_content`` is tokenized
    :param lexer: optional lexer to use; when falsy, the lexer is looked up
        with ``get_lexer_for_filenode``
    :return: list with one entry per line, each entry being the list of
        tokens produced by ``split_token_stream`` for that line
    """
    requested_lexer = lexer
    if not lexer:
        lexer = get_lexer_for_filenode(filenode)

    log.debug('Generating file node pygment tokens for %s, file=`%s`, org_lexer:%s',
              lexer, filenode, requested_lexer)

    text = filenode.str_content
    token_stream = tokenize_string(text, lexer)
    return list(split_token_stream(token_stream, text))
def tokenize_string(content, lexer):
    """
    Tokenize ``content`` with pygments while keeping every original newline
    and whitespace character intact.

    Yields ``(token_css_class, token_text)`` pairs. Note that the given lexer
    is reconfigured in place (``stripall``, ``stripnl`` and ``ensurenl`` are
    all switched off) once iteration starts.
    """
    for option in ('stripall', 'stripnl', 'ensurenl'):
        setattr(lexer, option, False)

    # pygments operates on str, so normalize the input first
    text = safe_str(content)

    if not isinstance(lexer, TextLexer):
        raw_tokens = lex(text, lexer)
    else:
        # plain text lexer: skip pygments entirely and emit the (already
        # str-converted) content as one single text token
        raw_tokens = [(Token.Text, text)]

    yield from (
        (pygment_token_class(kind), value) for kind, value in raw_tokens)
dan
annotations: replace annotated source code viewer with renderer...
r986
diffs: fixed case of bogus files diff rendering...
def split_token_stream(tokens, content):
    """
    Regroup a flat stream of ``(token_class, text)`` pairs into lines.

    Every token text is split on ``'\\n'``; each yielded item is the list of
    ``(token_class, text_part)`` pairs that make up one line, eg::

        split_token_stream([(TEXT, 'some\\ntext'), (TEXT, 'more\\n')], ...)

        [(TEXT, 'some')]
        [(TEXT, 'text'), (TEXT, 'more')]
        [(TEXT, '')]

    :param tokens: iterable of ``(token_class, token_text)``, text being str
    :param content: the original content; only used as a fallback when the
        token stream turned out to be empty
    """
    current_line = []

    for css_class, text in tokens:
        # everything before the last piece is terminated by a newline
        *finished, remainder = text.split('\n')
        for piece in finished:
            current_line.append((css_class, piece))
            yield current_line
            current_line = []
        current_line.append((css_class, remainder))

    if current_line:
        yield current_line
    elif content:
        # special case: there is content, but tokenization produced nothing.
        # This can happen when a known file extension like .css holds bogus
        # unicode content without any newline characters
        yield [(pygment_token_class(Token.Text), content)]
dan
annotations: replace annotated source code viewer with renderer...
r986
def filenode_as_annotated_lines_tokens(filenode):
    """
    Yield ``(annotation, lines)`` pairs for a file node, where consecutive
    lines sharing the same annotation are grouped together. Lines without
    an annotation are grouped under ``None``.

    eg:

    [
        (annotation1, [
            (1, line1_tokens_list),
            (2, line2_tokens_list),
        ]),
        (annotation2, [
            (3, line1_tokens_list),
        ]),
        (None, [
            (4, line1_tokens_list),
        ]),
        (annotation1, [
            (5, line1_tokens_list),
            (6, line2_tokens_list),
        ])
    ]
    """
    # commit_getter results keyed by commit id, so each getter runs only once
    resolved_commits = {}
    annotation_by_line = {}
    for line_no, commit_id, commit_getter, _line_content in filenode.annotate:
        if commit_id not in resolved_commits:
            resolved_commits[commit_id] = commit_getter()
        annotation_by_line[line_no] = resolved_commits[commit_id]

    numbered_lines = enumerate(filenode_as_lines_tokens(filenode), 1)
    annotated_rows = (
        (annotation_by_line.get(line_no), line_no, tokens)
        for line_no, tokens in numbered_lines)

    for annotation, rows in itertools.groupby(annotated_rows, lambda row: row[0]):
        line_entries = [(line_no, tokens) for _, line_no, tokens in rows]
        yield annotation, line_entries
dan
codeblocks: add new code token rendering function that...
r1025
def render_tokenstream(tokenstream):
    """
    Render a token stream into an html string.

    The stream is first compacted with ``rollup_tokenstream``; every class
    group becomes one ``<span>`` (with a ``class`` attribute when the class
    is truthy) and every non-empty op wraps its escaped text in a tag named
    after the op, eg. ``<span class="A"><ins>he</ins>llo</span>``
    """
    pieces = []
    emit = pieces.append

    for css_class, op_texts in rollup_tokenstream(tokenstream):
        emit(f'<span class="{css_class}">' if css_class else '<span>')

        for tag, text in op_texts:
            # NOTE(marcink): in some cases of mixed encodings html_escape can
            # fail; in that case force the token text into str first so we
            # still render something "correct"
            try:
                safe_text = html_escape(text)
            except TypeError:
                safe_text = html_escape(safe_str(text))

            # TODO: dan: investigate showing hidden characters like space/nl/tab
            if tag:
                emit(f'<{tag}>')
                emit(safe_text)
                emit(f'</{tag}>')
            else:
                emit(safe_text)

        emit('</span>')

    return ''.join(pieces)
def rollup_tokenstream(tokenstream):
    """
    Group a token stream of the format:

        ('class', 'op', 'text')
    or
        ('class', 'text')

    into

        [('class1',
            [('op1', 'text'),
             ('op2', 'text')]),
         ('class2',
            [('op3', 'text')])]

    This is used to get the minimal tags necessary when
    rendering to html eg for a token stream ie.

        <span class="A"><ins>he</ins>llo</span>
    vs
        <span class="A"><ins>he</ins></span><span class="A">llo</span>

    If a 2 tuple is passed in, the output op will be an empty string.
    Any iterable is accepted (lists as well as generators/iterators), and
    2-tuples are normalized one by one rather than by looking at the first
    token only.

    eg:

    >>> rollup_tokenstream([('classA', '', 'h'),
    ...                     ('classA', 'del', 'ell'),
    ...                     ('classA', '', 'o'),
    ...                     ('classB', '', ' '),
    ...                     ('classA', '', 'the'),
    ...                     ('classA', '', 're'),
    ...                     ])
    [('classA', [('', 'h'), ('del', 'ell'), ('', 'o')]), ('classB', [('', ' ')]), ('classA', [('', 'there')])]

    :param tokenstream: iterable of 2- or 3-tuples as described above
    :return: list of ``(token_class, [(op, text), ...])`` tuples
    """

    def _as_triple(token):
        # (class, text) -> (class, '', text); 3-tuples pass through untouched
        if len(token) == 2:
            return token[0], '', token[1]
        return token

    normalized = (_as_triple(token) for token in tokenstream)

    result = []
    # consecutive tokens of the same class share one entry ...
    for token_class, class_group in itertools.groupby(normalized, lambda t: t[0]):
        ops = []
        # ... and inside it, consecutive tokens of the same op are merged
        for token_op, op_group in itertools.groupby(class_group, lambda t: t[1]):
            ops.append((token_op, ''.join(token[2] for token in op_group)))
        result.append((token_class, ops))
    return result
dan
diffs: replace compare controller with new html based diffs:...
r1030
def tokens_diff(old_tokens, new_tokens, use_diff_match_patch=True):
    """
    Converts a list of (token_class, token_text) tuples to a list of
    (token_class, token_op, token_text) tuples where token_op is one of
    ('ins', 'del', '')

    When the joined texts of both lines have a similarity ratio below 0.6 no
    ops are computed at all and every token gets an empty op.

    :param old_tokens: list of (token_class, token_text) tuples of old line
    :param new_tokens: list of (token_class, token_text) tuples of new line
    :param use_diff_match_patch: boolean, will use google's diff match patch
        library which has options to 'smooth' out the character by character
        differences making nicer ins/del blocks
    :return: tuple of (old_tokens_result, new_tokens_result, similarity)
    """

    old_tokens_result = []
    new_tokens_result = []

    def int_convert(val):
        # defensive: a single "character" may be an int; ops carry str text
        if isinstance(val, int):
            return str(val)
        return val

    similarity = difflib.SequenceMatcher(
        None,
        ''.join(token_text for token_class, token_text in old_tokens),
        ''.join(token_text for token_class, token_text in new_tokens)
    ).ratio()

    if similarity < 0.6:  # return, the blocks are too different
        old_tokens_result = [
            (token_class, '', token_text)
            for token_class, token_text in old_tokens]
        new_tokens_result = [
            (token_class, '', token_text)
            for token_class, token_text in new_tokens]
        return old_tokens_result, new_tokens_result, similarity

    token_sequence_matcher = difflib.SequenceMatcher(
        None,
        [x[1] for x in old_tokens],
        [x[1] for x in new_tokens])

    for tag, o1, o2, n1, n2 in token_sequence_matcher.get_opcodes():
        # check the differences by token block types first to give a
        # nicer "block" level replacement vs character diffs

        if tag == 'equal':
            for token_class, token_text in old_tokens[o1:o2]:
                old_tokens_result.append((token_class, '', token_text))
            for token_class, token_text in new_tokens[n1:n2]:
                new_tokens_result.append((token_class, '', token_text))
        elif tag == 'delete':
            for token_class, token_text in old_tokens[o1:o2]:
                old_tokens_result.append(
                    (token_class, 'del', int_convert(token_text)))
        elif tag == 'insert':
            for token_class, token_text in new_tokens[n1:n2]:
                new_tokens_result.append(
                    (token_class, 'ins', int_convert(token_text)))
        elif tag == 'replace':
            # if same type token blocks must be replaced, do a diff on the
            # characters in the token blocks to show individual changes

            old_char_tokens = [
                (token_class, char)
                for token_class, token_text in old_tokens[o1:o2]
                for char in token_text]
            new_char_tokens = [
                (token_class, char)
                for token_class, token_text in new_tokens[n1:n2]
                for char in token_text]

            old_string = ''.join(char for _, char in old_char_tokens)
            new_string = ''.join(char for _, char in new_char_tokens)

            obuffer, nbuffer = [], []

            if use_diff_match_patch:
                dmp = diff_match_patch()
                dmp.Diff_EditCost = 11  # TODO: dan: extract this to a setting
                reps = dmp.diff_main(old_string, new_string)
                dmp.diff_cleanupEfficiency(reps)

                # a / b: current offsets into the old / new char tokens
                a, b = 0, 0
                for op, rep in reps:
                    rep_len = len(rep)
                    if op == 0:
                        for i, c in enumerate(rep):
                            obuffer.append((old_char_tokens[a+i][0], '', c))
                            nbuffer.append((new_char_tokens[b+i][0], '', c))
                        a += rep_len
                        b += rep_len
                    elif op == -1:
                        for i, c in enumerate(rep):
                            obuffer.append(
                                (old_char_tokens[a+i][0], 'del', int_convert(c)))
                        a += rep_len
                    elif op == 1:
                        for i, c in enumerate(rep):
                            nbuffer.append(
                                (new_char_tokens[b+i][0], 'ins', int_convert(c)))
                        b += rep_len
            else:
                # the difflib opcodes are only needed on this path, so they
                # are computed here instead of unconditionally
                copcodes = difflib.SequenceMatcher(
                    None, old_string, new_string).get_opcodes()

                for ctag, co1, co2, cn1, cn2 in copcodes:
                    if ctag == 'equal':
                        for token_class, token_text in old_char_tokens[co1:co2]:
                            obuffer.append((token_class, '', token_text))
                        for token_class, token_text in new_char_tokens[cn1:cn2]:
                            nbuffer.append((token_class, '', token_text))
                    elif ctag == 'delete':
                        for token_class, token_text in old_char_tokens[co1:co2]:
                            obuffer.append(
                                (token_class, 'del', int_convert(token_text)))
                    elif ctag == 'insert':
                        for token_class, token_text in new_char_tokens[cn1:cn2]:
                            nbuffer.append(
                                (token_class, 'ins', int_convert(token_text)))
                    elif ctag == 'replace':
                        for token_class, token_text in old_char_tokens[co1:co2]:
                            obuffer.append(
                                (token_class, 'del', int_convert(token_text)))
                        for token_class, token_text in new_char_tokens[cn1:cn2]:
                            nbuffer.append(
                                (token_class, 'ins', int_convert(token_text)))

            old_tokens_result.extend(obuffer)
            new_tokens_result.extend(nbuffer)

    return old_tokens_result, new_tokens_result, similarity
Bartłomiej Wołyńczyk
caching: add option to cache diffs for commits and pull requests....
def diffset_node_getter(commit):
    """
    Build a node lookup function bound to ``commit``.

    The returned callable takes a file name and returns the result of
    ``commit.get_node`` (asking it to pre-load ``size``, ``flags`` and
    ``data``), or ``None`` when the node does not exist in that commit.
    """

    def get_diff_node(file_name):
        try:
            node = commit.get_node(
                file_name, pre_load=['size', 'flags', 'data'])
        except NodeDoesNotExistError:
            node = None
        return node

    return get_diff_node
Bartłomiej Wołyńczyk
caching: add option to cache diffs for commits and pull requests....
r2685
dan
diffs: replace compare controller with new html based diffs:...
r1030 class DiffSet(object):
"""
An object for parsing the diff result from diffs.DiffProcessor and
adding highlighting, side by side/unified renderings and line diffs
"""
comments: ensure we ALWAYS display unmatched comments.
r3080 HL_REAL = 'REAL' # highlights using original file, slow
HL_FAST = 'FAST' # highlights using just the line, fast but not correct
# in the case of multiline code
HL_NONE = 'NONE' # no highlighting, fastest
dan
diffs: replace compare controller with new html based diffs:...
r1030
dan
diffs: add repo_name as parameter of diffset - fixes bug...
def __init__(self, highlight_mode=HL_REAL, repo_name=None,
             source_repo_name=None,
             source_node_getter=lambda filename: None,
             target_repo_name=None,
             target_node_getter=lambda filename: None,
             source_nodes=None, target_nodes=None,
             # files over this size will use fast highlighting
             max_file_size_limit=150 * 1024,
             ):
    """
    Store the rendering configuration for a diff set.

    :param highlight_mode: one of the ``HL_*`` class constants
    :param repo_name: default repository name; also used for the source and
        target repo names when those are not given
    :param source_repo_name: repository name of the source side
    :param source_node_getter: callable returning the source file node for
        a file name (the default getter always returns ``None``)
    :param target_repo_name: repository name of the target side
    :param target_node_getter: same as ``source_node_getter``, target side
    :param source_nodes: optional pre-fetched ``{filename: node}`` mapping
    :param target_nodes: optional pre-fetched ``{filename: node}`` mapping
    :param max_file_size_limit: size threshold, stored on the instance
    """
    # repository names; source/target default to the common repo name
    self.repo_name = repo_name
    self.source_repo_name = source_repo_name if source_repo_name else repo_name
    self.target_repo_name = target_repo_name if target_repo_name else repo_name

    # highlighting setup
    self.highlight_mode = highlight_mode
    self.max_file_size_limit = max_file_size_limit
    self.highlighted_filenodes = {'before': {}, 'after': {}}

    # file node access: lazy getters plus the already known nodes
    self.source_node_getter = source_node_getter
    self.target_node_getter = target_node_getter
    self.source_nodes = source_nodes if source_nodes else {}
    self.target_nodes = target_nodes if target_nodes else {}
def render_patchset(self, patchset, source_ref=None, target_ref=None):
    """
    Render every patch of ``patchset`` and collect the results together
    with aggregated statistics.

    :param patchset: iterable of patch dicts; each is rendered with
        ``render_patch``
    :param source_ref: stored on the result and on every rendered file
    :param target_ref: stored on the result and on every rendered file
    :return: ``AttributeDict`` holding the rendered ``files``, per file
        ``file_stats`` and the added/deleted/changed counters
    """
    diffset = AttributeDict({
        'lines_added': 0,
        'lines_deleted': 0,
        'changed_files': 0,
        'files': [],
        'file_stats': {},
        'limited_diff': isinstance(patchset, LimitedDiffContainer),
        'repo_name': self.repo_name,
        'target_repo_name': self.target_repo_name,
        'source_repo_name': self.source_repo_name,
        'source_ref': source_ref,
        'target_ref': target_ref,
    })

    # the subset of diffset attributes that is copied onto every filediff
    shared_attrs = (
        'source_ref', 'target_ref', 'repo_name',
        'source_repo_name', 'target_repo_name',
    )

    for patch in patchset:
        stats = patch['stats']
        diffset.file_stats[patch['filename']] = stats

        filediff = self.render_patch(patch)
        filediff.diffset = StrictAttributeDict(
            {name: getattr(diffset, name) for name in shared_attrs})
        diffset.files.append(filediff)
        diffset.changed_files += 1

        # binary patches do not contribute to the line counters
        if stats['binary']:
            continue
        diffset.lines_added += stats['added']
        diffset.lines_deleted += stats['deleted']

    return diffset
# filename -> lexer; defined at class level
_lexer_cache = {}

def _get_lexer_for_filename(self, filename, filenode=None):
    """
    Return the lexer to use for ``filename``, resolving it only once.

    A custom lexer registered for the file extension takes precedence over
    the lexer derived from ``filenode`` (when given) or from the file name.
    """
    # cached because we might need to call it twice for source/target
    if filename in self._lexer_cache:
        return self._lexer_cache[filename]

    if filenode:
        default_lexer = filenode.lexer
        extension = filenode.extension
    else:
        default_lexer = FileNode.get_lexer(filename=filename)
        extension = filename.split('.')[-1]

    resolved = get_custom_lexer(extension) or default_lexer
    self._lexer_cache[filename] = resolved
    return self._lexer_cache[filename]
def render_patch(self, patch):
    """
    Render a single parsed patch into a ``filediff`` structure.

    :param patch: patch dict; the keys read here are ``filename``,
        ``filename_bytes``, ``original_filename``,
        ``original_filename_bytes``, ``operation``, ``chunks``,
        ``is_limited_diff`` and ``stats``
    :return: ``AttributeDict`` describing the file diff. ``hunks`` holds one
        parsed hunk per chunk after the first one; when there are none,
        ``hunk_ops`` holds a simulated single-line hunk instead.
    """
    log.debug('rendering diff for %r', patch['filename'])

    source_filename = patch['original_filename']
    source_filename_bytes = patch['original_filename_bytes']
    target_filename = patch['filename']
    target_filename_bytes = patch['filename_bytes']

    source_lexer = plain_text_lexer
    target_lexer = plain_text_lexer

    if not patch['stats']['binary']:
        # a patch without any chunks has nothing to highlight
        node_hl_mode = self.HL_NONE if patch['chunks'] == [] else None
        hl_mode = node_hl_mode or self.highlight_mode

        if hl_mode == self.HL_REAL:
            # fetch the file nodes (looked up by the bytes file name) that
            # are not known yet; they are stored under the str file name
            if (source_filename and patch['operation'] in ('D', 'M')
                    and source_filename not in self.source_nodes):
                self.source_nodes[source_filename] = (
                    self.source_node_getter(source_filename_bytes))

            if (target_filename and patch['operation'] in ('A', 'M')
                    and target_filename not in self.target_nodes):
                self.target_nodes[target_filename] = (
                    self.target_node_getter(target_filename_bytes))

        elif hl_mode == self.HL_FAST:
            source_lexer = self._get_lexer_for_filename(source_filename)
            target_lexer = self._get_lexer_for_filename(target_filename)

    # fall back to the plain file name when no node is available
    source_file = self.source_nodes.get(source_filename, source_filename)
    target_file = self.target_nodes.get(target_filename, target_filename)
    raw_id_uid = ''
    if self.source_nodes.get(source_filename):
        raw_id_uid = self.source_nodes[source_filename].commit.raw_id

    if not raw_id_uid and self.target_nodes.get(target_filename):
        # in case this is a new file we only have it in target
        raw_id_uid = self.target_nodes[target_filename].commit.raw_id

    source_filenode, target_filenode = None, None

    # TODO: dan: FileNode.lexer works on the content of the file - which
    # can be slow - issue #4289 explains a lexer clean up - which once
    # done can allow caching a lexer for a filenode to avoid the file lookup
    if isinstance(source_file, FileNode):
        source_filenode = source_file
        source_lexer = self._get_lexer_for_filename(source_filename)
        source_file.lexer = source_lexer

    if isinstance(target_file, FileNode):
        target_filenode = target_file
        target_lexer = self._get_lexer_for_filename(target_filename)
        target_file.lexer = target_lexer

    source_file_path, target_file_path = None, None

    if source_filename != '/dev/null':
        source_file_path = source_filename
    if target_filename != '/dev/null':
        target_file_path = target_filename

    source_file_type = source_lexer.name
    target_file_type = target_lexer.name

    filediff = AttributeDict({
        'source_file_path': source_file_path,
        'target_file_path': target_file_path,
        'source_filenode': source_filenode,
        'target_filenode': target_filenode,
        # each side reports the name of its own lexer
        'source_file_type': source_file_type,
        'target_file_type': target_file_type,
        'patch': {'filename': patch['filename'], 'stats': patch['stats']},
        'operation': patch['operation'],
        'source_mode': patch['stats']['old_mode'],
        'target_mode': patch['stats']['new_mode'],
        'limited_diff': patch['is_limited_diff'],
        'hunks': [],
        'hunk_ops': None,
        'diffset': self,
        'raw_id': raw_id_uid,
    })

    # the first chunk is skipped; only the remaining ones become hunks
    file_chunks = patch['chunks'][1:]

    for i, hunk in enumerate(file_chunks, 1):
        hunkbit = self.parse_hunk(hunk, source_file, target_file)
        hunkbit.source_file_path = source_file_path
        hunkbit.target_file_path = target_file_path
        hunkbit.index = i
        filediff.hunks.append(hunkbit)

    # Simulate hunk on OPS type line which doesn't really contain any diff
    # this allows commenting on those
    if not file_chunks:
        actions = []
        for op_id, op_text in filediff.patch['stats']['ops'].items():
            if op_id == DEL_FILENODE:
                actions.append('file was removed')
            elif op_id == BIN_FILENODE:
                actions.append('binary diff hidden')
            else:
                actions.append(safe_str(op_text))
        # parenthesized on purpose: `+` binds tighter than `or`, so without
        # the parentheses the fallback text could never be selected
        action_line = 'NO CONTENT: ' + (
            ', '.join(actions) or 'UNDEFINED_ACTION')

        hunk_ops = {'source_length': 0, 'source_start': 0,
                    'lines': [
                        {'new_lineno': 0, 'old_lineno': 1,
                         'action': 'unmod-no-hl', 'line': action_line}
                    ],
                    'section_header': '', 'target_start': 1, 'target_length': 1}

        hunkbit = self.parse_hunk(hunk_ops, source_file, target_file)
        hunkbit.source_file_path = source_file_path
        hunkbit.target_file_path = target_file_path
        filediff.hunk_ops = hunkbit
    return filediff
def parse_hunk(self, hunk, source_file, target_file):
    """
    Convert one parsed diff hunk into its render-ready representation.

    :param hunk: mapping with the keys ``source_start``, ``source_length``,
        ``target_start``, ``target_length``, ``section_header`` and
        ``lines``; every entry of ``lines`` is a mapping carrying at least
        an ``action`` key.
    :param source_file: passed through to ``parse_lines`` for the old side.
    :param target_file: passed through to ``parse_lines`` for the new side.
    :return: ``AttributeDict`` holding the hunk header values, the parsed
        ``lines``, their ``unified`` ordering (materialized as a list) and
        ``sideside`` (the same object as ``lines``).
    """
    result = AttributeDict({
        'source_start': hunk['source_start'],
        'source_length': hunk['source_length'],
        'target_start': hunk['target_start'],
        'target_length': hunk['target_length'],
        'section_header': safe_str(hunk['section_header']),
        'lines': [],
    })

    # Lines accumulated per side since the last flush; ``parse_lines``
    # drains both lists in place every time it is called.
    old_side, new_side = [], []

    for entry in hunk['lines']:
        action = entry['action']
        if action in ('unmod', 'unmod-no-hl'):
            # A context line closes the pending change group: flush what
            # was collected so far, then start the next group with the
            # context line present on both sides.
            flushed = self.parse_lines(
                old_side, new_side, source_file, target_file,
                no_hl=(action == 'unmod-no-hl'))
            result.lines.extend(flushed)
            new_side.append(entry)
            old_side.append(entry)
        elif action in ('add', 'new-no-nl'):
            new_side.append(entry)
        elif action in ('del', 'old-no-nl'):
            old_side.append(entry)

    # Highlighting is skipped for the remainder only when every pending
    # line is a synthetic 'unmod-no-hl' entry.
    pending_actions = {entry['action'] for entry in new_side + old_side}
    remainder = self.parse_lines(
        old_side, new_side, source_file, target_file,
        no_hl=(pending_actions == {'unmod-no-hl'}))
    result.lines.extend(remainder)

    # NOTE(marcink): we must keep list() call here, so we can cache the result...
    result.unified = list(self.as_unified(result.lines))
    result.sideside = result.lines

    return result
diffs: don't use highlite on the new ops lines
def parse_lines(self, before_lines, after_lines, source_file, target_file,
                no_hl=False):
    """
    Pair up old-side and new-side diff lines and render them to html.

    Both ``before_lines`` and ``after_lines`` are consumed in place: the
    lists are empty when this method returns.

    :param before_lines: list of line mappings for the old side; each has
        the keys ``action``, ``line`` and ``old_lineno``.
    :param after_lines: list of line mappings for the new side; each has
        the keys ``action``, ``line`` and ``new_lineno``.
    :param source_file: forwarded to ``get_line_tokens`` for old lines and
        stored in their ``get_comment_args``.
    :param target_file: same as ``source_file`` but for new lines.
    :param no_hl: forwarded to ``get_line_tokens``.
    :return: list of ``AttributeDict`` rows, each with an ``original`` and a
        ``modified`` entry (either may be an empty ``AttributeDict``).
    """
    # TODO: dan: investigate doing the diff comparison and fast highlighting
    # on the entire before and after buffered block lines rather than by
    # line, this means we can get better 'fast' highlighting if the context
    # allows it - eg.
    # line 4: """
    # line 5: this gets highlighted as a string
    # line 6: """

    def detach_marker(pending, marker_action):
        # Remove a trailing "no newline at end of file" marker from
        # ``pending`` and keep its rendered form for later appending.
        holder = AttributeDict()
        if pending and pending[-1]['action'] == marker_action:
            marker = pending.pop(-1)
            holder.content = '\n {}'.format(
                render_tokenstream([('nonl', '', safe_str(marker['line']))]))
        return holder

    def build_side(entry, marker_action, lineno_key, input_file, flag, source):
        # Produce the (side data, tokens) pair for one line of one side;
        # a missing line gives an empty side and no tokens.
        side = AttributeDict()
        if not entry:
            return side, None
        if entry['action'] == marker_action:
            tokens = [('nonl', safe_str(entry['line']))]
        else:
            tokens = self.get_line_tokens(
                line_text=entry['line'], line_number=entry[lineno_key],
                input_file=input_file, no_hl=no_hl, source=source)
        side.lineno = entry[lineno_key]
        side.content = entry['line']
        side.action = self.action_to_op(entry['action'])
        side.get_comment_args = (input_file, flag, entry[lineno_key])
        return side, tokens

    def render_plain(tokens):
        # Render tokens of a line that has no counterpart to diff against.
        return render_tokenstream([(tok[0], '', tok[1]) for tok in tokens])

    old_marker = detach_marker(before_lines, 'old-no-nl')
    new_marker = detach_marker(after_lines, 'new-no-nl')

    rows = []
    while before_lines or after_lines:
        old_entry = before_lines.pop(0) if before_lines else None
        new_entry = after_lines.pop(0) if after_lines else None

        original, old_tokens = build_side(
            old_entry, 'old-no-nl', 'old_lineno', source_file, 'o', 'before')
        modified, new_tokens = build_side(
            new_entry, 'new-no-nl', 'new_lineno', target_file, 'n', 'after')

        # diff the lines
        if old_tokens and new_tokens:
            old_stream, new_stream, _similarity = tokens_diff(
                old_tokens, new_tokens)
            original.content = render_tokenstream(old_stream)
            modified.content = render_tokenstream(new_stream)
        elif old_tokens:
            original.content = render_plain(old_tokens)
        elif new_tokens:
            modified.content = render_plain(new_tokens)

        # The detached marker is appended once, to the last line of its side.
        if old_marker and not before_lines:
            original.content += old_marker.content
            old_marker = None
        if new_marker and not after_lines:
            modified.content += new_marker.content
            new_marker = None

        rows.append(AttributeDict({
            'original': original,
            'modified': modified,
        }))

    return rows
diffs: fixed problem with potential diff display.
def get_line_tokens(self, line_text, line_number, input_file=None, no_hl=False, source=''):
    """
    Tokenize a single diff line according to the active highlight mode.

    :param line_text: text of the line; used whenever the line is
        tokenized on its own.
    :param line_number: line number inside ``input_file``; a falsy value
        disables the whole-file highlighting path.
    :param input_file: a file name (``str``), a ``FileNode``, or anything
        else (treated as "no file information").
    :param no_hl: when true, ``self.HL_NONE`` is used instead of
        ``self.highlight_mode``.
    :param source: forwarded to ``get_tokenized_filenode_line``.
    :return: list of tokens for the line.
    """
    if isinstance(input_file, str):
        filenode, filename = None, input_file
    elif isinstance(input_file, FileNode):
        filenode, filename = input_file, input_file.str_path
    else:
        filenode = filename = None

    hl_mode = self.HL_NONE if no_hl else self.highlight_mode

    # Whole-file highlighting: take the line out of the tokenized file,
    # but only for files below the configured size limit.
    if hl_mode == self.HL_REAL and filenode:
        lexer = self._get_lexer_for_filename(filename)
        within_limit = filenode.size < self.max_file_size_limit
        if line_number and within_limit:
            return self.get_tokenized_filenode_line(filenode, line_number, lexer, source)

    # Otherwise tokenize the line on its own, with a lexer picked by file
    # name when highlighting is enabled, or the plain text lexer if not.
    if hl_mode in (self.HL_REAL, self.HL_FAST) and filename:
        lexer = self._get_lexer_for_filename(filename)
    else:
        lexer = plain_text_lexer
    return list(tokenize_string(line_text, lexer))
diffs: fixed problem with potential diff display.
def get_tokenized_filenode_line(self, filenode, line_number, lexer=None, source=''):
    """
    Return the tokens of one line of ``filenode``, tokenizing the whole
    file on first use.

    Tokenized files are kept in ``self.highlighted_filenodes[source]``
    under ``hash(filenode)``.

    :param filenode: file node to tokenize; must be hashable.
    :param line_number: line to return; ``line_number - 1`` is used as the
        index into the tokenized lines.
    :param lexer: passed to ``filenode_as_lines_tokens`` on a cache miss.
    :param source: selects which cache in ``self.highlighted_filenodes``
        is used.
    :return: the tokens of the requested line, or a single placeholder
        token describing the failure when the lookup raises.
    """
    cache_key = hash(filenode)
    per_source_cache = self.highlighted_filenodes[source]

    try:
        tokenized_lines = per_source_cache[cache_key]
    except KeyError:
        tokenized_lines = filenode_as_lines_tokens(filenode, lexer)
        per_source_cache[cache_key] = tokenized_lines

    try:
        return tokenized_lines[line_number - 1]
    except Exception:
        # Best effort: a bogus diff must not break rendering of the page.
        log.exception('diff rendering error on L:%s and file=%s', line_number - 1, filenode.name)
        return [('', 'L{}: rhodecode diff rendering error'.format(line_number))]
dan
diffs: replace compare controller with new html based diffs:...
r1030
def action_to_op(self, action):
    """
    Translate a diff line action name into its one-character marker.

    :param action: action name such as ``'add'`` or ``'del'``.
    :return: ``'+'`` for additions, ``'-'`` for deletions, ``' '`` for the
        unmodified and no-newline actions; any other value is returned
        unchanged.
    """
    markers = dict.fromkeys(
        ('unmod', 'unmod-no-hl', 'old-no-nl', 'new-no-nl'), ' ')
    markers['add'] = '+'
    markers['del'] = '-'
    try:
        return markers[action]
    except KeyError:
        return action
def as_unified(self, lines):
    """
    Return a generator that yields the lines of a diff in unified order.

    :param lines: iterable of rows exposing ``original`` and ``modified``
        sides; each side exposes ``lineno``, ``action``, ``content`` and
        ``get_comment_args``.
    :return: generator of ``(old_lineno, new_lineno, action, content,
        get_comment_args)`` tuples. Additions paired with a deletion are
        held back until the run of deletions ends.
    """
    def old_row(side, new_lineno):
        return (side.lineno, new_lineno,
                side.action, side.content,
                side.get_comment_args)

    def new_row(side):
        return (None, side.lineno,
                side.action, side.content,
                side.get_comment_args)

    def unified_rows():
        held_back = []
        for pair in lines:
            old = pair.original

            # Release buffered additions when a pure addition or a
            # context line follows them.
            if (held_back and not old) or old.action == ' ':
                yield from held_back
                held_back = []

            if old:
                if old.action == ' ':
                    yield old_row(old, pair.modified.lineno)
                    continue

                if old.action == '-':
                    yield old_row(old, None)

                if pair.modified.action == '+':
                    held_back.append(new_row(pair.modified))
                    continue

            if pair.modified:
                yield new_row(pair.modified)

        yield from held_back

    return unified_rows()