##// END OF EJS Templates
diffs: fixed problem with rendering no newline at the end of file markers....
marcink -
r2252:dc922ddb stable
parent child Browse files
Show More
@@ -0,0 +1,10 b''
1 diff --git a/server.properties b/server.properties
2 --- a/server.properties
3 +++ b/server.properties
4 @@ -1,2 +1,3 @@
5 property=value
6 -anotherProperty=value
7 \ No newline at end of file
8 +anotherProperty=value
9 +newProperty=super_important_value
10 \ No newline at end of file No newline at end of file
@@ -1,711 +1,735 b''
1 # -*- coding: utf-8 -*-
1 # -*- coding: utf-8 -*-
2
2
3 # Copyright (C) 2011-2017 RhodeCode GmbH
3 # Copyright (C) 2011-2017 RhodeCode GmbH
4 #
4 #
5 # This program is free software: you can redistribute it and/or modify
5 # This program is free software: you can redistribute it and/or modify
6 # it under the terms of the GNU Affero General Public License, version 3
6 # it under the terms of the GNU Affero General Public License, version 3
7 # (only), as published by the Free Software Foundation.
7 # (only), as published by the Free Software Foundation.
8 #
8 #
9 # This program is distributed in the hope that it will be useful,
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU General Public License for more details.
12 # GNU General Public License for more details.
13 #
13 #
14 # You should have received a copy of the GNU Affero General Public License
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
16 #
16 #
17 # This program is dual-licensed. If you wish to learn more about the
17 # This program is dual-licensed. If you wish to learn more about the
18 # RhodeCode Enterprise Edition, including its added features, Support services,
18 # RhodeCode Enterprise Edition, including its added features, Support services,
19 # and proprietary license terms, please see https://rhodecode.com/licenses/
19 # and proprietary license terms, please see https://rhodecode.com/licenses/
20
20
21 import logging
21 import logging
22 import difflib
22 import difflib
23 from itertools import groupby
23 from itertools import groupby
24
24
25 from pygments import lex
25 from pygments import lex
26 from pygments.formatters.html import _get_ttype_class as pygment_token_class
26 from pygments.formatters.html import _get_ttype_class as pygment_token_class
27 from rhodecode.lib.helpers import (
27 from rhodecode.lib.helpers import (
28 get_lexer_for_filenode, html_escape, get_custom_lexer)
28 get_lexer_for_filenode, html_escape, get_custom_lexer)
29 from rhodecode.lib.utils2 import AttributeDict
29 from rhodecode.lib.utils2 import AttributeDict
30 from rhodecode.lib.vcs.nodes import FileNode
30 from rhodecode.lib.vcs.nodes import FileNode
31 from rhodecode.lib.diff_match_patch import diff_match_patch
31 from rhodecode.lib.diff_match_patch import diff_match_patch
32 from rhodecode.lib.diffs import LimitedDiffContainer
32 from rhodecode.lib.diffs import LimitedDiffContainer
33 from pygments.lexers import get_lexer_by_name
33 from pygments.lexers import get_lexer_by_name
34
34
# Fallback lexer for diff rendering: configured to keep content
# byte-for-byte (no stripping, no forced trailing newline) so
# "no newline at end of file" markers render correctly.
plain_text_lexer = get_lexer_by_name(
    'text', stripall=False, stripnl=False, ensurenl=False)


# Use a module-scoped logger rather than the root logger so output from
# this module can be filtered/configured via standard logging config.
log = logging.getLogger(__name__)
40
40
41
41
def filenode_as_lines_tokens(filenode, lexer=None):
    """
    Return the pygments token stream of ``filenode`` split into one token
    list per line, using ``lexer`` when given, otherwise the lexer
    detected for the file node.
    """
    explicit_lexer = lexer
    lexer = lexer or get_lexer_for_filenode(filenode)
    log.debug('Generating file node pygment tokens for %s, %s, org_lexer:%s',
              lexer, filenode, explicit_lexer)
    token_stream = tokenize_string(filenode.content, lexer)
    return list(split_token_stream(token_stream, split_string='\n'))
51
51
52
52
def tokenize_string(content, lexer):
    """
    Run ``content`` through pygments with ``lexer`` and yield
    ``(css_class, text)`` pairs.

    The lexer options are forced off so every original newline and all
    whitespace survive tokenization untouched.
    """
    lexer.stripall = False
    lexer.stripnl = False
    lexer.ensurenl = False
    for ttype, text in lex(content, lexer):
        yield pygment_token_class(ttype), text
64
64
65
65
def split_token_stream(tokens, split_string=u'\n'):
    """
    Take an iterable of (token_class, text) tuples and split it on
    ``split_string``, yielding one list of tuples per split segment
    (by default, one list per line).

    >>> list(split_token_stream([(TEXT, 'some\ntext'), (TEXT, 'more\n')]))
    [[(TEXT, 'some')], [(TEXT, 'text'), (TEXT, 'more')], [(TEXT, '')]]

    Note: a trailing ``split_string`` yields a final list containing an
    empty-text token — that is how a terminating newline is represented.
    """

    buffer = []
    for token_class, token_text in tokens:
        parts = token_text.split(split_string)
        # every part except the last terminates a segment inside this token
        for part in parts[:-1]:
            buffer.append((token_class, part))
            yield buffer
            buffer = []

        # the last part may be continued by text from the next token
        buffer.append((token_class, parts[-1]))

    if buffer:
        yield buffer
87
87
88
88
def filenode_as_annotated_lines_tokens(filenode):
    """
    Group the tokenized lines of ``filenode`` by their blame annotation.

    Yields ``(annotation, [(line_no, tokens), ...])`` tuples where runs of
    consecutive lines sharing the same annotation are collected together;
    lines without an annotation get ``None``, eg::

        [
            (annotation1, [(1, line1_tokens_list),
                           (2, line2_tokens_list)]),
            (annotation2, [(3, line1_tokens_list)]),
            (None,        [(4, line1_tokens_list)]),
            (annotation1, [(5, line1_tokens_list),
                           (6, line2_tokens_list)])
        ]
    """

    # memoize commit lookups — the same commit usually annotates many lines
    commit_cache = {}

    def _get_annotation(commit_id, commit_getter):
        if commit_id not in commit_cache:
            commit_cache[commit_id] = commit_getter()
        return commit_cache[commit_id]

    annotation_lookup = {
        line_no: _get_annotation(commit_id, commit_getter)
        for line_no, commit_id, commit_getter, line_content
        in filenode.annotate
    }

    annotated_lines = (
        (annotation_lookup.get(line_no), line_no, tokens)
        for line_no, tokens
        in enumerate(filenode_as_lines_tokens(filenode), 1))

    for annotation, group in groupby(annotated_lines, lambda item: item[0]):
        yield annotation, [(line_no, tokens)
                           for _, line_no, tokens in group]
138
138
139
139
def render_tokenstream(tokenstream):
    """
    Render a (possibly op-annotated) token stream as escaped HTML: one
    ``<span>`` per rolled-up token class, with ``ins``/``del`` op tags
    wrapped around the affected text runs.
    """
    parts = []
    for token_class, op_text_pairs in rollup_tokenstream(tokenstream):

        if token_class:
            parts.append(u'<span class="%s">' % token_class)
        else:
            parts.append(u'<span>')

        for op_tag, token_text in op_text_pairs:
            if op_tag:
                parts.append(u'<%s>' % op_tag)

            # TODO: dan: investigate showing hidden characters like space/nl/tab
            parts.append(html_escape(token_text))

            if op_tag:
                parts.append(u'</%s>' % op_tag)

        parts.append(u'</span>')

    return ''.join(parts)
170
170
171
171
def rollup_tokenstream(tokenstream):
    """
    Group a token stream of the format:

        ('class', 'op', 'text')
    or
        ('class', 'text')

    into

        [('class1',
            [('op1', 'text'),
             ('op2', 'text')]),
         ('class2',
            [('op3', 'text')])]

    This is used to get the minimal tags necessary when
    rendering to html eg for a token stream ie.

    <span class="A"><ins>he</ins>llo</span>
    vs
    <span class="A"><ins>he</ins></span><span class="A">llo</span>

    If a 2 tuple is passed in, the output op will be an empty string.

    eg:

    >>> rollup_tokenstream([('classA', '', 'h'),
    ...                     ('classA', 'del', 'ell'),
    ...                     ('classA', '', 'o'),
    ...                     ('classB', '', ' '),
    ...                     ('classA', '', 'the'),
    ...                     ('classA', '', 're')])
    [('classA', [('', 'h'), ('del', 'ell'), ('', 'o')]),
     ('classB', [('', ' ')]),
     ('classA', [('', 'there')])]
    """
    # normalize 2-tuples (class, text) into 3-tuples with an empty op
    if tokenstream and len(tokenstream[0]) == 2:
        tokenstream = ((t[0], '', t[1]) for t in tokenstream)

    result = []
    for token_class, op_list in groupby(tokenstream, lambda t: t[0]):
        ops = []
        for token_op, token_text_list in groupby(op_list, lambda o: o[1]):
            # merge adjacent tokens with identical class and op into one
            # text run so rendering emits the minimal number of tags
            text_buffer = []
            for t_class, t_op, t_text in token_text_list:
                text_buffer.append(t_text)
            ops.append((token_op, ''.join(text_buffer)))
        result.append((token_class, ops))
    return result
225
225
226
226
def tokens_diff(old_tokens, new_tokens, use_diff_match_patch=True):
    """
    Converts a list of (token_class, token_text) tuples to a list of
    (token_class, token_op, token_text) tuples where token_op is one of
    ('ins', 'del', '')

    :param old_tokens: list of (token_class, token_text) tuples of old line
    :param new_tokens: list of (token_class, token_text) tuples of new line
    :param use_diff_match_patch: boolean, will use google's diff match patch
        library which has options to 'smooth' out the character by character
        differences making nicer ins/del blocks
    """

    old_tokens_result = []
    new_tokens_result = []

    # cheap similarity gate on the raw text of both lines, computed before
    # any token- or character-level diffing
    similarity = difflib.SequenceMatcher(None,
        ''.join(token_text for token_class, token_text in old_tokens),
        ''.join(token_text for token_class, token_text in new_tokens)
    ).ratio()

    if similarity < 0.6:  # return, the blocks are too different
        # emit both sides unchanged (op '') rather than a noisy full diff
        for token_class, token_text in old_tokens:
            old_tokens_result.append((token_class, '', token_text))
        for token_class, token_text in new_tokens:
            new_tokens_result.append((token_class, '', token_text))
        return old_tokens_result, new_tokens_result, similarity

    # diff at whole-token-text granularity first
    token_sequence_matcher = difflib.SequenceMatcher(None,
        [x[1] for x in old_tokens],
        [x[1] for x in new_tokens])

    for tag, o1, o2, n1, n2 in token_sequence_matcher.get_opcodes():
        # check the differences by token block types first to give a more
        # nicer "block" level replacement vs character diffs

        if tag == 'equal':
            for token_class, token_text in old_tokens[o1:o2]:
                old_tokens_result.append((token_class, '', token_text))
            for token_class, token_text in new_tokens[n1:n2]:
                new_tokens_result.append((token_class, '', token_text))
        elif tag == 'delete':
            for token_class, token_text in old_tokens[o1:o2]:
                old_tokens_result.append((token_class, 'del', token_text))
        elif tag == 'insert':
            for token_class, token_text in new_tokens[n1:n2]:
                new_tokens_result.append((token_class, 'ins', token_text))
        elif tag == 'replace':
            # if same type token blocks must be replaced, do a diff on the
            # characters in the token blocks to show individual changes

            # explode the replaced ranges into per-character tokens so each
            # character keeps the css class of the token it came from
            old_char_tokens = []
            new_char_tokens = []
            for token_class, token_text in old_tokens[o1:o2]:
                for char in token_text:
                    old_char_tokens.append((token_class, char))

            for token_class, token_text in new_tokens[n1:n2]:
                for char in token_text:
                    new_char_tokens.append((token_class, char))

            old_string = ''.join([token_text for
                token_class, token_text in old_char_tokens])
            new_string = ''.join([token_text for
                token_class, token_text in new_char_tokens])

            char_sequence = difflib.SequenceMatcher(
                None, old_string, new_string)
            copcodes = char_sequence.get_opcodes()
            obuffer, nbuffer = [], []

            if use_diff_match_patch:
                dmp = diff_match_patch()
                dmp.Diff_EditCost = 11  # TODO: dan: extract this to a setting
                reps = dmp.diff_main(old_string, new_string)
                dmp.diff_cleanupEfficiency(reps)

                # walk the dmp ops; a/b track the character offsets into
                # old_char_tokens/new_char_tokens so each diffed character
                # can be mapped back to its original token class
                a, b = 0, 0
                for op, rep in reps:
                    l = len(rep)
                    if op == 0:  # equal run: present on both sides
                        for i, c in enumerate(rep):
                            obuffer.append((old_char_tokens[a+i][0], '', c))
                            nbuffer.append((new_char_tokens[b+i][0], '', c))
                        a += l
                        b += l
                    elif op == -1:  # deletion: only advances the old side
                        for i, c in enumerate(rep):
                            obuffer.append((old_char_tokens[a+i][0], 'del', c))
                        a += l
                    elif op == 1:  # insertion: only advances the new side
                        for i, c in enumerate(rep):
                            nbuffer.append((new_char_tokens[b+i][0], 'ins', c))
                        b += l
            else:
                # fallback: plain difflib character opcodes, no smoothing
                for ctag, co1, co2, cn1, cn2 in copcodes:
                    if ctag == 'equal':
                        for token_class, token_text in old_char_tokens[co1:co2]:
                            obuffer.append((token_class, '', token_text))
                        for token_class, token_text in new_char_tokens[cn1:cn2]:
                            nbuffer.append((token_class, '', token_text))
                    elif ctag == 'delete':
                        for token_class, token_text in old_char_tokens[co1:co2]:
                            obuffer.append((token_class, 'del', token_text))
                    elif ctag == 'insert':
                        for token_class, token_text in new_char_tokens[cn1:cn2]:
                            nbuffer.append((token_class, 'ins', token_text))
                    elif ctag == 'replace':
                        # a replace is rendered as delete-old plus insert-new
                        for token_class, token_text in old_char_tokens[co1:co2]:
                            obuffer.append((token_class, 'del', token_text))
                        for token_class, token_text in new_char_tokens[cn1:cn2]:
                            nbuffer.append((token_class, 'ins', token_text))

            old_tokens_result.extend(obuffer)
            new_tokens_result.extend(nbuffer)

    return old_tokens_result, new_tokens_result, similarity
344
344
345
345
class DiffSet(object):
    """
    An object for parsing the diff result from diffs.DiffProcessor and
    adding highlighting, side by side/unified renderings and line diffs
    """

    # highlighting strategies, trading correctness for speed
    HL_REAL = 'REAL'  # highlights using original file, slow
    HL_FAST = 'FAST'  # highlights using just the line, fast but not correct
                      # in the case of multiline code
    HL_NONE = 'NONE'  # no highlighting, fastest
356
356
357 def __init__(self, highlight_mode=HL_REAL, repo_name=None,
357 def __init__(self, highlight_mode=HL_REAL, repo_name=None,
358 source_repo_name=None,
358 source_repo_name=None,
359 source_node_getter=lambda filename: None,
359 source_node_getter=lambda filename: None,
360 target_node_getter=lambda filename: None,
360 target_node_getter=lambda filename: None,
361 source_nodes=None, target_nodes=None,
361 source_nodes=None, target_nodes=None,
362 max_file_size_limit=150 * 1024, # files over this size will
362 max_file_size_limit=150 * 1024, # files over this size will
363 # use fast highlighting
363 # use fast highlighting
364 comments=None,
364 comments=None,
365 ):
365 ):
366
366
367 self.highlight_mode = highlight_mode
367 self.highlight_mode = highlight_mode
368 self.highlighted_filenodes = {}
368 self.highlighted_filenodes = {}
369 self.source_node_getter = source_node_getter
369 self.source_node_getter = source_node_getter
370 self.target_node_getter = target_node_getter
370 self.target_node_getter = target_node_getter
371 self.source_nodes = source_nodes or {}
371 self.source_nodes = source_nodes or {}
372 self.target_nodes = target_nodes or {}
372 self.target_nodes = target_nodes or {}
373 self.repo_name = repo_name
373 self.repo_name = repo_name
374 self.source_repo_name = source_repo_name or repo_name
374 self.source_repo_name = source_repo_name or repo_name
375 self.comments = comments or {}
375 self.comments = comments or {}
376 self.comments_store = self.comments.copy()
376 self.comments_store = self.comments.copy()
377 self.max_file_size_limit = max_file_size_limit
377 self.max_file_size_limit = max_file_size_limit
378
378
379 def render_patchset(self, patchset, source_ref=None, target_ref=None):
379 def render_patchset(self, patchset, source_ref=None, target_ref=None):
380 diffset = AttributeDict(dict(
380 diffset = AttributeDict(dict(
381 lines_added=0,
381 lines_added=0,
382 lines_deleted=0,
382 lines_deleted=0,
383 changed_files=0,
383 changed_files=0,
384 files=[],
384 files=[],
385 file_stats={},
385 file_stats={},
386 limited_diff=isinstance(patchset, LimitedDiffContainer),
386 limited_diff=isinstance(patchset, LimitedDiffContainer),
387 repo_name=self.repo_name,
387 repo_name=self.repo_name,
388 source_repo_name=self.source_repo_name,
388 source_repo_name=self.source_repo_name,
389 source_ref=source_ref,
389 source_ref=source_ref,
390 target_ref=target_ref,
390 target_ref=target_ref,
391 ))
391 ))
392 for patch in patchset:
392 for patch in patchset:
393 diffset.file_stats[patch['filename']] = patch['stats']
393 diffset.file_stats[patch['filename']] = patch['stats']
394 filediff = self.render_patch(patch)
394 filediff = self.render_patch(patch)
395 filediff.diffset = diffset
395 filediff.diffset = diffset
396 diffset.files.append(filediff)
396 diffset.files.append(filediff)
397 diffset.changed_files += 1
397 diffset.changed_files += 1
398 if not patch['stats']['binary']:
398 if not patch['stats']['binary']:
399 diffset.lines_added += patch['stats']['added']
399 diffset.lines_added += patch['stats']['added']
400 diffset.lines_deleted += patch['stats']['deleted']
400 diffset.lines_deleted += patch['stats']['deleted']
401
401
402 return diffset
402 return diffset
403
403
    # filename -> lexer cache; class-level, so it is shared by all
    # DiffSet instances in the process
    _lexer_cache = {}

    def _get_lexer_for_filename(self, filename, filenode=None):
        """
        Return the lexer for ``filename``, preferring the node's own
        lexer/extension when ``filenode`` is given, and letting any
        custom lexer mapping (``get_custom_lexer``) override it.
        """
        # cached because we might need to call it twice for source/target
        if filename not in self._lexer_cache:
            if filenode:
                lexer = filenode.lexer
                extension = filenode.extension
            else:
                lexer = FileNode.get_lexer(filename=filename)
                extension = filename.split('.')[-1]

            lexer = get_custom_lexer(extension) or lexer
            self._lexer_cache[filename] = lexer
        return self._lexer_cache[filename]
419
419
420 def render_patch(self, patch):
420 def render_patch(self, patch):
421 log.debug('rendering diff for %r' % patch['filename'])
421 log.debug('rendering diff for %r' % patch['filename'])
422
422
423 source_filename = patch['original_filename']
423 source_filename = patch['original_filename']
424 target_filename = patch['filename']
424 target_filename = patch['filename']
425
425
426 source_lexer = plain_text_lexer
426 source_lexer = plain_text_lexer
427 target_lexer = plain_text_lexer
427 target_lexer = plain_text_lexer
428
428
429 if not patch['stats']['binary']:
429 if not patch['stats']['binary']:
430 if self.highlight_mode == self.HL_REAL:
430 if self.highlight_mode == self.HL_REAL:
431 if (source_filename and patch['operation'] in ('D', 'M')
431 if (source_filename and patch['operation'] in ('D', 'M')
432 and source_filename not in self.source_nodes):
432 and source_filename not in self.source_nodes):
433 self.source_nodes[source_filename] = (
433 self.source_nodes[source_filename] = (
434 self.source_node_getter(source_filename))
434 self.source_node_getter(source_filename))
435
435
436 if (target_filename and patch['operation'] in ('A', 'M')
436 if (target_filename and patch['operation'] in ('A', 'M')
437 and target_filename not in self.target_nodes):
437 and target_filename not in self.target_nodes):
438 self.target_nodes[target_filename] = (
438 self.target_nodes[target_filename] = (
439 self.target_node_getter(target_filename))
439 self.target_node_getter(target_filename))
440
440
441 elif self.highlight_mode == self.HL_FAST:
441 elif self.highlight_mode == self.HL_FAST:
442 source_lexer = self._get_lexer_for_filename(source_filename)
442 source_lexer = self._get_lexer_for_filename(source_filename)
443 target_lexer = self._get_lexer_for_filename(target_filename)
443 target_lexer = self._get_lexer_for_filename(target_filename)
444
444
445 source_file = self.source_nodes.get(source_filename, source_filename)
445 source_file = self.source_nodes.get(source_filename, source_filename)
446 target_file = self.target_nodes.get(target_filename, target_filename)
446 target_file = self.target_nodes.get(target_filename, target_filename)
447
447
448 source_filenode, target_filenode = None, None
448 source_filenode, target_filenode = None, None
449
449
450 # TODO: dan: FileNode.lexer works on the content of the file - which
450 # TODO: dan: FileNode.lexer works on the content of the file - which
451 # can be slow - issue #4289 explains a lexer clean up - which once
451 # can be slow - issue #4289 explains a lexer clean up - which once
452 # done can allow caching a lexer for a filenode to avoid the file lookup
452 # done can allow caching a lexer for a filenode to avoid the file lookup
453 if isinstance(source_file, FileNode):
453 if isinstance(source_file, FileNode):
454 source_filenode = source_file
454 source_filenode = source_file
455 #source_lexer = source_file.lexer
455 #source_lexer = source_file.lexer
456 source_lexer = self._get_lexer_for_filename(source_filename)
456 source_lexer = self._get_lexer_for_filename(source_filename)
457 source_file.lexer = source_lexer
457 source_file.lexer = source_lexer
458
458
459 if isinstance(target_file, FileNode):
459 if isinstance(target_file, FileNode):
460 target_filenode = target_file
460 target_filenode = target_file
461 #target_lexer = target_file.lexer
461 #target_lexer = target_file.lexer
462 target_lexer = self._get_lexer_for_filename(target_filename)
462 target_lexer = self._get_lexer_for_filename(target_filename)
463 target_file.lexer = target_lexer
463 target_file.lexer = target_lexer
464
464
465 source_file_path, target_file_path = None, None
465 source_file_path, target_file_path = None, None
466
466
467 if source_filename != '/dev/null':
467 if source_filename != '/dev/null':
468 source_file_path = source_filename
468 source_file_path = source_filename
469 if target_filename != '/dev/null':
469 if target_filename != '/dev/null':
470 target_file_path = target_filename
470 target_file_path = target_filename
471
471
472 source_file_type = source_lexer.name
472 source_file_type = source_lexer.name
473 target_file_type = target_lexer.name
473 target_file_type = target_lexer.name
474
474
475 filediff = AttributeDict({
475 filediff = AttributeDict({
476 'source_file_path': source_file_path,
476 'source_file_path': source_file_path,
477 'target_file_path': target_file_path,
477 'target_file_path': target_file_path,
478 'source_filenode': source_filenode,
478 'source_filenode': source_filenode,
479 'target_filenode': target_filenode,
479 'target_filenode': target_filenode,
480 'source_file_type': target_file_type,
480 'source_file_type': target_file_type,
481 'target_file_type': source_file_type,
481 'target_file_type': source_file_type,
482 'patch': {'filename': patch['filename'], 'stats': patch['stats']},
482 'patch': {'filename': patch['filename'], 'stats': patch['stats']},
483 'operation': patch['operation'],
483 'operation': patch['operation'],
484 'source_mode': patch['stats']['old_mode'],
484 'source_mode': patch['stats']['old_mode'],
485 'target_mode': patch['stats']['new_mode'],
485 'target_mode': patch['stats']['new_mode'],
486 'limited_diff': isinstance(patch, LimitedDiffContainer),
486 'limited_diff': isinstance(patch, LimitedDiffContainer),
487 'hunks': [],
487 'hunks': [],
488 'diffset': self,
488 'diffset': self,
489 })
489 })
490
490
491 for hunk in patch['chunks'][1:]:
491 for hunk in patch['chunks'][1:]:
492 hunkbit = self.parse_hunk(hunk, source_file, target_file)
492 hunkbit = self.parse_hunk(hunk, source_file, target_file)
493 hunkbit.source_file_path = source_file_path
493 hunkbit.source_file_path = source_file_path
494 hunkbit.target_file_path = target_file_path
494 hunkbit.target_file_path = target_file_path
495 filediff.hunks.append(hunkbit)
495 filediff.hunks.append(hunkbit)
496
496
497 left_comments = {}
497 left_comments = {}
498 if source_file_path in self.comments_store:
498 if source_file_path in self.comments_store:
499 for lineno, comments in self.comments_store[source_file_path].items():
499 for lineno, comments in self.comments_store[source_file_path].items():
500 left_comments[lineno] = comments
500 left_comments[lineno] = comments
501
501
502 if target_file_path in self.comments_store:
502 if target_file_path in self.comments_store:
503 for lineno, comments in self.comments_store[target_file_path].items():
503 for lineno, comments in self.comments_store[target_file_path].items():
504 left_comments[lineno] = comments
504 left_comments[lineno] = comments
505 # left comments are one that we couldn't place in diff lines.
505 # left comments are one that we couldn't place in diff lines.
506 # could be outdated, or the diff changed and this line is no
506 # could be outdated, or the diff changed and this line is no
507 # longer available
507 # longer available
508 filediff.left_comments = left_comments
508 filediff.left_comments = left_comments
509
509
510 return filediff
510 return filediff
511
511
512 def parse_hunk(self, hunk, source_file, target_file):
512 def parse_hunk(self, hunk, source_file, target_file):
513 result = AttributeDict(dict(
513 result = AttributeDict(dict(
514 source_start=hunk['source_start'],
514 source_start=hunk['source_start'],
515 source_length=hunk['source_length'],
515 source_length=hunk['source_length'],
516 target_start=hunk['target_start'],
516 target_start=hunk['target_start'],
517 target_length=hunk['target_length'],
517 target_length=hunk['target_length'],
518 section_header=hunk['section_header'],
518 section_header=hunk['section_header'],
519 lines=[],
519 lines=[],
520 ))
520 ))
521 before, after = [], []
521 before, after = [], []
522
522
523 for line in hunk['lines']:
523 for line in hunk['lines']:
524
524
525 if line['action'] == 'unmod':
525 if line['action'] == 'unmod':
526 result.lines.extend(
526 result.lines.extend(
527 self.parse_lines(before, after, source_file, target_file))
527 self.parse_lines(before, after, source_file, target_file))
528 after.append(line)
528 after.append(line)
529 before.append(line)
529 before.append(line)
530 elif line['action'] == 'add':
530 elif line['action'] == 'add':
531 after.append(line)
531 after.append(line)
532 elif line['action'] == 'del':
532 elif line['action'] == 'del':
533 before.append(line)
533 before.append(line)
534 elif line['action'] == 'old-no-nl':
534 elif line['action'] == 'old-no-nl':
535 before.append(line)
535 before.append(line)
536 elif line['action'] == 'new-no-nl':
536 elif line['action'] == 'new-no-nl':
537 after.append(line)
537 after.append(line)
538
538
539 result.lines.extend(
539 result.lines.extend(
540 self.parse_lines(before, after, source_file, target_file))
540 self.parse_lines(before, after, source_file, target_file))
541 result.unified = self.as_unified(result.lines)
541 result.unified = self.as_unified(result.lines)
542 result.sideside = result.lines
542 result.sideside = result.lines
543
543
544 return result
544 return result
545
545
546 def parse_lines(self, before_lines, after_lines, source_file, target_file):
546 def parse_lines(self, before_lines, after_lines, source_file, target_file):
547 # TODO: dan: investigate doing the diff comparison and fast highlighting
547 # TODO: dan: investigate doing the diff comparison and fast highlighting
548 # on the entire before and after buffered block lines rather than by
548 # on the entire before and after buffered block lines rather than by
549 # line, this means we can get better 'fast' highlighting if the context
549 # line, this means we can get better 'fast' highlighting if the context
550 # allows it - eg.
550 # allows it - eg.
551 # line 4: """
551 # line 4: """
552 # line 5: this gets highlighted as a string
552 # line 5: this gets highlighted as a string
553 # line 6: """
553 # line 6: """
554
554
555 lines = []
555 lines = []
556
557 before_newline = AttributeDict()
558 after_newline = AttributeDict()
559 if before_lines and before_lines[-1]['action'] == 'old-no-nl':
560 before_newline_line = before_lines.pop(-1)
561 before_newline.content = '\n {}'.format(
562 render_tokenstream(
563 [(x[0], '', x[1])
564 for x in [('nonl', before_newline_line['line'])]]))
565
566 if after_lines and after_lines[-1]['action'] == 'new-no-nl':
567 after_newline_line = after_lines.pop(-1)
568 after_newline.content = '\n {}'.format(
569 render_tokenstream(
570 [(x[0], '', x[1])
571 for x in [('nonl', after_newline_line['line'])]]))
572
556 while before_lines or after_lines:
573 while before_lines or after_lines:
557 before, after = None, None
574 before, after = None, None
558 before_tokens, after_tokens = None, None
575 before_tokens, after_tokens = None, None
559
576
560 if before_lines:
577 if before_lines:
561 before = before_lines.pop(0)
578 before = before_lines.pop(0)
562 if after_lines:
579 if after_lines:
563 after = after_lines.pop(0)
580 after = after_lines.pop(0)
564
581
565 original = AttributeDict()
582 original = AttributeDict()
566 modified = AttributeDict()
583 modified = AttributeDict()
567
584
568 if before:
585 if before:
569 if before['action'] == 'old-no-nl':
586 if before['action'] == 'old-no-nl':
570 before_tokens = [('nonl', before['line'])]
587 before_tokens = [('nonl', before['line'])]
571 else:
588 else:
572 before_tokens = self.get_line_tokens(
589 before_tokens = self.get_line_tokens(
573 line_text=before['line'],
590 line_text=before['line'],
574 line_number=before['old_lineno'],
591 line_number=before['old_lineno'],
575 file=source_file)
592 file=source_file)
576 original.lineno = before['old_lineno']
593 original.lineno = before['old_lineno']
577 original.content = before['line']
594 original.content = before['line']
578 original.action = self.action_to_op(before['action'])
595 original.action = self.action_to_op(before['action'])
579 original.comments = self.get_comments_for('old',
596 original.comments = self.get_comments_for('old',
580 source_file, before['old_lineno'])
597 source_file, before['old_lineno'])
581
598
582 if after:
599 if after:
583 if after['action'] == 'new-no-nl':
600 if after['action'] == 'new-no-nl':
584 after_tokens = [('nonl', after['line'])]
601 after_tokens = [('nonl', after['line'])]
585 else:
602 else:
586 after_tokens = self.get_line_tokens(
603 after_tokens = self.get_line_tokens(
587 line_text=after['line'], line_number=after['new_lineno'],
604 line_text=after['line'], line_number=after['new_lineno'],
588 file=target_file)
605 file=target_file)
589 modified.lineno = after['new_lineno']
606 modified.lineno = after['new_lineno']
590 modified.content = after['line']
607 modified.content = after['line']
591 modified.action = self.action_to_op(after['action'])
608 modified.action = self.action_to_op(after['action'])
592 modified.comments = self.get_comments_for('new',
609 modified.comments = self.get_comments_for('new',
593 target_file, after['new_lineno'])
610 target_file, after['new_lineno'])
594
611
595 # diff the lines
612 # diff the lines
596 if before_tokens and after_tokens:
613 if before_tokens and after_tokens:
597 o_tokens, m_tokens, similarity = tokens_diff(
614 o_tokens, m_tokens, similarity = tokens_diff(
598 before_tokens, after_tokens)
615 before_tokens, after_tokens)
599 original.content = render_tokenstream(o_tokens)
616 original.content = render_tokenstream(o_tokens)
600 modified.content = render_tokenstream(m_tokens)
617 modified.content = render_tokenstream(m_tokens)
601 elif before_tokens:
618 elif before_tokens:
602 original.content = render_tokenstream(
619 original.content = render_tokenstream(
603 [(x[0], '', x[1]) for x in before_tokens])
620 [(x[0], '', x[1]) for x in before_tokens])
604 elif after_tokens:
621 elif after_tokens:
605 modified.content = render_tokenstream(
622 modified.content = render_tokenstream(
606 [(x[0], '', x[1]) for x in after_tokens])
623 [(x[0], '', x[1]) for x in after_tokens])
607
624
625 if not before_lines and before_newline:
626 original.content += before_newline.content
627 before_newline = None
628 if not after_lines and after_newline:
629 modified.content += after_newline.content
630 after_newline = None
631
608 lines.append(AttributeDict({
632 lines.append(AttributeDict({
609 'original': original,
633 'original': original,
610 'modified': modified,
634 'modified': modified,
611 }))
635 }))
612
636
613 return lines
637 return lines
614
638
615 def get_comments_for(self, version, filename, line_number):
639 def get_comments_for(self, version, filename, line_number):
616 if hasattr(filename, 'unicode_path'):
640 if hasattr(filename, 'unicode_path'):
617 filename = filename.unicode_path
641 filename = filename.unicode_path
618
642
619 if not isinstance(filename, basestring):
643 if not isinstance(filename, basestring):
620 return None
644 return None
621
645
622 line_key = {
646 line_key = {
623 'old': 'o',
647 'old': 'o',
624 'new': 'n',
648 'new': 'n',
625 }[version] + str(line_number)
649 }[version] + str(line_number)
626
650
627 if filename in self.comments_store:
651 if filename in self.comments_store:
628 file_comments = self.comments_store[filename]
652 file_comments = self.comments_store[filename]
629 if line_key in file_comments:
653 if line_key in file_comments:
630 return file_comments.pop(line_key)
654 return file_comments.pop(line_key)
631
655
632 def get_line_tokens(self, line_text, line_number, file=None):
656 def get_line_tokens(self, line_text, line_number, file=None):
633 filenode = None
657 filenode = None
634 filename = None
658 filename = None
635
659
636 if isinstance(file, basestring):
660 if isinstance(file, basestring):
637 filename = file
661 filename = file
638 elif isinstance(file, FileNode):
662 elif isinstance(file, FileNode):
639 filenode = file
663 filenode = file
640 filename = file.unicode_path
664 filename = file.unicode_path
641
665
642 if self.highlight_mode == self.HL_REAL and filenode:
666 if self.highlight_mode == self.HL_REAL and filenode:
643 lexer = self._get_lexer_for_filename(filename)
667 lexer = self._get_lexer_for_filename(filename)
644 file_size_allowed = file.size < self.max_file_size_limit
668 file_size_allowed = file.size < self.max_file_size_limit
645 if line_number and file_size_allowed:
669 if line_number and file_size_allowed:
646 return self.get_tokenized_filenode_line(
670 return self.get_tokenized_filenode_line(
647 file, line_number, lexer)
671 file, line_number, lexer)
648
672
649 if self.highlight_mode in (self.HL_REAL, self.HL_FAST) and filename:
673 if self.highlight_mode in (self.HL_REAL, self.HL_FAST) and filename:
650 lexer = self._get_lexer_for_filename(filename)
674 lexer = self._get_lexer_for_filename(filename)
651 return list(tokenize_string(line_text, lexer))
675 return list(tokenize_string(line_text, lexer))
652
676
653 return list(tokenize_string(line_text, plain_text_lexer))
677 return list(tokenize_string(line_text, plain_text_lexer))
654
678
655 def get_tokenized_filenode_line(self, filenode, line_number, lexer=None):
679 def get_tokenized_filenode_line(self, filenode, line_number, lexer=None):
656
680
657 if filenode not in self.highlighted_filenodes:
681 if filenode not in self.highlighted_filenodes:
658 tokenized_lines = filenode_as_lines_tokens(filenode, lexer)
682 tokenized_lines = filenode_as_lines_tokens(filenode, lexer)
659 self.highlighted_filenodes[filenode] = tokenized_lines
683 self.highlighted_filenodes[filenode] = tokenized_lines
660 return self.highlighted_filenodes[filenode][line_number - 1]
684 return self.highlighted_filenodes[filenode][line_number - 1]
661
685
662 def action_to_op(self, action):
686 def action_to_op(self, action):
663 return {
687 return {
664 'add': '+',
688 'add': '+',
665 'del': '-',
689 'del': '-',
666 'unmod': ' ',
690 'unmod': ' ',
667 'old-no-nl': ' ',
691 'old-no-nl': ' ',
668 'new-no-nl': ' ',
692 'new-no-nl': ' ',
669 }.get(action, action)
693 }.get(action, action)
670
694
671 def as_unified(self, lines):
695 def as_unified(self, lines):
672 """
696 """
673 Return a generator that yields the lines of a diff in unified order
697 Return a generator that yields the lines of a diff in unified order
674 """
698 """
675 def generator():
699 def generator():
676 buf = []
700 buf = []
677 for line in lines:
701 for line in lines:
678
702
679 if buf and not line.original or line.original.action == ' ':
703 if buf and not line.original or line.original.action == ' ':
680 for b in buf:
704 for b in buf:
681 yield b
705 yield b
682 buf = []
706 buf = []
683
707
684 if line.original:
708 if line.original:
685 if line.original.action == ' ':
709 if line.original.action == ' ':
686 yield (line.original.lineno, line.modified.lineno,
710 yield (line.original.lineno, line.modified.lineno,
687 line.original.action, line.original.content,
711 line.original.action, line.original.content,
688 line.original.comments)
712 line.original.comments)
689 continue
713 continue
690
714
691 if line.original.action == '-':
715 if line.original.action == '-':
692 yield (line.original.lineno, None,
716 yield (line.original.lineno, None,
693 line.original.action, line.original.content,
717 line.original.action, line.original.content,
694 line.original.comments)
718 line.original.comments)
695
719
696 if line.modified.action == '+':
720 if line.modified.action == '+':
697 buf.append((
721 buf.append((
698 None, line.modified.lineno,
722 None, line.modified.lineno,
699 line.modified.action, line.modified.content,
723 line.modified.action, line.modified.content,
700 line.modified.comments))
724 line.modified.comments))
701 continue
725 continue
702
726
703 if line.modified:
727 if line.modified:
704 yield (None, line.modified.lineno,
728 yield (None, line.modified.lineno,
705 line.modified.action, line.modified.content,
729 line.modified.action, line.modified.content,
706 line.modified.comments)
730 line.modified.comments)
707
731
708 for b in buf:
732 for b in buf:
709 yield b
733 yield b
710
734
711 return generator()
735 return generator()
@@ -1,1170 +1,1170 b''
1 # -*- coding: utf-8 -*-
1 # -*- coding: utf-8 -*-
2
2
3 # Copyright (C) 2011-2017 RhodeCode GmbH
3 # Copyright (C) 2011-2017 RhodeCode GmbH
4 #
4 #
5 # This program is free software: you can redistribute it and/or modify
5 # This program is free software: you can redistribute it and/or modify
6 # it under the terms of the GNU Affero General Public License, version 3
6 # it under the terms of the GNU Affero General Public License, version 3
7 # (only), as published by the Free Software Foundation.
7 # (only), as published by the Free Software Foundation.
8 #
8 #
9 # This program is distributed in the hope that it will be useful,
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU General Public License for more details.
12 # GNU General Public License for more details.
13 #
13 #
14 # You should have received a copy of the GNU Affero General Public License
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
16 #
16 #
17 # This program is dual-licensed. If you wish to learn more about the
17 # This program is dual-licensed. If you wish to learn more about the
18 # RhodeCode Enterprise Edition, including its added features, Support services,
18 # RhodeCode Enterprise Edition, including its added features, Support services,
19 # and proprietary license terms, please see https://rhodecode.com/licenses/
19 # and proprietary license terms, please see https://rhodecode.com/licenses/
20
20
21
21
22 """
22 """
23 Set of diffing helpers, previously part of vcs
23 Set of diffing helpers, previously part of vcs
24 """
24 """
25
25
26 import collections
26 import collections
27 import re
27 import re
28 import difflib
28 import difflib
29 import logging
29 import logging
30
30
31 from itertools import tee, imap
31 from itertools import tee, imap
32
32
33 from pylons.i18n.translation import _
33 from pylons.i18n.translation import _
34
34
35 from rhodecode.lib.vcs.exceptions import VCSError
35 from rhodecode.lib.vcs.exceptions import VCSError
36 from rhodecode.lib.vcs.nodes import FileNode, SubModuleNode
36 from rhodecode.lib.vcs.nodes import FileNode, SubModuleNode
37 from rhodecode.lib.vcs.backends.base import EmptyCommit
37 from rhodecode.lib.vcs.backends.base import EmptyCommit
38 from rhodecode.lib.helpers import escape
38 from rhodecode.lib.helpers import escape
39 from rhodecode.lib.utils2 import safe_unicode
39 from rhodecode.lib.utils2 import safe_unicode
40
40
41 log = logging.getLogger(__name__)
41 log = logging.getLogger(__name__)
42
42
43 # define max context, a file with more than this numbers of lines is unusable
43 # define max context, a file with more than this numbers of lines is unusable
44 # in browser anyway
44 # in browser anyway
45 MAX_CONTEXT = 1024 * 1014
45 MAX_CONTEXT = 1024 * 1014
46
46
47
47
48 class OPS(object):
48 class OPS(object):
49 ADD = 'A'
49 ADD = 'A'
50 MOD = 'M'
50 MOD = 'M'
51 DEL = 'D'
51 DEL = 'D'
52
52
53
53
54 def wrap_to_table(str_):
54 def wrap_to_table(str_):
55 return '''<table class="code-difftable">
55 return '''<table class="code-difftable">
56 <tr class="line no-comment">
56 <tr class="line no-comment">
57 <td class="add-comment-line tooltip" title="%s"><span class="add-comment-content"></span></td>
57 <td class="add-comment-line tooltip" title="%s"><span class="add-comment-content"></span></td>
58 <td></td>
58 <td></td>
59 <td class="lineno new"></td>
59 <td class="lineno new"></td>
60 <td class="code no-comment"><pre>%s</pre></td>
60 <td class="code no-comment"><pre>%s</pre></td>
61 </tr>
61 </tr>
62 </table>''' % (_('Click to comment'), str_)
62 </table>''' % (_('Click to comment'), str_)
63
63
64
64
65 def wrapped_diff(filenode_old, filenode_new, diff_limit=None, file_limit=None,
65 def wrapped_diff(filenode_old, filenode_new, diff_limit=None, file_limit=None,
66 show_full_diff=False, ignore_whitespace=True, line_context=3,
66 show_full_diff=False, ignore_whitespace=True, line_context=3,
67 enable_comments=False):
67 enable_comments=False):
68 """
68 """
69 returns a wrapped diff into a table, checks for cut_off_limit for file and
69 returns a wrapped diff into a table, checks for cut_off_limit for file and
70 whole diff and presents proper message
70 whole diff and presents proper message
71 """
71 """
72
72
73 if filenode_old is None:
73 if filenode_old is None:
74 filenode_old = FileNode(filenode_new.path, '', EmptyCommit())
74 filenode_old = FileNode(filenode_new.path, '', EmptyCommit())
75
75
76 if filenode_old.is_binary or filenode_new.is_binary:
76 if filenode_old.is_binary or filenode_new.is_binary:
77 diff = wrap_to_table(_('Binary file'))
77 diff = wrap_to_table(_('Binary file'))
78 stats = None
78 stats = None
79 size = 0
79 size = 0
80 data = None
80 data = None
81
81
82 elif diff_limit != -1 and (diff_limit is None or
82 elif diff_limit != -1 and (diff_limit is None or
83 (filenode_old.size < diff_limit and filenode_new.size < diff_limit)):
83 (filenode_old.size < diff_limit and filenode_new.size < diff_limit)):
84
84
85 f_gitdiff = get_gitdiff(filenode_old, filenode_new,
85 f_gitdiff = get_gitdiff(filenode_old, filenode_new,
86 ignore_whitespace=ignore_whitespace,
86 ignore_whitespace=ignore_whitespace,
87 context=line_context)
87 context=line_context)
88 diff_processor = DiffProcessor(
88 diff_processor = DiffProcessor(
89 f_gitdiff, format='gitdiff', diff_limit=diff_limit,
89 f_gitdiff, format='gitdiff', diff_limit=diff_limit,
90 file_limit=file_limit, show_full_diff=show_full_diff)
90 file_limit=file_limit, show_full_diff=show_full_diff)
91 _parsed = diff_processor.prepare()
91 _parsed = diff_processor.prepare()
92
92
93 diff = diff_processor.as_html(enable_comments=enable_comments)
93 diff = diff_processor.as_html(enable_comments=enable_comments)
94 stats = _parsed[0]['stats'] if _parsed else None
94 stats = _parsed[0]['stats'] if _parsed else None
95 size = len(diff or '')
95 size = len(diff or '')
96 data = _parsed[0] if _parsed else None
96 data = _parsed[0] if _parsed else None
97 else:
97 else:
98 diff = wrap_to_table(_('Changeset was too big and was cut off, use '
98 diff = wrap_to_table(_('Changeset was too big and was cut off, use '
99 'diff menu to display this diff'))
99 'diff menu to display this diff'))
100 stats = None
100 stats = None
101 size = 0
101 size = 0
102 data = None
102 data = None
103 if not diff:
103 if not diff:
104 submodules = filter(lambda o: isinstance(o, SubModuleNode),
104 submodules = filter(lambda o: isinstance(o, SubModuleNode),
105 [filenode_new, filenode_old])
105 [filenode_new, filenode_old])
106 if submodules:
106 if submodules:
107 diff = wrap_to_table(escape('Submodule %r' % submodules[0]))
107 diff = wrap_to_table(escape('Submodule %r' % submodules[0]))
108 else:
108 else:
109 diff = wrap_to_table(_('No changes detected'))
109 diff = wrap_to_table(_('No changes detected'))
110
110
111 cs1 = filenode_old.commit.raw_id
111 cs1 = filenode_old.commit.raw_id
112 cs2 = filenode_new.commit.raw_id
112 cs2 = filenode_new.commit.raw_id
113
113
114 return size, cs1, cs2, diff, stats, data
114 return size, cs1, cs2, diff, stats, data
115
115
116
116
117 def get_gitdiff(filenode_old, filenode_new, ignore_whitespace=True, context=3):
117 def get_gitdiff(filenode_old, filenode_new, ignore_whitespace=True, context=3):
118 """
118 """
119 Returns git style diff between given ``filenode_old`` and ``filenode_new``.
119 Returns git style diff between given ``filenode_old`` and ``filenode_new``.
120
120
121 :param ignore_whitespace: ignore whitespaces in diff
121 :param ignore_whitespace: ignore whitespaces in diff
122 """
122 """
123 # make sure we pass in default context
123 # make sure we pass in default context
124 context = context or 3
124 context = context or 3
125 # protect against IntOverflow when passing HUGE context
125 # protect against IntOverflow when passing HUGE context
126 if context > MAX_CONTEXT:
126 if context > MAX_CONTEXT:
127 context = MAX_CONTEXT
127 context = MAX_CONTEXT
128
128
129 submodules = filter(lambda o: isinstance(o, SubModuleNode),
129 submodules = filter(lambda o: isinstance(o, SubModuleNode),
130 [filenode_new, filenode_old])
130 [filenode_new, filenode_old])
131 if submodules:
131 if submodules:
132 return ''
132 return ''
133
133
134 for filenode in (filenode_old, filenode_new):
134 for filenode in (filenode_old, filenode_new):
135 if not isinstance(filenode, FileNode):
135 if not isinstance(filenode, FileNode):
136 raise VCSError(
136 raise VCSError(
137 "Given object should be FileNode object, not %s"
137 "Given object should be FileNode object, not %s"
138 % filenode.__class__)
138 % filenode.__class__)
139
139
140 repo = filenode_new.commit.repository
140 repo = filenode_new.commit.repository
141 old_commit = filenode_old.commit or repo.EMPTY_COMMIT
141 old_commit = filenode_old.commit or repo.EMPTY_COMMIT
142 new_commit = filenode_new.commit
142 new_commit = filenode_new.commit
143
143
144 vcs_gitdiff = repo.get_diff(
144 vcs_gitdiff = repo.get_diff(
145 old_commit, new_commit, filenode_new.path,
145 old_commit, new_commit, filenode_new.path,
146 ignore_whitespace, context, path1=filenode_old.path)
146 ignore_whitespace, context, path1=filenode_old.path)
147 return vcs_gitdiff
147 return vcs_gitdiff
148
148
149 NEW_FILENODE = 1
149 NEW_FILENODE = 1
150 DEL_FILENODE = 2
150 DEL_FILENODE = 2
151 MOD_FILENODE = 3
151 MOD_FILENODE = 3
152 RENAMED_FILENODE = 4
152 RENAMED_FILENODE = 4
153 COPIED_FILENODE = 5
153 COPIED_FILENODE = 5
154 CHMOD_FILENODE = 6
154 CHMOD_FILENODE = 6
155 BIN_FILENODE = 7
155 BIN_FILENODE = 7
156
156
157
157
158 class LimitedDiffContainer(object):
158 class LimitedDiffContainer(object):
159
159
160 def __init__(self, diff_limit, cur_diff_size, diff):
160 def __init__(self, diff_limit, cur_diff_size, diff):
161 self.diff = diff
161 self.diff = diff
162 self.diff_limit = diff_limit
162 self.diff_limit = diff_limit
163 self.cur_diff_size = cur_diff_size
163 self.cur_diff_size = cur_diff_size
164
164
165 def __getitem__(self, key):
165 def __getitem__(self, key):
166 return self.diff.__getitem__(key)
166 return self.diff.__getitem__(key)
167
167
168 def __iter__(self):
168 def __iter__(self):
169 for l in self.diff:
169 for l in self.diff:
170 yield l
170 yield l
171
171
172
172
173 class Action(object):
173 class Action(object):
174 """
174 """
175 Contains constants for the action value of the lines in a parsed diff.
175 Contains constants for the action value of the lines in a parsed diff.
176 """
176 """
177
177
178 ADD = 'add'
178 ADD = 'add'
179 DELETE = 'del'
179 DELETE = 'del'
180 UNMODIFIED = 'unmod'
180 UNMODIFIED = 'unmod'
181
181
182 CONTEXT = 'context'
182 CONTEXT = 'context'
183 OLD_NO_NL = 'old-no-nl'
183 OLD_NO_NL = 'old-no-nl'
184 NEW_NO_NL = 'new-no-nl'
184 NEW_NO_NL = 'new-no-nl'
185
185
186
186
187 class DiffProcessor(object):
187 class DiffProcessor(object):
188 """
188 """
189 Give it a unified or git diff and it returns a list of the files that were
189 Give it a unified or git diff and it returns a list of the files that were
190 mentioned in the diff together with a dict of meta information that
190 mentioned in the diff together with a dict of meta information that
191 can be used to render it in a HTML template.
191 can be used to render it in a HTML template.
192
192
193 .. note:: Unicode handling
193 .. note:: Unicode handling
194
194
195 The original diffs are a byte sequence and can contain filenames
195 The original diffs are a byte sequence and can contain filenames
196 in mixed encodings. This class generally returns `unicode` objects
196 in mixed encodings. This class generally returns `unicode` objects
197 since the result is intended for presentation to the user.
197 since the result is intended for presentation to the user.
198
198
199 """
199 """
200 _chunk_re = re.compile(r'^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(.*)')
200 _chunk_re = re.compile(r'^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(.*)')
201 _newline_marker = re.compile(r'^\\ No newline at end of file')
201 _newline_marker = re.compile(r'^\\ No newline at end of file')
202
202
203 # used for inline highlighter word split
203 # used for inline highlighter word split
204 _token_re = re.compile(r'()(&gt;|&lt;|&amp;|\W+?)')
204 _token_re = re.compile(r'()(&gt;|&lt;|&amp;|\W+?)')
205
205
206 # collapse ranges of commits over given number
206 # collapse ranges of commits over given number
207 _collapse_commits_over = 5
207 _collapse_commits_over = 5
208
208
209 def __init__(self, diff, format='gitdiff', diff_limit=None,
209 def __init__(self, diff, format='gitdiff', diff_limit=None,
210 file_limit=None, show_full_diff=True):
210 file_limit=None, show_full_diff=True):
211 """
211 """
212 :param diff: A `Diff` object representing a diff from a vcs backend
212 :param diff: A `Diff` object representing a diff from a vcs backend
213 :param format: format of diff passed, `udiff` or `gitdiff`
213 :param format: format of diff passed, `udiff` or `gitdiff`
214 :param diff_limit: define the size of diff that is considered "big"
214 :param diff_limit: define the size of diff that is considered "big"
215 based on that parameter cut off will be triggered, set to None
215 based on that parameter cut off will be triggered, set to None
216 to show full diff
216 to show full diff
217 """
217 """
218 self._diff = diff
218 self._diff = diff
219 self._format = format
219 self._format = format
220 self.adds = 0
220 self.adds = 0
221 self.removes = 0
221 self.removes = 0
222 # calculate diff size
222 # calculate diff size
223 self.diff_limit = diff_limit
223 self.diff_limit = diff_limit
224 self.file_limit = file_limit
224 self.file_limit = file_limit
225 self.show_full_diff = show_full_diff
225 self.show_full_diff = show_full_diff
226 self.cur_diff_size = 0
226 self.cur_diff_size = 0
227 self.parsed = False
227 self.parsed = False
228 self.parsed_diff = []
228 self.parsed_diff = []
229
229
230 log.debug('Initialized DiffProcessor with %s mode', format)
230 log.debug('Initialized DiffProcessor with %s mode', format)
231 if format == 'gitdiff':
231 if format == 'gitdiff':
232 self.differ = self._highlight_line_difflib
232 self.differ = self._highlight_line_difflib
233 self._parser = self._parse_gitdiff
233 self._parser = self._parse_gitdiff
234 else:
234 else:
235 self.differ = self._highlight_line_udiff
235 self.differ = self._highlight_line_udiff
236 self._parser = self._new_parse_gitdiff
236 self._parser = self._new_parse_gitdiff
237
237
238 def _copy_iterator(self):
238 def _copy_iterator(self):
239 """
239 """
240 make a fresh copy of generator, we should not iterate thru
240 make a fresh copy of generator, we should not iterate thru
241 an original as it's needed for repeating operations on
241 an original as it's needed for repeating operations on
242 this instance of DiffProcessor
242 this instance of DiffProcessor
243 """
243 """
244 self.__udiff, iterator_copy = tee(self.__udiff)
244 self.__udiff, iterator_copy = tee(self.__udiff)
245 return iterator_copy
245 return iterator_copy
246
246
247 def _escaper(self, string):
247 def _escaper(self, string):
248 """
248 """
249 Escaper for diff escapes special chars and checks the diff limit
249 Escaper for diff escapes special chars and checks the diff limit
250
250
251 :param string:
251 :param string:
252 """
252 """
253
253
254 self.cur_diff_size += len(string)
254 self.cur_diff_size += len(string)
255
255
256 if not self.show_full_diff and (self.cur_diff_size > self.diff_limit):
256 if not self.show_full_diff and (self.cur_diff_size > self.diff_limit):
257 raise DiffLimitExceeded('Diff Limit Exceeded')
257 raise DiffLimitExceeded('Diff Limit Exceeded')
258
258
259 return safe_unicode(string)\
259 return safe_unicode(string)\
260 .replace('&', '&amp;')\
260 .replace('&', '&amp;')\
261 .replace('<', '&lt;')\
261 .replace('<', '&lt;')\
262 .replace('>', '&gt;')
262 .replace('>', '&gt;')
263
263
264 def _line_counter(self, l):
264 def _line_counter(self, l):
265 """
265 """
266 Checks each line and bumps total adds/removes for this diff
266 Checks each line and bumps total adds/removes for this diff
267
267
268 :param l:
268 :param l:
269 """
269 """
270 if l.startswith('+') and not l.startswith('+++'):
270 if l.startswith('+') and not l.startswith('+++'):
271 self.adds += 1
271 self.adds += 1
272 elif l.startswith('-') and not l.startswith('---'):
272 elif l.startswith('-') and not l.startswith('---'):
273 self.removes += 1
273 self.removes += 1
274 return safe_unicode(l)
274 return safe_unicode(l)
275
275
276 def _highlight_line_difflib(self, line, next_):
276 def _highlight_line_difflib(self, line, next_):
277 """
277 """
278 Highlight inline changes in both lines.
278 Highlight inline changes in both lines.
279 """
279 """
280
280
281 if line['action'] == Action.DELETE:
281 if line['action'] == Action.DELETE:
282 old, new = line, next_
282 old, new = line, next_
283 else:
283 else:
284 old, new = next_, line
284 old, new = next_, line
285
285
286 oldwords = self._token_re.split(old['line'])
286 oldwords = self._token_re.split(old['line'])
287 newwords = self._token_re.split(new['line'])
287 newwords = self._token_re.split(new['line'])
288 sequence = difflib.SequenceMatcher(None, oldwords, newwords)
288 sequence = difflib.SequenceMatcher(None, oldwords, newwords)
289
289
290 oldfragments, newfragments = [], []
290 oldfragments, newfragments = [], []
291 for tag, i1, i2, j1, j2 in sequence.get_opcodes():
291 for tag, i1, i2, j1, j2 in sequence.get_opcodes():
292 oldfrag = ''.join(oldwords[i1:i2])
292 oldfrag = ''.join(oldwords[i1:i2])
293 newfrag = ''.join(newwords[j1:j2])
293 newfrag = ''.join(newwords[j1:j2])
294 if tag != 'equal':
294 if tag != 'equal':
295 if oldfrag:
295 if oldfrag:
296 oldfrag = '<del>%s</del>' % oldfrag
296 oldfrag = '<del>%s</del>' % oldfrag
297 if newfrag:
297 if newfrag:
298 newfrag = '<ins>%s</ins>' % newfrag
298 newfrag = '<ins>%s</ins>' % newfrag
299 oldfragments.append(oldfrag)
299 oldfragments.append(oldfrag)
300 newfragments.append(newfrag)
300 newfragments.append(newfrag)
301
301
302 old['line'] = "".join(oldfragments)
302 old['line'] = "".join(oldfragments)
303 new['line'] = "".join(newfragments)
303 new['line'] = "".join(newfragments)
304
304
    def _highlight_line_udiff(self, line, next_):
        """
        Highlight inline changes in both lines.

        Finds the common prefix and common suffix of the two line strings
        and wraps only the differing middle part in ``<ins>``/``<del>``
        tags, mutating ``line['line']`` and ``next_['line']`` in place.
        """
        start = 0
        limit = min(len(line['line']), len(next_['line']))
        # advance `start` past the common prefix of both strings
        while start < limit and line['line'][start] == next_['line'][start]:
            start += 1
        end = -1
        limit -= start
        # walk `end` backwards (negative indexing) over the common suffix,
        # never overlapping the prefix already consumed above
        while -end <= limit and line['line'][end] == next_['line'][end]:
            end -= 1
        end += 1
        if start or end:
            def do(l):
                # `end` is <= 0 here, so `last` is the index where the
                # shared suffix begins in this particular string
                last = end + len(l['line'])
                if l['action'] == Action.ADD:
                    tag = 'ins'
                else:
                    tag = 'del'
                l['line'] = '%s<%s>%s</%s>%s' % (
                    l['line'][:start],
                    tag,
                    l['line'][start:last],
                    tag,
                    l['line'][last:]
                )
            do(line)
            do(next_)
334
334
335 def _clean_line(self, line, command):
335 def _clean_line(self, line, command):
336 if command in ['+', '-', ' ']:
336 if command in ['+', '-', ' ']:
337 # only modify the line if it's actually a diff thing
337 # only modify the line if it's actually a diff thing
338 line = line[1:]
338 line = line[1:]
339 return line
339 return line
340
340
    def _parse_gitdiff(self, inline_diff=True):
        """
        Parse a git-style diff into a list of per-file dicts for rendering.

        For each file chunk the operation (ADD/DEL/MOD) is detected from the
        parsed headers, line stats are collected, and size limits enforced.

        :param inline_diff: when True, additionally run word/char level
            highlighting on paired add/delete lines
        :return: container (plain list or ``LimitedDiffContainer`` when a
            limit was hit) of per-file dicts, sorted ADD, MOD, DEL
        """
        _files = []
        # identity wrapper; replaced by a LimitedDiffContainer factory
        # below if any size limit is exceeded
        diff_container = lambda arg: arg

        for chunk in self._diff.chunks():
            head = chunk.header

            # lazily escape each line; _escaper also tracks cur_diff_size
            diff = imap(self._escaper, chunk.diff.splitlines(1))
            raw_diff = chunk.raw
            limited_diff = False
            exceeds_limit = False

            op = None
            stats = {
                'added': 0,
                'deleted': 0,
                'binary': False,
                'ops': {},
            }

            if head['deleted_file_mode']:
                op = OPS.DEL
                stats['binary'] = True
                stats['ops'][DEL_FILENODE] = 'deleted file'

            elif head['new_file_mode']:
                op = OPS.ADD
                stats['binary'] = True
                stats['ops'][NEW_FILENODE] = 'new file %s' % head['new_file_mode']
            else:  # modify operation, can be copy, rename or chmod

                # CHMOD
                if head['new_mode'] and head['old_mode']:
                    op = OPS.MOD
                    stats['binary'] = True
                    stats['ops'][CHMOD_FILENODE] = (
                        'modified file chmod %s => %s' % (
                            head['old_mode'], head['new_mode']))
                # RENAME
                if head['rename_from'] != head['rename_to']:
                    op = OPS.MOD
                    stats['binary'] = True
                    stats['ops'][RENAMED_FILENODE] = (
                        'file renamed from %s to %s' % (
                            head['rename_from'], head['rename_to']))
                # COPY
                if head.get('copy_from') and head.get('copy_to'):
                    op = OPS.MOD
                    stats['binary'] = True
                    stats['ops'][COPIED_FILENODE] = (
                        'file copied from %s to %s' % (
                            head['copy_from'], head['copy_to']))

            # If our new parsed headers didn't match anything fallback to
            # old style detection
            if op is None:
                if not head['a_file'] and head['b_file']:
                    op = OPS.ADD
                    stats['binary'] = True
                    stats['ops'][NEW_FILENODE] = 'new file'

                elif head['a_file'] and not head['b_file']:
                    op = OPS.DEL
                    stats['binary'] = True
                    stats['ops'][DEL_FILENODE] = 'deleted file'

                # it's not ADD not DELETE
                if op is None:
                    op = OPS.MOD
                    stats['binary'] = True
                    stats['ops'][MOD_FILENODE] = 'modified file'

            # a real non-binary diff
            if head['a_file'] or head['b_file']:
                try:
                    raw_diff, chunks, _stats = self._parse_lines(diff)
                    stats['binary'] = False
                    stats['added'] = _stats[0]
                    stats['deleted'] = _stats[1]
                    # explicit mark that it's a modified file
                    if op == OPS.MOD:
                        stats['ops'][MOD_FILENODE] = 'modified file'
                    exceeds_limit = len(raw_diff) > self.file_limit

                    # changed from _escaper function so we validate size of
                    # each file instead of the whole diff
                    # diff will hide big files but still show small ones
                    # from my tests, big files are fairly safe to be parsed
                    # but the browser is the bottleneck
                    if not self.show_full_diff and exceeds_limit:
                        raise DiffLimitExceeded('File Limit Exceeded')

                except DiffLimitExceeded:
                    # from here on the result is wrapped in a limited
                    # container and this file's chunks are hidden
                    diff_container = lambda _diff: \
                        LimitedDiffContainer(
                            self.diff_limit, self.cur_diff_size, _diff)

                    exceeds_limit = len(raw_diff) > self.file_limit
                    limited_diff = True
                    chunks = []

            else:  # GIT format binary patch, or possibly empty diff
                if head['bin_patch']:
                    # we have operation already extracted, but we mark simply
                    # it's a diff we wont show for binary files
                    stats['ops'][BIN_FILENODE] = 'binary diff hidden'
                chunks = []

            if chunks and not self.show_full_diff and op == OPS.DEL:
                # if not full diff mode show deleted file contents
                # TODO: anderson: if the view is not too big, there is no way
                # to see the content of the file
                chunks = []

            # prepend a pseudo-chunk of CONTEXT lines describing the
            # non-trivial operations (new/deleted/renamed/...) on this file
            chunks.insert(0, [{
                'old_lineno': '',
                'new_lineno': '',
                'action': Action.CONTEXT,
                'line': msg,
                } for _op, msg in stats['ops'].iteritems()
                if _op not in [MOD_FILENODE]])

            _files.append({
                'filename': safe_unicode(head['b_path']),
                'old_revision': head['a_blob_id'],
                'new_revision': head['b_blob_id'],
                'chunks': chunks,
                'raw_diff': safe_unicode(raw_diff),
                'operation': op,
                'stats': stats,
                'exceeds_limit': exceeds_limit,
                'is_limited_diff': limited_diff,
            })

        # ordering for the rendered output: additions, modifications, deletes
        sorter = lambda info: {OPS.ADD: 0, OPS.MOD: 1,
                               OPS.DEL: 2}.get(info['operation'])

        if not inline_diff:
            return diff_container(sorted(_files, key=sorter))

        # highlight inline changes
        for diff_data in _files:
            for chunk in diff_data['chunks']:
                lineiter = iter(chunk)
                try:
                    while 1:
                        line = lineiter.next()
                        if line['action'] not in (
                                Action.UNMODIFIED, Action.CONTEXT):
                            nextline = lineiter.next()
                            # only highlight del/add pairs; skip when the
                            # follower is context or repeats the same action
                            if nextline['action'] in ['unmod', 'context'] or \
                                    nextline['action'] == line['action']:
                                continue
                            self.differ(line, nextline)
                except StopIteration:
                    pass

        return diff_container(sorted(_files, key=sorter))
499
499
500 def _check_large_diff(self):
500 def _check_large_diff(self):
501 log.debug('Diff exceeds current diff_limit of %s', self.diff_limit)
501 log.debug('Diff exceeds current diff_limit of %s', self.diff_limit)
502 if not self.show_full_diff and (self.cur_diff_size > self.diff_limit):
502 if not self.show_full_diff and (self.cur_diff_size > self.diff_limit):
503 raise DiffLimitExceeded('Diff Limit `%s` Exceeded', self.diff_limit)
503 raise DiffLimitExceeded('Diff Limit `%s` Exceeded', self.diff_limit)
504
504
    # FIXME: NEWDIFFS: dan: this replaces _parse_gitdiff
    def _new_parse_gitdiff(self, inline_diff=True):
        """
        Parse a git-style diff into per-file dicts (new-style pipeline).

        Compared to ``_parse_gitdiff`` this variant also records file modes,
        rename/copy source-target pairs and per-hunk header metadata (via
        ``_new_parse_lines``), and enforces per-file and total size limits.

        :param inline_diff: accepted for signature parity with
            ``_parse_gitdiff``; not used in this implementation
        :return: container (plain list or ``LimitedDiffContainer`` when a
            limit was hit) of per-file dicts, sorted ADD, MOD, DEL
        """
        _files = []

        # this can be overriden later to a LimitedDiffContainer type
        diff_container = lambda arg: arg

        for chunk in self._diff.chunks():
            head = chunk.header
            log.debug('parsing diff %r' % head)

            raw_diff = chunk.raw
            limited_diff = False
            exceeds_limit = False

            op = None
            stats = {
                'added': 0,
                'deleted': 0,
                'binary': False,
                'old_mode': None,
                'new_mode': None,
                'ops': {},
            }
            if head['old_mode']:
                stats['old_mode'] = head['old_mode']
            if head['new_mode']:
                stats['new_mode'] = head['new_mode']
            if head['b_mode']:
                # b_mode (mode of the new blob) wins over new_mode when both
                # are present in the parsed header
                stats['new_mode'] = head['b_mode']

            # delete file
            if head['deleted_file_mode']:
                op = OPS.DEL
                stats['binary'] = True
                stats['ops'][DEL_FILENODE] = 'deleted file'

            # new file
            elif head['new_file_mode']:
                op = OPS.ADD
                stats['binary'] = True
                stats['old_mode'] = None
                stats['new_mode'] = head['new_file_mode']
                stats['ops'][NEW_FILENODE] = 'new file %s' % head['new_file_mode']

            # modify operation, can be copy, rename or chmod
            else:
                # CHMOD
                if head['new_mode'] and head['old_mode']:
                    op = OPS.MOD
                    stats['binary'] = True
                    stats['ops'][CHMOD_FILENODE] = (
                        'modified file chmod %s => %s' % (
                            head['old_mode'], head['new_mode']))

                # RENAME
                if head['rename_from'] != head['rename_to']:
                    op = OPS.MOD
                    stats['binary'] = True
                    stats['renamed'] = (head['rename_from'], head['rename_to'])
                    stats['ops'][RENAMED_FILENODE] = (
                        'file renamed from %s to %s' % (
                            head['rename_from'], head['rename_to']))
                # COPY
                if head.get('copy_from') and head.get('copy_to'):
                    op = OPS.MOD
                    stats['binary'] = True
                    stats['copied'] = (head['copy_from'], head['copy_to'])
                    stats['ops'][COPIED_FILENODE] = (
                        'file copied from %s to %s' % (
                            head['copy_from'], head['copy_to']))

            # If our new parsed headers didn't match anything fallback to
            # old style detection
            if op is None:
                if not head['a_file'] and head['b_file']:
                    op = OPS.ADD
                    stats['binary'] = True
                    stats['new_file'] = True
                    stats['ops'][NEW_FILENODE] = 'new file'

                elif head['a_file'] and not head['b_file']:
                    op = OPS.DEL
                    stats['binary'] = True
                    stats['ops'][DEL_FILENODE] = 'deleted file'

                # it's not ADD not DELETE
                if op is None:
                    op = OPS.MOD
                    stats['binary'] = True
                    stats['ops'][MOD_FILENODE] = 'modified file'

            # a real non-binary diff
            if head['a_file'] or head['b_file']:
                diff = iter(chunk.diff.splitlines(1))

                # append each file to the diff size
                raw_chunk_size = len(raw_diff)

                exceeds_limit = raw_chunk_size > self.file_limit
                self.cur_diff_size += raw_chunk_size

                try:
                    # Check each file instead of the whole diff.
                    # Diff will hide big files but still show small ones.
                    # From the tests big files are fairly safe to be parsed
                    # but the browser is the bottleneck.
                    if not self.show_full_diff and exceeds_limit:
                        log.debug('File `%s` exceeds current file_limit of %s',
                                  safe_unicode(head['b_path']), self.file_limit)
                        raise DiffLimitExceeded(
                            'File Limit %s Exceeded', self.file_limit)

                    self._check_large_diff()

                    raw_diff, chunks, _stats = self._new_parse_lines(diff)
                    stats['binary'] = False
                    stats['added'] = _stats[0]
                    stats['deleted'] = _stats[1]
                    # explicit mark that it's a modified file
                    if op == OPS.MOD:
                        stats['ops'][MOD_FILENODE] = 'modified file'

                except DiffLimitExceeded:
                    # wrap the final result and hide this file's chunks
                    diff_container = lambda _diff: \
                        LimitedDiffContainer(
                            self.diff_limit, self.cur_diff_size, _diff)

                    limited_diff = True
                    chunks = []

            else:  # GIT format binary patch, or possibly empty diff
                if head['bin_patch']:
                    # we have operation already extracted, but we mark simply
                    # it's a diff we wont show for binary files
                    stats['ops'][BIN_FILENODE] = 'binary diff hidden'
                chunks = []

            # Hide content of deleted node by setting empty chunks
            if chunks and not self.show_full_diff and op == OPS.DEL:
                # if not full diff mode show deleted file contents
                # TODO: anderson: if the view is not too big, there is no way
                # to see the content of the file
                chunks = []

            # prepend a pseudo-chunk of CONTEXT lines describing the
            # non-trivial operations (new/deleted/renamed/...) on this file
            chunks.insert(
                0, [{'old_lineno': '',
                     'new_lineno': '',
                     'action': Action.CONTEXT,
                     'line': msg,
                     } for _op, msg in stats['ops'].iteritems()
                    if _op not in [MOD_FILENODE]])

            original_filename = safe_unicode(head['a_path'])
            _files.append({
                'original_filename': original_filename,
                'filename': safe_unicode(head['b_path']),
                'old_revision': head['a_blob_id'],
                'new_revision': head['b_blob_id'],
                'chunks': chunks,
                'raw_diff': safe_unicode(raw_diff),
                'operation': op,
                'stats': stats,
                'exceeds_limit': exceeds_limit,
                'is_limited_diff': limited_diff,
            })

        # ordering for the rendered output: additions, modifications, deletes
        sorter = lambda info: {OPS.ADD: 0, OPS.MOD: 1,
                               OPS.DEL: 2}.get(info['operation'])

        return diff_container(sorted(_files, key=sorter))
676
676
    # FIXME: NEWDIFFS: dan: this gets replaced by _new_parse_lines
    def _parse_lines(self, diff):
        """
        Parse the diff an return data for the template.

        Walks the escaped diff lines hunk by hunk: a ``@@`` header opens a
        new chunk, then content lines are consumed until the header's line
        counts are satisfied. Returns the tuple
        ``(joined_raw_diff, chunks, [adds, deletes])``.
        """

        lineiter = iter(diff)
        stats = [0, 0]
        chunks = []
        raw_diff = []

        try:
            line = lineiter.next()

            while line:
                raw_diff.append(line)
                lines = []
                chunks.append(lines)

                match = self._chunk_re.match(line)

                if not match:
                    # not a hunk header -> done with parseable content
                    break

                gr = match.groups()
                # missing counts in the header default to 1 per diff spec
                (old_line, old_end,
                 new_line, new_end) = [int(x or 1) for x in gr[:-1]]
                old_line -= 1
                new_line -= 1

                context = len(gr) == 5
                old_end += old_line
                new_end += new_line

                if context:
                    # skip context only if it's first line
                    if int(gr[0]) > 1:
                        lines.append({
                            'old_lineno': '...',
                            'new_lineno': '...',
                            'action': Action.CONTEXT,
                            'line': line,
                        })

                line = lineiter.next()

                # consume content lines until both sides of the hunk header
                # line counts are exhausted
                while old_line < old_end or new_line < new_end:
                    command = ' '
                    if line:
                        command = line[0]

                    affects_old = affects_new = False

                    # ignore those if we don't expect them
                    if command in '#@':
                        continue
                    elif command == '+':
                        affects_new = True
                        action = Action.ADD
                        stats[0] += 1
                    elif command == '-':
                        affects_old = True
                        action = Action.DELETE
                        stats[1] += 1
                    else:
                        affects_old = affects_new = True
                        action = Action.UNMODIFIED

                    # '\ No newline at end of file' markers carry no line
                    # numbers, so they must not advance the counters
                    if not self._newline_marker.match(line):
                        old_line += affects_old
                        new_line += affects_new
                        lines.append({
                            'old_lineno': affects_old and old_line or '',
                            'new_lineno': affects_new and new_line or '',
                            'action': action,
                            'line': self._clean_line(line, command)
                        })
                        raw_diff.append(line)

                    line = lineiter.next()

                    if self._newline_marker.match(line):
                        # we need to append to lines, since this is not
                        # counted in the line specs of diff
                        lines.append({
                            'old_lineno': '...',
                            'new_lineno': '...',
                            'action': Action.CONTEXT,
                            'line': self._clean_line(line, command)
                        })

        except StopIteration:
            # iterator exhausted: normal end of diff
            pass
        return ''.join(raw_diff), chunks, stats
771
771
    # FIXME: NEWDIFFS: dan: this replaces _parse_lines
    def _new_parse_lines(self, diff_iter):
        """
        Parse the diff an return data for the template.

        New-style line parser: hunks become dicts carrying the section
        header and source/target start/length metadata, and the
        '\\ No newline at end of file' markers are emitted as dedicated
        OLD_NO_NL/NEW_NO_NL actions instead of generic context lines.
        Returns ``(joined_raw_diff, chunks, [adds, deletes])``.
        """

        stats = [0, 0]
        chunks = []
        raw_diff = []

        diff_iter = imap(lambda s: safe_unicode(s), diff_iter)

        try:
            line = diff_iter.next()

            while line:
                raw_diff.append(line)
                match = self._chunk_re.match(line)

                if not match:
                    # not a hunk header -> done with parseable content
                    break

                gr = match.groups()
                # missing counts in the header default to 1 per diff spec
                (old_line, old_end,
                 new_line, new_end) = [int(x or 1) for x in gr[:-1]]

                lines = []
                hunk = {
                    'section_header': gr[-1],
                    'source_start': old_line,
                    'source_length': old_end,
                    'target_start': new_line,
                    'target_length': new_end,
                    'lines': lines,
                }
                chunks.append(hunk)

                old_line -= 1
                new_line -= 1

                context = len(gr) == 5
                old_end += old_line
                new_end += new_line

                line = diff_iter.next()

                # consume content lines until both sides of the hunk header
                # line counts are exhausted
                while old_line < old_end or new_line < new_end:
                    command = ' '
                    if line:
                        command = line[0]

                    affects_old = affects_new = False

                    # ignore those if we don't expect them
                    if command in '#@':
                        continue
                    elif command == '+':
                        affects_new = True
                        action = Action.ADD
                        stats[0] += 1
                    elif command == '-':
                        affects_old = True
                        action = Action.DELETE
                        stats[1] += 1
                    else:
                        affects_old = affects_new = True
                        action = Action.UNMODIFIED

                    # '\ No newline at end of file' markers carry no line
                    # numbers, so they must not advance the counters
                    if not self._newline_marker.match(line):
                        old_line += affects_old
                        new_line += affects_new
                        lines.append({
                            'old_lineno': affects_old and old_line or '',
                            'new_lineno': affects_new and new_line or '',
                            'action': action,
                            'line': self._clean_line(line, command)
                        })
                        raw_diff.append(line)

                    line = diff_iter.next()

                    if self._newline_marker.match(line):
                        # we need to append to lines, since this is not
                        # counted in the line specs of diff
                        # the marker belongs to whichever side the previous
                        # content line touched (old for '-', new for '+')
                        if affects_old:
                            action = Action.OLD_NO_NL
                        elif affects_new:
                            action = Action.NEW_NO_NL
                        else:
                            raise Exception('invalid context for no newline')

                        lines.append({
                            'old_lineno': None,
                            'new_lineno': None,
                            'action': action,
                            'line': self._clean_line(line, command)
                        })

        except StopIteration:
            # iterator exhausted: normal end of diff
            pass

        return ''.join(raw_diff), chunks, stats
874
874
875 def _safe_id(self, idstring):
875 def _safe_id(self, idstring):
876 """Make a string safe for including in an id attribute.
876 """Make a string safe for including in an id attribute.
877
877
878 The HTML spec says that id attributes 'must begin with
878 The HTML spec says that id attributes 'must begin with
879 a letter ([A-Za-z]) and may be followed by any number
879 a letter ([A-Za-z]) and may be followed by any number
880 of letters, digits ([0-9]), hyphens ("-"), underscores
880 of letters, digits ([0-9]), hyphens ("-"), underscores
881 ("_"), colons (":"), and periods (".")'. These regexps
881 ("_"), colons (":"), and periods (".")'. These regexps
882 are slightly over-zealous, in that they remove colons
882 are slightly over-zealous, in that they remove colons
883 and periods unnecessarily.
883 and periods unnecessarily.
884
884
885 Whitespace is transformed into underscores, and then
885 Whitespace is transformed into underscores, and then
886 anything which is not a hyphen or a character that
886 anything which is not a hyphen or a character that
887 matches \w (alphanumerics and underscore) is removed.
887 matches \w (alphanumerics and underscore) is removed.
888
888
889 """
889 """
890 # Transform all whitespace to underscore
890 # Transform all whitespace to underscore
891 idstring = re.sub(r'\s', "_", '%s' % idstring)
891 idstring = re.sub(r'\s', "_", '%s' % idstring)
892 # Remove everything that is not a hyphen or a member of \w
892 # Remove everything that is not a hyphen or a member of \w
893 idstring = re.sub(r'(?!-)\W', "", idstring).lower()
893 idstring = re.sub(r'(?!-)\W', "", idstring).lower()
894 return idstring
894 return idstring
895
895
896 def prepare(self, inline_diff=True):
896 def prepare(self, inline_diff=True):
897 """
897 """
898 Prepare the passed udiff for HTML rendering.
898 Prepare the passed udiff for HTML rendering.
899
899
900 :return: A list of dicts with diff information.
900 :return: A list of dicts with diff information.
901 """
901 """
902 parsed = self._parser(inline_diff=inline_diff)
902 parsed = self._parser(inline_diff=inline_diff)
903 self.parsed = True
903 self.parsed = True
904 self.parsed_diff = parsed
904 self.parsed_diff = parsed
905 return parsed
905 return parsed
906
906
907 def as_raw(self, diff_lines=None):
907 def as_raw(self, diff_lines=None):
908 """
908 """
909 Returns raw diff as a byte string
909 Returns raw diff as a byte string
910 """
910 """
911 return self._diff.raw
911 return self._diff.raw
912
912
913 def as_html(self, table_class='code-difftable', line_class='line',
913 def as_html(self, table_class='code-difftable', line_class='line',
914 old_lineno_class='lineno old', new_lineno_class='lineno new',
914 old_lineno_class='lineno old', new_lineno_class='lineno new',
915 code_class='code', enable_comments=False, parsed_lines=None):
915 code_class='code', enable_comments=False, parsed_lines=None):
916 """
916 """
917 Return given diff as html table with customized css classes
917 Return given diff as html table with customized css classes
918 """
918 """
919 def _link_to_if(condition, label, url):
919 def _link_to_if(condition, label, url):
920 """
920 """
921 Generates a link if condition is meet or just the label if not.
921 Generates a link if condition is meet or just the label if not.
922 """
922 """
923
923
924 if condition:
924 if condition:
925 return '''<a href="%(url)s" class="tooltip"
925 return '''<a href="%(url)s" class="tooltip"
926 title="%(title)s">%(label)s</a>''' % {
926 title="%(title)s">%(label)s</a>''' % {
927 'title': _('Click to select line'),
927 'title': _('Click to select line'),
928 'url': url,
928 'url': url,
929 'label': label
929 'label': label
930 }
930 }
931 else:
931 else:
932 return label
932 return label
933 if not self.parsed:
933 if not self.parsed:
934 self.prepare()
934 self.prepare()
935
935
936 diff_lines = self.parsed_diff
936 diff_lines = self.parsed_diff
937 if parsed_lines:
937 if parsed_lines:
938 diff_lines = parsed_lines
938 diff_lines = parsed_lines
939
939
940 _html_empty = True
940 _html_empty = True
941 _html = []
941 _html = []
942 _html.append('''<table class="%(table_class)s">\n''' % {
942 _html.append('''<table class="%(table_class)s">\n''' % {
943 'table_class': table_class
943 'table_class': table_class
944 })
944 })
945
945
946 for diff in diff_lines:
946 for diff in diff_lines:
947 for line in diff['chunks']:
947 for line in diff['chunks']:
948 _html_empty = False
948 _html_empty = False
949 for change in line:
949 for change in line:
950 _html.append('''<tr class="%(lc)s %(action)s">\n''' % {
950 _html.append('''<tr class="%(lc)s %(action)s">\n''' % {
951 'lc': line_class,
951 'lc': line_class,
952 'action': change['action']
952 'action': change['action']
953 })
953 })
954 anchor_old_id = ''
954 anchor_old_id = ''
955 anchor_new_id = ''
955 anchor_new_id = ''
956 anchor_old = "%(filename)s_o%(oldline_no)s" % {
956 anchor_old = "%(filename)s_o%(oldline_no)s" % {
957 'filename': self._safe_id(diff['filename']),
957 'filename': self._safe_id(diff['filename']),
958 'oldline_no': change['old_lineno']
958 'oldline_no': change['old_lineno']
959 }
959 }
960 anchor_new = "%(filename)s_n%(oldline_no)s" % {
960 anchor_new = "%(filename)s_n%(oldline_no)s" % {
961 'filename': self._safe_id(diff['filename']),
961 'filename': self._safe_id(diff['filename']),
962 'oldline_no': change['new_lineno']
962 'oldline_no': change['new_lineno']
963 }
963 }
964 cond_old = (change['old_lineno'] != '...' and
964 cond_old = (change['old_lineno'] != '...' and
965 change['old_lineno'])
965 change['old_lineno'])
966 cond_new = (change['new_lineno'] != '...' and
966 cond_new = (change['new_lineno'] != '...' and
967 change['new_lineno'])
967 change['new_lineno'])
968 if cond_old:
968 if cond_old:
969 anchor_old_id = 'id="%s"' % anchor_old
969 anchor_old_id = 'id="%s"' % anchor_old
970 if cond_new:
970 if cond_new:
971 anchor_new_id = 'id="%s"' % anchor_new
971 anchor_new_id = 'id="%s"' % anchor_new
972
972
973 if change['action'] != Action.CONTEXT:
973 if change['action'] != Action.CONTEXT:
974 anchor_link = True
974 anchor_link = True
975 else:
975 else:
976 anchor_link = False
976 anchor_link = False
977
977
978 ###########################################################
978 ###########################################################
979 # COMMENT ICONS
979 # COMMENT ICONS
980 ###########################################################
980 ###########################################################
981 _html.append('''\t<td class="add-comment-line"><span class="add-comment-content">''')
981 _html.append('''\t<td class="add-comment-line"><span class="add-comment-content">''')
982
982
983 if enable_comments and change['action'] != Action.CONTEXT:
983 if enable_comments and change['action'] != Action.CONTEXT:
984 _html.append('''<a href="#"><span class="icon-comment-add"></span></a>''')
984 _html.append('''<a href="#"><span class="icon-comment-add"></span></a>''')
985
985
986 _html.append('''</span></td><td class="comment-toggle tooltip" title="Toggle Comment Thread"><i class="icon-comment"></i></td>\n''')
986 _html.append('''</span></td><td class="comment-toggle tooltip" title="Toggle Comment Thread"><i class="icon-comment"></i></td>\n''')
987
987
988 ###########################################################
988 ###########################################################
989 # OLD LINE NUMBER
989 # OLD LINE NUMBER
990 ###########################################################
990 ###########################################################
991 _html.append('''\t<td %(a_id)s class="%(olc)s">''' % {
991 _html.append('''\t<td %(a_id)s class="%(olc)s">''' % {
992 'a_id': anchor_old_id,
992 'a_id': anchor_old_id,
993 'olc': old_lineno_class
993 'olc': old_lineno_class
994 })
994 })
995
995
996 _html.append('''%(link)s''' % {
996 _html.append('''%(link)s''' % {
997 'link': _link_to_if(anchor_link, change['old_lineno'],
997 'link': _link_to_if(anchor_link, change['old_lineno'],
998 '#%s' % anchor_old)
998 '#%s' % anchor_old)
999 })
999 })
1000 _html.append('''</td>\n''')
1000 _html.append('''</td>\n''')
1001 ###########################################################
1001 ###########################################################
1002 # NEW LINE NUMBER
1002 # NEW LINE NUMBER
1003 ###########################################################
1003 ###########################################################
1004
1004
1005 _html.append('''\t<td %(a_id)s class="%(nlc)s">''' % {
1005 _html.append('''\t<td %(a_id)s class="%(nlc)s">''' % {
1006 'a_id': anchor_new_id,
1006 'a_id': anchor_new_id,
1007 'nlc': new_lineno_class
1007 'nlc': new_lineno_class
1008 })
1008 })
1009
1009
1010 _html.append('''%(link)s''' % {
1010 _html.append('''%(link)s''' % {
1011 'link': _link_to_if(anchor_link, change['new_lineno'],
1011 'link': _link_to_if(anchor_link, change['new_lineno'],
1012 '#%s' % anchor_new)
1012 '#%s' % anchor_new)
1013 })
1013 })
1014 _html.append('''</td>\n''')
1014 _html.append('''</td>\n''')
1015 ###########################################################
1015 ###########################################################
1016 # CODE
1016 # CODE
1017 ###########################################################
1017 ###########################################################
1018 code_classes = [code_class]
1018 code_classes = [code_class]
1019 if (not enable_comments or
1019 if (not enable_comments or
1020 change['action'] == Action.CONTEXT):
1020 change['action'] == Action.CONTEXT):
1021 code_classes.append('no-comment')
1021 code_classes.append('no-comment')
1022 _html.append('\t<td class="%s">' % ' '.join(code_classes))
1022 _html.append('\t<td class="%s">' % ' '.join(code_classes))
1023 _html.append('''\n\t\t<pre>%(code)s</pre>\n''' % {
1023 _html.append('''\n\t\t<pre>%(code)s</pre>\n''' % {
1024 'code': change['line']
1024 'code': change['line']
1025 })
1025 })
1026
1026
1027 _html.append('''\t</td>''')
1027 _html.append('''\t</td>''')
1028 _html.append('''\n</tr>\n''')
1028 _html.append('''\n</tr>\n''')
1029 _html.append('''</table>''')
1029 _html.append('''</table>''')
1030 if _html_empty:
1030 if _html_empty:
1031 return None
1031 return None
1032 return ''.join(_html)
1032 return ''.join(_html)
1033
1033
1034 def stat(self):
1034 def stat(self):
1035 """
1035 """
1036 Returns tuple of added, and removed lines for this instance
1036 Returns tuple of added, and removed lines for this instance
1037 """
1037 """
1038 return self.adds, self.removes
1038 return self.adds, self.removes
1039
1039
1040 def get_context_of_line(
1040 def get_context_of_line(
1041 self, path, diff_line=None, context_before=3, context_after=3):
1041 self, path, diff_line=None, context_before=3, context_after=3):
1042 """
1042 """
1043 Returns the context lines for the specified diff line.
1043 Returns the context lines for the specified diff line.
1044
1044
1045 :type diff_line: :class:`DiffLineNumber`
1045 :type diff_line: :class:`DiffLineNumber`
1046 """
1046 """
1047 assert self.parsed, "DiffProcessor is not initialized."
1047 assert self.parsed, "DiffProcessor is not initialized."
1048
1048
1049 if None not in diff_line:
1049 if None not in diff_line:
1050 raise ValueError(
1050 raise ValueError(
1051 "Cannot specify both line numbers: {}".format(diff_line))
1051 "Cannot specify both line numbers: {}".format(diff_line))
1052
1052
1053 file_diff = self._get_file_diff(path)
1053 file_diff = self._get_file_diff(path)
1054 chunk, idx = self._find_chunk_line_index(file_diff, diff_line)
1054 chunk, idx = self._find_chunk_line_index(file_diff, diff_line)
1055
1055
1056 first_line_to_include = max(idx - context_before, 0)
1056 first_line_to_include = max(idx - context_before, 0)
1057 first_line_after_context = idx + context_after + 1
1057 first_line_after_context = idx + context_after + 1
1058 context_lines = chunk[first_line_to_include:first_line_after_context]
1058 context_lines = chunk[first_line_to_include:first_line_after_context]
1059
1059
1060 line_contents = [
1060 line_contents = [
1061 _context_line(line) for line in context_lines
1061 _context_line(line) for line in context_lines
1062 if _is_diff_content(line)]
1062 if _is_diff_content(line)]
1063 # TODO: johbo: Interim fixup, the diff chunks drop the final newline.
1063 # TODO: johbo: Interim fixup, the diff chunks drop the final newline.
1064 # Once they are fixed, we can drop this line here.
1064 # Once they are fixed, we can drop this line here.
1065 if line_contents:
1065 if line_contents:
1066 line_contents[-1] = (
1066 line_contents[-1] = (
1067 line_contents[-1][0], line_contents[-1][1].rstrip('\n') + '\n')
1067 line_contents[-1][0], line_contents[-1][1].rstrip('\n') + '\n')
1068 return line_contents
1068 return line_contents
1069
1069
1070 def find_context(self, path, context, offset=0):
1070 def find_context(self, path, context, offset=0):
1071 """
1071 """
1072 Finds the given `context` inside of the diff.
1072 Finds the given `context` inside of the diff.
1073
1073
1074 Use the parameter `offset` to specify which offset the target line has
1074 Use the parameter `offset` to specify which offset the target line has
1075 inside of the given `context`. This way the correct diff line will be
1075 inside of the given `context`. This way the correct diff line will be
1076 returned.
1076 returned.
1077
1077
1078 :param offset: Shall be used to specify the offset of the main line
1078 :param offset: Shall be used to specify the offset of the main line
1079 within the given `context`.
1079 within the given `context`.
1080 """
1080 """
1081 if offset < 0 or offset >= len(context):
1081 if offset < 0 or offset >= len(context):
1082 raise ValueError(
1082 raise ValueError(
1083 "Only positive values up to the length of the context "
1083 "Only positive values up to the length of the context "
1084 "minus one are allowed.")
1084 "minus one are allowed.")
1085
1085
1086 matches = []
1086 matches = []
1087 file_diff = self._get_file_diff(path)
1087 file_diff = self._get_file_diff(path)
1088
1088
1089 for chunk in file_diff['chunks']:
1089 for chunk in file_diff['chunks']:
1090 context_iter = iter(context)
1090 context_iter = iter(context)
1091 for line_idx, line in enumerate(chunk):
1091 for line_idx, line in enumerate(chunk):
1092 try:
1092 try:
1093 if _context_line(line) == context_iter.next():
1093 if _context_line(line) == context_iter.next():
1094 continue
1094 continue
1095 except StopIteration:
1095 except StopIteration:
1096 matches.append((line_idx, chunk))
1096 matches.append((line_idx, chunk))
1097 context_iter = iter(context)
1097 context_iter = iter(context)
1098
1098
1099 # Increment position and triger StopIteration
1099 # Increment position and triger StopIteration
1100 # if we had a match at the end
1100 # if we had a match at the end
1101 line_idx += 1
1101 line_idx += 1
1102 try:
1102 try:
1103 context_iter.next()
1103 context_iter.next()
1104 except StopIteration:
1104 except StopIteration:
1105 matches.append((line_idx, chunk))
1105 matches.append((line_idx, chunk))
1106
1106
1107 effective_offset = len(context) - offset
1107 effective_offset = len(context) - offset
1108 found_at_diff_lines = [
1108 found_at_diff_lines = [
1109 _line_to_diff_line_number(chunk[idx - effective_offset])
1109 _line_to_diff_line_number(chunk[idx - effective_offset])
1110 for idx, chunk in matches]
1110 for idx, chunk in matches]
1111
1111
1112 return found_at_diff_lines
1112 return found_at_diff_lines
1113
1113
1114 def _get_file_diff(self, path):
1114 def _get_file_diff(self, path):
1115 for file_diff in self.parsed_diff:
1115 for file_diff in self.parsed_diff:
1116 if file_diff['filename'] == path:
1116 if file_diff['filename'] == path:
1117 break
1117 break
1118 else:
1118 else:
1119 raise FileNotInDiffException("File {} not in diff".format(path))
1119 raise FileNotInDiffException("File {} not in diff".format(path))
1120 return file_diff
1120 return file_diff
1121
1121
1122 def _find_chunk_line_index(self, file_diff, diff_line):
1122 def _find_chunk_line_index(self, file_diff, diff_line):
1123 for chunk in file_diff['chunks']:
1123 for chunk in file_diff['chunks']:
1124 for idx, line in enumerate(chunk):
1124 for idx, line in enumerate(chunk):
1125 if line['old_lineno'] == diff_line.old:
1125 if line['old_lineno'] == diff_line.old:
1126 return chunk, idx
1126 return chunk, idx
1127 if line['new_lineno'] == diff_line.new:
1127 if line['new_lineno'] == diff_line.new:
1128 return chunk, idx
1128 return chunk, idx
1129 raise LineNotInDiffException(
1129 raise LineNotInDiffException(
1130 "The line {} is not part of the diff.".format(diff_line))
1130 "The line {} is not part of the diff.".format(diff_line))
1131
1131
1132
1132
1133 def _is_diff_content(line):
1133 def _is_diff_content(line):
1134 return line['action'] in (
1134 return line['action'] in (
1135 Action.UNMODIFIED, Action.ADD, Action.DELETE)
1135 Action.UNMODIFIED, Action.ADD, Action.DELETE)
1136
1136
1137
1137
1138 def _context_line(line):
1138 def _context_line(line):
1139 return (line['action'], line['line'])
1139 return (line['action'], line['line'])
1140
1140
1141
1141
1142 DiffLineNumber = collections.namedtuple('DiffLineNumber', ['old', 'new'])
1142 DiffLineNumber = collections.namedtuple('DiffLineNumber', ['old', 'new'])
1143
1143
1144
1144
1145 def _line_to_diff_line_number(line):
1145 def _line_to_diff_line_number(line):
1146 new_line_no = line['new_lineno'] or None
1146 new_line_no = line['new_lineno'] or None
1147 old_line_no = line['old_lineno'] or None
1147 old_line_no = line['old_lineno'] or None
1148 return DiffLineNumber(old=old_line_no, new=new_line_no)
1148 return DiffLineNumber(old=old_line_no, new=new_line_no)
1149
1149
1150
1150
1151 class FileNotInDiffException(Exception):
1151 class FileNotInDiffException(Exception):
1152 """
1152 """
1153 Raised when the context for a missing file is requested.
1153 Raised when the context for a missing file is requested.
1154
1154
1155 If you request the context for a line in a file which is not part of the
1155 If you request the context for a line in a file which is not part of the
1156 given diff, then this exception is raised.
1156 given diff, then this exception is raised.
1157 """
1157 """
1158
1158
1159
1159
1160 class LineNotInDiffException(Exception):
1160 class LineNotInDiffException(Exception):
1161 """
1161 """
1162 Raised when the context for a missing line is requested.
1162 Raised when the context for a missing line is requested.
1163
1163
1164 If you request the context for a line in a file and this line is not
1164 If you request the context for a line in a file and this line is not
1165 part of the given diff, then this exception is raised.
1165 part of the given diff, then this exception is raised.
1166 """
1166 """
1167
1167
1168
1168
1169 class DiffLimitExceeded(Exception):
1169 class DiffLimitExceeded(Exception):
1170 pass
1170 pass
@@ -1,822 +1,830 b''
1 # -*- coding: utf-8 -*-
1 # -*- coding: utf-8 -*-
2
2
3 # Copyright (C) 2010-2017 RhodeCode GmbH
3 # Copyright (C) 2010-2017 RhodeCode GmbH
4 #
4 #
5 # This program is free software: you can redistribute it and/or modify
5 # This program is free software: you can redistribute it and/or modify
6 # it under the terms of the GNU Affero General Public License, version 3
6 # it under the terms of the GNU Affero General Public License, version 3
7 # (only), as published by the Free Software Foundation.
7 # (only), as published by the Free Software Foundation.
8 #
8 #
9 # This program is distributed in the hope that it will be useful,
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU General Public License for more details.
12 # GNU General Public License for more details.
13 #
13 #
14 # You should have received a copy of the GNU Affero General Public License
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
16 #
16 #
17 # This program is dual-licensed. If you wish to learn more about the
17 # This program is dual-licensed. If you wish to learn more about the
18 # RhodeCode Enterprise Edition, including its added features, Support services,
18 # RhodeCode Enterprise Edition, including its added features, Support services,
19 # and proprietary license terms, please see https://rhodecode.com/licenses/
19 # and proprietary license terms, please see https://rhodecode.com/licenses/
20
20
21 import textwrap
21 import textwrap
22
22
23 import pytest
23 import pytest
24
24
25 from rhodecode.lib.diffs import (
25 from rhodecode.lib.diffs import (
26 DiffProcessor, wrapped_diff,
26 DiffProcessor, wrapped_diff,
27 NEW_FILENODE, DEL_FILENODE, MOD_FILENODE, RENAMED_FILENODE,
27 NEW_FILENODE, DEL_FILENODE, MOD_FILENODE, RENAMED_FILENODE,
28 CHMOD_FILENODE, BIN_FILENODE, COPIED_FILENODE)
28 CHMOD_FILENODE, BIN_FILENODE, COPIED_FILENODE)
29 from rhodecode.tests.fixture import Fixture
29 from rhodecode.tests.fixture import Fixture
30 from rhodecode.lib.vcs.backends.git.repository import GitDiff
30 from rhodecode.lib.vcs.backends.git.repository import GitDiff
31 from rhodecode.lib.vcs.backends.hg.repository import MercurialDiff
31 from rhodecode.lib.vcs.backends.hg.repository import MercurialDiff
32 from rhodecode.lib.vcs.backends.svn.repository import SubversionDiff
32 from rhodecode.lib.vcs.backends.svn.repository import SubversionDiff
33
33
34 fixture = Fixture()
34 fixture = Fixture()
35
35
36
36
37 def test_wrapped_diff_limited_file_diff(vcsbackend_random):
37 def test_wrapped_diff_limited_file_diff(vcsbackend_random):
38 vcsbackend = vcsbackend_random
38 vcsbackend = vcsbackend_random
39 repo = vcsbackend.create_repo()
39 repo = vcsbackend.create_repo()
40 vcsbackend.add_file(repo, 'a_file', content="line 1\nline 2\nline3\n")
40 vcsbackend.add_file(repo, 'a_file', content="line 1\nline 2\nline3\n")
41 commit = repo.get_commit()
41 commit = repo.get_commit()
42 file_node = commit.get_node('a_file')
42 file_node = commit.get_node('a_file')
43
43
44 # Only limit the file diff to trigger the code path
44 # Only limit the file diff to trigger the code path
45 result = wrapped_diff(
45 result = wrapped_diff(
46 None, file_node, diff_limit=10000, file_limit=1)
46 None, file_node, diff_limit=10000, file_limit=1)
47 data = result[5]
47 data = result[5]
48
48
49 # Verify that the limits were applied
49 # Verify that the limits were applied
50 assert data['exceeds_limit'] is True
50 assert data['exceeds_limit'] is True
51 assert data['is_limited_diff'] is True
51 assert data['is_limited_diff'] is True
52
52
53
53
54 def test_diffprocessor_as_html_with_comments():
54 def test_diffprocessor_as_html_with_comments():
55 raw_diff = textwrap.dedent('''
55 raw_diff = textwrap.dedent('''
56 diff --git a/setup.py b/setup.py
56 diff --git a/setup.py b/setup.py
57 index 5b36422..cfd698e 100755
57 index 5b36422..cfd698e 100755
58 --- a/setup.py
58 --- a/setup.py
59 +++ b/setup.py
59 +++ b/setup.py
60 @@ -2,7 +2,7 @@
60 @@ -2,7 +2,7 @@
61 #!/usr/bin/python
61 #!/usr/bin/python
62 # Setup file for X
62 # Setup file for X
63 # Copyright (C) No one
63 # Copyright (C) No one
64 -
64 -
65 +x
65 +x
66 try:
66 try:
67 from setuptools import setup, Extension
67 from setuptools import setup, Extension
68 except ImportError:
68 except ImportError:
69 ''')
69 ''')
70 diff = GitDiff(raw_diff)
70 diff = GitDiff(raw_diff)
71 processor = DiffProcessor(diff)
71 processor = DiffProcessor(diff)
72 processor.prepare()
72 processor.prepare()
73
73
74 # Note that the cell with the context in line 5 (in the html) has the
74 # Note that the cell with the context in line 5 (in the html) has the
75 # no-comment class, which will prevent the add comment icon to be displayed.
75 # no-comment class, which will prevent the add comment icon to be displayed.
76 expected_html = textwrap.dedent('''
76 expected_html = textwrap.dedent('''
77 <table class="code-difftable">
77 <table class="code-difftable">
78 <tr class="line context">
78 <tr class="line context">
79 <td class="add-comment-line"><span class="add-comment-content"></span></td><td class="comment-toggle tooltip" title="Toggle Comment Thread"><i class="icon-comment"></i></td>
79 <td class="add-comment-line"><span class="add-comment-content"></span></td><td class="comment-toggle tooltip" title="Toggle Comment Thread"><i class="icon-comment"></i></td>
80 <td class="lineno old">...</td>
80 <td class="lineno old">...</td>
81 <td class="lineno new">...</td>
81 <td class="lineno new">...</td>
82 <td class="code no-comment">
82 <td class="code no-comment">
83 <pre>@@ -2,7 +2,7 @@
83 <pre>@@ -2,7 +2,7 @@
84 </pre>
84 </pre>
85 </td>
85 </td>
86 </tr>
86 </tr>
87 <tr class="line unmod">
87 <tr class="line unmod">
88 <td class="add-comment-line"><span class="add-comment-content"><a href="#"><span class="icon-comment-add"></span></a></span></td><td class="comment-toggle tooltip" title="Toggle Comment Thread"><i class="icon-comment"></i></td>
88 <td class="add-comment-line"><span class="add-comment-content"><a href="#"><span class="icon-comment-add"></span></a></span></td><td class="comment-toggle tooltip" title="Toggle Comment Thread"><i class="icon-comment"></i></td>
89 <td id="setuppy_o2" class="lineno old"><a href="#setuppy_o2" class="tooltip"
89 <td id="setuppy_o2" class="lineno old"><a href="#setuppy_o2" class="tooltip"
90 title="Click to select line">2</a></td>
90 title="Click to select line">2</a></td>
91 <td id="setuppy_n2" class="lineno new"><a href="#setuppy_n2" class="tooltip"
91 <td id="setuppy_n2" class="lineno new"><a href="#setuppy_n2" class="tooltip"
92 title="Click to select line">2</a></td>
92 title="Click to select line">2</a></td>
93 <td class="code">
93 <td class="code">
94 <pre>#!/usr/bin/python
94 <pre>#!/usr/bin/python
95 </pre>
95 </pre>
96 </td>
96 </td>
97 </tr>
97 </tr>
98 <tr class="line unmod">
98 <tr class="line unmod">
99 <td class="add-comment-line"><span class="add-comment-content"><a href="#"><span class="icon-comment-add"></span></a></span></td><td class="comment-toggle tooltip" title="Toggle Comment Thread"><i class="icon-comment"></i></td>
99 <td class="add-comment-line"><span class="add-comment-content"><a href="#"><span class="icon-comment-add"></span></a></span></td><td class="comment-toggle tooltip" title="Toggle Comment Thread"><i class="icon-comment"></i></td>
100 <td id="setuppy_o3" class="lineno old"><a href="#setuppy_o3" class="tooltip"
100 <td id="setuppy_o3" class="lineno old"><a href="#setuppy_o3" class="tooltip"
101 title="Click to select line">3</a></td>
101 title="Click to select line">3</a></td>
102 <td id="setuppy_n3" class="lineno new"><a href="#setuppy_n3" class="tooltip"
102 <td id="setuppy_n3" class="lineno new"><a href="#setuppy_n3" class="tooltip"
103 title="Click to select line">3</a></td>
103 title="Click to select line">3</a></td>
104 <td class="code">
104 <td class="code">
105 <pre># Setup file for X
105 <pre># Setup file for X
106 </pre>
106 </pre>
107 </td>
107 </td>
108 </tr>
108 </tr>
109 <tr class="line unmod">
109 <tr class="line unmod">
110 <td class="add-comment-line"><span class="add-comment-content"><a href="#"><span class="icon-comment-add"></span></a></span></td><td class="comment-toggle tooltip" title="Toggle Comment Thread"><i class="icon-comment"></i></td>
110 <td class="add-comment-line"><span class="add-comment-content"><a href="#"><span class="icon-comment-add"></span></a></span></td><td class="comment-toggle tooltip" title="Toggle Comment Thread"><i class="icon-comment"></i></td>
111 <td id="setuppy_o4" class="lineno old"><a href="#setuppy_o4" class="tooltip"
111 <td id="setuppy_o4" class="lineno old"><a href="#setuppy_o4" class="tooltip"
112 title="Click to select line">4</a></td>
112 title="Click to select line">4</a></td>
113 <td id="setuppy_n4" class="lineno new"><a href="#setuppy_n4" class="tooltip"
113 <td id="setuppy_n4" class="lineno new"><a href="#setuppy_n4" class="tooltip"
114 title="Click to select line">4</a></td>
114 title="Click to select line">4</a></td>
115 <td class="code">
115 <td class="code">
116 <pre># Copyright (C) No one
116 <pre># Copyright (C) No one
117 </pre>
117 </pre>
118 </td>
118 </td>
119 </tr>
119 </tr>
120 <tr class="line del">
120 <tr class="line del">
121 <td class="add-comment-line"><span class="add-comment-content"><a href="#"><span class="icon-comment-add"></span></a></span></td><td class="comment-toggle tooltip" title="Toggle Comment Thread"><i class="icon-comment"></i></td>
121 <td class="add-comment-line"><span class="add-comment-content"><a href="#"><span class="icon-comment-add"></span></a></span></td><td class="comment-toggle tooltip" title="Toggle Comment Thread"><i class="icon-comment"></i></td>
122 <td id="setuppy_o5" class="lineno old"><a href="#setuppy_o5" class="tooltip"
122 <td id="setuppy_o5" class="lineno old"><a href="#setuppy_o5" class="tooltip"
123 title="Click to select line">5</a></td>
123 title="Click to select line">5</a></td>
124 <td class="lineno new"><a href="#setuppy_n" class="tooltip"
124 <td class="lineno new"><a href="#setuppy_n" class="tooltip"
125 title="Click to select line"></a></td>
125 title="Click to select line"></a></td>
126 <td class="code">
126 <td class="code">
127 <pre>
127 <pre>
128 </pre>
128 </pre>
129 </td>
129 </td>
130 </tr>
130 </tr>
131 <tr class="line add">
131 <tr class="line add">
132 <td class="add-comment-line"><span class="add-comment-content"><a href="#"><span class="icon-comment-add"></span></a></span></td><td class="comment-toggle tooltip" title="Toggle Comment Thread"><i class="icon-comment"></i></td>
132 <td class="add-comment-line"><span class="add-comment-content"><a href="#"><span class="icon-comment-add"></span></a></span></td><td class="comment-toggle tooltip" title="Toggle Comment Thread"><i class="icon-comment"></i></td>
133 <td class="lineno old"><a href="#setuppy_o" class="tooltip"
133 <td class="lineno old"><a href="#setuppy_o" class="tooltip"
134 title="Click to select line"></a></td>
134 title="Click to select line"></a></td>
135 <td id="setuppy_n5" class="lineno new"><a href="#setuppy_n5" class="tooltip"
135 <td id="setuppy_n5" class="lineno new"><a href="#setuppy_n5" class="tooltip"
136 title="Click to select line">5</a></td>
136 title="Click to select line">5</a></td>
137 <td class="code">
137 <td class="code">
138 <pre><ins>x</ins>
138 <pre><ins>x</ins>
139 </pre>
139 </pre>
140 </td>
140 </td>
141 </tr>
141 </tr>
142 <tr class="line unmod">
142 <tr class="line unmod">
143 <td class="add-comment-line"><span class="add-comment-content"><a href="#"><span class="icon-comment-add"></span></a></span></td><td class="comment-toggle tooltip" title="Toggle Comment Thread"><i class="icon-comment"></i></td>
143 <td class="add-comment-line"><span class="add-comment-content"><a href="#"><span class="icon-comment-add"></span></a></span></td><td class="comment-toggle tooltip" title="Toggle Comment Thread"><i class="icon-comment"></i></td>
144 <td id="setuppy_o6" class="lineno old"><a href="#setuppy_o6" class="tooltip"
144 <td id="setuppy_o6" class="lineno old"><a href="#setuppy_o6" class="tooltip"
145 title="Click to select line">6</a></td>
145 title="Click to select line">6</a></td>
146 <td id="setuppy_n6" class="lineno new"><a href="#setuppy_n6" class="tooltip"
146 <td id="setuppy_n6" class="lineno new"><a href="#setuppy_n6" class="tooltip"
147 title="Click to select line">6</a></td>
147 title="Click to select line">6</a></td>
148 <td class="code">
148 <td class="code">
149 <pre>try:
149 <pre>try:
150 </pre>
150 </pre>
151 </td>
151 </td>
152 </tr>
152 </tr>
153 <tr class="line unmod">
153 <tr class="line unmod">
154 <td class="add-comment-line"><span class="add-comment-content"><a href="#"><span class="icon-comment-add"></span></a></span></td><td class="comment-toggle tooltip" title="Toggle Comment Thread"><i class="icon-comment"></i></td>
154 <td class="add-comment-line"><span class="add-comment-content"><a href="#"><span class="icon-comment-add"></span></a></span></td><td class="comment-toggle tooltip" title="Toggle Comment Thread"><i class="icon-comment"></i></td>
155 <td id="setuppy_o7" class="lineno old"><a href="#setuppy_o7" class="tooltip"
155 <td id="setuppy_o7" class="lineno old"><a href="#setuppy_o7" class="tooltip"
156 title="Click to select line">7</a></td>
156 title="Click to select line">7</a></td>
157 <td id="setuppy_n7" class="lineno new"><a href="#setuppy_n7" class="tooltip"
157 <td id="setuppy_n7" class="lineno new"><a href="#setuppy_n7" class="tooltip"
158 title="Click to select line">7</a></td>
158 title="Click to select line">7</a></td>
159 <td class="code">
159 <td class="code">
160 <pre> from setuptools import setup, Extension
160 <pre> from setuptools import setup, Extension
161 </pre>
161 </pre>
162 </td>
162 </td>
163 </tr>
163 </tr>
164 <tr class="line unmod">
164 <tr class="line unmod">
165 <td class="add-comment-line"><span class="add-comment-content"><a href="#"><span class="icon-comment-add"></span></a></span></td><td class="comment-toggle tooltip" title="Toggle Comment Thread"><i class="icon-comment"></i></td>
165 <td class="add-comment-line"><span class="add-comment-content"><a href="#"><span class="icon-comment-add"></span></a></span></td><td class="comment-toggle tooltip" title="Toggle Comment Thread"><i class="icon-comment"></i></td>
166 <td id="setuppy_o8" class="lineno old"><a href="#setuppy_o8" class="tooltip"
166 <td id="setuppy_o8" class="lineno old"><a href="#setuppy_o8" class="tooltip"
167 title="Click to select line">8</a></td>
167 title="Click to select line">8</a></td>
168 <td id="setuppy_n8" class="lineno new"><a href="#setuppy_n8" class="tooltip"
168 <td id="setuppy_n8" class="lineno new"><a href="#setuppy_n8" class="tooltip"
169 title="Click to select line">8</a></td>
169 title="Click to select line">8</a></td>
170 <td class="code">
170 <td class="code">
171 <pre>except ImportError:
171 <pre>except ImportError:
172 </pre>
172 </pre>
173 </td>
173 </td>
174 </tr>
174 </tr>
175 </table>
175 </table>
176 ''').strip()
176 ''').strip()
177 html = processor.as_html(enable_comments=True).replace('\t', ' ')
177 html = processor.as_html(enable_comments=True).replace('\t', ' ')
178
178
179 assert html == expected_html
179 assert html == expected_html
180
180
181
181
182 class TestMixedFilenameEncodings:
182 class TestMixedFilenameEncodings:
183
183
184 @pytest.fixture(scope="class")
184 @pytest.fixture(scope="class")
185 def raw_diff(self):
185 def raw_diff(self):
186 return fixture.load_resource(
186 return fixture.load_resource(
187 'hg_diff_mixed_filename_encodings.diff')
187 'hg_diff_mixed_filename_encodings.diff')
188
188
189 @pytest.fixture
189 @pytest.fixture
190 def processor(self, raw_diff):
190 def processor(self, raw_diff):
191 diff = MercurialDiff(raw_diff)
191 diff = MercurialDiff(raw_diff)
192 processor = DiffProcessor(diff)
192 processor = DiffProcessor(diff)
193 return processor
193 return processor
194
194
195 def test_filenames_are_decoded_to_unicode(self, processor):
195 def test_filenames_are_decoded_to_unicode(self, processor):
196 diff_data = processor.prepare()
196 diff_data = processor.prepare()
197 filenames = [item['filename'] for item in diff_data]
197 filenames = [item['filename'] for item in diff_data]
198 assert filenames == [
198 assert filenames == [
199 u'spΓ€cial-utf8.txt', u'spοΏ½cial-cp1252.txt', u'spοΏ½cial-latin1.txt']
199 u'spΓ€cial-utf8.txt', u'spοΏ½cial-cp1252.txt', u'spοΏ½cial-latin1.txt']
200
200
201 def test_raw_diff_is_decoded_to_unicode(self, processor):
201 def test_raw_diff_is_decoded_to_unicode(self, processor):
202 diff_data = processor.prepare()
202 diff_data = processor.prepare()
203 raw_diffs = [item['raw_diff'] for item in diff_data]
203 raw_diffs = [item['raw_diff'] for item in diff_data]
204 new_file_message = u'\nnew file mode 100644\n'
204 new_file_message = u'\nnew file mode 100644\n'
205 expected_raw_diffs = [
205 expected_raw_diffs = [
206 u' a/spΓ€cial-utf8.txt b/spΓ€cial-utf8.txt' + new_file_message,
206 u' a/spΓ€cial-utf8.txt b/spΓ€cial-utf8.txt' + new_file_message,
207 u' a/spοΏ½cial-cp1252.txt b/spοΏ½cial-cp1252.txt' + new_file_message,
207 u' a/spοΏ½cial-cp1252.txt b/spοΏ½cial-cp1252.txt' + new_file_message,
208 u' a/spοΏ½cial-latin1.txt b/spοΏ½cial-latin1.txt' + new_file_message]
208 u' a/spοΏ½cial-latin1.txt b/spοΏ½cial-latin1.txt' + new_file_message]
209 assert raw_diffs == expected_raw_diffs
209 assert raw_diffs == expected_raw_diffs
210
210
211 def test_as_raw_preserves_the_encoding(self, processor, raw_diff):
211 def test_as_raw_preserves_the_encoding(self, processor, raw_diff):
212 assert processor.as_raw() == raw_diff
212 assert processor.as_raw() == raw_diff
213
213
214
214
215 # TODO: mikhail: format the following data structure properly
215 # TODO: mikhail: format the following data structure properly
216 DIFF_FIXTURES = [
216 DIFF_FIXTURES = [
217 ('hg',
217 ('hg',
218 'hg_diff_add_single_binary_file.diff',
218 'hg_diff_add_single_binary_file.diff',
219 [('US Warszawa.jpg', 'A',
219 [('US Warszawa.jpg', 'A',
220 {'added': 0,
220 {'added': 0,
221 'deleted': 0,
221 'deleted': 0,
222 'binary': True,
222 'binary': True,
223 'ops': {NEW_FILENODE: 'new file 100755',
223 'ops': {NEW_FILENODE: 'new file 100755',
224 BIN_FILENODE: 'binary diff hidden'}}),
224 BIN_FILENODE: 'binary diff hidden'}}),
225 ]),
225 ]),
226 ('hg',
226 ('hg',
227 'hg_diff_mod_single_binary_file.diff',
227 'hg_diff_mod_single_binary_file.diff',
228 [('US Warszawa.jpg', 'M',
228 [('US Warszawa.jpg', 'M',
229 {'added': 0,
229 {'added': 0,
230 'deleted': 0,
230 'deleted': 0,
231 'binary': True,
231 'binary': True,
232 'ops': {MOD_FILENODE: 'modified file',
232 'ops': {MOD_FILENODE: 'modified file',
233 BIN_FILENODE: 'binary diff hidden'}}),
233 BIN_FILENODE: 'binary diff hidden'}}),
234 ]),
234 ]),
235 ('hg',
235 ('hg',
236 'hg_diff_mod_single_file_and_rename_and_chmod.diff',
236 'hg_diff_mod_single_file_and_rename_and_chmod.diff',
237 [('README', 'M',
237 [('README', 'M',
238 {'added': 3,
238 {'added': 3,
239 'deleted': 0,
239 'deleted': 0,
240 'binary': False,
240 'binary': False,
241 'ops': {MOD_FILENODE: 'modified file',
241 'ops': {MOD_FILENODE: 'modified file',
242 RENAMED_FILENODE: 'file renamed from README.rst to README',
242 RENAMED_FILENODE: 'file renamed from README.rst to README',
243 CHMOD_FILENODE: 'modified file chmod 100755 => 100644'}}),
243 CHMOD_FILENODE: 'modified file chmod 100755 => 100644'}}),
244 ]),
244 ]),
245 ('hg',
245 ('hg',
246 'hg_diff_no_newline.diff',
247 [('server.properties', 'M',
248 {'added': 2,
249 'deleted': 1,
250 'binary': False,
251 'ops': {MOD_FILENODE: 'modified file'}}),
252 ]),
253 ('hg',
246 'hg_diff_mod_file_and_rename.diff',
254 'hg_diff_mod_file_and_rename.diff',
247 [('README.rst', 'M',
255 [('README.rst', 'M',
248 {'added': 3,
256 {'added': 3,
249 'deleted': 0,
257 'deleted': 0,
250 'binary': False,
258 'binary': False,
251 'ops': {MOD_FILENODE: 'modified file',
259 'ops': {MOD_FILENODE: 'modified file',
252 RENAMED_FILENODE: 'file renamed from README to README.rst'}}),
260 RENAMED_FILENODE: 'file renamed from README to README.rst'}}),
253 ]),
261 ]),
254 ('hg',
262 ('hg',
255 'hg_diff_del_single_binary_file.diff',
263 'hg_diff_del_single_binary_file.diff',
256 [('US Warszawa.jpg', 'D',
264 [('US Warszawa.jpg', 'D',
257 {'added': 0,
265 {'added': 0,
258 'deleted': 0,
266 'deleted': 0,
259 'binary': True,
267 'binary': True,
260 'ops': {DEL_FILENODE: 'deleted file',
268 'ops': {DEL_FILENODE: 'deleted file',
261 BIN_FILENODE: 'binary diff hidden'}}),
269 BIN_FILENODE: 'binary diff hidden'}}),
262 ]),
270 ]),
263 ('hg',
271 ('hg',
264 'hg_diff_chmod_and_mod_single_binary_file.diff',
272 'hg_diff_chmod_and_mod_single_binary_file.diff',
265 [('gravatar.png', 'M',
273 [('gravatar.png', 'M',
266 {'added': 0,
274 {'added': 0,
267 'deleted': 0,
275 'deleted': 0,
268 'binary': True,
276 'binary': True,
269 'ops': {CHMOD_FILENODE: 'modified file chmod 100644 => 100755',
277 'ops': {CHMOD_FILENODE: 'modified file chmod 100644 => 100755',
270 BIN_FILENODE: 'binary diff hidden'}}),
278 BIN_FILENODE: 'binary diff hidden'}}),
271 ]),
279 ]),
272 ('hg',
280 ('hg',
273 'hg_diff_chmod.diff',
281 'hg_diff_chmod.diff',
274 [('file', 'M',
282 [('file', 'M',
275 {'added': 0,
283 {'added': 0,
276 'deleted': 0,
284 'deleted': 0,
277 'binary': True,
285 'binary': True,
278 'ops': {CHMOD_FILENODE: 'modified file chmod 100755 => 100644'}}),
286 'ops': {CHMOD_FILENODE: 'modified file chmod 100755 => 100644'}}),
279 ]),
287 ]),
280 ('hg',
288 ('hg',
281 'hg_diff_rename_file.diff',
289 'hg_diff_rename_file.diff',
282 [('file_renamed', 'M',
290 [('file_renamed', 'M',
283 {'added': 0,
291 {'added': 0,
284 'deleted': 0,
292 'deleted': 0,
285 'binary': True,
293 'binary': True,
286 'ops': {RENAMED_FILENODE: 'file renamed from file to file_renamed'}}),
294 'ops': {RENAMED_FILENODE: 'file renamed from file to file_renamed'}}),
287 ]),
295 ]),
288 ('hg',
296 ('hg',
289 'hg_diff_rename_and_chmod_file.diff',
297 'hg_diff_rename_and_chmod_file.diff',
290 [('README', 'M',
298 [('README', 'M',
291 {'added': 0,
299 {'added': 0,
292 'deleted': 0,
300 'deleted': 0,
293 'binary': True,
301 'binary': True,
294 'ops': {CHMOD_FILENODE: 'modified file chmod 100644 => 100755',
302 'ops': {CHMOD_FILENODE: 'modified file chmod 100644 => 100755',
295 RENAMED_FILENODE: 'file renamed from README.rst to README'}}),
303 RENAMED_FILENODE: 'file renamed from README.rst to README'}}),
296 ]),
304 ]),
297 ('hg',
305 ('hg',
298 'hg_diff_binary_and_normal.diff',
306 'hg_diff_binary_and_normal.diff',
299 [('img/baseline-10px.png', 'A',
307 [('img/baseline-10px.png', 'A',
300 {'added': 0,
308 {'added': 0,
301 'deleted': 0,
309 'deleted': 0,
302 'binary': True,
310 'binary': True,
303 'ops': {NEW_FILENODE: 'new file 100644',
311 'ops': {NEW_FILENODE: 'new file 100644',
304 BIN_FILENODE: 'binary diff hidden'}}),
312 BIN_FILENODE: 'binary diff hidden'}}),
305 ('js/jquery/hashgrid.js', 'A',
313 ('js/jquery/hashgrid.js', 'A',
306 {'added': 340,
314 {'added': 340,
307 'deleted': 0,
315 'deleted': 0,
308 'binary': False,
316 'binary': False,
309 'ops': {NEW_FILENODE: 'new file 100755'}}),
317 'ops': {NEW_FILENODE: 'new file 100755'}}),
310 ('index.html', 'M',
318 ('index.html', 'M',
311 {'added': 3,
319 {'added': 3,
312 'deleted': 2,
320 'deleted': 2,
313 'binary': False,
321 'binary': False,
314 'ops': {MOD_FILENODE: 'modified file'}}),
322 'ops': {MOD_FILENODE: 'modified file'}}),
315 ('less/docs.less', 'M',
323 ('less/docs.less', 'M',
316 {'added': 34,
324 {'added': 34,
317 'deleted': 0,
325 'deleted': 0,
318 'binary': False,
326 'binary': False,
319 'ops': {MOD_FILENODE: 'modified file'}}),
327 'ops': {MOD_FILENODE: 'modified file'}}),
320 ('less/scaffolding.less', 'M',
328 ('less/scaffolding.less', 'M',
321 {'added': 1,
329 {'added': 1,
322 'deleted': 3,
330 'deleted': 3,
323 'binary': False,
331 'binary': False,
324 'ops': {MOD_FILENODE: 'modified file'}}),
332 'ops': {MOD_FILENODE: 'modified file'}}),
325 ('readme.markdown', 'M',
333 ('readme.markdown', 'M',
326 {'added': 1,
334 {'added': 1,
327 'deleted': 10,
335 'deleted': 10,
328 'binary': False,
336 'binary': False,
329 'ops': {MOD_FILENODE: 'modified file'}}),
337 'ops': {MOD_FILENODE: 'modified file'}}),
330 ('img/baseline-20px.png', 'D',
338 ('img/baseline-20px.png', 'D',
331 {'added': 0,
339 {'added': 0,
332 'deleted': 0,
340 'deleted': 0,
333 'binary': True,
341 'binary': True,
334 'ops': {DEL_FILENODE: 'deleted file',
342 'ops': {DEL_FILENODE: 'deleted file',
335 BIN_FILENODE: 'binary diff hidden'}}),
343 BIN_FILENODE: 'binary diff hidden'}}),
336 ('js/global.js', 'D',
344 ('js/global.js', 'D',
337 {'added': 0,
345 {'added': 0,
338 'deleted': 75,
346 'deleted': 75,
339 'binary': False,
347 'binary': False,
340 'ops': {DEL_FILENODE: 'deleted file'}})
348 'ops': {DEL_FILENODE: 'deleted file'}})
341 ]),
349 ]),
342 ('git',
350 ('git',
343 'git_diff_chmod.diff',
351 'git_diff_chmod.diff',
344 [('work-horus.xls', 'M',
352 [('work-horus.xls', 'M',
345 {'added': 0,
353 {'added': 0,
346 'deleted': 0,
354 'deleted': 0,
347 'binary': True,
355 'binary': True,
348 'ops': {CHMOD_FILENODE: 'modified file chmod 100644 => 100755'}})
356 'ops': {CHMOD_FILENODE: 'modified file chmod 100644 => 100755'}})
349 ]),
357 ]),
350 ('git',
358 ('git',
351 'git_diff_rename_file.diff',
359 'git_diff_rename_file.diff',
352 [('file.xls', 'M',
360 [('file.xls', 'M',
353 {'added': 0,
361 {'added': 0,
354 'deleted': 0,
362 'deleted': 0,
355 'binary': True,
363 'binary': True,
356 'ops': {
364 'ops': {
357 RENAMED_FILENODE: 'file renamed from work-horus.xls to file.xls'}})
365 RENAMED_FILENODE: 'file renamed from work-horus.xls to file.xls'}})
358 ]),
366 ]),
359 ('git',
367 ('git',
360 'git_diff_mod_single_binary_file.diff',
368 'git_diff_mod_single_binary_file.diff',
361 [('US Warszawa.jpg', 'M',
369 [('US Warszawa.jpg', 'M',
362 {'added': 0,
370 {'added': 0,
363 'deleted': 0,
371 'deleted': 0,
364 'binary': True,
372 'binary': True,
365 'ops': {MOD_FILENODE: 'modified file',
373 'ops': {MOD_FILENODE: 'modified file',
366 BIN_FILENODE: 'binary diff hidden'}})
374 BIN_FILENODE: 'binary diff hidden'}})
367 ]),
375 ]),
368 ('git',
376 ('git',
369 'git_diff_binary_and_normal.diff',
377 'git_diff_binary_and_normal.diff',
370 [('img/baseline-10px.png', 'A',
378 [('img/baseline-10px.png', 'A',
371 {'added': 0,
379 {'added': 0,
372 'deleted': 0,
380 'deleted': 0,
373 'binary': True,
381 'binary': True,
374 'ops': {NEW_FILENODE: 'new file 100644',
382 'ops': {NEW_FILENODE: 'new file 100644',
375 BIN_FILENODE: 'binary diff hidden'}}),
383 BIN_FILENODE: 'binary diff hidden'}}),
376 ('js/jquery/hashgrid.js', 'A',
384 ('js/jquery/hashgrid.js', 'A',
377 {'added': 340,
385 {'added': 340,
378 'deleted': 0,
386 'deleted': 0,
379 'binary': False,
387 'binary': False,
380 'ops': {NEW_FILENODE: 'new file 100755'}}),
388 'ops': {NEW_FILENODE: 'new file 100755'}}),
381 ('index.html', 'M',
389 ('index.html', 'M',
382 {'added': 3,
390 {'added': 3,
383 'deleted': 2,
391 'deleted': 2,
384 'binary': False,
392 'binary': False,
385 'ops': {MOD_FILENODE: 'modified file'}}),
393 'ops': {MOD_FILENODE: 'modified file'}}),
386 ('less/docs.less', 'M',
394 ('less/docs.less', 'M',
387 {'added': 34,
395 {'added': 34,
388 'deleted': 0,
396 'deleted': 0,
389 'binary': False,
397 'binary': False,
390 'ops': {MOD_FILENODE: 'modified file'}}),
398 'ops': {MOD_FILENODE: 'modified file'}}),
391 ('less/scaffolding.less', 'M',
399 ('less/scaffolding.less', 'M',
392 {'added': 1,
400 {'added': 1,
393 'deleted': 3,
401 'deleted': 3,
394 'binary': False,
402 'binary': False,
395 'ops': {MOD_FILENODE: 'modified file'}}),
403 'ops': {MOD_FILENODE: 'modified file'}}),
396 ('readme.markdown', 'M',
404 ('readme.markdown', 'M',
397 {'added': 1,
405 {'added': 1,
398 'deleted': 10,
406 'deleted': 10,
399 'binary': False,
407 'binary': False,
400 'ops': {MOD_FILENODE: 'modified file'}}),
408 'ops': {MOD_FILENODE: 'modified file'}}),
401 ('img/baseline-20px.png', 'D',
409 ('img/baseline-20px.png', 'D',
402 {'added': 0,
410 {'added': 0,
403 'deleted': 0,
411 'deleted': 0,
404 'binary': True,
412 'binary': True,
405 'ops': {DEL_FILENODE: 'deleted file',
413 'ops': {DEL_FILENODE: 'deleted file',
406 BIN_FILENODE: 'binary diff hidden'}}),
414 BIN_FILENODE: 'binary diff hidden'}}),
407 ('js/global.js', 'D',
415 ('js/global.js', 'D',
408 {'added': 0,
416 {'added': 0,
409 'deleted': 75,
417 'deleted': 75,
410 'binary': False,
418 'binary': False,
411 'ops': {DEL_FILENODE: 'deleted file'}}),
419 'ops': {DEL_FILENODE: 'deleted file'}}),
412 ]),
420 ]),
413 ('hg',
421 ('hg',
414 'diff_with_diff_data.diff',
422 'diff_with_diff_data.diff',
415 [('vcs/backends/base.py', 'M',
423 [('vcs/backends/base.py', 'M',
416 {'added': 18,
424 {'added': 18,
417 'deleted': 2,
425 'deleted': 2,
418 'binary': False,
426 'binary': False,
419 'ops': {MOD_FILENODE: 'modified file'}}),
427 'ops': {MOD_FILENODE: 'modified file'}}),
420 ('vcs/backends/git/repository.py', 'M',
428 ('vcs/backends/git/repository.py', 'M',
421 {'added': 46,
429 {'added': 46,
422 'deleted': 15,
430 'deleted': 15,
423 'binary': False,
431 'binary': False,
424 'ops': {MOD_FILENODE: 'modified file'}}),
432 'ops': {MOD_FILENODE: 'modified file'}}),
425 ('vcs/backends/hg.py', 'M',
433 ('vcs/backends/hg.py', 'M',
426 {'added': 22,
434 {'added': 22,
427 'deleted': 3,
435 'deleted': 3,
428 'binary': False,
436 'binary': False,
429 'ops': {MOD_FILENODE: 'modified file'}}),
437 'ops': {MOD_FILENODE: 'modified file'}}),
430 ('vcs/tests/test_git.py', 'M',
438 ('vcs/tests/test_git.py', 'M',
431 {'added': 5,
439 {'added': 5,
432 'deleted': 5,
440 'deleted': 5,
433 'binary': False,
441 'binary': False,
434 'ops': {MOD_FILENODE: 'modified file'}}),
442 'ops': {MOD_FILENODE: 'modified file'}}),
435 ('vcs/tests/test_repository.py', 'M',
443 ('vcs/tests/test_repository.py', 'M',
436 {'added': 174,
444 {'added': 174,
437 'deleted': 2,
445 'deleted': 2,
438 'binary': False,
446 'binary': False,
439 'ops': {MOD_FILENODE: 'modified file'}}),
447 'ops': {MOD_FILENODE: 'modified file'}}),
440 ]),
448 ]),
441 ('hg',
449 ('hg',
442 'hg_diff_copy_file.diff',
450 'hg_diff_copy_file.diff',
443 [('file2', 'M',
451 [('file2', 'M',
444 {'added': 0,
452 {'added': 0,
445 'deleted': 0,
453 'deleted': 0,
446 'binary': True,
454 'binary': True,
447 'ops': {COPIED_FILENODE: 'file copied from file1 to file2'}}),
455 'ops': {COPIED_FILENODE: 'file copied from file1 to file2'}}),
448 ]),
456 ]),
449 ('hg',
457 ('hg',
450 'hg_diff_copy_and_modify_file.diff',
458 'hg_diff_copy_and_modify_file.diff',
451 [('file3', 'M',
459 [('file3', 'M',
452 {'added': 1,
460 {'added': 1,
453 'deleted': 0,
461 'deleted': 0,
454 'binary': False,
462 'binary': False,
455 'ops': {COPIED_FILENODE: 'file copied from file2 to file3',
463 'ops': {COPIED_FILENODE: 'file copied from file2 to file3',
456 MOD_FILENODE: 'modified file'}}),
464 MOD_FILENODE: 'modified file'}}),
457 ]),
465 ]),
458 ('hg',
466 ('hg',
459 'hg_diff_copy_and_chmod_file.diff',
467 'hg_diff_copy_and_chmod_file.diff',
460 [('file4', 'M',
468 [('file4', 'M',
461 {'added': 0,
469 {'added': 0,
462 'deleted': 0,
470 'deleted': 0,
463 'binary': True,
471 'binary': True,
464 'ops': {COPIED_FILENODE: 'file copied from file3 to file4',
472 'ops': {COPIED_FILENODE: 'file copied from file3 to file4',
465 CHMOD_FILENODE: 'modified file chmod 100644 => 100755'}}),
473 CHMOD_FILENODE: 'modified file chmod 100644 => 100755'}}),
466 ]),
474 ]),
467 ('hg',
475 ('hg',
468 'hg_diff_copy_chmod_and_edit_file.diff',
476 'hg_diff_copy_chmod_and_edit_file.diff',
469 [('file5', 'M',
477 [('file5', 'M',
470 {'added': 2,
478 {'added': 2,
471 'deleted': 1,
479 'deleted': 1,
472 'binary': False,
480 'binary': False,
473 'ops': {COPIED_FILENODE: 'file copied from file4 to file5',
481 'ops': {COPIED_FILENODE: 'file copied from file4 to file5',
474 CHMOD_FILENODE: 'modified file chmod 100755 => 100644',
482 CHMOD_FILENODE: 'modified file chmod 100755 => 100644',
475 MOD_FILENODE: 'modified file'}})]),
483 MOD_FILENODE: 'modified file'}})]),
476
484
477 # Diffs to validate rename and copy file with space in its name
485 # Diffs to validate rename and copy file with space in its name
478 ('git',
486 ('git',
479 'git_diff_rename_file_with_spaces.diff',
487 'git_diff_rename_file_with_spaces.diff',
480 [('file_with_ two spaces.txt', 'M',
488 [('file_with_ two spaces.txt', 'M',
481 {'added': 0,
489 {'added': 0,
482 'deleted': 0,
490 'deleted': 0,
483 'binary': True,
491 'binary': True,
484 'ops': {
492 'ops': {
485 RENAMED_FILENODE: (
493 RENAMED_FILENODE: (
486 'file renamed from file_with_ spaces.txt to file_with_ '
494 'file renamed from file_with_ spaces.txt to file_with_ '
487 ' two spaces.txt')}
495 ' two spaces.txt')}
488 }), ]),
496 }), ]),
489 ('hg',
497 ('hg',
490 'hg_diff_rename_file_with_spaces.diff',
498 'hg_diff_rename_file_with_spaces.diff',
491 [('file_changed _.txt', 'M',
499 [('file_changed _.txt', 'M',
492 {'added': 0,
500 {'added': 0,
493 'deleted': 0,
501 'deleted': 0,
494 'binary': True,
502 'binary': True,
495 'ops': {
503 'ops': {
496 RENAMED_FILENODE: (
504 RENAMED_FILENODE: (
497 'file renamed from file_ with update.txt to file_changed'
505 'file renamed from file_ with update.txt to file_changed'
498 ' _.txt')}
506 ' _.txt')}
499 }), ]),
507 }), ]),
500 ('hg',
508 ('hg',
501 'hg_diff_copy_file_with_spaces.diff',
509 'hg_diff_copy_file_with_spaces.diff',
502 [('file_copied_ with spaces.txt', 'M',
510 [('file_copied_ with spaces.txt', 'M',
503 {'added': 0,
511 {'added': 0,
504 'deleted': 0,
512 'deleted': 0,
505 'binary': True,
513 'binary': True,
506 'ops': {
514 'ops': {
507 COPIED_FILENODE: (
515 COPIED_FILENODE: (
508 'file copied from file_changed_without_spaces.txt to'
516 'file copied from file_changed_without_spaces.txt to'
509 ' file_copied_ with spaces.txt')}
517 ' file_copied_ with spaces.txt')}
510 }),
518 }),
511 ]),
519 ]),
512
520
513 # special signs from git
521 # special signs from git
514 ('git',
522 ('git',
515 'git_diff_binary_special_files.diff',
523 'git_diff_binary_special_files.diff',
516 [('css/_Icon\\r', 'A',
524 [('css/_Icon\\r', 'A',
517 {'added': 0,
525 {'added': 0,
518 'deleted': 0,
526 'deleted': 0,
519 'binary': True,
527 'binary': True,
520 'ops': {NEW_FILENODE: 'new file 100644',
528 'ops': {NEW_FILENODE: 'new file 100644',
521 BIN_FILENODE: 'binary diff hidden'}
529 BIN_FILENODE: 'binary diff hidden'}
522 }),
530 }),
523 ]),
531 ]),
524 ('git',
532 ('git',
525 'git_diff_binary_special_files_2.diff',
533 'git_diff_binary_special_files_2.diff',
526 [('css/Icon\\r', 'A',
534 [('css/Icon\\r', 'A',
527 {'added': 0,
535 {'added': 0,
528 'deleted': 0,
536 'deleted': 0,
529 'binary': True,
537 'binary': True,
530 'ops': {NEW_FILENODE: 'new file 100644', }
538 'ops': {NEW_FILENODE: 'new file 100644', }
531 }),
539 }),
532 ]),
540 ]),
533
541
534 ('svn',
542 ('svn',
535 'svn_diff_binary_add_file.diff',
543 'svn_diff_binary_add_file.diff',
536 [('intl.dll', 'A',
544 [('intl.dll', 'A',
537 {'added': 0,
545 {'added': 0,
538 'deleted': 0,
546 'deleted': 0,
539 'binary': False,
547 'binary': False,
540 'ops': {NEW_FILENODE: 'new file 10644',
548 'ops': {NEW_FILENODE: 'new file 10644',
541 #TODO(Marcink): depends on binary detection on svn patches
549 #TODO(Marcink): depends on binary detection on svn patches
542 # BIN_FILENODE: 'binary diff hidden'
550 # BIN_FILENODE: 'binary diff hidden'
543 }
551 }
544 }),
552 }),
545 ]),
553 ]),
546
554
547 ('svn',
555 ('svn',
548 'svn_diff_multiple_changes.diff',
556 'svn_diff_multiple_changes.diff',
549 [('trunk/doc/images/SettingsOverlay.png', 'M',
557 [('trunk/doc/images/SettingsOverlay.png', 'M',
550 {'added': 0,
558 {'added': 0,
551 'deleted': 0,
559 'deleted': 0,
552 'binary': False,
560 'binary': False,
553 'ops': {MOD_FILENODE: 'modified file',
561 'ops': {MOD_FILENODE: 'modified file',
554 #TODO(Marcink): depends on binary detection on svn patches
562 #TODO(Marcink): depends on binary detection on svn patches
555 # BIN_FILENODE: 'binary diff hidden'
563 # BIN_FILENODE: 'binary diff hidden'
556 }
564 }
557 }),
565 }),
558 ('trunk/doc/source/de/tsvn_ch04.xml', 'M',
566 ('trunk/doc/source/de/tsvn_ch04.xml', 'M',
559 {'added': 89,
567 {'added': 89,
560 'deleted': 34,
568 'deleted': 34,
561 'binary': False,
569 'binary': False,
562 'ops': {MOD_FILENODE: 'modified file'}
570 'ops': {MOD_FILENODE: 'modified file'}
563 }),
571 }),
564 ('trunk/doc/source/en/tsvn_ch04.xml', 'M',
572 ('trunk/doc/source/en/tsvn_ch04.xml', 'M',
565 {'added': 66,
573 {'added': 66,
566 'deleted': 21,
574 'deleted': 21,
567 'binary': False,
575 'binary': False,
568 'ops': {MOD_FILENODE: 'modified file'}
576 'ops': {MOD_FILENODE: 'modified file'}
569 }),
577 }),
570 ('trunk/src/Changelog.txt', 'M',
578 ('trunk/src/Changelog.txt', 'M',
571 {'added': 2,
579 {'added': 2,
572 'deleted': 0,
580 'deleted': 0,
573 'binary': False,
581 'binary': False,
574 'ops': {MOD_FILENODE: 'modified file'}
582 'ops': {MOD_FILENODE: 'modified file'}
575 }),
583 }),
576 ('trunk/src/Resources/TortoiseProcENG.rc', 'M',
584 ('trunk/src/Resources/TortoiseProcENG.rc', 'M',
577 {'added': 19,
585 {'added': 19,
578 'deleted': 13,
586 'deleted': 13,
579 'binary': False,
587 'binary': False,
580 'ops': {MOD_FILENODE: 'modified file'}
588 'ops': {MOD_FILENODE: 'modified file'}
581 }),
589 }),
582 ('trunk/src/TortoiseProc/SetOverlayPage.cpp', 'M',
590 ('trunk/src/TortoiseProc/SetOverlayPage.cpp', 'M',
583 {'added': 16,
591 {'added': 16,
584 'deleted': 1,
592 'deleted': 1,
585 'binary': False,
593 'binary': False,
586 'ops': {MOD_FILENODE: 'modified file'}
594 'ops': {MOD_FILENODE: 'modified file'}
587 }),
595 }),
588 ('trunk/src/TortoiseProc/SetOverlayPage.h', 'M',
596 ('trunk/src/TortoiseProc/SetOverlayPage.h', 'M',
589 {'added': 3,
597 {'added': 3,
590 'deleted': 0,
598 'deleted': 0,
591 'binary': False,
599 'binary': False,
592 'ops': {MOD_FILENODE: 'modified file'}
600 'ops': {MOD_FILENODE: 'modified file'}
593 }),
601 }),
594 ('trunk/src/TortoiseProc/resource.h', 'M',
602 ('trunk/src/TortoiseProc/resource.h', 'M',
595 {'added': 2,
603 {'added': 2,
596 'deleted': 0,
604 'deleted': 0,
597 'binary': False,
605 'binary': False,
598 'ops': {MOD_FILENODE: 'modified file'}
606 'ops': {MOD_FILENODE: 'modified file'}
599 }),
607 }),
600 ('trunk/src/TortoiseShell/ShellCache.h', 'M',
608 ('trunk/src/TortoiseShell/ShellCache.h', 'M',
601 {'added': 50,
609 {'added': 50,
602 'deleted': 1,
610 'deleted': 1,
603 'binary': False,
611 'binary': False,
604 'ops': {MOD_FILENODE: 'modified file'}
612 'ops': {MOD_FILENODE: 'modified file'}
605 }),
613 }),
606 ]),
614 ]),
607
615
608
616
609 # TODO: mikhail: do we still need this?
617 # TODO: mikhail: do we still need this?
610 # (
618 # (
611 # 'hg',
619 # 'hg',
612 # 'large_diff.diff',
620 # 'large_diff.diff',
613 # [
621 # [
614 # ('.hgignore', 'A', {
622 # ('.hgignore', 'A', {
615 # 'deleted': 0, 'binary': False, 'added': 3, 'ops': {
623 # 'deleted': 0, 'binary': False, 'added': 3, 'ops': {
616 # 1: 'new file 100644'}}),
624 # 1: 'new file 100644'}}),
617 # (
625 # (
618 # 'MANIFEST.in', 'A',
626 # 'MANIFEST.in', 'A',
619 # {'deleted': 0, 'binary': False, 'added': 3, 'ops': {
627 # {'deleted': 0, 'binary': False, 'added': 3, 'ops': {
620 # 1: 'new file 100644'}}),
628 # 1: 'new file 100644'}}),
621 # (
629 # (
622 # 'README.txt', 'A',
630 # 'README.txt', 'A',
623 # {'deleted': 0, 'binary': False, 'added': 19, 'ops': {
631 # {'deleted': 0, 'binary': False, 'added': 19, 'ops': {
624 # 1: 'new file 100644'}}),
632 # 1: 'new file 100644'}}),
625 # (
633 # (
626 # 'development.ini', 'A', {
634 # 'development.ini', 'A', {
627 # 'deleted': 0, 'binary': False, 'added': 116, 'ops': {
635 # 'deleted': 0, 'binary': False, 'added': 116, 'ops': {
628 # 1: 'new file 100644'}}),
636 # 1: 'new file 100644'}}),
629 # (
637 # (
630 # 'docs/index.txt', 'A', {
638 # 'docs/index.txt', 'A', {
631 # 'deleted': 0, 'binary': False, 'added': 19, 'ops': {
639 # 'deleted': 0, 'binary': False, 'added': 19, 'ops': {
632 # 1: 'new file 100644'}}),
640 # 1: 'new file 100644'}}),
633 # (
641 # (
634 # 'ez_setup.py', 'A', {
642 # 'ez_setup.py', 'A', {
635 # 'deleted': 0, 'binary': False, 'added': 276, 'ops': {
643 # 'deleted': 0, 'binary': False, 'added': 276, 'ops': {
636 # 1: 'new file 100644'}}),
644 # 1: 'new file 100644'}}),
637 # (
645 # (
638 # 'hgapp.py', 'A', {
646 # 'hgapp.py', 'A', {
639 # 'deleted': 0, 'binary': False, 'added': 26, 'ops': {
647 # 'deleted': 0, 'binary': False, 'added': 26, 'ops': {
640 # 1: 'new file 100644'}}),
648 # 1: 'new file 100644'}}),
641 # (
649 # (
642 # 'hgwebdir.config', 'A', {
650 # 'hgwebdir.config', 'A', {
643 # 'deleted': 0, 'binary': False, 'added': 21, 'ops': {
651 # 'deleted': 0, 'binary': False, 'added': 21, 'ops': {
644 # 1: 'new file 100644'}}),
652 # 1: 'new file 100644'}}),
645 # (
653 # (
646 # 'pylons_app.egg-info/PKG-INFO', 'A', {
654 # 'pylons_app.egg-info/PKG-INFO', 'A', {
647 # 'deleted': 0, 'binary': False, 'added': 10, 'ops': {
655 # 'deleted': 0, 'binary': False, 'added': 10, 'ops': {
648 # 1: 'new file 100644'}}),
656 # 1: 'new file 100644'}}),
649 # (
657 # (
650 # 'pylons_app.egg-info/SOURCES.txt', 'A', {
658 # 'pylons_app.egg-info/SOURCES.txt', 'A', {
651 # 'deleted': 0, 'binary': False, 'added': 33, 'ops': {
659 # 'deleted': 0, 'binary': False, 'added': 33, 'ops': {
652 # 1: 'new file 100644'}}),
660 # 1: 'new file 100644'}}),
653 # (
661 # (
654 # 'pylons_app.egg-info/dependency_links.txt', 'A', {
662 # 'pylons_app.egg-info/dependency_links.txt', 'A', {
655 # 'deleted': 0, 'binary': False, 'added': 1, 'ops': {
663 # 'deleted': 0, 'binary': False, 'added': 1, 'ops': {
656 # 1: 'new file 100644'}}),
664 # 1: 'new file 100644'}}),
657 # ]
665 # ]
658 # ),
666 # ),
659 ]
667 ]
660
668
661 DIFF_FIXTURES_WITH_CONTENT = [
669 DIFF_FIXTURES_WITH_CONTENT = [
662 (
670 (
663 'hg', 'hg_diff_single_file_change_newline.diff',
671 'hg', 'hg_diff_single_file_change_newline.diff',
664 [
672 [
665 (
673 (
666 'file_b', # filename
674 'file_b', # filename
667 'A', # change
675 'A', # change
668 { # stats
676 { # stats
669 'added': 1,
677 'added': 1,
670 'deleted': 0,
678 'deleted': 0,
671 'binary': False,
679 'binary': False,
672 'ops': {NEW_FILENODE: 'new file 100644', }
680 'ops': {NEW_FILENODE: 'new file 100644', }
673 },
681 },
674 '@@ -0,0 +1 @@\n+test_content b\n' # diff
682 '@@ -0,0 +1 @@\n+test_content b\n' # diff
675 ),
683 ),
676 ],
684 ],
677 ),
685 ),
678 (
686 (
679 'hg', 'hg_diff_double_file_change_newline.diff',
687 'hg', 'hg_diff_double_file_change_newline.diff',
680 [
688 [
681 (
689 (
682 'file_b', # filename
690 'file_b', # filename
683 'A', # change
691 'A', # change
684 { # stats
692 { # stats
685 'added': 1,
693 'added': 1,
686 'deleted': 0,
694 'deleted': 0,
687 'binary': False,
695 'binary': False,
688 'ops': {NEW_FILENODE: 'new file 100644', }
696 'ops': {NEW_FILENODE: 'new file 100644', }
689 },
697 },
690 '@@ -0,0 +1 @@\n+test_content b\n' # diff
698 '@@ -0,0 +1 @@\n+test_content b\n' # diff
691 ),
699 ),
692 (
700 (
693 'file_c', # filename
701 'file_c', # filename
694 'A', # change
702 'A', # change
695 { # stats
703 { # stats
696 'added': 1,
704 'added': 1,
697 'deleted': 0,
705 'deleted': 0,
698 'binary': False,
706 'binary': False,
699 'ops': {NEW_FILENODE: 'new file 100644', }
707 'ops': {NEW_FILENODE: 'new file 100644', }
700 },
708 },
701 '@@ -0,0 +1 @@\n+test_content c\n' # diff
709 '@@ -0,0 +1 @@\n+test_content c\n' # diff
702 ),
710 ),
703 ],
711 ],
704 ),
712 ),
705 (
713 (
706 'hg', 'hg_diff_double_file_change_double_newline.diff',
714 'hg', 'hg_diff_double_file_change_double_newline.diff',
707 [
715 [
708 (
716 (
709 'file_b', # filename
717 'file_b', # filename
710 'A', # change
718 'A', # change
711 { # stats
719 { # stats
712 'added': 1,
720 'added': 1,
713 'deleted': 0,
721 'deleted': 0,
714 'binary': False,
722 'binary': False,
715 'ops': {NEW_FILENODE: 'new file 100644', }
723 'ops': {NEW_FILENODE: 'new file 100644', }
716 },
724 },
717 '@@ -0,0 +1 @@\n+test_content b\n\n' # diff
725 '@@ -0,0 +1 @@\n+test_content b\n\n' # diff
718 ),
726 ),
719 (
727 (
720 'file_c', # filename
728 'file_c', # filename
721 'A', # change
729 'A', # change
722 { # stats
730 { # stats
723 'added': 1,
731 'added': 1,
724 'deleted': 0,
732 'deleted': 0,
725 'binary': False,
733 'binary': False,
726 'ops': {NEW_FILENODE: 'new file 100644', }
734 'ops': {NEW_FILENODE: 'new file 100644', }
727 },
735 },
728 '@@ -0,0 +1 @@\n+test_content c\n' # diff
736 '@@ -0,0 +1 @@\n+test_content c\n' # diff
729 ),
737 ),
730 ],
738 ],
731 ),
739 ),
732 (
740 (
733 'hg', 'hg_diff_four_file_change_newline.diff',
741 'hg', 'hg_diff_four_file_change_newline.diff',
734 [
742 [
735 (
743 (
736 'file', # filename
744 'file', # filename
737 'A', # change
745 'A', # change
738 { # stats
746 { # stats
739 'added': 1,
747 'added': 1,
740 'deleted': 0,
748 'deleted': 0,
741 'binary': False,
749 'binary': False,
742 'ops': {NEW_FILENODE: 'new file 100644', }
750 'ops': {NEW_FILENODE: 'new file 100644', }
743 },
751 },
744 '@@ -0,0 +1,1 @@\n+file\n' # diff
752 '@@ -0,0 +1,1 @@\n+file\n' # diff
745 ),
753 ),
746 (
754 (
747 'file2', # filename
755 'file2', # filename
748 'A', # change
756 'A', # change
749 { # stats
757 { # stats
750 'added': 1,
758 'added': 1,
751 'deleted': 0,
759 'deleted': 0,
752 'binary': False,
760 'binary': False,
753 'ops': {NEW_FILENODE: 'new file 100644', }
761 'ops': {NEW_FILENODE: 'new file 100644', }
754 },
762 },
755 '@@ -0,0 +1,1 @@\n+another line\n' # diff
763 '@@ -0,0 +1,1 @@\n+another line\n' # diff
756 ),
764 ),
757 (
765 (
758 'file3', # filename
766 'file3', # filename
759 'A', # change
767 'A', # change
760 { # stats
768 { # stats
761 'added': 1,
769 'added': 1,
762 'deleted': 0,
770 'deleted': 0,
763 'binary': False,
771 'binary': False,
764 'ops': {NEW_FILENODE: 'new file 100644', }
772 'ops': {NEW_FILENODE: 'new file 100644', }
765 },
773 },
766 '@@ -0,0 +1,1 @@\n+newline\n' # diff
774 '@@ -0,0 +1,1 @@\n+newline\n' # diff
767 ),
775 ),
768 (
776 (
769 'file4', # filename
777 'file4', # filename
770 'A', # change
778 'A', # change
771 { # stats
779 { # stats
772 'added': 1,
780 'added': 1,
773 'deleted': 0,
781 'deleted': 0,
774 'binary': False,
782 'binary': False,
775 'ops': {NEW_FILENODE: 'new file 100644', }
783 'ops': {NEW_FILENODE: 'new file 100644', }
776 },
784 },
777 '@@ -0,0 +1,1 @@\n+fil4\n\\ No newline at end of file' # diff
785 '@@ -0,0 +1,1 @@\n+fil4\n\\ No newline at end of file' # diff
778 ),
786 ),
779 ],
787 ],
780 ),
788 ),
781
789
782 ]
790 ]
783
791
784
792
785 diff_class = {
793 diff_class = {
786 'git': GitDiff,
794 'git': GitDiff,
787 'hg': MercurialDiff,
795 'hg': MercurialDiff,
788 'svn': SubversionDiff,
796 'svn': SubversionDiff,
789 }
797 }
790
798
791
799
792 @pytest.fixture(params=DIFF_FIXTURES)
800 @pytest.fixture(params=DIFF_FIXTURES)
793 def diff_fixture(request):
801 def diff_fixture(request):
794 vcs, diff_fixture, expected = request.param
802 vcs, diff_fixture, expected = request.param
795 diff_txt = fixture.load_resource(diff_fixture)
803 diff_txt = fixture.load_resource(diff_fixture)
796 diff = diff_class[vcs](diff_txt)
804 diff = diff_class[vcs](diff_txt)
797 return diff, expected
805 return diff, expected
798
806
799
807
800 def test_diff_lib(diff_fixture):
808 def test_diff_lib(diff_fixture):
801 diff, expected_data = diff_fixture
809 diff, expected_data = diff_fixture
802 diff_proc = DiffProcessor(diff)
810 diff_proc = DiffProcessor(diff)
803 diff_proc_d = diff_proc.prepare()
811 diff_proc_d = diff_proc.prepare()
804 data = [(x['filename'], x['operation'], x['stats']) for x in diff_proc_d]
812 data = [(x['filename'], x['operation'], x['stats']) for x in diff_proc_d]
805 assert expected_data == data
813 assert expected_data == data
806
814
807
815
808 @pytest.fixture(params=DIFF_FIXTURES_WITH_CONTENT)
816 @pytest.fixture(params=DIFF_FIXTURES_WITH_CONTENT)
809 def diff_fixture_w_content(request):
817 def diff_fixture_w_content(request):
810 vcs, diff_fixture, expected = request.param
818 vcs, diff_fixture, expected = request.param
811 diff_txt = fixture.load_resource(diff_fixture)
819 diff_txt = fixture.load_resource(diff_fixture)
812 diff = diff_class[vcs](diff_txt)
820 diff = diff_class[vcs](diff_txt)
813 return diff, expected
821 return diff, expected
814
822
815
823
816 def test_diff_lib_newlines(diff_fixture_w_content):
824 def test_diff_lib_newlines(diff_fixture_w_content):
817 diff, expected_data = diff_fixture_w_content
825 diff, expected_data = diff_fixture_w_content
818 diff_proc = DiffProcessor(diff)
826 diff_proc = DiffProcessor(diff)
819 diff_proc_d = diff_proc.prepare()
827 diff_proc_d = diff_proc.prepare()
820 data = [(x['filename'], x['operation'], x['stats'], x['raw_diff'])
828 data = [(x['filename'], x['operation'], x['stats'], x['raw_diff'])
821 for x in diff_proc_d]
829 for x in diff_proc_d]
822 assert expected_data == data
830 assert expected_data == data
General Comments 0
You need to be logged in to leave comments. Login now