u/ewong/rhodecode-enterprise-ce-fork Commit - r2546:db577a02

diffs: in case of text lexers don't do any HL because of pygments newline...

marcink -

r2546:db577a02 stable

parent child

rhodecode/lib/codeblocks.py

0 +13 -5

              # -*- coding: utf-8 -*-
              # Copyright (C) 2011-2018 RhodeCode GmbH
              #
              # This program is free software: you can redistribute it and/or modify
              # it under the terms of the GNU Affero General Public License, version 3
              # (only), as published by the Free Software Foundation.
              #
              # This program is distributed in the hope that it will be useful,
              # but WITHOUT ANY WARRANTY; without even the implied warranty of
              # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
              # GNU General Public License for more details.
              #
              # You should have received a copy of the GNU Affero General Public License
              # along with this program.  If not, see <http://www.gnu.org/licenses/>.
              #
              # This program is dual-licensed. If you wish to learn more about the
              # RhodeCode Enterprise Edition, including its added features, Support services,
              # and proprietary license terms, please see https://rhodecode.com/licenses/
              import logging
              import difflib
              from itertools import groupby
              from pygments import lex
              from pygments.formatters.html import _get_ttype_class as pygment_token_class
+             from pygments.lexers.special import TextLexer, Token
              from rhodecode.lib.helpers import (
                  get_lexer_for_filenode, html_escape, get_custom_lexer)
              from rhodecode.lib.utils2 import AttributeDict
              from rhodecode.lib.vcs.nodes import FileNode
              from rhodecode.lib.diff_match_patch import diff_match_patch
              from rhodecode.lib.diffs import LimitedDiffContainer
              from pygments.lexers import get_lexer_by_name
              # Shared plain 'text' lexer with all newline/whitespace normalisation
              # options switched off; DiffSet.render_patch starts from it as the
              # default source/target lexer.
              plain_text_lexer = get_lexer_by_name(
                  'text', stripall=False, stripnl=False, ensurenl=False)
              # NOTE(review): getLogger() without a name returns the root logger, not a
              # module specific one - confirm this is intended.
              log = logging.getLogger()
              def filenode_as_lines_tokens(filenode, lexer=None):
                  """
                  Tokenize the content of a file node and group the tokens per line
                  :param filenode: node whose ``content`` gets tokenized
                  :param lexer: optional lexer to use, when not given the lexer is
                      looked up using get_lexer_for_filenode
                  :return: list with one entry per line, each entry being a list of
                      (token_class, token_text) tuples
                  """
                  # keep the lexer as passed in by the caller just for the debug message
                  org_lexer = lexer
                  lexer = lexer or get_lexer_for_filenode(filenode)
                  log.debug('Generating file node pygment tokens for %s, %s, org_lexer:%s',
                            lexer, filenode, org_lexer)
                  tokens = tokenize_string(filenode.content, lexer)
-                 lines = split_token_stream(tokens, split_string='\n')
+                 lines = split_token_stream(tokens)
                  # split_token_stream is a generator, materialize it for the caller
                  rv = list(lines)
                  return rv
              def tokenize_string(content, lexer):
                  """
                  Use pygments to tokenize some content based on a lexer
                  ensuring all original new lines and whitespace is preserved
                  :param content: text to tokenize
                  :param lexer: pygments lexer instance, its ``stripall``, ``stripnl``
                      and ``ensurenl`` options are switched off on the passed in object
                  :return: generator of (token_class, token_text) tuples
                  """
                  # NOTE: this modifies the lexer object handed in by the caller
                  lexer.stripall = False
                  lexer.stripnl = False
                  lexer.ensurenl = False
                  # for the plain text lexer the whole content is emitted as one single
                  # Text token instead of being passed through pygments
-                 for token_type, token_text in lex(content, lexer):
+                 if isinstance(lexer, TextLexer):
+                     lexed = [(Token.Text, content)]
+                 else:
+                     lexed = lex(content, lexer)
+                 for token_type, token_text in lexed:
                      yield pygment_token_class(token_type), token_text
-             def split_token_stream(tokens, split_string=u'\n'):
+             def split_token_stream(tokens):
                  """
                  Take a list of (TokenType, text) tuples and split them into lines,
                  yielding one list of (TokenType, text) tuples per line. A token whose
                  text spans several lines is cut into one piece per line.
-                 >>> split_token_stream([(TEXT, 'some\ntext'), (TEXT, 'more\n')])
+                 split_token_stream([(TEXT, 'some\ntext'), (TEXT, 'more\n')])
                  yields
                  [(TEXT, 'some')]
                  [(TEXT, 'text'), (TEXT, 'more')]
                  [(TEXT, '')]
                  """
                  # tokens collected for the line that is currently being built
                  buffer = []
                  for token_class, token_text in tokens:
-                     parts = token_text.split(split_string)
+                     parts = token_text.split('\n')
                      # every part but the last one is terminated by a new line, so it
                      # closes the current line
                      for part in parts[:-1]:
                          buffer.append((token_class, part))
                          yield buffer
                          buffer = []
                      # the last part stays in the buffer as the start of the next line
                      buffer.append((token_class, parts[-1]))
                  # flush what is left, this is the last line of the stream
                  if buffer:
                      yield buffer
              def filenode_as_annotated_lines_tokens(filenode):
                  """
                  Yield the tokenized lines of a file node grouped by their annotation.
                  Consecutive lines sharing the same annotation end up in one group, a
                  line without annotation is grouped under None.
                  eg:
                  [
                      (annotation1, [
                          (1, line1_tokens_list),
                          (2, line2_tokens_list),
                      ]),
                      (annotation2, [
                          (3, line1_tokens_list),
                      ]),
                      (None, [
                          (4, line1_tokens_list),
                      ]),
                      (annotation1, [
                          (5, line1_tokens_list),
                          (6, line2_tokens_list),
                      ])
                  ]
                  """
                  resolved = {}  # commit_id => result of calling its commit_getter
                  def _resolve(commit_id, commit_getter):
                      # call each getter only once per commit id
                      if commit_id in resolved:
                          return resolved[commit_id]
                      commit = commit_getter()
                      resolved[commit_id] = commit
                      return commit
                  line_annotations = {}
                  for line_no, commit_id, commit_getter, _content in filenode.annotate:
                      line_annotations[line_no] = _resolve(commit_id, commit_getter)
                  # (line_no, tokens) pairs, line numbers start at 1
                  numbered_lines = enumerate(filenode_as_lines_tokens(filenode), 1)
                  grouped = groupby(
                      numbered_lines, lambda item: line_annotations.get(item[0]))
                  for annotation, group in grouped:
                      yield (annotation, list(group))
              def render_tokenstream(tokenstream):
                  """
                  Render a token stream into an html string.
                  The stream is rolled up with rollup_tokenstream first, each token class
                  becomes one <span> (with a class attribute when the token class is
                  set) and each non empty op wraps its html escaped text in a tag named
                  after the op.
                  """
                  chunks = []
                  for token_class, token_ops_texts in rollup_tokenstream(tokenstream):
                      opening = (
                          u'<span class="%s">' % token_class if token_class
                          else u'<span>')
                      chunks.append(opening)
                      # TODO: dan: investigate showing hidden characters like space/nl/tab
                      for op_tag, token_text in token_ops_texts:
                          if op_tag:
                              chunks.extend((
                                  u'<%s>' % op_tag,
                                  html_escape(token_text),
                                  u'</%s>' % op_tag,
                              ))
                          else:
                              chunks.append(html_escape(token_text))
                      chunks.append(u'</span>')
                  return ''.join(chunks)
              def rollup_tokenstream(tokenstream):
                  """
                  Merge neighbouring tokens of a stream made of
                      ('class', 'op', 'text')
                  or
                      ('class', 'text')
                  tuples into
                      [('class1',
                          [('op1', 'text'),
                           ('op2', 'text')]),
                       ('class2',
                          [('op3', 'text')])]
                  so that rendering to html needs the minimal amount of tags, ie.
                  <span class="A"><ins>he</ins>llo</span>
                  instead of
                  <span class="A"><ins>he</ins></span><span class="A">llo</span>
                  When the first tuple has 2 items the whole stream is treated as
                  ('class', 'text') and the resulting op is an empty string.
                  eg:
                  >>> rollup_tokenstream([('classA', '',      'h'),
                                          ('classA', 'del',   'ell'),
                                          ('classA', '',      'o'),
                                          ('classB', '',      ' '),
                                          ('classA', '',      'the'),
                                          ('classA', '',      're'),
                                          ])
                  [('classA', [('', 'h'), ('del', 'ell'), ('', 'o')]),
                   ('classB', [('', ' ')]),
                   ('classA', [('', 'there')])]
                  """
                  if tokenstream and len(tokenstream[0]) == 2:
                      # normalize ('class', 'text') into ('class', '', 'text')
                      tokenstream = ((t[0], '', t[1]) for t in tokenstream)
                  rolled_up = []
                  for token_class, class_tokens in groupby(tokenstream, lambda t: t[0]):
                      ops = [
                          (token_op, ''.join(text for _cls, _op, text in op_tokens))
                          for token_op, op_tokens
                          in groupby(class_tokens, lambda t: t[1])
                      ]
                      rolled_up.append((token_class, ops))
                  return rolled_up
              def tokens_diff(old_tokens, new_tokens, use_diff_match_patch=True):
                  """
                  Converts a list of (token_class, token_text) tuples to a list of
                  (token_class, token_op, token_text) tuples where token_op is one of
                  ('ins', 'del', '')
                  :param old_tokens: list of (token_class, token_text) tuples of old line
                  :param new_tokens: list of (token_class, token_text) tuples of new line
                  :param use_diff_match_patch: boolean, will use google's diff match patch
                      library which has options to 'smooth' out the character by character
                      differences making nicer ins/del blocks
                  :return: tuple of (old_tokens_result, new_tokens_result, similarity),
                      similarity being the difflib ratio of the joined old and new texts
                  """
                  old_tokens_result = []
                  new_tokens_result = []
                  similarity = difflib.SequenceMatcher(
                      None,
                      ''.join(token_text for token_class, token_text in old_tokens),
                      ''.join(token_text for token_class, token_text in new_tokens)
                  ).ratio()
                  if similarity < 0.6:  # return, the blocks are too different
                      old_tokens_result.extend(
                          (token_class, '', token_text)
                          for token_class, token_text in old_tokens)
                      new_tokens_result.extend(
                          (token_class, '', token_text)
                          for token_class, token_text in new_tokens)
                      return old_tokens_result, new_tokens_result, similarity
                  token_sequence_matcher = difflib.SequenceMatcher(
                      None,
                      [x[1] for x in old_tokens],
                      [x[1] for x in new_tokens])
                  for tag, o1, o2, n1, n2 in token_sequence_matcher.get_opcodes():
                      # check the differences by token block types first to give a more
                      # nicer "block" level replacement vs character diffs
                      if tag == 'equal':
                          old_tokens_result.extend(
                              (token_class, '', token_text)
                              for token_class, token_text in old_tokens[o1:o2])
                          new_tokens_result.extend(
                              (token_class, '', token_text)
                              for token_class, token_text in new_tokens[n1:n2])
                      elif tag == 'delete':
                          old_tokens_result.extend(
                              (token_class, 'del', token_text)
                              for token_class, token_text in old_tokens[o1:o2])
                      elif tag == 'insert':
                          new_tokens_result.extend(
                              (token_class, 'ins', token_text)
                              for token_class, token_text in new_tokens[n1:n2])
                      elif tag == 'replace':
                          # if same type token blocks must be replaced, do a diff on the
                          # characters in the token blocks to show individual changes
                          old_char_tokens = [
                              (token_class, char)
                              for token_class, token_text in old_tokens[o1:o2]
                              for char in token_text]
                          new_char_tokens = [
                              (token_class, char)
                              for token_class, token_text in new_tokens[n1:n2]
                              for char in token_text]
                          old_string = ''.join(
                              [char for token_class, char in old_char_tokens])
                          new_string = ''.join(
                              [char for token_class, char in new_char_tokens])
                          obuffer, nbuffer = [], []
                          if use_diff_match_patch:
                              dmp = diff_match_patch()
                              dmp.Diff_EditCost = 11  # TODO: dan: extract this to a setting
                              reps = dmp.diff_main(old_string, new_string)
                              dmp.diff_cleanupEfficiency(reps)
                              # a / b are the current positions in the old / new
                              # character lists, used to look up the token class
                              a, b = 0, 0
                              for op, rep in reps:
                                  rep_len = len(rep)
                                  if op == 0:
                                      for i, c in enumerate(rep):
                                          obuffer.append((old_char_tokens[a+i][0], '', c))
                                          nbuffer.append((new_char_tokens[b+i][0], '', c))
                                      a += rep_len
                                      b += rep_len
                                  elif op == -1:
                                      for i, c in enumerate(rep):
                                          obuffer.append((old_char_tokens[a+i][0], 'del', c))
                                      a += rep_len
                                  elif op == 1:
                                      for i, c in enumerate(rep):
                                          nbuffer.append((new_char_tokens[b+i][0], 'ins', c))
                                      b += rep_len
                          else:
                              # the difflib character opcodes are only needed on this
                              # branch, so they are not computed when diff match patch
                              # is in use
                              copcodes = difflib.SequenceMatcher(
                                  None, old_string, new_string).get_opcodes()
                              for ctag, co1, co2, cn1, cn2 in copcodes:
                                  if ctag == 'equal':
                                      obuffer.extend(
                                          (token_class, '', char) for token_class, char
                                          in old_char_tokens[co1:co2])
                                      nbuffer.extend(
                                          (token_class, '', char) for token_class, char
                                          in new_char_tokens[cn1:cn2])
                                  elif ctag == 'delete':
                                      obuffer.extend(
                                          (token_class, 'del', char) for token_class, char
                                          in old_char_tokens[co1:co2])
                                  elif ctag == 'insert':
                                      nbuffer.extend(
                                          (token_class, 'ins', char) for token_class, char
                                          in new_char_tokens[cn1:cn2])
                                  elif ctag == 'replace':
                                      obuffer.extend(
                                          (token_class, 'del', char) for token_class, char
                                          in old_char_tokens[co1:co2])
                                      nbuffer.extend(
                                          (token_class, 'ins', char) for token_class, char
                                          in new_char_tokens[cn1:cn2])
                          old_tokens_result.extend(obuffer)
                          new_tokens_result.extend(nbuffer)
                  return old_tokens_result, new_tokens_result, similarity
              class DiffSet(object):
                  """
                  An object for parsing the diff result from diffs.DiffProcessor and
                  adding highlighting, side by side/unified renderings and line diffs
                  """
                  HL_REAL = 'REAL' # highlights using original file, slow
                  HL_FAST = 'FAST' # highlights using just the line, fast but not correct
                                   # in the case of multiline code
                  HL_NONE = 'NONE' # no highlighting, fastest
                  def __init__(self, highlight_mode=HL_REAL, repo_name=None,
                               source_repo_name=None,
                               source_node_getter=lambda filename: None,
                               target_node_getter=lambda filename: None,
                               source_nodes=None, target_nodes=None,
                               max_file_size_limit=150 * 1024, # files over this size will
                                                               # use fast highlighting
                               comments=None,
                               ):
                      self.highlight_mode = highlight_mode
                      self.highlighted_filenodes = {}
                      self.source_node_getter = source_node_getter
                      self.target_node_getter = target_node_getter
                      self.source_nodes = source_nodes or {}
                      self.target_nodes = target_nodes or {}
                      self.repo_name = repo_name
                      self.source_repo_name = source_repo_name or repo_name
                      self.comments = comments or {}
                      self.comments_store = self.comments.copy()
                      self.max_file_size_limit = max_file_size_limit
                  def render_patchset(self, patchset, source_ref=None, target_ref=None):
                      """
                      Render every patch of a patchset and collect the results together
                      with the summed up statistics.
                      :param patchset: iterable of patch dicts
                      :param source_ref: stored as is on the result
                      :param target_ref: stored as is on the result
                      :return: AttributeDict holding the rendered file diffs in ``files``,
                          the per file stats in ``file_stats`` and the added/deleted line
                          and changed file counters
                      """
                      diffset = AttributeDict({
                          'lines_added': 0,
                          'lines_deleted': 0,
                          'changed_files': 0,
                          'files': [],
                          'file_stats': {},
                          'limited_diff': isinstance(patchset, LimitedDiffContainer),
                          'repo_name': self.repo_name,
                          'source_repo_name': self.source_repo_name,
                          'source_ref': source_ref,
                          'target_ref': target_ref,
                      })
                      for patch in patchset:
                          stats = patch['stats']
                          diffset.file_stats[patch['filename']] = stats
                          filediff = self.render_patch(patch)
                          filediff.diffset = diffset
                          diffset.files.append(filediff)
                          diffset.changed_files += 1
                          # binary files don't count towards the line statistics
                          if stats['binary']:
                              continue
                          diffset.lines_added += stats['added']
                          diffset.lines_deleted += stats['deleted']
                      return diffset
                  _lexer_cache = {}
                  def _get_lexer_for_filename(self, filename, filenode=None):
                      # cached because we might need to call it twice for source/target
                      if filename not in self._lexer_cache:
                          if filenode:
                              lexer = filenode.lexer
                              extension = filenode.extension
                          else:
                              lexer = FileNode.get_lexer(filename=filename)
                              extension = filename.split('.')[-1]
                          lexer = get_custom_lexer(extension) or lexer
                          self._lexer_cache[filename] = lexer
                      return self._lexer_cache[filename]
                  def render_patch(self, patch):
                      """
                      Render a single patch into a file diff.
                      :param patch: dict, the keys read here are 'filename',
                          'original_filename', 'operation', 'stats' and 'chunks'
                      :return: AttributeDict with the source/target paths, file nodes,
                          file types and modes, the parsed ``hunks`` and the
                          ``left_comments`` that could not be placed on a diff line
                      """
                      log.debug('rendering diff for %r', patch['filename'])
                      source_filename = patch['original_filename']
                      target_filename = patch['filename']
                      source_lexer = plain_text_lexer
                      target_lexer = plain_text_lexer
                      if not patch['stats']['binary']:
                          if self.highlight_mode == self.HL_REAL:
                              # fetch the nodes that are needed for this operation and
                              # were not handed in / fetched before
                              if (source_filename and patch['operation'] in ('D', 'M')
                                  and source_filename not in self.source_nodes):
                                      self.source_nodes[source_filename] = (
                                          self.source_node_getter(source_filename))
                              if (target_filename and patch['operation'] in ('A', 'M')
                                  and target_filename not in self.target_nodes):
                                      self.target_nodes[target_filename] = (
                                          self.target_node_getter(target_filename))
                          elif self.highlight_mode == self.HL_FAST:
                              source_lexer = self._get_lexer_for_filename(source_filename)
                              target_lexer = self._get_lexer_for_filename(target_filename)
                      # falls back to the plain filename when there is no node for it
                      source_file = self.source_nodes.get(source_filename, source_filename)
                      target_file = self.target_nodes.get(target_filename, target_filename)
                      source_filenode, target_filenode = None, None
                      # TODO: dan: FileNode.lexer works on the content of the file - which
                      # can be slow - issue #4289 explains a lexer clean up - which once
                      # done can allow caching a lexer for a filenode to avoid the file lookup
                      if isinstance(source_file, FileNode):
                          source_filenode = source_file
                          source_lexer = self._get_lexer_for_filename(source_filename)
                          source_file.lexer = source_lexer
                      if isinstance(target_file, FileNode):
                          target_filenode = target_file
                          target_lexer = self._get_lexer_for_filename(target_filename)
                          target_file.lexer = target_lexer
                      source_file_path, target_file_path = None, None
                      if source_filename != '/dev/null':
                          source_file_path = source_filename
                      if target_filename != '/dev/null':
                          target_file_path = target_filename
                      filediff = AttributeDict({
                          'source_file_path': source_file_path,
                          'target_file_path': target_file_path,
                          'source_filenode': source_filenode,
                          'target_filenode': target_filenode,
                          # each side reports the name of its own lexer
                          'source_file_type': source_lexer.name,
                          'target_file_type': target_lexer.name,
                          'patch': {'filename': patch['filename'], 'stats': patch['stats']},
                          'operation': patch['operation'],
                          'source_mode': patch['stats']['old_mode'],
                          'target_mode': patch['stats']['new_mode'],
                          'limited_diff': isinstance(patch, LimitedDiffContainer),
                          'hunks': [],
                          'diffset': self,
                      })
                      # the first chunk is skipped
                      # NOTE(review): presumably it holds the diff header - confirm
                      for hunk in patch['chunks'][1:]:
                          hunkbit = self.parse_hunk(hunk, source_file, target_file)
                          hunkbit.source_file_path = source_file_path
                          hunkbit.target_file_path = target_file_path
                          filediff.hunks.append(hunkbit)
                      # left comments are one that we couldn't place in diff lines.
                      # could be outdated, or the diff changed and this line is no
                      # longer available
                      # parse_hunk pops every comment it places out of the comments
                      # store, so whatever is still stored for the two paths is left
                      left_comments = {}
                      if source_file_path in self.comments_store:
                          left_comments.update(self.comments_store[source_file_path])
                      if target_file_path in self.comments_store:
                          left_comments.update(self.comments_store[target_file_path])
                      filediff.left_comments = left_comments
                      return filediff
                  def parse_hunk(self, hunk, source_file, target_file):
                      """
                      Parse a single hunk into rendered lines.
                      Deleted and added lines are buffered and handed over to
                      parse_lines as one block whenever an unmodified line shows up,
                      and once more at the end of the hunk.
                      :param hunk: dict with the hunk positions, 'section_header' and
                          'lines'
                      :param source_file: passed through to parse_lines
                      :param target_file: passed through to parse_lines
                      :return: AttributeDict with the hunk positions, the parsed
                          ``lines`` and their ``unified`` and ``sideside`` renderings
                      """
                      result = AttributeDict({
                          'source_start': hunk['source_start'],
                          'source_length': hunk['source_length'],
                          'target_start': hunk['target_start'],
                          'target_length': hunk['target_length'],
                          'section_header': hunk['section_header'],
                          'lines': [],
                      })
                      before, after = [], []
                      for line in hunk['lines']:
                          action = line['action']
                          if action == 'unmod':
                              # flush what got buffered so far, the unmodified line
                              # itself belongs to both sides
                              result.lines.extend(
                                  self.parse_lines(before, after, source_file, target_file))
                              after.append(line)
                              before.append(line)
                          elif action in ('add', 'new-no-nl'):
                              after.append(line)
                          elif action in ('del', 'old-no-nl'):
                              before.append(line)
                      result.lines.extend(
                          self.parse_lines(before, after, source_file, target_file))
                      result.unified = self.as_unified(result.lines)
                      result.sideside = result.lines
                      return result
                  def parse_lines(self, before_lines, after_lines, source_file, target_file):
                      """
                      Pair up the buffered old and new lines and render them.
                      Lines are taken from the front of both lists one pair at a time
                      until both lists are empty, so the lists passed in are consumed.
                      When both sides of a pair have tokens they are diffed against each
                      other using tokens_diff, otherwise the single side is rendered as is.
                      :param before_lines: list of line dicts of the old side
                      :param after_lines: list of line dicts of the new side
                      :param source_file: used for the tokens and comments of old lines
                      :param target_file: used for the tokens and comments of new lines
                      :return: list of AttributeDicts with ``original`` and ``modified``
                          entries, each having lineno, content, action and comments set
                          when the respective side is present
                      """
                      # TODO: dan: investigate doing the diff comparison and fast highlighting
                      # on the entire before and after buffered block lines rather than by
                      # line, this means we can get better 'fast' highlighting if the context
                      # allows it - eg.
                      # line 4: """
                      # line 5: this gets highlighted as a string
                      # line 6: """
                      lines = []
                      before_newline = AttributeDict()
                      after_newline = AttributeDict()
                      # a trailing 'old-no-nl' / 'new-no-nl' entry is taken off the list
                      # and pre-rendered with the 'nonl' class, it gets appended to the
                      # content of the last line of its side further down
                      if before_lines and before_lines[-1]['action'] == 'old-no-nl':
                          before_newline_line = before_lines.pop(-1)
                          before_newline.content = '\n {}'.format(
                              render_tokenstream(
                                  [(x[0], '', x[1])
                                   for x in [('nonl', before_newline_line['line'])]]))
                      if after_lines and after_lines[-1]['action'] == 'new-no-nl':
                          after_newline_line = after_lines.pop(-1)
                          after_newline.content = '\n {}'.format(
                              render_tokenstream(
                                  [(x[0], '', x[1])
                                   for x in [('nonl', after_newline_line['line'])]]))
                      while before_lines or after_lines:
                          before, after = None, None
                          before_tokens, after_tokens = None, None
                          # take the next line of each side, a side that ran out stays None
                          if before_lines:
                              before = before_lines.pop(0)
                          if after_lines:
                              after = after_lines.pop(0)
                          original = AttributeDict()
                          modified = AttributeDict()
                          if before:
                              if before['action'] == 'old-no-nl':
                                  before_tokens = [('nonl', before['line'])]
                              else:
                                  before_tokens = self.get_line_tokens(
                                      line_text=before['line'],
                                      line_number=before['old_lineno'],
                                      file=source_file)
                              original.lineno = before['old_lineno']
                              # raw line text, replaced by the rendered html below when
                              # there are tokens for this line
                              original.content = before['line']
                              original.action = self.action_to_op(before['action'])
                              original.comments = self.get_comments_for('old',
                                  source_file, before['old_lineno'])
                          if after:
                              if after['action'] == 'new-no-nl':
                                  after_tokens = [('nonl', after['line'])]
                              else:
                                  after_tokens = self.get_line_tokens(
                                      line_text=after['line'], line_number=after['new_lineno'],
                                      file=target_file)
                              modified.lineno = after['new_lineno']
                              modified.content = after['line']
                              modified.action = self.action_to_op(after['action'])
                              modified.comments = self.get_comments_for('new',
                                  target_file, after['new_lineno'])
                          # diff the lines
                          if before_tokens and after_tokens:
                              o_tokens, m_tokens, similarity = tokens_diff(
                                  before_tokens, after_tokens)
                              original.content = render_tokenstream(o_tokens)
                              modified.content = render_tokenstream(m_tokens)
                          elif before_tokens:
                              original.content = render_tokenstream(
                                  [(x[0], '', x[1]) for x in before_tokens])
                          elif after_tokens:
                              modified.content = render_tokenstream(
                                  [(x[0], '', x[1]) for x in after_tokens])
                          # once a side has no lines left, its pre-rendered no-newline
                          # entry is appended to the current content; resetting it to
                          # None makes sure this happens only once
                          if not before_lines and before_newline:
                              original.content += before_newline.content
                              before_newline = None
                          if not after_lines and after_newline:
                              modified.content += after_newline.content
                              after_newline = None
                          lines.append(AttributeDict({
                              'original': original,
                              'modified': modified,
                          }))
                      return lines
                  def get_comments_for(self, version, filename, line_number):
                      """
                      Pop and return the comments stored for one side of a diff line.

                      :param version: ``'old'`` or ``'new'``; any other value raises
                          ``KeyError``
                      :param filename: a path string, or an object exposing
                          ``unicode_path``
                      :param line_number: line number on the chosen side
                      :return: the stored comments, or ``None`` when ``filename`` is not
                          a string or nothing is stored under the computed key
                      """
                      if hasattr(filename, 'unicode_path'):
                          filename = filename.unicode_path

                      if not isinstance(filename, basestring):
                          return None

                      # keys look like ``o12`` (old side) or ``n12`` (new side)
                      side_prefix = {'old': 'o', 'new': 'n'}[version]
                      line_key = side_prefix + str(line_number)

                      store = self.comments_store
                      if filename not in store:
                          return None

                      per_file = store[filename]
                      if line_key not in per_file:
                          return None

                      # pop, so the same comments are handed out only once
                      return per_file.pop(line_key)
                  def get_line_tokens(self, line_text, line_number, file=None):
                      """
                      Tokenize a single diff line, picking a lexer by highlight mode.

                      :param line_text: text of the line to tokenize
                      :param line_number: line number inside ``file``; only used in
                          ``HL_REAL`` mode to look the line up in the tokenized file
                      :param file: a path string, a ``FileNode``, or ``None``
                      :return: the tokens of the line
                      """
                      node = None
                      path = None
                      if isinstance(file, basestring):
                          path = file
                      elif isinstance(file, FileNode):
                          node = file
                          path = file.unicode_path

                      # full-file highlighting: only for a real node, with a line number
                      # and a file below the size limit
                      if self.highlight_mode == self.HL_REAL and node:
                          node_lexer = self._get_lexer_for_filename(path)
                          small_enough = file.size < self.max_file_size_limit
                          if line_number and small_enough:
                              return self.get_tokenized_filenode_line(
                                  file, line_number, node_lexer)

                      # otherwise tokenize just this line: with the lexer guessed from
                      # the file name when highlighting is on, as plain text if not
                      if self.highlight_mode in (self.HL_REAL, self.HL_FAST) and path:
                          line_lexer = self._get_lexer_for_filename(path)
                      else:
                          line_lexer = plain_text_lexer
                      return list(tokenize_string(line_text, line_lexer))
                  def get_tokenized_filenode_line(self, filenode, line_number, lexer=None):
                      """
                      Return the tokens of line ``line_number`` (1-based) of ``filenode``.

                      The whole file is tokenized on first use and the result is kept in
                      ``self.highlighted_filenodes``; ``lexer`` is only used on that
                      first tokenization.
                      """
                      cache = self.highlighted_filenodes
                      if filenode not in cache:
                          cache[filenode] = filenode_as_lines_tokens(filenode, lexer)
                      tokenized_lines = cache[filenode]
                      return tokenized_lines[line_number - 1]
                  def action_to_op(self, action):
                      """
                      Translate a parsed-diff action name into its one-character marker.

                      Unknown actions are returned unchanged.
                      """
                      # unmodified lines and both "no newline" markers share the blank op
                      markers = dict.fromkeys(('unmod', 'old-no-nl', 'new-no-nl'), ' ')
                      markers['add'] = '+'
                      markers['del'] = '-'
                      try:
                          return markers[action]
                      except KeyError:
                          return action
                  def as_unified(self, lines):
                      """
                      Return a generator that yields the lines of a diff in unified order

                      Each yielded item is a tuple
                      ``(old_lineno, new_lineno, action, content, comments)``.

                      :param lines: iterable of objects with ``original`` and
                          ``modified`` attributes, each either falsy or exposing
                          ``lineno``, ``action``, ``content`` and ``comments``
                      """
                      def generator():
                          # '+' sides of replaced lines wait here until the run of '-'
                          # lines they belong to is over
                          buf = []
                          for line in lines:
                              original = line.original
                              modified = line.modified

                              # Flush pending additions once the removal run ends.
                              # Grouped explicitly: without the parentheses the test
                              # read ``(buf and not original) or original.action == ' '``
                              # and dereferenced a missing ``original``.
                              if buf and (not original or original.action == ' '):
                                  for b in buf:
                                      yield b
                                  buf = []

                              if original:
                                  if original.action == ' ':
                                      yield (original.lineno, modified.lineno,
                                             original.action, original.content,
                                             original.comments)
                                      continue

                                  if original.action == '-':
                                      yield (original.lineno, None,
                                             original.action, original.content,
                                             original.comments)

                                  if modified and modified.action == '+':
                                      buf.append((
                                          None, modified.lineno,
                                          modified.action, modified.content,
                                          modified.comments))
                                      continue

                              if modified:
                                  yield (None, modified.lineno,
                                         modified.action, modified.content,
                                         modified.comments)

                          for b in buf:
                              yield b

                      return generator()

rhodecode/lib/diffs.py

0 +35 -11

              # -*- coding: utf-8 -*-
              # Copyright (C) 2011-2018 RhodeCode GmbH
              #
              # This program is free software: you can redistribute it and/or modify
              # it under the terms of the GNU Affero General Public License, version 3
              # (only), as published by the Free Software Foundation.
              #
              # This program is distributed in the hope that it will be useful,
              # but WITHOUT ANY WARRANTY; without even the implied warranty of
              # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
              # GNU General Public License for more details.
              #
              # You should have received a copy of the GNU Affero General Public License
              # along with this program.  If not, see <http://www.gnu.org/licenses/>.
              #
              # This program is dual-licensed. If you wish to learn more about the
              # RhodeCode Enterprise Edition, including its added features, Support services,
              # and proprietary license terms, please see https://rhodecode.com/licenses/
              """
              Set of diffing helpers, previously part of vcs
              """
              import re
              import collections
              import difflib
              import logging
              from itertools import tee, imap
              from rhodecode.lib.vcs.exceptions import VCSError
              from rhodecode.lib.vcs.nodes import FileNode, SubModuleNode
              from rhodecode.lib.utils2 import safe_unicode
              log = logging.getLogger(__name__)
              # define max context, a file with more than this number of lines is unusable
              # in browser anyway
              # NOTE(review): the `1014` factor below looks like a typo for `1024` - confirm
              MAX_CONTEXT = 1024 * 1014
              class OPS(object):
                  """
                  Single-letter codes for the operation a diff performs on a file.
                  """
                  ADD = 'A'  # file is added
                  MOD = 'M'  # file is modified (also used for chmod, rename and copy)
                  DEL = 'D'  # file is deleted
              def get_gitdiff(filenode_old, filenode_new, ignore_whitespace=True, context=3):
                  """
                  Returns git style diff between given ``filenode_old`` and ``filenode_new``.

                  :param filenode_old: ``FileNode`` holding the old version of the file
                  :param filenode_new: ``FileNode`` holding the new version of the file
                  :param ignore_whitespace: ignore whitespaces in diff
                  :param context: number of context lines; a falsy value falls back
                      to 3 and values above ``MAX_CONTEXT`` are clamped to it
                  :return: the diff returned by the repository backend, or ``''`` when
                      either node is a ``SubModuleNode``
                  :raises VCSError: when one of the nodes is not a ``FileNode``
                  """
                  # make sure we pass in default context and protect against
                  # IntOverflow when passing HUGE context
                  context = min(context or 3, MAX_CONTEXT)

                  # any() instead of testing the truthiness of filter(): a lazy
                  # filter object is always truthy, which would hide every diff
                  if any(isinstance(node, SubModuleNode)
                         for node in (filenode_new, filenode_old)):
                      return ''

                  for filenode in (filenode_old, filenode_new):
                      if not isinstance(filenode, FileNode):
                          raise VCSError(
                              "Given object should be FileNode object, not %s"
                              % filenode.__class__)

                  repo = filenode_new.commit.repository
                  # an old node without a commit is diffed against the empty commit
                  old_commit = filenode_old.commit or repo.EMPTY_COMMIT
                  new_commit = filenode_new.commit

                  vcs_gitdiff = repo.get_diff(
                      old_commit, new_commit, filenode_new.path,
                      ignore_whitespace, context, path1=filenode_old.path)
                  return vcs_gitdiff
              # Keys of the ``stats['ops']`` dict filled in while a file header is
              # parsed; each key maps to a human readable description of the operation.
              NEW_FILENODE = 1  # 'new file ...'
              DEL_FILENODE = 2  # 'deleted file'
              MOD_FILENODE = 3  # 'modified file'
              RENAMED_FILENODE = 4  # 'file renamed from ... to ...'
              COPIED_FILENODE = 5  # 'file copied from ... to ...'
              CHMOD_FILENODE = 6  # 'modified file chmod ... => ...'
              BIN_FILENODE = 7  # 'binary diff hidden'
              class LimitedDiffContainer(object):
                  """
                  Wrapper around a parsed diff that also records the limit that was
                  in effect and the diff size reached so far.

                  Indexing and iteration are forwarded to the wrapped ``diff``.
                  """

                  def __init__(self, diff_limit, cur_diff_size, diff):
                      self.diff_limit = diff_limit
                      self.cur_diff_size = cur_diff_size
                      self.diff = diff

                  def __getitem__(self, key):
                      return self.diff[key]

                  def __iter__(self):
                      for entry in self.diff:
                          yield entry
              class Action(object):
                  """
                  Contains constants for the action value of the lines in a parsed diff.
                  """
                  ADD = 'add'  # added line
                  DELETE = 'del'  # removed line
                  UNMODIFIED = 'unmod'  # skipped by the inline highlighter
                  CONTEXT = 'context'  # also used for the pseudo lines describing file ops
                  # presumably the "No newline at end of file" markers of the old / new
                  # side - confirm against the line parser
                  OLD_NO_NL = 'old-no-nl'
                  NEW_NO_NL = 'new-no-nl'
              class DiffProcessor(object):
                  """
                  Give it a unified or git diff and it returns a list of the files that were
                  mentioned in the diff together with a dict of meta information that
                  can be used to render it in a HTML template.
                  .. note:: Unicode handling
                     The original diffs are a byte sequence and can contain filenames
                     in mixed encodings. This class generally returns `unicode` objects
                     since the result is intended for presentation to the user.
                  """
                  _chunk_re = re.compile(r'^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(.*)')
                  _newline_marker = re.compile(r'^\\ No newline at end of file')
                  # used for inline highlighter word split
                  _token_re = re.compile(r'()(&gt;|&lt;|&amp;|\W+?)')
                  # collapse ranges of commits over given number
                  _collapse_commits_over = 5
                  def __init__(self, diff, format='gitdiff', diff_limit=None,
                               file_limit=None, show_full_diff=True):
                      """
                      :param diff: A `Diff` object representing a diff from a vcs backend
                      :param format: format of diff passed, `udiff` or `gitdiff`; it
                          selects both the inline highlighter and the parser
                      :param diff_limit: define the size of diff that is considered "big"
                          based on that parameter cut off will be triggered, set to None
                          to show full diff
                      :param file_limit: size a single file diff is compared against
                          when deciding whether it exceeds the limit
                      :param show_full_diff: when true, exceeding a limit does not raise
                      """
                      # inputs
                      self._diff = diff
                      self._format = format

                      # limits
                      self.diff_limit = diff_limit
                      self.file_limit = file_limit
                      self.show_full_diff = show_full_diff

                      # running counters and parse state
                      self.adds = 0
                      self.removes = 0
                      self.cur_diff_size = 0
                      self.parsed = False
                      self.parsed_diff = []

                      log.debug('Initialized DiffProcessor with %s mode', format)

                      if format == 'gitdiff':
                          highlighter, parser = (
                              self._highlight_line_difflib, self._parse_gitdiff)
                      else:
                          highlighter, parser = (
                              self._highlight_line_udiff, self._new_parse_gitdiff)
                      self.differ = highlighter
                      self._parser = parser
                  def _copy_iterator(self):
                      """
                      make a fresh copy of generator, we should not iterate thru
                      an original as it's needed for repeating operations on
                      this instance of DiffProcessor
                      """
                      self.__udiff, iterator_copy = tee(self.__udiff)
                      return iterator_copy
                  def _escaper(self, string):
                      """
                      Escaper for diff escapes special chars and checks the diff limit
                      :param string:
                      """
                      self.cur_diff_size += len(string)
                      if not self.show_full_diff and (self.cur_diff_size > self.diff_limit):
                          raise DiffLimitExceeded('Diff Limit Exceeded')
-                     return safe_unicode(string)\
+                     return string \
                          .replace('&', '&amp;')\
                          .replace('<', '&lt;')\
                          .replace('>', '&gt;')
                  def _line_counter(self, l):
                      """
                      Bump ``self.adds`` / ``self.removes`` for a single diff line.

                      Lines starting with ``+++`` or ``---`` are not counted.

                      :param l: one line of the diff
                      :return: ``l`` passed through ``safe_unicode``
                      """
                      is_addition = l.startswith('+') and not l.startswith('+++')
                      is_removal = l.startswith('-') and not l.startswith('---')

                      if is_addition:
                          self.adds += 1
                      elif is_removal:
                          self.removes += 1

                      return safe_unicode(l)
                  def _highlight_line_difflib(self, line, next_):
                      """
                      Highlight inline changes in both lines.

                      Both lines are split with ``self._token_re`` and compared with
                      ``difflib.SequenceMatcher``; differing fragments are wrapped in
                      ``<del>`` (old line) and ``<ins>`` (new line). The ``'line'``
                      entry of both dicts is rewritten in place.
                      """
                      # whichever of the two is the deletion is the "old" side
                      if line['action'] == Action.DELETE:
                          removed, added = line, next_
                      else:
                          removed, added = next_, line

                      removed_words = self._token_re.split(removed['line'])
                      added_words = self._token_re.split(added['line'])
                      matcher = difflib.SequenceMatcher(None, removed_words, added_words)

                      removed_parts = []
                      added_parts = []
                      for opcode, a_lo, a_hi, b_lo, b_hi in matcher.get_opcodes():
                          removed_text = ''.join(removed_words[a_lo:a_hi])
                          added_text = ''.join(added_words[b_lo:b_hi])
                          differs = opcode != 'equal'
                          # empty fragments are never wrapped
                          if differs and removed_text:
                              removed_text = '<del>%s</del>' % removed_text
                          if differs and added_text:
                              added_text = '<ins>%s</ins>' % added_text
                          removed_parts.append(removed_text)
                          added_parts.append(added_text)

                      removed['line'] = "".join(removed_parts)
                      added['line'] = "".join(added_parts)
                  def _highlight_line_udiff(self, line, next_):
                      """
                      Highlight inline changes in both lines.
                      """
                      start = 0
                      limit = min(len(line['line']), len(next_['line']))
                      while start < limit and line['line'][start] == next_['line'][start]:
                          start += 1
                      end = -1
                      limit -= start
                      while -end <= limit and line['line'][end] == next_['line'][end]:
                          end -= 1
                      end += 1
                      if start or end:
                          def do(l):
                              last = end + len(l['line'])
                              if l['action'] == Action.ADD:
                                  tag = 'ins'
                              else:
                                  tag = 'del'
                              l['line'] = '%s<%s>%s</%s>%s' % (
                                  l['line'][:start],
                                  tag,
                                  l['line'][start:last],
                                  tag,
                                  l['line'][last:]
                              )
                          do(line)
                          do(next_)
                  def _clean_line(self, line, command):
                      if command in ['+', '-', ' ']:
                          # only modify the line if it's actually a diff thing
                          line = line[1:]
                      return line
                  def _parse_gitdiff(self, inline_diff=True):
                      """
                      Parse every chunk of ``self._diff`` into a per-file dict.

                      For each chunk the header is used to work out the operation
                      (``OPS.ADD`` / ``OPS.MOD`` / ``OPS.DEL``) and the entries of
                      ``stats['ops']``; when the header names an ``a_file`` or a
                      ``b_file`` the escaped diff lines are handed to
                      ``self._parse_lines``.

                      :param inline_diff: when true, ``self.differ`` is run over pairs
                          of adjacent changed lines of every chunk before returning
                      :return: the file dicts sorted by operation (added, modified,
                          deleted); wrapped in a ``LimitedDiffContainer`` if a
                          ``DiffLimitExceeded`` was caught for any file
                      """
                      _files = []
                      # identity wrapper, swapped for a LimitedDiffContainer factory once
                      # a limit is hit
                      diff_container = lambda arg: arg
                      for chunk in self._diff.chunks():
                          head = chunk.header
-                         diff = imap(self._escaper, chunk.diff.splitlines(1))
+                         diff = imap(self._escaper, self.diff_splitter(chunk.diff))
                          raw_diff = chunk.raw
                          limited_diff = False
                          exceeds_limit = False
                          op = None
                          # 'binary' is switched on by every header branch below and
                          # switched off again once the diff lines were parsed
                          stats = {
                              'added': 0,
                              'deleted': 0,
                              'binary': False,
                              'ops': {},
                          }
                          if head['deleted_file_mode']:
                              op = OPS.DEL
                              stats['binary'] = True
                              stats['ops'][DEL_FILENODE] = 'deleted file'
                          elif head['new_file_mode']:
                              op = OPS.ADD
                              stats['binary'] = True
                              stats['ops'][NEW_FILENODE] = 'new file %s' % head['new_file_mode']
                          else:  # modify operation, can be copy, rename or chmod
                              # CHMOD
                              if head['new_mode'] and head['old_mode']:
                                  op = OPS.MOD
                                  stats['binary'] = True
                                  stats['ops'][CHMOD_FILENODE] = (
                                      'modified file chmod %s => %s' % (
                                          head['old_mode'], head['new_mode']))
                              # RENAME
                              if head['rename_from'] != head['rename_to']:
                                  op = OPS.MOD
                                  stats['binary'] = True
                                  stats['ops'][RENAMED_FILENODE] = (
                                      'file renamed from %s to %s' % (
                                          head['rename_from'], head['rename_to']))
                              # COPY
                              if head.get('copy_from') and head.get('copy_to'):
                                  op = OPS.MOD
                                  stats['binary'] = True
                                  stats['ops'][COPIED_FILENODE] = (
                                      'file copied from %s to %s' % (
                                          head['copy_from'], head['copy_to']))
                              # If our new parsed headers didn't match anything fallback to
                              # old style detection
                              if op is None:
                                  if not head['a_file'] and head['b_file']:
                                      op = OPS.ADD
                                      stats['binary'] = True
                                      stats['ops'][NEW_FILENODE] = 'new file'
                                  elif head['a_file'] and not head['b_file']:
                                      op = OPS.DEL
                                      stats['binary'] = True
                                      stats['ops'][DEL_FILENODE] = 'deleted file'
                              # it's not ADD not DELETE
                              if op is None:
                                  op = OPS.MOD
                                  stats['binary'] = True
                                  stats['ops'][MOD_FILENODE] = 'modified file'
                          # a real non-binary diff
                          if head['a_file'] or head['b_file']:
                              try:
                                  raw_diff, chunks, _stats = self._parse_lines(diff)
                                  stats['binary'] = False
                                  stats['added'] = _stats[0]
                                  stats['deleted'] = _stats[1]
                                  # explicit mark that it's a modified file
                                  if op == OPS.MOD:
                                      stats['ops'][MOD_FILENODE] = 'modified file'
                                  exceeds_limit = len(raw_diff) > self.file_limit
                                  # changed from _escaper function so we validate size of
                                  # each file instead of the whole diff
                                  # diff will hide big files but still show small ones
                                  # from my tests, big files are fairly safe to be parsed
                                  # but the browser is the bottleneck
                                  if not self.show_full_diff and exceeds_limit:
                                      raise DiffLimitExceeded('File Limit Exceeded')
                              except DiffLimitExceeded:
                                  # from here on the final result is wrapped in a
                                  # LimitedDiffContainer and this file gets no chunks
                                  diff_container = lambda _diff: \
                                      LimitedDiffContainer(
                                          self.diff_limit, self.cur_diff_size, _diff)
                                  exceeds_limit = len(raw_diff) > self.file_limit
                                  limited_diff = True
                                  chunks = []
                          else:  # GIT format binary patch, or possibly empty diff
                              if head['bin_patch']:
                                  # we have operation already extracted, but we mark simply
                                  # it's a diff we wont show for binary files
                                  stats['ops'][BIN_FILENODE] = 'binary diff hidden'
                              chunks = []
                          if chunks and not self.show_full_diff and op == OPS.DEL:
                              # if not full diff mode show deleted file contents
                              # TODO: anderson: if the view is not too big, there is no way
                              # to see the content of the file
                              chunks = []
                          # prepend a pseudo-chunk of context lines describing the recorded
                          # operations; a plain 'modified file' entry is left out
                          chunks.insert(0, [{
                                                'old_lineno': '',
                                                'new_lineno': '',
                                                'action': Action.CONTEXT,
                                                'line': msg,
                                            } for _op, msg in stats['ops'].iteritems()
                                            if _op not in [MOD_FILENODE]])
                          _files.append({
                              'filename': safe_unicode(head['b_path']),
                              'old_revision': head['a_blob_id'],
                              'new_revision': head['b_blob_id'],
                              'chunks': chunks,
                              'raw_diff': safe_unicode(raw_diff),
                              'operation': op,
                              'stats': stats,
                              'exceeds_limit': exceeds_limit,
                              'is_limited_diff': limited_diff,
                          })
                      # added files first, then modified, then deleted
                      sorter = lambda info: {OPS.ADD: 0, OPS.MOD: 1,
                                             OPS.DEL: 2}.get(info['operation'])
                      if not inline_diff:
                          return diff_container(sorted(_files, key=sorter))
                      # highlight inline changes
                      for diff_data in _files:
                          for chunk in diff_data['chunks']:
                              lineiter = iter(chunk)
                              try:
                                  while 1:
                                      line = lineiter.next()
                                      if line['action'] not in (
                                              Action.UNMODIFIED, Action.CONTEXT):
                                          # the following line is consumed here and is not
                                          # looked at again as a `line`
                                          nextline = lineiter.next()
                                          if nextline['action'] in ['unmod', 'context'] or \
                                             nextline['action'] == line['action']:
                                              continue
                                          self.differ(line, nextline)
                              except StopIteration:
                                  # end of the chunk reached
                                  pass
                      return diff_container(sorted(_files, key=sorter))
                  def _check_large_diff(self):
                      log.debug('Diff exceeds current diff_limit of %s', self.diff_limit)
                      if not self.show_full_diff and (self.cur_diff_size > self.diff_limit):
                          raise DiffLimitExceeded('Diff Limit `%s` Exceeded', self.diff_limit)
                  # FIXME: NEWDIFFS: dan: this replaces _parse_gitdiff
                  def _new_parse_gitdiff(self, inline_diff=True):
                      _files = []
                      # this can be overriden later to a LimitedDiffContainer type
                      diff_container = lambda arg: arg
                      for chunk in self._diff.chunks():
                          head = chunk.header
                          log.debug('parsing diff %r' % head)
                          raw_diff = chunk.raw
                          limited_diff = False
                          exceeds_limit = False
                          op = None
                          stats = {
                              'added': 0,
                              'deleted': 0,
                              'binary': False,
                              'old_mode': None,
                              'new_mode': None,
                              'ops': {},
                          }
                          if head['old_mode']:
                              stats['old_mode'] = head['old_mode']
                          if head['new_mode']:
                              stats['new_mode'] = head['new_mode']
                          if head['b_mode']:
                              stats['new_mode'] = head['b_mode']
                          # delete file
                          if head['deleted_file_mode']:
                              op = OPS.DEL
                              stats['binary'] = True
                              stats['ops'][DEL_FILENODE] = 'deleted file'
                          # new file
                          elif head['new_file_mode']:
                              op = OPS.ADD
                              stats['binary'] = True
                              stats['old_mode'] = None
                              stats['new_mode'] = head['new_file_mode']
                              stats['ops'][NEW_FILENODE] = 'new file %s' % head['new_file_mode']
                          # modify operation, can be copy, rename or chmod
                          else:
                              # CHMOD
                              if head['new_mode'] and head['old_mode']:
                                  op = OPS.MOD
                                  stats['binary'] = True
                                  stats['ops'][CHMOD_FILENODE] = (
                                      'modified file chmod %s => %s' % (
                                          head['old_mode'], head['new_mode']))
                              # RENAME
                              if head['rename_from'] != head['rename_to']:
                                  op = OPS.MOD
                                  stats['binary'] = True
                                  stats['renamed'] = (head['rename_from'], head['rename_to'])
                                  stats['ops'][RENAMED_FILENODE] = (
                                      'file renamed from %s to %s' % (
                                          head['rename_from'], head['rename_to']))
                              # COPY
                              if head.get('copy_from') and head.get('copy_to'):
                                  op = OPS.MOD
                                  stats['binary'] = True
                                  stats['copied'] = (head['copy_from'], head['copy_to'])
                                  stats['ops'][COPIED_FILENODE] = (
                                      'file copied from %s to %s' % (
                                          head['copy_from'], head['copy_to']))
                              # If our new parsed headers didn't match anything fallback to
                              # old style detection
                              if op is None:
                                  if not head['a_file'] and head['b_file']:
                                      op = OPS.ADD
                                      stats['binary'] = True
                                      stats['new_file'] = True
                                      stats['ops'][NEW_FILENODE] = 'new file'
                                  elif head['a_file'] and not head['b_file']:
                                      op = OPS.DEL
                                      stats['binary'] = True
                                      stats['ops'][DEL_FILENODE] = 'deleted file'
                              # it's not ADD not DELETE
                              if op is None:
                                  op = OPS.MOD
                                  stats['binary'] = True
                                  stats['ops'][MOD_FILENODE] = 'modified file'
                          # a real non-binary diff
                          if head['a_file'] or head['b_file']:
-                             diff = iter(chunk.diff.splitlines(1))
+                             # simulate splitlines, so we keep the line end part
+                             diff = self.diff_splitter(chunk.diff)
                              # append each file to the diff size
                              raw_chunk_size = len(raw_diff)
                              exceeds_limit = raw_chunk_size > self.file_limit
                              self.cur_diff_size += raw_chunk_size
                              try:
                                  # Check each file instead of the whole diff.
                                  # Diff will hide big files but still show small ones.
                                  # From the tests big files are fairly safe to be parsed
                                  # but the browser is the bottleneck.
                                  if not self.show_full_diff and exceeds_limit:
                                      log.debug('File `%s` exceeds current file_limit of %s',
                                                safe_unicode(head['b_path']), self.file_limit)
                                      raise DiffLimitExceeded(
                                          'File Limit %s Exceeded', self.file_limit)
                                  self._check_large_diff()
                                  raw_diff, chunks, _stats = self._new_parse_lines(diff)
                                  stats['binary'] = False
                                  stats['added'] = _stats[0]
                                  stats['deleted'] = _stats[1]
                                  # explicit mark that it's a modified file
                                  if op == OPS.MOD:
                                      stats['ops'][MOD_FILENODE] = 'modified file'
                              except DiffLimitExceeded:
                                  diff_container = lambda _diff: \
                                      LimitedDiffContainer(
                                          self.diff_limit, self.cur_diff_size, _diff)
                                  limited_diff = True
                                  chunks = []
                          else:  # GIT format binary patch, or possibly empty diff
                              if head['bin_patch']:
                                  # we have operation already extracted, but we mark simply
                                  # it's a diff we wont show for binary files
                                  stats['ops'][BIN_FILENODE] = 'binary diff hidden'
                              chunks = []
                          # Hide content of deleted node by setting empty chunks
                          if chunks and not self.show_full_diff and op == OPS.DEL:
                              # if not full diff mode show deleted file contents
                              # TODO: anderson: if the view is not too big, there is no way
                              # to see the content of the file
                              chunks = []
                          chunks.insert(
 , [{'old_lineno': '',
                                   'new_lineno': '',
                                   'action': Action.CONTEXT,
                                   'line': msg,
                                   } for _op, msg in stats['ops'].iteritems()
                                  if _op not in [MOD_FILENODE]])
                          original_filename = safe_unicode(head['a_path'])
                          _files.append({
                              'original_filename': original_filename,
                              'filename': safe_unicode(head['b_path']),
                              'old_revision': head['a_blob_id'],
                              'new_revision': head['b_blob_id'],
                              'chunks': chunks,
                              'raw_diff': safe_unicode(raw_diff),
                              'operation': op,
                              'stats': stats,
                              'exceeds_limit': exceeds_limit,
                              'is_limited_diff': limited_diff,
                          })
                      sorter = lambda info: {OPS.ADD: 0, OPS.MOD: 1,
                                             OPS.DEL: 2}.get(info['operation'])
                      return diff_container(sorted(_files, key=sorter))
                  # FIXME: NEWDIFFS: dan: this gets replaced by _new_parse_lines
-                 def _parse_lines(self, diff):
+                 def _parse_lines(self, diff_iter):
                      """
                      Parse the diff an return data for the template.
                      """
-                     lineiter = iter(diff)
                      stats = [0, 0]
                      chunks = []
                      raw_diff = []
                      try:
-                         line = lineiter.next()
+                         line = diff_iter.next()
                          while line:
                              raw_diff.append(line)
                              lines = []
                              chunks.append(lines)
                              match = self._chunk_re.match(line)
                              if not match:
                                  break
                              gr = match.groups()
                              (old_line, old_end,
                               new_line, new_end) = [int(x or 1) for x in gr[:-1]]
                              old_line -= 1
                              new_line -= 1
                              context = len(gr) == 5
                              old_end += old_line
                              new_end += new_line
                              if context:
                                  # skip context only if it's first line
                                  if int(gr[0]) > 1:
                                      lines.append({
                                          'old_lineno': '...',
                                          'new_lineno': '...',
                                          'action':     Action.CONTEXT,
                                          'line':       line,
                                      })
-                             line = lineiter.next()
+                             line = diff_iter.next()
                              while old_line < old_end or new_line < new_end:
                                  command = ' '
                                  if line:
                                      command = line[0]
                                  affects_old = affects_new = False
                                  # ignore those if we don't expect them
                                  if command in '#@':
                                      continue
                                  elif command == '+':
                                      affects_new = True
                                      action = Action.ADD
                                      stats[0] += 1
                                  elif command == '-':
                                      affects_old = True
                                      action = Action.DELETE
                                      stats[1] += 1
                                  else:
                                      affects_old = affects_new = True
                                      action = Action.UNMODIFIED
                                  if not self._newline_marker.match(line):
                                      old_line += affects_old
                                      new_line += affects_new
                                      lines.append({
                                          'old_lineno':   affects_old and old_line or '',
                                          'new_lineno':   affects_new and new_line or '',
                                          'action':       action,
                                          'line':         self._clean_line(line, command)
                                      })
                                      raw_diff.append(line)
-                                 line = lineiter.next()
+                                 line = diff_iter.next()
                                  if self._newline_marker.match(line):
                                      # we need to append to lines, since this is not
                                      # counted in the line specs of diff
                                      lines.append({
                                          'old_lineno':   '...',
                                          'new_lineno':   '...',
                                          'action':       Action.CONTEXT,
                                          'line':         self._clean_line(line, command)
                                      })
                      except StopIteration:
                          pass
                      return ''.join(raw_diff), chunks, stats
                  # FIXME: NEWDIFFS: dan: this replaces _parse_lines
                  def _new_parse_lines(self, diff_iter):
                      """
                      Parse the diff an return data for the template.
                      """
                      stats = [0, 0]
                      chunks = []
                      raw_diff = []
-                     diff_iter = imap(lambda s: safe_unicode(s), diff_iter)
                      try:
                          line = diff_iter.next()
                          while line:
                              raw_diff.append(line)
+                             # match header e.g @@ -0,0 +1 @@\n'
                              match = self._chunk_re.match(line)
                              if not match:
                                  break
                              gr = match.groups()
                              (old_line, old_end,
                               new_line, new_end) = [int(x or 1) for x in gr[:-1]]
                              lines = []
                              hunk = {
                                  'section_header': gr[-1],
                                  'source_start': old_line,
                                  'source_length': old_end,
                                  'target_start': new_line,
                                  'target_length': new_end,
                                  'lines': lines,
                              }
                              chunks.append(hunk)
                              old_line -= 1
                              new_line -= 1
                              context = len(gr) == 5
                              old_end += old_line
                              new_end += new_line
                              line = diff_iter.next()
                              while old_line < old_end or new_line < new_end:
                                  command = ' '
                                  if line:
                                      command = line[0]
                                  affects_old = affects_new = False
                                  # ignore those if we don't expect them
                                  if command in '#@':
                                      continue
                                  elif command == '+':
                                      affects_new = True
                                      action = Action.ADD
                                      stats[0] += 1
                                  elif command == '-':
                                      affects_old = True
                                      action = Action.DELETE
                                      stats[1] += 1
                                  else:
                                      affects_old = affects_new = True
                                      action = Action.UNMODIFIED
                                  if not self._newline_marker.match(line):
                                      old_line += affects_old
                                      new_line += affects_new
                                      lines.append({
                                          'old_lineno':   affects_old and old_line or '',
                                          'new_lineno':   affects_new and new_line or '',
                                          'action':       action,
                                          'line':         self._clean_line(line, command)
                                      })
                                  raw_diff.append(line)
                                  line = diff_iter.next()
                                  if self._newline_marker.match(line):
                                      # we need to append to lines, since this is not
                                      # counted in the line specs of diff
                                      if affects_old:
                                          action = Action.OLD_NO_NL
                                      elif affects_new:
                                          action = Action.NEW_NO_NL
                                      else:
                                          raise Exception('invalid context for no newline')
                                      lines.append({
                                          'old_lineno':   None,
                                          'new_lineno':   None,
                                          'action':       action,
                                          'line':         self._clean_line(line, command)
                                      })
                      except StopIteration:
                          pass
                      return ''.join(raw_diff), chunks, stats
                  def _safe_id(self, idstring):
                      """Make a string safe for including in an id attribute.
                      The HTML spec says that id attributes 'must begin with
                      a letter ([A-Za-z]) and may be followed by any number
                      of letters, digits ([0-9]), hyphens ("-"), underscores
                      ("_"), colons (":"), and periods (".")'. These regexps
                      are slightly over-zealous, in that they remove colons
                      and periods unnecessarily.
                      Whitespace is transformed into underscores, and then
                      anything which is not a hyphen or a character that
                      matches \w (alphanumerics and underscore) is removed.
                      """
                      # Transform all whitespace to underscore
                      idstring = re.sub(r'\s', "_", '%s' % idstring)
                      # Remove everything that is not a hyphen or a member of \w
                      idstring = re.sub(r'(?!-)\W', "", idstring).lower()
                      return idstring
+                 @classmethod
+                 def diff_splitter(cls, string):
+                     """
+                     Diff split that emulates .splitlines() but works only on \n
+                     """
+                     if not string:
+                         return
+                     elif string == '\n':
+                         yield u'\n'
+                     else:
+                         has_newline = string.endswith('\n')
+                         elements = string.split('\n')
+                         if has_newline:
+                             # skip last element as it's empty string from newlines
+                             elements = elements[:-1]
+                         len_elements = len(elements)
+                         for cnt, line in enumerate(elements, start=1):
+                             last_line = cnt == len_elements
+                             if last_line and not has_newline:
+                                 yield safe_unicode(line)
+                             else:
+                                 yield safe_unicode(line) + '\n'
                  def prepare(self, inline_diff=True):
                      """
                      Run the configured parser over the udiff and cache its result.

                      The outcome of ``self._parser`` is stored in ``self.parsed_diff``
                      and ``self.parsed`` is switched on.

                      :param inline_diff: forwarded unchanged to ``self._parser``
                      :return: A list of dicts with diff information.
                      """
                      result = self._parser(inline_diff=inline_diff)
                      self.parsed_diff = result
                      self.parsed = True
                      return result
                  def as_raw(self, diff_lines=None):
                      """
                      Return the ``raw`` attribute of the wrapped diff object.

                      `diff_lines` is accepted but not used.
                      """
                      wrapped = self._diff
                      return wrapped.raw
                  def as_html(self, table_class='code-difftable', line_class='line',
                              old_lineno_class='lineno old', new_lineno_class='lineno new',
                              code_class='code', enable_comments=False, parsed_lines=None):
                      """
                      Render the parsed diff as one HTML table with configurable css classes.

                      Calls ``self.prepare()`` first when the diff was not parsed yet.

                      :param table_class: css class of the ``<table>`` element
                      :param line_class: css class put on every ``<tr>`` next to the action
                      :param old_lineno_class: css class of the old line number cell
                      :param new_lineno_class: css class of the new line number cell
                      :param code_class: css class of the code cell
                      :param enable_comments: when true, non-context lines get the
                          add-comment icon; otherwise every code cell is marked
                          ``no-comment``
                      :param parsed_lines: when truthy, rendered instead of
                          ``self.parsed_diff``
                      :return: the html string, or ``None`` when no chunk was found.
                          The ``line`` value of each change is placed into ``<pre>``
                          exactly as stored, no escaping is applied here.
                      """
                      # TODO(marcink): not sure how to pass in translator
                      # here in an efficient way, leave the _ for proper gettext extraction
                      _ = lambda s: s
                      def _link_to_if(condition, label, url):
                          """
                          Wrap `label` into a link to `url` when `condition` is met,
                          otherwise return the bare label.
                          """
                          if condition:
                              return '''<a href="%(url)s" class="tooltip"
                              title="%(title)s">%(label)s</a>''' % {
                                  'title': _('Click to select line'),
                                  'url': url,
                                  'label': label
                              }
                          else:
                              return label
                      if not self.parsed:
                          self.prepare()
                      stored_diff = self.parsed_diff
                      file_diffs = parsed_lines if parsed_lines else stored_diff
                      saw_chunk = False
                      out = ['''<table class="%(table_class)s">\n''' % {
                          'table_class': table_class
                      }]
                      emit = out.append
                      for file_diff in file_diffs:
                          for chunk in file_diff['chunks']:
                              saw_chunk = True
                              for change in chunk:
                                  action = change['action']
                                  emit('''<tr class="%(lc)s %(action)s">\n''' % {
                                      'lc': line_class,
                                      'action': action
                                  })
                                  old_no = change['old_lineno']
                                  new_no = change['new_lineno']
                                  file_id = self._safe_id(file_diff['filename'])
                                  anchor_old = "%(filename)s_o%(oldline_no)s" % {
                                      'filename': file_id,
                                      'oldline_no': old_no
                                  }
                                  anchor_new = "%(filename)s_n%(oldline_no)s" % {
                                      'filename': file_id,
                                      'oldline_no': new_no
                                  }
                                  # only real line numbers (not '...' and not empty)
                                  # receive an id attribute
                                  anchor_old_id = ''
                                  if old_no != '...' and old_no:
                                      anchor_old_id = 'id="%s"' % anchor_old
                                  anchor_new_id = ''
                                  if new_no != '...' and new_no:
                                      anchor_new_id = 'id="%s"' % anchor_new
                                  # context rows show plain numbers, all others link
                                  anchor_link = action != Action.CONTEXT
                                  ###########################################################
                                  # COMMENT ICONS
                                  ###########################################################
                                  emit('''\t<td class="add-comment-line"><span class="add-comment-content">''')
                                  if enable_comments and action != Action.CONTEXT:
                                      emit('''<a href="#"><span class="icon-comment-add"></span></a>''')
                                  emit('''</span></td><td class="comment-toggle tooltip" title="Toggle Comment Thread"><i class="icon-comment"></i></td>\n''')
                                  ###########################################################
                                  # OLD LINE NUMBER
                                  ###########################################################
                                  emit('''\t<td %(a_id)s class="%(olc)s">''' % {
                                      'a_id': anchor_old_id,
                                      'olc': old_lineno_class
                                  })
                                  old_link = _link_to_if(
                                      anchor_link, old_no, '#%s' % anchor_old)
                                  emit('''%(link)s''' % {'link': old_link})
                                  emit('''</td>\n''')
                                  ###########################################################
                                  # NEW LINE NUMBER
                                  ###########################################################
                                  emit('''\t<td %(a_id)s class="%(nlc)s">''' % {
                                      'a_id': anchor_new_id,
                                      'nlc': new_lineno_class
                                  })
                                  new_link = _link_to_if(
                                      anchor_link, new_no, '#%s' % anchor_new)
                                  emit('''%(link)s''' % {'link': new_link})
                                  emit('''</td>\n''')
                                  ###########################################################
                                  # CODE
                                  ###########################################################
                                  code_classes = [code_class]
                                  if not enable_comments or action == Action.CONTEXT:
                                      code_classes.append('no-comment')
                                  emit('\t<td class="%s">' % ' '.join(code_classes))
                                  emit('''\n\t\t<pre>%(code)s</pre>\n''' % {
                                      'code': change['line']
                                  })
                                  emit('''\t</td>''')
                                  emit('''\n</tr>\n''')
                      emit('''</table>''')
                      if not saw_chunk:
                          return None
                      return ''.join(out)
                  def stat(self):
                      """
                      Return the ``(adds, removes)`` pair stored on this instance.
                      """
                      added = self.adds
                      removed = self.removes
                      return added, removed
                  def get_context_of_line(
                          self, path, diff_line=None, context_before=3, context_after=3):
                      """
                      Collect the diff lines surrounding `diff_line` in the file `path`.

                      At least one of the two numbers in `diff_line` has to be ``None``,
                      otherwise a :class:`ValueError` is raised.

                      :type diff_line: :class:`DiffLineNumber`
                      :param context_before: how many chunk lines ahead of the hit are
                          taken into the window
                      :param context_after: how many chunk lines after the hit are
                          taken into the window
                      :return: list of ``(action, line)`` tuples for the content lines
                          of the window; the last entry always ends in one newline
                      """
                      assert self.parsed, "DiffProcessor is not initialized."
                      if None not in diff_line:
                          raise ValueError(
                              "Cannot specify both line numbers: {}".format(diff_line))
                      chunk, idx = self._find_chunk_line_index(
                          self._get_file_diff(path), diff_line)
                      window_start = max(idx - context_before, 0)
                      window_stop = idx + context_after + 1
                      line_contents = []
                      for entry in chunk[window_start:window_stop]:
                          if _is_diff_content(entry):
                              line_contents.append(_context_line(entry))
                      # TODO: johbo: Interim fixup, the diff chunks drop the final newline.
                      # Once they are fixed, we can drop this line here.
                      if line_contents:
                          last = line_contents[-1]
                          line_contents[-1] = (last[0], last[1].rstrip('\n') + '\n')
                      return line_contents
                  def find_context(self, path, context, offset=0):
                      """
                      Find every occurrence of `context` inside the diff of file `path`.

                      Use the parameter `offset` to specify which offset the target line has
                      inside of the given `context`. This way the correct diff line will be
                      returned.

                      :param path: file name as stored in the parsed diff
                      :param context: sequence of ``(action, line)`` tuples to look for
                      :param offset: Shall be used to specify the offset of the main line
                          within the given `context`.
                      :return: list of :class:`DiffLineNumber`, one per occurrence
                      :raises ValueError: if `offset` is negative or not smaller than
                          the length of `context`
                      """
                      if offset < 0 or offset >= len(context):
                          raise ValueError(
                              "Only positive values up to the length of the context "
                              "minus one are allowed.")
                      matches = []
                      file_diff = self._get_file_diff(path)
                      for chunk in file_diff['chunks']:
                          context_iter = iter(context)
                          for line_idx, line in enumerate(chunk):
                              try:
                                  if _context_line(line) == next(context_iter):
                                      continue
                              except StopIteration:
                                  # context fully consumed: the lines right before
                                  # `line_idx` are an occurrence
                                  matches.append((line_idx, chunk))
                              # mismatch or completed match, start over
                              context_iter = iter(context)
                          # An occurrence that ends on the last line of the chunk is not
                          # seen by the loop above, so probe the iterator once more.
                          # This has to happen per chunk; len(chunk) is the index just
                          # past the last line and is also valid for an empty chunk.
                          try:
                              next(context_iter)
                          except StopIteration:
                              matches.append((len(chunk), chunk))
                      effective_offset = len(context) - offset
                      found_at_diff_lines = [
                          _line_to_diff_line_number(chunk[idx - effective_offset])
                          for idx, chunk in matches]
                      return found_at_diff_lines
                  def _get_file_diff(self, path):
                      for file_diff in self.parsed_diff:
                          if file_diff['filename'] == path:
                              break
                      else:
                          raise FileNotInDiffException("File {} not in diff".format(path))
                      return file_diff
                  def _find_chunk_line_index(self, file_diff, diff_line):
                      for chunk in file_diff['chunks']:
                          for idx, line in enumerate(chunk):
                              if line['old_lineno'] == diff_line.old:
                                  return chunk, idx
                              if line['new_lineno'] == diff_line.new:
                                  return chunk, idx
                      raise LineNotInDiffException(
                          "The line {} is not part of the diff.".format(diff_line))
              def _is_diff_content(line):
                  """
                  Tell whether a parsed line is an unmodified, added or deleted line.
                  """
                  action = line['action']
                  return action in (Action.UNMODIFIED, Action.ADD, Action.DELETE)
              def _context_line(line):
                  return (line['action'], line['line'])
              DiffLineNumber = collections.namedtuple('DiffLineNumber', ['old', 'new'])
              def _line_to_diff_line_number(line):
                  new_line_no = line['new_lineno'] or None
                  old_line_no = line['old_lineno'] or None
                  return DiffLineNumber(old=old_line_no, new=new_line_no)
              class FileNotInDiffException(Exception):
                  """
                  Raised when the context for a missing file is requested.

                  If you request the context for a line in a file which is not part of the
                  given diff, then this exception is raised. The file lookup raises it
                  when no entry of the parsed diff has the requested filename.
                  """
              class LineNotInDiffException(Exception):
                  """
                  Raised when the context for a missing line is requested.

                  If you request the context for a line in a file and this line is not
                  part of the diff, then this exception is raised. The chunk lookup
                  raises it when neither the old nor the new line number matches any
                  line of the file's chunks.
                  """
              class DiffLimitExceeded(Exception):
                  """
                  Raised while parsing when a diff is larger than the configured limit.

                  The parser raises it for a file that exceeds the file limit while
                  full-diff mode is off, catches it, and then returns a limited diff
                  container with no chunks for that file.
                  """
                  pass

rhodecode/tests/lib/test_codeblocks.py

0 +3 -23

              # -*- coding: utf-8 -*-
              # Copyright (C) 2016-2018 RhodeCode GmbH
              #
              # This program is free software: you can redistribute it and/or modify
              # it under the terms of the GNU Affero General Public License, version 3
              # (only), as published by the Free Software Foundation.
              #
              # This program is distributed in the hope that it will be useful,
              # but WITHOUT ANY WARRANTY; without even the implied warranty of
              # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
              # GNU General Public License for more details.
              #
              # You should have received a copy of the GNU Affero General Public License
              # along with this program.  If not, see <http://www.gnu.org/licenses/>.
              #
              # This program is dual-licensed. If you wish to learn more about the
              # RhodeCode Enterprise Edition, including its added features, Support services,
              # and proprietary license terms, please see https://rhodecode.com/licenses/
              import pytest
              from pygments.lexers import get_lexer_by_name
              from rhodecode.tests import no_newline_id_generator
              from rhodecode.lib.codeblocks import (
                  tokenize_string, split_token_stream, rollup_tokenstream,
                  render_tokenstream)
              class TestTokenizeString(object):
                  """
                  Checks `tokenize_string` against a small python snippet, once with
                  the python lexer and once with the plain text lexer.
                  """
                  # snippet that both tests tokenize
                  python_code = '''
                  import this
                  var = 6
                  print "this"
                  '''
                  def test_tokenize_as_python(self):
                      """The python lexer splits the snippet into many classified tokens."""
                      lexer = get_lexer_by_name('python')
                      tokens = list(tokenize_string(self.python_code, lexer))
                      # each entry is a (token class, text) pair; an empty class is
                      # used for plain whitespace
                      assert tokens == [
                          ('',    u'\n'),
                          ('',    u'    '),
                          ('kn',  u'import'),
                          ('',    u' '),
                          ('nn',  u'this'),
                          ('',    u'\n'),
                          ('',    u'\n'),
                          ('',    u'    '),
                          ('n',   u'var'),
                          ('',    u' '),
                          ('o',   u'='),
                          ('',    u' '),
                          ('mi',  u'6'),
                          ('',    u'\n'),
                          ('',    u'    '),
                          ('k',   u'print'),
                          ('',    u' '),
                          ('s2',  u'"'),
                          ('s2',  u'this'),
                          ('s2',  u'"'),
                          ('',    u'\n'),
                          ('',    u'\n'),
                          ('',    u'    ')
                      ]
                  def test_tokenize_as_text(self):
                      """The text lexer returns the whole snippet as one unclassified token."""
                      lexer = get_lexer_by_name('text')
                      tokens = list(tokenize_string(self.python_code, lexer))
                      assert tokens == [
                          ('',
                          u'\n    import this\n\n    var = 6\n    print "this"\n\n    ')
                      ]
              class TestSplitTokenStream(object):
                  # Tests for split_token_stream. Each case passes a list of
                  # (token_class, text) pairs and compares the resulting list of
                  # per-line token lists.
                  def test_split_token_stream(self):
                      # A newline inside a token ends the current line; the trailing
                      # newline leaves one final line holding an empty-text token.
                      lines = list(split_token_stream(
                          [('type1', 'some\ntext'), ('type2', 'more\n')]))
                      assert lines == [
                          [('type1', u'some')],
                          [('type1', u'text'), ('type2', u'more')],
                          [('type2', u'')],
                      ]
-                 def test_split_token_stream_other_char(self):
-                     lines = list(split_token_stream(
-                         [('type1', 'some\ntext'), ('type2', 'more\n')],
-                         split_string='m'))
-                     assert lines == [
-                         [('type1', 'so')],
-                         [('type1', 'e\ntext'), ('type2', '')],
-                         [('type2', 'ore\n')],
+                     ]
-                 def test_split_token_stream_without_char(self):
-                     lines = list(split_token_stream(
-                         [('type1', 'some\ntext'), ('type2', 'more\n')],
-                         split_string='z'))
-                     assert lines == [
-                         [('type1', 'some\ntext'), ('type2', 'more\n')]
+                     ]
                  def test_split_token_stream_single(self):
                      # A token that is only a newline produces two lines, each
                      # carrying an empty-text token of the same class.
                      lines = list(split_token_stream(
-                         [('type1', '\n')], split_string='\n'))
+                         [('type1', '\n')]))
                      assert lines == [
                          [('type1', '')],
                          [('type1', '')],
                      ]
                  def test_split_token_stream_single_repeat(self):
                      # Three newlines in one token: four lines are expected.
                      lines = list(split_token_stream(
-                         [('type1', '\n\n\n')], split_string='\n'))
+                         [('type1', '\n\n\n')]))
                      assert lines == [
                          [('type1', '')],
                          [('type1', '')],
                          [('type1', '')],
                          [('type1', '')],
                      ]
                  def test_split_token_stream_multiple_repeat(self):
                      # The line on which the first token ends and the second one
                      # begins is expected to hold an empty token from each of them.
                      lines = list(split_token_stream(
-                         [('type1', '\n\n'), ('type2', '\n\n')], split_string='\n'))
+                         [('type1', '\n\n'), ('type2', '\n\n')]))
                      assert lines == [
                          [('type1', '')],
                          [('type1', '')],
                          [('type1', ''), ('type2', '')],
                          [('type2', '')],
                          [('type2', '')],
                      ]
              class TestRollupTokens(object):
                  """
                  Table-driven checks for ``rollup_tokenstream``.

                  The expected values show consecutive tokens that share a class being
                  grouped into one ``(class, [(op, text), ...])`` entry, with the texts
                  of adjacent tokens that also share an op joined together.
                  """

                  @pytest.mark.parametrize('tokenstream,output', [
                      ([], []),
                      (
                          [('A', 'hell'), ('A', 'o')],
                          [('A', [('', 'hello')])],
                      ),
                      (
                          [('A', 'hell'), ('B', 'o')],
                          [('A', [('', 'hell')]), ('B', [('', 'o')])],
                      ),
                      (
                          [('A', 'hel'), ('A', 'lo'), ('B', ' '), ('A', 'there')],
                          [
                              ('A', [('', 'hello')]),
                              ('B', [('', ' ')]),
                              ('A', [('', 'there')]),
                          ],
                      ),
                  ])
                  def test_rollup_tokenstream_without_ops(self, tokenstream, output):
                      """Two-element ``(class, text)`` tokens roll up with an empty op."""
                      rolled_up = list(rollup_tokenstream(tokenstream))
                      assert rolled_up == output

                  @pytest.mark.parametrize('tokenstream,output', [
                      ([], []),
                      (
                          [('A', '', 'hell'), ('A', '', 'o')],
                          [('A', [('', 'hello')])],
                      ),
                      (
                          [('A', '', 'hell'), ('B', '', 'o')],
                          [('A', [('', 'hell')]), ('B', [('', 'o')])],
                      ),
                      (
                          [('A', '', 'h'), ('B', '', 'e'), ('C', '', 'y')],
                          [
                              ('A', [('', 'h')]),
                              ('B', [('', 'e')]),
                              ('C', [('', 'y')]),
                          ],
                      ),
                      (
                          [('A', '', 'h'), ('A', '', 'e'), ('C', '', 'y')],
                          [('A', [('', 'he')]), ('C', [('', 'y')])],
                      ),
                      (
                          [('A', 'ins', 'h'), ('A', 'ins', 'e')],
                          [('A', [('ins', 'he')])],
                      ),
                      (
                          [('A', 'ins', 'h'), ('A', 'del', 'e')],
                          [('A', [('ins', 'h'), ('del', 'e')])],
                      ),
                      (
                          [('A', 'ins', 'h'), ('B', 'del', 'e'), ('B', 'del', 'y')],
                          [('A', [('ins', 'h')]), ('B', [('del', 'ey')])],
                      ),
                      (
                          [('A', 'ins', 'h'), ('A', 'del', 'e'), ('B', 'del', 'y')],
                          [
                              ('A', [('ins', 'h'), ('del', 'e')]),
                              ('B', [('del', 'y')]),
                          ],
                      ),
                      (
                          [('A', '', 'some'), ('A', 'ins', 'new'), ('A', '', 'name')],
                          [('A', [('', 'some'), ('ins', 'new'), ('', 'name')])],
                      ),
                  ])
                  def test_rollup_tokenstream_with_ops(self, tokenstream, output):
                      """Three-element ``(class, op, text)`` tokens keep their op."""
                      rolled_up = list(rollup_tokenstream(tokenstream))
                      assert rolled_up == output
              class TestRenderTokenStream(object):
                  """
                  Table-driven checks for ``render_tokenstream``.

                  The expected HTML shows one ``<span>`` per run of same-class tokens
                  (no ``class`` attribute when the class is empty) and non-empty ops
                  rendered as a nested tag named after the op (``<ins>`` / ``<del>``).
                  """

                  @pytest.mark.parametrize('tokenstream,output', [
                      ([], ''),
                      ([('', '', u'')], '<span></span>'),
                      ([('', '', u'text')], '<span>text</span>'),
                      ([('A', '', u'')], '<span class="A"></span>'),
                      ([('A', '', u'hello')], '<span class="A">hello</span>'),
                      (
                          [('A', '', u'hel'), ('A', '', u'lo')],
                          '<span class="A">hello</span>',
                      ),
                      (
                          [('A', '', u'two\n'), ('A', '', u'lines')],
                          '<span class="A">two\nlines</span>',
                      ),
                      (
                          [('A', '', u'\nthree\n'), ('A', '', u'lines')],
                          '<span class="A">\nthree\nlines</span>',
                      ),
                      (
                          [('', '', u'\n'), ('A', '', u'line')],
                          '<span>\n</span><span class="A">line</span>',
                      ),
                      (
                          [('', 'ins', u'\n'), ('A', '', u'line')],
                          '<span><ins>\n</ins></span><span class="A">line</span>',
                      ),
                      (
                          [('A', '', u'hel'), ('A', 'ins', u'lo')],
                          '<span class="A">hel<ins>lo</ins></span>',
                      ),
                      (
                          [('A', '', u'hel'), ('A', 'ins', u'l'), ('A', 'ins', u'o')],
                          '<span class="A">hel<ins>lo</ins></span>',
                      ),
                      (
                          [('A', '', u'hel'), ('A', 'ins', u'l'), ('A', 'del', u'o')],
                          '<span class="A">hel<ins>l</ins><del>o</del></span>',
                      ),
                      (
                          [('A', '', u'hel'), ('B', '', u'lo')],
                          '<span class="A">hel</span><span class="B">lo</span>',
                      ),
                      (
                          [('A', '', u'hel'), ('B', 'ins', u'lo')],
                          '<span class="A">hel</span><span class="B"><ins>lo</ins></span>',
                      ),
                  ], ids=no_newline_id_generator)
                  def test_render_tokenstream_with_ops(self, tokenstream, output):
                      """Render ``(class, op, text)`` tokens and compare the HTML."""
                      assert render_tokenstream(tokenstream) == output

                  # NOTE(review): the 2nd/3rd and the 4th/5th cases below are identical
                  # pairs; they are kept as-is so the collected test set is unchanged.
                  @pytest.mark.parametrize('tokenstream,output', [
                      (
                          [('A', u'hel'), ('A', u'lo')],
                          '<span class="A">hello</span>',
                      ),
                      (
                          [('A', u'hel'), ('A', u'l'), ('A', u'o')],
                          '<span class="A">hello</span>',
                      ),
                      (
                          [('A', u'hel'), ('A', u'l'), ('A', u'o')],
                          '<span class="A">hello</span>',
                      ),
                      (
                          [('A', u'hel'), ('B', u'lo')],
                          '<span class="A">hel</span><span class="B">lo</span>',
                      ),
                      (
                          [('A', u'hel'), ('B', u'lo')],
                          '<span class="A">hel</span><span class="B">lo</span>',
                      ),
                  ])
                  def test_render_tokenstream_without_ops(self, tokenstream, output):
                      """Render ``(class, text)`` tokens and compare the HTML."""
                      assert render_tokenstream(tokenstream) == output

rhodecode/tests/lib/test_diffs.py

0 +20 -2

              # -*- coding: utf-8 -*-
              # Copyright (C) 2010-2018 RhodeCode GmbH
              #
              # This program is free software: you can redistribute it and/or modify
              # it under the terms of the GNU Affero General Public License, version 3
              # (only), as published by the Free Software Foundation.
              #
              # This program is distributed in the hope that it will be useful,
              # but WITHOUT ANY WARRANTY; without even the implied warranty of
              # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
              # GNU General Public License for more details.
              #
              # You should have received a copy of the GNU Affero General Public License
              # along with this program.  If not, see <http://www.gnu.org/licenses/>.
              #
              # This program is dual-licensed. If you wish to learn more about the
              # RhodeCode Enterprise Edition, including its added features, Support services,
              # and proprietary license terms, please see https://rhodecode.com/licenses/
              import textwrap
              import pytest
              from rhodecode.lib.diffs import (
                  DiffProcessor,
                  NEW_FILENODE, DEL_FILENODE, MOD_FILENODE, RENAMED_FILENODE,
                  CHMOD_FILENODE, BIN_FILENODE, COPIED_FILENODE)
-             from rhodecode.tests.fixture import Fixture
+             from rhodecode.tests.fixture import Fixture, no_newline_id_generator
              from rhodecode.lib.vcs.backends.git.repository import GitDiff
              from rhodecode.lib.vcs.backends.hg.repository import MercurialDiff
              from rhodecode.lib.vcs.backends.svn.repository import SubversionDiff
              # Shared helper instance; used below via fixture.load_resource(...) to
              # read diff resource files.
              fixture = Fixture()
              def test_diffprocessor_as_html_with_comments():
                  # Feeds a small git diff of setup.py through DiffProcessor and checks
                  # that as_html(enable_comments=True) produces exactly the table below.
                  raw_diff = textwrap.dedent('''
                      diff --git a/setup.py b/setup.py
                      index 5b36422..cfd698e 100755
                      --- a/setup.py
                      +++ b/setup.py
@@ -2,7 +2,7 @@
                       #!/usr/bin/python
                       # Setup file for X
                       # Copyright (C) No one
                      -
                      +x
                       try:
                           from setuptools import setup, Extension
                       except ImportError:
                  ''')
                  # Wrap the raw text in a GitDiff, then prepare the processor before
                  # rendering.
                  diff = GitDiff(raw_diff)
                  processor = DiffProcessor(diff)
                  processor.prepare()
                  # Note that the cell with the hunk-header context line (the first row
                  # of the html) has the no-comment class, which prevents the add-comment
                  # icon from being displayed.
                  expected_html = textwrap.dedent('''
                      <table class="code-difftable">
                      <tr class="line context">
                          <td class="add-comment-line"><span class="add-comment-content"></span></td><td class="comment-toggle tooltip" title="Toggle Comment Thread"><i class="icon-comment"></i></td>
                          <td  class="lineno old">...</td>
                          <td  class="lineno new">...</td>
                          <td class="code no-comment">
                              <pre>@@ -2,7 +2,7 @@
                      </pre>
                          </td>
                      </tr>
                      <tr class="line unmod">
                          <td class="add-comment-line"><span class="add-comment-content"><a href="#"><span class="icon-comment-add"></span></a></span></td><td class="comment-toggle tooltip" title="Toggle Comment Thread"><i class="icon-comment"></i></td>
                          <td id="setuppy_o2" class="lineno old"><a href="#setuppy_o2" class="tooltip"
                                      title="Click to select line">2</a></td>
                          <td id="setuppy_n2" class="lineno new"><a href="#setuppy_n2" class="tooltip"
                                      title="Click to select line">2</a></td>
                          <td class="code">
                              <pre>#!/usr/bin/python
                      </pre>
                          </td>
                      </tr>
                      <tr class="line unmod">
                          <td class="add-comment-line"><span class="add-comment-content"><a href="#"><span class="icon-comment-add"></span></a></span></td><td class="comment-toggle tooltip" title="Toggle Comment Thread"><i class="icon-comment"></i></td>
                          <td id="setuppy_o3" class="lineno old"><a href="#setuppy_o3" class="tooltip"
                                      title="Click to select line">3</a></td>
                          <td id="setuppy_n3" class="lineno new"><a href="#setuppy_n3" class="tooltip"
                                      title="Click to select line">3</a></td>
                          <td class="code">
                              <pre># Setup file for X
                      </pre>
                          </td>
                      </tr>
                      <tr class="line unmod">
                          <td class="add-comment-line"><span class="add-comment-content"><a href="#"><span class="icon-comment-add"></span></a></span></td><td class="comment-toggle tooltip" title="Toggle Comment Thread"><i class="icon-comment"></i></td>
                          <td id="setuppy_o4" class="lineno old"><a href="#setuppy_o4" class="tooltip"
                                      title="Click to select line">4</a></td>
                          <td id="setuppy_n4" class="lineno new"><a href="#setuppy_n4" class="tooltip"
                                      title="Click to select line">4</a></td>
                          <td class="code">
                              <pre># Copyright (C) No one
                      </pre>
                          </td>
                      </tr>
                      <tr class="line del">
                          <td class="add-comment-line"><span class="add-comment-content"><a href="#"><span class="icon-comment-add"></span></a></span></td><td class="comment-toggle tooltip" title="Toggle Comment Thread"><i class="icon-comment"></i></td>
                          <td id="setuppy_o5" class="lineno old"><a href="#setuppy_o5" class="tooltip"
                                      title="Click to select line">5</a></td>
                          <td  class="lineno new"><a href="#setuppy_n" class="tooltip"
                                      title="Click to select line"></a></td>
                          <td class="code">
                              <pre>
                      </pre>
                          </td>
                      </tr>
                      <tr class="line add">
                          <td class="add-comment-line"><span class="add-comment-content"><a href="#"><span class="icon-comment-add"></span></a></span></td><td class="comment-toggle tooltip" title="Toggle Comment Thread"><i class="icon-comment"></i></td>
                          <td  class="lineno old"><a href="#setuppy_o" class="tooltip"
                                      title="Click to select line"></a></td>
                          <td id="setuppy_n5" class="lineno new"><a href="#setuppy_n5" class="tooltip"
                                      title="Click to select line">5</a></td>
                          <td class="code">
                              <pre><ins>x</ins>
                      </pre>
                          </td>
                      </tr>
                      <tr class="line unmod">
                          <td class="add-comment-line"><span class="add-comment-content"><a href="#"><span class="icon-comment-add"></span></a></span></td><td class="comment-toggle tooltip" title="Toggle Comment Thread"><i class="icon-comment"></i></td>
                          <td id="setuppy_o6" class="lineno old"><a href="#setuppy_o6" class="tooltip"
                                      title="Click to select line">6</a></td>
                          <td id="setuppy_n6" class="lineno new"><a href="#setuppy_n6" class="tooltip"
                                      title="Click to select line">6</a></td>
                          <td class="code">
                              <pre>try:
                      </pre>
                          </td>
                      </tr>
                      <tr class="line unmod">
                          <td class="add-comment-line"><span class="add-comment-content"><a href="#"><span class="icon-comment-add"></span></a></span></td><td class="comment-toggle tooltip" title="Toggle Comment Thread"><i class="icon-comment"></i></td>
                          <td id="setuppy_o7" class="lineno old"><a href="#setuppy_o7" class="tooltip"
                                      title="Click to select line">7</a></td>
                          <td id="setuppy_n7" class="lineno new"><a href="#setuppy_n7" class="tooltip"
                                      title="Click to select line">7</a></td>
                          <td class="code">
                              <pre>    from setuptools import setup, Extension
                      </pre>
                          </td>
                      </tr>
                      <tr class="line unmod">
                          <td class="add-comment-line"><span class="add-comment-content"><a href="#"><span class="icon-comment-add"></span></a></span></td><td class="comment-toggle tooltip" title="Toggle Comment Thread"><i class="icon-comment"></i></td>
                          <td id="setuppy_o8" class="lineno old"><a href="#setuppy_o8" class="tooltip"
                                      title="Click to select line">8</a></td>
                          <td id="setuppy_n8" class="lineno new"><a href="#setuppy_n8" class="tooltip"
                                      title="Click to select line">8</a></td>
                          <td class="code">
                              <pre>except ImportError:
                      </pre>
                          </td>
                      </tr>
                      </table>
                  ''').strip()
                  # Tabs in the rendered output are replaced by four spaces before the
                  # comparison with the expected markup.
                  html = processor.as_html(enable_comments=True).replace('\t', '    ')
                  assert html == expected_html
-             class TestMixedFilenameEncodings:
+             class TestMixedFilenameEncodings(object):
                  # Runs DiffProcessor over a Mercurial diff loaded from the
                  # 'hg_diff_mixed_filename_encodings.diff' resource.
                  # Class-scoped fixture returning the loaded resource content.
                  @pytest.fixture(scope="class")
                  def raw_diff(self):
                      return fixture.load_resource(
                          'hg_diff_mixed_filename_encodings.diff')
                  # Builds a DiffProcessor around a MercurialDiff of the raw diff.
                  @pytest.fixture
                  def processor(self, raw_diff):
                      diff = MercurialDiff(raw_diff)
                      processor = DiffProcessor(diff)
                      return processor
                  def test_filenames_are_decoded_to_unicode(self, processor):
                      # NOTE(review): the literal replacement characters in the
                      # cp1252/latin1 names are part of the expected value --
                      # presumably the result of lossy decoding; do not "repair" them.
                      diff_data = processor.prepare()
                      filenames = [item['filename'] for item in diff_data]
                      assert filenames == [
                          u'späcial-utf8.txt', u'sp�cial-cp1252.txt', u'sp�cial-latin1.txt']
                  def test_raw_diff_is_decoded_to_unicode(self, processor):
                      # Each item's 'raw_diff' is expected to be the unicode header
                      # text for that file followed by the "new file mode" message.
                      diff_data = processor.prepare()
                      raw_diffs = [item['raw_diff'] for item in diff_data]
                      new_file_message = u'\nnew file mode 100644\n'
                      expected_raw_diffs = [
                          u' a/späcial-utf8.txt b/späcial-utf8.txt' + new_file_message,
                          u' a/sp�cial-cp1252.txt b/sp�cial-cp1252.txt' + new_file_message,
                          u' a/sp�cial-latin1.txt b/sp�cial-latin1.txt' + new_file_message]
                      assert raw_diffs == expected_raw_diffs
                  def test_as_raw_preserves_the_encoding(self, processor, raw_diff):
                      # as_raw() must hand back exactly what was loaded from the resource.
                      assert processor.as_raw() == raw_diff
              # TODO: mikhail: format the following data structure properly
              DIFF_FIXTURES = [
                  ('hg',
                   'hg_diff_add_single_binary_file.diff',
                   [('US Warszawa.jpg', 'A',
                     {'added': 0,
                      'deleted': 0,
                      'binary': True,
                      'ops': {NEW_FILENODE: 'new file 100755',
                              BIN_FILENODE: 'binary diff hidden'}}),
                    ]),
                  ('hg',
                   'hg_diff_mod_single_binary_file.diff',
                   [('US Warszawa.jpg', 'M',
                     {'added': 0,
                      'deleted': 0,
                      'binary': True,
                      'ops': {MOD_FILENODE: 'modified file',
                              BIN_FILENODE: 'binary diff hidden'}}),
                    ]),
                  ('hg',
                   'hg_diff_mod_single_file_and_rename_and_chmod.diff',
                   [('README', 'M',
                     {'added': 3,
                      'deleted': 0,
                      'binary': False,
                      'ops': {MOD_FILENODE: 'modified file',
                              RENAMED_FILENODE: 'file renamed from README.rst to README',
                              CHMOD_FILENODE: 'modified file chmod 100755 => 100644'}}),
                    ]),
                  ('hg',
                   'hg_diff_no_newline.diff',
                   [('server.properties', 'M',
                     {'added': 2,
                      'deleted': 1,
                      'binary': False,
                      'ops': {MOD_FILENODE: 'modified file'}}),
                    ]),
                  ('hg',
                   'hg_diff_mod_file_and_rename.diff',
                   [('README.rst', 'M',
                     {'added': 3,
                      'deleted': 0,
                      'binary': False,
                      'ops': {MOD_FILENODE: 'modified file',
                              RENAMED_FILENODE: 'file renamed from README to README.rst'}}),
                    ]),
                  ('hg',
                   'hg_diff_del_single_binary_file.diff',
                   [('US Warszawa.jpg', 'D',
                     {'added': 0,
                      'deleted': 0,
                      'binary': True,
                      'ops': {DEL_FILENODE: 'deleted file',
                              BIN_FILENODE: 'binary diff hidden'}}),
                    ]),
                  ('hg',
                   'hg_diff_chmod_and_mod_single_binary_file.diff',
                   [('gravatar.png', 'M',
                     {'added': 0,
                      'deleted': 0,
                      'binary': True,
                      'ops': {CHMOD_FILENODE: 'modified file chmod 100644 => 100755',
                              BIN_FILENODE: 'binary diff hidden'}}),
                    ]),
                  ('hg',
                   'hg_diff_chmod.diff',
                   [('file', 'M',
                     {'added': 0,
                      'deleted': 0,
                      'binary': True,
                      'ops': {CHMOD_FILENODE: 'modified file chmod 100755 => 100644'}}),
                    ]),
                  ('hg',
                   'hg_diff_rename_file.diff',
                   [('file_renamed', 'M',
                     {'added': 0,
                      'deleted': 0,
                      'binary': True,
                      'ops': {RENAMED_FILENODE: 'file renamed from file to file_renamed'}}),
                    ]),
                  ('hg',
                   'hg_diff_rename_and_chmod_file.diff',
                   [('README', 'M',
                     {'added': 0,
                      'deleted': 0,
                      'binary': True,
                      'ops': {CHMOD_FILENODE: 'modified file chmod 100644 => 100755',
                              RENAMED_FILENODE: 'file renamed from README.rst to README'}}),
                    ]),
                  ('hg',
                   'hg_diff_binary_and_normal.diff',
                   [('img/baseline-10px.png', 'A',
                     {'added': 0,
                      'deleted': 0,
                      'binary': True,
                      'ops': {NEW_FILENODE: 'new file 100644',
                              BIN_FILENODE: 'binary diff hidden'}}),
                    ('js/jquery/hashgrid.js', 'A',
                     {'added': 340,
                      'deleted': 0,
                      'binary': False,
                      'ops': {NEW_FILENODE: 'new file 100755'}}),
                    ('index.html', 'M',
                     {'added': 3,
                      'deleted': 2,
                      'binary': False,
                      'ops': {MOD_FILENODE: 'modified file'}}),
                    ('less/docs.less', 'M',
                     {'added': 34,
                      'deleted': 0,
                      'binary': False,
                      'ops': {MOD_FILENODE: 'modified file'}}),
                    ('less/scaffolding.less', 'M',
                     {'added': 1,
                      'deleted': 3,
                      'binary': False,
                      'ops': {MOD_FILENODE: 'modified file'}}),
                    ('readme.markdown', 'M',
                     {'added': 1,
                      'deleted': 10,
                      'binary': False,
                      'ops': {MOD_FILENODE: 'modified file'}}),
                    ('img/baseline-20px.png', 'D',
                     {'added': 0,
                      'deleted': 0,
                      'binary': True,
                      'ops': {DEL_FILENODE: 'deleted file',
                              BIN_FILENODE: 'binary diff hidden'}}),
                    ('js/global.js', 'D',
                     {'added': 0,
                      'deleted': 75,
                      'binary': False,
                      'ops': {DEL_FILENODE: 'deleted file'}})
                    ]),
                  ('git',
                   'git_diff_chmod.diff',
                   [('work-horus.xls', 'M',
                     {'added': 0,
                      'deleted': 0,
                      'binary': True,
                      'ops': {CHMOD_FILENODE: 'modified file chmod 100644 => 100755'}})
                    ]),
                  ('git',
                   'git_diff_rename_file.diff',
                   [('file.xls', 'M',
                     {'added': 0,
                      'deleted': 0,
                      'binary': True,
                      'ops': {
                          RENAMED_FILENODE: 'file renamed from work-horus.xls to file.xls'}})
                    ]),
                  ('git',
                   'git_diff_mod_single_binary_file.diff',
                   [('US Warszawa.jpg', 'M',
                     {'added': 0,
                      'deleted': 0,
                      'binary': True,
                      'ops': {MOD_FILENODE: 'modified file',
                              BIN_FILENODE: 'binary diff hidden'}})
                    ]),
                  ('git',
                   'git_diff_binary_and_normal.diff',
                   [('img/baseline-10px.png', 'A',
                     {'added': 0,
                      'deleted': 0,
                      'binary': True,
                      'ops': {NEW_FILENODE: 'new file 100644',
                              BIN_FILENODE: 'binary diff hidden'}}),
                    ('js/jquery/hashgrid.js', 'A',
                     {'added': 340,
                      'deleted': 0,
                      'binary': False,
                      'ops': {NEW_FILENODE: 'new file 100755'}}),
                    ('index.html', 'M',
                     {'added': 3,
                      'deleted': 2,
                      'binary': False,
                      'ops': {MOD_FILENODE: 'modified file'}}),
                    ('less/docs.less', 'M',
                     {'added': 34,
                      'deleted': 0,
                      'binary': False,
                      'ops': {MOD_FILENODE: 'modified file'}}),
                    ('less/scaffolding.less', 'M',
                     {'added': 1,
                      'deleted': 3,
                      'binary': False,
                      'ops': {MOD_FILENODE: 'modified file'}}),
                    ('readme.markdown', 'M',
                     {'added': 1,
                      'deleted': 10,
                      'binary': False,
                      'ops': {MOD_FILENODE: 'modified file'}}),
                    ('img/baseline-20px.png', 'D',
                     {'added': 0,
                      'deleted': 0,
                      'binary': True,
                      'ops': {DEL_FILENODE: 'deleted file',
                              BIN_FILENODE: 'binary diff hidden'}}),
                    ('js/global.js', 'D',
                     {'added': 0,
                      'deleted': 75,
                      'binary': False,
                      'ops': {DEL_FILENODE: 'deleted file'}}),
                    ]),
                  ('hg',
                   'diff_with_diff_data.diff',
                   [('vcs/backends/base.py', 'M',
                     {'added': 18,
                      'deleted': 2,
                      'binary': False,
                      'ops': {MOD_FILENODE: 'modified file'}}),
                    ('vcs/backends/git/repository.py', 'M',
                     {'added': 46,
                      'deleted': 15,
                      'binary': False,
                      'ops': {MOD_FILENODE: 'modified file'}}),
                    ('vcs/backends/hg.py', 'M',
                     {'added': 22,
                      'deleted': 3,
                      'binary': False,
                      'ops': {MOD_FILENODE: 'modified file'}}),
                    ('vcs/tests/test_git.py', 'M',
                     {'added': 5,
                      'deleted': 5,
                      'binary': False,
                      'ops': {MOD_FILENODE: 'modified file'}}),
                    ('vcs/tests/test_repository.py', 'M',
                     {'added': 174,
                      'deleted': 2,
                      'binary': False,
                      'ops': {MOD_FILENODE: 'modified file'}}),
                    ]),
                  ('hg',
                   'hg_diff_copy_file.diff',
                   [('file2', 'M',
                     {'added': 0,
                      'deleted': 0,
                      'binary': True,
                      'ops': {COPIED_FILENODE: 'file copied from file1 to file2'}}),
                    ]),
                  ('hg',
                   'hg_diff_copy_and_modify_file.diff',
                   [('file3', 'M',
                     {'added': 1,
                      'deleted': 0,
                      'binary': False,
                      'ops': {COPIED_FILENODE: 'file copied from file2 to file3',
                              MOD_FILENODE: 'modified file'}}),
                    ]),
                  ('hg',
                   'hg_diff_copy_and_chmod_file.diff',
                   [('file4', 'M',
                     {'added': 0,
                      'deleted': 0,
                      'binary': True,
                      'ops': {COPIED_FILENODE: 'file copied from file3 to file4',
                              CHMOD_FILENODE: 'modified file chmod 100644 => 100755'}}),
                    ]),
                  ('hg',
                   'hg_diff_copy_chmod_and_edit_file.diff',
                   [('file5', 'M',
                     {'added': 2,
                      'deleted': 1,
                      'binary': False,
                      'ops': {COPIED_FILENODE: 'file copied from file4 to file5',
                              CHMOD_FILENODE: 'modified file chmod 100755 => 100644',
                              MOD_FILENODE: 'modified file'}})]),
                  # Diffs to validate rename and copy file with space in its name
                  ('git',
                   'git_diff_rename_file_with_spaces.diff',
                   [('file_with_  two spaces.txt', 'M',
                       {'added': 0,
                        'deleted': 0,
                        'binary': True,
                        'ops': {
                            RENAMED_FILENODE: (
                                'file renamed from file_with_ spaces.txt to file_with_ '
                                ' two spaces.txt')}
                        }), ]),
                  ('hg',
                   'hg_diff_rename_file_with_spaces.diff',
                   [('file_changed _.txt', 'M',
                       {'added': 0,
                        'deleted': 0,
                        'binary': True,
                        'ops': {
                            RENAMED_FILENODE: (
                                'file renamed from file_ with update.txt to file_changed'
                                ' _.txt')}
                        }), ]),
                  ('hg',
                   'hg_diff_copy_file_with_spaces.diff',
                   [('file_copied_ with  spaces.txt', 'M',
                       {'added': 0,
                        'deleted': 0,
                        'binary': True,
                        'ops': {
                            COPIED_FILENODE: (
                                'file copied from file_changed_without_spaces.txt to'
                                ' file_copied_ with  spaces.txt')}
                        }),
                    ]),
                  # special signs from git
                  ('git',
                   'git_diff_binary_special_files.diff',
                   [('css/_Icon\\r', 'A',
                       {'added': 0,
                        'deleted': 0,
                        'binary': True,
                        'ops': {NEW_FILENODE: 'new file 100644',
                                BIN_FILENODE: 'binary diff hidden'}
                        }),
                    ]),
                  ('git',
                   'git_diff_binary_special_files_2.diff',
                   [('css/Icon\\r', 'A',
                       {'added': 0,
                        'deleted': 0,
                        'binary': True,
                        'ops': {NEW_FILENODE: 'new file 100644', }
                        }),
                    ]),
                  ('svn',
                   'svn_diff_binary_add_file.diff',
                   [('intl.dll', 'A',
                     {'added': 0,
                      'deleted': 0,
                      'binary': False,
                      'ops': {NEW_FILENODE: 'new file 10644',
                              #TODO(Marcink): depends on binary detection on svn patches
                              # BIN_FILENODE: 'binary diff hidden'
                              }
                      }),
                    ]),
                  ('svn',
                   'svn_diff_multiple_changes.diff',
                   [('trunk/doc/images/SettingsOverlay.png', 'M',
                     {'added': 0,
                      'deleted': 0,
                      'binary': False,
                      'ops': {MOD_FILENODE: 'modified file',
                              #TODO(Marcink): depends on binary detection on svn patches
                              # BIN_FILENODE: 'binary diff hidden'
                              }
                      }),
                    ('trunk/doc/source/de/tsvn_ch04.xml', 'M',
                     {'added': 89,
                      'deleted': 34,
                      'binary': False,
                      'ops': {MOD_FILENODE: 'modified file'}
                      }),
                    ('trunk/doc/source/en/tsvn_ch04.xml', 'M',
                     {'added': 66,
                      'deleted': 21,
                      'binary': False,
                      'ops': {MOD_FILENODE: 'modified file'}
                      }),
                    ('trunk/src/Changelog.txt', 'M',
                     {'added': 2,
                      'deleted': 0,
                      'binary': False,
                      'ops': {MOD_FILENODE: 'modified file'}
                      }),
                    ('trunk/src/Resources/TortoiseProcENG.rc', 'M',
                     {'added': 19,
                      'deleted': 13,
                      'binary': False,
                      'ops': {MOD_FILENODE: 'modified file'}
                      }),
                    ('trunk/src/TortoiseProc/SetOverlayPage.cpp', 'M',
                     {'added': 16,
                      'deleted': 1,
                      'binary': False,
                      'ops': {MOD_FILENODE: 'modified file'}
                      }),
                    ('trunk/src/TortoiseProc/SetOverlayPage.h', 'M',
                     {'added': 3,
                      'deleted': 0,
                      'binary': False,
                      'ops': {MOD_FILENODE: 'modified file'}
                      }),
                    ('trunk/src/TortoiseProc/resource.h', 'M',
                     {'added': 2,
                      'deleted': 0,
                      'binary': False,
                      'ops': {MOD_FILENODE: 'modified file'}
                      }),
                    ('trunk/src/TortoiseShell/ShellCache.h', 'M',
                     {'added': 50,
                      'deleted': 1,
                      'binary': False,
                      'ops': {MOD_FILENODE: 'modified file'}
                      }),
                    ]),
                  # TODO: mikhail: do we still need this?
                  # (
                  #     'hg',
                  #     'large_diff.diff',
                  #     [
                  #         ('.hgignore', 'A', {
                  #             'deleted': 0, 'binary': False, 'added': 3, 'ops': {
                  #                 1: 'new file 100644'}}),
                  #         (
                  #             'MANIFEST.in', 'A',
                  #             {'deleted': 0, 'binary': False, 'added': 3, 'ops': {
                  #                 1: 'new file 100644'}}),
                  #         (
                  #             'README.txt', 'A',
                  #             {'deleted': 0, 'binary': False, 'added': 19, 'ops': {
                  #                 1: 'new file 100644'}}),
                  #         (
                  #             'development.ini', 'A', {
                  #                 'deleted': 0, 'binary': False, 'added': 116, 'ops': {
                  #                     1: 'new file 100644'}}),
                  #         (
                  #             'docs/index.txt', 'A', {
                  #                 'deleted': 0, 'binary': False, 'added': 19, 'ops': {
                  #                     1: 'new file 100644'}}),
                  #         (
                  #             'ez_setup.py', 'A', {
                  #                 'deleted': 0, 'binary': False, 'added': 276, 'ops': {
                  #                     1: 'new file 100644'}}),
                  #         (
                  #             'hgapp.py', 'A', {
                  #                 'deleted': 0, 'binary': False, 'added': 26, 'ops': {
                  #                     1: 'new file 100644'}}),
                  #         (
                  #             'hgwebdir.config', 'A', {
                  #                 'deleted': 0, 'binary': False, 'added': 21, 'ops': {
                  #                     1: 'new file 100644'}}),
                  #         (
                  #             'pylons_app.egg-info/PKG-INFO', 'A', {
                  #                 'deleted': 0, 'binary': False, 'added': 10, 'ops': {
                  #                     1: 'new file 100644'}}),
                  #         (
                  #             'pylons_app.egg-info/SOURCES.txt', 'A', {
                  #                 'deleted': 0, 'binary': False, 'added': 33, 'ops': {
                  #                     1: 'new file 100644'}}),
                  #         (
                  #             'pylons_app.egg-info/dependency_links.txt', 'A', {
                  #                 'deleted': 0, 'binary': False, 'added': 1, 'ops': {
                  #                     1: 'new file 100644'}}),
                  #     ]
                  # ),
              ]
              # Fixtures that additionally pin the raw diff text of every file.
              # Layout of every entry:
              #   (vcs alias, diff resource name, [expected file tuples])
              # and each expected file tuple is:
              #   (filename, change, stats, raw diff)
              DIFF_FIXTURES_WITH_CONTENT = [
                  ('hg', 'hg_diff_single_file_change_newline.diff', [
                      ('file_b', 'A',
                       {'added': 1, 'deleted': 0, 'binary': False,
                        'ops': {NEW_FILENODE: 'new file 100644'}},
                       '@@ -0,0 +1 @@\n+test_content b\n'),
                  ]),
                  ('hg', 'hg_diff_double_file_change_newline.diff', [
                      ('file_b', 'A',
                       {'added': 1, 'deleted': 0, 'binary': False,
                        'ops': {NEW_FILENODE: 'new file 100644'}},
                       '@@ -0,0 +1 @@\n+test_content b\n'),
                      ('file_c', 'A',
                       {'added': 1, 'deleted': 0, 'binary': False,
                        'ops': {NEW_FILENODE: 'new file 100644'}},
                       '@@ -0,0 +1 @@\n+test_content c\n'),
                  ]),
                  # Here the raw diff of the first file ends with two newlines.
                  ('hg', 'hg_diff_double_file_change_double_newline.diff', [
                      ('file_b', 'A',
                       {'added': 1, 'deleted': 0, 'binary': False,
                        'ops': {NEW_FILENODE: 'new file 100644'}},
                       '@@ -0,0 +1 @@\n+test_content b\n\n'),
                      ('file_c', 'A',
                       {'added': 1, 'deleted': 0, 'binary': False,
                        'ops': {NEW_FILENODE: 'new file 100644'}},
                       '@@ -0,0 +1 @@\n+test_content c\n'),
                  ]),
                  # The last file carries the "No newline at end of file" marker.
                  ('hg', 'hg_diff_four_file_change_newline.diff', [
                      ('file', 'A',
                       {'added': 1, 'deleted': 0, 'binary': False,
                        'ops': {NEW_FILENODE: 'new file 100644'}},
                       '@@ -0,0 +1,1 @@\n+file\n'),
                      ('file2', 'A',
                       {'added': 1, 'deleted': 0, 'binary': False,
                        'ops': {NEW_FILENODE: 'new file 100644'}},
                       '@@ -0,0 +1,1 @@\n+another line\n'),
                      ('file3', 'A',
                       {'added': 1, 'deleted': 0, 'binary': False,
                        'ops': {NEW_FILENODE: 'new file 100644'}},
                       '@@ -0,0 +1,1 @@\n+newline\n'),
                      ('file4', 'A',
                       {'added': 1, 'deleted': 0, 'binary': False,
                        'ops': {NEW_FILENODE: 'new file 100644'}},
                       '@@ -0,0 +1,1 @@\n+fil4\n\\ No newline at end of file'),
                  ]),
              ]
              # Maps the vcs alias used in the fixture tuples to the diff class
              # that is instantiated with the loaded diff text.
              diff_class = dict(
                  git=GitDiff,
                  hg=MercurialDiff,
                  svn=SubversionDiff,
              )
              @pytest.fixture(params=DIFF_FIXTURES)
              def diff_fixture(request):
                  """
                  Build a diff object for each entry of ``DIFF_FIXTURES``.

                  Loads the named diff resource, wraps its text in the diff class
                  registered for the entry's vcs alias in ``diff_class`` and
                  returns ``(diff object, expected data)``.
                  """
                  backend_alias, resource_name, expected_files = request.param
                  raw_text = fixture.load_resource(resource_name)
                  parsed_diff = diff_class[backend_alias](raw_text)
                  return parsed_diff, expected_files
              def test_diff_lib(diff_fixture):
                  """
                  The prepared diff must report the expected filename, operation
                  and stats for every file, in order.
                  """
                  parsed_diff, expected_data = diff_fixture
                  prepared_files = DiffProcessor(parsed_diff).prepare()
                  actual = []
                  for file_info in prepared_files:
                      actual.append((
                          file_info['filename'],
                          file_info['operation'],
                          file_info['stats'],
                      ))
                  assert expected_data == actual
              @pytest.fixture(params=DIFF_FIXTURES_WITH_CONTENT)
              def diff_fixture_w_content(request):
                  """
                  Build a diff object for each entry of
                  ``DIFF_FIXTURES_WITH_CONTENT``.

                  Loads the named diff resource, wraps its text in the diff class
                  registered for the entry's vcs alias in ``diff_class`` and
                  returns ``(diff object, expected data)``.
                  """
                  backend_alias, resource_name, expected_files = request.param
                  raw_text = fixture.load_resource(resource_name)
                  parsed_diff = diff_class[backend_alias](raw_text)
                  return parsed_diff, expected_files
              def test_diff_lib_newlines(diff_fixture_w_content):
                  """
                  The prepared diff must report the expected filename, operation,
                  stats and raw diff text (newlines included) for every file.
                  """
                  parsed_diff, expected_data = diff_fixture_w_content
                  prepared_files = DiffProcessor(parsed_diff).prepare()
                  actual = []
                  for file_info in prepared_files:
                      actual.append((
                          file_info['filename'],
                          file_info['operation'],
                          file_info['stats'],
                          file_info['raw_diff'],
                      ))
                  assert expected_data == actual
+             @pytest.mark.parametrize('input_str', [
+                 '',
+                 '\n',
+                 '\n\n',
+                 'First\n+second',
+                 'First\n+second\n',
+                 '\n\n\n Multi \n\n\n',
+                 '\n\n\n Multi beginning',
+                 'Multi end \n\n\n',
+                 'Multi end',
+                 '@@ -0,0 +1 @@\n+test_content \n\n b\n'
+             ], ids=no_newline_id_generator)
+             def test_splitlines(input_str):
+                 result = DiffProcessor.diff_splitter(input_str)
+                 assert list(result) == input_str.splitlines(True)

General Comments 0

Write
Preview

You need to be logged in to leave comments. Login now

No TODOs yet

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages