@@ -24,6 +24,8 @@ from itertools import groupby
24 | 24 |
25 | 25 | from pygments import lex
26 | 26 | from pygments.formatters.html import _get_ttype_class as pygment_token_class
   | 27 | from pygments.lexers.special import TextLexer, Token
   | 28 |
27 | 29 | from rhodecode.lib.helpers import (
28 | 30 |     get_lexer_for_filenode, html_escape, get_custom_lexer)
29 | 31 | from rhodecode.lib.utils2 import AttributeDict
@@ -45,7 +47,7 @@ def filenode_as_lines_tokens(filenode, l
45 | 47 |     log.debug('Generating file node pygment tokens for %s, %s, org_lexer:%s',
46 | 48 |               lexer, filenode, org_lexer)
47 | 49 |     tokens = tokenize_string(filenode.content, lexer)
48 |    |     lines = split_token_stream(tokens, split_string='\n')
   | 50 |     lines = split_token_stream(tokens)
49 | 51 |     rv = list(lines)
50 | 52 |     return rv
51 | 53 |
@@ -59,22 +61,28 @@ def tokenize_string(content, lexer):
59 | 61 |     lexer.stripall = False
60 | 62 |     lexer.stripnl = False
61 | 63 |     lexer.ensurenl = False
62 |    |     for token_type, token_text in lex(content, lexer):
   | 64 |
   | 65 |     if isinstance(lexer, TextLexer):
   | 66 |         lexed = [(Token.Text, content)]
   | 67 |     else:
   | 68 |         lexed = lex(content, lexer)
   | 69 |
   | 70 |     for token_type, token_text in lexed:
63 | 71 |         yield pygment_token_class(token_type), token_text
64 | 72 |
65 | 73 |
66 |    | def split_token_stream(tokens, split_string='\n'):
   | 74 | def split_token_stream(tokens):
67 | 75 |     """
68 | 76 |     Take a list of (TokenType, text) tuples and split them by a string
69 | 77 |
70 |    |     split_token_stream([(TEXT, 'some\ntext'), (TEXT, 'more\n')], split_string='\n')
   | 78 |     split_token_stream([(TEXT, 'some\ntext'), (TEXT, 'more\n')])
71 | 79 |     [(TEXT, 'some'), (TEXT, 'text'),
72 | 80 |      (TEXT, 'more'), (TEXT, 'text')]
73 | 81 |     """
74 | 82 |
75 | 83 |     buffer = []
76 | 84 |     for token_class, token_text in tokens:
77 |    |         parts = token_text.split(split_string)
   | 85 |         parts = token_text.split('\n')
78 | 86 |         for part in parts[:-1]:
79 | 87 |             buffer.append((token_class, part))
80 | 88 |             yield buffer
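For reviewers skimming this hunk, here is a minimal runnable sketch of the reworked helper as it reads after the change; the tail past the diff cut (the final buffer flush) is inferred from the tests later in this review, so treat it as an approximation rather than the authoritative source:

def split_token_stream(tokens):
    # accumulate (token_class, text) pairs until a '\n' boundary is hit
    buffer = []
    for token_class, token_text in tokens:
        parts = token_text.split('\n')
        for part in parts[:-1]:
            buffer.append((token_class, part))
            yield buffer   # one buffered list per rendered line
            buffer = []
        buffer.append((token_class, parts[-1]))
    if buffer:             # inferred: flush the trailing partial line
        yield buffer

print(list(split_token_stream([('txt', 'some\ntext'), ('txt', 'more\n')])))
# [[('txt', 'some')], [('txt', 'text'), ('txt', 'more')], [('txt', '')]]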
@@ -183,13 +183,12 @@ class DiffProcessor(object):
183 | 183 |
184 | 184 |         :param string:
185 | 185 |         """
186 |     |
187 | 186 |         self.cur_diff_size += len(string)
188 | 187 |
189 | 188 |         if not self.show_full_diff and (self.cur_diff_size > self.diff_limit):
190 | 189 |             raise DiffLimitExceeded('Diff Limit Exceeded')
191 | 190 |
192 |     |         return safe_unicode(string)\
    | 191 |         return string \
193 | 192 |             .replace('&', '&amp;')\
194 | 193 |             .replace('<', '&lt;')\
195 | 194 |             .replace('>', '&gt;')
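One detail worth keeping in mind about this replace chain: '&' must be escaped before '<' and '>', otherwise the ampersands introduced by '&lt;'/'&gt;' would themselves be escaped a second time. A quick illustration:

s = '<b & c>'
print(s.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;'))
# &lt;b &amp; c&gt;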
@@ -278,7 +277,7 @@ class DiffProcessor(object):
278 | 277 |         for chunk in self._diff.chunks():
279 | 278 |             head = chunk.header
280 | 279 |
281 |     |             diff = imap(self._escaper, chunk.diff.splitlines(1))
    | 280 |             diff = imap(self._escaper, self.diff_splitter(chunk.diff))
282 | 281 |             raw_diff = chunk.raw
283 | 282 |             limited_diff = False
284 | 283 |             exceeds_limit = False
@@ -529,7 +528,8 @@ class DiffProcessor(object):
529 | 528 |
530 | 529 |             # a real non-binary diff
531 | 530 |             if head['a_file'] or head['b_file']:
532 |     |                 diff = iter(chunk.diff.splitlines(1))
    | 531 |                 # simulate splitlines, so we keep the line end part
    | 532 |                 diff = self.diff_splitter(chunk.diff)
533 | 533 |
534 | 534 |                 # append each file to the diff size
535 | 535 |                 raw_chunk_size = len(raw_diff)
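The new comment states the intent: str.splitlines() treats '\r' (and several other characters) as line boundaries, which can split a diff line that legitimately contains a carriage return, while diff_splitter honours only '\n'. An illustration of the difference, using a made-up payload:

text = 'payload with \r inside\n'
print(text.splitlines(True))  # ['payload with \r', ' inside\n'] -- split on the stray '\r'
# DiffProcessor.diff_splitter(text) would yield just [u'payload with \r inside\n']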
@@ -608,18 +608,17 @@ class DiffProcessor(object):
608 | 608 |         return diff_container(sorted(_files, key=sorter))
609 | 609 |
610 | 610 |     # FIXME: NEWDIFFS: dan: this gets replaced by _new_parse_lines
611 |     |     def _parse_lines(self, diff):
    | 611 |     def _parse_lines(self, diff_iter):
612 | 612 |         """
613 | 613 |         Parse the diff an return data for the template.
614 | 614 |         """
615 | 615 |
616 |     |         lineiter = iter(diff)
617 | 616 |         stats = [0, 0]
618 | 617 |         chunks = []
619 | 618 |         raw_diff = []
620 | 619 |
621 | 620 |         try:
622 |     |             line = lineiter.next()
    | 621 |             line = diff_iter.next()
623 | 622 |
624 | 623 |             while line:
625 | 624 |                 raw_diff.append(line)
@@ -651,7 +650,7 @@ class DiffProcessor(object):
651 | 650 |                             'line': line,
652 | 651 |                         })
653 | 652 |
654 |     |                 line = lineiter.next()
    | 653 |                 line = diff_iter.next()
655 | 654 |
656 | 655 |                 while old_line < old_end or new_line < new_end:
657 | 656 |                     command = ' '
@@ -686,7 +685,7 @@ class DiffProcessor(object):
686 | 685 |                         })
687 | 686 |                         raw_diff.append(line)
688 | 687 |
689 |     |                         line = lineiter.next()
    | 688 |                         line = diff_iter.next()
690 | 689 |
691 | 690 |                         if self._newline_marker.match(line):
692 | 691 |                             # we need to append to lines, since this is not
@@ -712,13 +711,12 @@ class DiffProcessor(object):
712 | 711 |         chunks = []
713 | 712 |         raw_diff = []
714 | 713 |
715 |     |         diff_iter = imap(lambda s: safe_unicode(s), diff_iter)
716 |     |
717 | 714 |         try:
718 | 715 |             line = diff_iter.next()
719 | 716 |
720 | 717 |             while line:
721 | 718 |                 raw_diff.append(line)
    | 719 |                 # match header e.g @@ -0,0 +1 @@\n'
722 | 720 |                 match = self._chunk_re.match(line)
723 | 721 |
724 | 722 |                 if not match:
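The added comment documents the header shape that _chunk_re is expected to match. The pattern itself is defined outside this hunk; the regex below is only an illustrative guess at a typical hunk-header pattern, not necessarily the exact one in this codebase:

import re

# illustrative pattern (assumption): old start/len, new start/len, trailer
chunk_re = re.compile(r'^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))?(.*)')
print(chunk_re.match('@@ -0,0 +1 @@\n').groups())
# ('0', '0', '1', None, ' @@')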
@@ -826,6 +824,32 @@ class DiffProcessor(object):
826 | 824 |         idstring = re.sub(r'(?!-)\W', "", idstring).lower()
827 | 825 |         return idstring
828 | 826 |
    | 827 |     @classmethod
    | 828 |     def diff_splitter(cls, string):
    | 829 |         """
    | 830 |         Diff split that emulates .splitlines() but works only on \n
    | 831 |         """
    | 832 |         if not string:
    | 833 |             return
    | 834 |         elif string == '\n':
    | 835 |             yield u'\n'
    | 836 |         else:
    | 837 |
    | 838 |             has_newline = string.endswith('\n')
    | 839 |             elements = string.split('\n')
    | 840 |             if has_newline:
    | 841 |                 # skip last element as it's empty string from newlines
    | 842 |                 elements = elements[:-1]
    | 843 |
    | 844 |             len_elements = len(elements)
    | 845 |
    | 846 |             for cnt, line in enumerate(elements, start=1):
    | 847 |                 last_line = cnt == len_elements
    | 848 |                 if last_line and not has_newline:
    | 849 |                     yield safe_unicode(line)
    | 850 |                 else:
    | 851 |                     yield safe_unicode(line) + '\n'
    | 852 |
829 | 853 |     def prepare(self, inline_diff=True):
830 | 854 |         """
831 | 855 |         Prepare the passed udiff for HTML rendering.
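Behaviourally, the new classmethod is meant as a drop-in for splitlines(True) on '\n'-terminated text, with unicode output; the parametrized test at the end of this review asserts exactly that equivalence. For example:

list(DiffProcessor.diff_splitter('First\n+second'))
# [u'First\n', u'+second']  -- same as 'First\n+second'.splitlines(True)
list(DiffProcessor.diff_splitter(''))
# []                        -- an empty diff yields nothing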
@@ -89,29 +89,9 @@ class TestSplitTokenStream(object):
 89 |  89 |             [('type2', u'')],
 90 |  90 |         ]
 91 |  91 |
 92 |     |     def test_split_token_stream_other_char(self):
 93 |     |         lines = list(split_token_stream(
 94 |     |             [('type1', 'some\ntext'), ('type2', 'more\n')],
 95 |     |             split_string='m'))
 96 |     |
 97 |     |         assert lines == [
 98 |     |             [('type1', 'so')],
 99 |     |             [('type1', 'e\ntext'), ('type2', '')],
100 |     |             [('type2', 'ore\n')],
101 |     |         ]
102 |     |
103 |     |     def test_split_token_stream_without_char(self):
104 |     |         lines = list(split_token_stream(
105 |     |             [('type1', 'some\ntext'), ('type2', 'more\n')],
106 |     |             split_string='z'))
107 |     |
108 |     |         assert lines == [
109 |     |             [('type1', 'some\ntext'), ('type2', 'more\n')]
110 |     |         ]
111 |     |
112 |  92 |     def test_split_token_stream_single(self):
113 |  93 |         lines = list(split_token_stream(
114 |     |             [('type1', '\n')], split_string='\n'))
    |  94 |             [('type1', '\n')]))
115 |  95 |
116 |  96 |         assert lines == [
117 |  97 |             [('type1', '')],
@@ -120,7 +100,7 @@ class TestSplitTokenStream(object):
120 | 100 |
121 | 101 |     def test_split_token_stream_single_repeat(self):
122 | 102 |         lines = list(split_token_stream(
123 |     |             [('type1', '\n\n\n')], split_string='\n'))
    | 103 |             [('type1', '\n\n\n')]))
124 | 104 |
125 | 105 |         assert lines == [
126 | 106 |             [('type1', '')],
@@ -131,7 +111,7 @@ class TestSplitTokenStream(object):
131 | 111 |
132 | 112 |     def test_split_token_stream_multiple_repeat(self):
133 | 113 |         lines = list(split_token_stream(
134 |     |             [('type1', '\n\n'), ('type2', '\n\n')], split_string='\n'))
    | 114 |             [('type1', '\n\n'), ('type2', '\n\n')]))
135 | 115 |
136 | 116 |         assert lines == [
137 | 117 |             [('type1', '')],
@@ -26,7 +26,7 @@ from rhodecode.lib.diffs import (
26 | 26 |     DiffProcessor,
27 | 27 |     NEW_FILENODE, DEL_FILENODE, MOD_FILENODE, RENAMED_FILENODE,
28 | 28 |     CHMOD_FILENODE, BIN_FILENODE, COPIED_FILENODE)
29 |    | from rhodecode.tests.fixture import Fixture
   | 29 | from rhodecode.tests.fixture import Fixture, no_newline_id_generator
30 | 30 | from rhodecode.lib.vcs.backends.git.repository import GitDiff
31 | 31 | from rhodecode.lib.vcs.backends.hg.repository import MercurialDiff
32 | 32 | from rhodecode.lib.vcs.backends.svn.repository import SubversionDiff
@@ -162,7 +162,7 @@ def test_diffprocessor_as_html_with_comm
162 | 162 |     assert html == expected_html
163 | 163 |
164 | 164 |
165 |     | class TestMixedFilenameEncodings:
    | 165 | class TestMixedFilenameEncodings(object):
166 | 166 |
167 | 167 |     @pytest.fixture(scope="class")
168 | 168 |     def raw_diff(self):
@@ -811,3 +811,21 @@ def test_diff_lib_newlines(diff_fixture_
811 | 811 |     data = [(x['filename'], x['operation'], x['stats'], x['raw_diff'])
812 | 812 |             for x in diff_proc_d]
813 | 813 |     assert expected_data == data
    | 814 |
    | 815 |
    | 816 | @pytest.mark.parametrize('input_str', [
    | 817 |     '',
    | 818 |     '\n',
    | 819 |     '\n\n',
    | 820 |     'First\n+second',
    | 821 |     'First\n+second\n',
    | 822 |
    | 823 |     '\n\n\n Multi \n\n\n',
    | 824 |     '\n\n\n Multi beginning',
    | 825 |     'Multi end \n\n\n',
    | 826 |     'Multi end',
    | 827 |     '@@ -0,0 +1 @@\n+test_content \n\n b\n'
    | 828 | ], ids=no_newline_id_generator)
    | 829 | def test_splitlines(input_str):
    | 830 |     result = DiffProcessor.diff_splitter(input_str)
    | 831 |     assert list(result) == input_str.splitlines(True)