rhodecode-enterprise-ce Files · rhodecode/lib/diffs.py

deps: bumped pycryptodome==3.21.0 for security issue

super-admin - - Load All Authors

File last commit:

r5608:6d33e504 default


                r5640:acc4336c

default

Download file

             diffs.py
        
                    1143 lines
            
             | 39.3 KiB
            
                | text/x-python
            
             |
                PythonLexer
            
             / rhodecode / lib / diffs.py
          
                    History
                
                 |
                  Source
                 | Raw
                 |Copy content
                 |Copy permalink

        super-admin
    
core: updated copyright to 2024

              r5608
            
      # Copyright (C) 2011-2024 RhodeCode GmbH

        marcink
    
project: added all source files and assets

              r1
            
      #

      # This program is free software: you can redistribute it and/or modify

      # it under the terms of the GNU Affero General Public License, version 3

      # (only), as published by the Free Software Foundation.

      #

      # This program is distributed in the hope that it will be useful,

      # but WITHOUT ANY WARRANTY; without even the implied warranty of

      # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the

      # GNU General Public License for more details.

      #

      # You should have received a copy of the GNU Affero General Public License

      # along with this program.  If not, see <http://www.gnu.org/licenses/>.

      #

      # This program is dual-licensed. If you wish to learn more about the

      # RhodeCode Enterprise Edition, including its added features, Support services,

      # and proprietary license terms, please see https://rhodecode.com/licenses/

      """

      Set of diffing helpers, previously part of vcs

      """

        super-admin
    
diffs: python3 port

              r5083
            
      import dataclasses

        Bartłomiej Wołyńczyk
    
caching: add option to cache diffs for commits and pull requests....

              r2685
            
      import os

        marcink
    
pylons: fixed code and test suite after removal of pylons.

              r2358
            
      import re

        marcink
    
diff-cache: use bz2 to reduce diff-cache size.

              r2690
            
      import bz2

        marcink
    
diffs: switched bz2 into gzip since it can be 10x faster in some cases with only slight size penalty

              r3854
            
      import gzip

        marcink
    
diffs: added load time for diffs

              r3838
            
      import time

        marcink
    
diff-cache: use bz2 to reduce diff-cache size.

              r2690
            
        marcink
    
project: added all source files and assets

              r1
            
      import difflib

      import logging

        super-admin
    
python3: 2to3 fixes

              r4930
            
      import pickle

      from itertools import tee

        marcink
    
project: added all source files and assets

              r1
            
      from rhodecode.lib.vcs.exceptions import VCSError

      from rhodecode.lib.vcs.nodes import FileNode, SubModuleNode

        super-admin
    
diffs: python3 port

              r5083
            
      from rhodecode.lib.vcs.backends import base

      from rhodecode.lib.str_utils import safe_str

        marcink
    
project: added all source files and assets

              r1
            
      log = logging.getLogger(__name__)

        marcink
    
diffs: limit the file context to ~1mln lines. Fixes #4184...

              r679
            
      # define max context; a file with more than this number of lines is unusable

      # in the browser anyway

        dan
    
diffs: introducing diff menu for whitespace toggle and context changes

              r3134
            
      MAX_CONTEXT = 20 * 1024
      DEFAULT_CONTEXT = 3


      def get_diff_context(request):
          """
          Pick the number of diff context lines requested through the query string.

          :param request: object exposing the query parameters as ``request.GET``
          :returns: ``MAX_CONTEXT`` when ``fullcontext`` equals ``'1'``,
              ``DEFAULT_CONTEXT`` otherwise
          """
          if request.GET.get('fullcontext', '') == '1':
              return MAX_CONTEXT
          return DEFAULT_CONTEXT

      def get_diff_whitespace_flag(request):
          """
          Tell whether the query string asks for whitespace to be ignored.

          :param request: object exposing the query parameters as ``request.GET``
          :returns: ``True`` only when ``ignorews`` equals ``'1'``
          """
          requested = request.GET.get('ignorews', '')
          return requested == '1'

        marcink
    
diffs: limit the file context to ~1mln lines. Fixes #4184...

              r679
            
        marcink
    
project: added all source files and assets

              r1
            
        super-admin
    
diffs: python3 port

              r5083
            
      @dataclasses.dataclass
      class OPS:
          """Single-letter codes stored as the ``operation`` of a parsed file diff."""
          ADD: str = 'A'  # used when the diff header describes a new file
          MOD: str = 'M'  # used for chmod / rename / copy / plain modification
          DEL: str = 'D'  # used when the diff header describes a deleted file

      @dataclasses.dataclass

      class DiffLineNumber:

          old: int | None

          new: int | None

          def __iter__(self):

              yield self.old

              yield self.new

        marcink
    
project: added all source files and assets

              r1
            
        marcink
    
files: pep8 fixes

              r678
            
        marcink
    
project: added all source files and assets

              r1
            
      def get_gitdiff(filenode_old, filenode_new, ignore_whitespace=True, context=3):
          """
          Returns git style diff between given ``filenode_old`` and ``filenode_new``.

          :param filenode_old: node for the old side of the comparison
          :param filenode_new: node for the new side; the repository of its commit
              is the one asked to compute the diff
          :param ignore_whitespace: ignore whitespaces in diff
          :param context: number of context lines; a falsy value falls back to 3
              and anything above ``MAX_CONTEXT`` is clamped to ``MAX_CONTEXT``
          :returns: an empty string when either node is a ``SubModuleNode``,
              otherwise the result of ``repo.get_diff``
          :raises VCSError: when either node is not a ``FileNode``
          """
          # a falsy context means "use the default"; clamping protects against
          # IntOverflow when a HUGE context is passed
          context = min(context or 3, MAX_CONTEXT)

          nodes = (filenode_old, filenode_new)

          # submodules are never diffed
          if any(isinstance(node, SubModuleNode) for node in nodes):
              return ''

          for node in nodes:
              if not isinstance(node, FileNode):
                  raise VCSError(f"Given object should be FileNode object, not {node.__class__}")

          repo = filenode_new.commit.repository
          old_commit = filenode_old.commit or repo.EMPTY_COMMIT
          return repo.get_diff(
              old_commit, filenode_new.commit, filenode_new.path,
              ignore_whitespace, context, path1=filenode_old.path)

      # Integer keys used in the ``stats['ops']`` mapping of a parsed file diff;
      # each key tags one kind of change detected from the diff header and maps
      # to a human readable message.
      NEW_FILENODE = 1
      DEL_FILENODE = 2
      MOD_FILENODE = 3
      RENAMED_FILENODE = 4
      COPIED_FILENODE = 5
      CHMOD_FILENODE = 6
      BIN_FILENODE = 7

      class LimitedDiffContainer:
          """
          Thin wrapper around a parsed diff that also records the diff limit and
          the diff size seen so far.

          Indexing and iteration are delegated to the wrapped ``diff``.
          """

          def __init__(self, diff_limit: int, cur_diff_size, diff):
              self.diff_limit = diff_limit
              self.cur_diff_size = cur_diff_size
              self.diff = diff

          def __getitem__(self, key):
              return self.diff[key]

          def __iter__(self):
              for entry in self.diff:
                  yield entry

        marcink
    
project: added all source files and assets

              r1
            
      class Action:
          """
          Constants for the ``action`` value carried by each line of a parsed diff.
          """

          ADD = 'add'
          DELETE = 'del'
          UNMODIFIED = 'unmod'
          CONTEXT = 'context'

          # markers for the 'no newline at end of file' message
          OLD_NO_NL = 'old-no-nl'
          NEW_NO_NL = 'new-no-nl'

        marcink
    
project: added all source files and assets

              r1
            
      class DiffProcessor(object):

          """

        super-admin
    
diffs: python3 port

              r5083
            
          Give it a unified or git diff, and it returns a list of the files that were

        marcink
    
project: added all source files and assets

              r1
            
          mentioned in the diff together with a dict of meta information that

        super-admin
    
diffs: python3 port

              r5083
            
          can be used to render it in an HTML template.

        marcink
    
project: added all source files and assets

              r1
            
          .. note:: Unicode handling

             The original diffs are a byte sequence and can contain filenames

             in mixed encodings. This class generally returns `unicode` objects

             since the result is intended for presentation to the user.

          """

        super-admin
    
diffs: python3 port

              r5083
            
          _chunk_re = re.compile(br'^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(.*)')

          _newline_marker = re.compile(br'^\\ No newline at end of file')

        marcink
    
project: added all source files and assets

              r1
            
          # used for inline highlighter word split

        super-admin
    
diffs: python3 port

              r5083
            
          _token_re = re.compile(br'()(&gt;|&lt;|&amp;|\W+?)')

        marcink
    
project: added all source files and assets

              r1
            
        marcink
    
pull-requests: updated versioning support....

              r1268
            
          # collapse ranges of commits over given number

          _collapse_commits_over = 5

        super-admin
    
diffs: python3 port

              r5083
            
          def __init__(self, diff: base.Diff, diff_format='gitdiff', diff_limit: int = 0,
                       file_limit: int = 0, show_full_diff=True):
              """
              :param diff: A `Diff` object representing a diff from a vcs backend
              :param diff_format: format of diff passed, `udiff` or `gitdiff`;
                  passing `gitdiff` raises ``DeprecationWarning``
              :param diff_limit: size above which the accumulated diff is
                  considered "big"; only enforced when ``show_full_diff`` is false
              :param file_limit: size above which a single file diff is flagged
                  as exceeding the limit
              :param show_full_diff: when true, exceeding a limit never cuts the
                  diff off
              :raises DeprecationWarning: when ``diff_format`` is `gitdiff`
              """
              self._diff = diff
              self._format = diff_format

              # limits used to decide whether a diff is too big to render
              self.diff_limit = diff_limit
              self.file_limit = file_limit
              self.show_full_diff = show_full_diff

              # running counters and parse state
              self.adds = 0
              self.removes = 0
              self.cur_diff_size = 0
              self.parsed = False
              self.parsed_diff = []

              log.debug('Initialized DiffProcessor with %s mode', diff_format)

              if diff_format == 'gitdiff':
                  self.differ = self._highlight_line_difflib
                  self._parser = self._parse_gitdiff
                  raise DeprecationWarning('gitdiff usage is deprecated')

              self.differ = self._highlight_line_udiff
              self._parser = self._new_parse_gitdiff

        marcink
    
project: added all source files and assets

              r1
            
          def _copy_iterator(self):
              """
              Make a fresh copy of the generator; we should not iterate through
              the original as it's needed for repeating operations on
              this instance of DiffProcessor.

              :returns: one of the two iterators produced by ``itertools.tee``;
                  the other one replaces the stored iterator
              """
              # NOTE(review): ``self.__udiff`` is name-mangled to
              # ``_DiffProcessor__udiff`` and ``__init__`` does not assign it --
              # confirm it is set somewhere before this method is called.
              self.__udiff, iterator_copy = tee(self.__udiff)
              return iterator_copy

        super-admin
    
diffs: python3 port

              r5083
            
          def _escaper(self, diff_string):

        marcink
    
project: added all source files and assets

              r1
            
              """

              Escaper for diff escapes special chars and checks the diff limit

              :param string:

              """

        super-admin
    
diffs: python3 port

              r5083
            
              self.cur_diff_size += len(diff_string)

        marcink
    
project: added all source files and assets

              r1
            
              if not self.show_full_diff and (self.cur_diff_size > self.diff_limit):

                  raise DiffLimitExceeded('Diff Limit Exceeded')

        super-admin
    
diffs: python3 port

              r5083
            
              return diff_string \

                  .replace(b'&', b'&amp;')\

                  .replace(b'<', b'&lt;')\

                  .replace(b'>', b'&gt;')

        marcink
    
project: added all source files and assets

              r1
            
        super-admin
    
diffs: python3 port

              r5083
            
          def _line_counter(self, diff_line):

        marcink
    
project: added all source files and assets

              r1
            
              """

              Checks each line and bumps total adds/removes for this diff

        super-admin
    
diffs: python3 port

              r5083
            
              :param diff_line:

        marcink
    
project: added all source files and assets

              r1
            
              """

        super-admin
    
diffs: python3 port

              r5083
            
              if diff_line.startswith(b'+') and not diff_line.startswith(b'+++'):

        marcink
    
project: added all source files and assets

              r1
            
                  self.adds += 1

        super-admin
    
diffs: python3 port

              r5083
            
              elif diff_line.startswith(b'-') and not diff_line.startswith(b'---'):

        marcink
    
project: added all source files and assets

              r1
            
                  self.removes += 1

        super-admin
    
diffs: python3 port

              r5083
            
              return diff_line

        marcink
    
project: added all source files and assets

              r1
            
          def _highlight_line_difflib(self, line, next_):
              """
              Highlight inline changes in both lines.

              Both lines are split into tokens with ``_token_re``, the token
              sequences are compared with ``difflib.SequenceMatcher`` and every
              non-equal run is wrapped in ``<del>`` (old line) or ``<ins>``
              (new line). The ``'line'`` entry of both dicts is replaced in place.

              The ``'line'`` values may be ``str`` or ``bytes``; the regex, the
              joiner and the tags are all kept in that same type. Previously a
              bytes pattern was combined with ``''.join`` and f-strings, which
              raises ``TypeError`` for either input type.

              :param line: line dict with at least ``'action'`` and ``'line'``
              :param next_: the line dict following ``line``
              """
              if line['action'] == Action.DELETE:
                  old, new = line, next_
              else:
                  old, new = next_, line

              as_bytes = isinstance(old['line'], bytes)

              # make sure the pattern type matches the type of the text we split
              token_re = self._token_re
              pattern = token_re.pattern
              if as_bytes and isinstance(pattern, str):
                  token_re = re.compile(pattern.encode('ascii'))
              elif not as_bytes and isinstance(pattern, bytes):
                  token_re = re.compile(pattern.decode('ascii'))

              if as_bytes:
                  empty = b''
                  del_open, del_close = b'<del>', b'</del>'
                  ins_open, ins_close = b'<ins>', b'</ins>'
              else:
                  empty = ''
                  del_open, del_close = '<del>', '</del>'
                  ins_open, ins_close = '<ins>', '</ins>'

              oldwords = token_re.split(old['line'])
              newwords = token_re.split(new['line'])
              sequence = difflib.SequenceMatcher(None, oldwords, newwords)

              oldfragments, newfragments = [], []
              for tag, i1, i2, j1, j2 in sequence.get_opcodes():
                  oldfrag = empty.join(oldwords[i1:i2])
                  newfrag = empty.join(newwords[j1:j2])
                  if tag != 'equal':
                      # only wrap non-empty fragments to avoid empty tags
                      if oldfrag:
                          oldfrag = del_open + oldfrag + del_close
                      if newfrag:
                          newfrag = ins_open + newfrag + ins_close
                  oldfragments.append(oldfrag)
                  newfragments.append(newfrag)

              old['line'] = empty.join(oldfragments)
              new['line'] = empty.join(newfragments)

          def _highlight_line_udiff(self, line, next_):

              """

              Highlight inline changes in both lines.

              """

              start = 0

              limit = min(len(line['line']), len(next_['line']))

              while start < limit and line['line'][start] == next_['line'][start]:

                  start += 1

              end = -1

              limit -= start

              while -end <= limit and line['line'][end] == next_['line'][end]:

                  end -= 1

              end += 1

              if start or end:

                  def do(l):

                      last = end + len(l['line'])

                      if l['action'] == Action.ADD:

                          tag = 'ins'

                      else:

                          tag = 'del'

        super-admin
    
diffs: python3 port

              r5083
            
                      l['line'] = f"{l['line'][:start]}<{tag}>{l['line'][start:last]}</{tag}>{l['line'][last:]}"

        marcink
    
project: added all source files and assets

              r1
            
                  do(line)

                  do(next_)

        super-admin
    
diffs: python3 port

              r5083
            
          def _clean_line(self, line, command: str):

        marcink
    
project: added all source files and assets

              r1
            
              if command in ['+', '-', ' ']:

                  # only modify the line if it's actually a diff thing

                  line = line[1:]

              return line

          def _parse_gitdiff(self, inline_diff=True):
              """
              Parse every chunk of ``self._diff`` into a list of per-file dicts.

              Each dict carries ``filename``, ``old_revision``, ``new_revision``,
              ``chunks``, ``raw_diff``, ``operation``, ``stats``,
              ``exceeds_limit`` and ``is_limited_diff``. The first entry of
              ``chunks`` is always a list of context lines built from the
              messages collected in ``stats['ops']`` (the plain 'modified file'
              message is left out).

              :param inline_diff: when true, ``self.differ`` is run on pairs of
                  adjacent changed lines with different actions
              :returns: the per-file dicts ordered added, modified, deleted;
                  wrapped in a ``LimitedDiffContainer`` when at least one file
                  hit ``DiffLimitExceeded``
              """
              parsed_files = []
              # flipped once any file trips DiffLimitExceeded
              limit_hit = False
              unchanged_actions = (Action.UNMODIFIED, Action.CONTEXT)
              sort_rank = {OPS.ADD: 0, OPS.MOD: 1, OPS.DEL: 2}

              for chunk in self._diff.chunks():
                  head = chunk.header
                  escaped_lines = map(self._escaper, self.diff_splitter(chunk.diff))
                  raw_diff = chunk.raw
                  limited_diff = False
                  exceeds_limit = False

                  ops = {}
                  stats = {
                      'added': 0,
                      'deleted': 0,
                      'binary': False,
                      'ops': ops,
                  }

                  op = None
                  if head['deleted_file_mode']:
                      op = OPS.DEL
                      ops[DEL_FILENODE] = 'deleted file'
                  elif head['new_file_mode']:
                      op = OPS.ADD
                      ops[NEW_FILENODE] = f"new file {safe_str(head['new_file_mode'])}"
                  else:
                      # modify operation, can be: copy, rename or chmod
                      if head['new_mode'] and head['old_mode']:
                          op = OPS.MOD
                          ops[CHMOD_FILENODE] = f"modified file chmod {safe_str(head['old_mode'])} => {safe_str(head['new_mode'])}"

                      if head['rename_from'] != head['rename_to']:
                          op = OPS.MOD
                          ops[RENAMED_FILENODE] = f"file renamed from {safe_str(head['rename_from'])} to {safe_str(head['rename_to'])}"

                      if head.get('copy_from') and head.get('copy_to'):
                          op = OPS.MOD
                          ops[COPIED_FILENODE] = f"file copied from {safe_str(head['copy_from'])} to {safe_str(head['copy_to'])}"

                      # headers matched nothing: fall back to old style detection
                      if op is None:
                          if not head['a_file'] and head['b_file']:
                              op = OPS.ADD
                              ops[NEW_FILENODE] = 'new file'
                          elif head['a_file'] and not head['b_file']:
                              op = OPS.DEL
                              ops[DEL_FILENODE] = 'deleted file'

                      # neither ADD nor DELETE
                      if op is None:
                          op = OPS.MOD
                          ops[MOD_FILENODE] = 'modified file'

                  # every path above assumes binary until real lines are parsed
                  stats['binary'] = True

                  if head['a_file'] or head['b_file']:
                      # a real non-binary diff
                      try:
                          raw_diff, chunks, line_stats = self._parse_lines(escaped_lines)
                          stats['binary'] = False
                          stats['added'] = line_stats[0]
                          stats['deleted'] = line_stats[1]
                          # explicit mark that it's a modified file
                          if op == OPS.MOD:
                              ops[MOD_FILENODE] = 'modified file'

                          # the size is validated per file rather than for the
                          # whole diff, so big files get hidden while small
                          # ones are still shown
                          exceeds_limit = len(raw_diff) > self.file_limit
                          if not self.show_full_diff and exceeds_limit:
                              raise DiffLimitExceeded('File Limit Exceeded')
                      except DiffLimitExceeded:
                          limit_hit = True
                          exceeds_limit = len(raw_diff) > self.file_limit
                          limited_diff = True
                          chunks = []
                  else:
                      # GIT format binary patch, or possibly empty diff
                      if head['bin_patch']:
                          # the operation is already known; just record that
                          # the diff of a binary file is not shown
                          ops[BIN_FILENODE] = 'binary diff hidden'
                      chunks = []

                  # outside of full diff mode the contents of deleted files
                  # are dropped
                  if chunks and not self.show_full_diff and op == OPS.DEL:
                      chunks = []

                  header_lines = []
                  for kind, message in ops.items():
                      if kind == MOD_FILENODE:
                          continue
                      header_lines.append({
                          'old_lineno': '',
                          'new_lineno': '',
                          'action': Action.CONTEXT,
                          'line': message,
                      })
                  chunks.insert(0, header_lines)

                  parsed_files.append({
                      'filename': safe_str(head['b_path']),
                      'old_revision': head['a_blob_id'],
                      'new_revision': head['b_blob_id'],
                      'chunks': chunks,
                      'raw_diff': safe_str(raw_diff),
                      'operation': op,
                      'stats': stats,
                      'exceeds_limit': exceeds_limit,
                      'is_limited_diff': limited_diff,
                  })

              if inline_diff:
                  # highlight inline changes
                  for file_info in parsed_files:
                      for lines in file_info['chunks']:
                          cursor = iter(lines)
                          for current in cursor:
                              if current['action'] in unchanged_actions:
                                  continue
                              # a changed line is paired with the line after it
                              try:
                                  following = next(cursor)
                              except StopIteration:
                                  break
                              if (following['action'] in ('unmod', 'context')
                                      or following['action'] == current['action']):
                                  continue
                              self.differ(current, following)

              ordered = sorted(parsed_files, key=lambda info: sort_rank.get(info['operation']))
              if limit_hit:
                  return LimitedDiffContainer(self.diff_limit, self.cur_diff_size, ordered)
              return ordered

        marcink
    
project: added all source files and assets

              r1
            
        dan
    
diffs: use whole chunk diff to calculate if it's oversized or not....

              r2070
            
          def _check_large_diff(self):

        marcink
    
diffs: improve logging

              r4324
            
              if self.diff_limit:

                  log.debug('Checking if diff exceeds current diff_limit of %s', self.diff_limit)

        dan
    
diffs: replace compare controller with new html based diffs:...

              r1030
            
              if not self.show_full_diff and (self.cur_diff_size > self.diff_limit):

        super-admin
    
diffs: python3 port

              r5083
            
                  raise DiffLimitExceeded(f'Diff Limit `{self.diff_limit}` Exceeded')

        dan
    
diffs: replace compare controller with new html based diffs:...

              r1030
            
          # FIXME: NEWDIFFS: dan: this replaces _parse_gitdiff

          def _new_parse_gitdiff(self, inline_diff=True):

              _files = []

        dan
    
diffs: use whole chunk diff to calculate if it's oversized or not....

              r2070
            
        super-admin
    
diffs: python3 port

              r5083
            
              # this can be overridden later to a LimitedDiffContainer type

              def diff_container(arg):

                  return arg

        dan
    
diffs: use whole chunk diff to calculate if it's oversized or not....

              r2070
            
        dan
    
diffs: replace compare controller with new html based diffs:...

              r1030
            
              for chunk in self._diff.chunks():

        super-admin
    
diffs: python3 port

              r5083
            
                  head = chunk.header_as_str

                  log.debug('parsing diff chunk %r', chunk)

        dan
    
diffs: replace compare controller with new html based diffs:...

              r1030
            
                  raw_diff = chunk.raw

                  limited_diff = False

                  exceeds_limit = False

        dan
    
diffs: use whole chunk diff to calculate if it's oversized or not....

              r2070
            
        dan
    
diffs: replace compare controller with new html based diffs:...

              r1030
            
                  op = None

                  stats = {

                      'added': 0,

                      'deleted': 0,

                      'binary': False,

        super-admin
    
diffs: python3 port

              r5083
            
                      'old_mode': '',

                      'new_mode': '',

        dan
    
diffs: replace compare controller with new html based diffs:...

              r1030
            
                      'ops': {},

                  }

                  if head['old_mode']:

                      stats['old_mode'] = head['old_mode']

                  if head['new_mode']:

                      stats['new_mode'] = head['new_mode']

                  if head['b_mode']:

                      stats['new_mode'] = head['b_mode']

        dan
    
diffs: use whole chunk diff to calculate if it's oversized or not....

              r2070
            
                  # delete file

        dan
    
diffs: replace compare controller with new html based diffs:...

              r1030
            
                  if head['deleted_file_mode']:

                      op = OPS.DEL

                      stats['binary'] = True

                      stats['ops'][DEL_FILENODE] = 'deleted file'

        dan
    
diffs: use whole chunk diff to calculate if it's oversized or not....

              r2070
            
                  # new file

        dan
    
diffs: replace compare controller with new html based diffs:...

              r1030
            
                  elif head['new_file_mode']:

                      op = OPS.ADD

                      stats['binary'] = True

        super-admin
    
diffs: python3 port

              r5083
            
                      stats['old_mode'] = ''

        dan
    
diffs: replace compare controller with new html based diffs:...

              r1030
            
                      stats['new_mode'] = head['new_file_mode']

        super-admin
    
diffs: python3 port

              r5083
            
                      stats['ops'][NEW_FILENODE] = f"new file {head['new_file_mode']}"

        dan
    
diffs: replace compare controller with new html based diffs:...

              r1030
            
        super-admin
    
diffs: python3 port

              r5083
            
                  # modify operation, can be: copy, rename or chmod

        dan
    
diffs: use whole chunk diff to calculate if it's oversized or not....

              r2070
            
                  else:

        dan
    
diffs: replace compare controller with new html based diffs:...

              r1030
            
                      # CHMOD

                      if head['new_mode'] and head['old_mode']:

                          op = OPS.MOD

                          stats['binary'] = True

        super-admin
    
diffs: python3 port

              r5083
            
                          stats['ops'][CHMOD_FILENODE] = f"modified file chmod {head['old_mode']} => {head['new_mode']}"

        dan
    
diffs: replace compare controller with new html based diffs:...

              r1030
            
                      # RENAME

                      if head['rename_from'] != head['rename_to']:

                          op = OPS.MOD

                          stats['binary'] = True

                          stats['renamed'] = (head['rename_from'], head['rename_to'])

        super-admin
    
diffs: python3 port

              r5083
            
                          stats['ops'][RENAMED_FILENODE] = f"file renamed from {head['rename_from']} to {head['rename_to']}"

        dan
    
diffs: replace compare controller with new html based diffs:...

              r1030
            
                      # COPY

                      if head.get('copy_from') and head.get('copy_to'):

                          op = OPS.MOD

                          stats['binary'] = True

                          stats['copied'] = (head['copy_from'], head['copy_to'])

        super-admin
    
diffs: python3 port

              r5083
            
                          stats['ops'][COPIED_FILENODE] = f"file copied from {head['copy_from']} to {head['copy_to']}"

        marcink
    
project: added all source files and assets

              r1
            
        dan
    
diffs: replace compare controller with new html based diffs:...

              r1030
            
                      # If our new parsed headers didn't match anything fallback to

                      # old style detection

                      if op is None:

                          if not head['a_file'] and head['b_file']:

                              op = OPS.ADD

                              stats['binary'] = True

                              stats['new_file'] = True

                              stats['ops'][NEW_FILENODE] = 'new file'

                          elif head['a_file'] and not head['b_file']:

                              op = OPS.DEL

                              stats['binary'] = True

                              stats['ops'][DEL_FILENODE] = 'deleted file'

                      # it's not ADD not DELETE

                      if op is None:

                          op = OPS.MOD

                          stats['binary'] = True

                          stats['ops'][MOD_FILENODE] = 'modified file'

                  # a real non-binary diff

                  if head['a_file'] or head['b_file']:

        marcink
    
diffs: in case of text lexers don't do any HL because of pygments newline...

              r2546
            
                      # simulate splitlines, so we keep the line end part

                      diff = self.diff_splitter(chunk.diff)

        dan
    
diffs: use whole chunk diff to calculate if it's oversized or not....

              r2070
            
                      # append each file to the diff size

                      raw_chunk_size = len(raw_diff)

                      exceeds_limit = raw_chunk_size > self.file_limit

                      self.cur_diff_size += raw_chunk_size

        dan
    
diffs: replace compare controller with new html based diffs:...

              r1030
            
                      try:

        dan
    
diffs: use whole chunk diff to calculate if it's oversized or not....

              r2070
            
                          # Check each file instead of the whole diff.

                          # Diff will hide big files but still show small ones.

                          # From the tests big files are fairly safe to be parsed

                          # but the browser is the bottleneck.

                          if not self.show_full_diff and exceeds_limit:

                              log.debug('File `%s` exceeds current file_limit of %s',

        super-admin
    
diffs: python3 port

              r5083
            
                                        head['b_path'], self.file_limit)

                              raise DiffLimitExceeded(f'File Limit {self.file_limit} Exceeded')

        dan
    
diffs: use whole chunk diff to calculate if it's oversized or not....

              r2070
            
                          self._check_large_diff()

        dan
    
diffs: replace compare controller with new html based diffs:...

              r1030
            
                          raw_diff, chunks, _stats = self._new_parse_lines(diff)

                          stats['binary'] = False

                          stats['added'] = _stats[0]

                          stats['deleted'] = _stats[1]

                          # explicit mark that it's a modified file

                          if op == OPS.MOD:

                              stats['ops'][MOD_FILENODE] = 'modified file'

                      except DiffLimitExceeded:

        super-admin
    
diffs: python3 port

              r5083
            
                          def limited_diff_container(_diff):

                              return LimitedDiffContainer(self.diff_limit, self.cur_diff_size, _diff)

                          # re-definition of our container wrapper

                          diff_container = limited_diff_container

        dan
    
diffs: replace compare controller with new html based diffs:...

              r1030
            
                          limited_diff = True

                          chunks = []

                  else:  # GIT format binary patch, or possibly empty diff

                      if head['bin_patch']:

                          # we have operation already extracted, but we mark simply

        super-admin
    
diffs: python3 port

              r5083
            
                          # it's a diff we won't show for binary files

        dan
    
diffs: replace compare controller with new html based diffs:...

              r1030
            
                          stats['ops'][BIN_FILENODE] = 'binary diff hidden'

                      chunks = []

        dan
    
diffs: use whole chunk diff to calculate if it's oversized or not....

              r2070
            
                  # Hide content of deleted node by setting empty chunks

        dan
    
diffs: replace compare controller with new html based diffs:...

              r1030
            
                  if chunks and not self.show_full_diff and op == OPS.DEL:

                      # when not in full diff mode, do not show the deleted file's contents

                      # TODO: anderson: if the view is not too big, there is no way

                      # to see the content of the file

                      chunks = []

        super-admin
    
diffs: python3 port

              r5083
            
                  frag = [

                      {'old_lineno': '',

                       'new_lineno': '',

                       'action': Action.CONTEXT,

                       'line': msg,

                       } for _op, msg in list(stats['ops'].items())

                      if _op not in [MOD_FILENODE]]

        dan
    
diffs: replace compare controller with new html based diffs:...

              r1030
            
        super-admin
    
diffs: python3 port

              r5083
            
                  chunks.insert(0, frag)

                  original_filename = safe_str(head['a_path'])

        dan
    
diffs: replace compare controller with new html based diffs:...

              r1030
            
                  _files.append({

                      'original_filename': original_filename,

        super-admin
    
diffs: python3 port

              r5083
            
                      'filename': safe_str(head['b_path']),

        dan
    
diffs: replace compare controller with new html based diffs:...

              r1030
            
                      'old_revision': head['a_blob_id'],

                      'new_revision': head['b_blob_id'],

                      'chunks': chunks,

        super-admin
    
diffs: python3 port

              r5083
            
                      'raw_diff': safe_str(raw_diff),

        dan
    
diffs: replace compare controller with new html based diffs:...

              r1030
            
                      'operation': op,

                      'stats': stats,

                      'exceeds_limit': exceeds_limit,

                      'is_limited_diff': limited_diff,

                  })

        super-admin
    
diffs: python3 port

              r5083
            
              def sorter(info):

                  return {OPS.ADD: 0, OPS.MOD: 1, OPS.DEL: 2}.get(info['operation'])

        dan
    
diffs: replace compare controller with new html based diffs:...

              r1030
            
              return diff_container(sorted(_files, key=sorter))

          # FIXME: NEWDIFFS: dan: this gets replaced by _new_parse_lines

        marcink
    
diffs: in case of text lexers don't do any HL because of pygments newline...

              r2546
            
          def _parse_lines(self, diff_iter):

        marcink
    
project: added all source files and assets

              r1
            
              """

              Parse the diff an return data for the template.

              """

              stats = [0, 0]

              chunks = []

              raw_diff = []

              try:

        super-admin
    
python3: 2to3 fixes

              r4930
            
                  line = next(diff_iter)

        marcink
    
project: added all source files and assets

              r1
            
                  while line:

                      raw_diff.append(line)

                      lines = []

                      chunks.append(lines)

                      match = self._chunk_re.match(line)

                      if not match:

                          break

                      gr = match.groups()

                      (old_line, old_end,

                       new_line, new_end) = [int(x or 1) for x in gr[:-1]]

                      old_line -= 1

                      new_line -= 1

                      context = len(gr) == 5

                      old_end += old_line

                      new_end += new_line

                      if context:

                          # skip context only if it's first line

                          if int(gr[0]) > 1:

                              lines.append({

                                  'old_lineno': '...',

                                  'new_lineno': '...',

                                  'action':     Action.CONTEXT,

                                  'line':       line,

                              })

        super-admin
    
python3: 2to3 fixes

              r4930
            
                      line = next(diff_iter)

        marcink
    
project: added all source files and assets

              r1
            
                      while old_line < old_end or new_line < new_end:

        super-admin
    
diffs: python3 port

              r5083
            
                          command = b' '

        marcink
    
project: added all source files and assets

              r1
            
                          if line:

                              command = line[0]

                          affects_old = affects_new = False

                          # ignore those if we don't expect them

        super-admin
    
diffs: python3 port

              r5083
            
                          if command in b'#@':

        marcink
    
project: added all source files and assets

              r1
            
                              continue

        super-admin
    
diffs: python3 port

              r5083
            
                          elif command == b'+':

        marcink
    
project: added all source files and assets

              r1
            
                              affects_new = True

                              action = Action.ADD

                              stats[0] += 1

        super-admin
    
diffs: python3 port

              r5083
            
                          elif command == b'-':

        marcink
    
project: added all source files and assets

              r1
            
                              affects_old = True

                              action = Action.DELETE

                              stats[1] += 1

                          else:

                              affects_old = affects_new = True

                              action = Action.UNMODIFIED

                          if not self._newline_marker.match(line):

                              old_line += affects_old

                              new_line += affects_new

                              lines.append({

        super-admin
    
diffs: python3 port

              r5083
            
                                  'old_lineno':   affects_old and old_line or b'',

                                  'new_lineno':   affects_new and new_line or b'',

        marcink
    
project: added all source files and assets

              r1
            
                                  'action':       action,

                                  'line':         self._clean_line(line, command)

                              })

                              raw_diff.append(line)

        super-admin
    
python3: 2to3 fixes

              r4930
            
                          line = next(diff_iter)

        marcink
    
project: added all source files and assets

              r1
            
                          if self._newline_marker.match(line):

                              # we need to append to lines, since this is not

                              # counted in the line specs of diff

                              lines.append({

                                  'old_lineno':   '...',

                                  'new_lineno':   '...',

                                  'action':       Action.CONTEXT,

                                  'line':         self._clean_line(line, command)

                              })

              except StopIteration:

                  pass

              return ''.join(raw_diff), chunks, stats

        dan
    
diffs: replace compare controller with new html based diffs:...

              r1030
            
          # FIXME: NEWDIFFS: dan: this replaces _parse_lines

        dan
    
diffs: use whole chunk diff to calculate if it's oversized or not....

              r2070
            
          def _new_parse_lines(self, diff_iter):

        dan
    
diffs: replace compare controller with new html based diffs:...

              r1030
            
              """

              Parse the diff an return data for the template.

              """

              stats = [0, 0]

              chunks = []

              raw_diff = []

              try:

        super-admin
    
python3: 2to3 fixes

              r4930
            
                  line = next(diff_iter)

        super-admin
    
diffs: python3 port

              r5083
            
                  assert isinstance(line, bytes)

        dan
    
diffs: replace compare controller with new html based diffs:...

              r1030
            
                  while line:

                      raw_diff.append(line)

        marcink
    
diffs: in case of text lexers don't do any HL because of pygments newline...

              r2546
            
                      # match header e.g @@ -0,0 +1 @@\n'

        dan
    
diffs: replace compare controller with new html based diffs:...

              r1030
            
                      match = self._chunk_re.match(line)

                      if not match:

                          break

                      gr = match.groups()

        super-admin
    
diffs: python3 port

              r5083
            
        dan
    
diffs: replace compare controller with new html based diffs:...

              r1030
            
                      (old_line, old_end,

                       new_line, new_end) = [int(x or 1) for x in gr[:-1]]

                      lines = []

                      hunk = {

                          'section_header': gr[-1],

                          'source_start': old_line,

                          'source_length': old_end,

                          'target_start': new_line,

                          'target_length': new_end,

                          'lines': lines,

                      }

                      chunks.append(hunk)

                      old_line -= 1

                      new_line -= 1

        super-admin
    
diffs: python3 port

              r5083
            
                      len(gr) == 5

        dan
    
diffs: replace compare controller with new html based diffs:...

              r1030
            
                      old_end += old_line

                      new_end += new_line

        super-admin
    
python3: 2to3 fixes

              r4930
            
                      line = next(diff_iter)

        dan
    
diffs: replace compare controller with new html based diffs:...

              r1030
            
                      while old_line < old_end or new_line < new_end:

                          command = ' '

                          if line:

        super-admin
    
diffs: python3 port

              r5083
            
                              # This is bytes, so we need to convert it to a str

                              command: str = chr(line[0])

        dan
    
diffs: replace compare controller with new html based diffs:...

              r1030
            
                          affects_old = affects_new = False

                          # ignore those if we don't expect them

                          if command in '#@':

                              continue

                          elif command == '+':

                              affects_new = True

                              action = Action.ADD

                              stats[0] += 1

                          elif command == '-':

                              affects_old = True

                              action = Action.DELETE

                              stats[1] += 1

                          else:

                              affects_old = affects_new = True

                              action = Action.UNMODIFIED

                          if not self._newline_marker.match(line):

                              old_line += affects_old

                              new_line += affects_new

                              lines.append({

        super-admin
    
diffs: python3 port

              r5083
            
                                  'old_lineno':   affects_old and old_line or None,

                                  'new_lineno':   affects_new and new_line or None,

        dan
    
diffs: replace compare controller with new html based diffs:...

              r1030
            
                                  'action':       action,

                                  'line':         self._clean_line(line, command)

                              })

        marcink
    
diffs: fixed problem with rendering no newline at the end of file markers....

              r2252
            
                          raw_diff.append(line)

        dan
    
diffs: replace compare controller with new html based diffs:...

              r1030
            
        super-admin
    
python3: 2to3 fixes

              r4930
            
                          line = next(diff_iter)

        dan
    
diffs: replace compare controller with new html based diffs:...

              r1030
            
                          if self._newline_marker.match(line):

                              # we need to append to lines, since this is not

                              # counted in the line specs of diff

                              if affects_old:

        dan
    
ux: make 'no newline at end of file' message more pronounced in diffs

              r1032
            
                                  action = Action.OLD_NO_NL

        dan
    
diffs: replace compare controller with new html based diffs:...

              r1030
            
                              elif affects_new:

        dan
    
ux: make 'no newline at end of file' message more pronounced in diffs

              r1032
            
                                  action = Action.NEW_NO_NL

        dan
    
diffs: replace compare controller with new html based diffs:...

              r1030
            
                              else:

                                  raise Exception('invalid context for no newline')

                              lines.append({

                                  'old_lineno':   None,

                                  'new_lineno':   None,

                                  'action':       action,

                                  'line':         self._clean_line(line, command)

                              })

              except StopIteration:

                  pass

        dan
    
diffs: use whole chunk diff to calculate if it's oversized or not....

              r2070
            
        super-admin
    
diffs: python3 port

              r5083
            
              return b''.join(raw_diff), chunks, stats

        dan
    
diffs: replace compare controller with new html based diffs:...

              r1030
            
        marcink
    
project: added all source files and assets

              r1
            
          def _safe_id(self, idstring):

        super-admin
    
code: fixes to escape characters improperly used

              r5149
            
              r"""Make a string safe for including in an id attribute.

        marcink
    
project: added all source files and assets

              r1
            
              The HTML spec says that id attributes 'must begin with

              a letter ([A-Za-z]) and may be followed by any number

              of letters, digits ([0-9]), hyphens ("-"), underscores

              ("_"), colons (":"), and periods (".")'. These regexps

              are slightly over-zealous, in that they remove colons

              and periods unnecessarily.

              Whitespace is transformed into underscores, and then

              anything which is not a hyphen or a character that

              matches \w (alphanumerics and underscore) is removed.

        super-admin
    
code: fixes to escape characters improperly used

              r5149
            
              """

        marcink
    
project: added all source files and assets

              r1
            
              # Transform all whitespace to underscore

        super-admin
    
diffs: python3 port

              r5083
            
              idstring = re.sub(r'\s', "_", f'{idstring}')

        marcink
    
project: added all source files and assets

              r1
            
              # Remove everything that is not a hyphen or a member of \w

              idstring = re.sub(r'(?!-)\W', "", idstring).lower()

              return idstring

        marcink
    
diffs: in case of text lexers don't do any HL because of pygments newline...

              r2546
            
          @classmethod
          def diff_splitter(cls, diff_string: bytes):
              """
              Yield the lines of `diff_string` with their line endings attached.

              Behaves like ``.splitlines(keepends=True)`` except that only the
              LF byte is treated as a line separator. Nothing is yielded for
              empty input.
              """
              if not diff_string:
                  return

              if diff_string == b'\n':
                  yield b'\n'
                  return

              *complete, tail = diff_string.split(b'\n')
              for piece in complete:
                  # every piece before the last was terminated by a newline
                  yield piece + b'\n'

              # `tail` is empty exactly when the input ended with a newline
              if tail:
                  yield tail

        marcink
    
diffs: in case of text lexers don't do any HL because of pygments newline...

              r2546
            
        marcink
    
project: added all source files and assets

              r1
            
          def prepare(self, inline_diff=True):
              """
              Run the configured parser over the udiff for HTML rendering.

              The result is also stored on the instance as `parsed_diff`, and
              `parsed` is set to True.

              :param inline_diff: forwarded to the parser as a keyword argument.
              :return: whatever the parser returns (documented upstream as a
                  list of dicts with diff information).
              """
              outcome = self._parser(inline_diff=inline_diff)
              self.parsed, self.parsed_diff = True, outcome
              return outcome

          def as_raw(self, diff_lines=None):
              """
              Return the raw diff as a byte string.

              :param diff_lines: accepted but not used by this implementation.
              """
              raw_buffer = self._diff.raw
              return raw_buffer.tobytes()

        marcink
    
project: added all source files and assets

              r1
            
          def stat(self):
              """
              Return the ``(adds, removes)`` pair stored on this instance, i.e.
              the number of added and removed lines.
              """
              added, removed = self.adds, self.removes
              return added, removed

          def get_context_of_line(
                  self, path, diff_line: DiffLineNumber = None, context_before: int = 3, context_after: int = 3):
              """
              Return the context lines surrounding the given diff line.

              :param path: filename used to look up the file diff.
              :param diff_line: line number pair; at least one of its members
                  has to be None.
              :param context_before: number of chunk lines to include before
                  the target line.
              :param context_after: number of chunk lines to include after the
                  target line.
              :return: list of `_context_line` results for the lines in the
                  window that `_is_diff_content` accepts.
              :raises ValueError: when `diff_line` contains no None member.
              """
              assert self.parsed, "DiffProcessor is not initialized."

              if None not in diff_line:
                  raise ValueError(f"Cannot specify both line numbers in diff_line: {diff_line}")

              chunk, idx = self._find_chunk_line_index(
                  self._get_file_diff(path), diff_line)

              window_start = max(idx - context_before, 0)
              window_stop = idx + context_after + 1

              line_contents = []
              for entry in chunk['lines'][window_start:window_stop]:
                  if _is_diff_content(entry):
                      line_contents.append(_context_line(entry))

              # TODO: johbo: Interim fixup, the diff chunks drop the final newline.
              # Once they are fixed, we can drop this line here.
              if line_contents:
                  final = line_contents[-1]
                  line_contents[-1] = (final[0], final[1].rstrip(b'\n') + b'\n')

              return line_contents

          def find_context(self, path, context, offset=0):
              """
              Finds the given `context` inside of the diff.

              Use the parameter `offset` to specify which offset the target line has
              inside of the given `context`. This way the correct diff line will be
              returned.

              :param path: filename used to look up the file diff.
              :param context: non-empty sequence of entries that are compared
                  against `_context_line(line)` for consecutive hunk lines.
              :param offset: Shall be used to specify the offset of the main line
                  within the given `context`.
              :return: list with one `_line_to_diff_line_number` result per
                  match; empty when nothing matches.
              :raises ValueError: when `offset` is negative or not smaller than
                  ``len(context)``.
              """
              if offset < 0 or offset >= len(context):
                  raise ValueError(
                      "Only positive values up to the length of the context "
                      "minus one are allowed.")

              matches = []
              file_diff = self._get_file_diff(path)

              for chunk in file_diff['chunks']:
                  # only dict entries are hunks that carry a 'lines' list
                  if not isinstance(chunk, dict):
                      continue

                  chunk_lines = chunk['lines']
                  context_iter = iter(context)
                  for line_idx, line in enumerate(chunk_lines):
                      try:
                          if _context_line(line) == next(context_iter):
                              continue
                      except StopIteration:
                          # the whole context was consumed by the previous lines
                          matches.append((line_idx, chunk))
                      # mismatch or completed match: start matching from scratch
                      context_iter = iter(context)

                  # A match that ends on the last line of a hunk leaves the
                  # iterator exhausted without the loop above noticing it, so
                  # trigger StopIteration explicitly. This has to happen for
                  # every hunk: checking only once after the outer loop loses
                  # matches in earlier hunks and fails with unbound variables
                  # when the file has no hunks at all.
                  try:
                      next(context_iter)
                  except StopIteration:
                      matches.append((len(chunk_lines), chunk))

              effective_offset = len(context) - offset
              found_at_diff_lines = [
                  _line_to_diff_line_number(chunk['lines'][idx - effective_offset])
                  for idx, chunk in matches]

              return found_at_diff_lines

          def _get_file_diff(self, path):
              """
              Return the entry of ``self.parsed_diff`` whose ``filename`` is `path`.

              :raises FileNotInDiffException: If no entry has that file name.
              """
              for candidate in self.parsed_diff:
                  if candidate['filename'] == path:
                      return candidate

              raise FileNotInDiffException(f"File {path} not in diff")

          def _find_chunk_line_index(self, file_diff, diff_line):
              """
              Locate `diff_line` inside of the chunks of `file_diff`.

              Only dict chunks are inspected. A line is a hit when `diff_line.old`
              is set and equals the line's ``old_lineno``, or when `diff_line.new`
              is set and equals the line's ``new_lineno``.

              :returns: Tuple of the chunk and the index of the line within
                  ``chunk['lines']``.
              :raises LineNotInDiffException: If no line matches.
              """
              dict_chunks = (
                  entry for entry in file_diff['chunks']
                  if isinstance(entry, dict))

              for candidate in dict_chunks:
                  for position, row in enumerate(candidate['lines']):
                      if ((diff_line.old and row['old_lineno'] == diff_line.old)
                              or (diff_line.new
                                  and row['new_lineno'] == diff_line.new)):
                          return candidate, position

              raise LineNotInDiffException(f"The line {diff_line} is not part of the diff.")

        marcink
    
project: added all source files and assets

              r1
            
      def _is_diff_content(line):
          """
          Tell if the action of `line` is one of unmodified, add or delete.
          """
          action = line['action']
          content_actions = (Action.UNMODIFIED, Action.ADD, Action.DELETE)
          return action in content_actions

      def _context_line(line):

        super-admin
    
diffs: python3 port

              r5083
            
          return line['action'], line['line']

        marcink
    
project: added all source files and assets

              r1
            
      def _line_to_diff_line_number(line):
          """
          Convert a parsed diff line into a `DiffLineNumber`.

          Falsy line numbers (e.g. ``0`` or an empty string) end up as ``None``.
          """
          return DiffLineNumber(
              new=line['new_lineno'] or None,
              old=line['old_lineno'] or None)

      class FileNotInDiffException(Exception):
          """
          The requested file is not part of the diff.

          Raised when context is requested for a line of a file which the given
          diff does not contain.
          """

      class LineNotInDiffException(Exception):
          """
          The requested line is not part of the diff.

          Raised when context is requested for a line of a file and the given
          diff does not contain that line.
          """

      class DiffLimitExceeded(Exception):
          """
          Signals that a diff went over a limit.

          NOTE(review): the raising code is not next to this class; presumably
          the limit is the size limit used while processing diffs - confirm
          against the callers.
          """

        Bartłomiej Wołyńczyk
    
caching: add option to cache diffs for commits and pull requests....

              r2685
            
        marcink
    
diffs: make validation of version, so we can change diffs and force re-cache if diffs are in old version.

              r3079
            
      # NOTE(marcink): if diffs.mako changes, this probably
      # needs a bump to the next version

        milka
    
comments: multiple changes on comments navigation/display logic...

              r4543
            
      # Stored under the 'version' key of every payload written by cache_diff()
      # and compared by load_cached_diff(); a cache file carrying a different
      # version gets removed and is treated as a cache miss.
      CURRENT_DIFF_VERSION = 'v5'

        marcink
    
diffs: make validation of version, so we can change diffs and force re-cache if diffs are in old version.

              r3079
            
      def _cleanup_cache_file(cached_diff_file):
          """
          Remove the given cache file, never raising on failure.

          :param cached_diff_file: Path of the cache file to delete.
          """
          # cleanup file to not store it "damaged"
          try:
              os.remove(cached_diff_file)
          except Exception:
              # best effort only: a failed removal (this includes a file which
              # does not exist) is logged with its traceback and swallowed
              log.exception('Failed to cleanup path %s', cached_diff_file)

        marcink
    
diffs: switched bz2 into gzip since it can be 10x faster in some cases with only slight size penalty

              r3854
            
      def _get_compression_mode(cached_diff_file):

          mode = 'bz2'

          if 'mode:plain' in cached_diff_file:

              mode = 'plain'

          elif 'mode:gzip' in cached_diff_file:

              mode = 'gzip'

          return mode

        Bartłomiej Wołyńczyk
    
caching: add option to cache diffs for commits and pull requests....

              r2685
            
      def cache_diff(cached_diff_file, diff, commits):
          """
          Pickle `diff` and `commits` into the file `cached_diff_file`.

          The stored payload is a dict with the keys ``version`` (set to
          `CURRENT_DIFF_VERSION`), ``diff`` and ``commits``. The on-disk format
          (plain, gzip or bz2) is derived from the file name by
          `_get_compression_mode`.

          Saving is best effort: a failure is logged as a warning, the possibly
          half-written file gets removed and nothing is raised.

          :param cached_diff_file: Path of the cache file to write.
          :param diff: Object stored under the ``diff`` key.
          :param commits: Object stored under the ``commits`` key.
          """
          compression_mode = _get_compression_mode(cached_diff_file)

          struct = {
              'version': CURRENT_DIFF_VERSION,
              'diff': diff,
              'commits': commits
          }

          # all three openers accept (path, 'wb') and are context managers
          if compression_mode == 'plain':
              opener = open
          elif compression_mode == 'gzip':
              opener = gzip.GzipFile
          else:
              opener = bz2.BZ2File

          start = time.time()
          try:
              with opener(cached_diff_file, 'wb') as f:
                  pickle.dump(struct, f)
          except Exception:
              log.warning('Failed to save cache', exc_info=True)
              _cleanup_cache_file(cached_diff_file)
              # do not report a successful save for a file we just removed
              return

          log.debug('Saved diff cache under %s in %.4fs', cached_diff_file, time.time() - start)

        marcink
    
dffs-cache: allow plain mode without bz2 for even better performance but more disk space.

              r3839
            
        Bartłomiej Wołyńczyk
    
caching: add option to cache diffs for commits and pull requests....

              r2685
            
      def load_cached_diff(cached_diff_file):
          """
          Load the payload stored by `cache_diff` from `cached_diff_file`.

          The on-disk format (plain, gzip or bz2) is derived from the file name
          by `_get_compression_mode`.

          A default structure with ``diff`` and ``commits`` set to ``None`` is
          returned when the file does not exist, cannot be read, holds an empty
          or non-dict payload, or was written with a different
          `CURRENT_DIFF_VERSION`. In the last case the cache file is removed.
          Nothing is raised for unreadable files; the problem is logged.

          :param cached_diff_file: Path of the cache file to read.
          :returns: Dict with the keys ``version``, ``diff`` and ``commits``.
          """
          compression_mode = _get_compression_mode(cached_diff_file)

          default_struct = {
              'version': CURRENT_DIFF_VERSION,
              'diff': None,
              'commits': None
          }

          if not os.path.isfile(cached_diff_file):
              log.debug('Reading diff cache file failed %s', cached_diff_file)
              return default_struct

          # all three openers accept (path, 'rb') and are context managers
          if compression_mode == 'plain':
              opener = open
          elif compression_mode == 'gzip':
              opener = gzip.GzipFile
          else:
              opener = bz2.BZ2File

          start = time.time()
          try:
              # NOTE(security): unpickling can execute arbitrary code, the cache
              # storage must only hold files written by this application.
              with opener(cached_diff_file, 'rb') as f:
                  data = pickle.load(f)
          except Exception:
              log.warning('Failed to read diff cache file', exc_info=True)
              # nothing was loaded, so do not fall through to the success log
              return default_struct

          if not data or not isinstance(data, dict):
              # empty payload or old version of data ?
              return default_struct

          # check version
          if data.get('version') != CURRENT_DIFF_VERSION:
              # purge cache
              _cleanup_cache_file(cached_diff_file)
              return default_struct

          log.debug('Loaded diff cache from %s in %.4fs', cached_diff_file, time.time() - start)

          return data

      def generate_diff_cache_key(*args):
          """
          Helper to generate a cache key using arguments

          Every argument is converted with `safe_str` and gets each ``/``
          replaced by ``_``; the results are joined with ``_``. An argument
          which converts to an empty string is rendered as ``None``.

          :param args: Values the cache key is built from.
          :returns: The cache key as a string, empty when no arguments are given.
          """

          def arg_mapper(input_param):
              input_param = safe_str(input_param)
              # we cannot allow '/' in arguments since it would allow
              # subdirectory usage. Strings are immutable, `replace` returns
              # a new string which has to be assigned back.
              input_param = input_param.replace('/', '_')
              return input_param or None  # prevent empty string arguments

          return '_'.join('{}'.format(arg_mapper(arg)) for arg in args)

        Bartłomiej Wołyńczyk
    
caching: add option to cache diffs for commits and pull requests....

              r2685
            
      def diff_cache_exist(cache_storage, *args):
          """
          Based on all generated arguments check and return a cache path

          Despite the name, no check for an existing file is done here; the
          function only builds the path of the cache file and validates that it
          is located inside of `cache_storage`.

          :param cache_storage: Directory which holds the cache files.
          :param args: Values passed on to `generate_diff_cache_key`; the marker
              ``mode:gzip`` is always appended to them.
          :returns: `cache_storage` joined with the generated cache key.
          :raises ValueError: If the resulting path is outside of `cache_storage`.
          """
          args = list(args) + ['mode:gzip']
          cache_key = generate_diff_cache_key(*args)
          cache_file_path = os.path.join(cache_storage, cache_key)

          # prevent path traversal attacks using some param that have e.g '../../'
          # Compare whole path components of the normalised paths: a plain
          # string prefix test would also accept a sibling directory such as
          # `<cache_storage>_other`, and would reject a storage path which is
          # not normalised itself.
          storage_root = os.path.abspath(cache_storage)
          resolved_path = os.path.abspath(cache_file_path)
          if os.path.commonpath([storage_root, resolved_path]) != storage_root:
              raise ValueError(f'Final path must be within {cache_storage}')

          return cache_file_path

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages