rhodecode-enterprise-ce Commit - r2070:7939c6bf

diffs: use whole chunk diff to calculate if it's oversized or not....

dan -

r2070:7939c6bf default

parent child

configs/development.ini

0 +9 -3

              ## change this to unique ID for security
              app_instance_uuid = rc-production
-             ## cut off limit for large diffs (size in bytes)
-             cut_off_limit_diff = 1024000
-             cut_off_limit_file = 256000
+             ## cut off limit for large diffs (size in bytes). If overall diff size on
+             ## commit, or pull request exceeds this limit this diff will be displayed
+             ## partially. E.g 512000 == 512Kb
+             cut_off_limit_diff = 512000
+             ## cut off limit for large files inside diffs (size in bytes). Each individual
+             ## file inside diff which exceeds this limit will be displayed partially.
+             ##  E.g 128000 == 128Kb
+             cut_off_limit_file = 128000
              ## use cache version of scm repo everywhere
              vcs_full_cache = true

configs/production.ini

0 +9 -3

              ## change this to unique ID for security
              app_instance_uuid = rc-production
-             ## cut off limit for large diffs (size in bytes)
-             cut_off_limit_diff = 1024000
-             cut_off_limit_file = 256000
+             ## cut off limit for large diffs (size in bytes). If overall diff size on
+             ## commit, or pull request exceeds this limit this diff will be displayed
+             ## partially. E.g 512000 == 512Kb
+             cut_off_limit_diff = 512000
+             ## cut off limit for large files inside diffs (size in bytes). Each individual
+             ## file inside diff which exceeds this limit will be displayed partially.
+             ##  E.g 128000 == 128Kb
+             cut_off_limit_file = 128000
              ## use cache version of scm repo everywhere
              vcs_full_cache = true

rhodecode/lib/diffs.py

0 +47 -41

                      self.parsed = False
                      self.parsed_diff = []
+                     log.debug('Initialized DiffProcessor with %s mode', format)
                      if format == 'gitdiff':
                          self.differ = self._highlight_line_difflib
                          self._parser = self._parse_gitdiff
                      return diff_container(sorted(_files, key=sorter))
-                 # FIXME: NEWDIFFS: dan: this replaces the old _escaper function
-                 def _process_line(self, string):
-                     """
-                     Process a diff line, checks the diff limit
-                     :param string:
-                     """
-                     self.cur_diff_size += len(string)
+                 def _check_large_diff(self):
+                     log.debug('Diff exceeds current diff_limit of %s', self.diff_limit)
                      if not self.show_full_diff and (self.cur_diff_size > self.diff_limit):
-                         raise DiffLimitExceeded('Diff Limit Exceeded')
-                     return safe_unicode(string)
+                         raise DiffLimitExceeded('Diff Limit `%s` Exceeded', self.diff_limit)
                  # FIXME: NEWDIFFS: dan: this replaces _parse_gitdiff
                  def _new_parse_gitdiff(self, inline_diff=True):
                      _files = []
+                     # this can be overriden later to a LimitedDiffContainer type
                      diff_container = lambda arg: arg
                      for chunk in self._diff.chunks():
                          head = chunk.header
                          log.debug('parsing diff %r' % head)
-                         diff = imap(self._process_line, chunk.diff.splitlines(1))
                          raw_diff = chunk.raw
                          limited_diff = False
                          exceeds_limit = False
-                         # if 'empty_file_to_modify_and_rename' in head['a_path']:
-                         #     1/0
                          op = None
                          stats = {
                              'added': 0,
                          if head['b_mode']:
                              stats['new_mode'] = head['b_mode']
+                         # delete file
                          if head['deleted_file_mode']:
                              op = OPS.DEL
                              stats['binary'] = True
                              stats['ops'][DEL_FILENODE] = 'deleted file'
+                         # new file
                          elif head['new_file_mode']:
                              op = OPS.ADD
                              stats['binary'] = True
                              stats['old_mode'] = None
                              stats['new_mode'] = head['new_file_mode']
                              stats['ops'][NEW_FILENODE] = 'new file %s' % head['new_file_mode']
-                         else:  # modify operation, can be copy, rename or chmod
+                         # modify operation, can be copy, rename or chmod
+                         else:
                              # CHMOD
                              if head['new_mode'] and head['old_mode']:
                                  op = OPS.MOD
                          # a real non-binary diff
                          if head['a_file'] or head['b_file']:
+                             diff = iter(chunk.diff.splitlines(1))
+                             # append each file to the diff size
+                             raw_chunk_size = len(raw_diff)
+                             exceeds_limit = raw_chunk_size > self.file_limit
+                             self.cur_diff_size += raw_chunk_size
                              try:
+                                 # Check each file instead of the whole diff.
+                                 # Diff will hide big files but still show small ones.
+                                 # From the tests big files are fairly safe to be parsed
+                                 # but the browser is the bottleneck.
+                                 if not self.show_full_diff and exceeds_limit:
+                                     log.debug('File `%s` exceeds current file_limit of %s',
+                                               safe_unicode(head['b_path']), self.file_limit)
+                                     raise DiffLimitExceeded(
+                                         'File Limit %s Exceeded', self.file_limit)
+                                 self._check_large_diff()
                                  raw_diff, chunks, _stats = self._new_parse_lines(diff)
                                  stats['binary'] = False
                                  stats['added'] = _stats[0]
                                  # explicit mark that it's a modified file
                                  if op == OPS.MOD:
                                      stats['ops'][MOD_FILENODE] = 'modified file'
-                                 exceeds_limit = len(raw_diff) > self.file_limit
-                                 # changed from _escaper function so we validate size of
-                                 # each file instead of the whole diff
-                                 # diff will hide big files but still show small ones
-                                 # from my tests, big files are fairly safe to be parsed
-                                 # but the browser is the bottleneck
-                                 if not self.show_full_diff and exceeds_limit:
-                                     raise DiffLimitExceeded('File Limit Exceeded')
                              except DiffLimitExceeded:
                                  diff_container = lambda _diff: \
                                      LimitedDiffContainer(
                                          self.diff_limit, self.cur_diff_size, _diff)
-                                 exceeds_limit = len(raw_diff) > self.file_limit
                                  limited_diff = True
                                  chunks = []
                                  stats['ops'][BIN_FILENODE] = 'binary diff hidden'
                              chunks = []
+                         # Hide content of deleted node by setting empty chunks
                          if chunks and not self.show_full_diff and op == OPS.DEL:
                              # if not full diff mode show deleted file contents
                              # TODO: anderson: if the view is not too big, there is no way
                              # to see the content of the file
                              chunks = []
-                         chunks.insert(0, [{
-                                               'old_lineno': '',
-                                               'new_lineno': '',
-                                               'action': Action.CONTEXT,
-                                               'line': msg,
-                                           } for _op, msg in stats['ops'].iteritems()
-                                           if _op not in [MOD_FILENODE]])
+                         chunks.insert(
+, [{'old_lineno': '',
+                                  'new_lineno': '',
+                                  'action': Action.CONTEXT,
+                                  'line': msg,
+                                  } for _op, msg in stats['ops'].iteritems()
+                                 if _op not in [MOD_FILENODE]])
                          original_filename = safe_unicode(head['a_path'])
                          _files.append({
                              'is_limited_diff': limited_diff,
                          })
                      sorter = lambda info: {OPS.ADD: 0, OPS.MOD: 1,
                                             OPS.DEL: 2}.get(info['operation'])
                      return ''.join(raw_diff), chunks, stats
                  # FIXME: NEWDIFFS: dan: this replaces _parse_lines
-                 def _new_parse_lines(self, diff):
+                 def _new_parse_lines(self, diff_iter):
                      """
                      Parse the diff an return data for the template.
                      """
-                     lineiter = iter(diff)
                      stats = [0, 0]
                      chunks = []
                      raw_diff = []
+                     diff_iter = imap(lambda s: safe_unicode(s), diff_iter)
                      try:
-                         line = lineiter.next()
+                         line = diff_iter.next()
                          while line:
                              raw_diff.append(line)
                              old_end += old_line
                              new_end += new_line
-                             line = lineiter.next()
+                             line = diff_iter.next()
                              while old_line < old_end or new_line < new_end:
                                  command = ' '
                                      })
                                      raw_diff.append(line)
-                                 line = lineiter.next()
+                                 line = diff_iter.next()
                                  if self._newline_marker.match(line):
                                      # we need to append to lines, since this is not
                      except StopIteration:
                          pass
                      return ''.join(raw_diff), chunks, stats
                  def _safe_id(self, idstring):

General Comments 0

Write
Preview

You need to be logged in to leave comments. Login now

No TODOs yet

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages