upstream/kallithea Files · rhodecode/lib/diffs.py

When using apply to children flag in repo group permission...

When using the "apply to children" flag in the repo group permission change prompt, RhodeCode only applied the change to user groups if the repository was private. The only thing that shouldn't be allowed is changing the DEFAULT user permission when the repository is private.

marcink - - Load All Authors

File last commit:

r3840:dc464486 beta


                r3974:39798d53

default

Download file

             diffs.py
        
                    711 lines
            
             | 25.8 KiB
            
                | text/x-python
            
             |
                PythonLexer
            
             / rhodecode / lib / diffs.py
          
                    History
                
                 |
                  Annotation
                 | Raw
                 |Copy content
                 |Copy permalink

      # -*- coding: utf-8 -*-

      """

          rhodecode.lib.diffs

          ~~~~~~~~~~~~~~~~~~~

          Set of diffing helpers, previously part of vcs

          :created_on: Dec 4, 2011

          :author: marcink

          :copyright: (C) 2010-2012 Marcin Kuzminski <marcin@python-works.com>

          :original copyright: 2007-2008 by Armin Ronacher

          :license: GPLv3, see COPYING for more details.

      """

      # This program is free software: you can redistribute it and/or modify

      # it under the terms of the GNU General Public License as published by

      # the Free Software Foundation, either version 3 of the License, or

      # (at your option) any later version.

      #

      # This program is distributed in the hope that it will be useful,

      # but WITHOUT ANY WARRANTY; without even the implied warranty of

      # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the

      # GNU General Public License for more details.

      #

      # You should have received a copy of the GNU General Public License

      # along with this program.  If not, see <http://www.gnu.org/licenses/>.

      import re

      import difflib

      import logging

      from itertools import tee, imap

      from pylons.i18n.translation import _

      from rhodecode.lib.vcs.exceptions import VCSError

      from rhodecode.lib.vcs.nodes import FileNode, SubModuleNode

      from rhodecode.lib.vcs.backends.base import EmptyChangeset

      from rhodecode.lib.helpers import escape

      from rhodecode.lib.utils2 import safe_unicode, safe_str

      log = logging.getLogger(__name__)

      def wrap_to_table(str_):
          """
          Wraps ``str_`` into a one-row ``code-difftable`` HTML table.

          The value is interpolated into the ``<pre>`` cell as given; this
          function performs no escaping itself.

          :param str_: text or markup to show inside the table
          """
          table_template = '''<table class="code-difftable">

                      <tr class="line no-comment">

                      <td class="lineno new"></td>

                      <td class="code no-comment"><pre>%s</pre></td>

                      </tr>

                    </table>'''
          return table_template % str_

      def wrapped_diff(filenode_old, filenode_new, cut_off_limit=None,
                      ignore_whitespace=True, line_context=3,
                      enable_comments=False):
          """
          Builds the HTML diff table for a pair of file nodes.

          A missing ``filenode_old`` is replaced by an empty node at the path of
          ``filenode_new``. Binary nodes and nodes over the cut off limit are not
          diffed; a one-row table with a message is returned for them instead.

          :param filenode_old: node before the change, or None
          :param filenode_new: node after the change
          :param cut_off_limit: None disables the size check, -1 always gives
              the "too big" message, any other value must be greater than the
              size of both nodes for the diff to be rendered
          :param ignore_whitespace: passed on to ``get_gitdiff``
          :param line_context: passed to ``get_gitdiff`` as ``context``
          :param enable_comments: passed on to ``DiffProcessor.as_html``
          :returns: tuple of ``(size, cs1, cs2, diff, stats)`` where cs1/cs2 are
              the raw ids of the old/new changesets
          """
          if filenode_old is None:
              filenode_old = FileNode(filenode_new.path, '', EmptyChangeset())

          # values used by every branch that does not render a real diff
          stats = (0, 0)
          size = 0

          if filenode_old.is_binary or filenode_new.is_binary:
              diff = wrap_to_table(_('Binary file'))
          elif cut_off_limit == -1 or (
                  cut_off_limit is not None and
                  not (filenode_old.size < cut_off_limit and
                       filenode_new.size < cut_off_limit)):
              diff = wrap_to_table(_('Changeset was too big and was cut off, use '
                                     'diff menu to display this diff'))
          else:
              raw_diff = get_gitdiff(filenode_old, filenode_new,
                                     ignore_whitespace=ignore_whitespace,
                                     context=line_context)
              processor = DiffProcessor(raw_diff, format='gitdiff')
              diff = processor.as_html(enable_comments=enable_comments)
              stats = processor.stat()
              size = len(diff or '')

          if not diff:
              # nothing rendered: either a submodule or really no change
              submodules = [node for node in (filenode_new, filenode_old)
                            if isinstance(node, SubModuleNode)]
              if submodules:
                  diff = wrap_to_table(escape('Submodule %r' % submodules[0]))
              else:
                  diff = wrap_to_table(_('No changes detected'))

          return (size, filenode_old.changeset.raw_id,
                  filenode_new.changeset.raw_id, diff, stats)

      def get_gitdiff(filenode_old, filenode_new, ignore_whitespace=True, context=3):
          """
          Returns git style diff between given ``filenode_old`` and ``filenode_new``.

          An empty string is returned when either node is a submodule.

          :param filenode_old: node before the change
          :param filenode_new: node after the change; its changeset provides
              the repository used to produce the diff
          :param ignore_whitespace: ignore whitespaces in diff
          :param context: number of context lines, a falsy value means 3
          :raises VCSError: when one of the nodes is not a ``FileNode``
          """
          # make sure we pass in default context
          if not context:
              context = 3

          # submodules have no textual diff
          for node in (filenode_new, filenode_old):
              if isinstance(node, SubModuleNode):
                  return ''

          for node in (filenode_old, filenode_new):
              if not isinstance(node, FileNode):
                  raise VCSError("Given object should be FileNode object, not %s"
                      % node.__class__)

          repo = filenode_new.changeset.repository
          # changesets without ``raw_id`` are diffed as the empty changeset
          revisions = [getattr(node.changeset, 'raw_id', repo.EMPTY_CHANGESET)
                       for node in (filenode_old, filenode_new)]

          return repo.get_diff(revisions[0], revisions[1], filenode_new.path,
                               ignore_whitespace, context)

      # Codes identifying the kind of operation performed on a file in a diff;
      # DiffProcessor._parse_gitdiff uses them as keys of the per-file
      # ``stats['ops']`` dict.
      NEW_FILENODE = 1

      DEL_FILENODE = 2

      MOD_FILENODE = 3

      RENAMED_FILENODE = 4

      CHMOD_FILENODE = 5

      BIN_FILENODE = 6

      class DiffLimitExceeded(Exception):
          """Signals that the processed diff grew beyond the configured limit."""

      class LimitedDiffContainer(object):
          """
          Iterable wrapper around a parsed diff which additionally carries the
          diff limit and the diff size reached when the wrapper was created.
          """

          def __init__(self, diff_limit, cur_diff_size, diff):
              self.diff, self.diff_limit, self.cur_diff_size = (
                  diff, diff_limit, cur_diff_size)

          def __iter__(self):
              # lazily hand out the items of the wrapped diff
              for item in self.diff:
                  yield item

      class DiffProcessor(object):

          """

          Give it a unified or git diff and it returns a list of the files that were

          mentioned in the diff together with a dict of meta information that

          can be used to render it in a HTML template.

          """

          # Hunk header ``@@ -<old start>[,<old count>] +<new start>[,<new count>] @@``
          # followed by optional trailing text; the five groups are old start,
          # old count, new start, new count and the trailing text.
          _chunk_re = re.compile(r'^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(.*)')

          # The "\ No newline at end of file" marker line of a diff.
          _newline_marker = re.compile(r'^\\ No newline at end of file')

          # Header of a single file section when ``vcs`` is 'git'. The pattern
          # starts right after ``diff --git`` (note the commented out first
          # line); every extended header line is optional and exposed as a
          # named group.
          _git_header_re = re.compile(r"""

              #^diff[ ]--git

                  [ ]a/(?P<a_path>.+?)[ ]b/(?P<b_path>.+?)\n

              (?:^similarity[ ]index[ ](?P<similarity_index>\d+)%\n

                 ^rename[ ]from[ ](?P<rename_from>\S+)\n

                 ^rename[ ]to[ ](?P<rename_to>\S+)(?:\n|$))?

              (?:^old[ ]mode[ ](?P<old_mode>\d+)\n

                 ^new[ ]mode[ ](?P<new_mode>\d+)(?:\n|$))?

              (?:^new[ ]file[ ]mode[ ](?P<new_file_mode>.+)(?:\n|$))?

              (?:^deleted[ ]file[ ]mode[ ](?P<deleted_file_mode>.+)(?:\n|$))?

              (?:^index[ ](?P<a_blob_id>[0-9A-Fa-f]+)

                  \.\.(?P<b_blob_id>[0-9A-Fa-f]+)[ ]?(?P<b_mode>.+)?(?:\n|$))?

              (?:^(?P<bin_patch>GIT[ ]binary[ ]patch)(?:\n|$))?

              (?:^---[ ](a/(?P<a_file>.+)|/dev/null)(?:\n|$))?

              (?:^\+\+\+[ ](b/(?P<b_file>.+)|/dev/null)(?:\n|$))?

          """, re.VERBOSE | re.MULTILINE)

          # Same as above for ``vcs`` 'hg'. It differs from the git pattern in
          # the order of the optional parts: the mode change comes first and
          # the similarity index is optional independently of the rename lines.
          _hg_header_re = re.compile(r"""

              #^diff[ ]--git

                  [ ]a/(?P<a_path>.+?)[ ]b/(?P<b_path>.+?)\n

              (?:^old[ ]mode[ ](?P<old_mode>\d+)\n

                 ^new[ ]mode[ ](?P<new_mode>\d+)(?:\n|$))?

              (?:^similarity[ ]index[ ](?P<similarity_index>\d+)%(?:\n|$))?

              (?:^rename[ ]from[ ](?P<rename_from>\S+)\n

                 ^rename[ ]to[ ](?P<rename_to>\S+)(?:\n|$))?

              (?:^new[ ]file[ ]mode[ ](?P<new_file_mode>.+)(?:\n|$))?

              (?:^deleted[ ]file[ ]mode[ ](?P<deleted_file_mode>.+)(?:\n|$))?

              (?:^index[ ](?P<a_blob_id>[0-9A-Fa-f]+)

                  \.\.(?P<b_blob_id>[0-9A-Fa-f]+)[ ]?(?P<b_mode>.+)?(?:\n|$))?

              (?:^(?P<bin_patch>GIT[ ]binary[ ]patch)(?:\n|$))?

              (?:^---[ ](a/(?P<a_file>.+)|/dev/null)(?:\n|$))?

              (?:^\+\+\+[ ](b/(?P<b_file>.+)|/dev/null)(?:\n|$))?

          """, re.VERBOSE | re.MULTILINE)

          # Used for the inline highlighter word split: splits on the HTML
          # entities produced by escaping (&gt; &lt; &amp;) and on non-word
          # characters, keeping the separators in the result.
          _token_re = re.compile(r'()(&gt;|&lt;|&amp;|\W+?)')

          def __init__(self, diff, vcs='hg', format='gitdiff', diff_limit=None):

              """

              :param diff:   a text in diff format

              :param vcs: type of version controll hg or git

              :param format: format of diff passed, `udiff` or `gitdiff`

              :param diff_limit: define the size of diff that is considered "big"

                  based on that parameter cut off will be triggered, set to None

                  to show full diff

              """

              if not isinstance(diff, basestring):

                  raise Exception('Diff must be a basestring got %s instead' % type(diff))

              self._diff = diff

              self._format = format

              self.adds = 0

              self.removes = 0

              # calculate diff size

              self.diff_size = len(diff)

              self.diff_limit = diff_limit

              self.cur_diff_size = 0

              self.parsed = False

              self.parsed_diff = []

              self.vcs = vcs

              if format == 'gitdiff':

                  self.differ = self._highlight_line_difflib

                  self._parser = self._parse_gitdiff

              else:

                  self.differ = self._highlight_line_udiff

                  self._parser = self._parse_udiff

          def _copy_iterator(self):

              """

              make a fresh copy of generator, we should not iterate thru

              an original as it's needed for repeating operations on

              this instance of DiffProcessor

              """

              self.__udiff, iterator_copy = tee(self.__udiff)

              return iterator_copy

          def _escaper(self, string):
              """
              Escapes the HTML special chars of one diff line and enforces the
              diff limit.

              The length of every line passed in is added to
              ``self.cur_diff_size`` before the limit is checked.

              :param string: a single line of the diff
              :raises DiffLimitExceeded: when the accumulated size is over
                  ``self.diff_limit`` (never with a limit of None)
              """
              self.cur_diff_size += len(string)

              # called once per consumed line, so the limit check happens
              # while the diff is being parsed
              limit = self.diff_limit
              if limit is not None and self.cur_diff_size > limit:
                  raise DiffLimitExceeded('Diff Limit Exceeded')

              escaped = safe_unicode(string)
              # '&' has to go first, the other entities contain it
              for char, entity in (('&', '&amp;'), ('<', '&lt;'), ('>', '&gt;')):
                  escaped = escaped.replace(char, entity)
              return escaped

          def _line_counter(self, l):
              """
              Bumps the total adds/removes of this diff for one raw line.

              Lines starting with ``+++`` or ``---`` (file headers) are not
              counted.

              :param l: a single raw line of the diff
              :returns: the line converted with ``safe_unicode``
              """
              if l.startswith('+'):
                  if not l.startswith('+++'):
                      self.adds += 1
              elif l.startswith('-'):
                  if not l.startswith('---'):
                      self.removes += 1
              return safe_unicode(l)

          def _highlight_line_difflib(self, line, next_):

              """

              Highlight inline changes in both lines.

              """

              if line['action'] == 'del':

                  old, new = line, next_

              else:

                  old, new = next_, line

              oldwords = self._token_re.split(old['line'])

              newwords = self._token_re.split(new['line'])

              sequence = difflib.SequenceMatcher(None, oldwords, newwords)

              oldfragments, newfragments = [], []

              for tag, i1, i2, j1, j2 in sequence.get_opcodes():

                  oldfrag = ''.join(oldwords[i1:i2])

                  newfrag = ''.join(newwords[j1:j2])

                  if tag != 'equal':

                      if oldfrag:

                          oldfrag = '<del>%s</del>' % oldfrag

                      if newfrag:

                          newfrag = '<ins>%s</ins>' % newfrag

                  oldfragments.append(oldfrag)

                  newfragments.append(newfrag)

              old['line'] = "".join(oldfragments)

              new['line'] = "".join(newfragments)

          def _highlight_line_udiff(self, line, next_):

              """

              Highlight inline changes in both lines.

              """

              start = 0

              limit = min(len(line['line']), len(next_['line']))

              while start < limit and line['line'][start] == next_['line'][start]:

                  start += 1

              end = -1

              limit -= start

              while -end <= limit and line['line'][end] == next_['line'][end]:

                  end -= 1

              end += 1

              if start or end:

                  def do(l):

                      last = end + len(l['line'])

                      if l['action'] == 'add':

                          tag = 'ins'

                      else:

                          tag = 'del'

                      l['line'] = '%s<%s>%s</%s>%s' % (

                          l['line'][:start],

                          tag,

                          l['line'][start:last],

                          tag,

                          l['line'][last:]

                      )

                  do(line)

                  do(next_)

          def _get_header(self, diff_chunk):

              """

              parses the diff header, and returns parts, and leftover diff

              parts consists of 14 elements::

                  a_path, b_path, similarity_index, rename_from, rename_to,

                  old_mode, new_mode, new_file_mode, deleted_file_mode,

                  a_blob_id, b_blob_id, b_mode, a_file, b_file

              :param diff_chunk:

              """

              if self.vcs == 'git':

                  match = self._git_header_re.match(diff_chunk)

                  diff = diff_chunk[match.end():]

                  return match.groupdict(), imap(self._escaper, diff.splitlines(1))

              elif self.vcs == 'hg':

                  match = self._hg_header_re.match(diff_chunk)

                  diff = diff_chunk[match.end():]

                  return match.groupdict(), imap(self._escaper, diff.splitlines(1))

              else:

                  raise Exception('VCS type %s is not supported' % self.vcs)

          def _clean_line(self, line, command):

              if command in ['+', '-', ' ']:

                  #only modify the line if it's actually a diff thing

                  line = line[1:]

              return line

          def _parse_gitdiff(self, inline_diff=True):

              _files = []

              diff_container = lambda arg: arg

              ##split the diff in chunks of separate --git a/file b/file chunks

              for raw_diff in ('\n' + self._diff).split('\ndiff --git')[1:]:

                  head, diff = self._get_header(raw_diff)

                  op = None

                  stats = {

                      'added': 0,

                      'deleted': 0,

                      'binary': False,

                      'ops': {},

                  }

                  if head['deleted_file_mode']:

                      op = 'D'

                      stats['binary'] = True

                      stats['ops'][DEL_FILENODE] = 'deleted file'

                  elif head['new_file_mode']:

                      op = 'A'

                      stats['binary'] = True

                      stats['ops'][NEW_FILENODE] = 'new file %s' % head['new_file_mode']

                  else:  # modify operation, can be cp, rename, chmod

                      # CHMOD

                      if head['new_mode'] and head['old_mode']:

                          op = 'M'

                          stats['binary'] = True

                          stats['ops'][CHMOD_FILENODE] = ('modified file chmod %s => %s'

                                              % (head['old_mode'], head['new_mode']))

                      # RENAME

                      if (head['rename_from'] and head['rename_to']

                            and head['rename_from'] != head['rename_to']):

                          op = 'M'

                          stats['binary'] = True

                          stats['ops'][RENAMED_FILENODE] = ('file renamed from %s to %s'

                                          % (head['rename_from'], head['rename_to']))

                      # FALL BACK: detect missed old style add or remove

                      if op is None:

                          if not head['a_file'] and head['b_file']:

                              op = 'A'

                              stats['binary'] = True

                              stats['ops'][NEW_FILENODE] = 'new file'

                          elif head['a_file'] and not head['b_file']:

                              op = 'D'

                              stats['binary'] = True

                              stats['ops'][DEL_FILENODE] = 'deleted file'

                      # it's not ADD not DELETE

                      if op is None:

                          op = 'M'

                          stats['binary'] = True

                          stats['ops'][MOD_FILENODE] = 'modified file'

                  # a real non-binary diff

                  if head['a_file'] or head['b_file']:

                      try:

                          chunks, _stats = self._parse_lines(diff)

                          stats['binary'] = False

                          stats['added'] = _stats[0]

                          stats['deleted'] = _stats[1]

                          # explicit mark that it's a modified file

                          if op == 'M':

                              stats['ops'][MOD_FILENODE] = 'modified file'

                      except DiffLimitExceeded:

                          diff_container = lambda _diff: \

                              LimitedDiffContainer(self.diff_limit,

                                                  self.cur_diff_size, _diff)

                          break

                  else:  # GIT binary patch (or empty diff)

                      # GIT Binary patch

                      if head['bin_patch']:

                          stats['ops'][BIN_FILENODE] = 'binary diff not shown'

                      chunks = []

                  chunks.insert(0, [{

                      'old_lineno': '',

                      'new_lineno': '',

                      'action':     'context',

                      'line':       msg,

                      } for _op, msg in stats['ops'].iteritems()

                        if _op not in [MOD_FILENODE]])

                  _files.append({

                      'filename':         head['b_path'],

                      'old_revision':     head['a_blob_id'],

                      'new_revision':     head['b_blob_id'],

                      'chunks':           chunks,

                      'operation':        op,

                      'stats':            stats,

                  })

              sorter = lambda info: {'A': 0, 'M': 1, 'D': 2}.get(info['operation'])

              if not inline_diff:

                  return diff_container(sorted(_files, key=sorter))

              # highlight inline changes

              for diff_data in _files:

                  for chunk in diff_data['chunks']:

                      lineiter = iter(chunk)

                      try:

                          while 1:

                              line = lineiter.next()

                              if line['action'] not in ['unmod', 'context']:

                                  nextline = lineiter.next()

                                  if nextline['action'] in ['unmod', 'context'] or \

                                     nextline['action'] == line['action']:

                                      continue

                                  self.differ(line, nextline)

                      except StopIteration:

                          pass

              return diff_container(sorted(_files, key=sorter))

          def _parse_udiff(self, inline_diff=True):

              raise NotImplementedError()

          def _parse_lines(self, diff):

              """

              Parse the diff an return data for the template.

              """

              lineiter = iter(diff)

              stats = [0, 0]

              try:

                  chunks = []

                  line = lineiter.next()

                  while line:

                      lines = []

                      chunks.append(lines)

                      match = self._chunk_re.match(line)

                      if not match:

                          break

                      gr = match.groups()

                      (old_line, old_end,

                       new_line, new_end) = [int(x or 1) for x in gr[:-1]]

                      old_line -= 1

                      new_line -= 1

                      context = len(gr) == 5

                      old_end += old_line

                      new_end += new_line

                      if context:

                          # skip context only if it's first line

                          if int(gr[0]) > 1:

                              lines.append({

                                  'old_lineno': '...',

                                  'new_lineno': '...',

                                  'action':     'context',

                                  'line':       line,

                              })

                      line = lineiter.next()

                      while old_line < old_end or new_line < new_end:

                          command = ' '

                          if line:

                              command = line[0]

                          affects_old = affects_new = False

                          # ignore those if we don't expect them

                          if command in '#@':

                              continue

                          elif command == '+':

                              affects_new = True

                              action = 'add'

                              stats[0] += 1

                          elif command == '-':

                              affects_old = True

                              action = 'del'

                              stats[1] += 1

                          else:

                              affects_old = affects_new = True

                              action = 'unmod'

                          if not self._newline_marker.match(line):

                              old_line += affects_old

                              new_line += affects_new

                              lines.append({

                                  'old_lineno':   affects_old and old_line or '',

                                  'new_lineno':   affects_new and new_line or '',

                                  'action':       action,

                                  'line':         self._clean_line(line, command)

                              })

                          line = lineiter.next()

                          if self._newline_marker.match(line):

                              # we need to append to lines, since this is not

                              # counted in the line specs of diff

                              lines.append({

                                  'old_lineno':   '...',

                                  'new_lineno':   '...',

                                  'action':       'context',

                                  'line':         self._clean_line(line, command)

                              })

              except StopIteration:

                  pass

              return chunks, stats

          def _safe_id(self, idstring):

              """Make a string safe for including in an id attribute.

              The HTML spec says that id attributes 'must begin with

              a letter ([A-Za-z]) and may be followed by any number

              of letters, digits ([0-9]), hyphens ("-"), underscores

              ("_"), colons (":"), and periods (".")'. These regexps

              are slightly over-zealous, in that they remove colons

              and periods unnecessarily.

              Whitespace is transformed into underscores, and then

              anything which is not a hyphen or a character that

              matches \w (alphanumerics and underscore) is removed.

              """

              # Transform all whitespace to underscore

              idstring = re.sub(r'\s', "_", '%s' % idstring)

              # Remove everything that is not a hyphen or a member of \w

              idstring = re.sub(r'(?!-)\W', "", idstring).lower()

              return idstring

          def prepare(self, inline_diff=True):

              """

              Prepare the passed udiff for HTML rendering. It'l return a list

              of dicts with diff information

              """

              parsed = self._parser(inline_diff=inline_diff)

              self.parsed = True

              self.parsed_diff = parsed

              return parsed

          def as_raw(self, diff_lines=None):

              """

              Returns raw string diff

              """

              return self._diff

              #return u''.join(imap(self._line_counter, self._diff.splitlines(1)))

          def as_html(self, table_class='code-difftable', line_class='line',

                      old_lineno_class='lineno old', new_lineno_class='lineno new',

                      code_class='code', enable_comments=False, parsed_lines=None):

              """

              Return given diff as html table with customized css classes

              """

              def _link_to_if(condition, label, url):

                  """

                  Generates a link if condition is meet or just the label if not.

                  """

                  if condition:

                      return '''<a href="%(url)s">%(label)s</a>''' % {

                          'url': url,

                          'label': label

                      }

                  else:

                      return label

              if not self.parsed:

                  self.prepare()

              diff_lines = self.parsed_diff

              if parsed_lines:

                  diff_lines = parsed_lines

              _html_empty = True

              _html = []

              _html.append('''<table class="%(table_class)s">\n''' % {

                  'table_class': table_class

              })

              for diff in diff_lines:

                  for line in diff['chunks']:

                      _html_empty = False

                      for change in line:

                          _html.append('''<tr class="%(lc)s %(action)s">\n''' % {

                              'lc': line_class,

                              'action': change['action']

                          })

                          anchor_old_id = ''

                          anchor_new_id = ''

                          anchor_old = "%(filename)s_o%(oldline_no)s" % {

                              'filename': self._safe_id(diff['filename']),

                              'oldline_no': change['old_lineno']

                          }

                          anchor_new = "%(filename)s_n%(oldline_no)s" % {

                              'filename': self._safe_id(diff['filename']),

                              'oldline_no': change['new_lineno']

                          }

                          cond_old = (change['old_lineno'] != '...' and

                                      change['old_lineno'])

                          cond_new = (change['new_lineno'] != '...' and

                                      change['new_lineno'])

                          if cond_old:

                              anchor_old_id = 'id="%s"' % anchor_old

                          if cond_new:

                              anchor_new_id = 'id="%s"' % anchor_new

                          ###########################################################

                          # OLD LINE NUMBER

                          ###########################################################

                          _html.append('''\t<td %(a_id)s class="%(olc)s">''' % {

                              'a_id': anchor_old_id,

                              'olc': old_lineno_class

                          })

                          _html.append('''%(link)s''' % {

                              'link': _link_to_if(True, change['old_lineno'],

                                                  '#%s' % anchor_old)

                          })

                          _html.append('''</td>\n''')

                          ###########################################################

                          # NEW LINE NUMBER

                          ###########################################################

                          _html.append('''\t<td %(a_id)s class="%(nlc)s">''' % {

                              'a_id': anchor_new_id,

                              'nlc': new_lineno_class

                          })

                          _html.append('''%(link)s''' % {

                              'link': _link_to_if(True, change['new_lineno'],

                                                  '#%s' % anchor_new)

                          })

                          _html.append('''</td>\n''')

                          ###########################################################

                          # CODE

                          ###########################################################

                          comments = '' if enable_comments else 'no-comment'

                          _html.append('''\t<td class="%(cc)s %(inc)s">''' % {

                              'cc': code_class,

                              'inc': comments

                          })

                          _html.append('''\n\t\t<pre>%(code)s</pre>\n''' % {

                              'code': change['line']

                          })

                          _html.append('''\t</td>''')

                          _html.append('''\n</tr>\n''')

              _html.append('''</table>''')

              if _html_empty:

                  return None

              return ''.join(_html)

          def stat(self):

              """

              Returns tuple of added, and removed lines for this instance

              """

              return self.adds, self.removes

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

				# -*- coding: utf-8 -*-
				"""
				rhodecode.lib.diffs
				~~~~~~~~~~~~~~~~~~~

				Set of diffing helpers, previously part of vcs


				:created_on: Dec 4, 2011
				:author: marcink
				:copyright: (C) 2010-2012 Marcin Kuzminski <marcin@python-works.com>
				:original copyright: 2007-2008 by Armin Ronacher
				:license: GPLv3, see COPYING for more details.
				"""
				# This program is free software: you can redistribute it and/or modify
				# it under the terms of the GNU General Public License as published by
				# the Free Software Foundation, either version 3 of the License, or
				# (at your option) any later version.
				#
				# This program is distributed in the hope that it will be useful,
				# but WITHOUT ANY WARRANTY; without even the implied warranty of
				# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
				# GNU General Public License for more details.
				#
				# You should have received a copy of the GNU General Public License
				# along with this program. If not, see <http://www.gnu.org/licenses/>.

				import re
				import difflib
				import logging

				from itertools import tee, imap

				from pylons.i18n.translation import _

				from rhodecode.lib.vcs.exceptions import VCSError
				from rhodecode.lib.vcs.nodes import FileNode, SubModuleNode
				from rhodecode.lib.vcs.backends.base import EmptyChangeset
				from rhodecode.lib.helpers import escape
				from rhodecode.lib.utils2 import safe_unicode, safe_str

				# module-level logger, named after this module via ``__name__``
				log = logging.getLogger(__name__)


				def wrap_to_table(str_):
				    """
				    Embed ``str_`` in a one-row ``code-difftable`` HTML table.

				    The value is interpolated with ``%`` into the ``<pre>`` cell as-is;
				    no escaping is performed here.

				    :param str_: text (or markup) to place inside the table cell
				    :returns: the table markup as a string
				    """
				    markup = '''<table class="code-difftable">
				<tr class="line no-comment">
				<td class="lineno new"></td>
				<td class="code no-comment"><pre>%s</pre></td>
				</tr>
				</table>'''
				    return markup % str_


				def wrapped_diff(filenode_old, filenode_new, cut_off_limit=None,
				                 ignore_whitespace=True, line_context=3,
				                 enable_comments=False):
				    """
				    Return a diff wrapped into a table; checks ``cut_off_limit`` and
				    presents a proper message when the diff is not rendered.

				    :param filenode_old: old file node; ``None`` is replaced by an empty
				        ``FileNode`` at ``filenode_new.path`` on an ``EmptyChangeset``
				    :param filenode_new: new file node
				    :param cut_off_limit: ``None`` always renders the diff, ``-1`` never
				        renders it, any other value renders it only when both node sizes
				        are strictly below the limit
				    :param ignore_whitespace: passed through to :func:`get_gitdiff`
				    :param line_context: passed to :func:`get_gitdiff` as ``context``
				    :param enable_comments: passed through to ``DiffProcessor.as_html``
				    :returns: tuple ``(size, cs1, cs2, diff, stats)`` where ``size`` is the
				        length of the rendered html (0 for the message-only cases), ``cs1``
				        and ``cs2`` are the raw ids of the old/new changesets, ``diff`` is
				        the html and ``stats`` is the ``(added, removed)`` tuple
				    """

				    if filenode_old is None:
				        filenode_old = FileNode(filenode_new.path, '', EmptyChangeset())

				    if filenode_old.is_binary or filenode_new.is_binary:
				        diff = wrap_to_table(_('Binary file'))
				        stats = (0, 0)
				        size = 0

				    elif cut_off_limit != -1 and (
				            cut_off_limit is None or
				            (filenode_old.size < cut_off_limit and
				             filenode_new.size < cut_off_limit)):

				        f_gitdiff = get_gitdiff(filenode_old, filenode_new,
				                                ignore_whitespace=ignore_whitespace,
				                                context=line_context)
				        diff_processor = DiffProcessor(f_gitdiff, format='gitdiff')

				        diff = diff_processor.as_html(enable_comments=enable_comments)
				        stats = diff_processor.stat()
				        # as_html() returns None for an empty diff
				        size = len(diff or '')
				    else:
				        diff = wrap_to_table(_('Changeset was too big and was cut off, use '
				                               'diff menu to display this diff'))
				        stats = (0, 0)
				        size = 0

				    if not diff:
				        # a real list (not a lazy filter object) so that the truth test
				        # and the [0] lookup below behave the same on every interpreter
				        submodules = [node for node in (filenode_new, filenode_old)
				                      if isinstance(node, SubModuleNode)]
				        if submodules:
				            diff = wrap_to_table(escape('Submodule %r' % submodules[0]))
				        else:
				            diff = wrap_to_table(_('No changes detected'))

				    cs1 = filenode_old.changeset.raw_id
				    cs2 = filenode_new.changeset.raw_id

				    return size, cs1, cs2, diff, stats


				def get_gitdiff(filenode_old, filenode_new, ignore_whitespace=True, context=3):
				    """
				    Returns git style diff between given ``filenode_old`` and ``filenode_new``.

				    :param filenode_old: old ``FileNode``
				    :param filenode_new: new ``FileNode``; its changeset's repository is
				        the one asked for the diff, and its path is the path diffed
				    :param ignore_whitespace: ignore whitespaces in diff
				    :param context: number of context lines; any falsy value (including
				        ``0``) falls back to 3
				    :returns: whatever ``repo.get_diff`` returns, or ``''`` when either
				        node is a ``SubModuleNode``
				    :raises VCSError: when either node is not a ``FileNode``
				    """
				    # make sure we pass in default context
				    context = context or 3

				    # a real list (not a lazy filter object) so the truth test below is
				    # meaningful on every interpreter
				    submodules = [node for node in (filenode_new, filenode_old)
				                  if isinstance(node, SubModuleNode)]
				    if submodules:
				        return ''

				    for filenode in (filenode_old, filenode_new):
				        if not isinstance(filenode, FileNode):
				            raise VCSError("Given object should be FileNode object, not %s"
				                           % filenode.__class__)

				    repo = filenode_new.changeset.repository
				    # changesets without a raw_id are mapped to the repo's empty changeset
				    old_raw_id = getattr(filenode_old.changeset, 'raw_id', repo.EMPTY_CHANGESET)
				    new_raw_id = getattr(filenode_new.changeset, 'raw_id', repo.EMPTY_CHANGESET)

				    vcs_gitdiff = repo.get_diff(old_raw_id, new_raw_id, filenode_new.path,
				                                ignore_whitespace, context)
				    return vcs_gitdiff

				# Integer tags used as keys of the per-file ``stats['ops']`` dict.
				(NEW_FILENODE,
				 DEL_FILENODE,
				 MOD_FILENODE,
				 RENAMED_FILENODE,
				 CHMOD_FILENODE,
				 BIN_FILENODE) = range(1, 7)


				class DiffLimitExceeded(Exception):
				    """Signals that the accumulated diff size went over the diff limit."""


				class LimitedDiffContainer(object):
				    """
				    Iterable wrapper around a diff that also carries the limit and the
				    size that had been reached when it was built.
				    """

				    def __init__(self, diff_limit, cur_diff_size, diff):
				        # plain attribute storage, nothing is validated or copied
				        self.diff_limit = diff_limit
				        self.cur_diff_size = cur_diff_size
				        self.diff = diff

				    def __iter__(self):
				        # generator yielding the wrapped diff's items in order
				        for entry in self.diff:
				            yield entry


				class DiffProcessor(object):
				    """
				    Give it a unified or git diff and it returns a list of the files that were
				    mentioned in the diff together with a dict of meta information that
				    can be used to render it in a HTML template.
				    """
				    # hunk header, e.g. ``@@ -1,4 +1,6 @@ trailing text``
				    _chunk_re = re.compile(r'^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(.*)')
				    _newline_marker = re.compile(r'^\\ No newline at end of file')

				    # Both header patterns are matched against the text that follows the
				    # ``diff --git`` prefix (_parse_gitdiff splits on it), which is why the
				    # first pattern line is only a comment.
				    _git_header_re = re.compile(r"""
				        #^diff[ ]--git
				            [ ]a/(?P<a_path>.+?)[ ]b/(?P<b_path>.+?)\n
				        (?:^similarity[ ]index[ ](?P<similarity_index>\d+)%\n
				           ^rename[ ]from[ ](?P<rename_from>\S+)\n
				           ^rename[ ]to[ ](?P<rename_to>\S+)(?:\n|$))?
				        (?:^old[ ]mode[ ](?P<old_mode>\d+)\n
				           ^new[ ]mode[ ](?P<new_mode>\d+)(?:\n|$))?
				        (?:^new[ ]file[ ]mode[ ](?P<new_file_mode>.+)(?:\n|$))?
				        (?:^deleted[ ]file[ ]mode[ ](?P<deleted_file_mode>.+)(?:\n|$))?
				        (?:^index[ ](?P<a_blob_id>[0-9A-Fa-f]+)
				            \.\.(?P<b_blob_id>[0-9A-Fa-f]+)[ ]?(?P<b_mode>.+)?(?:\n|$))?
				        (?:^(?P<bin_patch>GIT[ ]binary[ ]patch)(?:\n|$))?
				        (?:^---[ ](a/(?P<a_file>.+)|/dev/null)(?:\n|$))?
				        (?:^\+\+\+[ ](b/(?P<b_file>.+)|/dev/null)(?:\n|$))?
				    """, re.VERBOSE | re.MULTILINE)
				    _hg_header_re = re.compile(r"""
				        #^diff[ ]--git
				            [ ]a/(?P<a_path>.+?)[ ]b/(?P<b_path>.+?)\n
				        (?:^old[ ]mode[ ](?P<old_mode>\d+)\n
				           ^new[ ]mode[ ](?P<new_mode>\d+)(?:\n|$))?
				        (?:^similarity[ ]index[ ](?P<similarity_index>\d+)%(?:\n|$))?
				        (?:^rename[ ]from[ ](?P<rename_from>\S+)\n
				           ^rename[ ]to[ ](?P<rename_to>\S+)(?:\n|$))?
				        (?:^new[ ]file[ ]mode[ ](?P<new_file_mode>.+)(?:\n|$))?
				        (?:^deleted[ ]file[ ]mode[ ](?P<deleted_file_mode>.+)(?:\n|$))?
				        (?:^index[ ](?P<a_blob_id>[0-9A-Fa-f]+)
				            \.\.(?P<b_blob_id>[0-9A-Fa-f]+)[ ]?(?P<b_mode>.+)?(?:\n|$))?
				        (?:^(?P<bin_patch>GIT[ ]binary[ ]patch)(?:\n|$))?
				        (?:^---[ ](a/(?P<a_file>.+)|/dev/null)(?:\n|$))?
				        (?:^\+\+\+[ ](b/(?P<b_file>.+)|/dev/null)(?:\n|$))?
				    """, re.VERBOSE | re.MULTILINE)

				    # used for inline highlighter word split; lines are already html-escaped
				    # by _escaper at that point, so the three entities are kept as single
				    # tokens instead of being cut at ``&`` and ``;``
				    _token_re = re.compile(r'()(&gt;|&lt;|&amp;|\W+?)')

				    def __init__(self, diff, vcs='hg', format='gitdiff', diff_limit=None):
				        """
				        :param diff: a text in diff format
				        :param vcs: type of version control, ``hg`` or ``git``
				        :param format: format of diff passed, `udiff` or `gitdiff`
				        :param diff_limit: define the size of diff that is considered "big"
				            based on that parameter cut off will be triggered, set to None
				            to show full diff
				        :raises Exception: when ``diff`` is not a ``basestring``
				        """
				        if not isinstance(diff, basestring):
				            raise Exception('Diff must be a basestring got %s instead' % type(diff))

				        self._diff = diff
				        self._format = format
				        self.adds = 0
				        self.removes = 0
				        # calculate diff size
				        self.diff_size = len(diff)
				        self.diff_limit = diff_limit
				        self.cur_diff_size = 0
				        self.parsed = False
				        self.parsed_diff = []
				        self.vcs = vcs

				        if format == 'gitdiff':
				            self.differ = self._highlight_line_difflib
				            self._parser = self._parse_gitdiff
				        else:
				            self.differ = self._highlight_line_udiff
				            self._parser = self._parse_udiff

				    def _copy_iterator(self):
				        """
				        make a fresh copy of generator, we should not iterate thru
				        an original as it's needed for repeating operations on
				        this instance of DiffProcessor
				        """
				        # NOTE(review): nothing in this class assigns ``self.__udiff``, so
				        # as written this raises AttributeError when called - confirm
				        # whether the method is still used anywhere
				        self.__udiff, iterator_copy = tee(self.__udiff)
				        return iterator_copy

				    def _escaper(self, string):
				        """
				        Escaper for diff escapes special chars and checks the diff limit

				        :param string: one raw diff line
				        :returns: the line as unicode with ``&``, ``<`` and ``>`` replaced by
				            their html entities
				        :raises DiffLimitExceeded: when the running size exceeds
				            ``self.diff_limit``
				        """

				        self.cur_diff_size += len(string)

				        # escaper gets iterated on each next() call and it checks if each
				        # parsed line doesn't exceed the diff limit
				        if self.diff_limit is not None and self.cur_diff_size > self.diff_limit:
				            raise DiffLimitExceeded('Diff Limit Exceeded')

				        # ``&`` has to go first, otherwise the entities produced for
				        # ``<`` and ``>`` would get escaped a second time
				        return safe_unicode(string).replace('&', '&amp;')\
				                .replace('<', '&lt;')\
				                .replace('>', '&gt;')

				    def _line_counter(self, l):
				        """
				        Checks each line and bumps total adds/removes for this diff

				        :param l: one raw diff line
				        :returns: the line converted with ``safe_unicode``
				        """
				        # ``+++`` / ``---`` are file header lines, not content changes
				        if l.startswith('+') and not l.startswith('+++'):
				            self.adds += 1
				        elif l.startswith('-') and not l.startswith('---'):
				            self.removes += 1
				        return safe_unicode(l)

				    def _highlight_line_difflib(self, line, next_):
				        """
				        Highlight inline changes in both lines.

				        Both line dicts are modified in place: tokens that differ are
				        wrapped in ``<del>`` (old line) or ``<ins>`` (new line).
				        """

				        if line['action'] == 'del':
				            old, new = line, next_
				        else:
				            old, new = next_, line

				        oldwords = self._token_re.split(old['line'])
				        newwords = self._token_re.split(new['line'])
				        sequence = difflib.SequenceMatcher(None, oldwords, newwords)

				        oldfragments, newfragments = [], []
				        for tag, i1, i2, j1, j2 in sequence.get_opcodes():
				            oldfrag = ''.join(oldwords[i1:i2])
				            newfrag = ''.join(newwords[j1:j2])
				            if tag != 'equal':
				                if oldfrag:
				                    oldfrag = '<del>%s</del>' % oldfrag
				                if newfrag:
				                    newfrag = '<ins>%s</ins>' % newfrag
				            oldfragments.append(oldfrag)
				            newfragments.append(newfrag)

				        old['line'] = "".join(oldfragments)
				        new['line'] = "".join(newfragments)

				    def _highlight_line_udiff(self, line, next_):
				        """
				        Highlight inline changes in both lines.

				        Strips the common prefix and common suffix of the two lines and
				        wraps what is left in ``<ins>``/``<del>`` (chosen per line from its
				        ``action``); both dicts are modified in place.
				        """
				        start = 0
				        limit = min(len(line['line']), len(next_['line']))
				        while start < limit and line['line'][start] == next_['line'][start]:
				            start += 1
				        end = -1
				        # the suffix scan must not run into the already matched prefix
				        limit -= start
				        while -end <= limit and line['line'][end] == next_['line'][end]:
				            end -= 1
				        end += 1
				        if start or end:
				            def do(l):
				                last = end + len(l['line'])
				                if l['action'] == 'add':
				                    tag = 'ins'
				                else:
				                    tag = 'del'
				                l['line'] = '%s<%s>%s</%s>%s' % (
				                    l['line'][:start],
				                    tag,
				                    l['line'][start:last],
				                    tag,
				                    l['line'][last:]
				                )
				            do(line)
				            do(next_)

				    def _get_header(self, diff_chunk):
				        """
				        parses the diff header, and returns parts, and leftover diff.
				        parts is the dict of named groups of the header pattern::

				            a_path, b_path, similarity_index, rename_from, rename_to,
				            old_mode, new_mode, new_file_mode, deleted_file_mode,
				            a_blob_id, b_blob_id, b_mode, bin_patch, a_file, b_file

				        :param diff_chunk: text of one file's diff, without the leading
				            ``diff --git``
				        :returns: ``(parts, lines)`` where ``lines`` lazily yields the
				            remaining diff lines (line ends kept) passed through
				            ``self._escaper``
				        :raises Exception: when ``self.vcs`` is neither ``git`` nor ``hg``
				        """

				        if self.vcs == 'git':
				            header_re = self._git_header_re
				        elif self.vcs == 'hg':
				            header_re = self._hg_header_re
				        else:
				            raise Exception('VCS type %s is not supported' % self.vcs)

				        match = header_re.match(diff_chunk)
				        diff = diff_chunk[match.end():]
				        # lazy on purpose: the diff limit is checked line by line while the
				        # consumer iterates
				        escaped = (self._escaper(l) for l in diff.splitlines(True))
				        return match.groupdict(), escaped

				    def _clean_line(self, line, command):
				        """
				        Drop the leading diff marker from ``line`` when ``command`` is one
				        of ``+``, ``-`` or a space; other lines are returned unchanged.
				        """
				        if command in ['+', '-', ' ']:
				            # only modify the line if it's actually a diff thing
				            line = line[1:]
				        return line

				    def _parse_gitdiff(self, inline_diff=True):
				        """
				        Split ``self._diff`` into per-file chunks and parse each of them.

				        :param inline_diff: when true, changed line pairs are additionally
				            run through ``self.differ`` for inline highlighting
				        :returns: list of per-file dicts (``filename``, ``old_revision``,
				            ``new_revision``, ``chunks``, ``operation``, ``stats``) sorted by
				            operation (added, modified, deleted); wrapped in a
				            ``LimitedDiffContainer`` when the diff limit was hit
				        """
				        _files = []
				        diff_container = lambda arg: arg

				        # split the diff in chunks of separate --git a/file b/file chunks
				        for raw_diff in ('\n' + self._diff).split('\ndiff --git')[1:]:
				            head, diff = self._get_header(raw_diff)

				            op = None
				            stats = {
				                'added': 0,
				                'deleted': 0,
				                'binary': False,
				                'ops': {},
				            }

				            if head['deleted_file_mode']:
				                op = 'D'
				                stats['binary'] = True
				                stats['ops'][DEL_FILENODE] = 'deleted file'

				            elif head['new_file_mode']:
				                op = 'A'
				                stats['binary'] = True
				                stats['ops'][NEW_FILENODE] = 'new file %s' % head['new_file_mode']
				            else:  # modify operation, can be cp, rename, chmod
				                # CHMOD
				                if head['new_mode'] and head['old_mode']:
				                    op = 'M'
				                    stats['binary'] = True
				                    stats['ops'][CHMOD_FILENODE] = (
				                        'modified file chmod %s => %s'
				                        % (head['old_mode'], head['new_mode']))
				                # RENAME
				                if (head['rename_from'] and head['rename_to']
				                        and head['rename_from'] != head['rename_to']):
				                    op = 'M'
				                    stats['binary'] = True
				                    stats['ops'][RENAMED_FILENODE] = (
				                        'file renamed from %s to %s'
				                        % (head['rename_from'], head['rename_to']))

				            # FALL BACK: detect missed old style add or remove
				            if op is None:
				                if not head['a_file'] and head['b_file']:
				                    op = 'A'
				                    stats['binary'] = True
				                    stats['ops'][NEW_FILENODE] = 'new file'

				                elif head['a_file'] and not head['b_file']:
				                    op = 'D'
				                    stats['binary'] = True
				                    stats['ops'][DEL_FILENODE] = 'deleted file'

				            # it's not ADD not DELETE
				            if op is None:
				                op = 'M'
				                stats['binary'] = True
				                stats['ops'][MOD_FILENODE] = 'modified file'

				            # a real non-binary diff
				            if head['a_file'] or head['b_file']:
				                try:
				                    chunks, _stats = self._parse_lines(diff)
				                    stats['binary'] = False
				                    stats['added'] = _stats[0]
				                    stats['deleted'] = _stats[1]
				                    # explicit mark that it's a modified file
				                    if op == 'M':
				                        stats['ops'][MOD_FILENODE] = 'modified file'

				                except DiffLimitExceeded:
				                    # stop parsing; files collected so far are returned
				                    # wrapped in a LimitedDiffContainer
				                    diff_container = lambda _diff: \
				                        LimitedDiffContainer(self.diff_limit,
				                                             self.cur_diff_size, _diff)
				                    break
				            else:  # GIT binary patch (or empty diff)
				                # GIT Binary patch
				                if head['bin_patch']:
				                    stats['ops'][BIN_FILENODE] = 'binary diff not shown'
				                chunks = []

				            # leading pseudo-chunk describing the file operations; plain
				            # "modified file" is not listed
				            chunks.insert(0, [{
				                'old_lineno': '',
				                'new_lineno': '',
				                'action': 'context',
				                'line': msg,
				            } for _op, msg in stats['ops'].items()
				                if _op not in [MOD_FILENODE]])

				            _files.append({
				                'filename': head['b_path'],
				                'old_revision': head['a_blob_id'],
				                'new_revision': head['b_blob_id'],
				                'chunks': chunks,
				                'operation': op,
				                'stats': stats,
				            })

				        sorter = lambda info: {'A': 0, 'M': 1, 'D': 2}.get(info['operation'])

				        if not inline_diff:
				            return diff_container(sorted(_files, key=sorter))

				        # highlight inline changes
				        for diff_data in _files:
				            for chunk in diff_data['chunks']:
				                lineiter = iter(chunk)
				                try:
				                    while True:
				                        line = next(lineiter)
				                        if line['action'] not in ['unmod', 'context']:
				                            nextline = next(lineiter)
				                            if nextline['action'] in ['unmod', 'context'] or \
				                                    nextline['action'] == line['action']:
				                                continue
				                            self.differ(line, nextline)
				                except StopIteration:
				                    # end of chunk reached, nothing left to pair up
				                    pass

				        return diff_container(sorted(_files, key=sorter))

				    def _parse_udiff(self, inline_diff=True):
				        """Not implemented; selected when ``format`` is not ``gitdiff``."""
				        raise NotImplementedError()

				    def _parse_lines(self, diff):
				        """
				        Parse the diff and return data for the template.

				        :param diff: iterable of diff lines starting at the first hunk header
				        :returns: ``(chunks, stats)``; ``chunks`` is a list of lists of line
				            dicts (``old_lineno``, ``new_lineno``, ``action``, ``line``) and
				            ``stats`` is ``[added, deleted]``
				        """

				        lineiter = iter(diff)
				        stats = [0, 0]

				        try:
				            chunks = []
				            line = next(lineiter)

				            while line:
				                lines = []
				                chunks.append(lines)

				                match = self._chunk_re.match(line)

				                if not match:
				                    break

				                gr = match.groups()
				                (old_line, old_end,
				                 new_line, new_end) = [int(x or 1) for x in gr[:-1]]
				                old_line -= 1
				                new_line -= 1

				                context = len(gr) == 5
				                old_end += old_line
				                new_end += new_line

				                if context:
				                    # skip context only if it's first line
				                    if int(gr[0]) > 1:
				                        lines.append({
				                            'old_lineno': '...',
				                            'new_lineno': '...',
				                            'action': 'context',
				                            'line': line,
				                        })

				                line = next(lineiter)

				                while old_line < old_end or new_line < new_end:
				                    command = ' '
				                    if line:
				                        command = line[0]

				                    affects_old = affects_new = False

				                    # ignore those if we don't expect them
				                    if command in '#@':
				                        # move on to the following line first; a bare
				                        # ``continue`` would re-test the same line forever
				                        line = next(lineiter)
				                        continue
				                    elif command == '+':
				                        affects_new = True
				                        action = 'add'
				                        stats[0] += 1
				                    elif command == '-':
				                        affects_old = True
				                        action = 'del'
				                        stats[1] += 1
				                    else:
				                        affects_old = affects_new = True
				                        action = 'unmod'

				                    if not self._newline_marker.match(line):
				                        old_line += affects_old
				                        new_line += affects_new
				                        lines.append({
				                            'old_lineno': affects_old and old_line or '',
				                            'new_lineno': affects_new and new_line or '',
				                            'action': action,
				                            'line': self._clean_line(line, command)
				                        })

				                    line = next(lineiter)

				                if self._newline_marker.match(line):
				                    # we need to append to lines, since this is not
				                    # counted in the line specs of diff
				                    lines.append({
				                        'old_lineno': '...',
				                        'new_lineno': '...',
				                        'action': 'context',
				                        'line': self._clean_line(line, command)
				                    })

				        except StopIteration:
				            # input exhausted - the normal way out of the loops above
				            pass
				        return chunks, stats

				    def _safe_id(self, idstring):
				        r"""Make a string safe for including in an id attribute.

				        The HTML spec says that id attributes 'must begin with
				        a letter ([A-Za-z]) and may be followed by any number
				        of letters, digits ([0-9]), hyphens ("-"), underscores
				        ("_"), colons (":"), and periods (".")'. These regexps
				        are slightly over-zealous, in that they remove colons
				        and periods unnecessarily.

				        Whitespace is transformed into underscores, and then
				        anything which is not a hyphen or a character that
				        matches \w (alphanumerics and underscore) is removed.
				        The result is lower-cased.

				        """
				        # Transform all whitespace to underscore
				        idstring = re.sub(r'\s', "_", '%s' % idstring)
				        # Remove everything that is not a hyphen or a member of \w
				        idstring = re.sub(r'(?!-)\W', "", idstring).lower()
				        return idstring

				    def prepare(self, inline_diff=True):
				        """
				        Prepare the passed udiff for HTML rendering. It'll return a list
				        of dicts with diff information; the result is also stored on
				        ``self.parsed_diff`` and ``self.parsed`` is set.
				        """
				        parsed = self._parser(inline_diff=inline_diff)
				        self.parsed = True
				        self.parsed_diff = parsed
				        return parsed

				    def as_raw(self, diff_lines=None):
				        """
				        Returns raw string diff (``diff_lines`` is accepted but unused)
				        """
				        return self._diff

				    def as_html(self, table_class='code-difftable', line_class='line',
				                old_lineno_class='lineno old', new_lineno_class='lineno new',
				                code_class='code', enable_comments=False, parsed_lines=None):
				        """
				        Return given diff as html table with customized css classes

				        :param parsed_lines: optional pre-parsed file list rendered instead
				            of ``self.parsed_diff``
				        :returns: the html string, or ``None`` when there is no chunk at all
				        """
				        def _link_to_if(condition, label, url):
				            """
				            Generates a link if condition is met or just the label if not.
				            """

				            if condition:
				                return '''<a href="%(url)s">%(label)s</a>''' % {
				                    'url': url,
				                    'label': label
				                }
				            else:
				                return label

				        if not self.parsed:
				            self.prepare()

				        diff_lines = self.parsed_diff
				        if parsed_lines:
				            diff_lines = parsed_lines

				        # css marker for the code cell, the same for every row
				        comments = '' if enable_comments else 'no-comment'

				        _html_empty = True
				        _html = []
				        _html.append('''<table class="%(table_class)s">\n''' % {
				            'table_class': table_class
				        })

				        for diff in diff_lines:
				            for line in diff['chunks']:
				                _html_empty = False
				                for change in line:
				                    _html.append('''<tr class="%(lc)s %(action)s">\n''' % {
				                        'lc': line_class,
				                        'action': change['action']
				                    })
				                    anchor_old_id = ''
				                    anchor_new_id = ''
				                    anchor_old = "%(filename)s_o%(oldline_no)s" % {
				                        'filename': self._safe_id(diff['filename']),
				                        'oldline_no': change['old_lineno']
				                    }
				                    anchor_new = "%(filename)s_n%(oldline_no)s" % {
				                        'filename': self._safe_id(diff['filename']),
				                        'oldline_no': change['new_lineno']
				                    }
				                    # only real line numbers get an id attribute
				                    cond_old = (change['old_lineno'] != '...' and
				                                change['old_lineno'])
				                    cond_new = (change['new_lineno'] != '...' and
				                                change['new_lineno'])
				                    if cond_old:
				                        anchor_old_id = 'id="%s"' % anchor_old
				                    if cond_new:
				                        anchor_new_id = 'id="%s"' % anchor_new
				                    ###########################################################
				                    # OLD LINE NUMBER
				                    ###########################################################
				                    _html.append('''\t<td %(a_id)s class="%(olc)s">''' % {
				                        'a_id': anchor_old_id,
				                        'olc': old_lineno_class
				                    })

				                    _html.append('''%(link)s''' % {
				                        'link': _link_to_if(True, change['old_lineno'],
				                                            '#%s' % anchor_old)
				                    })
				                    _html.append('''</td>\n''')
				                    ###########################################################
				                    # NEW LINE NUMBER
				                    ###########################################################

				                    _html.append('''\t<td %(a_id)s class="%(nlc)s">''' % {
				                        'a_id': anchor_new_id,
				                        'nlc': new_lineno_class
				                    })

				                    _html.append('''%(link)s''' % {
				                        'link': _link_to_if(True, change['new_lineno'],
				                                            '#%s' % anchor_new)
				                    })
				                    _html.append('''</td>\n''')
				                    ###########################################################
				                    # CODE
				                    ###########################################################
				                    _html.append('''\t<td class="%(cc)s %(inc)s">''' % {
				                        'cc': code_class,
				                        'inc': comments
				                    })
				                    _html.append('''\n\t\t<pre>%(code)s</pre>\n''' % {
				                        'code': change['line']
				                    })

				                    _html.append('''\t</td>''')
				                    _html.append('''\n</tr>\n''')
				        _html.append('''</table>''')
				        if _html_empty:
				            return None
				        return ''.join(_html)

				    def stat(self):
				        """
				        Returns tuple of added, and removed lines for this instance

				        NOTE(review): the counters are only bumped by ``_line_counter``,
				        which nothing in this class calls, so this looks like it always
				        returns ``(0, 0)`` - confirm against callers.
				        """
				        return self.adds, self.removes