upstream/kallithea Files · rhodecode/lib/diffs.py

API: update_user returns new updated user data

marcink - - Load All Authors

File last commit:

r2478:8eab8111 beta


                r2507:374693af

beta

Download file

             diffs.py
        
                    627 lines
            
             | 22.5 KiB
            
                | text/x-python
            
             |
                PythonLexer
            
             / rhodecode / lib / diffs.py
          
                    History
                
                 |
                  Annotation
                 | Raw
                 |Copy content
                 |Copy permalink

      # -*- coding: utf-8 -*-

      """

          rhodecode.lib.diffs

          ~~~~~~~~~~~~~~~~~~~

          Set of diffing helpers, previously part of vcs

          :created_on: Dec 4, 2011

          :author: marcink

          :copyright: (C) 2010-2012 Marcin Kuzminski <marcin@python-works.com>

          :original copyright: 2007-2008 by Armin Ronacher

          :license: GPLv3, see COPYING for more details.

      """

      # This program is free software: you can redistribute it and/or modify

      # it under the terms of the GNU General Public License as published by

      # the Free Software Foundation, either version 3 of the License, or

      # (at your option) any later version.

      #

      # This program is distributed in the hope that it will be useful,

      # but WITHOUT ANY WARRANTY; without even the implied warranty of

      # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the

      # GNU General Public License for more details.

      #

      # You should have received a copy of the GNU General Public License

      # along with this program.  If not, see <http://www.gnu.org/licenses/>.

      import re

      import io

      import difflib

      import markupsafe

      from itertools import tee, imap

      from mercurial import patch

      from mercurial.mdiff import diffopts

      from mercurial.bundlerepo import bundlerepository

      from mercurial import localrepo

      from pylons.i18n.translation import _

      from rhodecode.lib.vcs.exceptions import VCSError

      from rhodecode.lib.vcs.nodes import FileNode, SubModuleNode

      from rhodecode.lib.helpers import escape

      from rhodecode.lib.utils import EmptyChangeset, make_ui

      def wrap_to_table(str_):
          """
          Embed ``str_`` in a single-row ``code-difftable`` HTML table.

          The value is interpolated as given; no escaping happens here, so the
          caller decides whether the text has to be escaped beforehand.

          :param str_: text or markup placed inside the ``<pre>`` cell
          :returns: the HTML snippet as a string
          """
          row_template = '''<table class="code-difftable">

                      <tr class="line no-comment">

                      <td class="lineno new"></td>

                      <td class="code no-comment"><pre>%s</pre></td>

                      </tr>

                    </table>'''
          return row_template % str_

      def wrapped_diff(filenode_old, filenode_new, cut_off_limit=None,
                      ignore_whitespace=True, line_context=3,
                      enable_comments=False):
          """
          Render the diff between two file nodes as an HTML table, or a table
          holding an explanatory message when no diff is shown.

          :param filenode_old: old file node; ``None`` is replaced by an empty
              ``FileNode`` at the path of ``filenode_new``
          :param filenode_new: new file node
          :param cut_off_limit: ``None`` means no size limit, ``-1`` always
              shows the "too big" message, any other value is compared against
              the ``size`` of both nodes
          :param ignore_whitespace: forwarded to ``get_gitdiff``
          :param line_context: forwarded to ``get_gitdiff`` as ``context``
          :param enable_comments: forwarded to ``DiffProcessor.as_html``
          :returns: tuple ``(size, cs1, cs2, diff, stats)`` where ``cs1``/``cs2``
              are the ``raw_id`` of the old/new changeset
          """
          if filenode_old is None:
              filenode_old = FileNode(filenode_new.path, '', EmptyChangeset())

          # values used by every branch that shows a message instead of a diff
          stats = (0, 0)
          size = 0

          if filenode_old.is_binary or filenode_new.is_binary:
              diff = wrap_to_table(_('binary file'))
          elif cut_off_limit != -1 and (
                  cut_off_limit is None or
                  (filenode_old.size < cut_off_limit and
                   filenode_new.size < cut_off_limit)):
              processor = DiffProcessor(
                  get_gitdiff(filenode_old, filenode_new,
                              ignore_whitespace=ignore_whitespace,
                              context=line_context),
                  format='gitdiff')
              diff = processor.as_html(enable_comments=enable_comments)
              stats = processor.stat()
              size = len(diff or '')
          else:
              diff = wrap_to_table(_('Changeset was too big and was cut off, use '
                                     'diff menu to display this diff'))

          if not diff:
              # as_html() gave nothing back: either a submodule or no changes
              submodule_nodes = [node for node in (filenode_new, filenode_old)
                                 if isinstance(node, SubModuleNode)]
              if submodule_nodes:
                  diff = wrap_to_table(
                      escape('Submodule %r' % submodule_nodes[0]))
              else:
                  diff = wrap_to_table(_('No changes detected'))

          return (size,
                  filenode_old.changeset.raw_id,
                  filenode_new.changeset.raw_id,
                  diff,
                  stats)

      def get_gitdiff(filenode_old, filenode_new, ignore_whitespace=True, context=3):
          """
          Return the git style diff between ``filenode_old`` and
          ``filenode_new`` as produced by the repository of ``filenode_new``.

          :param filenode_old: old ``FileNode``
          :param filenode_new: new ``FileNode``
          :param ignore_whitespace: ignore whitespaces in diff
          :param context: number of context lines; a falsy value means 3
          :returns: whatever ``repo.get_diff`` returns, or ``''`` when either
              node is a ``SubModuleNode``
          :raises VCSError: when one of the nodes is not a ``FileNode``
          """
          # make sure we pass in default context
          if not context:
              context = 3

          if any(isinstance(node, SubModuleNode)
                 for node in (filenode_new, filenode_old)):
              return ''

          for node in (filenode_old, filenode_new):
              if isinstance(node, FileNode):
                  continue
              raise VCSError("Given object should be FileNode object, not %s"
                             % node.__class__)

          repo = filenode_new.changeset.repository
          # a changeset without raw_id is treated as the empty changeset
          old_raw_id = getattr(filenode_old.changeset, 'raw_id',
                               repo.EMPTY_CHANGESET)
          new_raw_id = getattr(filenode_new.changeset, 'raw_id',
                               repo.EMPTY_CHANGESET)

          return repo.get_diff(old_raw_id, new_raw_id, filenode_new.path,
                               ignore_whitespace, context)

      class DiffProcessor(object):

          """

          Give it a unified diff and it returns a list of the files that were

          mentioned in the diff together with a dict of meta information that

          can be used to render it in a HTML template.

          """

          _chunk_re = re.compile(r'@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(.*)')

          def __init__(self, diff, differ='diff', format='gitdiff'):
              """
              :param diff:   a text in diff format or generator
              :param differ: ``'difflib'`` selects the word based highlighter
                  (``_highlight_line_difflib``); any other value selects the
                  character based one (``_highlight_line_udiff``)
              :param format: format of diff passed, `udiff` or `gitdiff`
              """
              # a plain string is wrapped into a list so that everything below
              # can treat the input uniformly as an iterable of chunks
              if isinstance(diff, basestring):
                  diff = [diff]

              self.__udiff = diff
              self.__format = format
              self.adds = 0
              self.removes = 0

              # The former ``isinstance(self.__udiff, basestring)`` branch could
              # never run (strings are wrapped above) and has been dropped.
              udiff_copy = self.copy_iterator()
              if self.__format == 'gitdiff':
                  # gitdiff input is split into lines (and counted) first
                  udiff_copy = self._parse_gitdiff(udiff_copy)
              # every item handed to the parser is HTML-escaped lazily
              self.lines = imap(self.escaper, udiff_copy)

              # Select a differ.
              if differ == 'difflib':
                  self.differ = self._highlight_line_difflib
              else:
                  self.differ = self._highlight_line_udiff

          def escaper(self, string):
              """
              Pass ``string`` through ``markupsafe.escape`` and return the result.
              """
              escaped = markupsafe.escape(string)
              return escaped

          def copy_iterator(self):

              """

              make a fresh copy of generator, we should not iterate thru

              an original as it's needed for repeating operations on

              this instance of DiffProcessor

              """

              self.__udiff, iterator_copy = tee(self.__udiff)

              return iterator_copy

          def _extract_rev(self, line1, line2):

              """

              Extract the operation (A/M/D), filename and revision hint from a line.

              """

              try:

                  if line1.startswith('--- ') and line2.startswith('+++ '):

                      l1 = line1[4:].split(None, 1)

                      old_filename = (l1[0].replace('a/', '', 1)

                                      if len(l1) >= 1 else None)

                      old_rev = l1[1] if len(l1) == 2 else 'old'

                      l2 = line2[4:].split(None, 1)

                      new_filename = (l2[0].replace('b/', '', 1)

                                      if len(l1) >= 1 else None)

                      new_rev = l2[1] if len(l2) == 2 else 'new'

                      filename = (old_filename

                                  if old_filename != '/dev/null' else new_filename)

                      operation = 'D' if new_filename == '/dev/null' else None

                      if not operation:

                          operation = 'M' if old_filename != '/dev/null' else 'A'

                      return operation, filename, new_rev, old_rev

              except (ValueError, IndexError):

                  pass

              return None, None, None, None

          def _parse_gitdiff(self, diffiterator):

              def line_decoder(l):

                  if l.startswith('+') and not l.startswith('+++'):

                      self.adds += 1

                  elif l.startswith('-') and not l.startswith('---'):

                      self.removes += 1

                  return l.decode('utf8', 'replace')

              output = list(diffiterator)

              size = len(output)

              if size == 2:

                  l = []

                  l.extend([output[0]])

                  l.extend(output[1].splitlines(1))

                  return map(line_decoder, l)

              elif size == 1:

                  return  map(line_decoder, output[0].splitlines(1))

              elif size == 0:

                  return []

              raise Exception('wrong size of diff %s' % size)

          def _highlight_line_difflib(self, line, next_):

              """

              Highlight inline changes in both lines.

              """

              if line['action'] == 'del':

                  old, new = line, next_

              else:

                  old, new = next_, line

              oldwords = re.split(r'(\W)', old['line'])

              newwords = re.split(r'(\W)', new['line'])

              sequence = difflib.SequenceMatcher(None, oldwords, newwords)

              oldfragments, newfragments = [], []

              for tag, i1, i2, j1, j2 in sequence.get_opcodes():

                  oldfrag = ''.join(oldwords[i1:i2])

                  newfrag = ''.join(newwords[j1:j2])

                  if tag != 'equal':

                      if oldfrag:

                          oldfrag = '<del>%s</del>' % oldfrag

                      if newfrag:

                          newfrag = '<ins>%s</ins>' % newfrag

                  oldfragments.append(oldfrag)

                  newfragments.append(newfrag)

              old['line'] = "".join(oldfragments)

              new['line'] = "".join(newfragments)

          def _highlight_line_udiff(self, line, next_):

              """

              Highlight inline changes in both lines.

              """

              start = 0

              limit = min(len(line['line']), len(next_['line']))

              while start < limit and line['line'][start] == next_['line'][start]:

                  start += 1

              end = -1

              limit -= start

              while -end <= limit and line['line'][end] == next_['line'][end]:

                  end -= 1

              end += 1

              if start or end:

                  def do(l):

                      last = end + len(l['line'])

                      if l['action'] == 'add':

                          tag = 'ins'

                      else:

                          tag = 'del'

                      l['line'] = '%s<%s>%s</%s>%s' % (

                          l['line'][:start],

                          tag,

                          l['line'][start:last],

                          tag,

                          l['line'][last:]

                      )

                  do(line)

                  do(next_)

          def _parse_udiff(self, inline_diff=True):
              """
              Parse the diff and return data for the template.

              Consumes ``self.lines`` and builds one dict per file section
              (started by a ``'--- '`` line) with the keys ``filename``,
              ``old_revision``, ``new_revision``, ``chunks``, ``operation`` and
              ``stats``. ``chunks`` is a list of hunks; every hunk is a list of
              line dicts with the keys ``old_lineno``, ``new_lineno``,
              ``action`` (``'add'``, ``'del'``, ``'unmod'`` or ``'context'``)
              and ``line``. ``stats`` is ``[added, removed]`` for that file.

              :param inline_diff: when it is ``False`` the inline highlighting
                  pass (``self.differ``) is skipped
              :returns: the file dicts sorted by operation (A, M, D)
              """
              lineiter = self.lines
              files = []
              # running out of input anywhere below simply ends the parsing
              try:
                  line = lineiter.next()
                  while 1:
                      # continue until we found the old file
                      if not line.startswith('--- '):
                          line = lineiter.next()
                          continue
                      chunks = []
                      stats = [0, 0]
                      # NOTE(review): _extract_rev returns
                      # (operation, filename, new_rev, old_rev), so the two
                      # revision names below look swapped - confirm.
                      operation, filename, old_rev, new_rev = \
                          self._extract_rev(line, lineiter.next())
                      # chunks and stats are filled in place further down
                      files.append({
                          'filename':         filename,
                          'old_revision':     old_rev,
                          'new_revision':     new_rev,
                          'chunks':           chunks,
                          'operation':        operation,
                          'stats':            stats,
                      })
                      line = lineiter.next()
                      # one iteration per "@@ -a,b +c,d @@" hunk of this file
                      while line:
                          match = self._chunk_re.match(line)
                          if not match:
                              # not a hunk header: back to looking for a file
                              break
                          lines = []
                          chunks.append(lines)
                          # start/length of old and new side; an omitted
                          # length counts as 1
                          old_line, old_end, new_line, new_end = \
                              [int(x or 1) for x in match.groups()[:-1]]
                          # counters are incremented before use, so start one
                          # below the first line number
                          old_line -= 1
                          new_line -= 1
                          gr = match.groups()
                          # the pattern has five groups, so this is always True
                          context = len(gr) == 5
                          # turn the lengths into (exclusive) end counters
                          old_end += old_line
                          new_end += new_line
                          if context:
                              # skip context only if it's first line
                              if int(gr[0]) > 1:
                                  # the hunk header itself becomes a '...' row
                                  lines.append({
                                      'old_lineno': '...',
                                      'new_lineno': '...',
                                      'action':     'context',
                                      'line':       line,
                                  })
                          line = lineiter.next()
                          # read body lines until both sides are complete
                          while old_line < old_end or new_line < new_end:
                              if line:
                                  # first character is the diff marker
                                  command, line = line[0], line[1:]
                              else:
                                  command = ' '
                              affects_old = affects_new = False
                              # ignore those if we don't expect them
                              # NOTE(review): this ``continue`` does not fetch
                              # a new line; the same line is processed again
                              # with its first character already removed.
                              if command in '#@':
                                  continue
                              elif command == '+':
                                  affects_new = True
                                  action = 'add'
                                  stats[0] += 1
                              elif command == '-':
                                  affects_old = True
                                  action = 'del'
                                  stats[1] += 1
                              else:
                                  affects_old = affects_new = True
                                  action = 'unmod'
                              if line.find('No newline at end of file') != -1:
                                  # marker row: shown, but does not advance
                                  # the line counters
                                  lines.append({
                                      'old_lineno':   '...',
                                      'new_lineno':   '...',
                                      'action':       'context',
                                      'line':         line
                                  })
                              else:
                                  # booleans add 0 or 1 to the counters
                                  old_line += affects_old
                                  new_line += affects_new
                                  # '' for the side this row does not touch
                                  lines.append({
                                      'old_lineno':   affects_old and old_line or '',
                                      'new_lineno':   affects_new and new_line or '',
                                      'action':       action,
                                      'line':         line
                                  })
                              line = lineiter.next()
              except StopIteration:
                  pass
              # added files first, then modified, then deleted; any other
              # operation maps to None
              sorter = lambda info: {'A': 0, 'M': 1, 'D': 2}.get(info['operation'])
              if inline_diff is False:
                  return sorted(files, key=sorter)
              # highlight inline changes
              for diff_data in files:
                  for chunk in diff_data['chunks']:
                      lineiter = iter(chunk)
                      try:
                          while 1:
                              line = lineiter.next()
                              if line['action'] != 'unmod':
                                  # pair the row with the one following it
                                  nextline = lineiter.next()
                                  # only a del/add (or add/del) pair is
                                  # highlighted
                                  if nextline['action'] in ['unmod', 'context'] or \
                                     nextline['action'] == line['action']:
                                      continue
                                  self.differ(line, nextline)
                      except StopIteration:
                          pass
              return sorted(files, key=sorter)

          def prepare(self, inline_diff=True):

              """

              Prepare the passed udiff for HTML rendering. It'l return a list

              of dicts

              """

              return self._parse_udiff(inline_diff=inline_diff)

          def _safe_id(self, idstring):

              """Make a string safe for including in an id attribute.

              The HTML spec says that id attributes 'must begin with

              a letter ([A-Za-z]) and may be followed by any number

              of letters, digits ([0-9]), hyphens ("-"), underscores

              ("_"), colons (":"), and periods (".")'. These regexps

              are slightly over-zealous, in that they remove colons

              and periods unnecessarily.

              Whitespace is transformed into underscores, and then

              anything which is not a hyphen or a character that

              matches \w (alphanumerics and underscore) is removed.

              """

              # Transform all whitespace to underscore

              idstring = re.sub(r'\s', "_", '%s' % idstring)

              # Remove everything that is not a hyphen or a member of \w

              idstring = re.sub(r'(?!-)\W', "", idstring).lower()

              return idstring

          def raw_diff(self):

              """

              Returns raw string as udiff

              """

              udiff_copy = self.copy_iterator()

              if self.__format == 'gitdiff':

                  udiff_copy = self._parse_gitdiff(udiff_copy)

              return u''.join(udiff_copy)

          def as_html(self, table_class='code-difftable', line_class='line',

                      new_lineno_class='lineno old', old_lineno_class='lineno new',

                      code_class='code', enable_comments=False, diff_lines=None):

              """

              Return given diff as html table with customized css classes

              """

              def _link_to_if(condition, label, url):

                  """

                  Generates a link if condition is meet or just the label if not.

                  """

                  if condition:

                      return '''<a href="%(url)s">%(label)s</a>''' % {

                          'url': url,

                          'label': label

                      }

                  else:

                      return label

              if diff_lines is None:

                  diff_lines = self.prepare()

              _html_empty = True

              _html = []

              _html.append('''<table class="%(table_class)s">\n''' % {

                  'table_class': table_class

              })

              for diff in diff_lines:

                  for line in diff['chunks']:

                      _html_empty = False

                      for change in line:

                          _html.append('''<tr class="%(lc)s %(action)s">\n''' % {

                              'lc': line_class,

                              'action': change['action']

                          })

                          anchor_old_id = ''

                          anchor_new_id = ''

                          anchor_old = "%(filename)s_o%(oldline_no)s" % {

                              'filename': self._safe_id(diff['filename']),

                              'oldline_no': change['old_lineno']

                          }

                          anchor_new = "%(filename)s_n%(oldline_no)s" % {

                              'filename': self._safe_id(diff['filename']),

                              'oldline_no': change['new_lineno']

                          }

                          cond_old = (change['old_lineno'] != '...' and

                                      change['old_lineno'])

                          cond_new = (change['new_lineno'] != '...' and

                                      change['new_lineno'])

                          if cond_old:

                              anchor_old_id = 'id="%s"' % anchor_old

                          if cond_new:

                              anchor_new_id = 'id="%s"' % anchor_new

                          ###########################################################

                          # OLD LINE NUMBER

                          ###########################################################

                          _html.append('''\t<td %(a_id)s class="%(olc)s">''' % {

                              'a_id': anchor_old_id,

                              'olc': old_lineno_class

                          })

                          _html.append('''%(link)s''' % {

                              'link': _link_to_if(True, change['old_lineno'],

                                                  '#%s' % anchor_old)

                          })

                          _html.append('''</td>\n''')

                          ###########################################################

                          # NEW LINE NUMBER

                          ###########################################################

                          _html.append('''\t<td %(a_id)s class="%(nlc)s">''' % {

                              'a_id': anchor_new_id,

                              'nlc': new_lineno_class

                          })

                          _html.append('''%(link)s''' % {

                              'link': _link_to_if(True, change['new_lineno'],

                                                  '#%s' % anchor_new)

                          })

                          _html.append('''</td>\n''')

                          ###########################################################

                          # CODE

                          ###########################################################

                          comments = '' if enable_comments else 'no-comment'

                          _html.append('''\t<td class="%(cc)s %(inc)s">''' % {

                              'cc': code_class,

                              'inc': comments

                          })

                          _html.append('''\n\t\t<pre>%(code)s</pre>\n''' % {

                              'code': change['line']

                          })

                          _html.append('''\t</td>''')

                          _html.append('''\n</tr>\n''')

              _html.append('''</table>''')

              if _html_empty:

                  return None

              return ''.join(_html)

          def stat(self):

              """

              Returns tuple of added, and removed lines for this instance

              """

              return self.adds, self.removes

      class InMemoryBundleRepo(bundlerepository):
          """
          ``bundlerepository`` subclass whose bundle is supplied by the caller
          as an already opened object (``bundlestream``).
          """

          def __init__(self, ui, path, bundlestream):
              """
              :param ui: ui object handed to ``localrepository.__init__``
              :param path: repository path handed to
                  ``localrepository.__init__``
              :param bundlestream: bundle object stored as ``self.bundle``
              """
              self._tempparent = None
              # NOTE(review): only localrepository.__init__ is invoked here,
              # bundlerepository.__init__ is never called - presumably on
              # purpose, since the bundle is passed in; confirm against the
              # Mercurial version in use.
              localrepo.localrepository.__init__(self, ui, path)
              # set the 'publish' option of the 'phases' section to False
              self.ui.setconfig('phases', 'publish', False)
              self.bundle = bundlestream
              # dict with the mapping 'filename' -> position in the bundle
              self.bundlefilespos = {}

      def differ(org_repo, org_ref, other_repo, other_ref, discovery_data=None):
          """
          General differ between branches, bookmarks or separate but related
          repositories.

          The diff is always produced in git format, with three lines of
          context and without ignoring whitespace.

          :param org_repo: object exposing the origin repo as
              ``scm_instance._repo``
          :param org_ref: sequence whose item ``[1]`` is the origin revision
          :param other_repo: object exposing the other repo as
              ``scm_instance._repo``
          :param other_ref: sequence whose item ``[1]`` is the other revision
          :param discovery_data: ``(common, incoming, rheads)``; it is unpacked
              (and therefore required) only when the two repos differ
          :returns: the diff as one string
          """
          hg_org = org_repo.scm_instance._repo
          hg_other = other_repo.scm_instance._repo
          diff_opts = diffopts(git=True, ignorews=False, context=3)
          org_rev = org_ref[1]
          other_rev = other_ref[1]

          if not (hg_org != hg_other):
              # both refs live in the same repo: diff them directly
              return ''.join(patch.diff(hg_org, node1=org_rev, node2=other_rev,
                                        opts=diff_opts))

          bundle_repo = None
          common, incoming, rheads = discovery_data

          # create a bundle (uncompressed if other repo is not local)
          if hg_other.capable('getbundle') and incoming:
              # disable repo hooks here since it's just bundle !
              # patch and reset hooks section of UI config to not run any
              # hooks on fetching archives with subrepos
              for hook_name, _unused in hg_other.ui.configitems('hooks'):
                  hg_other.ui.setconfig('hooks', hook_name, None)

              unbundle = hg_other.getbundle('incoming', common=common,
                                            heads=rheads)

              # copy the whole stream into memory, 4 KiB at a time
              buf = io.BytesIO()
              read_chunk = unbundle._stream.read
              while True:
                  chunk = read_chunk(1024 * 4)
                  if not chunk:
                      break
                  buf.write(chunk)
              buf.seek(0)

              # replace chunked _stream with data that can do tell() and seek()
              unbundle._stream = buf

              bundle_repo = InMemoryBundleRepo(make_ui('db'), path=hg_org.root,
                                               bundlestream=unbundle)

          # without a bundle the origin repo itself is diffed
          return ''.join(patch.diff(bundle_repo or hg_org,
                                    node1=hg_org[org_rev].node(),
                                    node2=hg_other[other_rev].node(),
                                    opts=diff_opts))

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

				# -- coding: utf-8 --
				"""
				rhodecode.lib.diffs
				~~~~~~~~~~~~~~~~~~~

				Set of diffing helpers, previously part of vcs


				:created_on: Dec 4, 2011
				:author: marcink
				:copyright: (C) 2010-2012 Marcin Kuzminski <marcin@python-works.com>
				:original copyright: 2007-2008 by Armin Ronacher
				:license: GPLv3, see COPYING for more details.
				"""
				# This program is free software: you can redistribute it and/or modify
				# it under the terms of the GNU General Public License as published by
				# the Free Software Foundation, either version 3 of the License, or
				# (at your option) any later version.
				#
				# This program is distributed in the hope that it will be useful,
				# but WITHOUT ANY WARRANTY; without even the implied warranty of
				# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
				# GNU General Public License for more details.
				#
				# You should have received a copy of the GNU General Public License
				# along with this program. If not, see <http://www.gnu.org/licenses/>.

				import re
				import io
				import difflib
				import markupsafe

				from itertools import tee, imap

				from mercurial import patch
				from mercurial.mdiff import diffopts
				from mercurial.bundlerepo import bundlerepository
				from mercurial import localrepo

				from pylons.i18n.translation import _

				from rhodecode.lib.vcs.exceptions import VCSError
				from rhodecode.lib.vcs.nodes import FileNode, SubModuleNode
				from rhodecode.lib.helpers import escape
				from rhodecode.lib.utils import EmptyChangeset, make_ui


				def wrap_to_table(str_):
				    """
				    Embed ``str_`` in a single-row ``code-difftable`` HTML table.

				    The value is interpolated as given; no escaping happens here.

				    :param str_: text or markup placed inside the ``<pre>`` cell
				    :returns: the HTML snippet as a string
				    """
				    return '''<table class="code-difftable">
				<tr class="line no-comment">
				<td class="lineno new"></td>
				<td class="code no-comment"><pre>%s</pre></td>
				</tr>
				</table>''' % str_


				def wrapped_diff(filenode_old, filenode_new, cut_off_limit=None,
				                 ignore_whitespace=True, line_context=3,
				                 enable_comments=False):
				    """
				    returns a wrapped diff into a table, checks for cut_off_limit and
				    presents proper message

				    :param filenode_old: old file node; ``None`` is replaced by an
				        empty ``FileNode`` at the path of ``filenode_new``
				    :param filenode_new: new file node
				    :param cut_off_limit: ``None`` means no size limit, ``-1`` always
				        shows the "too big" message, any other value is compared
				        against the ``size`` of both nodes
				    :param ignore_whitespace: forwarded to ``get_gitdiff``
				    :param line_context: forwarded to ``get_gitdiff`` as ``context``
				    :param enable_comments: forwarded to ``DiffProcessor.as_html``
				    :returns: tuple ``(size, cs1, cs2, diff, stats)``
				    """
				    if filenode_old is None:
				        filenode_old = FileNode(filenode_new.path, '', EmptyChangeset())

				    if filenode_old.is_binary or filenode_new.is_binary:
				        diff = wrap_to_table(_('binary file'))
				        stats = (0, 0)
				        size = 0

				    elif cut_off_limit != -1 and (cut_off_limit is None or
				    (filenode_old.size < cut_off_limit and filenode_new.size < cut_off_limit)):

				        f_gitdiff = get_gitdiff(filenode_old, filenode_new,
				                                ignore_whitespace=ignore_whitespace,
				                                context=line_context)
				        diff_processor = DiffProcessor(f_gitdiff, format='gitdiff')

				        diff = diff_processor.as_html(enable_comments=enable_comments)
				        stats = diff_processor.stat()
				        size = len(diff or '')
				    else:
				        diff = wrap_to_table(_('Changeset was too big and was cut off, use '
				                               'diff menu to display this diff'))
				        stats = (0, 0)
				        size = 0
				    if not diff:
				        # as_html() gave nothing back: a submodule or no changes
				        submodules = filter(lambda o: isinstance(o, SubModuleNode),
				                            [filenode_new, filenode_old])
				        if submodules:
				            diff = wrap_to_table(escape('Submodule %r' % submodules[0]))
				        else:
				            diff = wrap_to_table(_('No changes detected'))

				    cs1 = filenode_old.changeset.raw_id
				    cs2 = filenode_new.changeset.raw_id

				    return size, cs1, cs2, diff, stats


				def get_gitdiff(filenode_old, filenode_new, ignore_whitespace=True, context=3):
				    """
				    Returns git style diff between given ``filenode_old`` and
				    ``filenode_new``.

				    :param ignore_whitespace: ignore whitespaces in diff
				    :param context: number of context lines; a falsy value means 3
				    :returns: whatever ``repo.get_diff`` returns, or ``''`` when
				        either node is a ``SubModuleNode``
				    :raises VCSError: when one of the nodes is not a ``FileNode``
				    """
				    # make sure we pass in default context
				    context = context or 3
				    submodules = filter(lambda o: isinstance(o, SubModuleNode),
				                        [filenode_new, filenode_old])
				    if submodules:
				        return ''

				    for filenode in (filenode_old, filenode_new):
				        if not isinstance(filenode, FileNode):
				            raise VCSError("Given object should be FileNode object, not %s"
				                           % filenode.__class__)

				    repo = filenode_new.changeset.repository
				    # a changeset without raw_id is treated as the empty changeset
				    old_raw_id = getattr(filenode_old.changeset, 'raw_id', repo.EMPTY_CHANGESET)
				    new_raw_id = getattr(filenode_new.changeset, 'raw_id', repo.EMPTY_CHANGESET)

				    vcs_gitdiff = repo.get_diff(old_raw_id, new_raw_id, filenode_new.path,
				                                ignore_whitespace, context)
				    return vcs_gitdiff


				class DiffProcessor(object):
				    """
				    Give it a unified diff and it returns a list of the files that were
				    mentioned in the diff together with a dict of meta information that
				    can be used to render it in a HTML template.
				    """
				    # hunk header: ``@@ -old_start[,old_len] +new_start[,new_len] @@ text``
				    _chunk_re = re.compile(r'@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(.*)')

				    def __init__(self, diff, differ='diff', format='gitdiff'):
				        """
				        :param diff: a text in diff format or generator
				        :param differ: ``'difflib'`` selects the word based inline
				            highlighter, any other value the prefix/suffix based one
				        :param format: format of diff passed, `udiff` or `gitdiff`
				        """
				        # a plain string is wrapped into a list, so from here on the
				        # stored diff is always an iterable of string chunks
				        if isinstance(diff, basestring):
				            diff = [diff]

				        self.__udiff = diff
				        self.__format = format
				        self.adds = 0
				        self.removes = 0

				        udiff_copy = self.copy_iterator()
				        if self.__format == 'gitdiff':
				            self.lines = imap(self.escaper, self._parse_gitdiff(udiff_copy))
				        else:
				            # NOTE(review): chunks are used as lines as-is here, a string
				            # passed with format `udiff` is not split into lines - confirm
				            # that callers only pass line iterables for this format
				            self.lines = imap(self.escaper, udiff_copy)

				        # Select a differ.
				        if differ == 'difflib':
				            self.differ = self._highlight_line_difflib
				        else:
				            self.differ = self._highlight_line_udiff

				    def escaper(self, string):
				        """
				        HTML-escape ``string`` using ``markupsafe.escape``
				        """
				        return markupsafe.escape(string)

				    def copy_iterator(self):
				        """
				        make a fresh copy of generator, we should not iterate thru
				        an original as it's needed for repeating operations on
				        this instance of DiffProcessor
				        """
				        self.__udiff, iterator_copy = tee(self.__udiff)
				        return iterator_copy

				    def _extract_rev(self, line1, line2):
				        """
				        Extract the operation (A/M/D), filename and revision hint from
				        the ``--- `` / ``+++ `` header pair of a file diff.

				        :param line1: the line expected to start with ``--- ``
				        :param line2: the line expected to start with ``+++ ``
				        :returns: tuple ``(operation, filename, new_rev, old_rev)`` - note
				            that the new revision comes before the old one - or four
				            ``None`` values when the lines are not a header pair
				        """
				        def strip_prefix(name, prefix):
				            # only a leading ``a/`` / ``b/`` is a diff prefix; an ``a/``
				            # in the middle of a path ("data/x") belongs to the filename
				            if name.startswith(prefix):
				                return name[len(prefix):]
				            return name

				        if not (line1.startswith('--- ') and line2.startswith('+++ ')):
				            return None, None, None, None

				        l1 = line1[4:].split(None, 1)
				        old_filename = strip_prefix(l1[0], 'a/') if len(l1) >= 1 else None
				        old_rev = l1[1] if len(l1) == 2 else 'old'

				        l2 = line2[4:].split(None, 1)
				        new_filename = strip_prefix(l2[0], 'b/') if len(l2) >= 1 else None
				        new_rev = l2[1] if len(l2) == 2 else 'new'

				        filename = (old_filename
				                    if old_filename != '/dev/null' else new_filename)

				        if new_filename == '/dev/null':
				            operation = 'D'
				        elif old_filename != '/dev/null':
				            operation = 'M'
				        else:
				            operation = 'A'

				        return operation, filename, new_rev, old_rev

				    def _parse_gitdiff(self, diffiterator):
				        """
				        Turn the chunks of a git diff into a list of decoded lines and
				        count added / removed lines into ``self.adds`` / ``self.removes``.

				        :param diffiterator: iterable yielding at most two string chunks;
				            with two chunks the first one is kept as a single line and
				            only the second one is split into lines
				        :returns: list of lines decoded as utf8 (undecodable bytes replaced)
				        :raises Exception: if more than two chunks are given
				        """
				        # count from scratch on every parse, this method runs again for
				        # each raw_diff() call and must not inflate the stat() numbers
				        self.adds = 0
				        self.removes = 0

				        def line_decoder(l):
				            if l.startswith('+') and not l.startswith('+++'):
				                self.adds += 1
				            elif l.startswith('-') and not l.startswith('---'):
				                self.removes += 1
				            return l.decode('utf8', 'replace')

				        output = list(diffiterator)
				        size = len(output)

				        if size == 2:
				            parts = [output[0]] + output[1].splitlines(1)
				        elif size == 1:
				            parts = output[0].splitlines(1)
				        elif size == 0:
				            return []
				        else:
				            raise Exception('wrong size of diff %s' % size)

				        return [line_decoder(part) for part in parts]

				    def _highlight_line_difflib(self, line, next_):
				        """
				        Highlight inline changes in both lines, word by word.

				        Both line dicts are modified in place: differing fragments get
				        wrapped into ``<del>`` (old line) or ``<ins>`` (new line) tags.
				        """

				        if line['action'] == 'del':
				            old, new = line, next_
				        else:
				            old, new = next_, line

				        # the capturing group keeps the separators, so joining the
				        # fragments gives back the whole line
				        oldwords = re.split(r'(\W)', old['line'])
				        newwords = re.split(r'(\W)', new['line'])

				        sequence = difflib.SequenceMatcher(None, oldwords, newwords)

				        oldfragments, newfragments = [], []
				        for tag, i1, i2, j1, j2 in sequence.get_opcodes():
				            oldfrag = ''.join(oldwords[i1:i2])
				            newfrag = ''.join(newwords[j1:j2])
				            if tag != 'equal':
				                if oldfrag:
				                    oldfrag = '<del>%s</del>' % oldfrag
				                if newfrag:
				                    newfrag = '<ins>%s</ins>' % newfrag
				            oldfragments.append(oldfrag)
				            newfragments.append(newfrag)

				        old['line'] = "".join(oldfragments)
				        new['line'] = "".join(newfragments)

				    def _highlight_line_udiff(self, line, next_):
				        """
				        Highlight inline changes in both lines.

				        The common prefix and common suffix of the two lines are left
				        alone, the part in between is wrapped into ``<ins>`` for lines
				        with action ``add`` and into ``<del>`` otherwise. Both line dicts
				        are modified in place.
				        """
				        # length of the common prefix
				        start = 0
				        limit = min(len(line['line']), len(next_['line']))
				        while start < limit and line['line'][start] == next_['line'][start]:
				            start += 1
				        # common suffix as a non-positive offset from the end of the
				        # line; it may not reach into the common prefix
				        end = -1
				        limit -= start
				        while -end <= limit and line['line'][end] == next_['line'][end]:
				            end -= 1
				        end += 1
				        if start or end:
				            def do(l):
				                last = end + len(l['line'])
				                if l['action'] == 'add':
				                    tag = 'ins'
				                else:
				                    tag = 'del'
				                l['line'] = '%s<%s>%s</%s>%s' % (
				                    l['line'][:start],
				                    tag,
				                    l['line'][start:last],
				                    tag,
				                    l['line'][last:]
				                )
				            do(line)
				            do(next_)

				    def _parse_udiff(self, inline_diff=True):
				        """
				        Parse the diff and return data for the template.

				        Consumes ``self.lines``.

				        :param inline_diff: if not ``False`` changed line pairs are passed
				            to ``self.differ`` for inline highlighting
				        :returns: list of dicts, one per file, with keys ``filename``,
				            ``old_revision``, ``new_revision``, ``chunks``, ``operation``
				            and ``stats`` (``[added, removed]``), ordered by operation
				            (added, modified, deleted)
				        """
				        lineiter = self.lines
				        files = []
				        try:
				            line = next(lineiter)
				            while 1:
				                # continue until we found the old file
				                if not line.startswith('--- '):
				                    line = next(lineiter)
				                    continue

				                chunks = []
				                stats = [0, 0]
				                # _extract_rev returns the new revision before the old one
				                operation, filename, new_rev, old_rev = \
				                    self._extract_rev(line, next(lineiter))
				                files.append({
				                    'filename': filename,
				                    'old_revision': old_rev,
				                    'new_revision': new_rev,
				                    'chunks': chunks,
				                    'operation': operation,
				                    'stats': stats,
				                })

				                line = next(lineiter)
				                while line:
				                    match = self._chunk_re.match(line)
				                    if not match:
				                        break

				                    lines = []
				                    chunks.append(lines)

				                    # a missing length in the hunk header means 1
				                    old_line, old_end, new_line, new_end = \
				                        [int(x or 1) for x in match.groups()[:-1]]
				                    old_line -= 1
				                    new_line -= 1
				                    gr = match.groups()
				                    context = len(gr) == 5
				                    old_end += old_line
				                    new_end += new_line

				                    if context:
				                        # skip context only if it's first line
				                        if int(gr[0]) > 1:
				                            lines.append({
				                                'old_lineno': '...',
				                                'new_lineno': '...',
				                                'action': 'context',
				                                'line': line,
				                            })

				                    line = next(lineiter)
				                    while old_line < old_end or new_line < new_end:
				                        if line:
				                            command, line = line[0], line[1:]
				                        else:
				                            command = ' '
				                        affects_old = affects_new = False

				                        # ignore those if we don't expect them
				                        if command in ('#', '@'):
				                            # move on to the next line, otherwise the
				                            # rest of this one is parsed as a diff line
				                            line = next(lineiter)
				                            continue
				                        elif command == '+':
				                            affects_new = True
				                            action = 'add'
				                            stats[0] += 1
				                        elif command == '-':
				                            affects_old = True
				                            action = 'del'
				                            stats[1] += 1
				                        else:
				                            affects_old = affects_new = True
				                            action = 'unmod'

				                        if line.find('No newline at end of file') != -1:
				                            # marker line, it does not count as file line
				                            lines.append({
				                                'old_lineno': '...',
				                                'new_lineno': '...',
				                                'action': 'context',
				                                'line': line
				                            })

				                        else:
				                            old_line += affects_old
				                            new_line += affects_new
				                            lines.append({
				                                'old_lineno': affects_old and old_line or '',
				                                'new_lineno': affects_new and new_line or '',
				                                'action': action,
				                                'line': line
				                            })

				                        line = next(lineiter)

				        except StopIteration:
				            # running out of lines is the regular way to finish parsing
				            pass

				        sorter = lambda info: {'A': 0, 'M': 1, 'D': 2}.get(info['operation'])
				        if inline_diff is False:
				            return sorted(files, key=sorter)

				        # highlight inline changes
				        for diff_data in files:
				            for chunk in diff_data['chunks']:
				                lineiter = iter(chunk)
				                try:
				                    while 1:
				                        line = next(lineiter)
				                        if line['action'] != 'unmod':
				                            nextline = next(lineiter)
				                            if nextline['action'] in ['unmod', 'context'] or \
				                                    nextline['action'] == line['action']:
				                                continue
				                            self.differ(line, nextline)
				                except StopIteration:
				                    pass

				        return sorted(files, key=sorter)

				    def prepare(self, inline_diff=True):
				        """
				        Prepare the passed udiff for HTML rendering. It'll return a list
				        of dicts, see ``_parse_udiff`` for their content.
				        """
				        return self._parse_udiff(inline_diff=inline_diff)

				    def _safe_id(self, idstring):
				        r"""Make a string safe for including in an id attribute.

				        The HTML spec says that id attributes 'must begin with
				        a letter ([A-Za-z]) and may be followed by any number
				        of letters, digits ([0-9]), hyphens ("-"), underscores
				        ("_"), colons (":"), and periods (".")'. These regexps
				        are slightly over-zealous, in that they remove colons
				        and periods unnecessarily.

				        Whitespace is transformed into underscores, and then
				        anything which is not a hyphen or a character that
				        matches \w (alphanumerics and underscore) is removed.
				        The result is lowercased.

				        """
				        # Transform all whitespace to underscore
				        idstring = re.sub(r'\s', "_", '%s' % idstring)
				        # Remove everything that is not a hyphen or a member of \w
				        idstring = re.sub(r'(?!-)\W', "", idstring).lower()
				        return idstring

				    def raw_diff(self):
				        """
				        Returns raw string as udiff
				        """
				        udiff_copy = self.copy_iterator()
				        if self.__format == 'gitdiff':
				            udiff_copy = self._parse_gitdiff(udiff_copy)
				        return u''.join(udiff_copy)

				    def as_html(self, table_class='code-difftable', line_class='line',
				                new_lineno_class='lineno old', old_lineno_class='lineno new',
				                code_class='code', enable_comments=False, diff_lines=None):
				        """
				        Return given diff as html table with customized css classes

				        :param table_class: css class of the ``<table>``
				        :param line_class: css class of each ``<tr>``, the action of the
				            line is added as a second class
				        :param new_lineno_class: css class of the new line number cell
				        :param old_lineno_class: css class of the old line number cell
				        :param code_class: css class of the code cell
				        :param enable_comments: when false the code cell also gets the
				            ``no-comment`` class
				        :param diff_lines: data as returned by ``prepare()``; when ``None``
				            ``prepare()`` is called
				        :returns: html string, or ``None`` if there was no chunk at all
				        """
				        # NOTE(review): the defaults of new_lineno_class/old_lineno_class
				        # name the opposite side ('old'/'new'). Left untouched, existing
				        # stylesheets may depend on it - confirm before changing.

				        def _link_to_if(condition, label, url):
				            """
				            Generates a link if condition is meet or just the label if not.
				            """

				            if condition:
				                return '''<a href="%(url)s">%(label)s</a>''' % {
				                    'url': url,
				                    'label': label
				                }
				            else:
				                return label
				        if diff_lines is None:
				            diff_lines = self.prepare()
				        _html_empty = True
				        _html = []
				        _html.append('''<table class="%(table_class)s">\n''' % {
				            'table_class': table_class
				        })
				        for diff in diff_lines:
				            for line in diff['chunks']:
				                _html_empty = False
				                for change in line:
				                    _html.append('''<tr class="%(lc)s %(action)s">\n''' % {
				                        'lc': line_class,
				                        'action': change['action']
				                    })
				                    anchor_old_id = ''
				                    anchor_new_id = ''
				                    anchor_old = "%(filename)s_o%(oldline_no)s" % {
				                        'filename': self._safe_id(diff['filename']),
				                        'oldline_no': change['old_lineno']
				                    }
				                    anchor_new = "%(filename)s_n%(oldline_no)s" % {
				                        'filename': self._safe_id(diff['filename']),
				                        'oldline_no': change['new_lineno']
				                    }
				                    # only cells showing a real line number get an id
				                    cond_old = (change['old_lineno'] != '...' and
				                                change['old_lineno'])
				                    cond_new = (change['new_lineno'] != '...' and
				                                change['new_lineno'])
				                    if cond_old:
				                        anchor_old_id = 'id="%s"' % anchor_old
				                    if cond_new:
				                        anchor_new_id = 'id="%s"' % anchor_new
				                    ###########################################################
				                    # OLD LINE NUMBER
				                    ###########################################################
				                    _html.append('''\t<td %(a_id)s class="%(olc)s">''' % {
				                        'a_id': anchor_old_id,
				                        'olc': old_lineno_class
				                    })

				                    _html.append('''%(link)s''' % {
				                        'link': _link_to_if(True, change['old_lineno'],
				                                            '#%s' % anchor_old)
				                    })
				                    _html.append('''</td>\n''')
				                    ###########################################################
				                    # NEW LINE NUMBER
				                    ###########################################################

				                    _html.append('''\t<td %(a_id)s class="%(nlc)s">''' % {
				                        'a_id': anchor_new_id,
				                        'nlc': new_lineno_class
				                    })

				                    _html.append('''%(link)s''' % {
				                        'link': _link_to_if(True, change['new_lineno'],
				                                            '#%s' % anchor_new)
				                    })
				                    _html.append('''</td>\n''')
				                    ###########################################################
				                    # CODE
				                    ###########################################################
				                    comments = '' if enable_comments else 'no-comment'
				                    _html.append('''\t<td class="%(cc)s %(inc)s">''' % {
				                        'cc': code_class,
				                        'inc': comments
				                    })
				                    _html.append('''\n\t\t<pre>%(code)s</pre>\n''' % {
				                        'code': change['line']
				                    })
				                    _html.append('''\t</td>''')
				                    _html.append('''\n</tr>\n''')
				        _html.append('''</table>''')
				        if _html_empty:
				            return None
				        return ''.join(_html)

				    def stat(self):
				        """
				        Returns tuple of added, and removed lines for this instance
				        """
				        return self.adds, self.removes


				class InMemoryBundleRepo(bundlerepository):
				    """
				    ``bundlerepository`` variant that is handed an already opened bundle
				    stream instead of reading the bundle itself.
				    """

				    def __init__(self, ui, path, bundlestream):
				        """
				        :param ui: ui object passed on to the local repository constructor
				        :param path: repository path passed on to the local repository
				            constructor
				        :param bundlestream: bundle object, stored as ``self.bundle``
				        """
				        self._tempparent = None
				        # only the plain local repository constructor is run here, not
				        # the one of ``bundlerepository``
				        localrepo.localrepository.__init__(self, ui, path)
				        self.ui.setconfig('phases', 'publish', False)

				        self.bundle = bundlestream

				        # dict with the mapping 'filename' -> position in the bundle
				        self.bundlefilespos = dict()


				def differ(org_repo, org_ref, other_repo, other_ref, discovery_data=None):
				    """
				    General differ between branches, bookmarks or separate but related
				    repositories

				    :param org_repo: origin repository, its ``scm_instance._repo`` is used
				    :param org_ref: origin reference, a sequence whose second item is the
				        revision to diff from
				    :param other_repo: other repository, its ``scm_instance._repo`` is used
				    :param other_ref: other reference, a sequence whose second item is the
				        revision to diff to
				    :param discovery_data: ``(common, incoming, rheads)`` triple, only
				        read (and then required) when the two repositories differ
				    :returns: the git style diff as one string
				    """
				    org_repo = org_repo.scm_instance._repo
				    other_repo = other_repo.scm_instance._repo
				    # git style diff, whitespace changes included, 3 lines of context
				    opts = diffopts(git=True, ignorews=False, context=3)
				    org_rev = org_ref[1]
				    other_rev = other_ref[1]

				    if org_repo != other_repo:
				        bundlerepo = None
				        common, incoming, rheads = discovery_data

				        # create a bundle (uncompressed if other repo is not local)
				        if other_repo.capable('getbundle') and incoming:
				            # disable repo hooks here since it's just bundle !
				            # patch and reset hooks section of UI config to not run any
				            # hooks on fetching archives with subrepos
				            for hook_name, _unused in other_repo.ui.configitems('hooks'):
				                other_repo.ui.setconfig('hooks', hook_name, None)

				            unbundle = other_repo.getbundle('incoming', common=common,
				                                            heads=rheads)

				            # copy the whole bundle stream into memory, 4 KiB at a time
				            stream = unbundle._stream
				            buf = io.BytesIO()
				            chunk = stream.read(4096)
				            while chunk:
				                buf.write(chunk)
				                chunk = stream.read(4096)
				            buf.seek(0)

				            # replace chunked _stream with data that can do tell() and seek()
				            unbundle._stream = buf

				            bundlerepo = InMemoryBundleRepo(make_ui('db'),
				                                            path=org_repo.root,
				                                            bundlestream=unbundle)

				        diff_repo = bundlerepo or org_repo
				        node1 = org_repo[org_rev].node()
				        node2 = other_repo[other_rev].node()
				    else:
				        diff_repo, node1, node2 = org_repo, org_rev, other_rev

				    return ''.join(patch.diff(diff_repo, node1=node1, node2=node2,
				                              opts=opts))