upstream/kallithea Commit - r2843:ab75def5

fixes diff non-unicode encoding mismatch...

marcink -

r2843:ab75def5 beta

parent child

rhodecode/lib/diffs.py

0 +2 -1

             # -*- coding: utf-8 -*-
             """
                 rhodecode.lib.diffs
                 ~~~~~~~~~~~~~~~~~~~
                 Set of diffing helpers, previously part of vcs
                 :created_on: Dec 4, 2011
                 :author: marcink
                 :copyright: (C) 2010-2012 Marcin Kuzminski <marcin@python-works.com>
                 :original copyright: 2007-2008 by Armin Ronacher
                 :license: GPLv3, see COPYING for more details.
             """
             # This program is free software: you can redistribute it and/or modify
             # it under the terms of the GNU General Public License as published by
             # the Free Software Foundation, either version 3 of the License, or
             # (at your option) any later version.
             #
             # This program is distributed in the hope that it will be useful,
             # but WITHOUT ANY WARRANTY; without even the implied warranty of
             # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
             # GNU General Public License for more details.
             #
             # You should have received a copy of the GNU General Public License
             # along with this program.  If not, see <http://www.gnu.org/licenses/>.
             import re
             import difflib
             import markupsafe
             from itertools import tee, imap
             from mercurial import patch
             from mercurial.mdiff import diffopts
             from mercurial.bundlerepo import bundlerepository
             from pylons.i18n.translation import _
             from rhodecode.lib.compat import BytesIO
             from rhodecode.lib.vcs.utils.hgcompat import localrepo
             from rhodecode.lib.vcs.exceptions import VCSError
             from rhodecode.lib.vcs.nodes import FileNode, SubModuleNode
             from rhodecode.lib.vcs.backends.base import EmptyChangeset
             from rhodecode.lib.helpers import escape
             from rhodecode.lib.utils import make_ui
+            from rhodecode.lib.utils2 import safe_unicode
             def wrap_to_table(str_):
                 """
                 Wrap ``str_`` into a one-row ``code-difftable`` HTML table.

                 :param str_: content placed inside the ``<pre>`` element; it is
                     interpolated with ``%s`` and is not escaped by this function
                 :returns: the HTML snippet
                 """
                 single_row_table = '''<table class="code-difftable">
                             <tr class="line no-comment">
                             <td class="lineno new"></td>
                             <td class="code no-comment"><pre>%s</pre></td>
                             </tr>
                           </table>'''
                 return single_row_table % str_
             def wrapped_diff(filenode_old, filenode_new, cut_off_limit=None,
                             ignore_whitespace=True, line_context=3,
                             enable_comments=False):
                 """
                 Render the diff between two file nodes as an HTML table.

                 Binary files, and files not below ``cut_off_limit``, get a short
                 message wrapped in a table instead of a real diff.

                 :param filenode_old: old file node, or ``None`` (an empty node at
                     ``filenode_new.path`` is substituted)
                 :param filenode_new: new file node
                 :param cut_off_limit: both node sizes must be below this value for the
                     diff to be rendered; ``None`` disables the check and ``-1`` always
                     yields the "too big" message
                 :param ignore_whitespace: forwarded to ``get_gitdiff``
                 :param line_context: forwarded to ``get_gitdiff`` as ``context``
                 :param enable_comments: forwarded to ``DiffProcessor.as_html``
                 :returns: tuple ``(size, cs1, cs2, diff, stats)``
                 """
                 if filenode_old is None:
                     filenode_old = FileNode(filenode_new.path, '', EmptyChangeset())

                 # values reported by every branch that does not render a real diff
                 stats = (0, 0)
                 size = 0

                 if filenode_old.is_binary or filenode_new.is_binary:
                     diff = wrap_to_table(_('binary file'))
                 elif cut_off_limit == -1 or not (
                         cut_off_limit is None or
                         (filenode_old.size < cut_off_limit and
                          filenode_new.size < cut_off_limit)):
                     diff = wrap_to_table(_('Changeset was too big and was cut off, use '
                                            'diff menu to display this diff'))
                 else:
                     raw_gitdiff = get_gitdiff(filenode_old, filenode_new,
                                               ignore_whitespace=ignore_whitespace,
                                               context=line_context)
                     processor = DiffProcessor(raw_gitdiff, format='gitdiff')
                     diff = processor.as_html(enable_comments=enable_comments)
                     stats = processor.stat()
                     size = len(diff or '')

                 if not diff:
                     # an empty diff is either a submodule or a genuine "no changes"
                     involved_submodules = [node
                                            for node in (filenode_new, filenode_old)
                                            if isinstance(node, SubModuleNode)]
                     if involved_submodules:
                         message = escape('Submodule %r' % involved_submodules[0])
                     else:
                         message = _('No changes detected')
                     diff = wrap_to_table(message)

                 cs1 = filenode_old.changeset.raw_id
                 cs2 = filenode_new.changeset.raw_id
                 return size, cs1, cs2, diff, stats
             def get_gitdiff(filenode_old, filenode_new, ignore_whitespace=True, context=3):
                 """
                 Produce the git style diff between ``filenode_old`` and ``filenode_new``.

                 :param filenode_old: old file node
                 :param filenode_new: new file node; its changeset's repository
                     generates the diff
                 :param ignore_whitespace: ignore whitespaces in diff
                 :param context: number of context lines; a falsy value falls back to 3
                 :returns: ``''`` when either node is a ``SubModuleNode``, otherwise the
                     result of ``repo.get_diff``
                 :raises VCSError: when a node is not a ``FileNode``
                 """
                 # make sure we pass in default context
                 if not context:
                     context = 3

                 # submodule nodes short-circuit to an empty diff
                 for node in (filenode_new, filenode_old):
                     if isinstance(node, SubModuleNode):
                         return ''

                 for node in (filenode_old, filenode_new):
                     if not isinstance(node, FileNode):
                         raise VCSError("Given object should be FileNode object, not %s"
                             % node.__class__)

                 repo = filenode_new.changeset.repository
                 rev_old = getattr(filenode_old.changeset, 'raw_id', repo.EMPTY_CHANGESET)
                 rev_new = getattr(filenode_new.changeset, 'raw_id', repo.EMPTY_CHANGESET)
                 return repo.get_diff(rev_old, rev_new, filenode_new.path,
                                      ignore_whitespace, context)
             class DiffProcessor(object):
                 """
                 Give it a unified diff and it returns a list of the files that were
                 mentioned in the diff together with a dict of meta information that
                 can be used to render it in a HTML template.

                 ``adds`` / ``removes`` hold the number of added / removed lines counted
                 while a ``gitdiff`` input is parsed (see :meth:`stat`).
                 """
                 # hunk header, e.g. ``@@ -1,5 +1,6 @@ trailing text``
                 _chunk_re = re.compile(r'@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(.*)')
                 # line present in a diff when a file does not end with a newline
                 _newline_marker = '\\ No newline at end of file\n'

                 def __init__(self, diff, differ='diff', format='gitdiff'):
                     """
                     :param diff:   a text in diff format or generator
                     :param differ: ``'difflib'`` selects word level highlighting through
                         :mod:`difflib`; any other value selects the common
                         prefix/suffix highlighter
                     :param format: format of diff passed, `udiff` or `gitdiff`
                     """
                     if isinstance(diff, basestring):
                         # a plain string becomes a one-element iterable, so the rest
                         # of the class only ever handles iterables of chunks
                         diff = [diff]
                     self.__udiff = diff
                     self.__format = format
                     self.adds = 0
                     self.removes = 0

                     # ``self.__udiff`` can no longer be a string at this point, so the
                     # former ``basestring`` branch was unreachable and has been dropped
                     udiff_copy = self.copy_iterator()
                     if self.__format == 'gitdiff':
                         self.lines = imap(self.escaper, self._parse_gitdiff(udiff_copy))
                     else:
                         self.lines = imap(self.escaper, udiff_copy)

                     # Select a differ.
                     if differ == 'difflib':
                         self.differ = self._highlight_line_difflib
                     else:
                         self.differ = self._highlight_line_udiff

                 def escaper(self, string):
                     """
                     HTML-escape ``string`` with ``markupsafe.escape``.
                     """
                     return markupsafe.escape(string)

                 def copy_iterator(self):
                     """
                     make a fresh copy of generator, we should not iterate thru
                     an original as it's needed for repeating operations on
                     this instance of DiffProcessor
                     """
                     self.__udiff, iterator_copy = tee(self.__udiff)
                     return iterator_copy

                 def _extract_rev(self, line1, line2):
                     """
                     Extract the operation (A/M/D), filename and revision hint from the
                     ``---`` / ``+++`` header pair.

                     :param line1: the ``--- `` line
                     :param line2: the ``+++ `` line
                     :returns: tuple ``(operation, filename, new_rev, old_rev)``, or four
                         ``None`` values when the lines are not a header pair
                     """
                     try:
                         if line1.startswith('--- ') and line2.startswith('+++ '):
                             l1 = line1[4:].split(None, 1)
                             old_filename = (l1[0].replace('a/', '', 1)
                                             if len(l1) >= 1 else None)
                             old_rev = l1[1] if len(l1) == 2 else 'old'

                             l2 = line2[4:].split(None, 1)
                             # guard on ``l2`` itself; the previous check looked at
                             # ``l1`` and made an empty ``+++`` line raise IndexError
                             new_filename = (l2[0].replace('b/', '', 1)
                                             if len(l2) >= 1 else None)
                             new_rev = l2[1] if len(l2) == 2 else 'new'

                             filename = (old_filename
                                         if old_filename != '/dev/null' else new_filename)

                             operation = 'D' if new_filename == '/dev/null' else None
                             if not operation:
                                 operation = 'M' if old_filename != '/dev/null' else 'A'

                             return operation, filename, new_rev, old_rev
                     except (ValueError, IndexError):
                         pass

                     return None, None, None, None

                 def _parse_gitdiff(self, diffiterator):
                     """
                     Split the raw git diff into unicode lines and count added and
                     removed lines into ``self.adds`` / ``self.removes``.

                     :param diffiterator: iterable yielding zero, one or two strings
                     :returns: list of unicode lines
                     :raises Exception: when the iterable yields more than two items
                     """
                     # every call re-reads the same diff, so start counting from zero;
                     # otherwise calling raw_diff() would inflate stat()
                     self.adds = 0
                     self.removes = 0

                     def line_decoder(l):
                         if l.startswith('+') and not l.startswith('+++'):
                             self.adds += 1
                         elif l.startswith('-') and not l.startswith('---'):
                             self.removes += 1
                         return safe_unicode(l)

                     output = list(diffiterator)
                     size = len(output)

                     if size == 2:
                         # the first item is kept whole, only the second one is split
                         l = [output[0]]
                         l.extend(output[1].splitlines(True))
                         return map(line_decoder, l)
                     elif size == 1:
                         return map(line_decoder, output[0].splitlines(True))
                     elif size == 0:
                         return []

                     raise Exception('wrong size of diff %s' % size)

                 def _highlight_line_difflib(self, line, next_):
                     """
                     Highlight inline changes in both lines.

                     Both line dicts are updated in place: differing word runs are
                     wrapped in ``<del>`` (old line) or ``<ins>`` (new line).
                     """
                     if line['action'] == 'del':
                         old, new = line, next_
                     else:
                         old, new = next_, line

                     # the capturing group keeps the separators so that joining the
                     # fragments reproduces the original text
                     oldwords = re.split(r'(\W)', old['line'])
                     newwords = re.split(r'(\W)', new['line'])

                     sequence = difflib.SequenceMatcher(None, oldwords, newwords)

                     oldfragments, newfragments = [], []
                     for tag, i1, i2, j1, j2 in sequence.get_opcodes():
                         oldfrag = ''.join(oldwords[i1:i2])
                         newfrag = ''.join(newwords[j1:j2])
                         if tag != 'equal':
                             if oldfrag:
                                 oldfrag = '<del>%s</del>' % oldfrag
                             if newfrag:
                                 newfrag = '<ins>%s</ins>' % newfrag
                         oldfragments.append(oldfrag)
                         newfragments.append(newfrag)

                     old['line'] = "".join(oldfragments)
                     new['line'] = "".join(newfragments)

                 def _highlight_line_udiff(self, line, next_):
                     """
                     Highlight inline changes in both lines.

                     The common prefix and suffix of the two lines are left alone and
                     the differing middle part is wrapped in ``<ins>`` / ``<del>``.
                     Both line dicts are updated in place.
                     """
                     # length of the common prefix
                     start = 0
                     limit = min(len(line['line']), len(next_['line']))
                     while start < limit and line['line'][start] == next_['line'][start]:
                         start += 1

                     # common suffix as a negative offset, never overlapping the prefix
                     end = -1
                     limit -= start
                     while -end <= limit and line['line'][end] == next_['line'][end]:
                         end -= 1
                     end += 1

                     if start or end:
                         def do(l):
                             last = end + len(l['line'])
                             if l['action'] == 'add':
                                 tag = 'ins'
                             else:
                                 tag = 'del'
                             l['line'] = '%s<%s>%s</%s>%s' % (
                                 l['line'][:start],
                                 tag,
                                 l['line'][start:last],
                                 tag,
                                 l['line'][last:]
                             )
                         do(line)
                         do(next_)

                 def _parse_udiff(self, inline_diff=True):
                     """
                     Parse the diff and return data for the template.

                     :param inline_diff: when ``False`` the inline highlighting pass is
                         skipped
                     :returns: list of per-file dicts (``filename``, ``old_revision``,
                         ``new_revision``, ``chunks``, ``operation``, ``stats``) sorted
                         by operation: added, modified, deleted
                     """
                     lineiter = self.lines
                     files = []
                     try:
                         line = lineiter.next()
                         while 1:
                             # continue until we found the old file
                             if not line.startswith('--- '):
                                 line = lineiter.next()
                                 continue

                             chunks = []
                             stats = [0, 0]
                             # NOTE(review): _extract_rev returns (.., new_rev, old_rev)
                             # but is unpacked as (.., old_rev, new_rev) - confirm
                             # which order consumers of these dicts expect
                             operation, filename, old_rev, new_rev = \
                                 self._extract_rev(line, lineiter.next())
                             files.append({
                                 'filename':         filename,
                                 'old_revision':     old_rev,
                                 'new_revision':     new_rev,
                                 'chunks':           chunks,
                                 'operation':        operation,
                                 'stats':            stats,
                             })

                             line = lineiter.next()
                             while line:
                                 match = self._chunk_re.match(line)
                                 if not match:
                                     break

                                 lines = []
                                 chunks.append(lines)

                                 # a missing length in the hunk header defaults to 1
                                 old_line, old_end, new_line, new_end = \
                                     [int(x or 1) for x in match.groups()[:-1]]
                                 old_line -= 1
                                 new_line -= 1
                                 gr = match.groups()
                                 # always true: the pattern has exactly five groups
                                 context = len(gr) == 5
                                 old_end += old_line
                                 new_end += new_line

                                 if context:
                                     # skip context only if it's first line
                                     if int(gr[0]) > 1:
                                         lines.append({
                                             'old_lineno': '...',
                                             'new_lineno': '...',
                                             'action':     'context',
                                             'line':       line,
                                         })

                                 line = lineiter.next()
                                 while old_line < old_end or new_line < new_end:
                                     if line:
                                         command = line[0]
                                         if command in ['+', '-', ' ']:
                                             #only modify the line if it's actually a diff
                                             # thing
                                             line = line[1:]
                                     else:
                                         command = ' '

                                     affects_old = affects_new = False

                                     # ignore those if we don't expect them
                                     if command in '#@':
                                         # move on to the next line first; a bare
                                         # ``continue`` re-examined the same line forever
                                         line = lineiter.next()
                                         continue
                                     elif command == '+':
                                         affects_new = True
                                         action = 'add'
                                         stats[0] += 1
                                     elif command == '-':
                                         affects_old = True
                                         action = 'del'
                                         stats[1] += 1
                                     else:
                                         affects_old = affects_new = True
                                         action = 'unmod'

                                     if line != self._newline_marker:
                                         old_line += affects_old
                                         new_line += affects_new
                                         lines.append({
                                             'old_lineno':   affects_old and old_line or '',
                                             'new_lineno':   affects_new and new_line or '',
                                             'action':       action,
                                             'line':         line
                                         })

                                     line = lineiter.next()
                                     if line == self._newline_marker:
                                         # we need to append to lines, since this is not
                                         # counted in the line specs of diff
                                         lines.append({
                                             'old_lineno':   '...',
                                             'new_lineno':   '...',
                                             'action':       'context',
                                             'line':         line
                                         })

                     except StopIteration:
                         # running out of input is the normal way to finish parsing
                         pass

                     sorter = lambda info: {'A': 0, 'M': 1, 'D': 2}.get(info['operation'])

                     if inline_diff is False:
                         return sorted(files, key=sorter)

                     # highlight inline changes
                     for diff_data in files:
                         for chunk in diff_data['chunks']:
                             lineiter = iter(chunk)
                             try:
                                 while 1:
                                     line = lineiter.next()
                                     if line['action'] not in ['unmod', 'context']:
                                         nextline = lineiter.next()
                                         # only a changed line directly followed by a
                                         # change of the other kind is highlighted
                                         if nextline['action'] in ['unmod', 'context'] or \
                                            nextline['action'] == line['action']:
                                             continue
                                         self.differ(line, nextline)
                             except StopIteration:
                                 pass

                     return sorted(files, key=sorter)

                 def prepare(self, inline_diff=True):
                     """
                     Prepare the passed udiff for HTML rendering. It'll return a list
                     of dicts
                     """
                     return self._parse_udiff(inline_diff=inline_diff)

                 def _safe_id(self, idstring):
                     r"""Make a string safe for including in an id attribute.

                     The HTML spec says that id attributes 'must begin with
                     a letter ([A-Za-z]) and may be followed by any number
                     of letters, digits ([0-9]), hyphens ("-"), underscores
                     ("_"), colons (":"), and periods (".")'. These regexps
                     are slightly over-zealous, in that they remove colons
                     and periods unnecessarily.

                     Whitespace is transformed into underscores, and then
                     anything which is not a hyphen or a character that
                     matches \w (alphanumerics and underscore) is removed.
                     The result is lower-cased.
                     """
                     # Transform all whitespace to underscore
                     idstring = re.sub(r'\s', "_", '%s' % idstring)
                     # Remove everything that is not a hyphen or a member of \w
                     idstring = re.sub(r'(?!-)\W', "", idstring).lower()
                     return idstring

                 def raw_diff(self):
                     """
                     Returns raw string as udiff
                     """
                     udiff_copy = self.copy_iterator()
                     if self.__format == 'gitdiff':
                         udiff_copy = self._parse_gitdiff(udiff_copy)
                     return u''.join(udiff_copy)

                 def as_html(self, table_class='code-difftable', line_class='line',
                             new_lineno_class='lineno old', old_lineno_class='lineno new',
                             code_class='code', enable_comments=False, diff_lines=None):
                     """
                     Return given diff as html table with customized css classes

                     :param diff_lines: already parsed data as returned by
                         :meth:`prepare`; when ``None`` the diff is parsed here
                     :param enable_comments: when false the code cell also gets the
                         ``no-comment`` css class
                     :returns: HTML string, or ``None`` when there is no chunk to show
                     """
                     # NOTE(review): the defaults of new_lineno_class and
                     # old_lineno_class look swapped ('lineno old' / 'lineno new');
                     # kept as is - confirm against the stylesheet before changing

                     def _link_to_if(condition, label, url):
                         """
                         Generates a link if condition is meet or just the label if not.
                         """

                         if condition:
                             return '''<a href="%(url)s">%(label)s</a>''' % {
                                 'url': url,
                                 'label': label
                             }
                         else:
                             return label

                     if diff_lines is None:
                         diff_lines = self.prepare()

                     _html_empty = True
                     _html = []
                     _html.append('''<table class="%(table_class)s">\n''' % {
                         'table_class': table_class
                     })
                     for diff in diff_lines:
                         for line in diff['chunks']:
                             _html_empty = False
                             for change in line:
                                 _html.append('''<tr class="%(lc)s %(action)s">\n''' % {
                                     'lc': line_class,
                                     'action': change['action']
                                 })
                                 anchor_old_id = ''
                                 anchor_new_id = ''
                                 anchor_old = "%(filename)s_o%(oldline_no)s" % {
                                     'filename': self._safe_id(diff['filename']),
                                     'oldline_no': change['old_lineno']
                                 }
                                 anchor_new = "%(filename)s_n%(oldline_no)s" % {
                                     'filename': self._safe_id(diff['filename']),
                                     'oldline_no': change['new_lineno']
                                 }
                                 # only real line numbers get an ``id`` attribute
                                 cond_old = (change['old_lineno'] != '...' and
                                             change['old_lineno'])
                                 cond_new = (change['new_lineno'] != '...' and
                                             change['new_lineno'])
                                 if cond_old:
                                     anchor_old_id = 'id="%s"' % anchor_old
                                 if cond_new:
                                     anchor_new_id = 'id="%s"' % anchor_new
                                 ###########################################################
                                 # OLD LINE NUMBER
                                 ###########################################################
                                 _html.append('''\t<td %(a_id)s class="%(olc)s">''' % {
                                     'a_id': anchor_old_id,
                                     'olc': old_lineno_class
                                 })

                                 _html.append('''%(link)s''' % {
                                     'link': _link_to_if(True, change['old_lineno'],
                                                         '#%s' % anchor_old)
                                 })
                                 _html.append('''</td>\n''')
                                 ###########################################################
                                 # NEW LINE NUMBER
                                 ###########################################################

                                 _html.append('''\t<td %(a_id)s class="%(nlc)s">''' % {
                                     'a_id': anchor_new_id,
                                     'nlc': new_lineno_class
                                 })

                                 _html.append('''%(link)s''' % {
                                     'link': _link_to_if(True, change['new_lineno'],
                                                         '#%s' % anchor_new)
                                 })
                                 _html.append('''</td>\n''')
                                 ###########################################################
                                 # CODE
                                 ###########################################################
                                 comments = '' if enable_comments else 'no-comment'
                                 _html.append('''\t<td class="%(cc)s %(inc)s">''' % {
                                     'cc': code_class,
                                     'inc': comments
                                 })
                                 _html.append('''\n\t\t<pre>%(code)s</pre>\n''' % {
                                     'code': change['line']
                                 })
                                 _html.append('''\t</td>''')
                                 _html.append('''\n</tr>\n''')
                     _html.append('''</table>''')
                     if _html_empty:
                         return None
                     return ''.join(_html)

                 def stat(self):
                     """
                     Returns tuple of added, and removed lines for this instance
                     """
                     return self.adds, self.removes
             class InMemoryBundleRepo(bundlerepository):
                 """
                 ``bundlerepository`` variant that is handed a bundle object directly
                 (``bundlestream``) and stores it as ``self.bundle``.
                 """
                 def __init__(self, ui, path, bundlestream):
                     """
                     :param ui: ui object passed on to ``localrepository.__init__``
                     :param path: repository path passed on to
                         ``localrepository.__init__``
                     :param bundlestream: bundle object stored as ``self.bundle``
                     """
                     # NOTE(review): set before the base initialiser runs; presumably
                     # an attribute bundlerepository itself expects - confirm
                     self._tempparent = None
                     # calls localrepository.__init__ directly, so
                     # bundlerepository.__init__ is not executed
                     localrepo.localrepository.__init__(self, ui, path)
                     self.ui.setconfig('phases', 'publish', False)
                     self.bundle = bundlestream
                     # dict with the mapping 'filename' -> position in the bundle
                     self.bundlefilespos = {}
             def differ(org_repo, org_ref, other_repo, other_ref, discovery_data=None):
                 """
                 General differ between branches, bookmarks or separate but related
                 repositories

                 :param org_repo: object exposing the origin repository as
                     ``scm_instance._repo``
                 :param org_ref: sequence whose second item is the origin revision
                 :param other_repo: object exposing the other repository as
                     ``scm_instance._repo``
                 :param other_ref: sequence whose second item is the other revision
                 :param discovery_data: ``(common, incoming, rheads)`` triple; it is
                     unpacked whenever the two repositories differ
                 :returns: the git style diff as one string
                 """
                 bundlerepo = None
                 org_repo = org_repo.scm_instance._repo
                 other_repo = other_repo.scm_instance._repo
                 # git style diff, whitespace changes included, 3 lines of context
                 opts = diffopts(git=True, ignorews=False, context=3)
                 org_ref = org_ref[1]
                 other_ref = other_ref[1]

                 if org_repo != other_repo:
                     common, incoming, rheads = discovery_data
                     peer = localrepo.locallegacypeer(other_repo.local())

                     # create a bundle (uncompressed if other repo is not local)
                     if peer.capable('getbundle') and incoming:
                         # disable repo hooks here since it's just bundle !
                         # patch and reset hooks section of UI config to not run any
                         # hooks on fetching archives with subrepos
                         for hook_name, _hook_value in other_repo.ui.configitems('hooks'):
                             other_repo.ui.setconfig('hooks', hook_name, None)

                         unbundle = other_repo.getbundle('incoming', common=common,
                                                         heads=None)

                         # drain the bundle stream into memory, 4 KiB at a time
                         buf = BytesIO()
                         chunk = unbundle._stream.read(1024 * 4)
                         while chunk:
                             buf.write(chunk)
                             chunk = unbundle._stream.read(1024 * 4)
                         buf.seek(0)
                         # replace chunked _stream with data that can do tell() and seek()
                         unbundle._stream = buf

                         bundlerepo = InMemoryBundleRepo(make_ui('db'),
                                                         path=org_repo.root,
                                                         bundlestream=unbundle)

                     diff_chunks = patch.diff(bundlerepo or org_repo,
                                              node1=org_repo[org_ref].node(),
                                              node2=other_repo[other_ref].node(),
                                              opts=opts)
                 else:
                     diff_chunks = patch.diff(org_repo, node1=org_ref, node2=other_ref,
                                              opts=opts)
                 return ''.join(diff_chunks)

rhodecode/lib/vcs/utils/diffs.py

0 +2 -1

             # -*- coding: utf-8 -*-
             # original copyright: 2007-2008 by Armin Ronacher
             # licensed under the BSD license.
             import re
             import difflib
             import logging
             from difflib import unified_diff
             from itertools import tee, imap
             from mercurial.match import match
             from rhodecode.lib.vcs.exceptions import VCSError
             from rhodecode.lib.vcs.nodes import FileNode, NodeError
+            from rhodecode.lib.vcs.utils import safe_unicode
             def get_udiff(filenode_old, filenode_new, show_whitespace=True):
                 """
                 Returns unified diff between given ``filenode_old`` and ``filenode_new``.

                 :param filenode_old: old ``FileNode``
                 :param filenode_new: new ``FileNode``
                 :param show_whitespace: accepted but not used by this function
                 :returns: the generator produced by ``difflib.unified_diff``
                 :raises VCSError: when either argument is not a ``FileNode``
                 """
                 # validate first so a wrong object is reported as VCSError rather
                 # than failing on a missing attribute further down
                 for filenode in (filenode_old, filenode_new):
                     if not isinstance(filenode, FileNode):
                         raise VCSError("Given object should be FileNode object, not %s"
                             % filenode.__class__)

                 try:
                     filenode_old_date = filenode_old.changeset.date
                 except NodeError:
                     filenode_old_date = None

                 try:
                     filenode_new_date = filenode_new.changeset.date
                 except NodeError:
                     filenode_new_date = None

                 if filenode_old_date and filenode_new_date:
                     if not filenode_old_date < filenode_new_date:
                         logging.debug("Generating udiff for filenodes with not increasing "
                             "dates")

                 # the "to" side is labelled with the NEW node's date; the old date
                 # used to be passed for both sides
                 vcs_udiff = unified_diff(filenode_old.content.splitlines(True),
                                            filenode_new.content.splitlines(True),
                                            filenode_old.name,
                                            filenode_new.name,
                                            filenode_old_date,
                                            filenode_new_date)
                 return vcs_udiff
             def get_gitdiff(filenode_old, filenode_new, ignore_whitespace=True):
                 """
                 Build a git style diff between ``filenode_old`` and ``filenode_new``.

                 The diff is produced by ``_get_diff`` of the repository the new
                 node's changeset belongs to, limited to the path of the new node.

                 :param ignore_whitespace: ignore whitespaces in diff
                 :raises VCSError: if any of the given objects is not a ``FileNode``
                 """
                 for candidate in (filenode_old, filenode_new):
                     if isinstance(candidate, FileNode):
                         continue
                     raise VCSError("Given object should be FileNode object, not %s"
                         % candidate.__class__)
                 # a changeset without ``raw_id`` is represented by forty zeros
                 null_id = '0' * 40
                 raw_ids = [getattr(node.changeset, 'raw_id', null_id)
                            for node in (filenode_old, filenode_new)]
                 repository = filenode_new.changeset.repository
                 return repository._get_diff(raw_ids[0], raw_ids[1],
                                             filenode_new.path, ignore_whitespace)
             class DiffProcessor(object):
                 """
                 Give it a unified diff and it returns a list of the files that were
                 mentioned in the diff together with a dict of meta information that
                 can be used to render it in a HTML template.
                 """
                 _chunk_re = re.compile(r'@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(.*)')
                 def __init__(self, diff, differ='diff', format='udiff'):
                     """
                     :param diff:   a text in diff format or generator
                     :param differ: inline highlighter, ``'difflib'`` selects the word
                         based one, any other value the character based one
                     :param format: format of diff passed, `udiff` or `gitdiff`
                     """
                     if isinstance(diff, basestring):
                         if format == 'gitdiff':
                             # _parse_gitdiff splits the text itself and expects it
                             # as a single item
                             diff = [diff]
                         else:
                             # the udiff parser consumes one line per item, so a
                             # plain text has to be cut into lines (endings kept)
                             diff = diff.splitlines(1)
                     self.__udiff = diff
                     self.__format = format
                     # filled in by _parse_gitdiff, stay at zero for `udiff`
                     self.adds = 0
                     self.removes = 0
                     udiff_copy = self.copy_iterator()
                     if self.__format == 'gitdiff':
                         udiff_copy = self._parse_gitdiff(udiff_copy)
                     # every line is html escaped lazily while it is consumed
                     self.lines = imap(self.escaper, udiff_copy)
                     # Select a differ.
                     if differ == 'difflib':
                         self.differ = self._highlight_line_difflib
                     else:
                         self.differ = self._highlight_line_udiff
                 def escaper(self, string):
                     """
                     Escape ``&``, ``<`` and ``>`` so the text can be put into HTML.

                     :param string: text to escape
                     :returns: escaped copy of ``string``
                     """
                     # '&' has to be handled first, otherwise the entities created
                     # for '<' and '>' would be escaped a second time
                     return (string.replace('&', '&amp;')
                                   .replace('<', '&lt;')
                                   .replace('>', '&gt;'))
                 def copy_iterator(self):
                     """
                     make a fresh copy of generator, we should not iterate thru
                     an original as it's needed for repeating operations on
                     this instance of DiffProcessor
                     """
                     self.__udiff, iterator_copy = tee(self.__udiff)
                     return iterator_copy
                 def _extract_rev(self, line1, line2):
                     """
                     Extract the filename and revision hint from a line.
                     """
                     try:
                         if line1.startswith('--- ') and line2.startswith('+++ '):
                             l1 = line1[4:].split(None, 1)
                             old_filename = l1[0].lstrip('a/') if len(l1) >= 1 else None
                             old_rev = l1[1] if len(l1) == 2 else 'old'
                             l2 = line2[4:].split(None, 1)
                             new_filename = l2[0].lstrip('b/') if len(l1) >= 1 else None
                             new_rev = l2[1] if len(l2) == 2 else 'new'
                             filename = old_filename if (old_filename !=
                                                         'dev/null') else new_filename
                             return filename, new_rev, old_rev
                     except (ValueError, IndexError):
                         pass
                     return None, None, None
                 def _parse_gitdiff(self, diffiterator):
                     """
                     Cut a git style diff into lines and count added/removed lines.

                     :param diffiterator: iterable yielding zero, one or two items
                     :returns: the diff lines, each passed through ``line_decoder``;
                         an empty list when ``diffiterator`` yields nothing
                     :raises Exception: when ``diffiterator`` yields more than two
                         items

                     Every call adds to ``self.adds`` / ``self.removes``; the
                     counters are not reset here.
                     """
                     def line_decoder(l):
                         # lines starting with '+' / '-' are counted, those starting
                         # with '+++' / '---' are left out of the counts
                         if l.startswith('+') and not l.startswith('+++'):
                             self.adds += 1
                         elif l.startswith('-') and not l.startswith('---'):
                             self.removes += 1
                         # the next two lines are shown with their changeset markers
-                        return l.decode('utf8', 'replace')
+                        return safe_unicode(l)
                     output = list(diffiterator)
                     size = len(output)
                     if size == 2:
                         # the first item is kept as a single entry, only the second
                         # one is split into lines (line endings are kept)
                         l = []
                         l.extend([output[0]])
                         l.extend(output[1].splitlines(1))
                         return map(line_decoder, l)
                     elif size == 1:
                         return  map(line_decoder, output[0].splitlines(1))
                     elif size == 0:
                         return []
                     raise Exception('wrong size of diff %s' % size)
                 def _highlight_line_difflib(self, line, next):
                     """
                     Highlight inline changes in both lines.
                     """
                     if line['action'] == 'del':
                         old, new = line, next
                     else:
                         old, new = next, line
                     oldwords = re.split(r'(\W)', old['line'])
                     newwords = re.split(r'(\W)', new['line'])
                     sequence = difflib.SequenceMatcher(None, oldwords, newwords)
                     oldfragments, newfragments = [], []
                     for tag, i1, i2, j1, j2 in sequence.get_opcodes():
                         oldfrag = ''.join(oldwords[i1:i2])
                         newfrag = ''.join(newwords[j1:j2])
                         if tag != 'equal':
                             if oldfrag:
                                 oldfrag = '<del>%s</del>' % oldfrag
                             if newfrag:
                                 newfrag = '<ins>%s</ins>' % newfrag
                         oldfragments.append(oldfrag)
                         newfragments.append(newfrag)
                     old['line'] = "".join(oldfragments)
                     new['line'] = "".join(newfragments)
                 def _highlight_line_udiff(self, line, next):
                     """
                     Highlight inline changes in both lines.
                     """
                     start = 0
                     limit = min(len(line['line']), len(next['line']))
                     while start < limit and line['line'][start] == next['line'][start]:
                         start += 1
                     end = -1
                     limit -= start
                     while -end <= limit and line['line'][end] == next['line'][end]:
                         end -= 1
                     end += 1
                     if start or end:
                         def do(l):
                             last = end + len(l['line'])
                             if l['action'] == 'add':
                                 tag = 'ins'
                             else:
                                 tag = 'del'
                             l['line'] = '%s<%s>%s</%s>%s' % (
                                 l['line'][:start],
                                 tag,
                                 l['line'][start:last],
                                 tag,
                                 l['line'][last:]
                             )
                         do(line)
                         do(next)
                 def _parse_udiff(self):
                     """
                     Parse the diff an return data for the template.
                     """
                     lineiter = self.lines
                     files = []
                     try:
                         line = lineiter.next()
                         # skip first context
                         skipfirst = True
                         while 1:
                             # continue until we found the old file
                             if not line.startswith('--- '):
                                 line = lineiter.next()
                                 continue
                             chunks = []
                             filename, old_rev, new_rev = \
                                 self._extract_rev(line, lineiter.next())
                             files.append({
                                 'filename':         filename,
                                 'old_revision':     old_rev,
                                 'new_revision':     new_rev,
                                 'chunks':           chunks
                             })
                             line = lineiter.next()
                             while line:
                                 match = self._chunk_re.match(line)
                                 if not match:
                                     break
                                 lines = []
                                 chunks.append(lines)
                                 old_line, old_end, new_line, new_end = \
                                     [int(x or 1) for x in match.groups()[:-1]]
                                 old_line -= 1
                                 new_line -= 1
                                 context = len(match.groups()) == 5
                                 old_end += old_line
                                 new_end += new_line
                                 if context:
                                     if not skipfirst:
                                         lines.append({
                                             'old_lineno': '...',
                                             'new_lineno': '...',
                                             'action': 'context',
                                             'line': line,
                                         })
                                     else:
                                         skipfirst = False
                                 line = lineiter.next()
                                 while old_line < old_end or new_line < new_end:
                                     if line:
                                         command, line = line[0], line[1:]
                                     else:
                                         command = ' '
                                     affects_old = affects_new = False
                                     # ignore those if we don't expect them
                                     if command in '#@':
                                         continue
                                     elif command == '+':
                                         affects_new = True
                                         action = 'add'
                                     elif command == '-':
                                         affects_old = True
                                         action = 'del'
                                     else:
                                         affects_old = affects_new = True
                                         action = 'unmod'
                                     old_line += affects_old
                                     new_line += affects_new
                                     lines.append({
                                         'old_lineno':   affects_old and old_line or '',
                                         'new_lineno':   affects_new and new_line or '',
                                         'action':       action,
                                         'line':         line
                                     })
                                     line = lineiter.next()
                     except StopIteration:
                         pass
                     # highlight inline changes
                     for file in files:
                         for chunk in chunks:
                             lineiter = iter(chunk)
                             #first = True
                             try:
                                 while 1:
                                     line = lineiter.next()
                                     if line['action'] != 'unmod':
                                         nextline = lineiter.next()
                                         if nextline['action'] == 'unmod' or \
                                            nextline['action'] == line['action']:
                                             continue
                                         self.differ(line, nextline)
                             except StopIteration:
                                 pass
                     return files
                 def prepare(self):
                     """
                     Prepare the passed udiff for HTML rendering. It'l return a list
                     of dicts
                     """
                     return self._parse_udiff()
                 def _safe_id(self, idstring):
                     """Make a string safe for including in an id attribute.
                     The HTML spec says that id attributes 'must begin with
                     a letter ([A-Za-z]) and may be followed by any number
                     of letters, digits ([0-9]), hyphens ("-"), underscores
                     ("_"), colons (":"), and periods (".")'. These regexps
                     are slightly over-zealous, in that they remove colons
                     and periods unnecessarily.
                     Whitespace is transformed into underscores, and then
                     anything which is not a hyphen or a character that
                     matches \w (alphanumerics and underscore) is removed.
                     """
                     # Transform all whitespace to underscore
                     idstring = re.sub(r'\s', "_", '%s' % idstring)
                     # Remove everything that is not a hyphen or a member of \w
                     idstring = re.sub(r'(?!-)\W', "", idstring).lower()
                     return idstring
                 def raw_diff(self):
                     """
                     Returns raw string as udiff
                     """
                     udiff_copy = self.copy_iterator()
                     if self.__format == 'gitdiff':
                         udiff_copy = self._parse_gitdiff(udiff_copy)
                     return u''.join(udiff_copy)
                 def as_html(self, table_class='code-difftable', line_class='line',
                             new_lineno_class='lineno old', old_lineno_class='lineno new',
                             code_class='code'):
                     """
                     Return udiff as html table with customized css classes
                     """
                     def _link_to_if(condition, label, url):
                         """
                         Generates a link if condition is meet or just the label if not.
                         """
                         if condition:
                             return '''<a href="%(url)s">%(label)s</a>''' % {'url': url,
                                                                             'label': label}
                         else:
                             return label
                     diff_lines = self.prepare()
                     _html_empty = True
                     _html = []
                     _html.append('''<table class="%(table_class)s">\n''' \
                                                         % {'table_class': table_class})
                     for diff in diff_lines:
                         for line in diff['chunks']:
                             _html_empty = False
                             for change in line:
                                 _html.append('''<tr class="%(line_class)s %(action)s">\n''' \
                                     % {'line_class': line_class,
                                        'action': change['action']})
                                 anchor_old_id = ''
                                 anchor_new_id = ''
                                 anchor_old = "%(filename)s_o%(oldline_no)s" % \
                                             {'filename': self._safe_id(diff['filename']),
                                              'oldline_no': change['old_lineno']}
                                 anchor_new = "%(filename)s_n%(oldline_no)s" % \
                                             {'filename': self._safe_id(diff['filename']),
                                              'oldline_no': change['new_lineno']}
                                 cond_old = change['old_lineno'] != '...' and \
                                                                     change['old_lineno']
                                 cond_new = change['new_lineno'] != '...' and \
                                                                     change['new_lineno']
                                 if cond_old:
                                     anchor_old_id = 'id="%s"' % anchor_old
                                 if cond_new:
                                     anchor_new_id = 'id="%s"' % anchor_new
                                 ###########################################################
                                 # OLD LINE NUMBER
                                 ###########################################################
                                 _html.append('''\t<td %(a_id)s class="%(old_lineno_cls)s">''' \
                                                 % {'a_id': anchor_old_id,
                                                    'old_lineno_cls': old_lineno_class})
                                 _html.append('''<pre>%(link)s</pre>''' \
                                     % {'link':
                                     _link_to_if(cond_old, change['old_lineno'], '#%s' \
                                                                             % anchor_old)})
                                 _html.append('''</td>\n''')
                                 ###########################################################
                                 # NEW LINE NUMBER
                                 ###########################################################
                                 _html.append('''\t<td %(a_id)s class="%(new_lineno_cls)s">''' \
                                                 % {'a_id': anchor_new_id,
                                                    'new_lineno_cls': new_lineno_class})
                                 _html.append('''<pre>%(link)s</pre>''' \
                                     % {'link':
                                     _link_to_if(cond_new, change['new_lineno'], '#%s' \
                                                                             % anchor_new)})
                                 _html.append('''</td>\n''')
                                 ###########################################################
                                 # CODE
                                 ###########################################################
                                 _html.append('''\t<td class="%(code_class)s">''' \
                                                             % {'code_class': code_class})
                                 _html.append('''\n\t\t<pre>%(code)s</pre>\n''' \
                                                             % {'code': change['line']})
                                 _html.append('''\t</td>''')
                                 _html.append('''\n</tr>\n''')
                     _html.append('''</table>''')
                     if _html_empty:
                         return None
                     return ''.join(_html)
                 def stat(self):
                     """
                     Returns tuple of adde,and removed lines for this instance
                     """
                     return self.adds, self.removes

General Comments 0

Write
Preview

You need to be logged in to leave comments. Login now

No TODOs yet

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages