# HG changeset patch
# User Daniel Dourvaris
# Date 2016-10-19 11:29:51
# Node ID 158ce501aebc2e78f486a7311633d189b754cfe9
# Parent  518666ba6fbd75c2de486432b92b7f45978c4490
diffs: replace compare controller with new html based diffs:

* side/side + unified support
* redesign of diff changes/operations
* added button to see file before the change
* auto collapses large diffs

refs #4232

diff --git a/LICENSE.txt b/LICENSE.txt
--- a/LICENSE.txt
+++ b/LICENSE.txt
@@ -14,6 +14,8 @@ permission notice:
   file:licenses/tornado_license.txt
       Copyright (c) 2015 - pygments-markdown-lexer
   file:licenses/pygments_markdown_lexer_license.txt
+      Copyright 2006 - diff_match_patch
+  file:licenses/diff_match_patch_license.txt
 
 All licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
diff --git a/licenses/diff_match_patch_license.txt b/licenses/diff_match_patch_license.txt
new file mode 100644
--- /dev/null
+++ b/licenses/diff_match_patch_license.txt
@@ -0,0 +1,14 @@
+Copyright 2006 Google Inc.
+http://code.google.com/p/google-diff-match-patch/
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
diff --git a/rhodecode/controllers/compare.py b/rhodecode/controllers/compare.py
--- a/rhodecode/controllers/compare.py
+++ b/rhodecode/controllers/compare.py
@@ -31,13 +31,14 @@ from pylons.i18n.translation import _
 
 from rhodecode.controllers.utils import parse_path_ref, get_commit_from_ref_name
 from rhodecode.lib import helpers as h
-from rhodecode.lib import diffs
+from rhodecode.lib import diffs, codeblocks
 from rhodecode.lib.auth import LoginRequired, HasRepoPermissionAnyDecorator
 from rhodecode.lib.base import BaseRepoController, render
 from rhodecode.lib.utils import safe_str
 from rhodecode.lib.utils2 import safe_unicode, str2bool
 from rhodecode.lib.vcs.exceptions import (
-    EmptyRepositoryError, RepositoryError, RepositoryRequirementError)
+    EmptyRepositoryError, RepositoryError, RepositoryRequirementError,
+    NodeDoesNotExistError)
 from rhodecode.model.db import Repository, ChangesetStatus
 
 log = logging.getLogger(__name__)
@@ -78,7 +79,7 @@ class CompareController(BaseRepoControll
     def index(self, repo_name):
         c.compare_home = True
         c.commit_ranges = []
-        c.files = []
+        c.diffset = None
         c.limited_diff = False
         source_repo = c.rhodecode_db_repo.repo_name
         target_repo = request.GET.get('target_repo', source_repo)
@@ -239,28 +240,23 @@ class CompareController(BaseRepoControll
             commit1=source_commit, commit2=target_commit,
             path1=source_path, path=target_path)
         diff_processor = diffs.DiffProcessor(
-            txtdiff, format='gitdiff', diff_limit=diff_limit,
+            txtdiff, format='newdiff', diff_limit=diff_limit,
             file_limit=file_limit, show_full_diff=c.fulldiff)
         _parsed = diff_processor.prepare()
 
-        c.limited_diff = False
-        if isinstance(_parsed, diffs.LimitedDiffContainer):
-            c.limited_diff = True
+        def _node_getter(commit):
+            """ Returns a function that returns a node for a commit or None """
+            def get_node(fname):
+                try:
+                    return commit.get_node(fname)
+                except NodeDoesNotExistError:
+                    return None
+            return get_node
 
-        c.files = []
-        c.changes = {}
-        c.lines_added = 0
-        c.lines_deleted = 0
-        for f in _parsed:
-            st = f['stats']
-            if not st['binary']:
-                c.lines_added += st['added']
-                c.lines_deleted += st['deleted']
-            fid = h.FID('', f['filename'])
-            c.files.append([fid, f['operation'], f['filename'], f['stats'], f])
-            htmldiff = diff_processor.as_html(
-                enable_comments=False, parsed_lines=[f])
-            c.changes[fid] = [f['operation'], f['filename'], htmldiff, f]
+        c.diffset = codeblocks.DiffSet(
+            source_node_getter=_node_getter(source_commit),
+            target_node_getter=_node_getter(target_commit),
+        ).render_patchset(_parsed, source_ref, target_ref)
 
         c.preview_mode = merge
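A minimal, self-contained sketch (not part of the patch) of the node-getter pattern used above: DiffSet calls these closures lazily to fetch file nodes for "real" highlighting, and a None result simply means the file does not exist on that side (added or deleted files). FakeCommit is a hypothetical stand-in for a vcs commit object.

    class NodeDoesNotExistError(Exception):
        pass

    class FakeCommit(object):
        def __init__(self, files):
            self.files = files  # {filename: content}

        def get_node(self, fname):
            if fname not in self.files:
                raise NodeDoesNotExistError(fname)
            return self.files[fname]

    def _node_getter(commit):
        def get_node(fname):
            try:
                return commit.get_node(fname)
            except NodeDoesNotExistError:
                return None
        return get_node

    get_node = _node_getter(FakeCommit({'setup.py': 'print("hi")'}))
    assert get_node('setup.py') == 'print("hi")'
    assert get_node('missing.txt') is None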
diff --git a/rhodecode/lib/codeblocks.py b/rhodecode/lib/codeblocks.py
--- a/rhodecode/lib/codeblocks.py
+++ b/rhodecode/lib/codeblocks.py
@@ -19,13 +19,17 @@
 # and proprietary license terms, please see https://rhodecode.com/licenses/
 
 import logging
+import difflib
 from itertools import groupby
 
 from pygments import lex
 from pygments.formatters.html import _get_ttype_class as pygment_token_class
-from rhodecode.lib.helpers import get_lexer_for_filenode, html_escape
+from rhodecode.lib.helpers import (
+    get_lexer_for_filenode, get_lexer_safe, html_escape)
 from rhodecode.lib.utils2 import AttributeDict
 from rhodecode.lib.vcs.nodes import FileNode
+from rhodecode.lib.diff_match_patch import diff_match_patch
+from rhodecode.lib.diffs import LimitedDiffContainer
 from pygments.lexers import get_lexer_by_name
 
 plain_text_lexer = get_lexer_by_name(
@@ -38,7 +42,7 @@ log = logging.getLogger()
 
 def filenode_as_lines_tokens(filenode, lexer=None):
     lexer = lexer or get_lexer_for_filenode(filenode)
     log.debug('Generating file node pygment tokens for %s, %s', lexer, filenode)
-    tokens = tokenize_string(filenode.content, get_lexer_for_filenode(filenode))
+    tokens = tokenize_string(filenode.content, lexer)
     lines = split_token_stream(tokens, split_string='\n')
     rv = list(lines)
     return rv
@@ -146,7 +150,11 @@ def render_tokenstream(tokenstream):
                 result.append(u'<%s>' % op_tag)
 
             escaped_text = html_escape(token_text)
-            escaped_text = escaped_text.replace('\n', '<nl>\n</nl>')
+
+            # TODO: dan: investigate showing hidden characters like space/nl/tab
+            # escaped_text = escaped_text.replace(' ', '<sp> </sp>')
+            # escaped_text = escaped_text.replace('\n', '<nl>\n</nl>')
+            # escaped_text = escaped_text.replace('\t', '<tab>\t</tab>')
 
             result.append(escaped_text)
@@ -212,3 +220,416 @@ def rollup_tokenstream(tokenstream):
             ops.append((token_op, ''.join(text_buffer)))
         result.append((token_class, ops))
     return result
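The tokens_diff() helper added below builds on two stdlib difflib primitives: SequenceMatcher.ratio() as a cheap similarity gate (line pairs scoring under 0.6 get plain del/ins, with no intra-line diff) and get_opcodes() for block-level operations. A standalone illustration, stdlib only:

    import difflib

    old = "c.files = []"
    new = "c.diffset = None"

    sm = difflib.SequenceMatcher(None, old, new)
    print(sm.ratio())  # similarity in [0, 1]; below 0.6 tokens_diff bails out

    for tag, o1, o2, n1, n2 in sm.get_opcodes():
        # tag is one of 'equal', 'delete', 'insert', 'replace'
        print(tag, repr(old[o1:o2]), '->', repr(new[n1:n2]))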
+
+
+def tokens_diff(old_tokens, new_tokens, use_diff_match_patch=True):
+    """
+    Converts a list of (token_class, token_text) tuples to a list of
+    (token_class, token_op, token_text) tuples where token_op is one of
+    ('ins', 'del', '')
+
+    :param old_tokens: list of (token_class, token_text) tuples of old line
+    :param new_tokens: list of (token_class, token_text) tuples of new line
+    :param use_diff_match_patch: boolean, will use google's diff match patch
+        library which has options to 'smooth' out the character by character
+        differences making nicer ins/del blocks
+    """
+
+    old_tokens_result = []
+    new_tokens_result = []
+
+    similarity = difflib.SequenceMatcher(None,
+        ''.join(token_text for token_class, token_text in old_tokens),
+        ''.join(token_text for token_class, token_text in new_tokens)
+    ).ratio()
+
+    if similarity < 0.6:  # return early, the blocks are too different
+        for token_class, token_text in old_tokens:
+            old_tokens_result.append((token_class, '', token_text))
+        for token_class, token_text in new_tokens:
+            new_tokens_result.append((token_class, '', token_text))
+        return old_tokens_result, new_tokens_result, similarity
+
+    token_sequence_matcher = difflib.SequenceMatcher(None,
+        [x[1] for x in old_tokens],
+        [x[1] for x in new_tokens])
+
+    for tag, o1, o2, n1, n2 in token_sequence_matcher.get_opcodes():
+        # check the differences by token block types first to give a
+        # nicer "block" level replacement vs character diffs
+
+        if tag == 'equal':
+            for token_class, token_text in old_tokens[o1:o2]:
+                old_tokens_result.append((token_class, '', token_text))
+            for token_class, token_text in new_tokens[n1:n2]:
+                new_tokens_result.append((token_class, '', token_text))
+        elif tag == 'delete':
+            for token_class, token_text in old_tokens[o1:o2]:
+                old_tokens_result.append((token_class, 'del', token_text))
+        elif tag == 'insert':
+            for token_class, token_text in new_tokens[n1:n2]:
+                new_tokens_result.append((token_class, 'ins', token_text))
+        elif tag == 'replace':
+            # if same type token blocks must be replaced, do a diff on the
+            # characters in the token blocks to show individual changes
+
+            old_char_tokens = []
+            new_char_tokens = []
+            for token_class, token_text in old_tokens[o1:o2]:
+                for char in token_text:
+                    old_char_tokens.append((token_class, char))
+
+            for token_class, token_text in new_tokens[n1:n2]:
+                for char in token_text:
+                    new_char_tokens.append((token_class, char))
+
+            old_string = ''.join([token_text for
+                token_class, token_text in old_char_tokens])
+            new_string = ''.join([token_text for
+                token_class, token_text in new_char_tokens])
+
+            char_sequence = difflib.SequenceMatcher(
+                None, old_string, new_string)
+            copcodes = char_sequence.get_opcodes()
+            obuffer, nbuffer = [], []
+
+            if use_diff_match_patch:
+                dmp = diff_match_patch()
+                dmp.Diff_EditCost = 11  # TODO: dan: extract this to a setting
+                reps = dmp.diff_main(old_string, new_string)
+                dmp.diff_cleanupEfficiency(reps)
+
+                a, b = 0, 0
+                for op, rep in reps:
+                    l = len(rep)
+                    if op == 0:
+                        for i, c in enumerate(rep):
+                            obuffer.append((old_char_tokens[a+i][0], '', c))
+                            nbuffer.append((new_char_tokens[b+i][0], '', c))
+                        a += l
+                        b += l
+                    elif op == -1:
+                        for i, c in enumerate(rep):
+                            obuffer.append((old_char_tokens[a+i][0], 'del', c))
+                        a += l
+                    elif op == 1:
+                        for i, c in enumerate(rep):
+                            nbuffer.append((new_char_tokens[b+i][0], 'ins', c))
+                        b += l
+            else:
+                for ctag, co1, co2, cn1, cn2 in copcodes:
+                    if ctag == 'equal':
+                        for token_class, token_text in old_char_tokens[co1:co2]:
+                            obuffer.append((token_class, '', token_text))
+                        for token_class, token_text in new_char_tokens[cn1:cn2]:
+                            nbuffer.append((token_class, '', token_text))
+                    elif ctag == 'delete':
+                        for token_class, token_text in old_char_tokens[co1:co2]:
+                            obuffer.append((token_class, 'del', token_text))
+                    elif ctag == 'insert':
+                        for token_class, token_text in new_char_tokens[cn1:cn2]:
+                            nbuffer.append((token_class, 'ins', token_text))
+                    elif ctag == 'replace':
+                        for token_class, token_text in old_char_tokens[co1:co2]:
+                            obuffer.append((token_class, 'del', token_text))
+                        for token_class, token_text in new_char_tokens[cn1:cn2]:
+                            nbuffer.append((token_class, 'ins', token_text))
+
+            old_tokens_result.extend(obuffer)
+            new_tokens_result.extend(nbuffer)
+
+    return old_tokens_result, new_tokens_result, similarity
+
+
+class DiffSet(object):
+    """
+    An object for parsing the diff result from diffs.DiffProcessor and
+    adding highlighting, side by side/unified renderings and line diffs
+    """
+
+    HL_REAL = 'REAL'  # highlights using
original file, slow + HL_FAST = 'FAST' # highlights using just the line, fast but not correct + # in the case of multiline code + HL_NONE = 'NONE' # no highlighting, fastest + + def __init__(self, highlight_mode=HL_REAL, + source_node_getter=lambda filename: None, + target_node_getter=lambda filename: None, + source_nodes=None, target_nodes=None, + max_file_size_limit=150 * 1024, # files over this size will + # use fast highlighting + ): + + self.highlight_mode = highlight_mode + self.highlighted_filenodes = {} + self.source_node_getter = source_node_getter + self.target_node_getter = target_node_getter + self.source_nodes = source_nodes or {} + self.target_nodes = target_nodes or {} + + + self.max_file_size_limit = max_file_size_limit + + def render_patchset(self, patchset, source_ref=None, target_ref=None): + diffset = AttributeDict(dict( + lines_added=0, + lines_deleted=0, + changed_files=0, + files=[], + limited_diff=isinstance(patchset, LimitedDiffContainer), + source_ref=source_ref, + target_ref=target_ref, + )) + for patch in patchset: + filediff = self.render_patch(patch) + filediff.diffset = diffset + diffset.files.append(filediff) + diffset.changed_files += 1 + if not patch['stats']['binary']: + diffset.lines_added += patch['stats']['added'] + diffset.lines_deleted += patch['stats']['deleted'] + + return diffset + + _lexer_cache = {} + def _get_lexer_for_filename(self, filename): + # cached because we might need to call it twice for source/target + if filename not in self._lexer_cache: + self._lexer_cache[filename] = get_lexer_safe(filepath=filename) + return self._lexer_cache[filename] + + def render_patch(self, patch): + log.debug('rendering diff for %r' % patch['filename']) + + source_filename = patch['original_filename'] + target_filename = patch['filename'] + + source_lexer = plain_text_lexer + target_lexer = plain_text_lexer + + if not patch['stats']['binary']: + if self.highlight_mode == self.HL_REAL: + if (source_filename and patch['operation'] in ('D', 'M') + and source_filename not in self.source_nodes): + self.source_nodes[source_filename] = ( + self.source_node_getter(source_filename)) + + if (target_filename and patch['operation'] in ('A', 'M') + and target_filename not in self.target_nodes): + self.target_nodes[target_filename] = ( + self.target_node_getter(target_filename)) + + elif self.highlight_mode == self.HL_FAST: + source_lexer = self._get_lexer_for_filename(source_filename) + target_lexer = self._get_lexer_for_filename(target_filename) + + source_file = self.source_nodes.get(source_filename, source_filename) + target_file = self.target_nodes.get(target_filename, target_filename) + + source_filenode, target_filenode = None, None + + # TODO: dan: FileNode.lexer works on the content of the file - which + # can be slow - issue #4289 explains a lexer clean up - which once + # done can allow caching a lexer for a filenode to avoid the file lookup + if isinstance(source_file, FileNode): + source_filenode = source_file + source_lexer = source_file.lexer + if isinstance(target_file, FileNode): + target_filenode = target_file + target_lexer = target_file.lexer + + source_file_path, target_file_path = None, None + + if source_filename != '/dev/null': + source_file_path = source_filename + if target_filename != '/dev/null': + target_file_path = target_filename + + source_file_type = source_lexer.name + target_file_type = target_lexer.name + + op_hunks = patch['chunks'][0] + hunks = patch['chunks'][1:] + + filediff = AttributeDict({ + 'source_file_path': 
source_file_path, + 'target_file_path': target_file_path, + 'source_filenode': source_filenode, + 'target_filenode': target_filenode, + 'hunks': [], + 'source_file_type': target_file_type, + 'target_file_type': source_file_type, + 'patch': patch, + 'source_mode': patch['stats']['old_mode'], + 'target_mode': patch['stats']['new_mode'], + 'limited_diff': isinstance(patch, LimitedDiffContainer), + 'diffset': self, + }) + + for hunk in hunks: + hunkbit = self.parse_hunk(hunk, source_file, target_file) + hunkbit.filediff = filediff + filediff.hunks.append(hunkbit) + return filediff + + def parse_hunk(self, hunk, source_file, target_file): + result = AttributeDict(dict( + source_start=hunk['source_start'], + source_length=hunk['source_length'], + target_start=hunk['target_start'], + target_length=hunk['target_length'], + section_header=hunk['section_header'], + lines=[], + )) + before, after = [], [] + + for line in hunk['lines']: + if line['action'] == 'unmod': + result.lines.extend( + self.parse_lines(before, after, source_file, target_file)) + after.append(line) + before.append(line) + elif line['action'] == 'add': + after.append(line) + elif line['action'] == 'del': + before.append(line) + elif line['action'] == 'context-old': + before.append(line) + elif line['action'] == 'context-new': + after.append(line) + + result.lines.extend( + self.parse_lines(before, after, source_file, target_file)) + result.unified = self.as_unified(result.lines) + result.sideside = result.lines + return result + + def parse_lines(self, before_lines, after_lines, source_file, target_file): + # TODO: dan: investigate doing the diff comparison and fast highlighting + # on the entire before and after buffered block lines rather than by + # line, this means we can get better 'fast' highlighting if the context + # allows it - eg. 
+ # line 4: """ + # line 5: this gets highlighted as a string + # line 6: """ + + lines = [] + while before_lines or after_lines: + before, after = None, None + before_tokens, after_tokens = None, None + + if before_lines: + before = before_lines.pop(0) + if after_lines: + after = after_lines.pop(0) + + original = AttributeDict() + modified = AttributeDict() + + if before: + before_tokens = self.get_line_tokens( + line_text=before['line'], line_number=before['old_lineno'], + file=source_file) + original.lineno = before['old_lineno'] + original.content = before['line'] + original.action = self.action_to_op(before['action']) + + if after: + after_tokens = self.get_line_tokens( + line_text=after['line'], line_number=after['new_lineno'], + file=target_file) + modified.lineno = after['new_lineno'] + modified.content = after['line'] + modified.action = self.action_to_op(after['action']) + + + # diff the lines + if before_tokens and after_tokens: + o_tokens, m_tokens, similarity = tokens_diff(before_tokens, after_tokens) + original.content = render_tokenstream(o_tokens) + modified.content = render_tokenstream(m_tokens) + elif before_tokens: + original.content = render_tokenstream( + [(x[0], '', x[1]) for x in before_tokens]) + elif after_tokens: + modified.content = render_tokenstream( + [(x[0], '', x[1]) for x in after_tokens]) + + lines.append(AttributeDict({ + 'original': original, + 'modified': modified, + })) + + return lines + + def get_line_tokens(self, line_text, line_number, file=None): + filenode = None + filename = None + + if isinstance(file, basestring): + filename = file + elif isinstance(file, FileNode): + filenode = file + filename = file.unicode_path + + if self.highlight_mode == self.HL_REAL and filenode: + if line_number and file.size < self.max_file_size_limit: + return self.get_tokenized_filenode_line(file, line_number) + + if self.highlight_mode in (self.HL_REAL, self.HL_FAST) and filename: + lexer = self._get_lexer_for_filename(filename) + return list(tokenize_string(line_text, lexer)) + + return list(tokenize_string(line_text, plain_text_lexer)) + + def get_tokenized_filenode_line(self, filenode, line_number): + + if filenode not in self.highlighted_filenodes: + tokenized_lines = filenode_as_lines_tokens(filenode, filenode.lexer) + self.highlighted_filenodes[filenode] = tokenized_lines + return self.highlighted_filenodes[filenode][line_number - 1] + + def action_to_op(self, action): + return { + 'add': '+', + 'del': '-', + 'unmod': ' ', + 'context-old': ' ', + 'context-new': ' ', + }.get(action, action) + + def as_unified(self, lines): + """ Return a generator that yields the lines of a diff in unified order """ + def generator(): + buf = [] + for line in lines: + + if buf and not line.original or line.original.action == ' ': + for b in buf: + yield b + buf = [] + + if line.original: + if line.original.action == ' ': + yield (line.original.lineno, line.modified.lineno, + line.original.action, line.original.content) + continue + + if line.original.action == '-': + yield (line.original.lineno, None, + line.original.action, line.original.content) + + if line.modified.action == '+': + buf.append(( + None, line.modified.lineno, + line.modified.action, line.modified.content)) + continue + + if line.modified: + yield (None, line.modified.lineno, + line.modified.action, line.modified.content) + + for b in buf: + yield b + + return generator() diff --git a/rhodecode/lib/diff_match_patch.py b/rhodecode/lib/diff_match_patch.py new file mode 100644 --- /dev/null +++ 
b/rhodecode/lib/diff_match_patch.py @@ -0,0 +1,1919 @@ +#!/usr/bin/python2.4 + +from __future__ import division + +"""Diff Match and Patch + +Copyright 2006 Google Inc. +http://code.google.com/p/google-diff-match-patch/ + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" + +"""Functions for diff, match and patch. + +Computes the difference between two texts to create a patch. +Applies the patch onto another text, allowing for errors. +""" + +__author__ = 'fraser@google.com (Neil Fraser)' + +import math +import re +import sys +import time +import urllib + +class diff_match_patch: + """Class containing the diff, match and patch methods. + + Also contains the behaviour settings. + """ + + def __init__(self): + """Inits a diff_match_patch object with default settings. + Redefine these in your program to override the defaults. + """ + + # Number of seconds to map a diff before giving up (0 for infinity). + self.Diff_Timeout = 1.0 + # Cost of an empty edit operation in terms of edit characters. + self.Diff_EditCost = 4 + # At what point is no match declared (0.0 = perfection, 1.0 = very loose). + self.Match_Threshold = 0.5 + # How far to search for a match (0 = exact location, 1000+ = broad match). + # A match this many characters away from the expected location will add + # 1.0 to the score (0.0 is a perfect match). + self.Match_Distance = 1000 + # When deleting a large block of text (over ~64 characters), how close do + # the contents have to be to match the expected contents. (0.0 = perfection, + # 1.0 = very loose). Note that Match_Threshold controls how closely the + # end points of a delete need to match. + self.Patch_DeleteThreshold = 0.5 + # Chunk size for context length. + self.Patch_Margin = 4 + + # The number of bits in an int. + # Python has no maximum, thus to disable patch splitting set to 0. + # However to avoid long patches in certain pathological cases, use 32. + # Multiple short patches (using native ints) are much faster than long ones. + self.Match_MaxBits = 32 + + # DIFF FUNCTIONS + + # The data structure representing a diff is an array of tuples: + # [(DIFF_DELETE, "Hello"), (DIFF_INSERT, "Goodbye"), (DIFF_EQUAL, " world.")] + # which means: delete "Hello", add "Goodbye" and keep " world." + DIFF_DELETE = -1 + DIFF_INSERT = 1 + DIFF_EQUAL = 0 + + def diff_main(self, text1, text2, checklines=True, deadline=None): + """Find the differences between two texts. Simplifies the problem by + stripping any common prefix or suffix off the texts before diffing. + + Args: + text1: Old string to be diffed. + text2: New string to be diffed. + checklines: Optional speedup flag. If present and false, then don't run + a line-level diff first to identify the changed areas. + Defaults to true, which does a faster, slightly less optimal diff. + deadline: Optional time when the diff should be complete by. Used + internally for recursive calls. Users should set DiffTimeout instead. + + Returns: + Array of changes. + """ + # Set a deadline by which time the diff must be complete. 
+ if deadline == None: + # Unlike in most languages, Python counts time in seconds. + if self.Diff_Timeout <= 0: + deadline = sys.maxint + else: + deadline = time.time() + self.Diff_Timeout + + # Check for null inputs. + if text1 == None or text2 == None: + raise ValueError("Null inputs. (diff_main)") + + # Check for equality (speedup). + if text1 == text2: + if text1: + return [(self.DIFF_EQUAL, text1)] + return [] + + # Trim off common prefix (speedup). + commonlength = self.diff_commonPrefix(text1, text2) + commonprefix = text1[:commonlength] + text1 = text1[commonlength:] + text2 = text2[commonlength:] + + # Trim off common suffix (speedup). + commonlength = self.diff_commonSuffix(text1, text2) + if commonlength == 0: + commonsuffix = '' + else: + commonsuffix = text1[-commonlength:] + text1 = text1[:-commonlength] + text2 = text2[:-commonlength] + + # Compute the diff on the middle block. + diffs = self.diff_compute(text1, text2, checklines, deadline) + + # Restore the prefix and suffix. + if commonprefix: + diffs[:0] = [(self.DIFF_EQUAL, commonprefix)] + if commonsuffix: + diffs.append((self.DIFF_EQUAL, commonsuffix)) + self.diff_cleanupMerge(diffs) + return diffs + + def diff_compute(self, text1, text2, checklines, deadline): + """Find the differences between two texts. Assumes that the texts do not + have any common prefix or suffix. + + Args: + text1: Old string to be diffed. + text2: New string to be diffed. + checklines: Speedup flag. If false, then don't run a line-level diff + first to identify the changed areas. + If true, then run a faster, slightly less optimal diff. + deadline: Time when the diff should be complete by. + + Returns: + Array of changes. + """ + if not text1: + # Just add some text (speedup). + return [(self.DIFF_INSERT, text2)] + + if not text2: + # Just delete some text (speedup). + return [(self.DIFF_DELETE, text1)] + + if len(text1) > len(text2): + (longtext, shorttext) = (text1, text2) + else: + (shorttext, longtext) = (text1, text2) + i = longtext.find(shorttext) + if i != -1: + # Shorter text is inside the longer text (speedup). + diffs = [(self.DIFF_INSERT, longtext[:i]), (self.DIFF_EQUAL, shorttext), + (self.DIFF_INSERT, longtext[i + len(shorttext):])] + # Swap insertions for deletions if diff is reversed. + if len(text1) > len(text2): + diffs[0] = (self.DIFF_DELETE, diffs[0][1]) + diffs[2] = (self.DIFF_DELETE, diffs[2][1]) + return diffs + + if len(shorttext) == 1: + # Single character string. + # After the previous speedup, the character can't be an equality. + return [(self.DIFF_DELETE, text1), (self.DIFF_INSERT, text2)] + + # Check to see if the problem can be split in two. + hm = self.diff_halfMatch(text1, text2) + if hm: + # A half-match was found, sort out the return data. + (text1_a, text1_b, text2_a, text2_b, mid_common) = hm + # Send both pairs off for separate processing. + diffs_a = self.diff_main(text1_a, text2_a, checklines, deadline) + diffs_b = self.diff_main(text1_b, text2_b, checklines, deadline) + # Merge the results. + return diffs_a + [(self.DIFF_EQUAL, mid_common)] + diffs_b + + if checklines and len(text1) > 100 and len(text2) > 100: + return self.diff_lineMode(text1, text2, deadline) + + return self.diff_bisect(text1, text2, deadline) + + def diff_lineMode(self, text1, text2, deadline): + """Do a quick line-level diff on both strings, then rediff the parts for + greater accuracy. + This speedup can produce non-minimal diffs. + + Args: + text1: Old string to be diffed. + text2: New string to be diffed. 
+ deadline: Time when the diff should be complete by. + + Returns: + Array of changes. + """ + + # Scan the text on a line-by-line basis first. + (text1, text2, linearray) = self.diff_linesToChars(text1, text2) + + diffs = self.diff_main(text1, text2, False, deadline) + + # Convert the diff back to original text. + self.diff_charsToLines(diffs, linearray) + # Eliminate freak matches (e.g. blank lines) + self.diff_cleanupSemantic(diffs) + + # Rediff any replacement blocks, this time character-by-character. + # Add a dummy entry at the end. + diffs.append((self.DIFF_EQUAL, '')) + pointer = 0 + count_delete = 0 + count_insert = 0 + text_delete = '' + text_insert = '' + while pointer < len(diffs): + if diffs[pointer][0] == self.DIFF_INSERT: + count_insert += 1 + text_insert += diffs[pointer][1] + elif diffs[pointer][0] == self.DIFF_DELETE: + count_delete += 1 + text_delete += diffs[pointer][1] + elif diffs[pointer][0] == self.DIFF_EQUAL: + # Upon reaching an equality, check for prior redundancies. + if count_delete >= 1 and count_insert >= 1: + # Delete the offending records and add the merged ones. + a = self.diff_main(text_delete, text_insert, False, deadline) + diffs[pointer - count_delete - count_insert : pointer] = a + pointer = pointer - count_delete - count_insert + len(a) + count_insert = 0 + count_delete = 0 + text_delete = '' + text_insert = '' + + pointer += 1 + + diffs.pop() # Remove the dummy entry at the end. + + return diffs + + def diff_bisect(self, text1, text2, deadline): + """Find the 'middle snake' of a diff, split the problem in two + and return the recursively constructed diff. + See Myers 1986 paper: An O(ND) Difference Algorithm and Its Variations. + + Args: + text1: Old string to be diffed. + text2: New string to be diffed. + deadline: Time at which to bail if not yet complete. + + Returns: + Array of diff tuples. + """ + + # Cache the text lengths to prevent multiple calls. + text1_length = len(text1) + text2_length = len(text2) + max_d = (text1_length + text2_length + 1) // 2 + v_offset = max_d + v_length = 2 * max_d + v1 = [-1] * v_length + v1[v_offset + 1] = 0 + v2 = v1[:] + delta = text1_length - text2_length + # If the total number of characters is odd, then the front path will + # collide with the reverse path. + front = (delta % 2 != 0) + # Offsets for start and end of k loop. + # Prevents mapping of space beyond the grid. + k1start = 0 + k1end = 0 + k2start = 0 + k2end = 0 + for d in xrange(max_d): + # Bail out if deadline is reached. + if time.time() > deadline: + break + + # Walk the front path one step. + for k1 in xrange(-d + k1start, d + 1 - k1end, 2): + k1_offset = v_offset + k1 + if k1 == -d or (k1 != d and + v1[k1_offset - 1] < v1[k1_offset + 1]): + x1 = v1[k1_offset + 1] + else: + x1 = v1[k1_offset - 1] + 1 + y1 = x1 - k1 + while (x1 < text1_length and y1 < text2_length and + text1[x1] == text2[y1]): + x1 += 1 + y1 += 1 + v1[k1_offset] = x1 + if x1 > text1_length: + # Ran off the right of the graph. + k1end += 2 + elif y1 > text2_length: + # Ran off the bottom of the graph. + k1start += 2 + elif front: + k2_offset = v_offset + delta - k1 + if k2_offset >= 0 and k2_offset < v_length and v2[k2_offset] != -1: + # Mirror x2 onto top-left coordinate system. + x2 = text1_length - v2[k2_offset] + if x1 >= x2: + # Overlap detected. + return self.diff_bisectSplit(text1, text2, x1, y1, deadline) + + # Walk the reverse path one step. 
+ for k2 in xrange(-d + k2start, d + 1 - k2end, 2): + k2_offset = v_offset + k2 + if k2 == -d or (k2 != d and + v2[k2_offset - 1] < v2[k2_offset + 1]): + x2 = v2[k2_offset + 1] + else: + x2 = v2[k2_offset - 1] + 1 + y2 = x2 - k2 + while (x2 < text1_length and y2 < text2_length and + text1[-x2 - 1] == text2[-y2 - 1]): + x2 += 1 + y2 += 1 + v2[k2_offset] = x2 + if x2 > text1_length: + # Ran off the left of the graph. + k2end += 2 + elif y2 > text2_length: + # Ran off the top of the graph. + k2start += 2 + elif not front: + k1_offset = v_offset + delta - k2 + if k1_offset >= 0 and k1_offset < v_length and v1[k1_offset] != -1: + x1 = v1[k1_offset] + y1 = v_offset + x1 - k1_offset + # Mirror x2 onto top-left coordinate system. + x2 = text1_length - x2 + if x1 >= x2: + # Overlap detected. + return self.diff_bisectSplit(text1, text2, x1, y1, deadline) + + # Diff took too long and hit the deadline or + # number of diffs equals number of characters, no commonality at all. + return [(self.DIFF_DELETE, text1), (self.DIFF_INSERT, text2)] + + def diff_bisectSplit(self, text1, text2, x, y, deadline): + """Given the location of the 'middle snake', split the diff in two parts + and recurse. + + Args: + text1: Old string to be diffed. + text2: New string to be diffed. + x: Index of split point in text1. + y: Index of split point in text2. + deadline: Time at which to bail if not yet complete. + + Returns: + Array of diff tuples. + """ + text1a = text1[:x] + text2a = text2[:y] + text1b = text1[x:] + text2b = text2[y:] + + # Compute both diffs serially. + diffs = self.diff_main(text1a, text2a, False, deadline) + diffsb = self.diff_main(text1b, text2b, False, deadline) + + return diffs + diffsb + + def diff_linesToChars(self, text1, text2): + """Split two texts into an array of strings. Reduce the texts to a string + of hashes where each Unicode character represents one line. + + Args: + text1: First string. + text2: Second string. + + Returns: + Three element tuple, containing the encoded text1, the encoded text2 and + the array of unique strings. The zeroth element of the array of unique + strings is intentionally blank. + """ + lineArray = [] # e.g. lineArray[4] == "Hello\n" + lineHash = {} # e.g. lineHash["Hello\n"] == 4 + + # "\x00" is a valid character, but various debuggers don't like it. + # So we'll insert a junk entry to avoid generating a null character. + lineArray.append('') + + def diff_linesToCharsMunge(text): + """Split a text into an array of strings. Reduce the texts to a string + of hashes where each Unicode character represents one line. + Modifies linearray and linehash through being a closure. + + Args: + text: String to encode. + + Returns: + Encoded string. + """ + chars = [] + # Walk the text, pulling out a substring for each line. + # text.split('\n') would would temporarily double our memory footprint. + # Modifying text would create many large strings to garbage collect. 
+ lineStart = 0 + lineEnd = -1 + while lineEnd < len(text) - 1: + lineEnd = text.find('\n', lineStart) + if lineEnd == -1: + lineEnd = len(text) - 1 + line = text[lineStart:lineEnd + 1] + lineStart = lineEnd + 1 + + if line in lineHash: + chars.append(unichr(lineHash[line])) + else: + lineArray.append(line) + lineHash[line] = len(lineArray) - 1 + chars.append(unichr(len(lineArray) - 1)) + return "".join(chars) + + chars1 = diff_linesToCharsMunge(text1) + chars2 = diff_linesToCharsMunge(text2) + return (chars1, chars2, lineArray) + + def diff_charsToLines(self, diffs, lineArray): + """Rehydrate the text in a diff from a string of line hashes to real lines + of text. + + Args: + diffs: Array of diff tuples. + lineArray: Array of unique strings. + """ + for x in xrange(len(diffs)): + text = [] + for char in diffs[x][1]: + text.append(lineArray[ord(char)]) + diffs[x] = (diffs[x][0], "".join(text)) + + def diff_commonPrefix(self, text1, text2): + """Determine the common prefix of two strings. + + Args: + text1: First string. + text2: Second string. + + Returns: + The number of characters common to the start of each string. + """ + # Quick check for common null cases. + if not text1 or not text2 or text1[0] != text2[0]: + return 0 + # Binary search. + # Performance analysis: http://neil.fraser.name/news/2007/10/09/ + pointermin = 0 + pointermax = min(len(text1), len(text2)) + pointermid = pointermax + pointerstart = 0 + while pointermin < pointermid: + if text1[pointerstart:pointermid] == text2[pointerstart:pointermid]: + pointermin = pointermid + pointerstart = pointermin + else: + pointermax = pointermid + pointermid = (pointermax - pointermin) // 2 + pointermin + return pointermid + + def diff_commonSuffix(self, text1, text2): + """Determine the common suffix of two strings. + + Args: + text1: First string. + text2: Second string. + + Returns: + The number of characters common to the end of each string. + """ + # Quick check for common null cases. + if not text1 or not text2 or text1[-1] != text2[-1]: + return 0 + # Binary search. + # Performance analysis: http://neil.fraser.name/news/2007/10/09/ + pointermin = 0 + pointermax = min(len(text1), len(text2)) + pointermid = pointermax + pointerend = 0 + while pointermin < pointermid: + if (text1[-pointermid:len(text1) - pointerend] == + text2[-pointermid:len(text2) - pointerend]): + pointermin = pointermid + pointerend = pointermin + else: + pointermax = pointermid + pointermid = (pointermax - pointermin) // 2 + pointermin + return pointermid + + def diff_commonOverlap(self, text1, text2): + """Determine if the suffix of one string is the prefix of another. + + Args: + text1 First string. + text2 Second string. + + Returns: + The number of characters common to the end of the first + string and the start of the second string. + """ + # Cache the text lengths to prevent multiple calls. + text1_length = len(text1) + text2_length = len(text2) + # Eliminate the null case. + if text1_length == 0 or text2_length == 0: + return 0 + # Truncate the longer string. + if text1_length > text2_length: + text1 = text1[-text2_length:] + elif text1_length < text2_length: + text2 = text2[:text1_length] + text_length = min(text1_length, text2_length) + # Quick check for the worst case. + if text1 == text2: + return text_length + + # Start by looking for a single character match + # and increase length until no match is found. 
+ # Performance analysis: http://neil.fraser.name/news/2010/11/04/ + best = 0 + length = 1 + while True: + pattern = text1[-length:] + found = text2.find(pattern) + if found == -1: + return best + length += found + if found == 0 or text1[-length:] == text2[:length]: + best = length + length += 1 + + def diff_halfMatch(self, text1, text2): + """Do the two texts share a substring which is at least half the length of + the longer text? + This speedup can produce non-minimal diffs. + + Args: + text1: First string. + text2: Second string. + + Returns: + Five element Array, containing the prefix of text1, the suffix of text1, + the prefix of text2, the suffix of text2 and the common middle. Or None + if there was no match. + """ + if self.Diff_Timeout <= 0: + # Don't risk returning a non-optimal diff if we have unlimited time. + return None + if len(text1) > len(text2): + (longtext, shorttext) = (text1, text2) + else: + (shorttext, longtext) = (text1, text2) + if len(longtext) < 4 or len(shorttext) * 2 < len(longtext): + return None # Pointless. + + def diff_halfMatchI(longtext, shorttext, i): + """Does a substring of shorttext exist within longtext such that the + substring is at least half the length of longtext? + Closure, but does not reference any external variables. + + Args: + longtext: Longer string. + shorttext: Shorter string. + i: Start index of quarter length substring within longtext. + + Returns: + Five element Array, containing the prefix of longtext, the suffix of + longtext, the prefix of shorttext, the suffix of shorttext and the + common middle. Or None if there was no match. + """ + seed = longtext[i:i + len(longtext) // 4] + best_common = '' + j = shorttext.find(seed) + while j != -1: + prefixLength = self.diff_commonPrefix(longtext[i:], shorttext[j:]) + suffixLength = self.diff_commonSuffix(longtext[:i], shorttext[:j]) + if len(best_common) < suffixLength + prefixLength: + best_common = (shorttext[j - suffixLength:j] + + shorttext[j:j + prefixLength]) + best_longtext_a = longtext[:i - suffixLength] + best_longtext_b = longtext[i + prefixLength:] + best_shorttext_a = shorttext[:j - suffixLength] + best_shorttext_b = shorttext[j + prefixLength:] + j = shorttext.find(seed, j + 1) + + if len(best_common) * 2 >= len(longtext): + return (best_longtext_a, best_longtext_b, + best_shorttext_a, best_shorttext_b, best_common) + else: + return None + + # First check if the second quarter is the seed for a half-match. + hm1 = diff_halfMatchI(longtext, shorttext, (len(longtext) + 3) // 4) + # Check again based on the third quarter. + hm2 = diff_halfMatchI(longtext, shorttext, (len(longtext) + 1) // 2) + if not hm1 and not hm2: + return None + elif not hm2: + hm = hm1 + elif not hm1: + hm = hm2 + else: + # Both matched. Select the longest. + if len(hm1[4]) > len(hm2[4]): + hm = hm1 + else: + hm = hm2 + + # A half-match was found, sort out the return data. + if len(text1) > len(text2): + (text1_a, text1_b, text2_a, text2_b, mid_common) = hm + else: + (text2_a, text2_b, text1_a, text1_b, mid_common) = hm + return (text1_a, text1_b, text2_a, text2_b, mid_common) + + def diff_cleanupSemantic(self, diffs): + """Reduce the number of edits by eliminating semantically trivial + equalities. + + Args: + diffs: Array of diff tuples. + """ + changes = False + equalities = [] # Stack of indices where equalities are found. + lastequality = None # Always equal to diffs[equalities[-1]][1] + pointer = 0 # Index of current position. + # Number of chars that changed prior to the equality. 
+    length_insertions1, length_deletions1 = 0, 0
+    # Number of chars that changed after the equality.
+    length_insertions2, length_deletions2 = 0, 0
+    while pointer < len(diffs):
+      if diffs[pointer][0] == self.DIFF_EQUAL:  # Equality found.
+        equalities.append(pointer)
+        length_insertions1, length_insertions2 = length_insertions2, 0
+        length_deletions1, length_deletions2 = length_deletions2, 0
+        lastequality = diffs[pointer][1]
+      else:  # An insertion or deletion.
+        if diffs[pointer][0] == self.DIFF_INSERT:
+          length_insertions2 += len(diffs[pointer][1])
+        else:
+          length_deletions2 += len(diffs[pointer][1])
+        # Eliminate an equality that is smaller or equal to the edits on both
+        # sides of it.
+        if (lastequality and (len(lastequality) <=
+            max(length_insertions1, length_deletions1)) and
+            (len(lastequality) <= max(length_insertions2, length_deletions2))):
+          # Duplicate record.
+          diffs.insert(equalities[-1], (self.DIFF_DELETE, lastequality))
+          # Change second copy to insert.
+          diffs[equalities[-1] + 1] = (self.DIFF_INSERT,
+              diffs[equalities[-1] + 1][1])
+          # Throw away the equality we just deleted.
+          equalities.pop()
+          # Throw away the previous equality (it needs to be reevaluated).
+          if len(equalities):
+            equalities.pop()
+          if len(equalities):
+            pointer = equalities[-1]
+          else:
+            pointer = -1
+          # Reset the counters.
+          length_insertions1, length_deletions1 = 0, 0
+          length_insertions2, length_deletions2 = 0, 0
+          lastequality = None
+          changes = True
+      pointer += 1
+
+    # Normalize the diff.
+    if changes:
+      self.diff_cleanupMerge(diffs)
+    self.diff_cleanupSemanticLossless(diffs)
+
+    # Find any overlaps between deletions and insertions.
+    # e.g: <del>abcxxx</del><ins>xxxdef</ins>
+    #   -> <del>abc</del>xxx<ins>def</ins>
+    # e.g: <del>xxxabc</del><ins>defxxx</ins>
+    #   -> <ins>def</ins>xxx<del>abc</del>
+    # Only extract an overlap if it is as big as the edit ahead or behind it.
+    pointer = 1
+    while pointer < len(diffs):
+      if (diffs[pointer - 1][0] == self.DIFF_DELETE and
+          diffs[pointer][0] == self.DIFF_INSERT):
+        deletion = diffs[pointer - 1][1]
+        insertion = diffs[pointer][1]
+        overlap_length1 = self.diff_commonOverlap(deletion, insertion)
+        overlap_length2 = self.diff_commonOverlap(insertion, deletion)
+        if overlap_length1 >= overlap_length2:
+          if (overlap_length1 >= len(deletion) / 2.0 or
+              overlap_length1 >= len(insertion) / 2.0):
+            # Overlap found.  Insert an equality and trim the surrounding edits.
+            diffs.insert(pointer, (self.DIFF_EQUAL,
+                                   insertion[:overlap_length1]))
+            diffs[pointer - 1] = (self.DIFF_DELETE,
+                                  deletion[:len(deletion) - overlap_length1])
+            diffs[pointer + 1] = (self.DIFF_INSERT,
+                                  insertion[overlap_length1:])
+            pointer += 1
+        else:
+          if (overlap_length2 >= len(deletion) / 2.0 or
+              overlap_length2 >= len(insertion) / 2.0):
+            # Reverse overlap found.
+            # Insert an equality and swap and trim the surrounding edits.
+            diffs.insert(pointer, (self.DIFF_EQUAL, deletion[:overlap_length2]))
+            diffs[pointer - 1] = (self.DIFF_INSERT,
+                                  insertion[:len(insertion) - overlap_length2])
+            diffs[pointer + 1] = (self.DIFF_DELETE, deletion[overlap_length2:])
+            pointer += 1
+        pointer += 1
+      pointer += 1
+
+  def diff_cleanupSemanticLossless(self, diffs):
+    """Look for single edits surrounded on both sides by equalities
+    which can be shifted sideways to align the edit to a word boundary.
+    e.g: The c<ins>at c</ins>ame. -> The <ins>cat </ins>came.
+
+    Args:
+      diffs: Array of diff tuples.
+    """
+
+    def diff_cleanupSemanticScore(one, two):
+      """Given two strings, compute a score representing whether the
+      internal boundary falls on logical boundaries.
+      Scores range from 6 (best) to 0 (worst).
+ Closure, but does not reference any external variables. + + Args: + one: First string. + two: Second string. + + Returns: + The score. + """ + if not one or not two: + # Edges are the best. + return 6 + + # Each port of this function behaves slightly differently due to + # subtle differences in each language's definition of things like + # 'whitespace'. Since this function's purpose is largely cosmetic, + # the choice has been made to use each language's native features + # rather than force total conformity. + char1 = one[-1] + char2 = two[0] + nonAlphaNumeric1 = not char1.isalnum() + nonAlphaNumeric2 = not char2.isalnum() + whitespace1 = nonAlphaNumeric1 and char1.isspace() + whitespace2 = nonAlphaNumeric2 and char2.isspace() + lineBreak1 = whitespace1 and (char1 == "\r" or char1 == "\n") + lineBreak2 = whitespace2 and (char2 == "\r" or char2 == "\n") + blankLine1 = lineBreak1 and self.BLANKLINEEND.search(one) + blankLine2 = lineBreak2 and self.BLANKLINESTART.match(two) + + if blankLine1 or blankLine2: + # Five points for blank lines. + return 5 + elif lineBreak1 or lineBreak2: + # Four points for line breaks. + return 4 + elif nonAlphaNumeric1 and not whitespace1 and whitespace2: + # Three points for end of sentences. + return 3 + elif whitespace1 or whitespace2: + # Two points for whitespace. + return 2 + elif nonAlphaNumeric1 or nonAlphaNumeric2: + # One point for non-alphanumeric. + return 1 + return 0 + + pointer = 1 + # Intentionally ignore the first and last element (don't need checking). + while pointer < len(diffs) - 1: + if (diffs[pointer - 1][0] == self.DIFF_EQUAL and + diffs[pointer + 1][0] == self.DIFF_EQUAL): + # This is a single edit surrounded by equalities. + equality1 = diffs[pointer - 1][1] + edit = diffs[pointer][1] + equality2 = diffs[pointer + 1][1] + + # First, shift the edit as far left as possible. + commonOffset = self.diff_commonSuffix(equality1, edit) + if commonOffset: + commonString = edit[-commonOffset:] + equality1 = equality1[:-commonOffset] + edit = commonString + edit[:-commonOffset] + equality2 = commonString + equality2 + + # Second, step character by character right, looking for the best fit. + bestEquality1 = equality1 + bestEdit = edit + bestEquality2 = equality2 + bestScore = (diff_cleanupSemanticScore(equality1, edit) + + diff_cleanupSemanticScore(edit, equality2)) + while edit and equality2 and edit[0] == equality2[0]: + equality1 += edit[0] + edit = edit[1:] + equality2[0] + equality2 = equality2[1:] + score = (diff_cleanupSemanticScore(equality1, edit) + + diff_cleanupSemanticScore(edit, equality2)) + # The >= encourages trailing rather than leading whitespace on edits. + if score >= bestScore: + bestScore = score + bestEquality1 = equality1 + bestEdit = edit + bestEquality2 = equality2 + + if diffs[pointer - 1][1] != bestEquality1: + # We have an improvement, save it back to the diff. + if bestEquality1: + diffs[pointer - 1] = (diffs[pointer - 1][0], bestEquality1) + else: + del diffs[pointer - 1] + pointer -= 1 + diffs[pointer] = (diffs[pointer][0], bestEdit) + if bestEquality2: + diffs[pointer + 1] = (diffs[pointer + 1][0], bestEquality2) + else: + del diffs[pointer + 1] + pointer -= 1 + pointer += 1 + + # Define some regex patterns for matching boundaries. + BLANKLINEEND = re.compile(r"\n\r?\n$"); + BLANKLINESTART = re.compile(r"^\r?\n\r?\n"); + + def diff_cleanupEfficiency(self, diffs): + """Reduce the number of edits by eliminating operationally trivial + equalities. + + Args: + diffs: Array of diff tuples. 
+ """ + changes = False + equalities = [] # Stack of indices where equalities are found. + lastequality = None # Always equal to diffs[equalities[-1]][1] + pointer = 0 # Index of current position. + pre_ins = False # Is there an insertion operation before the last equality. + pre_del = False # Is there a deletion operation before the last equality. + post_ins = False # Is there an insertion operation after the last equality. + post_del = False # Is there a deletion operation after the last equality. + while pointer < len(diffs): + if diffs[pointer][0] == self.DIFF_EQUAL: # Equality found. + if (len(diffs[pointer][1]) < self.Diff_EditCost and + (post_ins or post_del)): + # Candidate found. + equalities.append(pointer) + pre_ins = post_ins + pre_del = post_del + lastequality = diffs[pointer][1] + else: + # Not a candidate, and can never become one. + equalities = [] + lastequality = None + + post_ins = post_del = False + else: # An insertion or deletion. + if diffs[pointer][0] == self.DIFF_DELETE: + post_del = True + else: + post_ins = True + + # Five types to be split: + # ABXYCD + # AXCD + # ABXC + # AXCD + # ABXC + + if lastequality and ((pre_ins and pre_del and post_ins and post_del) or + ((len(lastequality) < self.Diff_EditCost / 2) and + (pre_ins + pre_del + post_ins + post_del) == 3)): + # Duplicate record. + diffs.insert(equalities[-1], (self.DIFF_DELETE, lastequality)) + # Change second copy to insert. + diffs[equalities[-1] + 1] = (self.DIFF_INSERT, + diffs[equalities[-1] + 1][1]) + equalities.pop() # Throw away the equality we just deleted. + lastequality = None + if pre_ins and pre_del: + # No changes made which could affect previous entry, keep going. + post_ins = post_del = True + equalities = [] + else: + if len(equalities): + equalities.pop() # Throw away the previous equality. + if len(equalities): + pointer = equalities[-1] + else: + pointer = -1 + post_ins = post_del = False + changes = True + pointer += 1 + + if changes: + self.diff_cleanupMerge(diffs) + + def diff_cleanupMerge(self, diffs): + """Reorder and merge like edit sections. Merge equalities. + Any edit section can move as long as it doesn't cross an equality. + + Args: + diffs: Array of diff tuples. + """ + diffs.append((self.DIFF_EQUAL, '')) # Add a dummy entry at the end. + pointer = 0 + count_delete = 0 + count_insert = 0 + text_delete = '' + text_insert = '' + while pointer < len(diffs): + if diffs[pointer][0] == self.DIFF_INSERT: + count_insert += 1 + text_insert += diffs[pointer][1] + pointer += 1 + elif diffs[pointer][0] == self.DIFF_DELETE: + count_delete += 1 + text_delete += diffs[pointer][1] + pointer += 1 + elif diffs[pointer][0] == self.DIFF_EQUAL: + # Upon reaching an equality, check for prior redundancies. + if count_delete + count_insert > 1: + if count_delete != 0 and count_insert != 0: + # Factor out any common prefixies. + commonlength = self.diff_commonPrefix(text_insert, text_delete) + if commonlength != 0: + x = pointer - count_delete - count_insert - 1 + if x >= 0 and diffs[x][0] == self.DIFF_EQUAL: + diffs[x] = (diffs[x][0], diffs[x][1] + + text_insert[:commonlength]) + else: + diffs.insert(0, (self.DIFF_EQUAL, text_insert[:commonlength])) + pointer += 1 + text_insert = text_insert[commonlength:] + text_delete = text_delete[commonlength:] + # Factor out any common suffixies. 
+  def diff_cleanupMerge(self, diffs):
+    """Reorder and merge like edit sections.  Merge equalities.
+    Any edit section can move as long as it doesn't cross an equality.
+
+    Args:
+      diffs: Array of diff tuples.
+    """
+    diffs.append((self.DIFF_EQUAL, ''))  # Add a dummy entry at the end.
+    pointer = 0
+    count_delete = 0
+    count_insert = 0
+    text_delete = ''
+    text_insert = ''
+    while pointer < len(diffs):
+      if diffs[pointer][0] == self.DIFF_INSERT:
+        count_insert += 1
+        text_insert += diffs[pointer][1]
+        pointer += 1
+      elif diffs[pointer][0] == self.DIFF_DELETE:
+        count_delete += 1
+        text_delete += diffs[pointer][1]
+        pointer += 1
+      elif diffs[pointer][0] == self.DIFF_EQUAL:
+        # Upon reaching an equality, check for prior redundancies.
+        if count_delete + count_insert > 1:
+          if count_delete != 0 and count_insert != 0:
+            # Factor out any common prefixies.
+            commonlength = self.diff_commonPrefix(text_insert, text_delete)
+            if commonlength != 0:
+              x = pointer - count_delete - count_insert - 1
+              if x >= 0 and diffs[x][0] == self.DIFF_EQUAL:
+                diffs[x] = (diffs[x][0], diffs[x][1] +
+                            text_insert[:commonlength])
+              else:
+                diffs.insert(0, (self.DIFF_EQUAL, text_insert[:commonlength]))
+                pointer += 1
+              text_insert = text_insert[commonlength:]
+              text_delete = text_delete[commonlength:]
+            # Factor out any common suffixies.
+            commonlength = self.diff_commonSuffix(text_insert, text_delete)
+            if commonlength != 0:
+              diffs[pointer] = (diffs[pointer][0], text_insert[-commonlength:] +
+                                diffs[pointer][1])
+              text_insert = text_insert[:-commonlength]
+              text_delete = text_delete[:-commonlength]
+          # Delete the offending records and add the merged ones.
+          if count_delete == 0:
+            diffs[pointer - count_insert : pointer] = [
+                (self.DIFF_INSERT, text_insert)]
+          elif count_insert == 0:
+            diffs[pointer - count_delete : pointer] = [
+                (self.DIFF_DELETE, text_delete)]
+          else:
+            diffs[pointer - count_delete - count_insert : pointer] = [
+                (self.DIFF_DELETE, text_delete),
+                (self.DIFF_INSERT, text_insert)]
+          pointer = pointer - count_delete - count_insert + 1
+          if count_delete != 0:
+            pointer += 1
+          if count_insert != 0:
+            pointer += 1
+        elif pointer != 0 and diffs[pointer - 1][0] == self.DIFF_EQUAL:
+          # Merge this equality with the previous one.
+          diffs[pointer - 1] = (diffs[pointer - 1][0],
+                                diffs[pointer - 1][1] + diffs[pointer][1])
+          del diffs[pointer]
+        else:
+          pointer += 1
+
+        count_insert = 0
+        count_delete = 0
+        text_delete = ''
+        text_insert = ''
+
+    if diffs[-1][1] == '':
+      diffs.pop()  # Remove the dummy entry at the end.
+
+    # Second pass: look for single edits surrounded on both sides by equalities
+    # which can be shifted sideways to eliminate an equality.
+    # e.g: A<ins>BA</ins>C -> <ins>AB</ins>AC
+    changes = False
+    pointer = 1
+    # Intentionally ignore the first and last element (don't need checking).
+    while pointer < len(diffs) - 1:
+      if (diffs[pointer - 1][0] == self.DIFF_EQUAL and
+          diffs[pointer + 1][0] == self.DIFF_EQUAL):
+        # This is a single edit surrounded by equalities.
+        if diffs[pointer][1].endswith(diffs[pointer - 1][1]):
+          # Shift the edit over the previous equality.
+          diffs[pointer] = (diffs[pointer][0],
+              diffs[pointer - 1][1] +
+              diffs[pointer][1][:-len(diffs[pointer - 1][1])])
+          diffs[pointer + 1] = (diffs[pointer + 1][0],
+                                diffs[pointer - 1][1] + diffs[pointer + 1][1])
+          del diffs[pointer - 1]
+          changes = True
+        elif diffs[pointer][1].startswith(diffs[pointer + 1][1]):
+          # Shift the edit over the next equality.
+          diffs[pointer - 1] = (diffs[pointer - 1][0],
+              diffs[pointer - 1][1] + diffs[pointer + 1][1])
+          diffs[pointer] = (diffs[pointer][0],
+              diffs[pointer][1][len(diffs[pointer + 1][1]):] +
+              diffs[pointer + 1][1])
+          del diffs[pointer + 1]
+          changes = True
+      pointer += 1
+
+    # If shifts were made, the diff needs reordering and another shift sweep.
+    if changes:
+      self.diff_cleanupMerge(diffs)
+
+  def diff_xIndex(self, diffs, loc):
+    """loc is a location in text1, compute and return the equivalent location
+    in text2.  e.g. "The cat" vs "The big cat", 1->1, 5->8
+
+    Args:
+      diffs: Array of diff tuples.
+      loc: Location within text1.
+
+    Returns:
+      Location within text2.
+    """
+    chars1 = 0
+    chars2 = 0
+    last_chars1 = 0
+    last_chars2 = 0
+    for x in xrange(len(diffs)):
+      (op, text) = diffs[x]
+      if op != self.DIFF_INSERT:  # Equality or deletion.
+        chars1 += len(text)
+      if op != self.DIFF_DELETE:  # Equality or insertion.
+        chars2 += len(text)
+      if chars1 > loc:  # Overshot the location.
+        break
+      last_chars1 = chars1
+      last_chars2 = chars2
+
+    if len(diffs) != x and diffs[x][0] == self.DIFF_DELETE:
+      # The location was deleted.
+      return last_chars2
+    # Add the remaining len(character).
+    return last_chars2 + (loc - last_chars1)
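A quick check of diff_xIndex() with the docstring's strings, using 0-based offsets (sketch, Python 2, RhodeCode checkout on sys.path):

    from rhodecode.lib.diff_match_patch import diff_match_patch

    dmp = diff_match_patch()
    diffs = dmp.diff_main('The cat', 'The big cat')
    print(dmp.diff_xIndex(diffs, 1))  # 1: inside the common "The " prefix
    print(dmp.diff_xIndex(diffs, 4))  # 8: 'c' shifts past the inserted "big "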
+ """ + html = [] + for (op, data) in diffs: + text = (data.replace("&", "&").replace("<", "<") + .replace(">", ">").replace("\n", "¶
")) + if op == self.DIFF_INSERT: + html.append("%s" % text) + elif op == self.DIFF_DELETE: + html.append("%s" % text) + elif op == self.DIFF_EQUAL: + html.append("%s" % text) + return "".join(html) + + def diff_text1(self, diffs): + """Compute and return the source text (all equalities and deletions). + + Args: + diffs: Array of diff tuples. + + Returns: + Source text. + """ + text = [] + for (op, data) in diffs: + if op != self.DIFF_INSERT: + text.append(data) + return "".join(text) + + def diff_text2(self, diffs): + """Compute and return the destination text (all equalities and insertions). + + Args: + diffs: Array of diff tuples. + + Returns: + Destination text. + """ + text = [] + for (op, data) in diffs: + if op != self.DIFF_DELETE: + text.append(data) + return "".join(text) + + def diff_levenshtein(self, diffs): + """Compute the Levenshtein distance; the number of inserted, deleted or + substituted characters. + + Args: + diffs: Array of diff tuples. + + Returns: + Number of changes. + """ + levenshtein = 0 + insertions = 0 + deletions = 0 + for (op, data) in diffs: + if op == self.DIFF_INSERT: + insertions += len(data) + elif op == self.DIFF_DELETE: + deletions += len(data) + elif op == self.DIFF_EQUAL: + # A deletion and an insertion is one substitution. + levenshtein += max(insertions, deletions) + insertions = 0 + deletions = 0 + levenshtein += max(insertions, deletions) + return levenshtein + + def diff_toDelta(self, diffs): + """Crush the diff into an encoded string which describes the operations + required to transform text1 into text2. + E.g. =3\t-2\t+ing -> Keep 3 chars, delete 2 chars, insert 'ing'. + Operations are tab-separated. Inserted text is escaped using %xx notation. + + Args: + diffs: Array of diff tuples. + + Returns: + Delta text. + """ + text = [] + for (op, data) in diffs: + if op == self.DIFF_INSERT: + # High ascii will raise UnicodeDecodeError. Use Unicode instead. + data = data.encode("utf-8") + text.append("+" + urllib.quote(data, "!~*'();/?:@&=+$,# ")) + elif op == self.DIFF_DELETE: + text.append("-%d" % len(data)) + elif op == self.DIFF_EQUAL: + text.append("=%d" % len(data)) + return "\t".join(text) + + def diff_fromDelta(self, text1, delta): + """Given the original text1, and an encoded string which describes the + operations required to transform text1 into text2, compute the full diff. + + Args: + text1: Source string for the diff. + delta: Delta text. + + Returns: + Array of diff tuples. + + Raises: + ValueError: If invalid input. + """ + if type(delta) == unicode: + # Deltas should be composed of a subset of ascii chars, Unicode not + # required. If this encode raises UnicodeEncodeError, delta is invalid. + delta = delta.encode("ascii") + diffs = [] + pointer = 0 # Cursor in text1 + tokens = delta.split("\t") + for token in tokens: + if token == "": + # Blank tokens are ok (from a trailing \t). + continue + # Each token begins with a one character parameter which specifies the + # operation of this token (delete, insert, equality). 
+ param = token[1:] + if token[0] == "+": + param = urllib.unquote(param).decode("utf-8") + diffs.append((self.DIFF_INSERT, param)) + elif token[0] == "-" or token[0] == "=": + try: + n = int(param) + except ValueError: + raise ValueError("Invalid number in diff_fromDelta: " + param) + if n < 0: + raise ValueError("Negative number in diff_fromDelta: " + param) + text = text1[pointer : pointer + n] + pointer += n + if token[0] == "=": + diffs.append((self.DIFF_EQUAL, text)) + else: + diffs.append((self.DIFF_DELETE, text)) + else: + # Anything else is an error. + raise ValueError("Invalid diff operation in diff_fromDelta: " + + token[0]) + if pointer != len(text1): + raise ValueError( + "Delta length (%d) does not equal source text length (%d)." % + (pointer, len(text1))) + return diffs + + # MATCH FUNCTIONS + + def match_main(self, text, pattern, loc): + """Locate the best instance of 'pattern' in 'text' near 'loc'. + + Args: + text: The text to search. + pattern: The pattern to search for. + loc: The location to search around. + + Returns: + Best match index or -1. + """ + # Check for null inputs. + if text == None or pattern == None: + raise ValueError("Null inputs. (match_main)") + + loc = max(0, min(loc, len(text))) + if text == pattern: + # Shortcut (potentially not guaranteed by the algorithm) + return 0 + elif not text: + # Nothing to match. + return -1 + elif text[loc:loc + len(pattern)] == pattern: + # Perfect match at the perfect spot! (Includes case of null pattern) + return loc + else: + # Do a fuzzy compare. + match = self.match_bitap(text, pattern, loc) + return match + + def match_bitap(self, text, pattern, loc): + """Locate the best instance of 'pattern' in 'text' near 'loc' using the + Bitap algorithm. + + Args: + text: The text to search. + pattern: The pattern to search for. + loc: The location to search around. + + Returns: + Best match index or -1. + """ + # Python doesn't have a maxint limit, so ignore this check. + #if self.Match_MaxBits != 0 and len(pattern) > self.Match_MaxBits: + # raise ValueError("Pattern too long for this application.") + + # Initialise the alphabet. + s = self.match_alphabet(pattern) + + def match_bitapScore(e, x): + """Compute and return the score for a match with e errors and x location. + Accesses loc and pattern through being a closure. + + Args: + e: Number of errors in match. + x: Location of match. + + Returns: + Overall score for match (0.0 = good, 1.0 = bad). + """ + accuracy = float(e) / len(pattern) + proximity = abs(loc - x) + if not self.Match_Distance: + # Dodge divide by zero error. + return proximity and 1.0 or accuracy + return accuracy + (proximity / float(self.Match_Distance)) + + # Highest score beyond which we give up. + score_threshold = self.Match_Threshold + # Is there a nearby exact match? (speedup) + best_loc = text.find(pattern, loc) + if best_loc != -1: + score_threshold = min(match_bitapScore(0, best_loc), score_threshold) + # What about in the other direction? (speedup) + best_loc = text.rfind(pattern, loc + len(pattern)) + if best_loc != -1: + score_threshold = min(match_bitapScore(0, best_loc), score_threshold) + + # Initialise the bit arrays. + matchmask = 1 << (len(pattern) - 1) + best_loc = -1 + + bin_max = len(pattern) + len(text) + # Empty initialization added to appease pychecker. + last_rd = None + for d in xrange(len(pattern)): + # Scan for the best match each iteration allows for one more error. + # Run a binary search to determine how far from 'loc' we can stray at + # this error level. 
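
# Sketch of the match contract above (example only): match_main returns the
# best index near loc, or -1 when no candidate beats Match_Threshold
# (vendored module path assumed, as in the sketch further up).
from rhodecode.lib.diff_match_patch import diff_match_patch
dmp = diff_match_patch()
assert dmp.match_main(u'The quick brown fox', u'quick', 4) == 4  # exact hit
# Two substitutions give accuracy 0.4, still under the default 0.5 threshold:
print(dmp.match_main(u'The quikc brown fox', u'quick', 4))       # 4
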
+ bin_min = 0 + bin_mid = bin_max + while bin_min < bin_mid: + if match_bitapScore(d, loc + bin_mid) <= score_threshold: + bin_min = bin_mid + else: + bin_max = bin_mid + bin_mid = (bin_max - bin_min) // 2 + bin_min + + # Use the result from this iteration as the maximum for the next. + bin_max = bin_mid + start = max(1, loc - bin_mid + 1) + finish = min(loc + bin_mid, len(text)) + len(pattern) + + rd = [0] * (finish + 2) + rd[finish + 1] = (1 << d) - 1 + for j in xrange(finish, start - 1, -1): + if len(text) <= j - 1: + # Out of range. + charMatch = 0 + else: + charMatch = s.get(text[j - 1], 0) + if d == 0: # First pass: exact match. + rd[j] = ((rd[j + 1] << 1) | 1) & charMatch + else: # Subsequent passes: fuzzy match. + rd[j] = (((rd[j + 1] << 1) | 1) & charMatch) | ( + ((last_rd[j + 1] | last_rd[j]) << 1) | 1) | last_rd[j + 1] + if rd[j] & matchmask: + score = match_bitapScore(d, j - 1) + # This match will almost certainly be better than any existing match. + # But check anyway. + if score <= score_threshold: + # Told you so. + score_threshold = score + best_loc = j - 1 + if best_loc > loc: + # When passing loc, don't exceed our current distance from loc. + start = max(1, 2 * loc - best_loc) + else: + # Already passed loc, downhill from here on in. + break + # No hope for a (better) match at greater error levels. + if match_bitapScore(d + 1, loc) > score_threshold: + break + last_rd = rd + return best_loc + + def match_alphabet(self, pattern): + """Initialise the alphabet for the Bitap algorithm. + + Args: + pattern: The text to encode. + + Returns: + Hash of character locations. + """ + s = {} + for char in pattern: + s[char] = 0 + for i in xrange(len(pattern)): + s[pattern[i]] |= 1 << (len(pattern) - i - 1) + return s + + # PATCH FUNCTIONS + + def patch_addContext(self, patch, text): + """Increase the context until it is unique, + but don't let the pattern expand beyond Match_MaxBits. + + Args: + patch: The patch to grow. + text: Source text. + """ + if len(text) == 0: + return + pattern = text[patch.start2 : patch.start2 + patch.length1] + padding = 0 + + # Look for the first and last matches of pattern in text. If two different + # matches are found, increase the pattern length. + while (text.find(pattern) != text.rfind(pattern) and (self.Match_MaxBits == + 0 or len(pattern) < self.Match_MaxBits - self.Patch_Margin - + self.Patch_Margin)): + padding += self.Patch_Margin + pattern = text[max(0, patch.start2 - padding) : + patch.start2 + patch.length1 + padding] + # Add one chunk for good luck. + padding += self.Patch_Margin + + # Add the prefix. + prefix = text[max(0, patch.start2 - padding) : patch.start2] + if prefix: + patch.diffs[:0] = [(self.DIFF_EQUAL, prefix)] + # Add the suffix. + suffix = text[patch.start2 + patch.length1 : + patch.start2 + patch.length1 + padding] + if suffix: + patch.diffs.append((self.DIFF_EQUAL, suffix)) + + # Roll back the start points. + patch.start1 -= len(prefix) + patch.start2 -= len(prefix) + # Extend lengths. + patch.length1 += len(prefix) + len(suffix) + patch.length2 += len(prefix) + len(suffix) + + def patch_make(self, a, b=None, c=None): + """Compute a list of patches to turn text1 into text2. + Use diffs if provided, otherwise compute it ourselves. 
+ There are four ways to call this function, depending on what data is + available to the caller: + Method 1: + a = text1, b = text2 + Method 2: + a = diffs + Method 3 (optimal): + a = text1, b = diffs + Method 4 (deprecated, use method 3): + a = text1, b = text2, c = diffs + + Args: + a: text1 (methods 1,3,4) or Array of diff tuples for text1 to + text2 (method 2). + b: text2 (methods 1,4) or Array of diff tuples for text1 to + text2 (method 3) or undefined (method 2). + c: Array of diff tuples for text1 to text2 (method 4) or + undefined (methods 1,2,3). + + Returns: + Array of Patch objects. + """ + text1 = None + diffs = None + # Note that texts may arrive as 'str' or 'unicode'. + if isinstance(a, basestring) and isinstance(b, basestring) and c is None: + # Method 1: text1, text2 + # Compute diffs from text1 and text2. + text1 = a + diffs = self.diff_main(text1, b, True) + if len(diffs) > 2: + self.diff_cleanupSemantic(diffs) + self.diff_cleanupEfficiency(diffs) + elif isinstance(a, list) and b is None and c is None: + # Method 2: diffs + # Compute text1 from diffs. + diffs = a + text1 = self.diff_text1(diffs) + elif isinstance(a, basestring) and isinstance(b, list) and c is None: + # Method 3: text1, diffs + text1 = a + diffs = b + elif (isinstance(a, basestring) and isinstance(b, basestring) and + isinstance(c, list)): + # Method 4: text1, text2, diffs + # text2 is not used. + text1 = a + diffs = c + else: + raise ValueError("Unknown call format to patch_make.") + + if not diffs: + return [] # Get rid of the None case. + patches = [] + patch = patch_obj() + char_count1 = 0 # Number of characters into the text1 string. + char_count2 = 0 # Number of characters into the text2 string. + prepatch_text = text1 # Recreate the patches to determine context info. + postpatch_text = text1 + for x in xrange(len(diffs)): + (diff_type, diff_text) = diffs[x] + if len(patch.diffs) == 0 and diff_type != self.DIFF_EQUAL: + # A new patch starts here. + patch.start1 = char_count1 + patch.start2 = char_count2 + if diff_type == self.DIFF_INSERT: + # Insertion + patch.diffs.append(diffs[x]) + patch.length2 += len(diff_text) + postpatch_text = (postpatch_text[:char_count2] + diff_text + + postpatch_text[char_count2:]) + elif diff_type == self.DIFF_DELETE: + # Deletion. + patch.length1 += len(diff_text) + patch.diffs.append(diffs[x]) + postpatch_text = (postpatch_text[:char_count2] + + postpatch_text[char_count2 + len(diff_text):]) + elif (diff_type == self.DIFF_EQUAL and + len(diff_text) <= 2 * self.Patch_Margin and + len(patch.diffs) != 0 and len(diffs) != x + 1): + # Small equality inside a patch. + patch.diffs.append(diffs[x]) + patch.length1 += len(diff_text) + patch.length2 += len(diff_text) + + if (diff_type == self.DIFF_EQUAL and + len(diff_text) >= 2 * self.Patch_Margin): + # Time for a new patch. + if len(patch.diffs) != 0: + self.patch_addContext(patch, prepatch_text) + patches.append(patch) + patch = patch_obj() + # Unlike Unidiff, our patch lists have a rolling context. + # http://code.google.com/p/google-diff-match-patch/wiki/Unidiff + # Update prepatch text & pos to reflect the application of the + # just completed patch. + prepatch_text = postpatch_text + char_count1 = char_count2 + + # Update the current character count. + if diff_type != self.DIFF_INSERT: + char_count1 += len(diff_text) + if diff_type != self.DIFF_DELETE: + char_count2 += len(diff_text) + + # Pick up the leftover patch if not empty. 
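
# Sketch of patch_make method 3 (text1 plus precomputed diffs), the optimal
# form per the docstring above; texts are illustrative, not from the patch
# (vendored module path assumed):
from rhodecode.lib.diff_match_patch import diff_match_patch
dmp = diff_match_patch()
text1 = u'The quick brown fox jumps.'
diffs = dmp.diff_main(text1, u'That quick red fox jumped.')
patches = dmp.patch_make(text1, diffs)
print(dmp.patch_toText(patches))   # one or more "@@ -a,b +c,d @@" hunks
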
+ if len(patch.diffs) != 0: + self.patch_addContext(patch, prepatch_text) + patches.append(patch) + return patches + + def patch_deepCopy(self, patches): + """Given an array of patches, return another array that is identical. + + Args: + patches: Array of Patch objects. + + Returns: + Array of Patch objects. + """ + patchesCopy = [] + for patch in patches: + patchCopy = patch_obj() + # No need to deep copy the tuples since they are immutable. + patchCopy.diffs = patch.diffs[:] + patchCopy.start1 = patch.start1 + patchCopy.start2 = patch.start2 + patchCopy.length1 = patch.length1 + patchCopy.length2 = patch.length2 + patchesCopy.append(patchCopy) + return patchesCopy + + def patch_apply(self, patches, text): + """Merge a set of patches onto the text. Return a patched text, as well + as a list of true/false values indicating which patches were applied. + + Args: + patches: Array of Patch objects. + text: Old text. + + Returns: + Two element Array, containing the new text and an array of boolean values. + """ + if not patches: + return (text, []) + + # Deep copy the patches so that no changes are made to originals. + patches = self.patch_deepCopy(patches) + + nullPadding = self.patch_addPadding(patches) + text = nullPadding + text + nullPadding + self.patch_splitMax(patches) + + # delta keeps track of the offset between the expected and actual location + # of the previous patch. If there are patches expected at positions 10 and + # 20, but the first patch was found at 12, delta is 2 and the second patch + # has an effective expected position of 22. + delta = 0 + results = [] + for patch in patches: + expected_loc = patch.start2 + delta + text1 = self.diff_text1(patch.diffs) + end_loc = -1 + if len(text1) > self.Match_MaxBits: + # patch_splitMax will only provide an oversized pattern in the case of + # a monster delete. + start_loc = self.match_main(text, text1[:self.Match_MaxBits], + expected_loc) + if start_loc != -1: + end_loc = self.match_main(text, text1[-self.Match_MaxBits:], + expected_loc + len(text1) - self.Match_MaxBits) + if end_loc == -1 or start_loc >= end_loc: + # Can't find valid trailing context. Drop this patch. + start_loc = -1 + else: + start_loc = self.match_main(text, text1, expected_loc) + if start_loc == -1: + # No match found. :( + results.append(False) + # Subtract the delta for this failed patch from subsequent patches. + delta -= patch.length2 - patch.length1 + else: + # Found a match. :) + results.append(True) + delta = start_loc - expected_loc + if end_loc == -1: + text2 = text[start_loc : start_loc + len(text1)] + else: + text2 = text[start_loc : end_loc + self.Match_MaxBits] + if text1 == text2: + # Perfect match, just shove the replacement text in. + text = (text[:start_loc] + self.diff_text2(patch.diffs) + + text[start_loc + len(text1):]) + else: + # Imperfect match. + # Run a diff to get a framework of equivalent indices. + diffs = self.diff_main(text1, text2, False) + if (len(text1) > self.Match_MaxBits and + self.diff_levenshtein(diffs) / float(len(text1)) > + self.Patch_DeleteThreshold): + # The end points match, but the content is unacceptably bad. 
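
# Sketch (example only, vendored module path assumed): patch_apply returns
# (new_text, results) with one boolean per patch, so callers can tell which
# hunks found a usable context:
from rhodecode.lib.diff_match_patch import diff_match_patch
dmp = diff_match_patch()
patches = dmp.patch_make(u'The quick brown fox', u'The quick red fox')
new_text, results = dmp.patch_apply(patches, u'The quick brown fox')
assert new_text == u'The quick red fox' and results == [True]
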
+ results[-1] = False + else: + self.diff_cleanupSemanticLossless(diffs) + index1 = 0 + for (op, data) in patch.diffs: + if op != self.DIFF_EQUAL: + index2 = self.diff_xIndex(diffs, index1) + if op == self.DIFF_INSERT: # Insertion + text = text[:start_loc + index2] + data + text[start_loc + + index2:] + elif op == self.DIFF_DELETE: # Deletion + text = text[:start_loc + index2] + text[start_loc + + self.diff_xIndex(diffs, index1 + len(data)):] + if op != self.DIFF_DELETE: + index1 += len(data) + # Strip the padding off. + text = text[len(nullPadding):-len(nullPadding)] + return (text, results) + + def patch_addPadding(self, patches): + """Add some padding on text start and end so that edges can match + something. Intended to be called only from within patch_apply. + + Args: + patches: Array of Patch objects. + + Returns: + The padding string added to each side. + """ + paddingLength = self.Patch_Margin + nullPadding = "" + for x in xrange(1, paddingLength + 1): + nullPadding += chr(x) + + # Bump all the patches forward. + for patch in patches: + patch.start1 += paddingLength + patch.start2 += paddingLength + + # Add some padding on start of first diff. + patch = patches[0] + diffs = patch.diffs + if not diffs or diffs[0][0] != self.DIFF_EQUAL: + # Add nullPadding equality. + diffs.insert(0, (self.DIFF_EQUAL, nullPadding)) + patch.start1 -= paddingLength # Should be 0. + patch.start2 -= paddingLength # Should be 0. + patch.length1 += paddingLength + patch.length2 += paddingLength + elif paddingLength > len(diffs[0][1]): + # Grow first equality. + extraLength = paddingLength - len(diffs[0][1]) + newText = nullPadding[len(diffs[0][1]):] + diffs[0][1] + diffs[0] = (diffs[0][0], newText) + patch.start1 -= extraLength + patch.start2 -= extraLength + patch.length1 += extraLength + patch.length2 += extraLength + + # Add some padding on end of last diff. + patch = patches[-1] + diffs = patch.diffs + if not diffs or diffs[-1][0] != self.DIFF_EQUAL: + # Add nullPadding equality. + diffs.append((self.DIFF_EQUAL, nullPadding)) + patch.length1 += paddingLength + patch.length2 += paddingLength + elif paddingLength > len(diffs[-1][1]): + # Grow last equality. + extraLength = paddingLength - len(diffs[-1][1]) + newText = diffs[-1][1] + nullPadding[:extraLength] + diffs[-1] = (diffs[-1][0], newText) + patch.length1 += extraLength + patch.length2 += extraLength + + return nullPadding + + def patch_splitMax(self, patches): + """Look through the patches and break up any which are longer than the + maximum limit of the match algorithm. + Intended to be called only from within patch_apply. + + Args: + patches: Array of Patch objects. + """ + patch_size = self.Match_MaxBits + if patch_size == 0: + # Python has the option of not splitting strings due to its ability + # to handle integers of arbitrary precision. + return + for x in xrange(len(patches)): + if patches[x].length1 <= patch_size: + continue + bigpatch = patches[x] + # Remove the big old patch. + del patches[x] + x -= 1 + start1 = bigpatch.start1 + start2 = bigpatch.start2 + precontext = '' + while len(bigpatch.diffs) != 0: + # Create one of several smaller patches. 
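
# Sketch (example only, vendored module path assumed): the null padding added
# by patch_addPadding above is what lets an edit at the very start or end of
# the text still carry matchable context; patch_apply strips it afterwards:
from rhodecode.lib.diff_match_patch import diff_match_patch
dmp = diff_match_patch()
patches = dmp.patch_make(u'fox ate', u'box ate')   # edit at position 0
assert dmp.patch_apply(patches, u'fox ate') == (u'box ate', [True])
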
+ patch = patch_obj() + empty = True + patch.start1 = start1 - len(precontext) + patch.start2 = start2 - len(precontext) + if precontext: + patch.length1 = patch.length2 = len(precontext) + patch.diffs.append((self.DIFF_EQUAL, precontext)) + + while (len(bigpatch.diffs) != 0 and + patch.length1 < patch_size - self.Patch_Margin): + (diff_type, diff_text) = bigpatch.diffs[0] + if diff_type == self.DIFF_INSERT: + # Insertions are harmless. + patch.length2 += len(diff_text) + start2 += len(diff_text) + patch.diffs.append(bigpatch.diffs.pop(0)) + empty = False + elif (diff_type == self.DIFF_DELETE and len(patch.diffs) == 1 and + patch.diffs[0][0] == self.DIFF_EQUAL and + len(diff_text) > 2 * patch_size): + # This is a large deletion. Let it pass in one chunk. + patch.length1 += len(diff_text) + start1 += len(diff_text) + empty = False + patch.diffs.append((diff_type, diff_text)) + del bigpatch.diffs[0] + else: + # Deletion or equality. Only take as much as we can stomach. + diff_text = diff_text[:patch_size - patch.length1 - + self.Patch_Margin] + patch.length1 += len(diff_text) + start1 += len(diff_text) + if diff_type == self.DIFF_EQUAL: + patch.length2 += len(diff_text) + start2 += len(diff_text) + else: + empty = False + + patch.diffs.append((diff_type, diff_text)) + if diff_text == bigpatch.diffs[0][1]: + del bigpatch.diffs[0] + else: + bigpatch.diffs[0] = (bigpatch.diffs[0][0], + bigpatch.diffs[0][1][len(diff_text):]) + + # Compute the head context for the next patch. + precontext = self.diff_text2(patch.diffs) + precontext = precontext[-self.Patch_Margin:] + # Append the end context for this patch. + postcontext = self.diff_text1(bigpatch.diffs)[:self.Patch_Margin] + if postcontext: + patch.length1 += len(postcontext) + patch.length2 += len(postcontext) + if len(patch.diffs) != 0 and patch.diffs[-1][0] == self.DIFF_EQUAL: + patch.diffs[-1] = (self.DIFF_EQUAL, patch.diffs[-1][1] + + postcontext) + else: + patch.diffs.append((self.DIFF_EQUAL, postcontext)) + + if not empty: + x += 1 + patches.insert(x, patch) + + def patch_toText(self, patches): + """Take a list of patches and return a textual representation. + + Args: + patches: Array of Patch objects. + + Returns: + Text representation of patches. + """ + text = [] + for patch in patches: + text.append(str(patch)) + return "".join(text) + + def patch_fromText(self, textline): + """Parse a textual representation of patches and return a list of patch + objects. + + Args: + textline: Text representation of patches. + + Returns: + Array of Patch objects. + + Raises: + ValueError: If invalid input. + """ + if type(textline) == unicode: + # Patches should be composed of a subset of ascii chars, Unicode not + # required. If this encode raises UnicodeEncodeError, patch is invalid. 
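
# Sketch (example only, vendored module path assumed): the textual patch
# format parsed below round-trips with patch_toText:
from rhodecode.lib.diff_match_patch import diff_match_patch
dmp = diff_match_patch()
patches = dmp.patch_make(u'The quick brown fox', u'The quick red fox')
text = dmp.patch_toText(patches)
assert dmp.patch_toText(dmp.patch_fromText(text)) == text
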
+ textline = textline.encode("ascii")
+ patches = []
+ if not textline:
+ return patches
+ text = textline.split('\n')
+ while len(text) != 0:
+ m = re.match("^@@ -(\d+),?(\d*) \+(\d+),?(\d*) @@$", text[0])
+ if not m:
+ raise ValueError("Invalid patch string: " + text[0])
+ patch = patch_obj()
+ patches.append(patch)
+ patch.start1 = int(m.group(1))
+ if m.group(2) == '':
+ patch.start1 -= 1
+ patch.length1 = 1
+ elif m.group(2) == '0':
+ patch.length1 = 0
+ else:
+ patch.start1 -= 1
+ patch.length1 = int(m.group(2))
+
+ patch.start2 = int(m.group(3))
+ if m.group(4) == '':
+ patch.start2 -= 1
+ patch.length2 = 1
+ elif m.group(4) == '0':
+ patch.length2 = 0
+ else:
+ patch.start2 -= 1
+ patch.length2 = int(m.group(4))
+
+ del text[0]
+
+ while len(text) != 0:
+ if text[0]:
+ sign = text[0][0]
+ else:
+ sign = ''
+ line = urllib.unquote(text[0][1:])
+ line = line.decode("utf-8")
+ if sign == '+':
+ # Insertion.
+ patch.diffs.append((self.DIFF_INSERT, line))
+ elif sign == '-':
+ # Deletion.
+ patch.diffs.append((self.DIFF_DELETE, line))
+ elif sign == ' ':
+ # Minor equality.
+ patch.diffs.append((self.DIFF_EQUAL, line))
+ elif sign == '@':
+ # Start of next patch.
+ break
+ elif sign == '':
+ # Blank line? Whatever.
+ pass
+ else:
+ # WTF?
+ raise ValueError("Invalid patch mode: '%s'\n%s" % (sign, line))
+ del text[0]
+ return patches
+
+
+class patch_obj:
+ """Class representing one patch operation.
+ """
+
+ def __init__(self):
+ """Initializes with an empty list of diffs.
+ """
+ self.diffs = []
+ self.start1 = None
+ self.start2 = None
+ self.length1 = 0
+ self.length2 = 0
+
+ def __str__(self):
+ """Emulate GNU diff's format.
+ Header: @@ -382,8 +481,9 @@
+ Indices are printed as 1-based, not 0-based.
+
+ Returns:
+ The GNU diff string.
+ """
+ if self.length1 == 0:
+ coords1 = str(self.start1) + ",0"
+ elif self.length1 == 1:
+ coords1 = str(self.start1 + 1)
+ else:
+ coords1 = str(self.start1 + 1) + "," + str(self.length1)
+ if self.length2 == 0:
+ coords2 = str(self.start2) + ",0"
+ elif self.length2 == 1:
+ coords2 = str(self.start2 + 1)
+ else:
+ coords2 = str(self.start2 + 1) + "," + str(self.length2)
+ text = ["@@ -", coords1, " +", coords2, " @@\n"]
+ # Escape the body of the patch with %xx notation.
+ for (op, data) in self.diffs:
+ if op == diff_match_patch.DIFF_INSERT:
+ text.append("+")
+ elif op == diff_match_patch.DIFF_DELETE:
+ text.append("-")
+ elif op == diff_match_patch.DIFF_EQUAL:
+ text.append(" ")
+ # High ascii will raise UnicodeDecodeError. Use Unicode instead.
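
# Sketch (example only, vendored module path assumed): str(patch) emits the
# GNU-style header documented above, with 1-based coordinates:
from rhodecode.lib.diff_match_patch import diff_match_patch
dmp = diff_match_patch()
patch = dmp.patch_make(u'abcdef', u'abcXdef')[0]
print(str(patch).splitlines()[0])   # @@ -1,6 +1,7 @@
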
+ data = data.encode("utf-8")
+ text.append(urllib.quote(data, "!~*'();/?:@&=+$,# ") + "\n")
+ return "".join(text)
\ No newline at end of file
diff --git a/rhodecode/lib/diffs.py b/rhodecode/lib/diffs.py
--- a/rhodecode/lib/diffs.py
+++ b/rhodecode/lib/diffs.py
@@ -180,6 +180,8 @@ class Action(object):
 UNMODIFIED = 'unmod'
 CONTEXT = 'context'
+ CONTEXT_OLD = 'context-old'
+ CONTEXT_NEW = 'context-new'

 class DiffProcessor(object):
@@ -227,7 +229,7 @@ class DiffProcessor(object):
 self._parser = self._parse_gitdiff
 else:
 self.differ = self._highlight_line_udiff
- self._parser = self._parse_udiff
+ self._parser = self._new_parse_gitdiff

 def _copy_iterator(self):
 """
@@ -491,9 +493,181 @@ class DiffProcessor(object):

 return diff_container(sorted(_files, key=sorter))

- def _parse_udiff(self, inline_diff=True):
- raise NotImplementedError()
+
+ # FIXME: NEWDIFFS: dan: this replaces the old _escaper function
+ def _process_line(self, string):
+ """
+ Process a diff line and check the diff limit
+
+ :param string:
+ """
+
+ self.cur_diff_size += len(string)
+
+ if not self.show_full_diff and (self.cur_diff_size > self.diff_limit):
+ raise DiffLimitExceeded('Diff Limit Exceeded')
+
+ return safe_unicode(string)
+
+ # FIXME: NEWDIFFS: dan: this replaces _parse_gitdiff
+ def _new_parse_gitdiff(self, inline_diff=True):
+ _files = []
+ diff_container = lambda arg: arg
+ for chunk in self._diff.chunks():
+ head = chunk.header
+ log.debug('parsing diff %r' % head)
+
+ diff = imap(self._process_line, chunk.diff.splitlines(1))
+ raw_diff = chunk.raw
+ limited_diff = False
+ exceeds_limit = False
+ # if 'empty_file_to_modify_and_rename' in head['a_path']:
+ # 1/0
+ op = None
+ stats = {
+ 'added': 0,
+ 'deleted': 0,
+ 'binary': False,
+ 'old_mode': None,
+ 'new_mode': None,
+ 'ops': {},
+ }
+ if head['old_mode']:
+ stats['old_mode'] = head['old_mode']
+ if head['new_mode']:
+ stats['new_mode'] = head['new_mode']
+ if head['b_mode']:
+ stats['new_mode'] = head['b_mode']
+
+ if head['deleted_file_mode']:
+ op = OPS.DEL
+ stats['binary'] = True
+ stats['ops'][DEL_FILENODE] = 'deleted file'
+
+ elif head['new_file_mode']:
+ op = OPS.ADD
+ stats['binary'] = True
+ stats['old_mode'] = None
+ stats['new_mode'] = head['new_file_mode']
+ stats['ops'][NEW_FILENODE] = 'new file %s' % head['new_file_mode']
+ else: # modify operation, can be copy, rename or chmod
+
+ # CHMOD
+ if head['new_mode'] and head['old_mode']:
+ op = OPS.MOD
+ stats['binary'] = True
+ stats['ops'][CHMOD_FILENODE] = (
+ 'modified file chmod %s => %s' % (
+ head['old_mode'], head['new_mode']))
+
+ # RENAME
+ if head['rename_from'] != head['rename_to']:
+ op = OPS.MOD
+ stats['binary'] = True
+ stats['renamed'] = (head['rename_from'], head['rename_to'])
+ stats['ops'][RENAMED_FILENODE] = (
+ 'file renamed from %s to %s' % (
+ head['rename_from'], head['rename_to']))
+ # COPY
+ if head.get('copy_from') and head.get('copy_to'):
+ op = OPS.MOD
+ stats['binary'] = True
+ stats['copied'] = (head['copy_from'], head['copy_to'])
+ stats['ops'][COPIED_FILENODE] = (
+ 'file copied from %s to %s' % (
+ head['copy_from'], head['copy_to']))
+ # If our new parsed headers didn't match anything, fall back to
+ # old style detection
+ if op is None:
+ if not head['a_file'] and head['b_file']:
+ op = OPS.ADD
+ stats['binary'] = True
+ stats['new_file'] = True
+ stats['ops'][NEW_FILENODE] = 'new file'
+
+ elif head['a_file'] and not head['b_file']:
+ op = OPS.DEL
+ stats['binary'] = True
+ stats['ops'][DEL_FILENODE] = 'deleted file'
+
+ # it's neither ADD nor DELETE
+ if op is None:
+ op = OPS.MOD
+ stats['binary'] = True
+ stats['ops'][MOD_FILENODE] = 'modified file'
+
+ # a real non-binary diff
+ if head['a_file'] or head['b_file']:
+ try:
+ raw_diff, chunks, _stats = self._new_parse_lines(diff)
+ stats['binary'] = False
+ stats['added'] = _stats[0]
+ stats['deleted'] = _stats[1]
+ # explicit mark that it's a modified file
+ if op == OPS.MOD:
+ stats['ops'][MOD_FILENODE] = 'modified file'
+ exceeds_limit = len(raw_diff) > self.file_limit
+
+ # changed from _escaper function so we validate size of
+ # each file instead of the whole diff
+ # diff will hide big files but still show small ones
+ # from my tests, big files are fairly safe to be parsed
+ # but the browser is the bottleneck
+ if not self.show_full_diff and exceeds_limit:
+ raise DiffLimitExceeded('File Limit Exceeded')
+
+ except DiffLimitExceeded:
+ diff_container = lambda _diff: \
+ LimitedDiffContainer(
+ self.diff_limit, self.cur_diff_size, _diff)
+
+ exceeds_limit = len(raw_diff) > self.file_limit
+ limited_diff = True
+ chunks = []
+
+ else: # GIT format binary patch, or possibly empty diff
+ if head['bin_patch']:
+ # we have operation already extracted, but we mark simply
+ # it's a diff we won't show for binary files
+ stats['ops'][BIN_FILENODE] = 'binary diff hidden'
+ chunks = []
+
+ if chunks and not self.show_full_diff and op == OPS.DEL:
+ # if not full diff mode show deleted file contents
+ # TODO: anderson: if the view is not too big, there is no way
+ # to see the content of the file
+ chunks = []
+
+ chunks.insert(0, [{
+ 'old_lineno': '',
+ 'new_lineno': '',
+ 'action': Action.CONTEXT,
+ 'line': msg,
+ } for _op, msg in stats['ops'].iteritems()
+ if _op not in [MOD_FILENODE]])
+
+ original_filename = safe_unicode(head['a_path'])
+ _files.append({
+ 'original_filename': original_filename,
+ 'filename': safe_unicode(head['b_path']),
+ 'old_revision': head['a_blob_id'],
+ 'new_revision': head['b_blob_id'],
+ 'chunks': chunks,
+ 'raw_diff': safe_unicode(raw_diff),
+ 'operation': op,
+ 'stats': stats,
+ 'exceeds_limit': exceeds_limit,
+ 'is_limited_diff': limited_diff,
+ })
+
+
+ sorter = lambda info: {OPS.ADD: 0, OPS.MOD: 1,
+ OPS.DEL: 2}.get(info['operation'])
+
+ return diff_container(sorted(_files, key=sorter))
+
+ # FIXME: NEWDIFFS: dan: this gets replaced by _new_parse_lines
 def _parse_lines(self, diff):
 """
 Parse the diff and return data for the template.
@@ -588,6 +762,107 @@ class DiffProcessor(object):
 pass
 return ''.join(raw_diff), chunks, stats

+ # FIXME: NEWDIFFS: dan: this replaces _parse_lines
+ def _new_parse_lines(self, diff):
+ """
+ Parse the diff and return data for the template.
+ """ + + lineiter = iter(diff) + stats = [0, 0] + chunks = [] + raw_diff = [] + + try: + line = lineiter.next() + + while line: + raw_diff.append(line) + match = self._chunk_re.match(line) + + if not match: + break + + gr = match.groups() + (old_line, old_end, + new_line, new_end) = [int(x or 1) for x in gr[:-1]] + + lines = [] + hunk = { + 'section_header': gr[-1], + 'source_start': old_line, + 'source_length': old_end, + 'target_start': new_line, + 'target_length': new_end, + 'lines': lines, + } + chunks.append(hunk) + + old_line -= 1 + new_line -= 1 + + context = len(gr) == 5 + old_end += old_line + new_end += new_line + + line = lineiter.next() + + while old_line < old_end or new_line < new_end: + command = ' ' + if line: + command = line[0] + + affects_old = affects_new = False + + # ignore those if we don't expect them + if command in '#@': + continue + elif command == '+': + affects_new = True + action = Action.ADD + stats[0] += 1 + elif command == '-': + affects_old = True + action = Action.DELETE + stats[1] += 1 + else: + affects_old = affects_new = True + action = Action.UNMODIFIED + + if not self._newline_marker.match(line): + old_line += affects_old + new_line += affects_new + lines.append({ + 'old_lineno': affects_old and old_line or '', + 'new_lineno': affects_new and new_line or '', + 'action': action, + 'line': self._clean_line(line, command) + }) + raw_diff.append(line) + + line = lineiter.next() + + if self._newline_marker.match(line): + # we need to append to lines, since this is not + # counted in the line specs of diff + if affects_old: + action = Action.CONTEXT_OLD + elif affects_new: + action = Action.CONTEXT_NEW + else: + raise Exception('invalid context for no newline') + + lines.append({ + 'old_lineno': None, + 'new_lineno': None, + 'action': action, + 'line': self._clean_line(line, command) + }) + + except StopIteration: + pass + return ''.join(raw_diff), chunks, stats + def _safe_id(self, idstring): """Make a string safe for including in an id attribute. diff --git a/rhodecode/lib/vcs/backends/git/diff.py b/rhodecode/lib/vcs/backends/git/diff.py --- a/rhodecode/lib/vcs/backends/git/diff.py +++ b/rhodecode/lib/vcs/backends/git/diff.py @@ -32,11 +32,13 @@ class GitDiff(base.Diff): _header_re = re.compile(r""" #^diff[ ]--git [ ]"?a/(?P.+?)"?[ ]"?b/(?P.+?)"?\n - (?:^similarity[ ]index[ ](?P\d+)%\n - ^rename[ ]from[ ](?P[^\r\n]+)\n - ^rename[ ]to[ ](?P[^\r\n]+)(?:\n|$))? (?:^old[ ]mode[ ](?P\d+)\n ^new[ ]mode[ ](?P\d+)(?:\n|$))? + (?:^similarity[ ]index[ ](?P\d+)%(?:\n|$))? + (?:^rename[ ]from[ ](?P[^\r\n]+)\n + ^rename[ ]to[ ](?P[^\r\n]+)(?:\n|$))? + (?:^copy[ ]from[ ](?P[^\r\n]+)\n + ^copy[ ]to[ ](?P[^\r\n]+)(?:\n|$))? (?:^new[ ]file[ ]mode[ ](?P.+)(?:\n|$))? (?:^deleted[ ]file[ ]mode[ ](?P.+)(?:\n|$))? 
 (?:^index[ ](?P<a_blob_id>[0-9A-Fa-f]+)
diff --git a/rhodecode/public/css/buttons.less b/rhodecode/public/css/buttons.less
--- a/rhodecode/public/css/buttons.less
+++ b/rhodecode/public/css/buttons.less
@@ -188,6 +188,14 @@ input[type="button"] {
 padding: @padding * 1.2;
 }

+.btn-group {
+ display: inline-block;
+ .btn {
+ float: left;
+ margin: 0 0 0 -1px;
+ }
+}
+
 .btn-link {
 background: transparent;
 border: none;
@@ -326,15 +334,15 @@ input[type="submit"],
 input[type="reset"] {
 &.btn-danger {
 &:extend(.btn-danger);
-
+
 &:focus {
 outline: 0;
 }
-
+
 &:hover {
 &:extend(.btn-danger:hover);
 }
-
+
 &.btn-link {
 &:extend(.btn-link);
 color: @alert2;
diff --git a/rhodecode/public/css/code-block.less b/rhodecode/public/css/code-block.less
--- a/rhodecode/public/css/code-block.less
+++ b/rhodecode/public/css/code-block.less
@@ -646,14 +646,210 @@ pre.literal-block, .codehilite pre{
 @cb-line-height: 18px;
 @cb-line-code-padding: 10px;
+@cb-text-padding: 5px;
+@diff-pill-padding: 2px 7px;
+
+input.diff-collapse-state {
+ display: none;
+
+ &:checked + .diff { /* file diff is collapsed */
+ .cb {
+ display: none
+ }
+ .diff-collapse-indicator {
+ border-width: 9px 0 9px 15.6px;
+ border-color: transparent transparent transparent #ccc;
+ }
+ .diff-menu {
+ display: none;
+ }
+ margin: -1px 0 0 0;
+ }
+
+ &+ .diff { /* file diff is expanded */
+ .diff-collapse-indicator {
+ border-width: 15.6px 9px 0 9px;
+ border-color: #ccc transparent transparent transparent;
+ }
+ .diff-menu {
+ display: block;
+ }
+ margin: 20px 0;
+ }
+}
+.diff {
+ border: 1px solid @grey5;
+
+ /* START OVERRIDES */
+ .code-highlight {
+ border: none; // TODO: remove this border from the global
+ // .code-highlight, it doesn't belong there
+ }
+ label {
+ margin: 0; // TODO: remove this margin definition from global label
+ // it doesn't belong there - if margin on labels
+ // are needed for a form they should be defined
+ // in the form's class
+ }
+ /* END OVERRIDES */
+
+ * {
+ box-sizing: border-box;
+ }
+ .diff-anchor {
+ visibility: hidden;
+ }
+ &:hover {
+ .diff-anchor {
+ visibility: visible;
+ }
+ }
+
+ .diff-collapse-indicator {
+ width: 0;
+ height: 0;
+ border-style: solid;
+ float: left;
+ margin: 2px 2px 0 0;
+ cursor: pointer;
+ }
+
+ .diff-heading {
+ background: @grey7;
+ cursor: pointer;
+ display: block;
+ padding: 5px 10px;
+ }
+ .diff-heading:after {
+ content: "";
+ display: table;
+ clear: both;
+ }
+ .diff-heading:hover {
+ background: #e1e9f4 !important;
+ }
+
+ .diff-menu {
+ float: right;
+ a, button {
+ padding: 5px;
+ display: block;
+ float: left
+ }
+ }
+ .diff-pill {
+ display: block;
+ float: left;
+ padding: @diff-pill-padding;
+ }
+ .diff-pill-group {
+ .diff-pill {
+ opacity: .8;
+ &:first-child {
+ border-radius: @border-radius 0 0 @border-radius;
+ }
+ &:last-child {
+ border-radius: 0 @border-radius @border-radius 0;
+ }
+ &:only-child {
+ border-radius: @border-radius;
+ }
+ }
+ }
+ .diff-pill {
+ &[op="name"] {
+ background: none;
+ color: @grey2;
+ opacity: 1;
+ color: white;
+ }
+ &[op="limited"] {
+ background: @grey2;
+ color: white;
+ }
+ &[op="binary"] {
+ background: @color7;
+ color: white;
+ }
+ &[op="modified"] {
+ background: @alert1;
+ color: white;
+ }
+ &[op="renamed"] {
+ background: @color4;
+ color: white;
+ }
+ &[op="mode"] {
+ background: @grey3;
+ color: white;
+ }
+ &[op="symlink"] {
+ background: @color8;
+ color: white;
+ }
+
+ &[op="added"] { /* added lines */
+ background: @alert1;
+ color: white;
+ }
+ &[op="deleted"] { /* deleted lines */
+ background: @alert2;
+ color: white;
+ } + + &[op="created"] { /* created file */ + background: @alert1; + color: white; + } + &[op="removed"] { /* deleted file */ + background: @color5; + color: white; + } + } + + .diff-collapse-button, .diff-expand-button { + cursor: pointer; + } + .diff-collapse-button { + display: inline; + } + .diff-expand-button { + display: none; + } + .diff-collapsed .diff-collapse-button { + display: none; + } + .diff-collapsed .diff-expand-button { + display: inline; + } +} table.cb { width: 100%; border-collapse: collapse; - margin-bottom: 10px; - * { - box-sizing: border-box; + .cb-text { + padding: @cb-text-padding; + } + .cb-hunk { + padding: @cb-text-padding; + } + .cb-expand { + display: none; + } + .cb-collapse { + display: inline; + } + &.cb-collapsed { + .cb-line { + display: none; + } + .cb-expand { + display: inline; + } + .cb-collapse { + display: none; + } } /* intentionally general selector since .cb-line-selected must override it @@ -663,18 +859,45 @@ table.cb { .cb-line-fresh .cb-content { background: white !important; } + .cb-warning { + background: #fff4dd; + } - tr.cb-annotate { - border-top: 1px solid #eee; + &.cb-diff-sideside { + td { + &.cb-content { + width: 50%; + } + } + } - &+ .cb-line { + tr { + &.cb-annotate { border-top: 1px solid #eee; + + &+ .cb-line { + border-top: 1px solid #eee; + } + + &:first-child { + border-top: none; + &+ .cb-line { + border-top: none; + } + } } - &:first-child { - border-top: none; - &+ .cb-line { - border-top: none; + &.cb-hunk { + font-family: @font-family-monospace; + color: rgba(0, 0, 0, 0.3); + + td { + &:first-child { + background: #edf2f9; + } + &:last-child { + background: #f4f7fb; + } } } } @@ -686,9 +909,14 @@ table.cb { &.cb-content { font-size: 12.35px; + &.cb-line-selected .cb-code { + background: @comment-highlight-color !important; + } + span.cb-code { line-height: @cb-line-height; padding-left: @cb-line-code-padding; + padding-right: @cb-line-code-padding; display: block; white-space: pre-wrap; font-family: @font-family-monospace; @@ -714,14 +942,38 @@ table.cb { a { display: block; padding-right: @cb-line-code-padding; + padding-left: @cb-line-code-padding; line-height: @cb-line-height; color: rgba(0, 0, 0, 0.3); } } - &.cb-content { - &.cb-line-selected .cb-code { - background: @comment-highlight-color !important; + &.cb-empty { + background: @grey7; + } + + ins { + color: black; + background: #a6f3a6; + text-decoration: none; + } + del { + color: black; + background: #f8cbcb; + text-decoration: none; + } + &.cb-addition { + background: #ecffec; + + &.blob-lineno { + background: #ddffdd; + } + } + &.cb-deletion { + background: #ffecec; + + &.blob-lineno { + background: #ffdddd; } } diff --git a/rhodecode/public/js/src/rhodecode.js b/rhodecode/public/js/src/rhodecode.js --- a/rhodecode/public/js/src/rhodecode.js +++ b/rhodecode/public/js/src/rhodecode.js @@ -221,14 +221,32 @@ var formatSelect2SelectionRefs = functio }; // takes a given html element and scrolls it down offset pixels -function offsetScroll(element, offset){ - setTimeout(function(){ +function offsetScroll(element, offset) { + setTimeout(function() { var location = element.offset().top; // some browsers use body, some use html $('html, body').animate({ scrollTop: (location - offset) }); }, 100); } +// scroll an element `percent`% from the top of page in `time` ms +function scrollToElement(element, percent, time) { + percent = (percent === undefined ? 25 : percent); + time = (time === undefined ? 
100 : time); + + var $element = $(element); + var elOffset = $element.offset().top; + var elHeight = $element.height(); + var windowHeight = $(window).height(); + var offset = elOffset; + if (elHeight < windowHeight) { + offset = elOffset - ((windowHeight / (100 / percent)) - (elHeight / 2)); + } + setTimeout(function() { + $('html, body').animate({ scrollTop: offset}); + }, time); +} + /** * global hooks after DOM is loaded */ @@ -418,6 +436,10 @@ function offsetScroll(element, offset){ var result = splitDelimitedHash(location.hash); var loc = result.loc; if (loc.length > 1) { + + var highlightable_line_tds = []; + + // source code line format var page_highlights = loc.substring( loc.indexOf('#') + 1).split('L'); @@ -442,33 +464,27 @@ function offsetScroll(element, offset){ for (pos in h_lines) { var line_td = $('td.cb-lineno#L' + h_lines[pos]); if (line_td.length) { - line_td.addClass('cb-line-selected'); // line number td - line_td.next().addClass('cb-line-selected'); // line content + highlightable_line_tds.push(line_td); } } - var first_line_td = $('td.cb-lineno#L' + h_lines[0]); - if (first_line_td.length) { - var elOffset = first_line_td.offset().top; - var elHeight = first_line_td.height(); - var windowHeight = $(window).height(); - var offset; + } - if (elHeight < windowHeight) { - offset = elOffset - ((windowHeight / 4) - (elHeight / 2)); - } - else { - offset = elOffset; - } - $(function() { // let browser scroll to hash first, then - // scroll the line to the middle of page - setTimeout(function() { - $('html, body').animate({ scrollTop: offset }); - }, 100); - }); - $.Topic('/ui/plugins/code/anchor_focus').prepareOrPublish({ - lineno: first_line_td, - remainder: result.remainder}); - } + // now check a direct id reference (diff page) + if ($(loc).length && $(loc).hasClass('cb-lineno')) { + highlightable_line_tds.push($(loc)); + } + $.each(highlightable_line_tds, function (i, $td) { + $td.addClass('cb-line-selected'); // line number td + $td.next().addClass('cb-line-selected'); // line content + }); + + if (highlightable_line_tds.length) { + var $first_line_td = highlightable_line_tds[0]; + scrollToElement($first_line_td); + $.Topic('/ui/plugins/code/anchor_focus').prepareOrPublish({ + lineno: $first_line_td, + remainder: result.remainder + }); } } } diff --git a/rhodecode/templates/codeblocks/diffs.html b/rhodecode/templates/codeblocks/diffs.html new file mode 100644 --- /dev/null +++ b/rhodecode/templates/codeblocks/diffs.html @@ -0,0 +1,398 @@ +<%def name="diff_line_anchor(filename, line, type)"><% +return '%s_%s_%i' % (h.safeid(filename), type, line) +%> + +<%def name="action_class(action)"><% + return { + '-': 'cb-deletion', + '+': 'cb-addition', + ' ': 'cb-context', + }.get(action, 'cb-empty') +%> + +<%def name="op_class(op_id)"><% + return { + DEL_FILENODE: 'deletion', # file deleted + BIN_FILENODE: 'warning' # binary diff hidden + }.get(op_id, 'addition') +%> + +<%def name="link_for(**kw)"><% +new_args = request.GET.mixed() +new_args.update(kw) +return h.url('', **new_args) +%> + +<%def name="render_diffset(diffset, + + # collapse all file diff entries when there are more than this amount of files in the diff + collapse_when_files_over=20, + + # collapse lines in the diff when more than this amount of lines changed in the file diff + lines_changed_limit=500, +)"> +<% +# TODO: dan: move this to an argument - and set a cookie so that it is saved +# default option for future requests +diff_mode = request.GET.get('diffmode', 'sideside') +if diff_mode not in ('sideside', 
'unified'): + diff_mode = 'sideside' + +collapse_all = len(diffset.files) > collapse_when_files_over +%> + +%if diff_mode == 'sideside': + +%endif + +% if diffset.limited_diff: +
+ ${_('The requested commit is too big and content was truncated.')} ${_('Show full diff')} +
+% endif + +
+
+ %if diffset.files: + + + %endif +

+ %if diffset.limited_diff: + ${ungettext('%(num)s file changed', '%(num)s files changed', diffset.changed_files) % {'num': diffset.changed_files}} + %else: + ${ungettext('%(num)s file changed: %(linesadd)s inserted, ''%(linesdel)s deleted', + '%(num)s files changed: %(linesadd)s inserted, %(linesdel)s deleted', diffset.changed_files) % {'num': diffset.changed_files, 'linesadd': diffset.lines_added, 'linesdel': diffset.lines_deleted}} + %endif +

+
+ + %if not diffset.files: +

${_('No files')}

+ %endif + +
+ %for i, filediff in enumerate(diffset.files): + <% + lines_changed = filediff['patch']['stats']['added'] + filediff['patch']['stats']['deleted'] + over_lines_changed_limit = lines_changed > lines_changed_limit + %> + +
+ + ${diff_menu(filediff)} + + %if not filediff.hunks: + %for op_id, op_text in filediff['patch']['stats']['ops'].items(): + + + + %endfor + %endif + %if over_lines_changed_limit: + + + + %endif + %if filediff.patch['is_limited_diff']: + + + + %endif + %for hunk in filediff.hunks: + + + + + %if diff_mode == 'unified': + ${render_hunk_lines_unified(hunk)} + %elif diff_mode == 'sideside': + ${render_hunk_lines_sideside(hunk)} + %else: + + + + %endif + %endfor +
+ %if op_id == DEL_FILENODE: + ${_('File was deleted')} + %elif op_id == BIN_FILENODE: + ${_('Binary file hidden')} + %else: + ${op_text} + %endif +
+ ${_('This diff has been collapsed as it changes many lines, (%i lines changed)' % lines_changed)} + ${_('Show them')} + + ${_('Hide them')} + +
+ ${_('The requested commit is too big and content was truncated.')} ${_('Show full diff')} +
+ ## TODO: dan: add ajax loading of more context here + ## + + ## + + @@ + -${hunk.source_start},${hunk.source_length} + +${hunk.target_start},${hunk.target_length} + ${hunk.section_header} +
unknown diff mode
+
+ %endfor +
+
+ + +<%def name="diff_ops(filediff)"> +<% +stats = filediff['patch']['stats'] +from rhodecode.lib.diffs import NEW_FILENODE, DEL_FILENODE, \ + MOD_FILENODE, RENAMED_FILENODE, CHMOD_FILENODE, BIN_FILENODE +%> + + %if filediff.source_file_path and filediff.target_file_path: + %if filediff.source_file_path != filediff.target_file_path: # file was renamed + ${filediff.target_file_path}${filediff.source_file_path} + %else: + ## file was modified + ${filediff.source_file_path} + %endif + %else: + %if filediff.source_file_path: + ## file was deleted + ${filediff.source_file_path} + %else: + ## file was added + ${filediff.target_file_path} + %endif + %endif + + + %if filediff.patch['is_limited_diff']: + limited diff + %endif + %if RENAMED_FILENODE in stats['ops']: + renamed + %endif + + %if NEW_FILENODE in stats['ops']: + created + %if filediff['target_mode'].startswith('120'): + symlink + %else: + ${nice_mode(filediff['target_mode'])} + %endif + %endif + + %if DEL_FILENODE in stats['ops']: + removed + %endif + + %if CHMOD_FILENODE in stats['ops']: + + ${nice_mode(filediff['source_mode'])} ➡ ${nice_mode(filediff['target_mode'])} + + %endif + + + + + + %if BIN_FILENODE in stats['ops']: + binary + %if MOD_FILENODE in stats['ops']: + modified + %endif + %endif + %if stats['deleted']: + -${stats['deleted']} + %endif + %if stats['added']: + +${stats['added']} + %endif + + + + +<%def name="nice_mode(filemode)"> + ${filemode.startswith('100') and filemode[3:] or filemode} + + +<%def name="diff_menu(filediff)"> +
+%if filediff.diffset.source_ref: + %if filediff.patch['operation'] in ['D', 'M']: + + ${_('Show file before')} + + %else: + + ${_('Show file before')} + + %endif + %if filediff.patch['operation'] in ['A', 'M']: + + ${_('Show file after')} + + %else: + + ${_('Show file after')} + + %endif + + ${_('Raw diff')} + + + ${_('Download diff')} + +%endif +
+ + + +<%def name="render_hunk_lines_sideside(hunk)"> + %for i, line in enumerate(hunk.sideside): + <% + old_line_anchor, new_line_anchor = None, None + if line.original.lineno: + old_line_anchor = diff_line_anchor(hunk.filediff.source_file_path, line.original.lineno, 'o') + if line.modified.lineno: + new_line_anchor = diff_line_anchor(hunk.filediff.target_file_path, line.modified.lineno, 'n') + %> + + + %if line.original.lineno: + ${line.original.lineno} + %endif + + ${line.original.action} ${line.original.content or '' | n} + + + %if line.modified.lineno: + ${line.modified.lineno} + %endif + + + ${line.modified.action} ${line.modified.content or '' | n} + + + %endfor + + + +<%def name="render_hunk_lines_unified(hunk)"> + %for old_line_no, new_line_no, action, content in hunk.unified: + <% + old_line_anchor, new_line_anchor = None, None + if old_line_no: + old_line_anchor = diff_line_anchor(hunk.filediff.source_file_path, old_line_no, 'o') + if new_line_no: + new_line_anchor = diff_line_anchor(hunk.filediff.target_file_path, new_line_no, 'n') + %> + + + %if old_line_anchor: + ${old_line_no} + %endif + + + %if new_line_anchor: + ${new_line_no} + %endif + + ${action} ${content or '' | n} + + + %endfor + diff --git a/rhodecode/templates/compare/compare_diff.html b/rhodecode/templates/compare/compare_diff.html --- a/rhodecode/templates/compare/compare_diff.html +++ b/rhodecode/templates/compare/compare_diff.html @@ -1,5 +1,6 @@ ## -*- coding: utf-8 -*- <%inherit file="/base/base.html"/> +<%namespace name="cbdiffs" file="/codeblocks/diffs.html"/> <%def name="title()"> %if c.compare_home: @@ -53,7 +54,7 @@ ${_('Swap')} %endif
${_('Compare Commits')}
- %if c.files: + %if c.diffset and c.diffset.files:
${_('Comment')}
%endif @@ -248,72 +249,7 @@
##CS <%include file="compare_commits.html"/> - - ## FILES -
- - ${_('Expand All')} | ${_('Collapse All')} - -

- ${diff_block.diff_summary_text(len(c.files), c.lines_added, c.lines_deleted, c.limited_diff)} -

-
-
- %if not c.files: -

${_('No files')}

- %endif - - <%namespace name="diff_block" file="/changeset/diff_block.html"/> - %for FID, change, path, stats, file in c.files: - - - - - - - - - - - - - - - - - - %endfor -
- - - - - - -
${h.fancy_file_stats(stats)}
-
- -
-
- ${diff_block.diff_block_simple([c.changes[FID]])} -
- % if c.limited_diff: - ${diff_block.changeset_message()} - % endif + ${cbdiffs.render_diffset(c.diffset)}
%endif
diff --git a/rhodecode/tests/functional/test_changeset.py b/rhodecode/tests/functional/test_changeset.py --- a/rhodecode/tests/functional/test_changeset.py +++ b/rhodecode/tests/functional/test_changeset.py @@ -104,7 +104,7 @@ class TestChangesetController(object): response.mustcontain(_shorten_commit_id(commit_ids[0])) response.mustcontain(_shorten_commit_id(commit_ids[1])) - + # svn is special if backend.alias == 'svn': response.mustcontain('new file 10644') @@ -146,7 +146,7 @@ class TestChangesetController(object): # files op files response.mustcontain('File no longer present at commit: %s' % _shorten_commit_id(commit_ids[1])) - + # svn is special if backend.alias == 'svn': response.mustcontain('new file 10644') @@ -158,7 +158,7 @@ class TestChangesetController(object): response.mustcontain('Added docstrings to vcs.cli') # commit msg response.mustcontain('Changed theme to ADC theme') # commit msg - self._check_diff_menus(response) + self._check_new_diff_menus(response) def test_changeset_range(self, backend): self._check_changeset_range( @@ -273,7 +273,7 @@ Added a symlink """ + diffs['svn'], } - def _check_diff_menus(self, response, right_menu=False): + def _check_diff_menus(self, response, right_menu=False,): # diff menus for elem in ['Show File', 'Unified Diff', 'Side-by-side Diff', 'Raw Diff', 'Download Diff']: @@ -284,3 +284,16 @@ Added a symlink for elem in ['Ignore whitespace', 'Increase context', 'Hide comments']: response.mustcontain(elem) + + + def _check_new_diff_menus(self, response, right_menu=False,): + # diff menus + for elem in ['Show file before', 'Show file after', + 'Raw diff', 'Download diff']: + response.mustcontain(elem) + + # right pane diff menus + if right_menu: + for elem in ['Ignore whitespace', 'Increase context', + 'Hide comments']: + response.mustcontain(elem) diff --git a/rhodecode/tests/functional/test_compare.py b/rhodecode/tests/functional/test_compare.py --- a/rhodecode/tests/functional/test_compare.py +++ b/rhodecode/tests/functional/test_compare.py @@ -20,6 +20,7 @@ import mock import pytest +import lxml.html from rhodecode.lib.vcs.backends.base import EmptyCommit from rhodecode.lib.vcs.exceptions import RepositoryRequirementError @@ -609,9 +610,12 @@ class ComparePage(AssertResponse): """ def contains_file_links_and_anchors(self, files): + doc = lxml.html.fromstring(self.response.body) for filename, file_id in files: - self.contains_one_link(filename, '#' + file_id) self.contains_one_anchor(file_id) + diffblock = doc.cssselect('[data-f-path="%s"]' % filename) + assert len(diffblock) == 1 + assert len(diffblock[0].cssselect('a[href="#%s"]' % file_id)) == 1 def contains_change_summary(self, files_changed, inserted, deleted): template = ( diff --git a/rhodecode/tests/lib/test_codeblocks.py b/rhodecode/tests/lib/test_codeblocks.py --- a/rhodecode/tests/lib/test_codeblocks.py +++ b/rhodecode/tests/lib/test_codeblocks.py @@ -264,19 +264,19 @@ class TestRenderTokenStream(object): ), ( [('A', '', u'two\n'), ('A', '', u'lines')], - 'two\nlines', + 'two\nlines', ), ( [('A', '', u'\nthree\n'), ('A', '', u'lines')], - '\nthree\nlines', + '\nthree\nlines', ), ( [('', '', u'\n'), ('A', '', u'line')], - '\nline', + '\nline', ), ( [('', 'ins', u'\n'), ('A', '', u'line')], - '\nline', + '\nline', ), ( [('A', '', u'hel'), ('A', 'ins', u'lo')],