@@ -0,0 +1,14 @@

Copyright 2006 Google Inc.
http://code.google.com/p/google-diff-match-patch/

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

  http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
@@ -0,0 +1,1919 @@
|
#!/usr/bin/python2.4

from __future__ import division

"""Diff Match and Patch

Copyright 2006 Google Inc.
http://code.google.com/p/google-diff-match-patch/

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

  http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""

"""Functions for diff, match and patch.

Computes the difference between two texts to create a patch.
Applies the patch onto another text, allowing for errors.
"""

__author__ = 'fraser@google.com (Neil Fraser)'

import math
import re
import sys
import time
import urllib

class diff_match_patch:
  """Class containing the diff, match and patch methods.

  Also contains the behaviour settings.
  """

  def __init__(self):
    """Inits a diff_match_patch object with default settings.
    Redefine these in your program to override the defaults.
    """

    # Number of seconds to map a diff before giving up (0 for infinity).
    self.Diff_Timeout = 1.0
    # Cost of an empty edit operation in terms of edit characters.
    self.Diff_EditCost = 4
    # At what point is no match declared (0.0 = perfection, 1.0 = very loose).
    self.Match_Threshold = 0.5
    # How far to search for a match (0 = exact location, 1000+ = broad match).
    # A match this many characters away from the expected location will add
    # 1.0 to the score (0.0 is a perfect match).
    self.Match_Distance = 1000
    # When deleting a large block of text (over ~64 characters), how close do
    # the contents have to be to match the expected contents. (0.0 = perfection,
    # 1.0 = very loose). Note that Match_Threshold controls how closely the
    # end points of a delete need to match.
    self.Patch_DeleteThreshold = 0.5
    # Chunk size for context length.
    self.Patch_Margin = 4

    # The number of bits in an int.
    # Python has no maximum, thus to disable patch splitting set to 0.
    # However to avoid long patches in certain pathological cases, use 32.
    # Multiple short patches (using native ints) are much faster than long ones.
    self.Match_MaxBits = 32
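
  # Editor's note: a minimal usage sketch (not part of the original source)
  # showing how the settings above are meant to be overridden per instance:
  #
  #   dmp = diff_match_patch()
  #   dmp.Diff_Timeout = 0.1     # give up refining a diff after 100ms
  #   dmp.Match_Threshold = 0.8  # accept looser fuzzy matches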
|
  # DIFF FUNCTIONS

  # The data structure representing a diff is an array of tuples:
  # [(DIFF_DELETE, "Hello"), (DIFF_INSERT, "Goodbye"), (DIFF_EQUAL, " world.")]
  # which means: delete "Hello", add "Goodbye" and keep " world."
  DIFF_DELETE = -1
  DIFF_INSERT = 1
  DIFF_EQUAL = 0

  def diff_main(self, text1, text2, checklines=True, deadline=None):
    """Find the differences between two texts. Simplifies the problem by
    stripping any common prefix or suffix off the texts before diffing.

    Args:
      text1: Old string to be diffed.
      text2: New string to be diffed.
      checklines: Optional speedup flag. If present and false, then don't run
        a line-level diff first to identify the changed areas.
        Defaults to true, which does a faster, slightly less optimal diff.
      deadline: Optional time when the diff should be complete by. Used
        internally for recursive calls. Users should set Diff_Timeout instead.

    Returns:
      Array of changes.
    """
    # Set a deadline by which time the diff must be complete.
    if deadline == None:
      # Unlike in most languages, Python counts time in seconds.
      if self.Diff_Timeout <= 0:
        deadline = sys.maxint
      else:
        deadline = time.time() + self.Diff_Timeout

    # Check for null inputs.
    if text1 == None or text2 == None:
      raise ValueError("Null inputs. (diff_main)")

    # Check for equality (speedup).
    if text1 == text2:
      if text1:
        return [(self.DIFF_EQUAL, text1)]
      return []

    # Trim off common prefix (speedup).
    commonlength = self.diff_commonPrefix(text1, text2)
    commonprefix = text1[:commonlength]
    text1 = text1[commonlength:]
    text2 = text2[commonlength:]

    # Trim off common suffix (speedup).
    commonlength = self.diff_commonSuffix(text1, text2)
    if commonlength == 0:
      commonsuffix = ''
    else:
      commonsuffix = text1[-commonlength:]
      text1 = text1[:-commonlength]
      text2 = text2[:-commonlength]

    # Compute the diff on the middle block.
    diffs = self.diff_compute(text1, text2, checklines, deadline)

    # Restore the prefix and suffix.
    if commonprefix:
      diffs[:0] = [(self.DIFF_EQUAL, commonprefix)]
    if commonsuffix:
      diffs.append((self.DIFF_EQUAL, commonsuffix))
    self.diff_cleanupMerge(diffs)
    return diffs
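
  # Editor's note: a hedged usage sketch (not in the original file); the
  # exact tuple boundaries depend on the cleanup pass applied afterwards:
  #
  #   dmp = diff_match_patch()
  #   diffs = dmp.diff_main("The quick brown fox.", "The quick red fox.")
  #   dmp.diff_cleanupSemantic(diffs)
  #   # roughly: [(0, 'The quick '), (-1, 'brown'), (1, 'red'), (0, ' fox.')]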
|
  def diff_compute(self, text1, text2, checklines, deadline):
    """Find the differences between two texts. Assumes that the texts do not
    have any common prefix or suffix.

    Args:
      text1: Old string to be diffed.
      text2: New string to be diffed.
      checklines: Speedup flag. If false, then don't run a line-level diff
        first to identify the changed areas.
        If true, then run a faster, slightly less optimal diff.
      deadline: Time when the diff should be complete by.

    Returns:
      Array of changes.
    """
    if not text1:
      # Just add some text (speedup).
      return [(self.DIFF_INSERT, text2)]

    if not text2:
      # Just delete some text (speedup).
      return [(self.DIFF_DELETE, text1)]

    if len(text1) > len(text2):
      (longtext, shorttext) = (text1, text2)
    else:
      (shorttext, longtext) = (text1, text2)
    i = longtext.find(shorttext)
    if i != -1:
      # Shorter text is inside the longer text (speedup).
      diffs = [(self.DIFF_INSERT, longtext[:i]), (self.DIFF_EQUAL, shorttext),
               (self.DIFF_INSERT, longtext[i + len(shorttext):])]
      # Swap insertions for deletions if diff is reversed.
      if len(text1) > len(text2):
        diffs[0] = (self.DIFF_DELETE, diffs[0][1])
        diffs[2] = (self.DIFF_DELETE, diffs[2][1])
      return diffs

    if len(shorttext) == 1:
      # Single character string.
      # After the previous speedup, the character can't be an equality.
      return [(self.DIFF_DELETE, text1), (self.DIFF_INSERT, text2)]

    # Check to see if the problem can be split in two.
    hm = self.diff_halfMatch(text1, text2)
    if hm:
      # A half-match was found, sort out the return data.
      (text1_a, text1_b, text2_a, text2_b, mid_common) = hm
      # Send both pairs off for separate processing.
      diffs_a = self.diff_main(text1_a, text2_a, checklines, deadline)
      diffs_b = self.diff_main(text1_b, text2_b, checklines, deadline)
      # Merge the results.
      return diffs_a + [(self.DIFF_EQUAL, mid_common)] + diffs_b

    if checklines and len(text1) > 100 and len(text2) > 100:
      return self.diff_lineMode(text1, text2, deadline)

    return self.diff_bisect(text1, text2, deadline)

  def diff_lineMode(self, text1, text2, deadline):
    """Do a quick line-level diff on both strings, then rediff the parts for
    greater accuracy.
    This speedup can produce non-minimal diffs.

    Args:
      text1: Old string to be diffed.
      text2: New string to be diffed.
      deadline: Time when the diff should be complete by.

    Returns:
      Array of changes.
    """

    # Scan the text on a line-by-line basis first.
    (text1, text2, linearray) = self.diff_linesToChars(text1, text2)

    diffs = self.diff_main(text1, text2, False, deadline)

    # Convert the diff back to original text.
    self.diff_charsToLines(diffs, linearray)
    # Eliminate freak matches (e.g. blank lines)
    self.diff_cleanupSemantic(diffs)

    # Rediff any replacement blocks, this time character-by-character.
    # Add a dummy entry at the end.
    diffs.append((self.DIFF_EQUAL, ''))
    pointer = 0
    count_delete = 0
    count_insert = 0
    text_delete = ''
    text_insert = ''
    while pointer < len(diffs):
      if diffs[pointer][0] == self.DIFF_INSERT:
        count_insert += 1
        text_insert += diffs[pointer][1]
      elif diffs[pointer][0] == self.DIFF_DELETE:
        count_delete += 1
        text_delete += diffs[pointer][1]
      elif diffs[pointer][0] == self.DIFF_EQUAL:
        # Upon reaching an equality, check for prior redundancies.
        if count_delete >= 1 and count_insert >= 1:
          # Delete the offending records and add the merged ones.
          a = self.diff_main(text_delete, text_insert, False, deadline)
          diffs[pointer - count_delete - count_insert : pointer] = a
          pointer = pointer - count_delete - count_insert + len(a)
        count_insert = 0
        count_delete = 0
        text_delete = ''
        text_insert = ''

      pointer += 1

    diffs.pop()  # Remove the dummy entry at the end.

    return diffs

  def diff_bisect(self, text1, text2, deadline):
    """Find the 'middle snake' of a diff, split the problem in two
    and return the recursively constructed diff.
    See Myers 1986 paper: An O(ND) Difference Algorithm and Its Variations.

    Args:
      text1: Old string to be diffed.
      text2: New string to be diffed.
      deadline: Time at which to bail if not yet complete.

    Returns:
      Array of diff tuples.
    """

    # Cache the text lengths to prevent multiple calls.
    text1_length = len(text1)
    text2_length = len(text2)
    max_d = (text1_length + text2_length + 1) // 2
    v_offset = max_d
    v_length = 2 * max_d
    v1 = [-1] * v_length
    v1[v_offset + 1] = 0
    v2 = v1[:]
    delta = text1_length - text2_length
    # If the total number of characters is odd, then the front path will
    # collide with the reverse path.
    front = (delta % 2 != 0)
    # Offsets for start and end of k loop.
    # Prevents mapping of space beyond the grid.
    k1start = 0
    k1end = 0
    k2start = 0
    k2end = 0
    for d in xrange(max_d):
      # Bail out if deadline is reached.
      if time.time() > deadline:
        break

      # Walk the front path one step.
      for k1 in xrange(-d + k1start, d + 1 - k1end, 2):
        k1_offset = v_offset + k1
        if k1 == -d or (k1 != d and
            v1[k1_offset - 1] < v1[k1_offset + 1]):
          x1 = v1[k1_offset + 1]
        else:
          x1 = v1[k1_offset - 1] + 1
        y1 = x1 - k1
        while (x1 < text1_length and y1 < text2_length and
               text1[x1] == text2[y1]):
          x1 += 1
          y1 += 1
        v1[k1_offset] = x1
        if x1 > text1_length:
          # Ran off the right of the graph.
          k1end += 2
        elif y1 > text2_length:
          # Ran off the bottom of the graph.
          k1start += 2
        elif front:
          k2_offset = v_offset + delta - k1
          if k2_offset >= 0 and k2_offset < v_length and v2[k2_offset] != -1:
            # Mirror x2 onto top-left coordinate system.
            x2 = text1_length - v2[k2_offset]
            if x1 >= x2:
              # Overlap detected.
              return self.diff_bisectSplit(text1, text2, x1, y1, deadline)

      # Walk the reverse path one step.
      for k2 in xrange(-d + k2start, d + 1 - k2end, 2):
        k2_offset = v_offset + k2
        if k2 == -d or (k2 != d and
            v2[k2_offset - 1] < v2[k2_offset + 1]):
          x2 = v2[k2_offset + 1]
        else:
          x2 = v2[k2_offset - 1] + 1
        y2 = x2 - k2
        while (x2 < text1_length and y2 < text2_length and
               text1[-x2 - 1] == text2[-y2 - 1]):
          x2 += 1
          y2 += 1
        v2[k2_offset] = x2
        if x2 > text1_length:
          # Ran off the left of the graph.
          k2end += 2
        elif y2 > text2_length:
          # Ran off the top of the graph.
          k2start += 2
        elif not front:
          k1_offset = v_offset + delta - k2
          if k1_offset >= 0 and k1_offset < v_length and v1[k1_offset] != -1:
            x1 = v1[k1_offset]
            y1 = v_offset + x1 - k1_offset
            # Mirror x2 onto top-left coordinate system.
            x2 = text1_length - x2
            if x1 >= x2:
              # Overlap detected.
              return self.diff_bisectSplit(text1, text2, x1, y1, deadline)

    # Diff took too long and hit the deadline or
    # number of diffs equals number of characters, no commonality at all.
    return [(self.DIFF_DELETE, text1), (self.DIFF_INSERT, text2)]
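
  # Editor's note: an illustrative direct call, recalled from the library's
  # test suite, so treat the exact output as approximate:
  #
  #   dmp.diff_bisect("cat", "map", sys.maxint)
  #   # -> [(-1, 'c'), (1, 'm'), (0, 'a'), (-1, 't'), (1, 'p')]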
|
  def diff_bisectSplit(self, text1, text2, x, y, deadline):
    """Given the location of the 'middle snake', split the diff in two parts
    and recurse.

    Args:
      text1: Old string to be diffed.
      text2: New string to be diffed.
      x: Index of split point in text1.
      y: Index of split point in text2.
      deadline: Time at which to bail if not yet complete.

    Returns:
      Array of diff tuples.
    """
    text1a = text1[:x]
    text2a = text2[:y]
    text1b = text1[x:]
    text2b = text2[y:]

    # Compute both diffs serially.
    diffs = self.diff_main(text1a, text2a, False, deadline)
    diffsb = self.diff_main(text1b, text2b, False, deadline)

    return diffs + diffsb

  def diff_linesToChars(self, text1, text2):
    """Split two texts into an array of strings. Reduce the texts to a string
    of hashes where each Unicode character represents one line.

    Args:
      text1: First string.
      text2: Second string.

    Returns:
      Three element tuple, containing the encoded text1, the encoded text2 and
      the array of unique strings. The zeroth element of the array of unique
      strings is intentionally blank.
    """
    lineArray = []  # e.g. lineArray[4] == "Hello\n"
    lineHash = {}   # e.g. lineHash["Hello\n"] == 4

    # "\x00" is a valid character, but various debuggers don't like it.
    # So we'll insert a junk entry to avoid generating a null character.
    lineArray.append('')

    def diff_linesToCharsMunge(text):
      """Split a text into an array of strings. Reduce the texts to a string
      of hashes where each Unicode character represents one line.
      Modifies linearray and linehash through being a closure.

      Args:
        text: String to encode.

      Returns:
        Encoded string.
      """
      chars = []
      # Walk the text, pulling out a substring for each line.
      # text.split('\n') would temporarily double our memory footprint.
      # Modifying text would create many large strings to garbage collect.
      lineStart = 0
      lineEnd = -1
      while lineEnd < len(text) - 1:
        lineEnd = text.find('\n', lineStart)
        if lineEnd == -1:
          lineEnd = len(text) - 1
        line = text[lineStart:lineEnd + 1]
        lineStart = lineEnd + 1

        if line in lineHash:
          chars.append(unichr(lineHash[line]))
        else:
          lineArray.append(line)
          lineHash[line] = len(lineArray) - 1
          chars.append(unichr(len(lineArray) - 1))
      return "".join(chars)

    chars1 = diff_linesToCharsMunge(text1)
    chars2 = diff_linesToCharsMunge(text2)
    return (chars1, chars2, lineArray)
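
  # Editor's note: a sketch (not original) of the encode/decode round-trip
  # used by diff_lineMode above; the values follow from the code:
  #
  #   chars1, chars2, lines = dmp.diff_linesToChars("alpha\nbeta\nalpha\n",
  #                                                 "beta\nalpha\nbeta\n")
  #   # chars1 == u'\x01\x02\x01', chars2 == u'\x02\x01\x02',
  #   # lines == ['', 'alpha\n', 'beta\n']
  #   diffs = dmp.diff_main(chars1, chars2, False)
  #   dmp.diff_charsToLines(diffs, lines)  # rehydrate to real lines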
|
  def diff_charsToLines(self, diffs, lineArray):
    """Rehydrate the text in a diff from a string of line hashes to real lines
    of text.

    Args:
      diffs: Array of diff tuples.
      lineArray: Array of unique strings.
    """
    for x in xrange(len(diffs)):
      text = []
      for char in diffs[x][1]:
        text.append(lineArray[ord(char)])
      diffs[x] = (diffs[x][0], "".join(text))

  def diff_commonPrefix(self, text1, text2):
    """Determine the common prefix of two strings.

    Args:
      text1: First string.
      text2: Second string.

    Returns:
      The number of characters common to the start of each string.
    """
    # Quick check for common null cases.
    if not text1 or not text2 or text1[0] != text2[0]:
      return 0
    # Binary search.
    # Performance analysis: http://neil.fraser.name/news/2007/10/09/
    pointermin = 0
    pointermax = min(len(text1), len(text2))
    pointermid = pointermax
    pointerstart = 0
    while pointermin < pointermid:
      if text1[pointerstart:pointermid] == text2[pointerstart:pointermid]:
        pointermin = pointermid
        pointerstart = pointermin
      else:
        pointermax = pointermid
      pointermid = (pointermax - pointermin) // 2 + pointermin
    return pointermid
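
  # Editor's note (sketch): e.g. dmp.diff_commonPrefix("1234abcdef", "1234xyz")
  # returns 4.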
|
  def diff_commonSuffix(self, text1, text2):
    """Determine the common suffix of two strings.

    Args:
      text1: First string.
      text2: Second string.

    Returns:
      The number of characters common to the end of each string.
    """
    # Quick check for common null cases.
    if not text1 or not text2 or text1[-1] != text2[-1]:
      return 0
    # Binary search.
    # Performance analysis: http://neil.fraser.name/news/2007/10/09/
    pointermin = 0
    pointermax = min(len(text1), len(text2))
    pointermid = pointermax
    pointerend = 0
    while pointermin < pointermid:
      if (text1[-pointermid:len(text1) - pointerend] ==
          text2[-pointermid:len(text2) - pointerend]):
        pointermin = pointermid
        pointerend = pointermin
      else:
        pointermax = pointermid
      pointermid = (pointermax - pointermin) // 2 + pointermin
    return pointermid

  def diff_commonOverlap(self, text1, text2):
    """Determine if the suffix of one string is the prefix of another.

    Args:
      text1: First string.
      text2: Second string.

    Returns:
      The number of characters common to the end of the first
      string and the start of the second string.
    """
    # Cache the text lengths to prevent multiple calls.
    text1_length = len(text1)
    text2_length = len(text2)
    # Eliminate the null case.
    if text1_length == 0 or text2_length == 0:
      return 0
    # Truncate the longer string.
    if text1_length > text2_length:
      text1 = text1[-text2_length:]
    elif text1_length < text2_length:
      text2 = text2[:text1_length]
    text_length = min(text1_length, text2_length)
    # Quick check for the worst case.
    if text1 == text2:
      return text_length

    # Start by looking for a single character match
    # and increase length until no match is found.
    # Performance analysis: http://neil.fraser.name/news/2010/11/04/
    best = 0
    length = 1
    while True:
      pattern = text1[-length:]
      found = text2.find(pattern)
      if found == -1:
        return best
      length += found
      if found == 0 or text1[-length:] == text2[:length]:
        best = length
        length += 1
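
  # Editor's note (sketch): e.g. dmp.diff_commonOverlap("123456xxx", "xxxabcd")
  # returns 3, since the suffix "xxx" of text1 is a prefix of text2.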
|
  def diff_halfMatch(self, text1, text2):
    """Do the two texts share a substring which is at least half the length of
    the longer text?
    This speedup can produce non-minimal diffs.

    Args:
      text1: First string.
      text2: Second string.

    Returns:
      Five element Array, containing the prefix of text1, the suffix of text1,
      the prefix of text2, the suffix of text2 and the common middle. Or None
      if there was no match.
    """
    if self.Diff_Timeout <= 0:
      # Don't risk returning a non-optimal diff if we have unlimited time.
      return None
    if len(text1) > len(text2):
      (longtext, shorttext) = (text1, text2)
    else:
      (shorttext, longtext) = (text1, text2)
    if len(longtext) < 4 or len(shorttext) * 2 < len(longtext):
      return None  # Pointless.

    def diff_halfMatchI(longtext, shorttext, i):
      """Does a substring of shorttext exist within longtext such that the
      substring is at least half the length of longtext?
      Closure, but does not reference any external variables.

      Args:
        longtext: Longer string.
        shorttext: Shorter string.
        i: Start index of quarter length substring within longtext.

      Returns:
        Five element Array, containing the prefix of longtext, the suffix of
        longtext, the prefix of shorttext, the suffix of shorttext and the
        common middle. Or None if there was no match.
      """
      seed = longtext[i:i + len(longtext) // 4]
      best_common = ''
      j = shorttext.find(seed)
      while j != -1:
        prefixLength = self.diff_commonPrefix(longtext[i:], shorttext[j:])
        suffixLength = self.diff_commonSuffix(longtext[:i], shorttext[:j])
        if len(best_common) < suffixLength + prefixLength:
          best_common = (shorttext[j - suffixLength:j] +
                         shorttext[j:j + prefixLength])
          best_longtext_a = longtext[:i - suffixLength]
          best_longtext_b = longtext[i + prefixLength:]
          best_shorttext_a = shorttext[:j - suffixLength]
          best_shorttext_b = shorttext[j + prefixLength:]
        j = shorttext.find(seed, j + 1)

      if len(best_common) * 2 >= len(longtext):
        return (best_longtext_a, best_longtext_b,
                best_shorttext_a, best_shorttext_b, best_common)
      else:
        return None

    # First check if the second quarter is the seed for a half-match.
    hm1 = diff_halfMatchI(longtext, shorttext, (len(longtext) + 3) // 4)
    # Check again based on the third quarter.
    hm2 = diff_halfMatchI(longtext, shorttext, (len(longtext) + 1) // 2)
    if not hm1 and not hm2:
      return None
    elif not hm2:
      hm = hm1
    elif not hm1:
      hm = hm2
    else:
      # Both matched. Select the longest.
      if len(hm1[4]) > len(hm2[4]):
        hm = hm1
      else:
        hm = hm2

    # A half-match was found, sort out the return data.
    if len(text1) > len(text2):
      (text1_a, text1_b, text2_a, text2_b, mid_common) = hm
    else:
      (text2_a, text2_b, text1_a, text1_b, mid_common) = hm
    return (text1_a, text1_b, text2_a, text2_b, mid_common)
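
  # Editor's note: an example recalled from the library's tests (approximate):
  # with Diff_Timeout > 0,
  #   dmp.diff_halfMatch("1234567890", "a345678z")
  #   # -> ('12', '90', 'a', 'z', '345678')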
|
  def diff_cleanupSemantic(self, diffs):
    """Reduce the number of edits by eliminating semantically trivial
    equalities.

    Args:
      diffs: Array of diff tuples.
    """
    changes = False
    equalities = []  # Stack of indices where equalities are found.
    lastequality = None  # Always equal to diffs[equalities[-1]][1]
    pointer = 0  # Index of current position.
    # Number of chars that changed prior to the equality.
    length_insertions1, length_deletions1 = 0, 0
    # Number of chars that changed after the equality.
    length_insertions2, length_deletions2 = 0, 0
    while pointer < len(diffs):
      if diffs[pointer][0] == self.DIFF_EQUAL:  # Equality found.
        equalities.append(pointer)
        length_insertions1, length_insertions2 = length_insertions2, 0
        length_deletions1, length_deletions2 = length_deletions2, 0
        lastequality = diffs[pointer][1]
      else:  # An insertion or deletion.
        if diffs[pointer][0] == self.DIFF_INSERT:
          length_insertions2 += len(diffs[pointer][1])
        else:
          length_deletions2 += len(diffs[pointer][1])
        # Eliminate an equality that is smaller or equal to the edits on both
        # sides of it.
        if (lastequality and (len(lastequality) <=
            max(length_insertions1, length_deletions1)) and
            (len(lastequality) <= max(length_insertions2, length_deletions2))):
          # Duplicate record.
          diffs.insert(equalities[-1], (self.DIFF_DELETE, lastequality))
          # Change second copy to insert.
          diffs[equalities[-1] + 1] = (self.DIFF_INSERT,
              diffs[equalities[-1] + 1][1])
          # Throw away the equality we just deleted.
          equalities.pop()
          # Throw away the previous equality (it needs to be reevaluated).
          if len(equalities):
            equalities.pop()
          if len(equalities):
            pointer = equalities[-1]
          else:
            pointer = -1
          # Reset the counters.
          length_insertions1, length_deletions1 = 0, 0
          length_insertions2, length_deletions2 = 0, 0
          lastequality = None
          changes = True
      pointer += 1

    # Normalize the diff.
    if changes:
      self.diff_cleanupMerge(diffs)
    self.diff_cleanupSemanticLossless(diffs)

    # Find any overlaps between deletions and insertions.
    # e.g: <del>abcxxx</del><ins>xxxdef</ins>
    #   -> <del>abc</del>xxx<ins>def</ins>
    # e.g: <del>xxxabc</del><ins>defxxx</ins>
    #   -> <ins>def</ins>xxx<del>abc</del>
    # Only extract an overlap if it is as big as the edit ahead or behind it.
    pointer = 1
    while pointer < len(diffs):
      if (diffs[pointer - 1][0] == self.DIFF_DELETE and
          diffs[pointer][0] == self.DIFF_INSERT):
        deletion = diffs[pointer - 1][1]
        insertion = diffs[pointer][1]
        overlap_length1 = self.diff_commonOverlap(deletion, insertion)
        overlap_length2 = self.diff_commonOverlap(insertion, deletion)
        if overlap_length1 >= overlap_length2:
          if (overlap_length1 >= len(deletion) / 2.0 or
              overlap_length1 >= len(insertion) / 2.0):
            # Overlap found.  Insert an equality and trim the surrounding edits.
            diffs.insert(pointer, (self.DIFF_EQUAL,
                insertion[:overlap_length1]))
            diffs[pointer - 1] = (self.DIFF_DELETE,
                deletion[:len(deletion) - overlap_length1])
            diffs[pointer + 1] = (self.DIFF_INSERT,
                insertion[overlap_length1:])
            pointer += 1
        else:
          if (overlap_length2 >= len(deletion) / 2.0 or
              overlap_length2 >= len(insertion) / 2.0):
            # Reverse overlap found.
            # Insert an equality and swap and trim the surrounding edits.
            diffs.insert(pointer, (self.DIFF_EQUAL, deletion[:overlap_length2]))
            diffs[pointer - 1] = (self.DIFF_INSERT,
                insertion[:len(insertion) - overlap_length2])
            diffs[pointer + 1] = (self.DIFF_DELETE, deletion[overlap_length2:])
            pointer += 1
        pointer += 1
      pointer += 1
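
  # Editor's note (sketch): the overlap extraction documented above, e.g.
  #   diffs = [(dmp.DIFF_DELETE, 'abcxxx'), (dmp.DIFF_INSERT, 'xxxdef')]
  #   dmp.diff_cleanupSemantic(diffs)
  #   # diffs is now [(-1, 'abc'), (0, 'xxx'), (1, 'def')]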
|
  def diff_cleanupSemanticLossless(self, diffs):
    """Look for single edits surrounded on both sides by equalities
    which can be shifted sideways to align the edit to a word boundary.
    e.g: The c<ins>at c</ins>ame. -> The <ins>cat </ins>came.

    Args:
      diffs: Array of diff tuples.
    """

    def diff_cleanupSemanticScore(one, two):
      """Given two strings, compute a score representing whether the
      internal boundary falls on logical boundaries.
      Scores range from 6 (best) to 0 (worst).
      Closure, but does not reference any external variables.

      Args:
        one: First string.
        two: Second string.

      Returns:
        The score.
      """
      if not one or not two:
        # Edges are the best.
        return 6

      # Each port of this function behaves slightly differently due to
      # subtle differences in each language's definition of things like
      # 'whitespace'.  Since this function's purpose is largely cosmetic,
      # the choice has been made to use each language's native features
      # rather than force total conformity.
      char1 = one[-1]
      char2 = two[0]
      nonAlphaNumeric1 = not char1.isalnum()
      nonAlphaNumeric2 = not char2.isalnum()
      whitespace1 = nonAlphaNumeric1 and char1.isspace()
      whitespace2 = nonAlphaNumeric2 and char2.isspace()
      lineBreak1 = whitespace1 and (char1 == "\r" or char1 == "\n")
      lineBreak2 = whitespace2 and (char2 == "\r" or char2 == "\n")
      blankLine1 = lineBreak1 and self.BLANKLINEEND.search(one)
      blankLine2 = lineBreak2 and self.BLANKLINESTART.match(two)

      if blankLine1 or blankLine2:
        # Five points for blank lines.
        return 5
      elif lineBreak1 or lineBreak2:
        # Four points for line breaks.
        return 4
      elif nonAlphaNumeric1 and not whitespace1 and whitespace2:
        # Three points for end of sentences.
        return 3
      elif whitespace1 or whitespace2:
        # Two points for whitespace.
        return 2
      elif nonAlphaNumeric1 or nonAlphaNumeric2:
        # One point for non-alphanumeric.
        return 1
      return 0

    pointer = 1
    # Intentionally ignore the first and last element (don't need checking).
    while pointer < len(diffs) - 1:
      if (diffs[pointer - 1][0] == self.DIFF_EQUAL and
          diffs[pointer + 1][0] == self.DIFF_EQUAL):
        # This is a single edit surrounded by equalities.
        equality1 = diffs[pointer - 1][1]
        edit = diffs[pointer][1]
        equality2 = diffs[pointer + 1][1]

        # First, shift the edit as far left as possible.
        commonOffset = self.diff_commonSuffix(equality1, edit)
        if commonOffset:
          commonString = edit[-commonOffset:]
          equality1 = equality1[:-commonOffset]
          edit = commonString + edit[:-commonOffset]
          equality2 = commonString + equality2

        # Second, step character by character right, looking for the best fit.
        bestEquality1 = equality1
        bestEdit = edit
        bestEquality2 = equality2
        bestScore = (diff_cleanupSemanticScore(equality1, edit) +
                     diff_cleanupSemanticScore(edit, equality2))
        while edit and equality2 and edit[0] == equality2[0]:
          equality1 += edit[0]
          edit = edit[1:] + equality2[0]
          equality2 = equality2[1:]
          score = (diff_cleanupSemanticScore(equality1, edit) +
                   diff_cleanupSemanticScore(edit, equality2))
          # The >= encourages trailing rather than leading whitespace on edits.
          if score >= bestScore:
            bestScore = score
            bestEquality1 = equality1
            bestEdit = edit
            bestEquality2 = equality2

        if diffs[pointer - 1][1] != bestEquality1:
          # We have an improvement, save it back to the diff.
          if bestEquality1:
            diffs[pointer - 1] = (diffs[pointer - 1][0], bestEquality1)
          else:
            del diffs[pointer - 1]
            pointer -= 1
          diffs[pointer] = (diffs[pointer][0], bestEdit)
          if bestEquality2:
            diffs[pointer + 1] = (diffs[pointer + 1][0], bestEquality2)
          else:
            del diffs[pointer + 1]
            pointer -= 1
      pointer += 1

  # Define some regex patterns for matching boundaries.
  BLANKLINEEND = re.compile(r"\n\r?\n$")
  BLANKLINESTART = re.compile(r"^\r?\n\r?\n")
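
  # Editor's note (sketch): the word-boundary alignment from the docstring,
  # expressed as diff tuples:
  #   diffs = [(dmp.DIFF_EQUAL, 'The c'), (dmp.DIFF_INSERT, 'at c'),
  #            (dmp.DIFF_EQUAL, 'ame.')]
  #   dmp.diff_cleanupSemanticLossless(diffs)
  #   # diffs is now [(0, 'The '), (1, 'cat '), (0, 'came.')]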
|
  def diff_cleanupEfficiency(self, diffs):
    """Reduce the number of edits by eliminating operationally trivial
    equalities.

    Args:
      diffs: Array of diff tuples.
    """
    changes = False
    equalities = []  # Stack of indices where equalities are found.
    lastequality = None  # Always equal to diffs[equalities[-1]][1]
    pointer = 0  # Index of current position.
    pre_ins = False  # Is there an insertion operation before the last equality.
    pre_del = False  # Is there a deletion operation before the last equality.
    post_ins = False  # Is there an insertion operation after the last equality.
    post_del = False  # Is there a deletion operation after the last equality.
    while pointer < len(diffs):
      if diffs[pointer][0] == self.DIFF_EQUAL:  # Equality found.
        if (len(diffs[pointer][1]) < self.Diff_EditCost and
            (post_ins or post_del)):
          # Candidate found.
          equalities.append(pointer)
          pre_ins = post_ins
          pre_del = post_del
          lastequality = diffs[pointer][1]
        else:
          # Not a candidate, and can never become one.
          equalities = []
          lastequality = None

        post_ins = post_del = False
      else:  # An insertion or deletion.
        if diffs[pointer][0] == self.DIFF_DELETE:
          post_del = True
        else:
          post_ins = True

        # Five types to be split:
        # <ins>A</ins><del>B</del>XY<ins>C</ins><del>D</del>
        # <ins>A</ins>X<ins>C</ins><del>D</del>
        # <ins>A</ins><del>B</del>X<ins>C</ins>
        # <del>A</del>X<ins>C</ins><del>D</del>
        # <ins>A</ins><del>B</del>X<del>C</del>

        if lastequality and ((pre_ins and pre_del and post_ins and post_del) or
                             ((len(lastequality) < self.Diff_EditCost / 2) and
                              (pre_ins + pre_del + post_ins + post_del) == 3)):
          # Duplicate record.
          diffs.insert(equalities[-1], (self.DIFF_DELETE, lastequality))
          # Change second copy to insert.
          diffs[equalities[-1] + 1] = (self.DIFF_INSERT,
              diffs[equalities[-1] + 1][1])
          equalities.pop()  # Throw away the equality we just deleted.
          lastequality = None
          if pre_ins and pre_del:
            # No changes made which could affect previous entry, keep going.
            post_ins = post_del = True
            equalities = []
          else:
            if len(equalities):
              equalities.pop()  # Throw away the previous equality.
            if len(equalities):
              pointer = equalities[-1]
            else:
              pointer = -1
            post_ins = post_del = False
          changes = True
      pointer += 1

    if changes:
      self.diff_cleanupMerge(diffs)
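
  # Editor's note: an example recalled from the library's tests (verify the
  # exact output before relying on it).  With Diff_EditCost = 4, a short
  # equality costing more to keep than to merge is folded away:
  #   diffs = [(dmp.DIFF_DELETE, 'ab'), (dmp.DIFF_INSERT, '12'),
  #            (dmp.DIFF_EQUAL, 'xyz'), (dmp.DIFF_DELETE, 'cd'),
  #            (dmp.DIFF_INSERT, '34')]
  #   dmp.diff_cleanupEfficiency(diffs)
  #   # diffs is now [(-1, 'abxyzcd'), (1, '12xyz34')]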
|
  def diff_cleanupMerge(self, diffs):
    """Reorder and merge like edit sections.  Merge equalities.
    Any edit section can move as long as it doesn't cross an equality.

    Args:
      diffs: Array of diff tuples.
    """
    diffs.append((self.DIFF_EQUAL, ''))  # Add a dummy entry at the end.
    pointer = 0
    count_delete = 0
    count_insert = 0
    text_delete = ''
    text_insert = ''
    while pointer < len(diffs):
      if diffs[pointer][0] == self.DIFF_INSERT:
        count_insert += 1
        text_insert += diffs[pointer][1]
        pointer += 1
      elif diffs[pointer][0] == self.DIFF_DELETE:
        count_delete += 1
        text_delete += diffs[pointer][1]
        pointer += 1
      elif diffs[pointer][0] == self.DIFF_EQUAL:
        # Upon reaching an equality, check for prior redundancies.
        if count_delete + count_insert > 1:
          if count_delete != 0 and count_insert != 0:
            # Factor out any common prefixes.
            commonlength = self.diff_commonPrefix(text_insert, text_delete)
            if commonlength != 0:
              x = pointer - count_delete - count_insert - 1
              if x >= 0 and diffs[x][0] == self.DIFF_EQUAL:
                diffs[x] = (diffs[x][0], diffs[x][1] +
                    text_insert[:commonlength])
              else:
                diffs.insert(0, (self.DIFF_EQUAL, text_insert[:commonlength]))
                pointer += 1
              text_insert = text_insert[commonlength:]
              text_delete = text_delete[commonlength:]
            # Factor out any common suffixes.
            commonlength = self.diff_commonSuffix(text_insert, text_delete)
            if commonlength != 0:
              diffs[pointer] = (diffs[pointer][0], text_insert[-commonlength:] +
                  diffs[pointer][1])
              text_insert = text_insert[:-commonlength]
              text_delete = text_delete[:-commonlength]
          # Delete the offending records and add the merged ones.
          if count_delete == 0:
            diffs[pointer - count_insert : pointer] = [
                (self.DIFF_INSERT, text_insert)]
          elif count_insert == 0:
            diffs[pointer - count_delete : pointer] = [
                (self.DIFF_DELETE, text_delete)]
          else:
            diffs[pointer - count_delete - count_insert : pointer] = [
                (self.DIFF_DELETE, text_delete),
                (self.DIFF_INSERT, text_insert)]
          pointer = pointer - count_delete - count_insert + 1
          if count_delete != 0:
            pointer += 1
          if count_insert != 0:
            pointer += 1
        elif pointer != 0 and diffs[pointer - 1][0] == self.DIFF_EQUAL:
          # Merge this equality with the previous one.
          diffs[pointer - 1] = (diffs[pointer - 1][0],
                                diffs[pointer - 1][1] + diffs[pointer][1])
          del diffs[pointer]
        else:
          pointer += 1

        count_insert = 0
        count_delete = 0
        text_delete = ''
        text_insert = ''

    if diffs[-1][1] == '':
      diffs.pop()  # Remove the dummy entry at the end.

    # Second pass: look for single edits surrounded on both sides by equalities
    # which can be shifted sideways to eliminate an equality.
    # e.g: A<ins>BA</ins>C -> <ins>AB</ins>AC
    changes = False
    pointer = 1
    # Intentionally ignore the first and last element (don't need checking).
    while pointer < len(diffs) - 1:
      if (diffs[pointer - 1][0] == self.DIFF_EQUAL and
          diffs[pointer + 1][0] == self.DIFF_EQUAL):
        # This is a single edit surrounded by equalities.
        if diffs[pointer][1].endswith(diffs[pointer - 1][1]):
          # Shift the edit over the previous equality.
          diffs[pointer] = (diffs[pointer][0],
              diffs[pointer - 1][1] +
              diffs[pointer][1][:-len(diffs[pointer - 1][1])])
          diffs[pointer + 1] = (diffs[pointer + 1][0],
              diffs[pointer - 1][1] + diffs[pointer + 1][1])
          del diffs[pointer - 1]
          changes = True
        elif diffs[pointer][1].startswith(diffs[pointer + 1][1]):
          # Shift the edit over the next equality.
          diffs[pointer - 1] = (diffs[pointer - 1][0],
              diffs[pointer - 1][1] + diffs[pointer + 1][1])
          diffs[pointer] = (diffs[pointer][0],
              diffs[pointer][1][len(diffs[pointer + 1][1]):] +
              diffs[pointer + 1][1])
          del diffs[pointer + 1]
          changes = True
      pointer += 1

    # If shifts were made, the diff needs reordering and another shift sweep.
    if changes:
      self.diff_cleanupMerge(diffs)
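
  # Editor's note (sketch): the second pass sliding an edit left, e.g.
  #   diffs = [(dmp.DIFF_EQUAL, 'a'), (dmp.DIFF_INSERT, 'ba'),
  #            (dmp.DIFF_EQUAL, 'c')]
  #   dmp.diff_cleanupMerge(diffs)
  #   # diffs is now [(1, 'ab'), (0, 'ac')]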
|
  def diff_xIndex(self, diffs, loc):
    """loc is a location in text1, compute and return the equivalent location
    in text2.  e.g. "The cat" vs "The big cat", 1->1, 5->8

    Args:
      diffs: Array of diff tuples.
      loc: Location within text1.

    Returns:
      Location within text2.
    """
    chars1 = 0
    chars2 = 0
    last_chars1 = 0
    last_chars2 = 0
    for x in xrange(len(diffs)):
      (op, text) = diffs[x]
      if op != self.DIFF_INSERT:  # Equality or deletion.
        chars1 += len(text)
      if op != self.DIFF_DELETE:  # Equality or insertion.
        chars2 += len(text)
      if chars1 > loc:  # Overshot the location.
        break
      last_chars1 = chars1
      last_chars2 = chars2

    if len(diffs) != x and diffs[x][0] == self.DIFF_DELETE:
      # The location was deleted.
      return last_chars2
    # Add the remaining character length.
    return last_chars2 + (loc - last_chars1)
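
  # Editor's note (sketch): translating an index across a diff, e.g.
  #   diffs = [(dmp.DIFF_DELETE, 'a'), (dmp.DIFF_INSERT, '1234'),
  #            (dmp.DIFF_EQUAL, 'xyz')]
  #   dmp.diff_xIndex(diffs, 2)  # position 2 in 'axyz' -> 5 in '1234xyz'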
|
  def diff_prettyHtml(self, diffs):
    """Convert a diff array into a pretty HTML report.

    Args:
      diffs: Array of diff tuples.

    Returns:
      HTML representation.
    """
    html = []
    for (op, data) in diffs:
      text = (data.replace("&", "&amp;").replace("<", "&lt;")
                  .replace(">", "&gt;").replace("\n", "&para;<br>"))
      if op == self.DIFF_INSERT:
        html.append("<ins style=\"background:#e6ffe6;\">%s</ins>" % text)
      elif op == self.DIFF_DELETE:
        html.append("<del style=\"background:#ffe6e6;\">%s</del>" % text)
      elif op == self.DIFF_EQUAL:
        html.append("<span>%s</span>" % text)
    return "".join(html)
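
  # Editor's note: expected output recalled from the library's tests (treat
  # as approximate):
  #   diffs = [(dmp.DIFF_EQUAL, 'a\n'), (dmp.DIFF_DELETE, '<B>b</B>'),
  #            (dmp.DIFF_INSERT, 'c&d')]
  #   dmp.diff_prettyHtml(diffs)
  #   # -> '<span>a&para;<br></span>'
  #   #    '<del style="background:#ffe6e6;">&lt;B&gt;b&lt;/B&gt;</del>'
  #   #    '<ins style="background:#e6ffe6;">c&amp;d</ins>'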
|
  def diff_text1(self, diffs):
    """Compute and return the source text (all equalities and deletions).

    Args:
      diffs: Array of diff tuples.

    Returns:
      Source text.
    """
    text = []
    for (op, data) in diffs:
      if op != self.DIFF_INSERT:
        text.append(data)
    return "".join(text)

  def diff_text2(self, diffs):
    """Compute and return the destination text (all equalities and insertions).

    Args:
      diffs: Array of diff tuples.

    Returns:
      Destination text.
    """
    text = []
    for (op, data) in diffs:
      if op != self.DIFF_DELETE:
        text.append(data)
    return "".join(text)

  def diff_levenshtein(self, diffs):
    """Compute the Levenshtein distance; the number of inserted, deleted or
    substituted characters.

    Args:
      diffs: Array of diff tuples.

    Returns:
      Number of changes.
    """
    levenshtein = 0
    insertions = 0
    deletions = 0
    for (op, data) in diffs:
      if op == self.DIFF_INSERT:
        insertions += len(data)
      elif op == self.DIFF_DELETE:
        deletions += len(data)
      elif op == self.DIFF_EQUAL:
        # A deletion and an insertion is one substitution.
        levenshtein += max(insertions, deletions)
        insertions = 0
        deletions = 0
    levenshtein += max(insertions, deletions)
    return levenshtein
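
  # Editor's note (sketch): a paired delete/insert counts once, e.g.
  #   diffs = [(dmp.DIFF_DELETE, 'abc'), (dmp.DIFF_INSERT, '1234'),
  #            (dmp.DIFF_EQUAL, 'xyz')]
  #   dmp.diff_levenshtein(diffs)  # -> max(3, 4) == 4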
|
  def diff_toDelta(self, diffs):
    """Crush the diff into an encoded string which describes the operations
    required to transform text1 into text2.
    E.g. =3\t-2\t+ing -> Keep 3 chars, delete 2 chars, insert 'ing'.
    Operations are tab-separated.  Inserted text is escaped using %xx notation.

    Args:
      diffs: Array of diff tuples.

    Returns:
      Delta text.
    """
    text = []
    for (op, data) in diffs:
      if op == self.DIFF_INSERT:
        # High ascii will raise UnicodeDecodeError.  Use Unicode instead.
        data = data.encode("utf-8")
        text.append("+" + urllib.quote(data, "!~*'();/?:@&=+$,# "))
      elif op == self.DIFF_DELETE:
        text.append("-%d" % len(data))
      elif op == self.DIFF_EQUAL:
        text.append("=%d" % len(data))
    return "\t".join(text)

  def diff_fromDelta(self, text1, delta):
    """Given the original text1, and an encoded string which describes the
    operations required to transform text1 into text2, compute the full diff.

    Args:
      text1: Source string for the diff.
      delta: Delta text.

    Returns:
      Array of diff tuples.

    Raises:
      ValueError: If invalid input.
    """
    if type(delta) == unicode:
      # Deltas should be composed of a subset of ascii chars, Unicode not
      # required.  If this encode raises UnicodeEncodeError, delta is invalid.
      delta = delta.encode("ascii")
    diffs = []
    pointer = 0  # Cursor in text1
    tokens = delta.split("\t")
    for token in tokens:
      if token == "":
        # Blank tokens are ok (from a trailing \t).
        continue
      # Each token begins with a one character parameter which specifies the
      # operation of this token (delete, insert, equality).
      param = token[1:]
      if token[0] == "+":
        param = urllib.unquote(param).decode("utf-8")
        diffs.append((self.DIFF_INSERT, param))
      elif token[0] == "-" or token[0] == "=":
        try:
          n = int(param)
        except ValueError:
          raise ValueError("Invalid number in diff_fromDelta: " + param)
        if n < 0:
          raise ValueError("Negative number in diff_fromDelta: " + param)
        text = text1[pointer : pointer + n]
        pointer += n
        if token[0] == "=":
          diffs.append((self.DIFF_EQUAL, text))
        else:
          diffs.append((self.DIFF_DELETE, text))
      else:
        # Anything else is an error.
        raise ValueError("Invalid diff operation in diff_fromDelta: " +
            token[0])
    if pointer != len(text1):
      raise ValueError(
          "Delta length (%d) does not equal source text length (%d)." %
          (pointer, len(text1)))
    return diffs
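
  # Editor's note (sketch): toDelta/fromDelta are inverses given the source
  # text, e.g.
  #   diffs = dmp.diff_main(u'jumps over the lazy', u'jumped over a lazy')
  #   delta = dmp.diff_toDelta(diffs)
  #   dmp.diff_fromDelta(u'jumps over the lazy', delta) == diffs  # True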
|
  # MATCH FUNCTIONS

  def match_main(self, text, pattern, loc):
    """Locate the best instance of 'pattern' in 'text' near 'loc'.

    Args:
      text: The text to search.
      pattern: The pattern to search for.
      loc: The location to search around.

    Returns:
      Best match index or -1.
    """
    # Check for null inputs.
    if text == None or pattern == None:
      raise ValueError("Null inputs. (match_main)")

    loc = max(0, min(loc, len(text)))
    if text == pattern:
      # Shortcut (potentially not guaranteed by the algorithm)
      return 0
    elif not text:
      # Nothing to match.
      return -1
    elif text[loc:loc + len(pattern)] == pattern:
      # Perfect match at the perfect spot!  (Includes case of null pattern)
      return loc
    else:
      # Do a fuzzy compare.
      match = self.match_bitap(text, pattern, loc)
      return match
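
  # Editor's note (sketch): e.g. dmp.match_main('abcdef', 'de', 3) returns 3
  # via the exact-match shortcut; inexact patterns fall through to the
  # fuzzy bitap search below.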
|
1248 | def match_bitap(self, text, pattern, loc): | |
|
1249 | """Locate the best instance of 'pattern' in 'text' near 'loc' using the | |
|
1250 | Bitap algorithm. | |
|
1251 | ||
|
1252 | Args: | |
|
1253 | text: The text to search. | |
|
1254 | pattern: The pattern to search for. | |
|
1255 | loc: The location to search around. | |
|
1256 | ||
|
1257 | Returns: | |
|
1258 | Best match index or -1. | |
|
1259 | """ | |
|
1260 | # Python doesn't have a maxint limit, so ignore this check. | |
|
1261 | #if self.Match_MaxBits != 0 and len(pattern) > self.Match_MaxBits: | |
|
1262 | # raise ValueError("Pattern too long for this application.") | |
|
1263 | ||
|
1264 | # Initialise the alphabet. | |
|
1265 | s = self.match_alphabet(pattern) | |
|
1266 | ||
|
1267 | def match_bitapScore(e, x): | |
|
1268 | """Compute and return the score for a match with e errors and x location. | |
|
1269 | Accesses loc and pattern through being a closure. | |
|
1270 | ||
|
1271 | Args: | |
|
1272 | e: Number of errors in match. | |
|
1273 | x: Location of match. | |
|
1274 | ||
|
1275 | Returns: | |
|
1276 | Overall score for match (0.0 = good, 1.0 = bad). | |
|
1277 | """ | |
|
1278 | accuracy = float(e) / len(pattern) | |
|
1279 | proximity = abs(loc - x) | |
|
1280 | if not self.Match_Distance: | |
|
1281 | # Dodge divide by zero error. | |
|
1282 | return proximity and 1.0 or accuracy | |
|
1283 | return accuracy + (proximity / float(self.Match_Distance)) | |
|
1284 | ||
|
1285 | # Highest score beyond which we give up. | |
|
1286 | score_threshold = self.Match_Threshold | |
|
1287 | # Is there a nearby exact match? (speedup) | |
|
1288 | best_loc = text.find(pattern, loc) | |
|
1289 | if best_loc != -1: | |
|
1290 | score_threshold = min(match_bitapScore(0, best_loc), score_threshold) | |
|
1291 | # What about in the other direction? (speedup) | |
|
1292 | best_loc = text.rfind(pattern, loc + len(pattern)) | |
|
1293 | if best_loc != -1: | |
|
1294 | score_threshold = min(match_bitapScore(0, best_loc), score_threshold) | |
|
1295 | ||
|
1296 | # Initialise the bit arrays. | |
|
1297 | matchmask = 1 << (len(pattern) - 1) | |
|
1298 | best_loc = -1 | |
|
1299 | ||
|
1300 | bin_max = len(pattern) + len(text) | |
|
1301 | # Empty initialization added to appease pychecker. | |
|
1302 | last_rd = None | |
|
1303 | for d in xrange(len(pattern)): | |
|
1304 | # Scan for the best match each iteration allows for one more error. | |
|
1305 | # Run a binary search to determine how far from 'loc' we can stray at | |
|
1306 | # this error level. | |
|
1307 | bin_min = 0 | |
|
1308 | bin_mid = bin_max | |
|
1309 | while bin_min < bin_mid: | |
|
1310 | if match_bitapScore(d, loc + bin_mid) <= score_threshold: | |
|
1311 | bin_min = bin_mid | |
|
1312 | else: | |
|
1313 | bin_max = bin_mid | |
|
1314 | bin_mid = (bin_max - bin_min) // 2 + bin_min | |
|
1315 | ||
|
1316 | # Use the result from this iteration as the maximum for the next. | |
|
1317 | bin_max = bin_mid | |
|
1318 | start = max(1, loc - bin_mid + 1) | |
|
1319 | finish = min(loc + bin_mid, len(text)) + len(pattern) | |
|
1320 | ||
|
1321 | rd = [0] * (finish + 2) | |
|
1322 | rd[finish + 1] = (1 << d) - 1 | |
|
1323 | for j in xrange(finish, start - 1, -1): | |
|
1324 | if len(text) <= j - 1: | |
|
1325 | # Out of range. | |
|
1326 | charMatch = 0 | |
|
1327 | else: | |
|
1328 | charMatch = s.get(text[j - 1], 0) | |
|
1329 | if d == 0: # First pass: exact match. | |
|
1330 | rd[j] = ((rd[j + 1] << 1) | 1) & charMatch | |
|
1331 | else: # Subsequent passes: fuzzy match. | |
|
1332 | rd[j] = (((rd[j + 1] << 1) | 1) & charMatch) | ( | |
|
1333 | ((last_rd[j + 1] | last_rd[j]) << 1) | 1) | last_rd[j + 1] | |
|
1334 | if rd[j] & matchmask: | |
|
1335 | score = match_bitapScore(d, j - 1) | |
|
1336 | # This match will almost certainly be better than any existing match. | |
|
1337 | # But check anyway. | |
|
1338 | if score <= score_threshold: | |
|
1339 | # Told you so. | |
|
1340 | score_threshold = score | |
|
1341 | best_loc = j - 1 | |
|
1342 | if best_loc > loc: | |
|
1343 | # When passing loc, don't exceed our current distance from loc. | |
|
1344 | start = max(1, 2 * loc - best_loc) | |
|
1345 | else: | |
|
1346 | # Already passed loc, downhill from here on in. | |
|
1347 | break | |
|
1348 | # No hope for a (better) match at greater error levels. | |
|
1349 | if match_bitapScore(d + 1, loc) > score_threshold: | |
|
1350 | break | |
|
1351 | last_rd = rd | |
|
1352 | return best_loc | |
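
A minimal usage sketch of the matcher above (sample strings are illustrative; match_main and the Match_* defaults are defined in this class):

    dmp = diff_match_patch()
    # Exact substring: resolved by the text.find() fast path before bitap runs.
    dmp.match_main("abcdef", "cde", 0)   # -> 2
    # One substitution: accepted at error level d=1, since the score
    # 1/3 + proximity/Match_Distance stays under the default 0.5 threshold.
    dmp.match_main("abcdef", "cxe", 2)   # -> 2
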
|
1353 | ||
|
1354 | def match_alphabet(self, pattern): | |
|
1355 | """Initialise the alphabet for the Bitap algorithm. | |
|
1356 | ||
|
1357 | Args: | |
|
1358 | pattern: The text to encode. | |
|
1359 | ||
|
1360 | Returns: | |
|
1361 | Hash of character locations. | |
|
1362 | """ | |
|
1363 | s = {} | |
|
1364 | for char in pattern: | |
|
1365 | s[char] = 0 | |
|
1366 | for i in xrange(len(pattern)): | |
|
1367 | s[pattern[i]] |= 1 << (len(pattern) - i - 1) | |
|
1368 | return s | |
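
A worked example of the alphabet encoding above, restated standalone (binary literals added for clarity):

    pattern = "aba"
    s = {}
    for i, char in enumerate(pattern):
        s[char] = s.get(char, 0) | (1 << (len(pattern) - i - 1))
    # s == {'a': 0b101, 'b': 0b010}: each character maps to a bitmask of the
    # positions it occupies, counted from the left end of the pattern.
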
|
1369 | ||
|
1370 | # PATCH FUNCTIONS | |
|
1371 | ||
|
1372 | def patch_addContext(self, patch, text): | |
|
1373 | """Increase the context until it is unique, | |
|
1374 | but don't let the pattern expand beyond Match_MaxBits. | |
|
1375 | ||
|
1376 | Args: | |
|
1377 | patch: The patch to grow. | |
|
1378 | text: Source text. | |
|
1379 | """ | |
|
1380 | if len(text) == 0: | |
|
1381 | return | |
|
1382 | pattern = text[patch.start2 : patch.start2 + patch.length1] | |
|
1383 | padding = 0 | |
|
1384 | ||
|
1385 | # Look for the first and last matches of pattern in text. If two different | |
|
1386 | # matches are found, increase the pattern length. | |
|
1387 | while (text.find(pattern) != text.rfind(pattern) and (self.Match_MaxBits == | |
|
1388 | 0 or len(pattern) < self.Match_MaxBits - self.Patch_Margin - | |
|
1389 | self.Patch_Margin)): | |
|
1390 | padding += self.Patch_Margin | |
|
1391 | pattern = text[max(0, patch.start2 - padding) : | |
|
1392 | patch.start2 + patch.length1 + padding] | |
|
1393 | # Add one chunk for good luck. | |
|
1394 | padding += self.Patch_Margin | |
|
1395 | ||
|
1396 | # Add the prefix. | |
|
1397 | prefix = text[max(0, patch.start2 - padding) : patch.start2] | |
|
1398 | if prefix: | |
|
1399 | patch.diffs[:0] = [(self.DIFF_EQUAL, prefix)] | |
|
1400 | # Add the suffix. | |
|
1401 | suffix = text[patch.start2 + patch.length1 : | |
|
1402 | patch.start2 + patch.length1 + padding] | |
|
1403 | if suffix: | |
|
1404 | patch.diffs.append((self.DIFF_EQUAL, suffix)) | |
|
1405 | ||
|
1406 | # Roll back the start points. | |
|
1407 | patch.start1 -= len(prefix) | |
|
1408 | patch.start2 -= len(prefix) | |
|
1409 | # Extend lengths. | |
|
1410 | patch.length1 += len(prefix) + len(suffix) | |
|
1411 | patch.length2 += len(prefix) + len(suffix) | |
|
1412 | ||
|
1413 | def patch_make(self, a, b=None, c=None): | |
|
1414 | """Compute a list of patches to turn text1 into text2. | |
|
1415 | Use diffs if provided, otherwise compute it ourselves. | |
|
1416 | There are four ways to call this function, depending on what data is | |
|
1417 | available to the caller: | |
|
1418 | Method 1: | |
|
1419 | a = text1, b = text2 | |
|
1420 | Method 2: | |
|
1421 | a = diffs | |
|
1422 | Method 3 (optimal): | |
|
1423 | a = text1, b = diffs | |
|
1424 | Method 4 (deprecated, use method 3): | |
|
1425 | a = text1, b = text2, c = diffs | |
|
1426 | ||
|
1427 | Args: | |
|
1428 | a: text1 (methods 1,3,4) or Array of diff tuples for text1 to | |
|
1429 | text2 (method 2). | |
|
1430 | b: text2 (methods 1,4) or Array of diff tuples for text1 to | |
|
1431 | text2 (method 3) or undefined (method 2). | |
|
1432 | c: Array of diff tuples for text1 to text2 (method 4) or | |
|
1433 | undefined (methods 1,2,3). | |
|
1434 | ||
|
1435 | Returns: | |
|
1436 | Array of Patch objects. | |
|
1437 | """ | |
|
1438 | text1 = None | |
|
1439 | diffs = None | |
|
1440 | # Note that texts may arrive as 'str' or 'unicode'. | |
|
1441 | if isinstance(a, basestring) and isinstance(b, basestring) and c is None: | |
|
1442 | # Method 1: text1, text2 | |
|
1443 | # Compute diffs from text1 and text2. | |
|
1444 | text1 = a | |
|
1445 | diffs = self.diff_main(text1, b, True) | |
|
1446 | if len(diffs) > 2: | |
|
1447 | self.diff_cleanupSemantic(diffs) | |
|
1448 | self.diff_cleanupEfficiency(diffs) | |
|
1449 | elif isinstance(a, list) and b is None and c is None: | |
|
1450 | # Method 2: diffs | |
|
1451 | # Compute text1 from diffs. | |
|
1452 | diffs = a | |
|
1453 | text1 = self.diff_text1(diffs) | |
|
1454 | elif isinstance(a, basestring) and isinstance(b, list) and c is None: | |
|
1455 | # Method 3: text1, diffs | |
|
1456 | text1 = a | |
|
1457 | diffs = b | |
|
1458 | elif (isinstance(a, basestring) and isinstance(b, basestring) and | |
|
1459 | isinstance(c, list)): | |
|
1460 | # Method 4: text1, text2, diffs | |
|
1461 | # text2 is not used. | |
|
1462 | text1 = a | |
|
1463 | diffs = c | |
|
1464 | else: | |
|
1465 | raise ValueError("Unknown call format to patch_make.") | |
|
1466 | ||
|
1467 | if not diffs: | |
|
1468 | return [] # Get rid of the None case. | |
|
1469 | patches = [] | |
|
1470 | patch = patch_obj() | |
|
1471 | char_count1 = 0 # Number of characters into the text1 string. | |
|
1472 | char_count2 = 0 # Number of characters into the text2 string. | |
|
1473 | prepatch_text = text1 # Recreate the patches to determine context info. | |
|
1474 | postpatch_text = text1 | |
|
1475 | for x in xrange(len(diffs)): | |
|
1476 | (diff_type, diff_text) = diffs[x] | |
|
1477 | if len(patch.diffs) == 0 and diff_type != self.DIFF_EQUAL: | |
|
1478 | # A new patch starts here. | |
|
1479 | patch.start1 = char_count1 | |
|
1480 | patch.start2 = char_count2 | |
|
1481 | if diff_type == self.DIFF_INSERT: | |
|
1482 | # Insertion | |
|
1483 | patch.diffs.append(diffs[x]) | |
|
1484 | patch.length2 += len(diff_text) | |
|
1485 | postpatch_text = (postpatch_text[:char_count2] + diff_text + | |
|
1486 | postpatch_text[char_count2:]) | |
|
1487 | elif diff_type == self.DIFF_DELETE: | |
|
1488 | # Deletion. | |
|
1489 | patch.length1 += len(diff_text) | |
|
1490 | patch.diffs.append(diffs[x]) | |
|
1491 | postpatch_text = (postpatch_text[:char_count2] + | |
|
1492 | postpatch_text[char_count2 + len(diff_text):]) | |
|
1493 | elif (diff_type == self.DIFF_EQUAL and | |
|
1494 | len(diff_text) <= 2 * self.Patch_Margin and | |
|
1495 | len(patch.diffs) != 0 and len(diffs) != x + 1): | |
|
1496 | # Small equality inside a patch. | |
|
1497 | patch.diffs.append(diffs[x]) | |
|
1498 | patch.length1 += len(diff_text) | |
|
1499 | patch.length2 += len(diff_text) | |
|
1500 | ||
|
1501 | if (diff_type == self.DIFF_EQUAL and | |
|
1502 | len(diff_text) >= 2 * self.Patch_Margin): | |
|
1503 | # Time for a new patch. | |
|
1504 | if len(patch.diffs) != 0: | |
|
1505 | self.patch_addContext(patch, prepatch_text) | |
|
1506 | patches.append(patch) | |
|
1507 | patch = patch_obj() | |
|
1508 | # Unlike Unidiff, our patch lists have a rolling context. | |
|
1509 | # http://code.google.com/p/google-diff-match-patch/wiki/Unidiff | |
|
1510 | # Update prepatch text & pos to reflect the application of the | |
|
1511 | # just completed patch. | |
|
1512 | prepatch_text = postpatch_text | |
|
1513 | char_count1 = char_count2 | |
|
1514 | ||
|
1515 | # Update the current character count. | |
|
1516 | if diff_type != self.DIFF_INSERT: | |
|
1517 | char_count1 += len(diff_text) | |
|
1518 | if diff_type != self.DIFF_DELETE: | |
|
1519 | char_count2 += len(diff_text) | |
|
1520 | ||
|
1521 | # Pick up the leftover patch if not empty. | |
|
1522 | if len(patch.diffs) != 0: | |
|
1523 | self.patch_addContext(patch, prepatch_text) | |
|
1524 | patches.append(patch) | |
|
1525 | return patches | |
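
A sketch of the supported call forms from the docstring above (texts are illustrative):

    dmp = diff_match_patch()
    text1 = "The quick brown fox"
    text2 = "The quick red fox"
    patches = dmp.patch_make(text1, text2)   # method 1: two texts
    diffs = dmp.diff_main(text1, text2)
    patches = dmp.patch_make(diffs)          # method 2: diffs only
    patches = dmp.patch_make(text1, diffs)   # method 3: text1 + diffs (optimal)
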
|
1526 | ||
|
1527 | def patch_deepCopy(self, patches): | |
|
1528 | """Given an array of patches, return another array that is identical. | |
|
1529 | ||
|
1530 | Args: | |
|
1531 | patches: Array of Patch objects. | |
|
1532 | ||
|
1533 | Returns: | |
|
1534 | Array of Patch objects. | |
|
1535 | """ | |
|
1536 | patchesCopy = [] | |
|
1537 | for patch in patches: | |
|
1538 | patchCopy = patch_obj() | |
|
1539 | # No need to deep copy the tuples since they are immutable. | |
|
1540 | patchCopy.diffs = patch.diffs[:] | |
|
1541 | patchCopy.start1 = patch.start1 | |
|
1542 | patchCopy.start2 = patch.start2 | |
|
1543 | patchCopy.length1 = patch.length1 | |
|
1544 | patchCopy.length2 = patch.length2 | |
|
1545 | patchesCopy.append(patchCopy) | |
|
1546 | return patchesCopy | |
|
1547 | ||
|
1548 | def patch_apply(self, patches, text): | |
|
1549 | """Merge a set of patches onto the text. Return a patched text, as well | |
|
1550 | as a list of true/false values indicating which patches were applied. | |
|
1551 | ||
|
1552 | Args: | |
|
1553 | patches: Array of Patch objects. | |
|
1554 | text: Old text. | |
|
1555 | ||
|
1556 | Returns: | |
|
1557 | Two element Array, containing the new text and an array of boolean values. | |
|
1558 | """ | |
|
1559 | if not patches: | |
|
1560 | return (text, []) | |
|
1561 | ||
|
1562 | # Deep copy the patches so that no changes are made to originals. | |
|
1563 | patches = self.patch_deepCopy(patches) | |
|
1564 | ||
|
1565 | nullPadding = self.patch_addPadding(patches) | |
|
1566 | text = nullPadding + text + nullPadding | |
|
1567 | self.patch_splitMax(patches) | |
|
1568 | ||
|
1569 | # delta keeps track of the offset between the expected and actual location | |
|
1570 | # of the previous patch. If there are patches expected at positions 10 and | |
|
1571 | # 20, but the first patch was found at 12, delta is 2 and the second patch | |
|
1572 | # has an effective expected position of 22. | |
|
1573 | delta = 0 | |
|
1574 | results = [] | |
|
1575 | for patch in patches: | |
|
1576 | expected_loc = patch.start2 + delta | |
|
1577 | text1 = self.diff_text1(patch.diffs) | |
|
1578 | end_loc = -1 | |
|
1579 | if len(text1) > self.Match_MaxBits: | |
|
1580 | # patch_splitMax will only provide an oversized pattern in the case of | |
|
1581 | # a monster delete. | |
|
1582 | start_loc = self.match_main(text, text1[:self.Match_MaxBits], | |
|
1583 | expected_loc) | |
|
1584 | if start_loc != -1: | |
|
1585 | end_loc = self.match_main(text, text1[-self.Match_MaxBits:], | |
|
1586 | expected_loc + len(text1) - self.Match_MaxBits) | |
|
1587 | if end_loc == -1 or start_loc >= end_loc: | |
|
1588 | # Can't find valid trailing context. Drop this patch. | |
|
1589 | start_loc = -1 | |
|
1590 | else: | |
|
1591 | start_loc = self.match_main(text, text1, expected_loc) | |
|
1592 | if start_loc == -1: | |
|
1593 | # No match found. :( | |
|
1594 | results.append(False) | |
|
1595 | # Subtract the delta for this failed patch from subsequent patches. | |
|
1596 | delta -= patch.length2 - patch.length1 | |
|
1597 | else: | |
|
1598 | # Found a match. :) | |
|
1599 | results.append(True) | |
|
1600 | delta = start_loc - expected_loc | |
|
1601 | if end_loc == -1: | |
|
1602 | text2 = text[start_loc : start_loc + len(text1)] | |
|
1603 | else: | |
|
1604 | text2 = text[start_loc : end_loc + self.Match_MaxBits] | |
|
1605 | if text1 == text2: | |
|
1606 | # Perfect match, just shove the replacement text in. | |
|
1607 | text = (text[:start_loc] + self.diff_text2(patch.diffs) + | |
|
1608 | text[start_loc + len(text1):]) | |
|
1609 | else: | |
|
1610 | # Imperfect match. | |
|
1611 | # Run a diff to get a framework of equivalent indices. | |
|
1612 | diffs = self.diff_main(text1, text2, False) | |
|
1613 | if (len(text1) > self.Match_MaxBits and | |
|
1614 | self.diff_levenshtein(diffs) / float(len(text1)) > | |
|
1615 | self.Patch_DeleteThreshold): | |
|
1616 | # The end points match, but the content is unacceptably bad. | |
|
1617 | results[-1] = False | |
|
1618 | else: | |
|
1619 | self.diff_cleanupSemanticLossless(diffs) | |
|
1620 | index1 = 0 | |
|
1621 | for (op, data) in patch.diffs: | |
|
1622 | if op != self.DIFF_EQUAL: | |
|
1623 | index2 = self.diff_xIndex(diffs, index1) | |
|
1624 | if op == self.DIFF_INSERT: # Insertion | |
|
1625 | text = text[:start_loc + index2] + data + text[start_loc + | |
|
1626 | index2:] | |
|
1627 | elif op == self.DIFF_DELETE: # Deletion | |
|
1628 | text = text[:start_loc + index2] + text[start_loc + | |
|
1629 | self.diff_xIndex(diffs, index1 + len(data)):] | |
|
1630 | if op != self.DIFF_DELETE: | |
|
1631 | index1 += len(data) | |
|
1632 | # Strip the padding off. | |
|
1633 | text = text[len(nullPadding):-len(nullPadding)] | |
|
1634 | return (text, results) | |
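
Continuing that sketch, the patches can then be applied to a text that has drifted from text1; the boolean list reports which patches could be located:

    new_text, results = dmp.patch_apply(patches, "The very quick brown fox")
    # results is e.g. [True]; a patch whose context cannot be matched within
    # Match_Threshold/Match_Distance comes back False and is left unapplied.
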
|
1635 | ||
|
1636 | def patch_addPadding(self, patches): | |
|
1637 | """Add some padding on text start and end so that edges can match | |
|
1638 | something. Intended to be called only from within patch_apply. | |
|
1639 | ||
|
1640 | Args: | |
|
1641 | patches: Array of Patch objects. | |
|
1642 | ||
|
1643 | Returns: | |
|
1644 | The padding string added to each side. | |
|
1645 | """ | |
|
1646 | paddingLength = self.Patch_Margin | |
|
1647 | nullPadding = "" | |
|
1648 | for x in xrange(1, paddingLength + 1): | |
|
1649 | nullPadding += chr(x) | |
|
1650 | ||
|
1651 | # Bump all the patches forward. | |
|
1652 | for patch in patches: | |
|
1653 | patch.start1 += paddingLength | |
|
1654 | patch.start2 += paddingLength | |
|
1655 | ||
|
1656 | # Add some padding on start of first diff. | |
|
1657 | patch = patches[0] | |
|
1658 | diffs = patch.diffs | |
|
1659 | if not diffs or diffs[0][0] != self.DIFF_EQUAL: | |
|
1660 | # Add nullPadding equality. | |
|
1661 | diffs.insert(0, (self.DIFF_EQUAL, nullPadding)) | |
|
1662 | patch.start1 -= paddingLength # Should be 0. | |
|
1663 | patch.start2 -= paddingLength # Should be 0. | |
|
1664 | patch.length1 += paddingLength | |
|
1665 | patch.length2 += paddingLength | |
|
1666 | elif paddingLength > len(diffs[0][1]): | |
|
1667 | # Grow first equality. | |
|
1668 | extraLength = paddingLength - len(diffs[0][1]) | |
|
1669 | newText = nullPadding[len(diffs[0][1]):] + diffs[0][1] | |
|
1670 | diffs[0] = (diffs[0][0], newText) | |
|
1671 | patch.start1 -= extraLength | |
|
1672 | patch.start2 -= extraLength | |
|
1673 | patch.length1 += extraLength | |
|
1674 | patch.length2 += extraLength | |
|
1675 | ||
|
1676 | # Add some padding on end of last diff. | |
|
1677 | patch = patches[-1] | |
|
1678 | diffs = patch.diffs | |
|
1679 | if not diffs or diffs[-1][0] != self.DIFF_EQUAL: | |
|
1680 | # Add nullPadding equality. | |
|
1681 | diffs.append((self.DIFF_EQUAL, nullPadding)) | |
|
1682 | patch.length1 += paddingLength | |
|
1683 | patch.length2 += paddingLength | |
|
1684 | elif paddingLength > len(diffs[-1][1]): | |
|
1685 | # Grow last equality. | |
|
1686 | extraLength = paddingLength - len(diffs[-1][1]) | |
|
1687 | newText = diffs[-1][1] + nullPadding[:extraLength] | |
|
1688 | diffs[-1] = (diffs[-1][0], newText) | |
|
1689 | patch.length1 += extraLength | |
|
1690 | patch.length2 += extraLength | |
|
1691 | ||
|
1692 | return nullPadding | |
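
The padding built above is just the low control characters chr(1)..chr(Patch_Margin), which are unlikely to occur in real text; with the library's default Patch_Margin of 4:

    null_padding = "".join(chr(x) for x in xrange(1, 4 + 1))  # '\x01\x02\x03\x04'
    # patch_apply wraps the text in this on both sides so that edits touching
    # the very start or end still have context to match, then strips it off.
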
|
1693 | ||
|
1694 | def patch_splitMax(self, patches): | |
|
1695 | """Look through the patches and break up any which are longer than the | |
|
1696 | maximum limit of the match algorithm. | |
|
1697 | Intended to be called only from within patch_apply. | |
|
1698 | ||
|
1699 | Args: | |
|
1700 | patches: Array of Patch objects. | |
|
1701 | """ | |
|
1702 | patch_size = self.Match_MaxBits | |
|
1703 | if patch_size == 0: | |
|
1704 | # Python has the option of not splitting strings due to its ability | |
|
1705 | # to handle integers of arbitrary precision. | |
|
1706 | return | |
|
1707 | for x in xrange(len(patches)): | |
|
1708 | if patches[x].length1 <= patch_size: | |
|
1709 | continue | |
|
1710 | bigpatch = patches[x] | |
|
1711 | # Remove the big old patch. | |
|
1712 | del patches[x] | |
|
1713 | x -= 1 | |
|
1714 | start1 = bigpatch.start1 | |
|
1715 | start2 = bigpatch.start2 | |
|
1716 | precontext = '' | |
|
1717 | while len(bigpatch.diffs) != 0: | |
|
1718 | # Create one of several smaller patches. | |
|
1719 | patch = patch_obj() | |
|
1720 | empty = True | |
|
1721 | patch.start1 = start1 - len(precontext) | |
|
1722 | patch.start2 = start2 - len(precontext) | |
|
1723 | if precontext: | |
|
1724 | patch.length1 = patch.length2 = len(precontext) | |
|
1725 | patch.diffs.append((self.DIFF_EQUAL, precontext)) | |
|
1726 | ||
|
1727 | while (len(bigpatch.diffs) != 0 and | |
|
1728 | patch.length1 < patch_size - self.Patch_Margin): | |
|
1729 | (diff_type, diff_text) = bigpatch.diffs[0] | |
|
1730 | if diff_type == self.DIFF_INSERT: | |
|
1731 | # Insertions are harmless. | |
|
1732 | patch.length2 += len(diff_text) | |
|
1733 | start2 += len(diff_text) | |
|
1734 | patch.diffs.append(bigpatch.diffs.pop(0)) | |
|
1735 | empty = False | |
|
1736 | elif (diff_type == self.DIFF_DELETE and len(patch.diffs) == 1 and | |
|
1737 | patch.diffs[0][0] == self.DIFF_EQUAL and | |
|
1738 | len(diff_text) > 2 * patch_size): | |
|
1739 | # This is a large deletion. Let it pass in one chunk. | |
|
1740 | patch.length1 += len(diff_text) | |
|
1741 | start1 += len(diff_text) | |
|
1742 | empty = False | |
|
1743 | patch.diffs.append((diff_type, diff_text)) | |
|
1744 | del bigpatch.diffs[0] | |
|
1745 | else: | |
|
1746 | # Deletion or equality. Only take as much as we can stomach. | |
|
1747 | diff_text = diff_text[:patch_size - patch.length1 - | |
|
1748 | self.Patch_Margin] | |
|
1749 | patch.length1 += len(diff_text) | |
|
1750 | start1 += len(diff_text) | |
|
1751 | if diff_type == self.DIFF_EQUAL: | |
|
1752 | patch.length2 += len(diff_text) | |
|
1753 | start2 += len(diff_text) | |
|
1754 | else: | |
|
1755 | empty = False | |
|
1756 | ||
|
1757 | patch.diffs.append((diff_type, diff_text)) | |
|
1758 | if diff_text == bigpatch.diffs[0][1]: | |
|
1759 | del bigpatch.diffs[0] | |
|
1760 | else: | |
|
1761 | bigpatch.diffs[0] = (bigpatch.diffs[0][0], | |
|
1762 | bigpatch.diffs[0][1][len(diff_text):]) | |
|
1763 | ||
|
1764 | # Compute the head context for the next patch. | |
|
1765 | precontext = self.diff_text2(patch.diffs) | |
|
1766 | precontext = precontext[-self.Patch_Margin:] | |
|
1767 | # Append the end context for this patch. | |
|
1768 | postcontext = self.diff_text1(bigpatch.diffs)[:self.Patch_Margin] | |
|
1769 | if postcontext: | |
|
1770 | patch.length1 += len(postcontext) | |
|
1771 | patch.length2 += len(postcontext) | |
|
1772 | if len(patch.diffs) != 0 and patch.diffs[-1][0] == self.DIFF_EQUAL: | |
|
1773 | patch.diffs[-1] = (self.DIFF_EQUAL, patch.diffs[-1][1] + | |
|
1774 | postcontext) | |
|
1775 | else: | |
|
1776 | patch.diffs.append((self.DIFF_EQUAL, postcontext)) | |
|
1777 | ||
|
1778 | if not empty: | |
|
1779 | x += 1 | |
|
1780 | patches.insert(x, patch) | |
|
1781 | ||
|
1782 | def patch_toText(self, patches): | |
|
1783 | """Take a list of patches and return a textual representation. | |
|
1784 | ||
|
1785 | Args: | |
|
1786 | patches: Array of Patch objects. | |
|
1787 | ||
|
1788 | Returns: | |
|
1789 | Text representation of patches. | |
|
1790 | """ | |
|
1791 | text = [] | |
|
1792 | for patch in patches: | |
|
1793 | text.append(str(patch)) | |
|
1794 | return "".join(text) | |
|
1795 | ||
|
1796 | def patch_fromText(self, textline): | |
|
1797 | """Parse a textual representation of patches and return a list of patch | |
|
1798 | objects. | |
|
1799 | ||
|
1800 | Args: | |
|
1801 | textline: Text representation of patches. | |
|
1802 | ||
|
1803 | Returns: | |
|
1804 | Array of Patch objects. | |
|
1805 | ||
|
1806 | Raises: | |
|
1807 | ValueError: If invalid input. | |
|
1808 | """ | |
|
1809 | if type(textline) == unicode: | |
|
1810 | # Patches should be composed of a subset of ascii chars, Unicode not | |
|
1811 | # required. If this encode raises UnicodeEncodeError, patch is invalid. | |
|
1812 | textline = textline.encode("ascii") | |
|
1813 | patches = [] | |
|
1814 | if not textline: | |
|
1815 | return patches | |
|
1816 | text = textline.split('\n') | |
|
1817 | while len(text) != 0: | |
|
1818 | m = re.match("^@@ -(\d+),?(\d*) \+(\d+),?(\d*) @@$", text[0]) | |
|
1819 | if not m: | |
|
1820 | raise ValueError("Invalid patch string: " + text[0]) | |
|
1821 | patch = patch_obj() | |
|
1822 | patches.append(patch) | |
|
1823 | patch.start1 = int(m.group(1)) | |
|
1824 | if m.group(2) == '': | |
|
1825 | patch.start1 -= 1 | |
|
1826 | patch.length1 = 1 | |
|
1827 | elif m.group(2) == '0': | |
|
1828 | patch.length1 = 0 | |
|
1829 | else: | |
|
1830 | patch.start1 -= 1 | |
|
1831 | patch.length1 = int(m.group(2)) | |
|
1832 | ||
|
1833 | patch.start2 = int(m.group(3)) | |
|
1834 | if m.group(4) == '': | |
|
1835 | patch.start2 -= 1 | |
|
1836 | patch.length2 = 1 | |
|
1837 | elif m.group(4) == '0': | |
|
1838 | patch.length2 = 0 | |
|
1839 | else: | |
|
1840 | patch.start2 -= 1 | |
|
1841 | patch.length2 = int(m.group(4)) | |
|
1842 | ||
|
1843 | del text[0] | |
|
1844 | ||
|
1845 | while len(text) != 0: | |
|
1846 | if text[0]: | |
|
1847 | sign = text[0][0] | |
|
1848 | else: | |
|
1849 | sign = '' | |
|
1850 | line = urllib.unquote(text[0][1:]) | |
|
1851 | line = line.decode("utf-8") | |
|
1852 | if sign == '+': | |
|
1853 | # Insertion. | |
|
1854 | patch.diffs.append((self.DIFF_INSERT, line)) | |
|
1855 | elif sign == '-': | |
|
1856 | # Deletion. | |
|
1857 | patch.diffs.append((self.DIFF_DELETE, line)) | |
|
1858 | elif sign == ' ': | |
|
1859 | # Minor equality. | |
|
1860 | patch.diffs.append((self.DIFF_EQUAL, line)) | |
|
1861 | elif sign == '@': | |
|
1862 | # Start of next patch. | |
|
1863 | break | |
|
1864 | elif sign == '': | |
|
1865 | # Blank line? Whatever. | |
|
1866 | pass | |
|
1867 | else: | |
|
1868 | # WTF? | |
|
1869 | raise ValueError("Invalid patch mode: '%s'\n%s" % (sign, line)) | |
|
1870 | del text[0] | |
|
1871 | return patches | |
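
A round-trip sketch: patch_toText emits the GNU-diff-like text that patch_fromText parses back (bodies are %xx-escaped, see patch_obj.__str__ below):

    serialized = dmp.patch_toText(patches)
    restored = dmp.patch_fromText(serialized)
    assert dmp.patch_toText(restored) == serialized
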
|
1872 | ||
|
1873 | ||
|
1874 | class patch_obj: | |
|
1875 | """Class representing one patch operation. | |
|
1876 | """ | |
|
1877 | ||
|
1878 | def __init__(self): | |
|
1879 | """Initializes with an empty list of diffs. | |
|
1880 | """ | |
|
1881 | self.diffs = [] | |
|
1882 | self.start1 = None | |
|
1883 | self.start2 = None | |
|
1884 | self.length1 = 0 | |
|
1885 | self.length2 = 0 | |
|
1886 | ||
|
1887 | def __str__(self): | |
|
1888 | """Emmulate GNU diff's format. | |
|
1889 | Header: @@ -382,8 +481,9 @@ | |
|
1890 | Indices are printed as 1-based, not 0-based. | |
|
1891 | ||
|
1892 | Returns: | |
|
1893 | The GNU diff string. | |
|
1894 | """ | |
|
1895 | if self.length1 == 0: | |
|
1896 | coords1 = str(self.start1) + ",0" | |
|
1897 | elif self.length1 == 1: | |
|
1898 | coords1 = str(self.start1 + 1) | |
|
1899 | else: | |
|
1900 | coords1 = str(self.start1 + 1) + "," + str(self.length1) | |
|
1901 | if self.length2 == 0: | |
|
1902 | coords2 = str(self.start2) + ",0" | |
|
1903 | elif self.length2 == 1: | |
|
1904 | coords2 = str(self.start2 + 1) | |
|
1905 | else: | |
|
1906 | coords2 = str(self.start2 + 1) + "," + str(self.length2) | |
|
1907 | text = ["@@ -", coords1, " +", coords2, " @@\n"] | |
|
1908 | # Escape the body of the patch with %xx notation. | |
|
1909 | for (op, data) in self.diffs: | |
|
1910 | if op == diff_match_patch.DIFF_INSERT: | |
|
1911 | text.append("+") | |
|
1912 | elif op == diff_match_patch.DIFF_DELETE: | |
|
1913 | text.append("-") | |
|
1914 | elif op == diff_match_patch.DIFF_EQUAL: | |
|
1915 | text.append(" ") | |
|
1916 | # High ascii will raise UnicodeDecodeError. Use Unicode instead. | |
|
1917 | data = data.encode("utf-8") | |
|
1918 | text.append(urllib.quote(data, "!~*'();/?:@&=+$,# ") + "\n") | |
|
1919 | return "".join(text) No newline at end of file |
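
A worked example of the header logic in __str__, with values chosen to reproduce the docstring's sample header:

    p = patch_obj()
    p.start1, p.length1 = 381, 8
    p.start2, p.length2 = 480, 9
    str(p)   # -> "@@ -382,8 +481,9 @@\n": starts print 1-based, a bare index
             # denotes length 1, and a ",0" range keeps its 0-based index.
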
@@ -0,0 +1,398 b'' | |||
|
1 | <%def name="diff_line_anchor(filename, line, type)"><% | |
|
2 | return '%s_%s_%i' % (h.safeid(filename), type, line) | |
|
3 | %></%def> | |
|
4 | ||
|
5 | <%def name="action_class(action)"><% | |
|
6 | return { | |
|
7 | '-': 'cb-deletion', | |
|
8 | '+': 'cb-addition', | |
|
9 | ' ': 'cb-context', | |
|
10 | }.get(action, 'cb-empty') | |
|
11 | %></%def> | |
|
12 | ||
|
13 | <%def name="op_class(op_id)"><% | |
|
14 | return { | |
|
15 | DEL_FILENODE: 'deletion', # file deleted | |
|
16 | BIN_FILENODE: 'warning' # binary diff hidden | |
|
17 | }.get(op_id, 'addition') | |
|
18 | %></%def> | |
|
19 | ||
|
20 | <%def name="link_for(**kw)"><% | |
|
21 | new_args = request.GET.mixed() | |
|
22 | new_args.update(kw) | |
|
23 | return h.url('', **new_args) | |
|
24 | %></%def> | |
|
25 | ||
|
26 | <%def name="render_diffset(diffset, | |
|
27 | ||
|
28 | # collapse all file diff entries when there are more than this amount of files in the diff | |
|
29 | collapse_when_files_over=20, | |
|
30 | ||
|
31 | # collapse lines in the diff when more than this amount of lines changed in the file diff | |
|
32 | lines_changed_limit=500, | |
|
33 | )"> | |
|
34 | <% | |
|
35 | # TODO: dan: move this to an argument - and set a cookie so that it is saved | |
|
36 | # default option for future requests | |
|
37 | diff_mode = request.GET.get('diffmode', 'sideside') | |
|
38 | if diff_mode not in ('sideside', 'unified'): | |
|
39 | diff_mode = 'sideside' | |
|
40 | ||
|
41 | collapse_all = len(diffset.files) > collapse_when_files_over | |
|
42 | %> | |
|
43 | ||
|
44 | %if diff_mode == 'sideside': | |
|
45 | <style> | |
|
46 | .wrapper { | |
|
47 | max-width: 1600px !important; | |
|
48 | } | |
|
49 | </style> | |
|
50 | %endif | |
|
51 | ||
|
52 | % if diffset.limited_diff: | |
|
53 | <div class="alert alert-warning"> | |
|
54 | ${_('The requested commit is too big and content was truncated.')} <a href="${link_for(fulldiff=1)}" onclick="return confirm('${_("Showing a big diff might take some time and resources, continue?")}')">${_('Show full diff')}</a> | |
|
55 | </div> | |
|
56 | % endif | |
|
57 | ||
|
58 | <div class="cs_files"> | |
|
59 | <div class="cs_files_title"> | |
|
60 | %if diffset.files: | |
|
61 | <div class="pull-right"> | |
|
62 | <div class="btn-group"> | |
|
63 | <a | |
|
64 | class="btn ${diff_mode == 'sideside' and 'btn-primary'} tooltip" | |
|
65 | title="${_('View side by side')}" | |
|
66 | href="${link_for(diffmode='sideside')}"> | |
|
67 | <span>${_('Side by Side')}</span> | |
|
68 | </a> | |
|
69 | <a | |
|
70 | class="btn ${diff_mode == 'unified' and 'btn-primary'} tooltip" | |
|
71 | title="${_('View unified')}" href="${link_for(diffmode='unified')}"> | |
|
72 | <span>${_('Unified')}</span> | |
|
73 | </a> | |
|
74 | </div> | |
|
75 | </div> | |
|
76 | <div class="pull-left"> | |
|
77 | <div class="btn-group"> | |
|
78 | <a | |
|
79 | class="btn" | |
|
80 | href="#" | |
|
81 | onclick="$('input[class=diff-collapse-state]').prop('checked', false); return false">${_('Expand All')}</a> | |
|
82 | <a | |
|
83 | class="btn" | |
|
84 | href="#" | |
|
85 | onclick="$('input[class=diff-collapse-state]').prop('checked', true); return false">${_('Collapse All')}</a> | |
|
86 | </div> | |
|
87 | </div> | |
|
88 | %endif | |
|
89 | <h2 style="padding: 5px; text-align: center;"> | |
|
90 | %if diffset.limited_diff: | |
|
91 | ${ungettext('%(num)s file changed', '%(num)s files changed', diffset.changed_files) % {'num': diffset.changed_files}} | |
|
92 | %else: | |
|
93 | ${ungettext('%(num)s file changed: %(linesadd)s inserted, ''%(linesdel)s deleted', | |
|
94 | '%(num)s files changed: %(linesadd)s inserted, %(linesdel)s deleted', diffset.changed_files) % {'num': diffset.changed_files, 'linesadd': diffset.lines_added, 'linesdel': diffset.lines_deleted}} | |
|
95 | %endif | |
|
96 | </h2> | |
|
97 | </div> | |
|
98 | ||
|
99 | %if not diffset.files: | |
|
100 | <p class="empty_data">${_('No files')}</p> | |
|
101 | %endif | |
|
102 | ||
|
103 | <div class="filediffs"> | |
|
104 | %for i, filediff in enumerate(diffset.files): | |
|
105 | <% | |
|
106 | lines_changed = filediff['patch']['stats']['added'] + filediff['patch']['stats']['deleted'] | |
|
107 | over_lines_changed_limit = lines_changed > lines_changed_limit | |
|
108 | %> | |
|
109 | <input ${collapse_all and 'checked' or ''} class="diff-collapse-state" id="diff-collapse-${i}" type="checkbox"> | |
|
110 | <div | |
|
111 | class="diff" | |
|
112 | data-f-path="${filediff['patch']['filename']}" | |
|
113 | id="a_${h.FID('', filediff['patch']['filename'])}"> | |
|
114 | <label for="diff-collapse-${i}" class="diff-heading"> | |
|
115 | <div class="diff-collapse-indicator"></div> | |
|
116 | ${diff_ops(filediff)} | |
|
117 | </label> | |
|
118 | ${diff_menu(filediff)} | |
|
119 | <table class="cb cb-diff-${diff_mode} code-highlight ${over_lines_changed_limit and 'cb-collapsed' or ''}"> | |
|
120 | %if not filediff.hunks: | |
|
121 | %for op_id, op_text in filediff['patch']['stats']['ops'].items(): | |
|
122 | <tr> | |
|
123 | <td class="cb-text cb-${op_class(op_id)}" ${diff_mode == 'unified' and 'colspan=3' or 'colspan=4'}> | |
|
124 | %if op_id == DEL_FILENODE: | |
|
125 | ${_('File was deleted')} | |
|
126 | %elif op_id == BIN_FILENODE: | |
|
127 | ${_('Binary file hidden')} | |
|
128 | %else: | |
|
129 | ${op_text} | |
|
130 | %endif | |
|
131 | </td> | |
|
132 | </tr> | |
|
133 | %endfor | |
|
134 | %endif | |
|
135 | %if over_lines_changed_limit: | |
|
136 | <tr class="cb-warning cb-collapser"> | |
|
137 | <td class="cb-text" ${diff_mode == 'unified' and 'colspan=3' or 'colspan=4'}> | |
|
138 | ${_('This diff has been collapsed as it changes many lines (%i lines changed)' % lines_changed)} | |
|
139 | <a href="#" class="cb-expand" | |
|
140 | onclick="$(this).closest('table').removeClass('cb-collapsed'); return false;">${_('Show them')} | |
|
141 | </a> | |
|
142 | <a href="#" class="cb-collapse" | |
|
143 | onclick="$(this).closest('table').addClass('cb-collapsed'); return false;">${_('Hide them')} | |
|
144 | </a> | |
|
145 | </td> | |
|
146 | </tr> | |
|
147 | %endif | |
|
148 | %if filediff.patch['is_limited_diff']: | |
|
149 | <tr class="cb-warning cb-collapser"> | |
|
150 | <td class="cb-text" ${diff_mode == 'unified' and 'colspan=3' or 'colspan=4'}> | |
|
151 | ${_('The requested commit is too big and content was truncated.')} <a href="${link_for(fulldiff=1)}" onclick="return confirm('${_("Showing a big diff might take some time and resources, continue?")}')">${_('Show full diff')}</a> | |
|
152 | </td> | |
|
153 | </tr> | |
|
154 | %endif | |
|
155 | %for hunk in filediff.hunks: | |
|
156 | <tr class="cb-hunk"> | |
|
157 | <td ${diff_mode == 'unified' and 'colspan=2' or ''}> | |
|
158 | ## TODO: dan: add ajax loading of more context here | |
|
159 | ## <a href="#"> | |
|
160 | <i class="icon-more"></i> | |
|
161 | ## </a> | |
|
162 | </td> | |
|
163 | <td ${diff_mode == 'sideside' and 'colspan=3' or ''}> | |
|
164 | @@ | |
|
165 | -${hunk.source_start},${hunk.source_length} | |
|
166 | +${hunk.target_start},${hunk.target_length} | |
|
167 | ${hunk.section_header} | |
|
168 | </td> | |
|
169 | </tr> | |
|
170 | %if diff_mode == 'unified': | |
|
171 | ${render_hunk_lines_unified(hunk)} | |
|
172 | %elif diff_mode == 'sideside': | |
|
173 | ${render_hunk_lines_sideside(hunk)} | |
|
174 | %else: | |
|
175 | <tr class="cb-line"> | |
|
176 | <td>unknown diff mode</td> | |
|
177 | </tr> | |
|
178 | %endif | |
|
179 | %endfor | |
|
180 | </table> | |
|
181 | </div> | |
|
182 | %endfor | |
|
183 | </div> | |
|
184 | </div> | |
|
185 | </%def> | |
|
186 | ||
|
187 | <%def name="diff_ops(filediff)"> | |
|
188 | <% | |
|
189 | stats = filediff['patch']['stats'] | |
|
190 | from rhodecode.lib.diffs import NEW_FILENODE, DEL_FILENODE, \ | |
|
191 | MOD_FILENODE, RENAMED_FILENODE, CHMOD_FILENODE, BIN_FILENODE | |
|
192 | %> | |
|
193 | <span class="diff-pill"> | |
|
194 | %if filediff.source_file_path and filediff.target_file_path: | |
|
195 | %if filediff.source_file_path != filediff.target_file_path: # file was renamed | |
|
196 | <strong>${filediff.target_file_path}</strong> ⬅ <del>${filediff.source_file_path}</del> | |
|
197 | %else: | |
|
198 | ## file was modified | |
|
199 | <strong>${filediff.source_file_path}</strong> | |
|
200 | %endif | |
|
201 | %else: | |
|
202 | %if filediff.source_file_path: | |
|
203 | ## file was deleted | |
|
204 | <strong>${filediff.source_file_path}</strong> | |
|
205 | %else: | |
|
206 | ## file was added | |
|
207 | <strong>${filediff.target_file_path}</strong> | |
|
208 | %endif | |
|
209 | %endif | |
|
210 | </span> | |
|
211 | <span class="diff-pill-group" style="float: left"> | |
|
212 | %if filediff.patch['is_limited_diff']: | |
|
213 | <span class="diff-pill tooltip" op="limited" title="The stats for this diff are not complete">limited diff</span> | |
|
214 | %endif | |
|
215 | %if RENAMED_FILENODE in stats['ops']: | |
|
216 | <span class="diff-pill" op="renamed">renamed</span> | |
|
217 | %endif | |
|
218 | ||
|
219 | %if NEW_FILENODE in stats['ops']: | |
|
220 | <span class="diff-pill" op="created">created</span> | |
|
221 | %if filediff['target_mode'].startswith('120'): | |
|
222 | <span class="diff-pill" op="symlink">symlink</span> | |
|
223 | %else: | |
|
224 | <span class="diff-pill" op="mode">${nice_mode(filediff['target_mode'])}</span> | |
|
225 | %endif | |
|
226 | %endif | |
|
227 | ||
|
228 | %if DEL_FILENODE in stats['ops']: | |
|
229 | <span class="diff-pill" op="removed">removed</span> | |
|
230 | %endif | |
|
231 | ||
|
232 | %if CHMOD_FILENODE in stats['ops']: | |
|
233 | <span class="diff-pill" op="mode"> | |
|
234 | ${nice_mode(filediff['source_mode'])} ➡ ${nice_mode(filediff['target_mode'])} | |
|
235 | </span> | |
|
236 | %endif | |
|
237 | </span> | |
|
238 | ||
|
239 | <a class="diff-pill diff-anchor" href="#a_${h.FID('', filediff.patch['filename'])}">¶</a> | |
|
240 | ||
|
241 | <span class="diff-pill-group" style="float: right"> | |
|
242 | %if BIN_FILENODE in stats['ops']: | |
|
243 | <span class="diff-pill" op="binary">binary</span> | |
|
244 | %if MOD_FILENODE in stats['ops']: | |
|
245 | <span class="diff-pill" op="modified">modified</span> | |
|
246 | %endif | |
|
247 | %endif | |
|
248 | %if stats['deleted']: | |
|
249 | <span class="diff-pill" op="deleted">-${stats['deleted']}</span> | |
|
250 | %endif | |
|
251 | %if stats['added']: | |
|
252 | <span class="diff-pill" op="added">+${stats['added']}</span> | |
|
253 | %endif | |
|
254 | </span> | |
|
255 | ||
|
256 | </%def> | |
|
257 | ||
|
258 | <%def name="nice_mode(filemode)"> | |
|
259 | ${filemode.startswith('100') and filemode[3:] or filemode} | |
|
260 | </%def> | |
|
261 | ||
|
262 | <%def name="diff_menu(filediff)"> | |
|
263 | <div class="diff-menu"> | |
|
264 | %if filediff.diffset.source_ref: | |
|
265 | %if filediff.patch['operation'] in ['D', 'M']: | |
|
266 | <a | |
|
267 | class="tooltip" | |
|
268 | href="${h.url('files_home',repo_name=c.repo_name,f_path=filediff.source_file_path,revision=filediff.diffset.source_ref)}" | |
|
269 | title="${h.tooltip(_('Show file at commit: %(commit_id)s') % {'commit_id': filediff.diffset.source_ref[:12]})}" | |
|
270 | > | |
|
271 | ${_('Show file before')} | |
|
272 | </a> | |
|
273 | %else: | |
|
274 | <a | |
|
275 | disabled | |
|
276 | class="tooltip" | |
|
277 | title="${h.tooltip(_('File no longer present at commit: %(commit_id)s') % {'commit_id': filediff.diffset.source_ref[:12]})}" | |
|
278 | > | |
|
279 | ${_('Show file before')} | |
|
280 | </a> | |
|
281 | %endif | |
|
282 | %if filediff.patch['operation'] in ['A', 'M']: | |
|
283 | <a | |
|
284 | class="tooltip" | |
|
285 | href="${h.url('files_home',repo_name=c.repo_name,f_path=filediff.target_file_path,revision=filediff.diffset.target_ref)}" | |
|
286 | title="${h.tooltip(_('Show file at commit: %(commit_id)s') % {'commit_id': filediff.diffset.target_ref[:12]})}" | |
|
287 | > | |
|
288 | ${_('Show file after')} | |
|
289 | </a> | |
|
290 | %else: | |
|
291 | <a | |
|
292 | disabled | |
|
293 | class="tooltip" | |
|
294 | title="${h.tooltip(_('File no longer present at commit: %(commit_id)s') % {'commit_id': filediff.diffset.target_ref[:12]})}" | |
|
295 | > | |
|
296 | ${_('Show file after')} | |
|
297 | </a> | |
|
298 | %endif | |
|
299 | <a | |
|
300 | class="tooltip" | |
|
301 | title="${h.tooltip(_('Raw diff'))}" | |
|
302 | href="${h.url('files_diff_home',repo_name=c.repo_name,f_path=filediff.target_file_path,diff2=filediff.diffset.target_ref,diff1=filediff.diffset.source_ref,diff='raw')}" | |
|
303 | > | |
|
304 | ${_('Raw diff')} | |
|
305 | </a> | |
|
306 | <a | |
|
307 | class="tooltip" | |
|
308 | title="${h.tooltip(_('Download diff'))}" | |
|
309 | href="${h.url('files_diff_home',repo_name=c.repo_name,f_path=filediff.target_file_path,diff2=filediff.diffset.target_ref,diff1=filediff.diffset.source_ref,diff='download')}" | |
|
310 | > | |
|
311 | ${_('Download diff')} | |
|
312 | </a> | |
|
313 | %endif | |
|
314 | </div> | |
|
315 | </%def> | |
|
316 | ||
|
317 | ||
|
318 | <%def name="render_hunk_lines_sideside(hunk)"> | |
|
319 | %for i, line in enumerate(hunk.sideside): | |
|
320 | <% | |
|
321 | old_line_anchor, new_line_anchor = None, None | |
|
322 | if line.original.lineno: | |
|
323 | old_line_anchor = diff_line_anchor(hunk.filediff.source_file_path, line.original.lineno, 'o') | |
|
324 | if line.modified.lineno: | |
|
325 | new_line_anchor = diff_line_anchor(hunk.filediff.target_file_path, line.modified.lineno, 'n') | |
|
326 | %> | |
|
327 | <tr class="cb-line"> | |
|
328 | <td class="cb-lineno ${action_class(line.original.action)}" | |
|
329 | data-line-number="${line.original.lineno}" | |
|
330 | %if old_line_anchor: | |
|
331 | id="${old_line_anchor}" | |
|
332 | %endif | |
|
333 | > | |
|
334 | %if line.original.lineno: | |
|
335 | <a name="${old_line_anchor}" href="#${old_line_anchor}">${line.original.lineno}</a> | |
|
336 | %endif | |
|
337 | </td> | |
|
338 | <td class="cb-content ${action_class(line.original.action)}" | |
|
339 | data-line-number="o${line.original.lineno}" | |
|
340 | ><span class="cb-code">${line.original.action} ${line.original.content or '' | n}</span> | |
|
341 | </td> | |
|
342 | <td class="cb-lineno ${action_class(line.modified.action)}" | |
|
343 | data-line-number="${line.modified.lineno}" | |
|
344 | %if new_line_anchor: | |
|
345 | id="${new_line_anchor}" | |
|
346 | %endif | |
|
347 | > | |
|
348 | %if line.modified.lineno: | |
|
349 | <a name="${new_line_anchor}" href="#${new_line_anchor}">${line.modified.lineno}</a> | |
|
350 | %endif | |
|
351 | </td> | |
|
352 | <td class="cb-content ${action_class(line.modified.action)}" | |
|
353 | data-line-number="n${line.modified.lineno}" | |
|
354 | > | |
|
355 | <span class="cb-code">${line.modified.action} ${line.modified.content or '' | n}</span> | |
|
356 | </td> | |
|
357 | </tr> | |
|
358 | %endfor | |
|
359 | </%def> | |
|
360 | ||
|
361 | ||
|
362 | <%def name="render_hunk_lines_unified(hunk)"> | |
|
363 | %for old_line_no, new_line_no, action, content in hunk.unified: | |
|
364 | <% | |
|
365 | old_line_anchor, new_line_anchor = None, None | |
|
366 | if old_line_no: | |
|
367 | old_line_anchor = diff_line_anchor(hunk.filediff.source_file_path, old_line_no, 'o') | |
|
368 | if new_line_no: | |
|
369 | new_line_anchor = diff_line_anchor(hunk.filediff.target_file_path, new_line_no, 'n') | |
|
370 | %> | |
|
371 | <tr class="cb-line"> | |
|
372 | <td class="cb-lineno ${action_class(action)}" | |
|
373 | data-line-number="${old_line_no}" | |
|
374 | %if old_line_anchor: | |
|
375 | id="${old_line_anchor}" | |
|
376 | %endif | |
|
377 | > | |
|
378 | %if old_line_anchor: | |
|
379 | <a name="${old_line_anchor}" href="#${old_line_anchor}">${old_line_no}</a> | |
|
380 | %endif | |
|
381 | </td> | |
|
382 | <td class="cb-lineno ${action_class(action)}" | |
|
383 | data-line-number="${new_line_no}" | |
|
384 | %if new_line_anchor: | |
|
385 | id="${new_line_anchor}" | |
|
386 | %endif | |
|
387 | > | |
|
388 | %if new_line_anchor: | |
|
389 | <a name="${new_line_anchor}" href="#${new_line_anchor}">${new_line_no}</a> | |
|
390 | %endif | |
|
391 | </td> | |
|
392 | <td class="cb-content ${action_class(action)}" | |
|
393 | data-line-number="${new_line_no and 'n' or 'o'}${new_line_no or old_line_no}" | |
|
394 | ><span class="cb-code">${action} ${content or '' | n}</span> | |
|
395 | </td> | |
|
396 | </tr> | |
|
397 | %endfor | |
|
398 | </%def> |
@@ -14,6 +14,8 b' permission notice:' | |||
|
14 | 14 | file:licenses/tornado_license.txt |
|
15 | 15 | Copyright (c) 2015 - pygments-markdown-lexer |
|
16 | 16 | file:licenses/pygments_markdown_lexer_license.txt |
|
17 | Copyright 2006 - diff_match_patch | |
|
18 | file:licenses/diff_match_patch_license.txt | |
|
17 | 19 | |
|
18 | 20 | All licensed under the Apache License, Version 2.0 (the "License"); |
|
19 | 21 | you may not use this file except in compliance with the License. |
@@ -31,13 +31,14 b' from pylons.i18n.translation import _' | |||
|
31 | 31 | |
|
32 | 32 | from rhodecode.controllers.utils import parse_path_ref, get_commit_from_ref_name |
|
33 | 33 | from rhodecode.lib import helpers as h |
|
34 | from rhodecode.lib import diffs | |
|
34 | from rhodecode.lib import diffs, codeblocks | |
|
35 | 35 | from rhodecode.lib.auth import LoginRequired, HasRepoPermissionAnyDecorator |
|
36 | 36 | from rhodecode.lib.base import BaseRepoController, render |
|
37 | 37 | from rhodecode.lib.utils import safe_str |
|
38 | 38 | from rhodecode.lib.utils2 import safe_unicode, str2bool |
|
39 | 39 | from rhodecode.lib.vcs.exceptions import ( |
|
40 | EmptyRepositoryError, RepositoryError, RepositoryRequirementError | |
40 | EmptyRepositoryError, RepositoryError, RepositoryRequirementError, | |
|
41 | NodeDoesNotExistError) | |
|
41 | 42 | from rhodecode.model.db import Repository, ChangesetStatus |
|
42 | 43 | |
|
43 | 44 | log = logging.getLogger(__name__) |
@@ -78,7 +79,7 b' class CompareController(BaseRepoControll' | |||
|
78 | 79 | def index(self, repo_name): |
|
79 | 80 | c.compare_home = True |
|
80 | 81 | c.commit_ranges = [] |
|
81 | c. | |
|
82 | c.diffset = None | |
|
82 | 83 | c.limited_diff = False |
|
83 | 84 | source_repo = c.rhodecode_db_repo.repo_name |
|
84 | 85 | target_repo = request.GET.get('target_repo', source_repo) |
@@ -239,28 +240,23 b' class CompareController(BaseRepoControll' | |||
|
239 | 240 | commit1=source_commit, commit2=target_commit, |
|
240 | 241 | path1=source_path, path=target_path) |
|
241 | 242 | diff_processor = diffs.DiffProcessor( |
|
242 | txtdiff, format=' | |
|
243 | txtdiff, format='newdiff', diff_limit=diff_limit, | |
|
243 | 244 | file_limit=file_limit, show_full_diff=c.fulldiff) |
|
244 | 245 | _parsed = diff_processor.prepare() |
|
245 | 246 | |
|
246 | c.limited_diff = False | |
|
247 | if isinstance(_parsed, diffs.LimitedDiffContainer): | |
|
248 | c.limited_diff = True | |
|
247 | def _node_getter(commit): | |
|
248 | """ Returns a function that returns a node for a commit or None """ | |
|
249 | def get_node(fname): | |
|
250 | try: | |
|
251 | return commit.get_node(fname) | |
|
252 | except NodeDoesNotExistError: | |
|
253 | return None | |
|
254 | return get_node | |
|
249 | 255 | |
|
250 | c.files = [] | |
|
251 | c.changes = {} | |
|
252 | c.lines_added = 0 | |
|
253 | c.lines_deleted = 0 | |
|
254 | for f in _parsed: | |
|
255 | st = f['stats'] | |
|
256 | if not st['binary']: | |
|
257 | c.lines_added += st['added'] | |
|
258 | c.lines_deleted += st['deleted'] | |
|
259 | fid = h.FID('', f['filename']) | |
|
260 | c.files.append([fid, f['operation'], f['filename'], f['stats'], f]) | |
|
261 | htmldiff = diff_processor.as_html( | |
|
262 | enable_comments=False, parsed_lines=[f]) | |
|
263 | c.changes[fid] = [f['operation'], f['filename'], htmldiff, f] | |
|
256 | c.diffset = codeblocks.DiffSet( | |
|
257 | source_node_getter=_node_getter(source_commit), | |
|
258 | target_node_getter=_node_getter(target_commit), | |
|
259 | ).render_patchset(_parsed, source_ref, target_ref) | |
|
264 | 260 | |
|
265 | 261 | c.preview_mode = merge |
|
266 | 262 |
@@ -19,13 +19,17 b'' | |||
|
19 | 19 | # and proprietary license terms, please see https://rhodecode.com/licenses/ |
|
20 | 20 | |
|
21 | 21 | import logging |
|
22 | import difflib | |
|
22 | 23 | from itertools import groupby |
|
23 | 24 | |
|
24 | 25 | from pygments import lex |
|
25 | 26 | from pygments.formatters.html import _get_ttype_class as pygment_token_class |
|
26 | from rhodecode.lib.helpers import | |
|
27 | from rhodecode.lib.helpers import ( | |
|
28 | get_lexer_for_filenode, get_lexer_safe, html_escape) | |
|
27 | 29 | from rhodecode.lib.utils2 import AttributeDict |
|
28 | 30 | from rhodecode.lib.vcs.nodes import FileNode |
|
31 | from rhodecode.lib.diff_match_patch import diff_match_patch | |
|
32 | from rhodecode.lib.diffs import LimitedDiffContainer | |
|
29 | 33 | from pygments.lexers import get_lexer_by_name |
|
30 | 34 | |
|
31 | 35 | plain_text_lexer = get_lexer_by_name( |
@@ -38,7 +42,7 b' log = logging.getLogger()' | |||
|
38 | 42 | def filenode_as_lines_tokens(filenode, lexer=None): |
|
39 | 43 | lexer = lexer or get_lexer_for_filenode(filenode) |
|
40 | 44 | log.debug('Generating file node pygment tokens for %s, %s', lexer, filenode) |
|
41 | tokens = tokenize_string(filenode.content, | |
|
45 | tokens = tokenize_string(filenode.content, lexer) | |
|
42 | 46 | lines = split_token_stream(tokens, split_string='\n') |
|
43 | 47 | rv = list(lines) |
|
44 | 48 | return rv |
@@ -146,7 +150,11 b' def render_tokenstream(tokenstream):' | |||
|
146 | 150 | result.append(u'<%s>' % op_tag) |
|
147 | 151 | |
|
148 | 152 | escaped_text = html_escape(token_text) |
|
149 | escaped_text = escaped_text.replace('\n', '<nl>\n</nl>') | |
|
153 | ||
|
154 | # TODO: dan: investigate showing hidden characters like space/nl/tab | |
|
155 | # escaped_text = escaped_text.replace(' ', '<sp> </sp>') | |
|
156 | # escaped_text = escaped_text.replace('\n', '<nl>\n</nl>') | |
|
157 | # escaped_text = escaped_text.replace('\t', '<tab>\t</tab>') | |
|
150 | 158 | |
|
151 | 159 | result.append(escaped_text) |
|
152 | 160 | |
@@ -212,3 +220,416 b' def rollup_tokenstream(tokenstream):' | |||
|
212 | 220 | ops.append((token_op, ''.join(text_buffer))) |
|
213 | 221 | result.append((token_class, ops)) |
|
214 | 222 | return result |
|
223 | ||
|
224 | ||
|
225 | def tokens_diff(old_tokens, new_tokens, use_diff_match_patch=True): | |
|
226 | """ | |
|
227 | Converts a list of (token_class, token_text) tuples to a list of | |
|
228 | (token_class, token_op, token_text) tuples where token_op is one of | |
|
229 | ('ins', 'del', '') | |
|
230 | ||
|
231 | :param old_tokens: list of (token_class, token_text) tuples of old line | |
|
232 | :param new_tokens: list of (token_class, token_text) tuples of new line | |
|
233 | :param use_diff_match_patch: boolean, will use google's diff match patch | |
|
234 | library which has options to 'smooth' out the character by character | |
|
235 | differences making nicer ins/del blocks | |
|
236 | """ | |
|
237 | ||
|
238 | old_tokens_result = [] | |
|
239 | new_tokens_result = [] | |
|
240 | ||
|
241 | similarity = difflib.SequenceMatcher(None, | |
|
242 | ''.join(token_text for token_class, token_text in old_tokens), | |
|
243 | ''.join(token_text for token_class, token_text in new_tokens) | |
|
244 | ).ratio() | |
|
245 | ||
|
246 | if similarity < 0.6: # return, the blocks are too different | |
|
247 | for token_class, token_text in old_tokens: | |
|
248 | old_tokens_result.append((token_class, '', token_text)) | |
|
249 | for token_class, token_text in new_tokens: | |
|
250 | new_tokens_result.append((token_class, '', token_text)) | |
|
251 | return old_tokens_result, new_tokens_result, similarity | |
|
252 | ||
|
253 | token_sequence_matcher = difflib.SequenceMatcher(None, | |
|
254 | [x[1] for x in old_tokens], | |
|
255 | [x[1] for x in new_tokens]) | |
|
256 | ||
|
257 | for tag, o1, o2, n1, n2 in token_sequence_matcher.get_opcodes(): | |
|
258 | # check the differences by token block types first to give a | |
|
259 | # nicer "block" level replacement vs character diffs | |
|
260 | ||
|
261 | if tag == 'equal': | |
|
262 | for token_class, token_text in old_tokens[o1:o2]: | |
|
263 | old_tokens_result.append((token_class, '', token_text)) | |
|
264 | for token_class, token_text in new_tokens[n1:n2]: | |
|
265 | new_tokens_result.append((token_class, '', token_text)) | |
|
266 | elif tag == 'delete': | |
|
267 | for token_class, token_text in old_tokens[o1:o2]: | |
|
268 | old_tokens_result.append((token_class, 'del', token_text)) | |
|
269 | elif tag == 'insert': | |
|
270 | for token_class, token_text in new_tokens[n1:n2]: | |
|
271 | new_tokens_result.append((token_class, 'ins', token_text)) | |
|
272 | elif tag == 'replace': | |
|
273 | # if blocks of the same token type must be replaced, do a diff on the | |
|
274 | # characters in the token blocks to show individual changes | |
|
275 | ||
|
276 | old_char_tokens = [] | |
|
277 | new_char_tokens = [] | |
|
278 | for token_class, token_text in old_tokens[o1:o2]: | |
|
279 | for char in token_text: | |
|
280 | old_char_tokens.append((token_class, char)) | |
|
281 | ||
|
282 | for token_class, token_text in new_tokens[n1:n2]: | |
|
283 | for char in token_text: | |
|
284 | new_char_tokens.append((token_class, char)) | |
|
285 | ||
|
286 | old_string = ''.join([token_text for | |
|
287 | token_class, token_text in old_char_tokens]) | |
|
288 | new_string = ''.join([token_text for | |
|
289 | token_class, token_text in new_char_tokens]) | |
|
290 | ||
|
291 | char_sequence = difflib.SequenceMatcher( | |
|
292 | None, old_string, new_string) | |
|
293 | copcodes = char_sequence.get_opcodes() | |
|
294 | obuffer, nbuffer = [], [] | |
|
295 | ||
|
296 | if use_diff_match_patch: | |
|
297 | dmp = diff_match_patch() | |
|
298 | dmp.Diff_EditCost = 11 # TODO: dan: extract this to a setting | |
|
299 | reps = dmp.diff_main(old_string, new_string) | |
|
300 | dmp.diff_cleanupEfficiency(reps) | |
|
301 | ||
|
302 | a, b = 0, 0 | |
|
303 | for op, rep in reps: | |
|
304 | l = len(rep) | |
|
305 | if op == 0: | |
|
306 | for i, c in enumerate(rep): | |
|
307 | obuffer.append((old_char_tokens[a+i][0], '', c)) | |
|
308 | nbuffer.append((new_char_tokens[b+i][0], '', c)) | |
|
309 | a += l | |
|
310 | b += l | |
|
311 | elif op == -1: | |
|
312 | for i, c in enumerate(rep): | |
|
313 | obuffer.append((old_char_tokens[a+i][0], 'del', c)) | |
|
314 | a += l | |
|
315 | elif op == 1: | |
|
316 | for i, c in enumerate(rep): | |
|
317 | nbuffer.append((new_char_tokens[b+i][0], 'ins', c)) | |
|
318 | b += l | |
|
319 | else: | |
|
320 | for ctag, co1, co2, cn1, cn2 in copcodes: | |
|
321 | if ctag == 'equal': | |
|
322 | for token_class, token_text in old_char_tokens[co1:co2]: | |
|
323 | obuffer.append((token_class, '', token_text)) | |
|
324 | for token_class, token_text in new_char_tokens[cn1:cn2]: | |
|
325 | nbuffer.append((token_class, '', token_text)) | |
|
326 | elif ctag == 'delete': | |
|
327 | for token_class, token_text in old_char_tokens[co1:co2]: | |
|
328 | obuffer.append((token_class, 'del', token_text)) | |
|
329 | elif ctag == 'insert': | |
|
330 | for token_class, token_text in new_char_tokens[cn1:cn2]: | |
|
331 | nbuffer.append((token_class, 'ins', token_text)) | |
|
332 | elif ctag == 'replace': | |
|
333 | for token_class, token_text in old_char_tokens[co1:co2]: | |
|
334 | obuffer.append((token_class, 'del', token_text)) | |
|
335 | for token_class, token_text in new_char_tokens[cn1:cn2]: | |
|
336 | nbuffer.append((token_class, 'ins', token_text)) | |
|
337 | ||
|
338 | old_tokens_result.extend(obuffer) | |
|
339 | new_tokens_result.extend(nbuffer) | |
|
340 | ||
|
341 | return old_tokens_result, new_tokens_result, similarity | |
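
An illustrative call of tokens_diff (token classes are arbitrary pygments CSS classes; exact splits depend on the cleanup pass):

    old = [('k', 'def'), ('', ' '), ('nf', 'foo')]
    new = [('k', 'def'), ('', ' '), ('nf', 'food')]
    old_res, new_res, similarity = tokens_diff(old, new)
    # e.g. new_res ends with ('nf', 'ins', 'd'): unchanged tokens keep an
    # empty op, and only the replaced block is diffed character by character.
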
|
342 | ||
|
343 | ||
|
344 | class DiffSet(object): | |
|
345 | """ | |
|
346 | An object for parsing the diff result from diffs.DiffProcessor and | |
|
347 | adding highlighting, side by side/unified renderings and line diffs | |
|
348 | """ | |
|
349 | ||
|
350 | HL_REAL = 'REAL' # highlights using original file, slow | |
|
351 | HL_FAST = 'FAST' # highlights using just the line, fast but not correct | |
|
352 | # in the case of multiline code | |
|
353 | HL_NONE = 'NONE' # no highlighting, fastest | |
|
354 | ||
|
355 | def __init__(self, highlight_mode=HL_REAL, | |
|
356 | source_node_getter=lambda filename: None, | |
|
357 | target_node_getter=lambda filename: None, | |
|
358 | source_nodes=None, target_nodes=None, | |
|
359 | max_file_size_limit=150 * 1024, # files over this size will | |
|
360 | # use fast highlighting | |
|
361 | ): | |
|
362 | ||
|
363 | self.highlight_mode = highlight_mode | |
|
364 | self.highlighted_filenodes = {} | |
|
365 | self.source_node_getter = source_node_getter | |
|
366 | self.target_node_getter = target_node_getter | |
|
367 | self.source_nodes = source_nodes or {} | |
|
368 | self.target_nodes = target_nodes or {} | |
|
369 | ||
|
370 | ||
|
371 | self.max_file_size_limit = max_file_size_limit | |
|
372 | ||
|
373 | def render_patchset(self, patchset, source_ref=None, target_ref=None): | |
|
374 | diffset = AttributeDict(dict( | |
|
375 | lines_added=0, | |
|
376 | lines_deleted=0, | |
|
377 | changed_files=0, | |
|
378 | files=[], | |
|
379 | limited_diff=isinstance(patchset, LimitedDiffContainer), | |
|
380 | source_ref=source_ref, | |
|
381 | target_ref=target_ref, | |
|
382 | )) | |
|
383 | for patch in patchset: | |
|
384 | filediff = self.render_patch(patch) | |
|
385 | filediff.diffset = diffset | |
|
386 | diffset.files.append(filediff) | |
|
387 | diffset.changed_files += 1 | |
|
388 | if not patch['stats']['binary']: | |
|
389 | diffset.lines_added += patch['stats']['added'] | |
|
390 | diffset.lines_deleted += patch['stats']['deleted'] | |
|
391 | ||
|
392 | return diffset | |
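
This mirrors the compare controller change earlier in this commit; condensed, the usage is (node getters and the parsed patchset come from that controller):

    diffset = DiffSet(
        source_node_getter=_node_getter(source_commit),
        target_node_getter=_node_getter(target_commit),
    ).render_patchset(_parsed, source_ref, target_ref)
    # diffset.files then holds per-file AttributeDicts whose .hunks feed the
    # render_diffset() template def above.
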
|
393 | ||
|
394 | _lexer_cache = {} | |
|
395 | def _get_lexer_for_filename(self, filename): | |
|
396 | # cached because we might need to call it twice for source/target | |
|
397 | if filename not in self._lexer_cache: | |
|
398 | self._lexer_cache[filename] = get_lexer_safe(filepath=filename) | |
|
399 | return self._lexer_cache[filename] | |
|
400 | ||
|
401 | def render_patch(self, patch): | |
|
402 | log.debug('rendering diff for %r' % patch['filename']) | |
|
403 | ||
|
404 | source_filename = patch['original_filename'] | |
|
405 | target_filename = patch['filename'] | |
|
406 | ||
|
407 | source_lexer = plain_text_lexer | |
|
408 | target_lexer = plain_text_lexer | |
|
409 | ||
|
410 | if not patch['stats']['binary']: | |
|
411 | if self.highlight_mode == self.HL_REAL: | |
|
412 | if (source_filename and patch['operation'] in ('D', 'M') | |
|
413 | and source_filename not in self.source_nodes): | |
|
414 | self.source_nodes[source_filename] = ( | |
|
415 | self.source_node_getter(source_filename)) | |
|
416 | ||
|
417 | if (target_filename and patch['operation'] in ('A', 'M') | |
|
418 | and target_filename not in self.target_nodes): | |
|
419 | self.target_nodes[target_filename] = ( | |
|
420 | self.target_node_getter(target_filename)) | |
|
421 | ||
|
422 | elif self.highlight_mode == self.HL_FAST: | |
|
423 | source_lexer = self._get_lexer_for_filename(source_filename) | |
|
424 | target_lexer = self._get_lexer_for_filename(target_filename) | |
|
425 | ||
|
426 | source_file = self.source_nodes.get(source_filename, source_filename) | |
|
427 | target_file = self.target_nodes.get(target_filename, target_filename) | |
|
428 | ||
|
429 | source_filenode, target_filenode = None, None | |
|
430 | ||
|
431 | # TODO: dan: FileNode.lexer works on the content of the file - which | |
|
432 | # can be slow - issue #4289 explains a lexer clean up - which once | |
|
433 | # done can allow caching a lexer for a filenode to avoid the file lookup | |
|
434 | if isinstance(source_file, FileNode): | |
|
435 | source_filenode = source_file | |
|
436 | source_lexer = source_file.lexer | |
|
437 | if isinstance(target_file, FileNode): | |
|
438 | target_filenode = target_file | |
|
439 | target_lexer = target_file.lexer | |
|
440 | ||
|
441 | source_file_path, target_file_path = None, None | |
|
442 | ||
|
443 | if source_filename != '/dev/null': | |
|
444 | source_file_path = source_filename | |
|
445 | if target_filename != '/dev/null': | |
|
446 | target_file_path = target_filename | |
|
447 | ||
|
448 | source_file_type = source_lexer.name | |
|
449 | target_file_type = target_lexer.name | |
|
450 | ||
|
451 | op_hunks = patch['chunks'][0] | |
|
452 | hunks = patch['chunks'][1:] | |
|
453 | ||
|
454 | filediff = AttributeDict({ | |
|
455 | 'source_file_path': source_file_path, | |
|
456 | 'target_file_path': target_file_path, | |
|
457 | 'source_filenode': source_filenode, | |
|
458 | 'target_filenode': target_filenode, | |
|
459 | 'hunks': [], | |
|
460 | 'source_file_type': target_file_type, | |
|
461 | 'target_file_type': source_file_type, | |
|
462 | 'patch': patch, | |
|
463 | 'source_mode': patch['stats']['old_mode'], | |
|
464 | 'target_mode': patch['stats']['new_mode'], | |
|
465 | 'limited_diff': isinstance(patch, LimitedDiffContainer), | |
|
466 | 'diffset': self, | |
|
467 | }) | |
|
468 | ||
|
469 | for hunk in hunks: | |
|
470 | hunkbit = self.parse_hunk(hunk, source_file, target_file) | |
|
471 | hunkbit.filediff = filediff | |
|
472 | filediff.hunks.append(hunkbit) | |
|
473 | return filediff | |
|
474 | ||
|
475 | def parse_hunk(self, hunk, source_file, target_file): | |
|
476 | result = AttributeDict(dict( | |
|
477 | source_start=hunk['source_start'], | |
|
478 | source_length=hunk['source_length'], | |
|
479 | target_start=hunk['target_start'], | |
|
480 | target_length=hunk['target_length'], | |
|
481 | section_header=hunk['section_header'], | |
|
482 | lines=[], | |
|
483 | )) | |
|
484 | before, after = [], [] | |
|
485 | ||
|
486 | for line in hunk['lines']: | |
|
487 | if line['action'] == 'unmod': | |
|
488 | result.lines.extend( | |
|
489 | self.parse_lines(before, after, source_file, target_file)) | |
|
490 | after.append(line) | |
|
491 | before.append(line) | |
|
492 | elif line['action'] == 'add': | |
|
493 | after.append(line) | |
|
494 | elif line['action'] == 'del': | |
|
495 | before.append(line) | |
|
496 | elif line['action'] == 'context-old': | |
|
497 | before.append(line) | |
|
498 | elif line['action'] == 'context-new': | |
|
499 | after.append(line) | |
|
500 | ||
|
501 | result.lines.extend( | |
|
502 | self.parse_lines(before, after, source_file, target_file)) | |
|
503 | result.unified = self.as_unified(result.lines) | |
|
504 | result.sideside = result.lines | |
|
505 | return result | |
|
506 | ||
|
507 | def parse_lines(self, before_lines, after_lines, source_file, target_file): | |
|
508 | # TODO: dan: investigate doing the diff comparison and fast highlighting | |
|
509 | # on the entire buffered before and after blocks rather than line by | |
|
510 | # line; that way 'fast' highlighting can do better when the context | |
|
511 | # allows it, e.g. | |
|
512 | # line 4: """ | |
|
513 | # line 5: this gets highlighted as a string | |
|
514 | # line 6: """ | |
|
515 | ||
|
516 | lines = [] | |
|
517 | while before_lines or after_lines: | |
|
518 | before, after = None, None | |
|
519 | before_tokens, after_tokens = None, None | |
|
520 | ||
|
521 | if before_lines: | |
|
522 | before = before_lines.pop(0) | |
|
523 | if after_lines: | |
|
524 | after = after_lines.pop(0) | |
|
525 | ||
|
526 | original = AttributeDict() | |
|
527 | modified = AttributeDict() | |
|
528 | ||
|
529 | if before: | |
|
530 | before_tokens = self.get_line_tokens( | |
|
531 | line_text=before['line'], line_number=before['old_lineno'], | |
|
532 | file=source_file) | |
|
533 | original.lineno = before['old_lineno'] | |
|
534 | original.content = before['line'] | |
|
535 | original.action = self.action_to_op(before['action']) | |
|
536 | ||
|
537 | if after: | |
|
538 | after_tokens = self.get_line_tokens( | |
|
539 | line_text=after['line'], line_number=after['new_lineno'], | |
|
540 | file=target_file) | |
|
541 | modified.lineno = after['new_lineno'] | |
|
542 | modified.content = after['line'] | |
|
543 | modified.action = self.action_to_op(after['action']) | |
|
544 | ||
|
545 | ||
|
546 | # diff the lines | |
|
547 | if before_tokens and after_tokens: | |
|
548 | o_tokens, m_tokens, similarity = tokens_diff(before_tokens, after_tokens) | |
|
549 | original.content = render_tokenstream(o_tokens) | |
|
550 | modified.content = render_tokenstream(m_tokens) | |
|
551 | elif before_tokens: | |
|
552 | original.content = render_tokenstream( | |
|
553 | [(x[0], '', x[1]) for x in before_tokens]) | |
|
554 | elif after_tokens: | |
|
555 | modified.content = render_tokenstream( | |
|
556 | [(x[0], '', x[1]) for x in after_tokens]) | |
|
557 | ||
|
558 | lines.append(AttributeDict({ | |
|
559 | 'original': original, | |
|
560 | 'modified': modified, | |
|
561 | })) | |
|
562 | ||
|
563 | return lines | |
|
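A note on the token shapes involved above, inferred from the indexing in this method and from the render tests at the end of this diff, so treat the exact tuples as an assumption: tokenize_string appears to yield 2-tuples of (token_class, text), while render_tokenstream consumes 3-tuples of (token_class, op_tag, text), with op_tag being '', 'ins' or 'del'.

    # hypothetical shapes, matching the x[0]/x[1] indexing above
    before_tokens = [('A', u'hel'), ('A', u'lo')]  # (class, text)
    renderable = [(cls, '', txt) for cls, txt in before_tokens]  # (class, op, text)
    # tokens_diff(before_tokens, after_tokens) is then assumed to return
    # two such renderable streams plus a similarity score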
564 | ||
|
565 | def get_line_tokens(self, line_text, line_number, file=None): | |
|
566 | filenode = None | |
|
567 | filename = None | |
|
568 | ||
|
569 | if isinstance(file, basestring): | |
|
570 | filename = file | |
|
571 | elif isinstance(file, FileNode): | |
|
572 | filenode = file | |
|
573 | filename = file.unicode_path | |
|
574 | ||
|
575 | if self.highlight_mode == self.HL_REAL and filenode: | |
|
576 | if line_number and file.size < self.max_file_size_limit: | |
|
577 | return self.get_tokenized_filenode_line(file, line_number) | |
|
578 | ||
|
579 | if self.highlight_mode in (self.HL_REAL, self.HL_FAST) and filename: | |
|
580 | lexer = self._get_lexer_for_filename(filename) | |
|
581 | return list(tokenize_string(line_text, lexer)) | |
|
582 | ||
|
583 | return list(tokenize_string(line_text, plain_text_lexer)) | |
|
584 | ||
|
585 | def get_tokenized_filenode_line(self, filenode, line_number): | |
|
586 | ||
|
587 | if filenode not in self.highlighted_filenodes: | |
|
588 | tokenized_lines = filenode_as_lines_tokens(filenode, filenode.lexer) | |
|
589 | self.highlighted_filenodes[filenode] = tokenized_lines | |
|
590 | return self.highlighted_filenodes[filenode][line_number - 1] | |
|
591 | ||
|
592 | def action_to_op(self, action): | |
|
593 | return { | |
|
594 | 'add': '+', | |
|
595 | 'del': '-', | |
|
596 | 'unmod': ' ', | |
|
597 | 'context-old': ' ', | |
|
598 | 'context-new': ' ', | |
|
599 | }.get(action, action) | |
|
600 | ||
|
601 | def as_unified(self, lines): | |
|
602 | """ Return a generator that yields the lines of a diff in unified order """ | |
|
603 | def generator(): | |
|
604 | buf = [] | |
|
605 | for line in lines: | |
|
606 | ||
|
607 | if buf and (not line.original or line.original.action == ' '): | |
|
608 | for b in buf: | |
|
609 | yield b | |
|
610 | buf = [] | |
|
611 | ||
|
612 | if line.original: | |
|
613 | if line.original.action == ' ': | |
|
614 | yield (line.original.lineno, line.modified.lineno, | |
|
615 | line.original.action, line.original.content) | |
|
616 | continue | |
|
617 | ||
|
618 | if line.original.action == '-': | |
|
619 | yield (line.original.lineno, None, | |
|
620 | line.original.action, line.original.content) | |
|
621 | ||
|
622 | if line.modified.action == '+': | |
|
623 | buf.append(( | |
|
624 | None, line.modified.lineno, | |
|
625 | line.modified.action, line.modified.content)) | |
|
626 | continue | |
|
627 | ||
|
628 | if line.modified: | |
|
629 | yield (None, line.modified.lineno, | |
|
630 | line.modified.action, line.modified.content) | |
|
631 | ||
|
632 | for b in buf: | |
|
633 | yield b | |
|
634 | ||
|
635 | return generator() |
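The generator returned by as_unified yields 4-tuples of (old_lineno, new_lineno, operation, content): deletions are emitted immediately, while additions are buffered and flushed once an unmodified line (or the end of the hunk) is reached, which produces the usual unified ordering. A rough consumption sketch, with the tuple layout read off the yields above:

    for old_no, new_no, op, content in hunk.unified:
        # op is '+', '-' or ' ' (see action_to_op above)
        print '%4s %4s %s%s' % (old_no or '', new_no or '', op, content)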
@@ -180,6 +180,8 b' class Action(object):' | |||
|
180 | 180 | UNMODIFIED = 'unmod' |
|
181 | 181 | |
|
182 | 182 | CONTEXT = 'context' |
|
183 | CONTEXT_OLD = 'context-old' | |
|
184 | CONTEXT_NEW = 'context-new' | |
|
183 | 185 | |
|
184 | 186 | |
|
185 | 187 | class DiffProcessor(object): |
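The two new constants describe the '\ No newline at end of file' marker of unified diffs; the marker is not counted in the hunk's line ranges and only ever applies to one side of the change. A hypothetical diff tail showing which side each constant covers (the actual handling is in _new_parse_lines further down):

    sample_tail = (
        '-old last line\n'
        '\\ No newline at end of file\n'  # follows a '-' line -> CONTEXT_OLD
        '+new last line\n'                # a marker after '+' -> CONTEXT_NEW
    )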
@@ -227,7 +229,7 b' class DiffProcessor(object):' | |||
|
227 | 229 | self._parser = self._parse_gitdiff |
|
228 | 230 | else: |
|
229 | 231 | self.differ = self._highlight_line_udiff |
|
230 | self._parser = self._parse_udiff | |
|
232 | self._parser = self._new_parse_gitdiff | |
|
231 | 233 | |
|
232 | 234 | def _copy_iterator(self): |
|
233 | 235 | """ |
@@ -491,9 +493,181 b' class DiffProcessor(object):' | |||
|
491 | 493 | |
|
492 | 494 | return diff_container(sorted(_files, key=sorter)) |
|
493 | 495 | |
|
494 | def _parse_udiff(self, inline_diff=True): | |
|
495 | raise NotImplementedError() | |
|
496 | ||
|
497 | # FIXME: NEWDIFFS: dan: this replaces the old _escaper function | |
|
498 | def _process_line(self, string): | |
|
499 | """ | |
|
500 | Process a diff line, checks the diff limit | |
|
501 | ||
|
502 | :param string: | |
|
503 | """ | |
|
504 | ||
|
505 | self.cur_diff_size += len(string) | |
|
506 | ||
|
507 | if not self.show_full_diff and (self.cur_diff_size > self.diff_limit): | |
|
508 | raise DiffLimitExceeded('Diff Limit Exceeded') | |
|
509 | ||
|
510 | return safe_unicode(string) | |
|
511 | ||
|
512 | # FIXME: NEWDIFFS: dan: this replaces _parse_gitdiff | |
|
513 | def _new_parse_gitdiff(self, inline_diff=True): | |
|
514 | _files = [] | |
|
515 | diff_container = lambda arg: arg | |
|
516 | for chunk in self._diff.chunks(): | |
|
517 | head = chunk.header | |
|
518 | log.debug('parsing diff %r' % head) | |
|
519 | ||
|
520 | diff = imap(self._process_line, chunk.diff.splitlines(1)) | |
|
521 | raw_diff = chunk.raw | |
|
522 | limited_diff = False | |
|
523 | exceeds_limit = False | |
|
524 | # if 'empty_file_to_modify_and_rename' in head['a_path']: | |
|
525 | # 1/0 | |
|
526 | op = None | |
|
527 | stats = { | |
|
528 | 'added': 0, | |
|
529 | 'deleted': 0, | |
|
530 | 'binary': False, | |
|
531 | 'old_mode': None, | |
|
532 | 'new_mode': None, | |
|
533 | 'ops': {}, | |
|
534 | } | |
|
535 | if head['old_mode']: | |
|
536 | stats['old_mode'] = head['old_mode'] | |
|
537 | if head['new_mode']: | |
|
538 | stats['new_mode'] = head['new_mode'] | |
|
539 | if head['b_mode']: | |
|
540 | stats['new_mode'] = head['b_mode'] | |
|
541 | ||
|
542 | if head['deleted_file_mode']: | |
|
543 | op = OPS.DEL | |
|
544 | stats['binary'] = True | |
|
545 | stats['ops'][DEL_FILENODE] = 'deleted file' | |
|
546 | ||
|
547 | elif head['new_file_mode']: | |
|
548 | op = OPS.ADD | |
|
549 | stats['binary'] = True | |
|
550 | stats['old_mode'] = None | |
|
551 | stats['new_mode'] = head['new_file_mode'] | |
|
552 | stats['ops'][NEW_FILENODE] = 'new file %s' % head['new_file_mode'] | |
|
553 | else: # modify operation, can be copy, rename or chmod | |
|
554 | ||
|
555 | # CHMOD | |
|
556 | if head['new_mode'] and head['old_mode']: | |
|
557 | op = OPS.MOD | |
|
558 | stats['binary'] = True | |
|
559 | stats['ops'][CHMOD_FILENODE] = ( | |
|
560 | 'modified file chmod %s => %s' % ( | |
|
561 | head['old_mode'], head['new_mode'])) | |
|
562 | ||
|
563 | # RENAME | |
|
564 | if head['rename_from'] != head['rename_to']: | |
|
565 | op = OPS.MOD | |
|
566 | stats['binary'] = True | |
|
567 | stats['renamed'] = (head['rename_from'], head['rename_to']) | |
|
568 | stats['ops'][RENAMED_FILENODE] = ( | |
|
569 | 'file renamed from %s to %s' % ( | |
|
570 | head['rename_from'], head['rename_to'])) | |
|
571 | # COPY | |
|
572 | if head.get('copy_from') and head.get('copy_to'): | |
|
573 | op = OPS.MOD | |
|
574 | stats['binary'] = True | |
|
575 | stats['copied'] = (head['copy_from'], head['copy_to']) | |
|
576 | stats['ops'][COPIED_FILENODE] = ( | |
|
577 | 'file copied from %s to %s' % ( | |
|
578 | head['copy_from'], head['copy_to'])) | |
|
496 | 579 | |
|
580 | # If our new parsed headers didn't match anything fallback to | |
|
581 | # old style detection | |
|
582 | if op is None: | |
|
583 | if not head['a_file'] and head['b_file']: | |
|
584 | op = OPS.ADD | |
|
585 | stats['binary'] = True | |
|
586 | stats['new_file'] = True | |
|
587 | stats['ops'][NEW_FILENODE] = 'new file' | |
|
588 | ||
|
589 | elif head['a_file'] and not head['b_file']: | |
|
590 | op = OPS.DEL | |
|
591 | stats['binary'] = True | |
|
592 | stats['ops'][DEL_FILENODE] = 'deleted file' | |
|
593 | ||
|
594 | # it's not ADD not DELETE | |
|
595 | if op is None: | |
|
596 | op = OPS.MOD | |
|
597 | stats['binary'] = True | |
|
598 | stats['ops'][MOD_FILENODE] = 'modified file' | |
|
599 | ||
|
600 | # a real non-binary diff | |
|
601 | if head['a_file'] or head['b_file']: | |
|
602 | try: | |
|
603 | raw_diff, chunks, _stats = self._new_parse_lines(diff) | |
|
604 | stats['binary'] = False | |
|
605 | stats['added'] = _stats[0] | |
|
606 | stats['deleted'] = _stats[1] | |
|
607 | # explicit mark that it's a modified file | |
|
608 | if op == OPS.MOD: | |
|
609 | stats['ops'][MOD_FILENODE] = 'modified file' | |
|
610 | exceeds_limit = len(raw_diff) > self.file_limit | |
|
611 | ||
|
612 | # changed from _escaper function so we validate size of | |
|
613 | # each file instead of the whole diff | |
|
614 | # diff will hide big files but still show small ones | |
|
615 | # from my tests, big files are fairly safe to be parsed | |
|
616 | # but the browser is the bottleneck | |
|
617 | if not self.show_full_diff and exceeds_limit: | |
|
618 | raise DiffLimitExceeded('File Limit Exceeded') | |
|
619 | ||
|
620 | except DiffLimitExceeded: | |
|
621 | diff_container = lambda _diff: \ | |
|
622 | LimitedDiffContainer( | |
|
623 | self.diff_limit, self.cur_diff_size, _diff) | |
|
624 | ||
|
625 | exceeds_limit = len(raw_diff) > self.file_limit | |
|
626 | limited_diff = True | |
|
627 | chunks = [] | |
|
628 | ||
|
629 | else: # GIT format binary patch, or possibly empty diff | |
|
630 | if head['bin_patch']: | |
|
631 | # we have operation already extracted, but we mark simply | |
|
632 | # it's a diff we wont show for binary files | |
|
633 | stats['ops'][BIN_FILENODE] = 'binary diff hidden' | |
|
634 | chunks = [] | |
|
635 | ||
|
636 | if chunks and not self.show_full_diff and op == OPS.DEL: | |
|
637 | # if not full diff mode show deleted file contents | |
|
638 | # TODO: anderson: if the view is not too big, there is no way | |
|
639 | # to see the content of the file | |
|
640 | chunks = [] | |
|
641 | ||
|
642 | chunks.insert(0, [{ | |
|
643 | 'old_lineno': '', | |
|
644 | 'new_lineno': '', | |
|
645 | 'action': Action.CONTEXT, | |
|
646 | 'line': msg, | |
|
647 | } for _op, msg in stats['ops'].iteritems() | |
|
648 | if _op not in [MOD_FILENODE]]) | |
|
649 | ||
|
650 | original_filename = safe_unicode(head['a_path']) | |
|
651 | _files.append({ | |
|
652 | 'original_filename': original_filename, | |
|
653 | 'filename': safe_unicode(head['b_path']), | |
|
654 | 'old_revision': head['a_blob_id'], | |
|
655 | 'new_revision': head['b_blob_id'], | |
|
656 | 'chunks': chunks, | |
|
657 | 'raw_diff': safe_unicode(raw_diff), | |
|
658 | 'operation': op, | |
|
659 | 'stats': stats, | |
|
660 | 'exceeds_limit': exceeds_limit, | |
|
661 | 'is_limited_diff': limited_diff, | |
|
662 | }) | |
|
663 | ||
|
664 | ||
|
665 | sorter = lambda info: {OPS.ADD: 0, OPS.MOD: 1, | |
|
666 | OPS.DEL: 2}.get(info['operation']) | |
|
667 | ||
|
668 | return diff_container(sorted(_files, key=sorter)) | |
|
669 | ||
|
670 | # FIXME: NEWDIFFS: dan: this gets replaced by _new_parse_lines | |
|
497 | 671 | def _parse_lines(self, diff): |
|
498 | 672 | """ |
|
499 | 673 | Parse the diff an return data for the template. |
@@ -588,6 +762,107 b' class DiffProcessor(object):' | |||
|
588 | 762 | pass |
|
589 | 763 | return ''.join(raw_diff), chunks, stats |
|
590 | 764 | |
|
765 | # FIXME: NEWDIFFS: dan: this replaces _parse_lines | |
|
766 | def _new_parse_lines(self, diff): | |
|
767 | """ | |
|
768 | Parse the diff and return data for the template. | |
|
769 | """ | |
|
770 | ||
|
771 | lineiter = iter(diff) | |
|
772 | stats = [0, 0] | |
|
773 | chunks = [] | |
|
774 | raw_diff = [] | |
|
775 | ||
|
776 | try: | |
|
777 | line = lineiter.next() | |
|
778 | ||
|
779 | while line: | |
|
780 | raw_diff.append(line) | |
|
781 | match = self._chunk_re.match(line) | |
|
782 | ||
|
783 | if not match: | |
|
784 | break | |
|
785 | ||
|
786 | gr = match.groups() | |
|
787 | (old_line, old_end, | |
|
788 | new_line, new_end) = [int(x or 1) for x in gr[:-1]] | |
|
789 | ||
|
790 | lines = [] | |
|
791 | hunk = { | |
|
792 | 'section_header': gr[-1], | |
|
793 | 'source_start': old_line, | |
|
794 | 'source_length': old_end, | |
|
795 | 'target_start': new_line, | |
|
796 | 'target_length': new_end, | |
|
797 | 'lines': lines, | |
|
798 | } | |
|
799 | chunks.append(hunk) | |
|
800 | ||
|
801 | old_line -= 1 | |
|
802 | new_line -= 1 | |
|
803 | ||
|
804 | context = len(gr) == 5 | |
|
805 | old_end += old_line | |
|
806 | new_end += new_line | |
|
807 | ||
|
808 | line = lineiter.next() | |
|
809 | ||
|
810 | while old_line < old_end or new_line < new_end: | |
|
811 | command = ' ' | |
|
812 | if line: | |
|
813 | command = line[0] | |
|
814 | ||
|
815 | affects_old = affects_new = False | |
|
816 | ||
|
817 | # ignore those if we don't expect them | |
|
818 | if command in '#@': | |
|
819 | continue | |
|
820 | elif command == '+': | |
|
821 | affects_new = True | |
|
822 | action = Action.ADD | |
|
823 | stats[0] += 1 | |
|
824 | elif command == '-': | |
|
825 | affects_old = True | |
|
826 | action = Action.DELETE | |
|
827 | stats[1] += 1 | |
|
828 | else: | |
|
829 | affects_old = affects_new = True | |
|
830 | action = Action.UNMODIFIED | |
|
831 | ||
|
832 | if not self._newline_marker.match(line): | |
|
833 | old_line += affects_old | |
|
834 | new_line += affects_new | |
|
835 | lines.append({ | |
|
836 | 'old_lineno': affects_old and old_line or '', | |
|
837 | 'new_lineno': affects_new and new_line or '', | |
|
838 | 'action': action, | |
|
839 | 'line': self._clean_line(line, command) | |
|
840 | }) | |
|
841 | raw_diff.append(line) | |
|
842 | ||
|
843 | line = lineiter.next() | |
|
844 | ||
|
845 | if self._newline_marker.match(line): | |
|
846 | # we need to append to lines, since this marker is not | |
|
847 | # counted in the diff's line ranges | |
|
848 | if affects_old: | |
|
849 | action = Action.CONTEXT_OLD | |
|
850 | elif affects_new: | |
|
851 | action = Action.CONTEXT_NEW | |
|
852 | else: | |
|
853 | raise Exception('invalid context for no newline') | |
|
854 | ||
|
855 | lines.append({ | |
|
856 | 'old_lineno': None, | |
|
857 | 'new_lineno': None, | |
|
858 | 'action': action, | |
|
859 | 'line': self._clean_line(line, command) | |
|
860 | }) | |
|
861 | ||
|
862 | except StopIteration: | |
|
863 | pass | |
|
864 | return ''.join(raw_diff), chunks, stats | |
|
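The hunk headers driving the loop above are matched by self._chunk_re, which is defined elsewhere in this class. A standalone approximation of the '@@ -a,b +c,d @@ section' format shows why the int(x or 1) default is needed: headers such as '@@ -1 +1 @@' omit the length fields entirely.

    import re

    # approximation only -- not the actual _chunk_re from this class
    chunk_re = re.compile(r'^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(.*)')
    gr = chunk_re.match('@@ -1,3 +1,4 @@ def foo():').groups()
    old_line, old_end, new_line, new_end = [int(x or 1) for x in gr[:-1]]
    # -> source_start=1, source_length=3, target_start=1, target_length=4,
    # and gr[-1] carries the section header (' def foo():')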
865 | ||
|
591 | 866 | def _safe_id(self, idstring): |
|
592 | 867 | """Make a string safe for including in an id attribute. |
|
593 | 868 |
@@ -32,11 +32,13 b' class GitDiff(base.Diff):' | |||
|
32 | 32 | _header_re = re.compile(r""" |
|
33 | 33 | #^diff[ ]--git |
|
34 | 34 | [ ]"?a/(?P<a_path>.+?)"?[ ]"?b/(?P<b_path>.+?)"?\n |
|
35 | (?:^similarity[ ]index[ ](?P<similarity_index>\d+)%\n | |
|
36 | ^rename[ ]from[ ](?P<rename_from>[^\r\n]+)\n | |
|
37 | ^rename[ ]to[ ](?P<rename_to>[^\r\n]+)(?:\n|$))? | |
|
38 | 35 | (?:^old[ ]mode[ ](?P<old_mode>\d+)\n |
|
39 | 36 | ^new[ ]mode[ ](?P<new_mode>\d+)(?:\n|$))? |
|
37 | (?:^similarity[ ]index[ ](?P<similarity_index>\d+)%(?:\n|$))? | |
|
38 | (?:^rename[ ]from[ ](?P<rename_from>[^\r\n]+)\n | |
|
39 | ^rename[ ]to[ ](?P<rename_to>[^\r\n]+)(?:\n|$))? | |
|
40 | (?:^copy[ ]from[ ](?P<copy_from>[^\r\n]+)\n | |
|
41 | ^copy[ ]to[ ](?P<copy_to>[^\r\n]+)(?:\n|$))? | |
|
40 | 42 | (?:^new[ ]file[ ]mode[ ](?P<new_file_mode>.+)(?:\n|$))? |
|
41 | 43 | (?:^deleted[ ]file[ ]mode[ ](?P<deleted_file_mode>.+)(?:\n|$))? |
|
42 | 44 | (?:^index[ ](?P<a_blob_id>[0-9A-Fa-f]+) |
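The reordering above matters because git prints the old/new mode lines before the similarity index when a rename also changes the file mode, and the new copy_from/copy_to branch covers diffs generated with copy detection. A header of the kind the updated pattern should now capture (sample data only):

    sample_header = (
        'diff --git a/tool.sh b/scripts/tool.sh\n'
        'old mode 100644\n'
        'new mode 100755\n'
        'similarity index 96%\n'
        'rename from tool.sh\n'
        'rename to scripts/tool.sh\n'
    )
    # with the reordered _header_re, old_mode/new_mode, similarity_index
    # and rename_from/rename_to should all match in this single header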
@@ -188,6 +188,14 b' input[type="button"] {' | |||
|
188 | 188 | padding: @padding * 1.2; |
|
189 | 189 | } |
|
190 | 190 | |
|
191 | .btn-group { | |
|
192 | display: inline-block; | |
|
193 | .btn { | |
|
194 | float: left; | |
|
195 | margin: 0 0 0 -1px; | |
|
196 | } | |
|
197 | } | |
|
198 | ||
|
191 | 199 | .btn-link { |
|
192 | 200 | background: transparent; |
|
193 | 201 | border: none; |
@@ -326,15 +334,15 b' input[type="submit"],' | |||
|
326 | 334 | input[type="reset"] { |
|
327 | 335 | &.btn-danger { |
|
328 | 336 | &:extend(.btn-danger); |
|
329 | ||
|
337 | ||
|
330 | 338 | &:focus { |
|
331 | 339 | outline: 0; |
|
332 | 340 | } |
|
333 | ||
|
341 | ||
|
334 | 342 | &:hover { |
|
335 | 343 | &:extend(.btn-danger:hover); |
|
336 | 344 | } |
|
337 | ||
|
345 | ||
|
338 | 346 | &.btn-link { |
|
339 | 347 | &:extend(.btn-link); |
|
340 | 348 | color: @alert2; |
@@ -646,14 +646,210 b' pre.literal-block, .codehilite pre{' | |||
|
646 | 646 | |
|
647 | 647 | @cb-line-height: 18px; |
|
648 | 648 | @cb-line-code-padding: 10px; |
|
649 | @cb-text-padding: 5px; | |
|
649 | 650 | |
|
651 | @diff-pill-padding: 2px 7px; | |
|
652 | ||
|
653 | input.diff-collapse-state { | |
|
654 | display: none; | |
|
655 | ||
|
656 | &:checked + .diff { /* file diff is collapsed */ | |
|
657 | .cb { | |
|
658 | display: none; | |
|
659 | } | |
|
660 | .diff-collapse-indicator { | |
|
661 | border-width: 9px 0 9px 15.6px; | |
|
662 | border-color: transparent transparent transparent #ccc; | |
|
663 | } | |
|
664 | .diff-menu { | |
|
665 | display: none; | |
|
666 | } | |
|
667 | margin: -1px 0 0 0; | |
|
668 | } | |
|
669 | ||
|
670 | &+ .diff { /* file diff is expanded */ | |
|
671 | .diff-collapse-indicator { | |
|
672 | border-width: 15.6px 9px 0 9px; | |
|
673 | border-color: #ccc transparent transparent transparent; | |
|
674 | } | |
|
675 | .diff-menu { | |
|
676 | display: block; | |
|
677 | } | |
|
678 | margin: 20px 0; | |
|
679 | } | |
|
680 | } | |
|
681 | .diff { | |
|
682 | border: 1px solid @grey5; | |
|
683 | ||
|
684 | /* START OVERRIDES */ | |
|
685 | .code-highlight { | |
|
686 | border: none; // TODO: remove this border from the global | |
|
687 | // .code-highlight, it doesn't belong there | |
|
688 | } | |
|
689 | label { | |
|
690 | margin: 0; // TODO: remove this margin definition from global label | |
|
691 | // it doesn't belong there - if margin on labels | |
|
692 | // are needed for a form they should be defined | |
|
693 | // in the form's class | |
|
694 | } | |
|
695 | /* END OVERRIDES */ | |
|
696 | ||
|
697 | * { | |
|
698 | box-sizing: border-box; | |
|
699 | } | |
|
700 | .diff-anchor { | |
|
701 | visibility: hidden; | |
|
702 | } | |
|
703 | &:hover { | |
|
704 | .diff-anchor { | |
|
705 | visibility: visible; | |
|
706 | } | |
|
707 | } | |
|
708 | ||
|
709 | .diff-collapse-indicator { | |
|
710 | width: 0; | |
|
711 | height: 0; | |
|
712 | border-style: solid; | |
|
713 | float: left; | |
|
714 | margin: 2px 2px 0 0; | |
|
715 | cursor: pointer; | |
|
716 | } | |
|
717 | ||
|
718 | .diff-heading { | |
|
719 | background: @grey7; | |
|
720 | cursor: pointer; | |
|
721 | display: block; | |
|
722 | padding: 5px 10px; | |
|
723 | } | |
|
724 | .diff-heading:after { | |
|
725 | content: ""; | |
|
726 | display: table; | |
|
727 | clear: both; | |
|
728 | } | |
|
729 | .diff-heading:hover { | |
|
730 | background: #e1e9f4 !important; | |
|
731 | } | |
|
732 | ||
|
733 | .diff-menu { | |
|
734 | float: right; | |
|
735 | a, button { | |
|
736 | padding: 5px; | |
|
737 | display: block; | |
|
738 | float: left; | |
|
739 | } | |
|
740 | } | |
|
741 | .diff-pill { | |
|
742 | display: block; | |
|
743 | float: left; | |
|
744 | padding: @diff-pill-padding; | |
|
745 | } | |
|
746 | .diff-pill-group { | |
|
747 | .diff-pill { | |
|
748 | opacity: .8; | |
|
749 | &:first-child { | |
|
750 | border-radius: @border-radius 0 0 @border-radius; | |
|
751 | } | |
|
752 | &:last-child { | |
|
753 | border-radius: 0 @border-radius @border-radius 0; | |
|
754 | } | |
|
755 | &:only-child { | |
|
756 | border-radius: @border-radius; | |
|
757 | } | |
|
758 | } | |
|
759 | } | |
|
760 | .diff-pill { | |
|
761 | &[op="name"] { | |
|
762 | background: none; | |
|
763 | color: @grey2; | |
|
764 | opacity: 1; | |
|
765 | color: white; | |
|
766 | } | |
|
767 | &[op="limited"] { | |
|
768 | background: @grey2; | |
|
769 | color: white; | |
|
770 | } | |
|
771 | &[op="binary"] { | |
|
772 | background: @color7; | |
|
773 | color: white; | |
|
774 | } | |
|
775 | &[op="modified"] { | |
|
776 | background: @alert1; | |
|
777 | color: white; | |
|
778 | } | |
|
779 | &[op="renamed"] { | |
|
780 | background: @color4; | |
|
781 | color: white; | |
|
782 | } | |
|
783 | &[op="mode"] { | |
|
784 | background: @grey3; | |
|
785 | color: white; | |
|
786 | } | |
|
787 | &[op="symlink"] { | |
|
788 | background: @color8; | |
|
789 | color: white; | |
|
790 | } | |
|
791 | ||
|
792 | &[op="added"] { /* added lines */ | |
|
793 | background: @alert1; | |
|
794 | color: white; | |
|
795 | } | |
|
796 | &[op="deleted"] { /* deleted lines */ | |
|
797 | background: @alert2; | |
|
798 | color: white; | |
|
799 | } | |
|
800 | ||
|
801 | &[op="created"] { /* created file */ | |
|
802 | background: @alert1; | |
|
803 | color: white; | |
|
804 | } | |
|
805 | &[op="removed"] { /* deleted file */ | |
|
806 | background: @color5; | |
|
807 | color: white; | |
|
808 | } | |
|
809 | } | |
|
810 | ||
|
811 | .diff-collapse-button, .diff-expand-button { | |
|
812 | cursor: pointer; | |
|
813 | } | |
|
814 | .diff-collapse-button { | |
|
815 | display: inline; | |
|
816 | } | |
|
817 | .diff-expand-button { | |
|
818 | display: none; | |
|
819 | } | |
|
820 | .diff-collapsed .diff-collapse-button { | |
|
821 | display: none; | |
|
822 | } | |
|
823 | .diff-collapsed .diff-expand-button { | |
|
824 | display: inline; | |
|
825 | } | |
|
826 | } | |
|
650 | 827 | table.cb { |
|
651 | 828 | width: 100%; |
|
652 | 829 | border-collapse: collapse; |
|
653 | margin-bottom: 10px; | |
|
654 | 830 | |
|
655 | * { | |
|
656 | box-sizing: border-box; | |
|
831 | .cb-text { | |
|
832 | padding: @cb-text-padding; | |
|
833 | } | |
|
834 | .cb-hunk { | |
|
835 | padding: @cb-text-padding; | |
|
836 | } | |
|
837 | .cb-expand { | |
|
838 | display: none; | |
|
839 | } | |
|
840 | .cb-collapse { | |
|
841 | display: inline; | |
|
842 | } | |
|
843 | &.cb-collapsed { | |
|
844 | .cb-line { | |
|
845 | display: none; | |
|
846 | } | |
|
847 | .cb-expand { | |
|
848 | display: inline; | |
|
849 | } | |
|
850 | .cb-collapse { | |
|
851 | display: none; | |
|
852 | } | |
|
657 | 853 | } |
|
658 | 854 | |
|
659 | 855 | /* intentionally general selector since .cb-line-selected must override it |
@@ -663,18 +859,45 b' table.cb {' | |||
|
663 | 859 | .cb-line-fresh .cb-content { |
|
664 | 860 | background: white !important; |
|
665 | 861 | } |
|
862 | .cb-warning { | |
|
863 | background: #fff4dd; | |
|
864 | } | |
|
666 | 865 | |
|
667 | tr.cb-annotate { | |
|
668 | border-top: 1px solid #eee; | |
|
866 | &.cb-diff-sideside { | |
|
867 | td { | |
|
868 | &.cb-content { | |
|
869 | width: 50%; | |
|
870 | } | |
|
871 | } | |
|
872 | } | |
|
669 | 873 | |
|
670 | &+ .cb-line { | |
|
874 | tr { | |
|
875 | &.cb-annotate { | |
|
671 | 876 | border-top: 1px solid #eee; |
|
877 | ||
|
878 | &+ .cb-line { | |
|
879 | border-top: 1px solid #eee; | |
|
880 | } | |
|
881 | ||
|
882 | &:first-child { | |
|
883 | border-top: none; | |
|
884 | &+ .cb-line { | |
|
885 | border-top: none; | |
|
886 | } | |
|
887 | } | |
|
672 | 888 | } |
|
673 | 889 | |
|
674 | &:first-child { | |
|
675 | border-top: none; | |
|
676 | &+ .cb-line { | |
|
677 | border-top: none; | |
|
890 | &.cb-hunk { | |
|
891 | font-family: @font-family-monospace; | |
|
892 | color: rgba(0, 0, 0, 0.3); | |
|
893 | ||
|
894 | td { | |
|
895 | &:first-child { | |
|
896 | background: #edf2f9; | |
|
897 | } | |
|
898 | &:last-child { | |
|
899 | background: #f4f7fb; | |
|
900 | } | |
|
678 | 901 | } |
|
679 | 902 | } |
|
680 | 903 | } |
@@ -686,9 +909,14 b' table.cb {' | |||
|
686 | 909 | &.cb-content { |
|
687 | 910 | font-size: 12.35px; |
|
688 | 911 | |
|
912 | &.cb-line-selected .cb-code { | |
|
913 | background: @comment-highlight-color !important; | |
|
914 | } | |
|
915 | ||
|
689 | 916 | span.cb-code { |
|
690 | 917 | line-height: @cb-line-height; |
|
691 | 918 | padding-left: @cb-line-code-padding; |
|
919 | padding-right: @cb-line-code-padding; | |
|
692 | 920 | display: block; |
|
693 | 921 | white-space: pre-wrap; |
|
694 | 922 | font-family: @font-family-monospace; |
@@ -714,14 +942,38 b' table.cb {' | |||
|
714 | 942 | a { |
|
715 | 943 | display: block; |
|
716 | 944 | padding-right: @cb-line-code-padding; |
|
945 | padding-left: @cb-line-code-padding; | |
|
717 | 946 | line-height: @cb-line-height; |
|
718 | 947 | color: rgba(0, 0, 0, 0.3); |
|
719 | 948 | } |
|
720 | 949 | } |
|
721 | 950 | |
|
722 | &.cb- | |
|
723 | &.cb-line-selected .cb-code { | |
|
724 | background: @comment-highlight-color !important; | |
|
951 | &.cb-empty { | |
|
952 | background: @grey7; | |
|
953 | } | |
|
954 | ||
|
955 | ins { | |
|
956 | color: black; | |
|
957 | background: #a6f3a6; | |
|
958 | text-decoration: none; | |
|
959 | } | |
|
960 | del { | |
|
961 | color: black; | |
|
962 | background: #f8cbcb; | |
|
963 | text-decoration: none; | |
|
964 | } | |
|
965 | &.cb-addition { | |
|
966 | background: #ecffec; | |
|
967 | ||
|
968 | &.blob-lineno { | |
|
969 | background: #ddffdd; | |
|
970 | } | |
|
971 | } | |
|
972 | &.cb-deletion { | |
|
973 | background: #ffecec; | |
|
974 | ||
|
975 | &.blob-lineno { | |
|
976 | background: #ffdddd; | |
|
725 | 977 | } |
|
726 | 978 | } |
|
727 | 979 |
@@ -221,14 +221,32 b' var formatSelect2SelectionRefs = functio' | |||
|
221 | 221 | }; |
|
222 | 222 | |
|
223 | 223 | // takes a given html element and scrolls it down offset pixels |
|
224 | function offsetScroll(element, offset){ | |
|
225 | setTimeout(function(){ | |
|
224 | function offsetScroll(element, offset) { | |
|
225 | setTimeout(function() { | |
|
226 | 226 | var location = element.offset().top; |
|
227 | 227 | // some browsers use body, some use html |
|
228 | 228 | $('html, body').animate({ scrollTop: (location - offset) }); |
|
229 | 229 | }, 100); |
|
230 | 230 | } |
|
231 | 231 | |
|
232 | // scroll an element `percent`% from the top of page in `time` ms | |
|
233 | function scrollToElement(element, percent, time) { | |
|
234 | percent = (percent === undefined ? 25 : percent); | |
|
235 | time = (time === undefined ? 100 : time); | |
|
236 | ||
|
237 | var $element = $(element); | |
|
238 | var elOffset = $element.offset().top; | |
|
239 | var elHeight = $element.height(); | |
|
240 | var windowHeight = $(window).height(); | |
|
241 | var offset = elOffset; | |
|
242 | if (elHeight < windowHeight) { | |
|
243 | offset = elOffset - ((windowHeight / (100 / percent)) - (elHeight / 2)); | |
|
244 | } | |
|
245 | setTimeout(function() { | |
|
246 | $('html, body').animate({ scrollTop: offset}); | |
|
247 | }, time); | |
|
248 | } | |
|
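The arithmetic in scrollToElement aims the element at percent% from the top of the viewport whenever it fits on screen: it backs off from the element's absolute offset by that fraction of the window height, re-centered by half the element height. Worked through with made-up numbers (sketched in Python):

    el_offset, el_height, window_height, percent = 2000.0, 36.0, 900.0, 25
    offset = el_offset - ((window_height / (100.0 / percent)) - (el_height / 2))
    # 2000 - (900/4 - 36/2) == 2000 - 207 == 1793, i.e. the element ends up
    # roughly a quarter of the way down the window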
249 | ||
|
232 | 250 | /** |
|
233 | 251 | * global hooks after DOM is loaded |
|
234 | 252 | */ |
@@ -418,6 +436,10 b' function offsetScroll(element, offset){' | |||
|
418 | 436 | var result = splitDelimitedHash(location.hash); |
|
419 | 437 | var loc = result.loc; |
|
420 | 438 | if (loc.length > 1) { |
|
439 | ||
|
440 | var highlightable_line_tds = []; | |
|
441 | ||
|
442 | // source code line format | |
|
421 | 443 | var page_highlights = loc.substring( |
|
422 | 444 | loc.indexOf('#') + 1).split('L'); |
|
423 | 445 | |
@@ -442,33 +464,27 b' function offsetScroll(element, offset){' | |||
|
442 | 464 | for (pos in h_lines) { |
|
443 | 465 | var line_td = $('td.cb-lineno#L' + h_lines[pos]); |
|
444 | 466 | if (line_td.length) { |
|
445 | line_td.addClass('cb-line-selected'); // line number td | |
|
446 | line_td.next().addClass('cb-line-selected'); // line content | |
|
467 | highlightable_line_tds.push(line_td); | |
|
447 | 468 | } |
|
448 | 469 | } |
|
449 | var first_line_td = $('td.cb-lineno#L' + h_lines[0]); | |
|
450 | if (first_line_td.length) { | |
|
451 | var elOffset = first_line_td.offset().top; | |
|
452 | var elHeight = first_line_td.height(); | |
|
453 | var windowHeight = $(window).height(); | |
|
454 | var offset; | |
|
470 | } | |
|
455 | 471 | |
|
456 | if (elHeight < windowHeight) { | |
|
457 | offset = elOffset - ((windowHeight / 4) - (elHeight / 2)); | |
|
458 | } | |
|
459 | else { | |
|
460 | offset = elOffset; | |
|
461 | } | |
|
462 | $(function() { // let browser scroll to hash first, then | |
|
463 | // scroll the line to the middle of page | |
|
464 | setTimeout(function() { | |
|
465 | $('html, body').animate({ scrollTop: offset }); | |
|
466 | }, 100); | |
|
467 | }); | |
|
468 | $.Topic('/ui/plugins/code/anchor_focus').prepareOrPublish({ | |
|
469 | lineno: first_line_td, | |
|
470 | remainder: result.remainder}); | |
|
471 | } | |
|
472 | // now check a direct id reference (diff page) | |
|
473 | if ($(loc).length && $(loc).hasClass('cb-lineno')) { | |
|
474 | highlightable_line_tds.push($(loc)); | |
|
475 | } | |
|
476 | $.each(highlightable_line_tds, function (i, $td) { | |
|
477 | $td.addClass('cb-line-selected'); // line number td | |
|
478 | $td.next().addClass('cb-line-selected'); // line content | |
|
479 | }); | |
|
480 | ||
|
481 | if (highlightable_line_tds.length) { | |
|
482 | var $first_line_td = highlightable_line_tds[0]; | |
|
483 | scrollToElement($first_line_td); | |
|
484 | $.Topic('/ui/plugins/code/anchor_focus').prepareOrPublish({ | |
|
485 | lineno: $first_line_td, | |
|
486 | remainder: result.remainder | |
|
487 | }); | |
|
472 | 488 | } |
|
473 | 489 | } |
|
474 | 490 | } |
@@ -1,5 +1,6 b'' | |||
|
1 | 1 | ## -*- coding: utf-8 -*- |
|
2 | 2 | <%inherit file="/base/base.html"/> |
|
3 | <%namespace name="cbdiffs" file="/codeblocks/diffs.html"/> | |
|
3 | 4 | |
|
4 | 5 | <%def name="title()"> |
|
5 | 6 | %if c.compare_home: |
@@ -53,7 +54,7 b'' | |||
|
53 | 54 | <a id="btn-swap" class="btn btn-primary" href="${c.swap_url}"><i class="icon-refresh"></i> ${_('Swap')}</a> |
|
54 | 55 | %endif |
|
55 | 56 | <div id="compare_revs" class="btn btn-primary"><i class ="icon-loop"></i> ${_('Compare Commits')}</div> |
|
56 | %if c.files: | |
|
57 | %if c.diffset and c.diffset.files: | |
|
57 | 58 | <div id="compare_changeset_status_toggle" class="btn btn-primary">${_('Comment')}</div> |
|
58 | 59 | %endif |
|
59 | 60 | </div> |
@@ -248,72 +249,7 b'' | |||
|
248 | 249 | <div id="changeset_compare_view_content"> |
|
249 | 250 | ##CS |
|
250 | 251 | <%include file="compare_commits.html"/> |
|
251 | ||
|
252 | ## FILES | |
|
253 | <div class="cs_files_title"> | |
|
254 | <span class="cs_files_expand"> | |
|
255 | <span id="expand_all_files">${_('Expand All')}</span> | <span id="collapse_all_files">${_('Collapse All')}</span> | |
|
256 | </span> | |
|
257 | <h2> | |
|
258 | ${diff_block.diff_summary_text(len(c.files), c.lines_added, c.lines_deleted, c.limited_diff)} | |
|
259 | </h2> | |
|
260 | </div> | |
|
261 | <div class="cs_files"> | |
|
262 | %if not c.files: | |
|
263 | <p class="empty_data">${_('No files')}</p> | |
|
264 | %endif | |
|
265 | <table class="compare_view_files"> | |
|
266 | <%namespace name="diff_block" file="/changeset/diff_block.html"/> | |
|
267 | %for FID, change, path, stats, file in c.files: | |
|
268 | <tr class="cs_${change} collapse_file" fid="${FID}"> | |
|
269 | <td class="cs_icon_td"> | |
|
270 | <span class="collapse_file_icon" fid="${FID}"></span> | |
|
271 | </td> | |
|
272 | <td class="cs_icon_td"> | |
|
273 | <div class="flag_status not_reviewed hidden"></div> | |
|
274 | </td> | |
|
275 | <td class="cs_${change}" id="a_${FID}"> | |
|
276 | <div class="node"> | |
|
277 | <a href="#a_${FID}"> | |
|
278 | <i class="icon-file-${change.lower()}"></i> | |
|
279 | ${h.safe_unicode(path)} | |
|
280 | </a> | |
|
281 | </div> | |
|
282 | </td> | |
|
283 | <td> | |
|
284 | <div class="changes pull-right">${h.fancy_file_stats(stats)}</div> | |
|
285 | <div class="comment-bubble pull-right" data-path="${path}"> | |
|
286 | <i class="icon-comment"></i> | |
|
287 | </div> | |
|
288 | </td> | |
|
289 | </tr> | |
|
290 | <tr fid="${FID}" id="diff_${FID}" class="diff_links"> | |
|
291 | <td></td> | |
|
292 | <td></td> | |
|
293 | <td class="cs_${change}"> | |
|
294 | %if c.target_repo.repo_name == c.repo_name: | |
|
295 | ${diff_block.diff_menu(c.repo_name, h.safe_unicode(path), c.source_ref, c.target_ref, change, file)} | |
|
296 | %else: | |
|
297 | ## this is slightly different case later, since the target repo can have this | |
|
298 | ## file in target state than the source repo | |
|
299 | ${diff_block.diff_menu(c.target_repo.repo_name, h.safe_unicode(path), c.source_ref, c.target_ref, change, file)} | |
|
300 | %endif | |
|
301 | </td> | |
|
302 | <td class="td-actions rc-form"> | |
|
303 | </td> | |
|
304 | </tr> | |
|
305 | <tr id="tr_${FID}"> | |
|
306 | <td></td> | |
|
307 | <td></td> | |
|
308 | <td class="injected_diff" colspan="2"> | |
|
309 | ${diff_block.diff_block_simple([c.changes[FID]])} | |
|
310 | </td> | |
|
311 | </tr> | |
|
312 | %endfor | |
|
313 | </table> | |
|
314 | % if c.limited_diff: | |
|
315 | ${diff_block.changeset_message()} | |
|
316 | % endif | |
|
252 | ${cbdiffs.render_diffset(c.diffset)} | |
|
317 | 253 | </div> |
|
318 | 254 | %endif |
|
319 | 255 | </div> |
@@ -104,7 +104,7 b' class TestChangesetController(object):' | |||
|
104 | 104 | |
|
105 | 105 | response.mustcontain(_shorten_commit_id(commit_ids[0])) |
|
106 | 106 | response.mustcontain(_shorten_commit_id(commit_ids[1])) |
|
107 | ||
|
107 | ||
|
108 | 108 | # svn is special |
|
109 | 109 | if backend.alias == 'svn': |
|
110 | 110 | response.mustcontain('new file 10644') |
@@ -146,7 +146,7 b' class TestChangesetController(object):' | |||
|
146 | 146 | # files op files |
|
147 | 147 | response.mustcontain('File no longer present at commit: %s' % |
|
148 | 148 | _shorten_commit_id(commit_ids[1])) |
|
149 | ||
|
149 | ||
|
150 | 150 | # svn is special |
|
151 | 151 | if backend.alias == 'svn': |
|
152 | 152 | response.mustcontain('new file 10644') |
@@ -158,7 +158,7 b' class TestChangesetController(object):' | |||
|
158 | 158 | response.mustcontain('Added docstrings to vcs.cli') # commit msg |
|
159 | 159 | response.mustcontain('Changed theme to ADC theme') # commit msg |
|
160 | 160 | |
|
161 | self._check_diff_menus(response) | |
|
161 | self._check_new_diff_menus(response) | |
|
162 | 162 | |
|
163 | 163 | def test_changeset_range(self, backend): |
|
164 | 164 | self._check_changeset_range( |
@@ -273,7 +273,7 b' Added a symlink' | |||
|
273 | 273 | """ + diffs['svn'], |
|
274 | 274 | } |
|
275 | 275 | |
|
276 | def _check_diff_menus(self, response, right_menu=False): | |
|
276 | def _check_diff_menus(self, response, right_menu=False,): | |
|
277 | 277 | # diff menus |
|
278 | 278 | for elem in ['Show File', 'Unified Diff', 'Side-by-side Diff', |
|
279 | 279 | 'Raw Diff', 'Download Diff']: |
@@ -284,3 +284,16 b' Added a symlink' | |||
|
284 | 284 | for elem in ['Ignore whitespace', 'Increase context', |
|
285 | 285 | 'Hide comments']: |
|
286 | 286 | response.mustcontain(elem) |
|
287 | ||
|
288 | ||
|
289 | def _check_new_diff_menus(self, response, right_menu=False,): | |
|
290 | # diff menus | |
|
291 | for elem in ['Show file before', 'Show file after', | |
|
292 | 'Raw diff', 'Download diff']: | |
|
293 | response.mustcontain(elem) | |
|
294 | ||
|
295 | # right pane diff menus | |
|
296 | if right_menu: | |
|
297 | for elem in ['Ignore whitespace', 'Increase context', | |
|
298 | 'Hide comments']: | |
|
299 | response.mustcontain(elem) |
@@ -20,6 +20,7 b'' | |||
|
20 | 20 | |
|
21 | 21 | import mock |
|
22 | 22 | import pytest |
|
23 | import lxml.html | |
|
23 | 24 | |
|
24 | 25 | from rhodecode.lib.vcs.backends.base import EmptyCommit |
|
25 | 26 | from rhodecode.lib.vcs.exceptions import RepositoryRequirementError |
@@ -609,9 +610,12 b' class ComparePage(AssertResponse):' | |||
|
609 | 610 | """ |
|
610 | 611 | |
|
611 | 612 | def contains_file_links_and_anchors(self, files): |
|
613 | doc = lxml.html.fromstring(self.response.body) | |
|
612 | 614 | for filename, file_id in files: |
|
613 | self.contains_one_link(filename, '#' + file_id) | |
|
614 | 615 | self.contains_one_anchor(file_id) |
|
616 | diffblock = doc.cssselect('[data-f-path="%s"]' % filename) | |
|
617 | assert len(diffblock) == 1 | |
|
618 | assert len(diffblock[0].cssselect('a[href="#%s"]' % file_id)) == 1 | |
|
615 | 619 | |
|
616 | 620 | def contains_change_summary(self, files_changed, inserted, deleted): |
|
617 | 621 | template = ( |
@@ -264,19 +264,19 b' class TestRenderTokenStream(object):' | |||
|
264 | 264 | ), |
|
265 | 265 | ( |
|
266 | 266 | [('A', '', u'two\n'), ('A', '', u'lines')], |
|
267 | '<span class="A">two | |
|
267 | '<span class="A">two\nlines</span>', | |
|
268 | 268 | ), |
|
269 | 269 | ( |
|
270 | 270 | [('A', '', u'\nthree\n'), ('A', '', u'lines')], |
|
271 | '<span class="A"> | |
|
271 | '<span class="A">\nthree\nlines</span>', | |
|
272 | 272 | ), |
|
273 | 273 | ( |
|
274 | 274 | [('', '', u'\n'), ('A', '', u'line')], |
|
275 | '<span> | |
|
275 | '<span>\n</span><span class="A">line</span>', | |
|
276 | 276 | ), |
|
277 | 277 | ( |
|
278 | 278 | [('', 'ins', u'\n'), ('A', '', u'line')], |
|
279 | '<span><ins> | |
|
279 | '<span><ins>\n</ins></span><span class="A">line</span>', | |
|
280 | 280 | ), |
|
281 | 281 | ( |
|
282 | 282 | [('A', '', u'hel'), ('A', 'ins', u'lo')], |