diffs: replace compare controller with new html based diffs:...
dan
r1030:158ce501 default
@@ -0,0 +1,14 @@
1 Copyright 2006 Google Inc.
2 http://code.google.com/p/google-diff-match-patch/
3
4 Licensed under the Apache License, Version 2.0 (the "License");
5 you may not use this file except in compliance with the License.
6 You may obtain a copy of the License at
7
8 http://www.apache.org/licenses/LICENSE-2.0
9
10 Unless required by applicable law or agreed to in writing, software
11 distributed under the License is distributed on an "AS IS" BASIS,
12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 See the License for the specific language governing permissions and
14 limitations under the License.
This diff has been collapsed as it changes many lines (1919 lines changed).
@@ -0,0 +1,1919 @@
1 #!/usr/bin/python2.4
2
3 from __future__ import division
4
5 """Diff Match and Patch
6
7 Copyright 2006 Google Inc.
8 http://code.google.com/p/google-diff-match-patch/
9
10 Licensed under the Apache License, Version 2.0 (the "License");
11 you may not use this file except in compliance with the License.
12 You may obtain a copy of the License at
13
14 http://www.apache.org/licenses/LICENSE-2.0
15
16 Unless required by applicable law or agreed to in writing, software
17 distributed under the License is distributed on an "AS IS" BASIS,
18 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
19 See the License for the specific language governing permissions and
20 limitations under the License.
21 """
22
23 """Functions for diff, match and patch.
24
25 Computes the difference between two texts to create a patch.
26 Applies the patch onto another text, allowing for errors.
27 """
28
29 __author__ = 'fraser@google.com (Neil Fraser)'
30
31 import math
32 import re
33 import sys
34 import time
35 import urllib
36
37 class diff_match_patch:
38 """Class containing the diff, match and patch methods.
39
40 Also contains the behaviour settings.
41 """
42
43 def __init__(self):
44 """Inits a diff_match_patch object with default settings.
45 Redefine these in your program to override the defaults.
46 """
47
48 # Number of seconds to map a diff before giving up (0 for infinity).
49 self.Diff_Timeout = 1.0
50 # Cost of an empty edit operation in terms of edit characters.
51 self.Diff_EditCost = 4
52 # At what point is no match declared (0.0 = perfection, 1.0 = very loose).
53 self.Match_Threshold = 0.5
54 # How far to search for a match (0 = exact location, 1000+ = broad match).
55 # A match this many characters away from the expected location will add
56 # 1.0 to the score (0.0 is a perfect match).
57 self.Match_Distance = 1000
58 # When deleting a large block of text (over ~64 characters), how close do
59 # the contents have to be to match the expected contents. (0.0 = perfection,
60 # 1.0 = very loose). Note that Match_Threshold controls how closely the
61 # end points of a delete need to match.
62 self.Patch_DeleteThreshold = 0.5
63 # Chunk size for context length.
64 self.Patch_Margin = 4
65
66 # The number of bits in an int.
67 # Python has no maximum, thus to disable patch splitting set to 0.
68 # However to avoid long patches in certain pathological cases, use 32.
69 # Multiple short patches (using native ints) are much faster than long ones.
70 self.Match_MaxBits = 32
71
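An illustrative sketch (editorial aside, not part of the committed file): the settings above are plain instance attributes and can be overridden after construction. The values shown are arbitrary examples, not recommendations.

dmp = diff_match_patch()
dmp.Diff_Timeout = 0       # example: take as long as needed for an exact diff
dmp.Match_Threshold = 0.8  # example: accept looser matches when applying patches
dmp.Patch_Margin = 8       # example: keep more context around each patch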
72 # DIFF FUNCTIONS
73
74 # The data structure representing a diff is an array of tuples:
75 # [(DIFF_DELETE, "Hello"), (DIFF_INSERT, "Goodbye"), (DIFF_EQUAL, " world.")]
76 # which means: delete "Hello", add "Goodbye" and keep " world."
77 DIFF_DELETE = -1
78 DIFF_INSERT = 1
79 DIFF_EQUAL = 0
80
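For orientation only (editorial sketch, not part of the committed file), this is roughly how the tuple format above comes out of diff_main defined below; the exact character-level split can vary, but after semantic cleanup the result matches the example in the comment.

dmp = diff_match_patch()
diffs = dmp.diff_main("Hello world.", "Goodbye world.")
dmp.diff_cleanupSemantic(diffs)
# diffs is now roughly [(-1, 'Hello'), (1, 'Goodbye'), (0, ' world.')]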
81 def diff_main(self, text1, text2, checklines=True, deadline=None):
82 """Find the differences between two texts. Simplifies the problem by
83 stripping any common prefix or suffix off the texts before diffing.
84
85 Args:
86 text1: Old string to be diffed.
87 text2: New string to be diffed.
88 checklines: Optional speedup flag. If present and false, then don't run
89 a line-level diff first to identify the changed areas.
90 Defaults to true, which does a faster, slightly less optimal diff.
91 deadline: Optional time when the diff should be complete by. Used
92 internally for recursive calls. Users should set Diff_Timeout instead.
93
94 Returns:
95 Array of changes.
96 """
97 # Set a deadline by which time the diff must be complete.
98 if deadline == None:
99 # Unlike in most languages, Python counts time in seconds.
100 if self.Diff_Timeout <= 0:
101 deadline = sys.maxint
102 else:
103 deadline = time.time() + self.Diff_Timeout
104
105 # Check for null inputs.
106 if text1 == None or text2 == None:
107 raise ValueError("Null inputs. (diff_main)")
108
109 # Check for equality (speedup).
110 if text1 == text2:
111 if text1:
112 return [(self.DIFF_EQUAL, text1)]
113 return []
114
115 # Trim off common prefix (speedup).
116 commonlength = self.diff_commonPrefix(text1, text2)
117 commonprefix = text1[:commonlength]
118 text1 = text1[commonlength:]
119 text2 = text2[commonlength:]
120
121 # Trim off common suffix (speedup).
122 commonlength = self.diff_commonSuffix(text1, text2)
123 if commonlength == 0:
124 commonsuffix = ''
125 else:
126 commonsuffix = text1[-commonlength:]
127 text1 = text1[:-commonlength]
128 text2 = text2[:-commonlength]
129
130 # Compute the diff on the middle block.
131 diffs = self.diff_compute(text1, text2, checklines, deadline)
132
133 # Restore the prefix and suffix.
134 if commonprefix:
135 diffs[:0] = [(self.DIFF_EQUAL, commonprefix)]
136 if commonsuffix:
137 diffs.append((self.DIFF_EQUAL, commonsuffix))
138 self.diff_cleanupMerge(diffs)
139 return diffs
140
141 def diff_compute(self, text1, text2, checklines, deadline):
142 """Find the differences between two texts. Assumes that the texts do not
143 have any common prefix or suffix.
144
145 Args:
146 text1: Old string to be diffed.
147 text2: New string to be diffed.
148 checklines: Speedup flag. If false, then don't run a line-level diff
149 first to identify the changed areas.
150 If true, then run a faster, slightly less optimal diff.
151 deadline: Time when the diff should be complete by.
152
153 Returns:
154 Array of changes.
155 """
156 if not text1:
157 # Just add some text (speedup).
158 return [(self.DIFF_INSERT, text2)]
159
160 if not text2:
161 # Just delete some text (speedup).
162 return [(self.DIFF_DELETE, text1)]
163
164 if len(text1) > len(text2):
165 (longtext, shorttext) = (text1, text2)
166 else:
167 (shorttext, longtext) = (text1, text2)
168 i = longtext.find(shorttext)
169 if i != -1:
170 # Shorter text is inside the longer text (speedup).
171 diffs = [(self.DIFF_INSERT, longtext[:i]), (self.DIFF_EQUAL, shorttext),
172 (self.DIFF_INSERT, longtext[i + len(shorttext):])]
173 # Swap insertions for deletions if diff is reversed.
174 if len(text1) > len(text2):
175 diffs[0] = (self.DIFF_DELETE, diffs[0][1])
176 diffs[2] = (self.DIFF_DELETE, diffs[2][1])
177 return diffs
178
179 if len(shorttext) == 1:
180 # Single character string.
181 # After the previous speedup, the character can't be an equality.
182 return [(self.DIFF_DELETE, text1), (self.DIFF_INSERT, text2)]
183
184 # Check to see if the problem can be split in two.
185 hm = self.diff_halfMatch(text1, text2)
186 if hm:
187 # A half-match was found, sort out the return data.
188 (text1_a, text1_b, text2_a, text2_b, mid_common) = hm
189 # Send both pairs off for separate processing.
190 diffs_a = self.diff_main(text1_a, text2_a, checklines, deadline)
191 diffs_b = self.diff_main(text1_b, text2_b, checklines, deadline)
192 # Merge the results.
193 return diffs_a + [(self.DIFF_EQUAL, mid_common)] + diffs_b
194
195 if checklines and len(text1) > 100 and len(text2) > 100:
196 return self.diff_lineMode(text1, text2, deadline)
197
198 return self.diff_bisect(text1, text2, deadline)
199
200 def diff_lineMode(self, text1, text2, deadline):
201 """Do a quick line-level diff on both strings, then rediff the parts for
202 greater accuracy.
203 This speedup can produce non-minimal diffs.
204
205 Args:
206 text1: Old string to be diffed.
207 text2: New string to be diffed.
208 deadline: Time when the diff should be complete by.
209
210 Returns:
211 Array of changes.
212 """
213
214 # Scan the text on a line-by-line basis first.
215 (text1, text2, linearray) = self.diff_linesToChars(text1, text2)
216
217 diffs = self.diff_main(text1, text2, False, deadline)
218
219 # Convert the diff back to original text.
220 self.diff_charsToLines(diffs, linearray)
221 # Eliminate freak matches (e.g. blank lines)
222 self.diff_cleanupSemantic(diffs)
223
224 # Rediff any replacement blocks, this time character-by-character.
225 # Add a dummy entry at the end.
226 diffs.append((self.DIFF_EQUAL, ''))
227 pointer = 0
228 count_delete = 0
229 count_insert = 0
230 text_delete = ''
231 text_insert = ''
232 while pointer < len(diffs):
233 if diffs[pointer][0] == self.DIFF_INSERT:
234 count_insert += 1
235 text_insert += diffs[pointer][1]
236 elif diffs[pointer][0] == self.DIFF_DELETE:
237 count_delete += 1
238 text_delete += diffs[pointer][1]
239 elif diffs[pointer][0] == self.DIFF_EQUAL:
240 # Upon reaching an equality, check for prior redundancies.
241 if count_delete >= 1 and count_insert >= 1:
242 # Delete the offending records and add the merged ones.
243 a = self.diff_main(text_delete, text_insert, False, deadline)
244 diffs[pointer - count_delete - count_insert : pointer] = a
245 pointer = pointer - count_delete - count_insert + len(a)
246 count_insert = 0
247 count_delete = 0
248 text_delete = ''
249 text_insert = ''
250
251 pointer += 1
252
253 diffs.pop() # Remove the dummy entry at the end.
254
255 return diffs
256
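A hedged usage note (editorial, not part of the file): callers only reach this line-mode path through diff_main's checklines flag and sufficiently large inputs. old_text and new_text below are placeholders for two large multi-line strings.

dmp = diff_match_patch()
# old_text / new_text stand in for the caller's large inputs.
fast = dmp.diff_main(old_text, new_text, checklines=True)   # line-level pass first, coarser result
exact = dmp.diff_main(old_text, new_text, checklines=False) # character-level only, slower but tighter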
257 def diff_bisect(self, text1, text2, deadline):
258 """Find the 'middle snake' of a diff, split the problem in two
259 and return the recursively constructed diff.
260 See Myers 1986 paper: An O(ND) Difference Algorithm and Its Variations.
261
262 Args:
263 text1: Old string to be diffed.
264 text2: New string to be diffed.
265 deadline: Time at which to bail if not yet complete.
266
267 Returns:
268 Array of diff tuples.
269 """
270
271 # Cache the text lengths to prevent multiple calls.
272 text1_length = len(text1)
273 text2_length = len(text2)
274 max_d = (text1_length + text2_length + 1) // 2
275 v_offset = max_d
276 v_length = 2 * max_d
277 v1 = [-1] * v_length
278 v1[v_offset + 1] = 0
279 v2 = v1[:]
280 delta = text1_length - text2_length
281 # If the total number of characters is odd, then the front path will
282 # collide with the reverse path.
283 front = (delta % 2 != 0)
284 # Offsets for start and end of k loop.
285 # Prevents mapping of space beyond the grid.
286 k1start = 0
287 k1end = 0
288 k2start = 0
289 k2end = 0
290 for d in xrange(max_d):
291 # Bail out if deadline is reached.
292 if time.time() > deadline:
293 break
294
295 # Walk the front path one step.
296 for k1 in xrange(-d + k1start, d + 1 - k1end, 2):
297 k1_offset = v_offset + k1
298 if k1 == -d or (k1 != d and
299 v1[k1_offset - 1] < v1[k1_offset + 1]):
300 x1 = v1[k1_offset + 1]
301 else:
302 x1 = v1[k1_offset - 1] + 1
303 y1 = x1 - k1
304 while (x1 < text1_length and y1 < text2_length and
305 text1[x1] == text2[y1]):
306 x1 += 1
307 y1 += 1
308 v1[k1_offset] = x1
309 if x1 > text1_length:
310 # Ran off the right of the graph.
311 k1end += 2
312 elif y1 > text2_length:
313 # Ran off the bottom of the graph.
314 k1start += 2
315 elif front:
316 k2_offset = v_offset + delta - k1
317 if k2_offset >= 0 and k2_offset < v_length and v2[k2_offset] != -1:
318 # Mirror x2 onto top-left coordinate system.
319 x2 = text1_length - v2[k2_offset]
320 if x1 >= x2:
321 # Overlap detected.
322 return self.diff_bisectSplit(text1, text2, x1, y1, deadline)
323
324 # Walk the reverse path one step.
325 for k2 in xrange(-d + k2start, d + 1 - k2end, 2):
326 k2_offset = v_offset + k2
327 if k2 == -d or (k2 != d and
328 v2[k2_offset - 1] < v2[k2_offset + 1]):
329 x2 = v2[k2_offset + 1]
330 else:
331 x2 = v2[k2_offset - 1] + 1
332 y2 = x2 - k2
333 while (x2 < text1_length and y2 < text2_length and
334 text1[-x2 - 1] == text2[-y2 - 1]):
335 x2 += 1
336 y2 += 1
337 v2[k2_offset] = x2
338 if x2 > text1_length:
339 # Ran off the left of the graph.
340 k2end += 2
341 elif y2 > text2_length:
342 # Ran off the top of the graph.
343 k2start += 2
344 elif not front:
345 k1_offset = v_offset + delta - k2
346 if k1_offset >= 0 and k1_offset < v_length and v1[k1_offset] != -1:
347 x1 = v1[k1_offset]
348 y1 = v_offset + x1 - k1_offset
349 # Mirror x2 onto top-left coordinate system.
350 x2 = text1_length - x2
351 if x1 >= x2:
352 # Overlap detected.
353 return self.diff_bisectSplit(text1, text2, x1, y1, deadline)
354
355 # Diff took too long and hit the deadline or
356 # number of diffs equals number of characters, no commonality at all.
357 return [(self.DIFF_DELETE, text1), (self.DIFF_INSERT, text2)]
358
359 def diff_bisectSplit(self, text1, text2, x, y, deadline):
360 """Given the location of the 'middle snake', split the diff in two parts
361 and recurse.
362
363 Args:
364 text1: Old string to be diffed.
365 text2: New string to be diffed.
366 x: Index of split point in text1.
367 y: Index of split point in text2.
368 deadline: Time at which to bail if not yet complete.
369
370 Returns:
371 Array of diff tuples.
372 """
373 text1a = text1[:x]
374 text2a = text2[:y]
375 text1b = text1[x:]
376 text2b = text2[y:]
377
378 # Compute both diffs serially.
379 diffs = self.diff_main(text1a, text2a, False, deadline)
380 diffsb = self.diff_main(text1b, text2b, False, deadline)
381
382 return diffs + diffsb
383
384 def diff_linesToChars(self, text1, text2):
385 """Split two texts into an array of strings. Reduce the texts to a string
386 of hashes where each Unicode character represents one line.
387
388 Args:
389 text1: First string.
390 text2: Second string.
391
392 Returns:
393 Three element tuple, containing the encoded text1, the encoded text2 and
394 the array of unique strings. The zeroth element of the array of unique
395 strings is intentionally blank.
396 """
397 lineArray = [] # e.g. lineArray[4] == "Hello\n"
398 lineHash = {} # e.g. lineHash["Hello\n"] == 4
399
400 # "\x00" is a valid character, but various debuggers don't like it.
401 # So we'll insert a junk entry to avoid generating a null character.
402 lineArray.append('')
403
404 def diff_linesToCharsMunge(text):
405 """Split a text into an array of strings. Reduce the texts to a string
406 of hashes where each Unicode character represents one line.
407 Modifies linearray and linehash through being a closure.
408
409 Args:
410 text: String to encode.
411
412 Returns:
413 Encoded string.
414 """
415 chars = []
416 # Walk the text, pulling out a substring for each line.
417 # text.split('\n') would temporarily double our memory footprint.
418 # Modifying text would create many large strings to garbage collect.
419 lineStart = 0
420 lineEnd = -1
421 while lineEnd < len(text) - 1:
422 lineEnd = text.find('\n', lineStart)
423 if lineEnd == -1:
424 lineEnd = len(text) - 1
425 line = text[lineStart:lineEnd + 1]
426 lineStart = lineEnd + 1
427
428 if line in lineHash:
429 chars.append(unichr(lineHash[line]))
430 else:
431 lineArray.append(line)
432 lineHash[line] = len(lineArray) - 1
433 chars.append(unichr(len(lineArray) - 1))
434 return "".join(chars)
435
436 chars1 = diff_linesToCharsMunge(text1)
437 chars2 = diff_linesToCharsMunge(text2)
438 return (chars1, chars2, lineArray)
439
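A small worked example (editorial sketch, not part of the file) of the encoding this method produces; the hash characters shown assume the insertion order below, with index 0 being the intentional blank entry.

dmp = diff_match_patch()
chars1, chars2, lines = dmp.diff_linesToChars("alpha\nbeta\nalpha\n", "beta\nalpha\nbeta\n")
# chars1 == '\x01\x02\x01', chars2 == '\x02\x01\x02'
# lines  == ['', 'alpha\n', 'beta\n']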
440 def diff_charsToLines(self, diffs, lineArray):
441 """Rehydrate the text in a diff from a string of line hashes to real lines
442 of text.
443
444 Args:
445 diffs: Array of diff tuples.
446 lineArray: Array of unique strings.
447 """
448 for x in xrange(len(diffs)):
449 text = []
450 for char in diffs[x][1]:
451 text.append(lineArray[ord(char)])
452 diffs[x] = (diffs[x][0], "".join(text))
453
454 def diff_commonPrefix(self, text1, text2):
455 """Determine the common prefix of two strings.
456
457 Args:
458 text1: First string.
459 text2: Second string.
460
461 Returns:
462 The number of characters common to the start of each string.
463 """
464 # Quick check for common null cases.
465 if not text1 or not text2 or text1[0] != text2[0]:
466 return 0
467 # Binary search.
468 # Performance analysis: http://neil.fraser.name/news/2007/10/09/
469 pointermin = 0
470 pointermax = min(len(text1), len(text2))
471 pointermid = pointermax
472 pointerstart = 0
473 while pointermin < pointermid:
474 if text1[pointerstart:pointermid] == text2[pointerstart:pointermid]:
475 pointermin = pointermid
476 pointerstart = pointermin
477 else:
478 pointermax = pointermid
479 pointermid = (pointermax - pointermin) // 2 + pointermin
480 return pointermid
481
482 def diff_commonSuffix(self, text1, text2):
483 """Determine the common suffix of two strings.
484
485 Args:
486 text1: First string.
487 text2: Second string.
488
489 Returns:
490 The number of characters common to the end of each string.
491 """
492 # Quick check for common null cases.
493 if not text1 or not text2 or text1[-1] != text2[-1]:
494 return 0
495 # Binary search.
496 # Performance analysis: http://neil.fraser.name/news/2007/10/09/
497 pointermin = 0
498 pointermax = min(len(text1), len(text2))
499 pointermid = pointermax
500 pointerend = 0
501 while pointermin < pointermid:
502 if (text1[-pointermid:len(text1) - pointerend] ==
503 text2[-pointermid:len(text2) - pointerend]):
504 pointermin = pointermid
505 pointerend = pointermin
506 else:
507 pointermax = pointermid
508 pointermid = (pointermax - pointermin) // 2 + pointermin
509 return pointermid
510
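For illustration only (editorial sketch, not part of the file), the two helpers above behave like this:

dmp = diff_match_patch()
dmp.diff_commonPrefix("1234abcdef", "1234xyz")  # -> 4, shared "1234" at the start
dmp.diff_commonSuffix("abcdef1234", "xyz1234")  # -> 4, shared "1234" at the end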
511 def diff_commonOverlap(self, text1, text2):
512 """Determine if the suffix of one string is the prefix of another.
513
514 Args:
515 text1: First string.
516 text2: Second string.
517
518 Returns:
519 The number of characters common to the end of the first
520 string and the start of the second string.
521 """
522 # Cache the text lengths to prevent multiple calls.
523 text1_length = len(text1)
524 text2_length = len(text2)
525 # Eliminate the null case.
526 if text1_length == 0 or text2_length == 0:
527 return 0
528 # Truncate the longer string.
529 if text1_length > text2_length:
530 text1 = text1[-text2_length:]
531 elif text1_length < text2_length:
532 text2 = text2[:text1_length]
533 text_length = min(text1_length, text2_length)
534 # Quick check for the worst case.
535 if text1 == text2:
536 return text_length
537
538 # Start by looking for a single character match
539 # and increase length until no match is found.
540 # Performance analysis: http://neil.fraser.name/news/2010/11/04/
541 best = 0
542 length = 1
543 while True:
544 pattern = text1[-length:]
545 found = text2.find(pattern)
546 if found == -1:
547 return best
548 length += found
549 if found == 0 or text1[-length:] == text2[:length]:
550 best = length
551 length += 1
552
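An editorial sketch of the overlap helper above, exercising the whole-string, partial-overlap and no-overlap cases:

dmp = diff_match_patch()
dmp.diff_commonOverlap("abc", "abcd")           # -> 3, the whole of "abc"
dmp.diff_commonOverlap("123456xxx", "xxxabcd")  # -> 3, the shared "xxx"
dmp.diff_commonOverlap("123456", "abcd")        # -> 0, no overlap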
553 def diff_halfMatch(self, text1, text2):
554 """Do the two texts share a substring which is at least half the length of
555 the longer text?
556 This speedup can produce non-minimal diffs.
557
558 Args:
559 text1: First string.
560 text2: Second string.
561
562 Returns:
563 Five element Array, containing the prefix of text1, the suffix of text1,
564 the prefix of text2, the suffix of text2 and the common middle. Or None
565 if there was no match.
566 """
567 if self.Diff_Timeout <= 0:
568 # Don't risk returning a non-optimal diff if we have unlimited time.
569 return None
570 if len(text1) > len(text2):
571 (longtext, shorttext) = (text1, text2)
572 else:
573 (shorttext, longtext) = (text1, text2)
574 if len(longtext) < 4 or len(shorttext) * 2 < len(longtext):
575 return None # Pointless.
576
577 def diff_halfMatchI(longtext, shorttext, i):
578 """Does a substring of shorttext exist within longtext such that the
579 substring is at least half the length of longtext?
580 Closure, but does not reference any external variables.
581
582 Args:
583 longtext: Longer string.
584 shorttext: Shorter string.
585 i: Start index of quarter length substring within longtext.
586
587 Returns:
588 Five element Array, containing the prefix of longtext, the suffix of
589 longtext, the prefix of shorttext, the suffix of shorttext and the
590 common middle. Or None if there was no match.
591 """
592 seed = longtext[i:i + len(longtext) // 4]
593 best_common = ''
594 j = shorttext.find(seed)
595 while j != -1:
596 prefixLength = self.diff_commonPrefix(longtext[i:], shorttext[j:])
597 suffixLength = self.diff_commonSuffix(longtext[:i], shorttext[:j])
598 if len(best_common) < suffixLength + prefixLength:
599 best_common = (shorttext[j - suffixLength:j] +
600 shorttext[j:j + prefixLength])
601 best_longtext_a = longtext[:i - suffixLength]
602 best_longtext_b = longtext[i + prefixLength:]
603 best_shorttext_a = shorttext[:j - suffixLength]
604 best_shorttext_b = shorttext[j + prefixLength:]
605 j = shorttext.find(seed, j + 1)
606
607 if len(best_common) * 2 >= len(longtext):
608 return (best_longtext_a, best_longtext_b,
609 best_shorttext_a, best_shorttext_b, best_common)
610 else:
611 return None
612
613 # First check if the second quarter is the seed for a half-match.
614 hm1 = diff_halfMatchI(longtext, shorttext, (len(longtext) + 3) // 4)
615 # Check again based on the third quarter.
616 hm2 = diff_halfMatchI(longtext, shorttext, (len(longtext) + 1) // 2)
617 if not hm1 and not hm2:
618 return None
619 elif not hm2:
620 hm = hm1
621 elif not hm1:
622 hm = hm2
623 else:
624 # Both matched. Select the longest.
625 if len(hm1[4]) > len(hm2[4]):
626 hm = hm1
627 else:
628 hm = hm2
629
630 # A half-match was found, sort out the return data.
631 if len(text1) > len(text2):
632 (text1_a, text1_b, text2_a, text2_b, mid_common) = hm
633 else:
634 (text2_a, text2_b, text1_a, text1_b, mid_common) = hm
635 return (text1_a, text1_b, text2_a, text2_b, mid_common)
636
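An illustrative sketch (editorial, not part of the file): with a timeout in effect, a sufficiently long shared middle lets the texts be split around it; otherwise None is returned and the caller falls through to the full diff.

dmp = diff_match_patch()
dmp.Diff_Timeout = 1.0
dmp.diff_halfMatch("1234567890", "a345678z")
# -> ('12', '90', 'a', 'z', '345678')
dmp.diff_halfMatch("1234567890", "abcdef")   # -> None, no long shared substring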
637 def diff_cleanupSemantic(self, diffs):
638 """Reduce the number of edits by eliminating semantically trivial
639 equalities.
640
641 Args:
642 diffs: Array of diff tuples.
643 """
644 changes = False
645 equalities = [] # Stack of indices where equalities are found.
646 lastequality = None # Always equal to diffs[equalities[-1]][1]
647 pointer = 0 # Index of current position.
648 # Number of chars that changed prior to the equality.
649 length_insertions1, length_deletions1 = 0, 0
650 # Number of chars that changed after the equality.
651 length_insertions2, length_deletions2 = 0, 0
652 while pointer < len(diffs):
653 if diffs[pointer][0] == self.DIFF_EQUAL: # Equality found.
654 equalities.append(pointer)
655 length_insertions1, length_insertions2 = length_insertions2, 0
656 length_deletions1, length_deletions2 = length_deletions2, 0
657 lastequality = diffs[pointer][1]
658 else: # An insertion or deletion.
659 if diffs[pointer][0] == self.DIFF_INSERT:
660 length_insertions2 += len(diffs[pointer][1])
661 else:
662 length_deletions2 += len(diffs[pointer][1])
663 # Eliminate an equality that is smaller or equal to the edits on both
664 # sides of it.
665 if (lastequality and (len(lastequality) <=
666 max(length_insertions1, length_deletions1)) and
667 (len(lastequality) <= max(length_insertions2, length_deletions2))):
668 # Duplicate record.
669 diffs.insert(equalities[-1], (self.DIFF_DELETE, lastequality))
670 # Change second copy to insert.
671 diffs[equalities[-1] + 1] = (self.DIFF_INSERT,
672 diffs[equalities[-1] + 1][1])
673 # Throw away the equality we just deleted.
674 equalities.pop()
675 # Throw away the previous equality (it needs to be reevaluated).
676 if len(equalities):
677 equalities.pop()
678 if len(equalities):
679 pointer = equalities[-1]
680 else:
681 pointer = -1
682 # Reset the counters.
683 length_insertions1, length_deletions1 = 0, 0
684 length_insertions2, length_deletions2 = 0, 0
685 lastequality = None
686 changes = True
687 pointer += 1
688
689 # Normalize the diff.
690 if changes:
691 self.diff_cleanupMerge(diffs)
692 self.diff_cleanupSemanticLossless(diffs)
693
694 # Find any overlaps between deletions and insertions.
695 # e.g: <del>abcxxx</del><ins>xxxdef</ins>
696 # -> <del>abc</del>xxx<ins>def</ins>
697 # e.g: <del>xxxabc</del><ins>defxxx</ins>
698 # -> <ins>def</ins>xxx<del>abc</del>
699 # Only extract an overlap if it is as big as the edit ahead or behind it.
700 pointer = 1
701 while pointer < len(diffs):
702 if (diffs[pointer - 1][0] == self.DIFF_DELETE and
703 diffs[pointer][0] == self.DIFF_INSERT):
704 deletion = diffs[pointer - 1][1]
705 insertion = diffs[pointer][1]
706 overlap_length1 = self.diff_commonOverlap(deletion, insertion)
707 overlap_length2 = self.diff_commonOverlap(insertion, deletion)
708 if overlap_length1 >= overlap_length2:
709 if (overlap_length1 >= len(deletion) / 2.0 or
710 overlap_length1 >= len(insertion) / 2.0):
711 # Overlap found. Insert an equality and trim the surrounding edits.
712 diffs.insert(pointer, (self.DIFF_EQUAL,
713 insertion[:overlap_length1]))
714 diffs[pointer - 1] = (self.DIFF_DELETE,
715 deletion[:len(deletion) - overlap_length1])
716 diffs[pointer + 1] = (self.DIFF_INSERT,
717 insertion[overlap_length1:])
718 pointer += 1
719 else:
720 if (overlap_length2 >= len(deletion) / 2.0 or
721 overlap_length2 >= len(insertion) / 2.0):
722 # Reverse overlap found.
723 # Insert an equality and swap and trim the surrounding edits.
724 diffs.insert(pointer, (self.DIFF_EQUAL, deletion[:overlap_length2]))
725 diffs[pointer - 1] = (self.DIFF_INSERT,
726 insertion[:len(insertion) - overlap_length2])
727 diffs[pointer + 1] = (self.DIFF_DELETE, deletion[overlap_length2:])
728 pointer += 1
729 pointer += 1
730 pointer += 1
731
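A worked example (editorial sketch, not part of the file) of the overlap extraction described in the comments above, mirroring the <del>abcxxx</del><ins>xxxdef</ins> case:

dmp = diff_match_patch()
diffs = [(dmp.DIFF_DELETE, 'abcxxx'), (dmp.DIFF_INSERT, 'xxxdef')]
dmp.diff_cleanupSemantic(diffs)
# diffs is now [(-1, 'abc'), (0, 'xxx'), (1, 'def')]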
732 def diff_cleanupSemanticLossless(self, diffs):
733 """Look for single edits surrounded on both sides by equalities
734 which can be shifted sideways to align the edit to a word boundary.
735 e.g: The c<ins>at c</ins>ame. -> The <ins>cat </ins>came.
736
737 Args:
738 diffs: Array of diff tuples.
739 """
740
741 def diff_cleanupSemanticScore(one, two):
742 """Given two strings, compute a score representing whether the
743 internal boundary falls on logical boundaries.
744 Scores range from 6 (best) to 0 (worst).
745 Closure, but does not reference any external variables.
746
747 Args:
748 one: First string.
749 two: Second string.
750
751 Returns:
752 The score.
753 """
754 if not one or not two:
755 # Edges are the best.
756 return 6
757
758 # Each port of this function behaves slightly differently due to
759 # subtle differences in each language's definition of things like
760 # 'whitespace'. Since this function's purpose is largely cosmetic,
761 # the choice has been made to use each language's native features