Show More
@@ -1,6 +1,3 b'' | |||||
1 |
|
||||
2 |
|
||||
3 |
|
||||
4 |
|
|
1 | """Diff Match and Patch | |
5 |
|
2 | |||
6 | Copyright 2006 Google Inc. |
|
3 | Copyright 2006 Google Inc. | |
@@ -25,7 +22,7 b' Computes the difference between two text' | |||||
25 | Applies the patch onto another text, allowing for errors. |
|
22 | Applies the patch onto another text, allowing for errors. | |
26 | """ |
|
23 | """ | |
27 |
|
24 | |||
28 |
__author__ = |
|
25 | __author__ = "fraser@google.com (Neil Fraser)" | |
29 |
|
26 | |||
30 | import math |
|
27 | import math | |
31 | import re |
|
28 | import re | |
@@ -98,7 +95,7 b' class diff_match_patch:' | |||||
98 | if deadline is None: |
|
95 | if deadline is None: | |
99 | # Unlike in most languages, Python counts time in seconds. |
|
96 | # Unlike in most languages, Python counts time in seconds. | |
100 | if self.Diff_Timeout <= 0: |
|
97 | if self.Diff_Timeout <= 0: | |
101 |
deadline = sys.max |
|
98 | deadline = sys.maxsize | |
102 | else: |
|
99 | else: | |
103 | deadline = time.time() + self.Diff_Timeout |
|
100 | deadline = time.time() + self.Diff_Timeout | |
104 |
|
101 | |||
@@ -121,7 +118,7 b' class diff_match_patch:' | |||||
121 | # Trim off common suffix (speedup). |
|
118 | # Trim off common suffix (speedup). | |
122 | commonlength = self.diff_commonSuffix(text1, text2) |
|
119 | commonlength = self.diff_commonSuffix(text1, text2) | |
123 | if commonlength == 0: |
|
120 | if commonlength == 0: | |
124 |
commonsuffix = |
|
121 | commonsuffix = "" | |
125 | else: |
|
122 | else: | |
126 | commonsuffix = text1[-commonlength:] |
|
123 | commonsuffix = text1[-commonlength:] | |
127 | text1 = text1[:-commonlength] |
|
124 | text1 = text1[:-commonlength] | |
@@ -168,8 +165,11 b' class diff_match_patch:' | |||||
168 | i = longtext.find(shorttext) |
|
165 | i = longtext.find(shorttext) | |
169 | if i != -1: |
|
166 | if i != -1: | |
170 | # Shorter text is inside the longer text (speedup). |
|
167 | # Shorter text is inside the longer text (speedup). | |
171 | diffs = [(self.DIFF_INSERT, longtext[:i]), (self.DIFF_EQUAL, shorttext), |
|
168 | diffs = [ | |
172 |
(self.DIFF_INSERT, longtext[i |
|
169 | (self.DIFF_INSERT, longtext[:i]), | |
|
170 | (self.DIFF_EQUAL, shorttext), | |||
|
171 | (self.DIFF_INSERT, longtext[i + len(shorttext) :]), | |||
|
172 | ] | |||
173 | # Swap insertions for deletions if diff is reversed. |
|
173 | # Swap insertions for deletions if diff is reversed. | |
174 | if len(text1) > len(text2): |
|
174 | if len(text1) > len(text2): | |
175 | diffs[0] = (self.DIFF_DELETE, diffs[0][1]) |
|
175 | diffs[0] = (self.DIFF_DELETE, diffs[0][1]) | |
@@ -223,12 +223,12 b' class diff_match_patch:' | |||||
223 |
|
223 | |||
224 | # Rediff any replacement blocks, this time character-by-character. |
|
224 | # Rediff any replacement blocks, this time character-by-character. | |
225 | # Add a dummy entry at the end. |
|
225 | # Add a dummy entry at the end. | |
226 |
diffs.append((self.DIFF_EQUAL, |
|
226 | diffs.append((self.DIFF_EQUAL, "")) | |
227 | pointer = 0 |
|
227 | pointer = 0 | |
228 | count_delete = 0 |
|
228 | count_delete = 0 | |
229 | count_insert = 0 |
|
229 | count_insert = 0 | |
230 |
text_delete = |
|
230 | text_delete = "" | |
231 |
text_insert = |
|
231 | text_insert = "" | |
232 | while pointer < len(diffs): |
|
232 | while pointer < len(diffs): | |
233 | if diffs[pointer][0] == self.DIFF_INSERT: |
|
233 | if diffs[pointer][0] == self.DIFF_INSERT: | |
234 | count_insert += 1 |
|
234 | count_insert += 1 | |
@@ -245,8 +245,8 b' class diff_match_patch:' | |||||
245 | pointer = pointer - count_delete - count_insert + len(a) |
|
245 | pointer = pointer - count_delete - count_insert + len(a) | |
246 | count_insert = 0 |
|
246 | count_insert = 0 | |
247 | count_delete = 0 |
|
247 | count_delete = 0 | |
248 |
text_delete = |
|
248 | text_delete = "" | |
249 |
text_insert = |
|
249 | text_insert = "" | |
250 |
|
250 | |||
251 | pointer += 1 |
|
251 | pointer += 1 | |
252 |
|
252 | |||
@@ -280,7 +280,7 b' class diff_match_patch:' | |||||
280 | delta = text1_length - text2_length |
|
280 | delta = text1_length - text2_length | |
281 | # If the total number of characters is odd, then the front path will |
|
281 | # If the total number of characters is odd, then the front path will | |
282 | # collide with the reverse path. |
|
282 | # collide with the reverse path. | |
283 |
front = |
|
283 | front = delta % 2 != 0 | |
284 | # Offsets for start and end of k loop. |
|
284 | # Offsets for start and end of k loop. | |
285 | # Prevents mapping of space beyond the grid. |
|
285 | # Prevents mapping of space beyond the grid. | |
286 | k1start = 0 |
|
286 | k1start = 0 | |
@@ -295,14 +295,14 b' class diff_match_patch:' | |||||
295 | # Walk the front path one step. |
|
295 | # Walk the front path one step. | |
296 | for k1 in range(-d + k1start, d + 1 - k1end, 2): |
|
296 | for k1 in range(-d + k1start, d + 1 - k1end, 2): | |
297 | k1_offset = v_offset + k1 |
|
297 | k1_offset = v_offset + k1 | |
298 | if k1 == -d or (k1 != d and |
|
298 | if k1 == -d or (k1 != d and v1[k1_offset - 1] < v1[k1_offset + 1]): | |
299 | v1[k1_offset - 1] < v1[k1_offset + 1]): |
|
|||
300 | x1 = v1[k1_offset + 1] |
|
299 | x1 = v1[k1_offset + 1] | |
301 | else: |
|
300 | else: | |
302 | x1 = v1[k1_offset - 1] + 1 |
|
301 | x1 = v1[k1_offset - 1] + 1 | |
303 | y1 = x1 - k1 |
|
302 | y1 = x1 - k1 | |
304 | while (x1 < text1_length and y1 < text2_length and |
|
303 | while ( | |
305 | text1[x1] == text2[y1]): |
|
304 | x1 < text1_length and y1 < text2_length and text1[x1] == text2[y1] | |
|
305 | ): | |||
306 | x1 += 1 |
|
306 | x1 += 1 | |
307 | y1 += 1 |
|
307 | y1 += 1 | |
308 | v1[k1_offset] = x1 |
|
308 | v1[k1_offset] = x1 | |
@@ -324,14 +324,16 b' class diff_match_patch:' | |||||
324 | # Walk the reverse path one step. |
|
324 | # Walk the reverse path one step. | |
325 | for k2 in range(-d + k2start, d + 1 - k2end, 2): |
|
325 | for k2 in range(-d + k2start, d + 1 - k2end, 2): | |
326 | k2_offset = v_offset + k2 |
|
326 | k2_offset = v_offset + k2 | |
327 | if k2 == -d or (k2 != d and |
|
327 | if k2 == -d or (k2 != d and v2[k2_offset - 1] < v2[k2_offset + 1]): | |
328 | v2[k2_offset - 1] < v2[k2_offset + 1]): |
|
|||
329 | x2 = v2[k2_offset + 1] |
|
328 | x2 = v2[k2_offset + 1] | |
330 | else: |
|
329 | else: | |
331 | x2 = v2[k2_offset - 1] + 1 |
|
330 | x2 = v2[k2_offset - 1] + 1 | |
332 | y2 = x2 - k2 |
|
331 | y2 = x2 - k2 | |
333 | while (x2 < text1_length and y2 < text2_length and |
|
332 | while ( | |
334 | text1[-x2 - 1] == text2[-y2 - 1]): |
|
333 | x2 < text1_length | |
|
334 | and y2 < text2_length | |||
|
335 | and text1[-x2 - 1] == text2[-y2 - 1] | |||
|
336 | ): | |||
335 | x2 += 1 |
|
337 | x2 += 1 | |
336 | y2 += 1 |
|
338 | y2 += 1 | |
337 | v2[k2_offset] = x2 |
|
339 | v2[k2_offset] = x2 | |
@@ -399,7 +401,7 b' class diff_match_patch:' | |||||
399 |
|
401 | |||
400 | # "\x00" is a valid character, but various debuggers don't like it. |
|
402 | # "\x00" is a valid character, but various debuggers don't like it. | |
401 | # So we'll insert a junk entry to avoid generating a null character. |
|
403 | # So we'll insert a junk entry to avoid generating a null character. | |
402 |
lineArray.append( |
|
404 | lineArray.append("") | |
403 |
|
405 | |||
404 | def diff_linesToCharsMunge(text): |
|
406 | def diff_linesToCharsMunge(text): | |
405 | """Split a text into an array of strings. Reduce the texts to a string |
|
407 | """Split a text into an array of strings. Reduce the texts to a string | |
@@ -419,18 +421,18 b' class diff_match_patch:' | |||||
419 | lineStart = 0 |
|
421 | lineStart = 0 | |
420 | lineEnd = -1 |
|
422 | lineEnd = -1 | |
421 | while lineEnd < len(text) - 1: |
|
423 | while lineEnd < len(text) - 1: | |
422 |
lineEnd = text.find( |
|
424 | lineEnd = text.find("\n", lineStart) | |
423 | if lineEnd == -1: |
|
425 | if lineEnd == -1: | |
424 | lineEnd = len(text) - 1 |
|
426 | lineEnd = len(text) - 1 | |
425 | line = text[lineStart:lineEnd + 1] |
|
427 | line = text[lineStart : lineEnd + 1] | |
426 | lineStart = lineEnd + 1 |
|
428 | lineStart = lineEnd + 1 | |
427 |
|
429 | |||
428 | if line in lineHash: |
|
430 | if line in lineHash: | |
429 |
chars.append( |
|
431 | chars.append(chr(lineHash[line])) | |
430 | else: |
|
432 | else: | |
431 | lineArray.append(line) |
|
433 | lineArray.append(line) | |
432 | lineHash[line] = len(lineArray) - 1 |
|
434 | lineHash[line] = len(lineArray) - 1 | |
433 |
chars.append( |
|
435 | chars.append(chr(len(lineArray) - 1)) | |
434 | return "".join(chars) |
|
436 | return "".join(chars) | |
435 |
|
437 | |||
436 | chars1 = diff_linesToCharsMunge(text1) |
|
438 | chars1 = diff_linesToCharsMunge(text1) | |
@@ -499,8 +501,10 b' class diff_match_patch:' | |||||
499 | pointermid = pointermax |
|
501 | pointermid = pointermax | |
500 | pointerend = 0 |
|
502 | pointerend = 0 | |
501 | while pointermin < pointermid: |
|
503 | while pointermin < pointermid: | |
502 | if (text1[-pointermid:len(text1) - pointerend] == |
|
504 | if ( | |
503 |
|
|
505 | text1[-pointermid : len(text1) - pointerend] | |
|
506 | == text2[-pointermid : len(text2) - pointerend] | |||
|
507 | ): | |||
504 | pointermin = pointermid |
|
508 | pointermin = pointermid | |
505 | pointerend = pointermin |
|
509 | pointerend = pointermin | |
506 | else: |
|
510 | else: | |
@@ -590,14 +594,16 b' class diff_match_patch:' | |||||
590 | common middle. Or None if there was no match. |
|
594 | common middle. Or None if there was no match. | |
591 | """ |
|
595 | """ | |
592 | seed = longtext[i:i + len(longtext) // 4] |
|
596 | seed = longtext[i : i + len(longtext) // 4] | |
593 |
best_common = |
|
597 | best_common = "" | |
594 | j = shorttext.find(seed) |
|
598 | j = shorttext.find(seed) | |
595 | while j != -1: |
|
599 | while j != -1: | |
596 | prefixLength = self.diff_commonPrefix(longtext[i:], shorttext[j:]) |
|
600 | prefixLength = self.diff_commonPrefix(longtext[i:], shorttext[j:]) | |
597 | suffixLength = self.diff_commonSuffix(longtext[:i], shorttext[:j]) |
|
601 | suffixLength = self.diff_commonSuffix(longtext[:i], shorttext[:j]) | |
598 | if len(best_common) < suffixLength + prefixLength: |
|
602 | if len(best_common) < suffixLength + prefixLength: | |
599 | best_common = (shorttext[j - suffixLength:j] + |
|
603 | best_common = ( | |
600 |
shorttext[j:j |
|
604 | shorttext[j - suffixLength : j] | |
|
605 | + shorttext[j : j + prefixLength] | |||
|
606 | ) | |||
601 | best_longtext_a = longtext[:i - suffixLength] |
|
607 | best_longtext_a = longtext[: i - suffixLength] | |
602 | best_longtext_b = longtext[i + prefixLength:] |
|
608 | best_longtext_b = longtext[i + prefixLength :] | |
603 | best_shorttext_a = shorttext[:j - suffixLength] |
|
609 | best_shorttext_a = shorttext[: j - suffixLength] | |
@@ -605,8 +611,13 b' class diff_match_patch:' | |||||
605 | j = shorttext.find(seed, j + 1) |
|
611 | j = shorttext.find(seed, j + 1) | |
606 |
|
612 | |||
607 | if len(best_common) * 2 >= len(longtext): |
|
613 | if len(best_common) * 2 >= len(longtext): | |
608 | return (best_longtext_a, best_longtext_b, |
|
614 | return ( | |
609 | best_shorttext_a, best_shorttext_b, best_common) |
|
615 | best_longtext_a, | |
|
616 | best_longtext_b, | |||
|
617 | best_shorttext_a, | |||
|
618 | best_shorttext_b, | |||
|
619 | best_common, | |||
|
620 | ) | |||
610 | else: |
|
621 | else: | |
611 | return None |
|
622 | return None | |
612 |
|
623 | |||
@@ -662,14 +673,22 b' class diff_match_patch:' | |||||
662 | length_deletions2 += len(diffs[pointer][1]) |
|
673 | length_deletions2 += len(diffs[pointer][1]) | |
663 | # Eliminate an equality that is smaller or equal to the edits on both |
|
674 | # Eliminate an equality that is smaller or equal to the edits on both | |
664 | # sides of it. |
|
675 | # sides of it. | |
665 | if (lastequality and (len(lastequality) <= |
|
676 | if ( | |
666 | max(length_insertions1, length_deletions1)) and |
|
677 | lastequality | |
667 | (len(lastequality) <= max(length_insertions2, length_deletions2))): |
|
678 | and ( | |
|
679 | len(lastequality) <= max(length_insertions1, length_deletions1) | |||
|
680 | ) | |||
|
681 | and ( | |||
|
682 | len(lastequality) <= max(length_insertions2, length_deletions2) | |||
|
683 | ) | |||
|
684 | ): | |||
668 | # Duplicate record. |
|
685 | # Duplicate record. | |
669 | diffs.insert(equalities[-1], (self.DIFF_DELETE, lastequality)) |
|
686 | diffs.insert(equalities[-1], (self.DIFF_DELETE, lastequality)) | |
670 | # Change second copy to insert. |
|
687 | # Change second copy to insert. | |
671 |
diffs[equalities[-1] + 1] = ( |
|
688 | diffs[equalities[-1] + 1] = ( | |
672 | diffs[equalities[-1] + 1][1]) |
|
689 | self.DIFF_INSERT, | |
|
690 | diffs[equalities[-1] + 1][1], | |||
|
691 | ) | |||
673 | # Throw away the equality we just deleted. |
|
692 | # Throw away the equality we just deleted. | |
674 | equalities.pop() |
|
693 | equalities.pop() | |
675 | # Throw away the previous equality (it needs to be reevaluated). |
|
694 | # Throw away the previous equality (it needs to be reevaluated). | |
@@ -699,32 +718,50 b' class diff_match_patch:' | |||||
699 | # Only extract an overlap if it is as big as the edit ahead or behind it. |
|
718 | # Only extract an overlap if it is as big as the edit ahead or behind it. | |
700 | pointer = 1 |
|
719 | pointer = 1 | |
701 | while pointer < len(diffs): |
|
720 | while pointer < len(diffs): | |
702 | if (diffs[pointer - 1][0] == self.DIFF_DELETE and |
|
721 | if ( | |
703 |
diffs[pointer][0] == self.DIFF_ |
|
722 | diffs[pointer - 1][0] == self.DIFF_DELETE | |
|
723 | and diffs[pointer][0] == self.DIFF_INSERT | |||
|
724 | ): | |||
704 | deletion = diffs[pointer - 1][1] |
|
725 | deletion = diffs[pointer - 1][1] | |
705 | insertion = diffs[pointer][1] |
|
726 | insertion = diffs[pointer][1] | |
706 | overlap_length1 = self.diff_commonOverlap(deletion, insertion) |
|
727 | overlap_length1 = self.diff_commonOverlap(deletion, insertion) | |
707 | overlap_length2 = self.diff_commonOverlap(insertion, deletion) |
|
728 | overlap_length2 = self.diff_commonOverlap(insertion, deletion) | |
708 | if overlap_length1 >= overlap_length2: |
|
729 | if overlap_length1 >= overlap_length2: | |
709 | if (overlap_length1 >= len(deletion) / 2.0 or |
|
730 | if ( | |
710 |
overlap_length1 >= len( |
|
731 | overlap_length1 >= len(deletion) / 2.0 | |
|
732 | or overlap_length1 >= len(insertion) / 2.0 | |||
|
733 | ): | |||
711 | # Overlap found. Insert an equality and trim the surrounding edits. |
|
734 | # Overlap found. Insert an equality and trim the surrounding edits. | |
712 | diffs.insert(pointer, (self.DIFF_EQUAL, |
|
735 | diffs.insert( | |
713 |
|
|
736 | pointer, (self.DIFF_EQUAL, insertion[:overlap_length1]) | |
714 | diffs[pointer - 1] = (self.DIFF_DELETE, |
|
737 | ) | |
715 | deletion[:len(deletion) - overlap_length1]) |
|
738 | diffs[pointer - 1] = ( | |
716 |
|
|
739 | self.DIFF_DELETE, | |
717 |
|
|
740 | deletion[: len(deletion) - overlap_length1], | |
|
741 | ) | |||
|
742 | diffs[pointer + 1] = ( | |||
|
743 | self.DIFF_INSERT, | |||
|
744 | insertion[overlap_length1:], | |||
|
745 | ) | |||
718 | pointer += 1 |
|
746 | pointer += 1 | |
719 | else: |
|
747 | else: | |
720 | if (overlap_length2 >= len(deletion) / 2.0 or |
|
748 | if ( | |
721 |
overlap_length2 >= len( |
|
749 | overlap_length2 >= len(deletion) / 2.0 | |
|
750 | or overlap_length2 >= len(insertion) / 2.0 | |||
|
751 | ): | |||
722 | # Reverse overlap found. |
|
752 | # Reverse overlap found. | |
723 | # Insert an equality and swap and trim the surrounding edits. |
|
753 | # Insert an equality and swap and trim the surrounding edits. | |
724 | diffs.insert(pointer, (self.DIFF_EQUAL, deletion[:overlap_length2])) |
|
754 | diffs.insert( | |
725 | diffs[pointer - 1] = (self.DIFF_INSERT, |
|
755 | pointer, (self.DIFF_EQUAL, deletion[:overlap_length2]) | |
726 | insertion[:len(insertion) - overlap_length2]) |
|
756 | ) | |
727 | diffs[pointer + 1] = (self.DIFF_DELETE, deletion[overlap_length2:]) |
|
757 | diffs[pointer - 1] = ( | |
|
758 | self.DIFF_INSERT, | |||
|
759 | insertion[: len(insertion) - overlap_length2], | |||
|
760 | ) | |||
|
761 | diffs[pointer + 1] = ( | |||
|
762 | self.DIFF_DELETE, | |||
|
763 | deletion[overlap_length2:], | |||
|
764 | ) | |||
728 | pointer += 1 |
|
765 | pointer += 1 | |
729 | pointer += 1 |
|
766 | pointer += 1 | |
730 | pointer += 1 |
|
767 | pointer += 1 | |
@@ -791,8 +828,10 b' class diff_match_patch:' | |||||
791 | pointer = 1 |
|
828 | pointer = 1 | |
792 | # Intentionally ignore the first and last element (don't need checking). |
|
829 | # Intentionally ignore the first and last element (don't need checking). | |
793 | while pointer < len(diffs) - 1: |
|
830 | while pointer < len(diffs) - 1: | |
794 | if (diffs[pointer - 1][0] == self.DIFF_EQUAL and |
|
831 | if ( | |
795 |
diffs[pointer |
|
832 | diffs[pointer - 1][0] == self.DIFF_EQUAL | |
|
833 | and diffs[pointer + 1][0] == self.DIFF_EQUAL | |||
|
834 | ): | |||
796 | # This is a single edit surrounded by equalities. |
|
835 | # This is a single edit surrounded by equalities. | |
797 | equality1 = diffs[pointer - 1][1] |
|
836 | equality1 = diffs[pointer - 1][1] | |
798 | edit = diffs[pointer][1] |
|
837 | edit = diffs[pointer][1] | |
@@ -810,14 +849,16 b' class diff_match_patch:' | |||||
810 | bestEquality1 = equality1 |
|
849 | bestEquality1 = equality1 | |
811 | bestEdit = edit |
|
850 | bestEdit = edit | |
812 | bestEquality2 = equality2 |
|
851 | bestEquality2 = equality2 | |
813 |
bestScore = |
|
852 | bestScore = diff_cleanupSemanticScore( | |
814 | diff_cleanupSemanticScore(edit, equality2)) |
|
853 | equality1, edit | |
|
854 | ) + diff_cleanupSemanticScore(edit, equality2) | |||
815 | while edit and equality2 and edit[0] == equality2[0]: |
|
855 | while edit and equality2 and edit[0] == equality2[0]: | |
816 | equality1 += edit[0] |
|
856 | equality1 += edit[0] | |
817 | edit = edit[1:] + equality2[0] |
|
857 | edit = edit[1:] + equality2[0] | |
818 | equality2 = equality2[1:] |
|
858 | equality2 = equality2[1:] | |
819 |
score = |
|
859 | score = diff_cleanupSemanticScore( | |
820 | diff_cleanupSemanticScore(edit, equality2)) |
|
860 | equality1, edit | |
|
861 | ) + diff_cleanupSemanticScore(edit, equality2) | |||
821 | # The >= encourages trailing rather than leading whitespace on edits. |
|
862 | # The >= encourages trailing rather than leading whitespace on edits. | |
822 | if score >= bestScore: |
|
863 | if score >= bestScore: | |
823 | bestScore = score |
|
864 | bestScore = score | |
@@ -841,8 +882,8 b' class diff_match_patch:' | |||||
841 | pointer += 1 |
|
882 | pointer += 1 | |
842 |
|
883 | |||
843 | # Define some regex patterns for matching boundaries. |
|
884 | # Define some regex patterns for matching boundaries. | |
844 |
BLANKLINEEND = re.compile(r"\n\r?\n$") |
|
885 | BLANKLINEEND = re.compile(r"\n\r?\n$") | |
845 |
BLANKLINESTART = re.compile(r"^\r?\n\r?\n") |
|
886 | BLANKLINESTART = re.compile(r"^\r?\n\r?\n") | |
846 |
|
887 | |||
847 | def diff_cleanupEfficiency(self, diffs): |
|
888 | def diff_cleanupEfficiency(self, diffs): | |
848 | """Reduce the number of edits by eliminating operationally trivial |
|
889 | """Reduce the number of edits by eliminating operationally trivial | |
@@ -861,8 +902,9 b' class diff_match_patch:' | |||||
861 | post_del = False # Is there a deletion operation after the last equality. |
|
902 | post_del = False # Is there a deletion operation after the last equality. | |
862 | while pointer < len(diffs): |
|
903 | while pointer < len(diffs): | |
863 | if diffs[pointer][0] == self.DIFF_EQUAL: # Equality found. |
|
904 | if diffs[pointer][0] == self.DIFF_EQUAL: # Equality found. | |
864 |
if |
|
905 | if len(diffs[pointer][1]) < self.Diff_EditCost and ( | |
865 |
|
|
906 | post_ins or post_del | |
|
907 | ): | |||
866 | # Candidate found. |
|
908 | # Candidate found. | |
867 | equalities.append(pointer) |
|
909 | equalities.append(pointer) | |
868 | pre_ins = post_ins |
|
910 | pre_ins = post_ins | |
@@ -887,14 +929,20 b' class diff_match_patch:' | |||||
887 | # <ins>A</del>X<ins>C</ins><del>D</del> |
|
929 | # <ins>A</del>X<ins>C</ins><del>D</del> | |
888 | # <ins>A</ins><del>B</del>X<del>C</del> |
|
930 | # <ins>A</ins><del>B</del>X<del>C</del> | |
889 |
|
931 | |||
890 | if lastequality and ((pre_ins and pre_del and post_ins and post_del) or |
|
932 | if lastequality and ( | |
891 | ((len(lastequality) < self.Diff_EditCost / 2) and |
|
933 | (pre_ins and pre_del and post_ins and post_del) | |
892 | (pre_ins + pre_del + post_ins + post_del) == 3)): |
|
934 | or ( | |
|
935 | (len(lastequality) < self.Diff_EditCost / 2) | |||
|
936 | and (pre_ins + pre_del + post_ins + post_del) == 3 | |||
|
937 | ) | |||
|
938 | ): | |||
893 | # Duplicate record. |
|
939 | # Duplicate record. | |
894 | diffs.insert(equalities[-1], (self.DIFF_DELETE, lastequality)) |
|
940 | diffs.insert(equalities[-1], (self.DIFF_DELETE, lastequality)) | |
895 | # Change second copy to insert. |
|
941 | # Change second copy to insert. | |
896 |
diffs[equalities[-1] + 1] = ( |
|
942 | diffs[equalities[-1] + 1] = ( | |
897 | diffs[equalities[-1] + 1][1]) |
|
943 | self.DIFF_INSERT, | |
|
944 | diffs[equalities[-1] + 1][1], | |||
|
945 | ) | |||
898 | equalities.pop() # Throw away the equality we just deleted. |
|
946 | equalities.pop() # Throw away the equality we just deleted. | |
899 | lastequality = None |
|
947 | lastequality = None | |
900 | if pre_ins and pre_del: |
|
948 | if pre_ins and pre_del: | |
@@ -922,12 +970,12 b' class diff_match_patch:' | |||||
922 | Args: |
|
970 | Args: | |
923 | diffs: Array of diff tuples. |
|
971 | diffs: Array of diff tuples. | |
924 | """ |
|
972 | """ | |
925 |
diffs.append((self.DIFF_EQUAL, |
|
973 | diffs.append((self.DIFF_EQUAL, "")) # Add a dummy entry at the end. | |
926 | pointer = 0 |
|
974 | pointer = 0 | |
927 | count_delete = 0 |
|
975 | count_delete = 0 | |
928 | count_insert = 0 |
|
976 | count_insert = 0 | |
929 |
text_delete = |
|
977 | text_delete = "" | |
930 |
text_insert = |
|
978 | text_insert = "" | |
931 | while pointer < len(diffs): |
|
979 | while pointer < len(diffs): | |
932 | if diffs[pointer][0] == self.DIFF_INSERT: |
|
980 | if diffs[pointer][0] == self.DIFF_INSERT: | |
933 | count_insert += 1 |
|
981 | count_insert += 1 | |
@@ -946,31 +994,40 b' class diff_match_patch:' | |||||
946 | if commonlength != 0: |
|
994 | if commonlength != 0: | |
947 | x = pointer - count_delete - count_insert - 1 |
|
995 | x = pointer - count_delete - count_insert - 1 | |
948 | if x >= 0 and diffs[x][0] == self.DIFF_EQUAL: |
|
996 | if x >= 0 and diffs[x][0] == self.DIFF_EQUAL: | |
949 | diffs[x] = (diffs[x][0], diffs[x][1] + |
|
997 | diffs[x] = ( | |
950 |
|
|
998 | diffs[x][0], | |
|
999 | diffs[x][1] + text_insert[:commonlength], | |||
|
1000 | ) | |||
951 | else: |
|
1001 | else: | |
952 | diffs.insert(0, (self.DIFF_EQUAL, text_insert[:commonlength])) |
|
1002 | diffs.insert( | |
|
1003 | 0, (self.DIFF_EQUAL, text_insert[:commonlength]) | |||
|
1004 | ) | |||
953 | pointer += 1 |
|
1005 | pointer += 1 | |
954 | text_insert = text_insert[commonlength:] |
|
1006 | text_insert = text_insert[commonlength:] | |
955 | text_delete = text_delete[commonlength:] |
|
1007 | text_delete = text_delete[commonlength:] | |
956 | # Factor out any common suffixies. |
|
1008 | # Factor out any common suffixies. | |
957 | commonlength = self.diff_commonSuffix(text_insert, text_delete) |
|
1009 | commonlength = self.diff_commonSuffix(text_insert, text_delete) | |
958 | if commonlength != 0: |
|
1010 | if commonlength != 0: | |
959 | diffs[pointer] = (diffs[pointer][0], text_insert[-commonlength:] + |
|
1011 | diffs[pointer] = ( | |
960 |
diffs[pointer][ |
|
1012 | diffs[pointer][0], | |
|
1013 | text_insert[-commonlength:] + diffs[pointer][1], | |||
|
1014 | ) | |||
961 | text_insert = text_insert[:-commonlength] |
|
1015 | text_insert = text_insert[:-commonlength] | |
962 | text_delete = text_delete[:-commonlength] |
|
1016 | text_delete = text_delete[:-commonlength] | |
963 | # Delete the offending records and add the merged ones. |
|
1017 | # Delete the offending records and add the merged ones. | |
964 | if count_delete == 0: |
|
1018 | if count_delete == 0: | |
965 | diffs[pointer - count_insert : pointer] = [ |
|
1019 | diffs[pointer - count_insert : pointer] = [ | |
966 |
(self.DIFF_INSERT, text_insert) |
|
1020 | (self.DIFF_INSERT, text_insert) | |
|
1021 | ] | |||
967 | elif count_insert == 0: |
|
1022 | elif count_insert == 0: | |
968 | diffs[pointer - count_delete : pointer] = [ |
|
1023 | diffs[pointer - count_delete : pointer] = [ | |
969 |
(self.DIFF_DELETE, text_delete) |
|
1024 | (self.DIFF_DELETE, text_delete) | |
|
1025 | ] | |||
970 | else: |
|
1026 | else: | |
971 | diffs[pointer - count_delete - count_insert : pointer] = [ |
|
1027 | diffs[pointer - count_delete - count_insert : pointer] = [ | |
972 | (self.DIFF_DELETE, text_delete), |
|
1028 | (self.DIFF_DELETE, text_delete), | |
973 |
(self.DIFF_INSERT, text_insert) |
|
1029 | (self.DIFF_INSERT, text_insert), | |
|
1030 | ] | |||
974 | pointer = pointer - count_delete - count_insert + 1 |
|
1031 | pointer = pointer - count_delete - count_insert + 1 | |
975 | if count_delete != 0: |
|
1032 | if count_delete != 0: | |
976 | pointer += 1 |
|
1033 | pointer += 1 | |
@@ -978,18 +1035,20 b' class diff_match_patch:' | |||||
978 | pointer += 1 |
|
1035 | pointer += 1 | |
979 | elif pointer != 0 and diffs[pointer - 1][0] == self.DIFF_EQUAL: |
|
1036 | elif pointer != 0 and diffs[pointer - 1][0] == self.DIFF_EQUAL: | |
980 | # Merge this equality with the previous one. |
|
1037 | # Merge this equality with the previous one. | |
981 |
diffs[pointer - 1] = ( |
|
1038 | diffs[pointer - 1] = ( | |
982 |
|
|
1039 | diffs[pointer - 1][0], | |
|
1040 | diffs[pointer - 1][1] + diffs[pointer][1], | |||
|
1041 | ) | |||
983 | del diffs[pointer] |
|
1042 | del diffs[pointer] | |
984 | else: |
|
1043 | else: | |
985 | pointer += 1 |
|
1044 | pointer += 1 | |
986 |
|
1045 | |||
987 | count_insert = 0 |
|
1046 | count_insert = 0 | |
988 | count_delete = 0 |
|
1047 | count_delete = 0 | |
989 |
text_delete = |
|
1048 | text_delete = "" | |
990 |
text_insert = |
|
1049 | text_insert = "" | |
991 |
|
1050 | |||
992 |
if diffs[-1][1] == |
|
1051 | if diffs[-1][1] == "": | |
993 | diffs.pop() # Remove the dummy entry at the end. |
|
1052 | diffs.pop() # Remove the dummy entry at the end. | |
994 |
|
1053 | |||
995 | # Second pass: look for single edits surrounded on both sides by equalities |
|
1054 | # Second pass: look for single edits surrounded on both sides by equalities | |
@@ -999,25 +1058,35 b' class diff_match_patch:' | |||||
999 | pointer = 1 |
|
1058 | pointer = 1 | |
1000 | # Intentionally ignore the first and last element (don't need checking). |
|
1059 | # Intentionally ignore the first and last element (don't need checking). | |
1001 | while pointer < len(diffs) - 1: |
|
1060 | while pointer < len(diffs) - 1: | |
1002 | if (diffs[pointer - 1][0] == self.DIFF_EQUAL and |
|
1061 | if ( | |
1003 |
diffs[pointer |
|
1062 | diffs[pointer - 1][0] == self.DIFF_EQUAL | |
|
1063 | and diffs[pointer + 1][0] == self.DIFF_EQUAL | |||
|
1064 | ): | |||
1004 | # This is a single edit surrounded by equalities. |
|
1065 | # This is a single edit surrounded by equalities. | |
1005 | if diffs[pointer][1].endswith(diffs[pointer - 1][1]): |
|
1066 | if diffs[pointer][1].endswith(diffs[pointer - 1][1]): | |
1006 | # Shift the edit over the previous equality. |
|
1067 | # Shift the edit over the previous equality. | |
1007 |
diffs[pointer] = ( |
|
1068 | diffs[pointer] = ( | |
1008 |
diffs[pointer |
|
1069 | diffs[pointer][0], | |
1009 |
|
|
1070 | diffs[pointer - 1][1] | |
1010 |
diffs[pointer |
|
1071 | + diffs[pointer][1][: -len(diffs[pointer - 1][1])], | |
1011 | diffs[pointer - 1][1] + diffs[pointer + 1][1]) |
|
1072 | ) | |
|
1073 | diffs[pointer + 1] = ( | |||
|
1074 | diffs[pointer + 1][0], | |||
|
1075 | diffs[pointer - 1][1] + diffs[pointer + 1][1], | |||
|
1076 | ) | |||
1012 | del diffs[pointer - 1] |
|
1077 | del diffs[pointer - 1] | |
1013 | changes = True |
|
1078 | changes = True | |
1014 | elif diffs[pointer][1].startswith(diffs[pointer + 1][1]): |
|
1079 | elif diffs[pointer][1].startswith(diffs[pointer + 1][1]): | |
1015 | # Shift the edit over the next equality. |
|
1080 | # Shift the edit over the next equality. | |
1016 |
diffs[pointer - 1] = ( |
|
1081 | diffs[pointer - 1] = ( | |
1017 |
|
|
1082 | diffs[pointer - 1][0], | |
1018 |
diffs[pointer] |
|
1083 | diffs[pointer - 1][1] + diffs[pointer + 1][1], | |
1019 | diffs[pointer][1][len(diffs[pointer + 1][1]):] + |
|
1084 | ) | |
1020 |
diffs[pointer |
|
1085 | diffs[pointer] = ( | |
|
1086 | diffs[pointer][0], | |||
|
1087 | diffs[pointer][1][len(diffs[pointer + 1][1]) :] | |||
|
1088 | + diffs[pointer + 1][1], | |||
|
1089 | ) | |||
1021 | del diffs[pointer + 1] |
|
1090 | del diffs[pointer + 1] | |
1022 | changes = True |
|
1091 | changes = True | |
1023 | pointer += 1 |
|
1092 | pointer += 1 | |
@@ -1068,13 +1137,17 b' class diff_match_patch:' | |||||
1068 | HTML representation. |
|
1137 | HTML representation. | |
1069 | """ |
|
1138 | """ | |
1070 | html = [] |
|
1139 | html = [] | |
1071 |
for |
|
1140 | for op, data in diffs: | |
1072 | text = (data.replace("&", "&").replace("<", "<") |
|
1141 | text = ( | |
1073 |
|
|
1142 | data.replace("&", "&") | |
|
1143 | .replace("<", "<") | |||
|
1144 | .replace(">", ">") | |||
|
1145 | .replace("\n", "¶<br>") | |||
|
1146 | ) | |||
1074 | if op == self.DIFF_INSERT: |
|
1147 | if op == self.DIFF_INSERT: | |
1075 |
html.append(" |
|
1148 | html.append('<ins style="background:#e6ffe6;">%s</ins>' % text) | |
1076 | elif op == self.DIFF_DELETE: |
|
1149 | elif op == self.DIFF_DELETE: | |
1077 |
html.append(" |
|
1150 | html.append('<del style="background:#ffe6e6;">%s</del>' % text) | |
1078 | elif op == self.DIFF_EQUAL: |
|
1151 | elif op == self.DIFF_EQUAL: | |
1079 | html.append("<span>%s</span>" % text) |
|
1152 | html.append("<span>%s</span>" % text) | |
1080 | return "".join(html) |
|
1153 | return "".join(html) | |
@@ -1089,7 +1162,7 b' class diff_match_patch:' | |||||
1089 | Source text. |
|
1162 | Source text. | |
1090 | """ |
|
1163 | """ | |
1091 | text = [] |
|
1164 | text = [] | |
1092 |
for |
|
1165 | for op, data in diffs: | |
1093 | if op != self.DIFF_INSERT: |
|
1166 | if op != self.DIFF_INSERT: | |
1094 | text.append(data) |
|
1167 | text.append(data) | |
1095 | return "".join(text) |
|
1168 | return "".join(text) | |
@@ -1104,7 +1177,7 b' class diff_match_patch:' | |||||
1104 | Destination text. |
|
1177 | Destination text. | |
1105 | """ |
|
1178 | """ | |
1106 | text = [] |
|
1179 | text = [] | |
1107 |
for |
|
1180 | for op, data in diffs: | |
1108 | if op != self.DIFF_DELETE: |
|
1181 | if op != self.DIFF_DELETE: | |
1109 | text.append(data) |
|
1182 | text.append(data) | |
1110 | return "".join(text) |
|
1183 | return "".join(text) | |
@@ -1122,7 +1195,7 b' class diff_match_patch:' | |||||
1122 | levenshtein = 0 |
|
1195 | levenshtein = 0 | |
1123 | insertions = 0 |
|
1196 | insertions = 0 | |
1124 | deletions = 0 |
|
1197 | deletions = 0 | |
1125 |
for |
|
1198 | for op, data in diffs: | |
1126 | if op == self.DIFF_INSERT: |
|
1199 | if op == self.DIFF_INSERT: | |
1127 | insertions += len(data) |
|
1200 | insertions += len(data) | |
1128 | elif op == self.DIFF_DELETE: |
|
1201 | elif op == self.DIFF_DELETE: | |
@@ -1148,7 +1221,7 b' class diff_match_patch:' | |||||
1148 | Delta text. |
|
1221 | Delta text. | |
1149 | """ |
|
1222 | """ | |
1150 | text = [] |
|
1223 | text = [] | |
1151 |
for |
|
1224 | for op, data in diffs: | |
1152 | if op == self.DIFF_INSERT: |
|
1225 | if op == self.DIFF_INSERT: | |
1153 | # High ascii will raise UnicodeDecodeError. Use Unicode instead. |
|
1226 | # High ascii will raise UnicodeDecodeError. Use Unicode instead. | |
1154 | data = data.encode("utf-8") |
|
1227 | data = data.encode("utf-8") | |
@@ -1173,7 +1246,7 b' class diff_match_patch:' | |||||
1173 | Raises: |
|
1246 | Raises: | |
1174 | ValueError: If invalid input. |
|
1247 | ValueError: If invalid input. | |
1175 | """ |
|
1248 | """ | |
1176 |
if type(delta) == |
|
1249 | if type(delta) == str: | |
1177 | # Deltas should be composed of a subset of ascii chars, Unicode not |
|
1250 | # Deltas should be composed of a subset of ascii chars, Unicode not | |
1178 | # required. If this encode raises UnicodeEncodeError, delta is invalid. |
|
1251 | # required. If this encode raises UnicodeEncodeError, delta is invalid. | |
1179 | delta = delta.encode("ascii") |
|
1252 | delta = delta.encode("ascii") | |
@@ -1188,7 +1261,7 b' class diff_match_patch:' | |||||
1188 | # operation of this token (delete, insert, equality). |
|
1261 | # operation of this token (delete, insert, equality). | |
1189 | param = token[1:] |
|
1262 | param = token[1:] | |
1190 | if token[0] == "+": |
|
1263 | if token[0] == "+": | |
1191 |
param = urllib.parse.unquote(param) |
|
1264 | param = urllib.parse.unquote(param) | |
1192 | diffs.append((self.DIFF_INSERT, param)) |
|
1265 | diffs.append((self.DIFF_INSERT, param)) | |
1193 | elif token[0] == "-" or token[0] == "=": |
|
1266 | elif token[0] == "-" or token[0] == "=": | |
1194 | try: |
|
1267 | try: | |
@@ -1205,12 +1278,14 b' class diff_match_patch:' | |||||
1205 | diffs.append((self.DIFF_DELETE, text)) |
|
1278 | diffs.append((self.DIFF_DELETE, text)) | |
1206 | else: |
|
1279 | else: | |
1207 | # Anything else is an error. |
|
1280 | # Anything else is an error. | |
1208 | raise ValueError("Invalid diff operation in diff_fromDelta: " + |
|
1281 | raise ValueError( | |
1209 | token[0]) |
|
1282 | "Invalid diff operation in diff_fromDelta: " + token[0] | |
|
1283 | ) | |||
1210 | if pointer != len(text1): |
|
1284 | if pointer != len(text1): | |
1211 | raise ValueError( |
|
1285 | raise ValueError( | |
1212 |
"Delta length (%d) does not equal source text length (%d)." |
|
1286 | "Delta length (%d) does not equal source text length (%d)." | |
1213 |
(pointer, len(text1)) |
|
1287 | % (pointer, len(text1)) | |
|
1288 | ) | |||
1214 | return diffs |
|
1289 | return diffs | |
1215 |
|
1290 | |||
1216 | # MATCH FUNCTIONS |
|
1291 | # MATCH FUNCTIONS | |
@@ -1329,8 +1404,11 b' class diff_match_patch:' | |||||
1329 | if d == 0: # First pass: exact match. |
|
1404 | if d == 0: # First pass: exact match. | |
1330 | rd[j] = ((rd[j + 1] << 1) | 1) & charMatch |
|
1405 | rd[j] = ((rd[j + 1] << 1) | 1) & charMatch | |
1331 | else: # Subsequent passes: fuzzy match. |
|
1406 | else: # Subsequent passes: fuzzy match. | |
1332 | rd[j] = (((rd[j + 1] << 1) | 1) & charMatch) | ( |
|
1407 | rd[j] = ( | |
1333 |
(( |
|
1408 | (((rd[j + 1] << 1) | 1) & charMatch) | |
|
1409 | | (((last_rd[j + 1] | last_rd[j]) << 1) | 1) | |||
|
1410 | | last_rd[j + 1] | |||
|
1411 | ) | |||
1334 | if rd[j] & matchmask: |
|
1412 | if rd[j] & matchmask: | |
1335 | score = match_bitapScore(d, j - 1) |
|
1413 | score = match_bitapScore(d, j - 1) | |
1336 | # This match will almost certainly be better than any existing match. |
|
1414 | # This match will almost certainly be better than any existing match. | |
@@ -1384,12 +1462,14 b' class diff_match_patch:' | |||||
1384 |
|
1462 | |||
1385 | # Look for the first and last matches of pattern in text. If two different |
|
1463 | # Look for the first and last matches of pattern in text. If two different | |
1386 | # matches are found, increase the pattern length. |
|
1464 | # matches are found, increase the pattern length. | |
1387 |
while |
|
1465 | while text.find(pattern) != text.rfind(pattern) and ( | |
1388 |
|
|
1466 | self.Match_MaxBits == 0 | |
1389 | self.Patch_Margin)): |
|
1467 | or len(pattern) < self.Match_MaxBits - self.Patch_Margin - self.Patch_Margin | |
|
1468 | ): | |||
1390 | padding += self.Patch_Margin |
|
1469 | padding += self.Patch_Margin | |
1391 | pattern = text[max(0, patch.start2 - padding) : |
|
1470 | pattern = text[ | |
1392 |
patch.start2 + patch.length1 + padding |
|
1471 | max(0, patch.start2 - padding) : patch.start2 + patch.length1 + padding | |
|
1472 | ] | |||
1393 | # Add one chunk for good luck. |
|
1473 | # Add one chunk for good luck. | |
1394 | padding += self.Patch_Margin |
|
1474 | padding += self.Patch_Margin | |
1395 |
|
1475 | |||
@@ -1398,8 +1478,9 b' class diff_match_patch:' | |||||
1398 | if prefix: |
|
1478 | if prefix: | |
1399 | patch.diffs[:0] = [(self.DIFF_EQUAL, prefix)] |
|
1479 | patch.diffs[:0] = [(self.DIFF_EQUAL, prefix)] | |
1400 | # Add the suffix. |
|
1480 | # Add the suffix. | |
1401 | suffix = text[patch.start2 + patch.length1 : |
|
1481 | suffix = text[ | |
1402 |
|
|
1482 | patch.start2 + patch.length1 : patch.start2 + patch.length1 + padding | |
|
1483 | ] | |||
1403 | if suffix: |
|
1484 | if suffix: | |
1404 | patch.diffs.append((self.DIFF_EQUAL, suffix)) |
|
1485 | patch.diffs.append((self.DIFF_EQUAL, suffix)) | |
1405 |
|
1486 | |||
@@ -1455,8 +1536,7 b' class diff_match_patch:' | |||||
1455 | # Method 3: text1, diffs |
|
1536 | # Method 3: text1, diffs | |
1456 | text1 = a |
|
1537 | text1 = a | |
1457 | diffs = b |
|
1538 | diffs = b | |
1458 |
elif |
|
1539 | elif isinstance(a, str) and isinstance(b, str) and isinstance(c, list): | |
1459 | isinstance(c, list)): |
|
|||
1460 | # Method 4: text1, text2, diffs |
|
1540 | # Method 4: text1, text2, diffs | |
1461 | # text2 is not used. |
|
1541 | # text2 is not used. | |
1462 | text1 = a |
|
1542 | text1 = a | |
@@ -1482,24 +1562,31 b' class diff_match_patch:' | |||||
1482 | # Insertion |
|
1562 | # Insertion | |
1483 | patch.diffs.append(diffs[x]) |
|
1563 | patch.diffs.append(diffs[x]) | |
1484 | patch.length2 += len(diff_text) |
|
1564 | patch.length2 += len(diff_text) | |
1485 | postpatch_text = (postpatch_text[:char_count2] + diff_text + |
|
1565 | postpatch_text = ( | |
1486 |
|
|
1566 | postpatch_text[:char_count2] | |
|
1567 | + diff_text | |||
|
1568 | + postpatch_text[char_count2:] | |||
|
1569 | ) | |||
1487 | elif diff_type == self.DIFF_DELETE: |
|
1570 | elif diff_type == self.DIFF_DELETE: | |
1488 | # Deletion. |
|
1571 | # Deletion. | |
1489 | patch.length1 += len(diff_text) |
|
1572 | patch.length1 += len(diff_text) | |
1490 | patch.diffs.append(diffs[x]) |
|
1573 | patch.diffs.append(diffs[x]) | |
1491 | postpatch_text = (postpatch_text[:char_count2] + |
|
1574 | postpatch_text = ( | |
1492 |
|
|
1575 | postpatch_text[:char_count2] | |
1493 | elif (diff_type == self.DIFF_EQUAL and |
|
1576 | + postpatch_text[char_count2 + len(diff_text) :] | |
1494 | len(diff_text) <= 2 * self.Patch_Margin and |
|
1577 | ) | |
1495 | len(patch.diffs) != 0 and len(diffs) != x + 1): |
|
1578 | elif ( | |
|
1579 | diff_type == self.DIFF_EQUAL | |||
|
1580 | and len(diff_text) <= 2 * self.Patch_Margin | |||
|
1581 | and len(patch.diffs) != 0 | |||
|
1582 | and len(diffs) != x + 1 | |||
|
1583 | ): | |||
1496 | # Small equality inside a patch. |
|
1584 | # Small equality inside a patch. | |
1497 | patch.diffs.append(diffs[x]) |
|
1585 | patch.diffs.append(diffs[x]) | |
1498 | patch.length1 += len(diff_text) |
|
1586 | patch.length1 += len(diff_text) | |
1499 | patch.length2 += len(diff_text) |
|
1587 | patch.length2 += len(diff_text) | |
1500 |
|
1588 | |||
1501 |
if |
|
1589 | if diff_type == self.DIFF_EQUAL and len(diff_text) >= 2 * self.Patch_Margin: | |
1502 | len(diff_text) >= 2 * self.Patch_Margin): |
|
|||
1503 | # Time for a new patch. |
|
1590 | # Time for a new patch. | |
1504 | if len(patch.diffs) != 0: |
|
1591 | if len(patch.diffs) != 0: | |
1505 | self.patch_addContext(patch, prepatch_text) |
|
1592 | self.patch_addContext(patch, prepatch_text) | |
@@ -1579,11 +1666,15 b' class diff_match_patch:' | |||||
1579 | if len(text1) > self.Match_MaxBits: |
|
1666 | if len(text1) > self.Match_MaxBits: | |
1580 | # patch_splitMax will only provide an oversized pattern in the case of |
|
1667 | # patch_splitMax will only provide an oversized pattern in the case of | |
1581 | # a monster delete. |
|
1668 | # a monster delete. | |
1582 |
start_loc = self.match_main( |
|
1669 | start_loc = self.match_main( | |
1583 |
|
|
1670 | text, text1[: self.Match_MaxBits], expected_loc | |
|
1671 | ) | |||
1584 | if start_loc != -1: |
|
1672 | if start_loc != -1: | |
1585 |
end_loc = self.match_main( |
|
1673 | end_loc = self.match_main( | |
1586 | expected_loc + len(text1) - self.Match_MaxBits) |
|
1674 | text, | |
|
1675 | text1[-self.Match_MaxBits :], | |||
|
1676 | expected_loc + len(text1) - self.Match_MaxBits, | |||
|
1677 | ) | |||
1587 | if end_loc == -1 or start_loc >= end_loc: |
|
1678 | if end_loc == -1 or start_loc >= end_loc: | |
1588 | # Can't find valid trailing context. Drop this patch. |
|
1679 | # Can't find valid trailing context. Drop this patch. | |
1589 | start_loc = -1 |
|
1680 | start_loc = -1 | |
@@ -1604,29 +1695,42 b' class diff_match_patch:' | |||||
1604 | text2 = text[start_loc : end_loc + self.Match_MaxBits] |
|
1695 | text2 = text[start_loc : end_loc + self.Match_MaxBits] | |
1605 | if text1 == text2: |
|
1696 | if text1 == text2: | |
1606 | # Perfect match, just shove the replacement text in. |
|
1697 | # Perfect match, just shove the replacement text in. | |
1607 | text = (text[:start_loc] + self.diff_text2(patch.diffs) + |
|
1698 | text = ( | |
1608 |
text[start_loc |
|
1699 | text[:start_loc] | |
|
1700 | + self.diff_text2(patch.diffs) | |||
|
1701 | + text[start_loc + len(text1) :] | |||
|
1702 | ) | |||
1609 | else: |
|
1703 | else: | |
1610 | # Imperfect match. |
|
1704 | # Imperfect match. | |
1611 | # Run a diff to get a framework of equivalent indices. |
|
1705 | # Run a diff to get a framework of equivalent indices. | |
1612 | diffs = self.diff_main(text1, text2, False) |
|
1706 | diffs = self.diff_main(text1, text2, False) | |
1613 | if (len(text1) > self.Match_MaxBits and |
|
1707 | if ( | |
1614 | self.diff_levenshtein(diffs) / float(len(text1)) > |
|
1708 | len(text1) > self.Match_MaxBits | |
1615 | self.Patch_DeleteThreshold): |
|
1709 | and self.diff_levenshtein(diffs) / float(len(text1)) | |
|
1710 | > self.Patch_DeleteThreshold | |||
|
1711 | ): | |||
1616 | # The end points match, but the content is unacceptably bad. |
|
1712 | # The end points match, but the content is unacceptably bad. | |
1617 | results[-1] = False |
|
1713 | results[-1] = False | |
1618 | else: |
|
1714 | else: | |
1619 | self.diff_cleanupSemanticLossless(diffs) |
|
1715 | self.diff_cleanupSemanticLossless(diffs) | |
1620 | index1 = 0 |
|
1716 | index1 = 0 | |
1621 |
for |
|
1717 | for op, data in patch.diffs: | |
1622 | if op != self.DIFF_EQUAL: |
|
1718 | if op != self.DIFF_EQUAL: | |
1623 | index2 = self.diff_xIndex(diffs, index1) |
|
1719 | index2 = self.diff_xIndex(diffs, index1) | |
1624 | if op == self.DIFF_INSERT: # Insertion |
|
1720 | if op == self.DIFF_INSERT: # Insertion | |
1625 | text = text[:start_loc + index2] + data + text[start_loc + |
|
1721 | text = ( | |
1626 |
|
|
1722 | text[: start_loc + index2] | |
|
1723 | + data | |||
|
1724 | + text[start_loc + index2 :] | |||
|
1725 | ) | |||
1627 | elif op == self.DIFF_DELETE: # Deletion |
|
1726 | elif op == self.DIFF_DELETE: # Deletion | |
1628 | text = text[:start_loc + index2] + text[start_loc + |
|
1727 | text = ( | |
1629 | self.diff_xIndex(diffs, index1 + len(data)):] |
|
1728 | text[: start_loc + index2] | |
|
1729 | + text[ | |||
|
1730 | start_loc | |||
|
1731 | + self.diff_xIndex(diffs, index1 + len(data)) : | |||
|
1732 | ] | |||
|
1733 | ) | |||
1630 | if op != self.DIFF_DELETE: |
|
1734 | if op != self.DIFF_DELETE: | |
1631 | index1 += len(data) |
|
1735 | index1 += len(data) | |
1632 | # Strip the padding off. |
|
1736 | # Strip the padding off. | |
@@ -1713,7 +1817,7 b' class diff_match_patch:' | |||||
1713 | x -= 1 |
|
1817 | x -= 1 | |
1714 | start1 = bigpatch.start1 |
|
1818 | start1 = bigpatch.start1 | |
1715 | start2 = bigpatch.start2 |
|
1819 | start2 = bigpatch.start2 | |
1716 |
precontext = |
|
1820 | precontext = "" | |
1717 | while len(bigpatch.diffs) != 0: |
|
1821 | while len(bigpatch.diffs) != 0: | |
1718 | # Create one of several smaller patches. |
|
1822 | # Create one of several smaller patches. | |
1719 | patch = patch_obj() |
|
1823 | patch = patch_obj() | |
@@ -1724,8 +1828,10 b' class diff_match_patch:' | |||||
1724 | patch.length1 = patch.length2 = len(precontext) |
|
1828 | patch.length1 = patch.length2 = len(precontext) | |
1725 | patch.diffs.append((self.DIFF_EQUAL, precontext)) |
|
1829 | patch.diffs.append((self.DIFF_EQUAL, precontext)) | |
1726 |
|
1830 | |||
1727 | while (len(bigpatch.diffs) != 0 and |
|
1831 | while ( | |
1728 | patch.length1 < patch_size - self.Patch_Margin): |
|
1832 | len(bigpatch.diffs) != 0 | |
|
1833 | and patch.length1 < patch_size - self.Patch_Margin | |||
|
1834 | ): | |||
1729 | (diff_type, diff_text) = bigpatch.diffs[0] |
|
1835 | (diff_type, diff_text) = bigpatch.diffs[0] | |
1730 | if diff_type == self.DIFF_INSERT: |
|
1836 | if diff_type == self.DIFF_INSERT: | |
1731 | # Insertions are harmless. |
|
1837 | # Insertions are harmless. | |
@@ -1733,9 +1839,12 b' class diff_match_patch:' | |||||
1733 | start2 += len(diff_text) |
|
1839 | start2 += len(diff_text) | |
1734 | patch.diffs.append(bigpatch.diffs.pop(0)) |
|
1840 | patch.diffs.append(bigpatch.diffs.pop(0)) | |
1735 | empty = False |
|
1841 | empty = False | |
1736 | elif (diff_type == self.DIFF_DELETE and len(patch.diffs) == 1 and |
|
1842 | elif ( | |
1737 |
|
|
1843 | diff_type == self.DIFF_DELETE | |
1738 | len(diff_text) > 2 * patch_size): |
|
1844 | and len(patch.diffs) == 1 | |
|
1845 | and patch.diffs[0][0] == self.DIFF_EQUAL | |||
|
1846 | and len(diff_text) > 2 * patch_size | |||
|
1847 | ): | |||
1739 | # This is a large deletion. Let it pass in one chunk. |
|
1848 | # This is a large deletion. Let it pass in one chunk. | |
1740 | patch.length1 += len(diff_text) |
|
1849 | patch.length1 += len(diff_text) | |
1741 | start1 += len(diff_text) |
|
1850 | start1 += len(diff_text) | |
@@ -1744,8 +1853,9 b' class diff_match_patch:' | |||||
1744 | del bigpatch.diffs[0] |
|
1853 | del bigpatch.diffs[0] | |
1745 | else: |
|
1854 | else: | |
1746 | # Deletion or equality. Only take as much as we can stomach. |
|
1855 | # Deletion or equality. Only take as much as we can stomach. | |
1747 |
diff_text = diff_text[ |
|
1856 | diff_text = diff_text[ | |
1748 |
|
|
1857 | : patch_size - patch.length1 - self.Patch_Margin | |
|
1858 | ] | |||
1749 | patch.length1 += len(diff_text) |
|
1859 | patch.length1 += len(diff_text) | |
1750 | start1 += len(diff_text) |
|
1860 | start1 += len(diff_text) | |
1751 | if diff_type == self.DIFF_EQUAL: |
|
1861 | if diff_type == self.DIFF_EQUAL: | |
@@ -1758,8 +1868,10 b' class diff_match_patch:' | |||||
1758 | if diff_text == bigpatch.diffs[0][1]: |
|
1868 | if diff_text == bigpatch.diffs[0][1]: | |
1759 | del bigpatch.diffs[0] |
|
1869 | del bigpatch.diffs[0] | |
1760 | else: |
|
1870 | else: | |
1761 |
bigpatch.diffs[0] = ( |
|
1871 | bigpatch.diffs[0] = ( | |
1762 |
|
|
1872 | bigpatch.diffs[0][0], | |
|
1873 | bigpatch.diffs[0][1][len(diff_text) :], | |||
|
1874 | ) | |||
1763 |
|
1875 | |||
1764 | # Compute the head context for the next patch. |
|
1876 | # Compute the head context for the next patch. | |
1765 | precontext = self.diff_text2(patch.diffs) |
|
1877 | precontext = self.diff_text2(patch.diffs) | |
@@ -1770,8 +1882,10 b' class diff_match_patch:' | |||||
1770 | patch.length1 += len(postcontext) |
|
1882 | patch.length1 += len(postcontext) | |
1771 | patch.length2 += len(postcontext) |
|
1883 | patch.length2 += len(postcontext) | |
1772 | if len(patch.diffs) != 0 and patch.diffs[-1][0] == self.DIFF_EQUAL: |
|
1884 | if len(patch.diffs) != 0 and patch.diffs[-1][0] == self.DIFF_EQUAL: | |
1773 | patch.diffs[-1] = (self.DIFF_EQUAL, patch.diffs[-1][1] + |
|
1885 | patch.diffs[-1] = ( | |
1774 |
|
|
1886 | self.DIFF_EQUAL, | |
|
1887 | patch.diffs[-1][1] + postcontext, | |||
|
1888 | ) | |||
1775 | else: |
|
1889 | else: | |
1776 | patch.diffs.append((self.DIFF_EQUAL, postcontext)) |
|
1890 | patch.diffs.append((self.DIFF_EQUAL, postcontext)) | |
1777 |
|
1891 | |||
@@ -1813,7 +1927,7 b' class diff_match_patch:' | |||||
1813 | patches = [] |
|
1927 | patches = [] | |
1814 | if not textline: |
|
1928 | if not textline: | |
1815 | return patches |
|
1929 | return patches | |
1816 |
text = textline.split( |
|
1930 | text = textline.split("\n") | |
1817 | while len(text) != 0: |
|
1931 | while len(text) != 0: | |
1818 | m = re.match("^@@ -(\d+),?(\d*) \+(\d+),?(\d*) @@$", text[0]) |
|
1932 | m = re.match("^@@ -(\d+),?(\d*) \+(\d+),?(\d*) @@$", text[0]) | |
1819 | if not m: |
|
1933 | if not m: | |
@@ -1821,20 +1935,20 b' class diff_match_patch:' | |||||
1821 | patch = patch_obj() |
|
1935 | patch = patch_obj() | |
1822 | patches.append(patch) |
|
1936 | patches.append(patch) | |
1823 | patch.start1 = int(m.group(1)) |
|
1937 | patch.start1 = int(m.group(1)) | |
1824 |
if m.group(2) == |
|
1938 | if m.group(2) == "": | |
1825 | patch.start1 -= 1 |
|
1939 | patch.start1 -= 1 | |
1826 | patch.length1 = 1 |
|
1940 | patch.length1 = 1 | |
1827 |
elif m.group(2) == |
|
1941 | elif m.group(2) == "0": | |
1828 | patch.length1 = 0 |
|
1942 | patch.length1 = 0 | |
1829 | else: |
|
1943 | else: | |
1830 | patch.start1 -= 1 |
|
1944 | patch.start1 -= 1 | |
1831 | patch.length1 = int(m.group(2)) |
|
1945 | patch.length1 = int(m.group(2)) | |
1832 |
|
1946 | |||
1833 | patch.start2 = int(m.group(3)) |
|
1947 | patch.start2 = int(m.group(3)) | |
1834 |
if m.group(4) == |
|
1948 | if m.group(4) == "": | |
1835 | patch.start2 -= 1 |
|
1949 | patch.start2 -= 1 | |
1836 | patch.length2 = 1 |
|
1950 | patch.length2 = 1 | |
1837 |
elif m.group(4) == |
|
1951 | elif m.group(4) == "0": | |
1838 | patch.length2 = 0 |
|
1952 | patch.length2 = 0 | |
1839 | else: |
|
1953 | else: | |
1840 | patch.start2 -= 1 |
|
1954 | patch.start2 -= 1 | |
@@ -1846,22 +1960,22 b' class diff_match_patch:' | |||||
1846 | if text[0]: |
|
1960 | if text[0]: | |
1847 | sign = text[0][0] |
|
1961 | sign = text[0][0] | |
1848 | else: |
|
1962 | else: | |
1849 |
sign = |
|
1963 | sign = "" | |
1850 | line = urllib.parse.unquote(text[0][1:]) |
|
1964 | line = urllib.parse.unquote(text[0][1:]) | |
1851 | line = line.decode("utf-8") |
|
1965 | line = line.decode("utf-8") | |
1852 |
if sign == |
|
1966 | if sign == "+": | |
1853 | # Insertion. |
|
1967 | # Insertion. | |
1854 | patch.diffs.append((self.DIFF_INSERT, line)) |
|
1968 | patch.diffs.append((self.DIFF_INSERT, line)) | |
1855 |
elif sign == |
|
1969 | elif sign == "-": | |
1856 | # Deletion. |
|
1970 | # Deletion. | |
1857 | patch.diffs.append((self.DIFF_DELETE, line)) |
|
1971 | patch.diffs.append((self.DIFF_DELETE, line)) | |
1858 |
elif sign == |
|
1972 | elif sign == " ": | |
1859 | # Minor equality. |
|
1973 | # Minor equality. | |
1860 | patch.diffs.append((self.DIFF_EQUAL, line)) |
|
1974 | patch.diffs.append((self.DIFF_EQUAL, line)) | |
1861 |
elif sign == |
|
1975 | elif sign == "@": | |
1862 | # Start of next patch. |
|
1976 | # Start of next patch. | |
1863 | break |
|
1977 | break | |
1864 |
elif sign == |
|
1978 | elif sign == "": | |
1865 | # Blank line? Whatever. |
|
1979 | # Blank line? Whatever. | |
1866 | pass |
|
1980 | pass | |
1867 | else: |
|
1981 | else: | |
@@ -1872,12 +1986,10 b' class diff_match_patch:' | |||||
1872 |
|
1986 | |||
1873 |
|
1987 | |||
1874 | class patch_obj: |
|
1988 | class patch_obj: | |
1875 | """Class representing one patch operation. |
|
1989 | """Class representing one patch operation.""" | |
1876 | """ |
|
|||
1877 |
|
1990 | |||
1878 | def __init__(self): |
|
1991 | def __init__(self): | |
1879 | """Initializes with an empty list of diffs. |
|
1992 | """Initializes with an empty list of diffs.""" | |
1880 | """ |
|
|||
1881 | self.diffs = [] |
|
1993 | self.diffs = [] | |
1882 | self.start1 = None |
|
1994 | self.start1 = None | |
1883 | self.start2 = None |
|
1995 | self.start2 = None | |
@@ -1906,7 +2018,7 b' class patch_obj:' | |||||
1906 | coords2 = str(self.start2 + 1) + "," + str(self.length2) |
|
2018 | coords2 = str(self.start2 + 1) + "," + str(self.length2) | |
1907 | text = ["@@ -", coords1, " +", coords2, " @@\n"] |
|
2019 | text = ["@@ -", coords1, " +", coords2, " @@\n"] | |
1908 | # Escape the body of the patch with %xx notation. |
|
2020 | # Escape the body of the patch with %xx notation. | |
1909 |
for |
|
2021 | for op, data in self.diffs: | |
1910 | if op == diff_match_patch.DIFF_INSERT: |
|
2022 | if op == diff_match_patch.DIFF_INSERT: | |
1911 | text.append("+") |
|
2023 | text.append("+") | |
1912 | elif op == diff_match_patch.DIFF_DELETE: |
|
2024 | elif op == diff_match_patch.DIFF_DELETE: | |
@@ -1916,4 +2028,4 b' class patch_obj:' | |||||
1916 | # High ascii will raise UnicodeDecodeError. Use Unicode instead. |
|
2028 | # High ascii will raise UnicodeDecodeError. Use Unicode instead. | |
1917 | data = data.encode("utf-8") |
|
2029 | data = data.encode("utf-8") | |
1918 | text.append(urllib.parse.quote(data, "!~*'();/?:@&=+$,# ") + "\n") |
|
2030 | text.append(urllib.parse.quote(data, "!~*'();/?:@&=+$,# ") + "\n") | |
1919 | return "".join(text) No newline at end of file |
|
2031 | return "".join(text) |
General Comments 0
You need to be logged in to leave comments.
Login now