##// END OF EJS Templates
diff-match: python3/code fixes for backport code
super-admin -
r4954:f53817a4 default
parent child Browse files
Show More
@@ -1,6 +1,3 b''
1
2
3
4 1 """Diff Match and Patch
5 2
6 3 Copyright 2006 Google Inc.
@@ -25,7 +22,7 b' Computes the difference between two text'
25 22 Applies the patch onto another text, allowing for errors.
26 23 """
27 24
28 __author__ = 'fraser@google.com (Neil Fraser)'
25 __author__ = "fraser@google.com (Neil Fraser)"
29 26
30 27 import math
31 28 import re
@@ -98,7 +95,7 b' class diff_match_patch:'
98 95 if deadline is None:
99 96 # Unlike in most languages, Python counts time in seconds.
100 97 if self.Diff_Timeout <= 0:
101 deadline = sys.maxint
98 deadline = sys.maxsize
102 99 else:
103 100 deadline = time.time() + self.Diff_Timeout
104 101
@@ -121,7 +118,7 b' class diff_match_patch:'
121 118 # Trim off common suffix (speedup).
122 119 commonlength = self.diff_commonSuffix(text1, text2)
123 120 if commonlength == 0:
124 commonsuffix = ''
121 commonsuffix = ""
125 122 else:
126 123 commonsuffix = text1[-commonlength:]
127 124 text1 = text1[:-commonlength]
@@ -168,8 +165,11 b' class diff_match_patch:'
168 165 i = longtext.find(shorttext)
169 166 if i != -1:
170 167 # Shorter text is inside the longer text (speedup).
171 diffs = [(self.DIFF_INSERT, longtext[:i]), (self.DIFF_EQUAL, shorttext),
172 (self.DIFF_INSERT, longtext[i + len(shorttext):])]
168 diffs = [
169 (self.DIFF_INSERT, longtext[:i]),
170 (self.DIFF_EQUAL, shorttext),
171 (self.DIFF_INSERT, longtext[i + len(shorttext) :]),
172 ]
173 173 # Swap insertions for deletions if diff is reversed.
174 174 if len(text1) > len(text2):
175 175 diffs[0] = (self.DIFF_DELETE, diffs[0][1])
@@ -223,12 +223,12 b' class diff_match_patch:'
223 223
224 224 # Rediff any replacement blocks, this time character-by-character.
225 225 # Add a dummy entry at the end.
226 diffs.append((self.DIFF_EQUAL, ''))
226 diffs.append((self.DIFF_EQUAL, ""))
227 227 pointer = 0
228 228 count_delete = 0
229 229 count_insert = 0
230 text_delete = ''
231 text_insert = ''
230 text_delete = ""
231 text_insert = ""
232 232 while pointer < len(diffs):
233 233 if diffs[pointer][0] == self.DIFF_INSERT:
234 234 count_insert += 1
@@ -245,8 +245,8 b' class diff_match_patch:'
245 245 pointer = pointer - count_delete - count_insert + len(a)
246 246 count_insert = 0
247 247 count_delete = 0
248 text_delete = ''
249 text_insert = ''
248 text_delete = ""
249 text_insert = ""
250 250
251 251 pointer += 1
252 252
@@ -280,7 +280,7 b' class diff_match_patch:'
280 280 delta = text1_length - text2_length
281 281 # If the total number of characters is odd, then the front path will
282 282 # collide with the reverse path.
283 front = (delta % 2 != 0)
283 front = delta % 2 != 0
284 284 # Offsets for start and end of k loop.
285 285 # Prevents mapping of space beyond the grid.
286 286 k1start = 0
@@ -295,14 +295,14 b' class diff_match_patch:'
295 295 # Walk the front path one step.
296 296 for k1 in range(-d + k1start, d + 1 - k1end, 2):
297 297 k1_offset = v_offset + k1
298 if k1 == -d or (k1 != d and
299 v1[k1_offset - 1] < v1[k1_offset + 1]):
298 if k1 == -d or (k1 != d and v1[k1_offset - 1] < v1[k1_offset + 1]):
300 299 x1 = v1[k1_offset + 1]
301 300 else:
302 301 x1 = v1[k1_offset - 1] + 1
303 302 y1 = x1 - k1
304 while (x1 < text1_length and y1 < text2_length and
305 text1[x1] == text2[y1]):
303 while (
304 x1 < text1_length and y1 < text2_length and text1[x1] == text2[y1]
305 ):
306 306 x1 += 1
307 307 y1 += 1
308 308 v1[k1_offset] = x1
@@ -324,14 +324,16 b' class diff_match_patch:'
324 324 # Walk the reverse path one step.
325 325 for k2 in range(-d + k2start, d + 1 - k2end, 2):
326 326 k2_offset = v_offset + k2
327 if k2 == -d or (k2 != d and
328 v2[k2_offset - 1] < v2[k2_offset + 1]):
327 if k2 == -d or (k2 != d and v2[k2_offset - 1] < v2[k2_offset + 1]):
329 328 x2 = v2[k2_offset + 1]
330 329 else:
331 330 x2 = v2[k2_offset - 1] + 1
332 331 y2 = x2 - k2
333 while (x2 < text1_length and y2 < text2_length and
334 text1[-x2 - 1] == text2[-y2 - 1]):
332 while (
333 x2 < text1_length
334 and y2 < text2_length
335 and text1[-x2 - 1] == text2[-y2 - 1]
336 ):
335 337 x2 += 1
336 338 y2 += 1
337 339 v2[k2_offset] = x2
@@ -399,7 +401,7 b' class diff_match_patch:'
399 401
400 402 # "\x00" is a valid character, but various debuggers don't like it.
401 403 # So we'll insert a junk entry to avoid generating a null character.
402 lineArray.append('')
404 lineArray.append("")
403 405
404 406 def diff_linesToCharsMunge(text):
405 407 """Split a text into an array of strings. Reduce the texts to a string
@@ -419,18 +421,18 b' class diff_match_patch:'
419 421 lineStart = 0
420 422 lineEnd = -1
421 423 while lineEnd < len(text) - 1:
422 lineEnd = text.find('\n', lineStart)
424 lineEnd = text.find("\n", lineStart)
423 425 if lineEnd == -1:
424 426 lineEnd = len(text) - 1
425 427 line = text[lineStart:lineEnd + 1]
426 428 lineStart = lineEnd + 1
427 429
428 430 if line in lineHash:
429 chars.append(unichr(lineHash[line]))
431 chars.append(chr(lineHash[line]))
430 432 else:
431 433 lineArray.append(line)
432 434 lineHash[line] = len(lineArray) - 1
433 chars.append(unichr(len(lineArray) - 1))
435 chars.append(chr(len(lineArray) - 1))
434 436 return "".join(chars)
435 437
436 438 chars1 = diff_linesToCharsMunge(text1)
@@ -499,8 +501,10 b' class diff_match_patch:'
499 501 pointermid = pointermax
500 502 pointerend = 0
501 503 while pointermin < pointermid:
502 if (text1[-pointermid:len(text1) - pointerend] ==
503 text2[-pointermid:len(text2) - pointerend]):
504 if (
505 text1[-pointermid : len(text1) - pointerend]
506 == text2[-pointermid : len(text2) - pointerend]
507 ):
504 508 pointermin = pointermid
505 509 pointerend = pointermin
506 510 else:
@@ -590,14 +594,16 b' class diff_match_patch:'
590 594 common middle. Or None if there was no match.
591 595 """
592 596 seed = longtext[i:i + len(longtext) // 4]
593 best_common = ''
597 best_common = ""
594 598 j = shorttext.find(seed)
595 599 while j != -1:
596 600 prefixLength = self.diff_commonPrefix(longtext[i:], shorttext[j:])
597 601 suffixLength = self.diff_commonSuffix(longtext[:i], shorttext[:j])
598 602 if len(best_common) < suffixLength + prefixLength:
599 best_common = (shorttext[j - suffixLength:j] +
600 shorttext[j:j + prefixLength])
603 best_common = (
604 shorttext[j - suffixLength : j]
605 + shorttext[j : j + prefixLength]
606 )
601 607 best_longtext_a = longtext[:i - suffixLength]
602 608 best_longtext_b = longtext[i + prefixLength:]
603 609 best_shorttext_a = shorttext[:j - suffixLength]
@@ -605,8 +611,13 b' class diff_match_patch:'
605 611 j = shorttext.find(seed, j + 1)
606 612
607 613 if len(best_common) * 2 >= len(longtext):
608 return (best_longtext_a, best_longtext_b,
609 best_shorttext_a, best_shorttext_b, best_common)
614 return (
615 best_longtext_a,
616 best_longtext_b,
617 best_shorttext_a,
618 best_shorttext_b,
619 best_common,
620 )
610 621 else:
611 622 return None
612 623
@@ -662,14 +673,22 b' class diff_match_patch:'
662 673 length_deletions2 += len(diffs[pointer][1])
663 674 # Eliminate an equality that is smaller or equal to the edits on both
664 675 # sides of it.
665 if (lastequality and (len(lastequality) <=
666 max(length_insertions1, length_deletions1)) and
667 (len(lastequality) <= max(length_insertions2, length_deletions2))):
676 if (
677 lastequality
678 and (
679 len(lastequality) <= max(length_insertions1, length_deletions1)
680 )
681 and (
682 len(lastequality) <= max(length_insertions2, length_deletions2)
683 )
684 ):
668 685 # Duplicate record.
669 686 diffs.insert(equalities[-1], (self.DIFF_DELETE, lastequality))
670 687 # Change second copy to insert.
671 diffs[equalities[-1] + 1] = (self.DIFF_INSERT,
672 diffs[equalities[-1] + 1][1])
688 diffs[equalities[-1] + 1] = (
689 self.DIFF_INSERT,
690 diffs[equalities[-1] + 1][1],
691 )
673 692 # Throw away the equality we just deleted.
674 693 equalities.pop()
675 694 # Throw away the previous equality (it needs to be reevaluated).
@@ -699,32 +718,50 b' class diff_match_patch:'
699 718 # Only extract an overlap if it is as big as the edit ahead or behind it.
700 719 pointer = 1
701 720 while pointer < len(diffs):
702 if (diffs[pointer - 1][0] == self.DIFF_DELETE and
703 diffs[pointer][0] == self.DIFF_INSERT):
721 if (
722 diffs[pointer - 1][0] == self.DIFF_DELETE
723 and diffs[pointer][0] == self.DIFF_INSERT
724 ):
704 725 deletion = diffs[pointer - 1][1]
705 726 insertion = diffs[pointer][1]
706 727 overlap_length1 = self.diff_commonOverlap(deletion, insertion)
707 728 overlap_length2 = self.diff_commonOverlap(insertion, deletion)
708 729 if overlap_length1 >= overlap_length2:
709 if (overlap_length1 >= len(deletion) / 2.0 or
710 overlap_length1 >= len(insertion) / 2.0):
730 if (
731 overlap_length1 >= len(deletion) / 2.0
732 or overlap_length1 >= len(insertion) / 2.0
733 ):
711 734 # Overlap found. Insert an equality and trim the surrounding edits.
712 diffs.insert(pointer, (self.DIFF_EQUAL,
713 insertion[:overlap_length1]))
714 diffs[pointer - 1] = (self.DIFF_DELETE,
715 deletion[:len(deletion) - overlap_length1])
716 diffs[pointer + 1] = (self.DIFF_INSERT,
717 insertion[overlap_length1:])
735 diffs.insert(
736 pointer, (self.DIFF_EQUAL, insertion[:overlap_length1])
737 )
738 diffs[pointer - 1] = (
739 self.DIFF_DELETE,
740 deletion[: len(deletion) - overlap_length1],
741 )
742 diffs[pointer + 1] = (
743 self.DIFF_INSERT,
744 insertion[overlap_length1:],
745 )
718 746 pointer += 1
719 747 else:
720 if (overlap_length2 >= len(deletion) / 2.0 or
721 overlap_length2 >= len(insertion) / 2.0):
748 if (
749 overlap_length2 >= len(deletion) / 2.0
750 or overlap_length2 >= len(insertion) / 2.0
751 ):
722 752 # Reverse overlap found.
723 753 # Insert an equality and swap and trim the surrounding edits.
724 diffs.insert(pointer, (self.DIFF_EQUAL, deletion[:overlap_length2]))
725 diffs[pointer - 1] = (self.DIFF_INSERT,
726 insertion[:len(insertion) - overlap_length2])
727 diffs[pointer + 1] = (self.DIFF_DELETE, deletion[overlap_length2:])
754 diffs.insert(
755 pointer, (self.DIFF_EQUAL, deletion[:overlap_length2])
756 )
757 diffs[pointer - 1] = (
758 self.DIFF_INSERT,
759 insertion[: len(insertion) - overlap_length2],
760 )
761 diffs[pointer + 1] = (
762 self.DIFF_DELETE,
763 deletion[overlap_length2:],
764 )
728 765 pointer += 1
729 766 pointer += 1
730 767 pointer += 1
@@ -791,8 +828,10 b' class diff_match_patch:'
791 828 pointer = 1
792 829 # Intentionally ignore the first and last element (don't need checking).
793 830 while pointer < len(diffs) - 1:
794 if (diffs[pointer - 1][0] == self.DIFF_EQUAL and
795 diffs[pointer + 1][0] == self.DIFF_EQUAL):
831 if (
832 diffs[pointer - 1][0] == self.DIFF_EQUAL
833 and diffs[pointer + 1][0] == self.DIFF_EQUAL
834 ):
796 835 # This is a single edit surrounded by equalities.
797 836 equality1 = diffs[pointer - 1][1]
798 837 edit = diffs[pointer][1]
@@ -810,14 +849,16 b' class diff_match_patch:'
810 849 bestEquality1 = equality1
811 850 bestEdit = edit
812 851 bestEquality2 = equality2
813 bestScore = (diff_cleanupSemanticScore(equality1, edit) +
814 diff_cleanupSemanticScore(edit, equality2))
852 bestScore = diff_cleanupSemanticScore(
853 equality1, edit
854 ) + diff_cleanupSemanticScore(edit, equality2)
815 855 while edit and equality2 and edit[0] == equality2[0]:
816 856 equality1 += edit[0]
817 857 edit = edit[1:] + equality2[0]
818 858 equality2 = equality2[1:]
819 score = (diff_cleanupSemanticScore(equality1, edit) +
820 diff_cleanupSemanticScore(edit, equality2))
859 score = diff_cleanupSemanticScore(
860 equality1, edit
861 ) + diff_cleanupSemanticScore(edit, equality2)
821 862 # The >= encourages trailing rather than leading whitespace on edits.
822 863 if score >= bestScore:
823 864 bestScore = score
@@ -841,8 +882,8 b' class diff_match_patch:'
841 882 pointer += 1
842 883
843 884 # Define some regex patterns for matching boundaries.
844 BLANKLINEEND = re.compile(r"\n\r?\n$");
845 BLANKLINESTART = re.compile(r"^\r?\n\r?\n");
885 BLANKLINEEND = re.compile(r"\n\r?\n$")
886 BLANKLINESTART = re.compile(r"^\r?\n\r?\n")
846 887
847 888 def diff_cleanupEfficiency(self, diffs):
848 889 """Reduce the number of edits by eliminating operationally trivial
@@ -861,8 +902,9 b' class diff_match_patch:'
861 902 post_del = False # Is there a deletion operation after the last equality.
862 903 while pointer < len(diffs):
863 904 if diffs[pointer][0] == self.DIFF_EQUAL: # Equality found.
864 if (len(diffs[pointer][1]) < self.Diff_EditCost and
865 (post_ins or post_del)):
905 if len(diffs[pointer][1]) < self.Diff_EditCost and (
906 post_ins or post_del
907 ):
866 908 # Candidate found.
867 909 equalities.append(pointer)
868 910 pre_ins = post_ins
@@ -887,14 +929,20 b' class diff_match_patch:'
887 929 # <ins>A</del>X<ins>C</ins><del>D</del>
888 930 # <ins>A</ins><del>B</del>X<del>C</del>
889 931
890 if lastequality and ((pre_ins and pre_del and post_ins and post_del) or
891 ((len(lastequality) < self.Diff_EditCost / 2) and
892 (pre_ins + pre_del + post_ins + post_del) == 3)):
932 if lastequality and (
933 (pre_ins and pre_del and post_ins and post_del)
934 or (
935 (len(lastequality) < self.Diff_EditCost / 2)
936 and (pre_ins + pre_del + post_ins + post_del) == 3
937 )
938 ):
893 939 # Duplicate record.
894 940 diffs.insert(equalities[-1], (self.DIFF_DELETE, lastequality))
895 941 # Change second copy to insert.
896 diffs[equalities[-1] + 1] = (self.DIFF_INSERT,
897 diffs[equalities[-1] + 1][1])
942 diffs[equalities[-1] + 1] = (
943 self.DIFF_INSERT,
944 diffs[equalities[-1] + 1][1],
945 )
898 946 equalities.pop() # Throw away the equality we just deleted.
899 947 lastequality = None
900 948 if pre_ins and pre_del:
@@ -922,12 +970,12 b' class diff_match_patch:'
922 970 Args:
923 971 diffs: Array of diff tuples.
924 972 """
925 diffs.append((self.DIFF_EQUAL, '')) # Add a dummy entry at the end.
973 diffs.append((self.DIFF_EQUAL, "")) # Add a dummy entry at the end.
926 974 pointer = 0
927 975 count_delete = 0
928 976 count_insert = 0
929 text_delete = ''
930 text_insert = ''
977 text_delete = ""
978 text_insert = ""
931 979 while pointer < len(diffs):
932 980 if diffs[pointer][0] == self.DIFF_INSERT:
933 981 count_insert += 1
@@ -946,31 +994,40 b' class diff_match_patch:'
946 994 if commonlength != 0:
947 995 x = pointer - count_delete - count_insert - 1
948 996 if x >= 0 and diffs[x][0] == self.DIFF_EQUAL:
949 diffs[x] = (diffs[x][0], diffs[x][1] +
950 text_insert[:commonlength])
997 diffs[x] = (
998 diffs[x][0],
999 diffs[x][1] + text_insert[:commonlength],
1000 )
951 1001 else:
952 diffs.insert(0, (self.DIFF_EQUAL, text_insert[:commonlength]))
1002 diffs.insert(
1003 0, (self.DIFF_EQUAL, text_insert[:commonlength])
1004 )
953 1005 pointer += 1
954 1006 text_insert = text_insert[commonlength:]
955 1007 text_delete = text_delete[commonlength:]
956 1008 # Factor out any common suffixies.
957 1009 commonlength = self.diff_commonSuffix(text_insert, text_delete)
958 1010 if commonlength != 0:
959 diffs[pointer] = (diffs[pointer][0], text_insert[-commonlength:] +
960 diffs[pointer][1])
1011 diffs[pointer] = (
1012 diffs[pointer][0],
1013 text_insert[-commonlength:] + diffs[pointer][1],
1014 )
961 1015 text_insert = text_insert[:-commonlength]
962 1016 text_delete = text_delete[:-commonlength]
963 1017 # Delete the offending records and add the merged ones.
964 1018 if count_delete == 0:
965 1019 diffs[pointer - count_insert : pointer] = [
966 (self.DIFF_INSERT, text_insert)]
1020 (self.DIFF_INSERT, text_insert)
1021 ]
967 1022 elif count_insert == 0:
968 1023 diffs[pointer - count_delete : pointer] = [
969 (self.DIFF_DELETE, text_delete)]
1024 (self.DIFF_DELETE, text_delete)
1025 ]
970 1026 else:
971 1027 diffs[pointer - count_delete - count_insert : pointer] = [
972 1028 (self.DIFF_DELETE, text_delete),
973 (self.DIFF_INSERT, text_insert)]
1029 (self.DIFF_INSERT, text_insert),
1030 ]
974 1031 pointer = pointer - count_delete - count_insert + 1
975 1032 if count_delete != 0:
976 1033 pointer += 1
@@ -978,18 +1035,20 b' class diff_match_patch:'
978 1035 pointer += 1
979 1036 elif pointer != 0 and diffs[pointer - 1][0] == self.DIFF_EQUAL:
980 1037 # Merge this equality with the previous one.
981 diffs[pointer - 1] = (diffs[pointer - 1][0],
982 diffs[pointer - 1][1] + diffs[pointer][1])
1038 diffs[pointer - 1] = (
1039 diffs[pointer - 1][0],
1040 diffs[pointer - 1][1] + diffs[pointer][1],
1041 )
983 1042 del diffs[pointer]
984 1043 else:
985 1044 pointer += 1
986 1045
987 1046 count_insert = 0
988 1047 count_delete = 0
989 text_delete = ''
990 text_insert = ''
1048 text_delete = ""
1049 text_insert = ""
991 1050
992 if diffs[-1][1] == '':
1051 if diffs[-1][1] == "":
993 1052 diffs.pop() # Remove the dummy entry at the end.
994 1053
995 1054 # Second pass: look for single edits surrounded on both sides by equalities
@@ -999,25 +1058,35 b' class diff_match_patch:'
999 1058 pointer = 1
1000 1059 # Intentionally ignore the first and last element (don't need checking).
1001 1060 while pointer < len(diffs) - 1:
1002 if (diffs[pointer - 1][0] == self.DIFF_EQUAL and
1003 diffs[pointer + 1][0] == self.DIFF_EQUAL):
1061 if (
1062 diffs[pointer - 1][0] == self.DIFF_EQUAL
1063 and diffs[pointer + 1][0] == self.DIFF_EQUAL
1064 ):
1004 1065 # This is a single edit surrounded by equalities.
1005 1066 if diffs[pointer][1].endswith(diffs[pointer - 1][1]):
1006 1067 # Shift the edit over the previous equality.
1007 diffs[pointer] = (diffs[pointer][0],
1008 diffs[pointer - 1][1] +
1009 diffs[pointer][1][:-len(diffs[pointer - 1][1])])
1010 diffs[pointer + 1] = (diffs[pointer + 1][0],
1011 diffs[pointer - 1][1] + diffs[pointer + 1][1])
1068 diffs[pointer] = (
1069 diffs[pointer][0],
1070 diffs[pointer - 1][1]
1071 + diffs[pointer][1][: -len(diffs[pointer - 1][1])],
1072 )
1073 diffs[pointer + 1] = (
1074 diffs[pointer + 1][0],
1075 diffs[pointer - 1][1] + diffs[pointer + 1][1],
1076 )
1012 1077 del diffs[pointer - 1]
1013 1078 changes = True
1014 1079 elif diffs[pointer][1].startswith(diffs[pointer + 1][1]):
1015 1080 # Shift the edit over the next equality.
1016 diffs[pointer - 1] = (diffs[pointer - 1][0],
1017 diffs[pointer - 1][1] + diffs[pointer + 1][1])
1018 diffs[pointer] = (diffs[pointer][0],
1019 diffs[pointer][1][len(diffs[pointer + 1][1]):] +
1020 diffs[pointer + 1][1])
1081 diffs[pointer - 1] = (
1082 diffs[pointer - 1][0],
1083 diffs[pointer - 1][1] + diffs[pointer + 1][1],
1084 )
1085 diffs[pointer] = (
1086 diffs[pointer][0],
1087 diffs[pointer][1][len(diffs[pointer + 1][1]) :]
1088 + diffs[pointer + 1][1],
1089 )
1021 1090 del diffs[pointer + 1]
1022 1091 changes = True
1023 1092 pointer += 1
@@ -1068,13 +1137,17 b' class diff_match_patch:'
1068 1137 HTML representation.
1069 1138 """
1070 1139 html = []
1071 for (op, data) in diffs:
1072 text = (data.replace("&", "&amp;").replace("<", "&lt;")
1073 .replace(">", "&gt;").replace("\n", "&para;<br>"))
1140 for op, data in diffs:
1141 text = (
1142 data.replace("&", "&amp;")
1143 .replace("<", "&lt;")
1144 .replace(">", "&gt;")
1145 .replace("\n", "&para;<br>")
1146 )
1074 1147 if op == self.DIFF_INSERT:
1075 html.append("<ins style=\"background:#e6ffe6;\">%s</ins>" % text)
1148 html.append('<ins style="background:#e6ffe6;">%s</ins>' % text)
1076 1149 elif op == self.DIFF_DELETE:
1077 html.append("<del style=\"background:#ffe6e6;\">%s</del>" % text)
1150 html.append('<del style="background:#ffe6e6;">%s</del>' % text)
1078 1151 elif op == self.DIFF_EQUAL:
1079 1152 html.append("<span>%s</span>" % text)
1080 1153 return "".join(html)
@@ -1089,7 +1162,7 b' class diff_match_patch:'
1089 1162 Source text.
1090 1163 """
1091 1164 text = []
1092 for (op, data) in diffs:
1165 for op, data in diffs:
1093 1166 if op != self.DIFF_INSERT:
1094 1167 text.append(data)
1095 1168 return "".join(text)
@@ -1104,7 +1177,7 b' class diff_match_patch:'
1104 1177 Destination text.
1105 1178 """
1106 1179 text = []
1107 for (op, data) in diffs:
1180 for op, data in diffs:
1108 1181 if op != self.DIFF_DELETE:
1109 1182 text.append(data)
1110 1183 return "".join(text)
@@ -1122,7 +1195,7 b' class diff_match_patch:'
1122 1195 levenshtein = 0
1123 1196 insertions = 0
1124 1197 deletions = 0
1125 for (op, data) in diffs:
1198 for op, data in diffs:
1126 1199 if op == self.DIFF_INSERT:
1127 1200 insertions += len(data)
1128 1201 elif op == self.DIFF_DELETE:
@@ -1148,7 +1221,7 b' class diff_match_patch:'
1148 1221 Delta text.
1149 1222 """
1150 1223 text = []
1151 for (op, data) in diffs:
1224 for op, data in diffs:
1152 1225 if op == self.DIFF_INSERT:
1153 1226 # High ascii will raise UnicodeDecodeError. Use Unicode instead.
1154 1227 data = data.encode("utf-8")
@@ -1173,7 +1246,7 b' class diff_match_patch:'
1173 1246 Raises:
1174 1247 ValueError: If invalid input.
1175 1248 """
1176 if type(delta) == unicode:
1249 if type(delta) == str:
1177 1250 # Deltas should be composed of a subset of ascii chars, Unicode not
1178 1251 # required. If this encode raises UnicodeEncodeError, delta is invalid.
1179 1252 delta = delta.encode("ascii")
@@ -1188,7 +1261,7 b' class diff_match_patch:'
1188 1261 # operation of this token (delete, insert, equality).
1189 1262 param = token[1:]
1190 1263 if token[0] == "+":
1191 param = urllib.parse.unquote(param).decode("utf-8")
1264 param = urllib.parse.unquote(param)
1192 1265 diffs.append((self.DIFF_INSERT, param))
1193 1266 elif token[0] == "-" or token[0] == "=":
1194 1267 try:
@@ -1205,12 +1278,14 b' class diff_match_patch:'
1205 1278 diffs.append((self.DIFF_DELETE, text))
1206 1279 else:
1207 1280 # Anything else is an error.
1208 raise ValueError("Invalid diff operation in diff_fromDelta: " +
1209 token[0])
1281 raise ValueError(
1282 "Invalid diff operation in diff_fromDelta: " + token[0]
1283 )
1210 1284 if pointer != len(text1):
1211 1285 raise ValueError(
1212 "Delta length (%d) does not equal source text length (%d)." %
1213 (pointer, len(text1)))
1286 "Delta length (%d) does not equal source text length (%d)."
1287 % (pointer, len(text1))
1288 )
1214 1289 return diffs
1215 1290
1216 1291 # MATCH FUNCTIONS
@@ -1329,8 +1404,11 b' class diff_match_patch:'
1329 1404 if d == 0: # First pass: exact match.
1330 1405 rd[j] = ((rd[j + 1] << 1) | 1) & charMatch
1331 1406 else: # Subsequent passes: fuzzy match.
1332 rd[j] = (((rd[j + 1] << 1) | 1) & charMatch) | (
1333 ((last_rd[j + 1] | last_rd[j]) << 1) | 1) | last_rd[j + 1]
1407 rd[j] = (
1408 (((rd[j + 1] << 1) | 1) & charMatch)
1409 | (((last_rd[j + 1] | last_rd[j]) << 1) | 1)
1410 | last_rd[j + 1]
1411 )
1334 1412 if rd[j] & matchmask:
1335 1413 score = match_bitapScore(d, j - 1)
1336 1414 # This match will almost certainly be better than any existing match.
@@ -1384,12 +1462,14 b' class diff_match_patch:'
1384 1462
1385 1463 # Look for the first and last matches of pattern in text. If two different
1386 1464 # matches are found, increase the pattern length.
1387 while (text.find(pattern) != text.rfind(pattern) and (self.Match_MaxBits ==
1388 0 or len(pattern) < self.Match_MaxBits - self.Patch_Margin -
1389 self.Patch_Margin)):
1465 while text.find(pattern) != text.rfind(pattern) and (
1466 self.Match_MaxBits == 0
1467 or len(pattern) < self.Match_MaxBits - self.Patch_Margin - self.Patch_Margin
1468 ):
1390 1469 padding += self.Patch_Margin
1391 pattern = text[max(0, patch.start2 - padding) :
1392 patch.start2 + patch.length1 + padding]
1470 pattern = text[
1471 max(0, patch.start2 - padding) : patch.start2 + patch.length1 + padding
1472 ]
1393 1473 # Add one chunk for good luck.
1394 1474 padding += self.Patch_Margin
1395 1475
@@ -1398,8 +1478,9 b' class diff_match_patch:'
1398 1478 if prefix:
1399 1479 patch.diffs[:0] = [(self.DIFF_EQUAL, prefix)]
1400 1480 # Add the suffix.
1401 suffix = text[patch.start2 + patch.length1 :
1402 patch.start2 + patch.length1 + padding]
1481 suffix = text[
1482 patch.start2 + patch.length1 : patch.start2 + patch.length1 + padding
1483 ]
1403 1484 if suffix:
1404 1485 patch.diffs.append((self.DIFF_EQUAL, suffix))
1405 1486
@@ -1455,8 +1536,7 b' class diff_match_patch:'
1455 1536 # Method 3: text1, diffs
1456 1537 text1 = a
1457 1538 diffs = b
1458 elif (isinstance(a, str) and isinstance(b, str) and
1459 isinstance(c, list)):
1539 elif isinstance(a, str) and isinstance(b, str) and isinstance(c, list):
1460 1540 # Method 4: text1, text2, diffs
1461 1541 # text2 is not used.
1462 1542 text1 = a
@@ -1482,24 +1562,31 b' class diff_match_patch:'
1482 1562 # Insertion
1483 1563 patch.diffs.append(diffs[x])
1484 1564 patch.length2 += len(diff_text)
1485 postpatch_text = (postpatch_text[:char_count2] + diff_text +
1486 postpatch_text[char_count2:])
1565 postpatch_text = (
1566 postpatch_text[:char_count2]
1567 + diff_text
1568 + postpatch_text[char_count2:]
1569 )
1487 1570 elif diff_type == self.DIFF_DELETE:
1488 1571 # Deletion.
1489 1572 patch.length1 += len(diff_text)
1490 1573 patch.diffs.append(diffs[x])
1491 postpatch_text = (postpatch_text[:char_count2] +
1492 postpatch_text[char_count2 + len(diff_text):])
1493 elif (diff_type == self.DIFF_EQUAL and
1494 len(diff_text) <= 2 * self.Patch_Margin and
1495 len(patch.diffs) != 0 and len(diffs) != x + 1):
1574 postpatch_text = (
1575 postpatch_text[:char_count2]
1576 + postpatch_text[char_count2 + len(diff_text) :]
1577 )
1578 elif (
1579 diff_type == self.DIFF_EQUAL
1580 and len(diff_text) <= 2 * self.Patch_Margin
1581 and len(patch.diffs) != 0
1582 and len(diffs) != x + 1
1583 ):
1496 1584 # Small equality inside a patch.
1497 1585 patch.diffs.append(diffs[x])
1498 1586 patch.length1 += len(diff_text)
1499 1587 patch.length2 += len(diff_text)
1500 1588
1501 if (diff_type == self.DIFF_EQUAL and
1502 len(diff_text) >= 2 * self.Patch_Margin):
1589 if diff_type == self.DIFF_EQUAL and len(diff_text) >= 2 * self.Patch_Margin:
1503 1590 # Time for a new patch.
1504 1591 if len(patch.diffs) != 0:
1505 1592 self.patch_addContext(patch, prepatch_text)
@@ -1579,11 +1666,15 b' class diff_match_patch:'
1579 1666 if len(text1) > self.Match_MaxBits:
1580 1667 # patch_splitMax will only provide an oversized pattern in the case of
1581 1668 # a monster delete.
1582 start_loc = self.match_main(text, text1[:self.Match_MaxBits],
1583 expected_loc)
1669 start_loc = self.match_main(
1670 text, text1[: self.Match_MaxBits], expected_loc
1671 )
1584 1672 if start_loc != -1:
1585 end_loc = self.match_main(text, text1[-self.Match_MaxBits:],
1586 expected_loc + len(text1) - self.Match_MaxBits)
1673 end_loc = self.match_main(
1674 text,
1675 text1[-self.Match_MaxBits :],
1676 expected_loc + len(text1) - self.Match_MaxBits,
1677 )
1587 1678 if end_loc == -1 or start_loc >= end_loc:
1588 1679 # Can't find valid trailing context. Drop this patch.
1589 1680 start_loc = -1
@@ -1604,29 +1695,42 b' class diff_match_patch:'
1604 1695 text2 = text[start_loc : end_loc + self.Match_MaxBits]
1605 1696 if text1 == text2:
1606 1697 # Perfect match, just shove the replacement text in.
1607 text = (text[:start_loc] + self.diff_text2(patch.diffs) +
1608 text[start_loc + len(text1):])
1698 text = (
1699 text[:start_loc]
1700 + self.diff_text2(patch.diffs)
1701 + text[start_loc + len(text1) :]
1702 )
1609 1703 else:
1610 1704 # Imperfect match.
1611 1705 # Run a diff to get a framework of equivalent indices.
1612 1706 diffs = self.diff_main(text1, text2, False)
1613 if (len(text1) > self.Match_MaxBits and
1614 self.diff_levenshtein(diffs) / float(len(text1)) >
1615 self.Patch_DeleteThreshold):
1707 if (
1708 len(text1) > self.Match_MaxBits
1709 and self.diff_levenshtein(diffs) / float(len(text1))
1710 > self.Patch_DeleteThreshold
1711 ):
1616 1712 # The end points match, but the content is unacceptably bad.
1617 1713 results[-1] = False
1618 1714 else:
1619 1715 self.diff_cleanupSemanticLossless(diffs)
1620 1716 index1 = 0
1621 for (op, data) in patch.diffs:
1717 for op, data in patch.diffs:
1622 1718 if op != self.DIFF_EQUAL:
1623 1719 index2 = self.diff_xIndex(diffs, index1)
1624 1720 if op == self.DIFF_INSERT: # Insertion
1625 text = text[:start_loc + index2] + data + text[start_loc +
1626 index2:]
1721 text = (
1722 text[: start_loc + index2]
1723 + data
1724 + text[start_loc + index2 :]
1725 )
1627 1726 elif op == self.DIFF_DELETE: # Deletion
1628 text = text[:start_loc + index2] + text[start_loc +
1629 self.diff_xIndex(diffs, index1 + len(data)):]
1727 text = (
1728 text[: start_loc + index2]
1729 + text[
1730 start_loc
1731 + self.diff_xIndex(diffs, index1 + len(data)) :
1732 ]
1733 )
1630 1734 if op != self.DIFF_DELETE:
1631 1735 index1 += len(data)
1632 1736 # Strip the padding off.
@@ -1713,7 +1817,7 b' class diff_match_patch:'
1713 1817 x -= 1
1714 1818 start1 = bigpatch.start1
1715 1819 start2 = bigpatch.start2
1716 precontext = ''
1820 precontext = ""
1717 1821 while len(bigpatch.diffs) != 0:
1718 1822 # Create one of several smaller patches.
1719 1823 patch = patch_obj()
@@ -1724,8 +1828,10 b' class diff_match_patch:'
1724 1828 patch.length1 = patch.length2 = len(precontext)
1725 1829 patch.diffs.append((self.DIFF_EQUAL, precontext))
1726 1830
1727 while (len(bigpatch.diffs) != 0 and
1728 patch.length1 < patch_size - self.Patch_Margin):
1831 while (
1832 len(bigpatch.diffs) != 0
1833 and patch.length1 < patch_size - self.Patch_Margin
1834 ):
1729 1835 (diff_type, diff_text) = bigpatch.diffs[0]
1730 1836 if diff_type == self.DIFF_INSERT:
1731 1837 # Insertions are harmless.
@@ -1733,9 +1839,12 b' class diff_match_patch:'
1733 1839 start2 += len(diff_text)
1734 1840 patch.diffs.append(bigpatch.diffs.pop(0))
1735 1841 empty = False
1736 elif (diff_type == self.DIFF_DELETE and len(patch.diffs) == 1 and
1737 patch.diffs[0][0] == self.DIFF_EQUAL and
1738 len(diff_text) > 2 * patch_size):
1842 elif (
1843 diff_type == self.DIFF_DELETE
1844 and len(patch.diffs) == 1
1845 and patch.diffs[0][0] == self.DIFF_EQUAL
1846 and len(diff_text) > 2 * patch_size
1847 ):
1739 1848 # This is a large deletion. Let it pass in one chunk.
1740 1849 patch.length1 += len(diff_text)
1741 1850 start1 += len(diff_text)
@@ -1744,8 +1853,9 b' class diff_match_patch:'
1744 1853 del bigpatch.diffs[0]
1745 1854 else:
1746 1855 # Deletion or equality. Only take as much as we can stomach.
1747 diff_text = diff_text[:patch_size - patch.length1 -
1748 self.Patch_Margin]
1856 diff_text = diff_text[
1857 : patch_size - patch.length1 - self.Patch_Margin
1858 ]
1749 1859 patch.length1 += len(diff_text)
1750 1860 start1 += len(diff_text)
1751 1861 if diff_type == self.DIFF_EQUAL:
@@ -1758,8 +1868,10 b' class diff_match_patch:'
1758 1868 if diff_text == bigpatch.diffs[0][1]:
1759 1869 del bigpatch.diffs[0]
1760 1870 else:
1761 bigpatch.diffs[0] = (bigpatch.diffs[0][0],
1762 bigpatch.diffs[0][1][len(diff_text):])
1871 bigpatch.diffs[0] = (
1872 bigpatch.diffs[0][0],
1873 bigpatch.diffs[0][1][len(diff_text) :],
1874 )
1763 1875
1764 1876 # Compute the head context for the next patch.
1765 1877 precontext = self.diff_text2(patch.diffs)
@@ -1770,8 +1882,10 b' class diff_match_patch:'
1770 1882 patch.length1 += len(postcontext)
1771 1883 patch.length2 += len(postcontext)
1772 1884 if len(patch.diffs) != 0 and patch.diffs[-1][0] == self.DIFF_EQUAL:
1773 patch.diffs[-1] = (self.DIFF_EQUAL, patch.diffs[-1][1] +
1774 postcontext)
1885 patch.diffs[-1] = (
1886 self.DIFF_EQUAL,
1887 patch.diffs[-1][1] + postcontext,
1888 )
1775 1889 else:
1776 1890 patch.diffs.append((self.DIFF_EQUAL, postcontext))
1777 1891
@@ -1813,7 +1927,7 b' class diff_match_patch:'
1813 1927 patches = []
1814 1928 if not textline:
1815 1929 return patches
1816 text = textline.split('\n')
1930 text = textline.split("\n")
1817 1931 while len(text) != 0:
1818 1932 m = re.match("^@@ -(\d+),?(\d*) \+(\d+),?(\d*) @@$", text[0])
1819 1933 if not m:
@@ -1821,20 +1935,20 b' class diff_match_patch:'
1821 1935 patch = patch_obj()
1822 1936 patches.append(patch)
1823 1937 patch.start1 = int(m.group(1))
1824 if m.group(2) == '':
1938 if m.group(2) == "":
1825 1939 patch.start1 -= 1
1826 1940 patch.length1 = 1
1827 elif m.group(2) == '0':
1941 elif m.group(2) == "0":
1828 1942 patch.length1 = 0
1829 1943 else:
1830 1944 patch.start1 -= 1
1831 1945 patch.length1 = int(m.group(2))
1832 1946
1833 1947 patch.start2 = int(m.group(3))
1834 if m.group(4) == '':
1948 if m.group(4) == "":
1835 1949 patch.start2 -= 1
1836 1950 patch.length2 = 1
1837 elif m.group(4) == '0':
1951 elif m.group(4) == "0":
1838 1952 patch.length2 = 0
1839 1953 else:
1840 1954 patch.start2 -= 1
@@ -1846,22 +1960,22 b' class diff_match_patch:'
1846 1960 if text[0]:
1847 1961 sign = text[0][0]
1848 1962 else:
1849 sign = ''
1963 sign = ""
1850 1964 line = urllib.parse.unquote(text[0][1:])
1851 1965 line = line.decode("utf-8")
1852 if sign == '+':
1966 if sign == "+":
1853 1967 # Insertion.
1854 1968 patch.diffs.append((self.DIFF_INSERT, line))
1855 elif sign == '-':
1969 elif sign == "-":
1856 1970 # Deletion.
1857 1971 patch.diffs.append((self.DIFF_DELETE, line))
1858 elif sign == ' ':
1972 elif sign == " ":
1859 1973 # Minor equality.
1860 1974 patch.diffs.append((self.DIFF_EQUAL, line))
1861 elif sign == '@':
1975 elif sign == "@":
1862 1976 # Start of next patch.
1863 1977 break
1864 elif sign == '':
1978 elif sign == "":
1865 1979 # Blank line? Whatever.
1866 1980 pass
1867 1981 else:
@@ -1872,12 +1986,10 b' class diff_match_patch:'
1872 1986
1873 1987
1874 1988 class patch_obj:
1875 """Class representing one patch operation.
1876 """
1989 """Class representing one patch operation."""
1877 1990
1878 1991 def __init__(self):
1879 """Initializes with an empty list of diffs.
1880 """
1992 """Initializes with an empty list of diffs."""
1881 1993 self.diffs = []
1882 1994 self.start1 = None
1883 1995 self.start2 = None
@@ -1906,7 +2018,7 b' class patch_obj:'
1906 2018 coords2 = str(self.start2 + 1) + "," + str(self.length2)
1907 2019 text = ["@@ -", coords1, " +", coords2, " @@\n"]
1908 2020 # Escape the body of the patch with %xx notation.
1909 for (op, data) in self.diffs:
2021 for op, data in self.diffs:
1910 2022 if op == diff_match_patch.DIFF_INSERT:
1911 2023 text.append("+")
1912 2024 elif op == diff_match_patch.DIFF_DELETE:
@@ -1916,4 +2028,4 b' class patch_obj:'
1916 2028 # High ascii will raise UnicodeDecodeError. Use Unicode instead.
1917 2029 data = data.encode("utf-8")
1918 2030 text.append(urllib.parse.quote(data, "!~*'();/?:@&=+$,# ") + "\n")
1919 return "".join(text) No newline at end of file
2031 return "".join(text)
General Comments 0
You need to be logged in to leave comments. Login now