diffs: replace compare controller with new html based diffs:...
dan
r1030:158ce501 default
@@ -0,0 +1,14 b''
1 Copyright 2006 Google Inc.
2 http://code.google.com/p/google-diff-match-patch/
3
4 Licensed under the Apache License, Version 2.0 (the "License");
5 you may not use this file except in compliance with the License.
6 You may obtain a copy of the License at
7
8 http://www.apache.org/licenses/LICENSE-2.0
9
10 Unless required by applicable law or agreed to in writing, software
11 distributed under the License is distributed on an "AS IS" BASIS,
12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 See the License for the specific language governing permissions and
14 limitations under the License.
This diff has been collapsed as it changes many lines (1919 lines changed).
@@ -0,0 +1,1919 b''
1 #!/usr/bin/python2.4
2
3 from __future__ import division
4
5 """Diff Match and Patch
6
7 Copyright 2006 Google Inc.
8 http://code.google.com/p/google-diff-match-patch/
9
10 Licensed under the Apache License, Version 2.0 (the "License");
11 you may not use this file except in compliance with the License.
12 You may obtain a copy of the License at
13
14 http://www.apache.org/licenses/LICENSE-2.0
15
16 Unless required by applicable law or agreed to in writing, software
17 distributed under the License is distributed on an "AS IS" BASIS,
18 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
19 See the License for the specific language governing permissions and
20 limitations under the License.
21 """
22
23 """Functions for diff, match and patch.
24
25 Computes the difference between two texts to create a patch.
26 Applies the patch onto another text, allowing for errors.
27 """
28
29 __author__ = 'fraser@google.com (Neil Fraser)'
30
31 import math
32 import re
33 import sys
34 import time
35 import urllib
36
37 class diff_match_patch:
38 """Class containing the diff, match and patch methods.
39
40 Also contains the behaviour settings.
41 """
42
43 def __init__(self):
44 """Inits a diff_match_patch object with default settings.
45 Redefine these in your program to override the defaults.
46 """
47
48 # Number of seconds to map a diff before giving up (0 for infinity).
49 self.Diff_Timeout = 1.0
50 # Cost of an empty edit operation in terms of edit characters.
51 self.Diff_EditCost = 4
52 # At what point is no match declared (0.0 = perfection, 1.0 = very loose).
53 self.Match_Threshold = 0.5
54 # How far to search for a match (0 = exact location, 1000+ = broad match).
55 # A match this many characters away from the expected location will add
56 # 1.0 to the score (0.0 is a perfect match).
57 self.Match_Distance = 1000
58 # When deleting a large block of text (over ~64 characters), how close do
59 # the contents have to be to match the expected contents. (0.0 = perfection,
60 # 1.0 = very loose). Note that Match_Threshold controls how closely the
61 # end points of a delete need to match.
62 self.Patch_DeleteThreshold = 0.5
63 # Chunk size for context length.
64 self.Patch_Margin = 4
65
66 # The number of bits in an int.
67 # Python has no maximum, thus to disable patch splitting set to 0.
68 # However to avoid long patches in certain pathological cases, use 32.
69 # Multiple short patches (using native ints) are much faster than long ones.
70 self.Match_MaxBits = 32
71
72 # DIFF FUNCTIONS
73
74 # The data structure representing a diff is an array of tuples:
75 # [(DIFF_DELETE, "Hello"), (DIFF_INSERT, "Goodbye"), (DIFF_EQUAL, " world.")]
76 # which means: delete "Hello", add "Goodbye" and keep " world."
77 DIFF_DELETE = -1
78 DIFF_INSERT = 1
79 DIFF_EQUAL = 0
80
81 def diff_main(self, text1, text2, checklines=True, deadline=None):
82 """Find the differences between two texts. Simplifies the problem by
83 stripping any common prefix or suffix off the texts before diffing.
84
85 Args:
86 text1: Old string to be diffed.
87 text2: New string to be diffed.
88 checklines: Optional speedup flag. If present and false, then don't run
89 a line-level diff first to identify the changed areas.
90 Defaults to true, which does a faster, slightly less optimal diff.
91 deadline: Optional time when the diff should be complete by. Used
92 internally for recursive calls. Users should set Diff_Timeout instead.
93
94 Returns:
95 Array of changes.
96 """
97 # Set a deadline by which time the diff must be complete.
98 if deadline == None:
99 # Unlike in most languages, Python counts time in seconds.
100 if self.Diff_Timeout <= 0:
101 deadline = sys.maxint
102 else:
103 deadline = time.time() + self.Diff_Timeout
104
105 # Check for null inputs.
106 if text1 == None or text2 == None:
107 raise ValueError("Null inputs. (diff_main)")
108
109 # Check for equality (speedup).
110 if text1 == text2:
111 if text1:
112 return [(self.DIFF_EQUAL, text1)]
113 return []
114
115 # Trim off common prefix (speedup).
116 commonlength = self.diff_commonPrefix(text1, text2)
117 commonprefix = text1[:commonlength]
118 text1 = text1[commonlength:]
119 text2 = text2[commonlength:]
120
121 # Trim off common suffix (speedup).
122 commonlength = self.diff_commonSuffix(text1, text2)
123 if commonlength == 0:
124 commonsuffix = ''
125 else:
126 commonsuffix = text1[-commonlength:]
127 text1 = text1[:-commonlength]
128 text2 = text2[:-commonlength]
129
130 # Compute the diff on the middle block.
131 diffs = self.diff_compute(text1, text2, checklines, deadline)
132
133 # Restore the prefix and suffix.
134 if commonprefix:
135 diffs[:0] = [(self.DIFF_EQUAL, commonprefix)]
136 if commonsuffix:
137 diffs.append((self.DIFF_EQUAL, commonsuffix))
138 self.diff_cleanupMerge(diffs)
139 return diffs
140
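For reference, a minimal usage sketch of diff_main (illustration only, not part of this commit; it assumes this file is importable as diff_match_patch and, like the module itself, runs under Python 2):

    from diff_match_patch import diff_match_patch

    dmp = diff_match_patch()
    diffs = dmp.diff_main("Hello world.", "Goodbye world.")
    # diffs is a list of (op, text) tuples; after semantic cleanup it
    # typically reads [(-1, 'Hello'), (1, 'Goodbye'), (0, ' world.')].
    dmp.diff_cleanupSemantic(diffs)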
141 def diff_compute(self, text1, text2, checklines, deadline):
142 """Find the differences between two texts. Assumes that the texts do not
143 have any common prefix or suffix.
144
145 Args:
146 text1: Old string to be diffed.
147 text2: New string to be diffed.
148 checklines: Speedup flag. If false, then don't run a line-level diff
149 first to identify the changed areas.
150 If true, then run a faster, slightly less optimal diff.
151 deadline: Time when the diff should be complete by.
152
153 Returns:
154 Array of changes.
155 """
156 if not text1:
157 # Just add some text (speedup).
158 return [(self.DIFF_INSERT, text2)]
159
160 if not text2:
161 # Just delete some text (speedup).
162 return [(self.DIFF_DELETE, text1)]
163
164 if len(text1) > len(text2):
165 (longtext, shorttext) = (text1, text2)
166 else:
167 (shorttext, longtext) = (text1, text2)
168 i = longtext.find(shorttext)
169 if i != -1:
170 # Shorter text is inside the longer text (speedup).
171 diffs = [(self.DIFF_INSERT, longtext[:i]), (self.DIFF_EQUAL, shorttext),
172 (self.DIFF_INSERT, longtext[i + len(shorttext):])]
173 # Swap insertions for deletions if diff is reversed.
174 if len(text1) > len(text2):
175 diffs[0] = (self.DIFF_DELETE, diffs[0][1])
176 diffs[2] = (self.DIFF_DELETE, diffs[2][1])
177 return diffs
178
179 if len(shorttext) == 1:
180 # Single character string.
181 # After the previous speedup, the character can't be an equality.
182 return [(self.DIFF_DELETE, text1), (self.DIFF_INSERT, text2)]
183
184 # Check to see if the problem can be split in two.
185 hm = self.diff_halfMatch(text1, text2)
186 if hm:
187 # A half-match was found, sort out the return data.
188 (text1_a, text1_b, text2_a, text2_b, mid_common) = hm
189 # Send both pairs off for separate processing.
190 diffs_a = self.diff_main(text1_a, text2_a, checklines, deadline)
191 diffs_b = self.diff_main(text1_b, text2_b, checklines, deadline)
192 # Merge the results.
193 return diffs_a + [(self.DIFF_EQUAL, mid_common)] + diffs_b
194
195 if checklines and len(text1) > 100 and len(text2) > 100:
196 return self.diff_lineMode(text1, text2, deadline)
197
198 return self.diff_bisect(text1, text2, deadline)
199
200 def diff_lineMode(self, text1, text2, deadline):
201 """Do a quick line-level diff on both strings, then rediff the parts for
202 greater accuracy.
203 This speedup can produce non-minimal diffs.
204
205 Args:
206 text1: Old string to be diffed.
207 text2: New string to be diffed.
208 deadline: Time when the diff should be complete by.
209
210 Returns:
211 Array of changes.
212 """
213
214 # Scan the text on a line-by-line basis first.
215 (text1, text2, linearray) = self.diff_linesToChars(text1, text2)
216
217 diffs = self.diff_main(text1, text2, False, deadline)
218
219 # Convert the diff back to original text.
220 self.diff_charsToLines(diffs, linearray)
221 # Eliminate freak matches (e.g. blank lines)
222 self.diff_cleanupSemantic(diffs)
223
224 # Rediff any replacement blocks, this time character-by-character.
225 # Add a dummy entry at the end.
226 diffs.append((self.DIFF_EQUAL, ''))
227 pointer = 0
228 count_delete = 0
229 count_insert = 0
230 text_delete = ''
231 text_insert = ''
232 while pointer < len(diffs):
233 if diffs[pointer][0] == self.DIFF_INSERT:
234 count_insert += 1
235 text_insert += diffs[pointer][1]
236 elif diffs[pointer][0] == self.DIFF_DELETE:
237 count_delete += 1
238 text_delete += diffs[pointer][1]
239 elif diffs[pointer][0] == self.DIFF_EQUAL:
240 # Upon reaching an equality, check for prior redundancies.
241 if count_delete >= 1 and count_insert >= 1:
242 # Delete the offending records and add the merged ones.
243 a = self.diff_main(text_delete, text_insert, False, deadline)
244 diffs[pointer - count_delete - count_insert : pointer] = a
245 pointer = pointer - count_delete - count_insert + len(a)
246 count_insert = 0
247 count_delete = 0
248 text_delete = ''
249 text_insert = ''
250
251 pointer += 1
252
253 diffs.pop() # Remove the dummy entry at the end.
254
255 return diffs
256
257 def diff_bisect(self, text1, text2, deadline):
258 """Find the 'middle snake' of a diff, split the problem in two
259 and return the recursively constructed diff.
260 See Myers 1986 paper: An O(ND) Difference Algorithm and Its Variations.
261
262 Args:
263 text1: Old string to be diffed.
264 text2: New string to be diffed.
265 deadline: Time at which to bail if not yet complete.
266
267 Returns:
268 Array of diff tuples.
269 """
270
271 # Cache the text lengths to prevent multiple calls.
272 text1_length = len(text1)
273 text2_length = len(text2)
274 max_d = (text1_length + text2_length + 1) // 2
275 v_offset = max_d
276 v_length = 2 * max_d
277 v1 = [-1] * v_length
278 v1[v_offset + 1] = 0
279 v2 = v1[:]
280 delta = text1_length - text2_length
281 # If the total number of characters is odd, then the front path will
282 # collide with the reverse path.
283 front = (delta % 2 != 0)
284 # Offsets for start and end of k loop.
285 # Prevents mapping of space beyond the grid.
286 k1start = 0
287 k1end = 0
288 k2start = 0
289 k2end = 0
290 for d in xrange(max_d):
291 # Bail out if deadline is reached.
292 if time.time() > deadline:
293 break
294
295 # Walk the front path one step.
296 for k1 in xrange(-d + k1start, d + 1 - k1end, 2):
297 k1_offset = v_offset + k1
298 if k1 == -d or (k1 != d and
299 v1[k1_offset - 1] < v1[k1_offset + 1]):
300 x1 = v1[k1_offset + 1]
301 else:
302 x1 = v1[k1_offset - 1] + 1
303 y1 = x1 - k1
304 while (x1 < text1_length and y1 < text2_length and
305 text1[x1] == text2[y1]):
306 x1 += 1
307 y1 += 1
308 v1[k1_offset] = x1
309 if x1 > text1_length:
310 # Ran off the right of the graph.
311 k1end += 2
312 elif y1 > text2_length:
313 # Ran off the bottom of the graph.
314 k1start += 2
315 elif front:
316 k2_offset = v_offset + delta - k1
317 if k2_offset >= 0 and k2_offset < v_length and v2[k2_offset] != -1:
318 # Mirror x2 onto top-left coordinate system.
319 x2 = text1_length - v2[k2_offset]
320 if x1 >= x2:
321 # Overlap detected.
322 return self.diff_bisectSplit(text1, text2, x1, y1, deadline)
323
324 # Walk the reverse path one step.
325 for k2 in xrange(-d + k2start, d + 1 - k2end, 2):
326 k2_offset = v_offset + k2
327 if k2 == -d or (k2 != d and
328 v2[k2_offset - 1] < v2[k2_offset + 1]):
329 x2 = v2[k2_offset + 1]
330 else:
331 x2 = v2[k2_offset - 1] + 1
332 y2 = x2 - k2
333 while (x2 < text1_length and y2 < text2_length and
334 text1[-x2 - 1] == text2[-y2 - 1]):
335 x2 += 1
336 y2 += 1
337 v2[k2_offset] = x2
338 if x2 > text1_length:
339 # Ran off the left of the graph.
340 k2end += 2
341 elif y2 > text2_length:
342 # Ran off the top of the graph.
343 k2start += 2
344 elif not front:
345 k1_offset = v_offset + delta - k2
346 if k1_offset >= 0 and k1_offset < v_length and v1[k1_offset] != -1:
347 x1 = v1[k1_offset]
348 y1 = v_offset + x1 - k1_offset
349 # Mirror x2 onto top-left coordinate system.
350 x2 = text1_length - x2
351 if x1 >= x2:
352 # Overlap detected.
353 return self.diff_bisectSplit(text1, text2, x1, y1, deadline)
354
355 # Diff took too long and hit the deadline or
356 # number of diffs equals number of characters, no commonality at all.
357 return [(self.DIFF_DELETE, text1), (self.DIFF_INSERT, text2)]
358
359 def diff_bisectSplit(self, text1, text2, x, y, deadline):
360 """Given the location of the 'middle snake', split the diff in two parts
361 and recurse.
362
363 Args:
364 text1: Old string to be diffed.
365 text2: New string to be diffed.
366 x: Index of split point in text1.
367 y: Index of split point in text2.
368 deadline: Time at which to bail if not yet complete.
369
370 Returns:
371 Array of diff tuples.
372 """
373 text1a = text1[:x]
374 text2a = text2[:y]
375 text1b = text1[x:]
376 text2b = text2[y:]
377
378 # Compute both diffs serially.
379 diffs = self.diff_main(text1a, text2a, False, deadline)
380 diffsb = self.diff_main(text1b, text2b, False, deadline)
381
382 return diffs + diffsb
383
384 def diff_linesToChars(self, text1, text2):
385 """Split two texts into an array of strings. Reduce the texts to a string
386 of hashes where each Unicode character represents one line.
387
388 Args:
389 text1: First string.
390 text2: Second string.
391
392 Returns:
393 Three element tuple, containing the encoded text1, the encoded text2 and
394 the array of unique strings. The zeroth element of the array of unique
395 strings is intentionally blank.
396 """
397 lineArray = [] # e.g. lineArray[4] == "Hello\n"
398 lineHash = {} # e.g. lineHash["Hello\n"] == 4
399
400 # "\x00" is a valid character, but various debuggers don't like it.
401 # So we'll insert a junk entry to avoid generating a null character.
402 lineArray.append('')
403
404 def diff_linesToCharsMunge(text):
405 """Split a text into an array of strings. Reduce the texts to a string
406 of hashes where each Unicode character represents one line.
407 Modifies linearray and linehash through being a closure.
408
409 Args:
410 text: String to encode.
411
412 Returns:
413 Encoded string.
414 """
415 chars = []
416 # Walk the text, pulling out a substring for each line.
417 # text.split('\n') would temporarily double our memory footprint.
418 # Modifying text would create many large strings to garbage collect.
419 lineStart = 0
420 lineEnd = -1
421 while lineEnd < len(text) - 1:
422 lineEnd = text.find('\n', lineStart)
423 if lineEnd == -1:
424 lineEnd = len(text) - 1
425 line = text[lineStart:lineEnd + 1]
426 lineStart = lineEnd + 1
427
428 if line in lineHash:
429 chars.append(unichr(lineHash[line]))
430 else:
431 lineArray.append(line)
432 lineHash[line] = len(lineArray) - 1
433 chars.append(unichr(len(lineArray) - 1))
434 return "".join(chars)
435
436 chars1 = diff_linesToCharsMunge(text1)
437 chars2 = diff_linesToCharsMunge(text2)
438 return (chars1, chars2, lineArray)
439
440 def diff_charsToLines(self, diffs, lineArray):
441 """Rehydrate the text in a diff from a string of line hashes to real lines
442 of text.
443
444 Args:
445 diffs: Array of diff tuples.
446 lineArray: Array of unique strings.
447 """
448 for x in xrange(len(diffs)):
449 text = []
450 for char in diffs[x][1]:
451 text.append(lineArray[ord(char)])
452 diffs[x] = (diffs[x][0], "".join(text))
453
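A small sketch of the encode/decode round trip that diff_lineMode relies on (illustration only; same import assumption as above):

    from diff_match_patch import diff_match_patch

    dmp = diff_match_patch()
    text1 = "alpha\nbeta\nalpha\n"
    text2 = "alpha\ngamma\nalpha\n"
    (chars1, chars2, lineArray) = dmp.diff_linesToChars(text1, text2)
    # Each distinct line is now one character: chars1 == u'\x01\x02\x01'.
    diffs = dmp.diff_main(chars1, chars2, False)
    dmp.diff_charsToLines(diffs, lineArray)
    # diffs now describes whole-line edits:
    # [(0, 'alpha\n'), (-1, 'beta\n'), (1, 'gamma\n'), (0, 'alpha\n')]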
454 def diff_commonPrefix(self, text1, text2):
455 """Determine the common prefix of two strings.
456
457 Args:
458 text1: First string.
459 text2: Second string.
460
461 Returns:
462 The number of characters common to the start of each string.
463 """
464 # Quick check for common null cases.
465 if not text1 or not text2 or text1[0] != text2[0]:
466 return 0
467 # Binary search.
468 # Performance analysis: http://neil.fraser.name/news/2007/10/09/
469 pointermin = 0
470 pointermax = min(len(text1), len(text2))
471 pointermid = pointermax
472 pointerstart = 0
473 while pointermin < pointermid:
474 if text1[pointerstart:pointermid] == text2[pointerstart:pointermid]:
475 pointermin = pointermid
476 pointerstart = pointermin
477 else:
478 pointermax = pointermid
479 pointermid = (pointermax - pointermin) // 2 + pointermin
480 return pointermid
481
482 def diff_commonSuffix(self, text1, text2):
483 """Determine the common suffix of two strings.
484
485 Args:
486 text1: First string.
487 text2: Second string.
488
489 Returns:
490 The number of characters common to the end of each string.
491 """
492 # Quick check for common null cases.
493 if not text1 or not text2 or text1[-1] != text2[-1]:
494 return 0
495 # Binary search.
496 # Performance analysis: http://neil.fraser.name/news/2007/10/09/
497 pointermin = 0
498 pointermax = min(len(text1), len(text2))
499 pointermid = pointermax
500 pointerend = 0
501 while pointermin < pointermid:
502 if (text1[-pointermid:len(text1) - pointerend] ==
503 text2[-pointermid:len(text2) - pointerend]):
504 pointermin = pointermid
505 pointerend = pointermin
506 else:
507 pointermax = pointermid
508 pointermid = (pointermax - pointermin) // 2 + pointermin
509 return pointermid
510
511 def diff_commonOverlap(self, text1, text2):
512 """Determine if the suffix of one string is the prefix of another.
513
514 Args:
515 text1: First string.
516 text2: Second string.
517
518 Returns:
519 The number of characters common to the end of the first
520 string and the start of the second string.
521 """
522 # Cache the text lengths to prevent multiple calls.
523 text1_length = len(text1)
524 text2_length = len(text2)
525 # Eliminate the null case.
526 if text1_length == 0 or text2_length == 0:
527 return 0
528 # Truncate the longer string.
529 if text1_length > text2_length:
530 text1 = text1[-text2_length:]
531 elif text1_length < text2_length:
532 text2 = text2[:text1_length]
533 text_length = min(text1_length, text2_length)
534 # Quick check for the worst case.
535 if text1 == text2:
536 return text_length
537
538 # Start by looking for a single character match
539 # and increase length until no match is found.
540 # Performance analysis: http://neil.fraser.name/news/2010/11/04/
541 best = 0
542 length = 1
543 while True:
544 pattern = text1[-length:]
545 found = text2.find(pattern)
546 if found == -1:
547 return best
548 length += found
549 if found == 0 or text1[-length:] == text2[:length]:
550 best = length
551 length += 1
552
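The three common* helpers in a short sketch (expected values taken from the upstream test suite):

    from diff_match_patch import diff_match_patch

    dmp = diff_match_patch()
    dmp.diff_commonPrefix("1234abcdef", "1234xyz")    # -> 4
    dmp.diff_commonSuffix("abcdef1234", "xyz1234")    # -> 4
    dmp.diff_commonOverlap("123456xxx", "xxxabcd")    # -> 3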
553 def diff_halfMatch(self, text1, text2):
554 """Do the two texts share a substring which is at least half the length of
555 the longer text?
556 This speedup can produce non-minimal diffs.
557
558 Args:
559 text1: First string.
560 text2: Second string.
561
562 Returns:
563 Five element Array, containing the prefix of text1, the suffix of text1,
564 the prefix of text2, the suffix of text2 and the common middle. Or None
565 if there was no match.
566 """
567 if self.Diff_Timeout <= 0:
568 # Don't risk returning a non-optimal diff if we have unlimited time.
569 return None
570 if len(text1) > len(text2):
571 (longtext, shorttext) = (text1, text2)
572 else:
573 (shorttext, longtext) = (text1, text2)
574 if len(longtext) < 4 or len(shorttext) * 2 < len(longtext):
575 return None # Pointless.
576
577 def diff_halfMatchI(longtext, shorttext, i):
578 """Does a substring of shorttext exist within longtext such that the
579 substring is at least half the length of longtext?
580 Closure, but does not reference any external variables.
581
582 Args:
583 longtext: Longer string.
584 shorttext: Shorter string.
585 i: Start index of quarter length substring within longtext.
586
587 Returns:
588 Five element Array, containing the prefix of longtext, the suffix of
589 longtext, the prefix of shorttext, the suffix of shorttext and the
590 common middle. Or None if there was no match.
591 """
592 seed = longtext[i:i + len(longtext) // 4]
593 best_common = ''
594 j = shorttext.find(seed)
595 while j != -1:
596 prefixLength = self.diff_commonPrefix(longtext[i:], shorttext[j:])
597 suffixLength = self.diff_commonSuffix(longtext[:i], shorttext[:j])
598 if len(best_common) < suffixLength + prefixLength:
599 best_common = (shorttext[j - suffixLength:j] +
600 shorttext[j:j + prefixLength])
601 best_longtext_a = longtext[:i - suffixLength]
602 best_longtext_b = longtext[i + prefixLength:]
603 best_shorttext_a = shorttext[:j - suffixLength]
604 best_shorttext_b = shorttext[j + prefixLength:]
605 j = shorttext.find(seed, j + 1)
606
607 if len(best_common) * 2 >= len(longtext):
608 return (best_longtext_a, best_longtext_b,
609 best_shorttext_a, best_shorttext_b, best_common)
610 else:
611 return None
612
613 # First check if the second quarter is the seed for a half-match.
614 hm1 = diff_halfMatchI(longtext, shorttext, (len(longtext) + 3) // 4)
615 # Check again based on the third quarter.
616 hm2 = diff_halfMatchI(longtext, shorttext, (len(longtext) + 1) // 2)
617 if not hm1 and not hm2:
618 return None
619 elif not hm2:
620 hm = hm1
621 elif not hm1:
622 hm = hm2
623 else:
624 # Both matched. Select the longest.
625 if len(hm1[4]) > len(hm2[4]):
626 hm = hm1
627 else:
628 hm = hm2
629
630 # A half-match was found, sort out the return data.
631 if len(text1) > len(text2):
632 (text1_a, text1_b, text2_a, text2_b, mid_common) = hm
633 else:
634 (text2_a, text2_b, text1_a, text1_b, mid_common) = hm
635 return (text1_a, text1_b, text2_a, text2_b, mid_common)
636
637 def diff_cleanupSemantic(self, diffs):
638 """Reduce the number of edits by eliminating semantically trivial
639 equalities.
640
641 Args:
642 diffs: Array of diff tuples.
643 """
644 changes = False
645 equalities = [] # Stack of indices where equalities are found.
646 lastequality = None # Always equal to diffs[equalities[-1]][1]
647 pointer = 0 # Index of current position.
648 # Number of chars that changed prior to the equality.
649 length_insertions1, length_deletions1 = 0, 0
650 # Number of chars that changed after the equality.
651 length_insertions2, length_deletions2 = 0, 0
652 while pointer < len(diffs):
653 if diffs[pointer][0] == self.DIFF_EQUAL: # Equality found.
654 equalities.append(pointer)
655 length_insertions1, length_insertions2 = length_insertions2, 0
656 length_deletions1, length_deletions2 = length_deletions2, 0
657 lastequality = diffs[pointer][1]
658 else: # An insertion or deletion.
659 if diffs[pointer][0] == self.DIFF_INSERT:
660 length_insertions2 += len(diffs[pointer][1])
661 else:
662 length_deletions2 += len(diffs[pointer][1])
663 # Eliminate an equality that is smaller or equal to the edits on both
664 # sides of it.
665 if (lastequality and (len(lastequality) <=
666 max(length_insertions1, length_deletions1)) and
667 (len(lastequality) <= max(length_insertions2, length_deletions2))):
668 # Duplicate record.
669 diffs.insert(equalities[-1], (self.DIFF_DELETE, lastequality))
670 # Change second copy to insert.
671 diffs[equalities[-1] + 1] = (self.DIFF_INSERT,
672 diffs[equalities[-1] + 1][1])
673 # Throw away the equality we just deleted.
674 equalities.pop()
675 # Throw away the previous equality (it needs to be reevaluated).
676 if len(equalities):
677 equalities.pop()
678 if len(equalities):
679 pointer = equalities[-1]
680 else:
681 pointer = -1
682 # Reset the counters.
683 length_insertions1, length_deletions1 = 0, 0
684 length_insertions2, length_deletions2 = 0, 0
685 lastequality = None
686 changes = True
687 pointer += 1
688
689 # Normalize the diff.
690 if changes:
691 self.diff_cleanupMerge(diffs)
692 self.diff_cleanupSemanticLossless(diffs)
693
694 # Find any overlaps between deletions and insertions.
695 # e.g: <del>abcxxx</del><ins>xxxdef</ins>
696 # -> <del>abc</del>xxx<ins>def</ins>
697 # e.g: <del>xxxabc</del><ins>defxxx</ins>
698 # -> <ins>def</ins>xxx<del>abc</del>
699 # Only extract an overlap if it is as big as the edit ahead or behind it.
700 pointer = 1
701 while pointer < len(diffs):
702 if (diffs[pointer - 1][0] == self.DIFF_DELETE and
703 diffs[pointer][0] == self.DIFF_INSERT):
704 deletion = diffs[pointer - 1][1]
705 insertion = diffs[pointer][1]
706 overlap_length1 = self.diff_commonOverlap(deletion, insertion)
707 overlap_length2 = self.diff_commonOverlap(insertion, deletion)
708 if overlap_length1 >= overlap_length2:
709 if (overlap_length1 >= len(deletion) / 2.0 or
710 overlap_length1 >= len(insertion) / 2.0):
711 # Overlap found. Insert an equality and trim the surrounding edits.
712 diffs.insert(pointer, (self.DIFF_EQUAL,
713 insertion[:overlap_length1]))
714 diffs[pointer - 1] = (self.DIFF_DELETE,
715 deletion[:len(deletion) - overlap_length1])
716 diffs[pointer + 1] = (self.DIFF_INSERT,
717 insertion[overlap_length1:])
718 pointer += 1
719 else:
720 if (overlap_length2 >= len(deletion) / 2.0 or
721 overlap_length2 >= len(insertion) / 2.0):
722 # Reverse overlap found.
723 # Insert an equality and swap and trim the surrounding edits.
724 diffs.insert(pointer, (self.DIFF_EQUAL, deletion[:overlap_length2]))
725 diffs[pointer - 1] = (self.DIFF_INSERT,
726 insertion[:len(insertion) - overlap_length2])
727 diffs[pointer + 1] = (self.DIFF_DELETE, deletion[overlap_length2:])
728 pointer += 1
729 pointer += 1
730 pointer += 1
731
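A sketch of the elimination this performs (example borrowed from the upstream test suite): the one-character equality 'b' is cheaper to re-edit than to keep, so it is folded into the surrounding edits:

    from diff_match_patch import diff_match_patch

    dmp = diff_match_patch()
    diffs = [(-1, 'a'), (0, 'b'), (-1, 'c')]
    dmp.diff_cleanupSemantic(diffs)
    # diffs == [(-1, 'abc'), (1, 'b')]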
732 def diff_cleanupSemanticLossless(self, diffs):
733 """Look for single edits surrounded on both sides by equalities
734 which can be shifted sideways to align the edit to a word boundary.
735 e.g: The c<ins>at c</ins>ame. -> The <ins>cat </ins>came.
736
737 Args:
738 diffs: Array of diff tuples.
739 """
740
741 def diff_cleanupSemanticScore(one, two):
742 """Given two strings, compute a score representing whether the
743 internal boundary falls on logical boundaries.
744 Scores range from 6 (best) to 0 (worst).
745 Closure, but does not reference any external variables.
746
747 Args:
748 one: First string.
749 two: Second string.
750
751 Returns:
752 The score.
753 """
754 if not one or not two:
755 # Edges are the best.
756 return 6
757
758 # Each port of this function behaves slightly differently due to
759 # subtle differences in each language's definition of things like
760 # 'whitespace'. Since this function's purpose is largely cosmetic,
761 # the choice has been made to use each language's native features
762 # rather than force total conformity.
763 char1 = one[-1]
764 char2 = two[0]
765 nonAlphaNumeric1 = not char1.isalnum()
766 nonAlphaNumeric2 = not char2.isalnum()
767 whitespace1 = nonAlphaNumeric1 and char1.isspace()
768 whitespace2 = nonAlphaNumeric2 and char2.isspace()
769 lineBreak1 = whitespace1 and (char1 == "\r" or char1 == "\n")
770 lineBreak2 = whitespace2 and (char2 == "\r" or char2 == "\n")
771 blankLine1 = lineBreak1 and self.BLANKLINEEND.search(one)
772 blankLine2 = lineBreak2 and self.BLANKLINESTART.match(two)
773
774 if blankLine1 or blankLine2:
775 # Five points for blank lines.
776 return 5
777 elif lineBreak1 or lineBreak2:
778 # Four points for line breaks.
779 return 4
780 elif nonAlphaNumeric1 and not whitespace1 and whitespace2:
781 # Three points for end of sentences.
782 return 3
783 elif whitespace1 or whitespace2:
784 # Two points for whitespace.
785 return 2
786 elif nonAlphaNumeric1 or nonAlphaNumeric2:
787 # One point for non-alphanumeric.
788 return 1
789 return 0
790
791 pointer = 1
792 # Intentionally ignore the first and last element (don't need checking).
793 while pointer < len(diffs) - 1:
794 if (diffs[pointer - 1][0] == self.DIFF_EQUAL and
795 diffs[pointer + 1][0] == self.DIFF_EQUAL):
796 # This is a single edit surrounded by equalities.
797 equality1 = diffs[pointer - 1][1]
798 edit = diffs[pointer][1]
799 equality2 = diffs[pointer + 1][1]
800
801 # First, shift the edit as far left as possible.
802 commonOffset = self.diff_commonSuffix(equality1, edit)
803 if commonOffset:
804 commonString = edit[-commonOffset:]
805 equality1 = equality1[:-commonOffset]
806 edit = commonString + edit[:-commonOffset]
807 equality2 = commonString + equality2
808
809 # Second, step character by character right, looking for the best fit.
810 bestEquality1 = equality1
811 bestEdit = edit
812 bestEquality2 = equality2
813 bestScore = (diff_cleanupSemanticScore(equality1, edit) +
814 diff_cleanupSemanticScore(edit, equality2))
815 while edit and equality2 and edit[0] == equality2[0]:
816 equality1 += edit[0]
817 edit = edit[1:] + equality2[0]
818 equality2 = equality2[1:]
819 score = (diff_cleanupSemanticScore(equality1, edit) +
820 diff_cleanupSemanticScore(edit, equality2))
821 # The >= encourages trailing rather than leading whitespace on edits.
822 if score >= bestScore:
823 bestScore = score
824 bestEquality1 = equality1
825 bestEdit = edit
826 bestEquality2 = equality2
827
828 if diffs[pointer - 1][1] != bestEquality1:
829 # We have an improvement, save it back to the diff.
830 if bestEquality1:
831 diffs[pointer - 1] = (diffs[pointer - 1][0], bestEquality1)
832 else:
833 del diffs[pointer - 1]
834 pointer -= 1
835 diffs[pointer] = (diffs[pointer][0], bestEdit)
836 if bestEquality2:
837 diffs[pointer + 1] = (diffs[pointer + 1][0], bestEquality2)
838 else:
839 del diffs[pointer + 1]
840 pointer -= 1
841 pointer += 1
842
843 # Define some regex patterns for matching boundaries.
844 BLANKLINEEND = re.compile(r"\n\r?\n$")
845 BLANKLINESTART = re.compile(r"^\r?\n\r?\n")
846
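The docstring's own example, as a runnable sketch (same import assumption as above):

    from diff_match_patch import diff_match_patch

    dmp = diff_match_patch()
    diffs = [(0, 'The c'), (1, 'at c'), (0, 'ame.')]
    dmp.diff_cleanupSemanticLossless(diffs)
    # The insertion has been shifted onto a word boundary:
    # diffs == [(0, 'The '), (1, 'cat '), (0, 'came.')]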
847 def diff_cleanupEfficiency(self, diffs):
848 """Reduce the number of edits by eliminating operationally trivial
849 equalities.
850
851 Args:
852 diffs: Array of diff tuples.
853 """
854 changes = False
855 equalities = [] # Stack of indices where equalities are found.
856 lastequality = None # Always equal to diffs[equalities[-1]][1]
857 pointer = 0 # Index of current position.
858 pre_ins = False # Is there an insertion operation before the last equality.
859 pre_del = False # Is there a deletion operation before the last equality.
860 post_ins = False # Is there an insertion operation after the last equality.
861 post_del = False # Is there a deletion operation after the last equality.
862 while pointer < len(diffs):
863 if diffs[pointer][0] == self.DIFF_EQUAL: # Equality found.
864 if (len(diffs[pointer][1]) < self.Diff_EditCost and
865 (post_ins or post_del)):
866 # Candidate found.
867 equalities.append(pointer)
868 pre_ins = post_ins
869 pre_del = post_del
870 lastequality = diffs[pointer][1]
871 else:
872 # Not a candidate, and can never become one.
873 equalities = []
874 lastequality = None
875
876 post_ins = post_del = False
877 else: # An insertion or deletion.
878 if diffs[pointer][0] == self.DIFF_DELETE:
879 post_del = True
880 else:
881 post_ins = True
882
883 # Five types to be split:
884 # <ins>A</ins><del>B</del>XY<ins>C</ins><del>D</del>
885 # <ins>A</ins>X<ins>C</ins><del>D</del>
886 # <ins>A</ins><del>B</del>X<ins>C</ins>
887 # <del>A</del>X<ins>C</ins><del>D</del>
888 # <ins>A</ins><del>B</del>X<del>C</del>
889
890 if lastequality and ((pre_ins and pre_del and post_ins and post_del) or
891 ((len(lastequality) < self.Diff_EditCost / 2) and
892 (pre_ins + pre_del + post_ins + post_del) == 3)):
893 # Duplicate record.
894 diffs.insert(equalities[-1], (self.DIFF_DELETE, lastequality))
895 # Change second copy to insert.
896 diffs[equalities[-1] + 1] = (self.DIFF_INSERT,
897 diffs[equalities[-1] + 1][1])
898 equalities.pop() # Throw away the equality we just deleted.
899 lastequality = None
900 if pre_ins and pre_del:
901 # No changes made which could affect previous entry, keep going.
902 post_ins = post_del = True
903 equalities = []
904 else:
905 if len(equalities):
906 equalities.pop() # Throw away the previous equality.
907 if len(equalities):
908 pointer = equalities[-1]
909 else:
910 pointer = -1
911 post_ins = post_del = False
912 changes = True
913 pointer += 1
914
915 if changes:
916 self.diff_cleanupMerge(diffs)
917
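A sketch of the four-edit elimination case (borrowed from the upstream test suite; with the default Diff_EditCost of 4, the three-character equality 'xyz' is cheaper to re-edit than to keep):

    from diff_match_patch import diff_match_patch

    dmp = diff_match_patch()
    diffs = [(-1, 'ab'), (1, '12'), (0, 'xyz'), (-1, 'cd'), (1, '34')]
    dmp.diff_cleanupEfficiency(diffs)
    # diffs == [(-1, 'abxyzcd'), (1, '12xyz34')]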
918 def diff_cleanupMerge(self, diffs):
919 """Reorder and merge like edit sections. Merge equalities.
920 Any edit section can move as long as it doesn't cross an equality.
921
922 Args:
923 diffs: Array of diff tuples.
924 """
925 diffs.append((self.DIFF_EQUAL, '')) # Add a dummy entry at the end.
926 pointer = 0
927 count_delete = 0
928 count_insert = 0
929 text_delete = ''
930 text_insert = ''
931 while pointer < len(diffs):
932 if diffs[pointer][0] == self.DIFF_INSERT:
933 count_insert += 1
934 text_insert += diffs[pointer][1]
935 pointer += 1
936 elif diffs[pointer][0] == self.DIFF_DELETE:
937 count_delete += 1
938 text_delete += diffs[pointer][1]
939 pointer += 1
940 elif diffs[pointer][0] == self.DIFF_EQUAL:
941 # Upon reaching an equality, check for prior redundancies.
942 if count_delete + count_insert > 1:
943 if count_delete != 0 and count_insert != 0:
944 # Factor out any common prefixes.
945 commonlength = self.diff_commonPrefix(text_insert, text_delete)
946 if commonlength != 0:
947 x = pointer - count_delete - count_insert - 1
948 if x >= 0 and diffs[x][0] == self.DIFF_EQUAL:
949 diffs[x] = (diffs[x][0], diffs[x][1] +
950 text_insert[:commonlength])
951 else:
952 diffs.insert(0, (self.DIFF_EQUAL, text_insert[:commonlength]))
953 pointer += 1
954 text_insert = text_insert[commonlength:]
955 text_delete = text_delete[commonlength:]
956 # Factor out any common suffixes.
957 commonlength = self.diff_commonSuffix(text_insert, text_delete)
958 if commonlength != 0:
959 diffs[pointer] = (diffs[pointer][0], text_insert[-commonlength:] +
960 diffs[pointer][1])
961 text_insert = text_insert[:-commonlength]
962 text_delete = text_delete[:-commonlength]
963 # Delete the offending records and add the merged ones.
964 if count_delete == 0:
965 diffs[pointer - count_insert : pointer] = [
966 (self.DIFF_INSERT, text_insert)]
967 elif count_insert == 0:
968 diffs[pointer - count_delete : pointer] = [
969 (self.DIFF_DELETE, text_delete)]
970 else:
971 diffs[pointer - count_delete - count_insert : pointer] = [
972 (self.DIFF_DELETE, text_delete),
973 (self.DIFF_INSERT, text_insert)]
974 pointer = pointer - count_delete - count_insert + 1
975 if count_delete != 0:
976 pointer += 1
977 if count_insert != 0:
978 pointer += 1
979 elif pointer != 0 and diffs[pointer - 1][0] == self.DIFF_EQUAL:
980 # Merge this equality with the previous one.
981 diffs[pointer - 1] = (diffs[pointer - 1][0],
982 diffs[pointer - 1][1] + diffs[pointer][1])
983 del diffs[pointer]
984 else:
985 pointer += 1
986
987 count_insert = 0
988 count_delete = 0
989 text_delete = ''
990 text_insert = ''
991
992 if diffs[-1][1] == '':
993 diffs.pop() # Remove the dummy entry at the end.
994
995 # Second pass: look for single edits surrounded on both sides by equalities
996 # which can be shifted sideways to eliminate an equality.
997 # e.g: A<ins>BA</ins>C -> <ins>AB</ins>AC
998 changes = False
999 pointer = 1
1000 # Intentionally ignore the first and last element (don't need checking).
1001 while pointer < len(diffs) - 1:
1002 if (diffs[pointer - 1][0] == self.DIFF_EQUAL and
1003 diffs[pointer + 1][0] == self.DIFF_EQUAL):
1004 # This is a single edit surrounded by equalities.
1005 if diffs[pointer][1].endswith(diffs[pointer - 1][1]):
1006 # Shift the edit over the previous equality.
1007 diffs[pointer] = (diffs[pointer][0],
1008 diffs[pointer - 1][1] +
1009 diffs[pointer][1][:-len(diffs[pointer - 1][1])])
1010 diffs[pointer + 1] = (diffs[pointer + 1][0],
1011 diffs[pointer - 1][1] + diffs[pointer + 1][1])
1012 del diffs[pointer - 1]
1013 changes = True
1014 elif diffs[pointer][1].startswith(diffs[pointer + 1][1]):
1015 # Shift the edit over the next equality.
1016 diffs[pointer - 1] = (diffs[pointer - 1][0],
1017 diffs[pointer - 1][1] + diffs[pointer + 1][1])
1018 diffs[pointer] = (diffs[pointer][0],
1019 diffs[pointer][1][len(diffs[pointer + 1][1]):] +
1020 diffs[pointer + 1][1])
1021 del diffs[pointer + 1]
1022 changes = True
1023 pointer += 1
1024
1025 # If shifts were made, the diff needs reordering and another shift sweep.
1026 if changes:
1027 self.diff_cleanupMerge(diffs)
1028
1029 def diff_xIndex(self, diffs, loc):
1030 """loc is a location in text1, compute and return the equivalent location
1031 in text2. e.g. "The cat" vs "The big cat", 1->1, 5->8
1032
1033 Args:
1034 diffs: Array of diff tuples.
1035 loc: Location within text1.
1036
1037 Returns:
1038 Location within text2.
1039 """
1040 chars1 = 0
1041 chars2 = 0
1042 last_chars1 = 0
1043 last_chars2 = 0
1044 for x in xrange(len(diffs)):
1045 (op, text) = diffs[x]
1046 if op != self.DIFF_INSERT: # Equality or deletion.
1047 chars1 += len(text)
1048 if op != self.DIFF_DELETE: # Equality or insertion.
1049 chars2 += len(text)
1050 if chars1 > loc: # Overshot the location.
1051 break
1052 last_chars1 = chars1
1053 last_chars2 = chars2
1054
1055 if len(diffs) != x and diffs[x][0] == self.DIFF_DELETE:
1056 # The location was deleted.
1057 return last_chars2
1058 # Add the remaining character length.
1059 return last_chars2 + (loc - last_chars1)
1060
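A sketch of the index translation (expected values from the upstream test suite):

    from diff_match_patch import diff_match_patch

    dmp = diff_match_patch()
    # 'a' deleted, '1234' inserted: location 2 of 'axyz' maps to 5 of '1234xyz'.
    dmp.diff_xIndex([(-1, 'a'), (1, '1234'), (0, 'xyz')], 2)   # -> 5
    # A location inside a deletion snaps back to the preceding position.
    dmp.diff_xIndex([(0, 'a'), (-1, '1234'), (0, 'xyz')], 3)   # -> 1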
1061 def diff_prettyHtml(self, diffs):
1062 """Convert a diff array into a pretty HTML report.
1063
1064 Args:
1065 diffs: Array of diff tuples.
1066
1067 Returns:
1068 HTML representation.
1069 """
1070 html = []
1071 for (op, data) in diffs:
1072 text = (data.replace("&", "&amp;").replace("<", "&lt;")
1073 .replace(">", "&gt;").replace("\n", "&para;<br>"))
1074 if op == self.DIFF_INSERT:
1075 html.append("<ins style=\"background:#e6ffe6;\">%s</ins>" % text)
1076 elif op == self.DIFF_DELETE:
1077 html.append("<del style=\"background:#ffe6e6;\">%s</del>" % text)
1078 elif op == self.DIFF_EQUAL:
1079 html.append("<span>%s</span>" % text)
1080 return "".join(html)
1081
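A sketch of the HTML output (expected string from the upstream test suite):

    from diff_match_patch import diff_match_patch

    dmp = diff_match_patch()
    diffs = [(0, 'a\n'), (-1, '<B>b</B>'), (1, 'c&d')]
    dmp.diff_prettyHtml(diffs)
    # -> '<span>a&para;<br></span>'
    #    '<del style="background:#ffe6e6;">&lt;B&gt;b&lt;/B&gt;</del>'
    #    '<ins style="background:#e6ffe6;">c&amp;d</ins>'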
1082 def diff_text1(self, diffs):
1083 """Compute and return the source text (all equalities and deletions).
1084
1085 Args:
1086 diffs: Array of diff tuples.
1087
1088 Returns:
1089 Source text.
1090 """
1091 text = []
1092 for (op, data) in diffs:
1093 if op != self.DIFF_INSERT:
1094 text.append(data)
1095 return "".join(text)
1096
1097 def diff_text2(self, diffs):
1098 """Compute and return the destination text (all equalities and insertions).
1099
1100 Args:
1101 diffs: Array of diff tuples.
1102
1103 Returns:
1104 Destination text.
1105 """
1106 text = []
1107 for (op, data) in diffs:
1108 if op != self.DIFF_DELETE:
1109 text.append(data)
1110 return "".join(text)
1111
1112 def diff_levenshtein(self, diffs):
1113 """Compute the Levenshtein distance; the number of inserted, deleted or
1114 substituted characters.
1115
1116 Args:
1117 diffs: Array of diff tuples.
1118
1119 Returns:
1120 Number of changes.
1121 """
1122 levenshtein = 0
1123 insertions = 0
1124 deletions = 0
1125 for (op, data) in diffs:
1126 if op == self.DIFF_INSERT:
1127 insertions += len(data)
1128 elif op == self.DIFF_DELETE:
1129 deletions += len(data)
1130 elif op == self.DIFF_EQUAL:
1131 # A deletion and an insertion is one substitution.
1132 levenshtein += max(insertions, deletions)
1133 insertions = 0
1134 deletions = 0
1135 levenshtein += max(insertions, deletions)
1136 return levenshtein
1137
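A sketch of how the position of the equality affects the distance (expected values from the upstream test suite):

    from diff_match_patch import diff_match_patch

    dmp = diff_match_patch()
    dmp.diff_levenshtein([(-1, 'abc'), (1, '1234'), (0, 'xyz')])  # -> 4
    dmp.diff_levenshtein([(0, 'xyz'), (-1, 'abc'), (1, '1234')])  # -> 4
    # With the equality in the middle the edits cannot overlap: 3 + 4.
    dmp.diff_levenshtein([(-1, 'abc'), (0, 'xyz'), (1, '1234')])  # -> 7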
1138 def diff_toDelta(self, diffs):
1139 """Crush the diff into an encoded string which describes the operations
1140 required to transform text1 into text2.
1141 E.g. =3\t-2\t+ing -> Keep 3 chars, delete 2 chars, insert 'ing'.
1142 Operations are tab-separated. Inserted text is escaped using %xx notation.
1143
1144 Args:
1145 diffs: Array of diff tuples.
1146
1147 Returns:
1148 Delta text.
1149 """
1150 text = []
1151 for (op, data) in diffs:
1152 if op == self.DIFF_INSERT:
1153 # High ascii will raise UnicodeDecodeError. Use Unicode instead.
1154 data = data.encode("utf-8")
1155 text.append("+" + urllib.quote(data, "!~*'();/?:@&=+$,# "))
1156 elif op == self.DIFF_DELETE:
1157 text.append("-%d" % len(data))
1158 elif op == self.DIFF_EQUAL:
1159 text.append("=%d" % len(data))
1160 return "\t".join(text)
1161
1162 def diff_fromDelta(self, text1, delta):
1163 """Given the original text1, and an encoded string which describes the
1164 operations required to transform text1 into text2, compute the full diff.
1165
1166 Args:
1167 text1: Source string for the diff.
1168 delta: Delta text.
1169
1170 Returns:
1171 Array of diff tuples.
1172
1173 Raises:
1174 ValueError: If invalid input.
1175 """
1176 if type(delta) == unicode:
1177 # Deltas should be composed of a subset of ascii chars, Unicode not
1178 # required. If this encode raises UnicodeEncodeError, delta is invalid.
1179 delta = delta.encode("ascii")
1180 diffs = []
1181 pointer = 0 # Cursor in text1
1182 tokens = delta.split("\t")
1183 for token in tokens:
1184 if token == "":
1185 # Blank tokens are ok (from a trailing \t).
1186 continue
1187 # Each token begins with a one character parameter which specifies the
1188 # operation of this token (delete, insert, equality).
1189 param = token[1:]
1190 if token[0] == "+":
1191 param = urllib.unquote(param).decode("utf-8")
1192 diffs.append((self.DIFF_INSERT, param))
1193 elif token[0] == "-" or token[0] == "=":
1194 try:
1195 n = int(param)
1196 except ValueError:
1197 raise ValueError("Invalid number in diff_fromDelta: " + param)
1198 if n < 0:
1199 raise ValueError("Negative number in diff_fromDelta: " + param)
1200 text = text1[pointer : pointer + n]
1201 pointer += n
1202 if token[0] == "=":
1203 diffs.append((self.DIFF_EQUAL, text))
1204 else:
1205 diffs.append((self.DIFF_DELETE, text))
1206 else:
1207 # Anything else is an error.
1208 raise ValueError("Invalid diff operation in diff_fromDelta: " +
1209 token[0])
1210 if pointer != len(text1):
1211 raise ValueError(
1212 "Delta length (%d) does not equal source text length (%d)." %
1213 (pointer, len(text1)))
1214 return diffs
1215
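A sketch of the delta round trip (illustration only; same import assumption as above):

    from diff_match_patch import diff_match_patch

    dmp = diff_match_patch()
    diffs = [(0, 'jump'), (-1, 's'), (1, 'ed'),
             (0, ' over '), (-1, 'the'), (1, 'a')]
    text1 = dmp.diff_text1(diffs)     # 'jumps over the'
    delta = dmp.diff_toDelta(diffs)   # '=4\t-1\t+ed\t=6\t-3\t+a'
    assert dmp.diff_fromDelta(text1, delta) == diffs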
1216 # MATCH FUNCTIONS
1217
1218 def match_main(self, text, pattern, loc):
1219 """Locate the best instance of 'pattern' in 'text' near 'loc'.
1220
1221 Args:
1222 text: The text to search.
1223 pattern: The pattern to search for.
1224 loc: The location to search around.
1225
1226 Returns:
1227 Best match index or -1.
1228 """
1229 # Check for null inputs.
1230 if text == None or pattern == None:
1231 raise ValueError("Null inputs. (match_main)")
1232
1233 loc = max(0, min(loc, len(text)))
1234 if text == pattern:
1235 # Shortcut (potentially not guaranteed by the algorithm)
1236 return 0
1237 elif not text:
1238 # Nothing to match.
1239 return -1
1240 elif text[loc:loc + len(pattern)] == pattern:
1241 # Perfect match at the perfect spot! (Includes case of null pattern)
1242 return loc
1243 else:
1244 # Do a fuzzy compare.
1245 match = self.match_bitap(text, pattern, loc)
1246 return match
1247
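A sketch of the fuzzy path (expected values from the upstream test suite, with the default Match_Threshold of 0.5):

    from diff_match_patch import diff_match_patch

    dmp = diff_match_patch()
    dmp.match_main("abcdef", "defy", 4)   # fuzzy Bitap hit -> 3
    dmp.match_main("I am the very model of a modern major general.",
                   " that berry ", 5)     # loose fuzzy hit -> 4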
1248 def match_bitap(self, text, pattern, loc):
1249 """Locate the best instance of 'pattern' in 'text' near 'loc' using the
1250 Bitap algorithm.
1251
1252 Args:
1253 text: The text to search.
1254 pattern: The pattern to search for.
1255 loc: The location to search around.
1256
1257 Returns:
1258 Best match index or -1.
1259 """
1260 # Python doesn't have a maxint limit, so ignore this check.
1261 #if self.Match_MaxBits != 0 and len(pattern) > self.Match_MaxBits:
1262 # raise ValueError("Pattern too long for this application.")
1263
1264 # Initialise the alphabet.
1265 s = self.match_alphabet(pattern)
1266
1267 def match_bitapScore(e, x):
1268 """Compute and return the score for a match with e errors and x location.
1269 Accesses loc and pattern through being a closure.
1270
1271 Args:
1272 e: Number of errors in match.
1273 x: Location of match.
1274
1275 Returns:
1276 Overall score for match (0.0 = good, 1.0 = bad).
1277 """
1278 accuracy = float(e) / len(pattern)
1279 proximity = abs(loc - x)
1280 if not self.Match_Distance:
1281 # Dodge divide by zero error.
1282 return proximity and 1.0 or accuracy
1283 return accuracy + (proximity / float(self.Match_Distance))
1284
1285 # Highest score beyond which we give up.
1286 score_threshold = self.Match_Threshold
1287 # Is there a nearby exact match? (speedup)
1288 best_loc = text.find(pattern, loc)
1289 if best_loc != -1:
1290 score_threshold = min(match_bitapScore(0, best_loc), score_threshold)
1291 # What about in the other direction? (speedup)
1292 best_loc = text.rfind(pattern, loc + len(pattern))
1293 if best_loc != -1:
1294 score_threshold = min(match_bitapScore(0, best_loc), score_threshold)
1295
1296 # Initialise the bit arrays.
1297 matchmask = 1 << (len(pattern) - 1)
1298 best_loc = -1
1299
1300 bin_max = len(pattern) + len(text)
1301 # Empty initialization added to appease pychecker.
1302 last_rd = None
1303 for d in xrange(len(pattern)):
1304 # Scan for the best match each iteration allows for one more error.
1305 # Run a binary search to determine how far from 'loc' we can stray at
1306 # this error level.
1307 bin_min = 0
1308 bin_mid = bin_max
1309 while bin_min < bin_mid:
1310 if match_bitapScore(d, loc + bin_mid) <= score_threshold:
1311 bin_min = bin_mid
1312 else:
1313 bin_max = bin_mid
1314 bin_mid = (bin_max - bin_min) // 2 + bin_min
1315
1316 # Use the result from this iteration as the maximum for the next.
1317 bin_max = bin_mid
1318 start = max(1, loc - bin_mid + 1)
1319 finish = min(loc + bin_mid, len(text)) + len(pattern)
1320
1321 rd = [0] * (finish + 2)
1322 rd[finish + 1] = (1 << d) - 1
1323 for j in xrange(finish, start - 1, -1):
1324 if len(text) <= j - 1:
1325 # Out of range.
1326 charMatch = 0
1327 else:
1328 charMatch = s.get(text[j - 1], 0)
1329 if d == 0: # First pass: exact match.
1330 rd[j] = ((rd[j + 1] << 1) | 1) & charMatch
1331 else: # Subsequent passes: fuzzy match.
1332 rd[j] = (((rd[j + 1] << 1) | 1) & charMatch) | (
1333 ((last_rd[j + 1] | last_rd[j]) << 1) | 1) | last_rd[j + 1]
1334 if rd[j] & matchmask:
1335 score = match_bitapScore(d, j - 1)
1336 # This match will almost certainly be better than any existing match.
1337 # But check anyway.
1338 if score <= score_threshold:
1339 # Told you so.
1340 score_threshold = score
1341 best_loc = j - 1
1342 if best_loc > loc:
1343 # When passing loc, don't exceed our current distance from loc.
1344 start = max(1, 2 * loc - best_loc)
1345 else:
1346 # Already passed loc, downhill from here on in.
1347 break
1348 # No hope for a (better) match at greater error levels.
1349 if match_bitapScore(d + 1, loc) > score_threshold:
1350 break
1351 last_rd = rd
1352 return best_loc
1353
1354 def match_alphabet(self, pattern):
1355 """Initialise the alphabet for the Bitap algorithm.
1356
1357 Args:
1358 pattern: The text to encode.
1359
1360 Returns:
1361 Hash of character locations.
1362 """
1363 s = {}
1364 for char in pattern:
1365 s[char] = 0
1366 for i in xrange(len(pattern)):
1367 s[pattern[i]] |= 1 << (len(pattern) - i - 1)
1368 return s
1369
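A sketch of the bitmask table (expected value from the upstream test suite): each character maps to a mask with one bit set per position it occupies, highest bit first:

    from diff_match_patch import diff_match_patch

    dmp = diff_match_patch()
    dmp.match_alphabet("abc")   # -> {'a': 4, 'b': 2, 'c': 1}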
1370 # PATCH FUNCTIONS
1371
1372 def patch_addContext(self, patch, text):
1373 """Increase the context until it is unique,
1374 but don't let the pattern expand beyond Match_MaxBits.
1375
1376 Args:
1377 patch: The patch to grow.
1378 text: Source text.
1379 """
1380 if len(text) == 0:
1381 return
1382 pattern = text[patch.start2 : patch.start2 + patch.length1]
1383 padding = 0
1384
1385 # Look for the first and last matches of pattern in text. If two different
1386 # matches are found, increase the pattern length.
1387 while (text.find(pattern) != text.rfind(pattern) and (self.Match_MaxBits ==
1388 0 or len(pattern) < self.Match_MaxBits - self.Patch_Margin -
1389 self.Patch_Margin)):
1390 padding += self.Patch_Margin
1391 pattern = text[max(0, patch.start2 - padding) :
1392 patch.start2 + patch.length1 + padding]
1393 # Add one chunk for good luck.
1394 padding += self.Patch_Margin
1395
1396 # Add the prefix.
1397 prefix = text[max(0, patch.start2 - padding) : patch.start2]
1398 if prefix:
1399 patch.diffs[:0] = [(self.DIFF_EQUAL, prefix)]
1400 # Add the suffix.
1401 suffix = text[patch.start2 + patch.length1 :
1402 patch.start2 + patch.length1 + padding]
1403 if suffix:
1404 patch.diffs.append((self.DIFF_EQUAL, suffix))
1405
1406 # Roll back the start points.
1407 patch.start1 -= len(prefix)
1408 patch.start2 -= len(prefix)
1409 # Extend lengths.
1410 patch.length1 += len(prefix) + len(suffix)
1411 patch.length2 += len(prefix) + len(suffix)
1412
1413 def patch_make(self, a, b=None, c=None):
1414 """Compute a list of patches to turn text1 into text2.
1415 Use diffs if provided, otherwise compute it ourselves.
1416 There are four ways to call this function, depending on what data is
1417 available to the caller:
1418 Method 1:
1419 a = text1, b = text2
1420 Method 2:
1421 a = diffs
1422 Method 3 (optimal):
1423 a = text1, b = diffs
1424 Method 4 (deprecated, use method 3):
1425 a = text1, b = text2, c = diffs
1426
1427 Args:
1428 a: text1 (methods 1,3,4) or Array of diff tuples for text1 to
1429 text2 (method 2).
1430 b: text2 (methods 1,4) or Array of diff tuples for text1 to
1431 text2 (method 3) or undefined (method 2).
1432 c: Array of diff tuples for text1 to text2 (method 4) or
1433 undefined (methods 1,2,3).
1434
1435 Returns:
1436 Array of Patch objects.
1437 """
1438 text1 = None
1439 diffs = None
1440 # Note that texts may arrive as 'str' or 'unicode'.
1441 if isinstance(a, basestring) and isinstance(b, basestring) and c is None:
1442 # Method 1: text1, text2
1443 # Compute diffs from text1 and text2.
1444 text1 = a
1445 diffs = self.diff_main(text1, b, True)
1446 if len(diffs) > 2:
1447 self.diff_cleanupSemantic(diffs)
1448 self.diff_cleanupEfficiency(diffs)
1449 elif isinstance(a, list) and b is None and c is None:
1450 # Method 2: diffs
1451 # Compute text1 from diffs.
1452 diffs = a
1453 text1 = self.diff_text1(diffs)
1454 elif isinstance(a, basestring) and isinstance(b, list) and c is None:
1455 # Method 3: text1, diffs
1456 text1 = a
1457 diffs = b
1458 elif (isinstance(a, basestring) and isinstance(b, basestring) and
1459 isinstance(c, list)):
1460 # Method 4: text1, text2, diffs
1461 # text2 is not used.
1462 text1 = a
1463 diffs = c
1464 else:
1465 raise ValueError("Unknown call format to patch_make.")
1466
1467 if not diffs:
1468 return [] # Get rid of the None case.
1469 patches = []
1470 patch = patch_obj()
1471 char_count1 = 0 # Number of characters into the text1 string.
1472 char_count2 = 0 # Number of characters into the text2 string.
1473 prepatch_text = text1 # Recreate the patches to determine context info.
1474 postpatch_text = text1
1475 for x in xrange(len(diffs)):
1476 (diff_type, diff_text) = diffs[x]
1477 if len(patch.diffs) == 0 and diff_type != self.DIFF_EQUAL:
1478 # A new patch starts here.
1479 patch.start1 = char_count1
1480 patch.start2 = char_count2
1481 if diff_type == self.DIFF_INSERT:
1482 # Insertion
1483 patch.diffs.append(diffs[x])
1484 patch.length2 += len(diff_text)
1485 postpatch_text = (postpatch_text[:char_count2] + diff_text +
1486 postpatch_text[char_count2:])
1487 elif diff_type == self.DIFF_DELETE:
1488 # Deletion.
1489 patch.length1 += len(diff_text)
1490 patch.diffs.append(diffs[x])
1491 postpatch_text = (postpatch_text[:char_count2] +
1492 postpatch_text[char_count2 + len(diff_text):])
1493 elif (diff_type == self.DIFF_EQUAL and
1494 len(diff_text) <= 2 * self.Patch_Margin and
1495 len(patch.diffs) != 0 and len(diffs) != x + 1):
1496 # Small equality inside a patch.
1497 patch.diffs.append(diffs[x])
1498 patch.length1 += len(diff_text)
1499 patch.length2 += len(diff_text)
1500
1501 if (diff_type == self.DIFF_EQUAL and
1502 len(diff_text) >= 2 * self.Patch_Margin):
1503 # Time for a new patch.
1504 if len(patch.diffs) != 0:
1505 self.patch_addContext(patch, prepatch_text)
1506 patches.append(patch)
1507 patch = patch_obj()
1508 # Unlike Unidiff, our patch lists have a rolling context.
1509 # http://code.google.com/p/google-diff-match-patch/wiki/Unidiff
1510 # Update prepatch text & pos to reflect the application of the
1511 # just completed patch.
1512 prepatch_text = postpatch_text
1513 char_count1 = char_count2
1514
1515 # Update the current character count.
1516 if diff_type != self.DIFF_INSERT:
1517 char_count1 += len(diff_text)
1518 if diff_type != self.DIFF_DELETE:
1519 char_count2 += len(diff_text)
1520
1521 # Pick up the leftover patch if not empty.
1522 if len(patch.diffs) != 0:
1523 self.patch_addContext(patch, prepatch_text)
1524 patches.append(patch)
1525 return patches
1526
1527 def patch_deepCopy(self, patches):
1528 """Given an array of patches, return another array that is identical.
1529
1530 Args:
1531 patches: Array of Patch objects.
1532
1533 Returns:
1534 Array of Patch objects.
1535 """
1536 patchesCopy = []
1537 for patch in patches:
1538 patchCopy = patch_obj()
1539 # No need to deep copy the tuples since they are immutable.
1540 patchCopy.diffs = patch.diffs[:]
1541 patchCopy.start1 = patch.start1
1542 patchCopy.start2 = patch.start2
1543 patchCopy.length1 = patch.length1
1544 patchCopy.length2 = patch.length2
1545 patchesCopy.append(patchCopy)
1546 return patchesCopy
1547
1548 def patch_apply(self, patches, text):
1549 """Merge a set of patches onto the text. Return a patched text, as well
1550 as a list of true/false values indicating which patches were applied.
1551
1552 Args:
1553 patches: Array of Patch objects.
1554 text: Old text.
1555
1556 Returns:
1557 Two element Array, containing the new text and an array of boolean values.
1558 """
1559 if not patches:
1560 return (text, [])
1561
1562 # Deep copy the patches so that no changes are made to originals.
1563 patches = self.patch_deepCopy(patches)
1564
1565 nullPadding = self.patch_addPadding(patches)
1566 text = nullPadding + text + nullPadding
1567 self.patch_splitMax(patches)
1568
1569 # delta keeps track of the offset between the expected and actual location
1570 # of the previous patch. If there are patches expected at positions 10 and
1571 # 20, but the first patch was found at 12, delta is 2 and the second patch
1572 # has an effective expected position of 22.
1573 delta = 0
1574 results = []
1575 for patch in patches:
1576 expected_loc = patch.start2 + delta
1577 text1 = self.diff_text1(patch.diffs)
1578 end_loc = -1
1579 if len(text1) > self.Match_MaxBits:
1580 # patch_splitMax will only provide an oversized pattern in the case of
1581 # a monster delete.
1582 start_loc = self.match_main(text, text1[:self.Match_MaxBits],
1583 expected_loc)
1584 if start_loc != -1:
1585 end_loc = self.match_main(text, text1[-self.Match_MaxBits:],
1586 expected_loc + len(text1) - self.Match_MaxBits)
1587 if end_loc == -1 or start_loc >= end_loc:
1588 # Can't find valid trailing context. Drop this patch.
1589 start_loc = -1
1590 else:
1591 start_loc = self.match_main(text, text1, expected_loc)
1592 if start_loc == -1:
1593 # No match found. :(
1594 results.append(False)
1595 # Subtract the delta for this failed patch from subsequent patches.
1596 delta -= patch.length2 - patch.length1
1597 else:
1598 # Found a match. :)
1599 results.append(True)
1600 delta = start_loc - expected_loc
1601 if end_loc == -1:
1602 text2 = text[start_loc : start_loc + len(text1)]
1603 else:
1604 text2 = text[start_loc : end_loc + self.Match_MaxBits]
1605 if text1 == text2:
1606 # Perfect match, just shove the replacement text in.
1607 text = (text[:start_loc] + self.diff_text2(patch.diffs) +
1608 text[start_loc + len(text1):])
1609 else:
1610 # Imperfect match.
1611 # Run a diff to get a framework of equivalent indices.
1612 diffs = self.diff_main(text1, text2, False)
1613 if (len(text1) > self.Match_MaxBits and
1614 self.diff_levenshtein(diffs) / float(len(text1)) >
1615 self.Patch_DeleteThreshold):
1616 # The end points match, but the content is unacceptably bad.
1617 results[-1] = False
1618 else:
1619 self.diff_cleanupSemanticLossless(diffs)
1620 index1 = 0
1621 for (op, data) in patch.diffs:
1622 if op != self.DIFF_EQUAL:
1623 index2 = self.diff_xIndex(diffs, index1)
1624 if op == self.DIFF_INSERT: # Insertion
1625 text = text[:start_loc + index2] + data + text[start_loc +
1626 index2:]
1627 elif op == self.DIFF_DELETE: # Deletion
1628 text = text[:start_loc + index2] + text[start_loc +
1629 self.diff_xIndex(diffs, index1 + len(data)):]
1630 if op != self.DIFF_DELETE:
1631 index1 += len(data)
1632 # Strip the padding off.
1633 text = text[len(nullPadding):-len(nullPadding)]
1634 return (text, results)
1635
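A sketch of the make/apply round trip (strings from the upstream test suite; same import assumption as above):

    from diff_match_patch import diff_match_patch

    dmp = diff_match_patch()
    text1 = "The quick brown fox jumps over the lazy dog."
    text2 = "That quick brown fox jumped over a lazy dog."
    patches = dmp.patch_make(text1, text2)
    (result, applied) = dmp.patch_apply(patches, text1)
    # result == text2; applied == [True, True] (both patches matched exactly)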
1636 def patch_addPadding(self, patches):
1637 """Add some padding on text start and end so that edges can match
1638 something. Intended to be called only from within patch_apply.
1639
1640 Args:
1641 patches: Array of Patch objects.
1642
1643 Returns:
1644 The padding string added to each side.
1645 """
1646 paddingLength = self.Patch_Margin
1647 nullPadding = ""
1648 for x in xrange(1, paddingLength + 1):
1649 nullPadding += chr(x)
1650
1651 # Bump all the patches forward.
1652 for patch in patches:
1653 patch.start1 += paddingLength
1654 patch.start2 += paddingLength
1655
1656 # Add some padding on start of first diff.
1657 patch = patches[0]
1658 diffs = patch.diffs
1659 if not diffs or diffs[0][0] != self.DIFF_EQUAL:
1660 # Add nullPadding equality.
1661 diffs.insert(0, (self.DIFF_EQUAL, nullPadding))
1662 patch.start1 -= paddingLength # Should be 0.
1663 patch.start2 -= paddingLength # Should be 0.
1664 patch.length1 += paddingLength
1665 patch.length2 += paddingLength
1666 elif paddingLength > len(diffs[0][1]):
1667 # Grow first equality.
1668 extraLength = paddingLength - len(diffs[0][1])
1669 newText = nullPadding[len(diffs[0][1]):] + diffs[0][1]
1670 diffs[0] = (diffs[0][0], newText)
1671 patch.start1 -= extraLength
1672 patch.start2 -= extraLength
1673 patch.length1 += extraLength
1674 patch.length2 += extraLength
1675
1676 # Add some padding on end of last diff.
1677 patch = patches[-1]
1678 diffs = patch.diffs
1679 if not diffs or diffs[-1][0] != self.DIFF_EQUAL:
1680 # Add nullPadding equality.
1681 diffs.append((self.DIFF_EQUAL, nullPadding))
1682 patch.length1 += paddingLength
1683 patch.length2 += paddingLength
1684 elif paddingLength > len(diffs[-1][1]):
1685 # Grow last equality.
1686 extraLength = paddingLength - len(diffs[-1][1])
1687 newText = diffs[-1][1] + nullPadding[:extraLength]
1688 diffs[-1] = (diffs[-1][0], newText)
1689 patch.length1 += extraLength
1690 patch.length2 += extraLength
1691
1692 return nullPadding
1693
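# A quick sketch of the padding produced above, with the default
# Patch_Margin of 4 (patch_fromText is defined further down):
#
#     dmp = diff_match_patch()
#     patches = dmp.patch_fromText("@@ -1,1 +1,1 @@\n-a\n+b\n")
#     padding = dmp.patch_addPadding(patches)
#     # padding == '\x01\x02\x03\x04'; the patch now begins and ends
#     # with a DIFF_EQUAL run of those four control characters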
1694 def patch_splitMax(self, patches):
1695 """Look through the patches and break up any which are longer than the
1696 maximum limit of the match algorithm.
1697 Intended to be called only from within patch_apply.
1698
1699 Args:
1700 patches: Array of Patch objects.
1701 """
1702 patch_size = self.Match_MaxBits
1703 if patch_size == 0:
1704 # Python has the option of not splitting strings due to its ability
1705 # to handle integers of arbitrary precision.
1706 return
1707 for x in xrange(len(patches)):
1708 if patches[x].length1 <= patch_size:
1709 continue
1710 bigpatch = patches[x]
1711 # Remove the big old patch.
1712 del patches[x]
1713 x -= 1
1714 start1 = bigpatch.start1
1715 start2 = bigpatch.start2
1716 precontext = ''
1717 while len(bigpatch.diffs) != 0:
1718 # Create one of several smaller patches.
1719 patch = patch_obj()
1720 empty = True
1721 patch.start1 = start1 - len(precontext)
1722 patch.start2 = start2 - len(precontext)
1723 if precontext:
1724 patch.length1 = patch.length2 = len(precontext)
1725 patch.diffs.append((self.DIFF_EQUAL, precontext))
1726
1727 while (len(bigpatch.diffs) != 0 and
1728 patch.length1 < patch_size - self.Patch_Margin):
1729 (diff_type, diff_text) = bigpatch.diffs[0]
1730 if diff_type == self.DIFF_INSERT:
1731 # Insertions are harmless.
1732 patch.length2 += len(diff_text)
1733 start2 += len(diff_text)
1734 patch.diffs.append(bigpatch.diffs.pop(0))
1735 empty = False
1736 elif (diff_type == self.DIFF_DELETE and len(patch.diffs) == 1 and
1737 patch.diffs[0][0] == self.DIFF_EQUAL and
1738 len(diff_text) > 2 * patch_size):
1739 # This is a large deletion. Let it pass in one chunk.
1740 patch.length1 += len(diff_text)
1741 start1 += len(diff_text)
1742 empty = False
1743 patch.diffs.append((diff_type, diff_text))
1744 del bigpatch.diffs[0]
1745 else:
1746 # Deletion or equality. Only take as much as we can stomach.
1747 diff_text = diff_text[:patch_size - patch.length1 -
1748 self.Patch_Margin]
1749 patch.length1 += len(diff_text)
1750 start1 += len(diff_text)
1751 if diff_type == self.DIFF_EQUAL:
1752 patch.length2 += len(diff_text)
1753 start2 += len(diff_text)
1754 else:
1755 empty = False
1756
1757 patch.diffs.append((diff_type, diff_text))
1758 if diff_text == bigpatch.diffs[0][1]:
1759 del bigpatch.diffs[0]
1760 else:
1761 bigpatch.diffs[0] = (bigpatch.diffs[0][0],
1762 bigpatch.diffs[0][1][len(diff_text):])
1763
1764 # Compute the head context for the next patch.
1765 precontext = self.diff_text2(patch.diffs)
1766 precontext = precontext[-self.Patch_Margin:]
1767 # Append the end context for this patch.
1768 postcontext = self.diff_text1(bigpatch.diffs)[:self.Patch_Margin]
1769 if postcontext:
1770 patch.length1 += len(postcontext)
1771 patch.length2 += len(postcontext)
1772 if len(patch.diffs) != 0 and patch.diffs[-1][0] == self.DIFF_EQUAL:
1773 patch.diffs[-1] = (self.DIFF_EQUAL, patch.diffs[-1][1] +
1774 postcontext)
1775 else:
1776 patch.diffs.append((self.DIFF_EQUAL, postcontext))
1777
1778 if not empty:
1779 x += 1
1780 patches.insert(x, patch)
1781
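# A splitting sketch: a 50-character replacement exceeds Match_MaxBits
# (32), so it is broken into pieces the bitap matcher can handle
# (patch_make is defined earlier in this module):
#
#     dmp = diff_match_patch()
#     patches = dmp.patch_make("1234567890" * 5, "abcdefghij" * 5)
#     dmp.patch_splitMax(patches)
#     assert all(p.length1 <= dmp.Match_MaxBits for p in patches)
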
1782 def patch_toText(self, patches):
1783 """Take a list of patches and return a textual representation.
1784
1785 Args:
1786 patches: Array of Patch objects.
1787
1788 Returns:
1789 Text representation of patches.
1790 """
1791 text = []
1792 for patch in patches:
1793 text.append(str(patch))
1794 return "".join(text)
1795
1796 def patch_fromText(self, textline):
1797 """Parse a textual representation of patches and return a list of patch
1798 objects.
1799
1800 Args:
1801 textline: Text representation of patches.
1802
1803 Returns:
1804 Array of Patch objects.
1805
1806 Raises:
1807 ValueError: If invalid input.
1808 """
1809 if type(textline) == unicode:
1810 # Patches should be composed of a subset of ascii chars, Unicode not
1811 # required. If this encode raises UnicodeEncodeError, patch is invalid.
1812 textline = textline.encode("ascii")
1813 patches = []
1814 if not textline:
1815 return patches
1816 text = textline.split('\n')
1817 while len(text) != 0:
1818 m = re.match("^@@ -(\d+),?(\d*) \+(\d+),?(\d*) @@$", text[0])
1819 if not m:
1820 raise ValueError("Invalid patch string: " + text[0])
1821 patch = patch_obj()
1822 patches.append(patch)
1823 patch.start1 = int(m.group(1))
1824 if m.group(2) == '':
1825 patch.start1 -= 1
1826 patch.length1 = 1
1827 elif m.group(2) == '0':
1828 patch.length1 = 0
1829 else:
1830 patch.start1 -= 1
1831 patch.length1 = int(m.group(2))
1832
1833 patch.start2 = int(m.group(3))
1834 if m.group(4) == '':
1835 patch.start2 -= 1
1836 patch.length2 = 1
1837 elif m.group(4) == '0':
1838 patch.length2 = 0
1839 else:
1840 patch.start2 -= 1
1841 patch.length2 = int(m.group(4))
1842
1843 del text[0]
1844
1845 while len(text) != 0:
1846 if text[0]:
1847 sign = text[0][0]
1848 else:
1849 sign = ''
1850 line = urllib.unquote(text[0][1:])
1851 line = line.decode("utf-8")
1852 if sign == '+':
1853 # Insertion.
1854 patch.diffs.append((self.DIFF_INSERT, line))
1855 elif sign == '-':
1856 # Deletion.
1857 patch.diffs.append((self.DIFF_DELETE, line))
1858 elif sign == ' ':
1859 # Minor equality.
1860 patch.diffs.append((self.DIFF_EQUAL, line))
1861 elif sign == '@':
1862 # Start of next patch.
1863 break
1864 elif sign == '':
1865 # Blank line? Whatever.
1866 pass
1867 else:
1868 # WTF?
1869 raise ValueError("Invalid patch mode: '%s'\n%s" % (sign, line))
1870 del text[0]
1871 return patches
1872
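# A round-trip sketch: for well-formed patch text, patch_fromText and
# patch_toText are inverses:
#
#     dmp = diff_match_patch()
#     text = "@@ -1,4 +1,6 @@\n te\n-s\n+xes\n t\n"
#     assert dmp.patch_toText(dmp.patch_fromText(text)) == text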
1873
1874 class patch_obj:
1875 """Class representing one patch operation.
1876 """
1877
1878 def __init__(self):
1879 """Initializes with an empty list of diffs.
1880 """
1881 self.diffs = []
1882 self.start1 = None
1883 self.start2 = None
1884 self.length1 = 0
1885 self.length2 = 0
1886
1887 def __str__(self):
1888 """Emmulate GNU diff's format.
1889 Header: @@ -382,8 +481,9 @@
1890 Indicies are printed as 1-based, not 0-based.
1891
1892 Returns:
1893 The GNU diff string.
1894 """
1895 if self.length1 == 0:
1896 coords1 = str(self.start1) + ",0"
1897 elif self.length1 == 1:
1898 coords1 = str(self.start1 + 1)
1899 else:
1900 coords1 = str(self.start1 + 1) + "," + str(self.length1)
1901 if self.length2 == 0:
1902 coords2 = str(self.start2) + ",0"
1903 elif self.length2 == 1:
1904 coords2 = str(self.start2 + 1)
1905 else:
1906 coords2 = str(self.start2 + 1) + "," + str(self.length2)
1907 text = ["@@ -", coords1, " +", coords2, " @@\n"]
1908 # Escape the body of the patch with %xx notation.
1909 for (op, data) in self.diffs:
1910 if op == diff_match_patch.DIFF_INSERT:
1911 text.append("+")
1912 elif op == diff_match_patch.DIFF_DELETE:
1913 text.append("-")
1914 elif op == diff_match_patch.DIFF_EQUAL:
1915 text.append(" ")
1916 # High ascii will raise UnicodeDecodeError. Use Unicode instead.
1917 data = data.encode("utf-8")
1918 text.append(urllib.quote(data, "!~*'();/?:@&=+$,# ") + "\n")
1919 return "".join(text)
\ No newline at end of file
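
# A sketch of the header conventions above: a zero-length range prints
# the 0-based preceding index, everything else is printed 1-based:
#
#     p = patch_obj()
#     p.start1 = p.start2 = 3
#     p.length1, p.length2 = 0, 1
#     p.diffs = [(diff_match_patch.DIFF_INSERT, u"x")]
#     assert str(p) == "@@ -3,0 +4 @@\n+x\n"
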
@@ -0,0 +1,398 b''
1 <%def name="diff_line_anchor(filename, line, type)"><%
2 return '%s_%s_%i' % (h.safeid(filename), type, line)
3 %></%def>
4
5 <%def name="action_class(action)"><%
6 return {
7 '-': 'cb-deletion',
8 '+': 'cb-addition',
9 ' ': 'cb-context',
10 }.get(action, 'cb-empty')
11 %></%def>
12
13 <%def name="op_class(op_id)"><%
14 return {
15 DEL_FILENODE: 'deletion', # file deleted
16 BIN_FILENODE: 'warning' # binary diff hidden
17 }.get(op_id, 'addition')
18 %></%def>
19
20 <%def name="link_for(**kw)"><%
21 new_args = request.GET.mixed()
22 new_args.update(kw)
23 return h.url('', **new_args)
24 %></%def>
25
26 <%def name="render_diffset(diffset,
27
28 # collapse all file diff entries when there are more than this many files in the diff
29 collapse_when_files_over=20,
30
31 # collapse lines in the diff when more than this many lines changed in the file diff
32 lines_changed_limit=500,
33 )">
34 <%
35 # TODO: dan: move this to an argument - and set a cookie so that it is saved
36 # default option for future requests
37 diff_mode = request.GET.get('diffmode', 'sideside')
38 if diff_mode not in ('sideside', 'unified'):
39 diff_mode = 'sideside'
40
41 collapse_all = len(diffset.files) > collapse_when_files_over
42 %>
43
44 %if diff_mode == 'sideside':
45 <style>
46 .wrapper {
47 max-width: 1600px !important;
48 }
49 </style>
50 %endif
51
52 % if diffset.limited_diff:
53 <div class="alert alert-warning">
54 ${_('The requested commit is too big and content was truncated.')} <a href="${link_for(fulldiff=1)}" onclick="return confirm('${_("Showing a big diff might take some time and resources, continue?")}')">${_('Show full diff')}</a>
55 </div>
56 % endif
57
58 <div class="cs_files">
59 <div class="cs_files_title">
60 %if diffset.files:
61 <div class="pull-right">
62 <div class="btn-group">
63 <a
64 class="btn ${diff_mode == 'sideside' and 'btn-primary'} tooltip"
65 title="${_('View side by side')}"
66 href="${link_for(diffmode='sideside')}">
67 <span>${_('Side by Side')}</span>
68 </a>
69 <a
70 class="btn ${diff_mode == 'unified' and 'btn-primary'} tooltip"
71 title="${_('View unified')}" href="${link_for(diffmode='unified')}">
72 <span>${_('Unified')}</span>
73 </a>
74 </div>
75 </div>
76 <div class="pull-left">
77 <div class="btn-group">
78 <a
79 class="btn"
80 href="#"
81 onclick="$('input[class=diff-collapse-state]').prop('checked', false); return false">${_('Expand All')}</a>
82 <a
83 class="btn"
84 href="#"
85 onclick="$('input[class=diff-collapse-state]').prop('checked', true); return false">${_('Collapse All')}</a>
86 </div>
87 </div>
88 %endif
89 <h2 style="padding: 5px; text-align: center;">
90 %if diffset.limited_diff:
91 ${ungettext('%(num)s file changed', '%(num)s files changed', diffset.changed_files) % {'num': diffset.changed_files}}
92 %else:
93 ${ungettext('%(num)s file changed: %(linesadd)s inserted, ''%(linesdel)s deleted',
94 '%(num)s files changed: %(linesadd)s inserted, %(linesdel)s deleted', diffset.changed_files) % {'num': diffset.changed_files, 'linesadd': diffset.lines_added, 'linesdel': diffset.lines_deleted}}
95 %endif
96 </h2>
97 </div>
98
99 %if not diffset.files:
100 <p class="empty_data">${_('No files')}</p>
101 %endif
102
103 <div class="filediffs">
104 %for i, filediff in enumerate(diffset.files):
105 <%
106 lines_changed = filediff['patch']['stats']['added'] + filediff['patch']['stats']['deleted']
107 over_lines_changed_limit = lines_changed > lines_changed_limit
108 %>
109 <input ${collapse_all and 'checked' or ''} class="diff-collapse-state" id="diff-collapse-${i}" type="checkbox">
110 <div
111 class="diff"
112 data-f-path="${filediff['patch']['filename']}"
113 id="a_${h.FID('', filediff['patch']['filename'])}">
114 <label for="diff-collapse-${i}" class="diff-heading">
115 <div class="diff-collapse-indicator"></div>
116 ${diff_ops(filediff)}
117 </label>
118 ${diff_menu(filediff)}
119 <table class="cb cb-diff-${diff_mode} code-highlight ${over_lines_changed_limit and 'cb-collapsed' or ''}">
120 %if not filediff.hunks:
121 %for op_id, op_text in filediff['patch']['stats']['ops'].items():
122 <tr>
123 <td class="cb-text cb-${op_class(op_id)}" ${diff_mode == 'unified' and 'colspan=3' or 'colspan=4'}>
124 %if op_id == DEL_FILENODE:
125 ${_('File was deleted')}
126 %elif op_id == BIN_FILENODE:
127 ${_('Binary file hidden')}
128 %else:
129 ${op_text}
130 %endif
131 </td>
132 </tr>
133 %endfor
134 %endif
135 %if over_lines_changed_limit:
136 <tr class="cb-warning cb-collapser">
137 <td class="cb-text" ${diff_mode == 'unified' and 'colspan=3' or 'colspan=4'}>
138 ${_('This diff has been collapsed as it changes many lines (%i lines changed)' % lines_changed)}
139 <a href="#" class="cb-expand"
140 onclick="$(this).closest('table').removeClass('cb-collapsed'); return false;">${_('Show them')}
141 </a>
142 <a href="#" class="cb-collapse"
143 onclick="$(this).closest('table').addClass('cb-collapsed'); return false;">${_('Hide them')}
144 </a>
145 </td>
146 </tr>
147 %endif
148 %if filediff.patch['is_limited_diff']:
149 <tr class="cb-warning cb-collapser">
150 <td class="cb-text" ${diff_mode == 'unified' and 'colspan=3' or 'colspan=4'}>
151 ${_('The requested commit is too big and content was truncated.')} <a href="${link_for(fulldiff=1)}" onclick="return confirm('${_("Showing a big diff might take some time and resources, continue?")}')">${_('Show full diff')}</a>
152 </td>
153 </tr>
154 %endif
155 %for hunk in filediff.hunks:
156 <tr class="cb-hunk">
157 <td ${diff_mode == 'unified' and 'colspan=2' or ''}>
158 ## TODO: dan: add ajax loading of more context here
159 ## <a href="#">
160 <i class="icon-more"></i>
161 ## </a>
162 </td>
163 <td ${diff_mode == 'sideside' and 'colspan=3' or ''}>
164 @@
165 -${hunk.source_start},${hunk.source_length}
166 +${hunk.target_start},${hunk.target_length}
167 ${hunk.section_header}
168 </td>
169 </tr>
170 %if diff_mode == 'unified':
171 ${render_hunk_lines_unified(hunk)}
172 %elif diff_mode == 'sideside':
173 ${render_hunk_lines_sideside(hunk)}
174 %else:
175 <tr class="cb-line">
176 <td>unknown diff mode</td>
177 </tr>
178 %endif
179 %endfor
180 </table>
181 </div>
182 %endfor
183 </div>
184 </div>
185 </%def>
186
187 <%def name="diff_ops(filediff)">
188 <%
189 stats = filediff['patch']['stats']
190 from rhodecode.lib.diffs import NEW_FILENODE, DEL_FILENODE, \
191 MOD_FILENODE, RENAMED_FILENODE, CHMOD_FILENODE, BIN_FILENODE
192 %>
193 <span class="diff-pill">
194 %if filediff.source_file_path and filediff.target_file_path:
195 %if filediff.source_file_path != filediff.target_file_path: # file was renamed
196 <strong>${filediff.target_file_path}</strong><del>${filediff.source_file_path}</del>
197 %else:
198 ## file was modified
199 <strong>${filediff.source_file_path}</strong>
200 %endif
201 %else:
202 %if filediff.source_file_path:
203 ## file was deleted
204 <strong>${filediff.source_file_path}</strong>
205 %else:
206 ## file was added
207 <strong>${filediff.target_file_path}</strong>
208 %endif
209 %endif
210 </span>
211 <span class="diff-pill-group" style="float: left">
212 %if filediff.patch['is_limited_diff']:
213 <span class="diff-pill tooltip" op="limited" title="The stats for this diff are not complete">limited diff</span>
214 %endif
215 %if RENAMED_FILENODE in stats['ops']:
216 <span class="diff-pill" op="renamed">renamed</span>
217 %endif
218
219 %if NEW_FILENODE in stats['ops']:
220 <span class="diff-pill" op="created">created</span>
221 %if filediff['target_mode'].startswith('120'):
222 <span class="diff-pill" op="symlink">symlink</span>
223 %else:
224 <span class="diff-pill" op="mode">${nice_mode(filediff['target_mode'])}</span>
225 %endif
226 %endif
227
228 %if DEL_FILENODE in stats['ops']:
229 <span class="diff-pill" op="removed">removed</span>
230 %endif
231
232 %if CHMOD_FILENODE in stats['ops']:
233 <span class="diff-pill" op="mode">
234 ${nice_mode(filediff['source_mode'])} ➡ ${nice_mode(filediff['target_mode'])}
235 </span>
236 %endif
237 </span>
238
239 <a class="diff-pill diff-anchor" href="#a_${h.FID('', filediff.patch['filename'])}"></a>
240
241 <span class="diff-pill-group" style="float: right">
242 %if BIN_FILENODE in stats['ops']:
243 <span class="diff-pill" op="binary">binary</span>
244 %if MOD_FILENODE in stats['ops']:
245 <span class="diff-pill" op="modified">modified</span>
246 %endif
247 %endif
248 %if stats['deleted']:
249 <span class="diff-pill" op="deleted">-${stats['deleted']}</span>
250 %endif
251 %if stats['added']:
252 <span class="diff-pill" op="added">+${stats['added']}</span>
253 %endif
254 </span>
255
256 </%def>
257
258 <%def name="nice_mode(filemode)">
259 ${filemode.startswith('100') and filemode[3:] or filemode}
260 </%def>
261
262 <%def name="diff_menu(filediff)">
263 <div class="diff-menu">
264 %if filediff.diffset.source_ref:
265 %if filediff.patch['operation'] in ['D', 'M']:
266 <a
267 class="tooltip"
268 href="${h.url('files_home',repo_name=c.repo_name,f_path=filediff.source_file_path,revision=filediff.diffset.source_ref)}"
269 title="${h.tooltip(_('Show file at commit: %(commit_id)s') % {'commit_id': filediff.diffset.source_ref[:12]})}"
270 >
271 ${_('Show file before')}
272 </a>
273 %else:
274 <a
275 disabled
276 class="tooltip"
277 title="${h.tooltip(_('File no longer present at commit: %(commit_id)s') % {'commit_id': filediff.diffset.source_ref[:12]})}"
278 >
279 ${_('Show file before')}
280 </a>
281 %endif
282 %if filediff.patch['operation'] in ['A', 'M']:
283 <a
284 class="tooltip"
285 href="${h.url('files_home',repo_name=c.repo_name,f_path=filediff.target_file_path,revision=filediff.diffset.target_ref)}"
286 title="${h.tooltip(_('Show file at commit: %(commit_id)s') % {'commit_id': filediff.diffset.target_ref[:12]})}"
287 >
288 ${_('Show file after')}
289 </a>
290 %else:
291 <a
292 disabled
293 class="tooltip"
294 title="${h.tooltip(_('File no longer present at commit: %(commit_id)s') % {'commit_id': filediff.diffset.target_ref[:12]})}"
295 >
296 ${_('Show file after')}
297 </a>
298 %endif
299 <a
300 class="tooltip"
301 title="${h.tooltip(_('Raw diff'))}"
302 href="${h.url('files_diff_home',repo_name=c.repo_name,f_path=filediff.target_file_path,diff2=filediff.diffset.target_ref,diff1=filediff.diffset.source_ref,diff='raw')}"
303 >
304 ${_('Raw diff')}
305 </a>
306 <a
307 class="tooltip"
308 title="${h.tooltip(_('Download diff'))}"
309 href="${h.url('files_diff_home',repo_name=c.repo_name,f_path=filediff.target_file_path,diff2=filediff.diffset.target_ref,diff1=filediff.diffset.source_ref,diff='download')}"
310 >
311 ${_('Download diff')}
312 </a>
313 %endif
314 </div>
315 </%def>
316
317
318 <%def name="render_hunk_lines_sideside(hunk)">
319 %for i, line in enumerate(hunk.sideside):
320 <%
321 old_line_anchor, new_line_anchor = None, None
322 if line.original.lineno:
323 old_line_anchor = diff_line_anchor(hunk.filediff.source_file_path, line.original.lineno, 'o')
324 if line.modified.lineno:
325 new_line_anchor = diff_line_anchor(hunk.filediff.target_file_path, line.modified.lineno, 'n')
326 %>
327 <tr class="cb-line">
328 <td class="cb-lineno ${action_class(line.original.action)}"
329 data-line-number="${line.original.lineno}"
330 %if old_line_anchor:
331 id="${old_line_anchor}"
332 %endif
333 >
334 %if line.original.lineno:
335 <a name="${old_line_anchor}" href="#${old_line_anchor}">${line.original.lineno}</a>
336 %endif
337 </td>
338 <td class="cb-content ${action_class(line.original.action)}"
339 data-line-number="o${line.original.lineno}"
340 ><span class="cb-code">${line.original.action} ${line.original.content or '' | n}</span>
341 </td>
342 <td class="cb-lineno ${action_class(line.modified.action)}"
343 data-line-number="${line.modified.lineno}"
344 %if new_line_anchor:
345 id="${new_line_anchor}"
346 %endif
347 >
348 %if line.modified.lineno:
349 <a name="${new_line_anchor}" href="#${new_line_anchor}">${line.modified.lineno}</a>
350 %endif
351 </td>
352 <td class="cb-content ${action_class(line.modified.action)}"
353 data-line-number="n${line.modified.lineno}"
354 >
355 <span class="cb-code">${line.modified.action} ${line.modified.content or '' | n}</span>
356 </td>
357 </tr>
358 %endfor
359 </%def>
360
361
362 <%def name="render_hunk_lines_unified(hunk)">
363 %for old_line_no, new_line_no, action, content in hunk.unified:
364 <%
365 old_line_anchor, new_line_anchor = None, None
366 if old_line_no:
367 old_line_anchor = diff_line_anchor(hunk.filediff.source_file_path, old_line_no, 'o')
368 if new_line_no:
369 new_line_anchor = diff_line_anchor(hunk.filediff.target_file_path, new_line_no, 'n')
370 %>
371 <tr class="cb-line">
372 <td class="cb-lineno ${action_class(action)}"
373 data-line-number="${old_line_no}"
374 %if old_line_anchor:
375 id="${old_line_anchor}"
376 %endif
377 >
378 %if old_line_anchor:
379 <a name="${old_line_anchor}" href="#${old_line_anchor}">${old_line_no}</a>
380 %endif
381 </td>
382 <td class="cb-lineno ${action_class(action)}"
383 data-line-number="${new_line_no}"
384 %if new_line_anchor:
385 id="${new_line_anchor}"
386 %endif
387 >
388 %if new_line_anchor:
389 <a name="${new_line_anchor}" href="#${new_line_anchor}">${new_line_no}</a>
390 %endif
391 </td>
392 <td class="cb-content ${action_class(action)}"
393 data-line-number="${new_line_no and 'n' or 'o'}${new_line_no or old_line_no}"
394 ><span class="cb-code">${action} ${content or '' | n}</span>
395 </td>
396 </tr>
397 %endfor
398 </%def>
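
## A sketch of the data shapes this template consumes (as built by
## codeblocks.DiffSet elsewhere in this commit):
##
##     diffset.files    -> list of filediffs, each with .hunks and ['patch']
##     hunk.unified     -> (old_lineno, new_lineno, action, content) tuples
##     hunk.sideside    -> lines carrying .original / .modified AttributeDicts
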
@@ -14,6 +14,8 b' permission notice:'
14 file:licenses/tornado_license.txt
14 file:licenses/tornado_license.txt
15 Copyright (c) 2015 - pygments-markdown-lexer
15 Copyright (c) 2015 - pygments-markdown-lexer
16 file:licenses/pygments_markdown_lexer_license.txt
16 file:licenses/pygments_markdown_lexer_license.txt
17 Copyright 2006 - diff_match_patch
18 file:licenses/diff_match_patch_license.txt
17
19
18 All licensed under the Apache License, Version 2.0 (the "License");
20 All licensed under the Apache License, Version 2.0 (the "License");
19 you may not use this file except in compliance with the License.
21 you may not use this file except in compliance with the License.
@@ -31,13 +31,14 b' from pylons.i18n.translation import _'
31
31
32 from rhodecode.controllers.utils import parse_path_ref, get_commit_from_ref_name
32 from rhodecode.controllers.utils import parse_path_ref, get_commit_from_ref_name
33 from rhodecode.lib import helpers as h
33 from rhodecode.lib import helpers as h
34 from rhodecode.lib import diffs
34 from rhodecode.lib import diffs, codeblocks
35 from rhodecode.lib.auth import LoginRequired, HasRepoPermissionAnyDecorator
35 from rhodecode.lib.auth import LoginRequired, HasRepoPermissionAnyDecorator
36 from rhodecode.lib.base import BaseRepoController, render
36 from rhodecode.lib.base import BaseRepoController, render
37 from rhodecode.lib.utils import safe_str
37 from rhodecode.lib.utils import safe_str
38 from rhodecode.lib.utils2 import safe_unicode, str2bool
38 from rhodecode.lib.utils2 import safe_unicode, str2bool
39 from rhodecode.lib.vcs.exceptions import (
39 from rhodecode.lib.vcs.exceptions import (
40 EmptyRepositoryError, RepositoryError, RepositoryRequirementError)
40 EmptyRepositoryError, RepositoryError, RepositoryRequirementError,
41 NodeDoesNotExistError)
41 from rhodecode.model.db import Repository, ChangesetStatus
42 from rhodecode.model.db import Repository, ChangesetStatus
42
43
43 log = logging.getLogger(__name__)
44 log = logging.getLogger(__name__)
@@ -78,7 +79,7 b' class CompareController(BaseRepoControll'
78 def index(self, repo_name):
79 def index(self, repo_name):
79 c.compare_home = True
80 c.compare_home = True
80 c.commit_ranges = []
81 c.commit_ranges = []
81 c.files = []
82 c.diffset = None
82 c.limited_diff = False
83 c.limited_diff = False
83 source_repo = c.rhodecode_db_repo.repo_name
84 source_repo = c.rhodecode_db_repo.repo_name
84 target_repo = request.GET.get('target_repo', source_repo)
85 target_repo = request.GET.get('target_repo', source_repo)
@@ -239,28 +240,23 b' class CompareController(BaseRepoControll'
239 commit1=source_commit, commit2=target_commit,
240 commit1=source_commit, commit2=target_commit,
240 path1=source_path, path=target_path)
241 path1=source_path, path=target_path)
241 diff_processor = diffs.DiffProcessor(
242 diff_processor = diffs.DiffProcessor(
242 txtdiff, format='gitdiff', diff_limit=diff_limit,
243 txtdiff, format='newdiff', diff_limit=diff_limit,
243 file_limit=file_limit, show_full_diff=c.fulldiff)
244 file_limit=file_limit, show_full_diff=c.fulldiff)
244 _parsed = diff_processor.prepare()
245 _parsed = diff_processor.prepare()
245
246
246 c.limited_diff = False
247 def _node_getter(commit):
247 if isinstance(_parsed, diffs.LimitedDiffContainer):
248 """ Returns a function that returns a node for a commit or None """
248 c.limited_diff = True
249 def get_node(fname):
250 try:
251 return commit.get_node(fname)
252 except NodeDoesNotExistError:
253 return None
254 return get_node
249
255
250 c.files = []
256 c.diffset = codeblocks.DiffSet(
251 c.changes = {}
257 source_node_getter=_node_getter(source_commit),
252 c.lines_added = 0
258 target_node_getter=_node_getter(target_commit),
253 c.lines_deleted = 0
259 ).render_patchset(_parsed, source_ref, target_ref)
254 for f in _parsed:
255 st = f['stats']
256 if not st['binary']:
257 c.lines_added += st['added']
258 c.lines_deleted += st['deleted']
259 fid = h.FID('', f['filename'])
260 c.files.append([fid, f['operation'], f['filename'], f['stats'], f])
261 htmldiff = diff_processor.as_html(
262 enable_comments=False, parsed_lines=[f])
263 c.changes[fid] = [f['operation'], f['filename'], htmldiff, f]
264
260
265 c.preview_mode = merge
261 c.preview_mode = merge
266
262
@@ -19,13 +19,17 b''
19 # and proprietary license terms, please see https://rhodecode.com/licenses/
19 # and proprietary license terms, please see https://rhodecode.com/licenses/
20
20
21 import logging
21 import logging
22 import difflib
22 from itertools import groupby
23 from itertools import groupby
23
24
24 from pygments import lex
25 from pygments import lex
25 from pygments.formatters.html import _get_ttype_class as pygment_token_class
26 from pygments.formatters.html import _get_ttype_class as pygment_token_class
26 from rhodecode.lib.helpers import get_lexer_for_filenode, html_escape
27 from rhodecode.lib.helpers import (
28 get_lexer_for_filenode, get_lexer_safe, html_escape)
27 from rhodecode.lib.utils2 import AttributeDict
29 from rhodecode.lib.utils2 import AttributeDict
28 from rhodecode.lib.vcs.nodes import FileNode
30 from rhodecode.lib.vcs.nodes import FileNode
31 from rhodecode.lib.diff_match_patch import diff_match_patch
32 from rhodecode.lib.diffs import LimitedDiffContainer
29 from pygments.lexers import get_lexer_by_name
33 from pygments.lexers import get_lexer_by_name
30
34
31 plain_text_lexer = get_lexer_by_name(
35 plain_text_lexer = get_lexer_by_name(
@@ -38,7 +42,7 b' log = logging.getLogger()'
38 def filenode_as_lines_tokens(filenode, lexer=None):
42 def filenode_as_lines_tokens(filenode, lexer=None):
39 lexer = lexer or get_lexer_for_filenode(filenode)
43 lexer = lexer or get_lexer_for_filenode(filenode)
40 log.debug('Generating file node pygment tokens for %s, %s', lexer, filenode)
44 log.debug('Generating file node pygment tokens for %s, %s', lexer, filenode)
41 tokens = tokenize_string(filenode.content, get_lexer_for_filenode(filenode))
45 tokens = tokenize_string(filenode.content, lexer)
42 lines = split_token_stream(tokens, split_string='\n')
46 lines = split_token_stream(tokens, split_string='\n')
43 rv = list(lines)
47 rv = list(lines)
44 return rv
48 return rv
@@ -146,7 +150,11 b' def render_tokenstream(tokenstream):'
146 result.append(u'<%s>' % op_tag)
150 result.append(u'<%s>' % op_tag)
147
151
148 escaped_text = html_escape(token_text)
152 escaped_text = html_escape(token_text)
149 escaped_text = escaped_text.replace('\n', '<nl>\n</nl>')
153
154 # TODO: dan: investigate showing hidden characters like space/nl/tab
155 # escaped_text = escaped_text.replace(' ', '<sp> </sp>')
156 # escaped_text = escaped_text.replace('\n', '<nl>\n</nl>')
157 # escaped_text = escaped_text.replace('\t', '<tab>\t</tab>')
150
158
151 result.append(escaped_text)
159 result.append(escaped_text)
152
160
@@ -212,3 +220,416 b' def rollup_tokenstream(tokenstream):'
212 ops.append((token_op, ''.join(text_buffer)))
220 ops.append((token_op, ''.join(text_buffer)))
213 result.append((token_class, ops))
221 result.append((token_class, ops))
214 return result
222 return result
223
224
225 def tokens_diff(old_tokens, new_tokens, use_diff_match_patch=True):
226 """
227 Converts a list of (token_class, token_text) tuples to a list of
228 (token_class, token_op, token_text) tuples where token_op is one of
229 ('ins', 'del', '')
230
231 :param old_tokens: list of (token_class, token_text) tuples of old line
232 :param new_tokens: list of (token_class, token_text) tuples of new line
233 :param use_diff_match_patch: boolean; when True, use Google's
234 diff-match-patch library, which can 'smooth' out character-by-character
235 differences into nicer ins/del blocks
236 """
237
238 old_tokens_result = []
239 new_tokens_result = []
240
241 similarity = difflib.SequenceMatcher(None,
242 ''.join(token_text for token_class, token_text in old_tokens),
243 ''.join(token_text for token_class, token_text in new_tokens)
244 ).ratio()
245
246 if similarity < 0.6: # return, the blocks are too different
247 for token_class, token_text in old_tokens:
248 old_tokens_result.append((token_class, '', token_text))
249 for token_class, token_text in new_tokens:
250 new_tokens_result.append((token_class, '', token_text))
251 return old_tokens_result, new_tokens_result, similarity
252
253 token_sequence_matcher = difflib.SequenceMatcher(None,
254 [x[1] for x in old_tokens],
255 [x[1] for x in new_tokens])
256
257 for tag, o1, o2, n1, n2 in token_sequence_matcher.get_opcodes():
258 # check the differences by token block types first, to give a
259 # nicer "block"-level replacement than raw character diffs
260
261 if tag == 'equal':
262 for token_class, token_text in old_tokens[o1:o2]:
263 old_tokens_result.append((token_class, '', token_text))
264 for token_class, token_text in new_tokens[n1:n2]:
265 new_tokens_result.append((token_class, '', token_text))
266 elif tag == 'delete':
267 for token_class, token_text in old_tokens[o1:o2]:
268 old_tokens_result.append((token_class, 'del', token_text))
269 elif tag == 'insert':
270 for token_class, token_text in new_tokens[n1:n2]:
271 new_tokens_result.append((token_class, 'ins', token_text))
272 elif tag == 'replace':
273 # if token blocks of the same type must be replaced, diff the
274 # characters within those blocks to show the individual changes
275
276 old_char_tokens = []
277 new_char_tokens = []
278 for token_class, token_text in old_tokens[o1:o2]:
279 for char in token_text:
280 old_char_tokens.append((token_class, char))
281
282 for token_class, token_text in new_tokens[n1:n2]:
283 for char in token_text:
284 new_char_tokens.append((token_class, char))
285
286 old_string = ''.join([token_text for
287 token_class, token_text in old_char_tokens])
288 new_string = ''.join([token_text for
289 token_class, token_text in new_char_tokens])
290
291 char_sequence = difflib.SequenceMatcher(
292 None, old_string, new_string)
293 copcodes = char_sequence.get_opcodes()
294 obuffer, nbuffer = [], []
295
296 if use_diff_match_patch:
297 dmp = diff_match_patch()
298 dmp.Diff_EditCost = 11 # TODO: dan: extract this to a setting
299 reps = dmp.diff_main(old_string, new_string)
300 dmp.diff_cleanupEfficiency(reps)
301
302 a, b = 0, 0
303 for op, rep in reps:
304 l = len(rep)
305 if op == 0:
306 for i, c in enumerate(rep):
307 obuffer.append((old_char_tokens[a+i][0], '', c))
308 nbuffer.append((new_char_tokens[b+i][0], '', c))
309 a += l
310 b += l
311 elif op == -1:
312 for i, c in enumerate(rep):
313 obuffer.append((old_char_tokens[a+i][0], 'del', c))
314 a += l
315 elif op == 1:
316 for i, c in enumerate(rep):
317 nbuffer.append((new_char_tokens[b+i][0], 'ins', c))
318 b += l
319 else:
320 for ctag, co1, co2, cn1, cn2 in copcodes:
321 if ctag == 'equal':
322 for token_class, token_text in old_char_tokens[co1:co2]:
323 obuffer.append((token_class, '', token_text))
324 for token_class, token_text in new_char_tokens[cn1:cn2]:
325 nbuffer.append((token_class, '', token_text))
326 elif ctag == 'delete':
327 for token_class, token_text in old_char_tokens[co1:co2]:
328 obuffer.append((token_class, 'del', token_text))
329 elif ctag == 'insert':
330 for token_class, token_text in new_char_tokens[cn1:cn2]:
331 nbuffer.append((token_class, 'ins', token_text))
332 elif ctag == 'replace':
333 for token_class, token_text in old_char_tokens[co1:co2]:
334 obuffer.append((token_class, 'del', token_text))
335 for token_class, token_text in new_char_tokens[cn1:cn2]:
336 nbuffer.append((token_class, 'ins', token_text))
337
338 old_tokens_result.extend(obuffer)
339 new_tokens_result.extend(nbuffer)
340
341 return old_tokens_result, new_tokens_result, similarity
342
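# A usage sketch (the token class strings here are arbitrary labels):
# ops come back per character of the replaced stretch as '', 'del' or
# 'ins', with the exact grouping shaped by the diff-match-patch cleanup:
#
#     old = [('t', u'the quick brown fox')]
#     new = [('t', u'the slow brown fox')]
#     o, n, sim = tokens_diff(old, new)
#     # chars of 'quick' in o are tagged 'del', chars of 'slow' in n
#     # are tagged 'ins'; the shared text keeps op ''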
343
344 class DiffSet(object):
345 """
346 An object for parsing the diff result from diffs.DiffProcessor and
347 adding highlighting, side by side/unified renderings and line diffs
348 """
349
350 HL_REAL = 'REAL' # highlights using original file, slow
351 HL_FAST = 'FAST' # highlights using just the line, fast but not correct
352 # in the case of multiline code
353 HL_NONE = 'NONE' # no highlighting, fastest
354
355 def __init__(self, highlight_mode=HL_REAL,
356 source_node_getter=lambda filename: None,
357 target_node_getter=lambda filename: None,
358 source_nodes=None, target_nodes=None,
359 max_file_size_limit=150 * 1024, # files over this size will
360 # use fast highlighting
361 ):
362
363 self.highlight_mode = highlight_mode
364 self.highlighted_filenodes = {}
365 self.source_node_getter = source_node_getter
366 self.target_node_getter = target_node_getter
367 self.source_nodes = source_nodes or {}
368 self.target_nodes = target_nodes or {}
369
370
371 self.max_file_size_limit = max_file_size_limit
372
373 def render_patchset(self, patchset, source_ref=None, target_ref=None):
374 diffset = AttributeDict(dict(
375 lines_added=0,
376 lines_deleted=0,
377 changed_files=0,
378 files=[],
379 limited_diff=isinstance(patchset, LimitedDiffContainer),
380 source_ref=source_ref,
381 target_ref=target_ref,
382 ))
383 for patch in patchset:
384 filediff = self.render_patch(patch)
385 filediff.diffset = diffset
386 diffset.files.append(filediff)
387 diffset.changed_files += 1
388 if not patch['stats']['binary']:
389 diffset.lines_added += patch['stats']['added']
390 diffset.lines_deleted += patch['stats']['deleted']
391
392 return diffset
393
394 _lexer_cache = {}
395 def _get_lexer_for_filename(self, filename):
396 # cached because we might need to call it twice for source/target
397 if filename not in self._lexer_cache:
398 self._lexer_cache[filename] = get_lexer_safe(filepath=filename)
399 return self._lexer_cache[filename]
400
401 def render_patch(self, patch):
402 log.debug('rendering diff for %r' % patch['filename'])
403
404 source_filename = patch['original_filename']
405 target_filename = patch['filename']
406
407 source_lexer = plain_text_lexer
408 target_lexer = plain_text_lexer
409
410 if not patch['stats']['binary']:
411 if self.highlight_mode == self.HL_REAL:
412 if (source_filename and patch['operation'] in ('D', 'M')
413 and source_filename not in self.source_nodes):
414 self.source_nodes[source_filename] = (
415 self.source_node_getter(source_filename))
416
417 if (target_filename and patch['operation'] in ('A', 'M')
418 and target_filename not in self.target_nodes):
419 self.target_nodes[target_filename] = (
420 self.target_node_getter(target_filename))
421
422 elif self.highlight_mode == self.HL_FAST:
423 source_lexer = self._get_lexer_for_filename(source_filename)
424 target_lexer = self._get_lexer_for_filename(target_filename)
425
426 source_file = self.source_nodes.get(source_filename, source_filename)
427 target_file = self.target_nodes.get(target_filename, target_filename)
428
429 source_filenode, target_filenode = None, None
430
431 # TODO: dan: FileNode.lexer works on the content of the file - which
432 # can be slow - issue #4289 explains a lexer clean up - which once
433 # done can allow caching a lexer for a filenode to avoid the file lookup
434 if isinstance(source_file, FileNode):
435 source_filenode = source_file
436 source_lexer = source_file.lexer
437 if isinstance(target_file, FileNode):
438 target_filenode = target_file
439 target_lexer = target_file.lexer
440
441 source_file_path, target_file_path = None, None
442
443 if source_filename != '/dev/null':
444 source_file_path = source_filename
445 if target_filename != '/dev/null':
446 target_file_path = target_filename
447
448 source_file_type = source_lexer.name
449 target_file_type = target_lexer.name
450
451 op_hunks = patch['chunks'][0]
452 hunks = patch['chunks'][1:]
453
454 filediff = AttributeDict({
455 'source_file_path': source_file_path,
456 'target_file_path': target_file_path,
457 'source_filenode': source_filenode,
458 'target_filenode': target_filenode,
459 'hunks': [],
460 'source_file_type': source_file_type,
461 'target_file_type': target_file_type,
462 'patch': patch,
463 'source_mode': patch['stats']['old_mode'],
464 'target_mode': patch['stats']['new_mode'],
465 'limited_diff': isinstance(patch, LimitedDiffContainer),
466 'diffset': self,
467 })
468
469 for hunk in hunks:
470 hunkbit = self.parse_hunk(hunk, source_file, target_file)
471 hunkbit.filediff = filediff
472 filediff.hunks.append(hunkbit)
473 return filediff
474
475 def parse_hunk(self, hunk, source_file, target_file):
476 result = AttributeDict(dict(
477 source_start=hunk['source_start'],
478 source_length=hunk['source_length'],
479 target_start=hunk['target_start'],
480 target_length=hunk['target_length'],
481 section_header=hunk['section_header'],
482 lines=[],
483 ))
484 before, after = [], []
485
486 for line in hunk['lines']:
487 if line['action'] == 'unmod':
488 result.lines.extend(
489 self.parse_lines(before, after, source_file, target_file))
490 after.append(line)
491 before.append(line)
492 elif line['action'] == 'add':
493 after.append(line)
494 elif line['action'] == 'del':
495 before.append(line)
496 elif line['action'] == 'context-old':
497 before.append(line)
498 elif line['action'] == 'context-new':
499 after.append(line)
500
501 result.lines.extend(
502 self.parse_lines(before, after, source_file, target_file))
503 result.unified = self.as_unified(result.lines)
504 result.sideside = result.lines
505 return result
506
507 def parse_lines(self, before_lines, after_lines, source_file, target_file):
508 # TODO: dan: investigate doing the diff comparison and fast highlighting
509 # on the entire before and after buffered block lines rather than by
510 # line, this means we can get better 'fast' highlighting if the context
511 # allows it - eg.
512 # line 4: """
513 # line 5: this gets highlighted as a string
514 # line 6: """
515
516 lines = []
517 while before_lines or after_lines:
518 before, after = None, None
519 before_tokens, after_tokens = None, None
520
521 if before_lines:
522 before = before_lines.pop(0)
523 if after_lines:
524 after = after_lines.pop(0)
525
526 original = AttributeDict()
527 modified = AttributeDict()
528
529 if before:
530 before_tokens = self.get_line_tokens(
531 line_text=before['line'], line_number=before['old_lineno'],
532 file=source_file)
533 original.lineno = before['old_lineno']
534 original.content = before['line']
535 original.action = self.action_to_op(before['action'])
536
537 if after:
538 after_tokens = self.get_line_tokens(
539 line_text=after['line'], line_number=after['new_lineno'],
540 file=target_file)
541 modified.lineno = after['new_lineno']
542 modified.content = after['line']
543 modified.action = self.action_to_op(after['action'])
544
545
546 # diff the lines
547 if before_tokens and after_tokens:
548 o_tokens, m_tokens, similarity = tokens_diff(before_tokens, after_tokens)
549 original.content = render_tokenstream(o_tokens)
550 modified.content = render_tokenstream(m_tokens)
551 elif before_tokens:
552 original.content = render_tokenstream(
553 [(x[0], '', x[1]) for x in before_tokens])
554 elif after_tokens:
555 modified.content = render_tokenstream(
556 [(x[0], '', x[1]) for x in after_tokens])
557
558 lines.append(AttributeDict({
559 'original': original,
560 'modified': modified,
561 }))
562
563 return lines
564
565 def get_line_tokens(self, line_text, line_number, file=None):
566 filenode = None
567 filename = None
568
569 if isinstance(file, basestring):
570 filename = file
571 elif isinstance(file, FileNode):
572 filenode = file
573 filename = file.unicode_path
574
575 if self.highlight_mode == self.HL_REAL and filenode:
576 if line_number and file.size < self.max_file_size_limit:
577 return self.get_tokenized_filenode_line(file, line_number)
578
579 if self.highlight_mode in (self.HL_REAL, self.HL_FAST) and filename:
580 lexer = self._get_lexer_for_filename(filename)
581 return list(tokenize_string(line_text, lexer))
582
583 return list(tokenize_string(line_text, plain_text_lexer))
584
585 def get_tokenized_filenode_line(self, filenode, line_number):
586
587 if filenode not in self.highlighted_filenodes:
588 tokenized_lines = filenode_as_lines_tokens(filenode, filenode.lexer)
589 self.highlighted_filenodes[filenode] = tokenized_lines
590 return self.highlighted_filenodes[filenode][line_number - 1]
591
592 def action_to_op(self, action):
593 return {
594 'add': '+',
595 'del': '-',
596 'unmod': ' ',
597 'context-old': ' ',
598 'context-new': ' ',
599 }.get(action, action)
600
601 def as_unified(self, lines):
602 """ Return a generator that yields the lines of a diff in unified order """
603 def generator():
604 buf = []
605 for line in lines:
606
607 if buf and not line.original or line.original.action == ' ':
608 for b in buf:
609 yield b
610 buf = []
611
612 if line.original:
613 if line.original.action == ' ':
614 yield (line.original.lineno, line.modified.lineno,
615 line.original.action, line.original.content)
616 continue
617
618 if line.original.action == '-':
619 yield (line.original.lineno, None,
620 line.original.action, line.original.content)
621
622 if line.modified.action == '+':
623 buf.append((
624 None, line.modified.lineno,
625 line.modified.action, line.modified.content))
626 continue
627
628 if line.modified:
629 yield (None, line.modified.lineno,
630 line.modified.action, line.modified.content)
631
632 for b in buf:
633 yield b
634
635 return generator()
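
# A sketch of the unified ordering: for a changed line the deletion is
# yielded first and the paired insertion is buffered until the run ends:
#
#     line = AttributeDict(
#         original=AttributeDict(lineno=5, action='-', content=u'old'),
#         modified=AttributeDict(lineno=5, action='+', content=u'new'))
#     list(DiffSet().as_unified([line]))
#     # -> [(5, None, '-', u'old'), (None, 5, '+', u'new')]
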
@@ -180,6 +180,8 b' class Action(object):'
180 UNMODIFIED = 'unmod'
180 UNMODIFIED = 'unmod'
181
181
182 CONTEXT = 'context'
182 CONTEXT = 'context'
183 CONTEXT_OLD = 'context-old'
184 CONTEXT_NEW = 'context-new'
183
185
184
186
185 class DiffProcessor(object):
187 class DiffProcessor(object):
@@ -227,7 +229,7 b' class DiffProcessor(object):'
227 self._parser = self._parse_gitdiff
229 self._parser = self._parse_gitdiff
228 else:
230 else:
229 self.differ = self._highlight_line_udiff
231 self.differ = self._highlight_line_udiff
230 self._parser = self._parse_udiff
232 self._parser = self._new_parse_gitdiff
231
233
232 def _copy_iterator(self):
234 def _copy_iterator(self):
233 """
235 """
@@ -491,9 +493,181 b' class DiffProcessor(object):'
491
493
492 return diff_container(sorted(_files, key=sorter))
494 return diff_container(sorted(_files, key=sorter))
493
495
494 def _parse_udiff(self, inline_diff=True):
496
495 raise NotImplementedError()
497 # FIXME: NEWDIFFS: dan: this replaces the old _escaper function
498 def _process_line(self, string):
499 """
500 Process a diff line and check the cumulative diff size limit.
501
502 :param string: a single raw diff line
503 """
504
505 self.cur_diff_size += len(string)
506
507 if not self.show_full_diff and (self.cur_diff_size > self.diff_limit):
508 raise DiffLimitExceeded('Diff Limit Exceeded')
509
510 return safe_unicode(string)
511
512 # FIXME: NEWDIFFS: dan: this replaces _parse_gitdiff
513 def _new_parse_gitdiff(self, inline_diff=True):
514 _files = []
515 diff_container = lambda arg: arg
516 for chunk in self._diff.chunks():
517 head = chunk.header
518 log.debug('parsing diff %r' % head)
519
520 diff = imap(self._process_line, chunk.diff.splitlines(1))
521 raw_diff = chunk.raw
522 limited_diff = False
523 exceeds_limit = False
524 # if 'empty_file_to_modify_and_rename' in head['a_path']:
525 # 1/0
526 op = None
527 stats = {
528 'added': 0,
529 'deleted': 0,
530 'binary': False,
531 'old_mode': None,
532 'new_mode': None,
533 'ops': {},
534 }
535 if head['old_mode']:
536 stats['old_mode'] = head['old_mode']
537 if head['new_mode']:
538 stats['new_mode'] = head['new_mode']
539 if head['b_mode']:
540 stats['new_mode'] = head['b_mode']
541
542 if head['deleted_file_mode']:
543 op = OPS.DEL
544 stats['binary'] = True
545 stats['ops'][DEL_FILENODE] = 'deleted file'
546
547 elif head['new_file_mode']:
548 op = OPS.ADD
549 stats['binary'] = True
550 stats['old_mode'] = None
551 stats['new_mode'] = head['new_file_mode']
552 stats['ops'][NEW_FILENODE] = 'new file %s' % head['new_file_mode']
553 else: # modify operation, can be copy, rename or chmod
554
555 # CHMOD
556 if head['new_mode'] and head['old_mode']:
557 op = OPS.MOD
558 stats['binary'] = True
559 stats['ops'][CHMOD_FILENODE] = (
560 'modified file chmod %s => %s' % (
561 head['old_mode'], head['new_mode']))
562
563 # RENAME
564 if head['rename_from'] != head['rename_to']:
565 op = OPS.MOD
566 stats['binary'] = True
567 stats['renamed'] = (head['rename_from'], head['rename_to'])
568 stats['ops'][RENAMED_FILENODE] = (
569 'file renamed from %s to %s' % (
570 head['rename_from'], head['rename_to']))
571 # COPY
572 if head.get('copy_from') and head.get('copy_to'):
573 op = OPS.MOD
574 stats['binary'] = True
575 stats['copied'] = (head['copy_from'], head['copy_to'])
576 stats['ops'][COPIED_FILENODE] = (
577 'file copied from %s to %s' % (
578 head['copy_from'], head['copy_to']))
496
579
580 # If our new parsed headers didn't match anything fallback to
581 # old style detection
582 if op is None:
583 if not head['a_file'] and head['b_file']:
584 op = OPS.ADD
585 stats['binary'] = True
586 stats['new_file'] = True
587 stats['ops'][NEW_FILENODE] = 'new file'
588
589 elif head['a_file'] and not head['b_file']:
590 op = OPS.DEL
591 stats['binary'] = True
592 stats['ops'][DEL_FILENODE] = 'deleted file'
593
594 # it's not ADD not DELETE
595 if op is None:
596 op = OPS.MOD
597 stats['binary'] = True
598 stats['ops'][MOD_FILENODE] = 'modified file'
599
600 # a real non-binary diff
601 if head['a_file'] or head['b_file']:
602 try:
603 raw_diff, chunks, _stats = self._new_parse_lines(diff)
604 stats['binary'] = False
605 stats['added'] = _stats[0]
606 stats['deleted'] = _stats[1]
607 # explicit mark that it's a modified file
608 if op == OPS.MOD:
609 stats['ops'][MOD_FILENODE] = 'modified file'
610 exceeds_limit = len(raw_diff) > self.file_limit
611
612 # changed from the _escaper function so that we validate the size
613 # of each file instead of the whole diff: the diff will hide big
614 # files but still show small ones. From testing, big files are
615 # fairly safe to parse, but the browser is the bottleneck.
617 if not self.show_full_diff and exceeds_limit:
618 raise DiffLimitExceeded('File Limit Exceeded')
619
620 except DiffLimitExceeded:
621 diff_container = lambda _diff: \
622 LimitedDiffContainer(
623 self.diff_limit, self.cur_diff_size, _diff)
624
625 exceeds_limit = len(raw_diff) > self.file_limit
626 limited_diff = True
627 chunks = []
628
629 else: # GIT format binary patch, or possibly empty diff
630 if head['bin_patch']:
631 # the operation has already been extracted; simply mark that
632 # this is a binary diff we won't show
633 stats['ops'][BIN_FILENODE] = 'binary diff hidden'
634 chunks = []
635
636 if chunks and not self.show_full_diff and op == OPS.DEL:
637 # when not in full-diff mode, don't show deleted file contents
638 # TODO: anderson: if the view is not too big, there is no way
639 # to see the content of the file
640 chunks = []
641
642 chunks.insert(0, [{
643 'old_lineno': '',
644 'new_lineno': '',
645 'action': Action.CONTEXT,
646 'line': msg,
647 } for _op, msg in stats['ops'].iteritems()
648 if _op not in [MOD_FILENODE]])
649
650 original_filename = safe_unicode(head['a_path'])
651 _files.append({
652 'original_filename': original_filename,
653 'filename': safe_unicode(head['b_path']),
654 'old_revision': head['a_blob_id'],
655 'new_revision': head['b_blob_id'],
656 'chunks': chunks,
657 'raw_diff': safe_unicode(raw_diff),
658 'operation': op,
659 'stats': stats,
660 'exceeds_limit': exceeds_limit,
661 'is_limited_diff': limited_diff,
662 })
663
664
665 sorter = lambda info: {OPS.ADD: 0, OPS.MOD: 1,
666 OPS.DEL: 2}.get(info['operation'])
667
668 return diff_container(sorted(_files, key=sorter))
669
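# A usage sketch mirroring the compare controller in this commit
# (vcs_diff stands in for the raw diff object from the vcs layer):
#
#     processor = DiffProcessor(vcs_diff, format='newdiff',
#                               diff_limit=1024 * 1024,
#                               file_limit=256 * 1024, show_full_diff=False)
#     patchset = processor.prepare()  # per-file dicts as assembled above
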
670 # FIXME: NEWDIFFS: dan: this gets replaced by _new_parse_lines
497 def _parse_lines(self, diff):
671 def _parse_lines(self, diff):
498 """
672 """
499 Parse the diff and return data for the template.
673 Parse the diff and return data for the template.
@@ -588,6 +762,107 b' class DiffProcessor(object):'
588 pass
762 pass
589 return ''.join(raw_diff), chunks, stats
763 return ''.join(raw_diff), chunks, stats
590
764
765 # FIXME: NEWDIFFS: dan: this replaces _parse_lines
766 def _new_parse_lines(self, diff):
767 """
768 Parse the diff and return data for the template.
769 """
770
771 lineiter = iter(diff)
772 stats = [0, 0]
773 chunks = []
774 raw_diff = []
775
776 try:
777 line = lineiter.next()
778
779 while line:
780 raw_diff.append(line)
781 match = self._chunk_re.match(line)
782
783 if not match:
784 break
785
786 gr = match.groups()
787 (old_line, old_end,
788 new_line, new_end) = [int(x or 1) for x in gr[:-1]]
789
790 lines = []
791 hunk = {
792 'section_header': gr[-1],
793 'source_start': old_line,
794 'source_length': old_end,
795 'target_start': new_line,
796 'target_length': new_end,
797 'lines': lines,
798 }
799 chunks.append(hunk)
800
801 old_line -= 1
802 new_line -= 1
803
804 context = len(gr) == 5
805 old_end += old_line
806 new_end += new_line
807
808 line = lineiter.next()
809
810 while old_line < old_end or new_line < new_end:
811 command = ' '
812 if line:
813 command = line[0]
814
815 affects_old = affects_new = False
816
817 # ignore lines we don't expect; advance past them too
818 if command in '#@':
819 line = lineiter.next(); continue
820 elif command == '+':
821 affects_new = True
822 action = Action.ADD
823 stats[0] += 1
824 elif command == '-':
825 affects_old = True
826 action = Action.DELETE
827 stats[1] += 1
828 else:
829 affects_old = affects_new = True
830 action = Action.UNMODIFIED
831
832 if not self._newline_marker.match(line):
833 old_line += affects_old
834 new_line += affects_new
835 lines.append({
836 'old_lineno': affects_old and old_line or '',
837 'new_lineno': affects_new and new_line or '',
838 'action': action,
839 'line': self._clean_line(line, command)
840 })
841 raw_diff.append(line)
842
843 line = lineiter.next()
844
845 if self._newline_marker.match(line):
846 # we need to append this to lines, since the marker is not
847 # counted in the hunk's line counts
848 if affects_old:
849 action = Action.CONTEXT_OLD
850 elif affects_new:
851 action = Action.CONTEXT_NEW
852 else:
853 raise Exception('invalid context for no newline')
854
855 lines.append({
856 'old_lineno': None,
857 'new_lineno': None,
858 'action': action,
859 'line': self._clean_line(line, command)
860 })
861
862 except StopIteration:
863 pass
864 return ''.join(raw_diff), chunks, stats
865
591 def _safe_id(self, idstring):
866 def _safe_id(self, idstring):
592 """Make a string safe for including in an id attribute.
867 """Make a string safe for including in an id attribute.
593
868
@@ -32,11 +32,13 b' class GitDiff(base.Diff):'
32 _header_re = re.compile(r"""
32 _header_re = re.compile(r"""
33 #^diff[ ]--git
33 #^diff[ ]--git
34 [ ]"?a/(?P<a_path>.+?)"?[ ]"?b/(?P<b_path>.+?)"?\n
34 [ ]"?a/(?P<a_path>.+?)"?[ ]"?b/(?P<b_path>.+?)"?\n
35 (?:^similarity[ ]index[ ](?P<similarity_index>\d+)%\n
36 ^rename[ ]from[ ](?P<rename_from>[^\r\n]+)\n
37 ^rename[ ]to[ ](?P<rename_to>[^\r\n]+)(?:\n|$))?
38 (?:^old[ ]mode[ ](?P<old_mode>\d+)\n
35 (?:^old[ ]mode[ ](?P<old_mode>\d+)\n
39 ^new[ ]mode[ ](?P<new_mode>\d+)(?:\n|$))?
36 ^new[ ]mode[ ](?P<new_mode>\d+)(?:\n|$))?
37 (?:^similarity[ ]index[ ](?P<similarity_index>\d+)%(?:\n|$))?
38 (?:^rename[ ]from[ ](?P<rename_from>[^\r\n]+)\n
39 ^rename[ ]to[ ](?P<rename_to>[^\r\n]+)(?:\n|$))?
40 (?:^copy[ ]from[ ](?P<copy_from>[^\r\n]+)\n
41 ^copy[ ]to[ ](?P<copy_to>[^\r\n]+)(?:\n|$))?
40 (?:^new[ ]file[ ]mode[ ](?P<new_file_mode>.+)(?:\n|$))?
42 (?:^new[ ]file[ ]mode[ ](?P<new_file_mode>.+)(?:\n|$))?
41 (?:^deleted[ ]file[ ]mode[ ](?P<deleted_file_mode>.+)(?:\n|$))?
43 (?:^deleted[ ]file[ ]mode[ ](?P<deleted_file_mode>.+)(?:\n|$))?
42 (?:^index[ ](?P<a_blob_id>[0-9A-Fa-f]+)
44 (?:^index[ ](?P<a_blob_id>[0-9A-Fa-f]+)
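
# A sketch of what the reordered header regex captures for a rename,
# assuming _header_re is compiled with re.VERBOSE | re.MULTILINE as in
# this class:
#
#     header = (' a/old.py b/new.py\n'
#               'similarity index 90%\n'
#               'rename from old.py\n'
#               'rename to new.py\n')
#     m = GitDiff._header_re.search(header)
#     # m.group('rename_from') == 'old.py'; m.group('copy_from') is None
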
@@ -188,6 +188,14 b' input[type="button"] {'
188 padding: @padding * 1.2;
188 padding: @padding * 1.2;
189 }
189 }
190
190
191 .btn-group {
192 display: inline-block;
193 .btn {
194 float: left;
195 margin: 0 0 0 -1px;
196 }
197 }
198
191 .btn-link {
199 .btn-link {
192 background: transparent;
200 background: transparent;
193 border: none;
201 border: none;
@@ -646,14 +646,210 b' pre.literal-block, .codehilite pre{'
646
646
647 @cb-line-height: 18px;
647 @cb-line-height: 18px;
648 @cb-line-code-padding: 10px;
648 @cb-line-code-padding: 10px;
649 @cb-text-padding: 5px;
649
650
651 @diff-pill-padding: 2px 7px;
652
653 input.diff-collapse-state {
654 display: none;
655
656 &:checked + .diff { /* file diff is collapsed */
657 .cb {
658 display: none
659 }
660 .diff-collapse-indicator {
661 border-width: 9px 0 9px 15.6px;
662 border-color: transparent transparent transparent #ccc;
663 }
664 .diff-menu {
665 display: none;
666 }
667 margin: -1px 0 0 0;
668 }
669
670 &+ .diff { /* file diff is expanded */
671 .diff-collapse-indicator {
672 border-width: 15.6px 9px 0 9px;
673 border-color: #ccc transparent transparent transparent;
674 }
675 .diff-menu {
676 display: block;
677 }
678 margin: 20px 0;
679 }
680 }
681 .diff {
682 border: 1px solid @grey5;
683
684 /* START OVERRIDES */
685 .code-highlight {
686 border: none; // TODO: remove this border from the global
687 // .code-highlight, it doesn't belong there
688 }
689 label {
690 margin: 0; // TODO: remove this margin definition from global label
691 // it doesn't belong there - if margin on labels
692 // are needed for a form they should be defined
693 // in the form's class
694 }
695 /* END OVERRIDES */
696
697 * {
698 box-sizing: border-box;
699 }
700 .diff-anchor {
701 visibility: hidden;
702 }
703 &:hover {
704 .diff-anchor {
705 visibility: visible;
706 }
707 }
708
709 .diff-collapse-indicator {
710 width: 0;
711 height: 0;
712 border-style: solid;
713 float: left;
714 margin: 2px 2px 0 0;
715 cursor: pointer;
716 }
717
718 .diff-heading {
719 background: @grey7;
720 cursor: pointer;
721 display: block;
722 padding: 5px 10px;
723 }
724 .diff-heading:after {
725 content: "";
726 display: table;
727 clear: both;
728 }
729 .diff-heading:hover {
730 background: #e1e9f4 !important;
731 }
732
733 .diff-menu {
734 float: right;
735 a, button {
736 padding: 5px;
737 display: block;
738 float: left
739 }
740 }
741 .diff-pill {
742 display: block;
743 float: left;
744 padding: @diff-pill-padding;
745 }
746 .diff-pill-group {
747 .diff-pill {
748 opacity: .8;
749 &:first-child {
750 border-radius: @border-radius 0 0 @border-radius;
751 }
752 &:last-child {
753 border-radius: 0 @border-radius @border-radius 0;
754 }
755 &:only-child {
756 border-radius: @border-radius;
757 }
758 }
759 }
760 .diff-pill {
761 &[op="name"] {
762 background: none;
763 opacity: 1;
764 color: white;
766 }
767 &[op="limited"] {
768 background: @grey2;
769 color: white;
770 }
771 &[op="binary"] {
772 background: @color7;
773 color: white;
774 }
775 &[op="modified"] {
776 background: @alert1;
777 color: white;
778 }
779 &[op="renamed"] {
780 background: @color4;
781 color: white;
782 }
783 &[op="mode"] {
784 background: @grey3;
785 color: white;
786 }
787 &[op="symlink"] {
788 background: @color8;
789 color: white;
790 }
791
792 &[op="added"] { /* added lines */
793 background: @alert1;
794 color: white;
795 }
796 &[op="deleted"] { /* deleted lines */
797 background: @alert2;
798 color: white;
799 }
800
801 &[op="created"] { /* created file */
802 background: @alert1;
803 color: white;
804 }
805 &[op="removed"] { /* deleted file */
806 background: @color5;
807 color: white;
808 }
809 }
810
811 .diff-collapse-button, .diff-expand-button {
812 cursor: pointer;
813 }
814 .diff-collapse-button {
815 display: inline;
816 }
817 .diff-expand-button {
818 display: none;
819 }
820 .diff-collapsed .diff-collapse-button {
821 display: none;
822 }
823 .diff-collapsed .diff-expand-button {
824 display: inline;
825 }
826 }
650 table.cb {
827 table.cb {
651 width: 100%;
828 width: 100%;
652 border-collapse: collapse;
829 border-collapse: collapse;
653 margin-bottom: 10px;
654
830
655 * {
831 .cb-text {
656 box-sizing: border-box;
832 padding: @cb-text-padding;
833 }
834 .cb-hunk {
835 padding: @cb-text-padding;
836 }
837 .cb-expand {
838 display: none;
839 }
840 .cb-collapse {
841 display: inline;
842 }
843 &.cb-collapsed {
844 .cb-line {
845 display: none;
846 }
847 .cb-expand {
848 display: inline;
849 }
850 .cb-collapse {
851 display: none;
852 }
657 }
853 }
658
854
659 /* intentionally general selector since .cb-line-selected must override it
855 /* intentionally general selector since .cb-line-selected must override it
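The collapse behaviour above is a pure-CSS checkbox hack: a hidden input.diff-collapse-state checkbox is rendered immediately before each .diff container, so the `:checked + .diff` sibling selector can hide the .cb table and flip the .diff-collapse-indicator triangle without any JavaScript. A minimal sketch of the markup this stylesheet appears to assume, assuming the heading doubles as the checkbox's label (the ids and helper name are illustrative, not taken from the templates):

    # Sketch of the markup the collapse LESS assumes (a hypothetical helper;
    # only the class names and the checkbox-before-.diff sibling order come
    # from the stylesheet above).
    def render_collapsible_diff(file_id, heading, body, collapsed=False):
        checked = 'checked' if collapsed else ''
        return (
            '<input class="diff-collapse-state" id="collapse-%(fid)s"'
            ' type="checkbox" %(checked)s>'
            '<div class="diff">'
            '<label class="diff-heading" for="collapse-%(fid)s">'
            '<span class="diff-collapse-indicator"></span>%(heading)s'
            '</label>'
            '%(body)s'  # the table.cb block, hidden while the box is checked
            '</div>'
        ) % {'fid': file_id, 'checked': checked,
             'heading': heading, 'body': body}

    print(render_collapsible_diff('f1', 'setup.py', '<table class="cb"></table>'))

Driving the state through a checkbox keeps collapse and expand working even before any page scripts have loaded.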
@@ -663,8 +859,20 b' table.cb {'
663 .cb-line-fresh .cb-content {
859 .cb-line-fresh .cb-content {
664 background: white !important;
860 background: white !important;
665 }
861 }
862 .cb-warning {
863 background: #fff4dd;
864 }
666
865
667 tr.cb-annotate {
866 &.cb-diff-sideside {
867 td {
868 &.cb-content {
869 width: 50%;
870 }
871 }
872 }
873
874 tr {
875 &.cb-annotate {
668 border-top: 1px solid #eee;
876 border-top: 1px solid #eee;
669
877
670 &+ .cb-line {
878 &+ .cb-line {
@@ -679,6 +887,21 b' table.cb {'
679 }
887 }
680 }
888 }
681
889
890 &.cb-hunk {
891 font-family: @font-family-monospace;
892 color: rgba(0, 0, 0, 0.3);
893
894 td {
895 &:first-child {
896 background: #edf2f9;
897 }
898 &:last-child {
899 background: #f4f7fb;
900 }
901 }
902 }
903 }
904
682 td {
905 td {
683 vertical-align: top;
906 vertical-align: top;
684 padding: 0;
907 padding: 0;
@@ -686,9 +909,14 b' table.cb {'
686 &.cb-content {
909 &.cb-content {
687 font-size: 12.35px;
910 font-size: 12.35px;
688
911
912 &.cb-line-selected .cb-code {
913 background: @comment-highlight-color !important;
914 }
915
689 span.cb-code {
916 span.cb-code {
690 line-height: @cb-line-height;
917 line-height: @cb-line-height;
691 padding-left: @cb-line-code-padding;
918 padding-left: @cb-line-code-padding;
919 padding-right: @cb-line-code-padding;
692 display: block;
920 display: block;
693 white-space: pre-wrap;
921 white-space: pre-wrap;
694 font-family: @font-family-monospace;
922 font-family: @font-family-monospace;
@@ -714,14 +942,38 b' table.cb {'
714 a {
942 a {
715 display: block;
943 display: block;
716 padding-right: @cb-line-code-padding;
944 padding-right: @cb-line-code-padding;
945 padding-left: @cb-line-code-padding;
717 line-height: @cb-line-height;
946 line-height: @cb-line-height;
718 color: rgba(0, 0, 0, 0.3);
947 color: rgba(0, 0, 0, 0.3);
719 }
948 }
720 }
949 }
721
950
722 &.cb-content {
951 &.cb-empty {
723 &.cb-line-selected .cb-code {
952 background: @grey7;
724 background: @comment-highlight-color !important;
953 }
954
955 ins {
956 color: black;
957 background: #a6f3a6;
958 text-decoration: none;
959 }
960 del {
961 color: black;
962 background: #f8cbcb;
963 text-decoration: none;
964 }
965 &.cb-addition {
966 background: #ecffec;
967
968 &.blob-lineno {
969 background: #ddffdd;
970 }
971 }
972 &.cb-deletion {
973 background: #ffecec;
974
975 &.blob-lineno {
976 background: #ffdddd;
725 }
977 }
726 }
978 }
727
979
@@ -229,6 +229,24 b' function offsetScroll(element, offset){'
229 }, 100);
229 }, 100);
230 }
230 }
231
231
232 // scroll an element `percent`% from the top of the page in `time` ms
233 function scrollToElement(element, percent, time) {
234 percent = (percent === undefined ? 25 : percent);
235 time = (time === undefined ? 100 : time);
236
237 var $element = $(element);
238 var elOffset = $element.offset().top;
239 var elHeight = $element.height();
240 var windowHeight = $(window).height();
241 var offset = elOffset;
242 if (elHeight < windowHeight) {
243 offset = elOffset - ((windowHeight / (100 / percent)) - (elHeight / 2));
244 }
245 setTimeout(function() {
246 $('html, body').animate({ scrollTop: offset});
247 }, time);
248 }
249
232 /**
250 /**
233 * global hooks after DOM is loaded
251 * global hooks after DOM is loaded
234 */
252 */
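When the element is shorter than the viewport, scrollToElement positions it `percent`% (default 25%) down the window, correcting by half the element's height; otherwise it scrolls straight to the element's offset. A Python transcription of the same arithmetic, useful only for sanity-checking the numbers (the sample values below are made up):

    # Python transcription of scrollToElement's offset arithmetic
    # (illustrative only; the sample numbers are invented).
    def scroll_offset(el_offset, el_height, window_height, percent=25):
        if el_height < window_height:
            # land the element `percent`% from the top, centred on itself
            return el_offset - ((window_height / (100.0 / percent))
                                - (el_height / 2.0))
        return el_offset

    # an 18px line at document offset 4000 in a 900px window:
    # 4000 - (900/4 - 9) = 3784, leaving the line about 225px from the top
    print(scroll_offset(4000, 18, 900))  # 3784.0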
@@ -418,6 +436,10 b' function offsetScroll(element, offset){'
418 var result = splitDelimitedHash(location.hash);
436 var result = splitDelimitedHash(location.hash);
419 var loc = result.loc;
437 var loc = result.loc;
420 if (loc.length > 1) {
438 if (loc.length > 1) {
439
440 var highlightable_line_tds = [];
441
442 // source code line format
421 var page_highlights = loc.substring(
443 var page_highlights = loc.substring(
422 loc.indexOf('#') + 1).split('L');
444 loc.indexOf('#') + 1).split('L');
423
445
@@ -442,33 +464,27 b' function offsetScroll(element, offset){'
442 for (pos in h_lines) {
464 for (pos in h_lines) {
443 var line_td = $('td.cb-lineno#L' + h_lines[pos]);
465 var line_td = $('td.cb-lineno#L' + h_lines[pos]);
444 if (line_td.length) {
466 if (line_td.length) {
445 line_td.addClass('cb-line-selected'); // line number td
467 highlightable_line_tds.push(line_td);
446 line_td.next().addClass('cb-line-selected'); // line content
468 }
447 }
469 }
448 }
470 }
449 var first_line_td = $('td.cb-lineno#L' + h_lines[0]);
450 if (first_line_td.length) {
451 var elOffset = first_line_td.offset().top;
452 var elHeight = first_line_td.height();
453 var windowHeight = $(window).height();
454 var offset;
455
471
456 if (elHeight < windowHeight) {
472 // now check a direct id reference (diff page)
457 offset = elOffset - ((windowHeight / 4) - (elHeight / 2));
473 if ($(loc).length && $(loc).hasClass('cb-lineno')) {
474 highlightable_line_tds.push($(loc));
458 }
475 }
459 else {
476 $.each(highlightable_line_tds, function (i, $td) {
460 offset = elOffset;
477 $td.addClass('cb-line-selected'); // line number td
461 }
478 $td.next().addClass('cb-line-selected'); // line content
462 $(function() { // let browser scroll to hash first, then
463 // scroll the line to the middle of page
464 setTimeout(function() {
465 $('html, body').animate({ scrollTop: offset });
466 }, 100);
467 });
479 });
480
481 if (highlightable_line_tds.length) {
482 var $first_line_td = highlightable_line_tds[0];
483 scrollToElement($first_line_td);
468 $.Topic('/ui/plugins/code/anchor_focus').prepareOrPublish({
484 $.Topic('/ui/plugins/code/anchor_focus').prepareOrPublish({
469 lineno: first_line_td,
485 lineno: $first_line_td,
470 remainder: result.remainder});
486 remainder: result.remainder
471 }
487 });
472 }
488 }
473 }
489 }
474 }
490 }
@@ -1,5 +1,6 b''
1 ## -*- coding: utf-8 -*-
1 ## -*- coding: utf-8 -*-
2 <%inherit file="/base/base.html"/>
2 <%inherit file="/base/base.html"/>
3 <%namespace name="cbdiffs" file="/codeblocks/diffs.html"/>
3
4
4 <%def name="title()">
5 <%def name="title()">
5 %if c.compare_home:
6 %if c.compare_home:
@@ -53,7 +54,7 b''
53 <a id="btn-swap" class="btn btn-primary" href="${c.swap_url}"><i class="icon-refresh"></i> ${_('Swap')}</a>
54 <a id="btn-swap" class="btn btn-primary" href="${c.swap_url}"><i class="icon-refresh"></i> ${_('Swap')}</a>
54 %endif
55 %endif
55 <div id="compare_revs" class="btn btn-primary"><i class ="icon-loop"></i> ${_('Compare Commits')}</div>
56 <div id="compare_revs" class="btn btn-primary"><i class ="icon-loop"></i> ${_('Compare Commits')}</div>
56 %if c.files:
57 %if c.diffset and c.diffset.files:
57 <div id="compare_changeset_status_toggle" class="btn btn-primary">${_('Comment')}</div>
58 <div id="compare_changeset_status_toggle" class="btn btn-primary">${_('Comment')}</div>
58 %endif
59 %endif
59 </div>
60 </div>
@@ -248,72 +249,7 b''
248 <div id="changeset_compare_view_content">
249 <div id="changeset_compare_view_content">
249 ##CS
250 ##CS
250 <%include file="compare_commits.html"/>
251 <%include file="compare_commits.html"/>
251
252 ${cbdiffs.render_diffset(c.diffset)}
252 ## FILES
253 <div class="cs_files_title">
254 <span class="cs_files_expand">
255 <span id="expand_all_files">${_('Expand All')}</span> | <span id="collapse_all_files">${_('Collapse All')}</span>
256 </span>
257 <h2>
258 ${diff_block.diff_summary_text(len(c.files), c.lines_added, c.lines_deleted, c.limited_diff)}
259 </h2>
260 </div>
261 <div class="cs_files">
262 %if not c.files:
263 <p class="empty_data">${_('No files')}</p>
264 %endif
265 <table class="compare_view_files">
266 <%namespace name="diff_block" file="/changeset/diff_block.html"/>
267 %for FID, change, path, stats, file in c.files:
268 <tr class="cs_${change} collapse_file" fid="${FID}">
269 <td class="cs_icon_td">
270 <span class="collapse_file_icon" fid="${FID}"></span>
271 </td>
272 <td class="cs_icon_td">
273 <div class="flag_status not_reviewed hidden"></div>
274 </td>
275 <td class="cs_${change}" id="a_${FID}">
276 <div class="node">
277 <a href="#a_${FID}">
278 <i class="icon-file-${change.lower()}"></i>
279 ${h.safe_unicode(path)}
280 </a>
281 </div>
282 </td>
283 <td>
284 <div class="changes pull-right">${h.fancy_file_stats(stats)}</div>
285 <div class="comment-bubble pull-right" data-path="${path}">
286 <i class="icon-comment"></i>
287 </div>
288 </td>
289 </tr>
290 <tr fid="${FID}" id="diff_${FID}" class="diff_links">
291 <td></td>
292 <td></td>
293 <td class="cs_${change}">
294 %if c.target_repo.repo_name == c.repo_name:
295 ${diff_block.diff_menu(c.repo_name, h.safe_unicode(path), c.source_ref, c.target_ref, change, file)}
296 %else:
297 ## this is a slightly different case, since the target repo can have this
298 ## file in a different state than the source repo
299 ${diff_block.diff_menu(c.target_repo.repo_name, h.safe_unicode(path), c.source_ref, c.target_ref, change, file)}
300 %endif
301 </td>
302 <td class="td-actions rc-form">
303 </td>
304 </tr>
305 <tr id="tr_${FID}">
306 <td></td>
307 <td></td>
308 <td class="injected_diff" colspan="2">
309 ${diff_block.diff_block_simple([c.changes[FID]])}
310 </td>
311 </tr>
312 %endfor
313 </table>
314 % if c.limited_diff:
315 ${diff_block.changeset_message()}
316 % endif
317 </div>
253 </div>
318 %endif
254 %endif
319 </div>
255 </div>
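The file table, per-file diff menus, and limited-diff message are all folded into the cbdiffs.render_diffset(c.diffset) call; the only contract still visible in this template is that c.diffset is truthy and exposes a files collection (see the Comment-button guard earlier in this file). A hypothetical sketch of that minimal shape, where everything beyond the files attribute is invented:

    # Hypothetical sketch of the minimal c.diffset contract the template
    # relies on; any attribute beyond `files` is invented for illustration.
    class DiffSet(object):
        def __init__(self, files):
            self.files = files  # one entry per changed file

    diffset = DiffSet(files=['setup.py'])
    # mirrors the template guard: %if c.diffset and c.diffset.files:
    show_comment_toggle = bool(diffset and diffset.files)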
@@ -158,7 +158,7 b' class TestChangesetController(object):'
158 response.mustcontain('Added docstrings to vcs.cli') # commit msg
158 response.mustcontain('Added docstrings to vcs.cli') # commit msg
159 response.mustcontain('Changed theme to ADC theme') # commit msg
159 response.mustcontain('Changed theme to ADC theme') # commit msg
160
160
161 self._check_diff_menus(response)
161 self._check_new_diff_menus(response)
162
162
163 def test_changeset_range(self, backend):
163 def test_changeset_range(self, backend):
164 self._check_changeset_range(
164 self._check_changeset_range(
@@ -273,7 +273,7 b' Added a symlink'
273 """ + diffs['svn'],
273 """ + diffs['svn'],
274 }
274 }
275
275
276 def _check_diff_menus(self, response, right_menu=False):
276 def _check_diff_menus(self, response, right_menu=False,):
277 # diff menus
277 # diff menus
278 for elem in ['Show File', 'Unified Diff', 'Side-by-side Diff',
278 for elem in ['Show File', 'Unified Diff', 'Side-by-side Diff',
279 'Raw Diff', 'Download Diff']:
279 'Raw Diff', 'Download Diff']:
@@ -284,3 +284,16 b' Added a symlink'
284 for elem in ['Ignore whitespace', 'Increase context',
284 for elem in ['Ignore whitespace', 'Increase context',
285 'Hide comments']:
285 'Hide comments']:
286 response.mustcontain(elem)
286 response.mustcontain(elem)
287
288
289 def _check_new_diff_menus(self, response, right_menu=False,):
290 # diff menus
291 for elem in ['Show file before', 'Show file after',
292 'Raw diff', 'Download diff']:
293 response.mustcontain(elem)
294
295 # right pane diff menus
296 if right_menu:
297 for elem in ['Ignore whitespace', 'Increase context',
298 'Hide comments']:
299 response.mustcontain(elem)
@@ -20,6 +20,7 b''
20
20
21 import mock
21 import mock
22 import pytest
22 import pytest
23 import lxml.html
23
24
24 from rhodecode.lib.vcs.backends.base import EmptyCommit
25 from rhodecode.lib.vcs.backends.base import EmptyCommit
25 from rhodecode.lib.vcs.exceptions import RepositoryRequirementError
26 from rhodecode.lib.vcs.exceptions import RepositoryRequirementError
@@ -609,9 +610,12 b' class ComparePage(AssertResponse):'
609 """
610 """
610
611
611 def contains_file_links_and_anchors(self, files):
612 def contains_file_links_and_anchors(self, files):
613 doc = lxml.html.fromstring(self.response.body)
612 for filename, file_id in files:
614 for filename, file_id in files:
613 self.contains_one_link(filename, '#' + file_id)
614 self.contains_one_anchor(file_id)
615 self.contains_one_anchor(file_id)
616 diffblock = doc.cssselect('[data-f-path="%s"]' % filename)
617 assert len(diffblock) == 1
618 assert len(diffblock[0].cssselect('a[href="#%s"]' % file_id)) == 1
615
619
616 def contains_change_summary(self, files_changed, inserted, deleted):
620 def contains_change_summary(self, files_changed, inserted, deleted):
617 template = (
621 template = (
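contains_file_links_and_anchors now parses the response once with lxml.html and asserts via CSS selectors that exactly one diff block carries the file's data-f-path attribute and contains exactly one anchor link to its file id. The same selector logic run against a canned document, with markup invented purely for illustration:

    import lxml.html

    # markup invented to mirror what the assertions above select for
    body = (
        '<div class="diff" data-f-path="setup.py">'
        '<a href="#a_c123">setup.py</a>'
        '</div>')
    doc = lxml.html.fromstring(body)
    diffblock = doc.cssselect('[data-f-path="setup.py"]')
    assert len(diffblock) == 1
    assert len(diffblock[0].cssselect('a[href="#a_c123"]')) == 1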
@@ -264,19 +264,19 b' class TestRenderTokenStream(object):'
264 ),
264 ),
265 (
265 (
266 [('A', '', u'two\n'), ('A', '', u'lines')],
266 [('A', '', u'two\n'), ('A', '', u'lines')],
267 '<span class="A">two<nl>\n</nl>lines</span>',
267 '<span class="A">two\nlines</span>',
268 ),
268 ),
269 (
269 (
270 [('A', '', u'\nthree\n'), ('A', '', u'lines')],
270 [('A', '', u'\nthree\n'), ('A', '', u'lines')],
271 '<span class="A"><nl>\n</nl>three<nl>\n</nl>lines</span>',
271 '<span class="A">\nthree\nlines</span>',
272 ),
272 ),
273 (
273 (
274 [('', '', u'\n'), ('A', '', u'line')],
274 [('', '', u'\n'), ('A', '', u'line')],
275 '<span><nl>\n</nl></span><span class="A">line</span>',
275 '<span>\n</span><span class="A">line</span>',
276 ),
276 ),
277 (
277 (
278 [('', 'ins', u'\n'), ('A', '', u'line')],
278 [('', 'ins', u'\n'), ('A', '', u'line')],
279 '<span><ins><nl>\n</nl></ins></span><span class="A">line</span>',
279 '<span><ins>\n</ins></span><span class="A">line</span>',
280 ),
280 ),
281 (
281 (
282 [('A', '', u'hel'), ('A', 'ins', u'lo')],
282 [('A', '', u'hel'), ('A', 'ins', u'lo')],