upstream/mercurial-mirror Commit - r37749:54713489

patch: buffer lines for a same hunk...

Jun Wu -

r37749:54713489 default

parent child

mercurial/patch.py

0 +51 -97

              import collections
              import contextlib
              import copy
-             import difflib
              import email
              import errno
              import hashlib
                  else:
                      return difffn(opts, None)
+             def diffsinglehunk(hunklines):
+                 """yield tokens for a list of lines in a single hunk"""
+                 for line in hunklines:
+                     # chomp
+                     chompline = line.rstrip('\n')
+                     # highlight tabs and trailing whitespace
+                     stripline = chompline.rstrip()
+                     if line[0] == '-':
+                         label = 'diff.deleted'
+                     elif line[0] == '+':
+                         label = 'diff.inserted'
+                     else:
+                         raise error.ProgrammingError('unexpected hunk line: %s' % line)
+                     for token in tabsplitter.findall(stripline):
+                         if '\t' == token[0]:
+                             yield (token, 'diff.tab')
+                         else:
+                             yield (token, label)
+                     if chompline != stripline:
+                         yield (chompline[len(stripline):], 'diff.trailingwhitespace')
+                     if chompline != line:
+                         yield (line[len(chompline):], '')
              def difflabel(func, *args, **kw):
                  '''yields 2-tuples of (output, label) based on the output of func()'''
-                 inlinecolor = False
-                 if kw.get(r'opts'):
-                     inlinecolor = kw[r'opts'].worddiff
                  headprefixes = [('diff', 'diff.diffline'),
                                  ('copy', 'diff.extended'),
                                  ('rename', 'diff.extended'),
                                  ('---', 'diff.file_a'),
                                  ('+++', 'diff.file_b')]
                  textprefixes = [('@', 'diff.hunk'),
-                                 ('-', 'diff.deleted'),
-                                 ('+', 'diff.inserted')]
+                                 # - and + are handled by diffsinglehunk
+                                ]
                  head = False
+                 # buffers a hunk, i.e. adjacent "-", "+" lines without other changes.
+                 hunkbuffer = []
+                 def consumehunkbuffer():
+                     if hunkbuffer:
+                         for token in diffsinglehunk(hunkbuffer):
+                             yield token
+                         hunkbuffer[:] = []
                  for chunk in func(*args, **kw):
                      lines = chunk.split('\n')
-                     matches = {}
-                     if inlinecolor:
-                         matches = _findmatches(lines)
                      linecount = len(lines)
                      for i, line in enumerate(lines):
                          if head:
                          else:
                              if line and not line.startswith((' ', '+', '-', '@', '\\')):
                                  head = True
-                         stripline = line
                          diffline = False
                          if not head and line and line.startswith(('+', '-')):
-                             # highlight tabs and trailing whitespace, but only in
-                             # changed lines
-                             stripline = line.rstrip()
                              diffline = True
                          prefixes = textprefixes
                          if head:
                              prefixes = headprefixes
+                         if diffline:
+                             # buffered
+                             bufferedline = line
+                             if i + 1 < linecount:
+                                 bufferedline += "\n"
+                             hunkbuffer.append(bufferedline)
+                         else:
+                             # unbuffered
+                             for token in consumehunkbuffer():
+                                 yield token
+                             stripline = line.rstrip()
                          for prefix, label in prefixes:
                              if stripline.startswith(prefix):
-                                 if diffline:
-                                     if i in matches:
-                                         for t, l in _inlinediff(lines[i].rstrip(),
-                                                                 lines[matches[i]].rstrip(),
-                                                                 label):
-                                             yield (t, l)
-                                     else:
-                                         for token in tabsplitter.findall(stripline):
-                                             if token.startswith('\t'):
-                                                 yield (token, 'diff.tab')
-                                             else:
-                                                 yield (token, label)
-                                 else:
                                      yield (stripline, label)
+                                     if line != stripline:
+                                         yield (line[len(stripline):],
+                                                'diff.trailingwhitespace')
                                  break
                          else:
                              yield (line, '')
-                         if line != stripline:
-                             yield (line[len(stripline):], 'diff.trailingwhitespace')
                          if i + 1 < linecount:
                              yield ('\n', '')
-             def _findmatches(slist):
-                 '''Look for insertion matches to deletion and returns a dict of
-                 correspondences.
-                 '''
-                 lastmatch = 0
-                 matches = {}
-                 for i, line in enumerate(slist):
-                     if line == '':
-                         continue
-                     if line.startswith('-'):
-                         lastmatch = max(lastmatch, i)
-                         newgroup = False
-                         for j, newline in enumerate(slist[lastmatch + 1:]):
-                             if newline == '':
-                                 continue
-                             if newline.startswith('-') and newgroup: # too far, no match
-                                 break
-                             if newline.startswith('+'): # potential match
-                                 newgroup = True
-                                 sim = difflib.SequenceMatcher(None, line, newline).ratio()
-                                 if sim > 0.7:
-                                     lastmatch = lastmatch + 1 + j
-                                     matches[i] = lastmatch
-                                     matches[lastmatch] = i
-                                     break
-                 return matches
-             def _inlinediff(s1, s2, operation):
-                 '''Perform string diff to highlight specific changes.'''
-                 operation_skip = ('+', '?') if operation == 'diff.deleted' else ('-', '?')
-                 if operation == 'diff.deleted':
-                     s2, s1 = s1, s2
-                 buff = []
-                 # we never want to higlight the leading +-
-                 if operation == 'diff.deleted' and s2.startswith('-'):
-                     label = operation
-                     token = '-'
-                     s2 = s2[1:]
-                     s1 = s1[1:]
-                 elif operation == 'diff.inserted' and s1.startswith('+'):
-                     label = operation
-                     token = '+'
-                     s2 = s2[1:]
-                     s1 = s1[1:]
-                 else:
-                     raise error.ProgrammingError("Case not expected, operation = %s" %
-                                                  operation)
-                 s = difflib.ndiff(_nonwordre.split(s2), _nonwordre.split(s1))
-                 for part in s:
-                     if part.startswith(operation_skip) or len(part) == 2:
-                         continue
-                     l = operation + '.highlight'
-                     if part.startswith(' '):
-                         l = operation
-                     if part[2:] == '\t':
-                         l = 'diff.tab'
-                     if l == label: # contiguous token with same label
-                         token += part[2:]
-                         continue
-                     else:
-                         buff.append((token, label))
-                         label = l
-                         token = part[2:]
-                 buff.append((token, label))
-                 return buff
+                     for token in consumehunkbuffer():
+                         yield token
              def diffui(*args, **kw):
                  '''like diff(), but yields 2-tuples of (output, label) for ui.write()'''

tests/test-diff-color.t

0 +5 0

                [diff.deleted|-(to see if it works)]
                [diff.inserted|+three of those lines have]
                [diff.inserted|+collapsed onto one]
+             #if false
                $ hg diff --config experimental.worddiff=True --color=debug
                [diff.diffline|diff --git a/file1 b/file1]
                [diff.file_a|--- a/file1]
                [diff.deleted|-(to see if it works)]
                [diff.inserted|+three of those lines ][diff.inserted.highlight|have]
                [diff.inserted|+][diff.inserted.highlight|collapsed][diff.inserted| onto one]
+             #endif
              multibyte character shouldn't be broken up in word diff:
                >     f.write(b"blah \xe3\x82\xa4 blah\n")
                > EOF
                $ hg ci -m 'slightly change utf8 char' utf8
+             #if false
                $ hg diff --config experimental.worddiff=True --color=debug -c.
                [diff.diffline|diff --git a/utf8 b/utf8]
                [diff.file_a|--- a/utf8]
                [diff.hunk|@@ -1,1 +1,1 @@]
                [diff.deleted|-blah ][diff.deleted.highlight|\xe3\x82\xa2][diff.deleted| blah] (esc)
                [diff.inserted|+blah ][diff.inserted.highlight|\xe3\x82\xa4][diff.inserted| blah] (esc)
+             #endif

General Comments 0

Write
Preview

You need to be logged in to leave comments. Log in now

No TODOs yet

	Site-wide shortcuts
/	Use quick search box
g h	Go to home page
g g	Go to my private gists page
g G	Go to my public gists page
g 0-9	Go to bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Go to summary page
g c	Go to changelog page
g f	Go to files page
g F	Go to files page with file search activated
g p	Go to pull requests page
g o	Go to repository settings
g O	Go to repository access permissions settings
t s	Toggle sidebar on some pages