upstream/mercurial-mirror Commit - r29834:1ea77b75

bdiff: implement cffi version of bdiff

Maciej Fijalkowski -

r29834:1ea77b75 default

parent child

mercurial/pure/bdiff.py

0 +34 -1

              # bdiff.py - Python implementation of bdiff.c
              #
              # Copyright 2009 Matt Mackall <mpm@selenic.com> and others
              #
              # This software may be used and distributed according to the terms of the
              # GNU General Public License version 2 or any later version.
              from __future__ import absolute_import
              import array
              import difflib
              import re
              import struct
              from . import policy
              policynocffi = policy.policynocffi
              modulepolicy = policy.policy
              def splitnewlines(text):
                  '''like str.splitlines, but only split on newlines.'''
                  lines = [l + '\n' for l in text.split('\n')]
                  if lines:
                      if lines[-1] == '\n':
                          lines.pop()
                      else:
                          lines[-1] = lines[-1][:-1]
                  return lines
              def _normalizeblocks(a, b, blocks):
                  prev = None
                  r = []
                  for curr in blocks:
                      if prev is None:
                          prev = curr
                          continue
                      shift = 0
                      a1, b1, l1 = prev
                      a1end = a1 + l1
                      b1end = b1 + l1
                      a2, b2, l2 = curr
                      a2end = a2 + l2
                      b2end = b2 + l2
                      if a1end == a2:
                          while (a1end + shift < a2end and
                                 a[a1end + shift] == b[b1end + shift]):
                              shift += 1
                      elif b1end == b2:
                          while (b1end + shift < b2end and
                                 a[a1end + shift] == b[b1end + shift]):
                              shift += 1
                      r.append((a1, b1, l1 + shift))
                      prev = a2 + shift, b2 + shift, l2 - shift
                  r.append(prev)
                  return r
              def _tostring(c):
                  if type(c) is array.array:
                      # this copy overhead isn't ideal
                      return c.tostring()
                  return str(c)
              def bdiff(a, b):
                  a = _tostring(a).splitlines(True)
                  b = _tostring(b).splitlines(True)
                  if not a:
                      s = "".join(b)
                      return s and (struct.pack(">lll", 0, 0, len(s)) + s)
                  bin = []
                  p = [0]
                  for i in a: p.append(p[-1] + len(i))
                  d = difflib.SequenceMatcher(None, a, b).get_matching_blocks()
                  d = _normalizeblocks(a, b, d)
                  la = 0
                  lb = 0
                  for am, bm, size in d:
                      s = "".join(b[lb:bm])
                      if am > la or s:
                          bin.append(struct.pack(">lll", p[la], p[am], len(s)) + s)
                      la = am + size
                      lb = bm + size
                  return "".join(bin)
              def blocks(a, b):
                  an = splitnewlines(a)
                  bn = splitnewlines(b)
                  d = difflib.SequenceMatcher(None, an, bn).get_matching_blocks()
                  d = _normalizeblocks(an, bn, d)
                  return [(i, i + n, j, j + n) for (i, j, n) in d]
              def fixws(text, allws):
                  if allws:
                      text = re.sub('[ \t\r]+', '', text)
                  else:
                      text = re.sub('[ \t\r]+', ' ', text)
                      text = text.replace(' \n', '\n')
                  return text
              if modulepolicy not in policynocffi:
                  try:
                      from _bdiff_cffi import ffi, lib
                  except ImportError:
                      if modulepolicy == 'cffi': # strict cffi import
                          raise
                  else:
                      def blocks(sa, sb):
                          a = ffi.new("struct bdiff_line**")
                          b = ffi.new("struct bdiff_line**")
                          ac = ffi.new("char[]", sa)
                          bc = ffi.new("char[]", sb)
+                         l = ffi.new("struct bdiff_hunk*")
                          try:
                              an = lib.bdiff_splitlines(ac, len(sa), a)
                              bn = lib.bdiff_splitlines(bc, len(sb), b)
                              if not a[0] or not b[0]:
                                  raise MemoryError
-                             l = ffi.new("struct bdiff_hunk*")
                              count = lib.bdiff_diff(a[0], an, b[0], bn, l)
                              if count < 0:
                                  raise MemoryError
                              rl = [None] * count
                              h = l.next
                              i = 0
                              while h:
                                  rl[i] = (h.a1, h.a2, h.b1, h.b2)
                                  h = h.next
                                  i += 1
                          finally:
                              lib.free(a[0])
                              lib.free(b[0])
                              lib.bdiff_freehunks(l.next)
                          return rl
+                     def bdiff(sa, sb):
+                         a = ffi.new("struct bdiff_line**")
+                         b = ffi.new("struct bdiff_line**")
+                         ac = ffi.new("char[]", sa)
+                         bc = ffi.new("char[]", sb)
+                         l = ffi.new("struct bdiff_hunk*")
+                         try:
+                             an = lib.bdiff_splitlines(ac, len(sa), a)
+                             bn = lib.bdiff_splitlines(bc, len(sb), b)
+                             if not a[0] or not b[0]:
+                                 raise MemoryError
+                             count = lib.bdiff_diff(a[0], an, b[0], bn, l)
+                             if count < 0:
+                                 raise MemoryError
+                             rl = []
+                             h = l.next
+                             la = lb = 0
+                             while h:
+                                 if h.a1 != la or h.b1 != lb:
+                                     lgt = (b[0] + h.b1).l - (b[0] + lb).l
+                                     rl.append(struct.pack(">lll", (a[0] + la).l - a[0].l,
+                                         (a[0] + h.a1).l - a[0].l, lgt))
+                                     rl.append(str(ffi.buffer((b[0] + lb).l, lgt)))
+                                 la = h.a2
+                                 lb = h.b2
+                                 h = h.next
+                         finally:
+                             lib.free(a[0])
+                             lib.free(b[0])
+                             lib.bdiff_freehunks(l.next)
+                         return "".join(rl)

General Comments 0

Write
Preview

You need to be logged in to leave comments. Login now

No TODOs yet

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages