upstream/mercurial-mirror Files · mercurial/pure/bdiff.py

branchmap: don't use buffer() on Python 3...

branchmap: don't use buffer() on Python 3 This is certainly slower than the Python 2 code, but it works, and we can revisit it later if it's a problem.

Maciej Fijalkowski - - Load All Authors

File last commit:

r30042:d24e03da default


                r31347:279430ee

default

Download file

             bdiff.py
        
                    169 lines
            
             | 4.9 KiB
            
                | text/x-python
            
             |
                PythonLexer
            
             / mercurial / pure / bdiff.py
          
                    History
                
                 |
                  Source
                 | Raw
                 |Copy content
                 |Copy permalink

        Martin Geisler
    
pure Python implementation of bdiff.c

              r7703
            
      # bdiff.py - Python implementation of bdiff.c

      #

      # Copyright 2009 Matt Mackall <mpm@selenic.com> and others

      #

        Martin Geisler
    
updated license to be explicit about GPL version 2

              r8225
            
      # This software may be used and distributed according to the terms of the

        Matt Mackall
    
Update license to GPLv2+

              r10263
            
      # GNU General Public License version 2 or any later version.

        Martin Geisler
    
pure Python implementation of bdiff.c

              r7703
            
        Gregory Szorc
    
bdiff: use absolute_import

              r27335
            
      from __future__ import absolute_import

        timeless
    
bdiff: (pure) support array.array arrays (issue5130)

              r28389
            
      import array

        Gregory Szorc
    
bdiff: use absolute_import

              r27335
            
      import difflib

      import re

      import struct

        Matt Mackall
    
pure/bdiff: fix circular import

              r7944
            
        Maciej Fijalkowski
    
bdiff: implement cffi version of blocks

              r29833
            
      from . import policy

      policynocffi = policy.policynocffi

      modulepolicy = policy.policy

        Matt Mackall
    
pure/bdiff: fix circular import

              r7944
            
      def splitnewlines(text):
          '''Break text into lines at newline characters only.

          Every returned line keeps its trailing newline. A final fragment
          that is not newline-terminated is returned as-is, and no empty
          trailing entry is produced. Unlike str.splitlines, carriage returns
          and other line-boundary characters are not treated as separators.
          '''
          pieces = text.split('\n')
          # Whatever follows the last newline; empty when text ends with a
          # newline or is itself empty.
          tail = pieces.pop()
          result = [piece + '\n' for piece in pieces]
          if tail:
              result.append(tail)
          return result

        Martin Geisler
    
pure Python implementation of bdiff.c

              r7703
            
      def _normalizeblocks(a, b, blocks):

          prev = None

        Dan Villiom Podlaski Christiansen
    
pure bdiff: don't use a generator...

              r14066
            
          r = []

        Martin Geisler
    
pure Python implementation of bdiff.c

              r7703
            
          for curr in blocks:

              if prev is None:

                  prev = curr

                  continue

              shift = 0

              a1, b1, l1 = prev

              a1end = a1 + l1

              b1end = b1 + l1

              a2, b2, l2 = curr

              a2end = a2 + l2

              b2end = b2 + l2

              if a1end == a2:

        Matt Mackall
    
many, many trivial check-code fixups

              r10282
            
                  while (a1end + shift < a2end and

                         a[a1end + shift] == b[b1end + shift]):

        Martin Geisler
    
pure Python implementation of bdiff.c

              r7703
            
                      shift += 1

              elif b1end == b2:

        Matt Mackall
    
many, many trivial check-code fixups

              r10282
            
                  while (b1end + shift < b2end and

                         a[a1end + shift] == b[b1end + shift]):

        Martin Geisler
    
pure Python implementation of bdiff.c

              r7703
            
                      shift += 1

        Dan Villiom Podlaski Christiansen
    
pure bdiff: don't use a generator...

              r14066
            
              r.append((a1, b1, l1 + shift))

        Matt Mackall
    
many, many trivial check-code fixups

              r10282
            
              prev = a2 + shift, b2 + shift, l2 - shift

        Dan Villiom Podlaski Christiansen
    
pure bdiff: don't use a generator...

              r14066
            
          r.append(prev)

          return r

        Martin Geisler
    
pure Python implementation of bdiff.c

              r7703
            
        timeless
    
bdiff: (pure) support array.array arrays (issue5130)

              r28389
            
      def _tostring(c):

          if type(c) is array.array:

              # this copy overhead isn't ideal

              return c.tostring()

          return str(c)

        Martin Geisler
    
pure Python implementation of bdiff.c

              r7703
            
      def bdiff(a, b):
          '''Compute a binary delta that turns a into b.

          Both inputs are converted with _tostring() and split into lines
          (terminators kept). The result is a concatenation of hunks, each a
          header packed as three big-endian longs (start offset in a, end
          offset in a, length of the replacement) followed by the
          replacement text taken from b.

          When a has no lines, the result is a single hunk inserting all of
          b at offset 0, or the empty string when b is empty as well.
          '''
          alines = _tostring(a).splitlines(True)
          blines = _tostring(b).splitlines(True)

          if not alines:
              added = "".join(blines)
              return added and (struct.pack(">lll", 0, 0, len(added)) + added)

          # offsets[i] is the position in a's text at which line i starts;
          # the final entry is the total length.
          offsets = [0]
          for line in alines:
              offsets.append(offsets[-1] + len(line))

          matcher = difflib.SequenceMatcher(None, alines, blines)
          matches = _normalizeblocks(alines, blines,
                                     matcher.get_matching_blocks())

          chunks = []
          aend = 0
          bend = 0
          for astart, bstart, length in matches:
              # Everything in b between the previous match and this one
              # replaces the corresponding gap in a.
              inserted = "".join(blines[bend:bstart])
              if astart > aend or inserted:
                  header = struct.pack(">lll", offsets[aend], offsets[astart],
                                       len(inserted))
                  chunks.append(header + inserted)
              aend = astart + length
              bend = bstart + length

          return "".join(chunks)

      def blocks(a, b):
          '''Return the matching line ranges between texts a and b.

          Both texts are split with splitnewlines(), matched with
          difflib.SequenceMatcher and passed through _normalizeblocks().
          Each entry of the returned list is a tuple
          (astart, aend, bstart, bend) of line indexes.
          '''
          alines = splitnewlines(a)
          blines = splitnewlines(b)
          matcher = difflib.SequenceMatcher(None, alines, blines)
          matches = _normalizeblocks(alines, blines,
                                     matcher.get_matching_blocks())
          result = []
          for astart, bstart, length in matches:
              result.append((astart, astart + length,
                             bstart, bstart + length))
          return result

        Patrick Mezard
    
mdiff: replace wscleanup() regexps with C loops...

              r15530
            
      def fixws(text, allws):
          '''Normalize runs of spaces, tabs and carriage returns in text.

          With allws true, every such run is removed. Otherwise each run is
          collapsed to a single space, and a space directly before a newline
          is dropped.
          '''
          if allws:
              return re.sub('[ \t\r]+', '', text)
          collapsed = re.sub('[ \t\r]+', ' ', text)
          return collapsed.replace(' \n', '\n')

        Maciej Fijalkowski
    
bdiff: implement cffi version of blocks

              r29833
            
      if modulepolicy not in policynocffi:
          # The policy permits cffi: try to swap in cffi-backed versions of
          # blocks() and bdiff().
          try:
              from _bdiff_cffi import ffi, lib
          except ImportError:
              # A missing cffi module is only fatal under the strict policy;
              # otherwise the failure is ignored.
              if modulepolicy == 'cffi': # strict cffi import
                  raise
          else:
              def blocks(sa, sb):
                  '''Return matching ranges between sa and sb via the C library.

                  Each entry of the returned list is a tuple
                  (a1, a2, b1, b2) read from one hunk produced by
                  lib.bdiff_diff. Raises MemoryError when line splitting or
                  diffing reports failure.
                  '''
                  alines = ffi.new("struct bdiff_line**")
                  blines = ffi.new("struct bdiff_line**")
                  # Named locals keep the C copies of the inputs referenced
                  # for the whole call.
                  abuf = ffi.new("char[]", str(sa))
                  bbuf = ffi.new("char[]", str(sb))
                  head = ffi.new("struct bdiff_hunk*")
                  try:
                      acount = lib.bdiff_splitlines(abuf, len(sa), alines)
                      bcount = lib.bdiff_splitlines(bbuf, len(sb), blines)
                      if not (alines[0] and blines[0]):
                          raise MemoryError
                      nhunks = lib.bdiff_diff(alines[0], acount,
                                              blines[0], bcount, head)
                      if nhunks < 0:
                          raise MemoryError
                      result = [None] * nhunks
                      hunk = head.next
                      pos = 0
                      # Walk the linked list hanging off the head hunk.
                      while hunk:
                          result[pos] = (hunk.a1, hunk.a2, hunk.b1, hunk.b2)
                          pos += 1
                          hunk = hunk.next
                  finally:
                      # Always hand the C-side allocations back, on success
                      # and on error alike.
                      lib.free(alines[0])
                      lib.free(blines[0])
                      lib.bdiff_freehunks(head.next)
                  return result

              def bdiff(sa, sb):
                  '''Compute a binary delta from sa to sb via the C library.

                  The result is a concatenation of hunks, each a header
                  packed as three big-endian longs (start offset in sa, end
                  offset in sa, replacement length) followed by the
                  replacement data copied from sb. Raises MemoryError when
                  line splitting or diffing reports failure.
                  '''
                  alines = ffi.new("struct bdiff_line**")
                  blines = ffi.new("struct bdiff_line**")
                  # Named locals keep the C copies of the inputs referenced
                  # for the whole call.
                  abuf = ffi.new("char[]", str(sa))
                  bbuf = ffi.new("char[]", str(sb))
                  head = ffi.new("struct bdiff_hunk*")
                  try:
                      acount = lib.bdiff_splitlines(abuf, len(sa), alines)
                      bcount = lib.bdiff_splitlines(bbuf, len(sb), blines)
                      if not (alines[0] and blines[0]):
                          raise MemoryError
                      nhunks = lib.bdiff_diff(alines[0], acount,
                                              blines[0], bcount, head)
                      if nhunks < 0:
                          raise MemoryError
                      pieces = []
                      hunk = head.next
                      aprev = bprev = 0
                      while hunk:
                          # A gap between the previous match and this one
                          # on either side becomes one delta hunk.
                          if hunk.a1 != aprev or hunk.b1 != bprev:
                              newstart = (blines[0] + bprev).l
                              newlen = (blines[0] + hunk.b1).l - newstart
                              abase = alines[0].l
                              pieces.append(struct.pack(
                                  ">lll",
                                  (alines[0] + aprev).l - abase,
                                  (alines[0] + hunk.a1).l - abase,
                                  newlen))
                              pieces.append(str(ffi.buffer(newstart, newlen)))
                          aprev = hunk.a2
                          bprev = hunk.b2
                          hunk = hunk.next
                  finally:
                      # Always hand the C-side allocations back, on success
                      # and on error alike.
                      lib.free(alines[0])
                      lib.free(blines[0])
                      lib.bdiff_freehunks(head.next)
                  return "".join(pieces)

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

Martin Geisler pure Python implementation of bdiff.c	r7703	# bdiff.py - Python implementation of bdiff.c
		#
		# Copyright 2009 Matt Mackall <mpm@selenic.com> and others
		#
Martin Geisler updated license to be explicit about GPL version 2	r8225	# This software may be used and distributed according to the terms of the
Matt Mackall Update license to GPLv2+	r10263	# GNU General Public License version 2 or any later version.
Martin Geisler pure Python implementation of bdiff.c	r7703
Gregory Szorc bdiff: use absolute_import	r27335	from __future__ import absolute_import

timeless bdiff: (pure) support array.array arrays (issue5130)	r28389	import array
Gregory Szorc bdiff: use absolute_import	r27335	import difflib
		import re
		import struct
Matt Mackall pure/bdiff: fix circular import	r7944
Maciej Fijalkowski bdiff: implement cffi version of blocks	r29833	from . import policy
		policynocffi = policy.policynocffi
		modulepolicy = policy.policy

Matt Mackall pure/bdiff: fix circular import	r7944	def splitnewlines(text):
		'''like str.splitlines, but only split on newlines.'''
		lines = [l + '\n' for l in text.split('\n')]
		if lines:
		if lines[-1] == '\n':
		lines.pop()
		else:
		lines[-1] = lines[-1][:-1]
		return lines
Martin Geisler pure Python implementation of bdiff.c	r7703
		def _normalizeblocks(a, b, blocks):
		prev = None
Dan Villiom Podlaski Christiansen pure bdiff: don't use a generator...	r14066	r = []
Martin Geisler pure Python implementation of bdiff.c	r7703	for curr in blocks:
		if prev is None:
		prev = curr
		continue
		shift = 0

		a1, b1, l1 = prev
		a1end = a1 + l1
		b1end = b1 + l1

		a2, b2, l2 = curr
		a2end = a2 + l2
		b2end = b2 + l2
		if a1end == a2:
Matt Mackall many, many trivial check-code fixups	r10282	while (a1end + shift < a2end and
		a[a1end + shift] == b[b1end + shift]):
Martin Geisler pure Python implementation of bdiff.c	r7703	shift += 1
		elif b1end == b2:
Matt Mackall many, many trivial check-code fixups	r10282	while (b1end + shift < b2end and
		a[a1end + shift] == b[b1end + shift]):
Martin Geisler pure Python implementation of bdiff.c	r7703	shift += 1
Dan Villiom Podlaski Christiansen pure bdiff: don't use a generator...	r14066	r.append((a1, b1, l1 + shift))
Matt Mackall many, many trivial check-code fixups	r10282	prev = a2 + shift, b2 + shift, l2 - shift
Dan Villiom Podlaski Christiansen pure bdiff: don't use a generator...	r14066	r.append(prev)
		return r
Martin Geisler pure Python implementation of bdiff.c	r7703
timeless bdiff: (pure) support array.array arrays (issue5130)	r28389	def _tostring(c):
		if type(c) is array.array:
		# this copy overhead isn't ideal
		return c.tostring()
		return str(c)

Martin Geisler pure Python implementation of bdiff.c	r7703	def bdiff(a, b):
timeless bdiff: (pure) support array.array arrays (issue5130)	r28389	a = _tostring(a).splitlines(True)
		b = _tostring(b).splitlines(True)
Martin Geisler pure Python implementation of bdiff.c	r7703
		if not a:
		s = "".join(b)
		return s and (struct.pack(">lll", 0, 0, len(s)) + s)

		bin = []
		p = [0]
		for i in a: p.append(p[-1] + len(i))

		d = difflib.SequenceMatcher(None, a, b).get_matching_blocks()
		d = _normalizeblocks(a, b, d)
		la = 0
		lb = 0
		for am, bm, size in d:
		s = "".join(b[lb:bm])
		if am > la or s:
		bin.append(struct.pack(">lll", p[la], p[am], len(s)) + s)
		la = am + size
		lb = bm + size

		return "".join(bin)

		def blocks(a, b):
Matt Mackall pure/bdiff: fix circular import	r7944	an = splitnewlines(a)
		bn = splitnewlines(b)
Martin Geisler pure Python implementation of bdiff.c	r7703	d = difflib.SequenceMatcher(None, an, bn).get_matching_blocks()
		d = _normalizeblocks(an, bn, d)
		return [(i, i + n, j, j + n) for (i, j, n) in d]

Patrick Mezard mdiff: replace wscleanup() regexps with C loops...	r15530	def fixws(text, allws):
		if allws:
		text = re.sub('[ \t\r]+', '', text)
		else:
		text = re.sub('[ \t\r]+', ' ', text)
		text = text.replace(' \n', '\n')
		return text
Maciej Fijalkowski bdiff: implement cffi version of blocks	r29833
		if modulepolicy not in policynocffi:
		try:
		from _bdiff_cffi import ffi, lib
		except ImportError:
		if modulepolicy == 'cffi': # strict cffi import
		raise
		else:
		def blocks(sa, sb):
		a = ffi.new("struct bdiff_line**")
		b = ffi.new("struct bdiff_line**")
Maciej Fijalkowski lazymanifest: write a more efficient, pypy friendly version of lazymanifest	r30042	ac = ffi.new("char[]", str(sa))
		bc = ffi.new("char[]", str(sb))
Maciej Fijalkowski bdiff: implement cffi version of bdiff	r29834	l = ffi.new("struct bdiff_hunk*")
Maciej Fijalkowski bdiff: implement cffi version of blocks	r29833	try:
		an = lib.bdiff_splitlines(ac, len(sa), a)
		bn = lib.bdiff_splitlines(bc, len(sb), b)
		if not a[0] or not b[0]:
		raise MemoryError
		count = lib.bdiff_diff(a[0], an, b[0], bn, l)
		if count < 0:
		raise MemoryError
		rl = [None] * count
		h = l.next
		i = 0
		while h:
		rl[i] = (h.a1, h.a2, h.b1, h.b2)
		h = h.next
		i += 1
		finally:
		lib.free(a[0])
		lib.free(b[0])
		lib.bdiff_freehunks(l.next)
		return rl
Maciej Fijalkowski bdiff: implement cffi version of bdiff	r29834
		def bdiff(sa, sb):
		a = ffi.new("struct bdiff_line**")
		b = ffi.new("struct bdiff_line**")
Maciej Fijalkowski lazymanifest: write a more efficient, pypy friendly version of lazymanifest	r30042	ac = ffi.new("char[]", str(sa))
		bc = ffi.new("char[]", str(sb))
Maciej Fijalkowski bdiff: implement cffi version of bdiff	r29834	l = ffi.new("struct bdiff_hunk*")
		try:
		an = lib.bdiff_splitlines(ac, len(sa), a)
		bn = lib.bdiff_splitlines(bc, len(sb), b)
		if not a[0] or not b[0]:
		raise MemoryError
		count = lib.bdiff_diff(a[0], an, b[0], bn, l)
		if count < 0:
		raise MemoryError
		rl = []
		h = l.next
		la = lb = 0
		while h:
		if h.a1 != la or h.b1 != lb:
		lgt = (b[0] + h.b1).l - (b[0] + lb).l
		rl.append(struct.pack(">lll", (a[0] + la).l - a[0].l,
		(a[0] + h.a1).l - a[0].l, lgt))
		rl.append(str(ffi.buffer((b[0] + lb).l, lgt)))
		la = h.a2
		lb = h.b2
		h = h.next

		finally:
		lib.free(a[0])
		lib.free(b[0])
		lib.bdiff_freehunks(l.next)
		return "".join(rl)