##// END OF EJS Templates
revlog: add clone method...
revlog: add clone method Upcoming patches will introduce functionality for in-place repository/store "upgrades." Copying the contents of a revlog feels sufficiently low-level to warrant being in the revlog class. So this commit implements that functionality. Because full delta recomputation can be *very* expensive (we're talking several hours on the Firefox repository), we support multiple modes of execution with regards to delta (re)use. This will allow repository upgrades to choose the "level" of processing/optimization they wish to perform when converting revlogs. It's not obvious from this commit, but "addrevisioncb" will be used for progress reporting.

File last commit:

r30042:d24e03da default
r30778:1c7368d1 default
Show More
bdiff.py
169 lines | 4.9 KiB | text/x-python | PythonLexer
Martin Geisler
pure Python implementation of bdiff.c
r7703 # bdiff.py - Python implementation of bdiff.c
#
# Copyright 2009 Matt Mackall <mpm@selenic.com> and others
#
Martin Geisler
updated license to be explicit about GPL version 2
r8225 # This software may be used and distributed according to the terms of the
Matt Mackall
Update license to GPLv2+
r10263 # GNU General Public License version 2 or any later version.
Martin Geisler
pure Python implementation of bdiff.c
r7703
Gregory Szorc
bdiff: use absolute_import
r27335 from __future__ import absolute_import
timeless
bdiff: (pure) support array.array arrays (issue5130)
r28389 import array
Gregory Szorc
bdiff: use absolute_import
r27335 import difflib
import re
import struct
Matt Mackall
pure/bdiff: fix circular import
r7944
Maciej Fijalkowski
bdiff: implement cffi version of blocks
r29833 from . import policy
policynocffi = policy.policynocffi
modulepolicy = policy.policy
Matt Mackall
pure/bdiff: fix circular import
r7944 def splitnewlines(text):
'''like str.splitlines, but only split on newlines.'''
lines = [l + '\n' for l in text.split('\n')]
if lines:
if lines[-1] == '\n':
lines.pop()
else:
lines[-1] = lines[-1][:-1]
return lines
Martin Geisler
pure Python implementation of bdiff.c
r7703
def _normalizeblocks(a, b, blocks):
prev = None
Dan Villiom Podlaski Christiansen
pure bdiff: don't use a generator...
r14066 r = []
Martin Geisler
pure Python implementation of bdiff.c
r7703 for curr in blocks:
if prev is None:
prev = curr
continue
shift = 0
a1, b1, l1 = prev
a1end = a1 + l1
b1end = b1 + l1
a2, b2, l2 = curr
a2end = a2 + l2
b2end = b2 + l2
if a1end == a2:
Matt Mackall
many, many trivial check-code fixups
r10282 while (a1end + shift < a2end and
a[a1end + shift] == b[b1end + shift]):
Martin Geisler
pure Python implementation of bdiff.c
r7703 shift += 1
elif b1end == b2:
Matt Mackall
many, many trivial check-code fixups
r10282 while (b1end + shift < b2end and
a[a1end + shift] == b[b1end + shift]):
Martin Geisler
pure Python implementation of bdiff.c
r7703 shift += 1
Dan Villiom Podlaski Christiansen
pure bdiff: don't use a generator...
r14066 r.append((a1, b1, l1 + shift))
Matt Mackall
many, many trivial check-code fixups
r10282 prev = a2 + shift, b2 + shift, l2 - shift
Dan Villiom Podlaski Christiansen
pure bdiff: don't use a generator...
r14066 r.append(prev)
return r
Martin Geisler
pure Python implementation of bdiff.c
r7703
timeless
bdiff: (pure) support array.array arrays (issue5130)
r28389 def _tostring(c):
if type(c) is array.array:
# this copy overhead isn't ideal
return c.tostring()
return str(c)
Martin Geisler
pure Python implementation of bdiff.c
r7703 def bdiff(a, b):
timeless
bdiff: (pure) support array.array arrays (issue5130)
r28389 a = _tostring(a).splitlines(True)
b = _tostring(b).splitlines(True)
Martin Geisler
pure Python implementation of bdiff.c
r7703
if not a:
s = "".join(b)
return s and (struct.pack(">lll", 0, 0, len(s)) + s)
bin = []
p = [0]
for i in a: p.append(p[-1] + len(i))
d = difflib.SequenceMatcher(None, a, b).get_matching_blocks()
d = _normalizeblocks(a, b, d)
la = 0
lb = 0
for am, bm, size in d:
s = "".join(b[lb:bm])
if am > la or s:
bin.append(struct.pack(">lll", p[la], p[am], len(s)) + s)
la = am + size
lb = bm + size
return "".join(bin)
def blocks(a, b):
Matt Mackall
pure/bdiff: fix circular import
r7944 an = splitnewlines(a)
bn = splitnewlines(b)
Martin Geisler
pure Python implementation of bdiff.c
r7703 d = difflib.SequenceMatcher(None, an, bn).get_matching_blocks()
d = _normalizeblocks(an, bn, d)
return [(i, i + n, j, j + n) for (i, j, n) in d]
Patrick Mezard
mdiff: replace wscleanup() regexps with C loops...
r15530 def fixws(text, allws):
if allws:
text = re.sub('[ \t\r]+', '', text)
else:
text = re.sub('[ \t\r]+', ' ', text)
text = text.replace(' \n', '\n')
return text
Maciej Fijalkowski
bdiff: implement cffi version of blocks
r29833
if modulepolicy not in policynocffi:
try:
from _bdiff_cffi import ffi, lib
except ImportError:
if modulepolicy == 'cffi': # strict cffi import
raise
else:
def blocks(sa, sb):
a = ffi.new("struct bdiff_line**")
b = ffi.new("struct bdiff_line**")
Maciej Fijalkowski
lazymanifest: write a more efficient, pypy friendly version of lazymanifest
r30042 ac = ffi.new("char[]", str(sa))
bc = ffi.new("char[]", str(sb))
Maciej Fijalkowski
bdiff: implement cffi version of bdiff
r29834 l = ffi.new("struct bdiff_hunk*")
Maciej Fijalkowski
bdiff: implement cffi version of blocks
r29833 try:
an = lib.bdiff_splitlines(ac, len(sa), a)
bn = lib.bdiff_splitlines(bc, len(sb), b)
if not a[0] or not b[0]:
raise MemoryError
count = lib.bdiff_diff(a[0], an, b[0], bn, l)
if count < 0:
raise MemoryError
rl = [None] * count
h = l.next
i = 0
while h:
rl[i] = (h.a1, h.a2, h.b1, h.b2)
h = h.next
i += 1
finally:
lib.free(a[0])
lib.free(b[0])
lib.bdiff_freehunks(l.next)
return rl
Maciej Fijalkowski
bdiff: implement cffi version of bdiff
r29834
def bdiff(sa, sb):
a = ffi.new("struct bdiff_line**")
b = ffi.new("struct bdiff_line**")
Maciej Fijalkowski
lazymanifest: write a more efficient, pypy friendly version of lazymanifest
r30042 ac = ffi.new("char[]", str(sa))
bc = ffi.new("char[]", str(sb))
Maciej Fijalkowski
bdiff: implement cffi version of bdiff
r29834 l = ffi.new("struct bdiff_hunk*")
try:
an = lib.bdiff_splitlines(ac, len(sa), a)
bn = lib.bdiff_splitlines(bc, len(sb), b)
if not a[0] or not b[0]:
raise MemoryError
count = lib.bdiff_diff(a[0], an, b[0], bn, l)
if count < 0:
raise MemoryError
rl = []
h = l.next
la = lb = 0
while h:
if h.a1 != la or h.b1 != lb:
lgt = (b[0] + h.b1).l - (b[0] + lb).l
rl.append(struct.pack(">lll", (a[0] + la).l - a[0].l,
(a[0] + h.a1).l - a[0].l, lgt))
rl.append(str(ffi.buffer((b[0] + lb).l, lgt)))
la = h.a2
lb = h.b2
h = h.next
finally:
lib.free(a[0])
lib.free(b[0])
lib.bdiff_freehunks(l.next)
return "".join(rl)