upstream/mercurial-mirror Commit - r28587:76d7cab1

1

# mpatch.py - Python implementation of mpatch.c

1

# mpatch.py - Python implementation of mpatch.c

2

#

2

#

3

4

#

4

#

5

# This software may be used and distributed according to the terms of the

5

# This software may be used and distributed according to the terms of the

6

# GNU General Public License version 2 or any later version.

6

# GNU General Public License version 2 or any later version.

7

8

from __future__ import absolute_import

8

from __future__ import absolute_import

9

10

import cStringIO

10

import cStringIO

11

import struct

11

import struct

12

13

StringIO = cStringIO.StringIO

13

StringIO = cStringIO.StringIO

14

15

# This attempts to apply a series of patches in time proportional to

15

# This attempts to apply a series of patches in time proportional to

16

# the total size of the patches, rather than patches * len(text). This

16

# the total size of the patches, rather than patches * len(text). This

17

# means rather than shuffling strings around, we shuffle around

17

# means rather than shuffling strings around, we shuffle around

18

# pointers to fragments with fragment lists.

18

# pointers to fragments with fragment lists.

19

#

19

#

20

# When the fragment lists get too long, we collapse them. To do this

20

# When the fragment lists get too long, we collapse them. To do this

21

# efficiently, we do all our operations inside a buffer created by

21

# efficiently, we do all our operations inside a buffer created by

22

# mmap and simply use memmove. This avoids creating a bunch of large

22

# mmap and simply use memmove. This avoids creating a bunch of large

23

# temporary string buffers.

23

# temporary string buffers.

24

25

def _pull(dst, src, l): # pull l bytes from src

26

while l:

27

f = src.pop()

28

if f[0] > l: # do we need to split?

29

src.append((f[0] - l, f[1] + l))

30

dst.append((l, f[1]))

31

return

32

dst.append(f)

33

l -= f[0]

34

25

def patches(a, bins):

35

def patches(a, bins):

26

if not bins:

36

if not bins:

27

return a

37

return a

28

38

29

plens = [len(x) for x in bins]

39

plens = [len(x) for x in bins]

30

pl = sum(plens)

40

pl = sum(plens)

31

bl = len(a) + pl

41

bl = len(a) + pl

32

tl = bl + bl + pl # enough for the patches and two working texts

42

tl = bl + bl + pl # enough for the patches and two working texts

33

b1, b2 = 0, bl

43

b1, b2 = 0, bl

34

44

35

if not tl:

45

if not tl:

36

return a

46

return a

37

47

38

m = StringIO()

48

m = StringIO()

39

def move(dest, src, count):

49

def move(dest, src, count):

40

"""move count bytes from src to dest

50

"""move count bytes from src to dest

41

51

42

The file pointer is left at the end of dest.

52

The file pointer is left at the end of dest.

43

"""

53

"""

44

m.seek(src)

54

m.seek(src)

45

buf = m.read(count)

55

buf = m.read(count)

46

m.seek(dest)

56

m.seek(dest)

47

m.write(buf)

57

m.write(buf)

48

58

49

# load our original text

59

# load our original text

50

m.write(a)

60

m.write(a)

51

frags = [(len(a), b1)]

61

frags = [(len(a), b1)]

52

62

53

# copy all the patches into our segment so we can memmove from them

63

# copy all the patches into our segment so we can memmove from them

54

pos = b2 + bl

64

pos = b2 + bl

55

m.seek(pos)

65

m.seek(pos)

56

for p in bins: m.write(p)

66

for p in bins: m.write(p)

57

67

58

def pull(dst, src, l): # pull l bytes from src

59

while l:

60

f = src.pop()

61

if f[0] > l: # do we need to split?

62

src.append((f[0] - l, f[1] + l))

63

dst.append((l, f[1]))

64

return

65

dst.append(f)

66

l -= f[0]

67

68

def collect(buf, list):

68

def collect(buf, list):

69

start = buf

69

start = buf

70

for l, p in reversed(list):

70

for l, p in reversed(list):

71

move(buf, p, l)

71

move(buf, p, l)

72

buf += l

72

buf += l

73

return (buf - start, start)

73

return (buf - start, start)

74

75

for plen in plens:

75

for plen in plens:

76

# if our list gets too long, execute it

76

# if our list gets too long, execute it

77

if len(frags) > 128:

77

if len(frags) > 128:

78

b2, b1 = b1, b2

78

b2, b1 = b1, b2

79

frags = [collect(b1, frags)]

79

frags = [collect(b1, frags)]

80

81

new = []

81

new = []

82

end = pos + plen

82

end = pos + plen

83

last = 0

83

last = 0

84

while pos < end:

84

while pos < end:

85

m.seek(pos)

85

m.seek(pos)

86

p1, p2, l = struct.unpack(">lll", m.read(12))

86

p1, p2, l = struct.unpack(">lll", m.read(12))

87

pull(new, frags, p1 - last) # what didn't change

87

_pull(new, frags, p1 - last) # what didn't change

88

pull([], frags, p2 - p1) # what got deleted

88

_pull([], frags, p2 - p1) # what got deleted

89

new.append((l, pos + 12)) # what got added

89

new.append((l, pos + 12)) # what got added

90

pos += l + 12

90

pos += l + 12

91

last = p2

91

last = p2

92

frags.extend(reversed(new)) # what was left at the end

92

frags.extend(reversed(new)) # what was left at the end

93

94

t = collect(b2, frags)

94

t = collect(b2, frags)

95

96

m.seek(t[1])

96

m.seek(t[1])

97

return m.read(t[0])

97

return m.read(t[0])

98

99

def patchedsize(orig, delta):

99

def patchedsize(orig, delta):

100

outlen, last, bin = 0, 0, 0

100

outlen, last, bin = 0, 0, 0

101

binend = len(delta)

101

binend = len(delta)

102

data = 12

102

data = 12

103

104

while data <= binend:

104

while data <= binend:

105

decode = delta[bin:bin + 12]

105

decode = delta[bin:bin + 12]

106

start, end, length = struct.unpack(">lll", decode)

106

start, end, length = struct.unpack(">lll", decode)

107

if start > end:

107

if start > end:

108

break

108

break

109

bin = data + length

109

bin = data + length

110

data = bin + 12

110

data = bin + 12

111

outlen += start - last

111

outlen += start - last

112

last = end

112

last = end

113

outlen += length

113

outlen += length

114

115

if bin != binend:

115

if bin != binend:

116

raise ValueError("patch cannot be decoded")

116

raise ValueError("patch cannot be decoded")

117

118

outlen += orig - last

118

outlen += orig - last

119

return outlen

119

return outlen

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             # mpatch.py - Python implementation of mpatch.c
             #
             # Copyright 2009 Matt Mackall <mpm@selenic.com> and others
             #
             # This software may be used and distributed according to the terms of the
             # GNU General Public License version 2 or any later version.
             from __future__ import absolute_import
             import cStringIO
             import struct
             StringIO = cStringIO.StringIO
             # This attempts to apply a series of patches in time proportional to
             # the total size of the patches, rather than patches * len(text). This
             # means rather than shuffling strings around, we shuffle around
             # pointers to fragments with fragment lists.
             #
             # When the fragment lists get too long, we collapse them. To do this
             # efficiently, we do all our operations inside a buffer created by
             # mmap and simply use memmove. This avoids creating a bunch of large
             # temporary string buffers.
+            def _pull(dst, src, l): # pull l bytes from src
+                while l:
+                    f = src.pop()
+                    if f[0] > l: # do we need to split?
+                        src.append((f[0] - l, f[1] + l))
+                        dst.append((l, f[1]))
+                        return
+                    dst.append(f)
+                    l -= f[0]
             def patches(a, bins):
                 if not bins:
                     return a
                 plens = [len(x) for x in bins]
                 pl = sum(plens)
                 bl = len(a) + pl
                 tl = bl + bl + pl # enough for the patches and two working texts
                 b1, b2 = 0, bl
                 if not tl:
                     return a
                 m = StringIO()
                 def move(dest, src, count):
                     """move count bytes from src to dest
                     The file pointer is left at the end of dest.
                     """
                     m.seek(src)
                     buf = m.read(count)
                     m.seek(dest)
                     m.write(buf)
                 # load our original text
                 m.write(a)
                 frags = [(len(a), b1)]
                 # copy all the patches into our segment so we can memmove from them
                 pos = b2 + bl
                 m.seek(pos)
                 for p in bins: m.write(p)
-                def pull(dst, src, l): # pull l bytes from src
-                    while l:
-                        f = src.pop()
-                        if f[0] > l: # do we need to split?
-                            src.append((f[0] - l, f[1] + l))
-                            dst.append((l, f[1]))
-                            return
-                        dst.append(f)
-                        l -= f[0]
                 def collect(buf, list):
                     start = buf
                     for l, p in reversed(list):
                         move(buf, p, l)
                         buf += l
                     return (buf - start, start)
                 for plen in plens:
                     # if our list gets too long, execute it
                     if len(frags) > 128:
                         b2, b1 = b1, b2
                         frags = [collect(b1, frags)]
                     new = []
                     end = pos + plen
                     last = 0
                     while pos < end:
                         m.seek(pos)
                         p1, p2, l = struct.unpack(">lll", m.read(12))
-                        pull(new, frags, p1 - last) # what didn't change
+                        _pull(new, frags, p1 - last) # what didn't change
-                        pull([], frags, p2 - p1)    # what got deleted
+                        _pull([], frags, p2 - p1)    # what got deleted
                         new.append((l, pos + 12))   # what got added
                         pos += l + 12
                         last = p2
                     frags.extend(reversed(new))     # what was left at the end
                 t = collect(b2, frags)
                 m.seek(t[1])
                 return m.read(t[0])
             def patchedsize(orig, delta):
                 outlen, last, bin = 0, 0, 0
                 binend = len(delta)
                 data = 12
                 while data <= binend:
                     decode = delta[bin:bin + 12]
                     start, end, length = struct.unpack(">lll", decode)
                     if start > end:
                         break
                     bin = data + length
                     data = bin + 12
                     outlen += start - last
                     last = end
                     outlen += length
                 if bin != binend:
                     raise ValueError("patch cannot be decoded")
                 outlen += orig - last
                 return outlen