Show More
@@ -1,75 +1,144 b'' | |||||
1 | #!/usr/bin/python |
|
1 | #!/usr/bin/python | |
2 | import difflib, struct |
|
2 | import difflib, struct, mmap | |
3 | from cStringIO import StringIO |
|
3 | ||
|
4 | devzero = file("/dev/zero") | |||
4 |
|
5 | |||
5 | def unidiff(a, ad, b, bd, fn): |
|
6 | def unidiff(a, ad, b, bd, fn): | |
6 | if not a and not b: return "" |
|
7 | if not a and not b: return "" | |
7 | a = a.splitlines(1) |
|
8 | a = a.splitlines(1) | |
8 | b = b.splitlines(1) |
|
9 | b = b.splitlines(1) | |
9 | l = list(difflib.unified_diff(a, b, "a/" + fn, "b/" + fn, ad, bd)) |
|
10 | l = list(difflib.unified_diff(a, b, "a/" + fn, "b/" + fn, ad, bd)) | |
10 | return "".join(l) |
|
11 | return "".join(l) | |
11 |
|
12 | |||
12 | def textdiff(a, b): |
|
13 | def textdiff(a, b): | |
13 | return diff(a.splitlines(1), b.splitlines(1)) |
|
14 | return diff(a.splitlines(1), b.splitlines(1)) | |
14 |
|
15 | |||
15 | def sortdiff(a, b): |
|
16 | def sortdiff(a, b): | |
16 | la = lb = 0 |
|
17 | la = lb = 0 | |
17 |
|
18 | |||
18 | while 1: |
|
19 | while 1: | |
19 | if la >= len(a) or lb >= len(b): break |
|
20 | if la >= len(a) or lb >= len(b): break | |
20 | if b[lb] < a[la]: |
|
21 | if b[lb] < a[la]: | |
21 | si = lb |
|
22 | si = lb | |
22 | while lb < len(b) and b[lb] < a[la] : lb += 1 |
|
23 | while lb < len(b) and b[lb] < a[la] : lb += 1 | |
23 | yield "insert", la, la, si, lb |
|
24 | yield "insert", la, la, si, lb | |
24 | elif a[la] < b[lb]: |
|
25 | elif a[la] < b[lb]: | |
25 | si = la |
|
26 | si = la | |
26 | while la < len(a) and a[la] < b[lb]: la += 1 |
|
27 | while la < len(a) and a[la] < b[lb]: la += 1 | |
27 | yield "delete", si, la, lb, lb |
|
28 | yield "delete", si, la, lb, lb | |
28 | else: |
|
29 | else: | |
29 | la += 1 |
|
30 | la += 1 | |
30 | lb += 1 |
|
31 | lb += 1 | |
31 |
|
32 | |||
32 | if lb < len(b): |
|
33 | if lb < len(b): | |
33 | yield "insert", la, la, lb, len(b) |
|
34 | yield "insert", la, la, lb, len(b) | |
34 |
|
35 | |||
35 | if la < len(a): |
|
36 | if la < len(a): | |
36 | yield "delete", la, len(a), lb, lb |
|
37 | yield "delete", la, len(a), lb, lb | |
37 |
|
38 | |||
38 | def diff(a, b, sorted=0): |
|
39 | def diff(a, b, sorted=0): | |
39 | bin = [] |
|
40 | bin = [] | |
40 | p = [0] |
|
41 | p = [0] | |
41 | for i in a: p.append(p[-1] + len(i)) |
|
42 | for i in a: p.append(p[-1] + len(i)) | |
42 |
|
43 | |||
43 | if sorted: |
|
44 | if sorted: | |
44 | d = sortdiff(a, b) |
|
45 | d = sortdiff(a, b) | |
45 | else: |
|
46 | else: | |
46 | d = difflib.SequenceMatcher(None, a, b).get_opcodes() |
|
47 | d = difflib.SequenceMatcher(None, a, b).get_opcodes() | |
47 |
|
48 | |||
48 | for o, m, n, s, t in d: |
|
49 | for o, m, n, s, t in d: | |
49 | if o == 'equal': continue |
|
50 | if o == 'equal': continue | |
50 | s = "".join(b[s:t]) |
|
51 | s = "".join(b[s:t]) | |
51 | bin.append(struct.pack(">lll", p[m], p[n], len(s)) + s) |
|
52 | bin.append(struct.pack(">lll", p[m], p[n], len(s)) + s) | |
52 |
|
53 | |||
53 | return "".join(bin) |
|
54 | return "".join(bin) | |
54 |
|
55 | |||
|
56 | # This attempts to apply a series of patches in time proportional to | |||
|
57 | # the total size of the patches, rather than patches * len(text). This | |||
|
58 | # means rather than shuffling strings around, we shuffle around | |||
|
59 | # pointers to fragments with fragment lists. | |||
|
60 | # | |||
|
61 | # When the fragment lists get too long, we collapse them. To do this | |||
|
62 | # efficiently, we do all our operations inside a buffer created by | |||
|
63 | # mmap and simply use memmove. This avoids creating a bunch of large | |||
|
64 | # temporary string buffers. | |||
|
65 | ||||
|
66 | def patches(a, bins): | |||
|
67 | if not bins: return a | |||
|
68 | ||||
|
69 | plens = [len(x) for x in bins] | |||
|
70 | pl = sum(plens) | |||
|
71 | bl = len(a) + pl | |||
|
72 | tl = bl + bl + pl # enough for the patches and two working texts | |||
|
73 | b1, b2 = 0, bl | |||
|
74 | m = mmap.mmap(devzero.fileno(), tl, mmap.MAP_PRIVATE) | |||
|
75 | ||||
|
76 | # load our original text | |||
|
77 | m.write(a) | |||
|
78 | frags = [(len(a), b1)] | |||
|
79 | ||||
|
80 | # copy all the patches into our segment so we can memmove from them | |||
|
81 | pos = b2 + bl | |||
|
82 | m.seek(pos) | |||
|
83 | for p in bins: m.write(p) | |||
|
84 | ||||
|
85 | def pull(dst, src, l): # pull l bytes from src | |||
|
86 | while l: | |||
|
87 | f = src.pop(0) | |||
|
88 | if f[0] > l: # do we need to split? | |||
|
89 | src.insert(0, (f[0] - l, f[1] + l)) | |||
|
90 | dst.append((l, f[1])) | |||
|
91 | return | |||
|
92 | dst.append(f) | |||
|
93 | l -= f[0] | |||
|
94 | ||||
|
95 | def collect(buf, list): | |||
|
96 | start = buf | |||
|
97 | for l, p in list: | |||
|
98 | m.move(buf, p, l) | |||
|
99 | buf += l | |||
|
100 | return (buf - start, start) | |||
|
101 | ||||
|
102 | for plen in plens: | |||
|
103 | # if our list gets too long, execute it | |||
|
104 | if len(frags) > 128: | |||
|
105 | b2, b1 = b1, b2 | |||
|
106 | frags = [collect(b1, frags)] | |||
|
107 | ||||
|
108 | new = [] | |||
|
109 | end = pos + plen | |||
|
110 | last = 0 | |||
|
111 | while pos < end: | |||
|
112 | p1, p2, l = struct.unpack(">lll", m[pos:pos + 12]) | |||
|
113 | pull(new, frags, p1 - last) # what didn't change | |||
|
114 | pull([], frags, p2 - p1) # what got deleted | |||
|
115 | new.append((l, pos + 12)) # what got added | |||
|
116 | pos += l + 12 | |||
|
117 | last = p2 | |||
|
118 | frags = new + frags # what was left at the end | |||
|
119 | ||||
|
120 | t = collect(b2, frags) | |||
|
121 | ||||
|
122 | return m[t[1]:t[1] + t[0]] | |||
|
123 | ||||
55 | def patch(a, bin): |
|
124 | def patch(a, bin): | |
56 | last = pos = 0 |
|
125 | last = pos = 0 | |
57 | r = [] |
|
126 | r = [] | |
58 |
|
127 | |||
59 | c = 0 |
|
128 | c = 0 | |
60 | while pos < len(bin): |
|
129 | while pos < len(bin): | |
61 | p1, p2, l = struct.unpack(">lll", bin[pos:pos + 12]) |
|
130 | p1, p2, l = struct.unpack(">lll", bin[pos:pos + 12]) | |
62 | pos += 12 |
|
131 | pos += 12 | |
63 | r.append(a[last:p1]) |
|
132 | r.append(a[last:p1]) | |
64 | r.append(bin[pos:pos + l]) |
|
133 | r.append(bin[pos:pos + l]) | |
65 | pos += l |
|
134 | pos += l | |
66 | last = p2 |
|
135 | last = p2 | |
67 | c += 1 |
|
136 | c += 1 | |
68 | r.append(a[last:]) |
|
137 | r.append(a[last:]) | |
69 |
|
138 | |||
70 | return "".join(r) |
|
139 | return "".join(r) | |
71 |
|
140 | |||
72 |
|
141 | |||
73 |
|
142 | |||
74 |
|
143 | |||
75 |
|
144 |
@@ -1,412 +1,409 b'' | |||||
1 | # revlog.py - storage back-end for mercurial |
|
1 | # revlog.py - storage back-end for mercurial | |
2 | # |
|
2 | # | |
3 | # This provides efficient delta storage with O(1) retrieve and append |
|
3 | # This provides efficient delta storage with O(1) retrieve and append | |
4 | # and O(changes) merge between branches |
|
4 | # and O(changes) merge between branches | |
5 | # |
|
5 | # | |
6 | # Copyright 2005 Matt Mackall <mpm@selenic.com> |
|
6 | # Copyright 2005 Matt Mackall <mpm@selenic.com> | |
7 | # |
|
7 | # | |
8 | # This software may be used and distributed according to the terms |
|
8 | # This software may be used and distributed according to the terms | |
9 | # of the GNU General Public License, incorporated herein by reference. |
|
9 | # of the GNU General Public License, incorporated herein by reference. | |
10 |
|
10 | |||
11 | import zlib, struct, sha, os, tempfile, binascii |
|
11 | import zlib, struct, sha, os, tempfile, binascii | |
12 | from mercurial import mdiff |
|
12 | from mercurial import mdiff | |
13 |
|
13 | |||
14 | def hex(node): return binascii.hexlify(node) |
|
14 | def hex(node): return binascii.hexlify(node) | |
15 | def bin(node): return binascii.unhexlify(node) |
|
15 | def bin(node): return binascii.unhexlify(node) | |
16 |
|
16 | |||
17 | def compress(text): |
|
17 | def compress(text): | |
18 | return zlib.compress(text) |
|
18 | return zlib.compress(text) | |
19 |
|
19 | |||
20 | def decompress(bin): |
|
20 | def decompress(bin): | |
21 | return zlib.decompress(bin) |
|
21 | return zlib.decompress(bin) | |
22 |
|
22 | |||
23 | def hash(text, p1, p2): |
|
23 | def hash(text, p1, p2): | |
24 | l = [p1, p2] |
|
24 | l = [p1, p2] | |
25 | l.sort() |
|
25 | l.sort() | |
26 | return sha.sha(l[0] + l[1] + text).digest() |
|
26 | return sha.sha(l[0] + l[1] + text).digest() | |
27 |
|
27 | |||
28 | nullid = "\0" * 20 |
|
28 | nullid = "\0" * 20 | |
29 | indexformat = ">4l20s20s20s" |
|
29 | indexformat = ">4l20s20s20s" | |
30 |
|
30 | |||
31 | class revlog: |
|
31 | class revlog: | |
32 | def __init__(self, opener, indexfile, datafile): |
|
32 | def __init__(self, opener, indexfile, datafile): | |
33 | self.indexfile = indexfile |
|
33 | self.indexfile = indexfile | |
34 | self.datafile = datafile |
|
34 | self.datafile = datafile | |
35 | self.index = [] |
|
35 | self.index = [] | |
36 | self.opener = opener |
|
36 | self.opener = opener | |
37 | self.cache = None |
|
37 | self.cache = None | |
38 | self.nodemap = {nullid: -1} |
|
38 | self.nodemap = {nullid: -1} | |
39 | # read the whole index for now, handle on-demand later |
|
39 | # read the whole index for now, handle on-demand later | |
40 | try: |
|
40 | try: | |
41 | n = 0 |
|
41 | n = 0 | |
42 | i = self.opener(self.indexfile).read() |
|
42 | i = self.opener(self.indexfile).read() | |
43 | s = struct.calcsize(indexformat) |
|
43 | s = struct.calcsize(indexformat) | |
44 | for f in range(0, len(i), s): |
|
44 | for f in xrange(0, len(i), s): | |
45 | # offset, size, base, linkrev, p1, p2, nodeid |
|
45 | # offset, size, base, linkrev, p1, p2, nodeid | |
46 | e = struct.unpack(indexformat, i[f:f + s]) |
|
46 | e = struct.unpack(indexformat, i[f:f + s]) | |
47 | self.nodemap[e[6]] = n |
|
47 | self.nodemap[e[6]] = n | |
48 | self.index.append(e) |
|
48 | self.index.append(e) | |
49 | n += 1 |
|
49 | n += 1 | |
50 | except IOError: pass |
|
50 | except IOError: pass | |
51 |
|
51 | |||
52 | def tip(self): return self.node(len(self.index) - 1) |
|
52 | def tip(self): return self.node(len(self.index) - 1) | |
53 | def count(self): return len(self.index) |
|
53 | def count(self): return len(self.index) | |
54 | def node(self, rev): return (rev < 0) and nullid or self.index[rev][6] |
|
54 | def node(self, rev): return (rev < 0) and nullid or self.index[rev][6] | |
55 | def rev(self, node): return self.nodemap[node] |
|
55 | def rev(self, node): return self.nodemap[node] | |
56 | def linkrev(self, node): return self.index[self.nodemap[node]][3] |
|
56 | def linkrev(self, node): return self.index[self.nodemap[node]][3] | |
57 | def parents(self, node): |
|
57 | def parents(self, node): | |
58 | if node == nullid: return (nullid, nullid) |
|
58 | if node == nullid: return (nullid, nullid) | |
59 | return self.index[self.nodemap[node]][4:6] |
|
59 | return self.index[self.nodemap[node]][4:6] | |
60 |
|
60 | |||
61 | def start(self, rev): return self.index[rev][0] |
|
61 | def start(self, rev): return self.index[rev][0] | |
62 | def length(self, rev): return self.index[rev][1] |
|
62 | def length(self, rev): return self.index[rev][1] | |
63 | def end(self, rev): return self.start(rev) + self.length(rev) |
|
63 | def end(self, rev): return self.start(rev) + self.length(rev) | |
64 | def base(self, rev): return self.index[rev][2] |
|
64 | def base(self, rev): return self.index[rev][2] | |
65 |
|
65 | |||
66 | def lookup(self, id): |
|
66 | def lookup(self, id): | |
67 | try: |
|
67 | try: | |
68 | rev = int(id) |
|
68 | rev = int(id) | |
69 | return self.node(rev) |
|
69 | return self.node(rev) | |
70 | except ValueError: |
|
70 | except ValueError: | |
71 | c = [] |
|
71 | c = [] | |
72 | for n in self.nodemap: |
|
72 | for n in self.nodemap: | |
73 | if id in hex(n): |
|
73 | if id in hex(n): | |
74 | c.append(n) |
|
74 | c.append(n) | |
75 | if len(c) > 1: raise KeyError("Ambiguous identifier") |
|
75 | if len(c) > 1: raise KeyError("Ambiguous identifier") | |
76 | if len(c) < 1: raise KeyError("No match found") |
|
76 | if len(c) < 1: raise KeyError("No match found") | |
77 | return c[0] |
|
77 | return c[0] | |
78 |
|
78 | |||
79 | return None |
|
79 | return None | |
80 |
|
80 | |||
81 | def revisions(self, list): |
|
81 | def revisions(self, list): | |
82 | # this can be optimized to do spans, etc |
|
82 | # this can be optimized to do spans, etc | |
83 | # be stupid for now |
|
83 | # be stupid for now | |
84 | for node in list: |
|
84 | for node in list: | |
85 | yield self.revision(node) |
|
85 | yield self.revision(node) | |
86 |
|
86 | |||
87 | def diff(self, a, b): |
|
87 | def diff(self, a, b): | |
88 | return mdiff.textdiff(a, b) |
|
88 | return mdiff.textdiff(a, b) | |
89 |
|
89 | |||
90 | def patch(self, text, patch): |
|
|||
91 | return mdiff.patch(text, patch) |
|
|||
92 |
|
||||
93 | def revision(self, node): |
|
90 | def revision(self, node): | |
94 | if node == nullid: return "" |
|
91 | if node == nullid: return "" | |
95 | if self.cache and self.cache[0] == node: return self.cache[2] |
|
92 | if self.cache and self.cache[0] == node: return self.cache[2] | |
96 |
|
93 | |||
97 | text = None |
|
94 | text = None | |
98 | rev = self.rev(node) |
|
95 | rev = self.rev(node) | |
99 | base = self.base(rev) |
|
96 | base = self.base(rev) | |
100 | start = self.start(base) |
|
97 | start = self.start(base) | |
101 | end = self.end(rev) |
|
98 | end = self.end(rev) | |
102 |
|
99 | |||
103 | if self.cache and self.cache[1] >= base and self.cache[1] < rev: |
|
100 | if self.cache and self.cache[1] >= base and self.cache[1] < rev: | |
104 | base = self.cache[1] |
|
101 | base = self.cache[1] | |
105 | start = self.start(base + 1) |
|
102 | start = self.start(base + 1) | |
106 | text = self.cache[2] |
|
103 | text = self.cache[2] | |
107 | last = 0 |
|
104 | last = 0 | |
108 |
|
105 | |||
109 | f = self.opener(self.datafile) |
|
106 | f = self.opener(self.datafile) | |
110 | f.seek(start) |
|
107 | f.seek(start) | |
111 | data = f.read(end - start) |
|
108 | data = f.read(end - start) | |
112 |
|
109 | |||
113 | if not text: |
|
110 | if not text: | |
114 | last = self.length(base) |
|
111 | last = self.length(base) | |
115 | text = decompress(data[:last]) |
|
112 | text = decompress(data[:last]) | |
116 |
|
113 | |||
|
114 | bins = [] | |||
117 | for r in xrange(base + 1, rev + 1): |
|
115 | for r in xrange(base + 1, rev + 1): | |
118 | s = self.length(r) |
|
116 | s = self.length(r) | |
119 |
b |
|
117 | bins.append(decompress(data[last:last + s])) | |
120 | text = self.patch(text, b) |
|
|||
121 | last = last + s |
|
118 | last = last + s | |
122 |
|
119 | |||
|
120 | text = mdiff.patches(text, bins) | |||
|
121 | ||||
123 | (p1, p2) = self.parents(node) |
|
122 | (p1, p2) = self.parents(node) | |
124 | if node != hash(text, p1, p2): |
|
123 | if node != hash(text, p1, p2): | |
125 | raise "integrity check failed on %s:%d" % (self.datafile, rev) |
|
124 | raise "integrity check failed on %s:%d" % (self.datafile, rev) | |
126 |
|
125 | |||
127 | self.cache = (node, rev, text) |
|
126 | self.cache = (node, rev, text) | |
128 | return text |
|
127 | return text | |
129 |
|
128 | |||
130 | def addrevision(self, text, transaction, link, p1=None, p2=None): |
|
129 | def addrevision(self, text, transaction, link, p1=None, p2=None): | |
131 | if text is None: text = "" |
|
130 | if text is None: text = "" | |
132 | if p1 is None: p1 = self.tip() |
|
131 | if p1 is None: p1 = self.tip() | |
133 | if p2 is None: p2 = nullid |
|
132 | if p2 is None: p2 = nullid | |
134 |
|
133 | |||
135 | node = hash(text, p1, p2) |
|
134 | node = hash(text, p1, p2) | |
136 |
|
135 | |||
137 | n = self.count() |
|
136 | n = self.count() | |
138 | t = n - 1 |
|
137 | t = n - 1 | |
139 |
|
138 | |||
140 | if n: |
|
139 | if n: | |
141 | base = self.base(t) |
|
140 | base = self.base(t) | |
142 | start = self.start(base) |
|
141 | start = self.start(base) | |
143 | end = self.end(t) |
|
142 | end = self.end(t) | |
144 | prev = self.revision(self.tip()) |
|
143 | prev = self.revision(self.tip()) | |
145 | data = compress(self.diff(prev, text)) |
|
144 | data = compress(self.diff(prev, text)) | |
146 | dist = end - start + len(data) |
|
145 | dist = end - start + len(data) | |
147 |
|
146 | |||
148 | # full versions are inserted when the needed deltas |
|
147 | # full versions are inserted when the needed deltas | |
149 | # become comparable to the uncompressed text |
|
148 | # become comparable to the uncompressed text | |
150 | if not n or dist > len(text) * 2: |
|
149 | if not n or dist > len(text) * 2: | |
151 | data = compress(text) |
|
150 | data = compress(text) | |
152 | base = n |
|
151 | base = n | |
153 | else: |
|
152 | else: | |
154 | base = self.base(t) |
|
153 | base = self.base(t) | |
155 |
|
154 | |||
156 | offset = 0 |
|
155 | offset = 0 | |
157 | if t >= 0: |
|
156 | if t >= 0: | |
158 | offset = self.end(t) |
|
157 | offset = self.end(t) | |
159 |
|
158 | |||
160 | e = (offset, len(data), base, link, p1, p2, node) |
|
159 | e = (offset, len(data), base, link, p1, p2, node) | |
161 |
|
160 | |||
162 | self.index.append(e) |
|
161 | self.index.append(e) | |
163 | self.nodemap[node] = n |
|
162 | self.nodemap[node] = n | |
164 | entry = struct.pack(indexformat, *e) |
|
163 | entry = struct.pack(indexformat, *e) | |
165 |
|
164 | |||
166 | transaction.add(self.datafile, e[0]) |
|
165 | transaction.add(self.datafile, e[0]) | |
167 | self.opener(self.datafile, "a").write(data) |
|
166 | self.opener(self.datafile, "a").write(data) | |
168 | transaction.add(self.indexfile, n * len(entry)) |
|
167 | transaction.add(self.indexfile, n * len(entry)) | |
169 | self.opener(self.indexfile, "a").write(entry) |
|
168 | self.opener(self.indexfile, "a").write(entry) | |
170 |
|
169 | |||
171 | self.cache = (node, n, text) |
|
170 | self.cache = (node, n, text) | |
172 | return node |
|
171 | return node | |
173 |
|
172 | |||
174 | def ancestor(self, a, b): |
|
173 | def ancestor(self, a, b): | |
175 | def expand(list, map): |
|
174 | def expand(list, map): | |
176 | a = [] |
|
175 | a = [] | |
177 | while list: |
|
176 | while list: | |
178 | n = list.pop(0) |
|
177 | n = list.pop(0) | |
179 | map[n] = 1 |
|
178 | map[n] = 1 | |
180 | yield n |
|
179 | yield n | |
181 | for p in self.parents(n): |
|
180 | for p in self.parents(n): | |
182 | if p != nullid and p not in map: |
|
181 | if p != nullid and p not in map: | |
183 | list.append(p) |
|
182 | list.append(p) | |
184 | yield nullid |
|
183 | yield nullid | |
185 |
|
184 | |||
186 | amap = {} |
|
185 | amap = {} | |
187 | bmap = {} |
|
186 | bmap = {} | |
188 | ag = expand([a], amap) |
|
187 | ag = expand([a], amap) | |
189 | bg = expand([b], bmap) |
|
188 | bg = expand([b], bmap) | |
190 | adone = bdone = 0 |
|
189 | adone = bdone = 0 | |
191 |
|
190 | |||
192 | while not adone or not bdone: |
|
191 | while not adone or not bdone: | |
193 | if not adone: |
|
192 | if not adone: | |
194 | an = ag.next() |
|
193 | an = ag.next() | |
195 | if an == nullid: |
|
194 | if an == nullid: | |
196 | adone = 1 |
|
195 | adone = 1 | |
197 | elif an in bmap: |
|
196 | elif an in bmap: | |
198 | return an |
|
197 | return an | |
199 | if not bdone: |
|
198 | if not bdone: | |
200 | bn = bg.next() |
|
199 | bn = bg.next() | |
201 | if bn == nullid: |
|
200 | if bn == nullid: | |
202 | bdone = 1 |
|
201 | bdone = 1 | |
203 | elif bn in amap: |
|
202 | elif bn in amap: | |
204 | return bn |
|
203 | return bn | |
205 |
|
204 | |||
206 | return nullid |
|
205 | return nullid | |
207 |
|
206 | |||
208 | def mergedag(self, other, transaction, linkseq, accumulate = None): |
|
207 | def mergedag(self, other, transaction, linkseq, accumulate = None): | |
209 | """combine the nodes from other's DAG into ours""" |
|
208 | """combine the nodes from other's DAG into ours""" | |
210 | old = self.tip() |
|
209 | old = self.tip() | |
211 | i = self.count() |
|
210 | i = self.count() | |
212 | l = [] |
|
211 | l = [] | |
213 |
|
212 | |||
214 | # merge the other revision log into our DAG |
|
213 | # merge the other revision log into our DAG | |
215 | for r in range(other.count()): |
|
214 | for r in range(other.count()): | |
216 | id = other.node(r) |
|
215 | id = other.node(r) | |
217 | if id not in self.nodemap: |
|
216 | if id not in self.nodemap: | |
218 | (xn, yn) = other.parents(id) |
|
217 | (xn, yn) = other.parents(id) | |
219 | l.append((id, xn, yn)) |
|
218 | l.append((id, xn, yn)) | |
220 | self.nodemap[id] = i |
|
219 | self.nodemap[id] = i | |
221 | i += 1 |
|
220 | i += 1 | |
222 |
|
221 | |||
223 | # merge node date for new nodes |
|
222 | # merge node date for new nodes | |
224 | r = other.revisions([e[0] for e in l]) |
|
223 | r = other.revisions([e[0] for e in l]) | |
225 | for e in l: |
|
224 | for e in l: | |
226 | t = r.next() |
|
225 | t = r.next() | |
227 | if accumulate: accumulate(t) |
|
226 | if accumulate: accumulate(t) | |
228 | self.addrevision(t, transaction, linkseq.next(), e[1], e[2]) |
|
227 | self.addrevision(t, transaction, linkseq.next(), e[1], e[2]) | |
229 |
|
228 | |||
230 | # return the unmerged heads for later resolving |
|
229 | # return the unmerged heads for later resolving | |
231 | return (old, self.tip()) |
|
230 | return (old, self.tip()) | |
232 |
|
231 | |||
233 | def group(self, linkmap): |
|
232 | def group(self, linkmap): | |
234 | # given a list of changeset revs, return a set of deltas and |
|
233 | # given a list of changeset revs, return a set of deltas and | |
235 | # metadata corresponding to nodes the first delta is |
|
234 | # metadata corresponding to nodes the first delta is | |
236 | # parent(nodes[0]) -> nodes[0] the receiver is guaranteed to |
|
235 | # parent(nodes[0]) -> nodes[0] the receiver is guaranteed to | |
237 | # have this parent as it has all history before these |
|
236 | # have this parent as it has all history before these | |
238 | # changesets. parent is parent[0] |
|
237 | # changesets. parent is parent[0] | |
239 |
|
238 | |||
240 | revs = [] |
|
239 | revs = [] | |
241 | needed = {} |
|
240 | needed = {} | |
242 |
|
241 | |||
243 | # find file nodes/revs that match changeset revs |
|
242 | # find file nodes/revs that match changeset revs | |
244 | for i in xrange(0, self.count()): |
|
243 | for i in xrange(0, self.count()): | |
245 | if self.index[i][3] in linkmap: |
|
244 | if self.index[i][3] in linkmap: | |
246 | revs.append(i) |
|
245 | revs.append(i) | |
247 | needed[i] = 1 |
|
246 | needed[i] = 1 | |
248 |
|
247 | |||
249 | # if we don't have any revisions touched by these changesets, bail |
|
248 | # if we don't have any revisions touched by these changesets, bail | |
250 | if not revs: return struct.pack(">l", 0) |
|
249 | if not revs: return struct.pack(">l", 0) | |
251 |
|
250 | |||
252 | # add the parent of the first rev |
|
251 | # add the parent of the first rev | |
253 | p = self.parents(self.node(revs[0]))[0] |
|
252 | p = self.parents(self.node(revs[0]))[0] | |
254 | revs.insert(0, self.rev(p)) |
|
253 | revs.insert(0, self.rev(p)) | |
255 |
|
254 | |||
256 | # for each delta that isn't contiguous in the log, we need to |
|
255 | # for each delta that isn't contiguous in the log, we need to | |
257 | # reconstruct the base, reconstruct the result, and then |
|
256 | # reconstruct the base, reconstruct the result, and then | |
258 | # calculate the delta. We also need to do this where we've |
|
257 | # calculate the delta. We also need to do this where we've | |
259 | # stored a full version and not a delta |
|
258 | # stored a full version and not a delta | |
260 | for i in xrange(0, len(revs) - 1): |
|
259 | for i in xrange(0, len(revs) - 1): | |
261 | a, b = revs[i], revs[i + 1] |
|
260 | a, b = revs[i], revs[i + 1] | |
262 | if a + 1 != b or self.base(b) == b: |
|
261 | if a + 1 != b or self.base(b) == b: | |
263 | for j in xrange(self.base(a), a + 1): |
|
262 | for j in xrange(self.base(a), a + 1): | |
264 | needed[j] = 1 |
|
263 | needed[j] = 1 | |
265 | for j in xrange(self.base(b), b + 1): |
|
264 | for j in xrange(self.base(b), b + 1): | |
266 | needed[j] = 1 |
|
265 | needed[j] = 1 | |
267 |
|
266 | |||
268 | # calculate spans to retrieve from datafile |
|
267 | # calculate spans to retrieve from datafile | |
269 | needed = needed.keys() |
|
268 | needed = needed.keys() | |
270 | needed.sort() |
|
269 | needed.sort() | |
271 | spans = [] |
|
270 | spans = [] | |
272 | for n in needed: |
|
271 | for n in needed: | |
273 | if n < 0: continue |
|
272 | if n < 0: continue | |
274 | o = self.start(n) |
|
273 | o = self.start(n) | |
275 | l = self.length(n) |
|
274 | l = self.length(n) | |
276 | spans.append((o, l, [(n, l)])) |
|
275 | spans.append((o, l, [(n, l)])) | |
277 |
|
276 | |||
278 | # merge spans |
|
277 | # merge spans | |
279 | merge = [spans.pop(0)] |
|
278 | merge = [spans.pop(0)] | |
280 | while spans: |
|
279 | while spans: | |
281 | e = spans.pop(0) |
|
280 | e = spans.pop(0) | |
282 | f = merge[-1] |
|
281 | f = merge[-1] | |
283 | if e[0] == f[0] + f[1]: |
|
282 | if e[0] == f[0] + f[1]: | |
284 | merge[-1] = (f[0], f[1] + e[1], f[2] + e[2]) |
|
283 | merge[-1] = (f[0], f[1] + e[1], f[2] + e[2]) | |
285 | else: |
|
284 | else: | |
286 | merge.append(e) |
|
285 | merge.append(e) | |
287 |
|
286 | |||
288 | # read spans in, divide up chunks |
|
287 | # read spans in, divide up chunks | |
289 | chunks = {} |
|
288 | chunks = {} | |
290 | for span in merge: |
|
289 | for span in merge: | |
291 | # we reopen the file for each span to make http happy for now |
|
290 | # we reopen the file for each span to make http happy for now | |
292 | f = self.opener(self.datafile) |
|
291 | f = self.opener(self.datafile) | |
293 | f.seek(span[0]) |
|
292 | f.seek(span[0]) | |
294 | data = f.read(span[1]) |
|
293 | data = f.read(span[1]) | |
295 |
|
294 | |||
296 | # divide up the span |
|
295 | # divide up the span | |
297 | pos = 0 |
|
296 | pos = 0 | |
298 | for r, l in span[2]: |
|
297 | for r, l in span[2]: | |
299 | chunks[r] = data[pos: pos + l] |
|
298 | chunks[r] = data[pos: pos + l] | |
300 | pos += l |
|
299 | pos += l | |
301 |
|
300 | |||
302 | # helper to reconstruct intermediate versions |
|
301 | # helper to reconstruct intermediate versions | |
303 | def construct(text, base, rev): |
|
302 | def construct(text, base, rev): | |
304 |
for r in range(base + 1, rev + 1) |
|
303 | bins = [decompress(chunks[r]) for r in xrange(base + 1, rev + 1)] | |
305 | b = decompress(chunks[r]) |
|
304 | return mdiff.patches(text, bins) | |
306 | text = self.patch(text, b) |
|
|||
307 | return text |
|
|||
308 |
|
305 | |||
309 | # build deltas |
|
306 | # build deltas | |
310 | deltas = [] |
|
307 | deltas = [] | |
311 | for d in range(0, len(revs) - 1): |
|
308 | for d in xrange(0, len(revs) - 1): | |
312 | a, b = revs[d], revs[d + 1] |
|
309 | a, b = revs[d], revs[d + 1] | |
313 | n = self.node(b) |
|
310 | n = self.node(b) | |
314 |
|
311 | |||
315 | if a + 1 != b or self.base(b) == b: |
|
312 | if a + 1 != b or self.base(b) == b: | |
316 | if a >= 0: |
|
313 | if a >= 0: | |
317 | base = self.base(a) |
|
314 | base = self.base(a) | |
318 | ta = decompress(chunks[self.base(a)]) |
|
315 | ta = decompress(chunks[self.base(a)]) | |
319 | ta = construct(ta, base, a) |
|
316 | ta = construct(ta, base, a) | |
320 | else: |
|
317 | else: | |
321 | ta = "" |
|
318 | ta = "" | |
322 |
|
319 | |||
323 | base = self.base(b) |
|
320 | base = self.base(b) | |
324 | if a > base: |
|
321 | if a > base: | |
325 | base = a |
|
322 | base = a | |
326 | tb = ta |
|
323 | tb = ta | |
327 | else: |
|
324 | else: | |
328 | tb = decompress(chunks[self.base(b)]) |
|
325 | tb = decompress(chunks[self.base(b)]) | |
329 | tb = construct(tb, base, b) |
|
326 | tb = construct(tb, base, b) | |
330 | d = self.diff(ta, tb) |
|
327 | d = self.diff(ta, tb) | |
331 | else: |
|
328 | else: | |
332 | d = decompress(chunks[b]) |
|
329 | d = decompress(chunks[b]) | |
333 |
|
330 | |||
334 | p = self.parents(n) |
|
331 | p = self.parents(n) | |
335 | meta = n + p[0] + p[1] + linkmap[self.linkrev(n)] |
|
332 | meta = n + p[0] + p[1] + linkmap[self.linkrev(n)] | |
336 | l = struct.pack(">l", len(meta) + len(d) + 4) |
|
333 | l = struct.pack(">l", len(meta) + len(d) + 4) | |
337 | deltas.append(l + meta + d) |
|
334 | deltas.append(l + meta + d) | |
338 |
|
335 | |||
339 | l = struct.pack(">l", sum(map(len, deltas)) + 4) |
|
336 | l = struct.pack(">l", sum(map(len, deltas)) + 4) | |
340 | deltas.insert(0, l) |
|
337 | deltas.insert(0, l) | |
341 | return "".join(deltas) |
|
338 | return "".join(deltas) | |
342 |
|
339 | |||
343 | def addgroup(self, data, linkmapper, transaction): |
|
340 | def addgroup(self, data, linkmapper, transaction): | |
344 | # given a set of deltas, add them to the revision log. the |
|
341 | # given a set of deltas, add them to the revision log. the | |
345 | # first delta is against its parent, which should be in our |
|
342 | # first delta is against its parent, which should be in our | |
346 | # log, the rest are against the previous delta. |
|
343 | # log, the rest are against the previous delta. | |
347 |
|
344 | |||
348 | if not data: return self.tip() |
|
345 | if not data: return self.tip() | |
349 |
|
346 | |||
350 | # retrieve the parent revision of the delta chain |
|
347 | # retrieve the parent revision of the delta chain | |
351 | chain = data[24:44] |
|
348 | chain = data[24:44] | |
352 |
|
349 | |||
353 | # track the base of the current delta log |
|
350 | # track the base of the current delta log | |
354 | r = self.count() |
|
351 | r = self.count() | |
355 | t = r - 1 |
|
352 | t = r - 1 | |
356 |
|
353 | |||
357 | base = prev = -1 |
|
354 | base = prev = -1 | |
358 | start = end = 0 |
|
355 | start = end = 0 | |
359 | if r: |
|
356 | if r: | |
360 | start = self.start(self.base(t)) |
|
357 | start = self.start(self.base(t)) | |
361 | end = self.end(t) |
|
358 | end = self.end(t) | |
362 | measure = self.length(self.base(t)) |
|
359 | measure = self.length(self.base(t)) | |
363 | base = self.base(t) |
|
360 | base = self.base(t) | |
364 | prev = self.tip() |
|
361 | prev = self.tip() | |
365 |
|
362 | |||
366 | transaction.add(self.datafile, end) |
|
363 | transaction.add(self.datafile, end) | |
367 | transaction.add(self.indexfile, r * struct.calcsize(indexformat)) |
|
364 | transaction.add(self.indexfile, r * struct.calcsize(indexformat)) | |
368 | dfh = self.opener(self.datafile, "a") |
|
365 | dfh = self.opener(self.datafile, "a") | |
369 | ifh = self.opener(self.indexfile, "a") |
|
366 | ifh = self.opener(self.indexfile, "a") | |
370 |
|
367 | |||
371 | # loop through our set of deltas |
|
368 | # loop through our set of deltas | |
372 | pos = 0 |
|
369 | pos = 0 | |
373 | while pos < len(data): |
|
370 | while pos < len(data): | |
374 | l, node, p1, p2, cs = struct.unpack(">l20s20s20s20s", |
|
371 | l, node, p1, p2, cs = struct.unpack(">l20s20s20s20s", | |
375 | data[pos:pos+84]) |
|
372 | data[pos:pos+84]) | |
376 | link = linkmapper(cs) |
|
373 | link = linkmapper(cs) | |
377 | delta = data[pos + 84:pos + l] |
|
374 | delta = data[pos + 84:pos + l] | |
378 | pos += l |
|
375 | pos += l | |
379 |
|
376 | |||
380 | # full versions are inserted when the needed deltas become |
|
377 | # full versions are inserted when the needed deltas become | |
381 | # comparable to the uncompressed text or when the previous |
|
378 | # comparable to the uncompressed text or when the previous | |
382 | # version is not the one we have a delta against. We use |
|
379 | # version is not the one we have a delta against. We use | |
383 | # the size of the previous full rev as a proxy for the |
|
380 | # the size of the previous full rev as a proxy for the | |
384 | # current size. |
|
381 | # current size. | |
385 |
|
382 | |||
386 | if chain == prev: |
|
383 | if chain == prev: | |
387 | cdelta = compress(delta) |
|
384 | cdelta = compress(delta) | |
388 |
|
385 | |||
389 | if chain != prev or (end - start + len(cdelta)) > measure * 2: |
|
386 | if chain != prev or (end - start + len(cdelta)) > measure * 2: | |
390 | # flush our writes here so we can read it in revision |
|
387 | # flush our writes here so we can read it in revision | |
391 | dfh.flush() |
|
388 | dfh.flush() | |
392 | ifh.flush() |
|
389 | ifh.flush() | |
393 | text = self.revision(chain) |
|
390 | text = self.revision(chain) | |
394 | text = self.patch(text, delta) |
|
391 | text = self.patch(text, delta) | |
395 | chk = self.addrevision(text, transaction, link, p1, p2) |
|
392 | chk = self.addrevision(text, transaction, link, p1, p2) | |
396 | if chk != node: |
|
393 | if chk != node: | |
397 | raise "consistency error adding group" |
|
394 | raise "consistency error adding group" | |
398 | measure = len(text) |
|
395 | measure = len(text) | |
399 | else: |
|
396 | else: | |
400 | e = (end, len(cdelta), self.base(t), link, p1, p2, node) |
|
397 | e = (end, len(cdelta), self.base(t), link, p1, p2, node) | |
401 | self.index.append(e) |
|
398 | self.index.append(e) | |
402 | self.nodemap[node] = r |
|
399 | self.nodemap[node] = r | |
403 | dfh.write(cdelta) |
|
400 | dfh.write(cdelta) | |
404 | ifh.write(struct.pack(indexformat, *e)) |
|
401 | ifh.write(struct.pack(indexformat, *e)) | |
405 |
|
402 | |||
406 | t, r, chain, prev = r, r + 1, node, node |
|
403 | t, r, chain, prev = r, r + 1, node, node | |
407 | start = self.start(self.base(t)) |
|
404 | start = self.start(self.base(t)) | |
408 | end = self.end(t) |
|
405 | end = self.end(t) | |
409 |
|
406 | |||
410 | dfh.close() |
|
407 | dfh.close() | |
411 | ifh.close() |
|
408 | ifh.close() | |
412 | return node |
|
409 | return node |
General Comments 0
You need to be logged in to leave comments.
Login now