##// END OF EJS Templates
Simplify integrity checking...
mpm@selenic.com -
r26:9cf83bf9 default
parent child Browse files
Show More
@@ -1,201 +1,193 b''
1 # revlog.py - storage back-end for mercurial
1 # revlog.py - storage back-end for mercurial
2 #
2 #
3 # This provides efficient delta storage with O(1) retrieve and append
3 # This provides efficient delta storage with O(1) retrieve and append
4 # and O(changes) merge between branches
4 # and O(changes) merge between branches
5 #
5 #
6 # Copyright 2005 Matt Mackall <mpm@selenic.com>
6 # Copyright 2005 Matt Mackall <mpm@selenic.com>
7 #
7 #
8 # This software may be used and distributed according to the terms
8 # This software may be used and distributed according to the terms
9 # of the GNU General Public License, incorporated herein by reference.
9 # of the GNU General Public License, incorporated herein by reference.
10
10
11 import zlib, struct, sha, os, tempfile
11 import zlib, struct, sha, os, tempfile
12 from mercurial import mdiff
12 from mercurial import mdiff
13
13
def compress(text):
    """Return text compressed with zlib at the default level."""
    compressed = zlib.compress(text)
    return compressed
16
16
def decompress(bin):
    """Inverse of compress(): return the original text for a zlib blob."""
    expanded = zlib.decompress(bin)
    return expanded
19
19
def hash(text, p1, p2):
    """Return the SHA-1 nodeid of text with parents p1 and p2.

    The two parent nodeids are sorted before hashing so the result is
    independent of parent order.  (Name shadows the builtin hash(); kept
    because callers throughout the file use it.)
    """
    parents = [p1, p2]
    parents.sort()
    return sha.sha(parents[0] + parents[1] + text).digest()
24
24
# nodeid of the null (absent) revision: twenty zero bytes
nullid = 20 * "\0"
# big-endian index entry: offset, size, base, linkrev (4 longs),
# then p1, p2 and this revision's nodeid (three 20-byte fields)
indexformat = ">4l20s20s20s"
27
27
class revlog:
    """Storage back-end providing delta storage with O(1) retrieve/append.

    Revisions are stored in a data file as zlib-compressed full snapshots
    or as compressed deltas against an earlier revision; a fixed-width
    index file maps each revision number to a tuple of
    (offset, size, base, linkrev, p1, p2, nodeid).
    """
    def __init__(self, opener, indexfile, datafile):
        """Load the index via opener (a callable returning file objects)."""
        self.indexfile = indexfile
        self.datafile = datafile
        self.index = []
        self.opener = opener
        # cache of the last reconstructed revision: (node, rev, text)
        self.cache = None
        # nodeid <-> revision number, mapped in both directions;
        # the null revision is number -1
        self.nodemap = { -1: nullid, nullid: -1 }
        # read the whole index for now, handle on-demand later
        try:
            n = 0
            i = self.opener(self.indexfile).read()
            s = struct.calcsize(indexformat)
            for f in range(0, len(i), s):
                # offset, size, base, linkrev, p1, p2, nodeid
                e = struct.unpack(indexformat, i[f:f + s])
                self.nodemap[e[6]] = n
                self.index.append(e)
                n += 1
        except IOError: pass  # no index file yet: start with an empty revlog

    def tip(self): return self.node(len(self.index) - 1)
    def count(self): return len(self.index)
    def node(self, rev): return (rev < 0) and nullid or self.index[rev][6]
    def rev(self, node): return self.nodemap[node]
    def linkrev(self, node): return self.index[self.nodemap[node]][3]
    def parents(self, node):
        if node == nullid: return (nullid, nullid)
        return self.index[self.nodemap[node]][4:6]

    # accessors for the per-revision index fields
    def start(self, rev): return self.index[rev][0]
    def length(self, rev): return self.index[rev][1]
    def end(self, rev): return self.start(rev) + self.length(rev)
    def base(self, rev): return self.index[rev][2]

    def revisions(self, list):
        # this can be optimized to do spans, etc
        # be stupid for now
        for node in list:
            yield self.revision(node)

    def diff(self, a, b):
        return mdiff.textdiff(a, b)

    def patch(self, text, patch):
        return mdiff.patch(text, patch)

    def revision(self, node):
        """Reconstruct and return the full text of revision node.

        Walks the delta chain from the snapshot at base(rev) up to rev,
        applying each delta in turn, then verifies the result against the
        nodeid before caching and returning it.
        """
        if node is nullid: return ""
        if self.cache and self.cache[0] == node: return self.cache[2]

        text = None
        rev = self.rev(node)
        base = self.base(rev)
        start = self.start(base)
        end = self.end(rev)

        # if the cached revision lies on our delta chain, start from it
        # instead of decompressing the full snapshot again
        if self.cache and self.cache[1] >= base and self.cache[1] < rev:
            base = self.cache[1]
            start = self.start(base + 1)
            text = self.cache[2]
            last = 0

        f = self.opener(self.datafile)
        f.seek(start)
        data = f.read(end - start)

        if not text:
            last = self.length(base)
            text = decompress(data[:last])

        # apply each delta in the chain, from base+1 through rev
        for r in range(base + 1, rev + 1):
            s = self.length(r)
            b = decompress(data[last:last + s])
            text = self.patch(text, b)
            last = last + s

        # integrity check: the reconstructed text must hash to the nodeid
        (p1, p2) = self.parents(node)
        if node != hash(text, p1, p2):
            raise "integrity check failed on %s:%d" % (self.datafile, rev)

        self.cache = (node, rev, text)
        return text

    def addrevision(self, text, transaction, link, p1=None, p2=None):
        """Append a new revision and return its nodeid.

        link is the revision number of the linked changelog entry; p1
        defaults to the current tip and p2 to nullid.  The write is
        journalled through transaction so it can be rolled back.
        """
        if text is None: text = ""
        if p1 is None: p1 = self.tip()
        if p2 is None: p2 = nullid

        node = hash(text, p1, p2)

        n = self.count()
        t = n - 1

        if n:
            start = self.start(self.base(t))
            end = self.end(t)
            prev = self.revision(self.tip())
            data = compress(self.diff(prev, text))

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if not n or (end + len(data) - start) > len(text) * 2:
            data = compress(text)
            base = n
        else:
            base = self.base(t)

        # new data is appended at the current end of the data file
        offset = 0
        if t >= 0:
            offset = self.end(t)

        e = (offset, len(data), base, link, p1, p2, node)

        self.index.append(e)
        self.nodemap[node] = n
        entry = struct.pack(indexformat, *e)

        # journal the pre-write file sizes, then append data and index entry
        transaction.add(self.datafile, e[0])
        self.opener(self.datafile, "a").write(data)
        transaction.add(self.indexfile, (n + 1) * len(entry))
        self.opener(self.indexfile, "a").write(entry)

        self.cache = (node, n, text)
        return node

    def ancestor(self, a, b):
        """Return a common ancestor nodeid of revisions a and b.

        Expands the ancestor sets of a and b alternately, one generation
        at a time, until one side reaches a node already seen by the other.
        """
        def expand(e1, e2, a1, a2):
            # e1/e2: frontiers to expand; a1/a2: ancestors seen so far
            ne = []
            for n in e1:
                (p1, p2) = self.parents(n)
                if p1 in a2: return p1
                if p2 in a2: return p2
                if p1 != nullid and p1 not in a1:
                    a1[p1] = 1
                    ne.append(p1)
                if p2 != nullid and p2 not in a1:
                    a1[p2] = 1
                    ne.append(p2)
            return expand(e2, ne, a2, a1)
        return expand([a], [b], {a:1}, {b:1})

    def mergedag(self, other, transaction, linkseq, accumulate = None):
        """combine the nodes from other's DAG into ours"""
        old = self.tip()
        i = self.count()
        l = []

        # merge the other revision log into our DAG
        for r in range(other.count()):
            id = other.node(r)
            if id not in self.nodemap:
                (xn, yn) = other.parents(id)
                l.append((id, xn, yn))
                self.nodemap[id] = i
                i += 1

        # merge node data for new nodes
        r = other.revisions([e[0] for e in l])
        for e in l:
            t = r.next()
            if accumulate: accumulate(t)
            self.addrevision(t, transaction, linkseq.next(), e[1], e[2])

        # return the unmerged heads for later resolving
        return (old, self.tip())
General Comments 0
You need to be logged in to leave comments. Login now