Change revlog to use new patch code
mpm@selenic.com
r73:ee1cbe84 default
@@ -1,409 +1,421
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # This provides efficient delta storage with O(1) retrieve and append
4 4 # and O(changes) merge between branches
5 5 #
6 6 # Copyright 2005 Matt Mackall <mpm@selenic.com>
7 7 #
8 8 # This software may be used and distributed according to the terms
9 9 # of the GNU General Public License, incorporated herein by reference.
10 10
11 11 import zlib, struct, sha, os, tempfile, binascii
12 12 from mercurial import mdiff
13 13
14 14 def hex(node): return binascii.hexlify(node)
15 15 def bin(node): return binascii.unhexlify(node)
16 16
17 17 def compress(text):
18 18 return zlib.compress(text)
19 19
20 20 def decompress(bin):
21 21 return zlib.decompress(bin)
22 22
23 23 def hash(text, p1, p2):
24 24 l = [p1, p2]
25 25 l.sort()
26 26 return sha.sha(l[0] + l[1] + text).digest()
27 27
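Sorting the parents before hashing makes a node id independent of parent order, so a merge hashes the same no matter which parent is listed first. A quick property check mirroring the hash() above (node_hash is an illustrative local name):

    import sha

    def node_hash(text, p1, p2):
        l = [p1, p2]
        l.sort()
        return sha.sha(l[0] + l[1] + text).digest()

    a, b = "a" * 20, "b" * 20
    assert node_hash("txt", a, b) == node_hash("txt", b, a)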
28 28 nullid = "\0" * 20
29 29 indexformat = ">4l20s20s20s"
30 30
31 31 class revlog:
32 32 def __init__(self, opener, indexfile, datafile):
33 33 self.indexfile = indexfile
34 34 self.datafile = datafile
35 35 self.index = []
36 36 self.opener = opener
37 37 self.cache = None
38 self.nodemap = {nullid: -1}
39 38 # read the whole index for now, handle on-demand later
40 39 try:
41 40 n = 0
42 41 i = self.opener(self.indexfile).read()
43 42 s = struct.calcsize(indexformat)
43
44 # preallocate arrays
45 l = len(i)/s
46 self.index = [None] * l
47 m = [None] * l
48
44 49 for f in xrange(0, len(i), s):
45 50 # offset, size, base, linkrev, p1, p2, nodeid
46 51 e = struct.unpack(indexformat, i[f:f + s])
47 self.nodemap[e[6]] = n
48 self.index.append(e)
52 self.index[n] = e
53 m[n] = (e[6], n)
49 54 n += 1
50 except IOError: pass
55
56 self.nodemap = dict(m)
57 except IOError:
58 self.nodemap = {}
59 self.nodemap[nullid] = -1
51 60
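Each index entry is one struct-packed record: four big-endian 32-bit integers followed by three 20-byte ids, 76 bytes total, which is why the new code can preallocate both arrays from len(i)/s. A minimal round trip of one record showing the field order:

    import struct

    indexformat = ">4l20s20s20s"
    assert struct.calcsize(indexformat) == 76
    rec = struct.pack(indexformat, 0, 10, 0, 7,
                      "\0" * 20, "\0" * 20, "n" * 20)
    # offset and size locate the compressed chunk in the datafile; base
    # is the start of the delta chain; linkrev points into the changelog
    offset, size, base, linkrev, p1, p2, node = struct.unpack(indexformat, rec)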
52 61 def tip(self): return self.node(len(self.index) - 1)
53 62 def count(self): return len(self.index)
54 63 def node(self, rev): return (rev < 0) and nullid or self.index[rev][6]
55 64 def rev(self, node): return self.nodemap[node]
56 65 def linkrev(self, node): return self.index[self.nodemap[node]][3]
57 66 def parents(self, node):
58 67 if node == nullid: return (nullid, nullid)
59 68 return self.index[self.nodemap[node]][4:6]
60 69
61 70 def start(self, rev): return self.index[rev][0]
62 71 def length(self, rev): return self.index[rev][1]
63 72 def end(self, rev): return self.start(rev) + self.length(rev)
64 73 def base(self, rev): return self.index[rev][2]
65 74
66 75 def lookup(self, id):
67 76 try:
68 77 rev = int(id)
69 78 return self.node(rev)
70 79 except ValueError:
71 80 c = []
72 81 for n in self.nodemap:
73 82 if id in hex(n):
74 83 c.append(n)
75 84 if len(c) > 1: raise KeyError("Ambiguous identifier")
76 85 if len(c) < 1: raise KeyError("No match found")
77 86 return c[0]
78 87
79 88 return None
80 89
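lookup() tries the id as a local revision number first and otherwise scans for a hex fragment; note that `id in hex(n)` matches the fragment anywhere in the 40-character hex string, not only as a prefix. The matching rule in isolation, with toy node values:

    import binascii

    nodes = ["\xee\x1c\xbe\x84" + "\0" * 16, "\0" * 20]
    frag = "ee1cbe84"
    c = [n for n in nodes if frag in binascii.hexlify(n)]
    assert len(c) == 1
    # more than one match raises "Ambiguous identifier",
    # zero matches raises "No match found"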
81 90 def revisions(self, list):
82 91 # this can be optimized to do spans, etc
83 92 # be stupid for now
84 93 for node in list:
85 94 yield self.revision(node)
86 95
87 96 def diff(self, a, b):
88 97 return mdiff.textdiff(a, b)
89 98
99 def patches(self, t, pl):
100 return mdiff.patches(t, pl)
101
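This small method is the point of the commit: callers now hand a whole list of deltas to mdiff.patches in one call instead of applying them one at a time, so the new patch code can compose the chain in a single pass. A sketch using only the two mdiff entry points this file already uses (textdiff and patches):

    from mercurial import mdiff

    base = "line1\nline2\n"
    v1 = base + "line3\n"
    v2 = v1 + "line4\n"
    d1 = mdiff.textdiff(base, v1)
    d2 = mdiff.textdiff(v1, v2)
    # one call walks the whole chain base -> v1 -> v2
    assert mdiff.patches(base, [d1, d2]) == v2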
90 102 def revision(self, node):
91 103 if node == nullid: return ""
92 104 if self.cache and self.cache[0] == node: return self.cache[2]
93 105
94 106 text = None
95 107 rev = self.rev(node)
96 108 base = self.base(rev)
97 109 start = self.start(base)
98 110 end = self.end(rev)
99 111
100 112 if self.cache and self.cache[1] >= base and self.cache[1] < rev:
101 113 base = self.cache[1]
102 114 start = self.start(base + 1)
103 115 text = self.cache[2]
104 116 last = 0
105 117
106 118 f = self.opener(self.datafile)
107 119 f.seek(start)
108 120 data = f.read(end - start)
109 121
110 122 if not text:
111 123 last = self.length(base)
112 124 text = decompress(data[:last])
113 125
114 126 bins = []
115 127 for r in xrange(base + 1, rev + 1):
116 128 s = self.length(r)
117 129 bins.append(decompress(data[last:last + s]))
118 130 last = last + s
119 131
120 132 text = mdiff.patches(text, bins)
121 133
122 134 (p1, p2) = self.parents(node)
123 135 if node != hash(text, p1, p2):
124 136 raise "integrity check failed on %s:%d" % (self.datafile, rev)
125 137
126 138 self.cache = (node, rev, text)
127 139 return text
128 140
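Chunk offsets are cumulative, so reconstructing any revision costs one seek and one contiguous read from start(base) to end(rev); the read is then split by the per-revision lengths. The arithmetic in isolation, with a toy index of (offset, length) pairs:

    # toy index: rev -> (offset, length) of its compressed chunk
    index = [(0, 10), (10, 4), (14, 7)]
    base, rev = 0, 2
    start = index[base][0]
    end = index[rev][0] + index[rev][1]
    # a single read of end - start bytes covers every chunk in the chain
    assert end - start == sum([l for (o, l) in index[base:rev + 1]])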
129 141 def addrevision(self, text, transaction, link, p1=None, p2=None):
130 142 if text is None: text = ""
131 143 if p1 is None: p1 = self.tip()
132 144 if p2 is None: p2 = nullid
133 145
134 146 node = hash(text, p1, p2)
135 147
136 148 n = self.count()
137 149 t = n - 1
138 150
139 151 if n:
140 152 base = self.base(t)
141 153 start = self.start(base)
142 154 end = self.end(t)
143 155 prev = self.revision(self.tip())
144 156 data = compress(self.diff(prev, text))
145 157 dist = end - start + len(data)
146 158
147 159 # full versions are inserted when the needed deltas
148 160 # become comparable to the uncompressed text
149 161 if not n or dist > len(text) * 2:
150 162 data = compress(text)
151 163 base = n
152 164 else:
153 165 base = self.base(t)
154 166
155 167 offset = 0
156 168 if t >= 0:
157 169 offset = self.end(t)
158 170
159 171 e = (offset, len(data), base, link, p1, p2, node)
160 172
161 173 self.index.append(e)
162 174 self.nodemap[node] = n
163 175 entry = struct.pack(indexformat, *e)
164 176
165 177 transaction.add(self.datafile, e[0])
166 178 self.opener(self.datafile, "a").write(data)
167 179 transaction.add(self.indexfile, n * len(entry))
168 180 self.opener(self.indexfile, "a").write(entry)
169 181
170 182 self.cache = (node, n, text)
171 183 return node
172 184
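The `dist > len(text) * 2` test bounds reconstruction cost: once the bytes needed to rebuild the tip (existing chain plus the new compressed delta) pass twice the plain text size, a full version is stored and becomes the new base. The decision in isolation (needs_snapshot is a hypothetical name):

    def needs_snapshot(chain_len, delta_len, text_len, empty):
        # full version when the log is empty or the chain would cost
        # more than 2x the uncompressed text to read back
        return empty or (chain_len + delta_len) > text_len * 2

    assert needs_snapshot(0, 0, 100, True)
    assert needs_snapshot(150, 60, 100, False)       # 210 > 200
    assert not needs_snapshot(120, 60, 100, False)   # 180 <= 200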
173 185 def ancestor(self, a, b):
174 186 def expand(list, map):
175 187 a = []
176 188 while list:
177 189 n = list.pop(0)
178 190 map[n] = 1
179 191 yield n
180 192 for p in self.parents(n):
181 193 if p != nullid and p not in map:
182 194 list.append(p)
183 195 yield nullid
184 196
185 197 amap = {}
186 198 bmap = {}
187 199 ag = expand([a], amap)
188 200 bg = expand([b], bmap)
189 201 adone = bdone = 0
190 202
191 203 while not adone or not bdone:
192 204 if not adone:
193 205 an = ag.next()
194 206 if an == nullid:
195 207 adone = 1
196 208 elif an in bmap:
197 209 return an
198 210 if not bdone:
199 211 bn = bg.next()
200 212 if bn == nullid:
201 213 bdone = 1
202 214 elif bn in amap:
203 215 return bn
204 216
205 217 return nullid
206 218
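The two generators run breadth-first walks in lockstep, each marking nodes in its own map and yielding nullid as an end sentinel; the first node one side yields that the other side has already marked is returned, so neither history is walked further than necessary. A toy version over single-parent chains (the real walker follows both parents per node):

    parents = {"d": "b", "c": "b", "b": "a", "a": None}

    def ancestors(n):
        while n is not None:
            yield n
            n = parents[n]
        yield None                       # end sentinel, like nullid

    amap, bmap = {}, {}
    ag, bg = ancestors("d"), ancestors("c")
    adone = bdone = 0
    found = None
    while found is None and not (adone and bdone):
        if not adone:
            an = ag.next()
            if an is None: adone = 1
            elif an in bmap: found = an
            else: amap[an] = 1
        if found is None and not bdone:
            bn = bg.next()
            if bn is None: bdone = 1
            elif bn in amap: found = bn
            else: bmap[bn] = 1
    assert found == "b"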
207 219 def mergedag(self, other, transaction, linkseq, accumulate = None):
208 220 """combine the nodes from other's DAG into ours"""
209 221 old = self.tip()
210 222 i = self.count()
211 223 l = []
212 224
213 225 # merge the other revision log into our DAG
214 226 for r in range(other.count()):
215 227 id = other.node(r)
216 228 if id not in self.nodemap:
217 229 (xn, yn) = other.parents(id)
218 230 l.append((id, xn, yn))
219 231 self.nodemap[id] = i
220 232 i += 1
221 233
222 234 # merge node data for new nodes
223 235 r = other.revisions([e[0] for e in l])
224 236 for e in l:
225 237 t = r.next()
226 238 if accumulate: accumulate(t)
227 239 self.addrevision(t, transaction, linkseq.next(), e[1], e[2])
228 240
229 241 # return the unmerged heads for later resolving
230 242 return (old, self.tip())
231 243
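mergedag pulls unknown nodes in the other log's revision order, which is topological (parents always have lower revs), so both parents are in self.nodemap by the time addrevision() runs. linkseq only needs a .next() method yielding one changelog link per imported revision; a sketch of one (the starting value is illustrative):

    import itertools

    linkseq = itertools.count(5)   # imported revs link to 5, 6, 7, ...
    assert linkseq.next() == 5
    assert linkseq.next() == 6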
232 244 def group(self, linkmap):
233 245 # given a list of changeset revs, return a set of deltas and
234 246 # metadata corresponding to nodes. the first delta is
235 247 # parent(nodes[0]) -> nodes[0]; the receiver is guaranteed to
236 248 # have this parent as it has all history before these
237 249 # changesets. parent is parent[0]
238 250
239 251 revs = []
240 252 needed = {}
241 253
242 254 # find file nodes/revs that match changeset revs
243 255 for i in xrange(0, self.count()):
244 256 if self.index[i][3] in linkmap:
245 257 revs.append(i)
246 258 needed[i] = 1
247 259
248 260 # if we don't have any revisions touched by these changesets, bail
249 261 if not revs: return struct.pack(">l", 0)
250 262
251 263 # add the parent of the first rev
252 264 p = self.parents(self.node(revs[0]))[0]
253 265 revs.insert(0, self.rev(p))
254 266
255 267 # for each delta that isn't contiguous in the log, we need to
256 268 # reconstruct the base, reconstruct the result, and then
257 269 # calculate the delta. We also need to do this where we've
258 270 # stored a full version and not a delta
259 271 for i in xrange(0, len(revs) - 1):
260 272 a, b = revs[i], revs[i + 1]
261 273 if a + 1 != b or self.base(b) == b:
262 274 for j in xrange(self.base(a), a + 1):
263 275 needed[j] = 1
264 276 for j in xrange(self.base(b), b + 1):
265 277 needed[j] = 1
266 278
267 279 # calculate spans to retrieve from datafile
268 280 needed = needed.keys()
269 281 needed.sort()
270 282 spans = []
271 283 for n in needed:
272 284 if n < 0: continue
273 285 o = self.start(n)
274 286 l = self.length(n)
275 287 spans.append((o, l, [(n, l)]))
276 288
277 289 # merge spans
278 290 merge = [spans.pop(0)]
279 291 while spans:
280 292 e = spans.pop(0)
281 293 f = merge[-1]
282 294 if e[0] == f[0] + f[1]:
283 295 merge[-1] = (f[0], f[1] + e[1], f[2] + e[2])
284 296 else:
285 297 merge.append(e)
286 298
287 299 # read spans in, divide up chunks
288 300 chunks = {}
289 301 for span in merge:
290 302 # we reopen the file for each span to make http happy for now
291 303 f = self.opener(self.datafile)
292 304 f.seek(span[0])
293 305 data = f.read(span[1])
294 306
295 307 # divide up the span
296 308 pos = 0
297 309 for r, l in span[2]:
298 310 chunks[r] = data[pos: pos + l]
299 311 pos += l
300 312
301 313 # helper to reconstruct intermediate versions
302 314 def construct(text, base, rev):
303 315 bins = [decompress(chunks[r]) for r in xrange(base + 1, rev + 1)]
304 316 return mdiff.patches(text, bins)
305 317
306 318 # build deltas
307 319 deltas = []
308 320 for d in xrange(0, len(revs) - 1):
309 321 a, b = revs[d], revs[d + 1]
310 322 n = self.node(b)
311 323
312 324 if a + 1 != b or self.base(b) == b:
313 325 if a >= 0:
314 326 base = self.base(a)
315 327 ta = decompress(chunks[self.base(a)])
316 328 ta = construct(ta, base, a)
317 329 else:
318 330 ta = ""
319 331
320 332 base = self.base(b)
321 333 if a > base:
322 334 base = a
323 335 tb = ta
324 336 else:
325 337 tb = decompress(chunks[self.base(b)])
326 338 tb = construct(tb, base, b)
327 339 d = self.diff(ta, tb)
328 340 else:
329 341 d = decompress(chunks[b])
330 342
331 343 p = self.parents(n)
332 344 meta = n + p[0] + p[1] + linkmap[self.linkrev(n)]
333 345 l = struct.pack(">l", len(meta) + len(d) + 4)
334 346 deltas.append(l + meta + d)
335 347
336 348 l = struct.pack(">l", sum(map(len, deltas)) + 4)
337 349 deltas.insert(0, l)
338 350 return "".join(deltas)
339 351
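Each chunk on the wire is a 4-byte big-endian length (counting itself), 80 bytes of metadata (node, p1, p2, and the linked changeset node from linkmap) and the uncompressed delta; one more length word fronts the whole group. Framing and re-reading a single chunk:

    import struct

    node, p1, p2, cs = "n" * 20, "p" * 20, "\0" * 20, "c" * 20
    delta = "delta-bytes"                # sent plain; writers recompress
    meta = node + p1 + p2 + cs
    chunk = struct.pack(">l", len(meta) + len(delta) + 4) + meta + delta
    group = struct.pack(">l", len(chunk) + 4) + chunk

    # reader side, as in addgroup():
    l, n, q1, q2, c = struct.unpack(">l20s20s20s20s", chunk[:84])
    assert l == len(chunk) and n == node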
340 352 def addgroup(self, data, linkmapper, transaction):
341 353 # given a set of deltas, add them to the revision log. the
342 354 # first delta is against its parent, which should be in our
343 355 # log, the rest are against the previous delta.
344 356
345 357 if not data: return self.tip()
346 358
347 359 # retrieve the parent revision of the delta chain
348 360 chain = data[24:44]
349 361
350 362 # track the base of the current delta log
351 363 r = self.count()
352 364 t = r - 1
353 365
354 366 base = prev = -1
355 367 start = end = 0
356 368 if r:
357 369 start = self.start(self.base(t))
358 370 end = self.end(t)
359 371 measure = self.length(self.base(t))
360 372 base = self.base(t)
361 373 prev = self.tip()
362 374
363 375 transaction.add(self.datafile, end)
364 376 transaction.add(self.indexfile, r * struct.calcsize(indexformat))
365 377 dfh = self.opener(self.datafile, "a")
366 378 ifh = self.opener(self.indexfile, "a")
367 379
368 380 # loop through our set of deltas
369 381 pos = 0
370 382 while pos < len(data):
371 383 l, node, p1, p2, cs = struct.unpack(">l20s20s20s20s",
372 384 data[pos:pos+84])
373 385 link = linkmapper(cs)
374 386 delta = data[pos + 84:pos + l]
375 387 pos += l
376 388
377 389 # full versions are inserted when the needed deltas become
378 390 # comparable to the uncompressed text or when the previous
379 391 # version is not the one we have a delta against. We use
380 392 # the size of the previous full rev as a proxy for the
381 393 # current size.
382 394
383 395 if chain == prev:
384 396 cdelta = compress(delta)
385 397
386 398 if chain != prev or (end - start + len(cdelta)) > measure * 2:
387 399 # flush our writes here so we can read it in revision
388 400 dfh.flush()
389 401 ifh.flush()
390 402 text = self.revision(chain)
391 text = self.patch(text, delta)
403 text = self.patches(text, [delta])
392 404 chk = self.addrevision(text, transaction, link, p1, p2)
393 405 if chk != node:
394 406 raise "consistency error adding group"
395 407 measure = len(text)
396 408 else:
397 409 e = (end, len(cdelta), self.base(t), link, p1, p2, node)
398 410 self.index.append(e)
399 411 self.nodemap[node] = r
400 412 dfh.write(cdelta)
401 413 ifh.write(struct.pack(indexformat, *e))
402 414
403 415 t, r, chain, prev = r, r + 1, node, node
404 416 start = self.start(self.base(t))
405 417 end = self.end(t)
406 418
407 419 dfh.close()
408 420 ifh.close()
409 421 return node
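addgroup() advances through the buffer by each chunk's own length prefix; the first delta's parent is read directly from bytes 24..44 (the first chunk's p1), and every later delta chains off the previous node. A standalone parse of one chunk laid out the same way:

    import struct

    node, p1, p2, cs = "n" * 20, "p" * 20, "\0" * 20, "c" * 20
    delta = "delta-bytes"
    data = struct.pack(">l", 84 + len(delta)) + node + p1 + p2 + cs + delta

    chain = data[24:44]                  # parent of the delta chain
    assert chain == p1

    pos = 0
    while pos < len(data):
        l, n, q1, q2, c = struct.unpack(">l20s20s20s20s", data[pos:pos + 84])
        d = data[pos + 84:pos + l]
        pos += l
    assert d == delta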