More useful message on broken addgroup chain
mpm@selenic.com
r84:b2e35281 default
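
This commit makes revlog.addgroup fail early when the base of an incoming delta chain is unknown locally: the new check (lines 414-415 in the diff below) raises at once with the offending node named, instead of letting the import break later with a less informative error. A minimal illustration of the guard's behavior (Python 2, as in the file; the node value and the plain Exception are stand-ins):

    import binascii
    def short(node): return binascii.hexlify(node[:4])  # as defined in revlog.py
    nodemap = {}                                 # local node -> rev mapping
    chain = "\xde\xad\xbe\xef" + "\x00" * 16     # hypothetical unknown 20-byte base
    if chain not in nodemap:
        raise Exception("unknown base %s" % short(chain[:4]))  # "unknown base deadbeef"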
@@ -1,476 +1,478 @@
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # This provides efficient delta storage with O(1) retrieve and append
4 4 # and O(changes) merge between branches
5 5 #
6 6 # Copyright 2005 Matt Mackall <mpm@selenic.com>
7 7 #
8 8 # This software may be used and distributed according to the terms
9 9 # of the GNU General Public License, incorporated herein by reference.
10 10
11 11 import zlib, struct, sha, os, tempfile, binascii
12 12 from mercurial import mdiff
13 13
14 14 def hex(node): return binascii.hexlify(node)
15 15 def bin(node): return binascii.unhexlify(node)
16 16 def short(node): return hex(node[:4])
17 17
18 18 def compress(text):
19 19 return zlib.compress(text)
20 20
21 21 def decompress(bin):
22 22 return zlib.decompress(bin)
23 23
24 24 def hash(text, p1, p2):
25 25 l = [p1, p2]
26 26 l.sort()
27 27 return sha.sha(l[0] + l[1] + text).digest()
28 28
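
A node id is independent of parent order: hash() sorts the two parent ids before digesting, so exchanging p1 and p2 yields the same node. A quick check against the hash defined above:

    assert hash("text", "\x01" * 20, "\x02" * 20) == \
           hash("text", "\x02" * 20, "\x01" * 20)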
29 29 nullid = "\0" * 20
30 30 indexformat = ">4l20s20s20s"
31 31
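
Every index entry is a fixed 76-byte record, which is what makes positional O(1) access possible. The layout, with field names taken from the unpack sites below (a sketch):

    import struct
    # >4l20s20s20s: offset, length, base, linkrev as 32-bit big-endian ints,
    # followed by p1, p2, nodeid as 20-byte binary hashes
    assert struct.calcsize(">4l20s20s20s") == 4 * 4 + 3 * 20  # 76 bytes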
32 32 class lazyparser:
33 33 def __init__(self, data):
34 34 self.data = data
35 35 self.s = struct.calcsize(indexformat)
36 36 self.l = len(data)/self.s
37 37 self.index = [None] * self.l
38 38 self.map = {nullid: -1}
39 39
40 40 if 0:
41 41 n = 0
42 42 i = self.data
43 43 s = struct.calcsize(indexformat)
44 44 for f in xrange(0, len(i), s):
45 45 # offset, size, base, linkrev, p1, p2, nodeid
46 46 e = struct.unpack(indexformat, i[f:f + s])
47 47 self.map[e[6]] = n
48 48 self.index.append(e)
49 49 n += 1
50 50
51 51 def load(self, pos):
52 52 block = pos / 1000
53 53 i = block * 1000
54 54 end = min(self.l, i + 1000)
55 55 while i < end:
56 56 d = self.data[i * self.s: (i + 1) * self.s]
57 57 e = struct.unpack(indexformat, d)
58 58 self.index[i] = e
59 59 self.map[e[6]] = i
60 60 i += 1
61 61
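
lazyparser defers unpacking until an entry is first touched, then load() fills in a block of 1000 entries around it. The same block arithmetic as a standalone sketch (block_range is a hypothetical helper):

    def block_range(pos, total, blocksize=1000):
        start = (pos // blocksize) * blocksize     # // matches / on Python 2 ints
        return start, min(total, start + blocksize)

    # e.g. touching entry 2345 parses entries 2000 through 2999 (or up to total)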
62 62 class lazyindex:
63 63 def __init__(self, parser):
64 64 self.p = parser
65 65 def __len__(self):
66 66 return len(self.p.index)
67 67 def __getitem__(self, pos):
68 68 i = self.p.index[pos]
69 69 if not i:
70 70 self.p.load(pos)
71 71 return self.p.index[pos]
72 72 return i
73 73 def append(self, e):
74 74 self.p.index.append(e)
75 75
76 76 class lazymap:
77 77 def __init__(self, parser):
78 78 self.p = parser
79 79 def load(self, key):
80 80 n = self.p.data.find(key)
81 81 if n < 0: raise KeyError(key)
82 82 pos = n / self.p.s
83 83 self.p.load(pos)
84 84 def __contains__(self, key):
85 85 try:
86 86 self[key]
87 87 return True
88 88 except KeyError:
89 89 return False
90 90 def __getitem__(self, key):
91 91 try:
92 92 return self.p.map[key]
93 93 except KeyError:
94 94 self.load(key)
95 95 return self.p.map[key]
96 96 def __setitem__(self, key, val):
97 97 self.p.map[key] = val
98 98
99 99 class revlog:
100 100 def __init__(self, opener, indexfile, datafile):
101 101 self.indexfile = indexfile
102 102 self.datafile = datafile
103 103 self.opener = opener
104 104 self.cache = None
105 105 # read the whole index for now, handle on-demand later
106 106 try:
107 107 i = self.opener(self.indexfile).read()
108 108 except IOError:
109 109 i = ""
110 110 parser = lazyparser(i)
111 111 self.index = lazyindex(parser)
112 112 self.nodemap = lazymap(parser)
113 113
114 114 def tip(self): return self.node(len(self.index) - 1)
115 115 def count(self): return len(self.index)
116 116 def node(self, rev): return (rev < 0) and nullid or self.index[rev][6]
117 117 def rev(self, node): return self.nodemap[node]
118 118 def linkrev(self, node): return self.index[self.nodemap[node]][3]
119 119 def parents(self, node):
120 120 if node == nullid: return (nullid, nullid)
121 121 return self.index[self.nodemap[node]][4:6]
122 122
123 123 def start(self, rev): return self.index[rev][0]
124 124 def length(self, rev): return self.index[rev][1]
125 125 def end(self, rev): return self.start(rev) + self.length(rev)
126 126 def base(self, rev): return self.index[rev][2]
127 127
128 128 def lookup(self, id):
129 129 try:
130 130 rev = int(id)
131 131 return self.node(rev)
132 132 except ValueError:
133 133 c = []
134 134 for n in self.nodemap:
135 135 if id in hex(n):
136 136 c.append(n)
137 137 if len(c) > 1: raise KeyError("Ambiguous identifier")
138 138 if len(c) < 1: raise KeyError("No match found")
139 139 return c[0]
140 140
141 141 return None
142 142
143 143 def revisions(self, list):
144 144 # this can be optimized to do spans, etc
145 145 # be stupid for now
146 146 for node in list:
147 147 yield self.revision(node)
148 148
149 149 def diff(self, a, b):
150 150 return mdiff.textdiff(a, b)
151 151
152 152 def patches(self, t, pl):
153 153 return mdiff.patches(t, pl)
154 154
155 155 def revision(self, node):
156 156 if node == nullid: return ""
157 157 if self.cache and self.cache[0] == node: return self.cache[2]
158 158
159 159 text = None
160 160 rev = self.rev(node)
161 161 base = self.base(rev)
162 162 start = self.start(base)
163 163 end = self.end(rev)
164 164
165 165 if self.cache and self.cache[1] >= base and self.cache[1] < rev:
166 166 base = self.cache[1]
167 167 start = self.start(base + 1)
168 168 text = self.cache[2]
169 169 last = 0
170 170
171 171 f = self.opener(self.datafile)
172 172 f.seek(start)
173 173 data = f.read(end - start)
174 174
175 175 if not text:
176 176 last = self.length(base)
177 177 text = decompress(data[:last])
178 178
179 179 bins = []
180 180 for r in xrange(base + 1, rev + 1):
181 181 s = self.length(r)
182 182 bins.append(decompress(data[last:last + s]))
183 183 last = last + s
184 184
185 185 text = mdiff.patches(text, bins)
186 186
187 187 (p1, p2) = self.parents(node)
188 188 if node != hash(text, p1, p2):
189 189 raise "integrity check failed on %s:%d" % (self.datafile, rev)
190 190
191 191 self.cache = (node, rev, text)
192 192 return text
193 193
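
revision() rebuilds a text by decompressing the full version stored at base(rev) and patching forward through every delta up to rev; the cache lets a later call resume partway along the same chain instead of starting over. The core of the reconstruction, as a sketch (get_chunk is a hypothetical accessor returning the compressed hunk of one revision):

    text = decompress(get_chunk(base))
    bins = [decompress(get_chunk(r)) for r in xrange(base + 1, rev + 1)]
    text = mdiff.patches(text, bins)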
194 194 def addrevision(self, text, transaction, link, p1=None, p2=None):
195 195 if text is None: text = ""
196 196 if p1 is None: p1 = self.tip()
197 197 if p2 is None: p2 = nullid
198 198
199 199 node = hash(text, p1, p2)
200 200
201 201 n = self.count()
202 202 t = n - 1
203 203
204 204 if n:
205 205 base = self.base(t)
206 206 start = self.start(base)
207 207 end = self.end(t)
208 208 prev = self.revision(self.tip())
209 209 data = compress(self.diff(prev, text))
210 210 dist = end - start + len(data)
211 211
212 212 # full versions are inserted when the needed deltas
213 213 # become comparable to the uncompressed text
214 214 if not n or dist > len(text) * 2:
215 215 data = compress(text)
216 216 base = n
217 217 else:
218 218 base = self.base(t)
219 219
220 220 offset = 0
221 221 if t >= 0:
222 222 offset = self.end(t)
223 223
224 224 e = (offset, len(data), base, link, p1, p2, node)
225 225
226 226 self.index.append(e)
227 227 self.nodemap[node] = n
228 228 entry = struct.pack(indexformat, *e)
229 229
230 230 transaction.add(self.datafile, e[0])
231 231 self.opener(self.datafile, "a").write(data)
232 232 transaction.add(self.indexfile, n * len(entry))
233 233 self.opener(self.indexfile, "a").write(entry)
234 234
235 235 self.cache = (node, n, text)
236 236 return node
237 237
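
The snapshot rule above: a full version is stored once dist, the span from the chain's full version through the new compressed delta, exceeds twice the new text, bounding reconstruction cost at the price of some space. For a 10 kB text whose chain already spans 18 kB of the data file and whose new delta compresses to 3 kB, dist is 21 kB > 20 kB, so a fresh full version starts a new chain. The comparison as a predicate (a sketch):

    def needs_snapshot(chain_span, compressed_delta_len, text_len):
        # mirrors: dist > len(text) * 2 in addrevision
        return chain_span + compressed_delta_len > 2 * text_len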
238 238 def ancestor(self, a, b):
239 239 def expand(list, map):
240 240 a = []
241 241 while list:
242 242 n = list.pop(0)
243 243 map[n] = 1
244 244 yield n
245 245 for p in self.parents(n):
246 246 if p != nullid and p not in map:
247 247 list.append(p)
248 248 yield nullid
249 249
250 250 amap = {}
251 251 bmap = {}
252 252 ag = expand([a], amap)
253 253 bg = expand([b], bmap)
254 254 adone = bdone = 0
255 255
256 256 while not adone or not bdone:
257 257 if not adone:
258 258 an = ag.next()
259 259 if an == nullid:
260 260 adone = 1
261 261 elif an in bmap:
262 262 return an
263 263 if not bdone:
264 264 bn = bg.next()
265 265 if bn == nullid:
266 266 bdone = 1
267 267 elif bn in amap:
268 268 return bn
269 269
270 270 return nullid
271 271
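
ancestor() expands both histories breadth-first in lockstep; the first node one side pops that the other side has already seen is a common ancestor. The same idea without generators (a sketch; parents is any callable returning a node's two parents):

    def common_ancestor(a, b, parents, nullid="\0" * 20):
        seen = ({a: 1}, {b: 1})              # nodes queued or visited, per side
        queues = ([a], [b])
        while queues[0] or queues[1]:
            for side in (0, 1):
                if not queues[side]:
                    continue
                n = queues[side].pop(0)
                if n in seen[1 - side]:      # reached by both walks
                    return n
                for p in parents(n):
                    if p != nullid and p not in seen[side]:
                        seen[side][p] = 1
                        queues[side].append(p)
        return nullid                        # histories are disjoint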
272 272 def mergedag(self, other, transaction, linkseq, accumulate = None):
273 273 """combine the nodes from other's DAG into ours"""
274 274 old = self.tip()
275 275 i = self.count()
276 276 l = []
277 277
278 278 # merge the other revision log into our DAG
279 279 for r in range(other.count()):
280 280 id = other.node(r)
281 281 if id not in self.nodemap:
282 282 (xn, yn) = other.parents(id)
283 283 l.append((id, xn, yn))
284 284 self.nodemap[id] = i
285 285 i += 1
286 286
287 287 # merge node data for new nodes
288 288 r = other.revisions([e[0] for e in l])
289 289 for e in l:
290 290 t = r.next()
291 291 if accumulate: accumulate(t)
292 292 self.addrevision(t, transaction, linkseq.next(), e[1], e[2])
293 293
294 294 # return the unmerged heads for later resolving
295 295 return (old, self.tip())
296 296
297 297 def group(self, linkmap):
298 298 # given a list of changeset revs, return a set of deltas and
299 299 # metadata corresponding to nodes. the first delta is
300 300 # parent(nodes[0]) -> nodes[0]; the receiver is guaranteed to
301 301 # have this parent as it has all history before these
302 302 # changesets. parent is parent[0]
303 303
304 304 revs = []
305 305 needed = {}
306 306
307 307 # find file nodes/revs that match changeset revs
308 308 for i in xrange(0, self.count()):
309 309 if self.index[i][3] in linkmap:
310 310 revs.append(i)
311 311 needed[i] = 1
312 312
313 313 # if we don't have any revisions touched by these changesets, bail
314 314 if not revs: return struct.pack(">l", 0)
315 315
316 316 # add the parent of the first rev
317 317 p = self.parents(self.node(revs[0]))[0]
318 318 revs.insert(0, self.rev(p))
319 319
320 320 # for each delta that isn't contiguous in the log, we need to
321 321 # reconstruct the base, reconstruct the result, and then
322 322 # calculate the delta. We also need to do this where we've
323 323 # stored a full version and not a delta
324 324 for i in xrange(0, len(revs) - 1):
325 325 a, b = revs[i], revs[i + 1]
326 326 if a + 1 != b or self.base(b) == b:
327 327 for j in xrange(self.base(a), a + 1):
328 328 needed[j] = 1
329 329 for j in xrange(self.base(b), b + 1):
330 330 needed[j] = 1
331 331
332 332 # calculate spans to retrieve from datafile
333 333 needed = needed.keys()
334 334 needed.sort()
335 335 spans = []
336 336 for n in needed:
337 337 if n < 0: continue
338 338 o = self.start(n)
339 339 l = self.length(n)
340 340 spans.append((o, l, [(n, l)]))
341 341
342 342 # merge spans
343 343 merge = [spans.pop(0)]
344 344 while spans:
345 345 e = spans.pop(0)
346 346 f = merge[-1]
347 347 if e[0] == f[0] + f[1]:
348 348 merge[-1] = (f[0], f[1] + e[1], f[2] + e[2])
349 349 else:
350 350 merge.append(e)
351 351
352 352 # read spans in, divide up chunks
353 353 chunks = {}
354 354 for span in merge:
355 355 # we reopen the file for each span to make http happy for now
356 356 f = self.opener(self.datafile)
357 357 f.seek(span[0])
358 358 data = f.read(span[1])
359 359
360 360 # divide up the span
361 361 pos = 0
362 362 for r, l in span[2]:
363 363 chunks[r] = data[pos: pos + l]
364 364 pos += l
365 365
366 366 # helper to reconstruct intermediate versions
367 367 def construct(text, base, rev):
368 368 bins = [decompress(chunks[r]) for r in xrange(base + 1, rev + 1)]
369 369 return mdiff.patches(text, bins)
370 370
371 371 # build deltas
372 372 deltas = []
373 373 for d in xrange(0, len(revs) - 1):
374 374 a, b = revs[d], revs[d + 1]
375 375 n = self.node(b)
376 376
377 377 if a + 1 != b or self.base(b) == b:
378 378 if a >= 0:
379 379 base = self.base(a)
380 380 ta = decompress(chunks[self.base(a)])
381 381 ta = construct(ta, base, a)
382 382 else:
383 383 ta = ""
384 384
385 385 base = self.base(b)
386 386 if a > base:
387 387 base = a
388 388 tb = ta
389 389 else:
390 390 tb = decompress(chunks[self.base(b)])
391 391 tb = construct(tb, base, b)
392 392 d = self.diff(ta, tb)
393 393 else:
394 394 d = decompress(chunks[b])
395 395
396 396 p = self.parents(n)
397 397 meta = n + p[0] + p[1] + linkmap[self.linkrev(n)]
398 398 l = struct.pack(">l", len(meta) + len(d) + 4)
399 399 deltas.append(l + meta + d)
400 400
401 401 l = struct.pack(">l", sum(map(len, deltas)) + 4)
402 402 deltas.insert(0, l)
403 403 return "".join(deltas)
404 404
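
group() emits a self-describing stream: a 4-byte total length, then one chunk per revision, each chunk carrying its own 4-byte length, 80 bytes of metadata (node, p1, p2, and the linked changeset id), and the delta itself. Decoding a single chunk, as a sketch mirroring the unpack in addgroup below:

    import struct
    def parse_chunk(data, pos):
        l, node, p1, p2, cs = struct.unpack(">l20s20s20s20s",
                                            data[pos:pos + 84])
        delta = data[pos + 84:pos + l]       # l counts the 4 length bytes too
        return pos + l, (node, p1, p2, cs, delta)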
405 405 def addgroup(self, data, linkmapper, transaction):
406 406 # given a set of deltas, add them to the revision log. the
407 407 # first delta is against its parent, which should be in our
408 408 # log, the rest are against the previous delta.
409 409
410 410 if not data: return self.tip()
411 411
412 412 # retrieve the parent revision of the delta chain
413 413 chain = data[24:44]
414 if not chain in self.nodemap:
415 raise "unknown base %s" % short(chain[:4])
414 416
415 417 # track the base of the current delta log
416 418 r = self.count()
417 419 t = r - 1
418 420
419 421 base = prev = -1
420 422 start = end = 0
421 423 if r:
422 424 start = self.start(self.base(t))
423 425 end = self.end(t)
424 426 measure = self.length(self.base(t))
425 427 base = self.base(t)
426 428 prev = self.tip()
427 429
428 430 transaction.add(self.datafile, end)
429 431 transaction.add(self.indexfile, r * struct.calcsize(indexformat))
430 432 dfh = self.opener(self.datafile, "a")
431 433 ifh = self.opener(self.indexfile, "a")
432 434
433 435 # loop through our set of deltas
434 436 pos = 0
435 437 while pos < len(data):
436 438 l, node, p1, p2, cs = struct.unpack(">l20s20s20s20s",
437 439 data[pos:pos+84])
438 440 if node in self.nodemap:
439 441 raise "already have %s" % hex(node[:4])
440 442 link = linkmapper(cs)
441 443 delta = data[pos + 84:pos + l]
442 444 pos += l
443 445
444 446 # full versions are inserted when the needed deltas become
445 447 # comparable to the uncompressed text or when the previous
446 448 # version is not the one we have a delta against. We use
447 449 # the size of the previous full rev as a proxy for the
448 450 # current size.
449 451
450 452 if chain == prev:
451 453 cdelta = compress(delta)
452 454
453 455 if chain != prev or (end - start + len(cdelta)) > measure * 2:
454 456 # flush our writes here so we can read it in revision
455 457 dfh.flush()
456 458 ifh.flush()
457 459 text = self.revision(chain)
458 460 text = self.patches(text, [delta])
459 461 chk = self.addrevision(text, transaction, link, p1, p2)
460 462 if chk != node:
461 463 raise "consistency error adding group"
462 464 measure = len(text)
463 465 else:
464 466 e = (end, len(cdelta), self.base(t), link, p1, p2, node)
465 467 self.index.append(e)
466 468 self.nodemap[node] = r
467 469 dfh.write(cdelta)
468 470 ifh.write(struct.pack(indexformat, *e))
469 471
470 472 t, r, chain, prev = r, r + 1, node, node
471 473 start = self.start(self.base(t))
472 474 end = self.end(t)
473 475
474 476 dfh.close()
475 477 ifh.close()
476 478 return node
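
One detail worth noting in addgroup: self.revision reads the data file through a fresh handle, so the buffered appends must be flushed before a text in the just-written chain can be rebuilt, which is why dfh.flush() and ifh.flush() precede the revision call. The pattern in isolation (a sketch with stand-in file names):

    dfh = open("datafile", "ab")
    dfh.write("...delta bytes...")       # buffered append
    dfh.flush()                          # make it visible to other handles
    f = open("datafile", "rb")           # separate read handle, as revision() opens
    data = f.read()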