Fix corruption resulting from skipping parts of a revision group...
Matt Mackall
r653:94cdd027 default
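
The corruption fixed here was in addgroup(): when an incoming chunk's node was already present in the nodemap (which happens when two branches make the same change), the old code skipped the chunk without updating chain, so the next delta in the group was patched against the wrong base text. The fix advances the chain to the skipped node (the new "chain = node" line), initializes measure up front (the size of the last full revision, used to decide when to store a full text instead of a delta), and sets prev = self.tip() immediately before the loop.

Below is a minimal sketch of the chain invariant the fix restores; add_group(), apply_delta() and the chunk tuples are hypothetical stand-ins for illustration, not names from revlog.py:

    def add_group(chunks, have, texts, apply_delta):
        # Each delta in a group applies against the node produced by
        # the previous chunk; 'chain' must always point at that node.
        chain = None
        for node, p1, delta in chunks:
            if node in have:
                # Already stored (two branches made the same change).
                # The fix: still advance the chain; otherwise the next
                # delta would be patched onto the wrong base text.
                chain = node
                continue
            base = chain if chain is not None else p1
            texts[node] = apply_delta(texts[base], delta)
            have.add(node)
            chain = node
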
@@ -1,542 +1,544
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # This provides efficient delta storage with O(1) retrieve and append
4 4 # and O(changes) merge between branches
5 5 #
6 6 # Copyright 2005 Matt Mackall <mpm@selenic.com>
7 7 #
8 8 # This software may be used and distributed according to the terms
9 9 # of the GNU General Public License, incorporated herein by reference.
10 10
11 11 import zlib, struct, sha, binascii, heapq
12 12 from mercurial import mdiff
13 13
14 14 def hex(node): return binascii.hexlify(node)
15 15 def bin(node): return binascii.unhexlify(node)
16 16 def short(node): return hex(node[:6])
17 17
18 18 def compress(text):
19 19 if not text: return text
20 20 if len(text) < 44:
21 21 if text[0] == '\0': return text
22 22 return 'u' + text
23 23 bin = zlib.compress(text)
24 24 if len(bin) > len(text):
25 25 if text[0] == '\0': return text
26 26 return 'u' + text
27 27 return bin
28 28
29 29 def decompress(bin):
30 30 if not bin: return bin
31 31 t = bin[0]
32 32 if t == '\0': return bin
33 33 if t == 'x': return zlib.decompress(bin)
34 34 if t == 'u': return bin[1:]
35 35 raise "unknown compression type %s" % t
36 36
37 37 def hash(text, p1, p2):
38 38 l = [p1, p2]
39 39 l.sort()
40 40 s = sha.new(l[0])
41 41 s.update(l[1])
42 42 s.update(text)
43 43 return s.digest()
44 44
45 45 nullid = "\0" * 20
46 46 indexformat = ">4l20s20s20s"
47 47
48 48 class lazyparser:
49 49 def __init__(self, data, revlog):
50 50 self.data = data
51 51 self.s = struct.calcsize(indexformat)
52 52 self.l = len(data)/self.s
53 53 self.index = [None] * self.l
54 54 self.map = {nullid: -1}
55 55 self.all = 0
56 56 self.revlog = revlog
57 57
58 58 def load(self, pos=None):
59 59 if self.all: return
60 60 if pos is not None:
61 61 block = pos / 1000
62 62 i = block * 1000
63 63 end = min(self.l, i + 1000)
64 64 else:
65 65 self.all = 1
66 66 i = 0
67 67 end = self.l
68 68 self.revlog.index = self.index
69 69 self.revlog.nodemap = self.map
70 70
71 71 while i < end:
72 72 d = self.data[i * self.s: (i + 1) * self.s]
73 73 e = struct.unpack(indexformat, d)
74 74 self.index[i] = e
75 75 self.map[e[6]] = i
76 76 i += 1
77 77
78 78 class lazyindex:
79 79 def __init__(self, parser):
80 80 self.p = parser
81 81 def __len__(self):
82 82 return len(self.p.index)
83 83 def load(self, pos):
84 84 self.p.load(pos)
85 85 return self.p.index[pos]
86 86 def __getitem__(self, pos):
87 87 return self.p.index[pos] or self.load(pos)
88 88 def append(self, e):
89 89 self.p.index.append(e)
90 90
91 91 class lazymap:
92 92 def __init__(self, parser):
93 93 self.p = parser
94 94 def load(self, key):
95 95 if self.p.all: return
96 96 n = self.p.data.find(key)
97 97 if n < 0: raise KeyError("node " + hex(key))
98 98 pos = n / self.p.s
99 99 self.p.load(pos)
100 100 def __contains__(self, key):
101 101 self.p.load()
102 102 return key in self.p.map
103 103 def __iter__(self):
104 104 yield nullid
105 105 for i in xrange(self.p.l):
106 106 try:
107 107 yield self.p.index[i][6]
108 108 except:
109 109 self.p.load(i)
110 110 yield self.p.index[i][6]
111 111 def __getitem__(self, key):
112 112 try:
113 113 return self.p.map[key]
114 114 except KeyError:
115 115 try:
116 116 self.load(key)
117 117 return self.p.map[key]
118 118 except KeyError:
119 119 raise KeyError("node " + hex(key))
120 120 def __setitem__(self, key, val):
121 121 self.p.map[key] = val
122 122
123 123 class revlog:
124 124 def __init__(self, opener, indexfile, datafile):
125 125 self.indexfile = indexfile
126 126 self.datafile = datafile
127 127 self.opener = opener
128 128 self.cache = None
129 129
130 130 try:
131 131 i = self.opener(self.indexfile).read()
132 132 except IOError:
133 133 i = ""
134 134
135 135 if len(i) > 10000:
136 136 # big index, let's parse it on demand
137 137 parser = lazyparser(i, self)
138 138 self.index = lazyindex(parser)
139 139 self.nodemap = lazymap(parser)
140 140 else:
141 141 s = struct.calcsize(indexformat)
142 142 l = len(i) / s
143 143 self.index = [None] * l
144 144 m = [None] * l
145 145
146 146 n = 0
147 147 for f in xrange(0, len(i), s):
148 148 # offset, size, base, linkrev, p1, p2, nodeid
149 149 e = struct.unpack(indexformat, i[f:f + s])
150 150 m[n] = (e[6], n)
151 151 self.index[n] = e
152 152 n += 1
153 153
154 154 self.nodemap = dict(m)
155 155 self.nodemap[nullid] = -1
156 156
157 157 def tip(self): return self.node(len(self.index) - 1)
158 158 def count(self): return len(self.index)
159 159 def node(self, rev): return (rev < 0) and nullid or self.index[rev][6]
160 160 def rev(self, node): return self.nodemap[node]
161 161 def linkrev(self, node): return self.index[self.nodemap[node]][3]
162 162 def parents(self, node):
163 163 if node == nullid: return (nullid, nullid)
164 164 return self.index[self.nodemap[node]][4:6]
165 165
166 166 def start(self, rev): return self.index[rev][0]
167 167 def length(self, rev): return self.index[rev][1]
168 168 def end(self, rev): return self.start(rev) + self.length(rev)
169 169 def base(self, rev): return self.index[rev][2]
170 170
171 171 def heads(self):
172 172 p = {}
173 173 h = []
174 174 for r in range(self.count() - 1, -1, -1):
175 175 n = self.node(r)
176 176 if n not in p:
177 177 h.append(n)
178 178 for pn in self.parents(n):
179 179 p[pn] = 1
180 180 return h
181 181
182 182 def children(self, node):
183 183 c = []
184 184 p = self.rev(node)
185 185 for r in range(p + 1, self.count()):
186 186 n = self.node(r)
187 187 for pn in self.parents(n):
188 188 if pn == p:
189 189 c.append(p)
190 190 continue
191 191 elif pn == nullid:
192 192 continue
193 193 return c
194 194
195 195 def lookup(self, id):
196 196 try:
197 197 rev = int(id)
198 198 if str(rev) != id: raise ValueError
199 199 if rev < 0: rev = self.count() + rev
200 200 if rev < 0 or rev >= self.count(): raise ValueError
201 201 return self.node(rev)
202 202 except (ValueError, OverflowError):
203 203 c = []
204 204 for n in self.nodemap:
205 205 if hex(n).startswith(id):
206 206 c.append(n)
207 207 if len(c) > 1: raise KeyError("Ambiguous identifier")
208 208 if len(c) < 1: raise KeyError("No match found")
209 209 return c[0]
210 210
211 211 return None
212 212
213 213 def diff(self, a, b):
214 214 return mdiff.textdiff(a, b)
215 215
216 216 def patches(self, t, pl):
217 217 return mdiff.patches(t, pl)
218 218
219 219 def delta(self, node):
220 220 r = self.rev(node)
221 221 b = self.base(r)
222 222 if r == b:
223 223 return self.diff(self.revision(self.node(r - 1)),
224 224 self.revision(node))
225 225 else:
226 226 f = self.opener(self.datafile)
227 227 f.seek(self.start(r))
228 228 data = f.read(self.length(r))
229 229 return decompress(data)
230 230
231 231 def revision(self, node):
232 232 if node == nullid: return ""
233 233 if self.cache and self.cache[0] == node: return self.cache[2]
234 234
235 235 text = None
236 236 rev = self.rev(node)
237 237 start, length, base, link, p1, p2, node = self.index[rev]
238 238 end = start + length
239 239 if base != rev: start = self.start(base)
240 240
241 241 if self.cache and self.cache[1] >= base and self.cache[1] < rev:
242 242 base = self.cache[1]
243 243 start = self.start(base + 1)
244 244 text = self.cache[2]
245 245 last = 0
246 246
247 247 f = self.opener(self.datafile)
248 248 f.seek(start)
249 249 data = f.read(end - start)
250 250
251 251 if text is None:
252 252 last = self.length(base)
253 253 text = decompress(data[:last])
254 254
255 255 bins = []
256 256 for r in xrange(base + 1, rev + 1):
257 257 s = self.length(r)
258 258 bins.append(decompress(data[last:last + s]))
259 259 last = last + s
260 260
261 261 text = mdiff.patches(text, bins)
262 262
263 263 if node != hash(text, p1, p2):
264 264 raise IOError("integrity check failed on %s:%d"
265 265 % (self.datafile, rev))
266 266
267 267 self.cache = (node, rev, text)
268 268 return text
269 269
270 270 def addrevision(self, text, transaction, link, p1=None, p2=None, d=None):
271 271 if text is None: text = ""
272 272 if p1 is None: p1 = self.tip()
273 273 if p2 is None: p2 = nullid
274 274
275 275 node = hash(text, p1, p2)
276 276
277 277 if node in self.nodemap:
278 278 return node
279 279
280 280 n = self.count()
281 281 t = n - 1
282 282
283 283 if n:
284 284 base = self.base(t)
285 285 start = self.start(base)
286 286 end = self.end(t)
287 287 if not d:
288 288 prev = self.revision(self.tip())
289 289 d = self.diff(prev, text)
290 290 data = compress(d)
291 291 dist = end - start + len(data)
292 292
293 293 # full versions are inserted when the needed deltas
294 294 # become comparable to the uncompressed text
295 295 if not n or dist > len(text) * 2:
296 296 data = compress(text)
297 297 base = n
298 298 else:
299 299 base = self.base(t)
300 300
301 301 offset = 0
302 302 if t >= 0:
303 303 offset = self.end(t)
304 304
305 305 e = (offset, len(data), base, link, p1, p2, node)
306 306
307 307 self.index.append(e)
308 308 self.nodemap[node] = n
309 309 entry = struct.pack(indexformat, *e)
310 310
311 311 transaction.add(self.datafile, e[0])
312 312 self.opener(self.datafile, "a").write(data)
313 313 transaction.add(self.indexfile, n * len(entry))
314 314 self.opener(self.indexfile, "a").write(entry)
315 315
316 316 self.cache = (node, n, text)
317 317 return node
318 318
319 319 def ancestor(self, a, b):
320 320 # calculate the distance of every node from root
321 321 dist = {nullid: 0}
322 322 for i in xrange(self.count()):
323 323 n = self.node(i)
324 324 p1, p2 = self.parents(n)
325 325 dist[n] = max(dist[p1], dist[p2]) + 1
326 326
327 327 # traverse ancestors in order of decreasing distance from root
328 328 def ancestors(node):
329 329 # we store negative distances because heap returns smallest member
330 330 h = [(-dist[node], node)]
331 331 seen = {}
332 332 earliest = self.count()
333 333 while h:
334 334 d, n = heapq.heappop(h)
335 335 if n not in seen:
336 336 seen[n] = 1
337 337 r = self.rev(n)
338 338 yield (-d, r, n)
339 339 for p in self.parents(n):
340 340 heapq.heappush(h, (-dist[p], p))
341 341
342 342 x = ancestors(a)
343 343 y = ancestors(b)
344 344 lx = x.next()
345 345 ly = y.next()
346 346
347 347 # increment each ancestor list until it is closer to root than
348 348 # the other, or they match
349 349 while 1:
350 350 if lx == ly:
351 351 return lx[2]
352 352 elif lx < ly:
353 353 ly = y.next()
354 354 elif lx > ly:
355 355 lx = x.next()
356 356
357 357 def group(self, linkmap):
358 358 # given a list of changeset revs, return a set of deltas and
359 359 # metadata corresponding to nodes. the first delta is
360 360 # parent(nodes[0]) -> nodes[0] the receiver is guaranteed to
361 361 # have this parent as it has all history before these
362 362 # changesets. parent is parent[0]
363 363
364 364 revs = []
365 365 needed = {}
366 366
367 367 # find file nodes/revs that match changeset revs
368 368 for i in xrange(0, self.count()):
369 369 if self.index[i][3] in linkmap:
370 370 revs.append(i)
371 371 needed[i] = 1
372 372
373 373 # if we don't have any revisions touched by these changesets, bail
374 374 if not revs:
375 375 yield struct.pack(">l", 0)
376 376 return
377 377
378 378 # add the parent of the first rev
379 379 p = self.parents(self.node(revs[0]))[0]
380 380 revs.insert(0, self.rev(p))
381 381
382 382 # for each delta that isn't contiguous in the log, we need to
383 383 # reconstruct the base, reconstruct the result, and then
384 384 # calculate the delta. We also need to do this where we've
385 385 # stored a full version and not a delta
386 386 for i in xrange(0, len(revs) - 1):
387 387 a, b = revs[i], revs[i + 1]
388 388 if a + 1 != b or self.base(b) == b:
389 389 for j in xrange(self.base(a), a + 1):
390 390 needed[j] = 1
391 391 for j in xrange(self.base(b), b + 1):
392 392 needed[j] = 1
393 393
394 394 # calculate spans to retrieve from datafile
395 395 needed = needed.keys()
396 396 needed.sort()
397 397 spans = []
398 398 oo = -1
399 399 ol = 0
400 400 for n in needed:
401 401 if n < 0: continue
402 402 o = self.start(n)
403 403 l = self.length(n)
404 404 if oo + ol == o: # can we merge with the previous?
405 405 nl = spans[-1][2]
406 406 nl.append((n, l))
407 407 ol += l
408 408 spans[-1] = (oo, ol, nl)
409 409 else:
410 410 oo = o
411 411 ol = l
412 412 spans.append((oo, ol, [(n, l)]))
413 413
414 414 # read spans in, divide up chunks
415 415 chunks = {}
416 416 for span in spans:
417 417 # we reopen the file for each span to make http happy for now
418 418 f = self.opener(self.datafile)
419 419 f.seek(span[0])
420 420 data = f.read(span[1])
421 421
422 422 # divide up the span
423 423 pos = 0
424 424 for r, l in span[2]:
425 425 chunks[r] = decompress(data[pos: pos + l])
426 426 pos += l
427 427
428 428 # helper to reconstruct intermediate versions
429 429 def construct(text, base, rev):
430 430 bins = [chunks[r] for r in xrange(base + 1, rev + 1)]
431 431 return mdiff.patches(text, bins)
432 432
433 433 # build deltas
434 434 deltas = []
435 435 for d in xrange(0, len(revs) - 1):
436 436 a, b = revs[d], revs[d + 1]
437 437 n = self.node(b)
438 438
439 439 # do we need to construct a new delta?
440 440 if a + 1 != b or self.base(b) == b:
441 441 if a >= 0:
442 442 base = self.base(a)
443 443 ta = chunks[self.base(a)]
444 444 ta = construct(ta, base, a)
445 445 else:
446 446 ta = ""
447 447
448 448 base = self.base(b)
449 449 if a > base:
450 450 base = a
451 451 tb = ta
452 452 else:
453 453 tb = chunks[self.base(b)]
454 454 tb = construct(tb, base, b)
455 455 d = self.diff(ta, tb)
456 456 else:
457 457 d = chunks[b]
458 458
459 459 p = self.parents(n)
460 460 meta = n + p[0] + p[1] + linkmap[self.linkrev(n)]
461 461 l = struct.pack(">l", len(meta) + len(d) + 4)
462 462 yield l
463 463 yield meta
464 464 yield d
465 465
466 466 yield struct.pack(">l", 0)
467 467
468 468 def addgroup(self, revs, linkmapper, transaction, unique = 0):
469 469 # given a set of deltas, add them to the revision log. the
470 470 # first delta is against its parent, which should be in our
471 471 # log, the rest are against the previous delta.
472 472
473 473 # track the base of the current delta log
474 474 r = self.count()
475 475 t = r - 1
476 476 node = nullid
477 477
478     - base = prev = -1
479     - start = end = 0
    478 + base = -1
    479 + start = end = measure = 0
480 480 if r:
481 481 start = self.start(self.base(t))
482 482 end = self.end(t)
483 483 measure = self.length(self.base(t))
484 484 base = self.base(t)
485 485 prev = self.tip()
486 486
487 487 transaction.add(self.datafile, end)
488 488 transaction.add(self.indexfile, r * struct.calcsize(indexformat))
489 489 dfh = self.opener(self.datafile, "a")
490 490 ifh = self.opener(self.indexfile, "a")
491 491
492 492 # loop through our set of deltas
493 493 chain = None
    494 + prev = self.tip()
494 495 for chunk in revs:
495 496 node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
496 497 link = linkmapper(cs)
497 498 if node in self.nodemap:
498 499 # this can happen if two branches make the same change
499 500 if unique:
500 501 raise "already have %s" % hex(node[:4])
    502 + chain = node
501 503 continue
502 504 delta = chunk[80:]
503 505
504 506 if not chain:
505 507 # retrieve the parent revision of the delta chain
506 508 chain = p1
507 509 if not chain in self.nodemap:
508 510 raise "unknown base %s" % short(chain[:4])
509 511
510 512 # full versions are inserted when the needed deltas become
511 513 # comparable to the uncompressed text or when the previous
512 514 # version is not the one we have a delta against. We use
513 515 # the size of the previous full rev as a proxy for the
514 516 # current size.
515 517
516 518 if chain == prev:
517 519 cdelta = compress(delta)
518 520
519 521 if chain != prev or (end - start + len(cdelta)) > measure * 2:
520 522 # flush our writes here so we can read it in revision
521 523 dfh.flush()
522 524 ifh.flush()
523 525 text = self.revision(chain)
524 526 text = self.patches(text, [delta])
525 527 chk = self.addrevision(text, transaction, link, p1, p2)
526 528 if chk != node:
527 529 raise "consistency error adding group"
528 530 measure = len(text)
529 531 else:
530 532 e = (end, len(cdelta), self.base(t), link, p1, p2, node)
531 533 self.index.append(e)
532 534 self.nodemap[node] = r
533 535 dfh.write(cdelta)
534 536 ifh.write(struct.pack(indexformat, *e))
535 537
536 538 t, r, chain, prev = r, r + 1, node, node
537 539 start = self.start(self.base(t))
538 540 end = self.end(t)
539 541
540 542 dfh.close()
541 543 ifh.close()
542 544 return node
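
For reference when reading the index handling above: each index entry is a fixed 76-byte record packed with indexformat = ">4l20s20s20s", four big-endian 32-bit integers (offset, size, base, linkrev) followed by three 20-byte SHA-1 ids (p1, p2, nodeid), matching the "# offset, size, base, linkrev, p1, p2, nodeid" comment in the constructor. A small round-trip sketch, written as Python 3 for convenience (the original file is Python 2) and using made-up values:

    import struct

    indexformat = ">4l20s20s20s"  # offset, size, base, linkrev, p1, p2, nodeid
    nullid = b"\0" * 20

    # A hypothetical first revision: 120 compressed bytes at offset 0,
    # serving as its own base (a full text), linked to changeset 0,
    # with no parents.
    e = (0, 120, 0, 0, nullid, nullid, b"\x01" * 20)
    entry = struct.pack(indexformat, *e)

    assert struct.calcsize(indexformat) == 76
    assert struct.unpack(indexformat, entry) == e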