Fix traceback on bad revlog.lookup
Matt Mackall
r1393:67779d34 default
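The substance of the change is in lookup() (lines 296-297 in the diff below): an identifier that matches no node or more than one node now raises RevlogError instead of KeyError. Command-level callers already catch RevlogError and print a clean error message, so a bad identifier no longer escapes as an unhandled KeyError traceback. A minimal caller-side sketch in the file's own Python 2 dialect, with a hypothetical revlog instance rlog and a bad identifier:

    from revlog import RevlogError

    try:
        node = rlog.lookup("deadbee")       # hypothetical instance and id
    except RevlogError, inst:
        print "abort: %s" % inst            # clean message, no traceback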
@@ -1,674 +1,674
1 1 """
2 2 revlog.py - storage back-end for mercurial
3 3
4 4 This provides efficient delta storage with O(1) retrieve and append
5 5 and O(changes) merge between branches
6 6
7 7 Copyright 2005 Matt Mackall <mpm@selenic.com>
8 8
9 9 This software may be used and distributed according to the terms
10 10 of the GNU General Public License, incorporated herein by reference.
11 11 """
12 12
13 13 from node import *
14 14 from demandload import demandload
15 15 demandload(globals(), "binascii errno heapq mdiff sha struct zlib")
16 16
17 17 def hash(text, p1, p2):
18 18 """generate a hash from the given text and its parent hashes
19 19
20 20 This hash combines both the current file contents and its history
21 21 in a manner that makes it easy to distinguish nodes with the same
22 22 content in the revision graph.
23 23 """
24 24 l = [p1, p2]
25 25 l.sort()
26 26 s = sha.new(l[0])
27 27 s.update(l[1])
28 28 s.update(text)
29 29 return s.digest()
30 30
31 31 def compress(text):
32 32 """ generate a possibly-compressed representation of text """
33 33 if not text: return text
34 34 if len(text) < 44:
35 35 if text[0] == '\0': return text
36 36 return 'u' + text
37 37 bin = zlib.compress(text)
38 38 if len(bin) > len(text):
39 39 if text[0] == '\0': return text
40 40 return 'u' + text
41 41 return bin
42 42
43 43 def decompress(bin):
44 44 """ decompress the given input """
45 45 if not bin: return bin
46 46 t = bin[0]
47 47 if t == '\0': return bin
48 48 if t == 'x': return zlib.decompress(bin)
49 49 if t == 'u': return bin[1:]
50 50 raise RevlogError("unknown compression type %s" % t)
51 51
52 52 indexformat = ">4l20s20s20s"
53 53
54 54 class lazyparser:
55 55 """
56 56 this class avoids the need to parse the entirety of large indices
57 57
58 58 By default we parse and load 1000 entries at a time.
59 59
60 60 If no position is specified, we load the whole index, and replace
61 61 the lazy objects in revlog with the underlying objects for
62 62 efficiency in cases where we look at most of the nodes.
63 63 """
64 64 def __init__(self, data, revlog):
65 65 self.data = data
66 66 self.s = struct.calcsize(indexformat)
67 67 self.l = len(data)/self.s
68 68 self.index = [None] * self.l
69 69 self.map = {nullid: -1}
70 70 self.all = 0
71 71 self.revlog = revlog
72 72
73 73 def load(self, pos=None):
74 74 if self.all: return
75 75 if pos is not None:
76 76 block = pos / 1000
77 77 i = block * 1000
78 78 end = min(self.l, i + 1000)
79 79 else:
80 80 self.all = 1
81 81 i = 0
82 82 end = self.l
83 83 self.revlog.index = self.index
84 84 self.revlog.nodemap = self.map
85 85
86 86 while i < end:
87 87 d = self.data[i * self.s: (i + 1) * self.s]
88 88 e = struct.unpack(indexformat, d)
89 89 self.index[i] = e
90 90 self.map[e[6]] = i
91 91 i += 1
92 92
93 93 class lazyindex:
94 94 """a lazy version of the index array"""
95 95 def __init__(self, parser):
96 96 self.p = parser
97 97 def __len__(self):
98 98 return len(self.p.index)
99 99 def load(self, pos):
100 100 self.p.load(pos)
101 101 return self.p.index[pos]
102 102 def __getitem__(self, pos):
103 103 return self.p.index[pos] or self.load(pos)
104 104 def append(self, e):
105 105 self.p.index.append(e)
106 106
107 107 class lazymap:
108 108 """a lazy version of the node map"""
109 109 def __init__(self, parser):
110 110 self.p = parser
111 111 def load(self, key):
112 112 if self.p.all: return
113 113 n = self.p.data.find(key)
114 114 if n < 0:
115 115 raise KeyError(key)
116 116 pos = n / self.p.s
117 117 self.p.load(pos)
118 118 def __contains__(self, key):
119 119 self.p.load()
120 120 return key in self.p.map
121 121 def __iter__(self):
122 122 yield nullid
123 123 for i in xrange(self.p.l):
124 124 try:
125 125 yield self.p.index[i][6]
126 126 except:
127 127 self.p.load(i)
128 128 yield self.p.index[i][6]
129 129 def __getitem__(self, key):
130 130 try:
131 131 return self.p.map[key]
132 132 except KeyError:
133 133 try:
134 134 self.load(key)
135 135 return self.p.map[key]
136 136 except KeyError:
137 137 raise KeyError("node " + hex(key))
138 138 def __setitem__(self, key, val):
139 139 self.p.map[key] = val
140 140
141 141 class RevlogError(Exception): pass
142 142
143 143 class revlog:
144 144 """
145 145 the underlying revision storage object
146 146
147 147 A revlog consists of two parts, an index and the revision data.
148 148
149 149 The index is a file with a fixed record size containing
150 150 information on each revision, including its nodeid (hash), the
151 151 nodeids of its parents, the position and offset of its data within
152 152 the data file, and the revision it's based on. Finally, each entry
153 153 contains a linkrev entry that can serve as a pointer to external
154 154 data.
155 155
156 156 The revision data itself is a linear collection of data chunks.
157 157 Each chunk represents a revision and is usually represented as a
158 158 delta against the previous chunk. To bound lookup time, runs of
159 159 deltas are limited to about 2 times the length of the original
160 160 version data. This makes retrieval of a version proportional to
161 161 its size, or O(1) relative to the number of revisions.
162 162
163 163 Both pieces of the revlog are written to in an append-only
164 164 fashion, which means we never need to rewrite a file to insert or
165 165 remove data, and can use some simple techniques to avoid the need
166 166 for locking while reading.
167 167 """
168 168 def __init__(self, opener, indexfile, datafile):
169 169 """
170 170 create a revlog object
171 171
172 172 opener is a function that abstracts the file opening operation
173 173 and can be used to implement COW semantics or the like.
174 174 """
175 175 self.indexfile = indexfile
176 176 self.datafile = datafile
177 177 self.opener = opener
178 178 self.cache = None
179 179
180 180 try:
181 181 i = self.opener(self.indexfile).read()
182 182 except IOError, inst:
183 183 if inst.errno != errno.ENOENT:
184 184 raise
185 185 i = ""
186 186
187 187 if len(i) > 10000:
188 188 # big index, let's parse it on demand
189 189 parser = lazyparser(i, self)
190 190 self.index = lazyindex(parser)
191 191 self.nodemap = lazymap(parser)
192 192 else:
193 193 s = struct.calcsize(indexformat)
194 194 l = len(i) / s
195 195 self.index = [None] * l
196 196 m = [None] * l
197 197
198 198 n = 0
199 199 for f in xrange(0, len(i), s):
200 200 # offset, size, base, linkrev, p1, p2, nodeid
201 201 e = struct.unpack(indexformat, i[f:f + s])
202 202 m[n] = (e[6], n)
203 203 self.index[n] = e
204 204 n += 1
205 205
206 206 self.nodemap = dict(m)
207 207 self.nodemap[nullid] = -1
208 208
209 209 def tip(self): return self.node(len(self.index) - 1)
210 210 def count(self): return len(self.index)
211 211 def node(self, rev): return (rev < 0) and nullid or self.index[rev][6]
212 212 def rev(self, node):
213 213 try:
214 214 return self.nodemap[node]
215 215 except KeyError:
216 216 raise RevlogError('%s: no node %s' % (self.indexfile, hex(node)))
217 217 def linkrev(self, node): return self.index[self.rev(node)][3]
218 218 def parents(self, node):
219 219 if node == nullid: return (nullid, nullid)
220 220 return self.index[self.rev(node)][4:6]
221 221
222 222 def start(self, rev): return self.index[rev][0]
223 223 def length(self, rev): return self.index[rev][1]
224 224 def end(self, rev): return self.start(rev) + self.length(rev)
225 225 def base(self, rev): return self.index[rev][2]
226 226
227 227 def reachable(self, rev, stop=None):
228 228 reachable = {}
229 229 visit = [rev]
230 230 reachable[rev] = 1
231 231 if stop:
232 232 stopn = self.rev(stop)
233 233 else:
234 234 stopn = 0
235 235 while visit:
236 236 n = visit.pop(0)
237 237 if n == stop:
238 238 continue
239 239 if n == nullid:
240 240 continue
241 241 for p in self.parents(n):
242 242 if self.rev(p) < stopn:
243 243 continue
244 244 if p not in reachable:
245 245 reachable[p] = 1
246 246 visit.append(p)
247 247 return reachable
248 248
249 249 def heads(self, stop=None):
250 250 """return the list of all nodes that have no children"""
251 251 p = {}
252 252 h = []
253 253 stoprev = 0
254 254 if stop and stop in self.nodemap:
255 255 stoprev = self.rev(stop)
256 256
257 257 for r in range(self.count() - 1, -1, -1):
258 258 n = self.node(r)
259 259 if n not in p:
260 260 h.append(n)
261 261 if n == stop:
262 262 break
263 263 if r < stoprev:
264 264 break
265 265 for pn in self.parents(n):
266 266 p[pn] = 1
267 267 return h
268 268
269 269 def children(self, node):
270 270 """find the children of a given node"""
271 271 c = []
272 272 p = self.rev(node)
273 273 for r in range(p + 1, self.count()):
274 274 n = self.node(r)
275 275 for pn in self.parents(n):
276 276 if pn == node:
277 277 c.append(n)
278 278 continue
279 279 elif pn == nullid:
280 280 continue
281 281 return c
282 282
283 283 def lookup(self, id):
284 284 """locate a node based on revision number or subset of hex nodeid"""
285 285 try:
286 286 rev = int(id)
287 287 if str(rev) != id: raise ValueError
288 288 if rev < 0: rev = self.count() + rev
289 289 if rev < 0 or rev >= self.count(): raise ValueError
290 290 return self.node(rev)
291 291 except (ValueError, OverflowError):
292 292 c = []
293 293 for n in self.nodemap:
294 294 if hex(n).startswith(id):
295 295 c.append(n)
296 if len(c) > 1: raise KeyError("Ambiguous identifier")
297 if len(c) < 1: raise KeyError("No match found")
296 if len(c) > 1: raise RevlogError("Ambiguous identifier")
297 if len(c) < 1: raise RevlogError("No match found")
298 298 return c[0]
299 299
300 300 return None
301 301
302 302 def diff(self, a, b):
303 303 """return a delta between two revisions"""
304 304 return mdiff.textdiff(a, b)
305 305
306 306 def patches(self, t, pl):
307 307 """apply a list of patches to a string"""
308 308 return mdiff.patches(t, pl)
309 309
310 310 def delta(self, node):
311 311 """return or calculate a delta between a node and its predecessor"""
312 312 r = self.rev(node)
313 313 b = self.base(r)
314 314 if r == b:
315 315 return self.diff(self.revision(self.node(r - 1)),
316 316 self.revision(node))
317 317 else:
318 318 f = self.opener(self.datafile)
319 319 f.seek(self.start(r))
320 320 data = f.read(self.length(r))
321 321 return decompress(data)
322 322
323 323 def revision(self, node):
324 324 """return an uncompressed revision of a given"""
325 325 if node == nullid: return ""
326 326 if self.cache and self.cache[0] == node: return self.cache[2]
327 327
328 328 # look up what we need to read
329 329 text = None
330 330 rev = self.rev(node)
331 331 start, length, base, link, p1, p2, node = self.index[rev]
332 332 end = start + length
333 333 if base != rev: start = self.start(base)
334 334
335 335 # do we have useful data cached?
336 336 if self.cache and self.cache[1] >= base and self.cache[1] < rev:
337 337 base = self.cache[1]
338 338 start = self.start(base + 1)
339 339 text = self.cache[2]
340 340 last = 0
341 341
342 342 f = self.opener(self.datafile)
343 343 f.seek(start)
344 344 data = f.read(end - start)
345 345
346 346 if text is None:
347 347 last = self.length(base)
348 348 text = decompress(data[:last])
349 349
350 350 bins = []
351 351 for r in xrange(base + 1, rev + 1):
352 352 s = self.length(r)
353 353 bins.append(decompress(data[last:last + s]))
354 354 last = last + s
355 355
356 356 text = mdiff.patches(text, bins)
357 357
358 358 if node != hash(text, p1, p2):
359 359 raise RevlogError("integrity check failed on %s:%d"
360 360 % (self.datafile, rev))
361 361
362 362 self.cache = (node, rev, text)
363 363 return text
364 364
365 365 def addrevision(self, text, transaction, link, p1=None, p2=None, d=None):
366 366 """add a revision to the log
367 367
368 368 text - the revision data to add
369 369 transaction - the transaction object used for rollback
370 370 link - the linkrev data to add
371 371 p1, p2 - the parent nodeids of the revision
372 372 d - an optional precomputed delta
373 373 """
374 374 if text is None: text = ""
375 375 if p1 is None: p1 = self.tip()
376 376 if p2 is None: p2 = nullid
377 377
378 378 node = hash(text, p1, p2)
379 379
380 380 if node in self.nodemap:
381 381 return node
382 382
383 383 n = self.count()
384 384 t = n - 1
385 385
386 386 if n:
387 387 base = self.base(t)
388 388 start = self.start(base)
389 389 end = self.end(t)
390 390 if not d:
391 391 prev = self.revision(self.tip())
392 392 d = self.diff(prev, text)
393 393 data = compress(d)
394 394 dist = end - start + len(data)
395 395
396 396 # full versions are inserted when the needed deltas
397 397 # become comparable to the uncompressed text
398 398 if not n or dist > len(text) * 2:
399 399 data = compress(text)
400 400 base = n
401 401 else:
402 402 base = self.base(t)
403 403
404 404 offset = 0
405 405 if t >= 0:
406 406 offset = self.end(t)
407 407
408 408 e = (offset, len(data), base, link, p1, p2, node)
409 409
410 410 self.index.append(e)
411 411 self.nodemap[node] = n
412 412 entry = struct.pack(indexformat, *e)
413 413
414 414 transaction.add(self.datafile, e[0])
415 415 self.opener(self.datafile, "a").write(data)
416 416 transaction.add(self.indexfile, n * len(entry))
417 417 self.opener(self.indexfile, "a").write(entry)
418 418
419 419 self.cache = (node, n, text)
420 420 return node
421 421
422 422 def ancestor(self, a, b):
423 423 """calculate the least common ancestor of nodes a and b"""
424 424 # calculate the distance of every node from root
425 425 dist = {nullid: 0}
426 426 for i in xrange(self.count()):
427 427 n = self.node(i)
428 428 p1, p2 = self.parents(n)
429 429 dist[n] = max(dist[p1], dist[p2]) + 1
430 430
431 431 # traverse ancestors in order of decreasing distance from root
432 432 def ancestors(node):
433 433 # we store negative distances because heap returns smallest member
434 434 h = [(-dist[node], node)]
435 435 seen = {}
436 436 earliest = self.count()
437 437 while h:
438 438 d, n = heapq.heappop(h)
439 439 if n not in seen:
440 440 seen[n] = 1
441 441 r = self.rev(n)
442 442 yield (-d, n)
443 443 for p in self.parents(n):
444 444 heapq.heappush(h, (-dist[p], p))
445 445
446 446 def generations(node):
447 447 sg, s = None, {}
448 448 for g,n in ancestors(node):
449 449 if g != sg:
450 450 if sg:
451 451 yield sg, s
452 452 sg, s = g, {n:1}
453 453 else:
454 454 s[n] = 1
455 455 yield sg, s
456 456
457 457 x = generations(a)
458 458 y = generations(b)
459 459 gx = x.next()
460 460 gy = y.next()
461 461
462 462 # increment each ancestor list until it is closer to root than
463 463 # the other, or they match
464 464 while 1:
465 465 #print "ancestor gen %s %s" % (gx[0], gy[0])
466 466 if gx[0] == gy[0]:
467 467 # find the intersection
468 468 i = [ n for n in gx[1] if n in gy[1] ]
469 469 if i:
470 470 return i[0]
471 471 else:
472 472 #print "next"
473 473 gy = y.next()
474 474 gx = x.next()
475 475 elif gx[0] < gy[0]:
476 476 #print "next y"
477 477 gy = y.next()
478 478 else:
479 479 #print "next x"
480 480 gx = x.next()
481 481
482 482 def group(self, linkmap):
483 483 """calculate a delta group
484 484
485 485 Given a list of changeset revs, return a set of deltas and
486 486 metadata corresponding to nodes. the first delta is
487 487 parent(nodes[0]) -> nodes[0] the receiver is guaranteed to
488 488 have this parent as it has all history before these
489 489 changesets. parent is parent[0]
490 490 """
491 491 revs = []
492 492 needed = {}
493 493
494 494 # find file nodes/revs that match changeset revs
495 495 for i in xrange(0, self.count()):
496 496 if self.index[i][3] in linkmap:
497 497 revs.append(i)
498 498 needed[i] = 1
499 499
500 500 # if we don't have any revisions touched by these changesets, bail
501 501 if not revs:
502 502 yield struct.pack(">l", 0)
503 503 return
504 504
505 505 # add the parent of the first rev
506 506 p = self.parents(self.node(revs[0]))[0]
507 507 revs.insert(0, self.rev(p))
508 508
509 509 # for each delta that isn't contiguous in the log, we need to
510 510 # reconstruct the base, reconstruct the result, and then
511 511 # calculate the delta. We also need to do this where we've
512 512 # stored a full version and not a delta
513 513 for i in xrange(0, len(revs) - 1):
514 514 a, b = revs[i], revs[i + 1]
515 515 if a + 1 != b or self.base(b) == b:
516 516 for j in xrange(self.base(a), a + 1):
517 517 needed[j] = 1
518 518 for j in xrange(self.base(b), b + 1):
519 519 needed[j] = 1
520 520
521 521 # calculate spans to retrieve from datafile
522 522 needed = needed.keys()
523 523 needed.sort()
524 524 spans = []
525 525 oo = -1
526 526 ol = 0
527 527 for n in needed:
528 528 if n < 0: continue
529 529 o = self.start(n)
530 530 l = self.length(n)
531 531 if oo + ol == o: # can we merge with the previous?
532 532 nl = spans[-1][2]
533 533 nl.append((n, l))
534 534 ol += l
535 535 spans[-1] = (oo, ol, nl)
536 536 else:
537 537 oo = o
538 538 ol = l
539 539 spans.append((oo, ol, [(n, l)]))
540 540
541 541 # read spans in, divide up chunks
542 542 chunks = {}
543 543 for span in spans:
544 544 # we reopen the file for each span to make http happy for now
545 545 f = self.opener(self.datafile)
546 546 f.seek(span[0])
547 547 data = f.read(span[1])
548 548
549 549 # divide up the span
550 550 pos = 0
551 551 for r, l in span[2]:
552 552 chunks[r] = decompress(data[pos: pos + l])
553 553 pos += l
554 554
555 555 # helper to reconstruct intermediate versions
556 556 def construct(text, base, rev):
557 557 bins = [chunks[r] for r in xrange(base + 1, rev + 1)]
558 558 return mdiff.patches(text, bins)
559 559
560 560 # build deltas
561 561 deltas = []
562 562 for d in xrange(0, len(revs) - 1):
563 563 a, b = revs[d], revs[d + 1]
564 564 n = self.node(b)
565 565
566 566 # do we need to construct a new delta?
567 567 if a + 1 != b or self.base(b) == b:
568 568 if a >= 0:
569 569 base = self.base(a)
570 570 ta = chunks[self.base(a)]
571 571 ta = construct(ta, base, a)
572 572 else:
573 573 ta = ""
574 574
575 575 base = self.base(b)
576 576 if a > base:
577 577 base = a
578 578 tb = ta
579 579 else:
580 580 tb = chunks[self.base(b)]
581 581 tb = construct(tb, base, b)
582 582 d = self.diff(ta, tb)
583 583 else:
584 584 d = chunks[b]
585 585
586 586 p = self.parents(n)
587 587 meta = n + p[0] + p[1] + linkmap[self.linkrev(n)]
588 588 l = struct.pack(">l", len(meta) + len(d) + 4)
589 589 yield l
590 590 yield meta
591 591 yield d
592 592
593 593 yield struct.pack(">l", 0)
594 594
595 595 def addgroup(self, revs, linkmapper, transaction, unique=0):
596 596 """
597 597 add a delta group
598 598
599 599 given a set of deltas, add them to the revision log. the
600 600 first delta is against its parent, which should be in our
601 601 log, the rest are against the previous delta.
602 602 """
603 603
604 604 #track the base of the current delta log
605 605 r = self.count()
606 606 t = r - 1
607 607 node = nullid
608 608
609 609 base = prev = -1
610 610 start = end = measure = 0
611 611 if r:
612 612 start = self.start(self.base(t))
613 613 end = self.end(t)
614 614 measure = self.length(self.base(t))
615 615 base = self.base(t)
616 616 prev = self.tip()
617 617
618 618 transaction.add(self.datafile, end)
619 619 transaction.add(self.indexfile, r * struct.calcsize(indexformat))
620 620 dfh = self.opener(self.datafile, "a")
621 621 ifh = self.opener(self.indexfile, "a")
622 622
623 623 # loop through our set of deltas
624 624 chain = None
625 625 for chunk in revs:
626 626 node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
627 627 link = linkmapper(cs)
628 628 if node in self.nodemap:
629 629 # this can happen if two branches make the same change
630 630 # if unique:
631 631 # raise RevlogError("already have %s" % hex(node[:4]))
632 632 chain = node
633 633 continue
634 634 delta = chunk[80:]
635 635
636 636 if not chain:
637 637 # retrieve the parent revision of the delta chain
638 638 chain = p1
639 639 if not chain in self.nodemap:
640 640 raise RevlogError("unknown base %s" % short(chain[:4]))
641 641
642 642 # full versions are inserted when the needed deltas become
643 643 # comparable to the uncompressed text or when the previous
644 644 # version is not the one we have a delta against. We use
645 645 # the size of the previous full rev as a proxy for the
646 646 # current size.
647 647
648 648 if chain == prev:
649 649 cdelta = compress(delta)
650 650
651 651 if chain != prev or (end - start + len(cdelta)) > measure * 2:
652 652 # flush our writes here so we can read it in revision
653 653 dfh.flush()
654 654 ifh.flush()
655 655 text = self.revision(chain)
656 656 text = self.patches(text, [delta])
657 657 chk = self.addrevision(text, transaction, link, p1, p2)
658 658 if chk != node:
659 659 raise RevlogError("consistency error adding group")
660 660 measure = len(text)
661 661 else:
662 662 e = (end, len(cdelta), self.base(t), link, p1, p2, node)
663 663 self.index.append(e)
664 664 self.nodemap[node] = r
665 665 dfh.write(cdelta)
666 666 ifh.write(struct.pack(indexformat, *e))
667 667
668 668 t, r, chain, prev = r, r + 1, node, node
669 669 start = self.start(self.base(t))
670 670 end = self.end(t)
671 671
672 672 dfh.close()
673 673 ifh.close()
674 674 return node
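For reference, the index record format defined near the top of the file, indexformat = ">4l20s20s20s", is four big-endian 4-byte longs followed by three 20-byte strings, 76 bytes per entry, unpacking into the fields named in the comment at line 200: offset, size, base, linkrev, p1, p2, nodeid. A small sketch in the same Python 2 dialect, packing and unpacking a fabricated entry purely for illustration:

    import struct, binascii

    indexformat = ">4l20s20s20s"    # as defined in revlog.py above

    # fabricated entry, for illustration only
    e = (0, 11, 0, 5, "\0" * 20, "\0" * 20, "\xab" * 20)
    record = struct.pack(indexformat, *e)
    assert len(record) == struct.calcsize(indexformat)    # 76 bytes

    offset, size, base, linkrev, p1, p2, nodeid = struct.unpack(indexformat, record)
    print offset, size, base, linkrev, binascii.hexlify(nodeid)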
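One property of the hash() helper worth noting: the parents are sorted before hashing, so a nodeid does not depend on parent order. A quick self-contained check that restates the function as defined above:

    import sha

    def hash(text, p1, p2):
        # same combination as revlog's hash()
        l = [p1, p2]
        l.sort()
        s = sha.new(l[0])
        s.update(l[1])
        s.update(text)
        return s.digest()

    p1, p2 = "a" * 20, "b" * 20
    assert hash("text", p1, p2) == hash("text", p2, p1)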