LookupError should have same __str__ as RevlogError
Alexander Solovyov
r6703:bacfee67 default
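
The hunk below adds an explicit __str__ to LookupError. A minimal sketch of the motivation, assuming standard method resolution order (the class shape and message format come from the diff; the node and index file names are made up): RevlogError defines no __str__, so without the override LookupError would inherit KeyError.__str__, which repr()s the message and wraps it in stray quotes.

    # Illustrative sketch, not part of the commit.
    class RevlogError(Exception):
        pass

    class LookupError(RevlogError, KeyError):
        def __init__(self, name, index, message):
            self.name = name
            RevlogError.__init__(self, '%s@%s: %s' % (index, name, message))

        def __str__(self):
            # delegate to Exception via RevlogError so the message prints verbatim,
            # instead of KeyError.__str__ repr()ing it
            return RevlogError.__str__(self)

    err = LookupError('abcdef123456', '00changelog.i', 'no node')
    print(str(err))    # 00changelog.i@abcdef123456: no node   (no stray quotes)
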
@@ -1,1330 +1,1333 b''
1 1 """
2 2 revlog.py - storage back-end for mercurial
3 3
4 4 This provides efficient delta storage with O(1) retrieve and append
5 5 and O(changes) merge between branches
6 6
7 7 Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
8 8
9 9 This software may be used and distributed according to the terms
10 10 of the GNU General Public License, incorporated herein by reference.
11 11 """
12 12
13 13 from node import bin, hex, nullid, nullrev, short
14 14 from i18n import _
15 15 import changegroup, errno, ancestor, mdiff
16 16 import struct, util, zlib
17 17
18 18 _pack = struct.pack
19 19 _unpack = struct.unpack
20 20 _compress = zlib.compress
21 21 _decompress = zlib.decompress
22 22 _sha = util.sha1
23 23
24 24 # revlog flags
25 25 REVLOGV0 = 0
26 26 REVLOGNG = 1
27 27 REVLOGNGINLINEDATA = (1 << 16)
28 28 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
29 29 REVLOG_DEFAULT_FORMAT = REVLOGNG
30 30 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
31 31
32 32 class RevlogError(Exception):
33 33 pass
34 34
35 35 class LookupError(RevlogError, KeyError):
36 36 def __init__(self, name, index, message):
37 37 self.name = name
38 38 if isinstance(name, str) and len(name) == 20:
39 39 name = short(name)
40 40 RevlogError.__init__(self, _('%s@%s: %s') % (index, name, message))
41 41
42 def __str__(self):
43 return RevlogError.__str__(self)
44
42 45 def getoffset(q):
43 46 return int(q >> 16)
44 47
45 48 def gettype(q):
46 49 return int(q & 0xFFFF)
47 50
48 51 def offset_type(offset, type):
49 52 return long(long(offset) << 16 | type)
50 53
51 54 def hash(text, p1, p2):
52 55 """generate a hash from the given text and its parent hashes
53 56
54 57 This hash combines both the current file contents and its history
55 58 in a manner that makes it easy to distinguish nodes with the same
56 59 content in the revision graph.
57 60 """
58 61 l = [p1, p2]
59 62 l.sort()
60 63 s = _sha(l[0])
61 64 s.update(l[1])
62 65 s.update(text)
63 66 return s.digest()
64 67
65 68 def compress(text):
66 69 """ generate a possibly-compressed representation of text """
67 70 if not text:
68 71 return ("", text)
69 72 l = len(text)
70 73 bin = None
71 74 if l < 44:
72 75 pass
73 76 elif l > 1000000:
74 77 # zlib makes an internal copy, thus doubling memory usage for
75 78 # large files, so lets do this in pieces
76 79 z = zlib.compressobj()
77 80 p = []
78 81 pos = 0
79 82 while pos < l:
80 83 pos2 = pos + 2**20
81 84 p.append(z.compress(text[pos:pos2]))
82 85 pos = pos2
83 86 p.append(z.flush())
84 87 if sum(map(len, p)) < l:
85 88 bin = "".join(p)
86 89 else:
87 90 bin = _compress(text)
88 91 if bin is None or len(bin) > l:
89 92 if text[0] == '\0':
90 93 return ("", text)
91 94 return ('u', text)
92 95 return ("", bin)
93 96
94 97 def decompress(bin):
95 98 """ decompress the given input """
96 99 if not bin:
97 100 return bin
98 101 t = bin[0]
99 102 if t == '\0':
100 103 return bin
101 104 if t == 'x':
102 105 return _decompress(bin)
103 106 if t == 'u':
104 107 return bin[1:]
105 108 raise RevlogError(_("unknown compression type %r") % t)
106 109
107 110 class lazyparser(object):
108 111 """
109 112 this class avoids the need to parse the entirety of large indices
110 113 """
111 114
112 115 # lazyparser is not safe to use on windows if win32 extensions not
113 116 # available. it keeps file handle open, which make it not possible
114 117 # to break hardlinks on local cloned repos.
115 118
116 119 def __init__(self, dataf, size):
117 120 self.dataf = dataf
118 121 self.s = struct.calcsize(indexformatng)
119 122 self.datasize = size
120 123 self.l = size/self.s
121 124 self.index = [None] * self.l
122 125 self.map = {nullid: nullrev}
123 126 self.allmap = 0
124 127 self.all = 0
125 128 self.mapfind_count = 0
126 129
127 130 def loadmap(self):
128 131 """
129 132 during a commit, we need to make sure the rev being added is
130 133 not a duplicate. This requires loading the entire index,
131 134 which is fairly slow. loadmap can load up just the node map,
132 135 which takes much less time.
133 136 """
134 137 if self.allmap:
135 138 return
136 139 end = self.datasize
137 140 self.allmap = 1
138 141 cur = 0
139 142 count = 0
140 143 blocksize = self.s * 256
141 144 self.dataf.seek(0)
142 145 while cur < end:
143 146 data = self.dataf.read(blocksize)
144 147 off = 0
145 148 for x in xrange(256):
146 149 n = data[off + ngshaoffset:off + ngshaoffset + 20]
147 150 self.map[n] = count
148 151 count += 1
149 152 if count >= self.l:
150 153 break
151 154 off += self.s
152 155 cur += blocksize
153 156
154 157 def loadblock(self, blockstart, blocksize, data=None):
155 158 if self.all:
156 159 return
157 160 if data is None:
158 161 self.dataf.seek(blockstart)
159 162 if blockstart + blocksize > self.datasize:
160 163 # the revlog may have grown since we've started running,
161 164 # but we don't have space in self.index for more entries.
162 165 # limit blocksize so that we don't get too much data.
163 166 blocksize = max(self.datasize - blockstart, 0)
164 167 data = self.dataf.read(blocksize)
165 168 lend = len(data) / self.s
166 169 i = blockstart / self.s
167 170 off = 0
168 171 # lazyindex supports __delitem__
169 172 if lend > len(self.index) - i:
170 173 lend = len(self.index) - i
171 174 for x in xrange(lend):
172 175 if self.index[i + x] == None:
173 176 b = data[off : off + self.s]
174 177 self.index[i + x] = b
175 178 n = b[ngshaoffset:ngshaoffset + 20]
176 179 self.map[n] = i + x
177 180 off += self.s
178 181
179 182 def findnode(self, node):
180 183 """search backwards through the index file for a specific node"""
181 184 if self.allmap:
182 185 return None
183 186
184 187 # hg log will cause many many searches for the manifest
185 188 # nodes. After we get called a few times, just load the whole
186 189 # thing.
187 190 if self.mapfind_count > 8:
188 191 self.loadmap()
189 192 if node in self.map:
190 193 return node
191 194 return None
192 195 self.mapfind_count += 1
193 196 last = self.l - 1
194 197 while self.index[last] != None:
195 198 if last == 0:
196 199 self.all = 1
197 200 self.allmap = 1
198 201 return None
199 202 last -= 1
200 203 end = (last + 1) * self.s
201 204 blocksize = self.s * 256
202 205 while end >= 0:
203 206 start = max(end - blocksize, 0)
204 207 self.dataf.seek(start)
205 208 data = self.dataf.read(end - start)
206 209 findend = end - start
207 210 while True:
208 211 # we're searching backwards, so we have to make sure
209 212 # we don't find a changeset where this node is a parent
210 213 off = data.find(node, 0, findend)
211 214 findend = off
212 215 if off >= 0:
213 216 i = off / self.s
214 217 off = i * self.s
215 218 n = data[off + ngshaoffset:off + ngshaoffset + 20]
216 219 if n == node:
217 220 self.map[n] = i + start / self.s
218 221 return node
219 222 else:
220 223 break
221 224 end -= blocksize
222 225 return None
223 226
224 227 def loadindex(self, i=None, end=None):
225 228 if self.all:
226 229 return
227 230 all = False
228 231 if i == None:
229 232 blockstart = 0
230 233 blocksize = (65536 / self.s) * self.s
231 234 end = self.datasize
232 235 all = True
233 236 else:
234 237 if end:
235 238 blockstart = i * self.s
236 239 end = end * self.s
237 240 blocksize = end - blockstart
238 241 else:
239 242 blockstart = (i & ~1023) * self.s
240 243 blocksize = self.s * 1024
241 244 end = blockstart + blocksize
242 245 while blockstart < end:
243 246 self.loadblock(blockstart, blocksize)
244 247 blockstart += blocksize
245 248 if all:
246 249 self.all = True
247 250
248 251 class lazyindex(object):
249 252 """a lazy version of the index array"""
250 253 def __init__(self, parser):
251 254 self.p = parser
252 255 def __len__(self):
253 256 return len(self.p.index)
254 257 def load(self, pos):
255 258 if pos < 0:
256 259 pos += len(self.p.index)
257 260 self.p.loadindex(pos)
258 261 return self.p.index[pos]
259 262 def __getitem__(self, pos):
260 263 return _unpack(indexformatng, self.p.index[pos] or self.load(pos))
261 264 def __setitem__(self, pos, item):
262 265 self.p.index[pos] = _pack(indexformatng, *item)
263 266 def __delitem__(self, pos):
264 267 del self.p.index[pos]
265 268 def insert(self, pos, e):
266 269 self.p.index.insert(pos, _pack(indexformatng, *e))
267 270 def append(self, e):
268 271 self.p.index.append(_pack(indexformatng, *e))
269 272
270 273 class lazymap(object):
271 274 """a lazy version of the node map"""
272 275 def __init__(self, parser):
273 276 self.p = parser
274 277 def load(self, key):
275 278 n = self.p.findnode(key)
276 279 if n == None:
277 280 raise KeyError(key)
278 281 def __contains__(self, key):
279 282 if key in self.p.map:
280 283 return True
281 284 self.p.loadmap()
282 285 return key in self.p.map
283 286 def __iter__(self):
284 287 yield nullid
285 288 for i in xrange(self.p.l):
286 289 ret = self.p.index[i]
287 290 if not ret:
288 291 self.p.loadindex(i)
289 292 ret = self.p.index[i]
290 293 if isinstance(ret, str):
291 294 ret = _unpack(indexformatng, ret)
292 295 yield ret[7]
293 296 def __getitem__(self, key):
294 297 try:
295 298 return self.p.map[key]
296 299 except KeyError:
297 300 try:
298 301 self.load(key)
299 302 return self.p.map[key]
300 303 except KeyError:
301 304 raise KeyError("node " + hex(key))
302 305 def __setitem__(self, key, val):
303 306 self.p.map[key] = val
304 307 def __delitem__(self, key):
305 308 del self.p.map[key]
306 309
307 310 indexformatv0 = ">4l20s20s20s"
308 311 v0shaoffset = 56
309 312
310 313 class revlogoldio(object):
311 314 def __init__(self):
312 315 self.size = struct.calcsize(indexformatv0)
313 316
314 317 def parseindex(self, fp, inline):
315 318 s = self.size
316 319 index = []
317 320 nodemap = {nullid: nullrev}
318 321 n = off = 0
319 322 data = fp.read()
320 323 l = len(data)
321 324 while off + s <= l:
322 325 cur = data[off:off + s]
323 326 off += s
324 327 e = _unpack(indexformatv0, cur)
325 328 # transform to revlogv1 format
326 329 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
327 330 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
328 331 index.append(e2)
329 332 nodemap[e[6]] = n
330 333 n += 1
331 334
332 335 return index, nodemap, None
333 336
334 337 def packentry(self, entry, node, version, rev):
335 338 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
336 339 node(entry[5]), node(entry[6]), entry[7])
337 340 return _pack(indexformatv0, *e2)
338 341
339 342 # index ng:
340 343 # 6 bytes offset
341 344 # 2 bytes flags
342 345 # 4 bytes compressed length
343 346 # 4 bytes uncompressed length
344 347 # 4 bytes: base rev
345 348 # 4 bytes link rev
346 349 # 4 bytes parent 1 rev
347 350 # 4 bytes parent 2 rev
348 351 # 32 bytes: nodeid
349 352 indexformatng = ">Qiiiiii20s12x"
350 353 ngshaoffset = 32
351 354 versionformat = ">I"
352 355
353 356 class revlogio(object):
354 357 def __init__(self):
355 358 self.size = struct.calcsize(indexformatng)
356 359
357 360 def parseindex(self, fp, inline):
358 361 try:
359 362 size = util.fstat(fp).st_size
360 363 except AttributeError:
361 364 size = 0
362 365
363 366 if util.openhardlinks() and not inline and size > 1000000:
364 367 # big index, let's parse it on demand
365 368 parser = lazyparser(fp, size)
366 369 index = lazyindex(parser)
367 370 nodemap = lazymap(parser)
368 371 e = list(index[0])
369 372 type = gettype(e[0])
370 373 e[0] = offset_type(0, type)
371 374 index[0] = e
372 375 return index, nodemap, None
373 376
374 377 s = self.size
375 378 cache = None
376 379 index = []
377 380 nodemap = {nullid: nullrev}
378 381 n = off = 0
379 382 # if we're not using lazymap, always read the whole index
380 383 data = fp.read()
381 384 l = len(data) - s
382 385 append = index.append
383 386 if inline:
384 387 cache = (0, data)
385 388 while off <= l:
386 389 e = _unpack(indexformatng, data[off:off + s])
387 390 nodemap[e[7]] = n
388 391 append(e)
389 392 n += 1
390 393 if e[1] < 0:
391 394 break
392 395 off += e[1] + s
393 396 else:
394 397 while off <= l:
395 398 e = _unpack(indexformatng, data[off:off + s])
396 399 nodemap[e[7]] = n
397 400 append(e)
398 401 n += 1
399 402 off += s
400 403
401 404 e = list(index[0])
402 405 type = gettype(e[0])
403 406 e[0] = offset_type(0, type)
404 407 index[0] = e
405 408
406 409 return index, nodemap, cache
407 410
408 411 def packentry(self, entry, node, version, rev):
409 412 p = _pack(indexformatng, *entry)
410 413 if rev == 0:
411 414 p = _pack(versionformat, version) + p[4:]
412 415 return p
413 416
414 417 class revlog(object):
415 418 """
416 419 the underlying revision storage object
417 420
418 421 A revlog consists of two parts, an index and the revision data.
419 422
420 423 The index is a file with a fixed record size containing
421 424 information on each revision, includings its nodeid (hash), the
422 425 nodeids of its parents, the position and offset of its data within
423 426 the data file, and the revision it's based on. Finally, each entry
424 427 contains a linkrev entry that can serve as a pointer to external
425 428 data.
426 429
427 430 The revision data itself is a linear collection of data chunks.
428 431 Each chunk represents a revision and is usually represented as a
429 432 delta against the previous chunk. To bound lookup time, runs of
430 433 deltas are limited to about 2 times the length of the original
431 434 version data. This makes retrieval of a version proportional to
432 435 its size, or O(1) relative to the number of revisions.
433 436
434 437 Both pieces of the revlog are written to in an append-only
435 438 fashion, which means we never need to rewrite a file to insert or
436 439 remove data, and can use some simple techniques to avoid the need
437 440 for locking while reading.
438 441 """
439 442 def __init__(self, opener, indexfile):
440 443 """
441 444 create a revlog object
442 445
443 446 opener is a function that abstracts the file opening operation
444 447 and can be used to implement COW semantics or the like.
445 448 """
446 449 self.indexfile = indexfile
447 450 self.datafile = indexfile[:-2] + ".d"
448 451 self.opener = opener
449 452 self._cache = None
450 453 self._chunkcache = None
451 454 self.nodemap = {nullid: nullrev}
452 455 self.index = []
453 456
454 457 v = REVLOG_DEFAULT_VERSION
455 458 if hasattr(opener, "defversion"):
456 459 v = opener.defversion
457 460 if v & REVLOGNG:
458 461 v |= REVLOGNGINLINEDATA
459 462
460 463 i = ""
461 464 try:
462 465 f = self.opener(self.indexfile)
463 466 i = f.read(4)
464 467 f.seek(0)
465 468 if len(i) > 0:
466 469 v = struct.unpack(versionformat, i)[0]
467 470 except IOError, inst:
468 471 if inst.errno != errno.ENOENT:
469 472 raise
470 473
471 474 self.version = v
472 475 self._inline = v & REVLOGNGINLINEDATA
473 476 flags = v & ~0xFFFF
474 477 fmt = v & 0xFFFF
475 478 if fmt == REVLOGV0 and flags:
476 479 raise RevlogError(_("index %s unknown flags %#04x for format v0")
477 480 % (self.indexfile, flags >> 16))
478 481 elif fmt == REVLOGNG and flags & ~REVLOGNGINLINEDATA:
479 482 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
480 483 % (self.indexfile, flags >> 16))
481 484 elif fmt > REVLOGNG:
482 485 raise RevlogError(_("index %s unknown format %d")
483 486 % (self.indexfile, fmt))
484 487
485 488 self._io = revlogio()
486 489 if self.version == REVLOGV0:
487 490 self._io = revlogoldio()
488 491 if i:
489 492 d = self._io.parseindex(f, self._inline)
490 493 self.index, self.nodemap, self._chunkcache = d
491 494
492 495 # add the magic null revision at -1
493 496 self.index.append((0, 0, 0, -1, -1, -1, -1, nullid))
494 497
495 498 def _loadindex(self, start, end):
496 499 """load a block of indexes all at once from the lazy parser"""
497 500 if isinstance(self.index, lazyindex):
498 501 self.index.p.loadindex(start, end)
499 502
500 503 def _loadindexmap(self):
501 504 """loads both the map and the index from the lazy parser"""
502 505 if isinstance(self.index, lazyindex):
503 506 p = self.index.p
504 507 p.loadindex()
505 508 self.nodemap = p.map
506 509
507 510 def _loadmap(self):
508 511 """loads the map from the lazy parser"""
509 512 if isinstance(self.nodemap, lazymap):
510 513 self.nodemap.p.loadmap()
511 514 self.nodemap = self.nodemap.p.map
512 515
513 516 def tip(self):
514 517 return self.node(len(self.index) - 2)
515 518 def count(self):
516 519 return len(self.index) - 1
517 520
518 521 def rev(self, node):
519 522 try:
520 523 return self.nodemap[node]
521 524 except KeyError:
522 525 raise LookupError(node, self.indexfile, _('no node'))
523 526 def node(self, rev):
524 527 return self.index[rev][7]
525 528 def linkrev(self, node):
526 529 return self.index[self.rev(node)][4]
527 530 def parents(self, node):
528 531 d = self.index[self.rev(node)][5:7]
529 532 return (self.node(d[0]), self.node(d[1]))
530 533 def parentrevs(self, rev):
531 534 return self.index[rev][5:7]
532 535 def start(self, rev):
533 536 return int(self.index[rev][0] >> 16)
534 537 def end(self, rev):
535 538 return self.start(rev) + self.length(rev)
536 539 def length(self, rev):
537 540 return self.index[rev][1]
538 541 def base(self, rev):
539 542 return self.index[rev][3]
540 543
541 544 def size(self, rev):
542 545 """return the length of the uncompressed text for a given revision"""
543 546 l = self.index[rev][2]
544 547 if l >= 0:
545 548 return l
546 549
547 550 t = self.revision(self.node(rev))
548 551 return len(t)
549 552
550 553 # alternate implementation, The advantage to this code is it
551 554 # will be faster for a single revision. But, the results are not
552 555 # cached, so finding the size of every revision will be slower.
553 556 """
554 557 if self.cache and self.cache[1] == rev:
555 558 return len(self.cache[2])
556 559
557 560 base = self.base(rev)
558 561 if self.cache and self.cache[1] >= base and self.cache[1] < rev:
559 562 base = self.cache[1]
560 563 text = self.cache[2]
561 564 else:
562 565 text = self.revision(self.node(base))
563 566
564 567 l = len(text)
565 568 for x in xrange(base + 1, rev + 1):
566 569 l = mdiff.patchedsize(l, self.chunk(x))
567 570 return l
568 571 """
569 572
570 573 def reachable(self, node, stop=None):
571 574 """return a hash of all nodes ancestral to a given node, including
572 575 the node itself, stopping when stop is matched"""
573 576 reachable = {}
574 577 visit = [node]
575 578 reachable[node] = 1
576 579 if stop:
577 580 stopn = self.rev(stop)
578 581 else:
579 582 stopn = 0
580 583 while visit:
581 584 n = visit.pop(0)
582 585 if n == stop:
583 586 continue
584 587 if n == nullid:
585 588 continue
586 589 for p in self.parents(n):
587 590 if self.rev(p) < stopn:
588 591 continue
589 592 if p not in reachable:
590 593 reachable[p] = 1
591 594 visit.append(p)
592 595 return reachable
593 596
594 597 def nodesbetween(self, roots=None, heads=None):
595 598 """Return a tuple containing three elements. Elements 1 and 2 contain
596 599 a final list bases and heads after all the unreachable ones have been
597 600 pruned. Element 0 contains a topologically sorted list of all
598 601
599 602 nodes that satisfy these constraints:
600 603 1. All nodes must be descended from a node in roots (the nodes on
601 604 roots are considered descended from themselves).
602 605 2. All nodes must also be ancestors of a node in heads (the nodes in
603 606 heads are considered to be their own ancestors).
604 607
605 608 If roots is unspecified, nullid is assumed as the only root.
606 609 If heads is unspecified, it is taken to be the output of the
607 610 heads method (i.e. a list of all nodes in the repository that
608 611 have no children)."""
609 612 nonodes = ([], [], [])
610 613 if roots is not None:
611 614 roots = list(roots)
612 615 if not roots:
613 616 return nonodes
614 617 lowestrev = min([self.rev(n) for n in roots])
615 618 else:
616 619 roots = [nullid] # Everybody's a descendent of nullid
617 620 lowestrev = nullrev
618 621 if (lowestrev == nullrev) and (heads is None):
619 622 # We want _all_ the nodes!
620 623 return ([self.node(r) for r in xrange(0, self.count())],
621 624 [nullid], list(self.heads()))
622 625 if heads is None:
623 626 # All nodes are ancestors, so the latest ancestor is the last
624 627 # node.
625 628 highestrev = self.count() - 1
626 629 # Set ancestors to None to signal that every node is an ancestor.
627 630 ancestors = None
628 631 # Set heads to an empty dictionary for later discovery of heads
629 632 heads = {}
630 633 else:
631 634 heads = list(heads)
632 635 if not heads:
633 636 return nonodes
634 637 ancestors = {}
635 638 # Turn heads into a dictionary so we can remove 'fake' heads.
636 639 # Also, later we will be using it to filter out the heads we can't
637 640 # find from roots.
638 641 heads = dict.fromkeys(heads, 0)
639 642 # Start at the top and keep marking parents until we're done.
640 643 nodestotag = heads.keys()
641 644 # Remember where the top was so we can use it as a limit later.
642 645 highestrev = max([self.rev(n) for n in nodestotag])
643 646 while nodestotag:
644 647 # grab a node to tag
645 648 n = nodestotag.pop()
646 649 # Never tag nullid
647 650 if n == nullid:
648 651 continue
649 652 # A node's revision number represents its place in a
650 653 # topologically sorted list of nodes.
651 654 r = self.rev(n)
652 655 if r >= lowestrev:
653 656 if n not in ancestors:
654 657 # If we are possibly a descendent of one of the roots
655 658 # and we haven't already been marked as an ancestor
656 659 ancestors[n] = 1 # Mark as ancestor
657 660 # Add non-nullid parents to list of nodes to tag.
658 661 nodestotag.extend([p for p in self.parents(n) if
659 662 p != nullid])
660 663 elif n in heads: # We've seen it before, is it a fake head?
661 664 # So it is, real heads should not be the ancestors of
662 665 # any other heads.
663 666 heads.pop(n)
664 667 if not ancestors:
665 668 return nonodes
666 669 # Now that we have our set of ancestors, we want to remove any
667 670 # roots that are not ancestors.
668 671
669 672 # If one of the roots was nullid, everything is included anyway.
670 673 if lowestrev > nullrev:
671 674 # But, since we weren't, let's recompute the lowest rev to not
672 675 # include roots that aren't ancestors.
673 676
674 677 # Filter out roots that aren't ancestors of heads
675 678 roots = [n for n in roots if n in ancestors]
676 679 # Recompute the lowest revision
677 680 if roots:
678 681 lowestrev = min([self.rev(n) for n in roots])
679 682 else:
680 683 # No more roots? Return empty list
681 684 return nonodes
682 685 else:
683 686 # We are descending from nullid, and don't need to care about
684 687 # any other roots.
685 688 lowestrev = nullrev
686 689 roots = [nullid]
687 690 # Transform our roots list into a 'set' (i.e. a dictionary where the
688 691 # values don't matter.
689 692 descendents = dict.fromkeys(roots, 1)
690 693 # Also, keep the original roots so we can filter out roots that aren't
691 694 # 'real' roots (i.e. are descended from other roots).
692 695 roots = descendents.copy()
693 696 # Our topologically sorted list of output nodes.
694 697 orderedout = []
695 698 # Don't start at nullid since we don't want nullid in our output list,
696 699 # and if nullid shows up in descedents, empty parents will look like
697 700 # they're descendents.
698 701 for r in xrange(max(lowestrev, 0), highestrev + 1):
699 702 n = self.node(r)
700 703 isdescendent = False
701 704 if lowestrev == nullrev: # Everybody is a descendent of nullid
702 705 isdescendent = True
703 706 elif n in descendents:
704 707 # n is already a descendent
705 708 isdescendent = True
706 709 # This check only needs to be done here because all the roots
707 710 # will start being marked is descendents before the loop.
708 711 if n in roots:
709 712 # If n was a root, check if it's a 'real' root.
710 713 p = tuple(self.parents(n))
711 714 # If any of its parents are descendents, it's not a root.
712 715 if (p[0] in descendents) or (p[1] in descendents):
713 716 roots.pop(n)
714 717 else:
715 718 p = tuple(self.parents(n))
716 719 # A node is a descendent if either of its parents are
717 720 # descendents. (We seeded the dependents list with the roots
718 721 # up there, remember?)
719 722 if (p[0] in descendents) or (p[1] in descendents):
720 723 descendents[n] = 1
721 724 isdescendent = True
722 725 if isdescendent and ((ancestors is None) or (n in ancestors)):
723 726 # Only include nodes that are both descendents and ancestors.
724 727 orderedout.append(n)
725 728 if (ancestors is not None) and (n in heads):
726 729 # We're trying to figure out which heads are reachable
727 730 # from roots.
728 731 # Mark this head as having been reached
729 732 heads[n] = 1
730 733 elif ancestors is None:
731 734 # Otherwise, we're trying to discover the heads.
732 735 # Assume this is a head because if it isn't, the next step
733 736 # will eventually remove it.
734 737 heads[n] = 1
735 738 # But, obviously its parents aren't.
736 739 for p in self.parents(n):
737 740 heads.pop(p, None)
738 741 heads = [n for n in heads.iterkeys() if heads[n] != 0]
739 742 roots = roots.keys()
740 743 assert orderedout
741 744 assert roots
742 745 assert heads
743 746 return (orderedout, roots, heads)
744 747
745 748 def heads(self, start=None, stop=None):
746 749 """return the list of all nodes that have no children
747 750
748 751 if start is specified, only heads that are descendants of
749 752 start will be returned
750 753 if stop is specified, it will consider all the revs from stop
751 754 as if they had no children
752 755 """
753 756 if start is None and stop is None:
754 757 count = self.count()
755 758 if not count:
756 759 return [nullid]
757 760 ishead = [1] * (count + 1)
758 761 index = self.index
759 762 for r in xrange(count):
760 763 e = index[r]
761 764 ishead[e[5]] = ishead[e[6]] = 0
762 765 return [self.node(r) for r in xrange(count) if ishead[r]]
763 766
764 767 if start is None:
765 768 start = nullid
766 769 if stop is None:
767 770 stop = []
768 771 stoprevs = dict.fromkeys([self.rev(n) for n in stop])
769 772 startrev = self.rev(start)
770 773 reachable = {startrev: 1}
771 774 heads = {startrev: 1}
772 775
773 776 parentrevs = self.parentrevs
774 777 for r in xrange(startrev + 1, self.count()):
775 778 for p in parentrevs(r):
776 779 if p in reachable:
777 780 if r not in stoprevs:
778 781 reachable[r] = 1
779 782 heads[r] = 1
780 783 if p in heads and p not in stoprevs:
781 784 del heads[p]
782 785
783 786 return [self.node(r) for r in heads]
784 787
785 788 def children(self, node):
786 789 """find the children of a given node"""
787 790 c = []
788 791 p = self.rev(node)
789 792 for r in range(p + 1, self.count()):
790 793 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
791 794 if prevs:
792 795 for pr in prevs:
793 796 if pr == p:
794 797 c.append(self.node(r))
795 798 elif p == nullrev:
796 799 c.append(self.node(r))
797 800 return c
798 801
799 802 def _match(self, id):
800 803 if isinstance(id, (long, int)):
801 804 # rev
802 805 return self.node(id)
803 806 if len(id) == 20:
804 807 # possibly a binary node
805 808 # odds of a binary node being all hex in ASCII are 1 in 10**25
806 809 try:
807 810 node = id
808 811 r = self.rev(node) # quick search the index
809 812 return node
810 813 except LookupError:
811 814 pass # may be partial hex id
812 815 try:
813 816 # str(rev)
814 817 rev = int(id)
815 818 if str(rev) != id:
816 819 raise ValueError
817 820 if rev < 0:
818 821 rev = self.count() + rev
819 822 if rev < 0 or rev >= self.count():
820 823 raise ValueError
821 824 return self.node(rev)
822 825 except (ValueError, OverflowError):
823 826 pass
824 827 if len(id) == 40:
825 828 try:
826 829 # a full hex nodeid?
827 830 node = bin(id)
828 831 r = self.rev(node)
829 832 return node
830 833 except TypeError:
831 834 pass
832 835
833 836 def _partialmatch(self, id):
834 837 if len(id) < 40:
835 838 try:
836 839 # hex(node)[:...]
837 840 bin_id = bin(id[:len(id) & ~1]) # grab an even number of digits
838 841 node = None
839 842 for n in self.nodemap:
840 843 if n.startswith(bin_id) and hex(n).startswith(id):
841 844 if node is not None:
842 845 raise LookupError(id, self.indexfile,
843 846 _('ambiguous identifier'))
844 847 node = n
845 848 if node is not None:
846 849 return node
847 850 except TypeError:
848 851 pass
849 852
850 853 def lookup(self, id):
851 854 """locate a node based on:
852 855 - revision number or str(revision number)
853 856 - nodeid or subset of hex nodeid
854 857 """
855 858 n = self._match(id)
856 859 if n is not None:
857 860 return n
858 861 n = self._partialmatch(id)
859 862 if n:
860 863 return n
861 864
862 865 raise LookupError(id, self.indexfile, _('no match found'))
863 866
864 867 def cmp(self, node, text):
865 868 """compare text with a given file revision"""
866 869 p1, p2 = self.parents(node)
867 870 return hash(text, p1, p2) != node
868 871
869 872 def chunk(self, rev, df=None):
870 873 def loadcache(df):
871 874 if not df:
872 875 if self._inline:
873 876 df = self.opener(self.indexfile)
874 877 else:
875 878 df = self.opener(self.datafile)
876 879 df.seek(start)
877 880 self._chunkcache = (start, df.read(cache_length))
878 881
879 882 start, length = self.start(rev), self.length(rev)
880 883 if self._inline:
881 884 start += (rev + 1) * self._io.size
882 885 end = start + length
883 886
884 887 offset = 0
885 888 if not self._chunkcache:
886 889 cache_length = max(65536, length)
887 890 loadcache(df)
888 891 else:
889 892 cache_start = self._chunkcache[0]
890 893 cache_length = len(self._chunkcache[1])
891 894 cache_end = cache_start + cache_length
892 895 if start >= cache_start and end <= cache_end:
893 896 # it is cached
894 897 offset = start - cache_start
895 898 else:
896 899 cache_length = max(65536, length)
897 900 loadcache(df)
898 901
899 902 # avoid copying large chunks
900 903 c = self._chunkcache[1]
901 904 if cache_length != length:
902 905 c = c[offset:offset + length]
903 906
904 907 return decompress(c)
905 908
906 909 def delta(self, node):
907 910 """return or calculate a delta between a node and its predecessor"""
908 911 r = self.rev(node)
909 912 return self.revdiff(r - 1, r)
910 913
911 914 def revdiff(self, rev1, rev2):
912 915 """return or calculate a delta between two revisions"""
913 916 if rev1 + 1 == rev2 and self.base(rev1) == self.base(rev2):
914 917 return self.chunk(rev2)
915 918
916 919 return mdiff.textdiff(self.revision(self.node(rev1)),
917 920 self.revision(self.node(rev2)))
918 921
919 922 def revision(self, node):
920 923 """return an uncompressed revision of a given"""
921 924 if node == nullid:
922 925 return ""
923 926 if self._cache and self._cache[0] == node:
924 927 return str(self._cache[2])
925 928
926 929 # look up what we need to read
927 930 text = None
928 931 rev = self.rev(node)
929 932 base = self.base(rev)
930 933
931 934 # check rev flags
932 935 if self.index[rev][0] & 0xFFFF:
933 936 raise RevlogError(_('incompatible revision flag %x') %
934 937 (self.index[rev][0] & 0xFFFF))
935 938
936 939 df = None
937 940
938 941 # do we have useful data cached?
939 942 if self._cache and self._cache[1] >= base and self._cache[1] < rev:
940 943 base = self._cache[1]
941 944 text = str(self._cache[2])
942 945 self._loadindex(base, rev + 1)
943 946 if not self._inline and rev > base + 1:
944 947 df = self.opener(self.datafile)
945 948 else:
946 949 self._loadindex(base, rev + 1)
947 950 if not self._inline and rev > base:
948 951 df = self.opener(self.datafile)
949 952 text = self.chunk(base, df=df)
950 953
951 954 bins = [self.chunk(r, df) for r in xrange(base + 1, rev + 1)]
952 955 text = mdiff.patches(text, bins)
953 956 p1, p2 = self.parents(node)
954 957 if node != hash(text, p1, p2):
955 958 raise RevlogError(_("integrity check failed on %s:%d")
956 959 % (self.datafile, rev))
957 960
958 961 self._cache = (node, rev, text)
959 962 return text
960 963
961 964 def checkinlinesize(self, tr, fp=None):
962 965 if not self._inline:
963 966 return
964 967 if not fp:
965 968 fp = self.opener(self.indexfile, 'r')
966 969 fp.seek(0, 2)
967 970 size = fp.tell()
968 971 if size < 131072:
969 972 return
970 973 trinfo = tr.find(self.indexfile)
971 974 if trinfo == None:
972 975 raise RevlogError(_("%s not found in the transaction")
973 976 % self.indexfile)
974 977
975 978 trindex = trinfo[2]
976 979 dataoff = self.start(trindex)
977 980
978 981 tr.add(self.datafile, dataoff)
979 982 df = self.opener(self.datafile, 'w')
980 983 try:
981 984 calc = self._io.size
982 985 for r in xrange(self.count()):
983 986 start = self.start(r) + (r + 1) * calc
984 987 length = self.length(r)
985 988 fp.seek(start)
986 989 d = fp.read(length)
987 990 df.write(d)
988 991 finally:
989 992 df.close()
990 993
991 994 fp.close()
992 995 fp = self.opener(self.indexfile, 'w', atomictemp=True)
993 996 self.version &= ~(REVLOGNGINLINEDATA)
994 997 self._inline = False
995 998 for i in xrange(self.count()):
996 999 e = self._io.packentry(self.index[i], self.node, self.version, i)
997 1000 fp.write(e)
998 1001
999 1002 # if we don't call rename, the temp file will never replace the
1000 1003 # real index
1001 1004 fp.rename()
1002 1005
1003 1006 tr.replace(self.indexfile, trindex * calc)
1004 1007 self._chunkcache = None
1005 1008
1006 1009 def addrevision(self, text, transaction, link, p1, p2, d=None):
1007 1010 """add a revision to the log
1008 1011
1009 1012 text - the revision data to add
1010 1013 transaction - the transaction object used for rollback
1011 1014 link - the linkrev data to add
1012 1015 p1, p2 - the parent nodeids of the revision
1013 1016 d - an optional precomputed delta
1014 1017 """
1015 1018 dfh = None
1016 1019 if not self._inline:
1017 1020 dfh = self.opener(self.datafile, "a")
1018 1021 ifh = self.opener(self.indexfile, "a+")
1019 1022 try:
1020 1023 return self._addrevision(text, transaction, link, p1, p2, d, ifh, dfh)
1021 1024 finally:
1022 1025 if dfh:
1023 1026 dfh.close()
1024 1027 ifh.close()
1025 1028
1026 1029 def _addrevision(self, text, transaction, link, p1, p2, d, ifh, dfh):
1027 1030 node = hash(text, p1, p2)
1028 1031 if node in self.nodemap:
1029 1032 return node
1030 1033
1031 1034 curr = self.count()
1032 1035 prev = curr - 1
1033 1036 base = self.base(prev)
1034 1037 offset = self.end(prev)
1035 1038
1036 1039 if curr:
1037 1040 if not d:
1038 1041 ptext = self.revision(self.node(prev))
1039 1042 d = mdiff.textdiff(ptext, text)
1040 1043 data = compress(d)
1041 1044 l = len(data[1]) + len(data[0])
1042 1045 dist = l + offset - self.start(base)
1043 1046
1044 1047 # full versions are inserted when the needed deltas
1045 1048 # become comparable to the uncompressed text
1046 1049 if not curr or dist > len(text) * 2:
1047 1050 data = compress(text)
1048 1051 l = len(data[1]) + len(data[0])
1049 1052 base = curr
1050 1053
1051 1054 e = (offset_type(offset, 0), l, len(text),
1052 1055 base, link, self.rev(p1), self.rev(p2), node)
1053 1056 self.index.insert(-1, e)
1054 1057 self.nodemap[node] = curr
1055 1058
1056 1059 entry = self._io.packentry(e, self.node, self.version, curr)
1057 1060 if not self._inline:
1058 1061 transaction.add(self.datafile, offset)
1059 1062 transaction.add(self.indexfile, curr * len(entry))
1060 1063 if data[0]:
1061 1064 dfh.write(data[0])
1062 1065 dfh.write(data[1])
1063 1066 dfh.flush()
1064 1067 ifh.write(entry)
1065 1068 else:
1066 1069 offset += curr * self._io.size
1067 1070 transaction.add(self.indexfile, offset, curr)
1068 1071 ifh.write(entry)
1069 1072 ifh.write(data[0])
1070 1073 ifh.write(data[1])
1071 1074 self.checkinlinesize(transaction, ifh)
1072 1075
1073 1076 self._cache = (node, curr, text)
1074 1077 return node
1075 1078
1076 1079 def ancestor(self, a, b):
1077 1080 """calculate the least common ancestor of nodes a and b"""
1078 1081
1079 1082 def parents(rev):
1080 1083 return [p for p in self.parentrevs(rev) if p != nullrev]
1081 1084
1082 1085 c = ancestor.ancestor(self.rev(a), self.rev(b), parents)
1083 1086 if c is None:
1084 1087 return nullid
1085 1088
1086 1089 return self.node(c)
1087 1090
1088 1091 def group(self, nodelist, lookup, infocollect=None):
1089 1092 """calculate a delta group
1090 1093
1091 1094 Given a list of changeset revs, return a set of deltas and
1092 1095 metadata corresponding to nodes. the first delta is
1093 1096 parent(nodes[0]) -> nodes[0] the receiver is guaranteed to
1094 1097 have this parent as it has all history before these
1095 1098 changesets. parent is parent[0]
1096 1099 """
1097 1100 revs = [self.rev(n) for n in nodelist]
1098 1101
1099 1102 # if we don't have any revisions touched by these changesets, bail
1100 1103 if not revs:
1101 1104 yield changegroup.closechunk()
1102 1105 return
1103 1106
1104 1107 # add the parent of the first rev
1105 1108 p = self.parents(self.node(revs[0]))[0]
1106 1109 revs.insert(0, self.rev(p))
1107 1110
1108 1111 # build deltas
1109 1112 for d in xrange(0, len(revs) - 1):
1110 1113 a, b = revs[d], revs[d + 1]
1111 1114 nb = self.node(b)
1112 1115
1113 1116 if infocollect is not None:
1114 1117 infocollect(nb)
1115 1118
1116 1119 p = self.parents(nb)
1117 1120 meta = nb + p[0] + p[1] + lookup(nb)
1118 1121 if a == -1:
1119 1122 d = self.revision(nb)
1120 1123 meta += mdiff.trivialdiffheader(len(d))
1121 1124 else:
1122 1125 d = self.revdiff(a, b)
1123 1126 yield changegroup.chunkheader(len(meta) + len(d))
1124 1127 yield meta
1125 1128 if len(d) > 2**20:
1126 1129 pos = 0
1127 1130 while pos < len(d):
1128 1131 pos2 = pos + 2 ** 18
1129 1132 yield d[pos:pos2]
1130 1133 pos = pos2
1131 1134 else:
1132 1135 yield d
1133 1136
1134 1137 yield changegroup.closechunk()
1135 1138
1136 1139 def addgroup(self, revs, linkmapper, transaction):
1137 1140 """
1138 1141 add a delta group
1139 1142
1140 1143 given a set of deltas, add them to the revision log. the
1141 1144 first delta is against its parent, which should be in our
1142 1145 log, the rest are against the previous delta.
1143 1146 """
1144 1147
1145 1148 #track the base of the current delta log
1146 1149 r = self.count()
1147 1150 t = r - 1
1148 1151 node = None
1149 1152
1150 1153 base = prev = nullrev
1151 1154 start = end = textlen = 0
1152 1155 if r:
1153 1156 end = self.end(t)
1154 1157
1155 1158 ifh = self.opener(self.indexfile, "a+")
1156 1159 isize = r * self._io.size
1157 1160 if self._inline:
1158 1161 transaction.add(self.indexfile, end + isize, r)
1159 1162 dfh = None
1160 1163 else:
1161 1164 transaction.add(self.indexfile, isize, r)
1162 1165 transaction.add(self.datafile, end)
1163 1166 dfh = self.opener(self.datafile, "a")
1164 1167
1165 1168 try:
1166 1169 # loop through our set of deltas
1167 1170 chain = None
1168 1171 for chunk in revs:
1169 1172 node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
1170 1173 link = linkmapper(cs)
1171 1174 if node in self.nodemap:
1172 1175 # this can happen if two branches make the same change
1173 1176 chain = node
1174 1177 continue
1175 1178 delta = buffer(chunk, 80)
1176 1179 del chunk
1177 1180
1178 1181 for p in (p1, p2):
1179 1182 if not p in self.nodemap:
1180 1183 raise LookupError(p, self.indexfile, _('unknown parent'))
1181 1184
1182 1185 if not chain:
1183 1186 # retrieve the parent revision of the delta chain
1184 1187 chain = p1
1185 1188 if not chain in self.nodemap:
1186 1189 raise LookupError(chain, self.indexfile, _('unknown base'))
1187 1190
1188 1191 # full versions are inserted when the needed deltas become
1189 1192 # comparable to the uncompressed text or when the previous
1190 1193 # version is not the one we have a delta against. We use
1191 1194 # the size of the previous full rev as a proxy for the
1192 1195 # current size.
1193 1196
1194 1197 if chain == prev:
1195 1198 cdelta = compress(delta)
1196 1199 cdeltalen = len(cdelta[0]) + len(cdelta[1])
1197 1200 textlen = mdiff.patchedsize(textlen, delta)
1198 1201
1199 1202 if chain != prev or (end - start + cdeltalen) > textlen * 2:
1200 1203 # flush our writes here so we can read it in revision
1201 1204 if dfh:
1202 1205 dfh.flush()
1203 1206 ifh.flush()
1204 1207 text = self.revision(chain)
1205 1208 if len(text) == 0:
1206 1209 # skip over trivial delta header
1207 1210 text = buffer(delta, 12)
1208 1211 else:
1209 1212 text = mdiff.patches(text, [delta])
1210 1213 del delta
1211 1214 chk = self._addrevision(text, transaction, link, p1, p2, None,
1212 1215 ifh, dfh)
1213 1216 if not dfh and not self._inline:
1214 1217 # addrevision switched from inline to conventional
1215 1218 # reopen the index
1216 1219 dfh = self.opener(self.datafile, "a")
1217 1220 ifh = self.opener(self.indexfile, "a")
1218 1221 if chk != node:
1219 1222 raise RevlogError(_("consistency error adding group"))
1220 1223 textlen = len(text)
1221 1224 else:
1222 1225 e = (offset_type(end, 0), cdeltalen, textlen, base,
1223 1226 link, self.rev(p1), self.rev(p2), node)
1224 1227 self.index.insert(-1, e)
1225 1228 self.nodemap[node] = r
1226 1229 entry = self._io.packentry(e, self.node, self.version, r)
1227 1230 if self._inline:
1228 1231 ifh.write(entry)
1229 1232 ifh.write(cdelta[0])
1230 1233 ifh.write(cdelta[1])
1231 1234 self.checkinlinesize(transaction, ifh)
1232 1235 if not self._inline:
1233 1236 dfh = self.opener(self.datafile, "a")
1234 1237 ifh = self.opener(self.indexfile, "a")
1235 1238 else:
1236 1239 dfh.write(cdelta[0])
1237 1240 dfh.write(cdelta[1])
1238 1241 ifh.write(entry)
1239 1242
1240 1243 t, r, chain, prev = r, r + 1, node, node
1241 1244 base = self.base(t)
1242 1245 start = self.start(base)
1243 1246 end = self.end(t)
1244 1247 finally:
1245 1248 if dfh:
1246 1249 dfh.close()
1247 1250 ifh.close()
1248 1251
1249 1252 return node
1250 1253
1251 1254 def strip(self, minlink):
1252 1255 """truncate the revlog on the first revision with a linkrev >= minlink
1253 1256
1254 1257 This function is called when we're stripping revision minlink and
1255 1258 its descendants from the repository.
1256 1259
1257 1260 We have to remove all revisions with linkrev >= minlink, because
1258 1261 the equivalent changelog revisions will be renumbered after the
1259 1262 strip.
1260 1263
1261 1264 So we truncate the revlog on the first of these revisions, and
1262 1265 trust that the caller has saved the revisions that shouldn't be
1263 1266 removed and that it'll readd them after this truncation.
1264 1267 """
1265 1268 if self.count() == 0:
1266 1269 return
1267 1270
1268 1271 if isinstance(self.index, lazyindex):
1269 1272 self._loadindexmap()
1270 1273
1271 1274 for rev in xrange(0, self.count()):
1272 1275 if self.index[rev][4] >= minlink:
1273 1276 break
1274 1277 else:
1275 1278 return
1276 1279
1277 1280 # first truncate the files on disk
1278 1281 end = self.start(rev)
1279 1282 if not self._inline:
1280 1283 df = self.opener(self.datafile, "a")
1281 1284 df.truncate(end)
1282 1285 end = rev * self._io.size
1283 1286 else:
1284 1287 end += rev * self._io.size
1285 1288
1286 1289 indexf = self.opener(self.indexfile, "a")
1287 1290 indexf.truncate(end)
1288 1291
1289 1292 # then reset internal state in memory to forget those revisions
1290 1293 self._cache = None
1291 1294 self._chunkcache = None
1292 1295 for x in xrange(rev, self.count()):
1293 1296 del self.nodemap[self.node(x)]
1294 1297
1295 1298 del self.index[rev:-1]
1296 1299
1297 1300 def checksize(self):
1298 1301 expected = 0
1299 1302 if self.count():
1300 1303 expected = max(0, self.end(self.count() - 1))
1301 1304
1302 1305 try:
1303 1306 f = self.opener(self.datafile)
1304 1307 f.seek(0, 2)
1305 1308 actual = f.tell()
1306 1309 dd = actual - expected
1307 1310 except IOError, inst:
1308 1311 if inst.errno != errno.ENOENT:
1309 1312 raise
1310 1313 dd = 0
1311 1314
1312 1315 try:
1313 1316 f = self.opener(self.indexfile)
1314 1317 f.seek(0, 2)
1315 1318 actual = f.tell()
1316 1319 s = self._io.size
1317 1320 i = max(0, actual / s)
1318 1321 di = actual - (i * s)
1319 1322 if self._inline:
1320 1323 databytes = 0
1321 1324 for r in xrange(self.count()):
1322 1325 databytes += max(0, self.length(r))
1323 1326 dd = 0
1324 1327 di = actual - self.count() * s - databytes
1325 1328 except IOError, inst:
1326 1329 if inst.errno != errno.ENOENT:
1327 1330 raise
1328 1331 di = 0
1329 1332
1330 1333 return (dd, di)
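
A possible usage note (hypothetical call site; the node value and handler are placeholders, not from the commit): because LookupError also subclasses KeyError, callers that catch KeyError keep working, while code such as _match() above can catch LookupError specifically, and str() now yields the plain formatted message either way.

    # Hypothetical caller in the style of the module above (Python 2 except
    # syntax, matching the file); LookupError is the class defined in revlog.py.
    try:
        raise LookupError('abcdef123456', '00changelog.i', 'no node')
    except KeyError, inst:
        print('lookup failed: %s' % inst)   # lookup failed: 00changelog.i@abcdef123456: no node
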