revlog.revision: avoid opening the datafile without need....
Alexis S. L. Carvalho
r6144:08e0825b default
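In short: revision() used to open the revlog's datafile up front for every non-inline revlog, even when the requested text could be rebuilt entirely from the in-memory revision cache or the chunk cache. After this change the handle is opened eagerly only when more than one chunk actually has to be read; for a single chunk, chunk() is left to serve it from its own cache and to open the file itself only if it must. Below is a minimal, self-contained sketch of the new decision, not the real revlog API; the function name and parameters (inline, base, rev, cached_rev) are made up for illustration and merely stand in for the corresponding revlog state.

    def needs_datafile(inline, base, rev, cached_rev=None):
        # Sketch of the condition this change introduces: should
        # revision() open the datafile before reading chunks?
        if inline:
            # inline revlogs keep their data in the index file
            return False
        if cached_rev is not None and base <= cached_rev < rev:
            # a cached text replaces the base chunk, so the datafile is
            # only worth opening if at least two deltas remain to read
            return rev > cached_rev + 1
        # no usable cache: a single chunk can still go through chunk()'s
        # own cache, so open eagerly only for a multi-chunk delta chain
        return rev > base

    # e.g. revision() itself no longer opens the .d file when reading a
    # full-text (base) revision; chunk() opens it only on a cache miss
    assert not needs_datafile(inline=False, base=5, rev=5)
    assert needs_datafile(inline=False, base=5, rev=7)

The hunk below shows the actual change in context.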
@@ -1,1319 +1,1319 b''
1 1 """
2 2 revlog.py - storage back-end for mercurial
3 3
4 4 This provides efficient delta storage with O(1) retrieve and append
5 5 and O(changes) merge between branches
6 6
7 7 Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
8 8
9 9 This software may be used and distributed according to the terms
10 10 of the GNU General Public License, incorporated herein by reference.
11 11 """
12 12
13 13 from node import *
14 14 from i18n import _
15 15 import binascii, changegroup, errno, ancestor, mdiff, os
16 16 import sha, struct, util, zlib
17 17
18 18 _pack = struct.pack
19 19 _unpack = struct.unpack
20 20 _compress = zlib.compress
21 21 _decompress = zlib.decompress
22 22 _sha = sha.new
23 23
24 24 # revlog flags
25 25 REVLOGV0 = 0
26 26 REVLOGNG = 1
27 27 REVLOGNGINLINEDATA = (1 << 16)
28 28 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
29 29 REVLOG_DEFAULT_FORMAT = REVLOGNG
30 30 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
31 31
32 32 class RevlogError(Exception):
33 33 pass
34 34
35 35 class LookupError(RevlogError):
36 36 def __init__(self, name, message=None):
37 37 if message is None:
38 38 message = _('not found: %s') % name
39 39 RevlogError.__init__(self, message)
40 40 self.name = name
41 41
42 42 def getoffset(q):
43 43 return int(q >> 16)
44 44
45 45 def gettype(q):
46 46 return int(q & 0xFFFF)
47 47
48 48 def offset_type(offset, type):
49 49 return long(long(offset) << 16 | type)
50 50
51 51 def hash(text, p1, p2):
52 52 """generate a hash from the given text and its parent hashes
53 53
54 54 This hash combines both the current file contents and its history
55 55 in a manner that makes it easy to distinguish nodes with the same
56 56 content in the revision graph.
57 57 """
58 58 l = [p1, p2]
59 59 l.sort()
60 60 s = _sha(l[0])
61 61 s.update(l[1])
62 62 s.update(text)
63 63 return s.digest()
64 64
65 65 def compress(text):
66 66 """ generate a possibly-compressed representation of text """
67 67 if not text:
68 68 return ("", text)
69 69 l = len(text)
70 70 bin = None
71 71 if l < 44:
72 72 pass
73 73 elif l > 1000000:
74 74 # zlib makes an internal copy, thus doubling memory usage for
75 75 # large files, so let's do this in pieces
76 76 z = zlib.compressobj()
77 77 p = []
78 78 pos = 0
79 79 while pos < l:
80 80 pos2 = pos + 2**20
81 81 p.append(z.compress(text[pos:pos2]))
82 82 pos = pos2
83 83 p.append(z.flush())
84 84 if sum(map(len, p)) < l:
85 85 bin = "".join(p)
86 86 else:
87 87 bin = _compress(text)
88 88 if bin is None or len(bin) > l:
89 89 if text[0] == '\0':
90 90 return ("", text)
91 91 return ('u', text)
92 92 return ("", bin)
93 93
94 94 def decompress(bin):
95 95 """ decompress the given input """
96 96 if not bin:
97 97 return bin
98 98 t = bin[0]
99 99 if t == '\0':
100 100 return bin
101 101 if t == 'x':
102 102 return _decompress(bin)
103 103 if t == 'u':
104 104 return bin[1:]
105 105 raise RevlogError(_("unknown compression type %r") % t)
106 106
107 107 class lazyparser(object):
108 108 """
109 109 this class avoids the need to parse the entirety of large indices
110 110 """
111 111
112 112 # lazyparser is not safe to use on windows if win32 extensions not
113 113 # available. it keeps a file handle open, which makes it impossible
114 114 # to break hardlinks on local cloned repos.
115 115
116 116 def __init__(self, dataf, size):
117 117 self.dataf = dataf
118 118 self.s = struct.calcsize(indexformatng)
119 119 self.datasize = size
120 120 self.l = size/self.s
121 121 self.index = [None] * self.l
122 122 self.map = {nullid: nullrev}
123 123 self.allmap = 0
124 124 self.all = 0
125 125 self.mapfind_count = 0
126 126
127 127 def loadmap(self):
128 128 """
129 129 during a commit, we need to make sure the rev being added is
130 130 not a duplicate. This requires loading the entire index,
131 131 which is fairly slow. loadmap can load up just the node map,
132 132 which takes much less time.
133 133 """
134 134 if self.allmap:
135 135 return
136 136 end = self.datasize
137 137 self.allmap = 1
138 138 cur = 0
139 139 count = 0
140 140 blocksize = self.s * 256
141 141 self.dataf.seek(0)
142 142 while cur < end:
143 143 data = self.dataf.read(blocksize)
144 144 off = 0
145 145 for x in xrange(256):
146 146 n = data[off + ngshaoffset:off + ngshaoffset + 20]
147 147 self.map[n] = count
148 148 count += 1
149 149 if count >= self.l:
150 150 break
151 151 off += self.s
152 152 cur += blocksize
153 153
154 154 def loadblock(self, blockstart, blocksize, data=None):
155 155 if self.all:
156 156 return
157 157 if data is None:
158 158 self.dataf.seek(blockstart)
159 159 if blockstart + blocksize > self.datasize:
160 160 # the revlog may have grown since we've started running,
161 161 # but we don't have space in self.index for more entries.
162 162 # limit blocksize so that we don't get too much data.
163 163 blocksize = max(self.datasize - blockstart, 0)
164 164 data = self.dataf.read(blocksize)
165 165 lend = len(data) / self.s
166 166 i = blockstart / self.s
167 167 off = 0
168 168 # lazyindex supports __delitem__
169 169 if lend > len(self.index) - i:
170 170 lend = len(self.index) - i
171 171 for x in xrange(lend):
172 172 if self.index[i + x] == None:
173 173 b = data[off : off + self.s]
174 174 self.index[i + x] = b
175 175 n = b[ngshaoffset:ngshaoffset + 20]
176 176 self.map[n] = i + x
177 177 off += self.s
178 178
179 179 def findnode(self, node):
180 180 """search backwards through the index file for a specific node"""
181 181 if self.allmap:
182 182 return None
183 183
184 184 # hg log will cause many many searches for the manifest
185 185 # nodes. After we get called a few times, just load the whole
186 186 # thing.
187 187 if self.mapfind_count > 8:
188 188 self.loadmap()
189 189 if node in self.map:
190 190 return node
191 191 return None
192 192 self.mapfind_count += 1
193 193 last = self.l - 1
194 194 while self.index[last] != None:
195 195 if last == 0:
196 196 self.all = 1
197 197 self.allmap = 1
198 198 return None
199 199 last -= 1
200 200 end = (last + 1) * self.s
201 201 blocksize = self.s * 256
202 202 while end >= 0:
203 203 start = max(end - blocksize, 0)
204 204 self.dataf.seek(start)
205 205 data = self.dataf.read(end - start)
206 206 findend = end - start
207 207 while True:
208 208 # we're searching backwards, so we have to make sure
209 209 # we don't find a changeset where this node is a parent
210 210 off = data.find(node, 0, findend)
211 211 findend = off
212 212 if off >= 0:
213 213 i = off / self.s
214 214 off = i * self.s
215 215 n = data[off + ngshaoffset:off + ngshaoffset + 20]
216 216 if n == node:
217 217 self.map[n] = i + start / self.s
218 218 return node
219 219 else:
220 220 break
221 221 end -= blocksize
222 222 return None
223 223
224 224 def loadindex(self, i=None, end=None):
225 225 if self.all:
226 226 return
227 227 all = False
228 228 if i == None:
229 229 blockstart = 0
230 230 blocksize = (65536 / self.s) * self.s
231 231 end = self.datasize
232 232 all = True
233 233 else:
234 234 if end:
235 235 blockstart = i * self.s
236 236 end = end * self.s
237 237 blocksize = end - blockstart
238 238 else:
239 239 blockstart = (i & ~1023) * self.s
240 240 blocksize = self.s * 1024
241 241 end = blockstart + blocksize
242 242 while blockstart < end:
243 243 self.loadblock(blockstart, blocksize)
244 244 blockstart += blocksize
245 245 if all:
246 246 self.all = True
247 247
248 248 class lazyindex(object):
249 249 """a lazy version of the index array"""
250 250 def __init__(self, parser):
251 251 self.p = parser
252 252 def __len__(self):
253 253 return len(self.p.index)
254 254 def load(self, pos):
255 255 if pos < 0:
256 256 pos += len(self.p.index)
257 257 self.p.loadindex(pos)
258 258 return self.p.index[pos]
259 259 def __getitem__(self, pos):
260 260 return _unpack(indexformatng, self.p.index[pos] or self.load(pos))
261 261 def __setitem__(self, pos, item):
262 262 self.p.index[pos] = _pack(indexformatng, *item)
263 263 def __delitem__(self, pos):
264 264 del self.p.index[pos]
265 265 def insert(self, pos, e):
266 266 self.p.index.insert(pos, _pack(indexformatng, *e))
267 267 def append(self, e):
268 268 self.p.index.append(_pack(indexformatng, *e))
269 269
270 270 class lazymap(object):
271 271 """a lazy version of the node map"""
272 272 def __init__(self, parser):
273 273 self.p = parser
274 274 def load(self, key):
275 275 n = self.p.findnode(key)
276 276 if n == None:
277 277 raise KeyError(key)
278 278 def __contains__(self, key):
279 279 if key in self.p.map:
280 280 return True
281 281 self.p.loadmap()
282 282 return key in self.p.map
283 283 def __iter__(self):
284 284 yield nullid
285 285 for i in xrange(self.p.l):
286 286 ret = self.p.index[i]
287 287 if not ret:
288 288 self.p.loadindex(i)
289 289 ret = self.p.index[i]
290 290 if isinstance(ret, str):
291 291 ret = _unpack(indexformatng, ret)
292 292 yield ret[7]
293 293 def __getitem__(self, key):
294 294 try:
295 295 return self.p.map[key]
296 296 except KeyError:
297 297 try:
298 298 self.load(key)
299 299 return self.p.map[key]
300 300 except KeyError:
301 301 raise KeyError("node " + hex(key))
302 302 def __setitem__(self, key, val):
303 303 self.p.map[key] = val
304 304 def __delitem__(self, key):
305 305 del self.p.map[key]
306 306
307 307 indexformatv0 = ">4l20s20s20s"
308 308 v0shaoffset = 56
309 309
310 310 class revlogoldio(object):
311 311 def __init__(self):
312 312 self.size = struct.calcsize(indexformatv0)
313 313
314 314 def parseindex(self, fp, inline):
315 315 s = self.size
316 316 index = []
317 317 nodemap = {nullid: nullrev}
318 318 n = off = 0
319 319 data = fp.read()
320 320 l = len(data)
321 321 while off + s <= l:
322 322 cur = data[off:off + s]
323 323 off += s
324 324 e = _unpack(indexformatv0, cur)
325 325 # transform to revlogv1 format
326 326 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
327 327 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
328 328 index.append(e2)
329 329 nodemap[e[6]] = n
330 330 n += 1
331 331
332 332 return index, nodemap, None
333 333
334 334 def packentry(self, entry, node, version, rev):
335 335 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
336 336 node(entry[5]), node(entry[6]), entry[7])
337 337 return _pack(indexformatv0, *e2)
338 338
339 339 # index ng:
340 340 # 6 bytes offset
341 341 # 2 bytes flags
342 342 # 4 bytes compressed length
343 343 # 4 bytes uncompressed length
344 344 # 4 bytes: base rev
345 345 # 4 bytes link rev
346 346 # 4 bytes parent 1 rev
347 347 # 4 bytes parent 2 rev
348 348 # 32 bytes: nodeid
349 349 indexformatng = ">Qiiiiii20s12x"
350 350 ngshaoffset = 32
351 351 versionformat = ">I"
352 352
353 353 class revlogio(object):
354 354 def __init__(self):
355 355 self.size = struct.calcsize(indexformatng)
356 356
357 357 def parseindex(self, fp, inline):
358 358 try:
359 359 size = util.fstat(fp).st_size
360 360 except AttributeError:
361 361 size = 0
362 362
363 363 if util.openhardlinks() and not inline and size > 1000000:
364 364 # big index, let's parse it on demand
365 365 parser = lazyparser(fp, size)
366 366 index = lazyindex(parser)
367 367 nodemap = lazymap(parser)
368 368 e = list(index[0])
369 369 type = gettype(e[0])
370 370 e[0] = offset_type(0, type)
371 371 index[0] = e
372 372 return index, nodemap, None
373 373
374 374 s = self.size
375 375 cache = None
376 376 index = []
377 377 nodemap = {nullid: nullrev}
378 378 n = off = 0
379 379 # if we're not using lazymap, always read the whole index
380 380 data = fp.read()
381 381 l = len(data) - s
382 382 append = index.append
383 383 if inline:
384 384 cache = (0, data)
385 385 while off <= l:
386 386 e = _unpack(indexformatng, data[off:off + s])
387 387 nodemap[e[7]] = n
388 388 append(e)
389 389 n += 1
390 390 if e[1] < 0:
391 391 break
392 392 off += e[1] + s
393 393 else:
394 394 while off <= l:
395 395 e = _unpack(indexformatng, data[off:off + s])
396 396 nodemap[e[7]] = n
397 397 append(e)
398 398 n += 1
399 399 off += s
400 400
401 401 e = list(index[0])
402 402 type = gettype(e[0])
403 403 e[0] = offset_type(0, type)
404 404 index[0] = e
405 405
406 406 return index, nodemap, cache
407 407
408 408 def packentry(self, entry, node, version, rev):
409 409 p = _pack(indexformatng, *entry)
410 410 if rev == 0:
411 411 p = _pack(versionformat, version) + p[4:]
412 412 return p
413 413
414 414 class revlog(object):
415 415 """
416 416 the underlying revision storage object
417 417
418 418 A revlog consists of two parts, an index and the revision data.
419 419
420 420 The index is a file with a fixed record size containing
421 421 information on each revision, including its nodeid (hash), the
422 422 nodeids of its parents, the position and offset of its data within
423 423 the data file, and the revision it's based on. Finally, each entry
424 424 contains a linkrev entry that can serve as a pointer to external
425 425 data.
426 426
427 427 The revision data itself is a linear collection of data chunks.
428 428 Each chunk represents a revision and is usually represented as a
429 429 delta against the previous chunk. To bound lookup time, runs of
430 430 deltas are limited to about 2 times the length of the original
431 431 version data. This makes retrieval of a version proportional to
432 432 its size, or O(1) relative to the number of revisions.
433 433
434 434 Both pieces of the revlog are written to in an append-only
435 435 fashion, which means we never need to rewrite a file to insert or
436 436 remove data, and can use some simple techniques to avoid the need
437 437 for locking while reading.
438 438 """
439 439 def __init__(self, opener, indexfile):
440 440 """
441 441 create a revlog object
442 442
443 443 opener is a function that abstracts the file opening operation
444 444 and can be used to implement COW semantics or the like.
445 445 """
446 446 self.indexfile = indexfile
447 447 self.datafile = indexfile[:-2] + ".d"
448 448 self.opener = opener
449 449 self._cache = None
450 450 self._chunkcache = None
451 451 self.nodemap = {nullid: nullrev}
452 452 self.index = []
453 453
454 454 v = REVLOG_DEFAULT_VERSION
455 455 if hasattr(opener, "defversion"):
456 456 v = opener.defversion
457 457 if v & REVLOGNG:
458 458 v |= REVLOGNGINLINEDATA
459 459
460 460 i = ""
461 461 try:
462 462 f = self.opener(self.indexfile)
463 463 i = f.read(4)
464 464 f.seek(0)
465 465 if len(i) > 0:
466 466 v = struct.unpack(versionformat, i)[0]
467 467 except IOError, inst:
468 468 if inst.errno != errno.ENOENT:
469 469 raise
470 470
471 471 self.version = v
472 472 self._inline = v & REVLOGNGINLINEDATA
473 473 flags = v & ~0xFFFF
474 474 fmt = v & 0xFFFF
475 475 if fmt == REVLOGV0 and flags:
476 476 raise RevlogError(_("index %s unknown flags %#04x for format v0")
477 477 % (self.indexfile, flags >> 16))
478 478 elif fmt == REVLOGNG and flags & ~REVLOGNGINLINEDATA:
479 479 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
480 480 % (self.indexfile, flags >> 16))
481 481 elif fmt > REVLOGNG:
482 482 raise RevlogError(_("index %s unknown format %d")
483 483 % (self.indexfile, fmt))
484 484
485 485 self._io = revlogio()
486 486 if self.version == REVLOGV0:
487 487 self._io = revlogoldio()
488 488 if i:
489 489 d = self._io.parseindex(f, self._inline)
490 490 self.index, self.nodemap, self._chunkcache = d
491 491
492 492 # add the magic null revision at -1
493 493 self.index.append((0, 0, 0, -1, -1, -1, -1, nullid))
494 494
495 495 def _loadindex(self, start, end):
496 496 """load a block of indexes all at once from the lazy parser"""
497 497 if isinstance(self.index, lazyindex):
498 498 self.index.p.loadindex(start, end)
499 499
500 500 def _loadindexmap(self):
501 501 """loads both the map and the index from the lazy parser"""
502 502 if isinstance(self.index, lazyindex):
503 503 p = self.index.p
504 504 p.loadindex()
505 505 self.nodemap = p.map
506 506
507 507 def _loadmap(self):
508 508 """loads the map from the lazy parser"""
509 509 if isinstance(self.nodemap, lazymap):
510 510 self.nodemap.p.loadmap()
511 511 self.nodemap = self.nodemap.p.map
512 512
513 513 def tip(self):
514 514 return self.node(len(self.index) - 2)
515 515 def count(self):
516 516 return len(self.index) - 1
517 517
518 518 def rev(self, node):
519 519 try:
520 520 return self.nodemap[node]
521 521 except KeyError:
522 522 raise LookupError(hex(node), _('%s: no node %s') % (self.indexfile, hex(node)))
523 523 def node(self, rev):
524 524 return self.index[rev][7]
525 525 def linkrev(self, node):
526 526 return self.index[self.rev(node)][4]
527 527 def parents(self, node):
528 528 d = self.index[self.rev(node)][5:7]
529 529 return (self.node(d[0]), self.node(d[1]))
530 530 def parentrevs(self, rev):
531 531 return self.index[rev][5:7]
532 532 def start(self, rev):
533 533 return int(self.index[rev][0] >> 16)
534 534 def end(self, rev):
535 535 return self.start(rev) + self.length(rev)
536 536 def length(self, rev):
537 537 return self.index[rev][1]
538 538 def base(self, rev):
539 539 return self.index[rev][3]
540 540
541 541 def size(self, rev):
542 542 """return the length of the uncompressed text for a given revision"""
543 543 l = self.index[rev][2]
544 544 if l >= 0:
545 545 return l
546 546
547 547 t = self.revision(self.node(rev))
548 548 return len(t)
549 549
550 550 # alternate implementation. The advantage of this code is that it
551 551 # will be faster for a single revision. But, the results are not
552 552 # cached, so finding the size of every revision will be slower.
553 553 """
554 554 if self.cache and self.cache[1] == rev:
555 555 return len(self.cache[2])
556 556
557 557 base = self.base(rev)
558 558 if self.cache and self.cache[1] >= base and self.cache[1] < rev:
559 559 base = self.cache[1]
560 560 text = self.cache[2]
561 561 else:
562 562 text = self.revision(self.node(base))
563 563
564 564 l = len(text)
565 565 for x in xrange(base + 1, rev + 1):
566 566 l = mdiff.patchedsize(l, self.chunk(x))
567 567 return l
568 568 """
569 569
570 570 def reachable(self, node, stop=None):
571 571 """return a hash of all nodes ancestral to a given node, including
572 572 the node itself, stopping when stop is matched"""
573 573 reachable = {}
574 574 visit = [node]
575 575 reachable[node] = 1
576 576 if stop:
577 577 stopn = self.rev(stop)
578 578 else:
579 579 stopn = 0
580 580 while visit:
581 581 n = visit.pop(0)
582 582 if n == stop:
583 583 continue
584 584 if n == nullid:
585 585 continue
586 586 for p in self.parents(n):
587 587 if self.rev(p) < stopn:
588 588 continue
589 589 if p not in reachable:
590 590 reachable[p] = 1
591 591 visit.append(p)
592 592 return reachable
593 593
594 594 def nodesbetween(self, roots=None, heads=None):
595 595 """Return a tuple containing three elements. Elements 1 and 2 contain
596 596 a final list of bases and heads after all the unreachable ones have been
597 597 pruned. Element 0 contains a topologically sorted list of all
598 598
599 599 nodes that satisfy these constraints:
600 600 1. All nodes must be descended from a node in roots (the nodes on
601 601 roots are considered descended from themselves).
602 602 2. All nodes must also be ancestors of a node in heads (the nodes in
603 603 heads are considered to be their own ancestors).
604 604
605 605 If roots is unspecified, nullid is assumed as the only root.
606 606 If heads is unspecified, it is taken to be the output of the
607 607 heads method (i.e. a list of all nodes in the repository that
608 608 have no children)."""
609 609 nonodes = ([], [], [])
610 610 if roots is not None:
611 611 roots = list(roots)
612 612 if not roots:
613 613 return nonodes
614 614 lowestrev = min([self.rev(n) for n in roots])
615 615 else:
616 616 roots = [nullid] # Everybody's a descendent of nullid
617 617 lowestrev = nullrev
618 618 if (lowestrev == nullrev) and (heads is None):
619 619 # We want _all_ the nodes!
620 620 return ([self.node(r) for r in xrange(0, self.count())],
621 621 [nullid], list(self.heads()))
622 622 if heads is None:
623 623 # All nodes are ancestors, so the latest ancestor is the last
624 624 # node.
625 625 highestrev = self.count() - 1
626 626 # Set ancestors to None to signal that every node is an ancestor.
627 627 ancestors = None
628 628 # Set heads to an empty dictionary for later discovery of heads
629 629 heads = {}
630 630 else:
631 631 heads = list(heads)
632 632 if not heads:
633 633 return nonodes
634 634 ancestors = {}
635 635 # Turn heads into a dictionary so we can remove 'fake' heads.
636 636 # Also, later we will be using it to filter out the heads we can't
637 637 # find from roots.
638 638 heads = dict.fromkeys(heads, 0)
639 639 # Start at the top and keep marking parents until we're done.
640 640 nodestotag = heads.keys()
641 641 # Remember where the top was so we can use it as a limit later.
642 642 highestrev = max([self.rev(n) for n in nodestotag])
643 643 while nodestotag:
644 644 # grab a node to tag
645 645 n = nodestotag.pop()
646 646 # Never tag nullid
647 647 if n == nullid:
648 648 continue
649 649 # A node's revision number represents its place in a
650 650 # topologically sorted list of nodes.
651 651 r = self.rev(n)
652 652 if r >= lowestrev:
653 653 if n not in ancestors:
654 654 # If we are possibly a descendent of one of the roots
655 655 # and we haven't already been marked as an ancestor
656 656 ancestors[n] = 1 # Mark as ancestor
657 657 # Add non-nullid parents to list of nodes to tag.
658 658 nodestotag.extend([p for p in self.parents(n) if
659 659 p != nullid])
660 660 elif n in heads: # We've seen it before, is it a fake head?
661 661 # So it is, real heads should not be the ancestors of
662 662 # any other heads.
663 663 heads.pop(n)
664 664 if not ancestors:
665 665 return nonodes
666 666 # Now that we have our set of ancestors, we want to remove any
667 667 # roots that are not ancestors.
668 668
669 669 # If one of the roots was nullid, everything is included anyway.
670 670 if lowestrev > nullrev:
671 671 # But, since we weren't, let's recompute the lowest rev to not
672 672 # include roots that aren't ancestors.
673 673
674 674 # Filter out roots that aren't ancestors of heads
675 675 roots = [n for n in roots if n in ancestors]
676 676 # Recompute the lowest revision
677 677 if roots:
678 678 lowestrev = min([self.rev(n) for n in roots])
679 679 else:
680 680 # No more roots? Return empty list
681 681 return nonodes
682 682 else:
683 683 # We are descending from nullid, and don't need to care about
684 684 # any other roots.
685 685 lowestrev = nullrev
686 686 roots = [nullid]
687 687 # Transform our roots list into a 'set' (i.e. a dictionary where the
688 688 # values don't matter.
689 689 descendents = dict.fromkeys(roots, 1)
690 690 # Also, keep the original roots so we can filter out roots that aren't
691 691 # 'real' roots (i.e. are descended from other roots).
692 692 roots = descendents.copy()
693 693 # Our topologically sorted list of output nodes.
694 694 orderedout = []
695 695 # Don't start at nullid since we don't want nullid in our output list,
696 696 # and if nullid shows up in descendents, empty parents will look like
697 697 # they're descendents.
698 698 for r in xrange(max(lowestrev, 0), highestrev + 1):
699 699 n = self.node(r)
700 700 isdescendent = False
701 701 if lowestrev == nullrev: # Everybody is a descendent of nullid
702 702 isdescendent = True
703 703 elif n in descendents:
704 704 # n is already a descendent
705 705 isdescendent = True
706 706 # This check only needs to be done here because all the roots
707 707 # will start being marked as descendents before the loop.
708 708 if n in roots:
709 709 # If n was a root, check if it's a 'real' root.
710 710 p = tuple(self.parents(n))
711 711 # If any of its parents are descendents, it's not a root.
712 712 if (p[0] in descendents) or (p[1] in descendents):
713 713 roots.pop(n)
714 714 else:
715 715 p = tuple(self.parents(n))
716 716 # A node is a descendent if either of its parents are
717 717 # descendents. (We seeded the descendents list with the roots
718 718 # up there, remember?)
719 719 if (p[0] in descendents) or (p[1] in descendents):
720 720 descendents[n] = 1
721 721 isdescendent = True
722 722 if isdescendent and ((ancestors is None) or (n in ancestors)):
723 723 # Only include nodes that are both descendents and ancestors.
724 724 orderedout.append(n)
725 725 if (ancestors is not None) and (n in heads):
726 726 # We're trying to figure out which heads are reachable
727 727 # from roots.
728 728 # Mark this head as having been reached
729 729 heads[n] = 1
730 730 elif ancestors is None:
731 731 # Otherwise, we're trying to discover the heads.
732 732 # Assume this is a head because if it isn't, the next step
733 733 # will eventually remove it.
734 734 heads[n] = 1
735 735 # But, obviously its parents aren't.
736 736 for p in self.parents(n):
737 737 heads.pop(p, None)
738 738 heads = [n for n in heads.iterkeys() if heads[n] != 0]
739 739 roots = roots.keys()
740 740 assert orderedout
741 741 assert roots
742 742 assert heads
743 743 return (orderedout, roots, heads)
744 744
745 745 def heads(self, start=None, stop=None):
746 746 """return the list of all nodes that have no children
747 747
748 748 if start is specified, only heads that are descendants of
749 749 start will be returned
750 750 if stop is specified, it will consider all the revs from stop
751 751 as if they had no children
752 752 """
753 753 if start is None and stop is None:
754 754 count = self.count()
755 755 if not count:
756 756 return [nullid]
757 757 ishead = [1] * (count + 1)
758 758 index = self.index
759 759 for r in xrange(count):
760 760 e = index[r]
761 761 ishead[e[5]] = ishead[e[6]] = 0
762 762 return [self.node(r) for r in xrange(count) if ishead[r]]
763 763
764 764 if start is None:
765 765 start = nullid
766 766 if stop is None:
767 767 stop = []
768 768 stoprevs = dict.fromkeys([self.rev(n) for n in stop])
769 769 startrev = self.rev(start)
770 770 reachable = {startrev: 1}
771 771 heads = {startrev: 1}
772 772
773 773 parentrevs = self.parentrevs
774 774 for r in xrange(startrev + 1, self.count()):
775 775 for p in parentrevs(r):
776 776 if p in reachable:
777 777 if r not in stoprevs:
778 778 reachable[r] = 1
779 779 heads[r] = 1
780 780 if p in heads and p not in stoprevs:
781 781 del heads[p]
782 782
783 783 return [self.node(r) for r in heads]
784 784
785 785 def children(self, node):
786 786 """find the children of a given node"""
787 787 c = []
788 788 p = self.rev(node)
789 789 for r in range(p + 1, self.count()):
790 790 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
791 791 if prevs:
792 792 for pr in prevs:
793 793 if pr == p:
794 794 c.append(self.node(r))
795 795 elif p == nullrev:
796 796 c.append(self.node(r))
797 797 return c
798 798
799 799 def _match(self, id):
800 800 if isinstance(id, (long, int)):
801 801 # rev
802 802 return self.node(id)
803 803 if len(id) == 20:
804 804 # possibly a binary node
805 805 # odds of a binary node being all hex in ASCII are 1 in 10**25
806 806 try:
807 807 node = id
808 808 r = self.rev(node) # quick search the index
809 809 return node
810 810 except LookupError:
811 811 pass # may be partial hex id
812 812 try:
813 813 # str(rev)
814 814 rev = int(id)
815 815 if str(rev) != id:
816 816 raise ValueError
817 817 if rev < 0:
818 818 rev = self.count() + rev
819 819 if rev < 0 or rev >= self.count():
820 820 raise ValueError
821 821 return self.node(rev)
822 822 except (ValueError, OverflowError):
823 823 pass
824 824 if len(id) == 40:
825 825 try:
826 826 # a full hex nodeid?
827 827 node = bin(id)
828 828 r = self.rev(node)
829 829 return node
830 830 except TypeError:
831 831 pass
832 832
833 833 def _partialmatch(self, id):
834 834 if len(id) < 40:
835 835 try:
836 836 # hex(node)[:...]
837 837 bin_id = bin(id[:len(id) & ~1]) # grab an even number of digits
838 838 node = None
839 839 for n in self.nodemap:
840 840 if n.startswith(bin_id) and hex(n).startswith(id):
841 841 if node is not None:
842 842 raise LookupError(hex(node),
843 843 _("Ambiguous identifier"))
844 844 node = n
845 845 if node is not None:
846 846 return node
847 847 except TypeError:
848 848 pass
849 849
850 850 def lookup(self, id):
851 851 """locate a node based on:
852 852 - revision number or str(revision number)
853 853 - nodeid or subset of hex nodeid
854 854 """
855 855 n = self._match(id)
856 856 if n is not None:
857 857 return n
858 858 n = self._partialmatch(id)
859 859 if n:
860 860 return n
861 861
862 862 raise LookupError(id, _("No match found"))
863 863
864 864 def cmp(self, node, text):
865 865 """compare text with a given file revision"""
866 866 p1, p2 = self.parents(node)
867 867 return hash(text, p1, p2) != node
868 868
869 869 def chunk(self, rev, df=None):
870 870 def loadcache(df):
871 871 if not df:
872 872 if self._inline:
873 873 df = self.opener(self.indexfile)
874 874 else:
875 875 df = self.opener(self.datafile)
876 876 df.seek(start)
877 877 self._chunkcache = (start, df.read(cache_length))
878 878
879 879 start, length = self.start(rev), self.length(rev)
880 880 if self._inline:
881 881 start += (rev + 1) * self._io.size
882 882 end = start + length
883 883
884 884 offset = 0
885 885 if not self._chunkcache:
886 886 cache_length = max(65536, length)
887 887 loadcache(df)
888 888 else:
889 889 cache_start = self._chunkcache[0]
890 890 cache_length = len(self._chunkcache[1])
891 891 cache_end = cache_start + cache_length
892 892 if start >= cache_start and end <= cache_end:
893 893 # it is cached
894 894 offset = start - cache_start
895 895 else:
896 896 cache_length = max(65536, length)
897 897 loadcache(df)
898 898
899 899 # avoid copying large chunks
900 900 c = self._chunkcache[1]
901 901 if cache_length != length:
902 902 c = c[offset:offset + length]
903 903
904 904 return decompress(c)
905 905
906 906 def delta(self, node):
907 907 """return or calculate a delta between a node and its predecessor"""
908 908 r = self.rev(node)
909 909 return self.revdiff(r - 1, r)
910 910
911 911 def revdiff(self, rev1, rev2):
912 912 """return or calculate a delta between two revisions"""
913 913 if rev1 + 1 == rev2 and self.base(rev1) == self.base(rev2):
914 914 return self.chunk(rev2)
915 915
916 916 return mdiff.textdiff(self.revision(self.node(rev1)),
917 917 self.revision(self.node(rev2)))
918 918
919 919 def revision(self, node):
920 920 """return an uncompressed revision of a given node"""
921 921 if node == nullid:
922 922 return ""
923 923 if self._cache and self._cache[0] == node:
924 924 return str(self._cache[2])
925 925
926 926 # look up what we need to read
927 927 text = None
928 928 rev = self.rev(node)
929 929 base = self.base(rev)
930 930
931 931 # check rev flags
932 932 if self.index[rev][0] & 0xFFFF:
933 933 raise RevlogError(_('incompatible revision flag %x') %
934 934 (self.index[rev][0] & 0xFFFF))
935 935
936 if self._inline:
937 # we probably have the whole chunk cached
938 936 df = None
939 else:
940 df = self.opener(self.datafile)
941 937
942 938 # do we have useful data cached?
943 939 if self._cache and self._cache[1] >= base and self._cache[1] < rev:
944 940 base = self._cache[1]
945 941 text = str(self._cache[2])
946 942 self._loadindex(base, rev + 1)
943 if not self._inline and rev > base + 1:
944 df = self.opener(self.datafile)
947 945 else:
948 946 self._loadindex(base, rev + 1)
947 if not self._inline and rev > base:
948 df = self.opener(self.datafile)
949 949 text = self.chunk(base, df=df)
950 950
951 951 bins = [self.chunk(r, df) for r in xrange(base + 1, rev + 1)]
952 952 text = mdiff.patches(text, bins)
953 953 p1, p2 = self.parents(node)
954 954 if node != hash(text, p1, p2):
955 955 raise RevlogError(_("integrity check failed on %s:%d")
956 956 % (self.datafile, rev))
957 957
958 958 self._cache = (node, rev, text)
959 959 return text
960 960
961 961 def checkinlinesize(self, tr, fp=None):
962 962 if not self._inline:
963 963 return
964 964 if not fp:
965 965 fp = self.opener(self.indexfile, 'r')
966 966 fp.seek(0, 2)
967 967 size = fp.tell()
968 968 if size < 131072:
969 969 return
970 970 trinfo = tr.find(self.indexfile)
971 971 if trinfo == None:
972 972 raise RevlogError(_("%s not found in the transaction")
973 973 % self.indexfile)
974 974
975 975 trindex = trinfo[2]
976 976 dataoff = self.start(trindex)
977 977
978 978 tr.add(self.datafile, dataoff)
979 979 df = self.opener(self.datafile, 'w')
980 980 calc = self._io.size
981 981 for r in xrange(self.count()):
982 982 start = self.start(r) + (r + 1) * calc
983 983 length = self.length(r)
984 984 fp.seek(start)
985 985 d = fp.read(length)
986 986 df.write(d)
987 987 fp.close()
988 988 df.close()
989 989 fp = self.opener(self.indexfile, 'w', atomictemp=True)
990 990 self.version &= ~(REVLOGNGINLINEDATA)
991 991 self._inline = False
992 992 for i in xrange(self.count()):
993 993 e = self._io.packentry(self.index[i], self.node, self.version, i)
994 994 fp.write(e)
995 995
996 996 # if we don't call rename, the temp file will never replace the
997 997 # real index
998 998 fp.rename()
999 999
1000 1000 tr.replace(self.indexfile, trindex * calc)
1001 1001 self._chunkcache = None
1002 1002
1003 1003 def addrevision(self, text, transaction, link, p1, p2, d=None):
1004 1004 """add a revision to the log
1005 1005
1006 1006 text - the revision data to add
1007 1007 transaction - the transaction object used for rollback
1008 1008 link - the linkrev data to add
1009 1009 p1, p2 - the parent nodeids of the revision
1010 1010 d - an optional precomputed delta
1011 1011 """
1012 1012 dfh = None
1013 1013 if not self._inline:
1014 1014 dfh = self.opener(self.datafile, "a")
1015 1015 ifh = self.opener(self.indexfile, "a+")
1016 1016 return self._addrevision(text, transaction, link, p1, p2, d, ifh, dfh)
1017 1017
1018 1018 def _addrevision(self, text, transaction, link, p1, p2, d, ifh, dfh):
1019 1019 node = hash(text, p1, p2)
1020 1020 if node in self.nodemap:
1021 1021 return node
1022 1022
1023 1023 curr = self.count()
1024 1024 prev = curr - 1
1025 1025 base = self.base(prev)
1026 1026 offset = self.end(prev)
1027 1027
1028 1028 if curr:
1029 1029 if not d:
1030 1030 ptext = self.revision(self.node(prev))
1031 1031 d = mdiff.textdiff(ptext, text)
1032 1032 data = compress(d)
1033 1033 l = len(data[1]) + len(data[0])
1034 1034 dist = l + offset - self.start(base)
1035 1035
1036 1036 # full versions are inserted when the needed deltas
1037 1037 # become comparable to the uncompressed text
1038 1038 if not curr or dist > len(text) * 2:
1039 1039 data = compress(text)
1040 1040 l = len(data[1]) + len(data[0])
1041 1041 base = curr
1042 1042
1043 1043 e = (offset_type(offset, 0), l, len(text),
1044 1044 base, link, self.rev(p1), self.rev(p2), node)
1045 1045 self.index.insert(-1, e)
1046 1046 self.nodemap[node] = curr
1047 1047
1048 1048 entry = self._io.packentry(e, self.node, self.version, curr)
1049 1049 if not self._inline:
1050 1050 transaction.add(self.datafile, offset)
1051 1051 transaction.add(self.indexfile, curr * len(entry))
1052 1052 if data[0]:
1053 1053 dfh.write(data[0])
1054 1054 dfh.write(data[1])
1055 1055 dfh.flush()
1056 1056 ifh.write(entry)
1057 1057 else:
1058 1058 offset += curr * self._io.size
1059 1059 transaction.add(self.indexfile, offset, curr)
1060 1060 ifh.write(entry)
1061 1061 ifh.write(data[0])
1062 1062 ifh.write(data[1])
1063 1063 self.checkinlinesize(transaction, ifh)
1064 1064
1065 1065 self._cache = (node, curr, text)
1066 1066 return node
1067 1067
1068 1068 def ancestor(self, a, b):
1069 1069 """calculate the least common ancestor of nodes a and b"""
1070 1070
1071 1071 def parents(rev):
1072 1072 return [p for p in self.parentrevs(rev) if p != nullrev]
1073 1073
1074 1074 c = ancestor.ancestor(self.rev(a), self.rev(b), parents)
1075 1075 if c is None:
1076 1076 return nullid
1077 1077
1078 1078 return self.node(c)
1079 1079
1080 1080 def group(self, nodelist, lookup, infocollect=None):
1081 1081 """calculate a delta group
1082 1082
1083 1083 Given a list of changeset revs, return a set of deltas and
1084 1084 metadata corresponding to nodes. The first delta is
1085 1085 parent(nodes[0]) -> nodes[0]; the receiver is guaranteed to
1086 1086 have this parent as it has all history before these
1087 1087 changesets. parent is parent[0]
1088 1088 """
1089 1089 revs = [self.rev(n) for n in nodelist]
1090 1090
1091 1091 # if we don't have any revisions touched by these changesets, bail
1092 1092 if not revs:
1093 1093 yield changegroup.closechunk()
1094 1094 return
1095 1095
1096 1096 # add the parent of the first rev
1097 1097 p = self.parents(self.node(revs[0]))[0]
1098 1098 revs.insert(0, self.rev(p))
1099 1099
1100 1100 # build deltas
1101 1101 for d in xrange(0, len(revs) - 1):
1102 1102 a, b = revs[d], revs[d + 1]
1103 1103 nb = self.node(b)
1104 1104
1105 1105 if infocollect is not None:
1106 1106 infocollect(nb)
1107 1107
1108 1108 p = self.parents(nb)
1109 1109 meta = nb + p[0] + p[1] + lookup(nb)
1110 1110 if a == -1:
1111 1111 d = self.revision(nb)
1112 1112 meta += mdiff.trivialdiffheader(len(d))
1113 1113 else:
1114 1114 d = self.revdiff(a, b)
1115 1115 yield changegroup.chunkheader(len(meta) + len(d))
1116 1116 yield meta
1117 1117 if len(d) > 2**20:
1118 1118 pos = 0
1119 1119 while pos < len(d):
1120 1120 pos2 = pos + 2 ** 18
1121 1121 yield d[pos:pos2]
1122 1122 pos = pos2
1123 1123 else:
1124 1124 yield d
1125 1125
1126 1126 yield changegroup.closechunk()
1127 1127
1128 1128 def addgroup(self, revs, linkmapper, transaction, unique=0):
1129 1129 """
1130 1130 add a delta group
1131 1131
1132 1132 given a set of deltas, add them to the revision log. the
1133 1133 first delta is against its parent, which should be in our
1134 1134 log, the rest are against the previous delta.
1135 1135 """
1136 1136
1137 1137 # track the base of the current delta log
1138 1138 r = self.count()
1139 1139 t = r - 1
1140 1140 node = None
1141 1141
1142 1142 base = prev = nullrev
1143 1143 start = end = textlen = 0
1144 1144 if r:
1145 1145 end = self.end(t)
1146 1146
1147 1147 ifh = self.opener(self.indexfile, "a+")
1148 1148 isize = r * self._io.size
1149 1149 if self._inline:
1150 1150 transaction.add(self.indexfile, end + isize, r)
1151 1151 dfh = None
1152 1152 else:
1153 1153 transaction.add(self.indexfile, isize, r)
1154 1154 transaction.add(self.datafile, end)
1155 1155 dfh = self.opener(self.datafile, "a")
1156 1156
1157 1157 # loop through our set of deltas
1158 1158 chain = None
1159 1159 for chunk in revs:
1160 1160 node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
1161 1161 link = linkmapper(cs)
1162 1162 if node in self.nodemap:
1163 1163 # this can happen if two branches make the same change
1164 1164 # if unique:
1165 1165 # raise RevlogError(_("already have %s") % hex(node[:4]))
1166 1166 chain = node
1167 1167 continue
1168 1168 delta = buffer(chunk, 80)
1169 1169 del chunk
1170 1170
1171 1171 for p in (p1, p2):
1172 1172 if not p in self.nodemap:
1173 1173 raise LookupError(hex(p), _("unknown parent %s") % short(p))
1174 1174
1175 1175 if not chain:
1176 1176 # retrieve the parent revision of the delta chain
1177 1177 chain = p1
1178 1178 if not chain in self.nodemap:
1179 1179 raise LookupError(hex(chain), _("unknown base %s") % short(chain[:4]))
1180 1180
1181 1181 # full versions are inserted when the needed deltas become
1182 1182 # comparable to the uncompressed text or when the previous
1183 1183 # version is not the one we have a delta against. We use
1184 1184 # the size of the previous full rev as a proxy for the
1185 1185 # current size.
1186 1186
1187 1187 if chain == prev:
1188 1188 cdelta = compress(delta)
1189 1189 cdeltalen = len(cdelta[0]) + len(cdelta[1])
1190 1190 textlen = mdiff.patchedsize(textlen, delta)
1191 1191
1192 1192 if chain != prev or (end - start + cdeltalen) > textlen * 2:
1193 1193 # flush our writes here so we can read it in revision
1194 1194 if dfh:
1195 1195 dfh.flush()
1196 1196 ifh.flush()
1197 1197 text = self.revision(chain)
1198 1198 if len(text) == 0:
1199 1199 # skip over trivial delta header
1200 1200 text = buffer(delta, 12)
1201 1201 else:
1202 1202 text = mdiff.patches(text, [delta])
1203 1203 del delta
1204 1204 chk = self._addrevision(text, transaction, link, p1, p2, None,
1205 1205 ifh, dfh)
1206 1206 if not dfh and not self._inline:
1207 1207 # addrevision switched from inline to conventional
1208 1208 # reopen the index
1209 1209 dfh = self.opener(self.datafile, "a")
1210 1210 ifh = self.opener(self.indexfile, "a")
1211 1211 if chk != node:
1212 1212 raise RevlogError(_("consistency error adding group"))
1213 1213 textlen = len(text)
1214 1214 else:
1215 1215 e = (offset_type(end, 0), cdeltalen, textlen, base,
1216 1216 link, self.rev(p1), self.rev(p2), node)
1217 1217 self.index.insert(-1, e)
1218 1218 self.nodemap[node] = r
1219 1219 entry = self._io.packentry(e, self.node, self.version, r)
1220 1220 if self._inline:
1221 1221 ifh.write(entry)
1222 1222 ifh.write(cdelta[0])
1223 1223 ifh.write(cdelta[1])
1224 1224 self.checkinlinesize(transaction, ifh)
1225 1225 if not self._inline:
1226 1226 dfh = self.opener(self.datafile, "a")
1227 1227 ifh = self.opener(self.indexfile, "a")
1228 1228 else:
1229 1229 dfh.write(cdelta[0])
1230 1230 dfh.write(cdelta[1])
1231 1231 ifh.write(entry)
1232 1232
1233 1233 t, r, chain, prev = r, r + 1, node, node
1234 1234 base = self.base(t)
1235 1235 start = self.start(base)
1236 1236 end = self.end(t)
1237 1237
1238 1238 return node
1239 1239
1240 1240 def strip(self, minlink):
1241 1241 """truncate the revlog on the first revision with a linkrev >= minlink
1242 1242
1243 1243 This function is called when we're stripping revision minlink and
1244 1244 its descendants from the repository.
1245 1245
1246 1246 We have to remove all revisions with linkrev >= minlink, because
1247 1247 the equivalent changelog revisions will be renumbered after the
1248 1248 strip.
1249 1249
1250 1250 So we truncate the revlog on the first of these revisions, and
1251 1251 trust that the caller has saved the revisions that shouldn't be
1252 1252 removed and that it'll readd them after this truncation.
1253 1253 """
1254 1254 if self.count() == 0:
1255 1255 return
1256 1256
1257 1257 if isinstance(self.index, lazyindex):
1258 1258 self._loadindexmap()
1259 1259
1260 1260 for rev in xrange(0, self.count()):
1261 1261 if self.index[rev][4] >= minlink:
1262 1262 break
1263 1263 else:
1264 1264 return
1265 1265
1266 1266 # first truncate the files on disk
1267 1267 end = self.start(rev)
1268 1268 if not self._inline:
1269 1269 df = self.opener(self.datafile, "a")
1270 1270 df.truncate(end)
1271 1271 end = rev * self._io.size
1272 1272 else:
1273 1273 end += rev * self._io.size
1274 1274
1275 1275 indexf = self.opener(self.indexfile, "a")
1276 1276 indexf.truncate(end)
1277 1277
1278 1278 # then reset internal state in memory to forget those revisions
1279 1279 self._cache = None
1280 1280 self._chunkcache = None
1281 1281 for x in xrange(rev, self.count()):
1282 1282 del self.nodemap[self.node(x)]
1283 1283
1284 1284 del self.index[rev:-1]
1285 1285
1286 1286 def checksize(self):
1287 1287 expected = 0
1288 1288 if self.count():
1289 1289 expected = max(0, self.end(self.count() - 1))
1290 1290
1291 1291 try:
1292 1292 f = self.opener(self.datafile)
1293 1293 f.seek(0, 2)
1294 1294 actual = f.tell()
1295 1295 dd = actual - expected
1296 1296 except IOError, inst:
1297 1297 if inst.errno != errno.ENOENT:
1298 1298 raise
1299 1299 dd = 0
1300 1300
1301 1301 try:
1302 1302 f = self.opener(self.indexfile)
1303 1303 f.seek(0, 2)
1304 1304 actual = f.tell()
1305 1305 s = self._io.size
1306 1306 i = max(0, actual / s)
1307 1307 di = actual - (i * s)
1308 1308 if self._inline:
1309 1309 databytes = 0
1310 1310 for r in xrange(self.count()):
1311 1311 databytes += max(0, self.length(r))
1312 1312 dd = 0
1313 1313 di = actual - self.count() * s - databytes
1314 1314 except IOError, inst:
1315 1315 if inst.errno != errno.ENOENT:
1316 1316 raise
1317 1317 di = 0
1318 1318
1319 1319 return (dd, di)