##// END OF EJS Templates
revlog: reduce memory usage in addgroup...
Matt Mackall -
r5445:64cf1c85 default
parent child Browse files
Show More
@@ -1,1276 +1,1284
1 1 """
2 2 revlog.py - storage back-end for mercurial
3 3
4 4 This provides efficient delta storage with O(1) retrieve and append
5 5 and O(changes) merge between branches
6 6
7 7 Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
8 8
9 9 This software may be used and distributed according to the terms
10 10 of the GNU General Public License, incorporated herein by reference.
11 11 """
12 12
13 13 from node import *
14 14 from i18n import _
15 15 import binascii, changegroup, errno, ancestor, mdiff, os
16 16 import sha, struct, util, zlib
17 17
18 18 _pack = struct.pack
19 19 _unpack = struct.unpack
20 20 _compress = zlib.compress
21 21 _decompress = zlib.decompress
22 22 _sha = sha.new
23 23
24 24 # revlog flags
25 25 REVLOGV0 = 0
26 26 REVLOGNG = 1
27 27 REVLOGNGINLINEDATA = (1 << 16)
28 28 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
29 29 REVLOG_DEFAULT_FORMAT = REVLOGNG
30 30 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
31 31
32 32 class RevlogError(Exception):
33 33 pass
34 34 class LookupError(RevlogError):
35 35 pass
36 36
37 37 def getoffset(q):
38 38 return int(q >> 16)
39 39
40 40 def gettype(q):
41 41 return int(q & 0xFFFF)
42 42
43 43 def offset_type(offset, type):
44 44 return long(long(offset) << 16 | type)
45 45
46 46 def hash(text, p1, p2):
47 47 """generate a hash from the given text and its parent hashes
48 48
49 49 This hash combines both the current file contents and its history
50 50 in a manner that makes it easy to distinguish nodes with the same
51 51 content in the revision graph.
52 52 """
53 53 l = [p1, p2]
54 54 l.sort()
55 55 s = _sha(l[0])
56 56 s.update(l[1])
57 57 s.update(text)
58 58 return s.digest()
59 59
60 60 def compress(text):
61 61 """ generate a possibly-compressed representation of text """
62 62 if not text:
63 63 return ("", text)
64 64 if len(text) < 44:
65 65 if text[0] == '\0':
66 66 return ("", text)
67 67 return ('u', text)
68 68 bin = _compress(text)
69 69 if len(bin) > len(text):
70 70 if text[0] == '\0':
71 71 return ("", text)
72 72 return ('u', text)
73 73 return ("", bin)
74 74
75 75 def decompress(bin):
76 76 """ decompress the given input """
77 77 if not bin:
78 78 return bin
79 79 t = bin[0]
80 80 if t == '\0':
81 81 return bin
82 82 if t == 'x':
83 83 return _decompress(bin)
84 84 if t == 'u':
85 85 return bin[1:]
86 86 raise RevlogError(_("unknown compression type %r") % t)
87 87
88 88 class lazyparser(object):
89 89 """
90 90 this class avoids the need to parse the entirety of large indices
91 91 """
92 92
93 93 # lazyparser is not safe to use on windows if win32 extensions not
94 94 # available. it keeps file handle open, which make it not possible
95 95 # to break hardlinks on local cloned repos.
96 96 safe_to_use = os.name != 'nt' or (not util.is_win_9x() and
97 97 hasattr(util, 'win32api'))
98 98
99 99 def __init__(self, dataf, size):
100 100 self.dataf = dataf
101 101 self.s = struct.calcsize(indexformatng)
102 102 self.datasize = size
103 103 self.l = size/self.s
104 104 self.index = [None] * self.l
105 105 self.map = {nullid: nullrev}
106 106 self.allmap = 0
107 107 self.all = 0
108 108 self.mapfind_count = 0
109 109
110 110 def loadmap(self):
111 111 """
112 112 during a commit, we need to make sure the rev being added is
113 113 not a duplicate. This requires loading the entire index,
114 114 which is fairly slow. loadmap can load up just the node map,
115 115 which takes much less time.
116 116 """
117 117 if self.allmap:
118 118 return
119 119 end = self.datasize
120 120 self.allmap = 1
121 121 cur = 0
122 122 count = 0
123 123 blocksize = self.s * 256
124 124 self.dataf.seek(0)
125 125 while cur < end:
126 126 data = self.dataf.read(blocksize)
127 127 off = 0
128 128 for x in xrange(256):
129 129 n = data[off + ngshaoffset:off + ngshaoffset + 20]
130 130 self.map[n] = count
131 131 count += 1
132 132 if count >= self.l:
133 133 break
134 134 off += self.s
135 135 cur += blocksize
136 136
137 137 def loadblock(self, blockstart, blocksize, data=None):
138 138 if self.all:
139 139 return
140 140 if data is None:
141 141 self.dataf.seek(blockstart)
142 142 if blockstart + blocksize > self.datasize:
143 143 # the revlog may have grown since we've started running,
144 144 # but we don't have space in self.index for more entries.
145 145 # limit blocksize so that we don't get too much data.
146 146 blocksize = max(self.datasize - blockstart, 0)
147 147 data = self.dataf.read(blocksize)
148 148 lend = len(data) / self.s
149 149 i = blockstart / self.s
150 150 off = 0
151 151 # lazyindex supports __delitem__
152 152 if lend > len(self.index) - i:
153 153 lend = len(self.index) - i
154 154 for x in xrange(lend):
155 155 if self.index[i + x] == None:
156 156 b = data[off : off + self.s]
157 157 self.index[i + x] = b
158 158 n = b[ngshaoffset:ngshaoffset + 20]
159 159 self.map[n] = i + x
160 160 off += self.s
161 161
162 162 def findnode(self, node):
163 163 """search backwards through the index file for a specific node"""
164 164 if self.allmap:
165 165 return None
166 166
167 167 # hg log will cause many many searches for the manifest
168 168 # nodes. After we get called a few times, just load the whole
169 169 # thing.
170 170 if self.mapfind_count > 8:
171 171 self.loadmap()
172 172 if node in self.map:
173 173 return node
174 174 return None
175 175 self.mapfind_count += 1
176 176 last = self.l - 1
177 177 while self.index[last] != None:
178 178 if last == 0:
179 179 self.all = 1
180 180 self.allmap = 1
181 181 return None
182 182 last -= 1
183 183 end = (last + 1) * self.s
184 184 blocksize = self.s * 256
185 185 while end >= 0:
186 186 start = max(end - blocksize, 0)
187 187 self.dataf.seek(start)
188 188 data = self.dataf.read(end - start)
189 189 findend = end - start
190 190 while True:
191 191 # we're searching backwards, so we have to make sure
192 192 # we don't find a changeset where this node is a parent
193 193 off = data.find(node, 0, findend)
194 194 findend = off
195 195 if off >= 0:
196 196 i = off / self.s
197 197 off = i * self.s
198 198 n = data[off + ngshaoffset:off + ngshaoffset + 20]
199 199 if n == node:
200 200 self.map[n] = i + start / self.s
201 201 return node
202 202 else:
203 203 break
204 204 end -= blocksize
205 205 return None
206 206
207 207 def loadindex(self, i=None, end=None):
208 208 if self.all:
209 209 return
210 210 all = False
211 211 if i == None:
212 212 blockstart = 0
213 213 blocksize = (65536 / self.s) * self.s
214 214 end = self.datasize
215 215 all = True
216 216 else:
217 217 if end:
218 218 blockstart = i * self.s
219 219 end = end * self.s
220 220 blocksize = end - blockstart
221 221 else:
222 222 blockstart = (i & ~1023) * self.s
223 223 blocksize = self.s * 1024
224 224 end = blockstart + blocksize
225 225 while blockstart < end:
226 226 self.loadblock(blockstart, blocksize)
227 227 blockstart += blocksize
228 228 if all:
229 229 self.all = True
230 230
231 231 class lazyindex(object):
232 232 """a lazy version of the index array"""
233 233 def __init__(self, parser):
234 234 self.p = parser
235 235 def __len__(self):
236 236 return len(self.p.index)
237 237 def load(self, pos):
238 238 if pos < 0:
239 239 pos += len(self.p.index)
240 240 self.p.loadindex(pos)
241 241 return self.p.index[pos]
242 242 def __getitem__(self, pos):
243 243 return _unpack(indexformatng, self.p.index[pos] or self.load(pos))
244 244 def __setitem__(self, pos, item):
245 245 self.p.index[pos] = _pack(indexformatng, *item)
246 246 def __delitem__(self, pos):
247 247 del self.p.index[pos]
248 248 def insert(self, pos, e):
249 249 self.p.index.insert(pos, _pack(indexformatng, *e))
250 250 def append(self, e):
251 251 self.p.index.append(_pack(indexformatng, *e))
252 252
253 253 class lazymap(object):
254 254 """a lazy version of the node map"""
255 255 def __init__(self, parser):
256 256 self.p = parser
257 257 def load(self, key):
258 258 n = self.p.findnode(key)
259 259 if n == None:
260 260 raise KeyError(key)
261 261 def __contains__(self, key):
262 262 if key in self.p.map:
263 263 return True
264 264 self.p.loadmap()
265 265 return key in self.p.map
266 266 def __iter__(self):
267 267 yield nullid
268 268 for i in xrange(self.p.l):
269 269 ret = self.p.index[i]
270 270 if not ret:
271 271 self.p.loadindex(i)
272 272 ret = self.p.index[i]
273 273 if isinstance(ret, str):
274 274 ret = _unpack(indexformatng, ret)
275 275 yield ret[7]
276 276 def __getitem__(self, key):
277 277 try:
278 278 return self.p.map[key]
279 279 except KeyError:
280 280 try:
281 281 self.load(key)
282 282 return self.p.map[key]
283 283 except KeyError:
284 284 raise KeyError("node " + hex(key))
285 285 def __setitem__(self, key, val):
286 286 self.p.map[key] = val
287 287 def __delitem__(self, key):
288 288 del self.p.map[key]
289 289
290 290 indexformatv0 = ">4l20s20s20s"
291 291 v0shaoffset = 56
292 292
293 293 class revlogoldio(object):
294 294 def __init__(self):
295 295 self.size = struct.calcsize(indexformatv0)
296 296
297 297 def parseindex(self, fp, inline):
298 298 s = self.size
299 299 index = []
300 300 nodemap = {nullid: nullrev}
301 301 n = off = 0
302 302 data = fp.read()
303 303 l = len(data)
304 304 while off + s <= l:
305 305 cur = data[off:off + s]
306 306 off += s
307 307 e = _unpack(indexformatv0, cur)
308 308 # transform to revlogv1 format
309 309 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
310 310 nodemap[e[4]], nodemap[e[5]], e[6])
311 311 index.append(e2)
312 312 nodemap[e[6]] = n
313 313 n += 1
314 314
315 315 return index, nodemap, None
316 316
317 317 def packentry(self, entry, node, version, rev):
318 318 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
319 319 node(entry[5]), node(entry[6]), entry[7])
320 320 return _pack(indexformatv0, *e2)
321 321
322 322 # index ng:
323 323 # 6 bytes offset
324 324 # 2 bytes flags
325 325 # 4 bytes compressed length
326 326 # 4 bytes uncompressed length
327 327 # 4 bytes: base rev
328 328 # 4 bytes link rev
329 329 # 4 bytes parent 1 rev
330 330 # 4 bytes parent 2 rev
331 331 # 32 bytes: nodeid
332 332 indexformatng = ">Qiiiiii20s12x"
333 333 ngshaoffset = 32
334 334 versionformat = ">I"
335 335
336 336 class revlogio(object):
337 337 def __init__(self):
338 338 self.size = struct.calcsize(indexformatng)
339 339
340 340 def parseindex(self, fp, inline):
341 341 try:
342 342 size = util.fstat(fp).st_size
343 343 except AttributeError:
344 344 size = 0
345 345
346 346 if lazyparser.safe_to_use and not inline and size > 1000000:
347 347 # big index, let's parse it on demand
348 348 parser = lazyparser(fp, size)
349 349 index = lazyindex(parser)
350 350 nodemap = lazymap(parser)
351 351 e = list(index[0])
352 352 type = gettype(e[0])
353 353 e[0] = offset_type(0, type)
354 354 index[0] = e
355 355 return index, nodemap, None
356 356
357 357 s = self.size
358 358 cache = None
359 359 index = []
360 360 nodemap = {nullid: nullrev}
361 361 n = off = 0
362 362 # if we're not using lazymap, always read the whole index
363 363 data = fp.read()
364 364 l = len(data) - s
365 365 append = index.append
366 366 if inline:
367 367 cache = (0, data)
368 368 while off <= l:
369 369 e = _unpack(indexformatng, data[off:off + s])
370 370 nodemap[e[7]] = n
371 371 append(e)
372 372 n += 1
373 373 if e[1] < 0:
374 374 break
375 375 off += e[1] + s
376 376 else:
377 377 while off <= l:
378 378 e = _unpack(indexformatng, data[off:off + s])
379 379 nodemap[e[7]] = n
380 380 append(e)
381 381 n += 1
382 382 off += s
383 383
384 384 e = list(index[0])
385 385 type = gettype(e[0])
386 386 e[0] = offset_type(0, type)
387 387 index[0] = e
388 388
389 389 return index, nodemap, cache
390 390
391 391 def packentry(self, entry, node, version, rev):
392 392 p = _pack(indexformatng, *entry)
393 393 if rev == 0:
394 394 p = _pack(versionformat, version) + p[4:]
395 395 return p
396 396
397 397 class revlog(object):
398 398 """
399 399 the underlying revision storage object
400 400
401 401 A revlog consists of two parts, an index and the revision data.
402 402
403 403 The index is a file with a fixed record size containing
404 404 information on each revision, includings its nodeid (hash), the
405 405 nodeids of its parents, the position and offset of its data within
406 406 the data file, and the revision it's based on. Finally, each entry
407 407 contains a linkrev entry that can serve as a pointer to external
408 408 data.
409 409
410 410 The revision data itself is a linear collection of data chunks.
411 411 Each chunk represents a revision and is usually represented as a
412 412 delta against the previous chunk. To bound lookup time, runs of
413 413 deltas are limited to about 2 times the length of the original
414 414 version data. This makes retrieval of a version proportional to
415 415 its size, or O(1) relative to the number of revisions.
416 416
417 417 Both pieces of the revlog are written to in an append-only
418 418 fashion, which means we never need to rewrite a file to insert or
419 419 remove data, and can use some simple techniques to avoid the need
420 420 for locking while reading.
421 421 """
422 422 def __init__(self, opener, indexfile):
423 423 """
424 424 create a revlog object
425 425
426 426 opener is a function that abstracts the file opening operation
427 427 and can be used to implement COW semantics or the like.
428 428 """
429 429 self.indexfile = indexfile
430 430 self.datafile = indexfile[:-2] + ".d"
431 431 self.opener = opener
432 432 self._cache = None
433 433 self._chunkcache = None
434 434 self.nodemap = {nullid: nullrev}
435 435 self.index = []
436 436
437 437 v = REVLOG_DEFAULT_VERSION
438 438 if hasattr(opener, "defversion"):
439 439 v = opener.defversion
440 440 if v & REVLOGNG:
441 441 v |= REVLOGNGINLINEDATA
442 442
443 443 i = ""
444 444 try:
445 445 f = self.opener(self.indexfile)
446 446 i = f.read(4)
447 447 f.seek(0)
448 448 if len(i) > 0:
449 449 v = struct.unpack(versionformat, i)[0]
450 450 except IOError, inst:
451 451 if inst.errno != errno.ENOENT:
452 452 raise
453 453
454 454 self.version = v
455 455 self._inline = v & REVLOGNGINLINEDATA
456 456 flags = v & ~0xFFFF
457 457 fmt = v & 0xFFFF
458 458 if fmt == REVLOGV0 and flags:
459 459 raise RevlogError(_("index %s unknown flags %#04x for format v0")
460 460 % (self.indexfile, flags >> 16))
461 461 elif fmt == REVLOGNG and flags & ~REVLOGNGINLINEDATA:
462 462 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
463 463 % (self.indexfile, flags >> 16))
464 464 elif fmt > REVLOGNG:
465 465 raise RevlogError(_("index %s unknown format %d")
466 466 % (self.indexfile, fmt))
467 467
468 468 self._io = revlogio()
469 469 if self.version == REVLOGV0:
470 470 self._io = revlogoldio()
471 471 if i:
472 472 d = self._io.parseindex(f, self._inline)
473 473 self.index, self.nodemap, self._chunkcache = d
474 474
475 475 # add the magic null revision at -1
476 476 self.index.append((0, 0, 0, -1, -1, -1, -1, nullid))
477 477
478 478 def _loadindex(self, start, end):
479 479 """load a block of indexes all at once from the lazy parser"""
480 480 if isinstance(self.index, lazyindex):
481 481 self.index.p.loadindex(start, end)
482 482
483 483 def _loadindexmap(self):
484 484 """loads both the map and the index from the lazy parser"""
485 485 if isinstance(self.index, lazyindex):
486 486 p = self.index.p
487 487 p.loadindex()
488 488 self.nodemap = p.map
489 489
490 490 def _loadmap(self):
491 491 """loads the map from the lazy parser"""
492 492 if isinstance(self.nodemap, lazymap):
493 493 self.nodemap.p.loadmap()
494 494 self.nodemap = self.nodemap.p.map
495 495
496 496 def tip(self):
497 497 return self.node(len(self.index) - 2)
498 498 def count(self):
499 499 return len(self.index) - 1
500 500
501 501 def rev(self, node):
502 502 try:
503 503 return self.nodemap[node]
504 504 except KeyError:
505 505 raise LookupError(_('%s: no node %s') % (self.indexfile, hex(node)))
506 506 def node(self, rev):
507 507 return self.index[rev][7]
508 508 def linkrev(self, node):
509 509 return self.index[self.rev(node)][4]
510 510 def parents(self, node):
511 511 d = self.index[self.rev(node)][5:7]
512 512 return (self.node(d[0]), self.node(d[1]))
513 513 def parentrevs(self, rev):
514 514 return self.index[rev][5:7]
515 515 def start(self, rev):
516 516 return int(self.index[rev][0] >> 16)
517 517 def end(self, rev):
518 518 return self.start(rev) + self.length(rev)
519 519 def length(self, rev):
520 520 return self.index[rev][1]
521 521 def base(self, rev):
522 522 return self.index[rev][3]
523 523
524 524 def size(self, rev):
525 525 """return the length of the uncompressed text for a given revision"""
526 526 l = self.index[rev][2]
527 527 if l >= 0:
528 528 return l
529 529
530 530 t = self.revision(self.node(rev))
531 531 return len(t)
532 532
533 533 # alternate implementation, The advantage to this code is it
534 534 # will be faster for a single revision. But, the results are not
535 535 # cached, so finding the size of every revision will be slower.
536 536 """
537 537 if self.cache and self.cache[1] == rev:
538 538 return len(self.cache[2])
539 539
540 540 base = self.base(rev)
541 541 if self.cache and self.cache[1] >= base and self.cache[1] < rev:
542 542 base = self.cache[1]
543 543 text = self.cache[2]
544 544 else:
545 545 text = self.revision(self.node(base))
546 546
547 547 l = len(text)
548 548 for x in xrange(base + 1, rev + 1):
549 549 l = mdiff.patchedsize(l, self.chunk(x))
550 550 return l
551 551 """
552 552
553 553 def reachable(self, node, stop=None):
554 554 """return a hash of all nodes ancestral to a given node, including
555 555 the node itself, stopping when stop is matched"""
556 556 reachable = {}
557 557 visit = [node]
558 558 reachable[node] = 1
559 559 if stop:
560 560 stopn = self.rev(stop)
561 561 else:
562 562 stopn = 0
563 563 while visit:
564 564 n = visit.pop(0)
565 565 if n == stop:
566 566 continue
567 567 if n == nullid:
568 568 continue
569 569 for p in self.parents(n):
570 570 if self.rev(p) < stopn:
571 571 continue
572 572 if p not in reachable:
573 573 reachable[p] = 1
574 574 visit.append(p)
575 575 return reachable
576 576
577 577 def nodesbetween(self, roots=None, heads=None):
578 578 """Return a tuple containing three elements. Elements 1 and 2 contain
579 579 a final list bases and heads after all the unreachable ones have been
580 580 pruned. Element 0 contains a topologically sorted list of all
581 581
582 582 nodes that satisfy these constraints:
583 583 1. All nodes must be descended from a node in roots (the nodes on
584 584 roots are considered descended from themselves).
585 585 2. All nodes must also be ancestors of a node in heads (the nodes in
586 586 heads are considered to be their own ancestors).
587 587
588 588 If roots is unspecified, nullid is assumed as the only root.
589 589 If heads is unspecified, it is taken to be the output of the
590 590 heads method (i.e. a list of all nodes in the repository that
591 591 have no children)."""
592 592 nonodes = ([], [], [])
593 593 if roots is not None:
594 594 roots = list(roots)
595 595 if not roots:
596 596 return nonodes
597 597 lowestrev = min([self.rev(n) for n in roots])
598 598 else:
599 599 roots = [nullid] # Everybody's a descendent of nullid
600 600 lowestrev = nullrev
601 601 if (lowestrev == nullrev) and (heads is None):
602 602 # We want _all_ the nodes!
603 603 return ([self.node(r) for r in xrange(0, self.count())],
604 604 [nullid], list(self.heads()))
605 605 if heads is None:
606 606 # All nodes are ancestors, so the latest ancestor is the last
607 607 # node.
608 608 highestrev = self.count() - 1
609 609 # Set ancestors to None to signal that every node is an ancestor.
610 610 ancestors = None
611 611 # Set heads to an empty dictionary for later discovery of heads
612 612 heads = {}
613 613 else:
614 614 heads = list(heads)
615 615 if not heads:
616 616 return nonodes
617 617 ancestors = {}
618 618 # Turn heads into a dictionary so we can remove 'fake' heads.
619 619 # Also, later we will be using it to filter out the heads we can't
620 620 # find from roots.
621 621 heads = dict.fromkeys(heads, 0)
622 622 # Start at the top and keep marking parents until we're done.
623 623 nodestotag = heads.keys()
624 624 # Remember where the top was so we can use it as a limit later.
625 625 highestrev = max([self.rev(n) for n in nodestotag])
626 626 while nodestotag:
627 627 # grab a node to tag
628 628 n = nodestotag.pop()
629 629 # Never tag nullid
630 630 if n == nullid:
631 631 continue
632 632 # A node's revision number represents its place in a
633 633 # topologically sorted list of nodes.
634 634 r = self.rev(n)
635 635 if r >= lowestrev:
636 636 if n not in ancestors:
637 637 # If we are possibly a descendent of one of the roots
638 638 # and we haven't already been marked as an ancestor
639 639 ancestors[n] = 1 # Mark as ancestor
640 640 # Add non-nullid parents to list of nodes to tag.
641 641 nodestotag.extend([p for p in self.parents(n) if
642 642 p != nullid])
643 643 elif n in heads: # We've seen it before, is it a fake head?
644 644 # So it is, real heads should not be the ancestors of
645 645 # any other heads.
646 646 heads.pop(n)
647 647 if not ancestors:
648 648 return nonodes
649 649 # Now that we have our set of ancestors, we want to remove any
650 650 # roots that are not ancestors.
651 651
652 652 # If one of the roots was nullid, everything is included anyway.
653 653 if lowestrev > nullrev:
654 654 # But, since we weren't, let's recompute the lowest rev to not
655 655 # include roots that aren't ancestors.
656 656
657 657 # Filter out roots that aren't ancestors of heads
658 658 roots = [n for n in roots if n in ancestors]
659 659 # Recompute the lowest revision
660 660 if roots:
661 661 lowestrev = min([self.rev(n) for n in roots])
662 662 else:
663 663 # No more roots? Return empty list
664 664 return nonodes
665 665 else:
666 666 # We are descending from nullid, and don't need to care about
667 667 # any other roots.
668 668 lowestrev = nullrev
669 669 roots = [nullid]
670 670 # Transform our roots list into a 'set' (i.e. a dictionary where the
671 671 # values don't matter.
672 672 descendents = dict.fromkeys(roots, 1)
673 673 # Also, keep the original roots so we can filter out roots that aren't
674 674 # 'real' roots (i.e. are descended from other roots).
675 675 roots = descendents.copy()
676 676 # Our topologically sorted list of output nodes.
677 677 orderedout = []
678 678 # Don't start at nullid since we don't want nullid in our output list,
679 679 # and if nullid shows up in descedents, empty parents will look like
680 680 # they're descendents.
681 681 for r in xrange(max(lowestrev, 0), highestrev + 1):
682 682 n = self.node(r)
683 683 isdescendent = False
684 684 if lowestrev == nullrev: # Everybody is a descendent of nullid
685 685 isdescendent = True
686 686 elif n in descendents:
687 687 # n is already a descendent
688 688 isdescendent = True
689 689 # This check only needs to be done here because all the roots
690 690 # will start being marked is descendents before the loop.
691 691 if n in roots:
692 692 # If n was a root, check if it's a 'real' root.
693 693 p = tuple(self.parents(n))
694 694 # If any of its parents are descendents, it's not a root.
695 695 if (p[0] in descendents) or (p[1] in descendents):
696 696 roots.pop(n)
697 697 else:
698 698 p = tuple(self.parents(n))
699 699 # A node is a descendent if either of its parents are
700 700 # descendents. (We seeded the dependents list with the roots
701 701 # up there, remember?)
702 702 if (p[0] in descendents) or (p[1] in descendents):
703 703 descendents[n] = 1
704 704 isdescendent = True
705 705 if isdescendent and ((ancestors is None) or (n in ancestors)):
706 706 # Only include nodes that are both descendents and ancestors.
707 707 orderedout.append(n)
708 708 if (ancestors is not None) and (n in heads):
709 709 # We're trying to figure out which heads are reachable
710 710 # from roots.
711 711 # Mark this head as having been reached
712 712 heads[n] = 1
713 713 elif ancestors is None:
714 714 # Otherwise, we're trying to discover the heads.
715 715 # Assume this is a head because if it isn't, the next step
716 716 # will eventually remove it.
717 717 heads[n] = 1
718 718 # But, obviously its parents aren't.
719 719 for p in self.parents(n):
720 720 heads.pop(p, None)
721 721 heads = [n for n in heads.iterkeys() if heads[n] != 0]
722 722 roots = roots.keys()
723 723 assert orderedout
724 724 assert roots
725 725 assert heads
726 726 return (orderedout, roots, heads)
727 727
728 728 def heads(self, start=None, stop=None):
729 729 """return the list of all nodes that have no children
730 730
731 731 if start is specified, only heads that are descendants of
732 732 start will be returned
733 733 if stop is specified, it will consider all the revs from stop
734 734 as if they had no children
735 735 """
736 736 if start is None and stop is None:
737 737 count = self.count()
738 738 if not count:
739 739 return [nullid]
740 740 ishead = [1] * (count + 1)
741 741 index = self.index
742 742 for r in xrange(count):
743 743 e = index[r]
744 744 ishead[e[5]] = ishead[e[6]] = 0
745 745 return [self.node(r) for r in xrange(count) if ishead[r]]
746 746
747 747 if start is None:
748 748 start = nullid
749 749 if stop is None:
750 750 stop = []
751 751 stoprevs = dict.fromkeys([self.rev(n) for n in stop])
752 752 startrev = self.rev(start)
753 753 reachable = {startrev: 1}
754 754 heads = {startrev: 1}
755 755
756 756 parentrevs = self.parentrevs
757 757 for r in xrange(startrev + 1, self.count()):
758 758 for p in parentrevs(r):
759 759 if p in reachable:
760 760 if r not in stoprevs:
761 761 reachable[r] = 1
762 762 heads[r] = 1
763 763 if p in heads and p not in stoprevs:
764 764 del heads[p]
765 765
766 766 return [self.node(r) for r in heads]
767 767
768 768 def children(self, node):
769 769 """find the children of a given node"""
770 770 c = []
771 771 p = self.rev(node)
772 772 for r in range(p + 1, self.count()):
773 773 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
774 774 if prevs:
775 775 for pr in prevs:
776 776 if pr == p:
777 777 c.append(self.node(r))
778 778 elif p == nullrev:
779 779 c.append(self.node(r))
780 780 return c
781 781
782 782 def _match(self, id):
783 783 if isinstance(id, (long, int)):
784 784 # rev
785 785 return self.node(id)
786 786 if len(id) == 20:
787 787 # possibly a binary node
788 788 # odds of a binary node being all hex in ASCII are 1 in 10**25
789 789 try:
790 790 node = id
791 791 r = self.rev(node) # quick search the index
792 792 return node
793 793 except LookupError:
794 794 pass # may be partial hex id
795 795 try:
796 796 # str(rev)
797 797 rev = int(id)
798 798 if str(rev) != id:
799 799 raise ValueError
800 800 if rev < 0:
801 801 rev = self.count() + rev
802 802 if rev < 0 or rev >= self.count():
803 803 raise ValueError
804 804 return self.node(rev)
805 805 except (ValueError, OverflowError):
806 806 pass
807 807 if len(id) == 40:
808 808 try:
809 809 # a full hex nodeid?
810 810 node = bin(id)
811 811 r = self.rev(node)
812 812 return node
813 813 except TypeError:
814 814 pass
815 815
816 816 def _partialmatch(self, id):
817 817 if len(id) < 40:
818 818 try:
819 819 # hex(node)[:...]
820 820 bin_id = bin(id[:len(id) & ~1]) # grab an even number of digits
821 821 node = None
822 822 for n in self.nodemap:
823 823 if n.startswith(bin_id) and hex(n).startswith(id):
824 824 if node is not None:
825 825 raise LookupError(_("Ambiguous identifier"))
826 826 node = n
827 827 if node is not None:
828 828 return node
829 829 except TypeError:
830 830 pass
831 831
832 832 def lookup(self, id):
833 833 """locate a node based on:
834 834 - revision number or str(revision number)
835 835 - nodeid or subset of hex nodeid
836 836 """
837 837 n = self._match(id)
838 838 if n is not None:
839 839 return n
840 840 n = self._partialmatch(id)
841 841 if n:
842 842 return n
843 843
844 844 raise LookupError(_("No match found"))
845 845
846 846 def cmp(self, node, text):
847 847 """compare text with a given file revision"""
848 848 p1, p2 = self.parents(node)
849 849 return hash(text, p1, p2) != node
850 850
851 851 def chunk(self, rev, df=None):
852 852 def loadcache(df):
853 853 if not df:
854 854 if self._inline:
855 855 df = self.opener(self.indexfile)
856 856 else:
857 857 df = self.opener(self.datafile)
858 858 df.seek(start)
859 859 self._chunkcache = (start, df.read(cache_length))
860 860
861 861 start, length = self.start(rev), self.length(rev)
862 862 if self._inline:
863 863 start += (rev + 1) * self._io.size
864 864 end = start + length
865 865
866 866 offset = 0
867 867 if not self._chunkcache:
868 868 cache_length = max(65536, length)
869 869 loadcache(df)
870 870 else:
871 871 cache_start = self._chunkcache[0]
872 872 cache_length = len(self._chunkcache[1])
873 873 cache_end = cache_start + cache_length
874 874 if start >= cache_start and end <= cache_end:
875 875 # it is cached
876 876 offset = start - cache_start
877 877 else:
878 878 cache_length = max(65536, length)
879 879 loadcache(df)
880 880
881 881 # avoid copying large chunks
882 882 c = self._chunkcache[1]
883 883 if cache_length != length:
884 884 c = c[offset:offset + length]
885 885
886 886 return decompress(c)
887 887
888 888 def delta(self, node):
889 889 """return or calculate a delta between a node and its predecessor"""
890 890 r = self.rev(node)
891 891 return self.revdiff(r - 1, r)
892 892
893 893 def revdiff(self, rev1, rev2):
894 894 """return or calculate a delta between two revisions"""
895 895 if rev1 + 1 == rev2 and self.base(rev1) == self.base(rev2):
896 896 return self.chunk(rev2)
897 897
898 898 return mdiff.textdiff(self.revision(self.node(rev1)),
899 899 self.revision(self.node(rev2)))
900 900
901 901 def revision(self, node):
902 902 """return an uncompressed revision of a given"""
903 903 if node == nullid:
904 904 return ""
905 905 if self._cache and self._cache[0] == node:
906 906 return self._cache[2]
907 907
908 908 # look up what we need to read
909 909 text = None
910 910 rev = self.rev(node)
911 911 base = self.base(rev)
912 912
913 913 # check rev flags
914 914 if self.index[rev][0] & 0xFFFF:
915 915 raise RevlogError(_('incompatible revision flag %x') %
916 916 (self.index[rev][0] & 0xFFFF))
917 917
918 918 if self._inline:
919 919 # we probably have the whole chunk cached
920 920 df = None
921 921 else:
922 922 df = self.opener(self.datafile)
923 923
924 924 # do we have useful data cached?
925 925 if self._cache and self._cache[1] >= base and self._cache[1] < rev:
926 926 base = self._cache[1]
927 927 text = self._cache[2]
928 928 self._loadindex(base, rev + 1)
929 929 else:
930 930 self._loadindex(base, rev + 1)
931 931 text = self.chunk(base, df=df)
932 932
933 933 bins = [self.chunk(r, df) for r in xrange(base + 1, rev + 1)]
934 934 text = mdiff.patches(text, bins)
935 935 p1, p2 = self.parents(node)
936 936 if node != hash(text, p1, p2):
937 937 raise RevlogError(_("integrity check failed on %s:%d")
938 938 % (self.datafile, rev))
939 939
940 940 self._cache = (node, rev, text)
941 941 return text
942 942
943 943 def checkinlinesize(self, tr, fp=None):
944 944 if not self._inline:
945 945 return
946 946 if not fp:
947 947 fp = self.opener(self.indexfile, 'r')
948 948 fp.seek(0, 2)
949 949 size = fp.tell()
950 950 if size < 131072:
951 951 return
952 952 trinfo = tr.find(self.indexfile)
953 953 if trinfo == None:
954 954 raise RevlogError(_("%s not found in the transaction")
955 955 % self.indexfile)
956 956
957 957 trindex = trinfo[2]
958 958 dataoff = self.start(trindex)
959 959
960 960 tr.add(self.datafile, dataoff)
961 961 df = self.opener(self.datafile, 'w')
962 962 calc = self._io.size
963 963 for r in xrange(self.count()):
964 964 start = self.start(r) + (r + 1) * calc
965 965 length = self.length(r)
966 966 fp.seek(start)
967 967 d = fp.read(length)
968 968 df.write(d)
969 969 fp.close()
970 970 df.close()
971 971 fp = self.opener(self.indexfile, 'w', atomictemp=True)
972 972 self.version &= ~(REVLOGNGINLINEDATA)
973 973 self._inline = False
974 974 for i in xrange(self.count()):
975 975 e = self._io.packentry(self.index[i], self.node, self.version, i)
976 976 fp.write(e)
977 977
978 978 # if we don't call rename, the temp file will never replace the
979 979 # real index
980 980 fp.rename()
981 981
982 982 tr.replace(self.indexfile, trindex * calc)
983 983 self._chunkcache = None
984 984
985 985 def addrevision(self, text, transaction, link, p1, p2, d=None):
986 986 """add a revision to the log
987 987
988 988 text - the revision data to add
989 989 transaction - the transaction object used for rollback
990 990 link - the linkrev data to add
991 991 p1, p2 - the parent nodeids of the revision
992 992 d - an optional precomputed delta
993 993 """
994 994 dfh = None
995 995 if not self._inline:
996 996 dfh = self.opener(self.datafile, "a")
997 997 ifh = self.opener(self.indexfile, "a+")
998 998 return self._addrevision(text, transaction, link, p1, p2, d, ifh, dfh)
999 999
1000 1000 def _addrevision(self, text, transaction, link, p1, p2, d, ifh, dfh):
1001 1001 node = hash(text, p1, p2)
1002 1002 if node in self.nodemap:
1003 1003 return node
1004 1004
1005 1005 curr = self.count()
1006 1006 prev = curr - 1
1007 1007 base = self.base(prev)
1008 1008 offset = self.end(prev)
1009 1009
1010 1010 if curr:
1011 1011 if not d:
1012 1012 ptext = self.revision(self.node(prev))
1013 1013 d = mdiff.textdiff(ptext, text)
1014 1014 data = compress(d)
1015 1015 l = len(data[1]) + len(data[0])
1016 1016 dist = l + offset - self.start(base)
1017 1017
1018 1018 # full versions are inserted when the needed deltas
1019 1019 # become comparable to the uncompressed text
1020 1020 if not curr or dist > len(text) * 2:
1021 1021 data = compress(text)
1022 1022 l = len(data[1]) + len(data[0])
1023 1023 base = curr
1024 1024
1025 1025 e = (offset_type(offset, 0), l, len(text),
1026 1026 base, link, self.rev(p1), self.rev(p2), node)
1027 1027 self.index.insert(-1, e)
1028 1028 self.nodemap[node] = curr
1029 1029
1030 1030 entry = self._io.packentry(e, self.node, self.version, curr)
1031 1031 if not self._inline:
1032 1032 transaction.add(self.datafile, offset)
1033 1033 transaction.add(self.indexfile, curr * len(entry))
1034 1034 if data[0]:
1035 1035 dfh.write(data[0])
1036 1036 dfh.write(data[1])
1037 1037 dfh.flush()
1038 1038 ifh.write(entry)
1039 1039 else:
1040 1040 offset += curr * self._io.size
1041 1041 transaction.add(self.indexfile, offset, curr)
1042 1042 ifh.write(entry)
1043 1043 ifh.write(data[0])
1044 1044 ifh.write(data[1])
1045 1045 self.checkinlinesize(transaction, ifh)
1046 1046
1047 1047 self._cache = (node, curr, text)
1048 1048 return node
1049 1049
1050 1050 def ancestor(self, a, b):
1051 1051 """calculate the least common ancestor of nodes a and b"""
1052 1052
1053 1053 def parents(rev):
1054 1054 return [p for p in self.parentrevs(rev) if p != nullrev]
1055 1055
1056 1056 c = ancestor.ancestor(self.rev(a), self.rev(b), parents)
1057 1057 if c is None:
1058 1058 return nullid
1059 1059
1060 1060 return self.node(c)
1061 1061
1062 1062 def group(self, nodelist, lookup, infocollect=None):
1063 1063 """calculate a delta group
1064 1064
1065 1065 Given a list of changeset revs, return a set of deltas and
1066 1066 metadata corresponding to nodes. the first delta is
1067 1067 parent(nodes[0]) -> nodes[0] the receiver is guaranteed to
1068 1068 have this parent as it has all history before these
1069 1069 changesets. parent is parent[0]
1070 1070 """
1071 1071 revs = [self.rev(n) for n in nodelist]
1072 1072
1073 1073 # if we don't have any revisions touched by these changesets, bail
1074 1074 if not revs:
1075 1075 yield changegroup.closechunk()
1076 1076 return
1077 1077
1078 1078 # add the parent of the first rev
1079 1079 p = self.parents(self.node(revs[0]))[0]
1080 1080 revs.insert(0, self.rev(p))
1081 1081
1082 1082 # build deltas
1083 1083 for d in xrange(0, len(revs) - 1):
1084 1084 a, b = revs[d], revs[d + 1]
1085 1085 nb = self.node(b)
1086 1086
1087 1087 if infocollect is not None:
1088 1088 infocollect(nb)
1089 1089
1090 1090 p = self.parents(nb)
1091 1091 meta = nb + p[0] + p[1] + lookup(nb)
1092 1092 if a == -1:
1093 1093 d = self.revision(nb)
1094 1094 meta += mdiff.trivialdiffheader(len(d))
1095 1095 else:
1096 1096 d = self.revdiff(a, b)
1097 1097 yield changegroup.chunkheader(len(meta) + len(d))
1098 1098 yield meta
1099 1099 yield d
1100 1100
1101 1101 yield changegroup.closechunk()
1102 1102
1103 1103 def addgroup(self, revs, linkmapper, transaction, unique=0):
1104 1104 """
1105 1105 add a delta group
1106 1106
1107 1107 given a set of deltas, add them to the revision log. the
1108 1108 first delta is against its parent, which should be in our
1109 1109 log, the rest are against the previous delta.
1110 1110 """
1111 1111
1112 1112 #track the base of the current delta log
1113 1113 r = self.count()
1114 1114 t = r - 1
1115 1115 node = None
1116 1116
1117 1117 base = prev = nullrev
1118 1118 start = end = textlen = 0
1119 1119 if r:
1120 1120 end = self.end(t)
1121 1121
1122 1122 ifh = self.opener(self.indexfile, "a+")
1123 1123 isize = r * self._io.size
1124 1124 if self._inline:
1125 1125 transaction.add(self.indexfile, end + isize, r)
1126 1126 dfh = None
1127 1127 else:
1128 1128 transaction.add(self.indexfile, isize, r)
1129 1129 transaction.add(self.datafile, end)
1130 1130 dfh = self.opener(self.datafile, "a")
1131 1131
1132 1132 # loop through our set of deltas
1133 1133 chain = None
1134 1134 for chunk in revs:
1135 1135 node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
1136 1136 link = linkmapper(cs)
1137 1137 if node in self.nodemap:
1138 1138 # this can happen if two branches make the same change
1139 1139 # if unique:
1140 1140 # raise RevlogError(_("already have %s") % hex(node[:4]))
1141 1141 chain = node
1142 1142 continue
1143 delta = chunk[80:]
1143 delta = buffer(chunk, 80)
1144 del chunk
1144 1145
1145 1146 for p in (p1, p2):
1146 1147 if not p in self.nodemap:
1147 1148 raise LookupError(_("unknown parent %s") % short(p))
1148 1149
1149 1150 if not chain:
1150 1151 # retrieve the parent revision of the delta chain
1151 1152 chain = p1
1152 1153 if not chain in self.nodemap:
1153 1154 raise LookupError(_("unknown base %s") % short(chain[:4]))
1154 1155
1155 1156 # full versions are inserted when the needed deltas become
1156 1157 # comparable to the uncompressed text or when the previous
1157 1158 # version is not the one we have a delta against. We use
1158 1159 # the size of the previous full rev as a proxy for the
1159 1160 # current size.
1160 1161
1161 1162 if chain == prev:
1162 tempd = compress(delta)
1163 cdelta = tempd[0] + tempd[1]
1163 cdelta = compress(delta)
1164 cdeltalen = len(cdelta[0]) + len(cdelta[1])
1164 1165 textlen = mdiff.patchedsize(textlen, delta)
1165 1166
1166 if chain != prev or (end - start + len(cdelta)) > textlen * 2:
1167 if chain != prev or (end - start + cdeltalen) > textlen * 2:
1167 1168 # flush our writes here so we can read it in revision
1168 1169 if dfh:
1169 1170 dfh.flush()
1170 1171 ifh.flush()
1171 1172 text = self.revision(chain)
1172 text = mdiff.patches(text, [delta])
1173 if len(text) == 0:
1174 # skip over trivial delta header
1175 text = buffer(delta, 12)
1176 else:
1177 text = mdiff.patches(text, [delta])
1178 del delta
1173 1179 chk = self._addrevision(text, transaction, link, p1, p2, None,
1174 1180 ifh, dfh)
1175 1181 if not dfh and not self._inline:
1176 1182 # addrevision switched from inline to conventional
1177 1183 # reopen the index
1178 1184 dfh = self.opener(self.datafile, "a")
1179 1185 ifh = self.opener(self.indexfile, "a")
1180 1186 if chk != node:
1181 1187 raise RevlogError(_("consistency error adding group"))
1182 1188 textlen = len(text)
1183 1189 else:
1184 e = (offset_type(end, 0), len(cdelta), textlen, base,
1190 e = (offset_type(end, 0), cdeltalen, textlen, base,
1185 1191 link, self.rev(p1), self.rev(p2), node)
1186 1192 self.index.insert(-1, e)
1187 1193 self.nodemap[node] = r
1188 1194 entry = self._io.packentry(e, self.node, self.version, r)
1189 1195 if self._inline:
1190 1196 ifh.write(entry)
1191 ifh.write(cdelta)
1197 ifh.write(cdelta[0])
1198 ifh.write(cdelta[1])
1192 1199 self.checkinlinesize(transaction, ifh)
1193 1200 if not self._inline:
1194 1201 dfh = self.opener(self.datafile, "a")
1195 1202 ifh = self.opener(self.indexfile, "a")
1196 1203 else:
1197 dfh.write(cdelta)
1204 dfh.write(cdelta[0])
1205 dfh.write(cdelta[1])
1198 1206 ifh.write(entry)
1199 1207
1200 1208 t, r, chain, prev = r, r + 1, node, node
1201 1209 base = self.base(t)
1202 1210 start = self.start(base)
1203 1211 end = self.end(t)
1204 1212
1205 1213 return node
1206 1214
1207 1215 def strip(self, rev, minlink):
1208 1216 if self.count() == 0 or rev >= self.count():
1209 1217 return
1210 1218
1211 1219 if isinstance(self.index, lazyindex):
1212 1220 self._loadindexmap()
1213 1221
1214 1222 # When stripping away a revision, we need to make sure it
1215 1223 # does not actually belong to an older changeset.
1216 1224 # The minlink parameter defines the oldest revision
1217 1225 # we're allowed to strip away.
1218 1226 while minlink > self.index[rev][4]:
1219 1227 rev += 1
1220 1228 if rev >= self.count():
1221 1229 return
1222 1230
1223 1231 # first truncate the files on disk
1224 1232 end = self.start(rev)
1225 1233 if not self._inline:
1226 1234 df = self.opener(self.datafile, "a")
1227 1235 df.truncate(end)
1228 1236 end = rev * self._io.size
1229 1237 else:
1230 1238 end += rev * self._io.size
1231 1239
1232 1240 indexf = self.opener(self.indexfile, "a")
1233 1241 indexf.truncate(end)
1234 1242
1235 1243 # then reset internal state in memory to forget those revisions
1236 1244 self._cache = None
1237 1245 self._chunkcache = None
1238 1246 for x in xrange(rev, self.count()):
1239 1247 del self.nodemap[self.node(x)]
1240 1248
1241 1249 del self.index[rev:-1]
1242 1250
1243 1251 def checksize(self):
1244 1252 expected = 0
1245 1253 if self.count():
1246 1254 expected = max(0, self.end(self.count() - 1))
1247 1255
1248 1256 try:
1249 1257 f = self.opener(self.datafile)
1250 1258 f.seek(0, 2)
1251 1259 actual = f.tell()
1252 1260 dd = actual - expected
1253 1261 except IOError, inst:
1254 1262 if inst.errno != errno.ENOENT:
1255 1263 raise
1256 1264 dd = 0
1257 1265
1258 1266 try:
1259 1267 f = self.opener(self.indexfile)
1260 1268 f.seek(0, 2)
1261 1269 actual = f.tell()
1262 1270 s = self._io.size
1263 1271 i = max(0, actual / s)
1264 1272 di = actual - (i * s)
1265 1273 if self._inline:
1266 1274 databytes = 0
1267 1275 for r in xrange(self.count()):
1268 1276 databytes += max(0, self.length(r))
1269 1277 dd = 0
1270 1278 di = actual - self.count() * s - databytes
1271 1279 except IOError, inst:
1272 1280 if inst.errno != errno.ENOENT:
1273 1281 raise
1274 1282 di = 0
1275 1283
1276 1284 return (dd, di)
General Comments 0
You need to be logged in to leave comments. Login now