lazyparser: up the blocksize from 512 bytes to 64k
Matt Mackall
r4992:0a676643 default
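The change below is in lazyparser.loadindex in revlog.py: the default read size for a full index scan rises from 512 bytes to 64 KiB, and the window used for a partial load widens from 64 to 1024 index entries. A minimal sketch of the size arithmetic, assuming the 64-byte v1 entry size defined later in this file (illustrative only, not part of the patch):

    import struct

    s = struct.calcsize(">Qiiiiii20s12x")  # 64 bytes per revlogng index entry

    # before: blocks of 512 bytes, rounded down to whole entries
    old_blocksize = (512 // s) * s         # 8 entries per read
    # after: blocks of 64k, likewise rounded to whole entries
    new_blocksize = (65536 // s) * s       # 1024 entries per read

    assert old_blocksize == 8 * s and new_blocksize == 1024 * s

Fewer, larger reads amortize seek and read-call overhead when scanning a large index.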
@@ -1,1263 +1,1263 @@
1 1 """
2 2 revlog.py - storage back-end for mercurial
3 3
4 4 This provides efficient delta storage with O(1) retrieve and append
5 5 and O(changes) merge between branches
6 6
7 7 Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
8 8
9 9 This software may be used and distributed according to the terms
10 10 of the GNU General Public License, incorporated herein by reference.
11 11 """
12 12
13 13 from node import *
14 14 from i18n import _
15 15 import binascii, changegroup, errno, ancestor, mdiff, os
16 16 import sha, struct, util, zlib
17 17
18 18 # revlog flags
19 19 REVLOGV0 = 0
20 20 REVLOGNG = 1
21 21 REVLOGNGINLINEDATA = (1 << 16)
22 22 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
23 23 REVLOG_DEFAULT_FORMAT = REVLOGNG
24 24 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
25 25
26 26 class RevlogError(Exception):
27 27 pass
28 28 class LookupError(RevlogError):
29 29 pass
30 30
31 31 def getoffset(q):
32 32 if q & 0xFFFF:
33 33 raise RevlogError(_('incompatible revision flag %x') % q)
34 34 return int(q >> 16)
35 35
36 36 def gettype(q):
37 37 return int(q & 0xFFFF)
38 38
39 39 def offset_type(offset, type):
40 40 return long(long(offset) << 16 | type)
41 41
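These three helpers pack a data-file offset and a 16-bit type/flags field into the single 64-bit value stored as the first field of each index entry. A quick round-trip sketch, assuming this module's namespace and hypothetical values:

    q = offset_type(4096, 0)     # offset 4096, type 0
    assert gettype(q) == 0
    assert getoffset(q) == 4096  # raises RevlogError if any type bit were set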
42 42 def hash(text, p1, p2):
43 43 """generate a hash from the given text and its parent hashes
44 44
45 45 This hash combines both the current file contents and its history
46 46 in a manner that makes it easy to distinguish nodes with the same
47 47 content in the revision graph.
48 48 """
49 49 l = [p1, p2]
50 50 l.sort()
51 51 s = sha.new(l[0])
52 52 s.update(l[1])
53 53 s.update(text)
54 54 return s.digest()
55 55
56 56 def compress(text):
57 57 """ generate a possibly-compressed representation of text """
58 58 if not text:
59 59 return ("", text)
60 60 if len(text) < 44:
61 61 if text[0] == '\0':
62 62 return ("", text)
63 63 return ('u', text)
64 64 bin = zlib.compress(text)
65 65 if len(bin) > len(text):
66 66 if text[0] == '\0':
67 67 return ("", text)
68 68 return ('u', text)
69 69 return ("", bin)
70 70
71 71 def decompress(bin):
72 72 """ decompress the given input """
73 73 if not bin:
74 74 return bin
75 75 t = bin[0]
76 76 if t == '\0':
77 77 return bin
78 78 if t == 'x':
79 79 return zlib.decompress(bin)
80 80 if t == 'u':
81 81 return bin[1:]
82 82 raise RevlogError(_("unknown compression type %r") % t)
83 83
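compress and decompress agree on a one-byte tag: 'u' marks text stored verbatim, while zlib output needs no tag because a zlib stream always begins with 'x' (empty and NUL-led texts are likewise self-describing). A round-trip sketch under Python 2 string semantics, using these two functions with illustrative data:

    head, body = compress("some text" * 100)  # compressible -> zlib, no tag
    assert head == "" and decompress(head + body) == "some text" * 100

    head, body = compress("tiny")             # under 44 bytes -> stored as-is
    assert head == "u" and decompress(head + body) == "tiny"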
84 84 class lazyparser(object):
85 85 """
86 86 this class avoids the need to parse the entirety of large indices
87 87 """
88 88
89 89 # lazyparser is not safe to use on windows if win32 extensions are not
90 90 # available. It keeps a file handle open, which makes it impossible
91 91 # to break hardlinks on local cloned repos.
92 92 safe_to_use = os.name != 'nt' or (not util.is_win_9x() and
93 93 hasattr(util, 'win32api'))
94 94
95 95 def __init__(self, dataf, size):
96 96 self.dataf = dataf
97 97 self.s = struct.calcsize(indexformatng)
98 98 self.datasize = size
99 99 self.l = size/self.s
100 100 self.index = [None] * self.l
101 101 self.map = {nullid: nullrev}
102 102 self.allmap = 0
103 103 self.all = 0
104 104 self.mapfind_count = 0
105 105
106 106 def loadmap(self):
107 107 """
108 108 during a commit, we need to make sure the rev being added is
109 109 not a duplicate. This requires loading the entire index,
110 110 which is fairly slow. loadmap can load up just the node map,
111 111 which takes much less time.
112 112 """
113 113 if self.allmap:
114 114 return
115 115 end = self.datasize
116 116 self.allmap = 1
117 117 cur = 0
118 118 count = 0
119 119 blocksize = self.s * 256
120 120 self.dataf.seek(0)
121 121 while cur < end:
122 122 data = self.dataf.read(blocksize)
123 123 off = 0
124 124 for x in xrange(256):
125 125 n = data[off + ngshaoffset:off + ngshaoffset + 20]
126 126 self.map[n] = count
127 127 count += 1
128 128 if count >= self.l:
129 129 break
130 130 off += self.s
131 131 cur += blocksize
132 132
133 133 def loadblock(self, blockstart, blocksize, data=None):
134 134 if self.all:
135 135 return
136 136 if data is None:
137 137 self.dataf.seek(blockstart)
138 138 if blockstart + blocksize > self.datasize:
139 139 # the revlog may have grown since we've started running,
140 140 # but we don't have space in self.index for more entries.
141 141 # limit blocksize so that we don't get too much data.
142 142 blocksize = max(self.datasize - blockstart, 0)
143 143 data = self.dataf.read(blocksize)
144 144 lend = len(data) / self.s
145 145 i = blockstart / self.s
146 146 off = 0
147 147 # lazyindex supports __delitem__
148 148 if lend > len(self.index) - i:
149 149 lend = len(self.index) - i
150 150 for x in xrange(lend):
151 151 if self.index[i + x] == None:
152 152 b = data[off : off + self.s]
153 153 self.index[i + x] = b
154 154 n = b[ngshaoffset:ngshaoffset + 20]
155 155 self.map[n] = i + x
156 156 off += self.s
157 157
158 158 def findnode(self, node):
159 159 """search backwards through the index file for a specific node"""
160 160 if self.allmap:
161 161 return None
162 162
163 163 # hg log will cause many many searches for the manifest
164 164 # nodes. After we get called a few times, just load the whole
165 165 # thing.
166 166 if self.mapfind_count > 8:
167 167 self.loadmap()
168 168 if node in self.map:
169 169 return node
170 170 return None
171 171 self.mapfind_count += 1
172 172 last = self.l - 1
173 173 while self.index[last] != None:
174 174 if last == 0:
175 175 self.all = 1
176 176 self.allmap = 1
177 177 return None
178 178 last -= 1
179 179 end = (last + 1) * self.s
180 180 blocksize = self.s * 256
181 181 while end >= 0:
182 182 start = max(end - blocksize, 0)
183 183 self.dataf.seek(start)
184 184 data = self.dataf.read(end - start)
185 185 findend = end - start
186 186 while True:
187 187 # we're searching backwards, so we have to make sure
188 188 # we don't find a changeset where this node is a parent
189 189 off = data.rfind(node, 0, findend)
190 190 findend = off
191 191 if off >= 0:
192 192 i = off / self.s
193 193 off = i * self.s
194 194 n = data[off + ngshaoffset:off + ngshaoffset + 20]
195 195 if n == node:
196 196 self.map[n] = i + start / self.s
197 197 return node
198 198 else:
199 199 break
200 200 end -= blocksize
201 201 return None
202 202
203 203 def loadindex(self, i=None, end=None):
204 204 if self.all:
205 205 return
206 206 all = False
207 207 if i == None:
208 208 blockstart = 0
209 blocksize = (512 / self.s) * self.s
209 blocksize = (65536 / self.s) * self.s
210 210 end = self.datasize
211 211 all = True
212 212 else:
213 213 if end:
214 214 blockstart = i * self.s
215 215 end = end * self.s
216 216 blocksize = end - blockstart
217 217 else:
218 blockstart = (i & ~63) * self.s
219 blocksize = self.s * 64
218 blockstart = (i & ~1023) * self.s
219 blocksize = self.s * 1024
220 220 end = blockstart + blocksize
221 221 while blockstart < end:
222 222 self.loadblock(blockstart, blocksize)
223 223 blockstart += blocksize
224 224 if all:
225 225 self.all = True
226 226
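In the second hunk above, the mask arithmetic keeps partial loads aligned to whole blocks of entries. A small worked example with illustrative values:

    s = 64                        # index entry size
    i = 5000                      # revision being requested
    blockstart = (i & ~1023) * s  # entry 4096: start of its 1024-entry block
    blocksize = 1024 * s          # one 64k read (previously 64 entries, 4k)

so any revision in the range 4096-5119 is then served from that single read.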
227 227 class lazyindex(object):
228 228 """a lazy version of the index array"""
229 229 def __init__(self, parser):
230 230 self.p = parser
231 231 def __len__(self):
232 232 return len(self.p.index)
233 233 def load(self, pos):
234 234 if pos < 0:
235 235 pos += len(self.p.index)
236 236 self.p.loadindex(pos)
237 237 return self.p.index[pos]
238 238 def __getitem__(self, pos):
239 239 ret = self.p.index[pos] or self.load(pos)
240 240 if isinstance(ret, str):
241 241 ret = struct.unpack(indexformatng, ret)
242 242 return ret
243 243 def __setitem__(self, pos, item):
244 244 self.p.index[pos] = item
245 245 def __delitem__(self, pos):
246 246 del self.p.index[pos]
247 247 def insert(self, pos, e):
248 248 self.p.index.insert(pos, e)
249 249 def append(self, e):
250 250 self.p.index.append(e)
251 251
252 252 class lazymap(object):
253 253 """a lazy version of the node map"""
254 254 def __init__(self, parser):
255 255 self.p = parser
256 256 def load(self, key):
257 257 n = self.p.findnode(key)
258 258 if n == None:
259 259 raise KeyError(key)
260 260 def __contains__(self, key):
261 261 if key in self.p.map:
262 262 return True
263 263 self.p.loadmap()
264 264 return key in self.p.map
265 265 def __iter__(self):
266 266 yield nullid
267 267 for i in xrange(self.p.l):
268 268 ret = self.p.index[i]
269 269 if not ret:
270 270 self.p.loadindex(i)
271 271 ret = self.p.index[i]
272 272 if isinstance(ret, str):
273 273 ret = struct.unpack(indexformatng, ret)
274 274 yield ret[7]
275 275 def __getitem__(self, key):
276 276 try:
277 277 return self.p.map[key]
278 278 except KeyError:
279 279 try:
280 280 self.load(key)
281 281 return self.p.map[key]
282 282 except KeyError:
283 283 raise KeyError("node " + hex(key))
284 284 def __setitem__(self, key, val):
285 285 self.p.map[key] = val
286 286 def __delitem__(self, key):
287 287 del self.p.map[key]
288 288
289 289 indexformatv0 = ">4l20s20s20s"
290 290 v0shaoffset = 56
291 291
292 292 class revlogoldio(object):
293 293 def __init__(self):
294 294 self.size = struct.calcsize(indexformatv0)
295 295
296 296 def parseindex(self, fp, inline):
297 297 s = self.size
298 298 index = []
299 299 nodemap = {nullid: nullrev}
300 300 n = off = 0
301 301 data = fp.read()
302 302 l = len(data)
303 303 while off + s <= l:
304 304 cur = data[off:off + s]
305 305 off += s
306 306 e = struct.unpack(indexformatv0, cur)
307 307 # transform to revlogv1 format
308 308 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
309 309 nodemap[e[4]], nodemap[e[5]], e[6])
310 310 index.append(e2)
311 311 nodemap[e[6]] = n
312 312 n += 1
313 313
314 314 return index, nodemap, None
315 315
316 316 def packentry(self, entry, node, version):
317 317 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
318 318 node(entry[5]), node(entry[6]), entry[7])
319 319 return struct.pack(indexformatv0, *e2)
320 320
321 321 # index ng:
322 322 # 6 bytes offset
323 323 # 2 bytes flags
324 324 # 4 bytes compressed length
325 325 # 4 bytes uncompressed length
326 326 # 4 bytes: base rev
327 327 # 4 bytes link rev
328 328 # 4 bytes parent 1 rev
329 329 # 4 bytes parent 2 rev
330 330 # 32 bytes: nodeid
331 331 indexformatng = ">Qiiiiii20s12x"
332 332 ngshaoffset = 32
333 333 versionformat = ">I"
334 334
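As a sanity check on the layout in the comment above, a v1 entry is exactly 64 bytes (standard library only, nothing specific to this patch):

    import struct

    # offset/flags(8) + comp len(4) + uncomp len(4) + base(4) + link(4)
    # + p1(4) + p2(4) + nodeid(20) + padding(12) = 64
    assert struct.calcsize(">Qiiiiii20s12x") == 64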
335 335 class revlogio(object):
336 336 def __init__(self):
337 337 self.size = struct.calcsize(indexformatng)
338 338
339 339 def parseindex(self, fp, inline):
340 340 try:
341 341 size = util.fstat(fp).st_size
342 342 except AttributeError:
343 343 size = 0
344 344
345 345 if lazyparser.safe_to_use and not inline and size > 1000000:
346 346 # big index, let's parse it on demand
347 347 parser = lazyparser(fp, size)
348 348 index = lazyindex(parser)
349 349 nodemap = lazymap(parser)
350 350 e = list(index[0])
351 351 type = gettype(e[0])
352 352 e[0] = offset_type(0, type)
353 353 index[0] = e
354 354 return index, nodemap, None
355 355
356 356 s = self.size
357 357 cache = None
358 358 index = []
359 359 nodemap = {nullid: nullrev}
360 360 n = off = 0
361 361 # if we're not using lazymap, always read the whole index
362 362 data = fp.read()
363 363 l = len(data) - s
364 364 unpack = struct.unpack
365 365 append = index.append
366 366 if inline:
367 367 cache = (0, data)
368 368 while off <= l:
369 369 e = unpack(indexformatng, data[off:off + s])
370 370 nodemap[e[7]] = n
371 371 append(e)
372 372 n += 1
373 373 if e[1] < 0:
374 374 break
375 375 off += e[1] + s
376 376 else:
377 377 while off <= l:
378 378 e = unpack(indexformatng, data[off:off + s])
379 379 nodemap[e[7]] = n
380 380 append(e)
381 381 n += 1
382 382 off += s
383 383
384 384 e = list(index[0])
385 385 type = gettype(e[0])
386 386 e[0] = offset_type(0, type)
387 387 index[0] = e
388 388
389 389 return index, nodemap, cache
390 390
391 391 def packentry(self, entry, node, version):
392 392 p = struct.pack(indexformatng, *entry)
393 393 if not entry[3] and not getoffset(entry[0]) and entry[5] == nullrev:
394 394 p = struct.pack(versionformat, version) + p[4:]
395 395 return p
396 396
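packentry above hides the revlog version word inside the first index entry: entry 0 has offset 0, base 0 and a null first parent, so its leading four bytes are free to carry the version that parseindex reads back (and that the index[0] fixup masks out again). A sketch with assumed field values, Python 2 string semantics:

    import struct

    version = REVLOG_DEFAULT_VERSION  # e.g. REVLOGNG | REVLOGNGINLINEDATA
    e0 = struct.pack(indexformatng, 0, 11, 11, 0, 0, -1, -1, "\0" * 20)
    e0 = struct.pack(versionformat, version) + e0[4:]
    # a reader unpacks the first 4 bytes as the version, then resets
    # entry 0's offset field to zero before use (see parseindex)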
397 397 class revlog(object):
398 398 """
399 399 the underlying revision storage object
400 400
401 401 A revlog consists of two parts, an index and the revision data.
402 402
403 403 The index is a file with a fixed record size containing
404 404 information on each revision, including its nodeid (hash), the
405 405 nodeids of its parents, the position and offset of its data within
406 406 the data file, and the revision it's based on. Finally, each entry
407 407 contains a linkrev entry that can serve as a pointer to external
408 408 data.
409 409
410 410 The revision data itself is a linear collection of data chunks.
411 411 Each chunk represents a revision and is usually represented as a
412 412 delta against the previous chunk. To bound lookup time, runs of
413 413 deltas are limited to about 2 times the length of the original
414 414 version data. This makes retrieval of a version proportional to
415 415 its size, or O(1) relative to the number of revisions.
416 416
417 417 Both pieces of the revlog are written to in an append-only
418 418 fashion, which means we never need to rewrite a file to insert or
419 419 remove data, and can use some simple techniques to avoid the need
420 420 for locking while reading.
421 421 """
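The "about 2 times" bound in the docstring above corresponds to the heuristic in _addrevision further down, quoted here unchanged for reference:

    dist = l + offset - self.start(base)
    # full versions are inserted when the needed deltas
    # become comparable to the uncompressed text
    if not curr or dist > len(text) * 2:
        data = compress(text)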
422 422 def __init__(self, opener, indexfile):
423 423 """
424 424 create a revlog object
425 425
426 426 opener is a function that abstracts the file opening operation
427 427 and can be used to implement COW semantics or the like.
428 428 """
429 429 self.indexfile = indexfile
430 430 self.datafile = indexfile[:-2] + ".d"
431 431 self.opener = opener
432 432 self._cache = None
433 433 self._chunkcache = None
434 434 self.nodemap = {nullid: nullrev}
435 435 self.index = []
436 436
437 437 v = REVLOG_DEFAULT_VERSION
438 438 if hasattr(opener, "defversion"):
439 439 v = opener.defversion
440 440 if v & REVLOGNG:
441 441 v |= REVLOGNGINLINEDATA
442 442
443 443 i = ""
444 444 try:
445 445 f = self.opener(self.indexfile)
446 446 i = f.read(4)
447 447 f.seek(0)
448 448 if len(i) > 0:
449 449 v = struct.unpack(versionformat, i)[0]
450 450 except IOError, inst:
451 451 if inst.errno != errno.ENOENT:
452 452 raise
453 453
454 454 self.version = v
455 455 self._inline = v & REVLOGNGINLINEDATA
456 456 flags = v & ~0xFFFF
457 457 fmt = v & 0xFFFF
458 458 if fmt == REVLOGV0 and flags:
459 459 raise RevlogError(_("index %s unknown flags %#04x for format v0")
460 460 % (self.indexfile, flags >> 16))
461 461 elif fmt == REVLOGNG and flags & ~REVLOGNGINLINEDATA:
462 462 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
463 463 % (self.indexfile, flags >> 16))
464 464 elif fmt > REVLOGNG:
465 465 raise RevlogError(_("index %s unknown format %d")
466 466 % (self.indexfile, fmt))
467 467
468 468 self._io = revlogio()
469 469 if self.version == REVLOGV0:
470 470 self._io = revlogoldio()
471 471 if i:
472 472 d = self._io.parseindex(f, self._inline)
473 473 self.index, self.nodemap, self._chunkcache = d
474 474
475 475 # add the magic null revision at -1
476 476 self.index.append((0, 0, 0, -1, -1, -1, -1, nullid))
477 477
478 478 def _loadindex(self, start, end):
479 479 """load a block of indexes all at once from the lazy parser"""
480 480 if isinstance(self.index, lazyindex):
481 481 self.index.p.loadindex(start, end)
482 482
483 483 def _loadindexmap(self):
484 484 """loads both the map and the index from the lazy parser"""
485 485 if isinstance(self.index, lazyindex):
486 486 p = self.index.p
487 487 p.loadindex()
488 488 self.nodemap = p.map
489 489
490 490 def _loadmap(self):
491 491 """loads the map from the lazy parser"""
492 492 if isinstance(self.nodemap, lazymap):
493 493 self.nodemap.p.loadmap()
494 494 self.nodemap = self.nodemap.p.map
495 495
496 496 def tip(self):
497 497 return self.node(len(self.index) - 2)
498 498 def count(self):
499 499 return len(self.index) - 1
500 500
501 501 def rev(self, node):
502 502 try:
503 503 return self.nodemap[node]
504 504 except KeyError:
505 505 raise LookupError(_('%s: no node %s') % (self.indexfile, hex(node)))
506 506 def node(self, rev):
507 507 return self.index[rev][7]
508 508 def linkrev(self, node):
509 509 return self.index[self.rev(node)][4]
510 510 def parents(self, node):
511 511 d = self.index[self.rev(node)][5:7]
512 512 return (self.node(d[0]), self.node(d[1]))
513 513 def parentrevs(self, rev):
514 514 return self.index[rev][5:7]
515 515 def start(self, rev):
516 516 return getoffset(self.index[rev][0])
517 517 def end(self, rev):
518 518 return self.start(rev) + self.length(rev)
519 519 def length(self, rev):
520 520 return self.index[rev][1]
521 521 def base(self, rev):
522 522 return self.index[rev][3]
523 523
524 524 def size(self, rev):
525 525 """return the length of the uncompressed text for a given revision"""
526 526 l = self.index[rev][2]
527 527 if l >= 0:
528 528 return l
529 529
530 530 t = self.revision(self.node(rev))
531 531 return len(t)
532 532
533 533 # alternate implementation. The advantage of this code is that it
534 534 # will be faster for a single revision. But, the results are not
535 535 # cached, so finding the size of every revision will be slower.
536 536 """
537 537 if self.cache and self.cache[1] == rev:
538 538 return len(self.cache[2])
539 539
540 540 base = self.base(rev)
541 541 if self.cache and self.cache[1] >= base and self.cache[1] < rev:
542 542 base = self.cache[1]
543 543 text = self.cache[2]
544 544 else:
545 545 text = self.revision(self.node(base))
546 546
547 547 l = len(text)
548 548 for x in xrange(base + 1, rev + 1):
549 549 l = mdiff.patchedsize(l, self.chunk(x))
550 550 return l
551 551 """
552 552
553 553 def reachable(self, node, stop=None):
554 554 """return a hash of all nodes ancestral to a given node, including
555 555 the node itself, stopping when stop is matched"""
556 556 reachable = {}
557 557 visit = [node]
558 558 reachable[node] = 1
559 559 if stop:
560 560 stopn = self.rev(stop)
561 561 else:
562 562 stopn = 0
563 563 while visit:
564 564 n = visit.pop(0)
565 565 if n == stop:
566 566 continue
567 567 if n == nullid:
568 568 continue
569 569 for p in self.parents(n):
570 570 if self.rev(p) < stopn:
571 571 continue
572 572 if p not in reachable:
573 573 reachable[p] = 1
574 574 visit.append(p)
575 575 return reachable
576 576
577 577 def nodesbetween(self, roots=None, heads=None):
578 578 """Return a tuple containing three elements. Elements 1 and 2 contain
579 579 a final list of bases and heads after all the unreachable ones have been
580 580 pruned. Element 0 contains a topologically sorted list of all
581 581
582 582 nodes that satisfy these constraints:
583 583 1. All nodes must be descended from a node in roots (the nodes on
584 584 roots are considered descended from themselves).
585 585 2. All nodes must also be ancestors of a node in heads (the nodes in
586 586 heads are considered to be their own ancestors).
587 587
588 588 If roots is unspecified, nullid is assumed as the only root.
589 589 If heads is unspecified, it is taken to be the output of the
590 590 heads method (i.e. a list of all nodes in the repository that
591 591 have no children)."""
592 592 nonodes = ([], [], [])
593 593 if roots is not None:
594 594 roots = list(roots)
595 595 if not roots:
596 596 return nonodes
597 597 lowestrev = min([self.rev(n) for n in roots])
598 598 else:
599 599 roots = [nullid] # Everybody's a descendent of nullid
600 600 lowestrev = nullrev
601 601 if (lowestrev == nullrev) and (heads is None):
602 602 # We want _all_ the nodes!
603 603 return ([self.node(r) for r in xrange(0, self.count())],
604 604 [nullid], list(self.heads()))
605 605 if heads is None:
606 606 # All nodes are ancestors, so the latest ancestor is the last
607 607 # node.
608 608 highestrev = self.count() - 1
609 609 # Set ancestors to None to signal that every node is an ancestor.
610 610 ancestors = None
611 611 # Set heads to an empty dictionary for later discovery of heads
612 612 heads = {}
613 613 else:
614 614 heads = list(heads)
615 615 if not heads:
616 616 return nonodes
617 617 ancestors = {}
618 618 # Turn heads into a dictionary so we can remove 'fake' heads.
619 619 # Also, later we will be using it to filter out the heads we can't
620 620 # find from roots.
621 621 heads = dict.fromkeys(heads, 0)
622 622 # Start at the top and keep marking parents until we're done.
623 623 nodestotag = heads.keys()
624 624 # Remember where the top was so we can use it as a limit later.
625 625 highestrev = max([self.rev(n) for n in nodestotag])
626 626 while nodestotag:
627 627 # grab a node to tag
628 628 n = nodestotag.pop()
629 629 # Never tag nullid
630 630 if n == nullid:
631 631 continue
632 632 # A node's revision number represents its place in a
633 633 # topologically sorted list of nodes.
634 634 r = self.rev(n)
635 635 if r >= lowestrev:
636 636 if n not in ancestors:
637 637 # If we are possibly a descendent of one of the roots
638 638 # and we haven't already been marked as an ancestor
639 639 ancestors[n] = 1 # Mark as ancestor
640 640 # Add non-nullid parents to list of nodes to tag.
641 641 nodestotag.extend([p for p in self.parents(n) if
642 642 p != nullid])
643 643 elif n in heads: # We've seen it before, is it a fake head?
644 644 # So it is, real heads should not be the ancestors of
645 645 # any other heads.
646 646 heads.pop(n)
647 647 if not ancestors:
648 648 return nonodes
649 649 # Now that we have our set of ancestors, we want to remove any
650 650 # roots that are not ancestors.
651 651
652 652 # If one of the roots was nullid, everything is included anyway.
653 653 if lowestrev > nullrev:
654 654 # But, since we weren't, let's recompute the lowest rev to not
655 655 # include roots that aren't ancestors.
656 656
657 657 # Filter out roots that aren't ancestors of heads
658 658 roots = [n for n in roots if n in ancestors]
659 659 # Recompute the lowest revision
660 660 if roots:
661 661 lowestrev = min([self.rev(n) for n in roots])
662 662 else:
663 663 # No more roots? Return empty list
664 664 return nonodes
665 665 else:
666 666 # We are descending from nullid, and don't need to care about
667 667 # any other roots.
668 668 lowestrev = nullrev
669 669 roots = [nullid]
670 670 # Transform our roots list into a 'set' (i.e. a dictionary where the
671 671 # values don't matter).
672 672 descendents = dict.fromkeys(roots, 1)
673 673 # Also, keep the original roots so we can filter out roots that aren't
674 674 # 'real' roots (i.e. are descended from other roots).
675 675 roots = descendents.copy()
676 676 # Our topologically sorted list of output nodes.
677 677 orderedout = []
678 678 # Don't start at nullid since we don't want nullid in our output list,
679 679 # and if nullid shows up in descendents, empty parents will look like
680 680 # they're descendents.
681 681 for r in xrange(max(lowestrev, 0), highestrev + 1):
682 682 n = self.node(r)
683 683 isdescendent = False
684 684 if lowestrev == nullrev: # Everybody is a descendent of nullid
685 685 isdescendent = True
686 686 elif n in descendents:
687 687 # n is already a descendent
688 688 isdescendent = True
689 689 # This check only needs to be done here because all the roots
690 690 # will start being marked as descendents before the loop.
691 691 if n in roots:
692 692 # If n was a root, check if it's a 'real' root.
693 693 p = tuple(self.parents(n))
694 694 # If any of its parents are descendents, it's not a root.
695 695 if (p[0] in descendents) or (p[1] in descendents):
696 696 roots.pop(n)
697 697 else:
698 698 p = tuple(self.parents(n))
699 699 # A node is a descendent if either of its parents are
700 700 # descendents. (We seeded the descendents list with the roots
701 701 # up there, remember?)
702 702 if (p[0] in descendents) or (p[1] in descendents):
703 703 descendents[n] = 1
704 704 isdescendent = True
705 705 if isdescendent and ((ancestors is None) or (n in ancestors)):
706 706 # Only include nodes that are both descendents and ancestors.
707 707 orderedout.append(n)
708 708 if (ancestors is not None) and (n in heads):
709 709 # We're trying to figure out which heads are reachable
710 710 # from roots.
711 711 # Mark this head as having been reached
712 712 heads[n] = 1
713 713 elif ancestors is None:
714 714 # Otherwise, we're trying to discover the heads.
715 715 # Assume this is a head because if it isn't, the next step
716 716 # will eventually remove it.
717 717 heads[n] = 1
718 718 # But, obviously its parents aren't.
719 719 for p in self.parents(n):
720 720 heads.pop(p, None)
721 721 heads = [n for n in heads.iterkeys() if heads[n] != 0]
722 722 roots = roots.keys()
723 723 assert orderedout
724 724 assert roots
725 725 assert heads
726 726 return (orderedout, roots, heads)
727 727
728 728 def heads(self, start=None, stop=None):
729 729 """return the list of all nodes that have no children
730 730
731 731 if start is specified, only heads that are descendants of
732 732 start will be returned
733 733 if stop is specified, it will consider all the revs from stop
734 734 as if they had no children
735 735 """
736 736 if start is None and stop is None:
737 737 count = self.count()
738 738 if not count:
739 739 return [nullid]
740 740 ishead = [1] * (count + 1)
741 741 index = self.index
742 742 for r in xrange(count):
743 743 e = index[r]
744 744 ishead[e[5]] = ishead[e[6]] = 0
745 745 return [self.node(r) for r in xrange(count) if ishead[r]]
746 746
747 747 if start is None:
748 748 start = nullid
749 749 if stop is None:
750 750 stop = []
751 751 stoprevs = dict.fromkeys([self.rev(n) for n in stop])
752 752 startrev = self.rev(start)
753 753 reachable = {startrev: 1}
754 754 heads = {startrev: 1}
755 755
756 756 parentrevs = self.parentrevs
757 757 for r in xrange(startrev + 1, self.count()):
758 758 for p in parentrevs(r):
759 759 if p in reachable:
760 760 if r not in stoprevs:
761 761 reachable[r] = 1
762 762 heads[r] = 1
763 763 if p in heads and p not in stoprevs:
764 764 del heads[p]
765 765
766 766 return [self.node(r) for r in heads]
767 767
768 768 def children(self, node):
769 769 """find the children of a given node"""
770 770 c = []
771 771 p = self.rev(node)
772 772 for r in range(p + 1, self.count()):
773 773 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
774 774 if prevs:
775 775 for pr in prevs:
776 776 if pr == p:
777 777 c.append(self.node(r))
778 778 elif p == nullrev:
779 779 c.append(self.node(r))
780 780 return c
781 781
782 782 def _match(self, id):
783 783 if isinstance(id, (long, int)):
784 784 # rev
785 785 return self.node(id)
786 786 if len(id) == 20:
787 787 # possibly a binary node
788 788 # odds of a binary node being all hex in ASCII are 1 in 10**25
789 789 try:
790 790 node = id
791 791 r = self.rev(node) # quick search the index
792 792 return node
793 793 except LookupError:
794 794 pass # may be partial hex id
795 795 try:
796 796 # str(rev)
797 797 rev = int(id)
798 798 if str(rev) != id:
799 799 raise ValueError
800 800 if rev < 0:
801 801 rev = self.count() + rev
802 802 if rev < 0 or rev >= self.count():
803 803 raise ValueError
804 804 return self.node(rev)
805 805 except (ValueError, OverflowError):
806 806 pass
807 807 if len(id) == 40:
808 808 try:
809 809 # a full hex nodeid?
810 810 node = bin(id)
811 811 r = self.rev(node)
812 812 return node
813 813 except TypeError:
814 814 pass
815 815
816 816 def _partialmatch(self, id):
817 817 if len(id) < 40:
818 818 try:
819 819 # hex(node)[:...]
820 820 bin_id = bin(id[:len(id) & ~1]) # grab an even number of digits
821 821 node = None
822 822 for n in self.nodemap:
823 823 if n.startswith(bin_id) and hex(n).startswith(id):
824 824 if node is not None:
825 825 raise LookupError(_("Ambiguous identifier"))
826 826 node = n
827 827 if node is not None:
828 828 return node
829 829 except TypeError:
830 830 pass
831 831
832 832 def lookup(self, id):
833 833 """locate a node based on:
834 834 - revision number or str(revision number)
835 835 - nodeid or subset of hex nodeid
836 836 """
837 837 n = self._match(id)
838 838 if n is not None:
839 839 return n
840 840 n = self._partialmatch(id)
841 841 if n:
842 842 return n
843 843
844 844 raise LookupError(_("No match found"))
845 845
846 846 def cmp(self, node, text):
847 847 """compare text with a given file revision"""
848 848 p1, p2 = self.parents(node)
849 849 return hash(text, p1, p2) != node
850 850
851 851 def chunk(self, rev, df=None):
852 852 start, length = self.start(rev), self.length(rev)
853 853 if self._inline:
854 854 start += (rev + 1) * self._io.size
855 855 end = start + length
856 856 def loadcache(df):
857 857 cache_length = max(65536, length)
858 858 if not df:
859 859 if self._inline:
860 860 df = self.opener(self.indexfile)
861 861 else:
862 862 df = self.opener(self.datafile)
863 863 df.seek(start)
864 864 self._chunkcache = (start, df.read(cache_length))
865 865
866 866 if not self._chunkcache:
867 867 loadcache(df)
868 868
869 869 cache_start = self._chunkcache[0]
870 870 cache_end = cache_start + len(self._chunkcache[1])
871 871 if start >= cache_start and end <= cache_end:
872 872 # it is cached
873 873 offset = start - cache_start
874 874 else:
875 875 loadcache(df)
876 876 offset = 0
877 877
878 878 # avoid copying large chunks
879 879 c = self._chunkcache[1]
880 880 if len(c) > length:
881 881 c = c[offset:offset + length]
882 882
883 883 return decompress(c)
884 884
885 885 def delta(self, node):
886 886 """return or calculate a delta between a node and its predecessor"""
887 887 r = self.rev(node)
888 888 return self.revdiff(r - 1, r)
889 889
890 890 def revdiff(self, rev1, rev2):
891 891 """return or calculate a delta between two revisions"""
892 892 b1 = self.base(rev1)
893 893 b2 = self.base(rev2)
894 894 if b1 == b2 and rev1 + 1 == rev2:
895 895 return self.chunk(rev2)
896 896 else:
897 897 return mdiff.textdiff(self.revision(self.node(rev1)),
898 898 self.revision(self.node(rev2)))
899 899
900 900 def revision(self, node):
901 901 """return an uncompressed revision of a given node"""
902 902 if node == nullid:
903 903 return ""
904 904 if self._cache and self._cache[0] == node:
905 905 return self._cache[2]
906 906
907 907 # look up what we need to read
908 908 text = None
909 909 rev = self.rev(node)
910 910 base = self.base(rev)
911 911
912 912 if self._inline:
913 913 # we probably have the whole chunk cached
914 914 df = None
915 915 else:
916 916 df = self.opener(self.datafile)
917 917
918 918 # do we have useful data cached?
919 919 if self._cache and self._cache[1] >= base and self._cache[1] < rev:
920 920 base = self._cache[1]
921 921 text = self._cache[2]
922 922 self._loadindex(base, rev + 1)
923 923 else:
924 924 self._loadindex(base, rev + 1)
925 925 text = self.chunk(base, df=df)
926 926
927 927 bins = [self.chunk(r, df) for r in xrange(base + 1, rev + 1)]
928 928 text = mdiff.patches(text, bins)
929 929 p1, p2 = self.parents(node)
930 930 if node != hash(text, p1, p2):
931 931 raise RevlogError(_("integrity check failed on %s:%d")
932 932 % (self.datafile, rev))
933 933
934 934 self._cache = (node, rev, text)
935 935 return text
936 936
937 937 def checkinlinesize(self, tr, fp=None):
938 938 if not self._inline:
939 939 return
940 940 if not fp:
941 941 fp = self.opener(self.indexfile, 'r')
942 942 fp.seek(0, 2)
943 943 size = fp.tell()
944 944 if size < 131072:
945 945 return
946 946 trinfo = tr.find(self.indexfile)
947 947 if trinfo == None:
948 948 raise RevlogError(_("%s not found in the transaction")
949 949 % self.indexfile)
950 950
951 951 trindex = trinfo[2]
952 952 dataoff = self.start(trindex)
953 953
954 954 tr.add(self.datafile, dataoff)
955 955 df = self.opener(self.datafile, 'w')
956 956 calc = self._io.size
957 957 for r in xrange(self.count()):
958 958 start = self.start(r) + (r + 1) * calc
959 959 length = self.length(r)
960 960 fp.seek(start)
961 961 d = fp.read(length)
962 962 df.write(d)
963 963 fp.close()
964 964 df.close()
965 965 fp = self.opener(self.indexfile, 'w', atomictemp=True)
966 966 self.version &= ~(REVLOGNGINLINEDATA)
967 967 self._inline = False
968 968 for i in xrange(self.count()):
969 969 e = self._io.packentry(self.index[i], self.node, self.version)
970 970 fp.write(e)
971 971
972 972 # if we don't call rename, the temp file will never replace the
973 973 # real index
974 974 fp.rename()
975 975
976 976 tr.replace(self.indexfile, trindex * calc)
977 977 self._chunkcache = None
978 978
979 979 def addrevision(self, text, transaction, link, p1, p2, d=None):
980 980 """add a revision to the log
981 981
982 982 text - the revision data to add
983 983 transaction - the transaction object used for rollback
984 984 link - the linkrev data to add
985 985 p1, p2 - the parent nodeids of the revision
986 986 d - an optional precomputed delta
987 987 """
988 988 dfh = None
989 989 if not self._inline:
990 990 dfh = self.opener(self.datafile, "a")
991 991 ifh = self.opener(self.indexfile, "a+")
992 992 return self._addrevision(text, transaction, link, p1, p2, d, ifh, dfh)
993 993
994 994 def _addrevision(self, text, transaction, link, p1, p2, d, ifh, dfh):
995 995 node = hash(text, p1, p2)
996 996 if node in self.nodemap:
997 997 return node
998 998
999 999 curr = self.count()
1000 1000 prev = curr - 1
1001 1001 base = self.base(prev)
1002 1002 offset = self.end(prev)
1003 1003
1004 1004 if curr:
1005 1005 if not d:
1006 1006 ptext = self.revision(self.node(prev))
1007 1007 d = mdiff.textdiff(ptext, text)
1008 1008 data = compress(d)
1009 1009 l = len(data[1]) + len(data[0])
1010 1010 dist = l + offset - self.start(base)
1011 1011
1012 1012 # full versions are inserted when the needed deltas
1013 1013 # become comparable to the uncompressed text
1014 1014 if not curr or dist > len(text) * 2:
1015 1015 data = compress(text)
1016 1016 l = len(data[1]) + len(data[0])
1017 1017 base = curr
1018 1018
1019 1019 e = (offset_type(offset, 0), l, len(text),
1020 1020 base, link, self.rev(p1), self.rev(p2), node)
1021 1021 self.index.insert(-1, e)
1022 1022 self.nodemap[node] = curr
1023 1023
1024 1024 entry = self._io.packentry(e, self.node, self.version)
1025 1025 if not self._inline:
1026 1026 transaction.add(self.datafile, offset)
1027 1027 transaction.add(self.indexfile, curr * len(entry))
1028 1028 if data[0]:
1029 1029 dfh.write(data[0])
1030 1030 dfh.write(data[1])
1031 1031 dfh.flush()
1032 1032 ifh.write(entry)
1033 1033 else:
1034 1034 ifh.seek(0, 2)
1035 1035 transaction.add(self.indexfile, ifh.tell(), prev)
1036 1036 ifh.write(entry)
1037 1037 ifh.write(data[0])
1038 1038 ifh.write(data[1])
1039 1039 self.checkinlinesize(transaction, ifh)
1040 1040
1041 1041 self._cache = (node, curr, text)
1042 1042 return node
1043 1043
1044 1044 def ancestor(self, a, b):
1045 1045 """calculate the least common ancestor of nodes a and b"""
1046 1046
1047 1047 def parents(rev):
1048 1048 return [p for p in self.parentrevs(rev) if p != nullrev]
1049 1049
1050 1050 c = ancestor.ancestor(self.rev(a), self.rev(b), parents)
1051 1051 if c is None:
1052 1052 return nullid
1053 1053
1054 1054 return self.node(c)
1055 1055
1056 1056 def group(self, nodelist, lookup, infocollect=None):
1057 1057 """calculate a delta group
1058 1058
1059 1059 Given a list of changeset revs, return a set of deltas and
1060 1060 metadata corresponding to nodes. The first delta is
1061 1061 parent(nodes[0]) -> nodes[0]; the receiver is guaranteed to
1062 1062 have this parent, as it has all history before these
1063 1063 changesets. The parent used is parents[0].
1064 1064 """
1065 1065 revs = [self.rev(n) for n in nodelist]
1066 1066
1067 1067 # if we don't have any revisions touched by these changesets, bail
1068 1068 if not revs:
1069 1069 yield changegroup.closechunk()
1070 1070 return
1071 1071
1072 1072 # add the parent of the first rev
1073 1073 p = self.parents(self.node(revs[0]))[0]
1074 1074 revs.insert(0, self.rev(p))
1075 1075
1076 1076 # build deltas
1077 1077 for d in xrange(0, len(revs) - 1):
1078 1078 a, b = revs[d], revs[d + 1]
1079 1079 nb = self.node(b)
1080 1080
1081 1081 if infocollect is not None:
1082 1082 infocollect(nb)
1083 1083
1084 1084 d = self.revdiff(a, b)
1085 1085 p = self.parents(nb)
1086 1086 meta = nb + p[0] + p[1] + lookup(nb)
1087 1087 yield changegroup.genchunk("%s%s" % (meta, d))
1088 1088
1089 1089 yield changegroup.closechunk()
1090 1090
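Each chunk yielded by group carries an 80-byte header (four 20-byte nodes) followed by the delta payload; addgroup below unpacks it with exactly this layout:

    node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
    delta = chunk[80:]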
1091 1091 def addgroup(self, revs, linkmapper, transaction, unique=0):
1092 1092 """
1093 1093 add a delta group
1094 1094
1095 1095 given a set of deltas, add them to the revision log. the
1096 1096 first delta is against its parent, which should be in our
1097 1097 log, the rest are against the previous delta.
1098 1098 """
1099 1099
1100 1100 # track the base of the current delta log
1101 1101 r = self.count()
1102 1102 t = r - 1
1103 1103 node = None
1104 1104
1105 1105 base = prev = nullrev
1106 1106 start = end = textlen = 0
1107 1107 if r:
1108 1108 end = self.end(t)
1109 1109
1110 1110 ifh = self.opener(self.indexfile, "a+")
1111 1111 ifh.seek(0, 2)
1112 1112 transaction.add(self.indexfile, ifh.tell(), self.count())
1113 1113 if self._inline:
1114 1114 dfh = None
1115 1115 else:
1116 1116 transaction.add(self.datafile, end)
1117 1117 dfh = self.opener(self.datafile, "a")
1118 1118
1119 1119 # loop through our set of deltas
1120 1120 chain = None
1121 1121 for chunk in revs:
1122 1122 node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
1123 1123 link = linkmapper(cs)
1124 1124 if node in self.nodemap:
1125 1125 # this can happen if two branches make the same change
1126 1126 # if unique:
1127 1127 # raise RevlogError(_("already have %s") % hex(node[:4]))
1128 1128 chain = node
1129 1129 continue
1130 1130 delta = chunk[80:]
1131 1131
1132 1132 for p in (p1, p2):
1133 1133 if not p in self.nodemap:
1134 1134 raise LookupError(_("unknown parent %s") % short(p))
1135 1135
1136 1136 if not chain:
1137 1137 # retrieve the parent revision of the delta chain
1138 1138 chain = p1
1139 1139 if not chain in self.nodemap:
1140 1140 raise LookupError(_("unknown base %s") % short(chain[:4]))
1141 1141
1142 1142 # full versions are inserted when the needed deltas become
1143 1143 # comparable to the uncompressed text or when the previous
1144 1144 # version is not the one we have a delta against. We use
1145 1145 # the size of the previous full rev as a proxy for the
1146 1146 # current size.
1147 1147
1148 1148 if chain == prev:
1149 1149 tempd = compress(delta)
1150 1150 cdelta = tempd[0] + tempd[1]
1151 1151 textlen = mdiff.patchedsize(textlen, delta)
1152 1152
1153 1153 if chain != prev or (end - start + len(cdelta)) > textlen * 2:
1154 1154 # flush our writes here so we can read it in revision
1155 1155 if dfh:
1156 1156 dfh.flush()
1157 1157 ifh.flush()
1158 1158 text = self.revision(chain)
1159 1159 text = mdiff.patches(text, [delta])
1160 1160 chk = self._addrevision(text, transaction, link, p1, p2, None,
1161 1161 ifh, dfh)
1162 1162 if not dfh and not self._inline:
1163 1163 # addrevision switched from inline to conventional
1164 1164 # reopen the index
1165 1165 dfh = self.opener(self.datafile, "a")
1166 1166 ifh = self.opener(self.indexfile, "a")
1167 1167 if chk != node:
1168 1168 raise RevlogError(_("consistency error adding group"))
1169 1169 textlen = len(text)
1170 1170 else:
1171 1171 e = (offset_type(end, 0), len(cdelta), textlen, base,
1172 1172 link, self.rev(p1), self.rev(p2), node)
1173 1173 self.index.insert(-1, e)
1174 1174 self.nodemap[node] = r
1175 1175 entry = self._io.packentry(e, self.node, self.version)
1176 1176 if self._inline:
1177 1177 ifh.write(entry)
1178 1178 ifh.write(cdelta)
1179 1179 self.checkinlinesize(transaction, ifh)
1180 1180 if not self._inline:
1181 1181 dfh = self.opener(self.datafile, "a")
1182 1182 ifh = self.opener(self.indexfile, "a")
1183 1183 else:
1184 1184 dfh.write(cdelta)
1185 1185 ifh.write(entry)
1186 1186
1187 1187 t, r, chain, prev = r, r + 1, node, node
1188 1188 base = self.base(t)
1189 1189 start = self.start(base)
1190 1190 end = self.end(t)
1191 1191
1192 1192 return node
1193 1193
1194 1194 def strip(self, rev, minlink):
1195 1195 if self.count() == 0 or rev >= self.count():
1196 1196 return
1197 1197
1198 1198 if isinstance(self.index, lazyindex):
1199 1199 self._loadindexmap()
1200 1200
1201 1201 # When stripping away a revision, we need to make sure it
1202 1202 # does not actually belong to an older changeset.
1203 1203 # The minlink parameter defines the oldest revision
1204 1204 # we're allowed to strip away.
1205 1205 while minlink > self.index[rev][4]:
1206 1206 rev += 1
1207 1207 if rev >= self.count():
1208 1208 return
1209 1209
1210 1210 # first truncate the files on disk
1211 1211 end = self.start(rev)
1212 1212 if not self._inline:
1213 1213 df = self.opener(self.datafile, "a")
1214 1214 df.truncate(end)
1215 1215 end = rev * self._io.size
1216 1216 else:
1217 1217 end += rev * self._io.size
1218 1218
1219 1219 indexf = self.opener(self.indexfile, "a")
1220 1220 indexf.truncate(end)
1221 1221
1222 1222 # then reset internal state in memory to forget those revisions
1223 1223 self._cache = None
1224 1224 self._chunkcache = None
1225 1225 for x in xrange(rev, self.count()):
1226 1226 del self.nodemap[self.node(x)]
1227 1227
1228 1228 del self.index[rev:-1]
1229 1229
1230 1230 def checksize(self):
1231 1231 expected = 0
1232 1232 if self.count():
1233 1233 expected = self.end(self.count() - 1)
1234 1234
1235 1235 try:
1236 1236 f = self.opener(self.datafile)
1237 1237 f.seek(0, 2)
1238 1238 actual = f.tell()
1239 1239 dd = actual - expected
1240 1240 except IOError, inst:
1241 1241 if inst.errno != errno.ENOENT:
1242 1242 raise
1243 1243 dd = 0
1244 1244
1245 1245 try:
1246 1246 f = self.opener(self.indexfile)
1247 1247 f.seek(0, 2)
1248 1248 actual = f.tell()
1249 1249 s = self._io.size
1250 1250 i = actual / s
1251 1251 di = actual - (i * s)
1252 1252 if self._inline:
1253 1253 databytes = 0
1254 1254 for r in xrange(self.count()):
1255 1255 databytes += self.length(r)
1256 1256 dd = 0
1257 1257 di = actual - self.count() * s - databytes
1258 1258 except IOError, inst:
1259 1259 if inst.errno != errno.ENOENT:
1260 1260 raise
1261 1261 di = 0
1262 1262
1263 1263 return (dd, di)