revlog: minor chunk speed-up
Matt Mackall
r5006:c2febf54 default
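This commit reworks revlog.chunk() around its (start, data) read-ahead cache: a fresh load now records its offset directly instead of re-checking the cache bounds, and the cached string is sliced only when it is not already exactly the requested chunk. The following minimal sketch shows the same caching pattern in isolation; ChunkCache and read_chunk are illustrative names and not part of Mercurial's API.

# Minimal sketch of a (start, data) read-ahead chunk cache, assuming a
# seekable file object; illustrative only, not Mercurial code.
import io

class ChunkCache(object):
    def __init__(self, fileobj, readahead=65536):
        self.fileobj = fileobj
        self.readahead = readahead
        self.cache = None                       # (start, data) of the last block read

    def _load(self, start, length):
        self.fileobj.seek(start)
        self.cache = (start, self.fileobj.read(length))

    def read_chunk(self, start, length):
        end = start + length
        offset = 0
        if not self.cache:
            self._load(start, max(self.readahead, length))
        else:
            cache_start, cache_data = self.cache
            if start >= cache_start and end <= cache_start + len(cache_data):
                # cache hit: just compute the offset into the cached block
                offset = start - cache_start
            else:
                self._load(start, max(self.readahead, length))
        data = self.cache[1]
        # avoid copying when the cached block is already exactly the chunk
        if len(data) != length:
            data = data[offset:offset + length]
        return data

# usage: the second read is served from the cached 64-byte block
f = io.BytesIO(b"abcdefghij" * 100)
cc = ChunkCache(f, readahead=64)
assert cc.read_chunk(0, 10) == b"abcdefghij"
assert cc.read_chunk(10, 5) == b"abcde"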
@@ -1,1262 +1,1265 @@
1 1 """
2 2 revlog.py - storage back-end for mercurial
3 3
4 4 This provides efficient delta storage with O(1) retrieve and append
5 5 and O(changes) merge between branches
6 6
7 7 Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
8 8
9 9 This software may be used and distributed according to the terms
10 10 of the GNU General Public License, incorporated herein by reference.
11 11 """
12 12
13 13 from node import *
14 14 from i18n import _
15 15 import binascii, changegroup, errno, ancestor, mdiff, os
16 16 import sha, struct, util, zlib
17 17
18 18 # revlog flags
19 19 REVLOGV0 = 0
20 20 REVLOGNG = 1
21 21 REVLOGNGINLINEDATA = (1 << 16)
22 22 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
23 23 REVLOG_DEFAULT_FORMAT = REVLOGNG
24 24 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
25 25
26 26 class RevlogError(Exception):
27 27 pass
28 28 class LookupError(RevlogError):
29 29 pass
30 30
31 31 def getoffset(q):
32 32 return int(q >> 16)
33 33
34 34 def gettype(q):
35 35 return int(q & 0xFFFF)
36 36
37 37 def offset_type(offset, type):
38 38 return long(long(offset) << 16 | type)
39 39
40 40 def hash(text, p1, p2):
41 41 """generate a hash from the given text and its parent hashes
42 42
43 43 This hash combines both the current file contents and its history
44 44 in a manner that makes it easy to distinguish nodes with the same
45 45 content in the revision graph.
46 46 """
47 47 l = [p1, p2]
48 48 l.sort()
49 49 s = sha.new(l[0])
50 50 s.update(l[1])
51 51 s.update(text)
52 52 return s.digest()
53 53
54 54 def compress(text):
55 55 """ generate a possibly-compressed representation of text """
56 56 if not text:
57 57 return ("", text)
58 58 if len(text) < 44:
59 59 if text[0] == '\0':
60 60 return ("", text)
61 61 return ('u', text)
62 62 bin = zlib.compress(text)
63 63 if len(bin) > len(text):
64 64 if text[0] == '\0':
65 65 return ("", text)
66 66 return ('u', text)
67 67 return ("", bin)
68 68
69 69 def decompress(bin):
70 70 """ decompress the given input """
71 71 if not bin:
72 72 return bin
73 73 t = bin[0]
74 74 if t == '\0':
75 75 return bin
76 76 if t == 'x':
77 77 return zlib.decompress(bin)
78 78 if t == 'u':
79 79 return bin[1:]
80 80 raise RevlogError(_("unknown compression type %r") % t)
81 81
82 82 class lazyparser(object):
83 83 """
84 84 this class avoids the need to parse the entirety of large indices
85 85 """
86 86
87 87 # lazyparser is not safe to use on windows if win32 extensions not
88 88 # available. it keeps file handle open, which make it not possible
89 89 # to break hardlinks on local cloned repos.
90 90 safe_to_use = os.name != 'nt' or (not util.is_win_9x() and
91 91 hasattr(util, 'win32api'))
92 92
93 93 def __init__(self, dataf, size):
94 94 self.dataf = dataf
95 95 self.s = struct.calcsize(indexformatng)
96 96 self.datasize = size
97 97 self.l = size/self.s
98 98 self.index = [None] * self.l
99 99 self.map = {nullid: nullrev}
100 100 self.allmap = 0
101 101 self.all = 0
102 102 self.mapfind_count = 0
103 103
104 104 def loadmap(self):
105 105 """
106 106 during a commit, we need to make sure the rev being added is
107 107 not a duplicate. This requires loading the entire index,
108 108 which is fairly slow. loadmap can load up just the node map,
109 109 which takes much less time.
110 110 """
111 111 if self.allmap:
112 112 return
113 113 end = self.datasize
114 114 self.allmap = 1
115 115 cur = 0
116 116 count = 0
117 117 blocksize = self.s * 256
118 118 self.dataf.seek(0)
119 119 while cur < end:
120 120 data = self.dataf.read(blocksize)
121 121 off = 0
122 122 for x in xrange(256):
123 123 n = data[off + ngshaoffset:off + ngshaoffset + 20]
124 124 self.map[n] = count
125 125 count += 1
126 126 if count >= self.l:
127 127 break
128 128 off += self.s
129 129 cur += blocksize
130 130
131 131 def loadblock(self, blockstart, blocksize, data=None):
132 132 if self.all:
133 133 return
134 134 if data is None:
135 135 self.dataf.seek(blockstart)
136 136 if blockstart + blocksize > self.datasize:
137 137 # the revlog may have grown since we've started running,
138 138 # but we don't have space in self.index for more entries.
139 139 # limit blocksize so that we don't get too much data.
140 140 blocksize = max(self.datasize - blockstart, 0)
141 141 data = self.dataf.read(blocksize)
142 142 lend = len(data) / self.s
143 143 i = blockstart / self.s
144 144 off = 0
145 145 # lazyindex supports __delitem__
146 146 if lend > len(self.index) - i:
147 147 lend = len(self.index) - i
148 148 for x in xrange(lend):
149 149 if self.index[i + x] == None:
150 150 b = data[off : off + self.s]
151 151 self.index[i + x] = b
152 152 n = b[ngshaoffset:ngshaoffset + 20]
153 153 self.map[n] = i + x
154 154 off += self.s
155 155
156 156 def findnode(self, node):
157 157 """search backwards through the index file for a specific node"""
158 158 if self.allmap:
159 159 return None
160 160
161 161 # hg log will cause many many searches for the manifest
162 162 # nodes. After we get called a few times, just load the whole
163 163 # thing.
164 164 if self.mapfind_count > 8:
165 165 self.loadmap()
166 166 if node in self.map:
167 167 return node
168 168 return None
169 169 self.mapfind_count += 1
170 170 last = self.l - 1
171 171 while self.index[last] != None:
172 172 if last == 0:
173 173 self.all = 1
174 174 self.allmap = 1
175 175 return None
176 176 last -= 1
177 177 end = (last + 1) * self.s
178 178 blocksize = self.s * 256
179 179 while end >= 0:
180 180 start = max(end - blocksize, 0)
181 181 self.dataf.seek(start)
182 182 data = self.dataf.read(end - start)
183 183 findend = end - start
184 184 while True:
185 185 # we're searching backwards, so we have to make sure
186 186 # we don't find a changeset where this node is a parent
187 187 off = data.find(node, 0, findend)
188 188 findend = off
189 189 if off >= 0:
190 190 i = off / self.s
191 191 off = i * self.s
192 192 n = data[off + ngshaoffset:off + ngshaoffset + 20]
193 193 if n == node:
194 194 self.map[n] = i + start / self.s
195 195 return node
196 196 else:
197 197 break
198 198 end -= blocksize
199 199 return None
200 200
201 201 def loadindex(self, i=None, end=None):
202 202 if self.all:
203 203 return
204 204 all = False
205 205 if i == None:
206 206 blockstart = 0
207 207 blocksize = (65536 / self.s) * self.s
208 208 end = self.datasize
209 209 all = True
210 210 else:
211 211 if end:
212 212 blockstart = i * self.s
213 213 end = end * self.s
214 214 blocksize = end - blockstart
215 215 else:
216 216 blockstart = (i & ~1023) * self.s
217 217 blocksize = self.s * 1024
218 218 end = blockstart + blocksize
219 219 while blockstart < end:
220 220 self.loadblock(blockstart, blocksize)
221 221 blockstart += blocksize
222 222 if all:
223 223 self.all = True
224 224
225 225 class lazyindex(object):
226 226 """a lazy version of the index array"""
227 227 def __init__(self, parser):
228 228 self.p = parser
229 229 def __len__(self):
230 230 return len(self.p.index)
231 231 def load(self, pos):
232 232 if pos < 0:
233 233 pos += len(self.p.index)
234 234 self.p.loadindex(pos)
235 235 return self.p.index[pos]
236 236 def __getitem__(self, pos):
237 237 return struct.unpack(indexformatng,
238 238 self.p.index[pos] or self.load(pos))
239 239 def __setitem__(self, pos, item):
240 240 self.p.index[pos] = struct.pack(indexformatng, *item)
241 241 def __delitem__(self, pos):
242 242 del self.p.index[pos]
243 243 def insert(self, pos, e):
244 244 self.p.index.insert(pos, struct.pack(indexformatng, *e))
245 245 def append(self, e):
246 246 self.p.index.append(struct.pack(indexformatng, *e))
247 247
248 248 class lazymap(object):
249 249 """a lazy version of the node map"""
250 250 def __init__(self, parser):
251 251 self.p = parser
252 252 def load(self, key):
253 253 n = self.p.findnode(key)
254 254 if n == None:
255 255 raise KeyError(key)
256 256 def __contains__(self, key):
257 257 if key in self.p.map:
258 258 return True
259 259 self.p.loadmap()
260 260 return key in self.p.map
261 261 def __iter__(self):
262 262 yield nullid
263 263 for i in xrange(self.p.l):
264 264 ret = self.p.index[i]
265 265 if not ret:
266 266 self.p.loadindex(i)
267 267 ret = self.p.index[i]
268 268 if isinstance(ret, str):
269 269 ret = struct.unpack(indexformatng, ret)
270 270 yield ret[7]
271 271 def __getitem__(self, key):
272 272 try:
273 273 return self.p.map[key]
274 274 except KeyError:
275 275 try:
276 276 self.load(key)
277 277 return self.p.map[key]
278 278 except KeyError:
279 279 raise KeyError("node " + hex(key))
280 280 def __setitem__(self, key, val):
281 281 self.p.map[key] = val
282 282 def __delitem__(self, key):
283 283 del self.p.map[key]
284 284
285 285 indexformatv0 = ">4l20s20s20s"
286 286 v0shaoffset = 56
287 287
288 288 class revlogoldio(object):
289 289 def __init__(self):
290 290 self.size = struct.calcsize(indexformatv0)
291 291
292 292 def parseindex(self, fp, inline):
293 293 s = self.size
294 294 index = []
295 295 nodemap = {nullid: nullrev}
296 296 n = off = 0
297 297 data = fp.read()
298 298 l = len(data)
299 299 while off + s <= l:
300 300 cur = data[off:off + s]
301 301 off += s
302 302 e = struct.unpack(indexformatv0, cur)
303 303 # transform to revlogv1 format
304 304 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
305 305 nodemap[e[4]], nodemap[e[5]], e[6])
306 306 index.append(e2)
307 307 nodemap[e[6]] = n
308 308 n += 1
309 309
310 310 return index, nodemap, None
311 311
312 312 def packentry(self, entry, node, version):
313 313 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
314 314 node(entry[5]), node(entry[6]), entry[7])
315 315 return struct.pack(indexformatv0, *e2)
316 316
317 317 # index ng:
318 318 # 6 bytes offset
319 319 # 2 bytes flags
320 320 # 4 bytes compressed length
321 321 # 4 bytes uncompressed length
322 322 # 4 bytes: base rev
323 323 # 4 bytes link rev
324 324 # 4 bytes parent 1 rev
325 325 # 4 bytes parent 2 rev
326 326 # 32 bytes: nodeid
327 327 indexformatng = ">Qiiiiii20s12x"
328 328 ngshaoffset = 32
329 329 versionformat = ">I"
330 330
331 331 class revlogio(object):
332 332 def __init__(self):
333 333 self.size = struct.calcsize(indexformatng)
334 334
335 335 def parseindex(self, fp, inline):
336 336 try:
337 337 size = util.fstat(fp).st_size
338 338 except AttributeError:
339 339 size = 0
340 340
341 341 if lazyparser.safe_to_use and not inline and size > 1000000:
342 342 # big index, let's parse it on demand
343 343 parser = lazyparser(fp, size)
344 344 index = lazyindex(parser)
345 345 nodemap = lazymap(parser)
346 346 e = list(index[0])
347 347 type = gettype(e[0])
348 348 e[0] = offset_type(0, type)
349 349 index[0] = e
350 350 return index, nodemap, None
351 351
352 352 s = self.size
353 353 cache = None
354 354 index = []
355 355 nodemap = {nullid: nullrev}
356 356 n = off = 0
357 357 # if we're not using lazymap, always read the whole index
358 358 data = fp.read()
359 359 l = len(data) - s
360 360 unpack = struct.unpack
361 361 append = index.append
362 362 if inline:
363 363 cache = (0, data)
364 364 while off <= l:
365 365 e = unpack(indexformatng, data[off:off + s])
366 366 nodemap[e[7]] = n
367 367 append(e)
368 368 n += 1
369 369 if e[1] < 0:
370 370 break
371 371 off += e[1] + s
372 372 else:
373 373 while off <= l:
374 374 e = unpack(indexformatng, data[off:off + s])
375 375 nodemap[e[7]] = n
376 376 append(e)
377 377 n += 1
378 378 off += s
379 379
380 380 e = list(index[0])
381 381 type = gettype(e[0])
382 382 e[0] = offset_type(0, type)
383 383 index[0] = e
384 384
385 385 return index, nodemap, cache
386 386
387 387 def packentry(self, entry, node, version):
388 388 p = struct.pack(indexformatng, *entry)
389 389 if not entry[3] and not getoffset(entry[0]) and entry[5] == nullrev:
390 390 p = struct.pack(versionformat, version) + p[4:]
391 391 return p
392 392
393 393 class revlog(object):
394 394 """
395 395 the underlying revision storage object
396 396
397 397 A revlog consists of two parts, an index and the revision data.
398 398
399 399 The index is a file with a fixed record size containing
399 399 information on each revision, including its nodeid (hash), the
401 401 nodeids of its parents, the position and offset of its data within
402 402 the data file, and the revision it's based on. Finally, each entry
403 403 contains a linkrev entry that can serve as a pointer to external
404 404 data.
405 405
406 406 The revision data itself is a linear collection of data chunks.
407 407 Each chunk represents a revision and is usually represented as a
408 408 delta against the previous chunk. To bound lookup time, runs of
409 409 deltas are limited to about 2 times the length of the original
410 410 version data. This makes retrieval of a version proportional to
411 411 its size, or O(1) relative to the number of revisions.
412 412
413 413 Both pieces of the revlog are written to in an append-only
414 414 fashion, which means we never need to rewrite a file to insert or
415 415 remove data, and can use some simple techniques to avoid the need
416 416 for locking while reading.
417 417 """
418 418 def __init__(self, opener, indexfile):
419 419 """
420 420 create a revlog object
421 421
422 422 opener is a function that abstracts the file opening operation
423 423 and can be used to implement COW semantics or the like.
424 424 """
425 425 self.indexfile = indexfile
426 426 self.datafile = indexfile[:-2] + ".d"
427 427 self.opener = opener
428 428 self._cache = None
429 429 self._chunkcache = None
430 430 self.nodemap = {nullid: nullrev}
431 431 self.index = []
432 432
433 433 v = REVLOG_DEFAULT_VERSION
434 434 if hasattr(opener, "defversion"):
435 435 v = opener.defversion
436 436 if v & REVLOGNG:
437 437 v |= REVLOGNGINLINEDATA
438 438
439 439 i = ""
440 440 try:
441 441 f = self.opener(self.indexfile)
442 442 i = f.read(4)
443 443 f.seek(0)
444 444 if len(i) > 0:
445 445 v = struct.unpack(versionformat, i)[0]
446 446 except IOError, inst:
447 447 if inst.errno != errno.ENOENT:
448 448 raise
449 449
450 450 self.version = v
451 451 self._inline = v & REVLOGNGINLINEDATA
452 452 flags = v & ~0xFFFF
453 453 fmt = v & 0xFFFF
454 454 if fmt == REVLOGV0 and flags:
455 455 raise RevlogError(_("index %s unknown flags %#04x for format v0")
456 456 % (self.indexfile, flags >> 16))
457 457 elif fmt == REVLOGNG and flags & ~REVLOGNGINLINEDATA:
458 458 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
459 459 % (self.indexfile, flags >> 16))
460 460 elif fmt > REVLOGNG:
461 461 raise RevlogError(_("index %s unknown format %d")
462 462 % (self.indexfile, fmt))
463 463
464 464 self._io = revlogio()
465 465 if self.version == REVLOGV0:
466 466 self._io = revlogoldio()
467 467 if i:
468 468 d = self._io.parseindex(f, self._inline)
469 469 self.index, self.nodemap, self._chunkcache = d
470 470
471 471 # add the magic null revision at -1
472 472 self.index.append((0, 0, 0, -1, -1, -1, -1, nullid))
473 473
474 474 def _loadindex(self, start, end):
475 475 """load a block of indexes all at once from the lazy parser"""
476 476 if isinstance(self.index, lazyindex):
477 477 self.index.p.loadindex(start, end)
478 478
479 479 def _loadindexmap(self):
480 480 """loads both the map and the index from the lazy parser"""
481 481 if isinstance(self.index, lazyindex):
482 482 p = self.index.p
483 483 p.loadindex()
484 484 self.nodemap = p.map
485 485
486 486 def _loadmap(self):
487 487 """loads the map from the lazy parser"""
488 488 if isinstance(self.nodemap, lazymap):
489 489 self.nodemap.p.loadmap()
490 490 self.nodemap = self.nodemap.p.map
491 491
492 492 def tip(self):
493 493 return self.node(len(self.index) - 2)
494 494 def count(self):
495 495 return len(self.index) - 1
496 496
497 497 def rev(self, node):
498 498 try:
499 499 return self.nodemap[node]
500 500 except KeyError:
501 501 raise LookupError(_('%s: no node %s') % (self.indexfile, hex(node)))
502 502 def node(self, rev):
503 503 return self.index[rev][7]
504 504 def linkrev(self, node):
505 505 return self.index[self.rev(node)][4]
506 506 def parents(self, node):
507 507 d = self.index[self.rev(node)][5:7]
508 508 return (self.node(d[0]), self.node(d[1]))
509 509 def parentrevs(self, rev):
510 510 return self.index[rev][5:7]
511 511 def start(self, rev):
512 return getoffset(self.index[rev][0])
512 return int(self.index[rev][0] >> 16)
513 513 def end(self, rev):
514 514 return self.start(rev) + self.length(rev)
515 515 def length(self, rev):
516 516 return self.index[rev][1]
517 517 def base(self, rev):
518 518 return self.index[rev][3]
519 519
520 520 def size(self, rev):
521 521 """return the length of the uncompressed text for a given revision"""
522 522 l = self.index[rev][2]
523 523 if l >= 0:
524 524 return l
525 525
526 526 t = self.revision(self.node(rev))
527 527 return len(t)
528 528
529 529 # alternate implementation, The advantage to this code is it
530 530 # will be faster for a single revision. But, the results are not
531 531 # cached, so finding the size of every revision will be slower.
532 532 """
533 533 if self.cache and self.cache[1] == rev:
534 534 return len(self.cache[2])
535 535
536 536 base = self.base(rev)
537 537 if self.cache and self.cache[1] >= base and self.cache[1] < rev:
538 538 base = self.cache[1]
539 539 text = self.cache[2]
540 540 else:
541 541 text = self.revision(self.node(base))
542 542
543 543 l = len(text)
544 544 for x in xrange(base + 1, rev + 1):
545 545 l = mdiff.patchedsize(l, self.chunk(x))
546 546 return l
547 547 """
548 548
549 549 def reachable(self, node, stop=None):
550 550 """return a hash of all nodes ancestral to a given node, including
551 551 the node itself, stopping when stop is matched"""
552 552 reachable = {}
553 553 visit = [node]
554 554 reachable[node] = 1
555 555 if stop:
556 556 stopn = self.rev(stop)
557 557 else:
558 558 stopn = 0
559 559 while visit:
560 560 n = visit.pop(0)
561 561 if n == stop:
562 562 continue
563 563 if n == nullid:
564 564 continue
565 565 for p in self.parents(n):
566 566 if self.rev(p) < stopn:
567 567 continue
568 568 if p not in reachable:
569 569 reachable[p] = 1
570 570 visit.append(p)
571 571 return reachable
572 572
573 573 def nodesbetween(self, roots=None, heads=None):
574 574 """Return a tuple containing three elements. Elements 1 and 2 contain
575 575 a final list of bases and heads after all the unreachable ones have been
576 576 pruned. Element 0 contains a topologically sorted list of all
577 577
578 578 nodes that satisfy these constraints:
579 579 1. All nodes must be descended from a node in roots (the nodes on
580 580 roots are considered descended from themselves).
581 581 2. All nodes must also be ancestors of a node in heads (the nodes in
582 582 heads are considered to be their own ancestors).
583 583
584 584 If roots is unspecified, nullid is assumed as the only root.
585 585 If heads is unspecified, it is taken to be the output of the
586 586 heads method (i.e. a list of all nodes in the repository that
587 587 have no children)."""
588 588 nonodes = ([], [], [])
589 589 if roots is not None:
590 590 roots = list(roots)
591 591 if not roots:
592 592 return nonodes
593 593 lowestrev = min([self.rev(n) for n in roots])
594 594 else:
595 595 roots = [nullid] # Everybody's a descendent of nullid
596 596 lowestrev = nullrev
597 597 if (lowestrev == nullrev) and (heads is None):
598 598 # We want _all_ the nodes!
599 599 return ([self.node(r) for r in xrange(0, self.count())],
600 600 [nullid], list(self.heads()))
601 601 if heads is None:
602 602 # All nodes are ancestors, so the latest ancestor is the last
603 603 # node.
604 604 highestrev = self.count() - 1
605 605 # Set ancestors to None to signal that every node is an ancestor.
606 606 ancestors = None
607 607 # Set heads to an empty dictionary for later discovery of heads
608 608 heads = {}
609 609 else:
610 610 heads = list(heads)
611 611 if not heads:
612 612 return nonodes
613 613 ancestors = {}
614 614 # Turn heads into a dictionary so we can remove 'fake' heads.
615 615 # Also, later we will be using it to filter out the heads we can't
616 616 # find from roots.
617 617 heads = dict.fromkeys(heads, 0)
618 618 # Start at the top and keep marking parents until we're done.
619 619 nodestotag = heads.keys()
620 620 # Remember where the top was so we can use it as a limit later.
621 621 highestrev = max([self.rev(n) for n in nodestotag])
622 622 while nodestotag:
623 623 # grab a node to tag
624 624 n = nodestotag.pop()
625 625 # Never tag nullid
626 626 if n == nullid:
627 627 continue
628 628 # A node's revision number represents its place in a
629 629 # topologically sorted list of nodes.
630 630 r = self.rev(n)
631 631 if r >= lowestrev:
632 632 if n not in ancestors:
633 633 # If we are possibly a descendent of one of the roots
634 634 # and we haven't already been marked as an ancestor
635 635 ancestors[n] = 1 # Mark as ancestor
636 636 # Add non-nullid parents to list of nodes to tag.
637 637 nodestotag.extend([p for p in self.parents(n) if
638 638 p != nullid])
639 639 elif n in heads: # We've seen it before, is it a fake head?
640 640 # So it is, real heads should not be the ancestors of
641 641 # any other heads.
642 642 heads.pop(n)
643 643 if not ancestors:
644 644 return nonodes
645 645 # Now that we have our set of ancestors, we want to remove any
646 646 # roots that are not ancestors.
647 647
648 648 # If one of the roots was nullid, everything is included anyway.
649 649 if lowestrev > nullrev:
650 650 # But, since we weren't, let's recompute the lowest rev to not
651 651 # include roots that aren't ancestors.
652 652
653 653 # Filter out roots that aren't ancestors of heads
654 654 roots = [n for n in roots if n in ancestors]
655 655 # Recompute the lowest revision
656 656 if roots:
657 657 lowestrev = min([self.rev(n) for n in roots])
658 658 else:
659 659 # No more roots? Return empty list
660 660 return nonodes
661 661 else:
662 662 # We are descending from nullid, and don't need to care about
663 663 # any other roots.
664 664 lowestrev = nullrev
665 665 roots = [nullid]
666 666 # Transform our roots list into a 'set' (i.e. a dictionary where the
667 667 # values don't matter.
668 668 descendents = dict.fromkeys(roots, 1)
669 669 # Also, keep the original roots so we can filter out roots that aren't
670 670 # 'real' roots (i.e. are descended from other roots).
671 671 roots = descendents.copy()
672 672 # Our topologically sorted list of output nodes.
673 673 orderedout = []
674 674 # Don't start at nullid since we don't want nullid in our output list,
675 675 # and if nullid shows up in descendents, empty parents will look like
676 676 # they're descendents.
677 677 for r in xrange(max(lowestrev, 0), highestrev + 1):
678 678 n = self.node(r)
679 679 isdescendent = False
680 680 if lowestrev == nullrev: # Everybody is a descendent of nullid
681 681 isdescendent = True
682 682 elif n in descendents:
683 683 # n is already a descendent
684 684 isdescendent = True
685 685 # This check only needs to be done here because all the roots
686 686 # will start being marked as descendents before the loop.
687 687 if n in roots:
688 688 # If n was a root, check if it's a 'real' root.
689 689 p = tuple(self.parents(n))
690 690 # If any of its parents are descendents, it's not a root.
691 691 if (p[0] in descendents) or (p[1] in descendents):
692 692 roots.pop(n)
693 693 else:
694 694 p = tuple(self.parents(n))
695 695 # A node is a descendent if either of its parents are
696 696 # descendents. (We seeded the dependents list with the roots
697 697 # up there, remember?)
698 698 if (p[0] in descendents) or (p[1] in descendents):
699 699 descendents[n] = 1
700 700 isdescendent = True
701 701 if isdescendent and ((ancestors is None) or (n in ancestors)):
702 702 # Only include nodes that are both descendents and ancestors.
703 703 orderedout.append(n)
704 704 if (ancestors is not None) and (n in heads):
705 705 # We're trying to figure out which heads are reachable
706 706 # from roots.
707 707 # Mark this head as having been reached
708 708 heads[n] = 1
709 709 elif ancestors is None:
710 710 # Otherwise, we're trying to discover the heads.
711 711 # Assume this is a head because if it isn't, the next step
712 712 # will eventually remove it.
713 713 heads[n] = 1
714 714 # But, obviously its parents aren't.
715 715 for p in self.parents(n):
716 716 heads.pop(p, None)
717 717 heads = [n for n in heads.iterkeys() if heads[n] != 0]
718 718 roots = roots.keys()
719 719 assert orderedout
720 720 assert roots
721 721 assert heads
722 722 return (orderedout, roots, heads)
723 723
724 724 def heads(self, start=None, stop=None):
725 725 """return the list of all nodes that have no children
726 726
727 727 if start is specified, only heads that are descendants of
728 728 start will be returned
729 729 if stop is specified, it will consider all the revs from stop
730 730 as if they had no children
731 731 """
732 732 if start is None and stop is None:
733 733 count = self.count()
734 734 if not count:
735 735 return [nullid]
736 736 ishead = [1] * (count + 1)
737 737 index = self.index
738 738 for r in xrange(count):
739 739 e = index[r]
740 740 ishead[e[5]] = ishead[e[6]] = 0
741 741 return [self.node(r) for r in xrange(count) if ishead[r]]
742 742
743 743 if start is None:
744 744 start = nullid
745 745 if stop is None:
746 746 stop = []
747 747 stoprevs = dict.fromkeys([self.rev(n) for n in stop])
748 748 startrev = self.rev(start)
749 749 reachable = {startrev: 1}
750 750 heads = {startrev: 1}
751 751
752 752 parentrevs = self.parentrevs
753 753 for r in xrange(startrev + 1, self.count()):
754 754 for p in parentrevs(r):
755 755 if p in reachable:
756 756 if r not in stoprevs:
757 757 reachable[r] = 1
758 758 heads[r] = 1
759 759 if p in heads and p not in stoprevs:
760 760 del heads[p]
761 761
762 762 return [self.node(r) for r in heads]
763 763
764 764 def children(self, node):
765 765 """find the children of a given node"""
766 766 c = []
767 767 p = self.rev(node)
768 768 for r in range(p + 1, self.count()):
769 769 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
770 770 if prevs:
771 771 for pr in prevs:
772 772 if pr == p:
773 773 c.append(self.node(r))
774 774 elif p == nullrev:
775 775 c.append(self.node(r))
776 776 return c
777 777
778 778 def _match(self, id):
779 779 if isinstance(id, (long, int)):
780 780 # rev
781 781 return self.node(id)
782 782 if len(id) == 20:
783 783 # possibly a binary node
784 784 # odds of a binary node being all hex in ASCII are 1 in 10**25
785 785 try:
786 786 node = id
787 787 r = self.rev(node) # quick search the index
788 788 return node
789 789 except LookupError:
790 790 pass # may be partial hex id
791 791 try:
792 792 # str(rev)
793 793 rev = int(id)
794 794 if str(rev) != id:
795 795 raise ValueError
796 796 if rev < 0:
797 797 rev = self.count() + rev
798 798 if rev < 0 or rev >= self.count():
799 799 raise ValueError
800 800 return self.node(rev)
801 801 except (ValueError, OverflowError):
802 802 pass
803 803 if len(id) == 40:
804 804 try:
805 805 # a full hex nodeid?
806 806 node = bin(id)
807 807 r = self.rev(node)
808 808 return node
809 809 except TypeError:
810 810 pass
811 811
812 812 def _partialmatch(self, id):
813 813 if len(id) < 40:
814 814 try:
815 815 # hex(node)[:...]
816 816 bin_id = bin(id[:len(id) & ~1]) # grab an even number of digits
817 817 node = None
818 818 for n in self.nodemap:
819 819 if n.startswith(bin_id) and hex(n).startswith(id):
820 820 if node is not None:
821 821 raise LookupError(_("Ambiguous identifier"))
822 822 node = n
823 823 if node is not None:
824 824 return node
825 825 except TypeError:
826 826 pass
827 827
828 828 def lookup(self, id):
829 829 """locate a node based on:
830 830 - revision number or str(revision number)
831 831 - nodeid or subset of hex nodeid
832 832 """
833 833 n = self._match(id)
834 834 if n is not None:
835 835 return n
836 836 n = self._partialmatch(id)
837 837 if n:
838 838 return n
839 839
840 840 raise LookupError(_("No match found"))
841 841
842 842 def cmp(self, node, text):
843 843 """compare text with a given file revision"""
844 844 p1, p2 = self.parents(node)
845 845 return hash(text, p1, p2) != node
846 846
847 847 def chunk(self, rev, df=None):
848 start, length = self.start(rev), self.length(rev)
849 if self._inline:
850 start += (rev + 1) * self._io.size
851 end = start + length
852 848 def loadcache(df):
853 cache_length = max(65536, length)
854 849 if not df:
855 850 if self._inline:
856 851 df = self.opener(self.indexfile)
857 852 else:
858 853 df = self.opener(self.datafile)
859 854 df.seek(start)
860 855 self._chunkcache = (start, df.read(cache_length))
861 856
862 if not self._chunkcache:
863 loadcache(df)
857 start, length = self.start(rev), self.length(rev)
858 if self._inline:
859 start += (rev + 1) * self._io.size
860 end = start + length
864 861
865 cache_start = self._chunkcache[0]
866 cache_end = cache_start + len(self._chunkcache[1])
867 if start >= cache_start and end <= cache_end:
868 # it is cached
869 offset = start - cache_start
862 offset = 0
863 if not self._chunkcache:
864 cache_length = max(65536, length)
865 loadcache(df)
870 866 else:
871 loadcache(df)
872 offset = 0
867 cache_start = self._chunkcache[0]
868 cache_length = len(self._chunkcache[1])
869 cache_end = cache_start + cache_length
870 if start >= cache_start and end <= cache_end:
871 # it is cached
872 offset = start - cache_start
873 else:
874 cache_length = max(65536, length)
875 loadcache(df)
873 876
874 877 # avoid copying large chunks
875 878 c = self._chunkcache[1]
876 if len(c) > length:
879 if cache_length != length:
877 880 c = c[offset:offset + length]
878 881
879 882 return decompress(c)
880 883
881 884 def delta(self, node):
882 885 """return or calculate a delta between a node and its predecessor"""
883 886 r = self.rev(node)
884 887 return self.revdiff(r - 1, r)
885 888
886 889 def revdiff(self, rev1, rev2):
887 890 """return or calculate a delta between two revisions"""
888 891 if rev1 + 1 == rev2 and self.base(rev1) == self.base(rev2):
889 892 return self.chunk(rev2)
890 893
891 894 return mdiff.textdiff(self.revision(self.node(rev1)),
892 895 self.revision(self.node(rev2)))
893 896
894 897 def revision(self, node):
895 898 """return an uncompressed revision of a given"""
896 899 if node == nullid:
897 900 return ""
898 901 if self._cache and self._cache[0] == node:
899 902 return self._cache[2]
900 903
901 904 # look up what we need to read
902 905 text = None
903 906 rev = self.rev(node)
904 907 base = self.base(rev)
905 908
906 909 # check rev flags
907 910 if self.index[rev][0] & 0xFFFF:
908 911 raise RevlogError(_('incompatible revision flag %x') % q)
909 912
910 913 if self._inline:
911 914 # we probably have the whole chunk cached
912 915 df = None
913 916 else:
914 917 df = self.opener(self.datafile)
915 918
916 919 # do we have useful data cached?
917 920 if self._cache and self._cache[1] >= base and self._cache[1] < rev:
918 921 base = self._cache[1]
919 922 text = self._cache[2]
920 923 self._loadindex(base, rev + 1)
921 924 else:
922 925 self._loadindex(base, rev + 1)
923 926 text = self.chunk(base, df=df)
924 927
925 928 bins = [self.chunk(r, df) for r in xrange(base + 1, rev + 1)]
926 929 text = mdiff.patches(text, bins)
927 930 p1, p2 = self.parents(node)
928 931 if node != hash(text, p1, p2):
929 932 raise RevlogError(_("integrity check failed on %s:%d")
930 933 % (self.datafile, rev))
931 934
932 935 self._cache = (node, rev, text)
933 936 return text
934 937
935 938 def checkinlinesize(self, tr, fp=None):
936 939 if not self._inline:
937 940 return
938 941 if not fp:
939 942 fp = self.opener(self.indexfile, 'r')
940 943 fp.seek(0, 2)
941 944 size = fp.tell()
942 945 if size < 131072:
943 946 return
944 947 trinfo = tr.find(self.indexfile)
945 948 if trinfo == None:
946 949 raise RevlogError(_("%s not found in the transaction")
947 950 % self.indexfile)
948 951
949 952 trindex = trinfo[2]
950 953 dataoff = self.start(trindex)
951 954
952 955 tr.add(self.datafile, dataoff)
953 956 df = self.opener(self.datafile, 'w')
954 957 calc = self._io.size
955 958 for r in xrange(self.count()):
956 959 start = self.start(r) + (r + 1) * calc
957 960 length = self.length(r)
958 961 fp.seek(start)
959 962 d = fp.read(length)
960 963 df.write(d)
961 964 fp.close()
962 965 df.close()
963 966 fp = self.opener(self.indexfile, 'w', atomictemp=True)
964 967 self.version &= ~(REVLOGNGINLINEDATA)
965 968 self._inline = False
966 969 for i in xrange(self.count()):
967 970 e = self._io.packentry(self.index[i], self.node, self.version)
968 971 fp.write(e)
969 972
970 973 # if we don't call rename, the temp file will never replace the
971 974 # real index
972 975 fp.rename()
973 976
974 977 tr.replace(self.indexfile, trindex * calc)
975 978 self._chunkcache = None
976 979
977 980 def addrevision(self, text, transaction, link, p1, p2, d=None):
978 981 """add a revision to the log
979 982
980 983 text - the revision data to add
981 984 transaction - the transaction object used for rollback
982 985 link - the linkrev data to add
983 986 p1, p2 - the parent nodeids of the revision
984 987 d - an optional precomputed delta
985 988 """
986 989 dfh = None
987 990 if not self._inline:
988 991 dfh = self.opener(self.datafile, "a")
989 992 ifh = self.opener(self.indexfile, "a+")
990 993 return self._addrevision(text, transaction, link, p1, p2, d, ifh, dfh)
991 994
992 995 def _addrevision(self, text, transaction, link, p1, p2, d, ifh, dfh):
993 996 node = hash(text, p1, p2)
994 997 if node in self.nodemap:
995 998 return node
996 999
997 1000 curr = self.count()
998 1001 prev = curr - 1
999 1002 base = self.base(prev)
1000 1003 offset = self.end(prev)
1001 1004
1002 1005 if curr:
1003 1006 if not d:
1004 1007 ptext = self.revision(self.node(prev))
1005 1008 d = mdiff.textdiff(ptext, text)
1006 1009 data = compress(d)
1007 1010 l = len(data[1]) + len(data[0])
1008 1011 dist = l + offset - self.start(base)
1009 1012
1010 1013 # full versions are inserted when the needed deltas
1011 1014 # become comparable to the uncompressed text
1012 1015 if not curr or dist > len(text) * 2:
1013 1016 data = compress(text)
1014 1017 l = len(data[1]) + len(data[0])
1015 1018 base = curr
1016 1019
1017 1020 e = (offset_type(offset, 0), l, len(text),
1018 1021 base, link, self.rev(p1), self.rev(p2), node)
1019 1022 self.index.insert(-1, e)
1020 1023 self.nodemap[node] = curr
1021 1024
1022 1025 entry = self._io.packentry(e, self.node, self.version)
1023 1026 if not self._inline:
1024 1027 transaction.add(self.datafile, offset)
1025 1028 transaction.add(self.indexfile, curr * len(entry))
1026 1029 if data[0]:
1027 1030 dfh.write(data[0])
1028 1031 dfh.write(data[1])
1029 1032 dfh.flush()
1030 1033 ifh.write(entry)
1031 1034 else:
1032 1035 offset += curr * self._io.size
1033 1036 transaction.add(self.indexfile, offset, prev)
1034 1037 ifh.write(entry)
1035 1038 ifh.write(data[0])
1036 1039 ifh.write(data[1])
1037 1040 self.checkinlinesize(transaction, ifh)
1038 1041
1039 1042 self._cache = (node, curr, text)
1040 1043 return node
1041 1044
1042 1045 def ancestor(self, a, b):
1043 1046 """calculate the least common ancestor of nodes a and b"""
1044 1047
1045 1048 def parents(rev):
1046 1049 return [p for p in self.parentrevs(rev) if p != nullrev]
1047 1050
1048 1051 c = ancestor.ancestor(self.rev(a), self.rev(b), parents)
1049 1052 if c is None:
1050 1053 return nullid
1051 1054
1052 1055 return self.node(c)
1053 1056
1054 1057 def group(self, nodelist, lookup, infocollect=None):
1055 1058 """calculate a delta group
1056 1059
1057 1060 Given a list of changeset revs, return a set of deltas and
1058 1061 metadata corresponding to nodes. the first delta is
1059 1062 parent(nodes[0]) -> nodes[0] the receiver is guaranteed to
1060 1063 have this parent as it has all history before these
1061 1064 changesets. parent is parent[0]
1062 1065 """
1063 1066 revs = [self.rev(n) for n in nodelist]
1064 1067
1065 1068 # if we don't have any revisions touched by these changesets, bail
1066 1069 if not revs:
1067 1070 yield changegroup.closechunk()
1068 1071 return
1069 1072
1070 1073 # add the parent of the first rev
1071 1074 p = self.parents(self.node(revs[0]))[0]
1072 1075 revs.insert(0, self.rev(p))
1073 1076
1074 1077 # build deltas
1075 1078 for d in xrange(0, len(revs) - 1):
1076 1079 a, b = revs[d], revs[d + 1]
1077 1080 nb = self.node(b)
1078 1081
1079 1082 if infocollect is not None:
1080 1083 infocollect(nb)
1081 1084
1082 1085 d = self.revdiff(a, b)
1083 1086 p = self.parents(nb)
1084 1087 meta = nb + p[0] + p[1] + lookup(nb)
1085 1088 yield changegroup.genchunk("%s%s" % (meta, d))
1086 1089
1087 1090 yield changegroup.closechunk()
1088 1091
1089 1092 def addgroup(self, revs, linkmapper, transaction, unique=0):
1090 1093 """
1091 1094 add a delta group
1092 1095
1093 1096 given a set of deltas, add them to the revision log. the
1094 1097 first delta is against its parent, which should be in our
1095 1098 log, the rest are against the previous delta.
1096 1099 """
1097 1100
1098 1101 #track the base of the current delta log
1099 1102 r = self.count()
1100 1103 t = r - 1
1101 1104 node = None
1102 1105
1103 1106 base = prev = nullrev
1104 1107 start = end = textlen = 0
1105 1108 if r:
1106 1109 end = self.end(t)
1107 1110
1108 1111 ifh = self.opener(self.indexfile, "a+")
1109 1112 isize = r * self._io.size
1110 1113 if self._inline:
1111 1114 transaction.add(self.indexfile, end + isize, r)
1112 1115 dfh = None
1113 1116 else:
1114 1117 transaction.add(self.indexfile, isize, r)
1115 1118 transaction.add(self.datafile, end)
1116 1119 dfh = self.opener(self.datafile, "a")
1117 1120
1118 1121 # loop through our set of deltas
1119 1122 chain = None
1120 1123 for chunk in revs:
1121 1124 node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
1122 1125 link = linkmapper(cs)
1123 1126 if node in self.nodemap:
1124 1127 # this can happen if two branches make the same change
1125 1128 # if unique:
1126 1129 # raise RevlogError(_("already have %s") % hex(node[:4]))
1127 1130 chain = node
1128 1131 continue
1129 1132 delta = chunk[80:]
1130 1133
1131 1134 for p in (p1, p2):
1132 1135 if not p in self.nodemap:
1133 1136 raise LookupError(_("unknown parent %s") % short(p))
1134 1137
1135 1138 if not chain:
1136 1139 # retrieve the parent revision of the delta chain
1137 1140 chain = p1
1138 1141 if not chain in self.nodemap:
1139 1142 raise LookupError(_("unknown base %s") % short(chain[:4]))
1140 1143
1141 1144 # full versions are inserted when the needed deltas become
1142 1145 # comparable to the uncompressed text or when the previous
1143 1146 # version is not the one we have a delta against. We use
1144 1147 # the size of the previous full rev as a proxy for the
1145 1148 # current size.
1146 1149
1147 1150 if chain == prev:
1148 1151 tempd = compress(delta)
1149 1152 cdelta = tempd[0] + tempd[1]
1150 1153 textlen = mdiff.patchedsize(textlen, delta)
1151 1154
1152 1155 if chain != prev or (end - start + len(cdelta)) > textlen * 2:
1153 1156 # flush our writes here so we can read it in revision
1154 1157 if dfh:
1155 1158 dfh.flush()
1156 1159 ifh.flush()
1157 1160 text = self.revision(chain)
1158 1161 text = mdiff.patches(text, [delta])
1159 1162 chk = self._addrevision(text, transaction, link, p1, p2, None,
1160 1163 ifh, dfh)
1161 1164 if not dfh and not self._inline:
1162 1165 # addrevision switched from inline to conventional
1163 1166 # reopen the index
1164 1167 dfh = self.opener(self.datafile, "a")
1165 1168 ifh = self.opener(self.indexfile, "a")
1166 1169 if chk != node:
1167 1170 raise RevlogError(_("consistency error adding group"))
1168 1171 textlen = len(text)
1169 1172 else:
1170 1173 e = (offset_type(end, 0), len(cdelta), textlen, base,
1171 1174 link, self.rev(p1), self.rev(p2), node)
1172 1175 self.index.insert(-1, e)
1173 1176 self.nodemap[node] = r
1174 1177 entry = self._io.packentry(e, self.node, self.version)
1175 1178 if self._inline:
1176 1179 ifh.write(entry)
1177 1180 ifh.write(cdelta)
1178 1181 self.checkinlinesize(transaction, ifh)
1179 1182 if not self._inline:
1180 1183 dfh = self.opener(self.datafile, "a")
1181 1184 ifh = self.opener(self.indexfile, "a")
1182 1185 else:
1183 1186 dfh.write(cdelta)
1184 1187 ifh.write(entry)
1185 1188
1186 1189 t, r, chain, prev = r, r + 1, node, node
1187 1190 base = self.base(t)
1188 1191 start = self.start(base)
1189 1192 end = self.end(t)
1190 1193
1191 1194 return node
1192 1195
1193 1196 def strip(self, rev, minlink):
1194 1197 if self.count() == 0 or rev >= self.count():
1195 1198 return
1196 1199
1197 1200 if isinstance(self.index, lazyindex):
1198 1201 self._loadindexmap()
1199 1202
1200 1203 # When stripping away a revision, we need to make sure it
1201 1204 # does not actually belong to an older changeset.
1202 1205 # The minlink parameter defines the oldest revision
1203 1206 # we're allowed to strip away.
1204 1207 while minlink > self.index[rev][4]:
1205 1208 rev += 1
1206 1209 if rev >= self.count():
1207 1210 return
1208 1211
1209 1212 # first truncate the files on disk
1210 1213 end = self.start(rev)
1211 1214 if not self._inline:
1212 1215 df = self.opener(self.datafile, "a")
1213 1216 df.truncate(end)
1214 1217 end = rev * self._io.size
1215 1218 else:
1216 1219 end += rev * self._io.size
1217 1220
1218 1221 indexf = self.opener(self.indexfile, "a")
1219 1222 indexf.truncate(end)
1220 1223
1221 1224 # then reset internal state in memory to forget those revisions
1222 1225 self._cache = None
1223 1226 self._chunkcache = None
1224 1227 for x in xrange(rev, self.count()):
1225 1228 del self.nodemap[self.node(x)]
1226 1229
1227 1230 del self.index[rev:-1]
1228 1231
1229 1232 def checksize(self):
1230 1233 expected = 0
1231 1234 if self.count():
1232 1235 expected = self.end(self.count() - 1)
1233 1236
1234 1237 try:
1235 1238 f = self.opener(self.datafile)
1236 1239 f.seek(0, 2)
1237 1240 actual = f.tell()
1238 1241 dd = actual - expected
1239 1242 except IOError, inst:
1240 1243 if inst.errno != errno.ENOENT:
1241 1244 raise
1242 1245 dd = 0
1243 1246
1244 1247 try:
1245 1248 f = self.opener(self.indexfile)
1246 1249 f.seek(0, 2)
1247 1250 actual = f.tell()
1248 1251 s = self._io.size
1249 1252 i = actual / s
1250 1253 di = actual - (i * s)
1251 1254 if self._inline:
1252 1255 databytes = 0
1253 1256 for r in xrange(self.count()):
1254 1257 databytes += self.length(r)
1255 1258 dd = 0
1256 1259 di = actual - self.count() * s - databytes
1257 1260 except IOError, inst:
1258 1261 if inst.errno != errno.ENOENT:
1259 1262 raise
1260 1263 di = 0
1261 1264
1262 1265 return (dd, di)
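For reference, the RevlogNG index entry layout documented in the "# index ng:" comment in the listing above (a 64-bit field packing a 6-byte data offset with 2-byte flags, then compressed and uncompressed lengths, base, link and parent revisions, and the 20-byte nodeid padded to 32 bytes) can be exercised on its own. The sketch below reuses the format string and the offset/type helpers from the listing; the sample entry values are made up for illustration.

# Pack and unpack one RevlogNG index entry (format ">Qiiiiii20s12x", 64 bytes).
# Illustrative only; the entry values below are arbitrary.
import struct

indexformatng = ">Qiiiiii20s12x"

def offset_type(offset, type):
    return (offset << 16) | type        # 6-byte offset and 2-byte flags share one field

def getoffset(q):
    return int(q >> 16)

def gettype(q):
    return int(q & 0xFFFF)

node = b"\x00" * 20                     # placeholder 20-byte nodeid
entry = (offset_type(4096, 0),          # offset into the data file, flags 0
         120,                           # compressed chunk length
         300,                           # uncompressed text length
         5,                             # base revision of the delta chain
         7,                             # link revision
         4, -1,                         # parent revisions (-1 means null)
         node)

packed = struct.pack(indexformatng, *entry)
assert len(packed) == struct.calcsize(indexformatng) == 64

unpacked = struct.unpack(indexformatng, packed)
assert getoffset(unpacked[0]) == 4096 and gettype(unpacked[0]) == 0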