revlog: pull chunkcache back into revlog
Matt Mackall - r4983:4dbcfc6e default
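This change moves the chunk cache off the I/O classes (revlogoldio, revlogio) and back onto the revlog object as _chunkcache; parseindex now hands the cache back in an (index, nodemap, cache) triple instead of stashing it on self.chunkcache. A minimal sketch of the revised contract, using only names visible in the diff below:

    # sketch: the caller now owns the chunk cache
    d = self._io.parseindex(f, st, self._inline)
    self.index, self.nodemap, self._chunkcache = d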
@@ -1,1277 +1,1278 @@
1 1 """
2 2 revlog.py - storage back-end for mercurial
3 3
4 4 This provides efficient delta storage with O(1) retrieve and append
5 5 and O(changes) merge between branches
6 6
7 7 Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
8 8
9 9 This software may be used and distributed according to the terms
10 10 of the GNU General Public License, incorporated herein by reference.
11 11 """
12 12
13 13 from node import *
14 14 from i18n import _
15 15 import binascii, changegroup, errno, ancestor, mdiff, os
16 16 import sha, struct, util, zlib
17 17
18 18 # revlog version strings
19 19 REVLOGV0 = 0
20 20 REVLOGNG = 1
21 21
22 22 # revlog flags
23 23 REVLOGNGINLINEDATA = (1 << 16)
24 24 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
25 25
26 26 REVLOG_DEFAULT_FORMAT = REVLOGNG
27 27 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
28 28
29 29 def hash(text, p1, p2):
30 30 """generate a hash from the given text and its parent hashes
31 31
32 32 This hash combines both the current file contents and its history
33 33 in a manner that makes it easy to distinguish nodes with the same
34 34 content in the revision graph.
35 35 """
36 36 l = [p1, p2]
37 37 l.sort()
38 38 s = sha.new(l[0])
39 39 s.update(l[1])
40 40 s.update(text)
41 41 return s.digest()
42 42
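As an illustrative sketch (the parent nodeids here are made up), two revisions with identical text but different parents hash to different nodes, which is the point of folding history into the hash:

    p1, p2 = "\0" * 20, "\xff" * 20   # hypothetical parent nodeids
    assert hash("same text", p1, p2) != hash("same text", p1, p1)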
43 43 def compress(text):
44 44 """ generate a possibly-compressed representation of text """
45 45 if not text:
46 46 return ("", text)
47 47 if len(text) < 44:
48 48 if text[0] == '\0':
49 49 return ("", text)
50 50 return ('u', text)
51 51 bin = zlib.compress(text)
52 52 if len(bin) > len(text):
53 53 if text[0] == '\0':
54 54 return ("", text)
55 55 return ('u', text)
56 56 return ("", bin)
57 57
58 58 def decompress(bin):
59 59 """ decompress the given input """
60 60 if not bin:
61 61 return bin
62 62 t = bin[0]
63 63 if t == '\0':
64 64 return bin
65 65 if t == 'x':
66 66 return zlib.decompress(bin)
67 67 if t == 'u':
68 68 return bin[1:]
69 69 raise RevlogError(_("unknown compression type %r") % t)
70 70
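A small round-trip sketch of the envelope used above: short or incompressible text is stored with a 'u' marker, zlib output is recognized by its own leading 'x' byte, and text starting with a NUL passes through untouched:

    for text in ("", "short text", "a" * 1000):
        hdr, data = compress(text)
        assert decompress(hdr + data) == text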
71 71 indexformatv0 = ">4l20s20s20s"
72 72 v0shaoffset = 56
73 73 # index ng:
74 74 # 6 bytes offset
75 75 # 2 bytes flags
76 76 # 4 bytes compressed length
77 77 # 4 bytes uncompressed length
78 78 # 4 bytes: base rev
79 79 # 4 bytes link rev
80 80 # 4 bytes parent 1 rev
81 81 # 4 bytes parent 2 rev
82 82 # 32 bytes: nodeid
83 83 indexformatng = ">Qiiiiii20s12x"
84 84 ngshaoffset = 32
85 85 versionformat = ">I"
86 86
87 87 class lazyparser(object):
88 88 """
89 89 this class avoids the need to parse the entirety of large indices
90 90 """
91 91
92 92 # lazyparser is not safe to use on windows if the win32 extensions are
93 93 # not available. it keeps a file handle open, which makes it impossible
94 94 # to break hardlinks on locally cloned repos.
95 95 safe_to_use = os.name != 'nt' or (not util.is_win_9x() and
96 96 hasattr(util, 'win32api'))
97 97
98 98 def __init__(self, dataf, size):
99 99 self.dataf = dataf
100 100 self.s = struct.calcsize(indexformatng)
101 101 self.datasize = size
102 102 self.l = size/self.s
103 103 self.index = [None] * self.l
104 104 self.map = {nullid: nullrev}
105 105 self.allmap = 0
106 106 self.all = 0
107 107 self.mapfind_count = 0
108 108
109 109 def loadmap(self):
110 110 """
111 111 during a commit, we need to make sure the rev being added is
112 112 not a duplicate. This requires loading the entire index,
113 113 which is fairly slow. loadmap can load up just the node map,
114 114 which takes much less time.
115 115 """
116 116 if self.allmap:
117 117 return
118 118 end = self.datasize
119 119 self.allmap = 1
120 120 cur = 0
121 121 count = 0
122 122 blocksize = self.s * 256
123 123 self.dataf.seek(0)
124 124 while cur < end:
125 125 data = self.dataf.read(blocksize)
126 126 off = 0
127 127 for x in xrange(256):
128 128 n = data[off + ngshaoffset:off + ngshaoffset + 20]
129 129 self.map[n] = count
130 130 count += 1
131 131 if count >= self.l:
132 132 break
133 133 off += self.s
134 134 cur += blocksize
135 135
136 136 def loadblock(self, blockstart, blocksize, data=None):
137 137 if self.all:
138 138 return
139 139 if data is None:
140 140 self.dataf.seek(blockstart)
141 141 if blockstart + blocksize > self.datasize:
142 142 # the revlog may have grown since we started running,
143 143 # but we don't have space in self.index for more entries.
144 144 # limit blocksize so that we don't get too much data.
145 145 blocksize = max(self.datasize - blockstart, 0)
146 146 data = self.dataf.read(blocksize)
147 147 lend = len(data) / self.s
148 148 i = blockstart / self.s
149 149 off = 0
150 150 # lazyindex supports __delitem__
151 151 if lend > len(self.index) - i:
152 152 lend = len(self.index) - i
153 153 for x in xrange(lend):
154 154 if self.index[i + x] == None:
155 155 b = data[off : off + self.s]
156 156 self.index[i + x] = b
157 157 n = b[ngshaoffset:ngshaoffset + 20]
158 158 self.map[n] = i + x
159 159 off += self.s
160 160
161 161 def findnode(self, node):
162 162 """search backwards through the index file for a specific node"""
163 163 if self.allmap:
164 164 return None
165 165
166 166 # hg log will cause many, many searches for the manifest
167 167 # nodes. After we get called a few times, just load the whole
168 168 # thing.
169 169 if self.mapfind_count > 8:
170 170 self.loadmap()
171 171 if node in self.map:
172 172 return node
173 173 return None
174 174 self.mapfind_count += 1
175 175 last = self.l - 1
176 176 while self.index[last] != None:
177 177 if last == 0:
178 178 self.all = 1
179 179 self.allmap = 1
180 180 return None
181 181 last -= 1
182 182 end = (last + 1) * self.s
183 183 blocksize = self.s * 256
184 184 while end >= 0:
185 185 start = max(end - blocksize, 0)
186 186 self.dataf.seek(start)
187 187 data = self.dataf.read(end - start)
188 188 findend = end - start
189 189 while True:
190 190 # we're searching backwards, so we have to make sure
191 191 # we don't find a changeset where this node is a parent
192 192 off = data.rfind(node, 0, findend)
193 193 findend = off
194 194 if off >= 0:
195 195 i = off / self.s
196 196 off = i * self.s
197 197 n = data[off + ngshaoffset:off + ngshaoffset + 20]
198 198 if n == node:
199 199 self.map[n] = i + start / self.s
200 200 return node
201 201 else:
202 202 break
203 203 end -= blocksize
204 204 return None
205 205
206 206 def loadindex(self, i=None, end=None):
207 207 if self.all:
208 208 return
209 209 all = False
210 210 if i == None:
211 211 blockstart = 0
212 212 blocksize = (512 / self.s) * self.s
213 213 end = self.datasize
214 214 all = True
215 215 else:
216 216 if end:
217 217 blockstart = i * self.s
218 218 end = end * self.s
219 219 blocksize = end - blockstart
220 220 else:
221 221 blockstart = (i & ~63) * self.s
222 222 blocksize = self.s * 64
223 223 end = blockstart + blocksize
224 224 while blockstart < end:
225 225 self.loadblock(blockstart, blocksize)
226 226 blockstart += blocksize
227 227 if all:
228 228 self.all = True
229 229
230 230 class lazyindex(object):
231 231 """a lazy version of the index array"""
232 232 def __init__(self, parser):
233 233 self.p = parser
234 234 def __len__(self):
235 235 return len(self.p.index)
236 236 def load(self, pos):
237 237 if pos < 0:
238 238 pos += len(self.p.index)
239 239 self.p.loadindex(pos)
240 240 return self.p.index[pos]
241 241 def __getitem__(self, pos):
242 242 ret = self.p.index[pos] or self.load(pos)
243 243 if isinstance(ret, str):
244 244 ret = struct.unpack(indexformatng, ret)
245 245 return ret
246 246 def __setitem__(self, pos, item):
247 247 self.p.index[pos] = item
248 248 def __delitem__(self, pos):
249 249 del self.p.index[pos]
250 250 def insert(self, pos, e):
251 251 self.p.index.insert(pos, e)
252 252 def append(self, e):
253 253 self.p.index.append(e)
254 254
255 255 class lazymap(object):
256 256 """a lazy version of the node map"""
257 257 def __init__(self, parser):
258 258 self.p = parser
259 259 def load(self, key):
260 260 n = self.p.findnode(key)
261 261 if n == None:
262 262 raise KeyError(key)
263 263 def __contains__(self, key):
264 264 if key in self.p.map:
265 265 return True
266 266 self.p.loadmap()
267 267 return key in self.p.map
268 268 def __iter__(self):
269 269 yield nullid
270 270 for i in xrange(self.p.l):
271 271 ret = self.p.index[i]
272 272 if not ret:
273 273 self.p.loadindex(i)
274 274 ret = self.p.index[i]
275 275 if isinstance(ret, str):
276 276 ret = struct.unpack(indexformatng, ret)
277 277 yield ret[7]
278 278 def __getitem__(self, key):
279 279 try:
280 280 return self.p.map[key]
281 281 except KeyError:
282 282 try:
283 283 self.load(key)
284 284 return self.p.map[key]
285 285 except KeyError:
286 286 raise KeyError("node " + hex(key))
287 287 def __setitem__(self, key, val):
288 288 self.p.map[key] = val
289 289 def __delitem__(self, key):
290 290 del self.p.map[key]
291 291
292 292 class RevlogError(Exception):
293 293 pass
294 294 class LookupError(RevlogError):
295 295 pass
296 296
297 297 def getoffset(q):
298 298 if q & 0xFFFF:
299 299 raise RevlogError(_('incompatible revision flag %x') % q)
300 300 return int(q >> 16)
301 301
302 302 def gettype(q):
303 303 return int(q & 0xFFFF)
304 304
305 305 def offset_type(offset, type):
306 306 return long(long(offset) << 16 | type)
307 307
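The first field of a v1 index entry packs a 48-bit data offset and a 16-bit type into one 64-bit value; a worked example with an arbitrary offset:

    q = offset_type(4096, 0)   # offset 4096, type 0
    assert getoffset(q) == 4096
    assert gettype(q) == 0     # nonzero low bits would make getoffset raise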
308 308 class revlogoldio(object):
309 309 def __init__(self):
310 self.chunkcache = None
311 310 self.size = struct.calcsize(indexformatv0)
312 311
313 312 def parseindex(self, fp, st, inline):
314 313 s = self.size
315 314 index = []
316 315 nodemap = {nullid: nullrev}
317 316 n = off = 0
318 317 data = fp.read()
319 318 l = len(data)
320 319 while off + s <= l:
321 320 cur = data[off:off + s]
322 321 off += s
323 322 e = struct.unpack(indexformatv0, cur)
324 323 # transform to revlogv1 format
325 324 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
326 325 nodemap[e[4]], nodemap[e[5]], e[6])
327 326 index.append(e2)
328 327 nodemap[e[6]] = n
329 328 n += 1
330 329
331 return index, nodemap
330 return index, nodemap, None
332 331
333 332 class revlogio(object):
334 333 def __init__(self):
335 self.chunkcache = None
336 334 self.size = struct.calcsize(indexformatng)
337 335
338 336 def parseindex(self, fp, st, inline):
339 337 if (lazyparser.safe_to_use and not inline and
340 338 st and st.st_size > 1000000):
341 339 # big index, let's parse it on demand
342 340 parser = lazyparser(fp, st.st_size)
343 341 index = lazyindex(parser)
344 342 nodemap = lazymap(parser)
345 343 e = list(index[0])
346 344 type = gettype(e[0])
347 345 e[0] = offset_type(0, type)
348 346 index[0] = e
349 return index, nodemap
347 return index, nodemap, None
350 348
351 349 s = self.size
350 cache = None
352 351 index = []
353 352 nodemap = {nullid: nullrev}
354 353 n = off = 0
355 354 # if we're not using lazymap, always read the whole index
356 355 data = fp.read()
357 356 l = len(data)
358 357 if inline:
359 self.chunkcache = (0, data)
358 cache = (0, data)
360 359 while off + s <= l:
361 360 e = struct.unpack(indexformatng, data[off:off + s])
362 361 index.append(e)
363 362 nodemap[e[7]] = n
364 363 n += 1
365 364 off += s
366 365 if inline:
367 366 if e[1] < 0:
368 367 break
369 368 off += e[1]
370 369
371 370 e = list(index[0])
372 371 type = gettype(e[0])
373 372 e[0] = offset_type(0, type)
374 373 index[0] = e
375 374
376 return index, nodemap
375 return index, nodemap, cache
377 376
378 377 class revlog(object):
379 378 """
380 379 the underlying revision storage object
381 380
382 381 A revlog consists of two parts, an index and the revision data.
383 382
384 383 The index is a file with a fixed record size containing
385 384 information on each revision, including its nodeid (hash), the
386 385 nodeids of its parents, the position and offset of its data within
387 386 the data file, and the revision it's based on. Finally, each entry
388 387 contains a linkrev entry that can serve as a pointer to external
389 388 data.
390 389
391 390 The revision data itself is a linear collection of data chunks.
392 391 Each chunk represents a revision and is usually represented as a
393 392 delta against the previous chunk. To bound lookup time, runs of
394 393 deltas are limited to about 2 times the length of the original
395 394 version data. This makes retrieval of a version proportional to
396 395 its size, or O(1) relative to the number of revisions.
397 396
398 397 Both pieces of the revlog are written to in an append-only
399 398 fashion, which means we never need to rewrite a file to insert or
400 399 remove data, and can use some simple techniques to avoid the need
401 400 for locking while reading.
402 401 """
403 402 def __init__(self, opener, indexfile):
404 403 """
405 404 create a revlog object
406 405
407 406 opener is a function that abstracts the file opening operation
408 407 and can be used to implement COW semantics or the like.
409 408 """
410 409 self.indexfile = indexfile
411 410 self.datafile = indexfile[:-2] + ".d"
412 411 self.opener = opener
413 412
414 413 self.indexstat = None
415 414 self.cache = None
415 self._chunkcache = None
416 416 self.defversion = REVLOG_DEFAULT_VERSION
417 417 if hasattr(opener, "defversion"):
418 418 self.defversion = opener.defversion
419 419 if self.defversion & REVLOGNG:
420 420 self.defversion |= REVLOGNGINLINEDATA
421 421 self._load()
422 422
423 423 def _load(self):
424 424 v = self.defversion
425 425 try:
426 426 f = self.opener(self.indexfile)
427 427 i = f.read(4)
428 428 f.seek(0)
429 429 if len(i) > 0:
430 430 v = struct.unpack(versionformat, i)[0]
431 431 except IOError, inst:
432 432 if inst.errno != errno.ENOENT:
433 433 raise
434 434 i = ""
435 435 else:
436 436 try:
437 437 st = util.fstat(f)
438 438 except AttributeError, inst:
439 439 st = None
440 440 else:
441 441 oldst = self.indexstat
442 442 if (oldst and st.st_dev == oldst.st_dev
443 443 and st.st_ino == oldst.st_ino
444 444 and st.st_mtime == oldst.st_mtime
445 445 and st.st_ctime == oldst.st_ctime
446 446 and st.st_size == oldst.st_size):
447 447 return
448 448 self.indexstat = st
449 449 flags = v & ~0xFFFF
450 450 fmt = v & 0xFFFF
451 451 if fmt == REVLOGV0:
452 452 if flags:
453 453 raise RevlogError(_("index %s unknown flags %#04x for format v0")
454 454 % (self.indexfile, flags >> 16))
455 455 elif fmt == REVLOGNG:
456 456 if flags & ~REVLOGNGINLINEDATA:
457 457 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
458 458 % (self.indexfile, flags >> 16))
459 459 else:
460 460 raise RevlogError(_("index %s unknown format %d")
461 461 % (self.indexfile, fmt))
462 462 self.version = v
463 463 self._inline = v & REVLOGNGINLINEDATA
464 464 self.nodemap = {nullid: nullrev}
465 465 self.index = []
466 466 self._io = revlogio()
467 467 if self.version == REVLOGV0:
468 468 self._io = revlogoldio()
469 469 if i:
470 self.index, self.nodemap = self._io.parseindex(f, st, self._inline)
470 d = self._io.parseindex(f, st, self._inline)
471 self.index, self.nodemap, self._chunkcache = d
471 472 # add the magic null revision at -1
472 473 self.index.append((0, 0, 0, -1, -1, -1, -1, nullid))
473 474
474 475 def _loadindex(self, start, end):
475 476 """load a block of indexes all at once from the lazy parser"""
476 477 if isinstance(self.index, lazyindex):
477 478 self.index.p.loadindex(start, end)
478 479
479 480 def _loadindexmap(self):
480 481 """loads both the map and the index from the lazy parser"""
481 482 if isinstance(self.index, lazyindex):
482 483 p = self.index.p
483 484 p.loadindex()
484 485 self.nodemap = p.map
485 486
486 487 def _loadmap(self):
487 488 """loads the map from the lazy parser"""
488 489 if isinstance(self.nodemap, lazymap):
489 490 self.nodemap.p.loadmap()
490 491 self.nodemap = self.nodemap.p.map
491 492
492 493 def tip(self):
493 494 return self.node(len(self.index) - 2)
494 495 def count(self):
495 496 return len(self.index) - 1
496 497
497 498 def rev(self, node):
498 499 try:
499 500 return self.nodemap[node]
500 501 except KeyError:
501 502 raise LookupError(_('%s: no node %s') % (self.indexfile, hex(node)))
502 503 def node(self, rev):
503 504 return self.index[rev][7]
504 505 def linkrev(self, node):
505 506 return self.index[self.rev(node)][4]
506 507 def parents(self, node):
507 508 d = self.index[self.rev(node)][5:7]
508 509 return (self.node(d[0]), self.node(d[1]))
509 510 def parentrevs(self, rev):
510 511 return self.index[rev][5:7]
511 512 def start(self, rev):
512 513 return getoffset(self.index[rev][0])
513 514 def end(self, rev):
514 515 return self.start(rev) + self.length(rev)
515 516 def length(self, rev):
516 517 return self.index[rev][1]
517 518 def base(self, rev):
518 519 return self.index[rev][3]
519 520
520 521 def size(self, rev):
521 522 """return the length of the uncompressed text for a given revision"""
522 523 l = self.index[rev][2]
523 524 if l >= 0:
524 525 return l
525 526
526 527 t = self.revision(self.node(rev))
527 528 return len(t)
528 529
529 530 # alternate implementation. The advantage of this code is that it
530 531 # will be faster for a single revision. But the results are not
531 532 # cached, so finding the size of every revision will be slower.
532 533 """
533 534 if self.cache and self.cache[1] == rev:
534 535 return len(self.cache[2])
535 536
536 537 base = self.base(rev)
537 538 if self.cache and self.cache[1] >= base and self.cache[1] < rev:
538 539 base = self.cache[1]
539 540 text = self.cache[2]
540 541 else:
541 542 text = self.revision(self.node(base))
542 543
543 544 l = len(text)
544 545 for x in xrange(base + 1, rev + 1):
545 546 l = mdiff.patchedsize(l, self.chunk(x))
546 547 return l
547 548 """
548 549
549 550 def reachable(self, node, stop=None):
550 551 """return a hash of all nodes ancestral to a given node, including
551 552 the node itself, stopping when stop is matched"""
552 553 reachable = {}
553 554 visit = [node]
554 555 reachable[node] = 1
555 556 if stop:
556 557 stopn = self.rev(stop)
557 558 else:
558 559 stopn = 0
559 560 while visit:
560 561 n = visit.pop(0)
561 562 if n == stop:
562 563 continue
563 564 if n == nullid:
564 565 continue
565 566 for p in self.parents(n):
566 567 if self.rev(p) < stopn:
567 568 continue
568 569 if p not in reachable:
569 570 reachable[p] = 1
570 571 visit.append(p)
571 572 return reachable
572 573
573 574 def nodesbetween(self, roots=None, heads=None):
574 575 """Return a tuple containing three elements. Elements 1 and 2 contain
575 576 a final list of bases and heads after all the unreachable ones have been
576 577 pruned. Element 0 contains a topologically sorted list of all
577 578
578 579 nodes that satisfy these constraints:
579 580 1. All nodes must be descended from a node in roots (the nodes on
580 581 roots are considered descended from themselves).
581 582 2. All nodes must also be ancestors of a node in heads (the nodes in
582 583 heads are considered to be their own ancestors).
583 584
584 585 If roots is unspecified, nullid is assumed as the only root.
585 586 If heads is unspecified, it is taken to be the output of the
586 587 heads method (i.e. a list of all nodes in the repository that
587 588 have no children)."""
588 589 nonodes = ([], [], [])
589 590 if roots is not None:
590 591 roots = list(roots)
591 592 if not roots:
592 593 return nonodes
593 594 lowestrev = min([self.rev(n) for n in roots])
594 595 else:
595 596 roots = [nullid] # Everybody's a descendent of nullid
596 597 lowestrev = nullrev
597 598 if (lowestrev == nullrev) and (heads is None):
598 599 # We want _all_ the nodes!
599 600 return ([self.node(r) for r in xrange(0, self.count())],
600 601 [nullid], list(self.heads()))
601 602 if heads is None:
602 603 # All nodes are ancestors, so the latest ancestor is the last
603 604 # node.
604 605 highestrev = self.count() - 1
605 606 # Set ancestors to None to signal that every node is an ancestor.
606 607 ancestors = None
607 608 # Set heads to an empty dictionary for later discovery of heads
608 609 heads = {}
609 610 else:
610 611 heads = list(heads)
611 612 if not heads:
612 613 return nonodes
613 614 ancestors = {}
614 615 # Turn heads into a dictionary so we can remove 'fake' heads.
615 616 # Also, later we will be using it to filter out the heads we can't
616 617 # find from roots.
617 618 heads = dict.fromkeys(heads, 0)
618 619 # Start at the top and keep marking parents until we're done.
619 620 nodestotag = heads.keys()
620 621 # Remember where the top was so we can use it as a limit later.
621 622 highestrev = max([self.rev(n) for n in nodestotag])
622 623 while nodestotag:
623 624 # grab a node to tag
624 625 n = nodestotag.pop()
625 626 # Never tag nullid
626 627 if n == nullid:
627 628 continue
628 629 # A node's revision number represents its place in a
629 630 # topologically sorted list of nodes.
630 631 r = self.rev(n)
631 632 if r >= lowestrev:
632 633 if n not in ancestors:
633 634 # If we are possibly a descendent of one of the roots
634 635 # and we haven't already been marked as an ancestor
635 636 ancestors[n] = 1 # Mark as ancestor
636 637 # Add non-nullid parents to list of nodes to tag.
637 638 nodestotag.extend([p for p in self.parents(n) if
638 639 p != nullid])
639 640 elif n in heads: # We've seen it before, is it a fake head?
640 641 # So it is, real heads should not be the ancestors of
641 642 # any other heads.
642 643 heads.pop(n)
643 644 if not ancestors:
644 645 return nonodes
645 646 # Now that we have our set of ancestors, we want to remove any
646 647 # roots that are not ancestors.
647 648
648 649 # If one of the roots was nullid, everything is included anyway.
649 650 if lowestrev > nullrev:
650 651 # But, since we weren't, let's recompute the lowest rev to not
651 652 # include roots that aren't ancestors.
652 653
653 654 # Filter out roots that aren't ancestors of heads
654 655 roots = [n for n in roots if n in ancestors]
655 656 # Recompute the lowest revision
656 657 if roots:
657 658 lowestrev = min([self.rev(n) for n in roots])
658 659 else:
659 660 # No more roots? Return empty list
660 661 return nonodes
661 662 else:
662 663 # We are descending from nullid, and don't need to care about
663 664 # any other roots.
664 665 lowestrev = nullrev
665 666 roots = [nullid]
666 667 # Transform our roots list into a 'set' (i.e. a dictionary where the
667 668 # values don't matter).
668 669 descendents = dict.fromkeys(roots, 1)
669 670 # Also, keep the original roots so we can filter out roots that aren't
670 671 # 'real' roots (i.e. are descended from other roots).
671 672 roots = descendents.copy()
672 673 # Our topologically sorted list of output nodes.
673 674 orderedout = []
674 675 # Don't start at nullid since we don't want nullid in our output list,
675 676 # and if nullid shows up in descendents, empty parents will look like
676 677 # they're descendents.
677 678 for r in xrange(max(lowestrev, 0), highestrev + 1):
678 679 n = self.node(r)
679 680 isdescendent = False
680 681 if lowestrev == nullrev: # Everybody is a descendent of nullid
681 682 isdescendent = True
682 683 elif n in descendents:
683 684 # n is already a descendent
684 685 isdescendent = True
685 686 # This check only needs to be done here because all the roots
686 687 # will start being marked as descendents before the loop.
687 688 if n in roots:
688 689 # If n was a root, check if it's a 'real' root.
689 690 p = tuple(self.parents(n))
690 691 # If any of its parents are descendents, it's not a root.
691 692 if (p[0] in descendents) or (p[1] in descendents):
692 693 roots.pop(n)
693 694 else:
694 695 p = tuple(self.parents(n))
695 696 # A node is a descendent if either of its parents are
696 697 # descendents. (We seeded the descendents list with the roots
697 698 # up there, remember?)
698 699 if (p[0] in descendents) or (p[1] in descendents):
699 700 descendents[n] = 1
700 701 isdescendent = True
701 702 if isdescendent and ((ancestors is None) or (n in ancestors)):
702 703 # Only include nodes that are both descendents and ancestors.
703 704 orderedout.append(n)
704 705 if (ancestors is not None) and (n in heads):
705 706 # We're trying to figure out which heads are reachable
706 707 # from roots.
707 708 # Mark this head as having been reached
708 709 heads[n] = 1
709 710 elif ancestors is None:
710 711 # Otherwise, we're trying to discover the heads.
711 712 # Assume this is a head because if it isn't, the next step
712 713 # will eventually remove it.
713 714 heads[n] = 1
714 715 # But, obviously its parents aren't.
715 716 for p in self.parents(n):
716 717 heads.pop(p, None)
717 718 heads = [n for n in heads.iterkeys() if heads[n] != 0]
718 719 roots = roots.keys()
719 720 assert orderedout
720 721 assert roots
721 722 assert heads
722 723 return (orderedout, roots, heads)
723 724
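For illustration, against a hypothetical revlog rlog holding a linear history 0..3, the call below yields the topological slice between a root and a head:

    n1, n3 = rlog.node(1), rlog.node(3)
    nodes, roots, heads = rlog.nodesbetween([n1], [n3])
    # nodes == [n1, rlog.node(2), n3]; roots == [n1]; heads == [n3]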
724 725 def heads(self, start=None, stop=None):
725 726 """return the list of all nodes that have no children
726 727
727 728 if start is specified, only heads that are descendants of
728 729 start will be returned
729 730 if stop is specified, it will consider all the revs from stop
730 731 as if they had no children
731 732 """
732 733 if start is None:
733 734 start = nullid
734 735 if stop is None:
735 736 stop = []
736 737 stoprevs = dict.fromkeys([self.rev(n) for n in stop])
737 738 startrev = self.rev(start)
738 739 reachable = {startrev: 1}
739 740 heads = {startrev: 1}
740 741
741 742 parentrevs = self.parentrevs
742 743 for r in xrange(startrev + 1, self.count()):
743 744 for p in parentrevs(r):
744 745 if p in reachable:
745 746 if r not in stoprevs:
746 747 reachable[r] = 1
747 748 heads[r] = 1
748 749 if p in heads and p not in stoprevs:
749 750 del heads[p]
750 751
751 752 return [self.node(r) for r in heads]
752 753
753 754 def children(self, node):
754 755 """find the children of a given node"""
755 756 c = []
756 757 p = self.rev(node)
757 758 for r in range(p + 1, self.count()):
758 759 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
759 760 if prevs:
760 761 for pr in prevs:
761 762 if pr == p:
762 763 c.append(self.node(r))
763 764 elif p == nullrev:
764 765 c.append(self.node(r))
765 766 return c
766 767
767 768 def _match(self, id):
768 769 if isinstance(id, (long, int)):
769 770 # rev
770 771 return self.node(id)
771 772 if len(id) == 20:
772 773 # possibly a binary node
773 774 # odds of a binary node being all hex in ASCII are 1 in 10**25
774 775 try:
775 776 node = id
776 777 r = self.rev(node) # quick search the index
777 778 return node
778 779 except LookupError:
779 780 pass # may be partial hex id
780 781 try:
781 782 # str(rev)
782 783 rev = int(id)
783 784 if str(rev) != id:
784 785 raise ValueError
785 786 if rev < 0:
786 787 rev = self.count() + rev
787 788 if rev < 0 or rev >= self.count():
788 789 raise ValueError
789 790 return self.node(rev)
790 791 except (ValueError, OverflowError):
791 792 pass
792 793 if len(id) == 40:
793 794 try:
794 795 # a full hex nodeid?
795 796 node = bin(id)
796 797 r = self.rev(node)
797 798 return node
798 799 except TypeError:
799 800 pass
800 801
801 802 def _partialmatch(self, id):
802 803 if len(id) < 40:
803 804 try:
804 805 # hex(node)[:...]
805 806 bin_id = bin(id[:len(id) & ~1]) # grab an even number of digits
806 807 node = None
807 808 for n in self.nodemap:
808 809 if n.startswith(bin_id) and hex(n).startswith(id):
809 810 if node is not None:
810 811 raise LookupError(_("Ambiguous identifier"))
811 812 node = n
812 813 if node is not None:
813 814 return node
814 815 except TypeError:
815 816 pass
816 817
817 818 def lookup(self, id):
818 819 """locate a node based on:
819 820 - revision number or str(revision number)
820 821 - nodeid or subset of hex nodeid
821 822 """
822 823 n = self._match(id)
823 824 if n is not None:
824 825 return n
825 826 n = self._partialmatch(id)
826 827 if n:
827 828 return n
828 829
829 830 raise LookupError(_("No match found"))
830 831
831 832 def cmp(self, node, text):
832 833 """compare text with a given file revision"""
833 834 p1, p2 = self.parents(node)
834 835 return hash(text, p1, p2) != node
835 836
836 837 def diff(self, a, b):
837 838 """return a delta between two revisions"""
838 839 return mdiff.textdiff(a, b)
839 840
840 841 def patches(self, t, pl):
841 842 """apply a list of patches to a string"""
842 843 return mdiff.patches(t, pl)
843 844
844 845 def chunk(self, rev, df=None, cachelen=4096):
845 846 start, length = self.start(rev), self.length(rev)
846 847 inline = self._inline
847 848 if inline:
848 849 start += (rev + 1) * self._io.size
849 850 end = start + length
850 851 def loadcache(df):
851 852 cache_length = max(cachelen, length) # 4k
852 853 if not df:
853 854 if inline:
854 855 df = self.opener(self.indexfile)
855 856 else:
856 857 df = self.opener(self.datafile)
857 858 df.seek(start)
858 self._io.chunkcache = (start, df.read(cache_length))
859 self._chunkcache = (start, df.read(cache_length))
859 860
860 if not self._io.chunkcache:
861 if not self._chunkcache:
861 862 loadcache(df)
862 863
863 cache_start = self._io.chunkcache[0]
864 cache_end = cache_start + len(self._io.chunkcache[1])
864 cache_start = self._chunkcache[0]
865 cache_end = cache_start + len(self._chunkcache[1])
865 866 if start >= cache_start and end <= cache_end:
866 867 # it is cached
867 868 offset = start - cache_start
868 869 else:
869 870 loadcache(df)
870 871 offset = 0
871 872
872 return decompress(self._io.chunkcache[1][offset:offset + length])
873 return decompress(self._chunkcache[1][offset:offset + length])
873 874
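With the cache now on the revlog object, consecutive chunk reads within one cached window skip the seek/read entirely; a hedged usage sketch against a hypothetical instance rlog:

    d0 = rlog.chunk(0)   # cache miss: reads up to 4k from rev 0's offset
    d1 = rlog.chunk(1)   # usually a hit on the same _chunkcache window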
874 875 def delta(self, node):
875 876 """return or calculate a delta between a node and its predecessor"""
876 877 r = self.rev(node)
877 878 return self.revdiff(r - 1, r)
878 879
879 880 def revdiff(self, rev1, rev2):
880 881 """return or calculate a delta between two revisions"""
881 882 b1 = self.base(rev1)
882 883 b2 = self.base(rev2)
883 884 if b1 == b2 and rev1 + 1 == rev2:
884 885 return self.chunk(rev2)
885 886 else:
886 887 return self.diff(self.revision(self.node(rev1)),
887 888 self.revision(self.node(rev2)))
888 889
889 890 def revision(self, node):
890 891 """return an uncompressed revision of a given"""
891 892 if node == nullid:
892 893 return ""
893 894 if self.cache and self.cache[0] == node:
894 895 return self.cache[2]
895 896
896 897 # look up what we need to read
897 898 text = None
898 899 rev = self.rev(node)
899 900 base = self.base(rev)
900 901
901 902 if self._inline:
902 903 # we probably have the whole chunk cached
903 904 df = None
904 905 else:
905 906 df = self.opener(self.datafile)
906 907
907 908 # do we have useful data cached?
908 909 if self.cache and self.cache[1] >= base and self.cache[1] < rev:
909 910 base = self.cache[1]
910 911 text = self.cache[2]
911 912 self._loadindex(base, rev + 1)
912 913 else:
913 914 self._loadindex(base, rev + 1)
914 915 text = self.chunk(base, df=df)
915 916
916 917 bins = []
917 918 for r in xrange(base + 1, rev + 1):
918 919 bins.append(self.chunk(r, df=df))
919 920
920 921 text = self.patches(text, bins)
921 922
922 923 p1, p2 = self.parents(node)
923 924 if node != hash(text, p1, p2):
924 925 raise RevlogError(_("integrity check failed on %s:%d")
925 926 % (self.datafile, rev))
926 927
927 928 self.cache = (node, rev, text)
928 929 return text
929 930
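What revision() does above is the delta-chain walk from the class docstring: fetch the base snapshot, then patch forward one delta at a time. A simplified sketch of just that loop (rlog is a hypothetical instance):

    base = rlog.base(rev)
    text = rlog.chunk(base)                 # full text of the base revision
    for r in xrange(base + 1, rev + 1):
        text = mdiff.patches(text, [rlog.chunk(r)])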
930 931 def checkinlinesize(self, tr, fp=None):
931 932 if not self._inline:
932 933 return
933 934 if not fp:
934 935 fp = self.opener(self.indexfile, 'r')
935 936 fp.seek(0, 2)
936 937 size = fp.tell()
937 938 if size < 131072:
938 939 return
939 940 trinfo = tr.find(self.indexfile)
940 941 if trinfo == None:
941 942 raise RevlogError(_("%s not found in the transaction")
942 943 % self.indexfile)
943 944
944 945 trindex = trinfo[2]
945 946 dataoff = self.start(trindex)
946 947
947 948 tr.add(self.datafile, dataoff)
948 949 df = self.opener(self.datafile, 'w')
949 950 calc = self._io.size
950 951 for r in xrange(self.count()):
951 952 start = self.start(r) + (r + 1) * calc
952 953 length = self.length(r)
953 954 fp.seek(start)
954 955 d = fp.read(length)
955 956 df.write(d)
956 957 fp.close()
957 958 df.close()
958 959 fp = self.opener(self.indexfile, 'w', atomictemp=True)
959 960 self.version &= ~(REVLOGNGINLINEDATA)
960 961 self._inline = False
961 962 if self.count():
962 963 x = self.index[0]
963 964 e = struct.pack(indexformatng, *x)[4:]
964 965 l = struct.pack(versionformat, self.version)
965 966 fp.write(l)
966 967 fp.write(e)
967 968
968 969 for i in xrange(1, self.count()):
969 970 x = self.index[i]
970 971 e = struct.pack(indexformatng, *x)
971 972 fp.write(e)
972 973
973 974 # if we don't call rename, the temp file will never replace the
974 975 # real index
975 976 fp.rename()
976 977
977 978 tr.replace(self.indexfile, trindex * calc)
978 self._io.chunkcache = None
979 self._chunkcache = None
979 980
980 981 def addrevision(self, text, transaction, link, p1, p2, d=None):
981 982 """add a revision to the log
982 983
983 984 text - the revision data to add
984 985 transaction - the transaction object used for rollback
985 986 link - the linkrev data to add
986 987 p1, p2 - the parent nodeids of the revision
987 988 d - an optional precomputed delta
988 989 """
989 990 dfh = None
990 991 if not self._inline:
991 992 dfh = self.opener(self.datafile, "a")
992 993 ifh = self.opener(self.indexfile, "a+")
993 994 return self._addrevision(text, transaction, link, p1, p2, d, ifh, dfh)
994 995
995 996 def _addrevision(self, text, transaction, link, p1, p2, d, ifh, dfh):
996 997 node = hash(text, p1, p2)
997 998 if node in self.nodemap:
998 999 return node
999 1000
1000 1001 curr = self.count()
1001 1002 prev = curr - 1
1002 1003 base = self.base(prev)
1003 1004 offset = self.end(prev)
1004 1005
1005 1006 if curr:
1006 1007 if not d:
1007 1008 ptext = self.revision(self.node(prev))
1008 1009 d = self.diff(ptext, text)
1009 1010 data = compress(d)
1010 1011 l = len(data[1]) + len(data[0])
1011 1012 dist = l + offset - self.start(base)
1012 1013
1013 1014 # full versions are inserted when the needed deltas
1014 1015 # become comparable to the uncompressed text
1015 1016 if not curr or dist > len(text) * 2:
1016 1017 data = compress(text)
1017 1018 l = len(data[1]) + len(data[0])
1018 1019 base = curr
1019 1020
1020 1021 e = (offset_type(offset, 0), l, len(text),
1021 1022 base, link, self.rev(p1), self.rev(p2), node)
1022 1023 self.index.insert(-1, e)
1023 1024 self.nodemap[node] = curr
1024 1025
1025 1026 if self.version == REVLOGV0:
1026 1027 e = (offset, l, base, link, p1, p2, node)
1027 1028 entry = struct.pack(indexformatv0, *e)
1028 1029 else:
1029 1030 entry = struct.pack(indexformatng, *e)
1030 1031 if not curr:
1031 1032 entry = struct.pack(versionformat, self.version) + entry[4:]
1032 1033
1033 1034 if not self._inline:
1034 1035 transaction.add(self.datafile, offset)
1035 1036 transaction.add(self.indexfile, curr * len(entry))
1036 1037 if data[0]:
1037 1038 dfh.write(data[0])
1038 1039 dfh.write(data[1])
1039 1040 dfh.flush()
1040 1041 ifh.write(entry)
1041 1042 else:
1042 1043 ifh.seek(0, 2)
1043 1044 transaction.add(self.indexfile, ifh.tell(), prev)
1044 1045 ifh.write(entry)
1045 1046 ifh.write(data[0])
1046 1047 ifh.write(data[1])
1047 1048 self.checkinlinesize(transaction, ifh)
1048 1049
1049 1050 self.cache = (node, curr, text)
1050 1051 return node
1051 1052
1052 1053 def ancestor(self, a, b):
1053 1054 """calculate the least common ancestor of nodes a and b"""
1054 1055
1055 1056 def parents(rev):
1056 1057 return [p for p in self.parentrevs(rev) if p != nullrev]
1057 1058
1058 1059 c = ancestor.ancestor(self.rev(a), self.rev(b), parents)
1059 1060 if c is None:
1060 1061 return nullid
1061 1062
1062 1063 return self.node(c)
1063 1064
1064 1065 def group(self, nodelist, lookup, infocollect=None):
1065 1066 """calculate a delta group
1066 1067
1067 1068 Given a list of changeset revs, return a set of deltas and
1068 1069 metadata corresponding to nodes. the first delta is
1069 1070 parent(nodes[0]) -> nodes[0] the receiver is guaranteed to
1070 1071 have this parent as it has all history before these
1071 1072 changesets. parent is parent[0]
1072 1073 """
1073 1074 revs = [self.rev(n) for n in nodelist]
1074 1075
1075 1076 # if we don't have any revisions touched by these changesets, bail
1076 1077 if not revs:
1077 1078 yield changegroup.closechunk()
1078 1079 return
1079 1080
1080 1081 # add the parent of the first rev
1081 1082 p = self.parents(self.node(revs[0]))[0]
1082 1083 revs.insert(0, self.rev(p))
1083 1084
1084 1085 # build deltas
1085 1086 for d in xrange(0, len(revs) - 1):
1086 1087 a, b = revs[d], revs[d + 1]
1087 1088 nb = self.node(b)
1088 1089
1089 1090 if infocollect is not None:
1090 1091 infocollect(nb)
1091 1092
1092 1093 d = self.revdiff(a, b)
1093 1094 p = self.parents(nb)
1094 1095 meta = nb + p[0] + p[1] + lookup(nb)
1095 1096 yield changegroup.genchunk("%s%s" % (meta, d))
1096 1097
1097 1098 yield changegroup.closechunk()
1098 1099
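Each chunk that group() yields is an 80-byte header of four 20-byte nodeids (node, p1, p2, and the linked changeset) followed by the delta payload; addgroup() below unpacks it exactly that way:

    node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
    delta = chunk[80:]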
1099 1100 def addgroup(self, revs, linkmapper, transaction, unique=0):
1100 1101 """
1101 1102 add a delta group
1102 1103
1103 1104 given a set of deltas, add them to the revision log. the
1104 1105 first delta is against its parent, which should be in our
1105 1106 log, the rest are against the previous delta.
1106 1107 """
1107 1108
1108 1109 # track the base of the current delta log
1109 1110 r = self.count()
1110 1111 t = r - 1
1111 1112 node = None
1112 1113
1113 1114 base = prev = nullrev
1114 1115 start = end = textlen = 0
1115 1116 if r:
1116 1117 end = self.end(t)
1117 1118
1118 1119 ifh = self.opener(self.indexfile, "a+")
1119 1120 ifh.seek(0, 2)
1120 1121 transaction.add(self.indexfile, ifh.tell(), self.count())
1121 1122 if self._inline:
1122 1123 dfh = None
1123 1124 else:
1124 1125 transaction.add(self.datafile, end)
1125 1126 dfh = self.opener(self.datafile, "a")
1126 1127
1127 1128 # loop through our set of deltas
1128 1129 chain = None
1129 1130 for chunk in revs:
1130 1131 node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
1131 1132 link = linkmapper(cs)
1132 1133 if node in self.nodemap:
1133 1134 # this can happen if two branches make the same change
1134 1135 # if unique:
1135 1136 # raise RevlogError(_("already have %s") % hex(node[:4]))
1136 1137 chain = node
1137 1138 continue
1138 1139 delta = chunk[80:]
1139 1140
1140 1141 for p in (p1, p2):
1141 1142 if not p in self.nodemap:
1142 1143 raise LookupError(_("unknown parent %s") % short(p))
1143 1144
1144 1145 if not chain:
1145 1146 # retrieve the parent revision of the delta chain
1146 1147 chain = p1
1147 1148 if not chain in self.nodemap:
1148 1149 raise LookupError(_("unknown base %s") % short(chain[:4]))
1149 1150
1150 1151 # full versions are inserted when the needed deltas become
1151 1152 # comparable to the uncompressed text or when the previous
1152 1153 # version is not the one we have a delta against. We use
1153 1154 # the size of the previous full rev as a proxy for the
1154 1155 # current size.
1155 1156
1156 1157 if chain == prev:
1157 1158 tempd = compress(delta)
1158 1159 cdelta = tempd[0] + tempd[1]
1159 1160 textlen = mdiff.patchedsize(textlen, delta)
1160 1161
1161 1162 if chain != prev or (end - start + len(cdelta)) > textlen * 2:
1162 1163 # flush our writes here so we can read it in revision
1163 1164 if dfh:
1164 1165 dfh.flush()
1165 1166 ifh.flush()
1166 1167 text = self.revision(chain)
1167 1168 text = self.patches(text, [delta])
1168 1169 chk = self._addrevision(text, transaction, link, p1, p2, None,
1169 1170 ifh, dfh)
1170 1171 if not dfh and not self._inline:
1171 1172 # addrevision switched from inline to conventional
1172 1173 # reopen the index
1173 1174 dfh = self.opener(self.datafile, "a")
1174 1175 ifh = self.opener(self.indexfile, "a")
1175 1176 if chk != node:
1176 1177 raise RevlogError(_("consistency error adding group"))
1177 1178 textlen = len(text)
1178 1179 else:
1179 1180 e = (offset_type(end, 0), len(cdelta), textlen, base,
1180 1181 link, self.rev(p1), self.rev(p2), node)
1181 1182 self.index.insert(-1, e)
1182 1183 self.nodemap[node] = r
1183 1184 if self._inline:
1184 1185 ifh.write(struct.pack(indexformatng, *e))
1185 1186 ifh.write(cdelta)
1186 1187 self.checkinlinesize(transaction, ifh)
1187 1188 if not self._inline:
1188 1189 dfh = self.opener(self.datafile, "a")
1189 1190 ifh = self.opener(self.indexfile, "a")
1190 1191 else:
1191 1192 if self.version == REVLOGV0:
1192 1193 e = (end, len(cdelta), base, link, p1, p2, node)
1193 1194 entry = struct.pack(indexformatv0, *e)
1194 1195 else:
1195 1196 entry = struct.pack(indexformatng, *e)
1196 1197 dfh.write(cdelta)
1197 1198 ifh.write(entry)
1198 1199
1199 1200 t, r, chain, prev = r, r + 1, node, node
1200 1201 base = self.base(t)
1201 1202 start = self.start(base)
1202 1203 end = self.end(t)
1203 1204
1204 1205 return node
1205 1206
1206 1207 def strip(self, rev, minlink):
1207 1208 if self.count() == 0 or rev >= self.count():
1208 1209 return
1209 1210
1210 1211 if isinstance(self.index, lazyindex):
1211 1212 self._loadindexmap()
1212 1213
1213 1214 # When stripping away a revision, we need to make sure it
1214 1215 # does not actually belong to an older changeset.
1215 1216 # The minlink parameter defines the oldest revision
1216 1217 # we're allowed to strip away.
1217 1218 while minlink > self.index[rev][4]:
1218 1219 rev += 1
1219 1220 if rev >= self.count():
1220 1221 return
1221 1222
1222 1223 # first truncate the files on disk
1223 1224 end = self.start(rev)
1224 1225 if not self._inline:
1225 1226 df = self.opener(self.datafile, "a")
1226 1227 df.truncate(end)
1227 1228 end = rev * self._io.size
1228 1229 else:
1229 1230 end += rev * self._io.size
1230 1231
1231 1232 indexf = self.opener(self.indexfile, "a")
1232 1233 indexf.truncate(end)
1233 1234
1234 1235 # then reset internal state in memory to forget those revisions
1235 1236 self.cache = None
1236 self._io.chunkcache = None
1237 self._chunkcache = None
1237 1238 for x in xrange(rev, self.count()):
1238 1239 del self.nodemap[self.node(x)]
1239 1240
1240 1241 del self.index[rev:-1]
1241 1242
1242 1243 def checksize(self):
1243 1244 expected = 0
1244 1245 if self.count():
1245 1246 expected = self.end(self.count() - 1)
1246 1247
1247 1248 try:
1248 1249 f = self.opener(self.datafile)
1249 1250 f.seek(0, 2)
1250 1251 actual = f.tell()
1251 1252 dd = actual - expected
1252 1253 except IOError, inst:
1253 1254 if inst.errno != errno.ENOENT:
1254 1255 raise
1255 1256 dd = 0
1256 1257
1257 1258 try:
1258 1259 f = self.opener(self.indexfile)
1259 1260 f.seek(0, 2)
1260 1261 actual = f.tell()
1261 1262 s = self._io.size
1262 1263 i = actual / s
1263 1264 di = actual - (i * s)
1264 1265 if self._inline:
1265 1266 databytes = 0
1266 1267 for r in xrange(self.count()):
1267 1268 databytes += self.length(r)
1268 1269 dd = 0
1269 1270 di = actual - self.count() * s - databytes
1270 1271 except IOError, inst:
1271 1272 if inst.errno != errno.ENOENT:
1272 1273 raise
1273 1274 di = 0
1274 1275
1275 1276 return (dd, di)
1276 1277
1277 1278
General Comments 0
You need to be logged in to leave comments. Login now