##// END OF EJS Templates
revlog: minor revdiff reorganization
Matt Mackall -
r5005:72082bfc default
parent child Browse files
Show More
@@ -1,1264 +1,1262 b''
1 1 """
2 2 revlog.py - storage back-end for mercurial
3 3
4 4 This provides efficient delta storage with O(1) retrieve and append
5 5 and O(changes) merge between branches
6 6
7 7 Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
8 8
9 9 This software may be used and distributed according to the terms
10 10 of the GNU General Public License, incorporated herein by reference.
11 11 """
12 12
13 13 from node import *
14 14 from i18n import _
15 15 import binascii, changegroup, errno, ancestor, mdiff, os
16 16 import sha, struct, util, zlib
17 17
18 18 # revlog flags
19 19 REVLOGV0 = 0
20 20 REVLOGNG = 1
21 21 REVLOGNGINLINEDATA = (1 << 16)
22 22 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
23 23 REVLOG_DEFAULT_FORMAT = REVLOGNG
24 24 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
25 25
26 26 class RevlogError(Exception):
27 27 pass
28 28 class LookupError(RevlogError):
29 29 pass
30 30
31 31 def getoffset(q):
32 32 return int(q >> 16)
33 33
34 34 def gettype(q):
35 35 return int(q & 0xFFFF)
36 36
37 37 def offset_type(offset, type):
38 38 return long(long(offset) << 16 | type)
39 39
40 40 def hash(text, p1, p2):
41 41 """generate a hash from the given text and its parent hashes
42 42
43 43 This hash combines both the current file contents and its history
44 44 in a manner that makes it easy to distinguish nodes with the same
45 45 content in the revision graph.
46 46 """
47 47 l = [p1, p2]
48 48 l.sort()
49 49 s = sha.new(l[0])
50 50 s.update(l[1])
51 51 s.update(text)
52 52 return s.digest()
53 53
54 54 def compress(text):
55 55 """ generate a possibly-compressed representation of text """
56 56 if not text:
57 57 return ("", text)
58 58 if len(text) < 44:
59 59 if text[0] == '\0':
60 60 return ("", text)
61 61 return ('u', text)
62 62 bin = zlib.compress(text)
63 63 if len(bin) > len(text):
64 64 if text[0] == '\0':
65 65 return ("", text)
66 66 return ('u', text)
67 67 return ("", bin)
68 68
69 69 def decompress(bin):
70 70 """ decompress the given input """
71 71 if not bin:
72 72 return bin
73 73 t = bin[0]
74 74 if t == '\0':
75 75 return bin
76 76 if t == 'x':
77 77 return zlib.decompress(bin)
78 78 if t == 'u':
79 79 return bin[1:]
80 80 raise RevlogError(_("unknown compression type %r") % t)
81 81
82 82 class lazyparser(object):
83 83 """
84 84 this class avoids the need to parse the entirety of large indices
85 85 """
86 86
87 87 # lazyparser is not safe to use on windows if win32 extensions not
88 88 # available. it keeps file handle open, which make it not possible
89 89 # to break hardlinks on local cloned repos.
90 90 safe_to_use = os.name != 'nt' or (not util.is_win_9x() and
91 91 hasattr(util, 'win32api'))
92 92
93 93 def __init__(self, dataf, size):
94 94 self.dataf = dataf
95 95 self.s = struct.calcsize(indexformatng)
96 96 self.datasize = size
97 97 self.l = size/self.s
98 98 self.index = [None] * self.l
99 99 self.map = {nullid: nullrev}
100 100 self.allmap = 0
101 101 self.all = 0
102 102 self.mapfind_count = 0
103 103
104 104 def loadmap(self):
105 105 """
106 106 during a commit, we need to make sure the rev being added is
107 107 not a duplicate. This requires loading the entire index,
108 108 which is fairly slow. loadmap can load up just the node map,
109 109 which takes much less time.
110 110 """
111 111 if self.allmap:
112 112 return
113 113 end = self.datasize
114 114 self.allmap = 1
115 115 cur = 0
116 116 count = 0
117 117 blocksize = self.s * 256
118 118 self.dataf.seek(0)
119 119 while cur < end:
120 120 data = self.dataf.read(blocksize)
121 121 off = 0
122 122 for x in xrange(256):
123 123 n = data[off + ngshaoffset:off + ngshaoffset + 20]
124 124 self.map[n] = count
125 125 count += 1
126 126 if count >= self.l:
127 127 break
128 128 off += self.s
129 129 cur += blocksize
130 130
131 131 def loadblock(self, blockstart, blocksize, data=None):
132 132 if self.all:
133 133 return
134 134 if data is None:
135 135 self.dataf.seek(blockstart)
136 136 if blockstart + blocksize > self.datasize:
137 137 # the revlog may have grown since we've started running,
138 138 # but we don't have space in self.index for more entries.
139 139 # limit blocksize so that we don't get too much data.
140 140 blocksize = max(self.datasize - blockstart, 0)
141 141 data = self.dataf.read(blocksize)
142 142 lend = len(data) / self.s
143 143 i = blockstart / self.s
144 144 off = 0
145 145 # lazyindex supports __delitem__
146 146 if lend > len(self.index) - i:
147 147 lend = len(self.index) - i
148 148 for x in xrange(lend):
149 149 if self.index[i + x] == None:
150 150 b = data[off : off + self.s]
151 151 self.index[i + x] = b
152 152 n = b[ngshaoffset:ngshaoffset + 20]
153 153 self.map[n] = i + x
154 154 off += self.s
155 155
156 156 def findnode(self, node):
157 157 """search backwards through the index file for a specific node"""
158 158 if self.allmap:
159 159 return None
160 160
161 161 # hg log will cause many many searches for the manifest
162 162 # nodes. After we get called a few times, just load the whole
163 163 # thing.
164 164 if self.mapfind_count > 8:
165 165 self.loadmap()
166 166 if node in self.map:
167 167 return node
168 168 return None
169 169 self.mapfind_count += 1
170 170 last = self.l - 1
171 171 while self.index[last] != None:
172 172 if last == 0:
173 173 self.all = 1
174 174 self.allmap = 1
175 175 return None
176 176 last -= 1
177 177 end = (last + 1) * self.s
178 178 blocksize = self.s * 256
179 179 while end >= 0:
180 180 start = max(end - blocksize, 0)
181 181 self.dataf.seek(start)
182 182 data = self.dataf.read(end - start)
183 183 findend = end - start
184 184 while True:
185 185 # we're searching backwards, so we have to make sure
186 186 # we don't find a changeset where this node is a parent
187 187 off = data.find(node, 0, findend)
188 188 findend = off
189 189 if off >= 0:
190 190 i = off / self.s
191 191 off = i * self.s
192 192 n = data[off + ngshaoffset:off + ngshaoffset + 20]
193 193 if n == node:
194 194 self.map[n] = i + start / self.s
195 195 return node
196 196 else:
197 197 break
198 198 end -= blocksize
199 199 return None
200 200
201 201 def loadindex(self, i=None, end=None):
202 202 if self.all:
203 203 return
204 204 all = False
205 205 if i == None:
206 206 blockstart = 0
207 207 blocksize = (65536 / self.s) * self.s
208 208 end = self.datasize
209 209 all = True
210 210 else:
211 211 if end:
212 212 blockstart = i * self.s
213 213 end = end * self.s
214 214 blocksize = end - blockstart
215 215 else:
216 216 blockstart = (i & ~1023) * self.s
217 217 blocksize = self.s * 1024
218 218 end = blockstart + blocksize
219 219 while blockstart < end:
220 220 self.loadblock(blockstart, blocksize)
221 221 blockstart += blocksize
222 222 if all:
223 223 self.all = True
224 224
225 225 class lazyindex(object):
226 226 """a lazy version of the index array"""
227 227 def __init__(self, parser):
228 228 self.p = parser
229 229 def __len__(self):
230 230 return len(self.p.index)
231 231 def load(self, pos):
232 232 if pos < 0:
233 233 pos += len(self.p.index)
234 234 self.p.loadindex(pos)
235 235 return self.p.index[pos]
236 236 def __getitem__(self, pos):
237 237 return struct.unpack(indexformatng,
238 238 self.p.index[pos] or self.load(pos))
239 239 def __setitem__(self, pos, item):
240 240 self.p.index[pos] = struct.pack(indexformatng, *item)
241 241 def __delitem__(self, pos):
242 242 del self.p.index[pos]
243 243 def insert(self, pos, e):
244 244 self.p.index.insert(pos, struct.pack(indexformatng, *e))
245 245 def append(self, e):
246 246 self.p.index.append(struct.pack(indexformatng, *e))
247 247
248 248 class lazymap(object):
249 249 """a lazy version of the node map"""
250 250 def __init__(self, parser):
251 251 self.p = parser
252 252 def load(self, key):
253 253 n = self.p.findnode(key)
254 254 if n == None:
255 255 raise KeyError(key)
256 256 def __contains__(self, key):
257 257 if key in self.p.map:
258 258 return True
259 259 self.p.loadmap()
260 260 return key in self.p.map
261 261 def __iter__(self):
262 262 yield nullid
263 263 for i in xrange(self.p.l):
264 264 ret = self.p.index[i]
265 265 if not ret:
266 266 self.p.loadindex(i)
267 267 ret = self.p.index[i]
268 268 if isinstance(ret, str):
269 269 ret = struct.unpack(indexformatng, ret)
270 270 yield ret[7]
271 271 def __getitem__(self, key):
272 272 try:
273 273 return self.p.map[key]
274 274 except KeyError:
275 275 try:
276 276 self.load(key)
277 277 return self.p.map[key]
278 278 except KeyError:
279 279 raise KeyError("node " + hex(key))
280 280 def __setitem__(self, key, val):
281 281 self.p.map[key] = val
282 282 def __delitem__(self, key):
283 283 del self.p.map[key]
284 284
285 285 indexformatv0 = ">4l20s20s20s"
286 286 v0shaoffset = 56
287 287
288 288 class revlogoldio(object):
289 289 def __init__(self):
290 290 self.size = struct.calcsize(indexformatv0)
291 291
292 292 def parseindex(self, fp, inline):
293 293 s = self.size
294 294 index = []
295 295 nodemap = {nullid: nullrev}
296 296 n = off = 0
297 297 data = fp.read()
298 298 l = len(data)
299 299 while off + s <= l:
300 300 cur = data[off:off + s]
301 301 off += s
302 302 e = struct.unpack(indexformatv0, cur)
303 303 # transform to revlogv1 format
304 304 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
305 305 nodemap[e[4]], nodemap[e[5]], e[6])
306 306 index.append(e2)
307 307 nodemap[e[6]] = n
308 308 n += 1
309 309
310 310 return index, nodemap, None
311 311
312 312 def packentry(self, entry, node, version):
313 313 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
314 314 node(entry[5]), node(entry[6]), entry[7])
315 315 return struct.pack(indexformatv0, *e2)
316 316
317 317 # index ng:
318 318 # 6 bytes offset
319 319 # 2 bytes flags
320 320 # 4 bytes compressed length
321 321 # 4 bytes uncompressed length
322 322 # 4 bytes: base rev
323 323 # 4 bytes link rev
324 324 # 4 bytes parent 1 rev
325 325 # 4 bytes parent 2 rev
326 326 # 32 bytes: nodeid
327 327 indexformatng = ">Qiiiiii20s12x"
328 328 ngshaoffset = 32
329 329 versionformat = ">I"
330 330
331 331 class revlogio(object):
332 332 def __init__(self):
333 333 self.size = struct.calcsize(indexformatng)
334 334
335 335 def parseindex(self, fp, inline):
336 336 try:
337 337 size = util.fstat(fp).st_size
338 338 except AttributeError:
339 339 size = 0
340 340
341 341 if lazyparser.safe_to_use and not inline and size > 1000000:
342 342 # big index, let's parse it on demand
343 343 parser = lazyparser(fp, size)
344 344 index = lazyindex(parser)
345 345 nodemap = lazymap(parser)
346 346 e = list(index[0])
347 347 type = gettype(e[0])
348 348 e[0] = offset_type(0, type)
349 349 index[0] = e
350 350 return index, nodemap, None
351 351
352 352 s = self.size
353 353 cache = None
354 354 index = []
355 355 nodemap = {nullid: nullrev}
356 356 n = off = 0
357 357 # if we're not using lazymap, always read the whole index
358 358 data = fp.read()
359 359 l = len(data) - s
360 360 unpack = struct.unpack
361 361 append = index.append
362 362 if inline:
363 363 cache = (0, data)
364 364 while off <= l:
365 365 e = unpack(indexformatng, data[off:off + s])
366 366 nodemap[e[7]] = n
367 367 append(e)
368 368 n += 1
369 369 if e[1] < 0:
370 370 break
371 371 off += e[1] + s
372 372 else:
373 373 while off <= l:
374 374 e = unpack(indexformatng, data[off:off + s])
375 375 nodemap[e[7]] = n
376 376 append(e)
377 377 n += 1
378 378 off += s
379 379
380 380 e = list(index[0])
381 381 type = gettype(e[0])
382 382 e[0] = offset_type(0, type)
383 383 index[0] = e
384 384
385 385 return index, nodemap, cache
386 386
387 387 def packentry(self, entry, node, version):
388 388 p = struct.pack(indexformatng, *entry)
389 389 if not entry[3] and not getoffset(entry[0]) and entry[5] == nullrev:
390 390 p = struct.pack(versionformat, version) + p[4:]
391 391 return p
392 392
393 393 class revlog(object):
394 394 """
395 395 the underlying revision storage object
396 396
397 397 A revlog consists of two parts, an index and the revision data.
398 398
399 399 The index is a file with a fixed record size containing
400 400 information on each revision, includings its nodeid (hash), the
401 401 nodeids of its parents, the position and offset of its data within
402 402 the data file, and the revision it's based on. Finally, each entry
403 403 contains a linkrev entry that can serve as a pointer to external
404 404 data.
405 405
406 406 The revision data itself is a linear collection of data chunks.
407 407 Each chunk represents a revision and is usually represented as a
408 408 delta against the previous chunk. To bound lookup time, runs of
409 409 deltas are limited to about 2 times the length of the original
410 410 version data. This makes retrieval of a version proportional to
411 411 its size, or O(1) relative to the number of revisions.
412 412
413 413 Both pieces of the revlog are written to in an append-only
414 414 fashion, which means we never need to rewrite a file to insert or
415 415 remove data, and can use some simple techniques to avoid the need
416 416 for locking while reading.
417 417 """
418 418 def __init__(self, opener, indexfile):
419 419 """
420 420 create a revlog object
421 421
422 422 opener is a function that abstracts the file opening operation
423 423 and can be used to implement COW semantics or the like.
424 424 """
425 425 self.indexfile = indexfile
426 426 self.datafile = indexfile[:-2] + ".d"
427 427 self.opener = opener
428 428 self._cache = None
429 429 self._chunkcache = None
430 430 self.nodemap = {nullid: nullrev}
431 431 self.index = []
432 432
433 433 v = REVLOG_DEFAULT_VERSION
434 434 if hasattr(opener, "defversion"):
435 435 v = opener.defversion
436 436 if v & REVLOGNG:
437 437 v |= REVLOGNGINLINEDATA
438 438
439 439 i = ""
440 440 try:
441 441 f = self.opener(self.indexfile)
442 442 i = f.read(4)
443 443 f.seek(0)
444 444 if len(i) > 0:
445 445 v = struct.unpack(versionformat, i)[0]
446 446 except IOError, inst:
447 447 if inst.errno != errno.ENOENT:
448 448 raise
449 449
450 450 self.version = v
451 451 self._inline = v & REVLOGNGINLINEDATA
452 452 flags = v & ~0xFFFF
453 453 fmt = v & 0xFFFF
454 454 if fmt == REVLOGV0 and flags:
455 455 raise RevlogError(_("index %s unknown flags %#04x for format v0")
456 456 % (self.indexfile, flags >> 16))
457 457 elif fmt == REVLOGNG and flags & ~REVLOGNGINLINEDATA:
458 458 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
459 459 % (self.indexfile, flags >> 16))
460 460 elif fmt > REVLOGNG:
461 461 raise RevlogError(_("index %s unknown format %d")
462 462 % (self.indexfile, fmt))
463 463
464 464 self._io = revlogio()
465 465 if self.version == REVLOGV0:
466 466 self._io = revlogoldio()
467 467 if i:
468 468 d = self._io.parseindex(f, self._inline)
469 469 self.index, self.nodemap, self._chunkcache = d
470 470
471 471 # add the magic null revision at -1
472 472 self.index.append((0, 0, 0, -1, -1, -1, -1, nullid))
473 473
474 474 def _loadindex(self, start, end):
475 475 """load a block of indexes all at once from the lazy parser"""
476 476 if isinstance(self.index, lazyindex):
477 477 self.index.p.loadindex(start, end)
478 478
479 479 def _loadindexmap(self):
480 480 """loads both the map and the index from the lazy parser"""
481 481 if isinstance(self.index, lazyindex):
482 482 p = self.index.p
483 483 p.loadindex()
484 484 self.nodemap = p.map
485 485
486 486 def _loadmap(self):
487 487 """loads the map from the lazy parser"""
488 488 if isinstance(self.nodemap, lazymap):
489 489 self.nodemap.p.loadmap()
490 490 self.nodemap = self.nodemap.p.map
491 491
492 492 def tip(self):
493 493 return self.node(len(self.index) - 2)
494 494 def count(self):
495 495 return len(self.index) - 1
496 496
497 497 def rev(self, node):
498 498 try:
499 499 return self.nodemap[node]
500 500 except KeyError:
501 501 raise LookupError(_('%s: no node %s') % (self.indexfile, hex(node)))
502 502 def node(self, rev):
503 503 return self.index[rev][7]
504 504 def linkrev(self, node):
505 505 return self.index[self.rev(node)][4]
506 506 def parents(self, node):
507 507 d = self.index[self.rev(node)][5:7]
508 508 return (self.node(d[0]), self.node(d[1]))
509 509 def parentrevs(self, rev):
510 510 return self.index[rev][5:7]
511 511 def start(self, rev):
512 512 return getoffset(self.index[rev][0])
513 513 def end(self, rev):
514 514 return self.start(rev) + self.length(rev)
515 515 def length(self, rev):
516 516 return self.index[rev][1]
517 517 def base(self, rev):
518 518 return self.index[rev][3]
519 519
520 520 def size(self, rev):
521 521 """return the length of the uncompressed text for a given revision"""
522 522 l = self.index[rev][2]
523 523 if l >= 0:
524 524 return l
525 525
526 526 t = self.revision(self.node(rev))
527 527 return len(t)
528 528
529 529 # alternate implementation, The advantage to this code is it
530 530 # will be faster for a single revision. But, the results are not
531 531 # cached, so finding the size of every revision will be slower.
532 532 """
533 533 if self.cache and self.cache[1] == rev:
534 534 return len(self.cache[2])
535 535
536 536 base = self.base(rev)
537 537 if self.cache and self.cache[1] >= base and self.cache[1] < rev:
538 538 base = self.cache[1]
539 539 text = self.cache[2]
540 540 else:
541 541 text = self.revision(self.node(base))
542 542
543 543 l = len(text)
544 544 for x in xrange(base + 1, rev + 1):
545 545 l = mdiff.patchedsize(l, self.chunk(x))
546 546 return l
547 547 """
548 548
549 549 def reachable(self, node, stop=None):
550 550 """return a hash of all nodes ancestral to a given node, including
551 551 the node itself, stopping when stop is matched"""
552 552 reachable = {}
553 553 visit = [node]
554 554 reachable[node] = 1
555 555 if stop:
556 556 stopn = self.rev(stop)
557 557 else:
558 558 stopn = 0
559 559 while visit:
560 560 n = visit.pop(0)
561 561 if n == stop:
562 562 continue
563 563 if n == nullid:
564 564 continue
565 565 for p in self.parents(n):
566 566 if self.rev(p) < stopn:
567 567 continue
568 568 if p not in reachable:
569 569 reachable[p] = 1
570 570 visit.append(p)
571 571 return reachable
572 572
573 573 def nodesbetween(self, roots=None, heads=None):
574 574 """Return a tuple containing three elements. Elements 1 and 2 contain
575 575 a final list bases and heads after all the unreachable ones have been
576 576 pruned. Element 0 contains a topologically sorted list of all
577 577
578 578 nodes that satisfy these constraints:
579 579 1. All nodes must be descended from a node in roots (the nodes on
580 580 roots are considered descended from themselves).
581 581 2. All nodes must also be ancestors of a node in heads (the nodes in
582 582 heads are considered to be their own ancestors).
583 583
584 584 If roots is unspecified, nullid is assumed as the only root.
585 585 If heads is unspecified, it is taken to be the output of the
586 586 heads method (i.e. a list of all nodes in the repository that
587 587 have no children)."""
588 588 nonodes = ([], [], [])
589 589 if roots is not None:
590 590 roots = list(roots)
591 591 if not roots:
592 592 return nonodes
593 593 lowestrev = min([self.rev(n) for n in roots])
594 594 else:
595 595 roots = [nullid] # Everybody's a descendent of nullid
596 596 lowestrev = nullrev
597 597 if (lowestrev == nullrev) and (heads is None):
598 598 # We want _all_ the nodes!
599 599 return ([self.node(r) for r in xrange(0, self.count())],
600 600 [nullid], list(self.heads()))
601 601 if heads is None:
602 602 # All nodes are ancestors, so the latest ancestor is the last
603 603 # node.
604 604 highestrev = self.count() - 1
605 605 # Set ancestors to None to signal that every node is an ancestor.
606 606 ancestors = None
607 607 # Set heads to an empty dictionary for later discovery of heads
608 608 heads = {}
609 609 else:
610 610 heads = list(heads)
611 611 if not heads:
612 612 return nonodes
613 613 ancestors = {}
614 614 # Turn heads into a dictionary so we can remove 'fake' heads.
615 615 # Also, later we will be using it to filter out the heads we can't
616 616 # find from roots.
617 617 heads = dict.fromkeys(heads, 0)
618 618 # Start at the top and keep marking parents until we're done.
619 619 nodestotag = heads.keys()
620 620 # Remember where the top was so we can use it as a limit later.
621 621 highestrev = max([self.rev(n) for n in nodestotag])
622 622 while nodestotag:
623 623 # grab a node to tag
624 624 n = nodestotag.pop()
625 625 # Never tag nullid
626 626 if n == nullid:
627 627 continue
628 628 # A node's revision number represents its place in a
629 629 # topologically sorted list of nodes.
630 630 r = self.rev(n)
631 631 if r >= lowestrev:
632 632 if n not in ancestors:
633 633 # If we are possibly a descendent of one of the roots
634 634 # and we haven't already been marked as an ancestor
635 635 ancestors[n] = 1 # Mark as ancestor
636 636 # Add non-nullid parents to list of nodes to tag.
637 637 nodestotag.extend([p for p in self.parents(n) if
638 638 p != nullid])
639 639 elif n in heads: # We've seen it before, is it a fake head?
640 640 # So it is, real heads should not be the ancestors of
641 641 # any other heads.
642 642 heads.pop(n)
643 643 if not ancestors:
644 644 return nonodes
645 645 # Now that we have our set of ancestors, we want to remove any
646 646 # roots that are not ancestors.
647 647
648 648 # If one of the roots was nullid, everything is included anyway.
649 649 if lowestrev > nullrev:
650 650 # But, since we weren't, let's recompute the lowest rev to not
651 651 # include roots that aren't ancestors.
652 652
653 653 # Filter out roots that aren't ancestors of heads
654 654 roots = [n for n in roots if n in ancestors]
655 655 # Recompute the lowest revision
656 656 if roots:
657 657 lowestrev = min([self.rev(n) for n in roots])
658 658 else:
659 659 # No more roots? Return empty list
660 660 return nonodes
661 661 else:
662 662 # We are descending from nullid, and don't need to care about
663 663 # any other roots.
664 664 lowestrev = nullrev
665 665 roots = [nullid]
666 666 # Transform our roots list into a 'set' (i.e. a dictionary where the
667 667 # values don't matter.
668 668 descendents = dict.fromkeys(roots, 1)
669 669 # Also, keep the original roots so we can filter out roots that aren't
670 670 # 'real' roots (i.e. are descended from other roots).
671 671 roots = descendents.copy()
672 672 # Our topologically sorted list of output nodes.
673 673 orderedout = []
674 674 # Don't start at nullid since we don't want nullid in our output list,
675 675 # and if nullid shows up in descedents, empty parents will look like
676 676 # they're descendents.
677 677 for r in xrange(max(lowestrev, 0), highestrev + 1):
678 678 n = self.node(r)
679 679 isdescendent = False
680 680 if lowestrev == nullrev: # Everybody is a descendent of nullid
681 681 isdescendent = True
682 682 elif n in descendents:
683 683 # n is already a descendent
684 684 isdescendent = True
685 685 # This check only needs to be done here because all the roots
686 686 # will start being marked is descendents before the loop.
687 687 if n in roots:
688 688 # If n was a root, check if it's a 'real' root.
689 689 p = tuple(self.parents(n))
690 690 # If any of its parents are descendents, it's not a root.
691 691 if (p[0] in descendents) or (p[1] in descendents):
692 692 roots.pop(n)
693 693 else:
694 694 p = tuple(self.parents(n))
695 695 # A node is a descendent if either of its parents are
696 696 # descendents. (We seeded the dependents list with the roots
697 697 # up there, remember?)
698 698 if (p[0] in descendents) or (p[1] in descendents):
699 699 descendents[n] = 1
700 700 isdescendent = True
701 701 if isdescendent and ((ancestors is None) or (n in ancestors)):
702 702 # Only include nodes that are both descendents and ancestors.
703 703 orderedout.append(n)
704 704 if (ancestors is not None) and (n in heads):
705 705 # We're trying to figure out which heads are reachable
706 706 # from roots.
707 707 # Mark this head as having been reached
708 708 heads[n] = 1
709 709 elif ancestors is None:
710 710 # Otherwise, we're trying to discover the heads.
711 711 # Assume this is a head because if it isn't, the next step
712 712 # will eventually remove it.
713 713 heads[n] = 1
714 714 # But, obviously its parents aren't.
715 715 for p in self.parents(n):
716 716 heads.pop(p, None)
717 717 heads = [n for n in heads.iterkeys() if heads[n] != 0]
718 718 roots = roots.keys()
719 719 assert orderedout
720 720 assert roots
721 721 assert heads
722 722 return (orderedout, roots, heads)
723 723
724 724 def heads(self, start=None, stop=None):
725 725 """return the list of all nodes that have no children
726 726
727 727 if start is specified, only heads that are descendants of
728 728 start will be returned
729 729 if stop is specified, it will consider all the revs from stop
730 730 as if they had no children
731 731 """
732 732 if start is None and stop is None:
733 733 count = self.count()
734 734 if not count:
735 735 return [nullid]
736 736 ishead = [1] * (count + 1)
737 737 index = self.index
738 738 for r in xrange(count):
739 739 e = index[r]
740 740 ishead[e[5]] = ishead[e[6]] = 0
741 741 return [self.node(r) for r in xrange(count) if ishead[r]]
742 742
743 743 if start is None:
744 744 start = nullid
745 745 if stop is None:
746 746 stop = []
747 747 stoprevs = dict.fromkeys([self.rev(n) for n in stop])
748 748 startrev = self.rev(start)
749 749 reachable = {startrev: 1}
750 750 heads = {startrev: 1}
751 751
752 752 parentrevs = self.parentrevs
753 753 for r in xrange(startrev + 1, self.count()):
754 754 for p in parentrevs(r):
755 755 if p in reachable:
756 756 if r not in stoprevs:
757 757 reachable[r] = 1
758 758 heads[r] = 1
759 759 if p in heads and p not in stoprevs:
760 760 del heads[p]
761 761
762 762 return [self.node(r) for r in heads]
763 763
764 764 def children(self, node):
765 765 """find the children of a given node"""
766 766 c = []
767 767 p = self.rev(node)
768 768 for r in range(p + 1, self.count()):
769 769 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
770 770 if prevs:
771 771 for pr in prevs:
772 772 if pr == p:
773 773 c.append(self.node(r))
774 774 elif p == nullrev:
775 775 c.append(self.node(r))
776 776 return c
777 777
778 778 def _match(self, id):
779 779 if isinstance(id, (long, int)):
780 780 # rev
781 781 return self.node(id)
782 782 if len(id) == 20:
783 783 # possibly a binary node
784 784 # odds of a binary node being all hex in ASCII are 1 in 10**25
785 785 try:
786 786 node = id
787 787 r = self.rev(node) # quick search the index
788 788 return node
789 789 except LookupError:
790 790 pass # may be partial hex id
791 791 try:
792 792 # str(rev)
793 793 rev = int(id)
794 794 if str(rev) != id:
795 795 raise ValueError
796 796 if rev < 0:
797 797 rev = self.count() + rev
798 798 if rev < 0 or rev >= self.count():
799 799 raise ValueError
800 800 return self.node(rev)
801 801 except (ValueError, OverflowError):
802 802 pass
803 803 if len(id) == 40:
804 804 try:
805 805 # a full hex nodeid?
806 806 node = bin(id)
807 807 r = self.rev(node)
808 808 return node
809 809 except TypeError:
810 810 pass
811 811
812 812 def _partialmatch(self, id):
813 813 if len(id) < 40:
814 814 try:
815 815 # hex(node)[:...]
816 816 bin_id = bin(id[:len(id) & ~1]) # grab an even number of digits
817 817 node = None
818 818 for n in self.nodemap:
819 819 if n.startswith(bin_id) and hex(n).startswith(id):
820 820 if node is not None:
821 821 raise LookupError(_("Ambiguous identifier"))
822 822 node = n
823 823 if node is not None:
824 824 return node
825 825 except TypeError:
826 826 pass
827 827
828 828 def lookup(self, id):
829 829 """locate a node based on:
830 830 - revision number or str(revision number)
831 831 - nodeid or subset of hex nodeid
832 832 """
833 833 n = self._match(id)
834 834 if n is not None:
835 835 return n
836 836 n = self._partialmatch(id)
837 837 if n:
838 838 return n
839 839
840 840 raise LookupError(_("No match found"))
841 841
842 842 def cmp(self, node, text):
843 843 """compare text with a given file revision"""
844 844 p1, p2 = self.parents(node)
845 845 return hash(text, p1, p2) != node
846 846
847 847 def chunk(self, rev, df=None):
848 848 start, length = self.start(rev), self.length(rev)
849 849 if self._inline:
850 850 start += (rev + 1) * self._io.size
851 851 end = start + length
852 852 def loadcache(df):
853 853 cache_length = max(65536, length)
854 854 if not df:
855 855 if self._inline:
856 856 df = self.opener(self.indexfile)
857 857 else:
858 858 df = self.opener(self.datafile)
859 859 df.seek(start)
860 860 self._chunkcache = (start, df.read(cache_length))
861 861
862 862 if not self._chunkcache:
863 863 loadcache(df)
864 864
865 865 cache_start = self._chunkcache[0]
866 866 cache_end = cache_start + len(self._chunkcache[1])
867 867 if start >= cache_start and end <= cache_end:
868 868 # it is cached
869 869 offset = start - cache_start
870 870 else:
871 871 loadcache(df)
872 872 offset = 0
873 873
874 874 # avoid copying large chunks
875 875 c = self._chunkcache[1]
876 876 if len(c) > length:
877 877 c = c[offset:offset + length]
878 878
879 879 return decompress(c)
880 880
881 881 def delta(self, node):
882 882 """return or calculate a delta between a node and its predecessor"""
883 883 r = self.rev(node)
884 884 return self.revdiff(r - 1, r)
885 885
886 886 def revdiff(self, rev1, rev2):
887 887 """return or calculate a delta between two revisions"""
888 b1 = self.base(rev1)
889 b2 = self.base(rev2)
890 if b1 == b2 and rev1 + 1 == rev2:
888 if rev1 + 1 == rev2 and self.base(rev1) == self.base(rev2):
891 889 return self.chunk(rev2)
892 else:
893 return mdiff.textdiff(self.revision(self.node(rev1)),
894 self.revision(self.node(rev2)))
890
891 return mdiff.textdiff(self.revision(self.node(rev1)),
892 self.revision(self.node(rev2)))
895 893
896 894 def revision(self, node):
897 895 """return an uncompressed revision of a given"""
898 896 if node == nullid:
899 897 return ""
900 898 if self._cache and self._cache[0] == node:
901 899 return self._cache[2]
902 900
903 901 # look up what we need to read
904 902 text = None
905 903 rev = self.rev(node)
906 904 base = self.base(rev)
907 905
908 906 # check rev flags
909 907 if self.index[rev][0] & 0xFFFF:
910 908 raise RevlogError(_('incompatible revision flag %x') % q)
911 909
912 910 if self._inline:
913 911 # we probably have the whole chunk cached
914 912 df = None
915 913 else:
916 914 df = self.opener(self.datafile)
917 915
918 916 # do we have useful data cached?
919 917 if self._cache and self._cache[1] >= base and self._cache[1] < rev:
920 918 base = self._cache[1]
921 919 text = self._cache[2]
922 920 self._loadindex(base, rev + 1)
923 921 else:
924 922 self._loadindex(base, rev + 1)
925 923 text = self.chunk(base, df=df)
926 924
927 925 bins = [self.chunk(r, df) for r in xrange(base + 1, rev + 1)]
928 926 text = mdiff.patches(text, bins)
929 927 p1, p2 = self.parents(node)
930 928 if node != hash(text, p1, p2):
931 929 raise RevlogError(_("integrity check failed on %s:%d")
932 930 % (self.datafile, rev))
933 931
934 932 self._cache = (node, rev, text)
935 933 return text
936 934
937 935 def checkinlinesize(self, tr, fp=None):
938 936 if not self._inline:
939 937 return
940 938 if not fp:
941 939 fp = self.opener(self.indexfile, 'r')
942 940 fp.seek(0, 2)
943 941 size = fp.tell()
944 942 if size < 131072:
945 943 return
946 944 trinfo = tr.find(self.indexfile)
947 945 if trinfo == None:
948 946 raise RevlogError(_("%s not found in the transaction")
949 947 % self.indexfile)
950 948
951 949 trindex = trinfo[2]
952 950 dataoff = self.start(trindex)
953 951
954 952 tr.add(self.datafile, dataoff)
955 953 df = self.opener(self.datafile, 'w')
956 954 calc = self._io.size
957 955 for r in xrange(self.count()):
958 956 start = self.start(r) + (r + 1) * calc
959 957 length = self.length(r)
960 958 fp.seek(start)
961 959 d = fp.read(length)
962 960 df.write(d)
963 961 fp.close()
964 962 df.close()
965 963 fp = self.opener(self.indexfile, 'w', atomictemp=True)
966 964 self.version &= ~(REVLOGNGINLINEDATA)
967 965 self._inline = False
968 966 for i in xrange(self.count()):
969 967 e = self._io.packentry(self.index[i], self.node, self.version)
970 968 fp.write(e)
971 969
972 970 # if we don't call rename, the temp file will never replace the
973 971 # real index
974 972 fp.rename()
975 973
976 974 tr.replace(self.indexfile, trindex * calc)
977 975 self._chunkcache = None
978 976
979 977 def addrevision(self, text, transaction, link, p1, p2, d=None):
980 978 """add a revision to the log
981 979
982 980 text - the revision data to add
983 981 transaction - the transaction object used for rollback
984 982 link - the linkrev data to add
985 983 p1, p2 - the parent nodeids of the revision
986 984 d - an optional precomputed delta
987 985 """
988 986 dfh = None
989 987 if not self._inline:
990 988 dfh = self.opener(self.datafile, "a")
991 989 ifh = self.opener(self.indexfile, "a+")
992 990 return self._addrevision(text, transaction, link, p1, p2, d, ifh, dfh)
993 991
994 992 def _addrevision(self, text, transaction, link, p1, p2, d, ifh, dfh):
995 993 node = hash(text, p1, p2)
996 994 if node in self.nodemap:
997 995 return node
998 996
999 997 curr = self.count()
1000 998 prev = curr - 1
1001 999 base = self.base(prev)
1002 1000 offset = self.end(prev)
1003 1001
1004 1002 if curr:
1005 1003 if not d:
1006 1004 ptext = self.revision(self.node(prev))
1007 1005 d = mdiff.textdiff(ptext, text)
1008 1006 data = compress(d)
1009 1007 l = len(data[1]) + len(data[0])
1010 1008 dist = l + offset - self.start(base)
1011 1009
1012 1010 # full versions are inserted when the needed deltas
1013 1011 # become comparable to the uncompressed text
1014 1012 if not curr or dist > len(text) * 2:
1015 1013 data = compress(text)
1016 1014 l = len(data[1]) + len(data[0])
1017 1015 base = curr
1018 1016
1019 1017 e = (offset_type(offset, 0), l, len(text),
1020 1018 base, link, self.rev(p1), self.rev(p2), node)
1021 1019 self.index.insert(-1, e)
1022 1020 self.nodemap[node] = curr
1023 1021
1024 1022 entry = self._io.packentry(e, self.node, self.version)
1025 1023 if not self._inline:
1026 1024 transaction.add(self.datafile, offset)
1027 1025 transaction.add(self.indexfile, curr * len(entry))
1028 1026 if data[0]:
1029 1027 dfh.write(data[0])
1030 1028 dfh.write(data[1])
1031 1029 dfh.flush()
1032 1030 ifh.write(entry)
1033 1031 else:
1034 1032 offset += curr * self._io.size
1035 1033 transaction.add(self.indexfile, offset, prev)
1036 1034 ifh.write(entry)
1037 1035 ifh.write(data[0])
1038 1036 ifh.write(data[1])
1039 1037 self.checkinlinesize(transaction, ifh)
1040 1038
1041 1039 self._cache = (node, curr, text)
1042 1040 return node
1043 1041
1044 1042 def ancestor(self, a, b):
1045 1043 """calculate the least common ancestor of nodes a and b"""
1046 1044
1047 1045 def parents(rev):
1048 1046 return [p for p in self.parentrevs(rev) if p != nullrev]
1049 1047
1050 1048 c = ancestor.ancestor(self.rev(a), self.rev(b), parents)
1051 1049 if c is None:
1052 1050 return nullid
1053 1051
1054 1052 return self.node(c)
1055 1053
1056 1054 def group(self, nodelist, lookup, infocollect=None):
1057 1055 """calculate a delta group
1058 1056
1059 1057 Given a list of changeset revs, return a set of deltas and
1060 1058 metadata corresponding to nodes. the first delta is
1061 1059 parent(nodes[0]) -> nodes[0] the receiver is guaranteed to
1062 1060 have this parent as it has all history before these
1063 1061 changesets. parent is parent[0]
1064 1062 """
1065 1063 revs = [self.rev(n) for n in nodelist]
1066 1064
1067 1065 # if we don't have any revisions touched by these changesets, bail
1068 1066 if not revs:
1069 1067 yield changegroup.closechunk()
1070 1068 return
1071 1069
1072 1070 # add the parent of the first rev
1073 1071 p = self.parents(self.node(revs[0]))[0]
1074 1072 revs.insert(0, self.rev(p))
1075 1073
1076 1074 # build deltas
1077 1075 for d in xrange(0, len(revs) - 1):
1078 1076 a, b = revs[d], revs[d + 1]
1079 1077 nb = self.node(b)
1080 1078
1081 1079 if infocollect is not None:
1082 1080 infocollect(nb)
1083 1081
1084 1082 d = self.revdiff(a, b)
1085 1083 p = self.parents(nb)
1086 1084 meta = nb + p[0] + p[1] + lookup(nb)
1087 1085 yield changegroup.genchunk("%s%s" % (meta, d))
1088 1086
1089 1087 yield changegroup.closechunk()
1090 1088
1091 1089 def addgroup(self, revs, linkmapper, transaction, unique=0):
1092 1090 """
1093 1091 add a delta group
1094 1092
1095 1093 given a set of deltas, add them to the revision log. the
1096 1094 first delta is against its parent, which should be in our
1097 1095 log, the rest are against the previous delta.
1098 1096 """
1099 1097
1100 1098 #track the base of the current delta log
1101 1099 r = self.count()
1102 1100 t = r - 1
1103 1101 node = None
1104 1102
1105 1103 base = prev = nullrev
1106 1104 start = end = textlen = 0
1107 1105 if r:
1108 1106 end = self.end(t)
1109 1107
1110 1108 ifh = self.opener(self.indexfile, "a+")
1111 1109 isize = r * self._io.size
1112 1110 if self._inline:
1113 1111 transaction.add(self.indexfile, end + isize, r)
1114 1112 dfh = None
1115 1113 else:
1116 1114 transaction.add(self.indexfile, isize, r)
1117 1115 transaction.add(self.datafile, end)
1118 1116 dfh = self.opener(self.datafile, "a")
1119 1117
1120 1118 # loop through our set of deltas
1121 1119 chain = None
1122 1120 for chunk in revs:
1123 1121 node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
1124 1122 link = linkmapper(cs)
1125 1123 if node in self.nodemap:
1126 1124 # this can happen if two branches make the same change
1127 1125 # if unique:
1128 1126 # raise RevlogError(_("already have %s") % hex(node[:4]))
1129 1127 chain = node
1130 1128 continue
1131 1129 delta = chunk[80:]
1132 1130
1133 1131 for p in (p1, p2):
1134 1132 if not p in self.nodemap:
1135 1133 raise LookupError(_("unknown parent %s") % short(p))
1136 1134
1137 1135 if not chain:
1138 1136 # retrieve the parent revision of the delta chain
1139 1137 chain = p1
1140 1138 if not chain in self.nodemap:
1141 1139 raise LookupError(_("unknown base %s") % short(chain[:4]))
1142 1140
1143 1141 # full versions are inserted when the needed deltas become
1144 1142 # comparable to the uncompressed text or when the previous
1145 1143 # version is not the one we have a delta against. We use
1146 1144 # the size of the previous full rev as a proxy for the
1147 1145 # current size.
1148 1146
1149 1147 if chain == prev:
1150 1148 tempd = compress(delta)
1151 1149 cdelta = tempd[0] + tempd[1]
1152 1150 textlen = mdiff.patchedsize(textlen, delta)
1153 1151
1154 1152 if chain != prev or (end - start + len(cdelta)) > textlen * 2:
1155 1153 # flush our writes here so we can read it in revision
1156 1154 if dfh:
1157 1155 dfh.flush()
1158 1156 ifh.flush()
1159 1157 text = self.revision(chain)
1160 1158 text = mdiff.patches(text, [delta])
1161 1159 chk = self._addrevision(text, transaction, link, p1, p2, None,
1162 1160 ifh, dfh)
1163 1161 if not dfh and not self._inline:
1164 1162 # addrevision switched from inline to conventional
1165 1163 # reopen the index
1166 1164 dfh = self.opener(self.datafile, "a")
1167 1165 ifh = self.opener(self.indexfile, "a")
1168 1166 if chk != node:
1169 1167 raise RevlogError(_("consistency error adding group"))
1170 1168 textlen = len(text)
1171 1169 else:
1172 1170 e = (offset_type(end, 0), len(cdelta), textlen, base,
1173 1171 link, self.rev(p1), self.rev(p2), node)
1174 1172 self.index.insert(-1, e)
1175 1173 self.nodemap[node] = r
1176 1174 entry = self._io.packentry(e, self.node, self.version)
1177 1175 if self._inline:
1178 1176 ifh.write(entry)
1179 1177 ifh.write(cdelta)
1180 1178 self.checkinlinesize(transaction, ifh)
1181 1179 if not self._inline:
1182 1180 dfh = self.opener(self.datafile, "a")
1183 1181 ifh = self.opener(self.indexfile, "a")
1184 1182 else:
1185 1183 dfh.write(cdelta)
1186 1184 ifh.write(entry)
1187 1185
1188 1186 t, r, chain, prev = r, r + 1, node, node
1189 1187 base = self.base(t)
1190 1188 start = self.start(base)
1191 1189 end = self.end(t)
1192 1190
1193 1191 return node
1194 1192
1195 1193 def strip(self, rev, minlink):
1196 1194 if self.count() == 0 or rev >= self.count():
1197 1195 return
1198 1196
1199 1197 if isinstance(self.index, lazyindex):
1200 1198 self._loadindexmap()
1201 1199
1202 1200 # When stripping away a revision, we need to make sure it
1203 1201 # does not actually belong to an older changeset.
1204 1202 # The minlink parameter defines the oldest revision
1205 1203 # we're allowed to strip away.
1206 1204 while minlink > self.index[rev][4]:
1207 1205 rev += 1
1208 1206 if rev >= self.count():
1209 1207 return
1210 1208
1211 1209 # first truncate the files on disk
1212 1210 end = self.start(rev)
1213 1211 if not self._inline:
1214 1212 df = self.opener(self.datafile, "a")
1215 1213 df.truncate(end)
1216 1214 end = rev * self._io.size
1217 1215 else:
1218 1216 end += rev * self._io.size
1219 1217
1220 1218 indexf = self.opener(self.indexfile, "a")
1221 1219 indexf.truncate(end)
1222 1220
1223 1221 # then reset internal state in memory to forget those revisions
1224 1222 self._cache = None
1225 1223 self._chunkcache = None
1226 1224 for x in xrange(rev, self.count()):
1227 1225 del self.nodemap[self.node(x)]
1228 1226
1229 1227 del self.index[rev:-1]
1230 1228
1231 1229 def checksize(self):
1232 1230 expected = 0
1233 1231 if self.count():
1234 1232 expected = self.end(self.count() - 1)
1235 1233
1236 1234 try:
1237 1235 f = self.opener(self.datafile)
1238 1236 f.seek(0, 2)
1239 1237 actual = f.tell()
1240 1238 dd = actual - expected
1241 1239 except IOError, inst:
1242 1240 if inst.errno != errno.ENOENT:
1243 1241 raise
1244 1242 dd = 0
1245 1243
1246 1244 try:
1247 1245 f = self.opener(self.indexfile)
1248 1246 f.seek(0, 2)
1249 1247 actual = f.tell()
1250 1248 s = self._io.size
1251 1249 i = actual / s
1252 1250 di = actual - (i * s)
1253 1251 if self._inline:
1254 1252 databytes = 0
1255 1253 for r in xrange(self.count()):
1256 1254 databytes += self.length(r)
1257 1255 dd = 0
1258 1256 di = actual - self.count() * s - databytes
1259 1257 except IOError, inst:
1260 1258 if inst.errno != errno.ENOENT:
1261 1259 raise
1262 1260 di = 0
1263 1261
1264 1262 return (dd, di)
General Comments 0
You need to be logged in to leave comments. Login now