revlog: abstract out index entry packing...
Matt Mackall
r4986:58cc017e default
@@ -1,1265 +1,1257 b''
1 1 """
2 2 revlog.py - storage back-end for mercurial
3 3
4 4 This provides efficient delta storage with O(1) retrieve and append
5 5 and O(changes) merge between branches
6 6
7 7 Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
8 8
9 9 This software may be used and distributed according to the terms
10 10 of the GNU General Public License, incorporated herein by reference.
11 11 """
12 12
13 13 from node import *
14 14 from i18n import _
15 15 import binascii, changegroup, errno, ancestor, mdiff, os
16 16 import sha, struct, util, zlib
17 17
18 18 # revlog version strings
19 19 REVLOGV0 = 0
20 20 REVLOGNG = 1
21 21
22 22 # revlog flags
23 23 REVLOGNGINLINEDATA = (1 << 16)
24 24 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
25 25
26 26 REVLOG_DEFAULT_FORMAT = REVLOGNG
27 27 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
28 28
29 29 def hash(text, p1, p2):
30 30 """generate a hash from the given text and its parent hashes
31 31
32 32 This hash combines both the current file contents and its history
33 33 in a manner that makes it easy to distinguish nodes with the same
34 34 content in the revision graph.
35 35 """
36 36 l = [p1, p2]
37 37 l.sort()
38 38 s = sha.new(l[0])
39 39 s.update(l[1])
40 40 s.update(text)
41 41 return s.digest()
42 42
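A quick illustration of why the parents are folded into the hash (a minimal sketch using the hash function above; nullid is the 20-byte zero node imported from node): two revisions with identical text but different history get distinct nodeids, and sorting the parents first makes the result independent of parent order.

    n1 = hash("same text", nullid, nullid)   # first revision
    n2 = hash("same text", n1, nullid)       # later revert to identical content
    assert n1 != n2                          # history is part of the identity
    assert hash("x", n1, n2) == hash("x", n2, n1)   # parent order is normalized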
43 43 def compress(text):
44 44 """ generate a possibly-compressed representation of text """
45 45 if not text:
46 46 return ("", text)
47 47 if len(text) < 44:
48 48 if text[0] == '\0':
49 49 return ("", text)
50 50 return ('u', text)
51 51 bin = zlib.compress(text)
52 52 if len(bin) > len(text):
53 53 if text[0] == '\0':
54 54 return ("", text)
55 55 return ('u', text)
56 56 return ("", bin)
57 57
58 58 def decompress(bin):
59 59 """ decompress the given input """
60 60 if not bin:
61 61 return bin
62 62 t = bin[0]
63 63 if t == '\0':
64 64 return bin
65 65 if t == 'x':
66 66 return zlib.decompress(bin)
67 67 if t == 'u':
68 68 return bin[1:]
69 69 raise RevlogError(_("unknown compression type %r") % t)
70 70
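The first byte of the stored blob acts as a tag: 'u' marks text stored uncompressed, 'x' is the byte zlib output happens to start with, and a leading '\0' passes through untouched (which is why texts starting with NUL never get the 'u' prefix). A minimal round-trip sketch with the two helpers above:

    text = "some revision text, long enough that zlib actually wins " * 4
    head, body = compress(text)         # head is 'u' or '', body may be zlib data
    assert decompress(head + body) == text
    short = "tiny"                      # under 44 bytes: stored as 'u' + text
    assert compress(short) == ('u', 'tiny')
    assert decompress('u' + short) == short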
71 71 indexformatv0 = ">4l20s20s20s"
72 72 v0shaoffset = 56
73 73 # index ng:
74 74 # 6 bytes offset
75 75 # 2 bytes flags
76 76 # 4 bytes compressed length
77 77 # 4 bytes uncompressed length
78 78 # 4 bytes: base rev
79 79 # 4 bytes link rev
80 80 # 4 bytes parent 1 rev
81 81 # 4 bytes parent 2 rev
82 82 # 32 bytes: nodeid
83 83 indexformatng = ">Qiiiiii20s12x"
84 84 ngshaoffset = 32
85 85 versionformat = ">I"
86 86
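To make the two layouts above concrete: v0 stores four 32-bit ints (offset, compressed length, base, link) followed by raw 20-byte parent and self hashes, while NG packs offset and flags into one 64-bit field, stores parents as revision numbers, and pads the nodeid to 32 bytes. A sketch checking the record sizes and the sha offsets defined above:

    import struct
    assert struct.calcsize(indexformatv0) == 76   # 4*4 + 3*20; nodeid starts at byte 56
    assert struct.calcsize(indexformatng) == 64   # 8 + 6*4 + 20 + 12 pad; nodeid at byte 32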
87 87 class lazyparser(object):
88 88 """
89 89 this class avoids the need to parse the entirety of large indices
90 90 """
91 91
92 92 # lazyparser is not safe to use on windows if win32 extensions are not
93 93 # available. it keeps the file handle open, which makes it impossible
94 94 # to break hardlinks on local cloned repos.
95 95 safe_to_use = os.name != 'nt' or (not util.is_win_9x() and
96 96 hasattr(util, 'win32api'))
97 97
98 98 def __init__(self, dataf, size):
99 99 self.dataf = dataf
100 100 self.s = struct.calcsize(indexformatng)
101 101 self.datasize = size
102 102 self.l = size/self.s
103 103 self.index = [None] * self.l
104 104 self.map = {nullid: nullrev}
105 105 self.allmap = 0
106 106 self.all = 0
107 107 self.mapfind_count = 0
108 108
109 109 def loadmap(self):
110 110 """
111 111 during a commit, we need to make sure the rev being added is
112 112 not a duplicate. This requires loading the entire index,
113 113 which is fairly slow. loadmap can load up just the node map,
114 114 which takes much less time.
115 115 """
116 116 if self.allmap:
117 117 return
118 118 end = self.datasize
119 119 self.allmap = 1
120 120 cur = 0
121 121 count = 0
122 122 blocksize = self.s * 256
123 123 self.dataf.seek(0)
124 124 while cur < end:
125 125 data = self.dataf.read(blocksize)
126 126 off = 0
127 127 for x in xrange(256):
128 128 n = data[off + ngshaoffset:off + ngshaoffset + 20]
129 129 self.map[n] = count
130 130 count += 1
131 131 if count >= self.l:
132 132 break
133 133 off += self.s
134 134 cur += blocksize
135 135
136 136 def loadblock(self, blockstart, blocksize, data=None):
137 137 if self.all:
138 138 return
139 139 if data is None:
140 140 self.dataf.seek(blockstart)
141 141 if blockstart + blocksize > self.datasize:
142 142 # the revlog may have grown since we've started running,
143 143 # but we don't have space in self.index for more entries.
144 144 # limit blocksize so that we don't get too much data.
145 145 blocksize = max(self.datasize - blockstart, 0)
146 146 data = self.dataf.read(blocksize)
147 147 lend = len(data) / self.s
148 148 i = blockstart / self.s
149 149 off = 0
150 150 # lazyindex supports __delitem__
151 151 if lend > len(self.index) - i:
152 152 lend = len(self.index) - i
153 153 for x in xrange(lend):
154 154 if self.index[i + x] == None:
155 155 b = data[off : off + self.s]
156 156 self.index[i + x] = b
157 157 n = b[ngshaoffset:ngshaoffset + 20]
158 158 self.map[n] = i + x
159 159 off += self.s
160 160
161 161 def findnode(self, node):
162 162 """search backwards through the index file for a specific node"""
163 163 if self.allmap:
164 164 return None
165 165
166 166 # hg log will cause many many searches for the manifest
167 167 # nodes. After we get called a few times, just load the whole
168 168 # thing.
169 169 if self.mapfind_count > 8:
170 170 self.loadmap()
171 171 if node in self.map:
172 172 return node
173 173 return None
174 174 self.mapfind_count += 1
175 175 last = self.l - 1
176 176 while self.index[last] != None:
177 177 if last == 0:
178 178 self.all = 1
179 179 self.allmap = 1
180 180 return None
181 181 last -= 1
182 182 end = (last + 1) * self.s
183 183 blocksize = self.s * 256
184 184 while end >= 0:
185 185 start = max(end - blocksize, 0)
186 186 self.dataf.seek(start)
187 187 data = self.dataf.read(end - start)
188 188 findend = end - start
189 189 while True:
190 190 # we're searching backwards, so we have to make sure
191 191 # we don't find a changeset where this node is a parent
192 192 off = data.rfind(node, 0, findend)
193 193 findend = off
194 194 if off >= 0:
195 195 i = off / self.s
196 196 off = i * self.s
197 197 n = data[off + ngshaoffset:off + ngshaoffset + 20]
198 198 if n == node:
199 199 self.map[n] = i + start / self.s
200 200 return node
201 201 else:
202 202 break
203 203 end -= blocksize
204 204 return None
205 205
206 206 def loadindex(self, i=None, end=None):
207 207 if self.all:
208 208 return
209 209 all = False
210 210 if i == None:
211 211 blockstart = 0
212 212 blocksize = (512 / self.s) * self.s
213 213 end = self.datasize
214 214 all = True
215 215 else:
216 216 if end:
217 217 blockstart = i * self.s
218 218 end = end * self.s
219 219 blocksize = end - blockstart
220 220 else:
221 221 blockstart = (i & ~63) * self.s
222 222 blocksize = self.s * 64
223 223 end = blockstart + blocksize
224 224 while blockstart < end:
225 225 self.loadblock(blockstart, blocksize)
226 226 blockstart += blocksize
227 227 if all:
228 228 self.all = True
229 229
230 230 class lazyindex(object):
231 231 """a lazy version of the index array"""
232 232 def __init__(self, parser):
233 233 self.p = parser
234 234 def __len__(self):
235 235 return len(self.p.index)
236 236 def load(self, pos):
237 237 if pos < 0:
238 238 pos += len(self.p.index)
239 239 self.p.loadindex(pos)
240 240 return self.p.index[pos]
241 241 def __getitem__(self, pos):
242 242 ret = self.p.index[pos] or self.load(pos)
243 243 if isinstance(ret, str):
244 244 ret = struct.unpack(indexformatng, ret)
245 245 return ret
246 246 def __setitem__(self, pos, item):
247 247 self.p.index[pos] = item
248 248 def __delitem__(self, pos):
249 249 del self.p.index[pos]
250 250 def insert(self, pos, e):
251 251 self.p.index.insert(pos, e)
252 252 def append(self, e):
253 253 self.p.index.append(e)
254 254
255 255 class lazymap(object):
256 256 """a lazy version of the node map"""
257 257 def __init__(self, parser):
258 258 self.p = parser
259 259 def load(self, key):
260 260 n = self.p.findnode(key)
261 261 if n == None:
262 262 raise KeyError(key)
263 263 def __contains__(self, key):
264 264 if key in self.p.map:
265 265 return True
266 266 self.p.loadmap()
267 267 return key in self.p.map
268 268 def __iter__(self):
269 269 yield nullid
270 270 for i in xrange(self.p.l):
271 271 ret = self.p.index[i]
272 272 if not ret:
273 273 self.p.loadindex(i)
274 274 ret = self.p.index[i]
275 275 if isinstance(ret, str):
276 276 ret = struct.unpack(indexformatng, ret)
277 277 yield ret[7]
278 278 def __getitem__(self, key):
279 279 try:
280 280 return self.p.map[key]
281 281 except KeyError:
282 282 try:
283 283 self.load(key)
284 284 return self.p.map[key]
285 285 except KeyError:
286 286 raise KeyError("node " + hex(key))
287 287 def __setitem__(self, key, val):
288 288 self.p.map[key] = val
289 289 def __delitem__(self, key):
290 290 del self.p.map[key]
291 291
292 292 class RevlogError(Exception):
293 293 pass
294 294 class LookupError(RevlogError):
295 295 pass
296 296
297 297 def getoffset(q):
298 298 if q & 0xFFFF:
299 299 raise RevlogError(_('incompatible revision flag %x') % q)
300 300 return int(q >> 16)
301 301
302 302 def gettype(q):
303 303 return int(q & 0xFFFF)
304 304
305 305 def offset_type(offset, type):
306 306 return long(long(offset) << 16 | type)
307 307
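The three helpers above manage the packed first field of an NG entry: the data offset lives in the high 48 bits and the flags/type in the low 16. A round-trip sketch:

    q = offset_type(12345, 0)      # offset in the high bits, type 0 in the low 16
    assert gettype(q) == 0
    assert getoffset(q) == 12345   # raises RevlogError instead if any flag bit is set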
308 308 class revlogoldio(object):
309 309 def __init__(self):
310 310 self.size = struct.calcsize(indexformatv0)
311 311
312 312 def parseindex(self, fp, inline):
313 313 s = self.size
314 314 index = []
315 315 nodemap = {nullid: nullrev}
316 316 n = off = 0
317 317 data = fp.read()
318 318 l = len(data)
319 319 while off + s <= l:
320 320 cur = data[off:off + s]
321 321 off += s
322 322 e = struct.unpack(indexformatv0, cur)
323 323 # transform to revlogv1 format
324 324 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
325 325 nodemap[e[4]], nodemap[e[5]], e[6])
326 326 index.append(e2)
327 327 nodemap[e[6]] = n
328 328 n += 1
329 329
330 330 return index, nodemap, None
331 331
332 def packentry(self, entry, node, version):
333 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
334 node(entry[5]), node(entry[6]), entry[7])
335 return struct.pack(indexformatv0, *e2)
336
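This new method is the point of the change: entries are now kept in memory in the v1 (NG) tuple layout, (offset+flags, compressed length, uncompressed length, base, link, p1 rev, p2 rev, nodeid), and packentry serializes them back to the on-disk v0 record, converting parent revision numbers to raw nodeids via the passed-in node function (in practice the revlog's node method). The uncompressed length and the flag bits are simply dropped, since v0 does not store them. A hypothetical call, just to show the shape:

    io = revlogoldio()
    e = (offset_type(0, 0), 30, -1, 0, 0, nullrev, nullrev, nullid)
    record = io.packentry(e, lambda rev: nullid, REVLOGV0)
    assert len(record) == io.size   # 76 bytes, the v0 record size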
332 337 class revlogio(object):
333 338 def __init__(self):
334 339 self.size = struct.calcsize(indexformatng)
335 340
336 341 def parseindex(self, fp, inline):
337 342 try:
338 343 size = util.fstat(fp).st_size
339 344 except AttributeError:
340 345 size = 0
341 346
342 347 if lazyparser.safe_to_use and not inline and size > 1000000:
343 348 # big index, let's parse it on demand
344 349 parser = lazyparser(fp, size)
345 350 index = lazyindex(parser)
346 351 nodemap = lazymap(parser)
347 352 e = list(index[0])
348 353 type = gettype(e[0])
349 354 e[0] = offset_type(0, type)
350 355 index[0] = e
351 356 return index, nodemap, None
352 357
353 358 s = self.size
354 359 cache = None
355 360 index = []
356 361 nodemap = {nullid: nullrev}
357 362 n = off = 0
358 363 # if we're not using lazymap, always read the whole index
359 364 data = fp.read()
360 365 l = len(data)
361 366 if inline:
362 367 cache = (0, data)
363 368 while off + s <= l:
364 369 e = struct.unpack(indexformatng, data[off:off + s])
365 370 index.append(e)
366 371 nodemap[e[7]] = n
367 372 n += 1
368 373 off += s
369 374 if inline:
370 375 if e[1] < 0:
371 376 break
372 377 off += e[1]
373 378
374 379 e = list(index[0])
375 380 type = gettype(e[0])
376 381 e[0] = offset_type(0, type)
377 382 index[0] = e
378 383
379 384 return index, nodemap, cache
380 385
386 def packentry(self, entry, node, version):
387 p = struct.pack(indexformatng, *entry)
388 if not entry[3] and not getoffset(entry[0]) and entry[5] == nullrev:
389 p = struct.pack(versionformat, version) + p[4:]
390 return p
391
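The NG variant has one subtlety: the first four bytes of the index file double as the version header. An entry with base 0, offset 0, and a null first parent can only be rev 0, so its leading four bytes (the top half of the 64-bit offset field, always zero for rev 0) are overwritten with the version stamp; parseindex above undoes this by forcing index[0]'s offset back to 0 (the e[0] = offset_type(0, type) step). A sketch of the stamping, with hypothetical values:

    io = revlogio()
    e0 = (offset_type(0, 0), 11, 11, 0, 0, nullrev, nullrev, "\x11" * 20)
    record = io.packentry(e0, None, REVLOG_DEFAULT_VERSION)   # node fn is unused here
    assert struct.unpack(versionformat, record[:4])[0] == REVLOG_DEFAULT_VERSION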
381 392 class revlog(object):
382 393 """
383 394 the underlying revision storage object
384 395
385 396 A revlog consists of two parts, an index and the revision data.
386 397
387 398 The index is a file with a fixed record size containing
388 399 information on each revision, including its nodeid (hash), the
389 400 nodeids of its parents, the position and offset of its data within
390 401 the data file, and the revision it's based on. Finally, each entry
391 402 contains a linkrev entry that can serve as a pointer to external
392 403 data.
393 404
394 405 The revision data itself is a linear collection of data chunks.
395 406 Each chunk represents a revision and is usually represented as a
396 407 delta against the previous chunk. To bound lookup time, runs of
397 408 deltas are limited to about 2 times the length of the original
398 409 version data. This makes retrieval of a version proportional to
399 410 its size, or O(1) relative to the number of revisions.
400 411
401 412 Both pieces of the revlog are written to in an append-only
402 413 fashion, which means we never need to rewrite a file to insert or
403 414 remove data, and can use some simple techniques to avoid the need
404 415 for locking while reading.
405 416 """
406 417 def __init__(self, opener, indexfile):
407 418 """
408 419 create a revlog object
409 420
410 421 opener is a function that abstracts the file opening operation
411 422 and can be used to implement COW semantics or the like.
412 423 """
413 424 self.indexfile = indexfile
414 425 self.datafile = indexfile[:-2] + ".d"
415 426 self.opener = opener
416 427 self._cache = None
417 428 self._chunkcache = None
418 429 self.nodemap = {nullid: nullrev}
419 430 self.index = []
420 431
421 432 v = REVLOG_DEFAULT_VERSION
422 433 if hasattr(opener, "defversion"):
423 434 v = opener.defversion
424 435 if v & REVLOGNG:
425 436 v |= REVLOGNGINLINEDATA
426 437
427 438 i = ""
428 439 try:
429 440 f = self.opener(self.indexfile)
430 441 i = f.read(4)
431 442 f.seek(0)
432 443 if len(i) > 0:
433 444 v = struct.unpack(versionformat, i)[0]
434 445 except IOError, inst:
435 446 if inst.errno != errno.ENOENT:
436 447 raise
437 448
438 449 self.version = v
439 450 self._inline = v & REVLOGNGINLINEDATA
440 451 flags = v & ~0xFFFF
441 452 fmt = v & 0xFFFF
442 453 if fmt == REVLOGV0 and flags:
443 454 raise RevlogError(_("index %s unknown flags %#04x for format v0")
444 455 % (self.indexfile, flags >> 16))
445 456 elif fmt == REVLOGNG and flags & ~REVLOGNGINLINEDATA:
446 457 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
447 458 % (self.indexfile, flags >> 16))
448 459 elif fmt > REVLOGNG:
449 460 raise RevlogError(_("index %s unknown format %d")
450 461 % (self.indexfile, fmt))
451 462
452 463 self._io = revlogio()
453 464 if self.version == REVLOGV0:
454 465 self._io = revlogoldio()
455 466 if i:
456 467 d = self._io.parseindex(f, self._inline)
457 468 self.index, self.nodemap, self._chunkcache = d
458 469
459 470 # add the magic null revision at -1
460 471 self.index.append((0, 0, 0, -1, -1, -1, -1, nullid))
461 472
462 473 def _loadindex(self, start, end):
463 474 """load a block of indexes all at once from the lazy parser"""
464 475 if isinstance(self.index, lazyindex):
465 476 self.index.p.loadindex(start, end)
466 477
467 478 def _loadindexmap(self):
468 479 """loads both the map and the index from the lazy parser"""
469 480 if isinstance(self.index, lazyindex):
470 481 p = self.index.p
471 482 p.loadindex()
472 483 self.nodemap = p.map
473 484
474 485 def _loadmap(self):
475 486 """loads the map from the lazy parser"""
476 487 if isinstance(self.nodemap, lazymap):
477 488 self.nodemap.p.loadmap()
478 489 self.nodemap = self.nodemap.p.map
479 490
480 491 def tip(self):
481 492 return self.node(len(self.index) - 2)
482 493 def count(self):
483 494 return len(self.index) - 1
484 495
485 496 def rev(self, node):
486 497 try:
487 498 return self.nodemap[node]
488 499 except KeyError:
489 500 raise LookupError(_('%s: no node %s') % (self.indexfile, hex(node)))
490 501 def node(self, rev):
491 502 return self.index[rev][7]
492 503 def linkrev(self, node):
493 504 return self.index[self.rev(node)][4]
494 505 def parents(self, node):
495 506 d = self.index[self.rev(node)][5:7]
496 507 return (self.node(d[0]), self.node(d[1]))
497 508 def parentrevs(self, rev):
498 509 return self.index[rev][5:7]
499 510 def start(self, rev):
500 511 return getoffset(self.index[rev][0])
501 512 def end(self, rev):
502 513 return self.start(rev) + self.length(rev)
503 514 def length(self, rev):
504 515 return self.index[rev][1]
505 516 def base(self, rev):
506 517 return self.index[rev][3]
507 518
508 519 def size(self, rev):
509 520 """return the length of the uncompressed text for a given revision"""
510 521 l = self.index[rev][2]
511 522 if l >= 0:
512 523 return l
513 524
514 525 t = self.revision(self.node(rev))
515 526 return len(t)
516 527
517 528 # alternate implementation: the advantage of this code is that it
518 529 # will be faster for a single revision. But the results are not
519 530 # cached, so finding the size of every revision will be slower.
520 531 """
521 532 if self.cache and self.cache[1] == rev:
522 533 return len(self.cache[2])
523 534
524 535 base = self.base(rev)
525 536 if self.cache and self.cache[1] >= base and self.cache[1] < rev:
526 537 base = self.cache[1]
527 538 text = self.cache[2]
528 539 else:
529 540 text = self.revision(self.node(base))
530 541
531 542 l = len(text)
532 543 for x in xrange(base + 1, rev + 1):
533 544 l = mdiff.patchedsize(l, self.chunk(x))
534 545 return l
535 546 """
536 547
537 548 def reachable(self, node, stop=None):
538 549 """return a hash of all nodes ancestral to a given node, including
539 550 the node itself, stopping when stop is matched"""
540 551 reachable = {}
541 552 visit = [node]
542 553 reachable[node] = 1
543 554 if stop:
544 555 stopn = self.rev(stop)
545 556 else:
546 557 stopn = 0
547 558 while visit:
548 559 n = visit.pop(0)
549 560 if n == stop:
550 561 continue
551 562 if n == nullid:
552 563 continue
553 564 for p in self.parents(n):
554 565 if self.rev(p) < stopn:
555 566 continue
556 567 if p not in reachable:
557 568 reachable[p] = 1
558 569 visit.append(p)
559 570 return reachable
560 571
561 572 def nodesbetween(self, roots=None, heads=None):
562 573 """Return a tuple containing three elements. Elements 1 and 2 contain
563 574 a final list of bases and heads after all the unreachable ones have been
564 575 pruned. Element 0 contains a topologically sorted list of all
565 576
566 577 nodes that satisfy these constraints:
567 578 1. All nodes must be descended from a node in roots (the nodes on
568 579 roots are considered descended from themselves).
569 580 2. All nodes must also be ancestors of a node in heads (the nodes in
570 581 heads are considered to be their own ancestors).
571 582
572 583 If roots is unspecified, nullid is assumed as the only root.
573 584 If heads is unspecified, it is taken to be the output of the
574 585 heads method (i.e. a list of all nodes in the repository that
575 586 have no children)."""
576 587 nonodes = ([], [], [])
577 588 if roots is not None:
578 589 roots = list(roots)
579 590 if not roots:
580 591 return nonodes
581 592 lowestrev = min([self.rev(n) for n in roots])
582 593 else:
583 594 roots = [nullid] # Everybody's a descendent of nullid
584 595 lowestrev = nullrev
585 596 if (lowestrev == nullrev) and (heads is None):
586 597 # We want _all_ the nodes!
587 598 return ([self.node(r) for r in xrange(0, self.count())],
588 599 [nullid], list(self.heads()))
589 600 if heads is None:
590 601 # All nodes are ancestors, so the latest ancestor is the last
591 602 # node.
592 603 highestrev = self.count() - 1
593 604 # Set ancestors to None to signal that every node is an ancestor.
594 605 ancestors = None
595 606 # Set heads to an empty dictionary for later discovery of heads
596 607 heads = {}
597 608 else:
598 609 heads = list(heads)
599 610 if not heads:
600 611 return nonodes
601 612 ancestors = {}
602 613 # Turn heads into a dictionary so we can remove 'fake' heads.
603 614 # Also, later we will be using it to filter out the heads we can't
604 615 # find from roots.
605 616 heads = dict.fromkeys(heads, 0)
606 617 # Start at the top and keep marking parents until we're done.
607 618 nodestotag = heads.keys()
608 619 # Remember where the top was so we can use it as a limit later.
609 620 highestrev = max([self.rev(n) for n in nodestotag])
610 621 while nodestotag:
611 622 # grab a node to tag
612 623 n = nodestotag.pop()
613 624 # Never tag nullid
614 625 if n == nullid:
615 626 continue
616 627 # A node's revision number represents its place in a
617 628 # topologically sorted list of nodes.
618 629 r = self.rev(n)
619 630 if r >= lowestrev:
620 631 if n not in ancestors:
621 632 # If we are possibly a descendent of one of the roots
622 633 # and we haven't already been marked as an ancestor
623 634 ancestors[n] = 1 # Mark as ancestor
624 635 # Add non-nullid parents to list of nodes to tag.
625 636 nodestotag.extend([p for p in self.parents(n) if
626 637 p != nullid])
627 638 elif n in heads: # We've seen it before, is it a fake head?
628 639 # So it is, real heads should not be the ancestors of
629 640 # any other heads.
630 641 heads.pop(n)
631 642 if not ancestors:
632 643 return nonodes
633 644 # Now that we have our set of ancestors, we want to remove any
634 645 # roots that are not ancestors.
635 646
636 647 # If one of the roots was nullid, everything is included anyway.
637 648 if lowestrev > nullrev:
638 649 # But, since we weren't, let's recompute the lowest rev to not
639 650 # include roots that aren't ancestors.
640 651
641 652 # Filter out roots that aren't ancestors of heads
642 653 roots = [n for n in roots if n in ancestors]
643 654 # Recompute the lowest revision
644 655 if roots:
645 656 lowestrev = min([self.rev(n) for n in roots])
646 657 else:
647 658 # No more roots? Return empty list
648 659 return nonodes
649 660 else:
650 661 # We are descending from nullid, and don't need to care about
651 662 # any other roots.
652 663 lowestrev = nullrev
653 664 roots = [nullid]
654 665 # Transform our roots list into a 'set' (i.e. a dictionary where the
655 666 # values don't matter).
656 667 descendents = dict.fromkeys(roots, 1)
657 668 # Also, keep the original roots so we can filter out roots that aren't
658 669 # 'real' roots (i.e. are descended from other roots).
659 670 roots = descendents.copy()
660 671 # Our topologically sorted list of output nodes.
661 672 orderedout = []
662 673 # Don't start at nullid since we don't want nullid in our output list,
663 674 # and if nullid shows up in descendents, empty parents will look like
664 675 # they're descendents.
665 676 for r in xrange(max(lowestrev, 0), highestrev + 1):
666 677 n = self.node(r)
667 678 isdescendent = False
668 679 if lowestrev == nullrev: # Everybody is a descendent of nullid
669 680 isdescendent = True
670 681 elif n in descendents:
671 682 # n is already a descendent
672 683 isdescendent = True
673 684 # This check only needs to be done here because all the roots
674 685 # will start being marked as descendents before the loop.
675 686 if n in roots:
676 687 # If n was a root, check if it's a 'real' root.
677 688 p = tuple(self.parents(n))
678 689 # If any of its parents are descendents, it's not a root.
679 690 if (p[0] in descendents) or (p[1] in descendents):
680 691 roots.pop(n)
681 692 else:
682 693 p = tuple(self.parents(n))
683 694 # A node is a descendent if either of its parents are
684 695 # descendents. (We seeded the descendents dict with the roots
685 696 # up there, remember?)
686 697 if (p[0] in descendents) or (p[1] in descendents):
687 698 descendents[n] = 1
688 699 isdescendent = True
689 700 if isdescendent and ((ancestors is None) or (n in ancestors)):
690 701 # Only include nodes that are both descendents and ancestors.
691 702 orderedout.append(n)
692 703 if (ancestors is not None) and (n in heads):
693 704 # We're trying to figure out which heads are reachable
694 705 # from roots.
695 706 # Mark this head as having been reached
696 707 heads[n] = 1
697 708 elif ancestors is None:
698 709 # Otherwise, we're trying to discover the heads.
699 710 # Assume this is a head because if it isn't, the next step
700 711 # will eventually remove it.
701 712 heads[n] = 1
702 713 # But, obviously its parents aren't.
703 714 for p in self.parents(n):
704 715 heads.pop(p, None)
705 716 heads = [n for n in heads.iterkeys() if heads[n] != 0]
706 717 roots = roots.keys()
707 718 assert orderedout
708 719 assert roots
709 720 assert heads
710 721 return (orderedout, roots, heads)
711 722
712 723 def heads(self, start=None, stop=None):
713 724 """return the list of all nodes that have no children
714 725
715 726 if start is specified, only heads that are descendants of
716 727 start will be returned
717 728 if stop is specified, it will consider all the revs from stop
718 729 as if they had no children
719 730 """
720 731 if start is None:
721 732 start = nullid
722 733 if stop is None:
723 734 stop = []
724 735 stoprevs = dict.fromkeys([self.rev(n) for n in stop])
725 736 startrev = self.rev(start)
726 737 reachable = {startrev: 1}
727 738 heads = {startrev: 1}
728 739
729 740 parentrevs = self.parentrevs
730 741 for r in xrange(startrev + 1, self.count()):
731 742 for p in parentrevs(r):
732 743 if p in reachable:
733 744 if r not in stoprevs:
734 745 reachable[r] = 1
735 746 heads[r] = 1
736 747 if p in heads and p not in stoprevs:
737 748 del heads[p]
738 749
739 750 return [self.node(r) for r in heads]
740 751
741 752 def children(self, node):
742 753 """find the children of a given node"""
743 754 c = []
744 755 p = self.rev(node)
745 756 for r in range(p + 1, self.count()):
746 757 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
747 758 if prevs:
748 759 for pr in prevs:
749 760 if pr == p:
750 761 c.append(self.node(r))
751 762 elif p == nullrev:
752 763 c.append(self.node(r))
753 764 return c
754 765
755 766 def _match(self, id):
756 767 if isinstance(id, (long, int)):
757 768 # rev
758 769 return self.node(id)
759 770 if len(id) == 20:
760 771 # possibly a binary node
761 772 # odds of a binary node being all hex in ASCII are 1 in 10**25
762 773 try:
763 774 node = id
764 775 r = self.rev(node) # quick search the index
765 776 return node
766 777 except LookupError:
767 778 pass # may be partial hex id
768 779 try:
769 780 # str(rev)
770 781 rev = int(id)
771 782 if str(rev) != id:
772 783 raise ValueError
773 784 if rev < 0:
774 785 rev = self.count() + rev
775 786 if rev < 0 or rev >= self.count():
776 787 raise ValueError
777 788 return self.node(rev)
778 789 except (ValueError, OverflowError):
779 790 pass
780 791 if len(id) == 40:
781 792 try:
782 793 # a full hex nodeid?
783 794 node = bin(id)
784 795 r = self.rev(node)
785 796 return node
786 797 except TypeError:
787 798 pass
788 799
789 800 def _partialmatch(self, id):
790 801 if len(id) < 40:
791 802 try:
792 803 # hex(node)[:...]
793 804 bin_id = bin(id[:len(id) & ~1]) # grab an even number of digits
794 805 node = None
795 806 for n in self.nodemap:
796 807 if n.startswith(bin_id) and hex(n).startswith(id):
797 808 if node is not None:
798 809 raise LookupError(_("Ambiguous identifier"))
799 810 node = n
800 811 if node is not None:
801 812 return node
802 813 except TypeError:
803 814 pass
804 815
805 816 def lookup(self, id):
806 817 """locate a node based on:
807 818 - revision number or str(revision number)
808 819 - nodeid or subset of hex nodeid
809 820 """
810 821 n = self._match(id)
811 822 if n is not None:
812 823 return n
813 824 n = self._partialmatch(id)
814 825 if n:
815 826 return n
816 827
817 828 raise LookupError(_("No match found"))
818 829
819 830 def cmp(self, node, text):
820 831 """compare text with a given file revision"""
821 832 p1, p2 = self.parents(node)
822 833 return hash(text, p1, p2) != node
823 834
824 835 def diff(self, a, b):
825 836 """return a delta between two revisions"""
826 837 return mdiff.textdiff(a, b)
827 838
828 839 def patches(self, t, pl):
829 840 """apply a list of patches to a string"""
830 841 return mdiff.patches(t, pl)
831 842
832 843 def chunk(self, rev, df=None, cachelen=4096):
833 844 start, length = self.start(rev), self.length(rev)
834 845 inline = self._inline
835 846 if inline:
836 847 start += (rev + 1) * self._io.size
837 848 end = start + length
838 849 def loadcache(df):
839 850 cache_length = max(cachelen, length) # 4k
840 851 if not df:
841 852 if inline:
842 853 df = self.opener(self.indexfile)
843 854 else:
844 855 df = self.opener(self.datafile)
845 856 df.seek(start)
846 857 self._chunkcache = (start, df.read(cache_length))
847 858
848 859 if not self._chunkcache:
849 860 loadcache(df)
850 861
851 862 cache_start = self._chunkcache[0]
852 863 cache_end = cache_start + len(self._chunkcache[1])
853 864 if start >= cache_start and end <= cache_end:
854 865 # it is cached
855 866 offset = start - cache_start
856 867 else:
857 868 loadcache(df)
858 869 offset = 0
859 870
860 871 return decompress(self._chunkcache[1][offset:offset + length])
861 872
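For inline revlogs the index records and data chunks are interleaved in the single .i file, which is why start is bumped by (rev + 1) * entry-size above: records 0 through rev all sit in front of this revision's data. A sketch with hypothetical numbers:

    s = 64                  # one NG index record (struct.calcsize(indexformatng))
    logical = 180           # self.start(rev): total chunk bytes before this rev
    rev = 2
    physical = logical + (rev + 1) * s   # records 0, 1 and 2 precede the data
    assert physical == 372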
862 873 def delta(self, node):
863 874 """return or calculate a delta between a node and its predecessor"""
864 875 r = self.rev(node)
865 876 return self.revdiff(r - 1, r)
866 877
867 878 def revdiff(self, rev1, rev2):
868 879 """return or calculate a delta between two revisions"""
869 880 b1 = self.base(rev1)
870 881 b2 = self.base(rev2)
871 882 if b1 == b2 and rev1 + 1 == rev2:
872 883 return self.chunk(rev2)
873 884 else:
874 885 return self.diff(self.revision(self.node(rev1)),
875 886 self.revision(self.node(rev2)))
876 887
877 888 def revision(self, node):
878 889 """return an uncompressed revision of a given"""
879 890 if node == nullid:
880 891 return ""
881 892 if self._cache and self._cache[0] == node:
882 893 return self._cache[2]
883 894
884 895 # look up what we need to read
885 896 text = None
886 897 rev = self.rev(node)
887 898 base = self.base(rev)
888 899
889 900 if self._inline:
890 901 # we probably have the whole chunk cached
891 902 df = None
892 903 else:
893 904 df = self.opener(self.datafile)
894 905
895 906 # do we have useful data cached?
896 907 if self._cache and self._cache[1] >= base and self._cache[1] < rev:
897 908 base = self._cache[1]
898 909 text = self._cache[2]
899 910 self._loadindex(base, rev + 1)
900 911 else:
901 912 self._loadindex(base, rev + 1)
902 913 text = self.chunk(base, df=df)
903 914
904 915 bins = []
905 916 for r in xrange(base + 1, rev + 1):
906 917 bins.append(self.chunk(r, df=df))
907 918
908 919 text = self.patches(text, bins)
909 920
910 921 p1, p2 = self.parents(node)
911 922 if node != hash(text, p1, p2):
912 923 raise RevlogError(_("integrity check failed on %s:%d")
913 924 % (self.datafile, rev))
914 925
915 926 self._cache = (node, rev, text)
916 927 return text
917 928
918 929 def checkinlinesize(self, tr, fp=None):
919 930 if not self._inline:
920 931 return
921 932 if not fp:
922 933 fp = self.opener(self.indexfile, 'r')
923 934 fp.seek(0, 2)
924 935 size = fp.tell()
925 936 if size < 131072:
926 937 return
927 938 trinfo = tr.find(self.indexfile)
928 939 if trinfo == None:
929 940 raise RevlogError(_("%s not found in the transaction")
930 941 % self.indexfile)
931 942
932 943 trindex = trinfo[2]
933 944 dataoff = self.start(trindex)
934 945
935 946 tr.add(self.datafile, dataoff)
936 947 df = self.opener(self.datafile, 'w')
937 948 calc = self._io.size
938 949 for r in xrange(self.count()):
939 950 start = self.start(r) + (r + 1) * calc
940 951 length = self.length(r)
941 952 fp.seek(start)
942 953 d = fp.read(length)
943 954 df.write(d)
944 955 fp.close()
945 956 df.close()
946 957 fp = self.opener(self.indexfile, 'w', atomictemp=True)
947 958 self.version &= ~(REVLOGNGINLINEDATA)
948 959 self._inline = False
949 if self.count():
950 x = self.index[0]
951 e = struct.pack(indexformatng, *x)[4:]
952 l = struct.pack(versionformat, self.version)
953 fp.write(l)
954 fp.write(e)
955
956 for i in xrange(1, self.count()):
957 x = self.index[i]
958 e = struct.pack(indexformatng, *x)
960 for i in xrange(self.count()):
961 e = self._io.packentry(self.index[i], self.node, self.version)
959 962 fp.write(e)
960 963
961 964 # if we don't call rename, the temp file will never replace the
962 965 # real index
963 966 fp.rename()
964 967
965 968 tr.replace(self.indexfile, trindex * calc)
966 969 self._chunkcache = None
967 970
968 971 def addrevision(self, text, transaction, link, p1, p2, d=None):
969 972 """add a revision to the log
970 973
971 974 text - the revision data to add
972 975 transaction - the transaction object used for rollback
973 976 link - the linkrev data to add
974 977 p1, p2 - the parent nodeids of the revision
975 978 d - an optional precomputed delta
976 979 """
977 980 dfh = None
978 981 if not self._inline:
979 982 dfh = self.opener(self.datafile, "a")
980 983 ifh = self.opener(self.indexfile, "a+")
981 984 return self._addrevision(text, transaction, link, p1, p2, d, ifh, dfh)
982 985
983 986 def _addrevision(self, text, transaction, link, p1, p2, d, ifh, dfh):
984 987 node = hash(text, p1, p2)
985 988 if node in self.nodemap:
986 989 return node
987 990
988 991 curr = self.count()
989 992 prev = curr - 1
990 993 base = self.base(prev)
991 994 offset = self.end(prev)
992 995
993 996 if curr:
994 997 if not d:
995 998 ptext = self.revision(self.node(prev))
996 999 d = self.diff(ptext, text)
997 1000 data = compress(d)
998 1001 l = len(data[1]) + len(data[0])
999 1002 dist = l + offset - self.start(base)
1000 1003
1001 1004 # full versions are inserted when the needed deltas
1002 1005 # become comparable to the uncompressed text
1003 1006 if not curr or dist > len(text) * 2:
1004 1007 data = compress(text)
1005 1008 l = len(data[1]) + len(data[0])
1006 1009 base = curr
1007 1010
1008 1011 e = (offset_type(offset, 0), l, len(text),
1009 1012 base, link, self.rev(p1), self.rev(p2), node)
1010 1013 self.index.insert(-1, e)
1011 1014 self.nodemap[node] = curr
1012 1015
1013 if self.version == REVLOGV0:
1014 e = (offset, l, base, link, p1, p2, node)
1015 entry = struct.pack(indexformatv0, *e)
1016 else:
1017 entry = struct.pack(indexformatng, *e)
1018 if not curr:
1019 entry = struct.pack(versionformat, self.version) + entry[4:]
1020
1016 entry = self._io.packentry(e, self.node, self.version)
1021 1017 if not self._inline:
1022 1018 transaction.add(self.datafile, offset)
1023 1019 transaction.add(self.indexfile, curr * len(entry))
1024 1020 if data[0]:
1025 1021 dfh.write(data[0])
1026 1022 dfh.write(data[1])
1027 1023 dfh.flush()
1028 1024 ifh.write(entry)
1029 1025 else:
1030 1026 ifh.seek(0, 2)
1031 1027 transaction.add(self.indexfile, ifh.tell(), prev)
1032 1028 ifh.write(entry)
1033 1029 ifh.write(data[0])
1034 1030 ifh.write(data[1])
1035 1031 self.checkinlinesize(transaction, ifh)
1036 1032
1037 1033 self._cache = (node, curr, text)
1038 1034 return node
1039 1035
1040 1036 def ancestor(self, a, b):
1041 1037 """calculate the least common ancestor of nodes a and b"""
1042 1038
1043 1039 def parents(rev):
1044 1040 return [p for p in self.parentrevs(rev) if p != nullrev]
1045 1041
1046 1042 c = ancestor.ancestor(self.rev(a), self.rev(b), parents)
1047 1043 if c is None:
1048 1044 return nullid
1049 1045
1050 1046 return self.node(c)
1051 1047
1052 1048 def group(self, nodelist, lookup, infocollect=None):
1053 1049 """calculate a delta group
1054 1050
1055 1051 Given a list of changeset revs, return a set of deltas and
1056 1052 metadata corresponding to nodes. the first delta is
1057 1053 parent(nodes[0]) -> nodes[0]; the receiver is guaranteed to
1058 1054 have this parent as it has all history before these
1059 1055 changesets. parent is parent[0]
1060 1056 """
1061 1057 revs = [self.rev(n) for n in nodelist]
1062 1058
1063 1059 # if we don't have any revisions touched by these changesets, bail
1064 1060 if not revs:
1065 1061 yield changegroup.closechunk()
1066 1062 return
1067 1063
1068 1064 # add the parent of the first rev
1069 1065 p = self.parents(self.node(revs[0]))[0]
1070 1066 revs.insert(0, self.rev(p))
1071 1067
1072 1068 # build deltas
1073 1069 for d in xrange(0, len(revs) - 1):
1074 1070 a, b = revs[d], revs[d + 1]
1075 1071 nb = self.node(b)
1076 1072
1077 1073 if infocollect is not None:
1078 1074 infocollect(nb)
1079 1075
1080 1076 d = self.revdiff(a, b)
1081 1077 p = self.parents(nb)
1082 1078 meta = nb + p[0] + p[1] + lookup(nb)
1083 1079 yield changegroup.genchunk("%s%s" % (meta, d))
1084 1080
1085 1081 yield changegroup.closechunk()
1086 1082
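Each chunk yielded above carries an 80-byte header, the node, its two parents, and the linked changeset node at 20 bytes each, followed by the delta; addgroup below unpacks exactly this with "20s20s20s20s". A sketch of the layout, with dummy hashes:

    node, p1, p2, cs = "\x01" * 20, "\x02" * 20, "\x03" * 20, "\x04" * 20
    chunk = node + p1 + p2 + cs + "the delta bytes"
    assert struct.unpack("20s20s20s20s", chunk[:80]) == (node, p1, p2, cs)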
1087 1083 def addgroup(self, revs, linkmapper, transaction, unique=0):
1088 1084 """
1089 1085 add a delta group
1090 1086
1091 1087 given a set of deltas, add them to the revision log. the
1092 1088 first delta is against its parent, which should be in our
1093 1089 log, the rest are against the previous delta.
1094 1090 """
1095 1091
1096 1092 #track the base of the current delta log
1097 1093 r = self.count()
1098 1094 t = r - 1
1099 1095 node = None
1100 1096
1101 1097 base = prev = nullrev
1102 1098 start = end = textlen = 0
1103 1099 if r:
1104 1100 end = self.end(t)
1105 1101
1106 1102 ifh = self.opener(self.indexfile, "a+")
1107 1103 ifh.seek(0, 2)
1108 1104 transaction.add(self.indexfile, ifh.tell(), self.count())
1109 1105 if self._inline:
1110 1106 dfh = None
1111 1107 else:
1112 1108 transaction.add(self.datafile, end)
1113 1109 dfh = self.opener(self.datafile, "a")
1114 1110
1115 1111 # loop through our set of deltas
1116 1112 chain = None
1117 1113 for chunk in revs:
1118 1114 node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
1119 1115 link = linkmapper(cs)
1120 1116 if node in self.nodemap:
1121 1117 # this can happen if two branches make the same change
1122 1118 # if unique:
1123 1119 # raise RevlogError(_("already have %s") % hex(node[:4]))
1124 1120 chain = node
1125 1121 continue
1126 1122 delta = chunk[80:]
1127 1123
1128 1124 for p in (p1, p2):
1129 1125 if not p in self.nodemap:
1130 1126 raise LookupError(_("unknown parent %s") % short(p))
1131 1127
1132 1128 if not chain:
1133 1129 # retrieve the parent revision of the delta chain
1134 1130 chain = p1
1135 1131 if not chain in self.nodemap:
1136 1132 raise LookupError(_("unknown base %s") % short(chain[:4]))
1137 1133
1138 1134 # full versions are inserted when the needed deltas become
1139 1135 # comparable to the uncompressed text or when the previous
1140 1136 # version is not the one we have a delta against. We use
1141 1137 # the size of the previous full rev as a proxy for the
1142 1138 # current size.
1143 1139
1144 1140 if chain == prev:
1145 1141 tempd = compress(delta)
1146 1142 cdelta = tempd[0] + tempd[1]
1147 1143 textlen = mdiff.patchedsize(textlen, delta)
1148 1144
1149 1145 if chain != prev or (end - start + len(cdelta)) > textlen * 2:
1150 1146 # flush our writes here so we can read it in revision
1151 1147 if dfh:
1152 1148 dfh.flush()
1153 1149 ifh.flush()
1154 1150 text = self.revision(chain)
1155 1151 text = self.patches(text, [delta])
1156 1152 chk = self._addrevision(text, transaction, link, p1, p2, None,
1157 1153 ifh, dfh)
1158 1154 if not dfh and not self._inline:
1159 1155 # addrevision switched from inline to conventional
1160 1156 # reopen the index
1161 1157 dfh = self.opener(self.datafile, "a")
1162 1158 ifh = self.opener(self.indexfile, "a")
1163 1159 if chk != node:
1164 1160 raise RevlogError(_("consistency error adding group"))
1165 1161 textlen = len(text)
1166 1162 else:
1167 1163 e = (offset_type(end, 0), len(cdelta), textlen, base,
1168 1164 link, self.rev(p1), self.rev(p2), node)
1169 1165 self.index.insert(-1, e)
1170 1166 self.nodemap[node] = r
1167 entry = self._io.packentry(e, self.node, self.version)
1171 1168 if self._inline:
1172 ifh.write(struct.pack(indexformatng, *e))
1169 ifh.write(entry)
1173 1170 ifh.write(cdelta)
1174 1171 self.checkinlinesize(transaction, ifh)
1175 1172 if not self._inline:
1176 1173 dfh = self.opener(self.datafile, "a")
1177 1174 ifh = self.opener(self.indexfile, "a")
1178 1175 else:
1179 if self.version == REVLOGV0:
1180 e = (end, len(cdelta), base, link, p1, p2, node)
1181 entry = struct.pack(indexformatv0, *e)
1182 else:
1183 entry = struct.pack(indexformatng, *e)
1184 1176 dfh.write(cdelta)
1185 1177 ifh.write(entry)
1186 1178
1187 1179 t, r, chain, prev = r, r + 1, node, node
1188 1180 base = self.base(t)
1189 1181 start = self.start(base)
1190 1182 end = self.end(t)
1191 1183
1192 1184 return node
1193 1185
1194 1186 def strip(self, rev, minlink):
1195 1187 if self.count() == 0 or rev >= self.count():
1196 1188 return
1197 1189
1198 1190 if isinstance(self.index, lazyindex):
1199 1191 self._loadindexmap()
1200 1192
1201 1193 # When stripping away a revision, we need to make sure it
1202 1194 # does not actually belong to an older changeset.
1203 1195 # The minlink parameter defines the oldest revision
1204 1196 # we're allowed to strip away.
1205 1197 while minlink > self.index[rev][4]:
1206 1198 rev += 1
1207 1199 if rev >= self.count():
1208 1200 return
1209 1201
1210 1202 # first truncate the files on disk
1211 1203 end = self.start(rev)
1212 1204 if not self._inline:
1213 1205 df = self.opener(self.datafile, "a")
1214 1206 df.truncate(end)
1215 1207 end = rev * self._io.size
1216 1208 else:
1217 1209 end += rev * self._io.size
1218 1210
1219 1211 indexf = self.opener(self.indexfile, "a")
1220 1212 indexf.truncate(end)
1221 1213
1222 1214 # then reset internal state in memory to forget those revisions
1223 1215 self._cache = None
1224 1216 self._chunkcache = None
1225 1217 for x in xrange(rev, self.count()):
1226 1218 del self.nodemap[self.node(x)]
1227 1219
1228 1220 del self.index[rev:-1]
1229 1221
1230 1222 def checksize(self):
1231 1223 expected = 0
1232 1224 if self.count():
1233 1225 expected = self.end(self.count() - 1)
1234 1226
1235 1227 try:
1236 1228 f = self.opener(self.datafile)
1237 1229 f.seek(0, 2)
1238 1230 actual = f.tell()
1239 1231 dd = actual - expected
1240 1232 except IOError, inst:
1241 1233 if inst.errno != errno.ENOENT:
1242 1234 raise
1243 1235 dd = 0
1244 1236
1245 1237 try:
1246 1238 f = self.opener(self.indexfile)
1247 1239 f.seek(0, 2)
1248 1240 actual = f.tell()
1249 1241 s = self._io.size
1250 1242 i = actual / s
1251 1243 di = actual - (i * s)
1252 1244 if self._inline:
1253 1245 databytes = 0
1254 1246 for r in xrange(self.count()):
1255 1247 databytes += self.length(r)
1256 1248 dd = 0
1257 1249 di = actual - self.count() * s - databytes
1258 1250 except IOError, inst:
1259 1251 if inst.errno != errno.ENOENT:
1260 1252 raise
1261 1253 di = 0
1262 1254
1263 1255 return (dd, di)
1264 1256
1265 1257