revlog: localize some fastpath functions
Matt Mackall
r5007:3addf453 default
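This changeset binds the hot functions struct.pack, struct.unpack, zlib.compress, zlib.decompress and sha.new to module-level names (_pack, _unpack, _compress, _decompress, _sha), so the revlog fast paths pay a single global name lookup per call instead of a module attribute lookup. The sketch below illustrates that localization pattern on its own; it is not part of the changeset, the helper names and the tiny driver are illustrative only, and it follows the file's Python 2 idiom while reusing the index layout string that appears later in the diff.

    import struct, zlib, sha

    # Module-level aliases: a call through _pack costs one LOAD_GLOBAL,
    # while struct.pack costs LOAD_GLOBAL plus LOAD_ATTR on every call.
    _pack = struct.pack
    _unpack = struct.unpack
    _compress = zlib.compress
    _decompress = zlib.decompress
    _sha = sha.new

    indexformatng = ">Qiiiiii20s12x"   # same record layout the revlog index uses

    def packentries(entries):
        """pack index entries using the localized fast path"""
        return [_pack(indexformatng, *e) for e in entries]

    def unpackentries(blobs):
        """inverse of packentries, again via the localized alias"""
        return [_unpack(indexformatng, b) for b in blobs]

    if __name__ == '__main__':
        # offset/flags, compressed len, uncompressed len, base, link, p1, p2, node
        e = (0, 11, 11, 0, 0, -1, -1, '\0' * 20)
        blobs = packentries([e] * 3)
        assert unpackentries(blobs)[0][7] == '\0' * 20

The same effect can be had inside a single hot function by binding pack = struct.pack to a local before a loop; module-level aliases have the advantage that every function in the module benefits.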
@@ -1,1265 +1,1269 @@
1 1 """
2 2 revlog.py - storage back-end for mercurial
3 3
4 4 This provides efficient delta storage with O(1) retrieve and append
5 5 and O(changes) merge between branches
6 6
7 7 Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
8 8
9 9 This software may be used and distributed according to the terms
10 10 of the GNU General Public License, incorporated herein by reference.
11 11 """
12 12
13 13 from node import *
14 14 from i18n import _
15 15 import binascii, changegroup, errno, ancestor, mdiff, os
16 16 import sha, struct, util, zlib
17 17
18 _pack = struct.pack
19 _unpack = struct.unpack
20 _compress = zlib.compress
21 _decompress = zlib.decompress
22 _sha = sha.new
23
18 24 # revlog flags
19 25 REVLOGV0 = 0
20 26 REVLOGNG = 1
21 27 REVLOGNGINLINEDATA = (1 << 16)
22 28 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
23 29 REVLOG_DEFAULT_FORMAT = REVLOGNG
24 30 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
25 31
26 32 class RevlogError(Exception):
27 33 pass
28 34 class LookupError(RevlogError):
29 35 pass
30 36
31 37 def getoffset(q):
32 38 return int(q >> 16)
33 39
34 40 def gettype(q):
35 41 return int(q & 0xFFFF)
36 42
37 43 def offset_type(offset, type):
38 44 return long(long(offset) << 16 | type)
39 45
40 46 def hash(text, p1, p2):
41 47 """generate a hash from the given text and its parent hashes
42 48
43 49 This hash combines both the current file contents and its history
44 50 in a manner that makes it easy to distinguish nodes with the same
45 51 content in the revision graph.
46 52 """
47 53 l = [p1, p2]
48 54 l.sort()
49 s = sha.new(l[0])
55 s = _sha(l[0])
50 56 s.update(l[1])
51 57 s.update(text)
52 58 return s.digest()
53 59
54 60 def compress(text):
55 61 """ generate a possibly-compressed representation of text """
56 62 if not text:
57 63 return ("", text)
58 64 if len(text) < 44:
59 65 if text[0] == '\0':
60 66 return ("", text)
61 67 return ('u', text)
62 bin = zlib.compress(text)
68 bin = _compress(text)
63 69 if len(bin) > len(text):
64 70 if text[0] == '\0':
65 71 return ("", text)
66 72 return ('u', text)
67 73 return ("", bin)
68 74
69 75 def decompress(bin):
70 76 """ decompress the given input """
71 77 if not bin:
72 78 return bin
73 79 t = bin[0]
74 80 if t == '\0':
75 81 return bin
76 82 if t == 'x':
77 return zlib.decompress(bin)
83 return _decompress(bin)
78 84 if t == 'u':
79 85 return bin[1:]
80 86 raise RevlogError(_("unknown compression type %r") % t)
81 87
82 88 class lazyparser(object):
83 89 """
84 90 this class avoids the need to parse the entirety of large indices
85 91 """
86 92
87 93 # lazyparser is not safe to use on windows if win32 extensions not
88 94 # available. it keeps file handle open, which makes it not possible
89 95 # to break hardlinks on local cloned repos.
90 96 safe_to_use = os.name != 'nt' or (not util.is_win_9x() and
91 97 hasattr(util, 'win32api'))
92 98
93 99 def __init__(self, dataf, size):
94 100 self.dataf = dataf
95 101 self.s = struct.calcsize(indexformatng)
96 102 self.datasize = size
97 103 self.l = size/self.s
98 104 self.index = [None] * self.l
99 105 self.map = {nullid: nullrev}
100 106 self.allmap = 0
101 107 self.all = 0
102 108 self.mapfind_count = 0
103 109
104 110 def loadmap(self):
105 111 """
106 112 during a commit, we need to make sure the rev being added is
107 113 not a duplicate. This requires loading the entire index,
108 114 which is fairly slow. loadmap can load up just the node map,
109 115 which takes much less time.
110 116 """
111 117 if self.allmap:
112 118 return
113 119 end = self.datasize
114 120 self.allmap = 1
115 121 cur = 0
116 122 count = 0
117 123 blocksize = self.s * 256
118 124 self.dataf.seek(0)
119 125 while cur < end:
120 126 data = self.dataf.read(blocksize)
121 127 off = 0
122 128 for x in xrange(256):
123 129 n = data[off + ngshaoffset:off + ngshaoffset + 20]
124 130 self.map[n] = count
125 131 count += 1
126 132 if count >= self.l:
127 133 break
128 134 off += self.s
129 135 cur += blocksize
130 136
131 137 def loadblock(self, blockstart, blocksize, data=None):
132 138 if self.all:
133 139 return
134 140 if data is None:
135 141 self.dataf.seek(blockstart)
136 142 if blockstart + blocksize > self.datasize:
137 143 # the revlog may have grown since we've started running,
138 144 # but we don't have space in self.index for more entries.
139 145 # limit blocksize so that we don't get too much data.
140 146 blocksize = max(self.datasize - blockstart, 0)
141 147 data = self.dataf.read(blocksize)
142 148 lend = len(data) / self.s
143 149 i = blockstart / self.s
144 150 off = 0
145 151 # lazyindex supports __delitem__
146 152 if lend > len(self.index) - i:
147 153 lend = len(self.index) - i
148 154 for x in xrange(lend):
149 155 if self.index[i + x] == None:
150 156 b = data[off : off + self.s]
151 157 self.index[i + x] = b
152 158 n = b[ngshaoffset:ngshaoffset + 20]
153 159 self.map[n] = i + x
154 160 off += self.s
155 161
156 162 def findnode(self, node):
157 163 """search backwards through the index file for a specific node"""
158 164 if self.allmap:
159 165 return None
160 166
161 167 # hg log will cause many many searches for the manifest
162 168 # nodes. After we get called a few times, just load the whole
163 169 # thing.
164 170 if self.mapfind_count > 8:
165 171 self.loadmap()
166 172 if node in self.map:
167 173 return node
168 174 return None
169 175 self.mapfind_count += 1
170 176 last = self.l - 1
171 177 while self.index[last] != None:
172 178 if last == 0:
173 179 self.all = 1
174 180 self.allmap = 1
175 181 return None
176 182 last -= 1
177 183 end = (last + 1) * self.s
178 184 blocksize = self.s * 256
179 185 while end >= 0:
180 186 start = max(end - blocksize, 0)
181 187 self.dataf.seek(start)
182 188 data = self.dataf.read(end - start)
183 189 findend = end - start
184 190 while True:
185 191 # we're searching backwards, so we have to make sure
186 192 # we don't find a changeset where this node is a parent
187 193 off = data.find(node, 0, findend)
188 194 findend = off
189 195 if off >= 0:
190 196 i = off / self.s
191 197 off = i * self.s
192 198 n = data[off + ngshaoffset:off + ngshaoffset + 20]
193 199 if n == node:
194 200 self.map[n] = i + start / self.s
195 201 return node
196 202 else:
197 203 break
198 204 end -= blocksize
199 205 return None
200 206
201 207 def loadindex(self, i=None, end=None):
202 208 if self.all:
203 209 return
204 210 all = False
205 211 if i == None:
206 212 blockstart = 0
207 213 blocksize = (65536 / self.s) * self.s
208 214 end = self.datasize
209 215 all = True
210 216 else:
211 217 if end:
212 218 blockstart = i * self.s
213 219 end = end * self.s
214 220 blocksize = end - blockstart
215 221 else:
216 222 blockstart = (i & ~1023) * self.s
217 223 blocksize = self.s * 1024
218 224 end = blockstart + blocksize
219 225 while blockstart < end:
220 226 self.loadblock(blockstart, blocksize)
221 227 blockstart += blocksize
222 228 if all:
223 229 self.all = True
224 230
225 231 class lazyindex(object):
226 232 """a lazy version of the index array"""
227 233 def __init__(self, parser):
228 234 self.p = parser
229 235 def __len__(self):
230 236 return len(self.p.index)
231 237 def load(self, pos):
232 238 if pos < 0:
233 239 pos += len(self.p.index)
234 240 self.p.loadindex(pos)
235 241 return self.p.index[pos]
236 242 def __getitem__(self, pos):
237 return struct.unpack(indexformatng,
238 self.p.index[pos] or self.load(pos))
243 return _unpack(indexformatng, self.p.index[pos] or self.load(pos))
239 244 def __setitem__(self, pos, item):
240 self.p.index[pos] = struct.pack(indexformatng, *item)
245 self.p.index[pos] = _pack(indexformatng, *item)
241 246 def __delitem__(self, pos):
242 247 del self.p.index[pos]
243 248 def insert(self, pos, e):
244 self.p.index.insert(pos, struct.pack(indexformatng, *e))
249 self.p.index.insert(pos, _pack(indexformatng, *e))
245 250 def append(self, e):
246 self.p.index.append(struct.pack(indexformatng, *e))
251 self.p.index.append(_pack(indexformatng, *e))
247 252
248 253 class lazymap(object):
249 254 """a lazy version of the node map"""
250 255 def __init__(self, parser):
251 256 self.p = parser
252 257 def load(self, key):
253 258 n = self.p.findnode(key)
254 259 if n == None:
255 260 raise KeyError(key)
256 261 def __contains__(self, key):
257 262 if key in self.p.map:
258 263 return True
259 264 self.p.loadmap()
260 265 return key in self.p.map
261 266 def __iter__(self):
262 267 yield nullid
263 268 for i in xrange(self.p.l):
264 269 ret = self.p.index[i]
265 270 if not ret:
266 271 self.p.loadindex(i)
267 272 ret = self.p.index[i]
268 273 if isinstance(ret, str):
269 ret = struct.unpack(indexformatng, ret)
274 ret = _unpack(indexformatng, ret)
270 275 yield ret[7]
271 276 def __getitem__(self, key):
272 277 try:
273 278 return self.p.map[key]
274 279 except KeyError:
275 280 try:
276 281 self.load(key)
277 282 return self.p.map[key]
278 283 except KeyError:
279 284 raise KeyError("node " + hex(key))
280 285 def __setitem__(self, key, val):
281 286 self.p.map[key] = val
282 287 def __delitem__(self, key):
283 288 del self.p.map[key]
284 289
285 290 indexformatv0 = ">4l20s20s20s"
286 291 v0shaoffset = 56
287 292
288 293 class revlogoldio(object):
289 294 def __init__(self):
290 295 self.size = struct.calcsize(indexformatv0)
291 296
292 297 def parseindex(self, fp, inline):
293 298 s = self.size
294 299 index = []
295 300 nodemap = {nullid: nullrev}
296 301 n = off = 0
297 302 data = fp.read()
298 303 l = len(data)
299 304 while off + s <= l:
300 305 cur = data[off:off + s]
301 306 off += s
302 e = struct.unpack(indexformatv0, cur)
307 e = _unpack(indexformatv0, cur)
303 308 # transform to revlogv1 format
304 309 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
305 310 nodemap[e[4]], nodemap[e[5]], e[6])
306 311 index.append(e2)
307 312 nodemap[e[6]] = n
308 313 n += 1
309 314
310 315 return index, nodemap, None
311 316
312 317 def packentry(self, entry, node, version):
313 318 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
314 319 node(entry[5]), node(entry[6]), entry[7])
315 return struct.pack(indexformatv0, *e2)
320 return _pack(indexformatv0, *e2)
316 321
317 322 # index ng:
318 323 # 6 bytes offset
319 324 # 2 bytes flags
320 325 # 4 bytes compressed length
321 326 # 4 bytes uncompressed length
322 327 # 4 bytes: base rev
323 328 # 4 bytes link rev
324 329 # 4 bytes parent 1 rev
325 330 # 4 bytes parent 2 rev
326 331 # 32 bytes: nodeid
327 332 indexformatng = ">Qiiiiii20s12x"
328 333 ngshaoffset = 32
329 334 versionformat = ">I"
330 335
331 336 class revlogio(object):
332 337 def __init__(self):
333 338 self.size = struct.calcsize(indexformatng)
334 339
335 340 def parseindex(self, fp, inline):
336 341 try:
337 342 size = util.fstat(fp).st_size
338 343 except AttributeError:
339 344 size = 0
340 345
341 346 if lazyparser.safe_to_use and not inline and size > 1000000:
342 347 # big index, let's parse it on demand
343 348 parser = lazyparser(fp, size)
344 349 index = lazyindex(parser)
345 350 nodemap = lazymap(parser)
346 351 e = list(index[0])
347 352 type = gettype(e[0])
348 353 e[0] = offset_type(0, type)
349 354 index[0] = e
350 355 return index, nodemap, None
351 356
352 357 s = self.size
353 358 cache = None
354 359 index = []
355 360 nodemap = {nullid: nullrev}
356 361 n = off = 0
357 362 # if we're not using lazymap, always read the whole index
358 363 data = fp.read()
359 364 l = len(data) - s
360 unpack = struct.unpack
361 365 append = index.append
362 366 if inline:
363 367 cache = (0, data)
364 368 while off <= l:
365 e = unpack(indexformatng, data[off:off + s])
369 e = _unpack(indexformatng, data[off:off + s])
366 370 nodemap[e[7]] = n
367 371 append(e)
368 372 n += 1
369 373 if e[1] < 0:
370 374 break
371 375 off += e[1] + s
372 376 else:
373 377 while off <= l:
374 e = unpack(indexformatng, data[off:off + s])
378 e = _unpack(indexformatng, data[off:off + s])
375 379 nodemap[e[7]] = n
376 380 append(e)
377 381 n += 1
378 382 off += s
379 383
380 384 e = list(index[0])
381 385 type = gettype(e[0])
382 386 e[0] = offset_type(0, type)
383 387 index[0] = e
384 388
385 389 return index, nodemap, cache
386 390
387 391 def packentry(self, entry, node, version):
388 p = struct.pack(indexformatng, *entry)
392 p = _pack(indexformatng, *entry)
389 393 if not entry[3] and not getoffset(entry[0]) and entry[5] == nullrev:
390 p = struct.pack(versionformat, version) + p[4:]
394 p = _pack(versionformat, version) + p[4:]
391 395 return p
392 396
393 397 class revlog(object):
394 398 """
395 399 the underlying revision storage object
396 400
397 401 A revlog consists of two parts, an index and the revision data.
398 402
399 403 The index is a file with a fixed record size containing
400 404 information on each revision, including its nodeid (hash), the
401 405 nodeids of its parents, the position and offset of its data within
402 406 the data file, and the revision it's based on. Finally, each entry
403 407 contains a linkrev entry that can serve as a pointer to external
404 408 data.
405 409
406 410 The revision data itself is a linear collection of data chunks.
407 411 Each chunk represents a revision and is usually represented as a
408 412 delta against the previous chunk. To bound lookup time, runs of
409 413 deltas are limited to about 2 times the length of the original
410 414 version data. This makes retrieval of a version proportional to
411 415 its size, or O(1) relative to the number of revisions.
412 416
413 417 Both pieces of the revlog are written to in an append-only
414 418 fashion, which means we never need to rewrite a file to insert or
415 419 remove data, and can use some simple techniques to avoid the need
416 420 for locking while reading.
417 421 """
418 422 def __init__(self, opener, indexfile):
419 423 """
420 424 create a revlog object
421 425
422 426 opener is a function that abstracts the file opening operation
423 427 and can be used to implement COW semantics or the like.
424 428 """
425 429 self.indexfile = indexfile
426 430 self.datafile = indexfile[:-2] + ".d"
427 431 self.opener = opener
428 432 self._cache = None
429 433 self._chunkcache = None
430 434 self.nodemap = {nullid: nullrev}
431 435 self.index = []
432 436
433 437 v = REVLOG_DEFAULT_VERSION
434 438 if hasattr(opener, "defversion"):
435 439 v = opener.defversion
436 440 if v & REVLOGNG:
437 441 v |= REVLOGNGINLINEDATA
438 442
439 443 i = ""
440 444 try:
441 445 f = self.opener(self.indexfile)
442 446 i = f.read(4)
443 447 f.seek(0)
444 448 if len(i) > 0:
445 449 v = struct.unpack(versionformat, i)[0]
446 450 except IOError, inst:
447 451 if inst.errno != errno.ENOENT:
448 452 raise
449 453
450 454 self.version = v
451 455 self._inline = v & REVLOGNGINLINEDATA
452 456 flags = v & ~0xFFFF
453 457 fmt = v & 0xFFFF
454 458 if fmt == REVLOGV0 and flags:
455 459 raise RevlogError(_("index %s unknown flags %#04x for format v0")
456 460 % (self.indexfile, flags >> 16))
457 461 elif fmt == REVLOGNG and flags & ~REVLOGNGINLINEDATA:
458 462 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
459 463 % (self.indexfile, flags >> 16))
460 464 elif fmt > REVLOGNG:
461 465 raise RevlogError(_("index %s unknown format %d")
462 466 % (self.indexfile, fmt))
463 467
464 468 self._io = revlogio()
465 469 if self.version == REVLOGV0:
466 470 self._io = revlogoldio()
467 471 if i:
468 472 d = self._io.parseindex(f, self._inline)
469 473 self.index, self.nodemap, self._chunkcache = d
470 474
471 475 # add the magic null revision at -1
472 476 self.index.append((0, 0, 0, -1, -1, -1, -1, nullid))
473 477
474 478 def _loadindex(self, start, end):
475 479 """load a block of indexes all at once from the lazy parser"""
476 480 if isinstance(self.index, lazyindex):
477 481 self.index.p.loadindex(start, end)
478 482
479 483 def _loadindexmap(self):
480 484 """loads both the map and the index from the lazy parser"""
481 485 if isinstance(self.index, lazyindex):
482 486 p = self.index.p
483 487 p.loadindex()
484 488 self.nodemap = p.map
485 489
486 490 def _loadmap(self):
487 491 """loads the map from the lazy parser"""
488 492 if isinstance(self.nodemap, lazymap):
489 493 self.nodemap.p.loadmap()
490 494 self.nodemap = self.nodemap.p.map
491 495
492 496 def tip(self):
493 497 return self.node(len(self.index) - 2)
494 498 def count(self):
495 499 return len(self.index) - 1
496 500
497 501 def rev(self, node):
498 502 try:
499 503 return self.nodemap[node]
500 504 except KeyError:
501 505 raise LookupError(_('%s: no node %s') % (self.indexfile, hex(node)))
502 506 def node(self, rev):
503 507 return self.index[rev][7]
504 508 def linkrev(self, node):
505 509 return self.index[self.rev(node)][4]
506 510 def parents(self, node):
507 511 d = self.index[self.rev(node)][5:7]
508 512 return (self.node(d[0]), self.node(d[1]))
509 513 def parentrevs(self, rev):
510 514 return self.index[rev][5:7]
511 515 def start(self, rev):
512 516 return int(self.index[rev][0] >> 16)
513 517 def end(self, rev):
514 518 return self.start(rev) + self.length(rev)
515 519 def length(self, rev):
516 520 return self.index[rev][1]
517 521 def base(self, rev):
518 522 return self.index[rev][3]
519 523
520 524 def size(self, rev):
521 525 """return the length of the uncompressed text for a given revision"""
522 526 l = self.index[rev][2]
523 527 if l >= 0:
524 528 return l
525 529
526 530 t = self.revision(self.node(rev))
527 531 return len(t)
528 532
529 533 # alternate implementation, The advantage to this code is it
530 534 # will be faster for a single revision. But, the results are not
531 535 # cached, so finding the size of every revision will be slower.
532 536 """
533 537 if self.cache and self.cache[1] == rev:
534 538 return len(self.cache[2])
535 539
536 540 base = self.base(rev)
537 541 if self.cache and self.cache[1] >= base and self.cache[1] < rev:
538 542 base = self.cache[1]
539 543 text = self.cache[2]
540 544 else:
541 545 text = self.revision(self.node(base))
542 546
543 547 l = len(text)
544 548 for x in xrange(base + 1, rev + 1):
545 549 l = mdiff.patchedsize(l, self.chunk(x))
546 550 return l
547 551 """
548 552
549 553 def reachable(self, node, stop=None):
550 554 """return a hash of all nodes ancestral to a given node, including
551 555 the node itself, stopping when stop is matched"""
552 556 reachable = {}
553 557 visit = [node]
554 558 reachable[node] = 1
555 559 if stop:
556 560 stopn = self.rev(stop)
557 561 else:
558 562 stopn = 0
559 563 while visit:
560 564 n = visit.pop(0)
561 565 if n == stop:
562 566 continue
563 567 if n == nullid:
564 568 continue
565 569 for p in self.parents(n):
566 570 if self.rev(p) < stopn:
567 571 continue
568 572 if p not in reachable:
569 573 reachable[p] = 1
570 574 visit.append(p)
571 575 return reachable
572 576
573 577 def nodesbetween(self, roots=None, heads=None):
574 578 """Return a tuple containing three elements. Elements 1 and 2 contain
575 579 the final lists of bases and heads after all the unreachable ones have been
576 580 pruned. Element 0 contains a topologically sorted list of all
577 581
578 582 nodes that satisfy these constraints:
579 583 1. All nodes must be descended from a node in roots (the nodes on
580 584 roots are considered descended from themselves).
581 585 2. All nodes must also be ancestors of a node in heads (the nodes in
582 586 heads are considered to be their own ancestors).
583 587
584 588 If roots is unspecified, nullid is assumed as the only root.
585 589 If heads is unspecified, it is taken to be the output of the
586 590 heads method (i.e. a list of all nodes in the repository that
587 591 have no children)."""
588 592 nonodes = ([], [], [])
589 593 if roots is not None:
590 594 roots = list(roots)
591 595 if not roots:
592 596 return nonodes
593 597 lowestrev = min([self.rev(n) for n in roots])
594 598 else:
595 599 roots = [nullid] # Everybody's a descendent of nullid
596 600 lowestrev = nullrev
597 601 if (lowestrev == nullrev) and (heads is None):
598 602 # We want _all_ the nodes!
599 603 return ([self.node(r) for r in xrange(0, self.count())],
600 604 [nullid], list(self.heads()))
601 605 if heads is None:
602 606 # All nodes are ancestors, so the latest ancestor is the last
603 607 # node.
604 608 highestrev = self.count() - 1
605 609 # Set ancestors to None to signal that every node is an ancestor.
606 610 ancestors = None
607 611 # Set heads to an empty dictionary for later discovery of heads
608 612 heads = {}
609 613 else:
610 614 heads = list(heads)
611 615 if not heads:
612 616 return nonodes
613 617 ancestors = {}
614 618 # Turn heads into a dictionary so we can remove 'fake' heads.
615 619 # Also, later we will be using it to filter out the heads we can't
616 620 # find from roots.
617 621 heads = dict.fromkeys(heads, 0)
618 622 # Start at the top and keep marking parents until we're done.
619 623 nodestotag = heads.keys()
620 624 # Remember where the top was so we can use it as a limit later.
621 625 highestrev = max([self.rev(n) for n in nodestotag])
622 626 while nodestotag:
623 627 # grab a node to tag
624 628 n = nodestotag.pop()
625 629 # Never tag nullid
626 630 if n == nullid:
627 631 continue
628 632 # A node's revision number represents its place in a
629 633 # topologically sorted list of nodes.
630 634 r = self.rev(n)
631 635 if r >= lowestrev:
632 636 if n not in ancestors:
633 637 # If we are possibly a descendent of one of the roots
634 638 # and we haven't already been marked as an ancestor
635 639 ancestors[n] = 1 # Mark as ancestor
636 640 # Add non-nullid parents to list of nodes to tag.
637 641 nodestotag.extend([p for p in self.parents(n) if
638 642 p != nullid])
639 643 elif n in heads: # We've seen it before, is it a fake head?
640 644 # So it is, real heads should not be the ancestors of
641 645 # any other heads.
642 646 heads.pop(n)
643 647 if not ancestors:
644 648 return nonodes
645 649 # Now that we have our set of ancestors, we want to remove any
646 650 # roots that are not ancestors.
647 651
648 652 # If one of the roots was nullid, everything is included anyway.
649 653 if lowestrev > nullrev:
650 654 # But, since we weren't, let's recompute the lowest rev to not
651 655 # include roots that aren't ancestors.
652 656
653 657 # Filter out roots that aren't ancestors of heads
654 658 roots = [n for n in roots if n in ancestors]
655 659 # Recompute the lowest revision
656 660 if roots:
657 661 lowestrev = min([self.rev(n) for n in roots])
658 662 else:
659 663 # No more roots? Return empty list
660 664 return nonodes
661 665 else:
662 666 # We are descending from nullid, and don't need to care about
663 667 # any other roots.
664 668 lowestrev = nullrev
665 669 roots = [nullid]
666 670 # Transform our roots list into a 'set' (i.e. a dictionary where the
667 671 # values don't matter.)
668 672 descendents = dict.fromkeys(roots, 1)
669 673 # Also, keep the original roots so we can filter out roots that aren't
670 674 # 'real' roots (i.e. are descended from other roots).
671 675 roots = descendents.copy()
672 676 # Our topologically sorted list of output nodes.
673 677 orderedout = []
674 678 # Don't start at nullid since we don't want nullid in our output list,
675 679 # and if nullid shows up in descedents, empty parents will look like
676 680 # they're descendents.
677 681 for r in xrange(max(lowestrev, 0), highestrev + 1):
678 682 n = self.node(r)
679 683 isdescendent = False
680 684 if lowestrev == nullrev: # Everybody is a descendent of nullid
681 685 isdescendent = True
682 686 elif n in descendents:
683 687 # n is already a descendent
684 688 isdescendent = True
685 689 # This check only needs to be done here because all the roots
686 690 # will start being marked as descendents before the loop.
687 691 if n in roots:
688 692 # If n was a root, check if it's a 'real' root.
689 693 p = tuple(self.parents(n))
690 694 # If any of its parents are descendents, it's not a root.
691 695 if (p[0] in descendents) or (p[1] in descendents):
692 696 roots.pop(n)
693 697 else:
694 698 p = tuple(self.parents(n))
695 699 # A node is a descendent if either of its parents are
696 700 # descendents. (We seeded the descendents list with the roots
697 701 # up there, remember?)
698 702 if (p[0] in descendents) or (p[1] in descendents):
699 703 descendents[n] = 1
700 704 isdescendent = True
701 705 if isdescendent and ((ancestors is None) or (n in ancestors)):
702 706 # Only include nodes that are both descendents and ancestors.
703 707 orderedout.append(n)
704 708 if (ancestors is not None) and (n in heads):
705 709 # We're trying to figure out which heads are reachable
706 710 # from roots.
707 711 # Mark this head as having been reached
708 712 heads[n] = 1
709 713 elif ancestors is None:
710 714 # Otherwise, we're trying to discover the heads.
711 715 # Assume this is a head because if it isn't, the next step
712 716 # will eventually remove it.
713 717 heads[n] = 1
714 718 # But, obviously its parents aren't.
715 719 for p in self.parents(n):
716 720 heads.pop(p, None)
717 721 heads = [n for n in heads.iterkeys() if heads[n] != 0]
718 722 roots = roots.keys()
719 723 assert orderedout
720 724 assert roots
721 725 assert heads
722 726 return (orderedout, roots, heads)
723 727
724 728 def heads(self, start=None, stop=None):
725 729 """return the list of all nodes that have no children
726 730
727 731 if start is specified, only heads that are descendants of
728 732 start will be returned
729 733 if stop is specified, it will consider all the revs from stop
730 734 as if they had no children
731 735 """
732 736 if start is None and stop is None:
733 737 count = self.count()
734 738 if not count:
735 739 return [nullid]
736 740 ishead = [1] * (count + 1)
737 741 index = self.index
738 742 for r in xrange(count):
739 743 e = index[r]
740 744 ishead[e[5]] = ishead[e[6]] = 0
741 745 return [self.node(r) for r in xrange(count) if ishead[r]]
742 746
743 747 if start is None:
744 748 start = nullid
745 749 if stop is None:
746 750 stop = []
747 751 stoprevs = dict.fromkeys([self.rev(n) for n in stop])
748 752 startrev = self.rev(start)
749 753 reachable = {startrev: 1}
750 754 heads = {startrev: 1}
751 755
752 756 parentrevs = self.parentrevs
753 757 for r in xrange(startrev + 1, self.count()):
754 758 for p in parentrevs(r):
755 759 if p in reachable:
756 760 if r not in stoprevs:
757 761 reachable[r] = 1
758 762 heads[r] = 1
759 763 if p in heads and p not in stoprevs:
760 764 del heads[p]
761 765
762 766 return [self.node(r) for r in heads]
763 767
764 768 def children(self, node):
765 769 """find the children of a given node"""
766 770 c = []
767 771 p = self.rev(node)
768 772 for r in range(p + 1, self.count()):
769 773 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
770 774 if prevs:
771 775 for pr in prevs:
772 776 if pr == p:
773 777 c.append(self.node(r))
774 778 elif p == nullrev:
775 779 c.append(self.node(r))
776 780 return c
777 781
778 782 def _match(self, id):
779 783 if isinstance(id, (long, int)):
780 784 # rev
781 785 return self.node(id)
782 786 if len(id) == 20:
783 787 # possibly a binary node
784 788 # odds of a binary node being all hex in ASCII are 1 in 10**25
785 789 try:
786 790 node = id
787 791 r = self.rev(node) # quick search the index
788 792 return node
789 793 except LookupError:
790 794 pass # may be partial hex id
791 795 try:
792 796 # str(rev)
793 797 rev = int(id)
794 798 if str(rev) != id:
795 799 raise ValueError
796 800 if rev < 0:
797 801 rev = self.count() + rev
798 802 if rev < 0 or rev >= self.count():
799 803 raise ValueError
800 804 return self.node(rev)
801 805 except (ValueError, OverflowError):
802 806 pass
803 807 if len(id) == 40:
804 808 try:
805 809 # a full hex nodeid?
806 810 node = bin(id)
807 811 r = self.rev(node)
808 812 return node
809 813 except TypeError:
810 814 pass
811 815
812 816 def _partialmatch(self, id):
813 817 if len(id) < 40:
814 818 try:
815 819 # hex(node)[:...]
816 820 bin_id = bin(id[:len(id) & ~1]) # grab an even number of digits
817 821 node = None
818 822 for n in self.nodemap:
819 823 if n.startswith(bin_id) and hex(n).startswith(id):
820 824 if node is not None:
821 825 raise LookupError(_("Ambiguous identifier"))
822 826 node = n
823 827 if node is not None:
824 828 return node
825 829 except TypeError:
826 830 pass
827 831
828 832 def lookup(self, id):
829 833 """locate a node based on:
830 834 - revision number or str(revision number)
831 835 - nodeid or subset of hex nodeid
832 836 """
833 837 n = self._match(id)
834 838 if n is not None:
835 839 return n
836 840 n = self._partialmatch(id)
837 841 if n:
838 842 return n
839 843
840 844 raise LookupError(_("No match found"))
841 845
842 846 def cmp(self, node, text):
843 847 """compare text with a given file revision"""
844 848 p1, p2 = self.parents(node)
845 849 return hash(text, p1, p2) != node
846 850
847 851 def chunk(self, rev, df=None):
848 852 def loadcache(df):
849 853 if not df:
850 854 if self._inline:
851 855 df = self.opener(self.indexfile)
852 856 else:
853 857 df = self.opener(self.datafile)
854 858 df.seek(start)
855 859 self._chunkcache = (start, df.read(cache_length))
856 860
857 861 start, length = self.start(rev), self.length(rev)
858 862 if self._inline:
859 863 start += (rev + 1) * self._io.size
860 864 end = start + length
861 865
862 866 offset = 0
863 867 if not self._chunkcache:
864 868 cache_length = max(65536, length)
865 869 loadcache(df)
866 870 else:
867 871 cache_start = self._chunkcache[0]
868 872 cache_length = len(self._chunkcache[1])
869 873 cache_end = cache_start + cache_length
870 874 if start >= cache_start and end <= cache_end:
871 875 # it is cached
872 876 offset = start - cache_start
873 877 else:
874 878 cache_length = max(65536, length)
875 879 loadcache(df)
876 880
877 881 # avoid copying large chunks
878 882 c = self._chunkcache[1]
879 883 if cache_length != length:
880 884 c = c[offset:offset + length]
881 885
882 886 return decompress(c)
883 887
884 888 def delta(self, node):
885 889 """return or calculate a delta between a node and its predecessor"""
886 890 r = self.rev(node)
887 891 return self.revdiff(r - 1, r)
888 892
889 893 def revdiff(self, rev1, rev2):
890 894 """return or calculate a delta between two revisions"""
891 895 if rev1 + 1 == rev2 and self.base(rev1) == self.base(rev2):
892 896 return self.chunk(rev2)
893 897
894 898 return mdiff.textdiff(self.revision(self.node(rev1)),
895 899 self.revision(self.node(rev2)))
896 900
897 901 def revision(self, node):
898 902 """return an uncompressed revision of a given"""
899 903 if node == nullid:
900 904 return ""
901 905 if self._cache and self._cache[0] == node:
902 906 return self._cache[2]
903 907
904 908 # look up what we need to read
905 909 text = None
906 910 rev = self.rev(node)
907 911 base = self.base(rev)
908 912
909 913 # check rev flags
910 914 if self.index[rev][0] & 0xFFFF:
911 915 raise RevlogError(_('incompatible revision flag %x') % (self.index[rev][0] & 0xFFFF))
912 916
913 917 if self._inline:
914 918 # we probably have the whole chunk cached
915 919 df = None
916 920 else:
917 921 df = self.opener(self.datafile)
918 922
919 923 # do we have useful data cached?
920 924 if self._cache and self._cache[1] >= base and self._cache[1] < rev:
921 925 base = self._cache[1]
922 926 text = self._cache[2]
923 927 self._loadindex(base, rev + 1)
924 928 else:
925 929 self._loadindex(base, rev + 1)
926 930 text = self.chunk(base, df=df)
927 931
928 932 bins = [self.chunk(r, df) for r in xrange(base + 1, rev + 1)]
929 933 text = mdiff.patches(text, bins)
930 934 p1, p2 = self.parents(node)
931 935 if node != hash(text, p1, p2):
932 936 raise RevlogError(_("integrity check failed on %s:%d")
933 937 % (self.datafile, rev))
934 938
935 939 self._cache = (node, rev, text)
936 940 return text
937 941
938 942 def checkinlinesize(self, tr, fp=None):
939 943 if not self._inline:
940 944 return
941 945 if not fp:
942 946 fp = self.opener(self.indexfile, 'r')
943 947 fp.seek(0, 2)
944 948 size = fp.tell()
945 949 if size < 131072:
946 950 return
947 951 trinfo = tr.find(self.indexfile)
948 952 if trinfo == None:
949 953 raise RevlogError(_("%s not found in the transaction")
950 954 % self.indexfile)
951 955
952 956 trindex = trinfo[2]
953 957 dataoff = self.start(trindex)
954 958
955 959 tr.add(self.datafile, dataoff)
956 960 df = self.opener(self.datafile, 'w')
957 961 calc = self._io.size
958 962 for r in xrange(self.count()):
959 963 start = self.start(r) + (r + 1) * calc
960 964 length = self.length(r)
961 965 fp.seek(start)
962 966 d = fp.read(length)
963 967 df.write(d)
964 968 fp.close()
965 969 df.close()
966 970 fp = self.opener(self.indexfile, 'w', atomictemp=True)
967 971 self.version &= ~(REVLOGNGINLINEDATA)
968 972 self._inline = False
969 973 for i in xrange(self.count()):
970 974 e = self._io.packentry(self.index[i], self.node, self.version)
971 975 fp.write(e)
972 976
973 977 # if we don't call rename, the temp file will never replace the
974 978 # real index
975 979 fp.rename()
976 980
977 981 tr.replace(self.indexfile, trindex * calc)
978 982 self._chunkcache = None
979 983
980 984 def addrevision(self, text, transaction, link, p1, p2, d=None):
981 985 """add a revision to the log
982 986
983 987 text - the revision data to add
984 988 transaction - the transaction object used for rollback
985 989 link - the linkrev data to add
986 990 p1, p2 - the parent nodeids of the revision
987 991 d - an optional precomputed delta
988 992 """
989 993 dfh = None
990 994 if not self._inline:
991 995 dfh = self.opener(self.datafile, "a")
992 996 ifh = self.opener(self.indexfile, "a+")
993 997 return self._addrevision(text, transaction, link, p1, p2, d, ifh, dfh)
994 998
995 999 def _addrevision(self, text, transaction, link, p1, p2, d, ifh, dfh):
996 1000 node = hash(text, p1, p2)
997 1001 if node in self.nodemap:
998 1002 return node
999 1003
1000 1004 curr = self.count()
1001 1005 prev = curr - 1
1002 1006 base = self.base(prev)
1003 1007 offset = self.end(prev)
1004 1008
1005 1009 if curr:
1006 1010 if not d:
1007 1011 ptext = self.revision(self.node(prev))
1008 1012 d = mdiff.textdiff(ptext, text)
1009 1013 data = compress(d)
1010 1014 l = len(data[1]) + len(data[0])
1011 1015 dist = l + offset - self.start(base)
1012 1016
1013 1017 # full versions are inserted when the needed deltas
1014 1018 # become comparable to the uncompressed text
1015 1019 if not curr or dist > len(text) * 2:
1016 1020 data = compress(text)
1017 1021 l = len(data[1]) + len(data[0])
1018 1022 base = curr
1019 1023
1020 1024 e = (offset_type(offset, 0), l, len(text),
1021 1025 base, link, self.rev(p1), self.rev(p2), node)
1022 1026 self.index.insert(-1, e)
1023 1027 self.nodemap[node] = curr
1024 1028
1025 1029 entry = self._io.packentry(e, self.node, self.version)
1026 1030 if not self._inline:
1027 1031 transaction.add(self.datafile, offset)
1028 1032 transaction.add(self.indexfile, curr * len(entry))
1029 1033 if data[0]:
1030 1034 dfh.write(data[0])
1031 1035 dfh.write(data[1])
1032 1036 dfh.flush()
1033 1037 ifh.write(entry)
1034 1038 else:
1035 1039 offset += curr * self._io.size
1036 1040 transaction.add(self.indexfile, offset, prev)
1037 1041 ifh.write(entry)
1038 1042 ifh.write(data[0])
1039 1043 ifh.write(data[1])
1040 1044 self.checkinlinesize(transaction, ifh)
1041 1045
1042 1046 self._cache = (node, curr, text)
1043 1047 return node
1044 1048
1045 1049 def ancestor(self, a, b):
1046 1050 """calculate the least common ancestor of nodes a and b"""
1047 1051
1048 1052 def parents(rev):
1049 1053 return [p for p in self.parentrevs(rev) if p != nullrev]
1050 1054
1051 1055 c = ancestor.ancestor(self.rev(a), self.rev(b), parents)
1052 1056 if c is None:
1053 1057 return nullid
1054 1058
1055 1059 return self.node(c)
1056 1060
1057 1061 def group(self, nodelist, lookup, infocollect=None):
1058 1062 """calculate a delta group
1059 1063
1060 1064 Given a list of changeset revs, return a set of deltas and
1061 1065 metadata corresponding to nodes. the first delta is
1062 1066 parent(nodes[0]) -> nodes[0] the receiver is guaranteed to
1063 1067 have this parent as it has all history before these
1064 1068 changesets. parent is parent[0]
1065 1069 """
1066 1070 revs = [self.rev(n) for n in nodelist]
1067 1071
1068 1072 # if we don't have any revisions touched by these changesets, bail
1069 1073 if not revs:
1070 1074 yield changegroup.closechunk()
1071 1075 return
1072 1076
1073 1077 # add the parent of the first rev
1074 1078 p = self.parents(self.node(revs[0]))[0]
1075 1079 revs.insert(0, self.rev(p))
1076 1080
1077 1081 # build deltas
1078 1082 for d in xrange(0, len(revs) - 1):
1079 1083 a, b = revs[d], revs[d + 1]
1080 1084 nb = self.node(b)
1081 1085
1082 1086 if infocollect is not None:
1083 1087 infocollect(nb)
1084 1088
1085 1089 d = self.revdiff(a, b)
1086 1090 p = self.parents(nb)
1087 1091 meta = nb + p[0] + p[1] + lookup(nb)
1088 1092 yield changegroup.genchunk("%s%s" % (meta, d))
1089 1093
1090 1094 yield changegroup.closechunk()
1091 1095
1092 1096 def addgroup(self, revs, linkmapper, transaction, unique=0):
1093 1097 """
1094 1098 add a delta group
1095 1099
1096 1100 given a set of deltas, add them to the revision log. the
1097 1101 first delta is against its parent, which should be in our
1098 1102 log, the rest are against the previous delta.
1099 1103 """
1100 1104
1101 1105 #track the base of the current delta log
1102 1106 r = self.count()
1103 1107 t = r - 1
1104 1108 node = None
1105 1109
1106 1110 base = prev = nullrev
1107 1111 start = end = textlen = 0
1108 1112 if r:
1109 1113 end = self.end(t)
1110 1114
1111 1115 ifh = self.opener(self.indexfile, "a+")
1112 1116 isize = r * self._io.size
1113 1117 if self._inline:
1114 1118 transaction.add(self.indexfile, end + isize, r)
1115 1119 dfh = None
1116 1120 else:
1117 1121 transaction.add(self.indexfile, isize, r)
1118 1122 transaction.add(self.datafile, end)
1119 1123 dfh = self.opener(self.datafile, "a")
1120 1124
1121 1125 # loop through our set of deltas
1122 1126 chain = None
1123 1127 for chunk in revs:
1124 1128 node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
1125 1129 link = linkmapper(cs)
1126 1130 if node in self.nodemap:
1127 1131 # this can happen if two branches make the same change
1128 1132 # if unique:
1129 1133 # raise RevlogError(_("already have %s") % hex(node[:4]))
1130 1134 chain = node
1131 1135 continue
1132 1136 delta = chunk[80:]
1133 1137
1134 1138 for p in (p1, p2):
1135 1139 if not p in self.nodemap:
1136 1140 raise LookupError(_("unknown parent %s") % short(p))
1137 1141
1138 1142 if not chain:
1139 1143 # retrieve the parent revision of the delta chain
1140 1144 chain = p1
1141 1145 if not chain in self.nodemap:
1142 1146 raise LookupError(_("unknown base %s") % short(chain[:4]))
1143 1147
1144 1148 # full versions are inserted when the needed deltas become
1145 1149 # comparable to the uncompressed text or when the previous
1146 1150 # version is not the one we have a delta against. We use
1147 1151 # the size of the previous full rev as a proxy for the
1148 1152 # current size.
1149 1153
1150 1154 if chain == prev:
1151 1155 tempd = compress(delta)
1152 1156 cdelta = tempd[0] + tempd[1]
1153 1157 textlen = mdiff.patchedsize(textlen, delta)
1154 1158
1155 1159 if chain != prev or (end - start + len(cdelta)) > textlen * 2:
1156 1160 # flush our writes here so we can read it in revision
1157 1161 if dfh:
1158 1162 dfh.flush()
1159 1163 ifh.flush()
1160 1164 text = self.revision(chain)
1161 1165 text = mdiff.patches(text, [delta])
1162 1166 chk = self._addrevision(text, transaction, link, p1, p2, None,
1163 1167 ifh, dfh)
1164 1168 if not dfh and not self._inline:
1165 1169 # addrevision switched from inline to conventional
1166 1170 # reopen the index
1167 1171 dfh = self.opener(self.datafile, "a")
1168 1172 ifh = self.opener(self.indexfile, "a")
1169 1173 if chk != node:
1170 1174 raise RevlogError(_("consistency error adding group"))
1171 1175 textlen = len(text)
1172 1176 else:
1173 1177 e = (offset_type(end, 0), len(cdelta), textlen, base,
1174 1178 link, self.rev(p1), self.rev(p2), node)
1175 1179 self.index.insert(-1, e)
1176 1180 self.nodemap[node] = r
1177 1181 entry = self._io.packentry(e, self.node, self.version)
1178 1182 if self._inline:
1179 1183 ifh.write(entry)
1180 1184 ifh.write(cdelta)
1181 1185 self.checkinlinesize(transaction, ifh)
1182 1186 if not self._inline:
1183 1187 dfh = self.opener(self.datafile, "a")
1184 1188 ifh = self.opener(self.indexfile, "a")
1185 1189 else:
1186 1190 dfh.write(cdelta)
1187 1191 ifh.write(entry)
1188 1192
1189 1193 t, r, chain, prev = r, r + 1, node, node
1190 1194 base = self.base(t)
1191 1195 start = self.start(base)
1192 1196 end = self.end(t)
1193 1197
1194 1198 return node
1195 1199
1196 1200 def strip(self, rev, minlink):
1197 1201 if self.count() == 0 or rev >= self.count():
1198 1202 return
1199 1203
1200 1204 if isinstance(self.index, lazyindex):
1201 1205 self._loadindexmap()
1202 1206
1203 1207 # When stripping away a revision, we need to make sure it
1204 1208 # does not actually belong to an older changeset.
1205 1209 # The minlink parameter defines the oldest revision
1206 1210 # we're allowed to strip away.
1207 1211 while minlink > self.index[rev][4]:
1208 1212 rev += 1
1209 1213 if rev >= self.count():
1210 1214 return
1211 1215
1212 1216 # first truncate the files on disk
1213 1217 end = self.start(rev)
1214 1218 if not self._inline:
1215 1219 df = self.opener(self.datafile, "a")
1216 1220 df.truncate(end)
1217 1221 end = rev * self._io.size
1218 1222 else:
1219 1223 end += rev * self._io.size
1220 1224
1221 1225 indexf = self.opener(self.indexfile, "a")
1222 1226 indexf.truncate(end)
1223 1227
1224 1228 # then reset internal state in memory to forget those revisions
1225 1229 self._cache = None
1226 1230 self._chunkcache = None
1227 1231 for x in xrange(rev, self.count()):
1228 1232 del self.nodemap[self.node(x)]
1229 1233
1230 1234 del self.index[rev:-1]
1231 1235
1232 1236 def checksize(self):
1233 1237 expected = 0
1234 1238 if self.count():
1235 1239 expected = self.end(self.count() - 1)
1236 1240
1237 1241 try:
1238 1242 f = self.opener(self.datafile)
1239 1243 f.seek(0, 2)
1240 1244 actual = f.tell()
1241 1245 dd = actual - expected
1242 1246 except IOError, inst:
1243 1247 if inst.errno != errno.ENOENT:
1244 1248 raise
1245 1249 dd = 0
1246 1250
1247 1251 try:
1248 1252 f = self.opener(self.indexfile)
1249 1253 f.seek(0, 2)
1250 1254 actual = f.tell()
1251 1255 s = self._io.size
1252 1256 i = actual / s
1253 1257 di = actual - (i * s)
1254 1258 if self._inline:
1255 1259 databytes = 0
1256 1260 for r in xrange(self.count()):
1257 1261 databytes += self.length(r)
1258 1262 dd = 0
1259 1263 di = actual - self.count() * s - databytes
1260 1264 except IOError, inst:
1261 1265 if inst.errno != errno.ENOENT:
1262 1266 raise
1263 1267 di = 0
1264 1268
1265 1269 return (dd, di)