revlog: move flag checking out of the offset fastpath
Matt Mackall
r5004:825516d1 default
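This change takes the flag-bits check out of getoffset(), which start() invokes for every revision touched, and performs it once in revision() instead, so the common offset lookup is reduced to a bare shift. For context, an illustrative sketch of the packed index field the helpers operate on (not part of the change itself):

    offset, flags = 123456, 0
    q = (offset << 16) | flags    # what offset_type() builds
    assert q >> 16 == offset      # getoffset() fastpath: just a shift
    assert q & 0xFFFF == flags    # the check revision() now performs once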
@@ -1,1262 +1,1264 @@
1 1 """
2 2 revlog.py - storage back-end for mercurial
3 3
4 4 This provides efficient delta storage with O(1) retrieve and append
5 5 and O(changes) merge between branches
6 6
7 7 Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
8 8
9 9 This software may be used and distributed according to the terms
10 10 of the GNU General Public License, incorporated herein by reference.
11 11 """
12 12
13 13 from node import *
14 14 from i18n import _
15 15 import binascii, changegroup, errno, ancestor, mdiff, os
16 16 import sha, struct, util, zlib
17 17
18 18 # revlog flags
19 19 REVLOGV0 = 0
20 20 REVLOGNG = 1
21 21 REVLOGNGINLINEDATA = (1 << 16)
22 22 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
23 23 REVLOG_DEFAULT_FORMAT = REVLOGNG
24 24 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
25 25
26 26 class RevlogError(Exception):
27 27 pass
28 28 class LookupError(RevlogError):
29 29 pass
30 30
31 31 def getoffset(q):
32 if q & 0xFFFF:
33 raise RevlogError(_('incompatible revision flag %x') % q)
34 32 return int(q >> 16)
35 33
36 34 def gettype(q):
37 35 return int(q & 0xFFFF)
38 36
39 37 def offset_type(offset, type):
40 38 return long(long(offset) << 16 | type)
41 39
42 40 def hash(text, p1, p2):
43 41 """generate a hash from the given text and its parent hashes
44 42
45 43 This hash combines both the current file contents and its history
46 44 in a manner that makes it easy to distinguish nodes with the same
47 45 content in the revision graph.
48 46 """
49 47 l = [p1, p2]
50 48 l.sort()
51 49 s = sha.new(l[0])
52 50 s.update(l[1])
53 51 s.update(text)
54 52 return s.digest()
55 53
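Because the parents are sorted first, the digest is independent of parent order; the same scheme written directly against hashlib (a sketch, assuming 20-byte binary parent ids):

    import hashlib
    p1, p2 = '\x11' * 20, '\x22' * 20
    lo, hi = min(p1, p2), max(p1, p2)
    d = hashlib.sha1(lo + hi + 'file contents').digest()
    # d == hash('file contents', p1, p2) == hash('file contents', p2, p1)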
56 54 def compress(text):
57 55 """ generate a possibly-compressed representation of text """
58 56 if not text:
59 57 return ("", text)
60 58 if len(text) < 44:
61 59 if text[0] == '\0':
62 60 return ("", text)
63 61 return ('u', text)
64 62 bin = zlib.compress(text)
65 63 if len(bin) > len(text):
66 64 if text[0] == '\0':
67 65 return ("", text)
68 66 return ('u', text)
69 67 return ("", bin)
70 68
71 69 def decompress(bin):
72 70 """ decompress the given input """
73 71 if not bin:
74 72 return bin
75 73 t = bin[0]
76 74 if t == '\0':
77 75 return bin
78 76 if t == 'x':
79 77 return zlib.decompress(bin)
80 78 if t == 'u':
81 79 return bin[1:]
82 80 raise RevlogError(_("unknown compression type %r") % t)
83 81
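Taken together, the leading byte acts as a tag: 'x' marks a zlib stream, 'u' marks text stored uncompressed, and '\0' (or an empty chunk) passes through untouched. The round-trip property the two helpers maintain (illustrative):

    for text in ('', 'short', '\0starts with NUL', 'x' * 100):
        hdr, body = compress(text)
        assert decompress(hdr + body) == text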
84 82 class lazyparser(object):
85 83 """
86 84 this class avoids the need to parse the entirety of large indices
87 85 """
88 86
89 87 # lazyparser is not safe to use on windows if win32 extensions are not
90 88 # available. it keeps the file handle open, which makes it impossible
91 89 # to break hardlinks on local cloned repos.
92 90 safe_to_use = os.name != 'nt' or (not util.is_win_9x() and
93 91 hasattr(util, 'win32api'))
94 92
95 93 def __init__(self, dataf, size):
96 94 self.dataf = dataf
97 95 self.s = struct.calcsize(indexformatng)
98 96 self.datasize = size
99 97 self.l = size/self.s
100 98 self.index = [None] * self.l
101 99 self.map = {nullid: nullrev}
102 100 self.allmap = 0
103 101 self.all = 0
104 102 self.mapfind_count = 0
105 103
106 104 def loadmap(self):
107 105 """
108 106 during a commit, we need to make sure the rev being added is
109 107 not a duplicate. This requires loading the entire index,
110 108 which is fairly slow. loadmap can load up just the node map,
111 109 which takes much less time.
112 110 """
113 111 if self.allmap:
114 112 return
115 113 end = self.datasize
116 114 self.allmap = 1
117 115 cur = 0
118 116 count = 0
119 117 blocksize = self.s * 256
120 118 self.dataf.seek(0)
121 119 while cur < end:
122 120 data = self.dataf.read(blocksize)
123 121 off = 0
124 122 for x in xrange(256):
125 123 n = data[off + ngshaoffset:off + ngshaoffset + 20]
126 124 self.map[n] = count
127 125 count += 1
128 126 if count >= self.l:
129 127 break
130 128 off += self.s
131 129 cur += blocksize
132 130
133 131 def loadblock(self, blockstart, blocksize, data=None):
134 132 if self.all:
135 133 return
136 134 if data is None:
137 135 self.dataf.seek(blockstart)
138 136 if blockstart + blocksize > self.datasize:
139 137 # the revlog may have grown since we've started running,
140 138 # but we don't have space in self.index for more entries.
141 139 # limit blocksize so that we don't get too much data.
142 140 blocksize = max(self.datasize - blockstart, 0)
143 141 data = self.dataf.read(blocksize)
144 142 lend = len(data) / self.s
145 143 i = blockstart / self.s
146 144 off = 0
147 145 # lazyindex supports __delitem__
148 146 if lend > len(self.index) - i:
149 147 lend = len(self.index) - i
150 148 for x in xrange(lend):
151 149 if self.index[i + x] == None:
152 150 b = data[off : off + self.s]
153 151 self.index[i + x] = b
154 152 n = b[ngshaoffset:ngshaoffset + 20]
155 153 self.map[n] = i + x
156 154 off += self.s
157 155
158 156 def findnode(self, node):
159 157 """search backwards through the index file for a specific node"""
160 158 if self.allmap:
161 159 return None
162 160
163 161 # hg log will cause many many searches for the manifest
164 162 # nodes. After we get called a few times, just load the whole
165 163 # thing.
166 164 if self.mapfind_count > 8:
167 165 self.loadmap()
168 166 if node in self.map:
169 167 return node
170 168 return None
171 169 self.mapfind_count += 1
172 170 last = self.l - 1
173 171 while self.index[last] != None:
174 172 if last == 0:
175 173 self.all = 1
176 174 self.allmap = 1
177 175 return None
178 176 last -= 1
179 177 end = (last + 1) * self.s
180 178 blocksize = self.s * 256
181 179 while end >= 0:
182 180 start = max(end - blocksize, 0)
183 181 self.dataf.seek(start)
184 182 data = self.dataf.read(end - start)
185 183 findend = end - start
186 184 while True:
187 185 # we're searching backwards, so we have to make sure
188 186 # we don't find a changeset where this node is a parent
189 187 off = data.find(node, 0, findend)
190 188 findend = off
191 189 if off >= 0:
192 190 i = off / self.s
193 191 off = i * self.s
194 192 n = data[off + ngshaoffset:off + ngshaoffset + 20]
195 193 if n == node:
196 194 self.map[n] = i + start / self.s
197 195 return node
198 196 else:
199 197 break
200 198 end -= blocksize
201 199 return None
202 200
203 201 def loadindex(self, i=None, end=None):
204 202 if self.all:
205 203 return
206 204 all = False
207 205 if i == None:
208 206 blockstart = 0
209 207 blocksize = (65536 / self.s) * self.s
210 208 end = self.datasize
211 209 all = True
212 210 else:
213 211 if end:
214 212 blockstart = i * self.s
215 213 end = end * self.s
216 214 blocksize = end - blockstart
217 215 else:
218 216 blockstart = (i & ~1023) * self.s
219 217 blocksize = self.s * 1024
220 218 end = blockstart + blocksize
221 219 while blockstart < end:
222 220 self.loadblock(blockstart, blocksize)
223 221 blockstart += blocksize
224 222 if all:
225 223 self.all = True
226 224
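How the three lazy classes fit together, mirroring what revlogio.parseindex does below (a sketch, assuming an open index file f of size bytes):

    parser = lazyparser(f, size)    # raw 64-byte records, loaded in blocks
    index = lazyindex(parser)       # list-like: index[rev] -> entry tuple
    nodemap = lazymap(parser)       # dict-like: nodemap[node] -> rev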
227 225 class lazyindex(object):
228 226 """a lazy version of the index array"""
229 227 def __init__(self, parser):
230 228 self.p = parser
231 229 def __len__(self):
232 230 return len(self.p.index)
233 231 def load(self, pos):
234 232 if pos < 0:
235 233 pos += len(self.p.index)
236 234 self.p.loadindex(pos)
237 235 return self.p.index[pos]
238 236 def __getitem__(self, pos):
239 237 return struct.unpack(indexformatng,
240 238 self.p.index[pos] or self.load(pos))
241 239 def __setitem__(self, pos, item):
242 240 self.p.index[pos] = struct.pack(indexformatng, *item)
243 241 def __delitem__(self, pos):
244 242 del self.p.index[pos]
245 243 def insert(self, pos, e):
246 244 self.p.index.insert(pos, struct.pack(indexformatng, *e))
247 245 def append(self, e):
248 246 self.p.index.append(struct.pack(indexformatng, *e))
249 247
250 248 class lazymap(object):
251 249 """a lazy version of the node map"""
252 250 def __init__(self, parser):
253 251 self.p = parser
254 252 def load(self, key):
255 253 n = self.p.findnode(key)
256 254 if n == None:
257 255 raise KeyError(key)
258 256 def __contains__(self, key):
259 257 if key in self.p.map:
260 258 return True
261 259 self.p.loadmap()
262 260 return key in self.p.map
263 261 def __iter__(self):
264 262 yield nullid
265 263 for i in xrange(self.p.l):
266 264 ret = self.p.index[i]
267 265 if not ret:
268 266 self.p.loadindex(i)
269 267 ret = self.p.index[i]
270 268 if isinstance(ret, str):
271 269 ret = struct.unpack(indexformatng, ret)
272 270 yield ret[7]
273 271 def __getitem__(self, key):
274 272 try:
275 273 return self.p.map[key]
276 274 except KeyError:
277 275 try:
278 276 self.load(key)
279 277 return self.p.map[key]
280 278 except KeyError:
281 279 raise KeyError("node " + hex(key))
282 280 def __setitem__(self, key, val):
283 281 self.p.map[key] = val
284 282 def __delitem__(self, key):
285 283 del self.p.map[key]
286 284
287 285 indexformatv0 = ">4l20s20s20s"
288 286 v0shaoffset = 56
289 287
290 288 class revlogoldio(object):
291 289 def __init__(self):
292 290 self.size = struct.calcsize(indexformatv0)
293 291
294 292 def parseindex(self, fp, inline):
295 293 s = self.size
296 294 index = []
297 295 nodemap = {nullid: nullrev}
298 296 n = off = 0
299 297 data = fp.read()
300 298 l = len(data)
301 299 while off + s <= l:
302 300 cur = data[off:off + s]
303 301 off += s
304 302 e = struct.unpack(indexformatv0, cur)
305 303 # transform to revlogv1 format
306 304 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
307 305 nodemap[e[4]], nodemap[e[5]], e[6])
308 306 index.append(e2)
309 307 nodemap[e[6]] = n
310 308 n += 1
311 309
312 310 return index, nodemap, None
313 311
314 312 def packentry(self, entry, node, version):
315 313 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
316 314 node(entry[5]), node(entry[6]), entry[7])
317 315 return struct.pack(indexformatv0, *e2)
318 316
319 317 # index ng:
320 318 # 6 bytes offset
321 319 # 2 bytes flags
322 320 # 4 bytes compressed length
323 321 # 4 bytes uncompressed length
324 322 # 4 bytes: base rev
325 323 # 4 bytes link rev
326 324 # 4 bytes parent 1 rev
327 325 # 4 bytes parent 2 rev
328 326 # 32 bytes: nodeid
329 327 indexformatng = ">Qiiiiii20s12x"
330 328 ngshaoffset = 32
331 329 versionformat = ">I"
332 330
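The layout above packs into a fixed 64-byte record; a quick sanity check of the format string (illustrative):

    import struct
    assert struct.calcsize(indexformatng) == 64
    e = (offset_type(0, 0), 11, 11, 0, 0, -1, -1, '\x99' * 20)
    assert struct.unpack(indexformatng, struct.pack(indexformatng, *e)) == e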
333 331 class revlogio(object):
334 332 def __init__(self):
335 333 self.size = struct.calcsize(indexformatng)
336 334
337 335 def parseindex(self, fp, inline):
338 336 try:
339 337 size = util.fstat(fp).st_size
340 338 except AttributeError:
341 339 size = 0
342 340
343 341 if lazyparser.safe_to_use and not inline and size > 1000000:
344 342 # big index, let's parse it on demand
345 343 parser = lazyparser(fp, size)
346 344 index = lazyindex(parser)
347 345 nodemap = lazymap(parser)
348 346 e = list(index[0])
349 347 type = gettype(e[0])
350 348 e[0] = offset_type(0, type)
351 349 index[0] = e
352 350 return index, nodemap, None
353 351
354 352 s = self.size
355 353 cache = None
356 354 index = []
357 355 nodemap = {nullid: nullrev}
358 356 n = off = 0
359 357 # if we're not using lazymap, always read the whole index
360 358 data = fp.read()
361 359 l = len(data) - s
362 360 unpack = struct.unpack
363 361 append = index.append
364 362 if inline:
365 363 cache = (0, data)
366 364 while off <= l:
367 365 e = unpack(indexformatng, data[off:off + s])
368 366 nodemap[e[7]] = n
369 367 append(e)
370 368 n += 1
371 369 if e[1] < 0:
372 370 break
373 371 off += e[1] + s
374 372 else:
375 373 while off <= l:
376 374 e = unpack(indexformatng, data[off:off + s])
377 375 nodemap[e[7]] = n
378 376 append(e)
379 377 n += 1
380 378 off += s
381 379
382 380 e = list(index[0])
383 381 type = gettype(e[0])
384 382 e[0] = offset_type(0, type)
385 383 index[0] = e
386 384
387 385 return index, nodemap, cache
388 386
389 387 def packentry(self, entry, node, version):
390 388 p = struct.pack(indexformatng, *entry)
391 389 if not entry[3] and not getoffset(entry[0]) and entry[5] == nullrev:
392 390 p = struct.pack(versionformat, version) + p[4:]
393 391 return p
394 392
395 393 class revlog(object):
396 394 """
397 395 the underlying revision storage object
398 396
399 397 A revlog consists of two parts, an index and the revision data.
400 398
401 399 The index is a file with a fixed record size containing
402 400 information on each revision, including its nodeid (hash), the
403 401 nodeids of its parents, the position and offset of its data within
404 402 the data file, and the revision it's based on. Finally, each entry
405 403 contains a linkrev entry that can serve as a pointer to external
406 404 data.
407 405
408 406 The revision data itself is a linear collection of data chunks.
409 407 Each chunk represents a revision and is usually represented as a
410 408 delta against the previous chunk. To bound lookup time, runs of
411 409 deltas are limited to about 2 times the length of the original
412 410 version data. This makes retrieval of a version proportional to
413 411 its size, or O(1) relative to the number of revisions.
414 412
415 413 Both pieces of the revlog are written to in an append-only
416 414 fashion, which means we never need to rewrite a file to insert or
417 415 remove data, and can use some simple techniques to avoid the need
418 416 for locking while reading.
419 417 """
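The "about 2 times" bound is enforced at write time; a sketch of the test _addrevision() applies further down (illustrative names, not revlog API):

    def needs_full_version(rl, base, offset, deltalen, textlen):
        # bytes revision() would read: from the start of the chain's
        # base through the end of the newly appended delta
        dist = deltalen + offset - rl.start(base)
        return dist > textlen * 2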
420 418 def __init__(self, opener, indexfile):
421 419 """
422 420 create a revlog object
423 421
424 422 opener is a function that abstracts the file opening operation
425 423 and can be used to implement COW semantics or the like.
426 424 """
427 425 self.indexfile = indexfile
428 426 self.datafile = indexfile[:-2] + ".d"
429 427 self.opener = opener
430 428 self._cache = None
431 429 self._chunkcache = None
432 430 self.nodemap = {nullid: nullrev}
433 431 self.index = []
434 432
435 433 v = REVLOG_DEFAULT_VERSION
436 434 if hasattr(opener, "defversion"):
437 435 v = opener.defversion
438 436 if v & REVLOGNG:
439 437 v |= REVLOGNGINLINEDATA
440 438
441 439 i = ""
442 440 try:
443 441 f = self.opener(self.indexfile)
444 442 i = f.read(4)
445 443 f.seek(0)
446 444 if len(i) > 0:
447 445 v = struct.unpack(versionformat, i)[0]
448 446 except IOError, inst:
449 447 if inst.errno != errno.ENOENT:
450 448 raise
451 449
452 450 self.version = v
453 451 self._inline = v & REVLOGNGINLINEDATA
454 452 flags = v & ~0xFFFF
455 453 fmt = v & 0xFFFF
456 454 if fmt == REVLOGV0 and flags:
457 455 raise RevlogError(_("index %s unknown flags %#04x for format v0")
458 456 % (self.indexfile, flags >> 16))
459 457 elif fmt == REVLOGNG and flags & ~REVLOGNGINLINEDATA:
460 458 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
461 459 % (self.indexfile, flags >> 16))
462 460 elif fmt > REVLOGNG:
463 461 raise RevlogError(_("index %s unknown format %d")
464 462 % (self.indexfile, fmt))
465 463
466 464 self._io = revlogio()
467 465 if self.version == REVLOGV0:
468 466 self._io = revlogoldio()
469 467 if i:
470 468 d = self._io.parseindex(f, self._inline)
471 469 self.index, self.nodemap, self._chunkcache = d
472 470
473 471 # add the magic null revision at -1
474 472 self.index.append((0, 0, 0, -1, -1, -1, -1, nullid))
475 473
476 474 def _loadindex(self, start, end):
477 475 """load a block of indexes all at once from the lazy parser"""
478 476 if isinstance(self.index, lazyindex):
479 477 self.index.p.loadindex(start, end)
480 478
481 479 def _loadindexmap(self):
482 480 """loads both the map and the index from the lazy parser"""
483 481 if isinstance(self.index, lazyindex):
484 482 p = self.index.p
485 483 p.loadindex()
486 484 self.nodemap = p.map
487 485
488 486 def _loadmap(self):
489 487 """loads the map from the lazy parser"""
490 488 if isinstance(self.nodemap, lazymap):
491 489 self.nodemap.p.loadmap()
492 490 self.nodemap = self.nodemap.p.map
493 491
494 492 def tip(self):
495 493 return self.node(len(self.index) - 2)
496 494 def count(self):
497 495 return len(self.index) - 1
498 496
499 497 def rev(self, node):
500 498 try:
501 499 return self.nodemap[node]
502 500 except KeyError:
503 501 raise LookupError(_('%s: no node %s') % (self.indexfile, hex(node)))
504 502 def node(self, rev):
505 503 return self.index[rev][7]
506 504 def linkrev(self, node):
507 505 return self.index[self.rev(node)][4]
508 506 def parents(self, node):
509 507 d = self.index[self.rev(node)][5:7]
510 508 return (self.node(d[0]), self.node(d[1]))
511 509 def parentrevs(self, rev):
512 510 return self.index[rev][5:7]
513 511 def start(self, rev):
514 512 return getoffset(self.index[rev][0])
515 513 def end(self, rev):
516 514 return self.start(rev) + self.length(rev)
517 515 def length(self, rev):
518 516 return self.index[rev][1]
519 517 def base(self, rev):
520 518 return self.index[rev][3]
521 519
522 520 def size(self, rev):
523 521 """return the length of the uncompressed text for a given revision"""
524 522 l = self.index[rev][2]
525 523 if l >= 0:
526 524 return l
527 525
528 526 t = self.revision(self.node(rev))
529 527 return len(t)
530 528
531 529 # alternate implementation: the advantage of this code is that it
532 530 # will be faster for a single revision. But the results are not
533 531 # cached, so finding the size of every revision will be slower.
534 532 """
535 533 if self.cache and self.cache[1] == rev:
536 534 return len(self.cache[2])
537 535
538 536 base = self.base(rev)
539 537 if self.cache and self.cache[1] >= base and self.cache[1] < rev:
540 538 base = self.cache[1]
541 539 text = self.cache[2]
542 540 else:
543 541 text = self.revision(self.node(base))
544 542
545 543 l = len(text)
546 544 for x in xrange(base + 1, rev + 1):
547 545 l = mdiff.patchedsize(l, self.chunk(x))
548 546 return l
549 547 """
550 548
551 549 def reachable(self, node, stop=None):
552 550 """return a hash of all nodes ancestral to a given node, including
553 551 the node itself, stopping when stop is matched"""
554 552 reachable = {}
555 553 visit = [node]
556 554 reachable[node] = 1
557 555 if stop:
558 556 stopn = self.rev(stop)
559 557 else:
560 558 stopn = 0
561 559 while visit:
562 560 n = visit.pop(0)
563 561 if n == stop:
564 562 continue
565 563 if n == nullid:
566 564 continue
567 565 for p in self.parents(n):
568 566 if self.rev(p) < stopn:
569 567 continue
570 568 if p not in reachable:
571 569 reachable[p] = 1
572 570 visit.append(p)
573 571 return reachable
574 572
575 573 def nodesbetween(self, roots=None, heads=None):
576 574 """Return a tuple containing three elements. Elements 1 and 2 contain
577 575 the final lists of bases and heads after all the unreachable ones have been
578 576 pruned. Element 0 contains a topologically sorted list of all
579 577
580 578 nodes that satisfy these constraints:
581 579 1. All nodes must be descended from a node in roots (the nodes on
582 580 roots are considered descended from themselves).
583 581 2. All nodes must also be ancestors of a node in heads (the nodes in
584 582 heads are considered to be their own ancestors).
585 583
586 584 If roots is unspecified, nullid is assumed as the only root.
587 585 If heads is unspecified, it is taken to be the output of the
588 586 heads method (i.e. a list of all nodes in the repository that
589 587 have no children)."""
590 588 nonodes = ([], [], [])
591 589 if roots is not None:
592 590 roots = list(roots)
593 591 if not roots:
594 592 return nonodes
595 593 lowestrev = min([self.rev(n) for n in roots])
596 594 else:
597 595 roots = [nullid] # Everybody's a descendent of nullid
598 596 lowestrev = nullrev
599 597 if (lowestrev == nullrev) and (heads is None):
600 598 # We want _all_ the nodes!
601 599 return ([self.node(r) for r in xrange(0, self.count())],
602 600 [nullid], list(self.heads()))
603 601 if heads is None:
604 602 # All nodes are ancestors, so the latest ancestor is the last
605 603 # node.
606 604 highestrev = self.count() - 1
607 605 # Set ancestors to None to signal that every node is an ancestor.
608 606 ancestors = None
609 607 # Set heads to an empty dictionary for later discovery of heads
610 608 heads = {}
611 609 else:
612 610 heads = list(heads)
613 611 if not heads:
614 612 return nonodes
615 613 ancestors = {}
616 614 # Turn heads into a dictionary so we can remove 'fake' heads.
617 615 # Also, later we will be using it to filter out the heads we can't
618 616 # find from roots.
619 617 heads = dict.fromkeys(heads, 0)
620 618 # Start at the top and keep marking parents until we're done.
621 619 nodestotag = heads.keys()
622 620 # Remember where the top was so we can use it as a limit later.
623 621 highestrev = max([self.rev(n) for n in nodestotag])
624 622 while nodestotag:
625 623 # grab a node to tag
626 624 n = nodestotag.pop()
627 625 # Never tag nullid
628 626 if n == nullid:
629 627 continue
630 628 # A node's revision number represents its place in a
631 629 # topologically sorted list of nodes.
632 630 r = self.rev(n)
633 631 if r >= lowestrev:
634 632 if n not in ancestors:
635 633 # If we are possibly a descendent of one of the roots
636 634 # and we haven't already been marked as an ancestor
637 635 ancestors[n] = 1 # Mark as ancestor
638 636 # Add non-nullid parents to list of nodes to tag.
639 637 nodestotag.extend([p for p in self.parents(n) if
640 638 p != nullid])
641 639 elif n in heads: # We've seen it before, is it a fake head?
642 640 # So it is, real heads should not be the ancestors of
643 641 # any other heads.
644 642 heads.pop(n)
645 643 if not ancestors:
646 644 return nonodes
647 645 # Now that we have our set of ancestors, we want to remove any
648 646 # roots that are not ancestors.
649 647
650 648 # If one of the roots was nullid, everything is included anyway.
651 649 if lowestrev > nullrev:
652 650 # But, since we weren't, let's recompute the lowest rev to not
653 651 # include roots that aren't ancestors.
654 652
655 653 # Filter out roots that aren't ancestors of heads
656 654 roots = [n for n in roots if n in ancestors]
657 655 # Recompute the lowest revision
658 656 if roots:
659 657 lowestrev = min([self.rev(n) for n in roots])
660 658 else:
661 659 # No more roots? Return empty list
662 660 return nonodes
663 661 else:
664 662 # We are descending from nullid, and don't need to care about
665 663 # any other roots.
666 664 lowestrev = nullrev
667 665 roots = [nullid]
668 666 # Transform our roots list into a 'set' (i.e. a dictionary where the
669 667 # values don't matter.
670 668 descendents = dict.fromkeys(roots, 1)
671 669 # Also, keep the original roots so we can filter out roots that aren't
672 670 # 'real' roots (i.e. are descended from other roots).
673 671 roots = descendents.copy()
674 672 # Our topologically sorted list of output nodes.
675 673 orderedout = []
676 674 # Don't start at nullid since we don't want nullid in our output list,
677 675 # and if nullid shows up in descendents, empty parents will look like
678 676 # they're descendents.
679 677 for r in xrange(max(lowestrev, 0), highestrev + 1):
680 678 n = self.node(r)
681 679 isdescendent = False
682 680 if lowestrev == nullrev: # Everybody is a descendent of nullid
683 681 isdescendent = True
684 682 elif n in descendents:
685 683 # n is already a descendent
686 684 isdescendent = True
687 685 # This check only needs to be done here because all the roots
688 686 # will start being marked as descendents before the loop.
689 687 if n in roots:
690 688 # If n was a root, check if it's a 'real' root.
691 689 p = tuple(self.parents(n))
692 690 # If any of its parents are descendents, it's not a root.
693 691 if (p[0] in descendents) or (p[1] in descendents):
694 692 roots.pop(n)
695 693 else:
696 694 p = tuple(self.parents(n))
697 695 # A node is a descendent if either of its parents is a
698 696 # descendent. (We seeded the descendents dict with the roots
699 697 # up there, remember?)
700 698 if (p[0] in descendents) or (p[1] in descendents):
701 699 descendents[n] = 1
702 700 isdescendent = True
703 701 if isdescendent and ((ancestors is None) or (n in ancestors)):
704 702 # Only include nodes that are both descendents and ancestors.
705 703 orderedout.append(n)
706 704 if (ancestors is not None) and (n in heads):
707 705 # We're trying to figure out which heads are reachable
708 706 # from roots.
709 707 # Mark this head as having been reached
710 708 heads[n] = 1
711 709 elif ancestors is None:
712 710 # Otherwise, we're trying to discover the heads.
713 711 # Assume this is a head because if it isn't, the next step
714 712 # will eventually remove it.
715 713 heads[n] = 1
716 714 # But, obviously its parents aren't.
717 715 for p in self.parents(n):
718 716 heads.pop(p, None)
719 717 heads = [n for n in heads.iterkeys() if heads[n] != 0]
720 718 roots = roots.keys()
721 719 assert orderedout
722 720 assert roots
723 721 assert heads
724 722 return (orderedout, roots, heads)
725 723
726 724 def heads(self, start=None, stop=None):
727 725 """return the list of all nodes that have no children
728 726
729 727 if start is specified, only heads that are descendants of
730 728 start will be returned
731 729 if stop is specified, it will consider all the revs from stop
732 730 as if they had no children
733 731 """
734 732 if start is None and stop is None:
735 733 count = self.count()
736 734 if not count:
737 735 return [nullid]
738 736 ishead = [1] * (count + 1)
739 737 index = self.index
740 738 for r in xrange(count):
741 739 e = index[r]
742 740 ishead[e[5]] = ishead[e[6]] = 0
743 741 return [self.node(r) for r in xrange(count) if ishead[r]]
744 742
745 743 if start is None:
746 744 start = nullid
747 745 if stop is None:
748 746 stop = []
749 747 stoprevs = dict.fromkeys([self.rev(n) for n in stop])
750 748 startrev = self.rev(start)
751 749 reachable = {startrev: 1}
752 750 heads = {startrev: 1}
753 751
754 752 parentrevs = self.parentrevs
755 753 for r in xrange(startrev + 1, self.count()):
756 754 for p in parentrevs(r):
757 755 if p in reachable:
758 756 if r not in stoprevs:
759 757 reachable[r] = 1
760 758 heads[r] = 1
761 759 if p in heads and p not in stoprevs:
762 760 del heads[p]
763 761
764 762 return [self.node(r) for r in heads]
765 763
766 764 def children(self, node):
767 765 """find the children of a given node"""
768 766 c = []
769 767 p = self.rev(node)
770 768 for r in range(p + 1, self.count()):
771 769 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
772 770 if prevs:
773 771 for pr in prevs:
774 772 if pr == p:
775 773 c.append(self.node(r))
776 774 elif p == nullrev:
777 775 c.append(self.node(r))
778 776 return c
779 777
780 778 def _match(self, id):
781 779 if isinstance(id, (long, int)):
782 780 # rev
783 781 return self.node(id)
784 782 if len(id) == 20:
785 783 # possibly a binary node
786 784 # odds of a binary node being all hex in ASCII are 1 in 10**25
787 785 try:
788 786 node = id
789 787 r = self.rev(node) # quick search the index
790 788 return node
791 789 except LookupError:
792 790 pass # may be partial hex id
793 791 try:
794 792 # str(rev)
795 793 rev = int(id)
796 794 if str(rev) != id:
797 795 raise ValueError
798 796 if rev < 0:
799 797 rev = self.count() + rev
800 798 if rev < 0 or rev >= self.count():
801 799 raise ValueError
802 800 return self.node(rev)
803 801 except (ValueError, OverflowError):
804 802 pass
805 803 if len(id) == 40:
806 804 try:
807 805 # a full hex nodeid?
808 806 node = bin(id)
809 807 r = self.rev(node)
810 808 return node
811 809 except TypeError:
812 810 pass
813 811
814 812 def _partialmatch(self, id):
815 813 if len(id) < 40:
816 814 try:
817 815 # hex(node)[:...]
818 816 bin_id = bin(id[:len(id) & ~1]) # grab an even number of digits
819 817 node = None
820 818 for n in self.nodemap:
821 819 if n.startswith(bin_id) and hex(n).startswith(id):
822 820 if node is not None:
823 821 raise LookupError(_("Ambiguous identifier"))
824 822 node = n
825 823 if node is not None:
826 824 return node
827 825 except TypeError:
828 826 pass
829 827
830 828 def lookup(self, id):
831 829 """locate a node based on:
832 830 - revision number or str(revision number)
833 831 - nodeid or subset of hex nodeid
834 832 """
835 833 n = self._match(id)
836 834 if n is not None:
837 835 return n
838 836 n = self._partialmatch(id)
839 837 if n:
840 838 return n
841 839
842 840 raise LookupError(_("No match found"))
843 841
844 842 def cmp(self, node, text):
845 843 """compare text with a given file revision"""
846 844 p1, p2 = self.parents(node)
847 845 return hash(text, p1, p2) != node
848 846
849 847 def chunk(self, rev, df=None):
850 848 start, length = self.start(rev), self.length(rev)
851 849 if self._inline:
852 850 start += (rev + 1) * self._io.size
853 851 end = start + length
854 852 def loadcache(df):
855 853 cache_length = max(65536, length)
856 854 if not df:
857 855 if self._inline:
858 856 df = self.opener(self.indexfile)
859 857 else:
860 858 df = self.opener(self.datafile)
861 859 df.seek(start)
862 860 self._chunkcache = (start, df.read(cache_length))
863 861
864 862 if not self._chunkcache:
865 863 loadcache(df)
866 864
867 865 cache_start = self._chunkcache[0]
868 866 cache_end = cache_start + len(self._chunkcache[1])
869 867 if start >= cache_start and end <= cache_end:
870 868 # it is cached
871 869 offset = start - cache_start
872 870 else:
873 871 loadcache(df)
874 872 offset = 0
875 873
876 874 # avoid copying large chunks
877 875 c = self._chunkcache[1]
878 876 if len(c) > length:
879 877 c = c[offset:offset + length]
880 878
881 879 return decompress(c)
882 880
883 881 def delta(self, node):
884 882 """return or calculate a delta between a node and its predecessor"""
885 883 r = self.rev(node)
886 884 return self.revdiff(r - 1, r)
887 885
888 886 def revdiff(self, rev1, rev2):
889 887 """return or calculate a delta between two revisions"""
890 888 b1 = self.base(rev1)
891 889 b2 = self.base(rev2)
892 890 if b1 == b2 and rev1 + 1 == rev2:
893 891 return self.chunk(rev2)
894 892 else:
895 893 return mdiff.textdiff(self.revision(self.node(rev1)),
896 894 self.revision(self.node(rev2)))
897 895
898 896 def revision(self, node):
899 897 """return an uncompressed revision of a given node"""
900 898 if node == nullid:
901 899 return ""
902 900 if self._cache and self._cache[0] == node:
903 901 return self._cache[2]
904 902
905 903 # look up what we need to read
906 904 text = None
907 905 rev = self.rev(node)
908 906 base = self.base(rev)
909 907
908 # check rev flags
909 if self.index[rev][0] & 0xFFFF:
910 raise RevlogError(_('incompatible revision flag %x') % (self.index[rev][0] & 0xFFFF))
911
910 912 if self._inline:
911 913 # we probably have the whole chunk cached
912 914 df = None
913 915 else:
914 916 df = self.opener(self.datafile)
915 917
916 918 # do we have useful data cached?
917 919 if self._cache and self._cache[1] >= base and self._cache[1] < rev:
918 920 base = self._cache[1]
919 921 text = self._cache[2]
920 922 self._loadindex(base, rev + 1)
921 923 else:
922 924 self._loadindex(base, rev + 1)
923 925 text = self.chunk(base, df=df)
924 926
925 927 bins = [self.chunk(r, df) for r in xrange(base + 1, rev + 1)]
926 928 text = mdiff.patches(text, bins)
927 929 p1, p2 = self.parents(node)
928 930 if node != hash(text, p1, p2):
929 931 raise RevlogError(_("integrity check failed on %s:%d")
930 932 % (self.datafile, rev))
931 933
932 934 self._cache = (node, rev, text)
933 935 return text
934 936
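A usage sketch (assuming a non-empty revlog rl with no revision flags set):

    node = rl.lookup('0')      # first revision
    text = rl.revision(node)   # base chunk plus patched deltas
    assert not rl.cmp(node, text)   # hash(text, p1, p2) matches node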
935 937 def checkinlinesize(self, tr, fp=None):
936 938 if not self._inline:
937 939 return
938 940 if not fp:
939 941 fp = self.opener(self.indexfile, 'r')
940 942 fp.seek(0, 2)
941 943 size = fp.tell()
942 944 if size < 131072:
943 945 return
944 946 trinfo = tr.find(self.indexfile)
945 947 if trinfo == None:
946 948 raise RevlogError(_("%s not found in the transaction")
947 949 % self.indexfile)
948 950
949 951 trindex = trinfo[2]
950 952 dataoff = self.start(trindex)
951 953
952 954 tr.add(self.datafile, dataoff)
953 955 df = self.opener(self.datafile, 'w')
954 956 calc = self._io.size
955 957 for r in xrange(self.count()):
956 958 start = self.start(r) + (r + 1) * calc
957 959 length = self.length(r)
958 960 fp.seek(start)
959 961 d = fp.read(length)
960 962 df.write(d)
961 963 fp.close()
962 964 df.close()
963 965 fp = self.opener(self.indexfile, 'w', atomictemp=True)
964 966 self.version &= ~(REVLOGNGINLINEDATA)
965 967 self._inline = False
966 968 for i in xrange(self.count()):
967 969 e = self._io.packentry(self.index[i], self.node, self.version)
968 970 fp.write(e)
969 971
970 972 # if we don't call rename, the temp file will never replace the
971 973 # real index
972 974 fp.rename()
973 975
974 976 tr.replace(self.indexfile, trindex * calc)
975 977 self._chunkcache = None
976 978
977 979 def addrevision(self, text, transaction, link, p1, p2, d=None):
978 980 """add a revision to the log
979 981
980 982 text - the revision data to add
981 983 transaction - the transaction object used for rollback
982 984 link - the linkrev data to add
983 985 p1, p2 - the parent nodeids of the revision
984 986 d - an optional precomputed delta
985 987 """
986 988 dfh = None
987 989 if not self._inline:
988 990 dfh = self.opener(self.datafile, "a")
989 991 ifh = self.opener(self.indexfile, "a+")
990 992 return self._addrevision(text, transaction, link, p1, p2, d, ifh, dfh)
991 993
992 994 def _addrevision(self, text, transaction, link, p1, p2, d, ifh, dfh):
993 995 node = hash(text, p1, p2)
994 996 if node in self.nodemap:
995 997 return node
996 998
997 999 curr = self.count()
998 1000 prev = curr - 1
999 1001 base = self.base(prev)
1000 1002 offset = self.end(prev)
1001 1003
1002 1004 if curr:
1003 1005 if not d:
1004 1006 ptext = self.revision(self.node(prev))
1005 1007 d = mdiff.textdiff(ptext, text)
1006 1008 data = compress(d)
1007 1009 l = len(data[1]) + len(data[0])
1008 1010 dist = l + offset - self.start(base)
1009 1011
1010 1012 # full versions are inserted when the needed deltas
1011 1013 # become comparable to the uncompressed text
1012 1014 if not curr or dist > len(text) * 2:
1013 1015 data = compress(text)
1014 1016 l = len(data[1]) + len(data[0])
1015 1017 base = curr
1016 1018
1017 1019 e = (offset_type(offset, 0), l, len(text),
1018 1020 base, link, self.rev(p1), self.rev(p2), node)
1019 1021 self.index.insert(-1, e)
1020 1022 self.nodemap[node] = curr
1021 1023
1022 1024 entry = self._io.packentry(e, self.node, self.version)
1023 1025 if not self._inline:
1024 1026 transaction.add(self.datafile, offset)
1025 1027 transaction.add(self.indexfile, curr * len(entry))
1026 1028 if data[0]:
1027 1029 dfh.write(data[0])
1028 1030 dfh.write(data[1])
1029 1031 dfh.flush()
1030 1032 ifh.write(entry)
1031 1033 else:
1032 1034 offset += curr * self._io.size
1033 1035 transaction.add(self.indexfile, offset, prev)
1034 1036 ifh.write(entry)
1035 1037 ifh.write(data[0])
1036 1038 ifh.write(data[1])
1037 1039 self.checkinlinesize(transaction, ifh)
1038 1040
1039 1041 self._cache = (node, curr, text)
1040 1042 return node
1041 1043
1042 1044 def ancestor(self, a, b):
1043 1045 """calculate the least common ancestor of nodes a and b"""
1044 1046
1045 1047 def parents(rev):
1046 1048 return [p for p in self.parentrevs(rev) if p != nullrev]
1047 1049
1048 1050 c = ancestor.ancestor(self.rev(a), self.rev(b), parents)
1049 1051 if c is None:
1050 1052 return nullid
1051 1053
1052 1054 return self.node(c)
1053 1055
1054 1056 def group(self, nodelist, lookup, infocollect=None):
1055 1057 """calculate a delta group
1056 1058
1057 1059 Given a list of changeset revs, return a set of deltas and
1058 1060 metadata corresponding to nodes. The first delta is
1059 1061 parent(nodes[0]) -> nodes[0]; the receiver is guaranteed to
1060 1062 have this parent as it has all history before these
1061 1063 changesets. parent is parent[0].
1062 1064 """
1063 1065 revs = [self.rev(n) for n in nodelist]
1064 1066
1065 1067 # if we don't have any revisions touched by these changesets, bail
1066 1068 if not revs:
1067 1069 yield changegroup.closechunk()
1068 1070 return
1069 1071
1070 1072 # add the parent of the first rev
1071 1073 p = self.parents(self.node(revs[0]))[0]
1072 1074 revs.insert(0, self.rev(p))
1073 1075
1074 1076 # build deltas
1075 1077 for d in xrange(0, len(revs) - 1):
1076 1078 a, b = revs[d], revs[d + 1]
1077 1079 nb = self.node(b)
1078 1080
1079 1081 if infocollect is not None:
1080 1082 infocollect(nb)
1081 1083
1082 1084 d = self.revdiff(a, b)
1083 1085 p = self.parents(nb)
1084 1086 meta = nb + p[0] + p[1] + lookup(nb)
1085 1087 yield changegroup.genchunk("%s%s" % (meta, d))
1086 1088
1087 1089 yield changegroup.closechunk()
1088 1090
1089 1091 def addgroup(self, revs, linkmapper, transaction, unique=0):
1090 1092 """
1091 1093 add a delta group
1092 1094
1093 1095 given a set of deltas, add them to the revision log. The
1094 1096 first delta is against its parent, which should be in our
1095 1097 log; the rest are against the previous delta.
1096 1098 """
1097 1099
1098 1100 # track the base of the current delta log
1099 1101 r = self.count()
1100 1102 t = r - 1
1101 1103 node = None
1102 1104
1103 1105 base = prev = nullrev
1104 1106 start = end = textlen = 0
1105 1107 if r:
1106 1108 end = self.end(t)
1107 1109
1108 1110 ifh = self.opener(self.indexfile, "a+")
1109 1111 isize = r * self._io.size
1110 1112 if self._inline:
1111 1113 transaction.add(self.indexfile, end + isize, r)
1112 1114 dfh = None
1113 1115 else:
1114 1116 transaction.add(self.indexfile, isize, r)
1115 1117 transaction.add(self.datafile, end)
1116 1118 dfh = self.opener(self.datafile, "a")
1117 1119
1118 1120 # loop through our set of deltas
1119 1121 chain = None
1120 1122 for chunk in revs:
1121 1123 node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
1122 1124 link = linkmapper(cs)
1123 1125 if node in self.nodemap:
1124 1126 # this can happen if two branches make the same change
1125 1127 # if unique:
1126 1128 # raise RevlogError(_("already have %s") % hex(node[:4]))
1127 1129 chain = node
1128 1130 continue
1129 1131 delta = chunk[80:]
1130 1132
1131 1133 for p in (p1, p2):
1132 1134 if not p in self.nodemap:
1133 1135 raise LookupError(_("unknown parent %s") % short(p))
1134 1136
1135 1137 if not chain:
1136 1138 # retrieve the parent revision of the delta chain
1137 1139 chain = p1
1138 1140 if not chain in self.nodemap:
1139 1141 raise LookupError(_("unknown base %s") % short(chain[:4]))
1140 1142
1141 1143 # full versions are inserted when the needed deltas become
1142 1144 # comparable to the uncompressed text or when the previous
1143 1145 # version is not the one we have a delta against. We use
1144 1146 # the size of the previous full rev as a proxy for the
1145 1147 # current size.
1146 1148
1147 1149 if chain == prev:
1148 1150 tempd = compress(delta)
1149 1151 cdelta = tempd[0] + tempd[1]
1150 1152 textlen = mdiff.patchedsize(textlen, delta)
1151 1153
1152 1154 if chain != prev or (end - start + len(cdelta)) > textlen * 2:
1153 1155 # flush our writes here so we can read it in revision
1154 1156 if dfh:
1155 1157 dfh.flush()
1156 1158 ifh.flush()
1157 1159 text = self.revision(chain)
1158 1160 text = mdiff.patches(text, [delta])
1159 1161 chk = self._addrevision(text, transaction, link, p1, p2, None,
1160 1162 ifh, dfh)
1161 1163 if not dfh and not self._inline:
1162 1164 # addrevision switched from inline to conventional
1163 1165 # reopen the index
1164 1166 dfh = self.opener(self.datafile, "a")
1165 1167 ifh = self.opener(self.indexfile, "a")
1166 1168 if chk != node:
1167 1169 raise RevlogError(_("consistency error adding group"))
1168 1170 textlen = len(text)
1169 1171 else:
1170 1172 e = (offset_type(end, 0), len(cdelta), textlen, base,
1171 1173 link, self.rev(p1), self.rev(p2), node)
1172 1174 self.index.insert(-1, e)
1173 1175 self.nodemap[node] = r
1174 1176 entry = self._io.packentry(e, self.node, self.version)
1175 1177 if self._inline:
1176 1178 ifh.write(entry)
1177 1179 ifh.write(cdelta)
1178 1180 self.checkinlinesize(transaction, ifh)
1179 1181 if not self._inline:
1180 1182 dfh = self.opener(self.datafile, "a")
1181 1183 ifh = self.opener(self.indexfile, "a")
1182 1184 else:
1183 1185 dfh.write(cdelta)
1184 1186 ifh.write(entry)
1185 1187
1186 1188 t, r, chain, prev = r, r + 1, node, node
1187 1189 base = self.base(t)
1188 1190 start = self.start(base)
1189 1191 end = self.end(t)
1190 1192
1191 1193 return node
1192 1194
1193 1195 def strip(self, rev, minlink):
1194 1196 if self.count() == 0 or rev >= self.count():
1195 1197 return
1196 1198
1197 1199 if isinstance(self.index, lazyindex):
1198 1200 self._loadindexmap()
1199 1201
1200 1202 # When stripping away a revision, we need to make sure it
1201 1203 # does not actually belong to an older changeset.
1202 1204 # The minlink parameter defines the oldest revision
1203 1205 # we're allowed to strip away.
1204 1206 while minlink > self.index[rev][4]:
1205 1207 rev += 1
1206 1208 if rev >= self.count():
1207 1209 return
1208 1210
1209 1211 # first truncate the files on disk
1210 1212 end = self.start(rev)
1211 1213 if not self._inline:
1212 1214 df = self.opener(self.datafile, "a")
1213 1215 df.truncate(end)
1214 1216 end = rev * self._io.size
1215 1217 else:
1216 1218 end += rev * self._io.size
1217 1219
1218 1220 indexf = self.opener(self.indexfile, "a")
1219 1221 indexf.truncate(end)
1220 1222
1221 1223 # then reset internal state in memory to forget those revisions
1222 1224 self._cache = None
1223 1225 self._chunkcache = None
1224 1226 for x in xrange(rev, self.count()):
1225 1227 del self.nodemap[self.node(x)]
1226 1228
1227 1229 del self.index[rev:-1]
1228 1230
1229 1231 def checksize(self):
1230 1232 expected = 0
1231 1233 if self.count():
1232 1234 expected = self.end(self.count() - 1)
1233 1235
1234 1236 try:
1235 1237 f = self.opener(self.datafile)
1236 1238 f.seek(0, 2)
1237 1239 actual = f.tell()
1238 1240 dd = actual - expected
1239 1241 except IOError, inst:
1240 1242 if inst.errno != errno.ENOENT:
1241 1243 raise
1242 1244 dd = 0
1243 1245
1244 1246 try:
1245 1247 f = self.opener(self.indexfile)
1246 1248 f.seek(0, 2)
1247 1249 actual = f.tell()
1248 1250 s = self._io.size
1249 1251 i = actual / s
1250 1252 di = actual - (i * s)
1251 1253 if self._inline:
1252 1254 databytes = 0
1253 1255 for r in xrange(self.count()):
1254 1256 databytes += self.length(r)
1255 1257 dd = 0
1256 1258 di = actual - self.count() * s - databytes
1257 1259 except IOError, inst:
1258 1260 if inst.errno != errno.ENOENT:
1259 1261 raise
1260 1262 di = 0
1261 1263
1262 1264 return (dd, di)
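checksize() reports how many bytes the data and index files hold beyond what the index entries account for; (0, 0) means the sizes are consistent. A usage sketch (rl is assumed to be an open revlog):

    dd, di = rl.checksize()
    if dd or di:
        print 'size mismatch: %d data bytes, %d index bytes' % (dd, di)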