revlog: change _inline from a function to a variable
Matt Mackall
r4982:9672e3c4 default
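The change itself is small: _inline used to be a method that recomputed self.version & REVLOGNGINLINEDATA on every call, and this commit computes the flag once in _load and stores it as an instance attribute, so the many call sites below turn from self._inline() into self._inline. A minimal sketch of the before/after pattern (the class names are illustrative, not from the source):

    REVLOGNGINLINEDATA = 1 << 16

    class Before(object):
        def __init__(self, version):
            self.version = version
        def _inline(self):
            # recomputed on every call
            return self.version & REVLOGNGINLINEDATA

    class After(object):
        def __init__(self, version):
            self.version = version
            # computed once at load time; callers read a plain attribute
            self._inline = version & REVLOGNGINLINEDATA

    r = After(1 | REVLOGNGINLINEDATA)
    assert r._inline          # truthy when the inline-data flag is set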
@@ -1,1277 +1,1277 @@
1 1 """
2 2 revlog.py - storage back-end for mercurial
3 3
4 4 This provides efficient delta storage with O(1) retrieve and append
5 5 and O(changes) merge between branches
6 6
7 7 Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
8 8
9 9 This software may be used and distributed according to the terms
10 10 of the GNU General Public License, incorporated herein by reference.
11 11 """
12 12
13 13 from node import *
14 14 from i18n import _
15 15 import binascii, changegroup, errno, ancestor, mdiff, os
16 16 import sha, struct, util, zlib
17 17
18 18 # revlog version strings
19 19 REVLOGV0 = 0
20 20 REVLOGNG = 1
21 21
22 22 # revlog flags
23 23 REVLOGNGINLINEDATA = (1 << 16)
24 24 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
25 25
26 26 REVLOG_DEFAULT_FORMAT = REVLOGNG
27 27 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
28 28
29 29 def hash(text, p1, p2):
30 30 """generate a hash from the given text and its parent hashes
31 31
32 32 This hash combines both the current file contents and its history
33 33 in a manner that makes it easy to distinguish nodes with the same
34 34 content in the revision graph.
35 35 """
36 36 l = [p1, p2]
37 37 l.sort()
38 38 s = sha.new(l[0])
39 39 s.update(l[1])
40 40 s.update(text)
41 41 return s.digest()
42 42
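As a usage sketch of the scheme above: the two parents are sorted before hashing, so the result is symmetric in p1 and p2. This version uses hashlib.sha1 (the modern replacement for the deprecated sha module) and made-up null parents; it is illustrative, not part of the source:

    import hashlib

    def node_hash(text, p1, p2):
        # parents are sorted so the hash is symmetric in (p1, p2)
        a, b = sorted([p1, p2])
        s = hashlib.sha1(a)
        s.update(b)
        s.update(text)
        return s.digest()

    nullid = b"\0" * 20                  # made-up parents for illustration
    n = node_hash(b"file contents\n", nullid, nullid)
    assert len(n) == 20                  # a revlog nodeid is a 20-byte sha1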
43 43 def compress(text):
44 44 """ generate a possibly-compressed representation of text """
45 45 if not text:
46 46 return ("", text)
47 47 if len(text) < 44:
48 48 if text[0] == '\0':
49 49 return ("", text)
50 50 return ('u', text)
51 51 bin = zlib.compress(text)
52 52 if len(bin) > len(text):
53 53 if text[0] == '\0':
54 54 return ("", text)
55 55 return ('u', text)
56 56 return ("", bin)
57 57
58 58 def decompress(bin):
59 59 """ decompress the given input """
60 60 if not bin:
61 61 return bin
62 62 t = bin[0]
63 63 if t == '\0':
64 64 return bin
65 65 if t == 'x':
66 66 return zlib.decompress(bin)
67 67 if t == 'u':
68 68 return bin[1:]
69 69 raise RevlogError(_("unknown compression type %r") % t)
70 70
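A quick round trip through these two helpers shows the tagging scheme: 'u' marks short or incompressible text stored verbatim, 'x' is simply the first byte of a zlib stream, and empty or NUL-leading text passes through untagged. A sketch assuming Python 2 byte strings, as used throughout this module:

    short = "tiny"                       # < 44 bytes: stored with a 'u' marker
    hdr, body = compress(short)
    assert (hdr, body) == ('u', 'tiny')
    assert decompress(hdr + body) == short

    long_text = "x" * 1000               # compressible: zlib output starts with 'x'
    hdr, body = compress(long_text)
    assert hdr == '' and body[0] == 'x'
    assert decompress(body) == long_text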
71 71 indexformatv0 = ">4l20s20s20s"
72 72 v0shaoffset = 56
73 73 # index ng:
74 74 # 6 bytes offset
75 75 # 2 bytes flags
76 76 # 4 bytes compressed length
77 77 # 4 bytes uncompressed length
78 78 # 4 bytes: base rev
79 79 # 4 bytes link rev
80 80 # 4 bytes parent 1 rev
81 81 # 4 bytes parent 2 rev
82 82 # 32 bytes: nodeid
83 83 indexformatng = ">Qiiiiii20s12x"
84 84 ngshaoffset = 32
85 85 versionformat = ">I"
86 86
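The comment block above describes indexformatng field by field, and struct.calcsize confirms the 64-byte record: an 8-byte offset/flags word, six 4-byte ints, a 20-byte nodeid, and 12 bytes of padding. A hedged pack/unpack sketch with dummy values (Python 2 strings, matching the module):

    import struct

    indexformatng = ">Qiiiiii20s12x"
    assert struct.calcsize(indexformatng) == 64

    entry = ((8192 << 16) | 0,  # byte offset in the high 48 bits, flags low
             11,                # compressed chunk length
             42,                # uncompressed text length
             0,                 # base rev of the delta chain
             0,                 # link rev
             -1, -1,            # parent revs (nullrev)
             "\x01" * 20)       # dummy 20-byte nodeid
    packed = struct.pack(indexformatng, *entry)
    assert struct.unpack(indexformatng, packed) == entry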
87 87 class lazyparser(object):
88 88 """
89 89 this class avoids the need to parse the entirety of large indices
90 90 """
91 91
92 92 # lazyparser is not safe to use on windows if the win32 extensions are
93 93 # not available. it keeps the file handle open, which makes it impossible
94 94 # to break hardlinks on local cloned repos.
95 95 safe_to_use = os.name != 'nt' or (not util.is_win_9x() and
96 96 hasattr(util, 'win32api'))
97 97
98 98 def __init__(self, dataf, size):
99 99 self.dataf = dataf
100 100 self.s = struct.calcsize(indexformatng)
101 101 self.datasize = size
102 102 self.l = size/self.s
103 103 self.index = [None] * self.l
104 104 self.map = {nullid: nullrev}
105 105 self.allmap = 0
106 106 self.all = 0
107 107 self.mapfind_count = 0
108 108
109 109 def loadmap(self):
110 110 """
111 111 during a commit, we need to make sure the rev being added is
112 112 not a duplicate. This requires loading the entire index,
113 113 which is fairly slow. loadmap can load up just the node map,
114 114 which takes much less time.
115 115 """
116 116 if self.allmap:
117 117 return
118 118 end = self.datasize
119 119 self.allmap = 1
120 120 cur = 0
121 121 count = 0
122 122 blocksize = self.s * 256
123 123 self.dataf.seek(0)
124 124 while cur < end:
125 125 data = self.dataf.read(blocksize)
126 126 off = 0
127 127 for x in xrange(256):
128 128 n = data[off + ngshaoffset:off + ngshaoffset + 20]
129 129 self.map[n] = count
130 130 count += 1
131 131 if count >= self.l:
132 132 break
133 133 off += self.s
134 134 cur += blocksize
135 135
136 136 def loadblock(self, blockstart, blocksize, data=None):
137 137 if self.all:
138 138 return
139 139 if data is None:
140 140 self.dataf.seek(blockstart)
141 141 if blockstart + blocksize > self.datasize:
142 142 # the revlog may have grown since we've started running,
143 143 # but we don't have space in self.index for more entries.
144 144 # limit blocksize so that we don't get too much data.
145 145 blocksize = max(self.datasize - blockstart, 0)
146 146 data = self.dataf.read(blocksize)
147 147 lend = len(data) / self.s
148 148 i = blockstart / self.s
149 149 off = 0
150 150 # lazyindex supports __delitem__
151 151 if lend > len(self.index) - i:
152 152 lend = len(self.index) - i
153 153 for x in xrange(lend):
154 154 if self.index[i + x] == None:
155 155 b = data[off : off + self.s]
156 156 self.index[i + x] = b
157 157 n = b[ngshaoffset:ngshaoffset + 20]
158 158 self.map[n] = i + x
159 159 off += self.s
160 160
161 161 def findnode(self, node):
162 162 """search backwards through the index file for a specific node"""
163 163 if self.allmap:
164 164 return None
165 165
166 166 # hg log will cause many many searches for the manifest
167 167 # nodes. After we get called a few times, just load the whole
168 168 # thing.
169 169 if self.mapfind_count > 8:
170 170 self.loadmap()
171 171 if node in self.map:
172 172 return node
173 173 return None
174 174 self.mapfind_count += 1
175 175 last = self.l - 1
176 176 while self.index[last] != None:
177 177 if last == 0:
178 178 self.all = 1
179 179 self.allmap = 1
180 180 return None
181 181 last -= 1
182 182 end = (last + 1) * self.s
183 183 blocksize = self.s * 256
184 184 while end >= 0:
185 185 start = max(end - blocksize, 0)
186 186 self.dataf.seek(start)
187 187 data = self.dataf.read(end - start)
188 188 findend = end - start
189 189 while True:
190 190 # we're searching backwards, so we have to make sure
191 191 # we don't find a changeset where this node is a parent
192 192 off = data.rfind(node, 0, findend)
193 193 findend = off
194 194 if off >= 0:
195 195 i = off / self.s
196 196 off = i * self.s
197 197 n = data[off + ngshaoffset:off + ngshaoffset + 20]
198 198 if n == node:
199 199 self.map[n] = i + start / self.s
200 200 return node
201 201 else:
202 202 break
203 203 end -= blocksize
204 204 return None
205 205
206 206 def loadindex(self, i=None, end=None):
207 207 if self.all:
208 208 return
209 209 all = False
210 210 if i == None:
211 211 blockstart = 0
212 212 blocksize = (512 / self.s) * self.s
213 213 end = self.datasize
214 214 all = True
215 215 else:
216 216 if end:
217 217 blockstart = i * self.s
218 218 end = end * self.s
219 219 blocksize = end - blockstart
220 220 else:
221 221 blockstart = (i & ~63) * self.s
222 222 blocksize = self.s * 64
223 223 end = blockstart + blocksize
224 224 while blockstart < end:
225 225 self.loadblock(blockstart, blocksize)
226 226 blockstart += blocksize
227 227 if all:
228 228 self.all = True
229 229
230 230 class lazyindex(object):
231 231 """a lazy version of the index array"""
232 232 def __init__(self, parser):
233 233 self.p = parser
234 234 def __len__(self):
235 235 return len(self.p.index)
236 236 def load(self, pos):
237 237 if pos < 0:
238 238 pos += len(self.p.index)
239 239 self.p.loadindex(pos)
240 240 return self.p.index[pos]
241 241 def __getitem__(self, pos):
242 242 ret = self.p.index[pos] or self.load(pos)
243 243 if isinstance(ret, str):
244 244 ret = struct.unpack(indexformatng, ret)
245 245 return ret
246 246 def __setitem__(self, pos, item):
247 247 self.p.index[pos] = item
248 248 def __delitem__(self, pos):
249 249 del self.p.index[pos]
250 250 def insert(self, pos, e):
251 251 self.p.index.insert(pos, e)
252 252 def append(self, e):
253 253 self.p.index.append(e)
254 254
255 255 class lazymap(object):
256 256 """a lazy version of the node map"""
257 257 def __init__(self, parser):
258 258 self.p = parser
259 259 def load(self, key):
260 260 n = self.p.findnode(key)
261 261 if n == None:
262 262 raise KeyError(key)
263 263 def __contains__(self, key):
264 264 if key in self.p.map:
265 265 return True
266 266 self.p.loadmap()
267 267 return key in self.p.map
268 268 def __iter__(self):
269 269 yield nullid
270 270 for i in xrange(self.p.l):
271 271 ret = self.p.index[i]
272 272 if not ret:
273 273 self.p.loadindex(i)
274 274 ret = self.p.index[i]
275 275 if isinstance(ret, str):
276 276 ret = struct.unpack(indexformatng, ret)
277 277 yield ret[7]
278 278 def __getitem__(self, key):
279 279 try:
280 280 return self.p.map[key]
281 281 except KeyError:
282 282 try:
283 283 self.load(key)
284 284 return self.p.map[key]
285 285 except KeyError:
286 286 raise KeyError("node " + hex(key))
287 287 def __setitem__(self, key, val):
288 288 self.p.map[key] = val
289 289 def __delitem__(self, key):
290 290 del self.p.map[key]
291 291
292 292 class RevlogError(Exception):
293 293 pass
294 294 class LookupError(RevlogError):
295 295 pass
296 296
297 297 def getoffset(q):
298 298 if q & 0xFFFF:
299 299 raise RevlogError(_('incompatible revision flag %x') % q)
300 300 return int(q >> 16)
301 301
302 302 def gettype(q):
303 303 return int(q & 0xFFFF)
304 304
305 305 def offset_type(offset, type):
306 306 return long(long(offset) << 16 | type)
307 307
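offset_type packs a byte offset into the upper 48 bits of a 64-bit field and a 16-bit type into the low bits; gettype and getoffset undo it, and getoffset deliberately raises on any nonzero flag bits since this revlog version defines none. A small round trip using the helpers above:

    q = offset_type(4096, 0)           # offset 4096, no flags
    assert getoffset(q) == 4096
    assert gettype(q) == 0

    flagged = offset_type(4096, 1)     # any nonzero type...
    assert gettype(flagged) == 1
    # ...and getoffset(flagged) raises RevlogError("incompatible revision flag")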
308 308 class revlogoldio(object):
309 309 def __init__(self):
310 310 self.chunkcache = None
311 311 self.size = struct.calcsize(indexformatv0)
312 312
313 313 def parseindex(self, fp, st, inline):
314 314 s = self.size
315 315 index = []
316 316 nodemap = {nullid: nullrev}
317 317 n = off = 0
318 318 data = fp.read()
319 319 l = len(data)
320 320 while off + s <= l:
321 321 cur = data[off:off + s]
322 322 off += s
323 323 e = struct.unpack(indexformatv0, cur)
324 324 # transform to revlogv1 format
325 325 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
326 326 nodemap[e[4]], nodemap[e[5]], e[6])
327 327 index.append(e2)
328 328 nodemap[e[6]] = n
329 329 n += 1
330 330
331 331 return index, nodemap
332 332
333 333 class revlogio(object):
334 334 def __init__(self):
335 335 self.chunkcache = None
336 336 self.size = struct.calcsize(indexformatng)
337 337
338 338 def parseindex(self, fp, st, inline):
339 339 if (lazyparser.safe_to_use and not inline and
340 340 st and st.st_size > 1000000):
341 341 # big index, let's parse it on demand
342 342 parser = lazyparser(fp, st.st_size)
343 343 index = lazyindex(parser)
344 344 nodemap = lazymap(parser)
345 345 e = list(index[0])
346 346 type = gettype(e[0])
347 347 e[0] = offset_type(0, type)
348 348 index[0] = e
349 349 return index, nodemap
350 350
351 351 s = self.size
352 352 index = []
353 353 nodemap = {nullid: nullrev}
354 354 n = off = 0
355 355 # if we're not using lazymap, always read the whole index
356 356 data = fp.read()
357 357 l = len(data)
358 358 if inline:
359 359 self.chunkcache = (0, data)
360 360 while off + s <= l:
361 361 e = struct.unpack(indexformatng, data[off:off + s])
362 362 index.append(e)
363 363 nodemap[e[7]] = n
364 364 n += 1
365 365 off += s
366 366 if inline:
367 367 if e[1] < 0:
368 368 break
369 369 off += e[1]
370 370
371 371 e = list(index[0])
372 372 type = gettype(e[0])
373 373 e[0] = offset_type(0, type)
374 374 index[0] = e
375 375
376 376 return index, nodemap
377 377
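In the inline case handled above, index entries and compressed data chunks are interleaved in the single .i file, which is why the parse loop advances off by e[1] (the compressed length) after each record. A hedged sketch of that walk over a raw buffer; it ignores the detail that in a real file the 4-byte version header overlays the first entry's offset field:

    import struct

    def walk_inline(data, fmt=">Qiiiiii20s12x"):
        # yield (entry, chunk) pairs from an inline revlog buffer
        s = struct.calcsize(fmt)
        off = 0
        while off + s <= len(data):
            e = struct.unpack(fmt, data[off:off + s])
            off += s
            chunk = data[off:off + e[1]]   # e[1] is the compressed length
            off += e[1]
            yield e, chunk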
378 378 class revlog(object):
379 379 """
380 380 the underlying revision storage object
381 381
382 382 A revlog consists of two parts, an index and the revision data.
383 383
384 384 The index is a file with a fixed record size containing
385 385 information on each revision, including its nodeid (hash), the
386 386 nodeids of its parents, the position and offset of its data within
387 387 the data file, and the revision it's based on. Finally, each entry
388 388 contains a linkrev entry that can serve as a pointer to external
389 389 data.
390 390
391 391 The revision data itself is a linear collection of data chunks.
392 392 Each chunk represents a revision and is usually represented as a
393 393 delta against the previous chunk. To bound lookup time, runs of
394 394 deltas are limited to about 2 times the length of the original
395 395 version data. This makes retrieval of a version proportional to
396 396 its size, or O(1) relative to the number of revisions.
397 397
398 398 Both pieces of the revlog are written to in an append-only
399 399 fashion, which means we never need to rewrite a file to insert or
400 400 remove data, and can use some simple techniques to avoid the need
401 401 for locking while reading.
402 402 """
403 403 def __init__(self, opener, indexfile):
404 404 """
405 405 create a revlog object
406 406
407 407 opener is a function that abstracts the file opening operation
408 408 and can be used to implement COW semantics or the like.
409 409 """
410 410 self.indexfile = indexfile
411 411 self.datafile = indexfile[:-2] + ".d"
412 412 self.opener = opener
413 413
414 414 self.indexstat = None
415 415 self.cache = None
416 416 self.defversion = REVLOG_DEFAULT_VERSION
417 417 if hasattr(opener, "defversion"):
418 418 self.defversion = opener.defversion
419 419 if self.defversion & REVLOGNG:
420 420 self.defversion |= REVLOGNGINLINEDATA
421 421 self._load()
422 422
423 423 def _load(self):
424 424 v = self.defversion
425 425 try:
426 426 f = self.opener(self.indexfile)
427 427 i = f.read(4)
428 428 f.seek(0)
429 429 if len(i) > 0:
430 430 v = struct.unpack(versionformat, i)[0]
431 431 except IOError, inst:
432 432 if inst.errno != errno.ENOENT:
433 433 raise
434 434 i = ""
435 435 else:
436 436 try:
437 437 st = util.fstat(f)
438 438 except AttributeError, inst:
439 439 st = None
440 440 else:
441 441 oldst = self.indexstat
442 442 if (oldst and st.st_dev == oldst.st_dev
443 443 and st.st_ino == oldst.st_ino
444 444 and st.st_mtime == oldst.st_mtime
445 445 and st.st_ctime == oldst.st_ctime
446 446 and st.st_size == oldst.st_size):
447 447 return
448 448 self.indexstat = st
449 449 flags = v & ~0xFFFF
450 450 fmt = v & 0xFFFF
451 451 if fmt == REVLOGV0:
452 452 if flags:
453 453 raise RevlogError(_("index %s unknown flags %#04x for format v0")
454 454 % (self.indexfile, flags >> 16))
455 455 elif fmt == REVLOGNG:
456 456 if flags & ~REVLOGNGINLINEDATA:
457 457 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
458 458 % (self.indexfile, flags >> 16))
459 459 else:
460 460 raise RevlogError(_("index %s unknown format %d")
461 461 % (self.indexfile, fmt))
462 462 self.version = v
463 self._inline = v & REVLOGNGINLINEDATA
463 464 self.nodemap = {nullid: nullrev}
464 465 self.index = []
465 466 self._io = revlogio()
466 467 if self.version == REVLOGV0:
467 468 self._io = revlogoldio()
468 469 if i:
469 self.index, self.nodemap = self._io.parseindex(f, st, self._inline())
470 self.index, self.nodemap = self._io.parseindex(f, st, self._inline)
470 471 # add the magic null revision at -1
471 472 self.index.append((0, 0, 0, -1, -1, -1, -1, nullid))
472 473
473 474 def _loadindex(self, start, end):
474 475 """load a block of indexes all at once from the lazy parser"""
475 476 if isinstance(self.index, lazyindex):
476 477 self.index.p.loadindex(start, end)
477 478
478 479 def _loadindexmap(self):
479 480 """loads both the map and the index from the lazy parser"""
480 481 if isinstance(self.index, lazyindex):
481 482 p = self.index.p
482 483 p.loadindex()
483 484 self.nodemap = p.map
484 485
485 486 def _loadmap(self):
486 487 """loads the map from the lazy parser"""
487 488 if isinstance(self.nodemap, lazymap):
488 489 self.nodemap.p.loadmap()
489 490 self.nodemap = self.nodemap.p.map
490 491
491 def _inline(self):
492 return self.version & REVLOGNGINLINEDATA
493 492 def tip(self):
494 493 return self.node(len(self.index) - 2)
495 494 def count(self):
496 495 return len(self.index) - 1
497 496
498 497 def rev(self, node):
499 498 try:
500 499 return self.nodemap[node]
501 500 except KeyError:
502 501 raise LookupError(_('%s: no node %s') % (self.indexfile, hex(node)))
503 502 def node(self, rev):
504 503 return self.index[rev][7]
505 504 def linkrev(self, node):
506 505 return self.index[self.rev(node)][4]
507 506 def parents(self, node):
508 507 d = self.index[self.rev(node)][5:7]
509 508 return (self.node(d[0]), self.node(d[1]))
510 509 def parentrevs(self, rev):
511 510 return self.index[rev][5:7]
512 511 def start(self, rev):
513 512 return getoffset(self.index[rev][0])
514 513 def end(self, rev):
515 514 return self.start(rev) + self.length(rev)
516 515 def length(self, rev):
517 516 return self.index[rev][1]
518 517 def base(self, rev):
519 518 return self.index[rev][3]
520 519
521 520 def size(self, rev):
522 521 """return the length of the uncompressed text for a given revision"""
523 522 l = self.index[rev][2]
524 523 if l >= 0:
525 524 return l
526 525
527 526 t = self.revision(self.node(rev))
528 527 return len(t)
529 528
530 529 # alternate implementation. The advantage of this code is that it
531 530 # will be faster for a single revision. But, the results are not
532 531 # cached, so finding the size of every revision will be slower.
533 532 """
534 533 if self.cache and self.cache[1] == rev:
535 534 return len(self.cache[2])
536 535
537 536 base = self.base(rev)
538 537 if self.cache and self.cache[1] >= base and self.cache[1] < rev:
539 538 base = self.cache[1]
540 539 text = self.cache[2]
541 540 else:
542 541 text = self.revision(self.node(base))
543 542
544 543 l = len(text)
545 544 for x in xrange(base + 1, rev + 1):
546 545 l = mdiff.patchedsize(l, self.chunk(x))
547 546 return l
548 547 """
549 548
550 549 def reachable(self, node, stop=None):
551 550 """return a hash of all nodes ancestral to a given node, including
552 551 the node itself, stopping when stop is matched"""
553 552 reachable = {}
554 553 visit = [node]
555 554 reachable[node] = 1
556 555 if stop:
557 556 stopn = self.rev(stop)
558 557 else:
559 558 stopn = 0
560 559 while visit:
561 560 n = visit.pop(0)
562 561 if n == stop:
563 562 continue
564 563 if n == nullid:
565 564 continue
566 565 for p in self.parents(n):
567 566 if self.rev(p) < stopn:
568 567 continue
569 568 if p not in reachable:
570 569 reachable[p] = 1
571 570 visit.append(p)
572 571 return reachable
573 572
574 573 def nodesbetween(self, roots=None, heads=None):
575 574 """Return a tuple containing three elements. Elements 1 and 2 contain
576 575 final lists of bases and heads after all the unreachable ones have been
577 576 pruned. Element 0 contains a topologically sorted list of all
578 577
579 578 nodes that satisfy these constraints:
580 579 1. All nodes must be descended from a node in roots (the nodes on
581 580 roots are considered descended from themselves).
582 581 2. All nodes must also be ancestors of a node in heads (the nodes in
583 582 heads are considered to be their own ancestors).
584 583
585 584 If roots is unspecified, nullid is assumed as the only root.
586 585 If heads is unspecified, it is taken to be the output of the
587 586 heads method (i.e. a list of all nodes in the repository that
588 587 have no children)."""
589 588 nonodes = ([], [], [])
590 589 if roots is not None:
591 590 roots = list(roots)
592 591 if not roots:
593 592 return nonodes
594 593 lowestrev = min([self.rev(n) for n in roots])
595 594 else:
596 595 roots = [nullid] # Everybody's a descendent of nullid
597 596 lowestrev = nullrev
598 597 if (lowestrev == nullrev) and (heads is None):
599 598 # We want _all_ the nodes!
600 599 return ([self.node(r) for r in xrange(0, self.count())],
601 600 [nullid], list(self.heads()))
602 601 if heads is None:
603 602 # All nodes are ancestors, so the latest ancestor is the last
604 603 # node.
605 604 highestrev = self.count() - 1
606 605 # Set ancestors to None to signal that every node is an ancestor.
607 606 ancestors = None
608 607 # Set heads to an empty dictionary for later discovery of heads
609 608 heads = {}
610 609 else:
611 610 heads = list(heads)
612 611 if not heads:
613 612 return nonodes
614 613 ancestors = {}
615 614 # Turn heads into a dictionary so we can remove 'fake' heads.
616 615 # Also, later we will be using it to filter out the heads we can't
617 616 # find from roots.
618 617 heads = dict.fromkeys(heads, 0)
619 618 # Start at the top and keep marking parents until we're done.
620 619 nodestotag = heads.keys()
621 620 # Remember where the top was so we can use it as a limit later.
622 621 highestrev = max([self.rev(n) for n in nodestotag])
623 622 while nodestotag:
624 623 # grab a node to tag
625 624 n = nodestotag.pop()
626 625 # Never tag nullid
627 626 if n == nullid:
628 627 continue
629 628 # A node's revision number represents its place in a
630 629 # topologically sorted list of nodes.
631 630 r = self.rev(n)
632 631 if r >= lowestrev:
633 632 if n not in ancestors:
634 633 # If we are possibly a descendent of one of the roots
635 634 # and we haven't already been marked as an ancestor
636 635 ancestors[n] = 1 # Mark as ancestor
637 636 # Add non-nullid parents to list of nodes to tag.
638 637 nodestotag.extend([p for p in self.parents(n) if
639 638 p != nullid])
640 639 elif n in heads: # We've seen it before, is it a fake head?
641 640 # So it is, real heads should not be the ancestors of
642 641 # any other heads.
643 642 heads.pop(n)
644 643 if not ancestors:
645 644 return nonodes
646 645 # Now that we have our set of ancestors, we want to remove any
647 646 # roots that are not ancestors.
648 647
649 648 # If one of the roots was nullid, everything is included anyway.
650 649 if lowestrev > nullrev:
651 650 # But, since we weren't, let's recompute the lowest rev to not
652 651 # include roots that aren't ancestors.
653 652
654 653 # Filter out roots that aren't ancestors of heads
655 654 roots = [n for n in roots if n in ancestors]
656 655 # Recompute the lowest revision
657 656 if roots:
658 657 lowestrev = min([self.rev(n) for n in roots])
659 658 else:
660 659 # No more roots? Return empty list
661 660 return nonodes
662 661 else:
663 662 # We are descending from nullid, and don't need to care about
664 663 # any other roots.
665 664 lowestrev = nullrev
666 665 roots = [nullid]
667 666 # Transform our roots list into a 'set' (i.e. a dictionary where the
668 667 # values don't matter).
669 668 descendents = dict.fromkeys(roots, 1)
670 669 # Also, keep the original roots so we can filter out roots that aren't
671 670 # 'real' roots (i.e. are descended from other roots).
672 671 roots = descendents.copy()
673 672 # Our topologically sorted list of output nodes.
674 673 orderedout = []
675 674 # Don't start at nullid since we don't want nullid in our output list,
676 675 # and if nullid shows up in descendents, empty parents will look like
677 676 # they're descendents.
678 677 for r in xrange(max(lowestrev, 0), highestrev + 1):
679 678 n = self.node(r)
680 679 isdescendent = False
681 680 if lowestrev == nullrev: # Everybody is a descendent of nullid
682 681 isdescendent = True
683 682 elif n in descendents:
684 683 # n is already a descendent
685 684 isdescendent = True
686 685 # This check only needs to be done here because all the roots
687 686 # will start being marked as descendents before the loop.
688 687 if n in roots:
689 688 # If n was a root, check if it's a 'real' root.
690 689 p = tuple(self.parents(n))
691 690 # If any of its parents are descendents, it's not a root.
692 691 if (p[0] in descendents) or (p[1] in descendents):
693 692 roots.pop(n)
694 693 else:
695 694 p = tuple(self.parents(n))
696 695 # A node is a descendent if either of its parents are
697 696 # descendents. (We seeded the descendents dict with the roots
698 697 # up there, remember?)
699 698 if (p[0] in descendents) or (p[1] in descendents):
700 699 descendents[n] = 1
701 700 isdescendent = True
702 701 if isdescendent and ((ancestors is None) or (n in ancestors)):
703 702 # Only include nodes that are both descendents and ancestors.
704 703 orderedout.append(n)
705 704 if (ancestors is not None) and (n in heads):
706 705 # We're trying to figure out which heads are reachable
707 706 # from roots.
708 707 # Mark this head as having been reached
709 708 heads[n] = 1
710 709 elif ancestors is None:
711 710 # Otherwise, we're trying to discover the heads.
712 711 # Assume this is a head because if it isn't, the next step
713 712 # will eventually remove it.
714 713 heads[n] = 1
715 714 # But, obviously its parents aren't.
716 715 for p in self.parents(n):
717 716 heads.pop(p, None)
718 717 heads = [n for n in heads.iterkeys() if heads[n] != 0]
719 718 roots = roots.keys()
720 719 assert orderedout
721 720 assert roots
722 721 assert heads
723 722 return (orderedout, roots, heads)
724 723
725 724 def heads(self, start=None, stop=None):
726 725 """return the list of all nodes that have no children
727 726
728 727 if start is specified, only heads that are descendants of
729 728 start will be returned
730 729 if stop is specified, it will consider all the revs from stop
731 730 as if they had no children
732 731 """
733 732 if start is None:
734 733 start = nullid
735 734 if stop is None:
736 735 stop = []
737 736 stoprevs = dict.fromkeys([self.rev(n) for n in stop])
738 737 startrev = self.rev(start)
739 738 reachable = {startrev: 1}
740 739 heads = {startrev: 1}
741 740
742 741 parentrevs = self.parentrevs
743 742 for r in xrange(startrev + 1, self.count()):
744 743 for p in parentrevs(r):
745 744 if p in reachable:
746 745 if r not in stoprevs:
747 746 reachable[r] = 1
748 747 heads[r] = 1
749 748 if p in heads and p not in stoprevs:
750 749 del heads[p]
751 750
752 751 return [self.node(r) for r in heads]
753 752
754 753 def children(self, node):
755 754 """find the children of a given node"""
756 755 c = []
757 756 p = self.rev(node)
758 757 for r in range(p + 1, self.count()):
759 758 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
760 759 if prevs:
761 760 for pr in prevs:
762 761 if pr == p:
763 762 c.append(self.node(r))
764 763 elif p == nullrev:
765 764 c.append(self.node(r))
766 765 return c
767 766
768 767 def _match(self, id):
769 768 if isinstance(id, (long, int)):
770 769 # rev
771 770 return self.node(id)
772 771 if len(id) == 20:
773 772 # possibly a binary node
774 773 # odds of a binary node being all hex in ASCII are 1 in 10**25
775 774 try:
776 775 node = id
777 776 r = self.rev(node) # quick search the index
778 777 return node
779 778 except LookupError:
780 779 pass # may be partial hex id
781 780 try:
782 781 # str(rev)
783 782 rev = int(id)
784 783 if str(rev) != id:
785 784 raise ValueError
786 785 if rev < 0:
787 786 rev = self.count() + rev
788 787 if rev < 0 or rev >= self.count():
789 788 raise ValueError
790 789 return self.node(rev)
791 790 except (ValueError, OverflowError):
792 791 pass
793 792 if len(id) == 40:
794 793 try:
795 794 # a full hex nodeid?
796 795 node = bin(id)
797 796 r = self.rev(node)
798 797 return node
799 798 except TypeError:
800 799 pass
801 800
802 801 def _partialmatch(self, id):
803 802 if len(id) < 40:
804 803 try:
805 804 # hex(node)[:...]
806 805 bin_id = bin(id[:len(id) & ~1]) # grab an even number of digits
807 806 node = None
808 807 for n in self.nodemap:
809 808 if n.startswith(bin_id) and hex(n).startswith(id):
810 809 if node is not None:
811 810 raise LookupError(_("Ambiguous identifier"))
812 811 node = n
813 812 if node is not None:
814 813 return node
815 814 except TypeError:
816 815 pass
817 816
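The len(id) & ~1 trick above exists because bin() can only decode whole bytes: an odd-length prefix is truncated to an even number of hex digits for the binary comparison, and the full prefix is then re-checked against hex(n), with ambiguity detected along the way. A standalone sketch of the same idea (hypothetical helper, byte-string ids as in this module):

    from binascii import hexlify, unhexlify

    def partial_match(prefix, nodes):
        # prefix is a hex byte string, possibly of odd length
        bin_prefix = unhexlify(prefix[:len(prefix) & ~1])  # whole bytes only
        found = None
        for n in nodes:
            if n.startswith(bin_prefix) and hexlify(n).startswith(prefix):
                if found is not None:
                    raise ValueError("ambiguous identifier")
                found = n
        return found

    nodes = [unhexlify(b"ab" * 20), unhexlify(b"cd" * 20)]
    assert partial_match(b"aba", nodes) == nodes[0]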
818 817 def lookup(self, id):
819 818 """locate a node based on:
820 819 - revision number or str(revision number)
821 820 - nodeid or subset of hex nodeid
822 821 """
823 822 n = self._match(id)
824 823 if n is not None:
825 824 return n
826 825 n = self._partialmatch(id)
827 826 if n:
828 827 return n
829 828
830 829 raise LookupError(_("No match found"))
831 830
832 831 def cmp(self, node, text):
833 832 """compare text with a given file revision"""
834 833 p1, p2 = self.parents(node)
835 834 return hash(text, p1, p2) != node
836 835
837 836 def diff(self, a, b):
838 837 """return a delta between two revisions"""
839 838 return mdiff.textdiff(a, b)
840 839
841 840 def patches(self, t, pl):
842 841 """apply a list of patches to a string"""
843 842 return mdiff.patches(t, pl)
844 843
845 844 def chunk(self, rev, df=None, cachelen=4096):
846 845 start, length = self.start(rev), self.length(rev)
847 inline = self._inline()
846 inline = self._inline
848 847 if inline:
849 848 start += (rev + 1) * self._io.size
850 849 end = start + length
851 850 def loadcache(df):
852 851 cache_length = max(cachelen, length) # 4k
853 852 if not df:
854 853 if inline:
855 854 df = self.opener(self.indexfile)
856 855 else:
857 856 df = self.opener(self.datafile)
858 857 df.seek(start)
859 858 self._io.chunkcache = (start, df.read(cache_length))
860 859
861 860 if not self._io.chunkcache:
862 861 loadcache(df)
863 862
864 863 cache_start = self._io.chunkcache[0]
865 864 cache_end = cache_start + len(self._io.chunkcache[1])
866 865 if start >= cache_start and end <= cache_end:
867 866 # it is cached
868 867 offset = start - cache_start
869 868 else:
870 869 loadcache(df)
871 870 offset = 0
872 871
873 872 return decompress(self._io.chunkcache[1][offset:offset + length])
874 873
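The one subtlety in chunk() is address translation for inline revlogs: start(rev) is a logical data offset that ignores the index records, so the physical seek position must skip one index entry per revision up to and including this one, hence start += (rev + 1) * self._io.size. A hedged helper spelling that out:

    def physical_start(logical_start, rev, inline, entrysize=64):
        # in a separate .d file, logical and physical offsets coincide;
        # inline, revs 0..rev each put one index entry before this data
        if inline:
            return logical_start + (rev + 1) * entrysize
        return logical_start

    # rev 0's chunk sits right after index entry 0, i.e. at byte 64
    assert physical_start(0, 0, True, 64) == 64
    assert physical_start(0, 0, False) == 0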
875 874 def delta(self, node):
876 875 """return or calculate a delta between a node and its predecessor"""
877 876 r = self.rev(node)
878 877 return self.revdiff(r - 1, r)
879 878
880 879 def revdiff(self, rev1, rev2):
881 880 """return or calculate a delta between two revisions"""
882 881 b1 = self.base(rev1)
883 882 b2 = self.base(rev2)
884 883 if b1 == b2 and rev1 + 1 == rev2:
885 884 return self.chunk(rev2)
886 885 else:
887 886 return self.diff(self.revision(self.node(rev1)),
888 887 self.revision(self.node(rev2)))
889 888
890 889 def revision(self, node):
891 890 """return an uncompressed revision of a given node"""
892 891 if node == nullid:
893 892 return ""
894 893 if self.cache and self.cache[0] == node:
895 894 return self.cache[2]
896 895
897 896 # look up what we need to read
898 897 text = None
899 898 rev = self.rev(node)
900 899 base = self.base(rev)
901 900
902 if self._inline():
901 if self._inline:
903 902 # we probably have the whole chunk cached
904 903 df = None
905 904 else:
906 905 df = self.opener(self.datafile)
907 906
908 907 # do we have useful data cached?
909 908 if self.cache and self.cache[1] >= base and self.cache[1] < rev:
910 909 base = self.cache[1]
911 910 text = self.cache[2]
912 911 self._loadindex(base, rev + 1)
913 912 else:
914 913 self._loadindex(base, rev + 1)
915 914 text = self.chunk(base, df=df)
916 915
917 916 bins = []
918 917 for r in xrange(base + 1, rev + 1):
919 918 bins.append(self.chunk(r, df=df))
920 919
921 920 text = self.patches(text, bins)
922 921
923 922 p1, p2 = self.parents(node)
924 923 if node != hash(text, p1, p2):
925 924 raise RevlogError(_("integrity check failed on %s:%d")
926 925 % (self.datafile, rev))
927 926
928 927 self.cache = (node, rev, text)
929 928 return text
930 929
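revision() is the delta-chain walk in miniature: fetch the full text at base(rev), then patch forward through every chunk from base+1 up to rev. The sketch below mimics that control flow with a toy edit format (sorted, non-overlapping (start, end, replacement) triples); the real patching is done by mdiff.patches on binary deltas:

    def apply_edits(text, edits):
        # toy stand-in for mdiff.patches: non-overlapping, pre-sorted edits
        out, pos = [], 0
        for start, end, repl in edits:
            out.append(text[pos:start])
            out.append(repl)
            pos = end
        out.append(text[pos:])
        return "".join(out)

    def reconstruct(full_text, deltas):
        text = full_text                  # revision at base(rev)
        for d in deltas:                  # chunks base+1 .. rev, in order
            text = apply_edits(text, d)
        return text

    assert reconstruct("abcdef", [[(1, 3, "XY")], [(0, 1, "z")]]) == "zXYdef"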
931 930 def checkinlinesize(self, tr, fp=None):
932 if not self._inline():
931 if not self._inline:
933 932 return
934 933 if not fp:
935 934 fp = self.opener(self.indexfile, 'r')
936 935 fp.seek(0, 2)
937 936 size = fp.tell()
938 937 if size < 131072:
939 938 return
940 939 trinfo = tr.find(self.indexfile)
941 940 if trinfo == None:
942 941 raise RevlogError(_("%s not found in the transaction")
943 942 % self.indexfile)
944 943
945 944 trindex = trinfo[2]
946 945 dataoff = self.start(trindex)
947 946
948 947 tr.add(self.datafile, dataoff)
949 948 df = self.opener(self.datafile, 'w')
950 949 calc = self._io.size
951 950 for r in xrange(self.count()):
952 951 start = self.start(r) + (r + 1) * calc
953 952 length = self.length(r)
954 953 fp.seek(start)
955 954 d = fp.read(length)
956 955 df.write(d)
957 956 fp.close()
958 957 df.close()
959 958 fp = self.opener(self.indexfile, 'w', atomictemp=True)
960 959 self.version &= ~(REVLOGNGINLINEDATA)
960 self._inline = False
961 961 if self.count():
962 962 x = self.index[0]
963 963 e = struct.pack(indexformatng, *x)[4:]
964 964 l = struct.pack(versionformat, self.version)
965 965 fp.write(l)
966 966 fp.write(e)
967 967
968 968 for i in xrange(1, self.count()):
969 969 x = self.index[i]
970 970 e = struct.pack(indexformatng, *x)
971 971 fp.write(e)
972 972
973 973 # if we don't call rename, the temp file will never replace the
974 974 # real index
975 975 fp.rename()
976 976
977 977 tr.replace(self.indexfile, trindex * calc)
978 978 self._io.chunkcache = None
979 979
980 980 def addrevision(self, text, transaction, link, p1, p2, d=None):
981 981 """add a revision to the log
982 982
983 983 text - the revision data to add
984 984 transaction - the transaction object used for rollback
985 985 link - the linkrev data to add
986 986 p1, p2 - the parent nodeids of the revision
987 987 d - an optional precomputed delta
988 988 """
989 989 dfh = None
990 if not self._inline():
990 if not self._inline:
991 991 dfh = self.opener(self.datafile, "a")
992 992 ifh = self.opener(self.indexfile, "a+")
993 993 return self._addrevision(text, transaction, link, p1, p2, d, ifh, dfh)
994 994
995 995 def _addrevision(self, text, transaction, link, p1, p2, d, ifh, dfh):
996 996 node = hash(text, p1, p2)
997 997 if node in self.nodemap:
998 998 return node
999 999
1000 1000 curr = self.count()
1001 1001 prev = curr - 1
1002 1002 base = self.base(prev)
1003 1003 offset = self.end(prev)
1004 1004
1005 1005 if curr:
1006 1006 if not d:
1007 1007 ptext = self.revision(self.node(prev))
1008 1008 d = self.diff(ptext, text)
1009 1009 data = compress(d)
1010 1010 l = len(data[1]) + len(data[0])
1011 1011 dist = l + offset - self.start(base)
1012 1012
1013 1013 # full versions are inserted when the needed deltas
1014 1014 # become comparable to the uncompressed text
1015 1015 if not curr or dist > len(text) * 2:
1016 1016 data = compress(text)
1017 1017 l = len(data[1]) + len(data[0])
1018 1018 base = curr
1019 1019
1020 1020 e = (offset_type(offset, 0), l, len(text),
1021 1021 base, link, self.rev(p1), self.rev(p2), node)
1022 1022 self.index.insert(-1, e)
1023 1023 self.nodemap[node] = curr
1024 1024
1025 1025 if self.version == REVLOGV0:
1026 1026 e = (offset, l, base, link, p1, p2, node)
1027 1027 entry = struct.pack(indexformatv0, *e)
1028 1028 else:
1029 1029 entry = struct.pack(indexformatng, *e)
1030 1030 if not curr:
1031 1031 entry = struct.pack(versionformat, self.version) + entry[4:]
1032 1032
1033 if not self._inline():
1033 if not self._inline:
1034 1034 transaction.add(self.datafile, offset)
1035 1035 transaction.add(self.indexfile, curr * len(entry))
1036 1036 if data[0]:
1037 1037 dfh.write(data[0])
1038 1038 dfh.write(data[1])
1039 1039 dfh.flush()
1040 1040 ifh.write(entry)
1041 1041 else:
1042 1042 ifh.seek(0, 2)
1043 1043 transaction.add(self.indexfile, ifh.tell(), prev)
1044 1044 ifh.write(entry)
1045 1045 ifh.write(data[0])
1046 1046 ifh.write(data[1])
1047 1047 self.checkinlinesize(transaction, ifh)
1048 1048
1049 1049 self.cache = (node, curr, text)
1050 1050 return node
1051 1051
1052 1052 def ancestor(self, a, b):
1053 1053 """calculate the least common ancestor of nodes a and b"""
1054 1054
1055 1055 def parents(rev):
1056 1056 return [p for p in self.parentrevs(rev) if p != nullrev]
1057 1057
1058 1058 c = ancestor.ancestor(self.rev(a), self.rev(b), parents)
1059 1059 if c is None:
1060 1060 return nullid
1061 1061
1062 1062 return self.node(c)
1063 1063
1064 1064 def group(self, nodelist, lookup, infocollect=None):
1065 1065 """calculate a delta group
1066 1066
1067 1067 Given a list of changeset revs, return a set of deltas and
1068 1068 metadata corresponding to nodes. the first delta is
1069 1069 parent(nodes[0]) -> nodes[0]; the receiver is guaranteed to
1070 1070 have this parent as it has all history before these
1071 1071 changesets. parent is parent[0]
1072 1072 """
1073 1073 revs = [self.rev(n) for n in nodelist]
1074 1074
1075 1075 # if we don't have any revisions touched by these changesets, bail
1076 1076 if not revs:
1077 1077 yield changegroup.closechunk()
1078 1078 return
1079 1079
1080 1080 # add the parent of the first rev
1081 1081 p = self.parents(self.node(revs[0]))[0]
1082 1082 revs.insert(0, self.rev(p))
1083 1083
1084 1084 # build deltas
1085 1085 for d in xrange(0, len(revs) - 1):
1086 1086 a, b = revs[d], revs[d + 1]
1087 1087 nb = self.node(b)
1088 1088
1089 1089 if infocollect is not None:
1090 1090 infocollect(nb)
1091 1091
1092 1092 d = self.revdiff(a, b)
1093 1093 p = self.parents(nb)
1094 1094 meta = nb + p[0] + p[1] + lookup(nb)
1095 1095 yield changegroup.genchunk("%s%s" % (meta, d))
1096 1096
1097 1097 yield changegroup.closechunk()
1098 1098
1099 1099 def addgroup(self, revs, linkmapper, transaction, unique=0):
1100 1100 """
1101 1101 add a delta group
1102 1102
1103 1103 given a set of deltas, add them to the revision log. the
1104 1104 first delta is against its parent, which should be in our
1105 1105 log; the rest are against the previous delta.
1106 1106 """
1107 1107
1108 1108 #track the base of the current delta log
1109 1109 r = self.count()
1110 1110 t = r - 1
1111 1111 node = None
1112 1112
1113 1113 base = prev = nullrev
1114 1114 start = end = textlen = 0
1115 1115 if r:
1116 1116 end = self.end(t)
1117 1117
1118 1118 ifh = self.opener(self.indexfile, "a+")
1119 1119 ifh.seek(0, 2)
1120 1120 transaction.add(self.indexfile, ifh.tell(), self.count())
1121 if self._inline():
1121 if self._inline:
1122 1122 dfh = None
1123 1123 else:
1124 1124 transaction.add(self.datafile, end)
1125 1125 dfh = self.opener(self.datafile, "a")
1126 1126
1127 1127 # loop through our set of deltas
1128 1128 chain = None
1129 1129 for chunk in revs:
1130 1130 node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
1131 1131 link = linkmapper(cs)
1132 1132 if node in self.nodemap:
1133 1133 # this can happen if two branches make the same change
1134 1134 # if unique:
1135 1135 # raise RevlogError(_("already have %s") % hex(node[:4]))
1136 1136 chain = node
1137 1137 continue
1138 1138 delta = chunk[80:]
1139 1139
1140 1140 for p in (p1, p2):
1141 1141 if not p in self.nodemap:
1142 1142 raise LookupError(_("unknown parent %s") % short(p))
1143 1143
1144 1144 if not chain:
1145 1145 # retrieve the parent revision of the delta chain
1146 1146 chain = p1
1147 1147 if not chain in self.nodemap:
1148 1148 raise LookupError(_("unknown base %s") % short(chain[:4]))
1149 1149
1150 1150 # full versions are inserted when the needed deltas become
1151 1151 # comparable to the uncompressed text or when the previous
1152 1152 # version is not the one we have a delta against. We use
1153 1153 # the size of the previous full rev as a proxy for the
1154 1154 # current size.
1155 1155
1156 1156 if chain == prev:
1157 1157 tempd = compress(delta)
1158 1158 cdelta = tempd[0] + tempd[1]
1159 1159 textlen = mdiff.patchedsize(textlen, delta)
1160 1160
1161 1161 if chain != prev or (end - start + len(cdelta)) > textlen * 2:
1162 1162 # flush our writes here so we can read it in revision
1163 1163 if dfh:
1164 1164 dfh.flush()
1165 1165 ifh.flush()
1166 1166 text = self.revision(chain)
1167 1167 text = self.patches(text, [delta])
1168 1168 chk = self._addrevision(text, transaction, link, p1, p2, None,
1169 1169 ifh, dfh)
1170 if not dfh and not self._inline():
1170 if not dfh and not self._inline:
1171 1171 # addrevision switched from inline to conventional
1172 1172 # reopen the index
1173 1173 dfh = self.opener(self.datafile, "a")
1174 1174 ifh = self.opener(self.indexfile, "a")
1175 1175 if chk != node:
1176 1176 raise RevlogError(_("consistency error adding group"))
1177 1177 textlen = len(text)
1178 1178 else:
1179 1179 e = (offset_type(end, 0), len(cdelta), textlen, base,
1180 1180 link, self.rev(p1), self.rev(p2), node)
1181 1181 self.index.insert(-1, e)
1182 1182 self.nodemap[node] = r
1183 if self._inline():
1183 if self._inline:
1184 1184 ifh.write(struct.pack(indexformatng, *e))
1185 1185 ifh.write(cdelta)
1186 1186 self.checkinlinesize(transaction, ifh)
1187 if not self._inline():
1187 if not self._inline:
1188 1188 dfh = self.opener(self.datafile, "a")
1189 1189 ifh = self.opener(self.indexfile, "a")
1190 1190 else:
1191 1191 if self.version == REVLOGV0:
1192 1192 e = (end, len(cdelta), base, link, p1, p2, node)
1193 1193 entry = struct.pack(indexformatv0, *e)
1194 1194 else:
1195 1195 entry = struct.pack(indexformatng, *e)
1196 1196 dfh.write(cdelta)
1197 1197 ifh.write(entry)
1198 1198
1199 1199 t, r, chain, prev = r, r + 1, node, node
1200 1200 base = self.base(t)
1201 1201 start = self.start(base)
1202 1202 end = self.end(t)
1203 1203
1204 1204 return node
1205 1205
1206 1206 def strip(self, rev, minlink):
1207 1207 if self.count() == 0 or rev >= self.count():
1208 1208 return
1209 1209
1210 1210 if isinstance(self.index, lazyindex):
1211 1211 self._loadindexmap()
1212 1212
1213 1213 # When stripping away a revision, we need to make sure it
1214 1214 # does not actually belong to an older changeset.
1215 1215 # The minlink parameter defines the oldest revision
1216 1216 # we're allowed to strip away.
1217 1217 while minlink > self.index[rev][4]:
1218 1218 rev += 1
1219 1219 if rev >= self.count():
1220 1220 return
1221 1221
1222 1222 # first truncate the files on disk
1223 1223 end = self.start(rev)
1224 if not self._inline():
1224 if not self._inline:
1225 1225 df = self.opener(self.datafile, "a")
1226 1226 df.truncate(end)
1227 1227 end = rev * self._io.size
1228 1228 else:
1229 1229 end += rev * self._io.size
1230 1230
1231 1231 indexf = self.opener(self.indexfile, "a")
1232 1232 indexf.truncate(end)
1233 1233
1234 1234 # then reset internal state in memory to forget those revisions
1235 1235 self.cache = None
1236 1236 self._io.chunkcache = None
1237 1237 for x in xrange(rev, self.count()):
1238 1238 del self.nodemap[self.node(x)]
1239 1239
1240 1240 del self.index[rev:-1]
1241 1241
1242 1242 def checksize(self):
1243 1243 expected = 0
1244 1244 if self.count():
1245 1245 expected = self.end(self.count() - 1)
1246 1246
1247 1247 try:
1248 1248 f = self.opener(self.datafile)
1249 1249 f.seek(0, 2)
1250 1250 actual = f.tell()
1251 1251 dd = actual - expected
1252 1252 except IOError, inst:
1253 1253 if inst.errno != errno.ENOENT:
1254 1254 raise
1255 1255 dd = 0
1256 1256
1257 1257 try:
1258 1258 f = self.opener(self.indexfile)
1259 1259 f.seek(0, 2)
1260 1260 actual = f.tell()
1261 1261 s = self._io.size
1262 1262 i = actual / s
1263 1263 di = actual - (i * s)
1264 if self._inline():
1264 if self._inline:
1265 1265 databytes = 0
1266 1266 for r in xrange(self.count()):
1267 1267 databytes += self.length(r)
1268 1268 dd = 0
1269 1269 di = actual - self.count() * s - databytes
1270 1270 except IOError, inst:
1271 1271 if inst.errno != errno.ENOENT:
1272 1272 raise
1273 1273 di = 0
1274 1274
1275 1275 return (dd, di)
1276 1276
1277 1277