revlog: parse revlogv0 indexes into v1 internally...
Matt Mackall
r4977:6cb30bc4 default
@@ -1,1293 +1,1292 @@
1 1 """
2 2 revlog.py - storage back-end for mercurial
3 3
4 4 This provides efficient delta storage with O(1) retrieve and append
5 5 and O(changes) merge between branches
6 6
7 7 Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
8 8
9 9 This software may be used and distributed according to the terms
10 10 of the GNU General Public License, incorporated herein by reference.
11 11 """
12 12
13 13 from node import *
14 14 from i18n import _
15 15 import binascii, changegroup, errno, ancestor, mdiff, os
16 16 import sha, struct, util, zlib
17 17
18 18 # revlog version strings
19 19 REVLOGV0 = 0
20 20 REVLOGNG = 1
21 21
22 22 # revlog flags
23 23 REVLOGNGINLINEDATA = (1 << 16)
24 24 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
25 25
26 26 REVLOG_DEFAULT_FORMAT = REVLOGNG
27 27 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
28 28
29 29 def hash(text, p1, p2):
30 30 """generate a hash from the given text and its parent hashes
31 31
32 32 This hash combines both the current file contents and its history
33 33 in a manner that makes it easy to distinguish nodes with the same
34 34 content in the revision graph.
35 35 """
36 36 l = [p1, p2]
37 37 l.sort()
38 38 s = sha.new(l[0])
39 39 s.update(l[1])
40 40 s.update(text)
41 41 return s.digest()
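
Because the parent hashes are sorted before digesting, the node id is independent of parent order. A minimal sketch, assuming the hash function above is in scope:

    p1, p2 = "a" * 20, "b" * 20
    # swapping the parents yields the same node, since hash() sorts them first
    assert hash("some text", p1, p2) == hash("some text", p2, p1)
    # while any change to the text or the history changes the node
    assert hash("other text", p1, p2) != hash("some text", p1, p2)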
42 42
43 43 def compress(text):
44 44 """ generate a possibly-compressed representation of text """
45 45 if not text: return ("", text)
46 46 if len(text) < 44:
47 47 if text[0] == '\0': return ("", text)
48 48 return ('u', text)
49 49 bin = zlib.compress(text)
50 50 if len(bin) > len(text):
51 51 if text[0] == '\0': return ("", text)
52 52 return ('u', text)
53 53 return ("", bin)
54 54
55 55 def decompress(bin):
56 56 """ decompress the given input """
57 57 if not bin: return bin
58 58 t = bin[0]
59 59 if t == '\0': return bin
60 60 if t == 'x': return zlib.decompress(bin)
61 61 if t == 'u': return bin[1:]
62 62 raise RevlogError(_("unknown compression type %r") % t)
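
The tag byte works because a zlib stream always begins with 'x' (0x78): zlib output needs no marker, literal text gets a 'u' prefix, and short or incompressible text that already starts with NUL is stored verbatim and passed through by decompress. A round-trip sketch, assuming compress and decompress above are in scope:

    for text in ("", "short text", "\0binary header", "x" * 1000):
        hdr, body = compress(text)
        assert decompress(hdr + body) == text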
63 63
64 64 indexformatv0 = ">4l20s20s20s"
65 65 v0shaoffset = 56
66 66 # index ng:
67 67 # 6 bytes offset
68 68 # 2 bytes flags
69 69 # 4 bytes compressed length
70 70 # 4 bytes uncompressed length
71 71 # 4 bytes: base rev
72 72 # 4 bytes link rev
73 73 # 4 bytes parent 1 rev
74 74 # 4 bytes parent 2 rev
75 75 # 32 bytes: nodeid
76 76 indexformatng = ">Qiiiiii20s12x"
77 77 ngshaoffset = 32
78 78 versionformat = ">I"
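
A quick sanity check of the record layouts described above (a sketch, assuming the format strings and sha offsets are in scope):

    import struct
    # v0: four 4-byte ints plus three 20-byte nodeids = 76 bytes per entry;
    # the entry's own nodeid starts after the ints and both parents.
    assert struct.calcsize(indexformatv0) == 4 * 4 + 3 * 20 == 76
    assert v0shaoffset == 4 * 4 + 2 * 20 == 56
    # ng: 8-byte offset/flags, six 4-byte ints, 20-byte nodeid, 12 pad bytes.
    assert struct.calcsize(indexformatng) == 8 + 6 * 4 + 20 + 12 == 64
    assert ngshaoffset == 8 + 6 * 4 == 32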
79 79
80 80 class lazyparser(object):
81 81 """
82 82 this class avoids the need to parse the entirety of large indices
83 83 """
84 84
85 85 # lazyparser is not safe to use on windows if win32 extensions are not
86 86 # available. it keeps the file handle open, which makes it impossible
87 87 # to break hardlinks on local cloned repos.
88 88 safe_to_use = os.name != 'nt' or (not util.is_win_9x() and
89 89 hasattr(util, 'win32api'))
90 90
91 91 def __init__(self, dataf, size):
92 92 self.dataf = dataf
93 93 self.s = struct.calcsize(indexformatng)
94 94 self.datasize = size
95 95 self.l = size/self.s
96 96 self.index = [None] * self.l
97 97 self.map = {nullid: nullrev}
98 98 self.allmap = 0
99 99 self.all = 0
100 100 self.mapfind_count = 0
101 101
102 102 def loadmap(self):
103 103 """
104 104 during a commit, we need to make sure the rev being added is
105 105 not a duplicate. This requires loading the entire index,
106 106 which is fairly slow. loadmap can load up just the node map,
107 107 which takes much less time.
108 108 """
109 109 if self.allmap: return
110 110 end = self.datasize
111 111 self.allmap = 1
112 112 cur = 0
113 113 count = 0
114 114 blocksize = self.s * 256
115 115 self.dataf.seek(0)
116 116 while cur < end:
117 117 data = self.dataf.read(blocksize)
118 118 off = 0
119 119 for x in xrange(256):
120 120 n = data[off + ngshaoffset:off + ngshaoffset + 20]
121 121 self.map[n] = count
122 122 count += 1
123 123 if count >= self.l:
124 124 break
125 125 off += self.s
126 126 cur += blocksize
127 127
128 128 def loadblock(self, blockstart, blocksize, data=None):
129 129 if self.all: return
130 130 if data is None:
131 131 self.dataf.seek(blockstart)
132 132 if blockstart + blocksize > self.datasize:
133 133 # the revlog may have grown since we've started running,
134 134 # but we don't have space in self.index for more entries.
135 135 # limit blocksize so that we don't get too much data.
136 136 blocksize = max(self.datasize - blockstart, 0)
137 137 data = self.dataf.read(blocksize)
138 138 lend = len(data) / self.s
139 139 i = blockstart / self.s
140 140 off = 0
141 141 # lazyindex supports __delitem__
142 142 if lend > len(self.index) - i:
143 143 lend = len(self.index) - i
144 144 for x in xrange(lend):
145 145 if self.index[i + x] == None:
146 146 b = data[off : off + self.s]
147 147 self.index[i + x] = b
148 148 n = b[ngshaoffset:ngshaoffset + 20]
149 149 self.map[n] = i + x
150 150 off += self.s
151 151
152 152 def findnode(self, node):
153 153 """search backwards through the index file for a specific node"""
154 154 if self.allmap: return None
155 155
156 156 # hg log will cause many many searches for the manifest
157 157 # nodes. After we get called a few times, just load the whole
158 158 # thing.
159 159 if self.mapfind_count > 8:
160 160 self.loadmap()
161 161 if node in self.map:
162 162 return node
163 163 return None
164 164 self.mapfind_count += 1
165 165 last = self.l - 1
166 166 while self.index[last] != None:
167 167 if last == 0:
168 168 self.all = 1
169 169 self.allmap = 1
170 170 return None
171 171 last -= 1
172 172 end = (last + 1) * self.s
173 173 blocksize = self.s * 256
174 174 while end >= 0:
175 175 start = max(end - blocksize, 0)
176 176 self.dataf.seek(start)
177 177 data = self.dataf.read(end - start)
178 178 findend = end - start
179 179 while True:
180 180 # we're searching backwards, so we have to make sure
181 181 # we don't find a changeset where this node is a parent
182 182 off = data.rfind(node, 0, findend)
183 183 findend = off
184 184 if off >= 0:
185 185 i = off / self.s
186 186 off = i * self.s
187 187 n = data[off + ngshaoffset:off + ngshaoffset + 20]
188 188 if n == node:
189 189 self.map[n] = i + start / self.s
190 190 return node
191 191 else:
192 192 break
193 193 end -= blocksize
194 194 return None
195 195
196 196 def loadindex(self, i=None, end=None):
197 197 if self.all: return
198 198 all = False
199 199 if i == None:
200 200 blockstart = 0
201 201 blocksize = (512 / self.s) * self.s
202 202 end = self.datasize
203 203 all = True
204 204 else:
205 205 if end:
206 206 blockstart = i * self.s
207 207 end = end * self.s
208 208 blocksize = end - blockstart
209 209 else:
210 210 blockstart = (i & ~63) * self.s
211 211 blocksize = self.s * 64
212 212 end = blockstart + blocksize
213 213 while blockstart < end:
214 214 self.loadblock(blockstart, blocksize)
215 215 blockstart += blocksize
216 216 if all: self.all = True
217 217
218 218 class lazyindex(object):
219 219 """a lazy version of the index array"""
220 220 def __init__(self, parser):
221 221 self.p = parser
222 222 def __len__(self):
223 223 return len(self.p.index)
224 224 def load(self, pos):
225 225 if pos < 0:
226 226 pos += len(self.p.index)
227 227 self.p.loadindex(pos)
228 228 return self.p.index[pos]
229 229 def __getitem__(self, pos):
230 230 ret = self.p.index[pos] or self.load(pos)
231 231 if isinstance(ret, str):
232 232 ret = struct.unpack(indexformatng, ret)
233 233 return ret
234 234 def __setitem__(self, pos, item):
235 235 self.p.index[pos] = item
236 236 def __delitem__(self, pos):
237 237 del self.p.index[pos]
238 238 def append(self, e):
239 239 self.p.index.append(e)
240 240
241 241 class lazymap(object):
242 242 """a lazy version of the node map"""
243 243 def __init__(self, parser):
244 244 self.p = parser
245 245 def load(self, key):
246 246 n = self.p.findnode(key)
247 247 if n == None:
248 248 raise KeyError(key)
249 249 def __contains__(self, key):
250 250 if key in self.p.map:
251 251 return True
252 252 self.p.loadmap()
253 253 return key in self.p.map
254 254 def __iter__(self):
255 255 yield nullid
256 256 for i in xrange(self.p.l):
257 257 ret = self.p.index[i]
258 258 if not ret:
259 259 self.p.loadindex(i)
260 260 ret = self.p.index[i]
261 261 if isinstance(ret, str):
262 262 ret = struct.unpack(indexformatng, ret)
263 263 yield ret[-1]
264 264 def __getitem__(self, key):
265 265 try:
266 266 return self.p.map[key]
267 267 except KeyError:
268 268 try:
269 269 self.load(key)
270 270 return self.p.map[key]
271 271 except KeyError:
272 272 raise KeyError("node " + hex(key))
273 273 def __setitem__(self, key, val):
274 274 self.p.map[key] = val
275 275 def __delitem__(self, key):
276 276 del self.p.map[key]
277 277
278 278 class RevlogError(Exception): pass
279 279 class LookupError(RevlogError): pass
280 280
281 281 def getoffset(q):
282 282 if q & 0xFFFF:
283 283 raise RevlogError(_('incompatible revision flag %x') % q)
284 284 return int(q >> 16)
285 285
286 286 def gettype(q):
287 287 return int(q & 0xFFFF)
288 288
289 289 def offset_type(offset, type):
290 290 return long(long(offset) << 16 | type)
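
In revlogng the first entry field multiplexes the data offset and a 16-bit type field into one 64-bit value; a worked example, assuming the three helpers above:

    q = offset_type(12345, 0)
    assert getoffset(q) == 12345 and gettype(q) == 0
    # a nonzero type survives the round trip, but getoffset refuses it,
    # since this reader understands no revision flags yet:
    assert gettype(offset_type(12345, 1)) == 1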
291 291
292 292 class revlogoldio(object):
293 293 def __init__(self):
294 294 self.chunkcache = None
295 self.size = struct.calcsize(indexformatv0)
295 296
296 297 def parseindex(self, fp, st, inline):
297 s = struct.calcsize(indexformatv0)
298 s = self.size
298 299 index = []
299 300 nodemap = {nullid: nullrev}
300 301 n = off = 0
301 302 data = fp.read()
302 303 l = len(data)
303 304 while off + s <= l:
304 305 cur = data[off:off + s]
305 306 off += s
306 307 e = struct.unpack(indexformatv0, cur)
307 index.append(e)
308 nodemap[e[-1]] = n
308 # transform to revlogv1 format
309 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
310 nodemap[e[4]], nodemap[e[5]], e[6])
311 index.append(e2)
312 nodemap[e[6]] = n
309 313 n += 1
310 314
311 315 return index, nodemap
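
This on-the-fly widening is the point of the change: a v0 entry (offset, size, base, link, p1node, p2node, node) is rewritten into the v1 tuple shape so the rest of revlog.py only ever sees one format. A hypothetical restatement of the mapping, assuming offset_type above:

    def v0_to_v1_entry(e, nodemap):
        # e is an unpacked v0 tuple: (offset, csize, base, link, p1, p2, node)
        return (offset_type(e[0], 0),  # pack the offset with a zero type field
                e[1],                  # compressed size carries over
                -1,                    # v0 stored no raw size; size() treats
                                       # negative values as "recompute"
                e[2],                  # delta-chain base rev
                e[3],                  # linkrev
                nodemap[e[4]],         # parent nodeids become parent revs;
                nodemap[e[5]],         # parents always precede children
                e[6])                  # the entry's own nodeid, unchanged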
312 316
313 317 class revlogio(object):
314 318 def __init__(self):
315 319 self.chunkcache = None
320 self.size = struct.calcsize(indexformatng)
316 321
317 322 def parseindex(self, fp, st, inline):
318 323 if (lazyparser.safe_to_use and not inline and
319 324 st and st.st_size > 1000000):
320 325 # big index, let's parse it on demand
321 326 parser = lazyparser(fp, st.st_size)
322 327 index = lazyindex(parser)
323 328 nodemap = lazymap(parser)
324 329 e = list(index[0])
325 330 type = gettype(e[0])
326 331 e[0] = offset_type(0, type)
327 332 index[0] = e
328 333 return index, nodemap
329 334
330 s = struct.calcsize(indexformatng)
335 s = self.size
331 336 index = []
332 337 nodemap = {nullid: nullrev}
333 338 n = off = 0
334 339 # if we're not using lazymap, always read the whole index
335 340 data = fp.read()
336 341 l = len(data)
337 342 if inline:
338 343 self.chunkcache = (0, data)
339 344 while off + s <= l:
340 345 e = struct.unpack(indexformatng, data[off:off + s])
341 346 index.append(e)
342 347 nodemap[e[-1]] = n
343 348 n += 1
344 349 off += s
345 350 if inline:
346 351 if e[1] < 0:
347 352 break
348 353 off += e[1]
349 354
350 355 e = list(index[0])
351 356 type = gettype(e[0])
352 357 e[0] = offset_type(0, type)
353 358 index[0] = e
354 359
355 360 return index, nodemap
356 361
357 362 class revlog(object):
358 363 """
359 364 the underlying revision storage object
360 365
361 366 A revlog consists of two parts, an index and the revision data.
362 367
363 368 The index is a file with a fixed record size containing
364 369 information on each revision, including its nodeid (hash), the
365 370 nodeids of its parents, the position and offset of its data within
366 371 the data file, and the revision it's based on. Finally, each entry
367 372 contains a linkrev entry that can serve as a pointer to external
368 373 data.
369 374
370 375 The revision data itself is a linear collection of data chunks.
371 376 Each chunk represents a revision and is usually represented as a
372 377 delta against the previous chunk. To bound lookup time, runs of
373 378 deltas are limited to about 2 times the length of the original
374 379 version data. This makes retrieval of a version proportional to
375 380 its size, or O(1) relative to the number of revisions.
376 381
377 382 Both pieces of the revlog are written to in an append-only
378 383 fashion, which means we never need to rewrite a file to insert or
379 384 remove data, and can use some simple techniques to avoid the need
380 385 for locking while reading.
381 386 """
382 387 def __init__(self, opener, indexfile):
383 388 """
384 389 create a revlog object
385 390
386 391 opener is a function that abstracts the file opening operation
387 392 and can be used to implement COW semantics or the like.
388 393 """
389 394 self.indexfile = indexfile
390 395 self.datafile = indexfile[:-2] + ".d"
391 396 self.opener = opener
392 397
393 398 self.indexstat = None
394 399 self.cache = None
395 400 self.defversion = REVLOG_DEFAULT_VERSION
396 401 if hasattr(opener, "defversion"):
397 402 self.defversion = opener.defversion
398 403 if self.defversion & REVLOGNG:
399 404 self.defversion |= REVLOGNGINLINEDATA
400 405 self._load()
401 406
402 407 def _load(self):
403 408 v = self.defversion
404 409 try:
405 410 f = self.opener(self.indexfile)
406 411 i = f.read(4)
407 412 f.seek(0)
408 413 if len(i) > 0:
409 414 v = struct.unpack(versionformat, i)[0]
410 415 except IOError, inst:
411 416 if inst.errno != errno.ENOENT:
412 417 raise
413 418 i = ""
414 419 else:
415 420 try:
416 421 st = util.fstat(f)
417 422 except AttributeError, inst:
418 423 st = None
419 424 else:
420 425 oldst = self.indexstat
421 426 if (oldst and st.st_dev == oldst.st_dev
422 427 and st.st_ino == oldst.st_ino
423 428 and st.st_mtime == oldst.st_mtime
424 429 and st.st_ctime == oldst.st_ctime
425 430 and st.st_size == oldst.st_size):
426 431 return
427 432 self.indexstat = st
428 433 flags = v & ~0xFFFF
429 434 fmt = v & 0xFFFF
430 435 if fmt == REVLOGV0:
431 436 if flags:
432 437 raise RevlogError(_("index %s unknown flags %#04x for format v0")
433 438 % (self.indexfile, flags >> 16))
434 439 elif fmt == REVLOGNG:
435 440 if flags & ~REVLOGNGINLINEDATA:
436 441 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
437 442 % (self.indexfile, flags >> 16))
438 443 else:
439 444 raise RevlogError(_("index %s unknown format %d")
440 445 % (self.indexfile, fmt))
441 446 self.version = v
442 447 self.nodemap = {nullid: nullrev}
443 448 self.index = []
444 449 self._io = revlogio()
445 self.indexformat = indexformatng
446 450 if self.version == REVLOGV0:
447 451 self._io = revlogoldio()
448 self.indexformat = indexformatv0
449 452 if i:
450 453 self.index, self.nodemap = self._io.parseindex(f, st, self._inline())
451 454
452 455 def _loadindex(self, start, end):
453 456 """load a block of indexes all at once from the lazy parser"""
454 457 if isinstance(self.index, lazyindex):
455 458 self.index.p.loadindex(start, end)
456 459
457 460 def _loadindexmap(self):
458 461 """loads both the map and the index from the lazy parser"""
459 462 if isinstance(self.index, lazyindex):
460 463 p = self.index.p
461 464 p.loadindex()
462 465 self.nodemap = p.map
463 466
464 467 def _loadmap(self):
465 468 """loads the map from the lazy parser"""
466 469 if isinstance(self.nodemap, lazymap):
467 470 self.nodemap.p.loadmap()
468 471 self.nodemap = self.nodemap.p.map
469 472
470 473 def _inline(self): return self.version & REVLOGNGINLINEDATA
471 474
472 475 def tip(self): return self.node(len(self.index) - 1)
473 476 def count(self): return len(self.index)
474 477 def node(self, rev):
475 478 return rev == nullrev and nullid or self.index[rev][-1]
476 479 def rev(self, node):
477 480 try:
478 481 return self.nodemap[node]
479 482 except KeyError:
480 483 raise LookupError(_('%s: no node %s') % (self.indexfile, hex(node)))
481 484 def linkrev(self, node):
482 485 return (node == nullid) and nullrev or self.index[self.rev(node)][-4]
483 486 def parents(self, node):
484 487 if node == nullid: return (nullid, nullid)
485 488 r = self.rev(node)
486 489 d = self.index[r][-3:-1]
487 if self.version == REVLOGV0:
488 return d
489 490 return (self.node(d[0]), self.node(d[1]))
490 491 def parentrevs(self, rev):
491 492 if rev == nullrev:
492 493 return (nullrev, nullrev)
493 494 d = self.index[rev][-3:-1]
494 if self.version == REVLOGV0:
495 return (self.rev(d[0]), self.rev(d[1]))
496 495 return d
497 496 def start(self, rev):
498 497 if rev == nullrev:
499 498 return 0
500 if self.version != REVLOGV0:
501 return getoffset(self.index[rev][0])
502 return self.index[rev][0]
499 return getoffset(self.index[rev][0])
503 500
504 501 def end(self, rev): return self.start(rev) + self.length(rev)
505 502
506 503 def size(self, rev):
507 504 """return the length of the uncompressed text for a given revision"""
508 505 if rev == nullrev:
509 506 return 0
510 l = -1
511 if self.version != REVLOGV0:
512 l = self.index[rev][2]
507 l = self.index[rev][2]
513 508 if l >= 0:
514 509 return l
515 510
516 511 t = self.revision(self.node(rev))
517 512 return len(t)
518 513
519 514 # alternate implementation. The advantage of this code is that it
520 515 # will be faster for a single revision. But, the results are not
521 516 # cached, so finding the size of every revision will be slower.
522 517 """
523 518 if self.cache and self.cache[1] == rev:
524 519 return len(self.cache[2])
525 520
526 521 base = self.base(rev)
527 522 if self.cache and self.cache[1] >= base and self.cache[1] < rev:
528 523 base = self.cache[1]
529 524 text = self.cache[2]
530 525 else:
531 526 text = self.revision(self.node(base))
532 527
533 528 l = len(text)
534 529 for x in xrange(base + 1, rev + 1):
535 530 l = mdiff.patchedsize(l, self.chunk(x))
536 531 return l
537 532 """
538 533
539 534 def length(self, rev):
540 535 if rev == nullrev:
541 536 return 0
542 537 else:
543 538 return self.index[rev][1]
544 539 def base(self, rev):
545 540 if (rev == nullrev):
546 541 return nullrev
547 542 else:
548 543 return self.index[rev][-5]
549 544
550 545 def reachable(self, node, stop=None):
551 546 """return a hash of all nodes ancestral to a given node, including
552 547 the node itself, stopping when stop is matched"""
553 548 reachable = {}
554 549 visit = [node]
555 550 reachable[node] = 1
556 551 if stop:
557 552 stopn = self.rev(stop)
558 553 else:
559 554 stopn = 0
560 555 while visit:
561 556 n = visit.pop(0)
562 557 if n == stop:
563 558 continue
564 559 if n == nullid:
565 560 continue
566 561 for p in self.parents(n):
567 562 if self.rev(p) < stopn:
568 563 continue
569 564 if p not in reachable:
570 565 reachable[p] = 1
571 566 visit.append(p)
572 567 return reachable
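
The same walk, restated over a plain parents mapping to make the breadth-first marking explicit (illustrative only; names are hypothetical):

    def reachable_sketch(parents, node):
        seen = {node: 1}
        visit = [node]
        while visit:
            n = visit.pop(0)
            for p in parents.get(n, ()):
                if p not in seen:
                    seen[p] = 1
                    visit.append(p)
        return seen

    # a -> b -> c: everything is ancestral to c
    assert sorted(reachable_sketch({"c": ["b"], "b": ["a"]}, "c")) == ["a", "b", "c"]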
573 568
574 569 def nodesbetween(self, roots=None, heads=None):
575 570 """Return a tuple containing three elements. Elements 1 and 2 contain
576 571 a final list of bases and heads after all the unreachable ones have been
577 572 pruned. Element 0 contains a topologically sorted list of all
578 573
579 574 nodes that satisfy these constraints:
580 575 1. All nodes must be descended from a node in roots (the nodes on
581 576 roots are considered descended from themselves).
582 577 2. All nodes must also be ancestors of a node in heads (the nodes in
583 578 heads are considered to be their own ancestors).
584 579
585 580 If roots is unspecified, nullid is assumed as the only root.
586 581 If heads is unspecified, it is taken to be the output of the
587 582 heads method (i.e. a list of all nodes in the repository that
588 583 have no children)."""
589 584 nonodes = ([], [], [])
590 585 if roots is not None:
591 586 roots = list(roots)
592 587 if not roots:
593 588 return nonodes
594 589 lowestrev = min([self.rev(n) for n in roots])
595 590 else:
596 591 roots = [nullid] # Everybody's a descendent of nullid
597 592 lowestrev = nullrev
598 593 if (lowestrev == nullrev) and (heads is None):
599 594 # We want _all_ the nodes!
600 595 return ([self.node(r) for r in xrange(0, self.count())],
601 596 [nullid], list(self.heads()))
602 597 if heads is None:
603 598 # All nodes are ancestors, so the latest ancestor is the last
604 599 # node.
605 600 highestrev = self.count() - 1
606 601 # Set ancestors to None to signal that every node is an ancestor.
607 602 ancestors = None
608 603 # Set heads to an empty dictionary for later discovery of heads
609 604 heads = {}
610 605 else:
611 606 heads = list(heads)
612 607 if not heads:
613 608 return nonodes
614 609 ancestors = {}
615 610 # Turn heads into a dictionary so we can remove 'fake' heads.
616 611 # Also, later we will be using it to filter out the heads we can't
617 612 # find from roots.
618 613 heads = dict.fromkeys(heads, 0)
619 614 # Start at the top and keep marking parents until we're done.
620 615 nodestotag = heads.keys()
621 616 # Remember where the top was so we can use it as a limit later.
622 617 highestrev = max([self.rev(n) for n in nodestotag])
623 618 while nodestotag:
624 619 # grab a node to tag
625 620 n = nodestotag.pop()
626 621 # Never tag nullid
627 622 if n == nullid:
628 623 continue
629 624 # A node's revision number represents its place in a
630 625 # topologically sorted list of nodes.
631 626 r = self.rev(n)
632 627 if r >= lowestrev:
633 628 if n not in ancestors:
634 629 # If we are possibly a descendent of one of the roots
635 630 # and we haven't already been marked as an ancestor
636 631 ancestors[n] = 1 # Mark as ancestor
637 632 # Add non-nullid parents to list of nodes to tag.
638 633 nodestotag.extend([p for p in self.parents(n) if
639 634 p != nullid])
640 635 elif n in heads: # We've seen it before, is it a fake head?
641 636 # So it is, real heads should not be the ancestors of
642 637 # any other heads.
643 638 heads.pop(n)
644 639 if not ancestors:
645 640 return nonodes
646 641 # Now that we have our set of ancestors, we want to remove any
647 642 # roots that are not ancestors.
648 643
649 644 # If one of the roots was nullid, everything is included anyway.
650 645 if lowestrev > nullrev:
651 646 # But, since we weren't, let's recompute the lowest rev to not
652 647 # include roots that aren't ancestors.
653 648
654 649 # Filter out roots that aren't ancestors of heads
655 650 roots = [n for n in roots if n in ancestors]
656 651 # Recompute the lowest revision
657 652 if roots:
658 653 lowestrev = min([self.rev(n) for n in roots])
659 654 else:
660 655 # No more roots? Return empty list
661 656 return nonodes
662 657 else:
663 658 # We are descending from nullid, and don't need to care about
664 659 # any other roots.
665 660 lowestrev = nullrev
666 661 roots = [nullid]
667 662 # Transform our roots list into a 'set' (i.e. a dictionary where the
668 663 # values don't matter.)
669 664 descendents = dict.fromkeys(roots, 1)
670 665 # Also, keep the original roots so we can filter out roots that aren't
671 666 # 'real' roots (i.e. are descended from other roots).
672 667 roots = descendents.copy()
673 668 # Our topologically sorted list of output nodes.
674 669 orderedout = []
675 670 # Don't start at nullid since we don't want nullid in our output list,
676 671 # and if nullid shows up in descendents, empty parents will look like
677 672 # they're descendents.
678 673 for r in xrange(max(lowestrev, 0), highestrev + 1):
679 674 n = self.node(r)
680 675 isdescendent = False
681 676 if lowestrev == nullrev: # Everybody is a descendent of nullid
682 677 isdescendent = True
683 678 elif n in descendents:
684 679 # n is already a descendent
685 680 isdescendent = True
686 681 # This check only needs to be done here because all the roots
687 682 # will start being marked as descendents before the loop.
688 683 if n in roots:
689 684 # If n was a root, check if it's a 'real' root.
690 685 p = tuple(self.parents(n))
691 686 # If any of its parents are descendents, it's not a root.
692 687 if (p[0] in descendents) or (p[1] in descendents):
693 688 roots.pop(n)
694 689 else:
695 690 p = tuple(self.parents(n))
696 691 # A node is a descendent if either of its parents are
697 692 # descendents. (We seeded the descendents list with the roots
698 693 # up there, remember?)
699 694 if (p[0] in descendents) or (p[1] in descendents):
700 695 descendents[n] = 1
701 696 isdescendent = True
702 697 if isdescendent and ((ancestors is None) or (n in ancestors)):
703 698 # Only include nodes that are both descendents and ancestors.
704 699 orderedout.append(n)
705 700 if (ancestors is not None) and (n in heads):
706 701 # We're trying to figure out which heads are reachable
707 702 # from roots.
708 703 # Mark this head as having been reached
709 704 heads[n] = 1
710 705 elif ancestors is None:
711 706 # Otherwise, we're trying to discover the heads.
712 707 # Assume this is a head because if it isn't, the next step
713 708 # will eventually remove it.
714 709 heads[n] = 1
715 710 # But, obviously its parents aren't.
716 711 for p in self.parents(n):
717 712 heads.pop(p, None)
718 713 heads = [n for n in heads.iterkeys() if heads[n] != 0]
719 714 roots = roots.keys()
720 715 assert orderedout
721 716 assert roots
722 717 assert heads
723 718 return (orderedout, roots, heads)
724 719
725 720 def heads(self, start=None, stop=None):
726 721 """return the list of all nodes that have no children
727 722
728 723 if start is specified, only heads that are descendants of
729 724 start will be returned
730 725 if stop is specified, it will consider all the revs from stop
731 726 as if they had no children
732 727 """
733 728 if start is None:
734 729 start = nullid
735 730 if stop is None:
736 731 stop = []
737 732 stoprevs = dict.fromkeys([self.rev(n) for n in stop])
738 733 startrev = self.rev(start)
739 734 reachable = {startrev: 1}
740 735 heads = {startrev: 1}
741 736
742 737 parentrevs = self.parentrevs
743 738 for r in xrange(startrev + 1, self.count()):
744 739 for p in parentrevs(r):
745 740 if p in reachable:
746 741 if r not in stoprevs:
747 742 reachable[r] = 1
748 743 heads[r] = 1
749 744 if p in heads and p not in stoprevs:
750 745 del heads[p]
751 746
752 747 return [self.node(r) for r in heads]
753 748
754 749 def children(self, node):
755 750 """find the children of a given node"""
756 751 c = []
757 752 p = self.rev(node)
758 753 for r in range(p + 1, self.count()):
759 754 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
760 755 if prevs:
761 756 for pr in prevs:
762 757 if pr == p:
763 758 c.append(self.node(r))
764 759 elif p == nullrev:
765 760 c.append(self.node(r))
766 761 return c
767 762
768 763 def _match(self, id):
769 764 if isinstance(id, (long, int)):
770 765 # rev
771 766 return self.node(id)
772 767 if len(id) == 20:
773 768 # possibly a binary node
774 769 # odds of a binary node being all hex in ASCII are 1 in 10**25
775 770 try:
776 771 node = id
777 772 r = self.rev(node) # quick search the index
778 773 return node
779 774 except LookupError:
780 775 pass # may be partial hex id
781 776 try:
782 777 # str(rev)
783 778 rev = int(id)
784 779 if str(rev) != id: raise ValueError
785 780 if rev < 0: rev = self.count() + rev
786 781 if rev < 0 or rev >= self.count(): raise ValueError
787 782 return self.node(rev)
788 783 except (ValueError, OverflowError):
789 784 pass
790 785 if len(id) == 40:
791 786 try:
792 787 # a full hex nodeid?
793 788 node = bin(id)
794 789 r = self.rev(node)
795 790 return node
796 791 except TypeError:
797 792 pass
798 793
799 794 def _partialmatch(self, id):
800 795 if len(id) < 40:
801 796 try:
802 797 # hex(node)[:...]
803 798 bin_id = bin(id[:len(id) & ~1]) # grab an even number of digits
804 799 node = None
805 800 for n in self.nodemap:
806 801 if n.startswith(bin_id) and hex(n).startswith(id):
807 802 if node is not None:
808 803 raise LookupError(_("Ambiguous identifier"))
809 804 node = n
810 805 if node is not None:
811 806 return node
812 807 except TypeError:
813 808 pass
814 809
815 810 def lookup(self, id):
816 811 """locate a node based on:
817 812 - revision number or str(revision number)
818 813 - nodeid or subset of hex nodeid
819 814 """
820 815
821 816 n = self._match(id)
822 817 if n is not None:
823 818 return n
824 819 n = self._partialmatch(id)
825 820 if n:
826 821 return n
827 822
828 823 raise LookupError(_("No match found"))
829 824
830 825 def cmp(self, node, text):
831 826 """compare text with a given file revision"""
832 827 p1, p2 = self.parents(node)
833 828 return hash(text, p1, p2) != node
834 829
835 830 def diff(self, a, b):
836 831 """return a delta between two revisions"""
837 832 return mdiff.textdiff(a, b)
838 833
839 834 def patches(self, t, pl):
840 835 """apply a list of patches to a string"""
841 836 return mdiff.patches(t, pl)
842 837
843 838 def chunk(self, rev, df=None, cachelen=4096):
844 839 start, length = self.start(rev), self.length(rev)
845 840 inline = self._inline()
846 841 if inline:
847 start += (rev + 1) * struct.calcsize(self.indexformat)
842 start += (rev + 1) * self._io.size
848 843 end = start + length
849 844 def loadcache(df):
850 845 cache_length = max(cachelen, length) # 4k
851 846 if not df:
852 847 if inline:
853 848 df = self.opener(self.indexfile)
854 849 else:
855 850 df = self.opener(self.datafile)
856 851 df.seek(start)
857 852 self._io.chunkcache = (start, df.read(cache_length))
858 853
859 854 if not self._io.chunkcache:
860 855 loadcache(df)
861 856
862 857 cache_start = self._io.chunkcache[0]
863 858 cache_end = cache_start + len(self._io.chunkcache[1])
864 859 if start >= cache_start and end <= cache_end:
865 860 # it is cached
866 861 offset = start - cache_start
867 862 else:
868 863 loadcache(df)
869 864 offset = 0
870 865
871 866 #def checkchunk():
872 867 # df = self.opener(self.datafile)
873 868 # df.seek(start)
874 869 # return df.read(length)
875 870 #assert s == checkchunk()
876 871 return decompress(self._io.chunkcache[1][offset:offset + length])
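
For an inline revlog the .i file interleaves a fixed-size index entry before each data chunk, which is why the start offset above is shifted by (rev + 1) records. A small illustration with the 64-byte revlogng entry size (hypothetical helper):

    RECORD = 64  # struct.calcsize(indexformatng)

    def inline_offset(logical_start, rev):
        # file layout: [entry 0][chunk 0][entry 1][chunk 1] ...
        return logical_start + (rev + 1) * RECORD

    assert inline_offset(0, 0) == 64     # rev 0's data follows its own entry
    assert inline_offset(100, 2) == 292  # three entries precede rev 2's data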
877 872
878 873 def delta(self, node):
879 874 """return or calculate a delta between a node and its predecessor"""
880 875 r = self.rev(node)
881 876 return self.revdiff(r - 1, r)
882 877
883 878 def revdiff(self, rev1, rev2):
884 879 """return or calculate a delta between two revisions"""
885 880 b1 = self.base(rev1)
886 881 b2 = self.base(rev2)
887 882 if b1 == b2 and rev1 + 1 == rev2:
888 883 return self.chunk(rev2)
889 884 else:
890 885 return self.diff(self.revision(self.node(rev1)),
891 886 self.revision(self.node(rev2)))
892 887
893 888 def revision(self, node):
894 889 """return an uncompressed revision of a given"""
895 890 if node == nullid: return ""
896 891 if self.cache and self.cache[0] == node: return self.cache[2]
897 892
898 893 # look up what we need to read
899 894 text = None
900 895 rev = self.rev(node)
901 896 base = self.base(rev)
902 897
903 898 if self._inline():
904 899 # we probably have the whole chunk cached
905 900 df = None
906 901 else:
907 902 df = self.opener(self.datafile)
908 903
909 904 # do we have useful data cached?
910 905 if self.cache and self.cache[1] >= base and self.cache[1] < rev:
911 906 base = self.cache[1]
912 907 text = self.cache[2]
913 908 self._loadindex(base, rev + 1)
914 909 else:
915 910 self._loadindex(base, rev + 1)
916 911 text = self.chunk(base, df=df)
917 912
918 913 bins = []
919 914 for r in xrange(base + 1, rev + 1):
920 915 bins.append(self.chunk(r, df=df))
921 916
922 917 text = self.patches(text, bins)
923 918
924 919 p1, p2 = self.parents(node)
925 920 if node != hash(text, p1, p2):
926 921 raise RevlogError(_("integrity check failed on %s:%d")
927 922 % (self.datafile, rev))
928 923
929 924 self.cache = (node, rev, text)
930 925 return text
931 926
932 927 def checkinlinesize(self, tr, fp=None):
933 928 if not self._inline():
934 929 return
935 930 if not fp:
936 931 fp = self.opener(self.indexfile, 'r')
937 932 fp.seek(0, 2)
938 933 size = fp.tell()
939 934 if size < 131072:
940 935 return
941 936 trinfo = tr.find(self.indexfile)
942 937 if trinfo == None:
943 938 raise RevlogError(_("%s not found in the transaction")
944 939 % self.indexfile)
945 940
946 941 trindex = trinfo[2]
947 942 dataoff = self.start(trindex)
948 943
949 944 tr.add(self.datafile, dataoff)
950 945 df = self.opener(self.datafile, 'w')
951 calc = struct.calcsize(self.indexformat)
946 calc = self._io.size
952 947 for r in xrange(self.count()):
953 948 start = self.start(r) + (r + 1) * calc
954 949 length = self.length(r)
955 950 fp.seek(start)
956 951 d = fp.read(length)
957 952 df.write(d)
958 953 fp.close()
959 954 df.close()
960 955 fp = self.opener(self.indexfile, 'w', atomictemp=True)
961 956 self.version &= ~(REVLOGNGINLINEDATA)
962 957 if self.count():
963 958 x = self.index[0]
964 e = struct.pack(self.indexformat, *x)[4:]
959 e = struct.pack(indexformatng, *x)[4:]
965 960 l = struct.pack(versionformat, self.version)
966 961 fp.write(l)
967 962 fp.write(e)
968 963
969 964 for i in xrange(1, self.count()):
970 965 x = self.index[i]
971 e = struct.pack(self.indexformat, *x)
966 e = struct.pack(indexformatng, *x)
972 967 fp.write(e)
973 968
974 969 # if we don't call rename, the temp file will never replace the
975 970 # real index
976 971 fp.rename()
977 972
978 973 tr.replace(self.indexfile, trindex * calc)
979 974 self._io.chunkcache = None
980 975
981 976 def addrevision(self, text, transaction, link, p1=None, p2=None, d=None):
982 977 """add a revision to the log
983 978
984 979 text - the revision data to add
985 980 transaction - the transaction object used for rollback
986 981 link - the linkrev data to add
987 982 p1, p2 - the parent nodeids of the revision
988 983 d - an optional precomputed delta
989 984 """
990 985 if not self._inline():
991 986 dfh = self.opener(self.datafile, "a")
992 987 else:
993 988 dfh = None
994 989 ifh = self.opener(self.indexfile, "a+")
995 990 return self._addrevision(text, transaction, link, p1, p2, d, ifh, dfh)
996 991
997 992 def _addrevision(self, text, transaction, link, p1, p2, d, ifh, dfh):
998 993 if text is None: text = ""
999 994 if p1 is None: p1 = self.tip()
1000 995 if p2 is None: p2 = nullid
1001 996
1002 997 node = hash(text, p1, p2)
1003 998
1004 999 if node in self.nodemap:
1005 1000 return node
1006 1001
1007 1002 n = self.count()
1008 1003 t = n - 1
1009 1004
1010 1005 if n:
1011 1006 base = self.base(t)
1012 1007 start = self.start(base)
1013 1008 end = self.end(t)
1014 1009 if not d:
1015 1010 prev = self.revision(self.tip())
1016 1011 d = self.diff(prev, text)
1017 1012 data = compress(d)
1018 1013 l = len(data[1]) + len(data[0])
1019 1014 dist = end - start + l
1020 1015
1021 1016 # full versions are inserted when the needed deltas
1022 1017 # become comparable to the uncompressed text
1023 1018 if not n or dist > len(text) * 2:
1024 1019 data = compress(text)
1025 1020 l = len(data[1]) + len(data[0])
1026 1021 base = n
1027 1022 else:
1028 1023 base = self.base(t)
1029 1024
1030 1025 offset = 0
1031 1026 if t >= 0:
1032 1027 offset = self.end(t)
1033 1028
1034 if self.version == REVLOGV0:
1035 e = (offset, l, base, link, p1, p2, node)
1036 else:
1037 e = (offset_type(offset, 0), l, len(text),
1038 base, link, self.rev(p1), self.rev(p2), node)
1029 e = (offset_type(offset, 0), l, len(text),
1030 base, link, self.rev(p1), self.rev(p2), node)
1039 1031
1040 1032 self.index.append(e)
1041 1033 self.nodemap[node] = n
1042 entry = struct.pack(self.indexformat, *e)
1034
1035 if self.version == REVLOGV0:
1036 e = (offset, l, base, link, p1, p2, node)
1037 entry = struct.pack(indexformatv0, *e)
1038 else:
1039 entry = struct.pack(indexformatng, *e)
1043 1040
1044 1041 if not self._inline():
1045 1042 transaction.add(self.datafile, offset)
1046 1043 transaction.add(self.indexfile, n * len(entry))
1047 1044 if data[0]:
1048 1045 dfh.write(data[0])
1049 1046 dfh.write(data[1])
1050 1047 dfh.flush()
1051 1048 else:
1052 1049 ifh.seek(0, 2)
1053 1050 transaction.add(self.indexfile, ifh.tell(), self.count() - 1)
1054 1051
1055 1052 if len(self.index) == 1 and self.version != REVLOGV0:
1056 1053 l = struct.pack(versionformat, self.version)
1057 1054 ifh.write(l)
1058 1055 entry = entry[4:]
1059 1056
1060 1057 ifh.write(entry)
1061 1058
1062 1059 if self._inline():
1063 1060 ifh.write(data[0])
1064 1061 ifh.write(data[1])
1065 1062 self.checkinlinesize(transaction, ifh)
1066 1063
1067 1064 self.cache = (node, n, text)
1068 1065 return node
1069 1066
1070 1067 def ancestor(self, a, b):
1071 1068 """calculate the least common ancestor of nodes a and b"""
1072 1069
1073 1070 def parents(rev):
1074 1071 return [p for p in self.parentrevs(rev) if p != nullrev]
1075 1072
1076 1073 c = ancestor.ancestor(self.rev(a), self.rev(b), parents)
1077 1074 if c is None:
1078 1075 return nullid
1079 1076
1080 1077 return self.node(c)
1081 1078
1082 1079 def group(self, nodelist, lookup, infocollect=None):
1083 1080 """calculate a delta group
1084 1081
1085 1082 Given a list of changeset revs, return a set of deltas and
1086 1083 metadata corresponding to nodes. The first delta is
1087 1084 parent(nodes[0]) -> nodes[0]; the receiver is guaranteed to
1088 1085 have this parent as it has all history before these
1089 1086 changesets. The parent is parent[0].
1090 1087 """
1091 1088 revs = [self.rev(n) for n in nodelist]
1092 1089
1093 1090 # if we don't have any revisions touched by these changesets, bail
1094 1091 if not revs:
1095 1092 yield changegroup.closechunk()
1096 1093 return
1097 1094
1098 1095 # add the parent of the first rev
1099 1096 p = self.parents(self.node(revs[0]))[0]
1100 1097 revs.insert(0, self.rev(p))
1101 1098
1102 1099 # build deltas
1103 1100 for d in xrange(0, len(revs) - 1):
1104 1101 a, b = revs[d], revs[d + 1]
1105 1102 nb = self.node(b)
1106 1103
1107 1104 if infocollect is not None:
1108 1105 infocollect(nb)
1109 1106
1110 1107 d = self.revdiff(a, b)
1111 1108 p = self.parents(nb)
1112 1109 meta = nb + p[0] + p[1] + lookup(nb)
1113 1110 yield changegroup.genchunk("%s%s" % (meta, d))
1114 1111
1115 1112 yield changegroup.closechunk()
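
Each chunk yielded above is an 80-byte header (node, parent 1, parent 2, changeset link, 20 bytes each) followed by the delta, which is exactly what addgroup() below unpacks. A packing sketch with a hypothetical helper name:

    import struct

    def packchunk(node, p1, p2, cs, delta):
        # mirrors the meta construction above and the unpack in addgroup()
        return struct.pack("20s20s20s20s", node, p1, p2, cs) + delta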
1116 1113
1117 1114 def addgroup(self, revs, linkmapper, transaction, unique=0):
1118 1115 """
1119 1116 add a delta group
1120 1117
1121 1118 given a set of deltas, add them to the revision log. the
1122 1119 first delta is against its parent, which should be in our
1123 1120 log, the rest are against the previous delta.
1124 1121 """
1125 1122
1126 1123 #track the base of the current delta log
1127 1124 r = self.count()
1128 1125 t = r - 1
1129 1126 node = None
1130 1127
1131 1128 base = prev = nullrev
1132 1129 start = end = textlen = 0
1133 1130 if r:
1134 1131 end = self.end(t)
1135 1132
1136 1133 ifh = self.opener(self.indexfile, "a+")
1137 1134 ifh.seek(0, 2)
1138 1135 transaction.add(self.indexfile, ifh.tell(), self.count())
1139 1136 if self._inline():
1140 1137 dfh = None
1141 1138 else:
1142 1139 transaction.add(self.datafile, end)
1143 1140 dfh = self.opener(self.datafile, "a")
1144 1141
1145 1142 # loop through our set of deltas
1146 1143 chain = None
1147 1144 for chunk in revs:
1148 1145 node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
1149 1146 link = linkmapper(cs)
1150 1147 if node in self.nodemap:
1151 1148 # this can happen if two branches make the same change
1152 1149 # if unique:
1153 1150 # raise RevlogError(_("already have %s") % hex(node[:4]))
1154 1151 chain = node
1155 1152 continue
1156 1153 delta = chunk[80:]
1157 1154
1158 1155 for p in (p1, p2):
1159 1156 if not p in self.nodemap:
1160 1157 raise LookupError(_("unknown parent %s") % short(p))
1161 1158
1162 1159 if not chain:
1163 1160 # retrieve the parent revision of the delta chain
1164 1161 chain = p1
1165 1162 if not chain in self.nodemap:
1166 1163 raise LookupError(_("unknown base %s") % short(chain[:4]))
1167 1164
1168 1165 # full versions are inserted when the needed deltas become
1169 1166 # comparable to the uncompressed text or when the previous
1170 1167 # version is not the one we have a delta against. We use
1171 1168 # the size of the previous full rev as a proxy for the
1172 1169 # current size.
1173 1170
1174 1171 if chain == prev:
1175 1172 tempd = compress(delta)
1176 1173 cdelta = tempd[0] + tempd[1]
1177 1174 textlen = mdiff.patchedsize(textlen, delta)
1178 1175
1179 1176 if chain != prev or (end - start + len(cdelta)) > textlen * 2:
1180 1177 # flush our writes here so we can read it in revision
1181 1178 if dfh:
1182 1179 dfh.flush()
1183 1180 ifh.flush()
1184 1181 text = self.revision(chain)
1185 1182 text = self.patches(text, [delta])
1186 1183 chk = self._addrevision(text, transaction, link, p1, p2, None,
1187 1184 ifh, dfh)
1188 1185 if not dfh and not self._inline():
1189 1186 # addrevision switched from inline to conventional
1190 1187 # reopen the index
1191 1188 dfh = self.opener(self.datafile, "a")
1192 1189 ifh = self.opener(self.indexfile, "a")
1193 1190 if chk != node:
1194 1191 raise RevlogError(_("consistency error adding group"))
1195 1192 textlen = len(text)
1196 1193 else:
1197 if self.version == REVLOGV0:
1198 e = (end, len(cdelta), base, link, p1, p2, node)
1199 else:
1200 e = (offset_type(end, 0), len(cdelta), textlen, base,
1201 link, self.rev(p1), self.rev(p2), node)
1194 e = (offset_type(end, 0), len(cdelta), textlen, base,
1195 link, self.rev(p1), self.rev(p2), node)
1202 1196 self.index.append(e)
1203 1197 self.nodemap[node] = r
1204 1198 if self._inline():
1205 ifh.write(struct.pack(self.indexformat, *e))
1199 ifh.write(struct.pack(indexformatng, *e))
1206 1200 ifh.write(cdelta)
1207 1201 self.checkinlinesize(transaction, ifh)
1208 1202 if not self._inline():
1209 1203 dfh = self.opener(self.datafile, "a")
1210 1204 ifh = self.opener(self.indexfile, "a")
1211 1205 else:
1206 if self.version == REVLOGV0:
1207 e = (end, len(cdelta), base, link, p1, p2, node)
1208 entry = struct.pack(indexformatv0, *e)
1209 else:
1210 entry = struct.pack(indexformatng, *e)
1212 1211 dfh.write(cdelta)
1213 ifh.write(struct.pack(self.indexformat, *e))
1212 ifh.write(entry)
1214 1213
1215 1214 t, r, chain, prev = r, r + 1, node, node
1216 1215 base = self.base(t)
1217 1216 start = self.start(base)
1218 1217 end = self.end(t)
1219 1218
1220 1219 return node
1221 1220
1222 1221 def strip(self, rev, minlink):
1223 1222 if self.count() == 0 or rev >= self.count():
1224 1223 return
1225 1224
1226 1225 if isinstance(self.index, lazyindex):
1227 1226 self._loadindexmap()
1228 1227
1229 1228 # When stripping away a revision, we need to make sure it
1230 1229 # does not actually belong to an older changeset.
1231 1230 # The minlink parameter defines the oldest revision
1232 1231 # we're allowed to strip away.
1233 1232 while minlink > self.index[rev][-4]:
1234 1233 rev += 1
1235 1234 if rev >= self.count():
1236 1235 return
1237 1236
1238 1237 # first truncate the files on disk
1239 1238 end = self.start(rev)
1240 1239 if not self._inline():
1241 1240 df = self.opener(self.datafile, "a")
1242 1241 df.truncate(end)
1243 end = rev * struct.calcsize(self.indexformat)
1242 end = rev * self._io.size
1244 1243 else:
1245 end += rev * struct.calcsize(self.indexformat)
1244 end += rev * self._io.size
1246 1245
1247 1246 indexf = self.opener(self.indexfile, "a")
1248 1247 indexf.truncate(end)
1249 1248
1250 1249 # then reset internal state in memory to forget those revisions
1251 1250 self.cache = None
1252 1251 self._io.chunkcache = None
1253 1252 for x in xrange(rev, self.count()):
1254 1253 del self.nodemap[self.node(x)]
1255 1254
1256 1255 del self.index[rev:]
1257 1256
1258 1257 def checksize(self):
1259 1258 expected = 0
1260 1259 if self.count():
1261 1260 expected = self.end(self.count() - 1)
1262 1261
1263 1262 try:
1264 1263 f = self.opener(self.datafile)
1265 1264 f.seek(0, 2)
1266 1265 actual = f.tell()
1267 1266 dd = actual - expected
1268 1267 except IOError, inst:
1269 1268 if inst.errno != errno.ENOENT:
1270 1269 raise
1271 1270 dd = 0
1272 1271
1273 1272 try:
1274 1273 f = self.opener(self.indexfile)
1275 1274 f.seek(0, 2)
1276 1275 actual = f.tell()
1277 s = struct.calcsize(self.indexformat)
1276 s = self._io.size
1278 1277 i = actual / s
1279 1278 di = actual - (i * s)
1280 1279 if self._inline():
1281 1280 databytes = 0
1282 1281 for r in xrange(self.count()):
1283 1282 databytes += self.length(r)
1284 1283 dd = 0
1285 1284 di = actual - self.count() * s - databytes
1286 1285 except IOError, inst:
1287 1286 if inst.errno != errno.ENOENT:
1288 1287 raise
1289 1288 di = 0
1290 1289
1291 1290 return (dd, di)
1292 1291
1293 1292