revlog: simplify the v1 immediate parser...
Matt Mackall
r4975:8b7e480a default
@@ -1,1318 +1,1296 @@
1 1 """
2 2 revlog.py - storage back-end for mercurial
3 3
4 4 This provides efficient delta storage with O(1) retrieve and append
5 5 and O(changes) merge between branches
6 6
7 7 Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
8 8
9 9 This software may be used and distributed according to the terms
10 10 of the GNU General Public License, incorporated herein by reference.
11 11 """
12 12
13 13 from node import *
14 14 from i18n import _
15 15 import binascii, changegroup, errno, ancestor, mdiff, os
16 16 import sha, struct, util, zlib
17 17
18 18 # revlog version strings
19 19 REVLOGV0 = 0
20 20 REVLOGNG = 1
21 21
22 22 # revlog flags
23 23 REVLOGNGINLINEDATA = (1 << 16)
24 24 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
25 25
26 26 REVLOG_DEFAULT_FORMAT = REVLOGNG
27 27 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
28 28
29 29 def hash(text, p1, p2):
30 30 """generate a hash from the given text and its parent hashes
31 31
32 32 This hash combines both the current file contents and its history
33 33 in a manner that makes it easy to distinguish nodes with the same
34 34 content in the revision graph.
35 35 """
36 36 l = [p1, p2]
37 37 l.sort()
38 38 s = sha.new(l[0])
39 39 s.update(l[1])
40 40 s.update(text)
41 41 return s.digest()
42 42
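The hashing scheme above is easy to reproduce outside the module. A minimal sketch, using hashlib's sha1 in place of the deprecated sha module used here; the parent sort is what makes the node id independent of parent order:

import hashlib

def node_hash(text, p1, p2):
    # sort the two 20-byte parent ids so parent order doesn't matter
    a, b = sorted([p1, p2])
    s = hashlib.sha1(a)
    s.update(b)
    s.update(text)
    return s.digest()

nullid = b"\0" * 20
assert node_hash(b"data", nullid, b"\x01" * 20) == \
       node_hash(b"data", b"\x01" * 20, nullid)
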
43 43 def compress(text):
44 44 """ generate a possibly-compressed representation of text """
45 45 if not text: return ("", text)
46 46 if len(text) < 44:
47 47 if text[0] == '\0': return ("", text)
48 48 return ('u', text)
49 49 bin = zlib.compress(text)
50 50 if len(bin) > len(text):
51 51 if text[0] == '\0': return ("", text)
52 52 return ('u', text)
53 53 return ("", bin)
54 54
55 55 def decompress(bin):
56 56 """ decompress the given input """
57 57 if not bin: return bin
58 58 t = bin[0]
59 59 if t == '\0': return bin
60 60 if t == 'x': return zlib.decompress(bin)
61 61 if t == 'u': return bin[1:]
62 62 raise RevlogError(_("unknown compression type %r") % t)
63 63
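Note how the two helpers cooperate through the first byte of the stored blob: zlib output starts with 'x', raw text is tagged 'u', and text that already begins with a NUL byte is stored untagged. A rough round-trip sketch of the same rules (bytes literals, so it runs on modern Python):

import zlib

def store(text):
    # mirror compress(): join its (tag, data) tuple into one blob
    if not text or (len(text) < 44 and text[:1] == b"\0"):
        return text
    if len(text) < 44:
        return b"u" + text
    bin = zlib.compress(text)
    if len(bin) > len(text):
        return text if text[:1] == b"\0" else b"u" + text
    return bin                      # starts with b"x"

def load(blob):
    # mirror decompress()
    if not blob or blob[:1] == b"\0":
        return blob
    if blob[:1] == b"x":
        return zlib.decompress(blob)
    if blob[:1] == b"u":
        return blob[1:]
    raise ValueError("unknown compression type %r" % blob[:1])

assert load(store(b"hello" * 100)) == b"hello" * 100
assert load(store(b"tiny")) == b"tiny"
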
64 64 indexformatv0 = ">4l20s20s20s"
65 65 v0shaoffset = 56
66 66 # index ng:
67 67 # 6 bytes offset
68 68 # 2 bytes flags
69 69 # 4 bytes compressed length
70 70 # 4 bytes uncompressed length
71 71 # 4 bytes: base rev
72 72 # 4 bytes link rev
73 73 # 4 bytes parent 1 rev
74 74 # 4 bytes parent 2 rev
75 75 # 32 bytes: nodeid
76 76 indexformatng = ">Qiiiiii20s12x"
77 77 ngshaoffset = 32
78 78 versionformat = ">I"
79 79
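A RevlogNG index entry is thus a fixed 64-byte record; the comment block above maps directly onto the struct codes (Q for the combined offset/flags quadword, six signed ints, the 20-byte sha, 12 bytes of padding). A quick sanity check with made-up values:

import struct

indexformatng = ">Qiiiiii20s12x"
assert struct.calcsize(indexformatng) == 64

# (offset<<16 | flags, compressed len, uncompressed len,
#  base rev, link rev, p1 rev, p2 rev, node)
entry = (4096 << 16, 120, 300, 5, 7, 6, -1, b"\xaa" * 20)
record = struct.pack(indexformatng, *entry)
assert struct.unpack(indexformatng, record)[-1] == b"\xaa" * 20
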
80 80 class lazyparser(object):
81 81 """
82 82 this class avoids the need to parse the entirety of large indices
83 83 """
84 84
85 85 # lazyparser is not safe to use on windows if the win32 extensions are not
86 86 # available. it keeps the file handle open, which makes it impossible
87 87 # to break hardlinks on local cloned repos.
88 88 safe_to_use = os.name != 'nt' or (not util.is_win_9x() and
89 89 hasattr(util, 'win32api'))
90 90
91 91 def __init__(self, dataf, size, indexformat, shaoffset):
92 92 self.dataf = dataf
93 93 self.format = indexformat
94 94 self.s = struct.calcsize(indexformat)
95 95 self.indexformat = indexformat
96 96 self.datasize = size
97 97 self.l = size/self.s
98 98 self.index = [None] * self.l
99 99 self.map = {nullid: nullrev}
100 100 self.allmap = 0
101 101 self.all = 0
102 102 self.mapfind_count = 0
103 103 self.shaoffset = shaoffset
104 104
105 105 def loadmap(self):
106 106 """
107 107 during a commit, we need to make sure the rev being added is
108 108 not a duplicate. This requires loading the entire index,
109 109 which is fairly slow. loadmap can load up just the node map,
110 110 which takes much less time.
111 111 """
112 112 if self.allmap: return
113 113 end = self.datasize
114 114 self.allmap = 1
115 115 cur = 0
116 116 count = 0
117 117 blocksize = self.s * 256
118 118 self.dataf.seek(0)
119 119 while cur < end:
120 120 data = self.dataf.read(blocksize)
121 121 off = 0
122 122 for x in xrange(256):
123 123 n = data[off + self.shaoffset:off + self.shaoffset + 20]
124 124 self.map[n] = count
125 125 count += 1
126 126 if count >= self.l:
127 127 break
128 128 off += self.s
129 129 cur += blocksize
130 130
131 131 def loadblock(self, blockstart, blocksize, data=None):
132 132 if self.all: return
133 133 if data is None:
134 134 self.dataf.seek(blockstart)
135 135 if blockstart + blocksize > self.datasize:
136 136 # the revlog may have grown since we've started running,
137 137 # but we don't have space in self.index for more entries.
138 138 # limit blocksize so that we don't get too much data.
139 139 blocksize = max(self.datasize - blockstart, 0)
140 140 data = self.dataf.read(blocksize)
141 141 lend = len(data) / self.s
142 142 i = blockstart / self.s
143 143 off = 0
144 144 # lazyindex supports __delitem__
145 145 if lend > len(self.index) - i:
146 146 lend = len(self.index) - i
147 147 for x in xrange(lend):
148 148 if self.index[i + x] == None:
149 149 b = data[off : off + self.s]
150 150 self.index[i + x] = b
151 151 n = b[self.shaoffset:self.shaoffset + 20]
152 152 self.map[n] = i + x
153 153 off += self.s
154 154
155 155 def findnode(self, node):
156 156 """search backwards through the index file for a specific node"""
157 157 if self.allmap: return None
158 158
159 159 # hg log will cause many many searches for the manifest
160 160 # nodes. After we get called a few times, just load the whole
161 161 # thing.
162 162 if self.mapfind_count > 8:
163 163 self.loadmap()
164 164 if node in self.map:
165 165 return node
166 166 return None
167 167 self.mapfind_count += 1
168 168 last = self.l - 1
169 169 while self.index[last] != None:
170 170 if last == 0:
171 171 self.all = 1
172 172 self.allmap = 1
173 173 return None
174 174 last -= 1
175 175 end = (last + 1) * self.s
176 176 blocksize = self.s * 256
177 177 while end >= 0:
178 178 start = max(end - blocksize, 0)
179 179 self.dataf.seek(start)
180 180 data = self.dataf.read(end - start)
181 181 findend = end - start
182 182 while True:
183 183 # we're searching backwards, so we have to make sure
184 184 # we don't find a changeset where this node is a parent
185 185 off = data.rfind(node, 0, findend)
186 186 findend = off
187 187 if off >= 0:
188 188 i = off / self.s
189 189 off = i * self.s
190 190 n = data[off + self.shaoffset:off + self.shaoffset + 20]
191 191 if n == node:
192 192 self.map[n] = i + start / self.s
193 193 return node
194 194 else:
195 195 break
196 196 end -= blocksize
197 197 return None
198 198
199 199 def loadindex(self, i=None, end=None):
200 200 if self.all: return
201 201 all = False
202 202 if i == None:
203 203 blockstart = 0
204 204 blocksize = (512 / self.s) * self.s
205 205 end = self.datasize
206 206 all = True
207 207 else:
208 208 if end:
209 209 blockstart = i * self.s
210 210 end = end * self.s
211 211 blocksize = end - blockstart
212 212 else:
213 213 blockstart = (i & ~63) * self.s
214 214 blocksize = self.s * 64
215 215 end = blockstart + blocksize
216 216 while blockstart < end:
217 217 self.loadblock(blockstart, blocksize)
218 218 blockstart += blocksize
219 219 if all: self.all = True
220 220
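In the single-entry case, loadindex rounds the requested position down to a 64-entry block, so a run of nearby lookups is served by one read. The rounding in isolation, with a hypothetical record size:

s = 64                       # bytes per index record (hypothetical)
i = 130                      # the entry we actually want
blockstart = (i & ~63) * s   # byte offset of entry 128
blocksize = 64 * s           # pulls in entries 128..191
assert blockstart == 128 * s
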
221 221 class lazyindex(object):
222 222 """a lazy version of the index array"""
223 223 def __init__(self, parser):
224 224 self.p = parser
225 225 def __len__(self):
226 226 return len(self.p.index)
227 227 def load(self, pos):
228 228 if pos < 0:
229 229 pos += len(self.p.index)
230 230 self.p.loadindex(pos)
231 231 return self.p.index[pos]
232 232 def __getitem__(self, pos):
233 233 ret = self.p.index[pos] or self.load(pos)
234 234 if isinstance(ret, str):
235 235 ret = struct.unpack(self.p.indexformat, ret)
236 236 return ret
237 237 def __setitem__(self, pos, item):
238 238 self.p.index[pos] = item
239 239 def __delitem__(self, pos):
240 240 del self.p.index[pos]
241 241 def append(self, e):
242 242 self.p.index.append(e)
243 243
244 244 class lazymap(object):
245 245 """a lazy version of the node map"""
246 246 def __init__(self, parser):
247 247 self.p = parser
248 248 def load(self, key):
249 249 n = self.p.findnode(key)
250 250 if n == None:
251 251 raise KeyError(key)
252 252 def __contains__(self, key):
253 253 if key in self.p.map:
254 254 return True
255 255 self.p.loadmap()
256 256 return key in self.p.map
257 257 def __iter__(self):
258 258 yield nullid
259 259 for i in xrange(self.p.l):
260 260 ret = self.p.index[i]
261 261 if not ret:
262 262 self.p.loadindex(i)
263 263 ret = self.p.index[i]
264 264 if isinstance(ret, str):
265 265 ret = struct.unpack(self.p.indexformat, ret)
266 266 yield ret[-1]
267 267 def __getitem__(self, key):
268 268 try:
269 269 return self.p.map[key]
270 270 except KeyError:
271 271 try:
272 272 self.load(key)
273 273 return self.p.map[key]
274 274 except KeyError:
275 275 raise KeyError("node " + hex(key))
276 276 def __setitem__(self, key, val):
277 277 self.p.map[key] = val
278 278 def __delitem__(self, key):
279 279 del self.p.map[key]
280 280
281 281 class RevlogError(Exception): pass
282 282 class LookupError(RevlogError): pass
283 283
284 284 def getoffset(q):
285 285 if q & 0xFFFF:
286 286 raise RevlogError(_('incompatible revision flag %x') % q)
287 287 return int(q >> 16)
288 288
289 289 def gettype(q):
290 290 return int(q & 0xFFFF)
291 291
292 292 def offset_type(offset, type):
293 293 return long(long(offset) << 16 | type)
294 294
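The three helpers above pack a data-file offset and a 16-bit type/flags field into one integer, which is what the Q field of the v1 index stores. A sketch of the invariant (Python 3 ints are unbounded, so the long() calls aren't needed); note that getoffset in this revlog version rejects any nonzero flags:

def offset_type(offset, type):
    return (offset << 16) | type

def getoffset(q):
    if q & 0xFFFF:
        raise ValueError("incompatible revision flag %x" % q)
    return q >> 16

def gettype(q):
    return q & 0xFFFF

q = offset_type(123456, 0)
assert getoffset(q) == 123456 and gettype(q) == 0
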
295 295 class revlogoldio(object):
296 296 def __init__(self):
297 297 self.chunkcache = None
298 298
299 299 def parseindex(self, fp, st, inline):
300 300 s = struct.calcsize(indexformatv0)
301 301 index = []
302 302 nodemap = {nullid: nullrev}
303 303 n = off = 0
304 304 data = fp.read()
305 305 l = len(data)
306 306 while off + s <= l:
307 307 cur = data[off:off + s]
308 308 off += s
309 309 e = struct.unpack(indexformatv0, cur)
310 310 index.append(e)
311 311 nodemap[e[-1]] = n
312 312 n += 1
313 313
314 314 return index, nodemap
315 315
316 316 class revlogio(object):
317 317 def __init__(self):
318 318 self.chunkcache = None
319 319
320 320 def parseindex(self, fp, st, inline):
321 321 if (lazyparser.safe_to_use and not inline and
322 322 st and st.st_size > 1000000):
323 323 # big index, let's parse it on demand
324 324 parser = lazyparser(fp, st.st_size, indexformatng, ngshaoffset)
325 325 index = lazyindex(parser)
326 326 nodemap = lazymap(parser)
327 327 e = list(index[0])
328 328 type = gettype(e[0])
329 329 e[0] = offset_type(0, type)
330 330 index[0] = e
331 331 return index, nodemap
332 332
333 333 s = struct.calcsize(indexformatng)
334 334 index = []
335 335 nodemap = {nullid: nullrev}
336 n = 0
337 leftover = None
338 while True:
339 if st:
340 data = fp.read(65536)
341 else:
342 # hack for httprangereader, it doesn't do partial reads well
343 data = fp.read()
344 if not data:
345 break
346 if n == 0 and inline:
347 # cache the first chunk
348 self.chunkcache = (0, data)
349 if leftover:
350 data = leftover + data
351 leftover = None
352 off = 0
353 l = len(data)
354 while off < l:
355 if l - off < s:
356 leftover = data[off:]
336 n = off = 0
337 # if we're not using lazymap, always read the whole index
338 data = fp.read()
339 l = len(data)
340 if inline:
341 self.chunkcache = (0, data)
342 while off + s <= l:
343 e = struct.unpack(indexformatng, data[off:off + s])
344 index.append(e)
345 nodemap[e[-1]] = n
346 n += 1
347 off += s
348 if inline:
349 if e[1] < 0:
357 350 break
358 cur = data[off:off + s]
359 off += s
360 e = struct.unpack(indexformatng, cur)
361 index.append(e)
362 nodemap[e[-1]] = n
363 n += 1
364 if inline:
365 if e[1] < 0:
366 break
367 off += e[1]
368 if off > l:
369 # some things don't seek well, just read it
370 fp.read(off - l)
371 break
372 if not st:
373 break
351 off += e[1]
374 352
375 353 e = list(index[0])
376 354 type = gettype(e[0])
377 355 e[0] = offset_type(0, type)
378 356 index[0] = e
379 357
380 358 return index, nodemap
381 359
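This is the simplification the commit message refers to: the old chunked reader with its leftover buffer is replaced by one fp.read() and a plain unpack loop. The same loop, reduced to a standalone sketch over an in-memory non-inline index (inline revlogs additionally skip e[1] data bytes after each record, as above):

import struct

indexformatng = ">Qiiiiii20s12x"
s = struct.calcsize(indexformatng)      # 64 bytes
nullid, nullrev = b"\0" * 20, -1

def parse_v1_index(data):
    index, nodemap = [], {nullid: nullrev}
    n = off = 0
    while off + s <= len(data):         # a trailing partial record is ignored
        e = struct.unpack(indexformatng, data[off:off + s])
        index.append(e)
        nodemap[e[-1]] = n              # e[-1] is the 20-byte node id
        n += 1
        off += s
    return index, nodemap
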
382 360 class revlog(object):
383 361 """
384 362 the underlying revision storage object
385 363
386 364 A revlog consists of two parts, an index and the revision data.
387 365
388 366 The index is a file with a fixed record size containing
389 367 information on each revision, including its nodeid (hash), the
390 368 nodeids of its parents, the position and offset of its data within
391 369 the data file, and the revision it's based on. Finally, each entry
392 370 contains a linkrev entry that can serve as a pointer to external
393 371 data.
394 372
395 373 The revision data itself is a linear collection of data chunks.
396 374 Each chunk represents a revision and is usually represented as a
397 375 delta against the previous chunk. To bound lookup time, runs of
398 376 deltas are limited to about 2 times the length of the original
399 377 version data. This makes retrieval of a version proportional to
400 378 its size, or O(1) relative to the number of revisions.
401 379
402 380 Both pieces of the revlog are written to in an append-only
403 381 fashion, which means we never need to rewrite a file to insert or
404 382 remove data, and can use some simple techniques to avoid the need
405 383 for locking while reading.
406 384 """
407 385 def __init__(self, opener, indexfile):
408 386 """
409 387 create a revlog object
410 388
411 389 opener is a function that abstracts the file opening operation
412 390 and can be used to implement COW semantics or the like.
413 391 """
414 392 self.indexfile = indexfile
415 393 self.datafile = indexfile[:-2] + ".d"
416 394 self.opener = opener
417 395
418 396 self.indexstat = None
419 397 self.cache = None
420 398 self.defversion = REVLOG_DEFAULT_VERSION
421 399 if hasattr(opener, "defversion"):
422 400 self.defversion = opener.defversion
423 401 if self.defversion & REVLOGNG:
424 402 self.defversion |= REVLOGNGINLINEDATA
425 403 self._load()
426 404
427 405 def _load(self):
428 406 v = self.defversion
429 407 try:
430 408 f = self.opener(self.indexfile)
431 409 i = f.read(4)
432 410 f.seek(0)
433 411 if len(i) > 0:
434 412 v = struct.unpack(versionformat, i)[0]
435 413 except IOError, inst:
436 414 if inst.errno != errno.ENOENT:
437 415 raise
438 416 i = ""
439 417 else:
440 418 try:
441 419 st = util.fstat(f)
442 420 except AttributeError, inst:
443 421 st = None
444 422 else:
445 423 oldst = self.indexstat
446 424 if (oldst and st.st_dev == oldst.st_dev
447 425 and st.st_ino == oldst.st_ino
448 426 and st.st_mtime == oldst.st_mtime
449 427 and st.st_ctime == oldst.st_ctime
450 428 and st.st_size == oldst.st_size):
451 429 return
452 430 self.indexstat = st
453 431 flags = v & ~0xFFFF
454 432 fmt = v & 0xFFFF
455 433 if fmt == REVLOGV0:
456 434 if flags:
457 435 raise RevlogError(_("index %s unknown flags %#04x for format v0")
458 436 % (self.indexfile, flags >> 16))
459 437 elif fmt == REVLOGNG:
460 438 if flags & ~REVLOGNGINLINEDATA:
461 439 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
462 440 % (self.indexfile, flags >> 16))
463 441 else:
464 442 raise RevlogError(_("index %s unknown format %d")
465 443 % (self.indexfile, fmt))
466 444 self.version = v
467 445 self.nodemap = {nullid: nullrev}
468 446 self.index = []
469 447 self._io = revlogio()
470 448 self.indexformat = indexformatng
471 449 if self.version == REVLOGV0:
472 450 self._io = revlogoldio()
473 451 self.indexformat = indexformatv0
474 452 if i:
475 453 self.index, self.nodemap = self._io.parseindex(f, st, self._inline())
476 454
477 455 def _loadindex(self, start, end):
478 456 """load a block of indexes all at once from the lazy parser"""
479 457 if isinstance(self.index, lazyindex):
480 458 self.index.p.loadindex(start, end)
481 459
482 460 def _loadindexmap(self):
483 461 """loads both the map and the index from the lazy parser"""
484 462 if isinstance(self.index, lazyindex):
485 463 p = self.index.p
486 464 p.loadindex()
487 465 self.nodemap = p.map
488 466
489 467 def _loadmap(self):
490 468 """loads the map from the lazy parser"""
491 469 if isinstance(self.nodemap, lazymap):
492 470 self.nodemap.p.loadmap()
493 471 self.nodemap = self.nodemap.p.map
494 472
495 473 def _inline(self): return self.version & REVLOGNGINLINEDATA
496 474
497 475 def tip(self): return self.node(len(self.index) - 1)
498 476 def count(self): return len(self.index)
499 477 def node(self, rev):
500 478 return rev == nullrev and nullid or self.index[rev][-1]
501 479 def rev(self, node):
502 480 try:
503 481 return self.nodemap[node]
504 482 except KeyError:
505 483 raise LookupError(_('%s: no node %s') % (self.indexfile, hex(node)))
506 484 def linkrev(self, node):
507 485 return (node == nullid) and nullrev or self.index[self.rev(node)][-4]
508 486 def parents(self, node):
509 487 if node == nullid: return (nullid, nullid)
510 488 r = self.rev(node)
511 489 d = self.index[r][-3:-1]
512 490 if self.version == REVLOGV0:
513 491 return d
514 492 return (self.node(d[0]), self.node(d[1]))
515 493 def parentrevs(self, rev):
516 494 if rev == nullrev:
517 495 return (nullrev, nullrev)
518 496 d = self.index[rev][-3:-1]
519 497 if self.version == REVLOGV0:
520 498 return (self.rev(d[0]), self.rev(d[1]))
521 499 return d
522 500 def start(self, rev):
523 501 if rev == nullrev:
524 502 return 0
525 503 if self.version != REVLOGV0:
526 504 return getoffset(self.index[rev][0])
527 505 return self.index[rev][0]
528 506
529 507 def end(self, rev): return self.start(rev) + self.length(rev)
530 508
531 509 def size(self, rev):
532 510 """return the length of the uncompressed text for a given revision"""
533 511 if rev == nullrev:
534 512 return 0
535 513 l = -1
536 514 if self.version != REVLOGV0:
537 515 l = self.index[rev][2]
538 516 if l >= 0:
539 517 return l
540 518
541 519 t = self.revision(self.node(rev))
542 520 return len(t)
543 521
544 522 # alternate implementation. The advantage of this code is that it
545 523 # will be faster for a single revision. But the results are not
546 524 # cached, so finding the size of every revision will be slower.
547 525 """
548 526 if self.cache and self.cache[1] == rev:
549 527 return len(self.cache[2])
550 528
551 529 base = self.base(rev)
552 530 if self.cache and self.cache[1] >= base and self.cache[1] < rev:
553 531 base = self.cache[1]
554 532 text = self.cache[2]
555 533 else:
556 534 text = self.revision(self.node(base))
557 535
558 536 l = len(text)
559 537 for x in xrange(base + 1, rev + 1):
560 538 l = mdiff.patchedsize(l, self.chunk(x))
561 539 return l
562 540 """
563 541
564 542 def length(self, rev):
565 543 if rev == nullrev:
566 544 return 0
567 545 else:
568 546 return self.index[rev][1]
569 547 def base(self, rev):
570 548 if (rev == nullrev):
571 549 return nullrev
572 550 else:
573 551 return self.index[rev][-5]
574 552
575 553 def reachable(self, node, stop=None):
576 554 """return a hash of all nodes ancestral to a given node, including
577 555 the node itself, stopping when stop is matched"""
578 556 reachable = {}
579 557 visit = [node]
580 558 reachable[node] = 1
581 559 if stop:
582 560 stopn = self.rev(stop)
583 561 else:
584 562 stopn = 0
585 563 while visit:
586 564 n = visit.pop(0)
587 565 if n == stop:
588 566 continue
589 567 if n == nullid:
590 568 continue
591 569 for p in self.parents(n):
592 570 if self.rev(p) < stopn:
593 571 continue
594 572 if p not in reachable:
595 573 reachable[p] = 1
596 574 visit.append(p)
597 575 return reachable
598 576
599 577 def nodesbetween(self, roots=None, heads=None):
600 578 """Return a tuple containing three elements. Elements 1 and 2 contain
601 579 the final lists of bases and heads after all the unreachable ones have been
602 580 pruned. Element 0 contains a topologically sorted list of all
603 581
604 582 nodes that satisfy these constraints:
605 583 1. All nodes must be descended from a node in roots (the nodes on
606 584 roots are considered descended from themselves).
607 585 2. All nodes must also be ancestors of a node in heads (the nodes in
608 586 heads are considered to be their own ancestors).
609 587
610 588 If roots is unspecified, nullid is assumed as the only root.
611 589 If heads is unspecified, it is taken to be the output of the
612 590 heads method (i.e. a list of all nodes in the repository that
613 591 have no children)."""
614 592 nonodes = ([], [], [])
615 593 if roots is not None:
616 594 roots = list(roots)
617 595 if not roots:
618 596 return nonodes
619 597 lowestrev = min([self.rev(n) for n in roots])
620 598 else:
621 599 roots = [nullid] # Everybody's a descendent of nullid
622 600 lowestrev = nullrev
623 601 if (lowestrev == nullrev) and (heads is None):
624 602 # We want _all_ the nodes!
625 603 return ([self.node(r) for r in xrange(0, self.count())],
626 604 [nullid], list(self.heads()))
627 605 if heads is None:
628 606 # All nodes are ancestors, so the latest ancestor is the last
629 607 # node.
630 608 highestrev = self.count() - 1
631 609 # Set ancestors to None to signal that every node is an ancestor.
632 610 ancestors = None
633 611 # Set heads to an empty dictionary for later discovery of heads
634 612 heads = {}
635 613 else:
636 614 heads = list(heads)
637 615 if not heads:
638 616 return nonodes
639 617 ancestors = {}
640 618 # Turn heads into a dictionary so we can remove 'fake' heads.
641 619 # Also, later we will be using it to filter out the heads we can't
642 620 # find from roots.
643 621 heads = dict.fromkeys(heads, 0)
644 622 # Start at the top and keep marking parents until we're done.
645 623 nodestotag = heads.keys()
646 624 # Remember where the top was so we can use it as a limit later.
647 625 highestrev = max([self.rev(n) for n in nodestotag])
648 626 while nodestotag:
649 627 # grab a node to tag
650 628 n = nodestotag.pop()
651 629 # Never tag nullid
652 630 if n == nullid:
653 631 continue
654 632 # A node's revision number represents its place in a
655 633 # topologically sorted list of nodes.
656 634 r = self.rev(n)
657 635 if r >= lowestrev:
658 636 if n not in ancestors:
659 637 # If we are possibly a descendent of one of the roots
660 638 # and we haven't already been marked as an ancestor
661 639 ancestors[n] = 1 # Mark as ancestor
662 640 # Add non-nullid parents to list of nodes to tag.
663 641 nodestotag.extend([p for p in self.parents(n) if
664 642 p != nullid])
665 643 elif n in heads: # We've seen it before, is it a fake head?
666 644 # So it is, real heads should not be the ancestors of
667 645 # any other heads.
668 646 heads.pop(n)
669 647 if not ancestors:
670 648 return nonodes
671 649 # Now that we have our set of ancestors, we want to remove any
672 650 # roots that are not ancestors.
673 651
674 652 # If one of the roots was nullid, everything is included anyway.
675 653 if lowestrev > nullrev:
676 654 # But, since we weren't, let's recompute the lowest rev to not
677 655 # include roots that aren't ancestors.
678 656
679 657 # Filter out roots that aren't ancestors of heads
680 658 roots = [n for n in roots if n in ancestors]
681 659 # Recompute the lowest revision
682 660 if roots:
683 661 lowestrev = min([self.rev(n) for n in roots])
684 662 else:
685 663 # No more roots? Return empty list
686 664 return nonodes
687 665 else:
688 666 # We are descending from nullid, and don't need to care about
689 667 # any other roots.
690 668 lowestrev = nullrev
691 669 roots = [nullid]
692 670 # Transform our roots list into a 'set' (i.e. a dictionary where the
693 671 # values don't matter).
694 672 descendents = dict.fromkeys(roots, 1)
695 673 # Also, keep the original roots so we can filter out roots that aren't
696 674 # 'real' roots (i.e. are descended from other roots).
697 675 roots = descendents.copy()
698 676 # Our topologically sorted list of output nodes.
699 677 orderedout = []
700 678 # Don't start at nullid since we don't want nullid in our output list,
701 679 # and if nullid shows up in descedents, empty parents will look like
702 680 # they're descendents.
703 681 for r in xrange(max(lowestrev, 0), highestrev + 1):
704 682 n = self.node(r)
705 683 isdescendent = False
706 684 if lowestrev == nullrev: # Everybody is a descendent of nullid
707 685 isdescendent = True
708 686 elif n in descendents:
709 687 # n is already a descendent
710 688 isdescendent = True
711 689 # This check only needs to be done here because all the roots
712 690 # will start being marked as descendents before the loop.
713 691 if n in roots:
714 692 # If n was a root, check if it's a 'real' root.
715 693 p = tuple(self.parents(n))
716 694 # If any of its parents are descendents, it's not a root.
717 695 if (p[0] in descendents) or (p[1] in descendents):
718 696 roots.pop(n)
719 697 else:
720 698 p = tuple(self.parents(n))
721 699 # A node is a descendent if either of its parents are
722 700 # descendents. (We seeded the descendents list with the roots
723 701 # up there, remember?)
724 702 if (p[0] in descendents) or (p[1] in descendents):
725 703 descendents[n] = 1
726 704 isdescendent = True
727 705 if isdescendent and ((ancestors is None) or (n in ancestors)):
728 706 # Only include nodes that are both descendents and ancestors.
729 707 orderedout.append(n)
730 708 if (ancestors is not None) and (n in heads):
731 709 # We're trying to figure out which heads are reachable
732 710 # from roots.
733 711 # Mark this head as having been reached
734 712 heads[n] = 1
735 713 elif ancestors is None:
736 714 # Otherwise, we're trying to discover the heads.
737 715 # Assume this is a head because if it isn't, the next step
738 716 # will eventually remove it.
739 717 heads[n] = 1
740 718 # But, obviously its parents aren't.
741 719 for p in self.parents(n):
742 720 heads.pop(p, None)
743 721 heads = [n for n in heads.iterkeys() if heads[n] != 0]
744 722 roots = roots.keys()
745 723 assert orderedout
746 724 assert roots
747 725 assert heads
748 726 return (orderedout, roots, heads)
749 727
750 728 def heads(self, start=None, stop=None):
751 729 """return the list of all nodes that have no children
752 730
753 731 if start is specified, only heads that are descendants of
754 732 start will be returned
755 733 if stop is specified, it will consider all the revs from stop
756 734 as if they had no children
757 735 """
758 736 if start is None:
759 737 start = nullid
760 738 if stop is None:
761 739 stop = []
762 740 stoprevs = dict.fromkeys([self.rev(n) for n in stop])
763 741 startrev = self.rev(start)
764 742 reachable = {startrev: 1}
765 743 heads = {startrev: 1}
766 744
767 745 parentrevs = self.parentrevs
768 746 for r in xrange(startrev + 1, self.count()):
769 747 for p in parentrevs(r):
770 748 if p in reachable:
771 749 if r not in stoprevs:
772 750 reachable[r] = 1
773 751 heads[r] = 1
774 752 if p in heads and p not in stoprevs:
775 753 del heads[p]
776 754
777 755 return [self.node(r) for r in heads]
778 756
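The sweep above works because revision numbers are topologically sorted: any rev whose parent is reachable is itself reachable, and a parent with a reachable child cannot be a head. A toy version without the start/stop handling (parents is a hypothetical rev -> (p1, p2) table, -1 standing for nullrev):

def find_heads(count, parentrevs, startrev=0):
    reachable = {startrev}
    heads = {startrev}
    for r in range(startrev + 1, count):
        for p in parentrevs(r):
            if p in reachable:
                reachable.add(r)
                heads.add(r)
                heads.discard(p)    # p has a child, so it is not a head
    return sorted(heads)

# linear history 0 <- 1 <- 2 plus a branch 1 <- 3: heads are 2 and 3
parents = {0: (-1, -1), 1: (0, -1), 2: (1, -1), 3: (1, -1)}
assert find_heads(4, lambda r: parents[r]) == [2, 3]
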
779 757 def children(self, node):
780 758 """find the children of a given node"""
781 759 c = []
782 760 p = self.rev(node)
783 761 for r in range(p + 1, self.count()):
784 762 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
785 763 if prevs:
786 764 for pr in prevs:
787 765 if pr == p:
788 766 c.append(self.node(r))
789 767 elif p == nullrev:
790 768 c.append(self.node(r))
791 769 return c
792 770
793 771 def _match(self, id):
794 772 if isinstance(id, (long, int)):
795 773 # rev
796 774 return self.node(id)
797 775 if len(id) == 20:
798 776 # possibly a binary node
799 777 # odds of a binary node being all hex in ASCII are 1 in 10**25
800 778 try:
801 779 node = id
802 780 r = self.rev(node) # quick search the index
803 781 return node
804 782 except LookupError:
805 783 pass # may be partial hex id
806 784 try:
807 785 # str(rev)
808 786 rev = int(id)
809 787 if str(rev) != id: raise ValueError
810 788 if rev < 0: rev = self.count() + rev
811 789 if rev < 0 or rev >= self.count(): raise ValueError
812 790 return self.node(rev)
813 791 except (ValueError, OverflowError):
814 792 pass
815 793 if len(id) == 40:
816 794 try:
817 795 # a full hex nodeid?
818 796 node = bin(id)
819 797 r = self.rev(node)
820 798 return node
821 799 except TypeError:
822 800 pass
823 801
824 802 def _partialmatch(self, id):
825 803 if len(id) < 40:
826 804 try:
827 805 # hex(node)[:...]
828 806 bin_id = bin(id[:len(id) & ~1]) # grab an even number of digits
829 807 node = None
830 808 for n in self.nodemap:
831 809 if n.startswith(bin_id) and hex(n).startswith(id):
832 810 if node is not None:
833 811 raise LookupError(_("Ambiguous identifier"))
834 812 node = n
835 813 if node is not None:
836 814 return node
837 815 except TypeError:
838 816 pass
839 817
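_partialmatch handles odd-length prefixes by comparing an even number of hex digits in binary and then re-checking the full hex string, and it insists the match be unique. A standalone sketch over a plain list of binary node ids (names are hypothetical):

from binascii import hexlify, unhexlify

def partialmatch(nodes, prefix):
    # compare an even number of hex digits in binary, then recheck
    # the full (possibly odd-length) prefix against the hex form
    bin_id = unhexlify(prefix[:len(prefix) & ~1])
    found = [n for n in nodes
             if n.startswith(bin_id)
             and hexlify(n).decode().startswith(prefix)]
    if len(found) > 1:
        raise LookupError("Ambiguous identifier")
    return found[0] if found else None

node = bytes(range(20))
assert partialmatch([node], "000102030") == node   # 9 digits, odd length
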
840 818 def lookup(self, id):
841 819 """locate a node based on:
842 820 - revision number or str(revision number)
843 821 - nodeid or subset of hex nodeid
844 822 """
845 823
846 824 n = self._match(id)
847 825 if n is not None:
848 826 return n
849 827 n = self._partialmatch(id)
850 828 if n:
851 829 return n
852 830
853 831 raise LookupError(_("No match found"))
854 832
855 833 def cmp(self, node, text):
856 834 """compare text with a given file revision"""
857 835 p1, p2 = self.parents(node)
858 836 return hash(text, p1, p2) != node
859 837
860 838 def diff(self, a, b):
861 839 """return a delta between two revisions"""
862 840 return mdiff.textdiff(a, b)
863 841
864 842 def patches(self, t, pl):
865 843 """apply a list of patches to a string"""
866 844 return mdiff.patches(t, pl)
867 845
868 846 def chunk(self, rev, df=None, cachelen=4096):
869 847 start, length = self.start(rev), self.length(rev)
870 848 inline = self._inline()
871 849 if inline:
872 850 start += (rev + 1) * struct.calcsize(self.indexformat)
873 851 end = start + length
874 852 def loadcache(df):
875 853 cache_length = max(cachelen, length) # 4k
876 854 if not df:
877 855 if inline:
878 856 df = self.opener(self.indexfile)
879 857 else:
880 858 df = self.opener(self.datafile)
881 859 df.seek(start)
882 860 self._io.chunkcache = (start, df.read(cache_length))
883 861
884 862 if not self._io.chunkcache:
885 863 loadcache(df)
886 864
887 865 cache_start = self._io.chunkcache[0]
888 866 cache_end = cache_start + len(self._io.chunkcache[1])
889 867 if start >= cache_start and end <= cache_end:
890 868 # it is cached
891 869 offset = start - cache_start
892 870 else:
893 871 loadcache(df)
894 872 offset = 0
895 873
896 874 #def checkchunk():
897 875 # df = self.opener(self.datafile)
898 876 # df.seek(start)
899 877 # return df.read(length)
900 878 #assert s == checkchunk()
901 879 return decompress(self._io.chunkcache[1][offset:offset + length])
902 880
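Two details in chunk() are worth isolating: for inline revlogs the data for revision rev sits after rev + 1 index records in the .i file, and the chunk cache is just an (offset, bytes) window that is reused whenever the wanted range falls inside it. A sketch of both, assuming a hypothetical 64-byte record size:

def inline_data_start(rev, data_offset, entrysize=64):
    # rev + 1 index records precede this revision's data in the .i file
    return data_offset + (rev + 1) * entrysize

def from_cache(chunkcache, start, length):
    # chunkcache is (file offset, bytes); serve the request from it
    # only when [start, start + length) lies inside the cached window
    cache_start, data = chunkcache
    if start >= cache_start and start + length <= cache_start + len(data):
        offset = start - cache_start
        return data[offset:offset + length]
    return None   # caller reloads the cache around start

assert from_cache((100, b"0123456789"), 103, 4) == b"3456"
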
903 881 def delta(self, node):
904 882 """return or calculate a delta between a node and its predecessor"""
905 883 r = self.rev(node)
906 884 return self.revdiff(r - 1, r)
907 885
908 886 def revdiff(self, rev1, rev2):
909 887 """return or calculate a delta between two revisions"""
910 888 b1 = self.base(rev1)
911 889 b2 = self.base(rev2)
912 890 if b1 == b2 and rev1 + 1 == rev2:
913 891 return self.chunk(rev2)
914 892 else:
915 893 return self.diff(self.revision(self.node(rev1)),
916 894 self.revision(self.node(rev2)))
917 895
918 896 def revision(self, node):
919 897 """return an uncompressed revision of a given"""
920 898 if node == nullid: return ""
921 899 if self.cache and self.cache[0] == node: return self.cache[2]
922 900
923 901 # look up what we need to read
924 902 text = None
925 903 rev = self.rev(node)
926 904 base = self.base(rev)
927 905
928 906 if self._inline():
929 907 # we probably have the whole chunk cached
930 908 df = None
931 909 else:
932 910 df = self.opener(self.datafile)
933 911
934 912 # do we have useful data cached?
935 913 if self.cache and self.cache[1] >= base and self.cache[1] < rev:
936 914 base = self.cache[1]
937 915 text = self.cache[2]
938 916 self._loadindex(base, rev + 1)
939 917 else:
940 918 self._loadindex(base, rev + 1)
941 919 text = self.chunk(base, df=df)
942 920
943 921 bins = []
944 922 for r in xrange(base + 1, rev + 1):
945 923 bins.append(self.chunk(r, df=df))
946 924
947 925 text = self.patches(text, bins)
948 926
949 927 p1, p2 = self.parents(node)
950 928 if node != hash(text, p1, p2):
951 929 raise RevlogError(_("integrity check failed on %s:%d")
952 930 % (self.datafile, rev))
953 931
954 932 self.cache = (node, rev, text)
955 933 return text
956 934
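Reconstruction is therefore: fetch the chain's base (stored in full), patch forward through every delta up to rev, and verify the result against the stored node id. Schematically, reusing the module-level hash() and RevlogError from this file, with rl standing for a revlog-like object (a sketch of the method above, not a replacement for it):

def reconstruct(rl, rev):
    base = rl.base(rev)                    # last full text at or before rev
    text = rl.chunk(base)                  # the full version
    deltas = [rl.chunk(r) for r in range(base + 1, rev + 1)]
    if deltas:
        text = rl.patches(text, deltas)    # apply the delta run in order
    node = rl.node(rev)
    p1, p2 = rl.parents(node)
    if hash(text, p1, p2) != node:         # same check as revision()
        raise RevlogError("integrity check failed on rev %d" % rev)
    return text
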
957 935 def checkinlinesize(self, tr, fp=None):
958 936 if not self._inline():
959 937 return
960 938 if not fp:
961 939 fp = self.opener(self.indexfile, 'r')
962 940 fp.seek(0, 2)
963 941 size = fp.tell()
964 942 if size < 131072:
965 943 return
966 944 trinfo = tr.find(self.indexfile)
967 945 if trinfo == None:
968 946 raise RevlogError(_("%s not found in the transaction")
969 947 % self.indexfile)
970 948
971 949 trindex = trinfo[2]
972 950 dataoff = self.start(trindex)
973 951
974 952 tr.add(self.datafile, dataoff)
975 953 df = self.opener(self.datafile, 'w')
976 954 calc = struct.calcsize(self.indexformat)
977 955 for r in xrange(self.count()):
978 956 start = self.start(r) + (r + 1) * calc
979 957 length = self.length(r)
980 958 fp.seek(start)
981 959 d = fp.read(length)
982 960 df.write(d)
983 961 fp.close()
984 962 df.close()
985 963 fp = self.opener(self.indexfile, 'w', atomictemp=True)
986 964 self.version &= ~(REVLOGNGINLINEDATA)
987 965 if self.count():
988 966 x = self.index[0]
989 967 e = struct.pack(self.indexformat, *x)[4:]
990 968 l = struct.pack(versionformat, self.version)
991 969 fp.write(l)
992 970 fp.write(e)
993 971
994 972 for i in xrange(1, self.count()):
995 973 x = self.index[i]
996 974 e = struct.pack(self.indexformat, *x)
997 975 fp.write(e)
998 976
999 977 # if we don't call rename, the temp file will never replace the
1000 978 # real index
1001 979 fp.rename()
1002 980
1003 981 tr.replace(self.indexfile, trindex * calc)
1004 982 self._io.chunkcache = None
1005 983
1006 984 def addrevision(self, text, transaction, link, p1=None, p2=None, d=None):
1007 985 """add a revision to the log
1008 986
1009 987 text - the revision data to add
1010 988 transaction - the transaction object used for rollback
1011 989 link - the linkrev data to add
1012 990 p1, p2 - the parent nodeids of the revision
1013 991 d - an optional precomputed delta
1014 992 """
1015 993 if not self._inline():
1016 994 dfh = self.opener(self.datafile, "a")
1017 995 else:
1018 996 dfh = None
1019 997 ifh = self.opener(self.indexfile, "a+")
1020 998 return self._addrevision(text, transaction, link, p1, p2, d, ifh, dfh)
1021 999
1022 1000 def _addrevision(self, text, transaction, link, p1, p2, d, ifh, dfh):
1023 1001 if text is None: text = ""
1024 1002 if p1 is None: p1 = self.tip()
1025 1003 if p2 is None: p2 = nullid
1026 1004
1027 1005 node = hash(text, p1, p2)
1028 1006
1029 1007 if node in self.nodemap:
1030 1008 return node
1031 1009
1032 1010 n = self.count()
1033 1011 t = n - 1
1034 1012
1035 1013 if n:
1036 1014 base = self.base(t)
1037 1015 start = self.start(base)
1038 1016 end = self.end(t)
1039 1017 if not d:
1040 1018 prev = self.revision(self.tip())
1041 1019 d = self.diff(prev, text)
1042 1020 data = compress(d)
1043 1021 l = len(data[1]) + len(data[0])
1044 1022 dist = end - start + l
1045 1023
1046 1024 # full versions are inserted when the needed deltas
1047 1025 # become comparable to the uncompressed text
1048 1026 if not n or dist > len(text) * 2:
1049 1027 data = compress(text)
1050 1028 l = len(data[1]) + len(data[0])
1051 1029 base = n
1052 1030 else:
1053 1031 base = self.base(t)
1054 1032
1055 1033 offset = 0
1056 1034 if t >= 0:
1057 1035 offset = self.end(t)
1058 1036
1059 1037 if self.version == REVLOGV0:
1060 1038 e = (offset, l, base, link, p1, p2, node)
1061 1039 else:
1062 1040 e = (offset_type(offset, 0), l, len(text),
1063 1041 base, link, self.rev(p1), self.rev(p2), node)
1064 1042
1065 1043 self.index.append(e)
1066 1044 self.nodemap[node] = n
1067 1045 entry = struct.pack(self.indexformat, *e)
1068 1046
1069 1047 if not self._inline():
1070 1048 transaction.add(self.datafile, offset)
1071 1049 transaction.add(self.indexfile, n * len(entry))
1072 1050 if data[0]:
1073 1051 dfh.write(data[0])
1074 1052 dfh.write(data[1])
1075 1053 dfh.flush()
1076 1054 else:
1077 1055 ifh.seek(0, 2)
1078 1056 transaction.add(self.indexfile, ifh.tell(), self.count() - 1)
1079 1057
1080 1058 if len(self.index) == 1 and self.version != REVLOGV0:
1081 1059 l = struct.pack(versionformat, self.version)
1082 1060 ifh.write(l)
1083 1061 entry = entry[4:]
1084 1062
1085 1063 ifh.write(entry)
1086 1064
1087 1065 if self._inline():
1088 1066 ifh.write(data[0])
1089 1067 ifh.write(data[1])
1090 1068 self.checkinlinesize(transaction, ifh)
1091 1069
1092 1070 self.cache = (node, n, text)
1093 1071 return node
1094 1072
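The dist > len(text) * 2 test above is the rule promised in the class docstring: a delta is appended only while the whole chain that would have to be read back stays under twice the size of the plain text. The decision, factored out as a sketch (dist is the byte span from the chain's base to the end of the new delta):

def use_delta(n, dist, textlen):
    # n: number of existing revisions (the first one is always stored full)
    # dist: end(tip) - start(base) + len(new compressed delta)
    return bool(n) and dist <= textlen * 2

assert not use_delta(0, 0, 100)     # first revision: store full text
assert use_delta(5, 150, 100)       # short chain: keep appending deltas
assert not use_delta(5, 999, 100)   # chain too long: write a full version
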
1095 1073 def ancestor(self, a, b):
1096 1074 """calculate the least common ancestor of nodes a and b"""
1097 1075
1098 1076 def parents(rev):
1099 1077 return [p for p in self.parentrevs(rev) if p != nullrev]
1100 1078
1101 1079 c = ancestor.ancestor(self.rev(a), self.rev(b), parents)
1102 1080 if c is None:
1103 1081 return nullid
1104 1082
1105 1083 return self.node(c)
1106 1084
1107 1085 def group(self, nodelist, lookup, infocollect=None):
1108 1086 """calculate a delta group
1109 1087
1110 1088 Given a list of changeset revs, return a set of deltas and
1111 1089 metadata corresponding to nodes. The first delta is
1112 1090 parent(nodes[0]) -> nodes[0]; the receiver is guaranteed to
1113 1091 have this parent as it has all history before these
1114 1092 changesets. Here, parent means the first parent, parents(nodes[0])[0].
1115 1093 """
1116 1094 revs = [self.rev(n) for n in nodelist]
1117 1095
1118 1096 # if we don't have any revisions touched by these changesets, bail
1119 1097 if not revs:
1120 1098 yield changegroup.closechunk()
1121 1099 return
1122 1100
1123 1101 # add the parent of the first rev
1124 1102 p = self.parents(self.node(revs[0]))[0]
1125 1103 revs.insert(0, self.rev(p))
1126 1104
1127 1105 # build deltas
1128 1106 for d in xrange(0, len(revs) - 1):
1129 1107 a, b = revs[d], revs[d + 1]
1130 1108 nb = self.node(b)
1131 1109
1132 1110 if infocollect is not None:
1133 1111 infocollect(nb)
1134 1112
1135 1113 d = self.revdiff(a, b)
1136 1114 p = self.parents(nb)
1137 1115 meta = nb + p[0] + p[1] + lookup(nb)
1138 1116 yield changegroup.genchunk("%s%s" % (meta, d))
1139 1117
1140 1118 yield changegroup.closechunk()
1141 1119
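Each chunk produced here is an 80-byte header (the node, its two parents, and the changeset node the caller's lookup() maps it to) followed by the delta payload; addgroup() below unpacks it with the same format string. In isolation:

import struct

def parse_group_chunk(chunk):
    # 80-byte header: node, p1, p2, linked changeset node
    node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
    return node, p1, p2, cs, chunk[80:]    # the delta is the rest

hdr = b"N" * 20 + b"1" * 20 + b"2" * 20 + b"C" * 20
node, p1, p2, cs, delta = parse_group_chunk(hdr + b"payload")
assert node == b"N" * 20 and delta == b"payload"
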
1142 1120 def addgroup(self, revs, linkmapper, transaction, unique=0):
1143 1121 """
1144 1122 add a delta group
1145 1123
1146 1124 given a set of deltas, add them to the revision log. the
1147 1125 first delta is against its parent, which should be in our
1148 1126 log, the rest are against the previous delta.
1149 1127 """
1150 1128
1151 1129 #track the base of the current delta log
1152 1130 r = self.count()
1153 1131 t = r - 1
1154 1132 node = None
1155 1133
1156 1134 base = prev = nullrev
1157 1135 start = end = textlen = 0
1158 1136 if r:
1159 1137 end = self.end(t)
1160 1138
1161 1139 ifh = self.opener(self.indexfile, "a+")
1162 1140 ifh.seek(0, 2)
1163 1141 transaction.add(self.indexfile, ifh.tell(), self.count())
1164 1142 if self._inline():
1165 1143 dfh = None
1166 1144 else:
1167 1145 transaction.add(self.datafile, end)
1168 1146 dfh = self.opener(self.datafile, "a")
1169 1147
1170 1148 # loop through our set of deltas
1171 1149 chain = None
1172 1150 for chunk in revs:
1173 1151 node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
1174 1152 link = linkmapper(cs)
1175 1153 if node in self.nodemap:
1176 1154 # this can happen if two branches make the same change
1177 1155 # if unique:
1178 1156 # raise RevlogError(_("already have %s") % hex(node[:4]))
1179 1157 chain = node
1180 1158 continue
1181 1159 delta = chunk[80:]
1182 1160
1183 1161 for p in (p1, p2):
1184 1162 if not p in self.nodemap:
1185 1163 raise LookupError(_("unknown parent %s") % short(p))
1186 1164
1187 1165 if not chain:
1188 1166 # retrieve the parent revision of the delta chain
1189 1167 chain = p1
1190 1168 if not chain in self.nodemap:
1191 1169 raise LookupError(_("unknown base %s") % short(chain[:4]))
1192 1170
1193 1171 # full versions are inserted when the needed deltas become
1194 1172 # comparable to the uncompressed text or when the previous
1195 1173 # version is not the one we have a delta against. We use
1196 1174 # the size of the previous full rev as a proxy for the
1197 1175 # current size.
1198 1176
1199 1177 if chain == prev:
1200 1178 tempd = compress(delta)
1201 1179 cdelta = tempd[0] + tempd[1]
1202 1180 textlen = mdiff.patchedsize(textlen, delta)
1203 1181
1204 1182 if chain != prev or (end - start + len(cdelta)) > textlen * 2:
1205 1183 # flush our writes here so we can read it in revision
1206 1184 if dfh:
1207 1185 dfh.flush()
1208 1186 ifh.flush()
1209 1187 text = self.revision(chain)
1210 1188 text = self.patches(text, [delta])
1211 1189 chk = self._addrevision(text, transaction, link, p1, p2, None,
1212 1190 ifh, dfh)
1213 1191 if not dfh and not self._inline():
1214 1192 # addrevision switched from inline to conventional
1215 1193 # reopen the index
1216 1194 dfh = self.opener(self.datafile, "a")
1217 1195 ifh = self.opener(self.indexfile, "a")
1218 1196 if chk != node:
1219 1197 raise RevlogError(_("consistency error adding group"))
1220 1198 textlen = len(text)
1221 1199 else:
1222 1200 if self.version == REVLOGV0:
1223 1201 e = (end, len(cdelta), base, link, p1, p2, node)
1224 1202 else:
1225 1203 e = (offset_type(end, 0), len(cdelta), textlen, base,
1226 1204 link, self.rev(p1), self.rev(p2), node)
1227 1205 self.index.append(e)
1228 1206 self.nodemap[node] = r
1229 1207 if self._inline():
1230 1208 ifh.write(struct.pack(self.indexformat, *e))
1231 1209 ifh.write(cdelta)
1232 1210 self.checkinlinesize(transaction, ifh)
1233 1211 if not self._inline():
1234 1212 dfh = self.opener(self.datafile, "a")
1235 1213 ifh = self.opener(self.indexfile, "a")
1236 1214 else:
1237 1215 dfh.write(cdelta)
1238 1216 ifh.write(struct.pack(self.indexformat, *e))
1239 1217
1240 1218 t, r, chain, prev = r, r + 1, node, node
1241 1219 base = self.base(t)
1242 1220 start = self.start(base)
1243 1221 end = self.end(t)
1244 1222
1245 1223 return node
1246 1224
1247 1225 def strip(self, rev, minlink):
1248 1226 if self.count() == 0 or rev >= self.count():
1249 1227 return
1250 1228
1251 1229 if isinstance(self.index, lazyindex):
1252 1230 self._loadindexmap()
1253 1231
1254 1232 # When stripping away a revision, we need to make sure it
1255 1233 # does not actually belong to an older changeset.
1256 1234 # The minlink parameter defines the oldest revision
1257 1235 # we're allowed to strip away.
1258 1236 while minlink > self.index[rev][-4]:
1259 1237 rev += 1
1260 1238 if rev >= self.count():
1261 1239 return
1262 1240
1263 1241 # first truncate the files on disk
1264 1242 end = self.start(rev)
1265 1243 if not self._inline():
1266 1244 df = self.opener(self.datafile, "a")
1267 1245 df.truncate(end)
1268 1246 end = rev * struct.calcsize(self.indexformat)
1269 1247 else:
1270 1248 end += rev * struct.calcsize(self.indexformat)
1271 1249
1272 1250 indexf = self.opener(self.indexfile, "a")
1273 1251 indexf.truncate(end)
1274 1252
1275 1253 # then reset internal state in memory to forget those revisions
1276 1254 self.cache = None
1277 1255 self._io.chunkcache = None
1278 1256 for x in xrange(rev, self.count()):
1279 1257 del self.nodemap[self.node(x)]
1280 1258
1281 1259 del self.index[rev:]
1282 1260
1283 1261 def checksize(self):
1284 1262 expected = 0
1285 1263 if self.count():
1286 1264 expected = self.end(self.count() - 1)
1287 1265
1288 1266 try:
1289 1267 f = self.opener(self.datafile)
1290 1268 f.seek(0, 2)
1291 1269 actual = f.tell()
1292 1270 dd = actual - expected
1293 1271 except IOError, inst:
1294 1272 if inst.errno != errno.ENOENT:
1295 1273 raise
1296 1274 dd = 0
1297 1275
1298 1276 try:
1299 1277 f = self.opener(self.indexfile)
1300 1278 f.seek(0, 2)
1301 1279 actual = f.tell()
1302 1280 s = struct.calcsize(self.indexformat)
1303 1281 i = actual / s
1304 1282 di = actual - (i * s)
1305 1283 if self._inline():
1306 1284 databytes = 0
1307 1285 for r in xrange(self.count()):
1308 1286 databytes += self.length(r)
1309 1287 dd = 0
1310 1288 di = actual - self.count() * s - databytes
1311 1289 except IOError, inst:
1312 1290 if inst.errno != errno.ENOENT:
1313 1291 raise
1314 1292 di = 0
1315 1293
1316 1294 return (dd, di)
1317 1295
1318 1296