revlog: only allow lazy parsing with revlogng files...
Matt Mackall
r4976:79c39cc9 default
@@ -1,1296 +1,1293 @@
1 1 """
2 2 revlog.py - storage back-end for mercurial
3 3
4 4 This provides efficient delta storage with O(1) retrieve and append
5 5 and O(changes) merge between branches
6 6
7 7 Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
8 8
9 9 This software may be used and distributed according to the terms
10 10 of the GNU General Public License, incorporated herein by reference.
11 11 """
12 12
13 13 from node import *
14 14 from i18n import _
15 15 import binascii, changegroup, errno, ancestor, mdiff, os
16 16 import sha, struct, util, zlib
17 17
18 18 # revlog version strings
19 19 REVLOGV0 = 0
20 20 REVLOGNG = 1
21 21
22 22 # revlog flags
23 23 REVLOGNGINLINEDATA = (1 << 16)
24 24 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
25 25
26 26 REVLOG_DEFAULT_FORMAT = REVLOGNG
27 27 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
28 28
29 29 def hash(text, p1, p2):
30 30 """generate a hash from the given text and its parent hashes
31 31
32 32 This hash combines both the current file contents and its history
33 33 in a manner that makes it easy to distinguish nodes with the same
34 34 content in the revision graph.
35 35 """
36 36 l = [p1, p2]
37 37 l.sort()
38 38 s = sha.new(l[0])
39 39 s.update(l[1])
40 40 s.update(text)
41 41 return s.digest()
42 42
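For illustration, a minimal sketch of how a root revision's node id falls out of the hash() helper above; it assumes the sha module imported at the top of this file, and the contents string is made up:

    nullid = "\x00" * 20                            # the null node id, as in node.py
    node = hash("file contents\n", nullid, nullid)  # root revision: both parents null
    assert len(node) == 20                          # a raw 20-byte sha1 digest, not hex

Because the parents are sorted before hashing, swapping p1 and p2 always yields the same node id.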
43 43 def compress(text):
44 44 """ generate a possibly-compressed representation of text """
45 45 if not text: return ("", text)
46 46 if len(text) < 44:
47 47 if text[0] == '\0': return ("", text)
48 48 return ('u', text)
49 49 bin = zlib.compress(text)
50 50 if len(bin) > len(text):
51 51 if text[0] == '\0': return ("", text)
52 52 return ('u', text)
53 53 return ("", bin)
54 54
55 55 def decompress(bin):
56 56 """ decompress the given input """
57 57 if not bin: return bin
58 58 t = bin[0]
59 59 if t == '\0': return bin
60 60 if t == 'x': return zlib.decompress(bin)
61 61 if t == 'u': return bin[1:]
62 62 raise RevlogError(_("unknown compression type %r") % t)
63 63
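A quick round trip through the two helpers above, assuming both are in scope. compress() returns a (marker, payload) pair whose concatenation is the stored chunk, and decompress() dispatches on the first byte: '\0' passes through, 'x' means zlib, 'u' means an uncompressed literal.

    text = "some revision text" * 10
    chunk = "".join(compress(text))             # stored form: marker + payload
    assert decompress(chunk) == text
    assert compress("short") == ('u', 'short')  # short text is stored literally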
64 64 indexformatv0 = ">4l20s20s20s"
65 65 v0shaoffset = 56
66 66 # index ng:
67 67 # 6 bytes offset
68 68 # 2 bytes flags
69 69 # 4 bytes compressed length
70 70 # 4 bytes uncompressed length
71 71 # 4 bytes: base rev
72 72 # 4 bytes link rev
73 73 # 4 bytes parent 1 rev
74 74 # 4 bytes parent 2 rev
75 75 # 32 bytes: nodeid
76 76 indexformatng = ">Qiiiiii20s12x"
77 77 ngshaoffset = 32
78 78 versionformat = ">I"
79 79
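To make the ng layout above concrete, here is a hedged sketch that packs and unpacks one 64-byte index entry; every field value is made up. The offset/flags word is built the way offset_type() (defined below) builds it.

    import struct
    fmt = ">Qiiiiii20s12x"                  # same as indexformatng above
    e = (0 << 16 | 0,                       # offset 0, type/flags 0
         120, 200,                          # compressed / uncompressed lengths
         0, 5,                              # base rev of the delta chain, link rev
         -1, -1,                            # parent revs (nullrev for a root)
         "\x12" * 20)                       # node id, zero-padded to 32 bytes on disk
    entry = struct.pack(fmt, *e)
    assert len(entry) == struct.calcsize(fmt) == 64
    assert struct.unpack(fmt, entry) == e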
80 80 class lazyparser(object):
81 81 """
82 82 this class avoids the need to parse the entirety of large indices
83 83 """
84 84
85 85 # lazyparser is not safe to use on windows if win32 extensions are not
86 86 # available. it keeps the file handle open, which makes it impossible
87 87 # to break hardlinks on local cloned repos.
88 88 safe_to_use = os.name != 'nt' or (not util.is_win_9x() and
89 89 hasattr(util, 'win32api'))
90 90
91 def __init__(self, dataf, size, indexformat, shaoffset):
91 def __init__(self, dataf, size):
92 92 self.dataf = dataf
93 self.format = indexformat
94 self.s = struct.calcsize(indexformat)
95 self.indexformat = indexformat
93 self.s = struct.calcsize(indexformatng)
96 94 self.datasize = size
97 95 self.l = size/self.s
98 96 self.index = [None] * self.l
99 97 self.map = {nullid: nullrev}
100 98 self.allmap = 0
101 99 self.all = 0
102 100 self.mapfind_count = 0
103 self.shaoffset = shaoffset
104 101
105 102 def loadmap(self):
106 103 """
107 104 during a commit, we need to make sure the rev being added is
108 105 not a duplicate. This requires loading the entire index,
109 106 which is fairly slow. loadmap can load up just the node map,
110 107 which takes much less time.
111 108 """
112 109 if self.allmap: return
113 110 end = self.datasize
114 111 self.allmap = 1
115 112 cur = 0
116 113 count = 0
117 114 blocksize = self.s * 256
118 115 self.dataf.seek(0)
119 116 while cur < end:
120 117 data = self.dataf.read(blocksize)
121 118 off = 0
122 119 for x in xrange(256):
123 n = data[off + self.shaoffset:off + self.shaoffset + 20]
120 n = data[off + ngshaoffset:off + ngshaoffset + 20]
124 121 self.map[n] = count
125 122 count += 1
126 123 if count >= self.l:
127 124 break
128 125 off += self.s
129 126 cur += blocksize
130 127
131 128 def loadblock(self, blockstart, blocksize, data=None):
132 129 if self.all: return
133 130 if data is None:
134 131 self.dataf.seek(blockstart)
135 132 if blockstart + blocksize > self.datasize:
136 133 # the revlog may have grown since we've started running,
137 134 # but we don't have space in self.index for more entries.
138 135 # limit blocksize so that we don't get too much data.
139 136 blocksize = max(self.datasize - blockstart, 0)
140 137 data = self.dataf.read(blocksize)
141 138 lend = len(data) / self.s
142 139 i = blockstart / self.s
143 140 off = 0
144 141 # lazyindex supports __delitem__
145 142 if lend > len(self.index) - i:
146 143 lend = len(self.index) - i
147 144 for x in xrange(lend):
148 145 if self.index[i + x] == None:
149 146 b = data[off : off + self.s]
150 147 self.index[i + x] = b
151 n = b[self.shaoffset:self.shaoffset + 20]
148 n = b[ngshaoffset:ngshaoffset + 20]
152 149 self.map[n] = i + x
153 150 off += self.s
154 151
155 152 def findnode(self, node):
156 153 """search backwards through the index file for a specific node"""
157 154 if self.allmap: return None
158 155
159 156 # hg log will cause many many searches for the manifest
160 157 # nodes. After we get called a few times, just load the whole
161 158 # thing.
162 159 if self.mapfind_count > 8:
163 160 self.loadmap()
164 161 if node in self.map:
165 162 return node
166 163 return None
167 164 self.mapfind_count += 1
168 165 last = self.l - 1
169 166 while self.index[last] != None:
170 167 if last == 0:
171 168 self.all = 1
172 169 self.allmap = 1
173 170 return None
174 171 last -= 1
175 172 end = (last + 1) * self.s
176 173 blocksize = self.s * 256
177 174 while end >= 0:
178 175 start = max(end - blocksize, 0)
179 176 self.dataf.seek(start)
180 177 data = self.dataf.read(end - start)
181 178 findend = end - start
182 179 while True:
183 180 # we're searching backwards, so we have to make sure
184 181 # we don't find a changeset where this node is a parent
185 182 off = data.rfind(node, 0, findend)
186 183 findend = off
187 184 if off >= 0:
188 185 i = off / self.s
189 186 off = i * self.s
190 n = data[off + self.shaoffset:off + self.shaoffset + 20]
187 n = data[off + ngshaoffset:off + ngshaoffset + 20]
191 188 if n == node:
192 189 self.map[n] = i + start / self.s
193 190 return node
194 191 else:
195 192 break
196 193 end -= blocksize
197 194 return None
198 195
199 196 def loadindex(self, i=None, end=None):
200 197 if self.all: return
201 198 all = False
202 199 if i == None:
203 200 blockstart = 0
204 201 blocksize = (512 / self.s) * self.s
205 202 end = self.datasize
206 203 all = True
207 204 else:
208 205 if end:
209 206 blockstart = i * self.s
210 207 end = end * self.s
211 208 blocksize = end - blockstart
212 209 else:
213 210 blockstart = (i & ~63) * self.s
214 211 blocksize = self.s * 64
215 212 end = blockstart + blocksize
216 213 while blockstart < end:
217 214 self.loadblock(blockstart, blocksize)
218 215 blockstart += blocksize
219 216 if all: self.all = True
220 217
221 218 class lazyindex(object):
222 219 """a lazy version of the index array"""
223 220 def __init__(self, parser):
224 221 self.p = parser
225 222 def __len__(self):
226 223 return len(self.p.index)
227 224 def load(self, pos):
228 225 if pos < 0:
229 226 pos += len(self.p.index)
230 227 self.p.loadindex(pos)
231 228 return self.p.index[pos]
232 229 def __getitem__(self, pos):
233 230 ret = self.p.index[pos] or self.load(pos)
234 231 if isinstance(ret, str):
235 ret = struct.unpack(self.p.indexformat, ret)
232 ret = struct.unpack(indexformatng, ret)
236 233 return ret
237 234 def __setitem__(self, pos, item):
238 235 self.p.index[pos] = item
239 236 def __delitem__(self, pos):
240 237 del self.p.index[pos]
241 238 def append(self, e):
242 239 self.p.index.append(e)
243 240
244 241 class lazymap(object):
245 242 """a lazy version of the node map"""
246 243 def __init__(self, parser):
247 244 self.p = parser
248 245 def load(self, key):
249 246 n = self.p.findnode(key)
250 247 if n == None:
251 248 raise KeyError(key)
252 249 def __contains__(self, key):
253 250 if key in self.p.map:
254 251 return True
255 252 self.p.loadmap()
256 253 return key in self.p.map
257 254 def __iter__(self):
258 255 yield nullid
259 256 for i in xrange(self.p.l):
260 257 ret = self.p.index[i]
261 258 if not ret:
262 259 self.p.loadindex(i)
263 260 ret = self.p.index[i]
264 261 if isinstance(ret, str):
265 ret = struct.unpack(self.p.indexformat, ret)
262 ret = struct.unpack(indexformatng, ret)
266 263 yield ret[-1]
267 264 def __getitem__(self, key):
268 265 try:
269 266 return self.p.map[key]
270 267 except KeyError:
271 268 try:
272 269 self.load(key)
273 270 return self.p.map[key]
274 271 except KeyError:
275 272 raise KeyError("node " + hex(key))
276 273 def __setitem__(self, key, val):
277 274 self.p.map[key] = val
278 275 def __delitem__(self, key):
279 276 del self.p.map[key]
280 277
281 278 class RevlogError(Exception): pass
282 279 class LookupError(RevlogError): pass
283 280
284 281 def getoffset(q):
285 282 if q & 0xFFFF:
286 283 raise RevlogError(_('incompatible revision flag %x') % q)
287 284 return int(q >> 16)
288 285
289 286 def gettype(q):
290 287 return int(q & 0xFFFF)
291 288
292 289 def offset_type(offset, type):
293 290 return long(long(offset) << 16 | type)
294 291
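The three helpers above fold a byte offset and a 16-bit type field into the single first word of an ng index entry. A round-trip check:

    q = offset_type(4096, 0)
    assert getoffset(q) == 4096
    assert gettype(q) == 0
    # a nonzero type makes getoffset() raise RevlogError, e.g.:
    # getoffset(offset_type(4096, 1))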
295 292 class revlogoldio(object):
296 293 def __init__(self):
297 294 self.chunkcache = None
298 295
299 296 def parseindex(self, fp, st, inline):
300 297 s = struct.calcsize(indexformatv0)
301 298 index = []
302 299 nodemap = {nullid: nullrev}
303 300 n = off = 0
304 301 data = fp.read()
305 302 l = len(data)
306 303 while off + s <= l:
307 304 cur = data[off:off + s]
308 305 off += s
309 306 e = struct.unpack(indexformatv0, cur)
310 307 index.append(e)
311 308 nodemap[e[-1]] = n
312 309 n += 1
313 310
314 311 return index, nodemap
315 312
316 313 class revlogio(object):
317 314 def __init__(self):
318 315 self.chunkcache = None
319 316
320 317 def parseindex(self, fp, st, inline):
321 318 if (lazyparser.safe_to_use and not inline and
322 319 st and st.st_size > 1000000):
323 320 # big index, let's parse it on demand
324 parser = lazyparser(fp, st.st_size, indexformatng, ngshaoffset)
321 parser = lazyparser(fp, st.st_size)
325 322 index = lazyindex(parser)
326 323 nodemap = lazymap(parser)
327 324 e = list(index[0])
328 325 type = gettype(e[0])
329 326 e[0] = offset_type(0, type)
330 327 index[0] = e
331 328 return index, nodemap
332 329
333 330 s = struct.calcsize(indexformatng)
334 331 index = []
335 332 nodemap = {nullid: nullrev}
336 333 n = off = 0
337 334 # if we're not using lazymap, always read the whole index
338 335 data = fp.read()
339 336 l = len(data)
340 337 if inline:
341 338 self.chunkcache = (0, data)
342 339 while off + s <= l:
343 340 e = struct.unpack(indexformatng, data[off:off + s])
344 341 index.append(e)
345 342 nodemap[e[-1]] = n
346 343 n += 1
347 344 off += s
348 345 if inline:
349 346 if e[1] < 0:
350 347 break
351 348 off += e[1]
352 349
353 350 e = list(index[0])
354 351 type = gettype(e[0])
355 352 e[0] = offset_type(0, type)
356 353 index[0] = e
357 354
358 355 return index, nodemap
359 356
360 357 class revlog(object):
361 358 """
362 359 the underlying revision storage object
363 360
364 361 A revlog consists of two parts, an index and the revision data.
365 362
366 363 The index is a file with a fixed record size containing
367 364 information on each revision, including its nodeid (hash), the
368 365 nodeids of its parents, the position and offset of its data within
369 366 the data file, and the revision it's based on. Finally, each entry
370 367 contains a linkrev entry that can serve as a pointer to external
371 368 data.
372 369
373 370 The revision data itself is a linear collection of data chunks.
374 371 Each chunk represents a revision and is usually represented as a
375 372 delta against the previous chunk. To bound lookup time, runs of
376 373 deltas are limited to about 2 times the length of the original
377 374 version data. This makes retrieval of a version proportional to
378 375 its size, or O(1) relative to the number of revisions.
379 376
380 377 Both pieces of the revlog are written to in an append-only
381 378 fashion, which means we never need to rewrite a file to insert or
382 379 remove data, and can use some simple techniques to avoid the need
383 380 for locking while reading.
384 381 """
385 382 def __init__(self, opener, indexfile):
386 383 """
387 384 create a revlog object
388 385
389 386 opener is a function that abstracts the file opening operation
390 387 and can be used to implement COW semantics or the like.
391 388 """
392 389 self.indexfile = indexfile
393 390 self.datafile = indexfile[:-2] + ".d"
394 391 self.opener = opener
395 392
396 393 self.indexstat = None
397 394 self.cache = None
398 395 self.defversion = REVLOG_DEFAULT_VERSION
399 396 if hasattr(opener, "defversion"):
400 397 self.defversion = opener.defversion
401 398 if self.defversion & REVLOGNG:
402 399 self.defversion |= REVLOGNGINLINEDATA
403 400 self._load()
404 401
405 402 def _load(self):
406 403 v = self.defversion
407 404 try:
408 405 f = self.opener(self.indexfile)
409 406 i = f.read(4)
410 407 f.seek(0)
411 408 if len(i) > 0:
412 409 v = struct.unpack(versionformat, i)[0]
413 410 except IOError, inst:
414 411 if inst.errno != errno.ENOENT:
415 412 raise
416 413 i = ""
417 414 else:
418 415 try:
419 416 st = util.fstat(f)
420 417 except AttributeError, inst:
421 418 st = None
422 419 else:
423 420 oldst = self.indexstat
424 421 if (oldst and st.st_dev == oldst.st_dev
425 422 and st.st_ino == oldst.st_ino
426 423 and st.st_mtime == oldst.st_mtime
427 424 and st.st_ctime == oldst.st_ctime
428 425 and st.st_size == oldst.st_size):
429 426 return
430 427 self.indexstat = st
431 428 flags = v & ~0xFFFF
432 429 fmt = v & 0xFFFF
433 430 if fmt == REVLOGV0:
434 431 if flags:
435 432 raise RevlogError(_("index %s unknown flags %#04x for format v0")
436 433 % (self.indexfile, flags >> 16))
437 434 elif fmt == REVLOGNG:
438 435 if flags & ~REVLOGNGINLINEDATA:
439 436 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
440 437 % (self.indexfile, flags >> 16))
441 438 else:
442 439 raise RevlogError(_("index %s unknown format %d")
443 440 % (self.indexfile, fmt))
444 441 self.version = v
445 442 self.nodemap = {nullid: nullrev}
446 443 self.index = []
447 444 self._io = revlogio()
448 445 self.indexformat = indexformatng
449 446 if self.version == REVLOGV0:
450 447 self._io = revlogoldio()
451 448 self.indexformat = indexformatv0
452 449 if i:
453 450 self.index, self.nodemap = self._io.parseindex(f, st, self._inline())
454 451
455 452 def _loadindex(self, start, end):
456 453 """load a block of indexes all at once from the lazy parser"""
457 454 if isinstance(self.index, lazyindex):
458 455 self.index.p.loadindex(start, end)
459 456
460 457 def _loadindexmap(self):
461 458 """loads both the map and the index from the lazy parser"""
462 459 if isinstance(self.index, lazyindex):
463 460 p = self.index.p
464 461 p.loadindex()
465 462 self.nodemap = p.map
466 463
467 464 def _loadmap(self):
468 465 """loads the map from the lazy parser"""
469 466 if isinstance(self.nodemap, lazymap):
470 467 self.nodemap.p.loadmap()
471 468 self.nodemap = self.nodemap.p.map
472 469
473 470 def _inline(self): return self.version & REVLOGNGINLINEDATA
474 471
475 472 def tip(self): return self.node(len(self.index) - 1)
476 473 def count(self): return len(self.index)
477 474 def node(self, rev):
478 475 return rev == nullrev and nullid or self.index[rev][-1]
479 476 def rev(self, node):
480 477 try:
481 478 return self.nodemap[node]
482 479 except KeyError:
483 480 raise LookupError(_('%s: no node %s') % (self.indexfile, hex(node)))
484 481 def linkrev(self, node):
485 482 return (node == nullid) and nullrev or self.index[self.rev(node)][-4]
486 483 def parents(self, node):
487 484 if node == nullid: return (nullid, nullid)
488 485 r = self.rev(node)
489 486 d = self.index[r][-3:-1]
490 487 if self.version == REVLOGV0:
491 488 return d
492 489 return (self.node(d[0]), self.node(d[1]))
493 490 def parentrevs(self, rev):
494 491 if rev == nullrev:
495 492 return (nullrev, nullrev)
496 493 d = self.index[rev][-3:-1]
497 494 if self.version == REVLOGV0:
498 495 return (self.rev(d[0]), self.rev(d[1]))
499 496 return d
500 497 def start(self, rev):
501 498 if rev == nullrev:
502 499 return 0
503 500 if self.version != REVLOGV0:
504 501 return getoffset(self.index[rev][0])
505 502 return self.index[rev][0]
506 503
507 504 def end(self, rev): return self.start(rev) + self.length(rev)
508 505
509 506 def size(self, rev):
510 507 """return the length of the uncompressed text for a given revision"""
511 508 if rev == nullrev:
512 509 return 0
513 510 l = -1
514 511 if self.version != REVLOGV0:
515 512 l = self.index[rev][2]
516 513 if l >= 0:
517 514 return l
518 515
519 516 t = self.revision(self.node(rev))
520 517 return len(t)
521 518
522 519 # alternate implementation. The advantage of this code is that it
523 520 # will be faster for a single revision. But the results are not
524 521 # cached, so finding the size of every revision will be slower.
525 522 """
526 523 if self.cache and self.cache[1] == rev:
527 524 return len(self.cache[2])
528 525
529 526 base = self.base(rev)
530 527 if self.cache and self.cache[1] >= base and self.cache[1] < rev:
531 528 base = self.cache[1]
532 529 text = self.cache[2]
533 530 else:
534 531 text = self.revision(self.node(base))
535 532
536 533 l = len(text)
537 534 for x in xrange(base + 1, rev + 1):
538 535 l = mdiff.patchedsize(l, self.chunk(x))
539 536 return l
540 537 """
541 538
542 539 def length(self, rev):
543 540 if rev == nullrev:
544 541 return 0
545 542 else:
546 543 return self.index[rev][1]
547 544 def base(self, rev):
548 545 if (rev == nullrev):
549 546 return nullrev
550 547 else:
551 548 return self.index[rev][-5]
552 549
553 550 def reachable(self, node, stop=None):
554 551 """return a hash of all nodes ancestral to a given node, including
555 552 the node itself, stopping when stop is matched"""
556 553 reachable = {}
557 554 visit = [node]
558 555 reachable[node] = 1
559 556 if stop:
560 557 stopn = self.rev(stop)
561 558 else:
562 559 stopn = 0
563 560 while visit:
564 561 n = visit.pop(0)
565 562 if n == stop:
566 563 continue
567 564 if n == nullid:
568 565 continue
569 566 for p in self.parents(n):
570 567 if self.rev(p) < stopn:
571 568 continue
572 569 if p not in reachable:
573 570 reachable[p] = 1
574 571 visit.append(p)
575 572 return reachable
576 573
577 574 def nodesbetween(self, roots=None, heads=None):
578 575 """Return a tuple containing three elements. Elements 1 and 2 contain
579 576 a final list of bases and heads after all the unreachable ones have been
580 577 pruned. Element 0 contains a topologically sorted list of all
581 578
582 579 nodes that satisfy these constraints:
583 580 1. All nodes must be descended from a node in roots (the nodes on
584 581 roots are considered descended from themselves).
585 582 2. All nodes must also be ancestors of a node in heads (the nodes in
586 583 heads are considered to be their own ancestors).
587 584
588 585 If roots is unspecified, nullid is assumed as the only root.
589 586 If heads is unspecified, it is taken to be the output of the
590 587 heads method (i.e. a list of all nodes in the repository that
591 588 have no children)."""
592 589 nonodes = ([], [], [])
593 590 if roots is not None:
594 591 roots = list(roots)
595 592 if not roots:
596 593 return nonodes
597 594 lowestrev = min([self.rev(n) for n in roots])
598 595 else:
599 596 roots = [nullid] # Everybody's a descendent of nullid
600 597 lowestrev = nullrev
601 598 if (lowestrev == nullrev) and (heads is None):
602 599 # We want _all_ the nodes!
603 600 return ([self.node(r) for r in xrange(0, self.count())],
604 601 [nullid], list(self.heads()))
605 602 if heads is None:
606 603 # All nodes are ancestors, so the latest ancestor is the last
607 604 # node.
608 605 highestrev = self.count() - 1
609 606 # Set ancestors to None to signal that every node is an ancestor.
610 607 ancestors = None
611 608 # Set heads to an empty dictionary for later discovery of heads
612 609 heads = {}
613 610 else:
614 611 heads = list(heads)
615 612 if not heads:
616 613 return nonodes
617 614 ancestors = {}
618 615 # Turn heads into a dictionary so we can remove 'fake' heads.
619 616 # Also, later we will be using it to filter out the heads we can't
620 617 # find from roots.
621 618 heads = dict.fromkeys(heads, 0)
622 619 # Start at the top and keep marking parents until we're done.
623 620 nodestotag = heads.keys()
624 621 # Remember where the top was so we can use it as a limit later.
625 622 highestrev = max([self.rev(n) for n in nodestotag])
626 623 while nodestotag:
627 624 # grab a node to tag
628 625 n = nodestotag.pop()
629 626 # Never tag nullid
630 627 if n == nullid:
631 628 continue
632 629 # A node's revision number represents its place in a
633 630 # topologically sorted list of nodes.
634 631 r = self.rev(n)
635 632 if r >= lowestrev:
636 633 if n not in ancestors:
637 634 # If we are possibly a descendent of one of the roots
638 635 # and we haven't already been marked as an ancestor
639 636 ancestors[n] = 1 # Mark as ancestor
640 637 # Add non-nullid parents to list of nodes to tag.
641 638 nodestotag.extend([p for p in self.parents(n) if
642 639 p != nullid])
643 640 elif n in heads: # We've seen it before, is it a fake head?
644 641 # So it is, real heads should not be the ancestors of
645 642 # any other heads.
646 643 heads.pop(n)
647 644 if not ancestors:
648 645 return nonodes
649 646 # Now that we have our set of ancestors, we want to remove any
650 647 # roots that are not ancestors.
651 648
652 649 # If one of the roots was nullid, everything is included anyway.
653 650 if lowestrev > nullrev:
654 651 # But, since we weren't, let's recompute the lowest rev to not
655 652 # include roots that aren't ancestors.
656 653
657 654 # Filter out roots that aren't ancestors of heads
658 655 roots = [n for n in roots if n in ancestors]
659 656 # Recompute the lowest revision
660 657 if roots:
661 658 lowestrev = min([self.rev(n) for n in roots])
662 659 else:
663 660 # No more roots? Return empty list
664 661 return nonodes
665 662 else:
666 663 # We are descending from nullid, and don't need to care about
667 664 # any other roots.
668 665 lowestrev = nullrev
669 666 roots = [nullid]
670 667 # Transform our roots list into a 'set' (i.e. a dictionary where the
671 668 # values don't matter).
672 669 descendents = dict.fromkeys(roots, 1)
673 670 # Also, keep the original roots so we can filter out roots that aren't
674 671 # 'real' roots (i.e. are descended from other roots).
675 672 roots = descendents.copy()
676 673 # Our topologically sorted list of output nodes.
677 674 orderedout = []
678 675 # Don't start at nullid since we don't want nullid in our output list,
679 676 # and if nullid shows up in descendents, empty parents will look like
680 677 # they're descendents.
681 678 for r in xrange(max(lowestrev, 0), highestrev + 1):
682 679 n = self.node(r)
683 680 isdescendent = False
684 681 if lowestrev == nullrev: # Everybody is a descendent of nullid
685 682 isdescendent = True
686 683 elif n in descendents:
687 684 # n is already a descendent
688 685 isdescendent = True
689 686 # This check only needs to be done here because all the roots
690 687 # will start being marked as descendents before the loop.
691 688 if n in roots:
692 689 # If n was a root, check if it's a 'real' root.
693 690 p = tuple(self.parents(n))
694 691 # If any of its parents are descendents, it's not a root.
695 692 if (p[0] in descendents) or (p[1] in descendents):
696 693 roots.pop(n)
697 694 else:
698 695 p = tuple(self.parents(n))
699 696 # A node is a descendent if either of its parents is a
700 697 # descendent. (We seeded the descendents list with the roots
701 698 # up there, remember?)
702 699 if (p[0] in descendents) or (p[1] in descendents):
703 700 descendents[n] = 1
704 701 isdescendent = True
705 702 if isdescendent and ((ancestors is None) or (n in ancestors)):
706 703 # Only include nodes that are both descendents and ancestors.
707 704 orderedout.append(n)
708 705 if (ancestors is not None) and (n in heads):
709 706 # We're trying to figure out which heads are reachable
710 707 # from roots.
711 708 # Mark this head as having been reached
712 709 heads[n] = 1
713 710 elif ancestors is None:
714 711 # Otherwise, we're trying to discover the heads.
715 712 # Assume this is a head because if it isn't, the next step
716 713 # will eventually remove it.
717 714 heads[n] = 1
718 715 # But, obviously its parents aren't.
719 716 for p in self.parents(n):
720 717 heads.pop(p, None)
721 718 heads = [n for n in heads.iterkeys() if heads[n] != 0]
722 719 roots = roots.keys()
723 720 assert orderedout
724 721 assert roots
725 722 assert heads
726 723 return (orderedout, roots, heads)
727 724
728 725 def heads(self, start=None, stop=None):
729 726 """return the list of all nodes that have no children
730 727
731 728 if start is specified, only heads that are descendants of
732 729 start will be returned
733 730 if stop is specified, it will consider all the revs from stop
734 731 as if they had no children
735 732 """
736 733 if start is None:
737 734 start = nullid
738 735 if stop is None:
739 736 stop = []
740 737 stoprevs = dict.fromkeys([self.rev(n) for n in stop])
741 738 startrev = self.rev(start)
742 739 reachable = {startrev: 1}
743 740 heads = {startrev: 1}
744 741
745 742 parentrevs = self.parentrevs
746 743 for r in xrange(startrev + 1, self.count()):
747 744 for p in parentrevs(r):
748 745 if p in reachable:
749 746 if r not in stoprevs:
750 747 reachable[r] = 1
751 748 heads[r] = 1
752 749 if p in heads and p not in stoprevs:
753 750 del heads[p]
754 751
755 752 return [self.node(r) for r in heads]
756 753
757 754 def children(self, node):
758 755 """find the children of a given node"""
759 756 c = []
760 757 p = self.rev(node)
761 758 for r in range(p + 1, self.count()):
762 759 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
763 760 if prevs:
764 761 for pr in prevs:
765 762 if pr == p:
766 763 c.append(self.node(r))
767 764 elif p == nullrev:
768 765 c.append(self.node(r))
769 766 return c
770 767
771 768 def _match(self, id):
772 769 if isinstance(id, (long, int)):
773 770 # rev
774 771 return self.node(id)
775 772 if len(id) == 20:
776 773 # possibly a binary node
777 774 # odds of a binary node being all hex in ASCII are 1 in 10**25
778 775 try:
779 776 node = id
780 777 r = self.rev(node) # quick search the index
781 778 return node
782 779 except LookupError:
783 780 pass # may be partial hex id
784 781 try:
785 782 # str(rev)
786 783 rev = int(id)
787 784 if str(rev) != id: raise ValueError
788 785 if rev < 0: rev = self.count() + rev
789 786 if rev < 0 or rev >= self.count(): raise ValueError
790 787 return self.node(rev)
791 788 except (ValueError, OverflowError):
792 789 pass
793 790 if len(id) == 40:
794 791 try:
795 792 # a full hex nodeid?
796 793 node = bin(id)
797 794 r = self.rev(node)
798 795 return node
799 796 except TypeError:
800 797 pass
801 798
802 799 def _partialmatch(self, id):
803 800 if len(id) < 40:
804 801 try:
805 802 # hex(node)[:...]
806 803 bin_id = bin(id[:len(id) & ~1]) # grab an even number of digits
807 804 node = None
808 805 for n in self.nodemap:
809 806 if n.startswith(bin_id) and hex(n).startswith(id):
810 807 if node is not None:
811 808 raise LookupError(_("Ambiguous identifier"))
812 809 node = n
813 810 if node is not None:
814 811 return node
815 812 except TypeError:
816 813 pass
817 814
818 815 def lookup(self, id):
819 816 """locate a node based on:
820 817 - revision number or str(revision number)
821 818 - nodeid or subset of hex nodeid
822 819 """
823 820
824 821 n = self._match(id)
825 822 if n is not None:
826 823 return n
827 824 n = self._partialmatch(id)
828 825 if n:
829 826 return n
830 827
831 828 raise LookupError(_("No match found"))
832 829
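A usage sketch for lookup() above, assuming a revlog instance rl and a known binary node id; hex() is the helper from node.py. Each call returns a binary node id or raises LookupError.

    def lookup_examples(rl, node):
        rl.lookup(0)               # revision number
        rl.lookup("0")             # str(revision number)
        rl.lookup("-1")            # negative numbers count back from the tip
        rl.lookup(hex(node)[:8])   # unambiguous hex-nodeid prefix
        return rl.lookup(node)     # a raw 20-byte binary node id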
833 830 def cmp(self, node, text):
834 831 """compare text with a given file revision"""
835 832 p1, p2 = self.parents(node)
836 833 return hash(text, p1, p2) != node
837 834
838 835 def diff(self, a, b):
839 836 """return a delta between two revisions"""
840 837 return mdiff.textdiff(a, b)
841 838
842 839 def patches(self, t, pl):
843 840 """apply a list of patches to a string"""
844 841 return mdiff.patches(t, pl)
845 842
846 843 def chunk(self, rev, df=None, cachelen=4096):
847 844 start, length = self.start(rev), self.length(rev)
848 845 inline = self._inline()
849 846 if inline:
850 847 start += (rev + 1) * struct.calcsize(self.indexformat)
851 848 end = start + length
852 849 def loadcache(df):
853 850 cache_length = max(cachelen, length) # 4k
854 851 if not df:
855 852 if inline:
856 853 df = self.opener(self.indexfile)
857 854 else:
858 855 df = self.opener(self.datafile)
859 856 df.seek(start)
860 857 self._io.chunkcache = (start, df.read(cache_length))
861 858
862 859 if not self._io.chunkcache:
863 860 loadcache(df)
864 861
865 862 cache_start = self._io.chunkcache[0]
866 863 cache_end = cache_start + len(self._io.chunkcache[1])
867 864 if start >= cache_start and end <= cache_end:
868 865 # it is cached
869 866 offset = start - cache_start
870 867 else:
871 868 loadcache(df)
872 869 offset = 0
873 870
874 871 #def checkchunk():
875 872 # df = self.opener(self.datafile)
876 873 # df.seek(start)
877 874 # return df.read(length)
878 875 #assert s == checkchunk()
879 876 return decompress(self._io.chunkcache[1][offset:offset + length])
880 877
881 878 def delta(self, node):
882 879 """return or calculate a delta between a node and its predecessor"""
883 880 r = self.rev(node)
884 881 return self.revdiff(r - 1, r)
885 882
886 883 def revdiff(self, rev1, rev2):
887 884 """return or calculate a delta between two revisions"""
888 885 b1 = self.base(rev1)
889 886 b2 = self.base(rev2)
890 887 if b1 == b2 and rev1 + 1 == rev2:
891 888 return self.chunk(rev2)
892 889 else:
893 890 return self.diff(self.revision(self.node(rev1)),
894 891 self.revision(self.node(rev2)))
895 892
896 893 def revision(self, node):
897 894 """return an uncompressed revision of a given node"""
898 895 if node == nullid: return ""
899 896 if self.cache and self.cache[0] == node: return self.cache[2]
900 897
901 898 # look up what we need to read
902 899 text = None
903 900 rev = self.rev(node)
904 901 base = self.base(rev)
905 902
906 903 if self._inline():
907 904 # we probably have the whole chunk cached
908 905 df = None
909 906 else:
910 907 df = self.opener(self.datafile)
911 908
912 909 # do we have useful data cached?
913 910 if self.cache and self.cache[1] >= base and self.cache[1] < rev:
914 911 base = self.cache[1]
915 912 text = self.cache[2]
916 913 self._loadindex(base, rev + 1)
917 914 else:
918 915 self._loadindex(base, rev + 1)
919 916 text = self.chunk(base, df=df)
920 917
921 918 bins = []
922 919 for r in xrange(base + 1, rev + 1):
923 920 bins.append(self.chunk(r, df=df))
924 921
925 922 text = self.patches(text, bins)
926 923
927 924 p1, p2 = self.parents(node)
928 925 if node != hash(text, p1, p2):
929 926 raise RevlogError(_("integrity check failed on %s:%d")
930 927 % (self.datafile, rev))
931 928
932 929 self.cache = (node, rev, text)
933 930 return text
934 931
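revision() above rebuilds a text by reading the full text at the base of the delta chain and patching every delta up to the requested rev. A conceptual sketch of that core loop, minus the caching, inline handling, and integrity check:

    def reconstruct(rl, rev):
        base = rl.base(rev)             # first rev of the delta chain
        text = rl.chunk(base)           # full text stored at the base
        bins = [rl.chunk(r) for r in xrange(base + 1, rev + 1)]
        return rl.patches(text, bins)   # apply each delta in order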
935 932 def checkinlinesize(self, tr, fp=None):
936 933 if not self._inline():
937 934 return
938 935 if not fp:
939 936 fp = self.opener(self.indexfile, 'r')
940 937 fp.seek(0, 2)
941 938 size = fp.tell()
942 939 if size < 131072:
943 940 return
944 941 trinfo = tr.find(self.indexfile)
945 942 if trinfo == None:
946 943 raise RevlogError(_("%s not found in the transaction")
947 944 % self.indexfile)
948 945
949 946 trindex = trinfo[2]
950 947 dataoff = self.start(trindex)
951 948
952 949 tr.add(self.datafile, dataoff)
953 950 df = self.opener(self.datafile, 'w')
954 951 calc = struct.calcsize(self.indexformat)
955 952 for r in xrange(self.count()):
956 953 start = self.start(r) + (r + 1) * calc
957 954 length = self.length(r)
958 955 fp.seek(start)
959 956 d = fp.read(length)
960 957 df.write(d)
961 958 fp.close()
962 959 df.close()
963 960 fp = self.opener(self.indexfile, 'w', atomictemp=True)
964 961 self.version &= ~(REVLOGNGINLINEDATA)
965 962 if self.count():
966 963 x = self.index[0]
967 964 e = struct.pack(self.indexformat, *x)[4:]
968 965 l = struct.pack(versionformat, self.version)
969 966 fp.write(l)
970 967 fp.write(e)
971 968
972 969 for i in xrange(1, self.count()):
973 970 x = self.index[i]
974 971 e = struct.pack(self.indexformat, *x)
975 972 fp.write(e)
976 973
977 974 # if we don't call rename, the temp file will never replace the
978 975 # real index
979 976 fp.rename()
980 977
981 978 tr.replace(self.indexfile, trindex * calc)
982 979 self._io.chunkcache = None
983 980
984 981 def addrevision(self, text, transaction, link, p1=None, p2=None, d=None):
985 982 """add a revision to the log
986 983
987 984 text - the revision data to add
988 985 transaction - the transaction object used for rollback
989 986 link - the linkrev data to add
990 987 p1, p2 - the parent nodeids of the revision
991 988 d - an optional precomputed delta
992 989 """
993 990 if not self._inline():
994 991 dfh = self.opener(self.datafile, "a")
995 992 else:
996 993 dfh = None
997 994 ifh = self.opener(self.indexfile, "a+")
998 995 return self._addrevision(text, transaction, link, p1, p2, d, ifh, dfh)
999 996
1000 997 def _addrevision(self, text, transaction, link, p1, p2, d, ifh, dfh):
1001 998 if text is None: text = ""
1002 999 if p1 is None: p1 = self.tip()
1003 1000 if p2 is None: p2 = nullid
1004 1001
1005 1002 node = hash(text, p1, p2)
1006 1003
1007 1004 if node in self.nodemap:
1008 1005 return node
1009 1006
1010 1007 n = self.count()
1011 1008 t = n - 1
1012 1009
1013 1010 if n:
1014 1011 base = self.base(t)
1015 1012 start = self.start(base)
1016 1013 end = self.end(t)
1017 1014 if not d:
1018 1015 prev = self.revision(self.tip())
1019 1016 d = self.diff(prev, text)
1020 1017 data = compress(d)
1021 1018 l = len(data[1]) + len(data[0])
1022 1019 dist = end - start + l
1023 1020
1024 1021 # full versions are inserted when the needed deltas
1025 1022 # become comparable to the uncompressed text
1026 1023 if not n or dist > len(text) * 2:
1027 1024 data = compress(text)
1028 1025 l = len(data[1]) + len(data[0])
1029 1026 base = n
1030 1027 else:
1031 1028 base = self.base(t)
1032 1029
1033 1030 offset = 0
1034 1031 if t >= 0:
1035 1032 offset = self.end(t)
1036 1033
1037 1034 if self.version == REVLOGV0:
1038 1035 e = (offset, l, base, link, p1, p2, node)
1039 1036 else:
1040 1037 e = (offset_type(offset, 0), l, len(text),
1041 1038 base, link, self.rev(p1), self.rev(p2), node)
1042 1039
1043 1040 self.index.append(e)
1044 1041 self.nodemap[node] = n
1045 1042 entry = struct.pack(self.indexformat, *e)
1046 1043
1047 1044 if not self._inline():
1048 1045 transaction.add(self.datafile, offset)
1049 1046 transaction.add(self.indexfile, n * len(entry))
1050 1047 if data[0]:
1051 1048 dfh.write(data[0])
1052 1049 dfh.write(data[1])
1053 1050 dfh.flush()
1054 1051 else:
1055 1052 ifh.seek(0, 2)
1056 1053 transaction.add(self.indexfile, ifh.tell(), self.count() - 1)
1057 1054
1058 1055 if len(self.index) == 1 and self.version != REVLOGV0:
1059 1056 l = struct.pack(versionformat, self.version)
1060 1057 ifh.write(l)
1061 1058 entry = entry[4:]
1062 1059
1063 1060 ifh.write(entry)
1064 1061
1065 1062 if self._inline():
1066 1063 ifh.write(data[0])
1067 1064 ifh.write(data[1])
1068 1065 self.checkinlinesize(transaction, ifh)
1069 1066
1070 1067 self.cache = (node, n, text)
1071 1068 return node
1072 1069
1073 1070 def ancestor(self, a, b):
1074 1071 """calculate the least common ancestor of nodes a and b"""
1075 1072
1076 1073 def parents(rev):
1077 1074 return [p for p in self.parentrevs(rev) if p != nullrev]
1078 1075
1079 1076 c = ancestor.ancestor(self.rev(a), self.rev(b), parents)
1080 1077 if c is None:
1081 1078 return nullid
1082 1079
1083 1080 return self.node(c)
1084 1081
1085 1082 def group(self, nodelist, lookup, infocollect=None):
1086 1083 """calculate a delta group
1087 1084
1088 1085 Given a list of changeset revs, return a set of deltas and
1089 1086 metadata corresponding to nodes. The first delta is
1090 1087 parent(nodes[0]) -> nodes[0]; the receiver is guaranteed to
1091 1088 have this parent as it has all history before these
1092 1089 changesets. parent is parent[0].
1093 1090 """
1094 1091 revs = [self.rev(n) for n in nodelist]
1095 1092
1096 1093 # if we don't have any revisions touched by these changesets, bail
1097 1094 if not revs:
1098 1095 yield changegroup.closechunk()
1099 1096 return
1100 1097
1101 1098 # add the parent of the first rev
1102 1099 p = self.parents(self.node(revs[0]))[0]
1103 1100 revs.insert(0, self.rev(p))
1104 1101
1105 1102 # build deltas
1106 1103 for d in xrange(0, len(revs) - 1):
1107 1104 a, b = revs[d], revs[d + 1]
1108 1105 nb = self.node(b)
1109 1106
1110 1107 if infocollect is not None:
1111 1108 infocollect(nb)
1112 1109
1113 1110 d = self.revdiff(a, b)
1114 1111 p = self.parents(nb)
1115 1112 meta = nb + p[0] + p[1] + lookup(nb)
1116 1113 yield changegroup.genchunk("%s%s" % (meta, d))
1117 1114
1118 1115 yield changegroup.closechunk()
1119 1116
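Each chunk yielded by group() above is an 80-byte header (node, parent 1, parent 2, and the changeset node it links to, 20 bytes each) followed by the delta payload; addgroup() below unpacks it with the same struct format. A sketch of the framing with made-up node ids:

    import struct
    node, p1, p2, cs = "\x01" * 20, "\x02" * 20, "\x03" * 20, "\x04" * 20
    chunk = node + p1 + p2 + cs + "delta payload"
    assert struct.unpack("20s20s20s20s", chunk[:80]) == (node, p1, p2, cs)
    assert chunk[80:] == "delta payload"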
1120 1117 def addgroup(self, revs, linkmapper, transaction, unique=0):
1121 1118 """
1122 1119 add a delta group
1123 1120
1124 1121 Given a set of deltas, add them to the revision log. The
1125 1122 first delta is against its parent, which should be in our
1126 1123 log; the rest are against the previous delta.
1127 1124 """
1128 1125
1129 1126 # track the base of the current delta log
1130 1127 r = self.count()
1131 1128 t = r - 1
1132 1129 node = None
1133 1130
1134 1131 base = prev = nullrev
1135 1132 start = end = textlen = 0
1136 1133 if r:
1137 1134 end = self.end(t)
1138 1135
1139 1136 ifh = self.opener(self.indexfile, "a+")
1140 1137 ifh.seek(0, 2)
1141 1138 transaction.add(self.indexfile, ifh.tell(), self.count())
1142 1139 if self._inline():
1143 1140 dfh = None
1144 1141 else:
1145 1142 transaction.add(self.datafile, end)
1146 1143 dfh = self.opener(self.datafile, "a")
1147 1144
1148 1145 # loop through our set of deltas
1149 1146 chain = None
1150 1147 for chunk in revs:
1151 1148 node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
1152 1149 link = linkmapper(cs)
1153 1150 if node in self.nodemap:
1154 1151 # this can happen if two branches make the same change
1155 1152 # if unique:
1156 1153 # raise RevlogError(_("already have %s") % hex(node[:4]))
1157 1154 chain = node
1158 1155 continue
1159 1156 delta = chunk[80:]
1160 1157
1161 1158 for p in (p1, p2):
1162 1159 if not p in self.nodemap:
1163 1160 raise LookupError(_("unknown parent %s") % short(p))
1164 1161
1165 1162 if not chain:
1166 1163 # retrieve the parent revision of the delta chain
1167 1164 chain = p1
1168 1165 if not chain in self.nodemap:
1169 1166 raise LookupError(_("unknown base %s") % short(chain[:4]))
1170 1167
1171 1168 # full versions are inserted when the needed deltas become
1172 1169 # comparable to the uncompressed text or when the previous
1173 1170 # version is not the one we have a delta against. We use
1174 1171 # the size of the previous full rev as a proxy for the
1175 1172 # current size.
1176 1173
1177 1174 if chain == prev:
1178 1175 tempd = compress(delta)
1179 1176 cdelta = tempd[0] + tempd[1]
1180 1177 textlen = mdiff.patchedsize(textlen, delta)
1181 1178
1182 1179 if chain != prev or (end - start + len(cdelta)) > textlen * 2:
1183 1180 # flush our writes here so we can read it in revision
1184 1181 if dfh:
1185 1182 dfh.flush()
1186 1183 ifh.flush()
1187 1184 text = self.revision(chain)
1188 1185 text = self.patches(text, [delta])
1189 1186 chk = self._addrevision(text, transaction, link, p1, p2, None,
1190 1187 ifh, dfh)
1191 1188 if not dfh and not self._inline():
1192 1189 # addrevision switched from inline to conventional
1193 1190 # reopen the index
1194 1191 dfh = self.opener(self.datafile, "a")
1195 1192 ifh = self.opener(self.indexfile, "a")
1196 1193 if chk != node:
1197 1194 raise RevlogError(_("consistency error adding group"))
1198 1195 textlen = len(text)
1199 1196 else:
1200 1197 if self.version == REVLOGV0:
1201 1198 e = (end, len(cdelta), base, link, p1, p2, node)
1202 1199 else:
1203 1200 e = (offset_type(end, 0), len(cdelta), textlen, base,
1204 1201 link, self.rev(p1), self.rev(p2), node)
1205 1202 self.index.append(e)
1206 1203 self.nodemap[node] = r
1207 1204 if self._inline():
1208 1205 ifh.write(struct.pack(self.indexformat, *e))
1209 1206 ifh.write(cdelta)
1210 1207 self.checkinlinesize(transaction, ifh)
1211 1208 if not self._inline():
1212 1209 dfh = self.opener(self.datafile, "a")
1213 1210 ifh = self.opener(self.indexfile, "a")
1214 1211 else:
1215 1212 dfh.write(cdelta)
1216 1213 ifh.write(struct.pack(self.indexformat, *e))
1217 1214
1218 1215 t, r, chain, prev = r, r + 1, node, node
1219 1216 base = self.base(t)
1220 1217 start = self.start(base)
1221 1218 end = self.end(t)
1222 1219
1223 1220 return node
1224 1221
1225 1222 def strip(self, rev, minlink):
1226 1223 if self.count() == 0 or rev >= self.count():
1227 1224 return
1228 1225
1229 1226 if isinstance(self.index, lazyindex):
1230 1227 self._loadindexmap()
1231 1228
1232 1229 # When stripping away a revision, we need to make sure it
1233 1230 # does not actually belong to an older changeset.
1234 1231 # The minlink parameter defines the oldest revision
1235 1232 # we're allowed to strip away.
1236 1233 while minlink > self.index[rev][-4]:
1237 1234 rev += 1
1238 1235 if rev >= self.count():
1239 1236 return
1240 1237
1241 1238 # first truncate the files on disk
1242 1239 end = self.start(rev)
1243 1240 if not self._inline():
1244 1241 df = self.opener(self.datafile, "a")
1245 1242 df.truncate(end)
1246 1243 end = rev * struct.calcsize(self.indexformat)
1247 1244 else:
1248 1245 end += rev * struct.calcsize(self.indexformat)
1249 1246
1250 1247 indexf = self.opener(self.indexfile, "a")
1251 1248 indexf.truncate(end)
1252 1249
1253 1250 # then reset internal state in memory to forget those revisions
1254 1251 self.cache = None
1255 1252 self._io.chunkcache = None
1256 1253 for x in xrange(rev, self.count()):
1257 1254 del self.nodemap[self.node(x)]
1258 1255
1259 1256 del self.index[rev:]
1260 1257
1261 1258 def checksize(self):
1262 1259 expected = 0
1263 1260 if self.count():
1264 1261 expected = self.end(self.count() - 1)
1265 1262
1266 1263 try:
1267 1264 f = self.opener(self.datafile)
1268 1265 f.seek(0, 2)
1269 1266 actual = f.tell()
1270 1267 dd = actual - expected
1271 1268 except IOError, inst:
1272 1269 if inst.errno != errno.ENOENT:
1273 1270 raise
1274 1271 dd = 0
1275 1272
1276 1273 try:
1277 1274 f = self.opener(self.indexfile)
1278 1275 f.seek(0, 2)
1279 1276 actual = f.tell()
1280 1277 s = struct.calcsize(self.indexformat)
1281 1278 i = actual / s
1282 1279 di = actual - (i * s)
1283 1280 if self._inline():
1284 1281 databytes = 0
1285 1282 for r in xrange(self.count()):
1286 1283 databytes += self.length(r)
1287 1284 dd = 0
1288 1285 di = actual - self.count() * s - databytes
1289 1286 except IOError, inst:
1290 1287 if inst.errno != errno.ENOENT:
1291 1288 raise
1292 1289 di = 0
1293 1290
1294 1291 return (dd, di)
1295 1292
1296 1293