revlog: regroup parsing code
Matt Mackall
r4971:3e6dae27 default
@@ -1,1292 +1,1297 @@
1 1 """
2 2 revlog.py - storage back-end for mercurial
3 3
4 4 This provides efficient delta storage with O(1) retrieve and append
5 5 and O(changes) merge between branches
6 6
7 7 Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
8 8
9 9 This software may be used and distributed according to the terms
10 10 of the GNU General Public License, incorporated herein by reference.
11 11 """
12 12
13 13 from node import *
14 14 from i18n import _
15 15 import binascii, changegroup, errno, ancestor, mdiff, os
16 16 import sha, struct, util, zlib
17 17
18 18 # revlog version strings
19 19 REVLOGV0 = 0
20 20 REVLOGNG = 1
21 21
22 22 # revlog flags
23 23 REVLOGNGINLINEDATA = (1 << 16)
24 24 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
25 25
26 26 REVLOG_DEFAULT_FORMAT = REVLOGNG
27 27 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
28 28
29 29 def hash(text, p1, p2):
30 30 """generate a hash from the given text and its parent hashes
31 31
32 32 This hash combines both the current file contents and its history
33 33 in a manner that makes it easy to distinguish nodes with the same
34 34 content in the revision graph.
35 35 """
36 36 l = [p1, p2]
37 37 l.sort()
38 38 s = sha.new(l[0])
39 39 s.update(l[1])
40 40 s.update(text)
41 41 return s.digest()
42 42
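
A minimal usage sketch of the function above: for a root revision both parents are nullid, so the node id reduces to one SHA-1 over nullid + nullid + text (the sorted parent list is trivially [nullid, nullid]). hashlib here stands in for the deprecated sha module used in this file.

    import hashlib
    nullid = b"\0" * 20
    text = b"hello\n"
    # sorted parents for a root rev are [nullid, nullid], so:
    node = hashlib.sha1(nullid + nullid + text).digest()
    assert len(node) == 20
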
43 43 def compress(text):
44 44 """ generate a possibly-compressed representation of text """
45 45 if not text: return ("", text)
46 46 if len(text) < 44:
47 47 if text[0] == '\0': return ("", text)
48 48 return ('u', text)
49 49 bin = zlib.compress(text)
50 50 if len(bin) > len(text):
51 51 if text[0] == '\0': return ("", text)
52 52 return ('u', text)
53 53 return ("", bin)
54 54
55 55 def decompress(bin):
56 56 """ decompress the given input """
57 57 if not bin: return bin
58 58 t = bin[0]
59 59 if t == '\0': return bin
60 60 if t == 'x': return zlib.decompress(bin)
61 61 if t == 'u': return bin[1:]
62 62 raise RevlogError(_("unknown compression type %r") % t)
63 63
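
A round-trip sketch of the tagging convention above: zlib output begins with 'x', so compressed chunks need no extra tag; short or incompressible text is stored as 'u' plus the literal; text starting with '\0' passes through untagged.

    import zlib
    blob = zlib.compress(b"x" * 100)
    assert blob[:1] == b"x"                    # zlib header doubles as the tag
    assert zlib.decompress(blob) == b"x" * 100
    assert (b"u" + b"short text")[1:] == b"short text"   # the 'u' literal case
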
64 64 indexformatv0 = ">4l20s20s20s"
65 65 v0shaoffset = 56
66 66 # index ng:
67 67 # 6 bytes offset
68 68 # 2 bytes flags
69 69 # 4 bytes compressed length
70 70 # 4 bytes uncompressed length
71 71 # 4 bytes: base rev
72 72 # 4 bytes link rev
73 73 # 4 bytes parent 1 rev
74 74 # 4 bytes parent 2 rev
75 75 # 32 bytes: nodeid
76 76 indexformatng = ">Qiiiiii20s12x"
77 77 ngshaoffset = 32
78 78 versionformat = ">I"
79 79
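
A round-trip sketch for the ">Qiiiiii20s12x" layout documented above (the entry values are illustrative): the 20-byte nodeid plus 12 pad bytes make up the "32 bytes: nodeid" slot, and a full entry packs to 64 bytes.

    import struct
    fmt = ">Qiiiiii20s12x"
    # (offset<<16 | flags, compressed len, uncompressed len,
    #  base rev, link rev, p1 rev, p2 rev, nodeid)
    entry = (0, 11, 11, 0, 0, -1, -1, b"\x12" * 20)
    packed = struct.pack(fmt, *entry)
    assert struct.calcsize(fmt) == len(packed) == 64
    assert struct.unpack(fmt, packed) == entry
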
80 80 class lazyparser(object):
81 81 """
82 82 this class avoids the need to parse the entirety of large indices
83 83 """
84 84
85 85 # lazyparser is not safe to use on windows if win32 extensions are not
86 86 # available. it keeps a file handle open, which makes it impossible
87 87 # to break hardlinks on local cloned repos.
88 88 safe_to_use = os.name != 'nt' or (not util.is_win_9x() and
89 89 hasattr(util, 'win32api'))
90 90
91 91 def __init__(self, dataf, size, indexformat, shaoffset):
92 92 self.dataf = dataf
93 93 self.format = indexformat
94 94 self.s = struct.calcsize(indexformat)
95 95 self.indexformat = indexformat
96 96 self.datasize = size
97 97 self.l = size/self.s
98 98 self.index = [None] * self.l
99 99 self.map = {nullid: nullrev}
100 100 self.allmap = 0
101 101 self.all = 0
102 102 self.mapfind_count = 0
103 103 self.shaoffset = shaoffset
104 104
105 105 def loadmap(self):
106 106 """
107 107 during a commit, we need to make sure the rev being added is
108 108 not a duplicate. This requires loading the entire index,
109 109 which is fairly slow. loadmap can load up just the node map,
110 110 which takes much less time.
111 111 """
112 112 if self.allmap: return
113 113 end = self.datasize
114 114 self.allmap = 1
115 115 cur = 0
116 116 count = 0
117 117 blocksize = self.s * 256
118 118 self.dataf.seek(0)
119 119 while cur < end:
120 120 data = self.dataf.read(blocksize)
121 121 off = 0
122 122 for x in xrange(256):
123 123 n = data[off + self.shaoffset:off + self.shaoffset + 20]
124 124 self.map[n] = count
125 125 count += 1
126 126 if count >= self.l:
127 127 break
128 128 off += self.s
129 129 cur += blocksize
130 130
131 131 def loadblock(self, blockstart, blocksize, data=None):
132 132 if self.all: return
133 133 if data is None:
134 134 self.dataf.seek(blockstart)
135 135 if blockstart + blocksize > self.datasize:
136 136 # the revlog may have grown since we've started running,
137 137 # but we don't have space in self.index for more entries.
138 138 # limit blocksize so that we don't get too much data.
139 139 blocksize = max(self.datasize - blockstart, 0)
140 140 data = self.dataf.read(blocksize)
141 141 lend = len(data) / self.s
142 142 i = blockstart / self.s
143 143 off = 0
144 144 # lazyindex supports __delitem__
145 145 if lend > len(self.index) - i:
146 146 lend = len(self.index) - i
147 147 for x in xrange(lend):
148 148 if self.index[i + x] == None:
149 149 b = data[off : off + self.s]
150 150 self.index[i + x] = b
151 151 n = b[self.shaoffset:self.shaoffset + 20]
152 152 self.map[n] = i + x
153 153 off += self.s
154 154
155 155 def findnode(self, node):
156 156 """search backwards through the index file for a specific node"""
157 157 if self.allmap: return None
158 158
159 159 # hg log will cause many many searches for the manifest
160 160 # nodes. After we get called a few times, just load the whole
161 161 # thing.
162 162 if self.mapfind_count > 8:
163 163 self.loadmap()
164 164 if node in self.map:
165 165 return node
166 166 return None
167 167 self.mapfind_count += 1
168 168 last = self.l - 1
169 169 while self.index[last] != None:
170 170 if last == 0:
171 171 self.all = 1
172 172 self.allmap = 1
173 173 return None
174 174 last -= 1
175 175 end = (last + 1) * self.s
176 176 blocksize = self.s * 256
177 177 while end >= 0:
178 178 start = max(end - blocksize, 0)
179 179 self.dataf.seek(start)
180 180 data = self.dataf.read(end - start)
181 181 findend = end - start
182 182 while True:
183 183 # we're searching backwards, so we have to make sure
184 184 # we don't find a changeset where this node is a parent
185 185 off = data.rfind(node, 0, findend)
186 186 findend = off
187 187 if off >= 0:
188 188 i = off / self.s
189 189 off = i * self.s
190 190 n = data[off + self.shaoffset:off + self.shaoffset + 20]
191 191 if n == node:
192 192 self.map[n] = i + start / self.s
193 193 return node
194 194 else:
195 195 break
196 196 end -= blocksize
197 197 return None
198 198
199 199 def loadindex(self, i=None, end=None):
200 200 if self.all: return
201 201 all = False
202 202 if i == None:
203 203 blockstart = 0
204 204 blocksize = (512 / self.s) * self.s
205 205 end = self.datasize
206 206 all = True
207 207 else:
208 208 if end:
209 209 blockstart = i * self.s
210 210 end = end * self.s
211 211 blocksize = end - blockstart
212 212 else:
213 213 blockstart = (i & ~63) * self.s
214 214 blocksize = self.s * 64
215 215 end = blockstart + blocksize
216 216 while blockstart < end:
217 217 self.loadblock(blockstart, blocksize)
218 218 blockstart += blocksize
219 219 if all: self.all = True
220 220
221 221 class lazyindex(object):
222 222 """a lazy version of the index array"""
223 223 def __init__(self, parser):
224 224 self.p = parser
225 225 def __len__(self):
226 226 return len(self.p.index)
227 227 def load(self, pos):
228 228 if pos < 0:
229 229 pos += len(self.p.index)
230 230 self.p.loadindex(pos)
231 231 return self.p.index[pos]
232 232 def __getitem__(self, pos):
233 233 ret = self.p.index[pos] or self.load(pos)
234 234 if isinstance(ret, str):
235 235 ret = struct.unpack(self.p.indexformat, ret)
236 236 return ret
237 237 def __setitem__(self, pos, item):
238 238 self.p.index[pos] = item
239 239 def __delitem__(self, pos):
240 240 del self.p.index[pos]
241 241 def append(self, e):
242 242 self.p.index.append(e)
243 243
244 244 class lazymap(object):
245 245 """a lazy version of the node map"""
246 246 def __init__(self, parser):
247 247 self.p = parser
248 248 def load(self, key):
249 249 n = self.p.findnode(key)
250 250 if n == None:
251 251 raise KeyError(key)
252 252 def __contains__(self, key):
253 253 if key in self.p.map:
254 254 return True
255 255 self.p.loadmap()
256 256 return key in self.p.map
257 257 def __iter__(self):
258 258 yield nullid
259 259 for i in xrange(self.p.l):
260 260 ret = self.p.index[i]
261 261 if not ret:
262 262 self.p.loadindex(i)
263 263 ret = self.p.index[i]
264 264 if isinstance(ret, str):
265 265 ret = struct.unpack(self.p.indexformat, ret)
266 266 yield ret[-1]
267 267 def __getitem__(self, key):
268 268 try:
269 269 return self.p.map[key]
270 270 except KeyError:
271 271 try:
272 272 self.load(key)
273 273 return self.p.map[key]
274 274 except KeyError:
275 275 raise KeyError("node " + hex(key))
276 276 def __setitem__(self, key, val):
277 277 self.p.map[key] = val
278 278 def __delitem__(self, key):
279 279 del self.p.map[key]
280 280
281 281 class RevlogError(Exception): pass
282 282 class LookupError(RevlogError): pass
283 283
284 284 def getoffset(q):
285 285 if q & 0xFFFF:
286 286 raise RevlogError(_('incompatible revision flag %x') % q)
287 287 return int(q >> 16)
288 288
289 289 def gettype(q):
290 290 return int(q & 0xFFFF)
291 291
292 292 def offset_type(offset, type):
293 293 return long(long(offset) << 16 | type)
294 294
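
A quick sketch of the packing these three helpers implement: the file offset lives in the high bits and a 16-bit type in the low bits of a single integer.

    offset, type = 4096, 0
    q = (offset << 16) | type     # offset_type(offset, type)
    assert q & 0xFFFF == type     # gettype(q)
    assert q >> 16 == offset      # getoffset(q); only legal while type is 0
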
295 295 class revlog(object):
296 296 """
297 297 the underlying revision storage object
298 298
299 299 A revlog consists of two parts, an index and the revision data.
300 300
301 301 The index is a file with a fixed record size containing
302 302 information on each revision, including its nodeid (hash), the
303 303 nodeids of its parents, the position and offset of its data within
304 304 the data file, and the revision it's based on. Finally, each entry
305 305 contains a linkrev entry that can serve as a pointer to external
306 306 data.
307 307
308 308 The revision data itself is a linear collection of data chunks.
309 309 Each chunk represents a revision and is usually represented as a
310 310 delta against the previous chunk. To bound lookup time, runs of
311 311 deltas are limited to about 2 times the length of the original
312 312 version data. This makes retrieval of a version proportional to
313 313 its size, or O(1) relative to the number of revisions.
314 314
315 315 Both pieces of the revlog are written to in an append-only
316 316 fashion, which means we never need to rewrite a file to insert or
317 317 remove data, and can use some simple techniques to avoid the need
318 318 for locking while reading.
319 319 """
320 320 def __init__(self, opener, indexfile):
321 321 """
322 322 create a revlog object
323 323
324 324 opener is a function that abstracts the file opening operation
325 325 and can be used to implement COW semantics or the like.
326 326 """
327 327 self.indexfile = indexfile
328 328 self.datafile = indexfile[:-2] + ".d"
329 329 self.opener = opener
330 330
331 331 self.indexstat = None
332 332 self.cache = None
333 333 self.chunkcache = None
334 334 self.defversion = REVLOG_DEFAULT_VERSION
335 335 if hasattr(opener, "defversion"):
336 336 self.defversion = opener.defversion
337 337 if self.defversion & REVLOGNG:
338 338 self.defversion |= REVLOGNGINLINEDATA
339 339 self._load()
340 340
341 341 def _load(self):
342 342 v = self.defversion
343 343 try:
344 344 f = self.opener(self.indexfile)
345 345 i = f.read(4)
346 346 f.seek(0)
347 347 if len(i) > 0:
348 348 v = struct.unpack(versionformat, i)[0]
349 349 except IOError, inst:
350 350 if inst.errno != errno.ENOENT:
351 351 raise
352 352 i = ""
353 353 else:
354 354 try:
355 355 st = util.fstat(f)
356 356 except AttributeError, inst:
357 357 st = None
358 358 else:
359 359 oldst = self.indexstat
360 360 if (oldst and st.st_dev == oldst.st_dev
361 361 and st.st_ino == oldst.st_ino
362 362 and st.st_mtime == oldst.st_mtime
363 363 and st.st_ctime == oldst.st_ctime
364 364 and st.st_size == oldst.st_size):
365 365 return
366 366 self.indexstat = st
367 367 flags = v & ~0xFFFF
368 368 fmt = v & 0xFFFF
369 369 if fmt == REVLOGV0:
370 370 if flags:
371 371 raise RevlogError(_("index %s unknown flags %#04x for format v0")
372 372 % (self.indexfile, flags >> 16))
373 373 elif fmt == REVLOGNG:
374 374 if flags & ~REVLOGNGINLINEDATA:
375 375 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
376 376 % (self.indexfile, flags >> 16))
377 377 else:
378 378 raise RevlogError(_("index %s unknown format %d")
379 379 % (self.indexfile, fmt))
380 380 self.version = v
381 if v == REVLOGV0:
381 self.nodemap = {nullid: nullrev}
382 self.index = []
383 self.indexformat = indexformatng
384 if self.version == REVLOGV0:
382 385 self.indexformat = indexformatv0
383 shaoffset = v0shaoffset
384 else:
385 self.indexformat = indexformatng
386 shaoffset = ngshaoffset
387
388 386 if i:
389 if (lazyparser.safe_to_use and not self._inline() and
390 st and st.st_size > 10000):
391 # big index, let's parse it on demand
392 parser = lazyparser(f, st.st_size, self.indexformat, shaoffset)
393 self.index = lazyindex(parser)
394 self.nodemap = lazymap(parser)
395 else:
396 self._parseindex(f, st)
387 self._parseindex(f, st)
388
389 def _parseindex(self, fp, st):
390 shaoffset = ngshaoffset
391 if self.version == REVLOGV0:
392 shaoffset = v0shaoffset
393
394 if (lazyparser.safe_to_use and not self._inline() and
395 st and st.st_size > 10000):
396 # big index, let's parse it on demand
397 parser = lazyparser(fp, st.st_size, self.indexformat, shaoffset)
398 self.index = lazyindex(parser)
399 self.nodemap = lazymap(parser)
397 400 if self.version != REVLOGV0:
398 401 e = list(self.index[0])
399 402 type = gettype(e[0])
400 403 e[0] = offset_type(0, type)
401 404 self.index[0] = e
402 else:
403 self.nodemap = {nullid: nullrev}
404 self.index = []
405 return
405 406
406 def _parseindex(self, fp, st):
407 407 s = struct.calcsize(self.indexformat)
408 408 self.index = []
409 409 self.nodemap = {nullid: nullrev}
410 410 inline = self._inline()
411 411 n = 0
412 412 leftover = None
413 413 while True:
414 414 if st:
415 415 data = fp.read(65536)
416 416 else:
417 417 # hack for httprangereader, it doesn't do partial reads well
418 418 data = fp.read()
419 419 if not data:
420 420 break
421 421 if n == 0 and self._inline():
422 422 # cache the first chunk
423 423 self.chunkcache = (0, data)
424 424 if leftover:
425 425 data = leftover + data
426 426 leftover = None
427 427 off = 0
428 428 l = len(data)
429 429 while off < l:
430 430 if l - off < s:
431 431 leftover = data[off:]
432 432 break
433 433 cur = data[off:off + s]
434 434 off += s
435 435 e = struct.unpack(self.indexformat, cur)
436 436 self.index.append(e)
437 437 self.nodemap[e[-1]] = n
438 438 n += 1
439 439 if inline:
440 440 if e[1] < 0:
441 441 break
442 442 off += e[1]
443 443 if off > l:
444 444 # some things don't seek well, just read it
445 445 fp.read(off - l)
446 446 break
447 447 if not st:
448 448 break
449 449
450 if self.version != REVLOGV0:
451 e = list(self.index[0])
452 type = gettype(e[0])
453 e[0] = offset_type(0, type)
454 self.index[0] = e
450 455
451 456 def _loadindex(self, start, end):
452 457 """load a block of indexes all at once from the lazy parser"""
453 458 if isinstance(self.index, lazyindex):
454 459 self.index.p.loadindex(start, end)
455 460
456 461 def _loadindexmap(self):
457 462 """loads both the map and the index from the lazy parser"""
458 463 if isinstance(self.index, lazyindex):
459 464 p = self.index.p
460 465 p.loadindex()
461 466 self.nodemap = p.map
462 467
463 468 def _loadmap(self):
464 469 """loads the map from the lazy parser"""
465 470 if isinstance(self.nodemap, lazymap):
466 471 self.nodemap.p.loadmap()
467 472 self.nodemap = self.nodemap.p.map
468 473
469 474 def _inline(self): return self.version & REVLOGNGINLINEDATA
470 475
471 476 def tip(self): return self.node(len(self.index) - 1)
472 477 def count(self): return len(self.index)
473 478 def node(self, rev):
474 479 return rev == nullrev and nullid or self.index[rev][-1]
475 480 def rev(self, node):
476 481 try:
477 482 return self.nodemap[node]
478 483 except KeyError:
479 484 raise LookupError(_('%s: no node %s') % (self.indexfile, hex(node)))
480 485 def linkrev(self, node):
481 486 return (node == nullid) and nullrev or self.index[self.rev(node)][-4]
482 487 def parents(self, node):
483 488 if node == nullid: return (nullid, nullid)
484 489 r = self.rev(node)
485 490 d = self.index[r][-3:-1]
486 491 if self.version == REVLOGV0:
487 492 return d
488 493 return (self.node(d[0]), self.node(d[1]))
489 494 def parentrevs(self, rev):
490 495 if rev == nullrev:
491 496 return (nullrev, nullrev)
492 497 d = self.index[rev][-3:-1]
493 498 if self.version == REVLOGV0:
494 499 return (self.rev(d[0]), self.rev(d[1]))
495 500 return d
496 501 def start(self, rev):
497 502 if rev == nullrev:
498 503 return 0
499 504 if self.version != REVLOGV0:
500 505 return getoffset(self.index[rev][0])
501 506 return self.index[rev][0]
502 507
503 508 def end(self, rev): return self.start(rev) + self.length(rev)
504 509
505 510 def size(self, rev):
506 511 """return the length of the uncompressed text for a given revision"""
507 512 if rev == nullrev:
508 513 return 0
509 514 l = -1
510 515 if self.version != REVLOGV0:
511 516 l = self.index[rev][2]
512 517 if l >= 0:
513 518 return l
514 519
515 520 t = self.revision(self.node(rev))
516 521 return len(t)
517 522
518 523 # alternate implementation. The advantage of this code is that it
519 524 # will be faster for a single revision. But the results are not
520 525 # cached, so finding the size of every revision will be slower.
521 526 """
522 527 if self.cache and self.cache[1] == rev:
523 528 return len(self.cache[2])
524 529
525 530 base = self.base(rev)
526 531 if self.cache and self.cache[1] >= base and self.cache[1] < rev:
527 532 base = self.cache[1]
528 533 text = self.cache[2]
529 534 else:
530 535 text = self.revision(self.node(base))
531 536
532 537 l = len(text)
533 538 for x in xrange(base + 1, rev + 1):
534 539 l = mdiff.patchedsize(l, self.chunk(x))
535 540 return l
536 541 """
537 542
538 543 def length(self, rev):
539 544 if rev == nullrev:
540 545 return 0
541 546 else:
542 547 return self.index[rev][1]
543 548 def base(self, rev):
544 549 if (rev == nullrev):
545 550 return nullrev
546 551 else:
547 552 return self.index[rev][-5]
548 553
549 554 def reachable(self, node, stop=None):
550 555 """return a hash of all nodes ancestral to a given node, including
551 556 the node itself, stopping when stop is matched"""
552 557 reachable = {}
553 558 visit = [node]
554 559 reachable[node] = 1
555 560 if stop:
556 561 stopn = self.rev(stop)
557 562 else:
558 563 stopn = 0
559 564 while visit:
560 565 n = visit.pop(0)
561 566 if n == stop:
562 567 continue
563 568 if n == nullid:
564 569 continue
565 570 for p in self.parents(n):
566 571 if self.rev(p) < stopn:
567 572 continue
568 573 if p not in reachable:
569 574 reachable[p] = 1
570 575 visit.append(p)
571 576 return reachable
572 577
573 578 def nodesbetween(self, roots=None, heads=None):
574 579 """Return a tuple containing three elements. Elements 1 and 2 contain
575 580 a final list of bases and heads after all the unreachable ones have been
576 581 pruned. Element 0 contains a topologically sorted list of all
577 582 nodes that satisfy these constraints:
578 583 
579 584 1. All nodes must be descended from a node in roots (the nodes on
580 585 roots are considered descended from themselves).
581 586 2. All nodes must also be ancestors of a node in heads (the nodes in
582 587 heads are considered to be their own ancestors).
583 588
584 589 If roots is unspecified, nullid is assumed as the only root.
585 590 If heads is unspecified, it is taken to be the output of the
586 591 heads method (i.e. a list of all nodes in the repository that
587 592 have no children)."""
588 593 nonodes = ([], [], [])
589 594 if roots is not None:
590 595 roots = list(roots)
591 596 if not roots:
592 597 return nonodes
593 598 lowestrev = min([self.rev(n) for n in roots])
594 599 else:
595 600 roots = [nullid] # Everybody's a descendent of nullid
596 601 lowestrev = nullrev
597 602 if (lowestrev == nullrev) and (heads is None):
598 603 # We want _all_ the nodes!
599 604 return ([self.node(r) for r in xrange(0, self.count())],
600 605 [nullid], list(self.heads()))
601 606 if heads is None:
602 607 # All nodes are ancestors, so the latest ancestor is the last
603 608 # node.
604 609 highestrev = self.count() - 1
605 610 # Set ancestors to None to signal that every node is an ancestor.
606 611 ancestors = None
607 612 # Set heads to an empty dictionary for later discovery of heads
608 613 heads = {}
609 614 else:
610 615 heads = list(heads)
611 616 if not heads:
612 617 return nonodes
613 618 ancestors = {}
614 619 # Turn heads into a dictionary so we can remove 'fake' heads.
615 620 # Also, later we will be using it to filter out the heads we can't
616 621 # find from roots.
617 622 heads = dict.fromkeys(heads, 0)
618 623 # Start at the top and keep marking parents until we're done.
619 624 nodestotag = heads.keys()
620 625 # Remember where the top was so we can use it as a limit later.
621 626 highestrev = max([self.rev(n) for n in nodestotag])
622 627 while nodestotag:
623 628 # grab a node to tag
624 629 n = nodestotag.pop()
625 630 # Never tag nullid
626 631 if n == nullid:
627 632 continue
628 633 # A node's revision number represents its place in a
629 634 # topologically sorted list of nodes.
630 635 r = self.rev(n)
631 636 if r >= lowestrev:
632 637 if n not in ancestors:
633 638 # If we are possibly a descendent of one of the roots
634 639 # and we haven't already been marked as an ancestor
635 640 ancestors[n] = 1 # Mark as ancestor
636 641 # Add non-nullid parents to list of nodes to tag.
637 642 nodestotag.extend([p for p in self.parents(n) if
638 643 p != nullid])
639 644 elif n in heads: # We've seen it before, is it a fake head?
640 645 # So it is, real heads should not be the ancestors of
641 646 # any other heads.
642 647 heads.pop(n)
643 648 if not ancestors:
644 649 return nonodes
645 650 # Now that we have our set of ancestors, we want to remove any
646 651 # roots that are not ancestors.
647 652
648 653 # If one of the roots was nullid, everything is included anyway.
649 654 if lowestrev > nullrev:
650 655 # But, since we weren't, let's recompute the lowest rev to not
651 656 # include roots that aren't ancestors.
652 657
653 658 # Filter out roots that aren't ancestors of heads
654 659 roots = [n for n in roots if n in ancestors]
655 660 # Recompute the lowest revision
656 661 if roots:
657 662 lowestrev = min([self.rev(n) for n in roots])
658 663 else:
659 664 # No more roots? Return empty list
660 665 return nonodes
661 666 else:
662 667 # We are descending from nullid, and don't need to care about
663 668 # any other roots.
664 669 lowestrev = nullrev
665 670 roots = [nullid]
666 671 # Transform our roots list into a 'set' (i.e. a dictionary where the
667 672 # values don't matter).
668 673 descendents = dict.fromkeys(roots, 1)
669 674 # Also, keep the original roots so we can filter out roots that aren't
670 675 # 'real' roots (i.e. are descended from other roots).
671 676 roots = descendents.copy()
672 677 # Our topologically sorted list of output nodes.
673 678 orderedout = []
674 679 # Don't start at nullid since we don't want nullid in our output list,
675 680 # and if nullid shows up in descendents, empty parents will look like
676 681 # they're descendents.
677 682 for r in xrange(max(lowestrev, 0), highestrev + 1):
678 683 n = self.node(r)
679 684 isdescendent = False
680 685 if lowestrev == nullrev: # Everybody is a descendent of nullid
681 686 isdescendent = True
682 687 elif n in descendents:
683 688 # n is already a descendent
684 689 isdescendent = True
685 690 # This check only needs to be done here because all the roots
686 691 # will start being marked as descendents before the loop.
687 692 if n in roots:
688 693 # If n was a root, check if it's a 'real' root.
689 694 p = tuple(self.parents(n))
690 695 # If any of its parents are descendents, it's not a root.
691 696 if (p[0] in descendents) or (p[1] in descendents):
692 697 roots.pop(n)
693 698 else:
694 699 p = tuple(self.parents(n))
695 700 # A node is a descendent if either of its parents are
696 701 # descendents. (We seeded the descendents list with the roots
697 702 # up there, remember?)
698 703 if (p[0] in descendents) or (p[1] in descendents):
699 704 descendents[n] = 1
700 705 isdescendent = True
701 706 if isdescendent and ((ancestors is None) or (n in ancestors)):
702 707 # Only include nodes that are both descendents and ancestors.
703 708 orderedout.append(n)
704 709 if (ancestors is not None) and (n in heads):
705 710 # We're trying to figure out which heads are reachable
706 711 # from roots.
707 712 # Mark this head as having been reached
708 713 heads[n] = 1
709 714 elif ancestors is None:
710 715 # Otherwise, we're trying to discover the heads.
711 716 # Assume this is a head because if it isn't, the next step
712 717 # will eventually remove it.
713 718 heads[n] = 1
714 719 # But, obviously its parents aren't.
715 720 for p in self.parents(n):
716 721 heads.pop(p, None)
717 722 heads = [n for n in heads.iterkeys() if heads[n] != 0]
718 723 roots = roots.keys()
719 724 assert orderedout
720 725 assert roots
721 726 assert heads
722 727 return (orderedout, roots, heads)
723 728
724 729 def heads(self, start=None, stop=None):
725 730 """return the list of all nodes that have no children
726 731
727 732 if start is specified, only heads that are descendants of
728 733 start will be returned
729 734 if stop is specified, it will consider all the revs from stop
730 735 as if they had no children
731 736 """
732 737 if start is None:
733 738 start = nullid
734 739 if stop is None:
735 740 stop = []
736 741 stoprevs = dict.fromkeys([self.rev(n) for n in stop])
737 742 startrev = self.rev(start)
738 743 reachable = {startrev: 1}
739 744 heads = {startrev: 1}
740 745
741 746 parentrevs = self.parentrevs
742 747 for r in xrange(startrev + 1, self.count()):
743 748 for p in parentrevs(r):
744 749 if p in reachable:
745 750 if r not in stoprevs:
746 751 reachable[r] = 1
747 752 heads[r] = 1
748 753 if p in heads and p not in stoprevs:
749 754 del heads[p]
750 755
751 756 return [self.node(r) for r in heads]
752 757
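
A simplified stand-alone sketch of the scan above, ignoring the start and stop arguments (find_heads is an illustrative name): every rev is a head until some later rev names it as a parent.

    def find_heads(parentrevs, count):
        heads = set(range(count))
        for r in range(count):
            for p in parentrevs(r):
                heads.discard(p)
        return sorted(heads)

    # two revisions branching off rev 0 leave two heads
    assert find_heads(lambda r: {1: (0,), 2: (0,)}.get(r, ()), 3) == [1, 2]
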
753 758 def children(self, node):
754 759 """find the children of a given node"""
755 760 c = []
756 761 p = self.rev(node)
757 762 for r in range(p + 1, self.count()):
758 763 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
759 764 if prevs:
760 765 for pr in prevs:
761 766 if pr == p:
762 767 c.append(self.node(r))
763 768 elif p == nullrev:
764 769 c.append(self.node(r))
765 770 return c
766 771
767 772 def _match(self, id):
768 773 if isinstance(id, (long, int)):
769 774 # rev
770 775 return self.node(id)
771 776 if len(id) == 20:
772 777 # possibly a binary node
773 778 # odds of a binary node being all hex in ASCII are 1 in 10**25
774 779 try:
775 780 node = id
776 781 r = self.rev(node) # quick search the index
777 782 return node
778 783 except LookupError:
779 784 pass # may be partial hex id
780 785 try:
781 786 # str(rev)
782 787 rev = int(id)
783 788 if str(rev) != id: raise ValueError
784 789 if rev < 0: rev = self.count() + rev
785 790 if rev < 0 or rev >= self.count(): raise ValueError
786 791 return self.node(rev)
787 792 except (ValueError, OverflowError):
788 793 pass
789 794 if len(id) == 40:
790 795 try:
791 796 # a full hex nodeid?
792 797 node = bin(id)
793 798 r = self.rev(node)
794 799 return node
795 800 except TypeError:
796 801 pass
797 802
798 803 def _partialmatch(self, id):
799 804 if len(id) < 40:
800 805 try:
801 806 # hex(node)[:...]
802 807 bin_id = bin(id[:len(id) & ~1]) # grab an even number of digits
803 808 node = None
804 809 for n in self.nodemap:
805 810 if n.startswith(bin_id) and hex(n).startswith(id):
806 811 if node is not None:
807 812 raise LookupError(_("Ambiguous identifier"))
808 813 node = n
809 814 if node is not None:
810 815 return node
811 816 except TypeError:
812 817 pass
813 818
814 819 def lookup(self, id):
815 820 """locate a node based on:
816 821 - revision number or str(revision number)
817 822 - nodeid or subset of hex nodeid
818 823 """
819 824
820 825 n = self._match(id)
821 826 if n is not None:
822 827 return n
823 828 n = self._partialmatch(id)
824 829 if n:
825 830 return n
826 831
827 832 raise LookupError(_("No match found"))
828 833
829 834 def cmp(self, node, text):
830 835 """compare text with a given file revision"""
831 836 p1, p2 = self.parents(node)
832 837 return hash(text, p1, p2) != node
833 838
834 839 def diff(self, a, b):
835 840 """return a delta between two revisions"""
836 841 return mdiff.textdiff(a, b)
837 842
838 843 def patches(self, t, pl):
839 844 """apply a list of patches to a string"""
840 845 return mdiff.patches(t, pl)
841 846
842 847 def chunk(self, rev, df=None, cachelen=4096):
843 848 start, length = self.start(rev), self.length(rev)
844 849 inline = self._inline()
845 850 if inline:
846 851 start += (rev + 1) * struct.calcsize(self.indexformat)
847 852 end = start + length
848 853 def loadcache(df):
849 854 cache_length = max(cachelen, length) # 4k
850 855 if not df:
851 856 if inline:
852 857 df = self.opener(self.indexfile)
853 858 else:
854 859 df = self.opener(self.datafile)
855 860 df.seek(start)
856 861 self.chunkcache = (start, df.read(cache_length))
857 862
858 863 if not self.chunkcache:
859 864 loadcache(df)
860 865
861 866 cache_start = self.chunkcache[0]
862 867 cache_end = cache_start + len(self.chunkcache[1])
863 868 if start >= cache_start and end <= cache_end:
864 869 # it is cached
865 870 offset = start - cache_start
866 871 else:
867 872 loadcache(df)
868 873 offset = 0
869 874
870 875 #def checkchunk():
871 876 # df = self.opener(self.datafile)
872 877 # df.seek(start)
873 878 # return df.read(length)
874 879 #assert s == checkchunk()
875 880 return decompress(self.chunkcache[1][offset:offset + length])
876 881
877 882 def delta(self, node):
878 883 """return or calculate a delta between a node and its predecessor"""
879 884 r = self.rev(node)
880 885 return self.revdiff(r - 1, r)
881 886
882 887 def revdiff(self, rev1, rev2):
883 888 """return or calculate a delta between two revisions"""
884 889 b1 = self.base(rev1)
885 890 b2 = self.base(rev2)
886 891 if b1 == b2 and rev1 + 1 == rev2:
887 892 return self.chunk(rev2)
888 893 else:
889 894 return self.diff(self.revision(self.node(rev1)),
890 895 self.revision(self.node(rev2)))
891 896
892 897 def revision(self, node):
893 898 """return an uncompressed revision of a given"""
894 899 if node == nullid: return ""
895 900 if self.cache and self.cache[0] == node: return self.cache[2]
896 901
897 902 # look up what we need to read
898 903 text = None
899 904 rev = self.rev(node)
900 905 base = self.base(rev)
901 906
902 907 if self._inline():
903 908 # we probably have the whole chunk cached
904 909 df = None
905 910 else:
906 911 df = self.opener(self.datafile)
907 912
908 913 # do we have useful data cached?
909 914 if self.cache and self.cache[1] >= base and self.cache[1] < rev:
910 915 base = self.cache[1]
911 916 text = self.cache[2]
912 917 self._loadindex(base, rev + 1)
913 918 else:
914 919 self._loadindex(base, rev + 1)
915 920 text = self.chunk(base, df=df)
916 921
917 922 bins = []
918 923 for r in xrange(base + 1, rev + 1):
919 924 bins.append(self.chunk(r, df=df))
920 925
921 926 text = self.patches(text, bins)
922 927
923 928 p1, p2 = self.parents(node)
924 929 if node != hash(text, p1, p2):
925 930 raise RevlogError(_("integrity check failed on %s:%d")
926 931 % (self.datafile, rev))
927 932
928 933 self.cache = (node, rev, text)
929 934 return text
930 935
931 936 def checkinlinesize(self, tr, fp=None):
932 937 if not self._inline():
933 938 return
934 939 if not fp:
935 940 fp = self.opener(self.indexfile, 'r')
936 941 fp.seek(0, 2)
937 942 size = fp.tell()
938 943 if size < 131072:
939 944 return
940 945 trinfo = tr.find(self.indexfile)
941 946 if trinfo == None:
942 947 raise RevlogError(_("%s not found in the transaction")
943 948 % self.indexfile)
944 949
945 950 trindex = trinfo[2]
946 951 dataoff = self.start(trindex)
947 952
948 953 tr.add(self.datafile, dataoff)
949 954 df = self.opener(self.datafile, 'w')
950 955 calc = struct.calcsize(self.indexformat)
951 956 for r in xrange(self.count()):
952 957 start = self.start(r) + (r + 1) * calc
953 958 length = self.length(r)
954 959 fp.seek(start)
955 960 d = fp.read(length)
956 961 df.write(d)
957 962 fp.close()
958 963 df.close()
959 964 fp = self.opener(self.indexfile, 'w', atomictemp=True)
960 965 self.version &= ~(REVLOGNGINLINEDATA)
961 966 if self.count():
962 967 x = self.index[0]
963 968 e = struct.pack(self.indexformat, *x)[4:]
964 969 l = struct.pack(versionformat, self.version)
965 970 fp.write(l)
966 971 fp.write(e)
967 972
968 973 for i in xrange(1, self.count()):
969 974 x = self.index[i]
970 975 e = struct.pack(self.indexformat, *x)
971 976 fp.write(e)
972 977
973 978 # if we don't call rename, the temp file will never replace the
974 979 # real index
975 980 fp.rename()
976 981
977 982 tr.replace(self.indexfile, trindex * calc)
978 983 self.chunkcache = None
979 984
980 985 def addrevision(self, text, transaction, link, p1=None, p2=None, d=None):
981 986 """add a revision to the log
982 987
983 988 text - the revision data to add
984 989 transaction - the transaction object used for rollback
985 990 link - the linkrev data to add
986 991 p1, p2 - the parent nodeids of the revision
987 992 d - an optional precomputed delta
988 993 """
989 994 if not self._inline():
990 995 dfh = self.opener(self.datafile, "a")
991 996 else:
992 997 dfh = None
993 998 ifh = self.opener(self.indexfile, "a+")
994 999 return self._addrevision(text, transaction, link, p1, p2, d, ifh, dfh)
995 1000
996 1001 def _addrevision(self, text, transaction, link, p1, p2, d, ifh, dfh):
997 1002 if text is None: text = ""
998 1003 if p1 is None: p1 = self.tip()
999 1004 if p2 is None: p2 = nullid
1000 1005
1001 1006 node = hash(text, p1, p2)
1002 1007
1003 1008 if node in self.nodemap:
1004 1009 return node
1005 1010
1006 1011 n = self.count()
1007 1012 t = n - 1
1008 1013
1009 1014 if n:
1010 1015 base = self.base(t)
1011 1016 start = self.start(base)
1012 1017 end = self.end(t)
1013 1018 if not d:
1014 1019 prev = self.revision(self.tip())
1015 1020 d = self.diff(prev, text)
1016 1021 data = compress(d)
1017 1022 l = len(data[1]) + len(data[0])
1018 1023 dist = end - start + l
1019 1024
1020 1025 # full versions are inserted when the needed deltas
1021 1026 # become comparable to the uncompressed text
1022 1027 if not n or dist > len(text) * 2:
1023 1028 data = compress(text)
1024 1029 l = len(data[1]) + len(data[0])
1025 1030 base = n
1026 1031 else:
1027 1032 base = self.base(t)
1028 1033
1029 1034 offset = 0
1030 1035 if t >= 0:
1031 1036 offset = self.end(t)
1032 1037
1033 1038 if self.version == REVLOGV0:
1034 1039 e = (offset, l, base, link, p1, p2, node)
1035 1040 else:
1036 1041 e = (offset_type(offset, 0), l, len(text),
1037 1042 base, link, self.rev(p1), self.rev(p2), node)
1038 1043
1039 1044 self.index.append(e)
1040 1045 self.nodemap[node] = n
1041 1046 entry = struct.pack(self.indexformat, *e)
1042 1047
1043 1048 if not self._inline():
1044 1049 transaction.add(self.datafile, offset)
1045 1050 transaction.add(self.indexfile, n * len(entry))
1046 1051 if data[0]:
1047 1052 dfh.write(data[0])
1048 1053 dfh.write(data[1])
1049 1054 dfh.flush()
1050 1055 else:
1051 1056 ifh.seek(0, 2)
1052 1057 transaction.add(self.indexfile, ifh.tell(), self.count() - 1)
1053 1058
1054 1059 if len(self.index) == 1 and self.version != REVLOGV0:
1055 1060 l = struct.pack(versionformat, self.version)
1056 1061 ifh.write(l)
1057 1062 entry = entry[4:]
1058 1063
1059 1064 ifh.write(entry)
1060 1065
1061 1066 if self._inline():
1062 1067 ifh.write(data[0])
1063 1068 ifh.write(data[1])
1064 1069 self.checkinlinesize(transaction, ifh)
1065 1070
1066 1071 self.cache = (node, n, text)
1067 1072 return node
1068 1073
1069 1074 def ancestor(self, a, b):
1070 1075 """calculate the least common ancestor of nodes a and b"""
1071 1076
1072 1077 def parents(rev):
1073 1078 return [p for p in self.parentrevs(rev) if p != nullrev]
1074 1079
1075 1080 c = ancestor.ancestor(self.rev(a), self.rev(b), parents)
1076 1081 if c is None:
1077 1082 return nullid
1078 1083
1079 1084 return self.node(c)
1080 1085
1081 1086 def group(self, nodelist, lookup, infocollect=None):
1082 1087 """calculate a delta group
1083 1088
1084 1089 Given a list of changeset revs, return a set of deltas and
1085 1090 metadata corresponding to nodes. The first delta is
1086 1091 parent(nodes[0]) -> nodes[0]; the receiver is guaranteed to
1087 1092 have this parent as it has all history before these
1088 1093 changesets. parent is parent[0]
1089 1094 """
1090 1095 revs = [self.rev(n) for n in nodelist]
1091 1096
1092 1097 # if we don't have any revisions touched by these changesets, bail
1093 1098 if not revs:
1094 1099 yield changegroup.closechunk()
1095 1100 return
1096 1101
1097 1102 # add the parent of the first rev
1098 1103 p = self.parents(self.node(revs[0]))[0]
1099 1104 revs.insert(0, self.rev(p))
1100 1105
1101 1106 # build deltas
1102 1107 for d in xrange(0, len(revs) - 1):
1103 1108 a, b = revs[d], revs[d + 1]
1104 1109 nb = self.node(b)
1105 1110
1106 1111 if infocollect is not None:
1107 1112 infocollect(nb)
1108 1113
1109 1114 d = self.revdiff(a, b)
1110 1115 p = self.parents(nb)
1111 1116 meta = nb + p[0] + p[1] + lookup(nb)
1112 1117 yield changegroup.genchunk("%s%s" % (meta, d))
1113 1118
1114 1119 yield changegroup.closechunk()
1115 1120
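
A sketch of the chunk layout produced above and consumed by addgroup() below (parsechunk is an illustrative helper, not part of this change): an 80-byte header of four 20-byte ids (node, p1, p2, linked changeset) followed by the delta payload.

    import struct

    def parsechunk(chunk):
        node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
        return node, p1, p2, cs, chunk[80:]
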
1116 1121 def addgroup(self, revs, linkmapper, transaction, unique=0):
1117 1122 """
1118 1123 add a delta group
1119 1124
1120 1125 given a set of deltas, add them to the revision log. the
1121 1126 first delta is against its parent, which should be in our
1122 1127 log; the rest are against the previous delta.
1123 1128 """
1124 1129
1125 1130 #track the base of the current delta log
1126 1131 r = self.count()
1127 1132 t = r - 1
1128 1133 node = None
1129 1134
1130 1135 base = prev = nullrev
1131 1136 start = end = textlen = 0
1132 1137 if r:
1133 1138 end = self.end(t)
1134 1139
1135 1140 ifh = self.opener(self.indexfile, "a+")
1136 1141 ifh.seek(0, 2)
1137 1142 transaction.add(self.indexfile, ifh.tell(), self.count())
1138 1143 if self._inline():
1139 1144 dfh = None
1140 1145 else:
1141 1146 transaction.add(self.datafile, end)
1142 1147 dfh = self.opener(self.datafile, "a")
1143 1148
1144 1149 # loop through our set of deltas
1145 1150 chain = None
1146 1151 for chunk in revs:
1147 1152 node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
1148 1153 link = linkmapper(cs)
1149 1154 if node in self.nodemap:
1150 1155 # this can happen if two branches make the same change
1151 1156 # if unique:
1152 1157 # raise RevlogError(_("already have %s") % hex(node[:4]))
1153 1158 chain = node
1154 1159 continue
1155 1160 delta = chunk[80:]
1156 1161
1157 1162 for p in (p1, p2):
1158 1163 if not p in self.nodemap:
1159 1164 raise LookupError(_("unknown parent %s") % short(p))
1160 1165
1161 1166 if not chain:
1162 1167 # retrieve the parent revision of the delta chain
1163 1168 chain = p1
1164 1169 if not chain in self.nodemap:
1165 1170 raise LookupError(_("unknown base %s") % short(chain[:4]))
1166 1171
1167 1172 # full versions are inserted when the needed deltas become
1168 1173 # comparable to the uncompressed text or when the previous
1169 1174 # version is not the one we have a delta against. We use
1170 1175 # the size of the previous full rev as a proxy for the
1171 1176 # current size.
1172 1177
1173 1178 if chain == prev:
1174 1179 tempd = compress(delta)
1175 1180 cdelta = tempd[0] + tempd[1]
1176 1181 textlen = mdiff.patchedsize(textlen, delta)
1177 1182
1178 1183 if chain != prev or (end - start + len(cdelta)) > textlen * 2:
1179 1184 # flush our writes here so we can read it in revision
1180 1185 if dfh:
1181 1186 dfh.flush()
1182 1187 ifh.flush()
1183 1188 text = self.revision(chain)
1184 1189 text = self.patches(text, [delta])
1185 1190 chk = self._addrevision(text, transaction, link, p1, p2, None,
1186 1191 ifh, dfh)
1187 1192 if not dfh and not self._inline():
1188 1193 # addrevision switched from inline to conventional
1189 1194 # reopen the index
1190 1195 dfh = self.opener(self.datafile, "a")
1191 1196 ifh = self.opener(self.indexfile, "a")
1192 1197 if chk != node:
1193 1198 raise RevlogError(_("consistency error adding group"))
1194 1199 textlen = len(text)
1195 1200 else:
1196 1201 if self.version == REVLOGV0:
1197 1202 e = (end, len(cdelta), base, link, p1, p2, node)
1198 1203 else:
1199 1204 e = (offset_type(end, 0), len(cdelta), textlen, base,
1200 1205 link, self.rev(p1), self.rev(p2), node)
1201 1206 self.index.append(e)
1202 1207 self.nodemap[node] = r
1203 1208 if self._inline():
1204 1209 ifh.write(struct.pack(self.indexformat, *e))
1205 1210 ifh.write(cdelta)
1206 1211 self.checkinlinesize(transaction, ifh)
1207 1212 if not self._inline():
1208 1213 dfh = self.opener(self.datafile, "a")
1209 1214 ifh = self.opener(self.indexfile, "a")
1210 1215 else:
1211 1216 dfh.write(cdelta)
1212 1217 ifh.write(struct.pack(self.indexformat, *e))
1213 1218
1214 1219 t, r, chain, prev = r, r + 1, node, node
1215 1220 base = self.base(t)
1216 1221 start = self.start(base)
1217 1222 end = self.end(t)
1218 1223
1219 1224 return node
1220 1225
1221 1226 def strip(self, rev, minlink):
1222 1227 if self.count() == 0 or rev >= self.count():
1223 1228 return
1224 1229
1225 1230 if isinstance(self.index, lazyindex):
1226 1231 self._loadindexmap()
1227 1232
1228 1233 # When stripping away a revision, we need to make sure it
1229 1234 # does not actually belong to an older changeset.
1230 1235 # The minlink parameter defines the oldest revision
1231 1236 # we're allowed to strip away.
1232 1237 while minlink > self.index[rev][-4]:
1233 1238 rev += 1
1234 1239 if rev >= self.count():
1235 1240 return
1236 1241
1237 1242 # first truncate the files on disk
1238 1243 end = self.start(rev)
1239 1244 if not self._inline():
1240 1245 df = self.opener(self.datafile, "a")
1241 1246 df.truncate(end)
1242 1247 end = rev * struct.calcsize(self.indexformat)
1243 1248 else:
1244 1249 end += rev * struct.calcsize(self.indexformat)
1245 1250
1246 1251 indexf = self.opener(self.indexfile, "a")
1247 1252 indexf.truncate(end)
1248 1253
1249 1254 # then reset internal state in memory to forget those revisions
1250 1255 self.cache = None
1251 1256 self.chunkcache = None
1252 1257 for x in xrange(rev, self.count()):
1253 1258 del self.nodemap[self.node(x)]
1254 1259
1255 1260 del self.index[rev:]
1256 1261
1257 1262 def checksize(self):
1258 1263 expected = 0
1259 1264 if self.count():
1260 1265 expected = self.end(self.count() - 1)
1261 1266
1262 1267 try:
1263 1268 f = self.opener(self.datafile)
1264 1269 f.seek(0, 2)
1265 1270 actual = f.tell()
1266 1271 dd = actual - expected
1267 1272 except IOError, inst:
1268 1273 if inst.errno != errno.ENOENT:
1269 1274 raise
1270 1275 dd = 0
1271 1276
1272 1277 try:
1273 1278 f = self.opener(self.indexfile)
1274 1279 f.seek(0, 2)
1275 1280 actual = f.tell()
1276 1281 s = struct.calcsize(self.indexformat)
1277 1282 i = actual / s
1278 1283 di = actual - (i * s)
1279 1284 if self._inline():
1280 1285 databytes = 0
1281 1286 for r in xrange(self.count()):
1282 1287 databytes += self.length(r)
1283 1288 dd = 0
1284 1289 di = actual - self.count() * s - databytes
1285 1290 except IOError, inst:
1286 1291 if inst.errno != errno.ENOENT:
1287 1292 raise
1288 1293 di = 0
1289 1294
1290 1295 return (dd, di)
1291 1296
1292 1297