revlog: simplify revlog.__init__...
Matt Mackall
r4985:e6525e45 default
@@ -1,1277 +1,1265 @@
1 1 """
2 2 revlog.py - storage back-end for mercurial
3 3
4 4 This provides efficient delta storage with O(1) retrieve and append
5 5 and O(changes) merge between branches
6 6
7 7 Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
8 8
9 9 This software may be used and distributed according to the terms
10 10 of the GNU General Public License, incorporated herein by reference.
11 11 """
12 12
13 13 from node import *
14 14 from i18n import _
15 15 import binascii, changegroup, errno, ancestor, mdiff, os
16 16 import sha, struct, util, zlib
17 17
18 18 # revlog version strings
19 19 REVLOGV0 = 0
20 20 REVLOGNG = 1
21 21
22 22 # revlog flags
23 23 REVLOGNGINLINEDATA = (1 << 16)
24 24 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
25 25
26 26 REVLOG_DEFAULT_FORMAT = REVLOGNG
27 27 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
28 28
29 29 def hash(text, p1, p2):
30 30 """generate a hash from the given text and its parent hashes
31 31
32 32 This hash combines both the current file contents and its history
33 33 in a manner that makes it easy to distinguish nodes with the same
34 34 content in the revision graph.
35 35 """
36 36 l = [p1, p2]
37 37 l.sort()
38 38 s = sha.new(l[0])
39 39 s.update(l[1])
40 40 s.update(text)
41 41 return s.digest()
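For illustration, here is a standalone sketch of the same node-hashing scheme; it substitutes hashlib.sha1 for the now-deprecated sha module but follows the function above exactly: sort the two parent ids, then hash parents plus text.

```python
import hashlib

NULLID = b"\x00" * 20   # 20-byte null node id, as in the node module

def node_hash(text, p1=NULLID, p2=NULLID):
    """Combine file text and parent node ids into a 20-byte node id."""
    a, b = sorted([p1, p2])   # sort so (p1, p2) and (p2, p1) hash alike
    s = hashlib.sha1(a)
    s.update(b)
    s.update(text)
    return s.digest()

# identical text with different history gets a different node id
n1 = node_hash(b"hello\n")
n2 = node_hash(b"hello\n", p1=n1)
assert n1 != n2 and len(n1) == 20
```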
42 42
43 43 def compress(text):
44 44 """ generate a possibly-compressed representation of text """
45 45 if not text:
46 46 return ("", text)
47 47 if len(text) < 44:
48 48 if text[0] == '\0':
49 49 return ("", text)
50 50 return ('u', text)
51 51 bin = zlib.compress(text)
52 52 if len(bin) > len(text):
53 53 if text[0] == '\0':
54 54 return ("", text)
55 55 return ('u', text)
56 56 return ("", bin)
57 57
58 58 def decompress(bin):
59 59 """ decompress the given input """
60 60 if not bin:
61 61 return bin
62 62 t = bin[0]
63 63 if t == '\0':
64 64 return bin
65 65 if t == 'x':
66 66 return zlib.decompress(bin)
67 67 if t == 'u':
68 68 return bin[1:]
69 69 raise RevlogError(_("unknown compression type %r") % t)
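Assuming the two functions above are in scope (Python 2, where str is a byte string), a quick round-trip exercise of the three storage cases: empty text stored as-is, short or incompressible text stored literally behind a 'u' marker, and zlib output, whose first byte is 'x', stored unmarked.

```python
def roundtrip(text):
    header, body = compress(text)
    return decompress(header + body)

assert roundtrip("") == ""                      # empty: stored as-is
assert roundtrip("short text") == "short text"  # literal, 'u'-prefixed
big = "abc" * 1000
assert roundtrip(big) == big                    # zlib output starts with 'x'
```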
70 70
71 71 indexformatv0 = ">4l20s20s20s"
72 72 v0shaoffset = 56
73 73 # index ng:
74 74 # 6 bytes offset
75 75 # 2 bytes flags
76 76 # 4 bytes compressed length
77 77 # 4 bytes uncompressed length
78 78 # 4 bytes: base rev
79 79 # 4 bytes link rev
80 80 # 4 bytes parent 1 rev
81 81 # 4 bytes parent 2 rev
82 82 # 32 bytes: nodeid
83 83 indexformatng = ">Qiiiiii20s12x"
84 84 ngshaoffset = 32
85 85 versionformat = ">I"
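To make the RevlogNG layout concrete, a small sketch that packs and unpacks one 64-byte index entry with the format string above; the field values are invented for illustration.

```python
import struct

indexformatng = ">Qiiiiii20s12x"       # 64 bytes per entry

# (offset<<16 | flags, compressed length, uncompressed length,
#  base rev, link rev, parent 1 rev, parent 2 rev, node id)
entry = (4096 << 16, 120, 300, 7, 7, 6, -1, b"\xaa" * 20)

packed = struct.pack(indexformatng, *entry)
assert len(packed) == struct.calcsize(indexformatng) == 64
assert struct.unpack(indexformatng, packed) == entry
```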
86 86
87 87 class lazyparser(object):
88 88 """
89 89 this class avoids the need to parse the entirety of large indices
90 90 """
91 91
92 92 # lazyparser is not safe to use on windows if win32 extensions not
93 93 # available. it keeps the file handle open, which makes it
94 94 # impossible to break hardlinks on local cloned repos.
95 95 safe_to_use = os.name != 'nt' or (not util.is_win_9x() and
96 96 hasattr(util, 'win32api'))
97 97
98 98 def __init__(self, dataf, size):
99 99 self.dataf = dataf
100 100 self.s = struct.calcsize(indexformatng)
101 101 self.datasize = size
102 102 self.l = size/self.s
103 103 self.index = [None] * self.l
104 104 self.map = {nullid: nullrev}
105 105 self.allmap = 0
106 106 self.all = 0
107 107 self.mapfind_count = 0
108 108
109 109 def loadmap(self):
110 110 """
111 111 during a commit, we need to make sure the rev being added is
112 112 not a duplicate. This requires loading the entire index,
113 113 which is fairly slow. loadmap can load up just the node map,
114 114 which takes much less time.
115 115 """
116 116 if self.allmap:
117 117 return
118 118 end = self.datasize
119 119 self.allmap = 1
120 120 cur = 0
121 121 count = 0
122 122 blocksize = self.s * 256
123 123 self.dataf.seek(0)
124 124 while cur < end:
125 125 data = self.dataf.read(blocksize)
126 126 off = 0
127 127 for x in xrange(256):
128 128 n = data[off + ngshaoffset:off + ngshaoffset + 20]
129 129 self.map[n] = count
130 130 count += 1
131 131 if count >= self.l:
132 132 break
133 133 off += self.s
134 134 cur += blocksize
135 135
136 136 def loadblock(self, blockstart, blocksize, data=None):
137 137 if self.all:
138 138 return
139 139 if data is None:
140 140 self.dataf.seek(blockstart)
141 141 if blockstart + blocksize > self.datasize:
142 142 # the revlog may have grown since we've started running,
143 143 # but we don't have space in self.index for more entries.
144 144 # limit blocksize so that we don't get too much data.
145 145 blocksize = max(self.datasize - blockstart, 0)
146 146 data = self.dataf.read(blocksize)
147 147 lend = len(data) / self.s
148 148 i = blockstart / self.s
149 149 off = 0
150 150 # lazyindex supports __delitem__
151 151 if lend > len(self.index) - i:
152 152 lend = len(self.index) - i
153 153 for x in xrange(lend):
154 154 if self.index[i + x] == None:
155 155 b = data[off : off + self.s]
156 156 self.index[i + x] = b
157 157 n = b[ngshaoffset:ngshaoffset + 20]
158 158 self.map[n] = i + x
159 159 off += self.s
160 160
161 161 def findnode(self, node):
162 162 """search backwards through the index file for a specific node"""
163 163 if self.allmap:
164 164 return None
165 165
166 166 # hg log will cause many many searches for the manifest
167 167 # nodes. After we get called a few times, just load the whole
168 168 # thing.
169 169 if self.mapfind_count > 8:
170 170 self.loadmap()
171 171 if node in self.map:
172 172 return node
173 173 return None
174 174 self.mapfind_count += 1
175 175 last = self.l - 1
176 176 while self.index[last] != None:
177 177 if last == 0:
178 178 self.all = 1
179 179 self.allmap = 1
180 180 return None
181 181 last -= 1
182 182 end = (last + 1) * self.s
183 183 blocksize = self.s * 256
184 184 while end >= 0:
185 185 start = max(end - blocksize, 0)
186 186 self.dataf.seek(start)
187 187 data = self.dataf.read(end - start)
188 188 findend = end - start
189 189 while True:
190 190 # we're searching backwards, so we have to make sure
191 191 # we don't find a changeset where this node is a parent
192 192 off = data.rfind(node, 0, findend)
193 193 findend = off
194 194 if off >= 0:
195 195 i = off / self.s
196 196 off = i * self.s
197 197 n = data[off + ngshaoffset:off + ngshaoffset + 20]
198 198 if n == node:
199 199 self.map[n] = i + start / self.s
200 200 return node
201 201 else:
202 202 break
203 203 end -= blocksize
204 204 return None
205 205
206 206 def loadindex(self, i=None, end=None):
207 207 if self.all:
208 208 return
209 209 all = False
210 210 if i == None:
211 211 blockstart = 0
212 212 blocksize = (512 / self.s) * self.s
213 213 end = self.datasize
214 214 all = True
215 215 else:
216 216 if end:
217 217 blockstart = i * self.s
218 218 end = end * self.s
219 219 blocksize = end - blockstart
220 220 else:
221 221 blockstart = (i & ~63) * self.s
222 222 blocksize = self.s * 64
223 223 end = blockstart + blocksize
224 224 while blockstart < end:
225 225 self.loadblock(blockstart, blocksize)
226 226 blockstart += blocksize
227 227 if all:
228 228 self.all = True
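The core trick of lazyparser, treating the index as fixed-size records and pulling each 20-byte node id from a known offset without unpacking whole entries, can be shown standalone; the data here is made up.

```python
import struct

RECORD = ">Qiiiiii20s12x"      # indexformatng: 64 bytes per entry
S = struct.calcsize(RECORD)
NODE_OFF = 32                  # ngshaoffset: node id starts 32 bytes in

def scan_nodes(data):
    """Yield (rev, nodeid) from raw index bytes without unpacking
    whole entries, as lazyparser.loadmap does block by block."""
    for rev in range(len(data) // S):
        off = rev * S
        yield rev, data[off + NODE_OFF:off + NODE_OFF + 20]

entry = struct.pack(RECORD, 0, 0, 0, 0, 0, -1, -1, b"\xee" * 20)
assert [n for _, n in scan_nodes(entry * 3)] == [b"\xee" * 20] * 3
```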
229 229
230 230 class lazyindex(object):
231 231 """a lazy version of the index array"""
232 232 def __init__(self, parser):
233 233 self.p = parser
234 234 def __len__(self):
235 235 return len(self.p.index)
236 236 def load(self, pos):
237 237 if pos < 0:
238 238 pos += len(self.p.index)
239 239 self.p.loadindex(pos)
240 240 return self.p.index[pos]
241 241 def __getitem__(self, pos):
242 242 ret = self.p.index[pos] or self.load(pos)
243 243 if isinstance(ret, str):
244 244 ret = struct.unpack(indexformatng, ret)
245 245 return ret
246 246 def __setitem__(self, pos, item):
247 247 self.p.index[pos] = item
248 248 def __delitem__(self, pos):
249 249 del self.p.index[pos]
250 250 def insert(self, pos, e):
251 251 self.p.index.insert(pos, e)
252 252 def append(self, e):
253 253 self.p.index.append(e)
254 254
255 255 class lazymap(object):
256 256 """a lazy version of the node map"""
257 257 def __init__(self, parser):
258 258 self.p = parser
259 259 def load(self, key):
260 260 n = self.p.findnode(key)
261 261 if n == None:
262 262 raise KeyError(key)
263 263 def __contains__(self, key):
264 264 if key in self.p.map:
265 265 return True
266 266 self.p.loadmap()
267 267 return key in self.p.map
268 268 def __iter__(self):
269 269 yield nullid
270 270 for i in xrange(self.p.l):
271 271 ret = self.p.index[i]
272 272 if not ret:
273 273 self.p.loadindex(i)
274 274 ret = self.p.index[i]
275 275 if isinstance(ret, str):
276 276 ret = struct.unpack(indexformatng, ret)
277 277 yield ret[7]
278 278 def __getitem__(self, key):
279 279 try:
280 280 return self.p.map[key]
281 281 except KeyError:
282 282 try:
283 283 self.load(key)
284 284 return self.p.map[key]
285 285 except KeyError:
286 286 raise KeyError("node " + hex(key))
287 287 def __setitem__(self, key, val):
288 288 self.p.map[key] = val
289 289 def __delitem__(self, key):
290 290 del self.p.map[key]
291 291
292 292 class RevlogError(Exception):
293 293 pass
294 294 class LookupError(RevlogError):
295 295 pass
296 296
297 297 def getoffset(q):
298 298 if q & 0xFFFF:
299 299 raise RevlogError(_('incompatible revision flag %x') % q)
300 300 return int(q >> 16)
301 301
302 302 def gettype(q):
303 303 return int(q & 0xFFFF)
304 304
305 305 def offset_type(offset, type):
306 306 return long(long(offset) << 16 | type)
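A worked example of the 16-bit split these helpers implement, assuming the three functions and RevlogError above are in scope:

```python
packed = offset_type(123456, 0)     # offset in high bits, type in low 16
assert getoffset(packed) == 123456
assert gettype(packed) == 0

# getoffset rejects entries carrying unknown flags in the low 16 bits
try:
    getoffset((123456 << 16) | 1)
except RevlogError:
    pass  # "incompatible revision flag", as expected
```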
307 307
308 308 class revlogoldio(object):
309 309 def __init__(self):
310 310 self.size = struct.calcsize(indexformatv0)
311 311
312 def parseindex(self, fp, st, inline):
312 def parseindex(self, fp, inline):
313 313 s = self.size
314 314 index = []
315 315 nodemap = {nullid: nullrev}
316 316 n = off = 0
317 317 data = fp.read()
318 318 l = len(data)
319 319 while off + s <= l:
320 320 cur = data[off:off + s]
321 321 off += s
322 322 e = struct.unpack(indexformatv0, cur)
323 323 # transform to revlogv1 format
324 324 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
325 325 nodemap[e[4]], nodemap[e[5]], e[6])
326 326 index.append(e2)
327 327 nodemap[e[6]] = n
328 328 n += 1
329 329
330 330 return index, nodemap, None
331 331
332 332 class revlogio(object):
333 333 def __init__(self):
334 334 self.size = struct.calcsize(indexformatng)
335 335
336 def parseindex(self, fp, st, inline):
337 if (lazyparser.safe_to_use and not inline and
338 st and st.st_size > 1000000):
336 def parseindex(self, fp, inline):
337 try:
338 size = util.fstat(fp).st_size
339 except AttributeError:
340 size = 0
341
342 if lazyparser.safe_to_use and not inline and size > 1000000:
339 343 # big index, let's parse it on demand
340 parser = lazyparser(fp, st.st_size)
344 parser = lazyparser(fp, size)
341 345 index = lazyindex(parser)
342 346 nodemap = lazymap(parser)
343 347 e = list(index[0])
344 348 type = gettype(e[0])
345 349 e[0] = offset_type(0, type)
346 350 index[0] = e
347 351 return index, nodemap, None
348 352
349 353 s = self.size
350 354 cache = None
351 355 index = []
352 356 nodemap = {nullid: nullrev}
353 357 n = off = 0
354 358 # if we're not using lazymap, always read the whole index
355 359 data = fp.read()
356 360 l = len(data)
357 361 if inline:
358 362 cache = (0, data)
359 363 while off + s <= l:
360 364 e = struct.unpack(indexformatng, data[off:off + s])
361 365 index.append(e)
362 366 nodemap[e[7]] = n
363 367 n += 1
364 368 off += s
365 369 if inline:
366 370 if e[1] < 0:
367 371 break
368 372 off += e[1]
369 373
370 374 e = list(index[0])
371 375 type = gettype(e[0])
372 376 e[0] = offset_type(0, type)
373 377 index[0] = e
374 378
375 379 return index, nodemap, cache
376 380
377 381 class revlog(object):
378 382 """
379 383 the underlying revision storage object
380 384
381 385 A revlog consists of two parts, an index and the revision data.
382 386
383 387 The index is a file with a fixed record size containing
384 388 information on each revision, including its nodeid (hash), the
385 389 nodeids of its parents, the position and offset of its data within
386 390 the data file, and the revision it's based on. Finally, each entry
387 391 contains a linkrev entry that can serve as a pointer to external
388 392 data.
389 393
390 394 The revision data itself is a linear collection of data chunks.
391 395 Each chunk represents a revision and is usually represented as a
392 396 delta against the previous chunk. To bound lookup time, runs of
393 397 deltas are limited to about 2 times the length of the original
394 398 version data. This makes retrieval of a version proportional to
395 399 its size, or O(1) relative to the number of revisions.
396 400
397 401 Both pieces of the revlog are written to in an append-only
398 402 fashion, which means we never need to rewrite a file to insert or
399 403 remove data, and can use some simple techniques to avoid the need
400 404 for locking while reading.
401 405 """
402 406 def __init__(self, opener, indexfile):
403 407 """
404 408 create a revlog object
405 409
406 410 opener is a function that abstracts the file opening operation
407 411 and can be used to implement COW semantics or the like.
408 412 """
409 413 self.indexfile = indexfile
410 414 self.datafile = indexfile[:-2] + ".d"
411 415 self.opener = opener
412 self.indexstat = None
413 416 self._cache = None
414 417 self._chunkcache = None
415 self.defversion = REVLOG_DEFAULT_VERSION
418 self.nodemap = {nullid: nullrev}
419 self.index = []
420
421 v = REVLOG_DEFAULT_VERSION
416 422 if hasattr(opener, "defversion"):
417 self.defversion = opener.defversion
418 if self.defversion & REVLOGNG:
419 self.defversion |= REVLOGNGINLINEDATA
420 self._load()
423 v = opener.defversion
424 if v & REVLOGNG:
425 v |= REVLOGNGINLINEDATA
421 426
422 def _load(self):
423 v = self.defversion
427 i = ""
424 428 try:
425 429 f = self.opener(self.indexfile)
426 430 i = f.read(4)
427 431 f.seek(0)
428 432 if len(i) > 0:
429 433 v = struct.unpack(versionformat, i)[0]
430 434 except IOError, inst:
431 435 if inst.errno != errno.ENOENT:
432 436 raise
433 i = ""
434 else:
435 try:
436 st = util.fstat(f)
437 except AttributeError, inst:
438 st = None
439 else:
440 oldst = self.indexstat
441 if (oldst and st.st_dev == oldst.st_dev
442 and st.st_ino == oldst.st_ino
443 and st.st_mtime == oldst.st_mtime
444 and st.st_ctime == oldst.st_ctime
445 and st.st_size == oldst.st_size):
446 return
447 self.indexstat = st
437
438 self.version = v
439 self._inline = v & REVLOGNGINLINEDATA
448 440 flags = v & ~0xFFFF
449 441 fmt = v & 0xFFFF
450 if fmt == REVLOGV0:
451 if flags:
452 raise RevlogError(_("index %s unknown flags %#04x for format v0")
453 % (self.indexfile, flags >> 16))
454 elif fmt == REVLOGNG:
455 if flags & ~REVLOGNGINLINEDATA:
456 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
457 % (self.indexfile, flags >> 16))
458 else:
442 if fmt == REVLOGV0 and flags:
443 raise RevlogError(_("index %s unknown flags %#04x for format v0")
444 % (self.indexfile, flags >> 16))
445 elif fmt == REVLOGNG and flags & ~REVLOGNGINLINEDATA:
446 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
447 % (self.indexfile, flags >> 16))
448 elif fmt > REVLOGNG:
459 449 raise RevlogError(_("index %s unknown format %d")
460 450 % (self.indexfile, fmt))
461 self.version = v
462 self._inline = v & REVLOGNGINLINEDATA
463 self.nodemap = {nullid: nullrev}
464 self.index = []
451
465 452 self._io = revlogio()
466 453 if self.version == REVLOGV0:
467 454 self._io = revlogoldio()
468 455 if i:
469 d = self._io.parseindex(f, st, self._inline)
456 d = self._io.parseindex(f, self._inline)
470 457 self.index, self.nodemap, self._chunkcache = d
458
471 459 # add the magic null revision at -1
472 460 self.index.append((0, 0, 0, -1, -1, -1, -1, nullid))
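The opener passed to the constructor only needs to behave like open rooted at the store directory; a minimal sketch of such a callable follows. Names are hypothetical, and it omits the atomic-temp and copy-on-write handling a real opener layer would add.

```python
import os

def make_opener(base):
    """Return the simplest opener contract revlog needs: a callable
    taking (path, mode) and returning an open file object under base."""
    def opener(path, mode="r", atomictemp=False):
        # a real opener would honor atomictemp by writing to a temp
        # file and renaming; this sketch just opens the path directly
        return open(os.path.join(base, path), mode)
    return opener

# hypothetical usage:
# rl = revlog(make_opener("/tmp/repo/.hg/store"), "00changelog.i")
```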
473 461
474 462 def _loadindex(self, start, end):
475 463 """load a block of indexes all at once from the lazy parser"""
476 464 if isinstance(self.index, lazyindex):
477 465 self.index.p.loadindex(start, end)
478 466
479 467 def _loadindexmap(self):
480 468 """loads both the map and the index from the lazy parser"""
481 469 if isinstance(self.index, lazyindex):
482 470 p = self.index.p
483 471 p.loadindex()
484 472 self.nodemap = p.map
485 473
486 474 def _loadmap(self):
487 475 """loads the map from the lazy parser"""
488 476 if isinstance(self.nodemap, lazymap):
489 477 self.nodemap.p.loadmap()
490 478 self.nodemap = self.nodemap.p.map
491 479
492 480 def tip(self):
493 481 return self.node(len(self.index) - 2)
494 482 def count(self):
495 483 return len(self.index) - 1
496 484
497 485 def rev(self, node):
498 486 try:
499 487 return self.nodemap[node]
500 488 except KeyError:
501 489 raise LookupError(_('%s: no node %s') % (self.indexfile, hex(node)))
502 490 def node(self, rev):
503 491 return self.index[rev][7]
504 492 def linkrev(self, node):
505 493 return self.index[self.rev(node)][4]
506 494 def parents(self, node):
507 495 d = self.index[self.rev(node)][5:7]
508 496 return (self.node(d[0]), self.node(d[1]))
509 497 def parentrevs(self, rev):
510 498 return self.index[rev][5:7]
511 499 def start(self, rev):
512 500 return getoffset(self.index[rev][0])
513 501 def end(self, rev):
514 502 return self.start(rev) + self.length(rev)
515 503 def length(self, rev):
516 504 return self.index[rev][1]
517 505 def base(self, rev):
518 506 return self.index[rev][3]
519 507
520 508 def size(self, rev):
521 509 """return the length of the uncompressed text for a given revision"""
522 510 l = self.index[rev][2]
523 511 if l >= 0:
524 512 return l
525 513
526 514 t = self.revision(self.node(rev))
527 515 return len(t)
528 516
529 517 # alternate implementation. The advantage of this code is that it
530 518 # will be faster for a single revision. But, the results are not
531 519 # cached, so finding the size of every revision will be slower.
532 520 """
533 521 if self.cache and self.cache[1] == rev:
534 522 return len(self.cache[2])
535 523
536 524 base = self.base(rev)
537 525 if self.cache and self.cache[1] >= base and self.cache[1] < rev:
538 526 base = self.cache[1]
539 527 text = self.cache[2]
540 528 else:
541 529 text = self.revision(self.node(base))
542 530
543 531 l = len(text)
544 532 for x in xrange(base + 1, rev + 1):
545 533 l = mdiff.patchedsize(l, self.chunk(x))
546 534 return l
547 535 """
548 536
549 537 def reachable(self, node, stop=None):
550 538 """return a hash of all nodes ancestral to a given node, including
551 539 the node itself, stopping when stop is matched"""
552 540 reachable = {}
553 541 visit = [node]
554 542 reachable[node] = 1
555 543 if stop:
556 544 stopn = self.rev(stop)
557 545 else:
558 546 stopn = 0
559 547 while visit:
560 548 n = visit.pop(0)
561 549 if n == stop:
562 550 continue
563 551 if n == nullid:
564 552 continue
565 553 for p in self.parents(n):
566 554 if self.rev(p) < stopn:
567 555 continue
568 556 if p not in reachable:
569 557 reachable[p] = 1
570 558 visit.append(p)
571 559 return reachable
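The same ancestor walk, sketched on a toy graph with a plain parent map in place of a revlog, may make the traversal easier to follow; the graph is invented.

```python
def reachable_set(parents, node):
    """All ancestors of node, inclusive, given a node -> parents map."""
    seen = {node: 1}
    visit = [node]
    while visit:
        n = visit.pop(0)
        for p in parents.get(n, []):
            if p not in seen:
                seen[p] = 1
                visit.append(p)
    return seen

#    a            diamond: d merges the b and c branches
#   / \
#  b   c
#   \ /
#    d
parents = {"d": ["b", "c"], "b": ["a"], "c": ["a"], "a": []}
assert sorted(reachable_set(parents, "d")) == ["a", "b", "c", "d"]
```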
572 560
573 561 def nodesbetween(self, roots=None, heads=None):
574 562 """Return a tuple containing three elements. Elements 1 and 2 contain
575 563 a final list of bases and heads after all the unreachable ones have been
576 564 pruned. Element 0 contains a topologically sorted list of all
577 565
578 566 nodes that satisfy these constraints:
579 567 1. All nodes must be descended from a node in roots (the nodes on
580 568 roots are considered descended from themselves).
581 569 2. All nodes must also be ancestors of a node in heads (the nodes in
582 570 heads are considered to be their own ancestors).
583 571
584 572 If roots is unspecified, nullid is assumed as the only root.
585 573 If heads is unspecified, it is taken to be the output of the
586 574 heads method (i.e. a list of all nodes in the repository that
587 575 have no children)."""
588 576 nonodes = ([], [], [])
589 577 if roots is not None:
590 578 roots = list(roots)
591 579 if not roots:
592 580 return nonodes
593 581 lowestrev = min([self.rev(n) for n in roots])
594 582 else:
595 583 roots = [nullid] # Everybody's a descendent of nullid
596 584 lowestrev = nullrev
597 585 if (lowestrev == nullrev) and (heads is None):
598 586 # We want _all_ the nodes!
599 587 return ([self.node(r) for r in xrange(0, self.count())],
600 588 [nullid], list(self.heads()))
601 589 if heads is None:
602 590 # All nodes are ancestors, so the latest ancestor is the last
603 591 # node.
604 592 highestrev = self.count() - 1
605 593 # Set ancestors to None to signal that every node is an ancestor.
606 594 ancestors = None
607 595 # Set heads to an empty dictionary for later discovery of heads
608 596 heads = {}
609 597 else:
610 598 heads = list(heads)
611 599 if not heads:
612 600 return nonodes
613 601 ancestors = {}
614 602 # Turn heads into a dictionary so we can remove 'fake' heads.
615 603 # Also, later we will be using it to filter out the heads we can't
616 604 # find from roots.
617 605 heads = dict.fromkeys(heads, 0)
618 606 # Start at the top and keep marking parents until we're done.
619 607 nodestotag = heads.keys()
620 608 # Remember where the top was so we can use it as a limit later.
621 609 highestrev = max([self.rev(n) for n in nodestotag])
622 610 while nodestotag:
623 611 # grab a node to tag
624 612 n = nodestotag.pop()
625 613 # Never tag nullid
626 614 if n == nullid:
627 615 continue
628 616 # A node's revision number represents its place in a
629 617 # topologically sorted list of nodes.
630 618 r = self.rev(n)
631 619 if r >= lowestrev:
632 620 if n not in ancestors:
633 621 # If we are possibly a descendent of one of the roots
634 622 # and we haven't already been marked as an ancestor
635 623 ancestors[n] = 1 # Mark as ancestor
636 624 # Add non-nullid parents to list of nodes to tag.
637 625 nodestotag.extend([p for p in self.parents(n) if
638 626 p != nullid])
639 627 elif n in heads: # We've seen it before, is it a fake head?
640 628 # So it is, real heads should not be the ancestors of
641 629 # any other heads.
642 630 heads.pop(n)
643 631 if not ancestors:
644 632 return nonodes
645 633 # Now that we have our set of ancestors, we want to remove any
646 634 # roots that are not ancestors.
647 635
648 636 # If one of the roots was nullid, everything is included anyway.
649 637 if lowestrev > nullrev:
650 638 # But, since we weren't, let's recompute the lowest rev to not
651 639 # include roots that aren't ancestors.
652 640
653 641 # Filter out roots that aren't ancestors of heads
654 642 roots = [n for n in roots if n in ancestors]
655 643 # Recompute the lowest revision
656 644 if roots:
657 645 lowestrev = min([self.rev(n) for n in roots])
658 646 else:
659 647 # No more roots? Return empty list
660 648 return nonodes
661 649 else:
662 650 # We are descending from nullid, and don't need to care about
663 651 # any other roots.
664 652 lowestrev = nullrev
665 653 roots = [nullid]
666 654 # Transform our roots list into a 'set' (i.e. a dictionary where the
667 655 # values don't matter).
668 656 descendents = dict.fromkeys(roots, 1)
669 657 # Also, keep the original roots so we can filter out roots that aren't
670 658 # 'real' roots (i.e. are descended from other roots).
671 659 roots = descendents.copy()
672 660 # Our topologically sorted list of output nodes.
673 661 orderedout = []
674 662 # Don't start at nullid since we don't want nullid in our output list,
675 663 # and if nullid shows up in descedents, empty parents will look like
676 664 # they're descendents.
677 665 for r in xrange(max(lowestrev, 0), highestrev + 1):
678 666 n = self.node(r)
679 667 isdescendent = False
680 668 if lowestrev == nullrev: # Everybody is a descendent of nullid
681 669 isdescendent = True
682 670 elif n in descendents:
683 671 # n is already a descendent
684 672 isdescendent = True
685 673 # This check only needs to be done here because all the roots
686 674 # will start being marked as descendents before the loop.
687 675 if n in roots:
688 676 # If n was a root, check if it's a 'real' root.
689 677 p = tuple(self.parents(n))
690 678 # If any of its parents are descendents, it's not a root.
691 679 if (p[0] in descendents) or (p[1] in descendents):
692 680 roots.pop(n)
693 681 else:
694 682 p = tuple(self.parents(n))
695 683 # A node is a descendent if either of its parents are
696 684 # descendents. (We seeded the descendents list with the roots
697 685 # up there, remember?)
698 686 if (p[0] in descendents) or (p[1] in descendents):
699 687 descendents[n] = 1
700 688 isdescendent = True
701 689 if isdescendent and ((ancestors is None) or (n in ancestors)):
702 690 # Only include nodes that are both descendents and ancestors.
703 691 orderedout.append(n)
704 692 if (ancestors is not None) and (n in heads):
705 693 # We're trying to figure out which heads are reachable
706 694 # from roots.
707 695 # Mark this head as having been reached
708 696 heads[n] = 1
709 697 elif ancestors is None:
710 698 # Otherwise, we're trying to discover the heads.
711 699 # Assume this is a head because if it isn't, the next step
712 700 # will eventually remove it.
713 701 heads[n] = 1
714 702 # But, obviously its parents aren't.
715 703 for p in self.parents(n):
716 704 heads.pop(p, None)
717 705 heads = [n for n in heads.iterkeys() if heads[n] != 0]
718 706 roots = roots.keys()
719 707 assert orderedout
720 708 assert roots
721 709 assert heads
722 710 return (orderedout, roots, heads)
723 711
724 712 def heads(self, start=None, stop=None):
725 713 """return the list of all nodes that have no children
726 714
727 715 if start is specified, only heads that are descendants of
728 716 start will be returned
729 717 if stop is specified, it will consider all the revs from stop
730 718 as if they had no children
731 719 """
732 720 if start is None:
733 721 start = nullid
734 722 if stop is None:
735 723 stop = []
736 724 stoprevs = dict.fromkeys([self.rev(n) for n in stop])
737 725 startrev = self.rev(start)
738 726 reachable = {startrev: 1}
739 727 heads = {startrev: 1}
740 728
741 729 parentrevs = self.parentrevs
742 730 for r in xrange(startrev + 1, self.count()):
743 731 for p in parentrevs(r):
744 732 if p in reachable:
745 733 if r not in stoprevs:
746 734 reachable[r] = 1
747 735 heads[r] = 1
748 736 if p in heads and p not in stoprevs:
749 737 del heads[p]
750 738
751 739 return [self.node(r) for r in heads]
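The single forward sweep used here can be demonstrated on a toy rev -> parents table; this sketch mirrors the loop above minus the start/stop refinements.

```python
def find_heads(parentrevs):
    """parentrevs[r] lists the parent revs of rev r; return revs
    that never appear as anyone's parent, i.e. the heads."""
    heads = {}
    for r in range(len(parentrevs)):
        heads[r] = 1
        for p in parentrevs[r]:
            heads.pop(p, None)  # a rev with a child is not a head
    return sorted(heads)

# linear history 0-1-2 plus a branch 3 off of 1: heads are 2 and 3
assert find_heads([[], [0], [1], [1]]) == [2, 3]
```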
752 740
753 741 def children(self, node):
754 742 """find the children of a given node"""
755 743 c = []
756 744 p = self.rev(node)
757 745 for r in range(p + 1, self.count()):
758 746 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
759 747 if prevs:
760 748 for pr in prevs:
761 749 if pr == p:
762 750 c.append(self.node(r))
763 751 elif p == nullrev:
764 752 c.append(self.node(r))
765 753 return c
766 754
767 755 def _match(self, id):
768 756 if isinstance(id, (long, int)):
769 757 # rev
770 758 return self.node(id)
771 759 if len(id) == 20:
772 760 # possibly a binary node
773 761 # odds of a binary node being all hex in ASCII are 1 in 10**25
774 762 try:
775 763 node = id
776 764 r = self.rev(node) # quick search the index
777 765 return node
778 766 except LookupError:
779 767 pass # may be partial hex id
780 768 try:
781 769 # str(rev)
782 770 rev = int(id)
783 771 if str(rev) != id:
784 772 raise ValueError
785 773 if rev < 0:
786 774 rev = self.count() + rev
787 775 if rev < 0 or rev >= self.count():
788 776 raise ValueError
789 777 return self.node(rev)
790 778 except (ValueError, OverflowError):
791 779 pass
792 780 if len(id) == 40:
793 781 try:
794 782 # a full hex nodeid?
795 783 node = bin(id)
796 784 r = self.rev(node)
797 785 return node
798 786 except TypeError:
799 787 pass
800 788
801 789 def _partialmatch(self, id):
802 790 if len(id) < 40:
803 791 try:
804 792 # hex(node)[:...]
805 793 bin_id = bin(id[:len(id) & ~1]) # grab an even number of digits
806 794 node = None
807 795 for n in self.nodemap:
808 796 if n.startswith(bin_id) and hex(n).startswith(id):
809 797 if node is not None:
810 798 raise LookupError(_("Ambiguous identifier"))
811 799 node = n
812 800 if node is not None:
813 801 return node
814 802 except TypeError:
815 803 pass
816 804
817 805 def lookup(self, id):
818 806 """locate a node based on:
819 807 - revision number or str(revision number)
820 808 - nodeid or subset of hex nodeid
821 809 """
822 810 n = self._match(id)
823 811 if n is not None:
824 812 return n
825 813 n = self._partialmatch(id)
826 814 if n:
827 815 return n
828 816
829 817 raise LookupError(_("No match found"))
830 818
831 819 def cmp(self, node, text):
832 820 """compare text with a given file revision"""
833 821 p1, p2 = self.parents(node)
834 822 return hash(text, p1, p2) != node
835 823
836 824 def diff(self, a, b):
837 825 """return a delta between two revisions"""
838 826 return mdiff.textdiff(a, b)
839 827
840 828 def patches(self, t, pl):
841 829 """apply a list of patches to a string"""
842 830 return mdiff.patches(t, pl)
843 831
844 832 def chunk(self, rev, df=None, cachelen=4096):
845 833 start, length = self.start(rev), self.length(rev)
846 834 inline = self._inline
847 835 if inline:
848 836 start += (rev + 1) * self._io.size
849 837 end = start + length
850 838 def loadcache(df):
851 839 cache_length = max(cachelen, length) # 4k
852 840 if not df:
853 841 if inline:
854 842 df = self.opener(self.indexfile)
855 843 else:
856 844 df = self.opener(self.datafile)
857 845 df.seek(start)
858 846 self._chunkcache = (start, df.read(cache_length))
859 847
860 848 if not self._chunkcache:
861 849 loadcache(df)
862 850
863 851 cache_start = self._chunkcache[0]
864 852 cache_end = cache_start + len(self._chunkcache[1])
865 853 if start >= cache_start and end <= cache_end:
866 854 # it is cached
867 855 offset = start - cache_start
868 856 else:
869 857 loadcache(df)
870 858 offset = 0
871 859
872 860 return decompress(self._chunkcache[1][offset:offset + length])
873 861
874 862 def delta(self, node):
875 863 """return or calculate a delta between a node and its predecessor"""
876 864 r = self.rev(node)
877 865 return self.revdiff(r - 1, r)
878 866
879 867 def revdiff(self, rev1, rev2):
880 868 """return or calculate a delta between two revisions"""
881 869 b1 = self.base(rev1)
882 870 b2 = self.base(rev2)
883 871 if b1 == b2 and rev1 + 1 == rev2:
884 872 return self.chunk(rev2)
885 873 else:
886 874 return self.diff(self.revision(self.node(rev1)),
887 875 self.revision(self.node(rev2)))
888 876
889 877 def revision(self, node):
890 878 """return an uncompressed revision of a given node"""
891 879 if node == nullid:
892 880 return ""
893 881 if self._cache and self._cache[0] == node:
894 882 return self._cache[2]
895 883
896 884 # look up what we need to read
897 885 text = None
898 886 rev = self.rev(node)
899 887 base = self.base(rev)
900 888
901 889 if self._inline:
902 890 # we probably have the whole chunk cached
903 891 df = None
904 892 else:
905 893 df = self.opener(self.datafile)
906 894
907 895 # do we have useful data cached?
908 896 if self._cache and self._cache[1] >= base and self._cache[1] < rev:
909 897 base = self._cache[1]
910 898 text = self._cache[2]
911 899 self._loadindex(base, rev + 1)
912 900 else:
913 901 self._loadindex(base, rev + 1)
914 902 text = self.chunk(base, df=df)
915 903
916 904 bins = []
917 905 for r in xrange(base + 1, rev + 1):
918 906 bins.append(self.chunk(r, df=df))
919 907
920 908 text = self.patches(text, bins)
921 909
922 910 p1, p2 = self.parents(node)
923 911 if node != hash(text, p1, p2):
924 912 raise RevlogError(_("integrity check failed on %s:%d")
925 913 % (self.datafile, rev))
926 914
927 915 self._cache = (node, rev, text)
928 916 return text
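Stripped of caching and I/O, revision() reduces to: take the full text at the chain base, then apply each successive delta up to the requested rev. A schematic sketch, with an invented tuple-based delta format standing in for mdiff:

```python
def reconstruct(rev, bases, chunks, apply_delta):
    """Schematic revision(): chunks[bases[rev]] is a full text, every
    later chunk in the chain is a delta against its predecessor."""
    base = bases[rev]
    text = chunks[base]
    for r in range(base + 1, rev + 1):
        text = apply_delta(text, chunks[r])
    return text

def apply_delta(text, delta):
    start, end, new = delta        # toy delta: replace text[start:end]
    return text[:start] + new + text[end:]

bases = [0, 0, 0]                       # revs 0..2 share chain base 0
chunks = ["hello world",                # rev 0: full text
          (0, 5, "goodbye"),            # rev 1: delta against rev 0
          (8, 13, "moon")]              # rev 2: delta against rev 1
assert reconstruct(2, bases, chunks, apply_delta) == "goodbye moon"
```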
929 917
930 918 def checkinlinesize(self, tr, fp=None):
931 919 if not self._inline:
932 920 return
933 921 if not fp:
934 922 fp = self.opener(self.indexfile, 'r')
935 923 fp.seek(0, 2)
936 924 size = fp.tell()
937 925 if size < 131072:
938 926 return
939 927 trinfo = tr.find(self.indexfile)
940 928 if trinfo == None:
941 929 raise RevlogError(_("%s not found in the transaction")
942 930 % self.indexfile)
943 931
944 932 trindex = trinfo[2]
945 933 dataoff = self.start(trindex)
946 934
947 935 tr.add(self.datafile, dataoff)
948 936 df = self.opener(self.datafile, 'w')
949 937 calc = self._io.size
950 938 for r in xrange(self.count()):
951 939 start = self.start(r) + (r + 1) * calc
952 940 length = self.length(r)
953 941 fp.seek(start)
954 942 d = fp.read(length)
955 943 df.write(d)
956 944 fp.close()
957 945 df.close()
958 946 fp = self.opener(self.indexfile, 'w', atomictemp=True)
959 947 self.version &= ~(REVLOGNGINLINEDATA)
960 948 self._inline = False
961 949 if self.count():
962 950 x = self.index[0]
963 951 e = struct.pack(indexformatng, *x)[4:]
964 952 l = struct.pack(versionformat, self.version)
965 953 fp.write(l)
966 954 fp.write(e)
967 955
968 956 for i in xrange(1, self.count()):
969 957 x = self.index[i]
970 958 e = struct.pack(indexformatng, *x)
971 959 fp.write(e)
972 960
973 961 # if we don't call rename, the temp file will never replace the
974 962 # real index
975 963 fp.rename()
976 964
977 965 tr.replace(self.indexfile, trindex * calc)
978 966 self._chunkcache = None
979 967
980 968 def addrevision(self, text, transaction, link, p1, p2, d=None):
981 969 """add a revision to the log
982 970
983 971 text - the revision data to add
984 972 transaction - the transaction object used for rollback
985 973 link - the linkrev data to add
986 974 p1, p2 - the parent nodeids of the revision
987 975 d - an optional precomputed delta
988 976 """
989 977 dfh = None
990 978 if not self._inline:
991 979 dfh = self.opener(self.datafile, "a")
992 980 ifh = self.opener(self.indexfile, "a+")
993 981 return self._addrevision(text, transaction, link, p1, p2, d, ifh, dfh)
994 982
995 983 def _addrevision(self, text, transaction, link, p1, p2, d, ifh, dfh):
996 984 node = hash(text, p1, p2)
997 985 if node in self.nodemap:
998 986 return node
999 987
1000 988 curr = self.count()
1001 989 prev = curr - 1
1002 990 base = self.base(prev)
1003 991 offset = self.end(prev)
1004 992
1005 993 if curr:
1006 994 if not d:
1007 995 ptext = self.revision(self.node(prev))
1008 996 d = self.diff(ptext, text)
1009 997 data = compress(d)
1010 998 l = len(data[1]) + len(data[0])
1011 999 dist = l + offset - self.start(base)
1012 1000
1013 1001 # full versions are inserted when the needed deltas
1014 1002 # become comparable to the uncompressed text
1015 1003 if not curr or dist > len(text) * 2:
1016 1004 data = compress(text)
1017 1005 l = len(data[1]) + len(data[0])
1018 1006 base = curr
1019 1007
1020 1008 e = (offset_type(offset, 0), l, len(text),
1021 1009 base, link, self.rev(p1), self.rev(p2), node)
1022 1010 self.index.insert(-1, e)
1023 1011 self.nodemap[node] = curr
1024 1012
1025 1013 if self.version == REVLOGV0:
1026 1014 e = (offset, l, base, link, p1, p2, node)
1027 1015 entry = struct.pack(indexformatv0, *e)
1028 1016 else:
1029 1017 entry = struct.pack(indexformatng, *e)
1030 1018 if not curr:
1031 1019 entry = struct.pack(versionformat, self.version) + entry[4:]
1032 1020
1033 1021 if not self._inline:
1034 1022 transaction.add(self.datafile, offset)
1035 1023 transaction.add(self.indexfile, curr * len(entry))
1036 1024 if data[0]:
1037 1025 dfh.write(data[0])
1038 1026 dfh.write(data[1])
1039 1027 dfh.flush()
1040 1028 ifh.write(entry)
1041 1029 else:
1042 1030 ifh.seek(0, 2)
1043 1031 transaction.add(self.indexfile, ifh.tell(), prev)
1044 1032 ifh.write(entry)
1045 1033 ifh.write(data[0])
1046 1034 ifh.write(data[1])
1047 1035 self.checkinlinesize(transaction, ifh)
1048 1036
1049 1037 self._cache = (node, curr, text)
1050 1038 return node
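The dist > len(text) * 2 cutoff above, which keeps delta chains from growing past roughly twice the text size, can be illustrated with invented numbers:

```python
# invented numbers: the chain base starts at offset 1000 in the data
# file, the previous rev ends at 7000, the new delta is 200 bytes, and
# the new full text is 3000 bytes
start_of_base = 1000
offset = 7000        # end of the previous revision
l = 200              # compressed delta length
textlen = 3000

dist = l + offset - start_of_base    # bytes to read to rebuild the text
assert dist == 6200 and dist > textlen * 2
# 6200 > 6000: past the 2x bound, so a full version is stored and the
# delta chain restarts at this revision (base = curr)
```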
1051 1039
1052 1040 def ancestor(self, a, b):
1053 1041 """calculate the least common ancestor of nodes a and b"""
1054 1042
1055 1043 def parents(rev):
1056 1044 return [p for p in self.parentrevs(rev) if p != nullrev]
1057 1045
1058 1046 c = ancestor.ancestor(self.rev(a), self.rev(b), parents)
1059 1047 if c is None:
1060 1048 return nullid
1061 1049
1062 1050 return self.node(c)
1063 1051
1064 1052 def group(self, nodelist, lookup, infocollect=None):
1065 1053 """calculate a delta group
1066 1054
1067 1055 Given a list of changeset revs, return a set of deltas and
1068 1056 metadata corresponding to nodes. The first delta is
1069 1057 parent(nodes[0]) -> nodes[0]; the receiver is guaranteed to
1070 1058 have this parent as it has all history before these
1071 1059 changesets. The parent used is parents[0].
1072 1060 """
1073 1061 revs = [self.rev(n) for n in nodelist]
1074 1062
1075 1063 # if we don't have any revisions touched by these changesets, bail
1076 1064 if not revs:
1077 1065 yield changegroup.closechunk()
1078 1066 return
1079 1067
1080 1068 # add the parent of the first rev
1081 1069 p = self.parents(self.node(revs[0]))[0]
1082 1070 revs.insert(0, self.rev(p))
1083 1071
1084 1072 # build deltas
1085 1073 for d in xrange(0, len(revs) - 1):
1086 1074 a, b = revs[d], revs[d + 1]
1087 1075 nb = self.node(b)
1088 1076
1089 1077 if infocollect is not None:
1090 1078 infocollect(nb)
1091 1079
1092 1080 d = self.revdiff(a, b)
1093 1081 p = self.parents(nb)
1094 1082 meta = nb + p[0] + p[1] + lookup(nb)
1095 1083 yield changegroup.genchunk("%s%s" % (meta, d))
1096 1084
1097 1085 yield changegroup.closechunk()
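Each delta produced here is prefixed with an 80-byte header of four 20-byte node ids (node, p1, p2, linked changeset node), which is exactly what addgroup() below unpacks; changegroup.genchunk handles the on-the-wire framing around it. A sketch of that layout with made-up values:

```python
import struct

node  = b"\x01" * 20
p1    = b"\x02" * 20
p2    = b"\x00" * 20   # null second parent
cs    = b"\x03" * 20   # linked changeset node, from lookup(nb)
delta = b"...delta bytes..."

chunk = node + p1 + p2 + cs + delta                  # meta + d, as above
got = struct.unpack("20s20s20s20s", chunk[:80])      # addgroup's view
assert got == (node, p1, p2, cs) and chunk[80:] == delta
```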
1098 1086
1099 1087 def addgroup(self, revs, linkmapper, transaction, unique=0):
1100 1088 """
1101 1089 add a delta group
1102 1090
1103 1091 Given a set of deltas, add them to the revision log. The
1104 1092 first delta is against its parent, which should be in our
1105 1093 log; the rest are against the previous delta.
1106 1094 """
1107 1095
1108 1096 # track the base of the current delta log
1109 1097 r = self.count()
1110 1098 t = r - 1
1111 1099 node = None
1112 1100
1113 1101 base = prev = nullrev
1114 1102 start = end = textlen = 0
1115 1103 if r:
1116 1104 end = self.end(t)
1117 1105
1118 1106 ifh = self.opener(self.indexfile, "a+")
1119 1107 ifh.seek(0, 2)
1120 1108 transaction.add(self.indexfile, ifh.tell(), self.count())
1121 1109 if self._inline:
1122 1110 dfh = None
1123 1111 else:
1124 1112 transaction.add(self.datafile, end)
1125 1113 dfh = self.opener(self.datafile, "a")
1126 1114
1127 1115 # loop through our set of deltas
1128 1116 chain = None
1129 1117 for chunk in revs:
1130 1118 node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
1131 1119 link = linkmapper(cs)
1132 1120 if node in self.nodemap:
1133 1121 # this can happen if two branches make the same change
1134 1122 # if unique:
1135 1123 # raise RevlogError(_("already have %s") % hex(node[:4]))
1136 1124 chain = node
1137 1125 continue
1138 1126 delta = chunk[80:]
1139 1127
1140 1128 for p in (p1, p2):
1141 1129 if not p in self.nodemap:
1142 1130 raise LookupError(_("unknown parent %s") % short(p))
1143 1131
1144 1132 if not chain:
1145 1133 # retrieve the parent revision of the delta chain
1146 1134 chain = p1
1147 1135 if not chain in self.nodemap:
1148 1136 raise LookupError(_("unknown base %s") % short(chain[:4]))
1149 1137
1150 1138 # full versions are inserted when the needed deltas become
1151 1139 # comparable to the uncompressed text or when the previous
1152 1140 # version is not the one we have a delta against. We use
1153 1141 # the size of the previous full rev as a proxy for the
1154 1142 # current size.
1155 1143
1156 1144 if chain == prev:
1157 1145 tempd = compress(delta)
1158 1146 cdelta = tempd[0] + tempd[1]
1159 1147 textlen = mdiff.patchedsize(textlen, delta)
1160 1148
1161 1149 if chain != prev or (end - start + len(cdelta)) > textlen * 2:
1162 1150 # flush our writes here so we can read it in revision
1163 1151 if dfh:
1164 1152 dfh.flush()
1165 1153 ifh.flush()
1166 1154 text = self.revision(chain)
1167 1155 text = self.patches(text, [delta])
1168 1156 chk = self._addrevision(text, transaction, link, p1, p2, None,
1169 1157 ifh, dfh)
1170 1158 if not dfh and not self._inline:
1171 1159 # addrevision switched from inline to conventional
1172 1160 # reopen the index
1173 1161 dfh = self.opener(self.datafile, "a")
1174 1162 ifh = self.opener(self.indexfile, "a")
1175 1163 if chk != node:
1176 1164 raise RevlogError(_("consistency error adding group"))
1177 1165 textlen = len(text)
1178 1166 else:
1179 1167 e = (offset_type(end, 0), len(cdelta), textlen, base,
1180 1168 link, self.rev(p1), self.rev(p2), node)
1181 1169 self.index.insert(-1, e)
1182 1170 self.nodemap[node] = r
1183 1171 if self._inline:
1184 1172 ifh.write(struct.pack(indexformatng, *e))
1185 1173 ifh.write(cdelta)
1186 1174 self.checkinlinesize(transaction, ifh)
1187 1175 if not self._inline:
1188 1176 dfh = self.opener(self.datafile, "a")
1189 1177 ifh = self.opener(self.indexfile, "a")
1190 1178 else:
1191 1179 if self.version == REVLOGV0:
1192 1180 e = (end, len(cdelta), base, link, p1, p2, node)
1193 1181 entry = struct.pack(indexformatv0, *e)
1194 1182 else:
1195 1183 entry = struct.pack(indexformatng, *e)
1196 1184 dfh.write(cdelta)
1197 1185 ifh.write(entry)
1198 1186
1199 1187 t, r, chain, prev = r, r + 1, node, node
1200 1188 base = self.base(t)
1201 1189 start = self.start(base)
1202 1190 end = self.end(t)
1203 1191
1204 1192 return node
1205 1193
1206 1194 def strip(self, rev, minlink):
1207 1195 if self.count() == 0 or rev >= self.count():
1208 1196 return
1209 1197
1210 1198 if isinstance(self.index, lazyindex):
1211 1199 self._loadindexmap()
1212 1200
1213 1201 # When stripping away a revision, we need to make sure it
1214 1202 # does not actually belong to an older changeset.
1215 1203 # The minlink parameter defines the oldest revision
1216 1204 # we're allowed to strip away.
1217 1205 while minlink > self.index[rev][4]:
1218 1206 rev += 1
1219 1207 if rev >= self.count():
1220 1208 return
1221 1209
1222 1210 # first truncate the files on disk
1223 1211 end = self.start(rev)
1224 1212 if not self._inline:
1225 1213 df = self.opener(self.datafile, "a")
1226 1214 df.truncate(end)
1227 1215 end = rev * self._io.size
1228 1216 else:
1229 1217 end += rev * self._io.size
1230 1218
1231 1219 indexf = self.opener(self.indexfile, "a")
1232 1220 indexf.truncate(end)
1233 1221
1234 1222 # then reset internal state in memory to forget those revisions
1235 1223 self._cache = None
1236 1224 self._chunkcache = None
1237 1225 for x in xrange(rev, self.count()):
1238 1226 del self.nodemap[self.node(x)]
1239 1227
1240 1228 del self.index[rev:-1]
1241 1229
1242 1230 def checksize(self):
1243 1231 expected = 0
1244 1232 if self.count():
1245 1233 expected = self.end(self.count() - 1)
1246 1234
1247 1235 try:
1248 1236 f = self.opener(self.datafile)
1249 1237 f.seek(0, 2)
1250 1238 actual = f.tell()
1251 1239 dd = actual - expected
1252 1240 except IOError, inst:
1253 1241 if inst.errno != errno.ENOENT:
1254 1242 raise
1255 1243 dd = 0
1256 1244
1257 1245 try:
1258 1246 f = self.opener(self.indexfile)
1259 1247 f.seek(0, 2)
1260 1248 actual = f.tell()
1261 1249 s = self._io.size
1262 1250 i = actual / s
1263 1251 di = actual - (i * s)
1264 1252 if self._inline:
1265 1253 databytes = 0
1266 1254 for r in xrange(self.count()):
1267 1255 databytes += self.length(r)
1268 1256 dd = 0
1269 1257 di = actual - self.count() * s - databytes
1270 1258 except IOError, inst:
1271 1259 if inst.errno != errno.ENOENT:
1272 1260 raise
1273 1261 di = 0
1274 1262
1275 1263 return (dd, di)
1276 1264
1277 1265