revlog: change accesses to index entry elements to use positive offsets
Matt Mackall
r4978:93d48a8f default
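This commit replaces negative tuple indices into revlog index entries with their positive equivalents. An entry unpacked with indexformatng (">Qiiiiii20s12x") is an 8-tuple (offset_flags, comp_len, uncomp_len, base_rev, link_rev, p1_rev, p2_rev, nodeid), so both spellings address the same fields, but positive offsets read unambiguously and stay valid if fields are ever appended. A minimal sketch of the equivalences touched below (entry values hypothetical):

    e = (0, 11, 12, 0, 0, -1, -1, '\0' * 20)   # hypothetical ng index entry
    assert e[7] == e[-1]        # nodeid
    assert e[4] == e[-4]        # link rev
    assert e[3] == e[-5]        # base rev
    assert e[5:7] == e[-3:-1]   # parent revs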
@@ -1,1292 +1,1292 b''
1 1 """
2 2 revlog.py - storage back-end for mercurial
3 3
4 4 This provides efficient delta storage with O(1) retrieve and append
5 5 and O(changes) merge between branches
6 6
7 7 Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
8 8
9 9 This software may be used and distributed according to the terms
10 10 of the GNU General Public License, incorporated herein by reference.
11 11 """
12 12
13 13 from node import *
14 14 from i18n import _
15 15 import binascii, changegroup, errno, ancestor, mdiff, os
16 16 import sha, struct, util, zlib
17 17
18 18 # revlog version strings
19 19 REVLOGV0 = 0
20 20 REVLOGNG = 1
21 21
22 22 # revlog flags
23 23 REVLOGNGINLINEDATA = (1 << 16)
24 24 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
25 25
26 26 REVLOG_DEFAULT_FORMAT = REVLOGNG
27 27 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
28 28
29 29 def hash(text, p1, p2):
30 30 """generate a hash from the given text and its parent hashes
31 31
32 32 This hash combines both the current file contents and its history
33 33 in a manner that makes it easy to distinguish nodes with the same
34 34 content in the revision graph.
35 35 """
36 36 l = [p1, p2]
37 37 l.sort()
38 38 s = sha.new(l[0])
39 39 s.update(l[1])
40 40 s.update(text)
41 41 return s.digest()
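# Usage sketch: the sort above makes the digest symmetric in its parents,
# so node identity does not depend on parent order.
#
#   assert hash(text, p1, p2) == hash(text, p2, p1)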
42 42
43 43 def compress(text):
44 44 """ generate a possibly-compressed representation of text """
45 45 if not text: return ("", text)
46 46 if len(text) < 44:
47 47 if text[0] == '\0': return ("", text)
48 48 return ('u', text)
49 49 bin = zlib.compress(text)
50 50 if len(bin) > len(text):
51 51 if text[0] == '\0': return ("", text)
52 52 return ('u', text)
53 53 return ("", bin)
54 54
55 55 def decompress(bin):
56 56 """ decompress the given input """
57 57 if not bin: return bin
58 58 t = bin[0]
59 59 if t == '\0': return bin
60 60 if t == 'x': return zlib.decompress(bin)
61 61 if t == 'u': return bin[1:]
62 62 raise RevlogError(_("unknown compression type %r") % t)
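# Round-trip sketch for the pair above: compress() returns a (header,
# data) tuple whose concatenation decompress() accepts (zlib output
# begins with 'x', which decompress() keys on).
#
#   h, d = compress("na" * 100)
#   assert decompress(h + d) == "na" * 100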
63 63
64 64 indexformatv0 = ">4l20s20s20s"
65 65 v0shaoffset = 56
66 66 # index ng:
67 67 # 6 bytes offset
68 68 # 2 bytes flags
69 69 # 4 bytes compressed length
70 70 # 4 bytes uncompressed length
71 71 # 4 bytes: base rev
72 72 # 4 bytes link rev
73 73 # 4 bytes parent 1 rev
74 74 # 4 bytes parent 2 rev
75 75 # 32 bytes: nodeid (20 bytes used, 12 bytes of zero padding)
76 76 indexformatng = ">Qiiiiii20s12x"
77 77 ngshaoffset = 32
78 78 versionformat = ">I"
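# Decoding sketch for one revlogng index entry (64 bytes in the format
# above; the buffer 'data' is hypothetical):
#
#   s = struct.calcsize(indexformatng)
#   e = struct.unpack(indexformatng, data[:s])
#   # e == (offset_flags, comp_len, uncomp_len, base, link, p1, p2, node)
#   offset, flags = int(e[0] >> 16), int(e[0] & 0xFFFF)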
79 79
80 80 class lazyparser(object):
81 81 """
82 82 this class avoids the need to parse the entirety of large indices
83 83 """
84 84
85 85 # lazyparser is not safe to use on windows if win32 extensions not
86 86 # available. it keeps the file handle open, which makes it
87 87 # impossible to break hardlinks on locally cloned repos.
88 88 safe_to_use = os.name != 'nt' or (not util.is_win_9x() and
89 89 hasattr(util, 'win32api'))
90 90
91 91 def __init__(self, dataf, size):
92 92 self.dataf = dataf
93 93 self.s = struct.calcsize(indexformatng)
94 94 self.datasize = size
95 95 self.l = size/self.s
96 96 self.index = [None] * self.l
97 97 self.map = {nullid: nullrev}
98 98 self.allmap = 0
99 99 self.all = 0
100 100 self.mapfind_count = 0
101 101
102 102 def loadmap(self):
103 103 """
104 104 during a commit, we need to make sure the rev being added is
105 105 not a duplicate. This requires loading the entire index,
106 106 which is fairly slow. loadmap can load up just the node map,
107 107 which takes much less time.
108 108 """
109 109 if self.allmap: return
110 110 end = self.datasize
111 111 self.allmap = 1
112 112 cur = 0
113 113 count = 0
114 114 blocksize = self.s * 256
115 115 self.dataf.seek(0)
116 116 while cur < end:
117 117 data = self.dataf.read(blocksize)
118 118 off = 0
119 119 for x in xrange(256):
120 120 n = data[off + ngshaoffset:off + ngshaoffset + 20]
121 121 self.map[n] = count
122 122 count += 1
123 123 if count >= self.l:
124 124 break
125 125 off += self.s
126 126 cur += blocksize
127 127
128 128 def loadblock(self, blockstart, blocksize, data=None):
129 129 if self.all: return
130 130 if data is None:
131 131 self.dataf.seek(blockstart)
132 132 if blockstart + blocksize > self.datasize:
133 133 # the revlog may have grown since we've started running,
134 134 # but we don't have space in self.index for more entries.
135 135 # limit blocksize so that we don't get too much data.
136 136 blocksize = max(self.datasize - blockstart, 0)
137 137 data = self.dataf.read(blocksize)
138 138 lend = len(data) / self.s
139 139 i = blockstart / self.s
140 140 off = 0
141 141 # lazyindex supports __delitem__
142 142 if lend > len(self.index) - i:
143 143 lend = len(self.index) - i
144 144 for x in xrange(lend):
145 145 if self.index[i + x] == None:
146 146 b = data[off : off + self.s]
147 147 self.index[i + x] = b
148 148 n = b[ngshaoffset:ngshaoffset + 20]
149 149 self.map[n] = i + x
150 150 off += self.s
151 151
152 152 def findnode(self, node):
153 153 """search backwards through the index file for a specific node"""
154 154 if self.allmap: return None
155 155
156 156 # hg log will cause many many searches for the manifest
157 157 # nodes. After we get called a few times, just load the whole
158 158 # thing.
159 159 if self.mapfind_count > 8:
160 160 self.loadmap()
161 161 if node in self.map:
162 162 return node
163 163 return None
164 164 self.mapfind_count += 1
165 165 last = self.l - 1
166 166 while self.index[last] != None:
167 167 if last == 0:
168 168 self.all = 1
169 169 self.allmap = 1
170 170 return None
171 171 last -= 1
172 172 end = (last + 1) * self.s
173 173 blocksize = self.s * 256
174 174 while end >= 0:
175 175 start = max(end - blocksize, 0)
176 176 self.dataf.seek(start)
177 177 data = self.dataf.read(end - start)
178 178 findend = end - start
179 179 while True:
180 180 # we're searching backwards, so we have to make sure
181 181 # we don't find a changeset where this node is a parent
182 182 off = data.rfind(node, 0, findend)
183 183 findend = off
184 184 if off >= 0:
185 185 i = off / self.s
186 186 off = i * self.s
187 187 n = data[off + ngshaoffset:off + ngshaoffset + 20]
188 188 if n == node:
189 189 self.map[n] = i + start / self.s
190 190 return node
191 191 else:
192 192 break
193 193 end -= blocksize
194 194 return None
195 195
196 196 def loadindex(self, i=None, end=None):
197 197 if self.all: return
198 198 all = False
199 199 if i == None:
200 200 blockstart = 0
201 201 blocksize = (512 / self.s) * self.s
202 202 end = self.datasize
203 203 all = True
204 204 else:
205 205 if end:
206 206 blockstart = i * self.s
207 207 end = end * self.s
208 208 blocksize = end - blockstart
209 209 else:
210 210 blockstart = (i & ~63) * self.s
211 211 blocksize = self.s * 64
212 212 end = blockstart + blocksize
213 213 while blockstart < end:
214 214 self.loadblock(blockstart, blocksize)
215 215 blockstart += blocksize
216 216 if all: self.all = True
217 217
218 218 class lazyindex(object):
219 219 """a lazy version of the index array"""
220 220 def __init__(self, parser):
221 221 self.p = parser
222 222 def __len__(self):
223 223 return len(self.p.index)
224 224 def load(self, pos):
225 225 if pos < 0:
226 226 pos += len(self.p.index)
227 227 self.p.loadindex(pos)
228 228 return self.p.index[pos]
229 229 def __getitem__(self, pos):
230 230 ret = self.p.index[pos] or self.load(pos)
231 231 if isinstance(ret, str):
232 232 ret = struct.unpack(indexformatng, ret)
233 233 return ret
234 234 def __setitem__(self, pos, item):
235 235 self.p.index[pos] = item
236 236 def __delitem__(self, pos):
237 237 del self.p.index[pos]
238 238 def append(self, e):
239 239 self.p.index.append(e)
240 240
241 241 class lazymap(object):
242 242 """a lazy version of the node map"""
243 243 def __init__(self, parser):
244 244 self.p = parser
245 245 def load(self, key):
246 246 n = self.p.findnode(key)
247 247 if n == None:
248 248 raise KeyError(key)
249 249 def __contains__(self, key):
250 250 if key in self.p.map:
251 251 return True
252 252 self.p.loadmap()
253 253 return key in self.p.map
254 254 def __iter__(self):
255 255 yield nullid
256 256 for i in xrange(self.p.l):
257 257 ret = self.p.index[i]
258 258 if not ret:
259 259 self.p.loadindex(i)
260 260 ret = self.p.index[i]
261 261 if isinstance(ret, str):
262 262 ret = struct.unpack(indexformatng, ret)
263 yield ret[-1]
263 yield ret[7]
264 264 def __getitem__(self, key):
265 265 try:
266 266 return self.p.map[key]
267 267 except KeyError:
268 268 try:
269 269 self.load(key)
270 270 return self.p.map[key]
271 271 except KeyError:
272 272 raise KeyError("node " + hex(key))
273 273 def __setitem__(self, key, val):
274 274 self.p.map[key] = val
275 275 def __delitem__(self, key):
276 276 del self.p.map[key]
277 277
278 278 class RevlogError(Exception): pass
279 279 class LookupError(RevlogError): pass
280 280
281 281 def getoffset(q):
282 282 if q & 0xFFFF:
283 283 raise RevlogError(_('incompatible revision flag %x') % q)
284 284 return int(q >> 16)
285 285
286 286 def gettype(q):
287 287 return int(q & 0xFFFF)
288 288
289 289 def offset_type(offset, type):
290 290 return long(long(offset) << 16 | type)
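# Round-trip sketch for the helpers above: the first index field packs a
# 48-bit file offset in the high bits and a 16-bit type in the low bits.
#
#   q = offset_type(4096, 0)
#   assert getoffset(q) == 4096 and gettype(q) == 0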
291 291
292 292 class revlogoldio(object):
293 293 def __init__(self):
294 294 self.chunkcache = None
295 295 self.size = struct.calcsize(indexformatv0)
296 296
297 297 def parseindex(self, fp, st, inline):
298 298 s = self.size
299 299 index = []
300 300 nodemap = {nullid: nullrev}
301 301 n = off = 0
302 302 data = fp.read()
303 303 l = len(data)
304 304 while off + s <= l:
305 305 cur = data[off:off + s]
306 306 off += s
307 307 e = struct.unpack(indexformatv0, cur)
308 308 # transform to revlogv1 format
309 309 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
310 310 nodemap[e[4]], nodemap[e[5]], e[6])
311 311 index.append(e2)
312 312 nodemap[e[6]] = n
313 313 n += 1
314 314
315 315 return index, nodemap
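# Field mapping applied above, assuming the v0 layout
# (offset, comp_len, base, link, p1_node, p2_node, node): the in-memory
# entry becomes
#
#   e2 = (offset_type(offset, 0), comp_len, -1, base, link,
#         nodemap[p1_node], nodemap[p2_node], node)
#
# with -1 standing in for the uncompressed length, which v0 never stored.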
316 316
317 317 class revlogio(object):
318 318 def __init__(self):
319 319 self.chunkcache = None
320 320 self.size = struct.calcsize(indexformatng)
321 321
322 322 def parseindex(self, fp, st, inline):
323 323 if (lazyparser.safe_to_use and not inline and
324 324 st and st.st_size > 1000000):
325 325 # big index, let's parse it on demand
326 326 parser = lazyparser(fp, st.st_size)
327 327 index = lazyindex(parser)
328 328 nodemap = lazymap(parser)
329 329 e = list(index[0])
330 330 type = gettype(e[0])
331 331 e[0] = offset_type(0, type)
332 332 index[0] = e
333 333 return index, nodemap
334 334
335 335 s = self.size
336 336 index = []
337 337 nodemap = {nullid: nullrev}
338 338 n = off = 0
339 339 # if we're not using lazymap, always read the whole index
340 340 data = fp.read()
341 341 l = len(data)
342 342 if inline:
343 343 self.chunkcache = (0, data)
344 344 while off + s <= l:
345 345 e = struct.unpack(indexformatng, data[off:off + s])
346 346 index.append(e)
347 nodemap[e[-1]] = n
347 nodemap[e[7]] = n
348 348 n += 1
349 349 off += s
350 350 if inline:
351 351 if e[1] < 0:
352 352 break
353 353 off += e[1]
354 354
355 355 e = list(index[0])
356 356 type = gettype(e[0])
357 357 e[0] = offset_type(0, type)
358 358 index[0] = e
359 359
360 360 return index, nodemap
361 361
362 362 class revlog(object):
363 363 """
364 364 the underlying revision storage object
365 365
366 366 A revlog consists of two parts, an index and the revision data.
367 367
368 368 The index is a file with a fixed record size containing
369 369 information on each revision, including its nodeid (hash), the
370 370 nodeids of its parents, the position and offset of its data within
371 371 the data file, and the revision it's based on. Finally, each entry
372 372 contains a linkrev entry that can serve as a pointer to external
373 373 data.
374 374
375 375 The revision data itself is a linear collection of data chunks.
376 376 Each chunk represents a revision and is usually represented as a
377 377 delta against the previous chunk. To bound lookup time, runs of
378 378 deltas are limited to about 2 times the length of the original
379 379 version data. This makes retrieval of a version proportional to
380 380 its size, or O(1) relative to the number of revisions.
381 381
382 382 Both pieces of the revlog are written to in an append-only
383 383 fashion, which means we never need to rewrite a file to insert or
384 384 remove data, and can use some simple techniques to avoid the need
385 385 for locking while reading.
386 386 """
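# A cost sketch for the bound described above: reconstructing a revision
# applies every chunk from its base through itself, and delta runs are
# capped near twice the text size, so the chain cost tracks the size of
# the version itself ('rl' is a hypothetical revlog instance):
#
#   def chaincost(rl, rev):
#       return sum(rl.length(r) for r in xrange(rl.base(rev), rev + 1))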
387 387 def __init__(self, opener, indexfile):
388 388 """
389 389 create a revlog object
390 390
391 391 opener is a function that abstracts the file opening operation
392 392 and can be used to implement COW semantics or the like.
393 393 """
394 394 self.indexfile = indexfile
395 395 self.datafile = indexfile[:-2] + ".d"
396 396 self.opener = opener
397 397
398 398 self.indexstat = None
399 399 self.cache = None
400 400 self.defversion = REVLOG_DEFAULT_VERSION
401 401 if hasattr(opener, "defversion"):
402 402 self.defversion = opener.defversion
403 403 if self.defversion & REVLOGNG:
404 404 self.defversion |= REVLOGNGINLINEDATA
405 405 self._load()
406 406
407 407 def _load(self):
408 408 v = self.defversion
409 409 try:
410 410 f = self.opener(self.indexfile)
411 411 i = f.read(4)
412 412 f.seek(0)
413 413 if len(i) > 0:
414 414 v = struct.unpack(versionformat, i)[0]
415 415 except IOError, inst:
416 416 if inst.errno != errno.ENOENT:
417 417 raise
418 418 i = ""
419 419 else:
420 420 try:
421 421 st = util.fstat(f)
422 422 except AttributeError, inst:
423 423 st = None
424 424 else:
425 425 oldst = self.indexstat
426 426 if (oldst and st.st_dev == oldst.st_dev
427 427 and st.st_ino == oldst.st_ino
428 428 and st.st_mtime == oldst.st_mtime
429 429 and st.st_ctime == oldst.st_ctime
430 430 and st.st_size == oldst.st_size):
431 431 return
432 432 self.indexstat = st
433 433 flags = v & ~0xFFFF
434 434 fmt = v & 0xFFFF
435 435 if fmt == REVLOGV0:
436 436 if flags:
437 437 raise RevlogError(_("index %s unknown flags %#04x for format v0")
438 438 % (self.indexfile, flags >> 16))
439 439 elif fmt == REVLOGNG:
440 440 if flags & ~REVLOGNGINLINEDATA:
441 441 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
442 442 % (self.indexfile, flags >> 16))
443 443 else:
444 444 raise RevlogError(_("index %s unknown format %d")
445 445 % (self.indexfile, fmt))
446 446 self.version = v
447 447 self.nodemap = {nullid: nullrev}
448 448 self.index = []
449 449 self._io = revlogio()
450 450 if self.version == REVLOGV0:
451 451 self._io = revlogoldio()
452 452 if i:
453 453 self.index, self.nodemap = self._io.parseindex(f, st, self._inline())
454 454
455 455 def _loadindex(self, start, end):
456 456 """load a block of indexes all at once from the lazy parser"""
457 457 if isinstance(self.index, lazyindex):
458 458 self.index.p.loadindex(start, end)
459 459
460 460 def _loadindexmap(self):
461 461 """loads both the map and the index from the lazy parser"""
462 462 if isinstance(self.index, lazyindex):
463 463 p = self.index.p
464 464 p.loadindex()
465 465 self.nodemap = p.map
466 466
467 467 def _loadmap(self):
468 468 """loads the map from the lazy parser"""
469 469 if isinstance(self.nodemap, lazymap):
470 470 self.nodemap.p.loadmap()
471 471 self.nodemap = self.nodemap.p.map
472 472
473 473 def _inline(self): return self.version & REVLOGNGINLINEDATA
474 474
475 475 def tip(self): return self.node(len(self.index) - 1)
476 476 def count(self): return len(self.index)
477 477 def node(self, rev):
478 return rev == nullrev and nullid or self.index[rev][-1]
478 return rev == nullrev and nullid or self.index[rev][7]
479 479 def rev(self, node):
480 480 try:
481 481 return self.nodemap[node]
482 482 except KeyError:
483 483 raise LookupError(_('%s: no node %s') % (self.indexfile, hex(node)))
484 484 def linkrev(self, node):
485 return (node == nullid) and nullrev or self.index[self.rev(node)][-4]
485 return (node == nullid) and nullrev or self.index[self.rev(node)][4]
486 486 def parents(self, node):
487 487 if node == nullid: return (nullid, nullid)
488 488 r = self.rev(node)
489 d = self.index[r][-3:-1]
489 d = self.index[r][5:7]
490 490 return (self.node(d[0]), self.node(d[1]))
491 491 def parentrevs(self, rev):
492 492 if rev == nullrev:
493 493 return (nullrev, nullrev)
494 d = self.index[rev][-3:-1]
494 d = self.index[rev][5:7]
495 495 return d
496 496 def start(self, rev):
497 497 if rev == nullrev:
498 498 return 0
499 499 return getoffset(self.index[rev][0])
500 500
501 501 def end(self, rev): return self.start(rev) + self.length(rev)
502 502
503 503 def size(self, rev):
504 504 """return the length of the uncompressed text for a given revision"""
505 505 if rev == nullrev:
506 506 return 0
507 507 l = self.index[rev][2]
508 508 if l >= 0:
509 509 return l
510 510
511 511 t = self.revision(self.node(rev))
512 512 return len(t)
513 513
514 514 # alternate implementation; the advantage of this code is that it
515 515 # will be faster for a single revision. But the results are not
516 516 # cached, so finding the size of every revision will be slower.
517 517 """
518 518 if self.cache and self.cache[1] == rev:
519 519 return len(self.cache[2])
520 520
521 521 base = self.base(rev)
522 522 if self.cache and self.cache[1] >= base and self.cache[1] < rev:
523 523 base = self.cache[1]
524 524 text = self.cache[2]
525 525 else:
526 526 text = self.revision(self.node(base))
527 527
528 528 l = len(text)
529 529 for x in xrange(base + 1, rev + 1):
530 530 l = mdiff.patchedsize(l, self.chunk(x))
531 531 return l
532 532 """
533 533
534 534 def length(self, rev):
535 535 if rev == nullrev:
536 536 return 0
537 537 else:
538 538 return self.index[rev][1]
539 539 def base(self, rev):
540 540 if (rev == nullrev):
541 541 return nullrev
542 542 else:
543 return self.index[rev][-5]
543 return self.index[rev][3]
544 544
545 545 def reachable(self, node, stop=None):
546 546 """return a hash of all nodes ancestral to a given node, including
547 547 the node itself, stopping when stop is matched"""
548 548 reachable = {}
549 549 visit = [node]
550 550 reachable[node] = 1
551 551 if stop:
552 552 stopn = self.rev(stop)
553 553 else:
554 554 stopn = 0
555 555 while visit:
556 556 n = visit.pop(0)
557 557 if n == stop:
558 558 continue
559 559 if n == nullid:
560 560 continue
561 561 for p in self.parents(n):
562 562 if self.rev(p) < stopn:
563 563 continue
564 564 if p not in reachable:
565 565 reachable[p] = 1
566 566 visit.append(p)
567 567 return reachable
568 568
569 569 def nodesbetween(self, roots=None, heads=None):
570 570 """Return a tuple containing three elements. Elements 1 and 2 contain
571 571 a final list of bases and heads after all the unreachable ones have been
572 572 pruned. Element 0 contains a topologically sorted list of all
573 573
574 574 nodes that satisfy these constraints:
575 575 1. All nodes must be descended from a node in roots (the nodes on
576 576 roots are considered descended from themselves).
577 577 2. All nodes must also be ancestors of a node in heads (the nodes in
578 578 heads are considered to be their own ancestors).
579 579
580 580 If roots is unspecified, nullid is assumed as the only root.
581 581 If heads is unspecified, it is taken to be the output of the
582 582 heads method (i.e. a list of all nodes in the repository that
583 583 have no children)."""
584 584 nonodes = ([], [], [])
585 585 if roots is not None:
586 586 roots = list(roots)
587 587 if not roots:
588 588 return nonodes
589 589 lowestrev = min([self.rev(n) for n in roots])
590 590 else:
591 591 roots = [nullid] # Everybody's a descendent of nullid
592 592 lowestrev = nullrev
593 593 if (lowestrev == nullrev) and (heads is None):
594 594 # We want _all_ the nodes!
595 595 return ([self.node(r) for r in xrange(0, self.count())],
596 596 [nullid], list(self.heads()))
597 597 if heads is None:
598 598 # All nodes are ancestors, so the latest ancestor is the last
599 599 # node.
600 600 highestrev = self.count() - 1
601 601 # Set ancestors to None to signal that every node is an ancestor.
602 602 ancestors = None
603 603 # Set heads to an empty dictionary for later discovery of heads
604 604 heads = {}
605 605 else:
606 606 heads = list(heads)
607 607 if not heads:
608 608 return nonodes
609 609 ancestors = {}
610 610 # Turn heads into a dictionary so we can remove 'fake' heads.
611 611 # Also, later we will be using it to filter out the heads we can't
612 612 # find from roots.
613 613 heads = dict.fromkeys(heads, 0)
614 614 # Start at the top and keep marking parents until we're done.
615 615 nodestotag = heads.keys()
616 616 # Remember where the top was so we can use it as a limit later.
617 617 highestrev = max([self.rev(n) for n in nodestotag])
618 618 while nodestotag:
619 619 # grab a node to tag
620 620 n = nodestotag.pop()
621 621 # Never tag nullid
622 622 if n == nullid:
623 623 continue
624 624 # A node's revision number represents its place in a
625 625 # topologically sorted list of nodes.
626 626 r = self.rev(n)
627 627 if r >= lowestrev:
628 628 if n not in ancestors:
629 629 # If we are possibly a descendent of one of the roots
630 630 # and we haven't already been marked as an ancestor
631 631 ancestors[n] = 1 # Mark as ancestor
632 632 # Add non-nullid parents to list of nodes to tag.
633 633 nodestotag.extend([p for p in self.parents(n) if
634 634 p != nullid])
635 635 elif n in heads: # We've seen it before, is it a fake head?
636 636 # So it is, real heads should not be the ancestors of
637 637 # any other heads.
638 638 heads.pop(n)
639 639 if not ancestors:
640 640 return nonodes
641 641 # Now that we have our set of ancestors, we want to remove any
642 642 # roots that are not ancestors.
643 643
644 644 # If one of the roots was nullid, everything is included anyway.
645 645 if lowestrev > nullrev:
646 646 # But, since we weren't, let's recompute the lowest rev to not
647 647 # include roots that aren't ancestors.
648 648
649 649 # Filter out roots that aren't ancestors of heads
650 650 roots = [n for n in roots if n in ancestors]
651 651 # Recompute the lowest revision
652 652 if roots:
653 653 lowestrev = min([self.rev(n) for n in roots])
654 654 else:
655 655 # No more roots? Return empty list
656 656 return nonodes
657 657 else:
658 658 # We are descending from nullid, and don't need to care about
659 659 # any other roots.
660 660 lowestrev = nullrev
661 661 roots = [nullid]
662 662 # Transform our roots list into a 'set' (i.e. a dictionary where the
663 663 # values don't matter).
664 664 descendents = dict.fromkeys(roots, 1)
665 665 # Also, keep the original roots so we can filter out roots that aren't
666 666 # 'real' roots (i.e. are descended from other roots).
667 667 roots = descendents.copy()
668 668 # Our topologically sorted list of output nodes.
669 669 orderedout = []
670 670 # Don't start at nullid since we don't want nullid in our output list,
671 671 # and if nullid shows up in descendents, empty parents will look like
672 672 # they're descendents.
673 673 for r in xrange(max(lowestrev, 0), highestrev + 1):
674 674 n = self.node(r)
675 675 isdescendent = False
676 676 if lowestrev == nullrev: # Everybody is a descendent of nullid
677 677 isdescendent = True
678 678 elif n in descendents:
679 679 # n is already a descendent
680 680 isdescendent = True
681 681 # This check only needs to be done here because all the roots
682 682 # will start being marked as descendents before the loop.
683 683 if n in roots:
684 684 # If n was a root, check if it's a 'real' root.
685 685 p = tuple(self.parents(n))
686 686 # If any of its parents are descendents, it's not a root.
687 687 if (p[0] in descendents) or (p[1] in descendents):
688 688 roots.pop(n)
689 689 else:
690 690 p = tuple(self.parents(n))
691 691 # A node is a descendent if either of its parents are
692 692 # descendents. (We seeded the descendents dict with the roots
693 693 # up there, remember?)
694 694 if (p[0] in descendents) or (p[1] in descendents):
695 695 descendents[n] = 1
696 696 isdescendent = True
697 697 if isdescendent and ((ancestors is None) or (n in ancestors)):
698 698 # Only include nodes that are both descendents and ancestors.
699 699 orderedout.append(n)
700 700 if (ancestors is not None) and (n in heads):
701 701 # We're trying to figure out which heads are reachable
702 702 # from roots.
703 703 # Mark this head as having been reached
704 704 heads[n] = 1
705 705 elif ancestors is None:
706 706 # Otherwise, we're trying to discover the heads.
707 707 # Assume this is a head because if it isn't, the next step
708 708 # will eventually remove it.
709 709 heads[n] = 1
710 710 # But, obviously its parents aren't.
711 711 for p in self.parents(n):
712 712 heads.pop(p, None)
713 713 heads = [n for n in heads.iterkeys() if heads[n] != 0]
714 714 roots = roots.keys()
715 715 assert orderedout
716 716 assert roots
717 717 assert heads
718 718 return (orderedout, roots, heads)
719 719
720 720 def heads(self, start=None, stop=None):
721 721 """return the list of all nodes that have no children
722 722
723 723 if start is specified, only heads that are descendants of
724 724 start will be returned
725 725 if stop is specified, it will consider all the revs from stop
726 726 as if they had no children
727 727 """
728 728 if start is None:
729 729 start = nullid
730 730 if stop is None:
731 731 stop = []
732 732 stoprevs = dict.fromkeys([self.rev(n) for n in stop])
733 733 startrev = self.rev(start)
734 734 reachable = {startrev: 1}
735 735 heads = {startrev: 1}
736 736
737 737 parentrevs = self.parentrevs
738 738 for r in xrange(startrev + 1, self.count()):
739 739 for p in parentrevs(r):
740 740 if p in reachable:
741 741 if r not in stoprevs:
742 742 reachable[r] = 1
743 743 heads[r] = 1
744 744 if p in heads and p not in stoprevs:
745 745 del heads[p]
746 746
747 747 return [self.node(r) for r in heads]
748 748
749 749 def children(self, node):
750 750 """find the children of a given node"""
751 751 c = []
752 752 p = self.rev(node)
753 753 for r in range(p + 1, self.count()):
754 754 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
755 755 if prevs:
756 756 for pr in prevs:
757 757 if pr == p:
758 758 c.append(self.node(r))
759 759 elif p == nullrev:
760 760 c.append(self.node(r))
761 761 return c
762 762
763 763 def _match(self, id):
764 764 if isinstance(id, (long, int)):
765 765 # rev
766 766 return self.node(id)
767 767 if len(id) == 20:
768 768 # possibly a binary node
769 769 # odds of a binary node being all hex in ASCII are 1 in 10**25
770 770 try:
771 771 node = id
772 772 r = self.rev(node) # quick search the index
773 773 return node
774 774 except LookupError:
775 775 pass # may be partial hex id
776 776 try:
777 777 # str(rev)
778 778 rev = int(id)
779 779 if str(rev) != id: raise ValueError
780 780 if rev < 0: rev = self.count() + rev
781 781 if rev < 0 or rev >= self.count(): raise ValueError
782 782 return self.node(rev)
783 783 except (ValueError, OverflowError):
784 784 pass
785 785 if len(id) == 40:
786 786 try:
787 787 # a full hex nodeid?
788 788 node = bin(id)
789 789 r = self.rev(node)
790 790 return node
791 791 except TypeError:
792 792 pass
793 793
794 794 def _partialmatch(self, id):
795 795 if len(id) < 40:
796 796 try:
797 797 # hex(node)[:...]
798 798 bin_id = bin(id[:len(id) & ~1]) # grab an even number of digits
799 799 node = None
800 800 for n in self.nodemap:
801 801 if n.startswith(bin_id) and hex(n).startswith(id):
802 802 if node is not None:
803 803 raise LookupError(_("Ambiguous identifier"))
804 804 node = n
805 805 if node is not None:
806 806 return node
807 807 except TypeError:
808 808 pass
809 809
810 810 def lookup(self, id):
811 811 """locate a node based on:
812 812 - revision number or str(revision number)
813 813 - nodeid or subset of hex nodeid
814 814 """
815 815
816 816 n = self._match(id)
817 817 if n is not None:
818 818 return n
819 819 n = self._partialmatch(id)
820 820 if n:
821 821 return n
822 822
823 823 raise LookupError(_("No match found"))
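# Resolution sketch for lookup() above: an id is tried as a revision
# number (or its string form), a 20-byte binary node, a 40-digit hex
# node, and finally a unique hex prefix ('rl' and 'node' hypothetical):
#
#   rl.lookup(0); rl.lookup("0"); rl.lookup(node); rl.lookup(hex(node)[:8])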
824 824
825 825 def cmp(self, node, text):
826 826 """compare text with a given file revision"""
827 827 p1, p2 = self.parents(node)
828 828 return hash(text, p1, p2) != node
829 829
830 830 def diff(self, a, b):
831 831 """return a delta between two revisions"""
832 832 return mdiff.textdiff(a, b)
833 833
834 834 def patches(self, t, pl):
835 835 """apply a list of patches to a string"""
836 836 return mdiff.patches(t, pl)
837 837
838 838 def chunk(self, rev, df=None, cachelen=4096):
839 839 start, length = self.start(rev), self.length(rev)
840 840 inline = self._inline()
841 841 if inline:
842 842 start += (rev + 1) * self._io.size
843 843 end = start + length
844 844 def loadcache(df):
845 845 cache_length = max(cachelen, length) # 4k
846 846 if not df:
847 847 if inline:
848 848 df = self.opener(self.indexfile)
849 849 else:
850 850 df = self.opener(self.datafile)
851 851 df.seek(start)
852 852 self._io.chunkcache = (start, df.read(cache_length))
853 853
854 854 if not self._io.chunkcache:
855 855 loadcache(df)
856 856
857 857 cache_start = self._io.chunkcache[0]
858 858 cache_end = cache_start + len(self._io.chunkcache[1])
859 859 if start >= cache_start and end <= cache_end:
860 860 # it is cached
861 861 offset = start - cache_start
862 862 else:
863 863 loadcache(df)
864 864 offset = 0
865 865
866 866 #def checkchunk():
867 867 # df = self.opener(self.datafile)
868 868 # df.seek(start)
869 869 # return df.read(length)
870 870 #assert s == checkchunk()
871 871 return decompress(self._io.chunkcache[1][offset:offset + length])
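# Layout note for the inline offset math above: an inline revlog
# interleaves [entry 0][data 0][entry 1][data 1]..., and start(rev)
# counts only data bytes, so a chunk's file position is
# start(rev) + (rev + 1) * entry_size.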
872 872
873 873 def delta(self, node):
874 874 """return or calculate a delta between a node and its predecessor"""
875 875 r = self.rev(node)
876 876 return self.revdiff(r - 1, r)
877 877
878 878 def revdiff(self, rev1, rev2):
879 879 """return or calculate a delta between two revisions"""
880 880 b1 = self.base(rev1)
881 881 b2 = self.base(rev2)
882 882 if b1 == b2 and rev1 + 1 == rev2:
883 883 return self.chunk(rev2)
884 884 else:
885 885 return self.diff(self.revision(self.node(rev1)),
886 886 self.revision(self.node(rev2)))
887 887
888 888 def revision(self, node):
889 889 """return an uncompressed revision of a given node"""
890 890 if node == nullid: return ""
891 891 if self.cache and self.cache[0] == node: return self.cache[2]
892 892
893 893 # look up what we need to read
894 894 text = None
895 895 rev = self.rev(node)
896 896 base = self.base(rev)
897 897
898 898 if self._inline():
899 899 # we probably have the whole chunk cached
900 900 df = None
901 901 else:
902 902 df = self.opener(self.datafile)
903 903
904 904 # do we have useful data cached?
905 905 if self.cache and self.cache[1] >= base and self.cache[1] < rev:
906 906 base = self.cache[1]
907 907 text = self.cache[2]
908 908 self._loadindex(base, rev + 1)
909 909 else:
910 910 self._loadindex(base, rev + 1)
911 911 text = self.chunk(base, df=df)
912 912
913 913 bins = []
914 914 for r in xrange(base + 1, rev + 1):
915 915 bins.append(self.chunk(r, df=df))
916 916
917 917 text = self.patches(text, bins)
918 918
919 919 p1, p2 = self.parents(node)
920 920 if node != hash(text, p1, p2):
921 921 raise RevlogError(_("integrity check failed on %s:%d")
922 922 % (self.datafile, rev))
923 923
924 924 self.cache = (node, rev, text)
925 925 return text
926 926
927 927 def checkinlinesize(self, tr, fp=None):
928 928 if not self._inline():
929 929 return
930 930 if not fp:
931 931 fp = self.opener(self.indexfile, 'r')
932 932 fp.seek(0, 2)
933 933 size = fp.tell()
934 934 if size < 131072:
935 935 return
936 936 trinfo = tr.find(self.indexfile)
937 937 if trinfo == None:
938 938 raise RevlogError(_("%s not found in the transaction")
939 939 % self.indexfile)
940 940
941 941 trindex = trinfo[2]
942 942 dataoff = self.start(trindex)
943 943
944 944 tr.add(self.datafile, dataoff)
945 945 df = self.opener(self.datafile, 'w')
946 946 calc = self._io.size
947 947 for r in xrange(self.count()):
948 948 start = self.start(r) + (r + 1) * calc
949 949 length = self.length(r)
950 950 fp.seek(start)
951 951 d = fp.read(length)
952 952 df.write(d)
953 953 fp.close()
954 954 df.close()
955 955 fp = self.opener(self.indexfile, 'w', atomictemp=True)
956 956 self.version &= ~(REVLOGNGINLINEDATA)
957 957 if self.count():
958 958 x = self.index[0]
959 959 e = struct.pack(indexformatng, *x)[4:]
960 960 l = struct.pack(versionformat, self.version)
961 961 fp.write(l)
962 962 fp.write(e)
963 963
964 964 for i in xrange(1, self.count()):
965 965 x = self.index[i]
966 966 e = struct.pack(indexformatng, *x)
967 967 fp.write(e)
968 968
969 969 # if we don't call rename, the temp file will never replace the
970 970 # real index
971 971 fp.rename()
972 972
973 973 tr.replace(self.indexfile, trindex * calc)
974 974 self._io.chunkcache = None
975 975
976 976 def addrevision(self, text, transaction, link, p1=None, p2=None, d=None):
977 977 """add a revision to the log
978 978
979 979 text - the revision data to add
980 980 transaction - the transaction object used for rollback
981 981 link - the linkrev data to add
982 982 p1, p2 - the parent nodeids of the revision
983 983 d - an optional precomputed delta
984 984 """
985 985 if not self._inline():
986 986 dfh = self.opener(self.datafile, "a")
987 987 else:
988 988 dfh = None
989 989 ifh = self.opener(self.indexfile, "a+")
990 990 return self._addrevision(text, transaction, link, p1, p2, d, ifh, dfh)
991 991
992 992 def _addrevision(self, text, transaction, link, p1, p2, d, ifh, dfh):
993 993 if text is None: text = ""
994 994 if p1 is None: p1 = self.tip()
995 995 if p2 is None: p2 = nullid
996 996
997 997 node = hash(text, p1, p2)
998 998
999 999 if node in self.nodemap:
1000 1000 return node
1001 1001
1002 1002 n = self.count()
1003 1003 t = n - 1
1004 1004
1005 1005 if n:
1006 1006 base = self.base(t)
1007 1007 start = self.start(base)
1008 1008 end = self.end(t)
1009 1009 if not d:
1010 1010 prev = self.revision(self.tip())
1011 1011 d = self.diff(prev, text)
1012 1012 data = compress(d)
1013 1013 l = len(data[1]) + len(data[0])
1014 1014 dist = end - start + l
1015 1015
1016 1016 # full versions are inserted when the needed deltas
1017 1017 # become comparable to the uncompressed text
1018 1018 if not n or dist > len(text) * 2:
1019 1019 data = compress(text)
1020 1020 l = len(data[1]) + len(data[0])
1021 1021 base = n
1022 1022 else:
1023 1023 base = self.base(t)
1024 1024
1025 1025 offset = 0
1026 1026 if t >= 0:
1027 1027 offset = self.end(t)
1028 1028
1029 1029 e = (offset_type(offset, 0), l, len(text),
1030 1030 base, link, self.rev(p1), self.rev(p2), node)
1031 1031
1032 1032 self.index.append(e)
1033 1033 self.nodemap[node] = n
1034 1034
1035 1035 if self.version == REVLOGV0:
1036 1036 e = (offset, l, base, link, p1, p2, node)
1037 1037 entry = struct.pack(indexformatv0, *e)
1038 1038 else:
1039 1039 entry = struct.pack(indexformatng, *e)
1040 1040
1041 1041 if not self._inline():
1042 1042 transaction.add(self.datafile, offset)
1043 1043 transaction.add(self.indexfile, n * len(entry))
1044 1044 if data[0]:
1045 1045 dfh.write(data[0])
1046 1046 dfh.write(data[1])
1047 1047 dfh.flush()
1048 1048 else:
1049 1049 ifh.seek(0, 2)
1050 1050 transaction.add(self.indexfile, ifh.tell(), self.count() - 1)
1051 1051
1052 1052 if len(self.index) == 1 and self.version != REVLOGV0:
1053 1053 l = struct.pack(versionformat, self.version)
1054 1054 ifh.write(l)
1055 1055 entry = entry[4:]
1056 1056
1057 1057 ifh.write(entry)
1058 1058
1059 1059 if self._inline():
1060 1060 ifh.write(data[0])
1061 1061 ifh.write(data[1])
1062 1062 self.checkinlinesize(transaction, ifh)
1063 1063
1064 1064 self.cache = (node, n, text)
1065 1065 return node
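# Storage decision sketch for the heuristic above: a delta is kept only
# while the span from the chain base stays under twice the new text
# size; otherwise a fresh full version is stored.
#
#   dist = end - start + l
#   store_full = (not n) or dist > len(text) * 2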
1066 1066
1067 1067 def ancestor(self, a, b):
1068 1068 """calculate the least common ancestor of nodes a and b"""
1069 1069
1070 1070 def parents(rev):
1071 1071 return [p for p in self.parentrevs(rev) if p != nullrev]
1072 1072
1073 1073 c = ancestor.ancestor(self.rev(a), self.rev(b), parents)
1074 1074 if c is None:
1075 1075 return nullid
1076 1076
1077 1077 return self.node(c)
1078 1078
1079 1079 def group(self, nodelist, lookup, infocollect=None):
1080 1080 """calculate a delta group
1081 1081
1082 1082 Given a list of changeset revs, return a set of deltas and
1083 1083 metadata corresponding to nodes. The first delta is
1084 1084 parent(nodes[0]) -> nodes[0]; the receiver is guaranteed to
1085 1085 have this parent as it has all history before these
1086 1086 changesets. The parent is parent[0].
1087 1087 """
1088 1088 revs = [self.rev(n) for n in nodelist]
1089 1089
1090 1090 # if we don't have any revisions touched by these changesets, bail
1091 1091 if not revs:
1092 1092 yield changegroup.closechunk()
1093 1093 return
1094 1094
1095 1095 # add the parent of the first rev
1096 1096 p = self.parents(self.node(revs[0]))[0]
1097 1097 revs.insert(0, self.rev(p))
1098 1098
1099 1099 # build deltas
1100 1100 for d in xrange(0, len(revs) - 1):
1101 1101 a, b = revs[d], revs[d + 1]
1102 1102 nb = self.node(b)
1103 1103
1104 1104 if infocollect is not None:
1105 1105 infocollect(nb)
1106 1106
1107 1107 d = self.revdiff(a, b)
1108 1108 p = self.parents(nb)
1109 1109 meta = nb + p[0] + p[1] + lookup(nb)
1110 1110 yield changegroup.genchunk("%s%s" % (meta, d))
1111 1111
1112 1112 yield changegroup.closechunk()
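# Chunk format sketch for the group above: each payload carries 80 bytes
# of metadata (node, p1, p2, link node) followed by the delta, which is
# exactly how addgroup() below takes it apart:
#
#   node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
#   delta = chunk[80:]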
1113 1113
1114 1114 def addgroup(self, revs, linkmapper, transaction, unique=0):
1115 1115 """
1116 1116 add a delta group
1117 1117
1118 1118 given a set of deltas, add them to the revision log. the
1119 1119 first delta is against its parent, which should be in our
1120 1120 log, the rest are against the previous delta.
1121 1121 """
1122 1122
1123 1123 # track the base of the current delta log
1124 1124 r = self.count()
1125 1125 t = r - 1
1126 1126 node = None
1127 1127
1128 1128 base = prev = nullrev
1129 1129 start = end = textlen = 0
1130 1130 if r:
1131 1131 end = self.end(t)
1132 1132
1133 1133 ifh = self.opener(self.indexfile, "a+")
1134 1134 ifh.seek(0, 2)
1135 1135 transaction.add(self.indexfile, ifh.tell(), self.count())
1136 1136 if self._inline():
1137 1137 dfh = None
1138 1138 else:
1139 1139 transaction.add(self.datafile, end)
1140 1140 dfh = self.opener(self.datafile, "a")
1141 1141
1142 1142 # loop through our set of deltas
1143 1143 chain = None
1144 1144 for chunk in revs:
1145 1145 node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
1146 1146 link = linkmapper(cs)
1147 1147 if node in self.nodemap:
1148 1148 # this can happen if two branches make the same change
1149 1149 # if unique:
1150 1150 # raise RevlogError(_("already have %s") % hex(node[:4]))
1151 1151 chain = node
1152 1152 continue
1153 1153 delta = chunk[80:]
1154 1154
1155 1155 for p in (p1, p2):
1156 1156 if not p in self.nodemap:
1157 1157 raise LookupError(_("unknown parent %s") % short(p))
1158 1158
1159 1159 if not chain:
1160 1160 # retrieve the parent revision of the delta chain
1161 1161 chain = p1
1162 1162 if not chain in self.nodemap:
1163 1163 raise LookupError(_("unknown base %s") % short(chain[:4]))
1164 1164
1165 1165 # full versions are inserted when the needed deltas become
1166 1166 # comparable to the uncompressed text or when the previous
1167 1167 # version is not the one we have a delta against. We use
1168 1168 # the size of the previous full rev as a proxy for the
1169 1169 # current size.
1170 1170
1171 1171 if chain == prev:
1172 1172 tempd = compress(delta)
1173 1173 cdelta = tempd[0] + tempd[1]
1174 1174 textlen = mdiff.patchedsize(textlen, delta)
1175 1175
1176 1176 if chain != prev or (end - start + len(cdelta)) > textlen * 2:
1177 1177 # flush our writes here so we can read it in revision
1178 1178 if dfh:
1179 1179 dfh.flush()
1180 1180 ifh.flush()
1181 1181 text = self.revision(chain)
1182 1182 text = self.patches(text, [delta])
1183 1183 chk = self._addrevision(text, transaction, link, p1, p2, None,
1184 1184 ifh, dfh)
1185 1185 if not dfh and not self._inline():
1186 1186 # addrevision switched from inline to conventional
1187 1187 # reopen the index
1188 1188 dfh = self.opener(self.datafile, "a")
1189 1189 ifh = self.opener(self.indexfile, "a")
1190 1190 if chk != node:
1191 1191 raise RevlogError(_("consistency error adding group"))
1192 1192 textlen = len(text)
1193 1193 else:
1194 1194 e = (offset_type(end, 0), len(cdelta), textlen, base,
1195 1195 link, self.rev(p1), self.rev(p2), node)
1196 1196 self.index.append(e)
1197 1197 self.nodemap[node] = r
1198 1198 if self._inline():
1199 1199 ifh.write(struct.pack(indexformatng, *e))
1200 1200 ifh.write(cdelta)
1201 1201 self.checkinlinesize(transaction, ifh)
1202 1202 if not self._inline():
1203 1203 dfh = self.opener(self.datafile, "a")
1204 1204 ifh = self.opener(self.indexfile, "a")
1205 1205 else:
1206 1206 if self.version == REVLOGV0:
1207 1207 e = (end, len(cdelta), base, link, p1, p2, node)
1208 1208 entry = struct.pack(indexformatv0, *e)
1209 1209 else:
1210 1210 entry = struct.pack(indexformatng, *e)
1211 1211 dfh.write(cdelta)
1212 1212 ifh.write(entry)
1213 1213
1214 1214 t, r, chain, prev = r, r + 1, node, node
1215 1215 base = self.base(t)
1216 1216 start = self.start(base)
1217 1217 end = self.end(t)
1218 1218
1219 1219 return node
1220 1220
1221 1221 def strip(self, rev, minlink):
1222 1222 if self.count() == 0 or rev >= self.count():
1223 1223 return
1224 1224
1225 1225 if isinstance(self.index, lazyindex):
1226 1226 self._loadindexmap()
1227 1227
1228 1228 # When stripping away a revision, we need to make sure it
1229 1229 # does not actually belong to an older changeset.
1230 1230 # The minlink parameter defines the oldest revision
1231 1231 # we're allowed to strip away.
1232 while minlink > self.index[rev][-4]:
1232 while minlink > self.index[rev][4]:
1233 1233 rev += 1
1234 1234 if rev >= self.count():
1235 1235 return
1236 1236
1237 1237 # first truncate the files on disk
1238 1238 end = self.start(rev)
1239 1239 if not self._inline():
1240 1240 df = self.opener(self.datafile, "a")
1241 1241 df.truncate(end)
1242 1242 end = rev * self._io.size
1243 1243 else:
1244 1244 end += rev * self._io.size
1245 1245
1246 1246 indexf = self.opener(self.indexfile, "a")
1247 1247 indexf.truncate(end)
1248 1248
1249 1249 # then reset internal state in memory to forget those revisions
1250 1250 self.cache = None
1251 1251 self._io.chunkcache = None
1252 1252 for x in xrange(rev, self.count()):
1253 1253 del self.nodemap[self.node(x)]
1254 1254
1255 1255 del self.index[rev:]
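# Truncation sketch for strip() above: a separate data file is cut at
# start(rev) and the index at rev * entry_size; an inline revlog cuts
# a single file at start(rev) + rev * entry_size.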
1256 1256
1257 1257 def checksize(self):
1258 1258 expected = 0
1259 1259 if self.count():
1260 1260 expected = self.end(self.count() - 1)
1261 1261
1262 1262 try:
1263 1263 f = self.opener(self.datafile)
1264 1264 f.seek(0, 2)
1265 1265 actual = f.tell()
1266 1266 dd = actual - expected
1267 1267 except IOError, inst:
1268 1268 if inst.errno != errno.ENOENT:
1269 1269 raise
1270 1270 dd = 0
1271 1271
1272 1272 try:
1273 1273 f = self.opener(self.indexfile)
1274 1274 f.seek(0, 2)
1275 1275 actual = f.tell()
1276 1276 s = self._io.size
1277 1277 i = actual / s
1278 1278 di = actual - (i * s)
1279 1279 if self._inline():
1280 1280 databytes = 0
1281 1281 for r in xrange(self.count()):
1282 1282 databytes += self.length(r)
1283 1283 dd = 0
1284 1284 di = actual - self.count() * s - databytes
1285 1285 except IOError, inst:
1286 1286 if inst.errno != errno.ENOENT:
1287 1287 raise
1288 1288 di = 0
1289 1289
1290 1290 return (dd, di)
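# Usage sketch: checksize() reports bytes beyond the expected end of the
# data and index files; a consistent revlog yields (0, 0)
# ('rl' is a hypothetical revlog instance):
#
#   dd, di = rl.checksize()
#   assert (dd, di) == (0, 0)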
1291 1291
1292 1292