lookup should allow -1 to represent nullid (if passed an int as arg)
Benoit Boissinot
r2641:156fb1fe default
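
The integer branch of lookup() previously normalized negative revs against count() and returned None when out of range; it now defers straight to node(), whose "(rev < 0) and nullid" guard maps any negative rev, and in particular -1, to nullid. A sketch of the resulting behavior (rl is a hypothetical revlog instance, not part of this change):

    rl.lookup(-1)        # now returns nullid, via rl.node(-1)
    rl.lookup(0)         # node of the first revision, as before
    rl.lookup("abc123")  # hex-prefix lookup is unaffected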
@@ -1,1286 +1,1283 @@
1 1 """
2 2 revlog.py - storage back-end for mercurial
3 3
4 4 This provides efficient delta storage with O(1) retrieve and append
5 5 and O(changes) merge between branches
6 6
7 7 Copyright 2005 Matt Mackall <mpm@selenic.com>
8 8
9 9 This software may be used and distributed according to the terms
10 10 of the GNU General Public License, incorporated herein by reference.
11 11 """
12 12
13 13 from node import *
14 14 from i18n import gettext as _
15 15 from demandload import demandload
16 16 demandload(globals(), "binascii changegroup errno heapq mdiff os")
17 17 demandload(globals(), "sha struct util zlib")
18 18
19 19 # revlog version strings
20 20 REVLOGV0 = 0
21 21 REVLOGNG = 1
22 22
23 23 # revlog flags
24 24 REVLOGNGINLINEDATA = (1 << 16)
25 25 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
26 26
27 27 REVLOG_DEFAULT_FORMAT = REVLOGNG
28 28 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
29 29
30 30 def flagstr(flag):
31 31 if flag == "inline":
32 32 return REVLOGNGINLINEDATA
33 33 raise RevlogError(_("unknown revlog flag %s") % flag)
34 34
35 35 def hash(text, p1, p2):
36 36 """generate a hash from the given text and its parent hashes
37 37
38 38 This hash combines both the current file contents and its history
39 39 in a manner that makes it easy to distinguish nodes with the same
40 40 content in the revision graph.
41 41 """
42 42 l = [p1, p2]
43 43 l.sort()
44 44 s = sha.new(l[0])
45 45 s.update(l[1])
46 46 s.update(text)
47 47 return s.digest()
48 48
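
For illustration, hash() above is equivalent to a single sha pass over the sorted parents followed by the text, so the node of a root revision (both parents nullid) can be reproduced standalone (a sketch, not repository code):

    import sha
    nullid = "\0" * 20
    l = [nullid, nullid]
    l.sort()
    node = sha.new(l[0] + l[1] + "file contents").digest()  # 20-byte node id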
49 49 def compress(text):
50 50 """ generate a possibly-compressed representation of text """
51 51 if not text: return ("", text)
52 52 if len(text) < 44:
53 53 if text[0] == '\0': return ("", text)
54 54 return ('u', text)
55 55 bin = zlib.compress(text)
56 56 if len(bin) > len(text):
57 57 if text[0] == '\0': return ("", text)
58 58 return ('u', text)
59 59 return ("", bin)
60 60
61 61 def decompress(bin):
62 62 """ decompress the given input """
63 63 if not bin: return bin
64 64 t = bin[0]
65 65 if t == '\0': return bin
66 66 if t == 'x': return zlib.decompress(bin)
67 67 if t == 'u': return bin[1:]
68 68 raise RevlogError(_("unknown compression type %r") % t)
69 69
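
A round-trip sketch of the tagging convention used by compress()/decompress(): zlib output happens to start with 'x' (0x78), literal text is tagged 'u', and an empty tag covers empty input, text beginning with '\0', and compressed data:

    text = "x" * 100
    tag, data = compress(text)             # tag == "" here: zlib wins on repetitive input
    assert decompress(tag + data) == text  # decompress dispatches on the first byte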
70 70 indexformatv0 = ">4l20s20s20s"
71 71 v0shaoffset = 56
72 72 # index ng:
73 73 # 6 bytes offset
74 74 # 2 bytes flags
75 75 # 4 bytes compressed length
76 76 # 4 bytes uncompressed length
77 77 # 4 bytes: base rev
78 78 # 4 bytes link rev
79 79 # 4 bytes parent 1 rev
80 80 # 4 bytes parent 2 rev
81 81 # 32 bytes: nodeid
82 82 indexformatng = ">Qiiiiii20s12x"
83 83 ngshaoffset = 32
84 84 versionformat = ">i"
85 85
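
For reference, one revlogng index record built with the format above is 64 bytes, and offset and flags share the leading quad word, which offset_type()/ngoffset()/ngtype() below combine and split (a standalone sketch with made-up field values):

    import struct
    indexformatng = ">Qiiiiii20s12x"
    # offset/flags, compressed len, uncompressed len, base, link, p1 rev, p2 rev, node
    e = (0 << 16 | 0, 15, 15, 0, 0, -1, -1, "\0" * 20)
    entry = struct.pack(indexformatng, *e)
    assert len(entry) == struct.calcsize(indexformatng) == 64
    assert struct.unpack(indexformatng, entry) == e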
86 86 class lazyparser(object):
87 87 """
88 88 this class avoids the need to parse the entirety of large indices
89 89 """
90 90
91 91 # lazyparser is not safe to use on windows if win32 extensions are not
92 92 # available. it keeps the file handle open, which makes it impossible
93 93 # to break hardlinks on locally cloned repos.
94 94 safe_to_use = os.name != 'nt' or (not util.is_win_9x() and
95 95 hasattr(util, 'win32api'))
96 96
97 97 def __init__(self, dataf, size, indexformat, shaoffset):
98 98 self.dataf = dataf
99 99 self.format = indexformat
100 100 self.s = struct.calcsize(indexformat)
101 101 self.indexformat = indexformat
102 102 self.datasize = size
103 103 self.l = size/self.s
104 104 self.index = [None] * self.l
105 105 self.map = {nullid: -1}
106 106 self.allmap = 0
107 107 self.all = 0
108 108 self.mapfind_count = 0
109 109 self.shaoffset = shaoffset
110 110
111 111 def loadmap(self):
112 112 """
113 113 during a commit, we need to make sure the rev being added is
114 114 not a duplicate. This requires loading the entire index,
115 115 which is fairly slow. loadmap can load up just the node map,
116 116 which takes much less time.
117 117 """
118 118 if self.allmap: return
119 119 start = 0
120 120 end = self.datasize
121 121 self.allmap = 1
122 122 cur = 0
123 123 count = 0
124 124 blocksize = self.s * 256
125 125 self.dataf.seek(0)
126 126 while cur < end:
127 127 data = self.dataf.read(blocksize)
128 128 off = 0
129 129 for x in xrange(256):
130 130 n = data[off + self.shaoffset:off + self.shaoffset + 20]
131 131 self.map[n] = count
132 132 count += 1
133 133 if count >= self.l:
134 134 break
135 135 off += self.s
136 136 cur += blocksize
137 137
138 138 def loadblock(self, blockstart, blocksize, data=None):
139 139 if self.all: return
140 140 if data is None:
141 141 self.dataf.seek(blockstart)
142 142 data = self.dataf.read(blocksize)
143 143 lend = len(data) / self.s
144 144 i = blockstart / self.s
145 145 off = 0
146 146 for x in xrange(lend):
147 147 if self.index[i + x] == None:
148 148 b = data[off : off + self.s]
149 149 self.index[i + x] = b
150 150 n = b[self.shaoffset:self.shaoffset + 20]
151 151 self.map[n] = i + x
152 152 off += self.s
153 153
154 154 def findnode(self, node):
155 155 """search backwards through the index file for a specific node"""
156 156 if self.allmap: return None
157 157
158 158 # hg log will cause many many searches for the manifest
159 159 # nodes. After we get called a few times, just load the whole
160 160 # thing.
161 161 if self.mapfind_count > 8:
162 162 self.loadmap()
163 163 if node in self.map:
164 164 return node
165 165 return None
166 166 self.mapfind_count += 1
167 167 last = self.l - 1
168 168 while self.index[last] != None:
169 169 if last == 0:
170 170 self.all = 1
171 171 self.allmap = 1
172 172 return None
173 173 last -= 1
174 174 end = (last + 1) * self.s
175 175 blocksize = self.s * 256
176 176 while end >= 0:
177 177 start = max(end - blocksize, 0)
178 178 self.dataf.seek(start)
179 179 data = self.dataf.read(end - start)
180 180 findend = end - start
181 181 while True:
182 182 # we're searching backwards, so we have to make sure
183 183 # we don't find a changeset where this node is a parent
184 184 off = data.rfind(node, 0, findend)
185 185 findend = off
186 186 if off >= 0:
187 187 i = off / self.s
188 188 off = i * self.s
189 189 n = data[off + self.shaoffset:off + self.shaoffset + 20]
190 190 if n == node:
191 191 self.map[n] = i + start / self.s
192 192 return node
193 193 else:
194 194 break
195 195 end -= blocksize
196 196 return None
197 197
198 198 def loadindex(self, i=None, end=None):
199 199 if self.all: return
200 200 all = False
201 201 if i == None:
202 202 blockstart = 0
203 203 blocksize = (512 / self.s) * self.s
204 204 end = self.datasize
205 205 all = True
206 206 else:
207 207 if end:
208 208 blockstart = i * self.s
209 209 end = end * self.s
210 210 blocksize = end - blockstart
211 211 else:
212 212 blockstart = (i & ~(32)) * self.s
213 213 blocksize = self.s * 64
214 214 end = blockstart + blocksize
215 215 while blockstart < end:
216 216 self.loadblock(blockstart, blocksize)
217 217 blockstart += blocksize
218 218 if all: self.all = True
219 219
220 220 class lazyindex(object):
221 221 """a lazy version of the index array"""
222 222 def __init__(self, parser):
223 223 self.p = parser
224 224 def __len__(self):
225 225 return len(self.p.index)
226 226 def load(self, pos):
227 227 if pos < 0:
228 228 pos += len(self.p.index)
229 229 self.p.loadindex(pos)
230 230 return self.p.index[pos]
231 231 def __getitem__(self, pos):
232 232 ret = self.p.index[pos] or self.load(pos)
233 233 if isinstance(ret, str):
234 234 ret = struct.unpack(self.p.indexformat, ret)
235 235 return ret
236 236 def __setitem__(self, pos, item):
237 237 self.p.index[pos] = item
238 238 def __delitem__(self, pos):
239 239 del self.p.index[pos]
240 240 def append(self, e):
241 241 self.p.index.append(e)
242 242
243 243 class lazymap(object):
244 244 """a lazy version of the node map"""
245 245 def __init__(self, parser):
246 246 self.p = parser
247 247 def load(self, key):
248 248 n = self.p.findnode(key)
249 249 if n == None:
250 250 raise KeyError(key)
251 251 def __contains__(self, key):
252 252 if key in self.p.map:
253 253 return True
254 254 self.p.loadmap()
255 255 return key in self.p.map
256 256 def __iter__(self):
257 257 yield nullid
258 258 for i in xrange(self.p.l):
259 259 ret = self.p.index[i]
260 260 if not ret:
261 261 self.p.loadindex(i)
262 262 ret = self.p.index[i]
263 263 if isinstance(ret, str):
264 264 ret = struct.unpack(self.p.indexformat, ret)
265 265 yield ret[-1]
266 266 def __getitem__(self, key):
267 267 try:
268 268 return self.p.map[key]
269 269 except KeyError:
270 270 try:
271 271 self.load(key)
272 272 return self.p.map[key]
273 273 except KeyError:
274 274 raise KeyError("node " + hex(key))
275 275 def __setitem__(self, key, val):
276 276 self.p.map[key] = val
277 277 def __delitem__(self, key):
278 278 del self.p.map[key]
279 279
280 280 class RevlogError(Exception): pass
281 281
282 282 class revlog(object):
283 283 """
284 284 the underlying revision storage object
285 285
286 286 A revlog consists of two parts, an index and the revision data.
287 287
288 288 The index is a file with a fixed record size containing
289 289 information on each revision, including its nodeid (hash), the
290 290 nodeids of its parents, the position and offset of its data within
291 291 the data file, and the revision it's based on. Finally, each entry
292 292 contains a linkrev entry that can serve as a pointer to external
293 293 data.
294 294
295 295 The revision data itself is a linear collection of data chunks.
296 296 Each chunk represents a revision and is usually represented as a
297 297 delta against the previous chunk. To bound lookup time, runs of
298 298 deltas are limited to about 2 times the length of the original
299 299 version data. This makes retrieval of a version proportional to
300 300 its size, or O(1) relative to the number of revisions.
301 301
302 302 Both pieces of the revlog are written to in an append-only
303 303 fashion, which means we never need to rewrite a file to insert or
304 304 remove data, and can use some simple techniques to avoid the need
305 305 for locking while reading.
306 306 """
307 307 def __init__(self, opener, indexfile, datafile,
308 308 defversion=REVLOG_DEFAULT_VERSION):
309 309 """
310 310 create a revlog object
311 311
312 312 opener is a function that abstracts the file opening operation
313 313 and can be used to implement COW semantics or the like.
314 314 """
315 315 self.indexfile = indexfile
316 316 self.datafile = datafile
317 317 self.opener = opener
318 318
319 319 self.indexstat = None
320 320 self.cache = None
321 321 self.chunkcache = None
322 322 self.defversion = defversion
323 323 self.load()
324 324
325 325 def load(self):
326 326 v = self.defversion
327 327 try:
328 328 f = self.opener(self.indexfile)
329 329 i = f.read(4)
330 330 f.seek(0)
331 331 except IOError, inst:
332 332 if inst.errno != errno.ENOENT:
333 333 raise
334 334 i = ""
335 335 else:
336 336 try:
337 337 st = util.fstat(f)
338 338 except AttributeError, inst:
339 339 st = None
340 340 else:
341 341 oldst = self.indexstat
342 342 if (oldst and st.st_dev == oldst.st_dev
343 343 and st.st_ino == oldst.st_ino
344 344 and st.st_mtime == oldst.st_mtime
345 345 and st.st_ctime == oldst.st_ctime):
346 346 return
347 347 self.indexstat = st
348 348 if len(i) > 0:
349 349 v = struct.unpack(versionformat, i)[0]
350 350 flags = v & ~0xFFFF
351 351 fmt = v & 0xFFFF
352 352 if fmt == REVLOGV0:
353 353 if flags:
354 354 raise RevlogError(_("index %s invalid flags %x for format v0") %
355 355 (self.indexfile, flags))
356 356 elif fmt == REVLOGNG:
357 357 if flags & ~REVLOGNGINLINEDATA:
358 358 raise RevlogError(_("index %s invalid flags %x for revlogng") %
359 359 (self.indexfile, flags))
360 360 else:
361 361 raise RevlogError(_("index %s invalid format %d") %
362 362 (self.indexfile, fmt))
363 363 self.version = v
364 364 if v == REVLOGV0:
365 365 self.indexformat = indexformatv0
366 366 shaoffset = v0shaoffset
367 367 else:
368 368 self.indexformat = indexformatng
369 369 shaoffset = ngshaoffset
370 370
371 371 if i:
372 372 if (lazyparser.safe_to_use and not self.inlinedata() and
373 373 st and st.st_size > 10000):
374 374 # big index, let's parse it on demand
375 375 parser = lazyparser(f, st.st_size, self.indexformat, shaoffset)
376 376 self.index = lazyindex(parser)
377 377 self.nodemap = lazymap(parser)
378 378 else:
379 379 self.parseindex(f, st)
380 380 if self.version != REVLOGV0:
381 381 e = list(self.index[0])
382 382 type = self.ngtype(e[0])
383 383 e[0] = self.offset_type(0, type)
384 384 self.index[0] = e
385 385 else:
386 386 self.nodemap = { nullid: -1}
387 387 self.index = []
388 388
389 389
390 390 def parseindex(self, fp, st):
391 391 s = struct.calcsize(self.indexformat)
392 392 self.index = []
393 393 self.nodemap = {nullid: -1}
394 394 inline = self.inlinedata()
395 395 n = 0
396 396 leftover = None
397 397 while True:
398 398 if st:
399 399 data = fp.read(65536)
400 400 else:
401 401 # hack for httprangereader, it doesn't do partial reads well
402 402 data = fp.read()
403 403 if not data:
404 404 break
405 405 if n == 0 and self.inlinedata():
406 406 # cache the first chunk
407 407 self.chunkcache = (0, data)
408 408 if leftover:
409 409 data = leftover + data
410 410 leftover = None
411 411 off = 0
412 412 l = len(data)
413 413 while off < l:
414 414 if l - off < s:
415 415 leftover = data[off:]
416 416 break
417 417 cur = data[off:off + s]
418 418 off += s
419 419 e = struct.unpack(self.indexformat, cur)
420 420 self.index.append(e)
421 421 self.nodemap[e[-1]] = n
422 422 n += 1
423 423 if inline:
424 424 off += e[1]
425 425 if off > l:
426 426 # some things don't seek well, just read it
427 427 fp.read(off - l)
428 428 if not st:
429 429 break
430 430
431 431
432 432 def ngoffset(self, q):
433 433 if q & 0xFFFF:
434 434 raise RevlogError(_('%s: incompatible revision flag %x') %
435 435 (self.indexfile, q))
436 436 return long(q >> 16)
437 437
438 438 def ngtype(self, q):
439 439 return int(q & 0xFFFF)
440 440
441 441 def offset_type(self, offset, type):
442 442 return long(long(offset) << 16 | type)
443 443
444 444 def loadindex(self, start, end):
445 445 """load a block of indexes all at once from the lazy parser"""
446 446 if isinstance(self.index, lazyindex):
447 447 self.index.p.loadindex(start, end)
448 448
449 449 def loadindexmap(self):
450 450 """loads both the map and the index from the lazy parser"""
451 451 if isinstance(self.index, lazyindex):
452 452 p = self.index.p
453 453 p.loadindex()
454 454 self.nodemap = p.map
455 455
456 456 def loadmap(self):
457 457 """loads the map from the lazy parser"""
458 458 if isinstance(self.nodemap, lazymap):
459 459 self.nodemap.p.loadmap()
460 460 self.nodemap = self.nodemap.p.map
461 461
462 462 def inlinedata(self): return self.version & REVLOGNGINLINEDATA
463 463 def tip(self): return self.node(len(self.index) - 1)
464 464 def count(self): return len(self.index)
465 465 def node(self, rev):
466 466 return (rev < 0) and nullid or self.index[rev][-1]
467 467 def rev(self, node):
468 468 try:
469 469 return self.nodemap[node]
470 470 except KeyError:
471 471 raise RevlogError(_('%s: no node %s') % (self.indexfile, hex(node)))
472 472 def linkrev(self, node): return self.index[self.rev(node)][-4]
473 473 def parents(self, node):
474 474 if node == nullid: return (nullid, nullid)
475 475 r = self.rev(node)
476 476 d = self.index[r][-3:-1]
477 477 if self.version == REVLOGV0:
478 478 return d
479 479 return [ self.node(x) for x in d ]
480 480 def parentrevs(self, rev):
481 481 if rev == -1:
482 482 return (-1, -1)
483 483 d = self.index[rev][-3:-1]
484 484 if self.version == REVLOGV0:
485 485 return [ self.rev(x) for x in d ]
486 486 return d
487 487 def start(self, rev):
488 488 if rev < 0:
489 489 return -1
490 490 if self.version != REVLOGV0:
491 491 return self.ngoffset(self.index[rev][0])
492 492 return self.index[rev][0]
493 493
494 494 def end(self, rev): return self.start(rev) + self.length(rev)
495 495
496 496 def size(self, rev):
497 497 """return the length of the uncompressed text for a given revision"""
498 498 l = -1
499 499 if self.version != REVLOGV0:
500 500 l = self.index[rev][2]
501 501 if l >= 0:
502 502 return l
503 503
504 504 t = self.revision(self.node(rev))
505 505 return len(t)
506 506
507 507 # alternate implementation. The advantage of this code is that it
508 508 # will be faster for a single revision. But the results are not
509 509 # cached, so finding the size of every revision will be slower.
510 510 """
511 511 if self.cache and self.cache[1] == rev:
512 512 return len(self.cache[2])
513 513
514 514 base = self.base(rev)
515 515 if self.cache and self.cache[1] >= base and self.cache[1] < rev:
516 516 base = self.cache[1]
517 517 text = self.cache[2]
518 518 else:
519 519 text = self.revision(self.node(base))
520 520
521 521 l = len(text)
522 522 for x in xrange(base + 1, rev + 1):
523 523 l = mdiff.patchedsize(l, self.chunk(x))
524 524 return l
525 525 """
526 526
527 527 def length(self, rev):
528 528 if rev < 0:
529 529 return 0
530 530 else:
531 531 return self.index[rev][1]
532 532 def base(self, rev): return (rev < 0) and rev or self.index[rev][-5]
533 533
534 534 def reachable(self, rev, stop=None):
535 535 reachable = {}
536 536 visit = [rev]
537 537 reachable[rev] = 1
538 538 if stop:
539 539 stopn = self.rev(stop)
540 540 else:
541 541 stopn = 0
542 542 while visit:
543 543 n = visit.pop(0)
544 544 if n == stop:
545 545 continue
546 546 if n == nullid:
547 547 continue
548 548 for p in self.parents(n):
549 549 if self.rev(p) < stopn:
550 550 continue
551 551 if p not in reachable:
552 552 reachable[p] = 1
553 553 visit.append(p)
554 554 return reachable
555 555
556 556 def nodesbetween(self, roots=None, heads=None):
557 557 """Return a tuple containing three elements. Elements 1 and 2 contain
558 558 a final list of bases and heads after all the unreachable ones have been
559 559 pruned. Element 0 contains a topologically sorted list of all
560 560
561 561 nodes that satisfy these constraints:
562 562 1. All nodes must be descended from a node in roots (the nodes on
563 563 roots are considered descended from themselves).
564 564 2. All nodes must also be ancestors of a node in heads (the nodes in
565 565 heads are considered to be their own ancestors).
566 566
567 567 If roots is unspecified, nullid is assumed as the only root.
568 568 If heads is unspecified, it is taken to be the output of the
569 569 heads method (i.e. a list of all nodes in the repository that
570 570 have no children)."""
571 571 nonodes = ([], [], [])
572 572 if roots is not None:
573 573 roots = list(roots)
574 574 if not roots:
575 575 return nonodes
576 576 lowestrev = min([self.rev(n) for n in roots])
577 577 else:
578 578 roots = [nullid] # Everybody's a descendent of nullid
579 579 lowestrev = -1
580 580 if (lowestrev == -1) and (heads is None):
581 581 # We want _all_ the nodes!
582 582 return ([self.node(r) for r in xrange(0, self.count())],
583 583 [nullid], list(self.heads()))
584 584 if heads is None:
585 585 # All nodes are ancestors, so the latest ancestor is the last
586 586 # node.
587 587 highestrev = self.count() - 1
588 588 # Set ancestors to None to signal that every node is an ancestor.
589 589 ancestors = None
590 590 # Set heads to an empty dictionary for later discovery of heads
591 591 heads = {}
592 592 else:
593 593 heads = list(heads)
594 594 if not heads:
595 595 return nonodes
596 596 ancestors = {}
597 597 # Start at the top and keep marking parents until we're done.
598 598 nodestotag = heads[:]
599 599 # Turn heads into a dictionary so we can remove 'fake' heads.
600 600 # Also, later we will be using it to filter out the heads we can't
601 601 # find from roots.
602 602 heads = dict.fromkeys(heads, 0)
603 603 # Remember where the top was so we can use it as a limit later.
604 604 highestrev = max([self.rev(n) for n in nodestotag])
605 605 while nodestotag:
606 606 # grab a node to tag
607 607 n = nodestotag.pop()
608 608 # Never tag nullid
609 609 if n == nullid:
610 610 continue
611 611 # A node's revision number represents its place in a
612 612 # topologically sorted list of nodes.
613 613 r = self.rev(n)
614 614 if r >= lowestrev:
615 615 if n not in ancestors:
616 616 # If we are possibly a descendent of one of the roots
617 617 # and we haven't already been marked as an ancestor
618 618 ancestors[n] = 1 # Mark as ancestor
619 619 # Add non-nullid parents to list of nodes to tag.
620 620 nodestotag.extend([p for p in self.parents(n) if
621 621 p != nullid])
622 622 elif n in heads: # We've seen it before, is it a fake head?
623 623 # So it is, real heads should not be the ancestors of
624 624 # any other heads.
625 625 heads.pop(n)
626 626 if not ancestors:
627 627 return nonodes
628 628 # Now that we have our set of ancestors, we want to remove any
629 629 # roots that are not ancestors.
630 630
631 631 # If one of the roots was nullid, everything is included anyway.
632 632 if lowestrev > -1:
633 633 # But, since we weren't, let's recompute the lowest rev to not
634 634 # include roots that aren't ancestors.
635 635
636 636 # Filter out roots that aren't ancestors of heads
637 637 roots = [n for n in roots if n in ancestors]
638 638 # Recompute the lowest revision
639 639 if roots:
640 640 lowestrev = min([self.rev(n) for n in roots])
641 641 else:
642 642 # No more roots? Return empty list
643 643 return nonodes
644 644 else:
645 645 # We are descending from nullid, and don't need to care about
646 646 # any other roots.
647 647 lowestrev = -1
648 648 roots = [nullid]
649 649 # Transform our roots list into a 'set' (i.e. a dictionary where the
650 650 # values don't matter).
651 651 descendents = dict.fromkeys(roots, 1)
652 652 # Also, keep the original roots so we can filter out roots that aren't
653 653 # 'real' roots (i.e. are descended from other roots).
654 654 roots = descendents.copy()
655 655 # Our topologically sorted list of output nodes.
656 656 orderedout = []
657 657 # Don't start at nullid since we don't want nullid in our output list,
658 658 # and if nullid shows up in descendents, empty parents will look like
659 659 # they're descendents.
660 660 for r in xrange(max(lowestrev, 0), highestrev + 1):
661 661 n = self.node(r)
662 662 isdescendent = False
663 663 if lowestrev == -1: # Everybody is a descendent of nullid
664 664 isdescendent = True
665 665 elif n in descendents:
666 666 # n is already a descendent
667 667 isdescendent = True
668 668 # This check only needs to be done here because all the roots
669 669 # will start being marked as descendents before the loop.
670 670 if n in roots:
671 671 # If n was a root, check if it's a 'real' root.
672 672 p = tuple(self.parents(n))
673 673 # If any of its parents are descendents, it's not a root.
674 674 if (p[0] in descendents) or (p[1] in descendents):
675 675 roots.pop(n)
676 676 else:
677 677 p = tuple(self.parents(n))
678 678 # A node is a descendent if either of its parents are
679 679 # descendents. (We seeded the descendents list with the roots
680 680 # up there, remember?)
681 681 if (p[0] in descendents) or (p[1] in descendents):
682 682 descendents[n] = 1
683 683 isdescendent = True
684 684 if isdescendent and ((ancestors is None) or (n in ancestors)):
685 685 # Only include nodes that are both descendents and ancestors.
686 686 orderedout.append(n)
687 687 if (ancestors is not None) and (n in heads):
688 688 # We're trying to figure out which heads are reachable
689 689 # from roots.
690 690 # Mark this head as having been reached
691 691 heads[n] = 1
692 692 elif ancestors is None:
693 693 # Otherwise, we're trying to discover the heads.
694 694 # Assume this is a head because if it isn't, the next step
695 695 # will eventually remove it.
696 696 heads[n] = 1
697 697 # But, obviously its parents aren't.
698 698 for p in self.parents(n):
699 699 heads.pop(p, None)
700 700 heads = [n for n in heads.iterkeys() if heads[n] != 0]
701 701 roots = roots.keys()
702 702 assert orderedout
703 703 assert roots
704 704 assert heads
705 705 return (orderedout, roots, heads)
706 706
707 707 def heads(self, start=None):
708 708 """return the list of all nodes that have no children
709 709
710 710 if start is specified, only heads that are descendants of
711 711 start will be returned
712 712
713 713 """
714 714 if start is None:
715 715 start = nullid
716 716 startrev = self.rev(start)
717 717 reachable = {startrev: 1}
718 718 heads = {startrev: 1}
719 719
720 720 parentrevs = self.parentrevs
721 721 for r in xrange(startrev + 1, self.count()):
722 722 for p in parentrevs(r):
723 723 if p in reachable:
724 724 reachable[r] = 1
725 725 heads[r] = 1
726 726 if p in heads:
727 727 del heads[p]
728 728 return [self.node(r) for r in heads]
729 729
730 730 def children(self, node):
731 731 """find the children of a given node"""
732 732 c = []
733 733 p = self.rev(node)
734 734 for r in range(p + 1, self.count()):
735 735 n = self.node(r)
736 736 for pn in self.parents(n):
737 737 if pn == node:
738 738 c.append(n)
739 739 continue
740 740 elif pn == nullid:
741 741 continue
742 742 return c
743 743
744 744 def lookup(self, id):
745 745 """locate a node based on revision number or subset of hex nodeid"""
746 746 if id in self.nodemap:
747 747 return id
748 748 if type(id) == type(0):
749 - rev = id
750 - if rev < 0: rev = self.count() + rev
751 - if rev < 0 or rev >= self.count(): return None
752 - return self.node(rev)
749 + return self.node(id)
753 750 try:
754 751 rev = int(id)
755 752 if str(rev) != id: raise ValueError
756 753 if rev < 0: rev = self.count() + rev
757 754 if rev < 0 or rev >= self.count(): raise ValueError
758 755 return self.node(rev)
759 756 except (ValueError, OverflowError):
760 757 c = []
761 758 for n in self.nodemap:
762 759 if hex(n).startswith(id):
763 760 c.append(n)
764 761 if len(c) > 1: raise RevlogError(_("Ambiguous identifier"))
765 762 if len(c) < 1: raise RevlogError(_("No match found"))
766 763 return c[0]
767 764
768 765 return None
769 766
770 767 def diff(self, a, b):
771 768 """return a delta between two revisions"""
772 769 return mdiff.textdiff(a, b)
773 770
774 771 def patches(self, t, pl):
775 772 """apply a list of patches to a string"""
776 773 return mdiff.patches(t, pl)
777 774
778 775 def chunk(self, rev, df=None, cachelen=4096):
779 776 start, length = self.start(rev), self.length(rev)
780 777 inline = self.inlinedata()
781 778 if inline:
782 779 start += (rev + 1) * struct.calcsize(self.indexformat)
783 780 end = start + length
784 781 def loadcache(df):
785 782 cache_length = max(cachelen, length) # 4k
786 783 if not df:
787 784 if inline:
788 785 df = self.opener(self.indexfile)
789 786 else:
790 787 df = self.opener(self.datafile)
791 788 df.seek(start)
792 789 self.chunkcache = (start, df.read(cache_length))
793 790
794 791 if not self.chunkcache:
795 792 loadcache(df)
796 793
797 794 cache_start = self.chunkcache[0]
798 795 cache_end = cache_start + len(self.chunkcache[1])
799 796 if start >= cache_start and end <= cache_end:
800 797 # it is cached
801 798 offset = start - cache_start
802 799 else:
803 800 loadcache(df)
804 801 offset = 0
805 802
806 803 #def checkchunk():
807 804 # df = self.opener(self.datafile)
808 805 # df.seek(start)
809 806 # return df.read(length)
810 807 #assert s == checkchunk()
811 808 return decompress(self.chunkcache[1][offset:offset + length])
812 809
813 810 def delta(self, node):
814 811 """return or calculate a delta between a node and its predecessor"""
815 812 r = self.rev(node)
816 813 return self.revdiff(r - 1, r)
817 814
818 815 def revdiff(self, rev1, rev2):
819 816 """return or calculate a delta between two revisions"""
820 817 b1 = self.base(rev1)
821 818 b2 = self.base(rev2)
822 819 if b1 == b2 and rev1 + 1 == rev2:
823 820 return self.chunk(rev2)
824 821 else:
825 822 return self.diff(self.revision(self.node(rev1)),
826 823 self.revision(self.node(rev2)))
827 824
828 825 def revision(self, node):
829 826 """return an uncompressed revision of a given"""
830 827 if node == nullid: return ""
831 828 if self.cache and self.cache[0] == node: return self.cache[2]
832 829
833 830 # look up what we need to read
834 831 text = None
835 832 rev = self.rev(node)
836 833 base = self.base(rev)
837 834
838 835 if self.inlinedata():
839 836 # we probably have the whole chunk cached
840 837 df = None
841 838 else:
842 839 df = self.opener(self.datafile)
843 840
844 841 # do we have useful data cached?
845 842 if self.cache and self.cache[1] >= base and self.cache[1] < rev:
846 843 base = self.cache[1]
847 844 text = self.cache[2]
848 845 self.loadindex(base, rev + 1)
849 846 else:
850 847 self.loadindex(base, rev + 1)
851 848 text = self.chunk(base, df=df)
852 849
853 850 bins = []
854 851 for r in xrange(base + 1, rev + 1):
855 852 bins.append(self.chunk(r, df=df))
856 853
857 854 text = self.patches(text, bins)
858 855
859 856 p1, p2 = self.parents(node)
860 857 if node != hash(text, p1, p2):
861 858 raise RevlogError(_("integrity check failed on %s:%d")
862 859 % (self.datafile, rev))
863 860
864 861 self.cache = (node, rev, text)
865 862 return text
866 863
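
In outline, revision() rebuilds a text by walking the delta chain recorded in the index: take the full version stored at base(rev), patch forward through every intermediate chunk, then verify against the recorded node (a conceptual condensation of the method above, ignoring the cache and inline handling):

    base = self.base(rev)
    text = self.chunk(base)                           # the stored full version
    bins = [self.chunk(r) for r in xrange(base + 1, rev + 1)]
    text = self.patches(text, bins)                   # mdiff applies the run of deltas
    p1, p2 = self.parents(node)
    assert node == hash(text, p1, p2)                 # integrity check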
867 864 def checkinlinesize(self, tr, fp=None):
868 865 if not self.inlinedata():
869 866 return
870 867 if not fp:
871 868 fp = self.opener(self.indexfile, 'r')
872 869 fp.seek(0, 2)
873 870 size = fp.tell()
874 871 if size < 131072:
875 872 return
876 873 trinfo = tr.find(self.indexfile)
877 874 if trinfo == None:
878 875 raise RevlogError(_("%s not found in the transaction") %
879 876 self.indexfile)
880 877
881 878 trindex = trinfo[2]
882 879 dataoff = self.start(trindex)
883 880
884 881 tr.add(self.datafile, dataoff)
885 882 df = self.opener(self.datafile, 'w')
886 883 calc = struct.calcsize(self.indexformat)
887 884 for r in xrange(self.count()):
888 885 start = self.start(r) + (r + 1) * calc
889 886 length = self.length(r)
890 887 fp.seek(start)
891 888 d = fp.read(length)
892 889 df.write(d)
893 890 fp.close()
894 891 df.close()
895 892 fp = self.opener(self.indexfile, 'w', atomictemp=True)
896 893 self.version &= ~(REVLOGNGINLINEDATA)
897 894 if self.count():
898 895 x = self.index[0]
899 896 e = struct.pack(self.indexformat, *x)[4:]
900 897 l = struct.pack(versionformat, self.version)
901 898 fp.write(l)
902 899 fp.write(e)
903 900
904 901 for i in xrange(1, self.count()):
905 902 x = self.index[i]
906 903 e = struct.pack(self.indexformat, *x)
907 904 fp.write(e)
908 905
909 906 # if we don't call rename, the temp file will never replace the
910 907 # real index
911 908 fp.rename()
912 909
913 910 tr.replace(self.indexfile, trindex * calc)
914 911 self.chunkcache = None
915 912
916 913 def addrevision(self, text, transaction, link, p1=None, p2=None, d=None):
917 914 """add a revision to the log
918 915
919 916 text - the revision data to add
920 917 transaction - the transaction object used for rollback
921 918 link - the linkrev data to add
922 919 p1, p2 - the parent nodeids of the revision
923 920 d - an optional precomputed delta
924 921 """
925 922 if text is None: text = ""
926 923 if p1 is None: p1 = self.tip()
927 924 if p2 is None: p2 = nullid
928 925
929 926 node = hash(text, p1, p2)
930 927
931 928 if node in self.nodemap:
932 929 return node
933 930
934 931 n = self.count()
935 932 t = n - 1
936 933
937 934 if n:
938 935 base = self.base(t)
939 936 start = self.start(base)
940 937 end = self.end(t)
941 938 if not d:
942 939 prev = self.revision(self.tip())
943 940 d = self.diff(prev, str(text))
944 941 data = compress(d)
945 942 l = len(data[1]) + len(data[0])
946 943 dist = end - start + l
947 944
948 945 # full versions are inserted when the needed deltas
949 946 # become comparable to the uncompressed text
950 947 if not n or dist > len(text) * 2:
951 948 data = compress(text)
952 949 l = len(data[1]) + len(data[0])
953 950 base = n
954 951 else:
955 952 base = self.base(t)
956 953
957 954 offset = 0
958 955 if t >= 0:
959 956 offset = self.end(t)
960 957
961 958 if self.version == REVLOGV0:
962 959 e = (offset, l, base, link, p1, p2, node)
963 960 else:
964 961 e = (self.offset_type(offset, 0), l, len(text),
965 962 base, link, self.rev(p1), self.rev(p2), node)
966 963
967 964 self.index.append(e)
968 965 self.nodemap[node] = n
969 966 entry = struct.pack(self.indexformat, *e)
970 967
971 968 if not self.inlinedata():
972 969 transaction.add(self.datafile, offset)
973 970 transaction.add(self.indexfile, n * len(entry))
974 971 f = self.opener(self.datafile, "a")
975 972 if data[0]:
976 973 f.write(data[0])
977 974 f.write(data[1])
978 975 f.close()
979 976 f = self.opener(self.indexfile, "a")
980 977 else:
981 978 f = self.opener(self.indexfile, "a+")
982 979 f.seek(0, 2)
983 980 transaction.add(self.indexfile, f.tell(), self.count() - 1)
984 981
985 982 if len(self.index) == 1 and self.version != REVLOGV0:
986 983 l = struct.pack(versionformat, self.version)
987 984 f.write(l)
988 985 entry = entry[4:]
989 986
990 987 f.write(entry)
991 988
992 989 if self.inlinedata():
993 990 f.write(data[0])
994 991 f.write(data[1])
995 992 self.checkinlinesize(transaction, f)
996 993
997 994 self.cache = (node, n, text)
998 995 return node
999 996
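
The "dist > len(text) * 2" cutoff in addrevision() is what bounds delta chains: a full version is stored whenever the bytes spanned from the chain's base through the new delta exceed twice the new text's size. With illustrative numbers: if the base version starts at offset 0, the previous revision ends at 9000, and the new compressed delta is 500 bytes, dist is 9500; for a 4000-byte text, 9500 > 8000, so the text is recompressed whole and base is reset to the new revision.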
1000 997 def ancestor(self, a, b):
1001 998 """calculate the least common ancestor of nodes a and b"""
1002 999
1003 1000 # start with some short cuts for the linear cases
1004 1001 if a == b:
1005 1002 return a
1006 1003 ra = self.rev(a)
1007 1004 rb = self.rev(b)
1008 1005 if ra < rb:
1009 1006 last = b
1010 1007 first = a
1011 1008 else:
1012 1009 last = a
1013 1010 first = b
1014 1011
1015 1012 # reachable won't include stop in the list, so we have to use a parent
1016 1013 reachable = self.reachable(last, stop=self.parents(first)[0])
1017 1014 if first in reachable:
1018 1015 return first
1019 1016
1020 1017 # calculate the distance of every node from root
1021 1018 dist = {nullid: 0}
1022 1019 for i in xrange(self.count()):
1023 1020 n = self.node(i)
1024 1021 p1, p2 = self.parents(n)
1025 1022 dist[n] = max(dist[p1], dist[p2]) + 1
1026 1023
1027 1024 # traverse ancestors in order of decreasing distance from root
1028 1025 def ancestors(node):
1029 1026 # we store negative distances because heap returns smallest member
1030 1027 h = [(-dist[node], node)]
1031 1028 seen = {}
1032 1029 while h:
1033 1030 d, n = heapq.heappop(h)
1034 1031 if n not in seen:
1035 1032 seen[n] = 1
1036 1033 yield (-d, n)
1037 1034 for p in self.parents(n):
1038 1035 heapq.heappush(h, (-dist[p], p))
1039 1036
1040 1037 def generations(node):
1041 1038 sg, s = None, {}
1042 1039 for g,n in ancestors(node):
1043 1040 if g != sg:
1044 1041 if sg:
1045 1042 yield sg, s
1046 1043 sg, s = g, {n:1}
1047 1044 else:
1048 1045 s[n] = 1
1049 1046 yield sg, s
1050 1047
1051 1048 x = generations(a)
1052 1049 y = generations(b)
1053 1050 gx = x.next()
1054 1051 gy = y.next()
1055 1052
1056 1053 # increment each ancestor list until it is closer to root than
1057 1054 # the other, or they match
1058 1055 while 1:
1059 1056 #print "ancestor gen %s %s" % (gx[0], gy[0])
1060 1057 if gx[0] == gy[0]:
1061 1058 # find the intersection
1062 1059 i = [ n for n in gx[1] if n in gy[1] ]
1063 1060 if i:
1064 1061 return i[0]
1065 1062 else:
1066 1063 #print "next"
1067 1064 gy = y.next()
1068 1065 gx = x.next()
1069 1066 elif gx[0] < gy[0]:
1070 1067 #print "next y"
1071 1068 gy = y.next()
1072 1069 else:
1073 1070 #print "next x"
1074 1071 gx = x.next()
1075 1072
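
A worked example of the generation walk in ancestor(): in a graph with revisions 0 <- 1 <- 2 and 0 <- 3, dist is {0: 1, 1: 2, 2: 3, 3: 2}. For ancestor(2, 3), the stream for 2 starts at distance 3 and is advanced alone until it reaches distance 2; there the sets {1} and {3} don't intersect, so both streams advance; at distance 1 both yield {0}, the intersection is nonempty, and revision 0 is returned as the least common ancestor.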
1076 1073 def group(self, nodelist, lookup, infocollect=None):
1077 1074 """calculate a delta group
1078 1075
1079 1076 Given a list of changeset revs, return a set of deltas and
1080 1077 metadata corresponding to nodes. the first delta is
1081 1078 parent(nodes[0]) -> nodes[0] the receiver is guaranteed to
1082 1079 have this parent as it has all history before these
1083 1080 changesets. parent is parent[0]
1084 1081 """
1085 1082 revs = [self.rev(n) for n in nodelist]
1086 1083
1087 1084 # if we don't have any revisions touched by these changesets, bail
1088 1085 if not revs:
1089 1086 yield changegroup.closechunk()
1090 1087 return
1091 1088
1092 1089 # add the parent of the first rev
1093 1090 p = self.parents(self.node(revs[0]))[0]
1094 1091 revs.insert(0, self.rev(p))
1095 1092
1096 1093 # build deltas
1097 1094 for d in xrange(0, len(revs) - 1):
1098 1095 a, b = revs[d], revs[d + 1]
1099 1096 nb = self.node(b)
1100 1097
1101 1098 if infocollect is not None:
1102 1099 infocollect(nb)
1103 1100
1104 1101 d = self.revdiff(a, b)
1105 1102 p = self.parents(nb)
1106 1103 meta = nb + p[0] + p[1] + lookup(nb)
1107 1104 yield changegroup.genchunk("%s%s" % (meta, d))
1108 1105
1109 1106 yield changegroup.closechunk()
1110 1107
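
Each non-closing chunk produced here is an 80-byte header followed by the delta, which addgroup() below consumes symmetrically:

    node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
    delta = chunk[80:]

node is the revision being transferred, p1/p2 are its parents, and cs is the changeset node that the lookup callback mapped it to (the linkrev target).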
1111 1108 def addgroup(self, revs, linkmapper, transaction, unique=0):
1112 1109 """
1113 1110 add a delta group
1114 1111
1115 1112 given a set of deltas, add them to the revision log. the
1116 1113 first delta is against its parent, which should be in our
1117 1114 log, the rest are against the previous delta.
1118 1115 """
1119 1116
1120 1117 #track the base of the current delta log
1121 1118 r = self.count()
1122 1119 t = r - 1
1123 1120 node = None
1124 1121
1125 1122 base = prev = -1
1126 1123 start = end = textlen = 0
1127 1124 if r:
1128 1125 end = self.end(t)
1129 1126
1130 1127 ifh = self.opener(self.indexfile, "a+")
1131 1128 ifh.seek(0, 2)
1132 1129 transaction.add(self.indexfile, ifh.tell(), self.count())
1133 1130 if self.inlinedata():
1134 1131 dfh = None
1135 1132 else:
1136 1133 transaction.add(self.datafile, end)
1137 1134 dfh = self.opener(self.datafile, "a")
1138 1135
1139 1136 # loop through our set of deltas
1140 1137 chain = None
1141 1138 for chunk in revs:
1142 1139 node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
1143 1140 link = linkmapper(cs)
1144 1141 if node in self.nodemap:
1145 1142 # this can happen if two branches make the same change
1146 1143 # if unique:
1147 1144 # raise RevlogError(_("already have %s") % hex(node[:4]))
1148 1145 chain = node
1149 1146 continue
1150 1147 delta = chunk[80:]
1151 1148
1152 1149 for p in (p1, p2):
1153 1150 if not p in self.nodemap:
1154 1151 raise RevlogError(_("unknown parent %s") % short(p))
1155 1152
1156 1153 if not chain:
1157 1154 # retrieve the parent revision of the delta chain
1158 1155 chain = p1
1159 1156 if not chain in self.nodemap:
1160 1157 raise RevlogError(_("unknown base %s") % short(chain[:4]))
1161 1158
1162 1159 # full versions are inserted when the needed deltas become
1163 1160 # comparable to the uncompressed text or when the previous
1164 1161 # version is not the one we have a delta against. We use
1165 1162 # the size of the previous full rev as a proxy for the
1166 1163 # current size.
1167 1164
1168 1165 if chain == prev:
1169 1166 tempd = compress(delta)
1170 1167 cdelta = tempd[0] + tempd[1]
1171 1168 textlen = mdiff.patchedsize(textlen, delta)
1172 1169
1173 1170 if chain != prev or (end - start + len(cdelta)) > textlen * 2:
1174 1171 # flush our writes here so we can read it in revision
1175 1172 if dfh:
1176 1173 dfh.flush()
1177 1174 ifh.flush()
1178 1175 text = self.revision(chain)
1179 1176 text = self.patches(text, [delta])
1180 1177 chk = self.addrevision(text, transaction, link, p1, p2)
1181 1178 if chk != node:
1182 1179 raise RevlogError(_("consistency error adding group"))
1183 1180 textlen = len(text)
1184 1181 else:
1185 1182 if self.version == REVLOGV0:
1186 1183 e = (end, len(cdelta), base, link, p1, p2, node)
1187 1184 else:
1188 1185 e = (self.offset_type(end, 0), len(cdelta), textlen, base,
1189 1186 link, self.rev(p1), self.rev(p2), node)
1190 1187 self.index.append(e)
1191 1188 self.nodemap[node] = r
1192 1189 if self.inlinedata():
1193 1190 ifh.write(struct.pack(self.indexformat, *e))
1194 1191 ifh.write(cdelta)
1195 1192 self.checkinlinesize(transaction, ifh)
1196 1193 if not self.inlinedata():
1197 1194 dfh = self.opener(self.datafile, "a")
1198 1195 ifh = self.opener(self.indexfile, "a")
1199 1196 else:
1200 1197 if not dfh:
1201 1198 # addrevision switched from inline to conventional
1202 1199 # reopen the index
1203 1200 dfh = self.opener(self.datafile, "a")
1204 1201 ifh = self.opener(self.indexfile, "a")
1205 1202 dfh.write(cdelta)
1206 1203 ifh.write(struct.pack(self.indexformat, *e))
1207 1204
1208 1205 t, r, chain, prev = r, r + 1, node, node
1209 1206 base = self.base(t)
1210 1207 start = self.start(base)
1211 1208 end = self.end(t)
1212 1209
1213 1210 return node
1214 1211
1215 1212 def strip(self, rev, minlink):
1216 1213 if self.count() == 0 or rev >= self.count():
1217 1214 return
1218 1215
1219 1216 if isinstance(self.index, lazyindex):
1220 1217 self.loadindexmap()
1221 1218
1222 1219 # When stripping away a revision, we need to make sure it
1223 1220 # does not actually belong to an older changeset.
1224 1221 # The minlink parameter defines the oldest revision
1225 1222 # we're allowed to strip away.
1226 1223 while minlink > self.index[rev][-4]:
1227 1224 rev += 1
1228 1225 if rev >= self.count():
1229 1226 return
1230 1227
1231 1228 # first truncate the files on disk
1232 1229 end = self.start(rev)
1233 1230 if not self.inlinedata():
1234 1231 df = self.opener(self.datafile, "a")
1235 1232 df.truncate(end)
1236 1233 end = rev * struct.calcsize(self.indexformat)
1237 1234 else:
1238 1235 end += rev * struct.calcsize(self.indexformat)
1239 1236
1240 1237 indexf = self.opener(self.indexfile, "a")
1241 1238 indexf.truncate(end)
1242 1239
1243 1240 # then reset internal state in memory to forget those revisions
1244 1241 self.cache = None
1245 1242 self.chunkcache = None
1246 1243 for x in xrange(rev, self.count()):
1247 1244 del self.nodemap[self.node(x)]
1248 1245
1249 1246 del self.index[rev:]
1250 1247
1251 1248 def checksize(self):
1252 1249 expected = 0
1253 1250 if self.count():
1254 1251 expected = self.end(self.count() - 1)
1255 1252
1256 1253 try:
1257 1254 f = self.opener(self.datafile)
1258 1255 f.seek(0, 2)
1259 1256 actual = f.tell()
1260 1257 dd = actual - expected
1261 1258 except IOError, inst:
1262 1259 if inst.errno != errno.ENOENT:
1263 1260 raise
1264 1261 dd = 0
1265 1262
1266 1263 try:
1267 1264 f = self.opener(self.indexfile)
1268 1265 f.seek(0, 2)
1269 1266 actual = f.tell()
1270 1267 s = struct.calcsize(self.indexformat)
1271 1268 i = actual / s
1272 1269 di = actual - (i * s)
1273 1270 if self.inlinedata():
1274 1271 databytes = 0
1275 1272 for r in xrange(self.count()):
1276 1273 databytes += self.length(r)
1277 1274 dd = 0
1278 1275 di = actual - self.count() * s - databytes
1279 1276 except IOError, inst:
1280 1277 if inst.errno != errno.ENOENT:
1281 1278 raise
1282 1279 di = 0
1283 1280
1284 1281 return (dd, di)
1285 1282
1286 1283