revlog: more nullrev fixes
Benoit Boissinot
r3585:cac2c17b default
@@ -1,1280 +1,1284
1 1 """
2 2 revlog.py - storage back-end for mercurial
3 3
4 4 This provides efficient delta storage with O(1) retrieve and append
5 5 and O(changes) merge between branches
6 6
7 7 Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
8 8
9 9 This software may be used and distributed according to the terms
10 10 of the GNU General Public License, incorporated herein by reference.
11 11 """
12 12
13 13 from node import *
14 14 from i18n import gettext as _
15 15 from demandload import demandload
16 16 demandload(globals(), "binascii changegroup errno ancestor mdiff os")
17 17 demandload(globals(), "sha struct util zlib")
18 18
19 19 # revlog version strings
20 20 REVLOGV0 = 0
21 21 REVLOGNG = 1
22 22
23 23 # revlog flags
24 24 REVLOGNGINLINEDATA = (1 << 16)
25 25 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
26 26
27 27 REVLOG_DEFAULT_FORMAT = REVLOGNG
28 28 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
29 29
30 30 def flagstr(flag):
31 31 if flag == "inline":
32 32 return REVLOGNGINLINEDATA
33 33 raise RevlogError(_("unknown revlog flag %s" % flag))
34 34
35 35 def hash(text, p1, p2):
36 36 """generate a hash from the given text and its parent hashes
37 37
38 38 This hash combines both the current file contents and its history
39 39 in a manner that makes it easy to distinguish nodes with the same
40 40 content in the revision graph.
41 41 """
42 42 l = [p1, p2]
43 43 l.sort()
44 44 s = sha.new(l[0])
45 45 s.update(l[1])
46 46 s.update(text)
47 47 return s.digest()
48 48
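# Editor's sketch, not part of the original file: because hash() sorts the
# two parents before digesting, a nodeid never depends on parent order.
# p1/p2 below are arbitrary 20-byte digests made up for illustration.
#
#   >>> import sha
#   >>> p1, p2 = sha.new("a").digest(), sha.new("b").digest()
#   >>> hash("text", p1, p2) == hash("text", p2, p1)
#   True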
49 49 def compress(text):
50 50 """ generate a possibly-compressed representation of text """
51 51 if not text: return ("", text)
52 52 if len(text) < 44:
53 53 if text[0] == '\0': return ("", text)
54 54 return ('u', text)
55 55 bin = zlib.compress(text)
56 56 if len(bin) > len(text):
57 57 if text[0] == '\0': return ("", text)
58 58 return ('u', text)
59 59 return ("", bin)
60 60
61 61 def decompress(bin):
62 62 """ decompress the given input """
63 63 if not bin: return bin
64 64 t = bin[0]
65 65 if t == '\0': return bin
66 66 if t == 'x': return zlib.decompress(bin)
67 67 if t == 'u': return bin[1:]
68 68 raise RevlogError(_("unknown compression type %r") % t)
69 69
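# Editor's sketch, not part of the original file: compress() returns a
# (header, body) pair. The header is 'u' for short or incompressible text
# stored literally, and empty when the body is self-tagging (zlib output
# starts with 'x'; raw text starting with '\0' is stored as-is).
# Concatenating the pair always round-trips through decompress():
#
#   >>> t = "the quick brown fox jumps over the lazy dog" * 10
#   >>> h, b = compress(t)
#   >>> decompress(h + b) == t
#   True
#   >>> compress("tiny")              # under 44 bytes: not worth deflating
#   ('u', 'tiny')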
70 70 indexformatv0 = ">4l20s20s20s"
71 71 v0shaoffset = 56
72 72 # index ng:
73 73 # 6 bytes offset
74 74 # 2 bytes flags
75 75 # 4 bytes compressed length
76 76 # 4 bytes uncompressed length
77 77 # 4 bytes: base rev
78 78 # 4 bytes link rev
79 79 # 4 bytes parent 1 rev
80 80 # 4 bytes parent 2 rev
81 81 # 32 bytes: nodeid
82 82 indexformatng = ">Qiiiiii20s12x"
83 83 ngshaoffset = 32
84 84 versionformat = ">i"
85 85
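# Editor's sketch, not part of the original file: one revlogng index entry
# packed with the format above. calcsize() confirms the 64-byte record and
# the nodeid landing at ngshaoffset; the field values are made up.
#
#   >>> import struct
#   >>> struct.calcsize(indexformatng)
#   64
#   >>> e = struct.pack(indexformatng,
#   ...                 0L,            # offset/flags (see offset_type below)
#   ...                 11, 25,        # compressed / uncompressed length
#   ...                 0, 3,          # base rev, link rev
#   ...                 2, -1,         # parent revs (-1 == nullrev)
#   ...                 "x" * 20)      # nodeid (padded to 32 bytes on disk)
#   >>> struct.unpack(indexformatng, e)[-1] == "x" * 20
#   True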
86 86 class lazyparser(object):
87 87 """
88 88 this class avoids the need to parse the entirety of large indices
89 89 """
90 90
91 91 # lazyparser is not safe to use on windows if win32 extensions not
92 92 # available. it keeps the file handle open, which makes it impossible
93 93 # to break hardlinks on local cloned repos.
94 94 safe_to_use = os.name != 'nt' or (not util.is_win_9x() and
95 95 hasattr(util, 'win32api'))
96 96
97 97 def __init__(self, dataf, size, indexformat, shaoffset):
98 98 self.dataf = dataf
99 99 self.format = indexformat
100 100 self.s = struct.calcsize(indexformat)
101 101 self.indexformat = indexformat
102 102 self.datasize = size
103 103 self.l = size/self.s
104 104 self.index = [None] * self.l
105 105 self.map = {nullid: nullrev}
106 106 self.allmap = 0
107 107 self.all = 0
108 108 self.mapfind_count = 0
109 109 self.shaoffset = shaoffset
110 110
111 111 def loadmap(self):
112 112 """
113 113 during a commit, we need to make sure the rev being added is
114 114 not a duplicate. This requires loading the entire index,
115 115 which is fairly slow. loadmap can load up just the node map,
116 116 which takes much less time.
117 117 """
118 118 if self.allmap: return
119 119 end = self.datasize
120 120 self.allmap = 1
121 121 cur = 0
122 122 count = 0
123 123 blocksize = self.s * 256
124 124 self.dataf.seek(0)
125 125 while cur < end:
126 126 data = self.dataf.read(blocksize)
127 127 off = 0
128 128 for x in xrange(256):
129 129 n = data[off + self.shaoffset:off + self.shaoffset + 20]
130 130 self.map[n] = count
131 131 count += 1
132 132 if count >= self.l:
133 133 break
134 134 off += self.s
135 135 cur += blocksize
136 136
137 137 def loadblock(self, blockstart, blocksize, data=None):
138 138 if self.all: return
139 139 if data is None:
140 140 self.dataf.seek(blockstart)
141 141 if blockstart + blocksize > self.datasize:
142 142 # the revlog may have grown since we've started running,
143 143 # but we don't have space in self.index for more entries.
144 144 # limit blocksize so that we don't get too much data.
145 145 blocksize = max(self.datasize - blockstart, 0)
146 146 data = self.dataf.read(blocksize)
147 147 lend = len(data) / self.s
148 148 i = blockstart / self.s
149 149 off = 0
150 150 for x in xrange(lend):
151 151 if self.index[i + x] == None:
152 152 b = data[off : off + self.s]
153 153 self.index[i + x] = b
154 154 n = b[self.shaoffset:self.shaoffset + 20]
155 155 self.map[n] = i + x
156 156 off += self.s
157 157
158 158 def findnode(self, node):
159 159 """search backwards through the index file for a specific node"""
160 160 if self.allmap: return None
161 161
162 162 # hg log will cause many many searches for the manifest
163 163 # nodes. After we get called a few times, just load the whole
164 164 # thing.
165 165 if self.mapfind_count > 8:
166 166 self.loadmap()
167 167 if node in self.map:
168 168 return node
169 169 return None
170 170 self.mapfind_count += 1
171 171 last = self.l - 1
172 172 while self.index[last] != None:
173 173 if last == 0:
174 174 self.all = 1
175 175 self.allmap = 1
176 176 return None
177 177 last -= 1
178 178 end = (last + 1) * self.s
179 179 blocksize = self.s * 256
180 180 while end >= 0:
181 181 start = max(end - blocksize, 0)
182 182 self.dataf.seek(start)
183 183 data = self.dataf.read(end - start)
184 184 findend = end - start
185 185 while True:
186 186 # we're searching backwards, so we have to make sure
187 187 # we don't find a changeset where this node is a parent
188 188 off = data.rfind(node, 0, findend)
189 189 findend = off
190 190 if off >= 0:
191 191 i = off / self.s
192 192 off = i * self.s
193 193 n = data[off + self.shaoffset:off + self.shaoffset + 20]
194 194 if n == node:
195 195 self.map[n] = i + start / self.s
196 196 return node
197 197 else:
198 198 break
199 199 end -= blocksize
200 200 return None
201 201
202 202 def loadindex(self, i=None, end=None):
203 203 if self.all: return
204 204 all = False
205 205 if i == None:
206 206 blockstart = 0
207 207 blocksize = (512 / self.s) * self.s
208 208 end = self.datasize
209 209 all = True
210 210 else:
211 211 if end:
212 212 blockstart = i * self.s
213 213 end = end * self.s
214 214 blocksize = end - blockstart
215 215 else:
216 216 blockstart = (i & ~(32)) * self.s
217 217 blocksize = self.s * 64
218 218 end = blockstart + blocksize
219 219 while blockstart < end:
220 220 self.loadblock(blockstart, blocksize)
221 221 blockstart += blocksize
222 222 if all: self.all = True
223 223
224 224 class lazyindex(object):
225 225 """a lazy version of the index array"""
226 226 def __init__(self, parser):
227 227 self.p = parser
228 228 def __len__(self):
229 229 return len(self.p.index)
230 230 def load(self, pos):
231 231 if pos < 0:
232 232 pos += len(self.p.index)
233 233 self.p.loadindex(pos)
234 234 return self.p.index[pos]
235 235 def __getitem__(self, pos):
236 236 ret = self.p.index[pos] or self.load(pos)
237 237 if isinstance(ret, str):
238 238 ret = struct.unpack(self.p.indexformat, ret)
239 239 return ret
240 240 def __setitem__(self, pos, item):
241 241 self.p.index[pos] = item
242 242 def __delitem__(self, pos):
243 243 del self.p.index[pos]
244 244 def append(self, e):
245 245 self.p.index.append(e)
246 246
247 247 class lazymap(object):
248 248 """a lazy version of the node map"""
249 249 def __init__(self, parser):
250 250 self.p = parser
251 251 def load(self, key):
252 252 n = self.p.findnode(key)
253 253 if n == None:
254 254 raise KeyError(key)
255 255 def __contains__(self, key):
256 256 if key in self.p.map:
257 257 return True
258 258 self.p.loadmap()
259 259 return key in self.p.map
260 260 def __iter__(self):
261 261 yield nullid
262 262 for i in xrange(self.p.l):
263 263 ret = self.p.index[i]
264 264 if not ret:
265 265 self.p.loadindex(i)
266 266 ret = self.p.index[i]
267 267 if isinstance(ret, str):
268 268 ret = struct.unpack(self.p.indexformat, ret)
269 269 yield ret[-1]
270 270 def __getitem__(self, key):
271 271 try:
272 272 return self.p.map[key]
273 273 except KeyError:
274 274 try:
275 275 self.load(key)
276 276 return self.p.map[key]
277 277 except KeyError:
278 278 raise KeyError("node " + hex(key))
279 279 def __setitem__(self, key, val):
280 280 self.p.map[key] = val
281 281 def __delitem__(self, key):
282 282 del self.p.map[key]
283 283
284 284 class RevlogError(Exception): pass
285 285
286 286 class revlog(object):
287 287 """
288 288 the underlying revision storage object
289 289
290 290 A revlog consists of two parts, an index and the revision data.
291 291
292 292 The index is a file with a fixed record size containing
293 293 information on each revision, including its nodeid (hash), the
294 294 nodeids of its parents, the position and offset of its data within
295 295 the data file, and the revision it's based on. Finally, each entry
296 296 contains a linkrev entry that can serve as a pointer to external
297 297 data.
298 298
299 299 The revision data itself is a linear collection of data chunks.
300 300 Each chunk represents a revision and is usually represented as a
301 301 delta against the previous chunk. To bound lookup time, runs of
302 302 deltas are limited to about 2 times the length of the original
303 303 version data. This makes retrieval of a version proportional to
304 304 its size, or O(1) relative to the number of revisions.
305 305
306 306 Both pieces of the revlog are written to in an append-only
307 307 fashion, which means we never need to rewrite a file to insert or
308 308 remove data, and can use some simple techniques to avoid the need
309 309 for locking while reading.
310 310 """
311 311 def __init__(self, opener, indexfile, datafile,
312 312 defversion=REVLOG_DEFAULT_VERSION):
313 313 """
314 314 create a revlog object
315 315
316 316 opener is a function that abstracts the file opening operation
317 317 and can be used to implement COW semantics or the like.
318 318 """
319 319 self.indexfile = indexfile
320 320 self.datafile = datafile
321 321 self.opener = opener
322 322
323 323 self.indexstat = None
324 324 self.cache = None
325 325 self.chunkcache = None
326 326 self.defversion = defversion
327 327 self.load()
328 328
329 329 def load(self):
330 330 v = self.defversion
331 331 try:
332 332 f = self.opener(self.indexfile)
333 333 i = f.read(4)
334 334 f.seek(0)
335 335 except IOError, inst:
336 336 if inst.errno != errno.ENOENT:
337 337 raise
338 338 i = ""
339 339 else:
340 340 try:
341 341 st = util.fstat(f)
342 342 except AttributeError, inst:
343 343 st = None
344 344 else:
345 345 oldst = self.indexstat
346 346 if (oldst and st.st_dev == oldst.st_dev
347 347 and st.st_ino == oldst.st_ino
348 348 and st.st_mtime == oldst.st_mtime
349 349 and st.st_ctime == oldst.st_ctime):
350 350 return
351 351 self.indexstat = st
352 352 if len(i) > 0:
353 353 v = struct.unpack(versionformat, i)[0]
354 354 flags = v & ~0xFFFF
355 355 fmt = v & 0xFFFF
356 356 if fmt == REVLOGV0:
357 357 if flags:
358 358 raise RevlogError(_("index %s invalid flags %x for format v0" %
359 359 (self.indexfile, flags)))
360 360 elif fmt == REVLOGNG:
361 361 if flags & ~REVLOGNGINLINEDATA:
362 362 raise RevlogError(_("index %s invalid flags %x for revlogng" %
363 363 (self.indexfile, flags)))
364 364 else:
365 365 raise RevlogError(_("index %s invalid format %d" %
366 366 (self.indexfile, fmt)))
367 367 self.version = v
368 368 if v == REVLOGV0:
369 369 self.indexformat = indexformatv0
370 370 shaoffset = v0shaoffset
371 371 else:
372 372 self.indexformat = indexformatng
373 373 shaoffset = ngshaoffset
374 374
375 375 if i:
376 376 if (lazyparser.safe_to_use and not self.inlinedata() and
377 377 st and st.st_size > 10000):
378 378 # big index, let's parse it on demand
379 379 parser = lazyparser(f, st.st_size, self.indexformat, shaoffset)
380 380 self.index = lazyindex(parser)
381 381 self.nodemap = lazymap(parser)
382 382 else:
383 383 self.parseindex(f, st)
384 384 if self.version != REVLOGV0:
385 385 e = list(self.index[0])
386 386 type = self.ngtype(e[0])
387 387 e[0] = self.offset_type(0, type)
388 388 self.index[0] = e
389 389 else:
390 390 self.nodemap = {nullid: nullrev}
391 391 self.index = []
392 392
393 393
394 394 def parseindex(self, fp, st):
395 395 s = struct.calcsize(self.indexformat)
396 396 self.index = []
397 397 self.nodemap = {nullid: nullrev}
398 398 inline = self.inlinedata()
399 399 n = 0
400 400 leftover = None
401 401 while True:
402 402 if st:
403 403 data = fp.read(65536)
404 404 else:
405 405 # hack for httprangereader, it doesn't do partial reads well
406 406 data = fp.read()
407 407 if not data:
408 408 break
409 409 if n == 0 and self.inlinedata():
410 410 # cache the first chunk
411 411 self.chunkcache = (0, data)
412 412 if leftover:
413 413 data = leftover + data
414 414 leftover = None
415 415 off = 0
416 416 l = len(data)
417 417 while off < l:
418 418 if l - off < s:
419 419 leftover = data[off:]
420 420 break
421 421 cur = data[off:off + s]
422 422 off += s
423 423 e = struct.unpack(self.indexformat, cur)
424 424 self.index.append(e)
425 425 self.nodemap[e[-1]] = n
426 426 n += 1
427 427 if inline:
428 428 off += e[1]
429 429 if off > l:
430 430 # some things don't seek well, just read it
431 431 fp.read(off - l)
432 432 if not st:
433 433 break
434 434
435 435
436 436 def ngoffset(self, q):
437 437 if q & 0xFFFF:
438 438 raise RevlogError(_('%s: incompatible revision flag %x') %
439 439 (self.indexfile, q))
440 440 return long(q >> 16)
441 441
442 442 def ngtype(self, q):
443 443 return int(q & 0xFFFF)
444 444
445 445 def offset_type(self, offset, type):
446 446 return long(long(offset) << 16 | type)
447 447
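# Editor's sketch, not part of the original file: the first index field
# packs a 48-bit data-file offset above 16 bits of type flags, and
# ngoffset/ngtype take it apart again. With rl an assumed revlogng
# instance and made-up numbers:
#
#   >>> q = rl.offset_type(4096, 0)    # offset 4096, no flags set
#   >>> rl.ngtype(q), rl.ngoffset(q)
#   (0, 4096L)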
448 448 def loadindex(self, start, end):
449 449 """load a block of indexes all at once from the lazy parser"""
450 450 if isinstance(self.index, lazyindex):
451 451 self.index.p.loadindex(start, end)
452 452
453 453 def loadindexmap(self):
454 454 """loads both the map and the index from the lazy parser"""
455 455 if isinstance(self.index, lazyindex):
456 456 p = self.index.p
457 457 p.loadindex()
458 458 self.nodemap = p.map
459 459
460 460 def loadmap(self):
461 461 """loads the map from the lazy parser"""
462 462 if isinstance(self.nodemap, lazymap):
463 463 self.nodemap.p.loadmap()
464 464 self.nodemap = self.nodemap.p.map
465 465
466 466 def inlinedata(self): return self.version & REVLOGNGINLINEDATA
467 467 def tip(self): return self.node(len(self.index) - 1)
468 468 def count(self): return len(self.index)
469 469 def node(self, rev):
470 return (rev < 0) and nullid or self.index[rev][-1]
470 return rev == nullrev and nullid or self.index[rev][-1]
471 471 def rev(self, node):
472 472 try:
473 473 return self.nodemap[node]
474 474 except KeyError:
475 475 raise RevlogError(_('%s: no node %s') % (self.indexfile, hex(node)))
476 476 def linkrev(self, node):
477 477 return (node == nullid) and nullrev or self.index[self.rev(node)][-4]
478 478 def parents(self, node):
479 479 if node == nullid: return (nullid, nullid)
480 480 r = self.rev(node)
481 481 d = self.index[r][-3:-1]
482 482 if self.version == REVLOGV0:
483 483 return d
484 484 return (self.node(d[0]), self.node(d[1]))
485 485 def parentrevs(self, rev):
486 486 if rev == nullrev:
487 487 return (nullrev, nullrev)
488 488 d = self.index[rev][-3:-1]
489 489 if self.version == REVLOGV0:
490 490 return (self.rev(d[0]), self.rev(d[1]))
491 491 return d
492 492 def start(self, rev):
493 if rev < 0:
493 if rev == nullrev:
494 494 return 0
495 495 if self.version != REVLOGV0:
496 496 return self.ngoffset(self.index[rev][0])
497 497 return self.index[rev][0]
498 498
499 499 def end(self, rev): return self.start(rev) + self.length(rev)
500 500
501 501 def size(self, rev):
502 502 """return the length of the uncompressed text for a given revision"""
503 503 if rev == nullrev:
504 504 return 0
505 505 l = -1
506 506 if self.version != REVLOGV0:
507 507 l = self.index[rev][2]
508 508 if l >= 0:
509 509 return l
510 510
511 511 t = self.revision(self.node(rev))
512 512 return len(t)
513 513
514 514 # alternate implementation. The advantage to this code is it
515 515 # will be faster for a single revision. But, the results are not
516 516 # cached, so finding the size of every revision will be slower.
517 517 """
518 518 if self.cache and self.cache[1] == rev:
519 519 return len(self.cache[2])
520 520
521 521 base = self.base(rev)
522 522 if self.cache and self.cache[1] >= base and self.cache[1] < rev:
523 523 base = self.cache[1]
524 524 text = self.cache[2]
525 525 else:
526 526 text = self.revision(self.node(base))
527 527
528 528 l = len(text)
529 529 for x in xrange(base + 1, rev + 1):
530 530 l = mdiff.patchedsize(l, self.chunk(x))
531 531 return l
532 532 """
533 533
534 534 def length(self, rev):
535 if rev < 0:
535 if rev == nullrev:
536 536 return 0
537 537 else:
538 538 return self.index[rev][1]
539 def base(self, rev): return (rev < 0) and rev or self.index[rev][-5]
539 def base(self, rev):
540 if (rev == nullrev):
541 return nullrev
542 else:
543 return self.index[rev][-5]
540 544
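# Editor's note, not part of the original file, on the fixes above: nullrev
# is -1, so the old "rev < 0" tests did catch it, but they also silently
# treated every other negative rev (say -2, a valid Python index for the
# second-to-last entry) as the null revision. Comparing against nullrev
# explicitly stops such callers from being masked:
#
#   from node import nullrev
#   assert nullrev == -1
#   rl.base(nullrev)      # -> nullrev: the null revision has no base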
541 545 def reachable(self, rev, stop=None):
542 546 reachable = {}
543 547 visit = [rev]
544 548 reachable[rev] = 1
545 549 if stop:
546 550 stopn = self.rev(stop)
547 551 else:
548 552 stopn = 0
549 553 while visit:
550 554 n = visit.pop(0)
551 555 if n == stop:
552 556 continue
553 557 if n == nullid:
554 558 continue
555 559 for p in self.parents(n):
556 560 if self.rev(p) < stopn:
557 561 continue
558 562 if p not in reachable:
559 563 reachable[p] = 1
560 564 visit.append(p)
561 565 return reachable
562 566
563 567 def nodesbetween(self, roots=None, heads=None):
564 568 """Return a tuple containing three elements. Elements 1 and 2 contain
565 569 a final list of bases and heads after all the unreachable ones have been
566 570 pruned. Element 0 contains a topologically sorted list of all
568 572 nodes that satisfy these constraints:
569 573 1. All nodes must be descended from a node in roots (the nodes on
570 574 roots are considered descended from themselves).
571 575 2. All nodes must also be ancestors of a node in heads (the nodes in
572 576 heads are considered to be their own ancestors).
573 577
574 578 If roots is unspecified, nullid is assumed as the only root.
575 579 If heads is unspecified, it is taken to be the output of the
576 580 heads method (i.e. a list of all nodes in the repository that
577 581 have no children)."""
578 582 nonodes = ([], [], [])
579 583 if roots is not None:
580 584 roots = list(roots)
581 585 if not roots:
582 586 return nonodes
583 587 lowestrev = min([self.rev(n) for n in roots])
584 588 else:
585 589 roots = [nullid] # Everybody's a descendent of nullid
586 590 lowestrev = nullrev
587 591 if (lowestrev == nullrev) and (heads is None):
588 592 # We want _all_ the nodes!
589 593 return ([self.node(r) for r in xrange(0, self.count())],
590 594 [nullid], list(self.heads()))
591 595 if heads is None:
592 596 # All nodes are ancestors, so the latest ancestor is the last
593 597 # node.
594 598 highestrev = self.count() - 1
595 599 # Set ancestors to None to signal that every node is an ancestor.
596 600 ancestors = None
597 601 # Set heads to an empty dictionary for later discovery of heads
598 602 heads = {}
599 603 else:
600 604 heads = list(heads)
601 605 if not heads:
602 606 return nonodes
603 607 ancestors = {}
604 608 # Turn heads into a dictionary so we can remove 'fake' heads.
605 609 # Also, later we will be using it to filter out the heads we can't
606 610 # find from roots.
607 611 heads = dict.fromkeys(heads, 0)
608 612 # Start at the top and keep marking parents until we're done.
609 613 nodestotag = heads.keys()
610 614 # Remember where the top was so we can use it as a limit later.
611 615 highestrev = max([self.rev(n) for n in nodestotag])
612 616 while nodestotag:
613 617 # grab a node to tag
614 618 n = nodestotag.pop()
615 619 # Never tag nullid
616 620 if n == nullid:
617 621 continue
618 622 # A node's revision number represents its place in a
619 623 # topologically sorted list of nodes.
620 624 r = self.rev(n)
621 625 if r >= lowestrev:
622 626 if n not in ancestors:
623 627 # If we are possibly a descendent of one of the roots
624 628 # and we haven't already been marked as an ancestor
625 629 ancestors[n] = 1 # Mark as ancestor
626 630 # Add non-nullid parents to list of nodes to tag.
627 631 nodestotag.extend([p for p in self.parents(n) if
628 632 p != nullid])
629 633 elif n in heads: # We've seen it before, is it a fake head?
630 634 # So it is, real heads should not be the ancestors of
631 635 # any other heads.
632 636 heads.pop(n)
633 637 if not ancestors:
634 638 return nonodes
635 639 # Now that we have our set of ancestors, we want to remove any
636 640 # roots that are not ancestors.
637 641
638 642 # If one of the roots was nullid, everything is included anyway.
639 643 if lowestrev > nullrev:
640 644 # But, since we weren't, let's recompute the lowest rev to not
641 645 # include roots that aren't ancestors.
642 646
643 647 # Filter out roots that aren't ancestors of heads
644 648 roots = [n for n in roots if n in ancestors]
645 649 # Recompute the lowest revision
646 650 if roots:
647 651 lowestrev = min([self.rev(n) for n in roots])
648 652 else:
649 653 # No more roots? Return empty list
650 654 return nonodes
651 655 else:
652 656 # We are descending from nullid, and don't need to care about
653 657 # any other roots.
654 658 lowestrev = nullrev
655 659 roots = [nullid]
656 660 # Transform our roots list into a 'set' (i.e. a dictionary where the
657 661 # values don't matter).
658 662 descendents = dict.fromkeys(roots, 1)
659 663 # Also, keep the original roots so we can filter out roots that aren't
660 664 # 'real' roots (i.e. are descended from other roots).
661 665 roots = descendents.copy()
662 666 # Our topologically sorted list of output nodes.
663 667 orderedout = []
664 668 # Don't start at nullid since we don't want nullid in our output list,
665 669 # and if nullid shows up in descedents, empty parents will look like
666 670 # they're descendents.
667 671 for r in xrange(max(lowestrev, 0), highestrev + 1):
668 672 n = self.node(r)
669 673 isdescendent = False
670 674 if lowestrev == nullrev: # Everybody is a descendent of nullid
671 675 isdescendent = True
672 676 elif n in descendents:
673 677 # n is already a descendent
674 678 isdescendent = True
675 679 # This check only needs to be done here because all the roots
676 680 # will start being marked as descendents before the loop.
677 681 if n in roots:
678 682 # If n was a root, check if it's a 'real' root.
679 683 p = tuple(self.parents(n))
680 684 # If any of its parents are descendents, it's not a root.
681 685 if (p[0] in descendents) or (p[1] in descendents):
682 686 roots.pop(n)
683 687 else:
684 688 p = tuple(self.parents(n))
685 689 # A node is a descendent if either of its parents are
686 690 # descendents. (We seeded the descendents list with the roots
687 691 # up there, remember?)
688 692 if (p[0] in descendents) or (p[1] in descendents):
689 693 descendents[n] = 1
690 694 isdescendent = True
691 695 if isdescendent and ((ancestors is None) or (n in ancestors)):
692 696 # Only include nodes that are both descendents and ancestors.
693 697 orderedout.append(n)
694 698 if (ancestors is not None) and (n in heads):
695 699 # We're trying to figure out which heads are reachable
696 700 # from roots.
697 701 # Mark this head as having been reached
698 702 heads[n] = 1
699 703 elif ancestors is None:
700 704 # Otherwise, we're trying to discover the heads.
701 705 # Assume this is a head because if it isn't, the next step
702 706 # will eventually remove it.
703 707 heads[n] = 1
704 708 # But, obviously its parents aren't.
705 709 for p in self.parents(n):
706 710 heads.pop(p, None)
707 711 heads = [n for n in heads.iterkeys() if heads[n] != 0]
708 712 roots = roots.keys()
709 713 assert orderedout
710 714 assert roots
711 715 assert heads
712 716 return (orderedout, roots, heads)
713 717
714 718 def heads(self, start=None):
715 719 """return the list of all nodes that have no children
716 720
717 721 if start is specified, only heads that are descendants of
718 722 start will be returned
719 723
720 724 """
721 725 if start is None:
722 726 start = nullid
723 727 startrev = self.rev(start)
724 728 reachable = {startrev: 1}
725 729 heads = {startrev: 1}
726 730
727 731 parentrevs = self.parentrevs
728 732 for r in xrange(startrev + 1, self.count()):
729 733 for p in parentrevs(r):
730 734 if p in reachable:
731 735 reachable[r] = 1
732 736 heads[r] = 1
733 737 if p in heads:
734 738 del heads[p]
735 739 return [self.node(r) for r in heads]
736 740
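# Editor's sketch, not part of the original file: heads() is a single
# numeric-order sweep in which a rev stays a head until a later rev names
# it as a parent. For a made-up graph 0 <- 1 <- 2 with a branch 0 <- 3:
#
#   reachable, heads = {0: 1}, {0: 1}
#   for r, ps in [(1, (0, -1)), (2, (1, -1)), (3, (0, -1))]:
#       for p in ps:
#           if p in reachable:
#               reachable[r] = 1
#               heads[r] = 1
#               if p in heads:
#                   del heads[p]
#   # heads.keys() -> [2, 3], the two branch tips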
737 741 def children(self, node):
738 742 """find the children of a given node"""
739 743 c = []
740 744 p = self.rev(node)
741 745 for r in range(p + 1, self.count()):
742 746 for pr in self.parentrevs(r):
743 747 if pr == p:
744 748 c.append(self.node(r))
745 749 return c
746 750
747 751 def _match(self, id):
748 752 if isinstance(id, (long, int)):
749 753 # rev
750 754 return self.node(id)
751 755 if len(id) == 20:
752 756 # possibly a binary node
753 757 # odds of a binary node being all hex in ASCII are 1 in 10**25
754 758 try:
755 759 node = id
756 760 r = self.rev(node) # quick search the index
757 761 return node
758 762 except RevlogError:
759 763 pass # may be partial hex id
760 764 try:
761 765 # str(rev)
762 766 rev = int(id)
763 767 if str(rev) != id: raise ValueError
764 768 if rev < 0: rev = self.count() + rev
765 769 if rev < 0 or rev >= self.count(): raise ValueError
766 770 return self.node(rev)
767 771 except (ValueError, OverflowError):
768 772 pass
769 773 if len(id) == 40:
770 774 try:
771 775 # a full hex nodeid?
772 776 node = bin(id)
773 777 r = self.rev(node)
774 778 return node
775 779 except TypeError:
776 780 pass
777 781
778 782 def _partialmatch(self, id):
779 783 if len(id) < 40:
780 784 try:
781 785 # hex(node)[:...]
782 786 bin_id = bin(id[:len(id) & ~1]) # grab an even number of digits
783 787 node = None
784 788 for n in self.nodemap:
785 789 if n.startswith(bin_id) and hex(n).startswith(id):
786 790 if node is not None:
787 791 raise RevlogError(_("Ambiguous identifier"))
788 792 node = n
789 793 if node is not None:
790 794 return node
791 795 except TypeError:
792 796 pass
793 797
794 798 def lookup(self, id):
795 799 """locate a node based on:
796 800 - revision number or str(revision number)
797 801 - nodeid or subset of hex nodeid
798 802 """
799 803
800 804 n = self._match(id)
801 805 if n is not None:
802 806 return n
803 807 n = self._partialmatch(id)
804 808 if n:
805 809 return n
806 810
807 811 raise RevlogError(_("No match found"))
808 812
809 813 def cmp(self, node, text):
810 814 """compare text with a given file revision"""
811 815 p1, p2 = self.parents(node)
812 816 return hash(text, p1, p2) != node
813 817
814 818 def makenode(self, node, text):
815 819 """calculate a file nodeid for text, descended or possibly
816 820 unchanged from node"""
817 821
818 822 if self.cmp(node, text):
819 823 return hash(text, node, nullid)
820 824 return node
821 825
822 826 def diff(self, a, b):
823 827 """return a delta between two revisions"""
824 828 return mdiff.textdiff(a, b)
825 829
826 830 def patches(self, t, pl):
827 831 """apply a list of patches to a string"""
828 832 return mdiff.patches(t, pl)
829 833
830 834 def chunk(self, rev, df=None, cachelen=4096):
831 835 start, length = self.start(rev), self.length(rev)
832 836 inline = self.inlinedata()
833 837 if inline:
834 838 start += (rev + 1) * struct.calcsize(self.indexformat)
835 839 end = start + length
836 840 def loadcache(df):
837 841 cache_length = max(cachelen, length) # 4k
838 842 if not df:
839 843 if inline:
840 844 df = self.opener(self.indexfile)
841 845 else:
842 846 df = self.opener(self.datafile)
843 847 df.seek(start)
844 848 self.chunkcache = (start, df.read(cache_length))
845 849
846 850 if not self.chunkcache:
847 851 loadcache(df)
848 852
849 853 cache_start = self.chunkcache[0]
850 854 cache_end = cache_start + len(self.chunkcache[1])
851 855 if start >= cache_start and end <= cache_end:
852 856 # it is cached
853 857 offset = start - cache_start
854 858 else:
855 859 loadcache(df)
856 860 offset = 0
857 861
858 862 #def checkchunk():
859 863 # df = self.opener(self.datafile)
860 864 # df.seek(start)
861 865 # return df.read(length)
862 866 #assert s == checkchunk()
863 867 return decompress(self.chunkcache[1][offset:offset + length])
864 868
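# Editor's note, not part of the original file: for inline revlogs the data
# chunks are interleaved with the index records in the one file,
#
#   entry(0) data(0) entry(1) data(1) entry(2) data(2) ...
#
# which is why chunk() above shifts start by (rev + 1) index-entry sizes:
# that many records precede rev's data.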
865 869 def delta(self, node):
866 870 """return or calculate a delta between a node and its predecessor"""
867 871 r = self.rev(node)
868 872 return self.revdiff(r - 1, r)
869 873
870 874 def revdiff(self, rev1, rev2):
871 875 """return or calculate a delta between two revisions"""
872 876 b1 = self.base(rev1)
873 877 b2 = self.base(rev2)
874 878 if b1 == b2 and rev1 + 1 == rev2:
875 879 return self.chunk(rev2)
876 880 else:
877 881 return self.diff(self.revision(self.node(rev1)),
878 882 self.revision(self.node(rev2)))
879 883
880 884 def revision(self, node):
881 885 """return an uncompressed revision of a given"""
882 886 if node == nullid: return ""
883 887 if self.cache and self.cache[0] == node: return self.cache[2]
884 888
885 889 # look up what we need to read
886 890 text = None
887 891 rev = self.rev(node)
888 892 base = self.base(rev)
889 893
890 894 if self.inlinedata():
891 895 # we probably have the whole chunk cached
892 896 df = None
893 897 else:
894 898 df = self.opener(self.datafile)
895 899
896 900 # do we have useful data cached?
897 901 if self.cache and self.cache[1] >= base and self.cache[1] < rev:
898 902 base = self.cache[1]
899 903 text = self.cache[2]
900 904 self.loadindex(base, rev + 1)
901 905 else:
902 906 self.loadindex(base, rev + 1)
903 907 text = self.chunk(base, df=df)
904 908
905 909 bins = []
906 910 for r in xrange(base + 1, rev + 1):
907 911 bins.append(self.chunk(r, df=df))
908 912
909 913 text = self.patches(text, bins)
910 914
911 915 p1, p2 = self.parents(node)
912 916 if node != hash(text, p1, p2):
913 917 raise RevlogError(_("integrity check failed on %s:%d")
914 918 % (self.datafile, rev))
915 919
916 920 self.cache = (node, rev, text)
917 921 return text
918 922
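# Editor's sketch, not part of the original file: stripped of caching and
# the integrity check, revision() is "take the last full text in the
# chain, then patch forward":
#
#   text = self.chunk(base)                         # full text at the base
#   bins = [self.chunk(r) for r in xrange(base + 1, rev + 1)]
#   text = mdiff.patches(text, bins)                # replay the delta run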
919 923 def checkinlinesize(self, tr, fp=None):
920 924 if not self.inlinedata():
921 925 return
922 926 if not fp:
923 927 fp = self.opener(self.indexfile, 'r')
924 928 fp.seek(0, 2)
925 929 size = fp.tell()
926 930 if size < 131072:
927 931 return
928 932 trinfo = tr.find(self.indexfile)
929 933 if trinfo == None:
930 934 raise RevlogError(_("%s not found in the transaction" %
931 935 self.indexfile))
932 936
933 937 trindex = trinfo[2]
934 938 dataoff = self.start(trindex)
935 939
936 940 tr.add(self.datafile, dataoff)
937 941 df = self.opener(self.datafile, 'w')
938 942 calc = struct.calcsize(self.indexformat)
939 943 for r in xrange(self.count()):
940 944 start = self.start(r) + (r + 1) * calc
941 945 length = self.length(r)
942 946 fp.seek(start)
943 947 d = fp.read(length)
944 948 df.write(d)
945 949 fp.close()
946 950 df.close()
947 951 fp = self.opener(self.indexfile, 'w', atomictemp=True)
948 952 self.version &= ~(REVLOGNGINLINEDATA)
949 953 if self.count():
950 954 x = self.index[0]
951 955 e = struct.pack(self.indexformat, *x)[4:]
952 956 l = struct.pack(versionformat, self.version)
953 957 fp.write(l)
954 958 fp.write(e)
955 959
956 960 for i in xrange(1, self.count()):
957 961 x = self.index[i]
958 962 e = struct.pack(self.indexformat, *x)
959 963 fp.write(e)
960 964
961 965 # if we don't call rename, the temp file will never replace the
962 966 # real index
963 967 fp.rename()
964 968
965 969 tr.replace(self.indexfile, trindex * calc)
966 970 self.chunkcache = None
967 971
968 972 def addrevision(self, text, transaction, link, p1=None, p2=None, d=None):
969 973 """add a revision to the log
970 974
971 975 text - the revision data to add
972 976 transaction - the transaction object used for rollback
973 977 link - the linkrev data to add
974 978 p1, p2 - the parent nodeids of the revision
975 979 d - an optional precomputed delta
976 980 """
977 981 if not self.inlinedata():
978 982 dfh = self.opener(self.datafile, "a")
979 983 else:
980 984 dfh = None
981 985 ifh = self.opener(self.indexfile, "a+")
982 986 return self._addrevision(text, transaction, link, p1, p2, d, ifh, dfh)
983 987
984 988 def _addrevision(self, text, transaction, link, p1, p2, d, ifh, dfh):
985 989 if text is None: text = ""
986 990 if p1 is None: p1 = self.tip()
987 991 if p2 is None: p2 = nullid
988 992
989 993 node = hash(text, p1, p2)
990 994
991 995 if node in self.nodemap:
992 996 return node
993 997
994 998 n = self.count()
995 999 t = n - 1
996 1000
997 1001 if n:
998 1002 base = self.base(t)
999 1003 start = self.start(base)
1000 1004 end = self.end(t)
1001 1005 if not d:
1002 1006 prev = self.revision(self.tip())
1003 1007 d = self.diff(prev, text)
1004 1008 data = compress(d)
1005 1009 l = len(data[1]) + len(data[0])
1006 1010 dist = end - start + l
1007 1011
1008 1012 # full versions are inserted when the needed deltas
1009 1013 # become comparable to the uncompressed text
1010 1014 if not n or dist > len(text) * 2:
1011 1015 data = compress(text)
1012 1016 l = len(data[1]) + len(data[0])
1013 1017 base = n
1014 1018 else:
1015 1019 base = self.base(t)
1016 1020
1017 1021 offset = 0
1018 1022 if t >= 0:
1019 1023 offset = self.end(t)
1020 1024
1021 1025 if self.version == REVLOGV0:
1022 1026 e = (offset, l, base, link, p1, p2, node)
1023 1027 else:
1024 1028 e = (self.offset_type(offset, 0), l, len(text),
1025 1029 base, link, self.rev(p1), self.rev(p2), node)
1026 1030
1027 1031 self.index.append(e)
1028 1032 self.nodemap[node] = n
1029 1033 entry = struct.pack(self.indexformat, *e)
1030 1034
1031 1035 if not self.inlinedata():
1032 1036 transaction.add(self.datafile, offset)
1033 1037 transaction.add(self.indexfile, n * len(entry))
1034 1038 if data[0]:
1035 1039 dfh.write(data[0])
1036 1040 dfh.write(data[1])
1037 1041 dfh.flush()
1038 1042 else:
1039 1043 ifh.seek(0, 2)
1040 1044 transaction.add(self.indexfile, ifh.tell(), self.count() - 1)
1041 1045
1042 1046 if len(self.index) == 1 and self.version != REVLOGV0:
1043 1047 l = struct.pack(versionformat, self.version)
1044 1048 ifh.write(l)
1045 1049 entry = entry[4:]
1046 1050
1047 1051 ifh.write(entry)
1048 1052
1049 1053 if self.inlinedata():
1050 1054 ifh.write(data[0])
1051 1055 ifh.write(data[1])
1052 1056 self.checkinlinesize(transaction, ifh)
1053 1057
1054 1058 self.cache = (node, n, text)
1055 1059 return node
1056 1060
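# Editor's note, not part of the original file: a worked instance of the
# "dist > len(text) * 2" cut-over above, with made-up sizes. If the chain's
# base starts at offset 0, the previous rev ends at offset 2500, and the
# new delta compresses to 150 bytes, then
#
#   dist = 2500 - 0 + 150 = 2650
#
# so for a 1000-byte text (2650 > 2000) a fresh full version is stored and
# this rev becomes its own base, keeping later reads cheap instead of
# replaying an ever longer delta run.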
1057 1061 def ancestor(self, a, b):
1058 1062 """calculate the least common ancestor of nodes a and b"""
1059 1063
1060 1064 def parents(rev):
1061 1065 return [p for p in self.parentrevs(rev) if p != nullrev]
1062 1066
1063 1067 c = ancestor.ancestor(self.rev(a), self.rev(b), parents)
1064 1068 if c is None:
1065 1069 return nullid
1066 1070
1067 1071 return self.node(c)
1068 1072
1069 1073 def group(self, nodelist, lookup, infocollect=None):
1070 1074 """calculate a delta group
1071 1075
1072 1076 Given a list of changeset revs, return a set of deltas and
1073 1077 metadata corresponding to nodes. The first delta is
1074 1078 parent(nodes[0]) -> nodes[0]; the receiver is guaranteed to
1075 1079 have this parent as it has all history before these
1076 1080 changesets. The parent used is parents(nodes[0])[0].
1077 1081 """
1078 1082 revs = [self.rev(n) for n in nodelist]
1079 1083
1080 1084 # if we don't have any revisions touched by these changesets, bail
1081 1085 if not revs:
1082 1086 yield changegroup.closechunk()
1083 1087 return
1084 1088
1085 1089 # add the parent of the first rev
1086 1090 p = self.parents(self.node(revs[0]))[0]
1087 1091 revs.insert(0, self.rev(p))
1088 1092
1089 1093 # build deltas
1090 1094 for d in xrange(0, len(revs) - 1):
1091 1095 a, b = revs[d], revs[d + 1]
1092 1096 nb = self.node(b)
1093 1097
1094 1098 if infocollect is not None:
1095 1099 infocollect(nb)
1096 1100
1097 1101 d = self.revdiff(a, b)
1098 1102 p = self.parents(nb)
1099 1103 meta = nb + p[0] + p[1] + lookup(nb)
1100 1104 yield changegroup.genchunk("%s%s" % (meta, d))
1101 1105
1102 1106 yield changegroup.closechunk()
1103 1107
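# Editor's sketch, not part of the original file: each chunk produced by
# group() is an 80-byte header followed by the delta, which is exactly
# what addgroup() below takes apart:
#
#   node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
#   delta = chunk[80:]
#
# cs is the changeset node that lookup()/linkmapper map to a linkrev.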
1104 1108 def addgroup(self, revs, linkmapper, transaction, unique=0):
1105 1109 """
1106 1110 add a delta group
1107 1111
1108 1112 given a set of deltas, add them to the revision log. the
1109 1113 first delta is against its parent, which should be in our
1110 1114 log, the rest are against the previous delta.
1111 1115 """
1112 1116
1113 1117 # track the base of the current delta log
1114 1118 r = self.count()
1115 1119 t = r - 1
1116 1120 node = None
1117 1121
1118 1122 base = prev = nullrev
1119 1123 start = end = textlen = 0
1120 1124 if r:
1121 1125 end = self.end(t)
1122 1126
1123 1127 ifh = self.opener(self.indexfile, "a+")
1124 1128 ifh.seek(0, 2)
1125 1129 transaction.add(self.indexfile, ifh.tell(), self.count())
1126 1130 if self.inlinedata():
1127 1131 dfh = None
1128 1132 else:
1129 1133 transaction.add(self.datafile, end)
1130 1134 dfh = self.opener(self.datafile, "a")
1131 1135
1132 1136 # loop through our set of deltas
1133 1137 chain = None
1134 1138 for chunk in revs:
1135 1139 node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
1136 1140 link = linkmapper(cs)
1137 1141 if node in self.nodemap:
1138 1142 # this can happen if two branches make the same change
1139 1143 # if unique:
1140 1144 # raise RevlogError(_("already have %s") % hex(node[:4]))
1141 1145 chain = node
1142 1146 continue
1143 1147 delta = chunk[80:]
1144 1148
1145 1149 for p in (p1, p2):
1146 1150 if not p in self.nodemap:
1147 1151 raise RevlogError(_("unknown parent %s") % short(p))
1148 1152
1149 1153 if not chain:
1150 1154 # retrieve the parent revision of the delta chain
1151 1155 chain = p1
1152 1156 if not chain in self.nodemap:
1153 1157 raise RevlogError(_("unknown base %s") % short(chain[:4]))
1154 1158
1155 1159 # full versions are inserted when the needed deltas become
1156 1160 # comparable to the uncompressed text or when the previous
1157 1161 # version is not the one we have a delta against. We use
1158 1162 # the size of the previous full rev as a proxy for the
1159 1163 # current size.
1160 1164
1161 1165 if chain == prev:
1162 1166 tempd = compress(delta)
1163 1167 cdelta = tempd[0] + tempd[1]
1164 1168 textlen = mdiff.patchedsize(textlen, delta)
1165 1169
1166 1170 if chain != prev or (end - start + len(cdelta)) > textlen * 2:
1167 1171 # flush our writes here so we can read it in revision
1168 1172 if dfh:
1169 1173 dfh.flush()
1170 1174 ifh.flush()
1171 1175 text = self.revision(chain)
1172 1176 text = self.patches(text, [delta])
1173 1177 chk = self._addrevision(text, transaction, link, p1, p2, None,
1174 1178 ifh, dfh)
1175 1179 if not dfh and not self.inlinedata():
1176 1180 # addrevision switched from inline to conventional
1177 1181 # reopen the index
1178 1182 dfh = self.opener(self.datafile, "a")
1179 1183 ifh = self.opener(self.indexfile, "a")
1180 1184 if chk != node:
1181 1185 raise RevlogError(_("consistency error adding group"))
1182 1186 textlen = len(text)
1183 1187 else:
1184 1188 if self.version == REVLOGV0:
1185 1189 e = (end, len(cdelta), base, link, p1, p2, node)
1186 1190 else:
1187 1191 e = (self.offset_type(end, 0), len(cdelta), textlen, base,
1188 1192 link, self.rev(p1), self.rev(p2), node)
1189 1193 self.index.append(e)
1190 1194 self.nodemap[node] = r
1191 1195 if self.inlinedata():
1192 1196 ifh.write(struct.pack(self.indexformat, *e))
1193 1197 ifh.write(cdelta)
1194 1198 self.checkinlinesize(transaction, ifh)
1195 1199 if not self.inlinedata():
1196 1200 dfh = self.opener(self.datafile, "a")
1197 1201 ifh = self.opener(self.indexfile, "a")
1198 1202 else:
1199 1203 dfh.write(cdelta)
1200 1204 ifh.write(struct.pack(self.indexformat, *e))
1201 1205
1202 1206 t, r, chain, prev = r, r + 1, node, node
1203 1207 base = self.base(t)
1204 1208 start = self.start(base)
1205 1209 end = self.end(t)
1206 1210
1207 1211 return node
1208 1212
1209 1213 def strip(self, rev, minlink):
1210 1214 if self.count() == 0 or rev >= self.count():
1211 1215 return
1212 1216
1213 1217 if isinstance(self.index, lazyindex):
1214 1218 self.loadindexmap()
1215 1219
1216 1220 # When stripping away a revision, we need to make sure it
1217 1221 # does not actually belong to an older changeset.
1218 1222 # The minlink parameter defines the oldest revision
1219 1223 # we're allowed to strip away.
1220 1224 while minlink > self.index[rev][-4]:
1221 1225 rev += 1
1222 1226 if rev >= self.count():
1223 1227 return
1224 1228
1225 1229 # first truncate the files on disk
1226 1230 end = self.start(rev)
1227 1231 if not self.inlinedata():
1228 1232 df = self.opener(self.datafile, "a")
1229 1233 df.truncate(end)
1230 1234 end = rev * struct.calcsize(self.indexformat)
1231 1235 else:
1232 1236 end += rev * struct.calcsize(self.indexformat)
1233 1237
1234 1238 indexf = self.opener(self.indexfile, "a")
1235 1239 indexf.truncate(end)
1236 1240
1237 1241 # then reset internal state in memory to forget those revisions
1238 1242 self.cache = None
1239 1243 self.chunkcache = None
1240 1244 for x in xrange(rev, self.count()):
1241 1245 del self.nodemap[self.node(x)]
1242 1246
1243 1247 del self.index[rev:]
1244 1248
1245 1249 def checksize(self):
1246 1250 expected = 0
1247 1251 if self.count():
1248 1252 expected = self.end(self.count() - 1)
1249 1253
1250 1254 try:
1251 1255 f = self.opener(self.datafile)
1252 1256 f.seek(0, 2)
1253 1257 actual = f.tell()
1254 1258 dd = actual - expected
1255 1259 except IOError, inst:
1256 1260 if inst.errno != errno.ENOENT:
1257 1261 raise
1258 1262 dd = 0
1259 1263
1260 1264 try:
1261 1265 f = self.opener(self.indexfile)
1262 1266 f.seek(0, 2)
1263 1267 actual = f.tell()
1264 1268 s = struct.calcsize(self.indexformat)
1265 1269 i = actual / s
1266 1270 di = actual - (i * s)
1267 1271 if self.inlinedata():
1268 1272 databytes = 0
1269 1273 for r in xrange(self.count()):
1270 1274 databytes += self.length(r)
1271 1275 dd = 0
1272 1276 di = actual - self.count() * s - databytes
1273 1277 except IOError, inst:
1274 1278 if inst.errno != errno.ENOENT:
1275 1279 raise
1276 1280 di = 0
1277 1281
1278 1282 return (dd, di)
1279 1283
1280 1284