revlog: make lookup handle binary nodes
Matt Mackall
r2561:494f7787 default
@@ -1,1284 +1,1286
1 1 """
2 2 revlog.py - storage back-end for mercurial
3 3
4 4 This provides efficient delta storage with O(1) retrieve and append
5 5 and O(changes) merge between branches
6 6
7 7 Copyright 2005 Matt Mackall <mpm@selenic.com>
8 8
9 9 This software may be used and distributed according to the terms
10 10 of the GNU General Public License, incorporated herein by reference.
11 11 """
12 12
13 13 from node import *
14 14 from i18n import gettext as _
15 15 from demandload import demandload
16 16 demandload(globals(), "binascii changegroup errno heapq mdiff os")
17 17 demandload(globals(), "sha struct util zlib")
18 18
19 19 # revlog version strings
20 20 REVLOGV0 = 0
21 21 REVLOGNG = 1
22 22
23 23 # revlog flags
24 24 REVLOGNGINLINEDATA = (1 << 16)
25 25 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
26 26
27 27 REVLOG_DEFAULT_FORMAT = REVLOGNG
28 28 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
29 29
30 30 def flagstr(flag):
31 31 if flag == "inline":
32 32 return REVLOGNGINLINEDATA
33 33 raise RevlogError(_("unknown revlog flag %s") % flag)
34 34
35 35 def hash(text, p1, p2):
36 36 """generate a hash from the given text and its parent hashes
37 37
38 38 This hash combines both the current file contents and its history
39 39 in a manner that makes it easy to distinguish nodes with the same
40 40 content in the revision graph.
41 41 """
42 42 l = [p1, p2]
43 43 l.sort()
44 44 s = sha.new(l[0])
45 45 s.update(l[1])
46 46 s.update(text)
47 47 return s.digest()
48 48
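# Editor's sketch (not part of the original revision): the parent
# sort above makes the node id order-insensitive in its parents,
# so a merge hashes the same whichever way round p1 and p2 arrive.
# Assumes nullid, which comes from the wildcard import of node.
def _sketch_hash_symmetry():
    p1 = hash("v1\n", nullid, nullid)
    p2 = hash("v2\n", nullid, nullid)
    assert hash("merge\n", p1, p2) == hash("merge\n", p2, p1)
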
49 49 def compress(text):
50 50 """ generate a possibly-compressed representation of text """
51 51 if not text: return ("", text)
52 52 if len(text) < 44:
53 53 if text[0] == '\0': return ("", text)
54 54 return ('u', text)
55 55 bin = zlib.compress(text)
56 56 if len(bin) > len(text):
57 57 if text[0] == '\0': return ("", text)
58 58 return ('u', text)
59 59 return ("", bin)
60 60
61 61 def decompress(bin):
62 62 """ decompress the given input """
63 63 if not bin: return bin
64 64 t = bin[0]
65 65 if t == '\0': return bin
66 66 if t == 'x': return zlib.decompress(bin)
67 67 if t == 'u': return bin[1:]
68 68 raise RevlogError(_("unknown compression type %r") % t)
69 69
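# Editor's sketch (not part of the original revision): the first
# byte of a stored chunk tags its encoding -- 'u' for a literal,
# '\0' for empty or NUL-leading literal data, and 'x' for zlib
# output, which happens to begin with 'x' at the default settings.
def _sketch_chunk_roundtrip():
    for text in ("", "short", "x" * 200):
        hdr, data = compress(text)
        assert decompress(hdr + data) == text
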
70 70 indexformatv0 = ">4l20s20s20s"
71 71 v0shaoffset = 56
72 72 # index ng:
73 73 # 6 bytes offset
74 74 # 2 bytes flags
75 75 # 4 bytes compressed length
76 76 # 4 bytes uncompressed length
77 77 # 4 bytes: base rev
78 78 # 4 bytes link rev
79 79 # 4 bytes parent 1 rev
80 80 # 4 bytes parent 2 rev
81 81 # 32 bytes: nodeid
82 82 indexformatng = ">Qiiiiii20s12x"
83 83 ngshaoffset = 32
84 84 versionformat = ">i"
85 85
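# Editor's sketch (not part of the original revision): one RevlogNG
# index entry through the format string above; the values here are
# made up for illustration.
def _sketch_ng_entry():
    e = (0, 11, 11,    # offset_type, compressed len, uncompressed len
         0, 0,         # base rev, link rev
         -1, -1,       # parent revs (-1 stands for nullid)
         "\0" * 20)    # 20-byte nodeid, zero-padded to 32 on disk
    packed = struct.pack(indexformatng, *e)
    assert len(packed) == struct.calcsize(indexformatng) == 64
    assert struct.unpack(indexformatng, packed) == e
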
86 86 class lazyparser(object):
87 87 """
88 88 this class avoids the need to parse the entirety of large indices
89 89 """
90 90
91 91 # lazyparser is not safe to use on windows if win32 extensions not
92 92 # available. it keeps the file handle open, which makes it impossible
93 93 # to break hardlinks on local cloned repos.
94 94 safe_to_use = os.name != 'nt' or (not util.is_win_9x() and
95 95 hasattr(util, 'win32api'))
96 96
97 97 def __init__(self, dataf, size, indexformat, shaoffset):
98 98 self.dataf = dataf
99 99 self.format = indexformat
100 100 self.s = struct.calcsize(indexformat)
101 101 self.indexformat = indexformat
102 102 self.datasize = size
103 103 self.l = size/self.s
104 104 self.index = [None] * self.l
105 105 self.map = {nullid: -1}
106 106 self.allmap = 0
107 107 self.all = 0
108 108 self.mapfind_count = 0
109 109 self.shaoffset = shaoffset
110 110
111 111 def loadmap(self):
112 112 """
113 113 during a commit, we need to make sure the rev being added is
114 114 not a duplicate. This requires loading the entire index,
115 115 which is fairly slow. loadmap can load up just the node map,
116 116 which takes much less time.
117 117 """
118 118 if self.allmap: return
119 119 start = 0
120 120 end = self.datasize
121 121 self.allmap = 1
122 122 cur = 0
123 123 count = 0
124 124 blocksize = self.s * 256
125 125 self.dataf.seek(0)
126 126 while cur < end:
127 127 data = self.dataf.read(blocksize)
128 128 off = 0
129 129 for x in xrange(256):
130 130 n = data[off + self.shaoffset:off + self.shaoffset + 20]
131 131 self.map[n] = count
132 132 count += 1
133 133 if count >= self.l:
134 134 break
135 135 off += self.s
136 136 cur += blocksize
137 137
138 138 def loadblock(self, blockstart, blocksize, data=None):
139 139 if self.all: return
140 140 if data is None:
141 141 self.dataf.seek(blockstart)
142 142 data = self.dataf.read(blocksize)
143 143 lend = len(data) / self.s
144 144 i = blockstart / self.s
145 145 off = 0
146 146 for x in xrange(lend):
147 147 if self.index[i + x] == None:
148 148 b = data[off : off + self.s]
149 149 self.index[i + x] = b
150 150 n = b[self.shaoffset:self.shaoffset + 20]
151 151 self.map[n] = i + x
152 152 off += self.s
153 153
154 154 def findnode(self, node):
155 155 """search backwards through the index file for a specific node"""
156 156 if self.allmap: return None
157 157
158 158 # hg log will cause many many searches for the manifest
159 159 # nodes. After we get called a few times, just load the whole
160 160 # thing.
161 161 if self.mapfind_count > 8:
162 162 self.loadmap()
163 163 if node in self.map:
164 164 return node
165 165 return None
166 166 self.mapfind_count += 1
167 167 last = self.l - 1
168 168 while self.index[last] != None:
169 169 if last == 0:
170 170 self.all = 1
171 171 self.allmap = 1
172 172 return None
173 173 last -= 1
174 174 end = (last + 1) * self.s
175 175 blocksize = self.s * 256
176 176 while end >= 0:
177 177 start = max(end - blocksize, 0)
178 178 self.dataf.seek(start)
179 179 data = self.dataf.read(end - start)
180 180 findend = end - start
181 181 while True:
182 182 # we're searching backwards, so we have to make sure
183 183 # we don't find a changeset where this node is a parent
184 184 off = data.rfind(node, 0, findend)
185 185 findend = off
186 186 if off >= 0:
187 187 i = off / self.s
188 188 off = i * self.s
189 189 n = data[off + self.shaoffset:off + self.shaoffset + 20]
190 190 if n == node:
191 191 self.map[n] = i + start / self.s
192 192 return node
193 193 else:
194 194 break
195 195 end -= blocksize
196 196 return None
197 197
198 198 def loadindex(self, i=None, end=None):
199 199 if self.all: return
200 200 all = False
201 201 if i == None:
202 202 blockstart = 0
203 203 blocksize = (512 / self.s) * self.s
204 204 end = self.datasize
205 205 all = True
206 206 else:
207 207 if end:
208 208 blockstart = i * self.s
209 209 end = end * self.s
210 210 blocksize = end - blockstart
211 211 else:
212 212 blockstart = (i & ~(32)) * self.s
213 213 blocksize = self.s * 64
214 214 end = blockstart + blocksize
215 215 while blockstart < end:
216 216 self.loadblock(blockstart, blocksize)
217 217 blockstart += blocksize
218 218 if all: self.all = True
219 219
220 220 class lazyindex(object):
221 221 """a lazy version of the index array"""
222 222 def __init__(self, parser):
223 223 self.p = parser
224 224 def __len__(self):
225 225 return len(self.p.index)
226 226 def load(self, pos):
227 227 if pos < 0:
228 228 pos += len(self.p.index)
229 229 self.p.loadindex(pos)
230 230 return self.p.index[pos]
231 231 def __getitem__(self, pos):
232 232 ret = self.p.index[pos] or self.load(pos)
233 233 if isinstance(ret, str):
234 234 ret = struct.unpack(self.p.indexformat, ret)
235 235 return ret
236 236 def __setitem__(self, pos, item):
237 237 self.p.index[pos] = item
238 238 def __delitem__(self, pos):
239 239 del self.p.index[pos]
240 240 def append(self, e):
241 241 self.p.index.append(e)
242 242
243 243 class lazymap(object):
244 244 """a lazy version of the node map"""
245 245 def __init__(self, parser):
246 246 self.p = parser
247 247 def load(self, key):
248 248 n = self.p.findnode(key)
249 249 if n == None:
250 250 raise KeyError(key)
251 251 def __contains__(self, key):
252 252 if key in self.p.map:
253 253 return True
254 254 self.p.loadmap()
255 255 return key in self.p.map
256 256 def __iter__(self):
257 257 yield nullid
258 258 for i in xrange(self.p.l):
259 259 ret = self.p.index[i]
260 260 if not ret:
261 261 self.p.loadindex(i)
262 262 ret = self.p.index[i]
263 263 if isinstance(ret, str):
264 264 ret = struct.unpack(self.p.indexformat, ret)
265 265 yield ret[-1]
266 266 def __getitem__(self, key):
267 267 try:
268 268 return self.p.map[key]
269 269 except KeyError:
270 270 try:
271 271 self.load(key)
272 272 return self.p.map[key]
273 273 except KeyError:
274 274 raise KeyError("node " + hex(key))
275 275 def __setitem__(self, key, val):
276 276 self.p.map[key] = val
277 277 def __delitem__(self, key):
278 278 del self.p.map[key]
279 279
280 280 class RevlogError(Exception): pass
281 281
282 282 class revlog(object):
283 283 """
284 284 the underlying revision storage object
285 285
286 286 A revlog consists of two parts, an index and the revision data.
287 287
288 288 The index is a file with a fixed record size containing
289 289 information on each revision, including its nodeid (hash), the
290 290 nodeids of its parents, the position and offset of its data within
291 291 the data file, and the revision it's based on. Finally, each entry
292 292 contains a linkrev entry that can serve as a pointer to external
293 293 data.
294 294
295 295 The revision data itself is a linear collection of data chunks.
296 296 Each chunk represents a revision and is usually represented as a
297 297 delta against the previous chunk. To bound lookup time, runs of
298 298 deltas are limited to about 2 times the length of the original
299 299 version data. This makes retrieval of a version proportional to
300 300 its size, or O(1) relative to the number of revisions.
301 301
302 302 Both pieces of the revlog are written to in an append-only
303 303 fashion, which means we never need to rewrite a file to insert or
304 304 remove data, and can use some simple techniques to avoid the need
305 305 for locking while reading.
306 306 """
307 307 def __init__(self, opener, indexfile, datafile,
308 308 defversion=REVLOG_DEFAULT_VERSION):
309 309 """
310 310 create a revlog object
311 311
312 312 opener is a function that abstracts the file opening operation
313 313 and can be used to implement COW semantics or the like.
314 314 """
315 315 self.indexfile = indexfile
316 316 self.datafile = datafile
317 317 self.opener = opener
318 318
319 319 self.indexstat = None
320 320 self.cache = None
321 321 self.chunkcache = None
322 322 self.defversion = defversion
323 323 self.load()
324 324
325 325 def load(self):
326 326 v = self.defversion
327 327 try:
328 328 f = self.opener(self.indexfile)
329 329 i = f.read(4)
330 330 f.seek(0)
331 331 except IOError, inst:
332 332 if inst.errno != errno.ENOENT:
333 333 raise
334 334 i = ""
335 335 else:
336 336 try:
337 337 st = util.fstat(f)
338 338 except AttributeError, inst:
339 339 st = None
340 340 else:
341 341 oldst = self.indexstat
342 342 if (oldst and st.st_dev == oldst.st_dev
343 343 and st.st_ino == oldst.st_ino
344 344 and st.st_mtime == oldst.st_mtime
345 345 and st.st_ctime == oldst.st_ctime):
346 346 return
347 347 self.indexstat = st
348 348 if len(i) > 0:
349 349 v = struct.unpack(versionformat, i)[0]
350 350 flags = v & ~0xFFFF
351 351 fmt = v & 0xFFFF
352 352 if fmt == REVLOGV0:
353 353 if flags:
354 354 raise RevlogError(_("index %s invalid flags %x for format v0") %
355 355 (self.indexfile, flags))
356 356 elif fmt == REVLOGNG:
357 357 if flags & ~REVLOGNGINLINEDATA:
358 358 raise RevlogError(_("index %s invalid flags %x for revlogng") %
359 359 (self.indexfile, flags))
360 360 else:
361 361 raise RevlogError(_("index %s invalid format %d") %
362 362 (self.indexfile, fmt))
363 363 self.version = v
364 364 if v == REVLOGV0:
365 365 self.indexformat = indexformatv0
366 366 shaoffset = v0shaoffset
367 367 else:
368 368 self.indexformat = indexformatng
369 369 shaoffset = ngshaoffset
370 370
371 371 if i:
372 372 if (lazyparser.safe_to_use and not self.inlinedata() and
373 373 st and st.st_size > 10000):
374 374 # big index, let's parse it on demand
375 375 parser = lazyparser(f, st.st_size, self.indexformat, shaoffset)
376 376 self.index = lazyindex(parser)
377 377 self.nodemap = lazymap(parser)
378 378 else:
379 379 self.parseindex(f, st)
380 380 if self.version != REVLOGV0:
381 381 e = list(self.index[0])
382 382 type = self.ngtype(e[0])
383 383 e[0] = self.offset_type(0, type)
384 384 self.index[0] = e
385 385 else:
386 386 self.nodemap = { nullid: -1}
387 387 self.index = []
388 388
389 389
390 390 def parseindex(self, fp, st):
391 391 s = struct.calcsize(self.indexformat)
392 392 self.index = []
393 393 self.nodemap = {nullid: -1}
394 394 inline = self.inlinedata()
395 395 n = 0
396 396 leftover = None
397 397 while True:
398 398 if st:
399 399 data = fp.read(65536)
400 400 else:
401 401 # hack for httprangereader, it doesn't do partial reads well
402 402 data = fp.read()
403 403 if not data:
404 404 break
405 405 if n == 0 and self.inlinedata():
406 406 # cache the first chunk
407 407 self.chunkcache = (0, data)
408 408 if leftover:
409 409 data = leftover + data
410 410 leftover = None
411 411 off = 0
412 412 l = len(data)
413 413 while off < l:
414 414 if l - off < s:
415 415 leftover = data[off:]
416 416 break
417 417 cur = data[off:off + s]
418 418 off += s
419 419 e = struct.unpack(self.indexformat, cur)
420 420 self.index.append(e)
421 421 self.nodemap[e[-1]] = n
422 422 n += 1
423 423 if inline:
424 424 off += e[1]
425 425 if off > l:
426 426 # some things don't seek well, just read it
427 427 fp.read(off - l)
428 428 if not st:
429 429 break
430 430
431 431
432 432 def ngoffset(self, q):
433 433 if q & 0xFFFF:
434 434 raise RevlogError(_('%s: incompatible revision flag %x') %
435 435 (self.indexfile, q))
436 436 return long(q >> 16)
437 437
438 438 def ngtype(self, q):
439 439 return int(q & 0xFFFF)
440 440
441 441 def offset_type(self, offset, type):
442 442 return long(long(offset) << 16 | type)
443 443
444 444 def loadindex(self, start, end):
445 445 """load a block of indexes all at once from the lazy parser"""
446 446 if isinstance(self.index, lazyindex):
447 447 self.index.p.loadindex(start, end)
448 448
449 449 def loadindexmap(self):
450 450 """loads both the map and the index from the lazy parser"""
451 451 if isinstance(self.index, lazyindex):
452 452 p = self.index.p
453 453 p.loadindex()
454 454 self.nodemap = p.map
455 455
456 456 def loadmap(self):
457 457 """loads the map from the lazy parser"""
458 458 if isinstance(self.nodemap, lazymap):
459 459 self.nodemap.p.loadmap()
460 460 self.nodemap = self.nodemap.p.map
461 461
462 462 def inlinedata(self): return self.version & REVLOGNGINLINEDATA
463 463 def tip(self): return self.node(len(self.index) - 1)
464 464 def count(self): return len(self.index)
465 465 def node(self, rev):
466 466 return (rev < 0) and nullid or self.index[rev][-1]
467 467 def rev(self, node):
468 468 try:
469 469 return self.nodemap[node]
470 470 except KeyError:
471 471 raise RevlogError(_('%s: no node %s') % (self.indexfile, hex(node)))
472 472 def linkrev(self, node): return self.index[self.rev(node)][-4]
473 473 def parents(self, node):
474 474 if node == nullid: return (nullid, nullid)
475 475 r = self.rev(node)
476 476 d = self.index[r][-3:-1]
477 477 if self.version == REVLOGV0:
478 478 return d
479 479 return [ self.node(x) for x in d ]
480 480 def parentrevs(self, rev):
481 481 if rev == -1:
482 482 return (-1, -1)
483 483 d = self.index[rev][-3:-1]
484 484 if self.version == REVLOGV0:
485 485 return [ self.rev(x) for x in d ]
486 486 return d
487 487 def start(self, rev):
488 488 if rev < 0:
489 489 return -1
490 490 if self.version != REVLOGV0:
491 491 return self.ngoffset(self.index[rev][0])
492 492 return self.index[rev][0]
493 493
494 494 def end(self, rev): return self.start(rev) + self.length(rev)
495 495
496 496 def size(self, rev):
497 497 """return the length of the uncompressed text for a given revision"""
498 498 l = -1
499 499 if self.version != REVLOGV0:
500 500 l = self.index[rev][2]
501 501 if l >= 0:
502 502 return l
503 503
504 504 t = self.revision(self.node(rev))
505 505 return len(t)
506 506
507 507 # Alternate implementation: the advantage of this code is that it
508 508 # will be faster for a single revision. But the results are not
509 509 # cached, so finding the size of every revision will be slower.
510 510 """
511 511 if self.cache and self.cache[1] == rev:
512 512 return len(self.cache[2])
513 513
514 514 base = self.base(rev)
515 515 if self.cache and self.cache[1] >= base and self.cache[1] < rev:
516 516 base = self.cache[1]
517 517 text = self.cache[2]
518 518 else:
519 519 text = self.revision(self.node(base))
520 520
521 521 l = len(text)
522 522 for x in xrange(base + 1, rev + 1):
523 523 l = mdiff.patchedsize(l, self.chunk(x))
524 524 return l
525 525 """
526 526
527 527 def length(self, rev):
528 528 if rev < 0:
529 529 return 0
530 530 else:
531 531 return self.index[rev][1]
532 532 def base(self, rev): return (rev < 0) and rev or self.index[rev][-5]
533 533
534 534 def reachable(self, rev, stop=None):
535 535 reachable = {}
536 536 visit = [rev]
537 537 reachable[rev] = 1
538 538 if stop:
539 539 stopn = self.rev(stop)
540 540 else:
541 541 stopn = 0
542 542 while visit:
543 543 n = visit.pop(0)
544 544 if n == stop:
545 545 continue
546 546 if n == nullid:
547 547 continue
548 548 for p in self.parents(n):
549 549 if self.rev(p) < stopn:
550 550 continue
551 551 if p not in reachable:
552 552 reachable[p] = 1
553 553 visit.append(p)
554 554 return reachable
555 555
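# Editor's note (added): despite its name, the rev argument above
# holds a node, and reachable() walks parent pointers from it back
# toward the root; for a linear history n0 -> n1 -> n2,
# reachable(n2, stop=n0) marks all of n2, n1 and n0.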
556 556 def nodesbetween(self, roots=None, heads=None):
557 557 """Return a tuple containing three elements. Elements 1 and 2 contain
558 558 a final list bases and heads after all the unreachable ones have been
559 559 pruned. Element 0 contains a topologically sorted list of all
560 560
561 561 nodes that satisfy these constraints:
562 562 1. All nodes must be descended from a node in roots (the nodes on
563 563 roots are considered descended from themselves).
564 564 2. All nodes must also be ancestors of a node in heads (the nodes in
565 565 heads are considered to be their own ancestors).
566 566
567 567 If roots is unspecified, nullid is assumed as the only root.
568 568 If heads is unspecified, it is taken to be the output of the
569 569 heads method (i.e. a list of all nodes in the repository that
570 570 have no children)."""
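# Editor's sketch (not part of the original revision) of the
# contract, for a linear history n0 -> n1 -> n2 where n2 is the
# only head:
#
#   nodesbetween([n1])       == ([n1, n2], [n1],     [n2])
#   nodesbetween(None, [n1]) == ([n0, n1], [nullid], [n1])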
571 571 nonodes = ([], [], [])
572 572 if roots is not None:
573 573 roots = list(roots)
574 574 if not roots:
575 575 return nonodes
576 576 lowestrev = min([self.rev(n) for n in roots])
577 577 else:
578 578 roots = [nullid] # Everybody's a descendent of nullid
579 579 lowestrev = -1
580 580 if (lowestrev == -1) and (heads is None):
581 581 # We want _all_ the nodes!
582 582 return ([self.node(r) for r in xrange(0, self.count())],
583 583 [nullid], list(self.heads()))
584 584 if heads is None:
585 585 # All nodes are ancestors, so the latest ancestor is the last
586 586 # node.
587 587 highestrev = self.count() - 1
588 588 # Set ancestors to None to signal that every node is an ancestor.
589 589 ancestors = None
590 590 # Set heads to an empty dictionary for later discovery of heads
591 591 heads = {}
592 592 else:
593 593 heads = list(heads)
594 594 if not heads:
595 595 return nonodes
596 596 ancestors = {}
597 597 # Start at the top and keep marking parents until we're done.
598 598 nodestotag = heads[:]
599 599 # Turn heads into a dictionary so we can remove 'fake' heads.
600 600 # Also, later we will be using it to filter out the heads we can't
601 601 # find from roots.
602 602 heads = dict.fromkeys(heads, 0)
603 603 # Remember where the top was so we can use it as a limit later.
604 604 highestrev = max([self.rev(n) for n in nodestotag])
605 605 while nodestotag:
606 606 # grab a node to tag
607 607 n = nodestotag.pop()
608 608 # Never tag nullid
609 609 if n == nullid:
610 610 continue
611 611 # A node's revision number represents its place in a
612 612 # topologically sorted list of nodes.
613 613 r = self.rev(n)
614 614 if r >= lowestrev:
615 615 if n not in ancestors:
616 616 # If we are possibly a descendent of one of the roots
617 617 # and we haven't already been marked as an ancestor
618 618 ancestors[n] = 1 # Mark as ancestor
619 619 # Add non-nullid parents to list of nodes to tag.
620 620 nodestotag.extend([p for p in self.parents(n) if
621 621 p != nullid])
622 622 elif n in heads: # We've seen it before, is it a fake head?
623 623 # So it is, real heads should not be the ancestors of
624 624 # any other heads.
625 625 heads.pop(n)
626 626 if not ancestors:
627 627 return nonodes
628 628 # Now that we have our set of ancestors, we want to remove any
629 629 # roots that are not ancestors.
630 630
631 631 # If one of the roots was nullid, everything is included anyway.
632 632 if lowestrev > -1:
633 633 # But, since we weren't, let's recompute the lowest rev to not
634 634 # include roots that aren't ancestors.
635 635
636 636 # Filter out roots that aren't ancestors of heads
637 637 roots = [n for n in roots if n in ancestors]
638 638 # Recompute the lowest revision
639 639 if roots:
640 640 lowestrev = min([self.rev(n) for n in roots])
641 641 else:
642 642 # No more roots? Return empty list
643 643 return nonodes
644 644 else:
645 645 # We are descending from nullid, and don't need to care about
646 646 # any other roots.
647 647 lowestrev = -1
648 648 roots = [nullid]
649 649 # Transform our roots list into a 'set' (i.e. a dictionary where the
650 650 # values don't matter).
651 651 descendents = dict.fromkeys(roots, 1)
652 652 # Also, keep the original roots so we can filter out roots that aren't
653 653 # 'real' roots (i.e. are descended from other roots).
654 654 roots = descendents.copy()
655 655 # Our topologically sorted list of output nodes.
656 656 orderedout = []
657 657 # Don't start at nullid since we don't want nullid in our output list,
658 658 # and if nullid shows up in descendents, empty parents will look like
659 659 # they're descendents.
660 660 for r in xrange(max(lowestrev, 0), highestrev + 1):
661 661 n = self.node(r)
662 662 isdescendent = False
663 663 if lowestrev == -1: # Everybody is a descendent of nullid
664 664 isdescendent = True
665 665 elif n in descendents:
666 666 # n is already a descendent
667 667 isdescendent = True
668 668 # This check only needs to be done here because all the roots
669 669 # will start being marked as descendents before the loop.
670 670 if n in roots:
671 671 # If n was a root, check if it's a 'real' root.
672 672 p = tuple(self.parents(n))
673 673 # If any of its parents are descendents, it's not a root.
674 674 if (p[0] in descendents) or (p[1] in descendents):
675 675 roots.pop(n)
676 676 else:
677 677 p = tuple(self.parents(n))
678 678 # A node is a descendent if either of its parents are
679 679 # descendents. (We seeded the descendents list with the roots
680 680 # up there, remember?)
681 681 if (p[0] in descendents) or (p[1] in descendents):
682 682 descendents[n] = 1
683 683 isdescendent = True
684 684 if isdescendent and ((ancestors is None) or (n in ancestors)):
685 685 # Only include nodes that are both descendents and ancestors.
686 686 orderedout.append(n)
687 687 if (ancestors is not None) and (n in heads):
688 688 # We're trying to figure out which heads are reachable
689 689 # from roots.
690 690 # Mark this head as having been reached
691 691 heads[n] = 1
692 692 elif ancestors is None:
693 693 # Otherwise, we're trying to discover the heads.
694 694 # Assume this is a head because if it isn't, the next step
695 695 # will eventually remove it.
696 696 heads[n] = 1
697 697 # But, obviously its parents aren't.
698 698 for p in self.parents(n):
699 699 heads.pop(p, None)
700 700 heads = [n for n in heads.iterkeys() if heads[n] != 0]
701 701 roots = roots.keys()
702 702 assert orderedout
703 703 assert roots
704 704 assert heads
705 705 return (orderedout, roots, heads)
706 706
707 707 def heads(self, start=None):
708 708 """return the list of all nodes that have no children
709 709
710 710 if start is specified, only heads that are descendants of
711 711 start will be returned
712 712
713 713 """
714 714 if start is None:
715 715 start = nullid
716 716 startrev = self.rev(start)
717 717 reachable = {startrev: 1}
718 718 heads = {startrev: 1}
719 719
720 720 parentrevs = self.parentrevs
721 721 for r in xrange(startrev + 1, self.count()):
722 722 for p in parentrevs(r):
723 723 if p in reachable:
724 724 reachable[r] = 1
725 725 heads[r] = 1
726 726 if p in heads:
727 727 del heads[p]
728 728 return [self.node(r) for r in heads]
729 729
730 730 def children(self, node):
731 731 """find the children of a given node"""
732 732 c = []
733 733 p = self.rev(node)
734 734 for r in range(p + 1, self.count()):
735 735 n = self.node(r)
736 736 for pn in self.parents(n):
737 737 if pn == node:
738 738 c.append(n)
739 739 continue
740 740 elif pn == nullid:
741 741 continue
742 742 return c
743 743
744 744 def lookup(self, id):
745 745 """locate a node based on revision number or subset of hex nodeid"""
746 if id in self.nodemap:
747 return id
746 748 if type(id) == type(0):
747 749 rev = id
748 750 if rev < 0: rev = self.count() + rev
749 751 if rev < 0 or rev >= self.count(): return None
750 752 return self.node(rev)
751 753 try:
752 754 rev = int(id)
753 755 if str(rev) != id: raise ValueError
754 756 if rev < 0: rev = self.count() + rev
755 757 if rev < 0 or rev >= self.count(): raise ValueError
756 758 return self.node(rev)
757 759 except (ValueError, OverflowError):
758 760 c = []
759 761 for n in self.nodemap:
760 762 if hex(n).startswith(id):
761 763 c.append(n)
762 764 if len(c) > 1: raise RevlogError(_("Ambiguous identifier"))
763 765 if len(c) < 1: raise RevlogError(_("No match found"))
764 766 return c[0]
765 767
766 768 return None
767 769
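# Editor's note on the change in this revision: the new nodemap
# check at the top of lookup() means a raw 20-byte binary node is
# now accepted alongside a revision number or hex-nodeid prefix:
#
#   r.lookup(0)              # revision number -> node
#   r.lookup("494f7787")     # unambiguous hex prefix -> node
#   r.lookup(r.tip())        # binary node -> returned unchanged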
768 770 def diff(self, a, b):
769 771 """return a delta between two revisions"""
770 772 return mdiff.textdiff(a, b)
771 773
772 774 def patches(self, t, pl):
773 775 """apply a list of patches to a string"""
774 776 return mdiff.patches(t, pl)
775 777
776 778 def chunk(self, rev, df=None, cachelen=4096):
777 779 start, length = self.start(rev), self.length(rev)
778 780 inline = self.inlinedata()
779 781 if inline:
780 782 start += (rev + 1) * struct.calcsize(self.indexformat)
781 783 end = start + length
782 784 def loadcache(df):
783 785 cache_length = max(cachelen, length) # 4k
784 786 if not df:
785 787 if inline:
786 788 df = self.opener(self.indexfile)
787 789 else:
788 790 df = self.opener(self.datafile)
789 791 df.seek(start)
790 792 self.chunkcache = (start, df.read(cache_length))
791 793
792 794 if not self.chunkcache:
793 795 loadcache(df)
794 796
795 797 cache_start = self.chunkcache[0]
796 798 cache_end = cache_start + len(self.chunkcache[1])
797 799 if start >= cache_start and end <= cache_end:
798 800 # it is cached
799 801 offset = start - cache_start
800 802 else:
801 803 loadcache(df)
802 804 offset = 0
803 805
804 806 #def checkchunk():
805 807 # df = self.opener(self.datafile)
806 808 # df.seek(start)
807 809 # return df.read(length)
808 810 #assert s == checkchunk()
809 811 return decompress(self.chunkcache[1][offset:offset + length])
810 812
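# Editor's note (added): with inline data the .i file interleaves
# index entries and data chunks,
#
#   [version + entry 0][chunk 0][entry 1][chunk 1] ...
#
# which is why chunk() above advances start by (rev + 1) index
# entry sizes and reads from the index file rather than the data
# file.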
811 813 def delta(self, node):
812 814 """return or calculate a delta between a node and its predecessor"""
813 815 r = self.rev(node)
814 816 return self.revdiff(r - 1, r)
815 817
816 818 def revdiff(self, rev1, rev2):
817 819 """return or calculate a delta between two revisions"""
818 820 b1 = self.base(rev1)
819 821 b2 = self.base(rev2)
820 822 if b1 == b2 and rev1 + 1 == rev2:
821 823 return self.chunk(rev2)
822 824 else:
823 825 return self.diff(self.revision(self.node(rev1)),
824 826 self.revision(self.node(rev2)))
825 827
826 828 def revision(self, node):
827 829 """return an uncompressed revision of a given"""
828 830 if node == nullid: return ""
829 831 if self.cache and self.cache[0] == node: return self.cache[2]
830 832
831 833 # look up what we need to read
832 834 text = None
833 835 rev = self.rev(node)
834 836 base = self.base(rev)
835 837
836 838 if self.inlinedata():
837 839 # we probably have the whole chunk cached
838 840 df = None
839 841 else:
840 842 df = self.opener(self.datafile)
841 843
842 844 # do we have useful data cached?
843 845 if self.cache and self.cache[1] >= base and self.cache[1] < rev:
844 846 base = self.cache[1]
845 847 text = self.cache[2]
846 848 self.loadindex(base, rev + 1)
847 849 else:
848 850 self.loadindex(base, rev + 1)
849 851 text = self.chunk(base, df=df)
850 852
851 853 bins = []
852 854 for r in xrange(base + 1, rev + 1):
853 855 bins.append(self.chunk(r, df=df))
854 856
855 857 text = self.patches(text, bins)
856 858
857 859 p1, p2 = self.parents(node)
858 860 if node != hash(text, p1, p2):
859 861 raise RevlogError(_("integrity check failed on %s:%d")
860 862 % (self.datafile, rev))
861 863
862 864 self.cache = (node, rev, text)
863 865 return text
864 866
865 867 def checkinlinesize(self, tr, fp=None):
866 868 if not self.inlinedata():
867 869 return
868 870 if not fp:
869 871 fp = self.opener(self.indexfile, 'r')
870 872 fp.seek(0, 2)
871 873 size = fp.tell()
872 874 if size < 131072:
873 875 return
874 876 trinfo = tr.find(self.indexfile)
875 877 if trinfo == None:
876 878 raise RevlogError(_("%s not found in the transaction") %
877 879 self.indexfile)
878 880
879 881 trindex = trinfo[2]
880 882 dataoff = self.start(trindex)
881 883
882 884 tr.add(self.datafile, dataoff)
883 885 df = self.opener(self.datafile, 'w')
884 886 calc = struct.calcsize(self.indexformat)
885 887 for r in xrange(self.count()):
886 888 start = self.start(r) + (r + 1) * calc
887 889 length = self.length(r)
888 890 fp.seek(start)
889 891 d = fp.read(length)
890 892 df.write(d)
891 893 fp.close()
892 894 df.close()
893 895 fp = self.opener(self.indexfile, 'w', atomictemp=True)
894 896 self.version &= ~(REVLOGNGINLINEDATA)
895 897 if self.count():
896 898 x = self.index[0]
897 899 e = struct.pack(self.indexformat, *x)[4:]
898 900 l = struct.pack(versionformat, self.version)
899 901 fp.write(l)
900 902 fp.write(e)
901 903
902 904 for i in xrange(1, self.count()):
903 905 x = self.index[i]
904 906 e = struct.pack(self.indexformat, *x)
905 907 fp.write(e)
906 908
907 909 # if we don't call rename, the temp file will never replace the
908 910 # real index
909 911 fp.rename()
910 912
911 913 tr.replace(self.indexfile, trindex * calc)
912 914 self.chunkcache = None
913 915
914 916 def addrevision(self, text, transaction, link, p1=None, p2=None, d=None):
915 917 """add a revision to the log
916 918
917 919 text - the revision data to add
918 920 transaction - the transaction object used for rollback
919 921 link - the linkrev data to add
920 922 p1, p2 - the parent nodeids of the revision
921 923 d - an optional precomputed delta
922 924 """
923 925 if text is None: text = ""
924 926 if p1 is None: p1 = self.tip()
925 927 if p2 is None: p2 = nullid
926 928
927 929 node = hash(text, p1, p2)
928 930
929 931 if node in self.nodemap:
930 932 return node
931 933
932 934 n = self.count()
933 935 t = n - 1
934 936
935 937 if n:
936 938 base = self.base(t)
937 939 start = self.start(base)
938 940 end = self.end(t)
939 941 if not d:
940 942 prev = self.revision(self.tip())
941 943 d = self.diff(prev, str(text))
942 944 data = compress(d)
943 945 l = len(data[1]) + len(data[0])
944 946 dist = end - start + l
945 947
946 948 # full versions are inserted when the needed deltas
947 949 # become comparable to the uncompressed text
948 950 if not n or dist > len(text) * 2:
949 951 data = compress(text)
950 952 l = len(data[1]) + len(data[0])
951 953 base = n
952 954 else:
953 955 base = self.base(t)
954 956
955 957 offset = 0
956 958 if t >= 0:
957 959 offset = self.end(t)
958 960
959 961 if self.version == REVLOGV0:
960 962 e = (offset, l, base, link, p1, p2, node)
961 963 else:
962 964 e = (self.offset_type(offset, 0), l, len(text),
963 965 base, link, self.rev(p1), self.rev(p2), node)
964 966
965 967 self.index.append(e)
966 968 self.nodemap[node] = n
967 969 entry = struct.pack(self.indexformat, *e)
968 970
969 971 if not self.inlinedata():
970 972 transaction.add(self.datafile, offset)
971 973 transaction.add(self.indexfile, n * len(entry))
972 974 f = self.opener(self.datafile, "a")
973 975 if data[0]:
974 976 f.write(data[0])
975 977 f.write(data[1])
976 978 f.close()
977 979 f = self.opener(self.indexfile, "a")
978 980 else:
979 981 f = self.opener(self.indexfile, "a+")
980 982 f.seek(0, 2)
981 983 transaction.add(self.indexfile, f.tell(), self.count() - 1)
982 984
983 985 if len(self.index) == 1 and self.version != REVLOGV0:
984 986 l = struct.pack(versionformat, self.version)
985 987 f.write(l)
986 988 entry = entry[4:]
987 989
988 990 f.write(entry)
989 991
990 992 if self.inlinedata():
991 993 f.write(data[0])
992 994 f.write(data[1])
993 995 self.checkinlinesize(transaction, f)
994 996
995 997 self.cache = (node, n, text)
996 998 return node
997 999
998 1000 def ancestor(self, a, b):
999 1001 """calculate the least common ancestor of nodes a and b"""
1000 1002
1001 1003 # start with some short cuts for the linear cases
1002 1004 if a == b:
1003 1005 return a
1004 1006 ra = self.rev(a)
1005 1007 rb = self.rev(b)
1006 1008 if ra < rb:
1007 1009 last = b
1008 1010 first = a
1009 1011 else:
1010 1012 last = a
1011 1013 first = b
1012 1014
1013 1015 # reachable won't include stop in the list, so we have to use a parent
1014 1016 reachable = self.reachable(last, stop=self.parents(first)[0])
1015 1017 if first in reachable:
1016 1018 return first
1017 1019
1018 1020 # calculate the distance of every node from root
1019 1021 dist = {nullid: 0}
1020 1022 for i in xrange(self.count()):
1021 1023 n = self.node(i)
1022 1024 p1, p2 = self.parents(n)
1023 1025 dist[n] = max(dist[p1], dist[p2]) + 1
1024 1026
1025 1027 # traverse ancestors in order of decreasing distance from root
1026 1028 def ancestors(node):
1027 1029 # we store negative distances because heap returns smallest member
1028 1030 h = [(-dist[node], node)]
1029 1031 seen = {}
1030 1032 while h:
1031 1033 d, n = heapq.heappop(h)
1032 1034 if n not in seen:
1033 1035 seen[n] = 1
1034 1036 yield (-d, n)
1035 1037 for p in self.parents(n):
1036 1038 heapq.heappush(h, (-dist[p], p))
1037 1039
1038 1040 def generations(node):
1039 1041 sg, s = None, {}
1040 1042 for g,n in ancestors(node):
1041 1043 if g != sg:
1042 1044 if sg:
1043 1045 yield sg, s
1044 1046 sg, s = g, {n:1}
1045 1047 else:
1046 1048 s[n] = 1
1047 1049 yield sg, s
1048 1050
1049 1051 x = generations(a)
1050 1052 y = generations(b)
1051 1053 gx = x.next()
1052 1054 gy = y.next()
1053 1055
1054 1056 # increment each ancestor list until it is closer to root than
1055 1057 # the other, or they match
1056 1058 while 1:
1057 1059 #print "ancestor gen %s %s" % (gx[0], gy[0])
1058 1060 if gx[0] == gy[0]:
1059 1061 # find the intersection
1060 1062 i = [ n for n in gx[1] if n in gy[1] ]
1061 1063 if i:
1062 1064 return i[0]
1063 1065 else:
1064 1066 #print "next"
1065 1067 gy = y.next()
1066 1068 gx = x.next()
1067 1069 elif gx[0] < gy[0]:
1068 1070 #print "next y"
1069 1071 gy = y.next()
1070 1072 else:
1071 1073 #print "next x"
1072 1074 gx = x.next()
1073 1075
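# Editor's sketch (added): for the diamond a -> (b, c) -> d, dist
# is {a: 1, b: 2, c: 2, d: 3}.  ancestor(b, c) starts generations
# at distance 2 with the disjoint sets {b} and {c}, so the loop
# above advances both iterators; at distance 1 both sets are {a},
# the intersection is non-empty, and a is returned.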
1074 1076 def group(self, nodelist, lookup, infocollect=None):
1075 1077 """calculate a delta group
1076 1078
1077 1079 Given a list of changeset revs, return a set of deltas and
1078 1080 metadata corresponding to nodes. The first delta is
1079 1081 parent(nodes[0]) -> nodes[0]; the receiver is guaranteed to
1080 1082 have this parent as it has all history before these
1081 1083 changesets. Here parent means parents(nodes[0])[0].
1082 1084 """
1083 1085 revs = [self.rev(n) for n in nodelist]
1084 1086
1085 1087 # if we don't have any revisions touched by these changesets, bail
1086 1088 if not revs:
1087 1089 yield changegroup.closechunk()
1088 1090 return
1089 1091
1090 1092 # add the parent of the first rev
1091 1093 p = self.parents(self.node(revs[0]))[0]
1092 1094 revs.insert(0, self.rev(p))
1093 1095
1094 1096 # build deltas
1095 1097 for d in xrange(0, len(revs) - 1):
1096 1098 a, b = revs[d], revs[d + 1]
1097 1099 nb = self.node(b)
1098 1100
1099 1101 if infocollect is not None:
1100 1102 infocollect(nb)
1101 1103
1102 1104 d = self.revdiff(a, b)
1103 1105 p = self.parents(nb)
1104 1106 meta = nb + p[0] + p[1] + lookup(nb)
1105 1107 yield changegroup.genchunk("%s%s" % (meta, d))
1106 1108
1107 1109 yield changegroup.closechunk()
1108 1110
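# Editor's note (added): each delta chunk framed by genchunk()
# above carries 80 bytes of metadata -- node, p1, p2 and the link
# node, 20 bytes each -- followed by the delta itself, matching
# the "20s20s20s20s" unpack in addgroup() below.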
1109 1111 def addgroup(self, revs, linkmapper, transaction, unique=0):
1110 1112 """
1111 1113 add a delta group
1112 1114
1113 1115 given a set of deltas, add them to the revision log. the
1114 1116 first delta is against its parent, which should be in our
1115 1117 log, the rest are against the previous delta.
1116 1118 """
1117 1119
1118 1120 # track the base of the current delta log
1119 1121 r = self.count()
1120 1122 t = r - 1
1121 1123 node = None
1122 1124
1123 1125 base = prev = -1
1124 1126 start = end = textlen = 0
1125 1127 if r:
1126 1128 end = self.end(t)
1127 1129
1128 1130 ifh = self.opener(self.indexfile, "a+")
1129 1131 ifh.seek(0, 2)
1130 1132 transaction.add(self.indexfile, ifh.tell(), self.count())
1131 1133 if self.inlinedata():
1132 1134 dfh = None
1133 1135 else:
1134 1136 transaction.add(self.datafile, end)
1135 1137 dfh = self.opener(self.datafile, "a")
1136 1138
1137 1139 # loop through our set of deltas
1138 1140 chain = None
1139 1141 for chunk in revs:
1140 1142 node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
1141 1143 link = linkmapper(cs)
1142 1144 if node in self.nodemap:
1143 1145 # this can happen if two branches make the same change
1144 1146 # if unique:
1145 1147 # raise RevlogError(_("already have %s") % hex(node[:4]))
1146 1148 chain = node
1147 1149 continue
1148 1150 delta = chunk[80:]
1149 1151
1150 1152 for p in (p1, p2):
1151 1153 if not p in self.nodemap:
1152 1154 raise RevlogError(_("unknown parent %s") % short(p))
1153 1155
1154 1156 if not chain:
1155 1157 # retrieve the parent revision of the delta chain
1156 1158 chain = p1
1157 1159 if not chain in self.nodemap:
1158 1160 raise RevlogError(_("unknown base %s") % short(chain[:4]))
1159 1161
1160 1162 # full versions are inserted when the needed deltas become
1161 1163 # comparable to the uncompressed text or when the previous
1162 1164 # version is not the one we have a delta against. We use
1163 1165 # the size of the previous full rev as a proxy for the
1164 1166 # current size.
1165 1167
1166 1168 if chain == prev:
1167 1169 tempd = compress(delta)
1168 1170 cdelta = tempd[0] + tempd[1]
1169 1171 textlen = mdiff.patchedsize(textlen, delta)
1170 1172
1171 1173 if chain != prev or (end - start + len(cdelta)) > textlen * 2:
1172 1174 # flush our writes here so we can read it in revision
1173 1175 if dfh:
1174 1176 dfh.flush()
1175 1177 ifh.flush()
1176 1178 text = self.revision(chain)
1177 1179 text = self.patches(text, [delta])
1178 1180 chk = self.addrevision(text, transaction, link, p1, p2)
1179 1181 if chk != node:
1180 1182 raise RevlogError(_("consistency error adding group"))
1181 1183 textlen = len(text)
1182 1184 else:
1183 1185 if self.version == REVLOGV0:
1184 1186 e = (end, len(cdelta), base, link, p1, p2, node)
1185 1187 else:
1186 1188 e = (self.offset_type(end, 0), len(cdelta), textlen, base,
1187 1189 link, self.rev(p1), self.rev(p2), node)
1188 1190 self.index.append(e)
1189 1191 self.nodemap[node] = r
1190 1192 if self.inlinedata():
1191 1193 ifh.write(struct.pack(self.indexformat, *e))
1192 1194 ifh.write(cdelta)
1193 1195 self.checkinlinesize(transaction, ifh)
1194 1196 if not self.inlinedata():
1195 1197 dfh = self.opener(self.datafile, "a")
1196 1198 ifh = self.opener(self.indexfile, "a")
1197 1199 else:
1198 1200 if not dfh:
1199 1201 # addrevision switched from inline to conventional
1200 1202 # reopen the index
1201 1203 dfh = self.opener(self.datafile, "a")
1202 1204 ifh = self.opener(self.indexfile, "a")
1203 1205 dfh.write(cdelta)
1204 1206 ifh.write(struct.pack(self.indexformat, *e))
1205 1207
1206 1208 t, r, chain, prev = r, r + 1, node, node
1207 1209 base = self.base(t)
1208 1210 start = self.start(base)
1209 1211 end = self.end(t)
1210 1212
1211 1213 return node
1212 1214
1213 1215 def strip(self, rev, minlink):
1214 1216 if self.count() == 0 or rev >= self.count():
1215 1217 return
1216 1218
1217 1219 if isinstance(self.index, lazyindex):
1218 1220 self.loadindexmap()
1219 1221
1220 1222 # When stripping away a revision, we need to make sure it
1221 1223 # does not actually belong to an older changeset.
1222 1224 # The minlink parameter defines the oldest revision
1223 1225 # we're allowed to strip away.
1224 1226 while minlink > self.index[rev][-4]:
1225 1227 rev += 1
1226 1228 if rev >= self.count():
1227 1229 return
1228 1230
1229 1231 # first truncate the files on disk
1230 1232 end = self.start(rev)
1231 1233 if not self.inlinedata():
1232 1234 df = self.opener(self.datafile, "a")
1233 1235 df.truncate(end)
1234 1236 end = rev * struct.calcsize(self.indexformat)
1235 1237 else:
1236 1238 end += rev * struct.calcsize(self.indexformat)
1237 1239
1238 1240 indexf = self.opener(self.indexfile, "a")
1239 1241 indexf.truncate(end)
1240 1242
1241 1243 # then reset internal state in memory to forget those revisions
1242 1244 self.cache = None
1243 1245 self.chunkcache = None
1244 1246 for x in xrange(rev, self.count()):
1245 1247 del self.nodemap[self.node(x)]
1246 1248
1247 1249 del self.index[rev:]
1248 1250
1249 1251 def checksize(self):
1250 1252 expected = 0
1251 1253 if self.count():
1252 1254 expected = self.end(self.count() - 1)
1253 1255
1254 1256 try:
1255 1257 f = self.opener(self.datafile)
1256 1258 f.seek(0, 2)
1257 1259 actual = f.tell()
1258 1260 dd = actual - expected
1259 1261 except IOError, inst:
1260 1262 if inst.errno != errno.ENOENT:
1261 1263 raise
1262 1264 dd = 0
1263 1265
1264 1266 try:
1265 1267 f = self.opener(self.indexfile)
1266 1268 f.seek(0, 2)
1267 1269 actual = f.tell()
1268 1270 s = struct.calcsize(self.indexformat)
1269 1271 i = actual / s
1270 1272 di = actual - (i * s)
1271 1273 if self.inlinedata():
1272 1274 databytes = 0
1273 1275 for r in xrange(self.count()):
1274 1276 databytes += self.length(r)
1275 1277 dd = 0
1276 1278 di = actual - self.count() * s - databytes
1277 1279 except IOError, inst:
1278 1280 if inst.errno != errno.ENOENT:
1279 1281 raise
1280 1282 di = 0
1281 1283
1282 1284 return (dd, di)
1283 1285
1284 1286