revlog.py: fix/tweak read ahead code in lazyparser
Alexis S. L. Carvalho
r4314:43dedce9 default
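
The change itself is a one-line tweak to lazyparser.loadindex() (line 216 below). The old code computed the start of the read-ahead window as (i & ~(32)) * self.s, which clears only bit 5 of the record number, so the 64-record window (blocksize = self.s * 64) could begin at an almost arbitrary record; masking with ~63 clears the low six bits instead, aligning every window to a 64-record boundary so successive loads hit the same, non-overlapping blocks. A minimal sketch of the effect, assuming the 64-byte record size of indexformatng (illustrative only, not part of the commit):

    s = 64                   # record size; struct.calcsize(indexformatng) == 64
    blocksize = s * 64       # loadindex() reads 64 records per block

    def blockstart_old(i):
        return (i & ~(32)) * s   # old code: clears only bit 5 of the record number

    def blockstart_new(i):
        return (i & ~63) * s     # new code: clears bits 0-5, 64-record alignment

    for i in (33, 97, 130):
        print i, blockstart_old(i) // s, blockstart_new(i) // s
    # record 33:  old window starts at record 1,   new at 0
    # record 97:  old window starts at record 65,  new at 64
    # record 130: old window starts at record 130, new at 128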
@@ -1,1295 +1,1295 @@
1 1 """
2 2 revlog.py - storage back-end for mercurial
3 3
4 4 This provides efficient delta storage with O(1) retrieve and append
5 5 and O(changes) merge between branches
6 6
7 7 Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
8 8
9 9 This software may be used and distributed according to the terms
10 10 of the GNU General Public License, incorporated herein by reference.
11 11 """
12 12
13 13 from node import *
14 14 from i18n import gettext as _
15 15 from demandload import demandload
16 16 demandload(globals(), "binascii changegroup errno ancestor mdiff os")
17 17 demandload(globals(), "sha struct util zlib")
18 18
19 19 # revlog version strings
20 20 REVLOGV0 = 0
21 21 REVLOGNG = 1
22 22
23 23 # revlog flags
24 24 REVLOGNGINLINEDATA = (1 << 16)
25 25 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
26 26
27 27 REVLOG_DEFAULT_FORMAT = REVLOGNG
28 28 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
29 29
30 30 def flagstr(flag):
31 31 if flag == "inline":
32 32 return REVLOGNGINLINEDATA
33 33 raise RevlogError(_("unknown revlog flag %s") % flag)
34 34
35 35 def hash(text, p1, p2):
36 36 """generate a hash from the given text and its parent hashes
37 37
38 38 This hash combines both the current file contents and its history
39 39 in a manner that makes it easy to distinguish nodes with the same
40 40 content in the revision graph.
41 41 """
42 42 l = [p1, p2]
43 43 l.sort()
44 44 s = sha.new(l[0])
45 45 s.update(l[1])
46 46 s.update(text)
47 47 return s.digest()
48 48
49 49 def compress(text):
50 50 """ generate a possibly-compressed representation of text """
51 51 if not text: return ("", text)
52 52 if len(text) < 44:
53 53 if text[0] == '\0': return ("", text)
54 54 return ('u', text)
55 55 bin = zlib.compress(text)
56 56 if len(bin) > len(text):
57 57 if text[0] == '\0': return ("", text)
58 58 return ('u', text)
59 59 return ("", bin)
60 60
61 61 def decompress(bin):
62 62 """ decompress the given input """
63 63 if not bin: return bin
64 64 t = bin[0]
65 65 if t == '\0': return bin
66 66 if t == 'x': return zlib.decompress(bin)
67 67 if t == 'u': return bin[1:]
68 68 raise RevlogError(_("unknown compression type %r") % t)
69 69
70 70 indexformatv0 = ">4l20s20s20s"
71 71 v0shaoffset = 56
72 72 # index ng:
73 73 # 6 bytes offset
74 74 # 2 bytes flags
75 75 # 4 bytes compressed length
76 76 # 4 bytes uncompressed length
77 77 # 4 bytes: base rev
78 78 # 4 bytes link rev
79 79 # 4 bytes parent 1 rev
80 80 # 4 bytes parent 2 rev
81 81 # 32 bytes: nodeid
82 82 indexformatng = ">Qiiiiii20s12x"
83 83 ngshaoffset = 32
84 84 versionformat = ">I"
85 85
86 86 class lazyparser(object):
87 87 """
88 88 this class avoids the need to parse the entirety of large indices
89 89 """
90 90
91 91 # lazyparser is not safe to use on windows if win32 extensions are not
92 92 # available. it keeps a file handle open, which makes it impossible
93 93 # to break hardlinks on locally cloned repos.
94 94 safe_to_use = os.name != 'nt' or (not util.is_win_9x() and
95 95 hasattr(util, 'win32api'))
96 96
97 97 def __init__(self, dataf, size, indexformat, shaoffset):
98 98 self.dataf = dataf
99 99 self.format = indexformat
100 100 self.s = struct.calcsize(indexformat)
101 101 self.indexformat = indexformat
102 102 self.datasize = size
103 103 self.l = size/self.s
104 104 self.index = [None] * self.l
105 105 self.map = {nullid: nullrev}
106 106 self.allmap = 0
107 107 self.all = 0
108 108 self.mapfind_count = 0
109 109 self.shaoffset = shaoffset
110 110
111 111 def loadmap(self):
112 112 """
113 113 during a commit, we need to make sure the rev being added is
114 114 not a duplicate. This requires loading the entire index,
115 115 which is fairly slow. loadmap can load up just the node map,
116 116 which takes much less time.
117 117 """
118 118 if self.allmap: return
119 119 end = self.datasize
120 120 self.allmap = 1
121 121 cur = 0
122 122 count = 0
123 123 blocksize = self.s * 256
124 124 self.dataf.seek(0)
125 125 while cur < end:
126 126 data = self.dataf.read(blocksize)
127 127 off = 0
128 128 for x in xrange(256):
129 129 n = data[off + self.shaoffset:off + self.shaoffset + 20]
130 130 self.map[n] = count
131 131 count += 1
132 132 if count >= self.l:
133 133 break
134 134 off += self.s
135 135 cur += blocksize
136 136
137 137 def loadblock(self, blockstart, blocksize, data=None):
138 138 if self.all: return
139 139 if data is None:
140 140 self.dataf.seek(blockstart)
141 141 if blockstart + blocksize > self.datasize:
142 142 # the revlog may have grown since we've started running,
143 143 # but we don't have space in self.index for more entries.
144 144 # limit blocksize so that we don't get too much data.
145 145 blocksize = max(self.datasize - blockstart, 0)
146 146 data = self.dataf.read(blocksize)
147 147 lend = len(data) / self.s
148 148 i = blockstart / self.s
149 149 off = 0
150 150 for x in xrange(lend):
151 151 if self.index[i + x] == None:
152 152 b = data[off : off + self.s]
153 153 self.index[i + x] = b
154 154 n = b[self.shaoffset:self.shaoffset + 20]
155 155 self.map[n] = i + x
156 156 off += self.s
157 157
158 158 def findnode(self, node):
159 159 """search backwards through the index file for a specific node"""
160 160 if self.allmap: return None
161 161
162 162 # hg log will cause many many searches for the manifest
163 163 # nodes. After we get called a few times, just load the whole
164 164 # thing.
165 165 if self.mapfind_count > 8:
166 166 self.loadmap()
167 167 if node in self.map:
168 168 return node
169 169 return None
170 170 self.mapfind_count += 1
171 171 last = self.l - 1
172 172 while self.index[last] != None:
173 173 if last == 0:
174 174 self.all = 1
175 175 self.allmap = 1
176 176 return None
177 177 last -= 1
178 178 end = (last + 1) * self.s
179 179 blocksize = self.s * 256
180 180 while end >= 0:
181 181 start = max(end - blocksize, 0)
182 182 self.dataf.seek(start)
183 183 data = self.dataf.read(end - start)
184 184 findend = end - start
185 185 while True:
186 186 # we're searching backwards, so we have to make sure
187 187 # we don't find a changeset where this node is a parent
188 188 off = data.rfind(node, 0, findend)
189 189 findend = off
190 190 if off >= 0:
191 191 i = off / self.s
192 192 off = i * self.s
193 193 n = data[off + self.shaoffset:off + self.shaoffset + 20]
194 194 if n == node:
195 195 self.map[n] = i + start / self.s
196 196 return node
197 197 else:
198 198 break
199 199 end -= blocksize
200 200 return None
201 201
202 202 def loadindex(self, i=None, end=None):
203 203 if self.all: return
204 204 all = False
205 205 if i == None:
206 206 blockstart = 0
207 207 blocksize = (512 / self.s) * self.s
208 208 end = self.datasize
209 209 all = True
210 210 else:
211 211 if end:
212 212 blockstart = i * self.s
213 213 end = end * self.s
214 214 blocksize = end - blockstart
215 215 else:
216 - blockstart = (i & ~(32)) * self.s
216 + blockstart = (i & ~63) * self.s
217 217 blocksize = self.s * 64
218 218 end = blockstart + blocksize
219 219 while blockstart < end:
220 220 self.loadblock(blockstart, blocksize)
221 221 blockstart += blocksize
222 222 if all: self.all = True
223 223
224 224 class lazyindex(object):
225 225 """a lazy version of the index array"""
226 226 def __init__(self, parser):
227 227 self.p = parser
228 228 def __len__(self):
229 229 return len(self.p.index)
230 230 def load(self, pos):
231 231 if pos < 0:
232 232 pos += len(self.p.index)
233 233 self.p.loadindex(pos)
234 234 return self.p.index[pos]
235 235 def __getitem__(self, pos):
236 236 ret = self.p.index[pos] or self.load(pos)
237 237 if isinstance(ret, str):
238 238 ret = struct.unpack(self.p.indexformat, ret)
239 239 return ret
240 240 def __setitem__(self, pos, item):
241 241 self.p.index[pos] = item
242 242 def __delitem__(self, pos):
243 243 del self.p.index[pos]
244 244 def append(self, e):
245 245 self.p.index.append(e)
246 246
247 247 class lazymap(object):
248 248 """a lazy version of the node map"""
249 249 def __init__(self, parser):
250 250 self.p = parser
251 251 def load(self, key):
252 252 n = self.p.findnode(key)
253 253 if n == None:
254 254 raise KeyError(key)
255 255 def __contains__(self, key):
256 256 if key in self.p.map:
257 257 return True
258 258 self.p.loadmap()
259 259 return key in self.p.map
260 260 def __iter__(self):
261 261 yield nullid
262 262 for i in xrange(self.p.l):
263 263 ret = self.p.index[i]
264 264 if not ret:
265 265 self.p.loadindex(i)
266 266 ret = self.p.index[i]
267 267 if isinstance(ret, str):
268 268 ret = struct.unpack(self.p.indexformat, ret)
269 269 yield ret[-1]
270 270 def __getitem__(self, key):
271 271 try:
272 272 return self.p.map[key]
273 273 except KeyError:
274 274 try:
275 275 self.load(key)
276 276 return self.p.map[key]
277 277 except KeyError:
278 278 raise KeyError("node " + hex(key))
279 279 def __setitem__(self, key, val):
280 280 self.p.map[key] = val
281 281 def __delitem__(self, key):
282 282 del self.p.map[key]
283 283
284 284 class RevlogError(Exception): pass
285 285
286 286 class revlog(object):
287 287 """
288 288 the underlying revision storage object
289 289
290 290 A revlog consists of two parts, an index and the revision data.
291 291
292 292 The index is a file with a fixed record size containing
293 293 information on each revision, including its nodeid (hash), the
294 294 nodeids of its parents, the position and offset of its data within
295 295 the data file, and the revision it's based on. Finally, each entry
296 296 contains a linkrev entry that can serve as a pointer to external
297 297 data.
298 298
299 299 The revision data itself is a linear collection of data chunks.
300 300 Each chunk represents a revision and is usually represented as a
301 301 delta against the previous chunk. To bound lookup time, runs of
302 302 deltas are limited to about 2 times the length of the original
303 303 version data. This makes retrieval of a version proportional to
304 304 its size, or O(1) relative to the number of revisions.
305 305
306 306 Both pieces of the revlog are written to in an append-only
307 307 fashion, which means we never need to rewrite a file to insert or
308 308 remove data, and can use some simple techniques to avoid the need
309 309 for locking while reading.
310 310 """
311 311 def __init__(self, opener, indexfile, datafile,
312 312 defversion=REVLOG_DEFAULT_VERSION):
313 313 """
314 314 create a revlog object
315 315
316 316 opener is a function that abstracts the file opening operation
317 317 and can be used to implement COW semantics or the like.
318 318 """
319 319 self.indexfile = indexfile
320 320 self.datafile = datafile
321 321 self.opener = opener
322 322
323 323 self.indexstat = None
324 324 self.cache = None
325 325 self.chunkcache = None
326 326 self.defversion = defversion
327 327 self.load()
328 328
329 329 def load(self):
330 330 v = self.defversion
331 331 try:
332 332 f = self.opener(self.indexfile)
333 333 i = f.read(4)
334 334 f.seek(0)
335 335 except IOError, inst:
336 336 if inst.errno != errno.ENOENT:
337 337 raise
338 338 i = ""
339 339 else:
340 340 try:
341 341 st = util.fstat(f)
342 342 except AttributeError, inst:
343 343 st = None
344 344 else:
345 345 oldst = self.indexstat
346 346 if (oldst and st.st_dev == oldst.st_dev
347 347 and st.st_ino == oldst.st_ino
348 348 and st.st_mtime == oldst.st_mtime
349 349 and st.st_ctime == oldst.st_ctime):
350 350 return
351 351 self.indexstat = st
352 352 if len(i) > 0:
353 353 v = struct.unpack(versionformat, i)[0]
354 354 flags = v & ~0xFFFF
355 355 fmt = v & 0xFFFF
356 356 if fmt == REVLOGV0:
357 357 if flags:
358 358 raise RevlogError(_("index %s unknown flags %#04x for format v0")
359 359 % (self.indexfile, flags >> 16))
360 360 elif fmt == REVLOGNG:
361 361 if flags & ~REVLOGNGINLINEDATA:
362 362 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
363 363 % (self.indexfile, flags >> 16))
364 364 else:
365 365 raise RevlogError(_("index %s unknown format %d")
366 366 % (self.indexfile, fmt))
367 367 self.version = v
368 368 if v == REVLOGV0:
369 369 self.indexformat = indexformatv0
370 370 shaoffset = v0shaoffset
371 371 else:
372 372 self.indexformat = indexformatng
373 373 shaoffset = ngshaoffset
374 374
375 375 if i:
376 376 if (lazyparser.safe_to_use and not self.inlinedata() and
377 377 st and st.st_size > 10000):
378 378 # big index, let's parse it on demand
379 379 parser = lazyparser(f, st.st_size, self.indexformat, shaoffset)
380 380 self.index = lazyindex(parser)
381 381 self.nodemap = lazymap(parser)
382 382 else:
383 383 self.parseindex(f, st)
384 384 if self.version != REVLOGV0:
385 385 e = list(self.index[0])
386 386 type = self.ngtype(e[0])
387 387 e[0] = self.offset_type(0, type)
388 388 self.index[0] = e
389 389 else:
390 390 self.nodemap = {nullid: nullrev}
391 391 self.index = []
392 392
393 393
394 394 def parseindex(self, fp, st):
395 395 s = struct.calcsize(self.indexformat)
396 396 self.index = []
397 397 self.nodemap = {nullid: nullrev}
398 398 inline = self.inlinedata()
399 399 n = 0
400 400 leftover = None
401 401 while True:
402 402 if st:
403 403 data = fp.read(65536)
404 404 else:
405 405 # hack for httprangereader, it doesn't do partial reads well
406 406 data = fp.read()
407 407 if not data:
408 408 break
409 409 if n == 0 and self.inlinedata():
410 410 # cache the first chunk
411 411 self.chunkcache = (0, data)
412 412 if leftover:
413 413 data = leftover + data
414 414 leftover = None
415 415 off = 0
416 416 l = len(data)
417 417 while off < l:
418 418 if l - off < s:
419 419 leftover = data[off:]
420 420 break
421 421 cur = data[off:off + s]
422 422 off += s
423 423 e = struct.unpack(self.indexformat, cur)
424 424 self.index.append(e)
425 425 self.nodemap[e[-1]] = n
426 426 n += 1
427 427 if inline:
428 428 if e[1] < 0:
429 429 break
430 430 off += e[1]
431 431 if off > l:
432 432 # some things don't seek well, just read it
433 433 fp.read(off - l)
434 434 break
435 435 if not st:
436 436 break
437 437
438 438
439 439 def ngoffset(self, q):
440 440 if q & 0xFFFF:
441 441 raise RevlogError(_('%s: incompatible revision flag %x') %
442 442 (self.indexfile, q))
443 443 return long(q >> 16)
444 444
445 445 def ngtype(self, q):
446 446 return int(q & 0xFFFF)
447 447
448 448 def offset_type(self, offset, type):
449 449 return long(long(offset) << 16 | type)
450 450
451 451 def loadindex(self, start, end):
452 452 """load a block of indexes all at once from the lazy parser"""
453 453 if isinstance(self.index, lazyindex):
454 454 self.index.p.loadindex(start, end)
455 455
456 456 def loadindexmap(self):
457 457 """loads both the map and the index from the lazy parser"""
458 458 if isinstance(self.index, lazyindex):
459 459 p = self.index.p
460 460 p.loadindex()
461 461 self.nodemap = p.map
462 462
463 463 def loadmap(self):
464 464 """loads the map from the lazy parser"""
465 465 if isinstance(self.nodemap, lazymap):
466 466 self.nodemap.p.loadmap()
467 467 self.nodemap = self.nodemap.p.map
468 468
469 469 def inlinedata(self): return self.version & REVLOGNGINLINEDATA
470 470 def tip(self): return self.node(len(self.index) - 1)
471 471 def count(self): return len(self.index)
472 472 def node(self, rev):
473 473 return rev == nullrev and nullid or self.index[rev][-1]
474 474 def rev(self, node):
475 475 try:
476 476 return self.nodemap[node]
477 477 except KeyError:
478 478 raise RevlogError(_('%s: no node %s') % (self.indexfile, hex(node)))
479 479 def linkrev(self, node):
480 480 return (node == nullid) and nullrev or self.index[self.rev(node)][-4]
481 481 def parents(self, node):
482 482 if node == nullid: return (nullid, nullid)
483 483 r = self.rev(node)
484 484 d = self.index[r][-3:-1]
485 485 if self.version == REVLOGV0:
486 486 return d
487 487 return (self.node(d[0]), self.node(d[1]))
488 488 def parentrevs(self, rev):
489 489 if rev == nullrev:
490 490 return (nullrev, nullrev)
491 491 d = self.index[rev][-3:-1]
492 492 if self.version == REVLOGV0:
493 493 return (self.rev(d[0]), self.rev(d[1]))
494 494 return d
495 495 def start(self, rev):
496 496 if rev == nullrev:
497 497 return 0
498 498 if self.version != REVLOGV0:
499 499 return self.ngoffset(self.index[rev][0])
500 500 return self.index[rev][0]
501 501
502 502 def end(self, rev): return self.start(rev) + self.length(rev)
503 503
504 504 def size(self, rev):
505 505 """return the length of the uncompressed text for a given revision"""
506 506 if rev == nullrev:
507 507 return 0
508 508 l = -1
509 509 if self.version != REVLOGV0:
510 510 l = self.index[rev][2]
511 511 if l >= 0:
512 512 return l
513 513
514 514 t = self.revision(self.node(rev))
515 515 return len(t)
516 516
517 517 # alternate implementation. The advantage of this code is that it
518 518 # will be faster for a single revision. But the results are not
519 519 # cached, so finding the size of every revision will be slower.
520 520 """
521 521 if self.cache and self.cache[1] == rev:
522 522 return len(self.cache[2])
523 523
524 524 base = self.base(rev)
525 525 if self.cache and self.cache[1] >= base and self.cache[1] < rev:
526 526 base = self.cache[1]
527 527 text = self.cache[2]
528 528 else:
529 529 text = self.revision(self.node(base))
530 530
531 531 l = len(text)
532 532 for x in xrange(base + 1, rev + 1):
533 533 l = mdiff.patchedsize(l, self.chunk(x))
534 534 return l
535 535 """
536 536
537 537 def length(self, rev):
538 538 if rev == nullrev:
539 539 return 0
540 540 else:
541 541 return self.index[rev][1]
542 542 def base(self, rev):
543 543 if (rev == nullrev):
544 544 return nullrev
545 545 else:
546 546 return self.index[rev][-5]
547 547
548 548 def reachable(self, node, stop=None):
549 549 """return a hash of all nodes ancestral to a given node, including
550 550 the node itself, stopping when stop is matched"""
551 551 reachable = {}
552 552 visit = [node]
553 553 reachable[node] = 1
554 554 if stop:
555 555 stopn = self.rev(stop)
556 556 else:
557 557 stopn = 0
558 558 while visit:
559 559 n = visit.pop(0)
560 560 if n == stop:
561 561 continue
562 562 if n == nullid:
563 563 continue
564 564 for p in self.parents(n):
565 565 if self.rev(p) < stopn:
566 566 continue
567 567 if p not in reachable:
568 568 reachable[p] = 1
569 569 visit.append(p)
570 570 return reachable
571 571
572 572 def nodesbetween(self, roots=None, heads=None):
573 573 """Return a tuple containing three elements. Elements 1 and 2 contain
574 574 a final list of bases and heads after all the unreachable ones have been
575 575 pruned. Element 0 contains a topologically sorted list of all
576 576
577 577 nodes that satisfy these constraints:
578 578 1. All nodes must be descended from a node in roots (the nodes on
579 579 roots are considered descended from themselves).
580 580 2. All nodes must also be ancestors of a node in heads (the nodes in
581 581 heads are considered to be their own ancestors).
582 582
583 583 If roots is unspecified, nullid is assumed as the only root.
584 584 If heads is unspecified, it is taken to be the output of the
585 585 heads method (i.e. a list of all nodes in the repository that
586 586 have no children)."""
587 587 nonodes = ([], [], [])
588 588 if roots is not None:
589 589 roots = list(roots)
590 590 if not roots:
591 591 return nonodes
592 592 lowestrev = min([self.rev(n) for n in roots])
593 593 else:
594 594 roots = [nullid] # Everybody's a descendent of nullid
595 595 lowestrev = nullrev
596 596 if (lowestrev == nullrev) and (heads is None):
597 597 # We want _all_ the nodes!
598 598 return ([self.node(r) for r in xrange(0, self.count())],
599 599 [nullid], list(self.heads()))
600 600 if heads is None:
601 601 # All nodes are ancestors, so the latest ancestor is the last
602 602 # node.
603 603 highestrev = self.count() - 1
604 604 # Set ancestors to None to signal that every node is an ancestor.
605 605 ancestors = None
606 606 # Set heads to an empty dictionary for later discovery of heads
607 607 heads = {}
608 608 else:
609 609 heads = list(heads)
610 610 if not heads:
611 611 return nonodes
612 612 ancestors = {}
613 613 # Turn heads into a dictionary so we can remove 'fake' heads.
614 614 # Also, later we will be using it to filter out the heads we can't
615 615 # find from roots.
616 616 heads = dict.fromkeys(heads, 0)
617 617 # Start at the top and keep marking parents until we're done.
618 618 nodestotag = heads.keys()
619 619 # Remember where the top was so we can use it as a limit later.
620 620 highestrev = max([self.rev(n) for n in nodestotag])
621 621 while nodestotag:
622 622 # grab a node to tag
623 623 n = nodestotag.pop()
624 624 # Never tag nullid
625 625 if n == nullid:
626 626 continue
627 627 # A node's revision number represents its place in a
628 628 # topologically sorted list of nodes.
629 629 r = self.rev(n)
630 630 if r >= lowestrev:
631 631 if n not in ancestors:
632 632 # If we are possibly a descendent of one of the roots
633 633 # and we haven't already been marked as an ancestor
634 634 ancestors[n] = 1 # Mark as ancestor
635 635 # Add non-nullid parents to list of nodes to tag.
636 636 nodestotag.extend([p for p in self.parents(n) if
637 637 p != nullid])
638 638 elif n in heads: # We've seen it before, is it a fake head?
639 639 # So it is, real heads should not be the ancestors of
640 640 # any other heads.
641 641 heads.pop(n)
642 642 if not ancestors:
643 643 return nonodes
644 644 # Now that we have our set of ancestors, we want to remove any
645 645 # roots that are not ancestors.
646 646
647 647 # If one of the roots was nullid, everything is included anyway.
648 648 if lowestrev > nullrev:
649 649 # But, since we weren't, let's recompute the lowest rev to not
650 650 # include roots that aren't ancestors.
651 651
652 652 # Filter out roots that aren't ancestors of heads
653 653 roots = [n for n in roots if n in ancestors]
654 654 # Recompute the lowest revision
655 655 if roots:
656 656 lowestrev = min([self.rev(n) for n in roots])
657 657 else:
658 658 # No more roots? Return empty list
659 659 return nonodes
660 660 else:
661 661 # We are descending from nullid, and don't need to care about
662 662 # any other roots.
663 663 lowestrev = nullrev
664 664 roots = [nullid]
665 665 # Transform our roots list into a 'set' (i.e. a dictionary where the
666 666 # values don't matter).
667 667 descendents = dict.fromkeys(roots, 1)
668 668 # Also, keep the original roots so we can filter out roots that aren't
669 669 # 'real' roots (i.e. are descended from other roots).
670 670 roots = descendents.copy()
671 671 # Our topologically sorted list of output nodes.
672 672 orderedout = []
673 673 # Don't start at nullid since we don't want nullid in our output list,
674 674 # and if nullid shows up in descendents, empty parents will look like
675 675 # they're descendents.
676 676 for r in xrange(max(lowestrev, 0), highestrev + 1):
677 677 n = self.node(r)
678 678 isdescendent = False
679 679 if lowestrev == nullrev: # Everybody is a descendent of nullid
680 680 isdescendent = True
681 681 elif n in descendents:
682 682 # n is already a descendent
683 683 isdescendent = True
684 684 # This check only needs to be done here because all the roots
685 685 # will start being marked as descendents before the loop.
686 686 if n in roots:
687 687 # If n was a root, check if it's a 'real' root.
688 688 p = tuple(self.parents(n))
689 689 # If any of its parents are descendents, it's not a root.
690 690 if (p[0] in descendents) or (p[1] in descendents):
691 691 roots.pop(n)
692 692 else:
693 693 p = tuple(self.parents(n))
694 694 # A node is a descendent if either of its parents are
695 695 # descendents. (We seeded the descendents list with the roots
696 696 # up there, remember?)
697 697 if (p[0] in descendents) or (p[1] in descendents):
698 698 descendents[n] = 1
699 699 isdescendent = True
700 700 if isdescendent and ((ancestors is None) or (n in ancestors)):
701 701 # Only include nodes that are both descendents and ancestors.
702 702 orderedout.append(n)
703 703 if (ancestors is not None) and (n in heads):
704 704 # We're trying to figure out which heads are reachable
705 705 # from roots.
706 706 # Mark this head as having been reached
707 707 heads[n] = 1
708 708 elif ancestors is None:
709 709 # Otherwise, we're trying to discover the heads.
710 710 # Assume this is a head because if it isn't, the next step
711 711 # will eventually remove it.
712 712 heads[n] = 1
713 713 # But, obviously its parents aren't.
714 714 for p in self.parents(n):
715 715 heads.pop(p, None)
716 716 heads = [n for n in heads.iterkeys() if heads[n] != 0]
717 717 roots = roots.keys()
718 718 assert orderedout
719 719 assert roots
720 720 assert heads
721 721 return (orderedout, roots, heads)
722 722
723 723 def heads(self, start=None, stop=None):
724 724 """return the list of all nodes that have no children
725 725
726 726 if start is specified, only heads that are descendants of
727 727 start will be returned
728 728 if stop is specified, it will consider all the revs from stop
729 729 as if they had no children
730 730 """
731 731 if start is None:
732 732 start = nullid
733 733 if stop is None:
734 734 stop = []
735 735 stoprevs = dict.fromkeys([self.rev(n) for n in stop])
736 736 startrev = self.rev(start)
737 737 reachable = {startrev: 1}
738 738 heads = {startrev: 1}
739 739
740 740 parentrevs = self.parentrevs
741 741 for r in xrange(startrev + 1, self.count()):
742 742 for p in parentrevs(r):
743 743 if p in reachable:
744 744 if r not in stoprevs:
745 745 reachable[r] = 1
746 746 heads[r] = 1
747 747 if p in heads and p not in stoprevs:
748 748 del heads[p]
749 749
750 750 return [self.node(r) for r in heads]
751 751
752 752 def children(self, node):
753 753 """find the children of a given node"""
754 754 c = []
755 755 p = self.rev(node)
756 756 for r in range(p + 1, self.count()):
757 757 for pr in self.parentrevs(r):
758 758 if pr == p:
759 759 c.append(self.node(r))
760 760 return c
761 761
762 762 def _match(self, id):
763 763 if isinstance(id, (long, int)):
764 764 # rev
765 765 return self.node(id)
766 766 if len(id) == 20:
767 767 # possibly a binary node
768 768 # odds of a binary node being all hex in ASCII are 1 in 10**25
769 769 try:
770 770 node = id
771 771 r = self.rev(node) # quick search the index
772 772 return node
773 773 except RevlogError:
774 774 pass # may be partial hex id
775 775 try:
776 776 # str(rev)
777 777 rev = int(id)
778 778 if str(rev) != id: raise ValueError
779 779 if rev < 0: rev = self.count() + rev
780 780 if rev < 0 or rev >= self.count(): raise ValueError
781 781 return self.node(rev)
782 782 except (ValueError, OverflowError):
783 783 pass
784 784 if len(id) == 40:
785 785 try:
786 786 # a full hex nodeid?
787 787 node = bin(id)
788 788 r = self.rev(node)
789 789 return node
790 790 except TypeError:
791 791 pass
792 792
793 793 def _partialmatch(self, id):
794 794 if len(id) < 40:
795 795 try:
796 796 # hex(node)[:...]
797 797 bin_id = bin(id[:len(id) & ~1]) # grab an even number of digits
798 798 node = None
799 799 for n in self.nodemap:
800 800 if n.startswith(bin_id) and hex(n).startswith(id):
801 801 if node is not None:
802 802 raise RevlogError(_("Ambiguous identifier"))
803 803 node = n
804 804 if node is not None:
805 805 return node
806 806 except TypeError:
807 807 pass
808 808
809 809 def lookup(self, id):
810 810 """locate a node based on:
811 811 - revision number or str(revision number)
812 812 - nodeid or subset of hex nodeid
813 813 """
814 814
815 815 n = self._match(id)
816 816 if n is not None:
817 817 return n
818 818 n = self._partialmatch(id)
819 819 if n:
820 820 return n
821 821
822 822 raise RevlogError(_("No match found"))
823 823
824 824 def cmp(self, node, text):
825 825 """compare text with a given file revision"""
826 826 p1, p2 = self.parents(node)
827 827 return hash(text, p1, p2) != node
828 828
829 829 def makenode(self, node, text):
830 830 """calculate a file nodeid for text, descended or possibly
831 831 unchanged from node"""
832 832
833 833 if self.cmp(node, text):
834 834 return hash(text, node, nullid)
835 835 return node
836 836
837 837 def diff(self, a, b):
838 838 """return a delta between two revisions"""
839 839 return mdiff.textdiff(a, b)
840 840
841 841 def patches(self, t, pl):
842 842 """apply a list of patches to a string"""
843 843 return mdiff.patches(t, pl)
844 844
845 845 def chunk(self, rev, df=None, cachelen=4096):
846 846 start, length = self.start(rev), self.length(rev)
847 847 inline = self.inlinedata()
848 848 if inline:
849 849 start += (rev + 1) * struct.calcsize(self.indexformat)
850 850 end = start + length
851 851 def loadcache(df):
852 852 cache_length = max(cachelen, length) # 4k
853 853 if not df:
854 854 if inline:
855 855 df = self.opener(self.indexfile)
856 856 else:
857 857 df = self.opener(self.datafile)
858 858 df.seek(start)
859 859 self.chunkcache = (start, df.read(cache_length))
860 860
861 861 if not self.chunkcache:
862 862 loadcache(df)
863 863
864 864 cache_start = self.chunkcache[0]
865 865 cache_end = cache_start + len(self.chunkcache[1])
866 866 if start >= cache_start and end <= cache_end:
867 867 # it is cached
868 868 offset = start - cache_start
869 869 else:
870 870 loadcache(df)
871 871 offset = 0
872 872
873 873 #def checkchunk():
874 874 # df = self.opener(self.datafile)
875 875 # df.seek(start)
876 876 # return df.read(length)
877 877 #assert s == checkchunk()
878 878 return decompress(self.chunkcache[1][offset:offset + length])
879 879
880 880 def delta(self, node):
881 881 """return or calculate a delta between a node and its predecessor"""
882 882 r = self.rev(node)
883 883 return self.revdiff(r - 1, r)
884 884
885 885 def revdiff(self, rev1, rev2):
886 886 """return or calculate a delta between two revisions"""
887 887 b1 = self.base(rev1)
888 888 b2 = self.base(rev2)
889 889 if b1 == b2 and rev1 + 1 == rev2:
890 890 return self.chunk(rev2)
891 891 else:
892 892 return self.diff(self.revision(self.node(rev1)),
893 893 self.revision(self.node(rev2)))
894 894
895 895 def revision(self, node):
896 896 """return an uncompressed revision of a given"""
897 897 if node == nullid: return ""
898 898 if self.cache and self.cache[0] == node: return self.cache[2]
899 899
900 900 # look up what we need to read
901 901 text = None
902 902 rev = self.rev(node)
903 903 base = self.base(rev)
904 904
905 905 if self.inlinedata():
906 906 # we probably have the whole chunk cached
907 907 df = None
908 908 else:
909 909 df = self.opener(self.datafile)
910 910
911 911 # do we have useful data cached?
912 912 if self.cache and self.cache[1] >= base and self.cache[1] < rev:
913 913 base = self.cache[1]
914 914 text = self.cache[2]
915 915 self.loadindex(base, rev + 1)
916 916 else:
917 917 self.loadindex(base, rev + 1)
918 918 text = self.chunk(base, df=df)
919 919
920 920 bins = []
921 921 for r in xrange(base + 1, rev + 1):
922 922 bins.append(self.chunk(r, df=df))
923 923
924 924 text = self.patches(text, bins)
925 925
926 926 p1, p2 = self.parents(node)
927 927 if node != hash(text, p1, p2):
928 928 raise RevlogError(_("integrity check failed on %s:%d")
929 929 % (self.datafile, rev))
930 930
931 931 self.cache = (node, rev, text)
932 932 return text
933 933
934 934 def checkinlinesize(self, tr, fp=None):
935 935 if not self.inlinedata():
936 936 return
937 937 if not fp:
938 938 fp = self.opener(self.indexfile, 'r')
939 939 fp.seek(0, 2)
940 940 size = fp.tell()
941 941 if size < 131072:
942 942 return
943 943 trinfo = tr.find(self.indexfile)
944 944 if trinfo == None:
945 945 raise RevlogError(_("%s not found in the transaction")
946 946 % self.indexfile)
947 947
948 948 trindex = trinfo[2]
949 949 dataoff = self.start(trindex)
950 950
951 951 tr.add(self.datafile, dataoff)
952 952 df = self.opener(self.datafile, 'w')
953 953 calc = struct.calcsize(self.indexformat)
954 954 for r in xrange(self.count()):
955 955 start = self.start(r) + (r + 1) * calc
956 956 length = self.length(r)
957 957 fp.seek(start)
958 958 d = fp.read(length)
959 959 df.write(d)
960 960 fp.close()
961 961 df.close()
962 962 fp = self.opener(self.indexfile, 'w', atomictemp=True)
963 963 self.version &= ~(REVLOGNGINLINEDATA)
964 964 if self.count():
965 965 x = self.index[0]
966 966 e = struct.pack(self.indexformat, *x)[4:]
967 967 l = struct.pack(versionformat, self.version)
968 968 fp.write(l)
969 969 fp.write(e)
970 970
971 971 for i in xrange(1, self.count()):
972 972 x = self.index[i]
973 973 e = struct.pack(self.indexformat, *x)
974 974 fp.write(e)
975 975
976 976 # if we don't call rename, the temp file will never replace the
977 977 # real index
978 978 fp.rename()
979 979
980 980 tr.replace(self.indexfile, trindex * calc)
981 981 self.chunkcache = None
982 982
983 983 def addrevision(self, text, transaction, link, p1=None, p2=None, d=None):
984 984 """add a revision to the log
985 985
986 986 text - the revision data to add
987 987 transaction - the transaction object used for rollback
988 988 link - the linkrev data to add
989 989 p1, p2 - the parent nodeids of the revision
990 990 d - an optional precomputed delta
991 991 """
992 992 if not self.inlinedata():
993 993 dfh = self.opener(self.datafile, "a")
994 994 else:
995 995 dfh = None
996 996 ifh = self.opener(self.indexfile, "a+")
997 997 return self._addrevision(text, transaction, link, p1, p2, d, ifh, dfh)
998 998
999 999 def _addrevision(self, text, transaction, link, p1, p2, d, ifh, dfh):
1000 1000 if text is None: text = ""
1001 1001 if p1 is None: p1 = self.tip()
1002 1002 if p2 is None: p2 = nullid
1003 1003
1004 1004 node = hash(text, p1, p2)
1005 1005
1006 1006 if node in self.nodemap:
1007 1007 return node
1008 1008
1009 1009 n = self.count()
1010 1010 t = n - 1
1011 1011
1012 1012 if n:
1013 1013 base = self.base(t)
1014 1014 start = self.start(base)
1015 1015 end = self.end(t)
1016 1016 if not d:
1017 1017 prev = self.revision(self.tip())
1018 1018 d = self.diff(prev, text)
1019 1019 data = compress(d)
1020 1020 l = len(data[1]) + len(data[0])
1021 1021 dist = end - start + l
1022 1022
1023 1023 # full versions are inserted when the needed deltas
1024 1024 # become comparable to the uncompressed text
1025 1025 if not n or dist > len(text) * 2:
1026 1026 data = compress(text)
1027 1027 l = len(data[1]) + len(data[0])
1028 1028 base = n
1029 1029 else:
1030 1030 base = self.base(t)
1031 1031
1032 1032 offset = 0
1033 1033 if t >= 0:
1034 1034 offset = self.end(t)
1035 1035
1036 1036 if self.version == REVLOGV0:
1037 1037 e = (offset, l, base, link, p1, p2, node)
1038 1038 else:
1039 1039 e = (self.offset_type(offset, 0), l, len(text),
1040 1040 base, link, self.rev(p1), self.rev(p2), node)
1041 1041
1042 1042 self.index.append(e)
1043 1043 self.nodemap[node] = n
1044 1044 entry = struct.pack(self.indexformat, *e)
1045 1045
1046 1046 if not self.inlinedata():
1047 1047 transaction.add(self.datafile, offset)
1048 1048 transaction.add(self.indexfile, n * len(entry))
1049 1049 if data[0]:
1050 1050 dfh.write(data[0])
1051 1051 dfh.write(data[1])
1052 1052 dfh.flush()
1053 1053 else:
1054 1054 ifh.seek(0, 2)
1055 1055 transaction.add(self.indexfile, ifh.tell(), self.count() - 1)
1056 1056
1057 1057 if len(self.index) == 1 and self.version != REVLOGV0:
1058 1058 l = struct.pack(versionformat, self.version)
1059 1059 ifh.write(l)
1060 1060 entry = entry[4:]
1061 1061
1062 1062 ifh.write(entry)
1063 1063
1064 1064 if self.inlinedata():
1065 1065 ifh.write(data[0])
1066 1066 ifh.write(data[1])
1067 1067 self.checkinlinesize(transaction, ifh)
1068 1068
1069 1069 self.cache = (node, n, text)
1070 1070 return node
1071 1071
1072 1072 def ancestor(self, a, b):
1073 1073 """calculate the least common ancestor of nodes a and b"""
1074 1074
1075 1075 def parents(rev):
1076 1076 return [p for p in self.parentrevs(rev) if p != nullrev]
1077 1077
1078 1078 c = ancestor.ancestor(self.rev(a), self.rev(b), parents)
1079 1079 if c is None:
1080 1080 return nullid
1081 1081
1082 1082 return self.node(c)
1083 1083
1084 1084 def group(self, nodelist, lookup, infocollect=None):
1085 1085 """calculate a delta group
1086 1086
1087 1087 Given a list of changeset revs, return a set of deltas and
1088 1088 metadata corresponding to nodes. the first delta is
1089 1089 parent(nodes[0]) -> nodes[0]. The receiver is guaranteed to
1090 1090 have this parent as it has all history before these
1091 1091 changesets. parent is parent[0]
1092 1092 """
1093 1093 revs = [self.rev(n) for n in nodelist]
1094 1094
1095 1095 # if we don't have any revisions touched by these changesets, bail
1096 1096 if not revs:
1097 1097 yield changegroup.closechunk()
1098 1098 return
1099 1099
1100 1100 # add the parent of the first rev
1101 1101 p = self.parents(self.node(revs[0]))[0]
1102 1102 revs.insert(0, self.rev(p))
1103 1103
1104 1104 # build deltas
1105 1105 for d in xrange(0, len(revs) - 1):
1106 1106 a, b = revs[d], revs[d + 1]
1107 1107 nb = self.node(b)
1108 1108
1109 1109 if infocollect is not None:
1110 1110 infocollect(nb)
1111 1111
1112 1112 d = self.revdiff(a, b)
1113 1113 p = self.parents(nb)
1114 1114 meta = nb + p[0] + p[1] + lookup(nb)
1115 1115 yield changegroup.genchunk("%s%s" % (meta, d))
1116 1116
1117 1117 yield changegroup.closechunk()
1118 1118
1119 1119 def addgroup(self, revs, linkmapper, transaction, unique=0):
1120 1120 """
1121 1121 add a delta group
1122 1122
1123 1123 given a set of deltas, add them to the revision log. the
1124 1124 first delta is against its parent, which should be in our
1125 1125 log, the rest are against the previous delta.
1126 1126 """
1127 1127
1128 1128 # track the base of the current delta log
1129 1129 r = self.count()
1130 1130 t = r - 1
1131 1131 node = None
1132 1132
1133 1133 base = prev = nullrev
1134 1134 start = end = textlen = 0
1135 1135 if r:
1136 1136 end = self.end(t)
1137 1137
1138 1138 ifh = self.opener(self.indexfile, "a+")
1139 1139 ifh.seek(0, 2)
1140 1140 transaction.add(self.indexfile, ifh.tell(), self.count())
1141 1141 if self.inlinedata():
1142 1142 dfh = None
1143 1143 else:
1144 1144 transaction.add(self.datafile, end)
1145 1145 dfh = self.opener(self.datafile, "a")
1146 1146
1147 1147 # loop through our set of deltas
1148 1148 chain = None
1149 1149 for chunk in revs:
1150 1150 node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
1151 1151 link = linkmapper(cs)
1152 1152 if node in self.nodemap:
1153 1153 # this can happen if two branches make the same change
1154 1154 # if unique:
1155 1155 # raise RevlogError(_("already have %s") % hex(node[:4]))
1156 1156 chain = node
1157 1157 continue
1158 1158 delta = chunk[80:]
1159 1159
1160 1160 for p in (p1, p2):
1161 1161 if not p in self.nodemap:
1162 1162 raise RevlogError(_("unknown parent %s") % short(p))
1163 1163
1164 1164 if not chain:
1165 1165 # retrieve the parent revision of the delta chain
1166 1166 chain = p1
1167 1167 if not chain in self.nodemap:
1168 1168 raise RevlogError(_("unknown base %s") % short(chain[:4]))
1169 1169
1170 1170 # full versions are inserted when the needed deltas become
1171 1171 # comparable to the uncompressed text or when the previous
1172 1172 # version is not the one we have a delta against. We use
1173 1173 # the size of the previous full rev as a proxy for the
1174 1174 # current size.
1175 1175
1176 1176 if chain == prev:
1177 1177 tempd = compress(delta)
1178 1178 cdelta = tempd[0] + tempd[1]
1179 1179 textlen = mdiff.patchedsize(textlen, delta)
1180 1180
1181 1181 if chain != prev or (end - start + len(cdelta)) > textlen * 2:
1182 1182 # flush our writes here so we can read it in revision
1183 1183 if dfh:
1184 1184 dfh.flush()
1185 1185 ifh.flush()
1186 1186 text = self.revision(chain)
1187 1187 text = self.patches(text, [delta])
1188 1188 chk = self._addrevision(text, transaction, link, p1, p2, None,
1189 1189 ifh, dfh)
1190 1190 if not dfh and not self.inlinedata():
1191 1191 # addrevision switched from inline to conventional
1192 1192 # reopen the index
1193 1193 dfh = self.opener(self.datafile, "a")
1194 1194 ifh = self.opener(self.indexfile, "a")
1195 1195 if chk != node:
1196 1196 raise RevlogError(_("consistency error adding group"))
1197 1197 textlen = len(text)
1198 1198 else:
1199 1199 if self.version == REVLOGV0:
1200 1200 e = (end, len(cdelta), base, link, p1, p2, node)
1201 1201 else:
1202 1202 e = (self.offset_type(end, 0), len(cdelta), textlen, base,
1203 1203 link, self.rev(p1), self.rev(p2), node)
1204 1204 self.index.append(e)
1205 1205 self.nodemap[node] = r
1206 1206 if self.inlinedata():
1207 1207 ifh.write(struct.pack(self.indexformat, *e))
1208 1208 ifh.write(cdelta)
1209 1209 self.checkinlinesize(transaction, ifh)
1210 1210 if not self.inlinedata():
1211 1211 dfh = self.opener(self.datafile, "a")
1212 1212 ifh = self.opener(self.indexfile, "a")
1213 1213 else:
1214 1214 dfh.write(cdelta)
1215 1215 ifh.write(struct.pack(self.indexformat, *e))
1216 1216
1217 1217 t, r, chain, prev = r, r + 1, node, node
1218 1218 base = self.base(t)
1219 1219 start = self.start(base)
1220 1220 end = self.end(t)
1221 1221
1222 1222 return node
1223 1223
1224 1224 def strip(self, rev, minlink):
1225 1225 if self.count() == 0 or rev >= self.count():
1226 1226 return
1227 1227
1228 1228 if isinstance(self.index, lazyindex):
1229 1229 self.loadindexmap()
1230 1230
1231 1231 # When stripping away a revision, we need to make sure it
1232 1232 # does not actually belong to an older changeset.
1233 1233 # The minlink parameter defines the oldest revision
1234 1234 # we're allowed to strip away.
1235 1235 while minlink > self.index[rev][-4]:
1236 1236 rev += 1
1237 1237 if rev >= self.count():
1238 1238 return
1239 1239
1240 1240 # first truncate the files on disk
1241 1241 end = self.start(rev)
1242 1242 if not self.inlinedata():
1243 1243 df = self.opener(self.datafile, "a")
1244 1244 df.truncate(end)
1245 1245 end = rev * struct.calcsize(self.indexformat)
1246 1246 else:
1247 1247 end += rev * struct.calcsize(self.indexformat)
1248 1248
1249 1249 indexf = self.opener(self.indexfile, "a")
1250 1250 indexf.truncate(end)
1251 1251
1252 1252 # then reset internal state in memory to forget those revisions
1253 1253 self.cache = None
1254 1254 self.chunkcache = None
1255 1255 for x in xrange(rev, self.count()):
1256 1256 del self.nodemap[self.node(x)]
1257 1257
1258 1258 del self.index[rev:]
1259 1259
1260 1260 def checksize(self):
1261 1261 expected = 0
1262 1262 if self.count():
1263 1263 expected = self.end(self.count() - 1)
1264 1264
1265 1265 try:
1266 1266 f = self.opener(self.datafile)
1267 1267 f.seek(0, 2)
1268 1268 actual = f.tell()
1269 1269 dd = actual - expected
1270 1270 except IOError, inst:
1271 1271 if inst.errno != errno.ENOENT:
1272 1272 raise
1273 1273 dd = 0
1274 1274
1275 1275 try:
1276 1276 f = self.opener(self.indexfile)
1277 1277 f.seek(0, 2)
1278 1278 actual = f.tell()
1279 1279 s = struct.calcsize(self.indexformat)
1280 1280 i = actual / s
1281 1281 di = actual - (i * s)
1282 1282 if self.inlinedata():
1283 1283 databytes = 0
1284 1284 for r in xrange(self.count()):
1285 1285 databytes += self.length(r)
1286 1286 dd = 0
1287 1287 di = actual - self.count() * s - databytes
1288 1288 except IOError, inst:
1289 1289 if inst.errno != errno.ENOENT:
1290 1290 raise
1291 1291 di = 0
1292 1292
1293 1293 return (dd, di)
1294 1294
1295 1295
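
For reference, here is a hedged sketch of how a single 64-byte RevlogNG index entry is packed and unpacked with the indexformatng layout defined near the top of the file. The field values are invented for illustration; offset_type mirrors the method of the same name above, which keeps 48 bits of data-file offset in the high bits and 16 bits of flags in the low bits:

    import struct

    indexformatng = ">Qiiiiii20s12x"
    assert struct.calcsize(indexformatng) == 64

    def offset_type(offset, type):
        # 48-bit data-file offset in the high bits, 16-bit flags in the low bits
        return long(long(offset) << 16 | type)

    nullrev = -1
    node = "\0" * 19 + "\1"            # made-up 20-byte binary nodeid
    e = (offset_type(4096, 0),         # offset/flags word
         120,                          # compressed chunk length
         300,                          # uncompressed text length
         5,                            # base rev of the delta chain
         7,                            # link rev
         4, nullrev,                   # parent revs (no second parent)
         node)
    entry = struct.pack(indexformatng, *e)
    assert len(entry) == 64
    assert struct.unpack(indexformatng, entry)[0] >> 16 == 4096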