lazyindex: handle __delitem__ in loadblock
Brendan Cully
r4061:40030c1b default
@@ -1,1292 +1,1295 @@
1 1 """
2 2 revlog.py - storage back-end for mercurial
3 3
4 4 This provides efficient delta storage with O(1) retrieve and append
5 5 and O(changes) merge between branches
6 6
7 7 Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
8 8
9 9 This software may be used and distributed according to the terms
10 10 of the GNU General Public License, incorporated herein by reference.
11 11 """
12 12
13 13 from node import *
14 14 from i18n import _
15 15 import binascii, changegroup, errno, ancestor, mdiff, os
16 16 import sha, struct, util, zlib
17 17
18 18 # revlog version strings
19 19 REVLOGV0 = 0
20 20 REVLOGNG = 1
21 21
22 22 # revlog flags
23 23 REVLOGNGINLINEDATA = (1 << 16)
24 24 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
25 25
26 26 REVLOG_DEFAULT_FORMAT = REVLOGNG
27 27 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
28 28
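As a worked example of how these constants combine: the default version value interleaves the format in the low 16 bits and the flags above them, and load() below splits them back apart. A minimal sketch using the constants just defined:

    v = REVLOG_DEFAULT_VERSION   # REVLOGNG | REVLOGNGINLINEDATA = 0x10001
    fmt = v & 0xFFFF             # low 16 bits: format -> 1 (REVLOGNG)
    flags = v & ~0xFFFF          # high bits: flags -> 0x10000 (inline data)
    assert fmt == REVLOGNG and flags == REVLOGNGINLINEDATA
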
29 29 def flagstr(flag):
30 30 if flag == "inline":
31 31 return REVLOGNGINLINEDATA
32 32 raise RevlogError(_("unknown revlog flag %s") % flag)
33 33
34 34 def hash(text, p1, p2):
35 35 """generate a hash from the given text and its parent hashes
36 36
37 37 This hash combines both the current file contents and its history
38 38 in a manner that makes it easy to distinguish nodes with the same
39 39 content in the revision graph.
40 40 """
41 41 l = [p1, p2]
42 42 l.sort()
43 43 s = sha.new(l[0])
44 44 s.update(l[1])
45 45 s.update(text)
46 46 return s.digest()
47 47
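Since hash() sorts the parent hashes before digesting, a node id does not depend on parent order. A minimal check, assuming the hash() above is in scope and using two made-up 20-byte parent ids:

    p1, p2 = "a" * 20, "b" * 20   # fake parent nodeids, illustrative only
    assert hash("some text", p1, p2) == hash("some text", p2, p1)
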
48 48 def compress(text):
49 49 """ generate a possibly-compressed representation of text """
50 50 if not text: return ("", text)
51 51 if len(text) < 44:
52 52 if text[0] == '\0': return ("", text)
53 53 return ('u', text)
54 54 bin = zlib.compress(text)
55 55 if len(bin) > len(text):
56 56 if text[0] == '\0': return ("", text)
57 57 return ('u', text)
58 58 return ("", bin)
59 59
60 60 def decompress(bin):
61 61 """ decompress the given input """
62 62 if not bin: return bin
63 63 t = bin[0]
64 64 if t == '\0': return bin
65 65 if t == 'x': return zlib.decompress(bin)
66 66 if t == 'u': return bin[1:]
67 67 raise RevlogError(_("unknown compression type %r") % t)
68 68
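The first byte of a stored chunk acts as a tag: empty chunks and chunks starting with NUL are kept verbatim, 'u' marks an uncompressed literal, and zlib output conveniently begins with 'x'. A round-trip sketch of the compress()/decompress() pair above:

    text = "x" * 100                       # long and compressible
    tag, data = compress(text)
    assert decompress(tag + data) == text  # zlib branch ('x' header byte)

    short = "short text"                   # under 44 bytes: kept literal
    assert compress(short) == ('u', short)
    assert decompress('u' + short) == short
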
69 69 indexformatv0 = ">4l20s20s20s"
70 70 v0shaoffset = 56
71 71 # index ng:
72 72 # 6 bytes offset
73 73 # 2 bytes flags
74 74 # 4 bytes compressed length
75 75 # 4 bytes uncompressed length
76 76 # 4 bytes: base rev
77 77 # 4 bytes link rev
78 78 # 4 bytes parent 1 rev
79 79 # 4 bytes parent 2 rev
80 80 # 32 bytes: nodeid
81 81 indexformatng = ">Qiiiiii20s12x"
82 82 ngshaoffset = 32
83 83 versionformat = ">I"
84 84
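For concreteness, every RevlogNG index entry is a fixed 64 bytes (v0 entries are 76), so in a non-inline index record n starts at byte 64*n; this is what lets lazyparser below seek straight to a block. A round-trip sketch with one synthetic entry (field values are made up, laid out per the comment above):

    assert struct.calcsize(indexformatng) == 64
    assert struct.calcsize(indexformatv0) == 76

    e = (0, 11, 24, 0, 5, -1, -1, "\0" * 20)   # offset/flags, lengths,
                                               # base, link, p1, p2, nodeid
    assert struct.unpack(indexformatng, struct.pack(indexformatng, *e)) == e
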
85 85 class lazyparser(object):
86 86 """
87 87 this class avoids the need to parse the entirety of large indices
88 88 """
89 89
 90 90 # lazyparser is not safe to use on windows if the win32 extensions are
 91 91 # not available. it keeps a file handle open, which makes it impossible
 92 92 # to break hardlinks on local cloned repos.
93 93 safe_to_use = os.name != 'nt' or (not util.is_win_9x() and
94 94 hasattr(util, 'win32api'))
95 95
96 96 def __init__(self, dataf, size, indexformat, shaoffset):
97 97 self.dataf = dataf
98 98 self.format = indexformat
99 99 self.s = struct.calcsize(indexformat)
100 100 self.indexformat = indexformat
101 101 self.datasize = size
102 102 self.l = size/self.s
103 103 self.index = [None] * self.l
104 104 self.map = {nullid: nullrev}
105 105 self.allmap = 0
106 106 self.all = 0
107 107 self.mapfind_count = 0
108 108 self.shaoffset = shaoffset
109 109
110 110 def loadmap(self):
111 111 """
112 112 during a commit, we need to make sure the rev being added is
113 113 not a duplicate. This requires loading the entire index,
114 114 which is fairly slow. loadmap can load up just the node map,
115 115 which takes much less time.
116 116 """
117 117 if self.allmap: return
118 118 end = self.datasize
119 119 self.allmap = 1
120 120 cur = 0
121 121 count = 0
122 122 blocksize = self.s * 256
123 123 self.dataf.seek(0)
124 124 while cur < end:
125 125 data = self.dataf.read(blocksize)
126 126 off = 0
127 127 for x in xrange(256):
128 128 n = data[off + self.shaoffset:off + self.shaoffset + 20]
129 129 self.map[n] = count
130 130 count += 1
131 131 if count >= self.l:
132 132 break
133 133 off += self.s
134 134 cur += blocksize
135 135
136 136 def loadblock(self, blockstart, blocksize, data=None):
137 137 if self.all: return
138 138 if data is None:
139 139 self.dataf.seek(blockstart)
140 140 if blockstart + blocksize > self.datasize:
141 141 # the revlog may have grown since we've started running,
142 142 # but we don't have space in self.index for more entries.
143 143 # limit blocksize so that we don't get too much data.
144 144 blocksize = max(self.datasize - blockstart, 0)
145 145 data = self.dataf.read(blocksize)
146 146 lend = len(data) / self.s
147 147 i = blockstart / self.s
148 148 off = 0
149 # lazyindex supports __delitem__
150 if lend > len(self.index) - i:
151 lend = len(self.index) - i
149 152 for x in xrange(lend):
150 153 if self.index[i + x] == None:
151 154 b = data[off : off + self.s]
152 155 self.index[i + x] = b
153 156 n = b[self.shaoffset:self.shaoffset + 20]
154 157 self.map[n] = i + x
155 158 off += self.s
156 159
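The bounds check added in this change guards against the index having shrunk since the parser sized self.index: strip() deletes trailing entries through lazyindex.__delitem__, so a block read from the unmodified file can describe more entries than remain. A standalone sketch of the clamp, with made-up sizes:

    index = [None] * 10        # self.index as originally sized
    del index[7:]              # strip() truncated it via __delitem__
    i, lend = 5, 4             # a disk block covering entries 5..8
    if lend > len(index) - i:  # 4 > 2: block outruns the shortened list
        lend = len(index) - i  # fill only the 2 surviving slots
    assert lend == 2
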
157 160 def findnode(self, node):
158 161 """search backwards through the index file for a specific node"""
159 162 if self.allmap: return None
160 163
161 164 # hg log will cause many many searches for the manifest
162 165 # nodes. After we get called a few times, just load the whole
163 166 # thing.
164 167 if self.mapfind_count > 8:
165 168 self.loadmap()
166 169 if node in self.map:
167 170 return node
168 171 return None
169 172 self.mapfind_count += 1
170 173 last = self.l - 1
171 174 while self.index[last] != None:
172 175 if last == 0:
173 176 self.all = 1
174 177 self.allmap = 1
175 178 return None
176 179 last -= 1
177 180 end = (last + 1) * self.s
178 181 blocksize = self.s * 256
179 182 while end >= 0:
180 183 start = max(end - blocksize, 0)
181 184 self.dataf.seek(start)
182 185 data = self.dataf.read(end - start)
183 186 findend = end - start
184 187 while True:
185 188 # we're searching backwards, so we have to make sure
186 189 # we don't find a changeset where this node is a parent
187 190 off = data.rfind(node, 0, findend)
188 191 findend = off
189 192 if off >= 0:
190 193 i = off / self.s
191 194 off = i * self.s
192 195 n = data[off + self.shaoffset:off + self.shaoffset + 20]
193 196 if n == node:
194 197 self.map[n] = i + start / self.s
195 198 return node
196 199 else:
197 200 break
198 201 end -= blocksize
199 202 return None
200 203
201 204 def loadindex(self, i=None, end=None):
202 205 if self.all: return
203 206 all = False
204 207 if i == None:
205 208 blockstart = 0
206 209 blocksize = (512 / self.s) * self.s
207 210 end = self.datasize
208 211 all = True
209 212 else:
210 213 if end:
211 214 blockstart = i * self.s
212 215 end = end * self.s
213 216 blocksize = end - blockstart
214 217 else:
215 218 blockstart = (i & ~(32)) * self.s
216 219 blocksize = self.s * 64
217 220 end = blockstart + blocksize
218 221 while blockstart < end:
219 222 self.loadblock(blockstart, blocksize)
220 223 blockstart += blocksize
221 224 if all: self.all = True
222 225
223 226 class lazyindex(object):
224 227 """a lazy version of the index array"""
225 228 def __init__(self, parser):
226 229 self.p = parser
227 230 def __len__(self):
228 231 return len(self.p.index)
229 232 def load(self, pos):
230 233 if pos < 0:
231 234 pos += len(self.p.index)
232 235 self.p.loadindex(pos)
233 236 return self.p.index[pos]
234 237 def __getitem__(self, pos):
235 238 ret = self.p.index[pos] or self.load(pos)
236 239 if isinstance(ret, str):
237 240 ret = struct.unpack(self.p.indexformat, ret)
238 241 return ret
239 242 def __setitem__(self, pos, item):
240 243 self.p.index[pos] = item
241 244 def __delitem__(self, pos):
242 245 del self.p.index[pos]
243 246 def append(self, e):
244 247 self.p.index.append(e)
245 248
246 249 class lazymap(object):
247 250 """a lazy version of the node map"""
248 251 def __init__(self, parser):
249 252 self.p = parser
250 253 def load(self, key):
251 254 n = self.p.findnode(key)
252 255 if n == None:
253 256 raise KeyError(key)
254 257 def __contains__(self, key):
255 258 if key in self.p.map:
256 259 return True
257 260 self.p.loadmap()
258 261 return key in self.p.map
259 262 def __iter__(self):
260 263 yield nullid
261 264 for i in xrange(self.p.l):
262 265 ret = self.p.index[i]
263 266 if not ret:
264 267 self.p.loadindex(i)
265 268 ret = self.p.index[i]
266 269 if isinstance(ret, str):
267 270 ret = struct.unpack(self.p.indexformat, ret)
268 271 yield ret[-1]
269 272 def __getitem__(self, key):
270 273 try:
271 274 return self.p.map[key]
272 275 except KeyError:
273 276 try:
274 277 self.load(key)
275 278 return self.p.map[key]
276 279 except KeyError:
277 280 raise KeyError("node " + hex(key))
278 281 def __setitem__(self, key, val):
279 282 self.p.map[key] = val
280 283 def __delitem__(self, key):
281 284 del self.p.map[key]
282 285
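To see the lazy machinery end to end, here is a minimal in-memory sketch (illustrative only: it packs two synthetic RevlogNG entries into a cStringIO buffer and assumes the classes and formats above are in scope). Entries stay unparsed until first touched:

    from cStringIO import StringIO

    entries = [(0, 0, 0, 0, 0, -1, -1, "a" * 20),
               (0, 0, 0, 0, 1, 0, -1, "b" * 20)]
    buf = "".join(struct.pack(indexformatng, *e) for e in entries)
    p = lazyparser(StringIO(buf), len(buf), indexformatng, ngshaoffset)
    idx, nmap = lazyindex(p), lazymap(p)
    assert p.index == [None, None]    # nothing parsed yet
    assert idx[1][-1] == "b" * 20     # __getitem__ triggers loadblock()
    assert nmap["a" * 20] == 0        # lazymap falls back to findnode()
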
283 286 class RevlogError(Exception): pass
284 287 class LookupError(RevlogError): pass
285 288
286 289 class revlog(object):
287 290 """
288 291 the underlying revision storage object
289 292
290 293 A revlog consists of two parts, an index and the revision data.
291 294
292 295 The index is a file with a fixed record size containing
293 296 information on each revision, including its nodeid (hash), the
294 297 nodeids of its parents, the position and offset of its data within
295 298 the data file, and the revision it's based on. Finally, each entry
296 299 contains a linkrev entry that can serve as a pointer to external
297 300 data.
298 301
299 302 The revision data itself is a linear collection of data chunks.
300 303 Each chunk represents a revision and is usually represented as a
301 304 delta against the previous chunk. To bound lookup time, runs of
302 305 deltas are limited to about 2 times the length of the original
303 306 version data. This makes retrieval of a version proportional to
304 307 its size, or O(1) relative to the number of revisions.
305 308
306 309 Both pieces of the revlog are written to in an append-only
307 310 fashion, which means we never need to rewrite a file to insert or
308 311 remove data, and can use some simple techniques to avoid the need
309 312 for locking while reading.
310 313 """
311 314 def __init__(self, opener, indexfile, datafile,
312 315 defversion=REVLOG_DEFAULT_VERSION):
313 316 """
314 317 create a revlog object
315 318
316 319 opener is a function that abstracts the file opening operation
317 320 and can be used to implement COW semantics or the like.
318 321 """
319 322 self.indexfile = indexfile
320 323 self.datafile = datafile
321 324 self.opener = opener
322 325
323 326 self.indexstat = None
324 327 self.cache = None
325 328 self.chunkcache = None
326 329 self.defversion = defversion
327 330 self.load()
328 331
329 332 def load(self):
330 333 v = self.defversion
331 334 try:
332 335 f = self.opener(self.indexfile)
333 336 i = f.read(4)
334 337 f.seek(0)
335 338 except IOError, inst:
336 339 if inst.errno != errno.ENOENT:
337 340 raise
338 341 i = ""
339 342 else:
340 343 try:
341 344 st = util.fstat(f)
342 345 except AttributeError, inst:
343 346 st = None
344 347 else:
345 348 oldst = self.indexstat
346 349 if (oldst and st.st_dev == oldst.st_dev
347 350 and st.st_ino == oldst.st_ino
348 351 and st.st_mtime == oldst.st_mtime
349 352 and st.st_ctime == oldst.st_ctime):
350 353 return
351 354 self.indexstat = st
352 355 if len(i) > 0:
353 356 v = struct.unpack(versionformat, i)[0]
354 357 flags = v & ~0xFFFF
355 358 fmt = v & 0xFFFF
356 359 if fmt == REVLOGV0:
357 360 if flags:
358 361 raise RevlogError(_("index %s unknown flags %#04x for format v0")
359 362 % (self.indexfile, flags >> 16))
360 363 elif fmt == REVLOGNG:
361 364 if flags & ~REVLOGNGINLINEDATA:
362 365 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
363 366 % (self.indexfile, flags >> 16))
364 367 else:
365 368 raise RevlogError(_("index %s unknown format %d")
366 369 % (self.indexfile, fmt))
367 370 self.version = v
368 371 if v == REVLOGV0:
369 372 self.indexformat = indexformatv0
370 373 shaoffset = v0shaoffset
371 374 else:
372 375 self.indexformat = indexformatng
373 376 shaoffset = ngshaoffset
374 377
375 378 if i:
376 379 if (lazyparser.safe_to_use and not self.inlinedata() and
377 380 st and st.st_size > 10000):
378 381 # big index, let's parse it on demand
379 382 parser = lazyparser(f, st.st_size, self.indexformat, shaoffset)
380 383 self.index = lazyindex(parser)
381 384 self.nodemap = lazymap(parser)
382 385 else:
383 386 self.parseindex(f, st)
384 387 if self.version != REVLOGV0:
385 388 e = list(self.index[0])
386 389 type = self.ngtype(e[0])
387 390 e[0] = self.offset_type(0, type)
388 391 self.index[0] = e
389 392 else:
390 393 self.nodemap = {nullid: nullrev}
391 394 self.index = []
392 395
393 396
394 397 def parseindex(self, fp, st):
395 398 s = struct.calcsize(self.indexformat)
396 399 self.index = []
397 400 self.nodemap = {nullid: nullrev}
398 401 inline = self.inlinedata()
399 402 n = 0
400 403 leftover = None
401 404 while True:
402 405 if st:
403 406 data = fp.read(65536)
404 407 else:
405 408 # hack for httprangereader, it doesn't do partial reads well
406 409 data = fp.read()
407 410 if not data:
408 411 break
409 412 if n == 0 and self.inlinedata():
410 413 # cache the first chunk
411 414 self.chunkcache = (0, data)
412 415 if leftover:
413 416 data = leftover + data
414 417 leftover = None
415 418 off = 0
416 419 l = len(data)
417 420 while off < l:
418 421 if l - off < s:
419 422 leftover = data[off:]
420 423 break
421 424 cur = data[off:off + s]
422 425 off += s
423 426 e = struct.unpack(self.indexformat, cur)
424 427 self.index.append(e)
425 428 self.nodemap[e[-1]] = n
426 429 n += 1
427 430 if inline:
428 431 off += e[1]
429 432 if off > l:
430 433 # some things don't seek well, just read it
431 434 fp.read(off - l)
432 435 if not st:
433 436 break
434 437
435 438
436 439 def ngoffset(self, q):
437 440 if q & 0xFFFF:
438 441 raise RevlogError(_('%s: incompatible revision flag %x') %
439 442 (self.indexfile, q))
440 443 return long(q >> 16)
441 444
442 445 def ngtype(self, q):
443 446 return int(q & 0xFFFF)
444 447
445 448 def offset_type(self, offset, type):
446 449 return long(long(offset) << 16 | type)
447 450
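offset_type() packs a 48-bit data-file offset and a 16-bit type into the single first field of an entry; ngoffset() and ngtype() above recover the two halves. A quick numeric check (illustrative values):

    packed = long(4096) << 16 | 0    # offset_type(4096, 0)
    assert packed == 0x10000000
    assert packed >> 16 == 4096      # ngoffset()
    assert packed & 0xFFFF == 0      # ngtype()
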
448 451 def loadindex(self, start, end):
449 452 """load a block of indexes all at once from the lazy parser"""
450 453 if isinstance(self.index, lazyindex):
451 454 self.index.p.loadindex(start, end)
452 455
453 456 def loadindexmap(self):
454 457 """loads both the map and the index from the lazy parser"""
455 458 if isinstance(self.index, lazyindex):
456 459 p = self.index.p
457 460 p.loadindex()
458 461 self.nodemap = p.map
459 462
460 463 def loadmap(self):
461 464 """loads the map from the lazy parser"""
462 465 if isinstance(self.nodemap, lazymap):
463 466 self.nodemap.p.loadmap()
464 467 self.nodemap = self.nodemap.p.map
465 468
466 469 def inlinedata(self): return self.version & REVLOGNGINLINEDATA
467 470 def tip(self): return self.node(len(self.index) - 1)
468 471 def count(self): return len(self.index)
469 472 def node(self, rev):
470 473 return rev == nullrev and nullid or self.index[rev][-1]
471 474 def rev(self, node):
472 475 try:
473 476 return self.nodemap[node]
474 477 except KeyError:
475 478 raise LookupError(_('%s: no node %s') % (self.indexfile, hex(node)))
476 479 def linkrev(self, node):
477 480 return (node == nullid) and nullrev or self.index[self.rev(node)][-4]
478 481 def parents(self, node):
479 482 if node == nullid: return (nullid, nullid)
480 483 r = self.rev(node)
481 484 d = self.index[r][-3:-1]
482 485 if self.version == REVLOGV0:
483 486 return d
484 487 return (self.node(d[0]), self.node(d[1]))
485 488 def parentrevs(self, rev):
486 489 if rev == nullrev:
487 490 return (nullrev, nullrev)
488 491 d = self.index[rev][-3:-1]
489 492 if self.version == REVLOGV0:
490 493 return (self.rev(d[0]), self.rev(d[1]))
491 494 return d
492 495 def start(self, rev):
493 496 if rev == nullrev:
494 497 return 0
495 498 if self.version != REVLOGV0:
496 499 return self.ngoffset(self.index[rev][0])
497 500 return self.index[rev][0]
498 501
499 502 def end(self, rev): return self.start(rev) + self.length(rev)
500 503
501 504 def size(self, rev):
502 505 """return the length of the uncompressed text for a given revision"""
503 506 if rev == nullrev:
504 507 return 0
505 508 l = -1
506 509 if self.version != REVLOGV0:
507 510 l = self.index[rev][2]
508 511 if l >= 0:
509 512 return l
510 513
511 514 t = self.revision(self.node(rev))
512 515 return len(t)
513 516
514 517 # Alternate implementation. The advantage of this code is that it
515 518 # will be faster for a single revision. But the results are not
516 519 # cached, so finding the size of every revision will be slower.
517 520 """
518 521 if self.cache and self.cache[1] == rev:
519 522 return len(self.cache[2])
520 523
521 524 base = self.base(rev)
522 525 if self.cache and self.cache[1] >= base and self.cache[1] < rev:
523 526 base = self.cache[1]
524 527 text = self.cache[2]
525 528 else:
526 529 text = self.revision(self.node(base))
527 530
528 531 l = len(text)
529 532 for x in xrange(base + 1, rev + 1):
530 533 l = mdiff.patchedsize(l, self.chunk(x))
531 534 return l
532 535 """
533 536
534 537 def length(self, rev):
535 538 if rev == nullrev:
536 539 return 0
537 540 else:
538 541 return self.index[rev][1]
539 542 def base(self, rev):
540 543 if (rev == nullrev):
541 544 return nullrev
542 545 else:
543 546 return self.index[rev][-5]
544 547
545 548 def reachable(self, node, stop=None):
546 549 """return a hash of all nodes ancestral to a given node, including
547 550 the node itself, stopping when stop is matched"""
548 551 reachable = {}
549 552 visit = [node]
550 553 reachable[node] = 1
551 554 if stop:
552 555 stopn = self.rev(stop)
553 556 else:
554 557 stopn = 0
555 558 while visit:
556 559 n = visit.pop(0)
557 560 if n == stop:
558 561 continue
559 562 if n == nullid:
560 563 continue
561 564 for p in self.parents(n):
562 565 if self.rev(p) < stopn:
563 566 continue
564 567 if p not in reachable:
565 568 reachable[p] = 1
566 569 visit.append(p)
567 570 return reachable
568 571
569 572 def nodesbetween(self, roots=None, heads=None):
570 573 """Return a tuple containing three elements. Elements 1 and 2 contain
571 574 a final list bases and heads after all the unreachable ones have been
572 575 pruned. Element 0 contains a topologically sorted list of all
573 576
574 577 nodes that satisfy these constraints:
575 578 1. All nodes must be descended from a node in roots (the nodes on
576 579 roots are considered descended from themselves).
577 580 2. All nodes must also be ancestors of a node in heads (the nodes in
578 581 heads are considered to be their own ancestors).
579 582
580 583 If roots is unspecified, nullid is assumed as the only root.
581 584 If heads is unspecified, it is taken to be the output of the
582 585 heads method (i.e. a list of all nodes in the repository that
583 586 have no children)."""
584 587 nonodes = ([], [], [])
585 588 if roots is not None:
586 589 roots = list(roots)
587 590 if not roots:
588 591 return nonodes
589 592 lowestrev = min([self.rev(n) for n in roots])
590 593 else:
591 594 roots = [nullid] # Everybody's a descendent of nullid
592 595 lowestrev = nullrev
593 596 if (lowestrev == nullrev) and (heads is None):
594 597 # We want _all_ the nodes!
595 598 return ([self.node(r) for r in xrange(0, self.count())],
596 599 [nullid], list(self.heads()))
597 600 if heads is None:
598 601 # All nodes are ancestors, so the latest ancestor is the last
599 602 # node.
600 603 highestrev = self.count() - 1
601 604 # Set ancestors to None to signal that every node is an ancestor.
602 605 ancestors = None
603 606 # Set heads to an empty dictionary for later discovery of heads
604 607 heads = {}
605 608 else:
606 609 heads = list(heads)
607 610 if not heads:
608 611 return nonodes
609 612 ancestors = {}
610 613 # Turn heads into a dictionary so we can remove 'fake' heads.
611 614 # Also, later we will be using it to filter out the heads we can't
612 615 # find from roots.
613 616 heads = dict.fromkeys(heads, 0)
614 617 # Start at the top and keep marking parents until we're done.
615 618 nodestotag = heads.keys()
616 619 # Remember where the top was so we can use it as a limit later.
617 620 highestrev = max([self.rev(n) for n in nodestotag])
618 621 while nodestotag:
619 622 # grab a node to tag
620 623 n = nodestotag.pop()
621 624 # Never tag nullid
622 625 if n == nullid:
623 626 continue
624 627 # A node's revision number represents its place in a
625 628 # topologically sorted list of nodes.
626 629 r = self.rev(n)
627 630 if r >= lowestrev:
628 631 if n not in ancestors:
629 632 # If we are possibly a descendent of one of the roots
630 633 # and we haven't already been marked as an ancestor
631 634 ancestors[n] = 1 # Mark as ancestor
632 635 # Add non-nullid parents to list of nodes to tag.
633 636 nodestotag.extend([p for p in self.parents(n) if
634 637 p != nullid])
635 638 elif n in heads: # We've seen it before, is it a fake head?
636 639 # So it is, real heads should not be the ancestors of
637 640 # any other heads.
638 641 heads.pop(n)
639 642 if not ancestors:
640 643 return nonodes
641 644 # Now that we have our set of ancestors, we want to remove any
642 645 # roots that are not ancestors.
643 646
644 647 # If one of the roots was nullid, everything is included anyway.
645 648 if lowestrev > nullrev:
646 649 # But, since we weren't, let's recompute the lowest rev to not
647 650 # include roots that aren't ancestors.
648 651
649 652 # Filter out roots that aren't ancestors of heads
650 653 roots = [n for n in roots if n in ancestors]
651 654 # Recompute the lowest revision
652 655 if roots:
653 656 lowestrev = min([self.rev(n) for n in roots])
654 657 else:
655 658 # No more roots? Return empty list
656 659 return nonodes
657 660 else:
658 661 # We are descending from nullid, and don't need to care about
659 662 # any other roots.
660 663 lowestrev = nullrev
661 664 roots = [nullid]
662 665 # Transform our roots list into a 'set' (i.e. a dictionary where the
663 666 # values don't matter).
664 667 descendents = dict.fromkeys(roots, 1)
665 668 # Also, keep the original roots so we can filter out roots that aren't
666 669 # 'real' roots (i.e. are descended from other roots).
667 670 roots = descendents.copy()
668 671 # Our topologically sorted list of output nodes.
669 672 orderedout = []
670 673 # Don't start at nullid since we don't want nullid in our output list,
671 674 # and if nullid shows up in descendents, empty parents will look like
672 675 # they're descendents.
673 676 for r in xrange(max(lowestrev, 0), highestrev + 1):
674 677 n = self.node(r)
675 678 isdescendent = False
676 679 if lowestrev == nullrev: # Everybody is a descendent of nullid
677 680 isdescendent = True
678 681 elif n in descendents:
679 682 # n is already a descendent
680 683 isdescendent = True
681 684 # This check only needs to be done here because all the roots
682 685 # will start being marked as descendents before the loop.
683 686 if n in roots:
684 687 # If n was a root, check if it's a 'real' root.
685 688 p = tuple(self.parents(n))
686 689 # If any of its parents are descendents, it's not a root.
687 690 if (p[0] in descendents) or (p[1] in descendents):
688 691 roots.pop(n)
689 692 else:
690 693 p = tuple(self.parents(n))
691 694 # A node is a descendent if either of its parents are
692 695 # descendents. (We seeded the descendents list with the roots
693 696 # up there, remember?)
694 697 if (p[0] in descendents) or (p[1] in descendents):
695 698 descendents[n] = 1
696 699 isdescendent = True
697 700 if isdescendent and ((ancestors is None) or (n in ancestors)):
698 701 # Only include nodes that are both descendents and ancestors.
699 702 orderedout.append(n)
700 703 if (ancestors is not None) and (n in heads):
701 704 # We're trying to figure out which heads are reachable
702 705 # from roots.
703 706 # Mark this head as having been reached
704 707 heads[n] = 1
705 708 elif ancestors is None:
706 709 # Otherwise, we're trying to discover the heads.
707 710 # Assume this is a head because if it isn't, the next step
708 711 # will eventually remove it.
709 712 heads[n] = 1
710 713 # But, obviously its parents aren't.
711 714 for p in self.parents(n):
712 715 heads.pop(p, None)
713 716 heads = [n for n in heads.iterkeys() if heads[n] != 0]
714 717 roots = roots.keys()
715 718 assert orderedout
716 719 assert roots
717 720 assert heads
718 721 return (orderedout, roots, heads)
719 722
720 723 def heads(self, start=None, stop=None):
721 724 """return the list of all nodes that have no children
722 725
723 726 if start is specified, only heads that are descendants of
724 727 start will be returned
725 728 if stop is specified, it will consider all the revs from stop
726 729 as if they had no children
727 730 """
728 731 if start is None:
729 732 start = nullid
730 733 if stop is None:
731 734 stop = []
732 735 stoprevs = dict.fromkeys([self.rev(n) for n in stop])
733 736 startrev = self.rev(start)
734 737 reachable = {startrev: 1}
735 738 heads = {startrev: 1}
736 739
737 740 parentrevs = self.parentrevs
738 741 for r in xrange(startrev + 1, self.count()):
739 742 for p in parentrevs(r):
740 743 if p in reachable:
741 744 if r not in stoprevs:
742 745 reachable[r] = 1
743 746 heads[r] = 1
744 747 if p in heads and p not in stoprevs:
745 748 del heads[p]
746 749
747 750 return [self.node(r) for r in heads]
748 751
749 752 def children(self, node):
750 753 """find the children of a given node"""
751 754 c = []
752 755 p = self.rev(node)
753 756 for r in range(p + 1, self.count()):
754 757 for pr in self.parentrevs(r):
755 758 if pr == p:
756 759 c.append(self.node(r))
757 760 return c
758 761
759 762 def _match(self, id):
760 763 if isinstance(id, (long, int)):
761 764 # rev
762 765 return self.node(id)
763 766 if len(id) == 20:
764 767 # possibly a binary node
765 768 # odds of a binary node being all hex in ASCII are 1 in 10**25
766 769 try:
767 770 node = id
768 771 r = self.rev(node) # quick search the index
769 772 return node
770 773 except LookupError:
771 774 pass # may be partial hex id
772 775 try:
773 776 # str(rev)
774 777 rev = int(id)
775 778 if str(rev) != id: raise ValueError
776 779 if rev < 0: rev = self.count() + rev
777 780 if rev < 0 or rev >= self.count(): raise ValueError
778 781 return self.node(rev)
779 782 except (ValueError, OverflowError):
780 783 pass
781 784 if len(id) == 40:
782 785 try:
783 786 # a full hex nodeid?
784 787 node = bin(id)
785 788 r = self.rev(node)
786 789 return node
787 790 except TypeError:
788 791 pass
789 792
790 793 def _partialmatch(self, id):
791 794 if len(id) < 40:
792 795 try:
793 796 # hex(node)[:...]
794 797 bin_id = bin(id[:len(id) & ~1]) # grab an even number of digits
795 798 node = None
796 799 for n in self.nodemap:
797 800 if n.startswith(bin_id) and hex(n).startswith(id):
798 801 if node is not None:
799 802 raise LookupError(_("Ambiguous identifier"))
800 803 node = n
801 804 if node is not None:
802 805 return node
803 806 except TypeError:
804 807 pass
805 808
806 809 def lookup(self, id):
807 810 """locate a node based on:
808 811 - revision number or str(revision number)
809 812 - nodeid or subset of hex nodeid
810 813 """
811 814
812 815 n = self._match(id)
813 816 if n is not None:
814 817 return n
815 818 n = self._partialmatch(id)
816 819 if n:
817 820 return n
818 821
819 822 raise LookupError(_("No match found"))
820 823
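For reference, these are the id forms lookup() resolves, in the order the helpers above try them. A sketch assuming a hypothetical revlog instance rl with at least one revision:

    tip = rl.lookup(str(rl.count() - 1))   # str(rev), handled by _match()
    assert rl.lookup(tip) == tip           # raw 20-byte binary nodeid
    assert rl.lookup(hex(tip)) == tip      # full 40-char hex nodeid
    assert rl.lookup(hex(tip)[:8]) == tip  # unambiguous prefix
                                           # (_partialmatch())
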
821 824 def cmp(self, node, text):
822 825 """compare text with a given file revision"""
823 826 p1, p2 = self.parents(node)
824 827 return hash(text, p1, p2) != node
825 828
826 829 def makenode(self, node, text):
827 830 """calculate a file nodeid for text, descended or possibly
828 831 unchanged from node"""
829 832
830 833 if self.cmp(node, text):
831 834 return hash(text, node, nullid)
832 835 return node
833 836
834 837 def diff(self, a, b):
835 838 """return a delta between two revisions"""
836 839 return mdiff.textdiff(a, b)
837 840
838 841 def patches(self, t, pl):
839 842 """apply a list of patches to a string"""
840 843 return mdiff.patches(t, pl)
841 844
842 845 def chunk(self, rev, df=None, cachelen=4096):
843 846 start, length = self.start(rev), self.length(rev)
844 847 inline = self.inlinedata()
845 848 if inline:
846 849 start += (rev + 1) * struct.calcsize(self.indexformat)
847 850 end = start + length
848 851 def loadcache(df):
849 852 cache_length = max(cachelen, length) # 4k
850 853 if not df:
851 854 if inline:
852 855 df = self.opener(self.indexfile)
853 856 else:
854 857 df = self.opener(self.datafile)
855 858 df.seek(start)
856 859 self.chunkcache = (start, df.read(cache_length))
857 860
858 861 if not self.chunkcache:
859 862 loadcache(df)
860 863
861 864 cache_start = self.chunkcache[0]
862 865 cache_end = cache_start + len(self.chunkcache[1])
863 866 if start >= cache_start and end <= cache_end:
864 867 # it is cached
865 868 offset = start - cache_start
866 869 else:
867 870 loadcache(df)
868 871 offset = 0
869 872
870 873 #def checkchunk():
871 874 # df = self.opener(self.datafile)
872 875 # df.seek(start)
873 876 # return df.read(length)
874 877 #assert s == checkchunk()
875 878 return decompress(self.chunkcache[1][offset:offset + length])
876 879
877 880 def delta(self, node):
878 881 """return or calculate a delta between a node and its predecessor"""
879 882 r = self.rev(node)
880 883 return self.revdiff(r - 1, r)
881 884
882 885 def revdiff(self, rev1, rev2):
883 886 """return or calculate a delta between two revisions"""
884 887 b1 = self.base(rev1)
885 888 b2 = self.base(rev2)
886 889 if b1 == b2 and rev1 + 1 == rev2:
887 890 return self.chunk(rev2)
888 891 else:
889 892 return self.diff(self.revision(self.node(rev1)),
890 893 self.revision(self.node(rev2)))
891 894
892 895 def revision(self, node):
893 896 """return an uncompressed revision of a given"""
894 897 if node == nullid: return ""
895 898 if self.cache and self.cache[0] == node: return self.cache[2]
896 899
897 900 # look up what we need to read
898 901 text = None
899 902 rev = self.rev(node)
900 903 base = self.base(rev)
901 904
902 905 if self.inlinedata():
903 906 # we probably have the whole chunk cached
904 907 df = None
905 908 else:
906 909 df = self.opener(self.datafile)
907 910
908 911 # do we have useful data cached?
909 912 if self.cache and self.cache[1] >= base and self.cache[1] < rev:
910 913 base = self.cache[1]
911 914 text = self.cache[2]
912 915 self.loadindex(base, rev + 1)
913 916 else:
914 917 self.loadindex(base, rev + 1)
915 918 text = self.chunk(base, df=df)
916 919
917 920 bins = []
918 921 for r in xrange(base + 1, rev + 1):
919 922 bins.append(self.chunk(r, df=df))
920 923
921 924 text = self.patches(text, bins)
922 925
923 926 p1, p2 = self.parents(node)
924 927 if node != hash(text, p1, p2):
925 928 raise RevlogError(_("integrity check failed on %s:%d")
926 929 % (self.datafile, rev))
927 930
928 931 self.cache = (node, rev, text)
929 932 return text
930 933
931 934 def checkinlinesize(self, tr, fp=None):
932 935 if not self.inlinedata():
933 936 return
934 937 if not fp:
935 938 fp = self.opener(self.indexfile, 'r')
936 939 fp.seek(0, 2)
937 940 size = fp.tell()
938 941 if size < 131072:
939 942 return
940 943 trinfo = tr.find(self.indexfile)
941 944 if trinfo == None:
942 945 raise RevlogError(_("%s not found in the transaction")
943 946 % self.indexfile)
944 947
945 948 trindex = trinfo[2]
946 949 dataoff = self.start(trindex)
947 950
948 951 tr.add(self.datafile, dataoff)
949 952 df = self.opener(self.datafile, 'w')
950 953 calc = struct.calcsize(self.indexformat)
951 954 for r in xrange(self.count()):
952 955 start = self.start(r) + (r + 1) * calc
953 956 length = self.length(r)
954 957 fp.seek(start)
955 958 d = fp.read(length)
956 959 df.write(d)
957 960 fp.close()
958 961 df.close()
959 962 fp = self.opener(self.indexfile, 'w', atomictemp=True)
960 963 self.version &= ~(REVLOGNGINLINEDATA)
961 964 if self.count():
962 965 x = self.index[0]
963 966 e = struct.pack(self.indexformat, *x)[4:]
964 967 l = struct.pack(versionformat, self.version)
965 968 fp.write(l)
966 969 fp.write(e)
967 970
968 971 for i in xrange(1, self.count()):
969 972 x = self.index[i]
970 973 e = struct.pack(self.indexformat, *x)
971 974 fp.write(e)
972 975
973 976 # if we don't call rename, the temp file will never replace the
974 977 # real index
975 978 fp.rename()
976 979
977 980 tr.replace(self.indexfile, trindex * calc)
978 981 self.chunkcache = None
979 982
980 983 def addrevision(self, text, transaction, link, p1=None, p2=None, d=None):
981 984 """add a revision to the log
982 985
983 986 text - the revision data to add
984 987 transaction - the transaction object used for rollback
985 988 link - the linkrev data to add
986 989 p1, p2 - the parent nodeids of the revision
987 990 d - an optional precomputed delta
988 991 """
989 992 if not self.inlinedata():
990 993 dfh = self.opener(self.datafile, "a")
991 994 else:
992 995 dfh = None
993 996 ifh = self.opener(self.indexfile, "a+")
994 997 return self._addrevision(text, transaction, link, p1, p2, d, ifh, dfh)
995 998
996 999 def _addrevision(self, text, transaction, link, p1, p2, d, ifh, dfh):
997 1000 if text is None: text = ""
998 1001 if p1 is None: p1 = self.tip()
999 1002 if p2 is None: p2 = nullid
1000 1003
1001 1004 node = hash(text, p1, p2)
1002 1005
1003 1006 if node in self.nodemap:
1004 1007 return node
1005 1008
1006 1009 n = self.count()
1007 1010 t = n - 1
1008 1011
1009 1012 if n:
1010 1013 base = self.base(t)
1011 1014 start = self.start(base)
1012 1015 end = self.end(t)
1013 1016 if not d:
1014 1017 prev = self.revision(self.tip())
1015 1018 d = self.diff(prev, text)
1016 1019 data = compress(d)
1017 1020 l = len(data[1]) + len(data[0])
1018 1021 dist = end - start + l
1019 1022
1020 1023 # full versions are inserted when the needed deltas
1021 1024 # become comparable to the uncompressed text
1022 1025 if not n or dist > len(text) * 2:
1023 1026 data = compress(text)
1024 1027 l = len(data[1]) + len(data[0])
1025 1028 base = n
1026 1029 else:
1027 1030 base = self.base(t)
1028 1031
1029 1032 offset = 0
1030 1033 if t >= 0:
1031 1034 offset = self.end(t)
1032 1035
1033 1036 if self.version == REVLOGV0:
1034 1037 e = (offset, l, base, link, p1, p2, node)
1035 1038 else:
1036 1039 e = (self.offset_type(offset, 0), l, len(text),
1037 1040 base, link, self.rev(p1), self.rev(p2), node)
1038 1041
1039 1042 self.index.append(e)
1040 1043 self.nodemap[node] = n
1041 1044 entry = struct.pack(self.indexformat, *e)
1042 1045
1043 1046 if not self.inlinedata():
1044 1047 transaction.add(self.datafile, offset)
1045 1048 transaction.add(self.indexfile, n * len(entry))
1046 1049 if data[0]:
1047 1050 dfh.write(data[0])
1048 1051 dfh.write(data[1])
1049 1052 dfh.flush()
1050 1053 else:
1051 1054 ifh.seek(0, 2)
1052 1055 transaction.add(self.indexfile, ifh.tell(), self.count() - 1)
1053 1056
1054 1057 if len(self.index) == 1 and self.version != REVLOGV0:
1055 1058 l = struct.pack(versionformat, self.version)
1056 1059 ifh.write(l)
1057 1060 entry = entry[4:]
1058 1061
1059 1062 ifh.write(entry)
1060 1063
1061 1064 if self.inlinedata():
1062 1065 ifh.write(data[0])
1063 1066 ifh.write(data[1])
1064 1067 self.checkinlinesize(transaction, ifh)
1065 1068
1066 1069 self.cache = (node, n, text)
1067 1070 return node
1068 1071
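The "dist > len(text) * 2" test above is what bounds delta chains to roughly twice the text size, as promised in the class docstring. A worked example with made-up sizes:

    textlen = 1000             # uncompressed size of the new revision
    dist = 1800 + 300          # bytes from the chain base through the new delta
    assert dist > textlen * 2  # 2100 > 2000: store a full version, reset base
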
1069 1072 def ancestor(self, a, b):
1070 1073 """calculate the least common ancestor of nodes a and b"""
1071 1074
1072 1075 def parents(rev):
1073 1076 return [p for p in self.parentrevs(rev) if p != nullrev]
1074 1077
1075 1078 c = ancestor.ancestor(self.rev(a), self.rev(b), parents)
1076 1079 if c is None:
1077 1080 return nullid
1078 1081
1079 1082 return self.node(c)
1080 1083
1081 1084 def group(self, nodelist, lookup, infocollect=None):
1082 1085 """calculate a delta group
1083 1086
1084 1087 Given a list of changeset revs, return a set of deltas and
1085 1088 metadata corresponding to nodes. the first delta is
1086 1089 parent(nodes[0]) -> nodes[0] the receiver is guaranteed to
1087 1090 have this parent as it has all history before these
1088 1091 changesets. parent is parent[0]
1089 1092 """
1090 1093 revs = [self.rev(n) for n in nodelist]
1091 1094
1092 1095 # if we don't have any revisions touched by these changesets, bail
1093 1096 if not revs:
1094 1097 yield changegroup.closechunk()
1095 1098 return
1096 1099
1097 1100 # add the parent of the first rev
1098 1101 p = self.parents(self.node(revs[0]))[0]
1099 1102 revs.insert(0, self.rev(p))
1100 1103
1101 1104 # build deltas
1102 1105 for d in xrange(0, len(revs) - 1):
1103 1106 a, b = revs[d], revs[d + 1]
1104 1107 nb = self.node(b)
1105 1108
1106 1109 if infocollect is not None:
1107 1110 infocollect(nb)
1108 1111
1109 1112 d = self.revdiff(a, b)
1110 1113 p = self.parents(nb)
1111 1114 meta = nb + p[0] + p[1] + lookup(nb)
1112 1115 yield changegroup.genchunk("%s%s" % (meta, d))
1113 1116
1114 1117 yield changegroup.closechunk()
1115 1118
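Each chunk yielded above carries an 80-byte header of four 20-byte nodes (node, p1, p2, linknode) followed by the delta, which is exactly what addgroup() below unpacks. A layout sketch with placeholder bytes:

    chunk = "n" * 20 + "p" * 20 + "q" * 20 + "c" * 20 + "<delta>"
    node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
    assert (node, cs, chunk[80:]) == ("n" * 20, "c" * 20, "<delta>")
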
1116 1119 def addgroup(self, revs, linkmapper, transaction, unique=0):
1117 1120 """
1118 1121 add a delta group
1119 1122
1120 1123 given a set of deltas, add them to the revision log. The
1121 1124 first delta is against its parent, which should be in our
1122 1125 log; the rest are against the previous delta.
1123 1126 """
1124 1127
1125 1128 # track the base of the current delta log
1126 1129 r = self.count()
1127 1130 t = r - 1
1128 1131 node = None
1129 1132
1130 1133 base = prev = nullrev
1131 1134 start = end = textlen = 0
1132 1135 if r:
1133 1136 end = self.end(t)
1134 1137
1135 1138 ifh = self.opener(self.indexfile, "a+")
1136 1139 ifh.seek(0, 2)
1137 1140 transaction.add(self.indexfile, ifh.tell(), self.count())
1138 1141 if self.inlinedata():
1139 1142 dfh = None
1140 1143 else:
1141 1144 transaction.add(self.datafile, end)
1142 1145 dfh = self.opener(self.datafile, "a")
1143 1146
1144 1147 # loop through our set of deltas
1145 1148 chain = None
1146 1149 for chunk in revs:
1147 1150 node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
1148 1151 link = linkmapper(cs)
1149 1152 if node in self.nodemap:
1150 1153 # this can happen if two branches make the same change
1151 1154 # if unique:
1152 1155 # raise RevlogError(_("already have %s") % hex(node[:4]))
1153 1156 chain = node
1154 1157 continue
1155 1158 delta = chunk[80:]
1156 1159
1157 1160 for p in (p1, p2):
1158 1161 if not p in self.nodemap:
1159 1162 raise LookupError(_("unknown parent %s") % short(p))
1160 1163
1161 1164 if not chain:
1162 1165 # retrieve the parent revision of the delta chain
1163 1166 chain = p1
1164 1167 if not chain in self.nodemap:
1165 1168 raise LookupError(_("unknown base %s") % short(chain[:4]))
1166 1169
1167 1170 # full versions are inserted when the needed deltas become
1168 1171 # comparable to the uncompressed text or when the previous
1169 1172 # version is not the one we have a delta against. We use
1170 1173 # the size of the previous full rev as a proxy for the
1171 1174 # current size.
1172 1175
1173 1176 if chain == prev:
1174 1177 tempd = compress(delta)
1175 1178 cdelta = tempd[0] + tempd[1]
1176 1179 textlen = mdiff.patchedsize(textlen, delta)
1177 1180
1178 1181 if chain != prev or (end - start + len(cdelta)) > textlen * 2:
1179 1182 # flush our writes here so we can read it in revision
1180 1183 if dfh:
1181 1184 dfh.flush()
1182 1185 ifh.flush()
1183 1186 text = self.revision(chain)
1184 1187 text = self.patches(text, [delta])
1185 1188 chk = self._addrevision(text, transaction, link, p1, p2, None,
1186 1189 ifh, dfh)
1187 1190 if not dfh and not self.inlinedata():
1188 1191 # addrevision switched from inline to conventional
1189 1192 # reopen the index
1190 1193 dfh = self.opener(self.datafile, "a")
1191 1194 ifh = self.opener(self.indexfile, "a")
1192 1195 if chk != node:
1193 1196 raise RevlogError(_("consistency error adding group"))
1194 1197 textlen = len(text)
1195 1198 else:
1196 1199 if self.version == REVLOGV0:
1197 1200 e = (end, len(cdelta), base, link, p1, p2, node)
1198 1201 else:
1199 1202 e = (self.offset_type(end, 0), len(cdelta), textlen, base,
1200 1203 link, self.rev(p1), self.rev(p2), node)
1201 1204 self.index.append(e)
1202 1205 self.nodemap[node] = r
1203 1206 if self.inlinedata():
1204 1207 ifh.write(struct.pack(self.indexformat, *e))
1205 1208 ifh.write(cdelta)
1206 1209 self.checkinlinesize(transaction, ifh)
1207 1210 if not self.inlinedata():
1208 1211 dfh = self.opener(self.datafile, "a")
1209 1212 ifh = self.opener(self.indexfile, "a")
1210 1213 else:
1211 1214 dfh.write(cdelta)
1212 1215 ifh.write(struct.pack(self.indexformat, *e))
1213 1216
1214 1217 t, r, chain, prev = r, r + 1, node, node
1215 1218 base = self.base(t)
1216 1219 start = self.start(base)
1217 1220 end = self.end(t)
1218 1221
1219 1222 return node
1220 1223
1221 1224 def strip(self, rev, minlink):
1222 1225 if self.count() == 0 or rev >= self.count():
1223 1226 return
1224 1227
1225 1228 if isinstance(self.index, lazyindex):
1226 1229 self.loadindexmap()
1227 1230
1228 1231 # When stripping away a revision, we need to make sure it
1229 1232 # does not actually belong to an older changeset.
1230 1233 # The minlink parameter defines the oldest revision
1231 1234 # we're allowed to strip away.
1232 1235 while minlink > self.index[rev][-4]:
1233 1236 rev += 1
1234 1237 if rev >= self.count():
1235 1238 return
1236 1239
1237 1240 # first truncate the files on disk
1238 1241 end = self.start(rev)
1239 1242 if not self.inlinedata():
1240 1243 df = self.opener(self.datafile, "a")
1241 1244 df.truncate(end)
1242 1245 end = rev * struct.calcsize(self.indexformat)
1243 1246 else:
1244 1247 end += rev * struct.calcsize(self.indexformat)
1245 1248
1246 1249 indexf = self.opener(self.indexfile, "a")
1247 1250 indexf.truncate(end)
1248 1251
1249 1252 # then reset internal state in memory to forget those revisions
1250 1253 self.cache = None
1251 1254 self.chunkcache = None
1252 1255 for x in xrange(rev, self.count()):
1253 1256 del self.nodemap[self.node(x)]
1254 1257
1255 1258 del self.index[rev:]
1256 1259
1257 1260 def checksize(self):
1258 1261 expected = 0
1259 1262 if self.count():
1260 1263 expected = self.end(self.count() - 1)
1261 1264
1262 1265 try:
1263 1266 f = self.opener(self.datafile)
1264 1267 f.seek(0, 2)
1265 1268 actual = f.tell()
1266 1269 dd = actual - expected
1267 1270 except IOError, inst:
1268 1271 if inst.errno != errno.ENOENT:
1269 1272 raise
1270 1273 dd = 0
1271 1274
1272 1275 try:
1273 1276 f = self.opener(self.indexfile)
1274 1277 f.seek(0, 2)
1275 1278 actual = f.tell()
1276 1279 s = struct.calcsize(self.indexformat)
1277 1280 i = actual / s
1278 1281 di = actual - (i * s)
1279 1282 if self.inlinedata():
1280 1283 databytes = 0
1281 1284 for r in xrange(self.count()):
1282 1285 databytes += self.length(r)
1283 1286 dd = 0
1284 1287 di = actual - self.count() * s - databytes
1285 1288 except IOError, inst:
1286 1289 if inst.errno != errno.ENOENT:
1287 1290 raise
1288 1291 di = 0
1289 1292
1290 1293 return (dd, di)
1291 1294
1292 1295