lazyparser.findnode: fix typo and s/rfind/find/...
Matt Mackall
r4994:d36310dd default
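
This changeset touches lazyparser.findnode in revlog.py: it fixes a comment typo ("weh" -> "we") and replaces data.rfind(node, 0, findend) with data.find(node, 0, findend) in the inner search loop. As a reminder of what the one-word substitution means, here is a minimal plain-Python illustration (not Mercurial code; the toy buffer is made up): over the same [start, end) window, find() returns the lowest matching offset, rfind() the highest, and both return -1 on a miss.

    # Toy buffer with two occurrences of a fake 4-byte "node".
    data = b"xx" + b"NODE" + b"yy" + b"NODE" + b"zz"

    print(data.find(b"NODE", 0, len(data)))    # 2  -> earliest occurrence
    print(data.rfind(b"NODE", 0, len(data)))   # 8  -> latest occurrence
    print(data.find(b"NODE", 0, 2))            # -1 -> no match before offset 2

findnode narrows its window with findend = off after each hit, so which end of the window the search starts from determines which candidate offsets the loop ever examines.
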
@@ -1,1261 +1,1261 @@
1 1 """
2 2 revlog.py - storage back-end for mercurial
3 3
4 4 This provides efficient delta storage with O(1) retrieve and append
5 5 and O(changes) merge between branches
6 6
7 7 Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
8 8
9 9 This software may be used and distributed according to the terms
10 10 of the GNU General Public License, incorporated herein by reference.
11 11 """
12 12
13 13 from node import *
14 14 from i18n import _
15 15 import binascii, changegroup, errno, ancestor, mdiff, os
16 16 import sha, struct, util, zlib
17 17
18 18 # revlog flags
19 19 REVLOGV0 = 0
20 20 REVLOGNG = 1
21 21 REVLOGNGINLINEDATA = (1 << 16)
22 22 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
23 23 REVLOG_DEFAULT_FORMAT = REVLOGNG
24 24 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
25 25
26 26 class RevlogError(Exception):
27 27 pass
28 28 class LookupError(RevlogError):
29 29 pass
30 30
31 31 def getoffset(q):
32 32 if q & 0xFFFF:
33 33 raise RevlogError(_('incompatible revision flag %x') % q)
34 34 return int(q >> 16)
35 35
36 36 def gettype(q):
37 37 return int(q & 0xFFFF)
38 38
39 39 def offset_type(offset, type):
40 40 return long(long(offset) << 16 | type)
41 41
42 42 def hash(text, p1, p2):
43 43 """generate a hash from the given text and its parent hashes
44 44
45 45 This hash combines both the current file contents and its history
46 46 in a manner that makes it easy to distinguish nodes with the same
47 47 content in the revision graph.
48 48 """
49 49 l = [p1, p2]
50 50 l.sort()
51 51 s = sha.new(l[0])
52 52 s.update(l[1])
53 53 s.update(text)
54 54 return s.digest()
55 55
56 56 def compress(text):
57 57 """ generate a possibly-compressed representation of text """
58 58 if not text:
59 59 return ("", text)
60 60 if len(text) < 44:
61 61 if text[0] == '\0':
62 62 return ("", text)
63 63 return ('u', text)
64 64 bin = zlib.compress(text)
65 65 if len(bin) > len(text):
66 66 if text[0] == '\0':
67 67 return ("", text)
68 68 return ('u', text)
69 69 return ("", bin)
70 70
71 71 def decompress(bin):
72 72 """ decompress the given input """
73 73 if not bin:
74 74 return bin
75 75 t = bin[0]
76 76 if t == '\0':
77 77 return bin
78 78 if t == 'x':
79 79 return zlib.decompress(bin)
80 80 if t == 'u':
81 81 return bin[1:]
82 82 raise RevlogError(_("unknown compression type %r") % t)
83 83
84 84 class lazyparser(object):
85 85 """
86 86 this class avoids the need to parse the entirety of large indices
87 87 """
88 88
89 89 # lazyparser is not safe to use on windows if win32 extensions not
90 90 # available. it keeps file handle open, which make it not possible
91 91 # to break hardlinks on local cloned repos.
92 92 safe_to_use = os.name != 'nt' or (not util.is_win_9x() and
93 93 hasattr(util, 'win32api'))
94 94
95 95 def __init__(self, dataf, size):
96 96 self.dataf = dataf
97 97 self.s = struct.calcsize(indexformatng)
98 98 self.datasize = size
99 99 self.l = size/self.s
100 100 self.index = [None] * self.l
101 101 self.map = {nullid: nullrev}
102 102 self.allmap = 0
103 103 self.all = 0
104 104 self.mapfind_count = 0
105 105
106 106 def loadmap(self):
107 107 """
108 108 during a commit, we need to make sure the rev being added is
109 109 not a duplicate. This requires loading the entire index,
110 110 which is fairly slow. loadmap can load up just the node map,
111 111 which takes much less time.
112 112 """
113 113 if self.allmap:
114 114 return
115 115 end = self.datasize
116 116 self.allmap = 1
117 117 cur = 0
118 118 count = 0
119 119 blocksize = self.s * 256
120 120 self.dataf.seek(0)
121 121 while cur < end:
122 122 data = self.dataf.read(blocksize)
123 123 off = 0
124 124 for x in xrange(256):
125 125 n = data[off + ngshaoffset:off + ngshaoffset + 20]
126 126 self.map[n] = count
127 127 count += 1
128 128 if count >= self.l:
129 129 break
130 130 off += self.s
131 131 cur += blocksize
132 132
133 133 def loadblock(self, blockstart, blocksize, data=None):
134 134 if self.all:
135 135 return
136 136 if data is None:
137 137 self.dataf.seek(blockstart)
138 138 if blockstart + blocksize > self.datasize:
139 139 # the revlog may have grown since we've started running,
140 140 # but we don't have space in self.index for more entries.
141 141 # limit blocksize so that we don't get too much data.
142 142 blocksize = max(self.datasize - blockstart, 0)
143 143 data = self.dataf.read(blocksize)
144 144 lend = len(data) / self.s
145 145 i = blockstart / self.s
146 146 off = 0
147 147 # lazyindex supports __delitem__
148 148 if lend > len(self.index) - i:
149 149 lend = len(self.index) - i
150 150 for x in xrange(lend):
151 151 if self.index[i + x] == None:
152 152 b = data[off : off + self.s]
153 153 self.index[i + x] = b
154 154 n = b[ngshaoffset:ngshaoffset + 20]
155 155 self.map[n] = i + x
156 156 off += self.s
157 157
158 158 def findnode(self, node):
159 159 """search backwards through the index file for a specific node"""
160 160 if self.allmap:
161 161 return None
162 162
163 163 # hg log will cause many many searches for the manifest
164 164 # nodes. After we get called a few times, just load the whole
165 165 # thing.
166 166 if self.mapfind_count > 8:
167 167 self.loadmap()
168 168 if node in self.map:
169 169 return node
170 170 return None
171 171 self.mapfind_count += 1
172 172 last = self.l - 1
173 173 while self.index[last] != None:
174 174 if last == 0:
175 175 self.all = 1
176 176 self.allmap = 1
177 177 return None
178 178 last -= 1
179 179 end = (last + 1) * self.s
180 180 blocksize = self.s * 256
181 181 while end >= 0:
182 182 start = max(end - blocksize, 0)
183 183 self.dataf.seek(start)
184 184 data = self.dataf.read(end - start)
185 185 findend = end - start
186 186 while True:
187 # we're searching backwards, so weh have to make sure
187 # we're searching backwards, so we have to make sure
188 188 # we don't find a changeset where this node is a parent
189 off = data.rfind(node, 0, findend)
189 off = data.find(node, 0, findend)
190 190 findend = off
191 191 if off >= 0:
192 192 i = off / self.s
193 193 off = i * self.s
194 194 n = data[off + ngshaoffset:off + ngshaoffset + 20]
195 195 if n == node:
196 196 self.map[n] = i + start / self.s
197 197 return node
198 198 else:
199 199 break
200 200 end -= blocksize
201 201 return None
202 202
203 203 def loadindex(self, i=None, end=None):
204 204 if self.all:
205 205 return
206 206 all = False
207 207 if i == None:
208 208 blockstart = 0
209 209 blocksize = (65536 / self.s) * self.s
210 210 end = self.datasize
211 211 all = True
212 212 else:
213 213 if end:
214 214 blockstart = i * self.s
215 215 end = end * self.s
216 216 blocksize = end - blockstart
217 217 else:
218 218 blockstart = (i & ~1023) * self.s
219 219 blocksize = self.s * 1024
220 220 end = blockstart + blocksize
221 221 while blockstart < end:
222 222 self.loadblock(blockstart, blocksize)
223 223 blockstart += blocksize
224 224 if all:
225 225 self.all = True
226 226
227 227 class lazyindex(object):
228 228 """a lazy version of the index array"""
229 229 def __init__(self, parser):
230 230 self.p = parser
231 231 def __len__(self):
232 232 return len(self.p.index)
233 233 def load(self, pos):
234 234 if pos < 0:
235 235 pos += len(self.p.index)
236 236 self.p.loadindex(pos)
237 237 return self.p.index[pos]
238 238 def __getitem__(self, pos):
239 239 return struct.unpack(indexformatng,
240 240 self.p.index[pos] or self.load(pos))
241 241 def __setitem__(self, pos, item):
242 242 self.p.index[pos] = struct.pack(indexformatng, *item)
243 243 def __delitem__(self, pos):
244 244 del self.p.index[pos]
245 245 def insert(self, pos, e):
246 246 self.p.index.insert(pos, struct.pack(indexformatng, *e))
247 247 def append(self, e):
248 248 self.p.index.append(struct.pack(indexformatng, *e))
249 249
250 250 class lazymap(object):
251 251 """a lazy version of the node map"""
252 252 def __init__(self, parser):
253 253 self.p = parser
254 254 def load(self, key):
255 255 n = self.p.findnode(key)
256 256 if n == None:
257 257 raise KeyError(key)
258 258 def __contains__(self, key):
259 259 if key in self.p.map:
260 260 return True
261 261 self.p.loadmap()
262 262 return key in self.p.map
263 263 def __iter__(self):
264 264 yield nullid
265 265 for i in xrange(self.p.l):
266 266 ret = self.p.index[i]
267 267 if not ret:
268 268 self.p.loadindex(i)
269 269 ret = self.p.index[i]
270 270 if isinstance(ret, str):
271 271 ret = struct.unpack(indexformatng, ret)
272 272 yield ret[7]
273 273 def __getitem__(self, key):
274 274 try:
275 275 return self.p.map[key]
276 276 except KeyError:
277 277 try:
278 278 self.load(key)
279 279 return self.p.map[key]
280 280 except KeyError:
281 281 raise KeyError("node " + hex(key))
282 282 def __setitem__(self, key, val):
283 283 self.p.map[key] = val
284 284 def __delitem__(self, key):
285 285 del self.p.map[key]
286 286
287 287 indexformatv0 = ">4l20s20s20s"
288 288 v0shaoffset = 56
289 289
290 290 class revlogoldio(object):
291 291 def __init__(self):
292 292 self.size = struct.calcsize(indexformatv0)
293 293
294 294 def parseindex(self, fp, inline):
295 295 s = self.size
296 296 index = []
297 297 nodemap = {nullid: nullrev}
298 298 n = off = 0
299 299 data = fp.read()
300 300 l = len(data)
301 301 while off + s <= l:
302 302 cur = data[off:off + s]
303 303 off += s
304 304 e = struct.unpack(indexformatv0, cur)
305 305 # transform to revlogv1 format
306 306 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
307 307 nodemap[e[4]], nodemap[e[5]], e[6])
308 308 index.append(e2)
309 309 nodemap[e[6]] = n
310 310 n += 1
311 311
312 312 return index, nodemap, None
313 313
314 314 def packentry(self, entry, node, version):
315 315 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
316 316 node(entry[5]), node(entry[6]), entry[7])
317 317 return struct.pack(indexformatv0, *e2)
318 318
319 319 # index ng:
320 320 # 6 bytes offset
321 321 # 2 bytes flags
322 322 # 4 bytes compressed length
323 323 # 4 bytes uncompressed length
324 324 # 4 bytes: base rev
325 325 # 4 bytes link rev
326 326 # 4 bytes parent 1 rev
327 327 # 4 bytes parent 2 rev
328 328 # 32 bytes: nodeid
329 329 indexformatng = ">Qiiiiii20s12x"
330 330 ngshaoffset = 32
331 331 versionformat = ">I"
332 332
333 333 class revlogio(object):
334 334 def __init__(self):
335 335 self.size = struct.calcsize(indexformatng)
336 336
337 337 def parseindex(self, fp, inline):
338 338 try:
339 339 size = util.fstat(fp).st_size
340 340 except AttributeError:
341 341 size = 0
342 342
343 343 if lazyparser.safe_to_use and not inline and size > 1000000:
344 344 # big index, let's parse it on demand
345 345 parser = lazyparser(fp, size)
346 346 index = lazyindex(parser)
347 347 nodemap = lazymap(parser)
348 348 e = list(index[0])
349 349 type = gettype(e[0])
350 350 e[0] = offset_type(0, type)
351 351 index[0] = e
352 352 return index, nodemap, None
353 353
354 354 s = self.size
355 355 cache = None
356 356 index = []
357 357 nodemap = {nullid: nullrev}
358 358 n = off = 0
359 359 # if we're not using lazymap, always read the whole index
360 360 data = fp.read()
361 361 l = len(data) - s
362 362 unpack = struct.unpack
363 363 append = index.append
364 364 if inline:
365 365 cache = (0, data)
366 366 while off <= l:
367 367 e = unpack(indexformatng, data[off:off + s])
368 368 nodemap[e[7]] = n
369 369 append(e)
370 370 n += 1
371 371 if e[1] < 0:
372 372 break
373 373 off += e[1] + s
374 374 else:
375 375 while off <= l:
376 376 e = unpack(indexformatng, data[off:off + s])
377 377 nodemap[e[7]] = n
378 378 append(e)
379 379 n += 1
380 380 off += s
381 381
382 382 e = list(index[0])
383 383 type = gettype(e[0])
384 384 e[0] = offset_type(0, type)
385 385 index[0] = e
386 386
387 387 return index, nodemap, cache
388 388
389 389 def packentry(self, entry, node, version):
390 390 p = struct.pack(indexformatng, *entry)
391 391 if not entry[3] and not getoffset(entry[0]) and entry[5] == nullrev:
392 392 p = struct.pack(versionformat, version) + p[4:]
393 393 return p
394 394
395 395 class revlog(object):
396 396 """
397 397 the underlying revision storage object
398 398
399 399 A revlog consists of two parts, an index and the revision data.
400 400
401 401 The index is a file with a fixed record size containing
402 402 information on each revision, including its nodeid (hash), the
403 403 nodeids of its parents, the position and offset of its data within
404 404 the data file, and the revision it's based on. Finally, each entry
405 405 contains a linkrev entry that can serve as a pointer to external
406 406 data.
407 407
408 408 The revision data itself is a linear collection of data chunks.
409 409 Each chunk represents a revision and is usually represented as a
410 410 delta against the previous chunk. To bound lookup time, runs of
411 411 deltas are limited to about 2 times the length of the original
412 412 version data. This makes retrieval of a version proportional to
413 413 its size, or O(1) relative to the number of revisions.
414 414
415 415 Both pieces of the revlog are written to in an append-only
416 416 fashion, which means we never need to rewrite a file to insert or
417 417 remove data, and can use some simple techniques to avoid the need
418 418 for locking while reading.
419 419 """
420 420 def __init__(self, opener, indexfile):
421 421 """
422 422 create a revlog object
423 423
424 424 opener is a function that abstracts the file opening operation
425 425 and can be used to implement COW semantics or the like.
426 426 """
427 427 self.indexfile = indexfile
428 428 self.datafile = indexfile[:-2] + ".d"
429 429 self.opener = opener
430 430 self._cache = None
431 431 self._chunkcache = None
432 432 self.nodemap = {nullid: nullrev}
433 433 self.index = []
434 434
435 435 v = REVLOG_DEFAULT_VERSION
436 436 if hasattr(opener, "defversion"):
437 437 v = opener.defversion
438 438 if v & REVLOGNG:
439 439 v |= REVLOGNGINLINEDATA
440 440
441 441 i = ""
442 442 try:
443 443 f = self.opener(self.indexfile)
444 444 i = f.read(4)
445 445 f.seek(0)
446 446 if len(i) > 0:
447 447 v = struct.unpack(versionformat, i)[0]
448 448 except IOError, inst:
449 449 if inst.errno != errno.ENOENT:
450 450 raise
451 451
452 452 self.version = v
453 453 self._inline = v & REVLOGNGINLINEDATA
454 454 flags = v & ~0xFFFF
455 455 fmt = v & 0xFFFF
456 456 if fmt == REVLOGV0 and flags:
457 457 raise RevlogError(_("index %s unknown flags %#04x for format v0")
458 458 % (self.indexfile, flags >> 16))
459 459 elif fmt == REVLOGNG and flags & ~REVLOGNGINLINEDATA:
460 460 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
461 461 % (self.indexfile, flags >> 16))
462 462 elif fmt > REVLOGNG:
463 463 raise RevlogError(_("index %s unknown format %d")
464 464 % (self.indexfile, fmt))
465 465
466 466 self._io = revlogio()
467 467 if self.version == REVLOGV0:
468 468 self._io = revlogoldio()
469 469 if i:
470 470 d = self._io.parseindex(f, self._inline)
471 471 self.index, self.nodemap, self._chunkcache = d
472 472
473 473 # add the magic null revision at -1
474 474 self.index.append((0, 0, 0, -1, -1, -1, -1, nullid))
475 475
476 476 def _loadindex(self, start, end):
477 477 """load a block of indexes all at once from the lazy parser"""
478 478 if isinstance(self.index, lazyindex):
479 479 self.index.p.loadindex(start, end)
480 480
481 481 def _loadindexmap(self):
482 482 """loads both the map and the index from the lazy parser"""
483 483 if isinstance(self.index, lazyindex):
484 484 p = self.index.p
485 485 p.loadindex()
486 486 self.nodemap = p.map
487 487
488 488 def _loadmap(self):
489 489 """loads the map from the lazy parser"""
490 490 if isinstance(self.nodemap, lazymap):
491 491 self.nodemap.p.loadmap()
492 492 self.nodemap = self.nodemap.p.map
493 493
494 494 def tip(self):
495 495 return self.node(len(self.index) - 2)
496 496 def count(self):
497 497 return len(self.index) - 1
498 498
499 499 def rev(self, node):
500 500 try:
501 501 return self.nodemap[node]
502 502 except KeyError:
503 503 raise LookupError(_('%s: no node %s') % (self.indexfile, hex(node)))
504 504 def node(self, rev):
505 505 return self.index[rev][7]
506 506 def linkrev(self, node):
507 507 return self.index[self.rev(node)][4]
508 508 def parents(self, node):
509 509 d = self.index[self.rev(node)][5:7]
510 510 return (self.node(d[0]), self.node(d[1]))
511 511 def parentrevs(self, rev):
512 512 return self.index[rev][5:7]
513 513 def start(self, rev):
514 514 return getoffset(self.index[rev][0])
515 515 def end(self, rev):
516 516 return self.start(rev) + self.length(rev)
517 517 def length(self, rev):
518 518 return self.index[rev][1]
519 519 def base(self, rev):
520 520 return self.index[rev][3]
521 521
522 522 def size(self, rev):
523 523 """return the length of the uncompressed text for a given revision"""
524 524 l = self.index[rev][2]
525 525 if l >= 0:
526 526 return l
527 527
528 528 t = self.revision(self.node(rev))
529 529 return len(t)
530 530
531 531 # alternate implementation, The advantage to this code is it
532 532 # will be faster for a single revision. But, the results are not
533 533 # cached, so finding the size of every revision will be slower.
534 534 """
535 535 if self.cache and self.cache[1] == rev:
536 536 return len(self.cache[2])
537 537
538 538 base = self.base(rev)
539 539 if self.cache and self.cache[1] >= base and self.cache[1] < rev:
540 540 base = self.cache[1]
541 541 text = self.cache[2]
542 542 else:
543 543 text = self.revision(self.node(base))
544 544
545 545 l = len(text)
546 546 for x in xrange(base + 1, rev + 1):
547 547 l = mdiff.patchedsize(l, self.chunk(x))
548 548 return l
549 549 """
550 550
551 551 def reachable(self, node, stop=None):
552 552 """return a hash of all nodes ancestral to a given node, including
553 553 the node itself, stopping when stop is matched"""
554 554 reachable = {}
555 555 visit = [node]
556 556 reachable[node] = 1
557 557 if stop:
558 558 stopn = self.rev(stop)
559 559 else:
560 560 stopn = 0
561 561 while visit:
562 562 n = visit.pop(0)
563 563 if n == stop:
564 564 continue
565 565 if n == nullid:
566 566 continue
567 567 for p in self.parents(n):
568 568 if self.rev(p) < stopn:
569 569 continue
570 570 if p not in reachable:
571 571 reachable[p] = 1
572 572 visit.append(p)
573 573 return reachable
574 574
575 575 def nodesbetween(self, roots=None, heads=None):
576 576 """Return a tuple containing three elements. Elements 1 and 2 contain
577 577 a final list of bases and heads after all the unreachable ones have been
578 578 pruned. Element 0 contains a topologically sorted list of all
579 579
580 580 nodes that satisfy these constraints:
581 581 1. All nodes must be descended from a node in roots (the nodes on
582 582 roots are considered descended from themselves).
583 583 2. All nodes must also be ancestors of a node in heads (the nodes in
584 584 heads are considered to be their own ancestors).
585 585
586 586 If roots is unspecified, nullid is assumed as the only root.
587 587 If heads is unspecified, it is taken to be the output of the
588 588 heads method (i.e. a list of all nodes in the repository that
589 589 have no children)."""
590 590 nonodes = ([], [], [])
591 591 if roots is not None:
592 592 roots = list(roots)
593 593 if not roots:
594 594 return nonodes
595 595 lowestrev = min([self.rev(n) for n in roots])
596 596 else:
597 597 roots = [nullid] # Everybody's a descendent of nullid
598 598 lowestrev = nullrev
599 599 if (lowestrev == nullrev) and (heads is None):
600 600 # We want _all_ the nodes!
601 601 return ([self.node(r) for r in xrange(0, self.count())],
602 602 [nullid], list(self.heads()))
603 603 if heads is None:
604 604 # All nodes are ancestors, so the latest ancestor is the last
605 605 # node.
606 606 highestrev = self.count() - 1
607 607 # Set ancestors to None to signal that every node is an ancestor.
608 608 ancestors = None
609 609 # Set heads to an empty dictionary for later discovery of heads
610 610 heads = {}
611 611 else:
612 612 heads = list(heads)
613 613 if not heads:
614 614 return nonodes
615 615 ancestors = {}
616 616 # Turn heads into a dictionary so we can remove 'fake' heads.
617 617 # Also, later we will be using it to filter out the heads we can't
618 618 # find from roots.
619 619 heads = dict.fromkeys(heads, 0)
620 620 # Start at the top and keep marking parents until we're done.
621 621 nodestotag = heads.keys()
622 622 # Remember where the top was so we can use it as a limit later.
623 623 highestrev = max([self.rev(n) for n in nodestotag])
624 624 while nodestotag:
625 625 # grab a node to tag
626 626 n = nodestotag.pop()
627 627 # Never tag nullid
628 628 if n == nullid:
629 629 continue
630 630 # A node's revision number represents its place in a
631 631 # topologically sorted list of nodes.
632 632 r = self.rev(n)
633 633 if r >= lowestrev:
634 634 if n not in ancestors:
635 635 # If we are possibly a descendent of one of the roots
636 636 # and we haven't already been marked as an ancestor
637 637 ancestors[n] = 1 # Mark as ancestor
638 638 # Add non-nullid parents to list of nodes to tag.
639 639 nodestotag.extend([p for p in self.parents(n) if
640 640 p != nullid])
641 641 elif n in heads: # We've seen it before, is it a fake head?
642 642 # So it is, real heads should not be the ancestors of
643 643 # any other heads.
644 644 heads.pop(n)
645 645 if not ancestors:
646 646 return nonodes
647 647 # Now that we have our set of ancestors, we want to remove any
648 648 # roots that are not ancestors.
649 649
650 650 # If one of the roots was nullid, everything is included anyway.
651 651 if lowestrev > nullrev:
652 652 # But, since we weren't, let's recompute the lowest rev to not
653 653 # include roots that aren't ancestors.
654 654
655 655 # Filter out roots that aren't ancestors of heads
656 656 roots = [n for n in roots if n in ancestors]
657 657 # Recompute the lowest revision
658 658 if roots:
659 659 lowestrev = min([self.rev(n) for n in roots])
660 660 else:
661 661 # No more roots? Return empty list
662 662 return nonodes
663 663 else:
664 664 # We are descending from nullid, and don't need to care about
665 665 # any other roots.
666 666 lowestrev = nullrev
667 667 roots = [nullid]
668 668 # Transform our roots list into a 'set' (i.e. a dictionary where the
669 669 # values don't matter.
670 670 descendents = dict.fromkeys(roots, 1)
671 671 # Also, keep the original roots so we can filter out roots that aren't
672 672 # 'real' roots (i.e. are descended from other roots).
673 673 roots = descendents.copy()
674 674 # Our topologically sorted list of output nodes.
675 675 orderedout = []
676 676 # Don't start at nullid since we don't want nullid in our output list,
677 677 # and if nullid shows up in descendents, empty parents will look like
678 678 # they're descendents.
679 679 for r in xrange(max(lowestrev, 0), highestrev + 1):
680 680 n = self.node(r)
681 681 isdescendent = False
682 682 if lowestrev == nullrev: # Everybody is a descendent of nullid
683 683 isdescendent = True
684 684 elif n in descendents:
685 685 # n is already a descendent
686 686 isdescendent = True
687 687 # This check only needs to be done here because all the roots
688 688 # will start being marked as descendents before the loop.
689 689 if n in roots:
690 690 # If n was a root, check if it's a 'real' root.
691 691 p = tuple(self.parents(n))
692 692 # If any of its parents are descendents, it's not a root.
693 693 if (p[0] in descendents) or (p[1] in descendents):
694 694 roots.pop(n)
695 695 else:
696 696 p = tuple(self.parents(n))
697 697 # A node is a descendent if either of its parents are
698 698 # descendents. (We seeded the descendents list with the roots
699 699 # up there, remember?)
700 700 if (p[0] in descendents) or (p[1] in descendents):
701 701 descendents[n] = 1
702 702 isdescendent = True
703 703 if isdescendent and ((ancestors is None) or (n in ancestors)):
704 704 # Only include nodes that are both descendents and ancestors.
705 705 orderedout.append(n)
706 706 if (ancestors is not None) and (n in heads):
707 707 # We're trying to figure out which heads are reachable
708 708 # from roots.
709 709 # Mark this head as having been reached
710 710 heads[n] = 1
711 711 elif ancestors is None:
712 712 # Otherwise, we're trying to discover the heads.
713 713 # Assume this is a head because if it isn't, the next step
714 714 # will eventually remove it.
715 715 heads[n] = 1
716 716 # But, obviously its parents aren't.
717 717 for p in self.parents(n):
718 718 heads.pop(p, None)
719 719 heads = [n for n in heads.iterkeys() if heads[n] != 0]
720 720 roots = roots.keys()
721 721 assert orderedout
722 722 assert roots
723 723 assert heads
724 724 return (orderedout, roots, heads)
725 725
726 726 def heads(self, start=None, stop=None):
727 727 """return the list of all nodes that have no children
728 728
729 729 if start is specified, only heads that are descendants of
730 730 start will be returned
731 731 if stop is specified, it will consider all the revs from stop
732 732 as if they had no children
733 733 """
734 734 if start is None and stop is None:
735 735 count = self.count()
736 736 if not count:
737 737 return [nullid]
738 738 ishead = [1] * (count + 1)
739 739 index = self.index
740 740 for r in xrange(count):
741 741 e = index[r]
742 742 ishead[e[5]] = ishead[e[6]] = 0
743 743 return [self.node(r) for r in xrange(count) if ishead[r]]
744 744
745 745 if start is None:
746 746 start = nullid
747 747 if stop is None:
748 748 stop = []
749 749 stoprevs = dict.fromkeys([self.rev(n) for n in stop])
750 750 startrev = self.rev(start)
751 751 reachable = {startrev: 1}
752 752 heads = {startrev: 1}
753 753
754 754 parentrevs = self.parentrevs
755 755 for r in xrange(startrev + 1, self.count()):
756 756 for p in parentrevs(r):
757 757 if p in reachable:
758 758 if r not in stoprevs:
759 759 reachable[r] = 1
760 760 heads[r] = 1
761 761 if p in heads and p not in stoprevs:
762 762 del heads[p]
763 763
764 764 return [self.node(r) for r in heads]
765 765
766 766 def children(self, node):
767 767 """find the children of a given node"""
768 768 c = []
769 769 p = self.rev(node)
770 770 for r in range(p + 1, self.count()):
771 771 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
772 772 if prevs:
773 773 for pr in prevs:
774 774 if pr == p:
775 775 c.append(self.node(r))
776 776 elif p == nullrev:
777 777 c.append(self.node(r))
778 778 return c
779 779
780 780 def _match(self, id):
781 781 if isinstance(id, (long, int)):
782 782 # rev
783 783 return self.node(id)
784 784 if len(id) == 20:
785 785 # possibly a binary node
786 786 # odds of a binary node being all hex in ASCII are 1 in 10**25
787 787 try:
788 788 node = id
789 789 r = self.rev(node) # quick search the index
790 790 return node
791 791 except LookupError:
792 792 pass # may be partial hex id
793 793 try:
794 794 # str(rev)
795 795 rev = int(id)
796 796 if str(rev) != id:
797 797 raise ValueError
798 798 if rev < 0:
799 799 rev = self.count() + rev
800 800 if rev < 0 or rev >= self.count():
801 801 raise ValueError
802 802 return self.node(rev)
803 803 except (ValueError, OverflowError):
804 804 pass
805 805 if len(id) == 40:
806 806 try:
807 807 # a full hex nodeid?
808 808 node = bin(id)
809 809 r = self.rev(node)
810 810 return node
811 811 except TypeError:
812 812 pass
813 813
814 814 def _partialmatch(self, id):
815 815 if len(id) < 40:
816 816 try:
817 817 # hex(node)[:...]
818 818 bin_id = bin(id[:len(id) & ~1]) # grab an even number of digits
819 819 node = None
820 820 for n in self.nodemap:
821 821 if n.startswith(bin_id) and hex(n).startswith(id):
822 822 if node is not None:
823 823 raise LookupError(_("Ambiguous identifier"))
824 824 node = n
825 825 if node is not None:
826 826 return node
827 827 except TypeError:
828 828 pass
829 829
830 830 def lookup(self, id):
831 831 """locate a node based on:
832 832 - revision number or str(revision number)
833 833 - nodeid or subset of hex nodeid
834 834 """
835 835 n = self._match(id)
836 836 if n is not None:
837 837 return n
838 838 n = self._partialmatch(id)
839 839 if n:
840 840 return n
841 841
842 842 raise LookupError(_("No match found"))
843 843
844 844 def cmp(self, node, text):
845 845 """compare text with a given file revision"""
846 846 p1, p2 = self.parents(node)
847 847 return hash(text, p1, p2) != node
848 848
849 849 def chunk(self, rev, df=None):
850 850 start, length = self.start(rev), self.length(rev)
851 851 if self._inline:
852 852 start += (rev + 1) * self._io.size
853 853 end = start + length
854 854 def loadcache(df):
855 855 cache_length = max(65536, length)
856 856 if not df:
857 857 if self._inline:
858 858 df = self.opener(self.indexfile)
859 859 else:
860 860 df = self.opener(self.datafile)
861 861 df.seek(start)
862 862 self._chunkcache = (start, df.read(cache_length))
863 863
864 864 if not self._chunkcache:
865 865 loadcache(df)
866 866
867 867 cache_start = self._chunkcache[0]
868 868 cache_end = cache_start + len(self._chunkcache[1])
869 869 if start >= cache_start and end <= cache_end:
870 870 # it is cached
871 871 offset = start - cache_start
872 872 else:
873 873 loadcache(df)
874 874 offset = 0
875 875
876 876 # avoid copying large chunks
877 877 c = self._chunkcache[1]
878 878 if len(c) > length:
879 879 c = c[offset:offset + length]
880 880
881 881 return decompress(c)
882 882
883 883 def delta(self, node):
884 884 """return or calculate a delta between a node and its predecessor"""
885 885 r = self.rev(node)
886 886 return self.revdiff(r - 1, r)
887 887
888 888 def revdiff(self, rev1, rev2):
889 889 """return or calculate a delta between two revisions"""
890 890 b1 = self.base(rev1)
891 891 b2 = self.base(rev2)
892 892 if b1 == b2 and rev1 + 1 == rev2:
893 893 return self.chunk(rev2)
894 894 else:
895 895 return mdiff.textdiff(self.revision(self.node(rev1)),
896 896 self.revision(self.node(rev2)))
897 897
898 898 def revision(self, node):
899 899 """return an uncompressed revision of a given node"""
900 900 if node == nullid:
901 901 return ""
902 902 if self._cache and self._cache[0] == node:
903 903 return self._cache[2]
904 904
905 905 # look up what we need to read
906 906 text = None
907 907 rev = self.rev(node)
908 908 base = self.base(rev)
909 909
910 910 if self._inline:
911 911 # we probably have the whole chunk cached
912 912 df = None
913 913 else:
914 914 df = self.opener(self.datafile)
915 915
916 916 # do we have useful data cached?
917 917 if self._cache and self._cache[1] >= base and self._cache[1] < rev:
918 918 base = self._cache[1]
919 919 text = self._cache[2]
920 920 self._loadindex(base, rev + 1)
921 921 else:
922 922 self._loadindex(base, rev + 1)
923 923 text = self.chunk(base, df=df)
924 924
925 925 bins = [self.chunk(r, df) for r in xrange(base + 1, rev + 1)]
926 926 text = mdiff.patches(text, bins)
927 927 p1, p2 = self.parents(node)
928 928 if node != hash(text, p1, p2):
929 929 raise RevlogError(_("integrity check failed on %s:%d")
930 930 % (self.datafile, rev))
931 931
932 932 self._cache = (node, rev, text)
933 933 return text
934 934
935 935 def checkinlinesize(self, tr, fp=None):
936 936 if not self._inline:
937 937 return
938 938 if not fp:
939 939 fp = self.opener(self.indexfile, 'r')
940 940 fp.seek(0, 2)
941 941 size = fp.tell()
942 942 if size < 131072:
943 943 return
944 944 trinfo = tr.find(self.indexfile)
945 945 if trinfo == None:
946 946 raise RevlogError(_("%s not found in the transaction")
947 947 % self.indexfile)
948 948
949 949 trindex = trinfo[2]
950 950 dataoff = self.start(trindex)
951 951
952 952 tr.add(self.datafile, dataoff)
953 953 df = self.opener(self.datafile, 'w')
954 954 calc = self._io.size
955 955 for r in xrange(self.count()):
956 956 start = self.start(r) + (r + 1) * calc
957 957 length = self.length(r)
958 958 fp.seek(start)
959 959 d = fp.read(length)
960 960 df.write(d)
961 961 fp.close()
962 962 df.close()
963 963 fp = self.opener(self.indexfile, 'w', atomictemp=True)
964 964 self.version &= ~(REVLOGNGINLINEDATA)
965 965 self._inline = False
966 966 for i in xrange(self.count()):
967 967 e = self._io.packentry(self.index[i], self.node, self.version)
968 968 fp.write(e)
969 969
970 970 # if we don't call rename, the temp file will never replace the
971 971 # real index
972 972 fp.rename()
973 973
974 974 tr.replace(self.indexfile, trindex * calc)
975 975 self._chunkcache = None
976 976
977 977 def addrevision(self, text, transaction, link, p1, p2, d=None):
978 978 """add a revision to the log
979 979
980 980 text - the revision data to add
981 981 transaction - the transaction object used for rollback
982 982 link - the linkrev data to add
983 983 p1, p2 - the parent nodeids of the revision
984 984 d - an optional precomputed delta
985 985 """
986 986 dfh = None
987 987 if not self._inline:
988 988 dfh = self.opener(self.datafile, "a")
989 989 ifh = self.opener(self.indexfile, "a+")
990 990 return self._addrevision(text, transaction, link, p1, p2, d, ifh, dfh)
991 991
992 992 def _addrevision(self, text, transaction, link, p1, p2, d, ifh, dfh):
993 993 node = hash(text, p1, p2)
994 994 if node in self.nodemap:
995 995 return node
996 996
997 997 curr = self.count()
998 998 prev = curr - 1
999 999 base = self.base(prev)
1000 1000 offset = self.end(prev)
1001 1001
1002 1002 if curr:
1003 1003 if not d:
1004 1004 ptext = self.revision(self.node(prev))
1005 1005 d = mdiff.textdiff(ptext, text)
1006 1006 data = compress(d)
1007 1007 l = len(data[1]) + len(data[0])
1008 1008 dist = l + offset - self.start(base)
1009 1009
1010 1010 # full versions are inserted when the needed deltas
1011 1011 # become comparable to the uncompressed text
1012 1012 if not curr or dist > len(text) * 2:
1013 1013 data = compress(text)
1014 1014 l = len(data[1]) + len(data[0])
1015 1015 base = curr
1016 1016
1017 1017 e = (offset_type(offset, 0), l, len(text),
1018 1018 base, link, self.rev(p1), self.rev(p2), node)
1019 1019 self.index.insert(-1, e)
1020 1020 self.nodemap[node] = curr
1021 1021
1022 1022 entry = self._io.packentry(e, self.node, self.version)
1023 1023 if not self._inline:
1024 1024 transaction.add(self.datafile, offset)
1025 1025 transaction.add(self.indexfile, curr * len(entry))
1026 1026 if data[0]:
1027 1027 dfh.write(data[0])
1028 1028 dfh.write(data[1])
1029 1029 dfh.flush()
1030 1030 ifh.write(entry)
1031 1031 else:
1032 1032 ifh.seek(0, 2)
1033 1033 transaction.add(self.indexfile, ifh.tell(), prev)
1034 1034 ifh.write(entry)
1035 1035 ifh.write(data[0])
1036 1036 ifh.write(data[1])
1037 1037 self.checkinlinesize(transaction, ifh)
1038 1038
1039 1039 self._cache = (node, curr, text)
1040 1040 return node
1041 1041
1042 1042 def ancestor(self, a, b):
1043 1043 """calculate the least common ancestor of nodes a and b"""
1044 1044
1045 1045 def parents(rev):
1046 1046 return [p for p in self.parentrevs(rev) if p != nullrev]
1047 1047
1048 1048 c = ancestor.ancestor(self.rev(a), self.rev(b), parents)
1049 1049 if c is None:
1050 1050 return nullid
1051 1051
1052 1052 return self.node(c)
1053 1053
1054 1054 def group(self, nodelist, lookup, infocollect=None):
1055 1055 """calculate a delta group
1056 1056
1057 1057 Given a list of changeset revs, return a set of deltas and
1058 1058 metadata corresponding to nodes. the first delta is
1059 1059 parent(nodes[0]) -> nodes[0] the receiver is guaranteed to
1060 1060 have this parent as it has all history before these
1061 1061 changesets. parent is parent[0]
1062 1062 """
1063 1063 revs = [self.rev(n) for n in nodelist]
1064 1064
1065 1065 # if we don't have any revisions touched by these changesets, bail
1066 1066 if not revs:
1067 1067 yield changegroup.closechunk()
1068 1068 return
1069 1069
1070 1070 # add the parent of the first rev
1071 1071 p = self.parents(self.node(revs[0]))[0]
1072 1072 revs.insert(0, self.rev(p))
1073 1073
1074 1074 # build deltas
1075 1075 for d in xrange(0, len(revs) - 1):
1076 1076 a, b = revs[d], revs[d + 1]
1077 1077 nb = self.node(b)
1078 1078
1079 1079 if infocollect is not None:
1080 1080 infocollect(nb)
1081 1081
1082 1082 d = self.revdiff(a, b)
1083 1083 p = self.parents(nb)
1084 1084 meta = nb + p[0] + p[1] + lookup(nb)
1085 1085 yield changegroup.genchunk("%s%s" % (meta, d))
1086 1086
1087 1087 yield changegroup.closechunk()
1088 1088
1089 1089 def addgroup(self, revs, linkmapper, transaction, unique=0):
1090 1090 """
1091 1091 add a delta group
1092 1092
1093 1093 given a set of deltas, add them to the revision log. the
1094 1094 first delta is against its parent, which should be in our
1095 1095 log, the rest are against the previous delta.
1096 1096 """
1097 1097
1098 1098 #track the base of the current delta log
1099 1099 r = self.count()
1100 1100 t = r - 1
1101 1101 node = None
1102 1102
1103 1103 base = prev = nullrev
1104 1104 start = end = textlen = 0
1105 1105 if r:
1106 1106 end = self.end(t)
1107 1107
1108 1108 ifh = self.opener(self.indexfile, "a+")
1109 1109 ifh.seek(0, 2)
1110 1110 transaction.add(self.indexfile, ifh.tell(), self.count())
1111 1111 if self._inline:
1112 1112 dfh = None
1113 1113 else:
1114 1114 transaction.add(self.datafile, end)
1115 1115 dfh = self.opener(self.datafile, "a")
1116 1116
1117 1117 # loop through our set of deltas
1118 1118 chain = None
1119 1119 for chunk in revs:
1120 1120 node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
1121 1121 link = linkmapper(cs)
1122 1122 if node in self.nodemap:
1123 1123 # this can happen if two branches make the same change
1124 1124 # if unique:
1125 1125 # raise RevlogError(_("already have %s") % hex(node[:4]))
1126 1126 chain = node
1127 1127 continue
1128 1128 delta = chunk[80:]
1129 1129
1130 1130 for p in (p1, p2):
1131 1131 if not p in self.nodemap:
1132 1132 raise LookupError(_("unknown parent %s") % short(p))
1133 1133
1134 1134 if not chain:
1135 1135 # retrieve the parent revision of the delta chain
1136 1136 chain = p1
1137 1137 if not chain in self.nodemap:
1138 1138 raise LookupError(_("unknown base %s") % short(chain[:4]))
1139 1139
1140 1140 # full versions are inserted when the needed deltas become
1141 1141 # comparable to the uncompressed text or when the previous
1142 1142 # version is not the one we have a delta against. We use
1143 1143 # the size of the previous full rev as a proxy for the
1144 1144 # current size.
1145 1145
1146 1146 if chain == prev:
1147 1147 tempd = compress(delta)
1148 1148 cdelta = tempd[0] + tempd[1]
1149 1149 textlen = mdiff.patchedsize(textlen, delta)
1150 1150
1151 1151 if chain != prev or (end - start + len(cdelta)) > textlen * 2:
1152 1152 # flush our writes here so we can read it in revision
1153 1153 if dfh:
1154 1154 dfh.flush()
1155 1155 ifh.flush()
1156 1156 text = self.revision(chain)
1157 1157 text = mdiff.patches(text, [delta])
1158 1158 chk = self._addrevision(text, transaction, link, p1, p2, None,
1159 1159 ifh, dfh)
1160 1160 if not dfh and not self._inline:
1161 1161 # addrevision switched from inline to conventional
1162 1162 # reopen the index
1163 1163 dfh = self.opener(self.datafile, "a")
1164 1164 ifh = self.opener(self.indexfile, "a")
1165 1165 if chk != node:
1166 1166 raise RevlogError(_("consistency error adding group"))
1167 1167 textlen = len(text)
1168 1168 else:
1169 1169 e = (offset_type(end, 0), len(cdelta), textlen, base,
1170 1170 link, self.rev(p1), self.rev(p2), node)
1171 1171 self.index.insert(-1, e)
1172 1172 self.nodemap[node] = r
1173 1173 entry = self._io.packentry(e, self.node, self.version)
1174 1174 if self._inline:
1175 1175 ifh.write(entry)
1176 1176 ifh.write(cdelta)
1177 1177 self.checkinlinesize(transaction, ifh)
1178 1178 if not self._inline:
1179 1179 dfh = self.opener(self.datafile, "a")
1180 1180 ifh = self.opener(self.indexfile, "a")
1181 1181 else:
1182 1182 dfh.write(cdelta)
1183 1183 ifh.write(entry)
1184 1184
1185 1185 t, r, chain, prev = r, r + 1, node, node
1186 1186 base = self.base(t)
1187 1187 start = self.start(base)
1188 1188 end = self.end(t)
1189 1189
1190 1190 return node
1191 1191
1192 1192 def strip(self, rev, minlink):
1193 1193 if self.count() == 0 or rev >= self.count():
1194 1194 return
1195 1195
1196 1196 if isinstance(self.index, lazyindex):
1197 1197 self._loadindexmap()
1198 1198
1199 1199 # When stripping away a revision, we need to make sure it
1200 1200 # does not actually belong to an older changeset.
1201 1201 # The minlink parameter defines the oldest revision
1202 1202 # we're allowed to strip away.
1203 1203 while minlink > self.index[rev][4]:
1204 1204 rev += 1
1205 1205 if rev >= self.count():
1206 1206 return
1207 1207
1208 1208 # first truncate the files on disk
1209 1209 end = self.start(rev)
1210 1210 if not self._inline:
1211 1211 df = self.opener(self.datafile, "a")
1212 1212 df.truncate(end)
1213 1213 end = rev * self._io.size
1214 1214 else:
1215 1215 end += rev * self._io.size
1216 1216
1217 1217 indexf = self.opener(self.indexfile, "a")
1218 1218 indexf.truncate(end)
1219 1219
1220 1220 # then reset internal state in memory to forget those revisions
1221 1221 self._cache = None
1222 1222 self._chunkcache = None
1223 1223 for x in xrange(rev, self.count()):
1224 1224 del self.nodemap[self.node(x)]
1225 1225
1226 1226 del self.index[rev:-1]
1227 1227
1228 1228 def checksize(self):
1229 1229 expected = 0
1230 1230 if self.count():
1231 1231 expected = self.end(self.count() - 1)
1232 1232
1233 1233 try:
1234 1234 f = self.opener(self.datafile)
1235 1235 f.seek(0, 2)
1236 1236 actual = f.tell()
1237 1237 dd = actual - expected
1238 1238 except IOError, inst:
1239 1239 if inst.errno != errno.ENOENT:
1240 1240 raise
1241 1241 dd = 0
1242 1242
1243 1243 try:
1244 1244 f = self.opener(self.indexfile)
1245 1245 f.seek(0, 2)
1246 1246 actual = f.tell()
1247 1247 s = self._io.size
1248 1248 i = actual / s
1249 1249 di = actual - (i * s)
1250 1250 if self._inline:
1251 1251 databytes = 0
1252 1252 for r in xrange(self.count()):
1253 1253 databytes += self.length(r)
1254 1254 dd = 0
1255 1255 di = actual - self.count() * s - databytes
1256 1256 except IOError, inst:
1257 1257 if inst.errno != errno.ENOENT:
1258 1258 raise
1259 1259 di = 0
1260 1260
1261 1261 return (dd, di)
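
The method being patched, findnode (lines 158-201 above), scans the index file backwards in fixed-size blocks and, within each block, looks for a 20-byte node hash aligned to an entry boundary. The following stand-alone sketch restates that scan outside the diff so the control flow is easier to follow; the constants, the helper name scan_for_node, and the return value are illustrative assumptions, not Mercurial's API.

    # Simplified sketch of the scan performed by the patched findnode()
    # (illustrative constants and name; not the actual Mercurial code).
    ENTRY_SIZE = 64    # assumed fixed size of one index entry
    SHA_OFFSET = 32    # assumed offset of the 20-byte node hash in an entry

    def scan_for_node(buf, node, blocksize=ENTRY_SIZE * 256):
        """Walk buf (assumed to hold whole ENTRY_SIZE records) backwards in
        blocks; return the matching entry number, or None if node is absent."""
        end = len(buf)
        while end >= 0:
            start = max(end - blocksize, 0)
            data = buf[start:end]
            findend = len(data)
            while True:
                off = data.find(node, 0, findend)   # earliest hit in the window
                findend = off
                if off < 0:
                    break                           # nothing (more) in this block
                i = off // ENTRY_SIZE               # align down to an entry boundary
                sha = data[i * ENTRY_SIZE + SHA_OFFSET:
                           i * ENTRY_SIZE + SHA_OFFSET + 20]
                if sha == node:
                    return start // ENTRY_SIZE + i  # entry index in the whole buffer
                # hit was not an aligned hash field; keep searching earlier
            end -= blocksize
        return None
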