revlog: define censored flag for revlogng index...
Mike Edgar - r23855:4f23081c default
@@ -1,1535 +1,1537 b''
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 # import stuff from node for others to import from revlog
15 15 from node import bin, hex, nullid, nullrev
16 16 from i18n import _
17 17 import ancestor, mdiff, parsers, error, util, templatefilters
18 18 import struct, zlib, errno
19 19
20 20 _pack = struct.pack
21 21 _unpack = struct.unpack
22 22 _compress = zlib.compress
23 23 _decompress = zlib.decompress
24 24 _sha = util.sha1
25 25
26 26 # revlog header flags
27 27 REVLOGV0 = 0
28 28 REVLOGNG = 1
29 29 REVLOGNGINLINEDATA = (1 << 16)
30 30 REVLOGGENERALDELTA = (1 << 17)
31 31 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
32 32 REVLOG_DEFAULT_FORMAT = REVLOGNG
33 33 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
34 34 REVLOGNG_FLAGS = REVLOGNGINLINEDATA | REVLOGGENERALDELTA
35 35
36 36 # revlog index flags
37 REVIDX_KNOWN_FLAGS = 0
37 REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
38 REVIDX_DEFAULT_FLAGS = 0
39 REVIDX_KNOWN_FLAGS = REVIDX_ISCENSORED
38 40
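A minimal sketch (not part of the changeset) of how a reader could test the new censor bit against a RevlogNG index entry, whose first field packs the data offset together with the 16-bit flag field (see offset_type below). The `entry` tuple here is a hypothetical stand-in for an index row:

    REVIDX_ISCENSORED = 1 << 15

    def iscensored(entry):
        # the low 16 bits of the first index field hold the per-rev flags
        return bool((entry[0] & 0xFFFF) & REVIDX_ISCENSORED)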
39 41 # max size of revlog with inline data
40 42 _maxinline = 131072
41 43 _chunksize = 1048576
42 44
43 45 RevlogError = error.RevlogError
44 46 LookupError = error.LookupError
45 47 CensoredNodeError = error.CensoredNodeError
46 48
47 49 def getoffset(q):
48 50 return int(q >> 16)
49 51
50 52 def gettype(q):
51 53 return int(q & 0xFFFF)
52 54
53 55 def offset_type(offset, type):
54 56 return long(long(offset) << 16 | type)
55 57
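offset_type, getoffset and gettype round-trip a data offset and a 16-bit type/flag field through one integer. A small worked example with assumed values:

    # pack offset 4096 with the censor flag, then recover both halves
    packed = (4096 << 16) | (1 << 15)
    assert packed >> 16 == 4096            # what getoffset() computes
    assert packed & 0xFFFF == (1 << 15)    # what gettype() computes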
56 58 _nullhash = _sha(nullid)
57 59
58 60 def hash(text, p1, p2):
59 61 """generate a hash from the given text and its parent hashes
60 62
61 63 This hash combines both the current file contents and its history
62 64 in a manner that makes it easy to distinguish nodes with the same
63 65 content in the revision graph.
64 66 """
65 67 # As of now, if one of the parent nodes is null, p2 is null
66 68 if p2 == nullid:
67 69 # deep copy of a hash is faster than creating one
68 70 s = _nullhash.copy()
69 71 s.update(p1)
70 72 else:
71 73 # none of the parent nodes are nullid
72 74 l = [p1, p2]
73 75 l.sort()
74 76 s = _sha(l[0])
75 77 s.update(l[1])
76 78 s.update(text)
77 79 return s.digest()
78 80
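The nullid fast path above is equivalent to hashing both parents in sorted order, since nullid (twenty zero bytes) sorts before any real nodeid. A standalone sketch of the same digest using hashlib directly, mirroring the logic above rather than the fast path:

    import hashlib

    def nodehash(text, p1, p2):
        # SHA-1 over the sorted parent nodeids, then the revision text
        s = hashlib.sha1(min(p1, p2))
        s.update(max(p1, p2))
        s.update(text)
        return s.digest()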
79 81 def decompress(bin):
80 82 """ decompress the given input """
81 83 if not bin:
82 84 return bin
83 85 t = bin[0]
84 86 if t == '\0':
85 87 return bin
86 88 if t == 'x':
87 89 try:
88 90 return _decompress(bin)
89 91 except zlib.error, e:
90 92 raise RevlogError(_("revlog decompress error: %s") % str(e))
91 93 if t == 'u':
92 94 return bin[1:]
93 95 raise RevlogError(_("unknown compression type %r") % t)
94 96
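The leading byte of each stored chunk selects its encoding: '\0' (or an empty chunk) is literal data, 'x' is a zlib stream (zlib output happens to start with byte 0x78, i.e. 'x'), and 'u' marks data stored uncompressed because compression did not pay off. A toy round trip through the helper above:

    import zlib

    assert decompress('u' + 'short text') == 'short text'
    assert decompress(zlib.compress('x' * 1000)) == 'x' * 1000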
95 97 # index v0:
96 98 # 4 bytes: offset
97 99 # 4 bytes: compressed length
98 100 # 4 bytes: base rev
99 101 # 4 bytes: link rev
100 102 # 20 bytes: parent 1 nodeid
101 103 # 20 bytes: parent 2 nodeid
102 104 # 20 bytes: nodeid
103 105 indexformatv0 = ">4l20s20s20s"
104 106 v0shaoffset = 56
105 107
106 108 class revlogoldio(object):
107 109 def __init__(self):
108 110 self.size = struct.calcsize(indexformatv0)
109 111
110 112 def parseindex(self, data, inline):
111 113 s = self.size
112 114 index = []
113 115 nodemap = {nullid: nullrev}
114 116 n = off = 0
115 117 l = len(data)
116 118 while off + s <= l:
117 119 cur = data[off:off + s]
118 120 off += s
119 121 e = _unpack(indexformatv0, cur)
120 122 # transform to revlogv1 format
121 123 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
122 124 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
123 125 index.append(e2)
124 126 nodemap[e[6]] = n
125 127 n += 1
126 128
127 129 # add the magic null revision at -1
128 130 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
129 131
130 132 return index, nodemap, None
131 133
132 134 def packentry(self, entry, node, version, rev):
133 135 if gettype(entry[0]):
134 136 raise RevlogError(_("index entry flags need RevlogNG"))
135 137 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
136 138 node(entry[5]), node(entry[6]), entry[7])
137 139 return _pack(indexformatv0, *e2)
138 140
139 141 # index ng:
140 142 # 6 bytes: offset
141 143 # 2 bytes: flags
142 144 # 4 bytes: compressed length
143 145 # 4 bytes: uncompressed length
144 146 # 4 bytes: base rev
145 147 # 4 bytes: link rev
146 148 # 4 bytes: parent 1 rev
147 149 # 4 bytes: parent 2 rev
148 150 # 20 bytes: nodeid
149 151 indexformatng = ">Qiiiiii20s12x"
150 152 ngshaoffset = 32
151 153 versionformat = ">I"
152 154
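The corresponding sketch for a RevlogNG record: 64 fixed bytes, with the 48-bit offset and 16-bit flags packed into the leading '>Q' field and the nodeid starting at byte ngshaoffset = 32 (synthetic values):

    import struct

    entry = (offset_type(0, 0), 11, 11, 0, 0, -1, -1, 'n' * 20)
    rec = struct.pack(indexformatng, *entry)
    assert struct.calcsize(indexformatng) == 64
    assert rec[ngshaoffset:ngshaoffset + 20] == 'n' * 20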
153 155 class revlogio(object):
154 156 def __init__(self):
155 157 self.size = struct.calcsize(indexformatng)
156 158
157 159 def parseindex(self, data, inline):
158 160 # call the C implementation to parse the index data
159 161 index, cache = parsers.parse_index2(data, inline)
160 162 return index, getattr(index, 'nodemap', None), cache
161 163
162 164 def packentry(self, entry, node, version, rev):
163 165 p = _pack(indexformatng, *entry)
164 166 if rev == 0:
165 167 p = _pack(versionformat, version) + p[4:]
166 168 return p
167 169
168 170 class revlog(object):
169 171 """
170 172 the underlying revision storage object
171 173
172 174 A revlog consists of two parts, an index and the revision data.
173 175
174 176 The index is a file with a fixed record size containing
175 177 information on each revision, including its nodeid (hash), the
176 178 nodeids of its parents, the position and offset of its data within
177 179 the data file, and the revision it's based on. Finally, each entry
178 180 contains a linkrev entry that can serve as a pointer to external
179 181 data.
180 182
181 183 The revision data itself is a linear collection of data chunks.
182 184 Each chunk represents a revision and is usually represented as a
183 185 delta against the previous chunk. To bound lookup time, runs of
184 186 deltas are limited to about 2 times the length of the original
185 187 version data. This makes retrieval of a version proportional to
186 188 its size, or O(1) relative to the number of revisions.
187 189
188 190 Both pieces of the revlog are written to in an append-only
189 191 fashion, which means we never need to rewrite a file to insert or
190 192 remove data, and can use some simple techniques to avoid the need
191 193 for locking while reading.
192 194 """
193 195 def __init__(self, opener, indexfile):
194 196 """
195 197 create a revlog object
196 198
197 199 opener is a function that abstracts the file opening operation
198 200 and can be used to implement COW semantics or the like.
199 201 """
200 202 self.indexfile = indexfile
201 203 self.datafile = indexfile[:-2] + ".d"
202 204 self.opener = opener
203 205 self._cache = None
204 206 self._basecache = None
205 207 self._chunkcache = (0, '')
206 208 self._chunkcachesize = 65536
207 209 self._maxchainlen = None
208 210 self.index = []
209 211 self._pcache = {}
210 212 self._nodecache = {nullid: nullrev}
211 213 self._nodepos = None
212 214
213 215 v = REVLOG_DEFAULT_VERSION
214 216 opts = getattr(opener, 'options', None)
215 217 if opts is not None:
216 218 if 'revlogv1' in opts:
217 219 if 'generaldelta' in opts:
218 220 v |= REVLOGGENERALDELTA
219 221 else:
220 222 v = 0
221 223 if 'chunkcachesize' in opts:
222 224 self._chunkcachesize = opts['chunkcachesize']
223 225 if 'maxchainlen' in opts:
224 226 self._maxchainlen = opts['maxchainlen']
225 227
226 228 if self._chunkcachesize <= 0:
227 229 raise RevlogError(_('revlog chunk cache size %r is not greater '
228 230 'than 0') % self._chunkcachesize)
229 231 elif self._chunkcachesize & (self._chunkcachesize - 1):
230 232 raise RevlogError(_('revlog chunk cache size %r is not a power '
231 233 'of 2') % self._chunkcachesize)
232 234
233 235 i = ''
234 236 self._initempty = True
235 237 try:
236 238 f = self.opener(self.indexfile)
237 239 i = f.read()
238 240 f.close()
239 241 if len(i) > 0:
240 242 v = struct.unpack(versionformat, i[:4])[0]
241 243 self._initempty = False
242 244 except IOError, inst:
243 245 if inst.errno != errno.ENOENT:
244 246 raise
245 247
246 248 self.version = v
247 249 self._inline = v & REVLOGNGINLINEDATA
248 250 self._generaldelta = v & REVLOGGENERALDELTA
249 251 flags = v & ~0xFFFF
250 252 fmt = v & 0xFFFF
251 253 if fmt == REVLOGV0 and flags:
252 254 raise RevlogError(_("index %s unknown flags %#04x for format v0")
253 255 % (self.indexfile, flags >> 16))
254 256 elif fmt == REVLOGNG and flags & ~REVLOGNG_FLAGS:
255 257 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
256 258 % (self.indexfile, flags >> 16))
257 259 elif fmt > REVLOGNG:
258 260 raise RevlogError(_("index %s unknown format %d")
259 261 % (self.indexfile, fmt))
260 262
261 263 self._io = revlogio()
262 264 if self.version == REVLOGV0:
263 265 self._io = revlogoldio()
264 266 try:
265 267 d = self._io.parseindex(i, self._inline)
266 268 except (ValueError, IndexError):
267 269 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
268 270 self.index, nodemap, self._chunkcache = d
269 271 if nodemap is not None:
270 272 self.nodemap = self._nodecache = nodemap
271 273 if not self._chunkcache:
272 274 self._chunkclear()
273 275 # revnum -> (chain-length, sum-delta-length)
274 276 self._chaininfocache = {}
275 277
276 278 def tip(self):
277 279 return self.node(len(self.index) - 2)
278 280 def __len__(self):
279 281 return len(self.index) - 1
280 282 def __iter__(self):
281 283 return iter(xrange(len(self)))
282 284 def revs(self, start=0, stop=None):
283 285 """iterate over all revs in this revlog (from start to stop)"""
284 286 step = 1
285 287 if stop is not None:
286 288 if start > stop:
287 289 step = -1
288 290 stop += step
289 291 else:
290 292 stop = len(self)
291 293 return xrange(start, stop, step)
292 294
293 295 @util.propertycache
294 296 def nodemap(self):
295 297 self.rev(self.node(0))
296 298 return self._nodecache
297 299
298 300 def hasnode(self, node):
299 301 try:
300 302 self.rev(node)
301 303 return True
302 304 except KeyError:
303 305 return False
304 306
305 307 def clearcaches(self):
306 308 try:
307 309 self._nodecache.clearcaches()
308 310 except AttributeError:
309 311 self._nodecache = {nullid: nullrev}
310 312 self._nodepos = None
311 313
312 314 def rev(self, node):
313 315 try:
314 316 return self._nodecache[node]
315 317 except TypeError:
316 318 raise
317 319 except RevlogError:
318 320 # parsers.c radix tree lookup failed
319 321 raise LookupError(node, self.indexfile, _('no node'))
320 322 except KeyError:
321 323 # pure python cache lookup failed
322 324 n = self._nodecache
323 325 i = self.index
324 326 p = self._nodepos
325 327 if p is None:
326 328 p = len(i) - 2
327 329 for r in xrange(p, -1, -1):
328 330 v = i[r][7]
329 331 n[v] = r
330 332 if v == node:
331 333 self._nodepos = r - 1
332 334 return r
333 335 raise LookupError(node, self.indexfile, _('no node'))
334 336
335 337 def node(self, rev):
336 338 return self.index[rev][7]
337 339 def linkrev(self, rev):
338 340 return self.index[rev][4]
339 341 def parents(self, node):
340 342 i = self.index
341 343 d = i[self.rev(node)]
342 344 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
343 345 def parentrevs(self, rev):
344 346 return self.index[rev][5:7]
345 347 def start(self, rev):
346 348 return int(self.index[rev][0] >> 16)
347 349 def end(self, rev):
348 350 return self.start(rev) + self.length(rev)
349 351 def length(self, rev):
350 352 return self.index[rev][1]
351 353 def chainbase(self, rev):
352 354 index = self.index
353 355 base = index[rev][3]
354 356 while base != rev:
355 357 rev = base
356 358 base = index[rev][3]
357 359 return base
358 360 def chainlen(self, rev):
359 361 return self._chaininfo(rev)[0]
360 362
361 363 def _chaininfo(self, rev):
362 364 chaininfocache = self._chaininfocache
363 365 if rev in chaininfocache:
364 366 return chaininfocache[rev]
365 367 index = self.index
366 368 generaldelta = self._generaldelta
367 369 iterrev = rev
368 370 e = index[iterrev]
369 371 clen = 0
370 372 compresseddeltalen = 0
371 373 while iterrev != e[3]:
372 374 clen += 1
373 375 compresseddeltalen += e[1]
374 376 if generaldelta:
375 377 iterrev = e[3]
376 378 else:
377 379 iterrev -= 1
378 380 if iterrev in chaininfocache:
379 381 t = chaininfocache[iterrev]
380 382 clen += t[0]
381 383 compresseddeltalen += t[1]
382 384 break
383 385 e = index[iterrev]
384 386 else:
385 387 # Add text length of base since decompressing that also takes
386 388 # work. For cache hits the length is already included.
387 389 compresseddeltalen += e[1]
388 390 r = (clen, compresseddeltalen)
389 391 chaininfocache[rev] = r
390 392 return r
391 393
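The walk in _chaininfo mirrors how a revision is reconstructed: start at rev and follow delta parents down to the chain base, where a full text is stored. With general delta each entry names its base explicitly; otherwise the delta parent is simply the previous rev. A standalone sketch over a hypothetical base table (not the real index layout):

    def chainlen(base, rev, generaldelta):
        # base[r] is the delta base of r; base[r] == r marks a full text
        n = 0
        while base[rev] != rev:
            n += 1
            rev = base[rev] if generaldelta else rev - 1
        return n

    # rev 0 stores a full text; revs 1-3 each delta against their predecessor
    assert chainlen([0, 0, 1, 2], 3, generaldelta=True) == 3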
392 394 def flags(self, rev):
393 395 return self.index[rev][0] & 0xFFFF
394 396 def rawsize(self, rev):
395 397 """return the length of the uncompressed text for a given revision"""
396 398 l = self.index[rev][2]
397 399 if l >= 0:
398 400 return l
399 401
400 402 t = self.revision(self.node(rev))
401 403 return len(t)
402 404 size = rawsize
403 405
404 406 def ancestors(self, revs, stoprev=0, inclusive=False):
405 407 """Generate the ancestors of 'revs' in reverse topological order.
406 408 Does not generate revs lower than stoprev.
407 409
408 410 See the documentation for ancestor.lazyancestors for more details."""
409 411
410 412 return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
411 413 inclusive=inclusive)
412 414
413 415 def descendants(self, revs):
414 416 """Generate the descendants of 'revs' in revision order.
415 417
416 418 Yield a sequence of revision numbers starting with a child of
417 419 some rev in revs, i.e., each revision is *not* considered a
418 420 descendant of itself. Results are ordered by revision number (a
419 421 topological sort)."""
420 422 first = min(revs)
421 423 if first == nullrev:
422 424 for i in self:
423 425 yield i
424 426 return
425 427
426 428 seen = set(revs)
427 429 for i in self.revs(start=first + 1):
428 430 for x in self.parentrevs(i):
429 431 if x != nullrev and x in seen:
430 432 seen.add(i)
431 433 yield i
432 434 break
433 435
434 436 def findcommonmissing(self, common=None, heads=None):
435 437 """Return a tuple of the ancestors of common and the ancestors of heads
436 438 that are not ancestors of common. In revset terminology, we return the
437 439 tuple:
438 440
439 441 ::common, (::heads) - (::common)
440 442
441 443 The list is sorted by revision number, meaning it is
442 444 topologically sorted.
443 445
444 446 'heads' and 'common' are both lists of node IDs. If heads is
445 447 not supplied, uses all of the revlog's heads. If common is not
446 448 supplied, uses nullid."""
447 449 if common is None:
448 450 common = [nullid]
449 451 if heads is None:
450 452 heads = self.heads()
451 453
452 454 common = [self.rev(n) for n in common]
453 455 heads = [self.rev(n) for n in heads]
454 456
455 457 # we want the ancestors, but inclusive
456 458 class lazyset(object):
457 459 def __init__(self, lazyvalues):
458 460 self.addedvalues = set()
459 461 self.lazyvalues = lazyvalues
460 462
461 463 def __contains__(self, value):
462 464 return value in self.addedvalues or value in self.lazyvalues
463 465
464 466 def __iter__(self):
465 467 added = self.addedvalues
466 468 for r in added:
467 469 yield r
468 470 for r in self.lazyvalues:
469 471 if not r in added:
470 472 yield r
471 473
472 474 def add(self, value):
473 475 self.addedvalues.add(value)
474 476
475 477 def update(self, values):
476 478 self.addedvalues.update(values)
477 479
478 480 has = lazyset(self.ancestors(common))
479 481 has.add(nullrev)
480 482 has.update(common)
481 483
482 484 # take all ancestors from heads that aren't in has
483 485 missing = set()
484 486 visit = util.deque(r for r in heads if r not in has)
485 487 while visit:
486 488 r = visit.popleft()
487 489 if r in missing:
488 490 continue
489 491 else:
490 492 missing.add(r)
491 493 for p in self.parentrevs(r):
492 494 if p not in has:
493 495 visit.append(p)
494 496 missing = list(missing)
495 497 missing.sort()
496 498 return has, [self.node(r) for r in missing]
497 499
498 500 def incrementalmissingrevs(self, common=None):
499 501 """Return an object that can be used to incrementally compute the
500 502 revision numbers of the ancestors of arbitrary sets that are not
501 503 ancestors of common. This is an ancestor.incrementalmissingancestors
502 504 object.
503 505
504 506 'common' is a list of revision numbers. If common is not supplied, uses
505 507 nullrev.
506 508 """
507 509 if common is None:
508 510 common = [nullrev]
509 511
510 512 return ancestor.incrementalmissingancestors(self.parentrevs, common)
511 513
512 514 def findmissingrevs(self, common=None, heads=None):
513 515 """Return the revision numbers of the ancestors of heads that
514 516 are not ancestors of common.
515 517
516 518 More specifically, return a list of revision numbers corresponding to
517 519 nodes N such that every N satisfies the following constraints:
518 520
519 521 1. N is an ancestor of some node in 'heads'
520 522 2. N is not an ancestor of any node in 'common'
521 523
522 524 The list is sorted by revision number, meaning it is
523 525 topologically sorted.
524 526
525 527 'heads' and 'common' are both lists of revision numbers. If heads is
526 528 not supplied, uses all of the revlog's heads. If common is not
527 529 supplied, uses nullid."""
528 530 if common is None:
529 531 common = [nullrev]
530 532 if heads is None:
531 533 heads = self.headrevs()
532 534
533 535 inc = self.incrementalmissingrevs(common=common)
534 536 return inc.missingancestors(heads)
535 537
536 538 def findmissing(self, common=None, heads=None):
537 539 """Return the ancestors of heads that are not ancestors of common.
538 540
539 541 More specifically, return a list of nodes N such that every N
540 542 satisfies the following constraints:
541 543
542 544 1. N is an ancestor of some node in 'heads'
543 545 2. N is not an ancestor of any node in 'common'
544 546
545 547 The list is sorted by revision number, meaning it is
546 548 topologically sorted.
547 549
548 550 'heads' and 'common' are both lists of node IDs. If heads is
549 551 not supplied, uses all of the revlog's heads. If common is not
550 552 supplied, uses nullid."""
551 553 if common is None:
552 554 common = [nullid]
553 555 if heads is None:
554 556 heads = self.heads()
555 557
556 558 common = [self.rev(n) for n in common]
557 559 heads = [self.rev(n) for n in heads]
558 560
559 561 inc = self.incrementalmissingrevs(common=common)
560 562 return [self.node(r) for r in inc.missingancestors(heads)]
561 563
562 564 def nodesbetween(self, roots=None, heads=None):
563 565 """Return a topological path from 'roots' to 'heads'.
564 566
565 567 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
566 568 topologically sorted list of all nodes N that satisfy both of
567 569 these constraints:
568 570
569 571 1. N is a descendant of some node in 'roots'
570 572 2. N is an ancestor of some node in 'heads'
571 573
572 574 Every node is considered to be both a descendant and an ancestor
573 575 of itself, so every reachable node in 'roots' and 'heads' will be
574 576 included in 'nodes'.
575 577
576 578 'outroots' is the list of reachable nodes in 'roots', i.e., the
577 579 subset of 'roots' that is returned in 'nodes'. Likewise,
578 580 'outheads' is the subset of 'heads' that is also in 'nodes'.
579 581
580 582 'roots' and 'heads' are both lists of node IDs. If 'roots' is
581 583 unspecified, uses nullid as the only root. If 'heads' is
582 584 unspecified, uses list of all of the revlog's heads."""
583 585 nonodes = ([], [], [])
584 586 if roots is not None:
585 587 roots = list(roots)
586 588 if not roots:
587 589 return nonodes
588 590 lowestrev = min([self.rev(n) for n in roots])
589 591 else:
590 592 roots = [nullid] # Everybody's a descendant of nullid
591 593 lowestrev = nullrev
592 594 if (lowestrev == nullrev) and (heads is None):
593 595 # We want _all_ the nodes!
594 596 return ([self.node(r) for r in self], [nullid], list(self.heads()))
595 597 if heads is None:
596 598 # All nodes are ancestors, so the latest ancestor is the last
597 599 # node.
598 600 highestrev = len(self) - 1
599 601 # Set ancestors to None to signal that every node is an ancestor.
600 602 ancestors = None
601 603 # Set heads to an empty dictionary for later discovery of heads
602 604 heads = {}
603 605 else:
604 606 heads = list(heads)
605 607 if not heads:
606 608 return nonodes
607 609 ancestors = set()
608 610 # Turn heads into a dictionary so we can remove 'fake' heads.
609 611 # Also, later we will be using it to filter out the heads we can't
610 612 # find from roots.
611 613 heads = dict.fromkeys(heads, False)
612 614 # Start at the top and keep marking parents until we're done.
613 615 nodestotag = set(heads)
614 616 # Remember where the top was so we can use it as a limit later.
615 617 highestrev = max([self.rev(n) for n in nodestotag])
616 618 while nodestotag:
617 619 # grab a node to tag
618 620 n = nodestotag.pop()
619 621 # Never tag nullid
620 622 if n == nullid:
621 623 continue
622 624 # A node's revision number represents its place in a
623 625 # topologically sorted list of nodes.
624 626 r = self.rev(n)
625 627 if r >= lowestrev:
626 628 if n not in ancestors:
627 629 # If we are possibly a descendant of one of the roots
628 630 # and we haven't already been marked as an ancestor
629 631 ancestors.add(n) # Mark as ancestor
630 632 # Add non-nullid parents to list of nodes to tag.
631 633 nodestotag.update([p for p in self.parents(n) if
632 634 p != nullid])
633 635 elif n in heads: # We've seen it before, is it a fake head?
634 636 # So it is; real heads should not be the ancestors of
635 637 # any other heads.
636 638 heads.pop(n)
637 639 if not ancestors:
638 640 return nonodes
639 641 # Now that we have our set of ancestors, we want to remove any
640 642 # roots that are not ancestors.
641 643
642 644 # If one of the roots was nullid, everything is included anyway.
643 645 if lowestrev > nullrev:
644 646 # But, since we weren't, let's recompute the lowest rev to not
645 647 # include roots that aren't ancestors.
646 648
647 649 # Filter out roots that aren't ancestors of heads
648 650 roots = [n for n in roots if n in ancestors]
649 651 # Recompute the lowest revision
650 652 if roots:
651 653 lowestrev = min([self.rev(n) for n in roots])
652 654 else:
653 655 # No more roots? Return empty list
654 656 return nonodes
655 657 else:
656 658 # We are descending from nullid, and don't need to care about
657 659 # any other roots.
658 660 lowestrev = nullrev
659 661 roots = [nullid]
660 662 # Transform our roots list into a set.
661 663 descendants = set(roots)
662 664 # Also, keep the original roots so we can filter out roots that aren't
663 665 # 'real' roots (i.e. are descended from other roots).
664 666 roots = descendants.copy()
665 667 # Our topologically sorted list of output nodes.
666 668 orderedout = []
667 669 # Don't start at nullid since we don't want nullid in our output list,
668 670 # and if nullid shows up in descendants, empty parents will look like
669 671 # they're descendants.
670 672 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
671 673 n = self.node(r)
672 674 isdescendant = False
673 675 if lowestrev == nullrev: # Everybody is a descendant of nullid
674 676 isdescendant = True
675 677 elif n in descendants:
676 678 # n is already a descendant
677 679 isdescendant = True
678 680 # This check only needs to be done here because all the roots
679 681 # will start being marked as descendants before the loop.
680 682 if n in roots:
681 683 # If n was a root, check if it's a 'real' root.
682 684 p = tuple(self.parents(n))
683 685 # If any of its parents are descendants, it's not a root.
684 686 if (p[0] in descendants) or (p[1] in descendants):
685 687 roots.remove(n)
686 688 else:
687 689 p = tuple(self.parents(n))
688 690 # A node is a descendant if either of its parents is a
689 691 # descendant. (We seeded the descendants list with the roots
690 692 # up there, remember?)
691 693 if (p[0] in descendants) or (p[1] in descendants):
692 694 descendants.add(n)
693 695 isdescendant = True
694 696 if isdescendant and ((ancestors is None) or (n in ancestors)):
695 697 # Only include nodes that are both descendants and ancestors.
696 698 orderedout.append(n)
697 699 if (ancestors is not None) and (n in heads):
698 700 # We're trying to figure out which heads are reachable
699 701 # from roots.
700 702 # Mark this head as having been reached
701 703 heads[n] = True
702 704 elif ancestors is None:
703 705 # Otherwise, we're trying to discover the heads.
704 706 # Assume this is a head because if it isn't, the next step
705 707 # will eventually remove it.
706 708 heads[n] = True
707 709 # But, obviously its parents aren't.
708 710 for p in self.parents(n):
709 711 heads.pop(p, None)
710 712 heads = [n for n, flag in heads.iteritems() if flag]
711 713 roots = list(roots)
712 714 assert orderedout
713 715 assert roots
714 716 assert heads
715 717 return (orderedout, roots, heads)
716 718
717 719 def headrevs(self):
718 720 try:
719 721 return self.index.headrevs()
720 722 except AttributeError:
721 723 return self._headrevs()
722 724
723 725 def _headrevs(self):
724 726 count = len(self)
725 727 if not count:
726 728 return [nullrev]
727 729 # we won't iterate over filtered revs, so nobody is a head at the start
728 730 ishead = [0] * (count + 1)
729 731 index = self.index
730 732 for r in self:
731 733 ishead[r] = 1 # I may be a head
732 734 e = index[r]
733 735 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
734 736 return [r for r, val in enumerate(ishead) if val]
735 737
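The head computation above needs only one pass because a rev's parents always have smaller numbers. The same scheme on a bare parent table (toy data, not the revlog API):

    def headrevs(parents):
        # every rev starts as a head; being someone's parent clears it
        ishead = [True] * len(parents)
        for r, (p1, p2) in enumerate(parents):
            for p in (p1, p2):
                if p >= 0:
                    ishead[p] = False
        return [r for r, h in enumerate(ishead) if h]

    # 0 <- 1 <- 2 and 0 <- 3: the heads are 2 and 3
    assert headrevs([(-1, -1), (0, -1), (1, -1), (0, -1)]) == [2, 3]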
736 738 def heads(self, start=None, stop=None):
737 739 """return the list of all nodes that have no children
738 740
739 741 if start is specified, only heads that are descendants of
740 742 start will be returned
741 743 if stop is specified, it will consider all the revs from stop
742 744 as if they had no children
743 745 """
744 746 if start is None and stop is None:
745 747 if not len(self):
746 748 return [nullid]
747 749 return [self.node(r) for r in self.headrevs()]
748 750
749 751 if start is None:
750 752 start = nullid
751 753 if stop is None:
752 754 stop = []
753 755 stoprevs = set([self.rev(n) for n in stop])
754 756 startrev = self.rev(start)
755 757 reachable = set((startrev,))
756 758 heads = set((startrev,))
757 759
758 760 parentrevs = self.parentrevs
759 761 for r in self.revs(start=startrev + 1):
760 762 for p in parentrevs(r):
761 763 if p in reachable:
762 764 if r not in stoprevs:
763 765 reachable.add(r)
764 766 heads.add(r)
765 767 if p in heads and p not in stoprevs:
766 768 heads.remove(p)
767 769
768 770 return [self.node(r) for r in heads]
769 771
770 772 def children(self, node):
771 773 """find the children of a given node"""
772 774 c = []
773 775 p = self.rev(node)
774 776 for r in self.revs(start=p + 1):
775 777 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
776 778 if prevs:
777 779 for pr in prevs:
778 780 if pr == p:
779 781 c.append(self.node(r))
780 782 elif p == nullrev:
781 783 c.append(self.node(r))
782 784 return c
783 785
784 786 def descendant(self, start, end):
785 787 if start == nullrev:
786 788 return True
787 789 for i in self.descendants([start]):
788 790 if i == end:
789 791 return True
790 792 elif i > end:
791 793 break
792 794 return False
793 795
794 796 def commonancestorsheads(self, a, b):
795 797 """calculate all the heads of the common ancestors of nodes a and b"""
796 798 a, b = self.rev(a), self.rev(b)
797 799 try:
798 800 ancs = self.index.commonancestorsheads(a, b)
799 801 except (AttributeError, OverflowError): # C implementation failed
800 802 ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)
801 803 return map(self.node, ancs)
802 804
803 805 def isancestor(self, a, b):
804 806 """return True if node a is an ancestor of node b
805 807
806 808 The implementation of this is trivial but the use of
807 809 commonancestorsheads is not."""
808 810 return a in self.commonancestorsheads(a, b)
809 811
810 812 def ancestor(self, a, b):
811 813 """calculate the "best" common ancestor of nodes a and b"""
812 814
813 815 a, b = self.rev(a), self.rev(b)
814 816 try:
815 817 ancs = self.index.ancestors(a, b)
816 818 except (AttributeError, OverflowError):
817 819 ancs = ancestor.ancestors(self.parentrevs, a, b)
818 820 if ancs:
819 821 # choose a consistent winner when there's a tie
820 822 return min(map(self.node, ancs))
821 823 return nullid
822 824
823 825 def _match(self, id):
824 826 if isinstance(id, int):
825 827 # rev
826 828 return self.node(id)
827 829 if len(id) == 20:
828 830 # possibly a binary node
829 831 # odds of a binary node being all hex in ASCII are 1 in 10**25
830 832 try:
831 833 node = id
832 834 self.rev(node) # quick search the index
833 835 return node
834 836 except LookupError:
835 837 pass # may be partial hex id
836 838 try:
837 839 # str(rev)
838 840 rev = int(id)
839 841 if str(rev) != id:
840 842 raise ValueError
841 843 if rev < 0:
842 844 rev = len(self) + rev
843 845 if rev < 0 or rev >= len(self):
844 846 raise ValueError
845 847 return self.node(rev)
846 848 except (ValueError, OverflowError):
847 849 pass
848 850 if len(id) == 40:
849 851 try:
850 852 # a full hex nodeid?
851 853 node = bin(id)
852 854 self.rev(node)
853 855 return node
854 856 except (TypeError, LookupError):
855 857 pass
856 858
857 859 def _partialmatch(self, id):
858 860 try:
859 861 n = self.index.partialmatch(id)
860 862 if n and self.hasnode(n):
861 863 return n
862 864 return None
863 865 except RevlogError:
864 866 # parsers.c radix tree lookup gave multiple matches
865 867 # fall through to slow path that filters hidden revisions
866 868 pass
867 869 except (AttributeError, ValueError):
868 870 # we are pure python, or key was too short to search radix tree
869 871 pass
870 872
871 873 if id in self._pcache:
872 874 return self._pcache[id]
873 875
874 876 if len(id) < 40:
875 877 try:
876 878 # hex(node)[:...]
877 879 l = len(id) // 2 # grab an even number of digits
878 880 prefix = bin(id[:l * 2])
879 881 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
880 882 nl = [n for n in nl if hex(n).startswith(id) and
881 883 self.hasnode(n)]
882 884 if len(nl) > 0:
883 885 if len(nl) == 1:
884 886 self._pcache[id] = nl[0]
885 887 return nl[0]
886 888 raise LookupError(id, self.indexfile,
887 889 _('ambiguous identifier'))
888 890 return None
889 891 except TypeError:
890 892 pass
891 893
892 894 def lookup(self, id):
893 895 """locate a node based on:
894 896 - revision number or str(revision number)
895 897 - nodeid or subset of hex nodeid
896 898 """
897 899 n = self._match(id)
898 900 if n is not None:
899 901 return n
900 902 n = self._partialmatch(id)
901 903 if n:
902 904 return n
903 905
904 906 raise LookupError(id, self.indexfile, _('no match found'))
905 907
906 908 def cmp(self, node, text):
907 909 """compare text with a given file revision
908 910
909 911 returns True if text is different from what is stored.
910 912 """
911 913 p1, p2 = self.parents(node)
912 914 return hash(text, p1, p2) != node
913 915
914 916 def _addchunk(self, offset, data):
915 917 o, d = self._chunkcache
916 918 # try to add to existing cache
917 919 if o + len(d) == offset and len(d) + len(data) < _chunksize:
918 920 self._chunkcache = o, d + data
919 921 else:
920 922 self._chunkcache = offset, data
921 923
922 924 def _loadchunk(self, offset, length):
923 925 if self._inline:
924 926 df = self.opener(self.indexfile)
925 927 else:
926 928 df = self.opener(self.datafile)
927 929
928 930 # Cache data both forward and backward around the requested
929 931 # data, in a fixed size window. This helps speed up operations
930 932 # involving reading the revlog backwards.
931 933 cachesize = self._chunkcachesize
932 934 realoffset = offset & ~(cachesize - 1)
933 935 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
934 936 - realoffset)
935 937 df.seek(realoffset)
936 938 d = df.read(reallength)
937 939 df.close()
938 940 self._addchunk(realoffset, d)
939 941 if offset != realoffset or reallength != length:
940 942 return util.buffer(d, offset - realoffset, length)
941 943 return d
942 944
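Since _chunkcachesize is validated at init time to be a power of two, the window arithmetic in _loadchunk is plain bit masking: round the start down to a cache-size boundary and extend the end past the next one. A worked example with the default 64 KiB window (assumed request):

    cachesize = 65536                     # power of two
    offset, length = 70000, 1000
    realoffset = offset & ~(cachesize - 1)
    reallength = (((offset + length + cachesize) & ~(cachesize - 1))
                  - realoffset)
    assert (realoffset, reallength) == (65536, 65536)
    # the cached window [65536, 131072) covers the request [70000, 71000)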
943 945 def _getchunk(self, offset, length):
944 946 o, d = self._chunkcache
945 947 l = len(d)
946 948
947 949 # is it in the cache?
948 950 cachestart = offset - o
949 951 cacheend = cachestart + length
950 952 if cachestart >= 0 and cacheend <= l:
951 953 if cachestart == 0 and cacheend == l:
952 954 return d # avoid a copy
953 955 return util.buffer(d, cachestart, cacheend - cachestart)
954 956
955 957 return self._loadchunk(offset, length)
956 958
957 959 def _chunkraw(self, startrev, endrev):
958 960 start = self.start(startrev)
959 961 end = self.end(endrev)
960 962 if self._inline:
961 963 start += (startrev + 1) * self._io.size
962 964 end += (endrev + 1) * self._io.size
963 965 length = end - start
964 966 return self._getchunk(start, length)
965 967
966 968 def _chunk(self, rev):
967 969 return decompress(self._chunkraw(rev, rev))
968 970
969 971 def _chunks(self, revs):
970 972 '''faster version of [self._chunk(rev) for rev in revs]
971 973
972 974 Assumes that revs is in ascending order.'''
973 975 if not revs:
974 976 return []
975 977 start = self.start
976 978 length = self.length
977 979 inline = self._inline
978 980 iosize = self._io.size
979 981 buffer = util.buffer
980 982
981 983 l = []
982 984 ladd = l.append
983 985
984 986 # preload the cache
985 987 try:
986 988 while True:
987 989 # ensure that the cache doesn't change out from under us
988 990 _cache = self._chunkcache
989 991 self._chunkraw(revs[0], revs[-1])
990 992 if _cache == self._chunkcache:
991 993 break
992 994 offset, data = _cache
993 995 except OverflowError:
994 996 # issue4215 - we can't cache a run of chunks greater than
995 997 # 2G on Windows
996 998 return [self._chunk(rev) for rev in revs]
997 999
998 1000 for rev in revs:
999 1001 chunkstart = start(rev)
1000 1002 if inline:
1001 1003 chunkstart += (rev + 1) * iosize
1002 1004 chunklength = length(rev)
1003 1005 ladd(decompress(buffer(data, chunkstart - offset, chunklength)))
1004 1006
1005 1007 return l
1006 1008
1007 1009 def _chunkclear(self):
1008 1010 self._chunkcache = (0, '')
1009 1011
1010 1012 def deltaparent(self, rev):
1011 1013 """return deltaparent of the given revision"""
1012 1014 base = self.index[rev][3]
1013 1015 if base == rev:
1014 1016 return nullrev
1015 1017 elif self._generaldelta:
1016 1018 return base
1017 1019 else:
1018 1020 return rev - 1
1019 1021
1020 1022 def revdiff(self, rev1, rev2):
1021 1023 """return or calculate a delta between two revisions"""
1022 1024 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1023 1025 return str(self._chunk(rev2))
1024 1026
1025 1027 return mdiff.textdiff(self.revision(rev1),
1026 1028 self.revision(rev2))
1027 1029
1028 1030 def revision(self, nodeorrev):
1029 1031 """return an uncompressed revision of a given node or revision
1030 1032 number.
1031 1033 """
1032 1034 if isinstance(nodeorrev, int):
1033 1035 rev = nodeorrev
1034 1036 node = self.node(rev)
1035 1037 else:
1036 1038 node = nodeorrev
1037 1039 rev = None
1038 1040
1039 1041 _cache = self._cache # grab local copy of cache to avoid thread race
1040 1042 cachedrev = None
1041 1043 if node == nullid:
1042 1044 return ""
1043 1045 if _cache:
1044 1046 if _cache[0] == node:
1045 1047 return _cache[2]
1046 1048 cachedrev = _cache[1]
1047 1049
1048 1050 # look up what we need to read
1049 1051 text = None
1050 1052 if rev is None:
1051 1053 rev = self.rev(node)
1052 1054
1053 1055 # check rev flags
1054 1056 if self.flags(rev) & ~REVIDX_KNOWN_FLAGS:
1055 1057 raise RevlogError(_('incompatible revision flag %x') %
1056 1058 (self.flags(rev) & ~REVIDX_KNOWN_FLAGS))
1057 1059
1058 1060 # build delta chain
1059 1061 chain = []
1060 1062 index = self.index # for performance
1061 1063 generaldelta = self._generaldelta
1062 1064 iterrev = rev
1063 1065 e = index[iterrev]
1064 1066 while iterrev != e[3] and iterrev != cachedrev:
1065 1067 chain.append(iterrev)
1066 1068 if generaldelta:
1067 1069 iterrev = e[3]
1068 1070 else:
1069 1071 iterrev -= 1
1070 1072 e = index[iterrev]
1071 1073
1072 1074 if iterrev == cachedrev:
1073 1075 # cache hit
1074 1076 text = _cache[2]
1075 1077 else:
1076 1078 chain.append(iterrev)
1077 1079 chain.reverse()
1078 1080
1079 1081 # drop cache to save memory
1080 1082 self._cache = None
1081 1083
1082 1084 bins = self._chunks(chain)
1083 1085 if text is None:
1084 1086 text = str(bins[0])
1085 1087 bins = bins[1:]
1086 1088
1087 1089 text = mdiff.patches(text, bins)
1088 1090
1089 1091 text = self._checkhash(text, node, rev)
1090 1092
1091 1093 self._cache = (node, rev, text)
1092 1094 return text
1093 1095
1094 1096 def hash(self, text, p1, p2):
1095 1097 """Compute a node hash.
1096 1098
1097 1099 Available as a function so that subclasses can replace the hash
1098 1100 as needed.
1099 1101 """
1100 1102 return hash(text, p1, p2)
1101 1103
1102 1104 def _checkhash(self, text, node, rev):
1103 1105 p1, p2 = self.parents(node)
1104 1106 self.checkhash(text, p1, p2, node, rev)
1105 1107 return text
1106 1108
1107 1109 def checkhash(self, text, p1, p2, node, rev=None):
1108 1110 if node != self.hash(text, p1, p2):
1109 1111 revornode = rev
1110 1112 if revornode is None:
1111 1113 revornode = templatefilters.short(hex(node))
1112 1114 raise RevlogError(_("integrity check failed on %s:%s")
1113 1115 % (self.indexfile, revornode))
1114 1116
1115 1117 def checkinlinesize(self, tr, fp=None):
1116 1118 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
1117 1119 return
1118 1120
1119 1121 trinfo = tr.find(self.indexfile)
1120 1122 if trinfo is None:
1121 1123 raise RevlogError(_("%s not found in the transaction")
1122 1124 % self.indexfile)
1123 1125
1124 1126 trindex = trinfo[2]
1125 1127 dataoff = self.start(trindex)
1126 1128
1127 1129 tr.add(self.datafile, dataoff)
1128 1130
1129 1131 if fp:
1130 1132 fp.flush()
1131 1133 fp.close()
1132 1134
1133 1135 df = self.opener(self.datafile, 'w')
1134 1136 try:
1135 1137 for r in self:
1136 1138 df.write(self._chunkraw(r, r))
1137 1139 finally:
1138 1140 df.close()
1139 1141
1140 1142 fp = self.opener(self.indexfile, 'w', atomictemp=True)
1141 1143 self.version &= ~(REVLOGNGINLINEDATA)
1142 1144 self._inline = False
1143 1145 for i in self:
1144 1146 e = self._io.packentry(self.index[i], self.node, self.version, i)
1145 1147 fp.write(e)
1146 1148
1147 1149 # if we don't call close, the temp file will never replace the
1148 1150 # real index
1149 1151 fp.close()
1150 1152
1151 1153 tr.replace(self.indexfile, trindex * self._io.size)
1152 1154 self._chunkclear()
1153 1155
1154 1156 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1155 1157 node=None):
1156 1158 """add a revision to the log
1157 1159
1158 1160 text - the revision data to add
1159 1161 transaction - the transaction object used for rollback
1160 1162 link - the linkrev data to add
1161 1163 p1, p2 - the parent nodeids of the revision
1162 1164 cachedelta - an optional precomputed delta
1163 1165 node - nodeid of revision; typically node is not specified, and it is
1164 1166 computed by default as hash(text, p1, p2); however, subclasses might
1165 1167 use a different hashing method (and override checkhash() in that case)
1166 1168 """
1167 1169 if link == nullrev:
1168 1170 raise RevlogError(_("attempted to add linkrev -1 to %s")
1169 1171 % self.indexfile)
1170 1172 node = node or self.hash(text, p1, p2)
1171 1173 if node in self.nodemap:
1172 1174 return node
1173 1175
1174 1176 dfh = None
1175 1177 if not self._inline:
1176 1178 dfh = self.opener(self.datafile, "a")
1177 1179 ifh = self.opener(self.indexfile, "a+")
1178 1180 try:
1179 1181 return self._addrevision(node, text, transaction, link, p1, p2,
1180 1182 cachedelta, ifh, dfh)
1181 1183 finally:
1182 1184 if dfh:
1183 1185 dfh.close()
1184 1186 ifh.close()
1185 1187
1186 1188 def compress(self, text):
1187 1189 """ generate a possibly-compressed representation of text """
1188 1190 if not text:
1189 1191 return ("", text)
1190 1192 l = len(text)
1191 1193 bin = None
1192 1194 if l < 44:
1193 1195 pass
1194 1196 elif l > 1000000:
1195 1197 # zlib makes an internal copy, thus doubling memory usage for
1196 1198 # large files, so let's do this in pieces
1197 1199 z = zlib.compressobj()
1198 1200 p = []
1199 1201 pos = 0
1200 1202 while pos < l:
1201 1203 pos2 = pos + 2**20
1202 1204 p.append(z.compress(text[pos:pos2]))
1203 1205 pos = pos2
1204 1206 p.append(z.flush())
1205 1207 if sum(map(len, p)) < l:
1206 1208 bin = "".join(p)
1207 1209 else:
1208 1210 bin = _compress(text)
1209 1211 if bin is None or len(bin) > l:
1210 1212 if text[0] == '\0':
1211 1213 return ("", text)
1212 1214 return ('u', text)
1213 1215 return ("", bin)
1214 1216
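compress returns a (header, data) pair that the module-level decompress understands: an empty header when the payload is zlib data (or literal text starting with '\0'), and a 'u' header when storing uncompressed wins. A round-trip sketch with a hypothetical revlog instance rl (compress touches no instance state):

    text = 'some file text ' * 100
    header, data = rl.compress(text)
    assert decompress(header + data) == text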
1215 1217 def _addrevision(self, node, text, transaction, link, p1, p2,
1216 1218 cachedelta, ifh, dfh):
1217 1219 """internal function to add revisions to the log
1218 1220
1219 1221 see addrevision for argument descriptions.
1220 1222 invariants:
1221 1223 - text is optional (can be None); if not set, cachedelta must be set.
1222 1224 if both are set, they must correspond to each other.
1223 1225 """
1224 1226 btext = [text]
1225 1227 def buildtext():
1226 1228 if btext[0] is not None:
1227 1229 return btext[0]
1228 1230 # flush any pending writes here so we can read it in revision
1229 1231 if dfh:
1230 1232 dfh.flush()
1231 1233 ifh.flush()
1232 1234 basetext = self.revision(self.node(cachedelta[0]))
1233 1235 btext[0] = mdiff.patch(basetext, cachedelta[1])
1234 1236 try:
1235 1237 self.checkhash(btext[0], p1, p2, node)
1236 1238 except CensoredNodeError:
1237 1239 pass # always import a censor tombstone.
1238 1240 return btext[0]
1239 1241
1240 1242 def builddelta(rev):
1241 1243 # can we use the cached delta?
1242 1244 if cachedelta and cachedelta[0] == rev:
1243 1245 delta = cachedelta[1]
1244 1246 else:
1245 1247 t = buildtext()
1246 1248 ptext = self.revision(self.node(rev))
1247 1249 delta = mdiff.textdiff(ptext, t)
1248 1250 data = self.compress(delta)
1249 1251 l = len(data[1]) + len(data[0])
1250 1252 if basecache[0] == rev:
1251 1253 chainbase = basecache[1]
1252 1254 else:
1253 1255 chainbase = self.chainbase(rev)
1254 1256 dist = l + offset - self.start(chainbase)
1255 1257 if self._generaldelta:
1256 1258 base = rev
1257 1259 else:
1258 1260 base = chainbase
1259 1261 chainlen, compresseddeltalen = self._chaininfo(rev)
1260 1262 chainlen += 1
1261 1263 compresseddeltalen += l
1262 1264 return dist, l, data, base, chainbase, chainlen, compresseddeltalen
1263 1265
1264 1266 curr = len(self)
1265 1267 prev = curr - 1
1266 1268 base = chainbase = curr
1267 1269 chainlen = None
1268 1270 offset = self.end(prev)
1269 1271 flags = 0
1270 1272 d = None
1271 1273 if self._basecache is None:
1272 1274 self._basecache = (prev, self.chainbase(prev))
1273 1275 basecache = self._basecache
1274 1276 p1r, p2r = self.rev(p1), self.rev(p2)
1275 1277
1276 1278 # should we try to build a delta?
1277 1279 if prev != nullrev:
1278 1280 if self._generaldelta:
1279 1281 if p1r >= basecache[1]:
1280 1282 d = builddelta(p1r)
1281 1283 elif p2r >= basecache[1]:
1282 1284 d = builddelta(p2r)
1283 1285 else:
1284 1286 d = builddelta(prev)
1285 1287 else:
1286 1288 d = builddelta(prev)
1287 1289 dist, l, data, base, chainbase, chainlen, compresseddeltalen = d
1288 1290
1289 1291 # full versions are inserted when the needed deltas
1290 1292 # become comparable to the uncompressed text
1291 1293 if text is None:
1292 1294 textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
1293 1295 cachedelta[1])
1294 1296 else:
1295 1297 textlen = len(text)
1296 1298
1297 1299 # - 'dist' is the distance from the base revision -- bounding it limits
1298 1300 # the amount of I/O we need to do.
1299 1301 # - 'compresseddeltalen' is the sum of the total size of deltas we need
1300 1302 # to apply -- bounding it limits the amount of CPU we consume.
1301 1303 if (d is None or dist > textlen * 4 or l > textlen or
1302 1304 compresseddeltalen > textlen * 2 or
1303 1305 (self._maxchainlen and chainlen > self._maxchainlen)):
1304 1306 text = buildtext()
1305 1307 data = self.compress(text)
1306 1308 l = len(data[1]) + len(data[0])
1307 1309 base = chainbase = curr
1308 1310
1309 1311 e = (offset_type(offset, flags), l, textlen,
1310 1312 base, link, p1r, p2r, node)
1311 1313 self.index.insert(-1, e)
1312 1314 self.nodemap[node] = curr
1313 1315
1314 1316 entry = self._io.packentry(e, self.node, self.version, curr)
1315 1317 self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
1316 1318
1317 1319 if type(text) == str: # only accept immutable objects
1318 1320 self._cache = (node, curr, text)
1319 1321 self._basecache = (curr, chainbase)
1320 1322 return node
1321 1323
1322 1324 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
1323 1325 curr = len(self) - 1
1324 1326 if not self._inline:
1325 1327 transaction.add(self.datafile, offset)
1326 1328 transaction.add(self.indexfile, curr * len(entry))
1327 1329 if data[0]:
1328 1330 dfh.write(data[0])
1329 1331 dfh.write(data[1])
1330 1332 dfh.flush()
1331 1333 ifh.write(entry)
1332 1334 else:
1333 1335 offset += curr * self._io.size
1334 1336 transaction.add(self.indexfile, offset, curr)
1335 1337 ifh.write(entry)
1336 1338 ifh.write(data[0])
1337 1339 ifh.write(data[1])
1338 1340 self.checkinlinesize(transaction, ifh)
1339 1341
1340 1342 def addgroup(self, bundle, linkmapper, transaction):
1341 1343 """
1342 1344 add a delta group
1343 1345
1344 1346 given a set of deltas, add them to the revision log. the
1345 1347 first delta is against its parent, which should be in our
1346 1348 log; the rest are against the previous delta.
1347 1349 """
1348 1350
1349 1351 # track the base of the current delta log
1350 1352 content = []
1351 1353 node = None
1352 1354
1353 1355 r = len(self)
1354 1356 end = 0
1355 1357 if r:
1356 1358 end = self.end(r - 1)
1357 1359 ifh = self.opener(self.indexfile, "a+")
1358 1360 isize = r * self._io.size
1359 1361 if self._inline:
1360 1362 transaction.add(self.indexfile, end + isize, r)
1361 1363 dfh = None
1362 1364 else:
1363 1365 transaction.add(self.indexfile, isize, r)
1364 1366 transaction.add(self.datafile, end)
1365 1367 dfh = self.opener(self.datafile, "a")
1366 1368
1367 1369 try:
1368 1370 # loop through our set of deltas
1369 1371 chain = None
1370 1372 while True:
1371 1373 chunkdata = bundle.deltachunk(chain)
1372 1374 if not chunkdata:
1373 1375 break
1374 1376 node = chunkdata['node']
1375 1377 p1 = chunkdata['p1']
1376 1378 p2 = chunkdata['p2']
1377 1379 cs = chunkdata['cs']
1378 1380 deltabase = chunkdata['deltabase']
1379 1381 delta = chunkdata['delta']
1380 1382
1381 1383 content.append(node)
1382 1384
1383 1385 link = linkmapper(cs)
1384 1386 if node in self.nodemap:
1385 1387 # this can happen if two branches make the same change
1386 1388 chain = node
1387 1389 continue
1388 1390
1389 1391 for p in (p1, p2):
1390 1392 if p not in self.nodemap:
1391 1393 raise LookupError(p, self.indexfile,
1392 1394 _('unknown parent'))
1393 1395
1394 1396 if deltabase not in self.nodemap:
1395 1397 raise LookupError(deltabase, self.indexfile,
1396 1398 _('unknown delta base'))
1397 1399
1398 1400 baserev = self.rev(deltabase)
1399 1401 chain = self._addrevision(node, None, transaction, link,
1400 1402 p1, p2, (baserev, delta), ifh, dfh)
1401 1403 if not dfh and not self._inline:
1402 1404 # addrevision switched from inline to conventional
1403 1405 # reopen the index
1404 1406 ifh.close()
1405 1407 dfh = self.opener(self.datafile, "a")
1406 1408 ifh = self.opener(self.indexfile, "a")
1407 1409 finally:
1408 1410 if dfh:
1409 1411 dfh.close()
1410 1412 ifh.close()
1411 1413
1412 1414 return content
1413 1415
1414 1416 def getstrippoint(self, minlink):
1415 1417 """find the minimum rev that must be stripped to strip the linkrev
1416 1418
1417 1419 Returns a tuple containing the minimum rev and a set of all revs that
1418 1420 have linkrevs that will be broken by this strip.
1419 1421 """
1420 1422 brokenrevs = set()
1421 1423 strippoint = len(self)
1422 1424
1423 1425 heads = {}
1424 1426 futurelargelinkrevs = set()
1425 1427 for head in self.headrevs():
1426 1428 headlinkrev = self.linkrev(head)
1427 1429 heads[head] = headlinkrev
1428 1430 if headlinkrev >= minlink:
1429 1431 futurelargelinkrevs.add(headlinkrev)
1430 1432
1431 1433 # This algorithm involves walking down the rev graph, starting at the
1432 1434 # heads. Since the revs are topologically sorted according to linkrev,
1433 1435 # once all head linkrevs are below the minlink, we know there are
1434 1436 # no more revs that could have a linkrev greater than minlink.
1435 1437 # So we can stop walking.
1436 1438 while futurelargelinkrevs:
1437 1439 strippoint -= 1
1438 1440 linkrev = heads.pop(strippoint)
1439 1441
1440 1442 if linkrev < minlink:
1441 1443 brokenrevs.add(strippoint)
1442 1444 else:
1443 1445 futurelargelinkrevs.remove(linkrev)
1444 1446
1445 1447 for p in self.parentrevs(strippoint):
1446 1448 if p != nullrev:
1447 1449 plinkrev = self.linkrev(p)
1448 1450 heads[p] = plinkrev
1449 1451 if plinkrev >= minlink:
1450 1452 futurelargelinkrevs.add(plinkrev)
1451 1453
1452 1454 return strippoint, brokenrevs
1453 1455
1454 1456 def strip(self, minlink, transaction):
1455 1457 """truncate the revlog on the first revision with a linkrev >= minlink
1456 1458
1457 1459 This function is called when we're stripping revision minlink and
1458 1460 its descendants from the repository.
1459 1461
1460 1462 We have to remove all revisions with linkrev >= minlink, because
1461 1463 the equivalent changelog revisions will be renumbered after the
1462 1464 strip.
1463 1465
1464 1466 So we truncate the revlog on the first of these revisions, and
1465 1467 trust that the caller has saved the revisions that shouldn't be
1466 1468 removed and that it'll re-add them after this truncation.
1467 1469 """
1468 1470 if len(self) == 0:
1469 1471 return
1470 1472
1471 1473 rev, _ = self.getstrippoint(minlink)
1472 1474 if rev == len(self):
1473 1475 return
1474 1476
1475 1477 # first truncate the files on disk
1476 1478 end = self.start(rev)
1477 1479 if not self._inline:
1478 1480 transaction.add(self.datafile, end)
1479 1481 end = rev * self._io.size
1480 1482 else:
1481 1483 end += rev * self._io.size
1482 1484
1483 1485 transaction.add(self.indexfile, end)
1484 1486
1485 1487 # then reset internal state in memory to forget those revisions
1486 1488 self._cache = None
1487 1489 self._chaininfocache = {}
1488 1490 self._chunkclear()
1489 1491 for x in xrange(rev, len(self)):
1490 1492 del self.nodemap[self.node(x)]
1491 1493
1492 1494 del self.index[rev:-1]
1493 1495
1494 1496 def checksize(self):
1495 1497 expected = 0
1496 1498 if len(self):
1497 1499 expected = max(0, self.end(len(self) - 1))
1498 1500
1499 1501 try:
1500 1502 f = self.opener(self.datafile)
1501 1503 f.seek(0, 2)
1502 1504 actual = f.tell()
1503 1505 f.close()
1504 1506 dd = actual - expected
1505 1507 except IOError, inst:
1506 1508 if inst.errno != errno.ENOENT:
1507 1509 raise
1508 1510 dd = 0
1509 1511
1510 1512 try:
1511 1513 f = self.opener(self.indexfile)
1512 1514 f.seek(0, 2)
1513 1515 actual = f.tell()
1514 1516 f.close()
1515 1517 s = self._io.size
1516 1518 i = max(0, actual // s)
1517 1519 di = actual - (i * s)
1518 1520 if self._inline:
1519 1521 databytes = 0
1520 1522 for r in self:
1521 1523 databytes += max(0, self.length(r))
1522 1524 dd = 0
1523 1525 di = actual - len(self) * s - databytes
1524 1526 except IOError, inst:
1525 1527 if inst.errno != errno.ENOENT:
1526 1528 raise
1527 1529 di = 0
1528 1530
1529 1531 return (dd, di)
1530 1532
1531 1533 def files(self):
1532 1534 res = [self.indexfile]
1533 1535 if not self._inline:
1534 1536 res.append(self.datafile)
1535 1537 return res