revlog: raise an exception earlier if an entry is too large (issue4675)...
Jordi Gutiérrez Hermoso
r25410:eee88912 default
@@ -1,1594 +1,1604 @@
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 # import stuff from node for others to import from revlog
15 15 import collections
16 16 from node import bin, hex, nullid, nullrev
17 17 from i18n import _
18 18 import ancestor, mdiff, parsers, error, util, templatefilters
19 19 import struct, zlib, errno
20 20
21 21 _pack = struct.pack
22 22 _unpack = struct.unpack
23 23 _compress = zlib.compress
24 24 _decompress = zlib.decompress
25 25 _sha = util.sha1
26 26
27 27 # revlog header flags
28 28 REVLOGV0 = 0
29 29 REVLOGNG = 1
30 30 REVLOGNGINLINEDATA = (1 << 16)
31 31 REVLOGGENERALDELTA = (1 << 17)
32 32 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
33 33 REVLOG_DEFAULT_FORMAT = REVLOGNG
34 34 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
35 35 REVLOGNG_FLAGS = REVLOGNGINLINEDATA | REVLOGGENERALDELTA
36 36
37 37 # revlog index flags
38 38 REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
39 39 REVIDX_DEFAULT_FLAGS = 0
40 40 REVIDX_KNOWN_FLAGS = REVIDX_ISCENSORED
41 41
42 42 # max size of revlog with inline data
43 43 _maxinline = 131072
44 44 _chunksize = 1048576
45 45
46 46 RevlogError = error.RevlogError
47 47 LookupError = error.LookupError
48 48 CensoredNodeError = error.CensoredNodeError
49 49
50 50 def getoffset(q):
51 51 return int(q >> 16)
52 52
53 53 def gettype(q):
54 54 return int(q & 0xFFFF)
55 55
56 56 def offset_type(offset, type):
57 57 return long(long(offset) << 16 | type)
58 58
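# illustrative sketch (editorial, not part of this changeset): offset_type
# packs a byte offset and 16 bits of flags into one long, and the two
# accessors above invert it:
#   q = offset_type(4096, REVIDX_ISCENSORED)   # (4096 << 16) | 0x8000
#   getoffset(q)  # -> 4096
#   gettype(q)    # -> 0x8000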
59 59 _nullhash = _sha(nullid)
60 60
61 61 def hash(text, p1, p2):
62 62 """generate a hash from the given text and its parent hashes
63 63
64 64 This hash combines both the current file contents and its history
65 65 in a manner that makes it easy to distinguish nodes with the same
66 66 content in the revision graph.
67 67 """
68 68 # As of now, if one of the parent nodes is null, p2 is null
69 69 if p2 == nullid:
70 70 # deep copy of a hash is faster than creating one
71 71 s = _nullhash.copy()
72 72 s.update(p1)
73 73 else:
74 74 # none of the parent nodes are nullid
75 75 l = [p1, p2]
76 76 l.sort()
77 77 s = _sha(l[0])
78 78 s.update(l[1])
79 79 s.update(text)
80 80 return s.digest()
81 81
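# illustrative sketch (editorial): the parent hashes feed the digest, so
# the same text at different points in history yields distinct nodes:
#   n1 = hash("data", nullid, nullid)
#   n2 = hash("data", n1, nullid)
#   assert n1 != n2   # identical text, different parents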
82 82 def decompress(bin):
83 83 """ decompress the given input """
84 84 if not bin:
85 85 return bin
86 86 t = bin[0]
87 87 if t == '\0':
88 88 return bin
89 89 if t == 'x':
90 90 try:
91 91 return _decompress(bin)
92 92 except zlib.error, e:
93 93 raise RevlogError(_("revlog decompress error: %s") % str(e))
94 94 if t == 'u':
95 95 return bin[1:]
96 96 raise RevlogError(_("unknown compression type %r") % t)
97 97
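# illustrative sketch (editorial): the first byte of a chunk selects its
# encoding, matching the branches above:
#   decompress(_compress("hello"))  # -> "hello" (zlib streams begin with 'x')
#   decompress("uhello")            # -> "hello" ('u' marks stored-raw text)
#   decompress("\0hello")           # -> "\0hello" (NUL chunks pass through whole)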
98 98 # index v0:
99 99 # 4 bytes: offset
100 100 # 4 bytes: compressed length
101 101 # 4 bytes: base rev
102 102 # 4 bytes: link rev
103 103 # 20 bytes: parent 1 nodeid
104 104 # 20 bytes: parent 2 nodeid
105 105 # 20 bytes: nodeid
106 106 indexformatv0 = ">4l20s20s20s"
107 107 v0shaoffset = 56
108 108
109 109 class revlogoldio(object):
110 110 def __init__(self):
111 111 self.size = struct.calcsize(indexformatv0)
112 112
113 113 def parseindex(self, data, inline):
114 114 s = self.size
115 115 index = []
116 116 nodemap = {nullid: nullrev}
117 117 n = off = 0
118 118 l = len(data)
119 119 while off + s <= l:
120 120 cur = data[off:off + s]
121 121 off += s
122 122 e = _unpack(indexformatv0, cur)
123 123 # transform to revlogv1 format
124 124 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
125 125 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
126 126 index.append(e2)
127 127 nodemap[e[6]] = n
128 128 n += 1
129 129
130 130 # add the magic null revision at -1
131 131 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
132 132
133 133 return index, nodemap, None
134 134
135 135 def packentry(self, entry, node, version, rev):
136 136 if gettype(entry[0]):
137 137 raise RevlogError(_("index entry flags need RevlogNG"))
138 138 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
139 139 node(entry[5]), node(entry[6]), entry[7])
140 140 return _pack(indexformatv0, *e2)
141 141
142 142 # index ng:
143 143 # 6 bytes: offset
144 144 # 2 bytes: flags
145 145 # 4 bytes: compressed length
146 146 # 4 bytes: uncompressed length
147 147 # 4 bytes: base rev
148 148 # 4 bytes: link rev
149 149 # 4 bytes: parent 1 rev
150 150 # 4 bytes: parent 2 rev
151 151 # 32 bytes: nodeid
152 152 indexformatng = ">Qiiiiii20s12x"
153 153 ngshaoffset = 32
154 154 versionformat = ">I"
155 155
156 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
157 # signed integer)
158 _maxentrysize = 0x7fffffff
159
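# illustrative sketch (editorial): both record layouts are fixed-size,
# which is what makes O(1) lookup by revision number possible:
#   struct.calcsize(indexformatv0)  # -> 76 (4*4 + 3*20)
#   struct.calcsize(indexformatng)  # -> 64 (8 + 6*4 + 20-byte hash + 12x pad)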
156 160 class revlogio(object):
157 161 def __init__(self):
158 162 self.size = struct.calcsize(indexformatng)
159 163
160 164 def parseindex(self, data, inline):
161 165 # call the C implementation to parse the index data
162 166 index, cache = parsers.parse_index2(data, inline)
163 167 return index, getattr(index, 'nodemap', None), cache
164 168
165 169 def packentry(self, entry, node, version, rev):
170 uncompressedlength = entry[2]
171 if uncompressedlength > _maxentrysize:
172 raise RevlogError(
173 _("size of %d bytes exceeds maximum revlog storage of 2GiB")
174 % uncompressedlength)
175
166 176 p = _pack(indexformatng, *entry)
167 177 if rev == 0:
168 178 p = _pack(versionformat, version) + p[4:]
169 179 return p
170 180
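# illustrative sketch (editorial): the new guard in packentry above fires
# before struct.pack would overflow the 4-byte signed uncompressed-length
# field (entry[2]); the values here are made up for demonstration:
#   io = revlogio()
#   entry = (0, 0, 0x80000000, 0, 0, -1, -1, "\0" * 20)  # 2GiB, one past the cap
#   io.packentry(entry, None, REVLOG_DEFAULT_VERSION, 1)
#   # -> RevlogError: size of 2147483648 bytes exceeds maximum revlog
#   #    storage of 2GiB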
171 181 class revlog(object):
172 182 """
173 183 the underlying revision storage object
174 184
175 185 A revlog consists of two parts, an index and the revision data.
176 186
177 187 The index is a file with a fixed record size containing
178 188 information on each revision, including its nodeid (hash), the
179 189 nodeids of its parents, the position and offset of its data within
180 190 the data file, and the revision it's based on. Finally, each entry
181 191 contains a linkrev entry that can serve as a pointer to external
182 192 data.
183 193
184 194 The revision data itself is a linear collection of data chunks.
185 195 Each chunk represents a revision and is usually represented as a
186 196 delta against the previous chunk. To bound lookup time, runs of
187 197 deltas are limited to about 2 times the length of the original
188 198 version data. This makes retrieval of a version proportional to
189 199 its size, or O(1) relative to the number of revisions.
190 200
191 201 Both pieces of the revlog are written to in an append-only
192 202 fashion, which means we never need to rewrite a file to insert or
193 203 remove data, and can use some simple techniques to avoid the need
194 204 for locking while reading.
195 205 """
196 206 def __init__(self, opener, indexfile):
197 207 """
198 208 create a revlog object
199 209
200 210 opener is a function that abstracts the file opening operation
201 211 and can be used to implement COW semantics or the like.
202 212 """
203 213 self.indexfile = indexfile
204 214 self.datafile = indexfile[:-2] + ".d"
205 215 self.opener = opener
206 216 self._cache = None
207 217 self._basecache = None
208 218 self._chunkcache = (0, '')
209 219 self._chunkcachesize = 65536
210 220 self._maxchainlen = None
211 221 self.index = []
212 222 self._pcache = {}
213 223 self._nodecache = {nullid: nullrev}
214 224 self._nodepos = None
215 225
216 226 v = REVLOG_DEFAULT_VERSION
217 227 opts = getattr(opener, 'options', None)
218 228 if opts is not None:
219 229 if 'revlogv1' in opts:
220 230 if 'generaldelta' in opts:
221 231 v |= REVLOGGENERALDELTA
222 232 else:
223 233 v = 0
224 234 if 'chunkcachesize' in opts:
225 235 self._chunkcachesize = opts['chunkcachesize']
226 236 if 'maxchainlen' in opts:
227 237 self._maxchainlen = opts['maxchainlen']
228 238
229 239 if self._chunkcachesize <= 0:
230 240 raise RevlogError(_('revlog chunk cache size %r is not greater '
231 241 'than 0') % self._chunkcachesize)
232 242 elif self._chunkcachesize & (self._chunkcachesize - 1):
233 243 raise RevlogError(_('revlog chunk cache size %r is not a power '
234 244 'of 2') % self._chunkcachesize)
235 245
236 246 i = ''
237 247 self._initempty = True
238 248 try:
239 249 f = self.opener(self.indexfile)
240 250 i = f.read()
241 251 f.close()
242 252 if len(i) > 0:
243 253 v = struct.unpack(versionformat, i[:4])[0]
244 254 self._initempty = False
245 255 except IOError, inst:
246 256 if inst.errno != errno.ENOENT:
247 257 raise
248 258
249 259 self.version = v
250 260 self._inline = v & REVLOGNGINLINEDATA
251 261 self._generaldelta = v & REVLOGGENERALDELTA
252 262 flags = v & ~0xFFFF
253 263 fmt = v & 0xFFFF
254 264 if fmt == REVLOGV0 and flags:
255 265 raise RevlogError(_("index %s unknown flags %#04x for format v0")
256 266 % (self.indexfile, flags >> 16))
257 267 elif fmt == REVLOGNG and flags & ~REVLOGNG_FLAGS:
258 268 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
259 269 % (self.indexfile, flags >> 16))
260 270 elif fmt > REVLOGNG:
261 271 raise RevlogError(_("index %s unknown format %d")
262 272 % (self.indexfile, fmt))
263 273
264 274 self._io = revlogio()
265 275 if self.version == REVLOGV0:
266 276 self._io = revlogoldio()
267 277 try:
268 278 d = self._io.parseindex(i, self._inline)
269 279 except (ValueError, IndexError):
270 280 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
271 281 self.index, nodemap, self._chunkcache = d
272 282 if nodemap is not None:
273 283 self.nodemap = self._nodecache = nodemap
274 284 if not self._chunkcache:
275 285 self._chunkclear()
276 286 # revnum -> (chain-length, sum-delta-length)
277 287 self._chaininfocache = {}
278 288
279 289 def tip(self):
280 290 return self.node(len(self.index) - 2)
281 291 def __contains__(self, rev):
282 292 return 0 <= rev < len(self)
283 293 def __len__(self):
284 294 return len(self.index) - 1
285 295 def __iter__(self):
286 296 return iter(xrange(len(self)))
287 297 def revs(self, start=0, stop=None):
288 298 """iterate over all rev in this revlog (from start to stop)"""
289 299 step = 1
290 300 if stop is not None:
291 301 if start > stop:
292 302 step = -1
293 303 stop += step
294 304 else:
295 305 stop = len(self)
296 306 return xrange(start, stop, step)
297 307
298 308 @util.propertycache
299 309 def nodemap(self):
300 310 self.rev(self.node(0))
301 311 return self._nodecache
302 312
303 313 def hasnode(self, node):
304 314 try:
305 315 self.rev(node)
306 316 return True
307 317 except KeyError:
308 318 return False
309 319
310 320 def clearcaches(self):
311 321 try:
312 322 self._nodecache.clearcaches()
313 323 except AttributeError:
314 324 self._nodecache = {nullid: nullrev}
315 325 self._nodepos = None
316 326
317 327 def rev(self, node):
318 328 try:
319 329 return self._nodecache[node]
320 330 except TypeError:
321 331 raise
322 332 except RevlogError:
323 333 # parsers.c radix tree lookup failed
324 334 raise LookupError(node, self.indexfile, _('no node'))
325 335 except KeyError:
326 336 # pure python cache lookup failed
327 337 n = self._nodecache
328 338 i = self.index
329 339 p = self._nodepos
330 340 if p is None:
331 341 p = len(i) - 2
332 342 for r in xrange(p, -1, -1):
333 343 v = i[r][7]
334 344 n[v] = r
335 345 if v == node:
336 346 self._nodepos = r - 1
337 347 return r
338 348 raise LookupError(node, self.indexfile, _('no node'))
339 349
340 350 def node(self, rev):
341 351 return self.index[rev][7]
342 352 def linkrev(self, rev):
343 353 return self.index[rev][4]
344 354 def parents(self, node):
345 355 i = self.index
346 356 d = i[self.rev(node)]
347 357 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
348 358 def parentrevs(self, rev):
349 359 return self.index[rev][5:7]
350 360 def start(self, rev):
351 361 return int(self.index[rev][0] >> 16)
352 362 def end(self, rev):
353 363 return self.start(rev) + self.length(rev)
354 364 def length(self, rev):
355 365 return self.index[rev][1]
356 366 def chainbase(self, rev):
357 367 index = self.index
358 368 base = index[rev][3]
359 369 while base != rev:
360 370 rev = base
361 371 base = index[rev][3]
362 372 return base
363 373 def chainlen(self, rev):
364 374 return self._chaininfo(rev)[0]
365 375
366 376 def _chaininfo(self, rev):
367 377 chaininfocache = self._chaininfocache
368 378 if rev in chaininfocache:
369 379 return chaininfocache[rev]
370 380 index = self.index
371 381 generaldelta = self._generaldelta
372 382 iterrev = rev
373 383 e = index[iterrev]
374 384 clen = 0
375 385 compresseddeltalen = 0
376 386 while iterrev != e[3]:
377 387 clen += 1
378 388 compresseddeltalen += e[1]
379 389 if generaldelta:
380 390 iterrev = e[3]
381 391 else:
382 392 iterrev -= 1
383 393 if iterrev in chaininfocache:
384 394 t = chaininfocache[iterrev]
385 395 clen += t[0]
386 396 compresseddeltalen += t[1]
387 397 break
388 398 e = index[iterrev]
389 399 else:
390 400 # Add text length of base since decompressing that also takes
391 401 # work. For cache hits the length is already included.
392 402 compresseddeltalen += e[1]
393 403 r = (clen, compresseddeltalen)
394 404 chaininfocache[rev] = r
395 405 return r
396 406
397 407 def flags(self, rev):
398 408 return self.index[rev][0] & 0xFFFF
399 409 def rawsize(self, rev):
400 410 """return the length of the uncompressed text for a given revision"""
401 411 l = self.index[rev][2]
402 412 if l >= 0:
403 413 return l
404 414
405 415 t = self.revision(self.node(rev))
406 416 return len(t)
407 417 size = rawsize
408 418
409 419 def ancestors(self, revs, stoprev=0, inclusive=False):
410 420 """Generate the ancestors of 'revs' in reverse topological order.
411 421 Does not generate revs lower than stoprev.
412 422
413 423 See the documentation for ancestor.lazyancestors for more details."""
414 424
415 425 return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
416 426 inclusive=inclusive)
417 427
418 428 def descendants(self, revs):
419 429 """Generate the descendants of 'revs' in revision order.
420 430
421 431 Yield a sequence of revision numbers starting with a child of
422 432 some rev in revs, i.e., each revision is *not* considered a
423 433 descendant of itself. Results are ordered by revision number (a
424 434 topological sort)."""
425 435 first = min(revs)
426 436 if first == nullrev:
427 437 for i in self:
428 438 yield i
429 439 return
430 440
431 441 seen = set(revs)
432 442 for i in self.revs(start=first + 1):
433 443 for x in self.parentrevs(i):
434 444 if x != nullrev and x in seen:
435 445 seen.add(i)
436 446 yield i
437 447 break
438 448
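# illustrative sketch (editorial), assuming a revlog instance rl holding a
# linear history 0 <- 1 <- 2:
#   list(rl.descendants([0]))  # -> [1, 2]; per the docstring, a rev is
#                              #    never its own descendant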
439 449 def findcommonmissing(self, common=None, heads=None):
440 450 """Return a tuple of the ancestors of common and the ancestors of heads
441 451 that are not ancestors of common. In revset terminology, we return the
442 452 tuple:
443 453
444 454 ::common, (::heads) - (::common)
445 455
446 456 The list is sorted by revision number, meaning it is
447 457 topologically sorted.
448 458
449 459 'heads' and 'common' are both lists of node IDs. If heads is
450 460 not supplied, uses all of the revlog's heads. If common is not
451 461 supplied, uses nullid."""
452 462 if common is None:
453 463 common = [nullid]
454 464 if heads is None:
455 465 heads = self.heads()
456 466
457 467 common = [self.rev(n) for n in common]
458 468 heads = [self.rev(n) for n in heads]
459 469
460 470 # we want the ancestors, but inclusive
461 471 class lazyset(object):
462 472 def __init__(self, lazyvalues):
463 473 self.addedvalues = set()
464 474 self.lazyvalues = lazyvalues
465 475
466 476 def __contains__(self, value):
467 477 return value in self.addedvalues or value in self.lazyvalues
468 478
469 479 def __iter__(self):
470 480 added = self.addedvalues
471 481 for r in added:
472 482 yield r
473 483 for r in self.lazyvalues:
474 484 if not r in added:
475 485 yield r
476 486
477 487 def add(self, value):
478 488 self.addedvalues.add(value)
479 489
480 490 def update(self, values):
481 491 self.addedvalues.update(values)
482 492
483 493 has = lazyset(self.ancestors(common))
484 494 has.add(nullrev)
485 495 has.update(common)
486 496
487 497 # take all ancestors from heads that aren't in has
488 498 missing = set()
489 499 visit = collections.deque(r for r in heads if r not in has)
490 500 while visit:
491 501 r = visit.popleft()
492 502 if r in missing:
493 503 continue
494 504 else:
495 505 missing.add(r)
496 506 for p in self.parentrevs(r):
497 507 if p not in has:
498 508 visit.append(p)
499 509 missing = list(missing)
500 510 missing.sort()
501 511 return has, [self.node(r) for r in missing]
502 512
503 513 def incrementalmissingrevs(self, common=None):
504 514 """Return an object that can be used to incrementally compute the
505 515 revision numbers of the ancestors of arbitrary sets that are not
506 516 ancestors of common. This is an ancestor.incrementalmissingancestors
507 517 object.
508 518
509 519 'common' is a list of revision numbers. If common is not supplied, uses
510 520 nullrev.
511 521 """
512 522 if common is None:
513 523 common = [nullrev]
514 524
515 525 return ancestor.incrementalmissingancestors(self.parentrevs, common)
516 526
517 527 def findmissingrevs(self, common=None, heads=None):
518 528 """Return the revision numbers of the ancestors of heads that
519 529 are not ancestors of common.
520 530
521 531 More specifically, return a list of revision numbers corresponding to
522 532 nodes N such that every N satisfies the following constraints:
523 533
524 534 1. N is an ancestor of some node in 'heads'
525 535 2. N is not an ancestor of any node in 'common'
526 536
527 537 The list is sorted by revision number, meaning it is
528 538 topologically sorted.
529 539
530 540 'heads' and 'common' are both lists of revision numbers. If heads is
531 541 not supplied, uses all of the revlog's heads. If common is not
532 542 supplied, uses nullid."""
533 543 if common is None:
534 544 common = [nullrev]
535 545 if heads is None:
536 546 heads = self.headrevs()
537 547
538 548 inc = self.incrementalmissingrevs(common=common)
539 549 return inc.missingancestors(heads)
540 550
541 551 def findmissing(self, common=None, heads=None):
542 552 """Return the ancestors of heads that are not ancestors of common.
543 553
544 554 More specifically, return a list of nodes N such that every N
545 555 satisfies the following constraints:
546 556
547 557 1. N is an ancestor of some node in 'heads'
548 558 2. N is not an ancestor of any node in 'common'
549 559
550 560 The list is sorted by revision number, meaning it is
551 561 topologically sorted.
552 562
553 563 'heads' and 'common' are both lists of node IDs. If heads is
554 564 not supplied, uses all of the revlog's heads. If common is not
555 565 supplied, uses nullid."""
556 566 if common is None:
557 567 common = [nullid]
558 568 if heads is None:
559 569 heads = self.heads()
560 570
561 571 common = [self.rev(n) for n in common]
562 572 heads = [self.rev(n) for n in heads]
563 573
564 574 inc = self.incrementalmissingrevs(common=common)
565 575 return [self.node(r) for r in inc.missingancestors(heads)]
566 576
567 577 def nodesbetween(self, roots=None, heads=None):
568 578 """Return a topological path from 'roots' to 'heads'.
569 579
570 580 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
571 581 topologically sorted list of all nodes N that satisfy both of
572 582 these constraints:
573 583
574 584 1. N is a descendant of some node in 'roots'
575 585 2. N is an ancestor of some node in 'heads'
576 586
577 587 Every node is considered to be both a descendant and an ancestor
578 588 of itself, so every reachable node in 'roots' and 'heads' will be
579 589 included in 'nodes'.
580 590
581 591 'outroots' is the list of reachable nodes in 'roots', i.e., the
582 592 subset of 'roots' that is returned in 'nodes'. Likewise,
583 593 'outheads' is the subset of 'heads' that is also in 'nodes'.
584 594
585 595 'roots' and 'heads' are both lists of node IDs. If 'roots' is
586 596 unspecified, uses nullid as the only root. If 'heads' is
587 597 unspecified, uses list of all of the revlog's heads."""
588 598 nonodes = ([], [], [])
589 599 if roots is not None:
590 600 roots = list(roots)
591 601 if not roots:
592 602 return nonodes
593 603 lowestrev = min([self.rev(n) for n in roots])
594 604 else:
595 605 roots = [nullid] # Everybody's a descendant of nullid
596 606 lowestrev = nullrev
597 607 if (lowestrev == nullrev) and (heads is None):
598 608 # We want _all_ the nodes!
599 609 return ([self.node(r) for r in self], [nullid], list(self.heads()))
600 610 if heads is None:
601 611 # All nodes are ancestors, so the latest ancestor is the last
602 612 # node.
603 613 highestrev = len(self) - 1
604 614 # Set ancestors to None to signal that every node is an ancestor.
605 615 ancestors = None
606 616 # Set heads to an empty dictionary for later discovery of heads
607 617 heads = {}
608 618 else:
609 619 heads = list(heads)
610 620 if not heads:
611 621 return nonodes
612 622 ancestors = set()
613 623 # Turn heads into a dictionary so we can remove 'fake' heads.
614 624 # Also, later we will be using it to filter out the heads we can't
615 625 # find from roots.
616 626 heads = dict.fromkeys(heads, False)
617 627 # Start at the top and keep marking parents until we're done.
618 628 nodestotag = set(heads)
619 629 # Remember where the top was so we can use it as a limit later.
620 630 highestrev = max([self.rev(n) for n in nodestotag])
621 631 while nodestotag:
622 632 # grab a node to tag
623 633 n = nodestotag.pop()
624 634 # Never tag nullid
625 635 if n == nullid:
626 636 continue
627 637 # A node's revision number represents its place in a
628 638 # topologically sorted list of nodes.
629 639 r = self.rev(n)
630 640 if r >= lowestrev:
631 641 if n not in ancestors:
632 642 # If we are possibly a descendant of one of the roots
633 643 # and we haven't already been marked as an ancestor
634 644 ancestors.add(n) # Mark as ancestor
635 645 # Add non-nullid parents to list of nodes to tag.
636 646 nodestotag.update([p for p in self.parents(n) if
637 647 p != nullid])
638 648 elif n in heads: # We've seen it before, is it a fake head?
639 649 # So it is, real heads should not be the ancestors of
640 650 # any other heads.
641 651 heads.pop(n)
642 652 if not ancestors:
643 653 return nonodes
644 654 # Now that we have our set of ancestors, we want to remove any
645 655 # roots that are not ancestors.
646 656
647 657 # If one of the roots was nullid, everything is included anyway.
648 658 if lowestrev > nullrev:
649 659 # But, since we weren't, let's recompute the lowest rev to not
650 660 # include roots that aren't ancestors.
651 661
652 662 # Filter out roots that aren't ancestors of heads
653 663 roots = [n for n in roots if n in ancestors]
654 664 # Recompute the lowest revision
655 665 if roots:
656 666 lowestrev = min([self.rev(n) for n in roots])
657 667 else:
658 668 # No more roots? Return empty list
659 669 return nonodes
660 670 else:
661 671 # We are descending from nullid, and don't need to care about
662 672 # any other roots.
663 673 lowestrev = nullrev
664 674 roots = [nullid]
665 675 # Transform our roots list into a set.
666 676 descendants = set(roots)
667 677 # Also, keep the original roots so we can filter out roots that aren't
668 678 # 'real' roots (i.e. are descended from other roots).
669 679 roots = descendants.copy()
670 680 # Our topologically sorted list of output nodes.
671 681 orderedout = []
672 682 # Don't start at nullid since we don't want nullid in our output list,
673 683 # and if nullid shows up in descendants, empty parents will look like
674 684 # they're descendants.
675 685 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
676 686 n = self.node(r)
677 687 isdescendant = False
678 688 if lowestrev == nullrev: # Everybody is a descendant of nullid
679 689 isdescendant = True
680 690 elif n in descendants:
681 691 # n is already a descendant
682 692 isdescendant = True
683 693 # This check only needs to be done here because all the roots
684 694 # will start being marked as descendants before the loop.
685 695 if n in roots:
686 696 # If n was a root, check if it's a 'real' root.
687 697 p = tuple(self.parents(n))
688 698 # If any of its parents are descendants, it's not a root.
689 699 if (p[0] in descendants) or (p[1] in descendants):
690 700 roots.remove(n)
691 701 else:
692 702 p = tuple(self.parents(n))
693 703 # A node is a descendant if either of its parents is a
694 704 # descendant. (We seeded the descendants set with the roots
695 705 # up there, remember?)
696 706 if (p[0] in descendants) or (p[1] in descendants):
697 707 descendants.add(n)
698 708 isdescendant = True
699 709 if isdescendant and ((ancestors is None) or (n in ancestors)):
700 710 # Only include nodes that are both descendants and ancestors.
701 711 orderedout.append(n)
702 712 if (ancestors is not None) and (n in heads):
703 713 # We're trying to figure out which heads are reachable
704 714 # from roots.
705 715 # Mark this head as having been reached
706 716 heads[n] = True
707 717 elif ancestors is None:
708 718 # Otherwise, we're trying to discover the heads.
709 719 # Assume this is a head because if it isn't, the next step
710 720 # will eventually remove it.
711 721 heads[n] = True
712 722 # But, obviously its parents aren't.
713 723 for p in self.parents(n):
714 724 heads.pop(p, None)
715 725 heads = [n for n, flag in heads.iteritems() if flag]
716 726 roots = list(roots)
717 727 assert orderedout
718 728 assert roots
719 729 assert heads
720 730 return (orderedout, roots, heads)
721 731
722 732 def headrevs(self):
723 733 try:
724 734 return self.index.headrevs()
725 735 except AttributeError:
726 736 return self._headrevs()
727 737
728 738 def computephases(self, roots):
729 739 return self.index.computephasesmapsets(roots)
730 740
731 741 def _headrevs(self):
732 742 count = len(self)
733 743 if not count:
734 744 return [nullrev]
735 745 # we won't iterate over filtered revs so nobody is a head at start
736 746 ishead = [0] * (count + 1)
737 747 index = self.index
738 748 for r in self:
739 749 ishead[r] = 1 # I may be a head
740 750 e = index[r]
741 751 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
742 752 return [r for r, val in enumerate(ishead) if val]
743 753
744 754 def heads(self, start=None, stop=None):
745 755 """return the list of all nodes that have no children
746 756
747 757 if start is specified, only heads that are descendants of
748 758 start will be returned
749 759 if stop is specified, it will consider all the revs from stop
750 760 as if they had no children
751 761 """
752 762 if start is None and stop is None:
753 763 if not len(self):
754 764 return [nullid]
755 765 return [self.node(r) for r in self.headrevs()]
756 766
757 767 if start is None:
758 768 start = nullid
759 769 if stop is None:
760 770 stop = []
761 771 stoprevs = set([self.rev(n) for n in stop])
762 772 startrev = self.rev(start)
763 773 reachable = set((startrev,))
764 774 heads = set((startrev,))
765 775
766 776 parentrevs = self.parentrevs
767 777 for r in self.revs(start=startrev + 1):
768 778 for p in parentrevs(r):
769 779 if p in reachable:
770 780 if r not in stoprevs:
771 781 reachable.add(r)
772 782 heads.add(r)
773 783 if p in heads and p not in stoprevs:
774 784 heads.remove(p)
775 785
776 786 return [self.node(r) for r in heads]
777 787
778 788 def children(self, node):
779 789 """find the children of a given node"""
780 790 c = []
781 791 p = self.rev(node)
782 792 for r in self.revs(start=p + 1):
783 793 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
784 794 if prevs:
785 795 for pr in prevs:
786 796 if pr == p:
787 797 c.append(self.node(r))
788 798 elif p == nullrev:
789 799 c.append(self.node(r))
790 800 return c
791 801
792 802 def descendant(self, start, end):
793 803 if start == nullrev:
794 804 return True
795 805 for i in self.descendants([start]):
796 806 if i == end:
797 807 return True
798 808 elif i > end:
799 809 break
800 810 return False
801 811
802 812 def commonancestorsheads(self, a, b):
803 813 """calculate all the heads of the common ancestors of nodes a and b"""
804 814 a, b = self.rev(a), self.rev(b)
805 815 try:
806 816 ancs = self.index.commonancestorsheads(a, b)
807 817 except (AttributeError, OverflowError): # C implementation failed
808 818 ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)
809 819 return map(self.node, ancs)
810 820
811 821 def isancestor(self, a, b):
812 822 """return True if node a is an ancestor of node b
813 823
814 824 The implementation of this is trivial but the use of
815 825 commonancestorsheads is not."""
816 826 return a in self.commonancestorsheads(a, b)
817 827
818 828 def ancestor(self, a, b):
819 829 """calculate the "best" common ancestor of nodes a and b"""
820 830
821 831 a, b = self.rev(a), self.rev(b)
822 832 try:
823 833 ancs = self.index.ancestors(a, b)
824 834 except (AttributeError, OverflowError):
825 835 ancs = ancestor.ancestors(self.parentrevs, a, b)
826 836 if ancs:
827 837 # choose a consistent winner when there's a tie
828 838 return min(map(self.node, ancs))
829 839 return nullid
830 840
831 841 def _match(self, id):
832 842 if isinstance(id, int):
833 843 # rev
834 844 return self.node(id)
835 845 if len(id) == 20:
836 846 # possibly a binary node
837 847 # odds of a binary node being all hex in ASCII are 1 in 10**25
838 848 try:
839 849 node = id
840 850 self.rev(node) # quick search the index
841 851 return node
842 852 except LookupError:
843 853 pass # may be partial hex id
844 854 try:
845 855 # str(rev)
846 856 rev = int(id)
847 857 if str(rev) != id:
848 858 raise ValueError
849 859 if rev < 0:
850 860 rev = len(self) + rev
851 861 if rev < 0 or rev >= len(self):
852 862 raise ValueError
853 863 return self.node(rev)
854 864 except (ValueError, OverflowError):
855 865 pass
856 866 if len(id) == 40:
857 867 try:
858 868 # a full hex nodeid?
859 869 node = bin(id)
860 870 self.rev(node)
861 871 return node
862 872 except (TypeError, LookupError):
863 873 pass
864 874
865 875 def _partialmatch(self, id):
866 876 try:
867 877 n = self.index.partialmatch(id)
868 878 if n and self.hasnode(n):
869 879 return n
870 880 return None
871 881 except RevlogError:
872 882 # parsers.c radix tree lookup gave multiple matches
873 883 # fall through to slow path that filters hidden revisions
874 884 pass
875 885 except (AttributeError, ValueError):
876 886 # we are pure python, or key was too short to search radix tree
877 887 pass
878 888
879 889 if id in self._pcache:
880 890 return self._pcache[id]
881 891
882 892 if len(id) < 40:
883 893 try:
884 894 # hex(node)[:...]
885 895 l = len(id) // 2 # grab an even number of digits
886 896 prefix = bin(id[:l * 2])
887 897 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
888 898 nl = [n for n in nl if hex(n).startswith(id) and
889 899 self.hasnode(n)]
890 900 if len(nl) > 0:
891 901 if len(nl) == 1:
892 902 self._pcache[id] = nl[0]
893 903 return nl[0]
894 904 raise LookupError(id, self.indexfile,
895 905 _('ambiguous identifier'))
896 906 return None
897 907 except TypeError:
898 908 pass
899 909
900 910 def lookup(self, id):
901 911 """locate a node based on:
902 912 - revision number or str(revision number)
903 913 - nodeid or subset of hex nodeid
904 914 """
905 915 n = self._match(id)
906 916 if n is not None:
907 917 return n
908 918 n = self._partialmatch(id)
909 919 if n:
910 920 return n
911 921
912 922 raise LookupError(id, self.indexfile, _('no match found'))
913 923
914 924 def cmp(self, node, text):
915 925 """compare text with a given file revision
916 926
917 927 returns True if text is different than what is stored.
918 928 """
919 929 p1, p2 = self.parents(node)
920 930 return hash(text, p1, p2) != node
921 931
922 932 def _addchunk(self, offset, data):
923 933 o, d = self._chunkcache
924 934 # try to add to existing cache
925 935 if o + len(d) == offset and len(d) + len(data) < _chunksize:
926 936 self._chunkcache = o, d + data
927 937 else:
928 938 self._chunkcache = offset, data
929 939
930 940 def _loadchunk(self, offset, length):
931 941 if self._inline:
932 942 df = self.opener(self.indexfile)
933 943 else:
934 944 df = self.opener(self.datafile)
935 945
936 946 # Cache data both forward and backward around the requested
937 947 # data, in a fixed size window. This helps speed up operations
938 948 # involving reading the revlog backwards.
939 949 cachesize = self._chunkcachesize
940 950 realoffset = offset & ~(cachesize - 1)
941 951 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
942 952 - realoffset)
943 953 df.seek(realoffset)
944 954 d = df.read(reallength)
945 955 df.close()
946 956 self._addchunk(realoffset, d)
947 957 if offset != realoffset or reallength != length:
948 958 return util.buffer(d, offset - realoffset, length)
949 959 return d
950 960
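# illustrative sketch (editorial): with the default 64KiB window, a
# 100-byte read at offset 70000 is widened to one aligned block:
#   realoffset = 70000 & ~65535                       # -> 65536
#   reallength = ((70100 + 65536) & ~65535) - 65536   # -> 65536
# this alignment trick is why __init__ requires _chunkcachesize to be a
# power of two.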
951 961 def _getchunk(self, offset, length):
952 962 o, d = self._chunkcache
953 963 l = len(d)
954 964
955 965 # is it in the cache?
956 966 cachestart = offset - o
957 967 cacheend = cachestart + length
958 968 if cachestart >= 0 and cacheend <= l:
959 969 if cachestart == 0 and cacheend == l:
960 970 return d # avoid a copy
961 971 return util.buffer(d, cachestart, cacheend - cachestart)
962 972
963 973 return self._loadchunk(offset, length)
964 974
965 975 def _chunkraw(self, startrev, endrev):
966 976 start = self.start(startrev)
967 977 end = self.end(endrev)
968 978 if self._inline:
969 979 start += (startrev + 1) * self._io.size
970 980 end += (endrev + 1) * self._io.size
971 981 length = end - start
972 982 return self._getchunk(start, length)
973 983
974 984 def _chunk(self, rev):
975 985 return decompress(self._chunkraw(rev, rev))
976 986
977 987 def _chunks(self, revs):
978 988 '''faster version of [self._chunk(rev) for rev in revs]
979 989
980 990 Assumes that revs is in ascending order.'''
981 991 if not revs:
982 992 return []
983 993 start = self.start
984 994 length = self.length
985 995 inline = self._inline
986 996 iosize = self._io.size
987 997 buffer = util.buffer
988 998
989 999 l = []
990 1000 ladd = l.append
991 1001
992 1002 # preload the cache
993 1003 try:
994 1004 while True:
995 1005 # ensure that the cache doesn't change out from under us
996 1006 _cache = self._chunkcache
997 1007 self._chunkraw(revs[0], revs[-1])
998 1008 if _cache == self._chunkcache:
999 1009 break
1000 1010 offset, data = _cache
1001 1011 except OverflowError:
1002 1012 # issue4215 - we can't cache a run of chunks greater than
1003 1013 # 2G on Windows
1004 1014 return [self._chunk(rev) for rev in revs]
1005 1015
1006 1016 for rev in revs:
1007 1017 chunkstart = start(rev)
1008 1018 if inline:
1009 1019 chunkstart += (rev + 1) * iosize
1010 1020 chunklength = length(rev)
1011 1021 ladd(decompress(buffer(data, chunkstart - offset, chunklength)))
1012 1022
1013 1023 return l
1014 1024
1015 1025 def _chunkclear(self):
1016 1026 self._chunkcache = (0, '')
1017 1027
1018 1028 def deltaparent(self, rev):
1019 1029 """return deltaparent of the given revision"""
1020 1030 base = self.index[rev][3]
1021 1031 if base == rev:
1022 1032 return nullrev
1023 1033 elif self._generaldelta:
1024 1034 return base
1025 1035 else:
1026 1036 return rev - 1
1027 1037
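# illustrative sketch (editorial): worked cases for the branches above:
#   index[rev][3] == rev  -> nullrev  (rev stores a full snapshot)
#   generaldelta on       -> base     (delta parent may be any earlier rev)
#   generaldelta off      -> rev - 1  (chains follow physical file order)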
1028 1038 def revdiff(self, rev1, rev2):
1029 1039 """return or calculate a delta between two revisions"""
1030 1040 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1031 1041 return str(self._chunk(rev2))
1032 1042
1033 1043 return mdiff.textdiff(self.revision(rev1),
1034 1044 self.revision(rev2))
1035 1045
1036 1046 def revision(self, nodeorrev):
1037 1047 """return an uncompressed revision of a given node or revision
1038 1048 number.
1039 1049 """
1040 1050 if isinstance(nodeorrev, int):
1041 1051 rev = nodeorrev
1042 1052 node = self.node(rev)
1043 1053 else:
1044 1054 node = nodeorrev
1045 1055 rev = None
1046 1056
1047 1057 _cache = self._cache # grab local copy of cache to avoid thread race
1048 1058 cachedrev = None
1049 1059 if node == nullid:
1050 1060 return ""
1051 1061 if _cache:
1052 1062 if _cache[0] == node:
1053 1063 return _cache[2]
1054 1064 cachedrev = _cache[1]
1055 1065
1056 1066 # look up what we need to read
1057 1067 text = None
1058 1068 if rev is None:
1059 1069 rev = self.rev(node)
1060 1070
1061 1071 # check rev flags
1062 1072 if self.flags(rev) & ~REVIDX_KNOWN_FLAGS:
1063 1073 raise RevlogError(_('incompatible revision flag %x') %
1064 1074 (self.flags(rev) & ~REVIDX_KNOWN_FLAGS))
1065 1075
1066 1076 # build delta chain
1067 1077 chain = []
1068 1078 index = self.index # for performance
1069 1079 generaldelta = self._generaldelta
1070 1080 iterrev = rev
1071 1081 e = index[iterrev]
1072 1082 while iterrev != e[3] and iterrev != cachedrev:
1073 1083 chain.append(iterrev)
1074 1084 if generaldelta:
1075 1085 iterrev = e[3]
1076 1086 else:
1077 1087 iterrev -= 1
1078 1088 e = index[iterrev]
1079 1089
1080 1090 if iterrev == cachedrev:
1081 1091 # cache hit
1082 1092 text = _cache[2]
1083 1093 else:
1084 1094 chain.append(iterrev)
1085 1095 chain.reverse()
1086 1096
1087 1097 # drop cache to save memory
1088 1098 self._cache = None
1089 1099
1090 1100 bins = self._chunks(chain)
1091 1101 if text is None:
1092 1102 text = str(bins[0])
1093 1103 bins = bins[1:]
1094 1104
1095 1105 text = mdiff.patches(text, bins)
1096 1106
1097 1107 text = self._checkhash(text, node, rev)
1098 1108
1099 1109 self._cache = (node, rev, text)
1100 1110 return text
1101 1111
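# illustrative sketch (editorial), assuming a revlog instance rl:
#   text = rl.revision(rl.node(5))
# walks the delta chain for rev 5 as above, decompresses the chunks,
# applies the patches oldest-first, then sha1-checks the result.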
1102 1112 def hash(self, text, p1, p2):
1103 1113 """Compute a node hash.
1104 1114
1105 1115 Available as a function so that subclasses can replace the hash
1106 1116 as needed.
1107 1117 """
1108 1118 return hash(text, p1, p2)
1109 1119
1110 1120 def _checkhash(self, text, node, rev):
1111 1121 p1, p2 = self.parents(node)
1112 1122 self.checkhash(text, p1, p2, node, rev)
1113 1123 return text
1114 1124
1115 1125 def checkhash(self, text, p1, p2, node, rev=None):
1116 1126 if node != self.hash(text, p1, p2):
1117 1127 revornode = rev
1118 1128 if revornode is None:
1119 1129 revornode = templatefilters.short(hex(node))
1120 1130 raise RevlogError(_("integrity check failed on %s:%s")
1121 1131 % (self.indexfile, revornode))
1122 1132
1123 1133 def checkinlinesize(self, tr, fp=None):
1124 1134 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
1125 1135 return
1126 1136
1127 1137 trinfo = tr.find(self.indexfile)
1128 1138 if trinfo is None:
1129 1139 raise RevlogError(_("%s not found in the transaction")
1130 1140 % self.indexfile)
1131 1141
1132 1142 trindex = trinfo[2]
1133 1143 if trindex is not None:
1134 1144 dataoff = self.start(trindex)
1135 1145 else:
1136 1146 # revlog was stripped at start of transaction, use all leftover data
1137 1147 trindex = len(self) - 1
1138 1148 dataoff = self.end(-2)
1139 1149
1140 1150 tr.add(self.datafile, dataoff)
1141 1151
1142 1152 if fp:
1143 1153 fp.flush()
1144 1154 fp.close()
1145 1155
1146 1156 df = self.opener(self.datafile, 'w')
1147 1157 try:
1148 1158 for r in self:
1149 1159 df.write(self._chunkraw(r, r))
1150 1160 finally:
1151 1161 df.close()
1152 1162
1153 1163 fp = self.opener(self.indexfile, 'w', atomictemp=True)
1154 1164 self.version &= ~(REVLOGNGINLINEDATA)
1155 1165 self._inline = False
1156 1166 for i in self:
1157 1167 e = self._io.packentry(self.index[i], self.node, self.version, i)
1158 1168 fp.write(e)
1159 1169
1160 1170 # if we don't call close, the temp file will never replace the
1161 1171 # real index
1162 1172 fp.close()
1163 1173
1164 1174 tr.replace(self.indexfile, trindex * self._io.size)
1165 1175 self._chunkclear()
1166 1176
1167 1177 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1168 1178 node=None):
1169 1179 """add a revision to the log
1170 1180
1171 1181 text - the revision data to add
1172 1182 transaction - the transaction object used for rollback
1173 1183 link - the linkrev data to add
1174 1184 p1, p2 - the parent nodeids of the revision
1175 1185 cachedelta - an optional precomputed delta
1176 1186 node - nodeid of revision; typically node is not specified, and it is
1177 1187 computed by default as hash(text, p1, p2); however, subclasses might
1178 1188 use a different hashing method (and override checkhash() in that case)
1179 1189 """
1180 1190 if link == nullrev:
1181 1191 raise RevlogError(_("attempted to add linkrev -1 to %s")
1182 1192 % self.indexfile)
1183 1193 node = node or self.hash(text, p1, p2)
1184 1194 if node in self.nodemap:
1185 1195 return node
1186 1196
1187 1197 dfh = None
1188 1198 if not self._inline:
1189 1199 dfh = self.opener(self.datafile, "a")
1190 1200 ifh = self.opener(self.indexfile, "a+")
1191 1201 try:
1192 1202 return self._addrevision(node, text, transaction, link, p1, p2,
1193 1203 REVIDX_DEFAULT_FLAGS, cachedelta, ifh, dfh)
1194 1204 finally:
1195 1205 if dfh:
1196 1206 dfh.close()
1197 1207 ifh.close()
1198 1208
1199 1209 def compress(self, text):
1200 1210 """ generate a possibly-compressed representation of text """
1201 1211 if not text:
1202 1212 return ("", text)
1203 1213 l = len(text)
1204 1214 bin = None
1205 1215 if l < 44:
1206 1216 pass
1207 1217 elif l > 1000000:
1208 1218 # zlib makes an internal copy, thus doubling memory usage for
1209 1219 # large files, so let's do this in pieces
1210 1220 z = zlib.compressobj()
1211 1221 p = []
1212 1222 pos = 0
1213 1223 while pos < l:
1214 1224 pos2 = pos + 2**20
1215 1225 p.append(z.compress(text[pos:pos2]))
1216 1226 pos = pos2
1217 1227 p.append(z.flush())
1218 1228 if sum(map(len, p)) < l:
1219 1229 bin = "".join(p)
1220 1230 else:
1221 1231 bin = _compress(text)
1222 1232 if bin is None or len(bin) > l:
1223 1233 if text[0] == '\0':
1224 1234 return ("", text)
1225 1235 return ('u', text)
1226 1236 return ("", bin)
1227 1237
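# illustrative sketch (editorial): compress() returns (tag, data), the
# inverse of decompress() near the top of the file:
#   compress("abc")       # -> ('u', 'abc')  (< 44 bytes: stored raw)
#   compress("\0abc")     # -> ('', '\0abc') (leading NUL self-tags)
#   compress("a" * 500)   # -> ('', <zlib>)  (zlib output begins with 'x')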
1228 1238 def _addrevision(self, node, text, transaction, link, p1, p2, flags,
1229 1239 cachedelta, ifh, dfh):
1230 1240 """internal function to add revisions to the log
1231 1241
1232 1242 see addrevision for argument descriptions.
1233 1243 invariants:
1234 1244 - text is optional (can be None); if not set, cachedelta must be set.
1235 1245 if both are set, they must correspond to each other.
1236 1246 """
1237 1247 btext = [text]
1238 1248 def buildtext():
1239 1249 if btext[0] is not None:
1240 1250 return btext[0]
1241 1251 # flush any pending writes here so we can read it in revision
1242 1252 if dfh:
1243 1253 dfh.flush()
1244 1254 ifh.flush()
1245 1255 baserev = cachedelta[0]
1246 1256 delta = cachedelta[1]
1247 1257 # special case deltas which replace entire base; no need to decode
1248 1258 # base revision. this neatly avoids censored bases, which throw when
1249 1259 # they're decoded.
1250 1260 hlen = struct.calcsize(">lll")
1251 1261 if delta[:hlen] == mdiff.replacediffheader(self.rawsize(baserev),
1252 1262 len(delta) - hlen):
1253 1263 btext[0] = delta[hlen:]
1254 1264 else:
1255 1265 basetext = self.revision(self.node(baserev))
1256 1266 btext[0] = mdiff.patch(basetext, delta)
1257 1267 try:
1258 1268 self.checkhash(btext[0], p1, p2, node)
1259 1269 if flags & REVIDX_ISCENSORED:
1260 1270 raise RevlogError(_('node %s is not censored') % node)
1261 1271 except CensoredNodeError:
1262 1272 # must pass the censored index flag to add censored revisions
1263 1273 if not flags & REVIDX_ISCENSORED:
1264 1274 raise
1265 1275 return btext[0]
1266 1276
1267 1277 def builddelta(rev):
1268 1278 # can we use the cached delta?
1269 1279 if cachedelta and cachedelta[0] == rev:
1270 1280 delta = cachedelta[1]
1271 1281 else:
1272 1282 t = buildtext()
1273 1283 if self.iscensored(rev):
1274 1284 # deltas based on a censored revision must replace the
1275 1285 # full content in one patch, so delta works everywhere
1276 1286 header = mdiff.replacediffheader(self.rawsize(rev), len(t))
1277 1287 delta = header + t
1278 1288 else:
1279 1289 ptext = self.revision(self.node(rev))
1280 1290 delta = mdiff.textdiff(ptext, t)
1281 1291 data = self.compress(delta)
1282 1292 l = len(data[1]) + len(data[0])
1283 1293 if basecache[0] == rev:
1284 1294 chainbase = basecache[1]
1285 1295 else:
1286 1296 chainbase = self.chainbase(rev)
1287 1297 dist = l + offset - self.start(chainbase)
1288 1298 if self._generaldelta:
1289 1299 base = rev
1290 1300 else:
1291 1301 base = chainbase
1292 1302 chainlen, compresseddeltalen = self._chaininfo(rev)
1293 1303 chainlen += 1
1294 1304 compresseddeltalen += l
1295 1305 return dist, l, data, base, chainbase, chainlen, compresseddeltalen
1296 1306
1297 1307 curr = len(self)
1298 1308 prev = curr - 1
1299 1309 base = chainbase = curr
1300 1310 chainlen = None
1301 1311 offset = self.end(prev)
1302 1312 d = None
1303 1313 if self._basecache is None:
1304 1314 self._basecache = (prev, self.chainbase(prev))
1305 1315 basecache = self._basecache
1306 1316 p1r, p2r = self.rev(p1), self.rev(p2)
1307 1317
1308 1318 # should we try to build a delta?
1309 1319 if prev != nullrev:
1310 1320 if self._generaldelta:
1311 1321 if p1r >= basecache[1]:
1312 1322 d = builddelta(p1r)
1313 1323 elif p2r >= basecache[1]:
1314 1324 d = builddelta(p2r)
1315 1325 else:
1316 1326 d = builddelta(prev)
1317 1327 else:
1318 1328 d = builddelta(prev)
1319 1329 dist, l, data, base, chainbase, chainlen, compresseddeltalen = d
1320 1330
1321 1331 # full versions are inserted when the needed deltas
1322 1332 # become comparable to the uncompressed text
1323 1333 if text is None:
1324 1334 textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
1325 1335 cachedelta[1])
1326 1336 else:
1327 1337 textlen = len(text)
1328 1338
1329 1339 # - 'dist' is the distance from the base revision -- bounding it limits
1330 1340 # the amount of I/O we need to do.
1331 1341 # - 'compresseddeltalen' is the sum of the total size of deltas we need
1332 1342 # to apply -- bounding it limits the amount of CPU we consume.
1333 1343 if (d is None or dist > textlen * 4 or l > textlen or
1334 1344 compresseddeltalen > textlen * 2 or
1335 1345 (self._maxchainlen and chainlen > self._maxchainlen)):
1336 1346 text = buildtext()
1337 1347 data = self.compress(text)
1338 1348 l = len(data[1]) + len(data[0])
1339 1349 base = chainbase = curr
1340 1350
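# illustrative sketch (editorial): for textlen = 1000, the test above keeps
# a delta only while dist <= 4000, l <= 1000, and the chain's summed
# compressed delta size stays <= 2000 (plus any configured maxchainlen
# bound); otherwise the full text is stored as a new snapshot.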
1341 1351 e = (offset_type(offset, flags), l, textlen,
1342 1352 base, link, p1r, p2r, node)
1343 1353 self.index.insert(-1, e)
1344 1354 self.nodemap[node] = curr
1345 1355
1346 1356 entry = self._io.packentry(e, self.node, self.version, curr)
1347 1357 self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
1348 1358
1349 1359 if type(text) == str: # only accept immutable objects
1350 1360 self._cache = (node, curr, text)
1351 1361 self._basecache = (curr, chainbase)
1352 1362 return node
1353 1363
1354 1364 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
1355 1365 curr = len(self) - 1
1356 1366 if not self._inline:
1357 1367 transaction.add(self.datafile, offset)
1358 1368 transaction.add(self.indexfile, curr * len(entry))
1359 1369 if data[0]:
1360 1370 dfh.write(data[0])
1361 1371 dfh.write(data[1])
1362 1372 dfh.flush()
1363 1373 ifh.write(entry)
1364 1374 else:
1365 1375 offset += curr * self._io.size
1366 1376 transaction.add(self.indexfile, offset, curr)
1367 1377 ifh.write(entry)
1368 1378 ifh.write(data[0])
1369 1379 ifh.write(data[1])
1370 1380 self.checkinlinesize(transaction, ifh)
1371 1381
1372 1382 def addgroup(self, bundle, linkmapper, transaction):
1373 1383 """
1374 1384 add a delta group
1375 1385
1376 1386 given a set of deltas, add them to the revision log. the
1377 1387 first delta is against its parent, which should be in our
1378 1388 log, the rest are against the previous delta.
1379 1389 """
1380 1390
1381 1391 # track the base of the current delta log
1382 1392 content = []
1383 1393 node = None
1384 1394
1385 1395 r = len(self)
1386 1396 end = 0
1387 1397 if r:
1388 1398 end = self.end(r - 1)
1389 1399 ifh = self.opener(self.indexfile, "a+")
1390 1400 isize = r * self._io.size
1391 1401 if self._inline:
1392 1402 transaction.add(self.indexfile, end + isize, r)
1393 1403 dfh = None
1394 1404 else:
1395 1405 transaction.add(self.indexfile, isize, r)
1396 1406 transaction.add(self.datafile, end)
1397 1407 dfh = self.opener(self.datafile, "a")
1398 1408 def flush():
1399 1409 if dfh:
1400 1410 dfh.flush()
1401 1411 ifh.flush()
1402 1412 try:
1403 1413 # loop through our set of deltas
1404 1414 chain = None
1405 1415 while True:
1406 1416 chunkdata = bundle.deltachunk(chain)
1407 1417 if not chunkdata:
1408 1418 break
1409 1419 node = chunkdata['node']
1410 1420 p1 = chunkdata['p1']
1411 1421 p2 = chunkdata['p2']
1412 1422 cs = chunkdata['cs']
1413 1423 deltabase = chunkdata['deltabase']
1414 1424 delta = chunkdata['delta']
1415 1425
1416 1426 content.append(node)
1417 1427
1418 1428 link = linkmapper(cs)
1419 1429 if node in self.nodemap:
1420 1430 # this can happen if two branches make the same change
1421 1431 chain = node
1422 1432 continue
1423 1433
1424 1434 for p in (p1, p2):
1425 1435 if p not in self.nodemap:
1426 1436 raise LookupError(p, self.indexfile,
1427 1437 _('unknown parent'))
1428 1438
1429 1439 if deltabase not in self.nodemap:
1430 1440 raise LookupError(deltabase, self.indexfile,
1431 1441 _('unknown delta base'))
1432 1442
1433 1443 baserev = self.rev(deltabase)
1434 1444
1435 1445 if baserev != nullrev and self.iscensored(baserev):
1436 1446 # if base is censored, delta must be full replacement in a
1437 1447 # single patch operation
1438 1448 hlen = struct.calcsize(">lll")
1439 1449 oldlen = self.rawsize(baserev)
1440 1450 newlen = len(delta) - hlen
1441 1451 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
1442 1452 raise error.CensoredBaseError(self.indexfile,
1443 1453 self.node(baserev))
1444 1454
1445 1455 flags = REVIDX_DEFAULT_FLAGS
1446 1456 if self._peek_iscensored(baserev, delta, flush):
1447 1457 flags |= REVIDX_ISCENSORED
1448 1458
1449 1459 chain = self._addrevision(node, None, transaction, link,
1450 1460 p1, p2, flags, (baserev, delta),
1451 1461 ifh, dfh)
1452 1462 if not dfh and not self._inline:
1453 1463 # addrevision switched from inline to conventional
1454 1464 # reopen the index
1455 1465 ifh.close()
1456 1466 dfh = self.opener(self.datafile, "a")
1457 1467 ifh = self.opener(self.indexfile, "a")
1458 1468 finally:
1459 1469 if dfh:
1460 1470 dfh.close()
1461 1471 ifh.close()
1462 1472
1463 1473 return content
1464 1474
1465 1475 def iscensored(self, rev):
1466 1476 """Check if a file revision is censored."""
1467 1477 return False
1468 1478
1469 1479 def _peek_iscensored(self, baserev, delta, flush):
1470 1480 """Quickly check if a delta produces a censored revision."""
1471 1481 return False
1472 1482
1473 1483 def getstrippoint(self, minlink):
1474 1484 """find the minimum rev that must be stripped to strip the linkrev
1475 1485
1476 1486 Returns a tuple containing the minimum rev and a set of all revs that
1477 1487 have linkrevs that will be broken by this strip.
1478 1488 """
1479 1489 brokenrevs = set()
1480 1490 strippoint = len(self)
1481 1491
1482 1492 heads = {}
1483 1493 futurelargelinkrevs = set()
1484 1494 for head in self.headrevs():
1485 1495 headlinkrev = self.linkrev(head)
1486 1496 heads[head] = headlinkrev
1487 1497 if headlinkrev >= minlink:
1488 1498 futurelargelinkrevs.add(headlinkrev)
1489 1499
1490 1500 # This algorithm involves walking down the rev graph, starting at the
1491 1501 # heads. Since the revs are topologically sorted according to linkrev,
1492 1502 # once all head linkrevs are below the minlink, we know there are
1493 1503 # no more revs that could have a linkrev greater than minlink.
1494 1504 # So we can stop walking.
1495 1505 while futurelargelinkrevs:
1496 1506 strippoint -= 1
1497 1507 linkrev = heads.pop(strippoint)
1498 1508
1499 1509 if linkrev < minlink:
1500 1510 brokenrevs.add(strippoint)
1501 1511 else:
1502 1512 futurelargelinkrevs.remove(linkrev)
1503 1513
1504 1514 for p in self.parentrevs(strippoint):
1505 1515 if p != nullrev:
1506 1516 plinkrev = self.linkrev(p)
1507 1517 heads[p] = plinkrev
1508 1518 if plinkrev >= minlink:
1509 1519 futurelargelinkrevs.add(plinkrev)
1510 1520
1511 1521 return strippoint, brokenrevs
1512 1522
1513 1523 def strip(self, minlink, transaction):
1514 1524 """truncate the revlog on the first revision with a linkrev >= minlink
1515 1525
1516 1526 This function is called when we're stripping revision minlink and
1517 1527 its descendants from the repository.
1518 1528
1519 1529 We have to remove all revisions with linkrev >= minlink, because
1520 1530 the equivalent changelog revisions will be renumbered after the
1521 1531 strip.
1522 1532
1523 1533 So we truncate the revlog on the first of these revisions, and
1524 1534 trust that the caller has saved the revisions that shouldn't be
1525 1535 removed and that it'll re-add them after this truncation.
1526 1536 """
1527 1537 if len(self) == 0:
1528 1538 return
1529 1539
1530 1540 rev, _ = self.getstrippoint(minlink)
1531 1541 if rev == len(self):
1532 1542 return
1533 1543
1534 1544 # first truncate the files on disk
1535 1545 end = self.start(rev)
1536 1546 if not self._inline:
1537 1547 transaction.add(self.datafile, end)
1538 1548 end = rev * self._io.size
1539 1549 else:
1540 1550 end += rev * self._io.size
1541 1551
1542 1552 transaction.add(self.indexfile, end)
1543 1553
1544 1554 # then reset internal state in memory to forget those revisions
1545 1555 self._cache = None
1546 1556 self._chaininfocache = {}
1547 1557 self._chunkclear()
1548 1558 for x in xrange(rev, len(self)):
1549 1559 del self.nodemap[self.node(x)]
1550 1560
1551 1561 del self.index[rev:-1]
1552 1562
1553 1563 def checksize(self):
1554 1564 expected = 0
1555 1565 if len(self):
1556 1566 expected = max(0, self.end(len(self) - 1))
1557 1567
1558 1568 try:
1559 1569 f = self.opener(self.datafile)
1560 1570 f.seek(0, 2)
1561 1571 actual = f.tell()
1562 1572 f.close()
1563 1573 dd = actual - expected
1564 1574 except IOError, inst:
1565 1575 if inst.errno != errno.ENOENT:
1566 1576 raise
1567 1577 dd = 0
1568 1578
1569 1579 try:
1570 1580 f = self.opener(self.indexfile)
1571 1581 f.seek(0, 2)
1572 1582 actual = f.tell()
1573 1583 f.close()
1574 1584 s = self._io.size
1575 1585 i = max(0, actual // s)
1576 1586 di = actual - (i * s)
1577 1587 if self._inline:
1578 1588 databytes = 0
1579 1589 for r in self:
1580 1590 databytes += max(0, self.length(r))
1581 1591 dd = 0
1582 1592 di = actual - len(self) * s - databytes
1583 1593 except IOError, inst:
1584 1594 if inst.errno != errno.ENOENT:
1585 1595 raise
1586 1596 di = 0
1587 1597
1588 1598 return (dd, di)
1589 1599
1590 1600 def files(self):
1591 1601 res = [self.indexfile]
1592 1602 if not self._inline:
1593 1603 res.append(self.datafile)
1594 1604 return res