revlog: move size limit check to addrevision...
Matt Mackall - r25459:0bda5bfa default
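
The change itself is small: the 2GiB size guard moves out of revlogio.packentry (where it ran only when an index entry was packed for writing) and into revlog.addrevision, so an oversized text is rejected against len(text) before anything is written. A condensed sketch of the moved check; the helper name and exception type are mine, mirroring the added lines in the diff below:

    _maxentrysize = 0x7fffffff  # largest value a 4-byte signed length field holds (~2 GiB)

    def checktextsize(indexfile, text):
        # now runs at the top of addrevision, before touching index or data file
        if len(text) > _maxentrysize:
            raise ValueError(
                "%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
                % (indexfile, len(text)))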
@@ -1,1604 +1,1604
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 # import stuff from node for others to import from revlog
15 15 import collections
16 16 from node import bin, hex, nullid, nullrev
17 17 from i18n import _
18 18 import ancestor, mdiff, parsers, error, util, templatefilters
19 19 import struct, zlib, errno
20 20
21 21 _pack = struct.pack
22 22 _unpack = struct.unpack
23 23 _compress = zlib.compress
24 24 _decompress = zlib.decompress
25 25 _sha = util.sha1
26 26
27 27 # revlog header flags
28 28 REVLOGV0 = 0
29 29 REVLOGNG = 1
30 30 REVLOGNGINLINEDATA = (1 << 16)
31 31 REVLOGGENERALDELTA = (1 << 17)
32 32 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
33 33 REVLOG_DEFAULT_FORMAT = REVLOGNG
34 34 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
35 35 REVLOGNG_FLAGS = REVLOGNGINLINEDATA | REVLOGGENERALDELTA
36 36
37 37 # revlog index flags
38 38 REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
39 39 REVIDX_DEFAULT_FLAGS = 0
40 40 REVIDX_KNOWN_FLAGS = REVIDX_ISCENSORED
41 41
42 42 # max size of revlog with inline data
43 43 _maxinline = 131072
44 44 _chunksize = 1048576
45 45
46 46 RevlogError = error.RevlogError
47 47 LookupError = error.LookupError
48 48 CensoredNodeError = error.CensoredNodeError
49 49
50 50 def getoffset(q):
51 51 return int(q >> 16)
52 52
53 53 def gettype(q):
54 54 return int(q & 0xFFFF)
55 55
56 56 def offset_type(offset, type):
57 57 return long(long(offset) << 16 | type)
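
For reference, a standalone restatement of how these helpers pack a byte offset and 16 bits of flags into one integer (without the Python 2 long):

    def offset_type(offset, type):
        return (offset << 16) | type

    q = offset_type(1024, 1)
    assert q >> 16 == 1024      # getoffset
    assert q & 0xFFFF == 1      # gettype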
58 58
59 59 _nullhash = _sha(nullid)
60 60
61 61 def hash(text, p1, p2):
62 62 """generate a hash from the given text and its parent hashes
63 63
64 64 This hash combines both the current file contents and its history
65 65 in a manner that makes it easy to distinguish nodes with the same
66 66 content in the revision graph.
67 67 """
68 68 # As of now, if one of the parent nodes is null, p2 is null
69 69 if p2 == nullid:
70 70 # deep copy of a hash is faster than creating one
71 71 s = _nullhash.copy()
72 72 s.update(p1)
73 73 else:
74 74 # neither of the parent nodes is nullid
75 75 l = [p1, p2]
76 76 l.sort()
77 77 s = _sha(l[0])
78 78 s.update(l[1])
79 79 s.update(text)
80 80 return s.digest()
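
Since the parents are sorted before hashing, and nullid (all zero bytes) always sorts first, the node is independent of parent order; the nullid branch above is only a fast path. A standalone sketch using hashlib directly (the nodehash name is mine):

    import hashlib

    nullid = b"\x00" * 20

    def nodehash(text, p1, p2):
        # equivalent to the function above: sha1(min(p1, p2) + max(p1, p2) + text)
        a, b = sorted([p1, p2])
        s = hashlib.sha1(a)
        s.update(b)
        s.update(text)
        return s.digest()

    # swapping the parents yields the same node
    assert nodehash(b"data", b"\x01" * 20, nullid) == \
           nodehash(b"data", nullid, b"\x01" * 20)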
81 81
82 82 def decompress(bin):
83 83 """ decompress the given input """
84 84 if not bin:
85 85 return bin
86 86 t = bin[0]
87 87 if t == '\0':
88 88 return bin
89 89 if t == 'x':
90 90 try:
91 91 return _decompress(bin)
92 92 except zlib.error, e:
93 93 raise RevlogError(_("revlog decompress error: %s") % str(e))
94 94 if t == 'u':
95 95 return bin[1:]
96 96 raise RevlogError(_("unknown compression type %r") % t)
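
The first stored byte thus selects the decoder: 'x' is the ordinary zlib stream header, 'u' marks text stored uncompressed, and a leading '\0' means the chunk was stored raw. A quick round-trip check:

    import zlib

    data = b"some revision data" * 10
    stored = zlib.compress(data)
    assert stored[:1] == b'x'            # zlib streams begin with 0x78 ('x')
    assert zlib.decompress(stored) == data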
97 97
98 98 # index v0:
99 99 # 4 bytes: offset
100 100 # 4 bytes: compressed length
101 101 # 4 bytes: base rev
102 102 # 4 bytes: link rev
103 103 # 32 bytes: parent 1 nodeid
104 104 # 32 bytes: parent 2 nodeid
105 105 # 32 bytes: nodeid
106 106 indexformatv0 = ">4l20s20s20s"
107 107 v0shaoffset = 56
108 108
109 109 class revlogoldio(object):
110 110 def __init__(self):
111 111 self.size = struct.calcsize(indexformatv0)
112 112
113 113 def parseindex(self, data, inline):
114 114 s = self.size
115 115 index = []
116 116 nodemap = {nullid: nullrev}
117 117 n = off = 0
118 118 l = len(data)
119 119 while off + s <= l:
120 120 cur = data[off:off + s]
121 121 off += s
122 122 e = _unpack(indexformatv0, cur)
123 123 # transform to revlogv1 format
124 124 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
125 125 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
126 126 index.append(e2)
127 127 nodemap[e[6]] = n
128 128 n += 1
129 129
130 130 # add the magic null revision at -1
131 131 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
132 132
133 133 return index, nodemap, None
134 134
135 135 def packentry(self, entry, node, version, rev):
136 136 if gettype(entry[0]):
137 137 raise RevlogError(_("index entry flags need RevlogNG"))
138 138 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
139 139 node(entry[5]), node(entry[6]), entry[7])
140 140 return _pack(indexformatv0, *e2)
141 141
142 142 # index ng:
143 143 # 6 bytes: offset
144 144 # 2 bytes: flags
145 145 # 4 bytes: compressed length
146 146 # 4 bytes: uncompressed length
147 147 # 4 bytes: base rev
148 148 # 4 bytes: link rev
149 149 # 4 bytes: parent 1 rev
150 150 # 4 bytes: parent 2 rev
151 151 # 32 bytes: nodeid
152 152 indexformatng = ">Qiiiiii20s12x"
153 153 ngshaoffset = 32
154 154 versionformat = ">I"
155 155
156 156 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
157 157 # signed integer)
158 158 _maxentrysize = 0x7fffffff
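
A RevlogNG index entry is therefore a fixed 64 bytes, with the offset and flags sharing the leading 8-byte word (see getoffset/gettype above). A quick struct round trip over a fabricated entry (all values made up for illustration):

    import struct

    indexformatng = ">Qiiiiii20s12x"
    assert struct.calcsize(indexformatng) == 64

    # offset/flags word, compressed len, uncompressed len, base rev,
    # link rev, parent 1 rev, parent 2 rev, nodeid
    entry = ((512 << 16) | 0, 11, 42, 0, 0, -1, -1, b"\x01" * 20)
    packed = struct.pack(indexformatng, *entry)
    assert struct.unpack(indexformatng, packed)[0] >> 16 == 512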
159 159
160 160 class revlogio(object):
161 161 def __init__(self):
162 162 self.size = struct.calcsize(indexformatng)
163 163
164 164 def parseindex(self, data, inline):
165 165 # call the C implementation to parse the index data
166 166 index, cache = parsers.parse_index2(data, inline)
167 167 return index, getattr(index, 'nodemap', None), cache
168 168
169 169 def packentry(self, entry, node, version, rev):
170 uncompressedlength = entry[2]
171 if uncompressedlength > _maxentrysize:
172 raise RevlogError(
173 _("size of %d bytes exceeds maximum revlog storage of 2GiB")
174 % uncompressedlength)
175
176 170 p = _pack(indexformatng, *entry)
177 171 if rev == 0:
178 172 p = _pack(versionformat, version) + p[4:]
179 173 return p
180 174
181 175 class revlog(object):
182 176 """
183 177 the underlying revision storage object
184 178
185 179 A revlog consists of two parts, an index and the revision data.
186 180
187 181 The index is a file with a fixed record size containing
188 182 information on each revision, including its nodeid (hash), the
189 183 nodeids of its parents, the position and offset of its data within
190 184 the data file, and the revision it's based on. Finally, each entry
191 185 contains a linkrev entry that can serve as a pointer to external
192 186 data.
193 187
194 188 The revision data itself is a linear collection of data chunks.
195 189 Each chunk represents a revision and is usually represented as a
196 190 delta against the previous chunk. To bound lookup time, runs of
197 191 deltas are limited to about 2 times the length of the original
198 192 version data. This makes retrieval of a version proportional to
199 193 its size, or O(1) relative to the number of revisions.
200 194
201 195 Both pieces of the revlog are written to in an append-only
202 196 fashion, which means we never need to rewrite a file to insert or
203 197 remove data, and can use some simple techniques to avoid the need
204 198 for locking while reading.
205 199 """
206 200 def __init__(self, opener, indexfile):
207 201 """
208 202 create a revlog object
209 203
210 204 opener is a function that abstracts the file opening operation
211 205 and can be used to implement COW semantics or the like.
212 206 """
213 207 self.indexfile = indexfile
214 208 self.datafile = indexfile[:-2] + ".d"
215 209 self.opener = opener
216 210 self._cache = None
217 211 self._basecache = None
218 212 self._chunkcache = (0, '')
219 213 self._chunkcachesize = 65536
220 214 self._maxchainlen = None
221 215 self.index = []
222 216 self._pcache = {}
223 217 self._nodecache = {nullid: nullrev}
224 218 self._nodepos = None
225 219
226 220 v = REVLOG_DEFAULT_VERSION
227 221 opts = getattr(opener, 'options', None)
228 222 if opts is not None:
229 223 if 'revlogv1' in opts:
230 224 if 'generaldelta' in opts:
231 225 v |= REVLOGGENERALDELTA
232 226 else:
233 227 v = 0
234 228 if 'chunkcachesize' in opts:
235 229 self._chunkcachesize = opts['chunkcachesize']
236 230 if 'maxchainlen' in opts:
237 231 self._maxchainlen = opts['maxchainlen']
238 232
239 233 if self._chunkcachesize <= 0:
240 234 raise RevlogError(_('revlog chunk cache size %r is not greater '
241 235 'than 0') % self._chunkcachesize)
242 236 elif self._chunkcachesize & (self._chunkcachesize - 1):
243 237 raise RevlogError(_('revlog chunk cache size %r is not a power '
244 238 'of 2') % self._chunkcachesize)
245 239
246 240 i = ''
247 241 self._initempty = True
248 242 try:
249 243 f = self.opener(self.indexfile)
250 244 i = f.read()
251 245 f.close()
252 246 if len(i) > 0:
253 247 v = struct.unpack(versionformat, i[:4])[0]
254 248 self._initempty = False
255 249 except IOError, inst:
256 250 if inst.errno != errno.ENOENT:
257 251 raise
258 252
259 253 self.version = v
260 254 self._inline = v & REVLOGNGINLINEDATA
261 255 self._generaldelta = v & REVLOGGENERALDELTA
262 256 flags = v & ~0xFFFF
263 257 fmt = v & 0xFFFF
264 258 if fmt == REVLOGV0 and flags:
265 259 raise RevlogError(_("index %s unknown flags %#04x for format v0")
266 260 % (self.indexfile, flags >> 16))
267 261 elif fmt == REVLOGNG and flags & ~REVLOGNG_FLAGS:
268 262 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
269 263 % (self.indexfile, flags >> 16))
270 264 elif fmt > REVLOGNG:
271 265 raise RevlogError(_("index %s unknown format %d")
272 266 % (self.indexfile, fmt))
273 267
274 268 self._io = revlogio()
275 269 if self.version == REVLOGV0:
276 270 self._io = revlogoldio()
277 271 try:
278 272 d = self._io.parseindex(i, self._inline)
279 273 except (ValueError, IndexError):
280 274 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
281 275 self.index, nodemap, self._chunkcache = d
282 276 if nodemap is not None:
283 277 self.nodemap = self._nodecache = nodemap
284 278 if not self._chunkcache:
285 279 self._chunkclear()
286 280 # revnum -> (chain-length, sum-delta-length)
287 281 self._chaininfocache = {}
288 282
289 283 def tip(self):
290 284 return self.node(len(self.index) - 2)
291 285 def __contains__(self, rev):
292 286 return 0 <= rev < len(self)
293 287 def __len__(self):
294 288 return len(self.index) - 1
295 289 def __iter__(self):
296 290 return iter(xrange(len(self)))
297 291 def revs(self, start=0, stop=None):
298 292 """iterate over all rev in this revlog (from start to stop)"""
299 293 step = 1
300 294 if stop is not None:
301 295 if start > stop:
302 296 step = -1
303 297 stop += step
304 298 else:
305 299 stop = len(self)
306 300 return xrange(start, stop, step)
307 301
308 302 @util.propertycache
309 303 def nodemap(self):
310 304 self.rev(self.node(0))
311 305 return self._nodecache
312 306
313 307 def hasnode(self, node):
314 308 try:
315 309 self.rev(node)
316 310 return True
317 311 except KeyError:
318 312 return False
319 313
320 314 def clearcaches(self):
321 315 try:
322 316 self._nodecache.clearcaches()
323 317 except AttributeError:
324 318 self._nodecache = {nullid: nullrev}
325 319 self._nodepos = None
326 320
327 321 def rev(self, node):
328 322 try:
329 323 return self._nodecache[node]
330 324 except TypeError:
331 325 raise
332 326 except RevlogError:
333 327 # parsers.c radix tree lookup failed
334 328 raise LookupError(node, self.indexfile, _('no node'))
335 329 except KeyError:
336 330 # pure python cache lookup failed
337 331 n = self._nodecache
338 332 i = self.index
339 333 p = self._nodepos
340 334 if p is None:
341 335 p = len(i) - 2
342 336 for r in xrange(p, -1, -1):
343 337 v = i[r][7]
344 338 n[v] = r
345 339 if v == node:
346 340 self._nodepos = r - 1
347 341 return r
348 342 raise LookupError(node, self.indexfile, _('no node'))
349 343
350 344 def node(self, rev):
351 345 return self.index[rev][7]
352 346 def linkrev(self, rev):
353 347 return self.index[rev][4]
354 348 def parents(self, node):
355 349 i = self.index
356 350 d = i[self.rev(node)]
357 351 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
358 352 def parentrevs(self, rev):
359 353 return self.index[rev][5:7]
360 354 def start(self, rev):
361 355 return int(self.index[rev][0] >> 16)
362 356 def end(self, rev):
363 357 return self.start(rev) + self.length(rev)
364 358 def length(self, rev):
365 359 return self.index[rev][1]
366 360 def chainbase(self, rev):
367 361 index = self.index
368 362 base = index[rev][3]
369 363 while base != rev:
370 364 rev = base
371 365 base = index[rev][3]
372 366 return base
373 367 def chainlen(self, rev):
374 368 return self._chaininfo(rev)[0]
375 369
376 370 def _chaininfo(self, rev):
377 371 chaininfocache = self._chaininfocache
378 372 if rev in chaininfocache:
379 373 return chaininfocache[rev]
380 374 index = self.index
381 375 generaldelta = self._generaldelta
382 376 iterrev = rev
383 377 e = index[iterrev]
384 378 clen = 0
385 379 compresseddeltalen = 0
386 380 while iterrev != e[3]:
387 381 clen += 1
388 382 compresseddeltalen += e[1]
389 383 if generaldelta:
390 384 iterrev = e[3]
391 385 else:
392 386 iterrev -= 1
393 387 if iterrev in chaininfocache:
394 388 t = chaininfocache[iterrev]
395 389 clen += t[0]
396 390 compresseddeltalen += t[1]
397 391 break
398 392 e = index[iterrev]
399 393 else:
400 394 # Add text length of base since decompressing that also takes
401 395 # work. For cache hits the length is already included.
402 396 compresseddeltalen += e[1]
403 397 r = (clen, compresseddeltalen)
404 398 chaininfocache[rev] = r
405 399 return r
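
The chain walk above depends on the storage mode: with generaldelta the next link is the stored base revision (e[3]); otherwise chains are strictly sequential, rev - 1 down to the base. A toy restatement over a list that holds only each revision's delta base (helper name is mine):

    def chainlen(bases, rev, generaldelta=True):
        # a revision that is its own base is a full snapshot
        clen = 0
        while bases[rev] != rev:
            clen += 1
            rev = bases[rev] if generaldelta else rev - 1
        return clen

    # rev 0 is a snapshot; revs 1-3 each delta against their predecessor
    assert chainlen([0, 0, 1, 2], 3) == 3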
406 400
407 401 def flags(self, rev):
408 402 return self.index[rev][0] & 0xFFFF
409 403 def rawsize(self, rev):
410 404 """return the length of the uncompressed text for a given revision"""
411 405 l = self.index[rev][2]
412 406 if l >= 0:
413 407 return l
414 408
415 409 t = self.revision(self.node(rev))
416 410 return len(t)
417 411 size = rawsize
418 412
419 413 def ancestors(self, revs, stoprev=0, inclusive=False):
420 414 """Generate the ancestors of 'revs' in reverse topological order.
421 415 Does not generate revs lower than stoprev.
422 416
423 417 See the documentation for ancestor.lazyancestors for more details."""
424 418
425 419 return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
426 420 inclusive=inclusive)
427 421
428 422 def descendants(self, revs):
429 423 """Generate the descendants of 'revs' in revision order.
430 424
431 425 Yield a sequence of revision numbers starting with a child of
432 426 some rev in revs, i.e., each revision is *not* considered a
433 427 descendant of itself. Results are ordered by revision number (a
434 428 topological sort)."""
435 429 first = min(revs)
436 430 if first == nullrev:
437 431 for i in self:
438 432 yield i
439 433 return
440 434
441 435 seen = set(revs)
442 436 for i in self.revs(start=first + 1):
443 437 for x in self.parentrevs(i):
444 438 if x != nullrev and x in seen:
445 439 seen.add(i)
446 440 yield i
447 441 break
448 442
449 443 def findcommonmissing(self, common=None, heads=None):
450 444 """Return a tuple of the ancestors of common and the ancestors of heads
451 445 that are not ancestors of common. In revset terminology, we return the
452 446 tuple:
453 447
454 448 ::common, (::heads) - (::common)
455 449
456 450 The list is sorted by revision number, meaning it is
457 451 topologically sorted.
458 452
459 453 'heads' and 'common' are both lists of node IDs. If heads is
460 454 not supplied, uses all of the revlog's heads. If common is not
461 455 supplied, uses nullid."""
462 456 if common is None:
463 457 common = [nullid]
464 458 if heads is None:
465 459 heads = self.heads()
466 460
467 461 common = [self.rev(n) for n in common]
468 462 heads = [self.rev(n) for n in heads]
469 463
470 464 # we want the ancestors, but inclusive
471 465 class lazyset(object):
472 466 def __init__(self, lazyvalues):
473 467 self.addedvalues = set()
474 468 self.lazyvalues = lazyvalues
475 469
476 470 def __contains__(self, value):
477 471 return value in self.addedvalues or value in self.lazyvalues
478 472
479 473 def __iter__(self):
480 474 added = self.addedvalues
481 475 for r in added:
482 476 yield r
483 477 for r in self.lazyvalues:
484 478 if not r in added:
485 479 yield r
486 480
487 481 def add(self, value):
488 482 self.addedvalues.add(value)
489 483
490 484 def update(self, values):
491 485 self.addedvalues.update(values)
492 486
493 487 has = lazyset(self.ancestors(common))
494 488 has.add(nullrev)
495 489 has.update(common)
496 490
497 491 # take all ancestors from heads that aren't in has
498 492 missing = set()
499 493 visit = collections.deque(r for r in heads if r not in has)
500 494 while visit:
501 495 r = visit.popleft()
502 496 if r in missing:
503 497 continue
504 498 else:
505 499 missing.add(r)
506 500 for p in self.parentrevs(r):
507 501 if p not in has:
508 502 visit.append(p)
509 503 missing = list(missing)
510 504 missing.sort()
511 505 return has, [self.node(r) for r in missing]
512 506
513 507 def incrementalmissingrevs(self, common=None):
514 508 """Return an object that can be used to incrementally compute the
515 509 revision numbers of the ancestors of arbitrary sets that are not
516 510 ancestors of common. This is an ancestor.incrementalmissingancestors
517 511 object.
518 512
519 513 'common' is a list of revision numbers. If common is not supplied, uses
520 514 nullrev.
521 515 """
522 516 if common is None:
523 517 common = [nullrev]
524 518
525 519 return ancestor.incrementalmissingancestors(self.parentrevs, common)
526 520
527 521 def findmissingrevs(self, common=None, heads=None):
528 522 """Return the revision numbers of the ancestors of heads that
529 523 are not ancestors of common.
530 524
531 525 More specifically, return a list of revision numbers corresponding to
532 526 nodes N such that every N satisfies the following constraints:
533 527
534 528 1. N is an ancestor of some node in 'heads'
535 529 2. N is not an ancestor of any node in 'common'
536 530
537 531 The list is sorted by revision number, meaning it is
538 532 topologically sorted.
539 533
540 534 'heads' and 'common' are both lists of revision numbers. If heads is
541 535 not supplied, uses all of the revlog's heads. If common is not
542 536 supplied, uses nullrev."""
543 537 if common is None:
544 538 common = [nullrev]
545 539 if heads is None:
546 540 heads = self.headrevs()
547 541
548 542 inc = self.incrementalmissingrevs(common=common)
549 543 return inc.missingancestors(heads)
550 544
551 545 def findmissing(self, common=None, heads=None):
552 546 """Return the ancestors of heads that are not ancestors of common.
553 547
554 548 More specifically, return a list of nodes N such that every N
555 549 satisfies the following constraints:
556 550
557 551 1. N is an ancestor of some node in 'heads'
558 552 2. N is not an ancestor of any node in 'common'
559 553
560 554 The list is sorted by revision number, meaning it is
561 555 topologically sorted.
562 556
563 557 'heads' and 'common' are both lists of node IDs. If heads is
564 558 not supplied, uses all of the revlog's heads. If common is not
565 559 supplied, uses nullid."""
566 560 if common is None:
567 561 common = [nullid]
568 562 if heads is None:
569 563 heads = self.heads()
570 564
571 565 common = [self.rev(n) for n in common]
572 566 heads = [self.rev(n) for n in heads]
573 567
574 568 inc = self.incrementalmissingrevs(common=common)
575 569 return [self.node(r) for r in inc.missingancestors(heads)]
576 570
577 571 def nodesbetween(self, roots=None, heads=None):
578 572 """Return a topological path from 'roots' to 'heads'.
579 573
580 574 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
581 575 topologically sorted list of all nodes N that satisfy both of
582 576 these constraints:
583 577
584 578 1. N is a descendant of some node in 'roots'
585 579 2. N is an ancestor of some node in 'heads'
586 580
587 581 Every node is considered to be both a descendant and an ancestor
588 582 of itself, so every reachable node in 'roots' and 'heads' will be
589 583 included in 'nodes'.
590 584
591 585 'outroots' is the list of reachable nodes in 'roots', i.e., the
592 586 subset of 'roots' that is returned in 'nodes'. Likewise,
593 587 'outheads' is the subset of 'heads' that is also in 'nodes'.
594 588
595 589 'roots' and 'heads' are both lists of node IDs. If 'roots' is
596 590 unspecified, uses nullid as the only root. If 'heads' is
597 591 unspecified, uses list of all of the revlog's heads."""
598 592 nonodes = ([], [], [])
599 593 if roots is not None:
600 594 roots = list(roots)
601 595 if not roots:
602 596 return nonodes
603 597 lowestrev = min([self.rev(n) for n in roots])
604 598 else:
605 599 roots = [nullid] # Everybody's a descendant of nullid
606 600 lowestrev = nullrev
607 601 if (lowestrev == nullrev) and (heads is None):
608 602 # We want _all_ the nodes!
609 603 return ([self.node(r) for r in self], [nullid], list(self.heads()))
610 604 if heads is None:
611 605 # All nodes are ancestors, so the latest ancestor is the last
612 606 # node.
613 607 highestrev = len(self) - 1
614 608 # Set ancestors to None to signal that every node is an ancestor.
615 609 ancestors = None
616 610 # Set heads to an empty dictionary for later discovery of heads
617 611 heads = {}
618 612 else:
619 613 heads = list(heads)
620 614 if not heads:
621 615 return nonodes
622 616 ancestors = set()
623 617 # Turn heads into a dictionary so we can remove 'fake' heads.
624 618 # Also, later we will be using it to filter out the heads we can't
625 619 # find from roots.
626 620 heads = dict.fromkeys(heads, False)
627 621 # Start at the top and keep marking parents until we're done.
628 622 nodestotag = set(heads)
629 623 # Remember where the top was so we can use it as a limit later.
630 624 highestrev = max([self.rev(n) for n in nodestotag])
631 625 while nodestotag:
632 626 # grab a node to tag
633 627 n = nodestotag.pop()
634 628 # Never tag nullid
635 629 if n == nullid:
636 630 continue
637 631 # A node's revision number represents its place in a
638 632 # topologically sorted list of nodes.
639 633 r = self.rev(n)
640 634 if r >= lowestrev:
641 635 if n not in ancestors:
642 636 # If we are possibly a descendant of one of the roots
643 637 # and we haven't already been marked as an ancestor
644 638 ancestors.add(n) # Mark as ancestor
645 639 # Add non-nullid parents to list of nodes to tag.
646 640 nodestotag.update([p for p in self.parents(n) if
647 641 p != nullid])
648 642 elif n in heads: # We've seen it before, is it a fake head?
649 643 # So it is, real heads should not be the ancestors of
650 644 # any other heads.
651 645 heads.pop(n)
652 646 if not ancestors:
653 647 return nonodes
654 648 # Now that we have our set of ancestors, we want to remove any
655 649 # roots that are not ancestors.
656 650
657 651 # If one of the roots was nullid, everything is included anyway.
658 652 if lowestrev > nullrev:
659 653 # But, since we weren't, let's recompute the lowest rev to not
660 654 # include roots that aren't ancestors.
661 655
662 656 # Filter out roots that aren't ancestors of heads
663 657 roots = [n for n in roots if n in ancestors]
664 658 # Recompute the lowest revision
665 659 if roots:
666 660 lowestrev = min([self.rev(n) for n in roots])
667 661 else:
668 662 # No more roots? Return empty list
669 663 return nonodes
670 664 else:
671 665 # We are descending from nullid, and don't need to care about
672 666 # any other roots.
673 667 lowestrev = nullrev
674 668 roots = [nullid]
675 669 # Transform our roots list into a set.
676 670 descendants = set(roots)
677 671 # Also, keep the original roots so we can filter out roots that aren't
678 672 # 'real' roots (i.e. are descended from other roots).
679 673 roots = descendants.copy()
680 674 # Our topologically sorted list of output nodes.
681 675 orderedout = []
682 676 # Don't start at nullid since we don't want nullid in our output list,
683 677 # and if nullid shows up in descendants, empty parents will look like
684 678 # they're descendants.
685 679 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
686 680 n = self.node(r)
687 681 isdescendant = False
688 682 if lowestrev == nullrev: # Everybody is a descendant of nullid
689 683 isdescendant = True
690 684 elif n in descendants:
691 685 # n is already a descendant
692 686 isdescendant = True
693 687 # This check only needs to be done here because all the roots
694 688 # will start being marked as descendants before the loop.
695 689 if n in roots:
696 690 # If n was a root, check if it's a 'real' root.
697 691 p = tuple(self.parents(n))
698 692 # If any of its parents are descendants, it's not a root.
699 693 if (p[0] in descendants) or (p[1] in descendants):
700 694 roots.remove(n)
701 695 else:
702 696 p = tuple(self.parents(n))
703 697 # A node is a descendant if either of its parents are
704 698 # descendants. (We seeded the descendants set with the roots
705 699 # up there, remember?)
706 700 if (p[0] in descendants) or (p[1] in descendants):
707 701 descendants.add(n)
708 702 isdescendant = True
709 703 if isdescendant and ((ancestors is None) or (n in ancestors)):
710 704 # Only include nodes that are both descendants and ancestors.
711 705 orderedout.append(n)
712 706 if (ancestors is not None) and (n in heads):
713 707 # We're trying to figure out which heads are reachable
714 708 # from roots.
715 709 # Mark this head as having been reached
716 710 heads[n] = True
717 711 elif ancestors is None:
718 712 # Otherwise, we're trying to discover the heads.
719 713 # Assume this is a head because if it isn't, the next step
720 714 # will eventually remove it.
721 715 heads[n] = True
722 716 # But, obviously its parents aren't.
723 717 for p in self.parents(n):
724 718 heads.pop(p, None)
725 719 heads = [n for n, flag in heads.iteritems() if flag]
726 720 roots = list(roots)
727 721 assert orderedout
728 722 assert roots
729 723 assert heads
730 724 return (orderedout, roots, heads)
731 725
732 726 def headrevs(self):
733 727 try:
734 728 return self.index.headrevs()
735 729 except AttributeError:
736 730 return self._headrevs()
737 731
738 732 def computephases(self, roots):
739 733 return self.index.computephasesmapsets(roots)
740 734
741 735 def _headrevs(self):
742 736 count = len(self)
743 737 if not count:
744 738 return [nullrev]
745 739 # we won't iterate over filtered revs so nobody is a head at start
746 740 ishead = [0] * (count + 1)
747 741 index = self.index
748 742 for r in self:
749 743 ishead[r] = 1 # I may be a head
750 744 e = index[r]
751 745 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
752 746 return [r for r, val in enumerate(ishead) if val]
753 747
754 748 def heads(self, start=None, stop=None):
755 749 """return the list of all nodes that have no children
756 750
757 751 if start is specified, only heads that are descendants of
758 752 start will be returned
759 753 if stop is specified, it will consider all the revs from stop
760 754 as if they had no children
761 755 """
762 756 if start is None and stop is None:
763 757 if not len(self):
764 758 return [nullid]
765 759 return [self.node(r) for r in self.headrevs()]
766 760
767 761 if start is None:
768 762 start = nullid
769 763 if stop is None:
770 764 stop = []
771 765 stoprevs = set([self.rev(n) for n in stop])
772 766 startrev = self.rev(start)
773 767 reachable = set((startrev,))
774 768 heads = set((startrev,))
775 769
776 770 parentrevs = self.parentrevs
777 771 for r in self.revs(start=startrev + 1):
778 772 for p in parentrevs(r):
779 773 if p in reachable:
780 774 if r not in stoprevs:
781 775 reachable.add(r)
782 776 heads.add(r)
783 777 if p in heads and p not in stoprevs:
784 778 heads.remove(p)
785 779
786 780 return [self.node(r) for r in heads]
787 781
788 782 def children(self, node):
789 783 """find the children of a given node"""
790 784 c = []
791 785 p = self.rev(node)
792 786 for r in self.revs(start=p + 1):
793 787 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
794 788 if prevs:
795 789 for pr in prevs:
796 790 if pr == p:
797 791 c.append(self.node(r))
798 792 elif p == nullrev:
799 793 c.append(self.node(r))
800 794 return c
801 795
802 796 def descendant(self, start, end):
803 797 if start == nullrev:
804 798 return True
805 799 for i in self.descendants([start]):
806 800 if i == end:
807 801 return True
808 802 elif i > end:
809 803 break
810 804 return False
811 805
812 806 def commonancestorsheads(self, a, b):
813 807 """calculate all the heads of the common ancestors of nodes a and b"""
814 808 a, b = self.rev(a), self.rev(b)
815 809 try:
816 810 ancs = self.index.commonancestorsheads(a, b)
817 811 except (AttributeError, OverflowError): # C implementation failed
818 812 ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)
819 813 return map(self.node, ancs)
820 814
821 815 def isancestor(self, a, b):
822 816 """return True if node a is an ancestor of node b
823 817
824 818 The implementation of this is trivial but the use of
825 819 commonancestorsheads is not."""
826 820 return a in self.commonancestorsheads(a, b)
827 821
828 822 def ancestor(self, a, b):
829 823 """calculate the "best" common ancestor of nodes a and b"""
830 824
831 825 a, b = self.rev(a), self.rev(b)
832 826 try:
833 827 ancs = self.index.ancestors(a, b)
834 828 except (AttributeError, OverflowError):
835 829 ancs = ancestor.ancestors(self.parentrevs, a, b)
836 830 if ancs:
837 831 # choose a consistent winner when there's a tie
838 832 return min(map(self.node, ancs))
839 833 return nullid
840 834
841 835 def _match(self, id):
842 836 if isinstance(id, int):
843 837 # rev
844 838 return self.node(id)
845 839 if len(id) == 20:
846 840 # possibly a binary node
847 841 # odds of a binary node being all hex in ASCII are 1 in 10**25
848 842 try:
849 843 node = id
850 844 self.rev(node) # quick search the index
851 845 return node
852 846 except LookupError:
853 847 pass # may be partial hex id
854 848 try:
855 849 # str(rev)
856 850 rev = int(id)
857 851 if str(rev) != id:
858 852 raise ValueError
859 853 if rev < 0:
860 854 rev = len(self) + rev
861 855 if rev < 0 or rev >= len(self):
862 856 raise ValueError
863 857 return self.node(rev)
864 858 except (ValueError, OverflowError):
865 859 pass
866 860 if len(id) == 40:
867 861 try:
868 862 # a full hex nodeid?
869 863 node = bin(id)
870 864 self.rev(node)
871 865 return node
872 866 except (TypeError, LookupError):
873 867 pass
874 868
875 869 def _partialmatch(self, id):
876 870 try:
877 871 n = self.index.partialmatch(id)
878 872 if n and self.hasnode(n):
879 873 return n
880 874 return None
881 875 except RevlogError:
882 876 # parsers.c radix tree lookup gave multiple matches
883 877 # fall through to slow path that filters hidden revisions
884 878 pass
885 879 except (AttributeError, ValueError):
886 880 # we are pure python, or key was too short to search radix tree
887 881 pass
888 882
889 883 if id in self._pcache:
890 884 return self._pcache[id]
891 885
892 886 if len(id) < 40:
893 887 try:
894 888 # hex(node)[:...]
895 889 l = len(id) // 2 # grab an even number of digits
896 890 prefix = bin(id[:l * 2])
897 891 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
898 892 nl = [n for n in nl if hex(n).startswith(id) and
899 893 self.hasnode(n)]
900 894 if len(nl) > 0:
901 895 if len(nl) == 1:
902 896 self._pcache[id] = nl[0]
903 897 return nl[0]
904 898 raise LookupError(id, self.indexfile,
905 899 _('ambiguous identifier'))
906 900 return None
907 901 except TypeError:
908 902 pass
909 903
910 904 def lookup(self, id):
911 905 """locate a node based on:
912 906 - revision number or str(revision number)
913 907 - nodeid or subset of hex nodeid
914 908 """
915 909 n = self._match(id)
916 910 if n is not None:
917 911 return n
918 912 n = self._partialmatch(id)
919 913 if n:
920 914 return n
921 915
922 916 raise LookupError(id, self.indexfile, _('no match found'))
923 917
924 918 def cmp(self, node, text):
925 919 """compare text with a given file revision
926 920
927 921 returns True if text is different from what is stored.
928 922 """
929 923 p1, p2 = self.parents(node)
930 924 return hash(text, p1, p2) != node
931 925
932 926 def _addchunk(self, offset, data):
933 927 o, d = self._chunkcache
934 928 # try to add to existing cache
935 929 if o + len(d) == offset and len(d) + len(data) < _chunksize:
936 930 self._chunkcache = o, d + data
937 931 else:
938 932 self._chunkcache = offset, data
939 933
940 934 def _loadchunk(self, offset, length):
941 935 if self._inline:
942 936 df = self.opener(self.indexfile)
943 937 else:
944 938 df = self.opener(self.datafile)
945 939
946 940 # Cache data both forward and backward around the requested
947 941 # data, in a fixed size window. This helps speed up operations
948 942 # involving reading the revlog backwards.
949 943 cachesize = self._chunkcachesize
950 944 realoffset = offset & ~(cachesize - 1)
951 945 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
952 946 - realoffset)
953 947 df.seek(realoffset)
954 948 d = df.read(reallength)
955 949 df.close()
956 950 self._addchunk(realoffset, d)
957 951 if offset != realoffset or reallength != length:
958 952 return util.buffer(d, offset - realoffset, length)
959 953 return d
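
With the default 64 KiB cache, the requested range is widened to cache-size-aligned boundaries, which is why the size must be a power of two (checked in __init__ above). The alignment arithmetic, worked through on sample numbers:

    cachesize = 65536                       # default _chunkcachesize
    offset, length = 70000, 100
    realoffset = offset & ~(cachesize - 1)  # round down: 65536
    reallength = (((offset + length + cachesize) & ~(cachesize - 1))
                  - realoffset)             # round the end up: reads 65536 bytes
    assert realoffset <= offset
    assert realoffset + reallength >= offset + length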
960 954
961 955 def _getchunk(self, offset, length):
962 956 o, d = self._chunkcache
963 957 l = len(d)
964 958
965 959 # is it in the cache?
966 960 cachestart = offset - o
967 961 cacheend = cachestart + length
968 962 if cachestart >= 0 and cacheend <= l:
969 963 if cachestart == 0 and cacheend == l:
970 964 return d # avoid a copy
971 965 return util.buffer(d, cachestart, cacheend - cachestart)
972 966
973 967 return self._loadchunk(offset, length)
974 968
975 969 def _chunkraw(self, startrev, endrev):
976 970 start = self.start(startrev)
977 971 end = self.end(endrev)
978 972 if self._inline:
979 973 start += (startrev + 1) * self._io.size
980 974 end += (endrev + 1) * self._io.size
981 975 length = end - start
982 976 return self._getchunk(start, length)
983 977
984 978 def _chunk(self, rev):
985 979 return decompress(self._chunkraw(rev, rev))
986 980
987 981 def _chunks(self, revs):
988 982 '''faster version of [self._chunk(rev) for rev in revs]
989 983
990 984 Assumes that revs is in ascending order.'''
991 985 if not revs:
992 986 return []
993 987 start = self.start
994 988 length = self.length
995 989 inline = self._inline
996 990 iosize = self._io.size
997 991 buffer = util.buffer
998 992
999 993 l = []
1000 994 ladd = l.append
1001 995
1002 996 # preload the cache
1003 997 try:
1004 998 while True:
1005 999 # ensure that the cache doesn't change out from under us
1006 1000 _cache = self._chunkcache
1007 1001 self._chunkraw(revs[0], revs[-1])
1008 1002 if _cache == self._chunkcache:
1009 1003 break
1010 1004 offset, data = _cache
1011 1005 except OverflowError:
1012 1006 # issue4215 - we can't cache a run of chunks greater than
1013 1007 # 2G on Windows
1014 1008 return [self._chunk(rev) for rev in revs]
1015 1009
1016 1010 for rev in revs:
1017 1011 chunkstart = start(rev)
1018 1012 if inline:
1019 1013 chunkstart += (rev + 1) * iosize
1020 1014 chunklength = length(rev)
1021 1015 ladd(decompress(buffer(data, chunkstart - offset, chunklength)))
1022 1016
1023 1017 return l
1024 1018
1025 1019 def _chunkclear(self):
1026 1020 self._chunkcache = (0, '')
1027 1021
1028 1022 def deltaparent(self, rev):
1029 1023 """return deltaparent of the given revision"""
1030 1024 base = self.index[rev][3]
1031 1025 if base == rev:
1032 1026 return nullrev
1033 1027 elif self._generaldelta:
1034 1028 return base
1035 1029 else:
1036 1030 return rev - 1
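
Restated over a toy base list, as in the chain-length sketch earlier: without generaldelta the delta parent is always the previous revision, and a self-based revision is a full snapshot with no delta parent:

    def deltaparent(bases, rev, generaldelta):
        base = bases[rev]
        if base == rev:
            return -1                       # nullrev: full snapshot
        return base if generaldelta else rev - 1

    assert deltaparent([0, 0, 0], 2, generaldelta=True) == 0
    assert deltaparent([0, 0, 0], 2, generaldelta=False) == 1
    assert deltaparent([0, 0, 2], 2, generaldelta=True) == -1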
1037 1031
1038 1032 def revdiff(self, rev1, rev2):
1039 1033 """return or calculate a delta between two revisions"""
1040 1034 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1041 1035 return str(self._chunk(rev2))
1042 1036
1043 1037 return mdiff.textdiff(self.revision(rev1),
1044 1038 self.revision(rev2))
1045 1039
1046 1040 def revision(self, nodeorrev):
1047 1041 """return an uncompressed revision of a given node or revision
1048 1042 number.
1049 1043 """
1050 1044 if isinstance(nodeorrev, int):
1051 1045 rev = nodeorrev
1052 1046 node = self.node(rev)
1053 1047 else:
1054 1048 node = nodeorrev
1055 1049 rev = None
1056 1050
1057 1051 _cache = self._cache # grab local copy of cache to avoid thread race
1058 1052 cachedrev = None
1059 1053 if node == nullid:
1060 1054 return ""
1061 1055 if _cache:
1062 1056 if _cache[0] == node:
1063 1057 return _cache[2]
1064 1058 cachedrev = _cache[1]
1065 1059
1066 1060 # look up what we need to read
1067 1061 text = None
1068 1062 if rev is None:
1069 1063 rev = self.rev(node)
1070 1064
1071 1065 # check rev flags
1072 1066 if self.flags(rev) & ~REVIDX_KNOWN_FLAGS:
1073 1067 raise RevlogError(_('incompatible revision flag %x') %
1074 1068 (self.flags(rev) & ~REVIDX_KNOWN_FLAGS))
1075 1069
1076 1070 # build delta chain
1077 1071 chain = []
1078 1072 index = self.index # for performance
1079 1073 generaldelta = self._generaldelta
1080 1074 iterrev = rev
1081 1075 e = index[iterrev]
1082 1076 while iterrev != e[3] and iterrev != cachedrev:
1083 1077 chain.append(iterrev)
1084 1078 if generaldelta:
1085 1079 iterrev = e[3]
1086 1080 else:
1087 1081 iterrev -= 1
1088 1082 e = index[iterrev]
1089 1083
1090 1084 if iterrev == cachedrev:
1091 1085 # cache hit
1092 1086 text = _cache[2]
1093 1087 else:
1094 1088 chain.append(iterrev)
1095 1089 chain.reverse()
1096 1090
1097 1091 # drop cache to save memory
1098 1092 self._cache = None
1099 1093
1100 1094 bins = self._chunks(chain)
1101 1095 if text is None:
1102 1096 text = str(bins[0])
1103 1097 bins = bins[1:]
1104 1098
1105 1099 text = mdiff.patches(text, bins)
1106 1100
1107 1101 text = self._checkhash(text, node, rev)
1108 1102
1109 1103 self._cache = (node, rev, text)
1110 1104 return text
1111 1105
1112 1106 def hash(self, text, p1, p2):
1113 1107 """Compute a node hash.
1114 1108
1115 1109 Available as a function so that subclasses can replace the hash
1116 1110 as needed.
1117 1111 """
1118 1112 return hash(text, p1, p2)
1119 1113
1120 1114 def _checkhash(self, text, node, rev):
1121 1115 p1, p2 = self.parents(node)
1122 1116 self.checkhash(text, p1, p2, node, rev)
1123 1117 return text
1124 1118
1125 1119 def checkhash(self, text, p1, p2, node, rev=None):
1126 1120 if node != self.hash(text, p1, p2):
1127 1121 revornode = rev
1128 1122 if revornode is None:
1129 1123 revornode = templatefilters.short(hex(node))
1130 1124 raise RevlogError(_("integrity check failed on %s:%s")
1131 1125 % (self.indexfile, revornode))
1132 1126
1133 1127 def checkinlinesize(self, tr, fp=None):
1134 1128 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
1135 1129 return
1136 1130
1137 1131 trinfo = tr.find(self.indexfile)
1138 1132 if trinfo is None:
1139 1133 raise RevlogError(_("%s not found in the transaction")
1140 1134 % self.indexfile)
1141 1135
1142 1136 trindex = trinfo[2]
1143 1137 if trindex is not None:
1144 1138 dataoff = self.start(trindex)
1145 1139 else:
1146 1140 # revlog was stripped at start of transaction, use all leftover data
1147 1141 trindex = len(self) - 1
1148 1142 dataoff = self.end(-2)
1149 1143
1150 1144 tr.add(self.datafile, dataoff)
1151 1145
1152 1146 if fp:
1153 1147 fp.flush()
1154 1148 fp.close()
1155 1149
1156 1150 df = self.opener(self.datafile, 'w')
1157 1151 try:
1158 1152 for r in self:
1159 1153 df.write(self._chunkraw(r, r))
1160 1154 finally:
1161 1155 df.close()
1162 1156
1163 1157 fp = self.opener(self.indexfile, 'w', atomictemp=True)
1164 1158 self.version &= ~(REVLOGNGINLINEDATA)
1165 1159 self._inline = False
1166 1160 for i in self:
1167 1161 e = self._io.packentry(self.index[i], self.node, self.version, i)
1168 1162 fp.write(e)
1169 1163
1170 1164 # if we don't call close, the temp file will never replace the
1171 1165 # real index
1172 1166 fp.close()
1173 1167
1174 1168 tr.replace(self.indexfile, trindex * self._io.size)
1175 1169 self._chunkclear()
1176 1170
1177 1171 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1178 1172 node=None):
1179 1173 """add a revision to the log
1180 1174
1181 1175 text - the revision data to add
1182 1176 transaction - the transaction object used for rollback
1183 1177 link - the linkrev data to add
1184 1178 p1, p2 - the parent nodeids of the revision
1185 1179 cachedelta - an optional precomputed delta
1186 1180 node - nodeid of revision; typically node is not specified, and it is
1187 1181 computed by default as hash(text, p1, p2); however, subclasses might
1188 1182 use a different hashing method (and override checkhash() in that case)
1189 1183 """
1190 1184 if link == nullrev:
1191 1185 raise RevlogError(_("attempted to add linkrev -1 to %s")
1192 1186 % self.indexfile)
1187
1188 if len(text) > _maxentrysize:
1189 raise RevlogError(
1190 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
1191 % (self.indexfile, len(text)))
1192
1193 1193 node = node or self.hash(text, p1, p2)
1194 1194 if node in self.nodemap:
1195 1195 return node
1196 1196
1197 1197 dfh = None
1198 1198 if not self._inline:
1199 1199 dfh = self.opener(self.datafile, "a")
1200 1200 ifh = self.opener(self.indexfile, "a+")
1201 1201 try:
1202 1202 return self._addrevision(node, text, transaction, link, p1, p2,
1203 1203 REVIDX_DEFAULT_FLAGS, cachedelta, ifh, dfh)
1204 1204 finally:
1205 1205 if dfh:
1206 1206 dfh.close()
1207 1207 ifh.close()
1208 1208
1209 1209 def compress(self, text):
1210 1210 """ generate a possibly-compressed representation of text """
1211 1211 if not text:
1212 1212 return ("", text)
1213 1213 l = len(text)
1214 1214 bin = None
1215 1215 if l < 44:
1216 1216 pass
1217 1217 elif l > 1000000:
1218 1218 # zlib makes an internal copy, thus doubling memory usage for
1219 1219 # large files, so let's do this in pieces
1220 1220 z = zlib.compressobj()
1221 1221 p = []
1222 1222 pos = 0
1223 1223 while pos < l:
1224 1224 pos2 = pos + 2**20
1225 1225 p.append(z.compress(text[pos:pos2]))
1226 1226 pos = pos2
1227 1227 p.append(z.flush())
1228 1228 if sum(map(len, p)) < l:
1229 1229 bin = "".join(p)
1230 1230 else:
1231 1231 bin = _compress(text)
1232 1232 if bin is None or len(bin) > l:
1233 1233 if text[0] == '\0':
1234 1234 return ("", text)
1235 1235 return ('u', text)
1236 1236 return ("", bin)
1237 1237
1238 1238 def _addrevision(self, node, text, transaction, link, p1, p2, flags,
1239 1239 cachedelta, ifh, dfh):
1240 1240 """internal function to add revisions to the log
1241 1241
1242 1242 see addrevision for argument descriptions.
1243 1243 invariants:
1244 1244 - text is optional (can be None); if not set, cachedelta must be set.
1245 1245 if both are set, they must correspond to each other.
1246 1246 """
1247 1247 btext = [text]
1248 1248 def buildtext():
1249 1249 if btext[0] is not None:
1250 1250 return btext[0]
1251 1251 # flush any pending writes here so we can read it in revision
1252 1252 if dfh:
1253 1253 dfh.flush()
1254 1254 ifh.flush()
1255 1255 baserev = cachedelta[0]
1256 1256 delta = cachedelta[1]
1257 1257 # special case deltas which replace entire base; no need to decode
1258 1258 # base revision. this neatly avoids censored bases, which throw when
1259 1259 # they're decoded.
1260 1260 hlen = struct.calcsize(">lll")
1261 1261 if delta[:hlen] == mdiff.replacediffheader(self.rawsize(baserev),
1262 1262 len(delta) - hlen):
1263 1263 btext[0] = delta[hlen:]
1264 1264 else:
1265 1265 basetext = self.revision(self.node(baserev))
1266 1266 btext[0] = mdiff.patch(basetext, delta)
1267 1267 try:
1268 1268 self.checkhash(btext[0], p1, p2, node)
1269 1269 if flags & REVIDX_ISCENSORED:
1270 1270 raise RevlogError(_('node %s is not censored') % node)
1271 1271 except CensoredNodeError:
1272 1272 # must pass the censored index flag to add censored revisions
1273 1273 if not flags & REVIDX_ISCENSORED:
1274 1274 raise
1275 1275 return btext[0]
1276 1276
1277 1277 def builddelta(rev):
1278 1278 # can we use the cached delta?
1279 1279 if cachedelta and cachedelta[0] == rev:
1280 1280 delta = cachedelta[1]
1281 1281 else:
1282 1282 t = buildtext()
1283 1283 if self.iscensored(rev):
1284 1284 # deltas based on a censored revision must replace the
1285 1285 # full content in one patch, so delta works everywhere
1286 1286 header = mdiff.replacediffheader(self.rawsize(rev), len(t))
1287 1287 delta = header + t
1288 1288 else:
1289 1289 ptext = self.revision(self.node(rev))
1290 1290 delta = mdiff.textdiff(ptext, t)
1291 1291 data = self.compress(delta)
1292 1292 l = len(data[1]) + len(data[0])
1293 1293 if basecache[0] == rev:
1294 1294 chainbase = basecache[1]
1295 1295 else:
1296 1296 chainbase = self.chainbase(rev)
1297 1297 dist = l + offset - self.start(chainbase)
1298 1298 if self._generaldelta:
1299 1299 base = rev
1300 1300 else:
1301 1301 base = chainbase
1302 1302 chainlen, compresseddeltalen = self._chaininfo(rev)
1303 1303 chainlen += 1
1304 1304 compresseddeltalen += l
1305 1305 return dist, l, data, base, chainbase, chainlen, compresseddeltalen
1306 1306
1307 1307 curr = len(self)
1308 1308 prev = curr - 1
1309 1309 base = chainbase = curr
1310 1310 chainlen = None
1311 1311 offset = self.end(prev)
1312 1312 d = None
1313 1313 if self._basecache is None:
1314 1314 self._basecache = (prev, self.chainbase(prev))
1315 1315 basecache = self._basecache
1316 1316 p1r, p2r = self.rev(p1), self.rev(p2)
1317 1317
1318 1318 # should we try to build a delta?
1319 1319 if prev != nullrev:
1320 1320 if self._generaldelta:
1321 1321 if p1r >= basecache[1]:
1322 1322 d = builddelta(p1r)
1323 1323 elif p2r >= basecache[1]:
1324 1324 d = builddelta(p2r)
1325 1325 else:
1326 1326 d = builddelta(prev)
1327 1327 else:
1328 1328 d = builddelta(prev)
1329 1329 dist, l, data, base, chainbase, chainlen, compresseddeltalen = d
1330 1330
1331 1331 # full versions are inserted when the needed deltas
1332 1332 # become comparable to the uncompressed text
1333 1333 if text is None:
1334 1334 textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
1335 1335 cachedelta[1])
1336 1336 else:
1337 1337 textlen = len(text)
1338 1338
1339 1339 # - 'dist' is the distance from the base revision -- bounding it limits
1340 1340 # the amount of I/O we need to do.
1341 1341 # - 'compresseddeltalen' is the sum of the total size of deltas we need
1342 1342 # to apply -- bounding it limits the amount of CPU we consume.
1343 1343 if (d is None or dist > textlen * 4 or l > textlen or
1344 1344 compresseddeltalen > textlen * 2 or
1345 1345 (self._maxchainlen and chainlen > self._maxchainlen)):
1346 1346 text = buildtext()
1347 1347 data = self.compress(text)
1348 1348 l = len(data[1]) + len(data[0])
1349 1349 base = chainbase = curr
1350 1350
1351 1351 e = (offset_type(offset, flags), l, textlen,
1352 1352 base, link, p1r, p2r, node)
1353 1353 self.index.insert(-1, e)
1354 1354 self.nodemap[node] = curr
1355 1355
1356 1356 entry = self._io.packentry(e, self.node, self.version, curr)
1357 1357 self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
1358 1358
1359 1359 if type(text) == str: # only accept immutable objects
1360 1360 self._cache = (node, curr, text)
1361 1361 self._basecache = (curr, chainbase)
1362 1362 return node
1363 1363
1364 1364 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
1365 1365 curr = len(self) - 1
1366 1366 if not self._inline:
1367 1367 transaction.add(self.datafile, offset)
1368 1368 transaction.add(self.indexfile, curr * len(entry))
1369 1369 if data[0]:
1370 1370 dfh.write(data[0])
1371 1371 dfh.write(data[1])
1372 1372 dfh.flush()
1373 1373 ifh.write(entry)
1374 1374 else:
1375 1375 offset += curr * self._io.size
1376 1376 transaction.add(self.indexfile, offset, curr)
1377 1377 ifh.write(entry)
1378 1378 ifh.write(data[0])
1379 1379 ifh.write(data[1])
1380 1380 self.checkinlinesize(transaction, ifh)
1381 1381
1382 1382 def addgroup(self, bundle, linkmapper, transaction):
1383 1383 """
1384 1384 add a delta group
1385 1385
1386 1386 given a set of deltas, add them to the revision log. the
1387 1387 first delta is against its parent, which should be in our
1388 1388 log; the rest are against the previous delta.
1389 1389 """
1390 1390
1391 1391 # track the base of the current delta log
1392 1392 content = []
1393 1393 node = None
1394 1394
1395 1395 r = len(self)
1396 1396 end = 0
1397 1397 if r:
1398 1398 end = self.end(r - 1)
1399 1399 ifh = self.opener(self.indexfile, "a+")
1400 1400 isize = r * self._io.size
1401 1401 if self._inline:
1402 1402 transaction.add(self.indexfile, end + isize, r)
1403 1403 dfh = None
1404 1404 else:
1405 1405 transaction.add(self.indexfile, isize, r)
1406 1406 transaction.add(self.datafile, end)
1407 1407 dfh = self.opener(self.datafile, "a")
1408 1408 def flush():
1409 1409 if dfh:
1410 1410 dfh.flush()
1411 1411 ifh.flush()
1412 1412 try:
1413 1413 # loop through our set of deltas
1414 1414 chain = None
1415 1415 while True:
1416 1416 chunkdata = bundle.deltachunk(chain)
1417 1417 if not chunkdata:
1418 1418 break
1419 1419 node = chunkdata['node']
1420 1420 p1 = chunkdata['p1']
1421 1421 p2 = chunkdata['p2']
1422 1422 cs = chunkdata['cs']
1423 1423 deltabase = chunkdata['deltabase']
1424 1424 delta = chunkdata['delta']
1425 1425
1426 1426 content.append(node)
1427 1427
1428 1428 link = linkmapper(cs)
1429 1429 if node in self.nodemap:
1430 1430 # this can happen if two branches make the same change
1431 1431 chain = node
1432 1432 continue
1433 1433
1434 1434 for p in (p1, p2):
1435 1435 if p not in self.nodemap:
1436 1436 raise LookupError(p, self.indexfile,
1437 1437 _('unknown parent'))
1438 1438
1439 1439 if deltabase not in self.nodemap:
1440 1440 raise LookupError(deltabase, self.indexfile,
1441 1441 _('unknown delta base'))
1442 1442
1443 1443 baserev = self.rev(deltabase)
1444 1444
1445 1445 if baserev != nullrev and self.iscensored(baserev):
1446 1446 # if base is censored, delta must be full replacement in a
1447 1447 # single patch operation
1448 1448 hlen = struct.calcsize(">lll")
1449 1449 oldlen = self.rawsize(baserev)
1450 1450 newlen = len(delta) - hlen
1451 1451 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
1452 1452 raise error.CensoredBaseError(self.indexfile,
1453 1453 self.node(baserev))
1454 1454
1455 1455 flags = REVIDX_DEFAULT_FLAGS
1456 1456 if self._peek_iscensored(baserev, delta, flush):
1457 1457 flags |= REVIDX_ISCENSORED
1458 1458
1459 1459 chain = self._addrevision(node, None, transaction, link,
1460 1460 p1, p2, flags, (baserev, delta),
1461 1461 ifh, dfh)
1462 1462 if not dfh and not self._inline:
1463 1463 # addrevision switched from inline to conventional
1464 1464 # reopen the index
1465 1465 ifh.close()
1466 1466 dfh = self.opener(self.datafile, "a")
1467 1467 ifh = self.opener(self.indexfile, "a")
1468 1468 finally:
1469 1469 if dfh:
1470 1470 dfh.close()
1471 1471 ifh.close()
1472 1472
1473 1473 return content
1474 1474
1475 1475 def iscensored(self, rev):
1476 1476 """Check if a file revision is censored."""
1477 1477 return False
1478 1478
1479 1479 def _peek_iscensored(self, baserev, delta, flush):
1480 1480 """Quickly check if a delta produces a censored revision."""
1481 1481 return False
1482 1482
1483 1483 def getstrippoint(self, minlink):
1484 1484 """find the minimum rev that must be stripped to strip the linkrev
1485 1485
1486 1486 Returns a tuple containing the minimum rev and a set of all revs that
1487 1487 have linkrevs that will be broken by this strip.
1488 1488 """
1489 1489 brokenrevs = set()
1490 1490 strippoint = len(self)
1491 1491
1492 1492 heads = {}
1493 1493 futurelargelinkrevs = set()
1494 1494 for head in self.headrevs():
1495 1495 headlinkrev = self.linkrev(head)
1496 1496 heads[head] = headlinkrev
1497 1497 if headlinkrev >= minlink:
1498 1498 futurelargelinkrevs.add(headlinkrev)
1499 1499
1500 1500 # This algorithm involves walking down the rev graph, starting at the
1501 1501 # heads. Since the revs are topologically sorted according to linkrev,
1502 1502 # once all head linkrevs are below the minlink, we know there are
1503 1503 # no more revs that could have a linkrev greater than minlink.
1504 1504 # So we can stop walking.
1505 1505 while futurelargelinkrevs:
1506 1506 strippoint -= 1
1507 1507 linkrev = heads.pop(strippoint)
1508 1508
1509 1509 if linkrev < minlink:
1510 1510 brokenrevs.add(strippoint)
1511 1511 else:
1512 1512 futurelargelinkrevs.remove(linkrev)
1513 1513
1514 1514 for p in self.parentrevs(strippoint):
1515 1515 if p != nullrev:
1516 1516 plinkrev = self.linkrev(p)
1517 1517 heads[p] = plinkrev
1518 1518 if plinkrev >= minlink:
1519 1519 futurelargelinkrevs.add(plinkrev)
1520 1520
1521 1521 return strippoint, brokenrevs
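
A standalone run of the same walk over a toy linear history (linkrev(r) == r, each revision's parent is r - 1) shows that stripping from a given minlink truncates exactly there and breaks no other linkrevs; this sketch takes callables instead of a revlog and assumes a single head:

    def getstrippoint(linkrev, parentrevs, count, minlink):
        brokenrevs = set()
        strippoint = count
        heads = {count - 1: linkrev(count - 1)}        # toy: single head
        futurelargelinkrevs = set()
        if heads[count - 1] >= minlink:
            futurelargelinkrevs.add(heads[count - 1])
        while futurelargelinkrevs:
            strippoint -= 1
            lr = heads.pop(strippoint)
            if lr < minlink:
                brokenrevs.add(strippoint)
            else:
                futurelargelinkrevs.remove(lr)
            for p in parentrevs(strippoint):
                if p != -1:
                    plr = linkrev(p)
                    heads[p] = plr
                    if plr >= minlink:
                        futurelargelinkrevs.add(plr)
        return strippoint, brokenrevs

    assert getstrippoint(lambda r: r, lambda r: (r - 1, -1), 5, 3) == (3, set())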
1522 1522
1523 1523 def strip(self, minlink, transaction):
1524 1524 """truncate the revlog on the first revision with a linkrev >= minlink
1525 1525
1526 1526 This function is called when we're stripping revision minlink and
1527 1527 its descendants from the repository.
1528 1528
1529 1529 We have to remove all revisions with linkrev >= minlink, because
1530 1530 the equivalent changelog revisions will be renumbered after the
1531 1531 strip.
1532 1532
1533 1533 So we truncate the revlog on the first of these revisions, and
1534 1534 trust that the caller has saved the revisions that shouldn't be
1535 1535 removed and that it'll re-add them after this truncation.
1536 1536 """
1537 1537 if len(self) == 0:
1538 1538 return
1539 1539
1540 1540 rev, _ = self.getstrippoint(minlink)
1541 1541 if rev == len(self):
1542 1542 return
1543 1543
1544 1544 # first truncate the files on disk
1545 1545 end = self.start(rev)
1546 1546 if not self._inline:
1547 1547 transaction.add(self.datafile, end)
1548 1548 end = rev * self._io.size
1549 1549 else:
1550 1550 end += rev * self._io.size
1551 1551
1552 1552 transaction.add(self.indexfile, end)
1553 1553
1554 1554 # then reset internal state in memory to forget those revisions
1555 1555 self._cache = None
1556 1556 self._chaininfocache = {}
1557 1557 self._chunkclear()
1558 1558 for x in xrange(rev, len(self)):
1559 1559 del self.nodemap[self.node(x)]
1560 1560
1561 1561 del self.index[rev:-1]
1562 1562
1563 1563 def checksize(self):
1564 1564 expected = 0
1565 1565 if len(self):
1566 1566 expected = max(0, self.end(len(self) - 1))
1567 1567
1568 1568 try:
1569 1569 f = self.opener(self.datafile)
1570 1570 f.seek(0, 2)
1571 1571 actual = f.tell()
1572 1572 f.close()
1573 1573 dd = actual - expected
1574 1574 except IOError, inst:
1575 1575 if inst.errno != errno.ENOENT:
1576 1576 raise
1577 1577 dd = 0
1578 1578
1579 1579 try:
1580 1580 f = self.opener(self.indexfile)
1581 1581 f.seek(0, 2)
1582 1582 actual = f.tell()
1583 1583 f.close()
1584 1584 s = self._io.size
1585 1585 i = max(0, actual // s)
1586 1586 di = actual - (i * s)
1587 1587 if self._inline:
1588 1588 databytes = 0
1589 1589 for r in self:
1590 1590 databytes += max(0, self.length(r))
1591 1591 dd = 0
1592 1592 di = actual - len(self) * s - databytes
1593 1593 except IOError, inst:
1594 1594 if inst.errno != errno.ENOENT:
1595 1595 raise
1596 1596 di = 0
1597 1597
1598 1598 return (dd, di)
1599 1599
1600 1600 def files(self):
1601 1601 res = [self.indexfile]
1602 1602 if not self._inline:
1603 1603 res.append(self.datafile)
1604 1604 return res