##// END OF EJS Templates
revlog: add docstring for checkinlinesize()...
Gregory Szorc -
r26376:344a1621 default
parent child Browse files
Show More
@@ -1,1654 +1,1660 b''
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 # import stuff from node for others to import from revlog
15 15 import collections
16 16 from node import bin, hex, nullid, nullrev
17 17 from i18n import _
18 18 import ancestor, mdiff, parsers, error, util, templatefilters
19 19 import struct, zlib, errno
20 20
# short local aliases for hot-path helpers
_pack = struct.pack
_unpack = struct.unpack
_compress = zlib.compress
_decompress = zlib.decompress
_sha = util.sha1

# revlog header flags
REVLOGV0 = 0                    # original index format
REVLOGNG = 1                    # current index format
REVLOGNGINLINEDATA = (1 << 16)  # revision data stored inside the index file
REVLOGGENERALDELTA = (1 << 17)  # delta base stored per entry (see _chaininfo)
REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
REVLOG_DEFAULT_FORMAT = REVLOGNG
REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
REVLOGNG_FLAGS = REVLOGNGINLINEDATA | REVLOGGENERALDELTA  # all known ng flags

# revlog index flags (low 16 bits of an entry's first field)
REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
REVIDX_DEFAULT_FLAGS = 0
REVIDX_KNOWN_FLAGS = REVIDX_ISCENSORED

# max size of revlog with inline data
_maxinline = 131072
# upper bound on the in-memory chunk cache window (see _addchunk)
_chunksize = 1048576

# re-export error types so callers can spell them revlog.RevlogError etc.
RevlogError = error.RevlogError
LookupError = error.LookupError
CensoredNodeError = error.CensoredNodeError
49 49
def getoffset(q):
    """Return the data-file offset packed into the high bits of *q*."""
    shifted = q >> 16
    return int(shifted)
52 52
def gettype(q):
    """Return the flag/type bits stored in the low 16 bits of *q*."""
    masked = q & 0xFFFF
    return int(masked)
55 55
def offset_type(offset, type):
    # Pack a data-file offset and 16-bit type flags into a single index
    # field: offset in the high bits, type in the low 16 bits (the inverse
    # of getoffset/gettype).  Python 2 'long' keeps large offsets exact.
    return long(long(offset) << 16 | type)
58 58
# shared sha1 object for the common "p2 is null" case; copying it is
# cheaper than hashing nullid from scratch on every call
_nullhash = _sha(nullid)

def hash(text, p1, p2):
    """generate a hash from the given text and its parent hashes

    This hash combines both the current file contents and its history
    in a manner that makes it easy to distinguish nodes with the same
    content in the revision graph.

    The digest is sha1(min(p1, p2) + max(p1, p2) + text).
    """
    # As of now, if one of the parent node is null, p2 is null
    if p2 == nullid:
        # deep copy of a hash is faster than creating one
        s = _nullhash.copy()
        s.update(p1)
    else:
        # none of the parent nodes are nullid
        l = [p1, p2]
        l.sort()
        s = _sha(l[0])
        s.update(l[1])
    s.update(text)
    return s.digest()
81 81
def decompress(bin):
    """Undo revlog chunk compression, dispatching on the 1-byte marker."""
    if not bin:
        return bin
    marker = bin[0]
    if marker == '\0':
        # chunk stored verbatim, marker byte included
        return bin
    if marker == 'x':
        # zlib stream ('x' is the first byte of a zlib header)
        try:
            return _decompress(bin)
        except zlib.error as e:
            raise RevlogError(_("revlog decompress error: %s") % str(e))
    if marker == 'u':
        # uncompressed literal carrying an explicit 'u' prefix
        return bin[1:]
    raise RevlogError(_("unknown compression type %r") % marker)
97 97
# index v0 on-disk record layout (one fixed-size record per revision):
#  4 bytes: offset
#  4 bytes: compressed length
#  4 bytes: base rev
#  4 bytes: link rev
# 20 bytes: parent 1 nodeid
# 20 bytes: parent 2 nodeid
# 20 bytes: nodeid
indexformatv0 = ">4l20s20s20s"
107 107
class revlogoldio(object):
    """Parser/packer for the original (v0) revlog index format."""
    def __init__(self):
        # size in bytes of one fixed-width v0 index record
        self.size = struct.calcsize(indexformatv0)

    def parseindex(self, data, inline):
        """Parse raw v0 index bytes into (index, nodemap, chunkcache).

        Entries are converted to the revlogv1 in-memory tuple layout so
        the rest of the code only deals with one shape.  The chunkcache
        slot is always None for v0 (v0 has no inline data).
        """
        s = self.size
        index = []
        nodemap = {nullid: nullrev}
        n = off = 0
        l = len(data)
        while off + s <= l:
            cur = data[off:off + s]
            off += s
            e = _unpack(indexformatv0, cur)
            # transform to revlogv1 format
            e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
                  nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
            index.append(e2)
            nodemap[e[6]] = n
            n += 1

        # add the magic null revision at -1
        index.append((0, 0, 0, -1, -1, -1, -1, nullid))

        return index, nodemap, None

    def packentry(self, entry, node, version, rev):
        """Serialize one in-memory (v1-shaped) entry back to v0 bytes.

        'node' is a rev -> nodeid callable used to map parent revision
        numbers back to binary nodeids.  v0 has no flag bits, so any
        flags on the entry are an error.
        """
        if gettype(entry[0]):
            raise RevlogError(_("index entry flags need RevlogNG"))
        e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
              node(entry[5]), node(entry[6]), entry[7])
        return _pack(indexformatv0, *e2)
# index ng (v1) on-disk record layout:
#  6 bytes: offset
#  2 bytes: flags
#  4 bytes: compressed length
#  4 bytes: uncompressed length
#  4 bytes: base rev
#  4 bytes: link rev
#  4 bytes: parent 1 rev
#  4 bytes: parent 2 rev
# 32 bytes: nodeid (20 significant bytes + 12 bytes padding)
indexformatng = ">Qiiiiii20s12x"
# big-endian 4-byte version stamp overlaid on the first index entry
versionformat = ">I"

# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7fffffff
157 157
class revlogio(object):
    """Parser/packer for RevlogNG (v1) format indexes."""
    def __init__(self):
        # size in bytes of one fixed-width v1 index record
        self.size = struct.calcsize(indexformatng)

    def parseindex(self, data, inline):
        # delegate to the C implementation for speed
        index, cache = parsers.parse_index2(data, inline)
        nodemap = getattr(index, 'nodemap', None)
        return index, nodemap, cache

    def packentry(self, entry, node, version, rev):
        packed = _pack(indexformatng, *entry)
        if rev == 0:
            # the first entry doubles as the file header: its leading
            # 4 bytes are overwritten with the version stamp
            packed = _pack(versionformat, version) + packed[4:]
        return packed
173 173 class revlog(object):
174 174 """
175 175 the underlying revision storage object
176 176
177 177 A revlog consists of two parts, an index and the revision data.
178 178
179 179 The index is a file with a fixed record size containing
180 180 information on each revision, including its nodeid (hash), the
181 181 nodeids of its parents, the position and offset of its data within
182 182 the data file, and the revision it's based on. Finally, each entry
183 183 contains a linkrev entry that can serve as a pointer to external
184 184 data.
185 185
186 186 The revision data itself is a linear collection of data chunks.
187 187 Each chunk represents a revision and is usually represented as a
188 188 delta against the previous chunk. To bound lookup time, runs of
189 189 deltas are limited to about 2 times the length of the original
190 190 version data. This makes retrieval of a version proportional to
191 191 its size, or O(1) relative to the number of revisions.
192 192
193 193 Both pieces of the revlog are written to in an append-only
194 194 fashion, which means we never need to rewrite a file to insert or
195 195 remove data, and can use some simple techniques to avoid the need
196 196 for locking while reading.
197 197 """
    def __init__(self, opener, indexfile):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.
        """
        self.indexfile = indexfile
        # data file shares the index's base name: "foo.i" -> "foo.d"
        self.datafile = indexfile[:-2] + ".d"
        self.opener = opener
        # most recently retrieved revision (cleared/filled elsewhere)
        self._cache = None
        self._basecache = None
        # (offset, raw bytes) window over the data file
        self._chunkcache = (0, '')
        self._chunkcachesize = 65536
        self._maxchainlen = None
        self._aggressivemergedeltas = False
        self.index = []
        # partial-hex-id -> node cache used by _partialmatch
        self._pcache = {}
        # node -> rev cache; may be replaced by the C nodemap below
        self._nodecache = {nullid: nullrev}
        # position hint for the linear fallback scan in rev()
        self._nodepos = None

        v = REVLOG_DEFAULT_VERSION
        # per-repo configuration arrives via attributes on the opener
        opts = getattr(opener, 'options', None)
        if opts is not None:
            if 'revlogv1' in opts:
                if 'generaldelta' in opts:
                    v |= REVLOGGENERALDELTA
            else:
                # revlogv1 disabled: fall back to format v0
                v = 0
            if 'chunkcachesize' in opts:
                self._chunkcachesize = opts['chunkcachesize']
            if 'maxchainlen' in opts:
                self._maxchainlen = opts['maxchainlen']
            if 'aggressivemergedeltas' in opts:
                self._aggressivemergedeltas = opts['aggressivemergedeltas']

        # the alignment math in _loadchunk requires a positive power of 2
        if self._chunkcachesize <= 0:
            raise RevlogError(_('revlog chunk cache size %r is not greater '
                                'than 0') % self._chunkcachesize)
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise RevlogError(_('revlog chunk cache size %r is not a power '
                                'of 2') % self._chunkcachesize)

        indexdata = ''
        self._initempty = True
        try:
            f = self.opener(self.indexfile)
            indexdata = f.read()
            f.close()
            if len(indexdata) > 0:
                # an existing index wins over the configured version: the
                # first 4 bytes of the file are the version/flags header
                v = struct.unpack(versionformat, indexdata[:4])[0]
                self._initempty = False
        except IOError as inst:
            # a missing index file simply means an empty revlog
            if inst.errno != errno.ENOENT:
                raise

        self.version = v
        self._inline = v & REVLOGNGINLINEDATA
        self._generaldelta = v & REVLOGGENERALDELTA
        # split header into flag bits (high) and format number (low 16)
        flags = v & ~0xFFFF
        fmt = v & 0xFFFF
        if fmt == REVLOGV0 and flags:
            raise RevlogError(_("index %s unknown flags %#04x for format v0")
                              % (self.indexfile, flags >> 16))
        elif fmt == REVLOGNG and flags & ~REVLOGNG_FLAGS:
            raise RevlogError(_("index %s unknown flags %#04x for revlogng")
                              % (self.indexfile, flags >> 16))
        elif fmt > REVLOGNG:
            raise RevlogError(_("index %s unknown format %d")
                              % (self.indexfile, fmt))

        self._io = revlogio()
        if self.version == REVLOGV0:
            self._io = revlogoldio()
        try:
            d = self._io.parseindex(indexdata, self._inline)
        except (ValueError, IndexError):
            raise RevlogError(_("index %s is corrupted") % (self.indexfile))
        self.index, nodemap, self._chunkcache = d
        if nodemap is not None:
            self.nodemap = self._nodecache = nodemap
        if not self._chunkcache:
            self._chunkclear()
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = {}
283 283
    def tip(self):
        # the index carries a trailing sentinel entry for the null
        # revision, hence "- 2" rather than "- 1"
        return self.node(len(self.index) - 2)
    def __contains__(self, rev):
        # membership test on revision numbers
        return 0 <= rev < len(self)
    def __len__(self):
        # number of real revisions (null sentinel excluded)
        return len(self.index) - 1
    def __iter__(self):
        return iter(xrange(len(self)))
292 292 def revs(self, start=0, stop=None):
293 293 """iterate over all rev in this revlog (from start to stop)"""
294 294 step = 1
295 295 if stop is not None:
296 296 if start > stop:
297 297 step = -1
298 298 stop += step
299 299 else:
300 300 stop = len(self)
301 301 return xrange(start, stop, step)
302 302
    @util.propertycache
    def nodemap(self):
        # force one lookup so the node -> rev cache is primed, then
        # expose the cache itself as the map
        self.rev(self.node(0))
        return self._nodecache

    def hasnode(self, node):
        """True if this revlog contains *node*; never raises."""
        try:
            self.rev(node)
            return True
        except KeyError:
            return False
314 314
315 315 def clearcaches(self):
316 316 try:
317 317 self._nodecache.clearcaches()
318 318 except AttributeError:
319 319 self._nodecache = {nullid: nullrev}
320 320 self._nodepos = None
321 321
    def rev(self, node):
        """Map a binary nodeid to its revision number.

        Raises LookupError when the node is not present.
        """
        try:
            return self._nodecache[node]
        except TypeError:
            # unhashable input is a programming error; don't mask it
            raise
        except RevlogError:
            # parsers.c radix tree lookup failed
            raise LookupError(node, self.indexfile, _('no node'))
        except KeyError:
            # pure python cache lookup failed
            n = self._nodecache
            i = self.index
            p = self._nodepos
            if p is None:
                # start scanning from the tip (skip the null sentinel)
                p = len(i) - 2
            for r in xrange(p, -1, -1):
                v = i[r][7]
                # memoize every node passed so later misses scan less
                n[v] = r
                if v == node:
                    self._nodepos = r - 1
                    return r
            raise LookupError(node, self.indexfile, _('no node'))
344 344
    def node(self, rev):
        # nodeid is field 7 of an index entry
        return self.index[rev][7]
    def linkrev(self, rev):
        # linked (changelog) revision is field 4
        return self.index[rev][4]
    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
    def parentrevs(self, rev):
        # (p1 rev, p2 rev) are fields 5 and 6
        return self.index[rev][5:7]
    def start(self, rev):
        # data-file offset is packed into the high bits of field 0
        return int(self.index[rev][0] >> 16)
    def end(self, rev):
        return self.start(rev) + self.length(rev)
    def length(self, rev):
        # compressed chunk length is field 1
        return self.index[rev][1]
    def chainbase(self, rev):
        # follow field 3 (delta base) until a self-based revision,
        # i.e. the full-text snapshot that anchors the delta chain
        index = self.index
        base = index[rev][3]
        while base != rev:
            rev = base
            base = index[rev][3]
        return base
    def chainlen(self, rev):
        return self._chaininfo(rev)[0]
370 370
    def _chaininfo(self, rev):
        """Return (chain length, total compressed delta size) for *rev*,
        memoizing results in _chaininfocache."""
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        # walk the delta chain until we hit a self-based (full) revision
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                # delta base is stored explicitly in the entry
                iterrev = e[3]
            else:
                # pre-generaldelta: every rev deltas against rev - 1
                iterrev -= 1
            if iterrev in chaininfocache:
                # splice in the cached tail of the chain
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r
401 401
    def flags(self, rev):
        # per-revision flag bits are the low 16 bits of field 0
        return self.index[rev][0] & 0xFFFF
    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        # stored length is unknown (negative): rebuild the text to measure
        t = self.revision(self.node(rev))
        return len(t)
    # alias kept for callers that use the generic name
    size = rawsize
413 413
    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse topological order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        # pure delegation; laziness matters for callers that stop early
        return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
                                      inclusive=inclusive)
422 422
423 423 def descendants(self, revs):
424 424 """Generate the descendants of 'revs' in revision order.
425 425
426 426 Yield a sequence of revision numbers starting with a child of
427 427 some rev in revs, i.e., each revision is *not* considered a
428 428 descendant of itself. Results are ordered by revision number (a
429 429 topological sort)."""
430 430 first = min(revs)
431 431 if first == nullrev:
432 432 for i in self:
433 433 yield i
434 434 return
435 435
436 436 seen = set(revs)
437 437 for i in self.revs(start=first + 1):
438 438 for x in self.parentrevs(i):
439 439 if x != nullrev and x in seen:
440 440 seen.add(i)
441 441 yield i
442 442 break
443 443
    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

          ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset(object):
            # set-like wrapper over a lazy iterable plus explicit
            # additions; avoids materializing all ancestors up front
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if not r in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(r) for r in missing]
507 507
    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        # delegation; the returned object owns all further computation
        return ancestor.incrementalmissingancestors(self.parentrevs, common)
521 521
    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        # delegate the actual graph walk to the incremental helper
        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)
545 545
546 546 def findmissing(self, common=None, heads=None):
547 547 """Return the ancestors of heads that are not ancestors of common.
548 548
549 549 More specifically, return a list of nodes N such that every N
550 550 satisfies the following constraints:
551 551
552 552 1. N is an ancestor of some node in 'heads'
553 553 2. N is not an ancestor of any node in 'common'
554 554
555 555 The list is sorted by revision number, meaning it is
556 556 topologically sorted.
557 557
558 558 'heads' and 'common' are both lists of node IDs. If heads is
559 559 not supplied, uses all of the revlog's heads. If common is not
560 560 supplied, uses nullid."""
561 561 if common is None:
562 562 common = [nullid]
563 563 if heads is None:
564 564 heads = self.heads()
565 565
566 566 common = [self.rev(n) for n in common]
567 567 heads = [self.rev(n) for n in heads]
568 568
569 569 inc = self.incrementalmissingrevs(common=common)
570 570 return [self.node(r) for r in inc.missingancestors(heads)]
571 571
    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                # no roots means nothing is reachable
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid] # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in self], [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n) # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update([p for p in self.parents(n) if
                                           p != nullid])
                    elif n in heads: # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [n for n in roots if n in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(n) for n in roots])
                else:
                    # No more roots?  Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev:  # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked is descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants.  (We seeded the dependents list with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [n for n, flag in heads.iteritems() if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)
726 726
    def headrevs(self):
        """Return the revision numbers of all heads."""
        try:
            # prefer the C index implementation when present
            return self.index.headrevs()
        except AttributeError:
            # pure-python index object: take the slow path
            return self._headrevs()

    def computephases(self, roots):
        # delegates to the C index; only available with the native parser
        return self.index.computephasesmapsets(roots)
735 735
736 736 def _headrevs(self):
737 737 count = len(self)
738 738 if not count:
739 739 return [nullrev]
740 740 # we won't iter over filtered rev so nobody is a head at start
741 741 ishead = [0] * (count + 1)
742 742 index = self.index
743 743 for r in self:
744 744 ishead[r] = 1 # I may be an head
745 745 e = index[r]
746 746 ishead[e[5]] = ishead[e[6]] = 0 # my parent are not
747 747 return [r for r, val in enumerate(ishead) if val]
748 748
    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            # common case: use the (possibly C-backed) fast path
            if not len(self):
                return [nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullid
        if stop is None:
            stop = []
        stoprevs = set([self.rev(n) for n in stop])
        startrev = self.rev(start)
        # track revs reachable from start; each starts out a head and is
        # demoted once a reachable child appears
        reachable = set((startrev,))
        heads = set((startrev,))

        parentrevs = self.parentrevs
        for r in self.revs(start=startrev + 1):
            for p in parentrevs(r):
                if p in reachable:
                    if r not in stoprevs:
                        reachable.add(r)
                        heads.add(r)
                    # p now has a (non-stopped) child, so it isn't a head
                    if p in heads and p not in stoprevs:
                        heads.remove(p)

        return [self.node(r) for r in heads]
782 782
783 783 def children(self, node):
784 784 """find the children of a given node"""
785 785 c = []
786 786 p = self.rev(node)
787 787 for r in self.revs(start=p + 1):
788 788 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
789 789 if prevs:
790 790 for pr in prevs:
791 791 if pr == p:
792 792 c.append(self.node(r))
793 793 elif p == nullrev:
794 794 c.append(self.node(r))
795 795 return c
796 796
797 797 def descendant(self, start, end):
798 798 if start == nullrev:
799 799 return True
800 800 for i in self.descendants([start]):
801 801 if i == end:
802 802 return True
803 803 elif i > end:
804 804 break
805 805 return False
806 806
    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.commonancestorsheads(a, b)
        except (AttributeError, OverflowError): # C implementation failed
            # pure-python fallback
            ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)
        return map(self.node, ancs)

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        The implementation of this is trivial but the use of
        commonancestorsheads is not."""
        # a is an ancestor of b iff a itself is one of the common heads
        return a in self.commonancestorsheads(a, b)
822 822
    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            # no C index available, or revs too large for it
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return nullid
835 835
    def _match(self, id):
        """Try to resolve *id* to a node by exact means only.

        Accepted forms, in order tried: a revision number (int), a
        20-byte binary node, a decimal str(rev), a 40-char hex nodeid.
        Returns None when nothing matches exactly (callers may then try
        prefix matching).
        """
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node) # quick search the index
                return node
            except LookupError:
                pass # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if str(rev) != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, LookupError):
                pass
869 869
    def _partialmatch(self, id):
        """Resolve a hex nodeid prefix *id* to a node, or None.

        Raises LookupError when the prefix matches more than one node.
        """
        try:
            n = self.index.partialmatch(id)
            if n and self.hasnode(n):
                return n
            return None
        except RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fall through to slow path that filters hidden revisions
            pass
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        if id in self._pcache:
            return self._pcache[id]

        if len(id) < 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2  # grab an even number of digits
                prefix = bin(id[:l * 2])
                # cheap binary-prefix filter first, then the exact (odd
                # length aware) hex comparison
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [n for n in nl if hex(n).startswith(id) and
                      self.hasnode(n)]
                if len(nl) > 0:
                    if len(nl) == 1:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise LookupError(id, self.indexfile,
                                      _('ambiguous identifier'))
                return None
            except TypeError:
                pass
904 904
905 905 def lookup(self, id):
906 906 """locate a node based on:
907 907 - revision number or str(revision number)
908 908 - nodeid or subset of hex nodeid
909 909 """
910 910 n = self._match(id)
911 911 if n is not None:
912 912 return n
913 913 n = self._partialmatch(id)
914 914 if n:
915 915 return n
916 916
917 917 raise LookupError(id, self.indexfile, _('no match found'))
918 918
    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different than what is stored.
        """
        # compare by hash: recomputing the node from text avoids having
        # to reconstruct the stored revision text
        p1, p2 = self.parents(node)
        return hash(text, p1, p2) != node
926 926
927 927 def _addchunk(self, offset, data):
928 928 o, d = self._chunkcache
929 929 # try to add to existing cache
930 930 if o + len(d) == offset and len(d) + len(data) < _chunksize:
931 931 self._chunkcache = o, d + data
932 932 else:
933 933 self._chunkcache = offset, data
934 934
935 935 def _loadchunk(self, offset, length):
936 936 if self._inline:
937 937 df = self.opener(self.indexfile)
938 938 else:
939 939 df = self.opener(self.datafile)
940 940
941 941 # Cache data both forward and backward around the requested
942 942 # data, in a fixed size window. This helps speed up operations
943 943 # involving reading the revlog backwards.
944 944 cachesize = self._chunkcachesize
945 945 realoffset = offset & ~(cachesize - 1)
946 946 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
947 947 - realoffset)
948 948 df.seek(realoffset)
949 949 d = df.read(reallength)
950 950 df.close()
951 951 self._addchunk(realoffset, d)
952 952 if offset != realoffset or reallength != length:
953 953 return util.buffer(d, offset - realoffset, length)
954 954 return d
955 955
956 956 def _getchunk(self, offset, length):
957 957 o, d = self._chunkcache
958 958 l = len(d)
959 959
960 960 # is it in the cache?
961 961 cachestart = offset - o
962 962 cacheend = cachestart + length
963 963 if cachestart >= 0 and cacheend <= l:
964 964 if cachestart == 0 and cacheend == l:
965 965 return d # avoid a copy
966 966 return util.buffer(d, cachestart, cacheend - cachestart)
967 967
968 968 return self._loadchunk(offset, length)
969 969
970 970 def _chunkraw(self, startrev, endrev):
971 971 start = self.start(startrev)
972 972 end = self.end(endrev)
973 973 if self._inline:
974 974 start += (startrev + 1) * self._io.size
975 975 end += (endrev + 1) * self._io.size
976 976 length = end - start
977 977 return self._getchunk(start, length)
978 978
979 979 def _chunk(self, rev):
980 980 return decompress(self._chunkraw(rev, rev))
981 981
    def _chunks(self, revs):
        '''faster version of [self._chunk(rev) for rev in revs]

        Assumes that revs is in ascending order.'''
        if not revs:
            return []
        # hoist attribute lookups out of the hot loop below
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self._io.size
        buffer = util.buffer

        l = []
        ladd = l.append

        # preload the cache
        try:
            # retry until the cache is stable across a _chunkraw call, so the
            # window we slice from below is the one covering revs[0]..revs[-1]
            while True:
                # ensure that the cache doesn't change out from under us
                _cache = self._chunkcache
                self._chunkraw(revs[0], revs[-1])
                if _cache == self._chunkcache:
                    break
            offset, data = _cache
        except OverflowError:
            # issue4215 - we can't cache a run of chunks greater than
            # 2G on Windows
            return [self._chunk(rev) for rev in revs]

        for rev in revs:
            chunkstart = start(rev)
            if inline:
                # skip the index entries interleaved before each revision
                chunkstart += (rev + 1) * iosize
            chunklength = length(rev)
            ladd(decompress(buffer(data, chunkstart - offset, chunklength)))

        return l
1019 1019
1020 1020 def _chunkclear(self):
1021 1021 self._chunkcache = (0, '')
1022 1022
1023 1023 def deltaparent(self, rev):
1024 1024 """return deltaparent of the given revision"""
1025 1025 base = self.index[rev][3]
1026 1026 if base == rev:
1027 1027 return nullrev
1028 1028 elif self._generaldelta:
1029 1029 return base
1030 1030 else:
1031 1031 return rev - 1
1032 1032
1033 1033 def revdiff(self, rev1, rev2):
1034 1034 """return or calculate a delta between two revisions"""
1035 1035 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1036 1036 return str(self._chunk(rev2))
1037 1037
1038 1038 return mdiff.textdiff(self.revision(rev1),
1039 1039 self.revision(rev2))
1040 1040
    def revision(self, nodeorrev):
        """return an uncompressed revision of a given node or revision
        number.
        """
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        cachedrev = None
        if node == nullid:
            return ""
        if self._cache:
            # single-entry cache of the last reconstructed revision
            if self._cache[0] == node:
                return self._cache[2]
            cachedrev = self._cache[1]

        # look up what we need to read
        text = None
        if rev is None:
            rev = self.rev(node)

        # check rev flags
        if self.flags(rev) & ~REVIDX_KNOWN_FLAGS:
            raise RevlogError(_('incompatible revision flag %x') %
                              (self.flags(rev) & ~REVIDX_KNOWN_FLAGS))

        # build delta chain
        chain = []
        index = self.index # for performance
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        # walk toward the chain base; stop early when we hit the cached rev,
        # whose full text can serve as the base of the chain
        while iterrev != e[3] and iterrev != cachedrev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == cachedrev:
            # cache hit
            text = self._cache[2]
        else:
            chain.append(iterrev)
        chain.reverse()

        # drop cache to save memory
        self._cache = None

        bins = self._chunks(chain)
        if text is None:
            # no cached base: first chunk in the chain is the full text
            text = str(bins[0])
            bins = bins[1:]

        # apply the deltas on top of the base text
        text = mdiff.patches(text, bins)

        text = self._checkhash(text, node, rev)

        self._cache = (node, rev, text)
        return text
1105 1105
    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        # delegates to the module-level hash() helper, not the builtin
        return hash(text, p1, p2)
1113 1113
    def _checkhash(self, text, node, rev):
        # verify text against node using the revision's parents, then hand
        # text back unchanged so callers can use this inline
        p1, p2 = self.parents(node)
        self.checkhash(text, p1, p2, node, rev)
        return text
1118 1118
1119 1119 def checkhash(self, text, p1, p2, node, rev=None):
1120 1120 if node != self.hash(text, p1, p2):
1121 1121 revornode = rev
1122 1122 if revornode is None:
1123 1123 revornode = templatefilters.short(hex(node))
1124 1124 raise RevlogError(_("integrity check failed on %s:%s")
1125 1125 % (self.indexfile, revornode))
1126 1126
    def checkinlinesize(self, tr, fp=None):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
            return

        trinfo = tr.find(self.indexfile)
        if trinfo is None:
            raise RevlogError(_("%s not found in the transaction")
                              % self.indexfile)

        trindex = trinfo[2]
        if trindex is not None:
            dataoff = self.start(trindex)
        else:
            # revlog was stripped at start of transaction, use all leftover data
            trindex = len(self) - 1
            dataoff = self.end(-2)

        tr.add(self.datafile, dataoff)

        if fp:
            fp.flush()
            fp.close()

        # copy every revision's payload into the new standalone data file
        df = self.opener(self.datafile, 'w')
        try:
            for r in self:
                df.write(self._chunkraw(r, r))
        finally:
            df.close()

        # rewrite the index without the interleaved revision data
        fp = self.opener(self.indexfile, 'w', atomictemp=True)
        self.version &= ~(REVLOGNGINLINEDATA)
        self._inline = False
        for i in self:
            e = self._io.packentry(self.index[i], self.node, self.version, i)
            fp.write(e)

        # if we don't call close, the temp file will never replace the
        # real index
        fp.close()

        tr.replace(self.indexfile, trindex * self._io.size)
        self._chunkclear()
1170 1176
    def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
                    node=None):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
            computed by default as hash(text, p1, p2), however subclasses might
            use different hashing method (and override checkhash() in such case)

        Returns the node of the (possibly pre-existing) revision.
        """
        if link == nullrev:
            raise RevlogError(_("attempted to add linkrev -1 to %s")
                              % self.indexfile)

        if len(text) > _maxentrysize:
            raise RevlogError(
                _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
                % (self.indexfile, len(text)))

        node = node or self.hash(text, p1, p2)
        if node in self.nodemap:
            # revision already stored; adding is a no-op
            return node

        # open handles here so _addrevision can interleave index/data writes
        dfh = None
        if not self._inline:
            dfh = self.opener(self.datafile, "a")
        ifh = self.opener(self.indexfile, "a+")
        try:
            return self._addrevision(node, text, transaction, link, p1, p2,
                                     REVIDX_DEFAULT_FLAGS, cachedelta, ifh, dfh)
        finally:
            if dfh:
                dfh.close()
            ifh.close()
1208 1214
1209 1215 def compress(self, text):
1210 1216 """ generate a possibly-compressed representation of text """
1211 1217 if not text:
1212 1218 return ("", text)
1213 1219 l = len(text)
1214 1220 bin = None
1215 1221 if l < 44:
1216 1222 pass
1217 1223 elif l > 1000000:
1218 1224 # zlib makes an internal copy, thus doubling memory usage for
1219 1225 # large files, so lets do this in pieces
1220 1226 z = zlib.compressobj()
1221 1227 p = []
1222 1228 pos = 0
1223 1229 while pos < l:
1224 1230 pos2 = pos + 2**20
1225 1231 p.append(z.compress(text[pos:pos2]))
1226 1232 pos = pos2
1227 1233 p.append(z.flush())
1228 1234 if sum(map(len, p)) < l:
1229 1235 bin = "".join(p)
1230 1236 else:
1231 1237 bin = _compress(text)
1232 1238 if bin is None or len(bin) > l:
1233 1239 if text[0] == '\0':
1234 1240 return ("", text)
1235 1241 return ('u', text)
1236 1242 return ("", bin)
1237 1243
1238 1244 def _isgooddelta(self, d, textlen):
1239 1245 """Returns True if the given delta is good. Good means that it is within
1240 1246 the disk span, disk size, and chain length bounds that we know to be
1241 1247 performant."""
1242 1248 if d is None:
1243 1249 return False
1244 1250
1245 1251 # - 'dist' is the distance from the base revision -- bounding it limits
1246 1252 # the amount of I/O we need to do.
1247 1253 # - 'compresseddeltalen' is the sum of the total size of deltas we need
1248 1254 # to apply -- bounding it limits the amount of CPU we consume.
1249 1255 dist, l, data, base, chainbase, chainlen, compresseddeltalen = d
1250 1256 if (dist > textlen * 4 or l > textlen or
1251 1257 compresseddeltalen > textlen * 2 or
1252 1258 (self._maxchainlen and chainlen > self._maxchainlen)):
1253 1259 return False
1254 1260
1255 1261 return True
1256 1262
    def _addrevision(self, node, text, transaction, link, p1, p2, flags,
                     cachedelta, ifh, dfh, alwayscache=False):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.
        invariants:
        - text is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        # one-element list so the nested closure can memoize the full text
        btext = [text]
        def buildtext():
            # lazily reconstruct (and validate) the full revision text
            if btext[0] is not None:
                return btext[0]
            # flush any pending writes here so we can read it in revision
            if dfh:
                dfh.flush()
            ifh.flush()
            baserev = cachedelta[0]
            delta = cachedelta[1]
            # special case deltas which replace entire base; no need to decode
            # base revision. this neatly avoids censored bases, which throw when
            # they're decoded.
            hlen = struct.calcsize(">lll")
            if delta[:hlen] == mdiff.replacediffheader(self.rawsize(baserev),
                                                       len(delta) - hlen):
                btext[0] = delta[hlen:]
            else:
                basetext = self.revision(self.node(baserev))
                btext[0] = mdiff.patch(basetext, delta)
            try:
                self.checkhash(btext[0], p1, p2, node)
                if flags & REVIDX_ISCENSORED:
                    raise RevlogError(_('node %s is not censored') % node)
            except CensoredNodeError:
                # must pass the censored index flag to add censored revisions
                if not flags & REVIDX_ISCENSORED:
                    raise
            return btext[0]

        def builddelta(rev):
            # build a candidate delta against revision 'rev'; returns the
            # bookkeeping tuple consumed by _isgooddelta
            # can we use the cached delta?
            if cachedelta and cachedelta[0] == rev:
                delta = cachedelta[1]
            else:
                t = buildtext()
                if self.iscensored(rev):
                    # deltas based on a censored revision must replace the
                    # full content in one patch, so delta works everywhere
                    header = mdiff.replacediffheader(self.rawsize(rev), len(t))
                    delta = header + t
                else:
                    ptext = self.revision(self.node(rev))
                    delta = mdiff.textdiff(ptext, t)
            data = self.compress(delta)
            l = len(data[1]) + len(data[0])
            if basecache[0] == rev:
                chainbase = basecache[1]
            else:
                chainbase = self.chainbase(rev)
            dist = l + offset - self.start(chainbase)
            if self._generaldelta:
                base = rev
            else:
                base = chainbase
            chainlen, compresseddeltalen = self._chaininfo(rev)
            chainlen += 1
            compresseddeltalen += l
            return dist, l, data, base, chainbase, chainlen, compresseddeltalen

        curr = len(self)
        prev = curr - 1
        base = chainbase = curr
        chainlen = None
        offset = self.end(prev)
        d = None
        if self._basecache is None:
            self._basecache = (prev, self.chainbase(prev))
        basecache = self._basecache
        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if text is None:
            textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
                                        cachedelta[1])
        else:
            textlen = len(text)

        # should we try to build a delta?
        if prev != nullrev:
            if self._generaldelta:
                if p2r != nullrev and self._aggressivemergedeltas:
                    d = builddelta(p1r)
                    d2 = builddelta(p2r)
                    p1good = self._isgooddelta(d, textlen)
                    p2good = self._isgooddelta(d2, textlen)
                    if p1good and p2good:
                        # If both are good deltas, choose the smallest
                        if d2[1] < d[1]:
                            d = d2
                    elif p2good:
                        # If only p2 is good, use it
                        d = d2
                    elif p1good:
                        pass
                    else:
                        # Neither is good, try against prev to hopefully save us
                        # a fulltext.
                        d = builddelta(prev)
                else:
                    # Pick whichever parent is closer to us (to minimize the
                    # chance of having to build a fulltext). Since
                    # nullrev == -1, any non-merge commit will always pick p1r.
                    drev = p2r if p2r > p1r else p1r
                    d = builddelta(drev)
                    # If the chosen delta will result in us making a full text,
                    # give it one last try against prev.
                    if drev != prev and not self._isgooddelta(d, textlen):
                        d = builddelta(prev)
            else:
                d = builddelta(prev)
            dist, l, data, base, chainbase, chainlen, compresseddeltalen = d

        if not self._isgooddelta(d, textlen):
            # no acceptable delta: store the full (compressed) text instead
            text = buildtext()
            data = self.compress(text)
            l = len(data[1]) + len(data[0])
            base = chainbase = curr

        e = (offset_type(offset, flags), l, textlen,
             base, link, p1r, p2r, node)
        self.index.insert(-1, e)
        self.nodemap[node] = curr

        entry = self._io.packentry(e, self.node, self.version, curr)
        self._writeentry(transaction, ifh, dfh, entry, data, link, offset)

        if alwayscache and text is None:
            text = buildtext()

        if type(text) == str: # only accept immutable objects
            self._cache = (node, curr, text)
        self._basecache = (curr, chainbase)
        return node
1401 1407
    def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
        # write a packed index entry and its (flag, payload) data pair through
        # the supplied file handles, registering the writes with the
        # transaction for rollback
        curr = len(self) - 1
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            # flush data before the index references it
            dfh.flush()
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            # may convert the revlog out of inline mode when it grows too big
            self.checkinlinesize(transaction, ifh)
1419 1425
    def addgroup(self, bundle, linkmapper, transaction, addrevisioncb=None):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.

        Returns the list of nodes seen in the group (including nodes that
        were already present).
        """

        # track the base of the current delta log
        content = []
        node = None

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        ifh = self.opener(self.indexfile, "a+")
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self.opener(self.datafile, "a")
        def flush():
            # flush both handles so pending writes become readable
            if dfh:
                dfh.flush()
            ifh.flush()
        try:
            # loop through our set of deltas
            chain = None
            while True:
                chunkdata = bundle.deltachunk(chain)
                if not chunkdata:
                    break
                node = chunkdata['node']
                p1 = chunkdata['p1']
                p2 = chunkdata['p2']
                cs = chunkdata['cs']
                deltabase = chunkdata['deltabase']
                delta = chunkdata['delta']

                content.append(node)

                link = linkmapper(cs)
                if node in self.nodemap:
                    # this can happen if two branches make the same change
                    chain = node
                    continue

                for p in (p1, p2):
                    if p not in self.nodemap:
                        raise LookupError(p, self.indexfile,
                                          _('unknown parent'))

                if deltabase not in self.nodemap:
                    raise LookupError(deltabase, self.indexfile,
                                      _('unknown delta base'))

                baserev = self.rev(deltabase)

                if baserev != nullrev and self.iscensored(baserev):
                    # if base is censored, delta must be full replacement in a
                    # single patch operation
                    hlen = struct.calcsize(">lll")
                    oldlen = self.rawsize(baserev)
                    newlen = len(delta) - hlen
                    if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                        raise error.CensoredBaseError(self.indexfile,
                                                      self.node(baserev))

                flags = REVIDX_DEFAULT_FLAGS
                if self._peek_iscensored(baserev, delta, flush):
                    flags |= REVIDX_ISCENSORED

                # We assume consumers of addrevisioncb will want to retrieve
                # the added revision, which will require a call to
                # revision(). revision() will fast path if there is a cache
                # hit. So, we tell _addrevision() to always cache in this case.
                chain = self._addrevision(node, None, transaction, link,
                                          p1, p2, flags, (baserev, delta),
                                          ifh, dfh,
                                          alwayscache=bool(addrevisioncb))

                if addrevisioncb:
                    addrevisioncb(self, chain)

                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    ifh.close()
                    dfh = self.opener(self.datafile, "a")
                    ifh = self.opener(self.indexfile, "a")
        finally:
            if dfh:
                dfh.close()
            ifh.close()

        return content
1524 1530
1525 1531 def iscensored(self, rev):
1526 1532 """Check if a file revision is censored."""
1527 1533 return False
1528 1534
1529 1535 def _peek_iscensored(self, baserev, delta, flush):
1530 1536 """Quickly check if a delta produces a censored revision."""
1531 1537 return False
1532 1538
    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        brokenrevs = set()
        strippoint = len(self)

        # candidate revs still to visit, mapped to their linkrevs
        heads = {}
        # linkrevs >= minlink known to exist below the current strippoint
        futurelargelinkrevs = set()
        for head in self.headrevs():
            headlinkrev = self.linkrev(head)
            heads[head] = headlinkrev
            if headlinkrev >= minlink:
                futurelargelinkrevs.add(headlinkrev)

        # This algorithm involves walking down the rev graph, starting at the
        # heads. Since the revs are topologically sorted according to linkrev,
        # once all head linkrevs are below the minlink, we know there are
        # no more revs that could have a linkrev greater than minlink.
        # So we can stop walking.
        while futurelargelinkrevs:
            strippoint -= 1
            linkrev = heads.pop(strippoint)

            if linkrev < minlink:
                # this rev survives the strip but its linkrev will be broken
                brokenrevs.add(strippoint)
            else:
                futurelargelinkrevs.remove(linkrev)

            for p in self.parentrevs(strippoint):
                if p != nullrev:
                    plinkrev = self.linkrev(p)
                    heads[p] = plinkrev
                    if plinkrev >= minlink:
                        futurelargelinkrevs.add(plinkrev)

        return strippoint, brokenrevs
1572 1578
    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            # nothing to strip
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self._io.size
        else:
            # inline: index entries and data are interleaved in one file
            end += rev * self._io.size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._cache = None
        self._chaininfocache = {}
        self._chunkclear()
        for x in xrange(rev, len(self)):
            del self.nodemap[self.node(x)]

        del self.index[rev:-1]
1612 1618
    def checksize(self):
        """Compare on-disk sizes against what the index implies.

        Returns a (dd, di) tuple:
        - dd: byte difference between the data file's actual size and the
          size the index expects (0 when the data file is missing)
        - di: extra bytes in the index file beyond whole entries (and, for
          inline revlogs, beyond the interleaved revision data)
        """
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            f = self.opener(self.datafile)
            f.seek(0, 2)
            actual = f.tell()
            f.close()
            dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, 2)
            actual = f.tell()
            f.close()
            s = self._io.size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)
1649 1655
1650 1656 def files(self):
1651 1657 res = [self.indexfile]
1652 1658 if not self._inline:
1653 1659 res.append(self.datafile)
1654 1660 return res
General Comments 0
You need to be logged in to leave comments. Login now