revlog: optimize _chunkraw when startrev==endrev...
Gregory Szorc
r30289:1f92056c default
@@ -1,1817 +1,1820 @@
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import collections
17 17 import errno
18 18 import hashlib
19 19 import os
20 20 import struct
21 21 import zlib
22 22
23 23 # import stuff from node for others to import from revlog
24 24 from .node import (
25 25 bin,
26 26 hex,
27 27 nullid,
28 28 nullrev,
29 29 )
30 30 from .i18n import _
31 31 from . import (
32 32 ancestor,
33 33 error,
34 34 mdiff,
35 35 parsers,
36 36 templatefilters,
37 37 util,
38 38 )
39 39
40 40 _pack = struct.pack
41 41 _unpack = struct.unpack
42 42 _compress = zlib.compress
43 43 _decompress = zlib.decompress
44 44
45 45 # revlog header flags
46 46 REVLOGV0 = 0
47 47 REVLOGNG = 1
48 48 REVLOGNGINLINEDATA = (1 << 16)
49 49 REVLOGGENERALDELTA = (1 << 17)
50 50 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
51 51 REVLOG_DEFAULT_FORMAT = REVLOGNG
52 52 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
53 53 REVLOGNG_FLAGS = REVLOGNGINLINEDATA | REVLOGGENERALDELTA
54 54
55 55 # revlog index flags
56 56 REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
57 57 REVIDX_DEFAULT_FLAGS = 0
58 58 REVIDX_KNOWN_FLAGS = REVIDX_ISCENSORED
59 59
60 60 # max size of revlog with inline data
61 61 _maxinline = 131072
62 62 _chunksize = 1048576
63 63
64 64 RevlogError = error.RevlogError
65 65 LookupError = error.LookupError
66 66 CensoredNodeError = error.CensoredNodeError
67 67
68 68 def getoffset(q):
69 69 return int(q >> 16)
70 70
71 71 def gettype(q):
72 72 return int(q & 0xFFFF)
73 73
74 74 def offset_type(offset, type):
75 75 return long(long(offset) << 16 | type)
76 76
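These helpers pack a 48-bit data-file offset and a 16-bit flags field into the single integer stored in slot 0 of an index entry. A quick roundtrip sketch using the helpers above (the values are made up):

    packed = offset_type(131072, REVIDX_ISCENSORED)  # offset 128 KiB + censor flag
    assert getoffset(packed) == 131072
    assert gettype(packed) == REVIDX_ISCENSORED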
77 77 _nullhash = hashlib.sha1(nullid)
78 78
79 79 def hash(text, p1, p2):
80 80 """generate a hash from the given text and its parent hashes
81 81
82 82 This hash combines both the current file contents and its history
83 83 in a manner that makes it easy to distinguish nodes with the same
84 84 content in the revision graph.
85 85 """
86 86 # As of now, if one of the parent nodes is null, p2 is null
87 87 if p2 == nullid:
88 88 # deep copy of a hash is faster than creating one
89 89 s = _nullhash.copy()
90 90 s.update(p1)
91 91 else:
92 92 # none of the parent nodes are nullid
93 93 l = [p1, p2]
94 94 l.sort()
95 95 s = hashlib.sha1(l[0])
96 96 s.update(l[1])
97 97 s.update(text)
98 98 return s.digest()
99 99
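For reference, the same digest computed directly with hashlib; a standalone sketch with made-up parent ids (the parents are hashed in sorted order, then the text):

    import hashlib

    p1, p2 = '\x11' * 20, '\x00' * 20         # 20-byte binary parent nodeids
    text = 'file contents\n'
    l = sorted([p1, p2])                      # order-independent parent hashing
    s = hashlib.sha1(l[0])
    s.update(l[1])
    s.update(text)
    node = s.digest()                         # the 20-byte binary nodeid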
100 100 def decompress(bin):
101 101 """ decompress the given input """
102 102 if not bin:
103 103 return bin
104 104 t = bin[0]
105 105 if t == '\0':
106 106 return bin
107 107 if t == 'x':
108 108 try:
109 109 return _decompress(bin)
110 110 except zlib.error as e:
111 111 raise RevlogError(_("revlog decompress error: %s") % str(e))
112 112 if t == 'u':
113 113 return util.buffer(bin, 1)
114 114 raise RevlogError(_("unknown compression type %r") % t)
115 115
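The first byte of a stored chunk selects the decoder. A sketch exercising each tag against this decompress() (the 'u' case returns a buffer, hence the str()):

    import zlib

    assert decompress('') == ''                        # empty chunk passes through
    assert decompress('\0abc') == '\0abc'              # '\0': stored verbatim
    assert decompress(zlib.compress('abc')) == 'abc'   # 'x': a zlib stream
    assert str(decompress('uabc')) == 'abc'            # 'u': raw bytes, tag stripped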
116 116 # index v0:
117 117 # 4 bytes: offset
118 118 # 4 bytes: compressed length
119 119 # 4 bytes: base rev
120 120 # 4 bytes: link rev
121 121 # 20 bytes: parent 1 nodeid
122 122 # 20 bytes: parent 2 nodeid
123 123 # 20 bytes: nodeid
124 124 indexformatv0 = ">4l20s20s20s"
125 125
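A sketch of one v0 record roundtrip through the format string above (field values are made up; v0 records are a fixed 76 bytes):

    import struct

    rec = struct.pack(indexformatv0,
                      0, 11,                  # data-file offset, compressed length
                      0, 0,                   # base rev, link rev
                      '\x11' * 20,            # parent 1 nodeid
                      '\x00' * 20,            # parent 2 nodeid (nullid here)
                      '\xaa' * 20)            # this revision's nodeid
    assert len(rec) == struct.calcsize(indexformatv0) == 76
    offset, clen, base, link, p1, p2, node = struct.unpack(indexformatv0, rec)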
126 126 class revlogoldio(object):
127 127 def __init__(self):
128 128 self.size = struct.calcsize(indexformatv0)
129 129
130 130 def parseindex(self, data, inline):
131 131 s = self.size
132 132 index = []
133 133 nodemap = {nullid: nullrev}
134 134 n = off = 0
135 135 l = len(data)
136 136 while off + s <= l:
137 137 cur = data[off:off + s]
138 138 off += s
139 139 e = _unpack(indexformatv0, cur)
140 140 # transform to revlogv1 format
141 141 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
142 142 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
143 143 index.append(e2)
144 144 nodemap[e[6]] = n
145 145 n += 1
146 146
147 147 # add the magic null revision at -1
148 148 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
149 149
150 150 return index, nodemap, None
151 151
152 152 def packentry(self, entry, node, version, rev):
153 153 if gettype(entry[0]):
154 154 raise RevlogError(_("index entry flags need RevlogNG"))
155 155 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
156 156 node(entry[5]), node(entry[6]), entry[7])
157 157 return _pack(indexformatv0, *e2)
158 158
159 159 # index ng:
160 160 # 6 bytes: offset
161 161 # 2 bytes: flags
162 162 # 4 bytes: compressed length
163 163 # 4 bytes: uncompressed length
164 164 # 4 bytes: base rev
165 165 # 4 bytes: link rev
166 166 # 4 bytes: parent 1 rev
167 167 # 4 bytes: parent 2 rev
168 168 # 32 bytes: nodeid
169 169 indexformatng = ">Qiiiiii20s12x"
170 170 versionformat = ">I"
171 171
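A sketch of one RevlogNG record; the trailing 12x pads each record to a fixed 64 bytes, and slot 0 is the packed offset+flags produced by offset_type() above:

    import struct

    assert struct.calcsize(indexformatng) == 64
    entry = (offset_type(0, 0),               # 6-byte offset, 2-byte flags
             11, 12,                          # compressed / uncompressed lengths
             0, 0,                            # base rev, link rev
             -1, -1,                          # parent revs (nullrev: no parent)
             '\xaa' * 20)                     # binary nodeid
    rec = struct.pack(indexformatng, *entry)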
172 172 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
173 173 # signed integer)
174 174 _maxentrysize = 0x7fffffff
175 175
176 176 class revlogio(object):
177 177 def __init__(self):
178 178 self.size = struct.calcsize(indexformatng)
179 179
180 180 def parseindex(self, data, inline):
181 181 # call the C implementation to parse the index data
182 182 index, cache = parsers.parse_index2(data, inline)
183 183 return index, getattr(index, 'nodemap', None), cache
184 184
185 185 def packentry(self, entry, node, version, rev):
186 186 p = _pack(indexformatng, *entry)
187 187 if rev == 0:
188 188 p = _pack(versionformat, version) + p[4:]
189 189 return p
190 190
191 191 class revlog(object):
192 192 """
193 193 the underlying revision storage object
194 194
195 195 A revlog consists of two parts, an index and the revision data.
196 196
197 197 The index is a file with a fixed record size containing
198 198 information on each revision, including its nodeid (hash), the
199 199 nodeids of its parents, the position and offset of its data within
200 200 the data file, and the revision it's based on. Finally, each entry
201 201 contains a linkrev entry that can serve as a pointer to external
202 202 data.
203 203
204 204 The revision data itself is a linear collection of data chunks.
205 205 Each chunk represents a revision and is usually represented as a
206 206 delta against the previous chunk. To bound lookup time, runs of
207 207 deltas are limited to about 2 times the length of the original
208 208 version data. This makes retrieval of a version proportional to
209 209 its size, or O(1) relative to the number of revisions.
210 210
211 211 Both pieces of the revlog are written to in an append-only
212 212 fashion, which means we never need to rewrite a file to insert or
213 213 remove data, and can use some simple techniques to avoid the need
214 214 for locking while reading.
215 215
216 216 If checkambig, indexfile is opened with checkambig=True at
217 217 writing, to avoid file stat ambiguity.
218 218 """
219 219 def __init__(self, opener, indexfile, checkambig=False):
220 220 """
221 221 create a revlog object
222 222
223 223 opener is a function that abstracts the file opening operation
224 224 and can be used to implement COW semantics or the like.
225 225 """
226 226 self.indexfile = indexfile
227 227 self.datafile = indexfile[:-2] + ".d"
228 228 self.opener = opener
229 229 # When True, indexfile is opened with checkambig=True at writing, to
230 230 # avoid file stat ambiguity.
231 231 self._checkambig = checkambig
232 232 # 3-tuple of (node, rev, text) for a raw revision.
233 233 self._cache = None
234 234 # Maps rev to chain base rev.
235 235 self._chainbasecache = util.lrucachedict(100)
236 236 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
237 237 self._chunkcache = (0, '')
238 238 # How much data to read and cache into the raw revlog data cache.
239 239 self._chunkcachesize = 65536
240 240 self._maxchainlen = None
241 241 self._aggressivemergedeltas = False
242 242 self.index = []
243 243 # Mapping of partial identifiers to full nodes.
244 244 self._pcache = {}
245 245 # Mapping of revision integer to full node.
246 246 self._nodecache = {nullid: nullrev}
247 247 self._nodepos = None
248 248
249 249 v = REVLOG_DEFAULT_VERSION
250 250 opts = getattr(opener, 'options', None)
251 251 if opts is not None:
252 252 if 'revlogv1' in opts:
253 253 if 'generaldelta' in opts:
254 254 v |= REVLOGGENERALDELTA
255 255 else:
256 256 v = 0
257 257 if 'chunkcachesize' in opts:
258 258 self._chunkcachesize = opts['chunkcachesize']
259 259 if 'maxchainlen' in opts:
260 260 self._maxchainlen = opts['maxchainlen']
261 261 if 'aggressivemergedeltas' in opts:
262 262 self._aggressivemergedeltas = opts['aggressivemergedeltas']
263 263 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
264 264
265 265 if self._chunkcachesize <= 0:
266 266 raise RevlogError(_('revlog chunk cache size %r is not greater '
267 267 'than 0') % self._chunkcachesize)
268 268 elif self._chunkcachesize & (self._chunkcachesize - 1):
269 269 raise RevlogError(_('revlog chunk cache size %r is not a power '
270 270 'of 2') % self._chunkcachesize)
271 271
272 272 indexdata = ''
273 273 self._initempty = True
274 274 try:
275 275 f = self.opener(self.indexfile)
276 276 indexdata = f.read()
277 277 f.close()
278 278 if len(indexdata) > 0:
279 279 v = struct.unpack(versionformat, indexdata[:4])[0]
280 280 self._initempty = False
281 281 except IOError as inst:
282 282 if inst.errno != errno.ENOENT:
283 283 raise
284 284
285 285 self.version = v
286 286 self._inline = v & REVLOGNGINLINEDATA
287 287 self._generaldelta = v & REVLOGGENERALDELTA
288 288 flags = v & ~0xFFFF
289 289 fmt = v & 0xFFFF
290 290 if fmt == REVLOGV0 and flags:
291 291 raise RevlogError(_("index %s unknown flags %#04x for format v0")
292 292 % (self.indexfile, flags >> 16))
293 293 elif fmt == REVLOGNG and flags & ~REVLOGNG_FLAGS:
294 294 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
295 295 % (self.indexfile, flags >> 16))
296 296 elif fmt > REVLOGNG:
297 297 raise RevlogError(_("index %s unknown format %d")
298 298 % (self.indexfile, fmt))
299 299
300 300 self.storedeltachains = True
301 301
302 302 self._io = revlogio()
303 303 if self.version == REVLOGV0:
304 304 self._io = revlogoldio()
305 305 try:
306 306 d = self._io.parseindex(indexdata, self._inline)
307 307 except (ValueError, IndexError):
308 308 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
309 309 self.index, nodemap, self._chunkcache = d
310 310 if nodemap is not None:
311 311 self.nodemap = self._nodecache = nodemap
312 312 if not self._chunkcache:
313 313 self._chunkclear()
314 314 # revnum -> (chain-length, sum-delta-length)
315 315 self._chaininfocache = {}
316 316
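A hypothetical construction sketch (the repository path and the scmutil.vfs opener are assumptions, not part of this file; any callable honoring the opener contract described in the docstring works):

    from mercurial import scmutil

    opener = scmutil.vfs('/path/to/repo/.hg/store')
    rl = revlog(opener, '00changelog.i')      # parses the index (and inline data)
    tipnode = rl.node(len(rl) - 1)            # same node as rl.tip()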
317 317 def tip(self):
318 318 return self.node(len(self.index) - 2)
319 319 def __contains__(self, rev):
320 320 return 0 <= rev < len(self)
321 321 def __len__(self):
322 322 return len(self.index) - 1
323 323 def __iter__(self):
324 324 return iter(xrange(len(self)))
325 325 def revs(self, start=0, stop=None):
326 326 """iterate over all revs in this revlog (from start to stop)"""
327 327 step = 1
328 328 if stop is not None:
329 329 if start > stop:
330 330 step = -1
331 331 stop += step
332 332 else:
333 333 stop = len(self)
334 334 return xrange(start, stop, step)
335 335
336 336 @util.propertycache
337 337 def nodemap(self):
338 338 self.rev(self.node(0))
339 339 return self._nodecache
340 340
341 341 def hasnode(self, node):
342 342 try:
343 343 self.rev(node)
344 344 return True
345 345 except KeyError:
346 346 return False
347 347
348 348 def clearcaches(self):
349 349 self._cache = None
350 350 self._chainbasecache.clear()
351 351 self._chunkcache = (0, '')
352 352 self._pcache = {}
353 353
354 354 try:
355 355 self._nodecache.clearcaches()
356 356 except AttributeError:
357 357 self._nodecache = {nullid: nullrev}
358 358 self._nodepos = None
359 359
360 360 def rev(self, node):
361 361 try:
362 362 return self._nodecache[node]
363 363 except TypeError:
364 364 raise
365 365 except RevlogError:
366 366 # parsers.c radix tree lookup failed
367 367 raise LookupError(node, self.indexfile, _('no node'))
368 368 except KeyError:
369 369 # pure python cache lookup failed
370 370 n = self._nodecache
371 371 i = self.index
372 372 p = self._nodepos
373 373 if p is None:
374 374 p = len(i) - 2
375 375 for r in xrange(p, -1, -1):
376 376 v = i[r][7]
377 377 n[v] = r
378 378 if v == node:
379 379 self._nodepos = r - 1
380 380 return r
381 381 raise LookupError(node, self.indexfile, _('no node'))
382 382
383 383 # Accessors for index entries.
384 384
385 385 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
386 386 # are flags.
387 387 def start(self, rev):
388 388 return int(self.index[rev][0] >> 16)
389 389
390 390 def flags(self, rev):
391 391 return self.index[rev][0] & 0xFFFF
392 392
393 393 def length(self, rev):
394 394 return self.index[rev][1]
395 395
396 396 def rawsize(self, rev):
397 397 """return the length of the uncompressed text for a given revision"""
398 398 l = self.index[rev][2]
399 399 if l >= 0:
400 400 return l
401 401
402 402 t = self.revision(self.node(rev))
403 403 return len(t)
404 404 size = rawsize
405 405
406 406 def chainbase(self, rev):
407 407 base = self._chainbasecache.get(rev)
408 408 if base is not None:
409 409 return base
410 410
411 411 index = self.index
412 412 base = index[rev][3]
413 413 while base != rev:
414 414 rev = base
415 415 base = index[rev][3]
416 416
417 417 self._chainbasecache[rev] = base
418 418 return base
419 419
420 420 def linkrev(self, rev):
421 421 return self.index[rev][4]
422 422
423 423 def parentrevs(self, rev):
424 424 return self.index[rev][5:7]
425 425
426 426 def node(self, rev):
427 427 return self.index[rev][7]
428 428
429 429 # Derived from index values.
430 430
431 431 def end(self, rev):
432 432 return self.start(rev) + self.length(rev)
433 433
434 434 def parents(self, node):
435 435 i = self.index
436 436 d = i[self.rev(node)]
437 437 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
438 438
439 439 def chainlen(self, rev):
440 440 return self._chaininfo(rev)[0]
441 441
442 442 def _chaininfo(self, rev):
443 443 chaininfocache = self._chaininfocache
444 444 if rev in chaininfocache:
445 445 return chaininfocache[rev]
446 446 index = self.index
447 447 generaldelta = self._generaldelta
448 448 iterrev = rev
449 449 e = index[iterrev]
450 450 clen = 0
451 451 compresseddeltalen = 0
452 452 while iterrev != e[3]:
453 453 clen += 1
454 454 compresseddeltalen += e[1]
455 455 if generaldelta:
456 456 iterrev = e[3]
457 457 else:
458 458 iterrev -= 1
459 459 if iterrev in chaininfocache:
460 460 t = chaininfocache[iterrev]
461 461 clen += t[0]
462 462 compresseddeltalen += t[1]
463 463 break
464 464 e = index[iterrev]
465 465 else:
466 466 # Add text length of base since decompressing that also takes
467 467 # work. For cache hits the length is already included.
468 468 compresseddeltalen += e[1]
469 469 r = (clen, compresseddeltalen)
470 470 chaininfocache[rev] = r
471 471 return r
472 472
473 473 def _deltachain(self, rev, stoprev=None):
474 474 """Obtain the delta chain for a revision.
475 475
476 476 ``stoprev`` specifies a revision to stop at. If not specified, we
477 477 stop at the base of the chain.
478 478
479 479 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
480 480 revs in ascending order and ``stopped`` is a bool indicating whether
481 481 ``stoprev`` was hit.
482 482 """
483 483 chain = []
484 484
485 485 # Alias to prevent attribute lookup in tight loop.
486 486 index = self.index
487 487 generaldelta = self._generaldelta
488 488
489 489 iterrev = rev
490 490 e = index[iterrev]
491 491 while iterrev != e[3] and iterrev != stoprev:
492 492 chain.append(iterrev)
493 493 if generaldelta:
494 494 iterrev = e[3]
495 495 else:
496 496 iterrev -= 1
497 497 e = index[iterrev]
498 498
499 499 if iterrev == stoprev:
500 500 stopped = True
501 501 else:
502 502 chain.append(iterrev)
503 503 stopped = False
504 504
505 505 chain.reverse()
506 506 return chain, stopped
507 507
508 508 def ancestors(self, revs, stoprev=0, inclusive=False):
509 509 """Generate the ancestors of 'revs' in reverse topological order.
510 510 Does not generate revs lower than stoprev.
511 511
512 512 See the documentation for ancestor.lazyancestors for more details."""
513 513
514 514 return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
515 515 inclusive=inclusive)
516 516
517 517 def descendants(self, revs):
518 518 """Generate the descendants of 'revs' in revision order.
519 519
520 520 Yield a sequence of revision numbers starting with a child of
521 521 some rev in revs, i.e., each revision is *not* considered a
522 522 descendant of itself. Results are ordered by revision number (a
523 523 topological sort)."""
524 524 first = min(revs)
525 525 if first == nullrev:
526 526 for i in self:
527 527 yield i
528 528 return
529 529
530 530 seen = set(revs)
531 531 for i in self.revs(start=first + 1):
532 532 for x in self.parentrevs(i):
533 533 if x != nullrev and x in seen:
534 534 seen.add(i)
535 535 yield i
536 536 break
537 537
538 538 def findcommonmissing(self, common=None, heads=None):
539 539 """Return a tuple of the ancestors of common and the ancestors of heads
540 540 that are not ancestors of common. In revset terminology, we return the
541 541 tuple:
542 542
543 543 ::common, (::heads) - (::common)
544 544
545 545 The list is sorted by revision number, meaning it is
546 546 topologically sorted.
547 547
548 548 'heads' and 'common' are both lists of node IDs. If heads is
549 549 not supplied, uses all of the revlog's heads. If common is not
550 550 supplied, uses nullid."""
551 551 if common is None:
552 552 common = [nullid]
553 553 if heads is None:
554 554 heads = self.heads()
555 555
556 556 common = [self.rev(n) for n in common]
557 557 heads = [self.rev(n) for n in heads]
558 558
559 559 # we want the ancestors, but inclusive
560 560 class lazyset(object):
561 561 def __init__(self, lazyvalues):
562 562 self.addedvalues = set()
563 563 self.lazyvalues = lazyvalues
564 564
565 565 def __contains__(self, value):
566 566 return value in self.addedvalues or value in self.lazyvalues
567 567
568 568 def __iter__(self):
569 569 added = self.addedvalues
570 570 for r in added:
571 571 yield r
572 572 for r in self.lazyvalues:
573 573 if r not in added:
574 574 yield r
575 575
576 576 def add(self, value):
577 577 self.addedvalues.add(value)
578 578
579 579 def update(self, values):
580 580 self.addedvalues.update(values)
581 581
582 582 has = lazyset(self.ancestors(common))
583 583 has.add(nullrev)
584 584 has.update(common)
585 585
586 586 # take all ancestors from heads that aren't in has
587 587 missing = set()
588 588 visit = collections.deque(r for r in heads if r not in has)
589 589 while visit:
590 590 r = visit.popleft()
591 591 if r in missing:
592 592 continue
593 593 else:
594 594 missing.add(r)
595 595 for p in self.parentrevs(r):
596 596 if p not in has:
597 597 visit.append(p)
598 598 missing = list(missing)
599 599 missing.sort()
600 600 return has, [self.node(r) for r in missing]
601 601
602 602 def incrementalmissingrevs(self, common=None):
603 603 """Return an object that can be used to incrementally compute the
604 604 revision numbers of the ancestors of arbitrary sets that are not
605 605 ancestors of common. This is an ancestor.incrementalmissingancestors
606 606 object.
607 607
608 608 'common' is a list of revision numbers. If common is not supplied, uses
609 609 nullrev.
610 610 """
611 611 if common is None:
612 612 common = [nullrev]
613 613
614 614 return ancestor.incrementalmissingancestors(self.parentrevs, common)
615 615
616 616 def findmissingrevs(self, common=None, heads=None):
617 617 """Return the revision numbers of the ancestors of heads that
618 618 are not ancestors of common.
619 619
620 620 More specifically, return a list of revision numbers corresponding to
621 621 nodes N such that every N satisfies the following constraints:
622 622
623 623 1. N is an ancestor of some node in 'heads'
624 624 2. N is not an ancestor of any node in 'common'
625 625
626 626 The list is sorted by revision number, meaning it is
627 627 topologically sorted.
628 628
629 629 'heads' and 'common' are both lists of revision numbers. If heads is
630 630 not supplied, uses all of the revlog's heads. If common is not
631 631 supplied, uses nullid."""
632 632 if common is None:
633 633 common = [nullrev]
634 634 if heads is None:
635 635 heads = self.headrevs()
636 636
637 637 inc = self.incrementalmissingrevs(common=common)
638 638 return inc.missingancestors(heads)
639 639
640 640 def findmissing(self, common=None, heads=None):
641 641 """Return the ancestors of heads that are not ancestors of common.
642 642
643 643 More specifically, return a list of nodes N such that every N
644 644 satisfies the following constraints:
645 645
646 646 1. N is an ancestor of some node in 'heads'
647 647 2. N is not an ancestor of any node in 'common'
648 648
649 649 The list is sorted by revision number, meaning it is
650 650 topologically sorted.
651 651
652 652 'heads' and 'common' are both lists of node IDs. If heads is
653 653 not supplied, uses all of the revlog's heads. If common is not
654 654 supplied, uses nullid."""
655 655 if common is None:
656 656 common = [nullid]
657 657 if heads is None:
658 658 heads = self.heads()
659 659
660 660 common = [self.rev(n) for n in common]
661 661 heads = [self.rev(n) for n in heads]
662 662
663 663 inc = self.incrementalmissingrevs(common=common)
664 664 return [self.node(r) for r in inc.missingancestors(heads)]
665 665
666 666 def nodesbetween(self, roots=None, heads=None):
667 667 """Return a topological path from 'roots' to 'heads'.
668 668
669 669 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
670 670 topologically sorted list of all nodes N that satisfy both of
671 671 these constraints:
672 672
673 673 1. N is a descendant of some node in 'roots'
674 674 2. N is an ancestor of some node in 'heads'
675 675
676 676 Every node is considered to be both a descendant and an ancestor
677 677 of itself, so every reachable node in 'roots' and 'heads' will be
678 678 included in 'nodes'.
679 679
680 680 'outroots' is the list of reachable nodes in 'roots', i.e., the
681 681 subset of 'roots' that is returned in 'nodes'. Likewise,
682 682 'outheads' is the subset of 'heads' that is also in 'nodes'.
683 683
684 684 'roots' and 'heads' are both lists of node IDs. If 'roots' is
685 685 unspecified, uses nullid as the only root. If 'heads' is
686 686 unspecified, uses list of all of the revlog's heads."""
687 687 nonodes = ([], [], [])
688 688 if roots is not None:
689 689 roots = list(roots)
690 690 if not roots:
691 691 return nonodes
692 692 lowestrev = min([self.rev(n) for n in roots])
693 693 else:
694 694 roots = [nullid] # Everybody's a descendant of nullid
695 695 lowestrev = nullrev
696 696 if (lowestrev == nullrev) and (heads is None):
697 697 # We want _all_ the nodes!
698 698 return ([self.node(r) for r in self], [nullid], list(self.heads()))
699 699 if heads is None:
700 700 # All nodes are ancestors, so the latest ancestor is the last
701 701 # node.
702 702 highestrev = len(self) - 1
703 703 # Set ancestors to None to signal that every node is an ancestor.
704 704 ancestors = None
705 705 # Set heads to an empty dictionary for later discovery of heads
706 706 heads = {}
707 707 else:
708 708 heads = list(heads)
709 709 if not heads:
710 710 return nonodes
711 711 ancestors = set()
712 712 # Turn heads into a dictionary so we can remove 'fake' heads.
713 713 # Also, later we will be using it to filter out the heads we can't
714 714 # find from roots.
715 715 heads = dict.fromkeys(heads, False)
716 716 # Start at the top and keep marking parents until we're done.
717 717 nodestotag = set(heads)
718 718 # Remember where the top was so we can use it as a limit later.
719 719 highestrev = max([self.rev(n) for n in nodestotag])
720 720 while nodestotag:
721 721 # grab a node to tag
722 722 n = nodestotag.pop()
723 723 # Never tag nullid
724 724 if n == nullid:
725 725 continue
726 726 # A node's revision number represents its place in a
727 727 # topologically sorted list of nodes.
728 728 r = self.rev(n)
729 729 if r >= lowestrev:
730 730 if n not in ancestors:
731 731 # If we are possibly a descendant of one of the roots
732 732 # and we haven't already been marked as an ancestor
733 733 ancestors.add(n) # Mark as ancestor
734 734 # Add non-nullid parents to list of nodes to tag.
735 735 nodestotag.update([p for p in self.parents(n) if
736 736 p != nullid])
737 737 elif n in heads: # We've seen it before, is it a fake head?
738 738 # So it is, real heads should not be the ancestors of
739 739 # any other heads.
740 740 heads.pop(n)
741 741 if not ancestors:
742 742 return nonodes
743 743 # Now that we have our set of ancestors, we want to remove any
744 744 # roots that are not ancestors.
745 745
746 746 # If one of the roots was nullid, everything is included anyway.
747 747 if lowestrev > nullrev:
748 748 # But, since we weren't, let's recompute the lowest rev to not
749 749 # include roots that aren't ancestors.
750 750
751 751 # Filter out roots that aren't ancestors of heads
752 752 roots = [n for n in roots if n in ancestors]
753 753 # Recompute the lowest revision
754 754 if roots:
755 755 lowestrev = min([self.rev(n) for n in roots])
756 756 else:
757 757 # No more roots? Return empty list
758 758 return nonodes
759 759 else:
760 760 # We are descending from nullid, and don't need to care about
761 761 # any other roots.
762 762 lowestrev = nullrev
763 763 roots = [nullid]
764 764 # Transform our roots list into a set.
765 765 descendants = set(roots)
766 766 # Also, keep the original roots so we can filter out roots that aren't
767 767 # 'real' roots (i.e. are descended from other roots).
768 768 roots = descendants.copy()
769 769 # Our topologically sorted list of output nodes.
770 770 orderedout = []
771 771 # Don't start at nullid since we don't want nullid in our output list,
772 772 # and if nullid shows up in descendants, empty parents will look like
773 773 # they're descendants.
774 774 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
775 775 n = self.node(r)
776 776 isdescendant = False
777 777 if lowestrev == nullrev: # Everybody is a descendant of nullid
778 778 isdescendant = True
779 779 elif n in descendants:
780 780 # n is already a descendant
781 781 isdescendant = True
782 782 # This check only needs to be done here because all the roots
783 783 # will start being marked as descendants before the loop.
784 784 if n in roots:
785 785 # If n was a root, check if it's a 'real' root.
786 786 p = tuple(self.parents(n))
787 787 # If any of its parents are descendants, it's not a root.
788 788 if (p[0] in descendants) or (p[1] in descendants):
789 789 roots.remove(n)
790 790 else:
791 791 p = tuple(self.parents(n))
792 792 # A node is a descendant if either of its parents are
793 793 # descendants. (We seeded the descendants set with the roots
794 794 # up there, remember?)
795 795 if (p[0] in descendants) or (p[1] in descendants):
796 796 descendants.add(n)
797 797 isdescendant = True
798 798 if isdescendant and ((ancestors is None) or (n in ancestors)):
799 799 # Only include nodes that are both descendants and ancestors.
800 800 orderedout.append(n)
801 801 if (ancestors is not None) and (n in heads):
802 802 # We're trying to figure out which heads are reachable
803 803 # from roots.
804 804 # Mark this head as having been reached
805 805 heads[n] = True
806 806 elif ancestors is None:
807 807 # Otherwise, we're trying to discover the heads.
808 808 # Assume this is a head because if it isn't, the next step
809 809 # will eventually remove it.
810 810 heads[n] = True
811 811 # But, obviously its parents aren't.
812 812 for p in self.parents(n):
813 813 heads.pop(p, None)
814 814 heads = [n for n, flag in heads.iteritems() if flag]
815 815 roots = list(roots)
816 816 assert orderedout
817 817 assert roots
818 818 assert heads
819 819 return (orderedout, roots, heads)
820 820
821 821 def headrevs(self):
822 822 try:
823 823 return self.index.headrevs()
824 824 except AttributeError:
825 825 return self._headrevs()
826 826
827 827 def computephases(self, roots):
828 828 return self.index.computephasesmapsets(roots)
829 829
830 830 def _headrevs(self):
831 831 count = len(self)
832 832 if not count:
833 833 return [nullrev]
834 834 # we won't iter over filtered rev so nobody is a head at start
835 835 ishead = [0] * (count + 1)
836 836 index = self.index
837 837 for r in self:
838 838 ishead[r] = 1 # I may be a head
839 839 e = index[r]
840 840 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
841 841 return [r for r, val in enumerate(ishead) if val]
842 842
843 843 def heads(self, start=None, stop=None):
844 844 """return the list of all nodes that have no children
845 845
846 846 if start is specified, only heads that are descendants of
847 847 start will be returned
848 848 if stop is specified, it will consider all the revs from stop
849 849 as if they had no children
850 850 """
851 851 if start is None and stop is None:
852 852 if not len(self):
853 853 return [nullid]
854 854 return [self.node(r) for r in self.headrevs()]
855 855
856 856 if start is None:
857 857 start = nullid
858 858 if stop is None:
859 859 stop = []
860 860 stoprevs = set([self.rev(n) for n in stop])
861 861 startrev = self.rev(start)
862 862 reachable = set((startrev,))
863 863 heads = set((startrev,))
864 864
865 865 parentrevs = self.parentrevs
866 866 for r in self.revs(start=startrev + 1):
867 867 for p in parentrevs(r):
868 868 if p in reachable:
869 869 if r not in stoprevs:
870 870 reachable.add(r)
871 871 heads.add(r)
872 872 if p in heads and p not in stoprevs:
873 873 heads.remove(p)
874 874
875 875 return [self.node(r) for r in heads]
876 876
877 877 def children(self, node):
878 878 """find the children of a given node"""
879 879 c = []
880 880 p = self.rev(node)
881 881 for r in self.revs(start=p + 1):
882 882 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
883 883 if prevs:
884 884 for pr in prevs:
885 885 if pr == p:
886 886 c.append(self.node(r))
887 887 elif p == nullrev:
888 888 c.append(self.node(r))
889 889 return c
890 890
891 891 def descendant(self, start, end):
892 892 if start == nullrev:
893 893 return True
894 894 for i in self.descendants([start]):
895 895 if i == end:
896 896 return True
897 897 elif i > end:
898 898 break
899 899 return False
900 900
901 901 def commonancestorsheads(self, a, b):
902 902 """calculate all the heads of the common ancestors of nodes a and b"""
903 903 a, b = self.rev(a), self.rev(b)
904 904 try:
905 905 ancs = self.index.commonancestorsheads(a, b)
906 906 except (AttributeError, OverflowError): # C implementation failed
907 907 ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)
908 908 return map(self.node, ancs)
909 909
910 910 def isancestor(self, a, b):
911 911 """return True if node a is an ancestor of node b
912 912
913 913 The implementation of this is trivial but the use of
914 914 commonancestorsheads is not."""
915 915 return a in self.commonancestorsheads(a, b)
916 916
917 917 def ancestor(self, a, b):
918 918 """calculate the "best" common ancestor of nodes a and b"""
919 919
920 920 a, b = self.rev(a), self.rev(b)
921 921 try:
922 922 ancs = self.index.ancestors(a, b)
923 923 except (AttributeError, OverflowError):
924 924 ancs = ancestor.ancestors(self.parentrevs, a, b)
925 925 if ancs:
926 926 # choose a consistent winner when there's a tie
927 927 return min(map(self.node, ancs))
928 928 return nullid
929 929
930 930 def _match(self, id):
931 931 if isinstance(id, int):
932 932 # rev
933 933 return self.node(id)
934 934 if len(id) == 20:
935 935 # possibly a binary node
936 936 # odds of a binary node being all hex in ASCII are 1 in 10**25
937 937 try:
938 938 node = id
939 939 self.rev(node) # quick search the index
940 940 return node
941 941 except LookupError:
942 942 pass # may be partial hex id
943 943 try:
944 944 # str(rev)
945 945 rev = int(id)
946 946 if str(rev) != id:
947 947 raise ValueError
948 948 if rev < 0:
949 949 rev = len(self) + rev
950 950 if rev < 0 or rev >= len(self):
951 951 raise ValueError
952 952 return self.node(rev)
953 953 except (ValueError, OverflowError):
954 954 pass
955 955 if len(id) == 40:
956 956 try:
957 957 # a full hex nodeid?
958 958 node = bin(id)
959 959 self.rev(node)
960 960 return node
961 961 except (TypeError, LookupError):
962 962 pass
963 963
964 964 def _partialmatch(self, id):
965 965 try:
966 966 n = self.index.partialmatch(id)
967 967 if n and self.hasnode(n):
968 968 return n
969 969 return None
970 970 except RevlogError:
971 971 # parsers.c radix tree lookup gave multiple matches
972 972 # fast path: for unfiltered changelog, radix tree is accurate
973 973 if not getattr(self, 'filteredrevs', None):
974 974 raise LookupError(id, self.indexfile,
975 975 _('ambiguous identifier'))
976 976 # fall through to slow path that filters hidden revisions
977 977 except (AttributeError, ValueError):
978 978 # we are pure python, or key was too short to search radix tree
979 979 pass
980 980
981 981 if id in self._pcache:
982 982 return self._pcache[id]
983 983
984 984 if len(id) < 40:
985 985 try:
986 986 # hex(node)[:...]
987 987 l = len(id) // 2 # grab an even number of digits
988 988 prefix = bin(id[:l * 2])
989 989 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
990 990 nl = [n for n in nl if hex(n).startswith(id) and
991 991 self.hasnode(n)]
992 992 if len(nl) > 0:
993 993 if len(nl) == 1:
994 994 self._pcache[id] = nl[0]
995 995 return nl[0]
996 996 raise LookupError(id, self.indexfile,
997 997 _('ambiguous identifier'))
998 998 return None
999 999 except TypeError:
1000 1000 pass
1001 1001
1002 1002 def lookup(self, id):
1003 1003 """locate a node based on:
1004 1004 - revision number or str(revision number)
1005 1005 - nodeid or subset of hex nodeid
1006 1006 """
1007 1007 n = self._match(id)
1008 1008 if n is not None:
1009 1009 return n
1010 1010 n = self._partialmatch(id)
1011 1011 if n:
1012 1012 return n
1013 1013
1014 1014 raise LookupError(id, self.indexfile, _('no match found'))
1015 1015
1016 1016 def cmp(self, node, text):
1017 1017 """compare text with a given file revision
1018 1018
1019 1019 returns True if text is different than what is stored.
1020 1020 """
1021 1021 p1, p2 = self.parents(node)
1022 1022 return hash(text, p1, p2) != node
1023 1023
1024 1024 def _addchunk(self, offset, data):
1025 1025 """Add a segment to the revlog cache.
1026 1026
1027 1027 Accepts an absolute offset and the data that is at that location.
1028 1028 """
1029 1029 o, d = self._chunkcache
1030 1030 # try to add to existing cache
1031 1031 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1032 1032 self._chunkcache = o, d + data
1033 1033 else:
1034 1034 self._chunkcache = offset, data
1035 1035
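The cache is only extended when the new segment starts exactly where the cached data ends and the result stays under _chunksize; a standalone sketch of that test:

    chunkcache = (100, 'A' * 32)              # (absolute offset, cached data)
    offset, data = 132, 'B' * 16              # contiguous: 100 + 32 == 132
    o, d = chunkcache
    if o + len(d) == offset and len(d) + len(data) < _chunksize:
        chunkcache = (o, d + data)            # grow the cached window
    else:
        chunkcache = (offset, data)           # discontiguous: start over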
1036 1036 def _loadchunk(self, offset, length, df=None):
1037 1037 """Load a segment of raw data from the revlog.
1038 1038
1039 1039 Accepts an absolute offset, length to read, and an optional existing
1040 1040 file handle to read from.
1041 1041
1042 1042 If an existing file handle is passed, it will be seeked and the
1043 1043 original seek position will NOT be restored.
1044 1044
1045 1045 Returns a str or buffer of raw byte data.
1046 1046 """
1047 1047 if df is not None:
1048 1048 closehandle = False
1049 1049 else:
1050 1050 if self._inline:
1051 1051 df = self.opener(self.indexfile)
1052 1052 else:
1053 1053 df = self.opener(self.datafile)
1054 1054 closehandle = True
1055 1055
1056 1056 # Cache data both forward and backward around the requested
1057 1057 # data, in a fixed size window. This helps speed up operations
1058 1058 # involving reading the revlog backwards.
1059 1059 cachesize = self._chunkcachesize
1060 1060 realoffset = offset & ~(cachesize - 1)
1061 1061 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
1062 1062 - realoffset)
1063 1063 df.seek(realoffset)
1064 1064 d = df.read(reallength)
1065 1065 if closehandle:
1066 1066 df.close()
1067 1067 self._addchunk(realoffset, d)
1068 1068 if offset != realoffset or reallength != length:
1069 1069 return util.buffer(d, offset - realoffset, length)
1070 1070 return d
1071 1071
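The window arithmetic rounds the requested byte range outward to cache-size boundaries (cachesize is a power of two, so & ~(cachesize - 1) truncates). A worked sketch with the default 64 KiB size:

    cachesize = 65536                         # default _chunkcachesize
    offset, length = 130000, 10000            # want bytes [130000, 140000)
    realoffset = offset & ~(cachesize - 1)            # 65536: rounded down
    reallength = (((offset + length + cachesize)
                   & ~(cachesize - 1)) - realoffset)  # 131072: rounded up
    assert realoffset <= offset
    assert realoffset + reallength >= offset + length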
1072 1072 def _getchunk(self, offset, length, df=None):
1073 1073 """Obtain a segment of raw data from the revlog.
1074 1074
1075 1075 Accepts an absolute offset, length of bytes to obtain, and an
1076 1076 optional file handle to the already-opened revlog. If the file
1077 1077 handle is used, its original seek position will not be preserved.
1078 1078
1079 1079 Requests for data may be returned from a cache.
1080 1080
1081 1081 Returns a str or a buffer instance of raw byte data.
1082 1082 """
1083 1083 o, d = self._chunkcache
1084 1084 l = len(d)
1085 1085
1086 1086 # is it in the cache?
1087 1087 cachestart = offset - o
1088 1088 cacheend = cachestart + length
1089 1089 if cachestart >= 0 and cacheend <= l:
1090 1090 if cachestart == 0 and cacheend == l:
1091 1091 return d # avoid a copy
1092 1092 return util.buffer(d, cachestart, cacheend - cachestart)
1093 1093
1094 1094 return self._loadchunk(offset, length, df=df)
1095 1095
1096 1096 def _chunkraw(self, startrev, endrev, df=None):
1097 1097 """Obtain a segment of raw data corresponding to a range of revisions.
1098 1098
1099 1099 Accepts the start and end revisions and an optional already-open
1100 1100 file handle to be used for reading. If the file handle is used, its
1101 1101 seek position will not be preserved.
1102 1102
1103 1103 Requests for data may be satisfied by a cache.
1104 1104
1105 1105 Returns a 2-tuple of (offset, data) for the requested range of
1106 1106 revisions. Offset is the integer offset from the beginning of the
1107 1107 revlog and data is a str or buffer of the raw byte data.
1108 1108
1109 1109 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1110 1110 to determine where each revision's data begins and ends.
1111 1111 """
1112 1112 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1113 1113 # (functions are expensive).
1114 1114 index = self.index
1115 1115 istart = index[startrev]
1116 iend = index[endrev]
1117 1116 start = int(istart[0] >> 16)
1118 end = int(iend[0] >> 16) + iend[1]
1117 if startrev == endrev:
1118 end = start + istart[1]
1119 else:
1120 iend = index[endrev]
1121 end = int(iend[0] >> 16) + iend[1]
1119 1122
1120 1123 if self._inline:
1121 1124 start += (startrev + 1) * self._io.size
1122 1125 end += (endrev + 1) * self._io.size
1123 1126 length = end - start
1124 1127
1125 1128 return start, self._getchunk(start, length, df=df)
1126 1129
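The change above is the point of this commit: when startrev == endrev, the end offset is derived from the already-fetched istart entry instead of a second index lookup. A toy sketch of both branches (non-inline case; only slots 0 and 1 of an entry matter here):

    istart = (offset_type(100, 0), 20)        # starts at byte 100, 20 bytes long
    iend = (offset_type(180, 0), 30)          # starts at byte 180, 30 bytes long
    start = int(istart[0] >> 16)              # 100
    assert start + istart[1] == 120             # startrev == endrev: one lookup
    assert int(iend[0] >> 16) + iend[1] == 210  # startrev != endrev: needs iend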
1127 1130 def _chunk(self, rev, df=None):
1128 1131 """Obtain a single decompressed chunk for a revision.
1129 1132
1130 1133 Accepts an integer revision and an optional already-open file handle
1131 1134 to be used for reading. If used, the seek position of the file will not
1132 1135 be preserved.
1133 1136
1134 1137 Returns a str holding uncompressed data for the requested revision.
1135 1138 """
1136 1139 return decompress(self._chunkraw(rev, rev, df=df)[1])
1137 1140
1138 1141 def _chunks(self, revs, df=None):
1139 1142 """Obtain decompressed chunks for the specified revisions.
1140 1143
1141 1144 Accepts an iterable of numeric revisions that are assumed to be in
1142 1145 ascending order. Also accepts an optional already-open file handle
1143 1146 to be used for reading. If used, the seek position of the file will
1144 1147 not be preserved.
1145 1148
1146 1149 This function is similar to calling ``self._chunk()`` multiple times,
1147 1150 but is faster.
1148 1151
1149 1152 Returns a list with decompressed data for each requested revision.
1150 1153 """
1151 1154 if not revs:
1152 1155 return []
1153 1156 start = self.start
1154 1157 length = self.length
1155 1158 inline = self._inline
1156 1159 iosize = self._io.size
1157 1160 buffer = util.buffer
1158 1161
1159 1162 l = []
1160 1163 ladd = l.append
1161 1164
1162 1165 try:
1163 1166 offset, data = self._chunkraw(revs[0], revs[-1], df=df)
1164 1167 except OverflowError:
1165 1168 # issue4215 - we can't cache a run of chunks greater than
1166 1169 # 2G on Windows
1167 1170 return [self._chunk(rev, df=df) for rev in revs]
1168 1171
1169 1172 for rev in revs:
1170 1173 chunkstart = start(rev)
1171 1174 if inline:
1172 1175 chunkstart += (rev + 1) * iosize
1173 1176 chunklength = length(rev)
1174 1177 ladd(decompress(buffer(data, chunkstart - offset, chunklength)))
1175 1178
1176 1179 return l
1177 1180
1178 1181 def _chunkclear(self):
1179 1182 """Clear the raw chunk cache."""
1180 1183 self._chunkcache = (0, '')
1181 1184
1182 1185 def deltaparent(self, rev):
1183 1186 """return deltaparent of the given revision"""
1184 1187 base = self.index[rev][3]
1185 1188 if base == rev:
1186 1189 return nullrev
1187 1190 elif self._generaldelta:
1188 1191 return base
1189 1192 else:
1190 1193 return rev - 1
1191 1194
1192 1195 def revdiff(self, rev1, rev2):
1193 1196 """return or calculate a delta between two revisions"""
1194 1197 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1195 1198 return str(self._chunk(rev2))
1196 1199
1197 1200 return mdiff.textdiff(self.revision(rev1),
1198 1201 self.revision(rev2))
1199 1202
1200 1203 def revision(self, nodeorrev, _df=None):
1201 1204 """return an uncompressed revision of a given node or revision
1202 1205 number.
1203 1206
1204 1207 _df is an existing file handle to read from. It is meant to only be
1205 1208 used internally.
1206 1209 """
1207 1210 if isinstance(nodeorrev, int):
1208 1211 rev = nodeorrev
1209 1212 node = self.node(rev)
1210 1213 else:
1211 1214 node = nodeorrev
1212 1215 rev = None
1213 1216
1214 1217 cachedrev = None
1215 1218 if node == nullid:
1216 1219 return ""
1217 1220 if self._cache:
1218 1221 if self._cache[0] == node:
1219 1222 return self._cache[2]
1220 1223 cachedrev = self._cache[1]
1221 1224
1222 1225 # look up what we need to read
1223 1226 text = None
1224 1227 if rev is None:
1225 1228 rev = self.rev(node)
1226 1229
1227 1230 # check rev flags
1228 1231 if self.flags(rev) & ~REVIDX_KNOWN_FLAGS:
1229 1232 raise RevlogError(_('incompatible revision flag %x') %
1230 1233 (self.flags(rev) & ~REVIDX_KNOWN_FLAGS))
1231 1234
1232 1235 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1233 1236 if stopped:
1234 1237 text = self._cache[2]
1235 1238
1236 1239 # drop cache to save memory
1237 1240 self._cache = None
1238 1241
1239 1242 bins = self._chunks(chain, df=_df)
1240 1243 if text is None:
1241 1244 text = str(bins[0])
1242 1245 bins = bins[1:]
1243 1246
1244 1247 text = mdiff.patches(text, bins)
1245 1248
1246 1249 text = self._checkhash(text, node, rev)
1247 1250
1248 1251 self._cache = (node, rev, text)
1249 1252 return text
1250 1253
1251 1254 def hash(self, text, p1, p2):
1252 1255 """Compute a node hash.
1253 1256
1254 1257 Available as a function so that subclasses can replace the hash
1255 1258 as needed.
1256 1259 """
1257 1260 return hash(text, p1, p2)
1258 1261
1259 1262 def _checkhash(self, text, node, rev):
1260 1263 p1, p2 = self.parents(node)
1261 1264 self.checkhash(text, p1, p2, node, rev)
1262 1265 return text
1263 1266
1264 1267 def checkhash(self, text, p1, p2, node, rev=None):
1265 1268 if node != self.hash(text, p1, p2):
1266 1269 revornode = rev
1267 1270 if revornode is None:
1268 1271 revornode = templatefilters.short(hex(node))
1269 1272 raise RevlogError(_("integrity check failed on %s:%s")
1270 1273 % (self.indexfile, revornode))
1271 1274
1272 1275 def checkinlinesize(self, tr, fp=None):
1273 1276 """Check if the revlog is too big for inline and convert if so.
1274 1277
1275 1278 This should be called after revisions are added to the revlog. If the
1276 1279 revlog has grown too large to be an inline revlog, it will convert it
1277 1280 to use multiple index and data files.
1278 1281 """
1279 1282 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
1280 1283 return
1281 1284
1282 1285 trinfo = tr.find(self.indexfile)
1283 1286 if trinfo is None:
1284 1287 raise RevlogError(_("%s not found in the transaction")
1285 1288 % self.indexfile)
1286 1289
1287 1290 trindex = trinfo[2]
1288 1291 if trindex is not None:
1289 1292 dataoff = self.start(trindex)
1290 1293 else:
1291 1294 # revlog was stripped at start of transaction, use all leftover data
1292 1295 trindex = len(self) - 1
1293 1296 dataoff = self.end(-2)
1294 1297
1295 1298 tr.add(self.datafile, dataoff)
1296 1299
1297 1300 if fp:
1298 1301 fp.flush()
1299 1302 fp.close()
1300 1303
1301 1304 df = self.opener(self.datafile, 'w')
1302 1305 try:
1303 1306 for r in self:
1304 1307 df.write(self._chunkraw(r, r)[1])
1305 1308 finally:
1306 1309 df.close()
1307 1310
1308 1311 fp = self.opener(self.indexfile, 'w', atomictemp=True,
1309 1312 checkambig=self._checkambig)
1310 1313 self.version &= ~(REVLOGNGINLINEDATA)
1311 1314 self._inline = False
1312 1315 for i in self:
1313 1316 e = self._io.packentry(self.index[i], self.node, self.version, i)
1314 1317 fp.write(e)
1315 1318
1316 1319 # if we don't call close, the temp file will never replace the
1317 1320 # real index
1318 1321 fp.close()
1319 1322
1320 1323 tr.replace(self.indexfile, trindex * self._io.size)
1321 1324 self._chunkclear()
1322 1325
1323 1326 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1324 1327 node=None):
1325 1328 """add a revision to the log
1326 1329
1327 1330 text - the revision data to add
1328 1331 transaction - the transaction object used for rollback
1329 1332 link - the linkrev data to add
1330 1333 p1, p2 - the parent nodeids of the revision
1331 1334 cachedelta - an optional precomputed delta
1332 1335 node - nodeid of revision; typically node is not specified, and it is
1333 1336 computed by default as hash(text, p1, p2), however subclasses might
1334 1337 use different hashing method (and override checkhash() in such case)
1335 1338 """
1336 1339 if link == nullrev:
1337 1340 raise RevlogError(_("attempted to add linkrev -1 to %s")
1338 1341 % self.indexfile)
1339 1342
1340 1343 if len(text) > _maxentrysize:
1341 1344 raise RevlogError(
1342 1345 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
1343 1346 % (self.indexfile, len(text)))
1344 1347
1345 1348 node = node or self.hash(text, p1, p2)
1346 1349 if node in self.nodemap:
1347 1350 return node
1348 1351
1349 1352 dfh = None
1350 1353 if not self._inline:
1351 1354 dfh = self.opener(self.datafile, "a+")
1352 1355 ifh = self.opener(self.indexfile, "a+", checkambig=self._checkambig)
1353 1356 try:
1354 1357 return self._addrevision(node, text, transaction, link, p1, p2,
1355 1358 REVIDX_DEFAULT_FLAGS, cachedelta, ifh, dfh)
1356 1359 finally:
1357 1360 if dfh:
1358 1361 dfh.close()
1359 1362 ifh.close()
1360 1363
1361 1364 def compress(self, text):
1362 1365 """ generate a possibly-compressed representation of text """
1363 1366 if not text:
1364 1367 return ("", text)
1365 1368 l = len(text)
1366 1369 bin = None
1367 1370 if l < 44:
1368 1371 pass
1369 1372 elif l > 1000000:
1370 1373 # zlib makes an internal copy, thus doubling memory usage for
1371 1374 # large files, so let's do this in pieces
1372 1375 z = zlib.compressobj()
1373 1376 p = []
1374 1377 pos = 0
1375 1378 while pos < l:
1376 1379 pos2 = pos + 2**20
1377 1380 p.append(z.compress(text[pos:pos2]))
1378 1381 pos = pos2
1379 1382 p.append(z.flush())
1380 1383 if sum(map(len, p)) < l:
1381 1384 bin = "".join(p)
1382 1385 else:
1383 1386 bin = _compress(text)
1384 1387 if bin is None or len(bin) > l:
1385 1388 if text[0] == '\0':
1386 1389 return ("", text)
1387 1390 return ('u', text)
1388 1391 return ("", bin)
1389 1392
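For texts over 1 MB the code streams through a compressobj in 1 MB pieces rather than calling zlib.compress() on the whole string (which would copy it internally). A standalone sketch of that loop:

    import zlib

    text = 'x' * (3 * 2 ** 20)                # 3 MB of very compressible data
    z = zlib.compressobj()
    p, pos = [], 0
    while pos < len(text):
        pos2 = pos + 2 ** 20                  # feed 1 MB at a time
        p.append(z.compress(text[pos:pos2]))
        pos = pos2
    p.append(z.flush())
    compressed = ''.join(p)
    assert zlib.decompress(compressed) == text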
1390 1393 def _isgooddelta(self, d, textlen):
1391 1394 """Returns True if the given delta is good. Good means that it is within
1392 1395 the disk span, disk size, and chain length bounds that we know to be
1393 1396 performant."""
1394 1397 if d is None:
1395 1398 return False
1396 1399
1397 1400 # - 'dist' is the distance from the base revision -- bounding it limits
1398 1401 # the amount of I/O we need to do.
1399 1402 # - 'compresseddeltalen' is the sum of the total size of deltas we need
1400 1403 # to apply -- bounding it limits the amount of CPU we consume.
1401 1404 dist, l, data, base, chainbase, chainlen, compresseddeltalen = d
1402 1405 if (dist > textlen * 4 or l > textlen or
1403 1406 compresseddeltalen > textlen * 2 or
1404 1407 (self._maxchainlen and chainlen > self._maxchainlen)):
1405 1408 return False
1406 1409
1407 1410 return True
1408 1411
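A worked check of those bounds with made-up numbers, in the same tuple order builddelta() produces:

    textlen = 1000
    # (dist, deltalen, data, base, chainbase, chainlen, compresseddeltalen)
    d = (3500, 400, None, 7, 5, 10, 1500)
    dist, l, data, base, chainbase, chainlen, compresseddeltalen = d
    maxchainlen = None                        # the default for _maxchainlen
    good = not (dist > textlen * 4 or l > textlen or
                compresseddeltalen > textlen * 2 or
                (maxchainlen and chainlen > maxchainlen))
    assert good                  # 3500 <= 4000, 400 <= 1000, 1500 <= 2000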
1409 1412 def _addrevision(self, node, text, transaction, link, p1, p2, flags,
1410 1413 cachedelta, ifh, dfh, alwayscache=False):
1411 1414 """internal function to add revisions to the log
1412 1415
1413 1416 see addrevision for argument descriptions.
1414 1417 invariants:
1415 1418 - text is optional (can be None); if not set, cachedelta must be set.
1416 1419 if both are set, they must correspond to each other.
1417 1420 """
1418 1421 btext = [text]
1419 1422 def buildtext():
1420 1423 if btext[0] is not None:
1421 1424 return btext[0]
1422 1425 baserev = cachedelta[0]
1423 1426 delta = cachedelta[1]
1424 1427 # special case deltas which replace entire base; no need to decode
1425 1428 # base revision. this neatly avoids censored bases, which throw when
1426 1429 # they're decoded.
1427 1430 hlen = struct.calcsize(">lll")
1428 1431 if delta[:hlen] == mdiff.replacediffheader(self.rawsize(baserev),
1429 1432 len(delta) - hlen):
1430 1433 btext[0] = delta[hlen:]
1431 1434 else:
1432 1435 if self._inline:
1433 1436 fh = ifh
1434 1437 else:
1435 1438 fh = dfh
1436 1439 basetext = self.revision(self.node(baserev), _df=fh)
1437 1440 btext[0] = mdiff.patch(basetext, delta)
1438 1441 try:
1439 1442 self.checkhash(btext[0], p1, p2, node)
1440 1443 if flags & REVIDX_ISCENSORED:
1441 1444 raise RevlogError(_('node %s is not censored') % node)
1442 1445 except CensoredNodeError:
1443 1446 # must pass the censored index flag to add censored revisions
1444 1447 if not flags & REVIDX_ISCENSORED:
1445 1448 raise
1446 1449 return btext[0]
1447 1450
1448 1451 def builddelta(rev):
1449 1452 # can we use the cached delta?
1450 1453 if cachedelta and cachedelta[0] == rev:
1451 1454 delta = cachedelta[1]
1452 1455 else:
1453 1456 t = buildtext()
1454 1457 if self.iscensored(rev):
1455 1458 # deltas based on a censored revision must replace the
1456 1459 # full content in one patch, so delta works everywhere
1457 1460 header = mdiff.replacediffheader(self.rawsize(rev), len(t))
1458 1461 delta = header + t
1459 1462 else:
1460 1463 if self._inline:
1461 1464 fh = ifh
1462 1465 else:
1463 1466 fh = dfh
1464 1467 ptext = self.revision(self.node(rev), _df=fh)
1465 1468 delta = mdiff.textdiff(ptext, t)
1466 1469 header, data = self.compress(delta)
1467 1470 deltalen = len(header) + len(data)
1468 1471 chainbase = self.chainbase(rev)
1469 1472 dist = deltalen + offset - self.start(chainbase)
1470 1473 if self._generaldelta:
1471 1474 base = rev
1472 1475 else:
1473 1476 base = chainbase
1474 1477 chainlen, compresseddeltalen = self._chaininfo(rev)
1475 1478 chainlen += 1
1476 1479 compresseddeltalen += deltalen
1477 1480 return (dist, deltalen, (header, data), base,
1478 1481 chainbase, chainlen, compresseddeltalen)
1479 1482
1480 1483 curr = len(self)
1481 1484 prev = curr - 1
1482 1485 offset = self.end(prev)
1483 1486 delta = None
1484 1487 p1r, p2r = self.rev(p1), self.rev(p2)
1485 1488
1486 1489 # full versions are inserted when the needed deltas
1487 1490 # become comparable to the uncompressed text
1488 1491 if text is None:
1489 1492 textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
1490 1493 cachedelta[1])
1491 1494 else:
1492 1495 textlen = len(text)
1493 1496
1494 1497 # should we try to build a delta?
1495 1498 if prev != nullrev and self.storedeltachains:
1496 1499 tested = set()
1497 1500 # This condition is true most of the time when processing
1498 1501 # changegroup data into a generaldelta repo. The only time it
1499 1502 # isn't true is if this is the first revision in a delta chain
1500 1503 # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
1501 1504 if cachedelta and self._generaldelta and self._lazydeltabase:
1502 1505 # Assume what we received from the server is a good choice
1503 1506 # build delta will reuse the cache
1504 1507 candidatedelta = builddelta(cachedelta[0])
1505 1508 tested.add(cachedelta[0])
1506 1509 if self._isgooddelta(candidatedelta, textlen):
1507 1510 delta = candidatedelta
1508 1511 if delta is None and self._generaldelta:
1509 1512 # exclude already lazy tested base if any
1510 1513 parents = [p for p in (p1r, p2r)
1511 1514 if p != nullrev and p not in tested]
1512 1515 if parents and not self._aggressivemergedeltas:
1513 1516 # Pick whichever parent is closer to us (to minimize the
1514 1517 # chance of having to build a fulltext).
1515 1518 parents = [max(parents)]
1516 1519 tested.update(parents)
1517 1520 pdeltas = []
1518 1521 for p in parents:
1519 1522 pd = builddelta(p)
1520 1523 if self._isgooddelta(pd, textlen):
1521 1524 pdeltas.append(pd)
1522 1525 if pdeltas:
1523 1526 delta = min(pdeltas, key=lambda x: x[1])
1524 1527 if delta is None and prev not in tested:
1525 1528 # other approaches failed; try against prev to hopefully save us a
1526 1529 # fulltext.
1527 1530 candidatedelta = builddelta(prev)
1528 1531 if self._isgooddelta(candidatedelta, textlen):
1529 1532 delta = candidatedelta
1530 1533 if delta is not None:
1531 1534 dist, l, data, base, chainbase, chainlen, compresseddeltalen = delta
1532 1535 else:
1533 1536 text = buildtext()
1534 1537 data = self.compress(text)
1535 1538 l = len(data[1]) + len(data[0])
1536 1539 base = chainbase = curr
1537 1540
1538 1541 e = (offset_type(offset, flags), l, textlen,
1539 1542 base, link, p1r, p2r, node)
1540 1543 self.index.insert(-1, e)
1541 1544 self.nodemap[node] = curr
1542 1545
1543 1546 entry = self._io.packentry(e, self.node, self.version, curr)
1544 1547 self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
1545 1548
1546 1549 if alwayscache and text is None:
1547 1550 text = buildtext()
1548 1551
1549 1552 if type(text) == str: # only accept immutable objects
1550 1553 self._cache = (node, curr, text)
1551 1554 self._chainbasecache[curr] = chainbase
1552 1555 return node
1553 1556
1554 1557 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
1555 1558 # Files opened in a+ mode have inconsistent behavior on various
1556 1559 # platforms. Windows requires that a file positioning call be made
1557 1560 # when the file handle transitions between reads and writes. See
1558 1561 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
1559 1562 # platforms, Python or the platform itself can be buggy. Some versions
1560 1563 # of Solaris have been observed to not append at the end of the file
1561 1564 # if the file was seeked to before the end. See issue4943 for more.
1562 1565 #
1563 1566 # We work around this issue by inserting a seek() before writing.
1564 1567 # Note: This is likely not necessary on Python 3.
1565 1568 ifh.seek(0, os.SEEK_END)
1566 1569 if dfh:
1567 1570 dfh.seek(0, os.SEEK_END)
1568 1571
1569 1572 curr = len(self) - 1
1570 1573 if not self._inline:
1571 1574 transaction.add(self.datafile, offset)
1572 1575 transaction.add(self.indexfile, curr * len(entry))
1573 1576 if data[0]:
1574 1577 dfh.write(data[0])
1575 1578 dfh.write(data[1])
1576 1579 ifh.write(entry)
1577 1580 else:
1578 1581 offset += curr * self._io.size
1579 1582 transaction.add(self.indexfile, offset, curr)
1580 1583 ifh.write(entry)
1581 1584 ifh.write(data[0])
1582 1585 ifh.write(data[1])
1583 1586 self.checkinlinesize(transaction, ifh)
1584 1587
1585 1588 def addgroup(self, cg, linkmapper, transaction, addrevisioncb=None):
1586 1589 """
1587 1590 add a delta group
1588 1591
1589 1592 given a set of deltas, add them to the revision log. the
1590 1593 first delta is against its parent, which should be in our
1591 1594 log, the rest are against the previous delta.
1592 1595
1593 1596 If ``addrevisioncb`` is defined, it will be called with arguments of
1594 1597 this revlog and the node that was added.
1595 1598 """
1596 1599
1597 1600 # track the base of the current delta log
1598 1601 content = []
1599 1602 node = None
1600 1603
1601 1604 r = len(self)
1602 1605 end = 0
1603 1606 if r:
1604 1607 end = self.end(r - 1)
1605 1608 ifh = self.opener(self.indexfile, "a+", checkambig=self._checkambig)
1606 1609 isize = r * self._io.size
1607 1610 if self._inline:
1608 1611 transaction.add(self.indexfile, end + isize, r)
1609 1612 dfh = None
1610 1613 else:
1611 1614 transaction.add(self.indexfile, isize, r)
1612 1615 transaction.add(self.datafile, end)
1613 1616 dfh = self.opener(self.datafile, "a+")
1614 1617 def flush():
1615 1618 if dfh:
1616 1619 dfh.flush()
1617 1620 ifh.flush()
1618 1621 try:
1619 1622 # loop through our set of deltas
1620 1623 chain = None
1621 1624 for chunkdata in iter(lambda: cg.deltachunk(chain), {}):
1622 1625 node = chunkdata['node']
1623 1626 p1 = chunkdata['p1']
1624 1627 p2 = chunkdata['p2']
1625 1628 cs = chunkdata['cs']
1626 1629 deltabase = chunkdata['deltabase']
1627 1630 delta = chunkdata['delta']
1628 1631 flags = chunkdata['flags'] or REVIDX_DEFAULT_FLAGS
1629 1632
1630 1633 content.append(node)
1631 1634
1632 1635 link = linkmapper(cs)
1633 1636 if node in self.nodemap:
1634 1637 # this can happen if two branches make the same change
1635 1638 chain = node
1636 1639 continue
1637 1640
1638 1641 for p in (p1, p2):
1639 1642 if p not in self.nodemap:
1640 1643 raise LookupError(p, self.indexfile,
1641 1644 _('unknown parent'))
1642 1645
1643 1646 if deltabase not in self.nodemap:
1644 1647 raise LookupError(deltabase, self.indexfile,
1645 1648 _('unknown delta base'))
1646 1649
1647 1650 baserev = self.rev(deltabase)
1648 1651
1649 1652 if baserev != nullrev and self.iscensored(baserev):
1650 1653 # if base is censored, delta must be full replacement in a
1651 1654 # single patch operation
1652 1655 hlen = struct.calcsize(">lll")
1653 1656 oldlen = self.rawsize(baserev)
1654 1657 newlen = len(delta) - hlen
1655 1658 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
1656 1659 raise error.CensoredBaseError(self.indexfile,
1657 1660 self.node(baserev))
1658 1661
1659 1662 if not flags and self._peek_iscensored(baserev, delta, flush):
1660 1663 flags |= REVIDX_ISCENSORED
1661 1664
1662 1665 # We assume consumers of addrevisioncb will want to retrieve
1663 1666 # the added revision, which will require a call to
1664 1667 # revision(). revision() will fast path if there is a cache
1665 1668 # hit. So, we tell _addrevision() to always cache in this case.
1666 1669 chain = self._addrevision(node, None, transaction, link,
1667 1670 p1, p2, flags, (baserev, delta),
1668 1671 ifh, dfh,
1669 1672 alwayscache=bool(addrevisioncb))
1670 1673
1671 1674 if addrevisioncb:
1672 1675 addrevisioncb(self, chain)
1673 1676
1674 1677 if not dfh and not self._inline:
1675 1678 # _addrevision switched from inline to conventional storage;
1676 1679 # reopen the index and open the new data file
1677 1680 ifh.close()
1678 1681 dfh = self.opener(self.datafile, "a+")
1679 1682 ifh = self.opener(self.indexfile, "a+",
1680 1683 checkambig=self._checkambig)
1681 1684 finally:
1682 1685 if dfh:
1683 1686 dfh.close()
1684 1687 ifh.close()
1685 1688
1686 1689 return content
1687 1690
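# --- illustrative sketch, not part of revlog.py ---
# The censored-base check from addgroup, restated. It assumes the
# bdiff-style delta format: each hunk starts with three big-endian
# int32s (start, end, newlength) followed by newlength bytes of new
# data, so a full replacement of an oldlen-byte base is a single hunk
# covering (0, oldlen).
import struct

def isfullreplacement(delta, oldlen):
    hlen = struct.calcsize(">lll")
    if len(delta) < hlen:
        return False
    start, end, newlen = struct.unpack(">lll", delta[:hlen])
    return (start, end, newlen) == (0, oldlen, len(delta) - hlen)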
1688 1691 def iscensored(self, rev):
1689 1692 """Check if a file revision is censored."""
1690 1693 return False
1691 1694
1692 1695 def _peek_iscensored(self, baserev, delta, flush):
1693 1696 """Quickly check if a delta produces a censored revision."""
1694 1697 return False
1695 1698
1696 1699 def getstrippoint(self, minlink):
1697 1700 """find the minimum rev that must be stripped to strip the linkrev
1698 1701
1699 1702 Returns a tuple containing the minimum rev and a set of all revs that
1700 1703 have linkrevs that will be broken by this strip.
1701 1704 """
1702 1705 brokenrevs = set()
1703 1706 strippoint = len(self)
1704 1707
1705 1708 heads = {}
1706 1709 futurelargelinkrevs = set()
1707 1710 for head in self.headrevs():
1708 1711 headlinkrev = self.linkrev(head)
1709 1712 heads[head] = headlinkrev
1710 1713 if headlinkrev >= minlink:
1711 1714 futurelargelinkrevs.add(headlinkrev)
1712 1715
1713 1716 # This algorithm involves walking down the rev graph, starting at the
1714 1717 # heads. Since the revs are topologically sorted according to linkrev,
1715 1718 # once all head linkrevs are below the minlink, we know there are
1716 1719 # no more revs that could have a linkrev greater than minlink.
1717 1720 # So we can stop walking.
1718 1721 while futurelargelinkrevs:
1719 1722 strippoint -= 1
1720 1723 linkrev = heads.pop(strippoint)
1721 1724
1722 1725 if linkrev < minlink:
1723 1726 brokenrevs.add(strippoint)
1724 1727 else:
1725 1728 futurelargelinkrevs.remove(linkrev)
1726 1729
1727 1730 for p in self.parentrevs(strippoint):
1728 1731 if p != nullrev:
1729 1732 plinkrev = self.linkrev(p)
1730 1733 heads[p] = plinkrev
1731 1734 if plinkrev >= minlink:
1732 1735 futurelargelinkrevs.add(plinkrev)
1733 1736
1734 1737 return strippoint, brokenrevs
1735 1738
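# --- illustrative sketch, not part of revlog.py ---
# A simplified, self-contained version of the walk in getstrippoint,
# with linkrev and parentrevs supplied as plain dicts instead of revlog
# methods. The invariant is the one described above: revs are visited
# from the top down, and the walk stops once no linkrev >= minlink can
# still appear.
def toystrippoint(nrevs, headrevs, linkrev, parentrevs, minlink):
    frontier = dict((h, linkrev[h]) for h in headrevs)
    future = set(lr for lr in frontier.values() if lr >= minlink)
    broken = set()
    point = nrevs
    while future:
        point -= 1
        lr = frontier.pop(point)
        if lr < minlink:
            broken.add(point)
        else:
            future.discard(lr)
        for p in parentrevs[point]:
            if p >= 0:
                frontier[p] = linkrev[p]
                if linkrev[p] >= minlink:
                    future.add(linkrev[p])
    return point, broken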
1736 1739 def strip(self, minlink, transaction):
1737 1740 """truncate the revlog on the first revision with a linkrev >= minlink
1738 1741
1739 1742 This function is called when we're stripping revision minlink and
1740 1743 its descendants from the repository.
1741 1744
1742 1745 We have to remove all revisions with linkrev >= minlink, because
1743 1746 the equivalent changelog revisions will be renumbered after the
1744 1747 strip.
1745 1748
1746 1749 So we truncate the revlog on the first of these revisions, and
1747 1750 trust that the caller has saved the revisions that shouldn't be
1748 1751 removed and that it'll re-add them after this truncation.
1749 1752 """
1750 1753 if len(self) == 0:
1751 1754 return
1752 1755
1753 1756 rev, _ = self.getstrippoint(minlink)
1754 1757 if rev == len(self):
1755 1758 return
1756 1759
1757 1760 # first truncate the files on disk
1758 1761 end = self.start(rev)
1759 1762 if not self._inline:
1760 1763 transaction.add(self.datafile, end)
1761 1764 end = rev * self._io.size
1762 1765 else:
1763 1766 end += rev * self._io.size
1764 1767
1765 1768 transaction.add(self.indexfile, end)
1766 1769
1767 1770 # then reset internal state in memory to forget those revisions
1768 1771 self._cache = None
1769 1772 self._chaininfocache = {}
1770 1773 self._chunkclear()
1771 1774 for x in xrange(rev, len(self)):
1772 1775 del self.nodemap[self.node(x)]
1773 1776
1774 1777 del self.index[rev:-1]
1775 1778
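# --- illustrative sketch, not part of revlog.py ---
# The truncation offsets computed by strip(), as bare arithmetic. For a
# split revlog the data file is cut at the start of the first stripped
# rev and the index at a whole number of fixed-size entries; for an
# inline revlog both live in one file, so the contributions add up.
def truncationoffsets(rev, startofrev, entrysize, inline):
    if inline:
        return {'index': startofrev + rev * entrysize}
    return {'data': startofrev, 'index': rev * entrysize}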
1776 1779 def checksize(self):
1777 1780 expected = 0
1778 1781 if len(self):
1779 1782 expected = max(0, self.end(len(self) - 1))
1780 1783
1781 1784 try:
1782 1785 f = self.opener(self.datafile)
1783 1786 f.seek(0, 2)
1784 1787 actual = f.tell()
1785 1788 f.close()
1786 1789 dd = actual - expected
1787 1790 except IOError as inst:
1788 1791 if inst.errno != errno.ENOENT:
1789 1792 raise
1790 1793 dd = 0
1791 1794
1792 1795 try:
1793 1796 f = self.opener(self.indexfile)
1794 1797 f.seek(0, 2)
1795 1798 actual = f.tell()
1796 1799 f.close()
1797 1800 s = self._io.size
1798 1801 i = max(0, actual // s)
1799 1802 di = actual - (i * s)
1800 1803 if self._inline:
1801 1804 databytes = 0
1802 1805 for r in self:
1803 1806 databytes += max(0, self.length(r))
1804 1807 dd = 0
1805 1808 di = actual - len(self) * s - databytes
1806 1809 except IOError as inst:
1807 1810 if inst.errno != errno.ENOENT:
1808 1811 raise
1809 1812 di = 0
1810 1813
1811 1814 return (dd, di)
1812 1815
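# --- illustrative sketch, not part of revlog.py ---
# What the (dd, di) pair from checksize() means for a non-inline revlog:
# dd is how many data-file bytes exist beyond what the index accounts
# for, and di is how many trailing index bytes fail to form a complete
# fixed-size entry; both are zero for a consistent revlog.
def sizedeltas(expecteddata, actualdata, actualindex, entrysize):
    dd = actualdata - expecteddata
    di = actualindex - (actualindex // entrysize) * entrysize
    return dd, di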
1813 1816 def files(self):
1814 1817 res = [self.indexfile]
1815 1818 if not self._inline:
1816 1819 res.append(self.datafile)
1817 1820 return res