revlog: inline start() and end() for perf reasons...
Gregory Szorc
r30288:ceddc3d9 default
@@ -1,1811 +1,1817
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import collections
17 17 import errno
18 18 import hashlib
19 19 import os
20 20 import struct
21 21 import zlib
22 22
23 23 # import stuff from node for others to import from revlog
24 24 from .node import (
25 25 bin,
26 26 hex,
27 27 nullid,
28 28 nullrev,
29 29 )
30 30 from .i18n import _
31 31 from . import (
32 32 ancestor,
33 33 error,
34 34 mdiff,
35 35 parsers,
36 36 templatefilters,
37 37 util,
38 38 )
39 39
40 40 _pack = struct.pack
41 41 _unpack = struct.unpack
42 42 _compress = zlib.compress
43 43 _decompress = zlib.decompress
44 44
45 45 # revlog header flags
46 46 REVLOGV0 = 0
47 47 REVLOGNG = 1
48 48 REVLOGNGINLINEDATA = (1 << 16)
49 49 REVLOGGENERALDELTA = (1 << 17)
50 50 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
51 51 REVLOG_DEFAULT_FORMAT = REVLOGNG
52 52 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
53 53 REVLOGNG_FLAGS = REVLOGNGINLINEDATA | REVLOGGENERALDELTA
54 54
55 55 # revlog index flags
56 56 REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
57 57 REVIDX_DEFAULT_FLAGS = 0
58 58 REVIDX_KNOWN_FLAGS = REVIDX_ISCENSORED
59 59
60 60 # max size of revlog with inline data
61 61 _maxinline = 131072
62 62 _chunksize = 1048576
63 63
64 64 RevlogError = error.RevlogError
65 65 LookupError = error.LookupError
66 66 CensoredNodeError = error.CensoredNodeError
67 67
68 68 def getoffset(q):
69 69 return int(q >> 16)
70 70
71 71 def gettype(q):
72 72 return int(q & 0xFFFF)
73 73
74 74 def offset_type(offset, type):
75 75 return long(long(offset) << 16 | type)
76 76
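
A minimal sketch (outside the diff, with hypothetical values) of the packing these three helpers implement: the byte offset lives in the upper 48 bits of one index integer and the flags in the low 16 bits:

    packed = offset_type(1048576, REVIDX_ISCENSORED)  # offset 1 MiB, censored flag
    assert getoffset(packed) == 1048576               # upper bits: byte offset
    assert gettype(packed) == REVIDX_ISCENSORED       # low 16 bits: flags
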
77 77 _nullhash = hashlib.sha1(nullid)
78 78
79 79 def hash(text, p1, p2):
80 80 """generate a hash from the given text and its parent hashes
81 81
82 82 This hash combines both the current file contents and its history
83 83 in a manner that makes it easy to distinguish nodes with the same
84 84 content in the revision graph.
85 85 """
86 86 # As of now, if one of the parent nodes is null, p2 is null
87 87 if p2 == nullid:
88 88 # deep copy of a hash is faster than creating one
89 89 s = _nullhash.copy()
90 90 s.update(p1)
91 91 else:
92 92 # neither of the parent nodes is nullid
93 93 l = [p1, p2]
94 94 l.sort()
95 95 s = hashlib.sha1(l[0])
96 96 s.update(l[1])
97 97 s.update(text)
98 98 return s.digest()
99 99
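
A small sketch of the property the docstring describes, using hypothetical parent nodes: the parents are sorted before hashing, so the result is independent of their order, and with both parents set it reduces to plain SHA-1 over parents plus text:

    import hashlib
    p1, p2, text = '\x11' * 20, '\x22' * 20, 'file contents'
    assert hash(text, p1, p2) == hash(text, p2, p1)
    assert hash(text, p1, p2) == hashlib.sha1(p1 + p2 + text).digest()
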
100 100 def decompress(bin):
101 101 """ decompress the given input """
102 102 if not bin:
103 103 return bin
104 104 t = bin[0]
105 105 if t == '\0':
106 106 return bin
107 107 if t == 'x':
108 108 try:
109 109 return _decompress(bin)
110 110 except zlib.error as e:
111 111 raise RevlogError(_("revlog decompress error: %s") % str(e))
112 112 if t == 'u':
113 113 return util.buffer(bin, 1)
114 114 raise RevlogError(_("unknown compression type %r") % t)
115 115
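
A sketch of the one-byte type dispatch above, round-tripping through the module's own decompress():

    import zlib
    assert decompress(zlib.compress('some text')) == 'some text'  # 'x': zlib header byte
    assert str(decompress('u' + 'raw')) == 'raw'   # 'u': stored verbatim, marker stripped
    assert decompress('\0data') == '\0data'        # NUL prefix: returned unchanged
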
116 116 # index v0:
117 117 # 4 bytes: offset
118 118 # 4 bytes: compressed length
119 119 # 4 bytes: base rev
120 120 # 4 bytes: link rev
121 121 # 20 bytes: parent 1 nodeid
122 122 # 20 bytes: parent 2 nodeid
123 123 # 20 bytes: nodeid
124 124 indexformatv0 = ">4l20s20s20s"
125 125
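
A sketch of the v0 record layout with hypothetical field values: four big-endian 32-bit integers followed by three 20-byte node ids, 76 bytes in total:

    import struct
    assert struct.calcsize(indexformatv0) == 76
    record = struct.pack(indexformatv0, 0, 11, 0, 0,
                         '\x11' * 20, '\x22' * 20, '\x33' * 20)
    offset, clen, baserev, linkrev = struct.unpack(indexformatv0, record)[:4]
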
126 126 class revlogoldio(object):
127 127 def __init__(self):
128 128 self.size = struct.calcsize(indexformatv0)
129 129
130 130 def parseindex(self, data, inline):
131 131 s = self.size
132 132 index = []
133 133 nodemap = {nullid: nullrev}
134 134 n = off = 0
135 135 l = len(data)
136 136 while off + s <= l:
137 137 cur = data[off:off + s]
138 138 off += s
139 139 e = _unpack(indexformatv0, cur)
140 140 # transform to revlogv1 format
141 141 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
142 142 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
143 143 index.append(e2)
144 144 nodemap[e[6]] = n
145 145 n += 1
146 146
147 147 # add the magic null revision at -1
148 148 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
149 149
150 150 return index, nodemap, None
151 151
152 152 def packentry(self, entry, node, version, rev):
153 153 if gettype(entry[0]):
154 154 raise RevlogError(_("index entry flags need RevlogNG"))
155 155 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
156 156 node(entry[5]), node(entry[6]), entry[7])
157 157 return _pack(indexformatv0, *e2)
158 158
159 159 # index ng:
160 160 # 6 bytes: offset
161 161 # 2 bytes: flags
162 162 # 4 bytes: compressed length
163 163 # 4 bytes: uncompressed length
164 164 # 4 bytes: base rev
165 165 # 4 bytes: link rev
166 166 # 4 bytes: parent 1 rev
167 167 # 4 bytes: parent 2 rev
168 168 # 32 bytes: nodeid
169 169 indexformatng = ">Qiiiiii20s12x"
170 170 versionformat = ">I"
171 171
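
A sketch of the RevlogNG record size: the format string above works out to a fixed 64 bytes per entry, and rev 0 has its first four bytes overwritten with the version header (as packentry() below shows):

    import struct
    assert struct.calcsize(indexformatng) == 64  # 8 + 6*4 + 20 + 12 padding
    assert struct.calcsize(versionformat) == 4   # header spliced over rev 0
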
172 172 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
173 173 # signed integer)
174 174 _maxentrysize = 0x7fffffff
175 175
176 176 class revlogio(object):
177 177 def __init__(self):
178 178 self.size = struct.calcsize(indexformatng)
179 179
180 180 def parseindex(self, data, inline):
181 181 # call the C implementation to parse the index data
182 182 index, cache = parsers.parse_index2(data, inline)
183 183 return index, getattr(index, 'nodemap', None), cache
184 184
185 185 def packentry(self, entry, node, version, rev):
186 186 p = _pack(indexformatng, *entry)
187 187 if rev == 0:
188 188 p = _pack(versionformat, version) + p[4:]
189 189 return p
190 190
191 191 class revlog(object):
192 192 """
193 193 the underlying revision storage object
194 194
195 195 A revlog consists of two parts, an index and the revision data.
196 196
197 197 The index is a file with a fixed record size containing
198 198 information on each revision, including its nodeid (hash), the
199 199 nodeids of its parents, the position and offset of its data within
200 200 the data file, and the revision it's based on. Finally, each entry
201 201 contains a linkrev entry that can serve as a pointer to external
202 202 data.
203 203
204 204 The revision data itself is a linear collection of data chunks.
205 205 Each chunk represents a revision and is usually represented as a
206 206 delta against the previous chunk. To bound lookup time, runs of
207 207 deltas are limited to about 2 times the length of the original
208 208 version data. This makes retrieval of a version proportional to
209 209 its size, or O(1) relative to the number of revisions.
210 210
211 211 Both pieces of the revlog are written to in an append-only
212 212 fashion, which means we never need to rewrite a file to insert or
213 213 remove data, and can use some simple techniques to avoid the need
214 214 for locking while reading.
215 215
216 216 If checkambig, indexfile is opened with checkambig=True at
217 217 writing, to avoid file stat ambiguity.
218 218 """
219 219 def __init__(self, opener, indexfile, checkambig=False):
220 220 """
221 221 create a revlog object
222 222
223 223 opener is a function that abstracts the file opening operation
224 224 and can be used to implement COW semantics or the like.
225 225 """
226 226 self.indexfile = indexfile
227 227 self.datafile = indexfile[:-2] + ".d"
228 228 self.opener = opener
229 229 # When True, indexfile is opened with checkambig=True at writing, to
230 230 # avoid file stat ambiguity.
231 231 self._checkambig = checkambig
232 232 # 3-tuple of (node, rev, text) for a raw revision.
233 233 self._cache = None
234 234 # Maps rev to chain base rev.
235 235 self._chainbasecache = util.lrucachedict(100)
236 236 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
237 237 self._chunkcache = (0, '')
238 238 # How much data to read and cache into the raw revlog data cache.
239 239 self._chunkcachesize = 65536
240 240 self._maxchainlen = None
241 241 self._aggressivemergedeltas = False
242 242 self.index = []
243 243 # Mapping of partial identifiers to full nodes.
244 244 self._pcache = {}
245 245 # Mapping of revision integer to full node.
246 246 self._nodecache = {nullid: nullrev}
247 247 self._nodepos = None
248 248
249 249 v = REVLOG_DEFAULT_VERSION
250 250 opts = getattr(opener, 'options', None)
251 251 if opts is not None:
252 252 if 'revlogv1' in opts:
253 253 if 'generaldelta' in opts:
254 254 v |= REVLOGGENERALDELTA
255 255 else:
256 256 v = 0
257 257 if 'chunkcachesize' in opts:
258 258 self._chunkcachesize = opts['chunkcachesize']
259 259 if 'maxchainlen' in opts:
260 260 self._maxchainlen = opts['maxchainlen']
261 261 if 'aggressivemergedeltas' in opts:
262 262 self._aggressivemergedeltas = opts['aggressivemergedeltas']
263 263 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
264 264
265 265 if self._chunkcachesize <= 0:
266 266 raise RevlogError(_('revlog chunk cache size %r is not greater '
267 267 'than 0') % self._chunkcachesize)
268 268 elif self._chunkcachesize & (self._chunkcachesize - 1):
269 269 raise RevlogError(_('revlog chunk cache size %r is not a power '
270 270 'of 2') % self._chunkcachesize)
271 271
272 272 indexdata = ''
273 273 self._initempty = True
274 274 try:
275 275 f = self.opener(self.indexfile)
276 276 indexdata = f.read()
277 277 f.close()
278 278 if len(indexdata) > 0:
279 279 v = struct.unpack(versionformat, indexdata[:4])[0]
280 280 self._initempty = False
281 281 except IOError as inst:
282 282 if inst.errno != errno.ENOENT:
283 283 raise
284 284
285 285 self.version = v
286 286 self._inline = v & REVLOGNGINLINEDATA
287 287 self._generaldelta = v & REVLOGGENERALDELTA
288 288 flags = v & ~0xFFFF
289 289 fmt = v & 0xFFFF
290 290 if fmt == REVLOGV0 and flags:
291 291 raise RevlogError(_("index %s unknown flags %#04x for format v0")
292 292 % (self.indexfile, flags >> 16))
293 293 elif fmt == REVLOGNG and flags & ~REVLOGNG_FLAGS:
294 294 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
295 295 % (self.indexfile, flags >> 16))
296 296 elif fmt > REVLOGNG:
297 297 raise RevlogError(_("index %s unknown format %d")
298 298 % (self.indexfile, fmt))
299 299
300 300 self.storedeltachains = True
301 301
302 302 self._io = revlogio()
303 303 if self.version == REVLOGV0:
304 304 self._io = revlogoldio()
305 305 try:
306 306 d = self._io.parseindex(indexdata, self._inline)
307 307 except (ValueError, IndexError):
308 308 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
309 309 self.index, nodemap, self._chunkcache = d
310 310 if nodemap is not None:
311 311 self.nodemap = self._nodecache = nodemap
312 312 if not self._chunkcache:
313 313 self._chunkclear()
314 314 # revnum -> (chain-length, sum-delta-length)
315 315 self._chaininfocache = {}
316 316
317 317 def tip(self):
318 318 return self.node(len(self.index) - 2)
319 319 def __contains__(self, rev):
320 320 return 0 <= rev < len(self)
321 321 def __len__(self):
322 322 return len(self.index) - 1
323 323 def __iter__(self):
324 324 return iter(xrange(len(self)))
325 325 def revs(self, start=0, stop=None):
326 326 """iterate over all rev in this revlog (from start to stop)"""
327 327 step = 1
328 328 if stop is not None:
329 329 if start > stop:
330 330 step = -1
331 331 stop += step
332 332 else:
333 333 stop = len(self)
334 334 return xrange(start, stop, step)
335 335
336 336 @util.propertycache
337 337 def nodemap(self):
338 338 self.rev(self.node(0))
339 339 return self._nodecache
340 340
341 341 def hasnode(self, node):
342 342 try:
343 343 self.rev(node)
344 344 return True
345 345 except KeyError:
346 346 return False
347 347
348 348 def clearcaches(self):
349 349 self._cache = None
350 350 self._chainbasecache.clear()
351 351 self._chunkcache = (0, '')
352 352 self._pcache = {}
353 353
354 354 try:
355 355 self._nodecache.clearcaches()
356 356 except AttributeError:
357 357 self._nodecache = {nullid: nullrev}
358 358 self._nodepos = None
359 359
360 360 def rev(self, node):
361 361 try:
362 362 return self._nodecache[node]
363 363 except TypeError:
364 364 raise
365 365 except RevlogError:
366 366 # parsers.c radix tree lookup failed
367 367 raise LookupError(node, self.indexfile, _('no node'))
368 368 except KeyError:
369 369 # pure python cache lookup failed
370 370 n = self._nodecache
371 371 i = self.index
372 372 p = self._nodepos
373 373 if p is None:
374 374 p = len(i) - 2
375 375 for r in xrange(p, -1, -1):
376 376 v = i[r][7]
377 377 n[v] = r
378 378 if v == node:
379 379 self._nodepos = r - 1
380 380 return r
381 381 raise LookupError(node, self.indexfile, _('no node'))
382 382
383 383 # Accessors for index entries.
384 384
385 385 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
386 386 # are flags.
387 387 def start(self, rev):
388 388 return int(self.index[rev][0] >> 16)
389 389
390 390 def flags(self, rev):
391 391 return self.index[rev][0] & 0xFFFF
392 392
393 393 def length(self, rev):
394 394 return self.index[rev][1]
395 395
396 396 def rawsize(self, rev):
397 397 """return the length of the uncompressed text for a given revision"""
398 398 l = self.index[rev][2]
399 399 if l >= 0:
400 400 return l
401 401
402 402 t = self.revision(self.node(rev))
403 403 return len(t)
404 404 size = rawsize
405 405
406 406 def chainbase(self, rev):
407 407 base = self._chainbasecache.get(rev)
408 408 if base is not None:
409 409 return base
410 410
411 411 index = self.index
412 412 base = index[rev][3]
413 413 while base != rev:
414 414 rev = base
415 415 base = index[rev][3]
416 416
417 417 self._chainbasecache[rev] = base
418 418 return base
419 419
420 420 def linkrev(self, rev):
421 421 return self.index[rev][4]
422 422
423 423 def parentrevs(self, rev):
424 424 return self.index[rev][5:7]
425 425
426 426 def node(self, rev):
427 427 return self.index[rev][7]
428 428
429 429 # Derived from index values.
430 430
431 431 def end(self, rev):
432 432 return self.start(rev) + self.length(rev)
433 433
434 434 def parents(self, node):
435 435 i = self.index
436 436 d = i[self.rev(node)]
437 437 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
438 438
439 439 def chainlen(self, rev):
440 440 return self._chaininfo(rev)[0]
441 441
442 442 def _chaininfo(self, rev):
443 443 chaininfocache = self._chaininfocache
444 444 if rev in chaininfocache:
445 445 return chaininfocache[rev]
446 446 index = self.index
447 447 generaldelta = self._generaldelta
448 448 iterrev = rev
449 449 e = index[iterrev]
450 450 clen = 0
451 451 compresseddeltalen = 0
452 452 while iterrev != e[3]:
453 453 clen += 1
454 454 compresseddeltalen += e[1]
455 455 if generaldelta:
456 456 iterrev = e[3]
457 457 else:
458 458 iterrev -= 1
459 459 if iterrev in chaininfocache:
460 460 t = chaininfocache[iterrev]
461 461 clen += t[0]
462 462 compresseddeltalen += t[1]
463 463 break
464 464 e = index[iterrev]
465 465 else:
466 466 # Add text length of base since decompressing that also takes
467 467 # work. For cache hits the length is already included.
468 468 compresseddeltalen += e[1]
469 469 r = (clen, compresseddeltalen)
470 470 chaininfocache[rev] = r
471 471 return r
472 472
473 473 def _deltachain(self, rev, stoprev=None):
474 474 """Obtain the delta chain for a revision.
475 475
476 476 ``stoprev`` specifies a revision to stop at. If not specified, we
477 477 stop at the base of the chain.
478 478
479 479 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
480 480 revs in ascending order and ``stopped`` is a bool indicating whether
481 481 ``stoprev`` was hit.
482 482 """
483 483 chain = []
484 484
485 485 # Alias to prevent attribute lookup in tight loop.
486 486 index = self.index
487 487 generaldelta = self._generaldelta
488 488
489 489 iterrev = rev
490 490 e = index[iterrev]
491 491 while iterrev != e[3] and iterrev != stoprev:
492 492 chain.append(iterrev)
493 493 if generaldelta:
494 494 iterrev = e[3]
495 495 else:
496 496 iterrev -= 1
497 497 e = index[iterrev]
498 498
499 499 if iterrev == stoprev:
500 500 stopped = True
501 501 else:
502 502 chain.append(iterrev)
503 503 stopped = False
504 504
505 505 chain.reverse()
506 506 return chain, stopped
507 507
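
A toy model of the generaldelta walk above, with a hypothetical base map standing in for index[rev][3]: the chain is followed until a rev that is its own base (a full snapshot), then reversed into ascending order:

    base = {0: 0, 1: 0, 2: 1, 3: 1, 4: 3}  # hypothetical rev -> delta base
    def toychain(rev):
        chain = []
        while rev != base[rev]:
            chain.append(rev)
            rev = base[rev]
        chain.append(rev)
        chain.reverse()
        return chain
    assert toychain(4) == [0, 1, 3, 4]
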
508 508 def ancestors(self, revs, stoprev=0, inclusive=False):
509 509 """Generate the ancestors of 'revs' in reverse topological order.
510 510 Does not generate revs lower than stoprev.
511 511
512 512 See the documentation for ancestor.lazyancestors for more details."""
513 513
514 514 return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
515 515 inclusive=inclusive)
516 516
517 517 def descendants(self, revs):
518 518 """Generate the descendants of 'revs' in revision order.
519 519
520 520 Yield a sequence of revision numbers starting with a child of
521 521 some rev in revs, i.e., each revision is *not* considered a
522 522 descendant of itself. Results are ordered by revision number (a
523 523 topological sort)."""
524 524 first = min(revs)
525 525 if first == nullrev:
526 526 for i in self:
527 527 yield i
528 528 return
529 529
530 530 seen = set(revs)
531 531 for i in self.revs(start=first + 1):
532 532 for x in self.parentrevs(i):
533 533 if x != nullrev and x in seen:
534 534 seen.add(i)
535 535 yield i
536 536 break
537 537
538 538 def findcommonmissing(self, common=None, heads=None):
539 539 """Return a tuple of the ancestors of common and the ancestors of heads
540 540 that are not ancestors of common. In revset terminology, we return the
541 541 tuple:
542 542
543 543 ::common, (::heads) - (::common)
544 544
545 545 The list is sorted by revision number, meaning it is
546 546 topologically sorted.
547 547
548 548 'heads' and 'common' are both lists of node IDs. If heads is
549 549 not supplied, uses all of the revlog's heads. If common is not
550 550 supplied, uses nullid."""
551 551 if common is None:
552 552 common = [nullid]
553 553 if heads is None:
554 554 heads = self.heads()
555 555
556 556 common = [self.rev(n) for n in common]
557 557 heads = [self.rev(n) for n in heads]
558 558
559 559 # we want the ancestors, but inclusive
560 560 class lazyset(object):
561 561 def __init__(self, lazyvalues):
562 562 self.addedvalues = set()
563 563 self.lazyvalues = lazyvalues
564 564
565 565 def __contains__(self, value):
566 566 return value in self.addedvalues or value in self.lazyvalues
567 567
568 568 def __iter__(self):
569 569 added = self.addedvalues
570 570 for r in added:
571 571 yield r
572 572 for r in self.lazyvalues:
573 573 if not r in added:
574 574 yield r
575 575
576 576 def add(self, value):
577 577 self.addedvalues.add(value)
578 578
579 579 def update(self, values):
580 580 self.addedvalues.update(values)
581 581
582 582 has = lazyset(self.ancestors(common))
583 583 has.add(nullrev)
584 584 has.update(common)
585 585
586 586 # take all ancestors from heads that aren't in has
587 587 missing = set()
588 588 visit = collections.deque(r for r in heads if r not in has)
589 589 while visit:
590 590 r = visit.popleft()
591 591 if r in missing:
592 592 continue
593 593 else:
594 594 missing.add(r)
595 595 for p in self.parentrevs(r):
596 596 if p not in has:
597 597 visit.append(p)
598 598 missing = list(missing)
599 599 missing.sort()
600 600 return has, [self.node(r) for r in missing]
601 601
602 602 def incrementalmissingrevs(self, common=None):
603 603 """Return an object that can be used to incrementally compute the
604 604 revision numbers of the ancestors of arbitrary sets that are not
605 605 ancestors of common. This is an ancestor.incrementalmissingancestors
606 606 object.
607 607
608 608 'common' is a list of revision numbers. If common is not supplied, uses
609 609 nullrev.
610 610 """
611 611 if common is None:
612 612 common = [nullrev]
613 613
614 614 return ancestor.incrementalmissingancestors(self.parentrevs, common)
615 615
616 616 def findmissingrevs(self, common=None, heads=None):
617 617 """Return the revision numbers of the ancestors of heads that
618 618 are not ancestors of common.
619 619
620 620 More specifically, return a list of revision numbers corresponding to
621 621 nodes N such that every N satisfies the following constraints:
622 622
623 623 1. N is an ancestor of some node in 'heads'
624 624 2. N is not an ancestor of any node in 'common'
625 625
626 626 The list is sorted by revision number, meaning it is
627 627 topologically sorted.
628 628
629 629 'heads' and 'common' are both lists of revision numbers. If heads is
630 630 not supplied, uses all of the revlog's heads. If common is not
631 631 supplied, uses nullid."""
632 632 if common is None:
633 633 common = [nullrev]
634 634 if heads is None:
635 635 heads = self.headrevs()
636 636
637 637 inc = self.incrementalmissingrevs(common=common)
638 638 return inc.missingancestors(heads)
639 639
640 640 def findmissing(self, common=None, heads=None):
641 641 """Return the ancestors of heads that are not ancestors of common.
642 642
643 643 More specifically, return a list of nodes N such that every N
644 644 satisfies the following constraints:
645 645
646 646 1. N is an ancestor of some node in 'heads'
647 647 2. N is not an ancestor of any node in 'common'
648 648
649 649 The list is sorted by revision number, meaning it is
650 650 topologically sorted.
651 651
652 652 'heads' and 'common' are both lists of node IDs. If heads is
653 653 not supplied, uses all of the revlog's heads. If common is not
654 654 supplied, uses nullid."""
655 655 if common is None:
656 656 common = [nullid]
657 657 if heads is None:
658 658 heads = self.heads()
659 659
660 660 common = [self.rev(n) for n in common]
661 661 heads = [self.rev(n) for n in heads]
662 662
663 663 inc = self.incrementalmissingrevs(common=common)
664 664 return [self.node(r) for r in inc.missingancestors(heads)]
665 665
666 666 def nodesbetween(self, roots=None, heads=None):
667 667 """Return a topological path from 'roots' to 'heads'.
668 668
669 669 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
670 670 topologically sorted list of all nodes N that satisfy both of
671 671 these constraints:
672 672
673 673 1. N is a descendant of some node in 'roots'
674 674 2. N is an ancestor of some node in 'heads'
675 675
676 676 Every node is considered to be both a descendant and an ancestor
677 677 of itself, so every reachable node in 'roots' and 'heads' will be
678 678 included in 'nodes'.
679 679
680 680 'outroots' is the list of reachable nodes in 'roots', i.e., the
681 681 subset of 'roots' that is returned in 'nodes'. Likewise,
682 682 'outheads' is the subset of 'heads' that is also in 'nodes'.
683 683
684 684 'roots' and 'heads' are both lists of node IDs. If 'roots' is
685 685 unspecified, uses nullid as the only root. If 'heads' is
686 686 unspecified, uses list of all of the revlog's heads."""
687 687 nonodes = ([], [], [])
688 688 if roots is not None:
689 689 roots = list(roots)
690 690 if not roots:
691 691 return nonodes
692 692 lowestrev = min([self.rev(n) for n in roots])
693 693 else:
694 694 roots = [nullid] # Everybody's a descendant of nullid
695 695 lowestrev = nullrev
696 696 if (lowestrev == nullrev) and (heads is None):
697 697 # We want _all_ the nodes!
698 698 return ([self.node(r) for r in self], [nullid], list(self.heads()))
699 699 if heads is None:
700 700 # All nodes are ancestors, so the latest ancestor is the last
701 701 # node.
702 702 highestrev = len(self) - 1
703 703 # Set ancestors to None to signal that every node is an ancestor.
704 704 ancestors = None
705 705 # Set heads to an empty dictionary for later discovery of heads
706 706 heads = {}
707 707 else:
708 708 heads = list(heads)
709 709 if not heads:
710 710 return nonodes
711 711 ancestors = set()
712 712 # Turn heads into a dictionary so we can remove 'fake' heads.
713 713 # Also, later we will be using it to filter out the heads we can't
714 714 # find from roots.
715 715 heads = dict.fromkeys(heads, False)
716 716 # Start at the top and keep marking parents until we're done.
717 717 nodestotag = set(heads)
718 718 # Remember where the top was so we can use it as a limit later.
719 719 highestrev = max([self.rev(n) for n in nodestotag])
720 720 while nodestotag:
721 721 # grab a node to tag
722 722 n = nodestotag.pop()
723 723 # Never tag nullid
724 724 if n == nullid:
725 725 continue
726 726 # A node's revision number represents its place in a
727 727 # topologically sorted list of nodes.
728 728 r = self.rev(n)
729 729 if r >= lowestrev:
730 730 if n not in ancestors:
731 731 # If we are possibly a descendant of one of the roots
732 732 # and we haven't already been marked as an ancestor
733 733 ancestors.add(n) # Mark as ancestor
734 734 # Add non-nullid parents to list of nodes to tag.
735 735 nodestotag.update([p for p in self.parents(n) if
736 736 p != nullid])
737 737 elif n in heads: # We've seen it before, is it a fake head?
738 738 # So it is, real heads should not be the ancestors of
739 739 # any other heads.
740 740 heads.pop(n)
741 741 if not ancestors:
742 742 return nonodes
743 743 # Now that we have our set of ancestors, we want to remove any
744 744 # roots that are not ancestors.
745 745
746 746 # If one of the roots was nullid, everything is included anyway.
747 747 if lowestrev > nullrev:
748 748 # But, since we weren't, let's recompute the lowest rev to not
749 749 # include roots that aren't ancestors.
750 750
751 751 # Filter out roots that aren't ancestors of heads
752 752 roots = [n for n in roots if n in ancestors]
753 753 # Recompute the lowest revision
754 754 if roots:
755 755 lowestrev = min([self.rev(n) for n in roots])
756 756 else:
757 757 # No more roots? Return empty list
758 758 return nonodes
759 759 else:
760 760 # We are descending from nullid, and don't need to care about
761 761 # any other roots.
762 762 lowestrev = nullrev
763 763 roots = [nullid]
764 764 # Transform our roots list into a set.
765 765 descendants = set(roots)
766 766 # Also, keep the original roots so we can filter out roots that aren't
767 767 # 'real' roots (i.e. are descended from other roots).
768 768 roots = descendants.copy()
769 769 # Our topologically sorted list of output nodes.
770 770 orderedout = []
771 771 # Don't start at nullid since we don't want nullid in our output list,
772 772 # and if nullid shows up in descendants, empty parents will look like
773 773 # they're descendants.
774 774 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
775 775 n = self.node(r)
776 776 isdescendant = False
777 777 if lowestrev == nullrev: # Everybody is a descendant of nullid
778 778 isdescendant = True
779 779 elif n in descendants:
780 780 # n is already a descendant
781 781 isdescendant = True
782 782 # This check only needs to be done here because all the roots
783 783 # will start being marked as descendants before the loop.
784 784 if n in roots:
785 785 # If n was a root, check if it's a 'real' root.
786 786 p = tuple(self.parents(n))
787 787 # If any of its parents are descendants, it's not a root.
788 788 if (p[0] in descendants) or (p[1] in descendants):
789 789 roots.remove(n)
790 790 else:
791 791 p = tuple(self.parents(n))
792 792 # A node is a descendant if either of its parents are
793 793 # descendants. (We seeded the descendants set with the roots
794 794 # up there, remember?)
795 795 if (p[0] in descendants) or (p[1] in descendants):
796 796 descendants.add(n)
797 797 isdescendant = True
798 798 if isdescendant and ((ancestors is None) or (n in ancestors)):
799 799 # Only include nodes that are both descendants and ancestors.
800 800 orderedout.append(n)
801 801 if (ancestors is not None) and (n in heads):
802 802 # We're trying to figure out which heads are reachable
803 803 # from roots.
804 804 # Mark this head as having been reached
805 805 heads[n] = True
806 806 elif ancestors is None:
807 807 # Otherwise, we're trying to discover the heads.
808 808 # Assume this is a head because if it isn't, the next step
809 809 # will eventually remove it.
810 810 heads[n] = True
811 811 # But, obviously its parents aren't.
812 812 for p in self.parents(n):
813 813 heads.pop(p, None)
814 814 heads = [n for n, flag in heads.iteritems() if flag]
815 815 roots = list(roots)
816 816 assert orderedout
817 817 assert roots
818 818 assert heads
819 819 return (orderedout, roots, heads)
820 820
821 821 def headrevs(self):
822 822 try:
823 823 return self.index.headrevs()
824 824 except AttributeError:
825 825 return self._headrevs()
826 826
827 827 def computephases(self, roots):
828 828 return self.index.computephasesmapsets(roots)
829 829
830 830 def _headrevs(self):
831 831 count = len(self)
832 832 if not count:
833 833 return [nullrev]
834 834 # we won't iterate over filtered revs, so nobody is a head at start
835 835 ishead = [0] * (count + 1)
836 836 index = self.index
837 837 for r in self:
838 838 ishead[r] = 1 # I may be a head
839 839 e = index[r]
840 840 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
841 841 return [r for r, val in enumerate(ishead) if val]
842 842
843 843 def heads(self, start=None, stop=None):
844 844 """return the list of all nodes that have no children
845 845
846 846 if start is specified, only heads that are descendants of
847 847 start will be returned
848 848 if stop is specified, it will consider all the revs from stop
849 849 as if they had no children
850 850 """
851 851 if start is None and stop is None:
852 852 if not len(self):
853 853 return [nullid]
854 854 return [self.node(r) for r in self.headrevs()]
855 855
856 856 if start is None:
857 857 start = nullid
858 858 if stop is None:
859 859 stop = []
860 860 stoprevs = set([self.rev(n) for n in stop])
861 861 startrev = self.rev(start)
862 862 reachable = set((startrev,))
863 863 heads = set((startrev,))
864 864
865 865 parentrevs = self.parentrevs
866 866 for r in self.revs(start=startrev + 1):
867 867 for p in parentrevs(r):
868 868 if p in reachable:
869 869 if r not in stoprevs:
870 870 reachable.add(r)
871 871 heads.add(r)
872 872 if p in heads and p not in stoprevs:
873 873 heads.remove(p)
874 874
875 875 return [self.node(r) for r in heads]
876 876
877 877 def children(self, node):
878 878 """find the children of a given node"""
879 879 c = []
880 880 p = self.rev(node)
881 881 for r in self.revs(start=p + 1):
882 882 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
883 883 if prevs:
884 884 for pr in prevs:
885 885 if pr == p:
886 886 c.append(self.node(r))
887 887 elif p == nullrev:
888 888 c.append(self.node(r))
889 889 return c
890 890
891 891 def descendant(self, start, end):
892 892 if start == nullrev:
893 893 return True
894 894 for i in self.descendants([start]):
895 895 if i == end:
896 896 return True
897 897 elif i > end:
898 898 break
899 899 return False
900 900
901 901 def commonancestorsheads(self, a, b):
902 902 """calculate all the heads of the common ancestors of nodes a and b"""
903 903 a, b = self.rev(a), self.rev(b)
904 904 try:
905 905 ancs = self.index.commonancestorsheads(a, b)
906 906 except (AttributeError, OverflowError): # C implementation failed
907 907 ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)
908 908 return map(self.node, ancs)
909 909
910 910 def isancestor(self, a, b):
911 911 """return True if node a is an ancestor of node b
912 912
913 913 The implementation of this is trivial but the use of
914 914 commonancestorsheads is not."""
915 915 return a in self.commonancestorsheads(a, b)
916 916
917 917 def ancestor(self, a, b):
918 918 """calculate the "best" common ancestor of nodes a and b"""
919 919
920 920 a, b = self.rev(a), self.rev(b)
921 921 try:
922 922 ancs = self.index.ancestors(a, b)
923 923 except (AttributeError, OverflowError):
924 924 ancs = ancestor.ancestors(self.parentrevs, a, b)
925 925 if ancs:
926 926 # choose a consistent winner when there's a tie
927 927 return min(map(self.node, ancs))
928 928 return nullid
929 929
930 930 def _match(self, id):
931 931 if isinstance(id, int):
932 932 # rev
933 933 return self.node(id)
934 934 if len(id) == 20:
935 935 # possibly a binary node
936 936 # odds of a binary node being all hex in ASCII are 1 in 10**25
937 937 try:
938 938 node = id
939 939 self.rev(node) # quick search the index
940 940 return node
941 941 except LookupError:
942 942 pass # may be partial hex id
943 943 try:
944 944 # str(rev)
945 945 rev = int(id)
946 946 if str(rev) != id:
947 947 raise ValueError
948 948 if rev < 0:
949 949 rev = len(self) + rev
950 950 if rev < 0 or rev >= len(self):
951 951 raise ValueError
952 952 return self.node(rev)
953 953 except (ValueError, OverflowError):
954 954 pass
955 955 if len(id) == 40:
956 956 try:
957 957 # a full hex nodeid?
958 958 node = bin(id)
959 959 self.rev(node)
960 960 return node
961 961 except (TypeError, LookupError):
962 962 pass
963 963
964 964 def _partialmatch(self, id):
965 965 try:
966 966 n = self.index.partialmatch(id)
967 967 if n and self.hasnode(n):
968 968 return n
969 969 return None
970 970 except RevlogError:
971 971 # parsers.c radix tree lookup gave multiple matches
972 972 # fast path: for unfiltered changelog, radix tree is accurate
973 973 if not getattr(self, 'filteredrevs', None):
974 974 raise LookupError(id, self.indexfile,
975 975 _('ambiguous identifier'))
976 976 # fall through to slow path that filters hidden revisions
977 977 except (AttributeError, ValueError):
978 978 # we are pure python, or key was too short to search radix tree
979 979 pass
980 980
981 981 if id in self._pcache:
982 982 return self._pcache[id]
983 983
984 984 if len(id) < 40:
985 985 try:
986 986 # hex(node)[:...]
987 987 l = len(id) // 2 # grab an even number of digits
988 988 prefix = bin(id[:l * 2])
989 989 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
990 990 nl = [n for n in nl if hex(n).startswith(id) and
991 991 self.hasnode(n)]
992 992 if len(nl) > 0:
993 993 if len(nl) == 1:
994 994 self._pcache[id] = nl[0]
995 995 return nl[0]
996 996 raise LookupError(id, self.indexfile,
997 997 _('ambiguous identifier'))
998 998 return None
999 999 except TypeError:
1000 1000 pass
1001 1001
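
A sketch of the prefix comparison above, with hypothetical values: an odd-length hex id is truncated to an even number of digits for the binary comparison, and the full hex check then recovers the odd trailing digit:

    node = '\xab\xcd\xef' + '\x00' * 17  # hypothetical 20-byte node
    id = 'abcde'                         # five hex digits from the user
    l = len(id) // 2                     # 2 -> compare on two binary bytes
    prefix = bin(id[:l * 2])             # '\xab\xcd'
    assert node.startswith(prefix) and hex(node).startswith(id)
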
1002 1002 def lookup(self, id):
1003 1003 """locate a node based on:
1004 1004 - revision number or str(revision number)
1005 1005 - nodeid or subset of hex nodeid
1006 1006 """
1007 1007 n = self._match(id)
1008 1008 if n is not None:
1009 1009 return n
1010 1010 n = self._partialmatch(id)
1011 1011 if n:
1012 1012 return n
1013 1013
1014 1014 raise LookupError(id, self.indexfile, _('no match found'))
1015 1015
1016 1016 def cmp(self, node, text):
1017 1017 """compare text with a given file revision
1018 1018
1019 1019 returns True if text is different from what is stored.
1020 1020 """
1021 1021 p1, p2 = self.parents(node)
1022 1022 return hash(text, p1, p2) != node
1023 1023
1024 1024 def _addchunk(self, offset, data):
1025 1025 """Add a segment to the revlog cache.
1026 1026
1027 1027 Accepts an absolute offset and the data that is at that location.
1028 1028 """
1029 1029 o, d = self._chunkcache
1030 1030 # try to add to existing cache
1031 1031 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1032 1032 self._chunkcache = o, d + data
1033 1033 else:
1034 1034 self._chunkcache = offset, data
1035 1035
1036 1036 def _loadchunk(self, offset, length, df=None):
1037 1037 """Load a segment of raw data from the revlog.
1038 1038
1039 1039 Accepts an absolute offset, length to read, and an optional existing
1040 1040 file handle to read from.
1041 1041
1042 1042 If an existing file handle is passed, it will be seeked and the
1043 1043 original seek position will NOT be restored.
1044 1044
1045 1045 Returns a str or buffer of raw byte data.
1046 1046 """
1047 1047 if df is not None:
1048 1048 closehandle = False
1049 1049 else:
1050 1050 if self._inline:
1051 1051 df = self.opener(self.indexfile)
1052 1052 else:
1053 1053 df = self.opener(self.datafile)
1054 1054 closehandle = True
1055 1055
1056 1056 # Cache data both forward and backward around the requested
1057 1057 # data, in a fixed size window. This helps speed up operations
1058 1058 # involving reading the revlog backwards.
1059 1059 cachesize = self._chunkcachesize
1060 1060 realoffset = offset & ~(cachesize - 1)
1061 1061 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
1062 1062 - realoffset)
1063 1063 df.seek(realoffset)
1064 1064 d = df.read(reallength)
1065 1065 if closehandle:
1066 1066 df.close()
1067 1067 self._addchunk(realoffset, d)
1068 1068 if offset != realoffset or reallength != length:
1069 1069 return util.buffer(d, offset - realoffset, length)
1070 1070 return d
1071 1071
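
A worked example of the window arithmetic above, assuming the default 65536-byte cache size (a power of two, as __init__ enforces, so the mask trick is valid):

    cachesize = 65536
    offset, length = 70000, 100
    realoffset = offset & ~(cachesize - 1)            # 65536: rounded down
    reallength = (((offset + length + cachesize)
                   & ~(cachesize - 1)) - realoffset)  # 65536: rounded up
    assert realoffset <= offset
    assert realoffset + reallength >= offset + length
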
1072 1072 def _getchunk(self, offset, length, df=None):
1073 1073 """Obtain a segment of raw data from the revlog.
1074 1074
1075 1075 Accepts an absolute offset, length of bytes to obtain, and an
1076 1076 optional file handle to the already-opened revlog. If the file
1077 1077 handle is used, its original seek position will not be preserved.
1078 1078
1079 1079 Requests for data may be returned from a cache.
1080 1080
1081 1081 Returns a str or a buffer instance of raw byte data.
1082 1082 """
1083 1083 o, d = self._chunkcache
1084 1084 l = len(d)
1085 1085
1086 1086 # is it in the cache?
1087 1087 cachestart = offset - o
1088 1088 cacheend = cachestart + length
1089 1089 if cachestart >= 0 and cacheend <= l:
1090 1090 if cachestart == 0 and cacheend == l:
1091 1091 return d # avoid a copy
1092 1092 return util.buffer(d, cachestart, cacheend - cachestart)
1093 1093
1094 1094 return self._loadchunk(offset, length, df=df)
1095 1095
1096 1096 def _chunkraw(self, startrev, endrev, df=None):
1097 1097 """Obtain a segment of raw data corresponding to a range of revisions.
1098 1098
1099 1099 Accepts the start and end revisions and an optional already-open
1100 1100 file handle to be used for reading. If the file handle is read, its
1101 1101 seek position will not be preserved.
1102 1102
1103 1103 Requests for data may be satisfied by a cache.
1104 1104
1105 1105 Returns a 2-tuple of (offset, data) for the requested range of
1106 1106 revisions. Offset is the integer offset from the beginning of the
1107 1107 revlog and data is a str or buffer of the raw byte data.
1108 1108
1109 1109 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1110 1110 to determine where each revision's data begins and ends.
1111 1111 """
1112 start = self.start(startrev)
1113 end = self.end(endrev)
1112 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1113 # (functions are expensive).
1114 index = self.index
1115 istart = index[startrev]
1116 iend = index[endrev]
1117 start = int(istart[0] >> 16)
1118 end = int(iend[0] >> 16) + iend[1]
1119
1114 1120 if self._inline:
1115 1121 start += (startrev + 1) * self._io.size
1116 1122 end += (endrev + 1) * self._io.size
1117 1123 length = end - start
1118 1124
1119 1125 return start, self._getchunk(start, length, df=df)
1120 1126
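
The inlined arithmetic above (the subject of this commit) is equivalent to the start()/end() accessors it replaces, minus the method-call overhead; a sketch of the invariant for a hypothetical revlog rl and any valid startrev <= endrev:

    def inlined_matches_accessors(rl, startrev, endrev):
        istart, iend = rl.index[startrev], rl.index[endrev]
        assert int(istart[0] >> 16) == rl.start(startrev)
        assert int(iend[0] >> 16) + iend[1] == rl.end(endrev)
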
1121 1127 def _chunk(self, rev, df=None):
1122 1128 """Obtain a single decompressed chunk for a revision.
1123 1129
1124 1130 Accepts an integer revision and an optional already-open file handle
1125 1131 to be used for reading. If used, the seek position of the file will not
1126 1132 be preserved.
1127 1133
1128 1134 Returns a str holding uncompressed data for the requested revision.
1129 1135 """
1130 1136 return decompress(self._chunkraw(rev, rev, df=df)[1])
1131 1137
1132 1138 def _chunks(self, revs, df=None):
1133 1139 """Obtain decompressed chunks for the specified revisions.
1134 1140
1135 1141 Accepts an iterable of numeric revisions that are assumed to be in
1136 1142 ascending order. Also accepts an optional already-open file handle
1137 1143 to be used for reading. If used, the seek position of the file will
1138 1144 not be preserved.
1139 1145
1140 1146 This function is similar to calling ``self._chunk()`` multiple times,
1141 1147 but is faster.
1142 1148
1143 1149 Returns a list with decompressed data for each requested revision.
1144 1150 """
1145 1151 if not revs:
1146 1152 return []
1147 1153 start = self.start
1148 1154 length = self.length
1149 1155 inline = self._inline
1150 1156 iosize = self._io.size
1151 1157 buffer = util.buffer
1152 1158
1153 1159 l = []
1154 1160 ladd = l.append
1155 1161
1156 1162 try:
1157 1163 offset, data = self._chunkraw(revs[0], revs[-1], df=df)
1158 1164 except OverflowError:
1159 1165 # issue4215 - we can't cache a run of chunks greater than
1160 1166 # 2G on Windows
1161 1167 return [self._chunk(rev, df=df) for rev in revs]
1162 1168
1163 1169 for rev in revs:
1164 1170 chunkstart = start(rev)
1165 1171 if inline:
1166 1172 chunkstart += (rev + 1) * iosize
1167 1173 chunklength = length(rev)
1168 1174 ladd(decompress(buffer(data, chunkstart - offset, chunklength)))
1169 1175
1170 1176 return l
1171 1177
1172 1178 def _chunkclear(self):
1173 1179 """Clear the raw chunk cache."""
1174 1180 self._chunkcache = (0, '')
1175 1181
1176 1182 def deltaparent(self, rev):
1177 1183 """return deltaparent of the given revision"""
1178 1184 base = self.index[rev][3]
1179 1185 if base == rev:
1180 1186 return nullrev
1181 1187 elif self._generaldelta:
1182 1188 return base
1183 1189 else:
1184 1190 return rev - 1
1185 1191
1186 1192 def revdiff(self, rev1, rev2):
1187 1193 """return or calculate a delta between two revisions"""
1188 1194 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1189 1195 return str(self._chunk(rev2))
1190 1196
1191 1197 return mdiff.textdiff(self.revision(rev1),
1192 1198 self.revision(rev2))
1193 1199
1194 1200 def revision(self, nodeorrev, _df=None):
1195 1201 """return an uncompressed revision of a given node or revision
1196 1202 number.
1197 1203
1198 1204 _df is an existing file handle to read from. It is meant to only be
1199 1205 used internally.
1200 1206 """
1201 1207 if isinstance(nodeorrev, int):
1202 1208 rev = nodeorrev
1203 1209 node = self.node(rev)
1204 1210 else:
1205 1211 node = nodeorrev
1206 1212 rev = None
1207 1213
1208 1214 cachedrev = None
1209 1215 if node == nullid:
1210 1216 return ""
1211 1217 if self._cache:
1212 1218 if self._cache[0] == node:
1213 1219 return self._cache[2]
1214 1220 cachedrev = self._cache[1]
1215 1221
1216 1222 # look up what we need to read
1217 1223 text = None
1218 1224 if rev is None:
1219 1225 rev = self.rev(node)
1220 1226
1221 1227 # check rev flags
1222 1228 if self.flags(rev) & ~REVIDX_KNOWN_FLAGS:
1223 1229 raise RevlogError(_('incompatible revision flag %x') %
1224 1230 (self.flags(rev) & ~REVIDX_KNOWN_FLAGS))
1225 1231
1226 1232 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1227 1233 if stopped:
1228 1234 text = self._cache[2]
1229 1235
1230 1236 # drop cache to save memory
1231 1237 self._cache = None
1232 1238
1233 1239 bins = self._chunks(chain, df=_df)
1234 1240 if text is None:
1235 1241 text = str(bins[0])
1236 1242 bins = bins[1:]
1237 1243
1238 1244 text = mdiff.patches(text, bins)
1239 1245
1240 1246 text = self._checkhash(text, node, rev)
1241 1247
1242 1248 self._cache = (node, rev, text)
1243 1249 return text
1244 1250
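
A sketch of the reconstruction revision() performs, ignoring its caches (hypothetical helper, for a revlog rl and node n): the first chunk of a full chain is the base text, the rest are deltas applied in order, and the result is hash-checked against the node:

    def slowrevision(rl, n):
        rev = rl.rev(n)
        chain, _stopped = rl._deltachain(rev)
        bins = rl._chunks(chain)
        text = mdiff.patches(str(bins[0]), bins[1:])
        return rl._checkhash(text, n, rev)
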
1245 1251 def hash(self, text, p1, p2):
1246 1252 """Compute a node hash.
1247 1253
1248 1254 Available as a function so that subclasses can replace the hash
1249 1255 as needed.
1250 1256 """
1251 1257 return hash(text, p1, p2)
1252 1258
1253 1259 def _checkhash(self, text, node, rev):
1254 1260 p1, p2 = self.parents(node)
1255 1261 self.checkhash(text, p1, p2, node, rev)
1256 1262 return text
1257 1263
1258 1264 def checkhash(self, text, p1, p2, node, rev=None):
1259 1265 if node != self.hash(text, p1, p2):
1260 1266 revornode = rev
1261 1267 if revornode is None:
1262 1268 revornode = templatefilters.short(hex(node))
1263 1269 raise RevlogError(_("integrity check failed on %s:%s")
1264 1270 % (self.indexfile, revornode))
1265 1271
1266 1272 def checkinlinesize(self, tr, fp=None):
1267 1273 """Check if the revlog is too big for inline and convert if so.
1268 1274
1269 1275 This should be called after revisions are added to the revlog. If the
1270 1276 revlog has grown too large to be an inline revlog, it will convert it
1271 1277 to use multiple index and data files.
1272 1278 """
1273 1279 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
1274 1280 return
1275 1281
1276 1282 trinfo = tr.find(self.indexfile)
1277 1283 if trinfo is None:
1278 1284 raise RevlogError(_("%s not found in the transaction")
1279 1285 % self.indexfile)
1280 1286
1281 1287 trindex = trinfo[2]
1282 1288 if trindex is not None:
1283 1289 dataoff = self.start(trindex)
1284 1290 else:
1285 1291 # revlog was stripped at start of transaction, use all leftover data
1286 1292 trindex = len(self) - 1
1287 1293 dataoff = self.end(-2)
1288 1294
1289 1295 tr.add(self.datafile, dataoff)
1290 1296
1291 1297 if fp:
1292 1298 fp.flush()
1293 1299 fp.close()
1294 1300
1295 1301 df = self.opener(self.datafile, 'w')
1296 1302 try:
1297 1303 for r in self:
1298 1304 df.write(self._chunkraw(r, r)[1])
1299 1305 finally:
1300 1306 df.close()
1301 1307
1302 1308 fp = self.opener(self.indexfile, 'w', atomictemp=True,
1303 1309 checkambig=self._checkambig)
1304 1310 self.version &= ~(REVLOGNGINLINEDATA)
1305 1311 self._inline = False
1306 1312 for i in self:
1307 1313 e = self._io.packentry(self.index[i], self.node, self.version, i)
1308 1314 fp.write(e)
1309 1315
1310 1316 # if we don't call close, the temp file will never replace the
1311 1317 # real index
1312 1318 fp.close()
1313 1319
1314 1320 tr.replace(self.indexfile, trindex * self._io.size)
1315 1321 self._chunkclear()
1316 1322
1317 1323 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1318 1324 node=None):
1319 1325 """add a revision to the log
1320 1326
1321 1327 text - the revision data to add
1322 1328 transaction - the transaction object used for rollback
1323 1329 link - the linkrev data to add
1324 1330 p1, p2 - the parent nodeids of the revision
1325 1331 cachedelta - an optional precomputed delta
1326 1332 node - nodeid of revision; typically node is not specified, and it is
1327 1333 computed by default as hash(text, p1, p2), however subclasses might
1328 1334 use a different hashing method (and override checkhash() in that case)
1329 1335 """
1330 1336 if link == nullrev:
1331 1337 raise RevlogError(_("attempted to add linkrev -1 to %s")
1332 1338 % self.indexfile)
1333 1339
1334 1340 if len(text) > _maxentrysize:
1335 1341 raise RevlogError(
1336 1342 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
1337 1343 % (self.indexfile, len(text)))
1338 1344
1339 1345 node = node or self.hash(text, p1, p2)
1340 1346 if node in self.nodemap:
1341 1347 return node
1342 1348
1343 1349 dfh = None
1344 1350 if not self._inline:
1345 1351 dfh = self.opener(self.datafile, "a+")
1346 1352 ifh = self.opener(self.indexfile, "a+", checkambig=self._checkambig)
1347 1353 try:
1348 1354 return self._addrevision(node, text, transaction, link, p1, p2,
1349 1355 REVIDX_DEFAULT_FLAGS, cachedelta, ifh, dfh)
1350 1356 finally:
1351 1357 if dfh:
1352 1358 dfh.close()
1353 1359 ifh.close()
1354 1360
1355 1361 def compress(self, text):
1356 1362 """ generate a possibly-compressed representation of text """
1357 1363 if not text:
1358 1364 return ("", text)
1359 1365 l = len(text)
1360 1366 bin = None
1361 1367 if l < 44:
1362 1368 pass
1363 1369 elif l > 1000000:
1364 1370 # zlib makes an internal copy, thus doubling memory usage for
1365 1371 # large files, so let's do this in pieces
1366 1372 z = zlib.compressobj()
1367 1373 p = []
1368 1374 pos = 0
1369 1375 while pos < l:
1370 1376 pos2 = pos + 2**20
1371 1377 p.append(z.compress(text[pos:pos2]))
1372 1378 pos = pos2
1373 1379 p.append(z.flush())
1374 1380 if sum(map(len, p)) < l:
1375 1381 bin = "".join(p)
1376 1382 else:
1377 1383 bin = _compress(text)
1378 1384 if bin is None or len(bin) > l:
1379 1385 if text[0] == '\0':
1380 1386 return ("", text)
1381 1387 return ('u', text)
1382 1388 return ("", bin)
1383 1389
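
A sketch of the (header, data) convention above, for a hypothetical revlog rl: the concatenation of the two pieces is exactly what the module-level decompress() accepts:

    h, d = rl.compress('x' * 100)        # compressible: zlib, empty header
    assert h == '' and decompress(d) == 'x' * 100
    h, d = rl.compress('short')          # under 44 bytes: stored as-is
    assert (h, d) == ('u', 'short')
    assert str(decompress(h + d)) == 'short'
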
1384 1390 def _isgooddelta(self, d, textlen):
1385 1391 """Returns True if the given delta is good. Good means that it is within
1386 1392 the disk span, disk size, and chain length bounds that we know to be
1387 1393 performant."""
1388 1394 if d is None:
1389 1395 return False
1390 1396
1391 1397 # - 'dist' is the distance from the base revision -- bounding it limits
1392 1398 # the amount of I/O we need to do.
1393 1399 # - 'compresseddeltalen' is the sum of the total size of deltas we need
1394 1400 # to apply -- bounding it limits the amount of CPU we consume.
1395 1401 dist, l, data, base, chainbase, chainlen, compresseddeltalen = d
1396 1402 if (dist > textlen * 4 or l > textlen or
1397 1403 compresseddeltalen > textlen * 2 or
1398 1404 (self._maxchainlen and chainlen > self._maxchainlen)):
1399 1405 return False
1400 1406
1401 1407 return True
1402 1408
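
Restating the bounds above on hypothetical numbers (maxchainlen unset): a delta is kept only if reading its chain costs at most 4x the text in I/O, the delta itself is no larger than the text, and the summed deltas cost at most 2x the text in CPU:

    textlen = 1000
    dist, l, compresseddeltalen = 3999, 200, 1800
    assert not (dist > textlen * 4 or l > textlen
                or compresseddeltalen > textlen * 2)  # accepted
    assert 4001 > textlen * 4                         # dist this large is rejected
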
1403 1409 def _addrevision(self, node, text, transaction, link, p1, p2, flags,
1404 1410 cachedelta, ifh, dfh, alwayscache=False):
1405 1411 """internal function to add revisions to the log
1406 1412
1407 1413 see addrevision for argument descriptions.
1408 1414 invariants:
1409 1415 - text is optional (can be None); if not set, cachedelta must be set.
1410 1416 if both are set, they must correspond to each other.
1411 1417 """
1412 1418 btext = [text]
1413 1419 def buildtext():
1414 1420 if btext[0] is not None:
1415 1421 return btext[0]
1416 1422 baserev = cachedelta[0]
1417 1423 delta = cachedelta[1]
1418 1424 # special case deltas which replace entire base; no need to decode
1419 1425 # base revision. this neatly avoids censored bases, which throw when
1420 1426 # they're decoded.
1421 1427 hlen = struct.calcsize(">lll")
1422 1428 if delta[:hlen] == mdiff.replacediffheader(self.rawsize(baserev),
1423 1429 len(delta) - hlen):
1424 1430 btext[0] = delta[hlen:]
1425 1431 else:
1426 1432 if self._inline:
1427 1433 fh = ifh
1428 1434 else:
1429 1435 fh = dfh
1430 1436 basetext = self.revision(self.node(baserev), _df=fh)
1431 1437 btext[0] = mdiff.patch(basetext, delta)
1432 1438 try:
1433 1439 self.checkhash(btext[0], p1, p2, node)
1434 1440 if flags & REVIDX_ISCENSORED:
1435 1441 raise RevlogError(_('node %s is not censored') % node)
1436 1442 except CensoredNodeError:
1437 1443 # must pass the censored index flag to add censored revisions
1438 1444 if not flags & REVIDX_ISCENSORED:
1439 1445 raise
1440 1446 return btext[0]
1441 1447
1442 1448 def builddelta(rev):
1443 1449 # can we use the cached delta?
1444 1450 if cachedelta and cachedelta[0] == rev:
1445 1451 delta = cachedelta[1]
1446 1452 else:
1447 1453 t = buildtext()
1448 1454 if self.iscensored(rev):
1449 1455 # deltas based on a censored revision must replace the
1450 1456 # full content in one patch, so delta works everywhere
1451 1457 header = mdiff.replacediffheader(self.rawsize(rev), len(t))
1452 1458 delta = header + t
1453 1459 else:
1454 1460 if self._inline:
1455 1461 fh = ifh
1456 1462 else:
1457 1463 fh = dfh
1458 1464 ptext = self.revision(self.node(rev), _df=fh)
1459 1465 delta = mdiff.textdiff(ptext, t)
1460 1466 header, data = self.compress(delta)
1461 1467 deltalen = len(header) + len(data)
1462 1468 chainbase = self.chainbase(rev)
1463 1469 dist = deltalen + offset - self.start(chainbase)
1464 1470 if self._generaldelta:
1465 1471 base = rev
1466 1472 else:
1467 1473 base = chainbase
1468 1474 chainlen, compresseddeltalen = self._chaininfo(rev)
1469 1475 chainlen += 1
1470 1476 compresseddeltalen += deltalen
1471 1477 return (dist, deltalen, (header, data), base,
1472 1478 chainbase, chainlen, compresseddeltalen)
1473 1479
1474 1480 curr = len(self)
1475 1481 prev = curr - 1
1476 1482 offset = self.end(prev)
1477 1483 delta = None
1478 1484 p1r, p2r = self.rev(p1), self.rev(p2)
1479 1485
1480 1486 # full versions are inserted when the needed deltas
1481 1487 # become comparable to the uncompressed text
1482 1488 if text is None:
1483 1489 textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
1484 1490 cachedelta[1])
1485 1491 else:
1486 1492 textlen = len(text)
1487 1493
1488 1494 # should we try to build a delta?
1489 1495 if prev != nullrev and self.storedeltachains:
1490 1496 tested = set()
1491 1497 # This condition is true most of the time when processing
1492 1498 # changegroup data into a generaldelta repo. The only time it
1493 1499 # isn't true is if this is the first revision in a delta chain
1494 1500 # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
1495 1501 if cachedelta and self._generaldelta and self._lazydeltabase:
1496 1502 # Assume what we received from the server is a good choice
1497 1503 # build delta will reuse the cache
1498 1504 candidatedelta = builddelta(cachedelta[0])
1499 1505 tested.add(cachedelta[0])
1500 1506 if self._isgooddelta(candidatedelta, textlen):
1501 1507 delta = candidatedelta
1502 1508 if delta is None and self._generaldelta:
1503 1509 # exclude already lazy tested base if any
1504 1510 parents = [p for p in (p1r, p2r)
1505 1511 if p != nullrev and p not in tested]
1506 1512 if parents and not self._aggressivemergedeltas:
1507 1513 # Pick whichever parent is closer to us (to minimize the
1508 1514 # chance of having to build a fulltext).
1509 1515 parents = [max(parents)]
1510 1516 tested.update(parents)
1511 1517 pdeltas = []
1512 1518 for p in parents:
1513 1519 pd = builddelta(p)
1514 1520 if self._isgooddelta(pd, textlen):
1515 1521 pdeltas.append(pd)
1516 1522 if pdeltas:
1517 1523 delta = min(pdeltas, key=lambda x: x[1])
1518 1524 if delta is None and prev not in tested:
1519 1525 # other approaches failed; try against prev to hopefully save us a
1520 1526 # fulltext.
1521 1527 candidatedelta = builddelta(prev)
1522 1528 if self._isgooddelta(candidatedelta, textlen):
1523 1529 delta = candidatedelta
1524 1530 if delta is not None:
1525 1531 dist, l, data, base, chainbase, chainlen, compresseddeltalen = delta
1526 1532 else:
1527 1533 text = buildtext()
1528 1534 data = self.compress(text)
1529 1535 l = len(data[1]) + len(data[0])
1530 1536 base = chainbase = curr
1531 1537
1532 1538 e = (offset_type(offset, flags), l, textlen,
1533 1539 base, link, p1r, p2r, node)
1534 1540 self.index.insert(-1, e)
1535 1541 self.nodemap[node] = curr
1536 1542
1537 1543 entry = self._io.packentry(e, self.node, self.version, curr)
1538 1544 self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
1539 1545
1540 1546 if alwayscache and text is None:
1541 1547 text = buildtext()
1542 1548
1543 1549 if type(text) == str: # only accept immutable objects
1544 1550 self._cache = (node, curr, text)
1545 1551 self._chainbasecache[curr] = chainbase
1546 1552 return node
1547 1553
1548 1554 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
1549 1555 # Files opened in a+ mode have inconsistent behavior on various
1550 1556 # platforms. Windows requires that a file positioning call be made
1551 1557 # when the file handle transitions between reads and writes. See
1552 1558 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
1553 1559 # platforms, Python or the platform itself can be buggy. Some versions
1554 1560 # of Solaris have been observed to not append at the end of the file
1555 1561 # if the file was seeked to before the end. See issue4943 for more.
1556 1562 #
1557 1563 # We work around this issue by inserting a seek() before writing.
1558 1564 # Note: This is likely not necessary on Python 3.
1559 1565 ifh.seek(0, os.SEEK_END)
1560 1566 if dfh:
1561 1567 dfh.seek(0, os.SEEK_END)
1562 1568
1563 1569 curr = len(self) - 1
1564 1570 if not self._inline:
1565 1571 transaction.add(self.datafile, offset)
1566 1572 transaction.add(self.indexfile, curr * len(entry))
1567 1573 if data[0]:
1568 1574 dfh.write(data[0])
1569 1575 dfh.write(data[1])
1570 1576 ifh.write(entry)
1571 1577 else:
1572 1578 offset += curr * self._io.size
1573 1579 transaction.add(self.indexfile, offset, curr)
1574 1580 ifh.write(entry)
1575 1581 ifh.write(data[0])
1576 1582 ifh.write(data[1])
1577 1583 self.checkinlinesize(transaction, ifh)
1578 1584
1579 1585 def addgroup(self, cg, linkmapper, transaction, addrevisioncb=None):
1580 1586 """
1581 1587 add a delta group
1582 1588
1583 1589 given a set of deltas, add them to the revision log. the
1584 1590 first delta is against its parent, which should be in our
1585 1591 log, the rest are against the previous delta.
1586 1592
1587 1593 If ``addrevisioncb`` is defined, it will be called with arguments of
1588 1594 this revlog and the node that was added.
1589 1595 """
1590 1596
1591 1597 # track the base of the current delta log
1592 1598 content = []
1593 1599 node = None
1594 1600
1595 1601 r = len(self)
1596 1602 end = 0
1597 1603 if r:
1598 1604 end = self.end(r - 1)
1599 1605 ifh = self.opener(self.indexfile, "a+", checkambig=self._checkambig)
1600 1606 isize = r * self._io.size
1601 1607 if self._inline:
1602 1608 transaction.add(self.indexfile, end + isize, r)
1603 1609 dfh = None
1604 1610 else:
1605 1611 transaction.add(self.indexfile, isize, r)
1606 1612 transaction.add(self.datafile, end)
1607 1613 dfh = self.opener(self.datafile, "a+")
1608 1614 def flush():
1609 1615 if dfh:
1610 1616 dfh.flush()
1611 1617 ifh.flush()
1612 1618 try:
1613 1619 # loop through our set of deltas
1614 1620 chain = None
1615 1621 for chunkdata in iter(lambda: cg.deltachunk(chain), {}):
1616 1622 node = chunkdata['node']
1617 1623 p1 = chunkdata['p1']
1618 1624 p2 = chunkdata['p2']
1619 1625 cs = chunkdata['cs']
1620 1626 deltabase = chunkdata['deltabase']
1621 1627 delta = chunkdata['delta']
1622 1628 flags = chunkdata['flags'] or REVIDX_DEFAULT_FLAGS
1623 1629
1624 1630 content.append(node)
1625 1631
1626 1632 link = linkmapper(cs)
1627 1633 if node in self.nodemap:
1628 1634 # this can happen if two branches make the same change
1629 1635 chain = node
1630 1636 continue
1631 1637
1632 1638 for p in (p1, p2):
1633 1639 if p not in self.nodemap:
1634 1640 raise LookupError(p, self.indexfile,
1635 1641 _('unknown parent'))
1636 1642
1637 1643 if deltabase not in self.nodemap:
1638 1644 raise LookupError(deltabase, self.indexfile,
1639 1645 _('unknown delta base'))
1640 1646
1641 1647 baserev = self.rev(deltabase)
1642 1648
1643 1649 if baserev != nullrev and self.iscensored(baserev):
1644 1650 # if the base is censored, the delta must be a full replacement
1645 1651 # performed in a single patch operation
1646 1652 hlen = struct.calcsize(">lll")
1647 1653 oldlen = self.rawsize(baserev)
1648 1654 newlen = len(delta) - hlen
1649 1655 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
1650 1656 raise error.CensoredBaseError(self.indexfile,
1651 1657 self.node(baserev))
1652 1658
1653 1659 if not flags and self._peek_iscensored(baserev, delta, flush):
1654 1660 flags |= REVIDX_ISCENSORED
1655 1661
1656 1662 # We assume consumers of addrevisioncb will want to retrieve
1657 1663 # the added revision, which will require a call to
1658 1664 # revision(). revision() will fast path if there is a cache
1659 1665 # hit. So, we tell _addrevision() to always cache in this case.
1660 1666 chain = self._addrevision(node, None, transaction, link,
1661 1667 p1, p2, flags, (baserev, delta),
1662 1668 ifh, dfh,
1663 1669 alwayscache=bool(addrevisioncb))
1664 1670
1665 1671 if addrevisioncb:
1666 1672 addrevisioncb(self, chain)
1667 1673
1668 1674 if not dfh and not self._inline:
1669 1675 # _addrevision switched from inline to conventional storage;
1670 1676 # reopen the index and open the new data file
1671 1677 ifh.close()
1672 1678 dfh = self.opener(self.datafile, "a+")
1673 1679 ifh = self.opener(self.indexfile, "a+",
1674 1680 checkambig=self._checkambig)
1675 1681 finally:
1676 1682 if dfh:
1677 1683 dfh.close()
1678 1684 ifh.close()
1679 1685
1680 1686 return content
1681 1687
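The chunk loop in addgroup uses the two-argument form of iter(), which keeps calling cg.deltachunk(chain) until it returns the sentinel {} marking the end of the stream. The same pattern in isolation, with a hypothetical stand-in for the changegroup:

    chunks = iter([{'node': 'n1'}, {'node': 'n2'}, {}])

    def deltachunk(chain):
        # stand-in for cg.deltachunk(); returns {} once exhausted
        return next(chunks)

    chain = None
    for chunkdata in iter(lambda: deltachunk(chain), {}):
        chain = chunkdata['node']
    print(chain)  # -> n2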
1682 1688 def iscensored(self, rev):
1683 1689 """Check if a file revision is censored."""
1684 1690 return False
1685 1691
1686 1692 def _peek_iscensored(self, baserev, delta, flush):
1687 1693 """Quickly check if a delta produces a censored revision."""
1688 1694 return False
1689 1695
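A censored base can only be patched by a full-replacement delta: a single bdiff hunk whose ">lll" header (start, end, new length) covers the entire old text, which is what the mdiff.replacediffheader comparison in addgroup enforces. A sketch of the same check, assuming bdiff-style hunk encoding:

    import struct

    def isfullreplacement(delta, oldlen):
        # A bdiff delta is a series of (start, end, length) hunk headers,
        # each followed by `length` bytes of replacement data. A full
        # replacement is exactly one hunk rewriting [0, oldlen).
        hlen = struct.calcsize(">lll")
        if len(delta) < hlen:
            return False
        start, end, newlen = struct.unpack(">lll", delta[:hlen])
        return start == 0 and end == oldlen and len(delta) == hlen + newlen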
1690 1696 def getstrippoint(self, minlink):
1691 1697 """find the minimum rev that must be stripped to strip the linkrev
1692 1698
1693 1699 Returns a tuple containing the minimum rev and a set of all revs that
1694 1700 have linkrevs that will be broken by this strip.
1695 1701 """
1696 1702 brokenrevs = set()
1697 1703 strippoint = len(self)
1698 1704
1699 1705 heads = {}
1700 1706 futurelargelinkrevs = set()
1701 1707 for head in self.headrevs():
1702 1708 headlinkrev = self.linkrev(head)
1703 1709 heads[head] = headlinkrev
1704 1710 if headlinkrev >= minlink:
1705 1711 futurelargelinkrevs.add(headlinkrev)
1706 1712
1707 1713 # This algorithm involves walking down the rev graph, starting at the
1708 1714 # heads. Since the revs are topologically sorted according to linkrev,
1709 1715 # once all head linkrevs are below minlink, we know there are
1710 1716 # no more revs that could have a linkrev at or above minlink.
1711 1717 # So we can stop walking.
1712 1718 while futurelargelinkrevs:
1713 1719 strippoint -= 1
1714 1720 linkrev = heads.pop(strippoint)
1715 1721
1716 1722 if linkrev < minlink:
1717 1723 brokenrevs.add(strippoint)
1718 1724 else:
1719 1725 futurelargelinkrevs.remove(linkrev)
1720 1726
1721 1727 for p in self.parentrevs(strippoint):
1722 1728 if p != nullrev:
1723 1729 plinkrev = self.linkrev(p)
1724 1730 heads[p] = plinkrev
1725 1731 if plinkrev >= minlink:
1726 1732 futurelargelinkrevs.add(plinkrev)
1727 1733
1728 1734 return strippoint, brokenrevs
1729 1735
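A worked example of getstrippoint: take two heads off rev 0, rev 1 with linkrev 3 and rev 2 with linkrev 1. For minlink=2 the walk must strip from rev 1 onwards, and rev 2 lands in the stripped range even though its own linkrev is below minlink, so it is reported as broken. A self-contained model of the walk (plain lists instead of a revlog):

    def strippoint(parentrevs, linkrevs, headrevs, minlink):
        brokenrevs, point = set(), len(linkrevs)
        heads = dict((h, linkrevs[h]) for h in headrevs)
        futurelarge = set(lr for lr in heads.values() if lr >= minlink)
        while futurelarge:
            point -= 1
            linkrev = heads.pop(point)
            if linkrev < minlink:
                brokenrevs.add(point)
            else:
                futurelarge.remove(linkrev)
            for p in parentrevs[point]:
                if p != -1:
                    heads[p] = linkrevs[p]
                    if linkrevs[p] >= minlink:
                        futurelarge.add(linkrevs[p])
        return point, brokenrevs

    print(strippoint([(-1,), (0,), (0,)], [0, 3, 1], [1, 2], 2))
    # -> strippoint 1, brokenrevs {2}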
1730 1736 def strip(self, minlink, transaction):
1731 1737 """truncate the revlog on the first revision with a linkrev >= minlink
1732 1738
1733 1739 This function is called when we're stripping revision minlink and
1734 1740 its descendants from the repository.
1735 1741
1736 1742 We have to remove all revisions with linkrev >= minlink, because
1737 1743 the equivalent changelog revisions will be renumbered after the
1738 1744 strip.
1739 1745
1740 1746 So we truncate the revlog on the first of these revisions, and
1741 1747 trust that the caller has saved the revisions that shouldn't be
1742 1748 removed and that it'll re-add them after this truncation.
1743 1749 """
1744 1750 if len(self) == 0:
1745 1751 return
1746 1752
1747 1753 rev, _ = self.getstrippoint(minlink)
1748 1754 if rev == len(self):
1749 1755 return
1750 1756
1751 1757 # first truncate the files on disk
1752 1758 end = self.start(rev)
1753 1759 if not self._inline:
1754 1760 transaction.add(self.datafile, end)
1755 1761 end = rev * self._io.size
1756 1762 else:
1757 1763 end += rev * self._io.size
1758 1764
1759 1765 transaction.add(self.indexfile, end)
1760 1766
1761 1767 # then reset internal state in memory to forget those revisions
1762 1768 self._cache = None
1763 1769 self._chaininfocache = {}
1764 1770 self._chunkclear()
1765 1771 for x in xrange(rev, len(self)):
1766 1772 del self.nodemap[self.node(x)]
1767 1773
1768 1774 del self.index[rev:-1]
1769 1775
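The two truncation cases in strip differ only in where the cut lands. With a 64-byte index entry (assumed from the v1 layout), truncating at rev whose data starts at byte D means: with separate files, cut the index at rev * 64 and the data file at D; with inline data, entries and data share one file, so the single cut is D + rev * 64. A minimal sketch of that arithmetic:

    def truncationoffsets(rev, datastart, inline, entrysize=64):
        # Returns (indexcut, datacut); datacut is None for inline
        # revlogs, where index entries and data share the .i file.
        if inline:
            return datastart + rev * entrysize, None
        return rev * entrysize, datastart

    # e.g. rev=10, data for rev 10 starting at byte 5000:
    #   separate files -> index cut at 640, data cut at 5000
    #   inline         -> single cut at 5640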
1770 1776 def checksize(self):
1771 1777 expected = 0
1772 1778 if len(self):
1773 1779 expected = max(0, self.end(len(self) - 1))
1774 1780
1775 1781 try:
1776 1782 f = self.opener(self.datafile)
1777 1783 f.seek(0, os.SEEK_END)
1778 1784 actual = f.tell()
1779 1785 f.close()
1780 1786 dd = actual - expected
1781 1787 except IOError as inst:
1782 1788 if inst.errno != errno.ENOENT:
1783 1789 raise
1784 1790 dd = 0
1785 1791
1786 1792 try:
1787 1793 f = self.opener(self.indexfile)
1788 1794 f.seek(0, os.SEEK_END)
1789 1795 actual = f.tell()
1790 1796 f.close()
1791 1797 s = self._io.size
1792 1798 i = max(0, actual // s)
1793 1799 di = actual - (i * s)
1794 1800 if self._inline:
1795 1801 databytes = 0
1796 1802 for r in self:
1797 1803 databytes += max(0, self.length(r))
1798 1804 dd = 0
1799 1805 di = actual - len(self) * s - databytes
1800 1806 except IOError as inst:
1801 1807 if inst.errno != errno.ENOENT:
1802 1808 raise
1803 1809 di = 0
1804 1810
1805 1811 return (dd, di)
1806 1812
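checksize returns (dd, di): dd is the byte discrepancy between the data file's actual size and what the last revision implies, and di is the stray-byte count in the index (for inline revlogs, whatever the single file holds beyond the accounted entries and data). Both are zero for a healthy revlog; a verify-style caller might report them with a hypothetical helper like:

    def reportsizes(revlog, name):
        dd, di = revlog.checksize()
        if dd:
            print("%s: data file is %d bytes off" % (name, dd))
        if di:
            print("%s: index file is %d bytes off" % (name, di))
        return not (dd or di)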
1807 1813 def files(self):
1808 1814 res = [self.indexfile]
1809 1815 if not self._inline:
1810 1816 res.append(self.datafile)
1811 1817 return res