revlog: group delta computation methods under _deltacomputer object...
Paul Morelle
r35756:f90f6fd1 default
@@ -1,2466 +1,2491 b''
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import binascii
17 17 import collections
18 18 import errno
19 19 import hashlib
20 20 import heapq
21 21 import os
22 22 import struct
23 23 import zlib
24 24
25 25 # import stuff from node for others to import from revlog
26 26 from .node import (
27 27 bin,
28 28 hex,
29 29 nullid,
30 30 nullrev,
31 31 wdirhex,
32 32 wdirid,
33 33 wdirrev,
34 34 )
35 35 from .i18n import _
36 36 from .thirdparty import (
37 37 attr,
38 38 )
39 39 from . import (
40 40 ancestor,
41 41 error,
42 42 mdiff,
43 43 policy,
44 44 pycompat,
45 45 templatefilters,
46 46 util,
47 47 )
48 48
49 49 parsers = policy.importmod(r'parsers')
50 50
51 51 # Aliased for performance.
52 52 _zlibdecompress = zlib.decompress
53 53
54 54 # revlog header flags
55 55 REVLOGV0 = 0
56 56 REVLOGV1 = 1
57 57 # Dummy value until file format is finalized.
58 58 # Reminder: change the bounds check in revlog.__init__ when this is changed.
59 59 REVLOGV2 = 0xDEAD
60 60 FLAG_INLINE_DATA = (1 << 16)
61 61 FLAG_GENERALDELTA = (1 << 17)
62 62 REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA
63 63 REVLOG_DEFAULT_FORMAT = REVLOGV1
64 64 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
65 65 REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA
66 66 REVLOGV2_FLAGS = REVLOGV1_FLAGS
67 67
68 68 # revlog index flags
69 69 REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
70 70 REVIDX_ELLIPSIS = (1 << 14) # revision hash does not match data (narrowhg)
71 71 REVIDX_EXTSTORED = (1 << 13) # revision data is stored externally
72 72 REVIDX_DEFAULT_FLAGS = 0
73 73 # stable order in which flags need to be processed and their processors applied
74 74 REVIDX_FLAGS_ORDER = [
75 75 REVIDX_ISCENSORED,
76 76 REVIDX_ELLIPSIS,
77 77 REVIDX_EXTSTORED,
78 78 ]
79 79 REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)
80 80
81 81 # max size of revlog with inline data
82 82 _maxinline = 131072
83 83 _chunksize = 1048576
84 84
85 85 RevlogError = error.RevlogError
86 86 LookupError = error.LookupError
87 87 CensoredNodeError = error.CensoredNodeError
88 88 ProgrammingError = error.ProgrammingError
89 89
90 90 # Store flag processors (cf. 'addflagprocessor()' to register)
91 91 _flagprocessors = {
92 92 REVIDX_ISCENSORED: None,
93 93 }
94 94
95 95 def addflagprocessor(flag, processor):
96 96 """Register a flag processor on a revision data flag.
97 97
98 98 Invariant:
99 99 - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER.
100 100 - Only one flag processor can be registered on a specific flag.
101 101 - flagprocessors must be 3-tuples of functions (read, write, raw) with the
102 102 following signatures:
103 103 - (read) f(self, rawtext) -> text, bool
104 104 - (write) f(self, text) -> rawtext, bool
105 105 - (raw) f(self, rawtext) -> bool
106 106 "text" is presented to the user. "rawtext" is stored in revlog data, not
107 107 directly visible to the user.
108 108 The boolean returned by these transforms is used to determine whether
109 109 the returned text can be used for hash integrity checking. For example,
110 110 if "write" returns False, then "text" is used to generate hash. If
111 111 "write" returns True, that basically means "rawtext" returned by "write"
112 112 should be used to generate hash. Usually, "write" and "read" return
113 113 different booleans. And "raw" returns the same boolean as "write".
114 114
115 115 Note: The 'raw' transform is used for changegroup generation and in some
116 116 debug commands. In this case the transform only indicates whether the
117 117 contents can be used for hash integrity checks.
118 118 """
119 119 if not flag & REVIDX_KNOWN_FLAGS:
120 120 msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
121 121 raise ProgrammingError(msg)
122 122 if flag not in REVIDX_FLAGS_ORDER:
123 123 msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
124 124 raise ProgrammingError(msg)
125 125 if flag in _flagprocessors:
126 126 msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
127 127 raise error.Abort(msg)
128 128 _flagprocessors[flag] = processor
129 129
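For illustration, a minimal sketch of registering a processor on REVIDX_EXTSTORED that satisfies the invariant above (the _resolveexternal/_storeexternal helpers are hypothetical placeholders, not part of this module):

    def _readext(self, rawtext):
        # map the stored stub back to user-visible text; False means the
        # result is not suitable for hash verification
        return _resolveexternal(rawtext), False

    def _writeext(self, text):
        # store the text externally, keep only a stub in the revlog
        return _storeexternal(text), False

    def _rawext(self, rawtext):
        # the raw stub alone can never be hash-checked
        return False

    addflagprocessor(REVIDX_EXTSTORED, (_readext, _writeext, _rawext))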
130 130 def getoffset(q):
131 131 return int(q >> 16)
132 132
133 133 def gettype(q):
134 134 return int(q & 0xFFFF)
135 135
136 136 def offset_type(offset, type):
137 137 if (type & ~REVIDX_KNOWN_FLAGS) != 0:
138 138 raise ValueError('unknown revlog index flags')
139 139 return int(int(offset) << 16 | type)
140 140
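The first field of each index entry packs the byte offset into the upper 48 bits and the per-revision flags into the lower 16, so the three helpers above round-trip cleanly:

    packed = offset_type(4096, REVIDX_ISCENSORED)
    assert getoffset(packed) == 4096             # upper 48 bits
    assert gettype(packed) == REVIDX_ISCENSORED  # lower 16 bits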
141 141 _nullhash = hashlib.sha1(nullid)
142 142
143 143 def hash(text, p1, p2):
144 144 """generate a hash from the given text and its parent hashes
145 145
146 146 This hash combines both the current file contents and its history
147 147 in a manner that makes it easy to distinguish nodes with the same
148 148 content in the revision graph.
149 149 """
150 150 # As of now, if one of the parent nodes is null, p2 is null
151 151 if p2 == nullid:
152 152 # deep copy of a hash is faster than creating one
153 153 s = _nullhash.copy()
154 154 s.update(p1)
155 155 else:
156 156 # none of the parent nodes are nullid
157 157 if p1 < p2:
158 158 a = p1
159 159 b = p2
160 160 else:
161 161 a = p2
162 162 b = p1
163 163 s = hashlib.sha1(a)
164 164 s.update(b)
165 165 s.update(text)
166 166 return s.digest()
167 167
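This function is what produces revlog node ids. For example, a root revision (both parents null) takes the fast path above:

    node = hash(b'file contents', nullid, nullid)
    assert len(node) == 20   # binary SHA-1 digest, not hex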
168 168 def _trimchunk(revlog, revs, startidx, endidx=None):
169 169 """returns revs[startidx:endidx] without empty trailing revs
170 170 """
171 171 length = revlog.length
172 172
173 173 if endidx is None:
174 174 endidx = len(revs)
175 175
176 176 # Trim empty revs at the end, but never the very first revision of a chain
177 177 while endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0:
178 178 endidx -= 1
179 179
180 180 return revs[startidx:endidx]
181 181
182 182 def _slicechunk(revlog, revs):
183 183 """slice revs to reduce the amount of unrelated data to be read from disk.
184 184
185 185 ``revs`` is sliced into groups that should be read in one time.
186 186 Assume that revs are sorted.
187 187 """
188 188 start = revlog.start
189 189 length = revlog.length
190 190
191 191 if len(revs) <= 1:
192 192 yield revs
193 193 return
194 194
195 195 startbyte = start(revs[0])
196 196 endbyte = start(revs[-1]) + length(revs[-1])
197 197 readdata = deltachainspan = endbyte - startbyte
198 198
199 199 chainpayload = sum(length(r) for r in revs)
200 200
201 201 if deltachainspan:
202 202 density = chainpayload / float(deltachainspan)
203 203 else:
204 204 density = 1.0
205 205
206 206 # Store the gaps in a heap to have them sorted by decreasing size
207 207 gapsheap = []
208 208 heapq.heapify(gapsheap)
209 209 prevend = None
210 210 for i, rev in enumerate(revs):
211 211 revstart = start(rev)
212 212 revlen = length(rev)
213 213
214 214 # Skip empty revisions to form larger holes
215 215 if revlen == 0:
216 216 continue
217 217
218 218 if prevend is not None:
219 219 gapsize = revstart - prevend
220 220 # only consider holes that are large enough
221 221 if gapsize > revlog._srmingapsize:
222 222 heapq.heappush(gapsheap, (-gapsize, i))
223 223
224 224 prevend = revstart + revlen
225 225
226 226 # Collect the indices of the largest holes until the density is acceptable
227 227 indicesheap = []
228 228 heapq.heapify(indicesheap)
229 229 while gapsheap and density < revlog._srdensitythreshold:
230 230 oppgapsize, gapidx = heapq.heappop(gapsheap)
231 231
232 232 heapq.heappush(indicesheap, gapidx)
233 233
234 234 # the gap sizes are stored as negatives to be sorted decreasingly
235 235 # by the heap
236 236 readdata -= (-oppgapsize)
237 237 if readdata > 0:
238 238 density = chainpayload / float(readdata)
239 239 else:
240 240 density = 1.0
241 241
242 242 # Cut the revs at collected indices
243 243 previdx = 0
244 244 while indicesheap:
245 245 idx = heapq.heappop(indicesheap)
246 246
247 247 chunk = _trimchunk(revlog, revs, previdx, idx)
248 248 if chunk:
249 249 yield chunk
250 250
251 251 previdx = idx
252 252
253 253 chunk = _trimchunk(revlog, revs, previdx)
254 254 if chunk:
255 255 yield chunk
256 256
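To see the density logic in action, here is a stand-in revlog exposing only what _slicechunk consumes (a sketch, not a real revlog). Revisions 0-3 occupy bytes [0, 100), [100, 200), [800, 900) and [900, 1000); the 600-byte hole drives density down to 0.4, below the 0.5 threshold, so the revs split into two slices:

    class _fakerevlog(object):
        _srdensitythreshold = 0.5
        _srmingapsize = 16
        def __init__(self, chunks):
            self._chunks = chunks      # list of (start, length) pairs
        def start(self, rev):
            return self._chunks[rev][0]
        def length(self, rev):
            return self._chunks[rev][1]

    rl = _fakerevlog([(0, 100), (100, 100), (800, 100), (900, 100)])
    assert list(_slicechunk(rl, [0, 1, 2, 3])) == [[0, 1], [2, 3]]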
257 257 @attr.s(slots=True, frozen=True)
258 258 class _deltainfo(object):
259 259 distance = attr.ib()
260 260 deltalen = attr.ib()
261 261 data = attr.ib()
262 262 base = attr.ib()
263 263 chainbase = attr.ib()
264 264 chainlen = attr.ib()
265 265 compresseddeltalen = attr.ib()
266 266
267 class _deltacomputer(object):
268 def __init__(self, revlog):
269 self.revlog = revlog
270
271 def _getcandidaterevs(self, p1, p2, cachedelta):
272 """
273 Provides revisions that present an interest to be diffed against,
274 grouped by level of easiness.
275 """
276 revlog = self.revlog
277 curr = len(revlog)
278 prev = curr - 1
279 p1r, p2r = revlog.rev(p1), revlog.rev(p2)
280
281 # should we try to build a delta?
282 if prev != nullrev and revlog.storedeltachains:
283 tested = set()
284 # This condition is true most of the time when processing
285 # changegroup data into a generaldelta repo. The only time it
286 # isn't true is if this is the first revision in a delta chain
287 # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
288 if cachedelta and revlog._generaldelta and revlog._lazydeltabase:
289 # Assume what we received from the server is a good choice
290 # build delta will reuse the cache
291 yield (cachedelta[0],)
292 tested.add(cachedelta[0])
293
294 if revlog._generaldelta:
295 # exclude already lazy tested base if any
296 parents = [p for p in (p1r, p2r)
297 if p != nullrev and p not in tested]
298 if parents and not revlog._aggressivemergedeltas:
299 # Pick whichever parent is closer to us (to minimize the
300 # chance of having to build a fulltext).
301 parents = [max(parents)]
302 tested.update(parents)
303 yield parents
304
305 if prev not in tested:
306 # other approaches failed; try against prev to hopefully save us a
307 # fulltext.
308 yield (prev,)
309
310 def buildtext(self, revinfo, fh):
311 """Builds a fulltext version of a revision
312
313 revinfo: _revisioninfo instance that contains all needed info
314 fh: file handle to either the .i or the .d revlog file,
315 depending on whether it is inlined or not
316 """
317 btext = revinfo.btext
318 if btext[0] is not None:
319 return btext[0]
320
321 revlog = self.revlog
322 cachedelta = revinfo.cachedelta
323 flags = revinfo.flags
324 node = revinfo.node
325
326 baserev = cachedelta[0]
327 delta = cachedelta[1]
328 # special case deltas which replace entire base; no need to decode
329 # base revision. this neatly avoids censored bases, which throw when
330 # they're decoded.
331 hlen = struct.calcsize(">lll")
332 if delta[:hlen] == mdiff.replacediffheader(revlog.rawsize(baserev),
333 len(delta) - hlen):
334 btext[0] = delta[hlen:]
335 else:
336 basetext = revlog.revision(baserev, _df=fh, raw=True)
337 btext[0] = mdiff.patch(basetext, delta)
338
339 try:
340 res = revlog._processflags(btext[0], flags, 'read', raw=True)
341 btext[0], validatehash = res
342 if validatehash:
343 revlog.checkhash(btext[0], node, p1=revinfo.p1, p2=revinfo.p2)
344 if flags & REVIDX_ISCENSORED:
345 raise RevlogError(_('node %s is not censored') % node)
346 except CensoredNodeError:
347 # must pass the censored index flag to add censored revisions
348 if not flags & REVIDX_ISCENSORED:
349 raise
350 return btext[0]
351
352 def _builddeltadiff(self, base, revinfo, fh):
353 revlog = self.revlog
354 t = self.buildtext(revinfo, fh)
355 if revlog.iscensored(base):
356 # deltas based on a censored revision must replace the
357 # full content in one patch, so delta works everywhere
358 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
359 delta = header + t
360 else:
361 ptext = revlog.revision(base, _df=fh, raw=True)
362 delta = mdiff.textdiff(ptext, t)
363
364 return delta
365
366 def _builddeltainfo(self, revinfo, base, fh):
367 # can we use the cached delta?
368 if revinfo.cachedelta and revinfo.cachedelta[0] == base:
369 delta = revinfo.cachedelta[1]
370 else:
371 delta = self._builddeltadiff(base, revinfo, fh)
372 revlog = self.revlog
373 header, data = revlog.compress(delta)
374 deltalen = len(header) + len(data)
375 chainbase = revlog.chainbase(base)
376 offset = revlog.end(len(revlog) - 1)
377 dist = deltalen + offset - revlog.start(chainbase)
378 if revlog._generaldelta:
379 deltabase = base
380 else:
381 deltabase = chainbase
382 chainlen, compresseddeltalen = revlog._chaininfo(base)
383 chainlen += 1
384 compresseddeltalen += deltalen
385 return _deltainfo(dist, deltalen, (header, data), deltabase,
386 chainbase, chainlen, compresseddeltalen)
387
388 def finddeltainfo(self, revinfo, fh):
389 """Find an acceptable delta against a candidate revision
390
391 revinfo: information about the revision (instance of _revisioninfo)
392 fh: file handle to either the .i or the .d revlog file,
393 depending on whether it is inlined or not
394
395 Returns the first acceptable candidate revision, as ordered by
396 _getcandidaterevs
397 """
398 cachedelta = revinfo.cachedelta
399 p1 = revinfo.p1
400 p2 = revinfo.p2
401 revlog = self.revlog
402
403 deltainfo = None
404 for candidaterevs in self._getcandidaterevs(p1, p2, cachedelta):
405 nominateddeltas = []
406 for candidaterev in candidaterevs:
407 candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
408 if revlog._isgooddeltainfo(candidatedelta, revinfo.textlen):
409 nominateddeltas.append(candidatedelta)
410 if nominateddeltas:
411 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
412 break
413
414 return deltainfo
415
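In outline, the revision-adding path drives this object as follows (a sketch; revinfo is a _revisioninfo instance as defined just below, and fh is an open handle on the .i or .d file):

    dc = _deltacomputer(rl)
    deltainfo = dc.finddeltainfo(revinfo, fh)
    if deltainfo is None:
        # no candidate produced an acceptable delta; the caller falls
        # back to storing a fulltext for this revision
        pass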
267 416 @attr.s(slots=True, frozen=True)
268 417 class _revisioninfo(object):
269 418 """Information about a revision that allows building its fulltext
270 419 node: expected hash of the revision
271 420 p1, p2: parent revs of the revision
272 421 btext: built text cache consisting of a one-element list
273 422 cachedelta: (baserev, uncompressed_delta) or None
274 423 flags: flags associated to the revision storage
275 424
276 425 One of btext[0] or cachedelta must be set.
277 426 """
278 427 node = attr.ib()
279 428 p1 = attr.ib()
280 429 p2 = attr.ib()
281 430 btext = attr.ib()
282 431 textlen = attr.ib()
283 432 cachedelta = attr.ib()
284 433 flags = attr.ib()
285 434
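For example, adding a revision whose fulltext is already in hand builds the record like this (a sketch; node, p1node, p2node and rawtext come from the caller, while changegroup application would instead pass btext=[None] and a (baserev, delta) cachedelta):

    revinfo = _revisioninfo(node=node, p1=p1node, p2=p2node,
                            btext=[rawtext], textlen=len(rawtext),
                            cachedelta=None, flags=REVIDX_DEFAULT_FLAGS)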
286 435 # index v0:
287 436 # 4 bytes: offset
288 437 # 4 bytes: compressed length
289 438 # 4 bytes: base rev
290 439 # 4 bytes: link rev
291 440 # 20 bytes: parent 1 nodeid
292 441 # 20 bytes: parent 2 nodeid
293 442 # 20 bytes: nodeid
294 443 indexformatv0 = struct.Struct(">4l20s20s20s")
295 444 indexformatv0_pack = indexformatv0.pack
296 445 indexformatv0_unpack = indexformatv0.unpack
297 446
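Each v0 record is therefore 76 bytes (four 4-byte integers plus three 20-byte nodes), and packing is a plain struct call (p1node, p2node and node standing for 20-byte binary ids):

    entry = indexformatv0_pack(0, 12, 0, 0, p1node, p2node, node)
    assert len(entry) == 76
    offset, clen, baserev, linkrev, p1, p2, n = indexformatv0_unpack(entry)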
298 447 class revlogoldio(object):
299 448 def __init__(self):
300 449 self.size = indexformatv0.size
301 450
302 451 def parseindex(self, data, inline):
303 452 s = self.size
304 453 index = []
305 454 nodemap = {nullid: nullrev}
306 455 n = off = 0
307 456 l = len(data)
308 457 while off + s <= l:
309 458 cur = data[off:off + s]
310 459 off += s
311 460 e = indexformatv0_unpack(cur)
312 461 # transform to revlogv1 format
313 462 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
314 463 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
315 464 index.append(e2)
316 465 nodemap[e[6]] = n
317 466 n += 1
318 467
319 468 # add the magic null revision at -1
320 469 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
321 470
322 471 return index, nodemap, None
323 472
324 473 def packentry(self, entry, node, version, rev):
325 474 if gettype(entry[0]):
326 475 raise RevlogError(_('index entry flags need revlog version 1'))
327 476 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
328 477 node(entry[5]), node(entry[6]), entry[7])
329 478 return indexformatv0_pack(*e2)
330 479
331 480 # index ng:
332 481 # 6 bytes: offset
333 482 # 2 bytes: flags
334 483 # 4 bytes: compressed length
335 484 # 4 bytes: uncompressed length
336 485 # 4 bytes: base rev
337 486 # 4 bytes: link rev
338 487 # 4 bytes: parent 1 rev
339 488 # 4 bytes: parent 2 rev
340 489 # 32 bytes: nodeid
341 490 indexformatng = struct.Struct(">Qiiiiii20s12x")
342 491 indexformatng_pack = indexformatng.pack
343 492 versionformat = struct.Struct(">I")
344 493 versionformat_pack = versionformat.pack
345 494 versionformat_unpack = versionformat.unpack
346 495
347 496 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
348 497 # signed integer)
349 498 _maxentrysize = 0x7fffffff
350 499
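A v1 ("ng") record packs to 64 bytes (8 + 6 x 4 + 20 + 12 bytes of padding). Note how packentry() below splices the version header over the first entry's offset field, mirroring this sketch (clen, ulen, the rev numbers and node are placeholders):

    e = (offset_type(0, 0), clen, ulen, baserev, linkrev, p1rev, p2rev, node)
    record = indexformatng_pack(*e)
    assert len(record) == 64
    record0 = versionformat_pack(REVLOG_DEFAULT_VERSION) + record[4:]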
351 500 class revlogio(object):
352 501 def __init__(self):
353 502 self.size = indexformatng.size
354 503
355 504 def parseindex(self, data, inline):
356 505 # call the C implementation to parse the index data
357 506 index, cache = parsers.parse_index2(data, inline)
358 507 return index, getattr(index, 'nodemap', None), cache
359 508
360 509 def packentry(self, entry, node, version, rev):
361 510 p = indexformatng_pack(*entry)
362 511 if rev == 0:
363 512 p = versionformat_pack(version) + p[4:]
364 513 return p
365 514
366 515 class revlog(object):
367 516 """
368 517 the underlying revision storage object
369 518
370 519 A revlog consists of two parts, an index and the revision data.
371 520
372 521 The index is a file with a fixed record size containing
373 522 information on each revision, including its nodeid (hash), the
374 523 nodeids of its parents, the position and offset of its data within
375 524 the data file, and the revision it's based on. Finally, each entry
376 525 contains a linkrev entry that can serve as a pointer to external
377 526 data.
378 527
379 528 The revision data itself is a linear collection of data chunks.
380 529 Each chunk represents a revision and is usually represented as a
381 530 delta against the previous chunk. To bound lookup time, runs of
382 531 deltas are limited to about 2 times the length of the original
383 532 version data. This makes retrieval of a version proportional to
384 533 its size, or O(1) relative to the number of revisions.
385 534
386 535 Both pieces of the revlog are written to in an append-only
387 536 fashion, which means we never need to rewrite a file to insert or
388 537 remove data, and can use some simple techniques to avoid the need
389 538 for locking while reading.
390 539
391 540 If checkambig, indexfile is opened with checkambig=True at
392 541 writing, to avoid file stat ambiguity.
393 542
394 543 If mmaplargeindex is True, and an mmapindexthreshold is set, the
395 544 index will be mmapped rather than read if it is larger than the
396 545 configured threshold.
397 546 """
398 547 def __init__(self, opener, indexfile, datafile=None, checkambig=False,
399 548 mmaplargeindex=False):
400 549 """
401 550 create a revlog object
402 551
403 552 opener is a function that abstracts the file opening operation
404 553 and can be used to implement COW semantics or the like.
405 554 """
406 555 self.indexfile = indexfile
407 556 self.datafile = datafile or (indexfile[:-2] + ".d")
408 557 self.opener = opener
409 558 # When True, indexfile is opened with checkambig=True at writing, to
410 559 # avoid file stat ambiguity.
411 560 self._checkambig = checkambig
412 561 # 3-tuple of (node, rev, text) for a raw revision.
413 562 self._cache = None
414 563 # Maps rev to chain base rev.
415 564 self._chainbasecache = util.lrucachedict(100)
416 565 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
417 566 self._chunkcache = (0, '')
418 567 # How much data to read and cache into the raw revlog data cache.
419 568 self._chunkcachesize = 65536
420 569 self._maxchainlen = None
421 570 self._aggressivemergedeltas = False
422 571 self.index = []
423 572 # Mapping of partial identifiers to full nodes.
424 573 self._pcache = {}
425 574 # Mapping of revision integer to full node.
426 575 self._nodecache = {nullid: nullrev}
427 576 self._nodepos = None
428 577 self._compengine = 'zlib'
429 578 self._maxdeltachainspan = -1
430 579 self._withsparseread = False
431 580 self._srdensitythreshold = 0.25
432 581 self._srmingapsize = 262144
433 582
434 583 mmapindexthreshold = None
435 584 v = REVLOG_DEFAULT_VERSION
436 585 opts = getattr(opener, 'options', None)
437 586 if opts is not None:
438 587 if 'revlogv2' in opts:
439 588 # version 2 revlogs always use generaldelta.
440 589 v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
441 590 elif 'revlogv1' in opts:
442 591 if 'generaldelta' in opts:
443 592 v |= FLAG_GENERALDELTA
444 593 else:
445 594 v = 0
446 595 if 'chunkcachesize' in opts:
447 596 self._chunkcachesize = opts['chunkcachesize']
448 597 if 'maxchainlen' in opts:
449 598 self._maxchainlen = opts['maxchainlen']
450 599 if 'aggressivemergedeltas' in opts:
451 600 self._aggressivemergedeltas = opts['aggressivemergedeltas']
452 601 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
453 602 if 'compengine' in opts:
454 603 self._compengine = opts['compengine']
455 604 if 'maxdeltachainspan' in opts:
456 605 self._maxdeltachainspan = opts['maxdeltachainspan']
457 606 if mmaplargeindex and 'mmapindexthreshold' in opts:
458 607 mmapindexthreshold = opts['mmapindexthreshold']
459 608 self._withsparseread = bool(opts.get('with-sparse-read', False))
460 609 if 'sparse-read-density-threshold' in opts:
461 610 self._srdensitythreshold = opts['sparse-read-density-threshold']
462 611 if 'sparse-read-min-gap-size' in opts:
463 612 self._srmingapsize = opts['sparse-read-min-gap-size']
464 613
465 614 if self._chunkcachesize <= 0:
466 615 raise RevlogError(_('revlog chunk cache size %r is not greater '
467 616 'than 0') % self._chunkcachesize)
468 617 elif self._chunkcachesize & (self._chunkcachesize - 1):
469 618 raise RevlogError(_('revlog chunk cache size %r is not a power '
470 619 'of 2') % self._chunkcachesize)
471 620
472 621 indexdata = ''
473 622 self._initempty = True
474 623 try:
475 624 f = self.opener(self.indexfile)
476 625 if (mmapindexthreshold is not None and
477 626 self.opener.fstat(f).st_size >= mmapindexthreshold):
478 627 indexdata = util.buffer(util.mmapread(f))
479 628 else:
480 629 indexdata = f.read()
481 630 f.close()
482 631 if len(indexdata) > 0:
483 632 v = versionformat_unpack(indexdata[:4])[0]
484 633 self._initempty = False
485 634 except IOError as inst:
486 635 if inst.errno != errno.ENOENT:
487 636 raise
488 637
489 638 self.version = v
490 639 self._inline = v & FLAG_INLINE_DATA
491 640 self._generaldelta = v & FLAG_GENERALDELTA
492 641 flags = v & ~0xFFFF
493 642 fmt = v & 0xFFFF
494 643 if fmt == REVLOGV0:
495 644 if flags:
496 645 raise RevlogError(_('unknown flags (%#04x) in version %d '
497 646 'revlog %s') %
498 647 (flags >> 16, fmt, self.indexfile))
499 648 elif fmt == REVLOGV1:
500 649 if flags & ~REVLOGV1_FLAGS:
501 650 raise RevlogError(_('unknown flags (%#04x) in version %d '
502 651 'revlog %s') %
503 652 (flags >> 16, fmt, self.indexfile))
504 653 elif fmt == REVLOGV2:
505 654 if flags & ~REVLOGV2_FLAGS:
506 655 raise RevlogError(_('unknown flags (%#04x) in version %d '
507 656 'revlog %s') %
508 657 (flags >> 16, fmt, self.indexfile))
509 658 else:
510 659 raise RevlogError(_('unknown version (%d) in revlog %s') %
511 660 (fmt, self.indexfile))
512 661
513 662 self.storedeltachains = True
514 663
515 664 self._io = revlogio()
516 665 if self.version == REVLOGV0:
517 666 self._io = revlogoldio()
518 667 try:
519 668 d = self._io.parseindex(indexdata, self._inline)
520 669 except (ValueError, IndexError):
521 670 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
522 671 self.index, nodemap, self._chunkcache = d
523 672 if nodemap is not None:
524 673 self.nodemap = self._nodecache = nodemap
525 674 if not self._chunkcache:
526 675 self._chunkclear()
527 676 # revnum -> (chain-length, sum-delta-length)
528 677 self._chaininfocache = {}
529 678 # revlog header -> revlog compressor
530 679 self._decompressors = {}
531 680
532 681 @util.propertycache
533 682 def _compressor(self):
534 683 return util.compengines[self._compengine].revlogcompressor()
535 684
536 685 def tip(self):
537 686 return self.node(len(self.index) - 2)
538 687 def __contains__(self, rev):
539 688 return 0 <= rev < len(self)
540 689 def __len__(self):
541 690 return len(self.index) - 1
542 691 def __iter__(self):
543 692 return iter(xrange(len(self)))
544 693 def revs(self, start=0, stop=None):
545 694 """iterate over all rev in this revlog (from start to stop)"""
546 695 step = 1
547 696 if stop is not None:
548 697 if start > stop:
549 698 step = -1
550 699 stop += step
551 700 else:
552 701 stop = len(self)
553 702 return xrange(start, stop, step)
554 703
555 704 @util.propertycache
556 705 def nodemap(self):
557 706 self.rev(self.node(0))
558 707 return self._nodecache
559 708
560 709 def hasnode(self, node):
561 710 try:
562 711 self.rev(node)
563 712 return True
564 713 except KeyError:
565 714 return False
566 715
567 716 def clearcaches(self):
568 717 self._cache = None
569 718 self._chainbasecache.clear()
570 719 self._chunkcache = (0, '')
571 720 self._pcache = {}
572 721
573 722 try:
574 723 self._nodecache.clearcaches()
575 724 except AttributeError:
576 725 self._nodecache = {nullid: nullrev}
577 726 self._nodepos = None
578 727
579 728 def rev(self, node):
580 729 try:
581 730 return self._nodecache[node]
582 731 except TypeError:
583 732 raise
584 733 except RevlogError:
585 734 # parsers.c radix tree lookup failed
586 735 if node == wdirid:
587 736 raise error.WdirUnsupported
588 737 raise LookupError(node, self.indexfile, _('no node'))
589 738 except KeyError:
590 739 # pure python cache lookup failed
591 740 n = self._nodecache
592 741 i = self.index
593 742 p = self._nodepos
594 743 if p is None:
595 744 p = len(i) - 2
596 745 for r in xrange(p, -1, -1):
597 746 v = i[r][7]
598 747 n[v] = r
599 748 if v == node:
600 749 self._nodepos = r - 1
601 750 return r
602 751 if node == wdirid:
603 752 raise error.WdirUnsupported
604 753 raise LookupError(node, self.indexfile, _('no node'))
605 754
606 755 # Accessors for index entries.
607 756
608 757 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
609 758 # are flags.
610 759 def start(self, rev):
611 760 return int(self.index[rev][0] >> 16)
612 761
613 762 def flags(self, rev):
614 763 return self.index[rev][0] & 0xFFFF
615 764
616 765 def length(self, rev):
617 766 return self.index[rev][1]
618 767
619 768 def rawsize(self, rev):
620 769 """return the length of the uncompressed text for a given revision"""
621 770 l = self.index[rev][2]
622 771 if l >= 0:
623 772 return l
624 773
625 774 t = self.revision(rev, raw=True)
626 775 return len(t)
627 776
628 777 def size(self, rev):
629 778 """length of non-raw text (processed by a "read" flag processor)"""
630 779 # fast path: if no "read" flag processor could change the content,
631 780 # size is rawsize. note: ELLIPSIS is known to not change the content.
632 781 flags = self.flags(rev)
633 782 if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
634 783 return self.rawsize(rev)
635 784
636 785 return len(self.revision(rev, raw=False))
637 786
638 787 def chainbase(self, rev):
639 788 base = self._chainbasecache.get(rev)
640 789 if base is not None:
641 790 return base
642 791
643 792 index = self.index
644 793 base = index[rev][3]
645 794 while base != rev:
646 795 rev = base
647 796 base = index[rev][3]
648 797
649 798 self._chainbasecache[rev] = base
650 799 return base
651 800
652 801 def linkrev(self, rev):
653 802 return self.index[rev][4]
654 803
655 804 def parentrevs(self, rev):
656 805 try:
657 806 entry = self.index[rev]
658 807 except IndexError:
659 808 if rev == wdirrev:
660 809 raise error.WdirUnsupported
661 810 raise
662 811
663 812 return entry[5], entry[6]
664 813
665 814 def node(self, rev):
666 815 try:
667 816 return self.index[rev][7]
668 817 except IndexError:
669 818 if rev == wdirrev:
670 819 raise error.WdirUnsupported
671 820 raise
672 821
673 822 # Derived from index values.
674 823
675 824 def end(self, rev):
676 825 return self.start(rev) + self.length(rev)
677 826
678 827 def parents(self, node):
679 828 i = self.index
680 829 d = i[self.rev(node)]
681 830 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
682 831
683 832 def chainlen(self, rev):
684 833 return self._chaininfo(rev)[0]
685 834
686 835 def _chaininfo(self, rev):
687 836 chaininfocache = self._chaininfocache
688 837 if rev in chaininfocache:
689 838 return chaininfocache[rev]
690 839 index = self.index
691 840 generaldelta = self._generaldelta
692 841 iterrev = rev
693 842 e = index[iterrev]
694 843 clen = 0
695 844 compresseddeltalen = 0
696 845 while iterrev != e[3]:
697 846 clen += 1
698 847 compresseddeltalen += e[1]
699 848 if generaldelta:
700 849 iterrev = e[3]
701 850 else:
702 851 iterrev -= 1
703 852 if iterrev in chaininfocache:
704 853 t = chaininfocache[iterrev]
705 854 clen += t[0]
706 855 compresseddeltalen += t[1]
707 856 break
708 857 e = index[iterrev]
709 858 else:
710 859 # Add text length of base since decompressing that also takes
711 860 # work. For cache hits the length is already included.
712 861 compresseddeltalen += e[1]
713 862 r = (clen, compresseddeltalen)
714 863 chaininfocache[rev] = r
715 864 return r
716 865
717 866 def _deltachain(self, rev, stoprev=None):
718 867 """Obtain the delta chain for a revision.
719 868
720 869 ``stoprev`` specifies a revision to stop at. If not specified, we
721 870 stop at the base of the chain.
722 871
723 872 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
724 873 revs in ascending order and ``stopped`` is a bool indicating whether
725 874 ``stoprev`` was hit.
726 875 """
727 876 # Try C implementation.
728 877 try:
729 878 return self.index.deltachain(rev, stoprev, self._generaldelta)
730 879 except AttributeError:
731 880 pass
732 881
733 882 chain = []
734 883
735 884 # Alias to prevent attribute lookup in tight loop.
736 885 index = self.index
737 886 generaldelta = self._generaldelta
738 887
739 888 iterrev = rev
740 889 e = index[iterrev]
741 890 while iterrev != e[3] and iterrev != stoprev:
742 891 chain.append(iterrev)
743 892 if generaldelta:
744 893 iterrev = e[3]
745 894 else:
746 895 iterrev -= 1
747 896 e = index[iterrev]
748 897
749 898 if iterrev == stoprev:
750 899 stopped = True
751 900 else:
752 901 chain.append(iterrev)
753 902 stopped = False
754 903
755 904 chain.reverse()
756 905 return chain, stopped
757 906
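For instance, against a hypothetical generaldelta revlog rl where revision 5's bases walk 5 -> 3 -> 1 (rev 1 being its own base, i.e. a fulltext):

    chain, stopped = rl._deltachain(5)
    assert (chain, stopped) == ([1, 3, 5], False)
    # the stop revision itself is excluded from the returned chain:
    chain, stopped = rl._deltachain(5, stoprev=3)
    assert (chain, stopped) == ([5], True)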
758 907 def ancestors(self, revs, stoprev=0, inclusive=False):
759 908 """Generate the ancestors of 'revs' in reverse topological order.
760 909 Does not generate revs lower than stoprev.
761 910
762 911 See the documentation for ancestor.lazyancestors for more details."""
763 912
764 913 return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
765 914 inclusive=inclusive)
766 915
767 916 def descendants(self, revs):
768 917 """Generate the descendants of 'revs' in revision order.
769 918
770 919 Yield a sequence of revision numbers starting with a child of
771 920 some rev in revs, i.e., each revision is *not* considered a
772 921 descendant of itself. Results are ordered by revision number (a
773 922 topological sort)."""
774 923 first = min(revs)
775 924 if first == nullrev:
776 925 for i in self:
777 926 yield i
778 927 return
779 928
780 929 seen = set(revs)
781 930 for i in self.revs(start=first + 1):
782 931 for x in self.parentrevs(i):
783 932 if x != nullrev and x in seen:
784 933 seen.add(i)
785 934 yield i
786 935 break
787 936
788 937 def findcommonmissing(self, common=None, heads=None):
789 938 """Return a tuple of the ancestors of common and the ancestors of heads
790 939 that are not ancestors of common. In revset terminology, we return the
791 940 tuple:
792 941
793 942 ::common, (::heads) - (::common)
794 943
795 944 The list is sorted by revision number, meaning it is
796 945 topologically sorted.
797 946
798 947 'heads' and 'common' are both lists of node IDs. If heads is
799 948 not supplied, uses all of the revlog's heads. If common is not
800 949 supplied, uses nullid."""
801 950 if common is None:
802 951 common = [nullid]
803 952 if heads is None:
804 953 heads = self.heads()
805 954
806 955 common = [self.rev(n) for n in common]
807 956 heads = [self.rev(n) for n in heads]
808 957
809 958 # we want the ancestors, but inclusive
810 959 class lazyset(object):
811 960 def __init__(self, lazyvalues):
812 961 self.addedvalues = set()
813 962 self.lazyvalues = lazyvalues
814 963
815 964 def __contains__(self, value):
816 965 return value in self.addedvalues or value in self.lazyvalues
817 966
818 967 def __iter__(self):
819 968 added = self.addedvalues
820 969 for r in added:
821 970 yield r
822 971 for r in self.lazyvalues:
823 972 if not r in added:
824 973 yield r
825 974
826 975 def add(self, value):
827 976 self.addedvalues.add(value)
828 977
829 978 def update(self, values):
830 979 self.addedvalues.update(values)
831 980
832 981 has = lazyset(self.ancestors(common))
833 982 has.add(nullrev)
834 983 has.update(common)
835 984
836 985 # take all ancestors from heads that aren't in has
837 986 missing = set()
838 987 visit = collections.deque(r for r in heads if r not in has)
839 988 while visit:
840 989 r = visit.popleft()
841 990 if r in missing:
842 991 continue
843 992 else:
844 993 missing.add(r)
845 994 for p in self.parentrevs(r):
846 995 if p not in has:
847 996 visit.append(p)
848 997 missing = list(missing)
849 998 missing.sort()
850 999 return has, [self.node(miss) for miss in missing]
851 1000
852 1001 def incrementalmissingrevs(self, common=None):
853 1002 """Return an object that can be used to incrementally compute the
854 1003 revision numbers of the ancestors of arbitrary sets that are not
855 1004 ancestors of common. This is an ancestor.incrementalmissingancestors
856 1005 object.
857 1006
858 1007 'common' is a list of revision numbers. If common is not supplied, uses
859 1008 nullrev.
860 1009 """
861 1010 if common is None:
862 1011 common = [nullrev]
863 1012
864 1013 return ancestor.incrementalmissingancestors(self.parentrevs, common)
865 1014
866 1015 def findmissingrevs(self, common=None, heads=None):
867 1016 """Return the revision numbers of the ancestors of heads that
868 1017 are not ancestors of common.
869 1018
870 1019 More specifically, return a list of revision numbers corresponding to
871 1020 nodes N such that every N satisfies the following constraints:
872 1021
873 1022 1. N is an ancestor of some node in 'heads'
874 1023 2. N is not an ancestor of any node in 'common'
875 1024
876 1025 The list is sorted by revision number, meaning it is
877 1026 topologically sorted.
878 1027
879 1028 'heads' and 'common' are both lists of revision numbers. If heads is
880 1029 not supplied, uses all of the revlog's heads. If common is not
881 1030 supplied, uses nullid."""
882 1031 if common is None:
883 1032 common = [nullrev]
884 1033 if heads is None:
885 1034 heads = self.headrevs()
886 1035
887 1036 inc = self.incrementalmissingrevs(common=common)
888 1037 return inc.missingancestors(heads)
889 1038
890 1039 def findmissing(self, common=None, heads=None):
891 1040 """Return the ancestors of heads that are not ancestors of common.
892 1041
893 1042 More specifically, return a list of nodes N such that every N
894 1043 satisfies the following constraints:
895 1044
896 1045 1. N is an ancestor of some node in 'heads'
897 1046 2. N is not an ancestor of any node in 'common'
898 1047
899 1048 The list is sorted by revision number, meaning it is
900 1049 topologically sorted.
901 1050
902 1051 'heads' and 'common' are both lists of node IDs. If heads is
903 1052 not supplied, uses all of the revlog's heads. If common is not
904 1053 supplied, uses nullid."""
905 1054 if common is None:
906 1055 common = [nullid]
907 1056 if heads is None:
908 1057 heads = self.heads()
909 1058
910 1059 common = [self.rev(n) for n in common]
911 1060 heads = [self.rev(n) for n in heads]
912 1061
913 1062 inc = self.incrementalmissingrevs(common=common)
914 1063 return [self.node(r) for r in inc.missingancestors(heads)]
915 1064
916 1065 def nodesbetween(self, roots=None, heads=None):
917 1066 """Return a topological path from 'roots' to 'heads'.
918 1067
919 1068 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
920 1069 topologically sorted list of all nodes N that satisfy both of
921 1070 these constraints:
922 1071
923 1072 1. N is a descendant of some node in 'roots'
924 1073 2. N is an ancestor of some node in 'heads'
925 1074
926 1075 Every node is considered to be both a descendant and an ancestor
927 1076 of itself, so every reachable node in 'roots' and 'heads' will be
928 1077 included in 'nodes'.
929 1078
930 1079 'outroots' is the list of reachable nodes in 'roots', i.e., the
931 1080 subset of 'roots' that is returned in 'nodes'. Likewise,
932 1081 'outheads' is the subset of 'heads' that is also in 'nodes'.
933 1082
934 1083 'roots' and 'heads' are both lists of node IDs. If 'roots' is
935 1084 unspecified, uses nullid as the only root. If 'heads' is
936 1085 unspecified, uses list of all of the revlog's heads."""
937 1086 nonodes = ([], [], [])
938 1087 if roots is not None:
939 1088 roots = list(roots)
940 1089 if not roots:
941 1090 return nonodes
942 1091 lowestrev = min([self.rev(n) for n in roots])
943 1092 else:
944 1093 roots = [nullid] # Everybody's a descendant of nullid
945 1094 lowestrev = nullrev
946 1095 if (lowestrev == nullrev) and (heads is None):
947 1096 # We want _all_ the nodes!
948 1097 return ([self.node(r) for r in self], [nullid], list(self.heads()))
949 1098 if heads is None:
950 1099 # All nodes are ancestors, so the latest ancestor is the last
951 1100 # node.
952 1101 highestrev = len(self) - 1
953 1102 # Set ancestors to None to signal that every node is an ancestor.
954 1103 ancestors = None
955 1104 # Set heads to an empty dictionary for later discovery of heads
956 1105 heads = {}
957 1106 else:
958 1107 heads = list(heads)
959 1108 if not heads:
960 1109 return nonodes
961 1110 ancestors = set()
962 1111 # Turn heads into a dictionary so we can remove 'fake' heads.
963 1112 # Also, later we will be using it to filter out the heads we can't
964 1113 # find from roots.
965 1114 heads = dict.fromkeys(heads, False)
966 1115 # Start at the top and keep marking parents until we're done.
967 1116 nodestotag = set(heads)
968 1117 # Remember where the top was so we can use it as a limit later.
969 1118 highestrev = max([self.rev(n) for n in nodestotag])
970 1119 while nodestotag:
971 1120 # grab a node to tag
972 1121 n = nodestotag.pop()
973 1122 # Never tag nullid
974 1123 if n == nullid:
975 1124 continue
976 1125 # A node's revision number represents its place in a
977 1126 # topologically sorted list of nodes.
978 1127 r = self.rev(n)
979 1128 if r >= lowestrev:
980 1129 if n not in ancestors:
981 1130 # If we are possibly a descendant of one of the roots
982 1131 # and we haven't already been marked as an ancestor
983 1132 ancestors.add(n) # Mark as ancestor
984 1133 # Add non-nullid parents to list of nodes to tag.
985 1134 nodestotag.update([p for p in self.parents(n) if
986 1135 p != nullid])
987 1136 elif n in heads: # We've seen it before, is it a fake head?
988 1137 # So it is, real heads should not be the ancestors of
989 1138 # any other heads.
990 1139 heads.pop(n)
991 1140 if not ancestors:
992 1141 return nonodes
993 1142 # Now that we have our set of ancestors, we want to remove any
994 1143 # roots that are not ancestors.
995 1144
996 1145 # If one of the roots was nullid, everything is included anyway.
997 1146 if lowestrev > nullrev:
998 1147 # But, since we weren't, let's recompute the lowest rev to not
999 1148 # include roots that aren't ancestors.
1000 1149
1001 1150 # Filter out roots that aren't ancestors of heads
1002 1151 roots = [root for root in roots if root in ancestors]
1003 1152 # Recompute the lowest revision
1004 1153 if roots:
1005 1154 lowestrev = min([self.rev(root) for root in roots])
1006 1155 else:
1007 1156 # No more roots? Return empty list
1008 1157 return nonodes
1009 1158 else:
1010 1159 # We are descending from nullid, and don't need to care about
1011 1160 # any other roots.
1012 1161 lowestrev = nullrev
1013 1162 roots = [nullid]
1014 1163 # Transform our roots list into a set.
1015 1164 descendants = set(roots)
1016 1165 # Also, keep the original roots so we can filter out roots that aren't
1017 1166 # 'real' roots (i.e. are descended from other roots).
1018 1167 roots = descendants.copy()
1019 1168 # Our topologically sorted list of output nodes.
1020 1169 orderedout = []
1021 1170 # Don't start at nullid since we don't want nullid in our output list,
1022 1171 # and if nullid shows up in descendants, empty parents will look like
1023 1172 # they're descendants.
1024 1173 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1025 1174 n = self.node(r)
1026 1175 isdescendant = False
1027 1176 if lowestrev == nullrev: # Everybody is a descendant of nullid
1028 1177 isdescendant = True
1029 1178 elif n in descendants:
1030 1179 # n is already a descendant
1031 1180 isdescendant = True
1032 1181 # This check only needs to be done here because all the roots
1033 1182 # will start being marked as descendants before the loop.
1034 1183 if n in roots:
1035 1184 # If n was a root, check if it's a 'real' root.
1036 1185 p = tuple(self.parents(n))
1037 1186 # If any of its parents are descendants, it's not a root.
1038 1187 if (p[0] in descendants) or (p[1] in descendants):
1039 1188 roots.remove(n)
1040 1189 else:
1041 1190 p = tuple(self.parents(n))
1042 1191 # A node is a descendant if either of its parents are
1043 1192 # descendants. (We seeded the descendants set with the roots
1044 1193 # up there, remember?)
1045 1194 if (p[0] in descendants) or (p[1] in descendants):
1046 1195 descendants.add(n)
1047 1196 isdescendant = True
1048 1197 if isdescendant and ((ancestors is None) or (n in ancestors)):
1049 1198 # Only include nodes that are both descendants and ancestors.
1050 1199 orderedout.append(n)
1051 1200 if (ancestors is not None) and (n in heads):
1052 1201 # We're trying to figure out which heads are reachable
1053 1202 # from roots.
1054 1203 # Mark this head as having been reached
1055 1204 heads[n] = True
1056 1205 elif ancestors is None:
1057 1206 # Otherwise, we're trying to discover the heads.
1058 1207 # Assume this is a head because if it isn't, the next step
1059 1208 # will eventually remove it.
1060 1209 heads[n] = True
1061 1210 # But, obviously its parents aren't.
1062 1211 for p in self.parents(n):
1063 1212 heads.pop(p, None)
1064 1213 heads = [head for head, flag in heads.iteritems() if flag]
1065 1214 roots = list(roots)
1066 1215 assert orderedout
1067 1216 assert roots
1068 1217 assert heads
1069 1218 return (orderedout, roots, heads)
1070 1219
1071 1220 def headrevs(self):
1072 1221 try:
1073 1222 return self.index.headrevs()
1074 1223 except AttributeError:
1075 1224 return self._headrevs()
1076 1225
1077 1226 def computephases(self, roots):
1078 1227 return self.index.computephasesmapsets(roots)
1079 1228
1080 1229 def _headrevs(self):
1081 1230 count = len(self)
1082 1231 if not count:
1083 1232 return [nullrev]
1084 1233 # we won't iter over filtered rev so nobody is a head at start
1085 1234 ishead = [0] * (count + 1)
1086 1235 index = self.index
1087 1236 for r in self:
1088 1237 ishead[r] = 1 # I may be a head
1089 1238 e = index[r]
1090 1239 ishead[e[5]] = ishead[e[6]] = 0 # my parent are not
1091 1240 return [r for r, val in enumerate(ishead) if val]
1092 1241
1093 1242 def heads(self, start=None, stop=None):
1094 1243 """return the list of all nodes that have no children
1095 1244
1096 1245 if start is specified, only heads that are descendants of
1097 1246 start will be returned
1098 1247 if stop is specified, it will consider all the revs from stop
1099 1248 as if they had no children
1100 1249 """
1101 1250 if start is None and stop is None:
1102 1251 if not len(self):
1103 1252 return [nullid]
1104 1253 return [self.node(r) for r in self.headrevs()]
1105 1254
1106 1255 if start is None:
1107 1256 start = nullid
1108 1257 if stop is None:
1109 1258 stop = []
1110 1259 stoprevs = set([self.rev(n) for n in stop])
1111 1260 startrev = self.rev(start)
1112 1261 reachable = {startrev}
1113 1262 heads = {startrev}
1114 1263
1115 1264 parentrevs = self.parentrevs
1116 1265 for r in self.revs(start=startrev + 1):
1117 1266 for p in parentrevs(r):
1118 1267 if p in reachable:
1119 1268 if r not in stoprevs:
1120 1269 reachable.add(r)
1121 1270 heads.add(r)
1122 1271 if p in heads and p not in stoprevs:
1123 1272 heads.remove(p)
1124 1273
1125 1274 return [self.node(r) for r in heads]
1126 1275
1127 1276 def children(self, node):
1128 1277 """find the children of a given node"""
1129 1278 c = []
1130 1279 p = self.rev(node)
1131 1280 for r in self.revs(start=p + 1):
1132 1281 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1133 1282 if prevs:
1134 1283 for pr in prevs:
1135 1284 if pr == p:
1136 1285 c.append(self.node(r))
1137 1286 elif p == nullrev:
1138 1287 c.append(self.node(r))
1139 1288 return c
1140 1289
1141 1290 def descendant(self, start, end):
1142 1291 if start == nullrev:
1143 1292 return True
1144 1293 for i in self.descendants([start]):
1145 1294 if i == end:
1146 1295 return True
1147 1296 elif i > end:
1148 1297 break
1149 1298 return False
1150 1299
1151 1300 def commonancestorsheads(self, a, b):
1152 1301 """calculate all the heads of the common ancestors of nodes a and b"""
1153 1302 a, b = self.rev(a), self.rev(b)
1154 1303 try:
1155 1304 ancs = self.index.commonancestorsheads(a, b)
1156 1305 except (AttributeError, OverflowError): # C implementation failed
1157 1306 ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)
1158 1307 return pycompat.maplist(self.node, ancs)
1159 1308
1160 1309 def isancestor(self, a, b):
1161 1310 """return True if node a is an ancestor of node b
1162 1311
1163 1312 The implementation of this is trivial but the use of
1164 1313 commonancestorsheads is not."""
1165 1314 return a in self.commonancestorsheads(a, b)
1166 1315
1167 1316 def ancestor(self, a, b):
1168 1317 """calculate the "best" common ancestor of nodes a and b"""
1169 1318
1170 1319 a, b = self.rev(a), self.rev(b)
1171 1320 try:
1172 1321 ancs = self.index.ancestors(a, b)
1173 1322 except (AttributeError, OverflowError):
1174 1323 ancs = ancestor.ancestors(self.parentrevs, a, b)
1175 1324 if ancs:
1176 1325 # choose a consistent winner when there's a tie
1177 1326 return min(map(self.node, ancs))
1178 1327 return nullid
1179 1328
1180 1329 def _match(self, id):
1181 1330 if isinstance(id, int):
1182 1331 # rev
1183 1332 return self.node(id)
1184 1333 if len(id) == 20:
1185 1334 # possibly a binary node
1186 1335 # odds of a binary node being all hex in ASCII are 1 in 10**25
1187 1336 try:
1188 1337 node = id
1189 1338 self.rev(node) # quick search the index
1190 1339 return node
1191 1340 except LookupError:
1192 1341 pass # may be partial hex id
1193 1342 try:
1194 1343 # str(rev)
1195 1344 rev = int(id)
1196 1345 if str(rev) != id:
1197 1346 raise ValueError
1198 1347 if rev < 0:
1199 1348 rev = len(self) + rev
1200 1349 if rev < 0 or rev >= len(self):
1201 1350 raise ValueError
1202 1351 return self.node(rev)
1203 1352 except (ValueError, OverflowError):
1204 1353 pass
1205 1354 if len(id) == 40:
1206 1355 try:
1207 1356 # a full hex nodeid?
1208 1357 node = bin(id)
1209 1358 self.rev(node)
1210 1359 return node
1211 1360 except (TypeError, LookupError):
1212 1361 pass
1213 1362
1214 1363 def _partialmatch(self, id):
1215 1364 maybewdir = wdirhex.startswith(id)
1216 1365 try:
1217 1366 partial = self.index.partialmatch(id)
1218 1367 if partial and self.hasnode(partial):
1219 1368 if maybewdir:
1220 1369 # single 'ff...' match in radix tree, ambiguous with wdir
1221 1370 raise RevlogError
1222 1371 return partial
1223 1372 if maybewdir:
1224 1373 # no 'ff...' match in radix tree, wdir identified
1225 1374 raise error.WdirUnsupported
1226 1375 return None
1227 1376 except RevlogError:
1228 1377 # parsers.c radix tree lookup gave multiple matches
1229 1378 # fast path: for unfiltered changelog, radix tree is accurate
1230 1379 if not getattr(self, 'filteredrevs', None):
1231 1380 raise LookupError(id, self.indexfile,
1232 1381 _('ambiguous identifier'))
1233 1382 # fall through to slow path that filters hidden revisions
1234 1383 except (AttributeError, ValueError):
1235 1384 # we are pure python, or key was too short to search radix tree
1236 1385 pass
1237 1386
1238 1387 if id in self._pcache:
1239 1388 return self._pcache[id]
1240 1389
1241 1390 if len(id) < 40:
1242 1391 try:
1243 1392 # hex(node)[:...]
1244 1393 l = len(id) // 2 # grab an even number of digits
1245 1394 prefix = bin(id[:l * 2])
1246 1395 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1247 1396 nl = [n for n in nl if hex(n).startswith(id) and
1248 1397 self.hasnode(n)]
1249 1398 if len(nl) > 0:
1250 1399 if len(nl) == 1 and not maybewdir:
1251 1400 self._pcache[id] = nl[0]
1252 1401 return nl[0]
1253 1402 raise LookupError(id, self.indexfile,
1254 1403 _('ambiguous identifier'))
1255 1404 if maybewdir:
1256 1405 raise error.WdirUnsupported
1257 1406 return None
1258 1407 except (TypeError, binascii.Error):
1259 1408 pass
1260 1409
1261 1410 def lookup(self, id):
1262 1411 """locate a node based on:
1263 1412 - revision number or str(revision number)
1264 1413 - nodeid or subset of hex nodeid
1265 1414 """
1266 1415 n = self._match(id)
1267 1416 if n is not None:
1268 1417 return n
1269 1418 n = self._partialmatch(id)
1270 1419 if n:
1271 1420 return n
1272 1421
1273 1422 raise LookupError(id, self.indexfile, _('no match found'))
1274 1423
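Putting _match and _partialmatch together, each of these identifier forms is accepted (a sketch; rl is a revlog and node one of its 20-byte ids):

    rl.lookup('0')             # revision number, as a string
    rl.lookup(node)            # binary node id, exactly 20 bytes
    rl.lookup(hex(node))       # full 40-digit hex id
    rl.lookup(hex(node)[:8])   # hex prefix, if unambiguous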
1275 1424 def shortest(self, hexnode, minlength=1):
1276 1425 """Find the shortest unambiguous prefix that matches hexnode."""
1277 1426 def isvalid(test):
1278 1427 try:
1279 1428 if self._partialmatch(test) is None:
1280 1429 return False
1281 1430
1282 1431 try:
1283 1432 i = int(test)
1284 1433 # if we are a pure int, then starting with zero will not be
1285 1434 # confused as a rev; or, obviously, if the int is larger
1286 1435 # than the value of the tip rev
1287 1436 if test[0] == '0' or i > len(self):
1288 1437 return True
1289 1438 return False
1290 1439 except ValueError:
1291 1440 return True
1292 1441 except error.RevlogError:
1293 1442 return False
1294 1443 except error.WdirUnsupported:
1295 1444 # single 'ff...' match
1296 1445 return True
1297 1446
1298 1447 shortest = hexnode
1299 1448 startlength = max(6, minlength)
1300 1449 length = startlength
1301 1450 while True:
1302 1451 test = hexnode[:length]
1303 1452 if isvalid(test):
1304 1453 shortest = test
1305 1454 if length == minlength or length > startlength:
1306 1455 return shortest
1307 1456 length -= 1
1308 1457 else:
1309 1458 length += 1
1310 1459 if len(shortest) <= length:
1311 1460 return shortest
1312 1461
1313 1462 def cmp(self, node, text):
1314 1463 """compare text with a given file revision
1315 1464
1316 1465 returns True if text is different than what is stored.
1317 1466 """
1318 1467 p1, p2 = self.parents(node)
1319 1468 return hash(text, p1, p2) != node
1320 1469
1321 1470 def _cachesegment(self, offset, data):
1322 1471 """Add a segment to the revlog cache.
1323 1472
1324 1473 Accepts an absolute offset and the data that is at that location.
1325 1474 """
1326 1475 o, d = self._chunkcache
1327 1476 # try to add to existing cache
1328 1477 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1329 1478 self._chunkcache = o, d + data
1330 1479 else:
1331 1480 self._chunkcache = offset, data
1332 1481
1333 1482 def _readsegment(self, offset, length, df=None):
1334 1483 """Load a segment of raw data from the revlog.
1335 1484
1336 1485 Accepts an absolute offset, length to read, and an optional existing
1337 1486 file handle to read from.
1338 1487
1339 1488 If an existing file handle is passed, it will be seeked and the
1340 1489 original seek position will NOT be restored.
1341 1490
1342 1491 Returns a str or buffer of raw byte data.
1343 1492 """
1344 1493 if df is not None:
1345 1494 closehandle = False
1346 1495 else:
1347 1496 if self._inline:
1348 1497 df = self.opener(self.indexfile)
1349 1498 else:
1350 1499 df = self.opener(self.datafile)
1351 1500 closehandle = True
1352 1501
1353 1502 # Cache data both forward and backward around the requested
1354 1503 # data, in a fixed size window. This helps speed up operations
1355 1504 # involving reading the revlog backwards.
1356 1505 cachesize = self._chunkcachesize
1357 1506 realoffset = offset & ~(cachesize - 1)
1358 1507 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
1359 1508 - realoffset)
1360 1509 df.seek(realoffset)
1361 1510 d = df.read(reallength)
1362 1511 if closehandle:
1363 1512 df.close()
1364 1513 self._cachesegment(realoffset, d)
1365 1514 if offset != realoffset or reallength != length:
1366 1515 return util.buffer(d, offset - realoffset, length)
1367 1516 return d
1368 1517
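Because _chunkcachesize must be a power of two (enforced in __init__), the masking above rounds the read window outward to cache-size boundaries; with the 65536-byte default:

    cachesize = 65536
    offset, length = 70000, 100
    realoffset = offset & ~(cachesize - 1)             # -> 65536
    reallength = (((offset + length + cachesize)
                   & ~(cachesize - 1)) - realoffset)   # -> 65536
    # the read spans [65536, 131072), covering the requested
    # [70000, 70100) plus cached context on both sides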
1369 1518 def _getsegment(self, offset, length, df=None):
1370 1519 """Obtain a segment of raw data from the revlog.
1371 1520
1372 1521 Accepts an absolute offset, length of bytes to obtain, and an
1373 1522 optional file handle to the already-opened revlog. If the file
1374 1523 handle is used, its original seek position will not be preserved.
1375 1524
1376 1525 Requests for data may be returned from a cache.
1377 1526
1378 1527 Returns a str or a buffer instance of raw byte data.
1379 1528 """
1380 1529 o, d = self._chunkcache
1381 1530 l = len(d)
1382 1531
1383 1532 # is it in the cache?
1384 1533 cachestart = offset - o
1385 1534 cacheend = cachestart + length
1386 1535 if cachestart >= 0 and cacheend <= l:
1387 1536 if cachestart == 0 and cacheend == l:
1388 1537 return d # avoid a copy
1389 1538 return util.buffer(d, cachestart, cacheend - cachestart)
1390 1539
1391 1540 return self._readsegment(offset, length, df=df)
1392 1541
1393 1542 def _getsegmentforrevs(self, startrev, endrev, df=None):
1394 1543 """Obtain a segment of raw data corresponding to a range of revisions.
1395 1544
1396 1545 Accepts the start and end revisions and an optional already-open
1397 1546 file handle to be used for reading. If the file handle is used, its
1398 1547 seek position will not be preserved.
1399 1548
1400 1549 Requests for data may be satisfied by a cache.
1401 1550
1402 1551 Returns a 2-tuple of (offset, data) for the requested range of
1403 1552 revisions. Offset is the integer offset from the beginning of the
1404 1553 revlog and data is a str or buffer of the raw byte data.
1405 1554
1406 1555 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1407 1556 to determine where each revision's data begins and ends.
1408 1557 """
1409 1558 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1410 1559 # (functions are expensive).
1411 1560 index = self.index
1412 1561 istart = index[startrev]
1413 1562 start = int(istart[0] >> 16)
1414 1563 if startrev == endrev:
1415 1564 end = start + istart[1]
1416 1565 else:
1417 1566 iend = index[endrev]
1418 1567 end = int(iend[0] >> 16) + iend[1]
1419 1568
1420 1569 if self._inline:
1421 1570 start += (startrev + 1) * self._io.size
1422 1571 end += (endrev + 1) * self._io.size
1423 1572 length = end - start
1424 1573
1425 1574 return start, self._getsegment(start, length, df=df)
1426 1575
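The inline adjustment reflects the on-disk layout of inline revlogs, where each index record is immediately followed by its revision's data, so every earlier record shifts the data by one index-entry size (a sketch of the same arithmetic):

    physical = rl.start(rev) + (rev + 1) * rl._io.size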
1427 1576 def _chunk(self, rev, df=None):
1428 1577 """Obtain a single decompressed chunk for a revision.
1429 1578
1430 1579 Accepts an integer revision and an optional already-open file handle
1431 1580 to be used for reading. If used, the seek position of the file will not
1432 1581 be preserved.
1433 1582
1434 1583 Returns a str holding uncompressed data for the requested revision.
1435 1584 """
1436 1585 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1437 1586
1438 1587 def _chunks(self, revs, df=None):
1439 1588 """Obtain decompressed chunks for the specified revisions.
1440 1589
1441 1590 Accepts an iterable of numeric revisions that are assumed to be in
1442 1591 ascending order. Also accepts an optional already-open file handle
1443 1592 to be used for reading. If used, the seek position of the file will
1444 1593 not be preserved.
1445 1594
1446 1595 This function is similar to calling ``self._chunk()`` multiple times,
1447 1596 but is faster.
1448 1597
1449 1598 Returns a list with decompressed data for each requested revision.
1450 1599 """
1451 1600 if not revs:
1452 1601 return []
1453 1602 start = self.start
1454 1603 length = self.length
1455 1604 inline = self._inline
1456 1605 iosize = self._io.size
1457 1606 buffer = util.buffer
1458 1607
1459 1608 l = []
1460 1609 ladd = l.append
1461 1610
1462 1611 if not self._withsparseread:
1463 1612 slicedchunks = (revs,)
1464 1613 else:
1465 1614 slicedchunks = _slicechunk(self, revs)
1466 1615
1467 1616 for revschunk in slicedchunks:
1468 1617 firstrev = revschunk[0]
1469 1618 # Skip trailing revisions with empty diff
1470 1619 for lastrev in revschunk[::-1]:
1471 1620 if length(lastrev) != 0:
1472 1621 break
1473 1622
1474 1623 try:
1475 1624 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1476 1625 except OverflowError:
1477 1626 # issue4215 - we can't cache a run of chunks greater than
1478 1627 # 2G on Windows
1479 1628 return [self._chunk(rev, df=df) for rev in revschunk]
1480 1629
1481 1630 decomp = self.decompress
1482 1631 for rev in revschunk:
1483 1632 chunkstart = start(rev)
1484 1633 if inline:
1485 1634 chunkstart += (rev + 1) * iosize
1486 1635 chunklength = length(rev)
1487 1636 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1488 1637
1489 1638 return l
1490 1639
1491 1640 def _chunkclear(self):
1492 1641 """Clear the raw chunk cache."""
1493 1642 self._chunkcache = (0, '')
1494 1643
1495 1644 def deltaparent(self, rev):
1496 1645 """return deltaparent of the given revision"""
1497 1646 base = self.index[rev][3]
1498 1647 if base == rev:
1499 1648 return nullrev
1500 1649 elif self._generaldelta:
1501 1650 return base
1502 1651 else:
1503 1652 return rev - 1
1504 1653
1505 1654 def revdiff(self, rev1, rev2):
1506 1655 """return or calculate a delta between two revisions
1507 1656
1508 1657 The delta calculated is in binary form and is intended to be written to
1509 1658 revlog data directly. So this function needs raw revision data.
1510 1659 """
1511 1660 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1512 1661 return bytes(self._chunk(rev2))
1513 1662
1514 1663 return mdiff.textdiff(self.revision(rev1, raw=True),
1515 1664 self.revision(rev2, raw=True))
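# Editorial sketch: when rev2 is already stored as a delta against rev1,
# the stored chunk is returned verbatim, so
#
#     d = rl.revdiff(rl.deltaparent(r), r)
#
# is cheap (one chunk read) provided deltaparent(r) is not nullrev;
# otherwise both raw texts are rebuilt and diffed with mdiff.textdiff().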
1516 1665
1517 1666 def revision(self, nodeorrev, _df=None, raw=False):
1518 1667 """return an uncompressed revision of a given node or revision
1519 1668 number.
1520 1669
1521 1670 _df - an existing file handle to read from. (internal-only)
1522 1671 raw - an optional argument specifying if the revision data is to be
1523 1672 treated as raw data when applying flag transforms. 'raw' should be set
1524 1673 to True when generating changegroups or in debug commands.
1525 1674 """
1526 1675 if isinstance(nodeorrev, int):
1527 1676 rev = nodeorrev
1528 1677 node = self.node(rev)
1529 1678 else:
1530 1679 node = nodeorrev
1531 1680 rev = None
1532 1681
1533 1682 cachedrev = None
1534 1683 flags = None
1535 1684 rawtext = None
1536 1685 if node == nullid:
1537 1686 return ""
1538 1687 if self._cache:
1539 1688 if self._cache[0] == node:
1540 1689 # _cache only stores rawtext
1541 1690 if raw:
1542 1691 return self._cache[2]
1543 1692 # duplicated, but good for perf
1544 1693 if rev is None:
1545 1694 rev = self.rev(node)
1546 1695 if flags is None:
1547 1696 flags = self.flags(rev)
1548 1697 # no extra flags set, no flag processor runs, text = rawtext
1549 1698 if flags == REVIDX_DEFAULT_FLAGS:
1550 1699 return self._cache[2]
1551 1700 # rawtext is reusable. need to run flag processor
1552 1701 rawtext = self._cache[2]
1553 1702
1554 1703 cachedrev = self._cache[1]
1555 1704
1556 1705 # look up what we need to read
1557 1706 if rawtext is None:
1558 1707 if rev is None:
1559 1708 rev = self.rev(node)
1560 1709
1561 1710 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1562 1711 if stopped:
1563 1712 rawtext = self._cache[2]
1564 1713
1565 1714 # drop cache to save memory
1566 1715 self._cache = None
1567 1716
1568 1717 bins = self._chunks(chain, df=_df)
1569 1718 if rawtext is None:
1570 1719 rawtext = bytes(bins[0])
1571 1720 bins = bins[1:]
1572 1721
1573 1722 rawtext = mdiff.patches(rawtext, bins)
1574 1723 self._cache = (node, rev, rawtext)
1575 1724
1576 1725 if flags is None:
1577 1726 if rev is None:
1578 1727 rev = self.rev(node)
1579 1728 flags = self.flags(rev)
1580 1729
1581 1730 text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
1582 1731 if validatehash:
1583 1732 self.checkhash(text, node, rev=rev)
1584 1733
1585 1734 return text
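# Editorial sketch: the ``raw`` flag selects between stored bytes and the
# user-visible text:
#
#     stored = rl.revision(node, raw=True)   # rawtext, pre-transform
#     shown = rl.revision(node)              # after 'read' flag processors
#
# The two only differ for revisions carrying flags that have a registered
# processor.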
1586 1735
1587 1736 def hash(self, text, p1, p2):
1588 1737 """Compute a node hash.
1589 1738
1590 1739 Available as a function so that subclasses can replace the hash
1591 1740 as needed.
1592 1741 """
1593 1742 return hash(text, p1, p2)
1594 1743
1595 1744 def _processflags(self, text, flags, operation, raw=False):
1596 1745 """Inspect revision data flags and applies transforms defined by
1597 1746 registered flag processors.
1598 1747
1599 1748 ``text`` - the revision data to process
1600 1749 ``flags`` - the revision flags
1601 1750 ``operation`` - the operation being performed (read or write)
1602 1751 ``raw`` - an optional argument describing if the raw transform should be
1603 1752 applied.
1604 1753
1605 1754 This method processes the flags in the order (or reverse order if
1606 1755 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
1607 1756 flag processors registered for present flags. The order of flags defined
1608 1757 in REVIDX_FLAGS_ORDER needs to be stable because flag transforms do not commute.
1609 1758
1610 1759 Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
1611 1760 processed text and ``validatehash`` is a bool indicating whether the
1612 1761 returned text should be checked for hash integrity.
1613 1762
1614 1763 Note: If the ``raw`` argument is set, it has precedence over the
1615 1764 operation and will only update the value of ``validatehash``.
1616 1765 """
1617 1766 # fast path: no flag processors will run
1618 1767 if flags == 0:
1619 1768 return text, True
1620 1769 if operation not in ('read', 'write'):
1621 1770 raise ProgrammingError(_("invalid '%s' operation") % operation)
1622 1771 # Check all flags are known.
1623 1772 if flags & ~REVIDX_KNOWN_FLAGS:
1624 1773 raise RevlogError(_("incompatible revision flag '%#x'") %
1625 1774 (flags & ~REVIDX_KNOWN_FLAGS))
1626 1775 validatehash = True
1627 1776 # Depending on the operation (read or write), the order might be
1628 1777 # reversed due to non-commutative transforms.
1629 1778 orderedflags = REVIDX_FLAGS_ORDER
1630 1779 if operation == 'write':
1631 1780 orderedflags = reversed(orderedflags)
1632 1781
1633 1782 for flag in orderedflags:
1634 1783 # If a flagprocessor has been registered for a known flag, apply the
1635 1784 # related operation transform and update result tuple.
1636 1785 if flag & flags:
1637 1786 vhash = True
1638 1787
1639 1788 if flag not in _flagprocessors:
1640 1789 message = _("missing processor for flag '%#x'") % (flag)
1641 1790 raise RevlogError(message)
1642 1791
1643 1792 processor = _flagprocessors[flag]
1644 1793 if processor is not None:
1645 1794 readtransform, writetransform, rawtransform = processor
1646 1795
1647 1796 if raw:
1648 1797 vhash = rawtransform(self, text)
1649 1798 elif operation == 'read':
1650 1799 text, vhash = readtransform(self, text)
1651 1800 else: # write operation
1652 1801 text, vhash = writetransform(self, text)
1653 1802 validatehash = validatehash and vhash
1654 1803
1655 1804 return text, validatehash
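# Editorial sketch: write and read transforms are expected to invert each
# other, so a round trip should be lossless for well-behaved processors:
#
#     rawtext, vh1 = rl._processflags(text, flags, 'write')
#     text2, vh2 = rl._processflags(rawtext, flags, 'read')
#
# with text2 == text (cf. the reversed iteration order above for the
# 'write' direction).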
1656 1805
1657 1806 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1658 1807 """Check node hash integrity.
1659 1808
1660 1809 Available as a function so that subclasses can extend hash mismatch
1661 1810 behaviors as needed.
1662 1811 """
1663 1812 if p1 is None and p2 is None:
1664 1813 p1, p2 = self.parents(node)
1665 1814 if node != self.hash(text, p1, p2):
1666 1815 revornode = rev
1667 1816 if revornode is None:
1668 1817 revornode = templatefilters.short(hex(node))
1669 1818 raise RevlogError(_("integrity check failed on %s:%s")
1670 1819 % (self.indexfile, pycompat.bytestr(revornode)))
1671 1820
1672 1821 def checkinlinesize(self, tr, fp=None):
1673 1822 """Check if the revlog is too big for inline and convert if so.
1674 1823
1675 1824 This should be called after revisions are added to the revlog. If the
1676 1825 revlog has grown too large to be an inline revlog, it will convert it
1677 1826 to use multiple index and data files.
1678 1827 """
1679 1828 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
1680 1829 return
1681 1830
1682 1831 trinfo = tr.find(self.indexfile)
1683 1832 if trinfo is None:
1684 1833 raise RevlogError(_("%s not found in the transaction")
1685 1834 % self.indexfile)
1686 1835
1687 1836 trindex = trinfo[2]
1688 1837 if trindex is not None:
1689 1838 dataoff = self.start(trindex)
1690 1839 else:
1691 1840 # revlog was stripped at start of transaction, use all leftover data
1692 1841 trindex = len(self) - 1
1693 1842 dataoff = self.end(-2)
1694 1843
1695 1844 tr.add(self.datafile, dataoff)
1696 1845
1697 1846 if fp:
1698 1847 fp.flush()
1699 1848 fp.close()
1700 1849
1701 1850 df = self.opener(self.datafile, 'w')
1702 1851 try:
1703 1852 for r in self:
1704 1853 df.write(self._getsegmentforrevs(r, r)[1])
1705 1854 finally:
1706 1855 df.close()
1707 1856
1708 1857 fp = self.opener(self.indexfile, 'w', atomictemp=True,
1709 1858 checkambig=self._checkambig)
1710 1859 self.version &= ~FLAG_INLINE_DATA
1711 1860 self._inline = False
1712 1861 for i in self:
1713 1862 e = self._io.packentry(self.index[i], self.node, self.version, i)
1714 1863 fp.write(e)
1715 1864
1716 1865 # if we don't call close, the temp file will never replace the
1717 1866 # real index
1718 1867 fp.close()
1719 1868
1720 1869 tr.replace(self.indexfile, trindex * self._io.size)
1721 1870 self._chunkclear()
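# Editorial sketch: _writeentry() calls this after every inline append,
# so an inline revlog splits transparently once it crosses _maxinline:
#
#     rl.addrevision(text, tr, link, p1, p2)   # may flip rl._inline off
#
# after which the ``.d`` file holds revision data and the ``.i`` file
# only fixed-size index entries.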
1722 1871
1723 1872 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1724 node=None, flags=REVIDX_DEFAULT_FLAGS):
1873 node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
1725 1874 """add a revision to the log
1726 1875
1727 1876 text - the revision data to add
1728 1877 transaction - the transaction object used for rollback
1729 1878 link - the linkrev data to add
1730 1879 p1, p2 - the parent nodeids of the revision
1731 1880 cachedelta - an optional precomputed delta
1732 1881 node - nodeid of revision; typically node is not specified, and it is
1733 1882 computed by default as hash(text, p1, p2), however subclasses might
1734 1883 use a different hashing method (and override checkhash() in that case)
1735 1884 flags - the known flags to set on the revision
1885 deltacomputer - an optional _deltacomputer instance shared between
1886 multiple calls
1736 1887 """
1737 1888 if link == nullrev:
1738 1889 raise RevlogError(_("attempted to add linkrev -1 to %s")
1739 1890 % self.indexfile)
1740 1891
1741 1892 if flags:
1742 1893 node = node or self.hash(text, p1, p2)
1743 1894
1744 1895 rawtext, validatehash = self._processflags(text, flags, 'write')
1745 1896
1746 1897 # If the flag processor modifies the revision data, ignore any provided
1747 1898 # cachedelta.
1748 1899 if rawtext != text:
1749 1900 cachedelta = None
1750 1901
1751 1902 if len(rawtext) > _maxentrysize:
1752 1903 raise RevlogError(
1753 1904 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
1754 1905 % (self.indexfile, len(rawtext)))
1755 1906
1756 1907 node = node or self.hash(rawtext, p1, p2)
1757 1908 if node in self.nodemap:
1758 1909 return node
1759 1910
1760 1911 if validatehash:
1761 1912 self.checkhash(rawtext, node, p1=p1, p2=p2)
1762 1913
1763 1914 return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
1764 flags, cachedelta=cachedelta)
1915 flags, cachedelta=cachedelta,
1916 deltacomputer=deltacomputer)
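# Editorial sketch: the new ``deltacomputer`` argument lets batch callers
# share one computer across many adds, which is what this changeset
# enables (addgroup() and clone() below now do exactly this):
#
#     dc = _deltacomputer(rl)
#     for text, link, p1, p2 in revisions:
#         rl.addrevision(text, tr, link, p1, p2, deltacomputer=dc)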
1765 1917
1766 1918 def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
1767 cachedelta=None):
1919 cachedelta=None, deltacomputer=None):
1768 1920 """add a raw revision with known flags, node and parents
1769 1921 useful when reusing a revision not stored in this revlog (ex: received
1770 1922 over the wire, or read from an external bundle).
1771 1923 """
1772 1924 dfh = None
1773 1925 if not self._inline:
1774 1926 dfh = self.opener(self.datafile, "a+")
1775 1927 ifh = self.opener(self.indexfile, "a+", checkambig=self._checkambig)
1776 1928 try:
1777 1929 return self._addrevision(node, rawtext, transaction, link, p1, p2,
1778 flags, cachedelta, ifh, dfh)
1930 flags, cachedelta, ifh, dfh,
1931 deltacomputer=deltacomputer)
1779 1932 finally:
1780 1933 if dfh:
1781 1934 dfh.close()
1782 1935 ifh.close()
1783 1936
1784 1937 def compress(self, data):
1785 1938 """Generate a possibly-compressed representation of data."""
1786 1939 if not data:
1787 1940 return '', data
1788 1941
1789 1942 compressed = self._compressor.compress(data)
1790 1943
1791 1944 if compressed:
1792 1945 # The revlog compressor added the header in the returned data.
1793 1946 return '', compressed
1794 1947
1795 1948 if data[0:1] == '\0':
1796 1949 return '', data
1797 1950 return 'u', data
1798 1951
1799 1952 def decompress(self, data):
1800 1953 """Decompress a revlog chunk.
1801 1954
1802 1955 The chunk is expected to begin with a header identifying the
1803 1956 format type so it can be routed to an appropriate decompressor.
1804 1957 """
1805 1958 if not data:
1806 1959 return data
1807 1960
1808 1961 # Revlogs are read much more frequently than they are written and many
1809 1962 # chunks only take microseconds to decompress, so performance is
1810 1963 # important here.
1811 1964 #
1812 1965 # We can make a few assumptions about revlogs:
1813 1966 #
1814 1967 # 1) the majority of chunks will be compressed (as opposed to inline
1815 1968 # raw data).
1816 1969 # 2) decompressing *any* data will likely be at least 10x slower than
1817 1970 # returning raw inline data.
1818 1971 # 3) we want to prioritize common and officially supported compression
1819 1972 # engines
1820 1973 #
1821 1974 # It follows that we want to optimize for "decompress compressed data
1822 1975 # when encoded with common and officially supported compression engines"
1823 1976 # case over "raw data" and "data encoded by less common or non-official
1824 1977 # compression engines." That is why we have the inline lookup first
1825 1978 # followed by the compengines lookup.
1826 1979 #
1827 1980 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
1828 1981 # compressed chunks. And this matters for changelog and manifest reads.
1829 1982 t = data[0:1]
1830 1983
1831 1984 if t == 'x':
1832 1985 try:
1833 1986 return _zlibdecompress(data)
1834 1987 except zlib.error as e:
1835 1988 raise RevlogError(_('revlog decompress error: %s') % str(e))
1836 1989 # '\0' is more common than 'u' so it goes first.
1837 1990 elif t == '\0':
1838 1991 return data
1839 1992 elif t == 'u':
1840 1993 return util.buffer(data, 1)
1841 1994
1842 1995 try:
1843 1996 compressor = self._decompressors[t]
1844 1997 except KeyError:
1845 1998 try:
1846 1999 engine = util.compengines.forrevlogheader(t)
1847 2000 compressor = engine.revlogcompressor()
1848 2001 self._decompressors[t] = compressor
1849 2002 except KeyError:
1850 2003 raise RevlogError(_('unknown compression type %r') % t)
1851 2004
1852 2005 return compressor.decompress(data)
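# Editorial sketch: the first byte of the chunk determines the route
# taken above:
#
#     'x'  - zlib stream (its own magic byte): _zlibdecompress fast path
#     '\0' - raw data that happens to start with NUL: returned unchanged
#     'u'  - explicit "uncompressed" marker: header byte stripped
#
# Any other leading byte is resolved through
# util.compengines.forrevlogheader().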
1853 2006
1854 2007 def _isgooddeltainfo(self, d, textlen):
1855 2008 """Returns True if the given delta is good. Good means that it is within
1856 2009 the disk span, disk size, and chain length bounds that we know to be
1857 2010 performant."""
1858 2011 if d is None:
1859 2012 return False
1860 2013
1861 2014 # - 'd.distance' is the distance from the base revision -- bounding it
1862 2015 # limits the amount of I/O we need to do.
1863 2016 # - 'd.compresseddeltalen' is the sum of the total size of deltas we
1864 2017 # need to apply -- bounding it limits the amount of CPU we consume.
1865 2018
1866 2019 defaultmax = textlen * 4
1867 2020 maxdist = self._maxdeltachainspan
1868 2021 if not maxdist:
1869 2022 maxdist = d.distance # ensure the conditional below passes
1870 2023 maxdist = max(maxdist, defaultmax)
1871 2024 if (d.distance > maxdist or d.deltalen > textlen or
1872 2025 d.compresseddeltalen > textlen * 2 or
1873 2026 (self._maxchainlen and d.chainlen > self._maxchainlen)):
1874 2027 return False
1875 2028
1876 2029 return True
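# Editorial worked example: with textlen = 1000 and no _maxdeltachainspan
# configured, maxdist becomes max(d.distance, 4000). A candidate with
# distance 3500, deltalen 400, compresseddeltalen 1500 and an acceptable
# chain length therefore passes; one whose deltas sum past 2000
# (2 * textlen) is rejected.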
1877 2030
1878 def _getcandidaterevs(self, p1, p2, cachedelta):
1879 """
1880 Yields groups of candidate revisions worth diffing against,
1881 ordered from most to least preferred.
1882 """
1883 curr = len(self)
1884 prev = curr - 1
1885 p1r, p2r = self.rev(p1), self.rev(p2)
1886
1887 # should we try to build a delta?
1888 if prev != nullrev and self.storedeltachains:
1889 tested = set()
1890 # This condition is true most of the time when processing
1891 # changegroup data into a generaldelta repo. The only time it
1892 # isn't true is if this is the first revision in a delta chain
1893 # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
1894 if cachedelta and self._generaldelta and self._lazydeltabase:
1895 # Assume what we received from the server is a good choice
1896 # build delta will reuse the cache
1897 yield (cachedelta[0],)
1898 tested.add(cachedelta[0])
1899
1900 if self._generaldelta:
1901 # exclude already lazy tested base if any
1902 parents = [p for p in (p1r, p2r)
1903 if p != nullrev and p not in tested]
1904 if parents and not self._aggressivemergedeltas:
1905 # Pick whichever parent is closer to us (to minimize the
1906 # chance of having to build a fulltext).
1907 parents = [max(parents)]
1908 tested.update(parents)
1909 yield parents
1910
1911 if prev not in tested:
1912 # other approaches failed; try against prev to hopefully save us a
1913 # fulltext.
1914 yield (prev,)
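# Editorial sketch: for a typical generaldelta pull, the generator above
# yields candidate groups in order of preference:
#
#     (cachedelta[0],)   # base suggested by the incoming bundle
#     (p1,) or (p1, p2)  # parent(s); both only with aggressivemergedeltas
#     (prev,)            # last resort before storing a fulltext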
1915
1916 def _buildtext(self, revinfo, fh):
1917 """Builds a fulltext version of a revision
1918
1919 revinfo: _revisioninfo instance that contains all needed info
1920 fh: file handle to either the .i or the .d revlog file,
1921 depending on whether it is inlined or not
1922 """
1923 btext = revinfo.btext
1924 if btext[0] is not None:
1925 return btext[0]
1926
1927 cachedelta = revinfo.cachedelta
1928 flags = revinfo.flags
1929 node = revinfo.node
1930
1931 baserev = cachedelta[0]
1932 delta = cachedelta[1]
1933 # special case deltas which replace entire base; no need to decode
1934 # base revision. this neatly avoids censored bases, which throw when
1935 # they're decoded.
1936 hlen = struct.calcsize(">lll")
1937 if delta[:hlen] == mdiff.replacediffheader(self.rawsize(baserev),
1938 len(delta) - hlen):
1939 btext[0] = delta[hlen:]
1940 else:
1941 basetext = self.revision(baserev, _df=fh, raw=True)
1942 btext[0] = mdiff.patch(basetext, delta)
1943
1944 try:
1945 res = self._processflags(btext[0], flags, 'read', raw=True)
1946 btext[0], validatehash = res
1947 if validatehash:
1948 self.checkhash(btext[0], node, p1=revinfo.p1, p2=revinfo.p2)
1949 if flags & REVIDX_ISCENSORED:
1950 raise RevlogError(_('node %s is not censored') % node)
1951 except CensoredNodeError:
1952 # must pass the censored index flag to add censored revisions
1953 if not flags & REVIDX_ISCENSORED:
1954 raise
1955 return btext[0]
1956
1957 def _builddeltadiff(self, base, revinfo, fh):
1958 t = self._buildtext(revinfo, fh)
1959 if self.iscensored(base):
1960 # deltas based on a censored revision must replace the
1961 # full content in one patch, so delta works everywhere
1962 header = mdiff.replacediffheader(self.rawsize(base), len(t))
1963 delta = header + t
1964 else:
1965 ptext = self.revision(base, _df=fh, raw=True)
1966 delta = mdiff.textdiff(ptext, t)
1967
1968 return delta
1969
1970 def _builddeltainfo(self, revinfo, base, fh):
1971 # can we use the cached delta?
1972 if revinfo.cachedelta and revinfo.cachedelta[0] == base:
1973 delta = revinfo.cachedelta[1]
1974 else:
1975 delta = self._builddeltadiff(base, revinfo, fh)
1976 header, data = self.compress(delta)
1977 deltalen = len(header) + len(data)
1978 chainbase = self.chainbase(base)
1979 offset = self.end(len(self) - 1)
1980 dist = deltalen + offset - self.start(chainbase)
1981 if self._generaldelta:
1982 deltabase = base
1983 else:
1984 deltabase = chainbase
1985 chainlen, compresseddeltalen = self._chaininfo(base)
1986 chainlen += 1
1987 compresseddeltalen += deltalen
1988 return _deltainfo(dist, deltalen, (header, data), deltabase,
1989 chainbase, chainlen, compresseddeltalen)
1990
1991 def _finddeltainfo(self, revinfo, fh):
1992 """Find an acceptable delta against a candidate revision
1993
1994 revinfo: information about the revision (instance of _revisioninfo)
1995 fh: file handle to either the .i or the .d revlog file,
1996 depending on whether it is inlined or not
1997
1998 Returns the first acceptable candidate revision, as ordered by
1999 _getcandidaterevs
2000 """
2001 cachedelta = revinfo.cachedelta
2002 p1 = revinfo.p1
2003 p2 = revinfo.p2
2004
2005 deltainfo = None
2006 for candidaterevs in self._getcandidaterevs(p1, p2, cachedelta):
2007 nominateddeltas = []
2008 for candidaterev in candidaterevs:
2009 candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
2010 if self._isgooddeltainfo(candidatedelta, revinfo.textlen):
2011 nominateddeltas.append(candidatedelta)
2012 if nominateddeltas:
2013 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
2014 break
2015
2016 return deltainfo
2017
2018 2031 def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
2019 cachedelta, ifh, dfh, alwayscache=False):
2032 cachedelta, ifh, dfh, alwayscache=False,
2033 deltacomputer=None):
2020 2034 """internal function to add revisions to the log
2021 2035
2022 2036 see addrevision for argument descriptions.
2023 2037
2024 2038 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2025 2039
2040 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2041 be used.
2042
2026 2043 invariants:
2027 2044 - rawtext is optional (can be None); if not set, cachedelta must be set.
2028 2045 if both are set, they must correspond to each other.
2029 2046 """
2030 2047 if node == nullid:
2031 2048 raise RevlogError(_("%s: attempt to add null revision") %
2032 2049 (self.indexfile))
2033 2050 if node == wdirid:
2034 2051 raise RevlogError(_("%s: attempt to add wdir revision") %
2035 2052 (self.indexfile))
2036 2053
2037 2054 if self._inline:
2038 2055 fh = ifh
2039 2056 else:
2040 2057 fh = dfh
2041 2058
2042 2059 btext = [rawtext]
2043 2060
2044 2061 curr = len(self)
2045 2062 prev = curr - 1
2046 2063 offset = self.end(prev)
2047 2064 p1r, p2r = self.rev(p1), self.rev(p2)
2048 2065
2049 2066 # full versions are inserted when the needed deltas
2050 2067 # become comparable to the uncompressed text
2051 2068 if rawtext is None:
2052 2069 textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
2053 2070 cachedelta[1])
2054 2071 else:
2055 2072 textlen = len(rawtext)
2056 2073
2074 if deltacomputer is None:
2075 deltacomputer = _deltacomputer(self)
2076
2057 2077 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2058 deltainfo = self._finddeltainfo(revinfo, fh)
2078 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2059 2079
2060 2080 if deltainfo is not None:
2061 2081 base = deltainfo.base
2062 2082 chainbase = deltainfo.chainbase
2063 2083 data = deltainfo.data
2064 2084 l = deltainfo.deltalen
2065 2085 else:
2066 rawtext = self._buildtext(revinfo, fh)
2086 rawtext = deltacomputer.buildtext(revinfo, fh)
2067 2087 data = self.compress(rawtext)
2068 2088 l = len(data[1]) + len(data[0])
2069 2089 base = chainbase = curr
2070 2090
2071 2091 e = (offset_type(offset, flags), l, textlen,
2072 2092 base, link, p1r, p2r, node)
2073 2093 self.index.insert(-1, e)
2074 2094 self.nodemap[node] = curr
2075 2095
2076 2096 entry = self._io.packentry(e, self.node, self.version, curr)
2077 2097 self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
2078 2098
2079 2099 if alwayscache and rawtext is None:
2080 rawtext = self._buildtext(revinfo, fh)
2100 rawtext = deltacomputer.buildtext(revinfo, fh)
2081 2101
2082 2102 if type(rawtext) == str: # only accept immutable objects
2083 2103 self._cache = (node, curr, rawtext)
2084 2104 self._chainbasecache[curr] = chainbase
2085 2105 return node
2086 2106
2087 2107 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
2088 2108 # Files opened in a+ mode have inconsistent behavior on various
2089 2109 # platforms. Windows requires that a file positioning call be made
2090 2110 # when the file handle transitions between reads and writes. See
2091 2111 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2092 2112 # platforms, Python or the platform itself can be buggy. Some versions
2093 2113 # of Solaris have been observed to not append at the end of the file
2094 2114 # if the file was seeked to before the end. See issue4943 for more.
2095 2115 #
2096 2116 # We work around this issue by inserting a seek() before writing.
2097 2117 # Note: This is likely not necessary on Python 3.
2098 2118 ifh.seek(0, os.SEEK_END)
2099 2119 if dfh:
2100 2120 dfh.seek(0, os.SEEK_END)
2101 2121
2102 2122 curr = len(self) - 1
2103 2123 if not self._inline:
2104 2124 transaction.add(self.datafile, offset)
2105 2125 transaction.add(self.indexfile, curr * len(entry))
2106 2126 if data[0]:
2107 2127 dfh.write(data[0])
2108 2128 dfh.write(data[1])
2109 2129 ifh.write(entry)
2110 2130 else:
2111 2131 offset += curr * self._io.size
2112 2132 transaction.add(self.indexfile, offset, curr)
2113 2133 ifh.write(entry)
2114 2134 ifh.write(data[0])
2115 2135 ifh.write(data[1])
2116 2136 self.checkinlinesize(transaction, ifh)
2117 2137
2118 2138 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
2119 2139 """
2120 2140 add a delta group
2121 2141
2122 2142 given a set of deltas, add them to the revision log. the
2123 2143 first delta is against its parent, which should be in our
2124 2144 log, the rest are against the previous delta.
2125 2145
2126 2146 If ``addrevisioncb`` is defined, it will be called with arguments of
2127 2147 this revlog and the node that was added.
2128 2148 """
2129 2149
2130 2150 nodes = []
2131 2151
2132 2152 r = len(self)
2133 2153 end = 0
2134 2154 if r:
2135 2155 end = self.end(r - 1)
2136 2156 ifh = self.opener(self.indexfile, "a+", checkambig=self._checkambig)
2137 2157 isize = r * self._io.size
2138 2158 if self._inline:
2139 2159 transaction.add(self.indexfile, end + isize, r)
2140 2160 dfh = None
2141 2161 else:
2142 2162 transaction.add(self.indexfile, isize, r)
2143 2163 transaction.add(self.datafile, end)
2144 2164 dfh = self.opener(self.datafile, "a+")
2145 2165 def flush():
2146 2166 if dfh:
2147 2167 dfh.flush()
2148 2168 ifh.flush()
2149 2169 try:
2170 deltacomputer = _deltacomputer(self)
2150 2171 # loop through our set of deltas
2151 2172 for data in deltas:
2152 2173 node, p1, p2, linknode, deltabase, delta, flags = data
2153 2174 link = linkmapper(linknode)
2154 2175 flags = flags or REVIDX_DEFAULT_FLAGS
2155 2176
2156 2177 nodes.append(node)
2157 2178
2158 2179 if node in self.nodemap:
2159 2180 # this can happen if two branches make the same change
2160 2181 continue
2161 2182
2162 2183 for p in (p1, p2):
2163 2184 if p not in self.nodemap:
2164 2185 raise LookupError(p, self.indexfile,
2165 2186 _('unknown parent'))
2166 2187
2167 2188 if deltabase not in self.nodemap:
2168 2189 raise LookupError(deltabase, self.indexfile,
2169 2190 _('unknown delta base'))
2170 2191
2171 2192 baserev = self.rev(deltabase)
2172 2193
2173 2194 if baserev != nullrev and self.iscensored(baserev):
2174 2195 # if base is censored, delta must be full replacement in a
2175 2196 # single patch operation
2176 2197 hlen = struct.calcsize(">lll")
2177 2198 oldlen = self.rawsize(baserev)
2178 2199 newlen = len(delta) - hlen
2179 2200 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2180 2201 raise error.CensoredBaseError(self.indexfile,
2181 2202 self.node(baserev))
2182 2203
2183 2204 if not flags and self._peek_iscensored(baserev, delta, flush):
2184 2205 flags |= REVIDX_ISCENSORED
2185 2206
2186 2207 # We assume consumers of addrevisioncb will want to retrieve
2187 2208 # the added revision, which will require a call to
2188 2209 # revision(). revision() will fast path if there is a cache
2189 2210 # hit. So, we tell _addrevision() to always cache in this case.
2190 2211 # We're only using addgroup() in the context of changegroup
2191 2212 # generation so the revision data can always be handled as raw
2192 2213 # by the flagprocessor.
2193 2214 self._addrevision(node, None, transaction, link,
2194 2215 p1, p2, flags, (baserev, delta),
2195 2216 ifh, dfh,
2196 alwayscache=bool(addrevisioncb))
2217 alwayscache=bool(addrevisioncb),
2218 deltacomputer=deltacomputer)
2197 2219
2198 2220 if addrevisioncb:
2199 2221 addrevisioncb(self, node)
2200 2222
2201 2223 if not dfh and not self._inline:
2202 2224 # addrevision switched from inline to conventional
2203 2225 # reopen the index
2204 2226 ifh.close()
2205 2227 dfh = self.opener(self.datafile, "a+")
2206 2228 ifh = self.opener(self.indexfile, "a+",
2207 2229 checkambig=self._checkambig)
2208 2230 finally:
2209 2231 if dfh:
2210 2232 dfh.close()
2211 2233 ifh.close()
2212 2234
2213 2235 return nodes
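# Editorial sketch: each item of ``deltas`` unpacks as the 7-tuple read
# in the loop above, so a changegroup applier drives this roughly as
# (``cl.rev`` as the linkmapper is an assumption):
#
#     deltas = [(node, p1, p2, linknode, deltabase, delta, flags), ...]
#     added = rl.addgroup(deltas, cl.rev, tr)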
2214 2236
2215 2237 def iscensored(self, rev):
2216 2238 """Check if a file revision is censored."""
2217 2239 return False
2218 2240
2219 2241 def _peek_iscensored(self, baserev, delta, flush):
2220 2242 """Quickly check if a delta produces a censored revision."""
2221 2243 return False
2222 2244
2223 2245 def getstrippoint(self, minlink):
2224 2246 """find the minimum rev that must be stripped to strip the linkrev
2225 2247
2226 2248 Returns a tuple containing the minimum rev and a set of all revs that
2227 2249 have linkrevs that will be broken by this strip.
2228 2250 """
2229 2251 brokenrevs = set()
2230 2252 strippoint = len(self)
2231 2253
2232 2254 heads = {}
2233 2255 futurelargelinkrevs = set()
2234 2256 for head in self.headrevs():
2235 2257 headlinkrev = self.linkrev(head)
2236 2258 heads[head] = headlinkrev
2237 2259 if headlinkrev >= minlink:
2238 2260 futurelargelinkrevs.add(headlinkrev)
2239 2261
2240 2262 # This algorithm involves walking down the rev graph, starting at the
2241 2263 # heads. Since the revs are topologically sorted according to linkrev,
2242 2264 # once all head linkrevs are below the minlink, we know there are
2243 2265 # no more revs that could have a linkrev greater than minlink.
2244 2266 # So we can stop walking.
2245 2267 while futurelargelinkrevs:
2246 2268 strippoint -= 1
2247 2269 linkrev = heads.pop(strippoint)
2248 2270
2249 2271 if linkrev < minlink:
2250 2272 brokenrevs.add(strippoint)
2251 2273 else:
2252 2274 futurelargelinkrevs.remove(linkrev)
2253 2275
2254 2276 for p in self.parentrevs(strippoint):
2255 2277 if p != nullrev:
2256 2278 plinkrev = self.linkrev(p)
2257 2279 heads[p] = plinkrev
2258 2280 if plinkrev >= minlink:
2259 2281 futurelargelinkrevs.add(plinkrev)
2260 2282
2261 2283 return strippoint, brokenrevs
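# Editorial sketch: strip() below consumes this as
#
#     rev, broken = rl.getstrippoint(minlink)
#
# where ``rev`` is the first revision to truncate and ``broken`` holds
# revs at or above it whose own linkrev is < minlink, i.e. casualties of
# the truncation rather than targets of it.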
2262 2284
2263 2285 def strip(self, minlink, transaction):
2264 2286 """truncate the revlog on the first revision with a linkrev >= minlink
2265 2287
2266 2288 This function is called when we're stripping revision minlink and
2267 2289 its descendants from the repository.
2268 2290
2269 2291 We have to remove all revisions with linkrev >= minlink, because
2270 2292 the equivalent changelog revisions will be renumbered after the
2271 2293 strip.
2272 2294
2273 2295 So we truncate the revlog on the first of these revisions, and
2274 2296 trust that the caller has saved the revisions that shouldn't be
2275 2297 removed and that it'll re-add them after this truncation.
2276 2298 """
2277 2299 if len(self) == 0:
2278 2300 return
2279 2301
2280 2302 rev, _ = self.getstrippoint(minlink)
2281 2303 if rev == len(self):
2282 2304 return
2283 2305
2284 2306 # first truncate the files on disk
2285 2307 end = self.start(rev)
2286 2308 if not self._inline:
2287 2309 transaction.add(self.datafile, end)
2288 2310 end = rev * self._io.size
2289 2311 else:
2290 2312 end += rev * self._io.size
2291 2313
2292 2314 transaction.add(self.indexfile, end)
2293 2315
2294 2316 # then reset internal state in memory to forget those revisions
2295 2317 self._cache = None
2296 2318 self._chaininfocache = {}
2297 2319 self._chunkclear()
2298 2320 for x in xrange(rev, len(self)):
2299 2321 del self.nodemap[self.node(x)]
2300 2322
2301 2323 del self.index[rev:-1]
2302 2324
2303 2325 def checksize(self):
2304 2326 expected = 0
2305 2327 if len(self):
2306 2328 expected = max(0, self.end(len(self) - 1))
2307 2329
2308 2330 try:
2309 2331 f = self.opener(self.datafile)
2310 2332 f.seek(0, 2)
2311 2333 actual = f.tell()
2312 2334 f.close()
2313 2335 dd = actual - expected
2314 2336 except IOError as inst:
2315 2337 if inst.errno != errno.ENOENT:
2316 2338 raise
2317 2339 dd = 0
2318 2340
2319 2341 try:
2320 2342 f = self.opener(self.indexfile)
2321 2343 f.seek(0, 2)
2322 2344 actual = f.tell()
2323 2345 f.close()
2324 2346 s = self._io.size
2325 2347 i = max(0, actual // s)
2326 2348 di = actual - (i * s)
2327 2349 if self._inline:
2328 2350 databytes = 0
2329 2351 for r in self:
2330 2352 databytes += max(0, self.length(r))
2331 2353 dd = 0
2332 2354 di = actual - len(self) * s - databytes
2333 2355 except IOError as inst:
2334 2356 if inst.errno != errno.ENOENT:
2335 2357 raise
2336 2358 di = 0
2337 2359
2338 2360 return (dd, di)
2339 2361
2340 2362 def files(self):
2341 2363 res = [self.indexfile]
2342 2364 if not self._inline:
2343 2365 res.append(self.datafile)
2344 2366 return res
2345 2367
2346 2368 DELTAREUSEALWAYS = 'always'
2347 2369 DELTAREUSESAMEREVS = 'samerevs'
2348 2370 DELTAREUSENEVER = 'never'
2349 2371
2350 2372 DELTAREUSEFULLADD = 'fulladd'
2351 2373
2352 2374 DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}
2353 2375
2354 2376 def clone(self, tr, destrevlog, addrevisioncb=None,
2355 2377 deltareuse=DELTAREUSESAMEREVS, aggressivemergedeltas=None):
2356 2378 """Copy this revlog to another, possibly with format changes.
2357 2379
2358 2380 The destination revlog will contain the same revisions and nodes.
2359 2381 However, it may not be bit-for-bit identical due to e.g. delta encoding
2360 2382 differences.
2361 2383
2362 2384 The ``deltareuse`` argument controls how deltas from the existing revlog
2363 2385 are preserved in the destination revlog. The argument can have the
2364 2386 following values:
2365 2387
2366 2388 DELTAREUSEALWAYS
2367 2389 Deltas will always be reused (if possible), even if the destination
2368 2390 revlog would not select the same revisions for the delta. This is the
2369 2391 fastest mode of operation.
2370 2392 DELTAREUSESAMEREVS
2371 2393 Deltas will be reused if the destination revlog would pick the same
2372 2394 revisions for the delta. This mode strikes a balance between speed
2373 2395 and optimization.
2374 2396 DELTAREUSENEVER
2375 2397 Deltas will never be reused. This is the slowest mode of execution.
2376 2398 This mode can be used to recompute deltas (e.g. if the diff/delta
2377 2399 algorithm changes).
2378 2400
2379 2401 Delta computation can be slow, so the choice of delta reuse policy can
2380 2402 significantly affect run time.
2381 2403
2382 2404 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2383 2405 two extremes. Deltas will be reused if they are appropriate. But if the
2384 2406 destination could choose a better delta base, it will do so. This means if you
2385 2407 are converting a non-generaldelta revlog to a generaldelta revlog,
2386 2408 deltas will be recomputed if the delta's parent isn't a parent of the
2387 2409 revision.
2388 2410
2389 2411 In addition to the delta policy, the ``aggressivemergedeltas`` argument
2390 2412 controls whether to compute deltas against both parents for merges.
2391 2413 When unset, the destination revlog's current setting is used.
2392 2414 """
2393 2415 if deltareuse not in self.DELTAREUSEALL:
2394 2416 raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)
2395 2417
2396 2418 if len(destrevlog):
2397 2419 raise ValueError(_('destination revlog is not empty'))
2398 2420
2399 2421 if getattr(self, 'filteredrevs', None):
2400 2422 raise ValueError(_('source revlog has filtered revisions'))
2401 2423 if getattr(destrevlog, 'filteredrevs', None):
2402 2424 raise ValueError(_('destination revlog has filtered revisions'))
2403 2425
2404 2426 # lazydeltabase controls whether to reuse a cached delta, if possible.
2405 2427 oldlazydeltabase = destrevlog._lazydeltabase
2406 2428 oldamd = destrevlog._aggressivemergedeltas
2407 2429
2408 2430 try:
2409 2431 if deltareuse == self.DELTAREUSEALWAYS:
2410 2432 destrevlog._lazydeltabase = True
2411 2433 elif deltareuse == self.DELTAREUSESAMEREVS:
2412 2434 destrevlog._lazydeltabase = False
2413 2435
2414 2436 destrevlog._aggressivemergedeltas = aggressivemergedeltas or oldamd
2415 2437
2416 2438 populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
2417 2439 self.DELTAREUSESAMEREVS)
2418 2440
2441 deltacomputer = _deltacomputer(destrevlog)
2419 2442 index = self.index
2420 2443 for rev in self:
2421 2444 entry = index[rev]
2422 2445
2423 2446 # Some classes override linkrev to take filtered revs into
2424 2447 # account. Use raw entry from index.
2425 2448 flags = entry[0] & 0xffff
2426 2449 linkrev = entry[4]
2427 2450 p1 = index[entry[5]][7]
2428 2451 p2 = index[entry[6]][7]
2429 2452 node = entry[7]
2430 2453
2431 2454 # (Possibly) reuse the delta from the revlog if allowed and
2432 2455 # the revlog chunk is a delta.
2433 2456 cachedelta = None
2434 2457 rawtext = None
2435 2458 if populatecachedelta:
2436 2459 dp = self.deltaparent(rev)
2437 2460 if dp != nullrev:
2438 2461 cachedelta = (dp, str(self._chunk(rev)))
2439 2462
2440 2463 if not cachedelta:
2441 2464 rawtext = self.revision(rev, raw=True)
2442 2465
2443 2466
2444 2467 if deltareuse == self.DELTAREUSEFULLADD:
2445 2468 destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
2446 2469 cachedelta=cachedelta,
2447 node=node, flags=flags)
2470 node=node, flags=flags,
2471 deltacomputer=deltacomputer)
2448 2472 else:
2449 2473 ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
2450 2474 checkambig=False)
2451 2475 dfh = None
2452 2476 if not destrevlog._inline:
2453 2477 dfh = destrevlog.opener(destrevlog.datafile, 'a+')
2454 2478 try:
2455 2479 destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
2456 p2, flags, cachedelta, ifh, dfh)
2480 p2, flags, cachedelta, ifh, dfh,
2481 deltacomputer=deltacomputer)
2457 2482 finally:
2458 2483 if dfh:
2459 2484 dfh.close()
2460 2485 ifh.close()
2461 2486
2462 2487 if addrevisioncb:
2463 2488 addrevisioncb(self, rev, node)
2464 2489 finally:
2465 2490 destrevlog._lazydeltabase = oldlazydeltabase
2466 2491 destrevlog._aggressivemergedeltas = oldamd