revlog: move index file opening in a method...
Boris Feld
r35986:91b3b115 default
@@ -1,2495 +1,2502 b''
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import binascii
17 17 import collections
18 18 import errno
19 19 import hashlib
20 20 import heapq
21 21 import os
22 22 import struct
23 23 import zlib
24 24
25 25 # import stuff from node for others to import from revlog
26 26 from .node import (
27 27 bin,
28 28 hex,
29 29 nullid,
30 30 nullrev,
31 31 wdirhex,
32 32 wdirid,
33 33 wdirrev,
34 34 )
35 35 from .i18n import _
36 36 from .thirdparty import (
37 37 attr,
38 38 )
39 39 from . import (
40 40 ancestor,
41 41 error,
42 42 mdiff,
43 43 policy,
44 44 pycompat,
45 45 templatefilters,
46 46 util,
47 47 )
48 48
49 49 parsers = policy.importmod(r'parsers')
50 50
51 51 # Aliased for performance.
52 52 _zlibdecompress = zlib.decompress
53 53
54 54 # revlog header flags
55 55 REVLOGV0 = 0
56 56 REVLOGV1 = 1
57 57 # Dummy value until file format is finalized.
58 58 # Reminder: change the bounds check in revlog.__init__ when this is changed.
59 59 REVLOGV2 = 0xDEAD
60 60 FLAG_INLINE_DATA = (1 << 16)
61 61 FLAG_GENERALDELTA = (1 << 17)
62 62 REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA
63 63 REVLOG_DEFAULT_FORMAT = REVLOGV1
64 64 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
65 65 REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA
66 66 REVLOGV2_FLAGS = REVLOGV1_FLAGS
67 67
68 68 # revlog index flags
69 69 REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
70 70 REVIDX_ELLIPSIS = (1 << 14) # revision hash does not match data (narrowhg)
71 71 REVIDX_EXTSTORED = (1 << 13) # revision data is stored externally
72 72 REVIDX_DEFAULT_FLAGS = 0
73 73 # stable order in which flags need to be processed and their processors applied
74 74 REVIDX_FLAGS_ORDER = [
75 75 REVIDX_ISCENSORED,
76 76 REVIDX_ELLIPSIS,
77 77 REVIDX_EXTSTORED,
78 78 ]
79 79 REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)
80 80
81 81 # max size of revlog with inline data
82 82 _maxinline = 131072
83 83 _chunksize = 1048576
84 84
85 85 RevlogError = error.RevlogError
86 86 LookupError = error.LookupError
87 87 CensoredNodeError = error.CensoredNodeError
88 88 ProgrammingError = error.ProgrammingError
89 89
90 90 # Store flag processors (cf. 'addflagprocessor()' to register)
91 91 _flagprocessors = {
92 92 REVIDX_ISCENSORED: None,
93 93 }
94 94
95 95 def addflagprocessor(flag, processor):
96 96 """Register a flag processor on a revision data flag.
97 97
98 98 Invariant:
99 99 - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER.
100 100 - Only one flag processor can be registered on a specific flag.
101 101 - flagprocessors must be 3-tuples of functions (read, write, raw) with the
102 102 following signatures:
103 103 - (read) f(self, rawtext) -> text, bool
104 104 - (write) f(self, text) -> rawtext, bool
105 105 - (raw) f(self, rawtext) -> bool
106 106 "text" is presented to the user. "rawtext" is stored in revlog data, not
107 107 directly visible to the user.
108 108 The boolean returned by these transforms is used to determine whether
109 109 the returned text can be used for hash integrity checking. For example,
110 110 if "write" returns False, then "text" is used to generate the hash. If
111 111 "write" returns True, the "rawtext" returned by "write" should be used
112 112 to generate the hash. Usually, "write" and "read" return different
113 113 booleans, and "raw" returns the same boolean as "write".
114 114
115 115 Note: The 'raw' transform is used for changegroup generation and in some
116 116 debug commands. In this case the transform only indicates whether the
117 117 contents can be used for hash integrity checks.
118 118 """
119 119 if not flag & REVIDX_KNOWN_FLAGS:
120 120 msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
121 121 raise ProgrammingError(msg)
122 122 if flag not in REVIDX_FLAGS_ORDER:
123 123 msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
124 124 raise ProgrammingError(msg)
125 125 if flag in _flagprocessors:
126 126 msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
127 127 raise error.Abort(msg)
128 128 _flagprocessors[flag] = processor
129 129
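A minimal sketch (not part of this changeset) of what a registration following the contract above could look like; the REVIDX_EXTSTORED transform bodies here are hypothetical placeholders:

    def _extread(self, rawtext):
        text = rawtext        # hypothetical: resolve the external reference
        return text, True     # True: "text" is usable for hash checks

    def _extwrite(self, text):
        rawtext = text        # hypothetical: store externally, keep a pointer
        return rawtext, False # False: the hash is generated from "text"

    def _extraw(self, rawtext):
        return False          # matches the boolean returned by "write"

    addflagprocessor(REVIDX_EXTSTORED, (_extread, _extwrite, _extraw))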
130 130 def getoffset(q):
131 131 return int(q >> 16)
132 132
133 133 def gettype(q):
134 134 return int(q & 0xFFFF)
135 135
136 136 def offset_type(offset, type):
137 137 if (type & ~REVIDX_KNOWN_FLAGS) != 0:
138 138 raise ValueError('unknown revlog index flags')
139 139 return int(int(offset) << 16 | type)
140 140
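A worked sketch of the packing scheme, using a hypothetical offset:

    entry0 = offset_type(1024, REVIDX_ISCENSORED)
    assert getoffset(entry0) == 1024              # upper bits: byte offset
    assert gettype(entry0) == REVIDX_ISCENSORED   # low 16 bits: flags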
141 141 _nullhash = hashlib.sha1(nullid)
142 142
143 143 def hash(text, p1, p2):
144 144 """generate a hash from the given text and its parent hashes
145 145
146 146 This hash combines both the current file contents and its history
147 147 in a manner that makes it easy to distinguish nodes with the same
148 148 content in the revision graph.
149 149 """
150 150 # As of now, if one of the parent nodes is null, p2 is null
151 151 if p2 == nullid:
152 152 # deep copy of a hash is faster than creating one
153 153 s = _nullhash.copy()
154 154 s.update(p1)
155 155 else:
156 156 # neither of the parent nodes is nullid
157 157 if p1 < p2:
158 158 a = p1
159 159 b = p2
160 160 else:
161 161 a = p2
162 162 b = p1
163 163 s = hashlib.sha1(a)
164 164 s.update(b)
165 165 s.update(text)
166 166 return s.digest()
167 167
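Since nullid (twenty zero bytes) always sorts first, the p2 == nullid fast path above yields the same digest as the general case; the whole function is equivalent to this sketch:

    def _hashsketch(text, p1, p2):
        a, b = min(p1, p2), max(p1, p2)
        return hashlib.sha1(a + b + text).digest()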
168 168 def _trimchunk(revlog, revs, startidx, endidx=None):
169 169 """returns revs[startidx:endidx] without empty trailing revs
170 170 """
171 171 length = revlog.length
172 172
173 173 if endidx is None:
174 174 endidx = len(revs)
175 175
176 176 # Trim empty revs at the end, but never the very first revision of a chain
177 177 while endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0:
178 178 endidx -= 1
179 179
180 180 return revs[startidx:endidx]
181 181
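For example, with hypothetical per-rev lengths [3, 0, 5, 0, 0]:

    _trimchunk(revlog, revs, 0)     # -> revs[0:3], trailing empties dropped
    _trimchunk(revlog, revs, 0, 2)  # -> revs[0:1], rev 1 is empty
    # only index 0 is protected: a chunk starting at rev 0 keeps that
    # rev even when it is empty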
182 182 def _slicechunk(revlog, revs):
183 183 """slice revs to reduce the amount of unrelated data to be read from disk.
184 184
185 185 ``revs`` is sliced into groups that should each be read in one go.
186 186 Assumes that ``revs`` is sorted.
187 187 """
188 188 start = revlog.start
189 189 length = revlog.length
190 190
191 191 if len(revs) <= 1:
192 192 yield revs
193 193 return
194 194
195 195 startbyte = start(revs[0])
196 196 endbyte = start(revs[-1]) + length(revs[-1])
197 197 readdata = deltachainspan = endbyte - startbyte
198 198
199 199 chainpayload = sum(length(r) for r in revs)
200 200
201 201 if deltachainspan:
202 202 density = chainpayload / float(deltachainspan)
203 203 else:
204 204 density = 1.0
205 205
206 206 # Store the gaps in a heap to have them sorted by decreasing size
207 207 gapsheap = []
208 208 heapq.heapify(gapsheap)
209 209 prevend = None
210 210 for i, rev in enumerate(revs):
211 211 revstart = start(rev)
212 212 revlen = length(rev)
213 213
214 214 # Skip empty revisions to form larger holes
215 215 if revlen == 0:
216 216 continue
217 217
218 218 if prevend is not None:
219 219 gapsize = revstart - prevend
220 220 # only consider holes that are large enough
221 221 if gapsize > revlog._srmingapsize:
222 222 heapq.heappush(gapsheap, (-gapsize, i))
223 223
224 224 prevend = revstart + revlen
225 225
226 226 # Collect the indices of the largest holes until the density is acceptable
227 227 indicesheap = []
228 228 heapq.heapify(indicesheap)
229 229 while gapsheap and density < revlog._srdensitythreshold:
230 230 oppgapsize, gapidx = heapq.heappop(gapsheap)
231 231
232 232 heapq.heappush(indicesheap, gapidx)
233 233
234 234 # the gap sizes are stored as negatives to be sorted decreasingly
235 235 # by the heap
236 236 readdata -= (-oppgapsize)
237 237 if readdata > 0:
238 238 density = chainpayload / float(readdata)
239 239 else:
240 240 density = 1.0
241 241
242 242 # Cut the revs at collected indices
243 243 previdx = 0
244 244 while indicesheap:
245 245 idx = heapq.heappop(indicesheap)
246 246
247 247 chunk = _trimchunk(revlog, revs, previdx, idx)
248 248 if chunk:
249 249 yield chunk
250 250
251 251 previdx = idx
252 252
253 253 chunk = _trimchunk(revlog, revs, previdx)
254 254 if chunk:
255 255 yield chunk
256 256
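A usage sketch (``revs`` hypothetical): each yielded group can be fetched with one sequential read, which is how ``_chunks()`` below consumes it when sparse-read is enabled:

    for chunk in _slicechunk(revlog, sorted(revs)):
        offset, data = revlog._getsegmentforrevs(chunk[0], chunk[-1])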
257 257 @attr.s(slots=True, frozen=True)
258 258 class _deltainfo(object):
259 259 distance = attr.ib()
260 260 deltalen = attr.ib()
261 261 data = attr.ib()
262 262 base = attr.ib()
263 263 chainbase = attr.ib()
264 264 chainlen = attr.ib()
265 265 compresseddeltalen = attr.ib()
266 266
267 267 class _deltacomputer(object):
268 268 def __init__(self, revlog):
269 269 self.revlog = revlog
270 270
271 271 def _getcandidaterevs(self, p1, p2, cachedelta):
272 272 """
273 273 Provides candidate revisions that are worth diffing against,
274 274 grouped by level of preference.
275 275 """
276 276 revlog = self.revlog
277 277 curr = len(revlog)
278 278 prev = curr - 1
279 279 p1r, p2r = revlog.rev(p1), revlog.rev(p2)
280 280
281 281 # should we try to build a delta?
282 282 if prev != nullrev and revlog.storedeltachains:
283 283 tested = set()
284 284 # This condition is true most of the time when processing
285 285 # changegroup data into a generaldelta repo. The only time it
286 286 # isn't true is if this is the first revision in a delta chain
287 287 # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
288 288 if cachedelta and revlog._generaldelta and revlog._lazydeltabase:
289 289 # Assume what we received from the server is a good choice
290 290 # build delta will reuse the cache
291 291 yield (cachedelta[0],)
292 292 tested.add(cachedelta[0])
293 293
294 294 if revlog._generaldelta:
295 295 # exclude already lazy tested base if any
296 296 parents = [p for p in (p1r, p2r)
297 297 if p != nullrev and p not in tested]
298 298 if parents and not revlog._aggressivemergedeltas:
299 299 # Pick whichever parent is closer to us (to minimize the
300 300 # chance of having to build a fulltext).
301 301 parents = [max(parents)]
302 302 tested.update(parents)
303 303 yield parents
304 304
305 305 if prev not in tested:
306 306 # other approaches failed; try against prev to hopefully save us a
307 307 # fulltext.
308 308 yield (prev,)
309 309
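Schematically, for a changegroup applied to a generaldelta repo with lazydeltabase, the groups come out in this order (revs hypothetical); ``finddeltainfo`` below stops at the first group that yields an acceptable delta:

    # (cachedelta_base,)  then  (closer_parent,)  then  (prev,)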
310 310 def buildtext(self, revinfo, fh):
311 311 """Builds a fulltext version of a revision
312 312
313 313 revinfo: _revisioninfo instance that contains all needed info
314 314 fh: file handle to either the .i or the .d revlog file,
315 315 depending on whether it is inlined or not
316 316 """
317 317 btext = revinfo.btext
318 318 if btext[0] is not None:
319 319 return btext[0]
320 320
321 321 revlog = self.revlog
322 322 cachedelta = revinfo.cachedelta
323 323 flags = revinfo.flags
324 324 node = revinfo.node
325 325
326 326 baserev = cachedelta[0]
327 327 delta = cachedelta[1]
328 328 # special case deltas which replace entire base; no need to decode
329 329 # base revision. this neatly avoids censored bases, which throw when
330 330 # they're decoded.
331 331 hlen = struct.calcsize(">lll")
332 332 if delta[:hlen] == mdiff.replacediffheader(revlog.rawsize(baserev),
333 333 len(delta) - hlen):
334 334 btext[0] = delta[hlen:]
335 335 else:
336 336 basetext = revlog.revision(baserev, _df=fh, raw=True)
337 337 btext[0] = mdiff.patch(basetext, delta)
338 338
339 339 try:
340 340 res = revlog._processflags(btext[0], flags, 'read', raw=True)
341 341 btext[0], validatehash = res
342 342 if validatehash:
343 343 revlog.checkhash(btext[0], node, p1=revinfo.p1, p2=revinfo.p2)
344 344 if flags & REVIDX_ISCENSORED:
345 345 raise RevlogError(_('node %s is not censored') % node)
346 346 except CensoredNodeError:
347 347 # must pass the censored index flag to add censored revisions
348 348 if not flags & REVIDX_ISCENSORED:
349 349 raise
350 350 return btext[0]
351 351
352 352 def _builddeltadiff(self, base, revinfo, fh):
353 353 revlog = self.revlog
354 354 t = self.buildtext(revinfo, fh)
355 355 if revlog.iscensored(base):
356 356 # deltas based on a censored revision must replace the
357 357 # full content in one patch, so delta works everywhere
358 358 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
359 359 delta = header + t
360 360 else:
361 361 ptext = revlog.revision(base, _df=fh, raw=True)
362 362 delta = mdiff.textdiff(ptext, t)
363 363
364 364 return delta
365 365
366 366 def _builddeltainfo(self, revinfo, base, fh):
367 367 # can we use the cached delta?
368 368 if revinfo.cachedelta and revinfo.cachedelta[0] == base:
369 369 delta = revinfo.cachedelta[1]
370 370 else:
371 371 delta = self._builddeltadiff(base, revinfo, fh)
372 372 revlog = self.revlog
373 373 header, data = revlog.compress(delta)
374 374 deltalen = len(header) + len(data)
375 375 chainbase = revlog.chainbase(base)
376 376 offset = revlog.end(len(revlog) - 1)
377 377 dist = deltalen + offset - revlog.start(chainbase)
378 378 if revlog._generaldelta:
379 379 deltabase = base
380 380 else:
381 381 deltabase = chainbase
382 382 chainlen, compresseddeltalen = revlog._chaininfo(base)
383 383 chainlen += 1
384 384 compresseddeltalen += deltalen
385 385 return _deltainfo(dist, deltalen, (header, data), deltabase,
386 386 chainbase, chainlen, compresseddeltalen)
387 387
388 388 def finddeltainfo(self, revinfo, fh):
389 389 """Find an acceptable delta against a candidate revision
390 390
391 391 revinfo: information about the revision (instance of _revisioninfo)
392 392 fh: file handle to either the .i or the .d revlog file,
393 393 depending on whether it is inlined or not
394 394
395 395 Returns the first acceptable candidate revision, as ordered by
396 396 _getcandidaterevs
397 397 """
398 398 cachedelta = revinfo.cachedelta
399 399 p1 = revinfo.p1
400 400 p2 = revinfo.p2
401 401 revlog = self.revlog
402 402
403 403 deltainfo = None
404 404 for candidaterevs in self._getcandidaterevs(p1, p2, cachedelta):
405 405 nominateddeltas = []
406 406 for candidaterev in candidaterevs:
407 407 candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
408 408 if revlog._isgooddeltainfo(candidatedelta, revinfo.textlen):
409 409 nominateddeltas.append(candidatedelta)
410 410 if nominateddeltas:
411 411 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
412 412 break
413 413
414 414 return deltainfo
415 415
416 416 @attr.s(slots=True, frozen=True)
417 417 class _revisioninfo(object):
418 418 """Information about a revision that allows building its fulltext
419 419 node: expected hash of the revision
420 420 p1, p2: parent revs of the revision
421 421 btext: built text cache consisting of a one-element list
422 422 cachedelta: (baserev, uncompressed_delta) or None
423 423 flags: flags associated with the revision storage
424 424
425 425 One of btext[0] or cachedelta must be set.
426 426 """
427 427 node = attr.ib()
428 428 p1 = attr.ib()
429 429 p2 = attr.ib()
430 430 btext = attr.ib()
431 431 textlen = attr.ib()
432 432 cachedelta = attr.ib()
433 433 flags = attr.ib()
434 434
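A construction sketch with hypothetical values; ``btext`` starts as a one-element list holding None and is filled lazily by ``_deltacomputer.buildtext()``:

    revinfo = _revisioninfo(node=expectednode, p1=p1node, p2=p2node,
                            btext=[None], textlen=len(rawtext),
                            cachedelta=(deltabaserev, deltabytes),
                            flags=REVIDX_DEFAULT_FLAGS)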
435 435 # index v0:
436 436 # 4 bytes: offset
437 437 # 4 bytes: compressed length
438 438 # 4 bytes: base rev
439 439 # 4 bytes: link rev
440 440 # 20 bytes: parent 1 nodeid
441 441 # 20 bytes: parent 2 nodeid
442 442 # 20 bytes: nodeid
443 443 indexformatv0 = struct.Struct(">4l20s20s20s")
444 444 indexformatv0_pack = indexformatv0.pack
445 445 indexformatv0_unpack = indexformatv0.unpack
446 446
447 447 class revlogoldio(object):
448 448 def __init__(self):
449 449 self.size = indexformatv0.size
450 450
451 451 def parseindex(self, data, inline):
452 452 s = self.size
453 453 index = []
454 454 nodemap = {nullid: nullrev}
455 455 n = off = 0
456 456 l = len(data)
457 457 while off + s <= l:
458 458 cur = data[off:off + s]
459 459 off += s
460 460 e = indexformatv0_unpack(cur)
461 461 # transform to revlogv1 format
462 462 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
463 463 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
464 464 index.append(e2)
465 465 nodemap[e[6]] = n
466 466 n += 1
467 467
468 468 # add the magic null revision at -1
469 469 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
470 470
471 471 return index, nodemap, None
472 472
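The converted tuple layout, shared with the v1 parser below:

    # (offset_type(offset, flags), compressed length,
    #  uncompressed length (-1: unknown in v0), base rev, link rev,
    #  p1 rev, p2 rev, nodeid)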
473 473 def packentry(self, entry, node, version, rev):
474 474 if gettype(entry[0]):
475 475 raise RevlogError(_('index entry flags need revlog version 1'))
476 476 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
477 477 node(entry[5]), node(entry[6]), entry[7])
478 478 return indexformatv0_pack(*e2)
479 479
480 480 # index ng:
481 481 # 6 bytes: offset
482 482 # 2 bytes: flags
483 483 # 4 bytes: compressed length
484 484 # 4 bytes: uncompressed length
485 485 # 4 bytes: base rev
486 486 # 4 bytes: link rev
487 487 # 4 bytes: parent 1 rev
488 488 # 4 bytes: parent 2 rev
489 489 # 32 bytes: nodeid
490 490 indexformatng = struct.Struct(">Qiiiiii20s12x")
491 491 indexformatng_pack = indexformatng.pack
492 492 versionformat = struct.Struct(">I")
493 493 versionformat_pack = versionformat.pack
494 494 versionformat_unpack = versionformat.unpack
495 495
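As a size check, ">Qiiiiii20s12x" packs to 8 + 6*4 + 20 + 12 = 64 bytes per entry; for rev 0, ``packentry()`` below overlays the 4-byte version header on the first 4 bytes of the offset field.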
496 496 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
497 497 # signed integer)
498 498 _maxentrysize = 0x7fffffff
499 499
500 500 class revlogio(object):
501 501 def __init__(self):
502 502 self.size = indexformatng.size
503 503
504 504 def parseindex(self, data, inline):
505 505 # call the C implementation to parse the index data
506 506 index, cache = parsers.parse_index2(data, inline)
507 507 return index, getattr(index, 'nodemap', None), cache
508 508
509 509 def packentry(self, entry, node, version, rev):
510 510 p = indexformatng_pack(*entry)
511 511 if rev == 0:
512 512 p = versionformat_pack(version) + p[4:]
513 513 return p
514 514
515 515 class revlog(object):
516 516 """
517 517 the underlying revision storage object
518 518
519 519 A revlog consists of two parts, an index and the revision data.
520 520
521 521 The index is a file with a fixed record size containing
522 522 information on each revision, including its nodeid (hash), the
523 523 nodeids of its parents, the position and offset of its data within
524 524 the data file, and the revision it's based on. Finally, each entry
525 525 contains a linkrev entry that can serve as a pointer to external
526 526 data.
527 527
528 528 The revision data itself is a linear collection of data chunks.
529 529 Each chunk represents a revision and is usually represented as a
530 530 delta against the previous chunk. To bound lookup time, runs of
531 531 deltas are limited to about 2 times the length of the original
532 532 version data. This makes retrieval of a version proportional to
533 533 its size, or O(1) relative to the number of revisions.
534 534
535 535 Both pieces of the revlog are written to in an append-only
536 536 fashion, which means we never need to rewrite a file to insert or
537 537 remove data, and can use some simple techniques to avoid the need
538 538 for locking while reading.
539 539
540 540 If checkambig, indexfile is opened with checkambig=True at
541 541 writing, to avoid file stat ambiguity.
542 542
543 543 If mmaplargeindex is True, and an mmapindexthreshold is set, the
544 544 index will be mmapped rather than read if it is larger than the
545 545 configured threshold.
546 546 """
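An opening sketch with hypothetical paths: a revlog backed by "data/foo.i" keeps its deltas in "data/foo.d" unless the index is inline (FLAG_INLINE_DATA), in which case index entries and revision chunks interleave inside the .i file:

    rl = revlog(opener, "data/foo.i")
    rl.datafile   # -> "data/foo.d", derived as indexfile[:-2] + ".d"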
547 547 def __init__(self, opener, indexfile, datafile=None, checkambig=False,
548 548 mmaplargeindex=False):
549 549 """
550 550 create a revlog object
551 551
552 552 opener is a function that abstracts the file opening operation
553 553 and can be used to implement COW semantics or the like.
554 554 """
555 555 self.indexfile = indexfile
556 556 self.datafile = datafile or (indexfile[:-2] + ".d")
557 557 self.opener = opener
558 558 # When True, indexfile is opened with checkambig=True at writing, to
559 559 # avoid file stat ambiguity.
560 560 self._checkambig = checkambig
561 561 # 3-tuple of (node, rev, text) for a raw revision.
562 562 self._cache = None
563 563 # Maps rev to chain base rev.
564 564 self._chainbasecache = util.lrucachedict(100)
565 565 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
566 566 self._chunkcache = (0, '')
567 567 # How much data to read and cache into the raw revlog data cache.
568 568 self._chunkcachesize = 65536
569 569 self._maxchainlen = None
570 570 self._aggressivemergedeltas = False
571 571 self.index = []
572 572 # Mapping of partial identifiers to full nodes.
573 573 self._pcache = {}
574 574 # Mapping of revision integer to full node.
575 575 self._nodecache = {nullid: nullrev}
576 576 self._nodepos = None
577 577 self._compengine = 'zlib'
578 578 self._maxdeltachainspan = -1
579 579 self._withsparseread = False
580 580 self._srdensitythreshold = 0.25
581 581 self._srmingapsize = 262144
582 582
583 583 mmapindexthreshold = None
584 584 v = REVLOG_DEFAULT_VERSION
585 585 opts = getattr(opener, 'options', None)
586 586 if opts is not None:
587 587 if 'revlogv2' in opts:
588 588 # version 2 revlogs always use generaldelta.
589 589 v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
590 590 elif 'revlogv1' in opts:
591 591 if 'generaldelta' in opts:
592 592 v |= FLAG_GENERALDELTA
593 593 else:
594 594 v = 0
595 595 if 'chunkcachesize' in opts:
596 596 self._chunkcachesize = opts['chunkcachesize']
597 597 if 'maxchainlen' in opts:
598 598 self._maxchainlen = opts['maxchainlen']
599 599 if 'aggressivemergedeltas' in opts:
600 600 self._aggressivemergedeltas = opts['aggressivemergedeltas']
601 601 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
602 602 if 'compengine' in opts:
603 603 self._compengine = opts['compengine']
604 604 if 'maxdeltachainspan' in opts:
605 605 self._maxdeltachainspan = opts['maxdeltachainspan']
606 606 if mmaplargeindex and 'mmapindexthreshold' in opts:
607 607 mmapindexthreshold = opts['mmapindexthreshold']
608 608 self._withsparseread = bool(opts.get('with-sparse-read', False))
609 609 if 'sparse-read-density-threshold' in opts:
610 610 self._srdensitythreshold = opts['sparse-read-density-threshold']
611 611 if 'sparse-read-min-gap-size' in opts:
612 612 self._srmingapsize = opts['sparse-read-min-gap-size']
613 613
614 614 if self._chunkcachesize <= 0:
615 615 raise RevlogError(_('revlog chunk cache size %r is not greater '
616 616 'than 0') % self._chunkcachesize)
617 617 elif self._chunkcachesize & (self._chunkcachesize - 1):
618 618 raise RevlogError(_('revlog chunk cache size %r is not a power '
619 619 'of 2') % self._chunkcachesize)
620 620
621 621 indexdata = ''
622 622 self._initempty = True
623 623 try:
624 f = self.opener(self.indexfile)
624 f = self._indexfp()
625 625 if (mmapindexthreshold is not None and
626 626 self.opener.fstat(f).st_size >= mmapindexthreshold):
627 627 indexdata = util.buffer(util.mmapread(f))
628 628 else:
629 629 indexdata = f.read()
630 630 f.close()
631 631 if len(indexdata) > 0:
632 632 v = versionformat_unpack(indexdata[:4])[0]
633 633 self._initempty = False
634 634 except IOError as inst:
635 635 if inst.errno != errno.ENOENT:
636 636 raise
637 637
638 638 self.version = v
639 639 self._inline = v & FLAG_INLINE_DATA
640 640 self._generaldelta = v & FLAG_GENERALDELTA
641 641 flags = v & ~0xFFFF
642 642 fmt = v & 0xFFFF
643 643 if fmt == REVLOGV0:
644 644 if flags:
645 645 raise RevlogError(_('unknown flags (%#04x) in version %d '
646 646 'revlog %s') %
647 647 (flags >> 16, fmt, self.indexfile))
648 648 elif fmt == REVLOGV1:
649 649 if flags & ~REVLOGV1_FLAGS:
650 650 raise RevlogError(_('unknown flags (%#04x) in version %d '
651 651 'revlog %s') %
652 652 (flags >> 16, fmt, self.indexfile))
653 653 elif fmt == REVLOGV2:
654 654 if flags & ~REVLOGV2_FLAGS:
655 655 raise RevlogError(_('unknown flags (%#04x) in version %d '
656 656 'revlog %s') %
657 657 (flags >> 16, fmt, self.indexfile))
658 658 else:
659 659 raise RevlogError(_('unknown version (%d) in revlog %s') %
660 660 (fmt, self.indexfile))
661 661
662 662 self.storedeltachains = True
663 663
664 664 self._io = revlogio()
665 665 if self.version == REVLOGV0:
666 666 self._io = revlogoldio()
667 667 try:
668 668 d = self._io.parseindex(indexdata, self._inline)
669 669 except (ValueError, IndexError):
670 670 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
671 671 self.index, nodemap, self._chunkcache = d
672 672 if nodemap is not None:
673 673 self.nodemap = self._nodecache = nodemap
674 674 if not self._chunkcache:
675 675 self._chunkclear()
676 676 # revnum -> (chain-length, sum-delta-length)
677 677 self._chaininfocache = {}
678 678 # revlog header -> revlog compressor
679 679 self._decompressors = {}
680 680
681 681 @util.propertycache
682 682 def _compressor(self):
683 683 return util.compengines[self._compengine].revlogcompressor()
684 684
685 def _indexfp(self, mode='r'):
686 """file object for the revlog's index file"""
687 args = {r'mode': mode}
688 if mode != 'r':
689 args[r'checkambig'] = self._checkambig
690 if mode == 'w':
691 args[r'atomictemp'] = True
692 return self.opener(self.indexfile, **args)
693
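A mode sketch for the new helper (the subject of this changeset): 'r' opens the index plainly, as __init__ above now does via _indexfp(); any writing mode adds checkambig to avoid stat ambiguity; 'w' additionally requests atomictemp so a full rewrite replaces the index atomically:

    f = self._indexfp()       # read
    f = self._indexfp('a+')   # append; checkambig honored
    f = self._indexfp('w')    # rewrite; checkambig + atomictemp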
685 694 def _datafp(self, mode='r'):
686 695 """file object for the revlog's data file"""
687 696 return self.opener(self.datafile, mode=mode)
688 697
689 698 def tip(self):
690 699 return self.node(len(self.index) - 2)
691 700 def __contains__(self, rev):
692 701 return 0 <= rev < len(self)
693 702 def __len__(self):
694 703 return len(self.index) - 1
695 704 def __iter__(self):
696 705 return iter(xrange(len(self)))
697 706 def revs(self, start=0, stop=None):
698 707 """iterate over all rev in this revlog (from start to stop)"""
699 708 step = 1
700 709 if stop is not None:
701 710 if start > stop:
702 711 step = -1
703 712 stop += step
704 713 else:
705 714 stop = len(self)
706 715 return xrange(start, stop, step)
707 716
708 717 @util.propertycache
709 718 def nodemap(self):
710 719 self.rev(self.node(0))
711 720 return self._nodecache
712 721
713 722 def hasnode(self, node):
714 723 try:
715 724 self.rev(node)
716 725 return True
717 726 except KeyError:
718 727 return False
719 728
720 729 def clearcaches(self):
721 730 self._cache = None
722 731 self._chainbasecache.clear()
723 732 self._chunkcache = (0, '')
724 733 self._pcache = {}
725 734
726 735 try:
727 736 self._nodecache.clearcaches()
728 737 except AttributeError:
729 738 self._nodecache = {nullid: nullrev}
730 739 self._nodepos = None
731 740
732 741 def rev(self, node):
733 742 try:
734 743 return self._nodecache[node]
735 744 except TypeError:
736 745 raise
737 746 except RevlogError:
738 747 # parsers.c radix tree lookup failed
739 748 if node == wdirid:
740 749 raise error.WdirUnsupported
741 750 raise LookupError(node, self.indexfile, _('no node'))
742 751 except KeyError:
743 752 # pure python cache lookup failed
744 753 n = self._nodecache
745 754 i = self.index
746 755 p = self._nodepos
747 756 if p is None:
748 757 p = len(i) - 2
749 758 for r in xrange(p, -1, -1):
750 759 v = i[r][7]
751 760 n[v] = r
752 761 if v == node:
753 762 self._nodepos = r - 1
754 763 return r
755 764 if node == wdirid:
756 765 raise error.WdirUnsupported
757 766 raise LookupError(node, self.indexfile, _('no node'))
758 767
759 768 # Accessors for index entries.
760 769
761 770 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
762 771 # are flags.
763 772 def start(self, rev):
764 773 return int(self.index[rev][0] >> 16)
765 774
766 775 def flags(self, rev):
767 776 return self.index[rev][0] & 0xFFFF
768 777
769 778 def length(self, rev):
770 779 return self.index[rev][1]
771 780
772 781 def rawsize(self, rev):
773 782 """return the length of the uncompressed text for a given revision"""
774 783 l = self.index[rev][2]
775 784 if l >= 0:
776 785 return l
777 786
778 787 t = self.revision(rev, raw=True)
779 788 return len(t)
780 789
781 790 def size(self, rev):
782 791 """length of non-raw text (processed by a "read" flag processor)"""
783 792 # fast path: if no "read" flag processor could change the content,
784 793 # size is rawsize. note: ELLIPSIS is known to not change the content.
785 794 flags = self.flags(rev)
786 795 if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
787 796 return self.rawsize(rev)
788 797
789 798 return len(self.revision(rev, raw=False))
790 799
791 800 def chainbase(self, rev):
792 801 base = self._chainbasecache.get(rev)
793 802 if base is not None:
794 803 return base
795 804
796 805 index = self.index
797 806 base = index[rev][3]
798 807 while base != rev:
799 808 rev = base
800 809 base = index[rev][3]
801 810
802 811 self._chainbasecache[rev] = base
803 812 return base
804 813
805 814 def linkrev(self, rev):
806 815 return self.index[rev][4]
807 816
808 817 def parentrevs(self, rev):
809 818 try:
810 819 entry = self.index[rev]
811 820 except IndexError:
812 821 if rev == wdirrev:
813 822 raise error.WdirUnsupported
814 823 raise
815 824
816 825 return entry[5], entry[6]
817 826
818 827 def node(self, rev):
819 828 try:
820 829 return self.index[rev][7]
821 830 except IndexError:
822 831 if rev == wdirrev:
823 832 raise error.WdirUnsupported
824 833 raise
825 834
826 835 # Derived from index values.
827 836
828 837 def end(self, rev):
829 838 return self.start(rev) + self.length(rev)
830 839
831 840 def parents(self, node):
832 841 i = self.index
833 842 d = i[self.rev(node)]
834 843 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
835 844
836 845 def chainlen(self, rev):
837 846 return self._chaininfo(rev)[0]
838 847
839 848 def _chaininfo(self, rev):
840 849 chaininfocache = self._chaininfocache
841 850 if rev in chaininfocache:
842 851 return chaininfocache[rev]
843 852 index = self.index
844 853 generaldelta = self._generaldelta
845 854 iterrev = rev
846 855 e = index[iterrev]
847 856 clen = 0
848 857 compresseddeltalen = 0
849 858 while iterrev != e[3]:
850 859 clen += 1
851 860 compresseddeltalen += e[1]
852 861 if generaldelta:
853 862 iterrev = e[3]
854 863 else:
855 864 iterrev -= 1
856 865 if iterrev in chaininfocache:
857 866 t = chaininfocache[iterrev]
858 867 clen += t[0]
859 868 compresseddeltalen += t[1]
860 869 break
861 870 e = index[iterrev]
862 871 else:
863 872 # Add text length of base since decompressing that also takes
864 873 # work. For cache hits the length is already included.
865 874 compresseddeltalen += e[1]
866 875 r = (clen, compresseddeltalen)
867 876 chaininfocache[rev] = r
868 877 return r
869 878
870 879 def _deltachain(self, rev, stoprev=None):
871 880 """Obtain the delta chain for a revision.
872 881
873 882 ``stoprev`` specifies a revision to stop at. If not specified, we
874 883 stop at the base of the chain.
875 884
876 885 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
877 886 revs in ascending order and ``stopped`` is a bool indicating whether
878 887 ``stoprev`` was hit.
879 888 """
880 889 # Try C implementation.
881 890 try:
882 891 return self.index.deltachain(rev, stoprev, self._generaldelta)
883 892 except AttributeError:
884 893 pass
885 894
886 895 chain = []
887 896
888 897 # Alias to prevent attribute lookup in tight loop.
889 898 index = self.index
890 899 generaldelta = self._generaldelta
891 900
892 901 iterrev = rev
893 902 e = index[iterrev]
894 903 while iterrev != e[3] and iterrev != stoprev:
895 904 chain.append(iterrev)
896 905 if generaldelta:
897 906 iterrev = e[3]
898 907 else:
899 908 iterrev -= 1
900 909 e = index[iterrev]
901 910
902 911 if iterrev == stoprev:
903 912 stopped = True
904 913 else:
905 914 chain.append(iterrev)
906 915 stopped = False
907 916
908 917 chain.reverse()
909 918 return chain, stopped
910 919
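A usage sketch: rebuilding a revision's fulltext walks the chain base-first.

    chain, stopped = self._deltachain(rev)
    # ascending order; with no stoprev, stopped is False, chain[0] is the
    # base holding a full snapshot, and chain[-1] == rev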
911 920 def ancestors(self, revs, stoprev=0, inclusive=False):
912 921 """Generate the ancestors of 'revs' in reverse topological order.
913 922 Does not generate revs lower than stoprev.
914 923
915 924 See the documentation for ancestor.lazyancestors for more details."""
916 925
917 926 return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
918 927 inclusive=inclusive)
919 928
920 929 def descendants(self, revs):
921 930 """Generate the descendants of 'revs' in revision order.
922 931
923 932 Yield a sequence of revision numbers starting with a child of
924 933 some rev in revs, i.e., each revision is *not* considered a
925 934 descendant of itself. Results are ordered by revision number (a
926 935 topological sort)."""
927 936 first = min(revs)
928 937 if first == nullrev:
929 938 for i in self:
930 939 yield i
931 940 return
932 941
933 942 seen = set(revs)
934 943 for i in self.revs(start=first + 1):
935 944 for x in self.parentrevs(i):
936 945 if x != nullrev and x in seen:
937 946 seen.add(i)
938 947 yield i
939 948 break
940 949
941 950 def findcommonmissing(self, common=None, heads=None):
942 951 """Return a tuple of the ancestors of common and the ancestors of heads
943 952 that are not ancestors of common. In revset terminology, we return the
944 953 tuple:
945 954
946 955 ::common, (::heads) - (::common)
947 956
948 957 The list is sorted by revision number, meaning it is
949 958 topologically sorted.
950 959
951 960 'heads' and 'common' are both lists of node IDs. If heads is
952 961 not supplied, uses all of the revlog's heads. If common is not
953 962 supplied, uses nullid."""
954 963 if common is None:
955 964 common = [nullid]
956 965 if heads is None:
957 966 heads = self.heads()
958 967
959 968 common = [self.rev(n) for n in common]
960 969 heads = [self.rev(n) for n in heads]
961 970
962 971 # we want the ancestors, but inclusive
963 972 class lazyset(object):
964 973 def __init__(self, lazyvalues):
965 974 self.addedvalues = set()
966 975 self.lazyvalues = lazyvalues
967 976
968 977 def __contains__(self, value):
969 978 return value in self.addedvalues or value in self.lazyvalues
970 979
971 980 def __iter__(self):
972 981 added = self.addedvalues
973 982 for r in added:
974 983 yield r
975 984 for r in self.lazyvalues:
976 985 if not r in added:
977 986 yield r
978 987
979 988 def add(self, value):
980 989 self.addedvalues.add(value)
981 990
982 991 def update(self, values):
983 992 self.addedvalues.update(values)
984 993
985 994 has = lazyset(self.ancestors(common))
986 995 has.add(nullrev)
987 996 has.update(common)
988 997
989 998 # take all ancestors from heads that aren't in has
990 999 missing = set()
991 1000 visit = collections.deque(r for r in heads if r not in has)
992 1001 while visit:
993 1002 r = visit.popleft()
994 1003 if r in missing:
995 1004 continue
996 1005 else:
997 1006 missing.add(r)
998 1007 for p in self.parentrevs(r):
999 1008 if p not in has:
1000 1009 visit.append(p)
1001 1010 missing = list(missing)
1002 1011 missing.sort()
1003 1012 return has, [self.node(miss) for miss in missing]
1004 1013
1005 1014 def incrementalmissingrevs(self, common=None):
1006 1015 """Return an object that can be used to incrementally compute the
1007 1016 revision numbers of the ancestors of arbitrary sets that are not
1008 1017 ancestors of common. This is an ancestor.incrementalmissingancestors
1009 1018 object.
1010 1019
1011 1020 'common' is a list of revision numbers. If common is not supplied, uses
1012 1021 nullrev.
1013 1022 """
1014 1023 if common is None:
1015 1024 common = [nullrev]
1016 1025
1017 1026 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1018 1027
1019 1028 def findmissingrevs(self, common=None, heads=None):
1020 1029 """Return the revision numbers of the ancestors of heads that
1021 1030 are not ancestors of common.
1022 1031
1023 1032 More specifically, return a list of revision numbers corresponding to
1024 1033 nodes N such that every N satisfies the following constraints:
1025 1034
1026 1035 1. N is an ancestor of some node in 'heads'
1027 1036 2. N is not an ancestor of any node in 'common'
1028 1037
1029 1038 The list is sorted by revision number, meaning it is
1030 1039 topologically sorted.
1031 1040
1032 1041 'heads' and 'common' are both lists of revision numbers. If heads is
1033 1042 not supplied, uses all of the revlog's heads. If common is not
1034 1043 supplied, uses nullid."""
1035 1044 if common is None:
1036 1045 common = [nullrev]
1037 1046 if heads is None:
1038 1047 heads = self.headrevs()
1039 1048
1040 1049 inc = self.incrementalmissingrevs(common=common)
1041 1050 return inc.missingancestors(heads)
1042 1051
1043 1052 def findmissing(self, common=None, heads=None):
1044 1053 """Return the ancestors of heads that are not ancestors of common.
1045 1054
1046 1055 More specifically, return a list of nodes N such that every N
1047 1056 satisfies the following constraints:
1048 1057
1049 1058 1. N is an ancestor of some node in 'heads'
1050 1059 2. N is not an ancestor of any node in 'common'
1051 1060
1052 1061 The list is sorted by revision number, meaning it is
1053 1062 topologically sorted.
1054 1063
1055 1064 'heads' and 'common' are both lists of node IDs. If heads is
1056 1065 not supplied, uses all of the revlog's heads. If common is not
1057 1066 supplied, uses nullid."""
1058 1067 if common is None:
1059 1068 common = [nullid]
1060 1069 if heads is None:
1061 1070 heads = self.heads()
1062 1071
1063 1072 common = [self.rev(n) for n in common]
1064 1073 heads = [self.rev(n) for n in heads]
1065 1074
1066 1075 inc = self.incrementalmissingrevs(common=common)
1067 1076 return [self.node(r) for r in inc.missingancestors(heads)]
1068 1077
1069 1078 def nodesbetween(self, roots=None, heads=None):
1070 1079 """Return a topological path from 'roots' to 'heads'.
1071 1080
1072 1081 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1073 1082 topologically sorted list of all nodes N that satisfy both of
1074 1083 these constraints:
1075 1084
1076 1085 1. N is a descendant of some node in 'roots'
1077 1086 2. N is an ancestor of some node in 'heads'
1078 1087
1079 1088 Every node is considered to be both a descendant and an ancestor
1080 1089 of itself, so every reachable node in 'roots' and 'heads' will be
1081 1090 included in 'nodes'.
1082 1091
1083 1092 'outroots' is the list of reachable nodes in 'roots', i.e., the
1084 1093 subset of 'roots' that is returned in 'nodes'. Likewise,
1085 1094 'outheads' is the subset of 'heads' that is also in 'nodes'.
1086 1095
1087 1096 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1088 1097 unspecified, uses nullid as the only root. If 'heads' is
1089 1098 unspecified, uses list of all of the revlog's heads."""
1090 1099 nonodes = ([], [], [])
1091 1100 if roots is not None:
1092 1101 roots = list(roots)
1093 1102 if not roots:
1094 1103 return nonodes
1095 1104 lowestrev = min([self.rev(n) for n in roots])
1096 1105 else:
1097 1106 roots = [nullid] # Everybody's a descendant of nullid
1098 1107 lowestrev = nullrev
1099 1108 if (lowestrev == nullrev) and (heads is None):
1100 1109 # We want _all_ the nodes!
1101 1110 return ([self.node(r) for r in self], [nullid], list(self.heads()))
1102 1111 if heads is None:
1103 1112 # All nodes are ancestors, so the latest ancestor is the last
1104 1113 # node.
1105 1114 highestrev = len(self) - 1
1106 1115 # Set ancestors to None to signal that every node is an ancestor.
1107 1116 ancestors = None
1108 1117 # Set heads to an empty dictionary for later discovery of heads
1109 1118 heads = {}
1110 1119 else:
1111 1120 heads = list(heads)
1112 1121 if not heads:
1113 1122 return nonodes
1114 1123 ancestors = set()
1115 1124 # Turn heads into a dictionary so we can remove 'fake' heads.
1116 1125 # Also, later we will be using it to filter out the heads we can't
1117 1126 # find from roots.
1118 1127 heads = dict.fromkeys(heads, False)
1119 1128 # Start at the top and keep marking parents until we're done.
1120 1129 nodestotag = set(heads)
1121 1130 # Remember where the top was so we can use it as a limit later.
1122 1131 highestrev = max([self.rev(n) for n in nodestotag])
1123 1132 while nodestotag:
1124 1133 # grab a node to tag
1125 1134 n = nodestotag.pop()
1126 1135 # Never tag nullid
1127 1136 if n == nullid:
1128 1137 continue
1129 1138 # A node's revision number represents its place in a
1130 1139 # topologically sorted list of nodes.
1131 1140 r = self.rev(n)
1132 1141 if r >= lowestrev:
1133 1142 if n not in ancestors:
1134 1143 # If we are possibly a descendant of one of the roots
1135 1144 # and we haven't already been marked as an ancestor
1136 1145 ancestors.add(n) # Mark as ancestor
1137 1146 # Add non-nullid parents to list of nodes to tag.
1138 1147 nodestotag.update([p for p in self.parents(n) if
1139 1148 p != nullid])
1140 1149 elif n in heads: # We've seen it before, is it a fake head?
1141 1150 # So it is, real heads should not be the ancestors of
1142 1151 # any other heads.
1143 1152 heads.pop(n)
1144 1153 if not ancestors:
1145 1154 return nonodes
1146 1155 # Now that we have our set of ancestors, we want to remove any
1147 1156 # roots that are not ancestors.
1148 1157
1149 1158 # If one of the roots was nullid, everything is included anyway.
1150 1159 if lowestrev > nullrev:
1151 1160 # But, since we weren't, let's recompute the lowest rev to not
1152 1161 # include roots that aren't ancestors.
1153 1162
1154 1163 # Filter out roots that aren't ancestors of heads
1155 1164 roots = [root for root in roots if root in ancestors]
1156 1165 # Recompute the lowest revision
1157 1166 if roots:
1158 1167 lowestrev = min([self.rev(root) for root in roots])
1159 1168 else:
1160 1169 # No more roots? Return empty list
1161 1170 return nonodes
1162 1171 else:
1163 1172 # We are descending from nullid, and don't need to care about
1164 1173 # any other roots.
1165 1174 lowestrev = nullrev
1166 1175 roots = [nullid]
1167 1176 # Transform our roots list into a set.
1168 1177 descendants = set(roots)
1169 1178 # Also, keep the original roots so we can filter out roots that aren't
1170 1179 # 'real' roots (i.e. are descended from other roots).
1171 1180 roots = descendants.copy()
1172 1181 # Our topologically sorted list of output nodes.
1173 1182 orderedout = []
1174 1183 # Don't start at nullid since we don't want nullid in our output list,
1175 1184 # and if nullid shows up in descendants, empty parents will look like
1176 1185 # they're descendants.
1177 1186 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1178 1187 n = self.node(r)
1179 1188 isdescendant = False
1180 1189 if lowestrev == nullrev: # Everybody is a descendant of nullid
1181 1190 isdescendant = True
1182 1191 elif n in descendants:
1183 1192 # n is already a descendant
1184 1193 isdescendant = True
1185 1194 # This check only needs to be done here because all the roots
1186 1195 # will start being marked as descendants before the loop.
1187 1196 if n in roots:
1188 1197 # If n was a root, check if it's a 'real' root.
1189 1198 p = tuple(self.parents(n))
1190 1199 # If any of its parents are descendants, it's not a root.
1191 1200 if (p[0] in descendants) or (p[1] in descendants):
1192 1201 roots.remove(n)
1193 1202 else:
1194 1203 p = tuple(self.parents(n))
1195 1204 # A node is a descendant if either of its parents are
1196 1205 # descendants. (We seeded the descendants set with the roots
1197 1206 # up there, remember?)
1198 1207 if (p[0] in descendants) or (p[1] in descendants):
1199 1208 descendants.add(n)
1200 1209 isdescendant = True
1201 1210 if isdescendant and ((ancestors is None) or (n in ancestors)):
1202 1211 # Only include nodes that are both descendants and ancestors.
1203 1212 orderedout.append(n)
1204 1213 if (ancestors is not None) and (n in heads):
1205 1214 # We're trying to figure out which heads are reachable
1206 1215 # from roots.
1207 1216 # Mark this head as having been reached
1208 1217 heads[n] = True
1209 1218 elif ancestors is None:
1210 1219 # Otherwise, we're trying to discover the heads.
1211 1220 # Assume this is a head because if it isn't, the next step
1212 1221 # will eventually remove it.
1213 1222 heads[n] = True
1214 1223 # But, obviously its parents aren't.
1215 1224 for p in self.parents(n):
1216 1225 heads.pop(p, None)
1217 1226 heads = [head for head, flag in heads.iteritems() if flag]
1218 1227 roots = list(roots)
1219 1228 assert orderedout
1220 1229 assert roots
1221 1230 assert heads
1222 1231 return (orderedout, roots, heads)
1223 1232
1224 1233 def headrevs(self):
1225 1234 try:
1226 1235 return self.index.headrevs()
1227 1236 except AttributeError:
1228 1237 return self._headrevs()
1229 1238
1230 1239 def computephases(self, roots):
1231 1240 return self.index.computephasesmapsets(roots)
1232 1241
1233 1242 def _headrevs(self):
1234 1243 count = len(self)
1235 1244 if not count:
1236 1245 return [nullrev]
1237 1246 # we won't iter over filtered rev so nobody is a head at start
1238 1247 ishead = [0] * (count + 1)
1239 1248 index = self.index
1240 1249 for r in self:
1241 1250 ishead[r] = 1 # I may be a head
1242 1251 e = index[r]
1243 1252 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1244 1253 return [r for r, val in enumerate(ishead) if val]
1245 1254
1246 1255 def heads(self, start=None, stop=None):
1247 1256 """return the list of all nodes that have no children
1248 1257
1249 1258 if start is specified, only heads that are descendants of
1250 1259 start will be returned
1251 1260 if stop is specified, it will consider all the revs from stop
1252 1261 as if they had no children
1253 1262 """
1254 1263 if start is None and stop is None:
1255 1264 if not len(self):
1256 1265 return [nullid]
1257 1266 return [self.node(r) for r in self.headrevs()]
1258 1267
1259 1268 if start is None:
1260 1269 start = nullid
1261 1270 if stop is None:
1262 1271 stop = []
1263 1272 stoprevs = set([self.rev(n) for n in stop])
1264 1273 startrev = self.rev(start)
1265 1274 reachable = {startrev}
1266 1275 heads = {startrev}
1267 1276
1268 1277 parentrevs = self.parentrevs
1269 1278 for r in self.revs(start=startrev + 1):
1270 1279 for p in parentrevs(r):
1271 1280 if p in reachable:
1272 1281 if r not in stoprevs:
1273 1282 reachable.add(r)
1274 1283 heads.add(r)
1275 1284 if p in heads and p not in stoprevs:
1276 1285 heads.remove(p)
1277 1286
1278 1287 return [self.node(r) for r in heads]
1279 1288
1280 1289 def children(self, node):
1281 1290 """find the children of a given node"""
1282 1291 c = []
1283 1292 p = self.rev(node)
1284 1293 for r in self.revs(start=p + 1):
1285 1294 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1286 1295 if prevs:
1287 1296 for pr in prevs:
1288 1297 if pr == p:
1289 1298 c.append(self.node(r))
1290 1299 elif p == nullrev:
1291 1300 c.append(self.node(r))
1292 1301 return c
1293 1302
1294 1303 def descendant(self, start, end):
1295 1304 if start == nullrev:
1296 1305 return True
1297 1306 for i in self.descendants([start]):
1298 1307 if i == end:
1299 1308 return True
1300 1309 elif i > end:
1301 1310 break
1302 1311 return False
1303 1312
1304 1313 def commonancestorsheads(self, a, b):
1305 1314 """calculate all the heads of the common ancestors of nodes a and b"""
1306 1315 a, b = self.rev(a), self.rev(b)
1307 1316 try:
1308 1317 ancs = self.index.commonancestorsheads(a, b)
1309 1318 except (AttributeError, OverflowError): # C implementation failed
1310 1319 ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)
1311 1320 return pycompat.maplist(self.node, ancs)
1312 1321
1313 1322 def isancestor(self, a, b):
1314 1323 """return True if node a is an ancestor of node b
1315 1324
1316 1325 The implementation of this is trivial but the use of
1317 1326 commonancestorsheads is not."""
1318 1327 return a in self.commonancestorsheads(a, b)
1319 1328
1320 1329 def ancestor(self, a, b):
1321 1330 """calculate the "best" common ancestor of nodes a and b"""
1322 1331
1323 1332 a, b = self.rev(a), self.rev(b)
1324 1333 try:
1325 1334 ancs = self.index.ancestors(a, b)
1326 1335 except (AttributeError, OverflowError):
1327 1336 ancs = ancestor.ancestors(self.parentrevs, a, b)
1328 1337 if ancs:
1329 1338 # choose a consistent winner when there's a tie
1330 1339 return min(map(self.node, ancs))
1331 1340 return nullid
1332 1341
1333 1342 def _match(self, id):
1334 1343 if isinstance(id, int):
1335 1344 # rev
1336 1345 return self.node(id)
1337 1346 if len(id) == 20:
1338 1347 # possibly a binary node
1339 1348 # odds of a binary node being all hex in ASCII are 1 in 10**25
1340 1349 try:
1341 1350 node = id
1342 1351 self.rev(node) # quick search the index
1343 1352 return node
1344 1353 except LookupError:
1345 1354 pass # may be partial hex id
1346 1355 try:
1347 1356 # str(rev)
1348 1357 rev = int(id)
1349 1358 if str(rev) != id:
1350 1359 raise ValueError
1351 1360 if rev < 0:
1352 1361 rev = len(self) + rev
1353 1362 if rev < 0 or rev >= len(self):
1354 1363 raise ValueError
1355 1364 return self.node(rev)
1356 1365 except (ValueError, OverflowError):
1357 1366 pass
1358 1367 if len(id) == 40:
1359 1368 try:
1360 1369 # a full hex nodeid?
1361 1370 node = bin(id)
1362 1371 self.rev(node)
1363 1372 return node
1364 1373 except (TypeError, LookupError):
1365 1374 pass
1366 1375
1367 1376 def _partialmatch(self, id):
1368 1377 maybewdir = wdirhex.startswith(id)
1369 1378 try:
1370 1379 partial = self.index.partialmatch(id)
1371 1380 if partial and self.hasnode(partial):
1372 1381 if maybewdir:
1373 1382 # single 'ff...' match in radix tree, ambiguous with wdir
1374 1383 raise RevlogError
1375 1384 return partial
1376 1385 if maybewdir:
1377 1386 # no 'ff...' match in radix tree, wdir identified
1378 1387 raise error.WdirUnsupported
1379 1388 return None
1380 1389 except RevlogError:
1381 1390 # parsers.c radix tree lookup gave multiple matches
1382 1391 # fast path: for unfiltered changelog, radix tree is accurate
1383 1392 if not getattr(self, 'filteredrevs', None):
1384 1393 raise LookupError(id, self.indexfile,
1385 1394 _('ambiguous identifier'))
1386 1395 # fall through to slow path that filters hidden revisions
1387 1396 except (AttributeError, ValueError):
1388 1397 # we are pure python, or key was too short to search radix tree
1389 1398 pass
1390 1399
1391 1400 if id in self._pcache:
1392 1401 return self._pcache[id]
1393 1402
1394 1403 if len(id) < 40:
1395 1404 try:
1396 1405 # hex(node)[:...]
1397 1406 l = len(id) // 2 # grab an even number of digits
1398 1407 prefix = bin(id[:l * 2])
1399 1408 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1400 1409 nl = [n for n in nl if hex(n).startswith(id) and
1401 1410 self.hasnode(n)]
1402 1411 if len(nl) > 0:
1403 1412 if len(nl) == 1 and not maybewdir:
1404 1413 self._pcache[id] = nl[0]
1405 1414 return nl[0]
1406 1415 raise LookupError(id, self.indexfile,
1407 1416 _('ambiguous identifier'))
1408 1417 if maybewdir:
1409 1418 raise error.WdirUnsupported
1410 1419 return None
1411 1420 except (TypeError, binascii.Error):
1412 1421 pass
1413 1422
1414 1423 def lookup(self, id):
1415 1424 """locate a node based on:
1416 1425 - revision number or str(revision number)
1417 1426 - nodeid or subset of hex nodeid
1418 1427 """
1419 1428 n = self._match(id)
1420 1429 if n is not None:
1421 1430 return n
1422 1431 n = self._partialmatch(id)
1423 1432 if n:
1424 1433 return n
1425 1434
1426 1435 raise LookupError(id, self.indexfile, _('no match found'))
1427 1436
1428 1437 def shortest(self, hexnode, minlength=1):
1429 1438 """Find the shortest unambiguous prefix that matches hexnode."""
1430 1439 def isvalid(test):
1431 1440 try:
1432 1441 if self._partialmatch(test) is None:
1433 1442 return False
1434 1443
1435 1444 try:
1436 1445 i = int(test)
1437 1446 # if we are a pure int, then starting with zero will not be
1438 1447 # confused as a rev; or, obviously, if the int is larger
1439 1448 # than the value of the tip rev
1440 1449 if test[0] == '0' or i > len(self):
1441 1450 return True
1442 1451 return False
1443 1452 except ValueError:
1444 1453 return True
1445 1454 except error.RevlogError:
1446 1455 return False
1447 1456 except error.WdirUnsupported:
1448 1457 # single 'ff...' match
1449 1458 return True
1450 1459
1451 1460 shortest = hexnode
1452 1461 startlength = max(6, minlength)
1453 1462 length = startlength
1454 1463 while True:
1455 1464 test = hexnode[:length]
1456 1465 if isvalid(test):
1457 1466 shortest = test
1458 1467 if length == minlength or length > startlength:
1459 1468 return shortest
1460 1469 length -= 1
1461 1470 else:
1462 1471 length += 1
1463 1472 if len(shortest) <= length:
1464 1473 return shortest
1465 1474
1466 1475 def cmp(self, node, text):
1467 1476 """compare text with a given file revision
1468 1477
1469 1478 returns True if text is different than what is stored.
1470 1479 """
1471 1480 p1, p2 = self.parents(node)
1472 1481 return hash(text, p1, p2) != node
1473 1482
1474 1483 def _cachesegment(self, offset, data):
1475 1484 """Add a segment to the revlog cache.
1476 1485
1477 1486 Accepts an absolute offset and the data that is at that location.
1478 1487 """
1479 1488 o, d = self._chunkcache
1480 1489 # try to add to existing cache
1481 1490 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1482 1491 self._chunkcache = o, d + data
1483 1492 else:
1484 1493 self._chunkcache = offset, data
1485 1494
1486 1495 def _readsegment(self, offset, length, df=None):
1487 1496 """Load a segment of raw data from the revlog.
1488 1497
1489 1498 Accepts an absolute offset, length to read, and an optional existing
1490 1499 file handle to read from.
1491 1500
1492 1501 If an existing file handle is passed, it will be seeked and the
1493 1502 original seek position will NOT be restored.
1494 1503
1495 1504 Returns a str or buffer of raw byte data.
1496 1505 """
1497 1506 if df is not None:
1498 1507 closehandle = False
1499 1508 else:
1500 1509 if self._inline:
1501 df = self.opener(self.indexfile)
1510 df = self._indexfp()
1502 1511 else:
1503 1512 df = self._datafp()
1504 1513 closehandle = True
1505 1514
1506 1515 # Cache data both forward and backward around the requested
1507 1516 # data, in a fixed size window. This helps speed up operations
1508 1517 # involving reading the revlog backwards.
1509 1518 cachesize = self._chunkcachesize
1510 1519 realoffset = offset & ~(cachesize - 1)
1511 1520 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
1512 1521 - realoffset)
1513 1522 df.seek(realoffset)
1514 1523 d = df.read(reallength)
1515 1524 if closehandle:
1516 1525 df.close()
1517 1526 self._cachesegment(realoffset, d)
1518 1527 if offset != realoffset or reallength != length:
1519 1528 return util.buffer(d, offset - realoffset, length)
1520 1529 return d
1521 1530
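A window sketch with the default 64 KiB chunk cache size (request values hypothetical): a request for offset=100000, length=1000 aligns to

    realoffset = 100000 & ~65535                      # = 65536
    reallength = ((101000 + 65536) & ~65535) - 65536  # = 65536

so a single aligned 64 KiB block around the request is read and cached.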
1522 1531 def _getsegment(self, offset, length, df=None):
1523 1532 """Obtain a segment of raw data from the revlog.
1524 1533
1525 1534 Accepts an absolute offset, length of bytes to obtain, and an
1526 1535 optional file handle to the already-opened revlog. If the file
1527 1536 handle is used, its original seek position will not be preserved.
1528 1537
1529 1538 Requests for data may be returned from a cache.
1530 1539
1531 1540 Returns a str or a buffer instance of raw byte data.
1532 1541 """
1533 1542 o, d = self._chunkcache
1534 1543 l = len(d)
1535 1544
1536 1545 # is it in the cache?
1537 1546 cachestart = offset - o
1538 1547 cacheend = cachestart + length
1539 1548 if cachestart >= 0 and cacheend <= l:
1540 1549 if cachestart == 0 and cacheend == l:
1541 1550 return d # avoid a copy
1542 1551 return util.buffer(d, cachestart, cacheend - cachestart)
1543 1552
1544 1553 return self._readsegment(offset, length, df=df)
1545 1554
1546 1555 def _getsegmentforrevs(self, startrev, endrev, df=None):
1547 1556 """Obtain a segment of raw data corresponding to a range of revisions.
1548 1557
1549 1558 Accepts the start and end revisions and an optional already-open
1550 1559 file handle to be used for reading. If the file handle is read, its
1551 1560 seek position will not be preserved.
1552 1561
1553 1562 Requests for data may be satisfied by a cache.
1554 1563
1555 1564 Returns a 2-tuple of (offset, data) for the requested range of
1556 1565 revisions. Offset is the integer offset from the beginning of the
1557 1566 revlog and data is a str or buffer of the raw byte data.
1558 1567
1559 1568 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1560 1569 to determine where each revision's data begins and ends.
1561 1570 """
1562 1571 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1563 1572 # (functions are expensive).
1564 1573 index = self.index
1565 1574 istart = index[startrev]
1566 1575 start = int(istart[0] >> 16)
1567 1576 if startrev == endrev:
1568 1577 end = start + istart[1]
1569 1578 else:
1570 1579 iend = index[endrev]
1571 1580 end = int(iend[0] >> 16) + iend[1]
1572 1581
1573 1582 if self._inline:
1574 1583 start += (startrev + 1) * self._io.size
1575 1584 end += (endrev + 1) * self._io.size
1576 1585 length = end - start
1577 1586
1578 1587 return start, self._getsegment(start, length, df=df)
1579 1588
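The offset arithmetic above can be restated as a small standalone
sketch, assuming the v1 index layout in which entry[0] packs the data
offset above 16 bits of flags and entry[1] holds the compressed length;
the function name is hypothetical:

def physicalspan(index, iosize, inline, startrev, endrev):
    # entry[0] packs 'offset << 16 | flags'; shift the flags away
    start = int(index[startrev][0] >> 16)
    iend = index[endrev]
    end = int(iend[0] >> 16) + iend[1]
    if inline:
        # in an inline revlog each data chunk sits just after its
        # index entry, so rev N's data is shifted by N + 1 entries
        start += (startrev + 1) * iosize
        end += (endrev + 1) * iosize
    return start, end - start
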
1580 1589 def _chunk(self, rev, df=None):
1581 1590 """Obtain a single decompressed chunk for a revision.
1582 1591
1583 1592 Accepts an integer revision and an optional already-open file handle
1584 1593 to be used for reading. If used, the seek position of the file will not
1585 1594 be preserved.
1586 1595
1587 1596 Returns a str holding uncompressed data for the requested revision.
1588 1597 """
1589 1598 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1590 1599
1591 1600 def _chunks(self, revs, df=None):
1592 1601 """Obtain decompressed chunks for the specified revisions.
1593 1602
1594 1603 Accepts an iterable of numeric revisions that are assumed to be in
1595 1604 ascending order. Also accepts an optional already-open file handle
1596 1605 to be used for reading. If used, the seek position of the file will
1597 1606 not be preserved.
1598 1607
1599 1608 This function is similar to calling ``self._chunk()`` multiple times,
1600 1609 but is faster.
1601 1610
1602 1611 Returns a list with decompressed data for each requested revision.
1603 1612 """
1604 1613 if not revs:
1605 1614 return []
1606 1615 start = self.start
1607 1616 length = self.length
1608 1617 inline = self._inline
1609 1618 iosize = self._io.size
1610 1619 buffer = util.buffer
1611 1620
1612 1621 l = []
1613 1622 ladd = l.append
1614 1623
1615 1624 if not self._withsparseread:
1616 1625 slicedchunks = (revs,)
1617 1626 else:
1618 1627 slicedchunks = _slicechunk(self, revs)
1619 1628
1620 1629 for revschunk in slicedchunks:
1621 1630 firstrev = revschunk[0]
1622 1631 # Skip trailing revisions with empty diff
1623 1632 for lastrev in revschunk[::-1]:
1624 1633 if length(lastrev) != 0:
1625 1634 break
1626 1635
1627 1636 try:
1628 1637 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1629 1638 except OverflowError:
1630 1639 # issue4215 - we can't cache a run of chunks greater than
1631 1640 # 2G on Windows
1632 1641 return [self._chunk(rev, df=df) for rev in revschunk]
1633 1642
1634 1643 decomp = self.decompress
1635 1644 for rev in revschunk:
1636 1645 chunkstart = start(rev)
1637 1646 if inline:
1638 1647 chunkstart += (rev + 1) * iosize
1639 1648 chunklength = length(rev)
1640 1649 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1641 1650
1642 1651 return l
1643 1652
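The loop above deliberately issues one contiguous read and then hands
out zero-copy views into it. A standalone sketch of that pattern, using
memoryview where the revlog uses util.buffer (names hypothetical):

def sliceviews(data, offset, spans):
    # data starts at absolute position 'offset'; spans is a list of
    # (chunkstart, chunklength) pairs in absolute coordinates
    view = memoryview(data)
    return [view[s - offset:s - offset + l] for s, l in spans]

# sliceviews(b'abcdef', 10, [(10, 2), (13, 3)]) yields views over
# b'ab' and b'def' without copying the underlying buffer
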
1644 1653 def _chunkclear(self):
1645 1654 """Clear the raw chunk cache."""
1646 1655 self._chunkcache = (0, '')
1647 1656
1648 1657 def deltaparent(self, rev):
1649 1658 """return deltaparent of the given revision"""
1650 1659 base = self.index[rev][3]
1651 1660 if base == rev:
1652 1661 return nullrev
1653 1662 elif self._generaldelta:
1654 1663 return base
1655 1664 else:
1656 1665 return rev - 1
1657 1666
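Restated as a standalone sketch (with nullrev spelled as -1): without
generaldelta the delta parent is implicit, with it the index stores the
base revision directly:

def deltaparentrev(base, rev, generaldelta):
    if base == rev:
        return -1                    # nullrev: stored as a full snapshot
    return base if generaldelta else rev - 1
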
1658 1667 def revdiff(self, rev1, rev2):
1659 1668 """return or calculate a delta between two revisions
1660 1669
1661 1670 The delta calculated is in binary form and is intended to be written to
1662 1671 revlog data directly. So this function needs raw revision data.
1663 1672 """
1664 1673 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1665 1674 return bytes(self._chunk(rev2))
1666 1675
1667 1676 return mdiff.textdiff(self.revision(rev1, raw=True),
1668 1677 self.revision(rev2, raw=True))
1669 1678
1670 1679 def revision(self, nodeorrev, _df=None, raw=False):
1671 1680 """return an uncompressed revision of a given node or revision
1672 1681 number.
1673 1682
1674 1683 _df - an existing file handle to read from. (internal-only)
1675 1684 raw - an optional argument specifying if the revision data is to be
1676 1685 treated as raw data when applying flag transforms. 'raw' should be set
1677 1686 to True when generating changegroups or in debug commands.
1678 1687 """
1679 1688 if isinstance(nodeorrev, int):
1680 1689 rev = nodeorrev
1681 1690 node = self.node(rev)
1682 1691 else:
1683 1692 node = nodeorrev
1684 1693 rev = None
1685 1694
1686 1695 cachedrev = None
1687 1696 flags = None
1688 1697 rawtext = None
1689 1698 if node == nullid:
1690 1699 return ""
1691 1700 if self._cache:
1692 1701 if self._cache[0] == node:
1693 1702 # _cache only stores rawtext
1694 1703 if raw:
1695 1704 return self._cache[2]
1696 1705 # duplicated, but good for perf
1697 1706 if rev is None:
1698 1707 rev = self.rev(node)
1699 1708 if flags is None:
1700 1709 flags = self.flags(rev)
1701 1710 # no extra flags set, no flag processor runs, text = rawtext
1702 1711 if flags == REVIDX_DEFAULT_FLAGS:
1703 1712 return self._cache[2]
1704 1713 # rawtext is reusable. need to run flag processor
1705 1714 rawtext = self._cache[2]
1706 1715
1707 1716 cachedrev = self._cache[1]
1708 1717
1709 1718 # look up what we need to read
1710 1719 if rawtext is None:
1711 1720 if rev is None:
1712 1721 rev = self.rev(node)
1713 1722
1714 1723 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1715 1724 if stopped:
1716 1725 rawtext = self._cache[2]
1717 1726
1718 1727 # drop cache to save memory
1719 1728 self._cache = None
1720 1729
1721 1730 bins = self._chunks(chain, df=_df)
1722 1731 if rawtext is None:
1723 1732 rawtext = bytes(bins[0])
1724 1733 bins = bins[1:]
1725 1734
1726 1735 rawtext = mdiff.patches(rawtext, bins)
1727 1736 self._cache = (node, rev, rawtext)
1728 1737
1729 1738 if flags is None:
1730 1739 if rev is None:
1731 1740 rev = self.rev(node)
1732 1741 flags = self.flags(rev)
1733 1742
1734 1743 text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
1735 1744 if validatehash:
1736 1745 self.checkhash(text, node, rev=rev)
1737 1746
1738 1747 return text
1739 1748
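The cache handling above has a fast path worth spelling out. A
simplified standalone sketch, assuming the same (node, rev, rawtext)
cache tuple, where flags == 0 plays the role of REVIDX_DEFAULT_FLAGS:

def cachedtext(cache, node, raw, flags):
    # return the cached rawtext when no flag processor needs to run,
    # or None when the caller must rebuild from the delta chain
    if cache is not None and cache[0] == node:
        if raw or flags == 0:
            return cache[2]
    return None
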
1740 1749 def hash(self, text, p1, p2):
1741 1750 """Compute a node hash.
1742 1751
1743 1752 Available as a function so that subclasses can replace the hash
1744 1753 as needed.
1745 1754 """
1746 1755 return hash(text, p1, p2)
1747 1756
1748 1757 def _processflags(self, text, flags, operation, raw=False):
1749 1758 """Inspect revision data flags and applies transforms defined by
1750 1759 registered flag processors.
1751 1760
1752 1761 ``text`` - the revision data to process
1753 1762 ``flags`` - the revision flags
1754 1763 ``operation`` - the operation being performed (read or write)
1755 1764 ``raw`` - an optional argument describing if the raw transform should be
1756 1765 applied.
1757 1766
1758 1767 This method processes the flags in the order (or reverse order if
1759 1768 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
1760 1769 flag processors registered for present flags. The order of flags defined
1761 1770 in REVIDX_FLAGS_ORDER needs to be stable because the transforms do not commute.
1762 1771
1763 1772 Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
1764 1773 processed text and ``validatehash`` is a bool indicating whether the
1765 1774 returned text should be checked for hash integrity.
1766 1775
1767 1776 Note: If the ``raw`` argument is set, it has precedence over the
1768 1777 operation and will only update the value of ``validatehash``.
1769 1778 """
1770 1779 # fast path: no flag processors will run
1771 1780 if flags == 0:
1772 1781 return text, True
1773 1782 if operation not in ('read', 'write'):
1774 1783 raise ProgrammingError(_("invalid '%s' operation") % operation)
1775 1784 # Check all flags are known.
1776 1785 if flags & ~REVIDX_KNOWN_FLAGS:
1777 1786 raise RevlogError(_("incompatible revision flag '%#x'") %
1778 1787 (flags & ~REVIDX_KNOWN_FLAGS))
1779 1788 validatehash = True
1780 1789 # Depending on the operation (read or write), the order might be
1781 1790 # reversed due to non-commutative transforms.
1782 1791 orderedflags = REVIDX_FLAGS_ORDER
1783 1792 if operation == 'write':
1784 1793 orderedflags = reversed(orderedflags)
1785 1794
1786 1795 for flag in orderedflags:
1787 1796 # If a flagprocessor has been registered for a known flag, apply the
1788 1797 # related operation transform and update result tuple.
1789 1798 if flag & flags:
1790 1799 vhash = True
1791 1800
1792 1801 if flag not in _flagprocessors:
1793 1802 message = _("missing processor for flag '%#x'") % (flag)
1794 1803 raise RevlogError(message)
1795 1804
1796 1805 processor = _flagprocessors[flag]
1797 1806 if processor is not None:
1798 1807 readtransform, writetransform, rawtransform = processor
1799 1808
1800 1809 if raw:
1801 1810 vhash = rawtransform(self, text)
1802 1811 elif operation == 'read':
1803 1812 text, vhash = readtransform(self, text)
1804 1813 else: # write operation
1805 1814 text, vhash = writetransform(self, text)
1806 1815 validatehash = validatehash and vhash
1807 1816
1808 1817 return text, validatehash
1809 1818
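Because the transforms do not commute, reading and writing must walk
the flags in opposite orders. A minimal standalone sketch with two
hypothetical flags; real processors take (self, text) and are looked up
in _flagprocessors:

FLAG_A = 1 << 0
FLAG_B = 1 << 1
ORDER = [FLAG_A, FLAG_B]
PROCESSORS = {
    # (read, write, raw) transforms, mirroring the docstring above
    FLAG_A: (lambda t: (t.upper(), True),
             lambda t: (t.lower(), True),
             lambda t: True),
}

def process(text, flags, operation):
    validate = True
    order = reversed(ORDER) if operation == 'write' else ORDER
    for flag in order:
        if flag & flags:
            read, write, raw = PROCESSORS[flag]
            text, ok = (read if operation == 'read' else write)(text)
            validate = validate and ok
    return text, validate

# process('abc', FLAG_A, 'read') -> ('ABC', True)
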
1810 1819 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1811 1820 """Check node hash integrity.
1812 1821
1813 1822 Available as a function so that subclasses can extend hash mismatch
1814 1823 behaviors as needed.
1815 1824 """
1816 1825 if p1 is None and p2 is None:
1817 1826 p1, p2 = self.parents(node)
1818 1827 if node != self.hash(text, p1, p2):
1819 1828 revornode = rev
1820 1829 if revornode is None:
1821 1830 revornode = templatefilters.short(hex(node))
1822 1831 raise RevlogError(_("integrity check failed on %s:%s")
1823 1832 % (self.indexfile, pycompat.bytestr(revornode)))
1824 1833
1825 1834 def checkinlinesize(self, tr, fp=None):
1826 1835 """Check if the revlog is too big for inline and convert if so.
1827 1836
1828 1837 This should be called after revisions are added to the revlog. If the
1829 1838 revlog has grown too large to be an inline revlog, it will convert it
1830 1839 to use multiple index and data files.
1831 1840 """
1832 1841 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
1833 1842 return
1834 1843
1835 1844 trinfo = tr.find(self.indexfile)
1836 1845 if trinfo is None:
1837 1846 raise RevlogError(_("%s not found in the transaction")
1838 1847 % self.indexfile)
1839 1848
1840 1849 trindex = trinfo[2]
1841 1850 if trindex is not None:
1842 1851 dataoff = self.start(trindex)
1843 1852 else:
1844 1853 # revlog was stripped at start of transaction, use all leftover data
1845 1854 trindex = len(self) - 1
1846 1855 dataoff = self.end(-2)
1847 1856
1848 1857 tr.add(self.datafile, dataoff)
1849 1858
1850 1859 if fp:
1851 1860 fp.flush()
1852 1861 fp.close()
1853 1862
1854 1863 df = self._datafp('w')
1855 1864 try:
1856 1865 for r in self:
1857 1866 df.write(self._getsegmentforrevs(r, r)[1])
1858 1867 finally:
1859 1868 df.close()
1860 1869
1861 fp = self.opener(self.indexfile, 'w', atomictemp=True,
1862 checkambig=self._checkambig)
1870 fp = self._indexfp('w')
1863 1871 self.version &= ~FLAG_INLINE_DATA
1864 1872 self._inline = False
1865 1873 for i in self:
1866 1874 e = self._io.packentry(self.index[i], self.node, self.version, i)
1867 1875 fp.write(e)
1868 1876
1869 1877 # if we don't call close, the temp file will never replace the
1870 1878 # real index
1871 1879 fp.close()
1872 1880
1873 1881 tr.replace(self.indexfile, trindex * self._io.size)
1874 1882 self._chunkclear()
1875 1883
1876 1884 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1877 1885 node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
1878 1886 """add a revision to the log
1879 1887
1880 1888 text - the revision data to add
1881 1889 transaction - the transaction object used for rollback
1882 1890 link - the linkrev data to add
1883 1891 p1, p2 - the parent nodeids of the revision
1884 1892 cachedelta - an optional precomputed delta
1885 1893 node - nodeid of revision; typically node is not specified, and it is
1886 1894 computed by default as hash(text, p1, p2), however subclasses might
1887 1895 use different hashing method (and override checkhash() in such case)
1888 1896 flags - the known flags to set on the revision
1889 1897 deltacomputer - an optional _deltacomputer instance shared between
1890 1898 multiple calls
1891 1899 """
1892 1900 if link == nullrev:
1893 1901 raise RevlogError(_("attempted to add linkrev -1 to %s")
1894 1902 % self.indexfile)
1895 1903
1896 1904 if flags:
1897 1905 node = node or self.hash(text, p1, p2)
1898 1906
1899 1907 rawtext, validatehash = self._processflags(text, flags, 'write')
1900 1908
1901 1909 # If the flag processor modifies the revision data, ignore any provided
1902 1910 # cachedelta.
1903 1911 if rawtext != text:
1904 1912 cachedelta = None
1905 1913
1906 1914 if len(rawtext) > _maxentrysize:
1907 1915 raise RevlogError(
1908 1916 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
1909 1917 % (self.indexfile, len(rawtext)))
1910 1918
1911 1919 node = node or self.hash(rawtext, p1, p2)
1912 1920 if node in self.nodemap:
1913 1921 return node
1914 1922
1915 1923 if validatehash:
1916 1924 self.checkhash(rawtext, node, p1=p1, p2=p2)
1917 1925
1918 1926 return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
1919 1927 flags, cachedelta=cachedelta,
1920 1928 deltacomputer=deltacomputer)
1921 1929
1922 1930 def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
1923 1931 cachedelta=None, deltacomputer=None):
1924 1932 """add a raw revision with known flags, node and parents
1925 1933 useful when reusing a revision not stored in this revlog (ex: received
1926 1934 over wire, or read from an external bundle).
1927 1935 """
1928 1936 dfh = None
1929 1937 if not self._inline:
1930 1938 dfh = self._datafp("a+")
1931 ifh = self.opener(self.indexfile, "a+", checkambig=self._checkambig)
1939 ifh = self._indexfp("a+")
1932 1940 try:
1933 1941 return self._addrevision(node, rawtext, transaction, link, p1, p2,
1934 1942 flags, cachedelta, ifh, dfh,
1935 1943 deltacomputer=deltacomputer)
1936 1944 finally:
1937 1945 if dfh:
1938 1946 dfh.close()
1939 1947 ifh.close()
1940 1948
1941 1949 def compress(self, data):
1942 1950 """Generate a possibly-compressed representation of data."""
1943 1951 if not data:
1944 1952 return '', data
1945 1953
1946 1954 compressed = self._compressor.compress(data)
1947 1955
1948 1956 if compressed:
1949 1957 # The revlog compressor added the header in the returned data.
1950 1958 return '', compressed
1951 1959
1952 1960 if data[0:1] == '\0':
1953 1961 return '', data
1954 1962 return 'u', data
1955 1963
1956 1964 def decompress(self, data):
1957 1965 """Decompress a revlog chunk.
1958 1966
1959 1967 The chunk is expected to begin with a header identifying the
1960 1968 format type so it can be routed to an appropriate decompressor.
1961 1969 """
1962 1970 if not data:
1963 1971 return data
1964 1972
1965 1973 # Revlogs are read much more frequently than they are written and many
1966 1974 # chunks only take microseconds to decompress, so performance is
1967 1975 # important here.
1968 1976 #
1969 1977 # We can make a few assumptions about revlogs:
1970 1978 #
1971 1979 # 1) the majority of chunks will be compressed (as opposed to inline
1972 1980 # raw data).
1973 1981 # 2) decompressing *any* data will likely be at least 10x slower than
1974 1982 # returning raw inline data.
1975 1983 # 3) we want to prioritize common and officially supported compression
1976 1984 # engines
1977 1985 #
1978 1986 # It follows that we want to optimize for "decompress compressed data
1979 1987 # when encoded with common and officially supported compression engines"
1980 1988 # case over "raw data" and "data encoded by less common or non-official
1981 1989 # compression engines." That is why we have the inline lookup first
1982 1990 # followed by the compengines lookup.
1983 1991 #
1984 1992 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
1985 1993 # compressed chunks. And this matters for changelog and manifest reads.
1986 1994 t = data[0:1]
1987 1995
1988 1996 if t == 'x':
1989 1997 try:
1990 1998 return _zlibdecompress(data)
1991 1999 except zlib.error as e:
1992 2000 raise RevlogError(_('revlog decompress error: %s') % str(e))
1993 2001 # '\0' is more common than 'u' so it goes first.
1994 2002 elif t == '\0':
1995 2003 return data
1996 2004 elif t == 'u':
1997 2005 return util.buffer(data, 1)
1998 2006
1999 2007 try:
2000 2008 compressor = self._decompressors[t]
2001 2009 except KeyError:
2002 2010 try:
2003 2011 engine = util.compengines.forrevlogheader(t)
2004 2012 compressor = engine.revlogcompressor()
2005 2013 self._decompressors[t] = compressor
2006 2014 except KeyError:
2007 2015 raise RevlogError(_('unknown compression type %r') % t)
2008 2016
2009 2017 return compressor.decompress(data)
2010 2018
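A self-contained sketch of the one-byte framing described above: 'x' is
simply the first byte of a standard zlib stream, while '\0' and 'u'
mark chunks stored verbatim (the function name is hypothetical):

import zlib

def decompresschunk(data):
    t = data[0:1]
    if t == b'x':
        return zlib.decompress(data)     # zlib stream, header included
    if t == b'\0':
        return data                      # raw data beginning with NUL
    if t == b'u':
        return data[1:]                  # explicitly marked uncompressed
    raise ValueError('unknown compression type %r' % t)

# round trip: decompresschunk(zlib.compress(b'spam')) == b'spam'
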
2011 2019 def _isgooddeltainfo(self, d, textlen):
2012 2020 """Returns True if the given delta is good. Good means that it is within
2013 2021 the disk span, disk size, and chain length bounds that we know to be
2014 2022 performant."""
2015 2023 if d is None:
2016 2024 return False
2017 2025
2018 2026 # - 'd.distance' is the distance from the base revision -- bounding it
2019 2027 # limits the amount of I/O we need to do.
2020 2028 # - 'd.compresseddeltalen' is the sum of the total size of deltas we
2021 2029 # need to apply -- bounding it limits the amount of CPU we consume.
2022 2030
2023 2031 defaultmax = textlen * 4
2024 2032 maxdist = self._maxdeltachainspan
2025 2033 if not maxdist:
2026 2034 maxdist = d.distance # ensure the conditional below passes
2027 2035 maxdist = max(maxdist, defaultmax)
2028 2036 if (d.distance > maxdist or d.deltalen > textlen or
2029 2037 d.compresseddeltalen > textlen * 2 or
2030 2038 (self._maxchainlen and d.chainlen > self._maxchainlen)):
2031 2039 return False
2032 2040
2033 2041 return True
2034 2042
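The bounds above reduce to a handful of comparisons. A standalone
sketch with the same semantics; a maxdeltachainspan of 0 disables the
span check and a maxchainlen of None disables the chain check:

def isgooddelta(distance, deltalen, compresseddeltalen, chainlen,
                textlen, maxdeltachainspan=0, maxchainlen=None):
    maxdist = maxdeltachainspan or distance   # 0 turns the check off
    maxdist = max(maxdist, textlen * 4)       # always allow 4x the text
    return not (distance > maxdist
                or deltalen > textlen
                or compresseddeltalen > textlen * 2
                or (maxchainlen and chainlen > maxchainlen))
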
2035 2043 def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
2036 2044 cachedelta, ifh, dfh, alwayscache=False,
2037 2045 deltacomputer=None):
2038 2046 """internal function to add revisions to the log
2039 2047
2040 2048 see addrevision for argument descriptions.
2041 2049
2042 2050 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2043 2051
2044 2052 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2045 2053 be used.
2046 2054
2047 2055 invariants:
2048 2056 - rawtext is optional (can be None); if not set, cachedelta must be set.
2049 2057 if both are set, they must correspond to each other.
2050 2058 """
2051 2059 if node == nullid:
2052 2060 raise RevlogError(_("%s: attempt to add null revision") %
2053 2061 (self.indexfile))
2054 2062 if node == wdirid:
2055 2063 raise RevlogError(_("%s: attempt to add wdir revision") %
2056 2064 (self.indexfile))
2057 2065
2058 2066 if self._inline:
2059 2067 fh = ifh
2060 2068 else:
2061 2069 fh = dfh
2062 2070
2063 2071 btext = [rawtext]
2064 2072
2065 2073 curr = len(self)
2066 2074 prev = curr - 1
2067 2075 offset = self.end(prev)
2068 2076 p1r, p2r = self.rev(p1), self.rev(p2)
2069 2077
2070 2078 # full versions are inserted when the needed deltas
2071 2079 # become comparable to the uncompressed text
2072 2080 if rawtext is None:
2073 2081 textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
2074 2082 cachedelta[1])
2075 2083 else:
2076 2084 textlen = len(rawtext)
2077 2085
2078 2086 if deltacomputer is None:
2079 2087 deltacomputer = _deltacomputer(self)
2080 2088
2081 2089 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2082 2090 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2083 2091
2084 2092 if deltainfo is not None:
2085 2093 base = deltainfo.base
2086 2094 chainbase = deltainfo.chainbase
2087 2095 data = deltainfo.data
2088 2096 l = deltainfo.deltalen
2089 2097 else:
2090 2098 rawtext = deltacomputer.buildtext(revinfo, fh)
2091 2099 data = self.compress(rawtext)
2092 2100 l = len(data[1]) + len(data[0])
2093 2101 base = chainbase = curr
2094 2102
2095 2103 e = (offset_type(offset, flags), l, textlen,
2096 2104 base, link, p1r, p2r, node)
2097 2105 self.index.insert(-1, e)
2098 2106 self.nodemap[node] = curr
2099 2107
2100 2108 entry = self._io.packentry(e, self.node, self.version, curr)
2101 2109 self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
2102 2110
2103 2111 if alwayscache and rawtext is None:
2104 2112 rawtext = deltacomputer.buildtext(revinfo, fh)
2105 2113
2106 2114 if type(rawtext) == bytes: # only accept immutable objects
2107 2115 self._cache = (node, curr, rawtext)
2108 2116 self._chainbasecache[curr] = chainbase
2109 2117 return node
2110 2118
2111 2119 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
2112 2120 # Files opened in a+ mode have inconsistent behavior on various
2113 2121 # platforms. Windows requires that a file positioning call be made
2114 2122 # when the file handle transitions between reads and writes. See
2115 2123 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2116 2124 # platforms, Python or the platform itself can be buggy. Some versions
2117 2125 # of Solaris have been observed to not append at the end of the file
2118 2126 # if the file was seeked to before the end. See issue4943 for more.
2119 2127 #
2120 2128 # We work around this issue by inserting a seek() before writing.
2121 2129 # Note: This is likely not necessary on Python 3.
2122 2130 ifh.seek(0, os.SEEK_END)
2123 2131 if dfh:
2124 2132 dfh.seek(0, os.SEEK_END)
2125 2133
2126 2134 curr = len(self) - 1
2127 2135 if not self._inline:
2128 2136 transaction.add(self.datafile, offset)
2129 2137 transaction.add(self.indexfile, curr * len(entry))
2130 2138 if data[0]:
2131 2139 dfh.write(data[0])
2132 2140 dfh.write(data[1])
2133 2141 ifh.write(entry)
2134 2142 else:
2135 2143 offset += curr * self._io.size
2136 2144 transaction.add(self.indexfile, offset, curr)
2137 2145 ifh.write(entry)
2138 2146 ifh.write(data[0])
2139 2147 ifh.write(data[1])
2140 2148 self.checkinlinesize(transaction, ifh)
2141 2149
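The workaround described in the comment above boils down to one habit:
reposition to EOF before every write through an 'a+' handle. A minimal
sketch (the function name is hypothetical):

import os

def appendsafely(fh, data):
    # some platforms mishandle 'a+' handles after a read or seek; an
    # explicit seek to end-of-file restores append semantics
    fh.seek(0, os.SEEK_END)
    fh.write(data)
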
2142 2150 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
2143 2151 """
2144 2152 add a delta group
2145 2153
2146 2154 given a set of deltas, add them to the revision log. the
2147 2155 first delta is against its parent, which should be in our
2148 2156 log, the rest are against the previous delta.
2149 2157
2150 2158 If ``addrevisioncb`` is defined, it will be called with arguments of
2151 2159 this revlog and the node that was added.
2152 2160 """
2153 2161
2154 2162 nodes = []
2155 2163
2156 2164 r = len(self)
2157 2165 end = 0
2158 2166 if r:
2159 2167 end = self.end(r - 1)
2160 ifh = self.opener(self.indexfile, "a+", checkambig=self._checkambig)
2168 ifh = self._indexfp("a+")
2161 2169 isize = r * self._io.size
2162 2170 if self._inline:
2163 2171 transaction.add(self.indexfile, end + isize, r)
2164 2172 dfh = None
2165 2173 else:
2166 2174 transaction.add(self.indexfile, isize, r)
2167 2175 transaction.add(self.datafile, end)
2168 2176 dfh = self._datafp("a+")
2169 2177 def flush():
2170 2178 if dfh:
2171 2179 dfh.flush()
2172 2180 ifh.flush()
2173 2181 try:
2174 2182 deltacomputer = _deltacomputer(self)
2175 2183 # loop through our set of deltas
2176 2184 for data in deltas:
2177 2185 node, p1, p2, linknode, deltabase, delta, flags = data
2178 2186 link = linkmapper(linknode)
2179 2187 flags = flags or REVIDX_DEFAULT_FLAGS
2180 2188
2181 2189 nodes.append(node)
2182 2190
2183 2191 if node in self.nodemap:
2184 2192 # this can happen if two branches make the same change
2185 2193 continue
2186 2194
2187 2195 for p in (p1, p2):
2188 2196 if p not in self.nodemap:
2189 2197 raise LookupError(p, self.indexfile,
2190 2198 _('unknown parent'))
2191 2199
2192 2200 if deltabase not in self.nodemap:
2193 2201 raise LookupError(deltabase, self.indexfile,
2194 2202 _('unknown delta base'))
2195 2203
2196 2204 baserev = self.rev(deltabase)
2197 2205
2198 2206 if baserev != nullrev and self.iscensored(baserev):
2199 2207 # if base is censored, delta must be full replacement in a
2200 2208 # single patch operation
2201 2209 hlen = struct.calcsize(">lll")
2202 2210 oldlen = self.rawsize(baserev)
2203 2211 newlen = len(delta) - hlen
2204 2212 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2205 2213 raise error.CensoredBaseError(self.indexfile,
2206 2214 self.node(baserev))
2207 2215
2208 2216 if not flags and self._peek_iscensored(baserev, delta, flush):
2209 2217 flags |= REVIDX_ISCENSORED
2210 2218
2211 2219 # We assume consumers of addrevisioncb will want to retrieve
2212 2220 # the added revision, which will require a call to
2213 2221 # revision(). revision() will fast path if there is a cache
2214 2222 # hit. So, we tell _addrevision() to always cache in this case.
2215 2223 # We're only using addgroup() in the context of changegroup
2216 2224 # generation so the revision data can always be handled as raw
2217 2225 # by the flagprocessor.
2218 2226 self._addrevision(node, None, transaction, link,
2219 2227 p1, p2, flags, (baserev, delta),
2220 2228 ifh, dfh,
2221 2229 alwayscache=bool(addrevisioncb),
2222 2230 deltacomputer=deltacomputer)
2223 2231
2224 2232 if addrevisioncb:
2225 2233 addrevisioncb(self, node)
2226 2234
2227 2235 if not dfh and not self._inline:
2228 2236 # addrevision switched from inline to conventional
2229 2237 # reopen the index
2230 2238 ifh.close()
2231 2239 dfh = self._datafp("a+")
2232 ifh = self.opener(self.indexfile, "a+",
2233 checkambig=self._checkambig)
2240 ifh = self._indexfp("a+")
2234 2241 finally:
2235 2242 if dfh:
2236 2243 dfh.close()
2237 2244 ifh.close()
2238 2245
2239 2246 return nodes
2240 2247
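For reference, a hedged sketch of the shape of one element of the
deltas iterable consumed above; every value is a made-up placeholder of
the right type, not real data:

exampledelta = (
    b'\x11' * 20,           # node: id of the revision being added
    b'\x22' * 20,           # p1: first parent, already in the revlog
    b'\x00' * 20,           # p2: nullid when there is no second parent
    b'\x44' * 20,           # linknode: mapped to a linkrev by linkmapper
    b'\x22' * 20,           # deltabase: node the delta applies against
    b'binary mdiff patch',  # delta: placeholder, not a valid patch
    0,                      # flags: REVIDX_* bits, 0 for the default
)
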
2241 2248 def iscensored(self, rev):
2242 2249 """Check if a file revision is censored."""
2243 2250 return False
2244 2251
2245 2252 def _peek_iscensored(self, baserev, delta, flush):
2246 2253 """Quickly check if a delta produces a censored revision."""
2247 2254 return False
2248 2255
2249 2256 def getstrippoint(self, minlink):
2250 2257 """find the minimum rev that must be stripped to strip the linkrev
2251 2258
2252 2259 Returns a tuple containing the minimum rev and a set of all revs that
2253 2260 have linkrevs that will be broken by this strip.
2254 2261 """
2255 2262 brokenrevs = set()
2256 2263 strippoint = len(self)
2257 2264
2258 2265 heads = {}
2259 2266 futurelargelinkrevs = set()
2260 2267 for head in self.headrevs():
2261 2268 headlinkrev = self.linkrev(head)
2262 2269 heads[head] = headlinkrev
2263 2270 if headlinkrev >= minlink:
2264 2271 futurelargelinkrevs.add(headlinkrev)
2265 2272
2266 2273 # This algorithm involves walking down the rev graph, starting at the
2267 2274 # heads. Since the revs are topologically sorted according to linkrev,
2268 2275 # once all head linkrevs are below the minlink, we know there are
2269 2276 # no more revs that could have a linkrev greater than minlink.
2270 2277 # So we can stop walking.
2271 2278 while futurelargelinkrevs:
2272 2279 strippoint -= 1
2273 2280 linkrev = heads.pop(strippoint)
2274 2281
2275 2282 if linkrev < minlink:
2276 2283 brokenrevs.add(strippoint)
2277 2284 else:
2278 2285 futurelargelinkrevs.remove(linkrev)
2279 2286
2280 2287 for p in self.parentrevs(strippoint):
2281 2288 if p != nullrev:
2282 2289 plinkrev = self.linkrev(p)
2283 2290 heads[p] = plinkrev
2284 2291 if plinkrev >= minlink:
2285 2292 futurelargelinkrevs.add(plinkrev)
2286 2293
2287 2294 return strippoint, brokenrevs
2288 2295
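As a toy illustration of the walk above, take a linear three-revision
history with linkrevs [0, 5, 2] and minlink = 3; on a linear history
the heads-based walk degenerates into a backward scan:

linkrevs = [0, 5, 2]
minlink = 3

strippoint = len(linkrevs)
while strippoint > 0 and max(linkrevs[:strippoint]) >= minlink:
    strippoint -= 1

brokenrevs = set(r for r in range(strippoint, len(linkrevs))
                 if linkrevs[r] < minlink)
# strippoint == 1: revs 1 and 2 must be stripped, and rev 2
# (linkrev 2 < 3) is reported as a broken linkrev
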
2289 2296 def strip(self, minlink, transaction):
2290 2297 """truncate the revlog on the first revision with a linkrev >= minlink
2291 2298
2292 2299 This function is called when we're stripping revision minlink and
2293 2300 its descendants from the repository.
2294 2301
2295 2302 We have to remove all revisions with linkrev >= minlink, because
2296 2303 the equivalent changelog revisions will be renumbered after the
2297 2304 strip.
2298 2305
2299 2306 So we truncate the revlog on the first of these revisions, and
2300 2307 trust that the caller has saved the revisions that shouldn't be
2301 2308 removed and that it'll re-add them after this truncation.
2302 2309 """
2303 2310 if len(self) == 0:
2304 2311 return
2305 2312
2306 2313 rev, _ = self.getstrippoint(minlink)
2307 2314 if rev == len(self):
2308 2315 return
2309 2316
2310 2317 # first truncate the files on disk
2311 2318 end = self.start(rev)
2312 2319 if not self._inline:
2313 2320 transaction.add(self.datafile, end)
2314 2321 end = rev * self._io.size
2315 2322 else:
2316 2323 end += rev * self._io.size
2317 2324
2318 2325 transaction.add(self.indexfile, end)
2319 2326
2320 2327 # then reset internal state in memory to forget those revisions
2321 2328 self._cache = None
2322 2329 self._chaininfocache = {}
2323 2330 self._chunkclear()
2324 2331 for x in xrange(rev, len(self)):
2325 2332 del self.nodemap[self.node(x)]
2326 2333
2327 2334 del self.index[rev:-1]
2328 2335
2329 2336 def checksize(self):
2330 2337 expected = 0
2331 2338 if len(self):
2332 2339 expected = max(0, self.end(len(self) - 1))
2333 2340
2334 2341 try:
2335 2342 f = self._datafp()
2336 2343 f.seek(0, 2)
2337 2344 actual = f.tell()
2338 2345 f.close()
2339 2346 dd = actual - expected
2340 2347 except IOError as inst:
2341 2348 if inst.errno != errno.ENOENT:
2342 2349 raise
2343 2350 dd = 0
2344 2351
2345 2352 try:
2346 2353 f = self.opener(self.indexfile)
2347 2354 f.seek(0, 2)
2348 2355 actual = f.tell()
2349 2356 f.close()
2350 2357 s = self._io.size
2351 2358 i = max(0, actual // s)
2352 2359 di = actual - (i * s)
2353 2360 if self._inline:
2354 2361 databytes = 0
2355 2362 for r in self:
2356 2363 databytes += max(0, self.length(r))
2357 2364 dd = 0
2358 2365 di = actual - len(self) * s - databytes
2359 2366 except IOError as inst:
2360 2367 if inst.errno != errno.ENOENT:
2361 2368 raise
2362 2369 di = 0
2363 2370
2364 2371 return (dd, di)
2365 2372
2366 2373 def files(self):
2367 2374 res = [self.indexfile]
2368 2375 if not self._inline:
2369 2376 res.append(self.datafile)
2370 2377 return res
2371 2378
2372 2379 DELTAREUSEALWAYS = 'always'
2373 2380 DELTAREUSESAMEREVS = 'samerevs'
2374 2381 DELTAREUSENEVER = 'never'
2375 2382
2376 2383 DELTAREUSEFULLADD = 'fulladd'
2377 2384
2378 2385 DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}
2379 2386
2380 2387 def clone(self, tr, destrevlog, addrevisioncb=None,
2381 2388 deltareuse=DELTAREUSESAMEREVS, aggressivemergedeltas=None):
2382 2389 """Copy this revlog to another, possibly with format changes.
2383 2390
2384 2391 The destination revlog will contain the same revisions and nodes.
2385 2392 However, it may not be bit-for-bit identical due to e.g. delta encoding
2386 2393 differences.
2387 2394
2388 2395 The ``deltareuse`` argument controls how deltas from the existing revlog
2389 2396 are preserved in the destination revlog. The argument can have the
2390 2397 following values:
2391 2398
2392 2399 DELTAREUSEALWAYS
2393 2400 Deltas will always be reused (if possible), even if the destination
2394 2401 revlog would not select the same revisions for the delta. This is the
2395 2402 fastest mode of operation.
2396 2403 DELTAREUSESAMEREVS
2397 2404 Deltas will be reused if the destination revlog would pick the same
2398 2405 revisions for the delta. This mode strikes a balance between speed
2399 2406 and optimization.
2400 2407 DELTAREUSENEVER
2401 2408 Deltas will never be reused. This is the slowest mode of execution.
2402 2409 This mode can be used to recompute deltas (e.g. if the diff/delta
2403 2410 algorithm changes).
2404 2411
2405 2412 Delta computation can be slow, so the choice of delta reuse policy can
2406 2413 significantly affect run time.
2407 2414
2408 2415 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2409 2416 two extremes. Deltas will be reused if they are appropriate. But if the
2410 2417 delta could choose a better revision, it will do so. This means if you
2411 2418 are converting a non-generaldelta revlog to a generaldelta revlog,
2412 2419 deltas will be recomputed if the delta's parent isn't a parent of the
2413 2420 revision.
2414 2421
2415 2422 In addition to the delta policy, the ``aggressivemergedeltas`` argument
2416 2423 controls whether to compute deltas against both parents for merges.
2417 2424 If not specified, the destination revlog's existing setting is kept.
2418 2425 """
2419 2426 if deltareuse not in self.DELTAREUSEALL:
2420 2427 raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)
2421 2428
2422 2429 if len(destrevlog):
2423 2430 raise ValueError(_('destination revlog is not empty'))
2424 2431
2425 2432 if getattr(self, 'filteredrevs', None):
2426 2433 raise ValueError(_('source revlog has filtered revisions'))
2427 2434 if getattr(destrevlog, 'filteredrevs', None):
2428 2435 raise ValueError(_('destination revlog has filtered revisions'))
2429 2436
2430 2437 # lazydeltabase controls whether to reuse a cached delta, if possible.
2431 2438 oldlazydeltabase = destrevlog._lazydeltabase
2432 2439 oldamd = destrevlog._aggressivemergedeltas
2433 2440
2434 2441 try:
2435 2442 if deltareuse == self.DELTAREUSEALWAYS:
2436 2443 destrevlog._lazydeltabase = True
2437 2444 elif deltareuse == self.DELTAREUSESAMEREVS:
2438 2445 destrevlog._lazydeltabase = False
2439 2446
2440 2447 destrevlog._aggressivemergedeltas = aggressivemergedeltas or oldamd
2441 2448
2442 2449 populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
2443 2450 self.DELTAREUSESAMEREVS)
2444 2451
2445 2452 deltacomputer = _deltacomputer(destrevlog)
2446 2453 index = self.index
2447 2454 for rev in self:
2448 2455 entry = index[rev]
2449 2456
2450 2457 # Some classes override linkrev to take filtered revs into
2451 2458 # account. Use raw entry from index.
2452 2459 flags = entry[0] & 0xffff
2453 2460 linkrev = entry[4]
2454 2461 p1 = index[entry[5]][7]
2455 2462 p2 = index[entry[6]][7]
2456 2463 node = entry[7]
2457 2464
2458 2465 # (Possibly) reuse the delta from the revlog if allowed and
2459 2466 # the revlog chunk is a delta.
2460 2467 cachedelta = None
2461 2468 rawtext = None
2462 2469 if populatecachedelta:
2463 2470 dp = self.deltaparent(rev)
2464 2471 if dp != nullrev:
2465 2472 cachedelta = (dp, str(self._chunk(rev)))
2466 2473
2467 2474 if not cachedelta:
2468 2475 rawtext = self.revision(rev, raw=True)
2469 2476
2470 2477
2471 2478 if deltareuse == self.DELTAREUSEFULLADD:
2472 2479 destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
2473 2480 cachedelta=cachedelta,
2474 2481 node=node, flags=flags,
2475 2482 deltacomputer=deltacomputer)
2476 2483 else:
2477 2484 ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
2478 2485 checkambig=False)
2479 2486 dfh = None
2480 2487 if not destrevlog._inline:
2481 2488 dfh = destrevlog.opener(destrevlog.datafile, 'a+')
2482 2489 try:
2483 2490 destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
2484 2491 p2, flags, cachedelta, ifh, dfh,
2485 2492 deltacomputer=deltacomputer)
2486 2493 finally:
2487 2494 if dfh:
2488 2495 dfh.close()
2489 2496 ifh.close()
2490 2497
2491 2498 if addrevisioncb:
2492 2499 addrevisioncb(self, rev, node)
2493 2500 finally:
2494 2501 destrevlog._lazydeltabase = oldlazydeltabase
2495 2502 destrevlog._aggressivemergedeltas = oldamd
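A hypothetical usage sketch of the policies above; src, dst and tr
stand in for a source revlog, an empty destination revlog and an open
transaction, none of which are defined here:

src.clone(tr, dst, deltareuse=src.DELTAREUSEALWAYS)    # fastest, reuse all
src.clone(tr, dst, deltareuse=src.DELTAREUSESAMEREVS)  # balanced default
src.clone(tr, dst, deltareuse=src.DELTAREUSENEVER)     # recompute everything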