revlog: make chainbase cache its result for the correct revision...
Paul Morelle
r38187:fc72beec default
@@ -1,2614 +1,2615
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import collections
17 17 import contextlib
18 18 import errno
19 19 import hashlib
20 20 import heapq
21 21 import os
22 22 import re
23 23 import struct
24 24 import zlib
25 25
26 26 # import stuff from node for others to import from revlog
27 27 from .node import (
28 28 bin,
29 29 hex,
30 30 nullid,
31 31 nullrev,
32 32 wdirfilenodeids,
33 33 wdirhex,
34 34 wdirid,
35 35 wdirrev,
36 36 )
37 37 from .i18n import _
38 38 from .thirdparty import (
39 39 attr,
40 40 )
41 41 from . import (
42 42 ancestor,
43 43 error,
44 44 mdiff,
45 45 policy,
46 46 pycompat,
47 47 templatefilters,
48 48 util,
49 49 )
50 50 from .utils import (
51 51 stringutil,
52 52 )
53 53
54 54 parsers = policy.importmod(r'parsers')
55 55
56 56 # Aliased for performance.
57 57 _zlibdecompress = zlib.decompress
58 58
59 59 # revlog header flags
60 60 REVLOGV0 = 0
61 61 REVLOGV1 = 1
62 62 # Dummy value until file format is finalized.
63 63 # Reminder: change the bounds check in revlog.__init__ when this is changed.
64 64 REVLOGV2 = 0xDEAD
65 65 FLAG_INLINE_DATA = (1 << 16)
66 66 FLAG_GENERALDELTA = (1 << 17)
67 67 REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA
68 68 REVLOG_DEFAULT_FORMAT = REVLOGV1
69 69 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
70 70 REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA
71 71 REVLOGV2_FLAGS = REVLOGV1_FLAGS
72 72
73 73 # revlog index flags
74 74 REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
75 75 REVIDX_ELLIPSIS = (1 << 14) # revision hash does not match data (narrowhg)
76 76 REVIDX_EXTSTORED = (1 << 13) # revision data is stored externally
77 77 REVIDX_DEFAULT_FLAGS = 0
78 78 # stable order in which flags need to be processed and their processors applied
79 79 REVIDX_FLAGS_ORDER = [
80 80 REVIDX_ISCENSORED,
81 81 REVIDX_ELLIPSIS,
82 82 REVIDX_EXTSTORED,
83 83 ]
84 84 REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)
85 85 # bitmask for flags that could cause rawdata content change
86 86 REVIDX_RAWTEXT_CHANGING_FLAGS = REVIDX_ISCENSORED | REVIDX_EXTSTORED
87 87
88 88 # max size of revlog with inline data
89 89 _maxinline = 131072
90 90 _chunksize = 1048576
91 91
92 92 RevlogError = error.RevlogError
93 93 LookupError = error.LookupError
94 94 CensoredNodeError = error.CensoredNodeError
95 95 ProgrammingError = error.ProgrammingError
96 96
97 97 # Store flag processors (cf. 'addflagprocessor()' to register)
98 98 _flagprocessors = {
99 99 REVIDX_ISCENSORED: None,
100 100 }
101 101
102 102 _mdre = re.compile('\1\n')
103 103 def parsemeta(text):
104 104 """return (metadatadict, metadatasize)"""
105 105 # text can be buffer, so we can't use .startswith or .index
106 106 if text[:2] != '\1\n':
107 107 return None, None
108 108 s = _mdre.search(text, 2).start()
109 109 mtext = text[2:s]
110 110 meta = {}
111 111 for l in mtext.splitlines():
112 112 k, v = l.split(": ", 1)
113 113 meta[k] = v
114 114 return meta, (s + 2)
115 115
116 116 def packmeta(meta, text):
117 117 keys = sorted(meta)
118 118 metatext = "".join("%s: %s\n" % (k, meta[k]) for k in keys)
119 119 return "\1\n%s\1\n%s" % (metatext, text)
120 120
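# Editor's sketch (illustrative, not part of the original file): copy
# metadata is framed between two "\1\n" markers ahead of the file text;
# packmeta() writes that frame and parsemeta() reads it back.
def _demometa():
    from mercurial.revlog import packmeta, parsemeta
    meta, size = parsemeta(packmeta({'copy': 'a'}, 'file content'))
    assert meta == {'copy': 'a'}
    # everything past the parsed header is the original text
    assert packmeta(meta, 'file content')[size:] == 'file content'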
121 121 def _censoredtext(text):
122 122 m, offs = parsemeta(text)
123 123 return m and "censored" in m
124 124
125 125 def addflagprocessor(flag, processor):
126 126 """Register a flag processor on a revision data flag.
127 127
128 128 Invariant:
129 129 - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
130 130 and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
131 131 - Only one flag processor can be registered on a specific flag.
132 132 - flagprocessors must be 3-tuples of functions (read, write, raw) with the
133 133 following signatures:
134 134 - (read) f(self, rawtext) -> text, bool
135 135 - (write) f(self, text) -> rawtext, bool
136 136 - (raw) f(self, rawtext) -> bool
137 137 "text" is presented to the user. "rawtext" is stored in revlog data, not
138 138 directly visible to the user.
139 139 The boolean returned by these transforms is used to determine whether
140 140 the returned text can be used for hash integrity checking. For example,
141 141 if "write" returns False, then "text" is used to generate hash. If
142 142 "write" returns True, that basically means "rawtext" returned by "write"
143 143 should be used to generate hash. Usually, "write" and "read" return
144 144 different booleans, and "raw" returns the same boolean as "write".
145 145
146 146 Note: The 'raw' transform is used for changegroup generation and in some
147 147 debug commands. In this case the transform only indicates whether the
148 148 contents can be used for hash integrity checks.
149 149 """
150 150 if not flag & REVIDX_KNOWN_FLAGS:
151 151 msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
152 152 raise ProgrammingError(msg)
153 153 if flag not in REVIDX_FLAGS_ORDER:
154 154 msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
155 155 raise ProgrammingError(msg)
156 156 if flag in _flagprocessors:
157 157 msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
158 158 raise error.Abort(msg)
159 159 _flagprocessors[flag] = processor
160 160
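# Editor's sketch (illustrative, not part of the original file): a processor
# is a (read, write, raw) triple. This hypothetical no-op registers against
# REVIDX_ELLIPSIS purely for demonstration (registration is one-shot; doing
# it twice aborts); returning True means the result is safe for hash checks.
def _demoflagprocessor():
    from mercurial.revlog import REVIDX_ELLIPSIS, addflagprocessor
    def _read(rl, rawtext):
        return rawtext, True
    def _write(rl, text):
        return text, True
    def _raw(rl, rawtext):
        return True
    addflagprocessor(REVIDX_ELLIPSIS, (_read, _write, _raw))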
161 161 def getoffset(q):
162 162 return int(q >> 16)
163 163
164 164 def gettype(q):
165 165 return int(q & 0xFFFF)
166 166
167 167 def offset_type(offset, type):
168 168 if (type & ~REVIDX_KNOWN_FLAGS) != 0:
169 169 raise ValueError('unknown revlog index flags')
170 170 return int(int(offset) << 16 | type)
171 171
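# Editor's sketch (illustrative, not part of the original file): the first
# index field packs a 48-bit byte offset above 16 bits of flags; getoffset()
# and gettype() undo what offset_type() packs.
def _demooffsettype():
    from mercurial.revlog import (REVIDX_ISCENSORED, getoffset, gettype,
                                  offset_type)
    packed = offset_type(4096, REVIDX_ISCENSORED)
    assert getoffset(packed) == 4096
    assert gettype(packed) == REVIDX_ISCENSORED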
172 172 _nullhash = hashlib.sha1(nullid)
173 173
174 174 def hash(text, p1, p2):
175 175 """generate a hash from the given text and its parent hashes
176 176
177 177 This hash combines both the current file contents and its history
178 178 in a manner that makes it easy to distinguish nodes with the same
179 179 content in the revision graph.
180 180 """
181 181 # As of now, if one of the parent nodes is null, p2 is null
182 182 if p2 == nullid:
183 183 # deep copy of a hash is faster than creating one
184 184 s = _nullhash.copy()
185 185 s.update(p1)
186 186 else:
187 187 # none of the parent nodes are nullid
188 188 if p1 < p2:
189 189 a = p1
190 190 b = p2
191 191 else:
192 192 a = p2
193 193 b = p1
194 194 s = hashlib.sha1(a)
195 195 s.update(b)
196 196 s.update(text)
197 197 return s.digest()
198 198
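# Editor's sketch (illustrative, not part of the original file): the node id
# is sha1(min(p1, p2) + max(p1, p2) + text), so swapping parents never
# changes the hash; the p2 == nullid fast path above gives the same digest.
def _demohash():
    import hashlib
    from mercurial.node import nullid
    from mercurial.revlog import hash as storagehash
    text, p1 = 'content', '\x11' * 20
    assert storagehash(text, p1, nullid) == \
        hashlib.sha1(nullid + p1 + text).digest()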
199 199 def _trimchunk(revlog, revs, startidx, endidx=None):
200 200 """returns revs[startidx:endidx] without empty trailing revs
201 201 """
202 202 length = revlog.length
203 203
204 204 if endidx is None:
205 205 endidx = len(revs)
206 206
207 207 # Trim empty revs at the end, but never the very first revision of a chain
208 208 while endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0:
209 209 endidx -= 1
210 210
211 211 return revs[startidx:endidx]
212 212
213 213 def _slicechunk(revlog, revs):
214 214 """slice revs to reduce the amount of unrelated data to be read from disk.
215 215
216 216 ``revs`` is sliced into groups that should be read in one go.
217 217 Assume that revs are sorted.
218 218 """
219 219 start = revlog.start
220 220 length = revlog.length
221 221
222 222 if len(revs) <= 1:
223 223 yield revs
224 224 return
225 225
226 226 startbyte = start(revs[0])
227 227 endbyte = start(revs[-1]) + length(revs[-1])
228 228 readdata = deltachainspan = endbyte - startbyte
229 229
230 230 chainpayload = sum(length(r) for r in revs)
231 231
232 232 if deltachainspan:
233 233 density = chainpayload / float(deltachainspan)
234 234 else:
235 235 density = 1.0
236 236
237 237 # Store the gaps in a heap to have them sorted by decreasing size
238 238 gapsheap = []
239 239 heapq.heapify(gapsheap)
240 240 prevend = None
241 241 for i, rev in enumerate(revs):
242 242 revstart = start(rev)
243 243 revlen = length(rev)
244 244
245 245 # Skip empty revisions to form larger holes
246 246 if revlen == 0:
247 247 continue
248 248
249 249 if prevend is not None:
250 250 gapsize = revstart - prevend
251 251 # only consider holes that are large enough
252 252 if gapsize > revlog._srmingapsize:
253 253 heapq.heappush(gapsheap, (-gapsize, i))
254 254
255 255 prevend = revstart + revlen
256 256
257 257 # Collect the indices of the largest holes until the density is acceptable
258 258 indicesheap = []
259 259 heapq.heapify(indicesheap)
260 260 while gapsheap and density < revlog._srdensitythreshold:
261 261 oppgapsize, gapidx = heapq.heappop(gapsheap)
262 262
263 263 heapq.heappush(indicesheap, gapidx)
264 264
265 265 # gap sizes are stored as negatives so the min-heap pops the
266 266 # largest gap first
267 267 readdata -= (-oppgapsize)
268 268 if readdata > 0:
269 269 density = chainpayload / float(readdata)
270 270 else:
271 271 density = 1.0
272 272
273 273 # Cut the revs at collected indices
274 274 previdx = 0
275 275 while indicesheap:
276 276 idx = heapq.heappop(indicesheap)
277 277
278 278 chunk = _trimchunk(revlog, revs, previdx, idx)
279 279 if chunk:
280 280 yield chunk
281 281
282 282 previdx = idx
283 283
284 284 chunk = _trimchunk(revlog, revs, previdx)
285 285 if chunk:
286 286 yield chunk
287 287
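# Editor's sketch (illustrative, not part of the original file): _slicechunk
# only calls start() and length() and reads the two sparse-read knobs, so a
# tiny fake revlog is enough to watch a large gap split one read into two.
def _demoslicechunk():
    from mercurial.revlog import _slicechunk
    class fakerevlog(object):
        _srdensitythreshold = 0.5
        _srmingapsize = 10
        # revs 0 and 1 are adjacent; rev 2 sits after a 100-byte hole
        _chunks = {0: (0, 10), 1: (10, 10), 2: (120, 10)}
        def start(self, rev):
            return self._chunks[rev][0]
        def length(self, rev):
            return self._chunks[rev][1]
    assert list(_slicechunk(fakerevlog(), [0, 1, 2])) == [[0, 1], [2]]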
288 288 @attr.s(slots=True, frozen=True)
289 289 class _deltainfo(object):
290 290 distance = attr.ib()
291 291 deltalen = attr.ib()
292 292 data = attr.ib()
293 293 base = attr.ib()
294 294 chainbase = attr.ib()
295 295 chainlen = attr.ib()
296 296 compresseddeltalen = attr.ib()
297 297
298 298 class _deltacomputer(object):
299 299 def __init__(self, revlog):
300 300 self.revlog = revlog
301 301
302 302 def _getcandidaterevs(self, p1, p2, cachedelta):
303 303 """
304 304 Provides revisions that are worth diffing against,
305 305 grouped by decreasing preference (cheapest candidates first).
306 306 """
307 307 revlog = self.revlog
308 308 gdelta = revlog._generaldelta
309 309 curr = len(revlog)
310 310 prev = curr - 1
311 311 p1r, p2r = revlog.rev(p1), revlog.rev(p2)
312 312
313 313 # should we try to build a delta?
314 314 if prev != nullrev and revlog.storedeltachains:
315 315 tested = set()
316 316 # This condition is true most of the time when processing
317 317 # changegroup data into a generaldelta repo. The only time it
318 318 # isn't true is if this is the first revision in a delta chain
319 319 # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
320 320 if cachedelta and gdelta and revlog._lazydeltabase:
321 321 # Assume what we received from the server is a good choice
322 322 # and building the delta will reuse the cache
323 323 yield (cachedelta[0],)
324 324 tested.add(cachedelta[0])
325 325
326 326 if gdelta:
327 327 # exclude the base already tested via lazydeltabase, if any
328 328 parents = [p for p in (p1r, p2r)
329 329 if p != nullrev and p not in tested]
330 330
331 331 if not revlog._aggressivemergedeltas and len(parents) == 2:
332 332 parents.sort()
333 333 # To minimize the chance of having to build a fulltext,
334 334 # pick first whichever parent is closest to us (max rev)
335 335 yield (parents[1],)
336 336 # then the other one (min rev) if the first did not fit
337 337 yield (parents[0],)
338 338 tested.update(parents)
339 339 elif len(parents) > 0:
340 340 # Test all parents (1 or 2), and keep the best candidate
341 341 yield parents
342 342 tested.update(parents)
343 343
344 344 if prev not in tested:
345 345 # other approaches failed; try against prev to hopefully save us a
346 346 # fulltext.
347 347 yield (prev,)
348 348 tested.add(prev)
349 349
350 350 def buildtext(self, revinfo, fh):
351 351 """Builds a fulltext version of a revision
352 352
353 353 revinfo: _revisioninfo instance that contains all needed info
354 354 fh: file handle to either the .i or the .d revlog file,
355 355 depending on whether it is inlined or not
356 356 """
357 357 btext = revinfo.btext
358 358 if btext[0] is not None:
359 359 return btext[0]
360 360
361 361 revlog = self.revlog
362 362 cachedelta = revinfo.cachedelta
363 363 flags = revinfo.flags
364 364 node = revinfo.node
365 365
366 366 baserev = cachedelta[0]
367 367 delta = cachedelta[1]
368 368 # special case deltas which replace entire base; no need to decode
369 369 # base revision. this neatly avoids censored bases, which throw when
370 370 # they're decoded.
371 371 hlen = struct.calcsize(">lll")
372 372 if delta[:hlen] == mdiff.replacediffheader(revlog.rawsize(baserev),
373 373 len(delta) - hlen):
374 374 btext[0] = delta[hlen:]
375 375 else:
376 376 # deltabase is rawtext before it was changed by flag processors,
377 377 # which is equivalent to non-raw text
378 378 basetext = revlog.revision(baserev, _df=fh, raw=False)
379 379 btext[0] = mdiff.patch(basetext, delta)
380 380
381 381 try:
382 382 res = revlog._processflags(btext[0], flags, 'read', raw=True)
383 383 btext[0], validatehash = res
384 384 if validatehash:
385 385 revlog.checkhash(btext[0], node, p1=revinfo.p1, p2=revinfo.p2)
386 386 if flags & REVIDX_ISCENSORED:
387 387 raise RevlogError(_('node %s is not censored') % node)
388 388 except CensoredNodeError:
389 389 # must pass the censored index flag to add censored revisions
390 390 if not flags & REVIDX_ISCENSORED:
391 391 raise
392 392 return btext[0]
393 393
394 394 def _builddeltadiff(self, base, revinfo, fh):
395 395 revlog = self.revlog
396 396 t = self.buildtext(revinfo, fh)
397 397 if revlog.iscensored(base):
398 398 # deltas based on a censored revision must replace the
399 399 # full content in one patch, so delta works everywhere
400 400 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
401 401 delta = header + t
402 402 else:
403 403 ptext = revlog.revision(base, _df=fh, raw=True)
404 404 delta = mdiff.textdiff(ptext, t)
405 405
406 406 return delta
407 407
408 408 def _builddeltainfo(self, revinfo, base, fh):
409 409 # can we use the cached delta?
410 410 if revinfo.cachedelta and revinfo.cachedelta[0] == base:
411 411 delta = revinfo.cachedelta[1]
412 412 else:
413 413 delta = self._builddeltadiff(base, revinfo, fh)
414 414 revlog = self.revlog
415 415 header, data = revlog.compress(delta)
416 416 deltalen = len(header) + len(data)
417 417 chainbase = revlog.chainbase(base)
418 418 offset = revlog.end(len(revlog) - 1)
419 419 dist = deltalen + offset - revlog.start(chainbase)
420 420 if revlog._generaldelta:
421 421 deltabase = base
422 422 else:
423 423 deltabase = chainbase
424 424 chainlen, compresseddeltalen = revlog._chaininfo(base)
425 425 chainlen += 1
426 426 compresseddeltalen += deltalen
427 427 return _deltainfo(dist, deltalen, (header, data), deltabase,
428 428 chainbase, chainlen, compresseddeltalen)
429 429
430 430 def finddeltainfo(self, revinfo, fh):
431 431 """Find an acceptable delta against a candidate revision
432 432
433 433 revinfo: information about the revision (instance of _revisioninfo)
434 434 fh: file handle to either the .i or the .d revlog file,
435 435 depending on whether it is inlined or not
436 436
437 437 Returns the first acceptable candidate revision, as ordered by
438 438 _getcandidaterevs
439 439 """
440 440 cachedelta = revinfo.cachedelta
441 441 p1 = revinfo.p1
442 442 p2 = revinfo.p2
443 443 revlog = self.revlog
444 444
445 445 deltainfo = None
446 446 for candidaterevs in self._getcandidaterevs(p1, p2, cachedelta):
447 447 nominateddeltas = []
448 448 for candidaterev in candidaterevs:
449 449 # no delta for rawtext-changing revs (see "candelta" for why)
450 450 if revlog.flags(candidaterev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
451 451 continue
452 452 candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
453 453 if revlog._isgooddeltainfo(candidatedelta, revinfo):
454 454 nominateddeltas.append(candidatedelta)
455 455 if nominateddeltas:
456 456 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
457 457 break
458 458
459 459 return deltainfo
460 460
461 461 @attr.s(slots=True, frozen=True)
462 462 class _revisioninfo(object):
463 463 """Information about a revision that allows building its fulltext
464 464 node: expected hash of the revision
465 465 p1, p2: parent revs of the revision
466 466 btext: built text cache consisting of a one-element list
467 467 cachedelta: (baserev, uncompressed_delta) or None
468 468 flags: flags associated with the revision storage
469 469
470 470 One of btext[0] or cachedelta must be set.
471 471 """
472 472 node = attr.ib()
473 473 p1 = attr.ib()
474 474 p2 = attr.ib()
475 475 btext = attr.ib()
476 476 textlen = attr.ib()
477 477 cachedelta = attr.ib()
478 478 flags = attr.ib()
479 479
480 480 # index v0:
481 481 # 4 bytes: offset
482 482 # 4 bytes: compressed length
483 483 # 4 bytes: base rev
484 484 # 4 bytes: link rev
485 485 # 20 bytes: parent 1 nodeid
486 486 # 20 bytes: parent 2 nodeid
487 487 # 20 bytes: nodeid
488 488 indexformatv0 = struct.Struct(">4l20s20s20s")
489 489 indexformatv0_pack = indexformatv0.pack
490 490 indexformatv0_unpack = indexformatv0.unpack
491 491
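# Editor's sketch (illustrative, not part of the original file): a v0 index
# entry is four signed 32-bit ints followed by three 20-byte nodeids, 76
# bytes in total, and packing round-trips cleanly.
def _demov0entry():
    from mercurial.node import nullid
    from mercurial.revlog import (indexformatv0, indexformatv0_pack,
                                  indexformatv0_unpack)
    entry = (0, 11, 0, 0, nullid, nullid, '\xaa' * 20)
    packed = indexformatv0_pack(*entry)
    assert len(packed) == indexformatv0.size == 76
    assert indexformatv0_unpack(packed) == entry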
492 492 class revlogoldio(object):
493 493 def __init__(self):
494 494 self.size = indexformatv0.size
495 495
496 496 def parseindex(self, data, inline):
497 497 s = self.size
498 498 index = []
499 499 nodemap = {nullid: nullrev}
500 500 n = off = 0
501 501 l = len(data)
502 502 while off + s <= l:
503 503 cur = data[off:off + s]
504 504 off += s
505 505 e = indexformatv0_unpack(cur)
506 506 # transform to revlogv1 format
507 507 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
508 508 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
509 509 index.append(e2)
510 510 nodemap[e[6]] = n
511 511 n += 1
512 512
513 513 # add the magic null revision at -1
514 514 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
515 515
516 516 return index, nodemap, None
517 517
518 518 def packentry(self, entry, node, version, rev):
519 519 if gettype(entry[0]):
520 520 raise RevlogError(_('index entry flags need revlog version 1'))
521 521 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
522 522 node(entry[5]), node(entry[6]), entry[7])
523 523 return indexformatv0_pack(*e2)
524 524
525 525 # index ng:
526 526 # 6 bytes: offset
527 527 # 2 bytes: flags
528 528 # 4 bytes: compressed length
529 529 # 4 bytes: uncompressed length
530 530 # 4 bytes: base rev
531 531 # 4 bytes: link rev
532 532 # 4 bytes: parent 1 rev
533 533 # 4 bytes: parent 2 rev
534 534 # 32 bytes: nodeid
535 535 indexformatng = struct.Struct(">Qiiiiii20s12x")
536 536 indexformatng_pack = indexformatng.pack
537 537 versionformat = struct.Struct(">I")
538 538 versionformat_pack = versionformat.pack
539 539 versionformat_unpack = versionformat.unpack
540 540
541 541 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
542 542 # signed integer)
543 543 _maxentrysize = 0x7fffffff
544 544
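# Editor's sketch (illustrative, not part of the original file): an "ng"
# entry is 64 bytes; for rev 0, revlogio.packentry() splices the version
# header over the first 4 bytes of the offset+flags field, which is safe
# because rev 0 always starts at offset 0.
def _demongentry():
    from mercurial.revlog import (REVLOG_DEFAULT_VERSION, indexformatng,
                                  indexformatng_pack, offset_type,
                                  versionformat_pack)
    entry = (offset_type(0, 0), 11, 11, 0, 0, -1, -1, '\xaa' * 20)
    packed = indexformatng_pack(*entry)
    assert len(packed) == indexformatng.size == 64
    rev0 = versionformat_pack(REVLOG_DEFAULT_VERSION) + packed[4:]
    assert len(rev0) == 64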
545 545 class revlogio(object):
546 546 def __init__(self):
547 547 self.size = indexformatng.size
548 548
549 549 def parseindex(self, data, inline):
550 550 # call the C implementation to parse the index data
551 551 index, cache = parsers.parse_index2(data, inline)
552 552 return index, getattr(index, 'nodemap', None), cache
553 553
554 554 def packentry(self, entry, node, version, rev):
555 555 p = indexformatng_pack(*entry)
556 556 if rev == 0:
557 557 p = versionformat_pack(version) + p[4:]
558 558 return p
559 559
560 560 class revlog(object):
561 561 """
562 562 the underlying revision storage object
563 563
564 564 A revlog consists of two parts, an index and the revision data.
565 565
566 566 The index is a file with a fixed record size containing
567 567 information on each revision, including its nodeid (hash), the
568 568 nodeids of its parents, the position and offset of its data within
569 569 the data file, and the revision it's based on. Finally, each entry
570 570 contains a linkrev entry that can serve as a pointer to external
571 571 data.
572 572
573 573 The revision data itself is a linear collection of data chunks.
574 574 Each chunk represents a revision and is usually represented as a
575 575 delta against the previous chunk. To bound lookup time, runs of
576 576 deltas are limited to about 2 times the length of the original
577 577 version data. This makes retrieval of a version proportional to
578 578 its size, or O(1) relative to the number of revisions.
579 579
580 580 Both pieces of the revlog are written to in an append-only
581 581 fashion, which means we never need to rewrite a file to insert or
582 582 remove data, and can use some simple techniques to avoid the need
583 583 for locking while reading.
584 584
585 585 If checkambig, indexfile is opened with checkambig=True at
586 586 writing, to avoid file stat ambiguity.
587 587
588 588 If mmaplargeindex is True, and an mmapindexthreshold is set, the
589 589 index will be mmapped rather than read if it is larger than the
590 590 configured threshold.
591 591
592 592 If censorable is True, the revlog can have censored revisions.
593 593 """
594 594 def __init__(self, opener, indexfile, datafile=None, checkambig=False,
595 595 mmaplargeindex=False, censorable=False):
596 596 """
597 597 create a revlog object
598 598
599 599 opener is a function that abstracts the file opening operation
600 600 and can be used to implement COW semantics or the like.
601 601 """
602 602 self.indexfile = indexfile
603 603 self.datafile = datafile or (indexfile[:-2] + ".d")
604 604 self.opener = opener
605 605 # When True, indexfile is opened with checkambig=True at writing, to
606 606 # avoid file stat ambiguity.
607 607 self._checkambig = checkambig
608 608 self._censorable = censorable
609 609 # 3-tuple of (node, rev, text) for a raw revision.
610 610 self._cache = None
611 611 # Maps rev to chain base rev.
612 612 self._chainbasecache = util.lrucachedict(100)
613 613 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
614 614 self._chunkcache = (0, '')
615 615 # How much data to read and cache into the raw revlog data cache.
616 616 self._chunkcachesize = 65536
617 617 self._maxchainlen = None
618 618 self._aggressivemergedeltas = False
619 619 self.index = []
620 620 # Mapping of partial identifiers to full nodes.
621 621 self._pcache = {}
622 622 # Mapping of revision integer to full node.
623 623 self._nodecache = {nullid: nullrev}
624 624 self._nodepos = None
625 625 self._compengine = 'zlib'
626 626 self._maxdeltachainspan = -1
627 627 self._withsparseread = False
628 628 self._srdensitythreshold = 0.25
629 629 self._srmingapsize = 262144
630 630
631 631 mmapindexthreshold = None
632 632 v = REVLOG_DEFAULT_VERSION
633 633 opts = getattr(opener, 'options', None)
634 634 if opts is not None:
635 635 if 'revlogv2' in opts:
636 636 # version 2 revlogs always use generaldelta.
637 637 v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
638 638 elif 'revlogv1' in opts:
639 639 if 'generaldelta' in opts:
640 640 v |= FLAG_GENERALDELTA
641 641 else:
642 642 v = 0
643 643 if 'chunkcachesize' in opts:
644 644 self._chunkcachesize = opts['chunkcachesize']
645 645 if 'maxchainlen' in opts:
646 646 self._maxchainlen = opts['maxchainlen']
647 647 if 'aggressivemergedeltas' in opts:
648 648 self._aggressivemergedeltas = opts['aggressivemergedeltas']
649 649 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
650 650 if 'compengine' in opts:
651 651 self._compengine = opts['compengine']
652 652 if 'maxdeltachainspan' in opts:
653 653 self._maxdeltachainspan = opts['maxdeltachainspan']
654 654 if mmaplargeindex and 'mmapindexthreshold' in opts:
655 655 mmapindexthreshold = opts['mmapindexthreshold']
656 656 self._withsparseread = bool(opts.get('with-sparse-read', False))
657 657 if 'sparse-read-density-threshold' in opts:
658 658 self._srdensitythreshold = opts['sparse-read-density-threshold']
659 659 if 'sparse-read-min-gap-size' in opts:
660 660 self._srmingapsize = opts['sparse-read-min-gap-size']
661 661
662 662 if self._chunkcachesize <= 0:
663 663 raise RevlogError(_('revlog chunk cache size %r is not greater '
664 664 'than 0') % self._chunkcachesize)
665 665 elif self._chunkcachesize & (self._chunkcachesize - 1):
666 666 raise RevlogError(_('revlog chunk cache size %r is not a power '
667 667 'of 2') % self._chunkcachesize)
668 668
669 669 indexdata = ''
670 670 self._initempty = True
671 671 try:
672 672 with self._indexfp() as f:
673 673 if (mmapindexthreshold is not None and
674 674 self.opener.fstat(f).st_size >= mmapindexthreshold):
675 675 indexdata = util.buffer(util.mmapread(f))
676 676 else:
677 677 indexdata = f.read()
678 678 if len(indexdata) > 0:
679 679 v = versionformat_unpack(indexdata[:4])[0]
680 680 self._initempty = False
681 681 except IOError as inst:
682 682 if inst.errno != errno.ENOENT:
683 683 raise
684 684
685 685 self.version = v
686 686 self._inline = v & FLAG_INLINE_DATA
687 687 self._generaldelta = v & FLAG_GENERALDELTA
688 688 flags = v & ~0xFFFF
689 689 fmt = v & 0xFFFF
690 690 if fmt == REVLOGV0:
691 691 if flags:
692 692 raise RevlogError(_('unknown flags (%#04x) in version %d '
693 693 'revlog %s') %
694 694 (flags >> 16, fmt, self.indexfile))
695 695 elif fmt == REVLOGV1:
696 696 if flags & ~REVLOGV1_FLAGS:
697 697 raise RevlogError(_('unknown flags (%#04x) in version %d '
698 698 'revlog %s') %
699 699 (flags >> 16, fmt, self.indexfile))
700 700 elif fmt == REVLOGV2:
701 701 if flags & ~REVLOGV2_FLAGS:
702 702 raise RevlogError(_('unknown flags (%#04x) in version %d '
703 703 'revlog %s') %
704 704 (flags >> 16, fmt, self.indexfile))
705 705 else:
706 706 raise RevlogError(_('unknown version (%d) in revlog %s') %
707 707 (fmt, self.indexfile))
708 708
709 709 self.storedeltachains = True
710 710
711 711 self._io = revlogio()
712 712 if self.version == REVLOGV0:
713 713 self._io = revlogoldio()
714 714 try:
715 715 d = self._io.parseindex(indexdata, self._inline)
716 716 except (ValueError, IndexError):
717 717 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
718 718 self.index, nodemap, self._chunkcache = d
719 719 if nodemap is not None:
720 720 self.nodemap = self._nodecache = nodemap
721 721 if not self._chunkcache:
722 722 self._chunkclear()
723 723 # revnum -> (chain-length, sum-delta-length)
724 724 self._chaininfocache = {}
725 725 # revlog header -> revlog compressor
726 726 self._decompressors = {}
727 727
728 728 @util.propertycache
729 729 def _compressor(self):
730 730 return util.compengines[self._compengine].revlogcompressor()
731 731
732 732 def _indexfp(self, mode='r'):
733 733 """file object for the revlog's index file"""
734 734 args = {r'mode': mode}
735 735 if mode != 'r':
736 736 args[r'checkambig'] = self._checkambig
737 737 if mode == 'w':
738 738 args[r'atomictemp'] = True
739 739 return self.opener(self.indexfile, **args)
740 740
741 741 def _datafp(self, mode='r'):
742 742 """file object for the revlog's data file"""
743 743 return self.opener(self.datafile, mode=mode)
744 744
745 745 @contextlib.contextmanager
746 746 def _datareadfp(self, existingfp=None):
747 747 """file object suitable to read data"""
748 748 if existingfp is not None:
749 749 yield existingfp
750 750 else:
751 751 if self._inline:
752 752 func = self._indexfp
753 753 else:
754 754 func = self._datafp
755 755 with func() as fp:
756 756 yield fp
757 757
758 758 def tip(self):
759 759 return self.node(len(self.index) - 2)
760 760 def __contains__(self, rev):
761 761 return 0 <= rev < len(self)
762 762 def __len__(self):
763 763 return len(self.index) - 1
764 764 def __iter__(self):
765 765 return iter(xrange(len(self)))
766 766 def revs(self, start=0, stop=None):
767 767 """iterate over all rev in this revlog (from start to stop)"""
768 768 step = 1
769 769 if stop is not None:
770 770 if start > stop:
771 771 step = -1
772 772 stop += step
773 773 else:
774 774 stop = len(self)
775 775 return xrange(start, stop, step)
776 776
777 777 @util.propertycache
778 778 def nodemap(self):
779 779 self.rev(self.node(0))
780 780 return self._nodecache
781 781
782 782 def hasnode(self, node):
783 783 try:
784 784 self.rev(node)
785 785 return True
786 786 except KeyError:
787 787 return False
788 788
789 789 def candelta(self, baserev, rev):
790 790 """whether two revisions (baserev, rev) can be delta-ed or not"""
791 791 # Disable delta if either rev requires a content-changing flag
792 792 # processor (ex. LFS). This is because such flag processor can alter
793 793 # the rawtext content that the delta will be based on, and two clients
794 794 # could have the same revlog node with different flags (i.e. different
795 795 # rawtext contents) and the delta could be incompatible.
796 796 if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
797 797 or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
798 798 return False
799 799 return True
800 800
801 801 def clearcaches(self):
802 802 self._cache = None
803 803 self._chainbasecache.clear()
804 804 self._chunkcache = (0, '')
805 805 self._pcache = {}
806 806
807 807 try:
808 808 self._nodecache.clearcaches()
809 809 except AttributeError:
810 810 self._nodecache = {nullid: nullrev}
811 811 self._nodepos = None
812 812
813 813 def rev(self, node):
814 814 try:
815 815 return self._nodecache[node]
816 816 except TypeError:
817 817 raise
818 818 except RevlogError:
819 819 # parsers.c radix tree lookup failed
820 820 if node == wdirid or node in wdirfilenodeids:
821 821 raise error.WdirUnsupported
822 822 raise LookupError(node, self.indexfile, _('no node'))
823 823 except KeyError:
824 824 # pure python cache lookup failed
825 825 n = self._nodecache
826 826 i = self.index
827 827 p = self._nodepos
828 828 if p is None:
829 829 p = len(i) - 2
830 830 else:
831 831 assert p < len(i)
832 832 for r in xrange(p, -1, -1):
833 833 v = i[r][7]
834 834 n[v] = r
835 835 if v == node:
836 836 self._nodepos = r - 1
837 837 return r
838 838 if node == wdirid or node in wdirfilenodeids:
839 839 raise error.WdirUnsupported
840 840 raise LookupError(node, self.indexfile, _('no node'))
841 841
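# Editor's sketch (illustrative, not part of the original file): on the pure
# python path, rev() scans the index backwards from _nodepos, memoizing every
# node it walks past. A fake object with a plain-dict cache shows the effect:
def _demorev():
    from mercurial.node import nullid, nullrev
    from mercurial.revlog import revlog
    class fakerl(object):
        _nodecache = {nullid: nullrev}
        _nodepos = None
        indexfile = 'fake.i'
        index = [(0,) * 7 + ('\xaa' * 20,), (0,) * 7 + ('\xbb' * 20,),
                 (0, 0, 0, -1, -1, -1, -1, nullid)]
    rl = fakerl()
    assert revlog.rev.__func__(rl, '\xaa' * 20) == 0
    # the node walked past on the way down was cached too
    assert rl._nodecache['\xbb' * 20] == 1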
842 842 # Accessors for index entries.
843 843
844 844 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
845 845 # are flags.
846 846 def start(self, rev):
847 847 return int(self.index[rev][0] >> 16)
848 848
849 849 def flags(self, rev):
850 850 return self.index[rev][0] & 0xFFFF
851 851
852 852 def length(self, rev):
853 853 return self.index[rev][1]
854 854
855 855 def rawsize(self, rev):
856 856 """return the length of the uncompressed text for a given revision"""
857 857 l = self.index[rev][2]
858 858 if l is not None and l >= 0:
859 859 return l
860 860
861 861 t = self.revision(rev, raw=True)
862 862 return len(t)
863 863
864 864 def size(self, rev):
865 865 """length of non-raw text (processed by a "read" flag processor)"""
866 866 # fast path: if no "read" flag processor could change the content,
867 867 # size is rawsize. note: ELLIPSIS is known to not change the content.
868 868 flags = self.flags(rev)
869 869 if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
870 870 return self.rawsize(rev)
871 871
872 872 return len(self.revision(rev, raw=False))
873 873
874 874 def chainbase(self, rev):
875 875 base = self._chainbasecache.get(rev)
876 876 if base is not None:
877 877 return base
878 878
879 879 index = self.index
880 base = index[rev][3]
881 while base != rev:
882 rev = base
883 base = index[rev][3]
880 iterrev = rev
881 base = index[iterrev][3]
882 while base != iterrev:
883 iterrev = base
884 base = index[iterrev][3]
884 885
885 886 self._chainbasecache[rev] = base
886 887 return base
887 888
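# Editor's sketch (illustrative, not part of the original file): the change
# above walks the chain through a separate 'iterrev' so the cache entry is
# keyed by the revision that was asked about; the old code mutated 'rev' and
# so cached the result under the chain base itself. A plain dict stands in
# for the LRU cache:
def _demochainbase():
    from mercurial.revlog import revlog
    class fakerl(object):
        _chainbasecache = {}
        # only field [3] (base/delta-parent rev) matters to chainbase()
        index = [(0, 0, 0, 0), (0, 0, 0, 0), (0, 0, 0, 1)]
    rl = fakerl()
    assert revlog.chainbase.__func__(rl, 2) == 0
    # fixed: cached under rev 2 (the old code stored it under rev 0)
    assert rl._chainbasecache == {2: 0}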
888 889 def linkrev(self, rev):
889 890 return self.index[rev][4]
890 891
891 892 def parentrevs(self, rev):
892 893 try:
893 894 entry = self.index[rev]
894 895 except IndexError:
895 896 if rev == wdirrev:
896 897 raise error.WdirUnsupported
897 898 raise
898 899
899 900 return entry[5], entry[6]
900 901
901 902 def node(self, rev):
902 903 try:
903 904 return self.index[rev][7]
904 905 except IndexError:
905 906 if rev == wdirrev:
906 907 raise error.WdirUnsupported
907 908 raise
908 909
909 910 # Derived from index values.
910 911
911 912 def end(self, rev):
912 913 return self.start(rev) + self.length(rev)
913 914
914 915 def parents(self, node):
915 916 i = self.index
916 917 d = i[self.rev(node)]
917 918 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
918 919
919 920 def chainlen(self, rev):
920 921 return self._chaininfo(rev)[0]
921 922
922 923 def _chaininfo(self, rev):
923 924 chaininfocache = self._chaininfocache
924 925 if rev in chaininfocache:
925 926 return chaininfocache[rev]
926 927 index = self.index
927 928 generaldelta = self._generaldelta
928 929 iterrev = rev
929 930 e = index[iterrev]
930 931 clen = 0
931 932 compresseddeltalen = 0
932 933 while iterrev != e[3]:
933 934 clen += 1
934 935 compresseddeltalen += e[1]
935 936 if generaldelta:
936 937 iterrev = e[3]
937 938 else:
938 939 iterrev -= 1
939 940 if iterrev in chaininfocache:
940 941 t = chaininfocache[iterrev]
941 942 clen += t[0]
942 943 compresseddeltalen += t[1]
943 944 break
944 945 e = index[iterrev]
945 946 else:
946 947 # Add text length of base since decompressing that also takes
947 948 # work. For cache hits the length is already included.
948 949 compresseddeltalen += e[1]
949 950 r = (clen, compresseddeltalen)
950 951 chaininfocache[rev] = r
951 952 return r
952 953
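# Editor's sketch (illustrative, not part of the original file): _chaininfo
# counts the deltas in the chain and sums their compressed lengths, adding
# the base text's length at the end since decompressing it costs work too.
def _demochaininfo():
    from mercurial.revlog import revlog
    class fakerl(object):
        _generaldelta = True
        _chaininfocache = {}
        # only fields [1] (compressed length) and [3] (delta base) matter
        index = [(0, 100, 0, 0), (0, 20, 0, 0), (0, 30, 0, 1)]
    # rev 2 -> delta(30) on rev 1 -> delta(20) on full text of rev 0 (100)
    assert revlog._chaininfo.__func__(fakerl(), 2) == (2, 150)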
953 954 def _deltachain(self, rev, stoprev=None):
954 955 """Obtain the delta chain for a revision.
955 956
956 957 ``stoprev`` specifies a revision to stop at. If not specified, we
957 958 stop at the base of the chain.
958 959
959 960 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
960 961 revs in ascending order and ``stopped`` is a bool indicating whether
961 962 ``stoprev`` was hit.
962 963 """
963 964 # Try C implementation.
964 965 try:
965 966 return self.index.deltachain(rev, stoprev, self._generaldelta)
966 967 except AttributeError:
967 968 pass
968 969
969 970 chain = []
970 971
971 972 # Alias to prevent attribute lookup in tight loop.
972 973 index = self.index
973 974 generaldelta = self._generaldelta
974 975
975 976 iterrev = rev
976 977 e = index[iterrev]
977 978 while iterrev != e[3] and iterrev != stoprev:
978 979 chain.append(iterrev)
979 980 if generaldelta:
980 981 iterrev = e[3]
981 982 else:
982 983 iterrev -= 1
983 984 e = index[iterrev]
984 985
985 986 if iterrev == stoprev:
986 987 stopped = True
987 988 else:
988 989 chain.append(iterrev)
989 990 stopped = False
990 991
991 992 chain.reverse()
992 993 return chain, stopped
993 994
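# Editor's sketch (illustrative, not part of the original file): with a plain
# list index the C fast path raises AttributeError and the python walk runs;
# it returns the chain in ascending order plus whether stoprev cut it short.
def _demodeltachain():
    from mercurial.revlog import revlog
    class fakerl(object):
        _generaldelta = True
        index = [(0, 0, 0, 0), (0, 0, 0, 0), (0, 0, 0, 1)]
    rl = fakerl()
    assert revlog._deltachain.__func__(rl, 2) == ([0, 1, 2], False)
    # stopping at rev 1 excludes rev 1 itself and reports stopped=True
    assert revlog._deltachain.__func__(rl, 2, stoprev=1) == ([2], True)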
994 995 def ancestors(self, revs, stoprev=0, inclusive=False):
995 996 """Generate the ancestors of 'revs' in reverse topological order.
996 997 Does not generate revs lower than stoprev.
997 998
998 999 See the documentation for ancestor.lazyancestors for more details."""
999 1000
1000 1001 return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
1001 1002 inclusive=inclusive)
1002 1003
1003 1004 def descendants(self, revs):
1004 1005 """Generate the descendants of 'revs' in revision order.
1005 1006
1006 1007 Yield a sequence of revision numbers starting with a child of
1007 1008 some rev in revs, i.e., each revision is *not* considered a
1008 1009 descendant of itself. Results are ordered by revision number (a
1009 1010 topological sort)."""
1010 1011 first = min(revs)
1011 1012 if first == nullrev:
1012 1013 for i in self:
1013 1014 yield i
1014 1015 return
1015 1016
1016 1017 seen = set(revs)
1017 1018 for i in self.revs(start=first + 1):
1018 1019 for x in self.parentrevs(i):
1019 1020 if x != nullrev and x in seen:
1020 1021 seen.add(i)
1021 1022 yield i
1022 1023 break
1023 1024
1024 1025 def findcommonmissing(self, common=None, heads=None):
1025 1026 """Return a tuple of the ancestors of common and the ancestors of heads
1026 1027 that are not ancestors of common. In revset terminology, we return the
1027 1028 tuple:
1028 1029
1029 1030 ::common, (::heads) - (::common)
1030 1031
1031 1032 The list is sorted by revision number, meaning it is
1032 1033 topologically sorted.
1033 1034
1034 1035 'heads' and 'common' are both lists of node IDs. If heads is
1035 1036 not supplied, uses all of the revlog's heads. If common is not
1036 1037 supplied, uses nullid."""
1037 1038 if common is None:
1038 1039 common = [nullid]
1039 1040 if heads is None:
1040 1041 heads = self.heads()
1041 1042
1042 1043 common = [self.rev(n) for n in common]
1043 1044 heads = [self.rev(n) for n in heads]
1044 1045
1045 1046 # we want the ancestors, but inclusive
1046 1047 class lazyset(object):
1047 1048 def __init__(self, lazyvalues):
1048 1049 self.addedvalues = set()
1049 1050 self.lazyvalues = lazyvalues
1050 1051
1051 1052 def __contains__(self, value):
1052 1053 return value in self.addedvalues or value in self.lazyvalues
1053 1054
1054 1055 def __iter__(self):
1055 1056 added = self.addedvalues
1056 1057 for r in added:
1057 1058 yield r
1058 1059 for r in self.lazyvalues:
1059 1060 if r not in added:
1060 1061 yield r
1061 1062
1062 1063 def add(self, value):
1063 1064 self.addedvalues.add(value)
1064 1065
1065 1066 def update(self, values):
1066 1067 self.addedvalues.update(values)
1067 1068
1068 1069 has = lazyset(self.ancestors(common))
1069 1070 has.add(nullrev)
1070 1071 has.update(common)
1071 1072
1072 1073 # take all ancestors from heads that aren't in has
1073 1074 missing = set()
1074 1075 visit = collections.deque(r for r in heads if r not in has)
1075 1076 while visit:
1076 1077 r = visit.popleft()
1077 1078 if r in missing:
1078 1079 continue
1079 1080 else:
1080 1081 missing.add(r)
1081 1082 for p in self.parentrevs(r):
1082 1083 if p not in has:
1083 1084 visit.append(p)
1084 1085 missing = list(missing)
1085 1086 missing.sort()
1086 1087 return has, [self.node(miss) for miss in missing]
1087 1088
1088 1089 def incrementalmissingrevs(self, common=None):
1089 1090 """Return an object that can be used to incrementally compute the
1090 1091 revision numbers of the ancestors of arbitrary sets that are not
1091 1092 ancestors of common. This is an ancestor.incrementalmissingancestors
1092 1093 object.
1093 1094
1094 1095 'common' is a list of revision numbers. If common is not supplied, uses
1095 1096 nullrev.
1096 1097 """
1097 1098 if common is None:
1098 1099 common = [nullrev]
1099 1100
1100 1101 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1101 1102
1102 1103 def findmissingrevs(self, common=None, heads=None):
1103 1104 """Return the revision numbers of the ancestors of heads that
1104 1105 are not ancestors of common.
1105 1106
1106 1107 More specifically, return a list of revision numbers corresponding to
1107 1108 nodes N such that every N satisfies the following constraints:
1108 1109
1109 1110 1. N is an ancestor of some node in 'heads'
1110 1111 2. N is not an ancestor of any node in 'common'
1111 1112
1112 1113 The list is sorted by revision number, meaning it is
1113 1114 topologically sorted.
1114 1115
1115 1116 'heads' and 'common' are both lists of revision numbers. If heads is
1116 1117 not supplied, uses all of the revlog's heads. If common is not
1117 1118 supplied, uses nullid."""
1118 1119 if common is None:
1119 1120 common = [nullrev]
1120 1121 if heads is None:
1121 1122 heads = self.headrevs()
1122 1123
1123 1124 inc = self.incrementalmissingrevs(common=common)
1124 1125 return inc.missingancestors(heads)
1125 1126
1126 1127 def findmissing(self, common=None, heads=None):
1127 1128 """Return the ancestors of heads that are not ancestors of common.
1128 1129
1129 1130 More specifically, return a list of nodes N such that every N
1130 1131 satisfies the following constraints:
1131 1132
1132 1133 1. N is an ancestor of some node in 'heads'
1133 1134 2. N is not an ancestor of any node in 'common'
1134 1135
1135 1136 The list is sorted by revision number, meaning it is
1136 1137 topologically sorted.
1137 1138
1138 1139 'heads' and 'common' are both lists of node IDs. If heads is
1139 1140 not supplied, uses all of the revlog's heads. If common is not
1140 1141 supplied, uses nullid."""
1141 1142 if common is None:
1142 1143 common = [nullid]
1143 1144 if heads is None:
1144 1145 heads = self.heads()
1145 1146
1146 1147 common = [self.rev(n) for n in common]
1147 1148 heads = [self.rev(n) for n in heads]
1148 1149
1149 1150 inc = self.incrementalmissingrevs(common=common)
1150 1151 return [self.node(r) for r in inc.missingancestors(heads)]
1151 1152
1152 1153 def nodesbetween(self, roots=None, heads=None):
1153 1154 """Return a topological path from 'roots' to 'heads'.
1154 1155
1155 1156 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1156 1157 topologically sorted list of all nodes N that satisfy both of
1157 1158 these constraints:
1158 1159
1159 1160 1. N is a descendant of some node in 'roots'
1160 1161 2. N is an ancestor of some node in 'heads'
1161 1162
1162 1163 Every node is considered to be both a descendant and an ancestor
1163 1164 of itself, so every reachable node in 'roots' and 'heads' will be
1164 1165 included in 'nodes'.
1165 1166
1166 1167 'outroots' is the list of reachable nodes in 'roots', i.e., the
1167 1168 subset of 'roots' that is returned in 'nodes'. Likewise,
1168 1169 'outheads' is the subset of 'heads' that is also in 'nodes'.
1169 1170
1170 1171 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1171 1172 unspecified, uses nullid as the only root. If 'heads' is
1172 1173 unspecified, uses list of all of the revlog's heads."""
1173 1174 nonodes = ([], [], [])
1174 1175 if roots is not None:
1175 1176 roots = list(roots)
1176 1177 if not roots:
1177 1178 return nonodes
1178 1179 lowestrev = min([self.rev(n) for n in roots])
1179 1180 else:
1180 1181 roots = [nullid] # Everybody's a descendant of nullid
1181 1182 lowestrev = nullrev
1182 1183 if (lowestrev == nullrev) and (heads is None):
1183 1184 # We want _all_ the nodes!
1184 1185 return ([self.node(r) for r in self], [nullid], list(self.heads()))
1185 1186 if heads is None:
1186 1187 # All nodes are ancestors, so the latest ancestor is the last
1187 1188 # node.
1188 1189 highestrev = len(self) - 1
1189 1190 # Set ancestors to None to signal that every node is an ancestor.
1190 1191 ancestors = None
1191 1192 # Set heads to an empty dictionary for later discovery of heads
1192 1193 heads = {}
1193 1194 else:
1194 1195 heads = list(heads)
1195 1196 if not heads:
1196 1197 return nonodes
1197 1198 ancestors = set()
1198 1199 # Turn heads into a dictionary so we can remove 'fake' heads.
1199 1200 # Also, later we will be using it to filter out the heads we can't
1200 1201 # find from roots.
1201 1202 heads = dict.fromkeys(heads, False)
1202 1203 # Start at the top and keep marking parents until we're done.
1203 1204 nodestotag = set(heads)
1204 1205 # Remember where the top was so we can use it as a limit later.
1205 1206 highestrev = max([self.rev(n) for n in nodestotag])
1206 1207 while nodestotag:
1207 1208 # grab a node to tag
1208 1209 n = nodestotag.pop()
1209 1210 # Never tag nullid
1210 1211 if n == nullid:
1211 1212 continue
1212 1213 # A node's revision number represents its place in a
1213 1214 # topologically sorted list of nodes.
1214 1215 r = self.rev(n)
1215 1216 if r >= lowestrev:
1216 1217 if n not in ancestors:
1217 1218 # If we are possibly a descendant of one of the roots
1218 1219 # and we haven't already been marked as an ancestor
1219 1220 ancestors.add(n) # Mark as ancestor
1220 1221 # Add non-nullid parents to list of nodes to tag.
1221 1222 nodestotag.update([p for p in self.parents(n) if
1222 1223 p != nullid])
1223 1224 elif n in heads: # We've seen it before, is it a fake head?
1224 1225 # So it is, real heads should not be the ancestors of
1225 1226 # any other heads.
1226 1227 heads.pop(n)
1227 1228 if not ancestors:
1228 1229 return nonodes
1229 1230 # Now that we have our set of ancestors, we want to remove any
1230 1231 # roots that are not ancestors.
1231 1232
1232 1233 # If one of the roots was nullid, everything is included anyway.
1233 1234 if lowestrev > nullrev:
1234 1235 # But, since we weren't, let's recompute the lowest rev to not
1235 1236 # include roots that aren't ancestors.
1236 1237
1237 1238 # Filter out roots that aren't ancestors of heads
1238 1239 roots = [root for root in roots if root in ancestors]
1239 1240 # Recompute the lowest revision
1240 1241 if roots:
1241 1242 lowestrev = min([self.rev(root) for root in roots])
1242 1243 else:
1243 1244 # No more roots? Return empty list
1244 1245 return nonodes
1245 1246 else:
1246 1247 # We are descending from nullid, and don't need to care about
1247 1248 # any other roots.
1248 1249 lowestrev = nullrev
1249 1250 roots = [nullid]
1250 1251 # Transform our roots list into a set.
1251 1252 descendants = set(roots)
1252 1253 # Also, keep the original roots so we can filter out roots that aren't
1253 1254 # 'real' roots (i.e. are descended from other roots).
1254 1255 roots = descendants.copy()
1255 1256 # Our topologically sorted list of output nodes.
1256 1257 orderedout = []
1257 1258 # Don't start at nullid since we don't want nullid in our output list,
1258 1259 # and if nullid shows up in descendants, empty parents will look like
1259 1260 # they're descendants.
1260 1261 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1261 1262 n = self.node(r)
1262 1263 isdescendant = False
1263 1264 if lowestrev == nullrev: # Everybody is a descendant of nullid
1264 1265 isdescendant = True
1265 1266 elif n in descendants:
1266 1267 # n is already a descendant
1267 1268 isdescendant = True
1268 1269 # This check only needs to be done here because all the roots
1269 1270 # will start being marked as descendants before the loop.
1270 1271 if n in roots:
1271 1272 # If n was a root, check if it's a 'real' root.
1272 1273 p = tuple(self.parents(n))
1273 1274 # If any of its parents are descendants, it's not a root.
1274 1275 if (p[0] in descendants) or (p[1] in descendants):
1275 1276 roots.remove(n)
1276 1277 else:
1277 1278 p = tuple(self.parents(n))
1278 1279 # A node is a descendant if either of its parents are
1279 1280 # descendants. (We seeded the descendants set with the roots
1280 1281 # up there, remember?)
1281 1282 if (p[0] in descendants) or (p[1] in descendants):
1282 1283 descendants.add(n)
1283 1284 isdescendant = True
1284 1285 if isdescendant and ((ancestors is None) or (n in ancestors)):
1285 1286 # Only include nodes that are both descendants and ancestors.
1286 1287 orderedout.append(n)
1287 1288 if (ancestors is not None) and (n in heads):
1288 1289 # We're trying to figure out which heads are reachable
1289 1290 # from roots.
1290 1291 # Mark this head as having been reached
1291 1292 heads[n] = True
1292 1293 elif ancestors is None:
1293 1294 # Otherwise, we're trying to discover the heads.
1294 1295 # Assume this is a head because if it isn't, the next step
1295 1296 # will eventually remove it.
1296 1297 heads[n] = True
1297 1298 # But, obviously its parents aren't.
1298 1299 for p in self.parents(n):
1299 1300 heads.pop(p, None)
1300 1301 heads = [head for head, flag in heads.iteritems() if flag]
1301 1302 roots = list(roots)
1302 1303 assert orderedout
1303 1304 assert roots
1304 1305 assert heads
1305 1306 return (orderedout, roots, heads)
1306 1307
1307 1308 def headrevs(self):
1308 1309 try:
1309 1310 return self.index.headrevs()
1310 1311 except AttributeError:
1311 1312 return self._headrevs()
1312 1313
1313 1314 def computephases(self, roots):
1314 1315 return self.index.computephasesmapsets(roots)
1315 1316
1316 1317 def _headrevs(self):
1317 1318 count = len(self)
1318 1319 if not count:
1319 1320 return [nullrev]
1320 1321 # we won't iter over filtered revs, so nobody is a head at start
1321 1322 ishead = [0] * (count + 1)
1322 1323 index = self.index
1323 1324 for r in self:
1324 1325 ishead[r] = 1 # I may be a head
1325 1326 e = index[r]
1326 1327 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1327 1328 return [r for r, val in enumerate(ishead) if val]
1328 1329
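# Editor's sketch (illustrative, not part of the original file): _headrevs
# marks every rev as a head, then clears the mark on every rev that shows up
# as a parent; the extra slot at the end absorbs nullrev (-1) parents.
def _demoheadrevs():
    from mercurial.revlog import revlog
    class fakerl(object):
        # fields [5] and [6] are the parent revs; revs 1 and 2 both
        # descend from rev 0, so only 1 and 2 are heads
        index = [(0, 0, 0, 0, 0, -1, -1, None),
                 (0, 0, 0, 0, 0, 0, -1, None),
                 (0, 0, 0, 0, 0, 0, -1, None)]
        def __len__(self):
            return 3
        def __iter__(self):
            return iter(xrange(3))
    assert revlog._headrevs.__func__(fakerl()) == [1, 2]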
1329 1330 def heads(self, start=None, stop=None):
1330 1331 """return the list of all nodes that have no children
1331 1332
1332 1333 if start is specified, only heads that are descendants of
1333 1334 start will be returned
1334 1335 if stop is specified, it will consider all the revs from stop
1335 1336 as if they had no children
1336 1337 """
1337 1338 if start is None and stop is None:
1338 1339 if not len(self):
1339 1340 return [nullid]
1340 1341 return [self.node(r) for r in self.headrevs()]
1341 1342
1342 1343 if start is None:
1343 1344 start = nullid
1344 1345 if stop is None:
1345 1346 stop = []
1346 1347 stoprevs = set([self.rev(n) for n in stop])
1347 1348 startrev = self.rev(start)
1348 1349 reachable = {startrev}
1349 1350 heads = {startrev}
1350 1351
1351 1352 parentrevs = self.parentrevs
1352 1353 for r in self.revs(start=startrev + 1):
1353 1354 for p in parentrevs(r):
1354 1355 if p in reachable:
1355 1356 if r not in stoprevs:
1356 1357 reachable.add(r)
1357 1358 heads.add(r)
1358 1359 if p in heads and p not in stoprevs:
1359 1360 heads.remove(p)
1360 1361
1361 1362 return [self.node(r) for r in heads]
1362 1363
1363 1364 def children(self, node):
1364 1365 """find the children of a given node"""
1365 1366 c = []
1366 1367 p = self.rev(node)
1367 1368 for r in self.revs(start=p + 1):
1368 1369 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1369 1370 if prevs:
1370 1371 for pr in prevs:
1371 1372 if pr == p:
1372 1373 c.append(self.node(r))
1373 1374 elif p == nullrev:
1374 1375 c.append(self.node(r))
1375 1376 return c
1376 1377
1377 1378 def descendant(self, start, end):
1378 1379 if start == nullrev:
1379 1380 return True
1380 1381 for i in self.descendants([start]):
1381 1382 if i == end:
1382 1383 return True
1383 1384 elif i > end:
1384 1385 break
1385 1386 return False
1386 1387
1387 1388 def commonancestorsheads(self, a, b):
1388 1389 """calculate all the heads of the common ancestors of nodes a and b"""
1389 1390 a, b = self.rev(a), self.rev(b)
1390 1391 try:
1391 1392 ancs = self.index.commonancestorsheads(a, b)
1392 1393 except (AttributeError, OverflowError): # C implementation failed
1393 1394 ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)
1394 1395 return pycompat.maplist(self.node, ancs)
1395 1396
1396 1397 def isancestor(self, a, b):
1397 1398 """return True if node a is an ancestor of node b
1398 1399
1399 1400 The implementation of this is trivial but the use of
1400 1401 commonancestorsheads is not."""
1401 1402 return a in self.commonancestorsheads(a, b)
1402 1403
1403 1404 def ancestor(self, a, b):
1404 1405 """calculate the "best" common ancestor of nodes a and b"""
1405 1406
1406 1407 a, b = self.rev(a), self.rev(b)
1407 1408 try:
1408 1409 ancs = self.index.ancestors(a, b)
1409 1410 except (AttributeError, OverflowError):
1410 1411 ancs = ancestor.ancestors(self.parentrevs, a, b)
1411 1412 if ancs:
1412 1413 # choose a consistent winner when there's a tie
1413 1414 return min(map(self.node, ancs))
1414 1415 return nullid
1415 1416
1416 1417 def _match(self, id):
1417 1418 if isinstance(id, int):
1418 1419 # rev
1419 1420 return self.node(id)
1420 1421 if len(id) == 20:
1421 1422 # possibly a binary node
1422 1423 # odds of a binary node being all hex in ASCII are 1 in 10**25
1423 1424 try:
1424 1425 node = id
1425 1426 self.rev(node) # quick search the index
1426 1427 return node
1427 1428 except LookupError:
1428 1429 pass # may be partial hex id
1429 1430 try:
1430 1431 # str(rev)
1431 1432 rev = int(id)
1432 1433 if "%d" % rev != id:
1433 1434 raise ValueError
1434 1435 if rev < 0:
1435 1436 rev = len(self) + rev
1436 1437 if rev < 0 or rev >= len(self):
1437 1438 raise ValueError
1438 1439 return self.node(rev)
1439 1440 except (ValueError, OverflowError):
1440 1441 pass
1441 1442 if len(id) == 40:
1442 1443 try:
1443 1444 # a full hex nodeid?
1444 1445 node = bin(id)
1445 1446 self.rev(node)
1446 1447 return node
1447 1448 except (TypeError, LookupError):
1448 1449 pass
1449 1450
1450 1451 def _partialmatch(self, id):
1452 1453 # we don't care about wdirfilenodeids as they should always be full hashes
1452 1453 maybewdir = wdirhex.startswith(id)
1453 1454 try:
1454 1455 partial = self.index.partialmatch(id)
1455 1456 if partial and self.hasnode(partial):
1456 1457 if maybewdir:
1457 1458 # single 'ff...' match in radix tree, ambiguous with wdir
1458 1459 raise RevlogError
1459 1460 return partial
1460 1461 if maybewdir:
1461 1462 # no 'ff...' match in radix tree, wdir identified
1462 1463 raise error.WdirUnsupported
1463 1464 return None
1464 1465 except RevlogError:
1465 1466 # parsers.c radix tree lookup gave multiple matches
1466 1467 # fast path: for unfiltered changelog, radix tree is accurate
1467 1468 if not getattr(self, 'filteredrevs', None):
1468 1469 raise LookupError(id, self.indexfile,
1469 1470 _('ambiguous identifier'))
1470 1471 # fall through to slow path that filters hidden revisions
1471 1472 except (AttributeError, ValueError):
1472 1473 # we are pure python, or key was too short to search radix tree
1473 1474 pass
1474 1475
1475 1476 if id in self._pcache:
1476 1477 return self._pcache[id]
1477 1478
1478 1479 if len(id) <= 40:
1479 1480 try:
1480 1481 # hex(node)[:...]
1481 1482 l = len(id) // 2 # grab an even number of digits
1482 1483 prefix = bin(id[:l * 2])
1483 1484 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1484 1485 nl = [n for n in nl if hex(n).startswith(id) and
1485 1486 self.hasnode(n)]
1486 1487 if len(nl) > 0:
1487 1488 if len(nl) == 1 and not maybewdir:
1488 1489 self._pcache[id] = nl[0]
1489 1490 return nl[0]
1490 1491 raise LookupError(id, self.indexfile,
1491 1492 _('ambiguous identifier'))
1492 1493 if maybewdir:
1493 1494 raise error.WdirUnsupported
1494 1495 return None
1495 1496 except TypeError:
1496 1497 pass
1497 1498
1498 1499 def lookup(self, id):
1499 1500 """locate a node based on:
1500 1501 - revision number or str(revision number)
1501 1502 - nodeid or subset of hex nodeid
1502 1503 """
1503 1504 n = self._match(id)
1504 1505 if n is not None:
1505 1506 return n
1506 1507 n = self._partialmatch(id)
1507 1508 if n:
1508 1509 return n
1509 1510
1510 1511 raise LookupError(id, self.indexfile, _('no match found'))
1511 1512
1512 1513 def shortest(self, node, minlength=1):
1513 1514 """Find the shortest unambiguous prefix that matches node."""
1514 1515 def isvalid(prefix):
1515 1516 try:
1516 1517 node = self._partialmatch(prefix)
1517 1518 except error.RevlogError:
1518 1519 return False
1519 1520 except error.WdirUnsupported:
1520 1521 # single 'ff...' match
1521 1522 return True
1522 1523 if node is None:
1523 1524 raise LookupError(node, self.indexfile, _('no node'))
1524 1525 return True
1525 1526
1526 1527 def maybewdir(prefix):
1527 1528 return all(c == 'f' for c in prefix)
1528 1529
1529 1530 hexnode = hex(node)
1530 1531
1531 1532 def disambiguate(hexnode, minlength):
1532 1533 """Disambiguate against wdirid."""
1533 1534 for length in range(minlength, 41):
1534 1535 prefix = hexnode[:length]
1535 1536 if not maybewdir(prefix):
1536 1537 return prefix
1537 1538
1538 1539 if not getattr(self, 'filteredrevs', None):
1539 1540 try:
1540 1541 length = max(self.index.shortest(node), minlength)
1541 1542 return disambiguate(hexnode, length)
1542 1543 except RevlogError:
1543 1544 if node != wdirid:
1544 1545 raise LookupError(node, self.indexfile, _('no node'))
1545 1546 except AttributeError:
1546 1547 # Fall through to pure code
1547 1548 pass
1548 1549
1549 1550 if node == wdirid:
1550 1551 for length in range(minlength, 41):
1551 1552 prefix = hexnode[:length]
1552 1553 if isvalid(prefix):
1553 1554 return prefix
1554 1555
1555 1556 for length in range(minlength, 41):
1556 1557 prefix = hexnode[:length]
1557 1558 if isvalid(prefix):
1558 1559 return disambiguate(hexnode, length)
1559 1560
1560 1561 def cmp(self, node, text):
1561 1562 """compare text with a given file revision
1562 1563
1563 1564 returns True if text is different from what is stored.
1564 1565 """
1565 1566 p1, p2 = self.parents(node)
1566 1567 return hash(text, p1, p2) != node
1567 1568
1568 1569 def _cachesegment(self, offset, data):
1569 1570 """Add a segment to the revlog cache.
1570 1571
1571 1572 Accepts an absolute offset and the data that is at that location.
1572 1573 """
1573 1574 o, d = self._chunkcache
1574 1575 # try to add to existing cache
1575 1576 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1576 1577 self._chunkcache = o, d + data
1577 1578 else:
1578 1579 self._chunkcache = offset, data
1579 1580
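    # --- illustrative sketch (hypothetical values): the cache only grows
    # when the incoming segment is contiguous with what is already cached:
    #
    #   self._chunkcache == (0, 'abcd')     # offset 0, four bytes
    #   self._cachesegment(4, 'ef')         # contiguous -> (0, 'abcdef')
    #   self._cachesegment(10, 'xy')        # gap -> cache replaced: (10, 'xy')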
1580 1581 def _readsegment(self, offset, length, df=None):
1581 1582 """Load a segment of raw data from the revlog.
1582 1583
1583 1584 Accepts an absolute offset, length to read, and an optional existing
1584 1585 file handle to read from.
1585 1586
1586 1587 If an existing file handle is passed, it will be seeked and the
1587 1588 original seek position will NOT be restored.
1588 1589
1589 1590 Returns a str or buffer of raw byte data.
1590 1591 """
1591 1592 # Cache data both forward and backward around the requested
1592 1593 # data, in a fixed size window. This helps speed up operations
1593 1594 # involving reading the revlog backwards.
1594 1595 cachesize = self._chunkcachesize
1595 1596 realoffset = offset & ~(cachesize - 1)
1596 1597 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
1597 1598 - realoffset)
1598 1599 with self._datareadfp(df) as df:
1599 1600 df.seek(realoffset)
1600 1601 d = df.read(reallength)
1601 1602 self._cachesegment(realoffset, d)
1602 1603 if offset != realoffset or reallength != length:
1603 1604 return util.buffer(d, offset - realoffset, length)
1604 1605 return d
1605 1606
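    # --- worked example (hypothetical numbers): with a chunk cache size of
    # 65536, a 100-byte read at offset 70000 is widened to an aligned window:
    #
    #   realoffset = 70000 & ~65535                            # -> 65536
    #   reallength = ((70000 + 100 + 65536) & ~65535) - 65536  # -> 65536
    #
    # one aligned 64KiB block is read and cached, and util.buffer() slices
    # the 100 requested bytes back out of it.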
1606 1607 def _getsegment(self, offset, length, df=None):
1607 1608 """Obtain a segment of raw data from the revlog.
1608 1609
1609 1610 Accepts an absolute offset, length of bytes to obtain, and an
1610 1611 optional file handle to the already-opened revlog. If the file
1611 1612 handle is used, its original seek position will not be preserved.
1612 1613
1613 1614 Requests for data may be returned from a cache.
1614 1615
1615 1616 Returns a str or a buffer instance of raw byte data.
1616 1617 """
1617 1618 o, d = self._chunkcache
1618 1619 l = len(d)
1619 1620
1620 1621 # is it in the cache?
1621 1622 cachestart = offset - o
1622 1623 cacheend = cachestart + length
1623 1624 if cachestart >= 0 and cacheend <= l:
1624 1625 if cachestart == 0 and cacheend == l:
1625 1626 return d # avoid a copy
1626 1627 return util.buffer(d, cachestart, cacheend - cachestart)
1627 1628
1628 1629 return self._readsegment(offset, length, df=df)
1629 1630
1630 1631 def _getsegmentforrevs(self, startrev, endrev, df=None):
1631 1632 """Obtain a segment of raw data corresponding to a range of revisions.
1632 1633
1633 1634 Accepts the start and end revisions and an optional already-open
1634 1635 file handle to be used for reading. If the file handle is read, its
1635 1636 seek position will not be preserved.
1636 1637
1637 1638 Requests for data may be satisfied by a cache.
1638 1639
1639 1640 Returns a 2-tuple of (offset, data) for the requested range of
1640 1641 revisions. Offset is the integer offset from the beginning of the
1641 1642 revlog and data is a str or buffer of the raw byte data.
1642 1643
1643 1644 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1644 1645 to determine where each revision's data begins and ends.
1645 1646 """
1646 1647 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1647 1648 # (functions are expensive).
1648 1649 index = self.index
1649 1650 istart = index[startrev]
1650 1651 start = int(istart[0] >> 16)
1651 1652 if startrev == endrev:
1652 1653 end = start + istart[1]
1653 1654 else:
1654 1655 iend = index[endrev]
1655 1656 end = int(iend[0] >> 16) + iend[1]
1656 1657
1657 1658 if self._inline:
1658 1659 start += (startrev + 1) * self._io.size
1659 1660 end += (endrev + 1) * self._io.size
1660 1661 length = end - start
1661 1662
1662 1663 return start, self._getsegment(start, length, df=df)
1663 1664
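    # --- illustrative sketch: how a caller slices single revisions out of
    # the returned segment (ignoring the inline-index adjustment; this is
    # essentially what _chunks() below does):
    #
    #   offset, data = rl._getsegmentforrevs(startrev, endrev)
    #   for rev in range(startrev, endrev + 1):
    #       chunk = util.buffer(data, rl.start(rev) - offset, rl.length(rev))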
1664 1665 def _chunk(self, rev, df=None):
1665 1666 """Obtain a single decompressed chunk for a revision.
1666 1667
1667 1668 Accepts an integer revision and an optional already-open file handle
1668 1669 to be used for reading. If used, the seek position of the file will not
1669 1670 be preserved.
1670 1671
1671 1672 Returns a str holding uncompressed data for the requested revision.
1672 1673 """
1673 1674 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1674 1675
1675 1676 def _chunks(self, revs, df=None):
1676 1677 """Obtain decompressed chunks for the specified revisions.
1677 1678
1678 1679 Accepts an iterable of numeric revisions that are assumed to be in
1679 1680 ascending order. Also accepts an optional already-open file handle
1680 1681 to be used for reading. If used, the seek position of the file will
1681 1682 not be preserved.
1682 1683
1683 1684 This function is similar to calling ``self._chunk()`` multiple times,
1684 1685 but is faster.
1685 1686
1686 1687 Returns a list with decompressed data for each requested revision.
1687 1688 """
1688 1689 if not revs:
1689 1690 return []
1690 1691 start = self.start
1691 1692 length = self.length
1692 1693 inline = self._inline
1693 1694 iosize = self._io.size
1694 1695 buffer = util.buffer
1695 1696
1696 1697 l = []
1697 1698 ladd = l.append
1698 1699
1699 1700 if not self._withsparseread:
1700 1701 slicedchunks = (revs,)
1701 1702 else:
1702 1703 slicedchunks = _slicechunk(self, revs)
1703 1704
1704 1705 for revschunk in slicedchunks:
1705 1706 firstrev = revschunk[0]
1706 1707 # Skip trailing revisions with empty diff
1707 1708 for lastrev in revschunk[::-1]:
1708 1709 if length(lastrev) != 0:
1709 1710 break
1710 1711
1711 1712 try:
1712 1713 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1713 1714 except OverflowError:
1714 1715 # issue4215 - we can't cache a run of chunks greater than
1715 1716 # 2G on Windows
1716 1717 return [self._chunk(rev, df=df) for rev in revschunk]
1717 1718
1718 1719 decomp = self.decompress
1719 1720 for rev in revschunk:
1720 1721 chunkstart = start(rev)
1721 1722 if inline:
1722 1723 chunkstart += (rev + 1) * iosize
1723 1724 chunklength = length(rev)
1724 1725 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1725 1726
1726 1727 return l
1727 1728
1728 1729 def _chunkclear(self):
1729 1730 """Clear the raw chunk cache."""
1730 1731 self._chunkcache = (0, '')
1731 1732
1732 1733 def deltaparent(self, rev):
1733 1734 """return deltaparent of the given revision"""
1734 1735 base = self.index[rev][3]
1735 1736 if base == rev:
1736 1737 return nullrev
1737 1738 elif self._generaldelta:
1738 1739 return base
1739 1740 else:
1740 1741 return rev - 1
1741 1742
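    # --- illustrative sketch (hypothetical revlog ``rl``): the three cases
    # above, for some revision 7:
    #
    #   rl.deltaparent(7) == nullrev   # entry[3] == 7: stored as a snapshot
    #   rl.deltaparent(7) == 4         # generaldelta: base read from index
    #   rl.deltaparent(7) == 6         # no generaldelta: always rev - 1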
1742 1743 def revdiff(self, rev1, rev2):
1743 1744 """return or calculate a delta between two revisions
1744 1745
1745 1746 The delta calculated is in binary form and is intended to be written to
1746 1747 revlog data directly. So this function needs raw revision data.
1747 1748 """
1748 1749 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1749 1750 return bytes(self._chunk(rev2))
1750 1751
1751 1752 return mdiff.textdiff(self.revision(rev1, raw=True),
1752 1753 self.revision(rev2, raw=True))
1753 1754
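    # --- illustrative sketch: the fast path above.  When rev2 is already
    # stored as a delta against rev1, the on-disk chunk *is* the wanted
    # binary diff and no text has to be rebuilt (hypothetical revs):
    #
    #   rl.deltaparent(8) == 5
    #   rl.revdiff(5, 8) == bytes(rl._chunk(8))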
1754 1755 def revision(self, nodeorrev, _df=None, raw=False):
1755 1756 """return an uncompressed revision of a given node or revision
1756 1757 number.
1757 1758
1758 1759 _df - an existing file handle to read from. (internal-only)
1759 1760 raw - an optional argument specifying if the revision data is to be
1760 1761 treated as raw data when applying flag transforms. 'raw' should be set
1761 1762 to True when generating changegroups or in debug commands.
1762 1763 """
1763 1764 if isinstance(nodeorrev, int):
1764 1765 rev = nodeorrev
1765 1766 node = self.node(rev)
1766 1767 else:
1767 1768 node = nodeorrev
1768 1769 rev = None
1769 1770
1770 1771 cachedrev = None
1771 1772 flags = None
1772 1773 rawtext = None
1773 1774 if node == nullid:
1774 1775 return ""
1775 1776 if self._cache:
1776 1777 if self._cache[0] == node:
1777 1778 # _cache only stores rawtext
1778 1779 if raw:
1779 1780 return self._cache[2]
1780 1781 # duplicated, but good for perf
1781 1782 if rev is None:
1782 1783 rev = self.rev(node)
1783 1784 if flags is None:
1784 1785 flags = self.flags(rev)
1785 1786 # no extra flags set, no flag processor runs, text = rawtext
1786 1787 if flags == REVIDX_DEFAULT_FLAGS:
1787 1788 return self._cache[2]
1788 1789 # rawtext is reusable. need to run flag processor
1789 1790 rawtext = self._cache[2]
1790 1791
1791 1792 cachedrev = self._cache[1]
1792 1793
1793 1794 # look up what we need to read
1794 1795 if rawtext is None:
1795 1796 if rev is None:
1796 1797 rev = self.rev(node)
1797 1798
1798 1799 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1799 1800 if stopped:
1800 1801 rawtext = self._cache[2]
1801 1802
1802 1803 # drop cache to save memory
1803 1804 self._cache = None
1804 1805
1805 1806 bins = self._chunks(chain, df=_df)
1806 1807 if rawtext is None:
1807 1808 rawtext = bytes(bins[0])
1808 1809 bins = bins[1:]
1809 1810
1810 1811 rawtext = mdiff.patches(rawtext, bins)
1811 1812 self._cache = (node, rev, rawtext)
1812 1813
1813 1814 if flags is None:
1814 1815 if rev is None:
1815 1816 rev = self.rev(node)
1816 1817 flags = self.flags(rev)
1817 1818
1818 1819 text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
1819 1820 if validatehash:
1820 1821 self.checkhash(text, node, rev=rev)
1821 1822
1822 1823 return text
1823 1824
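    # --- illustrative sketch (hypothetical chain): what the reconstruction
    # above does for rev 7 whose delta chain is [4, 6, 7].  _chunks() yields
    # the decompressed chunks; the snapshot at rev 4 is the starting text
    # and mdiff.patches() folds the two deltas on top of it:
    #
    #   bins = self._chunks([4, 6, 7])
    #   rawtext = mdiff.patches(bytes(bins[0]), bins[1:])
    #
    # (when the chain walk stops at a cached revision, the cached rawtext
    # stands in for the snapshot and every chunk is a delta.)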
1824 1825 def hash(self, text, p1, p2):
1825 1826 """Compute a node hash.
1826 1827
1827 1828 Available as a function so that subclasses can replace the hash
1828 1829 as needed.
1829 1830 """
1830 1831 return hash(text, p1, p2)
1831 1832
1832 1833 def _processflags(self, text, flags, operation, raw=False):
1833 1834 """Inspect revision data flags and applies transforms defined by
1834 1835 registered flag processors.
1835 1836
1836 1837 ``text`` - the revision data to process
1837 1838 ``flags`` - the revision flags
1838 1839 ``operation`` - the operation being performed (read or write)
1839 1840 ``raw`` - an optional argument describing if the raw transform should be
1840 1841 applied.
1841 1842
1842 1843 This method processes the flags in the order (or reverse order if
1843 1844 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
1844 1845 flag processors registered for present flags. The order of flags defined
1845 1846 in REVIDX_FLAGS_ORDER needs to be stable because the transforms are not commutative.
1846 1847
1847 1848 Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
1848 1849 processed text and ``validatehash`` is a bool indicating whether the
1849 1850 returned text should be checked for hash integrity.
1850 1851
1851 1852 Note: If the ``raw`` argument is set, it has precedence over the
1852 1853 operation and will only update the value of ``validatehash``.
1853 1854 """
1854 1855 # fast path: no flag processors will run
1855 1856 if flags == 0:
1856 1857 return text, True
1857 1858 if operation not in ('read', 'write'):
1858 1859 raise ProgrammingError(_("invalid '%s' operation") % operation)
1859 1860 # Check all flags are known.
1860 1861 if flags & ~REVIDX_KNOWN_FLAGS:
1861 1862 raise RevlogError(_("incompatible revision flag '%#x'") %
1862 1863 (flags & ~REVIDX_KNOWN_FLAGS))
1863 1864 validatehash = True
1864 1865 # Depending on the operation (read or write), the order might be
1865 1866 # reversed due to non-commutative transforms.
1866 1867 orderedflags = REVIDX_FLAGS_ORDER
1867 1868 if operation == 'write':
1868 1869 orderedflags = reversed(orderedflags)
1869 1870
1870 1871 for flag in orderedflags:
1871 1872 # If a flagprocessor has been registered for a known flag, apply the
1872 1873 # related operation transform and update result tuple.
1873 1874 if flag & flags:
1874 1875 vhash = True
1875 1876
1876 1877 if flag not in _flagprocessors:
1877 1878 message = _("missing processor for flag '%#x'") % (flag)
1878 1879 raise RevlogError(message)
1879 1880
1880 1881 processor = _flagprocessors[flag]
1881 1882 if processor is not None:
1882 1883 readtransform, writetransform, rawtransform = processor
1883 1884
1884 1885 if raw:
1885 1886 vhash = rawtransform(self, text)
1886 1887 elif operation == 'read':
1887 1888 text, vhash = readtransform(self, text)
1888 1889 else: # write operation
1889 1890 text, vhash = writetransform(self, text)
1890 1891 validatehash = validatehash and vhash
1891 1892
1892 1893 return text, validatehash
1893 1894
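    # --- illustrative sketch (hypothetical processor): an entry in
    # _flagprocessors is a (readtransform, writetransform, rawtransform)
    # tuple keyed on its revision flag, matching the unpacking above:
    #
    #   def _myflagread(rl, text):
    #       return text, True        # -> (newtext, validatehash)
    #   def _myflagwrite(rl, text):
    #       return text, True
    #   def _myflagraw(rl, text):
    #       return False             # rawtext hash cannot be validated
    #
    #   _flagprocessors[REVIDX_EXTSTORED] = (
    #       _myflagread, _myflagwrite, _myflagraw)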
1894 1895 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1895 1896 """Check node hash integrity.
1896 1897
1897 1898 Available as a function so that subclasses can extend hash mismatch
1898 1899 behaviors as needed.
1899 1900 """
1900 1901 try:
1901 1902 if p1 is None and p2 is None:
1902 1903 p1, p2 = self.parents(node)
1903 1904 if node != self.hash(text, p1, p2):
1904 1905 revornode = rev
1905 1906 if revornode is None:
1906 1907 revornode = templatefilters.short(hex(node))
1907 1908 raise RevlogError(_("integrity check failed on %s:%s")
1908 1909 % (self.indexfile, pycompat.bytestr(revornode)))
1909 1910 except RevlogError:
1910 1911 if self._censorable and _censoredtext(text):
1911 1912 raise error.CensoredNodeError(self.indexfile, node, text)
1912 1913 raise
1913 1914
1914 1915 def _enforceinlinesize(self, tr, fp=None):
1915 1916 """Check if the revlog is too big for inline and convert if so.
1916 1917
1917 1918 This should be called after revisions are added to the revlog. If the
1918 1919 revlog has grown too large to be an inline revlog, it will convert it
1919 1920 to use multiple index and data files.
1920 1921 """
1921 1922 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
1922 1923 return
1923 1924
1924 1925 trinfo = tr.find(self.indexfile)
1925 1926 if trinfo is None:
1926 1927 raise RevlogError(_("%s not found in the transaction")
1927 1928 % self.indexfile)
1928 1929
1929 1930 trindex = trinfo[2]
1930 1931 if trindex is not None:
1931 1932 dataoff = self.start(trindex)
1932 1933 else:
1933 1934 # revlog was stripped at start of transaction, use all leftover data
1934 1935 trindex = len(self) - 1
1935 1936 dataoff = self.end(-2)
1936 1937
1937 1938 tr.add(self.datafile, dataoff)
1938 1939
1939 1940 if fp:
1940 1941 fp.flush()
1941 1942 fp.close()
1942 1943
1943 1944 with self._datafp('w') as df:
1944 1945 for r in self:
1945 1946 df.write(self._getsegmentforrevs(r, r)[1])
1946 1947
1947 1948 with self._indexfp('w') as fp:
1948 1949 self.version &= ~FLAG_INLINE_DATA
1949 1950 self._inline = False
1950 1951 io = self._io
1951 1952 for i in self:
1952 1953 e = io.packentry(self.index[i], self.node, self.version, i)
1953 1954 fp.write(e)
1954 1955
1955 1956 # the temp file replaces the real index when we exit the context
1956 1957 # manager
1957 1958
1958 1959 tr.replace(self.indexfile, trindex * self._io.size)
1959 1960 self._chunkclear()
1960 1961
1961 1962 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1962 1963 node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
1963 1964 """add a revision to the log
1964 1965
1965 1966 text - the revision data to add
1966 1967 transaction - the transaction object used for rollback
1967 1968 link - the linkrev data to add
1968 1969 p1, p2 - the parent nodeids of the revision
1969 1970 cachedelta - an optional precomputed delta
1970 1971 node - nodeid of revision; typically node is not specified, and it is
1971 1972 computed by default as hash(text, p1, p2), however subclasses might
1972 1973 use a different hashing method (and override checkhash() in that case)
1973 1974 flags - the known flags to set on the revision
1974 1975 deltacomputer - an optional _deltacomputer instance shared between
1975 1976 multiple calls
1976 1977 """
1977 1978 if link == nullrev:
1978 1979 raise RevlogError(_("attempted to add linkrev -1 to %s")
1979 1980 % self.indexfile)
1980 1981
1981 1982 if flags:
1982 1983 node = node or self.hash(text, p1, p2)
1983 1984
1984 1985 rawtext, validatehash = self._processflags(text, flags, 'write')
1985 1986
1986 1987 # If the flag processor modifies the revision data, ignore any provided
1987 1988 # cachedelta.
1988 1989 if rawtext != text:
1989 1990 cachedelta = None
1990 1991
1991 1992 if len(rawtext) > _maxentrysize:
1992 1993 raise RevlogError(
1993 1994 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
1994 1995 % (self.indexfile, len(rawtext)))
1995 1996
1996 1997 node = node or self.hash(rawtext, p1, p2)
1997 1998 if node in self.nodemap:
1998 1999 return node
1999 2000
2000 2001 if validatehash:
2001 2002 self.checkhash(rawtext, node, p1=p1, p2=p2)
2002 2003
2003 2004 return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
2004 2005 flags, cachedelta=cachedelta,
2005 2006 deltacomputer=deltacomputer)
2006 2007
2007 2008 def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
2008 2009 cachedelta=None, deltacomputer=None):
2009 2010 """add a raw revision with known flags, node and parents
2010 2011 useful when reusing a revision not stored in this revlog (ex: received
2011 2012 over the wire, or read from an external bundle).
2012 2013 """
2013 2014 dfh = None
2014 2015 if not self._inline:
2015 2016 dfh = self._datafp("a+")
2016 2017 ifh = self._indexfp("a+")
2017 2018 try:
2018 2019 return self._addrevision(node, rawtext, transaction, link, p1, p2,
2019 2020 flags, cachedelta, ifh, dfh,
2020 2021 deltacomputer=deltacomputer)
2021 2022 finally:
2022 2023 if dfh:
2023 2024 dfh.close()
2024 2025 ifh.close()
2025 2026
2026 2027 def compress(self, data):
2027 2028 """Generate a possibly-compressed representation of data."""
2028 2029 if not data:
2029 2030 return '', data
2030 2031
2031 2032 compressed = self._compressor.compress(data)
2032 2033
2033 2034 if compressed:
2034 2035 # The revlog compressor added the header in the returned data.
2035 2036 return '', compressed
2036 2037
2037 2038 if data[0:1] == '\0':
2038 2039 return '', data
2039 2040 return 'u', data
2040 2041
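    # --- illustrative sketch (assuming the default zlib engine): the
    # one-byte headers produced above and routed on by decompress() below:
    #
    #   compress('')            -> ('', '')           # empty chunk
    #   compress('x' * 1000)    -> ('', 'x\x9c...')   # zlib header embedded
    #   compress('\0binary')    -> ('', '\0binary')   # NUL-led data kept raw
    #   compress('tiny')        -> ('u', 'tiny')      # not worth compressing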
2041 2042 def decompress(self, data):
2042 2043 """Decompress a revlog chunk.
2043 2044
2044 2045 The chunk is expected to begin with a header identifying the
2045 2046 format type so it can be routed to an appropriate decompressor.
2046 2047 """
2047 2048 if not data:
2048 2049 return data
2049 2050
2050 2051 # Revlogs are read much more frequently than they are written and many
2051 2052 # chunks only take microseconds to decompress, so performance is
2052 2053 # important here.
2053 2054 #
2054 2055 # We can make a few assumptions about revlogs:
2055 2056 #
2056 2057 # 1) the majority of chunks will be compressed (as opposed to inline
2057 2058 # raw data).
2058 2059 # 2) decompressing *any* data will likely be at least 10x slower than
2059 2060 # returning raw inline data.
2060 2061 # 3) we want to prioritize common and officially supported compression
2061 2062 # engines
2062 2063 #
2063 2064 # It follows that we want to optimize for "decompress compressed data
2064 2065 # when encoded with common and officially supported compression engines"
2065 2066 # case over "raw data" and "data encoded by less common or non-official
2066 2067 # compression engines." That is why we have the inline lookup first
2067 2068 # followed by the compengines lookup.
2068 2069 #
2069 2070 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2070 2071 # compressed chunks. And this matters for changelog and manifest reads.
2071 2072 t = data[0:1]
2072 2073
2073 2074 if t == 'x':
2074 2075 try:
2075 2076 return _zlibdecompress(data)
2076 2077 except zlib.error as e:
2077 2078 raise RevlogError(_('revlog decompress error: %s') %
2078 2079 stringutil.forcebytestr(e))
2079 2080 # '\0' is more common than 'u' so it goes first.
2080 2081 elif t == '\0':
2081 2082 return data
2082 2083 elif t == 'u':
2083 2084 return util.buffer(data, 1)
2084 2085
2085 2086 try:
2086 2087 compressor = self._decompressors[t]
2087 2088 except KeyError:
2088 2089 try:
2089 2090 engine = util.compengines.forrevlogheader(t)
2090 2091 compressor = engine.revlogcompressor()
2091 2092 self._decompressors[t] = compressor
2092 2093 except KeyError:
2093 2094 raise RevlogError(_('unknown compression type %r') % t)
2094 2095
2095 2096 return compressor.decompress(data)
2096 2097
2097 2098 def _isgooddeltainfo(self, deltainfo, revinfo):
2098 2099 """Returns True if the given delta is good. Good means that it is within
2099 2100 the disk span, disk size, and chain length bounds that we know to be
2100 2101 performant."""
2101 2102 if deltainfo is None:
2102 2103 return False
2103 2104
2104 2105 # - 'deltainfo.distance' is the distance from the base revision --
2105 2106 # bounding it limits the amount of I/O we need to do.
2106 2107 # - 'deltainfo.compresseddeltalen' is the sum of the total size of
2107 2108 # deltas we need to apply -- bounding it limits the amount of CPU
2108 2109 # we consume.
2109 2110
2110 2111 textlen = revinfo.textlen
2111 2112 defaultmax = textlen * 4
2112 2113 maxdist = self._maxdeltachainspan
2113 2114 if not maxdist:
2114 2115 maxdist = deltainfo.distance # ensure the conditional below passes
2115 2116 maxdist = max(maxdist, defaultmax)
2116 2117 if (deltainfo.distance > maxdist or deltainfo.deltalen > textlen or
2117 2118 deltainfo.compresseddeltalen > textlen * 2 or
2118 2119 (self._maxchainlen and deltainfo.chainlen > self._maxchainlen)):
2119 2120 return False
2120 2121
2121 2122 return True
2122 2123
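    # --- worked example (hypothetical numbers): for textlen = 1000,
    # defaultmax = 4000.  With _maxdeltachainspan unset, maxdist is first
    # seeded with deltainfo.distance itself, so the distance check can never
    # fail; the delta is then rejected only if deltalen > 1000 (bigger than
    # a full snapshot), if the summed compressed deltas exceed 2000 bytes,
    # or if the chain grows past _maxchainlen (when configured).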
2123 2124 def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
2124 2125 cachedelta, ifh, dfh, alwayscache=False,
2125 2126 deltacomputer=None):
2126 2127 """internal function to add revisions to the log
2127 2128
2128 2129 see addrevision for argument descriptions.
2129 2130
2130 2131 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2131 2132
2132 2133 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2133 2134 be used.
2134 2135
2135 2136 invariants:
2136 2137 - rawtext is optional (can be None); if not set, cachedelta must be set.
2137 2138 if both are set, they must correspond to each other.
2138 2139 """
2139 2140 if node == nullid:
2140 2141 raise RevlogError(_("%s: attempt to add null revision") %
2141 2142 (self.indexfile))
2142 2143 if node == wdirid or node in wdirfilenodeids:
2143 2144 raise RevlogError(_("%s: attempt to add wdir revision") %
2144 2145 (self.indexfile))
2145 2146
2146 2147 if self._inline:
2147 2148 fh = ifh
2148 2149 else:
2149 2150 fh = dfh
2150 2151
2151 2152 btext = [rawtext]
2152 2153
2153 2154 curr = len(self)
2154 2155 prev = curr - 1
2155 2156 offset = self.end(prev)
2156 2157 p1r, p2r = self.rev(p1), self.rev(p2)
2157 2158
2158 2159 # full versions are inserted when the needed deltas
2159 2160 # become comparable to the uncompressed text
2160 2161 if rawtext is None:
2161 2162 # need rawtext size, before changed by flag processors, which is
2162 2163 # the non-raw size. use revlog explicitly to avoid filelog's extra
2163 2164 # logic that might remove metadata size.
2164 2165 textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
2165 2166 cachedelta[1])
2166 2167 else:
2167 2168 textlen = len(rawtext)
2168 2169
2169 2170 if deltacomputer is None:
2170 2171 deltacomputer = _deltacomputer(self)
2171 2172
2172 2173 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2173 2174
2174 2175 # no delta for flag processor revision (see "candelta" for why)
2175 2176 # not calling candelta since only one revision needs to be tested,
2176 2177 # and to avoid the overhead of fetching flags again.
2177 2178 if flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
2178 2179 deltainfo = None
2179 2180 else:
2180 2181 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2181 2182
2182 2183 if deltainfo is not None:
2183 2184 base = deltainfo.base
2184 2185 chainbase = deltainfo.chainbase
2185 2186 data = deltainfo.data
2186 2187 l = deltainfo.deltalen
2187 2188 else:
2188 2189 rawtext = deltacomputer.buildtext(revinfo, fh)
2189 2190 data = self.compress(rawtext)
2190 2191 l = len(data[1]) + len(data[0])
2191 2192 base = chainbase = curr
2192 2193
2193 2194 e = (offset_type(offset, flags), l, textlen,
2194 2195 base, link, p1r, p2r, node)
2195 2196 self.index.insert(-1, e)
2196 2197 self.nodemap[node] = curr
2197 2198
2198 2199 entry = self._io.packentry(e, self.node, self.version, curr)
2199 2200 self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
2200 2201
2201 2202 if alwayscache and rawtext is None:
2202 2203 rawtext = deltacomputer.buildtext(revinfo, fh)
2203 2204
2204 2205 if type(rawtext) == bytes: # only accept immutable objects
2205 2206 self._cache = (node, curr, rawtext)
2206 2207 self._chainbasecache[curr] = chainbase
2207 2208 return node
2208 2209
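    # --- note (sketch): the index list keeps a sentinel null entry at its
    # end, which is why _addrevision() above inserts with index.insert(-1, e)
    # and why the last real revision is addressed as index[-2] elsewhere in
    # this file (e.g. _enforceinlinesize's start(-2)/length(-2)).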
2209 2210 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
2210 2211 # Files opened in a+ mode have inconsistent behavior on various
2211 2212 # platforms. Windows requires that a file positioning call be made
2212 2213 # when the file handle transitions between reads and writes. See
2213 2214 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2214 2215 # platforms, Python or the platform itself can be buggy. Some versions
2215 2216 # of Solaris have been observed to not append at the end of the file
2216 2217 # if the file was seeked to before the end. See issue4943 for more.
2217 2218 #
2218 2219 # We work around this issue by inserting a seek() before writing.
2219 2220 # Note: This is likely not necessary on Python 3.
2220 2221 ifh.seek(0, os.SEEK_END)
2221 2222 if dfh:
2222 2223 dfh.seek(0, os.SEEK_END)
2223 2224
2224 2225 curr = len(self) - 1
2225 2226 if not self._inline:
2226 2227 transaction.add(self.datafile, offset)
2227 2228 transaction.add(self.indexfile, curr * len(entry))
2228 2229 if data[0]:
2229 2230 dfh.write(data[0])
2230 2231 dfh.write(data[1])
2231 2232 ifh.write(entry)
2232 2233 else:
2233 2234 offset += curr * self._io.size
2234 2235 transaction.add(self.indexfile, offset, curr)
2235 2236 ifh.write(entry)
2236 2237 ifh.write(data[0])
2237 2238 ifh.write(data[1])
2238 2239 self._enforceinlinesize(transaction, ifh)
2239 2240
2240 2241 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
2241 2242 """
2242 2243 add a delta group
2243 2244
2244 2245 given a set of deltas, add them to the revision log. the
2245 2246 first delta is against its parent, which should be in our
2246 2247 log, the rest are against the previous delta.
2247 2248
2248 2249 If ``addrevisioncb`` is defined, it will be called with arguments of
2249 2250 this revlog and the node that was added.
2250 2251 """
2251 2252
2252 2253 nodes = []
2253 2254
2254 2255 r = len(self)
2255 2256 end = 0
2256 2257 if r:
2257 2258 end = self.end(r - 1)
2258 2259 ifh = self._indexfp("a+")
2259 2260 isize = r * self._io.size
2260 2261 if self._inline:
2261 2262 transaction.add(self.indexfile, end + isize, r)
2262 2263 dfh = None
2263 2264 else:
2264 2265 transaction.add(self.indexfile, isize, r)
2265 2266 transaction.add(self.datafile, end)
2266 2267 dfh = self._datafp("a+")
2267 2268 def flush():
2268 2269 if dfh:
2269 2270 dfh.flush()
2270 2271 ifh.flush()
2271 2272 try:
2272 2273 deltacomputer = _deltacomputer(self)
2273 2274 # loop through our set of deltas
2274 2275 for data in deltas:
2275 2276 node, p1, p2, linknode, deltabase, delta, flags = data
2276 2277 link = linkmapper(linknode)
2277 2278 flags = flags or REVIDX_DEFAULT_FLAGS
2278 2279
2279 2280 nodes.append(node)
2280 2281
2281 2282 if node in self.nodemap:
2282 2283 # this can happen if two branches make the same change
2283 2284 continue
2284 2285
2285 2286 for p in (p1, p2):
2286 2287 if p not in self.nodemap:
2287 2288 raise LookupError(p, self.indexfile,
2288 2289 _('unknown parent'))
2289 2290
2290 2291 if deltabase not in self.nodemap:
2291 2292 raise LookupError(deltabase, self.indexfile,
2292 2293 _('unknown delta base'))
2293 2294
2294 2295 baserev = self.rev(deltabase)
2295 2296
2296 2297 if baserev != nullrev and self.iscensored(baserev):
2297 2298 # if base is censored, delta must be full replacement in a
2298 2299 # single patch operation
2299 2300 hlen = struct.calcsize(">lll")
2300 2301 oldlen = self.rawsize(baserev)
2301 2302 newlen = len(delta) - hlen
2302 2303 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2303 2304 raise error.CensoredBaseError(self.indexfile,
2304 2305 self.node(baserev))
2305 2306
2306 2307 if not flags and self._peek_iscensored(baserev, delta, flush):
2307 2308 flags |= REVIDX_ISCENSORED
2308 2309
2309 2310 # We assume consumers of addrevisioncb will want to retrieve
2310 2311 # the added revision, which will require a call to
2311 2312 # revision(). revision() will fast path if there is a cache
2312 2313 # hit. So, we tell _addrevision() to always cache in this case.
2313 2314 # We're only using addgroup() in the context of changegroup
2314 2315 # generation so the revision data can always be handled as raw
2315 2316 # by the flagprocessor.
2316 2317 self._addrevision(node, None, transaction, link,
2317 2318 p1, p2, flags, (baserev, delta),
2318 2319 ifh, dfh,
2319 2320 alwayscache=bool(addrevisioncb),
2320 2321 deltacomputer=deltacomputer)
2321 2322
2322 2323 if addrevisioncb:
2323 2324 addrevisioncb(self, node)
2324 2325
2325 2326 if not dfh and not self._inline:
2326 2327 # addrevision switched from inline to conventional
2327 2328 # reopen the index
2328 2329 ifh.close()
2329 2330 dfh = self._datafp("a+")
2330 2331 ifh = self._indexfp("a+")
2331 2332 finally:
2332 2333 if dfh:
2333 2334 dfh.close()
2334 2335 ifh.close()
2335 2336
2336 2337 return nodes
2337 2338
2338 2339 def iscensored(self, rev):
2339 2340 """Check if a file revision is censored."""
2340 2341 if not self._censorable:
2341 2342 return False
2342 2343
2343 2344 return self.flags(rev) & REVIDX_ISCENSORED
2344 2345
2345 2346 def _peek_iscensored(self, baserev, delta, flush):
2346 2347 """Quickly check if a delta produces a censored revision."""
2347 2348 if not self._censorable:
2348 2349 return False
2349 2350
2350 2351 # Fragile heuristic: unless new file meta keys are added alphabetically
2351 2352 # preceding "censored", all censored revisions are prefixed by
2352 2353 # "\1\ncensored:". A delta producing such a censored revision must be a
2353 2354 # full-replacement delta, so we inspect the first and only patch in the
2354 2355 # delta for this prefix.
2355 2356 hlen = struct.calcsize(">lll")
2356 2357 if len(delta) <= hlen:
2357 2358 return False
2358 2359
2359 2360 oldlen = self.rawsize(baserev)
2360 2361 newlen = len(delta) - hlen
2361 2362 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2362 2363 return False
2363 2364
2364 2365 add = "\1\ncensored:"
2365 2366 addlen = len(add)
2366 2367 return newlen >= addlen and delta[hlen:hlen + addlen] == add
2367 2368
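    # --- illustrative sketch: the shape being tested above.  A censoring
    # delta must be a single full-replacement hunk, i.e. (roughly) a ">lll"
    # header (start, end, newlength) followed by the replacement text, which
    # for a censored revision begins with the metadata marker:
    #
    #   delta = struct.pack(">lll", 0, oldlen, newlen) + "\1\ncensored:..."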
2368 2369 def getstrippoint(self, minlink):
2369 2370 """find the minimum rev that must be stripped to strip the linkrev
2370 2371
2371 2372 Returns a tuple containing the minimum rev and a set of all revs that
2372 2373 have linkrevs that will be broken by this strip.
2373 2374 """
2374 2375 brokenrevs = set()
2375 2376 strippoint = len(self)
2376 2377
2377 2378 heads = {}
2378 2379 futurelargelinkrevs = set()
2379 2380 for head in self.headrevs():
2380 2381 headlinkrev = self.linkrev(head)
2381 2382 heads[head] = headlinkrev
2382 2383 if headlinkrev >= minlink:
2383 2384 futurelargelinkrevs.add(headlinkrev)
2384 2385
2385 2386 # This algorithm involves walking down the rev graph, starting at the
2386 2387 # heads. Since the revs are topologically sorted according to linkrev,
2387 2388 # once all head linkrevs are below the minlink, we know there are
2388 2389 # no more revs that could have a linkrev greater than minlink.
2389 2390 # So we can stop walking.
2390 2391 while futurelargelinkrevs:
2391 2392 strippoint -= 1
2392 2393 linkrev = heads.pop(strippoint)
2393 2394
2394 2395 if linkrev < minlink:
2395 2396 brokenrevs.add(strippoint)
2396 2397 else:
2397 2398 futurelargelinkrevs.remove(linkrev)
2398 2399
2399 2400 for p in self.parentrevs(strippoint):
2400 2401 if p != nullrev:
2401 2402 plinkrev = self.linkrev(p)
2402 2403 heads[p] = plinkrev
2403 2404 if plinkrev >= minlink:
2404 2405 futurelargelinkrevs.add(plinkrev)
2405 2406
2406 2407 return strippoint, brokenrevs
2407 2408
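    # --- worked example (hypothetical branchy history): revs 1 (linkrev 5)
    # and 2 (linkrev 2) are both heads on top of rev 0.  For minlink = 3 the
    # walk pops rev 2 first (linkrev 2 < 3, so it is collateral damage) and
    # stops after rev 1 clears the last large linkrev:
    #
    #   rl.getstrippoint(3) == (1, {2})   # truncate at rev 1; rev 2 broken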
2408 2409 def strip(self, minlink, transaction):
2409 2410 """truncate the revlog on the first revision with a linkrev >= minlink
2410 2411
2411 2412 This function is called when we're stripping revision minlink and
2412 2413 its descendants from the repository.
2413 2414
2414 2415 We have to remove all revisions with linkrev >= minlink, because
2415 2416 the equivalent changelog revisions will be renumbered after the
2416 2417 strip.
2417 2418
2418 2419 So we truncate the revlog on the first of these revisions, and
2419 2420 trust that the caller has saved the revisions that shouldn't be
2420 2421 removed and that it'll re-add them after this truncation.
2421 2422 """
2422 2423 if len(self) == 0:
2423 2424 return
2424 2425
2425 2426 rev, _ = self.getstrippoint(minlink)
2426 2427 if rev == len(self):
2427 2428 return
2428 2429
2429 2430 # first truncate the files on disk
2430 2431 end = self.start(rev)
2431 2432 if not self._inline:
2432 2433 transaction.add(self.datafile, end)
2433 2434 end = rev * self._io.size
2434 2435 else:
2435 2436 end += rev * self._io.size
2436 2437
2437 2438 transaction.add(self.indexfile, end)
2438 2439
2439 2440 # then reset internal state in memory to forget those revisions
2440 2441 self._cache = None
2441 2442 self._chaininfocache = {}
2442 2443 self._chunkclear()
2443 2444 for x in xrange(rev, len(self)):
2444 2445 del self.nodemap[self.node(x)]
2445 2446
2446 2447 del self.index[rev:-1]
2447 2448 self._nodepos = None
2448 2449
2449 2450 def checksize(self):
2450 2451 expected = 0
2451 2452 if len(self):
2452 2453 expected = max(0, self.end(len(self) - 1))
2453 2454
2454 2455 try:
2455 2456 with self._datafp() as f:
2456 2457 f.seek(0, 2)
2457 2458 actual = f.tell()
2458 2459 dd = actual - expected
2459 2460 except IOError as inst:
2460 2461 if inst.errno != errno.ENOENT:
2461 2462 raise
2462 2463 dd = 0
2463 2464
2464 2465 try:
2465 2466 f = self.opener(self.indexfile)
2466 2467 f.seek(0, 2)
2467 2468 actual = f.tell()
2468 2469 f.close()
2469 2470 s = self._io.size
2470 2471 i = max(0, actual // s)
2471 2472 di = actual - (i * s)
2472 2473 if self._inline:
2473 2474 databytes = 0
2474 2475 for r in self:
2475 2476 databytes += max(0, self.length(r))
2476 2477 dd = 0
2477 2478 di = actual - len(self) * s - databytes
2478 2479 except IOError as inst:
2479 2480 if inst.errno != errno.ENOENT:
2480 2481 raise
2481 2482 di = 0
2482 2483
2483 2484 return (dd, di)
2484 2485
2485 2486 def files(self):
2486 2487 res = [self.indexfile]
2487 2488 if not self._inline:
2488 2489 res.append(self.datafile)
2489 2490 return res
2490 2491
2491 2492 DELTAREUSEALWAYS = 'always'
2492 2493 DELTAREUSESAMEREVS = 'samerevs'
2493 2494 DELTAREUSENEVER = 'never'
2494 2495
2495 2496 DELTAREUSEFULLADD = 'fulladd'
2496 2497
2497 2498 DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}
2498 2499
2499 2500 def clone(self, tr, destrevlog, addrevisioncb=None,
2500 2501 deltareuse=DELTAREUSESAMEREVS, aggressivemergedeltas=None):
2501 2502 """Copy this revlog to another, possibly with format changes.
2502 2503
2503 2504 The destination revlog will contain the same revisions and nodes.
2504 2505 However, it may not be bit-for-bit identical due to e.g. delta encoding
2505 2506 differences.
2506 2507
2507 2508 The ``deltareuse`` argument controls how deltas from the existing revlog
2508 2509 are preserved in the destination revlog. The argument can have the
2509 2510 following values:
2510 2511
2511 2512 DELTAREUSEALWAYS
2512 2513 Deltas will always be reused (if possible), even if the destination
2513 2514 revlog would not select the same revisions for the delta. This is the
2514 2515 fastest mode of operation.
2515 2516 DELTAREUSESAMEREVS
2516 2517 Deltas will be reused if the destination revlog would pick the same
2517 2518 revisions for the delta. This mode strikes a balance between speed
2518 2519 and optimization.
2519 2520 DELTAREUSENEVER
2520 2521 Deltas will never be reused. This is the slowest mode of execution.
2521 2522 This mode can be used to recompute deltas (e.g. if the diff/delta
2522 2523 algorithm changes).
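        DELTAREUSEFULLADD
            Revisions will be re-added as if they were new, going through the
            full ``addrevision()`` path; deltas are recomputed by the
            destination revlog (see the handling in the copy loop below).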
2523 2524
2524 2525 Delta computation can be slow, so the choice of delta reuse policy can
2525 2526 significantly affect run time.
2526 2527
2527 2528 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2528 2529 two extremes. Deltas will be reused if they are appropriate. But if the
2529 2530 delta could choose a better revision, it will do so. This means if you
2530 2531 are converting a non-generaldelta revlog to a generaldelta revlog,
2531 2532 deltas will be recomputed if the delta's parent isn't a parent of the
2532 2533 revision.
2533 2534
2534 2535 In addition to the delta policy, the ``aggressivemergedeltas`` argument
2535 2536 controls whether to compute deltas against both parents for merges.
2536 2537 If unset, the destination revlog's existing setting is kept.
2537 2538 """
2538 2539 if deltareuse not in self.DELTAREUSEALL:
2539 2540 raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)
2540 2541
2541 2542 if len(destrevlog):
2542 2543 raise ValueError(_('destination revlog is not empty'))
2543 2544
2544 2545 if getattr(self, 'filteredrevs', None):
2545 2546 raise ValueError(_('source revlog has filtered revisions'))
2546 2547 if getattr(destrevlog, 'filteredrevs', None):
2547 2548 raise ValueError(_('destination revlog has filtered revisions'))
2548 2549
2549 2550 # lazydeltabase controls whether to reuse a cached delta, if possible.
2550 2551 oldlazydeltabase = destrevlog._lazydeltabase
2551 2552 oldamd = destrevlog._aggressivemergedeltas
2552 2553
2553 2554 try:
2554 2555 if deltareuse == self.DELTAREUSEALWAYS:
2555 2556 destrevlog._lazydeltabase = True
2556 2557 elif deltareuse == self.DELTAREUSESAMEREVS:
2557 2558 destrevlog._lazydeltabase = False
2558 2559
2559 2560 destrevlog._aggressivemergedeltas = aggressivemergedeltas or oldamd
2560 2561
2561 2562 populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
2562 2563 self.DELTAREUSESAMEREVS)
2563 2564
2564 2565 deltacomputer = _deltacomputer(destrevlog)
2565 2566 index = self.index
2566 2567 for rev in self:
2567 2568 entry = index[rev]
2568 2569
2569 2570 # Some classes override linkrev to take filtered revs into
2570 2571 # account. Use raw entry from index.
2571 2572 flags = entry[0] & 0xffff
2572 2573 linkrev = entry[4]
2573 2574 p1 = index[entry[5]][7]
2574 2575 p2 = index[entry[6]][7]
2575 2576 node = entry[7]
2576 2577
2577 2578 # (Possibly) reuse the delta from the revlog if allowed and
2578 2579 # the revlog chunk is a delta.
2579 2580 cachedelta = None
2580 2581 rawtext = None
2581 2582 if populatecachedelta:
2582 2583 dp = self.deltaparent(rev)
2583 2584 if dp != nullrev:
2584 2585 cachedelta = (dp, bytes(self._chunk(rev)))
2585 2586
2586 2587 if not cachedelta:
2587 2588 rawtext = self.revision(rev, raw=True)
2588 2589
2589 2590
2590 2591 if deltareuse == self.DELTAREUSEFULLADD:
2591 2592 destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
2592 2593 cachedelta=cachedelta,
2593 2594 node=node, flags=flags,
2594 2595 deltacomputer=deltacomputer)
2595 2596 else:
2596 2597 ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
2597 2598 checkambig=False)
2598 2599 dfh = None
2599 2600 if not destrevlog._inline:
2600 2601 dfh = destrevlog.opener(destrevlog.datafile, 'a+')
2601 2602 try:
2602 2603 destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
2603 2604 p2, flags, cachedelta, ifh, dfh,
2604 2605 deltacomputer=deltacomputer)
2605 2606 finally:
2606 2607 if dfh:
2607 2608 dfh.close()
2608 2609 ifh.close()
2609 2610
2610 2611 if addrevisioncb:
2611 2612 addrevisioncb(self, rev, node)
2612 2613 finally:
2613 2614 destrevlog._lazydeltabase = oldlazydeltabase
2614 2615 destrevlog._aggressivemergedeltas = oldamd
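    # --- illustrative usage sketch (hypothetical ``repo``, ``src`` and
    # ``dst`` objects): recomputing every delta while copying a revlog:
    #
    #   with repo.transaction('clone-revlog') as tr:
    #       src.clone(tr, dst, deltareuse=src.DELTAREUSENEVER)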