revlog: make getcandidaterevs more consistent about updating tested revs set...
Paul Morelle
r38186:faa01541 default
@@ -1,2613 +1,2614
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import collections
17 17 import contextlib
18 18 import errno
19 19 import hashlib
20 20 import heapq
21 21 import os
22 22 import re
23 23 import struct
24 24 import zlib
25 25
26 26 # import stuff from node for others to import from revlog
27 27 from .node import (
28 28 bin,
29 29 hex,
30 30 nullid,
31 31 nullrev,
32 32 wdirfilenodeids,
33 33 wdirhex,
34 34 wdirid,
35 35 wdirrev,
36 36 )
37 37 from .i18n import _
38 38 from .thirdparty import (
39 39 attr,
40 40 )
41 41 from . import (
42 42 ancestor,
43 43 error,
44 44 mdiff,
45 45 policy,
46 46 pycompat,
47 47 templatefilters,
48 48 util,
49 49 )
50 50 from .utils import (
51 51 stringutil,
52 52 )
53 53
54 54 parsers = policy.importmod(r'parsers')
55 55
56 56 # Aliased for performance.
57 57 _zlibdecompress = zlib.decompress
58 58
59 59 # revlog header flags
60 60 REVLOGV0 = 0
61 61 REVLOGV1 = 1
62 62 # Dummy value until file format is finalized.
63 63 # Reminder: change the bounds check in revlog.__init__ when this is changed.
64 64 REVLOGV2 = 0xDEAD
65 65 FLAG_INLINE_DATA = (1 << 16)
66 66 FLAG_GENERALDELTA = (1 << 17)
67 67 REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA
68 68 REVLOG_DEFAULT_FORMAT = REVLOGV1
69 69 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
70 70 REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA
71 71 REVLOGV2_FLAGS = REVLOGV1_FLAGS
72 72
73 73 # revlog index flags
74 74 REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
75 75 REVIDX_ELLIPSIS = (1 << 14) # revision hash does not match data (narrowhg)
76 76 REVIDX_EXTSTORED = (1 << 13) # revision data is stored externally
77 77 REVIDX_DEFAULT_FLAGS = 0
78 78 # stable order in which flags need to be processed and their processors applied
79 79 REVIDX_FLAGS_ORDER = [
80 80 REVIDX_ISCENSORED,
81 81 REVIDX_ELLIPSIS,
82 82 REVIDX_EXTSTORED,
83 83 ]
84 84 REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)
85 85 # bitmask for flags that could cause rawdata content change
86 86 REVIDX_RAWTEXT_CHANGING_FLAGS = REVIDX_ISCENSORED | REVIDX_EXTSTORED
87 87
88 88 # max size of revlog with inline data
89 89 _maxinline = 131072
90 90 _chunksize = 1048576
91 91
92 92 RevlogError = error.RevlogError
93 93 LookupError = error.LookupError
94 94 CensoredNodeError = error.CensoredNodeError
95 95 ProgrammingError = error.ProgrammingError
96 96
97 97 # Store flag processors (cf. 'addflagprocessor()' to register)
98 98 _flagprocessors = {
99 99 REVIDX_ISCENSORED: None,
100 100 }
101 101
102 102 _mdre = re.compile('\1\n')
103 103 def parsemeta(text):
104 104 """return (metadatadict, metadatasize)"""
105 105 # text can be a buffer, so we can't use .startswith or .index
106 106 if text[:2] != '\1\n':
107 107 return None, None
108 108 s = _mdre.search(text, 2).start()
109 109 mtext = text[2:s]
110 110 meta = {}
111 111 for l in mtext.splitlines():
112 112 k, v = l.split(": ", 1)
113 113 meta[k] = v
114 114 return meta, (s + 2)
115 115
116 116 def packmeta(meta, text):
117 117 keys = sorted(meta)
118 118 metatext = "".join("%s: %s\n" % (k, meta[k]) for k in keys)
119 119 return "\1\n%s\1\n%s" % (metatext, text)
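
# A rough example of the metadata framing handled above:
#   packmeta({'copy': 'a.txt'}, 'data')    -> '\1\ncopy: a.txt\n\1\ndata'
#   parsemeta('\1\ncopy: a.txt\n\1\ndata') -> ({'copy': 'a.txt'}, 16)
# where 16 is the offset at which the revision text itself starts.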
120 120
121 121 def _censoredtext(text):
122 122 m, offs = parsemeta(text)
123 123 return m and "censored" in m
124 124
125 125 def addflagprocessor(flag, processor):
126 126 """Register a flag processor on a revision data flag.
127 127
128 128 Invariant:
129 129 - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
130 130 and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
131 131 - Only one flag processor can be registered on a specific flag.
132 132 - flagprocessors must be 3-tuples of functions (read, write, raw) with the
133 133 following signatures:
134 134 - (read) f(self, rawtext) -> text, bool
135 135 - (write) f(self, text) -> rawtext, bool
136 136 - (raw) f(self, rawtext) -> bool
137 137 "text" is presented to the user. "rawtext" is stored in revlog data, not
138 138 directly visible to the user.
139 139 The boolean returned by these transforms is used to determine whether
140 140 the returned text can be used for hash integrity checking. For example,
141 141 if "write" returns False, then "text" is used to generate hash. If
142 142 "write" returns True, that basically means "rawtext" returned by "write"
143 143 should be used to generate hash. Usually, "write" and "read" return
144 144 different booleans. And "raw" returns the same boolean as "write".
145 145
146 146 Note: The 'raw' transform is used for changegroup generation and in some
147 147 debug commands. In this case the transform only indicates whether the
148 148 contents can be used for hash integrity checks.
149 149 """
150 150 if not flag & REVIDX_KNOWN_FLAGS:
151 151 msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
152 152 raise ProgrammingError(msg)
153 153 if flag not in REVIDX_FLAGS_ORDER:
154 154 msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
155 155 raise ProgrammingError(msg)
156 156 if flag in _flagprocessors:
157 157 msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
158 158 raise error.Abort(msg)
159 159 _flagprocessors[flag] = processor
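
# A minimal sketch of registering a processor; the names are illustrative
# only, but the tuple shape is the (read, write, raw) triple documented
# above:
#
#   def _readidentity(self, rawtext):
#       return rawtext, True
#   def _writeidentity(self, text):
#       return text, True
#   def _rawidentity(self, rawtext):
#       return True
#   addflagprocessor(REVIDX_EXTSTORED,
#                    (_readidentity, _writeidentity, _rawidentity))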
160 160
161 161 def getoffset(q):
162 162 return int(q >> 16)
163 163
164 164 def gettype(q):
165 165 return int(q & 0xFFFF)
166 166
167 167 def offset_type(offset, type):
168 168 if (type & ~REVIDX_KNOWN_FLAGS) != 0:
169 169 raise ValueError('unknown revlog index flags')
170 170 return int(int(offset) << 16 | type)
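
# For example, an entry at byte offset 100 carrying the censored flag packs
# as (100 << 16) | REVIDX_ISCENSORED, and the helpers above invert it:
#   v = offset_type(100, REVIDX_ISCENSORED)
#   getoffset(v) == 100 and gettype(v) == REVIDX_ISCENSORED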
171 171
172 172 _nullhash = hashlib.sha1(nullid)
173 173
174 174 def hash(text, p1, p2):
175 175 """generate a hash from the given text and its parent hashes
176 176
177 177 This hash combines both the current file contents and its history
178 178 in a manner that makes it easy to distinguish nodes with the same
179 179 content in the revision graph.
180 180 """
181 181 # As of now, if one of the parent nodes is null, p2 is null
182 182 if p2 == nullid:
183 183 # deep copy of a hash is faster than creating one
184 184 s = _nullhash.copy()
185 185 s.update(p1)
186 186 else:
187 187 # none of the parent nodes are nullid
188 188 if p1 < p2:
189 189 a = p1
190 190 b = p2
191 191 else:
192 192 a = p2
193 193 b = p1
194 194 s = hashlib.sha1(a)
195 195 s.update(b)
196 196 s.update(text)
197 197 return s.digest()
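
# For example, since nullid sorts before any other node, a revision with a
# single parent hashes as:
#   hash(text, p1, nullid) == hashlib.sha1(nullid + p1 + text).digest()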
198 198
199 199 def _trimchunk(revlog, revs, startidx, endidx=None):
200 200 """returns revs[startidx:endidx] without empty trailing revs
201 201 """
202 202 length = revlog.length
203 203
204 204 if endidx is None:
205 205 endidx = len(revs)
206 206
207 207 # Trim empty revs at the end, but never the very first revision of a chain
208 208 while endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0:
209 209 endidx -= 1
210 210
211 211 return revs[startidx:endidx]
212 212
213 213 def _slicechunk(revlog, revs):
214 214 """slice revs to reduce the amount of unrelated data to be read from disk.
215 215
216 216 ``revs`` is sliced into groups that should each be read in one go.
217 217 Assume that revs are sorted.
218 218 """
219 219 start = revlog.start
220 220 length = revlog.length
221 221
222 222 if len(revs) <= 1:
223 223 yield revs
224 224 return
225 225
226 226 startbyte = start(revs[0])
227 227 endbyte = start(revs[-1]) + length(revs[-1])
228 228 readdata = deltachainspan = endbyte - startbyte
229 229
230 230 chainpayload = sum(length(r) for r in revs)
231 231
232 232 if deltachainspan:
233 233 density = chainpayload / float(deltachainspan)
234 234 else:
235 235 density = 1.0
236 236
237 237 # Store the gaps in a heap to have them sorted by decreasing size
238 238 gapsheap = []
239 239 heapq.heapify(gapsheap)
240 240 prevend = None
241 241 for i, rev in enumerate(revs):
242 242 revstart = start(rev)
243 243 revlen = length(rev)
244 244
245 245 # Skip empty revisions to form larger holes
246 246 if revlen == 0:
247 247 continue
248 248
249 249 if prevend is not None:
250 250 gapsize = revstart - prevend
251 251 # only consider holes that are large enough
252 252 if gapsize > revlog._srmingapsize:
253 253 heapq.heappush(gapsheap, (-gapsize, i))
254 254
255 255 prevend = revstart + revlen
256 256
257 257 # Collect the indices of the largest holes until the density is acceptable
258 258 indicesheap = []
259 259 heapq.heapify(indicesheap)
260 260 while gapsheap and density < revlog._srdensitythreshold:
261 261 oppgapsize, gapidx = heapq.heappop(gapsheap)
262 262
263 263 heapq.heappush(indicesheap, gapidx)
264 264
265 265 # the gap sizes are stored as negatives so the heap sorts them in
266 266 # decreasing order
267 267 readdata -= (-oppgapsize)
268 268 if readdata > 0:
269 269 density = chainpayload / float(readdata)
270 270 else:
271 271 density = 1.0
272 272
273 273 # Cut the revs at collected indices
274 274 previdx = 0
275 275 while indicesheap:
276 276 idx = heapq.heappop(indicesheap)
277 277
278 278 chunk = _trimchunk(revlog, revs, previdx, idx)
279 279 if chunk:
280 280 yield chunk
281 281
282 282 previdx = idx
283 283
284 284 chunk = _trimchunk(revlog, revs, previdx)
285 285 if chunk:
286 286 yield chunk
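
# A rough worked example: with revs [0, 1, 2] stored at byte ranges 0-10,
# 10-20 and 500000-500010, the chain spans 500010 bytes but the payload is
# only 30, so the density is far below the default 0.25 threshold. Removing
# the single large gap brings the density back to 1.0, and the revs are
# sliced into [0, 1] and [2] so the gap is never read.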
287 287
288 288 @attr.s(slots=True, frozen=True)
289 289 class _deltainfo(object):
290 290 distance = attr.ib()
291 291 deltalen = attr.ib()
292 292 data = attr.ib()
293 293 base = attr.ib()
294 294 chainbase = attr.ib()
295 295 chainlen = attr.ib()
296 296 compresseddeltalen = attr.ib()
297 297
298 298 class _deltacomputer(object):
299 299 def __init__(self, revlog):
300 300 self.revlog = revlog
301 301
302 302 def _getcandidaterevs(self, p1, p2, cachedelta):
303 303 """
304 304 Provides groups of candidate revisions that are worth diffing
305 305 against, ordered by level of preference.
306 306 """
307 307 revlog = self.revlog
308 308 gdelta = revlog._generaldelta
309 309 curr = len(revlog)
310 310 prev = curr - 1
311 311 p1r, p2r = revlog.rev(p1), revlog.rev(p2)
312 312
313 313 # should we try to build a delta?
314 314 if prev != nullrev and revlog.storedeltachains:
315 315 tested = set()
316 316 # This condition is true most of the time when processing
317 317 # changegroup data into a generaldelta repo. The only time it
318 318 # isn't true is if this is the first revision in a delta chain
319 319 # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
320 320 if cachedelta and gdelta and revlog._lazydeltabase:
321 321 # Assume what we received from the server is a good choice
322 322 # building a delta against it will reuse the cached delta
323 323 yield (cachedelta[0],)
324 324 tested.add(cachedelta[0])
325 325
326 326 if gdelta:
327 327 # exclude the base already tested via lazydeltabase above, if any
328 328 parents = [p for p in (p1r, p2r)
329 329 if p != nullrev and p not in tested]
330 330
331 331 if not revlog._aggressivemergedeltas and len(parents) == 2:
332 332 parents.sort()
333 333 # To minimize the chance of having to build a fulltext,
334 334 # pick first whichever parent is closest to us (max rev)
335 335 yield (parents[1],)
336 336 # then the other one (min rev) if the first did not fit
337 337 yield (parents[0],)
338 338 tested.update(parents)
339 339 elif len(parents) > 0:
340 340 # Test all parents (1 or 2), and keep the best candidate
341 341 yield parents
342 342 tested.update(parents)
343 343
344 344 if prev not in tested:
345 346 # other approaches failed; try against prev to hopefully save us a
346 347 # fulltext.
347 347 yield (prev,)
348 tested.add(prev)
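
# With general delta and a cached delta available, the groups yielded
# above are, in the common non-aggressive case: the cached base, then the
# closer parent, then the other parent, then prev, each skipped when
# already tested. finddeltainfo stops at the first group that produces an
# acceptable delta.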
348 349
349 350 def buildtext(self, revinfo, fh):
350 351 """Builds a fulltext version of a revision
351 352
352 353 revinfo: _revisioninfo instance that contains all needed info
353 354 fh: file handle to either the .i or the .d revlog file,
354 355 depending on whether it is inlined or not
355 356 """
356 357 btext = revinfo.btext
357 358 if btext[0] is not None:
358 359 return btext[0]
359 360
360 361 revlog = self.revlog
361 362 cachedelta = revinfo.cachedelta
362 363 flags = revinfo.flags
363 364 node = revinfo.node
364 365
365 366 baserev = cachedelta[0]
366 367 delta = cachedelta[1]
367 368 # special case deltas which replace entire base; no need to decode
368 369 # base revision. this neatly avoids censored bases, which throw when
369 370 # they're decoded.
370 371 hlen = struct.calcsize(">lll")
371 372 if delta[:hlen] == mdiff.replacediffheader(revlog.rawsize(baserev),
372 373 len(delta) - hlen):
373 374 btext[0] = delta[hlen:]
374 375 else:
375 376 # deltabase is rawtext before changed by flag processors, which is
376 377 # equivalent to non-raw text
377 378 basetext = revlog.revision(baserev, _df=fh, raw=False)
378 379 btext[0] = mdiff.patch(basetext, delta)
379 380
380 381 try:
381 382 res = revlog._processflags(btext[0], flags, 'read', raw=True)
382 383 btext[0], validatehash = res
383 384 if validatehash:
384 385 revlog.checkhash(btext[0], node, p1=revinfo.p1, p2=revinfo.p2)
385 386 if flags & REVIDX_ISCENSORED:
386 387 raise RevlogError(_('node %s is not censored') % node)
387 388 except CensoredNodeError:
388 389 # must pass the censored index flag to add censored revisions
389 390 if not flags & REVIDX_ISCENSORED:
390 391 raise
391 392 return btext[0]
392 393
393 394 def _builddeltadiff(self, base, revinfo, fh):
394 395 revlog = self.revlog
395 396 t = self.buildtext(revinfo, fh)
396 397 if revlog.iscensored(base):
397 398 # deltas based on a censored revision must replace the
398 399 # full content in one patch, so delta works everywhere
399 400 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
400 401 delta = header + t
401 402 else:
402 403 ptext = revlog.revision(base, _df=fh, raw=True)
403 404 delta = mdiff.textdiff(ptext, t)
404 405
405 406 return delta
406 407
407 408 def _builddeltainfo(self, revinfo, base, fh):
408 409 # can we use the cached delta?
409 410 if revinfo.cachedelta and revinfo.cachedelta[0] == base:
410 411 delta = revinfo.cachedelta[1]
411 412 else:
412 413 delta = self._builddeltadiff(base, revinfo, fh)
413 414 revlog = self.revlog
414 415 header, data = revlog.compress(delta)
415 416 deltalen = len(header) + len(data)
416 417 chainbase = revlog.chainbase(base)
417 418 offset = revlog.end(len(revlog) - 1)
418 419 dist = deltalen + offset - revlog.start(chainbase)
419 420 if revlog._generaldelta:
420 421 deltabase = base
421 422 else:
422 423 deltabase = chainbase
423 424 chainlen, compresseddeltalen = revlog._chaininfo(base)
424 425 chainlen += 1
425 426 compresseddeltalen += deltalen
426 427 return _deltainfo(dist, deltalen, (header, data), deltabase,
427 428 chainbase, chainlen, compresseddeltalen)
428 429
429 430 def finddeltainfo(self, revinfo, fh):
430 431 """Find an acceptable delta against a candidate revision
431 432
432 433 revinfo: information about the revision (instance of _revisioninfo)
433 434 fh: file handle to either the .i or the .d revlog file,
434 435 depending on whether it is inlined or not
435 436
436 437 Returns the first acceptable candidate revision, as ordered by
437 438 _getcandidaterevs
438 439 """
439 440 cachedelta = revinfo.cachedelta
440 441 p1 = revinfo.p1
441 442 p2 = revinfo.p2
442 443 revlog = self.revlog
443 444
444 445 deltainfo = None
445 446 for candidaterevs in self._getcandidaterevs(p1, p2, cachedelta):
446 447 nominateddeltas = []
447 448 for candidaterev in candidaterevs:
448 449 # no delta for rawtext-changing revs (see "candelta" for why)
449 450 if revlog.flags(candidaterev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
450 451 continue
451 452 candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
452 453 if revlog._isgooddeltainfo(candidatedelta, revinfo):
453 454 nominateddeltas.append(candidatedelta)
454 455 if nominateddeltas:
455 456 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
456 457 break
457 458
458 459 return deltainfo
459 460
460 461 @attr.s(slots=True, frozen=True)
461 462 class _revisioninfo(object):
462 463 """Information about a revision that allows building its fulltext
463 464 node: expected hash of the revision
464 465 p1, p2: parent revs of the revision
465 466 btext: built text cache consisting of a one-element list
466 467 cachedelta: (baserev, uncompressed_delta) or None
467 468 flags: flags associated to the revision storage
468 469
469 470 One of btext[0] or cachedelta must be set.
470 471 """
471 472 node = attr.ib()
472 473 p1 = attr.ib()
473 474 p2 = attr.ib()
474 475 btext = attr.ib()
475 476 textlen = attr.ib()
476 477 cachedelta = attr.ib()
477 478 flags = attr.ib()
478 479
479 480 # index v0:
480 481 # 4 bytes: offset
481 482 # 4 bytes: compressed length
482 483 # 4 bytes: base rev
483 484 # 4 bytes: link rev
484 485 # 20 bytes: parent 1 nodeid
485 486 # 20 bytes: parent 2 nodeid
486 487 # 20 bytes: nodeid
487 488 indexformatv0 = struct.Struct(">4l20s20s20s")
488 489 indexformatv0_pack = indexformatv0.pack
489 490 indexformatv0_unpack = indexformatv0.unpack
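
# For example, indexformatv0.size is 76 bytes per record: four 4-byte
# big-endian integers followed by three 20-byte nodeids.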
490 491
491 492 class revlogoldio(object):
492 493 def __init__(self):
493 494 self.size = indexformatv0.size
494 495
495 496 def parseindex(self, data, inline):
496 497 s = self.size
497 498 index = []
498 499 nodemap = {nullid: nullrev}
499 500 n = off = 0
500 501 l = len(data)
501 502 while off + s <= l:
502 503 cur = data[off:off + s]
503 504 off += s
504 505 e = indexformatv0_unpack(cur)
505 506 # transform to revlogv1 format
506 507 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
507 508 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
508 509 index.append(e2)
509 510 nodemap[e[6]] = n
510 511 n += 1
511 512
512 513 # add the magic null revision at -1
513 514 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
514 515
515 516 return index, nodemap, None
516 517
517 518 def packentry(self, entry, node, version, rev):
518 519 if gettype(entry[0]):
519 520 raise RevlogError(_('index entry flags need revlog version 1'))
520 521 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
521 522 node(entry[5]), node(entry[6]), entry[7])
522 523 return indexformatv0_pack(*e2)
523 524
524 525 # index ng:
525 526 # 6 bytes: offset
526 527 # 2 bytes: flags
527 528 # 4 bytes: compressed length
528 529 # 4 bytes: uncompressed length
529 530 # 4 bytes: base rev
530 531 # 4 bytes: link rev
531 532 # 4 bytes: parent 1 rev
532 533 # 4 bytes: parent 2 rev
533 534 # 32 bytes: nodeid
534 535 indexformatng = struct.Struct(">Qiiiiii20s12x")
535 536 indexformatng_pack = indexformatng.pack
536 537 versionformat = struct.Struct(">I")
537 538 versionformat_pack = versionformat.pack
538 539 versionformat_unpack = versionformat.unpack
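
# For example, indexformatng.size is 64 bytes per record: one 8-byte word
# holding the 6-byte offset and 2-byte flags (cf. offset_type), six 4-byte
# integers, a 20-byte nodeid and 12 bytes of padding.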
539 540
540 541 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
541 542 # signed integer)
542 543 _maxentrysize = 0x7fffffff
543 544
544 545 class revlogio(object):
545 546 def __init__(self):
546 547 self.size = indexformatng.size
547 548
548 549 def parseindex(self, data, inline):
549 550 # call the C implementation to parse the index data
550 551 index, cache = parsers.parse_index2(data, inline)
551 552 return index, getattr(index, 'nodemap', None), cache
552 553
553 554 def packentry(self, entry, node, version, rev):
554 555 p = indexformatng_pack(*entry)
555 556 if rev == 0:
556 557 p = versionformat_pack(version) + p[4:]
557 558 return p
558 559
559 560 class revlog(object):
560 561 """
561 562 the underlying revision storage object
562 563
563 564 A revlog consists of two parts, an index and the revision data.
564 565
565 566 The index is a file with a fixed record size containing
566 567 information on each revision, including its nodeid (hash), the
567 568 nodeids of its parents, the position and offset of its data within
568 569 the data file, and the revision it's based on. Finally, each entry
569 570 contains a linkrev entry that can serve as a pointer to external
570 571 data.
571 572
572 573 The revision data itself is a linear collection of data chunks.
573 574 Each chunk represents a revision and is usually represented as a
574 575 delta against the previous chunk. To bound lookup time, runs of
575 576 deltas are limited to about 2 times the length of the original
576 577 version data. This makes retrieval of a version proportional to
577 578 its size, or O(1) relative to the number of revisions.
578 579
579 580 Both pieces of the revlog are written to in an append-only
580 581 fashion, which means we never need to rewrite a file to insert or
581 582 remove data, and can use some simple techniques to avoid the need
582 583 for locking while reading.
583 584
584 585 If checkambig, indexfile is opened with checkambig=True at
585 586 writing, to avoid file stat ambiguity.
586 587
587 588 If mmaplargeindex is True, and an mmapindexthreshold is set, the
588 589 index will be mmapped rather than read if it is larger than the
589 590 configured threshold.
590 591
591 592 If censorable is True, the revlog can have censored revisions.
592 593 """
593 594 def __init__(self, opener, indexfile, datafile=None, checkambig=False,
594 595 mmaplargeindex=False, censorable=False):
595 596 """
596 597 create a revlog object
597 598
598 599 opener is a function that abstracts the file opening operation
599 600 and can be used to implement COW semantics or the like.
600 601 """
601 602 self.indexfile = indexfile
602 603 self.datafile = datafile or (indexfile[:-2] + ".d")
603 604 self.opener = opener
604 605 # When True, indexfile is opened with checkambig=True at writing, to
605 606 # avoid file stat ambiguity.
606 607 self._checkambig = checkambig
607 608 self._censorable = censorable
608 609 # 3-tuple of (node, rev, text) for a raw revision.
609 610 self._cache = None
610 611 # Maps rev to chain base rev.
611 612 self._chainbasecache = util.lrucachedict(100)
612 613 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
613 614 self._chunkcache = (0, '')
614 615 # How much data to read and cache into the raw revlog data cache.
615 616 self._chunkcachesize = 65536
616 617 self._maxchainlen = None
617 618 self._aggressivemergedeltas = False
618 619 self.index = []
619 620 # Mapping of partial identifiers to full nodes.
620 621 self._pcache = {}
621 622 # Mapping of revision integer to full node.
622 623 self._nodecache = {nullid: nullrev}
623 624 self._nodepos = None
624 625 self._compengine = 'zlib'
625 626 self._maxdeltachainspan = -1
626 627 self._withsparseread = False
627 628 self._srdensitythreshold = 0.25
628 629 self._srmingapsize = 262144
629 630
630 631 mmapindexthreshold = None
631 632 v = REVLOG_DEFAULT_VERSION
632 633 opts = getattr(opener, 'options', None)
633 634 if opts is not None:
634 635 if 'revlogv2' in opts:
635 636 # version 2 revlogs always use generaldelta.
636 637 v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
637 638 elif 'revlogv1' in opts:
638 639 if 'generaldelta' in opts:
639 640 v |= FLAG_GENERALDELTA
640 641 else:
641 642 v = 0
642 643 if 'chunkcachesize' in opts:
643 644 self._chunkcachesize = opts['chunkcachesize']
644 645 if 'maxchainlen' in opts:
645 646 self._maxchainlen = opts['maxchainlen']
646 647 if 'aggressivemergedeltas' in opts:
647 648 self._aggressivemergedeltas = opts['aggressivemergedeltas']
648 649 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
649 650 if 'compengine' in opts:
650 651 self._compengine = opts['compengine']
651 652 if 'maxdeltachainspan' in opts:
652 653 self._maxdeltachainspan = opts['maxdeltachainspan']
653 654 if mmaplargeindex and 'mmapindexthreshold' in opts:
654 655 mmapindexthreshold = opts['mmapindexthreshold']
655 656 self._withsparseread = bool(opts.get('with-sparse-read', False))
656 657 if 'sparse-read-density-threshold' in opts:
657 658 self._srdensitythreshold = opts['sparse-read-density-threshold']
658 659 if 'sparse-read-min-gap-size' in opts:
659 660 self._srmingapsize = opts['sparse-read-min-gap-size']
660 661
661 662 if self._chunkcachesize <= 0:
662 663 raise RevlogError(_('revlog chunk cache size %r is not greater '
663 664 'than 0') % self._chunkcachesize)
664 665 elif self._chunkcachesize & (self._chunkcachesize - 1):
665 666 raise RevlogError(_('revlog chunk cache size %r is not a power '
666 667 'of 2') % self._chunkcachesize)
667 668
668 669 indexdata = ''
669 670 self._initempty = True
670 671 try:
671 672 with self._indexfp() as f:
672 673 if (mmapindexthreshold is not None and
673 674 self.opener.fstat(f).st_size >= mmapindexthreshold):
674 675 indexdata = util.buffer(util.mmapread(f))
675 676 else:
676 677 indexdata = f.read()
677 678 if len(indexdata) > 0:
678 679 v = versionformat_unpack(indexdata[:4])[0]
679 680 self._initempty = False
680 681 except IOError as inst:
681 682 if inst.errno != errno.ENOENT:
682 683 raise
683 684
684 685 self.version = v
685 686 self._inline = v & FLAG_INLINE_DATA
686 687 self._generaldelta = v & FLAG_GENERALDELTA
687 688 flags = v & ~0xFFFF
688 689 fmt = v & 0xFFFF
689 690 if fmt == REVLOGV0:
690 691 if flags:
691 692 raise RevlogError(_('unknown flags (%#04x) in version %d '
692 693 'revlog %s') %
693 694 (flags >> 16, fmt, self.indexfile))
694 695 elif fmt == REVLOGV1:
695 696 if flags & ~REVLOGV1_FLAGS:
696 697 raise RevlogError(_('unknown flags (%#04x) in version %d '
697 698 'revlog %s') %
698 699 (flags >> 16, fmt, self.indexfile))
699 700 elif fmt == REVLOGV2:
700 701 if flags & ~REVLOGV2_FLAGS:
701 702 raise RevlogError(_('unknown flags (%#04x) in version %d '
702 703 'revlog %s') %
703 704 (flags >> 16, fmt, self.indexfile))
704 705 else:
705 706 raise RevlogError(_('unknown version (%d) in revlog %s') %
706 707 (fmt, self.indexfile))
707 708
708 709 self.storedeltachains = True
709 710
710 711 self._io = revlogio()
711 712 if self.version == REVLOGV0:
712 713 self._io = revlogoldio()
713 714 try:
714 715 d = self._io.parseindex(indexdata, self._inline)
715 716 except (ValueError, IndexError):
716 717 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
717 718 self.index, nodemap, self._chunkcache = d
718 719 if nodemap is not None:
719 720 self.nodemap = self._nodecache = nodemap
720 721 if not self._chunkcache:
721 722 self._chunkclear()
722 723 # revnum -> (chain-length, sum-delta-length)
723 724 self._chaininfocache = {}
724 725 # revlog header -> revlog compressor
725 726 self._decompressors = {}
726 727
727 728 @util.propertycache
728 729 def _compressor(self):
729 730 return util.compengines[self._compengine].revlogcompressor()
730 731
731 732 def _indexfp(self, mode='r'):
732 733 """file object for the revlog's index file"""
733 734 args = {r'mode': mode}
734 735 if mode != 'r':
735 736 args[r'checkambig'] = self._checkambig
736 737 if mode == 'w':
737 738 args[r'atomictemp'] = True
738 739 return self.opener(self.indexfile, **args)
739 740
740 741 def _datafp(self, mode='r'):
741 742 """file object for the revlog's data file"""
742 743 return self.opener(self.datafile, mode=mode)
743 744
744 745 @contextlib.contextmanager
745 746 def _datareadfp(self, existingfp=None):
746 747 """file object suitable to read data"""
747 748 if existingfp is not None:
748 749 yield existingfp
749 750 else:
750 751 if self._inline:
751 752 func = self._indexfp
752 753 else:
753 754 func = self._datafp
754 755 with func() as fp:
755 756 yield fp
756 757
757 758 def tip(self):
758 759 return self.node(len(self.index) - 2)
759 760 def __contains__(self, rev):
760 761 return 0 <= rev < len(self)
761 762 def __len__(self):
762 763 return len(self.index) - 1
763 764 def __iter__(self):
764 765 return iter(xrange(len(self)))
765 766 def revs(self, start=0, stop=None):
766 767 """iterate over all rev in this revlog (from start to stop)"""
767 768 step = 1
768 769 if stop is not None:
769 770 if start > stop:
770 771 step = -1
771 772 stop += step
772 773 else:
773 774 stop = len(self)
774 775 return xrange(start, stop, step)
775 776
776 777 @util.propertycache
777 778 def nodemap(self):
778 779 self.rev(self.node(0))
779 780 return self._nodecache
780 781
781 782 def hasnode(self, node):
782 783 try:
783 784 self.rev(node)
784 785 return True
785 786 except KeyError:
786 787 return False
787 788
788 789 def candelta(self, baserev, rev):
789 790 """whether two revisions (baserev, rev) can be delta-ed or not"""
790 791 # Disable delta if either rev requires a content-changing flag
791 792 # processor (ex. LFS). This is because such flag processor can alter
792 793 # the rawtext content that the delta will be based on, and two clients
793 794 # could have a same revlog node with different flags (i.e. different
794 795 # rawtext contents) and the delta could be incompatible.
795 796 if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
796 797 or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
797 798 return False
798 799 return True
799 800
800 801 def clearcaches(self):
801 802 self._cache = None
802 803 self._chainbasecache.clear()
803 804 self._chunkcache = (0, '')
804 805 self._pcache = {}
805 806
806 807 try:
807 808 self._nodecache.clearcaches()
808 809 except AttributeError:
809 810 self._nodecache = {nullid: nullrev}
810 811 self._nodepos = None
811 812
812 813 def rev(self, node):
813 814 try:
814 815 return self._nodecache[node]
815 816 except TypeError:
816 817 raise
817 818 except RevlogError:
818 819 # parsers.c radix tree lookup failed
819 820 if node == wdirid or node in wdirfilenodeids:
820 821 raise error.WdirUnsupported
821 822 raise LookupError(node, self.indexfile, _('no node'))
822 823 except KeyError:
823 824 # pure python cache lookup failed
824 825 n = self._nodecache
825 826 i = self.index
826 827 p = self._nodepos
827 828 if p is None:
828 829 p = len(i) - 2
829 830 else:
830 831 assert p < len(i)
831 832 for r in xrange(p, -1, -1):
832 833 v = i[r][7]
833 834 n[v] = r
834 835 if v == node:
835 836 self._nodepos = r - 1
836 837 return r
837 838 if node == wdirid or node in wdirfilenodeids:
838 839 raise error.WdirUnsupported
839 840 raise LookupError(node, self.indexfile, _('no node'))
840 841
841 842 # Accessors for index entries.
842 843
843 844 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
844 845 # are flags.
845 846 def start(self, rev):
846 847 return int(self.index[rev][0] >> 16)
847 848
848 849 def flags(self, rev):
849 850 return self.index[rev][0] & 0xFFFF
850 851
851 852 def length(self, rev):
852 853 return self.index[rev][1]
853 854
854 855 def rawsize(self, rev):
855 856 """return the length of the uncompressed text for a given revision"""
856 857 l = self.index[rev][2]
857 858 if l is not None and l >= 0:
858 859 return l
859 860
860 861 t = self.revision(rev, raw=True)
861 862 return len(t)
862 863
863 864 def size(self, rev):
864 865 """length of non-raw text (processed by a "read" flag processor)"""
865 866 # fast path: if no "read" flag processor could change the content,
866 867 # size is rawsize. note: ELLIPSIS is known to not change the content.
867 868 flags = self.flags(rev)
868 869 if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
869 870 return self.rawsize(rev)
870 871
871 872 return len(self.revision(rev, raw=False))
872 873
873 874 def chainbase(self, rev):
874 875 base = self._chainbasecache.get(rev)
875 876 if base is not None:
876 877 return base
877 878
878 879 index = self.index
879 880 base = index[rev][3]
880 881 while base != rev:
881 882 rev = base
882 883 base = index[rev][3]
883 884
884 885 self._chainbasecache[rev] = base
885 886 return base
886 887
887 888 def linkrev(self, rev):
888 889 return self.index[rev][4]
889 890
890 891 def parentrevs(self, rev):
891 892 try:
892 893 entry = self.index[rev]
893 894 except IndexError:
894 895 if rev == wdirrev:
895 896 raise error.WdirUnsupported
896 897 raise
897 898
898 899 return entry[5], entry[6]
899 900
900 901 def node(self, rev):
901 902 try:
902 903 return self.index[rev][7]
903 904 except IndexError:
904 905 if rev == wdirrev:
905 906 raise error.WdirUnsupported
906 907 raise
907 908
908 909 # Derived from index values.
909 910
910 911 def end(self, rev):
911 912 return self.start(rev) + self.length(rev)
912 913
913 914 def parents(self, node):
914 915 i = self.index
915 916 d = i[self.rev(node)]
916 917 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
917 918
918 919 def chainlen(self, rev):
919 920 return self._chaininfo(rev)[0]
920 921
921 922 def _chaininfo(self, rev):
922 923 chaininfocache = self._chaininfocache
923 924 if rev in chaininfocache:
924 925 return chaininfocache[rev]
925 926 index = self.index
926 927 generaldelta = self._generaldelta
927 928 iterrev = rev
928 929 e = index[iterrev]
929 930 clen = 0
930 931 compresseddeltalen = 0
931 932 while iterrev != e[3]:
932 933 clen += 1
933 934 compresseddeltalen += e[1]
934 935 if generaldelta:
935 936 iterrev = e[3]
936 937 else:
937 938 iterrev -= 1
938 939 if iterrev in chaininfocache:
939 940 t = chaininfocache[iterrev]
940 941 clen += t[0]
941 942 compresseddeltalen += t[1]
942 943 break
943 944 e = index[iterrev]
944 945 else:
945 946 # Add text length of base since decompressing that also takes
946 947 # work. For cache hits the length is already included.
947 948 compresseddeltalen += e[1]
948 949 r = (clen, compresseddeltalen)
949 950 chaininfocache[rev] = r
950 951 return r
951 952
952 953 def _deltachain(self, rev, stoprev=None):
953 954 """Obtain the delta chain for a revision.
954 955
955 956 ``stoprev`` specifies a revision to stop at. If not specified, we
956 957 stop at the base of the chain.
957 958
958 959 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
959 960 revs in ascending order and ``stopped`` is a bool indicating whether
960 961 ``stoprev`` was hit.
961 962 """
962 963 # Try C implementation.
963 964 try:
964 965 return self.index.deltachain(rev, stoprev, self._generaldelta)
965 966 except AttributeError:
966 967 pass
967 968
968 969 chain = []
969 970
970 971 # Alias to prevent attribute lookup in tight loop.
971 972 index = self.index
972 973 generaldelta = self._generaldelta
973 974
974 975 iterrev = rev
975 976 e = index[iterrev]
976 977 while iterrev != e[3] and iterrev != stoprev:
977 978 chain.append(iterrev)
978 979 if generaldelta:
979 980 iterrev = e[3]
980 981 else:
981 982 iterrev -= 1
982 983 e = index[iterrev]
983 984
984 985 if iterrev == stoprev:
985 986 stopped = True
986 987 else:
987 988 chain.append(iterrev)
988 989 stopped = False
989 990
990 991 chain.reverse()
991 992 return chain, stopped
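
# For example, in a revlog without generaldelta where rev 5 is stored
# as a full snapshot (its base is itself) and revs 6 and 7 are deltas
# against their predecessors, _deltachain(7) returns ([5, 6, 7], False).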
992 993
993 994 def ancestors(self, revs, stoprev=0, inclusive=False):
994 995 """Generate the ancestors of 'revs' in reverse topological order.
995 996 Does not generate revs lower than stoprev.
996 997
997 998 See the documentation for ancestor.lazyancestors for more details."""
998 999
999 1000 return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
1000 1001 inclusive=inclusive)
1001 1002
1002 1003 def descendants(self, revs):
1003 1004 """Generate the descendants of 'revs' in revision order.
1004 1005
1005 1006 Yield a sequence of revision numbers starting with a child of
1006 1007 some rev in revs, i.e., each revision is *not* considered a
1007 1008 descendant of itself. Results are ordered by revision number (a
1008 1009 topological sort)."""
1009 1010 first = min(revs)
1010 1011 if first == nullrev:
1011 1012 for i in self:
1012 1013 yield i
1013 1014 return
1014 1015
1015 1016 seen = set(revs)
1016 1017 for i in self.revs(start=first + 1):
1017 1018 for x in self.parentrevs(i):
1018 1019 if x != nullrev and x in seen:
1019 1020 seen.add(i)
1020 1021 yield i
1021 1022 break
1022 1023
1023 1024 def findcommonmissing(self, common=None, heads=None):
1024 1025 """Return a tuple of the ancestors of common and the ancestors of heads
1025 1026 that are not ancestors of common. In revset terminology, we return the
1026 1027 tuple:
1027 1028
1028 1029 ::common, (::heads) - (::common)
1029 1030
1030 1031 The list is sorted by revision number, meaning it is
1031 1032 topologically sorted.
1032 1033
1033 1034 'heads' and 'common' are both lists of node IDs. If heads is
1034 1035 not supplied, uses all of the revlog's heads. If common is not
1035 1036 supplied, uses nullid."""
1036 1037 if common is None:
1037 1038 common = [nullid]
1038 1039 if heads is None:
1039 1040 heads = self.heads()
1040 1041
1041 1042 common = [self.rev(n) for n in common]
1042 1043 heads = [self.rev(n) for n in heads]
1043 1044
1044 1045 # we want the ancestors, but inclusive
1045 1046 class lazyset(object):
1046 1047 def __init__(self, lazyvalues):
1047 1048 self.addedvalues = set()
1048 1049 self.lazyvalues = lazyvalues
1049 1050
1050 1051 def __contains__(self, value):
1051 1052 return value in self.addedvalues or value in self.lazyvalues
1052 1053
1053 1054 def __iter__(self):
1054 1055 added = self.addedvalues
1055 1056 for r in added:
1056 1057 yield r
1057 1058 for r in self.lazyvalues:
1058 1059 if not r in added:
1059 1060 yield r
1060 1061
1061 1062 def add(self, value):
1062 1063 self.addedvalues.add(value)
1063 1064
1064 1065 def update(self, values):
1065 1066 self.addedvalues.update(values)
1066 1067
1067 1068 has = lazyset(self.ancestors(common))
1068 1069 has.add(nullrev)
1069 1070 has.update(common)
1070 1071
1071 1072 # take all ancestors from heads that aren't in has
1072 1073 missing = set()
1073 1074 visit = collections.deque(r for r in heads if r not in has)
1074 1075 while visit:
1075 1076 r = visit.popleft()
1076 1077 if r in missing:
1077 1078 continue
1078 1079 else:
1079 1080 missing.add(r)
1080 1081 for p in self.parentrevs(r):
1081 1082 if p not in has:
1082 1083 visit.append(p)
1083 1084 missing = list(missing)
1084 1085 missing.sort()
1085 1086 return has, [self.node(miss) for miss in missing]
1086 1087
1087 1088 def incrementalmissingrevs(self, common=None):
1088 1089 """Return an object that can be used to incrementally compute the
1089 1090 revision numbers of the ancestors of arbitrary sets that are not
1090 1091 ancestors of common. This is an ancestor.incrementalmissingancestors
1091 1092 object.
1092 1093
1093 1094 'common' is a list of revision numbers. If common is not supplied, uses
1094 1095 nullrev.
1095 1096 """
1096 1097 if common is None:
1097 1098 common = [nullrev]
1098 1099
1099 1100 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1100 1101
1101 1102 def findmissingrevs(self, common=None, heads=None):
1102 1103 """Return the revision numbers of the ancestors of heads that
1103 1104 are not ancestors of common.
1104 1105
1105 1106 More specifically, return a list of revision numbers corresponding to
1106 1107 nodes N such that every N satisfies the following constraints:
1107 1108
1108 1109 1. N is an ancestor of some node in 'heads'
1109 1110 2. N is not an ancestor of any node in 'common'
1110 1111
1111 1112 The list is sorted by revision number, meaning it is
1112 1113 topologically sorted.
1113 1114
1114 1115 'heads' and 'common' are both lists of revision numbers. If heads is
1115 1116 not supplied, uses all of the revlog's heads. If common is not
1116 1117 supplied, uses nullid."""
1117 1118 if common is None:
1118 1119 common = [nullrev]
1119 1120 if heads is None:
1120 1121 heads = self.headrevs()
1121 1122
1122 1123 inc = self.incrementalmissingrevs(common=common)
1123 1124 return inc.missingancestors(heads)
1124 1125
1125 1126 def findmissing(self, common=None, heads=None):
1126 1127 """Return the ancestors of heads that are not ancestors of common.
1127 1128
1128 1129 More specifically, return a list of nodes N such that every N
1129 1130 satisfies the following constraints:
1130 1131
1131 1132 1. N is an ancestor of some node in 'heads'
1132 1133 2. N is not an ancestor of any node in 'common'
1133 1134
1134 1135 The list is sorted by revision number, meaning it is
1135 1136 topologically sorted.
1136 1137
1137 1138 'heads' and 'common' are both lists of node IDs. If heads is
1138 1139 not supplied, uses all of the revlog's heads. If common is not
1139 1140 supplied, uses nullid."""
1140 1141 if common is None:
1141 1142 common = [nullid]
1142 1143 if heads is None:
1143 1144 heads = self.heads()
1144 1145
1145 1146 common = [self.rev(n) for n in common]
1146 1147 heads = [self.rev(n) for n in heads]
1147 1148
1148 1149 inc = self.incrementalmissingrevs(common=common)
1149 1150 return [self.node(r) for r in inc.missingancestors(heads)]
1150 1151
1151 1152 def nodesbetween(self, roots=None, heads=None):
1152 1153 """Return a topological path from 'roots' to 'heads'.
1153 1154
1154 1155 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1155 1156 topologically sorted list of all nodes N that satisfy both of
1156 1157 these constraints:
1157 1158
1158 1159 1. N is a descendant of some node in 'roots'
1159 1160 2. N is an ancestor of some node in 'heads'
1160 1161
1161 1162 Every node is considered to be both a descendant and an ancestor
1162 1163 of itself, so every reachable node in 'roots' and 'heads' will be
1163 1164 included in 'nodes'.
1164 1165
1165 1166 'outroots' is the list of reachable nodes in 'roots', i.e., the
1166 1167 subset of 'roots' that is returned in 'nodes'. Likewise,
1167 1168 'outheads' is the subset of 'heads' that is also in 'nodes'.
1168 1169
1169 1170 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1170 1171 unspecified, uses nullid as the only root. If 'heads' is
1171 1172 unspecified, uses list of all of the revlog's heads."""
1172 1173 nonodes = ([], [], [])
1173 1174 if roots is not None:
1174 1175 roots = list(roots)
1175 1176 if not roots:
1176 1177 return nonodes
1177 1178 lowestrev = min([self.rev(n) for n in roots])
1178 1179 else:
1179 1180 roots = [nullid] # Everybody's a descendant of nullid
1180 1181 lowestrev = nullrev
1181 1182 if (lowestrev == nullrev) and (heads is None):
1182 1183 # We want _all_ the nodes!
1183 1184 return ([self.node(r) for r in self], [nullid], list(self.heads()))
1184 1185 if heads is None:
1185 1186 # All nodes are ancestors, so the latest ancestor is the last
1186 1187 # node.
1187 1188 highestrev = len(self) - 1
1188 1189 # Set ancestors to None to signal that every node is an ancestor.
1189 1190 ancestors = None
1190 1191 # Set heads to an empty dictionary for later discovery of heads
1191 1192 heads = {}
1192 1193 else:
1193 1194 heads = list(heads)
1194 1195 if not heads:
1195 1196 return nonodes
1196 1197 ancestors = set()
1197 1198 # Turn heads into a dictionary so we can remove 'fake' heads.
1198 1199 # Also, later we will be using it to filter out the heads we can't
1199 1200 # find from roots.
1200 1201 heads = dict.fromkeys(heads, False)
1201 1202 # Start at the top and keep marking parents until we're done.
1202 1203 nodestotag = set(heads)
1203 1204 # Remember where the top was so we can use it as a limit later.
1204 1205 highestrev = max([self.rev(n) for n in nodestotag])
1205 1206 while nodestotag:
1206 1207 # grab a node to tag
1207 1208 n = nodestotag.pop()
1208 1209 # Never tag nullid
1209 1210 if n == nullid:
1210 1211 continue
1211 1212 # A node's revision number represents its place in a
1212 1213 # topologically sorted list of nodes.
1213 1214 r = self.rev(n)
1214 1215 if r >= lowestrev:
1215 1216 if n not in ancestors:
1216 1217 # If we are possibly a descendant of one of the roots
1217 1218 # and we haven't already been marked as an ancestor
1218 1219 ancestors.add(n) # Mark as ancestor
1219 1220 # Add non-nullid parents to list of nodes to tag.
1220 1221 nodestotag.update([p for p in self.parents(n) if
1221 1222 p != nullid])
1222 1223 elif n in heads: # We've seen it before, is it a fake head?
1223 1224 # So it is, real heads should not be the ancestors of
1224 1225 # any other heads.
1225 1226 heads.pop(n)
1226 1227 if not ancestors:
1227 1228 return nonodes
1228 1229 # Now that we have our set of ancestors, we want to remove any
1229 1230 # roots that are not ancestors.
1230 1231
1231 1232 # If one of the roots was nullid, everything is included anyway.
1232 1233 if lowestrev > nullrev:
1233 1234 # But, since we weren't, let's recompute the lowest rev to not
1234 1235 # include roots that aren't ancestors.
1235 1236
1236 1237 # Filter out roots that aren't ancestors of heads
1237 1238 roots = [root for root in roots if root in ancestors]
1238 1239 # Recompute the lowest revision
1239 1240 if roots:
1240 1241 lowestrev = min([self.rev(root) for root in roots])
1241 1242 else:
1242 1243 # No more roots? Return empty list
1243 1244 return nonodes
1244 1245 else:
1245 1246 # We are descending from nullid, and don't need to care about
1246 1247 # any other roots.
1247 1248 lowestrev = nullrev
1248 1249 roots = [nullid]
1249 1250 # Transform our roots list into a set.
1250 1251 descendants = set(roots)
1251 1252 # Also, keep the original roots so we can filter out roots that aren't
1252 1253 # 'real' roots (i.e. are descended from other roots).
1253 1254 roots = descendants.copy()
1254 1255 # Our topologically sorted list of output nodes.
1255 1256 orderedout = []
1256 1257 # Don't start at nullid since we don't want nullid in our output list,
1257 1258 # and if nullid shows up in descendants, empty parents will look like
1258 1259 # they're descendants.
1259 1260 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1260 1261 n = self.node(r)
1261 1262 isdescendant = False
1262 1263 if lowestrev == nullrev: # Everybody is a descendant of nullid
1263 1264 isdescendant = True
1264 1265 elif n in descendants:
1265 1266 # n is already a descendant
1266 1267 isdescendant = True
1267 1268 # This check only needs to be done here because all the roots
1268 1269 # will start being marked as descendants before the loop.
1269 1270 if n in roots:
1270 1271 # If n was a root, check if it's a 'real' root.
1271 1272 p = tuple(self.parents(n))
1272 1273 # If any of its parents are descendants, it's not a root.
1273 1274 if (p[0] in descendants) or (p[1] in descendants):
1274 1275 roots.remove(n)
1275 1276 else:
1276 1277 p = tuple(self.parents(n))
1277 1278 # A node is a descendant if either of its parents is a
1278 1279 # descendant. (We seeded the descendants set with the roots
1279 1280 # up there, remember?)
1280 1281 if (p[0] in descendants) or (p[1] in descendants):
1281 1282 descendants.add(n)
1282 1283 isdescendant = True
1283 1284 if isdescendant and ((ancestors is None) or (n in ancestors)):
1284 1285 # Only include nodes that are both descendants and ancestors.
1285 1286 orderedout.append(n)
1286 1287 if (ancestors is not None) and (n in heads):
1287 1288 # We're trying to figure out which heads are reachable
1288 1289 # from roots.
1289 1290 # Mark this head as having been reached
1290 1291 heads[n] = True
1291 1292 elif ancestors is None:
1292 1293 # Otherwise, we're trying to discover the heads.
1293 1294 # Assume this is a head because if it isn't, the next step
1294 1295 # will eventually remove it.
1295 1296 heads[n] = True
1296 1297 # But, obviously its parents aren't.
1297 1298 for p in self.parents(n):
1298 1299 heads.pop(p, None)
1299 1300 heads = [head for head, flag in heads.iteritems() if flag]
1300 1301 roots = list(roots)
1301 1302 assert orderedout
1302 1303 assert roots
1303 1304 assert heads
1304 1305 return (orderedout, roots, heads)
1305 1306
1306 1307 def headrevs(self):
1307 1308 try:
1308 1309 return self.index.headrevs()
1309 1310 except AttributeError:
1310 1311 return self._headrevs()
1311 1312
1312 1313 def computephases(self, roots):
1313 1314 return self.index.computephasesmapsets(roots)
1314 1315
1315 1316 def _headrevs(self):
1316 1317 count = len(self)
1317 1318 if not count:
1318 1319 return [nullrev]
1319 1320 # we won't iterate over filtered revs, so nobody is a head at start
1320 1321 ishead = [0] * (count + 1)
1321 1322 index = self.index
1322 1323 for r in self:
1323 1324 ishead[r] = 1 # I may be a head
1324 1325 e = index[r]
1325 1326 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1326 1327 return [r for r, val in enumerate(ishead) if val]
1327 1328
1328 1329 def heads(self, start=None, stop=None):
1329 1330 """return the list of all nodes that have no children
1330 1331
1331 1332 if start is specified, only heads that are descendants of
1332 1333 start will be returned
1333 1334 if stop is specified, it will consider all the revs from stop
1334 1335 as if they had no children
1335 1336 """
1336 1337 if start is None and stop is None:
1337 1338 if not len(self):
1338 1339 return [nullid]
1339 1340 return [self.node(r) for r in self.headrevs()]
1340 1341
1341 1342 if start is None:
1342 1343 start = nullid
1343 1344 if stop is None:
1344 1345 stop = []
1345 1346 stoprevs = set([self.rev(n) for n in stop])
1346 1347 startrev = self.rev(start)
1347 1348 reachable = {startrev}
1348 1349 heads = {startrev}
1349 1350
1350 1351 parentrevs = self.parentrevs
1351 1352 for r in self.revs(start=startrev + 1):
1352 1353 for p in parentrevs(r):
1353 1354 if p in reachable:
1354 1355 if r not in stoprevs:
1355 1356 reachable.add(r)
1356 1357 heads.add(r)
1357 1358 if p in heads and p not in stoprevs:
1358 1359 heads.remove(p)
1359 1360
1360 1361 return [self.node(r) for r in heads]
1361 1362
1362 1363 def children(self, node):
1363 1364 """find the children of a given node"""
1364 1365 c = []
1365 1366 p = self.rev(node)
1366 1367 for r in self.revs(start=p + 1):
1367 1368 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1368 1369 if prevs:
1369 1370 for pr in prevs:
1370 1371 if pr == p:
1371 1372 c.append(self.node(r))
1372 1373 elif p == nullrev:
1373 1374 c.append(self.node(r))
1374 1375 return c
1375 1376
1376 1377 def descendant(self, start, end):
1377 1378 if start == nullrev:
1378 1379 return True
1379 1380 for i in self.descendants([start]):
1380 1381 if i == end:
1381 1382 return True
1382 1383 elif i > end:
1383 1384 break
1384 1385 return False
1385 1386
1386 1387 def commonancestorsheads(self, a, b):
1387 1388 """calculate all the heads of the common ancestors of nodes a and b"""
1388 1389 a, b = self.rev(a), self.rev(b)
1389 1390 try:
1390 1391 ancs = self.index.commonancestorsheads(a, b)
1391 1392 except (AttributeError, OverflowError): # C implementation failed
1392 1393 ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)
1393 1394 return pycompat.maplist(self.node, ancs)
1394 1395
1395 1396 def isancestor(self, a, b):
1396 1397 """return True if node a is an ancestor of node b
1397 1398
1398 1399 The implementation of this is trivial but the use of
1399 1400 commonancestorsheads is not."""
1400 1401 return a in self.commonancestorsheads(a, b)
1401 1402
1402 1403 def ancestor(self, a, b):
1403 1404 """calculate the "best" common ancestor of nodes a and b"""
1404 1405
1405 1406 a, b = self.rev(a), self.rev(b)
1406 1407 try:
1407 1408 ancs = self.index.ancestors(a, b)
1408 1409 except (AttributeError, OverflowError):
1409 1410 ancs = ancestor.ancestors(self.parentrevs, a, b)
1410 1411 if ancs:
1411 1412 # choose a consistent winner when there's a tie
1412 1413 return min(map(self.node, ancs))
1413 1414 return nullid
1414 1415
1415 1416 def _match(self, id):
1416 1417 if isinstance(id, int):
1417 1418 # rev
1418 1419 return self.node(id)
1419 1420 if len(id) == 20:
1420 1421 # possibly a binary node
1421 1422 # odds of a binary node being all hex in ASCII are 1 in 10**25
1422 1423 try:
1423 1424 node = id
1424 1425 self.rev(node) # quick search the index
1425 1426 return node
1426 1427 except LookupError:
1427 1428 pass # may be partial hex id
1428 1429 try:
1429 1430 # str(rev)
1430 1431 rev = int(id)
1431 1432 if "%d" % rev != id:
1432 1433 raise ValueError
1433 1434 if rev < 0:
1434 1435 rev = len(self) + rev
1435 1436 if rev < 0 or rev >= len(self):
1436 1437 raise ValueError
1437 1438 return self.node(rev)
1438 1439 except (ValueError, OverflowError):
1439 1440 pass
1440 1441 if len(id) == 40:
1441 1442 try:
1442 1443 # a full hex nodeid?
1443 1444 node = bin(id)
1444 1445 self.rev(node)
1445 1446 return node
1446 1447 except (TypeError, LookupError):
1447 1448 pass
1448 1449
1449 1450 def _partialmatch(self, id):
1450 1451 # we don't care about wdirfilenodeids as they should always be full hashes
1451 1452 maybewdir = wdirhex.startswith(id)
1452 1453 try:
1453 1454 partial = self.index.partialmatch(id)
1454 1455 if partial and self.hasnode(partial):
1455 1456 if maybewdir:
1456 1457 # single 'ff...' match in radix tree, ambiguous with wdir
1457 1458 raise RevlogError
1458 1459 return partial
1459 1460 if maybewdir:
1460 1461 # no 'ff...' match in radix tree, wdir identified
1461 1462 raise error.WdirUnsupported
1462 1463 return None
1463 1464 except RevlogError:
1464 1465 # parsers.c radix tree lookup gave multiple matches
1465 1466 # fast path: for unfiltered changelog, radix tree is accurate
1466 1467 if not getattr(self, 'filteredrevs', None):
1467 1468 raise LookupError(id, self.indexfile,
1468 1469 _('ambiguous identifier'))
1469 1470 # fall through to slow path that filters hidden revisions
1470 1471 except (AttributeError, ValueError):
1471 1472 # we are pure python, or key was too short to search radix tree
1472 1473 pass
1473 1474
1474 1475 if id in self._pcache:
1475 1476 return self._pcache[id]
1476 1477
1477 1478 if len(id) <= 40:
1478 1479 try:
1479 1480 # hex(node)[:...]
1480 1481 l = len(id) // 2 # grab an even number of digits
1481 1482 prefix = bin(id[:l * 2])
1482 1483 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1483 1484 nl = [n for n in nl if hex(n).startswith(id) and
1484 1485 self.hasnode(n)]
1485 1486 if len(nl) > 0:
1486 1487 if len(nl) == 1 and not maybewdir:
1487 1488 self._pcache[id] = nl[0]
1488 1489 return nl[0]
1489 1490 raise LookupError(id, self.indexfile,
1490 1491 _('ambiguous identifier'))
1491 1492 if maybewdir:
1492 1493 raise error.WdirUnsupported
1493 1494 return None
1494 1495 except TypeError:
1495 1496 pass
1496 1497
1497 1498 def lookup(self, id):
1498 1499 """locate a node based on:
1499 1500 - revision number or str(revision number)
1500 1501 - nodeid or subset of hex nodeid
1501 1502 """
1502 1503 n = self._match(id)
1503 1504 if n is not None:
1504 1505 return n
1505 1506 n = self._partialmatch(id)
1506 1507 if n:
1507 1508 return n
1508 1509
1509 1510 raise LookupError(id, self.indexfile, _('no match found'))
1510 1511
1511 1512 def shortest(self, node, minlength=1):
1512 1513 """Find the shortest unambiguous prefix that matches node."""
1513 1514 def isvalid(prefix):
1514 1515 try:
1515 1516 node = self._partialmatch(prefix)
1516 1517 except error.RevlogError:
1517 1518 return False
1518 1519 except error.WdirUnsupported:
1519 1520 # single 'ff...' match
1520 1521 return True
1521 1522 if node is None:
1522 1523 raise LookupError(node, self.indexfile, _('no node'))
1523 1524 return True
1524 1525
1525 1526 def maybewdir(prefix):
1526 1527 return all(c == 'f' for c in prefix)
1527 1528
1528 1529 hexnode = hex(node)
1529 1530
1530 1531 def disambiguate(hexnode, minlength):
1531 1532 """Disambiguate against wdirid."""
1532 1533 for length in range(minlength, 41):
1533 1534 prefix = hexnode[:length]
1534 1535 if not maybewdir(prefix):
1535 1536 return prefix
1536 1537
1537 1538 if not getattr(self, 'filteredrevs', None):
1538 1539 try:
1539 1540 length = max(self.index.shortest(node), minlength)
1540 1541 return disambiguate(hexnode, length)
1541 1542 except RevlogError:
1542 1543 if node != wdirid:
1543 1544 raise LookupError(node, self.indexfile, _('no node'))
1544 1545 except AttributeError:
1545 1546 # Fall through to pure code
1546 1547 pass
1547 1548
1548 1549 if node == wdirid:
1549 1550 for length in range(minlength, 41):
1550 1551 prefix = hexnode[:length]
1551 1552 if isvalid(prefix):
1552 1553 return prefix
1553 1554
1554 1555 for length in range(minlength, 41):
1555 1556 prefix = hexnode[:length]
1556 1557 if isvalid(prefix):
1557 1558 return disambiguate(hexnode, length)
1558 1559
1559 1560 def cmp(self, node, text):
1560 1561 """compare text with a given file revision
1561 1562
1562 1563 returns True if text is different from what is stored.
1563 1564 """
1564 1565 p1, p2 = self.parents(node)
1565 1566 return hash(text, p1, p2) != node
1566 1567
1567 1568 def _cachesegment(self, offset, data):
1568 1569 """Add a segment to the revlog cache.
1569 1570
1570 1571 Accepts an absolute offset and the data that is at that location.
1571 1572 """
1572 1573 o, d = self._chunkcache
1573 1574 # try to add to existing cache
1574 1575 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1575 1576 self._chunkcache = o, d + data
1576 1577 else:
1577 1578 self._chunkcache = offset, data
1578 1579
1579 1580 def _readsegment(self, offset, length, df=None):
1580 1581 """Load a segment of raw data from the revlog.
1581 1582
1582 1583 Accepts an absolute offset, length to read, and an optional existing
1583 1584 file handle to read from.
1584 1585
1585 1586 If an existing file handle is passed, its seek position will be
1586 1587 changed and will NOT be restored.
1587 1588
1588 1589 Returns a str or buffer of raw byte data.
1589 1590 """
1590 1591 # Cache data both forward and backward around the requested
1591 1592 # data, in a fixed size window. This helps speed up operations
1592 1593 # involving reading the revlog backwards.
1593 1594 cachesize = self._chunkcachesize
1594 1595 realoffset = offset & ~(cachesize - 1)
1595 1596 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
1596 1597 - realoffset)
1597 1598 with self._datareadfp(df) as df:
1598 1599 df.seek(realoffset)
1599 1600 d = df.read(reallength)
1600 1601 self._cachesegment(realoffset, d)
1601 1602 if offset != realoffset or reallength != length:
1602 1603 return util.buffer(d, offset - realoffset, length)
1603 1604 return d
1604 1605
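# Editor's sketch (not part of revlog.py): the bit-masking above, isolated.
# _chunkcachesize is validated elsewhere to be a power of two, so clearing
# the low bits rounds an offset down to a cache-window boundary.
def _alignedwindow(offset, length, cachesize=65536):
    realoffset = offset & ~(cachesize - 1)
    reallength = ((offset + length + cachesize) & ~(cachesize - 1)) - realoffset
    return realoffset, reallength

# _alignedwindow(70000, 100) -> (65536, 65536): one aligned 64k read covers
# the 100 requested bytes plus surrounding data for future nearby reads.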
1605 1606 def _getsegment(self, offset, length, df=None):
1606 1607 """Obtain a segment of raw data from the revlog.
1607 1608
1608 1609 Accepts an absolute offset, length of bytes to obtain, and an
1609 1610 optional file handle to the already-opened revlog. If the file
1610 1611 handle is used, its original seek position will not be preserved.
1611 1612
1612 1613 Requests for data may be returned from a cache.
1613 1614
1614 1615 Returns a str or a buffer instance of raw byte data.
1615 1616 """
1616 1617 o, d = self._chunkcache
1617 1618 l = len(d)
1618 1619
1619 1620 # is it in the cache?
1620 1621 cachestart = offset - o
1621 1622 cacheend = cachestart + length
1622 1623 if cachestart >= 0 and cacheend <= l:
1623 1624 if cachestart == 0 and cacheend == l:
1624 1625 return d # avoid a copy
1625 1626 return util.buffer(d, cachestart, cacheend - cachestart)
1626 1627
1627 1628 return self._readsegment(offset, length, df=df)
1628 1629
1629 1630 def _getsegmentforrevs(self, startrev, endrev, df=None):
1630 1631 """Obtain a segment of raw data corresponding to a range of revisions.
1631 1632
1632 1633 Accepts the start and end revisions and an optional already-open
1633 1634 file handle to be used for reading. If the file handle is used, its
1634 1635 seek position will not be preserved.
1635 1636
1636 1637 Requests for data may be satisfied by a cache.
1637 1638
1638 1639 Returns a 2-tuple of (offset, data) for the requested range of
1639 1640 revisions. Offset is the integer offset from the beginning of the
1640 1641 revlog and data is a str or buffer of the raw byte data.
1641 1642
1642 1643 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1643 1644 to determine where each revision's data begins and ends.
1644 1645 """
1645 1646 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1646 1647 # (functions are expensive).
1647 1648 index = self.index
1648 1649 istart = index[startrev]
1649 1650 start = int(istart[0] >> 16)
1650 1651 if startrev == endrev:
1651 1652 end = start + istart[1]
1652 1653 else:
1653 1654 iend = index[endrev]
1654 1655 end = int(iend[0] >> 16) + iend[1]
1655 1656
1656 1657 if self._inline:
1657 1658 start += (startrev + 1) * self._io.size
1658 1659 end += (endrev + 1) * self._io.size
1659 1660 length = end - start
1660 1661
1661 1662 return start, self._getsegment(start, length, df=df)
1662 1663
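# Editor's sketch (not part of revlog.py): the inline adjustment above. In an
# inline revlog the 64-byte v1 index entries are interleaved with the data,
# so (rev + 1) entries precede the data of revision ``rev``.
def _inlinedataoffset(dataoffset, rev, iosize=64):
    return dataoffset + (rev + 1) * iosize

# e.g. data that the index says starts at byte 100 for rev 2 really lives at
# byte 100 + 3 * 64 = 292 of the combined inline file.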
1663 1664 def _chunk(self, rev, df=None):
1664 1665 """Obtain a single decompressed chunk for a revision.
1665 1666
1666 1667 Accepts an integer revision and an optional already-open file handle
1667 1668 to be used for reading. If used, the seek position of the file will not
1668 1669 be preserved.
1669 1670
1670 1671 Returns a str holding uncompressed data for the requested revision.
1671 1672 """
1672 1673 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1673 1674
1674 1675 def _chunks(self, revs, df=None):
1675 1676 """Obtain decompressed chunks for the specified revisions.
1676 1677
1677 1678 Accepts an iterable of numeric revisions that are assumed to be in
1678 1679 ascending order. Also accepts an optional already-open file handle
1679 1680 to be used for reading. If used, the seek position of the file will
1680 1681 not be preserved.
1681 1682
1682 1683 This function is similar to calling ``self._chunk()`` multiple times,
1683 1684 but is faster.
1684 1685
1685 1686 Returns a list with decompressed data for each requested revision.
1686 1687 """
1687 1688 if not revs:
1688 1689 return []
1689 1690 start = self.start
1690 1691 length = self.length
1691 1692 inline = self._inline
1692 1693 iosize = self._io.size
1693 1694 buffer = util.buffer
1694 1695
1695 1696 l = []
1696 1697 ladd = l.append
1697 1698
1698 1699 if not self._withsparseread:
1699 1700 slicedchunks = (revs,)
1700 1701 else:
1701 1702 slicedchunks = _slicechunk(self, revs)
1702 1703
1703 1704 for revschunk in slicedchunks:
1704 1705 firstrev = revschunk[0]
1705 1706 # Skip trailing revisions with empty diff
1706 1707 for lastrev in revschunk[::-1]:
1707 1708 if length(lastrev) != 0:
1708 1709 break
1709 1710
1710 1711 try:
1711 1712 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1712 1713 except OverflowError:
1713 1714 # issue4215 - we can't cache a run of chunks greater than
1714 1715 # 2G on Windows
1715 1716 return [self._chunk(rev, df=df) for rev in revschunk]
1716 1717
1717 1718 decomp = self.decompress
1718 1719 for rev in revschunk:
1719 1720 chunkstart = start(rev)
1720 1721 if inline:
1721 1722 chunkstart += (rev + 1) * iosize
1722 1723 chunklength = length(rev)
1723 1724 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1724 1725
1725 1726 return l
1726 1727
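# Editor's sketch (not part of revlog.py): why _chunks() beats repeated
# _chunk() calls. One contiguous read serves a whole run of revisions; each
# chunk is then sliced out of the single buffer. Decompression is elided and
# all names here are hypothetical.
def _slicerun(segment, segmentoffset, chunkranges):
    chunks = []
    for chunkstart, chunklength in chunkranges:
        rel = chunkstart - segmentoffset
        chunks.append(segment[rel:rel + chunklength])
    return chunks

# _slicerun(b'aaabbbbcc', 10, [(10, 3), (13, 4), (17, 2)])
#   -> [b'aaa', b'bbbb', b'cc']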
1727 1728 def _chunkclear(self):
1728 1729 """Clear the raw chunk cache."""
1729 1730 self._chunkcache = (0, '')
1730 1731
1731 1732 def deltaparent(self, rev):
1732 1733 """return deltaparent of the given revision"""
1733 1734 base = self.index[rev][3]
1734 1735 if base == rev:
1735 1736 return nullrev
1736 1737 elif self._generaldelta:
1737 1738 return base
1738 1739 else:
1739 1740 return rev - 1
1740 1741
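# Editor's sketch (not part of revlog.py): how the base field (index[rev][3])
# is interpreted, matching the branches above. NULLREV mirrors nullrev (-1).
NULLREV = -1

def _deltaparentof(rev, base, generaldelta):
    if base == rev:
        return NULLREV  # revision is a full snapshot; nothing to apply it to
    if generaldelta:
        return base     # generaldelta stores an explicit base revision
    return rev - 1      # classic revlogs always delta against the previous rev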
1741 1742 def revdiff(self, rev1, rev2):
1742 1743 """return or calculate a delta between two revisions
1743 1744
1744 1745 The delta calculated is in binary form and is intended to be written to
1745 1746 revlog data directly. So this function needs raw revision data.
1746 1747 """
1747 1748 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1748 1749 return bytes(self._chunk(rev2))
1749 1750
1750 1751 return mdiff.textdiff(self.revision(rev1, raw=True),
1751 1752 self.revision(rev2, raw=True))
1752 1753
1753 1754 def revision(self, nodeorrev, _df=None, raw=False):
1754 1755 """return an uncompressed revision of a given node or revision
1755 1756 number.
1756 1757
1757 1758 _df - an existing file handle to read from. (internal-only)
1758 1759 raw - an optional argument specifying if the revision data is to be
1759 1760 treated as raw data when applying flag transforms. 'raw' should be set
1760 1761 to True when generating changegroups or in debug commands.
1761 1762 """
1762 1763 if isinstance(nodeorrev, int):
1763 1764 rev = nodeorrev
1764 1765 node = self.node(rev)
1765 1766 else:
1766 1767 node = nodeorrev
1767 1768 rev = None
1768 1769
1769 1770 cachedrev = None
1770 1771 flags = None
1771 1772 rawtext = None
1772 1773 if node == nullid:
1773 1774 return ""
1774 1775 if self._cache:
1775 1776 if self._cache[0] == node:
1776 1777 # _cache only stores rawtext
1777 1778 if raw:
1778 1779 return self._cache[2]
1779 1780 # duplicated, but good for perf
1780 1781 if rev is None:
1781 1782 rev = self.rev(node)
1782 1783 if flags is None:
1783 1784 flags = self.flags(rev)
1784 1785 # no extra flags set, no flag processor runs, text = rawtext
1785 1786 if flags == REVIDX_DEFAULT_FLAGS:
1786 1787 return self._cache[2]
1787 1788 # rawtext is reusable. need to run flag processor
1788 1789 rawtext = self._cache[2]
1789 1790
1790 1791 cachedrev = self._cache[1]
1791 1792
1792 1793 # look up what we need to read
1793 1794 if rawtext is None:
1794 1795 if rev is None:
1795 1796 rev = self.rev(node)
1796 1797
1797 1798 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1798 1799 if stopped:
1799 1800 rawtext = self._cache[2]
1800 1801
1801 1802 # drop cache to save memory
1802 1803 self._cache = None
1803 1804
1804 1805 bins = self._chunks(chain, df=_df)
1805 1806 if rawtext is None:
1806 1807 rawtext = bytes(bins[0])
1807 1808 bins = bins[1:]
1808 1809
1809 1810 rawtext = mdiff.patches(rawtext, bins)
1810 1811 self._cache = (node, rev, rawtext)
1811 1812
1812 1813 if flags is None:
1813 1814 if rev is None:
1814 1815 rev = self.rev(node)
1815 1816 flags = self.flags(rev)
1816 1817
1817 1818 text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
1818 1819 if validatehash:
1819 1820 self.checkhash(text, node, rev=rev)
1820 1821
1821 1822 return text
1822 1823
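# Editor's sketch (not part of revlog.py): the cache fast paths above in
# miniature. ``cache`` mirrors self._cache as (node, rev, rawtext); a None
# result means the caller must rebuild or run flag processors. All names are
# hypothetical and the flag-processor step is stubbed out.
def _cachedtext(cache, node, raw, flags):
    if cache is None or cache[0] != node:
        return None        # miss: reconstruct from the delta chain
    if raw:
        return cache[2]    # rawtext is exactly what was requested
    if flags == 0:         # REVIDX_DEFAULT_FLAGS: text == rawtext
        return cache[2]
    return None            # rawtext reusable, but processors must still run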
1823 1824 def hash(self, text, p1, p2):
1824 1825 """Compute a node hash.
1825 1826
1826 1827 Available as a function so that subclasses can replace the hash
1827 1828 as needed.
1828 1829 """
1829 1830 return hash(text, p1, p2)
1830 1831
1831 1832 def _processflags(self, text, flags, operation, raw=False):
1832 1833 """Inspect revision data flags and applies transforms defined by
1833 1834 registered flag processors.
1834 1835
1835 1836 ``text`` - the revision data to process
1836 1837 ``flags`` - the revision flags
1837 1838 ``operation`` - the operation being performed (read or write)
1838 1839 ``raw`` - an optional argument describing if the raw transform should be
1839 1840 applied.
1840 1841
1841 1842 This method processes the flags in the order (or reverse order if
1842 1843 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
1843 1844 flag processors registered for present flags. The order of flags defined
1844 1845 in REVIDX_FLAGS_ORDER must be stable since the transforms do not commute.
1845 1846
1846 1847 Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
1847 1848 processed text and ``validatehash`` is a bool indicating whether the
1848 1849 returned text should be checked for hash integrity.
1849 1850
1850 1851 Note: If the ``raw`` argument is set, it has precedence over the
1851 1852 operation and will only update the value of ``validatehash``.
1852 1853 """
1853 1854 # fast path: no flag processors will run
1854 1855 if flags == 0:
1855 1856 return text, True
1856 1857 if operation not in ('read', 'write'):
1857 1858 raise ProgrammingError(_("invalid '%s' operation") % operation)
1858 1859 # Check all flags are known.
1859 1860 if flags & ~REVIDX_KNOWN_FLAGS:
1860 1861 raise RevlogError(_("incompatible revision flag '%#x'") %
1861 1862 (flags & ~REVIDX_KNOWN_FLAGS))
1862 1863 validatehash = True
1863 1864 # Depending on the operation (read or write), the order might be
1864 1865 # reversed due to non-commutative transforms.
1865 1866 orderedflags = REVIDX_FLAGS_ORDER
1866 1867 if operation == 'write':
1867 1868 orderedflags = reversed(orderedflags)
1868 1869
1869 1870 for flag in orderedflags:
1870 1871 # If a flagprocessor has been registered for a known flag, apply the
1871 1872 # related operation transform and update result tuple.
1872 1873 if flag & flags:
1873 1874 vhash = True
1874 1875
1875 1876 if flag not in _flagprocessors:
1876 1877 message = _("missing processor for flag '%#x'") % (flag)
1877 1878 raise RevlogError(message)
1878 1879
1879 1880 processor = _flagprocessors[flag]
1880 1881 if processor is not None:
1881 1882 readtransform, writetransform, rawtransform = processor
1882 1883
1883 1884 if raw:
1884 1885 vhash = rawtransform(self, text)
1885 1886 elif operation == 'read':
1886 1887 text, vhash = readtransform(self, text)
1887 1888 else: # write operation
1888 1889 text, vhash = writetransform(self, text)
1889 1890 validatehash = validatehash and vhash
1890 1891
1891 1892 return text, validatehash
1892 1893
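# Editor's sketch (not part of revlog.py): the shape of a flag processor as
# consumed above -- a (read, write, raw) triple. The transforms here are
# deliberately trivial placeholders; only the signatures matter.
def _readtransform(rl, text):
    return text.upper(), True     # -> (transformed text, validatehash)

def _writetransform(rl, text):
    return text.lower(), True

def _rawtransform(rl, rawtext):
    return False                  # -> validatehash only; rawtext is unchanged

EXAMPLEPROCESSOR = (_readtransform, _writetransform, _rawtransform)
# registered for some flag via addflagprocessor(); on 'read',
# _processflags() then calls _readtransform(self, text) when the flag is set.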
1893 1894 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1894 1895 """Check node hash integrity.
1895 1896
1896 1897 Available as a function so that subclasses can extend hash mismatch
1897 1898 behaviors as needed.
1898 1899 """
1899 1900 try:
1900 1901 if p1 is None and p2 is None:
1901 1902 p1, p2 = self.parents(node)
1902 1903 if node != self.hash(text, p1, p2):
1903 1904 revornode = rev
1904 1905 if revornode is None:
1905 1906 revornode = templatefilters.short(hex(node))
1906 1907 raise RevlogError(_("integrity check failed on %s:%s")
1907 1908 % (self.indexfile, pycompat.bytestr(revornode)))
1908 1909 except RevlogError:
1909 1910 if self._censorable and _censoredtext(text):
1910 1911 raise error.CensoredNodeError(self.indexfile, node, text)
1911 1912 raise
1912 1913
1913 1914 def _enforceinlinesize(self, tr, fp=None):
1914 1915 """Check if the revlog is too big for inline and convert if so.
1915 1916
1916 1917 This should be called after revisions are added to the revlog. If the
1917 1918 revlog has grown too large to be an inline revlog, it will convert it
1918 1919 to use multiple index and data files.
1919 1920 """
1920 1921 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
1921 1922 return
1922 1923
1923 1924 trinfo = tr.find(self.indexfile)
1924 1925 if trinfo is None:
1925 1926 raise RevlogError(_("%s not found in the transaction")
1926 1927 % self.indexfile)
1927 1928
1928 1929 trindex = trinfo[2]
1929 1930 if trindex is not None:
1930 1931 dataoff = self.start(trindex)
1931 1932 else:
1932 1933 # revlog was stripped at start of transaction, use all leftover data
1933 1934 trindex = len(self) - 1
1934 1935 dataoff = self.end(-2)
1935 1936
1936 1937 tr.add(self.datafile, dataoff)
1937 1938
1938 1939 if fp:
1939 1940 fp.flush()
1940 1941 fp.close()
1941 1942
1942 1943 with self._datafp('w') as df:
1943 1944 for r in self:
1944 1945 df.write(self._getsegmentforrevs(r, r)[1])
1945 1946
1946 1947 with self._indexfp('w') as fp:
1947 1948 self.version &= ~FLAG_INLINE_DATA
1948 1949 self._inline = False
1949 1950 io = self._io
1950 1951 for i in self:
1951 1952 e = io.packentry(self.index[i], self.node, self.version, i)
1952 1953 fp.write(e)
1953 1954
1954 1955 # the temp file replaces the real index when we exit the context
1955 1956 # manager
1956 1957
1957 1958 tr.replace(self.indexfile, trindex * self._io.size)
1958 1959 self._chunkclear()
1959 1960
1960 1961 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1961 1962 node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
1962 1963 """add a revision to the log
1963 1964
1964 1965 text - the revision data to add
1965 1966 transaction - the transaction object used for rollback
1966 1967 link - the linkrev data to add
1967 1968 p1, p2 - the parent nodeids of the revision
1968 1969 cachedelta - an optional precomputed delta
1969 1970 node - nodeid of revision; typically node is not specified, and it is
1970 1971 computed by default as hash(text, p1, p2); subclasses might use a
1971 1972 different hashing method (and override checkhash() in that case)
1972 1973 flags - the known flags to set on the revision
1973 1974 deltacomputer - an optional _deltacomputer instance shared between
1974 1975 multiple calls
1975 1976 """
1976 1977 if link == nullrev:
1977 1978 raise RevlogError(_("attempted to add linkrev -1 to %s")
1978 1979 % self.indexfile)
1979 1980
1980 1981 if flags:
1981 1982 node = node or self.hash(text, p1, p2)
1982 1983
1983 1984 rawtext, validatehash = self._processflags(text, flags, 'write')
1984 1985
1985 1986 # If the flag processor modifies the revision data, ignore any provided
1986 1987 # cachedelta.
1987 1988 if rawtext != text:
1988 1989 cachedelta = None
1989 1990
1990 1991 if len(rawtext) > _maxentrysize:
1991 1992 raise RevlogError(
1992 1993 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
1993 1994 % (self.indexfile, len(rawtext)))
1994 1995
1995 1996 node = node or self.hash(rawtext, p1, p2)
1996 1997 if node in self.nodemap:
1997 1998 return node
1998 1999
1999 2000 if validatehash:
2000 2001 self.checkhash(rawtext, node, p1=p1, p2=p2)
2001 2002
2002 2003 return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
2003 2004 flags, cachedelta=cachedelta,
2004 2005 deltacomputer=deltacomputer)
2005 2006
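# Editor's usage sketch (not part of revlog.py): the common calling shape.
# ``rl`` is a revlog, ``tr`` an open transaction; all names are hypothetical.
def _storetext(rl, tr, linkrev, p1node, p2node, text):
    # node defaults to hash(text, p1, p2); storing an already-known text is
    # a no-op that returns the existing node.
    return rl.addrevision(text, tr, linkrev, p1node, p2node)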
2006 2007 def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
2007 2008 cachedelta=None, deltacomputer=None):
2008 2009 """add a raw revision with known flags, node and parents
2009 2010 useful when reusing a revision not stored in this revlog (e.g. received
2010 2011 over the wire, or read from an external bundle).
2011 2012 """
2012 2013 dfh = None
2013 2014 if not self._inline:
2014 2015 dfh = self._datafp("a+")
2015 2016 ifh = self._indexfp("a+")
2016 2017 try:
2017 2018 return self._addrevision(node, rawtext, transaction, link, p1, p2,
2018 2019 flags, cachedelta, ifh, dfh,
2019 2020 deltacomputer=deltacomputer)
2020 2021 finally:
2021 2022 if dfh:
2022 2023 dfh.close()
2023 2024 ifh.close()
2024 2025
2025 2026 def compress(self, data):
2026 2027 """Generate a possibly-compressed representation of data."""
2027 2028 if not data:
2028 2029 return '', data
2029 2030
2030 2031 compressed = self._compressor.compress(data)
2031 2032
2032 2033 if compressed:
2033 2034 # The revlog compressor added the header in the returned data.
2034 2035 return '', compressed
2035 2036
2036 2037 if data[0:1] == '\0':
2037 2038 return '', data
2038 2039 return 'u', data
2039 2040
2040 2041 def decompress(self, data):
2041 2042 """Decompress a revlog chunk.
2042 2043
2043 2044 The chunk is expected to begin with a header identifying the
2044 2045 format type so it can be routed to an appropriate decompressor.
2045 2046 """
2046 2047 if not data:
2047 2048 return data
2048 2049
2049 2050 # Revlogs are read much more frequently than they are written and many
2050 2051 # chunks only take microseconds to decompress, so performance is
2051 2052 # important here.
2052 2053 #
2053 2054 # We can make a few assumptions about revlogs:
2054 2055 #
2055 2056 # 1) the majority of chunks will be compressed (as opposed to inline
2056 2057 # raw data).
2057 2058 # 2) decompressing *any* data will likely be at least 10x slower than
2058 2059 # returning raw inline data.
2059 2060 # 3) we want to prioritize common and officially supported compression
2060 2061 # engines
2061 2062 #
2062 2063 # It follows that we want to optimize for "decompress compressed data
2063 2064 # when encoded with common and officially supported compression engines"
2064 2065 # case over "raw data" and "data encoded by less common or non-official
2065 2066 # compression engines." That is why we have the inline lookup first
2066 2067 # followed by the compengines lookup.
2067 2068 #
2068 2069 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2069 2070 # compressed chunks. And this matters for changelog and manifest reads.
2070 2071 t = data[0:1]
2071 2072
2072 2073 if t == 'x':
2073 2074 try:
2074 2075 return _zlibdecompress(data)
2075 2076 except zlib.error as e:
2076 2077 raise RevlogError(_('revlog decompress error: %s') %
2077 2078 stringutil.forcebytestr(e))
2078 2079 # '\0' is more common than 'u' so it goes first.
2079 2080 elif t == '\0':
2080 2081 return data
2081 2082 elif t == 'u':
2082 2083 return util.buffer(data, 1)
2083 2084
2084 2085 try:
2085 2086 compressor = self._decompressors[t]
2086 2087 except KeyError:
2087 2088 try:
2088 2089 engine = util.compengines.forrevlogheader(t)
2089 2090 compressor = engine.revlogcompressor()
2090 2091 self._decompressors[t] = compressor
2091 2092 except KeyError:
2092 2093 raise RevlogError(_('unknown compression type %r') % t)
2093 2094
2094 2095 return compressor.decompress(data)
2095 2096
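# Editor's sketch (not part of revlog.py): the header-byte routing above,
# stripped down to the built-in cases (pluggable engines elided). Note that
# a zlib stream begins with 0x78, i.e. ASCII 'x', which is why no separate
# marker byte is needed for zlib chunks.
import zlib

def _route(data):
    t = data[0:1]
    if t == b'x':
        return zlib.decompress(data)  # zlib stream, header included
    if t == b'\0':
        return data                   # raw data that happens to start with NUL
    if t == b'u':
        return data[1:]               # explicitly marked uncompressed data
    raise ValueError('unknown compression header %r' % t)

# _route(b'uhello') -> b'hello'
# _route(zlib.compress(b'hello')) -> b'hello'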
2096 2097 def _isgooddeltainfo(self, deltainfo, revinfo):
2097 2098 """Returns True if the given delta is good. Good means that it is within
2098 2099 the disk span, disk size, and chain length bounds that we know to be
2099 2100 performant."""
2100 2101 if deltainfo is None:
2101 2102 return False
2102 2103
2103 2104 # - 'deltainfo.distance' is the distance from the base revision --
2104 2105 # bounding it limits the amount of I/O we need to do.
2105 2106 # - 'deltainfo.compresseddeltalen' is the sum of the total size of
2106 2107 # deltas we need to apply -- bounding it limits the amount of CPU
2107 2108 # we consume.
2108 2109
2109 2110 textlen = revinfo.textlen
2110 2111 defaultmax = textlen * 4
2111 2112 maxdist = self._maxdeltachainspan
2112 2113 if not maxdist:
2113 2114 maxdist = deltainfo.distance # ensure the conditional pass
2114 2115 maxdist = max(maxdist, defaultmax)
2115 2116 if (deltainfo.distance > maxdist or deltainfo.deltalen > textlen or
2116 2117 deltainfo.compresseddeltalen > textlen * 2 or
2117 2118 (self._maxchainlen and deltainfo.chainlen > self._maxchainlen)):
2118 2119 return False
2119 2120
2120 2121 return True
2121 2122
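# Editor's sketch (not part of revlog.py): the distance bound above, worked
# through with hypothetical numbers.
def _effectivemaxdist(textlen, maxdeltachainspan, distance):
    maxdist = maxdeltachainspan
    if not maxdist:
        maxdist = distance            # unlimited span: this bound is a no-op
    return max(maxdist, textlen * 4)  # never tighter than 4x the text size

# _effectivemaxdist(1000, 5000, 9000) -> 5000, so a delta spanning 9000
# bytes of the data file would be rejected for a 1000-byte text.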
2122 2123 def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
2123 2124 cachedelta, ifh, dfh, alwayscache=False,
2124 2125 deltacomputer=None):
2125 2126 """internal function to add revisions to the log
2126 2127
2127 2128 see addrevision for argument descriptions.
2128 2129
2129 2130 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2130 2131
2131 2132 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2132 2133 be used.
2133 2134
2134 2135 invariants:
2135 2136 - rawtext is optional (can be None); if not set, cachedelta must be set.
2136 2137 if both are set, they must correspond to each other.
2137 2138 """
2138 2139 if node == nullid:
2139 2140 raise RevlogError(_("%s: attempt to add null revision") %
2140 2141 (self.indexfile))
2141 2142 if node == wdirid or node in wdirfilenodeids:
2142 2143 raise RevlogError(_("%s: attempt to add wdir revision") %
2143 2144 (self.indexfile))
2144 2145
2145 2146 if self._inline:
2146 2147 fh = ifh
2147 2148 else:
2148 2149 fh = dfh
2149 2150
2150 2151 btext = [rawtext]
2151 2152
2152 2153 curr = len(self)
2153 2154 prev = curr - 1
2154 2155 offset = self.end(prev)
2155 2156 p1r, p2r = self.rev(p1), self.rev(p2)
2156 2157
2157 2158 # full versions are inserted when the needed deltas
2158 2159 # become comparable to the uncompressed text
2159 2160 if rawtext is None:
2160 2161 # need rawtext size, before changed by flag processors, which is
2161 2162 # the non-raw size. use revlog explicitly to avoid filelog's extra
2162 2163 # logic that might remove metadata size.
2163 2164 textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
2164 2165 cachedelta[1])
2165 2166 else:
2166 2167 textlen = len(rawtext)
2167 2168
2168 2169 if deltacomputer is None:
2169 2170 deltacomputer = _deltacomputer(self)
2170 2171
2171 2172 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2172 2173
2173 2174 # no delta for flag processor revision (see "candelta" for why)
2174 2175 # we do not call candelta here, since only one revision needs testing,
2175 2176 # and this avoids the overhead of fetching the flags again.
2176 2177 if flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
2177 2178 deltainfo = None
2178 2179 else:
2179 2180 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2180 2181
2181 2182 if deltainfo is not None:
2182 2183 base = deltainfo.base
2183 2184 chainbase = deltainfo.chainbase
2184 2185 data = deltainfo.data
2185 2186 l = deltainfo.deltalen
2186 2187 else:
2187 2188 rawtext = deltacomputer.buildtext(revinfo, fh)
2188 2189 data = self.compress(rawtext)
2189 2190 l = len(data[1]) + len(data[0])
2190 2191 base = chainbase = curr
2191 2192
2192 2193 e = (offset_type(offset, flags), l, textlen,
2193 2194 base, link, p1r, p2r, node)
2194 2195 self.index.insert(-1, e)
2195 2196 self.nodemap[node] = curr
2196 2197
2197 2198 entry = self._io.packentry(e, self.node, self.version, curr)
2198 2199 self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
2199 2200
2200 2201 if alwayscache and rawtext is None:
2201 2202 rawtext = deltacomputer.buildtext(revinfo, fh)
2202 2203
2203 2204 if type(rawtext) == bytes: # only accept immutable objects
2204 2205 self._cache = (node, curr, rawtext)
2205 2206 self._chainbasecache[curr] = chainbase
2206 2207 return node
2207 2208
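# Editor's sketch (not part of revlog.py): the first field of the index
# entry built above packs the data offset and the 16 flag bits into one
# integer, which is what offset_type(offset, flags) produces:
def _packoffsetflags(offset, flags):
    return (offset << 16) | flags

def _unpackoffsetflags(field):
    return field >> 16, field & 0xffff

# _unpackoffsetflags(_packoffsetflags(1024, 1 << 15)) -> (1024, 32768),
# matching the ``istart[0] >> 16`` and ``entry[0] & 0xffff`` reads elsewhere
# in this file.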
2208 2209 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
2209 2210 # Files opened in a+ mode have inconsistent behavior on various
2210 2211 # platforms. Windows requires that a file positioning call be made
2211 2212 # when the file handle transitions between reads and writes. See
2212 2213 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2213 2214 # platforms, Python or the platform itself can be buggy. Some versions
2214 2215 # of Solaris have been observed to not append at the end of the file
2215 2216 # if the file was seeked to before the end. See issue4943 for more.
2216 2217 #
2217 2218 # We work around this issue by inserting a seek() before writing.
2218 2219 # Note: This is likely not necessary on Python 3.
2219 2220 ifh.seek(0, os.SEEK_END)
2220 2221 if dfh:
2221 2222 dfh.seek(0, os.SEEK_END)
2222 2223
2223 2224 curr = len(self) - 1
2224 2225 if not self._inline:
2225 2226 transaction.add(self.datafile, offset)
2226 2227 transaction.add(self.indexfile, curr * len(entry))
2227 2228 if data[0]:
2228 2229 dfh.write(data[0])
2229 2230 dfh.write(data[1])
2230 2231 ifh.write(entry)
2231 2232 else:
2232 2233 offset += curr * self._io.size
2233 2234 transaction.add(self.indexfile, offset, curr)
2234 2235 ifh.write(entry)
2235 2236 ifh.write(data[0])
2236 2237 ifh.write(data[1])
2237 2238 self._enforceinlinesize(transaction, ifh)
2238 2239
2239 2240 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
2240 2241 """
2241 2242 add a delta group
2242 2243
2243 2244 given a set of deltas, add them to the revision log. the
2244 2245 first delta is against its parent, which should be in our
2245 2246 log, the rest are against the previous delta.
2246 2247
2247 2248 If ``addrevisioncb`` is defined, it will be called with arguments of
2248 2249 this revlog and the node that was added.
2249 2250 """
2250 2251
2251 2252 nodes = []
2252 2253
2253 2254 r = len(self)
2254 2255 end = 0
2255 2256 if r:
2256 2257 end = self.end(r - 1)
2257 2258 ifh = self._indexfp("a+")
2258 2259 isize = r * self._io.size
2259 2260 if self._inline:
2260 2261 transaction.add(self.indexfile, end + isize, r)
2261 2262 dfh = None
2262 2263 else:
2263 2264 transaction.add(self.indexfile, isize, r)
2264 2265 transaction.add(self.datafile, end)
2265 2266 dfh = self._datafp("a+")
2266 2267 def flush():
2267 2268 if dfh:
2268 2269 dfh.flush()
2269 2270 ifh.flush()
2270 2271 try:
2271 2272 deltacomputer = _deltacomputer(self)
2272 2273 # loop through our set of deltas
2273 2274 for data in deltas:
2274 2275 node, p1, p2, linknode, deltabase, delta, flags = data
2275 2276 link = linkmapper(linknode)
2276 2277 flags = flags or REVIDX_DEFAULT_FLAGS
2277 2278
2278 2279 nodes.append(node)
2279 2280
2280 2281 if node in self.nodemap:
2281 2282 # this can happen if two branches make the same change
2282 2283 continue
2283 2284
2284 2285 for p in (p1, p2):
2285 2286 if p not in self.nodemap:
2286 2287 raise LookupError(p, self.indexfile,
2287 2288 _('unknown parent'))
2288 2289
2289 2290 if deltabase not in self.nodemap:
2290 2291 raise LookupError(deltabase, self.indexfile,
2291 2292 _('unknown delta base'))
2292 2293
2293 2294 baserev = self.rev(deltabase)
2294 2295
2295 2296 if baserev != nullrev and self.iscensored(baserev):
2296 2297 # if base is censored, delta must be full replacement in a
2297 2298 # single patch operation
2298 2299 hlen = struct.calcsize(">lll")
2299 2300 oldlen = self.rawsize(baserev)
2300 2301 newlen = len(delta) - hlen
2301 2302 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2302 2303 raise error.CensoredBaseError(self.indexfile,
2303 2304 self.node(baserev))
2304 2305
2305 2306 if not flags and self._peek_iscensored(baserev, delta, flush):
2306 2307 flags |= REVIDX_ISCENSORED
2307 2308
2308 2309 # We assume consumers of addrevisioncb will want to retrieve
2309 2310 # the added revision, which will require a call to
2310 2311 # revision(). revision() will fast path if there is a cache
2311 2312 # hit. So, we tell _addrevision() to always cache in this case.
2312 2313 # We're only using addgroup() in the context of changegroup
2313 2314 # generation so the revision data can always be handled as raw
2314 2315 # by the flagprocessor.
2315 2316 self._addrevision(node, None, transaction, link,
2316 2317 p1, p2, flags, (baserev, delta),
2317 2318 ifh, dfh,
2318 2319 alwayscache=bool(addrevisioncb),
2319 2320 deltacomputer=deltacomputer)
2320 2321
2321 2322 if addrevisioncb:
2322 2323 addrevisioncb(self, node)
2323 2324
2324 2325 if not dfh and not self._inline:
2325 2326 # addrevision switched from inline to conventional
2326 2327 # reopen the index
2327 2328 ifh.close()
2328 2329 dfh = self._datafp("a+")
2329 2330 ifh = self._indexfp("a+")
2330 2331 finally:
2331 2332 if dfh:
2332 2333 dfh.close()
2333 2334 ifh.close()
2334 2335
2335 2336 return nodes
2336 2337
2337 2338 def iscensored(self, rev):
2338 2339 """Check if a file revision is censored."""
2339 2340 if not self._censorable:
2340 2341 return False
2341 2342
2342 2343 return self.flags(rev) & REVIDX_ISCENSORED
2343 2344
2344 2345 def _peek_iscensored(self, baserev, delta, flush):
2345 2346 """Quickly check if a delta produces a censored revision."""
2346 2347 if not self._censorable:
2347 2348 return False
2348 2349
2349 2350 # Fragile heuristic: unless new file meta keys are added alphabetically
2350 2351 # preceding "censored", all censored revisions are prefixed by
2351 2352 # "\1\ncensored:". A delta producing such a censored revision must be a
2352 2353 # full-replacement delta, so we inspect the first and only patch in the
2353 2354 # delta for this prefix.
2354 2355 hlen = struct.calcsize(">lll")
2355 2356 if len(delta) <= hlen:
2356 2357 return False
2357 2358
2358 2359 oldlen = self.rawsize(baserev)
2359 2360 newlen = len(delta) - hlen
2360 2361 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2361 2362 return False
2362 2363
2363 2364 add = "\1\ncensored:"
2364 2365 addlen = len(add)
2365 2366 return newlen >= addlen and delta[hlen:hlen + addlen] == add
2366 2367
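# Editor's sketch (not part of revlog.py): the delta shape the heuristic
# above looks for. A full-replacement delta is one (start, end, newlength)
# header followed by the replacement text; the helper name is hypothetical.
import struct

def _fullreplacement(oldlen, newtext):
    return struct.pack(">lll", 0, oldlen, len(newtext)) + newtext

# hlen is struct.calcsize(">lll") == 12, so for a censored revision:
# _fullreplacement(5, b"\1\ncensored: gone")[12:23] == b"\1\ncensored:"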
2367 2368 def getstrippoint(self, minlink):
2368 2369 """find the minimum rev that must be stripped to strip the linkrev
2369 2370
2370 2371 Returns a tuple containing the minimum rev and a set of all revs that
2371 2372 have linkrevs that will be broken by this strip.
2372 2373 """
2373 2374 brokenrevs = set()
2374 2375 strippoint = len(self)
2375 2376
2376 2377 heads = {}
2377 2378 futurelargelinkrevs = set()
2378 2379 for head in self.headrevs():
2379 2380 headlinkrev = self.linkrev(head)
2380 2381 heads[head] = headlinkrev
2381 2382 if headlinkrev >= minlink:
2382 2383 futurelargelinkrevs.add(headlinkrev)
2383 2384
2384 2385 # This algorithm involves walking down the rev graph, starting at the
2385 2386 # heads. Since the revs are topologically sorted according to linkrev,
2386 2387 # once all head linkrevs are below the minlink, we know there are
2387 2388 # no more revs that could have a linkrev greater than minlink.
2388 2389 # So we can stop walking.
2389 2390 while futurelargelinkrevs:
2390 2391 strippoint -= 1
2391 2392 linkrev = heads.pop(strippoint)
2392 2393
2393 2394 if linkrev < minlink:
2394 2395 brokenrevs.add(strippoint)
2395 2396 else:
2396 2397 futurelargelinkrevs.remove(linkrev)
2397 2398
2398 2399 for p in self.parentrevs(strippoint):
2399 2400 if p != nullrev:
2400 2401 plinkrev = self.linkrev(p)
2401 2402 heads[p] = plinkrev
2402 2403 if plinkrev >= minlink:
2403 2404 futurelargelinkrevs.add(plinkrev)
2404 2405
2405 2406 return strippoint, brokenrevs
2406 2407
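# Editor's sketch (not part of revlog.py): the walk above, simplified to a
# linear history so the heads bookkeeping degenerates to a right-to-left
# scan (the real code walks the DAG via parentrevs()).
def _strippoint(linkrevs, minlink):
    strippoint = len(linkrevs)
    brokenrevs = set()
    while any(l >= minlink for l in linkrevs[:strippoint]):
        strippoint -= 1
        if linkrevs[strippoint] < minlink:
            brokenrevs.add(strippoint)
    return strippoint, brokenrevs

# _strippoint([0, 1, 4, 2, 5], minlink=4) -> (2, {3}): rev 2 is the first
# rev to truncate, and rev 3 (linkrev 2) gets caught in the truncation, so
# the caller must save and re-add it.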
2407 2408 def strip(self, minlink, transaction):
2408 2409 """truncate the revlog on the first revision with a linkrev >= minlink
2409 2410
2410 2411 This function is called when we're stripping revision minlink and
2411 2412 its descendants from the repository.
2412 2413
2413 2414 We have to remove all revisions with linkrev >= minlink, because
2414 2415 the equivalent changelog revisions will be renumbered after the
2415 2416 strip.
2416 2417
2417 2418 So we truncate the revlog on the first of these revisions, and
2418 2419 trust that the caller has saved the revisions that shouldn't be
2419 2420 removed and that it'll re-add them after this truncation.
2420 2421 """
2421 2422 if len(self) == 0:
2422 2423 return
2423 2424
2424 2425 rev, _ = self.getstrippoint(minlink)
2425 2426 if rev == len(self):
2426 2427 return
2427 2428
2428 2429 # first truncate the files on disk
2429 2430 end = self.start(rev)
2430 2431 if not self._inline:
2431 2432 transaction.add(self.datafile, end)
2432 2433 end = rev * self._io.size
2433 2434 else:
2434 2435 end += rev * self._io.size
2435 2436
2436 2437 transaction.add(self.indexfile, end)
2437 2438
2438 2439 # then reset internal state in memory to forget those revisions
2439 2440 self._cache = None
2440 2441 self._chaininfocache = {}
2441 2442 self._chunkclear()
2442 2443 for x in xrange(rev, len(self)):
2443 2444 del self.nodemap[self.node(x)]
2444 2445
2445 2446 del self.index[rev:-1]
2446 2447 self._nodepos = None
2447 2448
2448 2449 def checksize(self):
2449 2450 expected = 0
2450 2451 if len(self):
2451 2452 expected = max(0, self.end(len(self) - 1))
2452 2453
2453 2454 try:
2454 2455 with self._datafp() as f:
2455 2456 f.seek(0, 2)
2456 2457 actual = f.tell()
2457 2458 dd = actual - expected
2458 2459 except IOError as inst:
2459 2460 if inst.errno != errno.ENOENT:
2460 2461 raise
2461 2462 dd = 0
2462 2463
2463 2464 try:
2464 2465 f = self.opener(self.indexfile)
2465 2466 f.seek(0, 2)
2466 2467 actual = f.tell()
2467 2468 f.close()
2468 2469 s = self._io.size
2469 2470 i = max(0, actual // s)
2470 2471 di = actual - (i * s)
2471 2472 if self._inline:
2472 2473 databytes = 0
2473 2474 for r in self:
2474 2475 databytes += max(0, self.length(r))
2475 2476 dd = 0
2476 2477 di = actual - len(self) * s - databytes
2477 2478 except IOError as inst:
2478 2479 if inst.errno != errno.ENOENT:
2479 2480 raise
2480 2481 di = 0
2481 2482
2482 2483 return (dd, di)
2483 2484
2484 2485 def files(self):
2485 2486 res = [self.indexfile]
2486 2487 if not self._inline:
2487 2488 res.append(self.datafile)
2488 2489 return res
2489 2490
2490 2491 DELTAREUSEALWAYS = 'always'
2491 2492 DELTAREUSESAMEREVS = 'samerevs'
2492 2493 DELTAREUSENEVER = 'never'
2493 2494
2494 2495 DELTAREUSEFULLADD = 'fulladd'
2495 2496
2496 2497 DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}
2497 2498
2498 2499 def clone(self, tr, destrevlog, addrevisioncb=None,
2499 2500 deltareuse=DELTAREUSESAMEREVS, aggressivemergedeltas=None):
2500 2501 """Copy this revlog to another, possibly with format changes.
2501 2502
2502 2503 The destination revlog will contain the same revisions and nodes.
2503 2504 However, it may not be bit-for-bit identical due to e.g. delta encoding
2504 2505 differences.
2505 2506
2506 2507 The ``deltareuse`` argument controls how deltas from the existing revlog
2507 2508 are preserved in the destination revlog. The argument can have the
2508 2509 following values:
2509 2510
2510 2511 DELTAREUSEALWAYS
2511 2512 Deltas will always be reused (if possible), even if the destination
2512 2513 revlog would not select the same revisions for the delta. This is the
2513 2514 fastest mode of operation.
2514 2515 DELTAREUSESAMEREVS
2515 2516 Deltas will be reused if the destination revlog would pick the same
2516 2517 revisions for the delta. This mode strikes a balance between speed
2517 2518 and optimization.
2518 2519 DELTAREUSENEVER
2519 2520 Deltas will never be reused. This is the slowest mode of execution.
2520 2521 This mode can be used to recompute deltas (e.g. if the diff/delta
2521 2522 algorithm changes).
2522 2523
2523 2524 Delta computation can be slow, so the choice of delta reuse policy can
2524 2525 significantly affect run time.
2525 2526
2526 2527 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2527 2528 two extremes. Deltas will be reused if they are appropriate. But if the
2528 2529 delta could choose a better revision, it will do so. This means if you
2529 2530 are converting a non-generaldelta revlog to a generaldelta revlog,
2530 2531 deltas will be recomputed if the delta's parent isn't a parent of the
2531 2532 revision.
2532 2533
2533 2534 In addition to the delta policy, the ``aggressivemergedeltas`` argument
2534 2535 controls whether to compute deltas against both parents for merges.
2535 2536 If None, the destination revlog's current setting is kept (a usage sketch follows this method).
2536 2537 """
2537 2538 if deltareuse not in self.DELTAREUSEALL:
2538 2539 raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)
2539 2540
2540 2541 if len(destrevlog):
2541 2542 raise ValueError(_('destination revlog is not empty'))
2542 2543
2543 2544 if getattr(self, 'filteredrevs', None):
2544 2545 raise ValueError(_('source revlog has filtered revisions'))
2545 2546 if getattr(destrevlog, 'filteredrevs', None):
2546 2547 raise ValueError(_('destination revlog has filtered revisions'))
2547 2548
2548 2549 # lazydeltabase controls whether to reuse a cached delta, if possible.
2549 2550 oldlazydeltabase = destrevlog._lazydeltabase
2550 2551 oldamd = destrevlog._aggressivemergedeltas
2551 2552
2552 2553 try:
2553 2554 if deltareuse == self.DELTAREUSEALWAYS:
2554 2555 destrevlog._lazydeltabase = True
2555 2556 elif deltareuse == self.DELTAREUSESAMEREVS:
2556 2557 destrevlog._lazydeltabase = False
2557 2558
2558 2559 destrevlog._aggressivemergedeltas = aggressivemergedeltas or oldamd
2559 2560
2560 2561 populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
2561 2562 self.DELTAREUSESAMEREVS)
2562 2563
2563 2564 deltacomputer = _deltacomputer(destrevlog)
2564 2565 index = self.index
2565 2566 for rev in self:
2566 2567 entry = index[rev]
2567 2568
2568 2569 # Some classes override linkrev to take filtered revs into
2569 2570 # account. Use raw entry from index.
2570 2571 flags = entry[0] & 0xffff
2571 2572 linkrev = entry[4]
2572 2573 p1 = index[entry[5]][7]
2573 2574 p2 = index[entry[6]][7]
2574 2575 node = entry[7]
2575 2576
2576 2577 # (Possibly) reuse the delta from the revlog if allowed and
2577 2578 # the revlog chunk is a delta.
2578 2579 cachedelta = None
2579 2580 rawtext = None
2580 2581 if populatecachedelta:
2581 2582 dp = self.deltaparent(rev)
2582 2583 if dp != nullrev:
2583 2584 cachedelta = (dp, bytes(self._chunk(rev)))
2584 2585
2585 2586 if not cachedelta:
2586 2587 rawtext = self.revision(rev, raw=True)
2587 2588
2589 2590 if deltareuse == self.DELTAREUSEFULLADD:
2590 2591 destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
2591 2592 cachedelta=cachedelta,
2592 2593 node=node, flags=flags,
2593 2594 deltacomputer=deltacomputer)
2594 2595 else:
2595 2596 ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
2596 2597 checkambig=False)
2597 2598 dfh = None
2598 2599 if not destrevlog._inline:
2599 2600 dfh = destrevlog.opener(destrevlog.datafile, 'a+')
2600 2601 try:
2601 2602 destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
2602 2603 p2, flags, cachedelta, ifh, dfh,
2603 2604 deltacomputer=deltacomputer)
2604 2605 finally:
2605 2606 if dfh:
2606 2607 dfh.close()
2607 2608 ifh.close()
2608 2609
2609 2610 if addrevisioncb:
2610 2611 addrevisioncb(self, rev, node)
2611 2612 finally:
2612 2613 destrevlog._lazydeltabase = oldlazydeltabase
2613 2614 destrevlog._aggressivemergedeltas = oldamd
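# Editor's usage sketch (not part of revlog.py): copying a revlog while
# recomputing every delta. ``srcrevlog``, ``destrevlog`` and ``tr`` (an open
# transaction) are assumed to exist; repository-upgrade code drives clone()
# in essentially this way.
def _recomputealldeltas(srcrevlog, destrevlog, tr):
    srcrevlog.clone(tr, destrevlog,
                    deltareuse=srcrevlog.DELTAREUSENEVER)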