revlog: efficient implementation of 'descendant'...
Boris Feld
r38532:6db38c9d default
@@ -1,2622 +1,2620 @@
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import collections
17 17 import contextlib
18 18 import errno
19 19 import hashlib
20 20 import heapq
21 21 import os
22 22 import re
23 23 import struct
24 24 import zlib
25 25
26 26 # import stuff from node for others to import from revlog
27 27 from .node import (
28 28 bin,
29 29 hex,
30 30 nullid,
31 31 nullrev,
32 32 wdirfilenodeids,
33 33 wdirhex,
34 34 wdirid,
35 35 wdirrev,
36 36 )
37 37 from .i18n import _
38 38 from .thirdparty import (
39 39 attr,
40 40 )
41 41 from . import (
42 42 ancestor,
43 43 error,
44 44 mdiff,
45 45 policy,
46 46 pycompat,
47 47 templatefilters,
48 48 util,
49 49 )
50 50 from .utils import (
51 51 stringutil,
52 52 )
53 53
54 54 parsers = policy.importmod(r'parsers')
55 55
56 56 # Aliased for performance.
57 57 _zlibdecompress = zlib.decompress
58 58
59 59 # revlog header flags
60 60 REVLOGV0 = 0
61 61 REVLOGV1 = 1
62 62 # Dummy value until file format is finalized.
63 63 # Reminder: change the bounds check in revlog.__init__ when this is changed.
64 64 REVLOGV2 = 0xDEAD
65 65 FLAG_INLINE_DATA = (1 << 16)
66 66 FLAG_GENERALDELTA = (1 << 17)
67 67 REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA
68 68 REVLOG_DEFAULT_FORMAT = REVLOGV1
69 69 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
70 70 REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA
71 71 REVLOGV2_FLAGS = REVLOGV1_FLAGS
72 72
73 73 # revlog index flags
74 74 REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
75 75 REVIDX_ELLIPSIS = (1 << 14) # revision hash does not match data (narrowhg)
76 76 REVIDX_EXTSTORED = (1 << 13) # revision data is stored externally
77 77 REVIDX_DEFAULT_FLAGS = 0
78 78 # stable order in which flags need to be processed and their processors applied
79 79 REVIDX_FLAGS_ORDER = [
80 80 REVIDX_ISCENSORED,
81 81 REVIDX_ELLIPSIS,
82 82 REVIDX_EXTSTORED,
83 83 ]
84 84 REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)
85 85 # bitmask for flags that could cause rawdata content change
86 86 REVIDX_RAWTEXT_CHANGING_FLAGS = REVIDX_ISCENSORED | REVIDX_EXTSTORED
87 87
88 88 # max size of revlog with inline data
89 89 _maxinline = 131072
90 90 _chunksize = 1048576
91 91
92 92 RevlogError = error.RevlogError
93 93 LookupError = error.LookupError
94 94 CensoredNodeError = error.CensoredNodeError
95 95 ProgrammingError = error.ProgrammingError
96 96
97 97 # Store flag processors (cf. 'addflagprocessor()' to register)
98 98 _flagprocessors = {
99 99 REVIDX_ISCENSORED: None,
100 100 }
101 101
102 102 _mdre = re.compile('\1\n')
103 103 def parsemeta(text):
104 104 """return (metadatadict, metadatasize)"""
105 105 # text can be buffer, so we can't use .startswith or .index
106 106 if text[:2] != '\1\n':
107 107 return None, None
108 108 s = _mdre.search(text, 2).start()
109 109 mtext = text[2:s]
110 110 meta = {}
111 111 for l in mtext.splitlines():
112 112 k, v = l.split(": ", 1)
113 113 meta[k] = v
114 114 return meta, (s + 2)
115 115
116 116 def packmeta(meta, text):
117 117 keys = sorted(meta)
118 118 metatext = "".join("%s: %s\n" % (k, meta[k]) for k in keys)
119 119 return "\1\n%s\1\n%s" % (metatext, text)
120 120
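[Editorial sketch, not part of the diff: a minimal round trip through the
two helpers above. The metadata format wraps sorted "key: value" pairs
between two "\1\n" markers; the 'copy'/'copyrev' keys are illustrative.]

    raw = packmeta({'copy': 'a.txt', 'copyrev': '0' * 40}, 'file body')
    meta, offset = parsemeta(raw)        # offset points past the marker
    assert meta == {'copy': 'a.txt', 'copyrev': '0' * 40}
    assert raw[offset:] == 'file body'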
121 121 def _censoredtext(text):
122 122 m, offs = parsemeta(text)
123 123 return m and "censored" in m
124 124
125 125 def addflagprocessor(flag, processor):
126 126 """Register a flag processor on a revision data flag.
127 127
128 128 Invariant:
129 129 - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
130 130 and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
131 131 - Only one flag processor can be registered on a specific flag.
132 132 - flagprocessors must be 3-tuples of functions (read, write, raw) with the
133 133 following signatures:
134 134 - (read) f(self, rawtext) -> text, bool
135 135 - (write) f(self, text) -> rawtext, bool
136 136 - (raw) f(self, rawtext) -> bool
137 137 "text" is presented to the user. "rawtext" is stored in revlog data, not
138 138 directly visible to the user.
139 139 The boolean returned by these transforms is used to determine whether
140 140 the returned text can be used for hash integrity checking. For example,
141 141 if "write" returns False, then "text" is used to generate the hash. If
142 142 "write" returns True, the "rawtext" returned by "write" should be
143 143 used to generate the hash. Usually, "write" and "read" return
144 144 different booleans, and "raw" returns the same boolean as "write".
145 145
146 146 Note: The 'raw' transform is used for changegroup generation and in some
147 147 debug commands. In this case the transform only indicates whether the
148 148 contents can be used for hash integrity checks.
149 149 """
150 150 if not flag & REVIDX_KNOWN_FLAGS:
151 151 msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
152 152 raise ProgrammingError(msg)
153 153 if flag not in REVIDX_FLAGS_ORDER:
154 154 msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
155 155 raise ProgrammingError(msg)
156 156 if flag in _flagprocessors:
157 157 msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
158 158 raise error.Abort(msg)
159 159 _flagprocessors[flag] = processor
160 160
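[Editorial sketch, not part of the diff: registering a processor on one of
the known flags. The 3-tuple follows the (read, write, raw) contract in the
docstring above; REVIDX_EXTSTORED and the no-op callables are illustrative.]

    def _extread(self, rawtext):
        return rawtext, True    # text shown to the user, hash-checkable
    def _extwrite(self, text):
        return text, True       # rawtext to store, hash-checkable
    def _extraw(self, rawtext):
        return True             # rawtext usable for hash integrity checks
    addflagprocessor(REVIDX_EXTSTORED, (_extread, _extwrite, _extraw))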
161 161 def getoffset(q):
162 162 return int(q >> 16)
163 163
164 164 def gettype(q):
165 165 return int(q & 0xFFFF)
166 166
167 167 def offset_type(offset, type):
168 168 if (type & ~REVIDX_KNOWN_FLAGS) != 0:
169 169 raise ValueError('unknown revlog index flags')
170 170 return int(int(offset) << 16 | type)
171 171
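[Editorial sketch, not part of the diff: the first field of an index entry
packs the byte offset into the high bits and the 16 flag bits into the low
bits, which the two accessors above undo.]

    packed = offset_type(4096, REVIDX_ISCENSORED)
    assert getoffset(packed) == 4096
    assert gettype(packed) == REVIDX_ISCENSORED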
172 172 _nullhash = hashlib.sha1(nullid)
173 173
174 174 def hash(text, p1, p2):
175 175 """generate a hash from the given text and its parent hashes
176 176
177 177 This hash combines both the current file contents and its history
178 178 in a manner that makes it easy to distinguish nodes with the same
179 179 content in the revision graph.
180 180 """
181 181 # As of now, if one of the parent nodes is null, p2 is the null one
182 182 if p2 == nullid:
183 183 # deep copy of a hash is faster than creating one
184 184 s = _nullhash.copy()
185 185 s.update(p1)
186 186 else:
187 187 # none of the parent nodes are nullid
188 188 if p1 < p2:
189 189 a = p1
190 190 b = p2
191 191 else:
192 192 a = p2
193 193 b = p1
194 194 s = hashlib.sha1(a)
195 195 s.update(b)
196 196 s.update(text)
197 197 return s.digest()
198 198
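[Editorial sketch, not part of the diff: because parents are sorted before
hashing, a node id is independent of parent order; the 20-byte ids below
are stand-ins.]

    n1 = hash('text', 'a' * 20, 'b' * 20)
    n2 = hash('text', 'b' * 20, 'a' * 20)
    assert n1 == n2 and len(n1) == 20    # sha1 digests are 20 bytes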
199 199 def _trimchunk(revlog, revs, startidx, endidx=None):
200 200 """returns revs[startidx:endidx] without empty trailing revs
201 201 """
202 202 length = revlog.length
203 203
204 204 if endidx is None:
205 205 endidx = len(revs)
206 206
207 207 # Trim empty revs at the end, but never the very first revision of a chain
208 208 while endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0:
209 209 endidx -= 1
210 210
211 211 return revs[startidx:endidx]
212 212
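[Editorial sketch, not part of the diff: _trimchunk drops trailing empty
revisions but never the first revision of a chain. '_fakelog' is a
hypothetical stand-in exposing only the 'length' method the helper uses.]

    class _fakelog(object):
        def length(self, rev):
            return {0: 10, 1: 0, 2: 7, 3: 0, 4: 0}[rev]
    assert _trimchunk(_fakelog(), [0, 1, 2, 3, 4], 0) == [0, 1, 2]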
213 213 def _slicechunk(revlog, revs):
214 214 """slice revs to reduce the amount of unrelated data to be read from disk.
215 215
216 216 ``revs`` is sliced into groups that should be read in one time.
217 217 Assume that revs are sorted.
218 218 """
219 219 start = revlog.start
220 220 length = revlog.length
221 221
222 222 if len(revs) <= 1:
223 223 yield revs
224 224 return
225 225
226 226 startbyte = start(revs[0])
227 227 endbyte = start(revs[-1]) + length(revs[-1])
228 228 readdata = deltachainspan = endbyte - startbyte
229 229
230 230 chainpayload = sum(length(r) for r in revs)
231 231
232 232 if deltachainspan:
233 233 density = chainpayload / float(deltachainspan)
234 234 else:
235 235 density = 1.0
236 236
237 237 # Store the gaps in a heap to have them sorted by decreasing size
238 238 gapsheap = []
239 239 heapq.heapify(gapsheap)
240 240 prevend = None
241 241 for i, rev in enumerate(revs):
242 242 revstart = start(rev)
243 243 revlen = length(rev)
244 244
245 245 # Skip empty revisions to form larger holes
246 246 if revlen == 0:
247 247 continue
248 248
249 249 if prevend is not None:
250 250 gapsize = revstart - prevend
251 251 # only consider holes that are large enough
252 252 if gapsize > revlog._srmingapsize:
253 253 heapq.heappush(gapsheap, (-gapsize, i))
254 254
255 255 prevend = revstart + revlen
256 256
257 257 # Collect the indices of the largest holes until the density is acceptable
258 258 indicesheap = []
259 259 heapq.heapify(indicesheap)
260 260 while gapsheap and density < revlog._srdensitythreshold:
261 261 oppgapsize, gapidx = heapq.heappop(gapsheap)
262 262
263 263 heapq.heappush(indicesheap, gapidx)
264 264
265 265 # the gap sizes are stored as negatives so that the heap
266 266 # sorts them in decreasing order
267 267 readdata -= (-oppgapsize)
268 268 if readdata > 0:
269 269 density = chainpayload / float(readdata)
270 270 else:
271 271 density = 1.0
272 272
273 273 # Cut the revs at collected indices
274 274 previdx = 0
275 275 while indicesheap:
276 276 idx = heapq.heappop(indicesheap)
277 277
278 278 chunk = _trimchunk(revlog, revs, previdx, idx)
279 279 if chunk:
280 280 yield chunk
281 281
282 282 previdx = idx
283 283
284 284 chunk = _trimchunk(revlog, revs, previdx)
285 285 if chunk:
286 286 yield chunk
287 287
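[Editorial sketch, not part of the diff: with a density threshold of 0.5
and a 10-byte minimum gap, the large hole between revs 1 and 2 drops the
read density low enough that the revs are sliced into two separately-read
groups. '_fakesparselog' is hypothetical and exposes only what
_slicechunk consults.]

    class _fakesparselog(object):
        _srdensitythreshold = 0.5
        _srmingapsize = 10
        def start(self, rev):
            return {0: 0, 1: 5, 2: 1000, 3: 1005}[rev]
        def length(self, rev):
            return 5
    assert list(_slicechunk(_fakesparselog(), [0, 1, 2, 3])) == \
        [[0, 1], [2, 3]]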
288 288 @attr.s(slots=True, frozen=True)
289 289 class _deltainfo(object):
290 290 distance = attr.ib()
291 291 deltalen = attr.ib()
292 292 data = attr.ib()
293 293 base = attr.ib()
294 294 chainbase = attr.ib()
295 295 chainlen = attr.ib()
296 296 compresseddeltalen = attr.ib()
297 297
298 298 class _deltacomputer(object):
299 299 def __init__(self, revlog):
300 300 self.revlog = revlog
301 301
302 302 def _getcandidaterevs(self, p1, p2, cachedelta):
303 303 """
304 304 Provides revisions that are interesting to diff against,
305 305 grouped by level of preference.
306 306 """
307 307 revlog = self.revlog
308 308 gdelta = revlog._generaldelta
309 309 curr = len(revlog)
310 310 prev = curr - 1
311 311 p1r, p2r = revlog.rev(p1), revlog.rev(p2)
312 312
313 313 # should we try to build a delta?
314 314 if prev != nullrev and revlog.storedeltachains:
315 315 tested = set()
316 316 # This condition is true most of the time when processing
317 317 # changegroup data into a generaldelta repo. The only time it
318 318 # isn't true is if this is the first revision in a delta chain
319 319 # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
320 320 if cachedelta and gdelta and revlog._lazydeltabase:
321 321 # Assume what we received from the server is a good choice;
322 322 # delta building will reuse the cache
323 323 yield (cachedelta[0],)
324 324 tested.add(cachedelta[0])
325 325
326 326 if gdelta:
327 327 # exclude the base we already tested lazily, if any
328 328 parents = [p for p in (p1r, p2r)
329 329 if p != nullrev and p not in tested]
330 330
331 331 if not revlog._aggressivemergedeltas and len(parents) == 2:
332 332 parents.sort()
333 333 # To minimize the chance of having to build a fulltext,
334 334 # pick first whichever parent is closest to us (max rev)
335 335 yield (parents[1],)
336 336 # then the other one (min rev) if the first did not fit
337 337 yield (parents[0],)
338 338 tested.update(parents)
339 339 elif len(parents) > 0:
340 340 # Test all parents (1 or 2), and keep the best candidate
341 341 yield parents
342 342 tested.update(parents)
343 343
344 344 if prev not in tested:
345 345 # other approaches failed; try against prev to hopefully save us a
346 346 # fulltext.
347 347 yield (prev,)
348 348 tested.add(prev)
349 349
350 350 def buildtext(self, revinfo, fh):
351 351 """Builds a fulltext version of a revision
352 352
353 353 revinfo: _revisioninfo instance that contains all needed info
354 354 fh: file handle to either the .i or the .d revlog file,
355 355 depending on whether it is inlined or not
356 356 """
357 357 btext = revinfo.btext
358 358 if btext[0] is not None:
359 359 return btext[0]
360 360
361 361 revlog = self.revlog
362 362 cachedelta = revinfo.cachedelta
363 363 flags = revinfo.flags
364 364 node = revinfo.node
365 365
366 366 baserev = cachedelta[0]
367 367 delta = cachedelta[1]
368 368 # special case deltas which replace entire base; no need to decode
369 369 # base revision. this neatly avoids censored bases, which throw when
370 370 # they're decoded.
371 371 hlen = struct.calcsize(">lll")
372 372 if delta[:hlen] == mdiff.replacediffheader(revlog.rawsize(baserev),
373 373 len(delta) - hlen):
374 374 btext[0] = delta[hlen:]
375 375 else:
376 376 # deltabase is rawtext before changed by flag processors, which is
377 377 # equivalent to non-raw text
378 378 basetext = revlog.revision(baserev, _df=fh, raw=False)
379 379 btext[0] = mdiff.patch(basetext, delta)
380 380
381 381 try:
382 382 res = revlog._processflags(btext[0], flags, 'read', raw=True)
383 383 btext[0], validatehash = res
384 384 if validatehash:
385 385 revlog.checkhash(btext[0], node, p1=revinfo.p1, p2=revinfo.p2)
386 386 if flags & REVIDX_ISCENSORED:
387 387 raise RevlogError(_('node %s is not censored') % node)
388 388 except CensoredNodeError:
389 389 # must pass the censored index flag to add censored revisions
390 390 if not flags & REVIDX_ISCENSORED:
391 391 raise
392 392 return btext[0]
393 393
394 394 def _builddeltadiff(self, base, revinfo, fh):
395 395 revlog = self.revlog
396 396 t = self.buildtext(revinfo, fh)
397 397 if revlog.iscensored(base):
398 398 # deltas based on a censored revision must replace the
399 399 # full content in one patch, so delta works everywhere
400 400 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
401 401 delta = header + t
402 402 else:
403 403 ptext = revlog.revision(base, _df=fh, raw=True)
404 404 delta = mdiff.textdiff(ptext, t)
405 405
406 406 return delta
407 407
408 408 def _builddeltainfo(self, revinfo, base, fh):
409 409 # can we use the cached delta?
410 410 if revinfo.cachedelta and revinfo.cachedelta[0] == base:
411 411 delta = revinfo.cachedelta[1]
412 412 else:
413 413 delta = self._builddeltadiff(base, revinfo, fh)
414 414 revlog = self.revlog
415 415 header, data = revlog.compress(delta)
416 416 deltalen = len(header) + len(data)
417 417 chainbase = revlog.chainbase(base)
418 418 offset = revlog.end(len(revlog) - 1)
419 419 dist = deltalen + offset - revlog.start(chainbase)
420 420 if revlog._generaldelta:
421 421 deltabase = base
422 422 else:
423 423 deltabase = chainbase
424 424 chainlen, compresseddeltalen = revlog._chaininfo(base)
425 425 chainlen += 1
426 426 compresseddeltalen += deltalen
427 427 return _deltainfo(dist, deltalen, (header, data), deltabase,
428 428 chainbase, chainlen, compresseddeltalen)
429 429
430 430 def finddeltainfo(self, revinfo, fh):
431 431 """Find an acceptable delta against a candidate revision
432 432
433 433 revinfo: information about the revision (instance of _revisioninfo)
434 434 fh: file handle to either the .i or the .d revlog file,
435 435 depending on whether it is inlined or not
436 436
437 437 Returns the first acceptable candidate revision, as ordered by
438 438 _getcandidaterevs
439 439 """
440 440 cachedelta = revinfo.cachedelta
441 441 p1 = revinfo.p1
442 442 p2 = revinfo.p2
443 443 revlog = self.revlog
444 444
445 445 deltainfo = None
446 446 for candidaterevs in self._getcandidaterevs(p1, p2, cachedelta):
447 447 nominateddeltas = []
448 448 for candidaterev in candidaterevs:
449 449 # no delta for rawtext-changing revs (see "candelta" for why)
450 450 if revlog.flags(candidaterev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
451 451 continue
452 452 candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
453 453 if revlog._isgooddeltainfo(candidatedelta, revinfo):
454 454 nominateddeltas.append(candidatedelta)
455 455 if nominateddeltas:
456 456 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
457 457 break
458 458
459 459 return deltainfo
460 460
461 461 @attr.s(slots=True, frozen=True)
462 462 class _revisioninfo(object):
463 463 """Information about a revision that allows building its fulltext
464 464 node: expected hash of the revision
465 465 p1, p2: parent revs of the revision
466 466 btext: built text cache consisting of a one-element list
467 467 cachedelta: (baserev, uncompressed_delta) or None
468 468 flags: flags associated with the revision storage
469 469
470 470 One of btext[0] or cachedelta must be set.
471 471 """
472 472 node = attr.ib()
473 473 p1 = attr.ib()
474 474 p2 = attr.ib()
475 475 btext = attr.ib()
476 476 textlen = attr.ib()
477 477 cachedelta = attr.ib()
478 478 flags = attr.ib()
479 479
480 480 # index v0:
481 481 # 4 bytes: offset
482 482 # 4 bytes: compressed length
483 483 # 4 bytes: base rev
484 484 # 4 bytes: link rev
485 485 # 20 bytes: parent 1 nodeid
486 486 # 20 bytes: parent 2 nodeid
487 487 # 20 bytes: nodeid
488 488 indexformatv0 = struct.Struct(">4l20s20s20s")
489 489 indexformatv0_pack = indexformatv0.pack
490 490 indexformatv0_unpack = indexformatv0.unpack
491 491
492 492 class revlogoldio(object):
493 493 def __init__(self):
494 494 self.size = indexformatv0.size
495 495
496 496 def parseindex(self, data, inline):
497 497 s = self.size
498 498 index = []
499 499 nodemap = {nullid: nullrev}
500 500 n = off = 0
501 501 l = len(data)
502 502 while off + s <= l:
503 503 cur = data[off:off + s]
504 504 off += s
505 505 e = indexformatv0_unpack(cur)
506 506 # transform to revlogv1 format
507 507 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
508 508 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
509 509 index.append(e2)
510 510 nodemap[e[6]] = n
511 511 n += 1
512 512
513 513 # add the magic null revision at -1
514 514 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
515 515
516 516 return index, nodemap, None
517 517
518 518 def packentry(self, entry, node, version, rev):
519 519 if gettype(entry[0]):
520 520 raise RevlogError(_('index entry flags need revlog version 1'))
521 521 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
522 522 node(entry[5]), node(entry[6]), entry[7])
523 523 return indexformatv0_pack(*e2)
524 524
525 525 # index ng:
526 526 # 6 bytes: offset
527 527 # 2 bytes: flags
528 528 # 4 bytes: compressed length
529 529 # 4 bytes: uncompressed length
530 530 # 4 bytes: base rev
531 531 # 4 bytes: link rev
532 532 # 4 bytes: parent 1 rev
533 533 # 4 bytes: parent 2 rev
534 534 # 32 bytes: nodeid
535 535 indexformatng = struct.Struct(">Qiiiiii20s12x")
536 536 indexformatng_pack = indexformatng.pack
537 537 versionformat = struct.Struct(">I")
538 538 versionformat_pack = versionformat.pack
539 539 versionformat_unpack = versionformat.unpack
540 540
541 541 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
542 542 # signed integer)
543 543 _maxentrysize = 0x7fffffff
544 544
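[Editorial sketch, not part of the diff: one v1 index record round-tripped
through the struct above; the trailing "12x" pad keeps every record at a
fixed 64 bytes.]

    entry = (offset_type(0, 0), 11, 11, 0, 0, -1, -1, '\0' * 20)
    assert indexformatng.size == 64
    assert indexformatng.unpack(indexformatng_pack(*entry)) == entry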
545 545 class revlogio(object):
546 546 def __init__(self):
547 547 self.size = indexformatng.size
548 548
549 549 def parseindex(self, data, inline):
550 550 # call the C implementation to parse the index data
551 551 index, cache = parsers.parse_index2(data, inline)
552 552 return index, getattr(index, 'nodemap', None), cache
553 553
554 554 def packentry(self, entry, node, version, rev):
555 555 p = indexformatng_pack(*entry)
556 556 if rev == 0:
557 557 p = versionformat_pack(version) + p[4:]
558 558 return p
559 559
560 560 class revlog(object):
561 561 """
562 562 the underlying revision storage object
563 563
564 564 A revlog consists of two parts, an index and the revision data.
565 565
566 566 The index is a file with a fixed record size containing
567 567 information on each revision, including its nodeid (hash), the
568 568 nodeids of its parents, the position and offset of its data within
569 569 the data file, and the revision it's based on. Finally, each entry
570 570 contains a linkrev entry that can serve as a pointer to external
571 571 data.
572 572
573 573 The revision data itself is a linear collection of data chunks.
574 574 Each chunk represents a revision and is usually represented as a
575 575 delta against the previous chunk. To bound lookup time, runs of
576 576 deltas are limited to about 2 times the length of the original
577 577 version data. This makes retrieval of a version proportional to
578 578 its size, or O(1) relative to the number of revisions.
579 579
580 580 Both pieces of the revlog are written to in an append-only
581 581 fashion, which means we never need to rewrite a file to insert or
582 582 remove data, and can use some simple techniques to avoid the need
583 583 for locking while reading.
584 584
585 585 If checkambig, indexfile is opened with checkambig=True at
586 586 writing, to avoid file stat ambiguity.
587 587
588 588 If mmaplargeindex is True, and an mmapindexthreshold is set, the
589 589 index will be mmapped rather than read if it is larger than the
590 590 configured threshold.
591 591
592 592 If censorable is True, the revlog can have censored revisions.
593 593 """
594 594 def __init__(self, opener, indexfile, datafile=None, checkambig=False,
595 595 mmaplargeindex=False, censorable=False):
596 596 """
597 597 create a revlog object
598 598
599 599 opener is a function that abstracts the file opening operation
600 600 and can be used to implement COW semantics or the like.
601 601 """
602 602 self.indexfile = indexfile
603 603 self.datafile = datafile or (indexfile[:-2] + ".d")
604 604 self.opener = opener
605 605 # When True, indexfile is opened with checkambig=True at writing, to
606 606 # avoid file stat ambiguity.
607 607 self._checkambig = checkambig
608 608 self._censorable = censorable
609 609 # 3-tuple of (node, rev, text) for a raw revision.
610 610 self._cache = None
611 611 # Maps rev to chain base rev.
612 612 self._chainbasecache = util.lrucachedict(100)
613 613 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
614 614 self._chunkcache = (0, '')
615 615 # How much data to read and cache into the raw revlog data cache.
616 616 self._chunkcachesize = 65536
617 617 self._maxchainlen = None
618 618 self._aggressivemergedeltas = False
619 619 self.index = []
620 620 # Mapping of partial identifiers to full nodes.
621 621 self._pcache = {}
622 622 # Mapping of revision integer to full node.
623 623 self._nodecache = {nullid: nullrev}
624 624 self._nodepos = None
625 625 self._compengine = 'zlib'
626 626 self._maxdeltachainspan = -1
627 627 self._withsparseread = False
628 628 self._srdensitythreshold = 0.25
629 629 self._srmingapsize = 262144
630 630
631 631 mmapindexthreshold = None
632 632 v = REVLOG_DEFAULT_VERSION
633 633 opts = getattr(opener, 'options', None)
634 634 if opts is not None:
635 635 if 'revlogv2' in opts:
636 636 # version 2 revlogs always use generaldelta.
637 637 v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
638 638 elif 'revlogv1' in opts:
639 639 if 'generaldelta' in opts:
640 640 v |= FLAG_GENERALDELTA
641 641 else:
642 642 v = 0
643 643 if 'chunkcachesize' in opts:
644 644 self._chunkcachesize = opts['chunkcachesize']
645 645 if 'maxchainlen' in opts:
646 646 self._maxchainlen = opts['maxchainlen']
647 647 if 'aggressivemergedeltas' in opts:
648 648 self._aggressivemergedeltas = opts['aggressivemergedeltas']
649 649 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
650 650 if 'compengine' in opts:
651 651 self._compengine = opts['compengine']
652 652 if 'maxdeltachainspan' in opts:
653 653 self._maxdeltachainspan = opts['maxdeltachainspan']
654 654 if mmaplargeindex and 'mmapindexthreshold' in opts:
655 655 mmapindexthreshold = opts['mmapindexthreshold']
656 656 self._withsparseread = bool(opts.get('with-sparse-read', False))
657 657 if 'sparse-read-density-threshold' in opts:
658 658 self._srdensitythreshold = opts['sparse-read-density-threshold']
659 659 if 'sparse-read-min-gap-size' in opts:
660 660 self._srmingapsize = opts['sparse-read-min-gap-size']
661 661
662 662 if self._chunkcachesize <= 0:
663 663 raise RevlogError(_('revlog chunk cache size %r is not greater '
664 664 'than 0') % self._chunkcachesize)
665 665 elif self._chunkcachesize & (self._chunkcachesize - 1):
666 666 raise RevlogError(_('revlog chunk cache size %r is not a power '
667 667 'of 2') % self._chunkcachesize)
668 668
669 669 indexdata = ''
670 670 self._initempty = True
671 671 try:
672 672 with self._indexfp() as f:
673 673 if (mmapindexthreshold is not None and
674 674 self.opener.fstat(f).st_size >= mmapindexthreshold):
675 675 indexdata = util.buffer(util.mmapread(f))
676 676 else:
677 677 indexdata = f.read()
678 678 if len(indexdata) > 0:
679 679 v = versionformat_unpack(indexdata[:4])[0]
680 680 self._initempty = False
681 681 except IOError as inst:
682 682 if inst.errno != errno.ENOENT:
683 683 raise
684 684
685 685 self.version = v
686 686 self._inline = v & FLAG_INLINE_DATA
687 687 self._generaldelta = v & FLAG_GENERALDELTA
688 688 flags = v & ~0xFFFF
689 689 fmt = v & 0xFFFF
690 690 if fmt == REVLOGV0:
691 691 if flags:
692 692 raise RevlogError(_('unknown flags (%#04x) in version %d '
693 693 'revlog %s') %
694 694 (flags >> 16, fmt, self.indexfile))
695 695 elif fmt == REVLOGV1:
696 696 if flags & ~REVLOGV1_FLAGS:
697 697 raise RevlogError(_('unknown flags (%#04x) in version %d '
698 698 'revlog %s') %
699 699 (flags >> 16, fmt, self.indexfile))
700 700 elif fmt == REVLOGV2:
701 701 if flags & ~REVLOGV2_FLAGS:
702 702 raise RevlogError(_('unknown flags (%#04x) in version %d '
703 703 'revlog %s') %
704 704 (flags >> 16, fmt, self.indexfile))
705 705 else:
706 706 raise RevlogError(_('unknown version (%d) in revlog %s') %
707 707 (fmt, self.indexfile))
708 708
709 709 self.storedeltachains = True
710 710
711 711 self._io = revlogio()
712 712 if self.version == REVLOGV0:
713 713 self._io = revlogoldio()
714 714 try:
715 715 d = self._io.parseindex(indexdata, self._inline)
716 716 except (ValueError, IndexError):
717 717 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
718 718 self.index, nodemap, self._chunkcache = d
719 719 if nodemap is not None:
720 720 self.nodemap = self._nodecache = nodemap
721 721 if not self._chunkcache:
722 722 self._chunkclear()
723 723 # revnum -> (chain-length, sum-delta-length)
724 724 self._chaininfocache = {}
725 725 # revlog header -> revlog compressor
726 726 self._decompressors = {}
727 727
728 728 @util.propertycache
729 729 def _compressor(self):
730 730 return util.compengines[self._compengine].revlogcompressor()
731 731
732 732 def _indexfp(self, mode='r'):
733 733 """file object for the revlog's index file"""
734 734 args = {r'mode': mode}
735 735 if mode != 'r':
736 736 args[r'checkambig'] = self._checkambig
737 737 if mode == 'w':
738 738 args[r'atomictemp'] = True
739 739 return self.opener(self.indexfile, **args)
740 740
741 741 def _datafp(self, mode='r'):
742 742 """file object for the revlog's data file"""
743 743 return self.opener(self.datafile, mode=mode)
744 744
745 745 @contextlib.contextmanager
746 746 def _datareadfp(self, existingfp=None):
747 747 """file object suitable to read data"""
748 748 if existingfp is not None:
749 749 yield existingfp
750 750 else:
751 751 if self._inline:
752 752 func = self._indexfp
753 753 else:
754 754 func = self._datafp
755 755 with func() as fp:
756 756 yield fp
757 757
758 758 def tip(self):
759 759 return self.node(len(self.index) - 2)
760 760 def __contains__(self, rev):
761 761 return 0 <= rev < len(self)
762 762 def __len__(self):
763 763 return len(self.index) - 1
764 764 def __iter__(self):
765 765 return iter(xrange(len(self)))
766 766 def revs(self, start=0, stop=None):
767 767 """iterate over all rev in this revlog (from start to stop)"""
768 768 step = 1
769 769 if stop is not None:
770 770 if start > stop:
771 771 step = -1
772 772 stop += step
773 773 else:
774 774 stop = len(self)
775 775 return xrange(start, stop, step)
776 776
777 777 @util.propertycache
778 778 def nodemap(self):
779 779 self.rev(self.node(0))
780 780 return self._nodecache
781 781
782 782 def hasnode(self, node):
783 783 try:
784 784 self.rev(node)
785 785 return True
786 786 except KeyError:
787 787 return False
788 788
789 789 def candelta(self, baserev, rev):
790 790 """whether two revisions (baserev, rev) can be delta-ed or not"""
791 791 # Disable delta if either rev requires a content-changing flag
792 792 # processor (ex. LFS). This is because such flag processor can alter
793 793 # the rawtext content that the delta will be based on, and two clients
794 794 # could have a same revlog node with different flags (i.e. different
795 795 # rawtext contents) and the delta could be incompatible.
796 796 if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
797 797 or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
798 798 return False
799 799 return True
800 800
801 801 def clearcaches(self):
802 802 self._cache = None
803 803 self._chainbasecache.clear()
804 804 self._chunkcache = (0, '')
805 805 self._pcache = {}
806 806
807 807 try:
808 808 self._nodecache.clearcaches()
809 809 except AttributeError:
810 810 self._nodecache = {nullid: nullrev}
811 811 self._nodepos = None
812 812
813 813 def rev(self, node):
814 814 try:
815 815 return self._nodecache[node]
816 816 except TypeError:
817 817 raise
818 818 except RevlogError:
819 819 # parsers.c radix tree lookup failed
820 820 if node == wdirid or node in wdirfilenodeids:
821 821 raise error.WdirUnsupported
822 822 raise LookupError(node, self.indexfile, _('no node'))
823 823 except KeyError:
824 824 # pure python cache lookup failed
825 825 n = self._nodecache
826 826 i = self.index
827 827 p = self._nodepos
828 828 if p is None:
829 829 p = len(i) - 2
830 830 else:
831 831 assert p < len(i)
832 832 for r in xrange(p, -1, -1):
833 833 v = i[r][7]
834 834 n[v] = r
835 835 if v == node:
836 836 self._nodepos = r - 1
837 837 return r
838 838 if node == wdirid or node in wdirfilenodeids:
839 839 raise error.WdirUnsupported
840 840 raise LookupError(node, self.indexfile, _('no node'))
841 841
842 842 # Accessors for index entries.
843 843
844 844 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
845 845 # are flags.
846 846 def start(self, rev):
847 847 return int(self.index[rev][0] >> 16)
848 848
849 849 def flags(self, rev):
850 850 return self.index[rev][0] & 0xFFFF
851 851
852 852 def length(self, rev):
853 853 return self.index[rev][1]
854 854
855 855 def rawsize(self, rev):
856 856 """return the length of the uncompressed text for a given revision"""
857 857 l = self.index[rev][2]
858 858 if l >= 0:
859 859 return l
860 860
861 861 t = self.revision(rev, raw=True)
862 862 return len(t)
863 863
864 864 def size(self, rev):
865 865 """length of non-raw text (processed by a "read" flag processor)"""
866 866 # fast path: if no "read" flag processor could change the content,
867 867 # size is rawsize. note: ELLIPSIS is known to not change the content.
868 868 flags = self.flags(rev)
869 869 if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
870 870 return self.rawsize(rev)
871 871
872 872 return len(self.revision(rev, raw=False))
873 873
874 874 def chainbase(self, rev):
875 875 base = self._chainbasecache.get(rev)
876 876 if base is not None:
877 877 return base
878 878
879 879 index = self.index
880 880 iterrev = rev
881 881 base = index[iterrev][3]
882 882 while base != iterrev:
883 883 iterrev = base
884 884 base = index[iterrev][3]
885 885
886 886 self._chainbasecache[rev] = base
887 887 return base
888 888
889 889 def linkrev(self, rev):
890 890 return self.index[rev][4]
891 891
892 892 def parentrevs(self, rev):
893 893 try:
894 894 entry = self.index[rev]
895 895 except IndexError:
896 896 if rev == wdirrev:
897 897 raise error.WdirUnsupported
898 898 raise
899 899
900 900 return entry[5], entry[6]
901 901
902 902 def node(self, rev):
903 903 try:
904 904 return self.index[rev][7]
905 905 except IndexError:
906 906 if rev == wdirrev:
907 907 raise error.WdirUnsupported
908 908 raise
909 909
910 910 # Derived from index values.
911 911
912 912 def end(self, rev):
913 913 return self.start(rev) + self.length(rev)
914 914
915 915 def parents(self, node):
916 916 i = self.index
917 917 d = i[self.rev(node)]
918 918 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
919 919
920 920 def chainlen(self, rev):
921 921 return self._chaininfo(rev)[0]
922 922
923 923 def _chaininfo(self, rev):
924 924 chaininfocache = self._chaininfocache
925 925 if rev in chaininfocache:
926 926 return chaininfocache[rev]
927 927 index = self.index
928 928 generaldelta = self._generaldelta
929 929 iterrev = rev
930 930 e = index[iterrev]
931 931 clen = 0
932 932 compresseddeltalen = 0
933 933 while iterrev != e[3]:
934 934 clen += 1
935 935 compresseddeltalen += e[1]
936 936 if generaldelta:
937 937 iterrev = e[3]
938 938 else:
939 939 iterrev -= 1
940 940 if iterrev in chaininfocache:
941 941 t = chaininfocache[iterrev]
942 942 clen += t[0]
943 943 compresseddeltalen += t[1]
944 944 break
945 945 e = index[iterrev]
946 946 else:
947 947 # Add text length of base since decompressing that also takes
948 948 # work. For cache hits the length is already included.
949 949 compresseddeltalen += e[1]
950 950 r = (clen, compresseddeltalen)
951 951 chaininfocache[rev] = r
952 952 return r
953 953
954 954 def _deltachain(self, rev, stoprev=None):
955 955 """Obtain the delta chain for a revision.
956 956
957 957 ``stoprev`` specifies a revision to stop at. If not specified, we
958 958 stop at the base of the chain.
959 959
960 960 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
961 961 revs in ascending order and ``stopped`` is a bool indicating whether
962 962 ``stoprev`` was hit.
963 963 """
964 964 # Try C implementation.
965 965 try:
966 966 return self.index.deltachain(rev, stoprev, self._generaldelta)
967 967 except AttributeError:
968 968 pass
969 969
970 970 chain = []
971 971
972 972 # Alias to prevent attribute lookup in tight loop.
973 973 index = self.index
974 974 generaldelta = self._generaldelta
975 975
976 976 iterrev = rev
977 977 e = index[iterrev]
978 978 while iterrev != e[3] and iterrev != stoprev:
979 979 chain.append(iterrev)
980 980 if generaldelta:
981 981 iterrev = e[3]
982 982 else:
983 983 iterrev -= 1
984 984 e = index[iterrev]
985 985
986 986 if iterrev == stoprev:
987 987 stopped = True
988 988 else:
989 989 chain.append(iterrev)
990 990 stopped = False
991 991
992 992 chain.reverse()
993 993 return chain, stopped
994 994
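[Editorial sketch, not part of the diff, runnable once the class is
defined: the pure-python walk above on a non-generaldelta chain whose
base (index field 3) is rev 0, so revs 1-3 each delta against their
predecessor. '_fakechain' is a hypothetical stand-in; the plain function
is pulled from the class dict so no real revlog is needed.]

    class _fakechain(object):
        _generaldelta = False
        index = [(0, 0, 0, 0, 0, -1, -1, 'n%d' % r) for r in range(4)]
    chain, stopped = revlog.__dict__['_deltachain'](_fakechain(), 3)
    assert chain == [0, 1, 2, 3] and not stopped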
995 995 def ancestors(self, revs, stoprev=0, inclusive=False):
996 996 """Generate the ancestors of 'revs' in reverse topological order.
997 997 Does not generate revs lower than stoprev.
998 998
999 999 See the documentation for ancestor.lazyancestors for more details."""
1000 1000
1001 1001 return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
1002 1002 inclusive=inclusive)
1003 1003
1004 1004 def descendants(self, revs):
1005 1005 """Generate the descendants of 'revs' in revision order.
1006 1006
1007 1007 Yield a sequence of revision numbers starting with a child of
1008 1008 some rev in revs, i.e., each revision is *not* considered a
1009 1009 descendant of itself. Results are ordered by revision number (a
1010 1010 topological sort)."""
1011 1011 first = min(revs)
1012 1012 if first == nullrev:
1013 1013 for i in self:
1014 1014 yield i
1015 1015 return
1016 1016
1017 1017 seen = set(revs)
1018 1018 for i in self.revs(start=first + 1):
1019 1019 for x in self.parentrevs(i):
1020 1020 if x != nullrev and x in seen:
1021 1021 seen.add(i)
1022 1022 yield i
1023 1023 break
1024 1024
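[Editorial sketch, not part of the diff, runnable once the class is
defined: descendants() yields in revision order and never yields the
input revs themselves. '_fakedag' is a hypothetical diamond
0 <- {1, 2} <- 3.]

    class _fakedag(object):
        _parents = {0: (nullrev, nullrev), 1: (0, nullrev),
                    2: (0, nullrev), 3: (1, 2)}
        def __iter__(self):
            return iter(range(4))
        def revs(self, start=0, stop=None):
            return range(start, 4)
        def parentrevs(self, rev):
            return self._parents[rev]
    assert list(revlog.__dict__['descendants'](_fakedag(), [1])) == [3]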
1025 1025 def findcommonmissing(self, common=None, heads=None):
1026 1026 """Return a tuple of the ancestors of common and the ancestors of heads
1027 1027 that are not ancestors of common. In revset terminology, we return the
1028 1028 tuple:
1029 1029
1030 1030 ::common, (::heads) - (::common)
1031 1031
1032 1032 The list is sorted by revision number, meaning it is
1033 1033 topologically sorted.
1034 1034
1035 1035 'heads' and 'common' are both lists of node IDs. If heads is
1036 1036 not supplied, uses all of the revlog's heads. If common is not
1037 1037 supplied, uses nullid."""
1038 1038 if common is None:
1039 1039 common = [nullid]
1040 1040 if heads is None:
1041 1041 heads = self.heads()
1042 1042
1043 1043 common = [self.rev(n) for n in common]
1044 1044 heads = [self.rev(n) for n in heads]
1045 1045
1046 1046 # we want the ancestors, but inclusive
1047 1047 class lazyset(object):
1048 1048 def __init__(self, lazyvalues):
1049 1049 self.addedvalues = set()
1050 1050 self.lazyvalues = lazyvalues
1051 1051
1052 1052 def __contains__(self, value):
1053 1053 return value in self.addedvalues or value in self.lazyvalues
1054 1054
1055 1055 def __iter__(self):
1056 1056 added = self.addedvalues
1057 1057 for r in added:
1058 1058 yield r
1059 1059 for r in self.lazyvalues:
1060 1060 if r not in added:
1061 1061 yield r
1062 1062
1063 1063 def add(self, value):
1064 1064 self.addedvalues.add(value)
1065 1065
1066 1066 def update(self, values):
1067 1067 self.addedvalues.update(values)
1068 1068
1069 1069 has = lazyset(self.ancestors(common))
1070 1070 has.add(nullrev)
1071 1071 has.update(common)
1072 1072
1073 1073 # take all ancestors from heads that aren't in has
1074 1074 missing = set()
1075 1075 visit = collections.deque(r for r in heads if r not in has)
1076 1076 while visit:
1077 1077 r = visit.popleft()
1078 1078 if r in missing:
1079 1079 continue
1080 1080 else:
1081 1081 missing.add(r)
1082 1082 for p in self.parentrevs(r):
1083 1083 if p not in has:
1084 1084 visit.append(p)
1085 1085 missing = list(missing)
1086 1086 missing.sort()
1087 1087 return has, [self.node(miss) for miss in missing]
1088 1088
1089 1089 def incrementalmissingrevs(self, common=None):
1090 1090 """Return an object that can be used to incrementally compute the
1091 1091 revision numbers of the ancestors of arbitrary sets that are not
1092 1092 ancestors of common. This is an ancestor.incrementalmissingancestors
1093 1093 object.
1094 1094
1095 1095 'common' is a list of revision numbers. If common is not supplied, uses
1096 1096 nullrev.
1097 1097 """
1098 1098 if common is None:
1099 1099 common = [nullrev]
1100 1100
1101 1101 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1102 1102
1103 1103 def findmissingrevs(self, common=None, heads=None):
1104 1104 """Return the revision numbers of the ancestors of heads that
1105 1105 are not ancestors of common.
1106 1106
1107 1107 More specifically, return a list of revision numbers corresponding to
1108 1108 nodes N such that every N satisfies the following constraints:
1109 1109
1110 1110 1. N is an ancestor of some node in 'heads'
1111 1111 2. N is not an ancestor of any node in 'common'
1112 1112
1113 1113 The list is sorted by revision number, meaning it is
1114 1114 topologically sorted.
1115 1115
1116 1116 'heads' and 'common' are both lists of revision numbers. If heads is
1117 1117 not supplied, uses all of the revlog's heads. If common is not
1118 1118 supplied, uses nullrev."""
1119 1119 if common is None:
1120 1120 common = [nullrev]
1121 1121 if heads is None:
1122 1122 heads = self.headrevs()
1123 1123
1124 1124 inc = self.incrementalmissingrevs(common=common)
1125 1125 return inc.missingancestors(heads)
1126 1126
1127 1127 def findmissing(self, common=None, heads=None):
1128 1128 """Return the ancestors of heads that are not ancestors of common.
1129 1129
1130 1130 More specifically, return a list of nodes N such that every N
1131 1131 satisfies the following constraints:
1132 1132
1133 1133 1. N is an ancestor of some node in 'heads'
1134 1134 2. N is not an ancestor of any node in 'common'
1135 1135
1136 1136 The list is sorted by revision number, meaning it is
1137 1137 topologically sorted.
1138 1138
1139 1139 'heads' and 'common' are both lists of node IDs. If heads is
1140 1140 not supplied, uses all of the revlog's heads. If common is not
1141 1141 supplied, uses nullid."""
1142 1142 if common is None:
1143 1143 common = [nullid]
1144 1144 if heads is None:
1145 1145 heads = self.heads()
1146 1146
1147 1147 common = [self.rev(n) for n in common]
1148 1148 heads = [self.rev(n) for n in heads]
1149 1149
1150 1150 inc = self.incrementalmissingrevs(common=common)
1151 1151 return [self.node(r) for r in inc.missingancestors(heads)]
1152 1152
1153 1153 def nodesbetween(self, roots=None, heads=None):
1154 1154 """Return a topological path from 'roots' to 'heads'.
1155 1155
1156 1156 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1157 1157 topologically sorted list of all nodes N that satisfy both of
1158 1158 these constraints:
1159 1159
1160 1160 1. N is a descendant of some node in 'roots'
1161 1161 2. N is an ancestor of some node in 'heads'
1162 1162
1163 1163 Every node is considered to be both a descendant and an ancestor
1164 1164 of itself, so every reachable node in 'roots' and 'heads' will be
1165 1165 included in 'nodes'.
1166 1166
1167 1167 'outroots' is the list of reachable nodes in 'roots', i.e., the
1168 1168 subset of 'roots' that is returned in 'nodes'. Likewise,
1169 1169 'outheads' is the subset of 'heads' that is also in 'nodes'.
1170 1170
1171 1171 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1172 1172 unspecified, uses nullid as the only root. If 'heads' is
1173 1173 unspecified, uses list of all of the revlog's heads."""
1174 1174 nonodes = ([], [], [])
1175 1175 if roots is not None:
1176 1176 roots = list(roots)
1177 1177 if not roots:
1178 1178 return nonodes
1179 1179 lowestrev = min([self.rev(n) for n in roots])
1180 1180 else:
1181 1181 roots = [nullid] # Everybody's a descendant of nullid
1182 1182 lowestrev = nullrev
1183 1183 if (lowestrev == nullrev) and (heads is None):
1184 1184 # We want _all_ the nodes!
1185 1185 return ([self.node(r) for r in self], [nullid], list(self.heads()))
1186 1186 if heads is None:
1187 1187 # All nodes are ancestors, so the latest ancestor is the last
1188 1188 # node.
1189 1189 highestrev = len(self) - 1
1190 1190 # Set ancestors to None to signal that every node is an ancestor.
1191 1191 ancestors = None
1192 1192 # Set heads to an empty dictionary for later discovery of heads
1193 1193 heads = {}
1194 1194 else:
1195 1195 heads = list(heads)
1196 1196 if not heads:
1197 1197 return nonodes
1198 1198 ancestors = set()
1199 1199 # Turn heads into a dictionary so we can remove 'fake' heads.
1200 1200 # Also, later we will be using it to filter out the heads we can't
1201 1201 # find from roots.
1202 1202 heads = dict.fromkeys(heads, False)
1203 1203 # Start at the top and keep marking parents until we're done.
1204 1204 nodestotag = set(heads)
1205 1205 # Remember where the top was so we can use it as a limit later.
1206 1206 highestrev = max([self.rev(n) for n in nodestotag])
1207 1207 while nodestotag:
1208 1208 # grab a node to tag
1209 1209 n = nodestotag.pop()
1210 1210 # Never tag nullid
1211 1211 if n == nullid:
1212 1212 continue
1213 1213 # A node's revision number represents its place in a
1214 1214 # topologically sorted list of nodes.
1215 1215 r = self.rev(n)
1216 1216 if r >= lowestrev:
1217 1217 if n not in ancestors:
1218 1218 # If we are possibly a descendant of one of the roots
1219 1219 # and we haven't already been marked as an ancestor
1220 1220 ancestors.add(n) # Mark as ancestor
1221 1221 # Add non-nullid parents to list of nodes to tag.
1222 1222 nodestotag.update([p for p in self.parents(n) if
1223 1223 p != nullid])
1224 1224 elif n in heads: # We've seen it before, is it a fake head?
1225 1225 # So it is, real heads should not be the ancestors of
1226 1226 # any other heads.
1227 1227 heads.pop(n)
1228 1228 if not ancestors:
1229 1229 return nonodes
1230 1230 # Now that we have our set of ancestors, we want to remove any
1231 1231 # roots that are not ancestors.
1232 1232
1233 1233 # If one of the roots was nullid, everything is included anyway.
1234 1234 if lowestrev > nullrev:
1235 1235 # But, since we weren't, let's recompute the lowest rev to not
1236 1236 # include roots that aren't ancestors.
1237 1237
1238 1238 # Filter out roots that aren't ancestors of heads
1239 1239 roots = [root for root in roots if root in ancestors]
1240 1240 # Recompute the lowest revision
1241 1241 if roots:
1242 1242 lowestrev = min([self.rev(root) for root in roots])
1243 1243 else:
1244 1244 # No more roots? Return empty list
1245 1245 return nonodes
1246 1246 else:
1247 1247 # We are descending from nullid, and don't need to care about
1248 1248 # any other roots.
1249 1249 lowestrev = nullrev
1250 1250 roots = [nullid]
1251 1251 # Transform our roots list into a set.
1252 1252 descendants = set(roots)
1253 1253 # Also, keep the original roots so we can filter out roots that aren't
1254 1254 # 'real' roots (i.e. are descended from other roots).
1255 1255 roots = descendants.copy()
1256 1256 # Our topologically sorted list of output nodes.
1257 1257 orderedout = []
1258 1258 # Don't start at nullid since we don't want nullid in our output list,
1259 1259 # and if nullid shows up in descendants, empty parents will look like
1260 1260 # they're descendants.
1261 1261 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1262 1262 n = self.node(r)
1263 1263 isdescendant = False
1264 1264 if lowestrev == nullrev: # Everybody is a descendant of nullid
1265 1265 isdescendant = True
1266 1266 elif n in descendants:
1267 1267 # n is already a descendant
1268 1268 isdescendant = True
1269 1269 # This check only needs to be done here because all the roots
1270 1270 # will start being marked as descendants before the loop.
1271 1271 if n in roots:
1272 1272 # If n was a root, check if it's a 'real' root.
1273 1273 p = tuple(self.parents(n))
1274 1274 # If any of its parents are descendants, it's not a root.
1275 1275 if (p[0] in descendants) or (p[1] in descendants):
1276 1276 roots.remove(n)
1277 1277 else:
1278 1278 p = tuple(self.parents(n))
1279 1279 # A node is a descendant if either of its parents are
1280 1280 # descendants. (We seeded the descendants list with the roots
1281 1281 # up there, remember?)
1282 1282 if (p[0] in descendants) or (p[1] in descendants):
1283 1283 descendants.add(n)
1284 1284 isdescendant = True
1285 1285 if isdescendant and ((ancestors is None) or (n in ancestors)):
1286 1286 # Only include nodes that are both descendants and ancestors.
1287 1287 orderedout.append(n)
1288 1288 if (ancestors is not None) and (n in heads):
1289 1289 # We're trying to figure out which heads are reachable
1290 1290 # from roots.
1291 1291 # Mark this head as having been reached
1292 1292 heads[n] = True
1293 1293 elif ancestors is None:
1294 1294 # Otherwise, we're trying to discover the heads.
1295 1295 # Assume this is a head because if it isn't, the next step
1296 1296 # will eventually remove it.
1297 1297 heads[n] = True
1298 1298 # But, obviously its parents aren't.
1299 1299 for p in self.parents(n):
1300 1300 heads.pop(p, None)
1301 1301 heads = [head for head, flag in heads.iteritems() if flag]
1302 1302 roots = list(roots)
1303 1303 assert orderedout
1304 1304 assert roots
1305 1305 assert heads
1306 1306 return (orderedout, roots, heads)
1307 1307
1308 1308 def headrevs(self):
1309 1309 try:
1310 1310 return self.index.headrevs()
1311 1311 except AttributeError:
1312 1312 return self._headrevs()
1313 1313
1314 1314 def computephases(self, roots):
1315 1315 return self.index.computephasesmapsets(roots)
1316 1316
1317 1317 def _headrevs(self):
1318 1318 count = len(self)
1319 1319 if not count:
1320 1320 return [nullrev]
1321 1321 # we won't iter over filtered rev so nobody is a head at start
1322 1322 ishead = [0] * (count + 1)
1323 1323 index = self.index
1324 1324 for r in self:
1325 1325 ishead[r] = 1 # I may be a head
1326 1326 e = index[r]
1327 1327 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1328 1328 return [r for r, val in enumerate(ishead) if val]
1329 1329
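[Editorial sketch, not part of the diff, runnable once the class is
defined: every rev starts marked as a head and each parent reference
clears its bit; the extra slot at ishead[-1] absorbs nullrev parents.
'_fakebranchy' is a hypothetical graph 0 <- 1 <- 2 plus 0 <- 3, whose
heads are revs 2 and 3; only index fields 5 and 6 (the parents) matter.]

    class _fakebranchy(object):
        index = [(0, 0, 0, 0, 0, -1, -1, 'n0'),
                 (0, 0, 0, 0, 0, 0, -1, 'n1'),
                 (0, 0, 0, 0, 0, 1, -1, 'n2'),
                 (0, 0, 0, 0, 0, 0, -1, 'n3')]
        def __len__(self):
            return 4
        def __iter__(self):
            return iter(range(4))
    assert revlog.__dict__['_headrevs'](_fakebranchy()) == [2, 3]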
1330 1330 def heads(self, start=None, stop=None):
1331 1331 """return the list of all nodes that have no children
1332 1332
1333 1333 if start is specified, only heads that are descendants of
1334 1334 start will be returned
1335 1335 if stop is specified, it will consider all the revs from stop
1336 1336 as if they had no children
1337 1337 """
1338 1338 if start is None and stop is None:
1339 1339 if not len(self):
1340 1340 return [nullid]
1341 1341 return [self.node(r) for r in self.headrevs()]
1342 1342
1343 1343 if start is None:
1344 1344 start = nullid
1345 1345 if stop is None:
1346 1346 stop = []
1347 1347 stoprevs = set([self.rev(n) for n in stop])
1348 1348 startrev = self.rev(start)
1349 1349 reachable = {startrev}
1350 1350 heads = {startrev}
1351 1351
1352 1352 parentrevs = self.parentrevs
1353 1353 for r in self.revs(start=startrev + 1):
1354 1354 for p in parentrevs(r):
1355 1355 if p in reachable:
1356 1356 if r not in stoprevs:
1357 1357 reachable.add(r)
1358 1358 heads.add(r)
1359 1359 if p in heads and p not in stoprevs:
1360 1360 heads.remove(p)
1361 1361
1362 1362 return [self.node(r) for r in heads]
1363 1363
1364 1364 def children(self, node):
1365 1365 """find the children of a given node"""
1366 1366 c = []
1367 1367 p = self.rev(node)
1368 1368 for r in self.revs(start=p + 1):
1369 1369 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1370 1370 if prevs:
1371 1371 for pr in prevs:
1372 1372 if pr == p:
1373 1373 c.append(self.node(r))
1374 1374 elif p == nullrev:
1375 1375 c.append(self.node(r))
1376 1376 return c
1377 1377
1378 1378 def descendant(self, start, end):
1379 """True if revision 'end' is an descendant of revision 'start'
1380
1381 A revision is considered a descendant of itself."""
1379 1382 if start == nullrev:
1380 1383 return True
1381 1384 elif start == end:
1382 1385 return True
1383 for i in self.descendants([start]):
1384 if i == end:
1385 return True
1386 elif i > end:
1387 break
1388 return False
1386 return start in self._commonancestorsheads(start, end)
1389 1387
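[Editorial sketch, not part of the diff: why the one-liner above is
equivalent to the removed scan. 'end' descends from 'start' exactly when
'start' is a head of the common ancestors of the two revisions, so one
_commonancestorsheads() call replaces a walk over descendants([start]).
The same property, checked with the pure-python helper on a hypothetical
linear graph 0 <- 1 <- 2:]

    def _linearparents(rev):
        return (rev - 1, nullrev)
    assert 0 in ancestor.commonancestorsheads(_linearparents, 0, 2)
    assert 2 not in ancestor.commonancestorsheads(_linearparents, 2, 0)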
1390 1388 def commonancestorsheads(self, a, b):
1391 1389 """calculate all the heads of the common ancestors of nodes a and b"""
1392 1390 a, b = self.rev(a), self.rev(b)
1393 1391 ancs = self._commonancestorsheads(a, b)
1394 1392 return pycompat.maplist(self.node, ancs)
1395 1393
1396 1394 def _commonancestorsheads(self, *revs):
1397 1395 """calculate all the heads of the common ancestors of revs"""
1398 1396 try:
1399 1397 ancs = self.index.commonancestorsheads(*revs)
1400 1398 except (AttributeError, OverflowError): # C implementation failed
1401 1399 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1402 1400 return ancs
1403 1401
1404 1402 def isancestor(self, a, b):
1405 1403 """return True if node a is an ancestor of node b
1406 1404
1407 1405 The implementation of this is trivial but the use of
1408 1406 commonancestorsheads is not."""
1409 1407 return a in self.commonancestorsheads(a, b)
1410 1408
1411 1409 def ancestor(self, a, b):
1412 1410 """calculate the "best" common ancestor of nodes a and b"""
1413 1411
1414 1412 a, b = self.rev(a), self.rev(b)
1415 1413 try:
1416 1414 ancs = self.index.ancestors(a, b)
1417 1415 except (AttributeError, OverflowError):
1418 1416 ancs = ancestor.ancestors(self.parentrevs, a, b)
1419 1417 if ancs:
1420 1418 # choose a consistent winner when there's a tie
1421 1419 return min(map(self.node, ancs))
1422 1420 return nullid
1423 1421
1424 1422 def _match(self, id):
1425 1423 if isinstance(id, int):
1426 1424 # rev
1427 1425 return self.node(id)
1428 1426 if len(id) == 20:
1429 1427 # possibly a binary node
1430 1428 # odds of a binary node being all hex in ASCII are 1 in 10**25
1431 1429 try:
1432 1430 node = id
1433 1431 self.rev(node) # quick search the index
1434 1432 return node
1435 1433 except LookupError:
1436 1434 pass # may be partial hex id
1437 1435 try:
1438 1436 # str(rev)
1439 1437 rev = int(id)
1440 1438 if "%d" % rev != id:
1441 1439 raise ValueError
1442 1440 if rev < 0:
1443 1441 rev = len(self) + rev
1444 1442 if rev < 0 or rev >= len(self):
1445 1443 raise ValueError
1446 1444 return self.node(rev)
1447 1445 except (ValueError, OverflowError):
1448 1446 pass
1449 1447 if len(id) == 40:
1450 1448 try:
1451 1449 # a full hex nodeid?
1452 1450 node = bin(id)
1453 1451 self.rev(node)
1454 1452 return node
1455 1453 except (TypeError, LookupError):
1456 1454 pass
1457 1455
1458 1456 def _partialmatch(self, id):
1459 1457 # we don't care about wdirfilenodeids as they should always be full hashes
1460 1458 maybewdir = wdirhex.startswith(id)
1461 1459 try:
1462 1460 partial = self.index.partialmatch(id)
1463 1461 if partial and self.hasnode(partial):
1464 1462 if maybewdir:
1465 1463 # single 'ff...' match in radix tree, ambiguous with wdir
1466 1464 raise RevlogError
1467 1465 return partial
1468 1466 if maybewdir:
1469 1467 # no 'ff...' match in radix tree, wdir identified
1470 1468 raise error.WdirUnsupported
1471 1469 return None
1472 1470 except RevlogError:
1473 1471 # parsers.c radix tree lookup gave multiple matches
1474 1472 # fast path: for unfiltered changelog, radix tree is accurate
1475 1473 if not getattr(self, 'filteredrevs', None):
1476 1474 raise LookupError(id, self.indexfile,
1477 1475 _('ambiguous identifier'))
1478 1476 # fall through to slow path that filters hidden revisions
1479 1477 except (AttributeError, ValueError):
1480 1478 # we are pure python, or key was too short to search radix tree
1481 1479 pass
1482 1480
1483 1481 if id in self._pcache:
1484 1482 return self._pcache[id]
1485 1483
1486 1484 if len(id) <= 40:
1487 1485 try:
1488 1486 # hex(node)[:...]
1489 1487 l = len(id) // 2 # grab an even number of digits
1490 1488 prefix = bin(id[:l * 2])
1491 1489 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1492 1490 nl = [n for n in nl if hex(n).startswith(id) and
1493 1491 self.hasnode(n)]
1494 1492 if len(nl) > 0:
1495 1493 if len(nl) == 1 and not maybewdir:
1496 1494 self._pcache[id] = nl[0]
1497 1495 return nl[0]
1498 1496 raise LookupError(id, self.indexfile,
1499 1497 _('ambiguous identifier'))
1500 1498 if maybewdir:
1501 1499 raise error.WdirUnsupported
1502 1500 return None
1503 1501 except TypeError:
1504 1502 pass
1505 1503
1506 1504 def lookup(self, id):
1507 1505 """locate a node based on:
1508 1506 - revision number or str(revision number)
1509 1507 - nodeid or subset of hex nodeid
1510 1508 """
1511 1509 n = self._match(id)
1512 1510 if n is not None:
1513 1511 return n
1514 1512 n = self._partialmatch(id)
1515 1513 if n:
1516 1514 return n
1517 1515
1518 1516 raise LookupError(id, self.indexfile, _('no match found'))
1519 1517
1520 1518 def shortest(self, node, minlength=1):
1521 1519 """Find the shortest unambiguous prefix that matches node."""
1522 1520 def isvalid(prefix):
1523 1521 try:
1524 1522 node = self._partialmatch(prefix)
1525 1523 except error.RevlogError:
1526 1524 return False
1527 1525 except error.WdirUnsupported:
1528 1526 # single 'ff...' match
1529 1527 return True
1530 1528 if node is None:
1531 1529 raise LookupError(node, self.indexfile, _('no node'))
1532 1530 return True
1533 1531
1534 1532 def maybewdir(prefix):
1535 1533 return all(c == 'f' for c in prefix)
1536 1534
1537 1535 hexnode = hex(node)
1538 1536
1539 1537 def disambiguate(hexnode, minlength):
1540 1538 """Disambiguate against wdirid."""
1541 1539 for length in range(minlength, 41):
1542 1540 prefix = hexnode[:length]
1543 1541 if not maybewdir(prefix):
1544 1542 return prefix
1545 1543
1546 1544 if not getattr(self, 'filteredrevs', None):
1547 1545 try:
1548 1546 length = max(self.index.shortest(node), minlength)
1549 1547 return disambiguate(hexnode, length)
1550 1548 except RevlogError:
1551 1549 if node != wdirid:
1552 1550 raise LookupError(node, self.indexfile, _('no node'))
1553 1551 except AttributeError:
1554 1552 # Fall through to pure code
1555 1553 pass
1556 1554
1557 1555 if node == wdirid:
1558 1556 for length in range(minlength, 41):
1559 1557 prefix = hexnode[:length]
1560 1558 if isvalid(prefix):
1561 1559 return prefix
1562 1560
1563 1561 for length in range(minlength, 41):
1564 1562 prefix = hexnode[:length]
1565 1563 if isvalid(prefix):
1566 1564 return disambiguate(hexnode, length)
1567 1565
1568 1566 def cmp(self, node, text):
1569 1567 """compare text with a given file revision
1570 1568
1571 1569 returns True if text is different from what is stored.
1572 1570 """
1573 1571 p1, p2 = self.parents(node)
1574 1572 return hash(text, p1, p2) != node
1575 1573
1576 1574 def _cachesegment(self, offset, data):
1577 1575 """Add a segment to the revlog cache.
1578 1576
1579 1577 Accepts an absolute offset and the data that is at that location.
1580 1578 """
1581 1579 o, d = self._chunkcache
1582 1580 # try to add to existing cache
1583 1581 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1584 1582 self._chunkcache = o, d + data
1585 1583 else:
1586 1584 self._chunkcache = offset, data
1587 1585
1588 1586 def _readsegment(self, offset, length, df=None):
1589 1587 """Load a segment of raw data from the revlog.
1590 1588
1591 1589 Accepts an absolute offset, length to read, and an optional existing
1592 1590 file handle to read from.
1593 1591
1594 1592 If an existing file handle is passed, it will be seeked and the
1595 1593 original seek position will NOT be restored.
1596 1594
1597 1595 Returns a str or buffer of raw byte data.
1598 1596 """
1599 1597 # Cache data both forward and backward around the requested
1600 1598 # data, in a fixed size window. This helps speed up operations
1601 1599 # involving reading the revlog backwards.
1602 1600 cachesize = self._chunkcachesize
1603 1601 realoffset = offset & ~(cachesize - 1)
1604 1602 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
1605 1603 - realoffset)
1606 1604 with self._datareadfp(df) as df:
1607 1605 df.seek(realoffset)
1608 1606 d = df.read(reallength)
1609 1607 self._cachesegment(realoffset, d)
1610 1608 if offset != realoffset or reallength != length:
1611 1609 return util.buffer(d, offset - realoffset, length)
1612 1610 return d
1613 1611
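The window arithmetic in ``_readsegment`` relies on the cache size being a power of two: masking with ``~(cachesize - 1)`` rounds the start down to a window boundary, and adding ``cachesize`` before masking rounds the end up. A small self-checking sketch of that arithmetic:

    def aligned_window(offset, length, cachesize=65536):
        # cachesize must be a power of two for the mask tricks to hold
        assert cachesize & (cachesize - 1) == 0
        realoffset = offset & ~(cachesize - 1)                      # round down
        realend = (offset + length + cachesize) & ~(cachesize - 1)  # round up
        return realoffset, realend - realoffset

    # a 100-byte read at offset 70000 becomes one 64K read at 65536
    assert aligned_window(70000, 100) == (65536, 65536)
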
1614 1612 def _getsegment(self, offset, length, df=None):
1615 1613 """Obtain a segment of raw data from the revlog.
1616 1614
1617 1615 Accepts an absolute offset, length of bytes to obtain, and an
1618 1616 optional file handle to the already-opened revlog. If the file
1619 1617 handle is used, it's original seek position will not be preserved.
1620 1618
1621 1619 Requests for data may be returned from a cache.
1622 1620
1623 1621 Returns a str or a buffer instance of raw byte data.
1624 1622 """
1625 1623 o, d = self._chunkcache
1626 1624 l = len(d)
1627 1625
1628 1626 # is it in the cache?
1629 1627 cachestart = offset - o
1630 1628 cacheend = cachestart + length
1631 1629 if cachestart >= 0 and cacheend <= l:
1632 1630 if cachestart == 0 and cacheend == l:
1633 1631 return d # avoid a copy
1634 1632 return util.buffer(d, cachestart, cacheend - cachestart)
1635 1633
1636 1634 return self._readsegment(offset, length, df=df)
1637 1635
1638 1636 def _getsegmentforrevs(self, startrev, endrev, df=None):
1639 1637 """Obtain a segment of raw data corresponding to a range of revisions.
1640 1638
1641 1639 Accepts the start and end revisions and an optional already-open
1642 1640 file handle to be used for reading. If the file handle is read, its
1643 1641 seek position will not be preserved.
1644 1642
1645 1643 Requests for data may be satisfied by a cache.
1646 1644
1647 1645 Returns a 2-tuple of (offset, data) for the requested range of
1648 1646 revisions. Offset is the integer offset from the beginning of the
1649 1647 revlog and data is a str or buffer of the raw byte data.
1650 1648
1651 1649 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1652 1650 to determine where each revision's data begins and ends.
1653 1651 """
1654 1652 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1655 1653 # (functions are expensive).
1656 1654 index = self.index
1657 1655 istart = index[startrev]
1658 1656 start = int(istart[0] >> 16)
1659 1657 if startrev == endrev:
1660 1658 end = start + istart[1]
1661 1659 else:
1662 1660 iend = index[endrev]
1663 1661 end = int(iend[0] >> 16) + iend[1]
1664 1662
1665 1663 if self._inline:
1666 1664 start += (startrev + 1) * self._io.size
1667 1665 end += (endrev + 1) * self._io.size
1668 1666 length = end - start
1669 1667
1670 1668 return start, self._getsegment(start, length, df=df)
1671 1669
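The ``+ (rev + 1) * self._io.size`` adjustment exists because an inline revlog interleaves fixed-size index entries with the data: the chunk for revision ``rev`` is preceded by ``rev + 1`` index entries. A sketch of the shift, assuming the 64-byte RevlogV1 entry size:

    INDEX_ENTRY_SIZE = 64  # size of a RevlogV1 index entry in bytes

    def inline_data_offset(dataoffset, rev):
        # skip the index entries for revisions 0..rev that precede the chunk
        return dataoffset + (rev + 1) * INDEX_ENTRY_SIZE

    # revision 0's data starts right after its own index entry
    assert inline_data_offset(0, 0) == 64
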
1672 1670 def _chunk(self, rev, df=None):
1673 1671 """Obtain a single decompressed chunk for a revision.
1674 1672
1675 1673 Accepts an integer revision and an optional already-open file handle
1676 1674 to be used for reading. If used, the seek position of the file will not
1677 1675 be preserved.
1678 1676
1679 1677 Returns a str holding uncompressed data for the requested revision.
1680 1678 """
1681 1679 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1682 1680
1683 1681 def _chunks(self, revs, df=None):
1684 1682 """Obtain decompressed chunks for the specified revisions.
1685 1683
1686 1684 Accepts an iterable of numeric revisions that are assumed to be in
1687 1685 ascending order. Also accepts an optional already-open file handle
1688 1686 to be used for reading. If used, the seek position of the file will
1689 1687 not be preserved.
1690 1688
1691 1689 This function is similar to calling ``self._chunk()`` multiple times,
1692 1690 but is faster.
1693 1691
1694 1692 Returns a list with decompressed data for each requested revision.
1695 1693 """
1696 1694 if not revs:
1697 1695 return []
1698 1696 start = self.start
1699 1697 length = self.length
1700 1698 inline = self._inline
1701 1699 iosize = self._io.size
1702 1700 buffer = util.buffer
1703 1701
1704 1702 l = []
1705 1703 ladd = l.append
1706 1704
1707 1705 if not self._withsparseread:
1708 1706 slicedchunks = (revs,)
1709 1707 else:
1710 1708 slicedchunks = _slicechunk(self, revs)
1711 1709
1712 1710 for revschunk in slicedchunks:
1713 1711 firstrev = revschunk[0]
1714 1712 # Skip trailing revisions with empty diff
1715 1713 for lastrev in revschunk[::-1]:
1716 1714 if length(lastrev) != 0:
1717 1715 break
1718 1716
1719 1717 try:
1720 1718 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1721 1719 except OverflowError:
1722 1720 # issue4215 - we can't cache a run of chunks greater than
1723 1721 # 2G on Windows
1724 1722 return [self._chunk(rev, df=df) for rev in revschunk]
1725 1723
1726 1724 decomp = self.decompress
1727 1725 for rev in revschunk:
1728 1726 chunkstart = start(rev)
1729 1727 if inline:
1730 1728 chunkstart += (rev + 1) * iosize
1731 1729 chunklength = length(rev)
1732 1730 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1733 1731
1734 1732 return l
1735 1733
1736 1734 def _chunkclear(self):
1737 1735 """Clear the raw chunk cache."""
1738 1736 self._chunkcache = (0, '')
1739 1737
1740 1738 def deltaparent(self, rev):
1741 1739 """return deltaparent of the given revision"""
1742 1740 base = self.index[rev][3]
1743 1741 if base == rev:
1744 1742 return nullrev
1745 1743 elif self._generaldelta:
1746 1744 return base
1747 1745 else:
1748 1746 return rev - 1
1749 1747
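Restated as a standalone function: the stored ``base`` field means different things with and without generaldelta, as in this sketch (not Mercurial API):

    def deltaparent_rev(base, rev, generaldelta):
        if base == rev:
            return -1          # nullrev: the revision is a full snapshot
        if generaldelta:
            return base        # base names the delta parent directly
        return rev - 1         # classic layout: always delta against rev - 1
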
1750 1748 def revdiff(self, rev1, rev2):
1751 1749 """return or calculate a delta between two revisions
1752 1750
1753 1751 The delta calculated is in binary form and is intended to be written to
1754 1752 revlog data directly. So this function needs raw revision data.
1755 1753 """
1756 1754 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1757 1755 return bytes(self._chunk(rev2))
1758 1756
1759 1757 return mdiff.textdiff(self.revision(rev1, raw=True),
1760 1758 self.revision(rev2, raw=True))
1761 1759
1762 1760 def revision(self, nodeorrev, _df=None, raw=False):
1763 1761 """return an uncompressed revision of a given node or revision
1764 1762 number.
1765 1763
1766 1764 _df - an existing file handle to read from. (internal-only)
1767 1765 raw - an optional argument specifying if the revision data is to be
1768 1766 treated as raw data when applying flag transforms. 'raw' should be set
1769 1767 to True when generating changegroups or in debug commands.
1770 1768 """
1771 1769 if isinstance(nodeorrev, int):
1772 1770 rev = nodeorrev
1773 1771 node = self.node(rev)
1774 1772 else:
1775 1773 node = nodeorrev
1776 1774 rev = None
1777 1775
1778 1776 cachedrev = None
1779 1777 flags = None
1780 1778 rawtext = None
1781 1779 if node == nullid:
1782 1780 return ""
1783 1781 if self._cache:
1784 1782 if self._cache[0] == node:
1785 1783 # _cache only stores rawtext
1786 1784 if raw:
1787 1785 return self._cache[2]
1788 1786 # duplicated, but good for perf
1789 1787 if rev is None:
1790 1788 rev = self.rev(node)
1791 1789 if flags is None:
1792 1790 flags = self.flags(rev)
1793 1791 # no extra flags set, no flag processor runs, text = rawtext
1794 1792 if flags == REVIDX_DEFAULT_FLAGS:
1795 1793 return self._cache[2]
1796 1794 # rawtext is reusable. need to run flag processor
1797 1795 rawtext = self._cache[2]
1798 1796
1799 1797 cachedrev = self._cache[1]
1800 1798
1801 1799 # look up what we need to read
1802 1800 if rawtext is None:
1803 1801 if rev is None:
1804 1802 rev = self.rev(node)
1805 1803
1806 1804 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1807 1805 if stopped:
1808 1806 rawtext = self._cache[2]
1809 1807
1810 1808 # drop cache to save memory
1811 1809 self._cache = None
1812 1810
1813 1811 bins = self._chunks(chain, df=_df)
1814 1812 if rawtext is None:
1815 1813 rawtext = bytes(bins[0])
1816 1814 bins = bins[1:]
1817 1815
1818 1816 rawtext = mdiff.patches(rawtext, bins)
1819 1817 self._cache = (node, rev, rawtext)
1820 1818
1821 1819 if flags is None:
1822 1820 if rev is None:
1823 1821 rev = self.rev(node)
1824 1822 flags = self.flags(rev)
1825 1823
1826 1824 text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
1827 1825 if validatehash:
1828 1826 self.checkhash(text, node, rev=rev)
1829 1827
1830 1828 return text
1831 1829
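Reconstruction folds the chain's patches onto the base text. A toy model where each "delta" is a ``(start, end, replacement)`` triple rather than mdiff's binary patch encoding:

    def apply_chain(basetext, deltas):
        # mdiff patches are a binary encoding of the same replace operation
        text = basetext
        for start, end, data in deltas:
            text = text[:start] + data + text[end:]
        return text

    assert apply_chain(b'abcdef', [(1, 3, b'XY'), (0, 1, b'z')]) == b'zXYdef'
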
1832 1830 def hash(self, text, p1, p2):
1833 1831 """Compute a node hash.
1834 1832
1835 1833 Available as a function so that subclasses can replace the hash
1836 1834 as needed.
1837 1835 """
1838 1836 return hash(text, p1, p2)
1839 1837
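The module-level ``hash()`` wrapped here is sha1 over the two parent ids in sorted order followed by the text, so the result is symmetric in the parents. A sketch:

    import hashlib

    def nodehash(text, p1, p2):
        # sorting makes nodehash(t, a, b) == nodehash(t, b, a)
        s = hashlib.sha1(min(p1, p2))
        s.update(max(p1, p2))
        s.update(text)
        return s.digest()
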
1840 1838 def _processflags(self, text, flags, operation, raw=False):
1841 1839 """Inspect revision data flags and applies transforms defined by
1842 1840 registered flag processors.
1843 1841
1844 1842 ``text`` - the revision data to process
1845 1843 ``flags`` - the revision flags
1846 1844 ``operation`` - the operation being performed (read or write)
1847 1845 ``raw`` - an optional argument describing if the raw transform should be
1848 1846 applied.
1849 1847
1850 1848 This method processes the flags in the order (or reverse order if
1851 1849 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
1852 1850 flag processors registered for present flags. The order of flags defined
1853 1851 in REVIDX_FLAGS_ORDER needs to be stable because the transforms are not commutative.
1854 1852
1855 1853 Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
1856 1854 processed text and ``validatehash`` is a bool indicating whether the
1857 1855 returned text should be checked for hash integrity.
1858 1856
1859 1857 Note: If the ``raw`` argument is set, it has precedence over the
1860 1858 operation and will only update the value of ``validatehash``.
1861 1859 """
1862 1860 # fast path: no flag processors will run
1863 1861 if flags == 0:
1864 1862 return text, True
1865 1863 if operation not in ('read', 'write'):
1866 1864 raise ProgrammingError(_("invalid '%s' operation") % operation)
1867 1865 # Check all flags are known.
1868 1866 if flags & ~REVIDX_KNOWN_FLAGS:
1869 1867 raise RevlogError(_("incompatible revision flag '%#x'") %
1870 1868 (flags & ~REVIDX_KNOWN_FLAGS))
1871 1869 validatehash = True
1872 1870 # Depending on the operation (read or write), the order might be
1873 1871 # reversed due to non-commutative transforms.
1874 1872 orderedflags = REVIDX_FLAGS_ORDER
1875 1873 if operation == 'write':
1876 1874 orderedflags = reversed(orderedflags)
1877 1875
1878 1876 for flag in orderedflags:
1879 1877 # If a flagprocessor has been registered for a known flag, apply the
1880 1878 # related operation transform and update result tuple.
1881 1879 if flag & flags:
1882 1880 vhash = True
1883 1881
1884 1882 if flag not in _flagprocessors:
1885 1883 message = _("missing processor for flag '%#x'") % (flag)
1886 1884 raise RevlogError(message)
1887 1885
1888 1886 processor = _flagprocessors[flag]
1889 1887 if processor is not None:
1890 1888 readtransform, writetransform, rawtransform = processor
1891 1889
1892 1890 if raw:
1893 1891 vhash = rawtransform(self, text)
1894 1892 elif operation == 'read':
1895 1893 text, vhash = readtransform(self, text)
1896 1894 else: # write operation
1897 1895 text, vhash = writetransform(self, text)
1898 1896 validatehash = validatehash and vhash
1899 1897
1900 1898 return text, validatehash
1901 1899
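A processor is a ``(readtransform, writetransform, rawtransform)`` triple. The sketch below mimics the ordered/reversed application with hypothetical flag bits and transforms that take only the text (the real transforms also receive the revlog):

    import base64

    FLAG_A = 1 << 0  # hypothetical flag bits, for illustration only
    FLAG_B = 1 << 1
    FLAGS_ORDER = [FLAG_A, FLAG_B]

    def process(text, flags, processors, operation):
        # declaration order for reads, reversed for writes
        order = FLAGS_ORDER if operation == 'read' else FLAGS_ORDER[::-1]
        validatehash = True
        for flag in order:
            if flag & flags:
                readtf, writetf, rawtf = processors[flag]
                transform = readtf if operation == 'read' else writetf
                text, vhash = transform(text)
                validatehash = validatehash and vhash
        return text, validatehash

    # toy reversible processor: base64-encode on write, decode on read
    procs = {FLAG_A: (lambda t: (base64.b64decode(t), True),
                      lambda t: (base64.b64encode(t), False),
                      lambda t: False)}
    stored, _ = process(b'hello', FLAG_A, procs, 'write')
    assert process(stored, FLAG_A, procs, 'read') == (b'hello', True)
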
1902 1900 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1903 1901 """Check node hash integrity.
1904 1902
1905 1903 Available as a function so that subclasses can extend hash mismatch
1906 1904 behaviors as needed.
1907 1905 """
1908 1906 try:
1909 1907 if p1 is None and p2 is None:
1910 1908 p1, p2 = self.parents(node)
1911 1909 if node != self.hash(text, p1, p2):
1912 1910 revornode = rev
1913 1911 if revornode is None:
1914 1912 revornode = templatefilters.short(hex(node))
1915 1913 raise RevlogError(_("integrity check failed on %s:%s")
1916 1914 % (self.indexfile, pycompat.bytestr(revornode)))
1917 1915 except RevlogError:
1918 1916 if self._censorable and _censoredtext(text):
1919 1917 raise error.CensoredNodeError(self.indexfile, node, text)
1920 1918 raise
1921 1919
1922 1920 def _enforceinlinesize(self, tr, fp=None):
1923 1921 """Check if the revlog is too big for inline and convert if so.
1924 1922
1925 1923 This should be called after revisions are added to the revlog. If the
1926 1924 revlog has grown too large to be an inline revlog, it will convert it
1927 1925 to use multiple index and data files.
1928 1926 """
1929 1927 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
1930 1928 return
1931 1929
1932 1930 trinfo = tr.find(self.indexfile)
1933 1931 if trinfo is None:
1934 1932 raise RevlogError(_("%s not found in the transaction")
1935 1933 % self.indexfile)
1936 1934
1937 1935 trindex = trinfo[2]
1938 1936 if trindex is not None:
1939 1937 dataoff = self.start(trindex)
1940 1938 else:
1941 1939 # revlog was stripped at start of transaction, use all leftover data
1942 1940 trindex = len(self) - 1
1943 1941 dataoff = self.end(-2)
1944 1942
1945 1943 tr.add(self.datafile, dataoff)
1946 1944
1947 1945 if fp:
1948 1946 fp.flush()
1949 1947 fp.close()
1950 1948
1951 1949 with self._datafp('w') as df:
1952 1950 for r in self:
1953 1951 df.write(self._getsegmentforrevs(r, r)[1])
1954 1952
1955 1953 with self._indexfp('w') as fp:
1956 1954 self.version &= ~FLAG_INLINE_DATA
1957 1955 self._inline = False
1958 1956 io = self._io
1959 1957 for i in self:
1960 1958 e = io.packentry(self.index[i], self.node, self.version, i)
1961 1959 fp.write(e)
1962 1960
1963 1961 # the temp file replaces the real index when we exit the context
1964 1962 # manager
1965 1963
1966 1964 tr.replace(self.indexfile, trindex * self._io.size)
1967 1965 self._chunkclear()
1968 1966
1969 1967 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1970 1968 node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
1971 1969 """add a revision to the log
1972 1970
1973 1971 text - the revision data to add
1974 1972 transaction - the transaction object used for rollback
1975 1973 link - the linkrev data to add
1976 1974 p1, p2 - the parent nodeids of the revision
1977 1975 cachedelta - an optional precomputed delta
1978 1976 node - nodeid of revision; typically node is not specified, and it is
1979 1977 computed by default as hash(text, p1, p2), however subclasses might
1980 1978 use different hashing method (and override checkhash() in such case)
1981 1979 flags - the known flags to set on the revision
1982 1980 deltacomputer - an optional _deltacomputer instance shared between
1983 1981 multiple calls
1984 1982 """
1985 1983 if link == nullrev:
1986 1984 raise RevlogError(_("attempted to add linkrev -1 to %s")
1987 1985 % self.indexfile)
1988 1986
1989 1987 if flags:
1990 1988 node = node or self.hash(text, p1, p2)
1991 1989
1992 1990 rawtext, validatehash = self._processflags(text, flags, 'write')
1993 1991
1994 1992 # If the flag processor modifies the revision data, ignore any provided
1995 1993 # cachedelta.
1996 1994 if rawtext != text:
1997 1995 cachedelta = None
1998 1996
1999 1997 if len(rawtext) > _maxentrysize:
2000 1998 raise RevlogError(
2001 1999 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
2002 2000 % (self.indexfile, len(rawtext)))
2003 2001
2004 2002 node = node or self.hash(rawtext, p1, p2)
2005 2003 if node in self.nodemap:
2006 2004 return node
2007 2005
2008 2006 if validatehash:
2009 2007 self.checkhash(rawtext, node, p1=p1, p2=p2)
2010 2008
2011 2009 return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
2012 2010 flags, cachedelta=cachedelta,
2013 2011 deltacomputer=deltacomputer)
2014 2012
2015 2013 def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
2016 2014 cachedelta=None, deltacomputer=None):
2017 2015 """add a raw revision with known flags, node and parents
2018 2016 useful when reusing a revision not stored in this revlog (ex: received
2019 2017 over wire, or read from an external bundle).
2020 2018 """
2021 2019 dfh = None
2022 2020 if not self._inline:
2023 2021 dfh = self._datafp("a+")
2024 2022 ifh = self._indexfp("a+")
2025 2023 try:
2026 2024 return self._addrevision(node, rawtext, transaction, link, p1, p2,
2027 2025 flags, cachedelta, ifh, dfh,
2028 2026 deltacomputer=deltacomputer)
2029 2027 finally:
2030 2028 if dfh:
2031 2029 dfh.close()
2032 2030 ifh.close()
2033 2031
2034 2032 def compress(self, data):
2035 2033 """Generate a possibly-compressed representation of data."""
2036 2034 if not data:
2037 2035 return '', data
2038 2036
2039 2037 compressed = self._compressor.compress(data)
2040 2038
2041 2039 if compressed:
2042 2040 # The revlog compressor added the header in the returned data.
2043 2041 return '', compressed
2044 2042
2045 2043 if data[0:1] == '\0':
2046 2044 return '', data
2047 2045 return 'u', data
2048 2046
2049 2047 def decompress(self, data):
2050 2048 """Decompress a revlog chunk.
2051 2049
2052 2050 The chunk is expected to begin with a header identifying the
2053 2051 format type so it can be routed to an appropriate decompressor.
2054 2052 """
2055 2053 if not data:
2056 2054 return data
2057 2055
2058 2056 # Revlogs are read much more frequently than they are written and many
2059 2057 # chunks only take microseconds to decompress, so performance is
2060 2058 # important here.
2061 2059 #
2062 2060 # We can make a few assumptions about revlogs:
2063 2061 #
2064 2062 # 1) the majority of chunks will be compressed (as opposed to inline
2065 2063 # raw data).
2066 2064 # 2) decompressing *any* data will likely be at least 10x slower than
2067 2065 # returning raw inline data.
2068 2066 # 3) we want to prioritize common and officially supported compression
2069 2067 # engines
2070 2068 #
2071 2069 # It follows that we want to optimize for "decompress compressed data
2072 2070 # when encoded with common and officially supported compression engines"
2073 2071 # case over "raw data" and "data encoded by less common or non-official
2074 2072 # compression engines." That is why we have the inline lookup first
2075 2073 # followed by the compengines lookup.
2076 2074 #
2077 2075 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2078 2076 # compressed chunks. And this matters for changelog and manifest reads.
2079 2077 t = data[0:1]
2080 2078
2081 2079 if t == 'x':
2082 2080 try:
2083 2081 return _zlibdecompress(data)
2084 2082 except zlib.error as e:
2085 2083 raise RevlogError(_('revlog decompress error: %s') %
2086 2084 stringutil.forcebytestr(e))
2087 2085 # '\0' is more common than 'u' so it goes first.
2088 2086 elif t == '\0':
2089 2087 return data
2090 2088 elif t == 'u':
2091 2089 return util.buffer(data, 1)
2092 2090
2093 2091 try:
2094 2092 compressor = self._decompressors[t]
2095 2093 except KeyError:
2096 2094 try:
2097 2095 engine = util.compengines.forrevlogheader(t)
2098 2096 compressor = engine.revlogcompressor()
2099 2097 self._decompressors[t] = compressor
2100 2098 except KeyError:
2101 2099 raise RevlogError(_('unknown compression type %r') % t)
2102 2100
2103 2101 return compressor.decompress(data)
2104 2102
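The header convention can be shown end to end. This sketch collapses the ``(header, data)`` pair that ``compress`` returns into a single byte string, which is enough to demonstrate the 'x'/'u'/NUL routing (zlib output happens to start with 'x'):

    import zlib

    def compress_chunk(data):
        compressed = zlib.compress(data)
        if len(compressed) < len(data):
            return compressed        # zlib stream already starts with 'x'
        if data[:1] == b'\0':
            return data              # a leading NUL is self-describing
        return b'u' + data           # mark as stored uncompressed

    def decompress_chunk(chunk):
        t = chunk[:1]
        if t == b'x':
            return zlib.decompress(chunk)
        if t == b'\0':
            return chunk
        if t == b'u':
            return chunk[1:]
        raise ValueError('unknown compression header %r' % t)

    text = b'some revision text' * 10
    assert decompress_chunk(compress_chunk(text)) == text
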
2105 2103 def _isgooddeltainfo(self, deltainfo, revinfo):
2106 2104 """Returns True if the given delta is good. Good means that it is within
2107 2105 the disk span, disk size, and chain length bounds that we know to be
2108 2106 performant."""
2109 2107 if deltainfo is None:
2110 2108 return False
2111 2109
2112 2110 # - 'deltainfo.distance' is the distance from the base revision --
2113 2111 # bounding it limits the amount of I/O we need to do.
2114 2112 # - 'deltainfo.compresseddeltalen' is the sum of the total size of
2115 2113 # deltas we need to apply -- bounding it limits the amount of CPU
2116 2114 # we consume.
2117 2115
2118 2116 textlen = revinfo.textlen
2119 2117 defaultmax = textlen * 4
2120 2118 maxdist = self._maxdeltachainspan
2121 2119 if not maxdist:
2122 2120 maxdist = deltainfo.distance # ensure the conditional passes
2123 2121 maxdist = max(maxdist, defaultmax)
2124 2122 if (deltainfo.distance > maxdist or deltainfo.deltalen > textlen or
2125 2123 deltainfo.compresseddeltalen > textlen * 2 or
2126 2124 (self._maxchainlen and deltainfo.chainlen > self._maxchainlen)):
2127 2125 return False
2128 2126
2129 2127 return True
2130 2128
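The same bounds as a standalone predicate, assuming the caller passes the deltainfo fields directly (illustrative only):

    def is_good_delta(distance, deltalen, compresseddeltalen, chainlen,
                      textlen, maxdeltachainspan=0, maxchainlen=None):
        maxdist = maxdeltachainspan or distance   # 0 disables the span limit
        maxdist = max(maxdist, textlen * 4)
        if distance > maxdist:                    # too much I/O to read the span
            return False
        if deltalen > textlen:                    # delta bigger than a snapshot
            return False
        if compresseddeltalen > textlen * 2:      # chain costs too much CPU
            return False
        if maxchainlen and chainlen > maxchainlen:
            return False
        return True
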
2131 2129 def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
2132 2130 cachedelta, ifh, dfh, alwayscache=False,
2133 2131 deltacomputer=None):
2134 2132 """internal function to add revisions to the log
2135 2133
2136 2134 see addrevision for argument descriptions.
2137 2135
2138 2136 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2139 2137
2140 2138 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2141 2139 be used.
2142 2140
2143 2141 invariants:
2144 2142 - rawtext is optional (can be None); if not set, cachedelta must be set.
2145 2143 if both are set, they must correspond to each other.
2146 2144 """
2147 2145 if node == nullid:
2148 2146 raise RevlogError(_("%s: attempt to add null revision") %
2149 2147 (self.indexfile))
2150 2148 if node == wdirid or node in wdirfilenodeids:
2151 2149 raise RevlogError(_("%s: attempt to add wdir revision") %
2152 2150 (self.indexfile))
2153 2151
2154 2152 if self._inline:
2155 2153 fh = ifh
2156 2154 else:
2157 2155 fh = dfh
2158 2156
2159 2157 btext = [rawtext]
2160 2158
2161 2159 curr = len(self)
2162 2160 prev = curr - 1
2163 2161 offset = self.end(prev)
2164 2162 p1r, p2r = self.rev(p1), self.rev(p2)
2165 2163
2166 2164 # full versions are inserted when the needed deltas
2167 2165 # become comparable to the uncompressed text
2168 2166 if rawtext is None:
2169 2167 # need rawtext size, before changed by flag processors, which is
2170 2168 # the non-raw size. use revlog explicitly to avoid filelog's extra
2171 2169 # logic that might remove metadata size.
2172 2170 textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
2173 2171 cachedelta[1])
2174 2172 else:
2175 2173 textlen = len(rawtext)
2176 2174
2177 2175 if deltacomputer is None:
2178 2176 deltacomputer = _deltacomputer(self)
2179 2177
2180 2178 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2181 2179
2182 2180 # no delta for flag processor revision (see "candelta" for why)
2183 2181 # not calling candelta since only one revision needs test, also to
2184 2182 # avoid overhead fetching flags again.
2185 2183 if flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
2186 2184 deltainfo = None
2187 2185 else:
2188 2186 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2189 2187
2190 2188 if deltainfo is not None:
2191 2189 base = deltainfo.base
2192 2190 chainbase = deltainfo.chainbase
2193 2191 data = deltainfo.data
2194 2192 l = deltainfo.deltalen
2195 2193 else:
2196 2194 rawtext = deltacomputer.buildtext(revinfo, fh)
2197 2195 data = self.compress(rawtext)
2198 2196 l = len(data[1]) + len(data[0])
2199 2197 base = chainbase = curr
2200 2198
2201 2199 e = (offset_type(offset, flags), l, textlen,
2202 2200 base, link, p1r, p2r, node)
2203 2201 self.index.insert(-1, e)
2204 2202 self.nodemap[node] = curr
2205 2203
2206 2204 entry = self._io.packentry(e, self.node, self.version, curr)
2207 2205 self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
2208 2206
2209 2207 if alwayscache and rawtext is None:
2210 2208 rawtext = deltacomputer.buildtext(revinfo, fh)
2211 2209
2212 2210 if type(rawtext) == bytes: # only accept immutable objects
2213 2211 self._cache = (node, curr, rawtext)
2214 2212 self._chainbasecache[curr] = chainbase
2215 2213 return node
2216 2214
2217 2215 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
2218 2216 # Files opened in a+ mode have inconsistent behavior on various
2219 2217 # platforms. Windows requires that a file positioning call be made
2220 2218 # when the file handle transitions between reads and writes. See
2221 2219 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2222 2220 # platforms, Python or the platform itself can be buggy. Some versions
2223 2221 # of Solaris have been observed to not append at the end of the file
2224 2222 # if the file was seeked to before the end. See issue4943 for more.
2225 2223 #
2226 2224 # We work around this issue by inserting a seek() before writing.
2227 2225 # Note: This is likely not necessary on Python 3.
2228 2226 ifh.seek(0, os.SEEK_END)
2229 2227 if dfh:
2230 2228 dfh.seek(0, os.SEEK_END)
2231 2229
2232 2230 curr = len(self) - 1
2233 2231 if not self._inline:
2234 2232 transaction.add(self.datafile, offset)
2235 2233 transaction.add(self.indexfile, curr * len(entry))
2236 2234 if data[0]:
2237 2235 dfh.write(data[0])
2238 2236 dfh.write(data[1])
2239 2237 ifh.write(entry)
2240 2238 else:
2241 2239 offset += curr * self._io.size
2242 2240 transaction.add(self.indexfile, offset, curr)
2243 2241 ifh.write(entry)
2244 2242 ifh.write(data[0])
2245 2243 ifh.write(data[1])
2246 2244 self._enforceinlinesize(transaction, ifh)
2247 2245
2248 2246 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
2249 2247 """
2250 2248 add a delta group
2251 2249
2252 2250 given a set of deltas, add them to the revision log. the
2253 2251 first delta is against its parent, which should be in our
2254 2252 log, the rest are against the previous delta.
2255 2253
2256 2254 If ``addrevisioncb`` is defined, it will be called with arguments of
2257 2255 this revlog and the node that was added.
2258 2256 """
2259 2257
2260 2258 nodes = []
2261 2259
2262 2260 r = len(self)
2263 2261 end = 0
2264 2262 if r:
2265 2263 end = self.end(r - 1)
2266 2264 ifh = self._indexfp("a+")
2267 2265 isize = r * self._io.size
2268 2266 if self._inline:
2269 2267 transaction.add(self.indexfile, end + isize, r)
2270 2268 dfh = None
2271 2269 else:
2272 2270 transaction.add(self.indexfile, isize, r)
2273 2271 transaction.add(self.datafile, end)
2274 2272 dfh = self._datafp("a+")
2275 2273 def flush():
2276 2274 if dfh:
2277 2275 dfh.flush()
2278 2276 ifh.flush()
2279 2277 try:
2280 2278 deltacomputer = _deltacomputer(self)
2281 2279 # loop through our set of deltas
2282 2280 for data in deltas:
2283 2281 node, p1, p2, linknode, deltabase, delta, flags = data
2284 2282 link = linkmapper(linknode)
2285 2283 flags = flags or REVIDX_DEFAULT_FLAGS
2286 2284
2287 2285 nodes.append(node)
2288 2286
2289 2287 if node in self.nodemap:
2290 2288 # this can happen if two branches make the same change
2291 2289 continue
2292 2290
2293 2291 for p in (p1, p2):
2294 2292 if p not in self.nodemap:
2295 2293 raise LookupError(p, self.indexfile,
2296 2294 _('unknown parent'))
2297 2295
2298 2296 if deltabase not in self.nodemap:
2299 2297 raise LookupError(deltabase, self.indexfile,
2300 2298 _('unknown delta base'))
2301 2299
2302 2300 baserev = self.rev(deltabase)
2303 2301
2304 2302 if baserev != nullrev and self.iscensored(baserev):
2305 2303 # if base is censored, delta must be full replacement in a
2306 2304 # single patch operation
2307 2305 hlen = struct.calcsize(">lll")
2308 2306 oldlen = self.rawsize(baserev)
2309 2307 newlen = len(delta) - hlen
2310 2308 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2311 2309 raise error.CensoredBaseError(self.indexfile,
2312 2310 self.node(baserev))
2313 2311
2314 2312 if not flags and self._peek_iscensored(baserev, delta, flush):
2315 2313 flags |= REVIDX_ISCENSORED
2316 2314
2317 2315 # We assume consumers of addrevisioncb will want to retrieve
2318 2316 # the added revision, which will require a call to
2319 2317 # revision(). revision() will fast path if there is a cache
2320 2318 # hit. So, we tell _addrevision() to always cache in this case.
2321 2319 # We're only using addgroup() in the context of changegroup
2322 2320 # generation so the revision data can always be handled as raw
2323 2321 # by the flagprocessor.
2324 2322 self._addrevision(node, None, transaction, link,
2325 2323 p1, p2, flags, (baserev, delta),
2326 2324 ifh, dfh,
2327 2325 alwayscache=bool(addrevisioncb),
2328 2326 deltacomputer=deltacomputer)
2329 2327
2330 2328 if addrevisioncb:
2331 2329 addrevisioncb(self, node)
2332 2330
2333 2331 if not dfh and not self._inline:
2334 2332 # addrevision switched from inline to conventional
2335 2333 # reopen the index
2336 2334 ifh.close()
2337 2335 dfh = self._datafp("a+")
2338 2336 ifh = self._indexfp("a+")
2339 2337 finally:
2340 2338 if dfh:
2341 2339 dfh.close()
2342 2340 ifh.close()
2343 2341
2344 2342 return nodes
2345 2343
2346 2344 def iscensored(self, rev):
2347 2345 """Check if a file revision is censored."""
2348 2346 if not self._censorable:
2349 2347 return False
2350 2348
2351 2349 return self.flags(rev) & REVIDX_ISCENSORED
2352 2350
2353 2351 def _peek_iscensored(self, baserev, delta, flush):
2354 2352 """Quickly check if a delta produces a censored revision."""
2355 2353 if not self._censorable:
2356 2354 return False
2357 2355
2358 2356 # Fragile heuristic: unless new file meta keys are added alphabetically
2359 2357 # preceding "censored", all censored revisions are prefixed by
2360 2358 # "\1\ncensored:". A delta producing such a censored revision must be a
2361 2359 # full-replacement delta, so we inspect the first and only patch in the
2362 2360 # delta for this prefix.
2363 2361 hlen = struct.calcsize(">lll")
2364 2362 if len(delta) <= hlen:
2365 2363 return False
2366 2364
2367 2365 oldlen = self.rawsize(baserev)
2368 2366 newlen = len(delta) - hlen
2369 2367 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2370 2368 return False
2371 2369
2372 2370 add = "\1\ncensored:"
2373 2371 addlen = len(add)
2374 2372 return newlen >= addlen and delta[hlen:hlen + addlen] == add
2375 2373
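A full-replacement delta is one mdiff hunk: a ``>lll`` header ``(start, end, newlength)`` followed by the replacement text, which is exactly what ``mdiff.replacediffheader`` packs. A sketch that builds such a delta and applies the same check:

    import struct

    def full_replacement_delta(oldlen, newtext):
        # one hunk replacing bytes [0, oldlen) with newtext
        return struct.pack(">lll", 0, oldlen, len(newtext)) + newtext

    def peek_censored(delta, oldlen):
        hlen = struct.calcsize(">lll")
        if len(delta) <= hlen:
            return False
        if delta[:hlen] != struct.pack(">lll", 0, oldlen, len(delta) - hlen):
            return False
        return delta[hlen:].startswith(b"\1\ncensored:")

    d = full_replacement_delta(42, b"\1\ncensored: gone\n")
    assert peek_censored(d, 42)
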
2376 2374 def getstrippoint(self, minlink):
2377 2375 """find the minimum rev that must be stripped to strip the linkrev
2378 2376
2379 2377 Returns a tuple containing the minimum rev and a set of all revs that
2380 2378 have linkrevs that will be broken by this strip.
2381 2379 """
2382 2380 brokenrevs = set()
2383 2381 strippoint = len(self)
2384 2382
2385 2383 heads = {}
2386 2384 futurelargelinkrevs = set()
2387 2385 for head in self.headrevs():
2388 2386 headlinkrev = self.linkrev(head)
2389 2387 heads[head] = headlinkrev
2390 2388 if headlinkrev >= minlink:
2391 2389 futurelargelinkrevs.add(headlinkrev)
2392 2390
2393 2391 # This algorithm involves walking down the rev graph, starting at the
2394 2392 # heads. Since the revs are topologically sorted according to linkrev,
2395 2393 # once all head linkrevs are below the minlink, we know there are
2396 2394 # no more revs that could have a linkrev greater than minlink.
2397 2395 # So we can stop walking.
2398 2396 while futurelargelinkrevs:
2399 2397 strippoint -= 1
2400 2398 linkrev = heads.pop(strippoint)
2401 2399
2402 2400 if linkrev < minlink:
2403 2401 brokenrevs.add(strippoint)
2404 2402 else:
2405 2403 futurelargelinkrevs.remove(linkrev)
2406 2404
2407 2405 for p in self.parentrevs(strippoint):
2408 2406 if p != nullrev:
2409 2407 plinkrev = self.linkrev(p)
2410 2408 heads[p] = plinkrev
2411 2409 if plinkrev >= minlink:
2412 2410 futurelargelinkrevs.add(plinkrev)
2413 2411
2414 2412 return strippoint, brokenrevs
2415 2413
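The walk can be exercised on a toy graph. This sketch mirrors the loop above with plain dicts and lists (``strippoint`` here is illustrative, not the method):

    def strippoint(parents, linkrevs, heads, minlink):
        # parents[rev] -> parent revs, linkrevs[rev] -> linkrev, heads are
        # the childless revs; walk down while large linkrevs remain ahead
        brokenrevs = set()
        point = len(linkrevs)
        headlinks = dict((h, linkrevs[h]) for h in heads)
        futurelarge = set(l for l in headlinks.values() if l >= minlink)
        while futurelarge:
            point -= 1
            linkrev = headlinks.pop(point)
            if linkrev < minlink:
                brokenrevs.add(point)
            else:
                futurelarge.discard(linkrev)
            for p in parents[point]:
                plink = linkrevs[p]
                headlinks[p] = plink
                if plink >= minlink:
                    futurelarge.add(plink)
        return point, brokenrevs

    # linear history 0 <- 1 <- 2 with linkrev == rev: stripping from
    # linkrev 1 truncates at rev 1 and breaks no linkrevs
    assert strippoint({0: (), 1: (0,), 2: (1,)}, [0, 1, 2], [2], 1) == (1, set())
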
2416 2414 def strip(self, minlink, transaction):
2417 2415 """truncate the revlog on the first revision with a linkrev >= minlink
2418 2416
2419 2417 This function is called when we're stripping revision minlink and
2420 2418 its descendants from the repository.
2421 2419
2422 2420 We have to remove all revisions with linkrev >= minlink, because
2423 2421 the equivalent changelog revisions will be renumbered after the
2424 2422 strip.
2425 2423
2426 2424 So we truncate the revlog on the first of these revisions, and
2427 2425 trust that the caller has saved the revisions that shouldn't be
2428 2426 removed and that it'll re-add them after this truncation.
2429 2427 """
2430 2428 if len(self) == 0:
2431 2429 return
2432 2430
2433 2431 rev, _ = self.getstrippoint(minlink)
2434 2432 if rev == len(self):
2435 2433 return
2436 2434
2437 2435 # first truncate the files on disk
2438 2436 end = self.start(rev)
2439 2437 if not self._inline:
2440 2438 transaction.add(self.datafile, end)
2441 2439 end = rev * self._io.size
2442 2440 else:
2443 2441 end += rev * self._io.size
2444 2442
2445 2443 transaction.add(self.indexfile, end)
2446 2444
2447 2445 # then reset internal state in memory to forget those revisions
2448 2446 self._cache = None
2449 2447 self._chaininfocache = {}
2450 2448 self._chunkclear()
2451 2449 for x in xrange(rev, len(self)):
2452 2450 del self.nodemap[self.node(x)]
2453 2451
2454 2452 del self.index[rev:-1]
2455 2453 self._nodepos = None
2456 2454
2457 2455 def checksize(self):
2458 2456 expected = 0
2459 2457 if len(self):
2460 2458 expected = max(0, self.end(len(self) - 1))
2461 2459
2462 2460 try:
2463 2461 with self._datafp() as f:
2464 2462 f.seek(0, 2)
2465 2463 actual = f.tell()
2466 2464 dd = actual - expected
2467 2465 except IOError as inst:
2468 2466 if inst.errno != errno.ENOENT:
2469 2467 raise
2470 2468 dd = 0
2471 2469
2472 2470 try:
2473 2471 f = self.opener(self.indexfile)
2474 2472 f.seek(0, 2)
2475 2473 actual = f.tell()
2476 2474 f.close()
2477 2475 s = self._io.size
2478 2476 i = max(0, actual // s)
2479 2477 di = actual - (i * s)
2480 2478 if self._inline:
2481 2479 databytes = 0
2482 2480 for r in self:
2483 2481 databytes += max(0, self.length(r))
2484 2482 dd = 0
2485 2483 di = actual - len(self) * s - databytes
2486 2484 except IOError as inst:
2487 2485 if inst.errno != errno.ENOENT:
2488 2486 raise
2489 2487 di = 0
2490 2488
2491 2489 return (dd, di)
2492 2490
2493 2491 def files(self):
2494 2492 res = [self.indexfile]
2495 2493 if not self._inline:
2496 2494 res.append(self.datafile)
2497 2495 return res
2498 2496
2499 2497 DELTAREUSEALWAYS = 'always'
2500 2498 DELTAREUSESAMEREVS = 'samerevs'
2501 2499 DELTAREUSENEVER = 'never'
2502 2500
2503 2501 DELTAREUSEFULLADD = 'fulladd'
2504 2502
2505 2503 DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}
2506 2504
2507 2505 def clone(self, tr, destrevlog, addrevisioncb=None,
2508 2506 deltareuse=DELTAREUSESAMEREVS, aggressivemergedeltas=None):
2509 2507 """Copy this revlog to another, possibly with format changes.
2510 2508
2511 2509 The destination revlog will contain the same revisions and nodes.
2512 2510 However, it may not be bit-for-bit identical due to e.g. delta encoding
2513 2511 differences.
2514 2512
2515 2513 The ``deltareuse`` argument controls how deltas from the existing revlog
2516 2514 are preserved in the destination revlog. The argument can have the
2517 2515 following values:
2518 2516
2519 2517 DELTAREUSEALWAYS
2520 2518 Deltas will always be reused (if possible), even if the destination
2521 2519 revlog would not select the same revisions for the delta. This is the
2522 2520 fastest mode of operation.
2523 2521 DELTAREUSESAMEREVS
2524 2522 Deltas will be reused if the destination revlog would pick the same
2525 2523 revisions for the delta. This mode strikes a balance between speed
2526 2524 and optimization.
2527 2525 DELTAREUSENEVER
2528 2526 Deltas will never be reused. This is the slowest mode of execution.
2529 2527 This mode can be used to recompute deltas (e.g. if the diff/delta
2530 2528 algorithm changes).
2531 2529
2532 2530 Delta computation can be slow, so the choice of delta reuse policy can
2533 2531 significantly affect run time.
2534 2532
2535 2533 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2536 2534 two extremes. Deltas will be reused if they are appropriate. But if the
2537 2535 delta could choose a better revision, it will do so. This means if you
2538 2536 are converting a non-generaldelta revlog to a generaldelta revlog,
2539 2537 deltas will be recomputed if the delta's parent isn't a parent of the
2540 2538 revision.
2541 2539
2542 2540 In addition to the delta policy, the ``aggressivemergedeltas`` argument
2543 2541 controls whether to compute deltas against both parents for merges.
2544 2542 If it is None, the destination revlog's current setting is kept.
2545 2543 """
2546 2544 if deltareuse not in self.DELTAREUSEALL:
2547 2545 raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)
2548 2546
2549 2547 if len(destrevlog):
2550 2548 raise ValueError(_('destination revlog is not empty'))
2551 2549
2552 2550 if getattr(self, 'filteredrevs', None):
2553 2551 raise ValueError(_('source revlog has filtered revisions'))
2554 2552 if getattr(destrevlog, 'filteredrevs', None):
2555 2553 raise ValueError(_('destination revlog has filtered revisions'))
2556 2554
2557 2555 # lazydeltabase controls whether to reuse a cached delta, if possible.
2558 2556 oldlazydeltabase = destrevlog._lazydeltabase
2559 2557 oldamd = destrevlog._aggressivemergedeltas
2560 2558
2561 2559 try:
2562 2560 if deltareuse == self.DELTAREUSEALWAYS:
2563 2561 destrevlog._lazydeltabase = True
2564 2562 elif deltareuse == self.DELTAREUSESAMEREVS:
2565 2563 destrevlog._lazydeltabase = False
2566 2564
2567 2565 destrevlog._aggressivemergedeltas = aggressivemergedeltas or oldamd
2568 2566
2569 2567 populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
2570 2568 self.DELTAREUSESAMEREVS)
2571 2569
2572 2570 deltacomputer = _deltacomputer(destrevlog)
2573 2571 index = self.index
2574 2572 for rev in self:
2575 2573 entry = index[rev]
2576 2574
2577 2575 # Some classes override linkrev to take filtered revs into
2578 2576 # account. Use raw entry from index.
2579 2577 flags = entry[0] & 0xffff
2580 2578 linkrev = entry[4]
2581 2579 p1 = index[entry[5]][7]
2582 2580 p2 = index[entry[6]][7]
2583 2581 node = entry[7]
2584 2582
2585 2583 # (Possibly) reuse the delta from the revlog if allowed and
2586 2584 # the revlog chunk is a delta.
2587 2585 cachedelta = None
2588 2586 rawtext = None
2589 2587 if populatecachedelta:
2590 2588 dp = self.deltaparent(rev)
2591 2589 if dp != nullrev:
2592 2590 cachedelta = (dp, bytes(self._chunk(rev)))
2593 2591
2594 2592 if not cachedelta:
2595 2593 rawtext = self.revision(rev, raw=True)
2596 2594
2597 2595
2598 2596 if deltareuse == self.DELTAREUSEFULLADD:
2599 2597 destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
2600 2598 cachedelta=cachedelta,
2601 2599 node=node, flags=flags,
2602 2600 deltacomputer=deltacomputer)
2603 2601 else:
2604 2602 ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
2605 2603 checkambig=False)
2606 2604 dfh = None
2607 2605 if not destrevlog._inline:
2608 2606 dfh = destrevlog.opener(destrevlog.datafile, 'a+')
2609 2607 try:
2610 2608 destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
2611 2609 p2, flags, cachedelta, ifh, dfh,
2612 2610 deltacomputer=deltacomputer)
2613 2611 finally:
2614 2612 if dfh:
2615 2613 dfh.close()
2616 2614 ifh.close()
2617 2615
2618 2616 if addrevisioncb:
2619 2617 addrevisioncb(self, rev, node)
2620 2618 finally:
2621 2619 destrevlog._lazydeltabase = oldlazydeltabase
2622 2620 destrevlog._aggressivemergedeltas = oldamd
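
A hedged usage sketch: recomputing every delta while copying into an empty revlog. The names ``repo``, ``srclog`` and ``dstlog`` are assumed to exist; only ``clone`` and its arguments come from the code above:

    # hypothetical setup: srclog/dstlog are revlog instances, repo a localrepo
    with repo.transaction('clone-revlog') as tr:
        srclog.clone(tr, dstlog,
                     deltareuse=srclog.DELTAREUSENEVER,
                     aggressivemergedeltas=True)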