revlog: early return in _slicechunk when span is already small enough...
Boris Feld
r38654:d083ae26 default
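This changeset adds an early return to _slicechunk(): when the span of the
whole delta chain on disk is already smaller than the configured sparse-read
minimum gap size, no gap inside it can exceed that threshold, so there is
nothing to slice and the revs are yielded as a single read. The new check,
visible in the hunk below right after deltachainspan is computed:

    if deltachainspan < revlog._srmingapsize:
        yield revs
        return

This skips the chain-payload sum, the density computation, and the gap-heap
work for the common case of short spans.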
@@ -1,2625 +1,2629 @@
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import collections
17 17 import contextlib
18 18 import errno
19 19 import hashlib
20 20 import heapq
21 21 import os
22 22 import re
23 23 import struct
24 24 import zlib
25 25
26 26 # import stuff from node for others to import from revlog
27 27 from .node import (
28 28 bin,
29 29 hex,
30 30 nullid,
31 31 nullrev,
32 32 wdirfilenodeids,
33 33 wdirhex,
34 34 wdirid,
35 35 wdirrev,
36 36 )
37 37 from .i18n import _
38 38 from .thirdparty import (
39 39 attr,
40 40 )
41 41 from . import (
42 42 ancestor,
43 43 error,
44 44 mdiff,
45 45 policy,
46 46 pycompat,
47 47 templatefilters,
48 48 util,
49 49 )
50 50 from .utils import (
51 51 stringutil,
52 52 )
53 53
54 54 parsers = policy.importmod(r'parsers')
55 55
56 56 # Aliased for performance.
57 57 _zlibdecompress = zlib.decompress
58 58
59 59 # revlog header flags
60 60 REVLOGV0 = 0
61 61 REVLOGV1 = 1
62 62 # Dummy value until file format is finalized.
63 63 # Reminder: change the bounds check in revlog.__init__ when this is changed.
64 64 REVLOGV2 = 0xDEAD
65 65 FLAG_INLINE_DATA = (1 << 16)
66 66 FLAG_GENERALDELTA = (1 << 17)
67 67 REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA
68 68 REVLOG_DEFAULT_FORMAT = REVLOGV1
69 69 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
70 70 REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA
71 71 REVLOGV2_FLAGS = REVLOGV1_FLAGS
72 72
73 73 # revlog index flags
74 74 REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
75 75 REVIDX_ELLIPSIS = (1 << 14) # revision hash does not match data (narrowhg)
76 76 REVIDX_EXTSTORED = (1 << 13) # revision data is stored externally
77 77 REVIDX_DEFAULT_FLAGS = 0
78 78 # stable order in which flags need to be processed and their processors applied
79 79 REVIDX_FLAGS_ORDER = [
80 80 REVIDX_ISCENSORED,
81 81 REVIDX_ELLIPSIS,
82 82 REVIDX_EXTSTORED,
83 83 ]
84 84 REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)
85 85 # bitmask for flags that could cause rawdata content change
86 86 REVIDX_RAWTEXT_CHANGING_FLAGS = REVIDX_ISCENSORED | REVIDX_EXTSTORED
87 87
88 88 # max size of revlog with inline data
89 89 _maxinline = 131072
90 90 _chunksize = 1048576
91 91
92 92 RevlogError = error.RevlogError
93 93 LookupError = error.LookupError
94 94 CensoredNodeError = error.CensoredNodeError
95 95 ProgrammingError = error.ProgrammingError
96 96
97 97 # Store flag processors (cf. 'addflagprocessor()' to register)
98 98 _flagprocessors = {
99 99 REVIDX_ISCENSORED: None,
100 100 }
101 101
102 102 _mdre = re.compile('\1\n')
103 103 def parsemeta(text):
104 104 """return (metadatadict, metadatasize)"""
105 105 # text can be buffer, so we can't use .startswith or .index
106 106 if text[:2] != '\1\n':
107 107 return None, None
108 108 s = _mdre.search(text, 2).start()
109 109 mtext = text[2:s]
110 110 meta = {}
111 111 for l in mtext.splitlines():
112 112 k, v = l.split(": ", 1)
113 113 meta[k] = v
114 114 return meta, (s + 2)
115 115
116 116 def packmeta(meta, text):
117 117 keys = sorted(meta)
118 118 metatext = "".join("%s: %s\n" % (k, meta[k]) for k in keys)
119 119 return "\1\n%s\1\n%s" % (metatext, text)
120 120
121 121 def _censoredtext(text):
122 122 m, offs = parsemeta(text)
123 123 return m and "censored" in m
124 124
125 125 def addflagprocessor(flag, processor):
126 126 """Register a flag processor on a revision data flag.
127 127
128 128 Invariant:
129 129 - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
130 130 and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
131 131 - Only one flag processor can be registered on a specific flag.
132 132 - flagprocessors must be 3-tuples of functions (read, write, raw) with the
133 133 following signatures:
134 134 - (read) f(self, rawtext) -> text, bool
135 135 - (write) f(self, text) -> rawtext, bool
136 136 - (raw) f(self, rawtext) -> bool
137 137 "text" is presented to the user. "rawtext" is stored in revlog data, not
138 138 directly visible to the user.
139 139 The boolean returned by these transforms is used to determine whether
140 140 the returned text can be used for hash integrity checking. For example,
141 141 if "write" returns False, then "text" is used to generate hash. If
142 142 "write" returns True, that basically means "rawtext" returned by "write"
143 143 should be used to generate hash. Usually, "write" and "read" return
144 144 different booleans. And "raw" returns a same boolean as "write".
145 145
146 146 Note: The 'raw' transform is used for changegroup generation and in some
147 147 debug commands. In this case the transform only indicates whether the
148 148 contents can be used for hash integrity checks.
149 149 """
150 150 if not flag & REVIDX_KNOWN_FLAGS:
151 151 msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
152 152 raise ProgrammingError(msg)
153 153 if flag not in REVIDX_FLAGS_ORDER:
154 154 msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
155 155 raise ProgrammingError(msg)
156 156 if flag in _flagprocessors:
157 157 msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
158 158 raise error.Abort(msg)
159 159 _flagprocessors[flag] = processor
160 160
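# --- editorial illustration, not part of revlog.py ---
# A minimal sketch of registering a flag processor, assuming a
# hypothetical REVIDX_EXAMPLE flag that has already been added to
# REVIDX_FLAGS_ORDER and REVIDX_KNOWN_FLAGS. The processor is the
# (read, write, raw) 3-tuple described in the docstring above:
#
#     def exampleread(self, rawtext):
#         return rawtext, True    # text == rawtext; usable for hashing
#     def examplewrite(self, text):
#         return text, False      # hash the user-visible text
#     def exampleraw(self, rawtext):
#         return True             # rawtext usable for hash checks
#
#     addflagprocessor(REVIDX_EXAMPLE,
#                      (exampleread, examplewrite, exampleraw))
# --- end illustration ---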
161 161 def getoffset(q):
162 162 return int(q >> 16)
163 163
164 164 def gettype(q):
165 165 return int(q & 0xFFFF)
166 166
167 167 def offset_type(offset, type):
168 168 if (type & ~REVIDX_KNOWN_FLAGS) != 0:
169 169 raise ValueError('unknown revlog index flags')
170 170 return int(int(offset) << 16 | type)
171 171
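# --- editorial illustration, not part of revlog.py ---
# The first field of an index entry packs a 48-bit byte offset with
# 16 bits of flags; these helpers round-trip it. For example:
#
#     v = offset_type(4096, REVIDX_ISCENSORED)
#     getoffset(v)   # -> 4096
#     gettype(v)     # -> 0x8000 (REVIDX_ISCENSORED == 1 << 15)
# --- end illustration ---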
172 172 _nullhash = hashlib.sha1(nullid)
173 173
174 174 def hash(text, p1, p2):
175 175 """generate a hash from the given text and its parent hashes
176 176
177 177 This hash combines both the current file contents and its history
178 178 in a manner that makes it easy to distinguish nodes with the same
179 179 content in the revision graph.
180 180 """
181 181 # As of now, if one of the parent nodes is null, p2 is null
182 182 if p2 == nullid:
183 183 # deep copy of a hash is faster than creating one
184 184 s = _nullhash.copy()
185 185 s.update(p1)
186 186 else:
187 187 # none of the parent nodes are nullid
188 188 if p1 < p2:
189 189 a = p1
190 190 b = p2
191 191 else:
192 192 a = p2
193 193 b = p1
194 194 s = hashlib.sha1(a)
195 195 s.update(b)
196 196 s.update(text)
197 197 return s.digest()
198 198
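# --- editorial illustration, not part of revlog.py ---
# An equivalent spelling of hash() for exposition, using the hashlib
# already imported above: the node is the sha1 of the two parent nodes
# in sorted order followed by the text. (nullid is twenty zero bytes,
# so it always sorts first, which is what the _nullhash fast path
# above exploits.)
#
#     def nodehash(text, p1, p2):
#         a, b = sorted((p1, p2))
#         return hashlib.sha1(a + b + text).digest()
# --- end illustration ---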
199 199 def _trimchunk(revlog, revs, startidx, endidx=None):
200 200 """returns revs[startidx:endidx] without empty trailing revs
201 201 """
202 202 length = revlog.length
203 203
204 204 if endidx is None:
205 205 endidx = len(revs)
206 206
207 207 # Trim empty revs at the end, but never the very first revision of a chain
208 208 while endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0:
209 209 endidx -= 1
210 210
211 211 return revs[startidx:endidx]
212 212
213 213 def _slicechunk(revlog, revs):
214 214 """slice revs to reduce the amount of unrelated data to be read from disk.
215 215
216 216 ``revs`` is sliced into groups that should be read in one go.
217 217 Assume that revs are sorted.
218 218 """
219 219 start = revlog.start
220 220 length = revlog.length
221 221
222 222 if len(revs) <= 1:
223 223 yield revs
224 224 return
225 225
226 226 startbyte = start(revs[0])
227 227 endbyte = start(revs[-1]) + length(revs[-1])
228 228 readdata = deltachainspan = endbyte - startbyte
229 229
230 if deltachainspan < revlog._srmingapsize:
231 yield revs
232 return
233
230 234 chainpayload = sum(length(r) for r in revs)
231 235
232 236 if deltachainspan:
233 237 density = chainpayload / float(deltachainspan)
234 238 else:
235 239 density = 1.0
236 240
237 241 if density >= revlog._srdensitythreshold:
238 242 yield revs
239 243 return
240 244
241 245 # Store the gaps in a heap to have them sorted by decreasing size
242 246 gapsheap = []
243 247 heapq.heapify(gapsheap)
244 248 prevend = None
245 249 for i, rev in enumerate(revs):
246 250 revstart = start(rev)
247 251 revlen = length(rev)
248 252
249 253 # Skip empty revisions to form larger holes
250 254 if revlen == 0:
251 255 continue
252 256
253 257 if prevend is not None:
254 258 gapsize = revstart - prevend
255 259 # only consider holes that are large enough
256 260 if gapsize > revlog._srmingapsize:
257 261 heapq.heappush(gapsheap, (-gapsize, i))
258 262
259 263 prevend = revstart + revlen
260 264
261 265 # Collect the indices of the largest holes until the density is acceptable
262 266 indicesheap = []
263 267 heapq.heapify(indicesheap)
264 268 while gapsheap and density < revlog._srdensitythreshold:
265 269 oppgapsize, gapidx = heapq.heappop(gapsheap)
266 270
267 271 heapq.heappush(indicesheap, gapidx)
268 272
269 273 # the gap sizes are stored as negatives so that the min-heap pops
270 274 # them in order of decreasing size
271 275 readdata -= (-oppgapsize)
272 276 if readdata > 0:
273 277 density = chainpayload / float(readdata)
274 278 else:
275 279 density = 1.0
276 280
277 281 # Cut the revs at collected indices
278 282 previdx = 0
279 283 while indicesheap:
280 284 idx = heapq.heappop(indicesheap)
281 285
282 286 chunk = _trimchunk(revlog, revs, previdx, idx)
283 287 if chunk:
284 288 yield chunk
285 289
286 290 previdx = idx
287 291
288 292 chunk = _trimchunk(revlog, revs, previdx)
289 293 if chunk:
290 294 yield chunk
291 295
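# --- editorial illustration, not part of revlog.py ---
# Worked example of the heuristic above, using the default settings
# configured further down (_srdensitythreshold = 0.50,
# _srmingapsize = 262144). Suppose revs span a megabyte on disk but
# their chunks only fill part of it:
#
#     deltachainspan = 1048576   # >= _srmingapsize, so no early return
#     chainpayload   = 409600
#     density        = 409600 / 1048576.0 ~= 0.39 < 0.50
#
# so the largest gaps (each > _srmingapsize) are popped from gapsheap
# and revs is cut at their indices until the data actually read is
# dense enough. With the early return added by this changeset, a span
# smaller than _srmingapsize is now yielded whole, skipping the
# payload sum, density computation, and heap work entirely.
# --- end illustration ---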
292 296 @attr.s(slots=True, frozen=True)
293 297 class _deltainfo(object):
294 298 distance = attr.ib()
295 299 deltalen = attr.ib()
296 300 data = attr.ib()
297 301 base = attr.ib()
298 302 chainbase = attr.ib()
299 303 chainlen = attr.ib()
300 304 compresseddeltalen = attr.ib()
301 305
302 306 class _deltacomputer(object):
303 307 def __init__(self, revlog):
304 308 self.revlog = revlog
305 309
306 310 def _getcandidaterevs(self, p1, p2, cachedelta):
307 311 """
308 312 Provides revisions that are interesting to diff against,
309 313 grouped by level of preference, easiest first.
310 314 """
311 315 revlog = self.revlog
312 316 gdelta = revlog._generaldelta
313 317 curr = len(revlog)
314 318 prev = curr - 1
315 319 p1r, p2r = revlog.rev(p1), revlog.rev(p2)
316 320
317 321 # should we try to build a delta?
318 322 if prev != nullrev and revlog.storedeltachains:
319 323 tested = set()
320 324 # This condition is true most of the time when processing
321 325 # changegroup data into a generaldelta repo. The only time it
322 326 # isn't true is if this is the first revision in a delta chain
323 327 # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
324 328 if cachedelta and gdelta and revlog._lazydeltabase:
325 329 # Assume what we received from the server is a good choice
326 330 # build delta will reuse the cache
327 331 yield (cachedelta[0],)
328 332 tested.add(cachedelta[0])
329 333
330 334 if gdelta:
331 335 # exclude already lazy tested base if any
332 336 parents = [p for p in (p1r, p2r)
333 337 if p != nullrev and p not in tested]
334 338
335 339 if not revlog._aggressivemergedeltas and len(parents) == 2:
336 340 parents.sort()
337 341 # To minimize the chance of having to build a fulltext,
338 342 # pick first whichever parent is closest to us (max rev)
339 343 yield (parents[1],)
340 344 # then the other one (min rev) if the first did not fit
341 345 yield (parents[0],)
342 346 tested.update(parents)
343 347 elif len(parents) > 0:
344 348 # Test all parents (1 or 2), and keep the best candidate
345 349 yield parents
346 350 tested.update(parents)
347 351
348 352 if prev not in tested:
349 353 # other approaches failed, try against prev to hopefully save us a
350 354 # fulltext.
351 355 yield (prev,)
352 356 tested.add(prev)
353 357
354 358 def buildtext(self, revinfo, fh):
355 359 """Builds a fulltext version of a revision
356 360
357 361 revinfo: _revisioninfo instance that contains all needed info
358 362 fh: file handle to either the .i or the .d revlog file,
359 363 depending on whether it is inlined or not
360 364 """
361 365 btext = revinfo.btext
362 366 if btext[0] is not None:
363 367 return btext[0]
364 368
365 369 revlog = self.revlog
366 370 cachedelta = revinfo.cachedelta
367 371 flags = revinfo.flags
368 372 node = revinfo.node
369 373
370 374 baserev = cachedelta[0]
371 375 delta = cachedelta[1]
372 376 # special case deltas which replace entire base; no need to decode
373 377 # base revision. this neatly avoids censored bases, which throw when
374 378 # they're decoded.
375 379 hlen = struct.calcsize(">lll")
376 380 if delta[:hlen] == mdiff.replacediffheader(revlog.rawsize(baserev),
377 381 len(delta) - hlen):
378 382 btext[0] = delta[hlen:]
379 383 else:
380 384 # deltabase is rawtext before changed by flag processors, which is
381 385 # equivalent to non-raw text
382 386 basetext = revlog.revision(baserev, _df=fh, raw=False)
383 387 btext[0] = mdiff.patch(basetext, delta)
384 388
385 389 try:
386 390 res = revlog._processflags(btext[0], flags, 'read', raw=True)
387 391 btext[0], validatehash = res
388 392 if validatehash:
389 393 revlog.checkhash(btext[0], node, p1=revinfo.p1, p2=revinfo.p2)
390 394 if flags & REVIDX_ISCENSORED:
391 395 raise RevlogError(_('node %s is not censored') % node)
392 396 except CensoredNodeError:
393 397 # must pass the censored index flag to add censored revisions
394 398 if not flags & REVIDX_ISCENSORED:
395 399 raise
396 400 return btext[0]
397 401
398 402 def _builddeltadiff(self, base, revinfo, fh):
399 403 revlog = self.revlog
400 404 t = self.buildtext(revinfo, fh)
401 405 if revlog.iscensored(base):
402 406 # deltas based on a censored revision must replace the
403 407 # full content in one patch, so delta works everywhere
404 408 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
405 409 delta = header + t
406 410 else:
407 411 ptext = revlog.revision(base, _df=fh, raw=True)
408 412 delta = mdiff.textdiff(ptext, t)
409 413
410 414 return delta
411 415
412 416 def _builddeltainfo(self, revinfo, base, fh):
413 417 # can we use the cached delta?
414 418 if revinfo.cachedelta and revinfo.cachedelta[0] == base:
415 419 delta = revinfo.cachedelta[1]
416 420 else:
417 421 delta = self._builddeltadiff(base, revinfo, fh)
418 422 revlog = self.revlog
419 423 header, data = revlog.compress(delta)
420 424 deltalen = len(header) + len(data)
421 425 chainbase = revlog.chainbase(base)
422 426 offset = revlog.end(len(revlog) - 1)
423 427 dist = deltalen + offset - revlog.start(chainbase)
424 428 if revlog._generaldelta:
425 429 deltabase = base
426 430 else:
427 431 deltabase = chainbase
428 432 chainlen, compresseddeltalen = revlog._chaininfo(base)
429 433 chainlen += 1
430 434 compresseddeltalen += deltalen
431 435 return _deltainfo(dist, deltalen, (header, data), deltabase,
432 436 chainbase, chainlen, compresseddeltalen)
433 437
434 438 def finddeltainfo(self, revinfo, fh):
435 439 """Find an acceptable delta against a candidate revision
436 440
437 441 revinfo: information about the revision (instance of _revisioninfo)
438 442 fh: file handle to either the .i or the .d revlog file,
439 443 depending on whether it is inlined or not
440 444
441 445 Returns the first acceptable candidate revision, as ordered by
442 446 _getcandidaterevs
443 447 """
444 448 cachedelta = revinfo.cachedelta
445 449 p1 = revinfo.p1
446 450 p2 = revinfo.p2
447 451 revlog = self.revlog
448 452
449 453 deltainfo = None
450 454 for candidaterevs in self._getcandidaterevs(p1, p2, cachedelta):
451 455 nominateddeltas = []
452 456 for candidaterev in candidaterevs:
453 457 # no delta for rawtext-changing revs (see "candelta" for why)
454 458 if revlog.flags(candidaterev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
455 459 continue
456 460 candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
457 461 if revlog._isgooddeltainfo(candidatedelta, revinfo):
458 462 nominateddeltas.append(candidatedelta)
459 463 if nominateddeltas:
460 464 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
461 465 break
462 466
463 467 return deltainfo
464 468
465 469 @attr.s(slots=True, frozen=True)
466 470 class _revisioninfo(object):
467 471 """Information about a revision that allows building its fulltext
468 472 node: expected hash of the revision
469 473 p1, p2: parent revs of the revision
470 474 btext: built text cache consisting of a one-element list
471 475 cachedelta: (baserev, uncompressed_delta) or None
472 476 flags: flags associated to the revision storage
473 477
474 478 One of btext[0] or cachedelta must be set.
475 479 """
476 480 node = attr.ib()
477 481 p1 = attr.ib()
478 482 p2 = attr.ib()
479 483 btext = attr.ib()
480 484 textlen = attr.ib()
481 485 cachedelta = attr.ib()
482 486 flags = attr.ib()
483 487
484 488 # index v0:
485 489 # 4 bytes: offset
486 490 # 4 bytes: compressed length
487 491 # 4 bytes: base rev
488 492 # 4 bytes: link rev
489 493 # 20 bytes: parent 1 nodeid
490 494 # 20 bytes: parent 2 nodeid
491 495 # 20 bytes: nodeid
492 496 indexformatv0 = struct.Struct(">4l20s20s20s")
493 497 indexformatv0_pack = indexformatv0.pack
494 498 indexformatv0_unpack = indexformatv0.unpack
495 499
496 500 class revlogoldio(object):
497 501 def __init__(self):
498 502 self.size = indexformatv0.size
499 503
500 504 def parseindex(self, data, inline):
501 505 s = self.size
502 506 index = []
503 507 nodemap = {nullid: nullrev}
504 508 n = off = 0
505 509 l = len(data)
506 510 while off + s <= l:
507 511 cur = data[off:off + s]
508 512 off += s
509 513 e = indexformatv0_unpack(cur)
510 514 # transform to revlogv1 format
511 515 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
512 516 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
513 517 index.append(e2)
514 518 nodemap[e[6]] = n
515 519 n += 1
516 520
517 521 # add the magic null revision at -1
518 522 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
519 523
520 524 return index, nodemap, None
521 525
522 526 def packentry(self, entry, node, version, rev):
523 527 if gettype(entry[0]):
524 528 raise RevlogError(_('index entry flags need revlog version 1'))
525 529 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
526 530 node(entry[5]), node(entry[6]), entry[7])
527 531 return indexformatv0_pack(*e2)
528 532
529 533 # index ng:
530 534 # 6 bytes: offset
531 535 # 2 bytes: flags
532 536 # 4 bytes: compressed length
533 537 # 4 bytes: uncompressed length
534 538 # 4 bytes: base rev
535 539 # 4 bytes: link rev
536 540 # 4 bytes: parent 1 rev
537 541 # 4 bytes: parent 2 rev
538 542 # 32 bytes: nodeid
539 543 indexformatng = struct.Struct(">Qiiiiii20s12x")
540 544 indexformatng_pack = indexformatng.pack
541 545 versionformat = struct.Struct(">I")
542 546 versionformat_pack = versionformat.pack
543 547 versionformat_unpack = versionformat.unpack
544 548
545 549 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
546 550 # signed integer)
547 551 _maxentrysize = 0x7fffffff
548 552
549 553 class revlogio(object):
550 554 def __init__(self):
551 555 self.size = indexformatng.size
552 556
553 557 def parseindex(self, data, inline):
554 558 # call the C implementation to parse the index data
555 559 index, cache = parsers.parse_index2(data, inline)
556 560 return index, getattr(index, 'nodemap', None), cache
557 561
558 562 def packentry(self, entry, node, version, rev):
559 563 p = indexformatng_pack(*entry)
560 564 if rev == 0:
561 565 p = versionformat_pack(version) + p[4:]
562 566 return p
563 567
564 568 class revlog(object):
565 569 """
566 570 the underlying revision storage object
567 571
568 572 A revlog consists of two parts, an index and the revision data.
569 573
570 574 The index is a file with a fixed record size containing
571 575 information on each revision, including its nodeid (hash), the
572 576 nodeids of its parents, the position and offset of its data within
573 577 the data file, and the revision it's based on. Finally, each entry
574 578 contains a linkrev entry that can serve as a pointer to external
575 579 data.
576 580
577 581 The revision data itself is a linear collection of data chunks.
578 582 Each chunk represents a revision and is usually represented as a
579 583 delta against the previous chunk. To bound lookup time, runs of
580 584 deltas are limited to about 2 times the length of the original
581 585 version data. This makes retrieval of a version proportional to
582 586 its size, or O(1) relative to the number of revisions.
583 587
584 588 Both pieces of the revlog are written to in an append-only
585 589 fashion, which means we never need to rewrite a file to insert or
586 590 remove data, and can use some simple techniques to avoid the need
587 591 for locking while reading.
588 592
589 593 If checkambig, indexfile is opened with checkambig=True at
590 594 writing, to avoid file stat ambiguity.
591 595
592 596 If mmaplargeindex is True, and an mmapindexthreshold is set, the
593 597 index will be mmapped rather than read if it is larger than the
594 598 configured threshold.
595 599
596 600 If censorable is True, the revlog can have censored revisions.
597 601 """
598 602 def __init__(self, opener, indexfile, datafile=None, checkambig=False,
599 603 mmaplargeindex=False, censorable=False):
600 604 """
601 605 create a revlog object
602 606
603 607 opener is a function that abstracts the file opening operation
604 608 and can be used to implement COW semantics or the like.
605 609 """
606 610 self.indexfile = indexfile
607 611 self.datafile = datafile or (indexfile[:-2] + ".d")
608 612 self.opener = opener
609 613 # When True, indexfile is opened with checkambig=True at writing, to
610 614 # avoid file stat ambiguity.
611 615 self._checkambig = checkambig
612 616 self._censorable = censorable
613 617 # 3-tuple of (node, rev, text) for a raw revision.
614 618 self._cache = None
615 619 # Maps rev to chain base rev.
616 620 self._chainbasecache = util.lrucachedict(100)
617 621 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
618 622 self._chunkcache = (0, '')
619 623 # How much data to read and cache into the raw revlog data cache.
620 624 self._chunkcachesize = 65536
621 625 self._maxchainlen = None
622 626 self._aggressivemergedeltas = True
623 627 self.index = []
624 628 # Mapping of partial identifiers to full nodes.
625 629 self._pcache = {}
626 630 # Mapping of revision integer to full node.
627 631 self._nodecache = {nullid: nullrev}
628 632 self._nodepos = None
629 633 self._compengine = 'zlib'
630 634 self._maxdeltachainspan = -1
631 635 self._withsparseread = False
632 636 self._srdensitythreshold = 0.50
633 637 self._srmingapsize = 262144
634 638
635 639 mmapindexthreshold = None
636 640 v = REVLOG_DEFAULT_VERSION
637 641 opts = getattr(opener, 'options', None)
638 642 if opts is not None:
639 643 if 'revlogv2' in opts:
640 644 # version 2 revlogs always use generaldelta.
641 645 v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
642 646 elif 'revlogv1' in opts:
643 647 if 'generaldelta' in opts:
644 648 v |= FLAG_GENERALDELTA
645 649 else:
646 650 v = 0
647 651 if 'chunkcachesize' in opts:
648 652 self._chunkcachesize = opts['chunkcachesize']
649 653 if 'maxchainlen' in opts:
650 654 self._maxchainlen = opts['maxchainlen']
651 655 if 'aggressivemergedeltas' in opts:
652 656 self._aggressivemergedeltas = opts['aggressivemergedeltas']
653 657 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
654 658 if 'compengine' in opts:
655 659 self._compengine = opts['compengine']
656 660 if 'maxdeltachainspan' in opts:
657 661 self._maxdeltachainspan = opts['maxdeltachainspan']
658 662 if mmaplargeindex and 'mmapindexthreshold' in opts:
659 663 mmapindexthreshold = opts['mmapindexthreshold']
660 664 self._withsparseread = bool(opts.get('with-sparse-read', False))
661 665 if 'sparse-read-density-threshold' in opts:
662 666 self._srdensitythreshold = opts['sparse-read-density-threshold']
663 667 if 'sparse-read-min-gap-size' in opts:
664 668 self._srmingapsize = opts['sparse-read-min-gap-size']
665 669
666 670 if self._chunkcachesize <= 0:
667 671 raise RevlogError(_('revlog chunk cache size %r is not greater '
668 672 'than 0') % self._chunkcachesize)
669 673 elif self._chunkcachesize & (self._chunkcachesize - 1):
670 674 raise RevlogError(_('revlog chunk cache size %r is not a power '
671 675 'of 2') % self._chunkcachesize)
672 676
673 677 indexdata = ''
674 678 self._initempty = True
675 679 try:
676 680 with self._indexfp() as f:
677 681 if (mmapindexthreshold is not None and
678 682 self.opener.fstat(f).st_size >= mmapindexthreshold):
679 683 indexdata = util.buffer(util.mmapread(f))
680 684 else:
681 685 indexdata = f.read()
682 686 if len(indexdata) > 0:
683 687 v = versionformat_unpack(indexdata[:4])[0]
684 688 self._initempty = False
685 689 except IOError as inst:
686 690 if inst.errno != errno.ENOENT:
687 691 raise
688 692
689 693 self.version = v
690 694 self._inline = v & FLAG_INLINE_DATA
691 695 self._generaldelta = v & FLAG_GENERALDELTA
692 696 flags = v & ~0xFFFF
693 697 fmt = v & 0xFFFF
694 698 if fmt == REVLOGV0:
695 699 if flags:
696 700 raise RevlogError(_('unknown flags (%#04x) in version %d '
697 701 'revlog %s') %
698 702 (flags >> 16, fmt, self.indexfile))
699 703 elif fmt == REVLOGV1:
700 704 if flags & ~REVLOGV1_FLAGS:
701 705 raise RevlogError(_('unknown flags (%#04x) in version %d '
702 706 'revlog %s') %
703 707 (flags >> 16, fmt, self.indexfile))
704 708 elif fmt == REVLOGV2:
705 709 if flags & ~REVLOGV2_FLAGS:
706 710 raise RevlogError(_('unknown flags (%#04x) in version %d '
707 711 'revlog %s') %
708 712 (flags >> 16, fmt, self.indexfile))
709 713 else:
710 714 raise RevlogError(_('unknown version (%d) in revlog %s') %
711 715 (fmt, self.indexfile))
712 716
713 717 self.storedeltachains = True
714 718
715 719 self._io = revlogio()
716 720 if self.version == REVLOGV0:
717 721 self._io = revlogoldio()
718 722 try:
719 723 d = self._io.parseindex(indexdata, self._inline)
720 724 except (ValueError, IndexError):
721 725 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
722 726 self.index, nodemap, self._chunkcache = d
723 727 if nodemap is not None:
724 728 self.nodemap = self._nodecache = nodemap
725 729 if not self._chunkcache:
726 730 self._chunkclear()
727 731 # revnum -> (chain-length, sum-delta-length)
728 732 self._chaininfocache = {}
729 733 # revlog header -> revlog compressor
730 734 self._decompressors = {}
731 735
732 736 @util.propertycache
733 737 def _compressor(self):
734 738 return util.compengines[self._compengine].revlogcompressor()
735 739
736 740 def _indexfp(self, mode='r'):
737 741 """file object for the revlog's index file"""
738 742 args = {r'mode': mode}
739 743 if mode != 'r':
740 744 args[r'checkambig'] = self._checkambig
741 745 if mode == 'w':
742 746 args[r'atomictemp'] = True
743 747 return self.opener(self.indexfile, **args)
744 748
745 749 def _datafp(self, mode='r'):
746 750 """file object for the revlog's data file"""
747 751 return self.opener(self.datafile, mode=mode)
748 752
749 753 @contextlib.contextmanager
750 754 def _datareadfp(self, existingfp=None):
751 755 """file object suitable to read data"""
752 756 if existingfp is not None:
753 757 yield existingfp
754 758 else:
755 759 if self._inline:
756 760 func = self._indexfp
757 761 else:
758 762 func = self._datafp
759 763 with func() as fp:
760 764 yield fp
761 765
762 766 def tip(self):
763 767 return self.node(len(self.index) - 2)
764 768 def __contains__(self, rev):
765 769 return 0 <= rev < len(self)
766 770 def __len__(self):
767 771 return len(self.index) - 1
768 772 def __iter__(self):
769 773 return iter(xrange(len(self)))
770 774 def revs(self, start=0, stop=None):
771 775 """iterate over all rev in this revlog (from start to stop)"""
772 776 step = 1
773 777 if stop is not None:
774 778 if start > stop:
775 779 step = -1
776 780 stop += step
777 781 else:
778 782 stop = len(self)
779 783 return xrange(start, stop, step)
780 784
781 785 @util.propertycache
782 786 def nodemap(self):
783 787 self.rev(self.node(0))
784 788 return self._nodecache
785 789
786 790 def hasnode(self, node):
787 791 try:
788 792 self.rev(node)
789 793 return True
790 794 except KeyError:
791 795 return False
792 796
793 797 def candelta(self, baserev, rev):
794 798 """whether two revisions (baserev, rev) can be delta-ed or not"""
795 799 # Disable delta if either rev requires a content-changing flag
796 800 # processor (ex. LFS). This is because such flag processor can alter
797 801 # the rawtext content that the delta will be based on, and two clients
798 802 # could have the same revlog node with different flags (i.e. different
799 803 # rawtext contents) and the delta could be incompatible.
800 804 if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
801 805 or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
802 806 return False
803 807 return True
804 808
805 809 def clearcaches(self):
806 810 self._cache = None
807 811 self._chainbasecache.clear()
808 812 self._chunkcache = (0, '')
809 813 self._pcache = {}
810 814
811 815 try:
812 816 self._nodecache.clearcaches()
813 817 except AttributeError:
814 818 self._nodecache = {nullid: nullrev}
815 819 self._nodepos = None
816 820
817 821 def rev(self, node):
818 822 try:
819 823 return self._nodecache[node]
820 824 except TypeError:
821 825 raise
822 826 except RevlogError:
823 827 # parsers.c radix tree lookup failed
824 828 if node == wdirid or node in wdirfilenodeids:
825 829 raise error.WdirUnsupported
826 830 raise LookupError(node, self.indexfile, _('no node'))
827 831 except KeyError:
828 832 # pure python cache lookup failed
829 833 n = self._nodecache
830 834 i = self.index
831 835 p = self._nodepos
832 836 if p is None:
833 837 p = len(i) - 2
834 838 else:
835 839 assert p < len(i)
836 840 for r in xrange(p, -1, -1):
837 841 v = i[r][7]
838 842 n[v] = r
839 843 if v == node:
840 844 self._nodepos = r - 1
841 845 return r
842 846 if node == wdirid or node in wdirfilenodeids:
843 847 raise error.WdirUnsupported
844 848 raise LookupError(node, self.indexfile, _('no node'))
845 849
846 850 # Accessors for index entries.
847 851
848 852 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
849 853 # are flags.
850 854 def start(self, rev):
851 855 return int(self.index[rev][0] >> 16)
852 856
853 857 def flags(self, rev):
854 858 return self.index[rev][0] & 0xFFFF
855 859
856 860 def length(self, rev):
857 861 return self.index[rev][1]
858 862
859 863 def rawsize(self, rev):
860 864 """return the length of the uncompressed text for a given revision"""
861 865 l = self.index[rev][2]
862 866 if l >= 0:
863 867 return l
864 868
865 869 t = self.revision(rev, raw=True)
866 870 return len(t)
867 871
868 872 def size(self, rev):
869 873 """length of non-raw text (processed by a "read" flag processor)"""
870 874 # fast path: if no "read" flag processor could change the content,
871 875 # size is rawsize. note: ELLIPSIS is known to not change the content.
872 876 flags = self.flags(rev)
873 877 if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
874 878 return self.rawsize(rev)
875 879
876 880 return len(self.revision(rev, raw=False))
877 881
878 882 def chainbase(self, rev):
879 883 base = self._chainbasecache.get(rev)
880 884 if base is not None:
881 885 return base
882 886
883 887 index = self.index
884 888 iterrev = rev
885 889 base = index[iterrev][3]
886 890 while base != iterrev:
887 891 iterrev = base
888 892 base = index[iterrev][3]
889 893
890 894 self._chainbasecache[rev] = base
891 895 return base
892 896
893 897 def linkrev(self, rev):
894 898 return self.index[rev][4]
895 899
896 900 def parentrevs(self, rev):
897 901 try:
898 902 entry = self.index[rev]
899 903 except IndexError:
900 904 if rev == wdirrev:
901 905 raise error.WdirUnsupported
902 906 raise
903 907
904 908 return entry[5], entry[6]
905 909
906 910 def node(self, rev):
907 911 try:
908 912 return self.index[rev][7]
909 913 except IndexError:
910 914 if rev == wdirrev:
911 915 raise error.WdirUnsupported
912 916 raise
913 917
914 918 # Derived from index values.
915 919
916 920 def end(self, rev):
917 921 return self.start(rev) + self.length(rev)
918 922
919 923 def parents(self, node):
920 924 i = self.index
921 925 d = i[self.rev(node)]
922 926 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
923 927
924 928 def chainlen(self, rev):
925 929 return self._chaininfo(rev)[0]
926 930
927 931 def _chaininfo(self, rev):
928 932 chaininfocache = self._chaininfocache
929 933 if rev in chaininfocache:
930 934 return chaininfocache[rev]
931 935 index = self.index
932 936 generaldelta = self._generaldelta
933 937 iterrev = rev
934 938 e = index[iterrev]
935 939 clen = 0
936 940 compresseddeltalen = 0
937 941 while iterrev != e[3]:
938 942 clen += 1
939 943 compresseddeltalen += e[1]
940 944 if generaldelta:
941 945 iterrev = e[3]
942 946 else:
943 947 iterrev -= 1
944 948 if iterrev in chaininfocache:
945 949 t = chaininfocache[iterrev]
946 950 clen += t[0]
947 951 compresseddeltalen += t[1]
948 952 break
949 953 e = index[iterrev]
950 954 else:
951 955 # Add text length of base since decompressing that also takes
952 956 # work. For cache hits the length is already included.
953 957 compresseddeltalen += e[1]
954 958 r = (clen, compresseddeltalen)
955 959 chaininfocache[rev] = r
956 960 return r
957 961
958 962 def _deltachain(self, rev, stoprev=None):
959 963 """Obtain the delta chain for a revision.
960 964
961 965 ``stoprev`` specifies a revision to stop at. If not specified, we
962 966 stop at the base of the chain.
963 967
964 968 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
965 969 revs in ascending order and ``stopped`` is a bool indicating whether
966 970 ``stoprev`` was hit.
967 971 """
968 972 # Try C implementation.
969 973 try:
970 974 return self.index.deltachain(rev, stoprev, self._generaldelta)
971 975 except AttributeError:
972 976 pass
973 977
974 978 chain = []
975 979
976 980 # Alias to prevent attribute lookup in tight loop.
977 981 index = self.index
978 982 generaldelta = self._generaldelta
979 983
980 984 iterrev = rev
981 985 e = index[iterrev]
982 986 while iterrev != e[3] and iterrev != stoprev:
983 987 chain.append(iterrev)
984 988 if generaldelta:
985 989 iterrev = e[3]
986 990 else:
987 991 iterrev -= 1
988 992 e = index[iterrev]
989 993
990 994 if iterrev == stoprev:
991 995 stopped = True
992 996 else:
993 997 chain.append(iterrev)
994 998 stopped = False
995 999
996 1000 chain.reverse()
997 1001 return chain, stopped
998 1002
999 1003 def ancestors(self, revs, stoprev=0, inclusive=False):
1000 1004 """Generate the ancestors of 'revs' in reverse topological order.
1001 1005 Does not generate revs lower than stoprev.
1002 1006
1003 1007 See the documentation for ancestor.lazyancestors for more details."""
1004 1008
1005 1009 return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
1006 1010 inclusive=inclusive)
1007 1011
1008 1012 def descendants(self, revs):
1009 1013 """Generate the descendants of 'revs' in revision order.
1010 1014
1011 1015 Yield a sequence of revision numbers starting with a child of
1012 1016 some rev in revs, i.e., each revision is *not* considered a
1013 1017 descendant of itself. Results are ordered by revision number (a
1014 1018 topological sort)."""
1015 1019 first = min(revs)
1016 1020 if first == nullrev:
1017 1021 for i in self:
1018 1022 yield i
1019 1023 return
1020 1024
1021 1025 seen = set(revs)
1022 1026 for i in self.revs(start=first + 1):
1023 1027 for x in self.parentrevs(i):
1024 1028 if x != nullrev and x in seen:
1025 1029 seen.add(i)
1026 1030 yield i
1027 1031 break
1028 1032
1029 1033 def findcommonmissing(self, common=None, heads=None):
1030 1034 """Return a tuple of the ancestors of common and the ancestors of heads
1031 1035 that are not ancestors of common. In revset terminology, we return the
1032 1036 tuple:
1033 1037
1034 1038 ::common, (::heads) - (::common)
1035 1039
1036 1040 The list is sorted by revision number, meaning it is
1037 1041 topologically sorted.
1038 1042
1039 1043 'heads' and 'common' are both lists of node IDs. If heads is
1040 1044 not supplied, uses all of the revlog's heads. If common is not
1041 1045 supplied, uses nullid."""
1042 1046 if common is None:
1043 1047 common = [nullid]
1044 1048 if heads is None:
1045 1049 heads = self.heads()
1046 1050
1047 1051 common = [self.rev(n) for n in common]
1048 1052 heads = [self.rev(n) for n in heads]
1049 1053
1050 1054 # we want the ancestors, but inclusive
1051 1055 class lazyset(object):
1052 1056 def __init__(self, lazyvalues):
1053 1057 self.addedvalues = set()
1054 1058 self.lazyvalues = lazyvalues
1055 1059
1056 1060 def __contains__(self, value):
1057 1061 return value in self.addedvalues or value in self.lazyvalues
1058 1062
1059 1063 def __iter__(self):
1060 1064 added = self.addedvalues
1061 1065 for r in added:
1062 1066 yield r
1063 1067 for r in self.lazyvalues:
1064 1068 if not r in added:
1065 1069 yield r
1066 1070
1067 1071 def add(self, value):
1068 1072 self.addedvalues.add(value)
1069 1073
1070 1074 def update(self, values):
1071 1075 self.addedvalues.update(values)
1072 1076
1073 1077 has = lazyset(self.ancestors(common))
1074 1078 has.add(nullrev)
1075 1079 has.update(common)
1076 1080
1077 1081 # take all ancestors from heads that aren't in has
1078 1082 missing = set()
1079 1083 visit = collections.deque(r for r in heads if r not in has)
1080 1084 while visit:
1081 1085 r = visit.popleft()
1082 1086 if r in missing:
1083 1087 continue
1084 1088 else:
1085 1089 missing.add(r)
1086 1090 for p in self.parentrevs(r):
1087 1091 if p not in has:
1088 1092 visit.append(p)
1089 1093 missing = list(missing)
1090 1094 missing.sort()
1091 1095 return has, [self.node(miss) for miss in missing]
1092 1096
1093 1097 def incrementalmissingrevs(self, common=None):
1094 1098 """Return an object that can be used to incrementally compute the
1095 1099 revision numbers of the ancestors of arbitrary sets that are not
1096 1100 ancestors of common. This is an ancestor.incrementalmissingancestors
1097 1101 object.
1098 1102
1099 1103 'common' is a list of revision numbers. If common is not supplied, uses
1100 1104 nullrev.
1101 1105 """
1102 1106 if common is None:
1103 1107 common = [nullrev]
1104 1108
1105 1109 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1106 1110
1107 1111 def findmissingrevs(self, common=None, heads=None):
1108 1112 """Return the revision numbers of the ancestors of heads that
1109 1113 are not ancestors of common.
1110 1114
1111 1115 More specifically, return a list of revision numbers corresponding to
1112 1116 nodes N such that every N satisfies the following constraints:
1113 1117
1114 1118 1. N is an ancestor of some node in 'heads'
1115 1119 2. N is not an ancestor of any node in 'common'
1116 1120
1117 1121 The list is sorted by revision number, meaning it is
1118 1122 topologically sorted.
1119 1123
1120 1124 'heads' and 'common' are both lists of revision numbers. If heads is
1121 1125 not supplied, uses all of the revlog's heads. If common is not
1122 1126 supplied, uses nullrev."""
1123 1127 if common is None:
1124 1128 common = [nullrev]
1125 1129 if heads is None:
1126 1130 heads = self.headrevs()
1127 1131
1128 1132 inc = self.incrementalmissingrevs(common=common)
1129 1133 return inc.missingancestors(heads)
1130 1134
1131 1135 def findmissing(self, common=None, heads=None):
1132 1136 """Return the ancestors of heads that are not ancestors of common.
1133 1137
1134 1138 More specifically, return a list of nodes N such that every N
1135 1139 satisfies the following constraints:
1136 1140
1137 1141 1. N is an ancestor of some node in 'heads'
1138 1142 2. N is not an ancestor of any node in 'common'
1139 1143
1140 1144 The list is sorted by revision number, meaning it is
1141 1145 topologically sorted.
1142 1146
1143 1147 'heads' and 'common' are both lists of node IDs. If heads is
1144 1148 not supplied, uses all of the revlog's heads. If common is not
1145 1149 supplied, uses nullid."""
1146 1150 if common is None:
1147 1151 common = [nullid]
1148 1152 if heads is None:
1149 1153 heads = self.heads()
1150 1154
1151 1155 common = [self.rev(n) for n in common]
1152 1156 heads = [self.rev(n) for n in heads]
1153 1157
1154 1158 inc = self.incrementalmissingrevs(common=common)
1155 1159 return [self.node(r) for r in inc.missingancestors(heads)]
1156 1160
1157 1161 def nodesbetween(self, roots=None, heads=None):
1158 1162 """Return a topological path from 'roots' to 'heads'.
1159 1163
1160 1164 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1161 1165 topologically sorted list of all nodes N that satisfy both of
1162 1166 these constraints:
1163 1167
1164 1168 1. N is a descendant of some node in 'roots'
1165 1169 2. N is an ancestor of some node in 'heads'
1166 1170
1167 1171 Every node is considered to be both a descendant and an ancestor
1168 1172 of itself, so every reachable node in 'roots' and 'heads' will be
1169 1173 included in 'nodes'.
1170 1174
1171 1175 'outroots' is the list of reachable nodes in 'roots', i.e., the
1172 1176 subset of 'roots' that is returned in 'nodes'. Likewise,
1173 1177 'outheads' is the subset of 'heads' that is also in 'nodes'.
1174 1178
1175 1179 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1176 1180 unspecified, uses nullid as the only root. If 'heads' is
1177 1181 unspecified, uses a list of all of the revlog's heads."""
1178 1182 nonodes = ([], [], [])
1179 1183 if roots is not None:
1180 1184 roots = list(roots)
1181 1185 if not roots:
1182 1186 return nonodes
1183 1187 lowestrev = min([self.rev(n) for n in roots])
1184 1188 else:
1185 1189 roots = [nullid] # Everybody's a descendant of nullid
1186 1190 lowestrev = nullrev
1187 1191 if (lowestrev == nullrev) and (heads is None):
1188 1192 # We want _all_ the nodes!
1189 1193 return ([self.node(r) for r in self], [nullid], list(self.heads()))
1190 1194 if heads is None:
1191 1195 # All nodes are ancestors, so the latest ancestor is the last
1192 1196 # node.
1193 1197 highestrev = len(self) - 1
1194 1198 # Set ancestors to None to signal that every node is an ancestor.
1195 1199 ancestors = None
1196 1200 # Set heads to an empty dictionary for later discovery of heads
1197 1201 heads = {}
1198 1202 else:
1199 1203 heads = list(heads)
1200 1204 if not heads:
1201 1205 return nonodes
1202 1206 ancestors = set()
1203 1207 # Turn heads into a dictionary so we can remove 'fake' heads.
1204 1208 # Also, later we will be using it to filter out the heads we can't
1205 1209 # find from roots.
1206 1210 heads = dict.fromkeys(heads, False)
1207 1211 # Start at the top and keep marking parents until we're done.
1208 1212 nodestotag = set(heads)
1209 1213 # Remember where the top was so we can use it as a limit later.
1210 1214 highestrev = max([self.rev(n) for n in nodestotag])
1211 1215 while nodestotag:
1212 1216 # grab a node to tag
1213 1217 n = nodestotag.pop()
1214 1218 # Never tag nullid
1215 1219 if n == nullid:
1216 1220 continue
1217 1221 # A node's revision number represents its place in a
1218 1222 # topologically sorted list of nodes.
1219 1223 r = self.rev(n)
1220 1224 if r >= lowestrev:
1221 1225 if n not in ancestors:
1222 1226 # If we are possibly a descendant of one of the roots
1223 1227 # and we haven't already been marked as an ancestor
1224 1228 ancestors.add(n) # Mark as ancestor
1225 1229 # Add non-nullid parents to list of nodes to tag.
1226 1230 nodestotag.update([p for p in self.parents(n) if
1227 1231 p != nullid])
1228 1232 elif n in heads: # We've seen it before, is it a fake head?
1229 1233 # So it is, real heads should not be the ancestors of
1230 1234 # any other heads.
1231 1235 heads.pop(n)
1232 1236 if not ancestors:
1233 1237 return nonodes
1234 1238 # Now that we have our set of ancestors, we want to remove any
1235 1239 # roots that are not ancestors.
1236 1240
1237 1241 # If one of the roots was nullid, everything is included anyway.
1238 1242 if lowestrev > nullrev:
1239 1243 # But, since we weren't, let's recompute the lowest rev to not
1240 1244 # include roots that aren't ancestors.
1241 1245
1242 1246 # Filter out roots that aren't ancestors of heads
1243 1247 roots = [root for root in roots if root in ancestors]
1244 1248 # Recompute the lowest revision
1245 1249 if roots:
1246 1250 lowestrev = min([self.rev(root) for root in roots])
1247 1251 else:
1248 1252 # No more roots? Return empty list
1249 1253 return nonodes
1250 1254 else:
1251 1255 # We are descending from nullid, and don't need to care about
1252 1256 # any other roots.
1253 1257 lowestrev = nullrev
1254 1258 roots = [nullid]
1255 1259 # Transform our roots list into a set.
1256 1260 descendants = set(roots)
1257 1261 # Also, keep the original roots so we can filter out roots that aren't
1258 1262 # 'real' roots (i.e. are descended from other roots).
1259 1263 roots = descendants.copy()
1260 1264 # Our topologically sorted list of output nodes.
1261 1265 orderedout = []
1262 1266 # Don't start at nullid since we don't want nullid in our output list,
1263 1267 # and if nullid shows up in descendants, empty parents will look like
1264 1268 # they're descendants.
1265 1269 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1266 1270 n = self.node(r)
1267 1271 isdescendant = False
1268 1272 if lowestrev == nullrev: # Everybody is a descendant of nullid
1269 1273 isdescendant = True
1270 1274 elif n in descendants:
1271 1275 # n is already a descendant
1272 1276 isdescendant = True
1273 1277 # This check only needs to be done here because all the roots
1274 1278 # will start being marked as descendants before the loop.
1275 1279 if n in roots:
1276 1280 # If n was a root, check if it's a 'real' root.
1277 1281 p = tuple(self.parents(n))
1278 1282 # If any of its parents are descendants, it's not a root.
1279 1283 if (p[0] in descendants) or (p[1] in descendants):
1280 1284 roots.remove(n)
1281 1285 else:
1282 1286 p = tuple(self.parents(n))
1283 1287 # A node is a descendant if either of its parents are
1284 1288 # descendants. (We seeded the descendants set with the roots
1285 1289 # up there, remember?)
1286 1290 if (p[0] in descendants) or (p[1] in descendants):
1287 1291 descendants.add(n)
1288 1292 isdescendant = True
1289 1293 if isdescendant and ((ancestors is None) or (n in ancestors)):
1290 1294 # Only include nodes that are both descendants and ancestors.
1291 1295 orderedout.append(n)
1292 1296 if (ancestors is not None) and (n in heads):
1293 1297 # We're trying to figure out which heads are reachable
1294 1298 # from roots.
1295 1299 # Mark this head as having been reached
1296 1300 heads[n] = True
1297 1301 elif ancestors is None:
1298 1302 # Otherwise, we're trying to discover the heads.
1299 1303 # Assume this is a head because if it isn't, the next step
1300 1304 # will eventually remove it.
1301 1305 heads[n] = True
1302 1306 # But, obviously its parents aren't.
1303 1307 for p in self.parents(n):
1304 1308 heads.pop(p, None)
1305 1309 heads = [head for head, flag in heads.iteritems() if flag]
1306 1310 roots = list(roots)
1307 1311 assert orderedout
1308 1312 assert roots
1309 1313 assert heads
1310 1314 return (orderedout, roots, heads)
1311 1315
1312 1316 def headrevs(self):
1313 1317 try:
1314 1318 return self.index.headrevs()
1315 1319 except AttributeError:
1316 1320 return self._headrevs()
1317 1321
1318 1322 def computephases(self, roots):
1319 1323 return self.index.computephasesmapsets(roots)
1320 1324
1321 1325 def _headrevs(self):
1322 1326 count = len(self)
1323 1327 if not count:
1324 1328 return [nullrev]
1325 1329 # we won't iterate over filtered revs, so nobody is a head at start
1326 1330 ishead = [0] * (count + 1)
1327 1331 index = self.index
1328 1332 for r in self:
1329 1333 ishead[r] = 1 # I may be a head
1330 1334 e = index[r]
1331 1335 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1332 1336 return [r for r, val in enumerate(ishead) if val]
1333 1337
1334 1338 def heads(self, start=None, stop=None):
1335 1339 """return the list of all nodes that have no children
1336 1340
1337 1341 if start is specified, only heads that are descendants of
1338 1342 start will be returned
1339 1343 if stop is specified, it will consider all the revs from stop
1340 1344 as if they had no children
1341 1345 """
1342 1346 if start is None and stop is None:
1343 1347 if not len(self):
1344 1348 return [nullid]
1345 1349 return [self.node(r) for r in self.headrevs()]
1346 1350
1347 1351 if start is None:
1348 1352 start = nullid
1349 1353 if stop is None:
1350 1354 stop = []
1351 1355 stoprevs = set([self.rev(n) for n in stop])
1352 1356 startrev = self.rev(start)
1353 1357 reachable = {startrev}
1354 1358 heads = {startrev}
1355 1359
1356 1360 parentrevs = self.parentrevs
1357 1361 for r in self.revs(start=startrev + 1):
1358 1362 for p in parentrevs(r):
1359 1363 if p in reachable:
1360 1364 if r not in stoprevs:
1361 1365 reachable.add(r)
1362 1366 heads.add(r)
1363 1367 if p in heads and p not in stoprevs:
1364 1368 heads.remove(p)
1365 1369
1366 1370 return [self.node(r) for r in heads]
1367 1371
1368 1372 def children(self, node):
1369 1373 """find the children of a given node"""
1370 1374 c = []
1371 1375 p = self.rev(node)
1372 1376 for r in self.revs(start=p + 1):
1373 1377 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1374 1378 if prevs:
1375 1379 for pr in prevs:
1376 1380 if pr == p:
1377 1381 c.append(self.node(r))
1378 1382 elif p == nullrev:
1379 1383 c.append(self.node(r))
1380 1384 return c
1381 1385
1382 1386 def descendant(self, start, end):
1383 1387 """True if revision 'end' is an descendant of revision 'start'
1384 1388
1385 1389 A revision is considered as a descendant of itself."""
1386 1390 if start == nullrev:
1387 1391 return True
1388 1392 elif start == end:
1389 1393 return True
1390 1394 return start in self._commonancestorsheads(start, end)
1391 1395
1392 1396 def commonancestorsheads(self, a, b):
1393 1397 """calculate all the heads of the common ancestors of nodes a and b"""
1394 1398 a, b = self.rev(a), self.rev(b)
1395 1399 ancs = self._commonancestorsheads(a, b)
1396 1400 return pycompat.maplist(self.node, ancs)
1397 1401
1398 1402 def _commonancestorsheads(self, *revs):
1399 1403 """calculate all the heads of the common ancestors of revs"""
1400 1404 try:
1401 1405 ancs = self.index.commonancestorsheads(*revs)
1402 1406 except (AttributeError, OverflowError): # C implementation failed
1403 1407 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1404 1408 return ancs
1405 1409
1406 1410 def isancestor(self, a, b):
1407 1411 """return True if node a is an ancestor of node b
1408 1412
1409 1413 The implementation of this is trivial but the use of
1410 1414 commonancestorsheads is not."""
1411 1415 a, b = self.rev(a), self.rev(b)
1412 1416 return self.descendant(a, b)
1413 1417
1414 1418 def ancestor(self, a, b):
1415 1419 """calculate the "best" common ancestor of nodes a and b"""
1416 1420
1417 1421 a, b = self.rev(a), self.rev(b)
1418 1422 try:
1419 1423 ancs = self.index.ancestors(a, b)
1420 1424 except (AttributeError, OverflowError):
1421 1425 ancs = ancestor.ancestors(self.parentrevs, a, b)
1422 1426 if ancs:
1423 1427 # choose a consistent winner when there's a tie
1424 1428 return min(map(self.node, ancs))
1425 1429 return nullid
1426 1430
1427 1431 def _match(self, id):
1428 1432 if isinstance(id, int):
1429 1433 # rev
1430 1434 return self.node(id)
1431 1435 if len(id) == 20:
1432 1436 # possibly a binary node
1433 1437 # odds of a binary node being all hex in ASCII are 1 in 10**25
1434 1438 try:
1435 1439 node = id
1436 1440 self.rev(node) # quick search the index
1437 1441 return node
1438 1442 except LookupError:
1439 1443 pass # may be partial hex id
1440 1444 try:
1441 1445 # str(rev)
1442 1446 rev = int(id)
1443 1447 if "%d" % rev != id:
1444 1448 raise ValueError
1445 1449 if rev < 0:
1446 1450 rev = len(self) + rev
1447 1451 if rev < 0 or rev >= len(self):
1448 1452 raise ValueError
1449 1453 return self.node(rev)
1450 1454 except (ValueError, OverflowError):
1451 1455 pass
1452 1456 if len(id) == 40:
1453 1457 try:
1454 1458 # a full hex nodeid?
1455 1459 node = bin(id)
1456 1460 self.rev(node)
1457 1461 return node
1458 1462 except (TypeError, LookupError):
1459 1463 pass
1460 1464
1461 1465 def _partialmatch(self, id):
1462 1466 # we don't care about wdirfilenodeids as they should always be full hashes
1463 1467 maybewdir = wdirhex.startswith(id)
1464 1468 try:
1465 1469 partial = self.index.partialmatch(id)
1466 1470 if partial and self.hasnode(partial):
1467 1471 if maybewdir:
1468 1472 # single 'ff...' match in radix tree, ambiguous with wdir
1469 1473 raise RevlogError
1470 1474 return partial
1471 1475 if maybewdir:
1472 1476 # no 'ff...' match in radix tree, wdir identified
1473 1477 raise error.WdirUnsupported
1474 1478 return None
1475 1479 except RevlogError:
1476 1480 # parsers.c radix tree lookup gave multiple matches
1477 1481 # fast path: for unfiltered changelog, radix tree is accurate
1478 1482 if not getattr(self, 'filteredrevs', None):
1479 1483 raise LookupError(id, self.indexfile,
1480 1484 _('ambiguous identifier'))
1481 1485 # fall through to slow path that filters hidden revisions
1482 1486 except (AttributeError, ValueError):
1483 1487 # we are pure python, or key was too short to search radix tree
1484 1488 pass
1485 1489
1486 1490 if id in self._pcache:
1487 1491 return self._pcache[id]
1488 1492
1489 1493 if len(id) <= 40:
1490 1494 try:
1491 1495 # hex(node)[:...]
1492 1496 l = len(id) // 2 # grab an even number of digits
1493 1497 prefix = bin(id[:l * 2])
1494 1498 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1495 1499 nl = [n for n in nl if hex(n).startswith(id) and
1496 1500 self.hasnode(n)]
1497 1501 if len(nl) > 0:
1498 1502 if len(nl) == 1 and not maybewdir:
1499 1503 self._pcache[id] = nl[0]
1500 1504 return nl[0]
1501 1505 raise LookupError(id, self.indexfile,
1502 1506 _('ambiguous identifier'))
1503 1507 if maybewdir:
1504 1508 raise error.WdirUnsupported
1505 1509 return None
1506 1510 except TypeError:
1507 1511 pass
1508 1512
1509 1513 def lookup(self, id):
1510 1514 """locate a node based on:
1511 1515 - revision number or str(revision number)
1512 1516 - nodeid or subset of hex nodeid
1513 1517 """
1514 1518 n = self._match(id)
1515 1519 if n is not None:
1516 1520 return n
1517 1521 n = self._partialmatch(id)
1518 1522 if n:
1519 1523 return n
1520 1524
1521 1525 raise LookupError(id, self.indexfile, _('no match found'))
1522 1526
1523 1527 def shortest(self, node, minlength=1):
1524 1528 """Find the shortest unambiguous prefix that matches node."""
1525 1529 def isvalid(prefix):
1526 1530 try:
1527 1531 node = self._partialmatch(prefix)
1528 1532 except error.RevlogError:
1529 1533 return False
1530 1534 except error.WdirUnsupported:
1531 1535 # single 'ff...' match
1532 1536 return True
1533 1537 if node is None:
1534 1538 raise LookupError(node, self.indexfile, _('no node'))
1535 1539 return True
1536 1540
1537 1541 def maybewdir(prefix):
1538 1542 return all(c == 'f' for c in prefix)
1539 1543
1540 1544 hexnode = hex(node)
1541 1545
1542 1546 def disambiguate(hexnode, minlength):
1543 1547 """Disambiguate against wdirid."""
1544 1548 for length in range(minlength, 41):
1545 1549 prefix = hexnode[:length]
1546 1550 if not maybewdir(prefix):
1547 1551 return prefix
1548 1552
1549 1553 if not getattr(self, 'filteredrevs', None):
1550 1554 try:
1551 1555 length = max(self.index.shortest(node), minlength)
1552 1556 return disambiguate(hexnode, length)
1553 1557 except RevlogError:
1554 1558 if node != wdirid:
1555 1559 raise LookupError(node, self.indexfile, _('no node'))
1556 1560 except AttributeError:
1557 1561 # Fall through to pure code
1558 1562 pass
1559 1563
1560 1564 if node == wdirid:
1561 1565 for length in range(minlength, 41):
1562 1566 prefix = hexnode[:length]
1563 1567 if isvalid(prefix):
1564 1568 return prefix
1565 1569
1566 1570 for length in range(minlength, 41):
1567 1571 prefix = hexnode[:length]
1568 1572 if isvalid(prefix):
1569 1573 return disambiguate(hexnode, length)
1570 1574
1571 1575 def cmp(self, node, text):
1572 1576 """compare text with a given file revision
1573 1577
1574 1578 returns True if text is different than what is stored.
1575 1579 """
1576 1580 p1, p2 = self.parents(node)
1577 1581 return hash(text, p1, p2) != node
1578 1582
1579 1583 def _cachesegment(self, offset, data):
1580 1584 """Add a segment to the revlog cache.
1581 1585
1582 1586 Accepts an absolute offset and the data that is at that location.
1583 1587 """
1584 1588 o, d = self._chunkcache
1585 1589 # try to add to existing cache
1586 1590 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1587 1591 self._chunkcache = o, d + data
1588 1592 else:
1589 1593 self._chunkcache = offset, data
1590 1594
1591 1595 def _readsegment(self, offset, length, df=None):
1592 1596 """Load a segment of raw data from the revlog.
1593 1597
1594 1598 Accepts an absolute offset, length to read, and an optional existing
1595 1599 file handle to read from.
1596 1600
1597 1601 If an existing file handle is passed, it will be seeked and the
1598 1602 original seek position will NOT be restored.
1599 1603
1600 1604 Returns a str or buffer of raw byte data.
1601 1605 """
1602 1606 # Cache data both forward and backward around the requested
1603 1607 # data, in a fixed size window. This helps speed up operations
1604 1608 # involving reading the revlog backwards.
1605 1609 cachesize = self._chunkcachesize
1606 1610 realoffset = offset & ~(cachesize - 1)
1607 1611 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
1608 1612 - realoffset)
1609 1613 with self._datareadfp(df) as df:
1610 1614 df.seek(realoffset)
1611 1615 d = df.read(reallength)
1612 1616 self._cachesegment(realoffset, d)
1613 1617 if offset != realoffset or reallength != length:
1614 1618 return util.buffer(d, offset - realoffset, length)
1615 1619 return d
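The masking above aligns the read to ``cachesize`` boundaries on both sides. A worked example, assuming the default chunk cache size of 64KiB (the value is configurable, so the numbers are only illustrative):

    cachesize = 65536
    offset, length = 70000, 100
    realoffset = offset & ~(cachesize - 1)            # 65536
    reallength = (((offset + length + cachesize)
                   & ~(cachesize - 1)) - realoffset)  # 65536
    # one 64KiB read covering bytes 65536..131072 satisfies the 100-byte
    # request and pre-populates the cache around it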
1616 1620
1617 1621 def _getsegment(self, offset, length, df=None):
1618 1622 """Obtain a segment of raw data from the revlog.
1619 1623
1620 1624 Accepts an absolute offset, length of bytes to obtain, and an
1621 1625 optional file handle to the already-opened revlog. If the file
1622 1626 handle is used, its original seek position will not be preserved.
1623 1627
1624 1628 Requests for data may be returned from a cache.
1625 1629
1626 1630 Returns a str or a buffer instance of raw byte data.
1627 1631 """
1628 1632 o, d = self._chunkcache
1629 1633 l = len(d)
1630 1634
1631 1635 # is it in the cache?
1632 1636 cachestart = offset - o
1633 1637 cacheend = cachestart + length
1634 1638 if cachestart >= 0 and cacheend <= l:
1635 1639 if cachestart == 0 and cacheend == l:
1636 1640 return d # avoid a copy
1637 1641 return util.buffer(d, cachestart, cacheend - cachestart)
1638 1642
1639 1643 return self._readsegment(offset, length, df=df)
1640 1644
1641 1645 def _getsegmentforrevs(self, startrev, endrev, df=None):
1642 1646 """Obtain a segment of raw data corresponding to a range of revisions.
1643 1647
1644 1648 Accepts the start and end revisions and an optional already-open
1645 1649 file handle to be used for reading. If the file handle is read, its
1646 1650 seek position will not be preserved.
1647 1651
1648 1652 Requests for data may be satisfied by a cache.
1649 1653
1650 1654 Returns a 2-tuple of (offset, data) for the requested range of
1651 1655 revisions. Offset is the integer offset from the beginning of the
1652 1656 revlog and data is a str or buffer of the raw byte data.
1653 1657
1654 1658 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1655 1659 to determine where each revision's data begins and ends.
1656 1660 """
1657 1661 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1658 1662 # (functions are expensive).
1659 1663 index = self.index
1660 1664 istart = index[startrev]
1661 1665 start = int(istart[0] >> 16)
1662 1666 if startrev == endrev:
1663 1667 end = start + istart[1]
1664 1668 else:
1665 1669 iend = index[endrev]
1666 1670 end = int(iend[0] >> 16) + iend[1]
1667 1671
1668 1672 if self._inline:
1669 1673 start += (startrev + 1) * self._io.size
1670 1674 end += (endrev + 1) * self._io.size
1671 1675 length = end - start
1672 1676
1673 1677 return start, self._getsegment(start, length, df=df)
1674 1678
1675 1679 def _chunk(self, rev, df=None):
1676 1680 """Obtain a single decompressed chunk for a revision.
1677 1681
1678 1682 Accepts an integer revision and an optional already-open file handle
1679 1683 to be used for reading. If used, the seek position of the file will not
1680 1684 be preserved.
1681 1685
1682 1686 Returns a str holding uncompressed data for the requested revision.
1683 1687 """
1684 1688 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1685 1689
1686 1690 def _chunks(self, revs, df=None):
1687 1691 """Obtain decompressed chunks for the specified revisions.
1688 1692
1689 1693 Accepts an iterable of numeric revisions that are assumed to be in
1690 1694 ascending order. Also accepts an optional already-open file handle
1691 1695 to be used for reading. If used, the seek position of the file will
1692 1696 not be preserved.
1693 1697
1694 1698 This function is similar to calling ``self._chunk()`` multiple times,
1695 1699 but is faster.
1696 1700
1697 1701 Returns a list with decompressed data for each requested revision.
1698 1702 """
1699 1703 if not revs:
1700 1704 return []
1701 1705 start = self.start
1702 1706 length = self.length
1703 1707 inline = self._inline
1704 1708 iosize = self._io.size
1705 1709 buffer = util.buffer
1706 1710
1707 1711 l = []
1708 1712 ladd = l.append
1709 1713
1710 1714 if not self._withsparseread:
1711 1715 slicedchunks = (revs,)
1712 1716 else:
1713 1717 slicedchunks = _slicechunk(self, revs)
1714 1718
1715 1719 for revschunk in slicedchunks:
1716 1720 firstrev = revschunk[0]
1717 1721 # Skip trailing revisions with empty diff
1718 1722 for lastrev in revschunk[::-1]:
1719 1723 if length(lastrev) != 0:
1720 1724 break
1721 1725
1722 1726 try:
1723 1727 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1724 1728 except OverflowError:
1725 1729 # issue4215 - we can't cache a run of chunks greater than
1726 1730 # 2G on Windows
1727 1731 return [self._chunk(rev, df=df) for rev in revschunk]
1728 1732
1729 1733 decomp = self.decompress
1730 1734 for rev in revschunk:
1731 1735 chunkstart = start(rev)
1732 1736 if inline:
1733 1737 chunkstart += (rev + 1) * iosize
1734 1738 chunklength = length(rev)
1735 1739 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1736 1740
1737 1741 return l
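A note on the ``(rev + 1) * iosize`` adjustment in the loop above: in an inline revlog, each revision's data is preceded by every index entry up to and including its own. A sketch of the arithmetic, assuming the 64-byte v1 index entry size:

    iosize = 64                  # v1 index entry size (assumed here)
    rev = 2
    skew = (rev + 1) * iosize    # 192: entries 0..2 sit before rev 2's data
    # so rev 2's bytes start at self.start(2) + 192 in the inline file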
1738 1742
1739 1743 def _chunkclear(self):
1740 1744 """Clear the raw chunk cache."""
1741 1745 self._chunkcache = (0, '')
1742 1746
1743 1747 def deltaparent(self, rev):
1744 1748 """return deltaparent of the given revision"""
1745 1749 base = self.index[rev][3]
1746 1750 if base == rev:
1747 1751 return nullrev
1748 1752 elif self._generaldelta:
1749 1753 return base
1750 1754 else:
1751 1755 return rev - 1
1752 1756
1753 1757 def revdiff(self, rev1, rev2):
1754 1758 """return or calculate a delta between two revisions
1755 1759
1756 1760 The delta calculated is in binary form and is intended to be written to
1757 1761 revlog data directly. So this function needs raw revision data.
1758 1762 """
1759 1763 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1760 1764 return bytes(self._chunk(rev2))
1761 1765
1762 1766 return mdiff.textdiff(self.revision(rev1, raw=True),
1763 1767 self.revision(rev2, raw=True))
1764 1768
1765 1769 def revision(self, nodeorrev, _df=None, raw=False):
1766 1770 """return an uncompressed revision of a given node or revision
1767 1771 number.
1768 1772
1769 1773 _df - an existing file handle to read from. (internal-only)
1770 1774 raw - an optional argument specifying if the revision data is to be
1771 1775 treated as raw data when applying flag transforms. 'raw' should be set
1772 1776 to True when generating changegroups or in debug commands.
1773 1777 """
1774 1778 if isinstance(nodeorrev, int):
1775 1779 rev = nodeorrev
1776 1780 node = self.node(rev)
1777 1781 else:
1778 1782 node = nodeorrev
1779 1783 rev = None
1780 1784
1781 1785 cachedrev = None
1782 1786 flags = None
1783 1787 rawtext = None
1784 1788 if node == nullid:
1785 1789 return ""
1786 1790 if self._cache:
1787 1791 if self._cache[0] == node:
1788 1792 # _cache only stores rawtext
1789 1793 if raw:
1790 1794 return self._cache[2]
1791 1795 # duplicated, but good for perf
1792 1796 if rev is None:
1793 1797 rev = self.rev(node)
1794 1798 if flags is None:
1795 1799 flags = self.flags(rev)
1796 1800 # no extra flags set, no flag processor runs, text = rawtext
1797 1801 if flags == REVIDX_DEFAULT_FLAGS:
1798 1802 return self._cache[2]
1799 1803 # rawtext is reusable. need to run flag processor
1800 1804 rawtext = self._cache[2]
1801 1805
1802 1806 cachedrev = self._cache[1]
1803 1807
1804 1808 # look up what we need to read
1805 1809 if rawtext is None:
1806 1810 if rev is None:
1807 1811 rev = self.rev(node)
1808 1812
1809 1813 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1810 1814 if stopped:
1811 1815 rawtext = self._cache[2]
1812 1816
1813 1817 # drop cache to save memory
1814 1818 self._cache = None
1815 1819
1816 1820 bins = self._chunks(chain, df=_df)
1817 1821 if rawtext is None:
1818 1822 rawtext = bytes(bins[0])
1819 1823 bins = bins[1:]
1820 1824
1821 1825 rawtext = mdiff.patches(rawtext, bins)
1822 1826 self._cache = (node, rev, rawtext)
1823 1827
1824 1828 if flags is None:
1825 1829 if rev is None:
1826 1830 rev = self.rev(node)
1827 1831 flags = self.flags(rev)
1828 1832
1829 1833 text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
1830 1834 if validatehash:
1831 1835 self.checkhash(text, node, rev=rev)
1832 1836
1833 1837 return text
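A usage sketch of the two read modes (``rl`` is an open revlog):

    text = rl.revision(node)              # flag transforms applied, hash checked
    rawtext = rl.revision(node, raw=True) # stored bytes, e.g. for changegroups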
1834 1838
1835 1839 def hash(self, text, p1, p2):
1836 1840 """Compute a node hash.
1837 1841
1838 1842 Available as a function so that subclasses can replace the hash
1839 1843 as needed.
1840 1844 """
1841 1845 return hash(text, p1, p2)
1842 1846
1843 1847 def _processflags(self, text, flags, operation, raw=False):
1844 1848 """Inspect revision data flags and applies transforms defined by
1845 1849 registered flag processors.
1846 1850
1847 1851 ``text`` - the revision data to process
1848 1852 ``flags`` - the revision flags
1849 1853 ``operation`` - the operation being performed (read or write)
1850 1854 ``raw`` - an optional argument describing if the raw transform should be
1851 1855 applied.
1852 1856
1853 1857 This method processes the flags in the order (or reverse order if
1854 1858 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
1855 1859 flag processors registered for present flags. The order of flags defined
1856 1860 in REVIDX_FLAGS_ORDER needs to be stable because the transforms are not commutative.
1857 1861
1858 1862 Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
1859 1863 processed text and ``validatehash`` is a bool indicating whether the
1860 1864 returned text should be checked for hash integrity.
1861 1865
1862 1866 Note: If the ``raw`` argument is set, it has precedence over the
1863 1867 operation and will only update the value of ``validatehash``.
1864 1868 """
1865 1869 # fast path: no flag processors will run
1866 1870 if flags == 0:
1867 1871 return text, True
1868 1872 if operation not in ('read', 'write'):
1869 1873 raise ProgrammingError(_("invalid '%s' operation") % operation)
1870 1874 # Check all flags are known.
1871 1875 if flags & ~REVIDX_KNOWN_FLAGS:
1872 1876 raise RevlogError(_("incompatible revision flag '%#x'") %
1873 1877 (flags & ~REVIDX_KNOWN_FLAGS))
1874 1878 validatehash = True
1875 1879 # Depending on the operation (read or write), the order might be
1876 1880 # reversed due to non-commutative transforms.
1877 1881 orderedflags = REVIDX_FLAGS_ORDER
1878 1882 if operation == 'write':
1879 1883 orderedflags = reversed(orderedflags)
1880 1884
1881 1885 for flag in orderedflags:
1882 1886 # If a flagprocessor has been registered for a known flag, apply the
1883 1887 # related operation transform and update result tuple.
1884 1888 if flag & flags:
1885 1889 vhash = True
1886 1890
1887 1891 if flag not in _flagprocessors:
1888 1892 message = _("missing processor for flag '%#x'") % (flag)
1889 1893 raise RevlogError(message)
1890 1894
1891 1895 processor = _flagprocessors[flag]
1892 1896 if processor is not None:
1893 1897 readtransform, writetransform, rawtransform = processor
1894 1898
1895 1899 if raw:
1896 1900 vhash = rawtransform(self, text)
1897 1901 elif operation == 'read':
1898 1902 text, vhash = readtransform(self, text)
1899 1903 else: # write operation
1900 1904 text, vhash = writetransform(self, text)
1901 1905 validatehash = validatehash and vhash
1902 1906
1903 1907 return text, validatehash
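For reference, a processor registered in ``_flagprocessors`` is a ``(readtransform, writetransform, rawtransform)`` triple (cf. ``addflagprocessor()``). A sketch of what one could look like, modeled loosely on the base64 processor used in Mercurial's tests; the flag value and names here are hypothetical:

    import base64

    def _b64read(rl, text):
        # returns (newtext, vhash); vhash says whether to hash-check the result
        return base64.b64decode(text), True

    def _b64write(rl, text):
        return base64.b64encode(text), False

    def _b64raw(rl, text):
        # raw transform only reports whether the stored rawtext is hash-valid
        return False

    REVIDX_B64 = (1 << 12)   # hypothetical flag value
    # registration would associate the triple with the flag:
    # _flagprocessors[REVIDX_B64] = (_b64read, _b64write, _b64raw)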
1904 1908
1905 1909 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1906 1910 """Check node hash integrity.
1907 1911
1908 1912 Available as a function so that subclasses can extend hash mismatch
1909 1913 behaviors as needed.
1910 1914 """
1911 1915 try:
1912 1916 if p1 is None and p2 is None:
1913 1917 p1, p2 = self.parents(node)
1914 1918 if node != self.hash(text, p1, p2):
1915 1919 revornode = rev
1916 1920 if revornode is None:
1917 1921 revornode = templatefilters.short(hex(node))
1918 1922 raise RevlogError(_("integrity check failed on %s:%s")
1919 1923 % (self.indexfile, pycompat.bytestr(revornode)))
1920 1924 except RevlogError:
1921 1925 if self._censorable and _censoredtext(text):
1922 1926 raise error.CensoredNodeError(self.indexfile, node, text)
1923 1927 raise
1924 1928
1925 1929 def _enforceinlinesize(self, tr, fp=None):
1926 1930 """Check if the revlog is too big for inline and convert if so.
1927 1931
1928 1932 This should be called after revisions are added to the revlog. If the
1929 1933 revlog has grown too large to be an inline revlog, it will convert it
1930 1934 to use multiple index and data files.
1931 1935 """
1932 1936 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
1933 1937 return
1934 1938
1935 1939 trinfo = tr.find(self.indexfile)
1936 1940 if trinfo is None:
1937 1941 raise RevlogError(_("%s not found in the transaction")
1938 1942 % self.indexfile)
1939 1943
1940 1944 trindex = trinfo[2]
1941 1945 if trindex is not None:
1942 1946 dataoff = self.start(trindex)
1943 1947 else:
1944 1948 # revlog was stripped at start of transaction, use all leftover data
1945 1949 trindex = len(self) - 1
1946 1950 dataoff = self.end(-2)
1947 1951
1948 1952 tr.add(self.datafile, dataoff)
1949 1953
1950 1954 if fp:
1951 1955 fp.flush()
1952 1956 fp.close()
1953 1957
1954 1958 with self._datafp('w') as df:
1955 1959 for r in self:
1956 1960 df.write(self._getsegmentforrevs(r, r)[1])
1957 1961
1958 1962 with self._indexfp('w') as fp:
1959 1963 self.version &= ~FLAG_INLINE_DATA
1960 1964 self._inline = False
1961 1965 io = self._io
1962 1966 for i in self:
1963 1967 e = io.packentry(self.index[i], self.node, self.version, i)
1964 1968 fp.write(e)
1965 1969
1966 1970 # the temp file replaces the real index when we exit the context
1967 1971 # manager
1968 1972
1969 1973 tr.replace(self.indexfile, trindex * self._io.size)
1970 1974 self._chunkclear()
1971 1975
1972 1976 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1973 1977 node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
1974 1978 """add a revision to the log
1975 1979
1976 1980 text - the revision data to add
1977 1981 transaction - the transaction object used for rollback
1978 1982 link - the linkrev data to add
1979 1983 p1, p2 - the parent nodeids of the revision
1980 1984 cachedelta - an optional precomputed delta
1981 1985 node - nodeid of revision; typically node is not specified, and it is
1982 1986 computed by default as hash(text, p1, p2), however subclasses might
1983 1987 use different hashing method (and override checkhash() in such case)
1984 1988 flags - the known flags to set on the revision
1985 1989 deltacomputer - an optional _deltacomputer instance shared between
1986 1990 multiple calls
1987 1991 """
1988 1992 if link == nullrev:
1989 1993 raise RevlogError(_("attempted to add linkrev -1 to %s")
1990 1994 % self.indexfile)
1991 1995
1992 1996 if flags:
1993 1997 node = node or self.hash(text, p1, p2)
1994 1998
1995 1999 rawtext, validatehash = self._processflags(text, flags, 'write')
1996 2000
1997 2001 # If the flag processor modifies the revision data, ignore any provided
1998 2002 # cachedelta.
1999 2003 if rawtext != text:
2000 2004 cachedelta = None
2001 2005
2002 2006 if len(rawtext) > _maxentrysize:
2003 2007 raise RevlogError(
2004 2008 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
2005 2009 % (self.indexfile, len(rawtext)))
2006 2010
2007 2011 node = node or self.hash(rawtext, p1, p2)
2008 2012 if node in self.nodemap:
2009 2013 return node
2010 2014
2011 2015 if validatehash:
2012 2016 self.checkhash(rawtext, node, p1=p1, p2=p2)
2013 2017
2014 2018 return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
2015 2019 flags, cachedelta=cachedelta,
2016 2020 deltacomputer=deltacomputer)
2017 2021
2018 2022 def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
2019 2023 cachedelta=None, deltacomputer=None):
2020 2024 """add a raw revision with known flags, node and parents
2021 2025 useful when reusing a revision not stored in this revlog (ex: received
2022 2026 over wire, or read from an external bundle).
2023 2027 """
2024 2028 dfh = None
2025 2029 if not self._inline:
2026 2030 dfh = self._datafp("a+")
2027 2031 ifh = self._indexfp("a+")
2028 2032 try:
2029 2033 return self._addrevision(node, rawtext, transaction, link, p1, p2,
2030 2034 flags, cachedelta, ifh, dfh,
2031 2035 deltacomputer=deltacomputer)
2032 2036 finally:
2033 2037 if dfh:
2034 2038 dfh.close()
2035 2039 ifh.close()
2036 2040
2037 2041 def compress(self, data):
2038 2042 """Generate a possibly-compressed representation of data."""
2039 2043 if not data:
2040 2044 return '', data
2041 2045
2042 2046 compressed = self._compressor.compress(data)
2043 2047
2044 2048 if compressed:
2045 2049 # The revlog compressor added the header in the returned data.
2046 2050 return '', compressed
2047 2051
2048 2052 if data[0:1] == '\0':
2049 2053 return '', data
2050 2054 return 'u', data
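The return convention above pairs a header byte with the payload; ``_writeentry`` later just writes the two back to back. In summary:

    header, data = self.compress(rawtext)
    # header == ''  -> data carries its own engine header ('x' for zlib),
    #                  or is raw data that already starts with '\0'
    # header == 'u' -> data is stored verbatim behind an explicit 'u' marker
    chunk = header + data    # the bytes that end up in the data file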
2051 2055
2052 2056 def decompress(self, data):
2053 2057 """Decompress a revlog chunk.
2054 2058
2055 2059 The chunk is expected to begin with a header identifying the
2056 2060 format type so it can be routed to an appropriate decompressor.
2057 2061 """
2058 2062 if not data:
2059 2063 return data
2060 2064
2061 2065 # Revlogs are read much more frequently than they are written and many
2062 2066 # chunks only take microseconds to decompress, so performance is
2063 2067 # important here.
2064 2068 #
2065 2069 # We can make a few assumptions about revlogs:
2066 2070 #
2067 2071 # 1) the majority of chunks will be compressed (as opposed to inline
2068 2072 # raw data).
2069 2073 # 2) decompressing *any* data will likely be at least 10x slower than
2070 2074 # returning raw inline data.
2071 2075 # 3) we want to prioritize common and officially supported compression
2072 2076 # engines
2073 2077 #
2074 2078 # It follows that we want to optimize for "decompress compressed data
2075 2079 # when encoded with common and officially supported compression engines"
2076 2080 # case over "raw data" and "data encoded by less common or non-official
2077 2081 # compression engines." That is why we have the inline lookup first
2078 2082 # followed by the compengines lookup.
2079 2083 #
2080 2084 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2081 2085 # compressed chunks. And this matters for changelog and manifest reads.
2082 2086 t = data[0:1]
2083 2087
2084 2088 if t == 'x':
2085 2089 try:
2086 2090 return _zlibdecompress(data)
2087 2091 except zlib.error as e:
2088 2092 raise RevlogError(_('revlog decompress error: %s') %
2089 2093 stringutil.forcebytestr(e))
2090 2094 # '\0' is more common than 'u' so it goes first.
2091 2095 elif t == '\0':
2092 2096 return data
2093 2097 elif t == 'u':
2094 2098 return util.buffer(data, 1)
2095 2099
2096 2100 try:
2097 2101 compressor = self._decompressors[t]
2098 2102 except KeyError:
2099 2103 try:
2100 2104 engine = util.compengines.forrevlogheader(t)
2101 2105 compressor = engine.revlogcompressor()
2102 2106 self._decompressors[t] = compressor
2103 2107 except KeyError:
2104 2108 raise RevlogError(_('unknown compression type %r') % t)
2105 2109
2106 2110 return compressor.decompress(data)
2107 2111
2108 2112 def _isgooddeltainfo(self, deltainfo, revinfo):
2109 2113 """Returns True if the given delta is good. Good means that it is within
2110 2114 the disk span, disk size, and chain length bounds that we know to be
2111 2115 performant."""
2112 2116 if deltainfo is None:
2113 2117 return False
2114 2118
2115 2119 # - 'deltainfo.distance' is the distance from the base revision --
2116 2120 # bounding it limits the amount of I/O we need to do.
2117 2121 # - 'deltainfo.compresseddeltalen' is the sum of the total size of
2118 2122 # deltas we need to apply -- bounding it limits the amount of CPU
2119 2123 # we consume.
2120 2124
2121 2125 textlen = revinfo.textlen
2122 2126 defaultmax = textlen * 4
2123 2127 maxdist = self._maxdeltachainspan
2124 2128 if not maxdist:
2124 2128 maxdist = deltainfo.distance # ensure the conditional passes
2126 2130 maxdist = max(maxdist, defaultmax)
2127 2131 if (deltainfo.distance > maxdist or deltainfo.deltalen > textlen or
2128 2132 deltainfo.compresseddeltalen > textlen * 2 or
2129 2133 (self._maxchainlen and deltainfo.chainlen > self._maxchainlen)):
2130 2134 return False
2131 2135
2132 2136 return True
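A worked example of the bounds for a 1000-byte revision. Note that the ``maxdist = deltainfo.distance`` fallback above deliberately disables the span check when ``_maxdeltachainspan`` is unset:

    textlen = 1000
    defaultmax = textlen * 4                # 4000
    maxdist = max(self._maxdeltachainspan or deltainfo.distance, defaultmax)
    # the candidate delta is then rejected when any of these holds:
    #   deltainfo.distance           > maxdist  (span bound, if configured)
    #   deltainfo.deltalen           > 1000     (delta larger than the text)
    #   deltainfo.compresseddeltalen > 2000     (chain costlier than a snapshot)
    #   deltainfo.chainlen           > self._maxchainlen  (only when set)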
2133 2137
2134 2138 def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
2135 2139 cachedelta, ifh, dfh, alwayscache=False,
2136 2140 deltacomputer=None):
2137 2141 """internal function to add revisions to the log
2138 2142
2139 2143 see addrevision for argument descriptions.
2140 2144
2141 2145 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2142 2146
2143 2147 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2144 2148 be used.
2145 2149
2146 2150 invariants:
2147 2151 - rawtext is optional (can be None); if not set, cachedelta must be set.
2148 2152 if both are set, they must correspond to each other.
2149 2153 """
2150 2154 if node == nullid:
2151 2155 raise RevlogError(_("%s: attempt to add null revision") %
2152 2156 (self.indexfile))
2153 2157 if node == wdirid or node in wdirfilenodeids:
2154 2158 raise RevlogError(_("%s: attempt to add wdir revision") %
2155 2159 (self.indexfile))
2156 2160
2157 2161 if self._inline:
2158 2162 fh = ifh
2159 2163 else:
2160 2164 fh = dfh
2161 2165
2162 2166 btext = [rawtext]
2163 2167
2164 2168 curr = len(self)
2165 2169 prev = curr - 1
2166 2170 offset = self.end(prev)
2167 2171 p1r, p2r = self.rev(p1), self.rev(p2)
2168 2172
2169 2173 # full versions are inserted when the needed deltas
2170 2174 # become comparable to the uncompressed text
2171 2175 if rawtext is None:
2172 2176 # need rawtext size, before changed by flag processors, which is
2173 2177 # the non-raw size. use revlog explicitly to avoid filelog's extra
2174 2178 # logic that might remove metadata size.
2175 2179 textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
2176 2180 cachedelta[1])
2177 2181 else:
2178 2182 textlen = len(rawtext)
2179 2183
2180 2184 if deltacomputer is None:
2181 2185 deltacomputer = _deltacomputer(self)
2182 2186
2183 2187 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2184 2188
2185 2189 # no delta for flag processor revision (see "candelta" for why)
2186 2190 # not calling candelta since only one revision needs test, also to
2187 2191 # avoid overhead fetching flags again.
2188 2192 if flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
2189 2193 deltainfo = None
2190 2194 else:
2191 2195 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2192 2196
2193 2197 if deltainfo is not None:
2194 2198 base = deltainfo.base
2195 2199 chainbase = deltainfo.chainbase
2196 2200 data = deltainfo.data
2197 2201 l = deltainfo.deltalen
2198 2202 else:
2199 2203 rawtext = deltacomputer.buildtext(revinfo, fh)
2200 2204 data = self.compress(rawtext)
2201 2205 l = len(data[1]) + len(data[0])
2202 2206 base = chainbase = curr
2203 2207
2204 2208 e = (offset_type(offset, flags), l, textlen,
2205 2209 base, link, p1r, p2r, node)
2206 2210 self.index.insert(-1, e)
2207 2211 self.nodemap[node] = curr
2208 2212
2209 2213 entry = self._io.packentry(e, self.node, self.version, curr)
2210 2214 self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
2211 2215
2212 2216 if alwayscache and rawtext is None:
2213 2217 rawtext = deltacomputer.buildtext(revinfo, fh)
2214 2218
2215 2219 if type(rawtext) == bytes: # only accept immutable objects
2216 2220 self._cache = (node, curr, rawtext)
2217 2221 self._chainbasecache[curr] = chainbase
2218 2222 return node
2219 2223
2220 2224 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
2221 2225 # Files opened in a+ mode have inconsistent behavior on various
2222 2226 # platforms. Windows requires that a file positioning call be made
2223 2227 # when the file handle transitions between reads and writes. See
2224 2228 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2225 2229 # platforms, Python or the platform itself can be buggy. Some versions
2226 2230 # of Solaris have been observed to not append at the end of the file
2227 2231 # if the file was seeked to before the end. See issue4943 for more.
2228 2232 #
2229 2233 # We work around this issue by inserting a seek() before writing.
2230 2234 # Note: This is likely not necessary on Python 3.
2231 2235 ifh.seek(0, os.SEEK_END)
2232 2236 if dfh:
2233 2237 dfh.seek(0, os.SEEK_END)
2234 2238
2235 2239 curr = len(self) - 1
2236 2240 if not self._inline:
2237 2241 transaction.add(self.datafile, offset)
2238 2242 transaction.add(self.indexfile, curr * len(entry))
2239 2243 if data[0]:
2240 2244 dfh.write(data[0])
2241 2245 dfh.write(data[1])
2242 2246 ifh.write(entry)
2243 2247 else:
2244 2248 offset += curr * self._io.size
2245 2249 transaction.add(self.indexfile, offset, curr)
2246 2250 ifh.write(entry)
2247 2251 ifh.write(data[0])
2248 2252 ifh.write(data[1])
2249 2253 self._enforceinlinesize(transaction, ifh)
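In isolation, the workaround described above boils down to: never trust the implicit append position of an ``a+`` handle, since a prior read may have moved it (and some platforms misbehave even without one). A generic sketch, not specific to revlogs; the names are hypothetical:

    import os

    with open(somepath, 'ab+') as fh:
        fh.read(16)                  # any read moves the position...
        fh.seek(0, os.SEEK_END)      # ...so seek to EOF explicitly
        fh.write(payload)            # before appending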
2250 2254
2251 2255 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
2252 2256 """
2253 2257 add a delta group
2254 2258
2255 2259 given a set of deltas, add them to the revision log. the
2256 2260 first delta is against its parent, which should be in our
2257 2261 log, the rest are against the previous delta.
2258 2262
2259 2263 If ``addrevisioncb`` is defined, it will be called with arguments of
2260 2264 this revlog and the node that was added.
2261 2265 """
2262 2266
2263 2267 nodes = []
2264 2268
2265 2269 r = len(self)
2266 2270 end = 0
2267 2271 if r:
2268 2272 end = self.end(r - 1)
2269 2273 ifh = self._indexfp("a+")
2270 2274 isize = r * self._io.size
2271 2275 if self._inline:
2272 2276 transaction.add(self.indexfile, end + isize, r)
2273 2277 dfh = None
2274 2278 else:
2275 2279 transaction.add(self.indexfile, isize, r)
2276 2280 transaction.add(self.datafile, end)
2277 2281 dfh = self._datafp("a+")
2278 2282 def flush():
2279 2283 if dfh:
2280 2284 dfh.flush()
2281 2285 ifh.flush()
2282 2286 try:
2283 2287 deltacomputer = _deltacomputer(self)
2284 2288 # loop through our set of deltas
2285 2289 for data in deltas:
2286 2290 node, p1, p2, linknode, deltabase, delta, flags = data
2287 2291 link = linkmapper(linknode)
2288 2292 flags = flags or REVIDX_DEFAULT_FLAGS
2289 2293
2290 2294 nodes.append(node)
2291 2295
2292 2296 if node in self.nodemap:
2293 2297 # this can happen if two branches make the same change
2294 2298 continue
2295 2299
2296 2300 for p in (p1, p2):
2297 2301 if p not in self.nodemap:
2298 2302 raise LookupError(p, self.indexfile,
2299 2303 _('unknown parent'))
2300 2304
2301 2305 if deltabase not in self.nodemap:
2302 2306 raise LookupError(deltabase, self.indexfile,
2303 2307 _('unknown delta base'))
2304 2308
2305 2309 baserev = self.rev(deltabase)
2306 2310
2307 2311 if baserev != nullrev and self.iscensored(baserev):
2308 2312 # if base is censored, delta must be full replacement in a
2309 2313 # single patch operation
2310 2314 hlen = struct.calcsize(">lll")
2311 2315 oldlen = self.rawsize(baserev)
2312 2316 newlen = len(delta) - hlen
2313 2317 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2314 2318 raise error.CensoredBaseError(self.indexfile,
2315 2319 self.node(baserev))
2316 2320
2317 2321 if not flags and self._peek_iscensored(baserev, delta, flush):
2318 2322 flags |= REVIDX_ISCENSORED
2319 2323
2320 2324 # We assume consumers of addrevisioncb will want to retrieve
2321 2325 # the added revision, which will require a call to
2322 2326 # revision(). revision() will fast path if there is a cache
2323 2327 # hit. So, we tell _addrevision() to always cache in this case.
2324 2328 # We're only using addgroup() in the context of changegroup
2325 2329 # generation so the revision data can always be handled as raw
2326 2330 # by the flagprocessor.
2327 2331 self._addrevision(node, None, transaction, link,
2328 2332 p1, p2, flags, (baserev, delta),
2329 2333 ifh, dfh,
2330 2334 alwayscache=bool(addrevisioncb),
2331 2335 deltacomputer=deltacomputer)
2332 2336
2333 2337 if addrevisioncb:
2334 2338 addrevisioncb(self, node)
2335 2339
2336 2340 if not dfh and not self._inline:
2337 2341 # addrevision switched from inline to conventional
2338 2342 # reopen the index
2339 2343 ifh.close()
2340 2344 dfh = self._datafp("a+")
2341 2345 ifh = self._indexfp("a+")
2342 2346 finally:
2343 2347 if dfh:
2344 2348 dfh.close()
2345 2349 ifh.close()
2346 2350
2347 2351 return nodes
2348 2352
2349 2353 def iscensored(self, rev):
2350 2354 """Check if a file revision is censored."""
2351 2355 if not self._censorable:
2352 2356 return False
2353 2357
2354 2358 return self.flags(rev) & REVIDX_ISCENSORED
2355 2359
2356 2360 def _peek_iscensored(self, baserev, delta, flush):
2357 2361 """Quickly check if a delta produces a censored revision."""
2358 2362 if not self._censorable:
2359 2363 return False
2360 2364
2361 2365 # Fragile heuristic: unless new file meta keys are added alphabetically
2362 2366 # preceding "censored", all censored revisions are prefixed by
2363 2367 # "\1\ncensored:". A delta producing such a censored revision must be a
2364 2368 # full-replacement delta, so we inspect the first and only patch in the
2365 2369 # delta for this prefix.
2366 2370 hlen = struct.calcsize(">lll")
2367 2371 if len(delta) <= hlen:
2368 2372 return False
2369 2373
2370 2374 oldlen = self.rawsize(baserev)
2371 2375 newlen = len(delta) - hlen
2372 2376 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2373 2377 return False
2374 2378
2375 2379 add = "\1\ncensored:"
2376 2380 addlen = len(add)
2377 2381 return newlen >= addlen and delta[hlen:hlen + addlen] == add
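Concretely, a full-replacement delta consists of a single ``>lll`` hunk header (start, end, new length) followed by the new text, which is what the prefix test above relies on. A sketch of pulling the pieces apart, assuming ``delta`` and ``oldlen`` as in the method above:

    import struct

    hlen = struct.calcsize(">lll")                  # 12 bytes
    start, end, newlen = struct.unpack(">lll", delta[:hlen])
    fullreplace = (start == 0 and end == oldlen
                   and newlen == len(delta) - hlen)
    censored = fullreplace and delta[hlen:].startswith("\1\ncensored:")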
2378 2382
2379 2383 def getstrippoint(self, minlink):
2380 2384 """find the minimum rev that must be stripped to strip the linkrev
2381 2385
2382 2386 Returns a tuple containing the minimum rev and a set of all revs that
2383 2387 have linkrevs that will be broken by this strip.
2384 2388 """
2385 2389 brokenrevs = set()
2386 2390 strippoint = len(self)
2387 2391
2388 2392 heads = {}
2389 2393 futurelargelinkrevs = set()
2390 2394 for head in self.headrevs():
2391 2395 headlinkrev = self.linkrev(head)
2392 2396 heads[head] = headlinkrev
2393 2397 if headlinkrev >= minlink:
2394 2398 futurelargelinkrevs.add(headlinkrev)
2395 2399
2396 2400 # This algorithm involves walking down the rev graph, starting at the
2397 2401 # heads. Since the revs are topologically sorted according to linkrev,
2398 2402 # once all head linkrevs are below the minlink, we know there are
2399 2403 # no more revs that could have a linkrev greater than minlink.
2400 2404 # So we can stop walking.
2401 2405 while futurelargelinkrevs:
2402 2406 strippoint -= 1
2403 2407 linkrev = heads.pop(strippoint)
2404 2408
2405 2409 if linkrev < minlink:
2406 2410 brokenrevs.add(strippoint)
2407 2411 else:
2408 2412 futurelargelinkrevs.remove(linkrev)
2409 2413
2410 2414 for p in self.parentrevs(strippoint):
2411 2415 if p != nullrev:
2412 2416 plinkrev = self.linkrev(p)
2413 2417 heads[p] = plinkrev
2414 2418 if plinkrev >= minlink:
2415 2419 futurelargelinkrevs.add(plinkrev)
2416 2420
2417 2421 return strippoint, brokenrevs
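A usage sketch tying the two return values together (``rl`` is an open revlog):

    strippoint, broken = rl.getstrippoint(minlink)
    # every rev in [strippoint, len(rl)) gets truncated by strip();
    # 'broken' lists the revs in that range whose linkrev is < minlink,
    # i.e. revisions the caller must save and re-add after the truncation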
2418 2422
2419 2423 def strip(self, minlink, transaction):
2420 2424 """truncate the revlog on the first revision with a linkrev >= minlink
2421 2425
2422 2426 This function is called when we're stripping revision minlink and
2423 2427 its descendants from the repository.
2424 2428
2425 2429 We have to remove all revisions with linkrev >= minlink, because
2426 2430 the equivalent changelog revisions will be renumbered after the
2427 2431 strip.
2428 2432
2429 2433 So we truncate the revlog on the first of these revisions, and
2430 2434 trust that the caller has saved the revisions that shouldn't be
2431 2435 removed and that it'll re-add them after this truncation.
2432 2436 """
2433 2437 if len(self) == 0:
2434 2438 return
2435 2439
2436 2440 rev, _ = self.getstrippoint(minlink)
2437 2441 if rev == len(self):
2438 2442 return
2439 2443
2440 2444 # first truncate the files on disk
2441 2445 end = self.start(rev)
2442 2446 if not self._inline:
2443 2447 transaction.add(self.datafile, end)
2444 2448 end = rev * self._io.size
2445 2449 else:
2446 2450 end += rev * self._io.size
2447 2451
2448 2452 transaction.add(self.indexfile, end)
2449 2453
2450 2454 # then reset internal state in memory to forget those revisions
2451 2455 self._cache = None
2452 2456 self._chaininfocache = {}
2453 2457 self._chunkclear()
2454 2458 for x in xrange(rev, len(self)):
2455 2459 del self.nodemap[self.node(x)]
2456 2460
2457 2461 del self.index[rev:-1]
2458 2462 self._nodepos = None
2459 2463
2460 2464 def checksize(self):
2461 2465 expected = 0
2462 2466 if len(self):
2463 2467 expected = max(0, self.end(len(self) - 1))
2464 2468
2465 2469 try:
2466 2470 with self._datafp() as f:
2467 2471 f.seek(0, 2)
2468 2472 actual = f.tell()
2469 2473 dd = actual - expected
2470 2474 except IOError as inst:
2471 2475 if inst.errno != errno.ENOENT:
2472 2476 raise
2473 2477 dd = 0
2474 2478
2475 2479 try:
2476 2480 f = self.opener(self.indexfile)
2477 2481 f.seek(0, 2)
2478 2482 actual = f.tell()
2479 2483 f.close()
2480 2484 s = self._io.size
2481 2485 i = max(0, actual // s)
2482 2486 di = actual - (i * s)
2483 2487 if self._inline:
2484 2488 databytes = 0
2485 2489 for r in self:
2486 2490 databytes += max(0, self.length(r))
2487 2491 dd = 0
2488 2492 di = actual - len(self) * s - databytes
2489 2493 except IOError as inst:
2490 2494 if inst.errno != errno.ENOENT:
2491 2495 raise
2492 2496 di = 0
2493 2497
2494 2498 return (dd, di)
2495 2499
2496 2500 def files(self):
2497 2501 res = [self.indexfile]
2498 2502 if not self._inline:
2499 2503 res.append(self.datafile)
2500 2504 return res
2501 2505
2502 2506 DELTAREUSEALWAYS = 'always'
2503 2507 DELTAREUSESAMEREVS = 'samerevs'
2504 2508 DELTAREUSENEVER = 'never'
2505 2509
2506 2510 DELTAREUSEFULLADD = 'fulladd'
2507 2511
2508 2512 DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}
2509 2513
2510 2514 def clone(self, tr, destrevlog, addrevisioncb=None,
2511 2515 deltareuse=DELTAREUSESAMEREVS, aggressivemergedeltas=None):
2512 2516 """Copy this revlog to another, possibly with format changes.
2513 2517
2514 2518 The destination revlog will contain the same revisions and nodes.
2515 2519 However, it may not be bit-for-bit identical due to e.g. delta encoding
2516 2520 differences.
2517 2521
2518 2522 The ``deltareuse`` argument controls how deltas from the existing revlog
2519 2523 are preserved in the destination revlog. The argument can have the
2520 2524 following values:
2521 2525
2522 2526 DELTAREUSEALWAYS
2523 2527 Deltas will always be reused (if possible), even if the destination
2524 2528 revlog would not select the same revisions for the delta. This is the
2525 2529 fastest mode of operation.
2526 2530 DELTAREUSESAMEREVS
2527 2531 Deltas will be reused if the destination revlog would pick the same
2528 2532 revisions for the delta. This mode strikes a balance between speed
2529 2533 and optimization.
2530 2534 DELTAREUSENEVER
2531 2535 Deltas will never be reused. This is the slowest mode of execution.
2532 2536 This mode can be used to recompute deltas (e.g. if the diff/delta
2533 2537 algorithm changes).
2534 2538
2535 2539 Delta computation can be slow, so the choice of delta reuse policy can
2536 2540 significantly affect run time.
2537 2541
2538 2542 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2539 2543 two extremes. Deltas will be reused if they are appropriate. But if the
2540 2544 delta could choose a better revision, it will do so. This means if you
2541 2545 are converting a non-generaldelta revlog to a generaldelta revlog,
2542 2546 deltas will be recomputed if the delta's parent isn't a parent of the
2543 2547 revision.
2544 2548
2545 2549 In addition to the delta policy, the ``aggressivemergedeltas`` argument
2546 2550 controls whether to compute deltas against both parents for merges.
2547 2551 By default (``None``), the destination revlog's current setting is kept.
2548 2552 """
2549 2553 if deltareuse not in self.DELTAREUSEALL:
2550 2554 raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)
2551 2555
2552 2556 if len(destrevlog):
2553 2557 raise ValueError(_('destination revlog is not empty'))
2554 2558
2555 2559 if getattr(self, 'filteredrevs', None):
2556 2560 raise ValueError(_('source revlog has filtered revisions'))
2557 2561 if getattr(destrevlog, 'filteredrevs', None):
2558 2562 raise ValueError(_('destination revlog has filtered revisions'))
2559 2563
2560 2564 # lazydeltabase controls whether to reuse a cached delta, if possible.
2561 2565 oldlazydeltabase = destrevlog._lazydeltabase
2562 2566 oldamd = destrevlog._aggressivemergedeltas
2563 2567
2564 2568 try:
2565 2569 if deltareuse == self.DELTAREUSEALWAYS:
2566 2570 destrevlog._lazydeltabase = True
2567 2571 elif deltareuse == self.DELTAREUSESAMEREVS:
2568 2572 destrevlog._lazydeltabase = False
2569 2573
2570 2574 destrevlog._aggressivemergedeltas = aggressivemergedeltas or oldamd
2571 2575
2572 2576 populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
2573 2577 self.DELTAREUSESAMEREVS)
2574 2578
2575 2579 deltacomputer = _deltacomputer(destrevlog)
2576 2580 index = self.index
2577 2581 for rev in self:
2578 2582 entry = index[rev]
2579 2583
2580 2584 # Some classes override linkrev to take filtered revs into
2581 2585 # account. Use raw entry from index.
2582 2586 flags = entry[0] & 0xffff
2583 2587 linkrev = entry[4]
2584 2588 p1 = index[entry[5]][7]
2585 2589 p2 = index[entry[6]][7]
2586 2590 node = entry[7]
2587 2591
2588 2592 # (Possibly) reuse the delta from the revlog if allowed and
2589 2593 # the revlog chunk is a delta.
2590 2594 cachedelta = None
2591 2595 rawtext = None
2592 2596 if populatecachedelta:
2593 2597 dp = self.deltaparent(rev)
2594 2598 if dp != nullrev:
2595 2599 cachedelta = (dp, bytes(self._chunk(rev)))
2596 2600
2597 2601 if not cachedelta:
2598 2602 rawtext = self.revision(rev, raw=True)
2599 2603
2600 2604
2601 2605 if deltareuse == self.DELTAREUSEFULLADD:
2602 2606 destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
2603 2607 cachedelta=cachedelta,
2604 2608 node=node, flags=flags,
2605 2609 deltacomputer=deltacomputer)
2606 2610 else:
2607 2611 ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
2608 2612 checkambig=False)
2609 2613 dfh = None
2610 2614 if not destrevlog._inline:
2611 2615 dfh = destrevlog.opener(destrevlog.datafile, 'a+')
2612 2616 try:
2613 2617 destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
2614 2618 p2, flags, cachedelta, ifh, dfh,
2615 2619 deltacomputer=deltacomputer)
2616 2620 finally:
2617 2621 if dfh:
2618 2622 dfh.close()
2619 2623 ifh.close()
2620 2624
2621 2625 if addrevisioncb:
2622 2626 addrevisioncb(self, rev, node)
2623 2627 finally:
2624 2628 destrevlog._lazydeltabase = oldlazydeltabase
2625 2629 destrevlog._aggressivemergedeltas = oldamd
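A usage sketch for an upgrade-style copy that recomputes every delta (``src`` and ``dst`` are assumed to be open revlogs and ``tr`` an active transaction):

    src.clone(tr, dst, deltareuse=src.DELTAREUSENEVER,
              aggressivemergedeltas=True)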