revlog: suggest other parent when a parent was refused for a delta (issue5481)...
Paul Morelle
r38114:9bf0bd4d default
@@ -1,2603 +1,2610 @@
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import collections
17 17 import contextlib
18 18 import errno
19 19 import hashlib
20 20 import heapq
21 21 import os
22 22 import re
23 23 import struct
24 24 import zlib
25 25
26 26 # import stuff from node for others to import from revlog
27 27 from .node import (
28 28 bin,
29 29 hex,
30 30 nullid,
31 31 nullrev,
32 32 wdirfilenodeids,
33 33 wdirhex,
34 34 wdirid,
35 35 wdirrev,
36 36 )
37 37 from .i18n import _
38 38 from .thirdparty import (
39 39 attr,
40 40 )
41 41 from . import (
42 42 ancestor,
43 43 error,
44 44 mdiff,
45 45 policy,
46 46 pycompat,
47 47 templatefilters,
48 48 util,
49 49 )
50 50 from .utils import (
51 51 stringutil,
52 52 )
53 53
54 54 parsers = policy.importmod(r'parsers')
55 55
56 56 # Aliased for performance.
57 57 _zlibdecompress = zlib.decompress
58 58
59 59 # revlog header flags
60 60 REVLOGV0 = 0
61 61 REVLOGV1 = 1
62 62 # Dummy value until file format is finalized.
63 63 # Reminder: change the bounds check in revlog.__init__ when this is changed.
64 64 REVLOGV2 = 0xDEAD
65 65 FLAG_INLINE_DATA = (1 << 16)
66 66 FLAG_GENERALDELTA = (1 << 17)
67 67 REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA
68 68 REVLOG_DEFAULT_FORMAT = REVLOGV1
69 69 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
70 70 REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA
71 71 REVLOGV2_FLAGS = REVLOGV1_FLAGS
72 72
73 73 # revlog index flags
74 74 REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
75 75 REVIDX_ELLIPSIS = (1 << 14) # revision hash does not match data (narrowhg)
76 76 REVIDX_EXTSTORED = (1 << 13) # revision data is stored externally
77 77 REVIDX_DEFAULT_FLAGS = 0
78 78 # stable order in which flags need to be processed and their processors applied
79 79 REVIDX_FLAGS_ORDER = [
80 80 REVIDX_ISCENSORED,
81 81 REVIDX_ELLIPSIS,
82 82 REVIDX_EXTSTORED,
83 83 ]
84 84 REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)
86 86 # bitmask of flags that could cause rawdata content change
86 86 REVIDX_RAWTEXT_CHANGING_FLAGS = REVIDX_ISCENSORED | REVIDX_EXTSTORED
87 87
88 88 # max size of revlog with inline data
89 89 _maxinline = 131072
90 90 _chunksize = 1048576
91 91
92 92 RevlogError = error.RevlogError
93 93 LookupError = error.LookupError
94 94 CensoredNodeError = error.CensoredNodeError
95 95 ProgrammingError = error.ProgrammingError
96 96
97 97 # Store flag processors (cf. 'addflagprocessor()' to register)
98 98 _flagprocessors = {
99 99 REVIDX_ISCENSORED: None,
100 100 }
101 101
102 102 _mdre = re.compile('\1\n')
103 103 def parsemeta(text):
104 104 """return (metadatadict, metadatasize)"""
105 105 # text can be buffer, so we can't use .startswith or .index
106 106 if text[:2] != '\1\n':
107 107 return None, None
108 108 s = _mdre.search(text, 2).start()
109 109 mtext = text[2:s]
110 110 meta = {}
111 111 for l in mtext.splitlines():
112 112 k, v = l.split(": ", 1)
113 113 meta[k] = v
114 114 return meta, (s + 2)
115 115
116 116 def packmeta(meta, text):
117 117 keys = sorted(meta)
118 118 metatext = "".join("%s: %s\n" % (k, meta[k]) for k in keys)
119 119 return "\1\n%s\1\n%s" % (metatext, text)
120 120
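
A round-trip of this "\1\n" metadata framing, using the two helpers above (a sketch; the copy/copyrev values are purely illustrative, and the byte strings match the module's Python 2 style):

raw = packmeta({'copy': 'foo', 'copyrev': '0' * 40}, 'file body\n')
assert raw[:2] == '\1\n'            # framing marker
meta, offset = parsemeta(raw)
assert meta['copy'] == 'foo'        # keys parsed back out
assert raw[offset:] == 'file body\n'  # offset points past the second marker
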
121 121 def _censoredtext(text):
122 122 m, offs = parsemeta(text)
123 123 return m and "censored" in m
124 124
125 125 def addflagprocessor(flag, processor):
126 126 """Register a flag processor on a revision data flag.
127 127
128 128 Invariant:
129 129 - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
130 130 and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
131 131 - Only one flag processor can be registered on a specific flag.
132 132 - flagprocessors must be 3-tuples of functions (read, write, raw) with the
133 133 following signatures:
134 134 - (read) f(self, rawtext) -> text, bool
135 135 - (write) f(self, text) -> rawtext, bool
136 136 - (raw) f(self, rawtext) -> bool
137 137 "text" is presented to the user. "rawtext" is stored in revlog data, not
138 138 directly visible to the user.
139 139 The boolean returned by these transforms is used to determine whether
140 140 the returned text can be used for hash integrity checking. For example,
141 141 if "write" returns False, then "text" is used to generate hash. If
142 142 "write" returns True, that basically means "rawtext" returned by "write"
143 143 should be used to generate hash. Usually, "write" and "read" return
144 144 different booleans. And "raw" returns a same boolean as "write".
145 145
146 146 Note: The 'raw' transform is used for changegroup generation and in some
147 147 debug commands. In this case the transform only indicates whether the
148 148 contents can be used for hash integrity checks.
149 149 """
150 150 if not flag & REVIDX_KNOWN_FLAGS:
151 151 msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
152 152 raise ProgrammingError(msg)
153 153 if flag not in REVIDX_FLAGS_ORDER:
154 154 msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
155 155 raise ProgrammingError(msg)
156 156 if flag in _flagprocessors:
157 157 msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
158 158 raise error.Abort(msg)
159 159 _flagprocessors[flag] = processor
160 160
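
A sketch of a registration under this contract. Everything below is hypothetical: a real extension would define and reserve its own flag rather than reuse REVIDX_EXTSTORED, and the base64 transforms exist only to illustrate the (read, write, raw) tuple and its booleans:

import base64

def _readb64(self, rawtext):
    # stored rawtext -> user-visible text; True: the node hash was
    # computed from "text", so the decoded result can be verified
    return base64.b64decode(rawtext), True

def _writeb64(self, text):
    # user text -> stored rawtext; False: the hash covers "text",
    # not the encoded rawtext
    return base64.b64encode(text), False

def _rawb64(self, rawtext):
    # same boolean as "write": rawtext alone cannot be hash-checked
    return False

# addflagprocessor(REVIDX_EXTSTORED, (_readb64, _writeb64, _rawb64))
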
161 161 def getoffset(q):
162 162 return int(q >> 16)
163 163
164 164 def gettype(q):
165 165 return int(q & 0xFFFF)
166 166
167 167 def offset_type(offset, type):
168 168 if (type & ~REVIDX_KNOWN_FLAGS) != 0:
169 169 raise ValueError('unknown revlog index flags')
170 170 return int(int(offset) << 16 | type)
171 171
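
A quick round-trip of this packing, using the helpers just defined: index flags live in the low 16 bits, the byte offset in everything above them.

packed = offset_type(65536, REVIDX_ISCENSORED)
assert getoffset(packed) == 65536
assert gettype(packed) == REVIDX_ISCENSORED
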
172 172 _nullhash = hashlib.sha1(nullid)
173 173
174 174 def hash(text, p1, p2):
175 175 """generate a hash from the given text and its parent hashes
176 176
177 177 This hash combines both the current file contents and its history
178 178 in a manner that makes it easy to distinguish nodes with the same
179 179 content in the revision graph.
180 180 """
181 181 # As of now, if one of the parent nodes is null, p2 is null
182 182 if p2 == nullid:
183 183 # deep copy of a hash is faster than creating one
184 184 s = _nullhash.copy()
185 185 s.update(p1)
186 186 else:
187 187 # none of the parent nodes are nullid
188 188 if p1 < p2:
189 189 a = p1
190 190 b = p2
191 191 else:
192 192 a = p2
193 193 b = p1
194 194 s = hashlib.sha1(a)
195 195 s.update(b)
196 196 s.update(text)
197 197 return s.digest()
198 198
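
Restated as a standalone sketch: hash() computes sha1(min(p1, p2) + max(p1, p2) + text), and the null-parent branch is just this sorted case with a cached sha1 prefix, since nullid (twenty zero bytes) always sorts first.

def _nodehash_sketch(text, p1, p2):
    # equivalent to hash() above, without the _nullhash optimization
    a, b = sorted([p1, p2])
    s = hashlib.sha1(a)
    s.update(b)
    s.update(text)
    return s.digest()

# e.g. for a root revision (both parents null):
# _nodehash_sketch('hi\n', nullid, nullid) == hash('hi\n', nullid, nullid)
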
199 199 def _trimchunk(revlog, revs, startidx, endidx=None):
200 200 """returns revs[startidx:endidx] without empty trailing revs
201 201 """
202 202 length = revlog.length
203 203
204 204 if endidx is None:
205 205 endidx = len(revs)
206 206
207 207 # Trim empty revs at the end, but never the very first revision of a chain
208 208 while endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0:
209 209 endidx -= 1
210 210
211 211 return revs[startidx:endidx]
212 212
213 213 def _slicechunk(revlog, revs):
214 214 """slice revs to reduce the amount of unrelated data to be read from disk.
215 215
217 217 ``revs`` is sliced into groups that should each be read in one go.
218 218 Assumes that ``revs`` is sorted.
218 218 """
219 219 start = revlog.start
220 220 length = revlog.length
221 221
222 222 if len(revs) <= 1:
223 223 yield revs
224 224 return
225 225
226 226 startbyte = start(revs[0])
227 227 endbyte = start(revs[-1]) + length(revs[-1])
228 228 readdata = deltachainspan = endbyte - startbyte
229 229
230 230 chainpayload = sum(length(r) for r in revs)
231 231
232 232 if deltachainspan:
233 233 density = chainpayload / float(deltachainspan)
234 234 else:
235 235 density = 1.0
236 236
237 237 # Store the gaps in a heap to have them sorted by decreasing size
238 238 gapsheap = []
239 239 heapq.heapify(gapsheap)
240 240 prevend = None
241 241 for i, rev in enumerate(revs):
242 242 revstart = start(rev)
243 243 revlen = length(rev)
244 244
245 245 # Skip empty revisions to form larger holes
246 246 if revlen == 0:
247 247 continue
248 248
249 249 if prevend is not None:
250 250 gapsize = revstart - prevend
251 251 # only consider holes that are large enough
252 252 if gapsize > revlog._srmingapsize:
253 253 heapq.heappush(gapsheap, (-gapsize, i))
254 254
255 255 prevend = revstart + revlen
256 256
257 257 # Collect the indices of the largest holes until the density is acceptable
258 258 indicesheap = []
259 259 heapq.heapify(indicesheap)
260 260 while gapsheap and density < revlog._srdensitythreshold:
261 261 oppgapsize, gapidx = heapq.heappop(gapsheap)
262 262
263 263 heapq.heappush(indicesheap, gapidx)
264 264
265 265 # the gap sizes are stored as negatives so that the heap pops
266 266 # them in decreasing size order
267 267 readdata -= (-oppgapsize)
268 268 if readdata > 0:
269 269 density = chainpayload / float(readdata)
270 270 else:
271 271 density = 1.0
272 272
273 273 # Cut the revs at collected indices
274 274 previdx = 0
275 275 while indicesheap:
276 276 idx = heapq.heappop(indicesheap)
277 277
278 278 chunk = _trimchunk(revlog, revs, previdx, idx)
279 279 if chunk:
280 280 yield chunk
281 281
282 282 previdx = idx
283 283
284 284 chunk = _trimchunk(revlog, revs, previdx)
285 285 if chunk:
286 286 yield chunk
287 287
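
A toy trace of this slicing policy (a sketch; the fake revlog and its byte offsets are invented solely to force one oversized gap):

class fakerevlog(object):
    # minimal stand-in exposing only what _slicechunk reads
    _srmingapsize = 262144          # gaps below this are ignored
    _srdensitythreshold = 0.25      # split until density exceeds this
    _starts = {0: 0, 1: 10, 2: 500000}
    def start(self, rev):
        return self._starts[rev]
    def length(self, rev):
        return 10

# The 479980-byte hole between revs 1 and 2 exceeds _srmingapsize and
# drags the density far below 0.25, so the range is cut at rev 2:
# list(_slicechunk(fakerevlog(), [0, 1, 2])) == [[0, 1], [2]]
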
288 288 @attr.s(slots=True, frozen=True)
289 289 class _deltainfo(object):
290 290 distance = attr.ib()
291 291 deltalen = attr.ib()
292 292 data = attr.ib()
293 293 base = attr.ib()
294 294 chainbase = attr.ib()
295 295 chainlen = attr.ib()
296 296 compresseddeltalen = attr.ib()
297 297
298 298 class _deltacomputer(object):
299 299 def __init__(self, revlog):
300 300 self.revlog = revlog
301 301
302 302 def _getcandidaterevs(self, p1, p2, cachedelta):
303 303 """
304 304 Provides candidate revisions worth computing a delta against,
305 305 grouped by level of preference, most promising first.
306 306 """
307 307 revlog = self.revlog
308 308 curr = len(revlog)
309 309 prev = curr - 1
310 310 p1r, p2r = revlog.rev(p1), revlog.rev(p2)
311 311
312 312 # should we try to build a delta?
313 313 if prev != nullrev and revlog.storedeltachains:
314 314 tested = set()
315 315 # This condition is true most of the time when processing
316 316 # changegroup data into a generaldelta repo. The only time it
317 317 # isn't true is if this is the first revision in a delta chain
318 318 # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
319 319 if cachedelta and revlog._generaldelta and revlog._lazydeltabase:
320 320 # Assume what we received from the server is a good choice
321 321 # building the delta will reuse the cache
322 322 yield (cachedelta[0],)
323 323 tested.add(cachedelta[0])
324 324
325 325 if revlog._generaldelta:
326 326 # exclude the lazydeltabase candidate already tested, if any
327 327 parents = [p for p in (p1r, p2r)
328 328 if p != nullrev and p not in tested]
329 if parents and not revlog._aggressivemergedeltas:
330 # Pick whichever parent is closer to us (to minimize the
331 # chance of having to build a fulltext).
332 parents = [max(parents)]
333 tested.update(parents)
334 yield parents
329
330 if not revlog._aggressivemergedeltas and len(parents) == 2:
331 parents.sort()
332 # To minimize the chance of having to build a fulltext,
333 # pick first whichever parent is closest to us (max rev)
334 yield (parents[1],)
335 # then the other one (min rev) if the first did not fit
336 yield (parents[0],)
337 tested.update(parents)
338 elif len(parents) > 0:
339 # Test all parents (1 or 2), and keep the best candidate
340 yield parents
341 tested.update(parents)
335 342
336 343 if prev not in tested:
337 344 # other approaches failed; try against prev to hopefully save us
338 345 # a fulltext.
339 346 yield (prev,)
340 347
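
The hunk above is this changeset's behavioral change: when aggressivemergedeltas is off and the revision is a merge, the generator used to try only the closer parent before falling back to prev; it now also suggests the other parent first (issue5481). The resulting order of candidate groups, as an illustrative standalone sketch (names and revision numbers are stand-ins, not the real revlog API):

def candidategroups(cachedbase, p1r, p2r, prev):
    if cachedbase is not None:
        yield (cachedbase,)          # delta received over the wire
    lo, hi = sorted((p1r, p2r))
    yield (hi,)                      # closer parent first (max rev)
    yield (lo,)                      # then the other parent (issue5481)
    yield (prev,)                    # last resort: previous revision

# e.g. list(candidategroups(None, 5, 8, 9)) -> [(8,), (5,), (9,)]
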
341 348 def buildtext(self, revinfo, fh):
342 349 """Builds a fulltext version of a revision
343 350
344 351 revinfo: _revisioninfo instance that contains all needed info
345 352 fh: file handle to either the .i or the .d revlog file,
346 353 depending on whether it is inlined or not
347 354 """
348 355 btext = revinfo.btext
349 356 if btext[0] is not None:
350 357 return btext[0]
351 358
352 359 revlog = self.revlog
353 360 cachedelta = revinfo.cachedelta
354 361 flags = revinfo.flags
355 362 node = revinfo.node
356 363
357 364 baserev = cachedelta[0]
358 365 delta = cachedelta[1]
359 366 # special case deltas which replace entire base; no need to decode
360 367 # base revision. this neatly avoids censored bases, which throw when
361 368 # they're decoded.
362 369 hlen = struct.calcsize(">lll")
363 370 if delta[:hlen] == mdiff.replacediffheader(revlog.rawsize(baserev),
364 371 len(delta) - hlen):
365 372 btext[0] = delta[hlen:]
366 373 else:
367 374 # deltabase is rawtext before being changed by flag processors,
368 375 # which is equivalent to non-raw text
369 376 basetext = revlog.revision(baserev, _df=fh, raw=False)
370 377 btext[0] = mdiff.patch(basetext, delta)
371 378
372 379 try:
373 380 res = revlog._processflags(btext[0], flags, 'read', raw=True)
374 381 btext[0], validatehash = res
375 382 if validatehash:
376 383 revlog.checkhash(btext[0], node, p1=revinfo.p1, p2=revinfo.p2)
377 384 if flags & REVIDX_ISCENSORED:
378 385 raise RevlogError(_('node %s is not censored') % node)
379 386 except CensoredNodeError:
380 387 # must pass the censored index flag to add censored revisions
381 388 if not flags & REVIDX_ISCENSORED:
382 389 raise
383 390 return btext[0]
384 391
385 392 def _builddeltadiff(self, base, revinfo, fh):
386 393 revlog = self.revlog
387 394 t = self.buildtext(revinfo, fh)
388 395 if revlog.iscensored(base):
389 396 # deltas based on a censored revision must replace the
390 397 # full content in one patch, so delta works everywhere
391 398 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
392 399 delta = header + t
393 400 else:
394 401 ptext = revlog.revision(base, _df=fh, raw=True)
395 402 delta = mdiff.textdiff(ptext, t)
396 403
397 404 return delta
398 405
399 406 def _builddeltainfo(self, revinfo, base, fh):
400 407 # can we use the cached delta?
401 408 if revinfo.cachedelta and revinfo.cachedelta[0] == base:
402 409 delta = revinfo.cachedelta[1]
403 410 else:
404 411 delta = self._builddeltadiff(base, revinfo, fh)
405 412 revlog = self.revlog
406 413 header, data = revlog.compress(delta)
407 414 deltalen = len(header) + len(data)
408 415 chainbase = revlog.chainbase(base)
409 416 offset = revlog.end(len(revlog) - 1)
410 417 dist = deltalen + offset - revlog.start(chainbase)
411 418 if revlog._generaldelta:
412 419 deltabase = base
413 420 else:
414 421 deltabase = chainbase
415 422 chainlen, compresseddeltalen = revlog._chaininfo(base)
416 423 chainlen += 1
417 424 compresseddeltalen += deltalen
418 425 return _deltainfo(dist, deltalen, (header, data), deltabase,
419 426 chainbase, chainlen, compresseddeltalen)
420 427
421 428 def finddeltainfo(self, revinfo, fh):
422 429 """Find an acceptable delta against a candidate revision
423 430
424 431 revinfo: information about the revision (instance of _revisioninfo)
425 432 fh: file handle to either the .i or the .d revlog file,
426 433 depending on whether it is inlined or not
427 434
428 435 Returns the first acceptable candidate revision, as ordered by
429 436 _getcandidaterevs
430 437 """
431 438 cachedelta = revinfo.cachedelta
432 439 p1 = revinfo.p1
433 440 p2 = revinfo.p2
434 441 revlog = self.revlog
435 442
436 443 deltainfo = None
437 444 for candidaterevs in self._getcandidaterevs(p1, p2, cachedelta):
438 445 nominateddeltas = []
439 446 for candidaterev in candidaterevs:
440 447 # no delta for rawtext-changing revs (see "candelta" for why)
441 448 if revlog.flags(candidaterev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
442 449 continue
443 450 candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
444 451 if revlog._isgooddeltainfo(candidatedelta, revinfo.textlen):
445 452 nominateddeltas.append(candidatedelta)
446 453 if nominateddeltas:
447 454 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
448 455 break
449 456
450 457 return deltainfo
451 458
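
The selection policy above, restated as a standalone sketch: the first candidate group that yields any acceptable delta ends the search, and within that group the smallest compressed delta wins. Here build and isgood are hypothetical stand-ins for _builddeltainfo and _isgooddeltainfo:

_di = collections.namedtuple('_di', 'base deltalen')  # stand-in _deltainfo

def _pickdelta_sketch(groups, build, isgood):
    # groups: candidate-rev tuples in the order _getcandidaterevs yields
    for group in groups:
        nominated = [d for d in (build(r) for r in group) if isgood(d)]
        if nominated:
            # within the first viable group, the smallest delta wins
            return min(nominated, key=lambda d: d.deltalen)
    return None

# e.g. build=lambda r: _di(r, 100 - r), isgood=lambda d: True:
# _pickdelta_sketch([(8, 5), (9,)], build, isgood) picks the rev 8 delta
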
452 459 @attr.s(slots=True, frozen=True)
453 460 class _revisioninfo(object):
454 461 """Information about a revision that allows building its fulltext
455 462 node: expected hash of the revision
456 463 p1, p2: parent revs of the revision
457 464 btext: built text cache consisting of a one-element list
458 465 cachedelta: (baserev, uncompressed_delta) or None
459 466 flags: flags associated with the revision storage
460 467
461 468 One of btext[0] or cachedelta must be set.
462 469 """
463 470 node = attr.ib()
464 471 p1 = attr.ib()
465 472 p2 = attr.ib()
466 473 btext = attr.ib()
467 474 textlen = attr.ib()
468 475 cachedelta = attr.ib()
469 476 flags = attr.ib()
470 477
471 478 # index v0:
472 479 # 4 bytes: offset
473 480 # 4 bytes: compressed length
474 481 # 4 bytes: base rev
475 482 # 4 bytes: link rev
476 483 # 20 bytes: parent 1 nodeid
477 484 # 20 bytes: parent 2 nodeid
478 485 # 20 bytes: nodeid
479 486 indexformatv0 = struct.Struct(">4l20s20s20s")
480 487 indexformatv0_pack = indexformatv0.pack
481 488 indexformatv0_unpack = indexformatv0.unpack
482 489
483 490 class revlogoldio(object):
484 491 def __init__(self):
485 492 self.size = indexformatv0.size
486 493
487 494 def parseindex(self, data, inline):
488 495 s = self.size
489 496 index = []
490 497 nodemap = {nullid: nullrev}
491 498 n = off = 0
492 499 l = len(data)
493 500 while off + s <= l:
494 501 cur = data[off:off + s]
495 502 off += s
496 503 e = indexformatv0_unpack(cur)
497 504 # transform to revlogv1 format
498 505 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
499 506 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
500 507 index.append(e2)
501 508 nodemap[e[6]] = n
502 509 n += 1
503 510
504 511 # add the magic null revision at -1
505 512 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
506 513
507 514 return index, nodemap, None
508 515
509 516 def packentry(self, entry, node, version, rev):
510 517 if gettype(entry[0]):
511 518 raise RevlogError(_('index entry flags need revlog version 1'))
512 519 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
513 520 node(entry[5]), node(entry[6]), entry[7])
514 521 return indexformatv0_pack(*e2)
515 522
516 523 # index ng:
517 524 # 6 bytes: offset
518 525 # 2 bytes: flags
519 526 # 4 bytes: compressed length
520 527 # 4 bytes: uncompressed length
521 528 # 4 bytes: base rev
522 529 # 4 bytes: link rev
523 530 # 4 bytes: parent 1 rev
524 531 # 4 bytes: parent 2 rev
525 532 # 32 bytes: nodeid
526 533 indexformatng = struct.Struct(">Qiiiiii20s12x")
527 534 indexformatng_pack = indexformatng.pack
528 535 versionformat = struct.Struct(">I")
529 536 versionformat_pack = versionformat.pack
530 537 versionformat_unpack = versionformat.unpack
531 538
532 539 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
533 540 # signed integer)
534 541 _maxentrysize = 0x7fffffff
535 542
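
A sketch of one v1 index record, using the struct and helpers defined above (the field values and the dummy node are illustrative only):

entry = (offset_type(4096, 0),   # offset 4096, no flags
         100, 230,               # compressed / uncompressed lengths
         7, 7,                   # base rev, link rev
         6, -1,                  # parent revs
         b'\xaa' * 20)           # dummy nodeid
record = indexformatng_pack(*entry)
assert len(record) == indexformatng.size   # fixed 64-byte records
offset_flags = indexformatng.unpack(record)[0]
assert getoffset(offset_flags) == 4096
assert gettype(offset_flags) == 0
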
536 543 class revlogio(object):
537 544 def __init__(self):
538 545 self.size = indexformatng.size
539 546
540 547 def parseindex(self, data, inline):
541 548 # call the C implementation to parse the index data
542 549 index, cache = parsers.parse_index2(data, inline)
543 550 return index, getattr(index, 'nodemap', None), cache
544 551
545 552 def packentry(self, entry, node, version, rev):
546 553 p = indexformatng_pack(*entry)
547 554 if rev == 0:
548 555 p = versionformat_pack(version) + p[4:]
549 556 return p
550 557
551 558 class revlog(object):
552 559 """
553 560 the underlying revision storage object
554 561
555 562 A revlog consists of two parts, an index and the revision data.
556 563
557 564 The index is a file with a fixed record size containing
558 565 information on each revision, including its nodeid (hash), the
559 566 nodeids of its parents, the position and offset of its data within
560 567 the data file, and the revision it's based on. Finally, each entry
561 568 contains a linkrev entry that can serve as a pointer to external
562 569 data.
563 570
564 571 The revision data itself is a linear collection of data chunks.
565 572 Each chunk represents a revision and is usually represented as a
566 573 delta against the previous chunk. To bound lookup time, runs of
567 574 deltas are limited to about 2 times the length of the original
568 575 version data. This makes retrieval of a version proportional to
569 576 its size, or O(1) relative to the number of revisions.
570 577
571 578 Both pieces of the revlog are written to in an append-only
572 579 fashion, which means we never need to rewrite a file to insert or
573 580 remove data, and can use some simple techniques to avoid the need
574 581 for locking while reading.
575 582
576 583 If checkambig, indexfile is opened with checkambig=True at
577 584 writing, to avoid file stat ambiguity.
578 585
579 586 If mmaplargeindex is True, and an mmapindexthreshold is set, the
580 587 index will be mmapped rather than read if it is larger than the
581 588 configured threshold.
582 589
583 590 If censorable is True, the revlog can have censored revisions.
584 591 """
585 592 def __init__(self, opener, indexfile, datafile=None, checkambig=False,
586 593 mmaplargeindex=False, censorable=False):
587 594 """
588 595 create a revlog object
589 596
590 597 opener is a function that abstracts the file opening operation
591 598 and can be used to implement COW semantics or the like.
592 599 """
593 600 self.indexfile = indexfile
594 601 self.datafile = datafile or (indexfile[:-2] + ".d")
595 602 self.opener = opener
596 603 # When True, indexfile is opened with checkambig=True at writing, to
597 604 # avoid file stat ambiguity.
598 605 self._checkambig = checkambig
599 606 self._censorable = censorable
600 607 # 3-tuple of (node, rev, text) for a raw revision.
601 608 self._cache = None
602 609 # Maps rev to chain base rev.
603 610 self._chainbasecache = util.lrucachedict(100)
604 611 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
605 612 self._chunkcache = (0, '')
606 613 # How much data to read and cache into the raw revlog data cache.
607 614 self._chunkcachesize = 65536
608 615 self._maxchainlen = None
609 616 self._aggressivemergedeltas = False
610 617 self.index = []
611 618 # Mapping of partial identifiers to full nodes.
612 619 self._pcache = {}
613 620 # Mapping of revision integer to full node.
614 621 self._nodecache = {nullid: nullrev}
615 622 self._nodepos = None
616 623 self._compengine = 'zlib'
617 624 self._maxdeltachainspan = -1
618 625 self._withsparseread = False
619 626 self._srdensitythreshold = 0.25
620 627 self._srmingapsize = 262144
621 628
622 629 mmapindexthreshold = None
623 630 v = REVLOG_DEFAULT_VERSION
624 631 opts = getattr(opener, 'options', None)
625 632 if opts is not None:
626 633 if 'revlogv2' in opts:
627 634 # version 2 revlogs always use generaldelta.
628 635 v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
629 636 elif 'revlogv1' in opts:
630 637 if 'generaldelta' in opts:
631 638 v |= FLAG_GENERALDELTA
632 639 else:
633 640 v = 0
634 641 if 'chunkcachesize' in opts:
635 642 self._chunkcachesize = opts['chunkcachesize']
636 643 if 'maxchainlen' in opts:
637 644 self._maxchainlen = opts['maxchainlen']
638 645 if 'aggressivemergedeltas' in opts:
639 646 self._aggressivemergedeltas = opts['aggressivemergedeltas']
640 647 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
641 648 if 'compengine' in opts:
642 649 self._compengine = opts['compengine']
643 650 if 'maxdeltachainspan' in opts:
644 651 self._maxdeltachainspan = opts['maxdeltachainspan']
645 652 if mmaplargeindex and 'mmapindexthreshold' in opts:
646 653 mmapindexthreshold = opts['mmapindexthreshold']
647 654 self._withsparseread = bool(opts.get('with-sparse-read', False))
648 655 if 'sparse-read-density-threshold' in opts:
649 656 self._srdensitythreshold = opts['sparse-read-density-threshold']
650 657 if 'sparse-read-min-gap-size' in opts:
651 658 self._srmingapsize = opts['sparse-read-min-gap-size']
652 659
653 660 if self._chunkcachesize <= 0:
654 661 raise RevlogError(_('revlog chunk cache size %r is not greater '
655 662 'than 0') % self._chunkcachesize)
656 663 elif self._chunkcachesize & (self._chunkcachesize - 1):
657 664 raise RevlogError(_('revlog chunk cache size %r is not a power '
658 665 'of 2') % self._chunkcachesize)
659 666
660 667 indexdata = ''
661 668 self._initempty = True
662 669 try:
663 670 with self._indexfp() as f:
664 671 if (mmapindexthreshold is not None and
665 672 self.opener.fstat(f).st_size >= mmapindexthreshold):
666 673 indexdata = util.buffer(util.mmapread(f))
667 674 else:
668 675 indexdata = f.read()
669 676 if len(indexdata) > 0:
670 677 v = versionformat_unpack(indexdata[:4])[0]
671 678 self._initempty = False
672 679 except IOError as inst:
673 680 if inst.errno != errno.ENOENT:
674 681 raise
675 682
676 683 self.version = v
677 684 self._inline = v & FLAG_INLINE_DATA
678 685 self._generaldelta = v & FLAG_GENERALDELTA
679 686 flags = v & ~0xFFFF
680 687 fmt = v & 0xFFFF
681 688 if fmt == REVLOGV0:
682 689 if flags:
683 690 raise RevlogError(_('unknown flags (%#04x) in version %d '
684 691 'revlog %s') %
685 692 (flags >> 16, fmt, self.indexfile))
686 693 elif fmt == REVLOGV1:
687 694 if flags & ~REVLOGV1_FLAGS:
688 695 raise RevlogError(_('unknown flags (%#04x) in version %d '
689 696 'revlog %s') %
690 697 (flags >> 16, fmt, self.indexfile))
691 698 elif fmt == REVLOGV2:
692 699 if flags & ~REVLOGV2_FLAGS:
693 700 raise RevlogError(_('unknown flags (%#04x) in version %d '
694 701 'revlog %s') %
695 702 (flags >> 16, fmt, self.indexfile))
696 703 else:
697 704 raise RevlogError(_('unknown version (%d) in revlog %s') %
698 705 (fmt, self.indexfile))
699 706
700 707 self.storedeltachains = True
701 708
702 709 self._io = revlogio()
703 710 if self.version == REVLOGV0:
704 711 self._io = revlogoldio()
705 712 try:
706 713 d = self._io.parseindex(indexdata, self._inline)
707 714 except (ValueError, IndexError):
708 715 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
709 716 self.index, nodemap, self._chunkcache = d
710 717 if nodemap is not None:
711 718 self.nodemap = self._nodecache = nodemap
712 719 if not self._chunkcache:
713 720 self._chunkclear()
714 721 # revnum -> (chain-length, sum-delta-length)
715 722 self._chaininfocache = {}
716 723 # revlog header -> revlog compressor
717 724 self._decompressors = {}
718 725
719 726 @util.propertycache
720 727 def _compressor(self):
721 728 return util.compengines[self._compengine].revlogcompressor()
722 729
723 730 def _indexfp(self, mode='r'):
724 731 """file object for the revlog's index file"""
725 732 args = {r'mode': mode}
726 733 if mode != 'r':
727 734 args[r'checkambig'] = self._checkambig
728 735 if mode == 'w':
729 736 args[r'atomictemp'] = True
730 737 return self.opener(self.indexfile, **args)
731 738
732 739 def _datafp(self, mode='r'):
733 740 """file object for the revlog's data file"""
734 741 return self.opener(self.datafile, mode=mode)
735 742
736 743 @contextlib.contextmanager
737 744 def _datareadfp(self, existingfp=None):
738 745 """file object suitable to read data"""
739 746 if existingfp is not None:
740 747 yield existingfp
741 748 else:
742 749 if self._inline:
743 750 func = self._indexfp
744 751 else:
745 752 func = self._datafp
746 753 with func() as fp:
747 754 yield fp
748 755
749 756 def tip(self):
750 757 return self.node(len(self.index) - 2)
751 758 def __contains__(self, rev):
752 759 return 0 <= rev < len(self)
753 760 def __len__(self):
754 761 return len(self.index) - 1
755 762 def __iter__(self):
756 763 return iter(xrange(len(self)))
757 764 def revs(self, start=0, stop=None):
758 765 """iterate over all rev in this revlog (from start to stop)"""
759 766 step = 1
760 767 if stop is not None:
761 768 if start > stop:
762 769 step = -1
763 770 stop += step
764 771 else:
765 772 stop = len(self)
766 773 return xrange(start, stop, step)
767 774
768 775 @util.propertycache
769 776 def nodemap(self):
770 777 self.rev(self.node(0))
771 778 return self._nodecache
772 779
773 780 def hasnode(self, node):
774 781 try:
775 782 self.rev(node)
776 783 return True
777 784 except KeyError:
778 785 return False
779 786
780 787 def candelta(self, baserev, rev):
781 788 """whether two revisions (baserev, rev) can be delta-ed or not"""
782 789 # Disable delta if either rev requires a content-changing flag
783 790 # processor (ex. LFS). This is because such a flag processor can alter
784 791 # the rawtext content that the delta will be based on, and two clients
785 792 # could have the same revlog node with different flags (i.e. different
786 793 # rawtext contents) and the delta could be incompatible.
787 794 if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
788 795 or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
789 796 return False
790 797 return True
791 798
792 799 def clearcaches(self):
793 800 self._cache = None
794 801 self._chainbasecache.clear()
795 802 self._chunkcache = (0, '')
796 803 self._pcache = {}
797 804
798 805 try:
799 806 self._nodecache.clearcaches()
800 807 except AttributeError:
801 808 self._nodecache = {nullid: nullrev}
802 809 self._nodepos = None
803 810
804 811 def rev(self, node):
805 812 try:
806 813 return self._nodecache[node]
807 814 except TypeError:
808 815 raise
809 816 except RevlogError:
810 817 # parsers.c radix tree lookup failed
811 818 if node == wdirid or node in wdirfilenodeids:
812 819 raise error.WdirUnsupported
813 820 raise LookupError(node, self.indexfile, _('no node'))
814 821 except KeyError:
815 822 # pure python cache lookup failed
816 823 n = self._nodecache
817 824 i = self.index
818 825 p = self._nodepos
819 826 if p is None:
820 827 p = len(i) - 2
821 828 else:
822 829 assert p < len(i)
823 830 for r in xrange(p, -1, -1):
824 831 v = i[r][7]
825 832 n[v] = r
826 833 if v == node:
827 834 self._nodepos = r - 1
828 835 return r
829 836 if node == wdirid or node in wdirfilenodeids:
830 837 raise error.WdirUnsupported
831 838 raise LookupError(node, self.indexfile, _('no node'))
832 839
833 840 # Accessors for index entries.
834 841
835 842 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
836 843 # are flags.
837 844 def start(self, rev):
838 845 return int(self.index[rev][0] >> 16)
839 846
840 847 def flags(self, rev):
841 848 return self.index[rev][0] & 0xFFFF
842 849
843 850 def length(self, rev):
844 851 return self.index[rev][1]
845 852
846 853 def rawsize(self, rev):
847 854 """return the length of the uncompressed text for a given revision"""
848 855 l = self.index[rev][2]
849 856 if l is not None and l >= 0:
850 857 return l
851 858
852 859 t = self.revision(rev, raw=True)
853 860 return len(t)
854 861
855 862 def size(self, rev):
856 863 """length of non-raw text (processed by a "read" flag processor)"""
857 864 # fast path: if no "read" flag processor could change the content,
858 865 # size is rawsize. note: ELLIPSIS is known to not change the content.
859 866 flags = self.flags(rev)
860 867 if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
861 868 return self.rawsize(rev)
862 869
863 870 return len(self.revision(rev, raw=False))
864 871
865 872 def chainbase(self, rev):
866 873 base = self._chainbasecache.get(rev)
867 874 if base is not None:
868 875 return base
869 876
870 877 index = self.index
871 878 base = index[rev][3]
872 879 while base != rev:
873 880 rev = base
874 881 base = index[rev][3]
875 882
876 883 self._chainbasecache[rev] = base
877 884 return base
878 885
879 886 def linkrev(self, rev):
880 887 return self.index[rev][4]
881 888
882 889 def parentrevs(self, rev):
883 890 try:
884 891 entry = self.index[rev]
885 892 except IndexError:
886 893 if rev == wdirrev:
887 894 raise error.WdirUnsupported
888 895 raise
889 896
890 897 return entry[5], entry[6]
891 898
892 899 def node(self, rev):
893 900 try:
894 901 return self.index[rev][7]
895 902 except IndexError:
896 903 if rev == wdirrev:
897 904 raise error.WdirUnsupported
898 905 raise
899 906
900 907 # Derived from index values.
901 908
902 909 def end(self, rev):
903 910 return self.start(rev) + self.length(rev)
904 911
905 912 def parents(self, node):
906 913 i = self.index
907 914 d = i[self.rev(node)]
908 915 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
909 916
910 917 def chainlen(self, rev):
911 918 return self._chaininfo(rev)[0]
912 919
913 920 def _chaininfo(self, rev):
914 921 chaininfocache = self._chaininfocache
915 922 if rev in chaininfocache:
916 923 return chaininfocache[rev]
917 924 index = self.index
918 925 generaldelta = self._generaldelta
919 926 iterrev = rev
920 927 e = index[iterrev]
921 928 clen = 0
922 929 compresseddeltalen = 0
923 930 while iterrev != e[3]:
924 931 clen += 1
925 932 compresseddeltalen += e[1]
926 933 if generaldelta:
927 934 iterrev = e[3]
928 935 else:
929 936 iterrev -= 1
930 937 if iterrev in chaininfocache:
931 938 t = chaininfocache[iterrev]
932 939 clen += t[0]
933 940 compresseddeltalen += t[1]
934 941 break
935 942 e = index[iterrev]
936 943 else:
937 944 # Add text length of base since decompressing that also takes
938 945 # work. For cache hits the length is already included.
939 946 compresseddeltalen += e[1]
940 947 r = (clen, compresseddeltalen)
941 948 chaininfocache[rev] = r
942 949 return r
943 950
944 951 def _deltachain(self, rev, stoprev=None):
945 952 """Obtain the delta chain for a revision.
946 953
947 954 ``stoprev`` specifies a revision to stop at. If not specified, we
948 955 stop at the base of the chain.
949 956
950 957 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
951 958 revs in ascending order and ``stopped`` is a bool indicating whether
952 959 ``stoprev`` was hit.
953 960 """
954 961 # Try C implementation.
955 962 try:
956 963 return self.index.deltachain(rev, stoprev, self._generaldelta)
957 964 except AttributeError:
958 965 pass
959 966
960 967 chain = []
961 968
962 969 # Alias to prevent attribute lookup in tight loop.
963 970 index = self.index
964 971 generaldelta = self._generaldelta
965 972
966 973 iterrev = rev
967 974 e = index[iterrev]
968 975 while iterrev != e[3] and iterrev != stoprev:
969 976 chain.append(iterrev)
970 977 if generaldelta:
971 978 iterrev = e[3]
972 979 else:
973 980 iterrev -= 1
974 981 e = index[iterrev]
975 982
976 983 if iterrev == stoprev:
977 984 stopped = True
978 985 else:
979 986 chain.append(iterrev)
980 987 stopped = False
981 988
982 989 chain.reverse()
983 990 return chain, stopped
984 991
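
A toy walk of the pure-Python generaldelta branch (a sketch; the index is reduced to its base-rev field, e[3]):

base = {0: 0, 1: 0, 2: 0, 3: 1, 4: 2}   # rev -> delta base rev

def _toydeltachain(rev):
    chain = []
    while base[rev] != rev:              # a full text has base == rev
        chain.append(rev)
        rev = base[rev]
    chain.append(rev)
    chain.reverse()
    return chain

# rev 4 deltas against 2, which deltas against the full text at 0
assert _toydeltachain(4) == [0, 2, 4]
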
985 992 def ancestors(self, revs, stoprev=0, inclusive=False):
986 993 """Generate the ancestors of 'revs' in reverse topological order.
987 994 Does not generate revs lower than stoprev.
988 995
989 996 See the documentation for ancestor.lazyancestors for more details."""
990 997
991 998 return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
992 999 inclusive=inclusive)
993 1000
994 1001 def descendants(self, revs):
995 1002 """Generate the descendants of 'revs' in revision order.
996 1003
997 1004 Yield a sequence of revision numbers starting with a child of
998 1005 some rev in revs, i.e., each revision is *not* considered a
999 1006 descendant of itself. Results are ordered by revision number (a
1000 1007 topological sort)."""
1001 1008 first = min(revs)
1002 1009 if first == nullrev:
1003 1010 for i in self:
1004 1011 yield i
1005 1012 return
1006 1013
1007 1014 seen = set(revs)
1008 1015 for i in self.revs(start=first + 1):
1009 1016 for x in self.parentrevs(i):
1010 1017 if x != nullrev and x in seen:
1011 1018 seen.add(i)
1012 1019 yield i
1013 1020 break
1014 1021
1015 1022 def findcommonmissing(self, common=None, heads=None):
1016 1023 """Return a tuple of the ancestors of common and the ancestors of heads
1017 1024 that are not ancestors of common. In revset terminology, we return the
1018 1025 tuple:
1019 1026
1020 1027 ::common, (::heads) - (::common)
1021 1028
1022 1029 The list is sorted by revision number, meaning it is
1023 1030 topologically sorted.
1024 1031
1025 1032 'heads' and 'common' are both lists of node IDs. If heads is
1026 1033 not supplied, uses all of the revlog's heads. If common is not
1027 1034 supplied, uses nullid."""
1028 1035 if common is None:
1029 1036 common = [nullid]
1030 1037 if heads is None:
1031 1038 heads = self.heads()
1032 1039
1033 1040 common = [self.rev(n) for n in common]
1034 1041 heads = [self.rev(n) for n in heads]
1035 1042
1036 1043 # we want the ancestors, but inclusive
1037 1044 class lazyset(object):
1038 1045 def __init__(self, lazyvalues):
1039 1046 self.addedvalues = set()
1040 1047 self.lazyvalues = lazyvalues
1041 1048
1042 1049 def __contains__(self, value):
1043 1050 return value in self.addedvalues or value in self.lazyvalues
1044 1051
1045 1052 def __iter__(self):
1046 1053 added = self.addedvalues
1047 1054 for r in added:
1048 1055 yield r
1049 1056 for r in self.lazyvalues:
1050 1057 if r not in added:
1051 1058 yield r
1052 1059
1053 1060 def add(self, value):
1054 1061 self.addedvalues.add(value)
1055 1062
1056 1063 def update(self, values):
1057 1064 self.addedvalues.update(values)
1058 1065
1059 1066 has = lazyset(self.ancestors(common))
1060 1067 has.add(nullrev)
1061 1068 has.update(common)
1062 1069
1063 1070 # take all ancestors from heads that aren't in has
1064 1071 missing = set()
1065 1072 visit = collections.deque(r for r in heads if r not in has)
1066 1073 while visit:
1067 1074 r = visit.popleft()
1068 1075 if r in missing:
1069 1076 continue
1070 1077 else:
1071 1078 missing.add(r)
1072 1079 for p in self.parentrevs(r):
1073 1080 if p not in has:
1074 1081 visit.append(p)
1075 1082 missing = list(missing)
1076 1083 missing.sort()
1077 1084 return has, [self.node(miss) for miss in missing]
1078 1085
1079 1086 def incrementalmissingrevs(self, common=None):
1080 1087 """Return an object that can be used to incrementally compute the
1081 1088 revision numbers of the ancestors of arbitrary sets that are not
1082 1089 ancestors of common. This is an ancestor.incrementalmissingancestors
1083 1090 object.
1084 1091
1085 1092 'common' is a list of revision numbers. If common is not supplied, uses
1086 1093 nullrev.
1087 1094 """
1088 1095 if common is None:
1089 1096 common = [nullrev]
1090 1097
1091 1098 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1092 1099
1093 1100 def findmissingrevs(self, common=None, heads=None):
1094 1101 """Return the revision numbers of the ancestors of heads that
1095 1102 are not ancestors of common.
1096 1103
1097 1104 More specifically, return a list of revision numbers corresponding to
1098 1105 nodes N such that every N satisfies the following constraints:
1099 1106
1100 1107 1. N is an ancestor of some node in 'heads'
1101 1108 2. N is not an ancestor of any node in 'common'
1102 1109
1103 1110 The list is sorted by revision number, meaning it is
1104 1111 topologically sorted.
1105 1112
1106 1113 'heads' and 'common' are both lists of revision numbers. If heads is
1107 1114 not supplied, uses all of the revlog's heads. If common is not
1108 1115 supplied, uses nullrev."""
1109 1116 if common is None:
1110 1117 common = [nullrev]
1111 1118 if heads is None:
1112 1119 heads = self.headrevs()
1113 1120
1114 1121 inc = self.incrementalmissingrevs(common=common)
1115 1122 return inc.missingancestors(heads)
1116 1123
1117 1124 def findmissing(self, common=None, heads=None):
1118 1125 """Return the ancestors of heads that are not ancestors of common.
1119 1126
1120 1127 More specifically, return a list of nodes N such that every N
1121 1128 satisfies the following constraints:
1122 1129
1123 1130 1. N is an ancestor of some node in 'heads'
1124 1131 2. N is not an ancestor of any node in 'common'
1125 1132
1126 1133 The list is sorted by revision number, meaning it is
1127 1134 topologically sorted.
1128 1135
1129 1136 'heads' and 'common' are both lists of node IDs. If heads is
1130 1137 not supplied, uses all of the revlog's heads. If common is not
1131 1138 supplied, uses nullid."""
1132 1139 if common is None:
1133 1140 common = [nullid]
1134 1141 if heads is None:
1135 1142 heads = self.heads()
1136 1143
1137 1144 common = [self.rev(n) for n in common]
1138 1145 heads = [self.rev(n) for n in heads]
1139 1146
1140 1147 inc = self.incrementalmissingrevs(common=common)
1141 1148 return [self.node(r) for r in inc.missingancestors(heads)]
1142 1149
1143 1150 def nodesbetween(self, roots=None, heads=None):
1144 1151 """Return a topological path from 'roots' to 'heads'.
1145 1152
1146 1153 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1147 1154 topologically sorted list of all nodes N that satisfy both of
1148 1155 these constraints:
1149 1156
1150 1157 1. N is a descendant of some node in 'roots'
1151 1158 2. N is an ancestor of some node in 'heads'
1152 1159
1153 1160 Every node is considered to be both a descendant and an ancestor
1154 1161 of itself, so every reachable node in 'roots' and 'heads' will be
1155 1162 included in 'nodes'.
1156 1163
1157 1164 'outroots' is the list of reachable nodes in 'roots', i.e., the
1158 1165 subset of 'roots' that is returned in 'nodes'. Likewise,
1159 1166 'outheads' is the subset of 'heads' that is also in 'nodes'.
1160 1167
1161 1168 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1162 1169 unspecified, uses nullid as the only root. If 'heads' is
1163 1170 unspecified, uses list of all of the revlog's heads."""
1164 1171 nonodes = ([], [], [])
1165 1172 if roots is not None:
1166 1173 roots = list(roots)
1167 1174 if not roots:
1168 1175 return nonodes
1169 1176 lowestrev = min([self.rev(n) for n in roots])
1170 1177 else:
1171 1178 roots = [nullid] # Everybody's a descendant of nullid
1172 1179 lowestrev = nullrev
1173 1180 if (lowestrev == nullrev) and (heads is None):
1174 1181 # We want _all_ the nodes!
1175 1182 return ([self.node(r) for r in self], [nullid], list(self.heads()))
1176 1183 if heads is None:
1177 1184 # All nodes are ancestors, so the latest ancestor is the last
1178 1185 # node.
1179 1186 highestrev = len(self) - 1
1180 1187 # Set ancestors to None to signal that every node is an ancestor.
1181 1188 ancestors = None
1182 1189 # Set heads to an empty dictionary for later discovery of heads
1183 1190 heads = {}
1184 1191 else:
1185 1192 heads = list(heads)
1186 1193 if not heads:
1187 1194 return nonodes
1188 1195 ancestors = set()
1189 1196 # Turn heads into a dictionary so we can remove 'fake' heads.
1190 1197 # Also, later we will be using it to filter out the heads we can't
1191 1198 # find from roots.
1192 1199 heads = dict.fromkeys(heads, False)
1193 1200 # Start at the top and keep marking parents until we're done.
1194 1201 nodestotag = set(heads)
1195 1202 # Remember where the top was so we can use it as a limit later.
1196 1203 highestrev = max([self.rev(n) for n in nodestotag])
1197 1204 while nodestotag:
1198 1205 # grab a node to tag
1199 1206 n = nodestotag.pop()
1200 1207 # Never tag nullid
1201 1208 if n == nullid:
1202 1209 continue
1203 1210 # A node's revision number represents its place in a
1204 1211 # topologically sorted list of nodes.
1205 1212 r = self.rev(n)
1206 1213 if r >= lowestrev:
1207 1214 if n not in ancestors:
1208 1215 # If we are possibly a descendant of one of the roots
1209 1216 # and we haven't already been marked as an ancestor
1210 1217 ancestors.add(n) # Mark as ancestor
1211 1218 # Add non-nullid parents to list of nodes to tag.
1212 1219 nodestotag.update([p for p in self.parents(n) if
1213 1220 p != nullid])
1214 1221 elif n in heads: # We've seen it before, is it a fake head?
1215 1222 # So it is, real heads should not be the ancestors of
1216 1223 # any other heads.
1217 1224 heads.pop(n)
1218 1225 if not ancestors:
1219 1226 return nonodes
1220 1227 # Now that we have our set of ancestors, we want to remove any
1221 1228 # roots that are not ancestors.
1222 1229
1223 1230 # If one of the roots was nullid, everything is included anyway.
1224 1231 if lowestrev > nullrev:
1225 1232 # But, since we weren't, let's recompute the lowest rev to not
1226 1233 # include roots that aren't ancestors.
1227 1234
1228 1235 # Filter out roots that aren't ancestors of heads
1229 1236 roots = [root for root in roots if root in ancestors]
1230 1237 # Recompute the lowest revision
1231 1238 if roots:
1232 1239 lowestrev = min([self.rev(root) for root in roots])
1233 1240 else:
1234 1241 # No more roots? Return empty list
1235 1242 return nonodes
1236 1243 else:
1237 1244 # We are descending from nullid, and don't need to care about
1238 1245 # any other roots.
1239 1246 lowestrev = nullrev
1240 1247 roots = [nullid]
1241 1248 # Transform our roots list into a set.
1242 1249 descendants = set(roots)
1243 1250 # Also, keep the original roots so we can filter out roots that aren't
1244 1251 # 'real' roots (i.e. are descended from other roots).
1245 1252 roots = descendants.copy()
1246 1253 # Our topologically sorted list of output nodes.
1247 1254 orderedout = []
1248 1255 # Don't start at nullid since we don't want nullid in our output list,
1249 1256 # and if nullid shows up in descendants, empty parents will look like
1250 1257 # they're descendants.
1251 1258 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1252 1259 n = self.node(r)
1253 1260 isdescendant = False
1254 1261 if lowestrev == nullrev: # Everybody is a descendant of nullid
1255 1262 isdescendant = True
1256 1263 elif n in descendants:
1257 1264 # n is already a descendant
1258 1265 isdescendant = True
1259 1266 # This check only needs to be done here because all the roots
1260 1267 # will start being marked as descendants before the loop.
1261 1268 if n in roots:
1262 1269 # If n was a root, check if it's a 'real' root.
1263 1270 p = tuple(self.parents(n))
1264 1271 # If any of its parents are descendants, it's not a root.
1265 1272 if (p[0] in descendants) or (p[1] in descendants):
1266 1273 roots.remove(n)
1267 1274 else:
1268 1275 p = tuple(self.parents(n))
1269 1276 # A node is a descendant if either of its parents are
1270 1277 # descendants. (We seeded the descendants set with the roots
1271 1278 # up there, remember?)
1272 1279 if (p[0] in descendants) or (p[1] in descendants):
1273 1280 descendants.add(n)
1274 1281 isdescendant = True
1275 1282 if isdescendant and ((ancestors is None) or (n in ancestors)):
1276 1283 # Only include nodes that are both descendants and ancestors.
1277 1284 orderedout.append(n)
1278 1285 if (ancestors is not None) and (n in heads):
1279 1286 # We're trying to figure out which heads are reachable
1280 1287 # from roots.
1281 1288 # Mark this head as having been reached
1282 1289 heads[n] = True
1283 1290 elif ancestors is None:
1284 1291 # Otherwise, we're trying to discover the heads.
1285 1292 # Assume this is a head because if it isn't, the next step
1286 1293 # will eventually remove it.
1287 1294 heads[n] = True
1288 1295 # But, obviously its parents aren't.
1289 1296 for p in self.parents(n):
1290 1297 heads.pop(p, None)
1291 1298 heads = [head for head, flag in heads.iteritems() if flag]
1292 1299 roots = list(roots)
1293 1300 assert orderedout
1294 1301 assert roots
1295 1302 assert heads
1296 1303 return (orderedout, roots, heads)
1297 1304
1298 1305 def headrevs(self):
1299 1306 try:
1300 1307 return self.index.headrevs()
1301 1308 except AttributeError:
1302 1309 return self._headrevs()
1303 1310
1304 1311 def computephases(self, roots):
1305 1312 return self.index.computephasesmapsets(roots)
1306 1313
1307 1314 def _headrevs(self):
1308 1315 count = len(self)
1309 1316 if not count:
1310 1317 return [nullrev]
1311 1318 # we won't iterate over filtered revs, so nobody is a head at start
1312 1319 ishead = [0] * (count + 1)
1313 1320 index = self.index
1314 1321 for r in self:
1315 1322 ishead[r] = 1 # I may be a head
1316 1323 e = index[r]
1317 1324 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1318 1325 return [r for r, val in enumerate(ishead) if val]
1319 1326
1320 1327 def heads(self, start=None, stop=None):
1321 1328 """return the list of all nodes that have no children
1322 1329
1323 1330 if start is specified, only heads that are descendants of
1324 1331 start will be returned
1325 1332 if stop is specified, it will consider all the revs from stop
1326 1333 as if they had no children
1327 1334 """
1328 1335 if start is None and stop is None:
1329 1336 if not len(self):
1330 1337 return [nullid]
1331 1338 return [self.node(r) for r in self.headrevs()]
1332 1339
1333 1340 if start is None:
1334 1341 start = nullid
1335 1342 if stop is None:
1336 1343 stop = []
1337 1344 stoprevs = set([self.rev(n) for n in stop])
1338 1345 startrev = self.rev(start)
1339 1346 reachable = {startrev}
1340 1347 heads = {startrev}
1341 1348
1342 1349 parentrevs = self.parentrevs
1343 1350 for r in self.revs(start=startrev + 1):
1344 1351 for p in parentrevs(r):
1345 1352 if p in reachable:
1346 1353 if r not in stoprevs:
1347 1354 reachable.add(r)
1348 1355 heads.add(r)
1349 1356 if p in heads and p not in stoprevs:
1350 1357 heads.remove(p)
1351 1358
1352 1359 return [self.node(r) for r in heads]
1353 1360
1354 1361 def children(self, node):
1355 1362 """find the children of a given node"""
1356 1363 c = []
1357 1364 p = self.rev(node)
1358 1365 for r in self.revs(start=p + 1):
1359 1366 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1360 1367 if prevs:
1361 1368 for pr in prevs:
1362 1369 if pr == p:
1363 1370 c.append(self.node(r))
1364 1371 elif p == nullrev:
1365 1372 c.append(self.node(r))
1366 1373 return c
1367 1374
1368 1375 def descendant(self, start, end):
1369 1376 if start == nullrev:
1370 1377 return True
1371 1378 for i in self.descendants([start]):
1372 1379 if i == end:
1373 1380 return True
1374 1381 elif i > end:
1375 1382 break
1376 1383 return False
1377 1384
1378 1385 def commonancestorsheads(self, a, b):
1379 1386 """calculate all the heads of the common ancestors of nodes a and b"""
1380 1387 a, b = self.rev(a), self.rev(b)
1381 1388 try:
1382 1389 ancs = self.index.commonancestorsheads(a, b)
1383 1390 except (AttributeError, OverflowError): # C implementation failed
1384 1391 ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)
1385 1392 return pycompat.maplist(self.node, ancs)
1386 1393
1387 1394 def isancestor(self, a, b):
1388 1395 """return True if node a is an ancestor of node b
1389 1396
1390 1397 The implementation of this is trivial but the use of
1391 1398 commonancestorsheads is not."""
1392 1399 return a in self.commonancestorsheads(a, b)
1393 1400
1394 1401 def ancestor(self, a, b):
1395 1402 """calculate the "best" common ancestor of nodes a and b"""
1396 1403
1397 1404 a, b = self.rev(a), self.rev(b)
1398 1405 try:
1399 1406 ancs = self.index.ancestors(a, b)
1400 1407 except (AttributeError, OverflowError):
1401 1408 ancs = ancestor.ancestors(self.parentrevs, a, b)
1402 1409 if ancs:
1403 1410 # choose a consistent winner when there's a tie
1404 1411 return min(map(self.node, ancs))
1405 1412 return nullid
1406 1413
1407 1414 def _match(self, id):
1408 1415 if isinstance(id, int):
1409 1416 # rev
1410 1417 return self.node(id)
1411 1418 if len(id) == 20:
1412 1419 # possibly a binary node
1413 1420 # odds of a binary node being all hex in ASCII are 1 in 10**25
1414 1421 try:
1415 1422 node = id
1416 1423 self.rev(node) # quick search the index
1417 1424 return node
1418 1425 except LookupError:
1419 1426 pass # may be partial hex id
1420 1427 try:
1421 1428 # str(rev)
1422 1429 rev = int(id)
1423 1430 if "%d" % rev != id:
1424 1431 raise ValueError
1425 1432 if rev < 0:
1426 1433 rev = len(self) + rev
1427 1434 if rev < 0 or rev >= len(self):
1428 1435 raise ValueError
1429 1436 return self.node(rev)
1430 1437 except (ValueError, OverflowError):
1431 1438 pass
1432 1439 if len(id) == 40:
1433 1440 try:
1434 1441 # a full hex nodeid?
1435 1442 node = bin(id)
1436 1443 self.rev(node)
1437 1444 return node
1438 1445 except (TypeError, LookupError):
1439 1446 pass
1440 1447
1441 1448 def _partialmatch(self, id):
1442 1449 # we don't care about wdirfilenodeids as they should always be full hashes
1443 1450 maybewdir = wdirhex.startswith(id)
1444 1451 try:
1445 1452 partial = self.index.partialmatch(id)
1446 1453 if partial and self.hasnode(partial):
1447 1454 if maybewdir:
1448 1455 # single 'ff...' match in radix tree, ambiguous with wdir
1449 1456 raise RevlogError
1450 1457 return partial
1451 1458 if maybewdir:
1452 1459 # no 'ff...' match in radix tree, wdir identified
1453 1460 raise error.WdirUnsupported
1454 1461 return None
1455 1462 except RevlogError:
1456 1463 # parsers.c radix tree lookup gave multiple matches
1457 1464 # fast path: for unfiltered changelog, radix tree is accurate
1458 1465 if not getattr(self, 'filteredrevs', None):
1459 1466 raise LookupError(id, self.indexfile,
1460 1467 _('ambiguous identifier'))
1461 1468 # fall through to slow path that filters hidden revisions
1462 1469 except (AttributeError, ValueError):
1463 1470 # we are pure python, or key was too short to search radix tree
1464 1471 pass
1465 1472
1466 1473 if id in self._pcache:
1467 1474 return self._pcache[id]
1468 1475
1469 1476 if len(id) <= 40:
1470 1477 try:
1471 1478 # hex(node)[:...]
1472 1479 l = len(id) // 2 # grab an even number of digits
1473 1480 prefix = bin(id[:l * 2])
1474 1481 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1475 1482 nl = [n for n in nl if hex(n).startswith(id) and
1476 1483 self.hasnode(n)]
1477 1484 if len(nl) > 0:
1478 1485 if len(nl) == 1 and not maybewdir:
1479 1486 self._pcache[id] = nl[0]
1480 1487 return nl[0]
1481 1488 raise LookupError(id, self.indexfile,
1482 1489 _('ambiguous identifier'))
1483 1490 if maybewdir:
1484 1491 raise error.WdirUnsupported
1485 1492 return None
1486 1493 except TypeError:
1487 1494 pass
1488 1495
1489 1496 def lookup(self, id):
1490 1497 """locate a node based on:
1491 1498 - revision number or str(revision number)
1492 1499 - nodeid or subset of hex nodeid
1493 1500 """
1494 1501 n = self._match(id)
1495 1502 if n is not None:
1496 1503 return n
1497 1504 n = self._partialmatch(id)
1498 1505 if n:
1499 1506 return n
1500 1507
1501 1508 raise LookupError(id, self.indexfile, _('no match found'))
1502 1509
1503 1510 def shortest(self, node, minlength=1):
1504 1511 """Find the shortest unambiguous prefix that matches node."""
1505 1512 def isvalid(prefix):
1506 1513 try:
1507 1514 node = self._partialmatch(prefix)
1508 1515 except error.RevlogError:
1509 1516 return False
1510 1517 except error.WdirUnsupported:
1511 1518 # single 'ff...' match
1512 1519 return True
1513 1520 if node is None:
1514 1521 raise LookupError(node, self.indexfile, _('no node'))
1515 1522 return True
1516 1523
1517 1524 def maybewdir(prefix):
1518 1525 return all(c == 'f' for c in prefix)
1519 1526
1520 1527 hexnode = hex(node)
1521 1528
1522 1529 def disambiguate(hexnode, minlength):
1523 1530 """Disambiguate against wdirid."""
1524 1531 for length in range(minlength, 41):
1525 1532 prefix = hexnode[:length]
1526 1533 if not maybewdir(prefix):
1527 1534 return prefix
1528 1535
1529 1536 if not getattr(self, 'filteredrevs', None):
1530 1537 try:
1531 1538 length = max(self.index.shortest(node), minlength)
1532 1539 return disambiguate(hexnode, length)
1533 1540 except RevlogError:
1534 1541 if node != wdirid:
1535 1542 raise LookupError(node, self.indexfile, _('no node'))
1536 1543 except AttributeError:
1537 1544 # Fall through to pure code
1538 1545 pass
1539 1546
1540 1547 if node == wdirid:
1541 1548 for length in range(minlength, 41):
1542 1549 prefix = hexnode[:length]
1543 1550 if isvalid(prefix):
1544 1551 return prefix
1545 1552
1546 1553 for length in range(minlength, 41):
1547 1554 prefix = hexnode[:length]
1548 1555 if isvalid(prefix):
1549 1556 return disambiguate(hexnode, length)
1550 1557
1551 1558 def cmp(self, node, text):
1552 1559 """compare text with a given file revision
1553 1560
1554 1561 returns True if text is different than what is stored.
1555 1562 """
1556 1563 p1, p2 = self.parents(node)
1557 1564 return hash(text, p1, p2) != node
1558 1565
1559 1566 def _cachesegment(self, offset, data):
1560 1567 """Add a segment to the revlog cache.
1561 1568
1562 1569 Accepts an absolute offset and the data that is at that location.
1563 1570 """
1564 1571 o, d = self._chunkcache
1565 1572 # try to add to existing cache
1566 1573 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1567 1574 self._chunkcache = o, d + data
1568 1575 else:
1569 1576 self._chunkcache = offset, data
1570 1577
1571 1578 def _readsegment(self, offset, length, df=None):
1572 1579 """Load a segment of raw data from the revlog.
1573 1580
1574 1581 Accepts an absolute offset, length to read, and an optional existing
1575 1582 file handle to read from.
1576 1583
1577 1584 If an existing file handle is passed, its seek position will be
1578 1585 changed and NOT restored afterwards.
1579 1586
1580 1587 Returns a str or buffer of raw byte data.
1581 1588 """
1582 1589 # Cache data both forward and backward around the requested
1583 1590 # data, in a fixed size window. This helps speed up operations
1584 1591 # involving reading the revlog backwards.
1585 1592 cachesize = self._chunkcachesize
1586 1593 realoffset = offset & ~(cachesize - 1)
1587 1594 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
1588 1595 - realoffset)
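# Alignment sketch (assumes cachesize is a power of two, e.g. 65536):
# offset=70000, length=100 -> realoffset=65536, reallength=65536, so
# the whole 64KB window around the request is read and cached.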
1589 1596 with self._datareadfp(df) as df:
1590 1597 df.seek(realoffset)
1591 1598 d = df.read(reallength)
1592 1599 self._cachesegment(realoffset, d)
1593 1600 if offset != realoffset or reallength != length:
1594 1601 return util.buffer(d, offset - realoffset, length)
1595 1602 return d
1596 1603
1597 1604 def _getsegment(self, offset, length, df=None):
1598 1605 """Obtain a segment of raw data from the revlog.
1599 1606
1600 1607 Accepts an absolute offset, length of bytes to obtain, and an
1601 1608 optional file handle to the already-opened revlog. If the file
1602 1609 handle is used, its original seek position will not be preserved.
1603 1610
1604 1611 Requests for data may be returned from a cache.
1605 1612
1606 1613 Returns a str or a buffer instance of raw byte data.
1607 1614 """
1608 1615 o, d = self._chunkcache
1609 1616 l = len(d)
1610 1617
1611 1618 # is it in the cache?
1612 1619 cachestart = offset - o
1613 1620 cacheend = cachestart + length
1614 1621 if cachestart >= 0 and cacheend <= l:
1615 1622 if cachestart == 0 and cacheend == l:
1616 1623 return d # avoid a copy
1617 1624 return util.buffer(d, cachestart, cacheend - cachestart)
1618 1625
1619 1626 return self._readsegment(offset, length, df=df)
1620 1627
1621 1628 def _getsegmentforrevs(self, startrev, endrev, df=None):
1622 1629 """Obtain a segment of raw data corresponding to a range of revisions.
1623 1630
1624 1631 Accepts the start and end revisions and an optional already-open
1625 1632 file handle to be used for reading. If the file handle is used, its
1626 1633 seek position will not be preserved.
1627 1634
1628 1635 Requests for data may be satisfied by a cache.
1629 1636
1630 1637 Returns a 2-tuple of (offset, data) for the requested range of
1631 1638 revisions. Offset is the integer offset from the beginning of the
1632 1639 revlog and data is a str or buffer of the raw byte data.
1633 1640
1634 1641 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1635 1642 to determine where each revision's data begins and ends.
1636 1643 """
1637 1644 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1638 1645 # (functions are expensive).
1639 1646 index = self.index
1640 1647 istart = index[startrev]
1641 1648 start = int(istart[0] >> 16)
1642 1649 if startrev == endrev:
1643 1650 end = start + istart[1]
1644 1651 else:
1645 1652 iend = index[endrev]
1646 1653 end = int(iend[0] >> 16) + iend[1]
1647 1654
1648 1655 if self._inline:
1649 1656 start += (startrev + 1) * self._io.size
1650 1657 end += (endrev + 1) * self._io.size
1651 1658 length = end - start
1652 1659
1653 1660 return start, self._getsegment(start, length, df=df)
1654 1661
1655 1662 def _chunk(self, rev, df=None):
1656 1663 """Obtain a single decompressed chunk for a revision.
1657 1664
1658 1665 Accepts an integer revision and an optional already-open file handle
1659 1666 to be used for reading. If used, the seek position of the file will not
1660 1667 be preserved.
1661 1668
1662 1669 Returns a str holding uncompressed data for the requested revision.
1663 1670 """
1664 1671 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1665 1672
1666 1673 def _chunks(self, revs, df=None):
1667 1674 """Obtain decompressed chunks for the specified revisions.
1668 1675
1669 1676 Accepts an iterable of numeric revisions that are assumed to be in
1670 1677 ascending order. Also accepts an optional already-open file handle
1671 1678 to be used for reading. If used, the seek position of the file will
1672 1679 not be preserved.
1673 1680
1674 1681 This function is similar to calling ``self._chunk()`` multiple times,
1675 1682 but is faster.
1676 1683
1677 1684 Returns a list with decompressed data for each requested revision.
1678 1685 """
1679 1686 if not revs:
1680 1687 return []
1681 1688 start = self.start
1682 1689 length = self.length
1683 1690 inline = self._inline
1684 1691 iosize = self._io.size
1685 1692 buffer = util.buffer
1686 1693
1687 1694 l = []
1688 1695 ladd = l.append
1689 1696
1690 1697 if not self._withsparseread:
1691 1698 slicedchunks = (revs,)
1692 1699 else:
1693 1700 slicedchunks = _slicechunk(self, revs)
1694 1701
1695 1702 for revschunk in slicedchunks:
1696 1703 firstrev = revschunk[0]
1697 1704 # Skip trailing revisions with empty diff
1698 1705 for lastrev in revschunk[::-1]:
1699 1706 if length(lastrev) != 0:
1700 1707 break
1701 1708
1702 1709 try:
1703 1710 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1704 1711 except OverflowError:
1705 1712 # issue4215 - we can't cache a run of chunks greater than
1706 1713 # 2G on Windows
1707 1714 return [self._chunk(rev, df=df) for rev in revschunk]
1708 1715
1709 1716 decomp = self.decompress
1710 1717 for rev in revschunk:
1711 1718 chunkstart = start(rev)
1712 1719 if inline:
1713 1720 chunkstart += (rev + 1) * iosize
1714 1721 chunklength = length(rev)
1715 1722 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1716 1723
1717 1724 return l
1718 1725
1719 1726 def _chunkclear(self):
1720 1727 """Clear the raw chunk cache."""
1721 1728 self._chunkcache = (0, '')
1722 1729
1723 1730 def deltaparent(self, rev):
1724 1731 """return deltaparent of the given revision"""
1725 1732 base = self.index[rev][3]
1726 1733 if base == rev:
1727 1734 return nullrev
1728 1735 elif self._generaldelta:
1729 1736 return base
1730 1737 else:
1731 1738 return rev - 1
1732 1739
1733 1740 def revdiff(self, rev1, rev2):
1734 1741 """return or calculate a delta between two revisions
1735 1742
1736 1743 The delta calculated is in binary form and is intended to be written to
1737 1744 revlog data directly. So this function needs raw revision data.
1738 1745 """
1739 1746 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1740 1747 return bytes(self._chunk(rev2))
1741 1748
1742 1749 return mdiff.textdiff(self.revision(rev1, raw=True),
1743 1750 self.revision(rev2, raw=True))
1744 1751
1745 1752 def revision(self, nodeorrev, _df=None, raw=False):
1746 1753 """return an uncompressed revision of a given node or revision
1747 1754 number.
1748 1755
1749 1756 _df - an existing file handle to read from. (internal-only)
1750 1757 raw - an optional argument specifying if the revision data is to be
1751 1758 treated as raw data when applying flag transforms. 'raw' should be set
1752 1759 to True when generating changegroups or in debug commands.
1753 1760 """
1754 1761 if isinstance(nodeorrev, int):
1755 1762 rev = nodeorrev
1756 1763 node = self.node(rev)
1757 1764 else:
1758 1765 node = nodeorrev
1759 1766 rev = None
1760 1767
1761 1768 cachedrev = None
1762 1769 flags = None
1763 1770 rawtext = None
1764 1771 if node == nullid:
1765 1772 return ""
1766 1773 if self._cache:
1767 1774 if self._cache[0] == node:
1768 1775 # _cache only stores rawtext
1769 1776 if raw:
1770 1777 return self._cache[2]
1771 1778 # duplicated, but good for perf
1772 1779 if rev is None:
1773 1780 rev = self.rev(node)
1774 1781 if flags is None:
1775 1782 flags = self.flags(rev)
1776 1783 # no extra flags set, no flag processor runs, text = rawtext
1777 1784 if flags == REVIDX_DEFAULT_FLAGS:
1778 1785 return self._cache[2]
1779 1786 # rawtext is reusable. need to run flag processor
1780 1787 rawtext = self._cache[2]
1781 1788
1782 1789 cachedrev = self._cache[1]
1783 1790
1784 1791 # look up what we need to read
1785 1792 if rawtext is None:
1786 1793 if rev is None:
1787 1794 rev = self.rev(node)
1788 1795
1789 1796 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1790 1797 if stopped:
1791 1798 rawtext = self._cache[2]
1792 1799
1793 1800 # drop cache to save memory
1794 1801 self._cache = None
1795 1802
1796 1803 bins = self._chunks(chain, df=_df)
1797 1804 if rawtext is None:
1798 1805 rawtext = bytes(bins[0])
1799 1806 bins = bins[1:]
1800 1807
1801 1808 rawtext = mdiff.patches(rawtext, bins)
1802 1809 self._cache = (node, rev, rawtext)
1803 1810
1804 1811 if flags is None:
1805 1812 if rev is None:
1806 1813 rev = self.rev(node)
1807 1814 flags = self.flags(rev)
1808 1815
1809 1816 text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
1810 1817 if validatehash:
1811 1818 self.checkhash(text, node, rev=rev)
1812 1819
1813 1820 return text
1814 1821
1815 1822 def hash(self, text, p1, p2):
1816 1823 """Compute a node hash.
1817 1824
1818 1825 Available as a function so that subclasses can replace the hash
1819 1826 as needed.
1820 1827 """
1821 1828 return hash(text, p1, p2)
1822 1829
1823 1830 def _processflags(self, text, flags, operation, raw=False):
1824 1831 """Inspect revision data flags and applies transforms defined by
1825 1832 registered flag processors.
1826 1833
1827 1834 ``text`` - the revision data to process
1828 1835 ``flags`` - the revision flags
1829 1836 ``operation`` - the operation being performed (read or write)
1830 1837 ``raw`` - an optional argument describing if the raw transform should be
1831 1838 applied.
1832 1839
1833 1840 This method processes the flags in the order (or reverse order if
1834 1841 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
1835 1842 flag processors registered for present flags. The order defined in
1836 1843 REVIDX_FLAGS_ORDER must be stable because transforms may not commute.
1837 1844
1838 1845 Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
1839 1846 processed text and ``validatehash`` is a bool indicating whether the
1840 1847 returned text should be checked for hash integrity.
1841 1848
1842 1849 Note: If the ``raw`` argument is set, it has precedence over the
1843 1850 operation and will only update the value of ``validatehash``.
1844 1851 """
1845 1852 # fast path: no flag processors will run
1846 1853 if flags == 0:
1847 1854 return text, True
1848 1855 if operation not in ('read', 'write'):
1849 1856 raise ProgrammingError(_("invalid '%s' operation") % operation)
1850 1857 # Check all flags are known.
1851 1858 if flags & ~REVIDX_KNOWN_FLAGS:
1852 1859 raise RevlogError(_("incompatible revision flag '%#x'") %
1853 1860 (flags & ~REVIDX_KNOWN_FLAGS))
1854 1861 validatehash = True
1855 1862 # Depending on the operation (read or write), the order might be
1856 1863 # reversed due to non-commutative transforms.
1857 1864 orderedflags = REVIDX_FLAGS_ORDER
1858 1865 if operation == 'write':
1859 1866 orderedflags = reversed(orderedflags)
1860 1867
1861 1868 for flag in orderedflags:
1862 1869 # If a flagprocessor has been registered for a known flag, apply the
1863 1870 # related operation transform and update result tuple.
1864 1871 if flag & flags:
1865 1872 vhash = True
1866 1873
1867 1874 if flag not in _flagprocessors:
1868 1875 message = _("missing processor for flag '%#x'") % (flag)
1869 1876 raise RevlogError(message)
1870 1877
1871 1878 processor = _flagprocessors[flag]
1872 1879 if processor is not None:
1873 1880 readtransform, writetransform, rawtransform = processor
1874 1881
1875 1882 if raw:
1876 1883 vhash = rawtransform(self, text)
1877 1884 elif operation == 'read':
1878 1885 text, vhash = readtransform(self, text)
1879 1886 else: # write operation
1880 1887 text, vhash = writetransform(self, text)
1881 1888 validatehash = validatehash and vhash
1882 1889
1883 1890 return text, validatehash
1884 1891
1885 1892 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1886 1893 """Check node hash integrity.
1887 1894
1888 1895 Available as a function so that subclasses can extend hash mismatch
1889 1896 behaviors as needed.
1890 1897 """
1891 1898 try:
1892 1899 if p1 is None and p2 is None:
1893 1900 p1, p2 = self.parents(node)
1894 1901 if node != self.hash(text, p1, p2):
1895 1902 revornode = rev
1896 1903 if revornode is None:
1897 1904 revornode = templatefilters.short(hex(node))
1898 1905 raise RevlogError(_("integrity check failed on %s:%s")
1899 1906 % (self.indexfile, pycompat.bytestr(revornode)))
1900 1907 except RevlogError:
1901 1908 if self._censorable and _censoredtext(text):
1902 1909 raise error.CensoredNodeError(self.indexfile, node, text)
1903 1910 raise
1904 1911
1905 1912 def _enforceinlinesize(self, tr, fp=None):
1906 1913 """Check if the revlog is too big for inline and convert if so.
1907 1914
1908 1915 This should be called after revisions are added to the revlog. If the
1909 1916 revlog has grown too large to be an inline revlog, it will convert it
1910 1917 to use multiple index and data files.
1911 1918 """
1912 1919 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
1913 1920 return
1914 1921
1915 1922 trinfo = tr.find(self.indexfile)
1916 1923 if trinfo is None:
1917 1924 raise RevlogError(_("%s not found in the transaction")
1918 1925 % self.indexfile)
1919 1926
1920 1927 trindex = trinfo[2]
1921 1928 if trindex is not None:
1922 1929 dataoff = self.start(trindex)
1923 1930 else:
1924 1931 # revlog was stripped at start of transaction, use all leftover data
1925 1932 trindex = len(self) - 1
1926 1933 dataoff = self.end(-2)
1927 1934
1928 1935 tr.add(self.datafile, dataoff)
1929 1936
1930 1937 if fp:
1931 1938 fp.flush()
1932 1939 fp.close()
1933 1940
1934 1941 with self._datafp('w') as df:
1935 1942 for r in self:
1936 1943 df.write(self._getsegmentforrevs(r, r)[1])
1937 1944
1938 1945 with self._indexfp('w') as fp:
1939 1946 self.version &= ~FLAG_INLINE_DATA
1940 1947 self._inline = False
1941 1948 io = self._io
1942 1949 for i in self:
1943 1950 e = io.packentry(self.index[i], self.node, self.version, i)
1944 1951 fp.write(e)
1945 1952
1946 1953 # the temp file replaces the real index when we exit the context
1947 1954 # manager
1948 1955
1949 1956 tr.replace(self.indexfile, trindex * self._io.size)
1950 1957 self._chunkclear()
1951 1958
1952 1959 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1953 1960 node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
1954 1961 """add a revision to the log
1955 1962
1956 1963 text - the revision data to add
1957 1964 transaction - the transaction object used for rollback
1958 1965 link - the linkrev data to add
1959 1966 p1, p2 - the parent nodeids of the revision
1960 1967 cachedelta - an optional precomputed delta
1961 1968 node - nodeid of revision; typically node is not specified, and it is
1962 1969 computed by default as hash(text, p1, p2), however subclasses might
1963 1970 use different hashing method (and override checkhash() in such case)
1964 1971 flags - the known flags to set on the revision
1965 1972 deltacomputer - an optional _deltacomputer instance shared between
1966 1973 multiple calls
1967 1974 """
1968 1975 if link == nullrev:
1969 1976 raise RevlogError(_("attempted to add linkrev -1 to %s")
1970 1977 % self.indexfile)
1971 1978
1972 1979 if flags:
1973 1980 node = node or self.hash(text, p1, p2)
1974 1981
1975 1982 rawtext, validatehash = self._processflags(text, flags, 'write')
1976 1983
1977 1984 # If the flag processor modifies the revision data, ignore any provided
1978 1985 # cachedelta.
1979 1986 if rawtext != text:
1980 1987 cachedelta = None
1981 1988
1982 1989 if len(rawtext) > _maxentrysize:
1983 1990 raise RevlogError(
1984 1991 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
1985 1992 % (self.indexfile, len(rawtext)))
1986 1993
1987 1994 node = node or self.hash(rawtext, p1, p2)
1988 1995 if node in self.nodemap:
1989 1996 return node
1990 1997
1991 1998 if validatehash:
1992 1999 self.checkhash(rawtext, node, p1=p1, p2=p2)
1993 2000
1994 2001 return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
1995 2002 flags, cachedelta=cachedelta,
1996 2003 deltacomputer=deltacomputer)
1997 2004
1998 2005 def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
1999 2006 cachedelta=None, deltacomputer=None):
2000 2007 """add a raw revision with known flags, node and parents
2001 2008 useful when reusing a revision not stored in this revlog (e.g. received
2002 2009 over the wire, or read from an external bundle).
2003 2010 """
2004 2011 dfh = None
2005 2012 if not self._inline:
2006 2013 dfh = self._datafp("a+")
2007 2014 ifh = self._indexfp("a+")
2008 2015 try:
2009 2016 return self._addrevision(node, rawtext, transaction, link, p1, p2,
2010 2017 flags, cachedelta, ifh, dfh,
2011 2018 deltacomputer=deltacomputer)
2012 2019 finally:
2013 2020 if dfh:
2014 2021 dfh.close()
2015 2022 ifh.close()
2016 2023
2017 2024 def compress(self, data):
2018 2025 """Generate a possibly-compressed representation of data."""
2019 2026 if not data:
2020 2027 return '', data
2021 2028
2022 2029 compressed = self._compressor.compress(data)
2023 2030
2024 2031 if compressed:
2025 2032 # The revlog compressor added the header in the returned data.
2026 2033 return '', compressed
2027 2034
2028 2035 if data[0:1] == '\0':
2029 2036 return '', data
2030 2037 return 'u', data
2031 2038
2032 2039 def decompress(self, data):
2033 2040 """Decompress a revlog chunk.
2034 2041
2035 2042 The chunk is expected to begin with a header identifying the
2036 2043 format type so it can be routed to an appropriate decompressor.
2037 2044 """
2038 2045 if not data:
2039 2046 return data
2040 2047
2041 2048 # Revlogs are read much more frequently than they are written and many
2042 2049 # chunks only take microseconds to decompress, so performance is
2043 2050 # important here.
2044 2051 #
2045 2052 # We can make a few assumptions about revlogs:
2046 2053 #
2047 2054 # 1) the majority of chunks will be compressed (as opposed to inline
2048 2055 # raw data).
2049 2056 # 2) decompressing *any* data will likely be at least 10x slower than
2050 2057 # returning raw inline data.
2051 2058 # 3) we want to prioritize common and officially supported compression
2052 2059 # engines
2053 2060 #
2054 2061 # It follows that we want to optimize for "decompress compressed data
2055 2062 # when encoded with common and officially supported compression engines"
2056 2063 # case over "raw data" and "data encoded by less common or non-official
2057 2064 # compression engines." That is why we have the inline lookup first
2058 2065 # followed by the compengines lookup.
2059 2066 #
2060 2067 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2061 2068 # compressed chunks. And this matters for changelog and manifest reads.
2062 2069 t = data[0:1]
2063 2070
2064 2071 if t == 'x':
2065 2072 try:
2066 2073 return _zlibdecompress(data)
2067 2074 except zlib.error as e:
2068 2075 raise RevlogError(_('revlog decompress error: %s') %
2069 2076 stringutil.forcebytestr(e))
2070 2077 # '\0' is more common than 'u' so it goes first.
2071 2078 elif t == '\0':
2072 2079 return data
2073 2080 elif t == 'u':
2074 2081 return util.buffer(data, 1)
2075 2082
2076 2083 try:
2077 2084 compressor = self._decompressors[t]
2078 2085 except KeyError:
2079 2086 try:
2080 2087 engine = util.compengines.forrevlogheader(t)
2081 2088 compressor = engine.revlogcompressor()
2082 2089 self._decompressors[t] = compressor
2083 2090 except KeyError:
2084 2091 raise RevlogError(_('unknown compression type %r') % t)
2085 2092
2086 2093 return compressor.decompress(data)
2087 2094
2088 2095 def _isgooddeltainfo(self, d, textlen):
2089 2096 """Returns True if the given delta is good. Good means that it is within
2090 2097 the disk span, disk size, and chain length bounds that we know to be
2091 2098 performant."""
2092 2099 if d is None:
2093 2100 return False
2094 2101
2095 2102 # - 'd.distance' is the distance from the base revision -- bounding it
2096 2103 # limits the amount of I/O we need to do.
2097 2104 # - 'd.compresseddeltalen' is the sum of the total size of deltas we
2098 2105 # need to apply -- bounding it limits the amount of CPU we consume.
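# Worked sketch (hypothetical numbers): with textlen=1000 and no
# explicit limits configured, a delta is rejected if its chain spans
# more than 4000 bytes on disk (defaultmax = textlen * 4), if the
# delta itself is larger than the full text, or if the summed deltas
# exceed 2000 bytes.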
2099 2106
2100 2107 defaultmax = textlen * 4
2101 2108 maxdist = self._maxdeltachainspan
2102 2109 if not maxdist:
2103 2110 maxdist = d.distance # ensure the conditional below passes
2104 2111 maxdist = max(maxdist, defaultmax)
2105 2112 if (d.distance > maxdist or d.deltalen > textlen or
2106 2113 d.compresseddeltalen > textlen * 2 or
2107 2114 (self._maxchainlen and d.chainlen > self._maxchainlen)):
2108 2115 return False
2109 2116
2110 2117 return True
2111 2118
2112 2119 def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
2113 2120 cachedelta, ifh, dfh, alwayscache=False,
2114 2121 deltacomputer=None):
2115 2122 """internal function to add revisions to the log
2116 2123
2117 2124 see addrevision for argument descriptions.
2118 2125
2119 2126 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2120 2127
2121 2128 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2122 2129 be used.
2123 2130
2124 2131 invariants:
2125 2132 - rawtext is optional (can be None); if not set, cachedelta must be set.
2126 2133 if both are set, they must correspond to each other.
2127 2134 """
2128 2135 if node == nullid:
2129 2136 raise RevlogError(_("%s: attempt to add null revision") %
2130 2137 (self.indexfile))
2131 2138 if node == wdirid or node in wdirfilenodeids:
2132 2139 raise RevlogError(_("%s: attempt to add wdir revision") %
2133 2140 (self.indexfile))
2134 2141
2135 2142 if self._inline:
2136 2143 fh = ifh
2137 2144 else:
2138 2145 fh = dfh
2139 2146
2140 2147 btext = [rawtext]
2141 2148
2142 2149 curr = len(self)
2143 2150 prev = curr - 1
2144 2151 offset = self.end(prev)
2145 2152 p1r, p2r = self.rev(p1), self.rev(p2)
2146 2153
2147 2154 # full versions are inserted when the needed deltas
2148 2155 # become comparable to the uncompressed text
2149 2156 if rawtext is None:
2150 2157 # need rawtext size, before changed by flag processors, which is
2151 2158 # the non-raw size. use revlog explicitly to avoid filelog's extra
2152 2159 # logic that might remove metadata size.
2153 2160 textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
2154 2161 cachedelta[1])
2155 2162 else:
2156 2163 textlen = len(rawtext)
2157 2164
2158 2165 if deltacomputer is None:
2159 2166 deltacomputer = _deltacomputer(self)
2160 2167
2161 2168 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2162 2169
2163 2170 # no delta for flag processor revision (see "candelta" for why)
2164 2171 # not calling candelta since only one revision needs testing, also to
2165 2172 # avoid overhead fetching flags again.
2166 2173 if flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
2167 2174 deltainfo = None
2168 2175 else:
2169 2176 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2170 2177
2171 2178 if deltainfo is not None:
2172 2179 base = deltainfo.base
2173 2180 chainbase = deltainfo.chainbase
2174 2181 data = deltainfo.data
2175 2182 l = deltainfo.deltalen
2176 2183 else:
2177 2184 rawtext = deltacomputer.buildtext(revinfo, fh)
2178 2185 data = self.compress(rawtext)
2179 2186 l = len(data[1]) + len(data[0])
2180 2187 base = chainbase = curr
2181 2188
2182 2189 e = (offset_type(offset, flags), l, textlen,
2183 2190 base, link, p1r, p2r, node)
2184 2191 self.index.insert(-1, e)
2185 2192 self.nodemap[node] = curr
2186 2193
2187 2194 entry = self._io.packentry(e, self.node, self.version, curr)
2188 2195 self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
2189 2196
2190 2197 if alwayscache and rawtext is None:
2191 2198 rawtext = deltacomputer.buildtext(revinfo, fh)
2192 2199
2193 2200 if type(rawtext) == bytes: # only accept immutable objects
2194 2201 self._cache = (node, curr, rawtext)
2195 2202 self._chainbasecache[curr] = chainbase
2196 2203 return node
2197 2204
2198 2205 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
2199 2206 # Files opened in a+ mode have inconsistent behavior on various
2200 2207 # platforms. Windows requires that a file positioning call be made
2201 2208 # when the file handle transitions between reads and writes. See
2202 2209 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2203 2210 # platforms, Python or the platform itself can be buggy. Some versions
2204 2211 # of Solaris have been observed to not append at the end of the file
2205 2212 # if the file was seeked to before the end. See issue4943 for more.
2206 2213 #
2207 2214 # We work around this issue by inserting a seek() before writing.
2208 2215 # Note: This is likely not necessary on Python 3.
2209 2216 ifh.seek(0, os.SEEK_END)
2210 2217 if dfh:
2211 2218 dfh.seek(0, os.SEEK_END)
2212 2219
2213 2220 curr = len(self) - 1
2214 2221 if not self._inline:
2215 2222 transaction.add(self.datafile, offset)
2216 2223 transaction.add(self.indexfile, curr * len(entry))
2217 2224 if data[0]:
2218 2225 dfh.write(data[0])
2219 2226 dfh.write(data[1])
2220 2227 ifh.write(entry)
2221 2228 else:
2222 2229 offset += curr * self._io.size
2223 2230 transaction.add(self.indexfile, offset, curr)
2224 2231 ifh.write(entry)
2225 2232 ifh.write(data[0])
2226 2233 ifh.write(data[1])
2227 2234 self._enforceinlinesize(transaction, ifh)
2228 2235
2229 2236 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
2230 2237 """
2231 2238 add a delta group
2232 2239
2233 2240 given a set of deltas, add them to the revision log. the
2234 2241 first delta is against its parent, which should be in our
2235 2242 log, the rest are against the previous delta.
2236 2243
2237 2244 If ``addrevisioncb`` is defined, it will be called with arguments of
2238 2245 this revlog and the node that was added.
2239 2246 """
2240 2247
2241 2248 nodes = []
2242 2249
2243 2250 r = len(self)
2244 2251 end = 0
2245 2252 if r:
2246 2253 end = self.end(r - 1)
2247 2254 ifh = self._indexfp("a+")
2248 2255 isize = r * self._io.size
2249 2256 if self._inline:
2250 2257 transaction.add(self.indexfile, end + isize, r)
2251 2258 dfh = None
2252 2259 else:
2253 2260 transaction.add(self.indexfile, isize, r)
2254 2261 transaction.add(self.datafile, end)
2255 2262 dfh = self._datafp("a+")
2256 2263 def flush():
2257 2264 if dfh:
2258 2265 dfh.flush()
2259 2266 ifh.flush()
2260 2267 try:
2261 2268 deltacomputer = _deltacomputer(self)
2262 2269 # loop through our set of deltas
2263 2270 for data in deltas:
2264 2271 node, p1, p2, linknode, deltabase, delta, flags = data
2265 2272 link = linkmapper(linknode)
2266 2273 flags = flags or REVIDX_DEFAULT_FLAGS
2267 2274
2268 2275 nodes.append(node)
2269 2276
2270 2277 if node in self.nodemap:
2271 2278 # this can happen if two branches make the same change
2272 2279 continue
2273 2280
2274 2281 for p in (p1, p2):
2275 2282 if p not in self.nodemap:
2276 2283 raise LookupError(p, self.indexfile,
2277 2284 _('unknown parent'))
2278 2285
2279 2286 if deltabase not in self.nodemap:
2280 2287 raise LookupError(deltabase, self.indexfile,
2281 2288 _('unknown delta base'))
2282 2289
2283 2290 baserev = self.rev(deltabase)
2284 2291
2285 2292 if baserev != nullrev and self.iscensored(baserev):
2286 2293 # if base is censored, delta must be full replacement in a
2287 2294 # single patch operation
2288 2295 hlen = struct.calcsize(">lll")
2289 2296 oldlen = self.rawsize(baserev)
2290 2297 newlen = len(delta) - hlen
2291 2298 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2292 2299 raise error.CensoredBaseError(self.indexfile,
2293 2300 self.node(baserev))
2294 2301
2295 2302 if not flags and self._peek_iscensored(baserev, delta, flush):
2296 2303 flags |= REVIDX_ISCENSORED
2297 2304
2298 2305 # We assume consumers of addrevisioncb will want to retrieve
2299 2306 # the added revision, which will require a call to
2300 2307 # revision(). revision() will fast path if there is a cache
2301 2308 # hit. So, we tell _addrevision() to always cache in this case.
2302 2309 # We're only using addgroup() in the context of changegroup
2303 2310 # generation so the revision data can always be handled as raw
2304 2311 # by the flagprocessor.
2305 2312 self._addrevision(node, None, transaction, link,
2306 2313 p1, p2, flags, (baserev, delta),
2307 2314 ifh, dfh,
2308 2315 alwayscache=bool(addrevisioncb),
2309 2316 deltacomputer=deltacomputer)
2310 2317
2311 2318 if addrevisioncb:
2312 2319 addrevisioncb(self, node)
2313 2320
2314 2321 if not dfh and not self._inline:
2315 2322 # addrevision switched from inline to conventional
2316 2323 # reopen the index
2317 2324 ifh.close()
2318 2325 dfh = self._datafp("a+")
2319 2326 ifh = self._indexfp("a+")
2320 2327 finally:
2321 2328 if dfh:
2322 2329 dfh.close()
2323 2330 ifh.close()
2324 2331
2325 2332 return nodes
2326 2333
2327 2334 def iscensored(self, rev):
2328 2335 """Check if a file revision is censored."""
2329 2336 if not self._censorable:
2330 2337 return False
2331 2338
2332 2339 return self.flags(rev) & REVIDX_ISCENSORED
2333 2340
2334 2341 def _peek_iscensored(self, baserev, delta, flush):
2335 2342 """Quickly check if a delta produces a censored revision."""
2336 2343 if not self._censorable:
2337 2344 return False
2338 2345
2339 2346 # Fragile heuristic: unless new file meta keys are added alphabetically
2340 2347 # preceding "censored", all censored revisions are prefixed by
2341 2348 # "\1\ncensored:". A delta producing such a censored revision must be a
2342 2349 # full-replacement delta, so we inspect the first and only patch in the
2343 2350 # delta for this prefix.
2344 2351 hlen = struct.calcsize(">lll")
2345 2352 if len(delta) <= hlen:
2346 2353 return False
2347 2354
2348 2355 oldlen = self.rawsize(baserev)
2349 2356 newlen = len(delta) - hlen
2350 2357 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2351 2358 return False
2352 2359
2353 2360 add = "\1\ncensored:"
2354 2361 addlen = len(add)
2355 2362 return newlen >= addlen and delta[hlen:hlen + addlen] == add
2356 2363
2357 2364 def getstrippoint(self, minlink):
2358 2365 """find the minimum rev that must be stripped to strip the linkrev
2359 2366
2360 2367 Returns a tuple containing the minimum rev and a set of all revs that
2361 2368 have linkrevs that will be broken by this strip.
2362 2369 """
2363 2370 brokenrevs = set()
2364 2371 strippoint = len(self)
2365 2372
2366 2373 heads = {}
2367 2374 futurelargelinkrevs = set()
2368 2375 for head in self.headrevs():
2369 2376 headlinkrev = self.linkrev(head)
2370 2377 heads[head] = headlinkrev
2371 2378 if headlinkrev >= minlink:
2372 2379 futurelargelinkrevs.add(headlinkrev)
2373 2380
2374 2381 # This algorithm involves walking down the rev graph, starting at the
2375 2382 # heads. Since the revs are topologically sorted according to linkrev,
2376 2383 # once all head linkrevs are below the minlink, we know there are
2377 2384 # no more revs that could have a linkrev greater than minlink.
2378 2385 # So we can stop walking.
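# Worked sketch (hypothetical linear revlog): if linkrev(r) == r for
# revs 0..3 and minlink=2, the walk pops revs 3 then 2 (both linkrevs
# >= 2) and stops once no pending linkrev reaches 2, returning
# strippoint=2 with no broken linkrevs.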
2379 2386 while futurelargelinkrevs:
2380 2387 strippoint -= 1
2381 2388 linkrev = heads.pop(strippoint)
2382 2389
2383 2390 if linkrev < minlink:
2384 2391 brokenrevs.add(strippoint)
2385 2392 else:
2386 2393 futurelargelinkrevs.remove(linkrev)
2387 2394
2388 2395 for p in self.parentrevs(strippoint):
2389 2396 if p != nullrev:
2390 2397 plinkrev = self.linkrev(p)
2391 2398 heads[p] = plinkrev
2392 2399 if plinkrev >= minlink:
2393 2400 futurelargelinkrevs.add(plinkrev)
2394 2401
2395 2402 return strippoint, brokenrevs
2396 2403
2397 2404 def strip(self, minlink, transaction):
2398 2405 """truncate the revlog on the first revision with a linkrev >= minlink
2399 2406
2400 2407 This function is called when we're stripping revision minlink and
2401 2408 its descendants from the repository.
2402 2409
2403 2410 We have to remove all revisions with linkrev >= minlink, because
2404 2411 the equivalent changelog revisions will be renumbered after the
2405 2412 strip.
2406 2413
2407 2414 So we truncate the revlog on the first of these revisions, and
2408 2415 trust that the caller has saved the revisions that shouldn't be
2409 2416 removed and that it'll re-add them after this truncation.
2410 2417 """
2411 2418 if len(self) == 0:
2412 2419 return
2413 2420
2414 2421 rev, _ = self.getstrippoint(minlink)
2415 2422 if rev == len(self):
2416 2423 return
2417 2424
2418 2425 # first truncate the files on disk
2419 2426 end = self.start(rev)
2420 2427 if not self._inline:
2421 2428 transaction.add(self.datafile, end)
2422 2429 end = rev * self._io.size
2423 2430 else:
2424 2431 end += rev * self._io.size
2425 2432
2426 2433 transaction.add(self.indexfile, end)
2427 2434
2428 2435 # then reset internal state in memory to forget those revisions
2429 2436 self._cache = None
2430 2437 self._chaininfocache = {}
2431 2438 self._chunkclear()
2432 2439 for x in xrange(rev, len(self)):
2433 2440 del self.nodemap[self.node(x)]
2434 2441
2435 2442 del self.index[rev:-1]
2436 2443 self._nodepos = None
2437 2444
2438 2445 def checksize(self):
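# Returns (dd, di): the byte-count discrepancy detected in the data
# file and the index file respectively; (0, 0) means both files match
# the sizes implied by the index entries.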
2439 2446 expected = 0
2440 2447 if len(self):
2441 2448 expected = max(0, self.end(len(self) - 1))
2442 2449
2443 2450 try:
2444 2451 with self._datafp() as f:
2445 2452 f.seek(0, 2)
2446 2453 actual = f.tell()
2447 2454 dd = actual - expected
2448 2455 except IOError as inst:
2449 2456 if inst.errno != errno.ENOENT:
2450 2457 raise
2451 2458 dd = 0
2452 2459
2453 2460 try:
2454 2461 f = self.opener(self.indexfile)
2455 2462 f.seek(0, 2)
2456 2463 actual = f.tell()
2457 2464 f.close()
2458 2465 s = self._io.size
2459 2466 i = max(0, actual // s)
2460 2467 di = actual - (i * s)
2461 2468 if self._inline:
2462 2469 databytes = 0
2463 2470 for r in self:
2464 2471 databytes += max(0, self.length(r))
2465 2472 dd = 0
2466 2473 di = actual - len(self) * s - databytes
2467 2474 except IOError as inst:
2468 2475 if inst.errno != errno.ENOENT:
2469 2476 raise
2470 2477 di = 0
2471 2478
2472 2479 return (dd, di)
2473 2480
2474 2481 def files(self):
2475 2482 res = [self.indexfile]
2476 2483 if not self._inline:
2477 2484 res.append(self.datafile)
2478 2485 return res
2479 2486
2480 2487 DELTAREUSEALWAYS = 'always'
2481 2488 DELTAREUSESAMEREVS = 'samerevs'
2482 2489 DELTAREUSENEVER = 'never'
2483 2490
2484 2491 DELTAREUSEFULLADD = 'fulladd'
2485 2492
2486 2493 DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}
2487 2494
2488 2495 def clone(self, tr, destrevlog, addrevisioncb=None,
2489 2496 deltareuse=DELTAREUSESAMEREVS, aggressivemergedeltas=None):
2490 2497 """Copy this revlog to another, possibly with format changes.
2491 2498
2492 2499 The destination revlog will contain the same revisions and nodes.
2493 2500 However, it may not be bit-for-bit identical due to e.g. delta encoding
2494 2501 differences.
2495 2502
2496 2503 The ``deltareuse`` argument controls how deltas from the existing revlog
2497 2504 are preserved in the destination revlog. The argument can have the
2498 2505 following values:
2499 2506
2500 2507 DELTAREUSEALWAYS
2501 2508 Deltas will always be reused (if possible), even if the destination
2502 2509 revlog would not select the same revisions for the delta. This is the
2503 2510 fastest mode of operation.
2504 2511 DELTAREUSESAMEREVS
2505 2512 Deltas will be reused if the destination revlog would pick the same
2506 2513 revisions for the delta. This mode strikes a balance between speed
2507 2514 and optimization.
2508 2515 DELTAREUSENEVER
2509 2516 Deltas will never be reused. This is the slowest mode of execution.
2510 2517 This mode can be used to recompute deltas (e.g. if the diff/delta
2511 2518 algorithm changes).
2512 2519
2513 2520 Delta computation can be slow, so the choice of delta reuse policy can
2514 2521 significantly affect run time.
2515 2522
2516 2523 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2517 2524 two extremes. Deltas will be reused if they are appropriate. But if a
2518 2525 better base revision is available, it will be used. This means if you
2519 2526 are converting a non-generaldelta revlog to a generaldelta revlog,
2520 2527 deltas will be recomputed if the delta's parent isn't a parent of the
2521 2528 revision.
2522 2529
2523 2530 In addition to the delta policy, the ``aggressivemergedeltas`` argument
2524 2531 controls whether to compute deltas against both parents for merges.
2525 2532 If unspecified, the destination revlog's current setting is used.
2526 2533 """
2527 2534 if deltareuse not in self.DELTAREUSEALL:
2528 2535 raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)
2529 2536
2530 2537 if len(destrevlog):
2531 2538 raise ValueError(_('destination revlog is not empty'))
2532 2539
2533 2540 if getattr(self, 'filteredrevs', None):
2534 2541 raise ValueError(_('source revlog has filtered revisions'))
2535 2542 if getattr(destrevlog, 'filteredrevs', None):
2536 2543 raise ValueError(_('destination revlog has filtered revisions'))
2537 2544
2538 2545 # lazydeltabase controls whether to reuse a cached delta, if possible.
2539 2546 oldlazydeltabase = destrevlog._lazydeltabase
2540 2547 oldamd = destrevlog._aggressivemergedeltas
2541 2548
2542 2549 try:
2543 2550 if deltareuse == self.DELTAREUSEALWAYS:
2544 2551 destrevlog._lazydeltabase = True
2545 2552 elif deltareuse == self.DELTAREUSESAMEREVS:
2546 2553 destrevlog._lazydeltabase = False
2547 2554
2548 2555 destrevlog._aggressivemergedeltas = aggressivemergedeltas or oldamd
2549 2556
2550 2557 populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
2551 2558 self.DELTAREUSESAMEREVS)
2552 2559
2553 2560 deltacomputer = _deltacomputer(destrevlog)
2554 2561 index = self.index
2555 2562 for rev in self:
2556 2563 entry = index[rev]
2557 2564
2558 2565 # Some classes override linkrev to take filtered revs into
2559 2566 # account. Use raw entry from index.
2560 2567 flags = entry[0] & 0xffff
2561 2568 linkrev = entry[4]
2562 2569 p1 = index[entry[5]][7]
2563 2570 p2 = index[entry[6]][7]
2564 2571 node = entry[7]
2565 2572
2566 2573 # (Possibly) reuse the delta from the revlog if allowed and
2567 2574 # the revlog chunk is a delta.
2568 2575 cachedelta = None
2569 2576 rawtext = None
2570 2577 if populatecachedelta:
2571 2578 dp = self.deltaparent(rev)
2572 2579 if dp != nullrev:
2573 2580 cachedelta = (dp, bytes(self._chunk(rev)))
2574 2581
2575 2582 if not cachedelta:
2576 2583 rawtext = self.revision(rev, raw=True)
2577 2584
2579 2586 if deltareuse == self.DELTAREUSEFULLADD:
2580 2587 destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
2581 2588 cachedelta=cachedelta,
2582 2589 node=node, flags=flags,
2583 2590 deltacomputer=deltacomputer)
2584 2591 else:
2585 2592 ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
2586 2593 checkambig=False)
2587 2594 dfh = None
2588 2595 if not destrevlog._inline:
2589 2596 dfh = destrevlog.opener(destrevlog.datafile, 'a+')
2590 2597 try:
2591 2598 destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
2592 2599 p2, flags, cachedelta, ifh, dfh,
2593 2600 deltacomputer=deltacomputer)
2594 2601 finally:
2595 2602 if dfh:
2596 2603 dfh.close()
2597 2604 ifh.close()
2598 2605
2599 2606 if addrevisioncb:
2600 2607 addrevisioncb(self, rev, node)
2601 2608 finally:
2602 2609 destrevlog._lazydeltabase = oldlazydeltabase
2603 2610 destrevlog._aggressivemergedeltas = oldamd