revlog: resolve lfs rawtext to vanilla rawtext before applying delta...
Jun Wu
r36889:369aadf7 4.5.1 stable
@@ -1,2516 +1,2521
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import binascii
17 17 import collections
18 18 import errno
19 19 import hashlib
20 20 import heapq
21 21 import os
22 22 import struct
23 23 import zlib
24 24
25 25 # import stuff from node for others to import from revlog
26 26 from .node import (
27 27 bin,
28 28 hex,
29 29 nullid,
30 30 nullrev,
31 31 wdirhex,
32 32 wdirid,
33 33 wdirrev,
34 34 )
35 35 from .i18n import _
36 36 from .thirdparty import (
37 37 attr,
38 38 )
39 39 from . import (
40 40 ancestor,
41 41 error,
42 42 mdiff,
43 43 policy,
44 44 pycompat,
45 45 templatefilters,
46 46 util,
47 47 )
48 48
49 49 parsers = policy.importmod(r'parsers')
50 50
51 51 # Aliased for performance.
52 52 _zlibdecompress = zlib.decompress
53 53
54 54 # revlog header flags
55 55 REVLOGV0 = 0
56 56 REVLOGV1 = 1
57 57 # Dummy value until file format is finalized.
58 58 # Reminder: change the bounds check in revlog.__init__ when this is changed.
59 59 REVLOGV2 = 0xDEAD
60 60 FLAG_INLINE_DATA = (1 << 16)
61 61 FLAG_GENERALDELTA = (1 << 17)
62 62 REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA
63 63 REVLOG_DEFAULT_FORMAT = REVLOGV1
64 64 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
65 65 REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA
66 66 REVLOGV2_FLAGS = REVLOGV1_FLAGS
67 67
68 68 # revlog index flags
69 69 REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
70 70 REVIDX_ELLIPSIS = (1 << 14) # revision hash does not match data (narrowhg)
71 71 REVIDX_EXTSTORED = (1 << 13) # revision data is stored externally
72 72 REVIDX_DEFAULT_FLAGS = 0
73 73 # stable order in which flags need to be processed and their processors applied
74 74 REVIDX_FLAGS_ORDER = [
75 75 REVIDX_ISCENSORED,
76 76 REVIDX_ELLIPSIS,
77 77 REVIDX_EXTSTORED,
78 78 ]
79 79 REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)
80 80 # bitmask for flags that could cause rawdata content change
81 81 REVIDX_RAWTEXT_CHANGING_FLAGS = REVIDX_ISCENSORED | REVIDX_EXTSTORED
82 82
83 83 # max size of revlog with inline data
84 84 _maxinline = 131072
85 85 _chunksize = 1048576
86 86
87 87 RevlogError = error.RevlogError
88 88 LookupError = error.LookupError
89 89 CensoredNodeError = error.CensoredNodeError
90 90 ProgrammingError = error.ProgrammingError
91 91
92 92 # Store flag processors (cf. 'addflagprocessor()' to register)
93 93 _flagprocessors = {
94 94 REVIDX_ISCENSORED: None,
95 95 }
96 96
97 97 def addflagprocessor(flag, processor):
98 98 """Register a flag processor on a revision data flag.
99 99
100 100 Invariant:
101 101 - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
102 102 and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
103 103 - Only one flag processor can be registered on a specific flag.
104 104 - flagprocessors must be 3-tuples of functions (read, write, raw) with the
105 105 following signatures:
106 106 - (read) f(self, rawtext) -> text, bool
107 107 - (write) f(self, text) -> rawtext, bool
108 108 - (raw) f(self, rawtext) -> bool
109 109 "text" is presented to the user. "rawtext" is stored in revlog data, not
110 110 directly visible to the user.
111 111 The boolean returned by these transforms is used to determine whether
112 112 the returned text can be used for hash integrity checking. For example,
113 113 if "write" returns False, then "text" is used to generate the hash. If
114 114 "write" returns True, the "rawtext" returned by "write" should be used
115 115 to generate the hash. Usually, "write" and "read" return different
116 116 booleans, and "raw" returns the same boolean as "write".
117 117
118 118 Note: The 'raw' transform is used for changegroup generation and in some
119 119 debug commands. In this case the transform only indicates whether the
120 120 contents can be used for hash integrity checks.
121 121 """
122 122 if not flag & REVIDX_KNOWN_FLAGS:
123 123 msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
124 124 raise ProgrammingError(msg)
125 125 if flag not in REVIDX_FLAGS_ORDER:
126 126 msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
127 127 raise ProgrammingError(msg)
128 128 if flag in _flagprocessors:
129 129 msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
130 130 raise error.Abort(msg)
131 131 _flagprocessors[flag] = processor
132 132
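As an illustration only (not part of this change), here is a minimal sketch of registering a processor, assuming a hypothetical base64 storage transform; the booleans follow the contract documented above, and REVIDX_EXTSTORED merely stands in for whichever registered flag the processor serves:

import base64

def _b64read(self, rawtext):
    # stored form -> user-visible form; True: the result feeds hash checks
    return base64.b64decode(rawtext), True

def _b64write(self, text):
    # user-visible form -> stored form; False: the hash comes from "text"
    return base64.b64encode(text), False

def _b64raw(self, rawtext):
    # rawtext alone cannot be used for hash integrity checks
    return False

addflagprocessor(REVIDX_EXTSTORED, (_b64read, _b64write, _b64raw))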
133 133 def getoffset(q):
134 134 return int(q >> 16)
135 135
136 136 def gettype(q):
137 137 return int(q & 0xFFFF)
138 138
139 139 def offset_type(offset, type):
140 140 if (type & ~REVIDX_KNOWN_FLAGS) != 0:
141 141 raise ValueError('unknown revlog index flags')
142 142 return int(int(offset) << 16 | type)
143 143
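The helpers above pack a 48-bit byte offset and 16 flag bits into the first field of an index entry; a quick round-trip sketch:

v = offset_type(1048576, REVIDX_ISCENSORED)
assert getoffset(v) == 1048576
assert gettype(v) == REVIDX_ISCENSORED
offset_type(0, 1 << 3)  # raises ValueError: bit not in REVIDX_KNOWN_FLAGS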
144 144 _nullhash = hashlib.sha1(nullid)
145 145
146 146 def hash(text, p1, p2):
147 147 """generate a hash from the given text and its parent hashes
148 148
149 149 This hash combines both the current file contents and its history
150 150 in a manner that makes it easy to distinguish nodes with the same
151 151 content in the revision graph.
152 152 """
153 153 # As of now, if one of the parent nodes is null, p2 is null
154 154 if p2 == nullid:
155 155 # deep copy of a hash is faster than creating one
156 156 s = _nullhash.copy()
157 157 s.update(p1)
158 158 else:
159 159 # none of the parent nodes are nullid
160 160 if p1 < p2:
161 161 a = p1
162 162 b = p2
163 163 else:
164 164 a = p2
165 165 b = p1
166 166 s = hashlib.sha1(a)
167 167 s.update(b)
168 168 s.update(text)
169 169 return s.digest()
170 170
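A sanity sketch of the fast path above: for a root revision both parents are nullid, so the result equals a plain SHA-1 over nullid + nullid + text, which is exactly what the _nullhash copy computes incrementally:

import hashlib
text = b'hello'
assert hash(text, nullid, nullid) == \
    hashlib.sha1(nullid + nullid + text).digest()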
171 171 def _trimchunk(revlog, revs, startidx, endidx=None):
172 172 """returns revs[startidx:endidx] without empty trailing revs
173 173 """
174 174 length = revlog.length
175 175
176 176 if endidx is None:
177 177 endidx = len(revs)
178 178
179 179 # Trim empty revs at the end, but never the very first revision of a chain
180 180 while endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0:
181 181 endidx -= 1
182 182
183 183 return revs[startidx:endidx]
184 184
185 185 def _slicechunk(revlog, revs):
186 186 """slice revs to reduce the amount of unrelated data to be read from disk.
187 187
188 188 ``revs`` is sliced into groups that should be read in one I/O
189 189 operation. Assumes that revs are sorted.
190 190 """
191 191 start = revlog.start
192 192 length = revlog.length
193 193
194 194 if len(revs) <= 1:
195 195 yield revs
196 196 return
197 197
198 198 startbyte = start(revs[0])
199 199 endbyte = start(revs[-1]) + length(revs[-1])
200 200 readdata = deltachainspan = endbyte - startbyte
201 201
202 202 chainpayload = sum(length(r) for r in revs)
203 203
204 204 if deltachainspan:
205 205 density = chainpayload / float(deltachainspan)
206 206 else:
207 207 density = 1.0
208 208
209 209 # Store the gaps in a heap to have them sorted by decreasing size
210 210 gapsheap = []
211 211 heapq.heapify(gapsheap)
212 212 prevend = None
213 213 for i, rev in enumerate(revs):
214 214 revstart = start(rev)
215 215 revlen = length(rev)
216 216
217 217 # Skip empty revisions to form larger holes
218 218 if revlen == 0:
219 219 continue
220 220
221 221 if prevend is not None:
222 222 gapsize = revstart - prevend
223 223 # only consider holes that are large enough
224 224 if gapsize > revlog._srmingapsize:
225 225 heapq.heappush(gapsheap, (-gapsize, i))
226 226
227 227 prevend = revstart + revlen
228 228
229 229 # Collect the indices of the largest holes until the density is acceptable
230 230 indicesheap = []
231 231 heapq.heapify(indicesheap)
232 232 while gapsheap and density < revlog._srdensitythreshold:
233 233 oppgapsize, gapidx = heapq.heappop(gapsheap)
234 234
235 235 heapq.heappush(indicesheap, gapidx)
236 236
237 237 # the gap sizes are stored as negatives so the heap sorts them
238 238 # in decreasing order
239 239 readdata -= (-oppgapsize)
240 240 if readdata > 0:
241 241 density = chainpayload / float(readdata)
242 242 else:
243 243 density = 1.0
244 244
245 245 # Cut the revs at collected indices
246 246 previdx = 0
247 247 while indicesheap:
248 248 idx = heapq.heappop(indicesheap)
249 249
250 250 chunk = _trimchunk(revlog, revs, previdx, idx)
251 251 if chunk:
252 252 yield chunk
253 253
254 254 previdx = idx
255 255
256 256 chunk = _trimchunk(revlog, revs, previdx)
257 257 if chunk:
258 258 yield chunk
259 259
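To see the density logic at work, a worked example with illustrative numbers (using the defaults set in __init__ below: density threshold 0.25, min gap size 262144):

# three revs at byte ranges [0, 1000), [400000, 400500), [400500, 401000)
# chainpayload   = 1000 + 500 + 500 == 2000
# deltachainspan = 401000 - 0       == 401000
# density        = 2000 / 401000.0  ~= 0.005, below the 0.25 threshold
# the 399000-byte gap exceeds _srmingapsize, so it is cut from the read:
# _slicechunk yields [rev0] and [rev1, rev2], and the density over the
# remaining 2000 bytes actually read rises to 1.0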
260 260 @attr.s(slots=True, frozen=True)
261 261 class _deltainfo(object):
262 262 distance = attr.ib()
263 263 deltalen = attr.ib()
264 264 data = attr.ib()
265 265 base = attr.ib()
266 266 chainbase = attr.ib()
267 267 chainlen = attr.ib()
268 268 compresseddeltalen = attr.ib()
269 269
270 270 class _deltacomputer(object):
271 271 def __init__(self, revlog):
272 272 self.revlog = revlog
273 273
274 274 def _getcandidaterevs(self, p1, p2, cachedelta):
275 275 """
276 276 Provides revisions that are interesting to diff against,
277 277 grouped by level of ease.
278 278 """
279 279 revlog = self.revlog
280 280 curr = len(revlog)
281 281 prev = curr - 1
282 282 p1r, p2r = revlog.rev(p1), revlog.rev(p2)
283 283
284 284 # should we try to build a delta?
285 285 if prev != nullrev and revlog.storedeltachains:
286 286 tested = set()
287 287 # This condition is true most of the time when processing
288 288 # changegroup data into a generaldelta repo. The only time it
289 289 # isn't true is if this is the first revision in a delta chain
290 290 # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
291 291 if cachedelta and revlog._generaldelta and revlog._lazydeltabase:
292 292 # Assume what we received from the server is a good choice
293 293 # build delta will reuse the cache
294 294 yield (cachedelta[0],)
295 295 tested.add(cachedelta[0])
296 296
297 297 if revlog._generaldelta:
298 298 # exclude already lazy tested base if any
299 299 parents = [p for p in (p1r, p2r)
300 300 if p != nullrev and p not in tested]
301 301 if parents and not revlog._aggressivemergedeltas:
302 302 # Pick whichever parent is closer to us (to minimize the
303 303 # chance of having to build a fulltext).
304 304 parents = [max(parents)]
305 305 tested.update(parents)
306 306 yield parents
307 307
308 308 if prev not in tested:
309 309 # other approaches failed; try against prev to hopefully
310 310 # save us a fulltext.
311 311 yield (prev,)
312 312
313 313 def buildtext(self, revinfo, fh):
314 314 """Builds a fulltext version of a revision
315 315
316 316 revinfo: _revisioninfo instance that contains all needed info
317 317 fh: file handle to either the .i or the .d revlog file,
318 318 depending on whether it is inlined or not
319 319 """
320 320 btext = revinfo.btext
321 321 if btext[0] is not None:
322 322 return btext[0]
323 323
324 324 revlog = self.revlog
325 325 cachedelta = revinfo.cachedelta
326 326 flags = revinfo.flags
327 327 node = revinfo.node
328 328
329 329 baserev = cachedelta[0]
330 330 delta = cachedelta[1]
331 331 # special case deltas which replace entire base; no need to decode
332 332 # base revision. this neatly avoids censored bases, which throw when
333 333 # they're decoded.
334 334 hlen = struct.calcsize(">lll")
335 335 if delta[:hlen] == mdiff.replacediffheader(revlog.rawsize(baserev),
336 336 len(delta) - hlen):
337 337 btext[0] = delta[hlen:]
338 338 else:
339 - basetext = revlog.revision(baserev, _df=fh, raw=True)
339 + # deltabase is rawtext before changed by flag processors, which is
340 + # equivalent to non-raw text
341 + basetext = revlog.revision(baserev, _df=fh, raw=False)
340 342 btext[0] = mdiff.patch(basetext, delta)
341 343
342 344 try:
343 345 res = revlog._processflags(btext[0], flags, 'read', raw=True)
344 346 btext[0], validatehash = res
345 347 if validatehash:
346 348 revlog.checkhash(btext[0], node, p1=revinfo.p1, p2=revinfo.p2)
347 349 if flags & REVIDX_ISCENSORED:
348 350 raise RevlogError(_('node %s is not censored') % node)
349 351 except CensoredNodeError:
350 352 # must pass the censored index flag to add censored revisions
351 353 if not flags & REVIDX_ISCENSORED:
352 354 raise
353 355 return btext[0]
354 356
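An illustrative note on the raw=False change above (this commit's fix, restated, with an LFS-style processor assumed on the base revision): the two read paths of the delta base differ, and incoming deltas are computed against the flag-processed text:

# revlog.revision(baserev, raw=True)  -> stored rawtext (the LFS pointer)
# revlog.revision(baserev, raw=False) -> text after the "read" processor,
#                                        i.e. the actual file contents
# a cached delta from the wire applies to the latter, so patching the
# raw=True form would yield a corrupt fulltext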
355 357 def _builddeltadiff(self, base, revinfo, fh):
356 358 revlog = self.revlog
357 359 t = self.buildtext(revinfo, fh)
358 360 if revlog.iscensored(base):
359 361 # deltas based on a censored revision must replace the
360 362 # full content in one patch, so delta works everywhere
361 363 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
362 364 delta = header + t
363 365 else:
364 366 ptext = revlog.revision(base, _df=fh, raw=True)
365 367 delta = mdiff.textdiff(ptext, t)
366 368
367 369 return delta
368 370
369 371 def _builddeltainfo(self, revinfo, base, fh):
370 372 # can we use the cached delta?
371 373 if revinfo.cachedelta and revinfo.cachedelta[0] == base:
372 374 delta = revinfo.cachedelta[1]
373 375 else:
374 376 delta = self._builddeltadiff(base, revinfo, fh)
375 377 revlog = self.revlog
376 378 header, data = revlog.compress(delta)
377 379 deltalen = len(header) + len(data)
378 380 chainbase = revlog.chainbase(base)
379 381 offset = revlog.end(len(revlog) - 1)
380 382 dist = deltalen + offset - revlog.start(chainbase)
381 383 if revlog._generaldelta:
382 384 deltabase = base
383 385 else:
384 386 deltabase = chainbase
385 387 chainlen, compresseddeltalen = revlog._chaininfo(base)
386 388 chainlen += 1
387 389 compresseddeltalen += deltalen
388 390 return _deltainfo(dist, deltalen, (header, data), deltabase,
389 391 chainbase, chainlen, compresseddeltalen)
390 392
391 393 def finddeltainfo(self, revinfo, fh):
392 394 """Find an acceptable delta against a candidate revision
393 395
394 396 revinfo: information about the revision (instance of _revisioninfo)
395 397 fh: file handle to either the .i or the .d revlog file,
396 398 depending on whether it is inlined or not
397 399
398 400 Returns the first acceptable candidate revision, as ordered by
399 401 _getcandidaterevs
400 402 """
401 403 cachedelta = revinfo.cachedelta
402 404 p1 = revinfo.p1
403 405 p2 = revinfo.p2
404 406 revlog = self.revlog
405 407
406 408 deltainfo = None
407 409 for candidaterevs in self._getcandidaterevs(p1, p2, cachedelta):
408 410 nominateddeltas = []
409 411 for candidaterev in candidaterevs:
410 412 # no delta for rawtext-changing revs (see "candelta" for why)
411 413 if revlog.flags(candidaterev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
412 414 continue
413 415 candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
414 416 if revlog._isgooddeltainfo(candidatedelta, revinfo.textlen):
415 417 nominateddeltas.append(candidatedelta)
416 418 if nominateddeltas:
417 419 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
418 420 break
419 421
420 422 return deltainfo
421 423
422 424 @attr.s(slots=True, frozen=True)
423 425 class _revisioninfo(object):
424 426 """Information about a revision that allows building its fulltext
425 427 node: expected hash of the revision
426 428 p1, p2: parent revs of the revision
427 429 btext: built text cache consisting of a one-element list
428 430 cachedelta: (baserev, uncompressed_delta) or None
431 433 flags: flags associated with the revision storage
430 432
431 433 One of btext[0] or cachedelta must be set.
432 434 """
433 435 node = attr.ib()
434 436 p1 = attr.ib()
435 437 p2 = attr.ib()
436 438 btext = attr.ib()
437 439 textlen = attr.ib()
438 440 cachedelta = attr.ib()
439 441 flags = attr.ib()
440 442
441 443 # index v0:
442 444 # 4 bytes: offset
443 445 # 4 bytes: compressed length
444 446 # 4 bytes: base rev
445 447 # 4 bytes: link rev
446 448 # 20 bytes: parent 1 nodeid
447 449 # 20 bytes: parent 2 nodeid
448 450 # 20 bytes: nodeid
449 451 indexformatv0 = struct.Struct(">4l20s20s20s")
450 452 indexformatv0_pack = indexformatv0.pack
451 453 indexformatv0_unpack = indexformatv0.unpack
452 454
453 455 class revlogoldio(object):
454 456 def __init__(self):
455 457 self.size = indexformatv0.size
456 458
457 459 def parseindex(self, data, inline):
458 460 s = self.size
459 461 index = []
460 462 nodemap = {nullid: nullrev}
461 463 n = off = 0
462 464 l = len(data)
463 465 while off + s <= l:
464 466 cur = data[off:off + s]
465 467 off += s
466 468 e = indexformatv0_unpack(cur)
467 469 # transform to revlogv1 format
468 470 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
469 471 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
470 472 index.append(e2)
471 473 nodemap[e[6]] = n
472 474 n += 1
473 475
474 476 # add the magic null revision at -1
475 477 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
476 478
477 479 return index, nodemap, None
478 480
479 481 def packentry(self, entry, node, version, rev):
480 482 if gettype(entry[0]):
481 483 raise RevlogError(_('index entry flags need revlog version 1'))
482 484 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
483 485 node(entry[5]), node(entry[6]), entry[7])
484 486 return indexformatv0_pack(*e2)
485 487
486 488 # index ng:
487 489 # 6 bytes: offset
488 490 # 2 bytes: flags
489 491 # 4 bytes: compressed length
490 492 # 4 bytes: uncompressed length
491 493 # 4 bytes: base rev
492 494 # 4 bytes: link rev
493 495 # 4 bytes: parent 1 rev
494 496 # 4 bytes: parent 2 rev
495 497 # 32 bytes: nodeid
496 498 indexformatng = struct.Struct(">Qiiiiii20s12x")
497 499 indexformatng_pack = indexformatng.pack
498 500 versionformat = struct.Struct(">I")
499 501 versionformat_pack = versionformat.pack
500 502 versionformat_unpack = versionformat.unpack
501 503
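A sketch of one v1 ("ng") index entry built with the struct above (field values are made up for illustration):

entry = (offset_type(0, 0),  # offset 0, no flags
         11,                 # compressed length
         11,                 # uncompressed length
         0,                  # base rev
         0,                  # link rev
         -1, -1,             # parent revs (none)
         b'\x12' * 20)       # nodeid, zero-padded to 32 bytes by "12x"
packed = indexformatng_pack(*entry)
assert len(packed) == indexformatng.size  # 64 bytes per entry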
502 504 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
503 505 # signed integer)
504 506 _maxentrysize = 0x7fffffff
505 507
506 508 class revlogio(object):
507 509 def __init__(self):
508 510 self.size = indexformatng.size
509 511
510 512 def parseindex(self, data, inline):
511 513 # call the C implementation to parse the index data
512 514 index, cache = parsers.parse_index2(data, inline)
513 515 return index, getattr(index, 'nodemap', None), cache
514 516
515 517 def packentry(self, entry, node, version, rev):
516 518 p = indexformatng_pack(*entry)
517 519 if rev == 0:
518 520 p = versionformat_pack(version) + p[4:]
519 521 return p
520 522
521 523 class revlog(object):
522 524 """
523 525 the underlying revision storage object
524 526
525 527 A revlog consists of two parts, an index and the revision data.
526 528
527 529 The index is a file with a fixed record size containing
528 530 information on each revision, including its nodeid (hash), the
529 531 nodeids of its parents, the position and offset of its data within
530 532 the data file, and the revision it's based on. Finally, each entry
531 533 contains a linkrev entry that can serve as a pointer to external
532 534 data.
533 535
534 536 The revision data itself is a linear collection of data chunks.
535 537 Each chunk represents a revision and is usually represented as a
536 538 delta against the previous chunk. To bound lookup time, runs of
537 539 deltas are limited to about 2 times the length of the original
538 540 version data. This makes retrieval of a version proportional to
539 541 its size, or O(1) relative to the number of revisions.
540 542
541 543 Both pieces of the revlog are written to in an append-only
542 544 fashion, which means we never need to rewrite a file to insert or
543 545 remove data, and can use some simple techniques to avoid the need
544 546 for locking while reading.
545 547
546 548 If checkambig, indexfile is opened with checkambig=True at
547 549 writing, to avoid file stat ambiguity.
548 550
549 551 If mmaplargeindex is True, and an mmapindexthreshold is set, the
550 552 index will be mmapped rather than read if it is larger than the
551 553 configured threshold.
552 554 """
553 555 def __init__(self, opener, indexfile, datafile=None, checkambig=False,
554 556 mmaplargeindex=False):
555 557 """
556 558 create a revlog object
557 559
558 560 opener is a function that abstracts the file opening operation
559 561 and can be used to implement COW semantics or the like.
560 562 """
561 563 self.indexfile = indexfile
562 564 self.datafile = datafile or (indexfile[:-2] + ".d")
563 565 self.opener = opener
564 566 # When True, indexfile is opened with checkambig=True at writing, to
565 567 # avoid file stat ambiguity.
566 568 self._checkambig = checkambig
567 569 # 3-tuple of (node, rev, text) for a raw revision.
568 570 self._cache = None
569 571 # Maps rev to chain base rev.
570 572 self._chainbasecache = util.lrucachedict(100)
571 573 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
572 574 self._chunkcache = (0, '')
573 575 # How much data to read and cache into the raw revlog data cache.
574 576 self._chunkcachesize = 65536
575 577 self._maxchainlen = None
576 578 self._aggressivemergedeltas = False
577 579 self.index = []
578 580 # Mapping of partial identifiers to full nodes.
579 581 self._pcache = {}
580 582 # Mapping of revision integer to full node.
581 583 self._nodecache = {nullid: nullrev}
582 584 self._nodepos = None
583 585 self._compengine = 'zlib'
584 586 self._maxdeltachainspan = -1
585 587 self._withsparseread = False
586 588 self._srdensitythreshold = 0.25
587 589 self._srmingapsize = 262144
588 590
589 591 mmapindexthreshold = None
590 592 v = REVLOG_DEFAULT_VERSION
591 593 opts = getattr(opener, 'options', None)
592 594 if opts is not None:
593 595 if 'revlogv2' in opts:
594 596 # version 2 revlogs always use generaldelta.
595 597 v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
596 598 elif 'revlogv1' in opts:
597 599 if 'generaldelta' in opts:
598 600 v |= FLAG_GENERALDELTA
599 601 else:
600 602 v = 0
601 603 if 'chunkcachesize' in opts:
602 604 self._chunkcachesize = opts['chunkcachesize']
603 605 if 'maxchainlen' in opts:
604 606 self._maxchainlen = opts['maxchainlen']
605 607 if 'aggressivemergedeltas' in opts:
606 608 self._aggressivemergedeltas = opts['aggressivemergedeltas']
607 609 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
608 610 if 'compengine' in opts:
609 611 self._compengine = opts['compengine']
610 612 if 'maxdeltachainspan' in opts:
611 613 self._maxdeltachainspan = opts['maxdeltachainspan']
612 614 if mmaplargeindex and 'mmapindexthreshold' in opts:
613 615 mmapindexthreshold = opts['mmapindexthreshold']
614 616 self._withsparseread = bool(opts.get('with-sparse-read', False))
615 617 if 'sparse-read-density-threshold' in opts:
616 618 self._srdensitythreshold = opts['sparse-read-density-threshold']
617 619 if 'sparse-read-min-gap-size' in opts:
618 620 self._srmingapsize = opts['sparse-read-min-gap-size']
619 621
620 622 if self._chunkcachesize <= 0:
621 623 raise RevlogError(_('revlog chunk cache size %r is not greater '
622 624 'than 0') % self._chunkcachesize)
623 625 elif self._chunkcachesize & (self._chunkcachesize - 1):
624 626 raise RevlogError(_('revlog chunk cache size %r is not a power '
625 627 'of 2') % self._chunkcachesize)
626 628
627 629 indexdata = ''
628 630 self._initempty = True
629 631 try:
630 632 f = self.opener(self.indexfile)
631 633 if (mmapindexthreshold is not None and
632 634 self.opener.fstat(f).st_size >= mmapindexthreshold):
633 635 indexdata = util.buffer(util.mmapread(f))
634 636 else:
635 637 indexdata = f.read()
636 638 f.close()
637 639 if len(indexdata) > 0:
638 640 v = versionformat_unpack(indexdata[:4])[0]
639 641 self._initempty = False
640 642 except IOError as inst:
641 643 if inst.errno != errno.ENOENT:
642 644 raise
643 645
644 646 self.version = v
645 647 self._inline = v & FLAG_INLINE_DATA
646 648 self._generaldelta = v & FLAG_GENERALDELTA
647 649 flags = v & ~0xFFFF
648 650 fmt = v & 0xFFFF
649 651 if fmt == REVLOGV0:
650 652 if flags:
651 653 raise RevlogError(_('unknown flags (%#04x) in version %d '
652 654 'revlog %s') %
653 655 (flags >> 16, fmt, self.indexfile))
654 656 elif fmt == REVLOGV1:
655 657 if flags & ~REVLOGV1_FLAGS:
656 658 raise RevlogError(_('unknown flags (%#04x) in version %d '
657 659 'revlog %s') %
658 660 (flags >> 16, fmt, self.indexfile))
659 661 elif fmt == REVLOGV2:
660 662 if flags & ~REVLOGV2_FLAGS:
661 663 raise RevlogError(_('unknown flags (%#04x) in version %d '
662 664 'revlog %s') %
663 665 (flags >> 16, fmt, self.indexfile))
664 666 else:
665 667 raise RevlogError(_('unknown version (%d) in revlog %s') %
666 668 (fmt, self.indexfile))
667 669
668 670 self.storedeltachains = True
669 671
670 672 self._io = revlogio()
671 673 if self.version == REVLOGV0:
672 674 self._io = revlogoldio()
673 675 try:
674 676 d = self._io.parseindex(indexdata, self._inline)
675 677 except (ValueError, IndexError):
676 678 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
677 679 self.index, nodemap, self._chunkcache = d
678 680 if nodemap is not None:
679 681 self.nodemap = self._nodecache = nodemap
680 682 if not self._chunkcache:
681 683 self._chunkclear()
682 684 # revnum -> (chain-length, sum-delta-length)
683 685 self._chaininfocache = {}
684 686 # revlog header -> revlog compressor
685 687 self._decompressors = {}
686 688
687 689 @util.propertycache
688 690 def _compressor(self):
689 691 return util.compengines[self._compengine].revlogcompressor()
690 692
691 693 def tip(self):
692 694 return self.node(len(self.index) - 2)
693 695 def __contains__(self, rev):
694 696 return 0 <= rev < len(self)
695 697 def __len__(self):
696 698 return len(self.index) - 1
697 699 def __iter__(self):
698 700 return iter(xrange(len(self)))
699 701 def revs(self, start=0, stop=None):
700 702 """iterate over all rev in this revlog (from start to stop)"""
701 703 step = 1
702 704 if stop is not None:
703 705 if start > stop:
704 706 step = -1
705 707 stop += step
706 708 else:
707 709 stop = len(self)
708 710 return xrange(start, stop, step)
709 711
710 712 @util.propertycache
711 713 def nodemap(self):
712 714 self.rev(self.node(0))
713 715 return self._nodecache
714 716
715 717 def hasnode(self, node):
716 718 try:
717 719 self.rev(node)
718 720 return True
719 721 except KeyError:
720 722 return False
721 723
722 724 def candelta(self, baserev, rev):
723 725 """whether two revisions (baserev, rev) can be delta-ed or not"""
724 726 # Disable delta if either rev requires a content-changing flag
725 727 # processor (ex. LFS). This is because such a flag processor can alter
726 728 # the rawtext content that the delta will be based on, and two clients
727 729 # could have the same revlog node with different flags (i.e. different
728 730 # rawtext contents) and the delta could be incompatible.
729 731 if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
730 732 or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
731 733 return False
732 734 return True
733 735
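Concretely (a sketch with a hypothetical revlog rl): a delta against an LFS revision is refused because its stored rawtext is a pointer, not file content:

# rl.flags(baserev) == REVIDX_EXTSTORED  -> bit set in
#                                           REVIDX_RAWTEXT_CHANGING_FLAGS
# rl.candelta(baserev, rev) -> False, regardless of rev's own flags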
734 736 def clearcaches(self):
735 737 self._cache = None
736 738 self._chainbasecache.clear()
737 739 self._chunkcache = (0, '')
738 740 self._pcache = {}
739 741
740 742 try:
741 743 self._nodecache.clearcaches()
742 744 except AttributeError:
743 745 self._nodecache = {nullid: nullrev}
744 746 self._nodepos = None
745 747
746 748 def rev(self, node):
747 749 try:
748 750 return self._nodecache[node]
749 751 except TypeError:
750 752 raise
751 753 except RevlogError:
752 754 # parsers.c radix tree lookup failed
753 755 if node == wdirid:
754 756 raise error.WdirUnsupported
755 757 raise LookupError(node, self.indexfile, _('no node'))
756 758 except KeyError:
757 759 # pure python cache lookup failed
758 760 n = self._nodecache
759 761 i = self.index
760 762 p = self._nodepos
761 763 if p is None:
762 764 p = len(i) - 2
763 765 for r in xrange(p, -1, -1):
764 766 v = i[r][7]
765 767 n[v] = r
766 768 if v == node:
767 769 self._nodepos = r - 1
768 770 return r
769 771 if node == wdirid:
770 772 raise error.WdirUnsupported
771 773 raise LookupError(node, self.indexfile, _('no node'))
772 774
773 775 # Accessors for index entries.
774 776
775 777 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
776 778 # are flags.
777 779 def start(self, rev):
778 780 return int(self.index[rev][0] >> 16)
779 781
780 782 def flags(self, rev):
781 783 return self.index[rev][0] & 0xFFFF
782 784
783 785 def length(self, rev):
784 786 return self.index[rev][1]
785 787
786 788 def rawsize(self, rev):
787 789 """return the length of the uncompressed text for a given revision"""
788 790 l = self.index[rev][2]
789 791 if l >= 0:
790 792 return l
791 793
792 794 t = self.revision(rev, raw=True)
793 795 return len(t)
794 796
795 797 def size(self, rev):
796 798 """length of non-raw text (processed by a "read" flag processor)"""
797 799 # fast path: if no "read" flag processor could change the content,
798 800 # size is rawsize. note: ELLIPSIS is known to not change the content.
799 801 flags = self.flags(rev)
800 802 if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
801 803 return self.rawsize(rev)
802 804
803 805 return len(self.revision(rev, raw=False))
804 806
805 807 def chainbase(self, rev):
806 808 base = self._chainbasecache.get(rev)
807 809 if base is not None:
808 810 return base
809 811
810 812 index = self.index
811 813 base = index[rev][3]
812 814 while base != rev:
813 815 rev = base
814 816 base = index[rev][3]
815 817
816 818 self._chainbasecache[rev] = base
817 819 return base
818 820
819 821 def linkrev(self, rev):
820 822 return self.index[rev][4]
821 823
822 824 def parentrevs(self, rev):
823 825 try:
824 826 entry = self.index[rev]
825 827 except IndexError:
826 828 if rev == wdirrev:
827 829 raise error.WdirUnsupported
828 830 raise
829 831
830 832 return entry[5], entry[6]
831 833
832 834 def node(self, rev):
833 835 try:
834 836 return self.index[rev][7]
835 837 except IndexError:
836 838 if rev == wdirrev:
837 839 raise error.WdirUnsupported
838 840 raise
839 841
840 842 # Derived from index values.
841 843
842 844 def end(self, rev):
843 845 return self.start(rev) + self.length(rev)
844 846
845 847 def parents(self, node):
846 848 i = self.index
847 849 d = i[self.rev(node)]
848 850 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
849 851
850 852 def chainlen(self, rev):
851 853 return self._chaininfo(rev)[0]
852 854
853 855 def _chaininfo(self, rev):
854 856 chaininfocache = self._chaininfocache
855 857 if rev in chaininfocache:
856 858 return chaininfocache[rev]
857 859 index = self.index
858 860 generaldelta = self._generaldelta
859 861 iterrev = rev
860 862 e = index[iterrev]
861 863 clen = 0
862 864 compresseddeltalen = 0
863 865 while iterrev != e[3]:
864 866 clen += 1
865 867 compresseddeltalen += e[1]
866 868 if generaldelta:
867 869 iterrev = e[3]
868 870 else:
869 871 iterrev -= 1
870 872 if iterrev in chaininfocache:
871 873 t = chaininfocache[iterrev]
872 874 clen += t[0]
873 875 compresseddeltalen += t[1]
874 876 break
875 877 e = index[iterrev]
876 878 else:
877 879 # Add text length of base since decompressing that also takes
878 880 # work. For cache hits the length is already included.
879 881 compresseddeltalen += e[1]
880 882 r = (clen, compresseddeltalen)
881 883 chaininfocache[rev] = r
882 884 return r
883 885
884 886 def _deltachain(self, rev, stoprev=None):
885 887 """Obtain the delta chain for a revision.
886 888
887 889 ``stoprev`` specifies a revision to stop at. If not specified, we
888 890 stop at the base of the chain.
889 891
890 892 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
891 893 revs in ascending order and ``stopped`` is a bool indicating whether
892 894 ``stoprev`` was hit.
893 895 """
894 896 # Try C implementation.
895 897 try:
896 898 return self.index.deltachain(rev, stoprev, self._generaldelta)
897 899 except AttributeError:
898 900 pass
899 901
900 902 chain = []
901 903
902 904 # Alias to prevent attribute lookup in tight loop.
903 905 index = self.index
904 906 generaldelta = self._generaldelta
905 907
906 908 iterrev = rev
907 909 e = index[iterrev]
908 910 while iterrev != e[3] and iterrev != stoprev:
909 911 chain.append(iterrev)
910 912 if generaldelta:
911 913 iterrev = e[3]
912 914 else:
913 915 iterrev -= 1
914 916 e = index[iterrev]
915 917
916 918 if iterrev == stoprev:
917 919 stopped = True
918 920 else:
919 921 chain.append(iterrev)
920 922 stopped = False
921 923
922 924 chain.reverse()
923 925 return chain, stopped
924 926
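For instance (a sketch, assuming a generaldelta revlog rl where rev 5's base is rev 3, rev 3's base is rev 0, and rev 0 is stored as a fulltext):

# chain, stopped = rl._deltachain(5)
# chain == [0, 3, 5]; stopped == False, since the walk ended at the
# fulltext base rather than at an explicit stoprev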
925 927 def ancestors(self, revs, stoprev=0, inclusive=False):
926 928 """Generate the ancestors of 'revs' in reverse topological order.
927 929 Does not generate revs lower than stoprev.
928 930
929 931 See the documentation for ancestor.lazyancestors for more details."""
930 932
931 933 return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
932 934 inclusive=inclusive)
933 935
934 936 def descendants(self, revs):
935 937 """Generate the descendants of 'revs' in revision order.
936 938
937 939 Yield a sequence of revision numbers starting with a child of
938 940 some rev in revs, i.e., each revision is *not* considered a
939 941 descendant of itself. Results are ordered by revision number (a
940 942 topological sort)."""
941 943 first = min(revs)
942 944 if first == nullrev:
943 945 for i in self:
944 946 yield i
945 947 return
946 948
947 949 seen = set(revs)
948 950 for i in self.revs(start=first + 1):
949 951 for x in self.parentrevs(i):
950 952 if x != nullrev and x in seen:
951 953 seen.add(i)
952 954 yield i
953 955 break
954 956
955 957 def findcommonmissing(self, common=None, heads=None):
956 958 """Return a tuple of the ancestors of common and the ancestors of heads
957 959 that are not ancestors of common. In revset terminology, we return the
958 960 tuple:
959 961
960 962 ::common, (::heads) - (::common)
961 963
962 964 The list is sorted by revision number, meaning it is
963 965 topologically sorted.
964 966
965 967 'heads' and 'common' are both lists of node IDs. If heads is
966 968 not supplied, uses all of the revlog's heads. If common is not
967 969 supplied, uses nullid."""
968 970 if common is None:
969 971 common = [nullid]
970 972 if heads is None:
971 973 heads = self.heads()
972 974
973 975 common = [self.rev(n) for n in common]
974 976 heads = [self.rev(n) for n in heads]
975 977
976 978 # we want the ancestors, but inclusive
977 979 class lazyset(object):
978 980 def __init__(self, lazyvalues):
979 981 self.addedvalues = set()
980 982 self.lazyvalues = lazyvalues
981 983
982 984 def __contains__(self, value):
983 985 return value in self.addedvalues or value in self.lazyvalues
984 986
985 987 def __iter__(self):
986 988 added = self.addedvalues
987 989 for r in added:
988 990 yield r
989 991 for r in self.lazyvalues:
990 992 if not r in added:
991 993 yield r
992 994
993 995 def add(self, value):
994 996 self.addedvalues.add(value)
995 997
996 998 def update(self, values):
997 999 self.addedvalues.update(values)
998 1000
999 1001 has = lazyset(self.ancestors(common))
1000 1002 has.add(nullrev)
1001 1003 has.update(common)
1002 1004
1003 1005 # take all ancestors from heads that aren't in has
1004 1006 missing = set()
1005 1007 visit = collections.deque(r for r in heads if r not in has)
1006 1008 while visit:
1007 1009 r = visit.popleft()
1008 1010 if r in missing:
1009 1011 continue
1010 1012 else:
1011 1013 missing.add(r)
1012 1014 for p in self.parentrevs(r):
1013 1015 if p not in has:
1014 1016 visit.append(p)
1015 1017 missing = list(missing)
1016 1018 missing.sort()
1017 1019 return has, [self.node(miss) for miss in missing]
1018 1020
1019 1021 def incrementalmissingrevs(self, common=None):
1020 1022 """Return an object that can be used to incrementally compute the
1021 1023 revision numbers of the ancestors of arbitrary sets that are not
1022 1024 ancestors of common. This is an ancestor.incrementalmissingancestors
1023 1025 object.
1024 1026
1025 1027 'common' is a list of revision numbers. If common is not supplied, uses
1026 1028 nullrev.
1027 1029 """
1028 1030 if common is None:
1029 1031 common = [nullrev]
1030 1032
1031 1033 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1032 1034
1033 1035 def findmissingrevs(self, common=None, heads=None):
1034 1036 """Return the revision numbers of the ancestors of heads that
1035 1037 are not ancestors of common.
1036 1038
1037 1039 More specifically, return a list of revision numbers corresponding to
1038 1040 nodes N such that every N satisfies the following constraints:
1039 1041
1040 1042 1. N is an ancestor of some node in 'heads'
1041 1043 2. N is not an ancestor of any node in 'common'
1042 1044
1043 1045 The list is sorted by revision number, meaning it is
1044 1046 topologically sorted.
1045 1047
1046 1048 'heads' and 'common' are both lists of revision numbers. If heads is
1047 1049 not supplied, uses all of the revlog's heads. If common is not
1048 1050 supplied, uses nullid."""
1049 1051 if common is None:
1050 1052 common = [nullrev]
1051 1053 if heads is None:
1052 1054 heads = self.headrevs()
1053 1055
1054 1056 inc = self.incrementalmissingrevs(common=common)
1055 1057 return inc.missingancestors(heads)
1056 1058
1057 1059 def findmissing(self, common=None, heads=None):
1058 1060 """Return the ancestors of heads that are not ancestors of common.
1059 1061
1060 1062 More specifically, return a list of nodes N such that every N
1061 1063 satisfies the following constraints:
1062 1064
1063 1065 1. N is an ancestor of some node in 'heads'
1064 1066 2. N is not an ancestor of any node in 'common'
1065 1067
1066 1068 The list is sorted by revision number, meaning it is
1067 1069 topologically sorted.
1068 1070
1069 1071 'heads' and 'common' are both lists of node IDs. If heads is
1070 1072 not supplied, uses all of the revlog's heads. If common is not
1071 1073 supplied, uses nullid."""
1072 1074 if common is None:
1073 1075 common = [nullid]
1074 1076 if heads is None:
1075 1077 heads = self.heads()
1076 1078
1077 1079 common = [self.rev(n) for n in common]
1078 1080 heads = [self.rev(n) for n in heads]
1079 1081
1080 1082 inc = self.incrementalmissingrevs(common=common)
1081 1083 return [self.node(r) for r in inc.missingancestors(heads)]
1082 1084
1083 1085 def nodesbetween(self, roots=None, heads=None):
1084 1086 """Return a topological path from 'roots' to 'heads'.
1085 1087
1086 1088 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1087 1089 topologically sorted list of all nodes N that satisfy both of
1088 1090 these constraints:
1089 1091
1090 1092 1. N is a descendant of some node in 'roots'
1091 1093 2. N is an ancestor of some node in 'heads'
1092 1094
1093 1095 Every node is considered to be both a descendant and an ancestor
1094 1096 of itself, so every reachable node in 'roots' and 'heads' will be
1095 1097 included in 'nodes'.
1096 1098
1097 1099 'outroots' is the list of reachable nodes in 'roots', i.e., the
1098 1100 subset of 'roots' that is returned in 'nodes'. Likewise,
1099 1101 'outheads' is the subset of 'heads' that is also in 'nodes'.
1100 1102
1101 1103 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1102 1104 unspecified, uses nullid as the only root. If 'heads' is
1103 1105 unspecified, uses the list of all of the revlog's heads."""
1104 1106 nonodes = ([], [], [])
1105 1107 if roots is not None:
1106 1108 roots = list(roots)
1107 1109 if not roots:
1108 1110 return nonodes
1109 1111 lowestrev = min([self.rev(n) for n in roots])
1110 1112 else:
1111 1113 roots = [nullid] # Everybody's a descendant of nullid
1112 1114 lowestrev = nullrev
1113 1115 if (lowestrev == nullrev) and (heads is None):
1114 1116 # We want _all_ the nodes!
1115 1117 return ([self.node(r) for r in self], [nullid], list(self.heads()))
1116 1118 if heads is None:
1117 1119 # All nodes are ancestors, so the latest ancestor is the last
1118 1120 # node.
1119 1121 highestrev = len(self) - 1
1120 1122 # Set ancestors to None to signal that every node is an ancestor.
1121 1123 ancestors = None
1122 1124 # Set heads to an empty dictionary for later discovery of heads
1123 1125 heads = {}
1124 1126 else:
1125 1127 heads = list(heads)
1126 1128 if not heads:
1127 1129 return nonodes
1128 1130 ancestors = set()
1129 1131 # Turn heads into a dictionary so we can remove 'fake' heads.
1130 1132 # Also, later we will be using it to filter out the heads we can't
1131 1133 # find from roots.
1132 1134 heads = dict.fromkeys(heads, False)
1133 1135 # Start at the top and keep marking parents until we're done.
1134 1136 nodestotag = set(heads)
1135 1137 # Remember where the top was so we can use it as a limit later.
1136 1138 highestrev = max([self.rev(n) for n in nodestotag])
1137 1139 while nodestotag:
1138 1140 # grab a node to tag
1139 1141 n = nodestotag.pop()
1140 1142 # Never tag nullid
1141 1143 if n == nullid:
1142 1144 continue
1143 1145 # A node's revision number represents its place in a
1144 1146 # topologically sorted list of nodes.
1145 1147 r = self.rev(n)
1146 1148 if r >= lowestrev:
1147 1149 if n not in ancestors:
1148 1150 # If we are possibly a descendant of one of the roots
1149 1151 # and we haven't already been marked as an ancestor
1150 1152 ancestors.add(n) # Mark as ancestor
1151 1153 # Add non-nullid parents to list of nodes to tag.
1152 1154 nodestotag.update([p for p in self.parents(n) if
1153 1155 p != nullid])
1154 1156 elif n in heads: # We've seen it before, is it a fake head?
1155 1157 # So it is; real heads should not be the ancestors of
1156 1158 # any other heads.
1157 1159 heads.pop(n)
1158 1160 if not ancestors:
1159 1161 return nonodes
1160 1162 # Now that we have our set of ancestors, we want to remove any
1161 1163 # roots that are not ancestors.
1162 1164
1163 1165 # If one of the roots was nullid, everything is included anyway.
1164 1166 if lowestrev > nullrev:
1165 1167 # But, since we weren't, let's recompute the lowest rev to not
1166 1168 # include roots that aren't ancestors.
1167 1169
1168 1170 # Filter out roots that aren't ancestors of heads
1169 1171 roots = [root for root in roots if root in ancestors]
1170 1172 # Recompute the lowest revision
1171 1173 if roots:
1172 1174 lowestrev = min([self.rev(root) for root in roots])
1173 1175 else:
1174 1176 # No more roots? Return empty list
1175 1177 return nonodes
1176 1178 else:
1177 1179 # We are descending from nullid, and don't need to care about
1178 1180 # any other roots.
1179 1181 lowestrev = nullrev
1180 1182 roots = [nullid]
1181 1183 # Transform our roots list into a set.
1182 1184 descendants = set(roots)
1183 1185 # Also, keep the original roots so we can filter out roots that aren't
1184 1186 # 'real' roots (i.e. are descended from other roots).
1185 1187 roots = descendants.copy()
1186 1188 # Our topologically sorted list of output nodes.
1187 1189 orderedout = []
1188 1190 # Don't start at nullid since we don't want nullid in our output list,
1189 1191 # and if nullid shows up in descendants, empty parents will look like
1190 1192 # they're descendants.
1191 1193 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1192 1194 n = self.node(r)
1193 1195 isdescendant = False
1194 1196 if lowestrev == nullrev: # Everybody is a descendant of nullid
1195 1197 isdescendant = True
1196 1198 elif n in descendants:
1197 1199 # n is already a descendant
1198 1200 isdescendant = True
1199 1201 # This check only needs to be done here because all the roots
1200 1202 # will start being marked as descendants before the loop.
1201 1203 if n in roots:
1202 1204 # If n was a root, check if it's a 'real' root.
1203 1205 p = tuple(self.parents(n))
1204 1206 # If any of its parents are descendants, it's not a root.
1205 1207 if (p[0] in descendants) or (p[1] in descendants):
1206 1208 roots.remove(n)
1207 1209 else:
1208 1210 p = tuple(self.parents(n))
1209 1211 # A node is a descendant if either of its parents are
1210 1212 # descendants. (We seeded the descendants set with the roots
1211 1213 # up there, remember?)
1212 1214 if (p[0] in descendants) or (p[1] in descendants):
1213 1215 descendants.add(n)
1214 1216 isdescendant = True
1215 1217 if isdescendant and ((ancestors is None) or (n in ancestors)):
1216 1218 # Only include nodes that are both descendants and ancestors.
1217 1219 orderedout.append(n)
1218 1220 if (ancestors is not None) and (n in heads):
1219 1221 # We're trying to figure out which heads are reachable
1220 1222 # from roots.
1221 1223 # Mark this head as having been reached
1222 1224 heads[n] = True
1223 1225 elif ancestors is None:
1224 1226 # Otherwise, we're trying to discover the heads.
1225 1227 # Assume this is a head because if it isn't, the next step
1226 1228 # will eventually remove it.
1227 1229 heads[n] = True
1228 1230 # But, obviously its parents aren't.
1229 1231 for p in self.parents(n):
1230 1232 heads.pop(p, None)
1231 1233 heads = [head for head, flag in heads.iteritems() if flag]
1232 1234 roots = list(roots)
1233 1235 assert orderedout
1234 1236 assert roots
1235 1237 assert heads
1236 1238 return (orderedout, roots, heads)
1237 1239
1238 1240 def headrevs(self):
1239 1241 try:
1240 1242 return self.index.headrevs()
1241 1243 except AttributeError:
1242 1244 return self._headrevs()
1243 1245
1244 1246 def computephases(self, roots):
1245 1247 return self.index.computephasesmapsets(roots)
1246 1248
1247 1249 def _headrevs(self):
1248 1250 count = len(self)
1249 1251 if not count:
1250 1252 return [nullrev]
1251 1253 # we won't iter over filtered rev so nobody is a head at start
1252 1254 ishead = [0] * (count + 1)
1253 1255 index = self.index
1254 1256 for r in self:
1255 1257 ishead[r] = 1 # I may be a head
1256 1258 e = index[r]
1257 1259 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1258 1260 return [r for r, val in enumerate(ishead) if val]
1259 1261
1260 1262 def heads(self, start=None, stop=None):
1261 1263 """return the list of all nodes that have no children
1262 1264
1263 1265 if start is specified, only heads that are descendants of
1264 1266 start will be returned
1265 1267 if stop is specified, it will consider all the revs from stop
1266 1268 as if they had no children
1267 1269 """
1268 1270 if start is None and stop is None:
1269 1271 if not len(self):
1270 1272 return [nullid]
1271 1273 return [self.node(r) for r in self.headrevs()]
1272 1274
1273 1275 if start is None:
1274 1276 start = nullid
1275 1277 if stop is None:
1276 1278 stop = []
1277 1279 stoprevs = set([self.rev(n) for n in stop])
1278 1280 startrev = self.rev(start)
1279 1281 reachable = {startrev}
1280 1282 heads = {startrev}
1281 1283
1282 1284 parentrevs = self.parentrevs
1283 1285 for r in self.revs(start=startrev + 1):
1284 1286 for p in parentrevs(r):
1285 1287 if p in reachable:
1286 1288 if r not in stoprevs:
1287 1289 reachable.add(r)
1288 1290 heads.add(r)
1289 1291 if p in heads and p not in stoprevs:
1290 1292 heads.remove(p)
1291 1293
1292 1294 return [self.node(r) for r in heads]
1293 1295
1294 1296 def children(self, node):
1295 1297 """find the children of a given node"""
1296 1298 c = []
1297 1299 p = self.rev(node)
1298 1300 for r in self.revs(start=p + 1):
1299 1301 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1300 1302 if prevs:
1301 1303 for pr in prevs:
1302 1304 if pr == p:
1303 1305 c.append(self.node(r))
1304 1306 elif p == nullrev:
1305 1307 c.append(self.node(r))
1306 1308 return c
1307 1309
1308 1310 def descendant(self, start, end):
1309 1311 if start == nullrev:
1310 1312 return True
1311 1313 for i in self.descendants([start]):
1312 1314 if i == end:
1313 1315 return True
1314 1316 elif i > end:
1315 1317 break
1316 1318 return False
1317 1319
1318 1320 def commonancestorsheads(self, a, b):
1319 1321 """calculate all the heads of the common ancestors of nodes a and b"""
1320 1322 a, b = self.rev(a), self.rev(b)
1321 1323 try:
1322 1324 ancs = self.index.commonancestorsheads(a, b)
1323 1325 except (AttributeError, OverflowError): # C implementation failed
1324 1326 ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)
1325 1327 return pycompat.maplist(self.node, ancs)
1326 1328
1327 1329 def isancestor(self, a, b):
1328 1330 """return True if node a is an ancestor of node b
1329 1331
1330 1332 The implementation of this is trivial but the use of
1331 1333 commonancestorsheads is not."""
1332 1334 return a in self.commonancestorsheads(a, b)
1333 1335
1334 1336 def ancestor(self, a, b):
1335 1337 """calculate the "best" common ancestor of nodes a and b"""
1336 1338
1337 1339 a, b = self.rev(a), self.rev(b)
1338 1340 try:
1339 1341 ancs = self.index.ancestors(a, b)
1340 1342 except (AttributeError, OverflowError):
1341 1343 ancs = ancestor.ancestors(self.parentrevs, a, b)
1342 1344 if ancs:
1343 1345 # choose a consistent winner when there's a tie
1344 1346 return min(map(self.node, ancs))
1345 1347 return nullid
1346 1348
1347 1349 def _match(self, id):
1348 1350 if isinstance(id, int):
1349 1351 # rev
1350 1352 return self.node(id)
1351 1353 if len(id) == 20:
1352 1354 # possibly a binary node
1353 1355 # odds of a binary node being all hex in ASCII are 1 in 10**25
1354 1356 try:
1355 1357 node = id
1356 1358 self.rev(node) # quick search the index
1357 1359 return node
1358 1360 except LookupError:
1359 1361 pass # may be partial hex id
1360 1362 try:
1361 1363 # str(rev)
1362 1364 rev = int(id)
1363 1365 if str(rev) != id:
1364 1366 raise ValueError
1365 1367 if rev < 0:
1366 1368 rev = len(self) + rev
1367 1369 if rev < 0 or rev >= len(self):
1368 1370 raise ValueError
1369 1371 return self.node(rev)
1370 1372 except (ValueError, OverflowError):
1371 1373 pass
1372 1374 if len(id) == 40:
1373 1375 try:
1374 1376 # a full hex nodeid?
1375 1377 node = bin(id)
1376 1378 self.rev(node)
1377 1379 return node
1378 1380 except (TypeError, LookupError):
1379 1381 pass
1380 1382
1381 1383 def _partialmatch(self, id):
1382 1384 maybewdir = wdirhex.startswith(id)
1383 1385 try:
1384 1386 partial = self.index.partialmatch(id)
1385 1387 if partial and self.hasnode(partial):
1386 1388 if maybewdir:
1387 1389 # single 'ff...' match in radix tree, ambiguous with wdir
1388 1390 raise RevlogError
1389 1391 return partial
1390 1392 if maybewdir:
1391 1393 # no 'ff...' match in radix tree, wdir identified
1392 1394 raise error.WdirUnsupported
1393 1395 return None
1394 1396 except RevlogError:
1395 1397 # parsers.c radix tree lookup gave multiple matches
1396 1398 # fast path: for unfiltered changelog, radix tree is accurate
1397 1399 if not getattr(self, 'filteredrevs', None):
1398 1400 raise LookupError(id, self.indexfile,
1399 1401 _('ambiguous identifier'))
1400 1402 # fall through to slow path that filters hidden revisions
1401 1403 except (AttributeError, ValueError):
1402 1404 # we are pure python, or key was too short to search radix tree
1403 1405 pass
1404 1406
1405 1407 if id in self._pcache:
1406 1408 return self._pcache[id]
1407 1409
1408 1410 if len(id) < 40:
1409 1411 try:
1410 1412 # hex(node)[:...]
1411 1413 l = len(id) // 2 # grab an even number of digits
1412 1414 prefix = bin(id[:l * 2])
1413 1415 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1414 1416 nl = [n for n in nl if hex(n).startswith(id) and
1415 1417 self.hasnode(n)]
1416 1418 if len(nl) > 0:
1417 1419 if len(nl) == 1 and not maybewdir:
1418 1420 self._pcache[id] = nl[0]
1419 1421 return nl[0]
1420 1422 raise LookupError(id, self.indexfile,
1421 1423 _('ambiguous identifier'))
1422 1424 if maybewdir:
1423 1425 raise error.WdirUnsupported
1424 1426 return None
1425 1427 except (TypeError, binascii.Error):
1426 1428 pass
1427 1429
1428 1430 def lookup(self, id):
1429 1431 """locate a node based on:
1430 1432 - revision number or str(revision number)
1431 1433 - nodeid or subset of hex nodeid
1432 1434 """
1433 1435 n = self._match(id)
1434 1436 if n is not None:
1435 1437 return n
1436 1438 n = self._partialmatch(id)
1437 1439 if n:
1438 1440 return n
1439 1441
1440 1442 raise LookupError(id, self.indexfile, _('no match found'))
1441 1443
1442 1444 def shortest(self, hexnode, minlength=1):
1443 1445 """Find the shortest unambiguous prefix that matches hexnode."""
1444 1446 def isvalid(test):
1445 1447 try:
1446 1448 if self._partialmatch(test) is None:
1447 1449 return False
1448 1450
1449 1451 try:
1450 1452 i = int(test)
1451 1453 # a pure int that starts with zero will not be confused
1452 1454 # with a rev; nor, obviously, will an int larger than
1453 1455 # the value of the tip rev
1454 1456 if test[0] == '0' or i > len(self):
1455 1457 return True
1456 1458 return False
1457 1459 except ValueError:
1458 1460 return True
1459 1461 except error.RevlogError:
1460 1462 return False
1461 1463 except error.WdirUnsupported:
1462 1464 # single 'ff...' match
1463 1465 return True
1464 1466
1465 1467 shortest = hexnode
1466 1468 startlength = max(6, minlength)
1467 1469 length = startlength
1468 1470 while True:
1469 1471 test = hexnode[:length]
1470 1472 if isvalid(test):
1471 1473 shortest = test
1472 1474 if length == minlength or length > startlength:
1473 1475 return shortest
1474 1476 length -= 1
1475 1477 else:
1476 1478 length += 1
1477 1479 if len(shortest) <= length:
1478 1480 return shortest
1479 1481
1480 1482 def cmp(self, node, text):
1481 1483 """compare text with a given file revision
1482 1484
1483 1485 returns True if text is different than what is stored.
1484 1486 """
1485 1487 p1, p2 = self.parents(node)
1486 1488 return hash(text, p1, p2) != node
1487 1489
1488 1490 def _cachesegment(self, offset, data):
1489 1491 """Add a segment to the revlog cache.
1490 1492
1491 1493 Accepts an absolute offset and the data that is at that location.
1492 1494 """
1493 1495 o, d = self._chunkcache
1494 1496 # try to add to existing cache
1495 1497 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1496 1498 self._chunkcache = o, d + data
1497 1499 else:
1498 1500 self._chunkcache = offset, data
1499 1501
1500 1502 def _readsegment(self, offset, length, df=None):
1501 1503 """Load a segment of raw data from the revlog.
1502 1504
1503 1505 Accepts an absolute offset, length to read, and an optional existing
1504 1506 file handle to read from.
1505 1507
1506 1508 If an existing file handle is passed, it will be seeked and the
1507 1509 original seek position will NOT be restored.
1508 1510
1509 1511 Returns a str or buffer of raw byte data.
1510 1512 """
1511 1513 if df is not None:
1512 1514 closehandle = False
1513 1515 else:
1514 1516 if self._inline:
1515 1517 df = self.opener(self.indexfile)
1516 1518 else:
1517 1519 df = self.opener(self.datafile)
1518 1520 closehandle = True
1519 1521
1520 1522 # Cache data both forward and backward around the requested
1521 1523 # data, in a fixed size window. This helps speed up operations
1522 1524 # involving reading the revlog backwards.
1523 1525 cachesize = self._chunkcachesize
1524 1526 realoffset = offset & ~(cachesize - 1)
1525 1527 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
1526 1528 - realoffset)
1527 1529 df.seek(realoffset)
1528 1530 d = df.read(reallength)
1529 1531 if closehandle:
1530 1532 df.close()
1531 1533 self._cachesegment(realoffset, d)
1532 1534 if offset != realoffset or reallength != length:
1533 1535 return util.buffer(d, offset - realoffset, length)
1534 1536 return d
1535 1537
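The window arithmetic relies on _chunkcachesize being a power of two (enforced in __init__); for example, with the default 65536:

cachesize = 65536
offset, length = 70000, 100
realoffset = offset & ~(cachesize - 1)            # 65536: round down
reallength = (((offset + length + cachesize)
               & ~(cachesize - 1)) - realoffset)  # 65536: round end up
# one aligned 64 KiB read covering [65536, 131072) satisfies the
# 100-byte request and pre-populates the cache around it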
1536 1538 def _getsegment(self, offset, length, df=None):
1537 1539 """Obtain a segment of raw data from the revlog.
1538 1540
1539 1541 Accepts an absolute offset, length of bytes to obtain, and an
1540 1542 optional file handle to the already-opened revlog. If the file
1541 1543 handle is used, its original seek position will not be preserved.
1542 1544
1543 1545 Requests for data may be returned from a cache.
1544 1546
1545 1547 Returns a str or a buffer instance of raw byte data.
1546 1548 """
1547 1549 o, d = self._chunkcache
1548 1550 l = len(d)
1549 1551
1550 1552 # is it in the cache?
1551 1553 cachestart = offset - o
1552 1554 cacheend = cachestart + length
1553 1555 if cachestart >= 0 and cacheend <= l:
1554 1556 if cachestart == 0 and cacheend == l:
1555 1557 return d # avoid a copy
1556 1558 return util.buffer(d, cachestart, cacheend - cachestart)
1557 1559
1558 1560 return self._readsegment(offset, length, df=df)
1559 1561
1560 1562 def _getsegmentforrevs(self, startrev, endrev, df=None):
1561 1563 """Obtain a segment of raw data corresponding to a range of revisions.
1562 1564
1563 1565 Accepts the start and end revisions and an optional already-open
1564 1566 file handle to be used for reading. If the file handle is used, its
1565 1567 seek position will not be preserved.
1566 1568
1567 1569 Requests for data may be satisfied by a cache.
1568 1570
1569 1571 Returns a 2-tuple of (offset, data) for the requested range of
1570 1572 revisions. Offset is the integer offset from the beginning of the
1571 1573 revlog and data is a str or buffer of the raw byte data.
1572 1574
1573 1575 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1574 1576 to determine where each revision's data begins and ends.
1575 1577 """
1576 1578 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1577 1579 # (functions are expensive).
1578 1580 index = self.index
1579 1581 istart = index[startrev]
1580 1582 start = int(istart[0] >> 16)
1581 1583 if startrev == endrev:
1582 1584 end = start + istart[1]
1583 1585 else:
1584 1586 iend = index[endrev]
1585 1587 end = int(iend[0] >> 16) + iend[1]
1586 1588
1587 1589 if self._inline:
1588 1590 start += (startrev + 1) * self._io.size
1589 1591 end += (endrev + 1) * self._io.size
1590 1592 length = end - start
1591 1593
1592 1594 return start, self._getsegment(start, length, df=df)
1593 1595
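    # Editor's sketch (non-inline revlog ``rl`` assumed): carving a single
    # revision's chunk out of the returned segment, as _chunks() does below:
    #   >>> offset, data = rl._getsegmentforrevs(5, 7)
    #   >>> s = rl.start(6) - offset
    #   >>> chunk6 = data[s:s + rl.length(6)]  # still compressed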
1594 1596 def _chunk(self, rev, df=None):
1595 1597 """Obtain a single decompressed chunk for a revision.
1596 1598
1597 1599 Accepts an integer revision and an optional already-open file handle
1598 1600 to be used for reading. If used, the seek position of the file will not
1599 1601 be preserved.
1600 1602
1601 1603 Returns a str holding uncompressed data for the requested revision.
1602 1604 """
1603 1605 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1604 1606
1605 1607 def _chunks(self, revs, df=None):
1606 1608 """Obtain decompressed chunks for the specified revisions.
1607 1609
1608 1610 Accepts an iterable of numeric revisions that are assumed to be in
1609 1611 ascending order. Also accepts an optional already-open file handle
1610 1612 to be used for reading. If used, the seek position of the file will
1611 1613 not be preserved.
1612 1614
1613 1615 This function is similar to calling ``self._chunk()`` multiple times,
1614 1616 but is faster.
1615 1617
1616 1618 Returns a list with decompressed data for each requested revision.
1617 1619 """
1618 1620 if not revs:
1619 1621 return []
1620 1622 start = self.start
1621 1623 length = self.length
1622 1624 inline = self._inline
1623 1625 iosize = self._io.size
1624 1626 buffer = util.buffer
1625 1627
1626 1628 l = []
1627 1629 ladd = l.append
1628 1630
1629 1631 if not self._withsparseread:
1630 1632 slicedchunks = (revs,)
1631 1633 else:
1632 1634 slicedchunks = _slicechunk(self, revs)
1633 1635
1634 1636 for revschunk in slicedchunks:
1635 1637 firstrev = revschunk[0]
1636 1638 # Skip trailing revisions with empty diff
1637 1639 for lastrev in revschunk[::-1]:
1638 1640 if length(lastrev) != 0:
1639 1641 break
1640 1642
1641 1643 try:
1642 1644 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1643 1645 except OverflowError:
1644 1646 # issue4215 - we can't cache a run of chunks greater than
1645 1647 # 2G on Windows
1646 1648 return [self._chunk(rev, df=df) for rev in revschunk]
1647 1649
1648 1650 decomp = self.decompress
1649 1651 for rev in revschunk:
1650 1652 chunkstart = start(rev)
1651 1653 if inline:
1652 1654 chunkstart += (rev + 1) * iosize
1653 1655 chunklength = length(rev)
1654 1656 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1655 1657
1656 1658 return l
1657 1659
1658 1660 def _chunkclear(self):
1659 1661 """Clear the raw chunk cache."""
1660 1662 self._chunkcache = (0, '')
1661 1663
1662 1664 def deltaparent(self, rev):
1663 1665 """return deltaparent of the given revision"""
1664 1666 base = self.index[rev][3]
1665 1667 if base == rev:
1666 1668 return nullrev
1667 1669 elif self._generaldelta:
1668 1670 return base
1669 1671 else:
1670 1672 return rev - 1
1671 1673
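    # Editor's note (``rl`` assumed): for a full snapshot deltaparent()
    # returns nullrev. Otherwise, with generaldelta the stored base field
    # *is* the delta parent; in the legacy layout deltas always chain
    # against rev - 1, regardless of the base field.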
1672 1674 def revdiff(self, rev1, rev2):
1673 1675 """return or calculate a delta between two revisions
1674 1676
1675 1677 The delta calculated is in binary form and is intended to be written to
1676 1678 revlog data directly. So this function needs raw revision data.
1677 1679 """
1678 1680 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1679 1681 return bytes(self._chunk(rev2))
1680 1682
1681 1683 return mdiff.textdiff(self.revision(rev1, raw=True),
1682 1684 self.revision(rev2, raw=True))
1683 1685
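    # Editor's sketch (``rl`` assumed): the delta from revdiff() patches the
    # first revision's rawtext into the second's:
    #   >>> d = rl.revdiff(1, 2)
    #   >>> mdiff.patches(rl.revision(1, raw=True), [d]) == rl.revision(2, raw=True)
    #   True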
1684 1686 def revision(self, nodeorrev, _df=None, raw=False):
1685 1687 """return an uncompressed revision of a given node or revision
1686 1688 number.
1687 1689
1688 1690 _df - an existing file handle to read from. (internal-only)
1689 1691 raw - an optional argument specifying if the revision data is to be
1690 1692 treated as raw data when applying flag transforms. 'raw' should be set
1691 1693 to True when generating changegroups or in debug commands.
1692 1694 """
1693 1695 if isinstance(nodeorrev, int):
1694 1696 rev = nodeorrev
1695 1697 node = self.node(rev)
1696 1698 else:
1697 1699 node = nodeorrev
1698 1700 rev = None
1699 1701
1700 1702 cachedrev = None
1701 1703 flags = None
1702 1704 rawtext = None
1703 1705 if node == nullid:
1704 1706 return ""
1705 1707 if self._cache:
1706 1708 if self._cache[0] == node:
1707 1709 # _cache only stores rawtext
1708 1710 if raw:
1709 1711 return self._cache[2]
1710 1712 # duplicated, but good for perf
1711 1713 if rev is None:
1712 1714 rev = self.rev(node)
1713 1715 if flags is None:
1714 1716 flags = self.flags(rev)
1715 1717 # no extra flags set, no flag processor runs, text = rawtext
1716 1718 if flags == REVIDX_DEFAULT_FLAGS:
1717 1719 return self._cache[2]
1718 1720 # rawtext is reusable. need to run flag processor
1719 1721 rawtext = self._cache[2]
1720 1722
1721 1723 cachedrev = self._cache[1]
1722 1724
1723 1725 # look up what we need to read
1724 1726 if rawtext is None:
1725 1727 if rev is None:
1726 1728 rev = self.rev(node)
1727 1729
1728 1730 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1729 1731 if stopped:
1730 1732 rawtext = self._cache[2]
1731 1733
1732 1734 # drop cache to save memory
1733 1735 self._cache = None
1734 1736
1735 1737 bins = self._chunks(chain, df=_df)
1736 1738 if rawtext is None:
1737 1739 rawtext = bytes(bins[0])
1738 1740 bins = bins[1:]
1739 1741
1740 1742 rawtext = mdiff.patches(rawtext, bins)
1741 1743 self._cache = (node, rev, rawtext)
1742 1744
1743 1745 if flags is None:
1744 1746 if rev is None:
1745 1747 rev = self.rev(node)
1746 1748 flags = self.flags(rev)
1747 1749
1748 1750 text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
1749 1751 if validatehash:
1750 1752 self.checkhash(text, node, rev=rev)
1751 1753
1752 1754 return text
1753 1755
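    # Editor's sketch: for a revision carrying a rawtext-changing flag such
    # as REVIDX_EXTSTORED (e.g. LFS), the two forms differ (``rl`` and
    # ``node`` assumed):
    #   >>> rawtext = rl.revision(node, raw=True)  # as stored; delta/hash base
    #   >>> text = rl.revision(node)               # after the read transform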
1754 1756 def hash(self, text, p1, p2):
1755 1757 """Compute a node hash.
1756 1758
1757 1759 Available as a function so that subclasses can replace the hash
1758 1760 as needed.
1759 1761 """
1760 1762 return hash(text, p1, p2)
1761 1763
1762 1764 def _processflags(self, text, flags, operation, raw=False):
1763 1765 """Inspect revision data flags and apply transforms defined by
1764 1766 registered flag processors.
1765 1767
1766 1768 ``text`` - the revision data to process
1767 1769 ``flags`` - the revision flags
1768 1770 ``operation`` - the operation being performed (read or write)
1769 1771 ``raw`` - an optional argument describing if the raw transform should be
1770 1772 applied.
1771 1773
1772 1774 This method processes the flags in the order (or reverse order if
1773 1775 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
1774 1776 flag processors registered for present flags. The order of flags defined
1775 1777 in REVIDX_FLAGS_ORDER needs to be stable, as transforms are not commutative.
1776 1778
1777 1779 Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
1778 1780 processed text and ``validatehash`` is a bool indicating whether the
1779 1781 returned text should be checked for hash integrity.
1780 1782
1781 1783 Note: If the ``raw`` argument is set, it has precedence over the
1782 1784 operation and will only update the value of ``validatehash``.
1783 1785 """
1784 1786 # fast path: no flag processors will run
1785 1787 if flags == 0:
1786 1788 return text, True
1787 1789 if operation not in ('read', 'write'):
1788 1790 raise ProgrammingError(_("invalid '%s' operation") % operation)
1789 1791 # Check all flags are known.
1790 1792 if flags & ~REVIDX_KNOWN_FLAGS:
1791 1793 raise RevlogError(_("incompatible revision flag '%#x'") %
1792 1794 (flags & ~REVIDX_KNOWN_FLAGS))
1793 1795 validatehash = True
1794 1796 # Depending on the operation (read or write), the order might be
1795 1797 # reversed due to non-commutative transforms.
1796 1798 orderedflags = REVIDX_FLAGS_ORDER
1797 1799 if operation == 'write':
1798 1800 orderedflags = reversed(orderedflags)
1799 1801
1800 1802 for flag in orderedflags:
1801 1803 # If a flagprocessor has been registered for a known flag, apply the
1802 1804 # related operation transform and update result tuple.
1803 1805 if flag & flags:
1804 1806 vhash = True
1805 1807
1806 1808 if flag not in _flagprocessors:
1807 1809 message = _("missing processor for flag '%#x'") % (flag)
1808 1810 raise RevlogError(message)
1809 1811
1810 1812 processor = _flagprocessors[flag]
1811 1813 if processor is not None:
1812 1814 readtransform, writetransform, rawtransform = processor
1813 1815
1814 1816 if raw:
1815 1817 vhash = rawtransform(self, text)
1816 1818 elif operation == 'read':
1817 1819 text, vhash = readtransform(self, text)
1818 1820 else: # write operation
1819 1821 text, vhash = writetransform(self, text)
1820 1822 validatehash = validatehash and vhash
1821 1823
1822 1824 return text, validatehash
1823 1825
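    # Editor's sketch: a processor is a (read, write, raw) triple matching
    # the dispatch above; the names below are illustrative only and would
    # be registered through the module-level addflagprocessor():
    #   >>> def _read(rl, rawtext):
    #   ...     return rawtext, True     # -> (text, validatehash)
    #   >>> def _write(rl, text):
    #   ...     return text, True        # -> (rawtext, validatehash)
    #   >>> def _raw(rl, rawtext):
    #   ...     return True              # rawtext hash matches the node
    #   >>> addflagprocessor(REVIDX_ELLIPSIS, (_read, _write, _raw))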
1824 1826 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1825 1827 """Check node hash integrity.
1826 1828
1827 1829 Available as a function so that subclasses can extend hash mismatch
1828 1830 behaviors as needed.
1829 1831 """
1830 1832 if p1 is None and p2 is None:
1831 1833 p1, p2 = self.parents(node)
1832 1834 if node != self.hash(text, p1, p2):
1833 1835 revornode = rev
1834 1836 if revornode is None:
1835 1837 revornode = templatefilters.short(hex(node))
1836 1838 raise RevlogError(_("integrity check failed on %s:%s")
1837 1839 % (self.indexfile, pycompat.bytestr(revornode)))
1838 1840
1839 1841 def checkinlinesize(self, tr, fp=None):
1840 1842 """Check if the revlog is too big for inline and convert if so.
1841 1843
1842 1844 This should be called after revisions are added to the revlog. If the
1843 1845 revlog has grown too large to be an inline revlog, it will convert it
1844 1846 to use multiple index and data files.
1845 1847 """
1846 1848 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
1847 1849 return
1848 1850
1849 1851 trinfo = tr.find(self.indexfile)
1850 1852 if trinfo is None:
1851 1853 raise RevlogError(_("%s not found in the transaction")
1852 1854 % self.indexfile)
1853 1855
1854 1856 trindex = trinfo[2]
1855 1857 if trindex is not None:
1856 1858 dataoff = self.start(trindex)
1857 1859 else:
1858 1860 # revlog was stripped at start of transaction, use all leftover data
1859 1861 trindex = len(self) - 1
1860 1862 dataoff = self.end(-2)
1861 1863
1862 1864 tr.add(self.datafile, dataoff)
1863 1865
1864 1866 if fp:
1865 1867 fp.flush()
1866 1868 fp.close()
1867 1869
1868 1870 df = self.opener(self.datafile, 'w')
1869 1871 try:
1870 1872 for r in self:
1871 1873 df.write(self._getsegmentforrevs(r, r)[1])
1872 1874 finally:
1873 1875 df.close()
1874 1876
1875 1877 fp = self.opener(self.indexfile, 'w', atomictemp=True,
1876 1878 checkambig=self._checkambig)
1877 1879 self.version &= ~FLAG_INLINE_DATA
1878 1880 self._inline = False
1879 1881 for i in self:
1880 1882 e = self._io.packentry(self.index[i], self.node, self.version, i)
1881 1883 fp.write(e)
1882 1884
1883 1885 # if we don't call close, the temp file will never replace the
1884 1886 # real index
1885 1887 fp.close()
1886 1888
1887 1889 tr.replace(self.indexfile, trindex * self._io.size)
1888 1890 self._chunkclear()
1889 1891
1890 1892 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1891 1893 node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
1892 1894 """add a revision to the log
1893 1895
1894 1896 text - the revision data to add
1895 1897 transaction - the transaction object used for rollback
1896 1898 link - the linkrev data to add
1897 1899 p1, p2 - the parent nodeids of the revision
1898 1900 cachedelta - an optional precomputed delta
1899 1901 node - nodeid of revision; typically node is not specified, and it is
1900 1902 computed by default as hash(text, p1, p2); however, subclasses might
1901 1903 use a different hashing method (and override checkhash() in that case)
1902 1904 flags - the known flags to set on the revision
1903 1905 deltacomputer - an optional _deltacomputer instance shared between
1904 1906 multiple calls
1905 1907 """
1906 1908 if link == nullrev:
1907 1909 raise RevlogError(_("attempted to add linkrev -1 to %s")
1908 1910 % self.indexfile)
1909 1911
1910 1912 if flags:
1911 1913 node = node or self.hash(text, p1, p2)
1912 1914
1913 1915 rawtext, validatehash = self._processflags(text, flags, 'write')
1914 1916
1915 1917 # If the flag processor modifies the revision data, ignore any provided
1916 1918 # cachedelta.
1917 1919 if rawtext != text:
1918 1920 cachedelta = None
1919 1921
1920 1922 if len(rawtext) > _maxentrysize:
1921 1923 raise RevlogError(
1922 1924 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
1923 1925 % (self.indexfile, len(rawtext)))
1924 1926
1925 1927 node = node or self.hash(rawtext, p1, p2)
1926 1928 if node in self.nodemap:
1927 1929 return node
1928 1930
1929 1931 if validatehash:
1930 1932 self.checkhash(rawtext, node, p1=p1, p2=p2)
1931 1933
1932 1934 return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
1933 1935 flags, cachedelta=cachedelta,
1934 1936 deltacomputer=deltacomputer)
1935 1937
1936 1938 def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
1937 1939 cachedelta=None, deltacomputer=None):
1938 1940 """add a raw revision with known flags, node and parents
1939 1941 useful when reusing a revision not stored in this revlog (e.g. received
1940 1942 over the wire, or read from an external bundle).
1941 1943 """
1942 1944 dfh = None
1943 1945 if not self._inline:
1944 1946 dfh = self.opener(self.datafile, "a+")
1945 1947 ifh = self.opener(self.indexfile, "a+", checkambig=self._checkambig)
1946 1948 try:
1947 1949 return self._addrevision(node, rawtext, transaction, link, p1, p2,
1948 1950 flags, cachedelta, ifh, dfh,
1949 1951 deltacomputer=deltacomputer)
1950 1952 finally:
1951 1953 if dfh:
1952 1954 dfh.close()
1953 1955 ifh.close()
1954 1956
1955 1957 def compress(self, data):
1956 1958 """Generate a possibly-compressed representation of data."""
1957 1959 if not data:
1958 1960 return '', data
1959 1961
1960 1962 compressed = self._compressor.compress(data)
1961 1963
1962 1964 if compressed:
1963 1965 # The revlog compressor added the header in the returned data.
1964 1966 return '', compressed
1965 1967
1966 1968 if data[0:1] == '\0':
1967 1969 return '', data
1968 1970 return 'u', data
1969 1971
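    # Editor's sketch (``rl`` assumed): the header byte returned here is
    # exactly what decompress() below dispatches on ('u' = stored literal,
    # '\0' = raw data beginning with NUL, anything else = engine header):
    #   >>> header, packed = rl.compress('some revision text')
    #   >>> bytes(rl.decompress(header + packed))
    #   'some revision text'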
1970 1972 def decompress(self, data):
1971 1973 """Decompress a revlog chunk.
1972 1974
1973 1975 The chunk is expected to begin with a header identifying the
1974 1976 format type so it can be routed to an appropriate decompressor.
1975 1977 """
1976 1978 if not data:
1977 1979 return data
1978 1980
1979 1981 # Revlogs are read much more frequently than they are written and many
1980 1982 # chunks only take microseconds to decompress, so performance is
1981 1983 # important here.
1982 1984 #
1983 1985 # We can make a few assumptions about revlogs:
1984 1986 #
1985 1987 # 1) the majority of chunks will be compressed (as opposed to inline
1986 1988 # raw data).
1987 1989 # 2) decompressing *any* data will likely be at least 10x slower than
1988 1990 # returning raw inline data.
1989 1991 # 3) we want to prioritize common and officially supported compression
1990 1992 # engines
1991 1993 #
1992 1994 # It follows that we want to optimize for "decompress compressed data
1993 1995 # when encoded with common and officially supported compression engines"
1994 1996 # case over "raw data" and "data encoded by less common or non-official
1995 1997 # compression engines." That is why we have the inline lookup first
1996 1998 # followed by the compengines lookup.
1997 1999 #
1998 2000 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
1999 2001 # compressed chunks. And this matters for changelog and manifest reads.
2000 2002 t = data[0:1]
2001 2003
2002 2004 if t == 'x':
2003 2005 try:
2004 2006 return _zlibdecompress(data)
2005 2007 except zlib.error as e:
2006 2008 raise RevlogError(_('revlog decompress error: %s') % str(e))
2007 2009 # '\0' is more common than 'u' so it goes first.
2008 2010 elif t == '\0':
2009 2011 return data
2010 2012 elif t == 'u':
2011 2013 return util.buffer(data, 1)
2012 2014
2013 2015 try:
2014 2016 compressor = self._decompressors[t]
2015 2017 except KeyError:
2016 2018 try:
2017 2019 engine = util.compengines.forrevlogheader(t)
2018 2020 compressor = engine.revlogcompressor()
2019 2021 self._decompressors[t] = compressor
2020 2022 except KeyError:
2021 2023 raise RevlogError(_('unknown compression type %r') % t)
2022 2024
2023 2025 return compressor.decompress(data)
2024 2026
2025 2027 def _isgooddeltainfo(self, d, textlen):
2026 2028 """Returns True if the given delta is good. Good means that it is within
2027 2029 the disk span, disk size, and chain length bounds that we know to be
2028 2030 performant."""
2029 2031 if d is None:
2030 2032 return False
2031 2033
2032 2034 # - 'd.distance' is the distance from the base revision -- bounding it
2033 2035 # limits the amount of I/O we need to do.
2034 2036 # - 'd.compresseddeltalen' is the sum of the total size of deltas we
2035 2037 # need to apply -- bounding it limits the amount of CPU we consume.
2036 2038
2037 2039 defaultmax = textlen * 4
2038 2040 maxdist = self._maxdeltachainspan
2039 2041 if not maxdist:
2040 2042 maxdist = d.distance # ensure the distance check below passes
2041 2043 maxdist = max(maxdist, defaultmax)
2042 2044 if (d.distance > maxdist or d.deltalen > textlen or
2043 2045 d.compresseddeltalen > textlen * 2 or
2044 2046 (self._maxchainlen and d.chainlen > self._maxchainlen)):
2045 2047 return False
2046 2048
2047 2049 return True
2048 2050
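    # Editor's illustration with concrete numbers: for textlen = 1000, a
    # candidate is rejected when d.deltalen > 1000, d.compresseddeltalen >
    # 2000, or d.chainlen exceeds _maxchainlen. The distance bound
    # max(_maxdeltachainspan, 4000) only applies when _maxdeltachainspan is
    # set; otherwise maxdist is seeded with d.distance itself, so the span
    # check cannot fire.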
2049 2051 def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
2050 2052 cachedelta, ifh, dfh, alwayscache=False,
2051 2053 deltacomputer=None):
2052 2054 """internal function to add revisions to the log
2053 2055
2054 2056 see addrevision for argument descriptions.
2055 2057
2056 2058 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2057 2059
2058 2060 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2059 2061 be used.
2060 2062
2061 2063 invariants:
2062 2064 - rawtext is optional (can be None); if not set, cachedelta must be set.
2063 2065 if both are set, they must correspond to each other.
2064 2066 """
2065 2067 if node == nullid:
2066 2068 raise RevlogError(_("%s: attempt to add null revision") %
2067 2069 (self.indexfile))
2068 2070 if node == wdirid:
2069 2071 raise RevlogError(_("%s: attempt to add wdir revision") %
2070 2072 (self.indexfile))
2071 2073
2072 2074 if self._inline:
2073 2075 fh = ifh
2074 2076 else:
2075 2077 fh = dfh
2076 2078
2077 2079 btext = [rawtext]
2078 2080
2079 2081 curr = len(self)
2080 2082 prev = curr - 1
2081 2083 offset = self.end(prev)
2082 2084 p1r, p2r = self.rev(p1), self.rev(p2)
2083 2085
2084 2086 # full versions are inserted when the needed deltas
2085 2087 # become comparable to the uncompressed text
2086 2088 if rawtext is None:
2087 textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
2089 # need the rawtext size before it is changed by flag processors,
2090 # i.e. the non-raw size. use revlog explicitly to avoid filelog's
2091 # extra logic that might remove metadata size.
2092 textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
2088 2093 cachedelta[1])
2089 2094 else:
2090 2095 textlen = len(rawtext)
2091 2096
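        # Editor's sketch: mdiff.patchedsize() derives the patched length
        # from the base length and the delta alone, without building the
        # full text:
        #   >>> d = mdiff.textdiff('abc\n', 'abcd\n')
        #   >>> mdiff.patchedsize(len('abc\n'), d)
        #   5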
2092 2097 if deltacomputer is None:
2093 2098 deltacomputer = _deltacomputer(self)
2094 2099
2095 2100 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2096 2101
2097 2102 # no delta for flag processor revision (see "candelta" for why)
2098 2103 # not calling candelta since only one revision needs test, also to
2099 2104 # avoid overhead fetching flags again.
2100 2105 if flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
2101 2106 deltainfo = None
2102 2107 else:
2103 2108 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2104 2109
2105 2110 if deltainfo is not None:
2106 2111 base = deltainfo.base
2107 2112 chainbase = deltainfo.chainbase
2108 2113 data = deltainfo.data
2109 2114 l = deltainfo.deltalen
2110 2115 else:
2111 2116 rawtext = deltacomputer.buildtext(revinfo, fh)
2112 2117 data = self.compress(rawtext)
2113 2118 l = len(data[1]) + len(data[0])
2114 2119 base = chainbase = curr
2115 2120
2116 2121 e = (offset_type(offset, flags), l, textlen,
2117 2122 base, link, p1r, p2r, node)
2118 2123 self.index.insert(-1, e)
2119 2124 self.nodemap[node] = curr
2120 2125
2121 2126 entry = self._io.packentry(e, self.node, self.version, curr)
2122 2127 self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
2123 2128
2124 2129 if alwayscache and rawtext is None:
2125 2130 rawtext = deltacomputer.buildtext(revinfo, fh)
2126 2131
2127 2132 if type(rawtext) == str: # only accept immutable objects
2128 2133 self._cache = (node, curr, rawtext)
2129 2134 self._chainbasecache[curr] = chainbase
2130 2135 return node
2131 2136
2132 2137 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
2133 2138 # Files opened in a+ mode have inconsistent behavior on various
2134 2139 # platforms. Windows requires that a file positioning call be made
2135 2140 # when the file handle transitions between reads and writes. See
2136 2141 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2137 2142 # platforms, Python or the platform itself can be buggy. Some versions
2138 2143 # of Solaris have been observed to not append at the end of the file
2139 2144 # if the file was seeked to before the end. See issue4943 for more.
2140 2145 #
2141 2146 # We work around this issue by inserting a seek() before writing.
2142 2147 # Note: This is likely not necessary on Python 3.
2143 2148 ifh.seek(0, os.SEEK_END)
2144 2149 if dfh:
2145 2150 dfh.seek(0, os.SEEK_END)
2146 2151
2147 2152 curr = len(self) - 1
2148 2153 if not self._inline:
2149 2154 transaction.add(self.datafile, offset)
2150 2155 transaction.add(self.indexfile, curr * len(entry))
2151 2156 if data[0]:
2152 2157 dfh.write(data[0])
2153 2158 dfh.write(data[1])
2154 2159 ifh.write(entry)
2155 2160 else:
2156 2161 offset += curr * self._io.size
2157 2162 transaction.add(self.indexfile, offset, curr)
2158 2163 ifh.write(entry)
2159 2164 ifh.write(data[0])
2160 2165 ifh.write(data[1])
2161 2166 self.checkinlinesize(transaction, ifh)
2162 2167
2163 2168 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
2164 2169 """
2165 2170 add a delta group
2166 2171
2167 2172 given a set of deltas, add them to the revision log. the
2168 2173 first delta is against its parent, which should be in our
2169 2174 log, the rest are against the previous delta.
2170 2175
2171 2176 If ``addrevisioncb`` is defined, it will be called with arguments of
2172 2177 this revlog and the node that was added.
2173 2178 """
2174 2179
2175 2180 nodes = []
2176 2181
2177 2182 r = len(self)
2178 2183 end = 0
2179 2184 if r:
2180 2185 end = self.end(r - 1)
2181 2186 ifh = self.opener(self.indexfile, "a+", checkambig=self._checkambig)
2182 2187 isize = r * self._io.size
2183 2188 if self._inline:
2184 2189 transaction.add(self.indexfile, end + isize, r)
2185 2190 dfh = None
2186 2191 else:
2187 2192 transaction.add(self.indexfile, isize, r)
2188 2193 transaction.add(self.datafile, end)
2189 2194 dfh = self.opener(self.datafile, "a+")
2190 2195 def flush():
2191 2196 if dfh:
2192 2197 dfh.flush()
2193 2198 ifh.flush()
2194 2199 try:
2195 2200 deltacomputer = _deltacomputer(self)
2196 2201 # loop through our set of deltas
2197 2202 for data in deltas:
2198 2203 node, p1, p2, linknode, deltabase, delta, flags = data
2199 2204 link = linkmapper(linknode)
2200 2205 flags = flags or REVIDX_DEFAULT_FLAGS
2201 2206
2202 2207 nodes.append(node)
2203 2208
2204 2209 if node in self.nodemap:
2205 2210 # this can happen if two branches make the same change
2206 2211 continue
2207 2212
2208 2213 for p in (p1, p2):
2209 2214 if p not in self.nodemap:
2210 2215 raise LookupError(p, self.indexfile,
2211 2216 _('unknown parent'))
2212 2217
2213 2218 if deltabase not in self.nodemap:
2214 2219 raise LookupError(deltabase, self.indexfile,
2215 2220 _('unknown delta base'))
2216 2221
2217 2222 baserev = self.rev(deltabase)
2218 2223
2219 2224 if baserev != nullrev and self.iscensored(baserev):
2220 2225 # if base is censored, delta must be full replacement in a
2221 2226 # single patch operation
2222 2227 hlen = struct.calcsize(">lll")
2223 2228 oldlen = self.rawsize(baserev)
2224 2229 newlen = len(delta) - hlen
2225 2230 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2226 2231 raise error.CensoredBaseError(self.indexfile,
2227 2232 self.node(baserev))
2228 2233
2229 2234 if not flags and self._peek_iscensored(baserev, delta, flush):
2230 2235 flags |= REVIDX_ISCENSORED
2231 2236
2232 2237 # We assume consumers of addrevisioncb will want to retrieve
2233 2238 # the added revision, which will require a call to
2234 2239 # revision(). revision() will fast path if there is a cache
2235 2240 # hit. So, we tell _addrevision() to always cache in this case.
2236 2241 # We're only using addgroup() in the context of changegroup
2237 2242 # generation so the revision data can always be handled as raw
2238 2243 # by the flagprocessor.
2239 2244 self._addrevision(node, None, transaction, link,
2240 2245 p1, p2, flags, (baserev, delta),
2241 2246 ifh, dfh,
2242 2247 alwayscache=bool(addrevisioncb),
2243 2248 deltacomputer=deltacomputer)
2244 2249
2245 2250 if addrevisioncb:
2246 2251 addrevisioncb(self, node)
2247 2252
2248 2253 if not dfh and not self._inline:
2249 2254 # addrevision switched from inline to conventional
2250 2255 # reopen the index
2251 2256 ifh.close()
2252 2257 dfh = self.opener(self.datafile, "a+")
2253 2258 ifh = self.opener(self.indexfile, "a+",
2254 2259 checkambig=self._checkambig)
2255 2260 finally:
2256 2261 if dfh:
2257 2262 dfh.close()
2258 2263 ifh.close()
2259 2264
2260 2265 return nodes
2261 2266
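    # Editor's note: each element consumed by addgroup() above is the
    # 7-tuple (node, p1, p2, linknode, deltabase, delta, flags), where
    # deltabase must name a node already present in this revlog (or one
    # added earlier in the same group).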
2262 2267 def iscensored(self, rev):
2263 2268 """Check if a file revision is censored."""
2264 2269 return False
2265 2270
2266 2271 def _peek_iscensored(self, baserev, delta, flush):
2267 2272 """Quickly check if a delta produces a censored revision."""
2268 2273 return False
2269 2274
2270 2275 def getstrippoint(self, minlink):
2271 2276 """find the minimum rev that must be stripped to strip the linkrev
2272 2277
2273 2278 Returns a tuple containing the minimum rev and a set of all revs that
2274 2279 have linkrevs that will be broken by this strip.
2275 2280 """
2276 2281 brokenrevs = set()
2277 2282 strippoint = len(self)
2278 2283
2279 2284 heads = {}
2280 2285 futurelargelinkrevs = set()
2281 2286 for head in self.headrevs():
2282 2287 headlinkrev = self.linkrev(head)
2283 2288 heads[head] = headlinkrev
2284 2289 if headlinkrev >= minlink:
2285 2290 futurelargelinkrevs.add(headlinkrev)
2286 2291
2287 2292 # This algorithm involves walking down the rev graph, starting at the
2288 2293 # heads. Since the revs are topologically sorted according to linkrev,
2289 2294 # once all head linkrevs are below the minlink, we know there are
2290 2295 # no more revs that could have a linkrev greater than minlink.
2291 2296 # So we can stop walking.
2292 2297 while futurelargelinkrevs:
2293 2298 strippoint -= 1
2294 2299 linkrev = heads.pop(strippoint)
2295 2300
2296 2301 if linkrev < minlink:
2297 2302 brokenrevs.add(strippoint)
2298 2303 else:
2299 2304 futurelargelinkrevs.remove(linkrev)
2300 2305
2301 2306 for p in self.parentrevs(strippoint):
2302 2307 if p != nullrev:
2303 2308 plinkrev = self.linkrev(p)
2304 2309 heads[p] = plinkrev
2305 2310 if plinkrev >= minlink:
2306 2311 futurelargelinkrevs.add(plinkrev)
2307 2312
2308 2313 return strippoint, brokenrevs
2309 2314
2310 2315 def strip(self, minlink, transaction):
2311 2316 """truncate the revlog on the first revision with a linkrev >= minlink
2312 2317
2313 2318 This function is called when we're stripping revision minlink and
2314 2319 its descendants from the repository.
2315 2320
2316 2321 We have to remove all revisions with linkrev >= minlink, because
2317 2322 the equivalent changelog revisions will be renumbered after the
2318 2323 strip.
2319 2324
2320 2325 So we truncate the revlog on the first of these revisions, and
2321 2326 trust that the caller has saved the revisions that shouldn't be
2322 2327 removed and that it'll re-add them after this truncation.
2323 2328 """
2324 2329 if len(self) == 0:
2325 2330 return
2326 2331
2327 2332 rev, _ = self.getstrippoint(minlink)
2328 2333 if rev == len(self):
2329 2334 return
2330 2335
2331 2336 # first truncate the files on disk
2332 2337 end = self.start(rev)
2333 2338 if not self._inline:
2334 2339 transaction.add(self.datafile, end)
2335 2340 end = rev * self._io.size
2336 2341 else:
2337 2342 end += rev * self._io.size
2338 2343
2339 2344 transaction.add(self.indexfile, end)
2340 2345
2341 2346 # then reset internal state in memory to forget those revisions
2342 2347 self._cache = None
2343 2348 self._chaininfocache = {}
2344 2349 self._chunkclear()
2345 2350 for x in xrange(rev, len(self)):
2346 2351 del self.nodemap[self.node(x)]
2347 2352
2348 2353 del self.index[rev:-1]
2349 2354
2350 2355 def checksize(self):
2351 2356 expected = 0
2352 2357 if len(self):
2353 2358 expected = max(0, self.end(len(self) - 1))
2354 2359
2355 2360 try:
2356 2361 f = self.opener(self.datafile)
2357 2362 f.seek(0, 2)
2358 2363 actual = f.tell()
2359 2364 f.close()
2360 2365 dd = actual - expected
2361 2366 except IOError as inst:
2362 2367 if inst.errno != errno.ENOENT:
2363 2368 raise
2364 2369 dd = 0
2365 2370
2366 2371 try:
2367 2372 f = self.opener(self.indexfile)
2368 2373 f.seek(0, 2)
2369 2374 actual = f.tell()
2370 2375 f.close()
2371 2376 s = self._io.size
2372 2377 i = max(0, actual // s)
2373 2378 di = actual - (i * s)
2374 2379 if self._inline:
2375 2380 databytes = 0
2376 2381 for r in self:
2377 2382 databytes += max(0, self.length(r))
2378 2383 dd = 0
2379 2384 di = actual - len(self) * s - databytes
2380 2385 except IOError as inst:
2381 2386 if inst.errno != errno.ENOENT:
2382 2387 raise
2383 2388 di = 0
2384 2389
2385 2390 return (dd, di)
2386 2391
2387 2392 def files(self):
2388 2393 res = [self.indexfile]
2389 2394 if not self._inline:
2390 2395 res.append(self.datafile)
2391 2396 return res
2392 2397
2393 2398 DELTAREUSEALWAYS = 'always'
2394 2399 DELTAREUSESAMEREVS = 'samerevs'
2395 2400 DELTAREUSENEVER = 'never'
2396 2401
2397 2402 DELTAREUSEFULLADD = 'fulladd'
2398 2403
2399 2404 DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}
2400 2405
2401 2406 def clone(self, tr, destrevlog, addrevisioncb=None,
2402 2407 deltareuse=DELTAREUSESAMEREVS, aggressivemergedeltas=None):
2403 2408 """Copy this revlog to another, possibly with format changes.
2404 2409
2405 2410 The destination revlog will contain the same revisions and nodes.
2406 2411 However, it may not be bit-for-bit identical due to e.g. delta encoding
2407 2412 differences.
2408 2413
2409 2414 The ``deltareuse`` argument controls how deltas from the existing revlog
2410 2415 are preserved in the destination revlog. The argument can have the
2411 2416 following values:
2412 2417
2413 2418 DELTAREUSEALWAYS
2414 2419 Deltas will always be reused (if possible), even if the destination
2415 2420 revlog would not select the same revisions for the delta. This is the
2416 2421 fastest mode of operation.
2417 2422 DELTAREUSESAMEREVS
2418 2423 Deltas will be reused if the destination revlog would pick the same
2419 2424 revisions for the delta. This mode strikes a balance between speed
2420 2425 and optimization.
2421 2426 DELTAREUSENEVER
2422 2427 Deltas will never be reused. This is the slowest mode of execution.
2423 2428 This mode can be used to recompute deltas (e.g. if the diff/delta
2424 2429 algorithm changes).
2425 2430
2426 2431 Delta computation can be slow, so the choice of delta reuse policy can
2427 2432 significantly affect run time.
2428 2433
2429 2434 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2430 2435 two extremes. Deltas will be reused if they are appropriate. But if the
2431 2436 delta could choose a better revision, it will do so. This means if you
2432 2437 are converting a non-generaldelta revlog to a generaldelta revlog,
2433 2438 deltas will be recomputed if the delta's parent isn't a parent of the
2434 2439 revision.
2435 2440
2436 2441 In addition to the delta policy, the ``aggressivemergedeltas`` argument
2437 2442 controls whether to compute deltas against both parents for merges.
2438 2443 If unset, the destination revlog's current setting is preserved.
2439 2444 """
2440 2445 if deltareuse not in self.DELTAREUSEALL:
2441 2446 raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)
2442 2447
2443 2448 if len(destrevlog):
2444 2449 raise ValueError(_('destination revlog is not empty'))
2445 2450
2446 2451 if getattr(self, 'filteredrevs', None):
2447 2452 raise ValueError(_('source revlog has filtered revisions'))
2448 2453 if getattr(destrevlog, 'filteredrevs', None):
2449 2454 raise ValueError(_('destination revlog has filtered revisions'))
2450 2455
2451 2456 # lazydeltabase controls whether to reuse a cached delta, if possible.
2452 2457 oldlazydeltabase = destrevlog._lazydeltabase
2453 2458 oldamd = destrevlog._aggressivemergedeltas
2454 2459
2455 2460 try:
2456 2461 if deltareuse == self.DELTAREUSEALWAYS:
2457 2462 destrevlog._lazydeltabase = True
2458 2463 elif deltareuse == self.DELTAREUSESAMEREVS:
2459 2464 destrevlog._lazydeltabase = False
2460 2465
2461 2466 destrevlog._aggressivemergedeltas = aggressivemergedeltas or oldamd
2462 2467
2463 2468 populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
2464 2469 self.DELTAREUSESAMEREVS)
2465 2470
2466 2471 deltacomputer = _deltacomputer(destrevlog)
2467 2472 index = self.index
2468 2473 for rev in self:
2469 2474 entry = index[rev]
2470 2475
2471 2476 # Some classes override linkrev to take filtered revs into
2472 2477 # account. Use raw entry from index.
2473 2478 flags = entry[0] & 0xffff
2474 2479 linkrev = entry[4]
2475 2480 p1 = index[entry[5]][7]
2476 2481 p2 = index[entry[6]][7]
2477 2482 node = entry[7]
2478 2483
2479 2484 # (Possibly) reuse the delta from the revlog if allowed and
2480 2485 # the revlog chunk is a delta.
2481 2486 cachedelta = None
2482 2487 rawtext = None
2483 2488 if populatecachedelta:
2484 2489 dp = self.deltaparent(rev)
2485 2490 if dp != nullrev:
2486 2491 cachedelta = (dp, str(self._chunk(rev)))
2487 2492
2488 2493 if not cachedelta:
2489 2494 rawtext = self.revision(rev, raw=True)
2490 2495
2491 2496
2492 2497 if deltareuse == self.DELTAREUSEFULLADD:
2493 2498 destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
2494 2499 cachedelta=cachedelta,
2495 2500 node=node, flags=flags,
2496 2501 deltacomputer=deltacomputer)
2497 2502 else:
2498 2503 ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
2499 2504 checkambig=False)
2500 2505 dfh = None
2501 2506 if not destrevlog._inline:
2502 2507 dfh = destrevlog.opener(destrevlog.datafile, 'a+')
2503 2508 try:
2504 2509 destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
2505 2510 p2, flags, cachedelta, ifh, dfh,
2506 2511 deltacomputer=deltacomputer)
2507 2512 finally:
2508 2513 if dfh:
2509 2514 dfh.close()
2510 2515 ifh.close()
2511 2516
2512 2517 if addrevisioncb:
2513 2518 addrevisioncb(self, rev, node)
2514 2519 finally:
2515 2520 destrevlog._lazydeltabase = oldlazydeltabase
2516 2521 destrevlog._aggressivemergedeltas = oldamd
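# Editor's sketch (names assumed): recomputing every delta while copying,
# e.g. after changing the delta algorithm, inside an open transaction:
#   >>> src.clone(tr, dst, deltareuse=src.DELTAREUSENEVER)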
@@ -1,97 +1,97
1 1 In this test, we want to test LFS bundle application on both LFS and non-LFS
2 2 repos.
3 3
4 4 To make it more interesting, the file revisions will contain hg filelog
5 5 metadata ('\1\n'). The bundle will have 1 file revision overlapping with the
6 6 destination repo.
7 7
8 8 # rev 1 2 3
9 9 # repo: yes yes no
10 10 # bundle: no (base) yes yes (deltabase: 2 if possible)
11 11
12 12 It is interesting because rev 2 could have been stored as LFS in the repo, and
13 13 non-LFS in the bundle; or vice-versa.
14 14
15 15 Init
16 16
17 17 $ cat >> $HGRCPATH << EOF
18 18 > [extensions]
19 19 > lfs=
20 20 > drawdag=$TESTDIR/drawdag.py
21 21 > [lfs]
22 22 > url=file:$TESTTMP/lfs-remote
23 23 > EOF
24 24
25 25 Helper functions
26 26
27 27 $ commitxy() {
28 28 > hg debugdrawdag "$@" <<'EOS'
29 29 > Y # Y/X=\1\nAAAA\nE\nF
30 30 > | # Y/Y=\1\nAAAA\nG\nH
31 31 > X # X/X=\1\nAAAA\nC\n
32 32 > # X/Y=\1\nAAAA\nD\n
33 33 > EOS
34 34 > }
35 35
36 36 $ commitz() {
37 37 > hg debugdrawdag "$@" <<'EOS'
38 38 > Z # Z/X=\1\nAAAA\nI\n
39 39 > | # Z/Y=\1\nAAAA\nJ\n
40 40 > | # Z/Z=\1\nZ
41 41 > Y
42 42 > EOS
43 43 > }
44 44
45 45 $ enablelfs() {
46 46 > cat >> .hg/hgrc <<EOF
47 47 > [lfs]
48 48 > track=all()
49 49 > EOF
50 50 > }
51 51
52 52 Generate bundles
53 53
54 54 $ for i in normal lfs; do
55 55 > NAME=src-$i
56 56 > hg init $TESTTMP/$NAME
57 57 > cd $TESTTMP/$NAME
58 58 > [ $i = lfs ] && enablelfs
59 59 > commitxy
60 60 > commitz
61 61 > hg bundle -q --base X -r Y+Z $TESTTMP/$NAME.bundle
62 62 > SRCNAMES="$SRCNAMES $NAME"
63 63 > done
64 64
65 65 Prepare destination repos
66 66
67 67 $ for i in normal lfs; do
68 68 > NAME=dst-$i
69 69 > hg init $TESTTMP/$NAME
70 70 > cd $TESTTMP/$NAME
71 71 > [ $i = lfs ] && enablelfs
72 72 > commitxy
73 73 > DSTNAMES="$DSTNAMES $NAME"
74 74 > done
75 75
76 76 Apply bundles
77 77
78 78 $ for i in $SRCNAMES; do
79 79 > for j in $DSTNAMES; do
80 80 > echo ---- Applying $i.bundle to $j ----
81 81 > cp -R $TESTTMP/$j $TESTTMP/tmp-$i-$j
82 82 > cd $TESTTMP/tmp-$i-$j
83 83 > if hg unbundle $TESTTMP/$i.bundle -q 2>/dev/null; then
84 84 > hg verify -q && echo OK
85 85 > else
86 86 > echo CRASHED
87 87 > fi
88 88 > done
89 89 > done
90 90 ---- Applying src-normal.bundle to dst-normal ----
91 91 OK
92 92 ---- Applying src-normal.bundle to dst-lfs ----
93 CRASHED
93 OK
94 94 ---- Applying src-lfs.bundle to dst-normal ----
95 95 OK
96 96 ---- Applying src-lfs.bundle to dst-lfs ----
97 97 OK