revlog: document and test _slicechunk
Boris Feld
r38659:f62b8fb0 default
@@ -1,2710 +1,2744 @@
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import collections
17 17 import contextlib
18 18 import errno
19 19 import hashlib
20 20 import heapq
21 21 import os
22 22 import re
23 23 import struct
24 24 import zlib
25 25
26 26 # import stuff from node for others to import from revlog
27 27 from .node import (
28 28 bin,
29 29 hex,
30 30 nullid,
31 31 nullrev,
32 32 wdirfilenodeids,
33 33 wdirhex,
34 34 wdirid,
35 35 wdirrev,
36 36 )
37 37 from .i18n import _
38 38 from .thirdparty import (
39 39 attr,
40 40 )
41 41 from . import (
42 42 ancestor,
43 43 error,
44 44 mdiff,
45 45 policy,
46 46 pycompat,
47 47 templatefilters,
48 48 util,
49 49 )
50 50 from .utils import (
51 51 stringutil,
52 52 )
53 53
54 54 parsers = policy.importmod(r'parsers')
55 55
56 56 # Aliased for performance.
57 57 _zlibdecompress = zlib.decompress
58 58
59 59 # revlog header flags
60 60 REVLOGV0 = 0
61 61 REVLOGV1 = 1
62 62 # Dummy value until file format is finalized.
63 63 # Reminder: change the bounds check in revlog.__init__ when this is changed.
64 64 REVLOGV2 = 0xDEAD
65 65 FLAG_INLINE_DATA = (1 << 16)
66 66 FLAG_GENERALDELTA = (1 << 17)
67 67 REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA
68 68 REVLOG_DEFAULT_FORMAT = REVLOGV1
69 69 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
70 70 REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA
71 71 REVLOGV2_FLAGS = REVLOGV1_FLAGS
72 72
73 73 # revlog index flags
74 74 REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
75 75 REVIDX_ELLIPSIS = (1 << 14) # revision hash does not match data (narrowhg)
76 76 REVIDX_EXTSTORED = (1 << 13) # revision data is stored externally
77 77 REVIDX_DEFAULT_FLAGS = 0
78 78 # stable order in which flags need to be processed and their processors applied
79 79 REVIDX_FLAGS_ORDER = [
80 80 REVIDX_ISCENSORED,
81 81 REVIDX_ELLIPSIS,
82 82 REVIDX_EXTSTORED,
83 83 ]
84 84 REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)
85 85 # bitmask for flags that could cause rawdata content change
86 86 REVIDX_RAWTEXT_CHANGING_FLAGS = REVIDX_ISCENSORED | REVIDX_EXTSTORED
87 87
88 88 # max size of revlog with inline data
89 89 _maxinline = 131072
90 90 _chunksize = 1048576
91 91
92 92 RevlogError = error.RevlogError
93 93 LookupError = error.LookupError
94 94 CensoredNodeError = error.CensoredNodeError
95 95 ProgrammingError = error.ProgrammingError
96 96
97 97 # Store flag processors (cf. 'addflagprocessor()' to register)
98 98 _flagprocessors = {
99 99 REVIDX_ISCENSORED: None,
100 100 }
101 101
102 102 _mdre = re.compile('\1\n')
103 103 def parsemeta(text):
104 104 """return (metadatadict, metadatasize)"""
105 105 # text can be buffer, so we can't use .startswith or .index
106 106 if text[:2] != '\1\n':
107 107 return None, None
108 108 s = _mdre.search(text, 2).start()
109 109 mtext = text[2:s]
110 110 meta = {}
111 111 for l in mtext.splitlines():
112 112 k, v = l.split(": ", 1)
113 113 meta[k] = v
114 114 return meta, (s + 2)
115 115
116 116 def packmeta(meta, text):
117 117 keys = sorted(meta)
118 118 metatext = "".join("%s: %s\n" % (k, meta[k]) for k in keys)
119 119 return "\1\n%s\1\n%s" % (metatext, text)
120 120
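# A quick doctest-style sketch (illustrative, not part of this changeset)
# of how packmeta and parsemeta round-trip metadata through the "\1\n"
# framing used above:
#
#   >>> meta = {'copy': 'a.txt', 'copyrev': '0' * 40}
#   >>> framed = packmeta(meta, 'file contents')
#   >>> parsemeta(framed)[0] == meta
#   True
#   >>> framed[parsemeta(framed)[1]:]
#   'file contents'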
121 121 def _censoredtext(text):
122 122 m, offs = parsemeta(text)
123 123 return m and "censored" in m
124 124
125 125 def addflagprocessor(flag, processor):
126 126 """Register a flag processor on a revision data flag.
127 127
128 128 Invariant:
129 129 - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
130 130 and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
131 131 - Only one flag processor can be registered on a specific flag.
132 132 - flagprocessors must be 3-tuples of functions (read, write, raw) with the
133 133 following signatures:
134 134 - (read) f(self, rawtext) -> text, bool
135 135 - (write) f(self, text) -> rawtext, bool
136 136 - (raw) f(self, rawtext) -> bool
137 137 "text" is presented to the user. "rawtext" is stored in revlog data, not
138 138 directly visible to the user.
139 139 The boolean returned by these transforms is used to determine whether
140 140 the returned text can be used for hash integrity checking. For example,
141 141 if "write" returns False, then "text" is used to generate the hash. If
142 142 "write" returns True, the "rawtext" returned by "write" should be
143 143 used to generate the hash. Usually, "write" and "read" return
144 144 different booleans, and "raw" returns the same boolean as "write".
145 145
146 146 Note: The 'raw' transform is used for changegroup generation and in some
147 147 debug commands. In this case the transform only indicates whether the
148 148 contents can be used for hash integrity checks.
149 149 """
150 150 if not flag & REVIDX_KNOWN_FLAGS:
151 151 msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
152 152 raise ProgrammingError(msg)
153 153 if flag not in REVIDX_FLAGS_ORDER:
154 154 msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
155 155 raise ProgrammingError(msg)
156 156 if flag in _flagprocessors:
157 157 msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
158 158 raise error.Abort(msg)
159 159 _flagprocessors[flag] = processor
160 160
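# A minimal sketch of the (read, write, raw) contract described above,
# using a hypothetical reversal transform; REVIDX_EXTSTORED is reused
# here purely for illustration:
#
#   >>> def _readrev(self, rawtext):
#   ...     return rawtext[::-1], True    # transformed text is hash-safe
#   >>> def _writerev(self, text):
#   ...     return text[::-1], False      # hash the pre-transform "text"
#   >>> def _rawrev(self, rawtext):
#   ...     return False                  # same boolean as "write"
#   >>> addflagprocessor(REVIDX_EXTSTORED, (_readrev, _writerev, _rawrev))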
161 161 def getoffset(q):
162 162 return int(q >> 16)
163 163
164 164 def gettype(q):
165 165 return int(q & 0xFFFF)
166 166
167 167 def offset_type(offset, type):
168 168 if (type & ~REVIDX_KNOWN_FLAGS) != 0:
169 169 raise ValueError('unknown revlog index flags')
170 170 return int(int(offset) << 16 | type)
171 171
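# The first index field packs a 48-bit data-file offset together with the
# 16 flag bits, so getoffset/gettype invert offset_type. An illustrative
# doctest-style sketch:
#
#   >>> v = offset_type(1024, REVIDX_ISCENSORED)
#   >>> (getoffset(v), gettype(v)) == (1024, REVIDX_ISCENSORED)
#   True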
172 172 _nullhash = hashlib.sha1(nullid)
173 173
174 174 def hash(text, p1, p2):
175 175 """generate a hash from the given text and its parent hashes
176 176
177 177 This hash combines both the current file contents and its history
178 178 in a manner that makes it easy to distinguish nodes with the same
179 179 content in the revision graph.
180 180 """
181 181 # As of now, if one of the parent nodes is null, p2 is null
182 182 if p2 == nullid:
183 183 # deep copy of a hash is faster than creating one
184 184 s = _nullhash.copy()
185 185 s.update(p1)
186 186 else:
187 187 # none of the parent nodes are nullid
188 188 if p1 < p2:
189 189 a = p1
190 190 b = p2
191 191 else:
192 192 a = p2
193 193 b = p1
194 194 s = hashlib.sha1(a)
195 195 s.update(b)
196 196 s.update(text)
197 197 return s.digest()
198 198
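# Because the two non-null parents are sorted before hashing, the node id
# is independent of parent order. An illustrative sketch (os is already
# imported above):
#
#   >>> a, b = os.urandom(20), os.urandom(20)
#   >>> hash('data', a, b) == hash('data', b, a)
#   True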
199 199 class _testrevlog(object):
200 200 """minimalist fake revlog to use in doctests"""
201 201
202 202 def __init__(self, data, density=0.5, mingap=0):
203 203 """data is a list of revision payload boundaries"""
204 204 self._data = data
205 205 self._srdensitythreshold = density
206 206 self._srmingapsize = mingap
207 207
208 208 def start(self, rev):
209 209 if rev == 0:
210 210 return 0
211 211 return self._data[rev - 1]
212 212
213 213 def end(self, rev):
214 214 return self._data[rev]
215 215
216 216 def length(self, rev):
217 217 return self.end(rev) - self.start(rev)
218 218
219 219 def _trimchunk(revlog, revs, startidx, endidx=None):
220 220 """returns revs[startidx:endidx] without empty trailing revs
221 221
222 222 Doctest Setup
223 223 >>> revlog = _testrevlog([
224 224 ... 5, #0
225 225 ... 10, #1
226 226 ... 12, #2
227 227 ... 12, #3 (empty)
228 228 ... 17, #4
229 229 ... 21, #5
230 230 ... 21, #6 (empty)
231 231 ... ])
232 232
233 233 Contiguous cases:
234 234 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
235 235 [0, 1, 2, 3, 4, 5]
236 236 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
237 237 [0, 1, 2, 3, 4]
238 238 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
239 239 [0, 1, 2]
240 240 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
241 241 [2]
242 242 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
243 243 [3, 4, 5]
244 244 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
245 245 [3, 4]
246 246
247 247 Discontiguous cases:
248 248 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
249 249 [1, 3, 5]
250 250 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
251 251 [1]
252 252 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
253 253 [3, 5]
254 254 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
255 255 [3, 5]
256 256 """
257 257 length = revlog.length
258 258
259 259 if endidx is None:
260 260 endidx = len(revs)
261 261
262 262 # Trim empty revs at the end, but never the very first revision of a chain
263 263 while endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0:
264 264 endidx -= 1
265 265
266 266 return revs[startidx:endidx]
267 267
268 268 def _segmentspan(revlog, revs):
269 269 """Get the byte span of a segment of revisions
270 270
271 271 revs is a sorted array of revision numbers
272 272
273 273 >>> revlog = _testrevlog([
274 274 ... 5, #0
275 275 ... 10, #1
276 276 ... 12, #2
277 277 ... 12, #3 (empty)
278 278 ... 17, #4
279 279 ... ])
280 280
281 281 >>> _segmentspan(revlog, [0, 1, 2, 3, 4])
282 282 17
283 283 >>> _segmentspan(revlog, [0, 4])
284 284 17
285 285 >>> _segmentspan(revlog, [3, 4])
286 286 5
287 287 >>> _segmentspan(revlog, [1, 2, 3,])
288 288 7
289 289 >>> _segmentspan(revlog, [1, 3])
290 290 7
291 291 """
292 292 if not revs:
293 293 return 0
294 294 return revlog.end(revs[-1]) - revlog.start(revs[0])
295 295
296 296 def _slicechunk(revlog, revs):
297 297 """slice revs to reduce the amount of unrelated data to be read from disk.
298 298
299 299 ``revs`` is sliced into groups that should each be read in one go.
300 300 Assumes that the revs are sorted.
301
302 The initial chunk is sliced until the overall density (payload/chunks-span
303 ratio) is above `revlog._srdensitythreshold`. No gap smaller than
304 `revlog._srmingapsize` is skipped.
305
306 >>> revlog = _testrevlog([
307 ... 5, #00 (5)
308 ... 10, #01 (5)
309 ... 12, #02 (2)
310 ... 12, #03 (empty)
311 ... 27, #04 (15)
312 ... 31, #05 (4)
313 ... 31, #06 (empty)
314 ... 42, #07 (11)
315 ... 47, #08 (5)
316 ... 47, #09 (empty)
317 ... 48, #10 (1)
318 ... 51, #11 (3)
319 ... 74, #12 (23)
320 ... 85, #13 (11)
321 ... 86, #14 (1)
322 ... 91, #15 (5)
323 ... ])
324
325 >>> list(_slicechunk(revlog, range(16)))
326 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
327 >>> list(_slicechunk(revlog, [0, 15]))
328 [[0], [15]]
329 >>> list(_slicechunk(revlog, [0, 11, 15]))
330 [[0], [11], [15]]
331 >>> list(_slicechunk(revlog, [0, 11, 13, 15]))
332 [[0], [11, 13, 15]]
333 >>> list(_slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
334 [[1, 2], [5, 8, 10, 11], [14]]
301 335 """
302 336 start = revlog.start
303 337 length = revlog.length
304 338
305 339 if len(revs) <= 1:
306 340 yield revs
307 341 return
308 342
309 343 readdata = deltachainspan = _segmentspan(revlog, revs)
310 344
311 345 if deltachainspan < revlog._srmingapsize:
312 346 yield revs
313 347 return
314 348
315 349 chainpayload = sum(length(r) for r in revs)
316 350
317 351 if deltachainspan:
318 352 density = chainpayload / float(deltachainspan)
319 353 else:
320 354 density = 1.0
321 355
322 356 if density >= revlog._srdensitythreshold:
323 357 yield revs
324 358 return
325 359
326 360 # Store the gaps in a heap to have them sorted by decreasing size
327 361 gapsheap = []
328 362 heapq.heapify(gapsheap)
329 363 prevend = None
330 364 for i, rev in enumerate(revs):
331 365 revstart = start(rev)
332 366 revlen = length(rev)
333 367
334 368 # Skip empty revisions to form larger holes
335 369 if revlen == 0:
336 370 continue
337 371
338 372 if prevend is not None:
339 373 gapsize = revstart - prevend
340 374 # only consider holes that are large enough
341 375 if gapsize > revlog._srmingapsize:
342 376 heapq.heappush(gapsheap, (-gapsize, i))
343 377
344 378 prevend = revstart + revlen
345 379
346 380 # Collect the indices of the largest holes until the density is acceptable
347 381 indicesheap = []
348 382 heapq.heapify(indicesheap)
349 383 while gapsheap and density < revlog._srdensitythreshold:
350 384 oppgapsize, gapidx = heapq.heappop(gapsheap)
351 385
352 386 heapq.heappush(indicesheap, gapidx)
353 387
354 388 # the gap sizes are stored as negatives so the heap yields them
355 389 # in decreasing order
356 390 readdata -= (-oppgapsize)
357 391 if readdata > 0:
358 392 density = chainpayload / float(readdata)
359 393 else:
360 394 density = 1.0
361 395
362 396 # Cut the revs at collected indices
363 397 previdx = 0
364 398 while indicesheap:
365 399 idx = heapq.heappop(indicesheap)
366 400
367 401 chunk = _trimchunk(revlog, revs, previdx, idx)
368 402 if chunk:
369 403 yield chunk
370 404
371 405 previdx = idx
372 406
373 407 chunk = _trimchunk(revlog, revs, previdx)
374 408 if chunk:
375 409 yield chunk
376 410
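# A worked pass (illustrative, not part of this changeset) over the
# doctest revlog above: slicing [1, 2, 3, 5, 8, 10, 11, 14] starts from a
# span of end(14) - start(1) = 86 - 5 = 81 bytes carrying only 21 bytes of
# payload, i.e. density ~0.26. Cutting the largest gap (34 bytes, before
# rev 14) raises density to 21/47 ~0.45; cutting the next one (15 bytes,
# before rev 5) reaches 21/32 ~0.66 >= 0.5, so slicing stops there and the
# chunks come out as [[1, 2], [5, 8, 10, 11], [14]] (the trailing empty
# rev 3 is trimmed from the first chunk).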
377 411 @attr.s(slots=True, frozen=True)
378 412 class _deltainfo(object):
379 413 distance = attr.ib()
380 414 deltalen = attr.ib()
381 415 data = attr.ib()
382 416 base = attr.ib()
383 417 chainbase = attr.ib()
384 418 chainlen = attr.ib()
385 419 compresseddeltalen = attr.ib()
386 420
387 421 class _deltacomputer(object):
388 422 def __init__(self, revlog):
389 423 self.revlog = revlog
390 424
391 425 def _getcandidaterevs(self, p1, p2, cachedelta):
392 426 """
393 427 Provides revisions that are worth diffing against, grouped from
394 428 cheapest to most expensive to try.
395 429 """
396 430 revlog = self.revlog
397 431 gdelta = revlog._generaldelta
398 432 curr = len(revlog)
399 433 prev = curr - 1
400 434 p1r, p2r = revlog.rev(p1), revlog.rev(p2)
401 435
402 436 # should we try to build a delta?
403 437 if prev != nullrev and revlog.storedeltachains:
404 438 tested = set()
405 439 # This condition is true most of the time when processing
406 440 # changegroup data into a generaldelta repo. The only time it
407 441 # isn't true is if this is the first revision in a delta chain
408 442 # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
409 443 if cachedelta and gdelta and revlog._lazydeltabase:
410 444 # Assume what we received from the server is a good choice
411 445 # build delta will reuse the cache
412 446 yield (cachedelta[0],)
413 447 tested.add(cachedelta[0])
414 448
415 449 if gdelta:
416 450 # exclude already lazy tested base if any
417 451 parents = [p for p in (p1r, p2r)
418 452 if p != nullrev and p not in tested]
419 453
420 454 if not revlog._aggressivemergedeltas and len(parents) == 2:
421 455 parents.sort()
422 456 # To minimize the chance of having to build a fulltext,
423 457 # pick first whichever parent is closest to us (max rev)
424 458 yield (parents[1],)
425 459 # then the other one (min rev) if the first did not fit
426 460 yield (parents[0],)
427 461 tested.update(parents)
428 462 elif len(parents) > 0:
429 463 # Test all parents (1 or 2), and keep the best candidate
430 464 yield parents
431 465 tested.update(parents)
432 466
433 467 if prev not in tested:
434 468 # other approaches failed; try against prev to hopefully save us a
435 469 # fulltext.
436 470 yield (prev,)
437 471 tested.add(prev)
438 472
439 473 def buildtext(self, revinfo, fh):
440 474 """Builds a fulltext version of a revision
441 475
442 476 revinfo: _revisioninfo instance that contains all needed info
443 477 fh: file handle to either the .i or the .d revlog file,
444 478 depending on whether it is inlined or not
445 479 """
446 480 btext = revinfo.btext
447 481 if btext[0] is not None:
448 482 return btext[0]
449 483
450 484 revlog = self.revlog
451 485 cachedelta = revinfo.cachedelta
452 486 flags = revinfo.flags
453 487 node = revinfo.node
454 488
455 489 baserev = cachedelta[0]
456 490 delta = cachedelta[1]
457 491 # special case deltas which replace entire base; no need to decode
458 492 # base revision. this neatly avoids censored bases, which throw when
459 493 # they're decoded.
460 494 hlen = struct.calcsize(">lll")
461 495 if delta[:hlen] == mdiff.replacediffheader(revlog.rawsize(baserev),
462 496 len(delta) - hlen):
463 497 btext[0] = delta[hlen:]
464 498 else:
465 499 # deltabase is rawtext before changed by flag processors, which is
466 500 # equivalent to non-raw text
467 501 basetext = revlog.revision(baserev, _df=fh, raw=False)
468 502 btext[0] = mdiff.patch(basetext, delta)
469 503
470 504 try:
471 505 res = revlog._processflags(btext[0], flags, 'read', raw=True)
472 506 btext[0], validatehash = res
473 507 if validatehash:
474 508 revlog.checkhash(btext[0], node, p1=revinfo.p1, p2=revinfo.p2)
475 509 if flags & REVIDX_ISCENSORED:
476 510 raise RevlogError(_('node %s is not censored') % node)
477 511 except CensoredNodeError:
478 512 # must pass the censored index flag to add censored revisions
479 513 if not flags & REVIDX_ISCENSORED:
480 514 raise
481 515 return btext[0]
482 516
483 517 def _builddeltadiff(self, base, revinfo, fh):
484 518 revlog = self.revlog
485 519 t = self.buildtext(revinfo, fh)
486 520 if revlog.iscensored(base):
487 521 # deltas based on a censored revision must replace the
488 522 # full content in one patch, so delta works everywhere
489 523 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
490 524 delta = header + t
491 525 else:
492 526 ptext = revlog.revision(base, _df=fh, raw=True)
493 527 delta = mdiff.textdiff(ptext, t)
494 528
495 529 return delta
496 530
497 531 def _builddeltainfo(self, revinfo, base, fh):
498 532 # can we use the cached delta?
499 533 if revinfo.cachedelta and revinfo.cachedelta[0] == base:
500 534 delta = revinfo.cachedelta[1]
501 535 else:
502 536 delta = self._builddeltadiff(base, revinfo, fh)
503 537 revlog = self.revlog
504 538 header, data = revlog.compress(delta)
505 539 deltalen = len(header) + len(data)
506 540 chainbase = revlog.chainbase(base)
507 541 offset = revlog.end(len(revlog) - 1)
508 542 dist = deltalen + offset - revlog.start(chainbase)
509 543 if revlog._generaldelta:
510 544 deltabase = base
511 545 else:
512 546 deltabase = chainbase
513 547 chainlen, compresseddeltalen = revlog._chaininfo(base)
514 548 chainlen += 1
515 549 compresseddeltalen += deltalen
516 550 return _deltainfo(dist, deltalen, (header, data), deltabase,
517 551 chainbase, chainlen, compresseddeltalen)
518 552
519 553 def finddeltainfo(self, revinfo, fh):
520 554 """Find an acceptable delta against a candidate revision
521 555
522 556 revinfo: information about the revision (instance of _revisioninfo)
523 557 fh: file handle to either the .i or the .d revlog file,
524 558 depending on whether it is inlined or not
525 559
526 560 Returns the first acceptable candidate revision, as ordered by
527 561 _getcandidaterevs
528 562 """
529 563 cachedelta = revinfo.cachedelta
530 564 p1 = revinfo.p1
531 565 p2 = revinfo.p2
532 566 revlog = self.revlog
533 567
534 568 deltainfo = None
535 569 for candidaterevs in self._getcandidaterevs(p1, p2, cachedelta):
536 570 nominateddeltas = []
537 571 for candidaterev in candidaterevs:
538 572 # no delta for rawtext-changing revs (see "candelta" for why)
539 573 if revlog.flags(candidaterev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
540 574 continue
541 575 candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
542 576 if revlog._isgooddeltainfo(candidatedelta, revinfo):
543 577 nominateddeltas.append(candidatedelta)
544 578 if nominateddeltas:
545 579 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
546 580 break
547 581
548 582 return deltainfo
549 583
550 584 @attr.s(slots=True, frozen=True)
551 585 class _revisioninfo(object):
552 586 """Information about a revision that allows building its fulltext
553 587 node: expected hash of the revision
554 588 p1, p2: parent revs of the revision
555 589 btext: built text cache consisting of a one-element list
556 590 cachedelta: (baserev, uncompressed_delta) or None
557 591 flags: flags associated to the revision storage
558 592
559 593 One of btext[0] or cachedelta must be set.
560 594 """
561 595 node = attr.ib()
562 596 p1 = attr.ib()
563 597 p2 = attr.ib()
564 598 btext = attr.ib()
565 599 textlen = attr.ib()
566 600 cachedelta = attr.ib()
567 601 flags = attr.ib()
568 602
569 603 # index v0:
570 604 # 4 bytes: offset
571 605 # 4 bytes: compressed length
572 606 # 4 bytes: base rev
573 607 # 4 bytes: link rev
574 608 # 20 bytes: parent 1 nodeid
575 609 # 20 bytes: parent 2 nodeid
576 610 # 20 bytes: nodeid
577 611 indexformatv0 = struct.Struct(">4l20s20s20s")
578 612 indexformatv0_pack = indexformatv0.pack
579 613 indexformatv0_unpack = indexformatv0.unpack
580 614
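# A quick size check of the v0 layout above: four 4-byte big-endian
# integers plus three 20-byte nodeids make a 76-byte record.
#
#   >>> indexformatv0.size
#   76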
581 615 class revlogoldio(object):
582 616 def __init__(self):
583 617 self.size = indexformatv0.size
584 618
585 619 def parseindex(self, data, inline):
586 620 s = self.size
587 621 index = []
588 622 nodemap = {nullid: nullrev}
589 623 n = off = 0
590 624 l = len(data)
591 625 while off + s <= l:
592 626 cur = data[off:off + s]
593 627 off += s
594 628 e = indexformatv0_unpack(cur)
595 629 # transform to revlogv1 format
596 630 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
597 631 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
598 632 index.append(e2)
599 633 nodemap[e[6]] = n
600 634 n += 1
601 635
602 636 # add the magic null revision at -1
603 637 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
604 638
605 639 return index, nodemap, None
606 640
607 641 def packentry(self, entry, node, version, rev):
608 642 if gettype(entry[0]):
609 643 raise RevlogError(_('index entry flags need revlog version 1'))
610 644 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
611 645 node(entry[5]), node(entry[6]), entry[7])
612 646 return indexformatv0_pack(*e2)
613 647
614 648 # index ng:
615 649 # 6 bytes: offset
616 650 # 2 bytes: flags
617 651 # 4 bytes: compressed length
618 652 # 4 bytes: uncompressed length
619 653 # 4 bytes: base rev
620 654 # 4 bytes: link rev
621 655 # 4 bytes: parent 1 rev
622 656 # 4 bytes: parent 2 rev
623 657 # 32 bytes: nodeid
624 658 indexformatng = struct.Struct(">Qiiiiii20s12x")
625 659 indexformatng_pack = indexformatng.pack
626 660 versionformat = struct.Struct(">I")
627 661 versionformat_pack = versionformat.pack
628 662 versionformat_unpack = versionformat.unpack
629 663
630 664 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
631 665 # signed integer)
632 666 _maxentrysize = 0x7fffffff
633 667
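# A quick size check of the ng layout above: an 8-byte offset/flags word,
# six 4-byte integers, a 20-byte nodeid and 12 bytes of padding make a
# 64-byte record. For rev 0 the high 4 bytes of the first field are zero
# (small offset), which is why packentry below can overwrite p[:4] with
# the version header.
#
#   >>> indexformatng.size
#   64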
634 668 class revlogio(object):
635 669 def __init__(self):
636 670 self.size = indexformatng.size
637 671
638 672 def parseindex(self, data, inline):
639 673 # call the C implementation to parse the index data
640 674 index, cache = parsers.parse_index2(data, inline)
641 675 return index, getattr(index, 'nodemap', None), cache
642 676
643 677 def packentry(self, entry, node, version, rev):
644 678 p = indexformatng_pack(*entry)
645 679 if rev == 0:
646 680 p = versionformat_pack(version) + p[4:]
647 681 return p
648 682
649 683 class revlog(object):
650 684 """
651 685 the underlying revision storage object
652 686
653 687 A revlog consists of two parts, an index and the revision data.
654 688
655 689 The index is a file with a fixed record size containing
656 690 information on each revision, including its nodeid (hash), the
657 691 nodeids of its parents, the position and offset of its data within
658 692 the data file, and the revision it's based on. Finally, each entry
659 693 contains a linkrev entry that can serve as a pointer to external
660 694 data.
661 695
662 696 The revision data itself is a linear collection of data chunks.
663 697 Each chunk represents a revision and is usually represented as a
664 698 delta against the previous chunk. To bound lookup time, runs of
665 699 deltas are limited to about 2 times the length of the original
666 700 version data. This makes retrieval of a version proportional to
667 701 its size, or O(1) relative to the number of revisions.
668 702
669 703 Both pieces of the revlog are written to in an append-only
670 704 fashion, which means we never need to rewrite a file to insert or
671 705 remove data, and can use some simple techniques to avoid the need
672 706 for locking while reading.
673 707
674 708 If checkambig, indexfile is opened with checkambig=True at
675 709 writing, to avoid file stat ambiguity.
676 710
677 711 If mmaplargeindex is True, and an mmapindexthreshold is set, the
678 712 index will be mmapped rather than read if it is larger than the
679 713 configured threshold.
680 714
681 715 If censorable is True, the revlog can have censored revisions.
682 716 """
683 717 def __init__(self, opener, indexfile, datafile=None, checkambig=False,
684 718 mmaplargeindex=False, censorable=False):
685 719 """
686 720 create a revlog object
687 721
688 722 opener is a function that abstracts the file opening operation
689 723 and can be used to implement COW semantics or the like.
690 724 """
691 725 self.indexfile = indexfile
692 726 self.datafile = datafile or (indexfile[:-2] + ".d")
693 727 self.opener = opener
694 728 # When True, indexfile is opened with checkambig=True at writing, to
695 729 # avoid file stat ambiguity.
696 730 self._checkambig = checkambig
697 731 self._censorable = censorable
698 732 # 3-tuple of (node, rev, text) for a raw revision.
699 733 self._cache = None
700 734 # Maps rev to chain base rev.
701 735 self._chainbasecache = util.lrucachedict(100)
702 736 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
703 737 self._chunkcache = (0, '')
704 738 # How much data to read and cache into the raw revlog data cache.
705 739 self._chunkcachesize = 65536
706 740 self._maxchainlen = None
707 741 self._aggressivemergedeltas = True
708 742 self.index = []
709 743 # Mapping of partial identifiers to full nodes.
710 744 self._pcache = {}
711 745 # Mapping of revision integer to full node.
712 746 self._nodecache = {nullid: nullrev}
713 747 self._nodepos = None
714 748 self._compengine = 'zlib'
715 749 self._maxdeltachainspan = -1
716 750 self._withsparseread = False
717 751 self._srdensitythreshold = 0.50
718 752 self._srmingapsize = 262144
719 753
720 754 mmapindexthreshold = None
721 755 v = REVLOG_DEFAULT_VERSION
722 756 opts = getattr(opener, 'options', None)
723 757 if opts is not None:
724 758 if 'revlogv2' in opts:
725 759 # version 2 revlogs always use generaldelta.
726 760 v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
727 761 elif 'revlogv1' in opts:
728 762 if 'generaldelta' in opts:
729 763 v |= FLAG_GENERALDELTA
730 764 else:
731 765 v = 0
732 766 if 'chunkcachesize' in opts:
733 767 self._chunkcachesize = opts['chunkcachesize']
734 768 if 'maxchainlen' in opts:
735 769 self._maxchainlen = opts['maxchainlen']
736 770 if 'aggressivemergedeltas' in opts:
737 771 self._aggressivemergedeltas = opts['aggressivemergedeltas']
738 772 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
739 773 if 'compengine' in opts:
740 774 self._compengine = opts['compengine']
741 775 if 'maxdeltachainspan' in opts:
742 776 self._maxdeltachainspan = opts['maxdeltachainspan']
743 777 if mmaplargeindex and 'mmapindexthreshold' in opts:
744 778 mmapindexthreshold = opts['mmapindexthreshold']
745 779 self._withsparseread = bool(opts.get('with-sparse-read', False))
746 780 if 'sparse-read-density-threshold' in opts:
747 781 self._srdensitythreshold = opts['sparse-read-density-threshold']
748 782 if 'sparse-read-min-gap-size' in opts:
749 783 self._srmingapsize = opts['sparse-read-min-gap-size']
750 784
751 785 if self._chunkcachesize <= 0:
752 786 raise RevlogError(_('revlog chunk cache size %r is not greater '
753 787 'than 0') % self._chunkcachesize)
754 788 elif self._chunkcachesize & (self._chunkcachesize - 1):
755 789 raise RevlogError(_('revlog chunk cache size %r is not a power '
756 790 'of 2') % self._chunkcachesize)
757 791
758 792 indexdata = ''
759 793 self._initempty = True
760 794 try:
761 795 with self._indexfp() as f:
762 796 if (mmapindexthreshold is not None and
763 797 self.opener.fstat(f).st_size >= mmapindexthreshold):
764 798 indexdata = util.buffer(util.mmapread(f))
765 799 else:
766 800 indexdata = f.read()
767 801 if len(indexdata) > 0:
768 802 v = versionformat_unpack(indexdata[:4])[0]
769 803 self._initempty = False
770 804 except IOError as inst:
771 805 if inst.errno != errno.ENOENT:
772 806 raise
773 807
774 808 self.version = v
775 809 self._inline = v & FLAG_INLINE_DATA
776 810 self._generaldelta = v & FLAG_GENERALDELTA
777 811 flags = v & ~0xFFFF
778 812 fmt = v & 0xFFFF
779 813 if fmt == REVLOGV0:
780 814 if flags:
781 815 raise RevlogError(_('unknown flags (%#04x) in version %d '
782 816 'revlog %s') %
783 817 (flags >> 16, fmt, self.indexfile))
784 818 elif fmt == REVLOGV1:
785 819 if flags & ~REVLOGV1_FLAGS:
786 820 raise RevlogError(_('unknown flags (%#04x) in version %d '
787 821 'revlog %s') %
788 822 (flags >> 16, fmt, self.indexfile))
789 823 elif fmt == REVLOGV2:
790 824 if flags & ~REVLOGV2_FLAGS:
791 825 raise RevlogError(_('unknown flags (%#04x) in version %d '
792 826 'revlog %s') %
793 827 (flags >> 16, fmt, self.indexfile))
794 828 else:
795 829 raise RevlogError(_('unknown version (%d) in revlog %s') %
796 830 (fmt, self.indexfile))
797 831
798 832 self.storedeltachains = True
799 833
800 834 self._io = revlogio()
801 835 if self.version == REVLOGV0:
802 836 self._io = revlogoldio()
803 837 try:
804 838 d = self._io.parseindex(indexdata, self._inline)
805 839 except (ValueError, IndexError):
806 840 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
807 841 self.index, nodemap, self._chunkcache = d
808 842 if nodemap is not None:
809 843 self.nodemap = self._nodecache = nodemap
810 844 if not self._chunkcache:
811 845 self._chunkclear()
812 846 # revnum -> (chain-length, sum-delta-length)
813 847 self._chaininfocache = {}
814 848 # revlog header -> revlog compressor
815 849 self._decompressors = {}
816 850
817 851 @util.propertycache
818 852 def _compressor(self):
819 853 return util.compengines[self._compengine].revlogcompressor()
820 854
821 855 def _indexfp(self, mode='r'):
822 856 """file object for the revlog's index file"""
823 857 args = {r'mode': mode}
824 858 if mode != 'r':
825 859 args[r'checkambig'] = self._checkambig
826 860 if mode == 'w':
827 861 args[r'atomictemp'] = True
828 862 return self.opener(self.indexfile, **args)
829 863
830 864 def _datafp(self, mode='r'):
831 865 """file object for the revlog's data file"""
832 866 return self.opener(self.datafile, mode=mode)
833 867
834 868 @contextlib.contextmanager
835 869 def _datareadfp(self, existingfp=None):
836 870 """file object suitable to read data"""
837 871 if existingfp is not None:
838 872 yield existingfp
839 873 else:
840 874 if self._inline:
841 875 func = self._indexfp
842 876 else:
843 877 func = self._datafp
844 878 with func() as fp:
845 879 yield fp
846 880
847 881 def tip(self):
848 882 return self.node(len(self.index) - 2)
849 883 def __contains__(self, rev):
850 884 return 0 <= rev < len(self)
851 885 def __len__(self):
852 886 return len(self.index) - 1
853 887 def __iter__(self):
854 888 return iter(xrange(len(self)))
855 889 def revs(self, start=0, stop=None):
856 890 """iterate over all revs in this revlog (from start to stop)"""
857 891 step = 1
858 892 if stop is not None:
859 893 if start > stop:
860 894 step = -1
861 895 stop += step
862 896 else:
863 897 stop = len(self)
864 898 return xrange(start, stop, step)
865 899
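# Note on revs() above (illustrative, rl being any revlog instance): the
# stop bound is inclusive when given, and iteration reverses when
# start > stop:
#
#   list(rl.revs(start=2, stop=5))   -> [2, 3, 4, 5]
#   list(rl.revs(start=5, stop=2))   -> [5, 4, 3, 2]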
866 900 @util.propertycache
867 901 def nodemap(self):
868 902 self.rev(self.node(0))
869 903 return self._nodecache
870 904
871 905 def hasnode(self, node):
872 906 try:
873 907 self.rev(node)
874 908 return True
875 909 except KeyError:
876 910 return False
877 911
878 912 def candelta(self, baserev, rev):
879 913 """whether two revisions (baserev, rev) can be delta-ed or not"""
880 914 # Disable delta if either rev requires a content-changing flag
881 915 # processor (ex. LFS). This is because such a flag processor can alter
882 916 # the rawtext content that the delta will be based on, and two clients
883 917 # could have the same revlog node with different flags (i.e. different
884 918 # rawtext contents) and the delta could be incompatible.
885 919 if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
886 920 or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
887 921 return False
888 922 return True
889 923
890 924 def clearcaches(self):
891 925 self._cache = None
892 926 self._chainbasecache.clear()
893 927 self._chunkcache = (0, '')
894 928 self._pcache = {}
895 929
896 930 try:
897 931 self._nodecache.clearcaches()
898 932 except AttributeError:
899 933 self._nodecache = {nullid: nullrev}
900 934 self._nodepos = None
901 935
902 936 def rev(self, node):
903 937 try:
904 938 return self._nodecache[node]
905 939 except TypeError:
906 940 raise
907 941 except RevlogError:
908 942 # parsers.c radix tree lookup failed
909 943 if node == wdirid or node in wdirfilenodeids:
910 944 raise error.WdirUnsupported
911 945 raise LookupError(node, self.indexfile, _('no node'))
912 946 except KeyError:
913 947 # pure python cache lookup failed
914 948 n = self._nodecache
915 949 i = self.index
916 950 p = self._nodepos
917 951 if p is None:
918 952 p = len(i) - 2
919 953 else:
920 954 assert p < len(i)
921 955 for r in xrange(p, -1, -1):
922 956 v = i[r][7]
923 957 n[v] = r
924 958 if v == node:
925 959 self._nodepos = r - 1
926 960 return r
927 961 if node == wdirid or node in wdirfilenodeids:
928 962 raise error.WdirUnsupported
929 963 raise LookupError(node, self.indexfile, _('no node'))
930 964
931 965 # Accessors for index entries.
932 966
933 967 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
934 968 # are flags.
935 969 def start(self, rev):
936 970 return int(self.index[rev][0] >> 16)
937 971
938 972 def flags(self, rev):
939 973 return self.index[rev][0] & 0xFFFF
940 974
941 975 def length(self, rev):
942 976 return self.index[rev][1]
943 977
944 978 def rawsize(self, rev):
945 979 """return the length of the uncompressed text for a given revision"""
946 980 l = self.index[rev][2]
947 981 if l >= 0:
948 982 return l
949 983
950 984 t = self.revision(rev, raw=True)
951 985 return len(t)
952 986
953 987 def size(self, rev):
954 988 """length of non-raw text (processed by a "read" flag processor)"""
955 989 # fast path: if no "read" flag processor could change the content,
956 990 # size is rawsize. note: ELLIPSIS is known to not change the content.
957 991 flags = self.flags(rev)
958 992 if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
959 993 return self.rawsize(rev)
960 994
961 995 return len(self.revision(rev, raw=False))
962 996
963 997 def chainbase(self, rev):
964 998 base = self._chainbasecache.get(rev)
965 999 if base is not None:
966 1000 return base
967 1001
968 1002 index = self.index
969 1003 iterrev = rev
970 1004 base = index[iterrev][3]
971 1005 while base != iterrev:
972 1006 iterrev = base
973 1007 base = index[iterrev][3]
974 1008
975 1009 self._chainbasecache[rev] = base
976 1010 return base
977 1011
978 1012 def linkrev(self, rev):
979 1013 return self.index[rev][4]
980 1014
981 1015 def parentrevs(self, rev):
982 1016 try:
983 1017 entry = self.index[rev]
984 1018 except IndexError:
985 1019 if rev == wdirrev:
986 1020 raise error.WdirUnsupported
987 1021 raise
988 1022
989 1023 return entry[5], entry[6]
990 1024
991 1025 def node(self, rev):
992 1026 try:
993 1027 return self.index[rev][7]
994 1028 except IndexError:
995 1029 if rev == wdirrev:
996 1030 raise error.WdirUnsupported
997 1031 raise
998 1032
999 1033 # Derived from index values.
1000 1034
1001 1035 def end(self, rev):
1002 1036 return self.start(rev) + self.length(rev)
1003 1037
1004 1038 def parents(self, node):
1005 1039 i = self.index
1006 1040 d = i[self.rev(node)]
1007 1041 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
1008 1042
1009 1043 def chainlen(self, rev):
1010 1044 return self._chaininfo(rev)[0]
1011 1045
1012 1046 def _chaininfo(self, rev):
1013 1047 chaininfocache = self._chaininfocache
1014 1048 if rev in chaininfocache:
1015 1049 return chaininfocache[rev]
1016 1050 index = self.index
1017 1051 generaldelta = self._generaldelta
1018 1052 iterrev = rev
1019 1053 e = index[iterrev]
1020 1054 clen = 0
1021 1055 compresseddeltalen = 0
1022 1056 while iterrev != e[3]:
1023 1057 clen += 1
1024 1058 compresseddeltalen += e[1]
1025 1059 if generaldelta:
1026 1060 iterrev = e[3]
1027 1061 else:
1028 1062 iterrev -= 1
1029 1063 if iterrev in chaininfocache:
1030 1064 t = chaininfocache[iterrev]
1031 1065 clen += t[0]
1032 1066 compresseddeltalen += t[1]
1033 1067 break
1034 1068 e = index[iterrev]
1035 1069 else:
1036 1070 # Add text length of base since decompressing that also takes
1037 1071 # work. For cache hits the length is already included.
1038 1072 compresseddeltalen += e[1]
1039 1073 r = (clen, compresseddeltalen)
1040 1074 chaininfocache[rev] = r
1041 1075 return r
1042 1076
1043 1077 def _deltachain(self, rev, stoprev=None):
1044 1078 """Obtain the delta chain for a revision.
1045 1079
1046 1080 ``stoprev`` specifies a revision to stop at. If not specified, we
1047 1081 stop at the base of the chain.
1048 1082
1049 1083 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
1050 1084 revs in ascending order and ``stopped`` is a bool indicating whether
1051 1085 ``stoprev`` was hit.
1052 1086 """
1053 1087 # Try C implementation.
1054 1088 try:
1055 1089 return self.index.deltachain(rev, stoprev, self._generaldelta)
1056 1090 except AttributeError:
1057 1091 pass
1058 1092
1059 1093 chain = []
1060 1094
1061 1095 # Alias to prevent attribute lookup in tight loop.
1062 1096 index = self.index
1063 1097 generaldelta = self._generaldelta
1064 1098
1065 1099 iterrev = rev
1066 1100 e = index[iterrev]
1067 1101 while iterrev != e[3] and iterrev != stoprev:
1068 1102 chain.append(iterrev)
1069 1103 if generaldelta:
1070 1104 iterrev = e[3]
1071 1105 else:
1072 1106 iterrev -= 1
1073 1107 e = index[iterrev]
1074 1108
1075 1109 if iterrev == stoprev:
1076 1110 stopped = True
1077 1111 else:
1078 1112 chain.append(iterrev)
1079 1113 stopped = False
1080 1114
1081 1115 chain.reverse()
1082 1116 return chain, stopped
1083 1117
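# A worked sketch of the pure-python fallback above, for a hypothetical
# non-generaldelta revlog rl whose rev 0 stores a full text:
# rl._deltachain(3) walks 3, 2, 1, then appends the base and returns
# ([0, 1, 2, 3], False), while rl._deltachain(3, stoprev=1) stops at the
# stoprev without including it and returns ([2, 3], True).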
1084 1118 def ancestors(self, revs, stoprev=0, inclusive=False):
1085 1119 """Generate the ancestors of 'revs' in reverse topological order.
1086 1120 Does not generate revs lower than stoprev.
1087 1121
1088 1122 See the documentation for ancestor.lazyancestors for more details."""
1089 1123
1090 1124 return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
1091 1125 inclusive=inclusive)
1092 1126
1093 1127 def descendants(self, revs):
1094 1128 """Generate the descendants of 'revs' in revision order.
1095 1129
1096 1130 Yield a sequence of revision numbers starting with a child of
1097 1131 some rev in revs, i.e., each revision is *not* considered a
1098 1132 descendant of itself. Results are ordered by revision number (a
1099 1133 topological sort)."""
1100 1134 first = min(revs)
1101 1135 if first == nullrev:
1102 1136 for i in self:
1103 1137 yield i
1104 1138 return
1105 1139
1106 1140 seen = set(revs)
1107 1141 for i in self.revs(start=first + 1):
1108 1142 for x in self.parentrevs(i):
1109 1143 if x != nullrev and x in seen:
1110 1144 seen.add(i)
1111 1145 yield i
1112 1146 break
1113 1147
1114 1148 def findcommonmissing(self, common=None, heads=None):
1115 1149 """Return a tuple of the ancestors of common and the ancestors of heads
1116 1150 that are not ancestors of common. In revset terminology, we return the
1117 1151 tuple:
1118 1152
1119 1153 ::common, (::heads) - (::common)
1120 1154
1121 1155 The list is sorted by revision number, meaning it is
1122 1156 topologically sorted.
1123 1157
1124 1158 'heads' and 'common' are both lists of node IDs. If heads is
1125 1159 not supplied, uses all of the revlog's heads. If common is not
1126 1160 supplied, uses nullid."""
1127 1161 if common is None:
1128 1162 common = [nullid]
1129 1163 if heads is None:
1130 1164 heads = self.heads()
1131 1165
1132 1166 common = [self.rev(n) for n in common]
1133 1167 heads = [self.rev(n) for n in heads]
1134 1168
1135 1169 # we want the ancestors, but inclusive
1136 1170 class lazyset(object):
1137 1171 def __init__(self, lazyvalues):
1138 1172 self.addedvalues = set()
1139 1173 self.lazyvalues = lazyvalues
1140 1174
1141 1175 def __contains__(self, value):
1142 1176 return value in self.addedvalues or value in self.lazyvalues
1143 1177
1144 1178 def __iter__(self):
1145 1179 added = self.addedvalues
1146 1180 for r in added:
1147 1181 yield r
1148 1182 for r in self.lazyvalues:
1149 1183 if not r in added:
1150 1184 yield r
1151 1185
1152 1186 def add(self, value):
1153 1187 self.addedvalues.add(value)
1154 1188
1155 1189 def update(self, values):
1156 1190 self.addedvalues.update(values)
1157 1191
1158 1192 has = lazyset(self.ancestors(common))
1159 1193 has.add(nullrev)
1160 1194 has.update(common)
1161 1195
1162 1196 # take all ancestors from heads that aren't in has
1163 1197 missing = set()
1164 1198 visit = collections.deque(r for r in heads if r not in has)
1165 1199 while visit:
1166 1200 r = visit.popleft()
1167 1201 if r in missing:
1168 1202 continue
1169 1203 else:
1170 1204 missing.add(r)
1171 1205 for p in self.parentrevs(r):
1172 1206 if p not in has:
1173 1207 visit.append(p)
1174 1208 missing = list(missing)
1175 1209 missing.sort()
1176 1210 return has, [self.node(miss) for miss in missing]
1177 1211
1178 1212 def incrementalmissingrevs(self, common=None):
1179 1213 """Return an object that can be used to incrementally compute the
1180 1214 revision numbers of the ancestors of arbitrary sets that are not
1181 1215 ancestors of common. This is an ancestor.incrementalmissingancestors
1182 1216 object.
1183 1217
1184 1218 'common' is a list of revision numbers. If common is not supplied, uses
1185 1219 nullrev.
1186 1220 """
1187 1221 if common is None:
1188 1222 common = [nullrev]
1189 1223
1190 1224 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1191 1225
1192 1226 def findmissingrevs(self, common=None, heads=None):
1193 1227 """Return the revision numbers of the ancestors of heads that
1194 1228 are not ancestors of common.
1195 1229
1196 1230 More specifically, return a list of revision numbers corresponding to
1197 1231 nodes N such that every N satisfies the following constraints:
1198 1232
1199 1233 1. N is an ancestor of some node in 'heads'
1200 1234 2. N is not an ancestor of any node in 'common'
1201 1235
1202 1236 The list is sorted by revision number, meaning it is
1203 1237 topologically sorted.
1204 1238
1205 1239 'heads' and 'common' are both lists of revision numbers. If heads is
1206 1240 not supplied, uses all of the revlog's heads. If common is not
1207 1241 supplied, uses nullid."""
1208 1242 if common is None:
1209 1243 common = [nullrev]
1210 1244 if heads is None:
1211 1245 heads = self.headrevs()
1212 1246
1213 1247 inc = self.incrementalmissingrevs(common=common)
1214 1248 return inc.missingancestors(heads)
1215 1249
1216 1250 def findmissing(self, common=None, heads=None):
1217 1251 """Return the ancestors of heads that are not ancestors of common.
1218 1252
1219 1253 More specifically, return a list of nodes N such that every N
1220 1254 satisfies the following constraints:
1221 1255
1222 1256 1. N is an ancestor of some node in 'heads'
1223 1257 2. N is not an ancestor of any node in 'common'
1224 1258
1225 1259 The list is sorted by revision number, meaning it is
1226 1260 topologically sorted.
1227 1261
1228 1262 'heads' and 'common' are both lists of node IDs. If heads is
1229 1263 not supplied, uses all of the revlog's heads. If common is not
1230 1264 supplied, uses nullid."""
1231 1265 if common is None:
1232 1266 common = [nullid]
1233 1267 if heads is None:
1234 1268 heads = self.heads()
1235 1269
1236 1270 common = [self.rev(n) for n in common]
1237 1271 heads = [self.rev(n) for n in heads]
1238 1272
1239 1273 inc = self.incrementalmissingrevs(common=common)
1240 1274 return [self.node(r) for r in inc.missingancestors(heads)]
1241 1275
1242 1276 def nodesbetween(self, roots=None, heads=None):
1243 1277 """Return a topological path from 'roots' to 'heads'.
1244 1278
1245 1279 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1246 1280 topologically sorted list of all nodes N that satisfy both of
1247 1281 these constraints:
1248 1282
1249 1283 1. N is a descendant of some node in 'roots'
1250 1284 2. N is an ancestor of some node in 'heads'
1251 1285
1252 1286 Every node is considered to be both a descendant and an ancestor
1253 1287 of itself, so every reachable node in 'roots' and 'heads' will be
1254 1288 included in 'nodes'.
1255 1289
1256 1290 'outroots' is the list of reachable nodes in 'roots', i.e., the
1257 1291 subset of 'roots' that is returned in 'nodes'. Likewise,
1258 1292 'outheads' is the subset of 'heads' that is also in 'nodes'.
1259 1293
1260 1294 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1261 1295 unspecified, uses nullid as the only root. If 'heads' is
1262 1296 unspecified, uses list of all of the revlog's heads."""
1263 1297 nonodes = ([], [], [])
1264 1298 if roots is not None:
1265 1299 roots = list(roots)
1266 1300 if not roots:
1267 1301 return nonodes
1268 1302 lowestrev = min([self.rev(n) for n in roots])
1269 1303 else:
1270 1304 roots = [nullid] # Everybody's a descendant of nullid
1271 1305 lowestrev = nullrev
1272 1306 if (lowestrev == nullrev) and (heads is None):
1273 1307 # We want _all_ the nodes!
1274 1308 return ([self.node(r) for r in self], [nullid], list(self.heads()))
1275 1309 if heads is None:
1276 1310 # All nodes are ancestors, so the latest ancestor is the last
1277 1311 # node.
1278 1312 highestrev = len(self) - 1
1279 1313 # Set ancestors to None to signal that every node is an ancestor.
1280 1314 ancestors = None
1281 1315 # Set heads to an empty dictionary for later discovery of heads
1282 1316 heads = {}
1283 1317 else:
1284 1318 heads = list(heads)
1285 1319 if not heads:
1286 1320 return nonodes
1287 1321 ancestors = set()
1288 1322 # Turn heads into a dictionary so we can remove 'fake' heads.
1289 1323 # Also, later we will be using it to filter out the heads we can't
1290 1324 # find from roots.
1291 1325 heads = dict.fromkeys(heads, False)
1292 1326 # Start at the top and keep marking parents until we're done.
1293 1327 nodestotag = set(heads)
1294 1328 # Remember where the top was so we can use it as a limit later.
1295 1329 highestrev = max([self.rev(n) for n in nodestotag])
1296 1330 while nodestotag:
1297 1331 # grab a node to tag
1298 1332 n = nodestotag.pop()
1299 1333 # Never tag nullid
1300 1334 if n == nullid:
1301 1335 continue
1302 1336 # A node's revision number represents its place in a
1303 1337 # topologically sorted list of nodes.
1304 1338 r = self.rev(n)
1305 1339 if r >= lowestrev:
1306 1340 if n not in ancestors:
1307 1341 # If we are possibly a descendant of one of the roots
1308 1342 # and we haven't already been marked as an ancestor
1309 1343 ancestors.add(n) # Mark as ancestor
1310 1344 # Add non-nullid parents to list of nodes to tag.
1311 1345 nodestotag.update([p for p in self.parents(n) if
1312 1346 p != nullid])
1313 1347 elif n in heads: # We've seen it before, is it a fake head?
1314 1348 # So it is; real heads should not be the ancestors of
1315 1349 # any other heads.
1316 1350 heads.pop(n)
1317 1351 if not ancestors:
1318 1352 return nonodes
1319 1353 # Now that we have our set of ancestors, we want to remove any
1320 1354 # roots that are not ancestors.
1321 1355
1322 1356 # If one of the roots was nullid, everything is included anyway.
1323 1357 if lowestrev > nullrev:
1324 1358 # But, since we weren't, let's recompute the lowest rev to not
1325 1359 # include roots that aren't ancestors.
1326 1360
1327 1361 # Filter out roots that aren't ancestors of heads
1328 1362 roots = [root for root in roots if root in ancestors]
1329 1363 # Recompute the lowest revision
1330 1364 if roots:
1331 1365 lowestrev = min([self.rev(root) for root in roots])
1332 1366 else:
1333 1367 # No more roots? Return empty list
1334 1368 return nonodes
1335 1369 else:
1336 1370 # We are descending from nullid, and don't need to care about
1337 1371 # any other roots.
1338 1372 lowestrev = nullrev
1339 1373 roots = [nullid]
1340 1374 # Transform our roots list into a set.
1341 1375 descendants = set(roots)
1342 1376 # Also, keep the original roots so we can filter out roots that aren't
1343 1377 # 'real' roots (i.e. are descended from other roots).
1344 1378 roots = descendants.copy()
1345 1379 # Our topologically sorted list of output nodes.
1346 1380 orderedout = []
1347 1381 # Don't start at nullid since we don't want nullid in our output list,
1348 1382 # and if nullid shows up in descendants, empty parents will look like
1349 1383 # they're descendants.
1350 1384 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1351 1385 n = self.node(r)
1352 1386 isdescendant = False
1353 1387 if lowestrev == nullrev: # Everybody is a descendant of nullid
1354 1388 isdescendant = True
1355 1389 elif n in descendants:
1356 1390 # n is already a descendant
1357 1391 isdescendant = True
1358 1392 # This check only needs to be done here because all the roots
1359 1393 # will start being marked as descendants before the loop.
1360 1394 if n in roots:
1361 1395 # If n was a root, check if it's a 'real' root.
1362 1396 p = tuple(self.parents(n))
1363 1397 # If any of its parents are descendants, it's not a root.
1364 1398 if (p[0] in descendants) or (p[1] in descendants):
1365 1399 roots.remove(n)
1366 1400 else:
1367 1401 p = tuple(self.parents(n))
1368 1402 # A node is a descendant if either of its parents is a
1369 1403 # descendant. (We seeded the descendants set with the roots
1370 1404 # up there, remember?)
1371 1405 if (p[0] in descendants) or (p[1] in descendants):
1372 1406 descendants.add(n)
1373 1407 isdescendant = True
1374 1408 if isdescendant and ((ancestors is None) or (n in ancestors)):
1375 1409 # Only include nodes that are both descendants and ancestors.
1376 1410 orderedout.append(n)
1377 1411 if (ancestors is not None) and (n in heads):
1378 1412 # We're trying to figure out which heads are reachable
1379 1413 # from roots.
1380 1414 # Mark this head as having been reached
1381 1415 heads[n] = True
1382 1416 elif ancestors is None:
1383 1417 # Otherwise, we're trying to discover the heads.
1384 1418 # Assume this is a head because if it isn't, the next step
1385 1419 # will eventually remove it.
1386 1420 heads[n] = True
1387 1421 # But, obviously its parents aren't.
1388 1422 for p in self.parents(n):
1389 1423 heads.pop(p, None)
1390 1424 heads = [head for head, flag in heads.iteritems() if flag]
1391 1425 roots = list(roots)
1392 1426 assert orderedout
1393 1427 assert roots
1394 1428 assert heads
1395 1429 return (orderedout, roots, heads)
1396 1430
1397 1431 def headrevs(self):
1398 1432 try:
1399 1433 return self.index.headrevs()
1400 1434 except AttributeError:
1401 1435 return self._headrevs()
1402 1436
1403 1437 def computephases(self, roots):
1404 1438 return self.index.computephasesmapsets(roots)
1405 1439
1406 1440 def _headrevs(self):
1407 1441 count = len(self)
1408 1442 if not count:
1409 1443 return [nullrev]
1410 1444 # we won't iterate over filtered revs, so nobody is a head at start
1411 1445 ishead = [0] * (count + 1)
1412 1446 index = self.index
1413 1447 for r in self:
1414 1448 ishead[r] = 1 # I may be a head
1415 1449 e = index[r]
1416 1450 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1417 1451 return [r for r, val in enumerate(ishead) if val]
1418 1452
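# Note on _headrevs above: ishead has count + 1 slots so that nullrev
# parents, which index the array at -1, harmlessly clear the spare last
# slot. E.g. with revs 1 and 2 both children of rev 0 and childless
# themselves, the loop clears ishead[0] and reports [1, 2] as heads.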
1419 1453 def heads(self, start=None, stop=None):
1420 1454 """return the list of all nodes that have no children
1421 1455
1422 1456 if start is specified, only heads that are descendants of
1423 1457 start will be returned
1424 1458 if stop is specified, it will consider all the revs from stop
1425 1459 as if they had no children
1426 1460 """
1427 1461 if start is None and stop is None:
1428 1462 if not len(self):
1429 1463 return [nullid]
1430 1464 return [self.node(r) for r in self.headrevs()]
1431 1465
1432 1466 if start is None:
1433 1467 start = nullid
1434 1468 if stop is None:
1435 1469 stop = []
1436 1470 stoprevs = set([self.rev(n) for n in stop])
1437 1471 startrev = self.rev(start)
1438 1472 reachable = {startrev}
1439 1473 heads = {startrev}
1440 1474
1441 1475 parentrevs = self.parentrevs
1442 1476 for r in self.revs(start=startrev + 1):
1443 1477 for p in parentrevs(r):
1444 1478 if p in reachable:
1445 1479 if r not in stoprevs:
1446 1480 reachable.add(r)
1447 1481 heads.add(r)
1448 1482 if p in heads and p not in stoprevs:
1449 1483 heads.remove(p)
1450 1484
1451 1485 return [self.node(r) for r in heads]
1452 1486
1453 1487 def children(self, node):
1454 1488 """find the children of a given node"""
1455 1489 c = []
1456 1490 p = self.rev(node)
1457 1491 for r in self.revs(start=p + 1):
1458 1492 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1459 1493 if prevs:
1460 1494 for pr in prevs:
1461 1495 if pr == p:
1462 1496 c.append(self.node(r))
1463 1497 elif p == nullrev:
1464 1498 c.append(self.node(r))
1465 1499 return c
1466 1500
1467 1501 def descendant(self, start, end):
1468 1502 """True if revision 'end' is a descendant of revision 'start'
1469 1503
1470 1504 A revision is considered a descendant of itself."""
1471 1505 if start == nullrev:
1472 1506 return True
1473 1507 elif start == end:
1474 1508 return True
1475 1509 return start in self._commonancestorsheads(start, end)
1476 1510
1477 1511 def commonancestorsheads(self, a, b):
1478 1512 """calculate all the heads of the common ancestors of nodes a and b"""
1479 1513 a, b = self.rev(a), self.rev(b)
1480 1514 ancs = self._commonancestorsheads(a, b)
1481 1515 return pycompat.maplist(self.node, ancs)
1482 1516
1483 1517 def _commonancestorsheads(self, *revs):
1484 1518 """calculate all the heads of the common ancestors of revs"""
1485 1519 try:
1486 1520 ancs = self.index.commonancestorsheads(*revs)
1487 1521 except (AttributeError, OverflowError): # C implementation failed
1488 1522 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1489 1523 return ancs
1490 1524
1491 1525 def isancestor(self, a, b):
1492 1526 """return True if node a is an ancestor of node b
1493 1527
1494 1528 The implementation of this is trivial but the use of
1495 1529 commonancestorsheads is not."""
1496 1530 a, b = self.rev(a), self.rev(b)
1497 1531 return self.descendant(a, b)
1498 1532
1499 1533 def ancestor(self, a, b):
1500 1534 """calculate the "best" common ancestor of nodes a and b"""
1501 1535
1502 1536 a, b = self.rev(a), self.rev(b)
1503 1537 try:
1504 1538 ancs = self.index.ancestors(a, b)
1505 1539 except (AttributeError, OverflowError):
1506 1540 ancs = ancestor.ancestors(self.parentrevs, a, b)
1507 1541 if ancs:
1508 1542 # choose a consistent winner when there's a tie
1509 1543 return min(map(self.node, ancs))
1510 1544 return nullid
1511 1545
1512 1546 def _match(self, id):
1513 1547 if isinstance(id, int):
1514 1548 # rev
1515 1549 return self.node(id)
1516 1550 if len(id) == 20:
1517 1551 # possibly a binary node
1518 1552 # odds of a binary node being all hex in ASCII are 1 in 10**25
1519 1553 try:
1520 1554 node = id
1521 1555 self.rev(node) # quick search the index
1522 1556 return node
1523 1557 except LookupError:
1524 1558 pass # may be partial hex id
1525 1559 try:
1526 1560 # str(rev)
1527 1561 rev = int(id)
1528 1562 if "%d" % rev != id:
1529 1563 raise ValueError
1530 1564 if rev < 0:
1531 1565 rev = len(self) + rev
1532 1566 if rev < 0 or rev >= len(self):
1533 1567 raise ValueError
1534 1568 return self.node(rev)
1535 1569 except (ValueError, OverflowError):
1536 1570 pass
1537 1571 if len(id) == 40:
1538 1572 try:
1539 1573 # a full hex nodeid?
1540 1574 node = bin(id)
1541 1575 self.rev(node)
1542 1576 return node
1543 1577 except (TypeError, LookupError):
1544 1578 pass
1545 1579
1546 1580 def _partialmatch(self, id):
1547 1581 # we don't care about wdirfilenodeids as they should always be full hashes
1548 1582 maybewdir = wdirhex.startswith(id)
1549 1583 try:
1550 1584 partial = self.index.partialmatch(id)
1551 1585 if partial and self.hasnode(partial):
1552 1586 if maybewdir:
1553 1587 # single 'ff...' match in radix tree, ambiguous with wdir
1554 1588 raise RevlogError
1555 1589 return partial
1556 1590 if maybewdir:
1557 1591 # no 'ff...' match in radix tree, wdir identified
1558 1592 raise error.WdirUnsupported
1559 1593 return None
1560 1594 except RevlogError:
1561 1595 # parsers.c radix tree lookup gave multiple matches
1562 1596 # fast path: for unfiltered changelog, radix tree is accurate
1563 1597 if not getattr(self, 'filteredrevs', None):
1564 1598 raise LookupError(id, self.indexfile,
1565 1599 _('ambiguous identifier'))
1566 1600 # fall through to slow path that filters hidden revisions
1567 1601 except (AttributeError, ValueError):
1568 1602 # we are pure python, or key was too short to search radix tree
1569 1603 pass
1570 1604
1571 1605 if id in self._pcache:
1572 1606 return self._pcache[id]
1573 1607
1574 1608 if len(id) <= 40:
1575 1609 try:
1576 1610 # hex(node)[:...]
1577 1611 l = len(id) // 2 # grab an even number of digits
1578 1612 prefix = bin(id[:l * 2])
1579 1613 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1580 1614 nl = [n for n in nl if hex(n).startswith(id) and
1581 1615 self.hasnode(n)]
1582 1616 if len(nl) > 0:
1583 1617 if len(nl) == 1 and not maybewdir:
1584 1618 self._pcache[id] = nl[0]
1585 1619 return nl[0]
1586 1620 raise LookupError(id, self.indexfile,
1587 1621 _('ambiguous identifier'))
1588 1622 if maybewdir:
1589 1623 raise error.WdirUnsupported
1590 1624 return None
1591 1625 except TypeError:
1592 1626 pass
1593 1627
1594 1628 def lookup(self, id):
1595 1629 """locate a node based on:
1596 1630 - revision number or str(revision number)
1597 1631 - nodeid or subset of hex nodeid
1598 1632 """
1599 1633 n = self._match(id)
1600 1634 if n is not None:
1601 1635 return n
1602 1636 n = self._partialmatch(id)
1603 1637 if n:
1604 1638 return n
1605 1639
1606 1640 raise LookupError(id, self.indexfile, _('no match found'))
1607 1641
1608 1642 def shortest(self, node, minlength=1):
1609 1643 """Find the shortest unambiguous prefix that matches node."""
1610 1644 def isvalid(prefix):
1611 1645 try:
1612 1646 node = self._partialmatch(prefix)
1613 1647 except error.RevlogError:
1614 1648 return False
1615 1649 except error.WdirUnsupported:
1616 1650 # single 'ff...' match
1617 1651 return True
1618 1652 if node is None:
1619 1653 raise LookupError(node, self.indexfile, _('no node'))
1620 1654 return True
1621 1655
1622 1656 def maybewdir(prefix):
1623 1657 return all(c == 'f' for c in prefix)
1624 1658
1625 1659 hexnode = hex(node)
1626 1660
1627 1661 def disambiguate(hexnode, minlength):
1628 1662 """Disambiguate against wdirid."""
1629 1663 for length in range(minlength, 41):
1630 1664 prefix = hexnode[:length]
1631 1665 if not maybewdir(prefix):
1632 1666 return prefix
1633 1667
1634 1668 if not getattr(self, 'filteredrevs', None):
1635 1669 try:
1636 1670 length = max(self.index.shortest(node), minlength)
1637 1671 return disambiguate(hexnode, length)
1638 1672 except RevlogError:
1639 1673 if node != wdirid:
1640 1674 raise LookupError(node, self.indexfile, _('no node'))
1641 1675 except AttributeError:
1642 1676 # Fall through to pure code
1643 1677 pass
1644 1678
1645 1679 if node == wdirid:
1646 1680 for length in range(minlength, 41):
1647 1681 prefix = hexnode[:length]
1648 1682 if isvalid(prefix):
1649 1683 return prefix
1650 1684
1651 1685 for length in range(minlength, 41):
1652 1686 prefix = hexnode[:length]
1653 1687 if isvalid(prefix):
1654 1688 return disambiguate(hexnode, length)
1655 1689
1656 1690 def cmp(self, node, text):
1657 1691 """compare text with a given file revision
1658 1692
1659 1693 returns True if text is different from what is stored.
1660 1694 """
1661 1695 p1, p2 = self.parents(node)
1662 1696 return hash(text, p1, p2) != node
1663 1697
1664 1698 def _cachesegment(self, offset, data):
1665 1699 """Add a segment to the revlog cache.
1666 1700
1667 1701 Accepts an absolute offset and the data that is at that location.
1668 1702 """
1669 1703 o, d = self._chunkcache
1670 1704 # try to add to existing cache
1671 1705 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1672 1706 self._chunkcache = o, d + data
1673 1707 else:
1674 1708 self._chunkcache = offset, data
1675 1709
1676 1710 def _readsegment(self, offset, length, df=None):
1677 1711 """Load a segment of raw data from the revlog.
1678 1712
1679 1713 Accepts an absolute offset, length to read, and an optional existing
1680 1714 file handle to read from.
1681 1715
1682 1716 If an existing file handle is passed, it will be seeked and the
1683 1717 original seek position will NOT be restored.
1684 1718
1685 1719 Returns a str or buffer of raw byte data.
1686 1720 """
1687 1721 # Cache data both forward and backward around the requested
1688 1722 # data, in a fixed size window. This helps speed up operations
1689 1723 # involving reading the revlog backwards.
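# Illustrative numbers (added): with a cachesize of 65536, a request
# for offset=70000, length=100 is widened to the aligned window
# realoffset=65536, reallength=65536, so data on both sides of the
# requested span ends up cached by the read below.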
1690 1724 cachesize = self._chunkcachesize
1691 1725 realoffset = offset & ~(cachesize - 1)
1692 1726 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
1693 1727 - realoffset)
1694 1728 with self._datareadfp(df) as df:
1695 1729 df.seek(realoffset)
1696 1730 d = df.read(reallength)
1697 1731 self._cachesegment(realoffset, d)
1698 1732 if offset != realoffset or reallength != length:
1699 1733 return util.buffer(d, offset - realoffset, length)
1700 1734 return d
1701 1735
1702 1736 def _getsegment(self, offset, length, df=None):
1703 1737 """Obtain a segment of raw data from the revlog.
1704 1738
1705 1739 Accepts an absolute offset, length of bytes to obtain, and an
1706 1740 optional file handle to the already-opened revlog. If the file
1707 1741 handle is used, its original seek position will not be preserved.
1708 1742
1709 1743 Requests for data may be returned from a cache.
1710 1744
1711 1745 Returns a str or a buffer instance of raw byte data.
1712 1746 """
1713 1747 o, d = self._chunkcache
1714 1748 l = len(d)
1715 1749
1716 1750 # is it in the cache?
1717 1751 cachestart = offset - o
1718 1752 cacheend = cachestart + length
1719 1753 if cachestart >= 0 and cacheend <= l:
1720 1754 if cachestart == 0 and cacheend == l:
1721 1755 return d # avoid a copy
1722 1756 return util.buffer(d, cachestart, cacheend - cachestart)
1723 1757
1724 1758 return self._readsegment(offset, length, df=df)
1725 1759
1726 1760 def _getsegmentforrevs(self, startrev, endrev, df=None):
1727 1761 """Obtain a segment of raw data corresponding to a range of revisions.
1728 1762
1729 1763 Accepts the start and end revisions and an optional already-open
1730 1764 file handle to be used for reading. If the file handle is read, its
1731 1765 seek position will not be preserved.
1732 1766
1733 1767 Requests for data may be satisfied by a cache.
1734 1768
1735 1769 Returns a 2-tuple of (offset, data) for the requested range of
1736 1770 revisions. Offset is the integer offset from the beginning of the
1737 1771 revlog and data is a str or buffer of the raw byte data.
1738 1772
1739 1773 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1740 1774 to determine where each revision's data begins and ends.
1741 1775 """
1742 1776 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1743 1777 # (functions are expensive).
1744 1778 index = self.index
1745 1779 istart = index[startrev]
1746 1780 start = int(istart[0] >> 16)
1747 1781 if startrev == endrev:
1748 1782 end = start + istart[1]
1749 1783 else:
1750 1784 iend = index[endrev]
1751 1785 end = int(iend[0] >> 16) + iend[1]
1752 1786
1753 1787 if self._inline:
1754 1788 start += (startrev + 1) * self._io.size
1755 1789 end += (endrev + 1) * self._io.size
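# Rationale (added): inline revlogs interleave one index entry before
# each revision's data in the .i file, so the physical offset of rev r
# is its logical data offset plus (r + 1) index-entry sizes.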
1756 1790 length = end - start
1757 1791
1758 1792 return start, self._getsegment(start, length, df=df)
1759 1793
1760 1794 def _chunk(self, rev, df=None):
1761 1795 """Obtain a single decompressed chunk for a revision.
1762 1796
1763 1797 Accepts an integer revision and an optional already-open file handle
1764 1798 to be used for reading. If used, the seek position of the file will not
1765 1799 be preserved.
1766 1800
1767 1801 Returns a str holding uncompressed data for the requested revision.
1768 1802 """
1769 1803 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1770 1804
1771 1805 def _chunks(self, revs, df=None):
1772 1806 """Obtain decompressed chunks for the specified revisions.
1773 1807
1774 1808 Accepts an iterable of numeric revisions that are assumed to be in
1775 1809 ascending order. Also accepts an optional already-open file handle
1776 1810 to be used for reading. If used, the seek position of the file will
1777 1811 not be preserved.
1778 1812
1779 1813 This function is similar to calling ``self._chunk()`` multiple times,
1780 1814 but is faster.
1781 1815
1782 1816 Returns a list with decompressed data for each requested revision.
1783 1817 """
1784 1818 if not revs:
1785 1819 return []
1786 1820 start = self.start
1787 1821 length = self.length
1788 1822 inline = self._inline
1789 1823 iosize = self._io.size
1790 1824 buffer = util.buffer
1791 1825
1792 1826 l = []
1793 1827 ladd = l.append
1794 1828
1795 1829 if not self._withsparseread:
1796 1830 slicedchunks = (revs,)
1797 1831 else:
1798 1832 slicedchunks = _slicechunk(self, revs)
1799 1833
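# Note (added): with sparse reads enabled, _slicechunk splits ``revs``
# into groups of on-disk-adjacent revisions (e.g. [1, 2, 5] may become
# [1, 2] and [5] when a large gap precedes rev 5) so that each group
# below can be fetched with a single dense read.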
1800 1834 for revschunk in slicedchunks:
1801 1835 firstrev = revschunk[0]
1802 1836 # Skip trailing revisions with empty diff
1803 1837 for lastrev in revschunk[::-1]:
1804 1838 if length(lastrev) != 0:
1805 1839 break
1806 1840
1807 1841 try:
1808 1842 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1809 1843 except OverflowError:
1810 1844 # issue4215 - we can't cache a run of chunks greater than
1811 1845 # 2G on Windows
1812 1846 return [self._chunk(rev, df=df) for rev in revschunk]
1813 1847
1814 1848 decomp = self.decompress
1815 1849 for rev in revschunk:
1816 1850 chunkstart = start(rev)
1817 1851 if inline:
1818 1852 chunkstart += (rev + 1) * iosize
1819 1853 chunklength = length(rev)
1820 1854 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1821 1855
1822 1856 return l
1823 1857
1824 1858 def _chunkclear(self):
1825 1859 """Clear the raw chunk cache."""
1826 1860 self._chunkcache = (0, '')
1827 1861
1828 1862 def deltaparent(self, rev):
1829 1863 """return deltaparent of the given revision"""
1830 1864 base = self.index[rev][3]
1831 1865 if base == rev:
1832 1866 return nullrev
1833 1867 elif self._generaldelta:
1834 1868 return base
1835 1869 else:
1836 1870 return rev - 1
1837 1871
1838 1872 def revdiff(self, rev1, rev2):
1839 1873 """return or calculate a delta between two revisions
1840 1874
1841 1875 The delta calculated is in binary form and is intended to be written to
1842 1876 revlog data directly. So this function needs raw revision data.
1843 1877 """
1844 1878 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1845 1879 return bytes(self._chunk(rev2))
1846 1880
1847 1881 return mdiff.textdiff(self.revision(rev1, raw=True),
1848 1882 self.revision(rev2, raw=True))
1849 1883
1850 1884 def revision(self, nodeorrev, _df=None, raw=False):
1851 1885 """return an uncompressed revision of a given node or revision
1852 1886 number.
1853 1887
1854 1888 _df - an existing file handle to read from. (internal-only)
1855 1889 raw - an optional argument specifying if the revision data is to be
1856 1890 treated as raw data when applying flag transforms. 'raw' should be set
1857 1891 to True when generating changegroups or in debug commands.
1858 1892 """
1859 1893 if isinstance(nodeorrev, int):
1860 1894 rev = nodeorrev
1861 1895 node = self.node(rev)
1862 1896 else:
1863 1897 node = nodeorrev
1864 1898 rev = None
1865 1899
1866 1900 cachedrev = None
1867 1901 flags = None
1868 1902 rawtext = None
1869 1903 if node == nullid:
1870 1904 return ""
1871 1905 if self._cache:
1872 1906 if self._cache[0] == node:
1873 1907 # _cache only stores rawtext
1874 1908 if raw:
1875 1909 return self._cache[2]
1876 1910 # duplicated, but good for perf
1877 1911 if rev is None:
1878 1912 rev = self.rev(node)
1879 1913 if flags is None:
1880 1914 flags = self.flags(rev)
1881 1915 # no extra flags set, no flag processor runs, text = rawtext
1882 1916 if flags == REVIDX_DEFAULT_FLAGS:
1883 1917 return self._cache[2]
1884 1918 # rawtext is reusable. need to run flag processor
1885 1919 rawtext = self._cache[2]
1886 1920
1887 1921 cachedrev = self._cache[1]
1888 1922
1889 1923 # look up what we need to read
1890 1924 if rawtext is None:
1891 1925 if rev is None:
1892 1926 rev = self.rev(node)
1893 1927
1894 1928 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1895 1929 if stopped:
1896 1930 rawtext = self._cache[2]
1897 1931
1898 1932 # drop cache to save memory
1899 1933 self._cache = None
1900 1934
1901 1935 bins = self._chunks(chain, df=_df)
1902 1936 if rawtext is None:
1903 1937 rawtext = bytes(bins[0])
1904 1938 bins = bins[1:]
1905 1939
1906 1940 rawtext = mdiff.patches(rawtext, bins)
1907 1941 self._cache = (node, rev, rawtext)
1908 1942
1909 1943 if flags is None:
1910 1944 if rev is None:
1911 1945 rev = self.rev(node)
1912 1946 flags = self.flags(rev)
1913 1947
1914 1948 text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
1915 1949 if validatehash:
1916 1950 self.checkhash(text, node, rev=rev)
1917 1951
1918 1952 return text
1919 1953
1920 1954 def hash(self, text, p1, p2):
1921 1955 """Compute a node hash.
1922 1956
1923 1957 Available as a function so that subclasses can replace the hash
1924 1958 as needed.
1925 1959 """
1926 1960 return hash(text, p1, p2)
1927 1961
1928 1962 def _processflags(self, text, flags, operation, raw=False):
1929 1963 """Inspect revision data flags and applies transforms defined by
1930 1964 registered flag processors.
1931 1965
1932 1966 ``text`` - the revision data to process
1933 1967 ``flags`` - the revision flags
1934 1968 ``operation`` - the operation being performed (read or write)
1935 1969 ``raw`` - an optional argument describing if the raw transform should be
1936 1970 applied.
1937 1971
1938 1972 This method processes the flags in the order (or reverse order if
1939 1973 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
1940 1974 flag processors registered for present flags. The order of flags defined
1941 1975 in REVIDX_FLAGS_ORDER must be stable because the transforms are not commutative.
1942 1976
1943 1977 Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
1944 1978 processed text and ``validatehash`` is a bool indicating whether the
1945 1979 returned text should be checked for hash integrity.
1946 1980
1947 1981 Note: If the ``raw`` argument is set, it has precedence over the
1948 1982 operation and will only update the value of ``validatehash``.
1949 1983 """
1950 1984 # fast path: no flag processors will run
1951 1985 if flags == 0:
1952 1986 return text, True
1953 1987 if operation not in ('read', 'write'):
1954 1988 raise ProgrammingError(_("invalid '%s' operation") % operation)
1955 1989 # Check all flags are known.
1956 1990 if flags & ~REVIDX_KNOWN_FLAGS:
1957 1991 raise RevlogError(_("incompatible revision flag '%#x'") %
1958 1992 (flags & ~REVIDX_KNOWN_FLAGS))
1959 1993 validatehash = True
1960 1994 # Depending on the operation (read or write), the order might be
1961 1995 # reversed due to non-commutative transforms.
1962 1996 orderedflags = REVIDX_FLAGS_ORDER
1963 1997 if operation == 'write':
1964 1998 orderedflags = reversed(orderedflags)
1965 1999
1966 2000 for flag in orderedflags:
1967 2001 # If a flagprocessor has been registered for a known flag, apply the
1968 2002 # related operation transform and update result tuple.
1969 2003 if flag & flags:
1970 2004 vhash = True
1971 2005
1972 2006 if flag not in _flagprocessors:
1973 2007 message = _("missing processor for flag '%#x'") % (flag)
1974 2008 raise RevlogError(message)
1975 2009
1976 2010 processor = _flagprocessors[flag]
1977 2011 if processor is not None:
1978 2012 readtransform, writetransform, rawtransform = processor
1979 2013
1980 2014 if raw:
1981 2015 vhash = rawtransform(self, text)
1982 2016 elif operation == 'read':
1983 2017 text, vhash = readtransform(self, text)
1984 2018 else: # write operation
1985 2019 text, vhash = writetransform(self, text)
1986 2020 validatehash = validatehash and vhash
1987 2021
1988 2022 return text, validatehash
1989 2023
1990 2024 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1991 2025 """Check node hash integrity.
1992 2026
1993 2027 Available as a function so that subclasses can extend hash mismatch
1994 2028 behaviors as needed.
1995 2029 """
1996 2030 try:
1997 2031 if p1 is None and p2 is None:
1998 2032 p1, p2 = self.parents(node)
1999 2033 if node != self.hash(text, p1, p2):
2000 2034 revornode = rev
2001 2035 if revornode is None:
2002 2036 revornode = templatefilters.short(hex(node))
2003 2037 raise RevlogError(_("integrity check failed on %s:%s")
2004 2038 % (self.indexfile, pycompat.bytestr(revornode)))
2005 2039 except RevlogError:
2006 2040 if self._censorable and _censoredtext(text):
2007 2041 raise error.CensoredNodeError(self.indexfile, node, text)
2008 2042 raise
2009 2043
2010 2044 def _enforceinlinesize(self, tr, fp=None):
2011 2045 """Check if the revlog is too big for inline and convert if so.
2012 2046
2013 2047 This should be called after revisions are added to the revlog. If the
2014 2048 revlog has grown too large to be an inline revlog, it will convert it
2015 2049 to use multiple index and data files.
2016 2050 """
2017 2051 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
2018 2052 return
2019 2053
2020 2054 trinfo = tr.find(self.indexfile)
2021 2055 if trinfo is None:
2022 2056 raise RevlogError(_("%s not found in the transaction")
2023 2057 % self.indexfile)
2024 2058
2025 2059 trindex = trinfo[2]
2026 2060 if trindex is not None:
2027 2061 dataoff = self.start(trindex)
2028 2062 else:
2029 2063 # revlog was stripped at start of transaction, use all leftover data
2030 2064 trindex = len(self) - 1
2031 2065 dataoff = self.end(-2)
2032 2066
2033 2067 tr.add(self.datafile, dataoff)
2034 2068
2035 2069 if fp:
2036 2070 fp.flush()
2037 2071 fp.close()
2038 2072
2039 2073 with self._datafp('w') as df:
2040 2074 for r in self:
2041 2075 df.write(self._getsegmentforrevs(r, r)[1])
2042 2076
2043 2077 with self._indexfp('w') as fp:
2044 2078 self.version &= ~FLAG_INLINE_DATA
2045 2079 self._inline = False
2046 2080 io = self._io
2047 2081 for i in self:
2048 2082 e = io.packentry(self.index[i], self.node, self.version, i)
2049 2083 fp.write(e)
2050 2084
2051 2085 # the temp file replaces the real index when we exit the context
2052 2086 # manager
2053 2087
2054 2088 tr.replace(self.indexfile, trindex * self._io.size)
2055 2089 self._chunkclear()
2056 2090
2057 2091 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
2058 2092 node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
2059 2093 """add a revision to the log
2060 2094
2061 2095 text - the revision data to add
2062 2096 transaction - the transaction object used for rollback
2063 2097 link - the linkrev data to add
2064 2098 p1, p2 - the parent nodeids of the revision
2065 2099 cachedelta - an optional precomputed delta
2066 2100 node - nodeid of revision; typically node is not specified, and it is
2067 2101 computed by default as hash(text, p1, p2); however, subclasses might
2068 2102 use a different hashing method (and override checkhash() in that case)
2069 2103 flags - the known flags to set on the revision
2070 2104 deltacomputer - an optional _deltacomputer instance shared between
2071 2105 multiple calls
2072 2106 """
2073 2107 if link == nullrev:
2074 2108 raise RevlogError(_("attempted to add linkrev -1 to %s")
2075 2109 % self.indexfile)
2076 2110
2077 2111 if flags:
2078 2112 node = node or self.hash(text, p1, p2)
2079 2113
2080 2114 rawtext, validatehash = self._processflags(text, flags, 'write')
2081 2115
2082 2116 # If the flag processor modifies the revision data, ignore any provided
2083 2117 # cachedelta.
2084 2118 if rawtext != text:
2085 2119 cachedelta = None
2086 2120
2087 2121 if len(rawtext) > _maxentrysize:
2088 2122 raise RevlogError(
2089 2123 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
2090 2124 % (self.indexfile, len(rawtext)))
2091 2125
2092 2126 node = node or self.hash(rawtext, p1, p2)
2093 2127 if node in self.nodemap:
2094 2128 return node
2095 2129
2096 2130 if validatehash:
2097 2131 self.checkhash(rawtext, node, p1=p1, p2=p2)
2098 2132
2099 2133 return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
2100 2134 flags, cachedelta=cachedelta,
2101 2135 deltacomputer=deltacomputer)
2102 2136
2103 2137 def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
2104 2138 cachedelta=None, deltacomputer=None):
2105 2139 """add a raw revision with known flags, node and parents
2106 2140 useful when reusing a revision not stored in this revlog (ex: received
2107 2141 over wire, or read from an external bundle).
2108 2142 """
2109 2143 dfh = None
2110 2144 if not self._inline:
2111 2145 dfh = self._datafp("a+")
2112 2146 ifh = self._indexfp("a+")
2113 2147 try:
2114 2148 return self._addrevision(node, rawtext, transaction, link, p1, p2,
2115 2149 flags, cachedelta, ifh, dfh,
2116 2150 deltacomputer=deltacomputer)
2117 2151 finally:
2118 2152 if dfh:
2119 2153 dfh.close()
2120 2154 ifh.close()
2121 2155
2122 2156 def compress(self, data):
2123 2157 """Generate a possibly-compressed representation of data."""
2124 2158 if not data:
2125 2159 return '', data
2126 2160
2127 2161 compressed = self._compressor.compress(data)
2128 2162
2129 2163 if compressed:
2130 2164 # The revlog compressor added the header in the returned data.
2131 2165 return '', compressed
2132 2166
2133 2167 if data[0:1] == '\0':
2134 2168 return '', data
2135 2169 return 'u', data
2136 2170
2137 2171 def decompress(self, data):
2138 2172 """Decompress a revlog chunk.
2139 2173
2140 2174 The chunk is expected to begin with a header identifying the
2141 2175 format type so it can be routed to an appropriate decompressor.
2142 2176 """
2143 2177 if not data:
2144 2178 return data
2145 2179
2146 2180 # Revlogs are read much more frequently than they are written and many
2147 2181 # chunks only take microseconds to decompress, so performance is
2148 2182 # important here.
2149 2183 #
2150 2184 # We can make a few assumptions about revlogs:
2151 2185 #
2152 2186 # 1) the majority of chunks will be compressed (as opposed to inline
2153 2187 # raw data).
2154 2188 # 2) decompressing *any* data will likely be at least 10x slower than
2155 2189 # returning raw inline data.
2156 2190 # 3) we want to prioritize common and officially supported compression
2157 2191 # engines
2158 2192 #
2159 2193 # It follows that we want to optimize for "decompress compressed data
2160 2194 # when encoded with common and officially supported compression engines"
2161 2195 # case over "raw data" and "data encoded by less common or non-official
2162 2196 # compression engines." That is why we have the inline lookup first
2163 2197 # followed by the compengines lookup.
2164 2198 #
2165 2199 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2166 2200 # compressed chunks. And this matters for changelog and manifest reads.
2167 2201 t = data[0:1]
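# Header byte mapping (added summary): 'x' = zlib stream; '\0' = raw
# data that happens to begin with NUL (returned as-is); 'u' = explicit
# uncompressed marker (the 'u' is stripped); anything else is resolved
# through util.compengines by its revlog header.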
2168 2202
2169 2203 if t == 'x':
2170 2204 try:
2171 2205 return _zlibdecompress(data)
2172 2206 except zlib.error as e:
2173 2207 raise RevlogError(_('revlog decompress error: %s') %
2174 2208 stringutil.forcebytestr(e))
2175 2209 # '\0' is more common than 'u' so it goes first.
2176 2210 elif t == '\0':
2177 2211 return data
2178 2212 elif t == 'u':
2179 2213 return util.buffer(data, 1)
2180 2214
2181 2215 try:
2182 2216 compressor = self._decompressors[t]
2183 2217 except KeyError:
2184 2218 try:
2185 2219 engine = util.compengines.forrevlogheader(t)
2186 2220 compressor = engine.revlogcompressor()
2187 2221 self._decompressors[t] = compressor
2188 2222 except KeyError:
2189 2223 raise RevlogError(_('unknown compression type %r') % t)
2190 2224
2191 2225 return compressor.decompress(data)
2192 2226
2193 2227 def _isgooddeltainfo(self, deltainfo, revinfo):
2194 2228 """Returns True if the given delta is good. Good means that it is within
2195 2229 the disk span, disk size, and chain length bounds that we know to be
2196 2230 performant."""
2197 2231 if deltainfo is None:
2198 2232 return False
2199 2233
2200 2234 # - 'deltainfo.distance' is the distance from the base revision --
2201 2235 # bounding it limits the amount of I/O we need to do.
2202 2236 # - 'deltainfo.compresseddeltalen' is the sum of the total size of
2203 2237 # deltas we need to apply -- bounding it limits the amount of CPU
2204 2238 # we consume.
2205 2239
2206 2240 textlen = revinfo.textlen
2207 2241 defaultmax = textlen * 4
2208 2242 maxdist = self._maxdeltachainspan
2209 2243 if not maxdist:
2210 2244 maxdist = deltainfo.distance # ensure the conditional pass
2211 2245 maxdist = max(maxdist, defaultmax)
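# Worked example (added; hypothetical numbers): with textlen=1000 and
# no configured span limit, the distance check always passes (maxdist
# is lifted to the actual distance); the delta is then rejected only if
# deltalen > 1000, the chain's compressed deltas exceed 2000 bytes, or
# the chain grows past _maxchainlen.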
2212 2246 if (deltainfo.distance > maxdist or deltainfo.deltalen > textlen or
2213 2247 deltainfo.compresseddeltalen > textlen * 2 or
2214 2248 (self._maxchainlen and deltainfo.chainlen > self._maxchainlen)):
2215 2249 return False
2216 2250
2217 2251 return True
2218 2252
2219 2253 def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
2220 2254 cachedelta, ifh, dfh, alwayscache=False,
2221 2255 deltacomputer=None):
2222 2256 """internal function to add revisions to the log
2223 2257
2224 2258 see addrevision for argument descriptions.
2225 2259
2226 2260 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2227 2261
2228 2262 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2229 2263 be used.
2230 2264
2231 2265 invariants:
2232 2266 - rawtext is optional (can be None); if not set, cachedelta must be set.
2233 2267 if both are set, they must correspond to each other.
2234 2268 """
2235 2269 if node == nullid:
2236 2270 raise RevlogError(_("%s: attempt to add null revision") %
2237 2271 (self.indexfile))
2238 2272 if node == wdirid or node in wdirfilenodeids:
2239 2273 raise RevlogError(_("%s: attempt to add wdir revision") %
2240 2274 (self.indexfile))
2241 2275
2242 2276 if self._inline:
2243 2277 fh = ifh
2244 2278 else:
2245 2279 fh = dfh
2246 2280
2247 2281 btext = [rawtext]
2248 2282
2249 2283 curr = len(self)
2250 2284 prev = curr - 1
2251 2285 offset = self.end(prev)
2252 2286 p1r, p2r = self.rev(p1), self.rev(p2)
2253 2287
2254 2288 # full versions are inserted when the needed deltas
2255 2289 # become comparable to the uncompressed text
2256 2290 if rawtext is None:
2257 2291 # need rawtext size, before changed by flag processors, which is
2258 2292 # the non-raw size. use revlog explicitly to avoid filelog's extra
2259 2293 # logic that might remove metadata size.
2260 2294 textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
2261 2295 cachedelta[1])
2262 2296 else:
2263 2297 textlen = len(rawtext)
2264 2298
2265 2299 if deltacomputer is None:
2266 2300 deltacomputer = _deltacomputer(self)
2267 2301
2268 2302 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2269 2303
2270 2304 # no delta for flag processor revisions (see "candelta" for why);
2271 2305 # not calling candelta since only one revision needs testing, and to
2272 2306 # avoid the overhead of fetching flags again.
2273 2307 if flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
2274 2308 deltainfo = None
2275 2309 else:
2276 2310 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2277 2311
2278 2312 if deltainfo is not None:
2279 2313 base = deltainfo.base
2280 2314 chainbase = deltainfo.chainbase
2281 2315 data = deltainfo.data
2282 2316 l = deltainfo.deltalen
2283 2317 else:
2284 2318 rawtext = deltacomputer.buildtext(revinfo, fh)
2285 2319 data = self.compress(rawtext)
2286 2320 l = len(data[1]) + len(data[0])
2287 2321 base = chainbase = curr
2288 2322
2289 2323 e = (offset_type(offset, flags), l, textlen,
2290 2324 base, link, p1r, p2r, node)
2291 2325 self.index.insert(-1, e)
2292 2326 self.nodemap[node] = curr
2293 2327
2294 2328 entry = self._io.packentry(e, self.node, self.version, curr)
2295 2329 self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
2296 2330
2297 2331 if alwayscache and rawtext is None:
2298 2332 rawtext = deltacomputer.buildtext(revinfo, fh)
2299 2333
2300 2334 if type(rawtext) == bytes: # only accept immutable objects
2301 2335 self._cache = (node, curr, rawtext)
2302 2336 self._chainbasecache[curr] = chainbase
2303 2337 return node
2304 2338
2305 2339 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
2306 2340 # Files opened in a+ mode have inconsistent behavior on various
2307 2341 # platforms. Windows requires that a file positioning call be made
2308 2342 # when the file handle transitions between reads and writes. See
2309 2343 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2310 2344 # platforms, Python or the platform itself can be buggy. Some versions
2311 2345 # of Solaris have been observed to not append at the end of the file
2312 2346 # if the file was seeked to before the end. See issue4943 for more.
2313 2347 #
2314 2348 # We work around this issue by inserting a seek() before writing.
2315 2349 # Note: This is likely not necessary on Python 3.
2316 2350 ifh.seek(0, os.SEEK_END)
2317 2351 if dfh:
2318 2352 dfh.seek(0, os.SEEK_END)
2319 2353
2320 2354 curr = len(self) - 1
2321 2355 if not self._inline:
2322 2356 transaction.add(self.datafile, offset)
2323 2357 transaction.add(self.indexfile, curr * len(entry))
2324 2358 if data[0]:
2325 2359 dfh.write(data[0])
2326 2360 dfh.write(data[1])
2327 2361 ifh.write(entry)
2328 2362 else:
2329 2363 offset += curr * self._io.size
2330 2364 transaction.add(self.indexfile, offset, curr)
2331 2365 ifh.write(entry)
2332 2366 ifh.write(data[0])
2333 2367 ifh.write(data[1])
2334 2368 self._enforceinlinesize(transaction, ifh)
2335 2369
2336 2370 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
2337 2371 """
2338 2372 add a delta group
2339 2373
2340 2374 given a set of deltas, add them to the revision log. the
2341 2375 first delta is against its parent, which should be in our
2342 2376 log, the rest are against the previous delta.
2343 2377
2344 2378 If ``addrevisioncb`` is defined, it will be called with arguments of
2345 2379 this revlog and the node that was added.
2346 2380 """
2347 2381
2348 2382 nodes = []
2349 2383
2350 2384 r = len(self)
2351 2385 end = 0
2352 2386 if r:
2353 2387 end = self.end(r - 1)
2354 2388 ifh = self._indexfp("a+")
2355 2389 isize = r * self._io.size
2356 2390 if self._inline:
2357 2391 transaction.add(self.indexfile, end + isize, r)
2358 2392 dfh = None
2359 2393 else:
2360 2394 transaction.add(self.indexfile, isize, r)
2361 2395 transaction.add(self.datafile, end)
2362 2396 dfh = self._datafp("a+")
2363 2397 def flush():
2364 2398 if dfh:
2365 2399 dfh.flush()
2366 2400 ifh.flush()
2367 2401 try:
2368 2402 deltacomputer = _deltacomputer(self)
2369 2403 # loop through our set of deltas
2370 2404 for data in deltas:
2371 2405 node, p1, p2, linknode, deltabase, delta, flags = data
2372 2406 link = linkmapper(linknode)
2373 2407 flags = flags or REVIDX_DEFAULT_FLAGS
2374 2408
2375 2409 nodes.append(node)
2376 2410
2377 2411 if node in self.nodemap:
2378 2412 # this can happen if two branches make the same change
2379 2413 continue
2380 2414
2381 2415 for p in (p1, p2):
2382 2416 if p not in self.nodemap:
2383 2417 raise LookupError(p, self.indexfile,
2384 2418 _('unknown parent'))
2385 2419
2386 2420 if deltabase not in self.nodemap:
2387 2421 raise LookupError(deltabase, self.indexfile,
2388 2422 _('unknown delta base'))
2389 2423
2390 2424 baserev = self.rev(deltabase)
2391 2425
2392 2426 if baserev != nullrev and self.iscensored(baserev):
2393 2427 # if base is censored, delta must be full replacement in a
2394 2428 # single patch operation
2395 2429 hlen = struct.calcsize(">lll")
2396 2430 oldlen = self.rawsize(baserev)
2397 2431 newlen = len(delta) - hlen
2398 2432 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2399 2433 raise error.CensoredBaseError(self.indexfile,
2400 2434 self.node(baserev))
2401 2435
2402 2436 if not flags and self._peek_iscensored(baserev, delta, flush):
2403 2437 flags |= REVIDX_ISCENSORED
2404 2438
2405 2439 # We assume consumers of addrevisioncb will want to retrieve
2406 2440 # the added revision, which will require a call to
2407 2441 # revision(). revision() will fast path if there is a cache
2408 2442 # hit. So, we tell _addrevision() to always cache in this case.
2409 2443 # We're only using addgroup() in the context of changegroup
2410 2444 # generation so the revision data can always be handled as raw
2411 2445 # by the flagprocessor.
2412 2446 self._addrevision(node, None, transaction, link,
2413 2447 p1, p2, flags, (baserev, delta),
2414 2448 ifh, dfh,
2415 2449 alwayscache=bool(addrevisioncb),
2416 2450 deltacomputer=deltacomputer)
2417 2451
2418 2452 if addrevisioncb:
2419 2453 addrevisioncb(self, node)
2420 2454
2421 2455 if not dfh and not self._inline:
2422 2456 # addrevision switched from inline to conventional
2423 2457 # reopen the index
2424 2458 ifh.close()
2425 2459 dfh = self._datafp("a+")
2426 2460 ifh = self._indexfp("a+")
2427 2461 finally:
2428 2462 if dfh:
2429 2463 dfh.close()
2430 2464 ifh.close()
2431 2465
2432 2466 return nodes
2433 2467
2434 2468 def iscensored(self, rev):
2435 2469 """Check if a file revision is censored."""
2436 2470 if not self._censorable:
2437 2471 return False
2438 2472
2439 2473 return self.flags(rev) & REVIDX_ISCENSORED
2440 2474
2441 2475 def _peek_iscensored(self, baserev, delta, flush):
2442 2476 """Quickly check if a delta produces a censored revision."""
2443 2477 if not self._censorable:
2444 2478 return False
2445 2479
2446 2480 # Fragile heuristic: unless new file meta keys are added alphabetically
2447 2481 # preceding "censored", all censored revisions are prefixed by
2448 2482 # "\1\ncensored:". A delta producing such a censored revision must be a
2449 2483 # full-replacement delta, so we inspect the first and only patch in the
2450 2484 # delta for this prefix.
2451 2485 hlen = struct.calcsize(">lll")
2452 2486 if len(delta) <= hlen:
2453 2487 return False
2454 2488
2455 2489 oldlen = self.rawsize(baserev)
2456 2490 newlen = len(delta) - hlen
2457 2491 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2458 2492 return False
2459 2493
2460 2494 add = "\1\ncensored:"
2461 2495 addlen = len(add)
2462 2496 return newlen >= addlen and delta[hlen:hlen + addlen] == add
2463 2497
2464 2498 def getstrippoint(self, minlink):
2465 2499 """find the minimum rev that must be stripped to strip the linkrev
2466 2500
2467 2501 Returns a tuple containing the minimum rev and a set of all revs that
2468 2502 have linkrevs that will be broken by this strip.
2469 2503 """
2470 2504 brokenrevs = set()
2471 2505 strippoint = len(self)
2472 2506
2473 2507 heads = {}
2474 2508 futurelargelinkrevs = set()
2475 2509 for head in self.headrevs():
2476 2510 headlinkrev = self.linkrev(head)
2477 2511 heads[head] = headlinkrev
2478 2512 if headlinkrev >= minlink:
2479 2513 futurelargelinkrevs.add(headlinkrev)
2480 2514
2481 2515 # This algorithm involves walking down the rev graph, starting at the
2482 2516 # heads. Since the revs are topologically sorted according to linkrev,
2483 2517 # once all head linkrevs are below the minlink, we know there are
2484 2518 # no more revs that could have a linkrev greater than minlink.
2485 2519 # So we can stop walking.
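# Worked example (added; hypothetical revs): for a linear history with
# revs 0..3 whose linkrevs equal their rev numbers, minlink=2 walks
# down from head 3 and stops at strippoint=2 with no broken linkrevs.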
2486 2520 while futurelargelinkrevs:
2487 2521 strippoint -= 1
2488 2522 linkrev = heads.pop(strippoint)
2489 2523
2490 2524 if linkrev < minlink:
2491 2525 brokenrevs.add(strippoint)
2492 2526 else:
2493 2527 futurelargelinkrevs.remove(linkrev)
2494 2528
2495 2529 for p in self.parentrevs(strippoint):
2496 2530 if p != nullrev:
2497 2531 plinkrev = self.linkrev(p)
2498 2532 heads[p] = plinkrev
2499 2533 if plinkrev >= minlink:
2500 2534 futurelargelinkrevs.add(plinkrev)
2501 2535
2502 2536 return strippoint, brokenrevs
2503 2537
2504 2538 def strip(self, minlink, transaction):
2505 2539 """truncate the revlog on the first revision with a linkrev >= minlink
2506 2540
2507 2541 This function is called when we're stripping revision minlink and
2508 2542 its descendants from the repository.
2509 2543
2510 2544 We have to remove all revisions with linkrev >= minlink, because
2511 2545 the equivalent changelog revisions will be renumbered after the
2512 2546 strip.
2513 2547
2514 2548 So we truncate the revlog on the first of these revisions, and
2515 2549 trust that the caller has saved the revisions that shouldn't be
2516 2550 removed and that it'll re-add them after this truncation.
2517 2551 """
2518 2552 if len(self) == 0:
2519 2553 return
2520 2554
2521 2555 rev, _ = self.getstrippoint(minlink)
2522 2556 if rev == len(self):
2523 2557 return
2524 2558
2525 2559 # first truncate the files on disk
2526 2560 end = self.start(rev)
2527 2561 if not self._inline:
2528 2562 transaction.add(self.datafile, end)
2529 2563 end = rev * self._io.size
2530 2564 else:
2531 2565 end += rev * self._io.size
2532 2566
2533 2567 transaction.add(self.indexfile, end)
2534 2568
2535 2569 # then reset internal state in memory to forget those revisions
2536 2570 self._cache = None
2537 2571 self._chaininfocache = {}
2538 2572 self._chunkclear()
2539 2573 for x in xrange(rev, len(self)):
2540 2574 del self.nodemap[self.node(x)]
2541 2575
2542 2576 del self.index[rev:-1]
2543 2577 self._nodepos = None
2544 2578
2545 2579 def checksize(self):
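# Return value (added summary): a (dd, di) pair, where dd is how many
# bytes the data file differs from its expected size and di is the
# number of trailing index bytes that do not form a whole entry; both
# are 0 for a consistent revlog.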
2546 2580 expected = 0
2547 2581 if len(self):
2548 2582 expected = max(0, self.end(len(self) - 1))
2549 2583
2550 2584 try:
2551 2585 with self._datafp() as f:
2552 2586 f.seek(0, 2)
2553 2587 actual = f.tell()
2554 2588 dd = actual - expected
2555 2589 except IOError as inst:
2556 2590 if inst.errno != errno.ENOENT:
2557 2591 raise
2558 2592 dd = 0
2559 2593
2560 2594 try:
2561 2595 f = self.opener(self.indexfile)
2562 2596 f.seek(0, 2)
2563 2597 actual = f.tell()
2564 2598 f.close()
2565 2599 s = self._io.size
2566 2600 i = max(0, actual // s)
2567 2601 di = actual - (i * s)
2568 2602 if self._inline:
2569 2603 databytes = 0
2570 2604 for r in self:
2571 2605 databytes += max(0, self.length(r))
2572 2606 dd = 0
2573 2607 di = actual - len(self) * s - databytes
2574 2608 except IOError as inst:
2575 2609 if inst.errno != errno.ENOENT:
2576 2610 raise
2577 2611 di = 0
2578 2612
2579 2613 return (dd, di)
2580 2614
2581 2615 def files(self):
2582 2616 res = [self.indexfile]
2583 2617 if not self._inline:
2584 2618 res.append(self.datafile)
2585 2619 return res
2586 2620
2587 2621 DELTAREUSEALWAYS = 'always'
2588 2622 DELTAREUSESAMEREVS = 'samerevs'
2589 2623 DELTAREUSENEVER = 'never'
2590 2624
2591 2625 DELTAREUSEFULLADD = 'fulladd'
2592 2626
2593 2627 DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}
2594 2628
2595 2629 def clone(self, tr, destrevlog, addrevisioncb=None,
2596 2630 deltareuse=DELTAREUSESAMEREVS, aggressivemergedeltas=None):
2597 2631 """Copy this revlog to another, possibly with format changes.
2598 2632
2599 2633 The destination revlog will contain the same revisions and nodes.
2600 2634 However, it may not be bit-for-bit identical due to e.g. delta encoding
2601 2635 differences.
2602 2636
2603 2637 The ``deltareuse`` argument controls how deltas from the existing revlog
2604 2638 are preserved in the destination revlog. The argument can have the
2605 2639 following values:
2606 2640
2607 2641 DELTAREUSEALWAYS
2608 2642 Deltas will always be reused (if possible), even if the destination
2609 2643 revlog would not select the same revisions for the delta. This is the
2610 2644 fastest mode of operation.
2611 2645 DELTAREUSESAMEREVS
2612 2646 Deltas will be reused if the destination revlog would pick the same
2613 2647 revisions for the delta. This mode strikes a balance between speed
2614 2648 and optimization.
2615 2649 DELTAREUSENEVER
2616 2650 Deltas will never be reused. This is the slowest mode of execution.
2617 2651 This mode can be used to recompute deltas (e.g. if the diff/delta
2618 2652 algorithm changes).
2619 2653
2620 2654 Delta computation can be slow, so the choice of delta reuse policy can
2621 2655 significantly affect run time.
2622 2656
2623 2657 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2624 2658 two extremes. Deltas will be reused if they are appropriate. But if a
2625 2659 better delta base revision is available, it will be chosen. This means if you
2626 2660 are converting a non-generaldelta revlog to a generaldelta revlog,
2627 2661 deltas will be recomputed if the delta's parent isn't a parent of the
2628 2662 revision.
2629 2663
2630 2664 In addition to the delta policy, the ``aggressivemergedeltas`` argument
2631 2665 controls whether to compute deltas against both parents for merges.
2632 2666 If unspecified, the destination revlog's current setting is used.
2633 2667 """
2634 2668 if deltareuse not in self.DELTAREUSEALL:
2635 2669 raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)
2636 2670
2637 2671 if len(destrevlog):
2638 2672 raise ValueError(_('destination revlog is not empty'))
2639 2673
2640 2674 if getattr(self, 'filteredrevs', None):
2641 2675 raise ValueError(_('source revlog has filtered revisions'))
2642 2676 if getattr(destrevlog, 'filteredrevs', None):
2643 2677 raise ValueError(_('destination revlog has filtered revisions'))
2644 2678
2645 2679 # lazydeltabase controls whether to reuse a cached delta, if possible.
2646 2680 oldlazydeltabase = destrevlog._lazydeltabase
2647 2681 oldamd = destrevlog._aggressivemergedeltas
2648 2682
2649 2683 try:
2650 2684 if deltareuse == self.DELTAREUSEALWAYS:
2651 2685 destrevlog._lazydeltabase = True
2652 2686 elif deltareuse == self.DELTAREUSESAMEREVS:
2653 2687 destrevlog._lazydeltabase = False
2654 2688
2655 2689 destrevlog._aggressivemergedeltas = aggressivemergedeltas or oldamd
2656 2690
2657 2691 populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
2658 2692 self.DELTAREUSESAMEREVS)
2659 2693
2660 2694 deltacomputer = _deltacomputer(destrevlog)
2661 2695 index = self.index
2662 2696 for rev in self:
2663 2697 entry = index[rev]
2664 2698
2665 2699 # Some classes override linkrev to take filtered revs into
2666 2700 # account. Use raw entry from index.
2667 2701 flags = entry[0] & 0xffff
2668 2702 linkrev = entry[4]
2669 2703 p1 = index[entry[5]][7]
2670 2704 p2 = index[entry[6]][7]
2671 2705 node = entry[7]
2672 2706
2673 2707 # (Possibly) reuse the delta from the revlog if allowed and
2674 2708 # the revlog chunk is a delta.
2675 2709 cachedelta = None
2676 2710 rawtext = None
2677 2711 if populatecachedelta:
2678 2712 dp = self.deltaparent(rev)
2679 2713 if dp != nullrev:
2680 2714 cachedelta = (dp, bytes(self._chunk(rev)))
2681 2715
2682 2716 if not cachedelta:
2683 2717 rawtext = self.revision(rev, raw=True)
2684 2718
2685 2719
2686 2720 if deltareuse == self.DELTAREUSEFULLADD:
2687 2721 destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
2688 2722 cachedelta=cachedelta,
2689 2723 node=node, flags=flags,
2690 2724 deltacomputer=deltacomputer)
2691 2725 else:
2692 2726 ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
2693 2727 checkambig=False)
2694 2728 dfh = None
2695 2729 if not destrevlog._inline:
2696 2730 dfh = destrevlog.opener(destrevlog.datafile, 'a+')
2697 2731 try:
2698 2732 destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
2699 2733 p2, flags, cachedelta, ifh, dfh,
2700 2734 deltacomputer=deltacomputer)
2701 2735 finally:
2702 2736 if dfh:
2703 2737 dfh.close()
2704 2738 ifh.close()
2705 2739
2706 2740 if addrevisioncb:
2707 2741 addrevisioncb(self, rev, node)
2708 2742 finally:
2709 2743 destrevlog._lazydeltabase = oldlazydeltabase
2710 2744 destrevlog._aggressivemergedeltas = oldamd