revlog: compute snapshot depth on delta info...
Boris Feld
r39190:e0da43e2 default
@@ -1,3039 +1,3053 @@
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import collections
17 17 import contextlib
18 18 import errno
19 19 import hashlib
20 20 import heapq
21 21 import os
22 22 import re
23 23 import struct
24 24 import zlib
25 25
26 26 # import stuff from node for others to import from revlog
27 27 from .node import (
28 28 bin,
29 29 hex,
30 30 nullid,
31 31 nullrev,
32 32 wdirfilenodeids,
33 33 wdirhex,
34 34 wdirid,
35 35 wdirrev,
36 36 )
37 37 from .i18n import _
38 38 from .thirdparty import (
39 39 attr,
40 40 )
41 41 from . import (
42 42 ancestor,
43 43 error,
44 44 mdiff,
45 45 policy,
46 46 pycompat,
47 47 templatefilters,
48 48 util,
49 49 )
50 50 from .utils import (
51 51 stringutil,
52 52 )
53 53
54 54 parsers = policy.importmod(r'parsers')
55 55
56 56 # Aliased for performance.
57 57 _zlibdecompress = zlib.decompress
58 58
59 59 # revlog header flags
60 60 REVLOGV0 = 0
61 61 REVLOGV1 = 1
62 62 # Dummy value until file format is finalized.
63 63 # Reminder: change the bounds check in revlog.__init__ when this is changed.
64 64 REVLOGV2 = 0xDEAD
65 65 FLAG_INLINE_DATA = (1 << 16)
66 66 FLAG_GENERALDELTA = (1 << 17)
67 67 REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA
68 68 REVLOG_DEFAULT_FORMAT = REVLOGV1
69 69 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
70 70 REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA
71 71 REVLOGV2_FLAGS = REVLOGV1_FLAGS
72 72
73 73 # revlog index flags
74 74 REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
75 75 REVIDX_ELLIPSIS = (1 << 14) # revision hash does not match data (narrowhg)
76 76 REVIDX_EXTSTORED = (1 << 13) # revision data is stored externally
77 77 REVIDX_DEFAULT_FLAGS = 0
78 78 # stable order in which flags need to be processed and their processors applied
79 79 REVIDX_FLAGS_ORDER = [
80 80 REVIDX_ISCENSORED,
81 81 REVIDX_ELLIPSIS,
82 82 REVIDX_EXTSTORED,
83 83 ]
84 84 REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)
85 85 # bitmask of flags that could cause rawdata content changes
86 86 REVIDX_RAWTEXT_CHANGING_FLAGS = REVIDX_ISCENSORED | REVIDX_EXTSTORED
87 87
88 88 # max size of revlog with inline data
89 89 _maxinline = 131072
90 90 _chunksize = 1048576
91 91
92 92 RevlogError = error.RevlogError
93 93 LookupError = error.LookupError
94 94 AmbiguousPrefixLookupError = error.AmbiguousPrefixLookupError
95 95 CensoredNodeError = error.CensoredNodeError
96 96 ProgrammingError = error.ProgrammingError
97 97
98 98 # Store flag processors (cf. 'addflagprocessor()' to register)
99 99 _flagprocessors = {
100 100 REVIDX_ISCENSORED: None,
101 101 }
102 102
103 103 _mdre = re.compile('\1\n')
104 104 def parsemeta(text):
105 105 """return (metadatadict, metadatasize)"""
106 106 # text can be buffer, so we can't use .startswith or .index
107 107 if text[:2] != '\1\n':
108 108 return None, None
109 109 s = _mdre.search(text, 2).start()
110 110 mtext = text[2:s]
111 111 meta = {}
112 112 for l in mtext.splitlines():
113 113 k, v = l.split(": ", 1)
114 114 meta[k] = v
115 115 return meta, (s + 2)
116 116
117 117 def packmeta(meta, text):
118 118 keys = sorted(meta)
119 119 metatext = "".join("%s: %s\n" % (k, meta[k]) for k in keys)
120 120 return "\1\n%s\1\n%s" % (metatext, text)
121 121
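# Editor's illustrative sketch (not part of this change): packmeta() and
# parsemeta() round-trip the "\1\n"-delimited metadata envelope defined
# above. The keys below are arbitrary sample values.
def _examplemetaroundtrip():
    packed = packmeta({'copy': 'a.txt', 'copyrev': 'ff' * 20}, 'content')
    meta, size = parsemeta(packed)
    assert meta == {'copy': 'a.txt', 'copyrev': 'ff' * 20}
    assert packed[size:] == 'content'  # everything after the envelope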
122 122 def _censoredtext(text):
123 123 m, offs = parsemeta(text)
124 124 return m and "censored" in m
125 125
126 126 def addflagprocessor(flag, processor):
127 127 """Register a flag processor on a revision data flag.
128 128
129 129 Invariant:
130 130 - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
131 131 and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
132 132 - Only one flag processor can be registered on a specific flag.
133 133 - flagprocessors must be 3-tuples of functions (read, write, raw) with the
134 134 following signatures:
135 135 - (read) f(self, rawtext) -> text, bool
136 136 - (write) f(self, text) -> rawtext, bool
137 137 - (raw) f(self, rawtext) -> bool
138 138 "text" is presented to the user. "rawtext" is stored in revlog data, not
139 139 directly visible to the user.
140 140 The boolean returned by these transforms is used to determine whether
141 141 the returned text can be used for hash integrity checking. For example,
142 142 if "write" returns False, then "text" is used to generate hash. If
143 143 "write" returns True, that basically means "rawtext" returned by "write"
144 144 should be used to generate hash. Usually, "write" and "read" return
145 145 different booleans. And "raw" returns the same boolean as "write".
146 146
147 147 Note: The 'raw' transform is used for changegroup generation and in some
148 148 debug commands. In this case the transform only indicates whether the
149 149 contents can be used for hash integrity checks.
150 150 """
151 151 if not flag & REVIDX_KNOWN_FLAGS:
152 152 msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
153 153 raise ProgrammingError(msg)
154 154 if flag not in REVIDX_FLAGS_ORDER:
155 155 msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
156 156 raise ProgrammingError(msg)
157 157 if flag in _flagprocessors:
158 158 msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
159 159 raise error.Abort(msg)
160 160 _flagprocessors[flag] = processor
161 161
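# Editor's illustrative sketch (not part of this change): the shape of the
# (read, write, raw) 3-tuple expected by addflagprocessor(). This identity
# processor is hypothetical; a real processor would transform the text.
def _exampleflagprocessor():
    def readtransform(self, rawtext):
        # stored rawtext -> user-visible text; True: result is hashable
        return rawtext, True
    def writetransform(self, text):
        # user-visible text -> rawtext to store; True: hash the rawtext
        return text, True
    def rawtransform(self, rawtext):
        # may rawtext be used directly for hash integrity checks?
        return True
    # e.g. addflagprocessor(REVIDX_EXTSTORED,
    #                       (readtransform, writetransform, rawtransform))
    return (readtransform, writetransform, rawtransform)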
162 162 def getoffset(q):
163 163 return int(q >> 16)
164 164
165 165 def gettype(q):
166 166 return int(q & 0xFFFF)
167 167
168 168 def offset_type(offset, type):
169 169 if (type & ~REVIDX_KNOWN_FLAGS) != 0:
170 170 raise ValueError('unknown revlog index flags')
171 171 return int(int(offset) << 16 | type)
172 172
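# Editor's illustrative sketch (not part of this change): the first field of
# an index entry packs a 48-bit data offset with 16 bits of flags, which
# getoffset() and gettype() recover.
def _exampleoffsettype():
    packed = offset_type(4096, REVIDX_ISCENSORED)
    assert getoffset(packed) == 4096
    assert gettype(packed) == REVIDX_ISCENSORED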
173 173 _nullhash = hashlib.sha1(nullid)
174 174
175 175 def hash(text, p1, p2):
176 176 """generate a hash from the given text and its parent hashes
177 177
178 178 This hash combines both the current file contents and its history
179 179 in a manner that makes it easy to distinguish nodes with the same
180 180 content in the revision graph.
181 181 """
182 182 # As of now, if one of the parent nodes is null, p2 is null
183 183 if p2 == nullid:
184 184 # deep copy of a hash is faster than creating one
185 185 s = _nullhash.copy()
186 186 s.update(p1)
187 187 else:
188 188 # none of the parent nodes are nullid
189 189 if p1 < p2:
190 190 a = p1
191 191 b = p2
192 192 else:
193 193 a = p2
194 194 b = p1
195 195 s = hashlib.sha1(a)
196 196 s.update(b)
197 197 s.update(text)
198 198 return s.digest()
199 199
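# Editor's illustrative sketch (not part of this change): two revisions with
# identical text but different parents hash to different nodeids, which is
# exactly why the parents are mixed into the digest.
def _examplenodehash():
    text = 'same content'
    root = hash(text, nullid, nullid)  # parentless revision
    child = hash(text, root, nullid)   # same text, different history
    assert root != child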
200 200 class _testrevlog(object):
201 201 """minimalist fake revlog to use in doctests"""
202 202
203 203 def __init__(self, data, density=0.5, mingap=0):
204 204 """data is an list of revision payload boundaries"""
205 205 self._data = data
206 206 self._srdensitythreshold = density
207 207 self._srmingapsize = mingap
208 208
209 209 def start(self, rev):
210 210 if rev == 0:
211 211 return 0
212 212 return self._data[rev - 1]
213 213
214 214 def end(self, rev):
215 215 return self._data[rev]
216 216
217 217 def length(self, rev):
218 218 return self.end(rev) - self.start(rev)
219 219
220 220 def __len__(self):
221 221 return len(self._data)
222 222
223 223 def _trimchunk(revlog, revs, startidx, endidx=None):
224 224 """returns revs[startidx:endidx] without empty trailing revs
225 225
226 226 Doctest Setup
227 227 >>> revlog = _testrevlog([
228 228 ... 5, #0
229 229 ... 10, #1
230 230 ... 12, #2
231 231 ... 12, #3 (empty)
232 232 ... 17, #4
233 233 ... 21, #5
234 234 ... 21, #6 (empty)
235 235 ... ])
236 236
237 237 Contiguous cases:
238 238 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
239 239 [0, 1, 2, 3, 4, 5]
240 240 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
241 241 [0, 1, 2, 3, 4]
242 242 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
243 243 [0, 1, 2]
244 244 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
245 245 [2]
246 246 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
247 247 [3, 4, 5]
248 248 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
249 249 [3, 4]
250 250
251 251 Discontiguous cases:
252 252 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
253 253 [1, 3, 5]
254 254 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
255 255 [1]
256 256 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
257 257 [3, 5]
258 258 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
259 259 [3, 5]
260 260 """
261 261 length = revlog.length
262 262
263 263 if endidx is None:
264 264 endidx = len(revs)
265 265
266 266 # If we have a non-empty delta candidate, there is nothing to trim
267 267 if revs[endidx - 1] < len(revlog):
268 268 # Trim empty revs at the end, except the very first revision of a chain
269 269 while (endidx > 1
270 270 and endidx > startidx
271 271 and length(revs[endidx - 1]) == 0):
272 272 endidx -= 1
273 273
274 274 return revs[startidx:endidx]
275 275
276 276 def _segmentspan(revlog, revs, deltainfo=None):
277 277 """Get the byte span of a segment of revisions
278 278
279 279 revs is a sorted array of revision numbers
280 280
281 281 >>> revlog = _testrevlog([
282 282 ... 5, #0
283 283 ... 10, #1
284 284 ... 12, #2
285 285 ... 12, #3 (empty)
286 286 ... 17, #4
287 287 ... ])
288 288
289 289 >>> _segmentspan(revlog, [0, 1, 2, 3, 4])
290 290 17
291 291 >>> _segmentspan(revlog, [0, 4])
292 292 17
293 293 >>> _segmentspan(revlog, [3, 4])
294 294 5
295 295 >>> _segmentspan(revlog, [1, 2, 3,])
296 296 7
297 297 >>> _segmentspan(revlog, [1, 3])
298 298 7
299 299 """
300 300 if not revs:
301 301 return 0
302 302 if deltainfo is not None and len(revlog) <= revs[-1]:
303 303 if len(revs) == 1:
304 304 return deltainfo.deltalen
305 305 offset = revlog.end(len(revlog) - 1)
306 306 end = deltainfo.deltalen + offset
307 307 else:
308 308 end = revlog.end(revs[-1])
309 309 return end - revlog.start(revs[0])
310 310
311 311 def _slicechunk(revlog, revs, deltainfo=None, targetsize=None):
312 312 """slice revs to reduce the amount of unrelated data to be read from disk.
313 313
314 314 ``revs`` is sliced into groups that should be read in one go.
315 315 Assume that revs are sorted.
316 316
317 317 The initial chunk is sliced until the overall density (payload/chunks-span
318 318 ratio) is above `revlog._srdensitythreshold`. No gap smaller than
319 319 `revlog._srmingapsize` is skipped.
320 320
321 321 If `targetsize` is set, no chunk larger than `targetsize` will be yielded.
322 322 For consistency with other slicing choices, this limit won't go lower than
323 323 `revlog._srmingapsize`.
324 324
325 325 If an individual revision chunk is larger than this limit, it will still
326 326 be yielded individually.
327 327
328 328 >>> revlog = _testrevlog([
329 329 ... 5, #00 (5)
330 330 ... 10, #01 (5)
331 331 ... 12, #02 (2)
332 332 ... 12, #03 (empty)
333 333 ... 27, #04 (15)
334 334 ... 31, #05 (4)
335 335 ... 31, #06 (empty)
336 336 ... 42, #07 (11)
337 337 ... 47, #08 (5)
338 338 ... 47, #09 (empty)
339 339 ... 48, #10 (1)
340 340 ... 51, #11 (3)
341 341 ... 74, #12 (23)
342 342 ... 85, #13 (11)
343 343 ... 86, #14 (1)
344 344 ... 91, #15 (5)
345 345 ... ])
346 346
347 347 >>> list(_slicechunk(revlog, list(range(16))))
348 348 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
349 349 >>> list(_slicechunk(revlog, [0, 15]))
350 350 [[0], [15]]
351 351 >>> list(_slicechunk(revlog, [0, 11, 15]))
352 352 [[0], [11], [15]]
353 353 >>> list(_slicechunk(revlog, [0, 11, 13, 15]))
354 354 [[0], [11, 13, 15]]
355 355 >>> list(_slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
356 356 [[1, 2], [5, 8, 10, 11], [14]]
357 357
358 358 Slicing with a maximum chunk size
359 359 >>> list(_slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
360 360 [[0], [11], [13], [15]]
361 361 >>> list(_slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
362 362 [[0], [11], [13, 15]]
363 363 """
364 364 if targetsize is not None:
365 365 targetsize = max(targetsize, revlog._srmingapsize)
366 366 # targetsize should not be specified when evaluating delta candidates:
367 367 # * targetsize is used to ensure we stay within specification when reading,
368 368 # * deltainfo is used to pick a good delta chain when writing.
369 369 if not (deltainfo is None or targetsize is None):
370 370 msg = 'cannot use `targetsize` with a `deltainfo`'
371 371 raise error.ProgrammingError(msg)
372 372 for chunk in _slicechunktodensity(revlog, revs,
373 373 deltainfo,
374 374 revlog._srdensitythreshold,
375 375 revlog._srmingapsize):
376 376 for subchunk in _slicechunktosize(revlog, chunk, targetsize):
377 377 yield subchunk
378 378
379 379 def _slicechunktosize(revlog, revs, targetsize=None):
380 380 """slice revs to match the target size
381 381
382 382 This is intended to be used on chunks that density slicing selected, but
383 383 that are still too large compared to the read guarantee of revlogs. This
384 384 might happen when the "minimal gap size" interrupted the slicing or when
385 385 chains are built in a way that creates large blocks next to each other.
386 386
387 387 >>> revlog = _testrevlog([
388 388 ... 3, #0 (3)
389 389 ... 5, #1 (2)
390 390 ... 6, #2 (1)
391 391 ... 8, #3 (2)
392 392 ... 8, #4 (empty)
393 393 ... 11, #5 (3)
394 394 ... 12, #6 (1)
395 395 ... 13, #7 (1)
396 396 ... 14, #8 (1)
397 397 ... ])
398 398
399 399 Cases where chunk is already small enough
400 400 >>> list(_slicechunktosize(revlog, [0], 3))
401 401 [[0]]
402 402 >>> list(_slicechunktosize(revlog, [6, 7], 3))
403 403 [[6, 7]]
404 404 >>> list(_slicechunktosize(revlog, [0], None))
405 405 [[0]]
406 406 >>> list(_slicechunktosize(revlog, [6, 7], None))
407 407 [[6, 7]]
408 408
409 409 Cases where we need actual slicing
410 410 >>> list(_slicechunktosize(revlog, [0, 1], 3))
411 411 [[0], [1]]
412 412 >>> list(_slicechunktosize(revlog, [1, 3], 3))
413 413 [[1], [3]]
414 414 >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
415 415 [[1, 2], [3]]
416 416 >>> list(_slicechunktosize(revlog, [3, 5], 3))
417 417 [[3], [5]]
418 418 >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
419 419 [[3], [5]]
420 420 >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
421 421 [[5], [6, 7, 8]]
422 422 >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
423 423 [[0], [1, 2], [3], [5], [6, 7, 8]]
424 424
425 425 Case with too large individual chunk (must return valid chunk)
426 426 >>> list(_slicechunktosize(revlog, [0, 1], 2))
427 427 [[0], [1]]
428 428 >>> list(_slicechunktosize(revlog, [1, 3], 1))
429 429 [[1], [3]]
430 430 >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
431 431 [[3], [5]]
432 432 """
433 433 assert targetsize is None or 0 <= targetsize
434 434 if targetsize is None or _segmentspan(revlog, revs) <= targetsize:
435 435 yield revs
436 436 return
437 437
438 438 startrevidx = 0
439 439 startdata = revlog.start(revs[0])
440 440 endrevidx = 0
441 441 iterrevs = enumerate(revs)
442 442 next(iterrevs) # skip first rev.
443 443 for idx, r in iterrevs:
444 444 span = revlog.end(r) - startdata
445 445 if span <= targetsize:
446 446 endrevidx = idx
447 447 else:
448 448 chunk = _trimchunk(revlog, revs, startrevidx, endrevidx + 1)
449 449 if chunk:
450 450 yield chunk
451 451 startrevidx = idx
452 452 startdata = revlog.start(r)
453 453 endrevidx = idx
454 454 yield _trimchunk(revlog, revs, startrevidx)
455 455
456 456 def _slicechunktodensity(revlog, revs, deltainfo=None, targetdensity=0.5,
457 457 mingapsize=0):
458 458 """slice revs to reduce the amount of unrelated data to be read from disk.
459 459
460 460 ``revs`` is sliced into groups that should be read in one go.
461 461 Assume that revs are sorted.
462 462
463 463 ``deltainfo`` is a _deltainfo instance of a revision that we would append
464 464 to the top of the revlog.
465 465
466 466 The initial chunk is sliced until the overall density (payload/chunks-span
467 467 ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
468 468 skipped.
469 469
470 470 >>> revlog = _testrevlog([
471 471 ... 5, #00 (5)
472 472 ... 10, #01 (5)
473 473 ... 12, #02 (2)
474 474 ... 12, #03 (empty)
475 475 ... 27, #04 (15)
476 476 ... 31, #05 (4)
477 477 ... 31, #06 (empty)
478 478 ... 42, #07 (11)
479 479 ... 47, #08 (5)
480 480 ... 47, #09 (empty)
481 481 ... 48, #10 (1)
482 482 ... 51, #11 (3)
483 483 ... 74, #12 (23)
484 484 ... 85, #13 (11)
485 485 ... 86, #14 (1)
486 486 ... 91, #15 (5)
487 487 ... ])
488 488
489 489 >>> list(_slicechunktodensity(revlog, list(range(16))))
490 490 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
491 491 >>> list(_slicechunktodensity(revlog, [0, 15]))
492 492 [[0], [15]]
493 493 >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
494 494 [[0], [11], [15]]
495 495 >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
496 496 [[0], [11, 13, 15]]
497 497 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
498 498 [[1, 2], [5, 8, 10, 11], [14]]
499 499 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
500 500 ... mingapsize=20))
501 501 [[1, 2, 3, 5, 8, 10, 11], [14]]
502 502 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
503 503 ... targetdensity=0.95))
504 504 [[1, 2], [5], [8, 10, 11], [14]]
505 505 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
506 506 ... targetdensity=0.95, mingapsize=12))
507 507 [[1, 2], [5, 8, 10, 11], [14]]
508 508 """
509 509 start = revlog.start
510 510 length = revlog.length
511 511
512 512 if len(revs) <= 1:
513 513 yield revs
514 514 return
515 515
516 516 nextrev = len(revlog)
517 517 nextoffset = revlog.end(nextrev - 1)
518 518
519 519 if deltainfo is None:
520 520 deltachainspan = _segmentspan(revlog, revs)
521 521 chainpayload = sum(length(r) for r in revs)
522 522 else:
523 523 deltachainspan = deltainfo.distance
524 524 chainpayload = deltainfo.compresseddeltalen
525 525
526 526 if deltachainspan < mingapsize:
527 527 yield revs
528 528 return
529 529
530 530 readdata = deltachainspan
531 531
532 532 if deltachainspan:
533 533 density = chainpayload / float(deltachainspan)
534 534 else:
535 535 density = 1.0
536 536
537 537 if density >= targetdensity:
538 538 yield revs
539 539 return
540 540
541 541 if deltainfo is not None and deltainfo.deltalen:
542 542 revs = list(revs)
543 543 revs.append(nextrev)
544 544
545 545 # Store the gaps in a heap to have them sorted by decreasing size
546 546 gapsheap = []
547 547 heapq.heapify(gapsheap)
548 548 prevend = None
549 549 for i, rev in enumerate(revs):
550 550 if rev < nextrev:
551 551 revstart = start(rev)
552 552 revlen = length(rev)
553 553 else:
554 554 revstart = nextoffset
555 555 revlen = deltainfo.deltalen
556 556
557 557 # Skip empty revisions to form larger holes
558 558 if revlen == 0:
559 559 continue
560 560
561 561 if prevend is not None:
562 562 gapsize = revstart - prevend
563 563 # only consider holes that are large enough
564 564 if gapsize > mingapsize:
565 565 heapq.heappush(gapsheap, (-gapsize, i))
566 566
567 567 prevend = revstart + revlen
568 568
569 569 # Collect the indices of the largest holes until the density is acceptable
570 570 indicesheap = []
571 571 heapq.heapify(indicesheap)
572 572 while gapsheap and density < targetdensity:
573 573 oppgapsize, gapidx = heapq.heappop(gapsheap)
574 574
575 575 heapq.heappush(indicesheap, gapidx)
576 576
577 577 # the gap sizes are stored as negatives to be sorted in decreasing
578 578 # order by the heap
579 579 readdata -= (-oppgapsize)
580 580 if readdata > 0:
581 581 density = chainpayload / float(readdata)
582 582 else:
583 583 density = 1.0
584 584
585 585 # Cut the revs at collected indices
586 586 previdx = 0
587 587 while indicesheap:
588 588 idx = heapq.heappop(indicesheap)
589 589
590 590 chunk = _trimchunk(revlog, revs, previdx, idx)
591 591 if chunk:
592 592 yield chunk
593 593
594 594 previdx = idx
595 595
596 596 chunk = _trimchunk(revlog, revs, previdx)
597 597 if chunk:
598 598 yield chunk
599 599
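# Editor's illustrative sketch (not part of this change): "density" is the
# payload divided by the byte span a reader must touch. Reusing the doctest
# revlog above, revs [0, 15] span 91 bytes for only 10 bytes of payload, so
# the chunk is split.
def _exampledensity():
    revlog = _testrevlog([5, 10, 12, 12, 27, 31, 31, 42, 47, 47,
                          48, 51, 74, 85, 86, 91])
    revs = [0, 15]
    span = _segmentspan(revlog, revs)              # 91 bytes to read
    payload = sum(revlog.length(r) for r in revs)  # 5 + 5 bytes of data
    assert payload / float(span) < revlog._srdensitythreshold
    assert list(_slicechunktodensity(revlog, revs)) == [[0], [15]]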
600 600 @attr.s(slots=True, frozen=True)
601 601 class _deltainfo(object):
602 602 distance = attr.ib()
603 603 deltalen = attr.ib()
604 604 data = attr.ib()
605 605 base = attr.ib()
606 606 chainbase = attr.ib()
607 607 chainlen = attr.ib()
608 608 compresseddeltalen = attr.ib()
609 snapshotdepth = attr.ib()
609 610
610 611 class _deltacomputer(object):
611 612 def __init__(self, revlog):
612 613 self.revlog = revlog
613 614
614 615 def _getcandidaterevs(self, p1, p2, cachedelta):
615 616 """
616 617 Provides revisions that are of interest to be diffed against,
617 618 grouped by level of easiness.
618 619 """
619 620 revlog = self.revlog
620 621 gdelta = revlog._generaldelta
621 622 curr = len(revlog)
622 623 prev = curr - 1
623 624 p1r, p2r = revlog.rev(p1), revlog.rev(p2)
624 625
625 626 # should we try to build a delta?
626 627 if prev != nullrev and revlog.storedeltachains:
627 628 tested = set()
628 629 # This condition is true most of the time when processing
629 630 # changegroup data into a generaldelta repo. The only time it
630 631 # isn't true is if this is the first revision in a delta chain
631 632 # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
632 633 if cachedelta and gdelta and revlog._lazydeltabase:
633 634 # Assume what we received from the server is a good choice
634 635 # build delta will reuse the cache
635 636 yield (cachedelta[0],)
636 637 tested.add(cachedelta[0])
637 638
638 639 if gdelta:
639 640 # exclude the base already tested via lazydeltabase, if any
640 641 parents = [p for p in (p1r, p2r)
641 642 if p != nullrev and p not in tested]
642 643
643 644 if not revlog._deltabothparents and len(parents) == 2:
644 645 parents.sort()
645 646 # To minimize the chance of having to build a fulltext,
646 647 # pick first whichever parent is closest to us (max rev)
647 648 yield (parents[1],)
648 649 # then the other one (min rev) if the first did not fit
649 650 yield (parents[0],)
650 651 tested.update(parents)
651 652 elif len(parents) > 0:
652 653 # Test all parents (1 or 2), and keep the best candidate
653 654 yield parents
654 655 tested.update(parents)
655 656
656 657 if prev not in tested:
657 658 # other approaches failed; try against prev to hopefully save us a
658 659 # fulltext.
659 660 yield (prev,)
660 661 tested.add(prev)
661 662
662 663 def buildtext(self, revinfo, fh):
663 664 """Builds a fulltext version of a revision
664 665
665 666 revinfo: _revisioninfo instance that contains all needed info
666 667 fh: file handle to either the .i or the .d revlog file,
667 668 depending on whether it is inlined or not
668 669 """
669 670 btext = revinfo.btext
670 671 if btext[0] is not None:
671 672 return btext[0]
672 673
673 674 revlog = self.revlog
674 675 cachedelta = revinfo.cachedelta
675 676 flags = revinfo.flags
676 677 node = revinfo.node
677 678
678 679 baserev = cachedelta[0]
679 680 delta = cachedelta[1]
680 681 # special case deltas which replace entire base; no need to decode
681 682 # base revision. this neatly avoids censored bases, which throw when
682 683 # they're decoded.
683 684 hlen = struct.calcsize(">lll")
684 685 if delta[:hlen] == mdiff.replacediffheader(revlog.rawsize(baserev),
685 686 len(delta) - hlen):
686 687 btext[0] = delta[hlen:]
687 688 else:
688 689 # deltabase is rawtext before being changed by flag processors, which
689 690 # is equivalent to non-raw text
690 691 basetext = revlog.revision(baserev, _df=fh, raw=False)
691 692 btext[0] = mdiff.patch(basetext, delta)
692 693
693 694 try:
694 695 res = revlog._processflags(btext[0], flags, 'read', raw=True)
695 696 btext[0], validatehash = res
696 697 if validatehash:
697 698 revlog.checkhash(btext[0], node, p1=revinfo.p1, p2=revinfo.p2)
698 699 if flags & REVIDX_ISCENSORED:
699 700 raise RevlogError(_('node %s is not censored') % node)
700 701 except CensoredNodeError:
701 702 # must pass the censored index flag to add censored revisions
702 703 if not flags & REVIDX_ISCENSORED:
703 704 raise
704 705 return btext[0]
705 706
706 707 def _builddeltadiff(self, base, revinfo, fh):
707 708 revlog = self.revlog
708 709 t = self.buildtext(revinfo, fh)
709 710 if revlog.iscensored(base):
710 711 # deltas based on a censored revision must replace the
711 712 # full content in one patch, so delta works everywhere
712 713 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
713 714 delta = header + t
714 715 else:
715 716 ptext = revlog.revision(base, _df=fh, raw=True)
716 717 delta = mdiff.textdiff(ptext, t)
717 718
718 719 return delta
719 720
720 721 def _builddeltainfo(self, revinfo, base, fh):
721 722 # can we use the cached delta?
722 723 if revinfo.cachedelta and revinfo.cachedelta[0] == base:
723 724 delta = revinfo.cachedelta[1]
724 725 else:
725 726 delta = self._builddeltadiff(base, revinfo, fh)
726 727 revlog = self.revlog
727 728 header, data = revlog.compress(delta)
728 729 deltalen = len(header) + len(data)
729 730 chainbase = revlog.chainbase(base)
730 731 offset = revlog.end(len(revlog) - 1)
731 732 dist = deltalen + offset - revlog.start(chainbase)
732 733 if revlog._generaldelta:
733 734 deltabase = base
734 735 else:
735 736 deltabase = chainbase
736 737 chainlen, compresseddeltalen = revlog._chaininfo(base)
737 738 chainlen += 1
738 739 compresseddeltalen += deltalen
740
741 revlog = self.revlog
742 snapshotdepth = None
743 if deltabase == nullrev:
744 snapshotdepth = 0
745 elif revlog._sparserevlog and revlog.issnapshot(deltabase):
746 # A delta chain should always be one full snapshot,
747 # zero or more semi-snapshots, and zero or more deltas
748 p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
749 if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
750 snapshotdepth = len(revlog._deltachain(deltabase)[0])
751
739 752 return _deltainfo(dist, deltalen, (header, data), deltabase,
740 chainbase, chainlen, compresseddeltalen)
753 chainbase, chainlen, compresseddeltalen,
754 snapshotdepth)
741 755
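    # Editor's illustrative sketch (not part of this change): the snapshot
    # depth rules applied above, restated as a standalone helper. Under
    # sparse-revlog, a chain is one full snapshot (depth 0), zero or more
    # intermediate snapshots (depth 1, 2, ...), then plain deltas (None).
    def _examplesnapshotdepth(self, deltabase, p1rev, p2rev):
        revlog = self.revlog
        if deltabase == nullrev:
            return 0  # full snapshot, stored against the null revision
        if (revlog._sparserevlog and deltabase not in (p1rev, p2rev)
            and revlog.issnapshot(deltabase)):
            # intermediate snapshot: one level deeper than its base chain
            return len(revlog._deltachain(deltabase)[0])
        return None  # a regular delta, not a snapshot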
742 756 def finddeltainfo(self, revinfo, fh):
743 757 """Find an acceptable delta against a candidate revision
744 758
745 759 revinfo: information about the revision (instance of _revisioninfo)
746 760 fh: file handle to either the .i or the .d revlog file,
747 761 depending on whether it is inlined or not
748 762
749 763 Returns the first acceptable candidate revision, as ordered by
750 764 _getcandidaterevs
751 765 """
752 766 if not revinfo.textlen:
753 767 return None # empty files do not need deltas
754 768
755 769 cachedelta = revinfo.cachedelta
756 770 p1 = revinfo.p1
757 771 p2 = revinfo.p2
758 772 revlog = self.revlog
759 773
760 774 deltalength = self.revlog.length
761 775 deltaparent = self.revlog.deltaparent
762 776
763 777 deltainfo = None
764 778 deltas_limit = revinfo.textlen * LIMIT_DELTA2TEXT
765 779 for candidaterevs in self._getcandidaterevs(p1, p2, cachedelta):
766 780 # filter out delta bases that will never produce a good delta
767 781 candidaterevs = [r for r in candidaterevs
768 782 if self.revlog.length(r) <= deltas_limit]
769 783 nominateddeltas = []
770 784 for candidaterev in candidaterevs:
771 785 # skip over empty deltas (no need to include them in a chain)
772 786 while candidaterev != nullrev and not deltalength(candidaterev):
773 787 candidaterev = deltaparent(candidaterev)
774 788 # no need to try a delta against nullid; this will be handled
775 789 # by fulltext later.
776 790 if candidaterev == nullrev:
777 791 continue
778 792 # no delta for rawtext-changing revs (see "candelta" for why)
779 793 if revlog.flags(candidaterev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
780 794 continue
781 795 candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
782 796 if revlog._isgooddeltainfo(candidatedelta, revinfo):
783 797 nominateddeltas.append(candidatedelta)
784 798 if nominateddeltas:
785 799 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
786 800 break
787 801
788 802 return deltainfo
789 803
790 804 @attr.s(slots=True, frozen=True)
791 805 class _revisioninfo(object):
792 806 """Information about a revision that allows building its fulltext
793 807 node: expected hash of the revision
794 808 p1, p2: parent revs of the revision
795 809 btext: built text cache consisting of a one-element list
796 810 cachedelta: (baserev, uncompressed_delta) or None
797 811 flags: flags associated to the revision storage
798 812
799 813 One of btext[0] or cachedelta must be set.
800 814 """
801 815 node = attr.ib()
802 816 p1 = attr.ib()
803 817 p2 = attr.ib()
804 818 btext = attr.ib()
805 819 textlen = attr.ib()
806 820 cachedelta = attr.ib()
807 821 flags = attr.ib()
808 822
809 823 # index v0:
810 824 # 4 bytes: offset
811 825 # 4 bytes: compressed length
812 826 # 4 bytes: base rev
813 827 # 4 bytes: link rev
814 828 # 20 bytes: parent 1 nodeid
815 829 # 20 bytes: parent 2 nodeid
816 830 # 20 bytes: nodeid
817 831 indexformatv0 = struct.Struct(">4l20s20s20s")
818 832 indexformatv0_pack = indexformatv0.pack
819 833 indexformatv0_unpack = indexformatv0.unpack
820 834
821 835 class revlogoldindex(list):
822 836 def __getitem__(self, i):
823 837 if i == -1:
824 838 return (0, 0, 0, -1, -1, -1, -1, nullid)
825 839 return list.__getitem__(self, i)
826 840
827 841 # maximum <delta-chain-data>/<revision-text-length> ratio
828 842 LIMIT_DELTA2TEXT = 2
829 843
830 844 class revlogoldio(object):
831 845 def __init__(self):
832 846 self.size = indexformatv0.size
833 847
834 848 def parseindex(self, data, inline):
835 849 s = self.size
836 850 index = []
837 851 nodemap = {nullid: nullrev}
838 852 n = off = 0
839 853 l = len(data)
840 854 while off + s <= l:
841 855 cur = data[off:off + s]
842 856 off += s
843 857 e = indexformatv0_unpack(cur)
844 858 # transform to revlogv1 format
845 859 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
846 860 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
847 861 index.append(e2)
848 862 nodemap[e[6]] = n
849 863 n += 1
850 864
851 865 return revlogoldindex(index), nodemap, None
852 866
853 867 def packentry(self, entry, node, version, rev):
854 868 if gettype(entry[0]):
855 869 raise RevlogError(_('index entry flags need revlog version 1'))
856 870 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
857 871 node(entry[5]), node(entry[6]), entry[7])
858 872 return indexformatv0_pack(*e2)
859 873
860 874 # index ng:
861 875 # 6 bytes: offset
862 876 # 2 bytes: flags
863 877 # 4 bytes: compressed length
864 878 # 4 bytes: uncompressed length
865 879 # 4 bytes: base rev
866 880 # 4 bytes: link rev
867 881 # 4 bytes: parent 1 rev
868 882 # 4 bytes: parent 2 rev
869 883 # 32 bytes: nodeid
870 884 indexformatng = struct.Struct(">Qiiiiii20s12x")
871 885 indexformatng_pack = indexformatng.pack
872 886 versionformat = struct.Struct(">I")
873 887 versionformat_pack = versionformat.pack
874 888 versionformat_unpack = versionformat.unpack
875 889
876 890 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
877 891 # signed integer)
878 892 _maxentrysize = 0x7fffffff
879 893
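# Editor's illustrative sketch (not part of this change): packing one v1
# ("ng") index entry with the struct above; the values are arbitrary samples.
def _exampleindexentry():
    entry = (offset_type(0, 0),  # 6-byte offset + 2-byte flags
             120,                # compressed length
             200,                # uncompressed length
             0,                  # delta chain base rev
             0,                  # link rev
             -1, -1,             # parent revs (nullrev means no parent)
             '\x00' * 20)        # nodeid, zero-padded to 32 bytes on disk
    packed = indexformatng_pack(*entry)
    assert len(packed) == indexformatng.size  # 64 bytes per entry
    assert indexformatng.unpack(packed) == entry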
880 894 class revlogio(object):
881 895 def __init__(self):
882 896 self.size = indexformatng.size
883 897
884 898 def parseindex(self, data, inline):
885 899 # call the C implementation to parse the index data
886 900 index, cache = parsers.parse_index2(data, inline)
887 901 return index, getattr(index, 'nodemap', None), cache
888 902
889 903 def packentry(self, entry, node, version, rev):
890 904 p = indexformatng_pack(*entry)
891 905 if rev == 0:
892 906 p = versionformat_pack(version) + p[4:]
893 907 return p
894 908
895 909 class revlog(object):
896 910 """
897 911 the underlying revision storage object
898 912
899 913 A revlog consists of two parts, an index and the revision data.
900 914
901 915 The index is a file with a fixed record size containing
902 916 information on each revision, including its nodeid (hash), the
903 917 nodeids of its parents, the position and offset of its data within
904 918 the data file, and the revision it's based on. Finally, each entry
905 919 contains a linkrev entry that can serve as a pointer to external
906 920 data.
907 921
908 922 The revision data itself is a linear collection of data chunks.
909 923 Each chunk represents a revision and is usually represented as a
910 924 delta against the previous chunk. To bound lookup time, runs of
911 925 deltas are limited to about 2 times the length of the original
912 926 version data. This makes retrieval of a version proportional to
913 927 its size, or O(1) relative to the number of revisions.
914 928
915 929 Both pieces of the revlog are written to in an append-only
916 930 fashion, which means we never need to rewrite a file to insert or
917 931 remove data, and can use some simple techniques to avoid the need
918 932 for locking while reading.
919 933
920 934 If checkambig, indexfile is opened with checkambig=True at
921 935 writing, to avoid file stat ambiguity.
922 936
923 937 If mmaplargeindex is True, and an mmapindexthreshold is set, the
924 938 index will be mmapped rather than read if it is larger than the
925 939 configured threshold.
926 940
927 941 If censorable is True, the revlog can have censored revisions.
928 942 """
929 943 def __init__(self, opener, indexfile, datafile=None, checkambig=False,
930 944 mmaplargeindex=False, censorable=False):
931 945 """
932 946 create a revlog object
933 947
934 948 opener is a function that abstracts the file opening operation
935 949 and can be used to implement COW semantics or the like.
936 950 """
937 951 self.indexfile = indexfile
938 952 self.datafile = datafile or (indexfile[:-2] + ".d")
939 953 self.opener = opener
940 954 # When True, indexfile is opened with checkambig=True at writing, to
941 955 # avoid file stat ambiguity.
942 956 self._checkambig = checkambig
943 957 self._censorable = censorable
944 958 # 3-tuple of (node, rev, text) for a raw revision.
945 959 self._cache = None
946 960 # Maps rev to chain base rev.
947 961 self._chainbasecache = util.lrucachedict(100)
948 962 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
949 963 self._chunkcache = (0, '')
950 964 # How much data to read and cache into the raw revlog data cache.
951 965 self._chunkcachesize = 65536
952 966 self._maxchainlen = None
953 967 self._deltabothparents = True
954 968 self.index = []
955 969 # Mapping of partial identifiers to full nodes.
956 970 self._pcache = {}
957 971 # Mapping of revision integer to full node.
958 972 self._nodecache = {nullid: nullrev}
959 973 self._nodepos = None
960 974 self._compengine = 'zlib'
961 975 self._maxdeltachainspan = -1
962 976 self._withsparseread = False
963 977 self._sparserevlog = False
964 978 self._srdensitythreshold = 0.50
965 979 self._srmingapsize = 262144
966 980
967 981 mmapindexthreshold = None
968 982 v = REVLOG_DEFAULT_VERSION
969 983 opts = getattr(opener, 'options', None)
970 984 if opts is not None:
971 985 if 'revlogv2' in opts:
972 986 # version 2 revlogs always use generaldelta.
973 987 v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
974 988 elif 'revlogv1' in opts:
975 989 if 'generaldelta' in opts:
976 990 v |= FLAG_GENERALDELTA
977 991 else:
978 992 v = 0
979 993 if 'chunkcachesize' in opts:
980 994 self._chunkcachesize = opts['chunkcachesize']
981 995 if 'maxchainlen' in opts:
982 996 self._maxchainlen = opts['maxchainlen']
983 997 if 'deltabothparents' in opts:
984 998 self._deltabothparents = opts['deltabothparents']
985 999 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
986 1000 if 'compengine' in opts:
987 1001 self._compengine = opts['compengine']
988 1002 if 'maxdeltachainspan' in opts:
989 1003 self._maxdeltachainspan = opts['maxdeltachainspan']
990 1004 if mmaplargeindex and 'mmapindexthreshold' in opts:
991 1005 mmapindexthreshold = opts['mmapindexthreshold']
992 1006 self._sparserevlog = bool(opts.get('sparse-revlog', False))
993 1007 withsparseread = bool(opts.get('with-sparse-read', False))
994 1008 # sparse-revlog forces sparse-read
995 1009 self._withsparseread = self._sparserevlog or withsparseread
996 1010 if 'sparse-read-density-threshold' in opts:
997 1011 self._srdensitythreshold = opts['sparse-read-density-threshold']
998 1012 if 'sparse-read-min-gap-size' in opts:
999 1013 self._srmingapsize = opts['sparse-read-min-gap-size']
1000 1014
1001 1015 if self._chunkcachesize <= 0:
1002 1016 raise RevlogError(_('revlog chunk cache size %r is not greater '
1003 1017 'than 0') % self._chunkcachesize)
1004 1018 elif self._chunkcachesize & (self._chunkcachesize - 1):
1005 1019 raise RevlogError(_('revlog chunk cache size %r is not a power '
1006 1020 'of 2') % self._chunkcachesize)
1007 1021
1008 1022 indexdata = ''
1009 1023 self._initempty = True
1010 1024 try:
1011 1025 with self._indexfp() as f:
1012 1026 if (mmapindexthreshold is not None and
1013 1027 self.opener.fstat(f).st_size >= mmapindexthreshold):
1014 1028 indexdata = util.buffer(util.mmapread(f))
1015 1029 else:
1016 1030 indexdata = f.read()
1017 1031 if len(indexdata) > 0:
1018 1032 v = versionformat_unpack(indexdata[:4])[0]
1019 1033 self._initempty = False
1020 1034 except IOError as inst:
1021 1035 if inst.errno != errno.ENOENT:
1022 1036 raise
1023 1037
1024 1038 self.version = v
1025 1039 self._inline = v & FLAG_INLINE_DATA
1026 1040 self._generaldelta = v & FLAG_GENERALDELTA
1027 1041 flags = v & ~0xFFFF
1028 1042 fmt = v & 0xFFFF
1029 1043 if fmt == REVLOGV0:
1030 1044 if flags:
1031 1045 raise RevlogError(_('unknown flags (%#04x) in version %d '
1032 1046 'revlog %s') %
1033 1047 (flags >> 16, fmt, self.indexfile))
1034 1048 elif fmt == REVLOGV1:
1035 1049 if flags & ~REVLOGV1_FLAGS:
1036 1050 raise RevlogError(_('unknown flags (%#04x) in version %d '
1037 1051 'revlog %s') %
1038 1052 (flags >> 16, fmt, self.indexfile))
1039 1053 elif fmt == REVLOGV2:
1040 1054 if flags & ~REVLOGV2_FLAGS:
1041 1055 raise RevlogError(_('unknown flags (%#04x) in version %d '
1042 1056 'revlog %s') %
1043 1057 (flags >> 16, fmt, self.indexfile))
1044 1058 else:
1045 1059 raise RevlogError(_('unknown version (%d) in revlog %s') %
1046 1060 (fmt, self.indexfile))
1047 1061
1048 1062 self.storedeltachains = True
1049 1063
1050 1064 self._io = revlogio()
1051 1065 if self.version == REVLOGV0:
1052 1066 self._io = revlogoldio()
1053 1067 try:
1054 1068 d = self._io.parseindex(indexdata, self._inline)
1055 1069 except (ValueError, IndexError):
1056 1070 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
1057 1071 self.index, nodemap, self._chunkcache = d
1058 1072 if nodemap is not None:
1059 1073 self.nodemap = self._nodecache = nodemap
1060 1074 if not self._chunkcache:
1061 1075 self._chunkclear()
1062 1076 # revnum -> (chain-length, sum-delta-length)
1063 1077 self._chaininfocache = {}
1064 1078 # revlog header -> revlog compressor
1065 1079 self._decompressors = {}
1066 1080
1067 1081 @util.propertycache
1068 1082 def _compressor(self):
1069 1083 return util.compengines[self._compengine].revlogcompressor()
1070 1084
1071 1085 def _indexfp(self, mode='r'):
1072 1086 """file object for the revlog's index file"""
1073 1087 args = {r'mode': mode}
1074 1088 if mode != 'r':
1075 1089 args[r'checkambig'] = self._checkambig
1076 1090 if mode == 'w':
1077 1091 args[r'atomictemp'] = True
1078 1092 return self.opener(self.indexfile, **args)
1079 1093
1080 1094 def _datafp(self, mode='r'):
1081 1095 """file object for the revlog's data file"""
1082 1096 return self.opener(self.datafile, mode=mode)
1083 1097
1084 1098 @contextlib.contextmanager
1085 1099 def _datareadfp(self, existingfp=None):
1086 1100 """file object suitable to read data"""
1087 1101 if existingfp is not None:
1088 1102 yield existingfp
1089 1103 else:
1090 1104 if self._inline:
1091 1105 func = self._indexfp
1092 1106 else:
1093 1107 func = self._datafp
1094 1108 with func() as fp:
1095 1109 yield fp
1096 1110
1097 1111 def tip(self):
1098 1112 return self.node(len(self.index) - 1)
1099 1113 def __contains__(self, rev):
1100 1114 return 0 <= rev < len(self)
1101 1115 def __len__(self):
1102 1116 return len(self.index)
1103 1117 def __iter__(self):
1104 1118 return iter(pycompat.xrange(len(self)))
1105 1119 def revs(self, start=0, stop=None):
1106 1120 """iterate over all rev in this revlog (from start to stop)"""
1107 1121 step = 1
1108 1122 length = len(self)
1109 1123 if stop is not None:
1110 1124 if start > stop:
1111 1125 step = -1
1112 1126 stop += step
1113 1127 if stop > length:
1114 1128 stop = length
1115 1129 else:
1116 1130 stop = length
1117 1131 return pycompat.xrange(start, stop, step)
1118 1132
1119 1133 @util.propertycache
1120 1134 def nodemap(self):
1121 1135 if self.index:
1122 1136 # populate mapping down to the initial node
1123 1137 node0 = self.index[0][7] # get around changelog filtering
1124 1138 self.rev(node0)
1125 1139 return self._nodecache
1126 1140
1127 1141 def hasnode(self, node):
1128 1142 try:
1129 1143 self.rev(node)
1130 1144 return True
1131 1145 except KeyError:
1132 1146 return False
1133 1147
1134 1148 def candelta(self, baserev, rev):
1135 1149 """whether two revisions (baserev, rev) can be delta-ed or not"""
1136 1150 # Disable delta if either rev requires a content-changing flag
1137 1151 # processor (ex. LFS). This is because such a flag processor can alter
1138 1152 # the rawtext content that the delta will be based on, and two clients
1139 1153 # could have the same revlog node with different flags (i.e. different
1140 1154 # rawtext contents) and the delta could be incompatible.
1141 1155 if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
1142 1156 or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
1143 1157 return False
1144 1158 return True
1145 1159
1146 1160 def clearcaches(self):
1147 1161 self._cache = None
1148 1162 self._chainbasecache.clear()
1149 1163 self._chunkcache = (0, '')
1150 1164 self._pcache = {}
1151 1165
1152 1166 try:
1153 1167 self._nodecache.clearcaches()
1154 1168 except AttributeError:
1155 1169 self._nodecache = {nullid: nullrev}
1156 1170 self._nodepos = None
1157 1171
1158 1172 def rev(self, node):
1159 1173 try:
1160 1174 return self._nodecache[node]
1161 1175 except TypeError:
1162 1176 raise
1163 1177 except RevlogError:
1164 1178 # parsers.c radix tree lookup failed
1165 1179 if node == wdirid or node in wdirfilenodeids:
1166 1180 raise error.WdirUnsupported
1167 1181 raise LookupError(node, self.indexfile, _('no node'))
1168 1182 except KeyError:
1169 1183 # pure python cache lookup failed
1170 1184 n = self._nodecache
1171 1185 i = self.index
1172 1186 p = self._nodepos
1173 1187 if p is None:
1174 1188 p = len(i) - 1
1175 1189 else:
1176 1190 assert p < len(i)
1177 1191 for r in pycompat.xrange(p, -1, -1):
1178 1192 v = i[r][7]
1179 1193 n[v] = r
1180 1194 if v == node:
1181 1195 self._nodepos = r - 1
1182 1196 return r
1183 1197 if node == wdirid or node in wdirfilenodeids:
1184 1198 raise error.WdirUnsupported
1185 1199 raise LookupError(node, self.indexfile, _('no node'))
1186 1200
1187 1201 # Accessors for index entries.
1188 1202
1189 1203 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
1190 1204 # are flags.
1191 1205 def start(self, rev):
1192 1206 return int(self.index[rev][0] >> 16)
1193 1207
1194 1208 def flags(self, rev):
1195 1209 return self.index[rev][0] & 0xFFFF
1196 1210
1197 1211 def length(self, rev):
1198 1212 return self.index[rev][1]
1199 1213
1200 1214 def rawsize(self, rev):
1201 1215 """return the length of the uncompressed text for a given revision"""
1202 1216 l = self.index[rev][2]
1203 1217 if l >= 0:
1204 1218 return l
1205 1219
1206 1220 t = self.revision(rev, raw=True)
1207 1221 return len(t)
1208 1222
1209 1223 def size(self, rev):
1210 1224 """length of non-raw text (processed by a "read" flag processor)"""
1211 1225 # fast path: if no "read" flag processor could change the content,
1212 1226 # size is rawsize. note: ELLIPSIS is known to not change the content.
1213 1227 flags = self.flags(rev)
1214 1228 if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
1215 1229 return self.rawsize(rev)
1216 1230
1217 1231 return len(self.revision(rev, raw=False))
1218 1232
1219 1233 def chainbase(self, rev):
1220 1234 base = self._chainbasecache.get(rev)
1221 1235 if base is not None:
1222 1236 return base
1223 1237
1224 1238 index = self.index
1225 1239 iterrev = rev
1226 1240 base = index[iterrev][3]
1227 1241 while base != iterrev:
1228 1242 iterrev = base
1229 1243 base = index[iterrev][3]
1230 1244
1231 1245 self._chainbasecache[rev] = base
1232 1246 return base
1233 1247
1234 1248 def linkrev(self, rev):
1235 1249 return self.index[rev][4]
1236 1250
1237 1251 def parentrevs(self, rev):
1238 1252 try:
1239 1253 entry = self.index[rev]
1240 1254 except IndexError:
1241 1255 if rev == wdirrev:
1242 1256 raise error.WdirUnsupported
1243 1257 raise
1244 1258
1245 1259 return entry[5], entry[6]
1246 1260
1247 1261 def node(self, rev):
1248 1262 try:
1249 1263 return self.index[rev][7]
1250 1264 except IndexError:
1251 1265 if rev == wdirrev:
1252 1266 raise error.WdirUnsupported
1253 1267 raise
1254 1268
1255 1269 # Derived from index values.
1256 1270
1257 1271 def end(self, rev):
1258 1272 return self.start(rev) + self.length(rev)
1259 1273
1260 1274 def parents(self, node):
1261 1275 i = self.index
1262 1276 d = i[self.rev(node)]
1263 1277 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
1264 1278
1265 1279 def chainlen(self, rev):
1266 1280 return self._chaininfo(rev)[0]
1267 1281
1268 1282 def _chaininfo(self, rev):
1269 1283 chaininfocache = self._chaininfocache
1270 1284 if rev in chaininfocache:
1271 1285 return chaininfocache[rev]
1272 1286 index = self.index
1273 1287 generaldelta = self._generaldelta
1274 1288 iterrev = rev
1275 1289 e = index[iterrev]
1276 1290 clen = 0
1277 1291 compresseddeltalen = 0
1278 1292 while iterrev != e[3]:
1279 1293 clen += 1
1280 1294 compresseddeltalen += e[1]
1281 1295 if generaldelta:
1282 1296 iterrev = e[3]
1283 1297 else:
1284 1298 iterrev -= 1
1285 1299 if iterrev in chaininfocache:
1286 1300 t = chaininfocache[iterrev]
1287 1301 clen += t[0]
1288 1302 compresseddeltalen += t[1]
1289 1303 break
1290 1304 e = index[iterrev]
1291 1305 else:
1292 1306 # Add text length of base since decompressing that also takes
1293 1307 # work. For cache hits the length is already included.
1294 1308 compresseddeltalen += e[1]
1295 1309 r = (clen, compresseddeltalen)
1296 1310 chaininfocache[rev] = r
1297 1311 return r
1298 1312
1299 1313 def _deltachain(self, rev, stoprev=None):
1300 1314 """Obtain the delta chain for a revision.
1301 1315
1302 1316 ``stoprev`` specifies a revision to stop at. If not specified, we
1303 1317 stop at the base of the chain.
1304 1318
1305 1319 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
1306 1320 revs in ascending order and ``stopped`` is a bool indicating whether
1307 1321 ``stoprev`` was hit.
1308 1322 """
1309 1323 # Try C implementation.
1310 1324 try:
1311 1325 return self.index.deltachain(rev, stoprev, self._generaldelta)
1312 1326 except AttributeError:
1313 1327 pass
1314 1328
1315 1329 chain = []
1316 1330
1317 1331 # Alias to prevent attribute lookup in tight loop.
1318 1332 index = self.index
1319 1333 generaldelta = self._generaldelta
1320 1334
1321 1335 iterrev = rev
1322 1336 e = index[iterrev]
1323 1337 while iterrev != e[3] and iterrev != stoprev:
1324 1338 chain.append(iterrev)
1325 1339 if generaldelta:
1326 1340 iterrev = e[3]
1327 1341 else:
1328 1342 iterrev -= 1
1329 1343 e = index[iterrev]
1330 1344
1331 1345 if iterrev == stoprev:
1332 1346 stopped = True
1333 1347 else:
1334 1348 chain.append(iterrev)
1335 1349 stopped = False
1336 1350
1337 1351 chain.reverse()
1338 1352 return chain, stopped
1339 1353
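    # Editor's illustrative sketch (not part of this change): the chain
    # returned above runs from the chain base up to the requested rev;
    # replaying each delta in order rebuilds the fulltext.
    def _exampledeltachain(self, rev):
        chain, stopped = self._deltachain(rev)
        assert chain[-1] == rev  # the chain always ends at the given rev
        assert not stopped       # no stoprev given, so we reached the base
        return chain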
1340 1354 def ancestors(self, revs, stoprev=0, inclusive=False):
1341 1355 """Generate the ancestors of 'revs' in reverse topological order.
1342 1356 Does not generate revs lower than stoprev.
1343 1357
1344 1358 See the documentation for ancestor.lazyancestors for more details."""
1345 1359
1346 1360 return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
1347 1361 inclusive=inclusive)
1348 1362
1349 1363 def descendants(self, revs):
1350 1364 """Generate the descendants of 'revs' in revision order.
1351 1365
1352 1366 Yield a sequence of revision numbers starting with a child of
1353 1367 some rev in revs, i.e., each revision is *not* considered a
1354 1368 descendant of itself. Results are ordered by revision number (a
1355 1369 topological sort)."""
1356 1370 first = min(revs)
1357 1371 if first == nullrev:
1358 1372 for i in self:
1359 1373 yield i
1360 1374 return
1361 1375
1362 1376 seen = set(revs)
1363 1377 for i in self.revs(start=first + 1):
1364 1378 for x in self.parentrevs(i):
1365 1379 if x != nullrev and x in seen:
1366 1380 seen.add(i)
1367 1381 yield i
1368 1382 break
1369 1383
1370 1384 def findcommonmissing(self, common=None, heads=None):
1371 1385 """Return a tuple of the ancestors of common and the ancestors of heads
1372 1386 that are not ancestors of common. In revset terminology, we return the
1373 1387 tuple:
1374 1388
1375 1389 ::common, (::heads) - (::common)
1376 1390
1377 1391 The list is sorted by revision number, meaning it is
1378 1392 topologically sorted.
1379 1393
1380 1394 'heads' and 'common' are both lists of node IDs. If heads is
1381 1395 not supplied, uses all of the revlog's heads. If common is not
1382 1396 supplied, uses nullid."""
1383 1397 if common is None:
1384 1398 common = [nullid]
1385 1399 if heads is None:
1386 1400 heads = self.heads()
1387 1401
1388 1402 common = [self.rev(n) for n in common]
1389 1403 heads = [self.rev(n) for n in heads]
1390 1404
1391 1405 # we want the ancestors, but inclusive
1392 1406 class lazyset(object):
1393 1407 def __init__(self, lazyvalues):
1394 1408 self.addedvalues = set()
1395 1409 self.lazyvalues = lazyvalues
1396 1410
1397 1411 def __contains__(self, value):
1398 1412 return value in self.addedvalues or value in self.lazyvalues
1399 1413
1400 1414 def __iter__(self):
1401 1415 added = self.addedvalues
1402 1416 for r in added:
1403 1417 yield r
1404 1418 for r in self.lazyvalues:
1405 1419 if not r in added:
1406 1420 yield r
1407 1421
1408 1422 def add(self, value):
1409 1423 self.addedvalues.add(value)
1410 1424
1411 1425 def update(self, values):
1412 1426 self.addedvalues.update(values)
1413 1427
1414 1428 has = lazyset(self.ancestors(common))
1415 1429 has.add(nullrev)
1416 1430 has.update(common)
1417 1431
1418 1432 # take all ancestors from heads that aren't in has
1419 1433 missing = set()
1420 1434 visit = collections.deque(r for r in heads if r not in has)
1421 1435 while visit:
1422 1436 r = visit.popleft()
1423 1437 if r in missing:
1424 1438 continue
1425 1439 else:
1426 1440 missing.add(r)
1427 1441 for p in self.parentrevs(r):
1428 1442 if p not in has:
1429 1443 visit.append(p)
1430 1444 missing = list(missing)
1431 1445 missing.sort()
1432 1446 return has, [self.node(miss) for miss in missing]
1433 1447
1434 1448 def incrementalmissingrevs(self, common=None):
1435 1449 """Return an object that can be used to incrementally compute the
1436 1450 revision numbers of the ancestors of arbitrary sets that are not
1437 1451 ancestors of common. This is an ancestor.incrementalmissingancestors
1438 1452 object.
1439 1453
1440 1454 'common' is a list of revision numbers. If common is not supplied, uses
1441 1455 nullrev.
1442 1456 """
1443 1457 if common is None:
1444 1458 common = [nullrev]
1445 1459
1446 1460 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1447 1461
1448 1462 def findmissingrevs(self, common=None, heads=None):
1449 1463 """Return the revision numbers of the ancestors of heads that
1450 1464 are not ancestors of common.
1451 1465
1452 1466 More specifically, return a list of revision numbers corresponding to
1453 1467 nodes N such that every N satisfies the following constraints:
1454 1468
1455 1469 1. N is an ancestor of some node in 'heads'
1456 1470 2. N is not an ancestor of any node in 'common'
1457 1471
1458 1472 The list is sorted by revision number, meaning it is
1459 1473 topologically sorted.
1460 1474
1461 1475 'heads' and 'common' are both lists of revision numbers. If heads is
1462 1476 not supplied, uses all of the revlog's heads. If common is not
1463 1477 supplied, uses nullid."""
1464 1478 if common is None:
1465 1479 common = [nullrev]
1466 1480 if heads is None:
1467 1481 heads = self.headrevs()
1468 1482
1469 1483 inc = self.incrementalmissingrevs(common=common)
1470 1484 return inc.missingancestors(heads)
1471 1485
1472 1486 def findmissing(self, common=None, heads=None):
1473 1487 """Return the ancestors of heads that are not ancestors of common.
1474 1488
1475 1489 More specifically, return a list of nodes N such that every N
1476 1490 satisfies the following constraints:
1477 1491
1478 1492 1. N is an ancestor of some node in 'heads'
1479 1493 2. N is not an ancestor of any node in 'common'
1480 1494
1481 1495 The list is sorted by revision number, meaning it is
1482 1496 topologically sorted.
1483 1497
1484 1498 'heads' and 'common' are both lists of node IDs. If heads is
1485 1499 not supplied, uses all of the revlog's heads. If common is not
1486 1500 supplied, uses nullid."""
1487 1501 if common is None:
1488 1502 common = [nullid]
1489 1503 if heads is None:
1490 1504 heads = self.heads()
1491 1505
1492 1506 common = [self.rev(n) for n in common]
1493 1507 heads = [self.rev(n) for n in heads]
1494 1508
1495 1509 inc = self.incrementalmissingrevs(common=common)
1496 1510 return [self.node(r) for r in inc.missingancestors(heads)]
1497 1511
1498 1512 def nodesbetween(self, roots=None, heads=None):
1499 1513 """Return a topological path from 'roots' to 'heads'.
1500 1514
1501 1515 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1502 1516 topologically sorted list of all nodes N that satisfy both of
1503 1517 these constraints:
1504 1518
1505 1519 1. N is a descendant of some node in 'roots'
1506 1520 2. N is an ancestor of some node in 'heads'
1507 1521
1508 1522 Every node is considered to be both a descendant and an ancestor
1509 1523 of itself, so every reachable node in 'roots' and 'heads' will be
1510 1524 included in 'nodes'.
1511 1525
1512 1526 'outroots' is the list of reachable nodes in 'roots', i.e., the
1513 1527 subset of 'roots' that is returned in 'nodes'. Likewise,
1514 1528 'outheads' is the subset of 'heads' that is also in 'nodes'.
1515 1529
1516 1530 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1517 1531 unspecified, uses nullid as the only root. If 'heads' is
1518 1532 unspecified, uses list of all of the revlog's heads."""
1519 1533 nonodes = ([], [], [])
1520 1534 if roots is not None:
1521 1535 roots = list(roots)
1522 1536 if not roots:
1523 1537 return nonodes
1524 1538 lowestrev = min([self.rev(n) for n in roots])
1525 1539 else:
1526 1540 roots = [nullid] # Everybody's a descendant of nullid
1527 1541 lowestrev = nullrev
1528 1542 if (lowestrev == nullrev) and (heads is None):
1529 1543 # We want _all_ the nodes!
1530 1544 return ([self.node(r) for r in self], [nullid], list(self.heads()))
1531 1545 if heads is None:
1532 1546 # All nodes are ancestors, so the latest ancestor is the last
1533 1547 # node.
1534 1548 highestrev = len(self) - 1
1535 1549 # Set ancestors to None to signal that every node is an ancestor.
1536 1550 ancestors = None
1537 1551 # Set heads to an empty dictionary for later discovery of heads
1538 1552 heads = {}
1539 1553 else:
1540 1554 heads = list(heads)
1541 1555 if not heads:
1542 1556 return nonodes
1543 1557 ancestors = set()
1544 1558 # Turn heads into a dictionary so we can remove 'fake' heads.
1545 1559 # Also, later we will be using it to filter out the heads we can't
1546 1560 # find from roots.
1547 1561 heads = dict.fromkeys(heads, False)
1548 1562 # Start at the top and keep marking parents until we're done.
1549 1563 nodestotag = set(heads)
1550 1564 # Remember where the top was so we can use it as a limit later.
1551 1565 highestrev = max([self.rev(n) for n in nodestotag])
1552 1566 while nodestotag:
1553 1567 # grab a node to tag
1554 1568 n = nodestotag.pop()
1555 1569 # Never tag nullid
1556 1570 if n == nullid:
1557 1571 continue
1558 1572 # A node's revision number represents its place in a
1559 1573 # topologically sorted list of nodes.
1560 1574 r = self.rev(n)
1561 1575 if r >= lowestrev:
1562 1576 if n not in ancestors:
1563 1577 # If we are possibly a descendant of one of the roots
1564 1578 # and we haven't already been marked as an ancestor
1565 1579 ancestors.add(n) # Mark as ancestor
1566 1580 # Add non-nullid parents to list of nodes to tag.
1567 1581 nodestotag.update([p for p in self.parents(n) if
1568 1582 p != nullid])
1569 1583 elif n in heads: # We've seen it before; is it a fake head?
1570 1584 # So it is; real heads should not be the ancestors of
1571 1585 # any other heads.
1572 1586 heads.pop(n)
1573 1587 if not ancestors:
1574 1588 return nonodes
1575 1589 # Now that we have our set of ancestors, we want to remove any
1576 1590 # roots that are not ancestors.
1577 1591
1578 1592 # If one of the roots was nullid, everything is included anyway.
1579 1593 if lowestrev > nullrev:
1580 1594 # But, since nullid was not one of our roots, recompute the lowest
1581 1595 # rev so roots that aren't ancestors are excluded.
1582 1596
1583 1597 # Filter out roots that aren't ancestors of heads
1584 1598 roots = [root for root in roots if root in ancestors]
1585 1599 # Recompute the lowest revision
1586 1600 if roots:
1587 1601 lowestrev = min([self.rev(root) for root in roots])
1588 1602 else:
1589 1603 # No more roots? Return empty list
1590 1604 return nonodes
1591 1605 else:
1592 1606 # We are descending from nullid, and don't need to care about
1593 1607 # any other roots.
1594 1608 lowestrev = nullrev
1595 1609 roots = [nullid]
1596 1610 # Transform our roots list into a set.
1597 1611 descendants = set(roots)
1598 1612 # Also, keep the original roots so we can filter out roots that aren't
1599 1613 # 'real' roots (i.e. are descended from other roots).
1600 1614 roots = descendants.copy()
1601 1615 # Our topologically sorted list of output nodes.
1602 1616 orderedout = []
1603 1617 # Don't start at nullid since we don't want nullid in our output list,
1604 1618 # and if nullid shows up in descendants, empty parents will look like
1605 1619 # they're descendants.
1606 1620 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1607 1621 n = self.node(r)
1608 1622 isdescendant = False
1609 1623 if lowestrev == nullrev: # Everybody is a descendant of nullid
1610 1624 isdescendant = True
1611 1625 elif n in descendants:
1612 1626 # n is already a descendant
1613 1627 isdescendant = True
1614 1628 # This check only needs to be done here because all the roots
1615 1629 # will start being marked as descendants before the loop.
1616 1630 if n in roots:
1617 1631 # If n was a root, check if it's a 'real' root.
1618 1632 p = tuple(self.parents(n))
1619 1633 # If any of its parents are descendants, it's not a root.
1620 1634 if (p[0] in descendants) or (p[1] in descendants):
1621 1635 roots.remove(n)
1622 1636 else:
1623 1637 p = tuple(self.parents(n))
1624 1638 # A node is a descendant if either of its parents are
1625 1639 # descendants. (We seeded the descendants set with the roots
1626 1640 # up there, remember?)
1627 1641 if (p[0] in descendants) or (p[1] in descendants):
1628 1642 descendants.add(n)
1629 1643 isdescendant = True
1630 1644 if isdescendant and ((ancestors is None) or (n in ancestors)):
1631 1645 # Only include nodes that are both descendants and ancestors.
1632 1646 orderedout.append(n)
1633 1647 if (ancestors is not None) and (n in heads):
1634 1648 # We're trying to figure out which heads are reachable
1635 1649 # from roots.
1636 1650 # Mark this head as having been reached
1637 1651 heads[n] = True
1638 1652 elif ancestors is None:
1639 1653 # Otherwise, we're trying to discover the heads.
1640 1654 # Assume this is a head because if it isn't, the next step
1641 1655 # will eventually remove it.
1642 1656 heads[n] = True
1643 1657 # But, obviously its parents aren't.
1644 1658 for p in self.parents(n):
1645 1659 heads.pop(p, None)
1646 1660 heads = [head for head, flag in heads.iteritems() if flag]
1647 1661 roots = list(roots)
1648 1662 assert orderedout
1649 1663 assert roots
1650 1664 assert heads
1651 1665 return (orderedout, roots, heads)
1652 1666
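# The same contract restated on a toy parent map as a runnable
# cross-check; ``_between`` is a hypothetical helper, not part of the
# revlog class:
def _between(parentrevs, roots, heads, nullrev=-1):
    desc = set(roots)  # descendants of roots, roots included
    for r in sorted(parentrevs):
        if any(p in desc for p in parentrevs[r]):
            desc.add(r)
    anc = set()        # ancestors of heads, heads included
    stack = list(heads)
    while stack:
        r = stack.pop()
        if r != nullrev and r not in anc:
            anc.add(r)
            stack.extend(parentrevs[r])
    nodes = sorted(desc & anc)
    outroots = [r for r in roots if r in nodes]
    outheads = [h for h in heads if h in nodes]
    return nodes, outroots, outheads
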
1653 1667 def headrevs(self):
1654 1668 try:
1655 1669 return self.index.headrevs()
1656 1670 except AttributeError:
1657 1671 return self._headrevs()
1658 1672
1659 1673 def computephases(self, roots):
1660 1674 return self.index.computephasesmapsets(roots)
1661 1675
1662 1676 def _headrevs(self):
1663 1677 count = len(self)
1664 1678 if not count:
1665 1679 return [nullrev]
1666 1680 # we won't iterate over filtered revs, so nobody is a head at start
1667 1681 ishead = [0] * (count + 1)
1668 1682 index = self.index
1669 1683 for r in self:
1670 1684 ishead[r] = 1 # I may be a head
1671 1685 e = index[r]
1672 1686 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1673 1687 return [r for r, val in enumerate(ishead) if val]
1674 1688
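# An equivalent head-marking pass over a plain parent table. The real
# loop above sizes ``ishead`` to count + 1 so that a nullrev (-1)
# parent harmlessly clears the extra trailing slot; this hypothetical
# helper makes that trick explicit:
def _toyheadrevs(parentrevs, count):
    ishead = [1] * (count + 1)  # last slot is the nullrev sentinel
    for r in range(count):
        p1, p2 = parentrevs[r]
        ishead[p1] = ishead[p2] = 0  # index -1 hits the sentinel
    return [r for r in range(count) if ishead[r]]
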
1675 1689 def heads(self, start=None, stop=None):
1676 1690 """return the list of all nodes that have no children
1677 1691
1678 1692 if start is specified, only heads that are descendants of
1679 1693 start will be returned
1680 1694 if stop is specified, it will consider all the revs from stop
1681 1695 as if they had no children
1682 1696 """
1683 1697 if start is None and stop is None:
1684 1698 if not len(self):
1685 1699 return [nullid]
1686 1700 return [self.node(r) for r in self.headrevs()]
1687 1701
1688 1702 if start is None:
1689 1703 start = nullid
1690 1704 if stop is None:
1691 1705 stop = []
1692 1706 stoprevs = set([self.rev(n) for n in stop])
1693 1707 startrev = self.rev(start)
1694 1708 reachable = {startrev}
1695 1709 heads = {startrev}
1696 1710
1697 1711 parentrevs = self.parentrevs
1698 1712 for r in self.revs(start=startrev + 1):
1699 1713 for p in parentrevs(r):
1700 1714 if p in reachable:
1701 1715 if r not in stoprevs:
1702 1716 reachable.add(r)
1703 1717 heads.add(r)
1704 1718 if p in heads and p not in stoprevs:
1705 1719 heads.remove(p)
1706 1720
1707 1721 return [self.node(r) for r in heads]
1708 1722
1709 1723 def children(self, node):
1710 1724 """find the children of a given node"""
1711 1725 c = []
1712 1726 p = self.rev(node)
1713 1727 for r in self.revs(start=p + 1):
1714 1728 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1715 1729 if prevs:
1716 1730 for pr in prevs:
1717 1731 if pr == p:
1718 1732 c.append(self.node(r))
1719 1733 elif p == nullrev:
1720 1734 c.append(self.node(r))
1721 1735 return c
1722 1736
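# e.g. if revs 2 and 3 both have parent rev 1, the scan above visits
# every rev after 1 once and collects [node(2), node(3)]. The elif
# handles asking for children of the null revision: a root has both
# parents null, so its filtered parent list is empty, and it is then
# reported as a child of nullid.
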
1723 1737 def commonancestorsheads(self, a, b):
1724 1738 """calculate all the heads of the common ancestors of nodes a and b"""
1725 1739 a, b = self.rev(a), self.rev(b)
1726 1740 ancs = self._commonancestorsheads(a, b)
1727 1741 return pycompat.maplist(self.node, ancs)
1728 1742
1729 1743 def _commonancestorsheads(self, *revs):
1730 1744 """calculate all the heads of the common ancestors of revs"""
1731 1745 try:
1732 1746 ancs = self.index.commonancestorsheads(*revs)
1733 1747 except (AttributeError, OverflowError): # C implementation failed
1734 1748 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1735 1749 return ancs
1736 1750
1737 1751 def isancestor(self, a, b):
1738 1752 """return True if node a is an ancestor of node b
1739 1753
1740 1754 A revision is considered an ancestor of itself."""
1741 1755 a, b = self.rev(a), self.rev(b)
1742 1756 return self.isancestorrev(a, b)
1743 1757
1744 1758 def isancestorrev(self, a, b):
1745 1759 """return True if revision a is an ancestor of revision b
1746 1760
1747 1761 A revision is considered an ancestor of itself.
1748 1762
1749 1763 The implementation of this is trivial but the use of
1750 1764 commonancestorsheads is not."""
1751 1765 if a == nullrev:
1752 1766 return True
1753 1767 elif a == b:
1754 1768 return True
1755 1769 elif a > b:
1756 1770 return False
1757 1771 return a in self._commonancestorsheads(a, b)
1758 1772
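# Why the membership test above is enough: if a is an ancestor of b,
# then every common ancestor of (a, b) is an ancestor of a, so a itself
# is the single head of that set and the test succeeds; if a is not an
# ancestor of b, a cannot appear among the common-ancestor heads at
# all. The early a > b exit works because a strict ancestor always has
# a smaller revision number than its descendants.
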
1759 1773 def ancestor(self, a, b):
1760 1774 """calculate the "best" common ancestor of nodes a and b"""
1761 1775
1762 1776 a, b = self.rev(a), self.rev(b)
1763 1777 try:
1764 1778 ancs = self.index.ancestors(a, b)
1765 1779 except (AttributeError, OverflowError):
1766 1780 ancs = ancestor.ancestors(self.parentrevs, a, b)
1767 1781 if ancs:
1768 1782 # choose a consistent winner when there's a tie
1769 1783 return min(map(self.node, ancs))
1770 1784 return nullid
1771 1785
1772 1786 def _match(self, id):
1773 1787 if isinstance(id, int):
1774 1788 # rev
1775 1789 return self.node(id)
1776 1790 if len(id) == 20:
1777 1791 # possibly a binary node
1778 1792 # odds of a binary node being all hex in ASCII are 1 in 10**25
1779 1793 try:
1780 1794 node = id
1781 1795 self.rev(node) # quick search the index
1782 1796 return node
1783 1797 except LookupError:
1784 1798 pass # may be partial hex id
1785 1799 try:
1786 1800 # str(rev)
1787 1801 rev = int(id)
1788 1802 if "%d" % rev != id:
1789 1803 raise ValueError
1790 1804 if rev < 0:
1791 1805 rev = len(self) + rev
1792 1806 if rev < 0 or rev >= len(self):
1793 1807 raise ValueError
1794 1808 return self.node(rev)
1795 1809 except (ValueError, OverflowError):
1796 1810 pass
1797 1811 if len(id) == 40:
1798 1812 try:
1799 1813 # a full hex nodeid?
1800 1814 node = bin(id)
1801 1815 self.rev(node)
1802 1816 return node
1803 1817 except (TypeError, LookupError):
1804 1818 pass
1805 1819
1806 1820 def _partialmatch(self, id):
1807 1821 # we don't care about wdirfilenodeids as they should always be full hashes
1808 1822 maybewdir = wdirhex.startswith(id)
1809 1823 try:
1810 1824 partial = self.index.partialmatch(id)
1811 1825 if partial and self.hasnode(partial):
1812 1826 if maybewdir:
1813 1827 # single 'ff...' match in radix tree, ambiguous with wdir
1814 1828 raise RevlogError
1815 1829 return partial
1816 1830 if maybewdir:
1817 1831 # no 'ff...' match in radix tree, wdir identified
1818 1832 raise error.WdirUnsupported
1819 1833 return None
1820 1834 except RevlogError:
1821 1835 # parsers.c radix tree lookup gave multiple matches
1822 1836 # fast path: for unfiltered changelog, radix tree is accurate
1823 1837 if not getattr(self, 'filteredrevs', None):
1824 1838 raise AmbiguousPrefixLookupError(id, self.indexfile,
1825 1839 _('ambiguous identifier'))
1826 1840 # fall through to slow path that filters hidden revisions
1827 1841 except (AttributeError, ValueError):
1828 1842 # we are pure python, or key was too short to search radix tree
1829 1843 pass
1830 1844
1831 1845 if id in self._pcache:
1832 1846 return self._pcache[id]
1833 1847
1834 1848 if len(id) <= 40:
1835 1849 try:
1836 1850 # hex(node)[:...]
1837 1851 l = len(id) // 2 # grab an even number of digits
1838 1852 prefix = bin(id[:l * 2])
1839 1853 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1840 1854 nl = [n for n in nl if hex(n).startswith(id) and
1841 1855 self.hasnode(n)]
1842 1856 if len(nl) > 0:
1843 1857 if len(nl) == 1 and not maybewdir:
1844 1858 self._pcache[id] = nl[0]
1845 1859 return nl[0]
1846 1860 raise AmbiguousPrefixLookupError(id, self.indexfile,
1847 1861 _('ambiguous identifier'))
1848 1862 if maybewdir:
1849 1863 raise error.WdirUnsupported
1850 1864 return None
1851 1865 except TypeError:
1852 1866 pass
1853 1867
1854 1868 def lookup(self, id):
1855 1869 """locate a node based on:
1856 1870 - revision number or str(revision number)
1857 1871 - nodeid or subset of hex nodeid
1858 1872 """
1859 1873 n = self._match(id)
1860 1874 if n is not None:
1861 1875 return n
1862 1876 n = self._partialmatch(id)
1863 1877 if n:
1864 1878 return n
1865 1879
1866 1880 raise LookupError(id, self.indexfile, _('no match found'))
1867 1881
1868 1882 def shortest(self, node, minlength=1):
1869 1883 """Find the shortest unambiguous prefix that matches node."""
1870 1884 def isvalid(prefix):
1871 1885 try:
1872 1886 node = self._partialmatch(prefix)
1873 1887 except error.RevlogError:
1874 1888 return False
1875 1889 except error.WdirUnsupported:
1876 1890 # single 'ff...' match
1877 1891 return True
1878 1892 if node is None:
1879 1893 raise LookupError(node, self.indexfile, _('no node'))
1880 1894 return True
1881 1895
1882 1896 def maybewdir(prefix):
1883 1897 return all(c == 'f' for c in prefix)
1884 1898
1885 1899 hexnode = hex(node)
1886 1900
1887 1901 def disambiguate(hexnode, minlength):
1888 1902 """Disambiguate against wdirid."""
1889 1903 for length in range(minlength, 41):
1890 1904 prefix = hexnode[:length]
1891 1905 if not maybewdir(prefix):
1892 1906 return prefix
1893 1907
1894 1908 if not getattr(self, 'filteredrevs', None):
1895 1909 try:
1896 1910 length = max(self.index.shortest(node), minlength)
1897 1911 return disambiguate(hexnode, length)
1898 1912 except RevlogError:
1899 1913 if node != wdirid:
1900 1914 raise LookupError(node, self.indexfile, _('no node'))
1901 1915 except AttributeError:
1902 1916 # Fall through to pure code
1903 1917 pass
1904 1918
1905 1919 if node == wdirid:
1906 1920 for length in range(minlength, 41):
1907 1921 prefix = hexnode[:length]
1908 1922 if isvalid(prefix):
1909 1923 return prefix
1910 1924
1911 1925 for length in range(minlength, 41):
1912 1926 prefix = hexnode[:length]
1913 1927 if isvalid(prefix):
1914 1928 return disambiguate(hexnode, length)
1915 1929
1916 1930 def cmp(self, node, text):
1917 1931 """compare text with a given file revision
1918 1932
1919 1933 returns True if text is different from what is stored.
1920 1934 """
1921 1935 p1, p2 = self.parents(node)
1922 1936 return hash(text, p1, p2) != node
1923 1937
1924 1938 def _cachesegment(self, offset, data):
1925 1939 """Add a segment to the revlog cache.
1926 1940
1927 1941 Accepts an absolute offset and the data that is at that location.
1928 1942 """
1929 1943 o, d = self._chunkcache
1930 1944 # try to add to existing cache
1931 1945 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1932 1946 self._chunkcache = o, d + data
1933 1947 else:
1934 1948 self._chunkcache = offset, data
1935 1949
1936 1950 def _readsegment(self, offset, length, df=None):
1937 1951 """Load a segment of raw data from the revlog.
1938 1952
1939 1953 Accepts an absolute offset, length to read, and an optional existing
1940 1954 file handle to read from.
1941 1955
1942 1956 If an existing file handle is passed, it will be seeked and the
1943 1957 original seek position will NOT be restored.
1944 1958
1945 1959 Returns a str or buffer of raw byte data.
1946 1960 """
1947 1961 # Cache data both forward and backward around the requested
1948 1962 # data, in a fixed size window. This helps speed up operations
1949 1963 # involving reading the revlog backwards.
1950 1964 cachesize = self._chunkcachesize
1951 1965 realoffset = offset & ~(cachesize - 1)
1952 1966 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
1953 1967 - realoffset)
1954 1968 with self._datareadfp(df) as df:
1955 1969 df.seek(realoffset)
1956 1970 d = df.read(reallength)
1957 1971 self._cachesegment(realoffset, d)
1958 1972 if offset != realoffset or reallength != length:
1959 1973 return util.buffer(d, offset - realoffset, length)
1960 1974 return d
1961 1975
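# Worked example of the window rounding above, assuming a power-of-two
# cache size of 65536: for offset=70000, length=100, realoffset becomes
# 70000 & ~65535 == 65536 and reallength becomes
# ((70000 + 100 + 65536) & ~65535) - 65536 == 65536, so one aligned
# 64k window covering the request and its neighborhood is read and
# cached, and a buffer view of the requested 100 bytes is returned.
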
1962 1976 def _getsegment(self, offset, length, df=None):
1963 1977 """Obtain a segment of raw data from the revlog.
1964 1978
1965 1979 Accepts an absolute offset, length of bytes to obtain, and an
1966 1980 optional file handle to the already-opened revlog. If the file
1967 1981 handle is used, its original seek position will not be preserved.
1968 1982
1969 1983 Requests for data may be returned from a cache.
1970 1984
1971 1985 Returns a str or a buffer instance of raw byte data.
1972 1986 """
1973 1987 o, d = self._chunkcache
1974 1988 l = len(d)
1975 1989
1976 1990 # is it in the cache?
1977 1991 cachestart = offset - o
1978 1992 cacheend = cachestart + length
1979 1993 if cachestart >= 0 and cacheend <= l:
1980 1994 if cachestart == 0 and cacheend == l:
1981 1995 return d # avoid a copy
1982 1996 return util.buffer(d, cachestart, cacheend - cachestart)
1983 1997
1984 1998 return self._readsegment(offset, length, df=df)
1985 1999
1986 2000 def _getsegmentforrevs(self, startrev, endrev, df=None):
1987 2001 """Obtain a segment of raw data corresponding to a range of revisions.
1988 2002
1989 2003 Accepts the start and end revisions and an optional already-open
1990 2004 file handle to be used for reading. If the file handle is read, its
1991 2005 seek position will not be preserved.
1992 2006
1993 2007 Requests for data may be satisfied by a cache.
1994 2008
1995 2009 Returns a 2-tuple of (offset, data) for the requested range of
1996 2010 revisions. Offset is the integer offset from the beginning of the
1997 2011 revlog and data is a str or buffer of the raw byte data.
1998 2012
1999 2013 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
2000 2014 to determine where each revision's data begins and ends.
2001 2015 """
2002 2016 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
2003 2017 # (functions are expensive).
2004 2018 index = self.index
2005 2019 istart = index[startrev]
2006 2020 start = int(istart[0] >> 16)
2007 2021 if startrev == endrev:
2008 2022 end = start + istart[1]
2009 2023 else:
2010 2024 iend = index[endrev]
2011 2025 end = int(iend[0] >> 16) + iend[1]
2012 2026
2013 2027 if self._inline:
2014 2028 start += (startrev + 1) * self._io.size
2015 2029 end += (endrev + 1) * self._io.size
2016 2030 length = end - start
2017 2031
2018 2032 return start, self._getsegment(start, length, df=df)
2019 2033
2020 2034 def _chunk(self, rev, df=None):
2021 2035 """Obtain a single decompressed chunk for a revision.
2022 2036
2023 2037 Accepts an integer revision and an optional already-open file handle
2024 2038 to be used for reading. If used, the seek position of the file will not
2025 2039 be preserved.
2026 2040
2027 2041 Returns a str holding uncompressed data for the requested revision.
2028 2042 """
2029 2043 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
2030 2044
2031 2045 def _chunks(self, revs, df=None, targetsize=None):
2032 2046 """Obtain decompressed chunks for the specified revisions.
2033 2047
2034 2048 Accepts an iterable of numeric revisions that are assumed to be in
2035 2049 ascending order. Also accepts an optional already-open file handle
2036 2050 to be used for reading. If used, the seek position of the file will
2037 2051 not be preserved.
2038 2052
2039 2053 This function is similar to calling ``self._chunk()`` multiple times,
2040 2054 but is faster.
2041 2055
2042 2056 Returns a list with decompressed data for each requested revision.
2043 2057 """
2044 2058 if not revs:
2045 2059 return []
2046 2060 start = self.start
2047 2061 length = self.length
2048 2062 inline = self._inline
2049 2063 iosize = self._io.size
2050 2064 buffer = util.buffer
2051 2065
2052 2066 l = []
2053 2067 ladd = l.append
2054 2068
2055 2069 if not self._withsparseread:
2056 2070 slicedchunks = (revs,)
2057 2071 else:
2058 2072 slicedchunks = _slicechunk(self, revs, targetsize=targetsize)
2059 2073
2060 2074 for revschunk in slicedchunks:
2061 2075 firstrev = revschunk[0]
2062 2076 # Skip trailing revisions with empty diff
2063 2077 for lastrev in revschunk[::-1]:
2064 2078 if length(lastrev) != 0:
2065 2079 break
2066 2080
2067 2081 try:
2068 2082 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
2069 2083 except OverflowError:
2070 2084 # issue4215 - we can't cache a run of chunks greater than
2071 2085 # 2G on Windows
2072 2086 return [self._chunk(rev, df=df) for rev in revschunk]
2073 2087
2074 2088 decomp = self.decompress
2075 2089 for rev in revschunk:
2076 2090 chunkstart = start(rev)
2077 2091 if inline:
2078 2092 chunkstart += (rev + 1) * iosize
2079 2093 chunklength = length(rev)
2080 2094 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
2081 2095
2082 2096 return l
2083 2097
2084 2098 def _chunkclear(self):
2085 2099 """Clear the raw chunk cache."""
2086 2100 self._chunkcache = (0, '')
2087 2101
2088 2102 def deltaparent(self, rev):
2089 2103 """return deltaparent of the given revision"""
2090 2104 base = self.index[rev][3]
2091 2105 if base == rev:
2092 2106 return nullrev
2093 2107 elif self._generaldelta:
2094 2108 return base
2095 2109 else:
2096 2110 return rev - 1
2097 2111
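# e.g. with generaldelta the index stores the delta parent directly in
# base, so base == rev can only mean a full version (no delta); without
# generaldelta, chains are implicit and every rev deltas against
# rev - 1.
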
2098 2112 def issnapshot(self, rev):
2099 2113 """tells whether rev is a snapshot
2100 2114 """
2101 2115 if rev == nullrev:
2102 2116 return True
2103 2117 deltap = self.deltaparent(rev)
2104 2118 if deltap == nullrev:
2105 2119 return True
2106 2120 p1, p2 = self.parentrevs(rev)
2107 2121 if deltap in (p1, p2):
2108 2122 return False
2109 2123 return self.issnapshot(deltap)
2110 2124
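# The same recursion against toy tables, to make the three cases
# explicit; ``_toyissnapshot`` is illustrative only:
def _toyissnapshot(deltaparent, parentrevs, rev, nullrev=-1):
    if rev == nullrev or deltaparent[rev] == nullrev:
        return True   # the null rev, or a full version
    if deltaparent[rev] in parentrevs[rev]:
        return False  # an ordinary delta against a DAG parent
    # intermediate snapshot: a snapshot whose base is itself a snapshot
    return _toyissnapshot(deltaparent, parentrevs, deltaparent[rev],
                          nullrev)
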
2111 2125 def snapshotdepth(self, rev):
2112 2126 """number of snapshot in the chain before this one"""
2113 2127 if not self.issnapshot(rev):
2114 2128 raise ProgrammingError('revision %d not a snapshot')
2115 2129 return len(self._deltachain(rev)[0]) - 1
2116 2130
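# Since every link in a snapshot's delta chain is itself a snapshot,
# the depth is simply the chain length minus one: a full version has
# chain [rev] and depth 0, an intermediate snapshot based on it has
# chain [base, rev] and depth 1, and so on.
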
2117 2131 def revdiff(self, rev1, rev2):
2118 2132 """return or calculate a delta between two revisions
2119 2133
2120 2134 The delta calculated is in binary form and is intended to be written to
2121 2135 revlog data directly. So this function needs raw revision data.
2122 2136 """
2123 2137 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
2124 2138 return bytes(self._chunk(rev2))
2125 2139
2126 2140 return mdiff.textdiff(self.revision(rev1, raw=True),
2127 2141 self.revision(rev2, raw=True))
2128 2142
2129 2143 def revision(self, nodeorrev, _df=None, raw=False):
2130 2144 """return an uncompressed revision of a given node or revision
2131 2145 number.
2132 2146
2133 2147 _df - an existing file handle to read from. (internal-only)
2134 2148 raw - an optional argument specifying if the revision data is to be
2135 2149 treated as raw data when applying flag transforms. 'raw' should be set
2136 2150 to True when generating changegroups or in debug commands.
2137 2151 """
2138 2152 if isinstance(nodeorrev, int):
2139 2153 rev = nodeorrev
2140 2154 node = self.node(rev)
2141 2155 else:
2142 2156 node = nodeorrev
2143 2157 rev = None
2144 2158
2145 2159 cachedrev = None
2146 2160 flags = None
2147 2161 rawtext = None
2148 2162 if node == nullid:
2149 2163 return ""
2150 2164 if self._cache:
2151 2165 if self._cache[0] == node:
2152 2166 # _cache only stores rawtext
2153 2167 if raw:
2154 2168 return self._cache[2]
2155 2169 # duplicated, but good for perf
2156 2170 if rev is None:
2157 2171 rev = self.rev(node)
2158 2172 if flags is None:
2159 2173 flags = self.flags(rev)
2160 2174 # no extra flags set, no flag processor runs, text = rawtext
2161 2175 if flags == REVIDX_DEFAULT_FLAGS:
2162 2176 return self._cache[2]
2163 2177 # rawtext is reusable. need to run flag processor
2164 2178 rawtext = self._cache[2]
2165 2179
2166 2180 cachedrev = self._cache[1]
2167 2181
2168 2182 # look up what we need to read
2169 2183 if rawtext is None:
2170 2184 if rev is None:
2171 2185 rev = self.rev(node)
2172 2186
2173 2187 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
2174 2188 if stopped:
2175 2189 rawtext = self._cache[2]
2176 2190
2177 2191 # drop cache to save memory
2178 2192 self._cache = None
2179 2193
2180 2194 targetsize = None
2181 2195 rawsize = self.index[rev][2]
2182 2196 if 0 <= rawsize:
2183 2197 targetsize = 4 * rawsize
2184 2198
2185 2199 bins = self._chunks(chain, df=_df, targetsize=targetsize)
2186 2200 if rawtext is None:
2187 2201 rawtext = bytes(bins[0])
2188 2202 bins = bins[1:]
2189 2203
2190 2204 rawtext = mdiff.patches(rawtext, bins)
2191 2205 self._cache = (node, rev, rawtext)
2192 2206
2193 2207 if flags is None:
2194 2208 if rev is None:
2195 2209 rev = self.rev(node)
2196 2210 flags = self.flags(rev)
2197 2211
2198 2212 text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
2199 2213 if validatehash:
2200 2214 self.checkhash(text, node, rev=rev)
2201 2215
2202 2216 return text
2203 2217
2204 2218 def hash(self, text, p1, p2):
2205 2219 """Compute a node hash.
2206 2220
2207 2221 Available as a function so that subclasses can replace the hash
2208 2222 as needed.
2209 2223 """
2210 2224 return hash(text, p1, p2)
2211 2225
2212 2226 def _processflags(self, text, flags, operation, raw=False):
2213 2227 """Inspect revision data flags and applies transforms defined by
2214 2228 registered flag processors.
2215 2229
2216 2230 ``text`` - the revision data to process
2217 2231 ``flags`` - the revision flags
2218 2232 ``operation`` - the operation being performed (read or write)
2219 2233 ``raw`` - an optional argument describing if the raw transform should be
2220 2234 applied.
2221 2235
2222 2236 This method processes the flags in the order (or reverse order if
2223 2237 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
2224 2238 flag processors registered for present flags. The order of flags defined
2225 2239 in REVIDX_FLAGS_ORDER needs to be stable because the transforms are not commutative.
2226 2240
2227 2241 Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
2228 2242 processed text and ``validatehash`` is a bool indicating whether the
2229 2243 returned text should be checked for hash integrity.
2230 2244
2231 2245 Note: If the ``raw`` argument is set, it has precedence over the
2232 2246 operation and will only update the value of ``validatehash``.
2233 2247 """
2234 2248 # fast path: no flag processors will run
2235 2249 if flags == 0:
2236 2250 return text, True
2237 2251 if operation not in ('read', 'write'):
2238 2252 raise ProgrammingError(_("invalid '%s' operation") % operation)
2239 2253 # Check all flags are known.
2240 2254 if flags & ~REVIDX_KNOWN_FLAGS:
2241 2255 raise RevlogError(_("incompatible revision flag '%#x'") %
2242 2256 (flags & ~REVIDX_KNOWN_FLAGS))
2243 2257 validatehash = True
2244 2258 # Depending on the operation (read or write), the order might be
2245 2259 # reversed due to non-commutative transforms.
2246 2260 orderedflags = REVIDX_FLAGS_ORDER
2247 2261 if operation == 'write':
2248 2262 orderedflags = reversed(orderedflags)
2249 2263
2250 2264 for flag in orderedflags:
2251 2265 # If a flagprocessor has been registered for a known flag, apply the
2252 2266 # related operation transform and update result tuple.
2253 2267 if flag & flags:
2254 2268 vhash = True
2255 2269
2256 2270 if flag not in _flagprocessors:
2257 2271 message = _("missing processor for flag '%#x'") % (flag)
2258 2272 raise RevlogError(message)
2259 2273
2260 2274 processor = _flagprocessors[flag]
2261 2275 if processor is not None:
2262 2276 readtransform, writetransform, rawtransform = processor
2263 2277
2264 2278 if raw:
2265 2279 vhash = rawtransform(self, text)
2266 2280 elif operation == 'read':
2267 2281 text, vhash = readtransform(self, text)
2268 2282 else: # write operation
2269 2283 text, vhash = writetransform(self, text)
2270 2284 validatehash = validatehash and vhash
2271 2285
2272 2286 return text, validatehash
2273 2287
2274 2288 def checkhash(self, text, node, p1=None, p2=None, rev=None):
2275 2289 """Check node hash integrity.
2276 2290
2277 2291 Available as a function so that subclasses can extend hash mismatch
2278 2292 behaviors as needed.
2279 2293 """
2280 2294 try:
2281 2295 if p1 is None and p2 is None:
2282 2296 p1, p2 = self.parents(node)
2283 2297 if node != self.hash(text, p1, p2):
2284 2298 revornode = rev
2285 2299 if revornode is None:
2286 2300 revornode = templatefilters.short(hex(node))
2287 2301 raise RevlogError(_("integrity check failed on %s:%s")
2288 2302 % (self.indexfile, pycompat.bytestr(revornode)))
2289 2303 except RevlogError:
2290 2304 if self._censorable and _censoredtext(text):
2291 2305 raise error.CensoredNodeError(self.indexfile, node, text)
2292 2306 raise
2293 2307
2294 2308 def _enforceinlinesize(self, tr, fp=None):
2295 2309 """Check if the revlog is too big for inline and convert if so.
2296 2310
2297 2311 This should be called after revisions are added to the revlog. If the
2298 2312 revlog has grown too large to be an inline revlog, it will convert it
2299 2313 to use multiple index and data files.
2300 2314 """
2301 2315 tiprev = len(self) - 1
2302 2316 if (not self._inline or
2303 2317 (self.start(tiprev) + self.length(tiprev)) < _maxinline):
2304 2318 return
2305 2319
2306 2320 trinfo = tr.find(self.indexfile)
2307 2321 if trinfo is None:
2308 2322 raise RevlogError(_("%s not found in the transaction")
2309 2323 % self.indexfile)
2310 2324
2311 2325 trindex = trinfo[2]
2312 2326 if trindex is not None:
2313 2327 dataoff = self.start(trindex)
2314 2328 else:
2315 2329 # revlog was stripped at start of transaction, use all leftover data
2316 2330 trindex = len(self) - 1
2317 2331 dataoff = self.end(tiprev)
2318 2332
2319 2333 tr.add(self.datafile, dataoff)
2320 2334
2321 2335 if fp:
2322 2336 fp.flush()
2323 2337 fp.close()
2324 2338
2325 2339 with self._datafp('w') as df:
2326 2340 for r in self:
2327 2341 df.write(self._getsegmentforrevs(r, r)[1])
2328 2342
2329 2343 with self._indexfp('w') as fp:
2330 2344 self.version &= ~FLAG_INLINE_DATA
2331 2345 self._inline = False
2332 2346 io = self._io
2333 2347 for i in self:
2334 2348 e = io.packentry(self.index[i], self.node, self.version, i)
2335 2349 fp.write(e)
2336 2350
2337 2351 # the temp file replaces the real index when we exit the context
2338 2352 # manager
2339 2353
2340 2354 tr.replace(self.indexfile, trindex * self._io.size)
2341 2355 self._chunkclear()
2342 2356
2343 2357 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
2344 2358 node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
2345 2359 """add a revision to the log
2346 2360
2347 2361 text - the revision data to add
2348 2362 transaction - the transaction object used for rollback
2349 2363 link - the linkrev data to add
2350 2364 p1, p2 - the parent nodeids of the revision
2351 2365 cachedelta - an optional precomputed delta
2352 2366 node - nodeid of revision; typically node is not specified, and it is
2353 2367 computed by default as hash(text, p1, p2), however subclasses might
2354 2368 use a different hashing method (and override checkhash() in such a case)
2355 2369 flags - the known flags to set on the revision
2356 2370 deltacomputer - an optional _deltacomputer instance shared between
2357 2371 multiple calls
2358 2372 """
2359 2373 if link == nullrev:
2360 2374 raise RevlogError(_("attempted to add linkrev -1 to %s")
2361 2375 % self.indexfile)
2362 2376
2363 2377 if flags:
2364 2378 node = node or self.hash(text, p1, p2)
2365 2379
2366 2380 rawtext, validatehash = self._processflags(text, flags, 'write')
2367 2381
2368 2382 # If the flag processor modifies the revision data, ignore any provided
2369 2383 # cachedelta.
2370 2384 if rawtext != text:
2371 2385 cachedelta = None
2372 2386
2373 2387 if len(rawtext) > _maxentrysize:
2374 2388 raise RevlogError(
2375 2389 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
2376 2390 % (self.indexfile, len(rawtext)))
2377 2391
2378 2392 node = node or self.hash(rawtext, p1, p2)
2379 2393 if node in self.nodemap:
2380 2394 return node
2381 2395
2382 2396 if validatehash:
2383 2397 self.checkhash(rawtext, node, p1=p1, p2=p2)
2384 2398
2385 2399 return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
2386 2400 flags, cachedelta=cachedelta,
2387 2401 deltacomputer=deltacomputer)
2388 2402
2389 2403 def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
2390 2404 cachedelta=None, deltacomputer=None):
2391 2405 """add a raw revision with known flags, node and parents
2392 2406 useful when reusing a revision not stored in this revlog (ex: received
2393 2407 over wire, or read from an external bundle).
2394 2408 """
2395 2409 dfh = None
2396 2410 if not self._inline:
2397 2411 dfh = self._datafp("a+")
2398 2412 ifh = self._indexfp("a+")
2399 2413 try:
2400 2414 return self._addrevision(node, rawtext, transaction, link, p1, p2,
2401 2415 flags, cachedelta, ifh, dfh,
2402 2416 deltacomputer=deltacomputer)
2403 2417 finally:
2404 2418 if dfh:
2405 2419 dfh.close()
2406 2420 ifh.close()
2407 2421
2408 2422 def compress(self, data):
2409 2423 """Generate a possibly-compressed representation of data."""
2410 2424 if not data:
2411 2425 return '', data
2412 2426
2413 2427 compressed = self._compressor.compress(data)
2414 2428
2415 2429 if compressed:
2416 2430 # The revlog compressor added the header in the returned data.
2417 2431 return '', compressed
2418 2432
2419 2433 if data[0:1] == '\0':
2420 2434 return '', data
2421 2435 return 'u', data
2422 2436
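# The 2-tuple is (header, data): compressed output carries its own
# header byte (e.g. 'x' for zlib), literal data starting with '\0' is
# already recognizable as stored raw, and anything else gets an
# explicit 'u' (uncompressed) prefix so decompress() can route it;
# these are the same type bytes dispatched on below.
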
2423 2437 def decompress(self, data):
2424 2438 """Decompress a revlog chunk.
2425 2439
2426 2440 The chunk is expected to begin with a header identifying the
2427 2441 format type so it can be routed to an appropriate decompressor.
2428 2442 """
2429 2443 if not data:
2430 2444 return data
2431 2445
2432 2446 # Revlogs are read much more frequently than they are written and many
2433 2447 # chunks only take microseconds to decompress, so performance is
2434 2448 # important here.
2435 2449 #
2436 2450 # We can make a few assumptions about revlogs:
2437 2451 #
2438 2452 # 1) the majority of chunks will be compressed (as opposed to inline
2439 2453 # raw data).
2440 2454 # 2) decompressing *any* data will likely be at least 10x slower than
2441 2455 # returning raw inline data.
2442 2456 # 3) we want to prioritize common and officially supported compression
2443 2457 # engines
2444 2458 #
2445 2459 # It follows that we want to optimize for "decompress compressed data
2446 2460 # when encoded with common and officially supported compression engines"
2447 2461 # case over "raw data" and "data encoded by less common or non-official
2448 2462 # compression engines." That is why we have the inline lookup first
2449 2463 # followed by the compengines lookup.
2450 2464 #
2451 2465 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2452 2466 # compressed chunks. And this matters for changelog and manifest reads.
2453 2467 t = data[0:1]
2454 2468
2455 2469 if t == 'x':
2456 2470 try:
2457 2471 return _zlibdecompress(data)
2458 2472 except zlib.error as e:
2459 2473 raise RevlogError(_('revlog decompress error: %s') %
2460 2474 stringutil.forcebytestr(e))
2461 2475 # '\0' is more common than 'u' so it goes first.
2462 2476 elif t == '\0':
2463 2477 return data
2464 2478 elif t == 'u':
2465 2479 return util.buffer(data, 1)
2466 2480
2467 2481 try:
2468 2482 compressor = self._decompressors[t]
2469 2483 except KeyError:
2470 2484 try:
2471 2485 engine = util.compengines.forrevlogheader(t)
2472 2486 compressor = engine.revlogcompressor()
2473 2487 self._decompressors[t] = compressor
2474 2488 except KeyError:
2475 2489 raise RevlogError(_('unknown compression type %r') % t)
2476 2490
2477 2491 return compressor.decompress(data)
2478 2492
2479 2493 def _isgooddeltainfo(self, deltainfo, revinfo):
2480 2494 """Returns True if the given delta is good. Good means that it is within
2481 2495 the disk span, disk size, and chain length bounds that we know to be
2482 2496 performant."""
2483 2497 if deltainfo is None:
2484 2498 return False
2485 2499
2486 2500 # - 'deltainfo.distance' is the distance from the base revision --
2487 2501 # bounding it limits the amount of I/O we need to do.
2488 2502 # - 'deltainfo.compresseddeltalen' is the sum of the total size of
2489 2503 # deltas we need to apply -- bounding it limits the amount of CPU
2490 2504 # we consume.
2491 2505
2492 2506 if self._sparserevlog:
2493 2507 # As sparse-read will be used, we can consider that the distance
2494 2508 # is the span of the largest read chunk rather than the span of
2495 2509 # the whole chunk
2496 2510 base = deltainfo.base
2497 2511
2498 2512 if base != nullrev:
2499 2513 deltachain = self._deltachain(base)[0]
2500 2514 else:
2501 2515 deltachain = []
2502 2516
2503 2517 chunks = _slicechunk(self, deltachain, deltainfo)
2504 2518 all_span = [_segmentspan(self, revs, deltainfo) for revs in chunks]
2505 2519 distance = max(all_span)
2506 2520 else:
2507 2521 distance = deltainfo.distance
2508 2522
2509 2523 textlen = revinfo.textlen
2510 2524 defaultmax = textlen * 4
2511 2525 maxdist = self._maxdeltachainspan
2512 2526 if not maxdist:
2513 2527 maxdist = distance # ensure the conditional passes
2514 2528 maxdist = max(maxdist, defaultmax)
2515 2529 if self._sparserevlog and maxdist < self._srmingapsize:
2516 2530 # In multiple places, we ignore irrelevant data ranges below a
2517 2531 # certain size. Apply the same tradeoff here and relax the span
2518 2532 # constraint for small enough content.
2519 2533 maxdist = self._srmingapsize
2520 2534
2521 2535 # Bad delta from read span:
2522 2536 #
2523 2537 # If the span of data read is larger than the maximum allowed.
2524 2538 if maxdist < distance:
2525 2539 return False
2526 2540
2527 2541 # Bad delta from new delta size:
2528 2542 #
2529 2543 # If the delta size is larger than the target text, storing the
2530 2544 # delta will be inefficient.
2531 2545 if textlen < deltainfo.deltalen:
2532 2546 return False
2533 2547
2534 2548 # Bad delta from cumulated payload size:
2535 2549 #
2536 2550 # If the sum of the deltas gets larger than K * target text length.
2537 2551 if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
2538 2552 return False
2539 2553
2540 2554 # Bad delta from chain length:
2541 2555 #
2542 2556 # If the number of deltas in the chain gets too high.
2543 2557 if self._maxchainlen and self._maxchainlen < deltainfo.chainlen:
2544 2558 return False
2545 2559
2546 2560 return True
2547 2561
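# The four rejection rules above, condensed into a toy predicate over
# plain integers; ``limit`` stands in for the module constant
# LIMIT_DELTA2TEXT and everything else mirrors the deltainfo fields:
def _toygooddelta(distance, deltalen, chainedlen, chainlen,
                  textlen, maxdist, maxchainlen, limit):
    if maxdist < distance:                      # read span too large
        return False
    if textlen < deltalen:                      # delta bigger than text
        return False
    if textlen * limit < chainedlen:            # cumulated payload too big
        return False
    if maxchainlen and maxchainlen < chainlen:  # chain too long
        return False
    return True
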
2548 2562 def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
2549 2563 cachedelta, ifh, dfh, alwayscache=False,
2550 2564 deltacomputer=None):
2551 2565 """internal function to add revisions to the log
2552 2566
2553 2567 see addrevision for argument descriptions.
2554 2568
2555 2569 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2556 2570
2557 2571 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2558 2572 be used.
2559 2573
2560 2574 invariants:
2561 2575 - rawtext is optional (can be None); if not set, cachedelta must be set.
2562 2576 if both are set, they must correspond to each other.
2563 2577 """
2564 2578 if node == nullid:
2565 2579 raise RevlogError(_("%s: attempt to add null revision") %
2566 2580 (self.indexfile))
2567 2581 if node == wdirid or node in wdirfilenodeids:
2568 2582 raise RevlogError(_("%s: attempt to add wdir revision") %
2569 2583 (self.indexfile))
2570 2584
2571 2585 if self._inline:
2572 2586 fh = ifh
2573 2587 else:
2574 2588 fh = dfh
2575 2589
2576 2590 btext = [rawtext]
2577 2591
2578 2592 curr = len(self)
2579 2593 prev = curr - 1
2580 2594 offset = self.end(prev)
2581 2595 p1r, p2r = self.rev(p1), self.rev(p2)
2582 2596
2583 2597 # full versions are inserted when the needed deltas
2584 2598 # become comparable to the uncompressed text
2585 2599 if rawtext is None:
2586 2600 # need rawtext size, before changed by flag processors, which is
2587 2601 # the non-raw size. use revlog explicitly to avoid filelog's extra
2588 2602 # logic that might remove metadata size.
2589 2603 textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
2590 2604 cachedelta[1])
2591 2605 else:
2592 2606 textlen = len(rawtext)
2593 2607
2594 2608 if deltacomputer is None:
2595 2609 deltacomputer = _deltacomputer(self)
2596 2610
2597 2611 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2598 2612
2599 2613 # no delta for flag processor revision (see "candelta" for why)
2600 2614 # not calling candelta since only one revision needs testing, also
2601 2615 # to avoid the overhead of fetching flags again.
2602 2616 if flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
2603 2617 deltainfo = None
2604 2618 else:
2605 2619 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2606 2620
2607 2621 if deltainfo is not None:
2608 2622 base = deltainfo.base
2609 2623 chainbase = deltainfo.chainbase
2610 2624 data = deltainfo.data
2611 2625 l = deltainfo.deltalen
2612 2626 else:
2613 2627 rawtext = deltacomputer.buildtext(revinfo, fh)
2614 2628 data = self.compress(rawtext)
2615 2629 l = len(data[1]) + len(data[0])
2616 2630 base = chainbase = curr
2617 2631
2618 2632 e = (offset_type(offset, flags), l, textlen,
2619 2633 base, link, p1r, p2r, node)
2620 2634 self.index.append(e)
2621 2635 self.nodemap[node] = curr
2622 2636
2623 2637 entry = self._io.packentry(e, self.node, self.version, curr)
2624 2638 self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
2625 2639
2626 2640 if alwayscache and rawtext is None:
2627 2641 rawtext = deltacomputer.buildtext(revinfo, fh)
2628 2642
2629 2643 if type(rawtext) == bytes: # only accept immutable objects
2630 2644 self._cache = (node, curr, rawtext)
2631 2645 self._chainbasecache[curr] = chainbase
2632 2646 return node
2633 2647
2634 2648 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
2635 2649 # Files opened in a+ mode have inconsistent behavior on various
2636 2650 # platforms. Windows requires that a file positioning call be made
2637 2651 # when the file handle transitions between reads and writes. See
2638 2652 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2639 2653 # platforms, Python or the platform itself can be buggy. Some versions
2640 2654 # of Solaris have been observed to not append at the end of the file
2641 2655 # if the file was seeked to before the end. See issue4943 for more.
2642 2656 #
2643 2657 # We work around this issue by inserting a seek() before writing.
2644 2658 # Note: This is likely not necessary on Python 3.
2645 2659 ifh.seek(0, os.SEEK_END)
2646 2660 if dfh:
2647 2661 dfh.seek(0, os.SEEK_END)
2648 2662
2649 2663 curr = len(self) - 1
2650 2664 if not self._inline:
2651 2665 transaction.add(self.datafile, offset)
2652 2666 transaction.add(self.indexfile, curr * len(entry))
2653 2667 if data[0]:
2654 2668 dfh.write(data[0])
2655 2669 dfh.write(data[1])
2656 2670 ifh.write(entry)
2657 2671 else:
2658 2672 offset += curr * self._io.size
2659 2673 transaction.add(self.indexfile, offset, curr)
2660 2674 ifh.write(entry)
2661 2675 ifh.write(data[0])
2662 2676 ifh.write(data[1])
2663 2677 self._enforceinlinesize(transaction, ifh)
2664 2678
2665 2679 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
2666 2680 """
2667 2681 add a delta group
2668 2682
2669 2683 given a set of deltas, add them to the revision log. the
2670 2684 first delta is against its parent, which should be in our
2671 2685 log, the rest are against the previous delta.
2672 2686
2673 2687 If ``addrevisioncb`` is defined, it will be called with arguments of
2674 2688 this revlog and the node that was added.
2675 2689 """
2676 2690
2677 2691 nodes = []
2678 2692
2679 2693 r = len(self)
2680 2694 end = 0
2681 2695 if r:
2682 2696 end = self.end(r - 1)
2683 2697 ifh = self._indexfp("a+")
2684 2698 isize = r * self._io.size
2685 2699 if self._inline:
2686 2700 transaction.add(self.indexfile, end + isize, r)
2687 2701 dfh = None
2688 2702 else:
2689 2703 transaction.add(self.indexfile, isize, r)
2690 2704 transaction.add(self.datafile, end)
2691 2705 dfh = self._datafp("a+")
2692 2706 def flush():
2693 2707 if dfh:
2694 2708 dfh.flush()
2695 2709 ifh.flush()
2696 2710 try:
2697 2711 deltacomputer = _deltacomputer(self)
2698 2712 # loop through our set of deltas
2699 2713 for data in deltas:
2700 2714 node, p1, p2, linknode, deltabase, delta, flags = data
2701 2715 link = linkmapper(linknode)
2702 2716 flags = flags or REVIDX_DEFAULT_FLAGS
2703 2717
2704 2718 nodes.append(node)
2705 2719
2706 2720 if node in self.nodemap:
2707 2721 # this can happen if two branches make the same change
2708 2722 continue
2709 2723
2710 2724 for p in (p1, p2):
2711 2725 if p not in self.nodemap:
2712 2726 raise LookupError(p, self.indexfile,
2713 2727 _('unknown parent'))
2714 2728
2715 2729 if deltabase not in self.nodemap:
2716 2730 raise LookupError(deltabase, self.indexfile,
2717 2731 _('unknown delta base'))
2718 2732
2719 2733 baserev = self.rev(deltabase)
2720 2734
2721 2735 if baserev != nullrev and self.iscensored(baserev):
2722 2736 # if base is censored, delta must be a full replacement in a
2723 2737 # single patch operation
2724 2738 hlen = struct.calcsize(">lll")
2725 2739 oldlen = self.rawsize(baserev)
2726 2740 newlen = len(delta) - hlen
2727 2741 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2728 2742 raise error.CensoredBaseError(self.indexfile,
2729 2743 self.node(baserev))
2730 2744
2731 2745 if not flags and self._peek_iscensored(baserev, delta, flush):
2732 2746 flags |= REVIDX_ISCENSORED
2733 2747
2734 2748 # We assume consumers of addrevisioncb will want to retrieve
2735 2749 # the added revision, which will require a call to
2736 2750 # revision(). revision() will fast path if there is a cache
2737 2751 # hit. So, we tell _addrevision() to always cache in this case.
2738 2752 # We're only using addgroup() in the context of changegroup
2739 2753 # generation so the revision data can always be handled as raw
2740 2754 # by the flagprocessor.
2741 2755 self._addrevision(node, None, transaction, link,
2742 2756 p1, p2, flags, (baserev, delta),
2743 2757 ifh, dfh,
2744 2758 alwayscache=bool(addrevisioncb),
2745 2759 deltacomputer=deltacomputer)
2746 2760
2747 2761 if addrevisioncb:
2748 2762 addrevisioncb(self, node)
2749 2763
2750 2764 if not dfh and not self._inline:
2751 2765 # addrevision switched from inline to conventional
2752 2766 # reopen the index
2753 2767 ifh.close()
2754 2768 dfh = self._datafp("a+")
2755 2769 ifh = self._indexfp("a+")
2756 2770 finally:
2757 2771 if dfh:
2758 2772 dfh.close()
2759 2773 ifh.close()
2760 2774
2761 2775 return nodes
2762 2776
2763 2777 def iscensored(self, rev):
2764 2778 """Check if a file revision is censored."""
2765 2779 if not self._censorable:
2766 2780 return False
2767 2781
2768 2782 return self.flags(rev) & REVIDX_ISCENSORED
2769 2783
2770 2784 def _peek_iscensored(self, baserev, delta, flush):
2771 2785 """Quickly check if a delta produces a censored revision."""
2772 2786 if not self._censorable:
2773 2787 return False
2774 2788
2775 2789 # Fragile heuristic: unless new file meta keys are added alphabetically
2776 2790 # preceding "censored", all censored revisions are prefixed by
2777 2791 # "\1\ncensored:". A delta producing such a censored revision must be a
2778 2792 # full-replacement delta, so we inspect the first and only patch in the
2779 2793 # delta for this prefix.
2780 2794 hlen = struct.calcsize(">lll")
2781 2795 if len(delta) <= hlen:
2782 2796 return False
2783 2797
2784 2798 oldlen = self.rawsize(baserev)
2785 2799 newlen = len(delta) - hlen
2786 2800 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2787 2801 return False
2788 2802
2789 2803 add = "\1\ncensored:"
2790 2804 addlen = len(add)
2791 2805 return newlen >= addlen and delta[hlen:hlen + addlen] == add
2792 2806
2793 2807 def getstrippoint(self, minlink):
2794 2808 """find the minimum rev that must be stripped to strip the linkrev
2795 2809
2796 2810 Returns a tuple containing the minimum rev and a set of all revs that
2797 2811 have linkrevs that will be broken by this strip.
2798 2812 """
2799 2813 brokenrevs = set()
2800 2814 strippoint = len(self)
2801 2815
2802 2816 heads = {}
2803 2817 futurelargelinkrevs = set()
2804 2818 for head in self.headrevs():
2805 2819 headlinkrev = self.linkrev(head)
2806 2820 heads[head] = headlinkrev
2807 2821 if headlinkrev >= minlink:
2808 2822 futurelargelinkrevs.add(headlinkrev)
2809 2823
2810 2824 # This algorithm involves walking down the rev graph, starting at the
2811 2825 # heads. Since the revs are topologically sorted according to linkrev,
2812 2826 # once all head linkrevs are below the minlink, we know there are
2813 2827 # no more revs that could have a linkrev greater than minlink.
2814 2828 # So we can stop walking.
2815 2829 while futurelargelinkrevs:
2816 2830 strippoint -= 1
2817 2831 linkrev = heads.pop(strippoint)
2818 2832
2819 2833 if linkrev < minlink:
2820 2834 brokenrevs.add(strippoint)
2821 2835 else:
2822 2836 futurelargelinkrevs.remove(linkrev)
2823 2837
2824 2838 for p in self.parentrevs(strippoint):
2825 2839 if p != nullrev:
2826 2840 plinkrev = self.linkrev(p)
2827 2841 heads[p] = plinkrev
2828 2842 if plinkrev >= minlink:
2829 2843 futurelargelinkrevs.add(plinkrev)
2830 2844
2831 2845 return strippoint, brokenrevs
2832 2846
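# Intuition for the walk above: revs are visited from the tip downward,
# with ``heads`` holding the current frontier and its linkrevs while
# ``futurelargelinkrevs`` tracks the frontier linkrevs still >= minlink.
# Because linkrevs grow with topological order, once that set drains no
# earlier rev can break the strip, so ``strippoint`` is final.
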
2833 2847 def strip(self, minlink, transaction):
2834 2848 """truncate the revlog on the first revision with a linkrev >= minlink
2835 2849
2836 2850 This function is called when we're stripping revision minlink and
2837 2851 its descendants from the repository.
2838 2852
2839 2853 We have to remove all revisions with linkrev >= minlink, because
2840 2854 the equivalent changelog revisions will be renumbered after the
2841 2855 strip.
2842 2856
2843 2857 So we truncate the revlog on the first of these revisions, and
2844 2858 trust that the caller has saved the revisions that shouldn't be
2845 2859 removed and that it'll re-add them after this truncation.
2846 2860 """
2847 2861 if len(self) == 0:
2848 2862 return
2849 2863
2850 2864 rev, _ = self.getstrippoint(minlink)
2851 2865 if rev == len(self):
2852 2866 return
2853 2867
2854 2868 # first truncate the files on disk
2855 2869 end = self.start(rev)
2856 2870 if not self._inline:
2857 2871 transaction.add(self.datafile, end)
2858 2872 end = rev * self._io.size
2859 2873 else:
2860 2874 end += rev * self._io.size
2861 2875
2862 2876 transaction.add(self.indexfile, end)
2863 2877
2864 2878 # then reset internal state in memory to forget those revisions
2865 2879 self._cache = None
2866 2880 self._chaininfocache = {}
2867 2881 self._chunkclear()
2868 2882 for x in pycompat.xrange(rev, len(self)):
2869 2883 del self.nodemap[self.node(x)]
2870 2884
2871 2885 del self.index[rev:-1]
2872 2886 self._nodepos = None
2873 2887
2874 2888 def checksize(self):
2875 2889 expected = 0
2876 2890 if len(self):
2877 2891 expected = max(0, self.end(len(self) - 1))
2878 2892
2879 2893 try:
2880 2894 with self._datafp() as f:
2881 2895 f.seek(0, 2)
2882 2896 actual = f.tell()
2883 2897 dd = actual - expected
2884 2898 except IOError as inst:
2885 2899 if inst.errno != errno.ENOENT:
2886 2900 raise
2887 2901 dd = 0
2888 2902
2889 2903 try:
2890 2904 f = self.opener(self.indexfile)
2891 2905 f.seek(0, 2)
2892 2906 actual = f.tell()
2893 2907 f.close()
2894 2908 s = self._io.size
2895 2909 i = max(0, actual // s)
2896 2910 di = actual - (i * s)
2897 2911 if self._inline:
2898 2912 databytes = 0
2899 2913 for r in self:
2900 2914 databytes += max(0, self.length(r))
2901 2915 dd = 0
2902 2916 di = actual - len(self) * s - databytes
2903 2917 except IOError as inst:
2904 2918 if inst.errno != errno.ENOENT:
2905 2919 raise
2906 2920 di = 0
2907 2921
2908 2922 return (dd, di)
2909 2923
2910 2924 def files(self):
2911 2925 res = [self.indexfile]
2912 2926 if not self._inline:
2913 2927 res.append(self.datafile)
2914 2928 return res
2915 2929
2916 2930 DELTAREUSEALWAYS = 'always'
2917 2931 DELTAREUSESAMEREVS = 'samerevs'
2918 2932 DELTAREUSENEVER = 'never'
2919 2933
2920 2934 DELTAREUSEFULLADD = 'fulladd'
2921 2935
2922 2936 DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}
2923 2937
2924 2938 def clone(self, tr, destrevlog, addrevisioncb=None,
2925 2939 deltareuse=DELTAREUSESAMEREVS, deltabothparents=None):
2926 2940 """Copy this revlog to another, possibly with format changes.
2927 2941
2928 2942 The destination revlog will contain the same revisions and nodes.
2929 2943 However, it may not be bit-for-bit identical due to e.g. delta encoding
2930 2944 differences.
2931 2945
2932 2946 The ``deltareuse`` argument controls how deltas from the existing revlog
2933 2947 are preserved in the destination revlog. The argument can have the
2934 2948 following values:
2935 2949
2936 2950 DELTAREUSEALWAYS
2937 2951 Deltas will always be reused (if possible), even if the destination
2938 2952 revlog would not select the same revisions for the delta. This is the
2939 2953 fastest mode of operation.
2940 2954 DELTAREUSESAMEREVS
2941 2955 Deltas will be reused if the destination revlog would pick the same
2942 2956 revisions for the delta. This mode strikes a balance between speed
2943 2957 and optimization.
2944 2958 DELTAREUSENEVER
2945 2959 Deltas will never be reused. This is the slowest mode of execution.
2946 2960 This mode can be used to recompute deltas (e.g. if the diff/delta
2947 2961 algorithm changes).
2948 2962
2949 2963 Delta computation can be slow, so the choice of delta reuse policy can
2950 2964 significantly affect run time.
2951 2965
2952 2966 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2953 2967 two extremes. Deltas will be reused if they are appropriate. But if the
2954 2968 delta could choose a better revision, it will do so. This means if you
2955 2969 are converting a non-generaldelta revlog to a generaldelta revlog,
2956 2970 deltas will be recomputed if the delta's parent isn't a parent of the
2957 2971 revision.
2958 2972
2959 2973 In addition to the delta policy, the ``deltabothparents`` argument
2960 2974 controls whether to compute deltas against both parents for merges.
2961 2975 If unset, the destination revlog keeps its current setting.
2962 2976 """
2963 2977 if deltareuse not in self.DELTAREUSEALL:
2964 2978 raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)
2965 2979
2966 2980 if len(destrevlog):
2967 2981 raise ValueError(_('destination revlog is not empty'))
2968 2982
2969 2983 if getattr(self, 'filteredrevs', None):
2970 2984 raise ValueError(_('source revlog has filtered revisions'))
2971 2985 if getattr(destrevlog, 'filteredrevs', None):
2972 2986 raise ValueError(_('destination revlog has filtered revisions'))
2973 2987
2974 2988 # lazydeltabase controls whether to reuse a cached delta, if possible.
2975 2989 oldlazydeltabase = destrevlog._lazydeltabase
2976 2990 oldamd = destrevlog._deltabothparents
2977 2991
2978 2992 try:
2979 2993 if deltareuse == self.DELTAREUSEALWAYS:
2980 2994 destrevlog._lazydeltabase = True
2981 2995 elif deltareuse == self.DELTAREUSESAMEREVS:
2982 2996 destrevlog._lazydeltabase = False
2983 2997
2984 2998 destrevlog._deltabothparents = deltabothparents or oldamd
2985 2999
2986 3000 populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
2987 3001 self.DELTAREUSESAMEREVS)
2988 3002
2989 3003 deltacomputer = _deltacomputer(destrevlog)
2990 3004 index = self.index
2991 3005 for rev in self:
2992 3006 entry = index[rev]
2993 3007
2994 3008 # Some classes override linkrev to take filtered revs into
2995 3009 # account. Use raw entry from index.
2996 3010 flags = entry[0] & 0xffff
2997 3011 linkrev = entry[4]
2998 3012 p1 = index[entry[5]][7]
2999 3013 p2 = index[entry[6]][7]
3000 3014 node = entry[7]
3001 3015
3002 3016 # (Possibly) reuse the delta from the revlog if allowed and
3003 3017 # the revlog chunk is a delta.
3004 3018 cachedelta = None
3005 3019 rawtext = None
3006 3020 if populatecachedelta:
3007 3021 dp = self.deltaparent(rev)
3008 3022 if dp != nullrev:
3009 3023 cachedelta = (dp, bytes(self._chunk(rev)))
3010 3024
3011 3025 if not cachedelta:
3012 3026 rawtext = self.revision(rev, raw=True)
3013 3027
3014 3028
3015 3029 if deltareuse == self.DELTAREUSEFULLADD:
3016 3030 destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
3017 3031 cachedelta=cachedelta,
3018 3032 node=node, flags=flags,
3019 3033 deltacomputer=deltacomputer)
3020 3034 else:
3021 3035 ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
3022 3036 checkambig=False)
3023 3037 dfh = None
3024 3038 if not destrevlog._inline:
3025 3039 dfh = destrevlog.opener(destrevlog.datafile, 'a+')
3026 3040 try:
3027 3041 destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
3028 3042 p2, flags, cachedelta, ifh, dfh,
3029 3043 deltacomputer=deltacomputer)
3030 3044 finally:
3031 3045 if dfh:
3032 3046 dfh.close()
3033 3047 ifh.close()
3034 3048
3035 3049 if addrevisioncb:
3036 3050 addrevisioncb(self, rev, node)
3037 3051 finally:
3038 3052 destrevlog._lazydeltabase = oldlazydeltabase
3039 3053 destrevlog._deltabothparents = oldamd