revlog: only consider the span of the delta section...
Paul Morelle
r39193:a98e926b default
@@ -1,3068 +1,3073 @@
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import collections
17 17 import contextlib
18 18 import errno
19 19 import hashlib
20 20 import heapq
21 21 import os
22 22 import re
23 23 import struct
24 24 import zlib
25 25
26 26 # import stuff from node for others to import from revlog
27 27 from .node import (
28 28 bin,
29 29 hex,
30 30 nullid,
31 31 nullrev,
32 32 wdirfilenodeids,
33 33 wdirhex,
34 34 wdirid,
35 35 wdirrev,
36 36 )
37 37 from .i18n import _
38 38 from .thirdparty import (
39 39 attr,
40 40 )
41 41 from . import (
42 42 ancestor,
43 43 error,
44 44 mdiff,
45 45 policy,
46 46 pycompat,
47 47 templatefilters,
48 48 util,
49 49 )
50 50 from .utils import (
51 51 stringutil,
52 52 )
53 53
54 54 parsers = policy.importmod(r'parsers')
55 55
56 56 # Aliased for performance.
57 57 _zlibdecompress = zlib.decompress
58 58
59 59 # revlog header flags
60 60 REVLOGV0 = 0
61 61 REVLOGV1 = 1
62 62 # Dummy value until file format is finalized.
63 63 # Reminder: change the bounds check in revlog.__init__ when this is changed.
64 64 REVLOGV2 = 0xDEAD
65 65 FLAG_INLINE_DATA = (1 << 16)
66 66 FLAG_GENERALDELTA = (1 << 17)
67 67 REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA
68 68 REVLOG_DEFAULT_FORMAT = REVLOGV1
69 69 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
70 70 REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA
71 71 REVLOGV2_FLAGS = REVLOGV1_FLAGS
72 72
73 73 # revlog index flags
74 74 REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
75 75 REVIDX_ELLIPSIS = (1 << 14) # revision hash does not match data (narrowhg)
76 76 REVIDX_EXTSTORED = (1 << 13) # revision data is stored externally
77 77 REVIDX_DEFAULT_FLAGS = 0
78 78 # stable order in which flags need to be processed and their processors applied
79 79 REVIDX_FLAGS_ORDER = [
80 80 REVIDX_ISCENSORED,
81 81 REVIDX_ELLIPSIS,
82 82 REVIDX_EXTSTORED,
83 83 ]
84 84 REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)
85 85 # bitmask for flags that could cause rawdata content change
86 86 REVIDX_RAWTEXT_CHANGING_FLAGS = REVIDX_ISCENSORED | REVIDX_EXTSTORED
87 87
88 88 # max size of revlog with inline data
89 89 _maxinline = 131072
90 90 _chunksize = 1048576
91 91
92 92 RevlogError = error.RevlogError
93 93 LookupError = error.LookupError
94 94 AmbiguousPrefixLookupError = error.AmbiguousPrefixLookupError
95 95 CensoredNodeError = error.CensoredNodeError
96 96 ProgrammingError = error.ProgrammingError
97 97
98 98 # Store flag processors (cf. 'addflagprocessor()' to register)
99 99 _flagprocessors = {
100 100 REVIDX_ISCENSORED: None,
101 101 }
102 102
103 103 _mdre = re.compile('\1\n')
104 104 def parsemeta(text):
105 105 """return (metadatadict, metadatasize)"""
106 106 # text can be buffer, so we can't use .startswith or .index
107 107 if text[:2] != '\1\n':
108 108 return None, None
109 109 s = _mdre.search(text, 2).start()
110 110 mtext = text[2:s]
111 111 meta = {}
112 112 for l in mtext.splitlines():
113 113 k, v = l.split(": ", 1)
114 114 meta[k] = v
115 115 return meta, (s + 2)
116 116
117 117 def packmeta(meta, text):
118 118 keys = sorted(meta)
119 119 metatext = "".join("%s: %s\n" % (k, meta[k]) for k in keys)
120 120 return "\1\n%s\1\n%s" % (metatext, text)
121 121
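For orientation, a round trip through these two helpers looks like the sketch below (assuming py2-style byte strings, as used throughout this file; the metadata values are illustrative):

    meta = {'copy': 'a.txt', 'copyrev': 'deadbeef'}
    packed = packmeta(meta, 'file contents')
    # packed == '\x01\ncopy: a.txt\ncopyrev: deadbeef\n\x01\nfile contents'
    parsed, offset = parsemeta(packed)
    assert parsed == meta
    assert packed[offset:] == 'file contents'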
122 122 def _censoredtext(text):
123 123 m, offs = parsemeta(text)
124 124 return m and "censored" in m
125 125
126 126 def addflagprocessor(flag, processor):
127 127 """Register a flag processor on a revision data flag.
128 128
129 129 Invariant:
130 130 - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
131 131 and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
132 132 - Only one flag processor can be registered on a specific flag.
133 133 - flagprocessors must be 3-tuples of functions (read, write, raw) with the
134 134 following signatures:
135 135 - (read) f(self, rawtext) -> text, bool
136 136 - (write) f(self, text) -> rawtext, bool
137 137 - (raw) f(self, rawtext) -> bool
138 138 "text" is presented to the user. "rawtext" is stored in revlog data, not
139 139 directly visible to the user.
140 140 The boolean returned by these transforms is used to determine whether
141 141 the returned text can be used for hash integrity checking. For example,
142 142 if "write" returns False, then "text" is used to generate hash. If
143 143 "write" returns True, that basically means "rawtext" returned by "write"
144 144 should be used to generate hash. Usually, "write" and "read" return
145 145 different booleans. And "raw" returns the same boolean as "write".
146 146
147 147 Note: The 'raw' transform is used for changegroup generation and in some
148 148 debug commands. In this case the transform only indicates whether the
149 149 contents can be used for hash integrity checks.
150 150 """
151 151 if not flag & REVIDX_KNOWN_FLAGS:
152 152 msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
153 153 raise ProgrammingError(msg)
154 154 if flag not in REVIDX_FLAGS_ORDER:
155 155 msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
156 156 raise ProgrammingError(msg)
157 157 if flag in _flagprocessors:
158 158 msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
159 159 raise error.Abort(msg)
160 160 _flagprocessors[flag] = processor
161 161
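As a hedged sketch of registration: the flag below is a real constant, but the three transforms, and the resolveexternal/storeexternal helpers they call, are hypothetical, shaped only to match the (read, write, raw) contract documented above:

    def _readext(self, rawtext):
        # hypothetical: expand stored pointer into user-visible text;
        # False means the returned text is not usable for hash checks
        return resolveexternal(rawtext), False
    def _writeext(self, text):
        # hypothetical: replace text with an external-storage pointer
        return storeexternal(text), False
    def _rawext(self, rawtext):
        return False   # rawtext is a pointer, unusable for hash checks

    addflagprocessor(REVIDX_EXTSTORED, (_readext, _writeext, _rawext))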
162 162 def getoffset(q):
163 163 return int(q >> 16)
164 164
165 165 def gettype(q):
166 166 return int(q & 0xFFFF)
167 167
168 168 def offset_type(offset, type):
169 169 if (type & ~REVIDX_KNOWN_FLAGS) != 0:
170 170 raise ValueError('unknown revlog index flags')
171 171 return int(int(offset) << 16 | type)
172 172
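Worked example of the packing these three helpers implement: the byte offset lives in the upper bits of a single integer and the 16 flag bits in the lower ones (values below are illustrative):

    v = offset_type(1024, REVIDX_ISCENSORED)   # (1024 << 16) | 0x8000
    assert getoffset(v) == 1024
    assert gettype(v) == REVIDX_ISCENSORED
    offset_type(0, 1 << 3)   # raises ValueError: unknown revlog index flags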
173 173 _nullhash = hashlib.sha1(nullid)
174 174
175 175 def hash(text, p1, p2):
176 176 """generate a hash from the given text and its parent hashes
177 177
178 178 This hash combines both the current file contents and its history
179 179 in a manner that makes it easy to distinguish nodes with the same
180 180 content in the revision graph.
181 181 """
182 182 # As of now, if one of the parent nodes is null, p2 is null
183 183 if p2 == nullid:
184 184 # deep copy of a hash is faster than creating one
185 185 s = _nullhash.copy()
186 186 s.update(p1)
187 187 else:
188 188 # none of the parent nodes are nullid
189 189 if p1 < p2:
190 190 a = p1
191 191 b = p2
192 192 else:
193 193 a = p2
194 194 b = p1
195 195 s = hashlib.sha1(a)
196 196 s.update(b)
197 197 s.update(text)
198 198 return s.digest()
199 199
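A sketch of how the node id falls out of this: for a root revision (both parents null) it degenerates to sha1(nullid + nullid + text), and for merges the parent sort makes the result independent of parent order (pa and pb below stand for arbitrary 20-byte parent nodes):

    root = hash('content', nullid, nullid)   # sha1(nullid + nullid + 'content')
    assert hash('content', pa, pb) == hash('content', pb, pa)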
200 200 class _testrevlog(object):
201 201 """minimalist fake revlog to use in doctests"""
202 202
203 203 def __init__(self, data, density=0.5, mingap=0):
204 204 """data is an list of revision payload boundaries"""
205 205 self._data = data
206 206 self._srdensitythreshold = density
207 207 self._srmingapsize = mingap
208 208
209 209 def start(self, rev):
210 210 if rev == 0:
211 211 return 0
212 212 return self._data[rev - 1]
213 213
214 214 def end(self, rev):
215 215 return self._data[rev]
216 216
217 217 def length(self, rev):
218 218 return self.end(rev) - self.start(rev)
219 219
220 220 def __len__(self):
221 221 return len(self._data)
222 222
223 223 def _trimchunk(revlog, revs, startidx, endidx=None):
224 224 """returns revs[startidx:endidx] without empty trailing revs
225 225
226 226 Doctest Setup
227 227 >>> revlog = _testrevlog([
228 228 ... 5, #0
229 229 ... 10, #1
230 230 ... 12, #2
231 231 ... 12, #3 (empty)
232 232 ... 17, #4
233 233 ... 21, #5
234 234 ... 21, #6 (empty)
235 235 ... ])
236 236
237 237 Contiguous cases:
238 238 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
239 239 [0, 1, 2, 3, 4, 5]
240 240 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
241 241 [0, 1, 2, 3, 4]
242 242 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
243 243 [0, 1, 2]
244 244 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
245 245 [2]
246 246 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
247 247 [3, 4, 5]
248 248 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
249 249 [3, 4]
250 250
251 251 Discontiguous cases:
252 252 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
253 253 [1, 3, 5]
254 254 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
255 255 [1]
256 256 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
257 257 [3, 5]
258 258 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
259 259 [3, 5]
260 260 """
261 261 length = revlog.length
262 262
263 263 if endidx is None:
264 264 endidx = len(revs)
265 265
266 266 # If we have a non-empty delta candidate, there is nothing to trim
267 267 if revs[endidx - 1] < len(revlog):
268 268 # Trim empty revs at the end, except the very first revision of a chain
269 269 while (endidx > 1
270 270 and endidx > startidx
271 271 and length(revs[endidx - 1]) == 0):
272 272 endidx -= 1
273 273
274 274 return revs[startidx:endidx]
275 275
276 276 def _segmentspan(revlog, revs, deltainfo=None):
277 277 """Get the byte span of a segment of revisions
278 278
279 279 revs is a sorted array of revision numbers
280 280
281 281 >>> revlog = _testrevlog([
282 282 ... 5, #0
283 283 ... 10, #1
284 284 ... 12, #2
285 285 ... 12, #3 (empty)
286 286 ... 17, #4
287 287 ... ])
288 288
289 289 >>> _segmentspan(revlog, [0, 1, 2, 3, 4])
290 290 17
291 291 >>> _segmentspan(revlog, [0, 4])
292 292 17
293 293 >>> _segmentspan(revlog, [3, 4])
294 294 5
295 295 >>> _segmentspan(revlog, [1, 2, 3,])
296 296 7
297 297 >>> _segmentspan(revlog, [1, 3])
298 298 7
299 299 """
300 300 if not revs:
301 301 return 0
302 302 if deltainfo is not None and len(revlog) <= revs[-1]:
303 303 if len(revs) == 1:
304 304 return deltainfo.deltalen
305 305 offset = revlog.end(len(revlog) - 1)
306 306 end = deltainfo.deltalen + offset
307 307 else:
308 308 end = revlog.end(revs[-1])
309 309 return end - revlog.start(revs[0])
310 310
311 311 def _slicechunk(revlog, revs, deltainfo=None, targetsize=None):
312 312 """slice revs to reduce the amount of unrelated data to be read from disk.
313 313
314 314 ``revs`` is sliced into groups that should be read in one go.
315 315 Assume that revs are sorted.
316 316
317 317 The initial chunk is sliced until the overall density (payload/chunks-span
318 318 ratio) is above `revlog._srdensitythreshold`. No gap smaller than
319 319 `revlog._srmingapsize` is skipped.
320 320
321 321 If `targetsize` is set, no chunk larger than `targetsize` will be yielded.
322 322 For consistency with other slicing choices, this limit won't go lower than
323 323 `revlog._srmingapsize`.
324 324
325 325 If individual revision chunks are larger than this limit, they will still
326 326 be yielded individually.
327 327
328 328 >>> revlog = _testrevlog([
329 329 ... 5, #00 (5)
330 330 ... 10, #01 (5)
331 331 ... 12, #02 (2)
332 332 ... 12, #03 (empty)
333 333 ... 27, #04 (15)
334 334 ... 31, #05 (4)
335 335 ... 31, #06 (empty)
336 336 ... 42, #07 (11)
337 337 ... 47, #08 (5)
338 338 ... 47, #09 (empty)
339 339 ... 48, #10 (1)
340 340 ... 51, #11 (3)
341 341 ... 74, #12 (23)
342 342 ... 85, #13 (11)
343 343 ... 86, #14 (1)
344 344 ... 91, #15 (5)
345 345 ... ])
346 346
347 347 >>> list(_slicechunk(revlog, list(range(16))))
348 348 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
349 349 >>> list(_slicechunk(revlog, [0, 15]))
350 350 [[0], [15]]
351 351 >>> list(_slicechunk(revlog, [0, 11, 15]))
352 352 [[0], [11], [15]]
353 353 >>> list(_slicechunk(revlog, [0, 11, 13, 15]))
354 354 [[0], [11, 13, 15]]
355 355 >>> list(_slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
356 356 [[1, 2], [5, 8, 10, 11], [14]]
357 357
358 358 Slicing with a maximum chunk size
359 359 >>> list(_slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
360 360 [[0], [11], [13], [15]]
361 361 >>> list(_slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
362 362 [[0], [11], [13, 15]]
363 363 """
364 364 if targetsize is not None:
365 365 targetsize = max(targetsize, revlog._srmingapsize)
366 366 # targetsize should not be specified when evaluating delta candidates:
367 367 # * targetsize is used to ensure we stay within specification when reading,
368 368 # * deltainfo is used to pick a good delta chain when writing.
369 369 if not (deltainfo is None or targetsize is None):
370 370 msg = 'cannot use `targetsize` with a `deltainfo`'
371 371 raise error.ProgrammingError(msg)
372 372 for chunk in _slicechunktodensity(revlog, revs,
373 373 deltainfo,
374 374 revlog._srdensitythreshold,
375 375 revlog._srmingapsize):
376 376 for subchunk in _slicechunktosize(revlog, chunk, targetsize):
377 377 yield subchunk
378 378
379 379 def _slicechunktosize(revlog, revs, targetsize=None):
380 380 """slice revs to match the target size
381 381
382 382 This is intended to be used on chunks that density slicing selected but
383 383 that are still too large compared to the read guarantee of revlogs. This
384 384 might happen when the "minimal gap size" interrupted the slicing or when
385 385 chains are built in a way that creates large blocks next to each other.
386 386
387 387 >>> revlog = _testrevlog([
388 388 ... 3, #0 (3)
389 389 ... 5, #1 (2)
390 390 ... 6, #2 (1)
391 391 ... 8, #3 (2)
392 392 ... 8, #4 (empty)
393 393 ... 11, #5 (3)
394 394 ... 12, #6 (1)
395 395 ... 13, #7 (1)
396 396 ... 14, #8 (1)
397 397 ... ])
398 398
399 399 Cases where the chunk is already small enough
400 400 >>> list(_slicechunktosize(revlog, [0], 3))
401 401 [[0]]
402 402 >>> list(_slicechunktosize(revlog, [6, 7], 3))
403 403 [[6, 7]]
404 404 >>> list(_slicechunktosize(revlog, [0], None))
405 405 [[0]]
406 406 >>> list(_slicechunktosize(revlog, [6, 7], None))
407 407 [[6, 7]]
408 408
409 409 Cases where we need actual slicing
410 410 >>> list(_slicechunktosize(revlog, [0, 1], 3))
411 411 [[0], [1]]
412 412 >>> list(_slicechunktosize(revlog, [1, 3], 3))
413 413 [[1], [3]]
414 414 >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
415 415 [[1, 2], [3]]
416 416 >>> list(_slicechunktosize(revlog, [3, 5], 3))
417 417 [[3], [5]]
418 418 >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
419 419 [[3], [5]]
420 420 >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
421 421 [[5], [6, 7, 8]]
422 422 >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
423 423 [[0], [1, 2], [3], [5], [6, 7, 8]]
424 424
425 425 Cases with too-large individual chunks (must return valid chunks)
426 426 >>> list(_slicechunktosize(revlog, [0, 1], 2))
427 427 [[0], [1]]
428 428 >>> list(_slicechunktosize(revlog, [1, 3], 1))
429 429 [[1], [3]]
430 430 >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
431 431 [[3], [5]]
432 432 """
433 433 assert targetsize is None or 0 <= targetsize
434 434 if targetsize is None or _segmentspan(revlog, revs) <= targetsize:
435 435 yield revs
436 436 return
437 437
438 438 startrevidx = 0
439 439 startdata = revlog.start(revs[0])
440 440 endrevidx = 0
441 441 iterrevs = enumerate(revs)
442 442 next(iterrevs) # skip first rev.
443 443 for idx, r in iterrevs:
444 444 span = revlog.end(r) - startdata
445 445 if span <= targetsize:
446 446 endrevidx = idx
447 447 else:
448 448 chunk = _trimchunk(revlog, revs, startrevidx, endrevidx + 1)
449 449 if chunk:
450 450 yield chunk
451 451 startrevidx = idx
452 452 startdata = revlog.start(r)
453 453 endrevidx = idx
454 454 yield _trimchunk(revlog, revs, startrevidx)
455 455
456 456 def _slicechunktodensity(revlog, revs, deltainfo=None, targetdensity=0.5,
457 457 mingapsize=0):
458 458 """slice revs to reduce the amount of unrelated data to be read from disk.
459 459
460 460 ``revs`` is sliced into groups that should be read in one go.
461 461 Assume that revs are sorted.
462 462
463 463 ``deltainfo`` is a _deltainfo instance of a revision that we would append
464 464 to the top of the revlog.
465 465
466 466 The initial chunk is sliced until the overall density (payload/chunks-span
467 467 ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
468 468 skipped.
469 469
470 470 >>> revlog = _testrevlog([
471 471 ... 5, #00 (5)
472 472 ... 10, #01 (5)
473 473 ... 12, #02 (2)
474 474 ... 12, #03 (empty)
475 475 ... 27, #04 (15)
476 476 ... 31, #05 (4)
477 477 ... 31, #06 (empty)
478 478 ... 42, #07 (11)
479 479 ... 47, #08 (5)
480 480 ... 47, #09 (empty)
481 481 ... 48, #10 (1)
482 482 ... 51, #11 (3)
483 483 ... 74, #12 (23)
484 484 ... 85, #13 (11)
485 485 ... 86, #14 (1)
486 486 ... 91, #15 (5)
487 487 ... ])
488 488
489 489 >>> list(_slicechunktodensity(revlog, list(range(16))))
490 490 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
491 491 >>> list(_slicechunktodensity(revlog, [0, 15]))
492 492 [[0], [15]]
493 493 >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
494 494 [[0], [11], [15]]
495 495 >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
496 496 [[0], [11, 13, 15]]
497 497 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
498 498 [[1, 2], [5, 8, 10, 11], [14]]
499 499 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
500 500 ... mingapsize=20))
501 501 [[1, 2, 3, 5, 8, 10, 11], [14]]
502 502 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
503 503 ... targetdensity=0.95))
504 504 [[1, 2], [5], [8, 10, 11], [14]]
505 505 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
506 506 ... targetdensity=0.95, mingapsize=12))
507 507 [[1, 2], [5, 8, 10, 11], [14]]
508 508 """
509 509 start = revlog.start
510 510 length = revlog.length
511 511
512 512 if len(revs) <= 1:
513 513 yield revs
514 514 return
515 515
516 516 nextrev = len(revlog)
517 517 nextoffset = revlog.end(nextrev - 1)
518 518
519 519 if deltainfo is None:
520 520 deltachainspan = _segmentspan(revlog, revs)
521 521 chainpayload = sum(length(r) for r in revs)
522 522 else:
523 523 deltachainspan = deltainfo.distance
524 524 chainpayload = deltainfo.compresseddeltalen
525 525
526 526 if deltachainspan < mingapsize:
527 527 yield revs
528 528 return
529 529
530 530 readdata = deltachainspan
531 531
532 532 if deltachainspan:
533 533 density = chainpayload / float(deltachainspan)
534 534 else:
535 535 density = 1.0
536 536
537 537 if density >= targetdensity:
538 538 yield revs
539 539 return
540 540
541 541 if deltainfo is not None and deltainfo.deltalen:
542 542 revs = list(revs)
543 543 revs.append(nextrev)
544 544
545 545 # Store the gaps in a heap to have them sorted by decreasing size
546 546 gapsheap = []
547 547 heapq.heapify(gapsheap)
548 548 prevend = None
549 549 for i, rev in enumerate(revs):
550 550 if rev < nextrev:
551 551 revstart = start(rev)
552 552 revlen = length(rev)
553 553 else:
554 554 revstart = nextoffset
555 555 revlen = deltainfo.deltalen
556 556
557 557 # Skip empty revisions to form larger holes
558 558 if revlen == 0:
559 559 continue
560 560
561 561 if prevend is not None:
562 562 gapsize = revstart - prevend
563 563 # only consider holes that are large enough
564 564 if gapsize > mingapsize:
565 565 heapq.heappush(gapsheap, (-gapsize, i))
566 566
567 567 prevend = revstart + revlen
568 568
569 569 # Collect the indices of the largest holes until the density is acceptable
570 570 indicesheap = []
571 571 heapq.heapify(indicesheap)
572 572 while gapsheap and density < targetdensity:
573 573 oppgapsize, gapidx = heapq.heappop(gapsheap)
574 574
575 575 heapq.heappush(indicesheap, gapidx)
576 576
577 577 # the gap sizes are stored as negatives to be sorted decreasingly
578 578 # by the heap
579 579 readdata -= (-oppgapsize)
580 580 if readdata > 0:
581 581 density = chainpayload / float(readdata)
582 582 else:
583 583 density = 1.0
584 584
585 585 # Cut the revs at collected indices
586 586 previdx = 0
587 587 while indicesheap:
588 588 idx = heapq.heappop(indicesheap)
589 589
590 590 chunk = _trimchunk(revlog, revs, previdx, idx)
591 591 if chunk:
592 592 yield chunk
593 593
594 594 previdx = idx
595 595
596 596 chunk = _trimchunk(revlog, revs, previdx)
597 597 if chunk:
598 598 yield chunk
599 599
600 600 @attr.s(slots=True, frozen=True)
601 601 class _deltainfo(object):
602 602 distance = attr.ib()
603 603 deltalen = attr.ib()
604 604 data = attr.ib()
605 605 base = attr.ib()
606 606 chainbase = attr.ib()
607 607 chainlen = attr.ib()
608 608 compresseddeltalen = attr.ib()
609 609 snapshotdepth = attr.ib()
610 610
611 611 class _deltacomputer(object):
612 612 def __init__(self, revlog):
613 613 self.revlog = revlog
614 614
615 615 def _getcandidaterevs(self, p1, p2, cachedelta):
616 616 """
617 617 Provides revisions that are of interest to diff against,
618 618 grouped by level of ease.
619 619 """
620 620 revlog = self.revlog
621 621 gdelta = revlog._generaldelta
622 622 curr = len(revlog)
623 623 prev = curr - 1
624 624 p1r, p2r = revlog.rev(p1), revlog.rev(p2)
625 625
626 626 # should we try to build a delta?
627 627 if prev != nullrev and revlog.storedeltachains:
628 628 tested = set()
629 629 # This condition is true most of the time when processing
630 630 # changegroup data into a generaldelta repo. The only time it
631 631 # isn't true is if this is the first revision in a delta chain
632 632 # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
633 633 if cachedelta and gdelta and revlog._lazydeltabase:
634 634 # Assume what we received from the server is a good choice
635 635 # build delta will reuse the cache
636 636 yield (cachedelta[0],)
637 637 tested.add(cachedelta[0])
638 638
639 639 if gdelta:
640 640 # exclude already lazy tested base if any
641 641 parents = [p for p in (p1r, p2r)
642 642 if p != nullrev and p not in tested]
643 643
644 644 if not revlog._deltabothparents and len(parents) == 2:
645 645 parents.sort()
646 646 # To minimize the chance of having to build a fulltext,
647 647 # pick first whichever parent is closest to us (max rev)
648 648 yield (parents[1],)
649 649 # then the other one (min rev) if the first did not fit
650 650 yield (parents[0],)
651 651 tested.update(parents)
652 652 elif len(parents) > 0:
653 653 # Test all parents (1 or 2), and keep the best candidate
654 654 yield parents
655 655 tested.update(parents)
656 656
657 657 if prev not in tested:
658 658 # other approaches failed; try against prev to hopefully save us a
659 659 # fulltext.
660 660 yield (prev,)
661 661 tested.add(prev)
662 662
663 663 def buildtext(self, revinfo, fh):
664 664 """Builds a fulltext version of a revision
665 665
666 666 revinfo: _revisioninfo instance that contains all needed info
667 667 fh: file handle to either the .i or the .d revlog file,
668 668 depending on whether it is inlined or not
669 669 """
670 670 btext = revinfo.btext
671 671 if btext[0] is not None:
672 672 return btext[0]
673 673
674 674 revlog = self.revlog
675 675 cachedelta = revinfo.cachedelta
676 676 flags = revinfo.flags
677 677 node = revinfo.node
678 678
679 679 baserev = cachedelta[0]
680 680 delta = cachedelta[1]
681 681 # special case deltas which replace entire base; no need to decode
682 682 # base revision. this neatly avoids censored bases, which throw when
683 683 # they're decoded.
684 684 hlen = struct.calcsize(">lll")
685 685 if delta[:hlen] == mdiff.replacediffheader(revlog.rawsize(baserev),
686 686 len(delta) - hlen):
687 687 btext[0] = delta[hlen:]
688 688 else:
689 689 # deltabase is rawtext before changed by flag processors, which is
690 690 # equivalent to non-raw text
691 691 basetext = revlog.revision(baserev, _df=fh, raw=False)
692 692 btext[0] = mdiff.patch(basetext, delta)
693 693
694 694 try:
695 695 res = revlog._processflags(btext[0], flags, 'read', raw=True)
696 696 btext[0], validatehash = res
697 697 if validatehash:
698 698 revlog.checkhash(btext[0], node, p1=revinfo.p1, p2=revinfo.p2)
699 699 if flags & REVIDX_ISCENSORED:
700 700 raise RevlogError(_('node %s is not censored') % node)
701 701 except CensoredNodeError:
702 702 # must pass the censored index flag to add censored revisions
703 703 if not flags & REVIDX_ISCENSORED:
704 704 raise
705 705 return btext[0]
706 706
707 707 def _builddeltadiff(self, base, revinfo, fh):
708 708 revlog = self.revlog
709 709 t = self.buildtext(revinfo, fh)
710 710 if revlog.iscensored(base):
711 711 # deltas based on a censored revision must replace the
712 712 # full content in one patch, so delta works everywhere
713 713 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
714 714 delta = header + t
715 715 else:
716 716 ptext = revlog.revision(base, _df=fh, raw=True)
717 717 delta = mdiff.textdiff(ptext, t)
718 718
719 719 return delta
720 720
721 721 def _builddeltainfo(self, revinfo, base, fh):
722 722 # can we use the cached delta?
723 723 if revinfo.cachedelta and revinfo.cachedelta[0] == base:
724 724 delta = revinfo.cachedelta[1]
725 725 else:
726 726 delta = self._builddeltadiff(base, revinfo, fh)
727 727 revlog = self.revlog
728 728 header, data = revlog.compress(delta)
729 729 deltalen = len(header) + len(data)
730 730 chainbase = revlog.chainbase(base)
731 731 offset = revlog.end(len(revlog) - 1)
732 732 dist = deltalen + offset - revlog.start(chainbase)
733 733 if revlog._generaldelta:
734 734 deltabase = base
735 735 else:
736 736 deltabase = chainbase
737 737 chainlen, compresseddeltalen = revlog._chaininfo(base)
738 738 chainlen += 1
739 739 compresseddeltalen += deltalen
740 740
741 741 revlog = self.revlog
742 742 snapshotdepth = None
743 743 if deltabase == nullrev:
744 744 snapshotdepth = 0
745 745 elif revlog._sparserevlog and revlog.issnapshot(deltabase):
746 746 # A delta chain should always be one full snapshot,
747 747 # zero or more semi-snapshots, and zero or more deltas
748 748 p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
749 749 if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
750 750 snapshotdepth = len(revlog._deltachain(deltabase)[0])
751 751
752 752 return _deltainfo(dist, deltalen, (header, data), deltabase,
753 753 chainbase, chainlen, compresseddeltalen,
754 754 snapshotdepth)
755 755
756 756 def finddeltainfo(self, revinfo, fh):
757 757 """Find an acceptable delta against a candidate revision
758 758
759 759 revinfo: information about the revision (instance of _revisioninfo)
760 760 fh: file handle to either the .i or the .d revlog file,
761 761 depending on whether it is inlined or not
762 762
763 763 Returns the first acceptable candidate revision, as ordered by
764 764 _getcandidaterevs
765 765 """
766 766 if not revinfo.textlen:
767 767 return None # empty files do not need deltas
768 768
769 769 cachedelta = revinfo.cachedelta
770 770 p1 = revinfo.p1
771 771 p2 = revinfo.p2
772 772 revlog = self.revlog
773 773
774 774 deltalength = self.revlog.length
775 775 deltaparent = self.revlog.deltaparent
776 776
777 777 deltainfo = None
778 778 deltas_limit = revinfo.textlen * LIMIT_DELTA2TEXT
779 779 for candidaterevs in self._getcandidaterevs(p1, p2, cachedelta):
780 780 # filter out delta bases that will never produce a good delta
781 781 candidaterevs = [r for r in candidaterevs
782 782 if self.revlog.length(r) <= deltas_limit]
783 783 nominateddeltas = []
784 784 for candidaterev in candidaterevs:
785 785 # skip over empty deltas (no need to include them in a chain)
786 786 while candidaterev != nullrev and not deltalength(candidaterev):
787 787 candidaterev = deltaparent(candidaterev)
788 788 # no need to try a delta against nullid, this will be handled
789 789 # by fulltext later.
790 790 if candidaterev == nullrev:
791 791 continue
792 792 # no delta for rawtext-changing revs (see "candelta" for why)
793 793 if revlog.flags(candidaterev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
794 794 continue
795 795 candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
796 796 if revlog._isgooddeltainfo(candidatedelta, revinfo):
797 797 nominateddeltas.append(candidatedelta)
798 798 if nominateddeltas:
799 799 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
800 800 break
801 801
802 802 return deltainfo
803 803
804 804 @attr.s(slots=True, frozen=True)
805 805 class _revisioninfo(object):
806 806 """Information about a revision that allows building its fulltext
807 807 node: expected hash of the revision
808 808 p1, p2: parent revs of the revision
809 809 btext: built text cache consisting of a one-element list
810 810 cachedelta: (baserev, uncompressed_delta) or None
811 811 flags: flags associated with the revision storage
812 812
813 813 One of btext[0] or cachedelta must be set.
814 814 """
815 815 node = attr.ib()
816 816 p1 = attr.ib()
817 817 p2 = attr.ib()
818 818 btext = attr.ib()
819 819 textlen = attr.ib()
820 820 cachedelta = attr.ib()
821 821 flags = attr.ib()
822 822
823 823 # index v0:
824 824 # 4 bytes: offset
825 825 # 4 bytes: compressed length
826 826 # 4 bytes: base rev
827 827 # 4 bytes: link rev
828 828 # 20 bytes: parent 1 nodeid
829 829 # 20 bytes: parent 2 nodeid
830 830 # 20 bytes: nodeid
831 831 indexformatv0 = struct.Struct(">4l20s20s20s")
832 832 indexformatv0_pack = indexformatv0.pack
833 833 indexformatv0_unpack = indexformatv0.unpack
834 834
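To make the layout above concrete, decoding one v0 record is a single unpack; data and off below are stand-ins for the index bytes and the record boundary:

    e = indexformatv0_unpack(data[off:off + indexformatv0.size])
    offset, complen, baserev, linkrev = e[:4]   # the four ">4l" integers
    p1node, p2node, node = e[4:]                # three 20-byte binary nodeids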
835 835 class revlogoldindex(list):
836 836 def __getitem__(self, i):
837 837 if i == -1:
838 838 return (0, 0, 0, -1, -1, -1, -1, nullid)
839 839 return list.__getitem__(self, i)
840 840
841 841 # maximum <delta-chain-data>/<revision-text-length> ratio
842 842 LIMIT_DELTA2TEXT = 2
843 843
844 844 class revlogoldio(object):
845 845 def __init__(self):
846 846 self.size = indexformatv0.size
847 847
848 848 def parseindex(self, data, inline):
849 849 s = self.size
850 850 index = []
851 851 nodemap = {nullid: nullrev}
852 852 n = off = 0
853 853 l = len(data)
854 854 while off + s <= l:
855 855 cur = data[off:off + s]
856 856 off += s
857 857 e = indexformatv0_unpack(cur)
858 858 # transform to revlogv1 format
859 859 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
860 860 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
861 861 index.append(e2)
862 862 nodemap[e[6]] = n
863 863 n += 1
864 864
865 865 return revlogoldindex(index), nodemap, None
866 866
867 867 def packentry(self, entry, node, version, rev):
868 868 if gettype(entry[0]):
869 869 raise RevlogError(_('index entry flags need revlog version 1'))
870 870 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
871 871 node(entry[5]), node(entry[6]), entry[7])
872 872 return indexformatv0_pack(*e2)
873 873
874 874 # index ng:
875 875 # 6 bytes: offset
876 876 # 2 bytes: flags
877 877 # 4 bytes: compressed length
878 878 # 4 bytes: uncompressed length
879 879 # 4 bytes: base rev
880 880 # 4 bytes: link rev
881 881 # 4 bytes: parent 1 rev
882 882 # 4 bytes: parent 2 rev
883 883 # 32 bytes: nodeid
884 884 indexformatng = struct.Struct(">Qiiiiii20s12x")
885 885 indexformatng_pack = indexformatng.pack
886 886 versionformat = struct.Struct(">I")
887 887 versionformat_pack = versionformat.pack
888 888 versionformat_unpack = versionformat.unpack
889 889
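Note how the header is recovered later in __init__ (see below): rev 0's offset is always 0, so its first four bytes can be repurposed as the version word without losing information. A sketch of decoding it from the start of an index file:

    v = versionformat_unpack(indexdata[:4])[0]
    fmt = v & 0xFFFF                      # REVLOGV0, REVLOGV1 or REVLOGV2
    inline = bool(v & FLAG_INLINE_DATA)
    generaldelta = bool(v & FLAG_GENERALDELTA)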
890 890 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
891 891 # signed integer)
892 892 _maxentrysize = 0x7fffffff
893 893
894 894 class revlogio(object):
895 895 def __init__(self):
896 896 self.size = indexformatng.size
897 897
898 898 def parseindex(self, data, inline):
899 899 # call the C implementation to parse the index data
900 900 index, cache = parsers.parse_index2(data, inline)
901 901 return index, getattr(index, 'nodemap', None), cache
902 902
903 903 def packentry(self, entry, node, version, rev):
904 904 p = indexformatng_pack(*entry)
905 905 if rev == 0:
906 906 p = versionformat_pack(version) + p[4:]
907 907 return p
908 908
909 909 class revlog(object):
910 910 """
911 911 the underlying revision storage object
912 912
913 913 A revlog consists of two parts, an index and the revision data.
914 914
915 915 The index is a file with a fixed record size containing
916 916 information on each revision, including its nodeid (hash), the
917 917 nodeids of its parents, the position and offset of its data within
918 918 the data file, and the revision it's based on. Finally, each entry
919 919 contains a linkrev entry that can serve as a pointer to external
920 920 data.
921 921
922 922 The revision data itself is a linear collection of data chunks.
923 923 Each chunk represents a revision and is usually represented as a
924 924 delta against the previous chunk. To bound lookup time, runs of
925 925 deltas are limited to about 2 times the length of the original
926 926 version data. This makes retrieval of a version proportional to
927 927 its size, or O(1) relative to the number of revisions.
928 928
929 929 Both pieces of the revlog are written to in an append-only
930 930 fashion, which means we never need to rewrite a file to insert or
931 931 remove data, and can use some simple techniques to avoid the need
932 932 for locking while reading.
933 933
934 934 If checkambig, indexfile is opened with checkambig=True at
935 935 writing, to avoid file stat ambiguity.
936 936
937 937 If mmaplargeindex is True, and an mmapindexthreshold is set, the
938 938 index will be mmapped rather than read if it is larger than the
939 939 configured threshold.
940 940
941 941 If censorable is True, the revlog can have censored revisions.
942 942 """
943 943 def __init__(self, opener, indexfile, datafile=None, checkambig=False,
944 944 mmaplargeindex=False, censorable=False):
945 945 """
946 946 create a revlog object
947 947
948 948 opener is a function that abstracts the file opening operation
949 949 and can be used to implement COW semantics or the like.
950 950 """
951 951 self.indexfile = indexfile
952 952 self.datafile = datafile or (indexfile[:-2] + ".d")
953 953 self.opener = opener
954 954 # When True, indexfile is opened with checkambig=True at writing, to
955 955 # avoid file stat ambiguity.
956 956 self._checkambig = checkambig
957 957 self._censorable = censorable
958 958 # 3-tuple of (node, rev, text) for a raw revision.
959 959 self._cache = None
960 960 # Maps rev to chain base rev.
961 961 self._chainbasecache = util.lrucachedict(100)
962 962 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
963 963 self._chunkcache = (0, '')
964 964 # How much data to read and cache into the raw revlog data cache.
965 965 self._chunkcachesize = 65536
966 966 self._maxchainlen = None
967 967 self._deltabothparents = True
968 968 self.index = []
969 969 # Mapping of partial identifiers to full nodes.
970 970 self._pcache = {}
971 971 # Mapping of revision integer to full node.
972 972 self._nodecache = {nullid: nullrev}
973 973 self._nodepos = None
974 974 self._compengine = 'zlib'
975 975 self._maxdeltachainspan = -1
976 976 self._withsparseread = False
977 977 self._sparserevlog = False
978 978 self._srdensitythreshold = 0.50
979 979 self._srmingapsize = 262144
980 980
981 981 mmapindexthreshold = None
982 982 v = REVLOG_DEFAULT_VERSION
983 983 opts = getattr(opener, 'options', None)
984 984 if opts is not None:
985 985 if 'revlogv2' in opts:
986 986 # version 2 revlogs always use generaldelta.
987 987 v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
988 988 elif 'revlogv1' in opts:
989 989 if 'generaldelta' in opts:
990 990 v |= FLAG_GENERALDELTA
991 991 else:
992 992 v = 0
993 993 if 'chunkcachesize' in opts:
994 994 self._chunkcachesize = opts['chunkcachesize']
995 995 if 'maxchainlen' in opts:
996 996 self._maxchainlen = opts['maxchainlen']
997 997 if 'deltabothparents' in opts:
998 998 self._deltabothparents = opts['deltabothparents']
999 999 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
1000 1000 if 'compengine' in opts:
1001 1001 self._compengine = opts['compengine']
1002 1002 if 'maxdeltachainspan' in opts:
1003 1003 self._maxdeltachainspan = opts['maxdeltachainspan']
1004 1004 if mmaplargeindex and 'mmapindexthreshold' in opts:
1005 1005 mmapindexthreshold = opts['mmapindexthreshold']
1006 1006 self._sparserevlog = bool(opts.get('sparse-revlog', False))
1007 1007 withsparseread = bool(opts.get('with-sparse-read', False))
1008 1008 # sparse-revlog forces sparse-read
1009 1009 self._withsparseread = self._sparserevlog or withsparseread
1010 1010 if 'sparse-read-density-threshold' in opts:
1011 1011 self._srdensitythreshold = opts['sparse-read-density-threshold']
1012 1012 if 'sparse-read-min-gap-size' in opts:
1013 1013 self._srmingapsize = opts['sparse-read-min-gap-size']
1014 1014
1015 1015 if self._chunkcachesize <= 0:
1016 1016 raise RevlogError(_('revlog chunk cache size %r is not greater '
1017 1017 'than 0') % self._chunkcachesize)
1018 1018 elif self._chunkcachesize & (self._chunkcachesize - 1):
1019 1019 raise RevlogError(_('revlog chunk cache size %r is not a power '
1020 1020 'of 2') % self._chunkcachesize)
1021 1021
1022 1022 indexdata = ''
1023 1023 self._initempty = True
1024 1024 try:
1025 1025 with self._indexfp() as f:
1026 1026 if (mmapindexthreshold is not None and
1027 1027 self.opener.fstat(f).st_size >= mmapindexthreshold):
1028 1028 indexdata = util.buffer(util.mmapread(f))
1029 1029 else:
1030 1030 indexdata = f.read()
1031 1031 if len(indexdata) > 0:
1032 1032 v = versionformat_unpack(indexdata[:4])[0]
1033 1033 self._initempty = False
1034 1034 except IOError as inst:
1035 1035 if inst.errno != errno.ENOENT:
1036 1036 raise
1037 1037
1038 1038 self.version = v
1039 1039 self._inline = v & FLAG_INLINE_DATA
1040 1040 self._generaldelta = v & FLAG_GENERALDELTA
1041 1041 flags = v & ~0xFFFF
1042 1042 fmt = v & 0xFFFF
1043 1043 if fmt == REVLOGV0:
1044 1044 if flags:
1045 1045 raise RevlogError(_('unknown flags (%#04x) in version %d '
1046 1046 'revlog %s') %
1047 1047 (flags >> 16, fmt, self.indexfile))
1048 1048 elif fmt == REVLOGV1:
1049 1049 if flags & ~REVLOGV1_FLAGS:
1050 1050 raise RevlogError(_('unknown flags (%#04x) in version %d '
1051 1051 'revlog %s') %
1052 1052 (flags >> 16, fmt, self.indexfile))
1053 1053 elif fmt == REVLOGV2:
1054 1054 if flags & ~REVLOGV2_FLAGS:
1055 1055 raise RevlogError(_('unknown flags (%#04x) in version %d '
1056 1056 'revlog %s') %
1057 1057 (flags >> 16, fmt, self.indexfile))
1058 1058 else:
1059 1059 raise RevlogError(_('unknown version (%d) in revlog %s') %
1060 1060 (fmt, self.indexfile))
1061 1061
1062 1062 self.storedeltachains = True
1063 1063
1064 1064 self._io = revlogio()
1065 1065 if self.version == REVLOGV0:
1066 1066 self._io = revlogoldio()
1067 1067 try:
1068 1068 d = self._io.parseindex(indexdata, self._inline)
1069 1069 except (ValueError, IndexError):
1070 1070 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
1071 1071 self.index, nodemap, self._chunkcache = d
1072 1072 if nodemap is not None:
1073 1073 self.nodemap = self._nodecache = nodemap
1074 1074 if not self._chunkcache:
1075 1075 self._chunkclear()
1076 1076 # revnum -> (chain-length, sum-delta-length)
1077 1077 self._chaininfocache = {}
1078 1078 # revlog header -> revlog compressor
1079 1079 self._decompressors = {}
1080 1080
1081 1081 @util.propertycache
1082 1082 def _compressor(self):
1083 1083 return util.compengines[self._compengine].revlogcompressor()
1084 1084
1085 1085 def _indexfp(self, mode='r'):
1086 1086 """file object for the revlog's index file"""
1087 1087 args = {r'mode': mode}
1088 1088 if mode != 'r':
1089 1089 args[r'checkambig'] = self._checkambig
1090 1090 if mode == 'w':
1091 1091 args[r'atomictemp'] = True
1092 1092 return self.opener(self.indexfile, **args)
1093 1093
1094 1094 def _datafp(self, mode='r'):
1095 1095 """file object for the revlog's data file"""
1096 1096 return self.opener(self.datafile, mode=mode)
1097 1097
1098 1098 @contextlib.contextmanager
1099 1099 def _datareadfp(self, existingfp=None):
1100 1100 """file object suitable to read data"""
1101 1101 if existingfp is not None:
1102 1102 yield existingfp
1103 1103 else:
1104 1104 if self._inline:
1105 1105 func = self._indexfp
1106 1106 else:
1107 1107 func = self._datafp
1108 1108 with func() as fp:
1109 1109 yield fp
1110 1110
1111 1111 def tip(self):
1112 1112 return self.node(len(self.index) - 1)
1113 1113 def __contains__(self, rev):
1114 1114 return 0 <= rev < len(self)
1115 1115 def __len__(self):
1116 1116 return len(self.index)
1117 1117 def __iter__(self):
1118 1118 return iter(pycompat.xrange(len(self)))
1119 1119 def revs(self, start=0, stop=None):
1120 1120 """iterate over all rev in this revlog (from start to stop)"""
1121 1121 step = 1
1122 1122 length = len(self)
1123 1123 if stop is not None:
1124 1124 if start > stop:
1125 1125 step = -1
1126 1126 stop += step
1127 1127 if stop > length:
1128 1128 stop = length
1129 1129 else:
1130 1130 stop = length
1131 1131 return pycompat.xrange(start, stop, step)
1132 1132
1133 1133 @util.propertycache
1134 1134 def nodemap(self):
1135 1135 if self.index:
1136 1136 # populate mapping down to the initial node
1137 1137 node0 = self.index[0][7] # get around changelog filtering
1138 1138 self.rev(node0)
1139 1139 return self._nodecache
1140 1140
1141 1141 def hasnode(self, node):
1142 1142 try:
1143 1143 self.rev(node)
1144 1144 return True
1145 1145 except KeyError:
1146 1146 return False
1147 1147
1148 1148 def candelta(self, baserev, rev):
1149 1149 """whether two revisions (baserev, rev) can be delta-ed or not"""
1150 1150 # Disable delta if either rev requires a content-changing flag
1151 1151 # processor (ex. LFS). This is because such flag processor can alter
1152 1152 # the rawtext content that the delta will be based on, and two clients
1153 1153 # could have the same revlog node with different flags (i.e. different
1154 1154 # rawtext contents) and the delta could be incompatible.
1155 1155 if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
1156 1156 or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
1157 1157 return False
1158 1158 return True
1159 1159
1160 1160 def clearcaches(self):
1161 1161 self._cache = None
1162 1162 self._chainbasecache.clear()
1163 1163 self._chunkcache = (0, '')
1164 1164 self._pcache = {}
1165 1165
1166 1166 try:
1167 1167 self._nodecache.clearcaches()
1168 1168 except AttributeError:
1169 1169 self._nodecache = {nullid: nullrev}
1170 1170 self._nodepos = None
1171 1171
1172 1172 def rev(self, node):
1173 1173 try:
1174 1174 return self._nodecache[node]
1175 1175 except TypeError:
1176 1176 raise
1177 1177 except RevlogError:
1178 1178 # parsers.c radix tree lookup failed
1179 1179 if node == wdirid or node in wdirfilenodeids:
1180 1180 raise error.WdirUnsupported
1181 1181 raise LookupError(node, self.indexfile, _('no node'))
1182 1182 except KeyError:
1183 1183 # pure python cache lookup failed
1184 1184 n = self._nodecache
1185 1185 i = self.index
1186 1186 p = self._nodepos
1187 1187 if p is None:
1188 1188 p = len(i) - 1
1189 1189 else:
1190 1190 assert p < len(i)
1191 1191 for r in pycompat.xrange(p, -1, -1):
1192 1192 v = i[r][7]
1193 1193 n[v] = r
1194 1194 if v == node:
1195 1195 self._nodepos = r - 1
1196 1196 return r
1197 1197 if node == wdirid or node in wdirfilenodeids:
1198 1198 raise error.WdirUnsupported
1199 1199 raise LookupError(node, self.indexfile, _('no node'))
1200 1200
1201 1201 # Accessors for index entries.
1202 1202
1203 1203 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
1204 1204 # are flags.
1205 1205 def start(self, rev):
1206 1206 return int(self.index[rev][0] >> 16)
1207 1207
1208 1208 def flags(self, rev):
1209 1209 return self.index[rev][0] & 0xFFFF
1210 1210
1211 1211 def length(self, rev):
1212 1212 return self.index[rev][1]
1213 1213
1214 1214 def rawsize(self, rev):
1215 1215 """return the length of the uncompressed text for a given revision"""
1216 1216 l = self.index[rev][2]
1217 1217 if l >= 0:
1218 1218 return l
1219 1219
1220 1220 t = self.revision(rev, raw=True)
1221 1221 return len(t)
1222 1222
1223 1223 def size(self, rev):
1224 1224 """length of non-raw text (processed by a "read" flag processor)"""
1225 1225 # fast path: if no "read" flag processor could change the content,
1226 1226 # size is rawsize. note: ELLIPSIS is known to not change the content.
1227 1227 flags = self.flags(rev)
1228 1228 if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
1229 1229 return self.rawsize(rev)
1230 1230
1231 1231 return len(self.revision(rev, raw=False))
1232 1232
1233 1233 def chainbase(self, rev):
1234 1234 base = self._chainbasecache.get(rev)
1235 1235 if base is not None:
1236 1236 return base
1237 1237
1238 1238 index = self.index
1239 1239 iterrev = rev
1240 1240 base = index[iterrev][3]
1241 1241 while base != iterrev:
1242 1242 iterrev = base
1243 1243 base = index[iterrev][3]
1244 1244
1245 1245 self._chainbasecache[rev] = base
1246 1246 return base
1247 1247
1248 1248 def linkrev(self, rev):
1249 1249 return self.index[rev][4]
1250 1250
1251 1251 def parentrevs(self, rev):
1252 1252 try:
1253 1253 entry = self.index[rev]
1254 1254 except IndexError:
1255 1255 if rev == wdirrev:
1256 1256 raise error.WdirUnsupported
1257 1257 raise
1258 1258
1259 1259 return entry[5], entry[6]
1260 1260
1261 1261 def node(self, rev):
1262 1262 try:
1263 1263 return self.index[rev][7]
1264 1264 except IndexError:
1265 1265 if rev == wdirrev:
1266 1266 raise error.WdirUnsupported
1267 1267 raise
1268 1268
1269 1269 # Derived from index values.
1270 1270
1271 1271 def end(self, rev):
1272 1272 return self.start(rev) + self.length(rev)
1273 1273
1274 1274 def parents(self, node):
1275 1275 i = self.index
1276 1276 d = i[self.rev(node)]
1277 1277 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
1278 1278
1279 1279 def chainlen(self, rev):
1280 1280 return self._chaininfo(rev)[0]
1281 1281
1282 1282 def _chaininfo(self, rev):
1283 1283 chaininfocache = self._chaininfocache
1284 1284 if rev in chaininfocache:
1285 1285 return chaininfocache[rev]
1286 1286 index = self.index
1287 1287 generaldelta = self._generaldelta
1288 1288 iterrev = rev
1289 1289 e = index[iterrev]
1290 1290 clen = 0
1291 1291 compresseddeltalen = 0
1292 1292 while iterrev != e[3]:
1293 1293 clen += 1
1294 1294 compresseddeltalen += e[1]
1295 1295 if generaldelta:
1296 1296 iterrev = e[3]
1297 1297 else:
1298 1298 iterrev -= 1
1299 1299 if iterrev in chaininfocache:
1300 1300 t = chaininfocache[iterrev]
1301 1301 clen += t[0]
1302 1302 compresseddeltalen += t[1]
1303 1303 break
1304 1304 e = index[iterrev]
1305 1305 else:
1306 1306 # Add text length of base since decompressing that also takes
1307 1307 # work. For cache hits the length is already included.
1308 1308 compresseddeltalen += e[1]
1309 1309 r = (clen, compresseddeltalen)
1310 1310 chaininfocache[rev] = r
1311 1311 return r
1312 1312
1313 1313 def _deltachain(self, rev, stoprev=None):
1314 1314 """Obtain the delta chain for a revision.
1315 1315
1316 1316 ``stoprev`` specifies a revision to stop at. If not specified, we
1317 1317 stop at the base of the chain.
1318 1318
1319 1319 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
1320 1320 revs in ascending order and ``stopped`` is a bool indicating whether
1321 1321 ``stoprev`` was hit.
1322 1322 """
1323 1323 # Try C implementation.
1324 1324 try:
1325 1325 return self.index.deltachain(rev, stoprev, self._generaldelta)
1326 1326 except AttributeError:
1327 1327 pass
1328 1328
1329 1329 chain = []
1330 1330
1331 1331 # Alias to prevent attribute lookup in tight loop.
1332 1332 index = self.index
1333 1333 generaldelta = self._generaldelta
1334 1334
1335 1335 iterrev = rev
1336 1336 e = index[iterrev]
1337 1337 while iterrev != e[3] and iterrev != stoprev:
1338 1338 chain.append(iterrev)
1339 1339 if generaldelta:
1340 1340 iterrev = e[3]
1341 1341 else:
1342 1342 iterrev -= 1
1343 1343 e = index[iterrev]
1344 1344
1345 1345 if iterrev == stoprev:
1346 1346 stopped = True
1347 1347 else:
1348 1348 chain.append(iterrev)
1349 1349 stopped = False
1350 1350
1351 1351 chain.reverse()
1352 1352 return chain, stopped
1353 1353
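Conceptually, a fulltext is rebuilt from the chain returned here as in the sketch below; this is not the actual revision() fast path, and rl plus the direct use of _chunk are illustrative only:

    chain, stopped = rl._deltachain(rev)
    # when not stopped, chain[0] is the base and stores a full snapshot
    text = rl._chunk(chain[0])
    for r in chain[1:]:
        text = mdiff.patch(text, rl._chunk(r))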
1354 1354 def ancestors(self, revs, stoprev=0, inclusive=False):
1355 1355 """Generate the ancestors of 'revs' in reverse topological order.
1356 1356 Does not generate revs lower than stoprev.
1357 1357
1358 1358 See the documentation for ancestor.lazyancestors for more details."""
1359 1359
1360 1360 return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
1361 1361 inclusive=inclusive)
1362 1362
1363 1363 def descendants(self, revs):
1364 1364 """Generate the descendants of 'revs' in revision order.
1365 1365
1366 1366 Yield a sequence of revision numbers starting with a child of
1367 1367 some rev in revs, i.e., each revision is *not* considered a
1368 1368 descendant of itself. Results are ordered by revision number (a
1369 1369 topological sort)."""
1370 1370 first = min(revs)
1371 1371 if first == nullrev:
1372 1372 for i in self:
1373 1373 yield i
1374 1374 return
1375 1375
1376 1376 seen = set(revs)
1377 1377 for i in self.revs(start=first + 1):
1378 1378 for x in self.parentrevs(i):
1379 1379 if x != nullrev and x in seen:
1380 1380 seen.add(i)
1381 1381 yield i
1382 1382 break
1383 1383
1384 1384 def findcommonmissing(self, common=None, heads=None):
1385 1385 """Return a tuple of the ancestors of common and the ancestors of heads
1386 1386 that are not ancestors of common. In revset terminology, we return the
1387 1387 tuple:
1388 1388
1389 1389 ::common, (::heads) - (::common)
1390 1390
1391 1391 The list is sorted by revision number, meaning it is
1392 1392 topologically sorted.
1393 1393
1394 1394 'heads' and 'common' are both lists of node IDs. If heads is
1395 1395 not supplied, uses all of the revlog's heads. If common is not
1396 1396 supplied, uses nullid."""
1397 1397 if common is None:
1398 1398 common = [nullid]
1399 1399 if heads is None:
1400 1400 heads = self.heads()
1401 1401
1402 1402 common = [self.rev(n) for n in common]
1403 1403 heads = [self.rev(n) for n in heads]
1404 1404
1405 1405 # we want the ancestors, but inclusive
1406 1406 class lazyset(object):
1407 1407 def __init__(self, lazyvalues):
1408 1408 self.addedvalues = set()
1409 1409 self.lazyvalues = lazyvalues
1410 1410
1411 1411 def __contains__(self, value):
1412 1412 return value in self.addedvalues or value in self.lazyvalues
1413 1413
1414 1414 def __iter__(self):
1415 1415 added = self.addedvalues
1416 1416 for r in added:
1417 1417 yield r
1418 1418 for r in self.lazyvalues:
1419 1419 if r not in added:
1420 1420 yield r
1421 1421
1422 1422 def add(self, value):
1423 1423 self.addedvalues.add(value)
1424 1424
1425 1425 def update(self, values):
1426 1426 self.addedvalues.update(values)
1427 1427
1428 1428 has = lazyset(self.ancestors(common))
1429 1429 has.add(nullrev)
1430 1430 has.update(common)
1431 1431
1432 1432 # take all ancestors from heads that aren't in has
1433 1433 missing = set()
1434 1434 visit = collections.deque(r for r in heads if r not in has)
1435 1435 while visit:
1436 1436 r = visit.popleft()
1437 1437 if r in missing:
1438 1438 continue
1439 1439 else:
1440 1440 missing.add(r)
1441 1441 for p in self.parentrevs(r):
1442 1442 if p not in has:
1443 1443 visit.append(p)
1444 1444 missing = list(missing)
1445 1445 missing.sort()
1446 1446 return has, [self.node(miss) for miss in missing]
1447 1447
1448 1448 def incrementalmissingrevs(self, common=None):
1449 1449 """Return an object that can be used to incrementally compute the
1450 1450 revision numbers of the ancestors of arbitrary sets that are not
1451 1451 ancestors of common. This is an ancestor.incrementalmissingancestors
1452 1452 object.
1453 1453
1454 1454 'common' is a list of revision numbers. If common is not supplied, uses
1455 1455 nullrev.
1456 1456 """
1457 1457 if common is None:
1458 1458 common = [nullrev]
1459 1459
1460 1460 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1461 1461
1462 1462 def findmissingrevs(self, common=None, heads=None):
1463 1463 """Return the revision numbers of the ancestors of heads that
1464 1464 are not ancestors of common.
1465 1465
1466 1466 More specifically, return a list of revision numbers corresponding to
1467 1467 nodes N such that every N satisfies the following constraints:
1468 1468
1469 1469 1. N is an ancestor of some node in 'heads'
1470 1470 2. N is not an ancestor of any node in 'common'
1471 1471
1472 1472 The list is sorted by revision number, meaning it is
1473 1473 topologically sorted.
1474 1474
1475 1475 'heads' and 'common' are both lists of revision numbers. If heads is
1476 1476 not supplied, uses all of the revlog's heads. If common is not
1477 1477 supplied, uses nullid."""
1478 1478 if common is None:
1479 1479 common = [nullrev]
1480 1480 if heads is None:
1481 1481 heads = self.headrevs()
1482 1482
1483 1483 inc = self.incrementalmissingrevs(common=common)
1484 1484 return inc.missingancestors(heads)
1485 1485
1486 1486 def findmissing(self, common=None, heads=None):
1487 1487 """Return the ancestors of heads that are not ancestors of common.
1488 1488
1489 1489 More specifically, return a list of nodes N such that every N
1490 1490 satisfies the following constraints:
1491 1491
1492 1492 1. N is an ancestor of some node in 'heads'
1493 1493 2. N is not an ancestor of any node in 'common'
1494 1494
1495 1495 The list is sorted by revision number, meaning it is
1496 1496 topologically sorted.
1497 1497
1498 1498 'heads' and 'common' are both lists of node IDs. If heads is
1499 1499 not supplied, uses all of the revlog's heads. If common is not
1500 1500 supplied, uses nullid."""
1501 1501 if common is None:
1502 1502 common = [nullid]
1503 1503 if heads is None:
1504 1504 heads = self.heads()
1505 1505
1506 1506 common = [self.rev(n) for n in common]
1507 1507 heads = [self.rev(n) for n in heads]
1508 1508
1509 1509 inc = self.incrementalmissingrevs(common=common)
1510 1510 return [self.node(r) for r in inc.missingancestors(heads)]
1511 1511
1512 1512 def nodesbetween(self, roots=None, heads=None):
1513 1513 """Return a topological path from 'roots' to 'heads'.
1514 1514
1515 1515 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1516 1516 topologically sorted list of all nodes N that satisfy both of
1517 1517 these constraints:
1518 1518
1519 1519 1. N is a descendant of some node in 'roots'
1520 1520 2. N is an ancestor of some node in 'heads'
1521 1521
1522 1522 Every node is considered to be both a descendant and an ancestor
1523 1523 of itself, so every reachable node in 'roots' and 'heads' will be
1524 1524 included in 'nodes'.
1525 1525
1526 1526 'outroots' is the list of reachable nodes in 'roots', i.e., the
1527 1527 subset of 'roots' that is returned in 'nodes'. Likewise,
1528 1528 'outheads' is the subset of 'heads' that is also in 'nodes'.
1529 1529
1530 1530 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1531 1531 unspecified, uses nullid as the only root. If 'heads' is
1532 1532 unspecified, uses list of all of the revlog's heads."""
1533 1533 nonodes = ([], [], [])
1534 1534 if roots is not None:
1535 1535 roots = list(roots)
1536 1536 if not roots:
1537 1537 return nonodes
1538 1538 lowestrev = min([self.rev(n) for n in roots])
1539 1539 else:
1540 1540 roots = [nullid] # Everybody's a descendant of nullid
1541 1541 lowestrev = nullrev
1542 1542 if (lowestrev == nullrev) and (heads is None):
1543 1543 # We want _all_ the nodes!
1544 1544 return ([self.node(r) for r in self], [nullid], list(self.heads()))
1545 1545 if heads is None:
1546 1546 # All nodes are ancestors, so the latest ancestor is the last
1547 1547 # node.
1548 1548 highestrev = len(self) - 1
1549 1549 # Set ancestors to None to signal that every node is an ancestor.
1550 1550 ancestors = None
1551 1551 # Set heads to an empty dictionary for later discovery of heads
1552 1552 heads = {}
1553 1553 else:
1554 1554 heads = list(heads)
1555 1555 if not heads:
1556 1556 return nonodes
1557 1557 ancestors = set()
1558 1558 # Turn heads into a dictionary so we can remove 'fake' heads.
1559 1559 # Also, later we will be using it to filter out the heads we can't
1560 1560 # find from roots.
1561 1561 heads = dict.fromkeys(heads, False)
1562 1562 # Start at the top and keep marking parents until we're done.
1563 1563 nodestotag = set(heads)
1564 1564 # Remember where the top was so we can use it as a limit later.
1565 1565 highestrev = max([self.rev(n) for n in nodestotag])
1566 1566 while nodestotag:
1567 1567 # grab a node to tag
1568 1568 n = nodestotag.pop()
1569 1569 # Never tag nullid
1570 1570 if n == nullid:
1571 1571 continue
1572 1572 # A node's revision number represents its place in a
1573 1573 # topologically sorted list of nodes.
1574 1574 r = self.rev(n)
1575 1575 if r >= lowestrev:
1576 1576 if n not in ancestors:
1577 1577 # If we are possibly a descendant of one of the roots
1578 1578 # and we haven't already been marked as an ancestor
1579 1579 ancestors.add(n) # Mark as ancestor
1580 1580 # Add non-nullid parents to list of nodes to tag.
1581 1581 nodestotag.update([p for p in self.parents(n) if
1582 1582 p != nullid])
1583 1583 elif n in heads: # We've seen it before, is it a fake head?
1584 1584 # So it is, real heads should not be the ancestors of
1585 1585 # any other heads.
1586 1586 heads.pop(n)
1587 1587 if not ancestors:
1588 1588 return nonodes
1589 1589 # Now that we have our set of ancestors, we want to remove any
1590 1590 # roots that are not ancestors.
1591 1591
1592 1592 # If one of the roots was nullid, everything is included anyway.
1593 1593 if lowestrev > nullrev:
1594 1594 # But, since none of them was, let's recompute the lowest rev so
1595 1595 # that it doesn't include roots that aren't ancestors.
1596 1596
1597 1597 # Filter out roots that aren't ancestors of heads
1598 1598 roots = [root for root in roots if root in ancestors]
1599 1599 # Recompute the lowest revision
1600 1600 if roots:
1601 1601 lowestrev = min([self.rev(root) for root in roots])
1602 1602 else:
1603 1603 # No more roots? Return empty list
1604 1604 return nonodes
1605 1605 else:
1606 1606 # We are descending from nullid, and don't need to care about
1607 1607 # any other roots.
1608 1608 lowestrev = nullrev
1609 1609 roots = [nullid]
1610 1610 # Transform our roots list into a set.
1611 1611 descendants = set(roots)
1612 1612 # Also, keep the original roots so we can filter out roots that aren't
1613 1613 # 'real' roots (i.e. are descended from other roots).
1614 1614 roots = descendants.copy()
1615 1615 # Our topologically sorted list of output nodes.
1616 1616 orderedout = []
1617 1617 # Don't start at nullid since we don't want nullid in our output list,
1618 1618 # and if nullid shows up in descendants, empty parents will look like
1619 1619 # they're descendants.
1620 1620 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1621 1621 n = self.node(r)
1622 1622 isdescendant = False
1623 1623 if lowestrev == nullrev: # Everybody is a descendant of nullid
1624 1624 isdescendant = True
1625 1625 elif n in descendants:
1626 1626 # n is already a descendant
1627 1627 isdescendant = True
1628 1628 # This check only needs to be done here because all the roots
1629 1629 # will start being marked as descendants before the loop.
1630 1630 if n in roots:
1631 1631 # If n was a root, check if it's a 'real' root.
1632 1632 p = tuple(self.parents(n))
1633 1633 # If any of its parents are descendants, it's not a root.
1634 1634 if (p[0] in descendants) or (p[1] in descendants):
1635 1635 roots.remove(n)
1636 1636 else:
1637 1637 p = tuple(self.parents(n))
1638 1638 # A node is a descendant if either of its parents are
1639 1639 # descendants. (We seeded the descendants set with the roots
1640 1640 # up there, remember?)
1641 1641 if (p[0] in descendants) or (p[1] in descendants):
1642 1642 descendants.add(n)
1643 1643 isdescendant = True
1644 1644 if isdescendant and ((ancestors is None) or (n in ancestors)):
1645 1645 # Only include nodes that are both descendants and ancestors.
1646 1646 orderedout.append(n)
1647 1647 if (ancestors is not None) and (n in heads):
1648 1648 # We're trying to figure out which heads are reachable
1649 1649 # from roots.
1650 1650 # Mark this head as having been reached
1651 1651 heads[n] = True
1652 1652 elif ancestors is None:
1653 1653 # Otherwise, we're trying to discover the heads.
1654 1654 # Assume this is a head because if it isn't, the next step
1655 1655 # will eventually remove it.
1656 1656 heads[n] = True
1657 1657 # But, obviously its parents aren't.
1658 1658 for p in self.parents(n):
1659 1659 heads.pop(p, None)
1660 1660 heads = [head for head, flag in heads.iteritems() if flag]
1661 1661 roots = list(roots)
1662 1662 assert orderedout
1663 1663 assert roots
1664 1664 assert heads
1665 1665 return (orderedout, roots, heads)
1666 1666
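As an illustration of the tuple nodesbetween returns, here is a minimal usage sketch; `rl`, `rootnode` and `headnode` are hypothetical placeholders for an open revlog instance and two of its node IDs:

# Illustrative sketch only: `rl` is assumed to be an open revlog.
def pathinfo(rl, rootnode, headnode):
    nodes, outroots, outheads = rl.nodesbetween([rootnode], [headnode])
    # `nodes` is topologically sorted; `outroots`/`outheads` are the
    # subsets of the given roots/heads that were actually reachable.
    return len(nodes), outroots, outheads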
1667 1667 def headrevs(self):
1668 1668 try:
1669 1669 return self.index.headrevs()
1670 1670 except AttributeError:
1671 1671 return self._headrevs()
1672 1672
1673 1673 def computephases(self, roots):
1674 1674 return self.index.computephasesmapsets(roots)
1675 1675
1676 1676 def _headrevs(self):
1677 1677 count = len(self)
1678 1678 if not count:
1679 1679 return [nullrev]
1680 1680 # we won't iterate over filtered revs, so nobody is a head at start
1681 1681 ishead = [0] * (count + 1)
1682 1682 index = self.index
1683 1683 for r in self:
1684 1684 ishead[r] = 1 # I may be a head
1685 1685 e = index[r]
1686 1686 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1687 1687 return [r for r, val in enumerate(ishead) if val]
1688 1688
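The fallback above is a single "unmark the parents" pass. The same idea over a plain parent table, as a self-contained sketch (toy data, not the revlog API):

# Toy model of _headrevs: parentrevs maps rev -> (p1, p2), nullrev is -1.
def toy_headrevs(parentrevs, count):
    ishead = [1] * count + [0]   # extra slot absorbs writes at index -1
    for r in range(count):
        p1, p2 = parentrevs[r]
        ishead[p1] = ishead[p2] = 0   # my parents are not heads
    return [r for r in range(count) if ishead[r]]

# A small DAG: 0 <- 1 <- 2 and 0 <- 3; the heads are revisions 2 and 3.
assert toy_headrevs([(-1, -1), (0, -1), (1, -1), (0, -1)], 4) == [2, 3]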
1689 1689 def heads(self, start=None, stop=None):
1690 1690 """return the list of all nodes that have no children
1691 1691
1692 1692 if start is specified, only heads that are descendants of
1693 1693 start will be returned
1694 1694 if stop is specified, it will consider all the revs from stop
1695 1695 as if they had no children
1696 1696 """
1697 1697 if start is None and stop is None:
1698 1698 if not len(self):
1699 1699 return [nullid]
1700 1700 return [self.node(r) for r in self.headrevs()]
1701 1701
1702 1702 if start is None:
1703 1703 start = nullid
1704 1704 if stop is None:
1705 1705 stop = []
1706 1706 stoprevs = set([self.rev(n) for n in stop])
1707 1707 startrev = self.rev(start)
1708 1708 reachable = {startrev}
1709 1709 heads = {startrev}
1710 1710
1711 1711 parentrevs = self.parentrevs
1712 1712 for r in self.revs(start=startrev + 1):
1713 1713 for p in parentrevs(r):
1714 1714 if p in reachable:
1715 1715 if r not in stoprevs:
1716 1716 reachable.add(r)
1717 1717 heads.add(r)
1718 1718 if p in heads and p not in stoprevs:
1719 1719 heads.remove(p)
1720 1720
1721 1721 return [self.node(r) for r in heads]
1722 1722
1723 1723 def children(self, node):
1724 1724 """find the children of a given node"""
1725 1725 c = []
1726 1726 p = self.rev(node)
1727 1727 for r in self.revs(start=p + 1):
1728 1728 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1729 1729 if prevs:
1730 1730 for pr in prevs:
1731 1731 if pr == p:
1732 1732 c.append(self.node(r))
1733 1733 elif p == nullrev:
1734 1734 c.append(self.node(r))
1735 1735 return c
1736 1736
1737 1737 def commonancestorsheads(self, a, b):
1738 1738 """calculate all the heads of the common ancestors of nodes a and b"""
1739 1739 a, b = self.rev(a), self.rev(b)
1740 1740 ancs = self._commonancestorsheads(a, b)
1741 1741 return pycompat.maplist(self.node, ancs)
1742 1742
1743 1743 def _commonancestorsheads(self, *revs):
1744 1744 """calculate all the heads of the common ancestors of revs"""
1745 1745 try:
1746 1746 ancs = self.index.commonancestorsheads(*revs)
1747 1747 except (AttributeError, OverflowError): # C implementation failed
1748 1748 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1749 1749 return ancs
1750 1750
1751 1751 def isancestor(self, a, b):
1752 1752 """return True if node a is an ancestor of node b
1753 1753
1754 1754 A revision is considered an ancestor of itself."""
1755 1755 a, b = self.rev(a), self.rev(b)
1756 1756 return self.isancestorrev(a, b)
1757 1757
1758 1758 def isancestorrev(self, a, b):
1759 1759 """return True if revision a is an ancestor of revision b
1760 1760
1761 1761 A revision is considered an ancestor of itself.
1762 1762
1763 1763 The implementation of this is trivial but the use of
1764 1764 commonancestorsheads is not."""
1765 1765 if a == nullrev:
1766 1766 return True
1767 1767 elif a == b:
1768 1768 return True
1769 1769 elif a > b:
1770 1770 return False
1771 1771 return a in self._commonancestorsheads(a, b)
1772 1772
1773 1773 def ancestor(self, a, b):
1774 1774 """calculate the "best" common ancestor of nodes a and b"""
1775 1775
1776 1776 a, b = self.rev(a), self.rev(b)
1777 1777 try:
1778 1778 ancs = self.index.ancestors(a, b)
1779 1779 except (AttributeError, OverflowError):
1780 1780 ancs = ancestor.ancestors(self.parentrevs, a, b)
1781 1781 if ancs:
1782 1782 # choose a consistent winner when there's a tie
1783 1783 return min(map(self.node, ancs))
1784 1784 return nullid
1785 1785
1786 1786 def _match(self, id):
1787 1787 if isinstance(id, int):
1788 1788 # rev
1789 1789 return self.node(id)
1790 1790 if len(id) == 20:
1791 1791 # possibly a binary node
1792 1792 # odds of a binary node being all hex in ASCII are 1 in 10**25
1793 1793 try:
1794 1794 node = id
1795 1795 self.rev(node) # quick search the index
1796 1796 return node
1797 1797 except LookupError:
1798 1798 pass # may be partial hex id
1799 1799 try:
1800 1800 # str(rev)
1801 1801 rev = int(id)
1802 1802 if "%d" % rev != id:
1803 1803 raise ValueError
1804 1804 if rev < 0:
1805 1805 rev = len(self) + rev
1806 1806 if rev < 0 or rev >= len(self):
1807 1807 raise ValueError
1808 1808 return self.node(rev)
1809 1809 except (ValueError, OverflowError):
1810 1810 pass
1811 1811 if len(id) == 40:
1812 1812 try:
1813 1813 # a full hex nodeid?
1814 1814 node = bin(id)
1815 1815 self.rev(node)
1816 1816 return node
1817 1817 except (TypeError, LookupError):
1818 1818 pass
1819 1819
1820 1820 def _partialmatch(self, id):
1821 1821 # we don't care about wdirfilenodeids as they should always be full hashes
1822 1822 maybewdir = wdirhex.startswith(id)
1823 1823 try:
1824 1824 partial = self.index.partialmatch(id)
1825 1825 if partial and self.hasnode(partial):
1826 1826 if maybewdir:
1827 1827 # single 'ff...' match in radix tree, ambiguous with wdir
1828 1828 raise RevlogError
1829 1829 return partial
1830 1830 if maybewdir:
1831 1831 # no 'ff...' match in radix tree, wdir identified
1832 1832 raise error.WdirUnsupported
1833 1833 return None
1834 1834 except RevlogError:
1835 1835 # parsers.c radix tree lookup gave multiple matches
1836 1836 # fast path: for unfiltered changelog, radix tree is accurate
1837 1837 if not getattr(self, 'filteredrevs', None):
1838 1838 raise AmbiguousPrefixLookupError(id, self.indexfile,
1839 1839 _('ambiguous identifier'))
1840 1840 # fall through to slow path that filters hidden revisions
1841 1841 except (AttributeError, ValueError):
1842 1842 # we are pure python, or key was too short to search radix tree
1843 1843 pass
1844 1844
1845 1845 if id in self._pcache:
1846 1846 return self._pcache[id]
1847 1847
1848 1848 if len(id) <= 40:
1849 1849 try:
1850 1850 # hex(node)[:...]
1851 1851 l = len(id) // 2 # grab an even number of digits
1852 1852 prefix = bin(id[:l * 2])
1853 1853 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1854 1854 nl = [n for n in nl if hex(n).startswith(id) and
1855 1855 self.hasnode(n)]
1856 1856 if len(nl) > 0:
1857 1857 if len(nl) == 1 and not maybewdir:
1858 1858 self._pcache[id] = nl[0]
1859 1859 return nl[0]
1860 1860 raise AmbiguousPrefixLookupError(id, self.indexfile,
1861 1861 _('ambiguous identifier'))
1862 1862 if maybewdir:
1863 1863 raise error.WdirUnsupported
1864 1864 return None
1865 1865 except TypeError:
1866 1866 pass
1867 1867
1868 1868 def lookup(self, id):
1869 1869 """locate a node based on:
1870 1870 - revision number or str(revision number)
1871 1871 - nodeid or subset of hex nodeid
1872 1872 """
1873 1873 n = self._match(id)
1874 1874 if n is not None:
1875 1875 return n
1876 1876 n = self._partialmatch(id)
1877 1877 if n:
1878 1878 return n
1879 1879
1880 1880 raise LookupError(id, self.indexfile, _('no match found'))
1881 1881
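lookup therefore accepts several identifier shapes; a hedged sketch of the accepted forms (`rl` and `somenode` are placeholders for an open revlog and one of its nodes):

# Illustrative only: each call resolves to a binary node ID or raises
# LookupError. `rl` is an open revlog, `somenode` one of its nodes.
from mercurial.node import hex

rl.lookup(0)                  # revision number
rl.lookup('0')                # str(revision number)
rl.lookup(somenode)           # 20-byte binary node
rl.lookup(hex(somenode))      # full 40-digit hex node
rl.lookup(hex(somenode)[:8])  # unambiguous hex prefix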
1882 1882 def shortest(self, node, minlength=1):
1883 1883 """Find the shortest unambiguous prefix that matches node."""
1884 1884 def isvalid(prefix):
1885 1885 try:
1886 1886 node = self._partialmatch(prefix)
1887 1887 except error.RevlogError:
1888 1888 return False
1889 1889 except error.WdirUnsupported:
1890 1890 # single 'ff...' match
1891 1891 return True
1892 1892 if node is None:
1893 1893 raise LookupError(node, self.indexfile, _('no node'))
1894 1894 return True
1895 1895
1896 1896 def maybewdir(prefix):
1897 1897 return all(c == 'f' for c in prefix)
1898 1898
1899 1899 hexnode = hex(node)
1900 1900
1901 1901 def disambiguate(hexnode, minlength):
1902 1902 """Disambiguate against wdirid."""
1903 1903 for length in range(minlength, 41):
1904 1904 prefix = hexnode[:length]
1905 1905 if not maybewdir(prefix):
1906 1906 return prefix
1907 1907
1908 1908 if not getattr(self, 'filteredrevs', None):
1909 1909 try:
1910 1910 length = max(self.index.shortest(node), minlength)
1911 1911 return disambiguate(hexnode, length)
1912 1912 except RevlogError:
1913 1913 if node != wdirid:
1914 1914 raise LookupError(node, self.indexfile, _('no node'))
1915 1915 except AttributeError:
1916 1916 # Fall through to pure code
1917 1917 pass
1918 1918
1919 1919 if node == wdirid:
1920 1920 for length in range(minlength, 41):
1921 1921 prefix = hexnode[:length]
1922 1922 if isvalid(prefix):
1923 1923 return prefix
1924 1924
1925 1925 for length in range(minlength, 41):
1926 1926 prefix = hexnode[:length]
1927 1927 if isvalid(prefix):
1928 1928 return disambiguate(hexnode, length)
1929 1929
1930 1930 def cmp(self, node, text):
1931 1931 """compare text with a given file revision
1932 1932
1933 1933 returns True if text is different from what is stored.
1934 1934 """
1935 1935 p1, p2 = self.parents(node)
1936 1936 return hash(text, p1, p2) != node
1937 1937
1938 1938 def _cachesegment(self, offset, data):
1939 1939 """Add a segment to the revlog cache.
1940 1940
1941 1941 Accepts an absolute offset and the data that is at that location.
1942 1942 """
1943 1943 o, d = self._chunkcache
1944 1944 # try to add to existing cache
1945 1945 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1946 1946 self._chunkcache = o, d + data
1947 1947 else:
1948 1948 self._chunkcache = offset, data
1949 1949
1950 1950 def _readsegment(self, offset, length, df=None):
1951 1951 """Load a segment of raw data from the revlog.
1952 1952
1953 1953 Accepts an absolute offset, length to read, and an optional existing
1954 1954 file handle to read from.
1955 1955
1956 1956 If an existing file handle is passed, it will be seeked and the
1957 1957 original seek position will NOT be restored.
1958 1958
1959 1959 Returns a str or buffer of raw byte data.
1960 1960 """
1961 1961 # Cache data both forward and backward around the requested
1962 1962 # data, in a fixed size window. This helps speed up operations
1963 1963 # involving reading the revlog backwards.
1964 1964 cachesize = self._chunkcachesize
1965 1965 realoffset = offset & ~(cachesize - 1)
1966 1966 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
1967 1967 - realoffset)
1968 1968 with self._datareadfp(df) as df:
1969 1969 df.seek(realoffset)
1970 1970 d = df.read(reallength)
1971 1971 self._cachesegment(realoffset, d)
1972 1972 if offset != realoffset or reallength != length:
1973 1973 return util.buffer(d, offset - realoffset, length)
1974 1974 return d
1975 1975
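The masking above assumes the cache size is a power of two; a worked sketch with hypothetical numbers showing how a request is widened to an aligned window:

# Hypothetical: 64 KiB window, request 100 bytes at offset 70000.
cachesize = 65536
offset, length = 70000, 100
realoffset = offset & ~(cachesize - 1)
reallength = ((offset + length + cachesize) & ~(cachesize - 1)) - realoffset
# The actual read covers [65536, 131072), which contains the request.
assert (realoffset, reallength) == (65536, 65536)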
1976 1976 def _getsegment(self, offset, length, df=None):
1977 1977 """Obtain a segment of raw data from the revlog.
1978 1978
1979 1979 Accepts an absolute offset, length of bytes to obtain, and an
1980 1980 optional file handle to the already-opened revlog. If the file
1981 1981 handle is used, its original seek position will not be preserved.
1982 1982
1983 1983 Requests for data may be returned from a cache.
1984 1984
1985 1985 Returns a str or a buffer instance of raw byte data.
1986 1986 """
1987 1987 o, d = self._chunkcache
1988 1988 l = len(d)
1989 1989
1990 1990 # is it in the cache?
1991 1991 cachestart = offset - o
1992 1992 cacheend = cachestart + length
1993 1993 if cachestart >= 0 and cacheend <= l:
1994 1994 if cachestart == 0 and cacheend == l:
1995 1995 return d # avoid a copy
1996 1996 return util.buffer(d, cachestart, cacheend - cachestart)
1997 1997
1998 1998 return self._readsegment(offset, length, df=df)
1999 1999
2000 2000 def _getsegmentforrevs(self, startrev, endrev, df=None):
2001 2001 """Obtain a segment of raw data corresponding to a range of revisions.
2002 2002
2003 2003 Accepts the start and end revisions and an optional already-open
2004 2004 file handle to be used for reading. If the file handle is used, its
2005 2005 seek position will not be preserved.
2006 2006
2007 2007 Requests for data may be satisfied by a cache.
2008 2008
2009 2009 Returns a 2-tuple of (offset, data) for the requested range of
2010 2010 revisions. Offset is the integer offset from the beginning of the
2011 2011 revlog and data is a str or buffer of the raw byte data.
2012 2012
2013 2013 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
2014 2014 to determine where each revision's data begins and ends.
2015 2015 """
2016 2016 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
2017 2017 # (functions are expensive).
2018 2018 index = self.index
2019 2019 istart = index[startrev]
2020 2020 start = int(istart[0] >> 16)
2021 2021 if startrev == endrev:
2022 2022 end = start + istart[1]
2023 2023 else:
2024 2024 iend = index[endrev]
2025 2025 end = int(iend[0] >> 16) + iend[1]
2026 2026
2027 2027 if self._inline:
2028 2028 start += (startrev + 1) * self._io.size
2029 2029 end += (endrev + 1) * self._io.size
2030 2030 length = end - start
2031 2031
2032 2032 return start, self._getsegment(start, length, df=df)
2033 2033
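For inline revlogs, index entries and chunk data are interleaved in one file, so the code above shifts physical offsets past the index entries written before each revision. A worked sketch with hypothetical sizes:

# Hypothetical: 64-byte index entries, startrev 5 whose chunk data
# logically starts at byte 1000 of the data stream.
iosize = 64          # stands in for self._io.size
start = 1000         # stands in for int(istart[0] >> 16)
startrev = 5
physical = start + (startrev + 1) * iosize  # skip 6 interleaved entries
assert physical == 1384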
2034 2034 def _chunk(self, rev, df=None):
2035 2035 """Obtain a single decompressed chunk for a revision.
2036 2036
2037 2037 Accepts an integer revision and an optional already-open file handle
2038 2038 to be used for reading. If used, the seek position of the file will not
2039 2039 be preserved.
2040 2040
2041 2041 Returns a str holding uncompressed data for the requested revision.
2042 2042 """
2043 2043 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
2044 2044
2045 2045 def _chunks(self, revs, df=None, targetsize=None):
2046 2046 """Obtain decompressed chunks for the specified revisions.
2047 2047
2048 2048 Accepts an iterable of numeric revisions that are assumed to be in
2049 2049 ascending order. Also accepts an optional already-open file handle
2050 2050 to be used for reading. If used, the seek position of the file will
2051 2051 not be preserved.
2052 2052
2053 2053 This function is similar to calling ``self._chunk()`` multiple times,
2054 2054 but is faster.
2055 2055
2056 2056 Returns a list with decompressed data for each requested revision.
2057 2057 """
2058 2058 if not revs:
2059 2059 return []
2060 2060 start = self.start
2061 2061 length = self.length
2062 2062 inline = self._inline
2063 2063 iosize = self._io.size
2064 2064 buffer = util.buffer
2065 2065
2066 2066 l = []
2067 2067 ladd = l.append
2068 2068
2069 2069 if not self._withsparseread:
2070 2070 slicedchunks = (revs,)
2071 2071 else:
2072 2072 slicedchunks = _slicechunk(self, revs, targetsize=targetsize)
2073 2073
2074 2074 for revschunk in slicedchunks:
2075 2075 firstrev = revschunk[0]
2076 2076 # Skip trailing revisions with empty diff
2077 2077 for lastrev in revschunk[::-1]:
2078 2078 if length(lastrev) != 0:
2079 2079 break
2080 2080
2081 2081 try:
2082 2082 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
2083 2083 except OverflowError:
2084 2084 # issue4215 - we can't cache a run of chunks greater than
2085 2085 # 2G on Windows
2086 2086 return [self._chunk(rev, df=df) for rev in revschunk]
2087 2087
2088 2088 decomp = self.decompress
2089 2089 for rev in revschunk:
2090 2090 chunkstart = start(rev)
2091 2091 if inline:
2092 2092 chunkstart += (rev + 1) * iosize
2093 2093 chunklength = length(rev)
2094 2094 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
2095 2095
2096 2096 return l
2097 2097
2098 2098 def _chunkclear(self):
2099 2099 """Clear the raw chunk cache."""
2100 2100 self._chunkcache = (0, '')
2101 2101
2102 2102 def deltaparent(self, rev):
2103 2103 """return deltaparent of the given revision"""
2104 2104 base = self.index[rev][3]
2105 2105 if base == rev:
2106 2106 return nullrev
2107 2107 elif self._generaldelta:
2108 2108 return base
2109 2109 else:
2110 2110 return rev - 1
2111 2111
2112 2112 def issnapshot(self, rev):
2113 2113 """tells whether rev is a snapshot
2114 2114 """
2115 2115 if rev == nullrev:
2116 2116 return True
2117 2117 deltap = self.deltaparent(rev)
2118 2118 if deltap == nullrev:
2119 2119 return True
2120 2120 p1, p2 = self.parentrevs(rev)
2121 2121 if deltap in (p1, p2):
2122 2122 return False
2123 2123 return self.issnapshot(deltap)
2124 2124
2125 2125 def snapshotdepth(self, rev):
2126 2126 """number of snapshot in the chain before this one"""
2127 2127 if not self.issnapshot(rev):
2128 2128 raise ProgrammingError('revision %d not a snapshot')
2129 2129 return len(self._deltachain(rev)[0]) - 1
2130 2130
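issnapshot bottoms out at a full text and returns False as soon as the chain takes a delta against a parent. A toy model of that recursion (hypothetical chain data, not the revlog API):

# Toy chain: rev 0 is a full text, rev 1 deltas against its parent,
# rev 2 deltas against rev 0, which is not one of its parents.
deltaparent = {0: -1, 1: 0, 2: 0}
parents = {0: (-1, -1), 1: (0, -1), 2: (1, -1)}

def toy_issnapshot(rev):
    if rev == -1 or deltaparent[rev] == -1:
        return True                    # full text: a snapshot
    if deltaparent[rev] in parents[rev]:
        return False                   # delta against a parent: not one
    return toy_issnapshot(deltaparent[rev])

assert toy_issnapshot(0) and not toy_issnapshot(1) and toy_issnapshot(2)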
2131 2131 def revdiff(self, rev1, rev2):
2132 2132 """return or calculate a delta between two revisions
2133 2133
2134 2134 The delta calculated is in binary form and is intended to be written to
2135 2135 revlog data directly. So this function needs raw revision data.
2136 2136 """
2137 2137 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
2138 2138 return bytes(self._chunk(rev2))
2139 2139
2140 2140 return mdiff.textdiff(self.revision(rev1, raw=True),
2141 2141 self.revision(rev2, raw=True))
2142 2142
2143 2143 def revision(self, nodeorrev, _df=None, raw=False):
2144 2144 """return an uncompressed revision of a given node or revision
2145 2145 number.
2146 2146
2147 2147 _df - an existing file handle to read from. (internal-only)
2148 2148 raw - an optional argument specifying if the revision data is to be
2149 2149 treated as raw data when applying flag transforms. 'raw' should be set
2150 2150 to True when generating changegroups or in debug commands.
2151 2151 """
2152 2152 if isinstance(nodeorrev, int):
2153 2153 rev = nodeorrev
2154 2154 node = self.node(rev)
2155 2155 else:
2156 2156 node = nodeorrev
2157 2157 rev = None
2158 2158
2159 2159 cachedrev = None
2160 2160 flags = None
2161 2161 rawtext = None
2162 2162 if node == nullid:
2163 2163 return ""
2164 2164 if self._cache:
2165 2165 if self._cache[0] == node:
2166 2166 # _cache only stores rawtext
2167 2167 if raw:
2168 2168 return self._cache[2]
2169 2169 # duplicated, but good for perf
2170 2170 if rev is None:
2171 2171 rev = self.rev(node)
2172 2172 if flags is None:
2173 2173 flags = self.flags(rev)
2174 2174 # no extra flags set, no flag processor runs, text = rawtext
2175 2175 if flags == REVIDX_DEFAULT_FLAGS:
2176 2176 return self._cache[2]
2177 2177 # rawtext is reusable. need to run flag processor
2178 2178 rawtext = self._cache[2]
2179 2179
2180 2180 cachedrev = self._cache[1]
2181 2181
2182 2182 # look up what we need to read
2183 2183 if rawtext is None:
2184 2184 if rev is None:
2185 2185 rev = self.rev(node)
2186 2186
2187 2187 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
2188 2188 if stopped:
2189 2189 rawtext = self._cache[2]
2190 2190
2191 2191 # drop cache to save memory
2192 2192 self._cache = None
2193 2193
2194 2194 targetsize = None
2195 2195 rawsize = self.index[rev][2]
2196 2196 if 0 <= rawsize:
2197 2197 targetsize = 4 * rawsize
2198 2198
2199 2199 bins = self._chunks(chain, df=_df, targetsize=targetsize)
2200 2200 if rawtext is None:
2201 2201 rawtext = bytes(bins[0])
2202 2202 bins = bins[1:]
2203 2203
2204 2204 rawtext = mdiff.patches(rawtext, bins)
2205 2205 self._cache = (node, rev, rawtext)
2206 2206
2207 2207 if flags is None:
2208 2208 if rev is None:
2209 2209 rev = self.rev(node)
2210 2210 flags = self.flags(rev)
2211 2211
2212 2212 text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
2213 2213 if validatehash:
2214 2214 self.checkhash(text, node, rev=rev)
2215 2215
2216 2216 return text
2217 2217
2218 2218 def hash(self, text, p1, p2):
2219 2219 """Compute a node hash.
2220 2220
2221 2221 Available as a function so that subclasses can replace the hash
2222 2222 as needed.
2223 2223 """
2224 2224 return hash(text, p1, p2)
2225 2225
2226 2226 def _processflags(self, text, flags, operation, raw=False):
2227 2227 """Inspect revision data flags and applies transforms defined by
2228 2228 registered flag processors.
2229 2229
2230 2230 ``text`` - the revision data to process
2231 2231 ``flags`` - the revision flags
2232 2232 ``operation`` - the operation being performed (read or write)
2233 2233 ``raw`` - an optional argument describing if the raw transform should be
2234 2234 applied.
2235 2235
2236 2236 This method processes the flags in the order (or reverse order if
2237 2237 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
2238 2238 flag processors registered for present flags. The order of flags defined
2239 2239 in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.
2240 2240
2241 2241 Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
2242 2242 processed text and ``validatehash`` is a bool indicating whether the
2243 2243 returned text should be checked for hash integrity.
2244 2244
2245 2245 Note: If the ``raw`` argument is set, it has precedence over the
2246 2246 operation and will only update the value of ``validatehash``.
2247 2247 """
2248 2248 # fast path: no flag processors will run
2249 2249 if flags == 0:
2250 2250 return text, True
2251 2251 if operation not in ('read', 'write'):
2252 2252 raise ProgrammingError(_("invalid '%s' operation") % operation)
2253 2253 # Check all flags are known.
2254 2254 if flags & ~REVIDX_KNOWN_FLAGS:
2255 2255 raise RevlogError(_("incompatible revision flag '%#x'") %
2256 2256 (flags & ~REVIDX_KNOWN_FLAGS))
2257 2257 validatehash = True
2258 2258 # Depending on the operation (read or write), the order might be
2259 2259 # reversed due to non-commutative transforms.
2260 2260 orderedflags = REVIDX_FLAGS_ORDER
2261 2261 if operation == 'write':
2262 2262 orderedflags = reversed(orderedflags)
2263 2263
2264 2264 for flag in orderedflags:
2265 2265 # If a flagprocessor has been registered for a known flag, apply the
2266 2266 # related operation transform and update result tuple.
2267 2267 if flag & flags:
2268 2268 vhash = True
2269 2269
2270 2270 if flag not in _flagprocessors:
2271 2271 message = _("missing processor for flag '%#x'") % (flag)
2272 2272 raise RevlogError(message)
2273 2273
2274 2274 processor = _flagprocessors[flag]
2275 2275 if processor is not None:
2276 2276 readtransform, writetransform, rawtransform = processor
2277 2277
2278 2278 if raw:
2279 2279 vhash = rawtransform(self, text)
2280 2280 elif operation == 'read':
2281 2281 text, vhash = readtransform(self, text)
2282 2282 else: # write operation
2283 2283 text, vhash = writetransform(self, text)
2284 2284 validatehash = validatehash and vhash
2285 2285
2286 2286 return text, validatehash
2287 2287
2288 2288 def checkhash(self, text, node, p1=None, p2=None, rev=None):
2289 2289 """Check node hash integrity.
2290 2290
2291 2291 Available as a function so that subclasses can extend hash mismatch
2292 2292 behaviors as needed.
2293 2293 """
2294 2294 try:
2295 2295 if p1 is None and p2 is None:
2296 2296 p1, p2 = self.parents(node)
2297 2297 if node != self.hash(text, p1, p2):
2298 2298 revornode = rev
2299 2299 if revornode is None:
2300 2300 revornode = templatefilters.short(hex(node))
2301 2301 raise RevlogError(_("integrity check failed on %s:%s")
2302 2302 % (self.indexfile, pycompat.bytestr(revornode)))
2303 2303 except RevlogError:
2304 2304 if self._censorable and _censoredtext(text):
2305 2305 raise error.CensoredNodeError(self.indexfile, node, text)
2306 2306 raise
2307 2307
2308 2308 def _enforceinlinesize(self, tr, fp=None):
2309 2309 """Check if the revlog is too big for inline and convert if so.
2310 2310
2311 2311 This should be called after revisions are added to the revlog. If the
2312 2312 revlog has grown too large to be an inline revlog, it will convert it
2313 2313 to use multiple index and data files.
2314 2314 """
2315 2315 tiprev = len(self) - 1
2316 2316 if (not self._inline or
2317 2317 (self.start(tiprev) + self.length(tiprev)) < _maxinline):
2318 2318 return
2319 2319
2320 2320 trinfo = tr.find(self.indexfile)
2321 2321 if trinfo is None:
2322 2322 raise RevlogError(_("%s not found in the transaction")
2323 2323 % self.indexfile)
2324 2324
2325 2325 trindex = trinfo[2]
2326 2326 if trindex is not None:
2327 2327 dataoff = self.start(trindex)
2328 2328 else:
2329 2329 # revlog was stripped at start of transaction, use all leftover data
2330 2330 trindex = len(self) - 1
2331 2331 dataoff = self.end(tiprev)
2332 2332
2333 2333 tr.add(self.datafile, dataoff)
2334 2334
2335 2335 if fp:
2336 2336 fp.flush()
2337 2337 fp.close()
2338 2338
2339 2339 with self._datafp('w') as df:
2340 2340 for r in self:
2341 2341 df.write(self._getsegmentforrevs(r, r)[1])
2342 2342
2343 2343 with self._indexfp('w') as fp:
2344 2344 self.version &= ~FLAG_INLINE_DATA
2345 2345 self._inline = False
2346 2346 io = self._io
2347 2347 for i in self:
2348 2348 e = io.packentry(self.index[i], self.node, self.version, i)
2349 2349 fp.write(e)
2350 2350
2351 2351 # the temp file replaces the real index when we exit the context
2352 2352 # manager
2353 2353
2354 2354 tr.replace(self.indexfile, trindex * self._io.size)
2355 2355 self._chunkclear()
2356 2356
2357 2357 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
2358 2358 node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
2359 2359 """add a revision to the log
2360 2360
2361 2361 text - the revision data to add
2362 2362 transaction - the transaction object used for rollback
2363 2363 link - the linkrev data to add
2364 2364 p1, p2 - the parent nodeids of the revision
2365 2365 cachedelta - an optional precomputed delta
2366 2366 node - nodeid of revision; typically node is not specified, and it is
2367 2367 computed by default as hash(text, p1, p2), however subclasses might
2368 2368 use a different hashing method (and override checkhash() in that case)
2369 2369 flags - the known flags to set on the revision
2370 2370 deltacomputer - an optional _deltacomputer instance shared between
2371 2371 multiple calls
2372 2372 """
2373 2373 if link == nullrev:
2374 2374 raise RevlogError(_("attempted to add linkrev -1 to %s")
2375 2375 % self.indexfile)
2376 2376
2377 2377 if flags:
2378 2378 node = node or self.hash(text, p1, p2)
2379 2379
2380 2380 rawtext, validatehash = self._processflags(text, flags, 'write')
2381 2381
2382 2382 # If the flag processor modifies the revision data, ignore any provided
2383 2383 # cachedelta.
2384 2384 if rawtext != text:
2385 2385 cachedelta = None
2386 2386
2387 2387 if len(rawtext) > _maxentrysize:
2388 2388 raise RevlogError(
2389 2389 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
2390 2390 % (self.indexfile, len(rawtext)))
2391 2391
2392 2392 node = node or self.hash(rawtext, p1, p2)
2393 2393 if node in self.nodemap:
2394 2394 return node
2395 2395
2396 2396 if validatehash:
2397 2397 self.checkhash(rawtext, node, p1=p1, p2=p2)
2398 2398
2399 2399 return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
2400 2400 flags, cachedelta=cachedelta,
2401 2401 deltacomputer=deltacomputer)
2402 2402
2403 2403 def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
2404 2404 cachedelta=None, deltacomputer=None):
2405 2405 """add a raw revision with known flags, node and parents
2406 2406 useful when reusing a revision not stored in this revlog (e.g. received
2407 2407 over the wire, or read from an external bundle).
2408 2408 """
2409 2409 dfh = None
2410 2410 if not self._inline:
2411 2411 dfh = self._datafp("a+")
2412 2412 ifh = self._indexfp("a+")
2413 2413 try:
2414 2414 return self._addrevision(node, rawtext, transaction, link, p1, p2,
2415 2415 flags, cachedelta, ifh, dfh,
2416 2416 deltacomputer=deltacomputer)
2417 2417 finally:
2418 2418 if dfh:
2419 2419 dfh.close()
2420 2420 ifh.close()
2421 2421
2422 2422 def compress(self, data):
2423 2423 """Generate a possibly-compressed representation of data."""
2424 2424 if not data:
2425 2425 return '', data
2426 2426
2427 2427 compressed = self._compressor.compress(data)
2428 2428
2429 2429 if compressed:
2430 2430 # The revlog compressor added the header in the returned data.
2431 2431 return '', compressed
2432 2432
2433 2433 if data[0:1] == '\0':
2434 2434 return '', data
2435 2435 return 'u', data
2436 2436
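The one-byte type convention: an empty prefix means the compressor's own header identifies the chunk (for zlib that first byte is 'x'), 'u' marks stored-uncompressed data, and a leading '\0' passes through as-is. A small sketch with plain zlib, which the default engine uses:

import zlib

data = b'some revision text' * 10
comp = zlib.compress(data)
assert comp[0:1] == b'x'               # zlib header doubles as type byte
assert zlib.decompress(comp) == data
# Incompressible data would instead be stored as b'u' + data.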
2437 2437 def decompress(self, data):
2438 2438 """Decompress a revlog chunk.
2439 2439
2440 2440 The chunk is expected to begin with a header identifying the
2441 2441 format type so it can be routed to an appropriate decompressor.
2442 2442 """
2443 2443 if not data:
2444 2444 return data
2445 2445
2446 2446 # Revlogs are read much more frequently than they are written and many
2447 2447 # chunks only take microseconds to decompress, so performance is
2448 2448 # important here.
2449 2449 #
2450 2450 # We can make a few assumptions about revlogs:
2451 2451 #
2452 2452 # 1) the majority of chunks will be compressed (as opposed to inline
2453 2453 # raw data).
2454 2454 # 2) decompressing *any* data will likely be at least 10x slower than
2455 2455 # returning raw inline data.
2456 2456 # 3) we want to prioritize common and officially supported compression
2457 2457 # engines
2458 2458 #
2459 2459 # It follows that we want to optimize for "decompress compressed data
2460 2460 # when encoded with common and officially supported compression engines"
2461 2461 # case over "raw data" and "data encoded by less common or non-official
2462 2462 # compression engines." That is why we have the inline lookup first
2463 2463 # followed by the compengines lookup.
2464 2464 #
2465 2465 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2466 2466 # compressed chunks. And this matters for changelog and manifest reads.
2467 2467 t = data[0:1]
2468 2468
2469 2469 if t == 'x':
2470 2470 try:
2471 2471 return _zlibdecompress(data)
2472 2472 except zlib.error as e:
2473 2473 raise RevlogError(_('revlog decompress error: %s') %
2474 2474 stringutil.forcebytestr(e))
2475 2475 # '\0' is more common than 'u' so it goes first.
2476 2476 elif t == '\0':
2477 2477 return data
2478 2478 elif t == 'u':
2479 2479 return util.buffer(data, 1)
2480 2480
2481 2481 try:
2482 2482 compressor = self._decompressors[t]
2483 2483 except KeyError:
2484 2484 try:
2485 2485 engine = util.compengines.forrevlogheader(t)
2486 2486 compressor = engine.revlogcompressor()
2487 2487 self._decompressors[t] = compressor
2488 2488 except KeyError:
2489 2489 raise RevlogError(_('unknown compression type %r') % t)
2490 2490
2491 2491 return compressor.decompress(data)
2492 2492
2493 2493 def _isgooddeltainfo(self, deltainfo, revinfo):
2494 2494 """Returns True if the given delta is good. Good means that it is within
2495 2495 the disk span, disk size, and chain length bounds that we know to be
2496 2496 performant."""
2497 2497 if deltainfo is None:
2498 2498 return False
2499 2499
2500 2500 # - 'deltainfo.distance' is the distance from the base revision --
2501 2501 # bounding it limits the amount of I/O we need to do.
2502 2502 # - 'deltainfo.compresseddeltalen' is the sum of the total size of
2503 2503 # deltas we need to apply -- bounding it limits the amount of CPU
2504 2504 # we consume.
2505 2505
2506 2506 if self._sparserevlog:
2507 2507 # As sparse-read will be used, we can consider that the distance,
2508 2508 # instead of being the span of the whole chunk,
2509 2509 # is the span of the largest read chunk
2510 2510 base = deltainfo.base
2511 2511
2512 2512 if base != nullrev:
2513 2513 deltachain = self._deltachain(base)[0]
2514 2514 else:
2515 2515 deltachain = []
2516 2516
2517 # search for the first non-snapshot revision
2518 for idx, r in enumerate(deltachain):
2519 if not self.issnapshot(r):
2520 break
2521 deltachain = deltachain[idx:]
2517 2522 chunks = _slicechunk(self, deltachain, deltainfo)
2518 2523 all_span = [_segmentspan(self, revs, deltainfo) for revs in chunks]
2519 2524 distance = max(all_span)
2520 2525 else:
2521 2526 distance = deltainfo.distance
2522 2527
2523 2528 textlen = revinfo.textlen
2524 2529 defaultmax = textlen * 4
2525 2530 maxdist = self._maxdeltachainspan
2526 2531 if not maxdist:
2527 2532 maxdist = distance # ensure the conditional pass
2528 2533 maxdist = max(maxdist, defaultmax)
2529 2534 if self._sparserevlog and maxdist < self._srmingapsize:
2530 2535 # In multiple places, we ignore irrelevant data ranges below a
2531 2536 # certain size. We also apply this tradeoff here and relax the span
2532 2537 # constraint for small enough content.
2533 2538 maxdist = self._srmingapsize
2534 2539
2535 2540 # Bad delta from read span:
2536 2541 #
2537 2542 # If the span of data read is larger than the maximum allowed.
2538 2543 if maxdist < distance:
2539 2544 return False
2540 2545
2541 2546 # Bad delta from new delta size:
2542 2547 #
2543 2548 # If the delta size is larger than the target text, storing the
2544 2549 # delta will be inefficient.
2545 2550 if textlen < deltainfo.deltalen:
2546 2551 return False
2547 2552
2548 2553 # Bad delta from cumulated payload size:
2549 2554 #
2550 2555 # If the sum of the deltas gets larger than K * the target text length.
2551 2556 if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
2552 2557 return False
2553 2558
2554 2559 # Bad delta from chain length:
2555 2560 #
2556 2561 # If the number of deltas in the chain gets too high.
2557 2562 if self._maxchainlen and self._maxchainlen < deltainfo.chainlen:
2558 2563 return False
2559 2564
2560 2565 # bad delta from intermediate snapshot size limit
2561 2566 #
2562 2567 # If an intermediate snapshot's size exceeds the limit. The limit
2563 2568 # exists to prevent endless chains of intermediate deltas from being
2564 2569 # created.
2565 2570 if (deltainfo.snapshotdepth is not None and
2566 2571 (textlen >> deltainfo.snapshotdepth) < deltainfo.deltalen):
2567 2572 return False
2568 2573
2569 2574 # bad delta if new intermediate snapshot is larger than the previous
2570 2575 # snapshot
2571 2576 if (deltainfo.snapshotdepth
2572 2577 and self.length(deltainfo.base) < deltainfo.deltalen):
2573 2578 return False
2574 2579
2575 2580 return True
2576 2581
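A worked sketch of the read-span bound with hypothetical numbers, mirroring the maxdist computation above:

# Hypothetical: no explicit maxdeltachainspan, 1000-byte target text,
# largest read chunk spanning 3500 bytes.
textlen, maxdeltachainspan, distance = 1000, 0, 3500
maxdist = maxdeltachainspan or distance  # unset: use distance itself
maxdist = max(maxdist, 4 * textlen)      # but never below 4x the text
assert not (maxdist < distance)          # 3500 <= 4000: span check passes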
2577 2582 def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
2578 2583 cachedelta, ifh, dfh, alwayscache=False,
2579 2584 deltacomputer=None):
2580 2585 """internal function to add revisions to the log
2581 2586
2582 2587 see addrevision for argument descriptions.
2583 2588
2584 2589 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2585 2590
2586 2591 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2587 2592 be used.
2588 2593
2589 2594 invariants:
2590 2595 - rawtext is optional (can be None); if not set, cachedelta must be set.
2591 2596 if both are set, they must correspond to each other.
2592 2597 """
2593 2598 if node == nullid:
2594 2599 raise RevlogError(_("%s: attempt to add null revision") %
2595 2600 (self.indexfile))
2596 2601 if node == wdirid or node in wdirfilenodeids:
2597 2602 raise RevlogError(_("%s: attempt to add wdir revision") %
2598 2603 (self.indexfile))
2599 2604
2600 2605 if self._inline:
2601 2606 fh = ifh
2602 2607 else:
2603 2608 fh = dfh
2604 2609
2605 2610 btext = [rawtext]
2606 2611
2607 2612 curr = len(self)
2608 2613 prev = curr - 1
2609 2614 offset = self.end(prev)
2610 2615 p1r, p2r = self.rev(p1), self.rev(p2)
2611 2616
2612 2617 # full versions are inserted when the needed deltas
2613 2618 # become comparable to the uncompressed text
2614 2619 if rawtext is None:
2615 2620 # need rawtext size, before changed by flag processors, which is
2616 2621 # the non-raw size. use revlog explicitly to avoid filelog's extra
2617 2622 # logic that might remove metadata size.
2618 2623 textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
2619 2624 cachedelta[1])
2620 2625 else:
2621 2626 textlen = len(rawtext)
2622 2627
2623 2628 if deltacomputer is None:
2624 2629 deltacomputer = _deltacomputer(self)
2625 2630
2626 2631 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2627 2632
2628 2633 # no delta for flag processor revision (see "candelta" for why)
2629 2634 # not calling candelta since only one revision needs test, also to
2630 2635 # avoid overhead fetching flags again.
2631 2636 if flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
2632 2637 deltainfo = None
2633 2638 else:
2634 2639 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2635 2640
2636 2641 if deltainfo is not None:
2637 2642 base = deltainfo.base
2638 2643 chainbase = deltainfo.chainbase
2639 2644 data = deltainfo.data
2640 2645 l = deltainfo.deltalen
2641 2646 else:
2642 2647 rawtext = deltacomputer.buildtext(revinfo, fh)
2643 2648 data = self.compress(rawtext)
2644 2649 l = len(data[1]) + len(data[0])
2645 2650 base = chainbase = curr
2646 2651
2647 2652 e = (offset_type(offset, flags), l, textlen,
2648 2653 base, link, p1r, p2r, node)
2649 2654 self.index.append(e)
2650 2655 self.nodemap[node] = curr
2651 2656
2652 2657 entry = self._io.packentry(e, self.node, self.version, curr)
2653 2658 self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
2654 2659
2655 2660 if alwayscache and rawtext is None:
2656 2661 rawtext = deltacomputer.buildtext(revinfo, fh)
2657 2662
2658 2663 if type(rawtext) == bytes: # only accept immutable objects
2659 2664 self._cache = (node, curr, rawtext)
2660 2665 self._chainbasecache[curr] = chainbase
2661 2666 return node
2662 2667
2663 2668 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
2664 2669 # Files opened in a+ mode have inconsistent behavior on various
2665 2670 # platforms. Windows requires that a file positioning call be made
2666 2671 # when the file handle transitions between reads and writes. See
2667 2672 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2668 2673 # platforms, Python or the platform itself can be buggy. Some versions
2669 2674 # of Solaris have been observed to not append at the end of the file
2670 2675 # if the file was seeked to before the end. See issue4943 for more.
2671 2676 #
2672 2677 # We work around this issue by inserting a seek() before writing.
2673 2678 # Note: This is likely not necessary on Python 3.
2674 2679 ifh.seek(0, os.SEEK_END)
2675 2680 if dfh:
2676 2681 dfh.seek(0, os.SEEK_END)
2677 2682
2678 2683 curr = len(self) - 1
2679 2684 if not self._inline:
2680 2685 transaction.add(self.datafile, offset)
2681 2686 transaction.add(self.indexfile, curr * len(entry))
2682 2687 if data[0]:
2683 2688 dfh.write(data[0])
2684 2689 dfh.write(data[1])
2685 2690 ifh.write(entry)
2686 2691 else:
2687 2692 offset += curr * self._io.size
2688 2693 transaction.add(self.indexfile, offset, curr)
2689 2694 ifh.write(entry)
2690 2695 ifh.write(data[0])
2691 2696 ifh.write(data[1])
2692 2697 self._enforceinlinesize(transaction, ifh)
2693 2698
2694 2699 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
2695 2700 """
2696 2701 add a delta group
2697 2702
2698 2703 given a set of deltas, add them to the revision log. the
2699 2704 first delta is against its parent, which should be in our
2700 2705 log, the rest are against the previous delta.
2701 2706
2702 2707 If ``addrevisioncb`` is defined, it will be called with arguments of
2703 2708 this revlog and the node that was added.
2704 2709 """
2705 2710
2706 2711 nodes = []
2707 2712
2708 2713 r = len(self)
2709 2714 end = 0
2710 2715 if r:
2711 2716 end = self.end(r - 1)
2712 2717 ifh = self._indexfp("a+")
2713 2718 isize = r * self._io.size
2714 2719 if self._inline:
2715 2720 transaction.add(self.indexfile, end + isize, r)
2716 2721 dfh = None
2717 2722 else:
2718 2723 transaction.add(self.indexfile, isize, r)
2719 2724 transaction.add(self.datafile, end)
2720 2725 dfh = self._datafp("a+")
2721 2726 def flush():
2722 2727 if dfh:
2723 2728 dfh.flush()
2724 2729 ifh.flush()
2725 2730 try:
2726 2731 deltacomputer = _deltacomputer(self)
2727 2732 # loop through our set of deltas
2728 2733 for data in deltas:
2729 2734 node, p1, p2, linknode, deltabase, delta, flags = data
2730 2735 link = linkmapper(linknode)
2731 2736 flags = flags or REVIDX_DEFAULT_FLAGS
2732 2737
2733 2738 nodes.append(node)
2734 2739
2735 2740 if node in self.nodemap:
2736 2741 # this can happen if two branches make the same change
2737 2742 continue
2738 2743
2739 2744 for p in (p1, p2):
2740 2745 if p not in self.nodemap:
2741 2746 raise LookupError(p, self.indexfile,
2742 2747 _('unknown parent'))
2743 2748
2744 2749 if deltabase not in self.nodemap:
2745 2750 raise LookupError(deltabase, self.indexfile,
2746 2751 _('unknown delta base'))
2747 2752
2748 2753 baserev = self.rev(deltabase)
2749 2754
2750 2755 if baserev != nullrev and self.iscensored(baserev):
2751 2756 # if base is censored, delta must be full replacement in a
2752 2757 # single patch operation
2753 2758 hlen = struct.calcsize(">lll")
2754 2759 oldlen = self.rawsize(baserev)
2755 2760 newlen = len(delta) - hlen
2756 2761 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2757 2762 raise error.CensoredBaseError(self.indexfile,
2758 2763 self.node(baserev))
2759 2764
2760 2765 if not flags and self._peek_iscensored(baserev, delta, flush):
2761 2766 flags |= REVIDX_ISCENSORED
2762 2767
2763 2768 # We assume consumers of addrevisioncb will want to retrieve
2764 2769 # the added revision, which will require a call to
2765 2770 # revision(). revision() will fast path if there is a cache
2766 2771 # hit. So, we tell _addrevision() to always cache in this case.
2767 2772 # We're only using addgroup() in the context of changegroup
2768 2773 # generation so the revision data can always be handled as raw
2769 2774 # by the flagprocessor.
2770 2775 self._addrevision(node, None, transaction, link,
2771 2776 p1, p2, flags, (baserev, delta),
2772 2777 ifh, dfh,
2773 2778 alwayscache=bool(addrevisioncb),
2774 2779 deltacomputer=deltacomputer)
2775 2780
2776 2781 if addrevisioncb:
2777 2782 addrevisioncb(self, node)
2778 2783
2779 2784 if not dfh and not self._inline:
2780 2785 # addrevision switched from inline to conventional
2781 2786 # reopen the index
2782 2787 ifh.close()
2783 2788 dfh = self._datafp("a+")
2784 2789 ifh = self._indexfp("a+")
2785 2790 finally:
2786 2791 if dfh:
2787 2792 dfh.close()
2788 2793 ifh.close()
2789 2794
2790 2795 return nodes
2791 2796
2792 2797 def iscensored(self, rev):
2793 2798 """Check if a file revision is censored."""
2794 2799 if not self._censorable:
2795 2800 return False
2796 2801
2797 2802 return self.flags(rev) & REVIDX_ISCENSORED
2798 2803
2799 2804 def _peek_iscensored(self, baserev, delta, flush):
2800 2805 """Quickly check if a delta produces a censored revision."""
2801 2806 if not self._censorable:
2802 2807 return False
2803 2808
2804 2809 # Fragile heuristic: unless new file meta keys are added alphabetically
2805 2810 # preceding "censored", all censored revisions are prefixed by
2806 2811 # "\1\ncensored:". A delta producing such a censored revision must be a
2807 2812 # full-replacement delta, so we inspect the first and only patch in the
2808 2813 # delta for this prefix.
2809 2814 hlen = struct.calcsize(">lll")
2810 2815 if len(delta) <= hlen:
2811 2816 return False
2812 2817
2813 2818 oldlen = self.rawsize(baserev)
2814 2819 newlen = len(delta) - hlen
2815 2820 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2816 2821 return False
2817 2822
2818 2823 add = "\1\ncensored:"
2819 2824 addlen = len(add)
2820 2825 return newlen >= addlen and delta[hlen:hlen + addlen] == add
2821 2826
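The full-replacement delta it inspects is a single ">lll" hunk header followed by the new text; a sketch of constructing one and applying the prefix test (assuming replacediffheader packs start=0, end=oldlen, length=newlen, as the comparison above implies):

import struct

# Hypothetical censored revision replacing 12 old bytes.
oldlen = 12
newtext = b'\1\ncensored: some reason'
delta = struct.pack(">lll", 0, oldlen, len(newtext)) + newtext

hlen = struct.calcsize(">lll")
add = b'\1\ncensored:'
assert delta[hlen:hlen + len(add)] == add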
2822 2827 def getstrippoint(self, minlink):
2823 2828 """find the minimum rev that must be stripped to strip the linkrev
2824 2829
2825 2830 Returns a tuple containing the minimum rev and a set of all revs that
2826 2831 have linkrevs that will be broken by this strip.
2827 2832 """
2828 2833 brokenrevs = set()
2829 2834 strippoint = len(self)
2830 2835
2831 2836 heads = {}
2832 2837 futurelargelinkrevs = set()
2833 2838 for head in self.headrevs():
2834 2839 headlinkrev = self.linkrev(head)
2835 2840 heads[head] = headlinkrev
2836 2841 if headlinkrev >= minlink:
2837 2842 futurelargelinkrevs.add(headlinkrev)
2838 2843
2839 2844 # This algorithm involves walking down the rev graph, starting at the
2840 2845 # heads. Since the revs are topologically sorted according to linkrev,
2841 2846 # once all head linkrevs are below the minlink, we know there are
2842 2847 # no more revs that could have a linkrev greater than minlink.
2843 2848 # So we can stop walking.
2844 2849 while futurelargelinkrevs:
2845 2850 strippoint -= 1
2846 2851 linkrev = heads.pop(strippoint)
2847 2852
2848 2853 if linkrev < minlink:
2849 2854 brokenrevs.add(strippoint)
2850 2855 else:
2851 2856 futurelargelinkrevs.remove(linkrev)
2852 2857
2853 2858 for p in self.parentrevs(strippoint):
2854 2859 if p != nullrev:
2855 2860 plinkrev = self.linkrev(p)
2856 2861 heads[p] = plinkrev
2857 2862 if plinkrev >= minlink:
2858 2863 futurelargelinkrevs.add(plinkrev)
2859 2864
2860 2865 return strippoint, brokenrevs
2861 2866
2862 2867 def strip(self, minlink, transaction):
2863 2868 """truncate the revlog on the first revision with a linkrev >= minlink
2864 2869
2865 2870 This function is called when we're stripping revision minlink and
2866 2871 its descendants from the repository.
2867 2872
2868 2873 We have to remove all revisions with linkrev >= minlink, because
2869 2874 the equivalent changelog revisions will be renumbered after the
2870 2875 strip.
2871 2876
2872 2877 So we truncate the revlog on the first of these revisions, and
2873 2878 trust that the caller has saved the revisions that shouldn't be
2874 2879 removed and that it'll re-add them after this truncation.
2875 2880 """
2876 2881 if len(self) == 0:
2877 2882 return
2878 2883
2879 2884 rev, _ = self.getstrippoint(minlink)
2880 2885 if rev == len(self):
2881 2886 return
2882 2887
2883 2888 # first truncate the files on disk
2884 2889 end = self.start(rev)
2885 2890 if not self._inline:
2886 2891 transaction.add(self.datafile, end)
2887 2892 end = rev * self._io.size
2888 2893 else:
2889 2894 end += rev * self._io.size
2890 2895
2891 2896 transaction.add(self.indexfile, end)
2892 2897
2893 2898 # then reset internal state in memory to forget those revisions
2894 2899 self._cache = None
2895 2900 self._chaininfocache = {}
2896 2901 self._chunkclear()
2897 2902 for x in pycompat.xrange(rev, len(self)):
2898 2903 del self.nodemap[self.node(x)]
2899 2904
2900 2905 del self.index[rev:-1]
2901 2906 self._nodepos = None
2902 2907
2903 2908 def checksize(self):
2904 2909 expected = 0
2905 2910 if len(self):
2906 2911 expected = max(0, self.end(len(self) - 1))
2907 2912
2908 2913 try:
2909 2914 with self._datafp() as f:
2910 2915 f.seek(0, 2)
2911 2916 actual = f.tell()
2912 2917 dd = actual - expected
2913 2918 except IOError as inst:
2914 2919 if inst.errno != errno.ENOENT:
2915 2920 raise
2916 2921 dd = 0
2917 2922
2918 2923 try:
2919 2924 f = self.opener(self.indexfile)
2920 2925 f.seek(0, 2)
2921 2926 actual = f.tell()
2922 2927 f.close()
2923 2928 s = self._io.size
2924 2929 i = max(0, actual // s)
2925 2930 di = actual - (i * s)
2926 2931 if self._inline:
2927 2932 databytes = 0
2928 2933 for r in self:
2929 2934 databytes += max(0, self.length(r))
2930 2935 dd = 0
2931 2936 di = actual - len(self) * s - databytes
2932 2937 except IOError as inst:
2933 2938 if inst.errno != errno.ENOENT:
2934 2939 raise
2935 2940 di = 0
2936 2941
2937 2942 return (dd, di)
2938 2943
2939 2944 def files(self):
2940 2945 res = [self.indexfile]
2941 2946 if not self._inline:
2942 2947 res.append(self.datafile)
2943 2948 return res
2944 2949
2945 2950 DELTAREUSEALWAYS = 'always'
2946 2951 DELTAREUSESAMEREVS = 'samerevs'
2947 2952 DELTAREUSENEVER = 'never'
2948 2953
2949 2954 DELTAREUSEFULLADD = 'fulladd'
2950 2955
2951 2956 DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}
2952 2957
2953 2958 def clone(self, tr, destrevlog, addrevisioncb=None,
2954 2959 deltareuse=DELTAREUSESAMEREVS, deltabothparents=None):
2955 2960 """Copy this revlog to another, possibly with format changes.
2956 2961
2957 2962 The destination revlog will contain the same revisions and nodes.
2958 2963 However, it may not be bit-for-bit identical due to e.g. delta encoding
2959 2964 differences.
2960 2965
2961 2966 The ``deltareuse`` argument controls how deltas from the existing revlog
2962 2967 are preserved in the destination revlog. The argument can have the
2963 2968 following values:
2964 2969
2965 2970 DELTAREUSEALWAYS
2966 2971 Deltas will always be reused (if possible), even if the destination
2967 2972 revlog would not select the same revisions for the delta. This is the
2968 2973 fastest mode of operation.
2969 2974 DELTAREUSESAMEREVS
2970 2975 Deltas will be reused if the destination revlog would pick the same
2971 2976 revisions for the delta. This mode strikes a balance between speed
2972 2977 and optimization.
2973 2978 DELTAREUSENEVER
2974 2979 Deltas will never be reused. This is the slowest mode of execution.
2975 2980 This mode can be used to recompute deltas (e.g. if the diff/delta
2976 2981 algorithm changes).
2977 2982
2978 2983 Delta computation can be slow, so the choice of delta reuse policy can
2979 2984 significantly affect run time.
2980 2985
2981 2986 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2982 2987 two extremes. Deltas will be reused if they are appropriate. But if the
2983 2988 delta could choose a better revision, it will do so. This means if you
2984 2989 are converting a non-generaldelta revlog to a generaldelta revlog,
2985 2990 deltas will be recomputed if the delta's parent isn't a parent of the
2986 2991 revision.
2987 2992
2988 2993 In addition to the delta policy, the ``deltabothparents`` argument
2989 2994 controls whether to compute deltas against both parents for merges.
2990 2995 If it is not set, the destination revlog keeps its current setting.
2991 2996 """
2992 2997 if deltareuse not in self.DELTAREUSEALL:
2993 2998 raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)
2994 2999
2995 3000 if len(destrevlog):
2996 3001 raise ValueError(_('destination revlog is not empty'))
2997 3002
2998 3003 if getattr(self, 'filteredrevs', None):
2999 3004 raise ValueError(_('source revlog has filtered revisions'))
3000 3005 if getattr(destrevlog, 'filteredrevs', None):
3001 3006 raise ValueError(_('destination revlog has filtered revisions'))
3002 3007
3003 3008 # lazydeltabase controls whether to reuse a cached delta, if possible.
3004 3009 oldlazydeltabase = destrevlog._lazydeltabase
3005 3010 oldamd = destrevlog._deltabothparents
3006 3011
3007 3012 try:
3008 3013 if deltareuse == self.DELTAREUSEALWAYS:
3009 3014 destrevlog._lazydeltabase = True
3010 3015 elif deltareuse == self.DELTAREUSESAMEREVS:
3011 3016 destrevlog._lazydeltabase = False
3012 3017
3013 3018 destrevlog._deltabothparents = deltabothparents or oldamd
3014 3019
3015 3020 populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
3016 3021 self.DELTAREUSESAMEREVS)
3017 3022
3018 3023 deltacomputer = _deltacomputer(destrevlog)
3019 3024 index = self.index
3020 3025 for rev in self:
3021 3026 entry = index[rev]
3022 3027
3023 3028 # Some classes override linkrev to take filtered revs into
3024 3029 # account. Use raw entry from index.
3025 3030 flags = entry[0] & 0xffff
3026 3031 linkrev = entry[4]
3027 3032 p1 = index[entry[5]][7]
3028 3033 p2 = index[entry[6]][7]
3029 3034 node = entry[7]
3030 3035
3031 3036 # (Possibly) reuse the delta from the revlog if allowed and
3032 3037 # the revlog chunk is a delta.
3033 3038 cachedelta = None
3034 3039 rawtext = None
3035 3040 if populatecachedelta:
3036 3041 dp = self.deltaparent(rev)
3037 3042 if dp != nullrev:
3038 3043 cachedelta = (dp, bytes(self._chunk(rev)))
3039 3044
3040 3045 if not cachedelta:
3041 3046 rawtext = self.revision(rev, raw=True)
3042 3047
3043 3048
3044 3049 if deltareuse == self.DELTAREUSEFULLADD:
3045 3050 destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
3046 3051 cachedelta=cachedelta,
3047 3052 node=node, flags=flags,
3048 3053 deltacomputer=deltacomputer)
3049 3054 else:
3050 3055 ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
3051 3056 checkambig=False)
3052 3057 dfh = None
3053 3058 if not destrevlog._inline:
3054 3059 dfh = destrevlog.opener(destrevlog.datafile, 'a+')
3055 3060 try:
3056 3061 destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
3057 3062 p2, flags, cachedelta, ifh, dfh,
3058 3063 deltacomputer=deltacomputer)
3059 3064 finally:
3060 3065 if dfh:
3061 3066 dfh.close()
3062 3067 ifh.close()
3063 3068
3064 3069 if addrevisioncb:
3065 3070 addrevisioncb(self, rev, node)
3066 3071 finally:
3067 3072 destrevlog._lazydeltabase = oldlazydeltabase
3068 3073 destrevlog._deltabothparents = oldamd
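A hedged usage sketch of clone, e.g. for recomputing deltas during a format upgrade; `src`, `dst`, and `tr` are placeholders for two open revlogs and an active transaction covering the destination:

# Illustrative only: src and dst are open revlog instances, tr an
# active transaction that covers dst's index and data files.
src.clone(tr, dst,
          deltareuse=src.DELTAREUSENEVER,  # force delta recomputation
          deltabothparents=None)           # keep dst's current setting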