revlog: also detect intermediate snapshots...
Paul Morelle
r39186:f39efa88 default
@@ -1,3030 +1,3033 @@
# revlog.py - storage back-end for mercurial
#
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""

from __future__ import absolute_import

import collections
import contextlib
import errno
import hashlib
import heapq
import os
import re
import struct
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullid,
    nullrev,
    wdirfilenodeids,
    wdirhex,
    wdirid,
    wdirrev,
)
from .i18n import _
from .thirdparty import (
    attr,
)
from . import (
    ancestor,
    error,
    mdiff,
    policy,
    pycompat,
    templatefilters,
    util,
)
from .utils import (
    stringutil,
)

parsers = policy.importmod(r'parsers')

# Aliased for performance.
_zlibdecompress = zlib.decompress

# revlog header flags
REVLOGV0 = 0
REVLOGV1 = 1
# Dummy value until file format is finalized.
# Reminder: change the bounds check in revlog.__init__ when this is changed.
REVLOGV2 = 0xDEAD
FLAG_INLINE_DATA = (1 << 16)
FLAG_GENERALDELTA = (1 << 17)
REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA
REVLOG_DEFAULT_FORMAT = REVLOGV1
REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA
REVLOGV2_FLAGS = REVLOGV1_FLAGS

# revlog index flags
REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
REVIDX_ELLIPSIS = (1 << 14) # revision hash does not match data (narrowhg)
REVIDX_EXTSTORED = (1 << 13) # revision data is stored externally
REVIDX_DEFAULT_FLAGS = 0
# stable order in which flags need to be processed and their processors applied
REVIDX_FLAGS_ORDER = [
    REVIDX_ISCENSORED,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
]
REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)
# bitmask for flags that could cause rawdata content change
REVIDX_RAWTEXT_CHANGING_FLAGS = REVIDX_ISCENSORED | REVIDX_EXTSTORED

# max size of revlog with inline data
_maxinline = 131072
_chunksize = 1048576

RevlogError = error.RevlogError
LookupError = error.LookupError
AmbiguousPrefixLookupError = error.AmbiguousPrefixLookupError
CensoredNodeError = error.CensoredNodeError
ProgrammingError = error.ProgrammingError

# Store flag processors (cf. 'addflagprocessor()' to register)
_flagprocessors = {
    REVIDX_ISCENSORED: None,
}

_mdre = re.compile('\1\n')
def parsemeta(text):
    """return (metadatadict, metadatasize)"""
    # text can be buffer, so we can't use .startswith or .index
    if text[:2] != '\1\n':
        return None, None
    s = _mdre.search(text, 2).start()
    mtext = text[2:s]
    meta = {}
    for l in mtext.splitlines():
        k, v = l.split(": ", 1)
        meta[k] = v
    return meta, (s + 2)

def packmeta(meta, text):
    keys = sorted(meta)
    metatext = "".join("%s: %s\n" % (k, meta[k]) for k in keys)
    return "\1\n%s\1\n%s" % (metatext, text)

def _censoredtext(text):
    m, offs = parsemeta(text)
    return m and "censored" in m

def addflagprocessor(flag, processor):
    """Register a flag processor on a revision data flag.

    Invariant:
    - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
      and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
    - Only one flag processor can be registered on a specific flag.
    - flagprocessors must be 3-tuples of functions (read, write, raw) with the
      following signatures:
          - (read)  f(self, rawtext) -> text, bool
          - (write) f(self, text) -> rawtext, bool
          - (raw)   f(self, rawtext) -> bool
      "text" is presented to the user. "rawtext" is stored in revlog data, not
      directly visible to the user.
      The boolean returned by these transforms is used to determine whether
      the returned text can be used for hash integrity checking. For example,
      if "write" returns False, then "text" is used to generate hash. If
      "write" returns True, that basically means "rawtext" returned by "write"
      should be used to generate hash. Usually, "write" and "read" return
      different booleans. And "raw" returns the same boolean as "write".

    Note: The 'raw' transform is used for changegroup generation and in some
    debug commands. In this case the transform only indicates whether the
    contents can be used for hash integrity checks.
    """
    if not flag & REVIDX_KNOWN_FLAGS:
        msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
        raise ProgrammingError(msg)
    if flag not in REVIDX_FLAGS_ORDER:
        msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
        raise ProgrammingError(msg)
    if flag in _flagprocessors:
        msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
        raise error.Abort(msg)
    _flagprocessors[flag] = processor

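# Illustrative sketch (not part of the original file): how an extension
# might register a processor with addflagprocessor(). The helpers
# _fetchexternal/_storeexternal are hypothetical; real users of
# REVIDX_EXTSTORED (such as LFS) supply their own. Returning False from
# "write" means the user-visible text, not the rawtext, is hashed.
#
#   def _readext(self, rawtext):
#       return _fetchexternal(rawtext), False
#
#   def _writeext(self, text):
#       return _storeexternal(text), False
#
#   def _rawext(self, rawtext):
#       return False  # rawtext does not match the node hash
#
#   addflagprocessor(REVIDX_EXTSTORED, (_readext, _writeext, _rawext))
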
def getoffset(q):
    return int(q >> 16)

def gettype(q):
    return int(q & 0xFFFF)

def offset_type(offset, type):
    if (type & ~REVIDX_KNOWN_FLAGS) != 0:
        raise ValueError('unknown revlog index flags')
    return int(int(offset) << 16 | type)

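# Worked example (illustrative, not part of the original file): the first
# field of an index entry packs a 48-bit byte offset and 16 flag bits into
# a single integer. For offset 0x1234 and the censored flag (1 << 15):
#
#   offset_type(0x1234, REVIDX_ISCENSORED) == 0x12348000
#   getoffset(0x12348000) == 0x1234
#   gettype(0x12348000) == 0x8000 == REVIDX_ISCENSORED
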
_nullhash = hashlib.sha1(nullid)

def hash(text, p1, p2):
    """generate a hash from the given text and its parent hashes

    This hash combines both the current file contents and its history
    in a manner that makes it easy to distinguish nodes with the same
    content in the revision graph.
    """
    # As of now, if one of the parent nodes is null, p2 is null
    if p2 == nullid:
        # deep copy of a hash is faster than creating one
        s = _nullhash.copy()
        s.update(p1)
    else:
        # none of the parent nodes are nullid
        if p1 < p2:
            a = p1
            b = p2
        else:
            a = p2
            b = p1
        s = hashlib.sha1(a)
        s.update(b)
    s.update(text)
    return s.digest()

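# Illustrative check (not part of the original file): a node hash is the
# SHA-1 of the lexicographically sorted parent nodes followed by the text,
# so hash() above is equivalent to this direct computation (the p1/p2
# values are arbitrary examples):
#
#   p1, p2 = b'\x01' * 20, b'\x02' * 20
#   expected = hashlib.sha1(min(p1, p2) + max(p1, p2) + b'data').digest()
#   assert hash(b'data', p1, p2) == expected
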
class _testrevlog(object):
    """minimalist fake revlog to use in doctests"""

    def __init__(self, data, density=0.5, mingap=0):
        """data is a list of revision payload boundaries"""
        self._data = data
        self._srdensitythreshold = density
        self._srmingapsize = mingap

    def start(self, rev):
        if rev == 0:
            return 0
        return self._data[rev - 1]

    def end(self, rev):
        return self._data[rev]

    def length(self, rev):
        return self.end(rev) - self.start(rev)

    def __len__(self):
        return len(self._data)

def _trimchunk(revlog, revs, startidx, endidx=None):
    """returns revs[startidx:endidx] without empty trailing revs

    Doctest Setup
    >>> revlog = _testrevlog([
    ...  5, #0
    ...  10, #1
    ...  12, #2
    ...  12, #3 (empty)
    ...  17, #4
    ...  21, #5
    ...  21, #6 (empty)
    ... ])

    Contiguous cases:
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
    [0, 1, 2, 3, 4, 5]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
    [0, 1, 2, 3, 4]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
    [0, 1, 2]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
    [2]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
    [3, 4, 5]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
    [3, 4]

    Discontiguous cases:
    >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
    [1, 3, 5]
    >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
    [1]
    >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
    [3, 5]
    >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
    [3, 5]
    """
    length = revlog.length

    if endidx is None:
        endidx = len(revs)

    # If we have a non-empty delta candidate, there is nothing to trim
    if revs[endidx - 1] < len(revlog):
        # Trim empty revs at the end, except the very first revision of a chain
        while (endidx > 1
               and endidx > startidx
               and length(revs[endidx - 1]) == 0):
            endidx -= 1

    return revs[startidx:endidx]

def _segmentspan(revlog, revs, deltainfo=None):
    """Get the byte span of a segment of revisions

    revs is a sorted array of revision numbers

    >>> revlog = _testrevlog([
    ...  5, #0
    ...  10, #1
    ...  12, #2
    ...  12, #3 (empty)
    ...  17, #4
    ... ])

    >>> _segmentspan(revlog, [0, 1, 2, 3, 4])
    17
    >>> _segmentspan(revlog, [0, 4])
    17
    >>> _segmentspan(revlog, [3, 4])
    5
    >>> _segmentspan(revlog, [1, 2, 3,])
    7
    >>> _segmentspan(revlog, [1, 3])
    7
    """
    if not revs:
        return 0
    if deltainfo is not None and len(revlog) <= revs[-1]:
        if len(revs) == 1:
            return deltainfo.deltalen
        offset = revlog.end(len(revlog) - 1)
        end = deltainfo.deltalen + offset
    else:
        end = revlog.end(revs[-1])
    return end - revlog.start(revs[0])

def _slicechunk(revlog, revs, deltainfo=None, targetsize=None):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one go.
    Assume that revs are sorted.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `revlog._srdensitythreshold`. No gap smaller than
    `revlog._srmingapsize` is skipped.

    If `targetsize` is set, no chunk larger than `targetsize` will be yielded.
    For consistency with other slicing choices, this limit won't go lower than
    `revlog._srmingapsize`.

    If individual revision chunks are larger than this limit, they will still
    be yielded individually.

    >>> revlog = _testrevlog([
    ...  5, #00 (5)
    ...  10, #01 (5)
    ...  12, #02 (2)
    ...  12, #03 (empty)
    ...  27, #04 (15)
    ...  31, #05 (4)
    ...  31, #06 (empty)
    ...  42, #07 (11)
    ...  47, #08 (5)
    ...  47, #09 (empty)
    ...  48, #10 (1)
    ...  51, #11 (3)
    ...  74, #12 (23)
    ...  85, #13 (11)
    ...  86, #14 (1)
    ...  91, #15 (5)
    ... ])

    >>> list(_slicechunk(revlog, list(range(16))))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(_slicechunk(revlog, [0, 15]))
    [[0], [15]]
    >>> list(_slicechunk(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(_slicechunk(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(_slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]

    Slicing with a maximum chunk size
    >>> list(_slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
    [[0], [11], [13], [15]]
    >>> list(_slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
    [[0], [11], [13, 15]]
    """
    if targetsize is not None:
        targetsize = max(targetsize, revlog._srmingapsize)
    # targetsize should not be specified when evaluating delta candidates:
    # * targetsize is used to ensure we stay within specification when reading,
    # * deltainfo is used to pick a good delta chain when writing.
    if not (deltainfo is None or targetsize is None):
        msg = 'cannot use `targetsize` with a `deltainfo`'
        raise error.ProgrammingError(msg)
    for chunk in _slicechunktodensity(revlog, revs,
                                      deltainfo,
                                      revlog._srdensitythreshold,
                                      revlog._srmingapsize):
        for subchunk in _slicechunktosize(revlog, chunk, targetsize):
            yield subchunk

def _slicechunktosize(revlog, revs, targetsize=None):
    """slice revs to match the target size

    This is intended to be used on chunks that density slicing selected,
    but that are still too large compared to the read guarantee of revlogs.
    This might happen when the "minimal gap size" interrupted the slicing
    or when chains are built in a way that creates large blocks next to
    each other.

    >>> revlog = _testrevlog([
    ...  3, #0 (3)
    ...  5, #1 (2)
    ...  6, #2 (1)
    ...  8, #3 (2)
    ...  8, #4 (empty)
    ...  11, #5 (3)
    ...  12, #6 (1)
    ...  13, #7 (1)
    ...  14, #8 (1)
    ... ])

    Cases where chunk is already small enough
    >>> list(_slicechunktosize(revlog, [0], 3))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], 3))
    [[6, 7]]
    >>> list(_slicechunktosize(revlog, [0], None))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], None))
    [[6, 7]]

    Cases where we need actual slicing
    >>> list(_slicechunktosize(revlog, [0, 1], 3))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 3))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
    [[1, 2], [3]]
    >>> list(_slicechunktosize(revlog, [3, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
    [[5], [6, 7, 8]]
    >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
    [[0], [1, 2], [3], [5], [6, 7, 8]]

    Case with too large individual chunk (must return valid chunk)
    >>> list(_slicechunktosize(revlog, [0, 1], 2))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 1))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
    [[3], [5]]
    """
    assert targetsize is None or 0 <= targetsize
    if targetsize is None or _segmentspan(revlog, revs) <= targetsize:
        yield revs
        return

    startrevidx = 0
    startdata = revlog.start(revs[0])
    endrevidx = 0
    iterrevs = enumerate(revs)
    next(iterrevs) # skip first rev.
    for idx, r in iterrevs:
        span = revlog.end(r) - startdata
        if span <= targetsize:
            endrevidx = idx
        else:
            chunk = _trimchunk(revlog, revs, startrevidx, endrevidx + 1)
            if chunk:
                yield chunk
            startrevidx = idx
            startdata = revlog.start(r)
            endrevidx = idx
    yield _trimchunk(revlog, revs, startrevidx)

def _slicechunktodensity(revlog, revs, deltainfo=None, targetdensity=0.5,
                         mingapsize=0):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one go.
    Assume that revs are sorted.

    ``deltainfo`` is a _deltainfo instance of a revision that we would append
    to the top of the revlog.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
    skipped.

    >>> revlog = _testrevlog([
    ...  5, #00 (5)
    ...  10, #01 (5)
    ...  12, #02 (2)
    ...  12, #03 (empty)
    ...  27, #04 (15)
    ...  31, #05 (4)
    ...  31, #06 (empty)
    ...  42, #07 (11)
    ...  47, #08 (5)
    ...  47, #09 (empty)
    ...  48, #10 (1)
    ...  51, #11 (3)
    ...  74, #12 (23)
    ...  85, #13 (11)
    ...  86, #14 (1)
    ...  91, #15 (5)
    ... ])

    >>> list(_slicechunktodensity(revlog, list(range(16))))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(_slicechunktodensity(revlog, [0, 15]))
    [[0], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           mingapsize=20))
    [[1, 2, 3, 5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           targetdensity=0.95))
    [[1, 2], [5], [8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           targetdensity=0.95, mingapsize=12))
    [[1, 2], [5, 8, 10, 11], [14]]
    """
    start = revlog.start
    length = revlog.length

    if len(revs) <= 1:
        yield revs
        return

    nextrev = len(revlog)
    nextoffset = revlog.end(nextrev - 1)

    if deltainfo is None:
        deltachainspan = _segmentspan(revlog, revs)
        chainpayload = sum(length(r) for r in revs)
    else:
        deltachainspan = deltainfo.distance
        chainpayload = deltainfo.compresseddeltalen

    if deltachainspan < mingapsize:
        yield revs
        return

    readdata = deltachainspan

    if deltachainspan:
        density = chainpayload / float(deltachainspan)
    else:
        density = 1.0

    if density >= targetdensity:
        yield revs
        return

    if deltainfo is not None and deltainfo.deltalen:
        revs = list(revs)
        revs.append(nextrev)

    # Store the gaps in a heap to have them sorted by decreasing size
    gapsheap = []
    heapq.heapify(gapsheap)
    prevend = None
    for i, rev in enumerate(revs):
        if rev < nextrev:
            revstart = start(rev)
            revlen = length(rev)
        else:
            revstart = nextoffset
            revlen = deltainfo.deltalen

        # Skip empty revisions to form larger holes
        if revlen == 0:
            continue

        if prevend is not None:
            gapsize = revstart - prevend
            # only consider holes that are large enough
            if gapsize > mingapsize:
                heapq.heappush(gapsheap, (-gapsize, i))

        prevend = revstart + revlen

    # Collect the indices of the largest holes until the density is acceptable
    indicesheap = []
    heapq.heapify(indicesheap)
    while gapsheap and density < targetdensity:
        oppgapsize, gapidx = heapq.heappop(gapsheap)

        heapq.heappush(indicesheap, gapidx)

        # the gap sizes are stored as negatives to be sorted decreasingly
        # by the heap
        readdata -= (-oppgapsize)
        if readdata > 0:
            density = chainpayload / float(readdata)
        else:
            density = 1.0

    # Cut the revs at collected indices
    previdx = 0
    while indicesheap:
        idx = heapq.heappop(indicesheap)

        chunk = _trimchunk(revlog, revs, previdx, idx)
        if chunk:
            yield chunk

        previdx = idx

    chunk = _trimchunk(revlog, revs, previdx)
    if chunk:
        yield chunk

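# Simplified sketch (illustrative, not part of the original file) of the
# core density loop above: keep skipping the largest gaps from the read
# span until payload / remaining-span reaches the target density.

def _densitysketch(chainpayload, deltachainspan, gaps, targetdensity):
    """Return how many of the largest gaps must be skipped so that
    chainpayload / (deltachainspan - skipped gaps) >= targetdensity.

    ``gaps`` is a plain list of gap sizes; the real code above keeps them
    in a heap together with their positions so the chunk can be cut there.
    """
    readdata = deltachainspan
    skipped = 0
    for gapsize in sorted(gaps, reverse=True):
        if readdata <= 0 or chainpayload / float(readdata) >= targetdensity:
            break
        readdata -= gapsize
        skipped += 1
    return skipped

# For example, a 35-byte payload spread over a 100-byte span with gaps of
# [40, 15, 10] reaches a 0.5 target density after skipping the single
# largest gap: 35 / (100 - 40) ~= 0.58.
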
@attr.s(slots=True, frozen=True)
class _deltainfo(object):
    distance = attr.ib()
    deltalen = attr.ib()
    data = attr.ib()
    base = attr.ib()
    chainbase = attr.ib()
    chainlen = attr.ib()
    compresseddeltalen = attr.ib()

class _deltacomputer(object):
    def __init__(self, revlog):
        self.revlog = revlog

    def _getcandidaterevs(self, p1, p2, cachedelta):
        """
        Provides revisions that are interesting to diff against,
        grouped by level of easiness.
        """
        revlog = self.revlog
        gdelta = revlog._generaldelta
        curr = len(revlog)
        prev = curr - 1
        p1r, p2r = revlog.rev(p1), revlog.rev(p2)

        # should we try to build a delta?
        if prev != nullrev and revlog.storedeltachains:
            tested = set()
            # This condition is true most of the time when processing
            # changegroup data into a generaldelta repo. The only time it
            # isn't true is if this is the first revision in a delta chain
            # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
            if cachedelta and gdelta and revlog._lazydeltabase:
                # Assume what we received from the server is a good choice
                # build delta will reuse the cache
                yield (cachedelta[0],)
                tested.add(cachedelta[0])

            if gdelta:
                # exclude already lazy tested base if any
                parents = [p for p in (p1r, p2r)
                           if p != nullrev and p not in tested]

                if not revlog._deltabothparents and len(parents) == 2:
                    parents.sort()
                    # To minimize the chance of having to build a fulltext,
                    # pick first whichever parent is closest to us (max rev)
                    yield (parents[1],)
                    # then the other one (min rev) if the first did not fit
                    yield (parents[0],)
                    tested.update(parents)
                elif len(parents) > 0:
                    # Test all parents (1 or 2), and keep the best candidate
                    yield parents
                    tested.update(parents)

            if prev not in tested:
                # other approaches failed; try against prev to hopefully
                # save us a fulltext.
                yield (prev,)
                tested.add(prev)

    def buildtext(self, revinfo, fh):
        """Builds a fulltext version of a revision

        revinfo: _revisioninfo instance that contains all needed info
        fh:      file handle to either the .i or the .d revlog file,
                 depending on whether it is inlined or not
        """
        btext = revinfo.btext
        if btext[0] is not None:
            return btext[0]

        revlog = self.revlog
        cachedelta = revinfo.cachedelta
        flags = revinfo.flags
        node = revinfo.node

        baserev = cachedelta[0]
        delta = cachedelta[1]
        # special case deltas which replace entire base; no need to decode
        # base revision. this neatly avoids censored bases, which throw when
        # they're decoded.
        hlen = struct.calcsize(">lll")
        if delta[:hlen] == mdiff.replacediffheader(revlog.rawsize(baserev),
                                                   len(delta) - hlen):
            btext[0] = delta[hlen:]
        else:
            # deltabase is rawtext before changed by flag processors, which is
            # equivalent to non-raw text
            basetext = revlog.revision(baserev, _df=fh, raw=False)
            btext[0] = mdiff.patch(basetext, delta)

        try:
            res = revlog._processflags(btext[0], flags, 'read', raw=True)
            btext[0], validatehash = res
            if validatehash:
                revlog.checkhash(btext[0], node, p1=revinfo.p1, p2=revinfo.p2)
            if flags & REVIDX_ISCENSORED:
                raise RevlogError(_('node %s is not censored') % node)
        except CensoredNodeError:
            # must pass the censored index flag to add censored revisions
            if not flags & REVIDX_ISCENSORED:
                raise
        return btext[0]

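    # Illustrative note (not part of the original file): a "replace the
    # whole base" delta starts with a single ">lll" fragment header
    # (start, end, newlen) covering the entire base revision. E.g. for a
    # 120-byte base replaced by 95 bytes of new text the delta begins with
    # struct.pack(">lll", 0, 120, 95), which is what
    # mdiff.replacediffheader(120, 95) produces, so buildtext() above can
    # recognize it without decoding the (possibly censored) base revision.
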
    def _builddeltadiff(self, base, revinfo, fh):
        revlog = self.revlog
        t = self.buildtext(revinfo, fh)
        if revlog.iscensored(base):
            # deltas based on a censored revision must replace the
            # full content in one patch, so delta works everywhere
            header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
            delta = header + t
        else:
            ptext = revlog.revision(base, _df=fh, raw=True)
            delta = mdiff.textdiff(ptext, t)

        return delta

    def _builddeltainfo(self, revinfo, base, fh):
        # can we use the cached delta?
        if revinfo.cachedelta and revinfo.cachedelta[0] == base:
            delta = revinfo.cachedelta[1]
        else:
            delta = self._builddeltadiff(base, revinfo, fh)
        revlog = self.revlog
        header, data = revlog.compress(delta)
        deltalen = len(header) + len(data)
        chainbase = revlog.chainbase(base)
        offset = revlog.end(len(revlog) - 1)
        dist = deltalen + offset - revlog.start(chainbase)
        if revlog._generaldelta:
            deltabase = base
        else:
            deltabase = chainbase
        chainlen, compresseddeltalen = revlog._chaininfo(base)
        chainlen += 1
        compresseddeltalen += deltalen
        return _deltainfo(dist, deltalen, (header, data), deltabase,
                          chainbase, chainlen, compresseddeltalen)

    def finddeltainfo(self, revinfo, fh):
        """Find an acceptable delta against a candidate revision

        revinfo: information about the revision (instance of _revisioninfo)
        fh:      file handle to either the .i or the .d revlog file,
                 depending on whether it is inlined or not

        Returns the first acceptable candidate revision, as ordered by
        _getcandidaterevs
        """
        if not revinfo.textlen:
            return None # empty files do not need a delta

        cachedelta = revinfo.cachedelta
        p1 = revinfo.p1
        p2 = revinfo.p2
        revlog = self.revlog

        deltalength = self.revlog.length
        deltaparent = self.revlog.deltaparent

        deltainfo = None
        deltas_limit = revinfo.textlen * LIMIT_DELTA2TEXT
        for candidaterevs in self._getcandidaterevs(p1, p2, cachedelta):
            # filter out delta bases that will never produce a good delta
            candidaterevs = [r for r in candidaterevs
                             if self.revlog.length(r) <= deltas_limit]
            nominateddeltas = []
            for candidaterev in candidaterevs:
                # skip over empty deltas (no need to include them in a chain)
                while candidaterev != nullrev and not deltalength(candidaterev):
                    candidaterev = deltaparent(candidaterev)
                # no need to try a delta against nullrev, this will be handled
                # by fulltext later.
                if candidaterev == nullrev:
                    continue
                # no delta for rawtext-changing revs (see "candelta" for why)
                if revlog.flags(candidaterev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
                    continue
                candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
                if revlog._isgooddeltainfo(candidatedelta, revinfo):
                    nominateddeltas.append(candidatedelta)
            if nominateddeltas:
                deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
                break

        return deltainfo

@attr.s(slots=True, frozen=True)
class _revisioninfo(object):
    """Information about a revision that allows building its fulltext
    node:       expected hash of the revision
    p1, p2:     parent revs of the revision
    btext:      built text cache consisting of a one-element list
    cachedelta: (baserev, uncompressed_delta) or None
    flags:      flags associated to the revision storage

    One of btext[0] or cachedelta must be set.
    """
    node = attr.ib()
    p1 = attr.ib()
    p2 = attr.ib()
    btext = attr.ib()
    textlen = attr.ib()
    cachedelta = attr.ib()
    flags = attr.ib()

# index v0:
#  4 bytes: offset
#  4 bytes: compressed length
#  4 bytes: base rev
#  4 bytes: link rev
# 20 bytes: parent 1 nodeid
# 20 bytes: parent 2 nodeid
# 20 bytes: nodeid
indexformatv0 = struct.Struct(">4l20s20s20s")
indexformatv0_pack = indexformatv0.pack
indexformatv0_unpack = indexformatv0.unpack

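# Worked size check (illustrative, not part of the original file): a v0
# entry is four 4-byte big-endian ints plus three 20-byte nodes, so
# indexformatv0.size == 4 * 4 + 3 * 20 == 76 bytes per revision.
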
class revlogoldindex(list):
    def __getitem__(self, i):
        if i == -1:
            return (0, 0, 0, -1, -1, -1, -1, nullid)
        return list.__getitem__(self, i)

# maximum <delta-chain-data>/<revision-text-length> ratio
LIMIT_DELTA2TEXT = 2

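# Worked example (illustrative, not part of the original file): with
# LIMIT_DELTA2TEXT = 2, a revision whose fulltext is 1000 bytes gives
# deltas_limit = 2000 in _deltacomputer.finddeltainfo() above, so any
# candidate base whose stored delta is larger than 2000 bytes is filtered
# out before a delta against it is even built.
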
class revlogoldio(object):
    def __init__(self):
        self.size = indexformatv0.size

    def parseindex(self, data, inline):
        s = self.size
        index = []
        nodemap = {nullid: nullrev}
        n = off = 0
        l = len(data)
        while off + s <= l:
            cur = data[off:off + s]
            off += s
            e = indexformatv0_unpack(cur)
            # transform to revlogv1 format
            e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
                  nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
            index.append(e2)
            nodemap[e[6]] = n
            n += 1

        return revlogoldindex(index), nodemap, None

    def packentry(self, entry, node, version, rev):
        if gettype(entry[0]):
            raise RevlogError(_('index entry flags need revlog version 1'))
        e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
              node(entry[5]), node(entry[6]), entry[7])
        return indexformatv0_pack(*e2)

# index ng:
#  6 bytes: offset
#  2 bytes: flags
#  4 bytes: compressed length
#  4 bytes: uncompressed length
#  4 bytes: base rev
#  4 bytes: link rev
#  4 bytes: parent 1 rev
#  4 bytes: parent 2 rev
# 32 bytes: nodeid
indexformatng = struct.Struct(">Qiiiiii20s12x")
indexformatng_pack = indexformatng.pack
versionformat = struct.Struct(">I")
versionformat_pack = versionformat.pack
versionformat_unpack = versionformat.unpack

# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7fffffff

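# Worked size check (illustrative, not part of the original file): an ng
# entry packs one 8-byte int, six 4-byte ints, a 20-byte node and 12 bytes
# of padding, so indexformatng.size == 8 + 6 * 4 + 20 + 12 == 64 bytes.
# For rev 0, revlogio.packentry() below overwrites the first 4 bytes of
# the entry with the version header, which is why versionformat is ">I".
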
class revlogio(object):
    def __init__(self):
        self.size = indexformatng.size

    def parseindex(self, data, inline):
        # call the C implementation to parse the index data
        index, cache = parsers.parse_index2(data, inline)
        return index, getattr(index, 'nodemap', None), cache

    def packentry(self, entry, node, version, rev):
        p = indexformatng_pack(*entry)
        if rev == 0:
            p = versionformat_pack(version) + p[4:]
        return p

class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.

    If checkambig, indexfile is opened with checkambig=True at
    writing, to avoid file stat ambiguity.

    If mmaplargeindex is True, and an mmapindexthreshold is set, the
    index will be mmapped rather than read if it is larger than the
    configured threshold.

    If censorable is True, the revlog can have censored revisions.
    """
    def __init__(self, opener, indexfile, datafile=None, checkambig=False,
                 mmaplargeindex=False, censorable=False):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.
        """
        self.indexfile = indexfile
        self.datafile = datafile or (indexfile[:-2] + ".d")
        self.opener = opener
        # When True, indexfile is opened with checkambig=True at writing, to
        # avoid file stat ambiguity.
        self._checkambig = checkambig
        self._censorable = censorable
        # 3-tuple of (node, rev, text) for a raw revision.
        self._cache = None
        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)
        # 2-tuple of (offset, data) of raw data from the revlog at an offset.
        self._chunkcache = (0, '')
        # How much data to read and cache into the raw revlog data cache.
        self._chunkcachesize = 65536
        self._maxchainlen = None
        self._deltabothparents = True
        self.index = []
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}
        # Mapping of revision integer to full node.
        self._nodecache = {nullid: nullrev}
        self._nodepos = None
        self._compengine = 'zlib'
        self._maxdeltachainspan = -1
        self._withsparseread = False
        self._sparserevlog = False
        self._srdensitythreshold = 0.50
        self._srmingapsize = 262144

        mmapindexthreshold = None
        v = REVLOG_DEFAULT_VERSION
        opts = getattr(opener, 'options', None)
        if opts is not None:
            if 'revlogv2' in opts:
                # version 2 revlogs always use generaldelta.
                v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
            elif 'revlogv1' in opts:
                if 'generaldelta' in opts:
                    v |= FLAG_GENERALDELTA
            else:
                v = 0
            if 'chunkcachesize' in opts:
                self._chunkcachesize = opts['chunkcachesize']
            if 'maxchainlen' in opts:
                self._maxchainlen = opts['maxchainlen']
            if 'deltabothparents' in opts:
                self._deltabothparents = opts['deltabothparents']
            self._lazydeltabase = bool(opts.get('lazydeltabase', False))
            if 'compengine' in opts:
                self._compengine = opts['compengine']
            if 'maxdeltachainspan' in opts:
                self._maxdeltachainspan = opts['maxdeltachainspan']
            if mmaplargeindex and 'mmapindexthreshold' in opts:
                mmapindexthreshold = opts['mmapindexthreshold']
            self._sparserevlog = bool(opts.get('sparse-revlog', False))
            withsparseread = bool(opts.get('with-sparse-read', False))
            # sparse-revlog forces sparse-read
            self._withsparseread = self._sparserevlog or withsparseread
            if 'sparse-read-density-threshold' in opts:
                self._srdensitythreshold = opts['sparse-read-density-threshold']
            if 'sparse-read-min-gap-size' in opts:
                self._srmingapsize = opts['sparse-read-min-gap-size']

        if self._chunkcachesize <= 0:
            raise RevlogError(_('revlog chunk cache size %r is not greater '
                                'than 0') % self._chunkcachesize)
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise RevlogError(_('revlog chunk cache size %r is not a power '
                                'of 2') % self._chunkcachesize)

        indexdata = ''
        self._initempty = True
        try:
            with self._indexfp() as f:
                if (mmapindexthreshold is not None and
                        self.opener.fstat(f).st_size >= mmapindexthreshold):
                    indexdata = util.buffer(util.mmapread(f))
                else:
                    indexdata = f.read()
                if len(indexdata) > 0:
                    v = versionformat_unpack(indexdata[:4])[0]
                    self._initempty = False
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise

        self.version = v
        self._inline = v & FLAG_INLINE_DATA
        self._generaldelta = v & FLAG_GENERALDELTA
        flags = v & ~0xFFFF
        fmt = v & 0xFFFF
        if fmt == REVLOGV0:
            if flags:
                raise RevlogError(_('unknown flags (%#04x) in version %d '
                                    'revlog %s') %
                                  (flags >> 16, fmt, self.indexfile))
        elif fmt == REVLOGV1:
            if flags & ~REVLOGV1_FLAGS:
                raise RevlogError(_('unknown flags (%#04x) in version %d '
                                    'revlog %s') %
                                  (flags >> 16, fmt, self.indexfile))
        elif fmt == REVLOGV2:
            if flags & ~REVLOGV2_FLAGS:
                raise RevlogError(_('unknown flags (%#04x) in version %d '
                                    'revlog %s') %
                                  (flags >> 16, fmt, self.indexfile))
        else:
            raise RevlogError(_('unknown version (%d) in revlog %s') %
                              (fmt, self.indexfile))

        self.storedeltachains = True

        self._io = revlogio()
        if self.version == REVLOGV0:
            self._io = revlogoldio()
        try:
            d = self._io.parseindex(indexdata, self._inline)
        except (ValueError, IndexError):
            raise RevlogError(_("index %s is corrupted") % (self.indexfile))
        self.index, nodemap, self._chunkcache = d
        if nodemap is not None:
            self.nodemap = self._nodecache = nodemap
        if not self._chunkcache:
            self._chunkclear()
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = {}
        # revlog header -> revlog compressor
        self._decompressors = {}

    @util.propertycache
    def _compressor(self):
        return util.compengines[self._compengine].revlogcompressor()

    def _indexfp(self, mode='r'):
        """file object for the revlog's index file"""
        args = {r'mode': mode}
        if mode != 'r':
            args[r'checkambig'] = self._checkambig
        if mode == 'w':
            args[r'atomictemp'] = True
        return self.opener(self.indexfile, **args)

    def _datafp(self, mode='r'):
        """file object for the revlog's data file"""
        return self.opener(self.datafile, mode=mode)

    @contextlib.contextmanager
    def _datareadfp(self, existingfp=None):
        """file object suitable to read data"""
        if existingfp is not None:
            yield existingfp
        else:
            if self._inline:
                func = self._indexfp
            else:
                func = self._datafp
            with func() as fp:
                yield fp

    def tip(self):
        return self.node(len(self.index) - 1)
    def __contains__(self, rev):
        return 0 <= rev < len(self)
    def __len__(self):
        return len(self.index)
    def __iter__(self):
        return iter(pycompat.xrange(len(self)))
    def revs(self, start=0, stop=None):
        """iterate over all revs in this revlog (from start to stop)"""
        step = 1
        length = len(self)
        if stop is not None:
            if start > stop:
                step = -1
            stop += step
            if stop > length:
                stop = length
        else:
            stop = length
        return pycompat.xrange(start, stop, step)

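    # Illustrative examples (not part of the original file): for a revlog
    # with 5 revisions, revs() yields 0, 1, 2, 3, 4, while
    # revs(start=3, stop=1) iterates in reverse and yields 3, 2, 1 (an
    # explicit stop is inclusive).
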
    @util.propertycache
    def nodemap(self):
        if self.index:
            # populate mapping down to the initial node
            node0 = self.index[0][7] # get around changelog filtering
            self.rev(node0)
        return self._nodecache

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def candelta(self, baserev, rev):
        """whether two revisions (baserev, rev) can be delta-ed or not"""
        # Disable delta if either rev requires a content-changing flag
        # processor (ex. LFS). This is because such flag processor can alter
        # the rawtext content that the delta will be based on, and two clients
        # could have the same revlog node with different flags (i.e. different
        # rawtext contents) and the delta could be incompatible.
        if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
            or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
            return False
        return True

    def clearcaches(self):
        self._cache = None
        self._chainbasecache.clear()
        self._chunkcache = (0, '')
        self._pcache = {}

        try:
            self._nodecache.clearcaches()
        except AttributeError:
            self._nodecache = {nullid: nullrev}
            self._nodepos = None

    def rev(self, node):
        try:
            return self._nodecache[node]
        except TypeError:
            raise
        except RevlogError:
            # parsers.c radix tree lookup failed
            if node == wdirid or node in wdirfilenodeids:
                raise error.WdirUnsupported
            raise LookupError(node, self.indexfile, _('no node'))
        except KeyError:
            # pure python cache lookup failed
            n = self._nodecache
            i = self.index
            p = self._nodepos
            if p is None:
                p = len(i) - 1
            else:
                assert p < len(i)
            for r in pycompat.xrange(p, -1, -1):
                v = i[r][7]
                n[v] = r
                if v == node:
                    self._nodepos = r - 1
                    return r
            if node == wdirid or node in wdirfilenodeids:
                raise error.WdirUnsupported
            raise LookupError(node, self.indexfile, _('no node'))

    # Accessors for index entries.

    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
    # are flags.
    def start(self, rev):
        return int(self.index[rev][0] >> 16)

    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF

    def length(self, rev):
        return self.index[rev][1]

    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.revision(rev, raw=True)
        return len(t)

    def size(self, rev):
        """length of non-raw text (processed by a "read" flag processor)"""
        # fast path: if no "read" flag processor could change the content,
        # size is rawsize. note: ELLIPSIS is known to not change the content.
        flags = self.flags(rev)
        if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
            return self.rawsize(rev)

        return len(self.revision(rev, raw=False))

    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

        index = self.index
        iterrev = rev
        base = index[iterrev][3]
        while base != iterrev:
            iterrev = base
            base = index[iterrev][3]

        self._chainbasecache[rev] = base
        return base

    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        try:
            entry = self.index[rev]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

        return entry[5], entry[6]

    def node(self, rev):
        try:
            return self.index[rev][7]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

    # Derived from index values.

    def end(self, rev):
        return self.start(rev) + self.length(rev)

    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, self._generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped

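    # Illustrative example (not part of the original file): with general
    # delta, _deltachain() follows the base-rev index field. If rev 7
    # deltas against rev 3, rev 3 against rev 1, and rev 1 is its own base
    # (a snapshot), then _deltachain(7) returns ([1, 3, 7], False). Without
    # general delta the chain is the consecutive run of revisions down to
    # the chain base.
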
1340 1340 def ancestors(self, revs, stoprev=0, inclusive=False):
1341 1341 """Generate the ancestors of 'revs' in reverse topological order.
1342 1342 Does not generate revs lower than stoprev.
1343 1343
1344 1344 See the documentation for ancestor.lazyancestors for more details."""
1345 1345
1346 1346 return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
1347 1347 inclusive=inclusive)
1348 1348
1349 1349 def descendants(self, revs):
1350 1350 """Generate the descendants of 'revs' in revision order.
1351 1351
1352 1352 Yield a sequence of revision numbers starting with a child of
1353 1353 some rev in revs, i.e., each revision is *not* considered a
1354 1354 descendant of itself. Results are ordered by revision number (a
1355 1355 topological sort)."""
1356 1356 first = min(revs)
1357 1357 if first == nullrev:
1358 1358 for i in self:
1359 1359 yield i
1360 1360 return
1361 1361
1362 1362 seen = set(revs)
1363 1363 for i in self.revs(start=first + 1):
1364 1364 for x in self.parentrevs(i):
1365 1365 if x != nullrev and x in seen:
1366 1366 seen.add(i)
1367 1367 yield i
1368 1368 break
1369 1369
1370 1370 def findcommonmissing(self, common=None, heads=None):
1371 1371 """Return a tuple of the ancestors of common and the ancestors of heads
1372 1372 that are not ancestors of common. In revset terminology, we return the
1373 1373 tuple:
1374 1374
1375 1375 ::common, (::heads) - (::common)
1376 1376
1377 1377 The list is sorted by revision number, meaning it is
1378 1378 topologically sorted.
1379 1379
1380 1380 'heads' and 'common' are both lists of node IDs. If heads is
1381 1381 not supplied, uses all of the revlog's heads. If common is not
1382 1382 supplied, uses nullid."""
1383 1383 if common is None:
1384 1384 common = [nullid]
1385 1385 if heads is None:
1386 1386 heads = self.heads()
1387 1387
1388 1388 common = [self.rev(n) for n in common]
1389 1389 heads = [self.rev(n) for n in heads]
1390 1390
1391 1391 # we want the ancestors, but inclusive
1392 1392 class lazyset(object):
1393 1393 def __init__(self, lazyvalues):
1394 1394 self.addedvalues = set()
1395 1395 self.lazyvalues = lazyvalues
1396 1396
1397 1397 def __contains__(self, value):
1398 1398 return value in self.addedvalues or value in self.lazyvalues
1399 1399
1400 1400 def __iter__(self):
1401 1401 added = self.addedvalues
1402 1402 for r in added:
1403 1403 yield r
1404 1404 for r in self.lazyvalues:
1405 1405 if not r in added:
1406 1406 yield r
1407 1407
1408 1408 def add(self, value):
1409 1409 self.addedvalues.add(value)
1410 1410
1411 1411 def update(self, values):
1412 1412 self.addedvalues.update(values)
1413 1413
1414 1414 has = lazyset(self.ancestors(common))
1415 1415 has.add(nullrev)
1416 1416 has.update(common)
1417 1417
1418 1418 # take all ancestors from heads that aren't in has
1419 1419 missing = set()
1420 1420 visit = collections.deque(r for r in heads if r not in has)
1421 1421 while visit:
1422 1422 r = visit.popleft()
1423 1423 if r in missing:
1424 1424 continue
1425 1425 else:
1426 1426 missing.add(r)
1427 1427 for p in self.parentrevs(r):
1428 1428 if p not in has:
1429 1429 visit.append(p)
1430 1430 missing = list(missing)
1431 1431 missing.sort()
1432 1432 return has, [self.node(miss) for miss in missing]
1433 1433
1434 1434 def incrementalmissingrevs(self, common=None):
1435 1435 """Return an object that can be used to incrementally compute the
1436 1436 revision numbers of the ancestors of arbitrary sets that are not
1437 1437 ancestors of common. This is an ancestor.incrementalmissingancestors
1438 1438 object.
1439 1439
1440 1440 'common' is a list of revision numbers. If common is not supplied, uses
1441 1441 nullrev.
1442 1442 """
1443 1443 if common is None:
1444 1444 common = [nullrev]
1445 1445
1446 1446 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1447 1447
1448 1448 def findmissingrevs(self, common=None, heads=None):
1449 1449 """Return the revision numbers of the ancestors of heads that
1450 1450 are not ancestors of common.
1451 1451
1452 1452 More specifically, return a list of revision numbers corresponding to
1453 1453 nodes N such that every N satisfies the following constraints:
1454 1454
1455 1455 1. N is an ancestor of some node in 'heads'
1456 1456 2. N is not an ancestor of any node in 'common'
1457 1457
1458 1458 The list is sorted by revision number, meaning it is
1459 1459 topologically sorted.
1460 1460
1461 1461 'heads' and 'common' are both lists of revision numbers. If heads is
1462 1462 not supplied, uses all of the revlog's heads. If common is not
1463 1463 supplied, uses nullid."""
1464 1464 if common is None:
1465 1465 common = [nullrev]
1466 1466 if heads is None:
1467 1467 heads = self.headrevs()
1468 1468
1469 1469 inc = self.incrementalmissingrevs(common=common)
1470 1470 return inc.missingancestors(heads)
1471 1471
1472 1472 def findmissing(self, common=None, heads=None):
1473 1473 """Return the ancestors of heads that are not ancestors of common.
1474 1474
1475 1475 More specifically, return a list of nodes N such that every N
1476 1476 satisfies the following constraints:
1477 1477
1478 1478 1. N is an ancestor of some node in 'heads'
1479 1479 2. N is not an ancestor of any node in 'common'
1480 1480
1481 1481 The list is sorted by revision number, meaning it is
1482 1482 topologically sorted.
1483 1483
1484 1484 'heads' and 'common' are both lists of node IDs. If heads is
1485 1485 not supplied, uses all of the revlog's heads. If common is not
1486 1486 supplied, uses nullid."""
1487 1487 if common is None:
1488 1488 common = [nullid]
1489 1489 if heads is None:
1490 1490 heads = self.heads()
1491 1491
1492 1492 common = [self.rev(n) for n in common]
1493 1493 heads = [self.rev(n) for n in heads]
1494 1494
1495 1495 inc = self.incrementalmissingrevs(common=common)
1496 1496 return [self.node(r) for r in inc.missingancestors(heads)]
1497 1497
1498 1498 def nodesbetween(self, roots=None, heads=None):
1499 1499 """Return a topological path from 'roots' to 'heads'.
1500 1500
1501 1501 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1502 1502 topologically sorted list of all nodes N that satisfy both of
1503 1503 these constraints:
1504 1504
1505 1505 1. N is a descendant of some node in 'roots'
1506 1506 2. N is an ancestor of some node in 'heads'
1507 1507
1508 1508 Every node is considered to be both a descendant and an ancestor
1509 1509 of itself, so every reachable node in 'roots' and 'heads' will be
1510 1510 included in 'nodes'.
1511 1511
1512 1512 'outroots' is the list of reachable nodes in 'roots', i.e., the
1513 1513 subset of 'roots' that is returned in 'nodes'. Likewise,
1514 1514 'outheads' is the subset of 'heads' that is also in 'nodes'.
1515 1515
1516 1516 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1517 1517 unspecified, uses nullid as the only root. If 'heads' is
1518 1518 unspecified, uses list of all of the revlog's heads."""
1519 1519 nonodes = ([], [], [])
1520 1520 if roots is not None:
1521 1521 roots = list(roots)
1522 1522 if not roots:
1523 1523 return nonodes
1524 1524 lowestrev = min([self.rev(n) for n in roots])
1525 1525 else:
1526 1526 roots = [nullid] # Everybody's a descendant of nullid
1527 1527 lowestrev = nullrev
1528 1528 if (lowestrev == nullrev) and (heads is None):
1529 1529 # We want _all_ the nodes!
1530 1530 return ([self.node(r) for r in self], [nullid], list(self.heads()))
1531 1531 if heads is None:
1532 1532 # All nodes are ancestors, so the latest ancestor is the last
1533 1533 # node.
1534 1534 highestrev = len(self) - 1
1535 1535 # Set ancestors to None to signal that every node is an ancestor.
1536 1536 ancestors = None
1537 1537 # Set heads to an empty dictionary for later discovery of heads
1538 1538 heads = {}
1539 1539 else:
1540 1540 heads = list(heads)
1541 1541 if not heads:
1542 1542 return nonodes
1543 1543 ancestors = set()
1544 1544 # Turn heads into a dictionary so we can remove 'fake' heads.
1545 1545 # Also, later we will be using it to filter out the heads we can't
1546 1546 # find from roots.
1547 1547 heads = dict.fromkeys(heads, False)
1548 1548 # Start at the top and keep marking parents until we're done.
1549 1549 nodestotag = set(heads)
1550 1550 # Remember where the top was so we can use it as a limit later.
1551 1551 highestrev = max([self.rev(n) for n in nodestotag])
1552 1552 while nodestotag:
1553 1553 # grab a node to tag
1554 1554 n = nodestotag.pop()
1555 1555 # Never tag nullid
1556 1556 if n == nullid:
1557 1557 continue
1558 1558 # A node's revision number represents its place in a
1559 1559 # topologically sorted list of nodes.
1560 1560 r = self.rev(n)
1561 1561 if r >= lowestrev:
1562 1562 if n not in ancestors:
1563 1563 # If we are possibly a descendant of one of the roots
1564 1564 # and we haven't already been marked as an ancestor
1565 1565 ancestors.add(n) # Mark as ancestor
1566 1566 # Add non-nullid parents to list of nodes to tag.
1567 1567 nodestotag.update([p for p in self.parents(n) if
1568 1568 p != nullid])
1569 1569 elif n in heads: # We've seen it before, is it a fake head?
1570 1570 # So it is, real heads should not be the ancestors of
1571 1571 # any other heads.
1572 1572 heads.pop(n)
1573 1573 if not ancestors:
1574 1574 return nonodes
1575 1575 # Now that we have our set of ancestors, we want to remove any
1576 1576 # roots that are not ancestors.
1577 1577
1578 1578 # If one of the roots was nullid, everything is included anyway.
1579 1579 if lowestrev > nullrev:
1580 1580 # But, since we weren't, let's recompute the lowest rev to not
1581 1581 # include roots that aren't ancestors.
1582 1582
1583 1583 # Filter out roots that aren't ancestors of heads
1584 1584 roots = [root for root in roots if root in ancestors]
1585 1585 # Recompute the lowest revision
1586 1586 if roots:
1587 1587 lowestrev = min([self.rev(root) for root in roots])
1588 1588 else:
1589 1589 # No more roots? Return empty list
1590 1590 return nonodes
1591 1591 else:
1592 1592 # We are descending from nullid, and don't need to care about
1593 1593 # any other roots.
1594 1594 lowestrev = nullrev
1595 1595 roots = [nullid]
1596 1596 # Transform our roots list into a set.
1597 1597 descendants = set(roots)
1598 1598 # Also, keep the original roots so we can filter out roots that aren't
1599 1599 # 'real' roots (i.e. are descended from other roots).
1600 1600 roots = descendants.copy()
1601 1601 # Our topologically sorted list of output nodes.
1602 1602 orderedout = []
1603 1603 # Don't start at nullid since we don't want nullid in our output list,
1604 1604 # and if nullid shows up in descendants, empty parents will look like
1605 1605 # they're descendants.
1606 1606 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1607 1607 n = self.node(r)
1608 1608 isdescendant = False
1609 1609 if lowestrev == nullrev: # Everybody is a descendant of nullid
1610 1610 isdescendant = True
1611 1611 elif n in descendants:
1612 1612 # n is already a descendant
1613 1613 isdescendant = True
1614 1614 # This check only needs to be done here because all the roots
1615 1615 # will start being marked as descendants before the loop.
1616 1616 if n in roots:
1617 1617 # If n was a root, check if it's a 'real' root.
1618 1618 p = tuple(self.parents(n))
1619 1619 # If any of its parents are descendants, it's not a root.
1620 1620 if (p[0] in descendants) or (p[1] in descendants):
1621 1621 roots.remove(n)
1622 1622 else:
1623 1623 p = tuple(self.parents(n))
1624 1624 # A node is a descendant if either of its parents are
1625 1625 # descendants. (We seeded the descendants set with the roots
1626 1626 # up there, remember?)
1627 1627 if (p[0] in descendants) or (p[1] in descendants):
1628 1628 descendants.add(n)
1629 1629 isdescendant = True
1630 1630 if isdescendant and ((ancestors is None) or (n in ancestors)):
1631 1631 # Only include nodes that are both descendants and ancestors.
1632 1632 orderedout.append(n)
1633 1633 if (ancestors is not None) and (n in heads):
1634 1634 # We're trying to figure out which heads are reachable
1635 1635 # from roots.
1636 1636 # Mark this head as having been reached
1637 1637 heads[n] = True
1638 1638 elif ancestors is None:
1639 1639 # Otherwise, we're trying to discover the heads.
1640 1640 # Assume this is a head because if it isn't, the next step
1641 1641 # will eventually remove it.
1642 1642 heads[n] = True
1643 1643 # But, obviously its parents aren't.
1644 1644 for p in self.parents(n):
1645 1645 heads.pop(p, None)
1646 1646 heads = [head for head, flag in heads.iteritems() if flag]
1647 1647 roots = list(roots)
1648 1648 assert orderedout
1649 1649 assert roots
1650 1650 assert heads
1651 1651 return (orderedout, roots, heads)
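# Illustrative usage sketch; 'rlog', 'root' and 'head' are hypothetical
# names, not defined in this module:
#
#   nodes, outroots, outheads = rlog.nodesbetween([root], [head])
#   # 'nodes' is topologically sorted: any parent that is itself in
#   # 'nodes' appears before its children.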
1652 1652
1653 1653 def headrevs(self):
1654 1654 try:
1655 1655 return self.index.headrevs()
1656 1656 except AttributeError:
1657 1657 return self._headrevs()
1658 1658
1659 1659 def computephases(self, roots):
1660 1660 return self.index.computephasesmapsets(roots)
1661 1661
1662 1662 def _headrevs(self):
1663 1663 count = len(self)
1664 1664 if not count:
1665 1665 return [nullrev]
1666 1666 # we won't iter over filtered rev so nobody is a head at start
1667 1667 ishead = [0] * (count + 1)
1668 1668 index = self.index
1669 1669 for r in self:
1670 1670 ishead[r] = 1 # I may be a head
1671 1671 e = index[r]
1672 1672 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1673 1673 return [r for r, val in enumerate(ishead) if val]
1674 1674
1675 1675 def heads(self, start=None, stop=None):
1676 1676 """return the list of all nodes that have no children
1677 1677
1678 1678 if start is specified, only heads that are descendants of
1679 1679 start will be returned
1680 1680 if stop is specified, it will consider all the revs from stop
1681 1681 as if they had no children
1682 1682 """
1683 1683 if start is None and stop is None:
1684 1684 if not len(self):
1685 1685 return [nullid]
1686 1686 return [self.node(r) for r in self.headrevs()]
1687 1687
1688 1688 if start is None:
1689 1689 start = nullid
1690 1690 if stop is None:
1691 1691 stop = []
1692 1692 stoprevs = set([self.rev(n) for n in stop])
1693 1693 startrev = self.rev(start)
1694 1694 reachable = {startrev}
1695 1695 heads = {startrev}
1696 1696
1697 1697 parentrevs = self.parentrevs
1698 1698 for r in self.revs(start=startrev + 1):
1699 1699 for p in parentrevs(r):
1700 1700 if p in reachable:
1701 1701 if r not in stoprevs:
1702 1702 reachable.add(r)
1703 1703 heads.add(r)
1704 1704 if p in heads and p not in stoprevs:
1705 1705 heads.remove(p)
1706 1706
1707 1707 return [self.node(r) for r in heads]
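# Sketch of the stop semantics above (hypothetical revlog 'rlog' in
# which revs 1 and 2 are both children of rev 0):
#
#   rlog.heads(start=rlog.node(0), stop=[rlog.node(1)])
#
# excludes rev 1 entirely (it is treated as if it had no children and
# is never counted as a head itself), so only rev 2's node is returned.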
1708 1708
1709 1709 def children(self, node):
1710 1710 """find the children of a given node"""
1711 1711 c = []
1712 1712 p = self.rev(node)
1713 1713 for r in self.revs(start=p + 1):
1714 1714 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1715 1715 if prevs:
1716 1716 for pr in prevs:
1717 1717 if pr == p:
1718 1718 c.append(self.node(r))
1719 1719 elif p == nullrev:
1720 1720 c.append(self.node(r))
1721 1721 return c
1722 1722
1723 1723 def commonancestorsheads(self, a, b):
1724 1724 """calculate all the heads of the common ancestors of nodes a and b"""
1725 1725 a, b = self.rev(a), self.rev(b)
1726 1726 ancs = self._commonancestorsheads(a, b)
1727 1727 return pycompat.maplist(self.node, ancs)
1728 1728
1729 1729 def _commonancestorsheads(self, *revs):
1730 1730 """calculate all the heads of the common ancestors of revs"""
1731 1731 try:
1732 1732 ancs = self.index.commonancestorsheads(*revs)
1733 1733 except (AttributeError, OverflowError): # C implementation failed
1734 1734 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1735 1735 return ancs
1736 1736
1737 1737 def isancestor(self, a, b):
1738 1738 """return True if node a is an ancestor of node b
1739 1739
1740 1740 A revision is considered an ancestor of itself."""
1741 1741 a, b = self.rev(a), self.rev(b)
1742 1742 return self.isancestorrev(a, b)
1743 1743
1744 1744 def isancestorrev(self, a, b):
1745 1745 """return True if revision a is an ancestor of revision b
1746 1746
1747 1747 A revision is considered an ancestor of itself.
1748 1748
1749 1749 The implementation of this is trivial but the use of
1750 1750 commonancestorsheads is not."""
1751 1751 if a == nullrev:
1752 1752 return True
1753 1753 elif a == b:
1754 1754 return True
1755 1755 elif a > b:
1756 1756 return False
1757 1757 return a in self._commonancestorsheads(a, b)
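# Why the check above works: if a is an ancestor of b, the common
# ancestors of a and b are exactly the ancestors of a, whose single
# head is a itself; conversely, any head of the common ancestors is in
# particular an ancestor of b.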
1758 1758
1759 1759 def ancestor(self, a, b):
1760 1760 """calculate the "best" common ancestor of nodes a and b"""
1761 1761
1762 1762 a, b = self.rev(a), self.rev(b)
1763 1763 try:
1764 1764 ancs = self.index.ancestors(a, b)
1765 1765 except (AttributeError, OverflowError):
1766 1766 ancs = ancestor.ancestors(self.parentrevs, a, b)
1767 1767 if ancs:
1768 1768 # choose a consistent winner when there's a tie
1769 1769 return min(map(self.node, ancs))
1770 1770 return nullid
1771 1771
1772 1772 def _match(self, id):
1773 1773 if isinstance(id, int):
1774 1774 # rev
1775 1775 return self.node(id)
1776 1776 if len(id) == 20:
1777 1777 # possibly a binary node
1778 1778 # odds of a binary node being all hex in ASCII are 1 in 10**25
1779 1779 try:
1780 1780 node = id
1781 1781 self.rev(node) # quick search the index
1782 1782 return node
1783 1783 except LookupError:
1784 1784 pass # may be partial hex id
1785 1785 try:
1786 1786 # str(rev)
1787 1787 rev = int(id)
1788 1788 if "%d" % rev != id:
1789 1789 raise ValueError
1790 1790 if rev < 0:
1791 1791 rev = len(self) + rev
1792 1792 if rev < 0 or rev >= len(self):
1793 1793 raise ValueError
1794 1794 return self.node(rev)
1795 1795 except (ValueError, OverflowError):
1796 1796 pass
1797 1797 if len(id) == 40:
1798 1798 try:
1799 1799 # a full hex nodeid?
1800 1800 node = bin(id)
1801 1801 self.rev(node)
1802 1802 return node
1803 1803 except (TypeError, LookupError):
1804 1804 pass
1805 1805
1806 1806 def _partialmatch(self, id):
1807 1807 # we don't care about wdirfilenodeids as they should always be full hashes
1808 1808 maybewdir = wdirhex.startswith(id)
1809 1809 try:
1810 1810 partial = self.index.partialmatch(id)
1811 1811 if partial and self.hasnode(partial):
1812 1812 if maybewdir:
1813 1813 # single 'ff...' match in radix tree, ambiguous with wdir
1814 1814 raise RevlogError
1815 1815 return partial
1816 1816 if maybewdir:
1817 1817 # no 'ff...' match in radix tree, wdir identified
1818 1818 raise error.WdirUnsupported
1819 1819 return None
1820 1820 except RevlogError:
1821 1821 # parsers.c radix tree lookup gave multiple matches
1822 1822 # fast path: for unfiltered changelog, radix tree is accurate
1823 1823 if not getattr(self, 'filteredrevs', None):
1824 1824 raise AmbiguousPrefixLookupError(id, self.indexfile,
1825 1825 _('ambiguous identifier'))
1826 1826 # fall through to slow path that filters hidden revisions
1827 1827 except (AttributeError, ValueError):
1828 1828 # we are pure python, or key was too short to search radix tree
1829 1829 pass
1830 1830
1831 1831 if id in self._pcache:
1832 1832 return self._pcache[id]
1833 1833
1834 1834 if len(id) <= 40:
1835 1835 try:
1836 1836 # hex(node)[:...]
1837 1837 l = len(id) // 2 # grab an even number of digits
1838 1838 prefix = bin(id[:l * 2])
1839 1839 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1840 1840 nl = [n for n in nl if hex(n).startswith(id) and
1841 1841 self.hasnode(n)]
1842 1842 if len(nl) > 0:
1843 1843 if len(nl) == 1 and not maybewdir:
1844 1844 self._pcache[id] = nl[0]
1845 1845 return nl[0]
1846 1846 raise AmbiguousPrefixLookupError(id, self.indexfile,
1847 1847 _('ambiguous identifier'))
1848 1848 if maybewdir:
1849 1849 raise error.WdirUnsupported
1850 1850 return None
1851 1851 except TypeError:
1852 1852 pass
1853 1853
1854 1854 def lookup(self, id):
1855 1855 """locate a node based on:
1856 1856 - revision number or str(revision number)
1857 1857 - nodeid or subset of hex nodeid
1858 1858 """
1859 1859 n = self._match(id)
1860 1860 if n is not None:
1861 1861 return n
1862 1862 n = self._partialmatch(id)
1863 1863 if n:
1864 1864 return n
1865 1865
1866 1866 raise LookupError(id, self.indexfile, _('no match found'))
1867 1867
1868 1868 def shortest(self, node, minlength=1):
1869 1869 """Find the shortest unambiguous prefix that matches node."""
1870 1870 def isvalid(prefix):
1871 1871 try:
1872 1872 node = self._partialmatch(prefix)
1873 1873 except error.RevlogError:
1874 1874 return False
1875 1875 except error.WdirUnsupported:
1876 1876 # single 'ff...' match
1877 1877 return True
1878 1878 if node is None:
1879 1879 raise LookupError(node, self.indexfile, _('no node'))
1880 1880 return True
1881 1881
1882 1882 def maybewdir(prefix):
1883 1883 return all(c == 'f' for c in prefix)
1884 1884
1885 1885 hexnode = hex(node)
1886 1886
1887 1887 def disambiguate(hexnode, minlength):
1888 1888 """Disambiguate against wdirid."""
1889 1889 for length in range(minlength, 41):
1890 1890 prefix = hexnode[:length]
1891 1891 if not maybewdir(prefix):
1892 1892 return prefix
1893 1893
1894 1894 if not getattr(self, 'filteredrevs', None):
1895 1895 try:
1896 1896 length = max(self.index.shortest(node), minlength)
1897 1897 return disambiguate(hexnode, length)
1898 1898 except RevlogError:
1899 1899 if node != wdirid:
1900 1900 raise LookupError(node, self.indexfile, _('no node'))
1901 1901 except AttributeError:
1902 1902 # Fall through to pure code
1903 1903 pass
1904 1904
1905 1905 if node == wdirid:
1906 1906 for length in range(minlength, 41):
1907 1907 prefix = hexnode[:length]
1908 1908 if isvalid(prefix):
1909 1909 return prefix
1910 1910
1911 1911 for length in range(minlength, 41):
1912 1912 prefix = hexnode[:length]
1913 1913 if isvalid(prefix):
1914 1914 return disambiguate(hexnode, length)
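# Usage sketch (hypothetical 'rlog' and 'node'): rlog.shortest(node, 1)
# returns the shortest hex prefix of the node that matches no other
# stored node and cannot be confused with the all-'f' working
# directory id.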
1915 1915
1916 1916 def cmp(self, node, text):
1917 1917 """compare text with a given file revision
1918 1918
1919 1919 returns True if text is different from what is stored.
1920 1920 """
1921 1921 p1, p2 = self.parents(node)
1922 1922 return hash(text, p1, p2) != node
1923 1923
1924 1924 def _cachesegment(self, offset, data):
1925 1925 """Add a segment to the revlog cache.
1926 1926
1927 1927 Accepts an absolute offset and the data that is at that location.
1928 1928 """
1929 1929 o, d = self._chunkcache
1930 1930 # try to add to existing cache
1931 1931 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1932 1932 self._chunkcache = o, d + data
1933 1933 else:
1934 1934 self._chunkcache = offset, data
1935 1935
1936 1936 def _readsegment(self, offset, length, df=None):
1937 1937 """Load a segment of raw data from the revlog.
1938 1938
1939 1939 Accepts an absolute offset, length to read, and an optional existing
1940 1940 file handle to read from.
1941 1941
1942 1942 If an existing file handle is passed, it will be seeked and its
1943 1943 original seek position will NOT be restored.
1944 1944
1945 1945 Returns a str or buffer of raw byte data.
1946 1946 """
1947 1947 # Cache data both forward and backward around the requested
1948 1948 # data, in a fixed size window. This helps speed up operations
1949 1949 # involving reading the revlog backwards.
1950 1950 cachesize = self._chunkcachesize
1951 1951 realoffset = offset & ~(cachesize - 1)
1952 1952 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
1953 1953 - realoffset)
1954 1954 with self._datareadfp(df) as df:
1955 1955 df.seek(realoffset)
1956 1956 d = df.read(reallength)
1957 1957 self._cachesegment(realoffset, d)
1958 1958 if offset != realoffset or reallength != length:
1959 1959 return util.buffer(d, offset - realoffset, length)
1960 1960 return d
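# Worked example of the alignment above (hypothetical values): with
# cachesize = 65536, offset = 70000 and length = 100:
#   realoffset = 70000 & ~65535                           -> 65536
#   reallength = ((70000 + 100 + 65536) & ~65535) - 65536 -> 65536
# so one full 64KiB window containing the requested bytes is read and
# cached, and util.buffer() slices out the 100 requested bytes.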
1961 1961
1962 1962 def _getsegment(self, offset, length, df=None):
1963 1963 """Obtain a segment of raw data from the revlog.
1964 1964
1965 1965 Accepts an absolute offset, length of bytes to obtain, and an
1966 1966 optional file handle to the already-opened revlog. If the file
1967 1967 handle is used, its original seek position will not be preserved.
1968 1968
1969 1969 Requests for data may be returned from a cache.
1970 1970
1971 1971 Returns a str or a buffer instance of raw byte data.
1972 1972 """
1973 1973 o, d = self._chunkcache
1974 1974 l = len(d)
1975 1975
1976 1976 # is it in the cache?
1977 1977 cachestart = offset - o
1978 1978 cacheend = cachestart + length
1979 1979 if cachestart >= 0 and cacheend <= l:
1980 1980 if cachestart == 0 and cacheend == l:
1981 1981 return d # avoid a copy
1982 1982 return util.buffer(d, cachestart, cacheend - cachestart)
1983 1983
1984 1984 return self._readsegment(offset, length, df=df)
1985 1985
1986 1986 def _getsegmentforrevs(self, startrev, endrev, df=None):
1987 1987 """Obtain a segment of raw data corresponding to a range of revisions.
1988 1988
1989 1989 Accepts the start and end revisions and an optional already-open
1990 1990 file handle to be used for reading. If the file handle is used, its
1991 1991 seek position will not be preserved.
1992 1992
1993 1993 Requests for data may be satisfied by a cache.
1994 1994
1995 1995 Returns a 2-tuple of (offset, data) for the requested range of
1996 1996 revisions. Offset is the integer offset from the beginning of the
1997 1997 revlog and data is a str or buffer of the raw byte data.
1998 1998
1999 1999 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
2000 2000 to determine where each revision's data begins and ends.
2001 2001 """
2002 2002 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
2003 2003 # (functions are expensive).
2004 2004 index = self.index
2005 2005 istart = index[startrev]
2006 2006 start = int(istart[0] >> 16)
2007 2007 if startrev == endrev:
2008 2008 end = start + istart[1]
2009 2009 else:
2010 2010 iend = index[endrev]
2011 2011 end = int(iend[0] >> 16) + iend[1]
2012 2012
2013 2013 if self._inline:
2014 2014 start += (startrev + 1) * self._io.size
2015 2015 end += (endrev + 1) * self._io.size
2016 2016 length = end - start
2017 2017
2018 2018 return start, self._getsegment(start, length, df=df)
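# Sketch of the inline adjustment above: in an inline revlog, index
# entries and data chunks are interleaved in one file, so a revision's
# data offset is shifted by the (rev + 1) index entries written before
# it. With a hypothetical entry size of 64 bytes, rev 2's data starts
# (2 + 1) * 64 = 192 bytes past its logical offset.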
2019 2019
2020 2020 def _chunk(self, rev, df=None):
2021 2021 """Obtain a single decompressed chunk for a revision.
2022 2022
2023 2023 Accepts an integer revision and an optional already-open file handle
2024 2024 to be used for reading. If used, the seek position of the file will not
2025 2025 be preserved.
2026 2026
2027 2027 Returns a str holding uncompressed data for the requested revision.
2028 2028 """
2029 2029 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
2030 2030
2031 2031 def _chunks(self, revs, df=None, targetsize=None):
2032 2032 """Obtain decompressed chunks for the specified revisions.
2033 2033
2034 2034 Accepts an iterable of numeric revisions that are assumed to be in
2035 2035 ascending order. Also accepts an optional already-open file handle
2036 2036 to be used for reading. If used, the seek position of the file will
2037 2037 not be preserved.
2038 2038
2039 2039 This function is similar to calling ``self._chunk()`` multiple times,
2040 2040 but is faster.
2041 2041
2042 2042 Returns a list with decompressed data for each requested revision.
2043 2043 """
2044 2044 if not revs:
2045 2045 return []
2046 2046 start = self.start
2047 2047 length = self.length
2048 2048 inline = self._inline
2049 2049 iosize = self._io.size
2050 2050 buffer = util.buffer
2051 2051
2052 2052 l = []
2053 2053 ladd = l.append
2054 2054
2055 2055 if not self._withsparseread:
2056 2056 slicedchunks = (revs,)
2057 2057 else:
2058 2058 slicedchunks = _slicechunk(self, revs, targetsize=targetsize)
2059 2059
2060 2060 for revschunk in slicedchunks:
2061 2061 firstrev = revschunk[0]
2062 2062 # Skip trailing revisions with empty diff
2063 2063 for lastrev in revschunk[::-1]:
2064 2064 if length(lastrev) != 0:
2065 2065 break
2066 2066
2067 2067 try:
2068 2068 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
2069 2069 except OverflowError:
2070 2070 # issue4215 - we can't cache a run of chunks greater than
2071 2071 # 2G on Windows
2072 2072 return [self._chunk(rev, df=df) for rev in revschunk]
2073 2073
2074 2074 decomp = self.decompress
2075 2075 for rev in revschunk:
2076 2076 chunkstart = start(rev)
2077 2077 if inline:
2078 2078 chunkstart += (rev + 1) * iosize
2079 2079 chunklength = length(rev)
2080 2080 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
2081 2081
2082 2082 return l
2083 2083
2084 2084 def _chunkclear(self):
2085 2085 """Clear the raw chunk cache."""
2086 2086 self._chunkcache = (0, '')
2087 2087
2088 2088 def deltaparent(self, rev):
2089 2089 """return deltaparent of the given revision"""
2090 2090 base = self.index[rev][3]
2091 2091 if base == rev:
2092 2092 return nullrev
2093 2093 elif self._generaldelta:
2094 2094 return base
2095 2095 else:
2096 2096 return rev - 1
2097 2097
2098 2098 def issnapshot(self, rev):
2099 2099 """tells whether rev is a snapshot
2100 2100 """
2101 2101 if rev == nullrev:
2102 2102 return True
2103 2103 deltap = self.deltaparent(rev)
2104 2104 if deltap == nullrev:
2105 2105 return True
2106 return False
2106 p1, p2 = self.parentrevs(rev)
2107 if deltap in (p1, p2):
2108 return False
2109 return self.issnapshot(deltap)
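# Sketch of the recursion above (hypothetical chain): let rev f be a
# full snapshot (delta parent nullrev), rev s delta against f where f
# is not one of s's parents, and rev d delta against its own parent:
#   issnapshot(f) -> True   (delta parent is nullrev)
#   issnapshot(s) -> True   (intermediate snapshot: the delta parent is
#                            not a parent of s and is itself a snapshot)
#   issnapshot(d) -> False  (plain delta against a parent)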
2107 2110
2108 2111 def revdiff(self, rev1, rev2):
2109 2112 """return or calculate a delta between two revisions
2110 2113
2111 2114 The delta calculated is in binary form and is intended to be written to
2112 2115 revlog data directly. So this function needs raw revision data.
2113 2116 """
2114 2117 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
2115 2118 return bytes(self._chunk(rev2))
2116 2119
2117 2120 return mdiff.textdiff(self.revision(rev1, raw=True),
2118 2121 self.revision(rev2, raw=True))
2119 2122
2120 2123 def revision(self, nodeorrev, _df=None, raw=False):
2121 2124 """return an uncompressed revision of a given node or revision
2122 2125 number.
2123 2126
2124 2127 _df - an existing file handle to read from. (internal-only)
2125 2128 raw - an optional argument specifying if the revision data is to be
2126 2129 treated as raw data when applying flag transforms. 'raw' should be set
2127 2130 to True when generating changegroups or in debug commands.
2128 2131 """
2129 2132 if isinstance(nodeorrev, int):
2130 2133 rev = nodeorrev
2131 2134 node = self.node(rev)
2132 2135 else:
2133 2136 node = nodeorrev
2134 2137 rev = None
2135 2138
2136 2139 cachedrev = None
2137 2140 flags = None
2138 2141 rawtext = None
2139 2142 if node == nullid:
2140 2143 return ""
2141 2144 if self._cache:
2142 2145 if self._cache[0] == node:
2143 2146 # _cache only stores rawtext
2144 2147 if raw:
2145 2148 return self._cache[2]
2146 2149 # duplicated, but good for perf
2147 2150 if rev is None:
2148 2151 rev = self.rev(node)
2149 2152 if flags is None:
2150 2153 flags = self.flags(rev)
2151 2154 # no extra flags set, no flag processor runs, text = rawtext
2152 2155 if flags == REVIDX_DEFAULT_FLAGS:
2153 2156 return self._cache[2]
2154 2157 # rawtext is reusable. need to run flag processor
2155 2158 rawtext = self._cache[2]
2156 2159
2157 2160 cachedrev = self._cache[1]
2158 2161
2159 2162 # look up what we need to read
2160 2163 if rawtext is None:
2161 2164 if rev is None:
2162 2165 rev = self.rev(node)
2163 2166
2164 2167 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
2165 2168 if stopped:
2166 2169 rawtext = self._cache[2]
2167 2170
2168 2171 # drop cache to save memory
2169 2172 self._cache = None
2170 2173
2171 2174 targetsize = None
2172 2175 rawsize = self.index[rev][2]
2173 2176 if 0 <= rawsize:
2174 2177 targetsize = 4 * rawsize
2175 2178
2176 2179 bins = self._chunks(chain, df=_df, targetsize=targetsize)
2177 2180 if rawtext is None:
2178 2181 rawtext = bytes(bins[0])
2179 2182 bins = bins[1:]
2180 2183
2181 2184 rawtext = mdiff.patches(rawtext, bins)
2182 2185 self._cache = (node, rev, rawtext)
2183 2186
2184 2187 if flags is None:
2185 2188 if rev is None:
2186 2189 rev = self.rev(node)
2187 2190 flags = self.flags(rev)
2188 2191
2189 2192 text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
2190 2193 if validatehash:
2191 2194 self.checkhash(text, node, rev=rev)
2192 2195
2193 2196 return text
2194 2197
2195 2198 def hash(self, text, p1, p2):
2196 2199 """Compute a node hash.
2197 2200
2198 2201 Available as a function so that subclasses can replace the hash
2199 2202 as needed.
2200 2203 """
2201 2204 return hash(text, p1, p2)
2202 2205
2203 2206 def _processflags(self, text, flags, operation, raw=False):
2204 2207 """Inspect revision data flags and applies transforms defined by
2205 2208 registered flag processors.
2206 2209
2207 2210 ``text`` - the revision data to process
2208 2211 ``flags`` - the revision flags
2209 2212 ``operation`` - the operation being performed (read or write)
2210 2213 ``raw`` - an optional argument describing if the raw transform should be
2211 2214 applied.
2212 2215
2213 2216 This method processes the flags in the order (or reverse order if
2214 2217 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
2215 2218 flag processors registered for present flags. The order of flags defined
2216 2219 in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.
2217 2220
2218 2221 Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
2219 2222 processed text and ``validatehash`` is a bool indicating whether the
2220 2223 returned text should be checked for hash integrity.
2221 2224
2222 2225 Note: If the ``raw`` argument is set, it has precedence over the
2223 2226 operation and will only update the value of ``validatehash``.
2224 2227 """
2225 2228 # fast path: no flag processors will run
2226 2229 if flags == 0:
2227 2230 return text, True
2228 2231 if operation not in ('read', 'write'):
2229 2232 raise ProgrammingError(_("invalid '%s' operation") % operation)
2230 2233 # Check all flags are known.
2231 2234 if flags & ~REVIDX_KNOWN_FLAGS:
2232 2235 raise RevlogError(_("incompatible revision flag '%#x'") %
2233 2236 (flags & ~REVIDX_KNOWN_FLAGS))
2234 2237 validatehash = True
2235 2238 # Depending on the operation (read or write), the order might be
2236 2239 # reversed due to non-commutative transforms.
2237 2240 orderedflags = REVIDX_FLAGS_ORDER
2238 2241 if operation == 'write':
2239 2242 orderedflags = reversed(orderedflags)
2240 2243
2241 2244 for flag in orderedflags:
2242 2245 # If a flagprocessor has been registered for a known flag, apply the
2243 2246 # related operation transform and update result tuple.
2244 2247 if flag & flags:
2245 2248 vhash = True
2246 2249
2247 2250 if flag not in _flagprocessors:
2248 2251 message = _("missing processor for flag '%#x'") % (flag)
2249 2252 raise RevlogError(message)
2250 2253
2251 2254 processor = _flagprocessors[flag]
2252 2255 if processor is not None:
2253 2256 readtransform, writetransform, rawtransform = processor
2254 2257
2255 2258 if raw:
2256 2259 vhash = rawtransform(self, text)
2257 2260 elif operation == 'read':
2258 2261 text, vhash = readtransform(self, text)
2259 2262 else: # write operation
2260 2263 text, vhash = writetransform(self, text)
2261 2264 validatehash = validatehash and vhash
2262 2265
2263 2266 return text, validatehash
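# Sketch of the ordering rule above: if flags F1 and F2 both appear, in
# that order, in REVIDX_FLAGS_ORDER, a 'read' applies F1's transform
# and then F2's, while a 'write' applies F2's and then F1's, making the
# write path the exact inverse of the read path even when the two
# transforms do not commute.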
2264 2267
2265 2268 def checkhash(self, text, node, p1=None, p2=None, rev=None):
2266 2269 """Check node hash integrity.
2267 2270
2268 2271 Available as a function so that subclasses can extend hash mismatch
2269 2272 behaviors as needed.
2270 2273 """
2271 2274 try:
2272 2275 if p1 is None and p2 is None:
2273 2276 p1, p2 = self.parents(node)
2274 2277 if node != self.hash(text, p1, p2):
2275 2278 revornode = rev
2276 2279 if revornode is None:
2277 2280 revornode = templatefilters.short(hex(node))
2278 2281 raise RevlogError(_("integrity check failed on %s:%s")
2279 2282 % (self.indexfile, pycompat.bytestr(revornode)))
2280 2283 except RevlogError:
2281 2284 if self._censorable and _censoredtext(text):
2282 2285 raise error.CensoredNodeError(self.indexfile, node, text)
2283 2286 raise
2284 2287
2285 2288 def _enforceinlinesize(self, tr, fp=None):
2286 2289 """Check if the revlog is too big for inline and convert if so.
2287 2290
2288 2291 This should be called after revisions are added to the revlog. If the
2289 2292 revlog has grown too large to be an inline revlog, it will convert it
2290 2293 to use separate index and data files.
2291 2294 """
2292 2295 tiprev = len(self) - 1
2293 2296 if (not self._inline or
2294 2297 (self.start(tiprev) + self.length(tiprev)) < _maxinline):
2295 2298 return
2296 2299
2297 2300 trinfo = tr.find(self.indexfile)
2298 2301 if trinfo is None:
2299 2302 raise RevlogError(_("%s not found in the transaction")
2300 2303 % self.indexfile)
2301 2304
2302 2305 trindex = trinfo[2]
2303 2306 if trindex is not None:
2304 2307 dataoff = self.start(trindex)
2305 2308 else:
2306 2309 # revlog was stripped at start of transaction, use all leftover data
2307 2310 trindex = len(self) - 1
2308 2311 dataoff = self.end(tiprev)
2309 2312
2310 2313 tr.add(self.datafile, dataoff)
2311 2314
2312 2315 if fp:
2313 2316 fp.flush()
2314 2317 fp.close()
2315 2318
2316 2319 with self._datafp('w') as df:
2317 2320 for r in self:
2318 2321 df.write(self._getsegmentforrevs(r, r)[1])
2319 2322
2320 2323 with self._indexfp('w') as fp:
2321 2324 self.version &= ~FLAG_INLINE_DATA
2322 2325 self._inline = False
2323 2326 io = self._io
2324 2327 for i in self:
2325 2328 e = io.packentry(self.index[i], self.node, self.version, i)
2326 2329 fp.write(e)
2327 2330
2328 2331 # the temp file replaces the real index when we exit the context
2329 2332 # manager
2330 2333
2331 2334 tr.replace(self.indexfile, trindex * self._io.size)
2332 2335 self._chunkclear()
2333 2336
2334 2337 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
2335 2338 node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
2336 2339 """add a revision to the log
2337 2340
2338 2341 text - the revision data to add
2339 2342 transaction - the transaction object used for rollback
2340 2343 link - the linkrev data to add
2341 2344 p1, p2 - the parent nodeids of the revision
2342 2345 cachedelta - an optional precomputed delta
2343 2346 node - nodeid of revision; typically node is not specified, and it is
2344 2347 computed by default as hash(text, p1, p2), however subclasses might
2345 2348 use a different hashing method (and override checkhash() in that case)
2346 2349 flags - the known flags to set on the revision
2347 2350 deltacomputer - an optional _deltacomputer instance shared between
2348 2351 multiple calls
2349 2352 """
2350 2353 if link == nullrev:
2351 2354 raise RevlogError(_("attempted to add linkrev -1 to %s")
2352 2355 % self.indexfile)
2353 2356
2354 2357 if flags:
2355 2358 node = node or self.hash(text, p1, p2)
2356 2359
2357 2360 rawtext, validatehash = self._processflags(text, flags, 'write')
2358 2361
2359 2362 # If the flag processor modifies the revision data, ignore any provided
2360 2363 # cachedelta.
2361 2364 if rawtext != text:
2362 2365 cachedelta = None
2363 2366
2364 2367 if len(rawtext) > _maxentrysize:
2365 2368 raise RevlogError(
2366 2369 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
2367 2370 % (self.indexfile, len(rawtext)))
2368 2371
2369 2372 node = node or self.hash(rawtext, p1, p2)
2370 2373 if node in self.nodemap:
2371 2374 return node
2372 2375
2373 2376 if validatehash:
2374 2377 self.checkhash(rawtext, node, p1=p1, p2=p2)
2375 2378
2376 2379 return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
2377 2380 flags, cachedelta=cachedelta,
2378 2381 deltacomputer=deltacomputer)
2379 2382
2380 2383 def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
2381 2384 cachedelta=None, deltacomputer=None):
2382 2385 """add a raw revision with known flags, node and parents
2383 2386 useful when reusing a revision not stored in this revlog (ex: received
2384 2387 over the wire, or read from an external bundle).
2385 2388 """
2386 2389 dfh = None
2387 2390 if not self._inline:
2388 2391 dfh = self._datafp("a+")
2389 2392 ifh = self._indexfp("a+")
2390 2393 try:
2391 2394 return self._addrevision(node, rawtext, transaction, link, p1, p2,
2392 2395 flags, cachedelta, ifh, dfh,
2393 2396 deltacomputer=deltacomputer)
2394 2397 finally:
2395 2398 if dfh:
2396 2399 dfh.close()
2397 2400 ifh.close()
2398 2401
2399 2402 def compress(self, data):
2400 2403 """Generate a possibly-compressed representation of data."""
2401 2404 if not data:
2402 2405 return '', data
2403 2406
2404 2407 compressed = self._compressor.compress(data)
2405 2408
2406 2409 if compressed:
2407 2410 # The revlog compressor added the header in the returned data.
2408 2411 return '', compressed
2409 2412
2410 2413 if data[0:1] == '\0':
2411 2414 return '', data
2412 2415 return 'u', data
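# Sketch of the (header, data) convention above, with hypothetical
# inputs:
#   compress('')         -> ('', '')       empty data is stored as-is
#   compress('a' * 1000) -> ('', <blob>)   compressed blob carries its
#                                          own type byte (e.g. zlib 'x')
#   compress('\0abc')    -> ('', '\0abc')  a '\0' lead byte is kept raw
#   compress('abc')      -> ('u', 'abc')   'u' marks incompressible data
# decompress() below routes on that first byte.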
2413 2416
2414 2417 def decompress(self, data):
2415 2418 """Decompress a revlog chunk.
2416 2419
2417 2420 The chunk is expected to begin with a header identifying the
2418 2421 format type so it can be routed to an appropriate decompressor.
2419 2422 """
2420 2423 if not data:
2421 2424 return data
2422 2425
2423 2426 # Revlogs are read much more frequently than they are written and many
2424 2427 # chunks only take microseconds to decompress, so performance is
2425 2428 # important here.
2426 2429 #
2427 2430 # We can make a few assumptions about revlogs:
2428 2431 #
2429 2432 # 1) the majority of chunks will be compressed (as opposed to inline
2430 2433 # raw data).
2431 2434 # 2) decompressing *any* data will likely be at least 10x slower than
2432 2435 # returning raw inline data.
2433 2436 # 3) we want to prioritize common and officially supported compression
2434 2437 # engines
2435 2438 #
2436 2439 # It follows that we want to optimize for "decompress compressed data
2437 2440 # when encoded with common and officially supported compression engines"
2438 2441 # case over "raw data" and "data encoded by less common or non-official
2439 2442 # compression engines." That is why we have the inline lookup first
2440 2443 # followed by the compengines lookup.
2441 2444 #
2442 2445 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2443 2446 # compressed chunks. And this matters for changelog and manifest reads.
2444 2447 t = data[0:1]
2445 2448
2446 2449 if t == 'x':
2447 2450 try:
2448 2451 return _zlibdecompress(data)
2449 2452 except zlib.error as e:
2450 2453 raise RevlogError(_('revlog decompress error: %s') %
2451 2454 stringutil.forcebytestr(e))
2452 2455 # '\0' is more common than 'u' so it goes first.
2453 2456 elif t == '\0':
2454 2457 return data
2455 2458 elif t == 'u':
2456 2459 return util.buffer(data, 1)
2457 2460
2458 2461 try:
2459 2462 compressor = self._decompressors[t]
2460 2463 except KeyError:
2461 2464 try:
2462 2465 engine = util.compengines.forrevlogheader(t)
2463 2466 compressor = engine.revlogcompressor()
2464 2467 self._decompressors[t] = compressor
2465 2468 except KeyError:
2466 2469 raise RevlogError(_('unknown compression type %r') % t)
2467 2470
2468 2471 return compressor.decompress(data)
2469 2472
2470 2473 def _isgooddeltainfo(self, deltainfo, revinfo):
2471 2474 """Returns True if the given delta is good. Good means that it is within
2472 2475 the disk span, disk size, and chain length bounds that we know to be
2473 2476 performant."""
2474 2477 if deltainfo is None:
2475 2478 return False
2476 2479
2477 2480 # - 'deltainfo.distance' is the distance from the base revision --
2478 2481 # bounding it limits the amount of I/O we need to do.
2479 2482 # - 'deltainfo.compresseddeltalen' is the sum of the total size of
2480 2483 # deltas we need to apply -- bounding it limits the amount of CPU
2481 2484 # we consume.
2482 2485
2483 2486 if self._sparserevlog:
2484 2487 # As sparse-read will be used, we can consider that the distance,
2485 2488 # instead of being the span of the whole chunk,
2486 2489 # is the span of the largest read chunk
2487 2490 base = deltainfo.base
2488 2491
2489 2492 if base != nullrev:
2490 2493 deltachain = self._deltachain(base)[0]
2491 2494 else:
2492 2495 deltachain = []
2493 2496
2494 2497 chunks = _slicechunk(self, deltachain, deltainfo)
2495 2498 all_span = [_segmentspan(self, revs, deltainfo) for revs in chunks]
2496 2499 distance = max(all_span)
2497 2500 else:
2498 2501 distance = deltainfo.distance
2499 2502
2500 2503 textlen = revinfo.textlen
2501 2504 defaultmax = textlen * 4
2502 2505 maxdist = self._maxdeltachainspan
2503 2506 if not maxdist:
2504 2507 maxdist = distance # ensure the conditional pass
2505 2508 maxdist = max(maxdist, defaultmax)
2506 2509 if self._sparserevlog and maxdist < self._srmingapsize:
2507 2510 # In multiple places, we ignore irrelevant data ranges below a
2508 2511 # certain size. We also apply this tradeoff here and relax the span
2509 2512 # constraint for small enough content.
2510 2513 maxdist = self._srmingapsize
2511 2514
2512 2515 # Bad delta from read span:
2513 2516 #
2514 2517 # If the span of data read is larger than the maximum allowed.
2515 2518 if maxdist < distance:
2516 2519 return False
2517 2520
2518 2521 # Bad delta from new delta size:
2519 2522 #
2520 2523 # If the delta size is larger than the target text, storing the
2521 2524 # delta will be inefficient.
2522 2525 if textlen < deltainfo.deltalen:
2523 2526 return False
2524 2527
2525 2528 # Bad delta from cumulated payload size:
2526 2529 #
2527 2530 # If the sum of deltas gets larger than K * target text length.
2528 2531 if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
2529 2532 return False
2530 2533
2531 2534 # Bad delta from chain length:
2532 2535 #
2533 2536 # If the number of deltas in the chain gets too high.
2534 2537 if self._maxchainlen and self._maxchainlen < deltainfo.chainlen:
2535 2538 return False
2536 2539
2537 2540 return True
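# Worked example of the span check above (hypothetical numbers): with
# textlen = 1000 and _maxdeltachainspan = 2000, maxdist becomes
# max(2000, 4 * 1000) = 4000, so a candidate whose read span is 5000
# bytes is rejected. With _maxdeltachainspan unset (0), maxdist is
# first set to the distance itself, so the span check never rejects.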
2538 2541
2539 2542 def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
2540 2543 cachedelta, ifh, dfh, alwayscache=False,
2541 2544 deltacomputer=None):
2542 2545 """internal function to add revisions to the log
2543 2546
2544 2547 see addrevision for argument descriptions.
2545 2548
2546 2549 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2547 2550
2548 2551 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2549 2552 be used.
2550 2553
2551 2554 invariants:
2552 2555 - rawtext is optional (can be None); if not set, cachedelta must be set.
2553 2556 if both are set, they must correspond to each other.
2554 2557 """
2555 2558 if node == nullid:
2556 2559 raise RevlogError(_("%s: attempt to add null revision") %
2557 2560 (self.indexfile))
2558 2561 if node == wdirid or node in wdirfilenodeids:
2559 2562 raise RevlogError(_("%s: attempt to add wdir revision") %
2560 2563 (self.indexfile))
2561 2564
2562 2565 if self._inline:
2563 2566 fh = ifh
2564 2567 else:
2565 2568 fh = dfh
2566 2569
2567 2570 btext = [rawtext]
2568 2571
2569 2572 curr = len(self)
2570 2573 prev = curr - 1
2571 2574 offset = self.end(prev)
2572 2575 p1r, p2r = self.rev(p1), self.rev(p2)
2573 2576
2574 2577 # full versions are inserted when the needed deltas
2575 2578 # become comparable to the uncompressed text
2576 2579 if rawtext is None:
2577 2580 # need rawtext size, before changed by flag processors, which is
2578 2581 # the non-raw size. use revlog explicitly to avoid filelog's extra
2579 2582 # logic that might remove metadata size.
2580 2583 textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
2581 2584 cachedelta[1])
2582 2585 else:
2583 2586 textlen = len(rawtext)
2584 2587
2585 2588 if deltacomputer is None:
2586 2589 deltacomputer = _deltacomputer(self)
2587 2590
2588 2591 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2589 2592
2590 2593 # no delta for flag processor revision (see "candelta" for why)
2591 2594 # not calling candelta since only one revision needs test, also to
2592 2595 # avoid overhead fetching flags again.
2593 2596 if flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
2594 2597 deltainfo = None
2595 2598 else:
2596 2599 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2597 2600
2598 2601 if deltainfo is not None:
2599 2602 base = deltainfo.base
2600 2603 chainbase = deltainfo.chainbase
2601 2604 data = deltainfo.data
2602 2605 l = deltainfo.deltalen
2603 2606 else:
2604 2607 rawtext = deltacomputer.buildtext(revinfo, fh)
2605 2608 data = self.compress(rawtext)
2606 2609 l = len(data[1]) + len(data[0])
2607 2610 base = chainbase = curr
2608 2611
2609 2612 e = (offset_type(offset, flags), l, textlen,
2610 2613 base, link, p1r, p2r, node)
2611 2614 self.index.append(e)
2612 2615 self.nodemap[node] = curr
2613 2616
2614 2617 entry = self._io.packentry(e, self.node, self.version, curr)
2615 2618 self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
2616 2619
2617 2620 if alwayscache and rawtext is None:
2618 2621 rawtext = deltacomputer._buildtext(revinfo, fh)
2619 2622
2620 2623 if type(rawtext) == bytes: # only accept immutable objects
2621 2624 self._cache = (node, curr, rawtext)
2622 2625 self._chainbasecache[curr] = chainbase
2623 2626 return node
2624 2627
2625 2628 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
2626 2629 # Files opened in a+ mode have inconsistent behavior on various
2627 2630 # platforms. Windows requires that a file positioning call be made
2628 2631 # when the file handle transitions between reads and writes. See
2629 2632 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2630 2633 # platforms, Python or the platform itself can be buggy. Some versions
2631 2634 # of Solaris have been observed to not append at the end of the file
2632 2635 # if the file was seeked to before the end. See issue4943 for more.
2633 2636 #
2634 2637 # We work around this issue by inserting a seek() before writing.
2635 2638 # Note: This is likely not necessary on Python 3.
2636 2639 ifh.seek(0, os.SEEK_END)
2637 2640 if dfh:
2638 2641 dfh.seek(0, os.SEEK_END)
2639 2642
2640 2643 curr = len(self) - 1
2641 2644 if not self._inline:
2642 2645 transaction.add(self.datafile, offset)
2643 2646 transaction.add(self.indexfile, curr * len(entry))
2644 2647 if data[0]:
2645 2648 dfh.write(data[0])
2646 2649 dfh.write(data[1])
2647 2650 ifh.write(entry)
2648 2651 else:
2649 2652 offset += curr * self._io.size
2650 2653 transaction.add(self.indexfile, offset, curr)
2651 2654 ifh.write(entry)
2652 2655 ifh.write(data[0])
2653 2656 ifh.write(data[1])
2654 2657 self._enforceinlinesize(transaction, ifh)
2655 2658
2656 2659 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
2657 2660 """
2658 2661 add a delta group
2659 2662
2660 2663 given a set of deltas, add them to the revision log. the
2661 2664 first delta is against its parent, which should be in our
2662 2665 log, the rest are against the previous delta.
2663 2666
2664 2667 If ``addrevisioncb`` is defined, it will be called with arguments of
2665 2668 this revlog and the node that was added.
2666 2669 """
2667 2670
2668 2671 nodes = []
2669 2672
2670 2673 r = len(self)
2671 2674 end = 0
2672 2675 if r:
2673 2676 end = self.end(r - 1)
2674 2677 ifh = self._indexfp("a+")
2675 2678 isize = r * self._io.size
2676 2679 if self._inline:
2677 2680 transaction.add(self.indexfile, end + isize, r)
2678 2681 dfh = None
2679 2682 else:
2680 2683 transaction.add(self.indexfile, isize, r)
2681 2684 transaction.add(self.datafile, end)
2682 2685 dfh = self._datafp("a+")
2683 2686 def flush():
2684 2687 if dfh:
2685 2688 dfh.flush()
2686 2689 ifh.flush()
2687 2690 try:
2688 2691 deltacomputer = _deltacomputer(self)
2689 2692 # loop through our set of deltas
2690 2693 for data in deltas:
2691 2694 node, p1, p2, linknode, deltabase, delta, flags = data
2692 2695 link = linkmapper(linknode)
2693 2696 flags = flags or REVIDX_DEFAULT_FLAGS
2694 2697
2695 2698 nodes.append(node)
2696 2699
2697 2700 if node in self.nodemap:
2698 2701 # this can happen if two branches make the same change
2699 2702 continue
2700 2703
2701 2704 for p in (p1, p2):
2702 2705 if p not in self.nodemap:
2703 2706 raise LookupError(p, self.indexfile,
2704 2707 _('unknown parent'))
2705 2708
2706 2709 if deltabase not in self.nodemap:
2707 2710 raise LookupError(deltabase, self.indexfile,
2708 2711 _('unknown delta base'))
2709 2712
2710 2713 baserev = self.rev(deltabase)
2711 2714
2712 2715 if baserev != nullrev and self.iscensored(baserev):
2713 2716 # if base is censored, delta must be full replacement in a
2714 2717 # single patch operation
2715 2718 hlen = struct.calcsize(">lll")
2716 2719 oldlen = self.rawsize(baserev)
2717 2720 newlen = len(delta) - hlen
2718 2721 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2719 2722 raise error.CensoredBaseError(self.indexfile,
2720 2723 self.node(baserev))
2721 2724
2722 2725 if not flags and self._peek_iscensored(baserev, delta, flush):
2723 2726 flags |= REVIDX_ISCENSORED
2724 2727
2725 2728 # We assume consumers of addrevisioncb will want to retrieve
2726 2729 # the added revision, which will require a call to
2727 2730 # revision(). revision() will fast path if there is a cache
2728 2731 # hit. So, we tell _addrevision() to always cache in this case.
2729 2732 # We're only using addgroup() in the context of changegroup
2730 2733 # generation so the revision data can always be handled as raw
2731 2734 # by the flagprocessor.
2732 2735 self._addrevision(node, None, transaction, link,
2733 2736 p1, p2, flags, (baserev, delta),
2734 2737 ifh, dfh,
2735 2738 alwayscache=bool(addrevisioncb),
2736 2739 deltacomputer=deltacomputer)
2737 2740
2738 2741 if addrevisioncb:
2739 2742 addrevisioncb(self, node)
2740 2743
2741 2744 if not dfh and not self._inline:
2742 2745 # addrevision switched from inline to conventional
2743 2746 # reopen the index
2744 2747 ifh.close()
2745 2748 dfh = self._datafp("a+")
2746 2749 ifh = self._indexfp("a+")
2747 2750 finally:
2748 2751 if dfh:
2749 2752 dfh.close()
2750 2753 ifh.close()
2751 2754
2752 2755 return nodes
2753 2756
2754 2757 def iscensored(self, rev):
2755 2758 """Check if a file revision is censored."""
2756 2759 if not self._censorable:
2757 2760 return False
2758 2761
2759 2762 return self.flags(rev) & REVIDX_ISCENSORED
2760 2763
2761 2764 def _peek_iscensored(self, baserev, delta, flush):
2762 2765 """Quickly check if a delta produces a censored revision."""
2763 2766 if not self._censorable:
2764 2767 return False
2765 2768
2766 2769 # Fragile heuristic: unless new file meta keys are added alphabetically
2767 2770 # preceding "censored", all censored revisions are prefixed by
2768 2771 # "\1\ncensored:". A delta producing such a censored revision must be a
2769 2772 # full-replacement delta, so we inspect the first and only patch in the
2770 2773 # delta for this prefix.
2771 2774 hlen = struct.calcsize(">lll")
2772 2775 if len(delta) <= hlen:
2773 2776 return False
2774 2777
2775 2778 oldlen = self.rawsize(baserev)
2776 2779 newlen = len(delta) - hlen
2777 2780 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2778 2781 return False
2779 2782
2780 2783 add = "\1\ncensored:"
2781 2784 addlen = len(add)
2782 2785 return newlen >= addlen and delta[hlen:hlen + addlen] == add
2783 2786
2784 2787 def getstrippoint(self, minlink):
2785 2788 """find the minimum rev that must be stripped to strip the linkrev
2786 2789
2787 2790 Returns a tuple containing the minimum rev and a set of all revs that
2788 2791 have linkrevs that will be broken by this strip.
2789 2792 """
2790 2793 brokenrevs = set()
2791 2794 strippoint = len(self)
2792 2795
2793 2796 heads = {}
2794 2797 futurelargelinkrevs = set()
2795 2798 for head in self.headrevs():
2796 2799 headlinkrev = self.linkrev(head)
2797 2800 heads[head] = headlinkrev
2798 2801 if headlinkrev >= minlink:
2799 2802 futurelargelinkrevs.add(headlinkrev)
2800 2803
2801 2804 # This algorithm involves walking down the rev graph, starting at the
2802 2805 # heads. Since the revs are topologically sorted according to linkrev,
2803 2806 # once all head linkrevs are below the minlink, we know there are
2804 2807 # no more revs that could have a linkrev greater than minlink.
2805 2808 # So we can stop walking.
2806 2809 while futurelargelinkrevs:
2807 2810 strippoint -= 1
2808 2811 linkrev = heads.pop(strippoint)
2809 2812
2810 2813 if linkrev < minlink:
2811 2814 brokenrevs.add(strippoint)
2812 2815 else:
2813 2816 futurelargelinkrevs.remove(linkrev)
2814 2817
2815 2818 for p in self.parentrevs(strippoint):
2816 2819 if p != nullrev:
2817 2820 plinkrev = self.linkrev(p)
2818 2821 heads[p] = plinkrev
2819 2822 if plinkrev >= minlink:
2820 2823 futurelargelinkrevs.add(plinkrev)
2821 2824
2822 2825 return strippoint, brokenrevs
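# Sketch of the walk above (hypothetical numbers): stripping with
# minlink = 5 in a revlog whose heads carry linkrevs {4, 7} starts
# with futurelargelinkrevs = {7}; revisions are popped from the top
# until no linkrev >= 5 remains in flight, and every popped revision
# whose linkrev is below 5 ends up in brokenrevs.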
2823 2826
2824 2827 def strip(self, minlink, transaction):
2825 2828 """truncate the revlog on the first revision with a linkrev >= minlink
2826 2829
2827 2830 This function is called when we're stripping revision minlink and
2828 2831 its descendants from the repository.
2829 2832
2830 2833 We have to remove all revisions with linkrev >= minlink, because
2831 2834 the equivalent changelog revisions will be renumbered after the
2832 2835 strip.
2833 2836
2834 2837 So we truncate the revlog on the first of these revisions, and
2835 2838 trust that the caller has saved the revisions that shouldn't be
2836 2839 removed and that it'll re-add them after this truncation.
2837 2840 """
2838 2841 if len(self) == 0:
2839 2842 return
2840 2843
2841 2844 rev, _ = self.getstrippoint(minlink)
2842 2845 if rev == len(self):
2843 2846 return
2844 2847
2845 2848 # first truncate the files on disk
2846 2849 end = self.start(rev)
2847 2850 if not self._inline:
2848 2851 transaction.add(self.datafile, end)
2849 2852 end = rev * self._io.size
2850 2853 else:
2851 2854 end += rev * self._io.size
2852 2855
2853 2856 transaction.add(self.indexfile, end)
2854 2857
2855 2858 # then reset internal state in memory to forget those revisions
2856 2859 self._cache = None
2857 2860 self._chaininfocache = {}
2858 2861 self._chunkclear()
2859 2862 for x in pycompat.xrange(rev, len(self)):
2860 2863 del self.nodemap[self.node(x)]
2861 2864
2862 2865 del self.index[rev:-1]
2863 2866 self._nodepos = None
2864 2867
2865 2868 def checksize(self):
2866 2869 expected = 0
2867 2870 if len(self):
2868 2871 expected = max(0, self.end(len(self) - 1))
2869 2872
2870 2873 try:
2871 2874 with self._datafp() as f:
2872 2875 f.seek(0, 2)
2873 2876 actual = f.tell()
2874 2877 dd = actual - expected
2875 2878 except IOError as inst:
2876 2879 if inst.errno != errno.ENOENT:
2877 2880 raise
2878 2881 dd = 0
2879 2882
2880 2883 try:
2881 2884 f = self.opener(self.indexfile)
2882 2885 f.seek(0, 2)
2883 2886 actual = f.tell()
2884 2887 f.close()
2885 2888 s = self._io.size
2886 2889 i = max(0, actual // s)
2887 2890 di = actual - (i * s)
2888 2891 if self._inline:
2889 2892 databytes = 0
2890 2893 for r in self:
2891 2894 databytes += max(0, self.length(r))
2892 2895 dd = 0
2893 2896 di = actual - len(self) * s - databytes
2894 2897 except IOError as inst:
2895 2898 if inst.errno != errno.ENOENT:
2896 2899 raise
2897 2900 di = 0
2898 2901
2899 2902 return (dd, di)
2900 2903
2901 2904 def files(self):
2902 2905 res = [self.indexfile]
2903 2906 if not self._inline:
2904 2907 res.append(self.datafile)
2905 2908 return res
2906 2909
2907 2910 DELTAREUSEALWAYS = 'always'
2908 2911 DELTAREUSESAMEREVS = 'samerevs'
2909 2912 DELTAREUSENEVER = 'never'
2910 2913
2911 2914 DELTAREUSEFULLADD = 'fulladd'
2912 2915
2913 2916 DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}
2914 2917
2915 2918 def clone(self, tr, destrevlog, addrevisioncb=None,
2916 2919 deltareuse=DELTAREUSESAMEREVS, deltabothparents=None):
2917 2920 """Copy this revlog to another, possibly with format changes.
2918 2921
2919 2922 The destination revlog will contain the same revisions and nodes.
2920 2923 However, it may not be bit-for-bit identical due to e.g. delta encoding
2921 2924 differences.
2922 2925
2923 2926 The ``deltareuse`` argument controls how deltas from the existing revlog
2924 2927 are preserved in the destination revlog. The argument can have the
2925 2928 following values:
2926 2929
2927 2930 DELTAREUSEALWAYS
2928 2931 Deltas will always be reused (if possible), even if the destination
2929 2932 revlog would not select the same revisions for the delta. This is the
2930 2933 fastest mode of operation.
2931 2934 DELTAREUSESAMEREVS
2932 2935 Deltas will be reused if the destination revlog would pick the same
2933 2936 revisions for the delta. This mode strikes a balance between speed
2934 2937 and optimization.
2935 2938 DELTAREUSENEVER
2936 2939 Deltas will never be reused. This is the slowest mode of execution.
2937 2940 This mode can be used to recompute deltas (e.g. if the diff/delta
2938 2941 algorithm changes).
2939 2942
2940 2943 Delta computation can be slow, so the choice of delta reuse policy can
2941 2944 significantly affect run time.
2942 2945
2943 2946 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2944 2947 two extremes. Deltas will be reused if they are appropriate. But if the
2945 2948 delta could choose a better revision, it will do so. This means if you
2946 2949 are converting a non-generaldelta revlog to a generaldelta revlog,
2947 2950 deltas will be recomputed if the delta's parent isn't a parent of the
2948 2951 revision.
2949 2952
2950 2953 In addition to the delta policy, the ``deltabothparents`` argument
2951 2954 controls whether to compute deltas against both parents for merges.
2952 2955 If it is None, the destination revlog keeps its current setting.
2953 2956 """
2954 2957 if deltareuse not in self.DELTAREUSEALL:
2955 2958 raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)
2956 2959
2957 2960 if len(destrevlog):
2958 2961 raise ValueError(_('destination revlog is not empty'))
2959 2962
2960 2963 if getattr(self, 'filteredrevs', None):
2961 2964 raise ValueError(_('source revlog has filtered revisions'))
2962 2965 if getattr(destrevlog, 'filteredrevs', None):
2963 2966 raise ValueError(_('destination revlog has filtered revisions'))
2964 2967
2965 2968 # lazydeltabase controls whether to reuse a cached delta, if possible.
2966 2969 oldlazydeltabase = destrevlog._lazydeltabase
2967 2970 oldamd = destrevlog._deltabothparents
2968 2971
2969 2972 try:
2970 2973 if deltareuse == self.DELTAREUSEALWAYS:
2971 2974 destrevlog._lazydeltabase = True
2972 2975 elif deltareuse == self.DELTAREUSESAMEREVS:
2973 2976 destrevlog._lazydeltabase = False
2974 2977
2975 2978 destrevlog._deltabothparents = deltabothparents or oldamd
2976 2979
2977 2980 populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
2978 2981 self.DELTAREUSESAMEREVS)
2979 2982
2980 2983 deltacomputer = _deltacomputer(destrevlog)
2981 2984 index = self.index
2982 2985 for rev in self:
2983 2986 entry = index[rev]
2984 2987
2985 2988 # Some classes override linkrev to take filtered revs into
2986 2989 # account. Use raw entry from index.
2987 2990 flags = entry[0] & 0xffff
2988 2991 linkrev = entry[4]
2989 2992 p1 = index[entry[5]][7]
2990 2993 p2 = index[entry[6]][7]
2991 2994 node = entry[7]
2992 2995
2993 2996 # (Possibly) reuse the delta from the revlog if allowed and
2994 2997 # the revlog chunk is a delta.
2995 2998 cachedelta = None
2996 2999 rawtext = None
2997 3000 if populatecachedelta:
2998 3001 dp = self.deltaparent(rev)
2999 3002 if dp != nullrev:
3000 3003 cachedelta = (dp, bytes(self._chunk(rev)))
3001 3004
3002 3005 if not cachedelta:
3003 3006 rawtext = self.revision(rev, raw=True)
3004 3007
3005 3008
3006 3009 if deltareuse == self.DELTAREUSEFULLADD:
3007 3010 destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
3008 3011 cachedelta=cachedelta,
3009 3012 node=node, flags=flags,
3010 3013 deltacomputer=deltacomputer)
3011 3014 else:
3012 3015 ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
3013 3016 checkambig=False)
3014 3017 dfh = None
3015 3018 if not destrevlog._inline:
3016 3019 dfh = destrevlog.opener(destrevlog.datafile, 'a+')
3017 3020 try:
3018 3021 destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
3019 3022 p2, flags, cachedelta, ifh, dfh,
3020 3023 deltacomputer=deltacomputer)
3021 3024 finally:
3022 3025 if dfh:
3023 3026 dfh.close()
3024 3027 ifh.close()
3025 3028
3026 3029 if addrevisioncb:
3027 3030 addrevisioncb(self, rev, node)
3028 3031 finally:
3029 3032 destrevlog._lazydeltabase = oldlazydeltabase
3030 3033 destrevlog._deltabothparents = oldamd
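# Usage sketch (hypothetical 'srclog', 'dstlog' and transaction 'tr'):
#   srclog.clone(tr, dstlog, deltareuse=srclog.DELTAREUSENEVER)
# copies every revision into the empty 'dstlog' while recomputing all
# deltas, the slowest option, suited to a change of delta strategy.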