revlog: remove legacy usage of `_maxchainlen`...
marmoute
r51945:e80e2d61 default
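The change below swaps direct reads of the legacy `revlog._maxchainlen` attribute for the newer `revlog.delta_config.max_chain_len` field. A minimal sketch of the pattern being migrated, assuming a `DeltaConfig`-style holder; the class bodies here are illustrative, not Mercurial's real classes:

    class DeltaConfig:
        """Holds delta-related tuning knobs in one place."""

        def __init__(self, max_chain_len=None):
            # None means "no limit on delta chain length"
            self.max_chain_len = max_chain_len


    class Revlog:
        def __init__(self, delta_config):
            self.delta_config = delta_config
            # legacy alias, the kind of attribute this commit removes:
            self._maxchainlen = delta_config.max_chain_len


    # old call sites:  if revlog._maxchainlen and chainlen >= revlog._maxchainlen: ...
    # new call sites:  if revlog.delta_config.max_chain_len and chainlen >= ...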
@@ -1,1624 +1,1630 b''
1 1 # revlogdeltas.py - Logic around delta computation for revlog
2 2 #
3 3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 4 # Copyright 2018 Octobus <contact@octobus.net>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8 """Helper class to compute deltas stored inside revlogs"""
9 9
10 10
11 11 import collections
12 12 import struct
13 13
14 14 # import stuff from node for others to import from revlog
15 15 from ..node import nullrev
16 16 from ..i18n import _
17 17
18 18 from .constants import (
19 19 COMP_MODE_DEFAULT,
20 20 COMP_MODE_INLINE,
21 21 COMP_MODE_PLAIN,
22 22 DELTA_BASE_REUSE_FORCE,
23 23 DELTA_BASE_REUSE_NO,
24 24 KIND_CHANGELOG,
25 25 KIND_FILELOG,
26 26 KIND_MANIFESTLOG,
27 27 REVIDX_ISCENSORED,
28 28 REVIDX_RAWTEXT_CHANGING_FLAGS,
29 29 )
30 30
31 31 from ..thirdparty import attr
32 32
33 33 from .. import (
34 34 error,
35 35 mdiff,
36 36 util,
37 37 )
38 38
39 39 from . import flagutil
40 40
41 41 # maximum <delta-chain-data>/<revision-text-length> ratio
42 42 LIMIT_DELTA2TEXT = 2
43 43
44 44
45 45 class _testrevlog:
46 46 """minimalist fake revlog to use in doctests"""
47 47
48 48 def __init__(self, data, density=0.5, mingap=0, snapshot=()):
49 49 """data is an list of revision payload boundaries"""
50 50 from .. import revlog
51 51
52 52 self._data = data
53 53 self._srdensitythreshold = density
54 54 self._srmingapsize = mingap
55 55 self.data_config = revlog.DataConfig()
56 56 self.data_config.sr_density_threshold = density
57 57 self.data_config.sr_min_gap_size = mingap
58 58 self.delta_config = revlog.DeltaConfig()
59 59 self.feature_config = revlog.FeatureConfig()
60 60 self._snapshot = set(snapshot)
61 61 self.index = None
62 62
63 63 def start(self, rev):
64 64 if rev == nullrev:
65 65 return 0
66 66 if rev == 0:
67 67 return 0
68 68 return self._data[rev - 1]
69 69
70 70 def end(self, rev):
71 71 if rev == nullrev:
72 72 return 0
73 73 return self._data[rev]
74 74
75 75 def length(self, rev):
76 76 return self.end(rev) - self.start(rev)
77 77
78 78 def __len__(self):
79 79 return len(self._data)
80 80
81 81 def issnapshot(self, rev):
82 82 if rev == nullrev:
83 83 return True
84 84 return rev in self._snapshot
85 85
86 86
87 87 def slicechunk(revlog, revs, targetsize=None):
88 88 """slice revs to reduce the amount of unrelated data to be read from disk.
89 89
90 90 ``revs`` is sliced into groups that should be read in one time.
91 91 Assume that revs are sorted.
92 92
93 93 The initial chunk is sliced until the overall density (payload/chunks-span
94 94 ratio) is above `revlog._srdensitythreshold`. No gap smaller than
95 95 `revlog._srmingapsize` is skipped.
96 96
97 97 If `targetsize` is set, no chunk larger than `targetsize` will be yielded.
98 98 For consistency with other slicing choices, this limit won't go lower than
99 99 `revlog._srmingapsize`.
100 100
101 101 If individual revision chunks are larger than this limit, they will still
102 102 be yielded individually.
103 103
104 104 >>> data = [
105 105 ... 5, #00 (5)
106 106 ... 10, #01 (5)
107 107 ... 12, #02 (2)
108 108 ... 12, #03 (empty)
109 109 ... 27, #04 (15)
110 110 ... 31, #05 (4)
111 111 ... 31, #06 (empty)
112 112 ... 42, #07 (11)
113 113 ... 47, #08 (5)
114 114 ... 47, #09 (empty)
115 115 ... 48, #10 (1)
116 116 ... 51, #11 (3)
117 117 ... 74, #12 (23)
118 118 ... 85, #13 (11)
119 119 ... 86, #14 (1)
120 120 ... 91, #15 (5)
121 121 ... ]
122 122 >>> revlog = _testrevlog(data, snapshot=range(16))
123 123
124 124 >>> list(slicechunk(revlog, list(range(16))))
125 125 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
126 126 >>> list(slicechunk(revlog, [0, 15]))
127 127 [[0], [15]]
128 128 >>> list(slicechunk(revlog, [0, 11, 15]))
129 129 [[0], [11], [15]]
130 130 >>> list(slicechunk(revlog, [0, 11, 13, 15]))
131 131 [[0], [11, 13, 15]]
132 132 >>> list(slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
133 133 [[1, 2], [5, 8, 10, 11], [14]]
134 134
135 135 Slicing with a maximum chunk size
136 136 >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
137 137 [[0], [11], [13], [15]]
138 138 >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
139 139 [[0], [11], [13, 15]]
140 140
141 141 Slicing involving nullrev
142 142 >>> list(slicechunk(revlog, [-1, 0, 11, 13, 15], targetsize=20))
143 143 [[-1, 0], [11], [13, 15]]
144 144 >>> list(slicechunk(revlog, [-1, 13, 15], targetsize=5))
145 145 [[-1], [13], [15]]
146 146 """
147 147 if targetsize is not None:
148 148 targetsize = max(targetsize, revlog._srmingapsize)
149 149 # targetsize should not be specified when evaluating delta candidates:
150 150 # * targetsize is used to ensure we stay within specification when reading,
151 151 densityslicing = getattr(revlog.index, 'slicechunktodensity', None)
152 152 if densityslicing is None:
153 153 densityslicing = lambda x, y, z: _slicechunktodensity(revlog, x, y, z)
154 154 for chunk in densityslicing(
155 155 revs, revlog._srdensitythreshold, revlog._srmingapsize
156 156 ):
157 157 for subchunk in _slicechunktosize(revlog, chunk, targetsize):
158 158 yield subchunk
159 159
160 160
161 161 def _slicechunktosize(revlog, revs, targetsize=None):
162 162 """slice revs to match the target size
163 163
164 164 This is intended to be used on chunks that density slicing selected but that
165 165 are still too large compared to the read guarantee of the revlog. This might
166 166 happen when the "minimal gap size" interrupted the slicing or when chains are
167 167 built in a way that creates large blocks next to each other.
168 168
169 169 >>> data = [
170 170 ... 3, #0 (3)
171 171 ... 5, #1 (2)
172 172 ... 6, #2 (1)
173 173 ... 8, #3 (2)
174 174 ... 8, #4 (empty)
175 175 ... 11, #5 (3)
176 176 ... 12, #6 (1)
177 177 ... 13, #7 (1)
178 178 ... 14, #8 (1)
179 179 ... ]
180 180
181 181 == All snapshots cases ==
182 182 >>> revlog = _testrevlog(data, snapshot=range(9))
183 183
184 184 Cases where chunk is already small enough
185 185 >>> list(_slicechunktosize(revlog, [0], 3))
186 186 [[0]]
187 187 >>> list(_slicechunktosize(revlog, [6, 7], 3))
188 188 [[6, 7]]
189 189 >>> list(_slicechunktosize(revlog, [0], None))
190 190 [[0]]
191 191 >>> list(_slicechunktosize(revlog, [6, 7], None))
192 192 [[6, 7]]
193 193
194 194 Cases where we need actual slicing
195 195 >>> list(_slicechunktosize(revlog, [0, 1], 3))
196 196 [[0], [1]]
197 197 >>> list(_slicechunktosize(revlog, [1, 3], 3))
198 198 [[1], [3]]
199 199 >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
200 200 [[1, 2], [3]]
201 201 >>> list(_slicechunktosize(revlog, [3, 5], 3))
202 202 [[3], [5]]
203 203 >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
204 204 [[3], [5]]
205 205 >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
206 206 [[5], [6, 7, 8]]
207 207 >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
208 208 [[0], [1, 2], [3], [5], [6, 7, 8]]
209 209
210 210 Case with too large individual chunk (must return valid chunk)
211 211 >>> list(_slicechunktosize(revlog, [0, 1], 2))
212 212 [[0], [1]]
213 213 >>> list(_slicechunktosize(revlog, [1, 3], 1))
214 214 [[1], [3]]
215 215 >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
216 216 [[3], [5]]
217 217
218 218 == No Snapshot cases ==
219 219 >>> revlog = _testrevlog(data)
220 220
221 221 Cases where chunk is already small enough
222 222 >>> list(_slicechunktosize(revlog, [0], 3))
223 223 [[0]]
224 224 >>> list(_slicechunktosize(revlog, [6, 7], 3))
225 225 [[6, 7]]
226 226 >>> list(_slicechunktosize(revlog, [0], None))
227 227 [[0]]
228 228 >>> list(_slicechunktosize(revlog, [6, 7], None))
229 229 [[6, 7]]
230 230
231 231 Cases where we need actual slicing
232 232 >>> list(_slicechunktosize(revlog, [0, 1], 3))
233 233 [[0], [1]]
234 234 >>> list(_slicechunktosize(revlog, [1, 3], 3))
235 235 [[1], [3]]
236 236 >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
237 237 [[1], [2, 3]]
238 238 >>> list(_slicechunktosize(revlog, [3, 5], 3))
239 239 [[3], [5]]
240 240 >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
241 241 [[3], [4, 5]]
242 242 >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
243 243 [[5], [6, 7, 8]]
244 244 >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
245 245 [[0], [1, 2], [3], [5], [6, 7, 8]]
246 246
247 247 Case with too large individual chunk (must return valid chunk)
248 248 >>> list(_slicechunktosize(revlog, [0, 1], 2))
249 249 [[0], [1]]
250 250 >>> list(_slicechunktosize(revlog, [1, 3], 1))
251 251 [[1], [3]]
252 252 >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
253 253 [[3], [5]]
254 254
255 255 == mixed case ==
256 256 >>> revlog = _testrevlog(data, snapshot=[0, 1, 2])
257 257 >>> list(_slicechunktosize(revlog, list(range(9)), 5))
258 258 [[0, 1], [2], [3, 4, 5], [6, 7, 8]]
259 259 """
260 260 assert targetsize is None or 0 <= targetsize
261 261 startdata = revlog.start(revs[0])
262 262 enddata = revlog.end(revs[-1])
263 263 fullspan = enddata - startdata
264 264 if targetsize is None or fullspan <= targetsize:
265 265 yield revs
266 266 return
267 267
268 268 startrevidx = 0
269 269 endrevidx = 1
270 270 iterrevs = enumerate(revs)
271 271 next(iterrevs) # skip first rev.
272 272 # first step: get snapshots out of the way
273 273 for idx, r in iterrevs:
274 274 span = revlog.end(r) - startdata
275 275 snapshot = revlog.issnapshot(r)
276 276 if span <= targetsize and snapshot:
277 277 endrevidx = idx + 1
278 278 else:
279 279 chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
280 280 if chunk:
281 281 yield chunk
282 282 startrevidx = idx
283 283 startdata = revlog.start(r)
284 284 endrevidx = idx + 1
285 285 if not snapshot:
286 286 break
287 287
288 288 # for the others, we use binary slicing to quickly converge toward valid
289 289 # chunks (otherwise, we might end up looking for start/end of many
290 290 # revisions). This logic is not looking for the perfect slicing point, it
291 291 # focuses on quickly converging toward valid chunks.
292 292 nbitem = len(revs)
293 293 while (enddata - startdata) > targetsize:
294 294 endrevidx = nbitem
295 295 if nbitem - startrevidx <= 1:
296 296 break # protect against individual chunk larger than limit
297 297 localenddata = revlog.end(revs[endrevidx - 1])
298 298 span = localenddata - startdata
299 299 while span > targetsize:
300 300 if endrevidx - startrevidx <= 1:
301 301 break # protect against individual chunk larger than limit
302 302 endrevidx -= (endrevidx - startrevidx) // 2
303 303 localenddata = revlog.end(revs[endrevidx - 1])
304 304 span = localenddata - startdata
305 305 chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
306 306 if chunk:
307 307 yield chunk
308 308 startrevidx = endrevidx
309 309 startdata = revlog.start(revs[startrevidx])
310 310
311 311 chunk = _trimchunk(revlog, revs, startrevidx)
312 312 if chunk:
313 313 yield chunk
314 314
315 315
316 316 def _slicechunktodensity(revlog, revs, targetdensity=0.5, mingapsize=0):
317 317 """slice revs to reduce the amount of unrelated data to be read from disk.
318 318
319 319 ``revs`` is sliced into groups that should be read in one time.
320 320 Assume that revs are sorted.
321 321
322 322 The initial chunk is sliced until the overall density (payload/chunks-span
323 323 ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
324 324 skipped.
325 325
326 326 >>> revlog = _testrevlog([
327 327 ... 5, #00 (5)
328 328 ... 10, #01 (5)
329 329 ... 12, #02 (2)
330 330 ... 12, #03 (empty)
331 331 ... 27, #04 (15)
332 332 ... 31, #05 (4)
333 333 ... 31, #06 (empty)
334 334 ... 42, #07 (11)
335 335 ... 47, #08 (5)
336 336 ... 47, #09 (empty)
337 337 ... 48, #10 (1)
338 338 ... 51, #11 (3)
339 339 ... 74, #12 (23)
340 340 ... 85, #13 (11)
341 341 ... 86, #14 (1)
342 342 ... 91, #15 (5)
343 343 ... ])
344 344
345 345 >>> list(_slicechunktodensity(revlog, list(range(16))))
346 346 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
347 347 >>> list(_slicechunktodensity(revlog, [0, 15]))
348 348 [[0], [15]]
349 349 >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
350 350 [[0], [11], [15]]
351 351 >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
352 352 [[0], [11, 13, 15]]
353 353 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
354 354 [[1, 2], [5, 8, 10, 11], [14]]
355 355 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
356 356 ... mingapsize=20))
357 357 [[1, 2, 3, 5, 8, 10, 11], [14]]
358 358 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
359 359 ... targetdensity=0.95))
360 360 [[1, 2], [5], [8, 10, 11], [14]]
361 361 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
362 362 ... targetdensity=0.95, mingapsize=12))
363 363 [[1, 2], [5, 8, 10, 11], [14]]
364 364 """
365 365 start = revlog.start
366 366 length = revlog.length
367 367
368 368 if len(revs) <= 1:
369 369 yield revs
370 370 return
371 371
372 372 deltachainspan = segmentspan(revlog, revs)
373 373
374 374 if deltachainspan < mingapsize:
375 375 yield revs
376 376 return
377 377
378 378 readdata = deltachainspan
379 379 chainpayload = sum(length(r) for r in revs)
380 380
381 381 if deltachainspan:
382 382 density = chainpayload / float(deltachainspan)
383 383 else:
384 384 density = 1.0
385 385
386 386 if density >= targetdensity:
387 387 yield revs
388 388 return
389 389
390 390 # Collect the gaps; they will be sorted and popped from largest to smallest
391 391 gaps = []
392 392 prevend = None
393 393 for i, rev in enumerate(revs):
394 394 revstart = start(rev)
395 395 revlen = length(rev)
396 396
397 397 # Skip empty revisions to form larger holes
398 398 if revlen == 0:
399 399 continue
400 400
401 401 if prevend is not None:
402 402 gapsize = revstart - prevend
403 403 # only consider holes that are large enough
404 404 if gapsize > mingapsize:
405 405 gaps.append((gapsize, i))
406 406
407 407 prevend = revstart + revlen
408 408 # sort the gaps to pop them from largest to smallest
409 409 gaps.sort()
410 410
411 411 # Collect the indices of the largest holes until the density is acceptable
412 412 selected = []
413 413 while gaps and density < targetdensity:
414 414 gapsize, gapidx = gaps.pop()
415 415
416 416 selected.append(gapidx)
417 417
418 418 # the gaps were sorted in increasing order above, so pop()
419 419 # returns the largest gap first
420 420 readdata -= gapsize
421 421 if readdata > 0:
422 422 density = chainpayload / float(readdata)
423 423 else:
424 424 density = 1.0
425 425 selected.sort()
426 426
427 427 # Cut the revs at collected indices
428 428 previdx = 0
429 429 for idx in selected:
430 430
431 431 chunk = _trimchunk(revlog, revs, previdx, idx)
432 432 if chunk:
433 433 yield chunk
434 434
435 435 previdx = idx
436 436
437 437 chunk = _trimchunk(revlog, revs, previdx)
438 438 if chunk:
439 439 yield chunk
440 440
441 441
442 442 def _trimchunk(revlog, revs, startidx, endidx=None):
443 443 """returns revs[startidx:endidx] without empty trailing revs
444 444
445 445 Doctest Setup
446 446 >>> revlog = _testrevlog([
447 447 ... 5, #0
448 448 ... 10, #1
449 449 ... 12, #2
450 450 ... 12, #3 (empty)
451 451 ... 17, #4
452 452 ... 21, #5
453 453 ... 21, #6 (empty)
454 454 ... ])
455 455
456 456 Contiguous cases:
457 457 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
458 458 [0, 1, 2, 3, 4, 5]
459 459 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
460 460 [0, 1, 2, 3, 4]
461 461 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
462 462 [0, 1, 2]
463 463 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
464 464 [2]
465 465 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
466 466 [3, 4, 5]
467 467 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
468 468 [3, 4]
469 469
470 470 Discontiguous cases:
471 471 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
472 472 [1, 3, 5]
473 473 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
474 474 [1]
475 475 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
476 476 [3, 5]
477 477 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
478 478 [3, 5]
479 479 """
480 480 length = revlog.length
481 481
482 482 if endidx is None:
483 483 endidx = len(revs)
484 484
485 485 # If we have a non-empty delta candidate, there is nothing to trim
486 486 if revs[endidx - 1] < len(revlog):
487 487 # Trim empty revs at the end, except the very first revision of a chain
488 488 while (
489 489 endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0
490 490 ):
491 491 endidx -= 1
492 492
493 493 return revs[startidx:endidx]
494 494
495 495
496 496 def segmentspan(revlog, revs):
497 497 """Get the byte span of a segment of revisions
498 498
499 499 revs is a sorted array of revision numbers
500 500
501 501 >>> revlog = _testrevlog([
502 502 ... 5, #0
503 503 ... 10, #1
504 504 ... 12, #2
505 505 ... 12, #3 (empty)
506 506 ... 17, #4
507 507 ... ])
508 508
509 509 >>> segmentspan(revlog, [0, 1, 2, 3, 4])
510 510 17
511 511 >>> segmentspan(revlog, [0, 4])
512 512 17
513 513 >>> segmentspan(revlog, [3, 4])
514 514 5
515 515 >>> segmentspan(revlog, [1, 2, 3,])
516 516 7
517 517 >>> segmentspan(revlog, [1, 3])
518 518 7
519 519 """
520 520 if not revs:
521 521 return 0
522 522 end = revlog.end(revs[-1])
523 523 return end - revlog.start(revs[0])
524 524
525 525
526 526 def _textfromdelta(revlog, baserev, delta, p1, p2, flags, expectednode):
527 527 """build full text from a (base, delta) pair and other metadata"""
528 528 # special case deltas which replace entire base; no need to decode
529 529 # base revision. this neatly avoids censored bases, which throw when
530 530 # they're decoded.
531 531 hlen = struct.calcsize(b">lll")
532 532 if delta[:hlen] == mdiff.replacediffheader(
533 533 revlog.rawsize(baserev), len(delta) - hlen
534 534 ):
535 535 fulltext = delta[hlen:]
536 536 else:
537 537 # deltabase is rawtext before changed by flag processors, which is
538 538 # equivalent to non-raw text
539 539 basetext = revlog.revision(baserev)
540 540 fulltext = mdiff.patch(basetext, delta)
541 541
542 542 try:
543 543 validatehash = flagutil.processflagsraw(revlog, fulltext, flags)
544 544 if validatehash:
545 545 revlog.checkhash(fulltext, expectednode, p1=p1, p2=p2)
546 546 if flags & REVIDX_ISCENSORED:
547 547 raise error.StorageError(
548 548 _(b'node %s is not censored') % expectednode
549 549 )
550 550 except error.CensoredNodeError:
551 551 # must pass the censored index flag to add censored revisions
552 552 if not flags & REVIDX_ISCENSORED:
553 553 raise
554 554 return fulltext
555 555
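The special case above detects a delta that replaces the entire base purely from its header, without decoding the base. A minimal sketch of that check, assuming (as the comparison above implies) that mdiff.replacediffheader packs a big-endian (start, end, new-length) triple covering the whole base:

    import struct

    def is_full_replacement(delta, base_rawsize):
        # a single bdiff hunk header: (start, end, new-length) as ">lll";
        # replacing the whole base means start == 0 and end == base_rawsize
        hlen = struct.calcsize(b">lll")
        header = struct.pack(b">lll", 0, base_rawsize, len(delta) - hlen)
        return delta[:hlen] == header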
556 556
557 557 @attr.s(slots=True, frozen=True)
558 558 class _deltainfo:
559 559 distance = attr.ib()
560 560 deltalen = attr.ib()
561 561 data = attr.ib()
562 562 base = attr.ib()
563 563 chainbase = attr.ib()
564 564 chainlen = attr.ib()
565 565 compresseddeltalen = attr.ib()
566 566 snapshotdepth = attr.ib()
567 567
568 568
569 569 def drop_u_compression(delta):
570 570 """turn into a "u" (no-compression) into no-compression without header
571 571
572 572 This is useful for revlog format that has better compression method.
573 573 """
574 574 assert delta.data[0] == b'u', delta.data[0]
575 575 return _deltainfo(
576 576 delta.distance,
577 577 delta.deltalen - 1,
578 578 (b'', delta.data[1]),
579 579 delta.base,
580 580 delta.chainbase,
581 581 delta.chainlen,
582 582 delta.compresseddeltalen,
583 583 delta.snapshotdepth,
584 584 )
585 585
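For context, revlog compression tags each stored chunk with a leading type byte, and b'u' marks an uncompressed payload. A toy illustration of the byte-level transformation performed above (a sketch, not the revlog API itself):

    def strip_u_header(stored):
        # b'u' + payload -> payload; newer revlog formats can record
        # "plain" storage out of band, saving the one-byte header
        if stored[:1] == b'u':
            return stored[1:]
        return stored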
586 586
587 587 def is_good_delta_info(revlog, deltainfo, revinfo):
588 588 """Returns True if the given delta is good. Good means that it is within
589 589 the disk span, disk size, and chain length bounds that we know to be
590 590 performant."""
591 591 if deltainfo is None:
592 592 return False
593 593
594 594 # the DELTA_BASE_REUSE_FORCE case should have been taken care of sooner, so
595 595 # we should never end up asking such a question. Adding the assert as a
596 596 # safe-guard to detect anything that would be fishy in this regard.
597 597 assert (
598 598 revinfo.cachedelta is None
599 599 or revinfo.cachedelta[2] != DELTA_BASE_REUSE_FORCE
600 600 or not revlog.delta_config.general_delta
601 601 )
602 602
603 603 # - 'deltainfo.distance' is the distance from the base revision --
604 604 # bounding it limits the amount of I/O we need to do.
605 605 # - 'deltainfo.compresseddeltalen' is the sum of the total size of
606 606 # deltas we need to apply -- bounding it limits the amount of CPU
607 607 # we consume.
608 608
609 609 textlen = revinfo.textlen
610 610 defaultmax = textlen * 4
611 611 maxdist = revlog._maxdeltachainspan
612 612 if not maxdist:
613 613 maxdist = deltainfo.distance # ensure the conditional passes
614 614 maxdist = max(maxdist, defaultmax)
615 615
616 616 # Bad delta from read span:
617 617 #
618 618 # If the span of data read is larger than the maximum allowed.
619 619 #
620 620 # In the sparse-revlog case, we rely on the associated "sparse reading"
621 621 # to avoid issues related to the span of data. In theory, it would be
622 622 # possible to build a pathological revlog where the delta pattern would
623 623 # lead to too many reads. However, this does not happen in practice at
624 624 # all, so we skip the span check entirely.
625 625 if not revlog._sparserevlog and maxdist < deltainfo.distance:
626 626 return False
627 627
628 628 # Bad delta from new delta size:
629 629 #
630 630 # If the delta size is larger than the target text, storing the
631 631 # delta will be inefficient.
632 632 if textlen < deltainfo.deltalen:
633 633 return False
634 634
635 635 # Bad delta from cumulated payload size:
636 636 #
637 637 # If the sum of deltas gets larger than K * target text length.
638 638 if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
639 639 return False
640 640
641 641 # Bad delta from chain length:
642 642 #
643 643 # If the number of deltas in the chain gets too high.
644 if revlog._maxchainlen and revlog._maxchainlen < deltainfo.chainlen:
644 if (
645 revlog.delta_config.max_chain_len
646 and revlog.delta_config.max_chain_len < deltainfo.chainlen
647 ):
645 648 return False
646 649
647 650 # bad delta from intermediate snapshot size limit
648 651 #
649 652 # If an intermediate snapshot size is higher than the limit. The
650 653 # limit exists to prevent endless chains of intermediate deltas from
651 654 # being created.
652 655 if (
653 656 deltainfo.snapshotdepth is not None
654 657 and (textlen >> deltainfo.snapshotdepth) < deltainfo.deltalen
655 658 ):
656 659 return False
657 660
658 661 # bad delta if new intermediate snapshot is larger than the previous
659 662 # snapshot
660 663 if (
661 664 deltainfo.snapshotdepth
662 665 and revlog.length(deltainfo.base) < deltainfo.deltalen
663 666 ):
664 667 return False
665 668
666 669 return True
667 670
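The acceptance logic above reduces to a handful of inequalities. A condensed, self-contained restatement under the same module constants, for reading rather than as a drop-in replacement (the snapshot-specific bounds from the code above are omitted):

    def delta_is_acceptable(textlen, info, max_chain_len, maxdist, sparse):
        if not sparse and maxdist < info.distance:
            return False  # read span too large (non-sparse revlogs only)
        if textlen < info.deltalen:
            return False  # delta bigger than the text it encodes
        if textlen * LIMIT_DELTA2TEXT < info.compresseddeltalen:
            return False  # chain payload beyond 2x the text size
        if max_chain_len and max_chain_len < info.chainlen:
            return False  # chain would grow past the configured limit
        return True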
668 671
669 672 # If a revision's full text is that much bigger than a base candidate full
670 673 # text's, it is very unlikely that it will produce a valid delta. We no longer
671 674 # consider these candidates.
672 675 LIMIT_BASE2TEXT = 500
673 676
674 677
675 678 def _candidategroups(
676 679 revlog,
677 680 textlen,
678 681 p1,
679 682 p2,
680 683 cachedelta,
681 684 excluded_bases=None,
682 685 target_rev=None,
683 686 snapshot_cache=None,
684 687 ):
685 688 """Provides group of revision to be tested as delta base
686 689
687 690 This top level function focuses on emitting groups with unique and worthwhile
688 691 content. See _rawgroups for details about the group order.
689 692 """
690 693 # should we try to build a delta?
691 694 if not (len(revlog) and revlog._storedeltachains):
692 695 yield None
693 696 return
694 697
695 698 if target_rev is None:
696 699 target_rev = len(revlog)
697 700
698 701 if not revlog.delta_config.general_delta:
699 702 # before general delta, there is only one possible delta base
700 703 yield (target_rev - 1,)
701 704 yield None
702 705 return
703 706
704 707 # the DELTA_BASE_REUSE_FORCE case should have been taken care of sooner, so
705 708 # we should never end up asking such a question. Adding the assert as a
706 709 # safe-guard to detect anything that would be fishy in this regard.
707 710 assert (
708 711 cachedelta is None
709 712 or cachedelta[2] != DELTA_BASE_REUSE_FORCE
710 713 or not revlog.delta_config.general_delta
711 714 )
712 715
713 716 deltalength = revlog.length
714 717 deltaparent = revlog.deltaparent
715 718 sparse = revlog._sparserevlog
716 719 good = None
717 720
718 721 deltas_limit = textlen * LIMIT_DELTA2TEXT
719 722 group_chunk_size = revlog._candidate_group_chunk_size
720 723
721 724 tested = {nullrev}
722 725 candidates = _refinedgroups(
723 726 revlog,
724 727 p1,
725 728 p2,
726 729 cachedelta,
727 730 snapshot_cache=snapshot_cache,
728 731 )
729 732 while True:
730 733 temptative = candidates.send(good)
731 734 if temptative is None:
732 735 break
733 736 group = []
734 737 for rev in temptative:
735 738 # skip over empty deltas (no need to include them in a chain)
736 739 while not (rev == nullrev or rev in tested or deltalength(rev)):
737 740 tested.add(rev)
738 741 rev = deltaparent(rev)
739 742 # no need to try a delta against nullrev, this will be done as a
740 743 # last resort.
741 744 if rev == nullrev:
742 745 continue
743 746 # filter out revisions we tested already
744 747 if rev in tested:
745 748 continue
746 749
747 750 # a higher authority deemed the base unworthy (e.g. censored)
748 751 if excluded_bases is not None and rev in excluded_bases:
749 752 tested.add(rev)
750 753 continue
751 754 # We are in some recomputation cases and that rev is too high in
752 755 # the revlog
753 756 if target_rev is not None and rev >= target_rev:
754 757 tested.add(rev)
755 758 continue
756 759 # filter out delta bases that will never produce a good delta
757 760 if deltas_limit < revlog.length(rev):
758 761 tested.add(rev)
759 762 continue
760 763 if sparse and revlog.rawsize(rev) < (textlen // LIMIT_BASE2TEXT):
761 764 tested.add(rev)
762 765 continue
763 766 # no delta for rawtext-changing revs (see "candelta" for why)
764 767 if revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
765 768 tested.add(rev)
766 769 continue
767 770
768 771 # If we reach here, we are about to build and test a delta.
769 772 # The delta building process will compute the chaininfo in all
770 773 # cases; since that computation is cached, it is fine to access it
771 774 # here too.
772 775 chainlen, chainsize = revlog._chaininfo(rev)
773 776 # if chain will be too long, skip base
774 if revlog._maxchainlen and chainlen >= revlog._maxchainlen:
777 if (
778 revlog.delta_config.max_chain_len
779 and chainlen >= revlog.delta_config.max_chain_len
780 ):
775 781 tested.add(rev)
776 782 continue
777 783 # if the chain already has too much data, skip base
778 784 if deltas_limit < chainsize:
779 785 tested.add(rev)
780 786 continue
781 787 if sparse and revlog.upperboundcomp is not None:
782 788 maxcomp = revlog.upperboundcomp
783 789 basenotsnap = (p1, p2, nullrev)
784 790 if rev not in basenotsnap and revlog.issnapshot(rev):
785 791 snapshotdepth = revlog.snapshotdepth(rev)
786 792 # If text is significantly larger than the base, we can
787 793 # expect the resulting delta to be proportional to the size
788 794 # difference
789 795 revsize = revlog.rawsize(rev)
790 796 rawsizedistance = max(textlen - revsize, 0)
791 797 # use an estimate of the compression upper bound.
792 798 lowestrealisticdeltalen = rawsizedistance // maxcomp
793 799
794 800 # check the absolute constraint on the delta size
795 801 snapshotlimit = textlen >> snapshotdepth
796 802 if snapshotlimit < lowestrealisticdeltalen:
797 803 # delta lower bound is larger than accepted upper bound
798 804 tested.add(rev)
799 805 continue
800 806
801 807 # check the relative constraint on the delta size
802 808 revlength = revlog.length(rev)
803 809 if revlength < lowestrealisticdeltalen:
804 810 # delta probable lower bound is larger than target base
805 811 tested.add(rev)
806 812 continue
807 813
808 814 group.append(rev)
809 815 if group:
810 816 # When the size of the candidate group is big, it can have a quite
811 817 # significant performance impact. To reduce this, we can send the
812 818 # candidates in smaller batches until a new batch does not provide
813 819 # any improvement.
814 820 #
815 821 # This might reduce the overall efficiency of the compression in
816 822 # some corner cases, but it should also prevent very pathological
817 823 # cases from being an issue (e.g. 20,000 candidates).
818 824 #
819 825 # XXX note that the ordering of the group becomes important as it
820 826 # now impacts the final result. The current order is unprocessed
821 827 # and can be improved.
822 828 if group_chunk_size == 0:
823 829 tested.update(group)
824 830 good = yield tuple(group)
825 831 else:
826 832 prev_good = good
827 833 for start in range(0, len(group), group_chunk_size):
828 834 sub_group = group[start : start + group_chunk_size]
829 835 tested.update(sub_group)
830 836 good = yield tuple(sub_group)
831 837 if prev_good == good:
832 838 break
833 839
834 840 yield None
835 841
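_candidategroups is a generator driven with send(): the consumer reports back the base that produced the best delta so far (or None), and the producer uses that feedback to refine its next group or stop. A minimal sketch of the driving loop, mirroring what finddeltainfo does further down; pick_best_delta is a hypothetical helper standing in for the build-and-test step:

    groups = _candidategroups(revlog, textlen, p1r, p2r, cachedelta)
    best = None
    candidates = next(groups)          # prime the generator
    while candidates is not None:
        found = pick_best_delta(candidates)      # hypothetical: test each rev
        if found is not None:
            best = found
            candidates = groups.send(best.base)  # feed the winner back
        else:
            candidates = next(groups)            # nothing good, ask for more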
836 842
837 843 def _refinedgroups(revlog, p1, p2, cachedelta, snapshot_cache=None):
838 844 good = None
839 845 # First we try to reuse the delta contained in the bundle.
840 846 # (or from the source revlog)
841 847 #
842 848 # This logic only applies to general delta repositories and can be disabled
843 849 # through configuration. Disabling source delta reuse is useful when
844 850 # we want to make sure we recompute "optimal" deltas.
845 851 debug_info = None
846 852 if cachedelta is not None and cachedelta[2] > DELTA_BASE_REUSE_NO:
847 853 # Assume what we received from the server is a good choice
848 854 # build delta will reuse the cache
849 855 if debug_info is not None:
850 856 debug_info['cached-delta.tested'] += 1
851 857 good = yield (cachedelta[0],)
852 858 if good is not None:
853 859 if debug_info is not None:
854 860 debug_info['cached-delta.accepted'] += 1
855 861 yield None
856 862 return
857 863 if snapshot_cache is None:
858 864 snapshot_cache = SnapshotCache()
859 865 groups = _rawgroups(
860 866 revlog,
861 867 p1,
862 868 p2,
863 869 cachedelta,
864 870 snapshot_cache,
865 871 )
866 872 for candidates in groups:
867 873 good = yield candidates
868 874 if good is not None:
869 875 break
870 876
871 877 # If sparse revlog is enabled, we can try to refine the available deltas
872 878 if not revlog._sparserevlog:
873 879 yield None
874 880 return
875 881
876 882 # if we have a refinable value, try to refine it
877 883 if good is not None and good not in (p1, p2) and revlog.issnapshot(good):
878 884 # refine snapshot down
879 885 previous = None
880 886 while previous != good:
881 887 previous = good
882 888 base = revlog.deltaparent(good)
883 889 if base == nullrev:
884 890 break
885 891 good = yield (base,)
886 892 # refine snapshot up
887 893 if not snapshot_cache.snapshots:
888 894 snapshot_cache.update(revlog, good + 1)
889 895 previous = None
890 896 while good != previous:
891 897 previous = good
892 898 children = tuple(sorted(c for c in snapshot_cache.snapshots[good]))
893 899 good = yield children
894 900
895 901 if debug_info is not None:
896 902 if good is None:
897 903 debug_info['no-solution'] += 1
898 904
899 905 yield None
900 906
901 907
902 908 def _rawgroups(revlog, p1, p2, cachedelta, snapshot_cache=None):
903 909 """Provides group of revision to be tested as delta base
904 910
905 911 This lower level function focuses on emitting theoretically interesting
906 912 deltas without looking at any practical details.
907 913
908 914 The group order aims at providing fast or small candidates first.
909 915 """
910 916 # Why search for a delta base if we cannot use a delta base?
911 917 assert revlog.delta_config.general_delta
912 918 # also see issue6056
913 919 sparse = revlog._sparserevlog
914 920 curr = len(revlog)
915 921 prev = curr - 1
916 922 deltachain = lambda rev: revlog._deltachain(rev)[0]
917 923
918 924 # keep only real parents; nullrev cannot serve as a delta base
919 925 parents = [p for p in (p1, p2) if p != nullrev]
920 926
921 927 if not revlog._deltabothparents and len(parents) == 2:
922 928 parents.sort()
923 929 # To minimize the chance of having to build a fulltext,
924 930 # pick first whichever parent is closest to us (max rev)
925 931 yield (parents[1],)
926 932 # then the other one (min rev) if the first did not fit
927 933 yield (parents[0],)
928 934 elif len(parents) > 0:
929 935 # Test all parents (1 or 2), and keep the best candidate
930 936 yield parents
931 937
932 938 if sparse and parents:
933 939 if snapshot_cache is None:
934 940 # map: base-rev: [snapshot-revs]
935 941 snapshot_cache = SnapshotCache()
936 942 # See if an existing snapshot in the parent chains can serve as a
937 943 # base for a new intermediate snapshot
938 944 #
939 945 # search for snapshot in parents delta chain
940 946 # map: snapshot-level: snapshot-rev
941 947 parents_snaps = collections.defaultdict(set)
942 948 candidate_chains = [deltachain(p) for p in parents]
943 949 for chain in candidate_chains:
944 950 for idx, s in enumerate(chain):
945 951 if not revlog.issnapshot(s):
946 952 break
947 953 parents_snaps[idx].add(s)
948 954 snapfloor = min(parents_snaps[0]) + 1
949 955 snapshot_cache.update(revlog, snapfloor)
950 956 # search for the highest "unrelated" revision
951 957 #
952 958 # Adding snapshots used by an "unrelated" revision increases the odds
953 959 # that we reuse an independent, yet better, snapshot chain.
954 960 #
955 961 # XXX instead of building a set of revisions, we could lazily enumerate
956 962 # over the chains. That would be more efficient, however we stick to
957 963 # simple code for now.
958 964 all_revs = set()
959 965 for chain in candidate_chains:
960 966 all_revs.update(chain)
961 967 other = None
962 968 for r in revlog.revs(prev, snapfloor):
963 969 if r not in all_revs:
964 970 other = r
965 971 break
966 972 if other is not None:
967 973 # To avoid unfair competition, we won't use unrelated intermediate
968 974 # snapshots that are deeper than the ones from the parent delta
969 975 # chain.
970 976 max_depth = max(parents_snaps.keys())
971 977 chain = deltachain(other)
972 978 for depth, s in enumerate(chain):
973 979 if s < snapfloor:
974 980 continue
975 981 if max_depth < depth:
976 982 break
977 983 if not revlog.issnapshot(s):
978 984 break
979 985 parents_snaps[depth].add(s)
980 986 # Test them as possible intermediate snapshot bases.
981 987 # We test them from highest to lowest level. High level ones are more
982 988 # likely to result in small deltas.
983 989 floor = None
984 990 for idx, snaps in sorted(parents_snaps.items(), reverse=True):
985 991 siblings = set()
986 992 for s in snaps:
987 993 siblings.update(snapshot_cache.snapshots[s])
988 994 # Before considering making a new intermediate snapshot, we check
989 995 # if an existing snapshot, a child of the base we consider, would
990 996 # be suitable.
991 997 #
992 998 # It gives a chance to reuse a delta chain "unrelated" to the
993 999 # current revision instead of starting our own. Without such
994 1000 # re-use, topological branches would keep reopening new chains,
995 1001 # creating more and more snapshots as the repository grows.
996 1002
997 1003 if floor is not None:
998 1004 # We only do this for siblings created after the one in our
999 1005 # parent's delta chain. Those created before have fewer chances
1000 1006 # to be a valid base since our ancestors had to create a new
1001 1007 # snapshot.
1002 1008 siblings = [r for r in siblings if floor < r]
1003 1009 yield tuple(sorted(siblings))
1004 1010 # then test the base from our parent's delta chain.
1005 1011 yield tuple(sorted(snaps))
1006 1012 floor = min(snaps)
1007 1013 # No suitable base found in the parent chain; check whether any full
1008 1014 # snapshot emitted since the parent's base would be a suitable base
1009 1015 # for an intermediate snapshot.
1010 1016 #
1011 1017 # It gives a chance to reuse a delta chain unrelated to the current
1012 1018 # revisions instead of starting our own. Without such re-use,
1013 1019 # topological branches would keep reopening new full chains, creating
1014 1020 # more and more snapshots as the repository grows.
1015 1021 full = [r for r in snapshot_cache.snapshots[nullrev] if snapfloor <= r]
1016 1022 yield tuple(sorted(full))
1017 1023
1018 1024 if not sparse:
1019 1025 # other approaches failed; try against prev to hopefully save us a
1020 1026 # fulltext.
1021 1027 yield (prev,)
1022 1028
1023 1029
1024 1030 class SnapshotCache:
1025 1031 __slots__ = ('snapshots', '_start_rev', '_end_rev')
1026 1032
1027 1033 def __init__(self):
1028 1034 self.snapshots = collections.defaultdict(set)
1029 1035 self._start_rev = None
1030 1036 self._end_rev = None
1031 1037
1032 1038 def update(self, revlog, start_rev=0):
1033 1039 """find snapshots from start_rev to tip"""
1034 1040 nb_revs = len(revlog)
1035 1041 end_rev = nb_revs - 1
1036 1042 if start_rev > end_rev:
1037 1043 return # range is empty
1038 1044
1039 1045 if self._start_rev is None:
1040 1046 assert self._end_rev is None
1041 1047 self._update(revlog, start_rev, end_rev)
1042 1048 elif not (self._start_rev <= start_rev and end_rev <= self._end_rev):
1043 1049 if start_rev < self._start_rev:
1044 1050 self._update(revlog, start_rev, self._start_rev - 1)
1045 1051 if self._end_rev < end_rev:
1046 1052 self._update(revlog, self._end_rev + 1, end_rev)
1047 1053
1048 1054 if self._start_rev is None:
1049 1055 assert self._end_rev is None
1050 1056 self._end_rev = end_rev
1051 1057 self._start_rev = start_rev
1052 1058 else:
1053 1059 self._start_rev = min(self._start_rev, start_rev)
1054 1060 self._end_rev = max(self._end_rev, end_rev)
1055 1061 assert self._start_rev <= self._end_rev, (
1056 1062 self._start_rev,
1057 1063 self._end_rev,
1058 1064 )
1059 1065
1060 1066 def _update(self, revlog, start_rev, end_rev):
1061 1067 """internal method that actually do update content"""
1062 1068 assert self._start_rev is None or (
1063 1069 start_rev < self._start_rev or start_rev > self._end_rev
1064 1070 ), (self._start_rev, self._end_rev, start_rev, end_rev)
1065 1071 assert self._start_rev is None or (
1066 1072 end_rev < self._start_rev or end_rev > self._end_rev
1067 1073 ), (self._start_rev, self._end_rev, start_rev, end_rev)
1068 1074 cache = self.snapshots
1069 1075 if hasattr(revlog.index, 'findsnapshots'):
1070 1076 revlog.index.findsnapshots(cache, start_rev, end_rev)
1071 1077 else:
1072 1078 deltaparent = revlog.deltaparent
1073 1079 issnapshot = revlog.issnapshot
1074 1080 for rev in revlog.revs(start_rev, end_rev):
1075 1081 if issnapshot(rev):
1076 1082 cache[deltaparent(rev)].add(rev)
1077 1083
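SnapshotCache maps each delta parent to the set of snapshot revisions built on it, and update() only rescans ranges it has not already covered. A short usage sketch:

    cache = SnapshotCache()
    cache.update(revlog)               # scan rev 0..tip once
    children = cache.snapshots[42]     # snapshots whose delta parent is rev 42
    full = cache.snapshots[nullrev]    # full snapshots (delta parent is -1)
    cache.update(revlog, start_rev=0)  # effectively a no-op: range covered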
1078 1084
1079 1085 class deltacomputer:
1080 1086 def __init__(
1081 1087 self,
1082 1088 revlog,
1083 1089 write_debug=None,
1084 1090 debug_search=False,
1085 1091 debug_info=None,
1086 1092 ):
1087 1093 self.revlog = revlog
1088 1094 self._write_debug = write_debug
1089 1095 if write_debug is None:
1090 1096 self._debug_search = False
1091 1097 else:
1092 1098 self._debug_search = debug_search
1093 1099 self._debug_info = debug_info
1094 1100 self._snapshot_cache = SnapshotCache()
1095 1101
1096 1102 @property
1097 1103 def _gather_debug(self):
1098 1104 return self._write_debug is not None or self._debug_info is not None
1099 1105
1100 1106 def buildtext(self, revinfo):
1101 1107 """Builds a fulltext version of a revision
1102 1108
1103 1109 revinfo: revisioninfo instance that contains all needed info
1104 1110 """
1105 1111 btext = revinfo.btext
1106 1112 if btext[0] is not None:
1107 1113 return btext[0]
1108 1114
1109 1115 revlog = self.revlog
1110 1116 cachedelta = revinfo.cachedelta
1111 1117 baserev = cachedelta[0]
1112 1118 delta = cachedelta[1]
1113 1119
1114 1120 fulltext = btext[0] = _textfromdelta(
1115 1121 revlog,
1116 1122 baserev,
1117 1123 delta,
1118 1124 revinfo.p1,
1119 1125 revinfo.p2,
1120 1126 revinfo.flags,
1121 1127 revinfo.node,
1122 1128 )
1123 1129 return fulltext
1124 1130
1125 1131 def _builddeltadiff(self, base, revinfo):
1126 1132 revlog = self.revlog
1127 1133 t = self.buildtext(revinfo)
1128 1134 if revlog.iscensored(base):
1129 1135 # deltas based on a censored revision must replace the
1130 1136 # full content in one patch, so the delta works everywhere
1131 1137 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
1132 1138 delta = header + t
1133 1139 else:
1134 1140 ptext = revlog.rawdata(base)
1135 1141 delta = mdiff.textdiff(ptext, t)
1136 1142
1137 1143 return delta
1138 1144
1139 1145 def _builddeltainfo(self, revinfo, base, target_rev=None):
1140 1146 # can we use the cached delta?
1141 1147 revlog = self.revlog
1142 1148 chainbase = revlog.chainbase(base)
1143 1149 if revlog.delta_config.general_delta:
1144 1150 deltabase = base
1145 1151 else:
1146 1152 if target_rev is not None and base != target_rev - 1:
1147 1153 msg = (
1148 1154 b'general delta cannot use delta for something else '
1149 1155 b'than `prev`: %d<-%d'
1150 1156 )
1151 1157 msg %= (base, target_rev)
1152 1158 raise error.ProgrammingError(msg)
1153 1159 deltabase = chainbase
1154 1160 snapshotdepth = None
1155 1161 if revlog._sparserevlog and deltabase == nullrev:
1156 1162 snapshotdepth = 0
1157 1163 elif revlog._sparserevlog and revlog.issnapshot(deltabase):
1158 1164 # A delta chain should always be one full snapshot,
1159 1165 # zero or more semi-snapshots, and zero or more deltas
1160 1166 p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
1161 1167 if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
1162 1168 snapshotdepth = len(revlog._deltachain(deltabase)[0])
1163 1169 delta = None
1164 1170 if revinfo.cachedelta:
1165 1171 cachebase = revinfo.cachedelta[0]
1166 1172 # check if the diff still applies
1167 1173 currentbase = cachebase
1168 1174 while (
1169 1175 currentbase != nullrev
1170 1176 and currentbase != base
1171 1177 and self.revlog.length(currentbase) == 0
1172 1178 ):
1173 1179 currentbase = self.revlog.deltaparent(currentbase)
1174 1180 if self.revlog._lazydelta and currentbase == base:
1175 1181 delta = revinfo.cachedelta[1]
1176 1182 if delta is None:
1177 1183 delta = self._builddeltadiff(base, revinfo)
1178 1184 if self._debug_search:
1179 1185 msg = b"DBG-DELTAS-SEARCH: uncompressed-delta-size=%d\n"
1180 1186 msg %= len(delta)
1181 1187 self._write_debug(msg)
1182 1188 # snapshotdepth needs to be neither None nor a 0-level snapshot
1183 1189 if revlog.upperboundcomp is not None and snapshotdepth:
1184 1190 lowestrealisticdeltalen = len(delta) // revlog.upperboundcomp
1185 1191 snapshotlimit = revinfo.textlen >> snapshotdepth
1186 1192 if self._debug_search:
1187 1193 msg = b"DBG-DELTAS-SEARCH: projected-lower-size=%d\n"
1188 1194 msg %= lowestrealisticdeltalen
1189 1195 self._write_debug(msg)
1190 1196 if snapshotlimit < lowestrealisticdeltalen:
1191 1197 if self._debug_search:
1192 1198 msg = b"DBG-DELTAS-SEARCH: DISCARDED (snapshot limit)\n"
1193 1199 self._write_debug(msg)
1194 1200 return None
1195 1201 if revlog.length(base) < lowestrealisticdeltalen:
1196 1202 if self._debug_search:
1197 1203 msg = b"DBG-DELTAS-SEARCH: DISCARDED (prev size)\n"
1198 1204 self._write_debug(msg)
1199 1205 return None
1200 1206 header, data = revlog.compress(delta)
1201 1207 deltalen = len(header) + len(data)
1202 1208 offset = revlog.end(len(revlog) - 1)
1203 1209 dist = deltalen + offset - revlog.start(chainbase)
1204 1210 chainlen, compresseddeltalen = revlog._chaininfo(base)
1205 1211 chainlen += 1
1206 1212 compresseddeltalen += deltalen
1207 1213
1208 1214 return _deltainfo(
1209 1215 dist,
1210 1216 deltalen,
1211 1217 (header, data),
1212 1218 deltabase,
1213 1219 chainbase,
1214 1220 chainlen,
1215 1221 compresseddeltalen,
1216 1222 snapshotdepth,
1217 1223 )
1218 1224
1219 1225 def _fullsnapshotinfo(self, revinfo, curr):
1220 1226 rawtext = self.buildtext(revinfo)
1221 1227 data = self.revlog.compress(rawtext)
1222 1228 compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
1223 1229 deltabase = chainbase = curr
1224 1230 snapshotdepth = 0
1225 1231 chainlen = 1
1226 1232
1227 1233 return _deltainfo(
1228 1234 dist,
1229 1235 deltalen,
1230 1236 data,
1231 1237 deltabase,
1232 1238 chainbase,
1233 1239 chainlen,
1234 1240 compresseddeltalen,
1235 1241 snapshotdepth,
1236 1242 )
1237 1243
1238 1244 def finddeltainfo(self, revinfo, excluded_bases=None, target_rev=None):
1239 1245 """Find an acceptable delta against a candidate revision
1240 1246
1241 1247 revinfo: information about the revision (instance of _revisioninfo)
1242 1248
1243 1249 Returns the first acceptable candidate revision, as ordered by
1244 1250 _candidategroups
1245 1251
1246 1252 If no suitable deltabase is found, we return delta info for a full
1247 1253 snapshot.
1248 1254
1249 1255 `excluded_bases` is an optional set of revision that cannot be used as
1250 1256 a delta base. Use this to recompute delta suitable in censor or strip
1251 1257 context.
1252 1258 """
1253 1259 if target_rev is None:
1254 1260 target_rev = len(self.revlog)
1255 1261
1256 1262 gather_debug = self._gather_debug
1257 1263 cachedelta = revinfo.cachedelta
1258 1264 revlog = self.revlog
1259 1265 p1r = p2r = None
1260 1266
1261 1267 if excluded_bases is None:
1262 1268 excluded_bases = set()
1263 1269
1264 1270 if gather_debug:
1265 1271 start = util.timer()
1266 1272 dbg = self._one_dbg_data()
1267 1273 dbg['revision'] = target_rev
1268 1274 target_revlog = b"UNKNOWN"
1269 1275 target_type = self.revlog.target[0]
1270 1276 target_key = self.revlog.target[1]
1271 1277 if target_type == KIND_CHANGELOG:
1272 1278 target_revlog = b'CHANGELOG:'
1273 1279 elif target_type == KIND_MANIFESTLOG:
1274 1280 target_revlog = b'MANIFESTLOG:'
1275 1281 if target_key:
1276 1282 target_revlog += b'%s:' % target_key
1277 1283 elif target_type == KIND_FILELOG:
1278 1284 target_revlog = b'FILELOG:'
1279 1285 if target_key:
1280 1286 target_revlog += b'%s:' % target_key
1281 1287 dbg['target-revlog'] = target_revlog
1282 1288 p1r = revlog.rev(revinfo.p1)
1283 1289 p2r = revlog.rev(revinfo.p2)
1284 1290 if p1r != nullrev:
1285 1291 p1_chain_len = revlog._chaininfo(p1r)[0]
1286 1292 else:
1287 1293 p1_chain_len = -1
1288 1294 if p2r != nullrev:
1289 1295 p2_chain_len = revlog._chaininfo(p2r)[0]
1290 1296 else:
1291 1297 p2_chain_len = -1
1292 1298 dbg['p1-chain-len'] = p1_chain_len
1293 1299 dbg['p2-chain-len'] = p2_chain_len
1294 1300
1295 1301 # 1) if the revision is empty, no amount of delta can beat it
1296 1302 #
1297 1303 # 2) no delta for flag processor revision (see "candelta" for why)
1298 1304 # not calling candelta since only one revision needs testing, also to
1299 1305 # avoid overhead fetching flags again.
1300 1306 if not revinfo.textlen or revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
1301 1307 deltainfo = self._fullsnapshotinfo(revinfo, target_rev)
1302 1308 if gather_debug:
1303 1309 end = util.timer()
1304 1310 dbg['duration'] = end - start
1305 1311 dbg[
1306 1312 'delta-base'
1307 1313 ] = deltainfo.base # pytype: disable=attribute-error
1308 1314 dbg['search_round_count'] = 0
1309 1315 dbg['using-cached-base'] = False
1310 1316 dbg['delta_try_count'] = 0
1311 1317 dbg['type'] = b"full"
1312 1318 dbg['snapshot-depth'] = 0
1313 1319 self._dbg_process_data(dbg)
1314 1320 return deltainfo
1315 1321
1316 1322 deltainfo = None
1317 1323
1318 1324 # If this source delta is to be forcibly reused, let us comply early.
1319 1325 if (
1320 1326 revlog.delta_config.general_delta
1321 1327 and revinfo.cachedelta is not None
1322 1328 and revinfo.cachedelta[2] == DELTA_BASE_REUSE_FORCE
1323 1329 ):
1324 1330 base = revinfo.cachedelta[0]
1325 1331 if base == nullrev:
1326 1332 dbg_type = b"full"
1327 1333 deltainfo = self._fullsnapshotinfo(revinfo, target_rev)
1328 1334 if gather_debug:
1329 1335 snapshotdepth = 0
1330 1336 elif base not in excluded_bases:
1331 1337 delta = revinfo.cachedelta[1]
1332 1338 header, data = revlog.compress(delta)
1333 1339 deltalen = len(header) + len(data)
1334 1340 if gather_debug:
1335 1341 offset = revlog.end(len(revlog) - 1)
1336 1342 chainbase = revlog.chainbase(base)
1337 1343 distance = deltalen + offset - revlog.start(chainbase)
1338 1344 chainlen, compresseddeltalen = revlog._chaininfo(base)
1339 1345 chainlen += 1
1340 1346 compresseddeltalen += deltalen
1341 1347 if base == p1r or base == p2r:
1342 1348 dbg_type = b"delta"
1343 1349 snapshotdepth = None
1344 1350 elif not revlog.issnapshot(base):
1345 1351 snapshotdepth = None
1346 1352 else:
1347 1353 dbg_type = b"snapshot"
1348 1354 snapshotdepth = revlog.snapshotdepth(base) + 1
1349 1355 else:
1350 1356 distance = None
1351 1357 chainbase = None
1352 1358 chainlen = None
1353 1359 compresseddeltalen = None
1354 1360 snapshotdepth = None
1355 1361 deltainfo = _deltainfo(
1356 1362 distance=distance,
1357 1363 deltalen=deltalen,
1358 1364 data=(header, data),
1359 1365 base=base,
1360 1366 chainbase=chainbase,
1361 1367 chainlen=chainlen,
1362 1368 compresseddeltalen=compresseddeltalen,
1363 1369 snapshotdepth=snapshotdepth,
1364 1370 )
1365 1371
1366 1372 if deltainfo is not None:
1367 1373 if gather_debug:
1368 1374 end = util.timer()
1369 1375 dbg['duration'] = end - start
1370 1376 dbg[
1371 1377 'delta-base'
1372 1378 ] = deltainfo.base # pytype: disable=attribute-error
1373 1379 dbg['search_round_count'] = 0
1374 1380 dbg['using-cached-base'] = True
1375 1381 dbg['delta_try_count'] = 0
1376 1382 dbg['type'] = b"full"
1377 1383 if snapshotdepth is None:
1378 1384 dbg['snapshot-depth'] = 0
1379 1385 else:
1380 1386 dbg['snapshot-depth'] = snapshotdepth
1381 1387 self._dbg_process_data(dbg)
1382 1388 return deltainfo
1383 1389
1384 1390 # count the number of different deltas we tried (for debug purposes)
1385 1391 dbg_try_count = 0
1386 1392 # count the number of "search rounds" we did (for debug purposes)
1387 1393 dbg_try_rounds = 0
1388 1394 dbg_type = b'unknown'
1389 1395
1390 1396 if p1r is None:
1391 1397 p1r = revlog.rev(revinfo.p1)
1392 1398 p2r = revlog.rev(revinfo.p2)
1393 1399
1394 1400 if self._debug_search:
1395 1401 msg = b"DBG-DELTAS-SEARCH: SEARCH rev=%d\n"
1396 1402 msg %= target_rev
1397 1403 self._write_debug(msg)
1398 1404
1399 1405 groups = _candidategroups(
1400 1406 self.revlog,
1401 1407 revinfo.textlen,
1402 1408 p1r,
1403 1409 p2r,
1404 1410 cachedelta,
1405 1411 excluded_bases,
1406 1412 target_rev,
1407 1413 snapshot_cache=self._snapshot_cache,
1408 1414 )
1409 1415 candidaterevs = next(groups)
1410 1416 while candidaterevs is not None:
1411 1417 dbg_try_rounds += 1
1412 1418 if self._debug_search:
1413 1419 prev = None
1414 1420 if deltainfo is not None:
1415 1421 prev = deltainfo.base
1416 1422
1417 1423 if (
1418 1424 cachedelta is not None
1419 1425 and len(candidaterevs) == 1
1420 1426 and cachedelta[0] in candidaterevs
1421 1427 ):
1422 1428 round_type = b"cached-delta"
1423 1429 elif p1r in candidaterevs or p2r in candidaterevs:
1424 1430 round_type = b"parents"
1425 1431 elif prev is not None and all(c < prev for c in candidaterevs):
1426 1432 round_type = b"refine-down"
1427 1433 elif prev is not None and all(c > prev for c in candidaterevs):
1428 1434 round_type = b"refine-up"
1429 1435 else:
1430 1436 round_type = b"search-down"
1431 1437 msg = b"DBG-DELTAS-SEARCH: ROUND #%d - %d candidates - %s\n"
1432 1438 msg %= (dbg_try_rounds, len(candidaterevs), round_type)
1433 1439 self._write_debug(msg)
1434 1440 nominateddeltas = []
1435 1441 if deltainfo is not None:
1436 1442 if self._debug_search:
1437 1443 msg = (
1438 1444 b"DBG-DELTAS-SEARCH: CONTENDER: rev=%d - length=%d\n"
1439 1445 )
1440 1446 msg %= (deltainfo.base, deltainfo.deltalen)
1441 1447 self._write_debug(msg)
1442 1448 # if we already found a good delta,
1443 1449 # challenge it against refined candidates
1444 1450 nominateddeltas.append(deltainfo)
1445 1451 for candidaterev in candidaterevs:
1446 1452 if self._debug_search:
1447 1453 msg = b"DBG-DELTAS-SEARCH: CANDIDATE: rev=%d\n"
1448 1454 msg %= candidaterev
1449 1455 self._write_debug(msg)
1450 1456 candidate_type = None
1451 1457 if candidaterev == p1r:
1452 1458 candidate_type = b"p1"
1453 1459 elif candidaterev == p2r:
1454 1460 candidate_type = b"p2"
1455 1461 elif self.revlog.issnapshot(candidaterev):
1456 1462 candidate_type = b"snapshot-%d"
1457 1463 candidate_type %= self.revlog.snapshotdepth(
1458 1464 candidaterev
1459 1465 )
1460 1466
1461 1467 if candidate_type is not None:
1462 1468 msg = b"DBG-DELTAS-SEARCH: type=%s\n"
1463 1469 msg %= candidate_type
1464 1470 self._write_debug(msg)
1465 1471 msg = b"DBG-DELTAS-SEARCH: size=%d\n"
1466 1472 msg %= self.revlog.length(candidaterev)
1467 1473 self._write_debug(msg)
1468 1474 msg = b"DBG-DELTAS-SEARCH: base=%d\n"
1469 1475 msg %= self.revlog.deltaparent(candidaterev)
1470 1476 self._write_debug(msg)
1471 1477
1472 1478 dbg_try_count += 1
1473 1479
1474 1480 if self._debug_search:
1475 1481 delta_start = util.timer()
1476 1482 candidatedelta = self._builddeltainfo(
1477 1483 revinfo,
1478 1484 candidaterev,
1479 1485 target_rev=target_rev,
1480 1486 )
1481 1487 if self._debug_search:
1482 1488 delta_end = util.timer()
1483 1489 msg = b"DBG-DELTAS-SEARCH: delta-search-time=%f\n"
1484 1490 msg %= delta_end - delta_start
1485 1491 self._write_debug(msg)
1486 1492 if candidatedelta is not None:
1487 1493 if is_good_delta_info(self.revlog, candidatedelta, revinfo):
1488 1494 if self._debug_search:
1489 1495 msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (GOOD)\n"
1490 1496 msg %= candidatedelta.deltalen
1491 1497 self._write_debug(msg)
1492 1498 nominateddeltas.append(candidatedelta)
1493 1499 elif self._debug_search:
1494 1500 msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (BAD)\n"
1495 1501 msg %= candidatedelta.deltalen
1496 1502 self._write_debug(msg)
1497 1503 elif self._debug_search:
1498 1504 msg = b"DBG-DELTAS-SEARCH: NO-DELTA\n"
1499 1505 self._write_debug(msg)
1500 1506 if nominateddeltas:
1501 1507 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
1502 1508 if deltainfo is not None:
1503 1509 candidaterevs = groups.send(deltainfo.base)
1504 1510 else:
1505 1511 candidaterevs = next(groups)
1506 1512
1507 1513 if deltainfo is None:
1508 1514 dbg_type = b"full"
1509 1515 deltainfo = self._fullsnapshotinfo(revinfo, target_rev)
1510 1516 elif deltainfo.snapshotdepth: # pytype: disable=attribute-error
1511 1517 dbg_type = b"snapshot"
1512 1518 else:
1513 1519 dbg_type = b"delta"
1514 1520
1515 1521 if gather_debug:
1516 1522 end = util.timer()
1517 1523 if dbg_type == b'full':
1518 1524 used_cached = (
1519 1525 cachedelta is not None
1520 1526 and dbg_try_rounds == 0
1521 1527 and dbg_try_count == 0
1522 1528 and cachedelta[0] == nullrev
1523 1529 )
1524 1530 else:
1525 1531 used_cached = (
1526 1532 cachedelta is not None
1527 1533 and dbg_try_rounds == 1
1528 1534 and dbg_try_count == 1
1529 1535 and deltainfo.base == cachedelta[0]
1530 1536 )
1531 1537 dbg['duration'] = end - start
1532 1538 dbg[
1533 1539 'delta-base'
1534 1540 ] = deltainfo.base # pytype: disable=attribute-error
1535 1541 dbg['search_round_count'] = dbg_try_rounds
1536 1542 dbg['using-cached-base'] = used_cached
1537 1543 dbg['delta_try_count'] = dbg_try_count
1538 1544 dbg['type'] = dbg_type
1539 1545 if (
1540 1546 deltainfo.snapshotdepth # pytype: disable=attribute-error
1541 1547 is not None
1542 1548 ):
1543 1549 dbg[
1544 1550 'snapshot-depth'
1545 1551 ] = deltainfo.snapshotdepth # pytype: disable=attribute-error
1546 1552 else:
1547 1553 dbg['snapshot-depth'] = 0
1548 1554 self._dbg_process_data(dbg)
1549 1555 return deltainfo
1550 1556
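From the caller's perspective, one deltacomputer is typically created per revlog and queried once per incoming revision. A hedged usage sketch; construction of the _revisioninfo instance is elided:

    dc = deltacomputer(revlog)
    info = dc.finddeltainfo(revinfo, target_rev=len(revlog))
    # info.base: chosen delta base rev (target_rev itself for a full snapshot)
    # info.data: (header, data) pair, ready to be written to the revlog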
1551 1557 def _one_dbg_data(self):
1552 1558 return {
1553 1559 'duration': None,
1554 1560 'revision': None,
1555 1561 'delta-base': None,
1556 1562 'search_round_count': None,
1557 1563 'using-cached-base': None,
1558 1564 'delta_try_count': None,
1559 1565 'type': None,
1560 1566 'p1-chain-len': None,
1561 1567 'p2-chain-len': None,
1562 1568 'snapshot-depth': None,
1563 1569 'target-revlog': None,
1564 1570 }
1565 1571
1566 1572 def _dbg_process_data(self, dbg):
1567 1573 if self._debug_info is not None:
1568 1574 self._debug_info.append(dbg)
1569 1575
1570 1576 if self._write_debug is not None:
1571 1577 msg = (
1572 1578 b"DBG-DELTAS:"
1573 1579 b" %-12s"
1574 1580 b" rev=%d:"
1575 1581 b" delta-base=%d"
1576 1582 b" is-cached=%d"
1577 1583 b" - search-rounds=%d"
1578 1584 b" try-count=%d"
1579 1585 b" - delta-type=%-6s"
1580 1586 b" snap-depth=%d"
1581 1587 b" - p1-chain-length=%d"
1582 1588 b" p2-chain-length=%d"
1583 1589 b" - duration=%f"
1584 1590 b"\n"
1585 1591 )
1586 1592 msg %= (
1587 1593 dbg["target-revlog"],
1588 1594 dbg["revision"],
1589 1595 dbg["delta-base"],
1590 1596 dbg["using-cached-base"],
1591 1597 dbg["search_round_count"],
1592 1598 dbg["delta_try_count"],
1593 1599 dbg["type"],
1594 1600 dbg["snapshot-depth"],
1595 1601 dbg["p1-chain-len"],
1596 1602 dbg["p2-chain-len"],
1597 1603 dbg["duration"],
1598 1604 )
1599 1605 self._write_debug(msg)
1600 1606
1601 1607
1602 1608 def delta_compression(default_compression_header, deltainfo):
1603 1609 """return (COMPRESSION_MODE, deltainfo)
1604 1610
1605 1611 used by revlog v2+ format to dispatch between PLAIN and DEFAULT
1606 1612 compression.
1607 1613 """
1608 1614 h, d = deltainfo.data
1609 1615 compression_mode = COMP_MODE_INLINE
1610 1616 if not h and not d:
1611 1617 # no data to store at all... declare it uncompressed
1612 1618 compression_mode = COMP_MODE_PLAIN
1613 1619 elif not h:
1614 1620 t = d[0:1]
1615 1621 if t == b'\0':
1616 1622 compression_mode = COMP_MODE_PLAIN
1617 1623 elif t == default_compression_header:
1618 1624 compression_mode = COMP_MODE_DEFAULT
1619 1625 elif h == b'u':
1620 1626 # we have a more efficient way to declare uncompressed
1621 1627 h = b''
1622 1628 compression_mode = COMP_MODE_PLAIN
1623 1629 deltainfo = drop_u_compression(deltainfo)
1624 1630 return compression_mode, deltainfo
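A short sketch of how a revlog v2+ writer might dispatch on the result; the default_compression_header is the one-byte tag of the revlog's configured engine (commonly b'x' for zlib-style compression):

    mode, deltainfo = delta_compression(b'x', deltainfo)
    if mode == COMP_MODE_PLAIN:
        pass  # store the bytes verbatim; no per-chunk header needed
    elif mode == COMP_MODE_DEFAULT:
        pass  # compressed with the default engine; header left implicit
    else:
        pass  # COMP_MODE_INLINE: the chunk carries its own header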