revlog: remove legacy usage of `_srmingapsize`...
marmoute
r51956:533d6943 default
@@ -1,1629 +1,1630 b''
1 1 # revlogdeltas.py - Logic around delta computation for revlog
2 2 #
3 3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 4 # Copyright 2018 Octobus <contact@octobus.net>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8 """Helper class to compute deltas stored inside revlogs"""
9 9
10 10
11 11 import collections
12 12 import struct
13 13
14 14 # import stuff from node for others to import from revlog
15 15 from ..node import nullrev
16 16 from ..i18n import _
17 17
18 18 from .constants import (
19 19 COMP_MODE_DEFAULT,
20 20 COMP_MODE_INLINE,
21 21 COMP_MODE_PLAIN,
22 22 DELTA_BASE_REUSE_FORCE,
23 23 DELTA_BASE_REUSE_NO,
24 24 KIND_CHANGELOG,
25 25 KIND_FILELOG,
26 26 KIND_MANIFESTLOG,
27 27 REVIDX_ISCENSORED,
28 28 REVIDX_RAWTEXT_CHANGING_FLAGS,
29 29 )
30 30
31 31 from ..thirdparty import attr
32 32
33 33 from .. import (
34 34 error,
35 35 mdiff,
36 36 util,
37 37 )
38 38
39 39 from . import flagutil
40 40
41 41 # maximum <delta-chain-data>/<revision-text-length> ratio
42 42 LIMIT_DELTA2TEXT = 2
43 43
44 44
45 45 class _testrevlog:
46 46 """minimalist fake revlog to use in doctests"""
47 47
48 48 def __init__(self, data, density=0.5, mingap=0, snapshot=()):
49 49 """data is an list of revision payload boundaries"""
50 50 from .. import revlog
51 51
52 52 self._data = data
53 self._srmingapsize = mingap
54 53 self.data_config = revlog.DataConfig()
55 54 self.data_config.sr_density_threshold = density
56 55 self.data_config.sr_min_gap_size = mingap
57 56 self.delta_config = revlog.DeltaConfig()
58 57 self.feature_config = revlog.FeatureConfig()
59 58 self._snapshot = set(snapshot)
60 59 self.index = None
61 60
62 61 def start(self, rev):
63 62 if rev == nullrev:
64 63 return 0
65 64 if rev == 0:
66 65 return 0
67 66 return self._data[rev - 1]
68 67
69 68 def end(self, rev):
70 69 if rev == nullrev:
71 70 return 0
72 71 return self._data[rev]
73 72
74 73 def length(self, rev):
75 74 return self.end(rev) - self.start(rev)
76 75
77 76 def __len__(self):
78 77 return len(self._data)
79 78
80 79 def issnapshot(self, rev):
81 80 if rev == nullrev:
82 81 return True
83 82 return rev in self._snapshot
84 83
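# A hedged, doctest-style sketch of the helper above (the boundary list is
# made up for illustration): with cumulative payload boundaries
# [5, 10, 12], revision 1 spans bytes [5, 10).
#
# >>> rl = _testrevlog([5, 10, 12])
# >>> rl.start(1), rl.end(1), rl.length(1)
# (5, 10, 5)
# >>> len(rl)
# 3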
85 84
86 85 def slicechunk(revlog, revs, targetsize=None):
87 86 """slice revs to reduce the amount of unrelated data to be read from disk.
88 87
89 88 ``revs`` is sliced into groups that should be read in one go.
90 89 Assume that revs are sorted.
91 90
92 91 The initial chunk is sliced until the overall density (payload/chunks-span
93 92 ratio) is above `revlog.data_config.sr_density_threshold`. No gap smaller
94 than `revlog._srmingapsize` is skipped.
93 than `revlog.data_config.sr_min_gap_size` is skipped.
95 94
96 95 If `targetsize` is set, no chunk larger than `targetsize` will be yielded.
97 96 For consistency with other slicing choices, this limit won't go lower than
98 `revlog._srmingapsize`.
97 `revlog.data_config.sr_min_gap_size`.
99 98
100 99 If individual revision chunks are larger than this limit, they will still
101 100 be yielded individually.
102 101
103 102 >>> data = [
104 103 ... 5, #00 (5)
105 104 ... 10, #01 (5)
106 105 ... 12, #02 (2)
107 106 ... 12, #03 (empty)
108 107 ... 27, #04 (15)
109 108 ... 31, #05 (4)
110 109 ... 31, #06 (empty)
111 110 ... 42, #07 (11)
112 111 ... 47, #08 (5)
113 112 ... 47, #09 (empty)
114 113 ... 48, #10 (1)
115 114 ... 51, #11 (3)
116 115 ... 74, #12 (23)
117 116 ... 85, #13 (11)
118 117 ... 86, #14 (1)
119 118 ... 91, #15 (5)
120 119 ... ]
121 120 >>> revlog = _testrevlog(data, snapshot=range(16))
122 121
123 122 >>> list(slicechunk(revlog, list(range(16))))
124 123 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
125 124 >>> list(slicechunk(revlog, [0, 15]))
126 125 [[0], [15]]
127 126 >>> list(slicechunk(revlog, [0, 11, 15]))
128 127 [[0], [11], [15]]
129 128 >>> list(slicechunk(revlog, [0, 11, 13, 15]))
130 129 [[0], [11, 13, 15]]
131 130 >>> list(slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
132 131 [[1, 2], [5, 8, 10, 11], [14]]
133 132
134 133 Slicing with a maximum chunk size
135 134 >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
136 135 [[0], [11], [13], [15]]
137 136 >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
138 137 [[0], [11], [13, 15]]
139 138
140 139 Slicing involving nullrev
141 140 >>> list(slicechunk(revlog, [-1, 0, 11, 13, 15], targetsize=20))
142 141 [[-1, 0], [11], [13, 15]]
143 142 >>> list(slicechunk(revlog, [-1, 13, 15], targetsize=5))
144 143 [[-1], [13], [15]]
145 144 """
146 145 if targetsize is not None:
147 targetsize = max(targetsize, revlog._srmingapsize)
146 targetsize = max(targetsize, revlog.data_config.sr_min_gap_size)
148 147 # targetsize should not be specified when evaluating delta candidates:
149 148 # * targetsize is used to ensure we stay within specification when reading,
150 149 densityslicing = getattr(revlog.index, 'slicechunktodensity', None)
151 150 if densityslicing is None:
152 151 densityslicing = lambda x, y, z: _slicechunktodensity(revlog, x, y, z)
153 152 for chunk in densityslicing(
154 revs, revlog.data_config.sr_density_threshold, revlog._srmingapsize
153 revs,
154 revlog.data_config.sr_density_threshold,
155 revlog.data_config.sr_min_gap_size,
155 156 ):
156 157 for subchunk in _slicechunktosize(revlog, chunk, targetsize):
157 158 yield subchunk
158 159
159 160
160 161 def _slicechunktosize(revlog, revs, targetsize=None):
161 162 """slice revs to match the target size
162 163
163 164 This is intended to be used on chunks that density slicing selected but that
164 165 are still too large compared to the read guarantee of revlog. This might
165 166 happen when "minimal gap size" interrupted the slicing or when chains are
166 167 built in a way that creates large blocks next to each other.
167 168
168 169 >>> data = [
169 170 ... 3, #0 (3)
170 171 ... 5, #1 (2)
171 172 ... 6, #2 (1)
172 173 ... 8, #3 (2)
173 174 ... 8, #4 (empty)
174 175 ... 11, #5 (3)
175 176 ... 12, #6 (1)
176 177 ... 13, #7 (1)
177 178 ... 14, #8 (1)
178 179 ... ]
179 180
180 181 == All snapshots cases ==
181 182 >>> revlog = _testrevlog(data, snapshot=range(9))
182 183
183 184 Cases where chunk is already small enough
184 185 >>> list(_slicechunktosize(revlog, [0], 3))
185 186 [[0]]
186 187 >>> list(_slicechunktosize(revlog, [6, 7], 3))
187 188 [[6, 7]]
188 189 >>> list(_slicechunktosize(revlog, [0], None))
189 190 [[0]]
190 191 >>> list(_slicechunktosize(revlog, [6, 7], None))
191 192 [[6, 7]]
192 193
193 194 cases where we need actual slicing
194 195 >>> list(_slicechunktosize(revlog, [0, 1], 3))
195 196 [[0], [1]]
196 197 >>> list(_slicechunktosize(revlog, [1, 3], 3))
197 198 [[1], [3]]
198 199 >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
199 200 [[1, 2], [3]]
200 201 >>> list(_slicechunktosize(revlog, [3, 5], 3))
201 202 [[3], [5]]
202 203 >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
203 204 [[3], [5]]
204 205 >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
205 206 [[5], [6, 7, 8]]
206 207 >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
207 208 [[0], [1, 2], [3], [5], [6, 7, 8]]
208 209
209 210 Case with too large individual chunk (must return valid chunk)
210 211 >>> list(_slicechunktosize(revlog, [0, 1], 2))
211 212 [[0], [1]]
212 213 >>> list(_slicechunktosize(revlog, [1, 3], 1))
213 214 [[1], [3]]
214 215 >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
215 216 [[3], [5]]
216 217
217 218 == No Snapshot cases ==
218 219 >>> revlog = _testrevlog(data)
219 220
220 221 Cases where chunk is already small enough
221 222 >>> list(_slicechunktosize(revlog, [0], 3))
222 223 [[0]]
223 224 >>> list(_slicechunktosize(revlog, [6, 7], 3))
224 225 [[6, 7]]
225 226 >>> list(_slicechunktosize(revlog, [0], None))
226 227 [[0]]
227 228 >>> list(_slicechunktosize(revlog, [6, 7], None))
228 229 [[6, 7]]
229 230
230 231 cases where we need actual slicing
231 232 >>> list(_slicechunktosize(revlog, [0, 1], 3))
232 233 [[0], [1]]
233 234 >>> list(_slicechunktosize(revlog, [1, 3], 3))
234 235 [[1], [3]]
235 236 >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
236 237 [[1], [2, 3]]
237 238 >>> list(_slicechunktosize(revlog, [3, 5], 3))
238 239 [[3], [5]]
239 240 >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
240 241 [[3], [4, 5]]
241 242 >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
242 243 [[5], [6, 7, 8]]
243 244 >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
244 245 [[0], [1, 2], [3], [5], [6, 7, 8]]
245 246
246 247 Case with too large individual chunk (must return valid chunk)
247 248 >>> list(_slicechunktosize(revlog, [0, 1], 2))
248 249 [[0], [1]]
249 250 >>> list(_slicechunktosize(revlog, [1, 3], 1))
250 251 [[1], [3]]
251 252 >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
252 253 [[3], [5]]
253 254
254 255 == mixed case ==
255 256 >>> revlog = _testrevlog(data, snapshot=[0, 1, 2])
256 257 >>> list(_slicechunktosize(revlog, list(range(9)), 5))
257 258 [[0, 1], [2], [3, 4, 5], [6, 7, 8]]
258 259 """
259 260 assert targetsize is None or 0 <= targetsize
260 261 startdata = revlog.start(revs[0])
261 262 enddata = revlog.end(revs[-1])
262 263 fullspan = enddata - startdata
263 264 if targetsize is None or fullspan <= targetsize:
264 265 yield revs
265 266 return
266 267
267 268 startrevidx = 0
268 269 endrevidx = 1
269 270 iterrevs = enumerate(revs)
270 271 next(iterrevs) # skip first rev.
271 272 # first step: get snapshots out of the way
272 273 for idx, r in iterrevs:
273 274 span = revlog.end(r) - startdata
274 275 snapshot = revlog.issnapshot(r)
275 276 if span <= targetsize and snapshot:
276 277 endrevidx = idx + 1
277 278 else:
278 279 chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
279 280 if chunk:
280 281 yield chunk
281 282 startrevidx = idx
282 283 startdata = revlog.start(r)
283 284 endrevidx = idx + 1
284 285 if not snapshot:
285 286 break
286 287
287 288 # for the others, we use binary slicing to quickly converge toward valid
288 289 # chunks (otherwise, we might end up looking for start/end of many
289 290 # revisions). This logic is not looking for the perfect slicing point, it
290 291 # focuses on quickly converging toward valid chunks.
291 292 nbitem = len(revs)
292 293 while (enddata - startdata) > targetsize:
293 294 endrevidx = nbitem
294 295 if nbitem - startrevidx <= 1:
295 296 break # protect against individual chunk larger than limit
296 297 localenddata = revlog.end(revs[endrevidx - 1])
297 298 span = localenddata - startdata
298 299 while span > targetsize:
299 300 if endrevidx - startrevidx <= 1:
300 301 break # protect against individual chunk larger than limit
301 302 endrevidx -= (endrevidx - startrevidx) // 2
302 303 localenddata = revlog.end(revs[endrevidx - 1])
303 304 span = localenddata - startdata
304 305 chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
305 306 if chunk:
306 307 yield chunk
307 308 startrevidx = endrevidx
308 309 startdata = revlog.start(revs[startrevidx])
309 310
310 311 chunk = _trimchunk(revlog, revs, startrevidx)
311 312 if chunk:
312 313 yield chunk
313 314
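# A small worked example of the binary convergence above (values are made
# up, for illustration only): with startrevidx == 0 and nbitem == 8, the
# probe `endrevidx -= (endrevidx - startrevidx) // 2` shrinks the candidate
# window as 8 -> 4 -> 2 -> 1 until the span fits within targetsize, instead
# of scanning every possible end point one by one.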
314 315
315 316 def _slicechunktodensity(revlog, revs, targetdensity=0.5, mingapsize=0):
316 317 """slice revs to reduce the amount of unrelated data to be read from disk.
317 318
318 319 ``revs`` is sliced into groups that should be read in one go.
319 320 Assume that revs are sorted.
320 321
321 322 The initial chunk is sliced until the overall density (payload/chunks-span
322 323 ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
323 324 skipped.
324 325
325 326 >>> revlog = _testrevlog([
326 327 ... 5, #00 (5)
327 328 ... 10, #01 (5)
328 329 ... 12, #02 (2)
329 330 ... 12, #03 (empty)
330 331 ... 27, #04 (15)
331 332 ... 31, #05 (4)
332 333 ... 31, #06 (empty)
333 334 ... 42, #07 (11)
334 335 ... 47, #08 (5)
335 336 ... 47, #09 (empty)
336 337 ... 48, #10 (1)
337 338 ... 51, #11 (3)
338 339 ... 74, #12 (23)
339 340 ... 85, #13 (11)
340 341 ... 86, #14 (1)
341 342 ... 91, #15 (5)
342 343 ... ])
343 344
344 345 >>> list(_slicechunktodensity(revlog, list(range(16))))
345 346 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
346 347 >>> list(_slicechunktodensity(revlog, [0, 15]))
347 348 [[0], [15]]
348 349 >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
349 350 [[0], [11], [15]]
350 351 >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
351 352 [[0], [11, 13, 15]]
352 353 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
353 354 [[1, 2], [5, 8, 10, 11], [14]]
354 355 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
355 356 ... mingapsize=20))
356 357 [[1, 2, 3, 5, 8, 10, 11], [14]]
357 358 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
358 359 ... targetdensity=0.95))
359 360 [[1, 2], [5], [8, 10, 11], [14]]
360 361 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
361 362 ... targetdensity=0.95, mingapsize=12))
362 363 [[1, 2], [5, 8, 10, 11], [14]]
363 364 """
364 365 start = revlog.start
365 366 length = revlog.length
366 367
367 368 if len(revs) <= 1:
368 369 yield revs
369 370 return
370 371
371 372 deltachainspan = segmentspan(revlog, revs)
372 373
373 374 if deltachainspan < mingapsize:
374 375 yield revs
375 376 return
376 377
377 378 readdata = deltachainspan
378 379 chainpayload = sum(length(r) for r in revs)
379 380
380 381 if deltachainspan:
381 382 density = chainpayload / float(deltachainspan)
382 383 else:
383 384 density = 1.0
384 385
385 386 if density >= targetdensity:
386 387 yield revs
387 388 return
388 389
389 390 # Collect the gaps so they can later be popped from largest to smallest
390 391 gaps = []
391 392 prevend = None
392 393 for i, rev in enumerate(revs):
393 394 revstart = start(rev)
394 395 revlen = length(rev)
395 396
396 397 # Skip empty revisions to form larger holes
397 398 if revlen == 0:
398 399 continue
399 400
400 401 if prevend is not None:
401 402 gapsize = revstart - prevend
402 403 # only consider holes that are large enough
403 404 if gapsize > mingapsize:
404 405 gaps.append((gapsize, i))
405 406
406 407 prevend = revstart + revlen
407 408 # sort the gaps to pop them from largest to smallest
408 409 gaps.sort()
409 410
410 411 # Collect the indices of the largest holes until the density is acceptable
411 412 selected = []
412 413 while gaps and density < targetdensity:
413 414 gapsize, gapidx = gaps.pop()
414 415
415 416 selected.append(gapidx)
416 417
417 418 # removing this gap shrinks the span of data we would actually
418 419 # have to read
419 420 readdata -= gapsize
420 421 if readdata > 0:
421 422 density = chainpayload / float(readdata)
422 423 else:
423 424 density = 1.0
424 425 selected.sort()
425 426
426 427 # Cut the revs at collected indices
427 428 previdx = 0
428 429 for idx in selected:
429 430
430 431 chunk = _trimchunk(revlog, revs, previdx, idx)
431 432 if chunk:
432 433 yield chunk
433 434
434 435 previdx = idx
435 436
436 437 chunk = _trimchunk(revlog, revs, previdx)
437 438 if chunk:
438 439 yield chunk
439 440
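# A worked example of the density computation above (numbers are made up):
# a chain payload of 20 bytes spread over a 50 byte span has density
# 20 / 50 == 0.4; dropping a single 25 byte gap leaves readdata == 25 and
# raises the density to 20 / 25 == 0.8, which would satisfy the default
# targetdensity of 0.5.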
440 441
441 442 def _trimchunk(revlog, revs, startidx, endidx=None):
442 443 """returns revs[startidx:endidx] without empty trailing revs
443 444
444 445 Doctest Setup
445 446 >>> revlog = _testrevlog([
446 447 ... 5, #0
447 448 ... 10, #1
448 449 ... 12, #2
449 450 ... 12, #3 (empty)
450 451 ... 17, #4
451 452 ... 21, #5
452 453 ... 21, #6 (empty)
453 454 ... ])
454 455
455 456 Contiguous cases:
456 457 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
457 458 [0, 1, 2, 3, 4, 5]
458 459 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
459 460 [0, 1, 2, 3, 4]
460 461 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
461 462 [0, 1, 2]
462 463 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
463 464 [2]
464 465 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
465 466 [3, 4, 5]
466 467 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
467 468 [3, 4]
468 469
469 470 Discontiguous cases:
470 471 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
471 472 [1, 3, 5]
472 473 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
473 474 [1]
474 475 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
475 476 [3, 5]
476 477 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
477 478 [3, 5]
478 479 """
479 480 length = revlog.length
480 481
481 482 if endidx is None:
482 483 endidx = len(revs)
483 484
484 485 # If we have a non-empty delta candidate, there is nothing to trim
485 486 if revs[endidx - 1] < len(revlog):
486 487 # Trim empty revs at the end, except the very first revision of a chain
487 488 while (
488 489 endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0
489 490 ):
490 491 endidx -= 1
491 492
492 493 return revs[startidx:endidx]
493 494
494 495
495 496 def segmentspan(revlog, revs):
496 497 """Get the byte span of a segment of revisions
497 498
498 499 revs is a sorted array of revision numbers
499 500
500 501 >>> revlog = _testrevlog([
501 502 ... 5, #0
502 503 ... 10, #1
503 504 ... 12, #2
504 505 ... 12, #3 (empty)
505 506 ... 17, #4
506 507 ... ])
507 508
508 509 >>> segmentspan(revlog, [0, 1, 2, 3, 4])
509 510 17
510 511 >>> segmentspan(revlog, [0, 4])
511 512 17
512 513 >>> segmentspan(revlog, [3, 4])
513 514 5
514 515 >>> segmentspan(revlog, [1, 2, 3,])
515 516 7
516 517 >>> segmentspan(revlog, [1, 3])
517 518 7
518 519 """
519 520 if not revs:
520 521 return 0
521 522 end = revlog.end(revs[-1])
522 523 return end - revlog.start(revs[0])
523 524
524 525
525 526 def _textfromdelta(revlog, baserev, delta, p1, p2, flags, expectednode):
526 527 """build full text from a (base, delta) pair and other metadata"""
527 528 # special case deltas which replace entire base; no need to decode
528 529 # base revision. this neatly avoids censored bases, which throw when
529 530 # they're decoded.
530 531 hlen = struct.calcsize(b">lll")
531 532 if delta[:hlen] == mdiff.replacediffheader(
532 533 revlog.rawsize(baserev), len(delta) - hlen
533 534 ):
534 535 fulltext = delta[hlen:]
535 536 else:
536 537 # deltabase is the rawtext before it was changed by flag processors,
537 538 # which is equivalent to non-raw text
538 539 basetext = revlog.revision(baserev)
539 540 fulltext = mdiff.patch(basetext, delta)
540 541
541 542 try:
542 543 validatehash = flagutil.processflagsraw(revlog, fulltext, flags)
543 544 if validatehash:
544 545 revlog.checkhash(fulltext, expectednode, p1=p1, p2=p2)
545 546 if flags & REVIDX_ISCENSORED:
546 547 raise error.StorageError(
547 548 _(b'node %s is not censored') % expectednode
548 549 )
549 550 except error.CensoredNodeError:
550 551 # must pass the censored index flag to add censored revisions
551 552 if not flags & REVIDX_ISCENSORED:
552 553 raise
553 554 return fulltext
554 555
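# Hedged note on the full-replacement check above: the b">lll" header is
# three big-endian 32-bit integers (12 bytes) describing a single hunk that
# spans the entire base; the exact triple is whatever
# mdiff.replacediffheader() produces, and only an exact prefix match
# matters here.
#
# >>> import struct
# >>> struct.calcsize(b">lll")
# 12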
555 556
556 557 @attr.s(slots=True, frozen=True)
557 558 class _deltainfo:
558 559 distance = attr.ib()
559 560 deltalen = attr.ib()
560 561 data = attr.ib()
561 562 base = attr.ib()
562 563 chainbase = attr.ib()
563 564 chainlen = attr.ib()
564 565 compresseddeltalen = attr.ib()
565 566 snapshotdepth = attr.ib()
566 567
567 568
568 569 def drop_u_compression(delta):
569 570 """turn a "u" (no-compression) delta into no-compression without header
570 571
571 572 This is useful for revlog formats that have a better compression method.
572 573 """
573 574 assert delta.data[0] == b'u', delta.data[0]
574 575 return _deltainfo(
575 576 delta.distance,
576 577 delta.deltalen - 1,
577 578 (b'', delta.data[1]),
578 579 delta.base,
579 580 delta.chainbase,
580 581 delta.chainlen,
581 582 delta.compresseddeltalen,
582 583 delta.snapshotdepth,
583 584 )
584 585
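# Illustrative doctest-style sketch (the _deltainfo field values are made
# up; only the (b'u', payload) data tuple matters): the b'u' marker is
# dropped and the stored length shrinks by one byte.
#
# >>> di = _deltainfo(10, 6, (b'u', b'hello'), 0, 0, 1, 6, None)
# >>> drop_u_compression(di).data
# (b'', b'hello')
# >>> drop_u_compression(di).deltalen
# 5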
585 586
586 587 def is_good_delta_info(revlog, deltainfo, revinfo):
587 588 """Returns True if the given delta is good. Good means that it is within
588 589 the disk span, disk size, and chain length bounds that we know to be
589 590 performant."""
590 591 if deltainfo is None:
591 592 return False
592 593
593 594 # the DELTA_BASE_REUSE_FORCE case should have been taken care of sooner, so
594 595 # we should never end up asking such a question. Adding the assert as a
595 596 # safeguard to detect anything that would be fishy in this regard.
596 597 assert (
597 598 revinfo.cachedelta is None
598 599 or revinfo.cachedelta[2] != DELTA_BASE_REUSE_FORCE
599 600 or not revlog.delta_config.general_delta
600 601 )
601 602
602 603 # - 'deltainfo.distance' is the distance from the base revision --
603 604 # bounding it limits the amount of I/O we need to do.
604 605 # - 'deltainfo.compresseddeltalen' is the sum of the total size of
605 606 # deltas we need to apply -- bounding it limits the amount of CPU
606 607 # we consume.
607 608
608 609 textlen = revinfo.textlen
609 610 defaultmax = textlen * 4
610 611 maxdist = revlog.delta_config.max_deltachain_span
611 612 if not maxdist:
612 613 maxdist = deltainfo.distance # ensure the conditional pass
613 614 maxdist = max(maxdist, defaultmax)
614 615
615 616 # Bad delta from read span:
616 617 #
617 618 # If the span of data read is larger than the maximum allowed.
618 619 #
619 620 # In the sparse-revlog case, we rely on the associated "sparse reading"
620 621 # to avoid issues related to the span of data. In theory, it would be
621 622 # possible to build a pathological revlog where the delta pattern would
622 623 # lead to too many reads. However, these do not happen in practice at
623 624 # all. So we skip the span check entirely.
624 625 if not revlog.delta_config.sparse_revlog and maxdist < deltainfo.distance:
625 626 return False
626 627
627 628 # Bad delta from new delta size:
628 629 #
629 630 # If the delta size is larger than the target text, storing the
630 631 # delta will be inefficient.
631 632 if textlen < deltainfo.deltalen:
632 633 return False
633 634
634 635 # Bad delta from cumulated payload size:
635 636 #
636 637 # If the sum of the deltas gets larger than K * the target text length.
637 638 if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
638 639 return False
639 640
640 641 # Bad delta from chain length:
641 642 #
642 643 # If the number of deltas in the chain gets too high.
643 644 if (
644 645 revlog.delta_config.max_chain_len
645 646 and revlog.delta_config.max_chain_len < deltainfo.chainlen
646 647 ):
647 648 return False
648 649
649 650 # bad delta from intermediate snapshot size limit
650 651 #
651 652 # If an intermediate snapshot size is higher than the limit. The
652 653 # limit exists to prevent an endless chain of intermediate deltas
653 654 # from being created.
654 655 if (
655 656 deltainfo.snapshotdepth is not None
656 657 and (textlen >> deltainfo.snapshotdepth) < deltainfo.deltalen
657 658 ):
658 659 return False
659 660
660 661 # bad delta if new intermediate snapshot is larger than the previous
661 662 # snapshot
662 663 if (
663 664 deltainfo.snapshotdepth
664 665 and revlog.length(deltainfo.base) < deltainfo.deltalen
665 666 ):
666 667 return False
667 668
668 669 return True
669 670
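# A worked example of the bounds above (numbers are made up): for
# textlen == 1000, the default span limit is 4 * 1000 == 4000 bytes
# (only enforced for non-sparse revlogs), a delta longer than 1000 bytes is
# rejected outright, a chain whose compressed deltas sum past
# LIMIT_DELTA2TEXT * 1000 == 2000 bytes is rejected, and an intermediate
# snapshot at depth 3 must stay under 1000 >> 3 == 125 bytes.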
670 671
671 672 # If a revision's full text is that much bigger than a base candidate full
672 673 # text's, it is very unlikely that it will produce a valid delta. We no longer
673 674 # consider these candidates.
674 675 LIMIT_BASE2TEXT = 500
675 676
676 677
677 678 def _candidategroups(
678 679 revlog,
679 680 textlen,
680 681 p1,
681 682 p2,
682 683 cachedelta,
683 684 excluded_bases=None,
684 685 target_rev=None,
685 686 snapshot_cache=None,
686 687 ):
687 688 """Provides groups of revisions to be tested as delta bases
688 689
689 690 This top level function focuses on emitting groups with unique and
690 691 worthwhile content. See _rawgroups for details about the group order.
691 692 """
692 693 # should we try to build a delta?
693 694 if not (len(revlog) and revlog._storedeltachains):
694 695 yield None
695 696 return
696 697
697 698 if target_rev is None:
698 699 target_rev = len(revlog)
699 700
700 701 if not revlog.delta_config.general_delta:
701 702 # before general delta, there is only one possible delta base
702 703 yield (target_rev - 1,)
703 704 yield None
704 705 return
705 706
706 707 # the DELTA_BASE_REUSE_FORCE case should have been taken care of sooner, so
707 708 # we should never end up asking such a question. Adding the assert as a
708 709 # safeguard to detect anything that would be fishy in this regard.
709 710 assert (
710 711 cachedelta is None
711 712 or cachedelta[2] != DELTA_BASE_REUSE_FORCE
712 713 or not revlog.delta_config.general_delta
713 714 )
714 715
715 716 deltalength = revlog.length
716 717 deltaparent = revlog.deltaparent
717 718 sparse = revlog.delta_config.sparse_revlog
718 719 good = None
719 720
720 721 deltas_limit = textlen * LIMIT_DELTA2TEXT
721 722 group_chunk_size = revlog.delta_config.candidate_group_chunk_size
722 723
723 724 tested = {nullrev}
724 725 candidates = _refinedgroups(
725 726 revlog,
726 727 p1,
727 728 p2,
728 729 cachedelta,
729 730 snapshot_cache=snapshot_cache,
730 731 )
731 732 while True:
732 733 temptative = candidates.send(good)
733 734 if temptative is None:
734 735 break
735 736 group = []
736 737 for rev in temptative:
737 738 # skip over empty deltas (no need to include them in a chain)
738 739 while not (rev == nullrev or rev in tested or deltalength(rev)):
739 740 tested.add(rev)
740 741 rev = deltaparent(rev)
741 742 # no need to try a delta against nullrev, this will be done as a
742 743 # last resort.
743 744 if rev == nullrev:
744 745 continue
745 746 # filter out revisions we tested already
746 747 if rev in tested:
747 748 continue
748 749
749 750 # a higher authority deemed the base unworthy (e.g. censored)
750 751 if excluded_bases is not None and rev in excluded_bases:
751 752 tested.add(rev)
752 753 continue
753 754 # We are in some recomputation cases and that rev is too high in
754 755 # the revlog
755 756 if target_rev is not None and rev >= target_rev:
756 757 tested.add(rev)
757 758 continue
758 759 # filter out delta bases that will never produce a good delta
759 760 if deltas_limit < revlog.length(rev):
760 761 tested.add(rev)
761 762 continue
762 763 if sparse and revlog.rawsize(rev) < (textlen // LIMIT_BASE2TEXT):
763 764 tested.add(rev)
764 765 continue
765 766 # no delta for rawtext-changing revs (see "candelta" for why)
766 767 if revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
767 768 tested.add(rev)
768 769 continue
769 770
770 771 # If we reach here, we are about to build and test a delta.
771 772 # The delta building process will compute the chaininfo in any
772 773 # case; since that computation is cached, it is fine to access it
773 774 # here too.
774 775 chainlen, chainsize = revlog._chaininfo(rev)
775 776 # if chain will be too long, skip base
776 777 if (
777 778 revlog.delta_config.max_chain_len
778 779 and chainlen >= revlog.delta_config.max_chain_len
779 780 ):
780 781 tested.add(rev)
781 782 continue
782 783 # if the chain already has too much data, skip base
783 784 if deltas_limit < chainsize:
784 785 tested.add(rev)
785 786 continue
786 787 if sparse and revlog.upperboundcomp is not None:
787 788 maxcomp = revlog.upperboundcomp
788 789 basenotsnap = (p1, p2, nullrev)
789 790 if rev not in basenotsnap and revlog.issnapshot(rev):
790 791 snapshotdepth = revlog.snapshotdepth(rev)
791 792 # If text is significantly larger than the base, we can
792 793 # expect the resulting delta to be proportional to the size
793 794 # difference
794 795 revsize = revlog.rawsize(rev)
795 796 rawsizedistance = max(textlen - revsize, 0)
796 797 # use an estimate of the compression upper bound.
797 798 lowestrealisticdeltalen = rawsizedistance // maxcomp
798 799
799 800 # check the absolute constraint on the delta size
800 801 snapshotlimit = textlen >> snapshotdepth
801 802 if snapshotlimit < lowestrealisticdeltalen:
802 803 # delta lower bound is larger than accepted upper bound
803 804 tested.add(rev)
804 805 continue
805 806
806 807 # check the relative constraint on the delta size
807 808 revlength = revlog.length(rev)
808 809 if revlength < lowestrealisticdeltalen:
809 810 # delta probable lower bound is larger than target base
810 811 tested.add(rev)
811 812 continue
812 813
813 814 group.append(rev)
814 815 if group:
815 816 # When the size of the candidate group is big, it can result in a
816 817 # quite significant performance impact. To reduce this, we can send
817 818 # them in smaller batches until the new batch does not provide any
818 819 # improvements.
819 820 #
820 821 # This might reduce the overall efficiency of the compression in
821 822 # some corner cases, but that should also prevent very pathological
822 823 # cases from being an issue. (e.g. 20,000 candidates).
823 824 #
824 825 # XXX note that the ordering of the group becomes important as it
825 826 # now impacts the final result. The current order is unprocessed
826 827 # and can be improved.
827 828 if group_chunk_size == 0:
828 829 tested.update(group)
829 830 good = yield tuple(group)
830 831 else:
831 832 prev_good = good
832 833 for start in range(0, len(group), group_chunk_size):
833 834 sub_group = group[start : start + group_chunk_size]
834 835 tested.update(sub_group)
835 836 good = yield tuple(sub_group)
836 837 if prev_good == good:
837 838 break
838 839
839 840 yield None
840 841
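# A minimal sketch of how callers drive the generator above (it mirrors
# the loop in deltacomputer.finddeltainfo; `pick_best` is a hypothetical
# placeholder for the delta evaluation step):
#
#     groups = _candidategroups(revlog, textlen, p1r, p2r, cachedelta)
#     candidates = next(groups)
#     while candidates is not None:
#         best = pick_best(candidates)  # may be None if no delta was good
#         if best is not None:
#             candidates = groups.send(best)  # ask for refined groups
#         else:
#             candidates = next(groups)  # move on to the next raw group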
841 842
842 843 def _refinedgroups(revlog, p1, p2, cachedelta, snapshot_cache=None):
843 844 good = None
844 845 # First we try to reuse the delta contained in the bundle.
845 846 # (or from the source revlog)
846 847 #
847 848 # This logic only applies to general delta repositories and can be disabled
848 849 # through configuration. Disabling source delta reuse is useful when
849 850 # we want to make sure we recompute "optimal" deltas.
850 851 debug_info = None
851 852 if cachedelta is not None and cachedelta[2] > DELTA_BASE_REUSE_NO:
852 853 # Assume what we received from the server is a good choice
853 854 # build delta will reuse the cache
854 855 if debug_info is not None:
855 856 debug_info['cached-delta.tested'] += 1
856 857 good = yield (cachedelta[0],)
857 858 if good is not None:
858 859 if debug_info is not None:
859 860 debug_info['cached-delta.accepted'] += 1
860 861 yield None
861 862 return
862 863 if snapshot_cache is None:
863 864 snapshot_cache = SnapshotCache()
864 865 groups = _rawgroups(
865 866 revlog,
866 867 p1,
867 868 p2,
868 869 cachedelta,
869 870 snapshot_cache,
870 871 )
871 872 for candidates in groups:
872 873 good = yield candidates
873 874 if good is not None:
874 875 break
875 876
876 877 # If sparse revlog is enabled, we can try to refine the available deltas
877 878 if not revlog.delta_config.sparse_revlog:
878 879 yield None
879 880 return
880 881
881 882 # if we have a refinable value, try to refine it
882 883 if good is not None and good not in (p1, p2) and revlog.issnapshot(good):
883 884 # refine snapshot down
884 885 previous = None
885 886 while previous != good:
886 887 previous = good
887 888 base = revlog.deltaparent(good)
888 889 if base == nullrev:
889 890 break
890 891 good = yield (base,)
891 892 # refine snapshot up
892 893 if not snapshot_cache.snapshots:
893 894 snapshot_cache.update(revlog, good + 1)
894 895 previous = None
895 896 while good != previous:
896 897 previous = good
897 898 children = tuple(sorted(c for c in snapshot_cache.snapshots[good]))
898 899 good = yield children
899 900
900 901 if debug_info is not None:
901 902 if good is None:
902 903 debug_info['no-solution'] += 1
903 904
904 905 yield None
905 906
906 907
907 908 def _rawgroups(revlog, p1, p2, cachedelta, snapshot_cache=None):
908 909 """Provides groups of revisions to be tested as delta bases
909 910
910 911 This lower level function focuses on emitting theoretically interesting
911 912 deltas without looking at any practical details.
912 913
913 914 The group order aims at providing fast or small candidates first.
914 915 """
915 916 # Why search for a delta base if we cannot use a delta base?
916 917 assert revlog.delta_config.general_delta
917 918 # also see issue6056
918 919 sparse = revlog.delta_config.sparse_revlog
919 920 curr = len(revlog)
920 921 prev = curr - 1
921 922 deltachain = lambda rev: revlog._deltachain(rev)[0]
922 923
923 924 # exclude already lazy tested base if any
924 925 parents = [p for p in (p1, p2) if p != nullrev]
925 926
926 927 if not revlog.delta_config.delta_both_parents and len(parents) == 2:
927 928 parents.sort()
928 929 # To minimize the chance of having to build a fulltext,
929 930 # pick first whichever parent is closest to us (max rev)
930 931 yield (parents[1],)
931 932 # then the other one (min rev) if the first did not fit
932 933 yield (parents[0],)
933 934 elif len(parents) > 0:
934 935 # Test all parents (1 or 2), and keep the best candidate
935 936 yield parents
936 937
937 938 if sparse and parents:
938 939 if snapshot_cache is None:
939 940 # map: base-rev: [snapshot-revs]
940 941 snapshot_cache = SnapshotCache()
941 942 # See if we can use an existing snapshot in the parent chains as a
942 943 # base for a new intermediate snapshot
943 944 #
944 945 # search for snapshot in parents delta chain
945 946 # map: snapshot-level: snapshot-rev
946 947 parents_snaps = collections.defaultdict(set)
947 948 candidate_chains = [deltachain(p) for p in parents]
948 949 for chain in candidate_chains:
949 950 for idx, s in enumerate(chain):
950 951 if not revlog.issnapshot(s):
951 952 break
952 953 parents_snaps[idx].add(s)
953 954 snapfloor = min(parents_snaps[0]) + 1
954 955 snapshot_cache.update(revlog, snapfloor)
955 956 # search for the highest "unrelated" revision
956 957 #
957 958 # Adding snapshots used by "unrelated" revisions increases the odds
958 959 # that we reuse an independent, yet better, snapshot chain.
959 960 #
960 961 # XXX instead of building a set of revisions, we could lazily enumerate
961 962 # over the chains. That would be more efficient, however we stick to
962 963 # simple code for now.
963 964 all_revs = set()
964 965 for chain in candidate_chains:
965 966 all_revs.update(chain)
966 967 other = None
967 968 for r in revlog.revs(prev, snapfloor):
968 969 if r not in all_revs:
969 970 other = r
970 971 break
971 972 if other is not None:
972 973 # To avoid unfair competition, we won't use unrelated intermediate
973 974 # snapshots that are deeper than the ones from the parent delta
974 975 # chain.
975 976 max_depth = max(parents_snaps.keys())
976 977 chain = deltachain(other)
977 978 for depth, s in enumerate(chain):
978 979 if s < snapfloor:
979 980 continue
980 981 if max_depth < depth:
981 982 break
982 983 if not revlog.issnapshot(s):
983 984 break
984 985 parents_snaps[depth].add(s)
985 986 # Test them as possible intermediate snapshot bases
986 987 # We test them from highest to lowest level. High level ones are more
987 988 # likely to result in small deltas
988 989 floor = None
989 990 for idx, snaps in sorted(parents_snaps.items(), reverse=True):
990 991 siblings = set()
991 992 for s in snaps:
992 993 siblings.update(snapshot_cache.snapshots[s])
993 994 # Before considering making a new intermediate snapshot, we check
994 995 # if an existing snapshot, a child of the base we consider, would
995 996 # be suitable.
996 997 #
997 998 # It gives a chance to reuse a delta chain "unrelated" to the
998 999 # current revision instead of starting our own. Without such
999 1000 # re-use, topological branches would keep reopening new chains,
1000 1001 # creating more and more snapshots as the repository grows.
1001 1002
1002 1003 if floor is not None:
1003 1004 # We only do this for siblings created after the one in our
1004 1005 # parent's delta chain. Those created before have less chance
1005 1006 # of being a valid base, since our ancestors had to create a new
1006 1007 # snapshot.
1007 1008 siblings = [r for r in siblings if floor < r]
1008 1009 yield tuple(sorted(siblings))
1009 1010 # then test the base from our parent's delta chain.
1010 1011 yield tuple(sorted(snaps))
1011 1012 floor = min(snaps)
1012 1013 # No suitable base found in the parent chain; search whether any full
1013 1014 # snapshot emitted since the parent's base would be a suitable base
1014 1015 # for an intermediate snapshot.
1015 1016 #
1016 1017 # It gives a chance to reuse a delta chain unrelated to the current
1017 1018 # revision instead of starting our own. Without such re-use,
1018 1019 # topological branches would keep reopening new full chains, creating
1019 1020 # more and more snapshots as the repository grows.
1020 1021 full = [r for r in snapshot_cache.snapshots[nullrev] if snapfloor <= r]
1021 1022 yield tuple(sorted(full))
1022 1023
1023 1024 if not sparse:
1024 1025 # other approach failed try against prev to hopefully save us a
1025 1026 # fulltext.
1026 1027 yield (prev,)
1027 1028
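# Summary of the emission order above, for a sparse, general-delta revlog
# (each yielded tuple is one candidate group):
#
#   1. the closest parent, then the other parent (or both at once when
#      delta-both-parents is enabled)
#   2. for each snapshot level of the parent chains, deepest level first:
#      sibling snapshots already based on that level, then the level's own
#      snapshots
#   3. full snapshots emitted since the parents' base
#
# Non-sparse revlogs skip steps 2-3 and end with a single (prev,) group.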
1028 1029
1029 1030 class SnapshotCache:
1030 1031 __slots__ = ('snapshots', '_start_rev', '_end_rev')
1031 1032
1032 1033 def __init__(self):
1033 1034 self.snapshots = collections.defaultdict(set)
1034 1035 self._start_rev = None
1035 1036 self._end_rev = None
1036 1037
1037 1038 def update(self, revlog, start_rev=0):
1038 1039 """find snapshots from start_rev to tip"""
1039 1040 nb_revs = len(revlog)
1040 1041 end_rev = nb_revs - 1
1041 1042 if start_rev > end_rev:
1042 1043 return # range is empty
1043 1044
1044 1045 if self._start_rev is None:
1045 1046 assert self._end_rev is None
1046 1047 self._update(revlog, start_rev, end_rev)
1047 1048 elif not (self._start_rev <= start_rev and end_rev <= self._end_rev):
1048 1049 if start_rev < self._start_rev:
1049 1050 self._update(revlog, start_rev, self._start_rev - 1)
1050 1051 if self._end_rev < end_rev:
1051 1052 self._update(revlog, self._end_rev + 1, end_rev)
1052 1053
1053 1054 if self._start_rev is None:
1054 1055 assert self._end_rev is None
1055 1056 self._end_rev = end_rev
1056 1057 self._start_rev = start_rev
1057 1058 else:
1058 1059 self._start_rev = min(self._start_rev, start_rev)
1059 1060 self._end_rev = max(self._end_rev, end_rev)
1060 1061 assert self._start_rev <= self._end_rev, (
1061 1062 self._start_rev,
1062 1063 self._end_rev,
1063 1064 )
1064 1065
1065 1066 def _update(self, revlog, start_rev, end_rev):
1066 1067 """internal method that actually does the content update"""
1067 1068 assert self._start_rev is None or (
1068 1069 start_rev < self._start_rev or start_rev > self._end_rev
1069 1070 ), (self._start_rev, self._end_rev, start_rev, end_rev)
1070 1071 assert self._start_rev is None or (
1071 1072 end_rev < self._start_rev or end_rev > self._end_rev
1072 1073 ), (self._start_rev, self._end_rev, start_rev, end_rev)
1073 1074 cache = self.snapshots
1074 1075 if hasattr(revlog.index, 'findsnapshots'):
1075 1076 revlog.index.findsnapshots(cache, start_rev, end_rev)
1076 1077 else:
1077 1078 deltaparent = revlog.deltaparent
1078 1079 issnapshot = revlog.issnapshot
1079 1080 for rev in revlog.revs(start_rev, end_rev):
1080 1081 if issnapshot(rev):
1081 1082 cache[deltaparent(rev)].add(rev)
1082 1083
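# A minimal usage sketch for the cache above (`rlog` stands in for a real
# revlog): after update(), `snapshots` maps a delta parent to the set of
# snapshot revisions built directly on top of it, with nullrev keying the
# full snapshots.
#
#     cache = SnapshotCache()
#     cache.update(rlog)            # scans rev 0 through tip once
#     full = cache.snapshots[nullrev]
#     children = cache.snapshots[some_base_rev]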
1083 1084
1084 1085 class deltacomputer:
1085 1086 def __init__(
1086 1087 self,
1087 1088 revlog,
1088 1089 write_debug=None,
1089 1090 debug_search=False,
1090 1091 debug_info=None,
1091 1092 ):
1092 1093 self.revlog = revlog
1093 1094 self._write_debug = write_debug
1094 1095 if write_debug is None:
1095 1096 self._debug_search = False
1096 1097 else:
1097 1098 self._debug_search = debug_search
1098 1099 self._debug_info = debug_info
1099 1100 self._snapshot_cache = SnapshotCache()
1100 1101
1101 1102 @property
1102 1103 def _gather_debug(self):
1103 1104 return self._write_debug is not None or self._debug_info is not None
1104 1105
1105 1106 def buildtext(self, revinfo):
1106 1107 """Builds a fulltext version of a revision
1107 1108
1108 1109 revinfo: revisioninfo instance that contains all needed info
1109 1110 """
1110 1111 btext = revinfo.btext
1111 1112 if btext[0] is not None:
1112 1113 return btext[0]
1113 1114
1114 1115 revlog = self.revlog
1115 1116 cachedelta = revinfo.cachedelta
1116 1117 baserev = cachedelta[0]
1117 1118 delta = cachedelta[1]
1118 1119
1119 1120 fulltext = btext[0] = _textfromdelta(
1120 1121 revlog,
1121 1122 baserev,
1122 1123 delta,
1123 1124 revinfo.p1,
1124 1125 revinfo.p2,
1125 1126 revinfo.flags,
1126 1127 revinfo.node,
1127 1128 )
1128 1129 return fulltext
1129 1130
1130 1131 def _builddeltadiff(self, base, revinfo):
1131 1132 revlog = self.revlog
1132 1133 t = self.buildtext(revinfo)
1133 1134 if revlog.iscensored(base):
1134 1135 # deltas based on a censored revision must replace the
1135 1136 # full content in one patch, so delta works everywhere
1136 1137 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
1137 1138 delta = header + t
1138 1139 else:
1139 1140 ptext = revlog.rawdata(base)
1140 1141 delta = mdiff.textdiff(ptext, t)
1141 1142
1142 1143 return delta
1143 1144
1144 1145 def _builddeltainfo(self, revinfo, base, target_rev=None):
1145 1146 # can we use the cached delta?
1146 1147 revlog = self.revlog
1147 1148 chainbase = revlog.chainbase(base)
1148 1149 if revlog.delta_config.general_delta:
1149 1150 deltabase = base
1150 1151 else:
1151 1152 if target_rev is not None and base != target_rev - 1:
1152 1153 msg = (
1153 1154 b'general delta cannot use delta for something else '
1154 1155 b'than `prev`: %d<-%d'
1155 1156 )
1156 1157 msg %= (base, target_rev)
1157 1158 raise error.ProgrammingError(msg)
1158 1159 deltabase = chainbase
1159 1160 snapshotdepth = None
1160 1161 if revlog.delta_config.sparse_revlog and deltabase == nullrev:
1161 1162 snapshotdepth = 0
1162 1163 elif revlog.delta_config.sparse_revlog and revlog.issnapshot(deltabase):
1163 1164 # A delta chain should always be one full snapshot,
1164 1165 # zero or more semi-snapshots, and zero or more deltas
1165 1166 p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
1166 1167 if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
1167 1168 snapshotdepth = len(revlog._deltachain(deltabase)[0])
1168 1169 delta = None
1169 1170 if revinfo.cachedelta:
1170 1171 cachebase = revinfo.cachedelta[0]
1171 1172 # check if the diff still applies
1172 1173 currentbase = cachebase
1173 1174 while (
1174 1175 currentbase != nullrev
1175 1176 and currentbase != base
1176 1177 and self.revlog.length(currentbase) == 0
1177 1178 ):
1178 1179 currentbase = self.revlog.deltaparent(currentbase)
1179 1180 if self.revlog._lazydelta and currentbase == base:
1180 1181 delta = revinfo.cachedelta[1]
1181 1182 if delta is None:
1182 1183 delta = self._builddeltadiff(base, revinfo)
1183 1184 if self._debug_search:
1184 1185 msg = b"DBG-DELTAS-SEARCH: uncompressed-delta-size=%d\n"
1185 1186 msg %= len(delta)
1186 1187 self._write_debug(msg)
1187 1188 # snapshotdepth needs to be neither None nor a 0-level snapshot
1188 1189 if revlog.upperboundcomp is not None and snapshotdepth:
1189 1190 lowestrealisticdeltalen = len(delta) // revlog.upperboundcomp
1190 1191 snapshotlimit = revinfo.textlen >> snapshotdepth
1191 1192 if self._debug_search:
1192 1193 msg = b"DBG-DELTAS-SEARCH: projected-lower-size=%d\n"
1193 1194 msg %= lowestrealisticdeltalen
1194 1195 self._write_debug(msg)
1195 1196 if snapshotlimit < lowestrealisticdeltalen:
1196 1197 if self._debug_search:
1197 1198 msg = b"DBG-DELTAS-SEARCH: DISCARDED (snapshot limit)\n"
1198 1199 self._write_debug(msg)
1199 1200 return None
1200 1201 if revlog.length(base) < lowestrealisticdeltalen:
1201 1202 if self._debug_search:
1202 1203 msg = b"DBG-DELTAS-SEARCH: DISCARDED (prev size)\n"
1203 1204 self._write_debug(msg)
1204 1205 return None
1205 1206 header, data = revlog.compress(delta)
1206 1207 deltalen = len(header) + len(data)
1207 1208 offset = revlog.end(len(revlog) - 1)
1208 1209 dist = deltalen + offset - revlog.start(chainbase)
1209 1210 chainlen, compresseddeltalen = revlog._chaininfo(base)
1210 1211 chainlen += 1
1211 1212 compresseddeltalen += deltalen
1212 1213
1213 1214 return _deltainfo(
1214 1215 dist,
1215 1216 deltalen,
1216 1217 (header, data),
1217 1218 deltabase,
1218 1219 chainbase,
1219 1220 chainlen,
1220 1221 compresseddeltalen,
1221 1222 snapshotdepth,
1222 1223 )
1223 1224
1224 1225 def _fullsnapshotinfo(self, revinfo, curr):
1225 1226 rawtext = self.buildtext(revinfo)
1226 1227 data = self.revlog.compress(rawtext)
1227 1228 compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
1228 1229 deltabase = chainbase = curr
1229 1230 snapshotdepth = 0
1230 1231 chainlen = 1
1231 1232
1232 1233 return _deltainfo(
1233 1234 dist,
1234 1235 deltalen,
1235 1236 data,
1236 1237 deltabase,
1237 1238 chainbase,
1238 1239 chainlen,
1239 1240 compresseddeltalen,
1240 1241 snapshotdepth,
1241 1242 )
1242 1243
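# Hedged reading of the full-snapshot case above: with no delta base, the
# revision is its own deltabase and chainbase, chainlen is 1, snapshotdepth
# is 0, and distance, deltalen and compresseddeltalen all collapse to the
# size of the compressed fulltext.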
1243 1244 def finddeltainfo(self, revinfo, excluded_bases=None, target_rev=None):
1244 1245 """Find an acceptable delta against a candidate revision
1245 1246
1246 1247 revinfo: information about the revision (instance of _revisioninfo)
1247 1248
1248 1249 Returns the first acceptable candidate revision, as ordered by
1249 1250 _candidategroups
1250 1251
1251 1252 If no suitable deltabase is found, we return delta info for a full
1252 1253 snapshot.
1253 1254
1254 1255 `excluded_bases` is an optional set of revision that cannot be used as
1255 1256 a delta base. Use this to recompute delta suitable in censor or strip
1256 1257 context.
1257 1258 """
1258 1259 if target_rev is None:
1259 1260 target_rev = len(self.revlog)
1260 1261
1261 1262 gather_debug = self._gather_debug
1262 1263 cachedelta = revinfo.cachedelta
1263 1264 revlog = self.revlog
1264 1265 p1r = p2r = None
1265 1266
1266 1267 if excluded_bases is None:
1267 1268 excluded_bases = set()
1268 1269
1269 1270 if gather_debug:
1270 1271 start = util.timer()
1271 1272 dbg = self._one_dbg_data()
1272 1273 dbg['revision'] = target_rev
1273 1274 target_revlog = b"UNKNOWN"
1274 1275 target_type = self.revlog.target[0]
1275 1276 target_key = self.revlog.target[1]
1276 1277 if target_type == KIND_CHANGELOG:
1277 1278 target_revlog = b'CHANGELOG:'
1278 1279 elif target_type == KIND_MANIFESTLOG:
1279 1280 target_revlog = b'MANIFESTLOG:'
1280 1281 if target_key:
1281 1282 target_revlog += b'%s:' % target_key
1282 1283 elif target_type == KIND_FILELOG:
1283 1284 target_revlog = b'FILELOG:'
1284 1285 if target_key:
1285 1286 target_revlog += b'%s:' % target_key
1286 1287 dbg['target-revlog'] = target_revlog
1287 1288 p1r = revlog.rev(revinfo.p1)
1288 1289 p2r = revlog.rev(revinfo.p2)
1289 1290 if p1r != nullrev:
1290 1291 p1_chain_len = revlog._chaininfo(p1r)[0]
1291 1292 else:
1292 1293 p1_chain_len = -1
1293 1294 if p2r != nullrev:
1294 1295 p2_chain_len = revlog._chaininfo(p2r)[0]
1295 1296 else:
1296 1297 p2_chain_len = -1
1297 1298 dbg['p1-chain-len'] = p1_chain_len
1298 1299 dbg['p2-chain-len'] = p2_chain_len
1299 1300
1300 1301 # 1) if the revision is empty, no amount of delta can beat it
1301 1302 #
1302 1303 # 2) no delta for flag processor revisions (see "candelta" for why)
1303 1304 # not calling candelta since only one revision needs testing, also to
1304 1305 # avoid the overhead of fetching flags again.
1305 1306 if not revinfo.textlen or revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
1306 1307 deltainfo = self._fullsnapshotinfo(revinfo, target_rev)
1307 1308 if gather_debug:
1308 1309 end = util.timer()
1309 1310 dbg['duration'] = end - start
1310 1311 dbg[
1311 1312 'delta-base'
1312 1313 ] = deltainfo.base # pytype: disable=attribute-error
1313 1314 dbg['search_round_count'] = 0
1314 1315 dbg['using-cached-base'] = False
1315 1316 dbg['delta_try_count'] = 0
1316 1317 dbg['type'] = b"full"
1317 1318 dbg['snapshot-depth'] = 0
1318 1319 self._dbg_process_data(dbg)
1319 1320 return deltainfo
1320 1321
1321 1322 deltainfo = None
1322 1323
1323 1324 # If this source delta is to be forcibly reused, let us comply early.
1324 1325 if (
1325 1326 revlog.delta_config.general_delta
1326 1327 and revinfo.cachedelta is not None
1327 1328 and revinfo.cachedelta[2] == DELTA_BASE_REUSE_FORCE
1328 1329 ):
1329 1330 base = revinfo.cachedelta[0]
1330 1331 if base == nullrev:
1331 1332 dbg_type = b"full"
1332 1333 deltainfo = self._fullsnapshotinfo(revinfo, target_rev)
1333 1334 if gather_debug:
1334 1335 snapshotdepth = 0
1335 1336 elif base not in excluded_bases:
1336 1337 delta = revinfo.cachedelta[1]
1337 1338 header, data = revlog.compress(delta)
1338 1339 deltalen = len(header) + len(data)
1339 1340 if gather_debug:
1340 1341 offset = revlog.end(len(revlog) - 1)
1341 1342 chainbase = revlog.chainbase(base)
1342 1343 distance = deltalen + offset - revlog.start(chainbase)
1343 1344 chainlen, compresseddeltalen = revlog._chaininfo(base)
1344 1345 chainlen += 1
1345 1346 compresseddeltalen += deltalen
1346 1347 if base == p1r or base == p2r:
1347 1348 dbg_type = b"delta"
1348 1349 snapshotdepth = None
1349 1350 elif not revlog.issnapshot(base):
1350 1351 snapshotdepth = None
1351 1352 else:
1352 1353 dbg_type = b"snapshot"
1353 1354 snapshotdepth = revlog.snapshotdepth(base) + 1
1354 1355 else:
1355 1356 distance = None
1356 1357 chainbase = None
1357 1358 chainlen = None
1358 1359 compresseddeltalen = None
1359 1360 snapshotdepth = None
1360 1361 deltainfo = _deltainfo(
1361 1362 distance=distance,
1362 1363 deltalen=deltalen,
1363 1364 data=(header, data),
1364 1365 base=base,
1365 1366 chainbase=chainbase,
1366 1367 chainlen=chainlen,
1367 1368 compresseddeltalen=compresseddeltalen,
1368 1369 snapshotdepth=snapshotdepth,
1369 1370 )
1370 1371
1371 1372 if deltainfo is not None:
1372 1373 if gather_debug:
1373 1374 end = util.timer()
1374 1375 dbg['duration'] = end - start
1375 1376 dbg[
1376 1377 'delta-base'
1377 1378 ] = deltainfo.base # pytype: disable=attribute-error
1378 1379 dbg['search_round_count'] = 0
1379 1380 dbg['using-cached-base'] = True
1380 1381 dbg['delta_try_count'] = 0
1381 1382 dbg['type'] = b"full"
1382 1383 if snapshotdepth is None:
1383 1384 dbg['snapshot-depth'] = 0
1384 1385 else:
1385 1386 dbg['snapshot-depth'] = snapshotdepth
1386 1387 self._dbg_process_data(dbg)
1387 1388 return deltainfo
1388 1389
1389 1390 # count the number of different deltas we tried (for debug purposes)
1390 1391 dbg_try_count = 0
1391 1392 # count the number of "search rounds" we did (for debug purposes)
1392 1393 dbg_try_rounds = 0
1393 1394 dbg_type = b'unknown'
1394 1395
1395 1396 if p1r is None:
1396 1397 p1r = revlog.rev(revinfo.p1)
1397 1398 p2r = revlog.rev(revinfo.p2)
1398 1399
1399 1400 if self._debug_search:
1400 1401 msg = b"DBG-DELTAS-SEARCH: SEARCH rev=%d\n"
1401 1402 msg %= target_rev
1402 1403 self._write_debug(msg)
1403 1404
1404 1405 groups = _candidategroups(
1405 1406 self.revlog,
1406 1407 revinfo.textlen,
1407 1408 p1r,
1408 1409 p2r,
1409 1410 cachedelta,
1410 1411 excluded_bases,
1411 1412 target_rev,
1412 1413 snapshot_cache=self._snapshot_cache,
1413 1414 )
1414 1415 candidaterevs = next(groups)
1415 1416 while candidaterevs is not None:
1416 1417 dbg_try_rounds += 1
1417 1418 if self._debug_search:
1418 1419 prev = None
1419 1420 if deltainfo is not None:
1420 1421 prev = deltainfo.base
1421 1422
1422 1423 if (
1423 1424 cachedelta is not None
1424 1425 and len(candidaterevs) == 1
1425 1426 and cachedelta[0] in candidaterevs
1426 1427 ):
1427 1428 round_type = b"cached-delta"
1428 1429 elif p1r in candidaterevs or p2r in candidaterevs:
1429 1430 round_type = b"parents"
1430 1431 elif prev is not None and all(c < prev for c in candidaterevs):
1431 1432 round_type = b"refine-down"
1432 1433 elif prev is not None and all(c > prev for c in candidaterevs):
1433 1434 round_type = b"refine-up"
1434 1435 else:
1435 1436 round_type = b"search-down"
1436 1437 msg = b"DBG-DELTAS-SEARCH: ROUND #%d - %d candidates - %s\n"
1437 1438 msg %= (dbg_try_rounds, len(candidaterevs), round_type)
1438 1439 self._write_debug(msg)
1439 1440 nominateddeltas = []
1440 1441 if deltainfo is not None:
1441 1442 if self._debug_search:
1442 1443 msg = (
1443 1444 b"DBG-DELTAS-SEARCH: CONTENDER: rev=%d - length=%d\n"
1444 1445 )
1445 1446 msg %= (deltainfo.base, deltainfo.deltalen)
1446 1447 self._write_debug(msg)
1447 1448 # if we already found a good delta,
1448 1449 # challenge it against refined candidates
1449 1450 nominateddeltas.append(deltainfo)
1450 1451 for candidaterev in candidaterevs:
1451 1452 if self._debug_search:
1452 1453 msg = b"DBG-DELTAS-SEARCH: CANDIDATE: rev=%d\n"
1453 1454 msg %= candidaterev
1454 1455 self._write_debug(msg)
1455 1456 candidate_type = None
1456 1457 if candidaterev == p1r:
1457 1458 candidate_type = b"p1"
1458 1459 elif candidaterev == p2r:
1459 1460 candidate_type = b"p2"
1460 1461 elif self.revlog.issnapshot(candidaterev):
1461 1462 candidate_type = b"snapshot-%d"
1462 1463 candidate_type %= self.revlog.snapshotdepth(
1463 1464 candidaterev
1464 1465 )
1465 1466
1466 1467 if candidate_type is not None:
1467 1468 msg = b"DBG-DELTAS-SEARCH: type=%s\n"
1468 1469 msg %= candidate_type
1469 1470 self._write_debug(msg)
1470 1471 msg = b"DBG-DELTAS-SEARCH: size=%d\n"
1471 1472 msg %= self.revlog.length(candidaterev)
1472 1473 self._write_debug(msg)
1473 1474 msg = b"DBG-DELTAS-SEARCH: base=%d\n"
1474 1475 msg %= self.revlog.deltaparent(candidaterev)
1475 1476 self._write_debug(msg)
1476 1477
1477 1478 dbg_try_count += 1
1478 1479
1479 1480 if self._debug_search:
1480 1481 delta_start = util.timer()
1481 1482 candidatedelta = self._builddeltainfo(
1482 1483 revinfo,
1483 1484 candidaterev,
1484 1485 target_rev=target_rev,
1485 1486 )
1486 1487 if self._debug_search:
1487 1488 delta_end = util.timer()
1488 1489 msg = b"DBG-DELTAS-SEARCH: delta-search-time=%f\n"
1489 1490 msg %= delta_end - delta_start
1490 1491 self._write_debug(msg)
1491 1492 if candidatedelta is not None:
1492 1493 if is_good_delta_info(self.revlog, candidatedelta, revinfo):
1493 1494 if self._debug_search:
1494 1495 msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (GOOD)\n"
1495 1496 msg %= candidatedelta.deltalen
1496 1497 self._write_debug(msg)
1497 1498 nominateddeltas.append(candidatedelta)
1498 1499 elif self._debug_search:
1499 1500 msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (BAD)\n"
1500 1501 msg %= candidatedelta.deltalen
1501 1502 self._write_debug(msg)
1502 1503 elif self._debug_search:
1503 1504 msg = b"DBG-DELTAS-SEARCH: NO-DELTA\n"
1504 1505 self._write_debug(msg)
1505 1506 if nominateddeltas:
1506 1507 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
1507 1508 if deltainfo is not None:
1508 1509 candidaterevs = groups.send(deltainfo.base)
1509 1510 else:
1510 1511 candidaterevs = next(groups)
1511 1512
1512 1513 if deltainfo is None:
1513 1514 dbg_type = b"full"
1514 1515 deltainfo = self._fullsnapshotinfo(revinfo, target_rev)
1515 1516 elif deltainfo.snapshotdepth: # pytype: disable=attribute-error
1516 1517 dbg_type = b"snapshot"
1517 1518 else:
1518 1519 dbg_type = b"delta"
1519 1520
1520 1521 if gather_debug:
1521 1522 end = util.timer()
1522 1523 if dbg_type == b'full':
1523 1524 used_cached = (
1524 1525 cachedelta is not None
1525 1526 and dbg_try_rounds == 0
1526 1527 and dbg_try_count == 0
1527 1528 and cachedelta[0] == nullrev
1528 1529 )
1529 1530 else:
1530 1531 used_cached = (
1531 1532 cachedelta is not None
1532 1533 and dbg_try_rounds == 1
1533 1534 and dbg_try_count == 1
1534 1535 and deltainfo.base == cachedelta[0]
1535 1536 )
1536 1537 dbg['duration'] = end - start
1537 1538 dbg[
1538 1539 'delta-base'
1539 1540 ] = deltainfo.base # pytype: disable=attribute-error
1540 1541 dbg['search_round_count'] = dbg_try_rounds
1541 1542 dbg['using-cached-base'] = used_cached
1542 1543 dbg['delta_try_count'] = dbg_try_count
1543 1544 dbg['type'] = dbg_type
1544 1545 if (
1545 1546 deltainfo.snapshotdepth # pytype: disable=attribute-error
1546 1547 is not None
1547 1548 ):
1548 1549 dbg[
1549 1550 'snapshot-depth'
1550 1551 ] = deltainfo.snapshotdepth # pytype: disable=attribute-error
1551 1552 else:
1552 1553 dbg['snapshot-depth'] = 0
1553 1554 self._dbg_process_data(dbg)
1554 1555 return deltainfo
1555 1556
1556 1557 def _one_dbg_data(self):
1557 1558 return {
1558 1559 'duration': None,
1559 1560 'revision': None,
1560 1561 'delta-base': None,
1561 1562 'search_round_count': None,
1562 1563 'using-cached-base': None,
1563 1564 'delta_try_count': None,
1564 1565 'type': None,
1565 1566 'p1-chain-len': None,
1566 1567 'p2-chain-len': None,
1567 1568 'snapshot-depth': None,
1568 1569 'target-revlog': None,
1569 1570 }
1570 1571
1571 1572 def _dbg_process_data(self, dbg):
1572 1573 if self._debug_info is not None:
1573 1574 self._debug_info.append(dbg)
1574 1575
1575 1576 if self._write_debug is not None:
1576 1577 msg = (
1577 1578 b"DBG-DELTAS:"
1578 1579 b" %-12s"
1579 1580 b" rev=%d:"
1580 1581 b" delta-base=%d"
1581 1582 b" is-cached=%d"
1582 1583 b" - search-rounds=%d"
1583 1584 b" try-count=%d"
1584 1585 b" - delta-type=%-6s"
1585 1586 b" snap-depth=%d"
1586 1587 b" - p1-chain-length=%d"
1587 1588 b" p2-chain-length=%d"
1588 1589 b" - duration=%f"
1589 1590 b"\n"
1590 1591 )
1591 1592 msg %= (
1592 1593 dbg["target-revlog"],
1593 1594 dbg["revision"],
1594 1595 dbg["delta-base"],
1595 1596 dbg["using-cached-base"],
1596 1597 dbg["search_round_count"],
1597 1598 dbg["delta_try_count"],
1598 1599 dbg["type"],
1599 1600 dbg["snapshot-depth"],
1600 1601 dbg["p1-chain-len"],
1601 1602 dbg["p2-chain-len"],
1602 1603 dbg["duration"],
1603 1604 )
1604 1605 self._write_debug(msg)
1605 1606
1606 1607
1607 1608 def delta_compression(default_compression_header, deltainfo):
1608 1609 """return (COMPRESSION_MODE, deltainfo)
1609 1610
1610 1611 used by revlog v2+ format to dispatch between PLAIN and DEFAULT
1611 1612 compression.
1612 1613 """
1613 1614 h, d = deltainfo.data
1614 1615 compression_mode = COMP_MODE_INLINE
1615 1616 if not h and not d:
1616 1617 # no data to store at all... declare it uncompressed
1617 1618 compression_mode = COMP_MODE_PLAIN
1618 1619 elif not h:
1619 1620 t = d[0:1]
1620 1621 if t == b'\0':
1621 1622 compression_mode = COMP_MODE_PLAIN
1622 1623 elif t == default_compression_header:
1623 1624 compression_mode = COMP_MODE_DEFAULT
1624 1625 elif h == b'u':
1625 1626 # we have a more efficient way to declare uncompressed
1626 1627 h = b''
1627 1628 compression_mode = COMP_MODE_PLAIN
1628 1629 deltainfo = drop_u_compression(deltainfo)
1629 1630 return compression_mode, deltainfo