delta-find: remove dead code intended to deal with forced delta reuse...
marmoute
r51591:f1b57672 default
@@ -1,1631 +1,1624 b''
1 1 # revlogdeltas.py - Logic around delta computation for revlog
2 2 #
3 3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 4 # Copyright 2018 Octobus <contact@octobus.net>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8 """Helper class to compute deltas stored inside revlogs"""
9 9
10 10
11 11 import collections
12 12 import struct
13 13
14 14 # import stuff from node for others to import from revlog
15 15 from ..node import nullrev
16 16 from ..i18n import _
17 17 from ..pycompat import getattr
18 18
19 19 from .constants import (
20 20 COMP_MODE_DEFAULT,
21 21 COMP_MODE_INLINE,
22 22 COMP_MODE_PLAIN,
23 23 DELTA_BASE_REUSE_FORCE,
24 24 DELTA_BASE_REUSE_NO,
25 25 KIND_CHANGELOG,
26 26 KIND_FILELOG,
27 27 KIND_MANIFESTLOG,
28 28 REVIDX_ISCENSORED,
29 29 REVIDX_RAWTEXT_CHANGING_FLAGS,
30 30 )
31 31
32 32 from ..thirdparty import attr
33 33
34 34 from .. import (
35 35 error,
36 36 mdiff,
37 37 util,
38 38 )
39 39
40 40 from . import flagutil
41 41
42 42 # maximum <delta-chain-data>/<revision-text-length> ratio
43 43 LIMIT_DELTA2TEXT = 2
44 44
45 45
46 46 class _testrevlog:
47 47 """minimalist fake revlog to use in doctests"""
48 48
49 49 def __init__(self, data, density=0.5, mingap=0, snapshot=()):
50 50 """data is an list of revision payload boundaries"""
51 51 self._data = data
52 52 self._srdensitythreshold = density
53 53 self._srmingapsize = mingap
54 54 self._snapshot = set(snapshot)
55 55 self.index = None
56 56
57 57 def start(self, rev):
58 58 if rev == nullrev:
59 59 return 0
60 60 if rev == 0:
61 61 return 0
62 62 return self._data[rev - 1]
63 63
64 64 def end(self, rev):
65 65 if rev == nullrev:
66 66 return 0
67 67 return self._data[rev]
68 68
69 69 def length(self, rev):
70 70 return self.end(rev) - self.start(rev)
71 71
72 72 def __len__(self):
73 73 return len(self._data)
74 74
75 75 def issnapshot(self, rev):
76 76 if rev == nullrev:
77 77 return True
78 78 return rev in self._snapshot
79 79
80 80
81 81 def slicechunk(revlog, revs, targetsize=None):
82 82 """slice revs to reduce the amount of unrelated data to be read from disk.
83 83
84 84 ``revs`` is sliced into groups that should be read in one time.
85 85 Assume that revs are sorted.
86 86
87 87 The initial chunk is sliced until the overall density (payload/chunks-span
88 88 ratio) is above `revlog._srdensitythreshold`. No gap smaller than
89 89 `revlog._srmingapsize` is skipped.
90 90
91 91 If `targetsize` is set, no chunk larger than `targetsize` will be yielded.
92 92 For consistency with other slicing choices, this limit won't go lower than
93 93 `revlog._srmingapsize`.
94 94
95 95 If an individual revision chunk is larger than this limit, it will still
96 96 be yielded individually.
97 97
98 98 >>> data = [
99 99 ... 5, #00 (5)
100 100 ... 10, #01 (5)
101 101 ... 12, #02 (2)
102 102 ... 12, #03 (empty)
103 103 ... 27, #04 (15)
104 104 ... 31, #05 (4)
105 105 ... 31, #06 (empty)
106 106 ... 42, #07 (11)
107 107 ... 47, #08 (5)
108 108 ... 47, #09 (empty)
109 109 ... 48, #10 (1)
110 110 ... 51, #11 (3)
111 111 ... 74, #12 (23)
112 112 ... 85, #13 (11)
113 113 ... 86, #14 (1)
114 114 ... 91, #15 (5)
115 115 ... ]
116 116 >>> revlog = _testrevlog(data, snapshot=range(16))
117 117
118 118 >>> list(slicechunk(revlog, list(range(16))))
119 119 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
120 120 >>> list(slicechunk(revlog, [0, 15]))
121 121 [[0], [15]]
122 122 >>> list(slicechunk(revlog, [0, 11, 15]))
123 123 [[0], [11], [15]]
124 124 >>> list(slicechunk(revlog, [0, 11, 13, 15]))
125 125 [[0], [11, 13, 15]]
126 126 >>> list(slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
127 127 [[1, 2], [5, 8, 10, 11], [14]]
128 128
129 129 Slicing with a maximum chunk size
130 130 >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
131 131 [[0], [11], [13], [15]]
132 132 >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
133 133 [[0], [11], [13, 15]]
134 134
135 135 Slicing involving nullrev
136 136 >>> list(slicechunk(revlog, [-1, 0, 11, 13, 15], targetsize=20))
137 137 [[-1, 0], [11], [13, 15]]
138 138 >>> list(slicechunk(revlog, [-1, 13, 15], targetsize=5))
139 139 [[-1], [13], [15]]
140 140 """
141 141 if targetsize is not None:
142 142 targetsize = max(targetsize, revlog._srmingapsize)
143 143 # targetsize should not be specified when evaluating delta candidates:
144 144 # * targetsize is used to ensure we stay within specification when reading,
145 145 densityslicing = getattr(revlog.index, 'slicechunktodensity', None)
146 146 if densityslicing is None:
147 147 densityslicing = lambda x, y, z: _slicechunktodensity(revlog, x, y, z)
148 148 for chunk in densityslicing(
149 149 revs, revlog._srdensitythreshold, revlog._srmingapsize
150 150 ):
151 151 for subchunk in _slicechunktosize(revlog, chunk, targetsize):
152 152 yield subchunk
153 153
154 154
155 155 def _slicechunktosize(revlog, revs, targetsize=None):
156 156 """slice revs to match the target size
157 157
158 158 This is intended to be used on chunks that density slicing selected but that
159 159 are still too large compared to the read guarantee of the revlog. This might
160 160 happen when the "minimal gap size" interrupted the slicing or when chains are
161 161 built in a way that creates large blocks next to each other.
162 162
163 163 >>> data = [
164 164 ... 3, #0 (3)
165 165 ... 5, #1 (2)
166 166 ... 6, #2 (1)
167 167 ... 8, #3 (2)
168 168 ... 8, #4 (empty)
169 169 ... 11, #5 (3)
170 170 ... 12, #6 (1)
171 171 ... 13, #7 (1)
172 172 ... 14, #8 (1)
173 173 ... ]
174 174
175 175 == All snapshots cases ==
176 176 >>> revlog = _testrevlog(data, snapshot=range(9))
177 177
178 178 Cases where chunk is already small enough
179 179 >>> list(_slicechunktosize(revlog, [0], 3))
180 180 [[0]]
181 181 >>> list(_slicechunktosize(revlog, [6, 7], 3))
182 182 [[6, 7]]
183 183 >>> list(_slicechunktosize(revlog, [0], None))
184 184 [[0]]
185 185 >>> list(_slicechunktosize(revlog, [6, 7], None))
186 186 [[6, 7]]
187 187
188 188 cases where we need actual slicing
189 189 >>> list(_slicechunktosize(revlog, [0, 1], 3))
190 190 [[0], [1]]
191 191 >>> list(_slicechunktosize(revlog, [1, 3], 3))
192 192 [[1], [3]]
193 193 >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
194 194 [[1, 2], [3]]
195 195 >>> list(_slicechunktosize(revlog, [3, 5], 3))
196 196 [[3], [5]]
197 197 >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
198 198 [[3], [5]]
199 199 >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
200 200 [[5], [6, 7, 8]]
201 201 >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
202 202 [[0], [1, 2], [3], [5], [6, 7, 8]]
203 203
204 204 Case with too large individual chunk (must return valid chunk)
205 205 >>> list(_slicechunktosize(revlog, [0, 1], 2))
206 206 [[0], [1]]
207 207 >>> list(_slicechunktosize(revlog, [1, 3], 1))
208 208 [[1], [3]]
209 209 >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
210 210 [[3], [5]]
211 211
212 212 == No Snapshot cases ==
213 213 >>> revlog = _testrevlog(data)
214 214
215 215 Cases where chunk is already small enough
216 216 >>> list(_slicechunktosize(revlog, [0], 3))
217 217 [[0]]
218 218 >>> list(_slicechunktosize(revlog, [6, 7], 3))
219 219 [[6, 7]]
220 220 >>> list(_slicechunktosize(revlog, [0], None))
221 221 [[0]]
222 222 >>> list(_slicechunktosize(revlog, [6, 7], None))
223 223 [[6, 7]]
224 224
225 225 cases where we need actual slicing
226 226 >>> list(_slicechunktosize(revlog, [0, 1], 3))
227 227 [[0], [1]]
228 228 >>> list(_slicechunktosize(revlog, [1, 3], 3))
229 229 [[1], [3]]
230 230 >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
231 231 [[1], [2, 3]]
232 232 >>> list(_slicechunktosize(revlog, [3, 5], 3))
233 233 [[3], [5]]
234 234 >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
235 235 [[3], [4, 5]]
236 236 >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
237 237 [[5], [6, 7, 8]]
238 238 >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
239 239 [[0], [1, 2], [3], [5], [6, 7, 8]]
240 240
241 241 Case with too large individual chunk (must return valid chunk)
242 242 >>> list(_slicechunktosize(revlog, [0, 1], 2))
243 243 [[0], [1]]
244 244 >>> list(_slicechunktosize(revlog, [1, 3], 1))
245 245 [[1], [3]]
246 246 >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
247 247 [[3], [5]]
248 248
249 249 == mixed case ==
250 250 >>> revlog = _testrevlog(data, snapshot=[0, 1, 2])
251 251 >>> list(_slicechunktosize(revlog, list(range(9)), 5))
252 252 [[0, 1], [2], [3, 4, 5], [6, 7, 8]]
253 253 """
254 254 assert targetsize is None or 0 <= targetsize
255 255 startdata = revlog.start(revs[0])
256 256 enddata = revlog.end(revs[-1])
257 257 fullspan = enddata - startdata
258 258 if targetsize is None or fullspan <= targetsize:
259 259 yield revs
260 260 return
261 261
262 262 startrevidx = 0
263 263 endrevidx = 1
264 264 iterrevs = enumerate(revs)
265 265 next(iterrevs) # skip first rev.
266 266 # first step: get snapshots out of the way
267 267 for idx, r in iterrevs:
268 268 span = revlog.end(r) - startdata
269 269 snapshot = revlog.issnapshot(r)
270 270 if span <= targetsize and snapshot:
271 271 endrevidx = idx + 1
272 272 else:
273 273 chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
274 274 if chunk:
275 275 yield chunk
276 276 startrevidx = idx
277 277 startdata = revlog.start(r)
278 278 endrevidx = idx + 1
279 279 if not snapshot:
280 280 break
281 281
282 282 # for the others, we use binary slicing to quickly converge toward valid
283 283 # chunks (otherwise, we might end up looking for start/end of many
284 284 # revisions). This logic is not looking for the perfect slicing point, it
285 285 # focuses on quickly converging toward valid chunks.
286 286 nbitem = len(revs)
287 287 while (enddata - startdata) > targetsize:
288 288 endrevidx = nbitem
289 289 if nbitem - startrevidx <= 1:
290 290 break # protect against individual chunk larger than limit
291 291 localenddata = revlog.end(revs[endrevidx - 1])
292 292 span = localenddata - startdata
293 293 while span > targetsize:
294 294 if endrevidx - startrevidx <= 1:
295 295 break # protect against individual chunk larger than limit
296 296 endrevidx -= (endrevidx - startrevidx) // 2
297 297 localenddata = revlog.end(revs[endrevidx - 1])
298 298 span = localenddata - startdata
299 299 chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
300 300 if chunk:
301 301 yield chunk
302 302 startrevidx = endrevidx
303 303 startdata = revlog.start(revs[startrevidx])
304 304
305 305 chunk = _trimchunk(revlog, revs, startrevidx)
306 306 if chunk:
307 307 yield chunk
308 308
309 309
310 310 def _slicechunktodensity(revlog, revs, targetdensity=0.5, mingapsize=0):
311 311 """slice revs to reduce the amount of unrelated data to be read from disk.
312 312
313 313 ``revs`` is sliced into groups that should be read in one time.
314 314 Assume that revs are sorted.
315 315
316 316 The initial chunk is sliced until the overall density (payload/chunks-span
317 317 ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
318 318 skipped.
319 319
320 320 >>> revlog = _testrevlog([
321 321 ... 5, #00 (5)
322 322 ... 10, #01 (5)
323 323 ... 12, #02 (2)
324 324 ... 12, #03 (empty)
325 325 ... 27, #04 (15)
326 326 ... 31, #05 (4)
327 327 ... 31, #06 (empty)
328 328 ... 42, #07 (11)
329 329 ... 47, #08 (5)
330 330 ... 47, #09 (empty)
331 331 ... 48, #10 (1)
332 332 ... 51, #11 (3)
333 333 ... 74, #12 (23)
334 334 ... 85, #13 (11)
335 335 ... 86, #14 (1)
336 336 ... 91, #15 (5)
337 337 ... ])
338 338
339 339 >>> list(_slicechunktodensity(revlog, list(range(16))))
340 340 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
341 341 >>> list(_slicechunktodensity(revlog, [0, 15]))
342 342 [[0], [15]]
343 343 >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
344 344 [[0], [11], [15]]
345 345 >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
346 346 [[0], [11, 13, 15]]
347 347 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
348 348 [[1, 2], [5, 8, 10, 11], [14]]
349 349 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
350 350 ... mingapsize=20))
351 351 [[1, 2, 3, 5, 8, 10, 11], [14]]
352 352 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
353 353 ... targetdensity=0.95))
354 354 [[1, 2], [5], [8, 10, 11], [14]]
355 355 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
356 356 ... targetdensity=0.95, mingapsize=12))
357 357 [[1, 2], [5, 8, 10, 11], [14]]
358 358 """
359 359 start = revlog.start
360 360 length = revlog.length
361 361
362 362 if len(revs) <= 1:
363 363 yield revs
364 364 return
365 365
366 366 deltachainspan = segmentspan(revlog, revs)
367 367
368 368 if deltachainspan < mingapsize:
369 369 yield revs
370 370 return
371 371
372 372 readdata = deltachainspan
373 373 chainpayload = sum(length(r) for r in revs)
374 374
375 375 if deltachainspan:
376 376 density = chainpayload / float(deltachainspan)
377 377 else:
378 378 density = 1.0
379 379
380 380 if density >= targetdensity:
381 381 yield revs
382 382 return
383 383
384 384 # Collect the gaps; they are sorted below and popped largest-first
385 385 gaps = []
386 386 prevend = None
387 387 for i, rev in enumerate(revs):
388 388 revstart = start(rev)
389 389 revlen = length(rev)
390 390
391 391 # Skip empty revisions to form larger holes
392 392 if revlen == 0:
393 393 continue
394 394
395 395 if prevend is not None:
396 396 gapsize = revstart - prevend
397 397 # only consider holes that are large enough
398 398 if gapsize > mingapsize:
399 399 gaps.append((gapsize, i))
400 400
401 401 prevend = revstart + revlen
402 402 # sort the gaps to pop them from largest to smallest
403 403 gaps.sort()
404 404
405 405 # Collect the indices of the largest holes until the density is acceptable
406 406 selected = []
407 407 while gaps and density < targetdensity:
408 408 gapsize, gapidx = gaps.pop()
409 409
410 410 selected.append(gapidx)
411 411
412 412 # subtracting the gap we skip shrinks the amount of data
413 413 # that will actually be read
414 414 readdata -= gapsize
415 415 if readdata > 0:
416 416 density = chainpayload / float(readdata)
417 417 else:
418 418 density = 1.0
419 419 selected.sort()
420 420
421 421 # Cut the revs at collected indices
422 422 previdx = 0
423 423 for idx in selected:
424 424
425 425 chunk = _trimchunk(revlog, revs, previdx, idx)
426 426 if chunk:
427 427 yield chunk
428 428
429 429 previdx = idx
430 430
431 431 chunk = _trimchunk(revlog, revs, previdx)
432 432 if chunk:
433 433 yield chunk
434 434
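To make the density criterion concrete, here is a standalone sketch (plain Python, independent of the revlog objects above; the helper name `density` is illustrative) that computes the payload/span ratio for a group of revisions, using the same cumulative-boundary encoding as `_testrevlog`:

def density(boundaries, revs):
    # `boundaries[r]` is the end offset of revision r, as in `_testrevlog`
    start = lambda r: boundaries[r - 1] if r > 0 else 0
    length = lambda r: boundaries[r] - start(r)
    span = boundaries[revs[-1]] - start(revs[0])  # bytes we would read
    payload = sum(length(r) for r in revs)  # bytes we actually need
    return payload / span if span else 1.0

data = [5, 10, 12, 12, 27, 31, 31, 42, 47, 47, 48, 51, 74, 85, 86, 91]
print(density(data, [0, 15]))  # ~0.11: far below the 0.5 threshold
print(density(data, [0, 1, 2]))  # 1.0: perfectly dense

With the default `targetdensity=0.5`, the first group gets sliced apart (matching the `[[0], [15]]` doctest above) while the second is read in one go.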
435 435
436 436 def _trimchunk(revlog, revs, startidx, endidx=None):
437 437 """returns revs[startidx:endidx] without empty trailing revs
438 438
439 439 Doctest Setup
440 440 >>> revlog = _testrevlog([
441 441 ... 5, #0
442 442 ... 10, #1
443 443 ... 12, #2
444 444 ... 12, #3 (empty)
445 445 ... 17, #4
446 446 ... 21, #5
447 447 ... 21, #6 (empty)
448 448 ... ])
449 449
450 450 Contiguous cases:
451 451 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
452 452 [0, 1, 2, 3, 4, 5]
453 453 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
454 454 [0, 1, 2, 3, 4]
455 455 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
456 456 [0, 1, 2]
457 457 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
458 458 [2]
459 459 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
460 460 [3, 4, 5]
461 461 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
462 462 [3, 4]
463 463
464 464 Discontiguous cases:
465 465 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
466 466 [1, 3, 5]
467 467 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
468 468 [1]
469 469 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
470 470 [3, 5]
471 471 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
472 472 [3, 5]
473 473 """
474 474 length = revlog.length
475 475
476 476 if endidx is None:
477 477 endidx = len(revs)
478 478
479 479 # If we have a non-empty delta candidate, there is nothing to trim
480 480 if revs[endidx - 1] < len(revlog):
481 481 # Trim empty revs at the end, except the very first revision of a chain
482 482 while (
483 483 endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0
484 484 ):
485 485 endidx -= 1
486 486
487 487 return revs[startidx:endidx]
488 488
489 489
490 490 def segmentspan(revlog, revs):
491 491 """Get the byte span of a segment of revisions
492 492
493 493 revs is a sorted array of revision numbers
494 494
495 495 >>> revlog = _testrevlog([
496 496 ... 5, #0
497 497 ... 10, #1
498 498 ... 12, #2
499 499 ... 12, #3 (empty)
500 500 ... 17, #4
501 501 ... ])
502 502
503 503 >>> segmentspan(revlog, [0, 1, 2, 3, 4])
504 504 17
505 505 >>> segmentspan(revlog, [0, 4])
506 506 17
507 507 >>> segmentspan(revlog, [3, 4])
508 508 5
509 509 >>> segmentspan(revlog, [1, 2, 3,])
510 510 7
511 511 >>> segmentspan(revlog, [1, 3])
512 512 7
513 513 """
514 514 if not revs:
515 515 return 0
516 516 end = revlog.end(revs[-1])
517 517 return end - revlog.start(revs[0])
518 518
519 519
520 520 def _textfromdelta(fh, revlog, baserev, delta, p1, p2, flags, expectednode):
521 521 """build full text from a (base, delta) pair and other metadata"""
522 522 # special case deltas which replace entire base; no need to decode
523 523 # base revision. this neatly avoids censored bases, which throw when
524 524 # they're decoded.
525 525 hlen = struct.calcsize(b">lll")
526 526 if delta[:hlen] == mdiff.replacediffheader(
527 527 revlog.rawsize(baserev), len(delta) - hlen
528 528 ):
529 529 fulltext = delta[hlen:]
530 530 else:
531 531 # deltabase is rawtext before changed by flag processors, which is
532 532 # equivalent to non-raw text
533 533 basetext = revlog.revision(baserev, _df=fh)
534 534 fulltext = mdiff.patch(basetext, delta)
535 535
536 536 try:
537 537 validatehash = flagutil.processflagsraw(revlog, fulltext, flags)
538 538 if validatehash:
539 539 revlog.checkhash(fulltext, expectednode, p1=p1, p2=p2)
540 540 if flags & REVIDX_ISCENSORED:
541 541 raise error.StorageError(
542 542 _(b'node %s is not censored') % expectednode
543 543 )
544 544 except error.CensoredNodeError:
545 545 # must pass the censored index flag to add censored revisions
546 546 if not flags & REVIDX_ISCENSORED:
547 547 raise
548 548 return fulltext
549 549
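The special case at the top of `_textfromdelta` relies on the bdiff delta encoding: a delta is a sequence of `>lll` headers `(start, end, newlength)`, each followed by `newlength` bytes of replacement data, so a delta replacing the entire base starts with one fixed, predictable header. A minimal standalone sketch of that detection (plain `struct`, mirroring what `mdiff.replacediffheader` packs):

import struct

def replacediffheader(basesize, newsize):
    # header of a hunk replacing base[0:basesize] with `newsize` new bytes
    return struct.pack(b">lll", 0, basesize, newsize)

base = b"old content"
new = b"completely new content"
delta = replacediffheader(len(base), len(new)) + new

hlen = struct.calcsize(b">lll")
if delta[:hlen] == replacediffheader(len(base), len(new)):
    # whole-base replacement: no need to decode the (possibly
    # censored) base revision at all
    fulltext = delta[hlen:]
assert fulltext == new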
550 550
551 551 @attr.s(slots=True, frozen=True)
552 552 class _deltainfo:
553 553 distance = attr.ib()
554 554 deltalen = attr.ib()
555 555 data = attr.ib()
556 556 base = attr.ib()
557 557 chainbase = attr.ib()
558 558 chainlen = attr.ib()
559 559 compresseddeltalen = attr.ib()
560 560 snapshotdepth = attr.ib()
561 561
562 562
563 563 def drop_u_compression(delta):
564 564 """turn into a "u" (no-compression) into no-compression without header
565 565
566 566 This is useful for revlog format that has better compression method.
567 567 """
568 568 assert delta.data[0] == b'u', delta.data[0]
569 569 return _deltainfo(
570 570 delta.distance,
571 571 delta.deltalen - 1,
572 572 (b'', delta.data[1]),
573 573 delta.base,
574 574 delta.chainbase,
575 575 delta.chainlen,
576 576 delta.compresseddeltalen,
577 577 delta.snapshotdepth,
578 578 )
579 579
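For context on the `b'u'` marker, a hedged sketch: `revlog.compress()` returns a `(header, data)` pair, and when compressing does not actually shrink the text the raw bytes are kept behind a one-byte `b'u'` header. Revlog formats that record a per-entry compression mode can store the raw bytes without that header byte, which is what `drop_u_compression` arranges. A simplified stand-in:

import zlib

def compress(text):
    # simplified stand-in for revlog.compress(): keep the compressed
    # form only when it is smaller, else mark the raw text with b'u'
    comp = zlib.compress(text)
    if len(comp) < len(text):
        return b'', comp
    return b'u', text

header, data = compress(b'ab')  # too small for zlib to help
assert (header, data) == (b'u', b'ab')
# a v2+ revlog would store `data` alone and record COMP_MODE_PLAIN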
580 580
581 581 def is_good_delta_info(revlog, deltainfo, revinfo):
582 582 """Returns True if the given delta is good. Good means that it is within
583 583 the disk span, disk size, and chain length bounds that we know to be
584 584 performant."""
585 585 if deltainfo is None:
586 586 return False
587 587
588 if (
589 revinfo.cachedelta is not None
590 and deltainfo.base == revinfo.cachedelta[0]
591 and revinfo.cachedelta[2] == DELTA_BASE_REUSE_FORCE
592 ):
593 return True
588 # the DELTA_BASE_REUSE_FORCE case should have been taken care of sooner, so
589 # we should never end up asking such a question. Adding the assert as a
590 # safeguard to detect anything that would be fishy in this regard.
591 assert (
592 revinfo.cachedelta is None
593 or revinfo.cachedelta[2] != DELTA_BASE_REUSE_FORCE
594 or not revlog._generaldelta
595 )
594 596
595 597 # - 'deltainfo.distance' is the distance from the base revision --
596 598 # bounding it limits the amount of I/O we need to do.
597 599 # - 'deltainfo.compresseddeltalen' is the sum of the total size of
598 600 # deltas we need to apply -- bounding it limits the amount of CPU
599 601 # we consume.
600 602
601 603 textlen = revinfo.textlen
602 604 defaultmax = textlen * 4
603 605 maxdist = revlog._maxdeltachainspan
604 606 if not maxdist:
605 607 maxdist = deltainfo.distance # ensure the conditional pass
606 608 maxdist = max(maxdist, defaultmax)
607 609
608 610 # Bad delta from read span:
609 611 #
610 612 # If the span of data read is larger than the maximum allowed.
611 613 #
612 614 # In the sparse-revlog case, we rely on the associated "sparse reading"
613 615 # to avoid issues related to the span of data. In theory, it would be
614 616 # possible to build a pathological revlog where delta patterns would lead
615 617 # to too many reads. However, this does not happen in practice at all. So
616 618 # we skip the span check entirely.
617 619 if not revlog._sparserevlog and maxdist < deltainfo.distance:
618 620 return False
619 621
620 622 # Bad delta from new delta size:
621 623 #
622 624 # If the delta size is larger than the target text, storing the
623 625 # delta will be inefficient.
624 626 if textlen < deltainfo.deltalen:
625 627 return False
626 628
627 629 # Bad delta from cumulated payload size:
628 630 #
629 631 # If the sum of deltas gets larger than K * the target text length.
630 632 if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
631 633 return False
632 634
633 635 # Bad delta from chain length:
634 636 #
635 637 # If the number of deltas in the chain gets too high.
636 638 if revlog._maxchainlen and revlog._maxchainlen < deltainfo.chainlen:
637 639 return False
638 640
639 641 # bad delta from intermediate snapshot size limit
640 642 #
641 643 # If an intermediate snapshot size is higher than the limit. The
642 644 # limit exists to prevent endless chains of intermediate deltas from
643 645 # being created.
644 646 if (
645 647 deltainfo.snapshotdepth is not None
646 648 and (textlen >> deltainfo.snapshotdepth) < deltainfo.deltalen
647 649 ):
648 650 return False
649 651
650 652 # bad delta if new intermediate snapshot is larger than the previous
651 653 # snapshot
652 654 if (
653 655 deltainfo.snapshotdepth
654 656 and revlog.length(deltainfo.base) < deltainfo.deltalen
655 657 ):
656 658 return False
657 659
658 660 return True
659 661
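The `textlen >> deltainfo.snapshotdepth` test above enforces a geometric bound: an intermediate snapshot at depth d may not be larger than textlen / 2^d, so each level of the snapshot tree is at most half the size of the level above it, which keeps snapshot chains cheap to read. A quick illustration of the arithmetic (the function name is illustrative):

def within_snapshot_limit(deltalen, textlen, snapshotdepth):
    # mirrors the check above: reject when (textlen >> depth) < deltalen
    return (textlen >> snapshotdepth) >= deltalen

textlen = 1_000_000
print(within_snapshot_limit(400_000, textlen, 1))  # True: limit is 500_000
print(within_snapshot_limit(400_000, textlen, 2))  # False: limit is 250_000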
660 662
661 663 # If a revision's full text is that much bigger than a base candidate full
662 664 # text's, it is very unlikely that it will produce a valid delta. We no longer
663 665 # consider these candidates.
664 666 LIMIT_BASE2TEXT = 500
665 667
666 668
667 669 def _candidategroups(
668 670 revlog,
669 671 textlen,
670 672 p1,
671 673 p2,
672 674 cachedelta,
673 675 excluded_bases=None,
674 676 target_rev=None,
675 677 snapshot_cache=None,
676 678 ):
677 679 """Provides group of revision to be tested as delta base
678 680
679 681 This top level function focus on emitting groups with unique and worthwhile
680 682 content. See _raw_candidate_groups for details about the group order.
681 683 """
682 684 # should we try to build a delta?
683 685 if not (len(revlog) and revlog._storedeltachains):
684 686 yield None
685 687 return
686 688
687 689 if target_rev is None:
688 690 target_rev = len(revlog)
689 691
690 692 if not revlog._generaldelta:
691 693 # before general delta, there is only one possible delta base
692 694 yield (target_rev - 1,)
693 695 yield None
694 696 return
695 697
696 if (
697 cachedelta is not None
698 and nullrev == cachedelta[0]
699 and cachedelta[2] == DELTA_BASE_REUSE_FORCE
700 ):
701 # instructions are to forcibly do a full snapshot
702 yield None
703 return
698 # the DELTA_BASE_REUSE_FORCE case should have been taken care of sooner, so
699 # we should never end up asking such a question. Adding the assert as a
700 # safeguard to detect anything that would be fishy in this regard.
701 assert (
702 cachedelta is None
703 or cachedelta[2] != DELTA_BASE_REUSE_FORCE
704 or not revlog._generaldelta
705 )
704 706
705 707 deltalength = revlog.length
706 708 deltaparent = revlog.deltaparent
707 709 sparse = revlog._sparserevlog
708 710 good = None
709 711
710 712 deltas_limit = textlen * LIMIT_DELTA2TEXT
711 713 group_chunk_size = revlog._candidate_group_chunk_size
712 714
713 715 tested = {nullrev}
714 716 candidates = _refinedgroups(
715 717 revlog,
716 718 p1,
717 719 p2,
718 720 cachedelta,
719 721 snapshot_cache=snapshot_cache,
720 722 )
721 723 while True:
722 724 temptative = candidates.send(good)
723 725 if temptative is None:
724 726 break
725 727 group = []
726 728 for rev in temptative:
727 729 # skip over empty deltas (no need to include them in a chain)
728 730 while not (rev == nullrev or rev in tested or deltalength(rev)):
729 731 tested.add(rev)
730 732 rev = deltaparent(rev)
731 733 # no need to try a delta against nullrev, this will be done as a
732 734 # last resort.
733 735 if rev == nullrev:
734 736 continue
735 737 # filter out revisions we tested already
736 738 if rev in tested:
737 739 continue
738 740
739 if (
740 cachedelta is not None
741 and rev == cachedelta[0]
742 and cachedelta[2] == DELTA_BASE_REUSE_FORCE
743 ):
744 # instructions are to forcibly consider/use this delta base
745 group.append(rev)
746 continue
747
748 741 # a higher authority deemed the base unworthy (e.g. censored)
749 742 if excluded_bases is not None and rev in excluded_bases:
750 743 tested.add(rev)
751 744 continue
752 745 # We are in some recomputation cases and that rev is too high in
753 746 # the revlog
754 747 if target_rev is not None and rev >= target_rev:
755 748 tested.add(rev)
756 749 continue
757 750 # filter out delta bases that will never produce a good delta
758 751 if deltas_limit < revlog.length(rev):
759 752 tested.add(rev)
760 753 continue
761 754 if sparse and revlog.rawsize(rev) < (textlen // LIMIT_BASE2TEXT):
762 755 tested.add(rev)
763 756 continue
764 757 # no delta for rawtext-changing revs (see "candelta" for why)
765 758 if revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
766 759 tested.add(rev)
767 760 continue
768 761
769 762 # If we reach here, we are about to build and test a delta.
770 763 # The delta building process will compute the chaininfo in all
771 764 # cases; since that computation is cached, it is fine to access it
772 765 # here too.
773 766 chainlen, chainsize = revlog._chaininfo(rev)
774 767 # if chain will be too long, skip base
775 768 if revlog._maxchainlen and chainlen >= revlog._maxchainlen:
776 769 tested.add(rev)
777 770 continue
778 771 # if chain already has too much data, skip base
779 772 if deltas_limit < chainsize:
780 773 tested.add(rev)
781 774 continue
782 775 if sparse and revlog.upperboundcomp is not None:
783 776 maxcomp = revlog.upperboundcomp
784 777 basenotsnap = (p1, p2, nullrev)
785 778 if rev not in basenotsnap and revlog.issnapshot(rev):
786 779 snapshotdepth = revlog.snapshotdepth(rev)
787 780 # If text is significantly larger than the base, we can
788 781 # expect the resulting delta to be proportional to the size
789 782 # difference
790 783 revsize = revlog.rawsize(rev)
791 784 rawsizedistance = max(textlen - revsize, 0)
792 785 # use an estimate of the compression upper bound.
793 786 lowestrealisticdeltalen = rawsizedistance // maxcomp
794 787
795 788 # check the absolute constraint on the delta size
796 789 snapshotlimit = textlen >> snapshotdepth
797 790 if snapshotlimit < lowestrealisticdeltalen:
798 791 # delta lower bound is larger than accepted upper bound
799 792 tested.add(rev)
800 793 continue
801 794
802 795 # check the relative constraint on the delta size
803 796 revlength = revlog.length(rev)
804 797 if revlength < lowestrealisticdeltalen:
805 798 # the delta's probable lower bound is larger than the target base
806 799 tested.add(rev)
807 800 continue
808 801
809 802 group.append(rev)
810 803 if group:
811 804 # When the size of the candidate group is big, it can result in a
812 805 # quite significant performance impact. To reduce this, we can send
813 806 # them in smaller batches until the new batch does not provide any
814 807 # improvements.
815 808 #
816 809 # This might reduce the overall efficiency of the compression in
817 810 # some corner cases, but that should also prevent very pathological
818 811 # cases from being an issue (e.g. 20 000 candidates).
819 812 #
820 813 # XXX note that the ordering of the group becomes important as it
821 814 # now impacts the final result. The current order is unprocessed
822 815 # and can be improved.
823 816 if group_chunk_size == 0:
824 817 tested.update(group)
825 818 good = yield tuple(group)
826 819 else:
827 820 prev_good = good
828 821 for start in range(0, len(group), group_chunk_size):
829 822 sub_group = group[start : start + group_chunk_size]
830 823 tested.update(sub_group)
831 824 good = yield tuple(sub_group)
832 825 if prev_good == good:
833 826 break
834 827
835 828 yield None
836 829
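`_candidategroups` is driven as a coroutine: the caller pulls a tuple of candidate bases, tests them, and `send()`s back the winning base (or `None`), which lets the generator decide whether to refine or move on, exactly as `finddeltainfo` does below with `next(groups)` and `groups.send(deltainfo.base)`. A toy standalone sketch of that protocol (the groups and the "good delta" test are made up for illustration):

def candidate_groups():
    good = yield (10, 9)  # e.g. try the parents first
    if good is None:
        good = yield (5,)  # e.g. then a snapshot further away
    yield None  # exhausted

groups = candidate_groups()
best = None
group = next(groups)
while group is not None:
    for rev in group:
        if rev % 2 == 0:  # stand-in for "rev produced a good delta"
            best = rev
    group = groups.send(best)
print(best)  # 10: found in the first group, so the second is skipped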
837 830
838 831 def _refinedgroups(revlog, p1, p2, cachedelta, snapshot_cache=None):
839 832 good = None
840 833 # First we try to reuse the delta contained in the bundle.
841 834 # (or from the source revlog)
842 835 #
843 836 # This logic only applies to general delta repositories and can be disabled
844 837 # through configuration. Disabling source delta reuse is useful when
845 838 # we want to make sure we recompute "optimal" deltas.
846 839 debug_info = None
847 840 if cachedelta is not None and cachedelta[2] > DELTA_BASE_REUSE_NO:
848 841 # Assume what we received from the server is a good choice
849 842 # the delta build will reuse the cache
850 843 if debug_info is not None:
851 844 debug_info['cached-delta.tested'] += 1
852 845 good = yield (cachedelta[0],)
853 846 if good is not None:
854 847 if debug_info is not None:
855 848 debug_info['cached-delta.accepted'] += 1
856 849 yield None
857 850 return
858 851 if snapshot_cache is None:
859 852 snapshot_cache = SnapshotCache()
860 853 groups = _rawgroups(
861 854 revlog,
862 855 p1,
863 856 p2,
864 857 cachedelta,
865 858 snapshot_cache,
866 859 )
867 860 for candidates in groups:
868 861 good = yield candidates
869 862 if good is not None:
870 863 break
871 864
872 865 # If sparse revlog is enabled, we can try to refine the available deltas
873 866 if not revlog._sparserevlog:
874 867 yield None
875 868 return
876 869
877 870 # if we have a refinable value, try to refine it
878 871 if good is not None and good not in (p1, p2) and revlog.issnapshot(good):
879 872 # refine snapshot down
880 873 previous = None
881 874 while previous != good:
882 875 previous = good
883 876 base = revlog.deltaparent(good)
884 877 if base == nullrev:
885 878 break
886 879 good = yield (base,)
887 880 # refine snapshot up
888 881 if not snapshot_cache.snapshots:
889 882 snapshot_cache.update(revlog, good + 1)
890 883 previous = None
891 884 while good != previous:
892 885 previous = good
893 886 children = tuple(sorted(c for c in snapshot_cache.snapshots[good]))
894 887 good = yield children
895 888
896 889 if debug_info is not None:
897 890 if good is None:
898 891 debug_info['no-solution'] += 1
899 892
900 893 yield None
901 894
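The "refine snapshot down" loop above walks a winning snapshot base toward the root along `deltaparent`, re-testing each ancestor and stopping as soon as the answer stops improving. A standalone sketch over a toy parent map (the names and the improvement test are illustrative):

nullrev = -1
deltaparent = {9: 4, 4: 0, 0: nullrev}  # toy snapshot chain: 9 -> 4 -> 0

def improves(base):
    # stand-in for the caller sending back `base` as the new best delta
    return 4 if base == 4 else None

good = 9
previous = None
while previous != good:
    previous = good
    base = deltaparent[good]
    if base == nullrev:
        break
    refined = improves(base)
    if refined is not None:
        good = refined
print(good)  # 4: one level down was better, two levels down was not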
902 895
903 896 def _rawgroups(revlog, p1, p2, cachedelta, snapshot_cache=None):
904 897 """Provides group of revision to be tested as delta base
905 898
906 899 This lower level function focus on emitting delta theorically interresting
907 900 without looking it any practical details.
908 901
909 902 The group order aims at providing fast or small candidates first.
910 903 """
911 904 # Why search for a delta base if we cannot use a delta base?
912 905 assert revlog._generaldelta
913 906 # also see issue6056
914 907 sparse = revlog._sparserevlog
915 908 curr = len(revlog)
916 909 prev = curr - 1
917 910 deltachain = lambda rev: revlog._deltachain(rev)[0]
918 911
919 912 # exclude already lazy tested base if any
920 913 parents = [p for p in (p1, p2) if p != nullrev]
921 914
922 915 if not revlog._deltabothparents and len(parents) == 2:
923 916 parents.sort()
924 917 # To minimize the chance of having to build a fulltext,
925 918 # pick first whichever parent is closest to us (max rev)
926 919 yield (parents[1],)
927 920 # then the other one (min rev) if the first did not fit
928 921 yield (parents[0],)
929 922 elif len(parents) > 0:
930 923 # Test all parents (1 or 2), and keep the best candidate
931 924 yield parents
932 925
933 926 if sparse and parents:
934 927 if snapshot_cache is None:
935 928 # map: base-rev: [snapshot-revs]
936 929 snapshot_cache = SnapshotCache()
937 930 # See if we can find an existing snapshot in the parent chains to use as
938 931 # a base for a new intermediate snapshot
939 932 #
940 933 # search for snapshots in the parents' delta chains
941 934 # map: snapshot-level: snapshot-rev
942 935 parents_snaps = collections.defaultdict(set)
943 936 candidate_chains = [deltachain(p) for p in parents]
944 937 for chain in candidate_chains:
945 938 for idx, s in enumerate(chain):
946 939 if not revlog.issnapshot(s):
947 940 break
948 941 parents_snaps[idx].add(s)
949 942 snapfloor = min(parents_snaps[0]) + 1
950 943 snapshot_cache.update(revlog, snapfloor)
951 944 # search for the highest "unrelated" revision
952 945 #
953 946 # Adding snapshots used by "unrelated" revisions increases the odds that
954 947 # we reuse an independent, yet better, snapshot chain.
955 948 #
956 949 # XXX instead of building a set of revisions, we could lazily enumerate
957 950 # over the chains. That would be more efficient, however we stick to
958 951 # simple code for now.
959 952 all_revs = set()
960 953 for chain in candidate_chains:
961 954 all_revs.update(chain)
962 955 other = None
963 956 for r in revlog.revs(prev, snapfloor):
964 957 if r not in all_revs:
965 958 other = r
966 959 break
967 960 if other is not None:
968 961 # To avoid unfair competition, we won't use unrelated intermediate
969 962 # snapshots that are deeper than the ones from the parent delta
970 963 # chain.
971 964 max_depth = max(parents_snaps.keys())
972 965 chain = deltachain(other)
973 966 for depth, s in enumerate(chain):
974 967 if s < snapfloor:
975 968 continue
976 969 if max_depth < depth:
977 970 break
978 971 if not revlog.issnapshot(s):
979 972 break
980 973 parents_snaps[depth].add(s)
981 974 # Test them as possible intermediate snapshot base
982 975 # We test them from highest to lowest level. High level ones are more
983 976 # likely to result in small deltas
984 977 floor = None
985 978 for idx, snaps in sorted(parents_snaps.items(), reverse=True):
986 979 siblings = set()
987 980 for s in snaps:
988 981 siblings.update(snapshot_cache.snapshots[s])
989 982 # Before considering making a new intermediate snapshot, we check
990 983 # if an existing snapshot, a child of the base we consider, would be
991 984 # suitable.
992 985 #
993 986 # It gives a chance to reuse a delta chain "unrelated" to the
994 987 # current revision instead of starting our own. Without such
995 988 # re-use, topological branches would keep reopening new chains,
996 989 # creating more and more snapshots as the repository grows.
997 990
998 991 if floor is not None:
999 992 # We only do this for siblings created after the one in our
1000 993 # parent's delta chain. Those created before have less chance
1001 994 # of being a valid base since our ancestors had to create a new
1002 995 # snapshot.
1003 996 siblings = [r for r in siblings if floor < r]
1004 997 yield tuple(sorted(siblings))
1005 998 # then test the base from our parent's delta chain.
1006 999 yield tuple(sorted(snaps))
1007 1000 floor = min(snaps)
1008 1001 # No suitable base found in the parent chain, so search whether any full
1009 1002 # snapshot emitted since the parent's base would be a suitable base for an
1010 1003 # intermediate snapshot.
1011 1004 #
1012 1005 # It gives a chance to reuse a delta chain unrelated to the current
1013 1006 # revision instead of starting our own. Without such re-use,
1014 1007 # topological branches would keep reopening new full chains, creating
1015 1008 # more and more snapshots as the repository grows.
1016 1009 full = [r for r in snapshot_cache.snapshots[nullrev] if snapfloor <= r]
1017 1010 yield tuple(sorted(full))
1018 1011
1019 1012 if not sparse:
1020 1013 # other approaches failed, try against prev to hopefully save us a
1021 1014 # fulltext.
1022 1015 yield (prev,)
1023 1016
1024 1017
1025 1018 class SnapshotCache:
1026 1019 __slots__ = ('snapshots', '_start_rev', '_end_rev')
1027 1020
1028 1021 def __init__(self):
1029 1022 self.snapshots = collections.defaultdict(set)
1030 1023 self._start_rev = None
1031 1024 self._end_rev = None
1032 1025
1033 1026 def update(self, revlog, start_rev=0):
1034 1027 """find snapshots from start_rev to tip"""
1035 1028 nb_revs = len(revlog)
1036 1029 end_rev = nb_revs - 1
1037 1030 if start_rev > end_rev:
1038 1031 return # range is empty
1039 1032
1040 1033 if self._start_rev is None:
1041 1034 assert self._end_rev is None
1042 1035 self._update(revlog, start_rev, end_rev)
1043 1036 elif not (self._start_rev <= start_rev and end_rev <= self._end_rev):
1044 1037 if start_rev < self._start_rev:
1045 1038 self._update(revlog, start_rev, self._start_rev - 1)
1046 1039 if self._end_rev < end_rev:
1047 1040 self._update(revlog, self._end_rev + 1, end_rev)
1048 1041
1049 1042 if self._start_rev is None:
1050 1043 assert self._end_rev is None
1051 1044 self._end_rev = end_rev
1052 1045 self._start_rev = start_rev
1053 1046 else:
1054 1047 self._start_rev = min(self._start_rev, start_rev)
1055 1048 self._end_rev = max(self._end_rev, end_rev)
1056 1049 assert self._start_rev <= self._end_rev, (
1057 1050 self._start_rev,
1058 1051 self._end_rev,
1059 1052 )
1060 1053
1061 1054 def _update(self, revlog, start_rev, end_rev):
1062 1055 """internal method that actually do update content"""
1063 1056 assert self._start_rev is None or (
1064 1057 start_rev < self._start_rev or start_rev > self._end_rev
1065 1058 ), (self._start_rev, self._end_rev, start_rev, end_rev)
1066 1059 assert self._start_rev is None or (
1067 1060 end_rev < self._start_rev or end_rev > self._end_rev
1068 1061 ), (self._start_rev, self._end_rev, start_rev, end_rev)
1069 1062 cache = self.snapshots
1070 1063 if util.safehasattr(revlog.index, 'findsnapshots'):
1071 1064 revlog.index.findsnapshots(cache, start_rev, end_rev)
1072 1065 else:
1073 1066 deltaparent = revlog.deltaparent
1074 1067 issnapshot = revlog.issnapshot
1075 1068 for rev in revlog.revs(start_rev, end_rev):
1076 1069 if issnapshot(rev):
1077 1070 cache[deltaparent(rev)].add(rev)
1078 1071
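The resulting cache is simply a map from a snapshot's delta parent to the set of snapshot revisions built on it, with full snapshots keyed under `nullrev`. A hedged sketch of that structure, with made-up revision numbers:

import collections

nullrev = -1
# hypothetical (snapshot-rev, delta-parent) pairs found in a revlog
snapshots = [(0, nullrev), (3, 0), (7, 0), (12, nullrev), (15, 12)]

cache = collections.defaultdict(set)
for rev, base in snapshots:
    cache[base].add(rev)

print(sorted(cache[nullrev]))  # full snapshots: [0, 12]
print(sorted(cache[0]))  # level-1 snapshots based on rev 0: [3, 7]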
1079 1072
1080 1073 class deltacomputer:
1081 1074 def __init__(
1082 1075 self,
1083 1076 revlog,
1084 1077 write_debug=None,
1085 1078 debug_search=False,
1086 1079 debug_info=None,
1087 1080 ):
1088 1081 self.revlog = revlog
1089 1082 self._write_debug = write_debug
1090 1083 if write_debug is None:
1091 1084 self._debug_search = False
1092 1085 else:
1093 1086 self._debug_search = debug_search
1094 1087 self._debug_info = debug_info
1095 1088 self._snapshot_cache = SnapshotCache()
1096 1089
1097 1090 @property
1098 1091 def _gather_debug(self):
1099 1092 return self._write_debug is not None or self._debug_info is not None
1100 1093
1101 1094 def buildtext(self, revinfo, fh):
1102 1095 """Builds a fulltext version of a revision
1103 1096
1104 1097 revinfo: revisioninfo instance that contains all needed info
1105 1098 fh: file handle to either the .i or the .d revlog file,
1106 1099 depending on whether it is inlined or not
1107 1100 """
1108 1101 btext = revinfo.btext
1109 1102 if btext[0] is not None:
1110 1103 return btext[0]
1111 1104
1112 1105 revlog = self.revlog
1113 1106 cachedelta = revinfo.cachedelta
1114 1107 baserev = cachedelta[0]
1115 1108 delta = cachedelta[1]
1116 1109
1117 1110 fulltext = btext[0] = _textfromdelta(
1118 1111 fh,
1119 1112 revlog,
1120 1113 baserev,
1121 1114 delta,
1122 1115 revinfo.p1,
1123 1116 revinfo.p2,
1124 1117 revinfo.flags,
1125 1118 revinfo.node,
1126 1119 )
1127 1120 return fulltext
1128 1121
1129 1122 def _builddeltadiff(self, base, revinfo, fh):
1130 1123 revlog = self.revlog
1131 1124 t = self.buildtext(revinfo, fh)
1132 1125 if revlog.iscensored(base):
1133 1126 # deltas based on a censored revision must replace the
1134 1127 # full content in one patch, so delta works everywhere
1135 1128 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
1136 1129 delta = header + t
1137 1130 else:
1138 1131 ptext = revlog.rawdata(base, _df=fh)
1139 1132 delta = mdiff.textdiff(ptext, t)
1140 1133
1141 1134 return delta
1142 1135
1143 1136 def _builddeltainfo(self, revinfo, base, fh, target_rev=None):
1144 1137 # can we use the cached delta?
1145 1138 revlog = self.revlog
1146 1139 chainbase = revlog.chainbase(base)
1147 1140 if revlog._generaldelta:
1148 1141 deltabase = base
1149 1142 else:
1150 1143 if target_rev is not None and base != target_rev - 1:
1151 1144 msg = (
1152 1145 b'general delta cannot use delta for something else '
1153 1146 b'than `prev`: %d<-%d'
1154 1147 )
1155 1148 msg %= (base, target_rev)
1156 1149 raise error.ProgrammingError(msg)
1157 1150 deltabase = chainbase
1158 1151 snapshotdepth = None
1159 1152 if revlog._sparserevlog and deltabase == nullrev:
1160 1153 snapshotdepth = 0
1161 1154 elif revlog._sparserevlog and revlog.issnapshot(deltabase):
1162 1155 # A delta chain should always be one full snapshot,
1163 1156 # zero or more semi-snapshots, and zero or more deltas
1164 1157 p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
1165 1158 if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
1166 1159 snapshotdepth = len(revlog._deltachain(deltabase)[0])
1167 1160 delta = None
1168 1161 if revinfo.cachedelta:
1169 1162 cachebase = revinfo.cachedelta[0]
1170 1163 # check if the diff still applies
1171 1164 currentbase = cachebase
1172 1165 while (
1173 1166 currentbase != nullrev
1174 1167 and currentbase != base
1175 1168 and self.revlog.length(currentbase) == 0
1176 1169 ):
1177 1170 currentbase = self.revlog.deltaparent(currentbase)
1178 1171 if self.revlog._lazydelta and currentbase == base:
1179 1172 delta = revinfo.cachedelta[1]
1180 1173 if delta is None:
1181 1174 delta = self._builddeltadiff(base, revinfo, fh)
1182 1175 if self._debug_search:
1183 1176 msg = b"DBG-DELTAS-SEARCH: uncompressed-delta-size=%d\n"
1184 1177 msg %= len(delta)
1185 1178 self._write_debug(msg)
1186 1179 # snapshotdepth needs to be neither None nor a 0-level snapshot
1187 1180 if revlog.upperboundcomp is not None and snapshotdepth:
1188 1181 lowestrealisticdeltalen = len(delta) // revlog.upperboundcomp
1189 1182 snapshotlimit = revinfo.textlen >> snapshotdepth
1190 1183 if self._debug_search:
1191 1184 msg = b"DBG-DELTAS-SEARCH: projected-lower-size=%d\n"
1192 1185 msg %= lowestrealisticdeltalen
1193 1186 self._write_debug(msg)
1194 1187 if snapshotlimit < lowestrealisticdeltalen:
1195 1188 if self._debug_search:
1196 1189 msg = b"DBG-DELTAS-SEARCH: DISCARDED (snapshot limit)\n"
1197 1190 self._write_debug(msg)
1198 1191 return None
1199 1192 if revlog.length(base) < lowestrealisticdeltalen:
1200 1193 if self._debug_search:
1201 1194 msg = b"DBG-DELTAS-SEARCH: DISCARDED (prev size)\n"
1202 1195 self._write_debug(msg)
1203 1196 return None
1204 1197 header, data = revlog.compress(delta)
1205 1198 deltalen = len(header) + len(data)
1206 1199 offset = revlog.end(len(revlog) - 1)
1207 1200 dist = deltalen + offset - revlog.start(chainbase)
1208 1201 chainlen, compresseddeltalen = revlog._chaininfo(base)
1209 1202 chainlen += 1
1210 1203 compresseddeltalen += deltalen
1211 1204
1212 1205 return _deltainfo(
1213 1206 dist,
1214 1207 deltalen,
1215 1208 (header, data),
1216 1209 deltabase,
1217 1210 chainbase,
1218 1211 chainlen,
1219 1212 compresseddeltalen,
1220 1213 snapshotdepth,
1221 1214 )
1222 1215
1223 1216 def _fullsnapshotinfo(self, fh, revinfo, curr):
1224 1217 rawtext = self.buildtext(revinfo, fh)
1225 1218 data = self.revlog.compress(rawtext)
1226 1219 compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
1227 1220 deltabase = chainbase = curr
1228 1221 snapshotdepth = 0
1229 1222 chainlen = 1
1230 1223
1231 1224 return _deltainfo(
1232 1225 dist,
1233 1226 deltalen,
1234 1227 data,
1235 1228 deltabase,
1236 1229 chainbase,
1237 1230 chainlen,
1238 1231 compresseddeltalen,
1239 1232 snapshotdepth,
1240 1233 )
1241 1234
1242 1235 def finddeltainfo(self, revinfo, fh, excluded_bases=None, target_rev=None):
1243 1236 """Find an acceptable delta against a candidate revision
1244 1237
1245 1238 revinfo: information about the revision (instance of _revisioninfo)
1246 1239 fh: file handle to either the .i or the .d revlog file,
1247 1240 depending on whether it is inlined or not
1248 1241
1249 1242 Returns the first acceptable candidate revision, as ordered by
1250 1243 _candidategroups
1251 1244
1252 1245 If no suitable deltabase is found, we return delta info for a full
1253 1246 snapshot.
1254 1247
1255 1248 `excluded_bases` is an optional set of revisions that cannot be used as
1256 1249 a delta base. Use this to recompute deltas suitable in censor or strip
1257 1250 contexts.
1258 1251 """
1259 1252 if target_rev is None:
1260 1253 target_rev = len(self.revlog)
1261 1254
1262 1255 gather_debug = self._gather_debug
1263 1256 cachedelta = revinfo.cachedelta
1264 1257 revlog = self.revlog
1265 1258 p1r = p2r = None
1266 1259
1267 1260 if excluded_bases is None:
1268 1261 excluded_bases = set()
1269 1262
1270 1263 if gather_debug:
1271 1264 start = util.timer()
1272 1265 dbg = self._one_dbg_data()
1273 1266 dbg['revision'] = target_rev
1274 1267 target_revlog = b"UNKNOWN"
1275 1268 target_type = self.revlog.target[0]
1276 1269 target_key = self.revlog.target[1]
1277 1270 if target_type == KIND_CHANGELOG:
1278 1271 target_revlog = b'CHANGELOG:'
1279 1272 elif target_type == KIND_MANIFESTLOG:
1280 1273 target_revlog = b'MANIFESTLOG:'
1281 1274 if target_key:
1282 1275 target_revlog += b'%s:' % target_key
1283 1276 elif target_type == KIND_FILELOG:
1284 1277 target_revlog = b'FILELOG:'
1285 1278 if target_key:
1286 1279 target_revlog += b'%s:' % target_key
1287 1280 dbg['target-revlog'] = target_revlog
1288 1281 p1r = revlog.rev(revinfo.p1)
1289 1282 p2r = revlog.rev(revinfo.p2)
1290 1283 if p1r != nullrev:
1291 1284 p1_chain_len = revlog._chaininfo(p1r)[0]
1292 1285 else:
1293 1286 p1_chain_len = -1
1294 1287 if p2r != nullrev:
1295 1288 p2_chain_len = revlog._chaininfo(p2r)[0]
1296 1289 else:
1297 1290 p2_chain_len = -1
1298 1291 dbg['p1-chain-len'] = p1_chain_len
1299 1292 dbg['p2-chain-len'] = p2_chain_len
1300 1293
1301 1294 # 1) if the revision is empty, no amount of delta can beat it
1302 1295 #
1303 1296 # 2) no delta for flag processor revision (see "candelta" for why)
1304 1297 # not calling candelta since only one revision needs test, also to
1305 1298 # avoid overhead fetching flags again.
1306 1299 if not revinfo.textlen or revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
1307 1300 deltainfo = self._fullsnapshotinfo(fh, revinfo, target_rev)
1308 1301 if gather_debug:
1309 1302 end = util.timer()
1310 1303 dbg['duration'] = end - start
1311 1304 dbg[
1312 1305 'delta-base'
1313 1306 ] = deltainfo.base # pytype: disable=attribute-error
1314 1307 dbg['search_round_count'] = 0
1315 1308 dbg['using-cached-base'] = False
1316 1309 dbg['delta_try_count'] = 0
1317 1310 dbg['type'] = b"full"
1318 1311 dbg['snapshot-depth'] = 0
1319 1312 self._dbg_process_data(dbg)
1320 1313 return deltainfo
1321 1314
1322 1315 deltainfo = None
1323 1316
1324 1317 # If the source delta is to be forcibly reused, let us comply early.
1325 1318 if (
1326 1319 revlog._generaldelta
1327 1320 and revinfo.cachedelta is not None
1328 1321 and revinfo.cachedelta[2] == DELTA_BASE_REUSE_FORCE
1329 1322 ):
1330 1323 base = revinfo.cachedelta[0]
1331 1324 if base == nullrev:
1332 1325 dbg_type = b"full"
1333 1326 deltainfo = self._fullsnapshotinfo(fh, revinfo, target_rev)
1334 1327 if gather_debug:
1335 1328 snapshotdepth = 0
1336 1329 elif base not in excluded_bases:
1337 1330 delta = revinfo.cachedelta[1]
1338 1331 header, data = revlog.compress(delta)
1339 1332 deltalen = len(header) + len(data)
1340 1333 if gather_debug:
1341 1334 offset = revlog.end(len(revlog) - 1)
1342 1335 chainbase = revlog.chainbase(base)
1343 1336 distance = deltalen + offset - revlog.start(chainbase)
1344 1337 chainlen, compresseddeltalen = revlog._chaininfo(base)
1345 1338 chainlen += 1
1346 1339 compresseddeltalen += deltalen
1347 1340 if base == p1r or base == p2r:
1348 1341 dbg_type = b"delta"
1349 1342 snapshotdepth = None
1350 1343 elif not revlog.issnapshot(base):
1351 1344 snapshotdepth = None
1352 1345 else:
1353 1346 dbg_type = b"snapshot"
1354 1347 snapshotdepth = revlog.snapshotdepth(base) + 1
1355 1348 else:
1356 1349 distance = None
1357 1350 chainbase = None
1358 1351 chainlen = None
1359 1352 compresseddeltalen = None
1360 1353 snapshotdepth = None
1361 1354 deltainfo = _deltainfo(
1362 1355 distance=distance,
1363 1356 deltalen=deltalen,
1364 1357 data=(header, data),
1365 1358 base=base,
1366 1359 chainbase=chainbase,
1367 1360 chainlen=chainlen,
1368 1361 compresseddeltalen=compresseddeltalen,
1369 1362 snapshotdepth=snapshotdepth,
1370 1363 )
1371 1364
1372 1365 if deltainfo is not None:
1373 1366 if gather_debug:
1374 1367 end = util.timer()
1375 1368 dbg['duration'] = end - start
1376 1369 dbg[
1377 1370 'delta-base'
1378 1371 ] = deltainfo.base # pytype: disable=attribute-error
1379 1372 dbg['search_round_count'] = 0
1380 1373 dbg['using-cached-base'] = True
1381 1374 dbg['delta_try_count'] = 0
1382 1375 dbg['type'] = b"full"
1383 1376 if snapshotdepth is None:
1384 1377 dbg['snapshot-depth'] = 0
1385 1378 else:
1386 1379 dbg['snapshot-depth'] = snapshotdepth
1387 1380 self._dbg_process_data(dbg)
1388 1381 return deltainfo
1389 1382
1390 1383 # count the number of different deltas we tried (for debug purposes)
1391 1384 dbg_try_count = 0
1392 1385 # count the number of "search rounds" we did (for debug purposes)
1393 1386 dbg_try_rounds = 0
1394 1387 dbg_type = b'unknown'
1395 1388
1396 1389 if p1r is None:
1397 1390 p1r = revlog.rev(revinfo.p1)
1398 1391 p2r = revlog.rev(revinfo.p2)
1399 1392
1400 1393 if self._debug_search:
1401 1394 msg = b"DBG-DELTAS-SEARCH: SEARCH rev=%d\n"
1402 1395 msg %= target_rev
1403 1396 self._write_debug(msg)
1404 1397
1405 1398 groups = _candidategroups(
1406 1399 self.revlog,
1407 1400 revinfo.textlen,
1408 1401 p1r,
1409 1402 p2r,
1410 1403 cachedelta,
1411 1404 excluded_bases,
1412 1405 target_rev,
1413 1406 snapshot_cache=self._snapshot_cache,
1414 1407 )
1415 1408 candidaterevs = next(groups)
1416 1409 while candidaterevs is not None:
1417 1410 dbg_try_rounds += 1
1418 1411 if self._debug_search:
1419 1412 prev = None
1420 1413 if deltainfo is not None:
1421 1414 prev = deltainfo.base
1422 1415
1423 1416 if (
1424 1417 cachedelta is not None
1425 1418 and len(candidaterevs) == 1
1426 1419 and cachedelta[0] in candidaterevs
1427 1420 ):
1428 1421 round_type = b"cached-delta"
1429 1422 elif p1r in candidaterevs or p2r in candidaterevs:
1430 1423 round_type = b"parents"
1431 1424 elif prev is not None and all(c < prev for c in candidaterevs):
1432 1425 round_type = b"refine-down"
1433 1426 elif prev is not None and all(c > prev for c in candidaterevs):
1434 1427 round_type = b"refine-up"
1435 1428 else:
1436 1429 round_type = b"search-down"
1437 1430 msg = b"DBG-DELTAS-SEARCH: ROUND #%d - %d candidates - %s\n"
1438 1431 msg %= (dbg_try_rounds, len(candidaterevs), round_type)
1439 1432 self._write_debug(msg)
1440 1433 nominateddeltas = []
1441 1434 if deltainfo is not None:
1442 1435 if self._debug_search:
1443 1436 msg = (
1444 1437 b"DBG-DELTAS-SEARCH: CONTENDER: rev=%d - length=%d\n"
1445 1438 )
1446 1439 msg %= (deltainfo.base, deltainfo.deltalen)
1447 1440 self._write_debug(msg)
1448 1441 # if we already found a good delta,
1449 1442 # challenge it against refined candidates
1450 1443 nominateddeltas.append(deltainfo)
1451 1444 for candidaterev in candidaterevs:
1452 1445 if self._debug_search:
1453 1446 msg = b"DBG-DELTAS-SEARCH: CANDIDATE: rev=%d\n"
1454 1447 msg %= candidaterev
1455 1448 self._write_debug(msg)
1456 1449 candidate_type = None
1457 1450 if candidaterev == p1r:
1458 1451 candidate_type = b"p1"
1459 1452 elif candidaterev == p2r:
1460 1453 candidate_type = b"p2"
1461 1454 elif self.revlog.issnapshot(candidaterev):
1462 1455 candidate_type = b"snapshot-%d"
1463 1456 candidate_type %= self.revlog.snapshotdepth(
1464 1457 candidaterev
1465 1458 )
1466 1459
1467 1460 if candidate_type is not None:
1468 1461 msg = b"DBG-DELTAS-SEARCH: type=%s\n"
1469 1462 msg %= candidate_type
1470 1463 self._write_debug(msg)
1471 1464 msg = b"DBG-DELTAS-SEARCH: size=%d\n"
1472 1465 msg %= self.revlog.length(candidaterev)
1473 1466 self._write_debug(msg)
1474 1467 msg = b"DBG-DELTAS-SEARCH: base=%d\n"
1475 1468 msg %= self.revlog.deltaparent(candidaterev)
1476 1469 self._write_debug(msg)
1477 1470
1478 1471 dbg_try_count += 1
1479 1472
1480 1473 if self._debug_search:
1481 1474 delta_start = util.timer()
1482 1475 candidatedelta = self._builddeltainfo(
1483 1476 revinfo,
1484 1477 candidaterev,
1485 1478 fh,
1486 1479 target_rev=target_rev,
1487 1480 )
1488 1481 if self._debug_search:
1489 1482 delta_end = util.timer()
1490 1483 msg = b"DBG-DELTAS-SEARCH: delta-search-time=%f\n"
1491 1484 msg %= delta_end - delta_start
1492 1485 self._write_debug(msg)
1493 1486 if candidatedelta is not None:
1494 1487 if is_good_delta_info(self.revlog, candidatedelta, revinfo):
1495 1488 if self._debug_search:
1496 1489 msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (GOOD)\n"
1497 1490 msg %= candidatedelta.deltalen
1498 1491 self._write_debug(msg)
1499 1492 nominateddeltas.append(candidatedelta)
1500 1493 elif self._debug_search:
1501 1494 msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (BAD)\n"
1502 1495 msg %= candidatedelta.deltalen
1503 1496 self._write_debug(msg)
1504 1497 elif self._debug_search:
1505 1498 msg = b"DBG-DELTAS-SEARCH: NO-DELTA\n"
1506 1499 self._write_debug(msg)
1507 1500 if nominateddeltas:
1508 1501 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
1509 1502 if deltainfo is not None:
1510 1503 candidaterevs = groups.send(deltainfo.base)
1511 1504 else:
1512 1505 candidaterevs = next(groups)
1513 1506
1514 1507 if deltainfo is None:
1515 1508 dbg_type = b"full"
1516 1509 deltainfo = self._fullsnapshotinfo(fh, revinfo, target_rev)
1517 1510 elif deltainfo.snapshotdepth: # pytype: disable=attribute-error
1518 1511 dbg_type = b"snapshot"
1519 1512 else:
1520 1513 dbg_type = b"delta"
1521 1514
1522 1515 if gather_debug:
1523 1516 end = util.timer()
1524 1517 if dbg_type == b'full':
1525 1518 used_cached = (
1526 1519 cachedelta is not None
1527 1520 and dbg_try_rounds == 0
1528 1521 and dbg_try_count == 0
1529 1522 and cachedelta[0] == nullrev
1530 1523 )
1531 1524 else:
1532 1525 used_cached = (
1533 1526 cachedelta is not None
1534 1527 and dbg_try_rounds == 1
1535 1528 and dbg_try_count == 1
1536 1529 and deltainfo.base == cachedelta[0]
1537 1530 )
1538 1531 dbg['duration'] = end - start
1539 1532 dbg[
1540 1533 'delta-base'
1541 1534 ] = deltainfo.base # pytype: disable=attribute-error
1542 1535 dbg['search_round_count'] = dbg_try_rounds
1543 1536 dbg['using-cached-base'] = used_cached
1544 1537 dbg['delta_try_count'] = dbg_try_count
1545 1538 dbg['type'] = dbg_type
1546 1539 if (
1547 1540 deltainfo.snapshotdepth # pytype: disable=attribute-error
1548 1541 is not None
1549 1542 ):
1550 1543 dbg[
1551 1544 'snapshot-depth'
1552 1545 ] = deltainfo.snapshotdepth # pytype: disable=attribute-error
1553 1546 else:
1554 1547 dbg['snapshot-depth'] = 0
1555 1548 self._dbg_process_data(dbg)
1556 1549 return deltainfo
1557 1550
1558 1551 def _one_dbg_data(self):
1559 1552 return {
1560 1553 'duration': None,
1561 1554 'revision': None,
1562 1555 'delta-base': None,
1563 1556 'search_round_count': None,
1564 1557 'using-cached-base': None,
1565 1558 'delta_try_count': None,
1566 1559 'type': None,
1567 1560 'p1-chain-len': None,
1568 1561 'p2-chain-len': None,
1569 1562 'snapshot-depth': None,
1570 1563 'target-revlog': None,
1571 1564 }
1572 1565
1573 1566 def _dbg_process_data(self, dbg):
1574 1567 if self._debug_info is not None:
1575 1568 self._debug_info.append(dbg)
1576 1569
1577 1570 if self._write_debug is not None:
1578 1571 msg = (
1579 1572 b"DBG-DELTAS:"
1580 1573 b" %-12s"
1581 1574 b" rev=%d:"
1582 1575 b" delta-base=%d"
1583 1576 b" is-cached=%d"
1584 1577 b" - search-rounds=%d"
1585 1578 b" try-count=%d"
1586 1579 b" - delta-type=%-6s"
1587 1580 b" snap-depth=%d"
1588 1581 b" - p1-chain-length=%d"
1589 1582 b" p2-chain-length=%d"
1590 1583 b" - duration=%f"
1591 1584 b"\n"
1592 1585 )
1593 1586 msg %= (
1594 1587 dbg["target-revlog"],
1595 1588 dbg["revision"],
1596 1589 dbg["delta-base"],
1597 1590 dbg["using-cached-base"],
1598 1591 dbg["search_round_count"],
1599 1592 dbg["delta_try_count"],
1600 1593 dbg["type"],
1601 1594 dbg["snapshot-depth"],
1602 1595 dbg["p1-chain-len"],
1603 1596 dbg["p2-chain-len"],
1604 1597 dbg["duration"],
1605 1598 )
1606 1599 self._write_debug(msg)
1607 1600
1608 1601
1609 1602 def delta_compression(default_compression_header, deltainfo):
1610 1603 """return (COMPRESSION_MODE, deltainfo)
1611 1604
1612 1605 used by revlog v2+ format to dispatch between PLAIN and DEFAULT
1613 1606 compression.
1614 1607 """
1615 1608 h, d = deltainfo.data
1616 1609 compression_mode = COMP_MODE_INLINE
1617 1610 if not h and not d:
1618 1611 # no data to store at all... declare it uncompressed
1619 1612 compression_mode = COMP_MODE_PLAIN
1620 1613 elif not h:
1621 1614 t = d[0:1]
1622 1615 if t == b'\0':
1623 1616 compression_mode = COMP_MODE_PLAIN
1624 1617 elif t == default_compression_header:
1625 1618 compression_mode = COMP_MODE_DEFAULT
1626 1619 elif h == b'u':
1627 1620 # we have a more efficient way to declare uncompressed
1628 1621 h = b''
1629 1622 compression_mode = COMP_MODE_PLAIN
1630 1623 deltainfo = drop_u_compression(deltainfo)
1631 1624 return compression_mode, deltainfo
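A standalone illustration of the dispatch above, using stand-in constant values (the real ones live in `revlogutils.constants`) and assuming zlib as the default engine, whose streams start with `b'x'`:

COMP_MODE_PLAIN, COMP_MODE_DEFAULT, COMP_MODE_INLINE = 0, 1, 2

def classify(header, data, default_header=b'x'):
    if not header and not data:
        return COMP_MODE_PLAIN  # empty chunk: nothing to decompress
    if not header:
        t = data[0:1]
        if t == b'\0':
            return COMP_MODE_PLAIN  # stored uncompressed on purpose
        if t == default_header:
            return COMP_MODE_DEFAULT  # engine header can stay implicit
    return COMP_MODE_INLINE  # keep whatever header is inline

print(classify(b'', b'x\x9c...'))  # 1: default engine, header implied
print(classify(b'', b'\0raw'))  # 0: plain bytes
# the b'u' case is handled separately above: the header is dropped via
# drop_u_compression and the entry is recorded as COMP_MODE_PLAIN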