delta-find: move sparse-revlog pre-filtering in the associated class...
marmoute -
r52252:a224ce56 default
@@ -1,1881 +1,1887 b''
1 1 # revlogdeltas.py - Logic around delta computation for revlog
2 2 #
3 3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 4 # Copyright 2018 Octobus <contact@octobus.net>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8 """Helper class to compute deltas stored inside revlogs"""
9 9
10 10
11 11 import abc
12 12 import collections
13 13 import struct
14 14
15 15 # import stuff from node for others to import from revlog
16 16 from ..node import nullrev
17 17 from ..i18n import _
18 18
19 19 from .constants import (
20 20 COMP_MODE_DEFAULT,
21 21 COMP_MODE_INLINE,
22 22 COMP_MODE_PLAIN,
23 23 DELTA_BASE_REUSE_FORCE,
24 24 DELTA_BASE_REUSE_NO,
25 25 KIND_CHANGELOG,
26 26 KIND_FILELOG,
27 27 KIND_MANIFESTLOG,
28 28 REVIDX_ISCENSORED,
29 29 REVIDX_RAWTEXT_CHANGING_FLAGS,
30 30 )
31 31
32 32 from ..thirdparty import attr
33 33
34 34 from .. import (
35 35 error,
36 36 mdiff,
37 37 util,
38 38 )
39 39
40 40 from . import flagutil
41 41
42 42 # maximum <delta-chain-data>/<revision-text-length> ratio
43 43 LIMIT_DELTA2TEXT = 2
44 44
45 45
46 46 class _testrevlog:
47 47 """minimalist fake revlog to use in doctests"""
48 48
49 49 def __init__(self, data, density=0.5, mingap=0, snapshot=()):
50 50 """data is an list of revision payload boundaries"""
51 51 from .. import revlog
52 52
53 53 self._data = data
54 54 self.data_config = revlog.DataConfig()
55 55 self.data_config.sr_density_threshold = density
56 56 self.data_config.sr_min_gap_size = mingap
57 57 self.delta_config = revlog.DeltaConfig()
58 58 self.feature_config = revlog.FeatureConfig()
59 59 self._snapshot = set(snapshot)
60 60 self.index = None
61 61
62 62 def start(self, rev):
63 63 if rev == nullrev:
64 64 return 0
65 65 if rev == 0:
66 66 return 0
67 67 return self._data[rev - 1]
68 68
69 69 def end(self, rev):
70 70 if rev == nullrev:
71 71 return 0
72 72 return self._data[rev]
73 73
74 74 def length(self, rev):
75 75 return self.end(rev) - self.start(rev)
76 76
77 77 def __len__(self):
78 78 return len(self._data)
79 79
80 80 def issnapshot(self, rev):
81 81 if rev == nullrev:
82 82 return True
83 83 return rev in self._snapshot
84 84
85 85
86 86 def slicechunk(revlog, revs, targetsize=None):
87 87 """slice revs to reduce the amount of unrelated data to be read from disk.
88 88
89 89 ``revs`` is sliced into groups that should be read in one go.
90 90 Assume that revs are sorted.
91 91
92 92 The initial chunk is sliced until the overall density (payload/chunks-span
93 93 ratio) is above `revlog.data_config.sr_density_threshold`. No gap smaller
94 94 than `revlog.data_config.sr_min_gap_size` is skipped.
95 95
96 96 If `targetsize` is set, no chunk larger than `targetsize` will be yielded.
97 97 For consistency with other slicing choices, this limit won't go lower than
98 98 `revlog.data_config.sr_min_gap_size`.
99 99
100 100 If individual revision chunks are larger than this limit, they will still
101 101 be yielded individually.
102 102
103 103 >>> data = [
104 104 ... 5, #00 (5)
105 105 ... 10, #01 (5)
106 106 ... 12, #02 (2)
107 107 ... 12, #03 (empty)
108 108 ... 27, #04 (15)
109 109 ... 31, #05 (4)
110 110 ... 31, #06 (empty)
111 111 ... 42, #07 (11)
112 112 ... 47, #08 (5)
113 113 ... 47, #09 (empty)
114 114 ... 48, #10 (1)
115 115 ... 51, #11 (3)
116 116 ... 74, #12 (23)
117 117 ... 85, #13 (11)
118 118 ... 86, #14 (1)
119 119 ... 91, #15 (5)
120 120 ... ]
121 121 >>> revlog = _testrevlog(data, snapshot=range(16))
122 122
123 123 >>> list(slicechunk(revlog, list(range(16))))
124 124 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
125 125 >>> list(slicechunk(revlog, [0, 15]))
126 126 [[0], [15]]
127 127 >>> list(slicechunk(revlog, [0, 11, 15]))
128 128 [[0], [11], [15]]
129 129 >>> list(slicechunk(revlog, [0, 11, 13, 15]))
130 130 [[0], [11, 13, 15]]
131 131 >>> list(slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
132 132 [[1, 2], [5, 8, 10, 11], [14]]
133 133
134 134 Slicing with a maximum chunk size
135 135 >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
136 136 [[0], [11], [13], [15]]
137 137 >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
138 138 [[0], [11], [13, 15]]
139 139
140 140 Slicing involving nullrev
141 141 >>> list(slicechunk(revlog, [-1, 0, 11, 13, 15], targetsize=20))
142 142 [[-1, 0], [11], [13, 15]]
143 143 >>> list(slicechunk(revlog, [-1, 13, 15], targetsize=5))
144 144 [[-1], [13], [15]]
145 145 """
146 146 if targetsize is not None:
147 147 targetsize = max(targetsize, revlog.data_config.sr_min_gap_size)
148 148 # targetsize should not be specified when evaluating delta candidates:
149 149 # * targetsize is used to ensure we stay within specification when reading,
150 150 densityslicing = getattr(revlog.index, 'slicechunktodensity', None)
151 151 if densityslicing is None:
152 152 densityslicing = lambda x, y, z: _slicechunktodensity(revlog, x, y, z)
153 153 for chunk in densityslicing(
154 154 revs,
155 155 revlog.data_config.sr_density_threshold,
156 156 revlog.data_config.sr_min_gap_size,
157 157 ):
158 158 for subchunk in _slicechunktosize(revlog, chunk, targetsize):
159 159 yield subchunk
160 160
161 161
162 162 def _slicechunktosize(revlog, revs, targetsize=None):
163 163 """slice revs to match the target size
164 164
165 165 This is intended to be used on chunks that density slicing selected but that
166 166 are still too large compared to the read guarantee of the revlog. This might
167 167 happen when the "minimal gap size" interrupted the slicing or when chains are
168 168 built in a way that creates large blocks next to each other.
169 169
170 170 >>> data = [
171 171 ... 3, #0 (3)
172 172 ... 5, #1 (2)
173 173 ... 6, #2 (1)
174 174 ... 8, #3 (2)
175 175 ... 8, #4 (empty)
176 176 ... 11, #5 (3)
177 177 ... 12, #6 (1)
178 178 ... 13, #7 (1)
179 179 ... 14, #8 (1)
180 180 ... ]
181 181
182 182 == All snapshots cases ==
183 183 >>> revlog = _testrevlog(data, snapshot=range(9))
184 184
185 185 Cases where chunk is already small enough
186 186 >>> list(_slicechunktosize(revlog, [0], 3))
187 187 [[0]]
188 188 >>> list(_slicechunktosize(revlog, [6, 7], 3))
189 189 [[6, 7]]
190 190 >>> list(_slicechunktosize(revlog, [0], None))
191 191 [[0]]
192 192 >>> list(_slicechunktosize(revlog, [6, 7], None))
193 193 [[6, 7]]
194 194
195 195 cases where we need actual slicing
196 196 >>> list(_slicechunktosize(revlog, [0, 1], 3))
197 197 [[0], [1]]
198 198 >>> list(_slicechunktosize(revlog, [1, 3], 3))
199 199 [[1], [3]]
200 200 >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
201 201 [[1, 2], [3]]
202 202 >>> list(_slicechunktosize(revlog, [3, 5], 3))
203 203 [[3], [5]]
204 204 >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
205 205 [[3], [5]]
206 206 >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
207 207 [[5], [6, 7, 8]]
208 208 >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
209 209 [[0], [1, 2], [3], [5], [6, 7, 8]]
210 210
211 211 Case with too large individual chunk (must return valid chunk)
212 212 >>> list(_slicechunktosize(revlog, [0, 1], 2))
213 213 [[0], [1]]
214 214 >>> list(_slicechunktosize(revlog, [1, 3], 1))
215 215 [[1], [3]]
216 216 >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
217 217 [[3], [5]]
218 218
219 219 == No Snapshot cases ==
220 220 >>> revlog = _testrevlog(data)
221 221
222 222 Cases where chunk is already small enough
223 223 >>> list(_slicechunktosize(revlog, [0], 3))
224 224 [[0]]
225 225 >>> list(_slicechunktosize(revlog, [6, 7], 3))
226 226 [[6, 7]]
227 227 >>> list(_slicechunktosize(revlog, [0], None))
228 228 [[0]]
229 229 >>> list(_slicechunktosize(revlog, [6, 7], None))
230 230 [[6, 7]]
231 231
232 232 cases where we need actual slicing
233 233 >>> list(_slicechunktosize(revlog, [0, 1], 3))
234 234 [[0], [1]]
235 235 >>> list(_slicechunktosize(revlog, [1, 3], 3))
236 236 [[1], [3]]
237 237 >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
238 238 [[1], [2, 3]]
239 239 >>> list(_slicechunktosize(revlog, [3, 5], 3))
240 240 [[3], [5]]
241 241 >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
242 242 [[3], [4, 5]]
243 243 >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
244 244 [[5], [6, 7, 8]]
245 245 >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
246 246 [[0], [1, 2], [3], [5], [6, 7, 8]]
247 247
248 248 Case with too large individual chunk (must return valid chunk)
249 249 >>> list(_slicechunktosize(revlog, [0, 1], 2))
250 250 [[0], [1]]
251 251 >>> list(_slicechunktosize(revlog, [1, 3], 1))
252 252 [[1], [3]]
253 253 >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
254 254 [[3], [5]]
255 255
256 256 == mixed case ==
257 257 >>> revlog = _testrevlog(data, snapshot=[0, 1, 2])
258 258 >>> list(_slicechunktosize(revlog, list(range(9)), 5))
259 259 [[0, 1], [2], [3, 4, 5], [6, 7, 8]]
260 260 """
261 261 assert targetsize is None or 0 <= targetsize
262 262 startdata = revlog.start(revs[0])
263 263 enddata = revlog.end(revs[-1])
264 264 fullspan = enddata - startdata
265 265 if targetsize is None or fullspan <= targetsize:
266 266 yield revs
267 267 return
268 268
269 269 startrevidx = 0
270 270 endrevidx = 1
271 271 iterrevs = enumerate(revs)
272 272 next(iterrevs) # skip first rev.
273 273 # first step: get snapshots out of the way
274 274 for idx, r in iterrevs:
275 275 span = revlog.end(r) - startdata
276 276 snapshot = revlog.issnapshot(r)
277 277 if span <= targetsize and snapshot:
278 278 endrevidx = idx + 1
279 279 else:
280 280 chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
281 281 if chunk:
282 282 yield chunk
283 283 startrevidx = idx
284 284 startdata = revlog.start(r)
285 285 endrevidx = idx + 1
286 286 if not snapshot:
287 287 break
288 288
289 289 # for the others, we use binary slicing to quickly converge toward valid
290 290 # chunks (otherwise, we might end up looking for start/end of many
291 291 # revisions). This logic is not looking for the perfect slicing point, it
292 292 # focuses on quickly converging toward valid chunks.
293 293 nbitem = len(revs)
294 294 while (enddata - startdata) > targetsize:
295 295 endrevidx = nbitem
296 296 if nbitem - startrevidx <= 1:
297 297 break # protect against individual chunk larger than limit
298 298 localenddata = revlog.end(revs[endrevidx - 1])
299 299 span = localenddata - startdata
300 300 while span > targetsize:
301 301 if endrevidx - startrevidx <= 1:
302 302 break # protect against individual chunk larger than limit
303 303 endrevidx -= (endrevidx - startrevidx) // 2
304 304 localenddata = revlog.end(revs[endrevidx - 1])
305 305 span = localenddata - startdata
306 306 chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
307 307 if chunk:
308 308 yield chunk
309 309 startrevidx = endrevidx
310 310 startdata = revlog.start(revs[startrevidx])
311 311
312 312 chunk = _trimchunk(revlog, revs, startrevidx)
313 313 if chunk:
314 314 yield chunk
315 315
316 316
317 317 def _slicechunktodensity(revlog, revs, targetdensity=0.5, mingapsize=0):
318 318 """slice revs to reduce the amount of unrelated data to be read from disk.
319 319
320 320 ``revs`` is sliced into groups that should be read in one go.
321 321 Assume that revs are sorted.
322 322
323 323 The initial chunk is sliced until the overall density (payload/chunks-span
324 324 ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
325 325 skipped.
326 326
327 327 >>> revlog = _testrevlog([
328 328 ... 5, #00 (5)
329 329 ... 10, #01 (5)
330 330 ... 12, #02 (2)
331 331 ... 12, #03 (empty)
332 332 ... 27, #04 (15)
333 333 ... 31, #05 (4)
334 334 ... 31, #06 (empty)
335 335 ... 42, #07 (11)
336 336 ... 47, #08 (5)
337 337 ... 47, #09 (empty)
338 338 ... 48, #10 (1)
339 339 ... 51, #11 (3)
340 340 ... 74, #12 (23)
341 341 ... 85, #13 (11)
342 342 ... 86, #14 (1)
343 343 ... 91, #15 (5)
344 344 ... ])
345 345
346 346 >>> list(_slicechunktodensity(revlog, list(range(16))))
347 347 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
348 348 >>> list(_slicechunktodensity(revlog, [0, 15]))
349 349 [[0], [15]]
350 350 >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
351 351 [[0], [11], [15]]
352 352 >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
353 353 [[0], [11, 13, 15]]
354 354 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
355 355 [[1, 2], [5, 8, 10, 11], [14]]
356 356 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
357 357 ... mingapsize=20))
358 358 [[1, 2, 3, 5, 8, 10, 11], [14]]
359 359 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
360 360 ... targetdensity=0.95))
361 361 [[1, 2], [5], [8, 10, 11], [14]]
362 362 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
363 363 ... targetdensity=0.95, mingapsize=12))
364 364 [[1, 2], [5, 8, 10, 11], [14]]
365 365 """
366 366 start = revlog.start
367 367 length = revlog.length
368 368
369 369 if len(revs) <= 1:
370 370 yield revs
371 371 return
372 372
373 373 deltachainspan = segmentspan(revlog, revs)
374 374
375 375 if deltachainspan < mingapsize:
376 376 yield revs
377 377 return
378 378
379 379 readdata = deltachainspan
380 380 chainpayload = sum(length(r) for r in revs)
381 381
382 382 if deltachainspan:
383 383 density = chainpayload / float(deltachainspan)
384 384 else:
385 385 density = 1.0
386 386
387 387 if density >= targetdensity:
388 388 yield revs
389 389 return
390 390
391 391 # Collect the gaps in a list, sorted below so the largest can be popped first
392 392 gaps = []
393 393 prevend = None
394 394 for i, rev in enumerate(revs):
395 395 revstart = start(rev)
396 396 revlen = length(rev)
397 397
398 398 # Skip empty revisions to form larger holes
399 399 if revlen == 0:
400 400 continue
401 401
402 402 if prevend is not None:
403 403 gapsize = revstart - prevend
404 404 # only consider holes that are large enough
405 405 if gapsize > mingapsize:
406 406 gaps.append((gapsize, i))
407 407
408 408 prevend = revstart + revlen
409 409 # sort the gaps to pop them from largest to smallest
410 410 gaps.sort()
411 411
412 412 # Collect the indices of the largest holes until the density is acceptable
413 413 selected = []
414 414 while gaps and density < targetdensity:
415 415 gapsize, gapidx = gaps.pop()
416 416
417 417 selected.append(gapidx)
418 418
419 419 # skipping this gap reduces the amount of data that
420 420 # actually needs to be read
421 421 readdata -= gapsize
422 422 if readdata > 0:
423 423 density = chainpayload / float(readdata)
424 424 else:
425 425 density = 1.0
426 426 selected.sort()
427 427
428 428 # Cut the revs at collected indices
429 429 previdx = 0
430 430 for idx in selected:
431 431
432 432 chunk = _trimchunk(revlog, revs, previdx, idx)
433 433 if chunk:
434 434 yield chunk
435 435
436 436 previdx = idx
437 437
438 438 chunk = _trimchunk(revlog, revs, previdx)
439 439 if chunk:
440 440 yield chunk
441 441
442 442
443 443 def _trimchunk(revlog, revs, startidx, endidx=None):
444 444 """returns revs[startidx:endidx] without empty trailing revs
445 445
446 446 Doctest Setup
447 447 >>> revlog = _testrevlog([
448 448 ... 5, #0
449 449 ... 10, #1
450 450 ... 12, #2
451 451 ... 12, #3 (empty)
452 452 ... 17, #4
453 453 ... 21, #5
454 454 ... 21, #6 (empty)
455 455 ... ])
456 456
457 457 Contiguous cases:
458 458 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
459 459 [0, 1, 2, 3, 4, 5]
460 460 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
461 461 [0, 1, 2, 3, 4]
462 462 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
463 463 [0, 1, 2]
464 464 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
465 465 [2]
466 466 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
467 467 [3, 4, 5]
468 468 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
469 469 [3, 4]
470 470
471 471 Discontiguous cases:
472 472 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
473 473 [1, 3, 5]
474 474 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
475 475 [1]
476 476 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
477 477 [3, 5]
478 478 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
479 479 [3, 5]
480 480 """
481 481 length = revlog.length
482 482
483 483 if endidx is None:
484 484 endidx = len(revs)
485 485
486 486 # If we have a non-empty delta candidate, there is nothing to trim
487 487 if revs[endidx - 1] < len(revlog):
488 488 # Trim empty revs at the end, except the very first revision of a chain
489 489 while (
490 490 endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0
491 491 ):
492 492 endidx -= 1
493 493
494 494 return revs[startidx:endidx]
495 495
496 496
497 497 def segmentspan(revlog, revs):
498 498 """Get the byte span of a segment of revisions
499 499
500 500 revs is a sorted array of revision numbers
501 501
502 502 >>> revlog = _testrevlog([
503 503 ... 5, #0
504 504 ... 10, #1
505 505 ... 12, #2
506 506 ... 12, #3 (empty)
507 507 ... 17, #4
508 508 ... ])
509 509
510 510 >>> segmentspan(revlog, [0, 1, 2, 3, 4])
511 511 17
512 512 >>> segmentspan(revlog, [0, 4])
513 513 17
514 514 >>> segmentspan(revlog, [3, 4])
515 515 5
516 516 >>> segmentspan(revlog, [1, 2, 3,])
517 517 7
518 518 >>> segmentspan(revlog, [1, 3])
519 519 7
520 520 """
521 521 if not revs:
522 522 return 0
523 523 end = revlog.end(revs[-1])
524 524 return end - revlog.start(revs[0])
525 525
526 526
527 527 def _textfromdelta(revlog, baserev, delta, p1, p2, flags, expectednode):
528 528 """build full text from a (base, delta) pair and other metadata"""
529 529 # special case deltas which replace entire base; no need to decode
530 530 # base revision. this neatly avoids censored bases, which throw when
531 531 # they're decoded.
532 532 hlen = struct.calcsize(b">lll")
533 533 if delta[:hlen] == mdiff.replacediffheader(
534 534 revlog.rawsize(baserev), len(delta) - hlen
535 535 ):
536 536 fulltext = delta[hlen:]
537 537 else:
538 538 # deltabase is rawtext before being changed by flag processors, which is
539 539 # equivalent to non-raw text
540 540 basetext = revlog.revision(baserev)
541 541 fulltext = mdiff.patch(basetext, delta)
542 542
543 543 try:
544 544 validatehash = flagutil.processflagsraw(revlog, fulltext, flags)
545 545 if validatehash:
546 546 revlog.checkhash(fulltext, expectednode, p1=p1, p2=p2)
547 547 if flags & REVIDX_ISCENSORED:
548 548 raise error.StorageError(
549 549 _(b'node %s is not censored') % expectednode
550 550 )
551 551 except error.CensoredNodeError:
552 552 # must pass the censored index flag to add censored revisions
553 553 if not flags & REVIDX_ISCENSORED:
554 554 raise
555 555 return fulltext
556 556
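For reference, `mdiff.replacediffheader(oldlen, newlen)` used above is simply the header of a single bdiff hunk that replaces the whole base: three big-endian 32-bit integers (start, end, new-length). A minimal detection sketch, assuming only that layout (the helper name is illustrative, not part of the module):

import struct

def is_full_replacement(delta, base_len):
    """Does `delta` replace all `base_len` bytes of its base in one hunk?"""
    hlen = struct.calcsize(b">lll")
    if len(delta) < hlen:
        return False
    start, end, newlen = struct.unpack(b">lll", delta[:hlen])
    return start == 0 and end == base_len and newlen == len(delta) - hlen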
557 557
558 558 @attr.s(slots=True, frozen=True)
559 559 class _deltainfo:
560 560 distance = attr.ib()
561 561 deltalen = attr.ib()
562 562 data = attr.ib()
563 563 base = attr.ib()
564 564 chainbase = attr.ib()
565 565 chainlen = attr.ib()
566 566 compresseddeltalen = attr.ib()
567 567 snapshotdepth = attr.ib()
568 568
569 569
570 570 def drop_u_compression(delta):
571 571 """turn into a "u" (no-compression) into no-compression without header
572 572
573 573 This is useful for revlog format that has better compression method.
574 574 """
575 575 assert delta.data[0] == b'u', delta.data[0]
576 576 return _deltainfo(
577 577 delta.distance,
578 578 delta.deltalen - 1,
579 579 (b'', delta.data[1]),
580 580 delta.base,
581 581 delta.chainbase,
582 582 delta.chainlen,
583 583 delta.compresseddeltalen,
584 584 delta.snapshotdepth,
585 585 )
586 586
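A toy model of the (header, data) convention assumed by drop_u_compression, where a b'u' header marks a payload stored uncompressed (a sketch under that assumption, not the revlog implementation):

import zlib

def toy_compress(text):
    """Return (header, data): (b'', ...) if compressed, (b'u', ...) if raw."""
    compressed = zlib.compress(text)
    if len(compressed) < len(text):
        return (b'', compressed)
    return (b'u', text)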
587 587
588 588 # If a revision's full text is that much bigger than a base candidate full
589 589 # text's, it is very unlikely that it will produce a valid delta. We no longer
590 590 # consider these candidates.
591 591 LIMIT_BASE2TEXT = 500
592 592
593 593 ### stages of the search, used for debugging and to select and adjust some logic.
594 594 # initial stage, next step is unknown
595 595 _STAGE_UNSPECIFIED = "unspecified"
596 596 # trying the cached delta
597 597 _STAGE_CACHED = "cached"
598 598 # trying delta based on parents
599 599 _STAGE_PARENTS = "parents"
600 600 # trying to build a valid snapshot of any level
601 601 _STAGE_SNAPSHOT = "snapshot"
602 602 # trying to build a delta based on the previous revision
603 603 _STAGE_PREV = "prev"
604 604 # trying to build a full snapshot
605 605 _STAGE_FULL = "full"
606 606
607 607
608 608 class _BaseDeltaSearch(abc.ABC):
609 609 """perform the search of a good delta for a single revlog revision
610 610
611 611 note: some of the deltacomputer.finddeltainfo logic should probably move
612 612 here.
613 613 """
614 614
615 615 def __init__(
616 616 self,
617 617 revlog,
618 618 revinfo,
619 619 p1,
620 620 p2,
621 621 cachedelta,
622 622 excluded_bases=None,
623 623 target_rev=None,
624 624 snapshot_cache=None,
625 625 ):
626 626 # the DELTA_BASE_REUSE_FORCE case should have been taken care of sooner
627 627 # so we should never end up asking such a question. Adding the assert as
628 628 # a safeguard to detect anything that would be fishy in this regard.
629 629 assert (
630 630 cachedelta is None
631 631 or cachedelta[2] != DELTA_BASE_REUSE_FORCE
632 632 or not revlog.delta_config.general_delta
633 633 )
634 634 self.revlog = revlog
635 635 self.revinfo = revinfo
636 636 self.textlen = revinfo.textlen
637 637 self.p1 = p1
638 638 self.p2 = p2
639 639 self.cachedelta = cachedelta
640 640 self.excluded_bases = excluded_bases
641 641 if target_rev is None:
642 642 target_rev = len(self.revlog)
643 643 self.target_rev = target_rev
644 644 if snapshot_cache is None:
645 645 # map: base-rev: [snapshot-revs]
646 646 snapshot_cache = SnapshotCache()
647 647 self.snapshot_cache = snapshot_cache
648 648
649 649 self.tested = {nullrev}
650 650
651 651 self.current_stage = _STAGE_UNSPECIFIED
652 652 self.current_group = None
653 653 self._init_group()
654 654
655 655 def is_good_delta_info(self, deltainfo):
656 656 """Returns True if the given delta is good.
657 657
658 658 Good means that it is within the disk span, disk size, and chain length
659 659 bounds that we know to be performant.
660 660 """
661 661 if not self._is_good_delta_info_universal(deltainfo):
662 662 return False
663 663 if not self._is_good_delta_info_chain_quality(deltainfo):
664 664 return False
665 665 return True
666 666
667 667 def _is_good_delta_info_universal(self, deltainfo):
668 668 """Returns True if the given delta is good.
669 669
670 670 This performs generic checks needed by all format variants.
671 671
672 672 This is used by is_good_delta_info.
673 673 """
674 674
675 675 if deltainfo is None:
676 676 return False
677 677
678 678 # the DELTA_BASE_REUSE_FORCE case should have been taken care of sooner
679 679 # so we should never end up asking such a question. Adding the assert as
680 680 # a safeguard to detect anything that would be fishy in this regard.
681 681 assert (
682 682 self.revinfo.cachedelta is None
683 683 or self.revinfo.cachedelta[2] != DELTA_BASE_REUSE_FORCE
684 684 or not self.revlog.delta_config.general_delta
685 685 )
686 686
687 687 # Bad delta from new delta size:
688 688 #
689 689 # If the delta size is larger than the target text, storing the delta
690 690 # will be inefficient.
691 691 if self.revinfo.textlen < deltainfo.deltalen:
692 692 return False
693 693
694 694 return True
695 695
696 696 def _is_good_delta_info_chain_quality(self, deltainfo):
697 697 """Returns True if the chain associated with the delta is good.
698 698
699 699 This performs checks for formats that use delta chains.
700 700
701 701 This is used by is_good_delta_info.
702 702 """
703 703 # - 'deltainfo.distance' is the distance from the base revision --
704 704 # bounding it limits the amount of I/O we need to do.
705 705
706 706 defaultmax = self.revinfo.textlen * 4
707 707 maxdist = self.revlog.delta_config.max_deltachain_span
708 708 if not maxdist:
709 709 maxdist = deltainfo.distance # ensure the conditional passes
710 710 maxdist = max(maxdist, defaultmax)
711 711
712 712 # Bad delta from read span:
713 713 #
714 714 # If the span of data read is larger than the maximum allowed.
715 715 #
716 716 # In the sparse-revlog case, we rely on the associated "sparse
717 717 # reading" to avoid issue related to the span of data. In theory, it
718 718 # would be possible to build pathological revlog where delta pattern
719 719 # would lead to too many reads. However, they do not happen in
720 720 # practice at all. So we skip the span check entirely.
721 721 if (
722 722 not self.revlog.delta_config.sparse_revlog
723 723 and maxdist < deltainfo.distance
724 724 ):
725 725 return False
726 726
727 727 # Bad delta from cumulative payload size:
728 728 #
729 729 # - 'deltainfo.compresseddeltalen' is the sum of the total size of
730 730 # deltas we need to apply -- bounding it limits the amount of CPU
731 731 # we consume.
732 732 max_chain_data = self.revinfo.textlen * LIMIT_DELTA2TEXT
733 733 # If the sum of deltas gets larger than K * the target text length.
734 734 if max_chain_data < deltainfo.compresseddeltalen:
735 735 return False
736 736
737 737 # Bad delta from chain length:
738 738 #
739 739 # If the number of deltas in the chain gets too high.
740 740 if (
741 741 self.revlog.delta_config.max_chain_len
742 742 and self.revlog.delta_config.max_chain_len < deltainfo.chainlen
743 743 ):
744 744 return False
745 745 return True
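    # Worked example (illustrative numbers, not from any real repository):
    # for a revision of textlen = 1000 and default configuration, the
    # checks above accept a delta only if:
    #   deltainfo.distance           <= max(4 * 1000, max_deltachain_span)
    #   deltainfo.compresseddeltalen <= LIMIT_DELTA2TEXT * 1000 == 2000
    #   deltainfo.chainlen           <= delta_config.max_chain_len (when set)
    # (the distance bound is skipped entirely for sparse revlogs)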
746 746
747 747 @property
748 748 def done(self):
749 749 """True when all possible candidate have been tested"""
750 750 return self.current_group is None
751 751
752 752 @abc.abstractmethod
753 753 def next_group(self, good_delta=None):
754 754 """move to the next group to test
755 755
756 756 The group of revisions to test will be available in
757 757 `self.current_group`. If the previous group had any good delta, the
758 758 best one can be passed as the `good_delta` parameter to help selecting
759 759 the next group.
760 760
761 761 If no revision remains to be tested, `self.done` will be True and
762 762 `self.current_group` will be None.
763 763 """
764 764 pass
765 765
766 766 @abc.abstractmethod
767 767 def _init_group(self):
768 768 pass
769 769
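The search classes above are meant to be driven by a loop like the following sketch, based only on the interface defined here (`build_delta` is a hypothetical stand-in for the delta-building step in deltacomputer.finddeltainfo):

def find_best_delta(search, build_delta):
    best = None
    while not search.done:
        for candidate in search.current_group:
            info = build_delta(candidate)  # hypothetical delta builder
            if info is not None and search.is_good_delta_info(info):
                if best is None or info.deltalen < best.deltalen:
                    best = info
        search.next_group(best)
    return best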
770 770
771 771 class _NoDeltaSearch(_BaseDeltaSearch):
772 772 """Search for no delta.
773 773
774 774 This search variant is to be used in cases where we should not store deltas.
775 775 """
776 776
777 777 def _init_group(self):
778 778 self.current_stage = _STAGE_FULL
779 779
780 780 def next_group(self, good_delta=None):
781 781 pass
782 782
783 783
784 784 class _PrevDeltaSearch(_BaseDeltaSearch):
785 785 """Search for delta against the previous revision only
786 786
787 787 This search variant is to be used when the format does not allow for delta
788 788 against arbitrary bases.
789 789 """
790 790
791 791 def _init_group(self):
792 792 self.current_stage = _STAGE_PREV
793 793 self.current_group = [self.target_rev - 1]
794 794 self.tested.update(self.current_group)
795 795
796 796 def next_group(self, good_delta=None):
797 797 self.current_stage = _STAGE_FULL
798 798 self.current_group = None
799 799
800 800
801 801 class _GeneralDeltaSearch(_BaseDeltaSearch):
802 802 """Delta search variant for general-delta repository"""
803 803
804 804 def _init_group(self):
805 805 # Why search for a delta base if we cannot use a delta base?
806 806 # also see issue6056
807 807 assert self.revlog.delta_config.general_delta
808 808 self._candidates_iterator = self._iter_groups()
809 809 self._last_good = None
810 810 if (
811 811 self.cachedelta is not None
812 812 and self.cachedelta[2] > DELTA_BASE_REUSE_NO
813 813 and self._pre_filter_rev(self.cachedelta[0])
814 814 ):
815 815 # First we try to reuse the delta contained in the bundle (or from
816 816 # the source revlog).
817 817 #
818 818 # This logic only applies to general delta repositories and can be
819 819 # disabled through configuration. Disabling reuse of the source delta is
820 820 # useful when we want to make sure we recomputed "optimal" deltas.
821 821 self.current_stage = _STAGE_CACHED
822 822 self._internal_group = (self.cachedelta[0],)
823 823 self._internal_idx = 0
824 824 self.current_group = self._internal_group
825 825 self.tested.update(self.current_group)
826 826 else:
827 827 self._next_internal_group()
828 828
829 829 def _next_internal_group(self):
830 830 # self._internal_group can be larger than self.current_group
831 831 self._internal_idx = 0
832 832 group = self._candidates_iterator.send(self._last_good)
833 833 if group is not None:
834 834 group = self._pre_filter_candidate_revs(group)
835 835 self._internal_group = group
836 836 if self._internal_group is None:
837 837 self.current_group = None
838 838 elif len(self._internal_group) == 0:
839 839 self.next_group()
840 840 else:
841 841 chunk_size = self.revlog.delta_config.candidate_group_chunk_size
842 842 if chunk_size > 0:
843 843 self.current_group = self._internal_group[:chunk_size]
844 844 self._internal_idx += chunk_size
845 845 else:
846 846 self.current_group = self._internal_group
847 847 self._internal_idx += len(self.current_group)
848 848
849 849 self.tested.update(self.current_group)
850 850
851 851 def next_group(self, good_delta=None):
852 852 old_good = self._last_good
853 853 if good_delta is not None:
854 854 self._last_good = good_delta.base
855 855 if self.current_stage == _STAGE_CACHED and good_delta is not None:
856 856 # the cache is good, let us use the cache as requested
857 857 self._candidates_iterator = None
858 858 self._internal_group = None
859 859 self._internal_idx = None
860 860 self.current_group = None
861 861 return
862 862
863 863 if (self._internal_idx < len(self._internal_group)) and (
864 864 old_good != good_delta
865 865 ):
866 866 # When the size of the candidate group is big, it can result in
867 867 # a quite significant performance impact. To reduce this, we
868 868 # can send them in smaller batches until the new batch does not
869 869 # provide any improvements.
870 870 #
871 871 # This might reduce the overall efficiency of the compression
872 872 # in some corner cases, but that should also prevent very
873 873 # pathological cases from being an issue. (eg. 20 000
874 874 # candidates).
875 875 #
876 876 # XXX note that the ordering of the group becomes important as
877 877 # it now impacts the final result. The current order is
878 878 # arbitrary and can be improved.
879 879 next_idx = self._internal_idx + self.revlog.delta_config.candidate_group_chunk_size
880 880 self.current_group = self._internal_group[
881 881 self._internal_idx : next_idx
882 882 ]
883 883 self.tested.update(self.current_group)
884 884 self._internal_idx = next_idx
885 885 else:
886 886 self._next_internal_group()
887 887
888 888 def _pre_filter_candidate_revs(self, temptative):
889 889 """filter possible candidate before computing a delta
890 890
891 891 This function use various criteria to pre-filter candidate delta base
892 892 before we compute a delta and evaluate its quality.
893 893
894 894 Such pre-filter limit the number of computed delta, an expensive operation.
895 895
896 896 return the updated list of revision to test
897 897 """
898 898 deltalength = self.revlog.length
899 899 deltaparent = self.revlog.deltaparent
900 900
901 901 tested = self.tested
902 902 group = []
903 903 for rev in temptative:
904 904 # skip over empty deltas (no need to include them in a chain)
905 905 while not (rev == nullrev or rev in tested or deltalength(rev)):
906 906 tested.add(rev)
907 907 rev = deltaparent(rev)
908 908 if self._pre_filter_rev(rev):
909 909 group.append(rev)
910 910 else:
911 911 self.tested.add(rev)
912 912 return group
913 913
914 914 def _pre_filter_rev_universal(self, rev):
915 915 """pre filtering that is need in all cases.
916 916
917 917 return True if it seems okay to test a rev, False otherwise.
918 918
919 919 used by _pre_filter_rev.
920 920 """
921 921 # no need to try a delta against nullrev, this will be done as
922 922 # a last resort.
923 923 if rev == nullrev:
924 924 return False
925 925 # filter out revisions we tested already
926 926 if rev in self.tested:
927 927 return False
928 928
929 929 # a higher authority deemed the base unworthy (e.g. censored)
930 930 if self.excluded_bases is not None and rev in self.excluded_bases:
931 931 return False
932 932 # We are in some recomputation cases and that rev is too high
933 933 # in the revlog
934 934 if self.target_rev is not None and rev >= self.target_rev:
935 935 return False
936 936 # no delta for rawtext-changing revs (see "candelta" for why)
937 937 if self.revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
938 938 return False
939 939 return True
940 940
941 def _pre_filter_rev_sparse(self, rev):
942 """pre filtering that is needed in sparse revlog cases
943
944 return True if it seems okay to test a rev, False otherwise.
945
946 used by _pre_filter_rev.
947 """
948 assert self.revlog.delta_config.sparse_revlog
949 # if the revision we test again is too small, the resulting delta
950 # will be large anyway as that amount of data to be added is big
951 if self.revlog.rawsize(rev) < (self.textlen // LIMIT_BASE2TEXT):
952 return False
953
954 if self.revlog.delta_config.upper_bound_comp is not None:
955 maxcomp = self.revlog.delta_config.upper_bound_comp
956 basenotsnap = (self.p1, self.p2, nullrev)
957 if rev not in basenotsnap and self.revlog.issnapshot(rev):
958 snapshotdepth = self.revlog.snapshotdepth(rev)
959 # If text is significantly larger than the base, we can
960 # expect the resulting delta to be proportional to the size
961 # difference
962 revsize = self.revlog.rawsize(rev)
963 rawsizedistance = max(self.textlen - revsize, 0)
964 # use an estimate of the compression upper bound.
965 lowestrealisticdeltalen = rawsizedistance // maxcomp
966
967 # check the absolute constraint on the delta size
968 snapshotlimit = self.textlen >> snapshotdepth
969 if snapshotlimit < lowestrealisticdeltalen:
970 # delta lower bound is larger than accepted upper
971 # bound
972 return False
973
974 # check the relative constraint on the delta size
975 revlength = self.revlog.length(rev)
976 if revlength < lowestrealisticdeltalen:
977 # delta probable lower bound is larger than target
978 # base
979 return False
980 return True
981
982 941 def _pre_filter_rev_delta_chain(self, rev):
983 942 """pre filtering that is needed in sparse revlog cases
984 943
985 944 return True if it seems okay to test a rev, False otherwise.
986 945
987 946 used by _pre_filter_rev.
988 947 """
989 948 deltas_limit = self.revinfo.textlen * LIMIT_DELTA2TEXT
990 949 # filter out delta bases that will never produce a good delta
991 950 #
992 951 # if the delta of that base is already bigger than the limit
993 952 # for the delta chain size, doing a delta is hopeless.
994 953 if deltas_limit < self.revlog.length(rev):
995 954 return False
996 955
997 956 # If we reach here, we are about to build and test a delta.
998 957 # The delta building process will compute the chaininfo in all
999 958 # cases; since that computation is cached, it is fine to access
1000 959 # it here too.
1001 960 chainlen, chainsize = self.revlog._chaininfo(rev)
1002 961 # if chain will be too long, skip base
1003 962 if (
1004 963 self.revlog.delta_config.max_chain_len
1005 964 and chainlen >= self.revlog.delta_config.max_chain_len
1006 965 ):
1007 966 return False
1008 967 # if the chain already has too much data, skip base
1009 968 if deltas_limit < chainsize:
1010 969 return False
1011 970 return True
1012 971
1013 972 def _pre_filter_rev(self, rev):
1014 973 """return True if it seems okay to test a rev, False otherwise"""
1015 974 if not self._pre_filter_rev_universal(rev):
1016 975 return False
1017 976 if not self._pre_filter_rev_delta_chain(rev):
1018 977 return False
1019 if self.revlog.delta_config.sparse_revlog:
1020 if not self._pre_filter_rev_sparse(rev):
1021 return False
1022
1023 978 return True
1024 979
1025 980 def _iter_parents(self):
1026 981 # exclude already lazily-tested bases, if any
1027 982 parents = [p for p in (self.p1, self.p2) if p != nullrev]
1028 983
1029 984 self.current_stage = _STAGE_PARENTS
1030 985 if (
1031 986 not self.revlog.delta_config.delta_both_parents
1032 987 and len(parents) == 2
1033 988 ):
1034 989 parents.sort()
1035 990 # To minimize the chance of having to build a fulltext,
1036 991 # pick first whichever parent is closest to us (max rev)
1037 992 yield (parents[1],)
1038 993 # then the other one (min rev) if the first did not fit
1039 994 yield (parents[0],)
1040 995 elif len(parents) > 0:
1041 996 # Test all parents (1 or 2), and keep the best candidate
1042 997 yield parents
1043 998
1044 999 def _iter_prev(self):
1045 1000 # other approaches failed, try against prev to hopefully save us a
1046 1001 # fulltext.
1047 1002 self.current_stage = _STAGE_PREV
1048 1003 yield (self.target_rev - 1,)
1049 1004
1050 1005 def _iter_groups(self):
1051 1006 good = None
1052 1007 for group in self._iter_parents():
1053 1008 good = yield group
1054 1009 if good is not None:
1055 1010 break
1056 1011 else:
1057 1012 assert good is None
1058 1013 yield from self._iter_prev()
1059 1014 yield None
1060 1015
1061 1016
1062 1017 class _SparseDeltaSearch(_GeneralDeltaSearch):
1063 1018 """Delta search variants for sparse-revlog"""
1064 1019
1065 1020 def is_good_delta_info(self, deltainfo):
1066 1021 """Returns True if the given delta is good.
1067 1022
1068 1023 Good means that it is within the disk span, disk size, and chain length
1069 1024 bounds that we know to be performant.
1070 1025 """
1071 1026 if not self._is_good_delta_info_universal(deltainfo):
1072 1027 return False
1073 1028 if not self._is_good_delta_info_chain_quality(deltainfo):
1074 1029 return False
1075 1030 if not self._is_good_delta_info_snapshot_constraints(deltainfo):
1076 1031 return False
1077 1032 return True
1078 1033
1079 1034 def _is_good_delta_info_snapshot_constraints(self, deltainfo):
1080 1035 """Returns True if the chain associated with snapshots
1081 1036
1082 1037 This performs checks for format that use sparse-revlog and intermediate
1083 1038 snapshots.
1084 1039
1085 1040 This is used by is_good_delta_info.
1086 1041 """
1087 1042 # if not a snapshot, this method has no filtering to do
1088 1043 if deltainfo.snapshotdepth is None:
1089 1044 return True
1090 1045 # bad delta from intermediate snapshot size limit
1091 1046 #
1092 1047 # If an intermediate snapshot size is higher than the limit, the
1093 1048 # delta is rejected. The limit exists to prevent endless chains of
1094 1049 # intermediate deltas from being created.
1095 1050 if (
1096 1051 self.revinfo.textlen >> deltainfo.snapshotdepth
1097 1052 ) < deltainfo.deltalen:
1098 1053 return False
1099 1054
1100 1055 # bad delta if the new intermediate snapshot is larger than the previous
1101 1056 # snapshot
1102 1057 if self.revlog.length(deltainfo.base) < deltainfo.deltalen:
1103 1058 return False
1104 1059
1105 1060 return True
1106 1061
1062 def _pre_filter_rev(self, rev):
1063 """return True if it seems okay to test a rev, False otherwise"""
1064 if not self._pre_filter_rev_universal(rev):
1065 return False
1066 if not self._pre_filter_rev_delta_chain(rev):
1067 return False
1068 if not self._pre_filter_rev_sparse(rev):
1069 return False
1070 return True
1071
1072 def _pre_filter_rev_sparse(self, rev):
1073 """pre filtering that is needed in sparse revlog cases
1074
1075 return True if it seems okay to test a rev, False otherwise.
1076
1077 used by _pre_filter_rev.
1078 """
1079 assert self.revlog.delta_config.sparse_revlog
1080 # if the revision we test against is too small, the resulting delta
1081 # will be large anyway as the amount of data to be added is big
1082 if self.revlog.rawsize(rev) < (self.textlen // LIMIT_BASE2TEXT):
1083 return False
1084
1085 if self.revlog.delta_config.upper_bound_comp is not None:
1086 maxcomp = self.revlog.delta_config.upper_bound_comp
1087 basenotsnap = (self.p1, self.p2, nullrev)
1088 if rev not in basenotsnap and self.revlog.issnapshot(rev):
1089 snapshotdepth = self.revlog.snapshotdepth(rev)
1090 # If text is significantly larger than the base, we can
1091 # expect the resulting delta to be proportional to the size
1092 # difference
1093 revsize = self.revlog.rawsize(rev)
1094 rawsizedistance = max(self.textlen - revsize, 0)
1095 # use an estimate of the compression upper bound.
1096 lowestrealisticdeltalen = rawsizedistance // maxcomp
1097
1098 # check the absolute constraint on the delta size
1099 snapshotlimit = self.textlen >> snapshotdepth
1100 if snapshotlimit < lowestrealisticdeltalen:
1101 # delta lower bound is larger than accepted upper
1102 # bound
1103 return False
1104
1105 # check the relative constraint on the delta size
1106 revlength = self.revlog.length(rev)
1107 if revlength < lowestrealisticdeltalen:
1108 # delta probable lower bound is larger than target
1109 # base
1110 return False
1111 return True
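    # Worked example (illustrative numbers, not from any real repository):
    # with upper_bound_comp = 10, textlen = 100_000 and a candidate
    # snapshot of rawsize 40_000 at depth 3:
    #   rawsizedistance         = 100_000 - 40_000 = 60_000
    #   lowestrealisticdeltalen = 60_000 // 10     = 6_000
    #   snapshotlimit           = 100_000 >> 3     = 12_500
    # 6_000 <= 12_500, so the candidate passes the absolute check and is
    # then also compared against its own stored length().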
1112
1107 1113 def _iter_snapshots_base(self):
1108 1114 assert self.revlog.delta_config.sparse_revlog
1109 1115 assert self.current_stage == _STAGE_SNAPSHOT
1110 1116 prev = self.target_rev - 1
1111 1117 deltachain = lambda rev: self.revlog._deltachain(rev)[0]
1112 1118
1113 1119 parents = [p for p in (self.p1, self.p2) if p != nullrev]
1114 1120 if not parents:
1115 1121 return
1116 1122 # See if we can use an existing snapshot in the parent chains as a
1117 1123 # base for a new intermediate snapshot
1118 1124 #
1119 1125 # search for snapshots in the parents' delta chains; map:
1120 1126 # snapshot-level: snapshot-revs
1121 1127 parents_snaps = collections.defaultdict(set)
1122 1128 candidate_chains = [deltachain(p) for p in parents]
1123 1129 for chain in candidate_chains:
1124 1130 for idx, s in enumerate(chain):
1125 1131 if not self.revlog.issnapshot(s):
1126 1132 break
1127 1133 parents_snaps[idx].add(s)
1128 1134 snapfloor = min(parents_snaps[0]) + 1
1129 1135 self.snapshot_cache.update(self.revlog, snapfloor)
1130 1136 # search for the highest "unrelated" revision
1131 1137 #
1132 1138 # Adding snapshots used by "unrelated" revisions increases the odds
1133 1139 # that we reuse an independent, yet better, snapshot chain.
1134 1140 #
1135 1141 # XXX instead of building a set of revisions, we could lazily
1136 1142 # enumerate over the chains. That would be more efficient, however
1137 1143 # we stick to simple code for now.
1138 1144 all_revs = set()
1139 1145 for chain in candidate_chains:
1140 1146 all_revs.update(chain)
1141 1147 other = None
1142 1148 for r in self.revlog.revs(prev, snapfloor):
1143 1149 if r not in all_revs:
1144 1150 other = r
1145 1151 break
1146 1152 if other is not None:
1147 1153 # To avoid unfair competition, we won't use unrelated
1148 1154 # intermediate snapshots that are deeper than the ones from the
1149 1155 # parent delta chain.
1150 1156 max_depth = max(parents_snaps.keys())
1151 1157 chain = deltachain(other)
1152 1158 for depth, s in enumerate(chain):
1153 1159 if s < snapfloor:
1154 1160 continue
1155 1161 if max_depth < depth:
1156 1162 break
1157 1163 if not self.revlog.issnapshot(s):
1158 1164 break
1159 1165 parents_snaps[depth].add(s)
1160 1166 # Test them as possible intermediate snapshot bases. We test them
1161 1167 # from highest to lowest level. High level ones are more likely to
1162 1168 # result in a small delta
1163 1169 floor = None
1164 1170 for idx, snaps in sorted(parents_snaps.items(), reverse=True):
1165 1171 siblings = set()
1166 1172 for s in snaps:
1167 1173 siblings.update(self.snapshot_cache.snapshots[s])
1168 1174 # Before considering making a new intermediate snapshot, we
1169 1175 # check if an existing snapshot, a child of the base we consider,
1170 1176 # would be suitable.
1171 1177 #
1172 1178 # It gives a chance to reuse a delta chain "unrelated" to the
1173 1179 # current revision instead of starting our own. Without such
1174 1180 # re-use, topological branches would keep reopening new chains,
1175 1181 # creating more and more snapshots as the repository grows.
1176 1182
1177 1183 if floor is not None:
1178 1184 # We only do this for siblings created after the one in our
1179 1185 # parent's delta chain. Those created before have less
1180 1186 # chance of being a valid base since our ancestors had to
1181 1187 # create a new snapshot.
1182 1188 siblings = [r for r in siblings if floor < r]
1183 1189 yield tuple(sorted(siblings))
1184 1190 # then test the base from our parent's delta chain.
1185 1191 yield tuple(sorted(snaps))
1186 1192 floor = min(snaps)
1187 1193 # No suitable base found in the parent chain, search if any full
1188 1194 # snapshots emitted since parent's base would be a suitable base
1189 1195 # for an intermediate snapshot.
1190 1196 #
1191 1197 # It gives a chance to reuse a delta chain unrelated to the current
1192 1198 # revisions instead of starting our own. Without such re-use,
1193 1199 # topological branches would keep reopening new full chains,
1194 1200 # creating more and more snapshots as the repository grows.
1195 1201 full = [
1196 1202 r for r in self.snapshot_cache.snapshots[nullrev] if snapfloor <= r
1197 1203 ]
1198 1204 yield tuple(sorted(full))
1199 1205
1200 1206 def _iter_snapshots(self):
1201 1207 assert self.revlog.delta_config.sparse_revlog
1202 1208 self.current_stage = _STAGE_SNAPSHOT
1203 1209 good = None
1204 1210 groups = self._iter_snapshots_base()
1205 1211 for candidates in groups:
1206 1212 good = yield candidates
1207 1213 if good is not None:
1208 1214 break
1209 1215 # if we have a refinable value, try to refine it
1210 1216 if (
1211 1217 good is not None
1212 1218 and good not in (self.p1, self.p2)
1213 1219 and self.revlog.issnapshot(good)
1214 1220 ):
1215 1221 assert self.current_stage == _STAGE_SNAPSHOT
1216 1222 # refine snapshot down
1217 1223 previous = None
1218 1224 while previous != good:
1219 1225 previous = good
1220 1226 base = self.revlog.deltaparent(good)
1221 1227 if base == nullrev:
1222 1228 break
1223 1229 good = yield (base,)
1224 1230 # refine snapshot up
1225 1231 if not self.snapshot_cache.snapshots:
1226 1232 self.snapshot_cache.update(self.revlog, good + 1)
1227 1233 previous = None
1228 1234 while good != previous:
1229 1235 previous = good
1230 1236 children = tuple(
1231 1237 sorted(c for c in self.snapshot_cache.snapshots[good])
1232 1238 )
1233 1239 good = yield children
1234 1240 yield None
1235 1241
1236 1242 def _iter_groups(self):
1237 1243 good = None
1238 1244 for group in self._iter_parents():
1239 1245 good = yield group
1240 1246 if good is not None:
1241 1247 break
1242 1248 else:
1243 1249 assert good is None
1244 1250 assert self.revlog.delta_config.sparse_revlog
1245 1251 # If sparse revlog is enabled, we can try to refine the
1246 1252 # available deltas
1247 1253 iter_snap = self._iter_snapshots()
1248 1254 group = iter_snap.send(None)
1249 1255 while group is not None:
1250 1256 good = yield group
1251 1257 group = iter_snap.send(good)
1252 1258 yield None
1253 1259
1254 1260
1255 1261 class SnapshotCache:
1256 1262 __slots__ = ('snapshots', '_start_rev', '_end_rev')
1257 1263
1258 1264 def __init__(self):
1259 1265 self.snapshots = collections.defaultdict(set)
1260 1266 self._start_rev = None
1261 1267 self._end_rev = None
1262 1268
1263 1269 def update(self, revlog, start_rev=0):
1264 1270 """find snapshots from start_rev to tip"""
1265 1271 nb_revs = len(revlog)
1266 1272 end_rev = nb_revs - 1
1267 1273 if start_rev > end_rev:
1268 1274 return # range is empty
1269 1275
1270 1276 if self._start_rev is None:
1271 1277 assert self._end_rev is None
1272 1278 self._update(revlog, start_rev, end_rev)
1273 1279 elif not (self._start_rev <= start_rev and end_rev <= self._end_rev):
1274 1280 if start_rev < self._start_rev:
1275 1281 self._update(revlog, start_rev, self._start_rev - 1)
1276 1282 if self._end_rev < end_rev:
1277 1283 self._update(revlog, self._end_rev + 1, end_rev)
1278 1284
1279 1285 if self._start_rev is None:
1280 1286 assert self._end_rev is None
1281 1287 self._end_rev = end_rev
1282 1288 self._start_rev = start_rev
1283 1289 else:
1284 1290 self._start_rev = min(self._start_rev, start_rev)
1285 1291 self._end_rev = max(self._end_rev, end_rev)
1286 1292 assert self._start_rev <= self._end_rev, (
1287 1293 self._start_rev,
1288 1294 self._end_rev,
1289 1295 )
1290 1296
1291 1297 def _update(self, revlog, start_rev, end_rev):
1292 1298 """internal method that actually do update content"""
1293 1299 assert self._start_rev is None or (
1294 1300 start_rev < self._start_rev or start_rev > self._end_rev
1295 1301 ), (self._start_rev, self._end_rev, start_rev, end_rev)
1296 1302 assert self._start_rev is None or (
1297 1303 end_rev < self._start_rev or end_rev > self._end_rev
1298 1304 ), (self._start_rev, self._end_rev, start_rev, end_rev)
1299 1305 cache = self.snapshots
1300 1306 if hasattr(revlog.index, 'findsnapshots'):
1301 1307 revlog.index.findsnapshots(cache, start_rev, end_rev)
1302 1308 else:
1303 1309 deltaparent = revlog.deltaparent
1304 1310 issnapshot = revlog.issnapshot
1305 1311 for rev in revlog.revs(start_rev, end_rev):
1306 1312 if issnapshot(rev):
1307 1313 cache[deltaparent(rev)].add(rev)
1308 1314
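A sketch of the intended SnapshotCache usage, derived only from the interface above (the function name is illustrative):

def list_full_snapshots(revlog):
    """Collect every level-0 snapshot (delta base == nullrev)."""
    cache = SnapshotCache()
    cache.update(revlog)  # scans [0, tip] on the first call
    return sorted(cache.snapshots[nullrev])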
1309 1315
1310 1316 class deltacomputer:
1311 1317 """object capable of computing delta and finding delta for multiple revision
1312 1318
1313 1319 This object is meant to compute and find multiple delta applied to the same
1314 1320 revlog.
1315 1321 """
1316 1322
1317 1323 def __init__(
1318 1324 self,
1319 1325 revlog,
1320 1326 write_debug=None,
1321 1327 debug_search=False,
1322 1328 debug_info=None,
1323 1329 ):
1324 1330 self.revlog = revlog
1325 1331 self._write_debug = write_debug
1326 1332 if write_debug is None:
1327 1333 self._debug_search = False
1328 1334 else:
1329 1335 self._debug_search = debug_search
1330 1336 self._debug_info = debug_info
1331 1337 self._snapshot_cache = SnapshotCache()
1332 1338
1333 1339 @property
1334 1340 def _gather_debug(self):
1335 1341 return self._write_debug is not None or self._debug_info is not None
1336 1342
1337 1343 def buildtext(self, revinfo):
1338 1344 """Builds a fulltext version of a revision
1339 1345
1340 1346 revinfo: revisioninfo instance that contains all needed info
1341 1347 """
1342 1348 btext = revinfo.btext
1343 1349 if btext[0] is not None:
1344 1350 return btext[0]
1345 1351
1346 1352 revlog = self.revlog
1347 1353 cachedelta = revinfo.cachedelta
1348 1354 baserev = cachedelta[0]
1349 1355 delta = cachedelta[1]
1350 1356
1351 1357 fulltext = btext[0] = _textfromdelta(
1352 1358 revlog,
1353 1359 baserev,
1354 1360 delta,
1355 1361 revinfo.p1,
1356 1362 revinfo.p2,
1357 1363 revinfo.flags,
1358 1364 revinfo.node,
1359 1365 )
1360 1366 return fulltext
1361 1367
1362 1368 def _builddeltadiff(self, base, revinfo):
1363 1369 revlog = self.revlog
1364 1370 t = self.buildtext(revinfo)
1365 1371 if revlog.iscensored(base):
1366 1372 # deltas based on a censored revision must replace the
1367 1373 # full content in one patch, so delta works everywhere
1368 1374 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
1369 1375 delta = header + t
1370 1376 else:
1371 1377 ptext = revlog.rawdata(base)
1372 1378 delta = mdiff.textdiff(ptext, t)
1373 1379
1374 1380 return delta
1375 1381
1376 1382 def _builddeltainfo(
1377 1383 self, revinfo, base, target_rev=None, as_snapshot=False
1378 1384 ):
1379 1385 # can we use the cached delta?
1380 1386 revlog = self.revlog
1381 1387 chainbase = revlog.chainbase(base)
1382 1388 if revlog.delta_config.general_delta:
1383 1389 deltabase = base
1384 1390 else:
1385 1391 if target_rev is not None and base != target_rev - 1:
1386 1392 msg = (
1387 1393 b'non-general delta cannot use delta for something else '
1388 1394 b'than `prev`: %d<-%d'
1389 1395 )
1390 1396 msg %= (base, target_rev)
1391 1397 raise error.ProgrammingError(msg)
1392 1398 deltabase = chainbase
1393 1399 snapshotdepth = None
1394 1400 if revlog.delta_config.sparse_revlog and deltabase == nullrev:
1395 1401 snapshotdepth = 0
1396 1402 elif revlog.delta_config.sparse_revlog and as_snapshot:
1397 1403 assert revlog.issnapshot(deltabase)
1398 1404 # A delta chain should always be one full snapshot,
1399 1405 # zero or more semi-snapshots, and zero or more deltas
1400 1406 p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
1401 1407 if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
1402 1408 snapshotdepth = len(revlog._deltachain(deltabase)[0])
1403 1409 delta = None
1404 1410 if revinfo.cachedelta:
1405 1411 cachebase = revinfo.cachedelta[0]
1406 1412 # check if the diff still applies
1407 1413 currentbase = cachebase
1408 1414 while (
1409 1415 currentbase != nullrev
1410 1416 and currentbase != base
1411 1417 and self.revlog.length(currentbase) == 0
1412 1418 ):
1413 1419 currentbase = self.revlog.deltaparent(currentbase)
1414 1420 if self.revlog.delta_config.lazy_delta and currentbase == base:
1415 1421 delta = revinfo.cachedelta[1]
1416 1422 if delta is None:
1417 1423 delta = self._builddeltadiff(base, revinfo)
1418 1424 if self._debug_search:
1419 1425 msg = b"DBG-DELTAS-SEARCH: uncompressed-delta-size=%d\n"
1420 1426 msg %= len(delta)
1421 1427 self._write_debug(msg)
1422 1428 # snapshotdepth needs to be neither None nor 0 (a full snapshot)
1423 1429 if revlog.delta_config.upper_bound_comp is not None and snapshotdepth:
1424 1430 lowestrealisticdeltalen = (
1425 1431 len(delta) // revlog.delta_config.upper_bound_comp
1426 1432 )
1427 1433 snapshotlimit = revinfo.textlen >> snapshotdepth
1428 1434 if self._debug_search:
1429 1435 msg = b"DBG-DELTAS-SEARCH: projected-lower-size=%d\n"
1430 1436 msg %= lowestrealisticdeltalen
1431 1437 self._write_debug(msg)
1432 1438 if snapshotlimit < lowestrealisticdeltalen:
1433 1439 if self._debug_search:
1434 1440 msg = b"DBG-DELTAS-SEARCH: DISCARDED (snapshot limit)\n"
1435 1441 self._write_debug(msg)
1436 1442 return None
1437 1443 if revlog.length(base) < lowestrealisticdeltalen:
1438 1444 if self._debug_search:
1439 1445 msg = b"DBG-DELTAS-SEARCH: DISCARDED (prev size)\n"
1440 1446 self._write_debug(msg)
1441 1447 return None
1442 1448 header, data = revlog._inner.compress(delta)
1443 1449 deltalen = len(header) + len(data)
1444 1450 offset = revlog.end(len(revlog) - 1)
1445 1451 dist = deltalen + offset - revlog.start(chainbase)
1446 1452 chainlen, compresseddeltalen = revlog._chaininfo(base)
1447 1453 chainlen += 1
1448 1454 compresseddeltalen += deltalen
1449 1455
1450 1456 return _deltainfo(
1451 1457 dist,
1452 1458 deltalen,
1453 1459 (header, data),
1454 1460 deltabase,
1455 1461 chainbase,
1456 1462 chainlen,
1457 1463 compresseddeltalen,
1458 1464 snapshotdepth,
1459 1465 )
1460 1466
1461 1467 def _fullsnapshotinfo(self, revinfo, curr):
1462 1468 rawtext = self.buildtext(revinfo)
1463 1469 data = self.revlog._inner.compress(rawtext)
1464 1470 compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
1465 1471 deltabase = chainbase = curr
1466 1472 snapshotdepth = 0
1467 1473 chainlen = 1
1468 1474
1469 1475 return _deltainfo(
1470 1476 dist,
1471 1477 deltalen,
1472 1478 data,
1473 1479 deltabase,
1474 1480 chainbase,
1475 1481 chainlen,
1476 1482 compresseddeltalen,
1477 1483 snapshotdepth,
1478 1484 )
1479 1485
1480 1486 def finddeltainfo(self, revinfo, excluded_bases=None, target_rev=None):
1481 1487 """Find an acceptable delta against a candidate revision
1482 1488
1483 1489 revinfo: information about the revision (instance of _revisioninfo)
1484 1490
1485 1491 Returns the first acceptable candidate revision, as ordered by
1486 1492 _candidategroups
1487 1493
1488 1494 If no suitable deltabase is found, we return delta info for a full
1489 1495 snapshot.
1490 1496
1491 1497 `excluded_bases` is an optional set of revisions that cannot be used as
1492 1498 a delta base. Use this to recompute deltas suitable in a censor or strip
1493 1499 context.
1494 1500 """
        if target_rev is None:
            target_rev = len(self.revlog)

        gather_debug = self._gather_debug
        cachedelta = revinfo.cachedelta
        revlog = self.revlog
        p1r = p2r = None

        if excluded_bases is None:
            excluded_bases = set()

        if gather_debug:
            start = util.timer()
            dbg = self._one_dbg_data()
            dbg['revision'] = target_rev
            p1r = revlog.rev(revinfo.p1)
            p2r = revlog.rev(revinfo.p2)
            if p1r != nullrev:
                p1_chain_len = revlog._chaininfo(p1r)[0]
            else:
                p1_chain_len = -1
            if p2r != nullrev:
                p2_chain_len = revlog._chaininfo(p2r)[0]
            else:
                p2_chain_len = -1
            dbg['p1-chain-len'] = p1_chain_len
            dbg['p2-chain-len'] = p2_chain_len

        # 1) if the revision is empty, no amount of delta can beat it
        #
        # 2) no delta for flag-processor revisions (see "candelta" for why);
        #    we do not call candelta since only one revision needs testing,
        #    and to avoid the overhead of fetching the flags again.
        if not revinfo.textlen or revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
            deltainfo = self._fullsnapshotinfo(revinfo, target_rev)
            if gather_debug:
                end = util.timer()
                dbg['duration'] = end - start
                dbg[
                    'delta-base'
                ] = deltainfo.base  # pytype: disable=attribute-error
                dbg['search_round_count'] = 0
                dbg['using-cached-base'] = False
                dbg['delta_try_count'] = 0
                dbg['type'] = b"full"
                dbg['snapshot-depth'] = 0
                self._dbg_process_data(dbg)
            return deltainfo
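
        # Illustrative consequence of the check above (the flag composition
        # is an assumption here): any revision where
        #
        #   revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS
        #
        # is non-zero, e.g. a censored revision, is stored as a full snapshot,
        # because a delta could otherwise be computed against a raw text that
        # flag processors later rewrite.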

        deltainfo = None

        # If the incoming delta is to be forcibly reused, comply early.
        if (
            revlog.delta_config.general_delta
            and revinfo.cachedelta is not None
            and revinfo.cachedelta[2] == DELTA_BASE_REUSE_FORCE
        ):
            base = revinfo.cachedelta[0]
            if base == nullrev:
                dbg_type = b"full"
                deltainfo = self._fullsnapshotinfo(revinfo, target_rev)
                if gather_debug:
                    snapshotdepth = 0
            elif base not in excluded_bases:
                delta = revinfo.cachedelta[1]
                header, data = revlog.compress(delta)
                deltalen = len(header) + len(data)
                if gather_debug:
                    offset = revlog.end(len(revlog) - 1)
                    chainbase = revlog.chainbase(base)
                    distance = deltalen + offset - revlog.start(chainbase)
                    chainlen, compresseddeltalen = revlog._chaininfo(base)
                    chainlen += 1
                    compresseddeltalen += deltalen
                    if base == p1r or base == p2r:
                        dbg_type = b"delta"
                        snapshotdepth = None
                    elif not revlog.issnapshot(base):
                        snapshotdepth = None
                    else:
                        dbg_type = b"snapshot"
                        snapshotdepth = revlog.snapshotdepth(base) + 1
                else:
                    distance = None
                    chainbase = None
                    chainlen = None
                    compresseddeltalen = None
                    snapshotdepth = None
                deltainfo = _deltainfo(
                    distance=distance,
                    deltalen=deltalen,
                    data=(header, data),
                    base=base,
                    chainbase=chainbase,
                    chainlen=chainlen,
                    compresseddeltalen=compresseddeltalen,
                    snapshotdepth=snapshotdepth,
                )

        if deltainfo is not None:
            if gather_debug:
                end = util.timer()
                dbg['duration'] = end - start
                dbg[
                    'delta-base'
                ] = deltainfo.base  # pytype: disable=attribute-error
                dbg['search_round_count'] = 0
                dbg['using-cached-base'] = True
                dbg['delta_try_count'] = 0
                dbg['type'] = b"full"
                if snapshotdepth is None:
                    dbg['snapshot-depth'] = -1
                else:
                    dbg['snapshot-depth'] = snapshotdepth
                self._dbg_process_data(dbg)
            return deltainfo
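
        # Hedged sketch of the forced-reuse input handled above (field layout
        # taken from the accesses above, concrete values assumed):
        #
        #   revinfo.cachedelta = (12, delta_bytes, DELTA_BASE_REUSE_FORCE)
        #
        # stores `delta_bytes` against revision 12 unchanged, unless 12 is
        # listed in `excluded_bases`, in which case the normal search below
        # runs instead.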

        # count the number of different deltas we tried (for debug purposes)
        dbg_try_count = 0
        # count the number of "search rounds" we did (for debug purposes)
        dbg_try_rounds = 0
        dbg_type = b'unknown'

        if p1r is None:
            p1r = revlog.rev(revinfo.p1)
            p2r = revlog.rev(revinfo.p2)

        if self._debug_search:
            msg = b"DBG-DELTAS-SEARCH: SEARCH rev=%d\n"
            msg %= target_rev
            self._write_debug(msg)

        # should we try to build a delta?
        if not (len(self.revlog) and self.revlog._storedeltachains):
            search_cls = _NoDeltaSearch
        elif self.revlog.delta_config.sparse_revlog:
            search_cls = _SparseDeltaSearch
        elif self.revlog.delta_config.general_delta:
            search_cls = _GeneralDeltaSearch
        else:
            # before general delta, there was only one possible delta base
            search_cls = _PrevDeltaSearch

        search = search_cls(
            self.revlog,
            revinfo,
            p1r,
            p2r,
            cachedelta,
            excluded_bases,
            target_rev,
            snapshot_cache=self._snapshot_cache,
        )
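
        # Illustrative summary of the dispatch above (one strategy class per
        # on-disk format capability):
        #
        #   no stored delta chains  -> _NoDeltaSearch     (always full text)
        #   sparse-revlog           -> _SparseDeltaSearch (snapshot aware)
        #   general-delta           -> _GeneralDeltaSearch
        #   legacy layout           -> _PrevDeltaSearch   (previous rev only)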

        while not search.done:
            current_group = search.current_group
            # current_group can be `None`, but not while search.done is False;
            # we add this assert to help pytype
            assert current_group is not None
            candidaterevs = current_group
            dbg_try_rounds += 1
            if self._debug_search:
                prev = None
                if deltainfo is not None:
                    prev = deltainfo.base

                if (
                    cachedelta is not None
                    and len(candidaterevs) == 1
                    and cachedelta[0] in candidaterevs
                ):
                    round_type = b"cached-delta"
                elif p1r in candidaterevs or p2r in candidaterevs:
                    round_type = b"parents"
                elif prev is not None and all(c < prev for c in candidaterevs):
                    round_type = b"refine-down"
                elif prev is not None and all(c > prev for c in candidaterevs):
                    round_type = b"refine-up"
                else:
                    round_type = b"search-down"
                msg = b"DBG-DELTAS-SEARCH: ROUND #%d - %d candidates - %s\n"
                msg %= (dbg_try_rounds, len(candidaterevs), round_type)
                self._write_debug(msg)
            nominateddeltas = []
            if deltainfo is not None:
                if self._debug_search:
                    msg = (
                        b"DBG-DELTAS-SEARCH: CONTENDER: rev=%d - length=%d\n"
                    )
                    msg %= (deltainfo.base, deltainfo.deltalen)
                    self._write_debug(msg)
                # if we already found a good delta,
                # challenge it against refined candidates
                nominateddeltas.append(deltainfo)
            for candidaterev in candidaterevs:
                if self._debug_search:
                    msg = b"DBG-DELTAS-SEARCH: CANDIDATE: rev=%d\n"
                    msg %= candidaterev
                    self._write_debug(msg)
                    candidate_type = None
                    if candidaterev == p1r:
                        candidate_type = b"p1"
                    elif candidaterev == p2r:
                        candidate_type = b"p2"
                    elif self.revlog.issnapshot(candidaterev):
                        candidate_type = b"snapshot-%d"
                        candidate_type %= self.revlog.snapshotdepth(
                            candidaterev
                        )

                    if candidate_type is not None:
                        msg = b"DBG-DELTAS-SEARCH: type=%s\n"
                        msg %= candidate_type
                        self._write_debug(msg)
                        msg = b"DBG-DELTAS-SEARCH: size=%d\n"
                        msg %= self.revlog.length(candidaterev)
                        self._write_debug(msg)
                        msg = b"DBG-DELTAS-SEARCH: base=%d\n"
                        msg %= self.revlog.deltaparent(candidaterev)
                        self._write_debug(msg)

                dbg_try_count += 1

                if self._debug_search:
                    delta_start = util.timer()
                candidatedelta = self._builddeltainfo(
                    revinfo,
                    candidaterev,
                    target_rev=target_rev,
                    as_snapshot=search.current_stage == _STAGE_SNAPSHOT,
                )
                if self._debug_search:
                    delta_end = util.timer()
                    msg = b"DBG-DELTAS-SEARCH: delta-search-time=%f\n"
                    msg %= delta_end - delta_start
                    self._write_debug(msg)
                if candidatedelta is not None:
                    if search.is_good_delta_info(candidatedelta):
                        if self._debug_search:
                            msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (GOOD)\n"
                            msg %= candidatedelta.deltalen
                            self._write_debug(msg)
                        nominateddeltas.append(candidatedelta)
                    elif self._debug_search:
                        msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (BAD)\n"
                        msg %= candidatedelta.deltalen
                        self._write_debug(msg)
                elif self._debug_search:
                    msg = b"DBG-DELTAS-SEARCH: NO-DELTA\n"
                    self._write_debug(msg)
            if nominateddeltas:
                deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
            search.next_group(deltainfo)

        if deltainfo is None:
            dbg_type = b"full"
            deltainfo = self._fullsnapshotinfo(revinfo, target_rev)
        elif deltainfo.snapshotdepth:  # pytype: disable=attribute-error
            dbg_type = b"snapshot"
        else:
            dbg_type = b"delta"

        if gather_debug:
            end = util.timer()
            if dbg_type == b'full':
                used_cached = (
                    cachedelta is not None
                    and dbg_try_rounds == 0
                    and dbg_try_count == 0
                    and cachedelta[0] == nullrev
                )
            else:
                used_cached = (
                    cachedelta is not None
                    and dbg_try_rounds == 1
                    and dbg_try_count == 1
                    and deltainfo.base == cachedelta[0]
                )
            dbg['duration'] = end - start
            dbg[
                'delta-base'
            ] = deltainfo.base  # pytype: disable=attribute-error
            dbg['search_round_count'] = dbg_try_rounds
            dbg['using-cached-base'] = used_cached
            dbg['delta_try_count'] = dbg_try_count
            dbg['type'] = dbg_type
            if (
                deltainfo.snapshotdepth  # pytype: disable=attribute-error
                is not None
            ):
                dbg[
                    'snapshot-depth'
                ] = deltainfo.snapshotdepth  # pytype: disable=attribute-error
            else:
                dbg['snapshot-depth'] = -1
            self._dbg_process_data(dbg)
        return deltainfo
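
    # Hedged walk-through of a typical search (round outcomes assumed):
    #
    #   round 1, "parents":     candidates {p1r, p2r}  -> p1r wins
    #   round 2, "refine-down": candidates below p1r   -> nothing smaller
    #
    # Each round keeps the contender with the smallest `deltalen` via
    # `min(nominateddeltas, key=lambda x: x.deltalen)`, so the second round
    # above leaves the p1r-based delta as the final answer.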

    def _one_dbg_data(self):
        dbg = {
            'duration': None,
            'revision': None,
            'delta-base': None,
            'search_round_count': None,
            'using-cached-base': None,
            'delta_try_count': None,
            'type': None,
            'p1-chain-len': None,
            'p2-chain-len': None,
            'snapshot-depth': None,
            'target-revlog': None,
        }
        target_revlog = b"UNKNOWN"
        target_type = self.revlog.target[0]
        target_key = self.revlog.target[1]
        if target_type == KIND_CHANGELOG:
            target_revlog = b'CHANGELOG:'
        elif target_type == KIND_MANIFESTLOG:
            target_revlog = b'MANIFESTLOG:'
            if target_key:
                target_revlog += b'%s:' % target_key
        elif target_type == KIND_FILELOG:
            target_revlog = b'FILELOG:'
            if target_key:
                target_revlog += b'%s:' % target_key
        dbg['target-revlog'] = target_revlog
        return dbg
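
    # Illustrative output of the helper above (target key assumed): for the
    # filelog of ``foo.txt`` the resulting label is
    #
    #   dbg['target-revlog'] == b'FILELOG:foo.txt:'
    #
    # while every other field starts out as None and is filled in by
    # `finddeltainfo`.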

    def _dbg_process_data(self, dbg):
        if self._debug_info is not None:
            self._debug_info.append(dbg)

        if self._write_debug is not None:
            msg = (
                b"DBG-DELTAS:"
                b" %-12s"
                b" rev=%d:"
                b" delta-base=%d"
                b" is-cached=%d"
                b" - search-rounds=%d"
                b" try-count=%d"
                b" - delta-type=%-6s"
                b" snap-depth=%d"
                b" - p1-chain-length=%d"
                b" p2-chain-length=%d"
                b" - duration=%f"
                b"\n"
            )
            msg %= (
                dbg["target-revlog"],
                dbg["revision"],
                dbg["delta-base"],
                dbg["using-cached-base"],
                dbg["search_round_count"],
                dbg["delta_try_count"],
                dbg["type"],
                dbg["snapshot-depth"],
                dbg["p1-chain-len"],
                dbg["p2-chain-len"],
                dbg["duration"],
            )
            self._write_debug(msg)
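
    # Hedged sample of the emitted line (every field value invented, and
    # wrapped here for readability; the real output is a single line):
    #
    #   DBG-DELTAS: FILELOG:foo.txt: rev=42: delta-base=40 is-cached=0
    #     - search-rounds=2 try-count=3 - delta-type=delta  snap-depth=-1
    #     - p1-chain-length=12 p2-chain-length=-1 - duration=0.000103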


def delta_compression(default_compression_header, deltainfo):
    """return (COMPRESSION_MODE, deltainfo)

    used by revlog v2+ formats to dispatch between PLAIN and DEFAULT
    compression.
    """
    h, d = deltainfo.data
    compression_mode = COMP_MODE_INLINE
    if not h and not d:
        # no data to store at all... declare it uncompressed
        compression_mode = COMP_MODE_PLAIN
    elif not h:
        t = d[0:1]
        if t == b'\0':
            compression_mode = COMP_MODE_PLAIN
        elif t == default_compression_header:
            compression_mode = COMP_MODE_DEFAULT
    elif h == b'u':
        # we have a more efficient way to declare uncompressed
        h = b''
        compression_mode = COMP_MODE_PLAIN
        deltainfo = drop_u_compression(deltainfo)
    return compression_mode, deltainfo
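
# Hedged dispatch examples for delta_compression (the b'x' zlib header is an
# assumption about the store's default engine; each pair stands for the
# (header, data) tuple held by a hypothetical _deltainfo's `data` field):
#
#   (b'', b'x\x9c...')  -> COMP_MODE_DEFAULT  (matches the default header)
#   (b'', b'\0raw...')  -> COMP_MODE_PLAIN    (leading NUL marks plain text)
#   (b'u', b'raw...')   -> COMP_MODE_PLAIN    (b'u' dropped via
#                                              drop_u_compression)
#   anything else       -> COMP_MODE_INLINE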