delta-search: fix crash caused by unbound variable...
Raphaël Gomès -
r52402:e7bbeaa4 stable
@@ -1,1883 +1,1884 b''
1 1 # revlogdeltas.py - Logic around delta computation for revlog
2 2 #
3 3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 4 # Copyright 2018 Octobus <contact@octobus.net>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8 """Helper class to compute deltas stored inside revlogs"""
9 9
10 10
11 11 import abc
12 12 import collections
13 13 import struct
14 14
15 15 # import stuff from node for others to import from revlog
16 16 from ..node import nullrev
17 17 from ..i18n import _
18 18
19 19 from .constants import (
20 20 COMP_MODE_DEFAULT,
21 21 COMP_MODE_INLINE,
22 22 COMP_MODE_PLAIN,
23 23 DELTA_BASE_REUSE_FORCE,
24 24 DELTA_BASE_REUSE_NO,
25 25 KIND_CHANGELOG,
26 26 KIND_FILELOG,
27 27 KIND_MANIFESTLOG,
28 28 REVIDX_ISCENSORED,
29 29 REVIDX_RAWTEXT_CHANGING_FLAGS,
30 30 )
31 31
32 32 from ..thirdparty import attr
33 33
34 34 from .. import (
35 35 error,
36 36 mdiff,
37 37 util,
38 38 )
39 39
40 40 from . import flagutil
41 41
42 42 # maximum <delta-chain-data>/<revision-text-length> ratio
43 43 LIMIT_DELTA2TEXT = 2
44 44
45 45
46 46 class _testrevlog:
47 47 """minimalist fake revlog to use in doctests"""
48 48
49 49 def __init__(self, data, density=0.5, mingap=0, snapshot=()):
50 50 """data is an list of revision payload boundaries"""
51 51 from .. import revlog
52 52
53 53 self._data = data
54 54 self.data_config = revlog.DataConfig()
55 55 self.data_config.sr_density_threshold = density
56 56 self.data_config.sr_min_gap_size = mingap
57 57 self.delta_config = revlog.DeltaConfig()
58 58 self.feature_config = revlog.FeatureConfig()
59 59 self._snapshot = set(snapshot)
60 60 self.index = None
61 61
62 62 def start(self, rev):
63 63 if rev == nullrev:
64 64 return 0
65 65 if rev == 0:
66 66 return 0
67 67 return self._data[rev - 1]
68 68
69 69 def end(self, rev):
70 70 if rev == nullrev:
71 71 return 0
72 72 return self._data[rev]
73 73
74 74 def length(self, rev):
75 75 return self.end(rev) - self.start(rev)
76 76
77 77 def __len__(self):
78 78 return len(self._data)
79 79
80 80 def issnapshot(self, rev):
81 81 if rev == nullrev:
82 82 return True
83 83 return rev in self._snapshot
84 84
85 85
86 86 def slicechunk(revlog, revs, targetsize=None):
87 87 """slice revs to reduce the amount of unrelated data to be read from disk.
88 88
89 89 ``revs`` is sliced into groups that should be read in one go.
90 90 Assume that revs are sorted.
91 91
92 92 The initial chunk is sliced until the overall density (payload/chunks-span
93 93 ratio) is above `revlog.data_config.sr_density_threshold`. No gap smaller
94 94 than `revlog.data_config.sr_min_gap_size` is skipped.
95 95
96 96 If `targetsize` is set, no chunk larger than `targetsize` will be yielded.
97 97 For consistency with the other slicing choices, this limit won't go lower
98 98 than `revlog.data_config.sr_min_gap_size`.
99 99 
100 100 If an individual revision chunk is larger than this limit, it will still
101 101 be yielded individually.
102 102
103 103 >>> data = [
104 104 ... 5, #00 (5)
105 105 ... 10, #01 (5)
106 106 ... 12, #02 (2)
107 107 ... 12, #03 (empty)
108 108 ... 27, #04 (15)
109 109 ... 31, #05 (4)
110 110 ... 31, #06 (empty)
111 111 ... 42, #07 (11)
112 112 ... 47, #08 (5)
113 113 ... 47, #09 (empty)
114 114 ... 48, #10 (1)
115 115 ... 51, #11 (3)
116 116 ... 74, #12 (23)
117 117 ... 85, #13 (11)
118 118 ... 86, #14 (1)
119 119 ... 91, #15 (5)
120 120 ... ]
121 121 >>> revlog = _testrevlog(data, snapshot=range(16))
122 122
123 123 >>> list(slicechunk(revlog, list(range(16))))
124 124 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
125 125 >>> list(slicechunk(revlog, [0, 15]))
126 126 [[0], [15]]
127 127 >>> list(slicechunk(revlog, [0, 11, 15]))
128 128 [[0], [11], [15]]
129 129 >>> list(slicechunk(revlog, [0, 11, 13, 15]))
130 130 [[0], [11, 13, 15]]
131 131 >>> list(slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
132 132 [[1, 2], [5, 8, 10, 11], [14]]
133 133
134 134 Slicing with a maximum chunk size
135 135 >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
136 136 [[0], [11], [13], [15]]
137 137 >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
138 138 [[0], [11], [13, 15]]
139 139
140 140 Slicing involving nullrev
141 141 >>> list(slicechunk(revlog, [-1, 0, 11, 13, 15], targetsize=20))
142 142 [[-1, 0], [11], [13, 15]]
143 143 >>> list(slicechunk(revlog, [-1, 13, 15], targetsize=5))
144 144 [[-1], [13], [15]]
145 145 """
146 146 if targetsize is not None:
147 147 targetsize = max(targetsize, revlog.data_config.sr_min_gap_size)
148 148 # targetsize should not be specified when evaluating delta candidates:
149 149 # * targetsize is used to ensure we stay within specification when reading,
150 150 densityslicing = getattr(revlog.index, 'slicechunktodensity', None)
151 151 if densityslicing is None:
152 152 densityslicing = lambda x, y, z: _slicechunktodensity(revlog, x, y, z)
153 153 for chunk in densityslicing(
154 154 revs,
155 155 revlog.data_config.sr_density_threshold,
156 156 revlog.data_config.sr_min_gap_size,
157 157 ):
158 158 for subchunk in _slicechunktosize(revlog, chunk, targetsize):
159 159 yield subchunk
160 160
161 161
162 162 def _slicechunktosize(revlog, revs, targetsize=None):
163 163 """slice revs to match the target size
164 164
165 165 This is intended to be used on chunks that density slicing selected but
166 166 that are still too large compared to the read guarantee of the revlog.
167 167 This might happen when "minimal gap size" interrupted the slicing or when
168 168 chains are built in a way that creates large blocks next to each other.
169 169
170 170 >>> data = [
171 171 ... 3, #0 (3)
172 172 ... 5, #1 (2)
173 173 ... 6, #2 (1)
174 174 ... 8, #3 (2)
175 175 ... 8, #4 (empty)
176 176 ... 11, #5 (3)
177 177 ... 12, #6 (1)
178 178 ... 13, #7 (1)
179 179 ... 14, #8 (1)
180 180 ... ]
181 181
182 182 == All snapshots cases ==
183 183 >>> revlog = _testrevlog(data, snapshot=range(9))
184 184
185 185 Cases where chunk is already small enough
186 186 >>> list(_slicechunktosize(revlog, [0], 3))
187 187 [[0]]
188 188 >>> list(_slicechunktosize(revlog, [6, 7], 3))
189 189 [[6, 7]]
190 190 >>> list(_slicechunktosize(revlog, [0], None))
191 191 [[0]]
192 192 >>> list(_slicechunktosize(revlog, [6, 7], None))
193 193 [[6, 7]]
194 194
195 195 cases where we need actual slicing
196 196 >>> list(_slicechunktosize(revlog, [0, 1], 3))
197 197 [[0], [1]]
198 198 >>> list(_slicechunktosize(revlog, [1, 3], 3))
199 199 [[1], [3]]
200 200 >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
201 201 [[1, 2], [3]]
202 202 >>> list(_slicechunktosize(revlog, [3, 5], 3))
203 203 [[3], [5]]
204 204 >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
205 205 [[3], [5]]
206 206 >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
207 207 [[5], [6, 7, 8]]
208 208 >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
209 209 [[0], [1, 2], [3], [5], [6, 7, 8]]
210 210
211 211 Case with too large individual chunk (must return valid chunk)
212 212 >>> list(_slicechunktosize(revlog, [0, 1], 2))
213 213 [[0], [1]]
214 214 >>> list(_slicechunktosize(revlog, [1, 3], 1))
215 215 [[1], [3]]
216 216 >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
217 217 [[3], [5]]
218 218
219 219 == No Snapshot cases ==
220 220 >>> revlog = _testrevlog(data)
221 221
222 222 Cases where chunk is already small enough
223 223 >>> list(_slicechunktosize(revlog, [0], 3))
224 224 [[0]]
225 225 >>> list(_slicechunktosize(revlog, [6, 7], 3))
226 226 [[6, 7]]
227 227 >>> list(_slicechunktosize(revlog, [0], None))
228 228 [[0]]
229 229 >>> list(_slicechunktosize(revlog, [6, 7], None))
230 230 [[6, 7]]
231 231
232 232 cases where we need actual slicing
233 233 >>> list(_slicechunktosize(revlog, [0, 1], 3))
234 234 [[0], [1]]
235 235 >>> list(_slicechunktosize(revlog, [1, 3], 3))
236 236 [[1], [3]]
237 237 >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
238 238 [[1], [2, 3]]
239 239 >>> list(_slicechunktosize(revlog, [3, 5], 3))
240 240 [[3], [5]]
241 241 >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
242 242 [[3], [4, 5]]
243 243 >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
244 244 [[5], [6, 7, 8]]
245 245 >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
246 246 [[0], [1, 2], [3], [5], [6, 7, 8]]
247 247
248 248 Case with too large individual chunk (must return valid chunk)
249 249 >>> list(_slicechunktosize(revlog, [0, 1], 2))
250 250 [[0], [1]]
251 251 >>> list(_slicechunktosize(revlog, [1, 3], 1))
252 252 [[1], [3]]
253 253 >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
254 254 [[3], [5]]
255 255
256 256 == mixed case ==
257 257 >>> revlog = _testrevlog(data, snapshot=[0, 1, 2])
258 258 >>> list(_slicechunktosize(revlog, list(range(9)), 5))
259 259 [[0, 1], [2], [3, 4, 5], [6, 7, 8]]
260 260 """
261 261 assert targetsize is None or 0 <= targetsize
262 262 startdata = revlog.start(revs[0])
263 263 enddata = revlog.end(revs[-1])
264 264 fullspan = enddata - startdata
265 265 if targetsize is None or fullspan <= targetsize:
266 266 yield revs
267 267 return
268 268
269 269 startrevidx = 0
270 270 endrevidx = 1
271 271 iterrevs = enumerate(revs)
272 272 next(iterrevs) # skip first rev.
273 273 # first step: get snapshots out of the way
274 274 for idx, r in iterrevs:
275 275 span = revlog.end(r) - startdata
276 276 snapshot = revlog.issnapshot(r)
277 277 if span <= targetsize and snapshot:
278 278 endrevidx = idx + 1
279 279 else:
280 280 chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
281 281 if chunk:
282 282 yield chunk
283 283 startrevidx = idx
284 284 startdata = revlog.start(r)
285 285 endrevidx = idx + 1
286 286 if not snapshot:
287 287 break
288 288
289 289 # for the others, we use binary slicing to quickly converge toward valid
290 290 # chunks (otherwise, we might end up looking for start/end of many
291 291 # revisions). This logic is not looking for the perfect slicing point, it
292 292 # focuses on quickly converging toward valid chunks.
293 293 nbitem = len(revs)
294 294 while (enddata - startdata) > targetsize:
295 295 endrevidx = nbitem
296 296 if nbitem - startrevidx <= 1:
297 297 break # protect against individual chunk larger than limit
298 298 localenddata = revlog.end(revs[endrevidx - 1])
299 299 span = localenddata - startdata
300 300 while span > targetsize:
301 301 if endrevidx - startrevidx <= 1:
302 302 break # protect against individual chunk larger than limit
303 303 endrevidx -= (endrevidx - startrevidx) // 2
304 304 localenddata = revlog.end(revs[endrevidx - 1])
305 305 span = localenddata - startdata
306 306 chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
307 307 if chunk:
308 308 yield chunk
309 309 startrevidx = endrevidx
310 310 startdata = revlog.start(revs[startrevidx])
311 311
312 312 chunk = _trimchunk(revlog, revs, startrevidx)
313 313 if chunk:
314 314 yield chunk
315 315
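A minimal standalone sketch of the halving loop above, using a toy `ends` array (the doctest boundaries) in place of `revlog.end`, with no Mercurial APIs involved; it only shrinks the candidate window until the byte span fits, without hunting for the optimal cut:

def halve_to_target(ends, startidx, targetsize):
    # `ends[i]` is the cumulative payload boundary after item i (toy data)
    startdata = 0 if startidx == 0 else ends[startidx - 1]
    endidx = len(ends)
    span = ends[endidx - 1] - startdata
    while span > targetsize:
        if endidx - startidx <= 1:
            break  # protect against an individual item larger than the limit
        endidx -= (endidx - startidx) // 2
        span = ends[endidx - 1] - startdata
    return endidx

# with the doctest data above, starting at 0 with a 5-byte budget the
# window converges to the first two revisions (span 5)
assert halve_to_target([3, 5, 6, 8, 8, 11, 12, 13, 14], 0, 5) == 2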
316 316
317 317 def _slicechunktodensity(revlog, revs, targetdensity=0.5, mingapsize=0):
318 318 """slice revs to reduce the amount of unrelated data to be read from disk.
319 319
320 320 ``revs`` is sliced into groups that should be read in one go.
321 321 Assume that revs are sorted.
322 322
323 323 The initial chunk is sliced until the overall density (payload/chunks-span
324 324 ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
325 325 skipped.
326 326
327 327 >>> revlog = _testrevlog([
328 328 ... 5, #00 (5)
329 329 ... 10, #01 (5)
330 330 ... 12, #02 (2)
331 331 ... 12, #03 (empty)
332 332 ... 27, #04 (15)
333 333 ... 31, #05 (4)
334 334 ... 31, #06 (empty)
335 335 ... 42, #07 (11)
336 336 ... 47, #08 (5)
337 337 ... 47, #09 (empty)
338 338 ... 48, #10 (1)
339 339 ... 51, #11 (3)
340 340 ... 74, #12 (23)
341 341 ... 85, #13 (11)
342 342 ... 86, #14 (1)
343 343 ... 91, #15 (5)
344 344 ... ])
345 345
346 346 >>> list(_slicechunktodensity(revlog, list(range(16))))
347 347 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
348 348 >>> list(_slicechunktodensity(revlog, [0, 15]))
349 349 [[0], [15]]
350 350 >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
351 351 [[0], [11], [15]]
352 352 >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
353 353 [[0], [11, 13, 15]]
354 354 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
355 355 [[1, 2], [5, 8, 10, 11], [14]]
356 356 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
357 357 ... mingapsize=20))
358 358 [[1, 2, 3, 5, 8, 10, 11], [14]]
359 359 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
360 360 ... targetdensity=0.95))
361 361 [[1, 2], [5], [8, 10, 11], [14]]
362 362 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
363 363 ... targetdensity=0.95, mingapsize=12))
364 364 [[1, 2], [5, 8, 10, 11], [14]]
365 365 """
366 366 start = revlog.start
367 367 length = revlog.length
368 368
369 369 if len(revs) <= 1:
370 370 yield revs
371 371 return
372 372
373 373 deltachainspan = segmentspan(revlog, revs)
374 374
375 375 if deltachainspan < mingapsize:
376 376 yield revs
377 377 return
378 378
379 379 readdata = deltachainspan
380 380 chainpayload = sum(length(r) for r in revs)
381 381
382 382 if deltachainspan:
383 383 density = chainpayload / float(deltachainspan)
384 384 else:
385 385 density = 1.0
386 386
387 387 if density >= targetdensity:
388 388 yield revs
389 389 return
390 390
391 391 # Collect the gaps; they will be sorted so we can pop them from largest to smallest
392 392 gaps = []
393 393 prevend = None
394 394 for i, rev in enumerate(revs):
395 395 revstart = start(rev)
396 396 revlen = length(rev)
397 397
398 398 # Skip empty revisions to form larger holes
399 399 if revlen == 0:
400 400 continue
401 401
402 402 if prevend is not None:
403 403 gapsize = revstart - prevend
404 404 # only consider holes that are large enough
405 405 if gapsize > mingapsize:
406 406 gaps.append((gapsize, i))
407 407
408 408 prevend = revstart + revlen
409 409 # sort the gaps so that popping from the end yields the largest first
410 410 gaps.sort()
411 411
412 412 # Collect the indices of the largest holes until the density is acceptable
413 413 selected = []
414 414 while gaps and density < targetdensity:
415 415 gapsize, gapidx = gaps.pop()
416 416
417 417 selected.append(gapidx)
418 418
419 419 # the gaps are sorted in ascending order, so popping from the end of
420 420 # the list removes the largest remaining gap first
421 421 readdata -= gapsize
422 422 if readdata > 0:
423 423 density = chainpayload / float(readdata)
424 424 else:
425 425 density = 1.0
426 426 selected.sort()
427 427
428 428 # Cut the revs at collected indices
429 429 previdx = 0
430 430 for idx in selected:
431 431
432 432 chunk = _trimchunk(revlog, revs, previdx, idx)
433 433 if chunk:
434 434 yield chunk
435 435
436 436 previdx = idx
437 437
438 438 chunk = _trimchunk(revlog, revs, previdx)
439 439 if chunk:
440 440 yield chunk
441 441
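For illustration, a self-contained rework of the density computation above on toy (start, length) pairs; this is an assumption-level sketch, not Mercurial code:

def density_after_gap_removal(chunks, targetdensity):
    payload = sum(length for _, length in chunks)
    span = (chunks[-1][0] + chunks[-1][1]) - chunks[0][0]
    gaps = sorted(
        nxt_start - (cur_start + cur_len)
        for (cur_start, cur_len), (nxt_start, _) in zip(chunks, chunks[1:])
    )
    readdata = span
    density = payload / span if span else 1.0
    while gaps and density < targetdensity:
        readdata -= gaps.pop()  # drop the largest remaining gap
        density = payload / readdata if readdata > 0 else 1.0
    return density

# e.g. two 5-byte revisions separated by a 90-byte hole: density jumps from
# 0.1 to 1.0 once the hole is skipped
print(density_after_gap_removal([(0, 5), (95, 5)], 0.5))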
442 442
443 443 def _trimchunk(revlog, revs, startidx, endidx=None):
444 444 """returns revs[startidx:endidx] without empty trailing revs
445 445
446 446 Doctest Setup
447 447 >>> revlog = _testrevlog([
448 448 ... 5, #0
449 449 ... 10, #1
450 450 ... 12, #2
451 451 ... 12, #3 (empty)
452 452 ... 17, #4
453 453 ... 21, #5
454 454 ... 21, #6 (empty)
455 455 ... ])
456 456
457 457 Contiguous cases:
458 458 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
459 459 [0, 1, 2, 3, 4, 5]
460 460 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
461 461 [0, 1, 2, 3, 4]
462 462 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
463 463 [0, 1, 2]
464 464 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
465 465 [2]
466 466 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
467 467 [3, 4, 5]
468 468 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
469 469 [3, 4]
470 470
471 471 Discontiguous cases:
472 472 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
473 473 [1, 3, 5]
474 474 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
475 475 [1]
476 476 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
477 477 [3, 5]
478 478 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
479 479 [3, 5]
480 480 """
481 481 length = revlog.length
482 482
483 483 if endidx is None:
484 484 endidx = len(revs)
485 485
486 486 # If we have a non-empty delta candidate, there is nothing to trim
487 487 if revs[endidx - 1] < len(revlog):
488 488 # Trim empty revs at the end, except the very first revision of a chain
489 489 while (
490 490 endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0
491 491 ):
492 492 endidx -= 1
493 493
494 494 return revs[startidx:endidx]
495 495
496 496
497 497 def segmentspan(revlog, revs):
498 498 """Get the byte span of a segment of revisions
499 499
500 500 revs is a sorted array of revision numbers
501 501
502 502 >>> revlog = _testrevlog([
503 503 ... 5, #0
504 504 ... 10, #1
505 505 ... 12, #2
506 506 ... 12, #3 (empty)
507 507 ... 17, #4
508 508 ... ])
509 509
510 510 >>> segmentspan(revlog, [0, 1, 2, 3, 4])
511 511 17
512 512 >>> segmentspan(revlog, [0, 4])
513 513 17
514 514 >>> segmentspan(revlog, [3, 4])
515 515 5
516 516 >>> segmentspan(revlog, [1, 2, 3,])
517 517 7
518 518 >>> segmentspan(revlog, [1, 3])
519 519 7
520 520 """
521 521 if not revs:
522 522 return 0
523 523 end = revlog.end(revs[-1])
524 524 return end - revlog.start(revs[0])
525 525
526 526
527 527 def _textfromdelta(revlog, baserev, delta, p1, p2, flags, expectednode):
528 528 """build full text from a (base, delta) pair and other metadata"""
529 529 # special case deltas which replace entire base; no need to decode
530 530 # base revision. this neatly avoids censored bases, which throw when
531 531 # they're decoded.
532 532 hlen = struct.calcsize(b">lll")
533 533 if delta[:hlen] == mdiff.replacediffheader(
534 534 revlog.rawsize(baserev), len(delta) - hlen
535 535 ):
536 536 fulltext = delta[hlen:]
537 537 else:
538 538 # deltabase is rawtext before changed by flag processors, which is
539 539 # equivalent to non-raw text
540 540 basetext = revlog.revision(baserev)
541 541 fulltext = mdiff.patch(basetext, delta)
542 542
543 543 try:
544 544 validatehash = flagutil.processflagsraw(revlog, fulltext, flags)
545 545 if validatehash:
546 546 revlog.checkhash(fulltext, expectednode, p1=p1, p2=p2)
547 547 if flags & REVIDX_ISCENSORED:
548 548 raise error.StorageError(
549 549 _(b'node %s is not censored') % expectednode
550 550 )
551 551 except error.CensoredNodeError:
552 552 # must pass the censored index flag to add censored revisions
553 553 if not flags & REVIDX_ISCENSORED:
554 554 raise
555 555 return fulltext
556 556
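A minimal sketch of the full-replacement check above, assuming `mdiff.replacediffheader` packs `(0, base_size, new_size)` with `>lll` (which is what the comparison relies on); a delta that replaces the whole base is a single hunk, so the caller can skip decoding the (possibly censored) base entirely:

import struct

def replacediffheader_sketch(base_size, new_size):
    # assumed shape of mdiff.replacediffheader's output, for illustration
    return struct.pack(b">lll", 0, base_size, new_size)

def fulltext_if_full_replacement(delta, base_size):
    hlen = struct.calcsize(b">lll")
    if delta[:hlen] == replacediffheader_sketch(base_size, len(delta) - hlen):
        return delta[hlen:]  # payload already is the full text
    return None  # need to patch the base text instead

delta = replacediffheader_sketch(10, 3) + b"new"
assert fulltext_if_full_replacement(delta, 10) == b"new"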
557 557
558 558 @attr.s(slots=True, frozen=True)
559 559 class _deltainfo:
560 560 distance = attr.ib()
561 561 deltalen = attr.ib()
562 562 data = attr.ib()
563 563 base = attr.ib()
564 564 chainbase = attr.ib()
565 565 chainlen = attr.ib()
566 566 compresseddeltalen = attr.ib()
567 567 snapshotdepth = attr.ib()
568 568
569 569
570 570 def drop_u_compression(delta):
571 571 """turn into a "u" (no-compression) into no-compression without header
572 572
573 573 This is useful for revlog format that has better compression method.
574 574 """
575 575 assert delta.data[0] == b'u', delta.data[0]
576 576 return _deltainfo(
577 577 delta.distance,
578 578 delta.deltalen - 1,
579 579 (b'', delta.data[1]),
580 580 delta.base,
581 581 delta.chainbase,
582 582 delta.chainlen,
583 583 delta.compresseddeltalen,
584 584 delta.snapshotdepth,
585 585 )
586 586
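For context, a simplified sketch of the header convention assumed here (not revlog's actual compress implementation): chunks that do not benefit from compression are stored verbatim behind a b'u' marker, which the function above strips.

import zlib

def compress_chunk(data, compressor):
    comp = compressor(data)
    if len(comp) < len(data):
        return (b'', comp)   # empty header: data is compressed
    return (b'u', data)      # b'u': stored verbatim

header, payload = compress_chunk(b'abc', zlib.compress)
assert header == b'u'  # tiny input does not compress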
587 587
588 588 # If a revision's full text is that much bigger than a base candidate full
589 589 # text's, it is very unlikely that it will produce a valid delta. We no longer
590 590 # consider these candidates.
591 591 LIMIT_BASE2TEXT = 500
592 592
593 593 ### stages of the search, used for debugging and to select and adjust some logic.
594 594 # initial stage, next step is unknown
595 595 _STAGE_UNSPECIFIED = "unspecified"
596 596 # trying the cached delta
597 597 _STAGE_CACHED = "cached"
598 598 # trying delta based on parents
599 599 _STAGE_PARENTS = "parents"
600 600 # trying to build a valid snapshot of any level
601 601 _STAGE_SNAPSHOT = "snapshot"
602 602 # trying to build a delta based on the previous revision
603 603 _STAGE_PREV = "prev"
604 604 # trying to build a full snapshot
605 605 _STAGE_FULL = "full"
606 606
607 607
608 608 class _BaseDeltaSearch(abc.ABC):
609 609 """perform the search of a good delta for a single revlog revision
610 610
611 611 note: some of the deltacomputer.finddeltainfo logic should probably move
612 612 here.
613 613 """
614 614
615 615 def __init__(
616 616 self,
617 617 revlog,
618 618 revinfo,
619 619 p1,
620 620 p2,
621 621 cachedelta,
622 622 excluded_bases=None,
623 623 target_rev=None,
624 624 snapshot_cache=None,
625 625 ):
626 626 # the DELTA_BASE_REUSE_FORCE case should have been taken care of sooner
627 627 # so we should never end up asking such a question. Adding the assert as
628 628 # a safe-guard to detect anything that would be fishy in this regard.
629 629 assert (
630 630 cachedelta is None
631 631 or cachedelta[2] != DELTA_BASE_REUSE_FORCE
632 632 or not revlog.delta_config.general_delta
633 633 )
634 634 self.revlog = revlog
635 635 self.revinfo = revinfo
636 636 self.textlen = revinfo.textlen
637 637 self.p1 = p1
638 638 self.p2 = p2
639 639 self.cachedelta = cachedelta
640 640 self.excluded_bases = excluded_bases
641 641 if target_rev is None:
642 642 target_rev = len(self.revlog)
643 643 self.target_rev = target_rev
644 644 if snapshot_cache is None:
645 645 # map: base-rev: [snapshot-revs]
646 646 snapshot_cache = SnapshotCache()
647 647 self.snapshot_cache = snapshot_cache
648 648
649 649 self.tested = {nullrev}
650 650
651 651 self.current_stage = _STAGE_UNSPECIFIED
652 652 self.current_group = None
653 653 self._init_group()
654 654
655 655 def is_good_delta_info(self, deltainfo):
656 656 """Returns True if the given delta is good.
657 657
658 658 Good means that it is within the disk span, disk size, and chain length
659 659 bounds that we know to be performant.
660 660 """
661 661 if not self._is_good_delta_info_universal(deltainfo):
662 662 return False
663 663 if not self._is_good_delta_info_chain_quality(deltainfo):
664 664 return False
665 665 return True
666 666
667 667 def _is_good_delta_info_universal(self, deltainfo):
668 668 """Returns True if the given delta is good.
669 669
670 670 This performs generic checks needed by all format variants.
671 671
672 672 This is used by is_good_delta_info.
673 673 """
674 674
675 675 if deltainfo is None:
676 676 return False
677 677
678 678 # the DELTA_BASE_REUSE_FORCE case should have been taken care of sooner
680 680 # so we should never end up asking such a question. Adding the assert as
680 680 # a safe-guard to detect anything that would be fishy in this regard.
681 681 assert (
682 682 self.revinfo.cachedelta is None
683 683 or self.revinfo.cachedelta[2] != DELTA_BASE_REUSE_FORCE
684 684 or not self.revlog.delta_config.general_delta
685 685 )
686 686
687 687 # Bad delta from new delta size:
688 688 #
689 689 # If the delta size is larger than the target text, storing the delta
690 690 # will be inefficient.
691 691 if self.revinfo.textlen < deltainfo.deltalen:
692 692 return False
693 693
694 694 return True
695 695
696 696 def _is_good_delta_info_chain_quality(self, deltainfo):
697 697 """Returns True if the chain associated with the delta is good.
698 698
699 699 This performs checks for formats that use delta chains.
700 700
701 701 This is used by is_good_delta_info.
702 702 """
703 703 # - 'deltainfo.distance' is the distance from the base revision --
704 704 # bounding it limits the amount of I/O we need to do.
705 705
706 706 defaultmax = self.revinfo.textlen * 4
707 707 maxdist = self.revlog.delta_config.max_deltachain_span
708 708 if not maxdist:
709 709 maxdist = deltainfo.distance # ensure the conditional passes
710 710 maxdist = max(maxdist, defaultmax)
711 711
712 712 # Bad delta from read span:
713 713 #
714 714 # If the span of data read is larger than the maximum allowed.
715 715 #
716 716 # In the sparse-revlog case, we rely on the associated "sparse
717 717 # reading" to avoid issue related to the span of data. In theory, it
718 718 # would be possible to build pathological revlog where delta pattern
719 719 # would lead to too many reads. However, they do not happen in
720 720 # practice at all. So we skip the span check entirely.
721 721 if (
722 722 not self.revlog.delta_config.sparse_revlog
723 723 and maxdist < deltainfo.distance
724 724 ):
725 725 return False
726 726
727 727 # Bad delta from cumulated payload size:
728 728 #
729 729 # - 'deltainfo.compresseddeltalen' is the sum of the total size of
730 730 # deltas we need to apply -- bounding it limits the amount of CPU
731 731 # we consume.
732 732 max_chain_data = self.revinfo.textlen * LIMIT_DELTA2TEXT
733 733 # If the sum of deltas gets larger than K * target text length.
734 734 if max_chain_data < deltainfo.compresseddeltalen:
735 735 return False
736 736
737 737 # Bad delta from chain length:
738 738 #
739 739 # If the number of deltas in the chain gets too high.
740 740 if (
741 741 self.revlog.delta_config.max_chain_len
742 742 and self.revlog.delta_config.max_chain_len < deltainfo.chainlen
743 743 ):
744 744 return False
745 745 return True
746 746
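To make the bounds above concrete, a toy walkthrough with assumed numbers (not from any real revlog) mirroring the checks in this method:

textlen = 1_000                 # size of the revision we are storing
max_deltachain_span = 0         # 0/None means "no explicit span limit"
distance = 5_000                # bytes between chain base and new delta

defaultmax = textlen * 4        # 4_000
maxdist = max_deltachain_span or distance  # fall back so the check passes
maxdist = max(maxdist, defaultmax)         # never below 4x the text length
assert maxdist == 5_000         # span check: 5_000 <= 5_000, delta accepted

LIMIT_DELTA2TEXT = 2
compresseddeltalen = 1_500      # sum of all deltas on the chain
assert compresseddeltalen <= textlen * LIMIT_DELTA2TEXT  # CPU bound holds
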
747 747 @property
748 748 def done(self):
749 749 """True when all possible candidate have been tested"""
750 750 return self.current_group is None
751 751
752 752 @abc.abstractmethod
753 753 def next_group(self, good_delta=None):
754 754 """move to the next group to test
755 755
756 756 The group of revisions to test will be available in
757 757 `self.current_group`. If the previous group had any good delta, the
758 758 best one can be passed as the `good_delta` parameter to help select
759 759 the next group.
760 760 
761 761 If no revision remains to be tested, `self.done` will be True and
762 762 `self.current_group` will be None.
763 763 """
764 764 pass
765 765
766 766 @abc.abstractmethod
767 767 def _init_group(self):
768 768 pass
769 769
770 770
771 771 class _NoDeltaSearch(_BaseDeltaSearch):
772 772 """Search for no delta.
773 773
774 774 This search variant is to be used in cases where we should not store deltas.
775 775 """
776 776
777 777 def _init_group(self):
778 778 self.current_stage = _STAGE_FULL
779 779
780 780 def next_group(self, good_delta=None):
781 781 pass
782 782
783 783
784 784 class _PrevDeltaSearch(_BaseDeltaSearch):
785 785 """Search for delta against the previous revision only
786 786
787 787 This search variant is to be used when the format does not allow for deltas
788 788 against arbitrary bases.
789 789 """
790 790
791 791 def _init_group(self):
792 792 self.current_stage = _STAGE_PREV
793 793 self.current_group = [self.target_rev - 1]
794 794 self.tested.update(self.current_group)
795 795
796 796 def next_group(self, good_delta=None):
797 797 self.current_stage = _STAGE_FULL
798 798 self.current_group = None
799 799
800 800
801 801 class _GeneralDeltaSearch(_BaseDeltaSearch):
802 802 """Delta search variant for general-delta repository"""
803 803
804 804 def _init_group(self):
805 805 # Why search for a delta base if we cannot use a delta base?
806 806 # also see issue6056
807 807 assert self.revlog.delta_config.general_delta
808 808 self._candidates_iterator = self._iter_groups()
809 809 self._last_good = None
810 810 if (
811 811 self.cachedelta is not None
812 812 and self.cachedelta[2] > DELTA_BASE_REUSE_NO
813 813 and self._pre_filter_rev(self.cachedelta[0])
814 814 ):
815 815 # First we try to reuse the delta contained in the bundle (or from
816 816 # the source revlog).
817 817 #
818 818 # This logic only applies to general delta repositories and can be
819 819 # disabled through configuration. Disabling reuse source delta is
820 820 # useful when we want to make sure we recomputed "optimal" deltas.
821 821 self.current_stage = _STAGE_CACHED
822 822 self._internal_group = (self.cachedelta[0],)
823 823 self._internal_idx = 0
824 824 self.current_group = self._internal_group
825 825 self.tested.update(self.current_group)
826 826 else:
827 827 self._next_internal_group()
828 828
829 829 def _next_internal_group(self):
830 830 # self._internal_group can be larger than self.current_group
831 831 self._internal_idx = 0
832 832 group = self._candidates_iterator.send(self._last_good)
833 833 if group is not None:
834 834 group = self._pre_filter_candidate_revs(group)
835 835 self._internal_group = group
836 836 if self._internal_group is None:
837 837 self.current_group = None
838 838 elif len(self._internal_group) == 0:
839 839 self.next_group()
840 840 else:
841 841 chunk_size = self.revlog.delta_config.candidate_group_chunk_size
842 842 if chunk_size > 0:
843 843 self.current_group = self._internal_group[:chunk_size]
844 844 self._internal_idx += chunk_size
845 845 else:
846 846 self.current_group = self._internal_group
847 847 self._internal_idx += len(self.current_group)
848 848
849 849 self.tested.update(self.current_group)
850 850
851 851 def next_group(self, good_delta=None):
852 852 old_good = self._last_good
853 853 if good_delta is not None:
854 854 self._last_good = good_delta
855 855 if self.current_stage == _STAGE_CACHED and good_delta is not None:
856 856 # the cache is good, let us use the cache as requested
857 857 self._candidates_iterator = None
858 858 self._internal_group = None
859 859 self._internal_idx = None
860 860 self.current_group = None
861 861 return
862 862
863 863 if (self._internal_idx < len(self._internal_group)) and (
864 864 old_good != good_delta
865 865 ):
866 866 # When the size of the candidate group is big, it can result in
867 867 # a quite significant performance impact. To reduce this, we
868 868 # can send them in smaller batches until the new batch does not
869 869 # provide any improvements.
870 870 #
871 871 # This might reduce the overall efficiency of the compression
872 872 # in some corner cases, but that should also prevent very
873 873 # pathological cases from being an issue. (eg. 20 000
874 874 # candidates).
875 875 #
876 876 # XXX note that the ordering of the group becomes important as
877 877 # it now impacts the final result. The current order is
878 878 # unprocessed and can be improved.
879 next_idx = self._internal_idx + self._group_chunk_size
879 chunk_size = self.revlog.delta_config.candidate_group_chunk_size
880 next_idx = self._internal_idx + chunk_size
880 881 self.current_group = self._internal_group[
881 882 self._internal_idx : next_idx
882 883 ]
883 884 self.tested.update(self.current_group)
884 885 self._internal_idx = next_idx
885 886 else:
886 887 self._next_internal_group()
887 888
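A standalone sketch of the batching strategy above: serve candidates in fixed-size chunks and stop pulling more chunks once a batch brings no improvement (toy scoring, no Mercurial types):

def chunked_search(candidates, score, chunk_size):
    best = None
    idx = 0
    while idx < len(candidates):
        chunk = candidates[idx : idx + chunk_size]
        idx += chunk_size
        chunk_best = min(chunk, key=score)
        if best is not None and score(chunk_best) >= score(best):
            break  # this batch did not improve: stop early
        best = chunk_best
    return best

# candidates whose "delta size" is their value: the search stops after the
# second chunk since [7, 9] cannot beat 1
print(chunked_search([3, 1, 7, 9, 0], score=lambda r: r, chunk_size=2))
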
888 889 def _pre_filter_candidate_revs(self, temptative):
889 890 """filter possible candidate before computing a delta
890 891
891 892 This function use various criteria to pre-filter candidate delta base
892 893 before we compute a delta and evaluate its quality.
893 894
894 895 Such pre-filter limit the number of computed delta, an expensive operation.
895 896
896 897 return the updated list of revision to test
897 898 """
898 899 deltalength = self.revlog.length
899 900 deltaparent = self.revlog.deltaparent
900 901
901 902 tested = self.tested
902 903 group = []
903 904 for rev in temptative:
904 905 # skip over empty deltas (no need to include them in a chain)
905 906 while not (rev == nullrev or rev in tested or deltalength(rev)):
906 907 tested.add(rev)
907 908 rev = deltaparent(rev)
908 909 if self._pre_filter_rev(rev):
909 910 group.append(rev)
910 911 else:
911 912 self.tested.add(rev)
912 913 return group
913 914
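A sketch of the empty-delta skipping above: a candidate whose stored delta is empty adds nothing to a chain, so we walk up its delta parents until we reach a non-empty one (toy dicts stand in for revlog.length and revlog.deltaparent):

def skip_empty(rev, length, deltaparent, tested, nullrev=-1):
    while not (rev == nullrev or rev in tested or length[rev]):
        tested.add(rev)
        rev = deltaparent[rev]
    return rev

length = {0: 12, 1: 0, 2: 0}   # revs 1 and 2 store empty deltas
deltaparent = {2: 1, 1: 0}
print(skip_empty(2, length, deltaparent, tested=set()))  # -> 0
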
914 915 def _pre_filter_rev_universal(self, rev):
915 916 """pre filtering that is need in all cases.
916 917
917 918 return True if it seems okay to test a rev, False otherwise.
918 919
919 920 used by _pre_filter_rev.
920 921 """
921 922 # no need to try a delta against nullrev, this will be done as
922 923 # a last resort.
923 924 if rev == nullrev:
924 925 return False
925 926 # filter out revisions we tested already
926 927 if rev in self.tested:
927 928 return False
928 929
929 930 # a higher authority deemed the base unworthy (e.g. censored)
930 931 if self.excluded_bases is not None and rev in self.excluded_bases:
931 932 return False
932 933 # We are in some recomputation cases and that rev is too high
933 934 # in the revlog
934 935 if self.target_rev is not None and rev >= self.target_rev:
935 936 return False
936 937 # no delta for rawtext-changing revs (see "candelta" for why)
937 938 if self.revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
938 939 return False
939 940 return True
940 941
941 942 def _pre_filter_rev_delta_chain(self, rev):
942 943 """pre filtering that is needed in sparse revlog cases
943 944
944 945 return True if it seems okay to test a rev, False otherwise.
945 946
946 947 used by _pre_filter_rev.
947 948 """
948 949 deltas_limit = self.revinfo.textlen * LIMIT_DELTA2TEXT
949 950 # filter out delta bases that will never produce a good delta
950 951 #
951 952 # if the delta of that base is already bigger than the limit
952 953 # for the delta chain size, doing a delta is hopeless.
953 954 if deltas_limit < self.revlog.length(rev):
954 955 return False
955 956
956 957 # If we reach here, we are about to build and test a delta.
957 958 # The delta building process will compute the chaininfo in all
958 959 # cases; since that computation is cached, it is fine to access
959 960 # it here too.
960 961 chainlen, chainsize = self.revlog._chaininfo(rev)
961 962 # if chain will be too long, skip base
962 963 if (
963 964 self.revlog.delta_config.max_chain_len
964 965 and chainlen >= self.revlog.delta_config.max_chain_len
965 966 ):
966 967 return False
967 968 # if the chain already has too much data, skip base
968 969 if deltas_limit < chainsize:
969 970 return False
970 971 return True
971 972
972 973 def _pre_filter_rev(self, rev):
973 974 """return True if it seems okay to test a rev, False otherwise"""
974 975 if not self._pre_filter_rev_universal(rev):
975 976 return False
976 977 if not self._pre_filter_rev_delta_chain(rev):
977 978 return False
978 979 return True
979 980
980 981 def _iter_parents(self):
981 982 # exclude bases already lazily tested, if any
982 983 parents = [p for p in (self.p1, self.p2) if p != nullrev]
983 984
984 985 self.current_stage = _STAGE_PARENTS
985 986 if (
986 987 not self.revlog.delta_config.delta_both_parents
987 988 and len(parents) == 2
988 989 ):
989 990 parents.sort()
990 991 # To minimize the chance of having to build a fulltext,
991 992 # pick first whichever parent is closest to us (max rev)
992 993 yield (parents[1],)
993 994 # then the other one (min rev) if the first did not fit
994 995 yield (parents[0],)
995 996 elif len(parents) > 0:
996 997 # Test all parents (1 or 2), and keep the best candidate
997 998 yield parents
998 999
999 1000 def _iter_prev(self):
1000 1001 # other approaches failed, try against prev to hopefully save us a
1001 1002 # fulltext.
1002 1003 self.current_stage = _STAGE_PREV
1003 1004 yield (self.target_rev - 1,)
1004 1005
1005 1006 def _iter_groups(self):
1006 1007 good = None
1007 1008 for group in self._iter_parents():
1008 1009 good = yield group
1009 1010 if good is not None:
1010 1011 break
1011 1012 else:
1012 1013 assert good is None
1013 1014 yield from self._iter_prev()
1014 1015 yield None
1015 1016
1016 1017
1017 1018 class _SparseDeltaSearch(_GeneralDeltaSearch):
1018 1019 """Delta search variants for sparse-revlog"""
1019 1020
1020 1021 def is_good_delta_info(self, deltainfo):
1021 1022 """Returns True if the given delta is good.
1022 1023
1023 1024 Good means that it is within the disk span, disk size, and chain length
1024 1025 bounds that we know to be performant.
1025 1026 """
1026 1027 if not self._is_good_delta_info_universal(deltainfo):
1027 1028 return False
1028 1029 if not self._is_good_delta_info_chain_quality(deltainfo):
1029 1030 return False
1030 1031 if not self._is_good_delta_info_snapshot_constraints(deltainfo):
1031 1032 return False
1032 1033 return True
1033 1034
1034 1035 def _is_good_delta_info_snapshot_constraints(self, deltainfo):
1035 1036 """Returns True if the chain associated with snapshots
1036 1037
1037 1038 This performs checks for format that use sparse-revlog and intermediate
1038 1039 snapshots.
1039 1040
1040 1041 This is used by is_good_delta_info.
1041 1042 """
1042 1043 # if not a snapshot, this method has no filtering to do
1043 1044 if deltainfo.snapshotdepth is None:
1044 1045 return True
1045 1046 # bad delta from intermediate snapshot size limit
1046 1047 #
1047 1048 # Reject the delta if the intermediate snapshot size is higher than
1048 1049 # the limit. The limit exists to prevent endless chains of
1049 1050 # intermediate deltas from being created.
1050 1051 if (
1051 1052 self.revinfo.textlen >> deltainfo.snapshotdepth
1052 1053 ) < deltainfo.deltalen:
1053 1054 return False
1054 1055
1055 1056 # bad delta if new intermediate snapshot is larger than the previous
1056 1057 # snapshot
1057 1058 if self.revlog.length(deltainfo.base) < deltainfo.deltalen:
1058 1059 return False
1059 1060
1060 1061 return True
1061 1062
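Toy numbers (assumed values, for illustration only) for the two snapshot constraints checked above:

textlen = 1_000
snapshotdepth = 2
deltalen = 200
base_len = 300   # stored size of the parent snapshot

# each extra snapshot level must at least halve the allowed stored size
assert (textlen >> snapshotdepth) >= deltalen   # 250 >= 200: accepted
# a depth-3 snapshot of the same size would be rejected: 125 < 200
assert (textlen >> 3) < deltalen
# the new intermediate snapshot must not outgrow its parent snapshot
assert base_len >= deltalen
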
1062 1063 def _pre_filter_rev(self, rev):
1063 1064 """return True if it seems okay to test a rev, False otherwise"""
1064 1065 if not self._pre_filter_rev_universal(rev):
1065 1066 return False
1066 1067 if not self._pre_filter_rev_delta_chain(rev):
1067 1068 return False
1068 1069 if not self._pre_filter_rev_sparse(rev):
1069 1070 return False
1070 1071 return True
1071 1072
1072 1073 def _pre_filter_rev_sparse(self, rev):
1073 1074 """pre filtering that is needed in sparse revlog cases
1074 1075
1075 1076 return True if it seems okay to test a rev, False otherwise.
1076 1077
1077 1078 used by _pre_filter_rev.
1078 1079 """
1079 1080 assert self.revlog.delta_config.sparse_revlog
1080 1081 # if the revision we test against is too small, the resulting delta
1081 1082 # will be large anyway as the amount of data to be added is big
1082 1083 if self.revlog.rawsize(rev) < (self.textlen // LIMIT_BASE2TEXT):
1083 1084 return False
1084 1085
1085 1086 if self.revlog.delta_config.upper_bound_comp is not None:
1086 1087 maxcomp = self.revlog.delta_config.upper_bound_comp
1087 1088 basenotsnap = (self.p1, self.p2, nullrev)
1088 1089 if rev not in basenotsnap and self.revlog.issnapshot(rev):
1089 1090 snapshotdepth = self.revlog.snapshotdepth(rev)
1090 1091 # If text is significantly larger than the base, we can
1091 1092 # expect the resulting delta to be proportional to the size
1092 1093 # difference
1093 1094 revsize = self.revlog.rawsize(rev)
1094 1095 rawsizedistance = max(self.textlen - revsize, 0)
1095 1096 # use an estimate of the compression upper bound.
1096 1097 lowestrealisticdeltalen = rawsizedistance // maxcomp
1097 1098
1098 1099 # check the absolute constraint on the delta size
1099 1100 snapshotlimit = self.textlen >> snapshotdepth
1100 1101 if snapshotlimit < lowestrealisticdeltalen:
1101 1102 # delta lower bound is larger than accepted upper
1102 1103 # bound
1103 1104 return False
1104 1105
1105 1106 # check the relative constraint on the delta size
1106 1107 revlength = self.revlog.length(rev)
1107 1108 if revlength < lowestrealisticdeltalen:
1108 1109 # delta probable lower bound is larger than target
1109 1110 # base
1110 1111 return False
1111 1112 return True
1112 1113
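A worked toy example of the compression upper-bound filter above (all values are assumptions for illustration):

upper_bound_comp = 10          # "at best, 10x compression"
textlen = 50_000               # new revision raw size
revsize = 10_000               # candidate base raw size
snapshotdepth = 3
rev_stored_length = 2_000      # on-disk size of the candidate

rawsizedistance = max(textlen - revsize, 0)                    # 40_000 bytes to add
lowestrealisticdeltalen = rawsizedistance // upper_bound_comp  # >= 4_000

snapshotlimit = textlen >> snapshotdepth               # 6_250
assert not snapshotlimit < lowestrealisticdeltalen     # absolute check passes
assert rev_stored_length < lowestrealisticdeltalen     # relative check fails:
# even a best-case delta (>= 4_000 bytes) would be larger than the 2_000-byte
# base we would delta against, so this candidate is pre-filtered out
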
1113 1114 def _iter_snapshots_base(self):
1114 1115 assert self.revlog.delta_config.sparse_revlog
1115 1116 assert self.current_stage == _STAGE_SNAPSHOT
1116 1117 prev = self.target_rev - 1
1117 1118 deltachain = lambda rev: self.revlog._deltachain(rev)[0]
1118 1119
1119 1120 parents = [p for p in (self.p1, self.p2) if p != nullrev]
1120 1121 if not parents:
1121 1122 return
1122 1123 # See if we can use an existing snapshot in the parent chains as a
1123 1124 # base for a new intermediate snapshot
1124 1125 #
1125 1126 # search for snapshots in the parents' delta chains
1126 1127 # map: snapshot-level: snapshot-rev
1127 1128 parents_snaps = collections.defaultdict(set)
1128 1129 candidate_chains = [deltachain(p) for p in parents]
1129 1130 for chain in candidate_chains:
1130 1131 for idx, s in enumerate(chain):
1131 1132 if not self.revlog.issnapshot(s):
1132 1133 break
1133 1134 parents_snaps[idx].add(s)
1134 1135 snapfloor = min(parents_snaps[0]) + 1
1135 1136 self.snapshot_cache.update(self.revlog, snapfloor)
1136 1137 # search for the highest "unrelated" revision
1137 1138 #
1138 1139 # Adding snapshots used by "unrelated" revisions increases the odds we
1139 1140 # reuse an independent, yet better, snapshot chain.
1140 1141 #
1141 1142 # XXX instead of building a set of revisions, we could lazily
1142 1143 # enumerate over the chains. That would be more efficient, however
1143 1144 # we stick to simple code for now.
1144 1145 all_revs = set()
1145 1146 for chain in candidate_chains:
1146 1147 all_revs.update(chain)
1147 1148 other = None
1148 1149 for r in self.revlog.revs(prev, snapfloor):
1149 1150 if r not in all_revs:
1150 1151 other = r
1151 1152 break
1152 1153 if other is not None:
1153 1154 # To avoid unfair competition, we won't use unrelated
1154 1155 # intermediate snapshots that are deeper than the ones from the
1155 1156 # parent delta chain.
1156 1157 max_depth = max(parents_snaps.keys())
1157 1158 chain = deltachain(other)
1158 1159 for depth, s in enumerate(chain):
1159 1160 if s < snapfloor:
1160 1161 continue
1161 1162 if max_depth < depth:
1162 1163 break
1163 1164 if not self.revlog.issnapshot(s):
1164 1165 break
1165 1166 parents_snaps[depth].add(s)
1166 1167 # Test them as possible intermediate snapshot bases. We test them
1167 1168 # from highest to lowest level. High level ones are more likely to
1168 1169 # result in small deltas.
1169 1170 floor = None
1170 1171 for idx, snaps in sorted(parents_snaps.items(), reverse=True):
1171 1172 siblings = set()
1172 1173 for s in snaps:
1173 1174 siblings.update(self.snapshot_cache.snapshots[s])
1174 1175 # Before considering making a new intermediate snapshot, we
1175 1176 # check if an existing snapshot, child of the base we consider,
1176 1177 # would be suitable.
1177 1178 #
1178 1179 # It gives a chance to reuse a delta chain "unrelated" to the
1179 1180 # current revision instead of starting our own. Without such
1180 1181 # re-use, topological branches would keep reopening new chains,
1181 1182 # creating more and more snapshots as the repository grows.
1182 1183
1183 1184 if floor is not None:
1184 1185 # We only do this for siblings created after the one in our
1185 1186 # parent's delta chain. Those created before have fewer
1186 1187 # chances to be a valid base since our ancestors had to
1187 1188 # create a new snapshot.
1188 1189 siblings = [r for r in siblings if floor < r]
1189 1190 yield tuple(sorted(siblings))
1190 1191 # then test the base from our parent's delta chain.
1191 1192 yield tuple(sorted(snaps))
1192 1193 floor = min(snaps)
1193 1194 # No suitable base found in the parent chain, search if any full
1194 1195 # snapshots emitted since parent's base would be a suitable base
1195 1196 # for an intermediate snapshot.
1196 1197 #
1197 1198 # It gives a chance to reuse a delta chain unrelated to the current
1198 1199 # revision instead of starting our own. Without such re-use,
1199 1200 # topological branches would keep reopening new full chains,
1200 1201 # creating more and more snapshots as the repository grows.
1201 1202 full = [
1202 1203 r for r in self.snapshot_cache.snapshots[nullrev] if snapfloor <= r
1203 1204 ]
1204 1205 yield tuple(sorted(full))
1205 1206
1206 1207 def _iter_snapshots(self):
1207 1208 assert self.revlog.delta_config.sparse_revlog
1208 1209 self.current_stage = _STAGE_SNAPSHOT
1209 1210 good = None
1210 1211 groups = self._iter_snapshots_base()
1211 1212 for candidates in groups:
1212 1213 good = yield candidates
1213 1214 if good is not None:
1214 1215 break
1215 1216 # if we have a refinable value, try to refine it
1216 1217 if good is not None and good.snapshotdepth is not None:
1217 1218 assert self.current_stage == _STAGE_SNAPSHOT
1218 1219 # refine snapshot down
1219 1220 previous = None
1220 1221 while previous != good:
1221 1222 previous = good
1222 1223 base = self.revlog.deltaparent(good.base)
1223 1224 if base == nullrev:
1224 1225 break
1225 1226 good = yield (base,)
1226 1227 # refine snapshot up
1227 1228 if not self.snapshot_cache.snapshots:
1228 1229 self.snapshot_cache.update(self.revlog, good.base + 1)
1229 1230 previous = None
1230 1231 while good != previous:
1231 1232 previous = good
1232 1233 children = tuple(
1233 1234 sorted(c for c in self.snapshot_cache.snapshots[good.base])
1234 1235 )
1235 1236 good = yield children
1236 1237 yield None
1237 1238
1238 1239 def _iter_groups(self):
1239 1240 good = None
1240 1241 for group in self._iter_parents():
1241 1242 good = yield group
1242 1243 if good is not None:
1243 1244 break
1244 1245 else:
1245 1246 assert good is None
1246 1247 assert self.revlog.delta_config.sparse_revlog
1247 1248 # If sparse revlog is enabled, we can try to refine the
1248 1249 # available deltas
1249 1250 iter_snap = self._iter_snapshots()
1250 1251 group = iter_snap.send(None)
1251 1252 while group is not None:
1252 1253 good = yield group
1253 1254 group = iter_snap.send(good)
1254 1255 yield None
1255 1256
1256 1257
1257 1258 class SnapshotCache:
1258 1259 __slots__ = ('snapshots', '_start_rev', '_end_rev')
1259 1260
1260 1261 def __init__(self):
1261 1262 self.snapshots = collections.defaultdict(set)
1262 1263 self._start_rev = None
1263 1264 self._end_rev = None
1264 1265
1265 1266 def update(self, revlog, start_rev=0):
1266 1267 """find snapshots from start_rev to tip"""
1267 1268 nb_revs = len(revlog)
1268 1269 end_rev = nb_revs - 1
1269 1270 if start_rev > end_rev:
1270 1271 return # range is empty
1271 1272
1272 1273 if self._start_rev is None:
1273 1274 assert self._end_rev is None
1274 1275 self._update(revlog, start_rev, end_rev)
1275 1276 elif not (self._start_rev <= start_rev and end_rev <= self._end_rev):
1276 1277 if start_rev < self._start_rev:
1277 1278 self._update(revlog, start_rev, self._start_rev - 1)
1278 1279 if self._end_rev < end_rev:
1279 1280 self._update(revlog, self._end_rev + 1, end_rev)
1280 1281
1281 1282 if self._start_rev is None:
1282 1283 assert self._end_rev is None
1283 1284 self._end_rev = end_rev
1284 1285 self._start_rev = start_rev
1285 1286 else:
1286 1287 self._start_rev = min(self._start_rev, start_rev)
1287 1288 self._end_rev = max(self._end_rev, end_rev)
1288 1289 assert self._start_rev <= self._end_rev, (
1289 1290 self._start_rev,
1290 1291 self._end_rev,
1291 1292 )
1292 1293
1293 1294 def _update(self, revlog, start_rev, end_rev):
1294 1295 """internal method that actually do update content"""
1295 1296 assert self._start_rev is None or (
1296 1297 start_rev < self._start_rev or start_rev > self._end_rev
1297 1298 ), (self._start_rev, self._end_rev, start_rev, end_rev)
1298 1299 assert self._start_rev is None or (
1299 1300 end_rev < self._start_rev or end_rev > self._end_rev
1300 1301 ), (self._start_rev, self._end_rev, start_rev, end_rev)
1301 1302 cache = self.snapshots
1302 1303 if hasattr(revlog.index, 'findsnapshots'):
1303 1304 revlog.index.findsnapshots(cache, start_rev, end_rev)
1304 1305 else:
1305 1306 deltaparent = revlog.deltaparent
1306 1307 issnapshot = revlog.issnapshot
1307 1308 for rev in revlog.revs(start_rev, end_rev):
1308 1309 if issnapshot(rev):
1309 1310 cache[deltaparent(rev)].add(rev)
1310 1311
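A standalone sketch of the interval bookkeeping in SnapshotCache.update above: only revisions outside the already-scanned [start, end] window are re-scanned (toy class, no revlog involved):

class RangeCache:
    def __init__(self):
        self.start = self.end = None
        self.scanned = []  # record of (lo, hi) scans, for illustration

    def update(self, lo, hi):
        if self.start is None:
            self.scanned.append((lo, hi))
            self.start, self.end = lo, hi
            return
        if lo < self.start:
            self.scanned.append((lo, self.start - 1))
        if self.end < hi:
            self.scanned.append((self.end + 1, hi))
        self.start = min(self.start, lo)
        self.end = max(self.end, hi)

c = RangeCache()
c.update(10, 20)
c.update(0, 25)           # only [0, 9] and [21, 25] are scanned again
print(c.scanned)          # [(10, 20), (0, 9), (21, 25)]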
1311 1312
1312 1313 class deltacomputer:
1313 1314 """object capable of computing delta and finding delta for multiple revision
1314 1315
1315 1316 This object is meant to compute and find multiple delta applied to the same
1316 1317 revlog.
1317 1318 """
1318 1319
1319 1320 def __init__(
1320 1321 self,
1321 1322 revlog,
1322 1323 write_debug=None,
1323 1324 debug_search=False,
1324 1325 debug_info=None,
1325 1326 ):
1326 1327 self.revlog = revlog
1327 1328 self._write_debug = write_debug
1328 1329 if write_debug is None:
1329 1330 self._debug_search = False
1330 1331 else:
1331 1332 self._debug_search = debug_search
1332 1333 self._debug_info = debug_info
1333 1334 self._snapshot_cache = SnapshotCache()
1334 1335
1335 1336 @property
1336 1337 def _gather_debug(self):
1337 1338 return self._write_debug is not None or self._debug_info is not None
1338 1339
1339 1340 def buildtext(self, revinfo):
1340 1341 """Builds a fulltext version of a revision
1341 1342
1342 1343 revinfo: revisioninfo instance that contains all needed info
1343 1344 """
1344 1345 btext = revinfo.btext
1345 1346 if btext[0] is not None:
1346 1347 return btext[0]
1347 1348
1348 1349 revlog = self.revlog
1349 1350 cachedelta = revinfo.cachedelta
1350 1351 baserev = cachedelta[0]
1351 1352 delta = cachedelta[1]
1352 1353
1353 1354 fulltext = btext[0] = _textfromdelta(
1354 1355 revlog,
1355 1356 baserev,
1356 1357 delta,
1357 1358 revinfo.p1,
1358 1359 revinfo.p2,
1359 1360 revinfo.flags,
1360 1361 revinfo.node,
1361 1362 )
1362 1363 return fulltext
1363 1364
1364 1365 def _builddeltadiff(self, base, revinfo):
1365 1366 revlog = self.revlog
1366 1367 t = self.buildtext(revinfo)
1367 1368 if revlog.iscensored(base):
1368 1369 # deltas based on a censored revision must replace the
1369 1370 # full content in one patch, so delta works everywhere
1370 1371 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
1371 1372 delta = header + t
1372 1373 else:
1373 1374 ptext = revlog.rawdata(base)
1374 1375 delta = mdiff.textdiff(ptext, t)
1375 1376
1376 1377 return delta
1377 1378
1378 1379 def _builddeltainfo(
1379 1380 self, revinfo, base, target_rev=None, as_snapshot=False
1380 1381 ):
1381 1382 # can we use the cached delta?
1382 1383 revlog = self.revlog
1383 1384 chainbase = revlog.chainbase(base)
1384 1385 if revlog.delta_config.general_delta:
1385 1386 deltabase = base
1386 1387 else:
1387 1388 if target_rev is not None and base != target_rev - 1:
1388 1389 msg = (
1389 1390 b'general delta cannot use delta for something else '
1390 1391 b'than `prev`: %d<-%d'
1391 1392 )
1392 1393 msg %= (base, target_rev)
1393 1394 raise error.ProgrammingError(msg)
1394 1395 deltabase = chainbase
1395 1396 snapshotdepth = None
1396 1397 if revlog.delta_config.sparse_revlog and deltabase == nullrev:
1397 1398 snapshotdepth = 0
1398 1399 elif revlog.delta_config.sparse_revlog and as_snapshot:
1399 1400 assert revlog.issnapshot(deltabase)
1400 1401 # A delta chain should always be one full snapshot,
1401 1402 # zero or more semi-snapshots, and zero or more deltas
1402 1403 p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
1403 1404 if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
1404 1405 snapshotdepth = len(revlog._deltachain(deltabase)[0])
1405 1406 delta = None
1406 1407 if revinfo.cachedelta:
1407 1408 cachebase = revinfo.cachedelta[0]
1408 1409 # check if the diff still applies
1409 1410 currentbase = cachebase
1410 1411 while (
1411 1412 currentbase != nullrev
1412 1413 and currentbase != base
1413 1414 and self.revlog.length(currentbase) == 0
1414 1415 ):
1415 1416 currentbase = self.revlog.deltaparent(currentbase)
1416 1417 if self.revlog.delta_config.lazy_delta and currentbase == base:
1417 1418 delta = revinfo.cachedelta[1]
1418 1419 if delta is None:
1419 1420 delta = self._builddeltadiff(base, revinfo)
1420 1421 if self._debug_search:
1421 1422 msg = b"DBG-DELTAS-SEARCH: uncompressed-delta-size=%d\n"
1422 1423 msg %= len(delta)
1423 1424 self._write_debug(msg)
1424 1425 # snapshotdepth needs to be neither None nor a 0-level snapshot
1425 1426 if revlog.delta_config.upper_bound_comp is not None and snapshotdepth:
1426 1427 lowestrealisticdeltalen = (
1427 1428 len(delta) // revlog.delta_config.upper_bound_comp
1428 1429 )
1429 1430 snapshotlimit = revinfo.textlen >> snapshotdepth
1430 1431 if self._debug_search:
1431 1432 msg = b"DBG-DELTAS-SEARCH: projected-lower-size=%d\n"
1432 1433 msg %= lowestrealisticdeltalen
1433 1434 self._write_debug(msg)
1434 1435 if snapshotlimit < lowestrealisticdeltalen:
1435 1436 if self._debug_search:
1436 1437 msg = b"DBG-DELTAS-SEARCH: DISCARDED (snapshot limit)\n"
1437 1438 self._write_debug(msg)
1438 1439 return None
1439 1440 if revlog.length(base) < lowestrealisticdeltalen:
1440 1441 if self._debug_search:
1441 1442 msg = b"DBG-DELTAS-SEARCH: DISCARDED (prev size)\n"
1442 1443 self._write_debug(msg)
1443 1444 return None
1444 1445 header, data = revlog._inner.compress(delta)
1445 1446 deltalen = len(header) + len(data)
1446 1447 offset = revlog.end(len(revlog) - 1)
1447 1448 dist = deltalen + offset - revlog.start(chainbase)
1448 1449 chainlen, compresseddeltalen = revlog._chaininfo(base)
1449 1450 chainlen += 1
1450 1451 compresseddeltalen += deltalen
1451 1452
1452 1453 return _deltainfo(
1453 1454 dist,
1454 1455 deltalen,
1455 1456 (header, data),
1456 1457 deltabase,
1457 1458 chainbase,
1458 1459 chainlen,
1459 1460 compresseddeltalen,
1460 1461 snapshotdepth,
1461 1462 )
1462 1463
1463 1464 def _fullsnapshotinfo(self, revinfo, curr):
1464 1465 rawtext = self.buildtext(revinfo)
1465 1466 data = self.revlog._inner.compress(rawtext)
1466 1467 compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
1467 1468 deltabase = chainbase = curr
1468 1469 snapshotdepth = 0
1469 1470 chainlen = 1
1470 1471
1471 1472 return _deltainfo(
1472 1473 dist,
1473 1474 deltalen,
1474 1475 data,
1475 1476 deltabase,
1476 1477 chainbase,
1477 1478 chainlen,
1478 1479 compresseddeltalen,
1479 1480 snapshotdepth,
1480 1481 )
1481 1482
1482 1483 def finddeltainfo(self, revinfo, excluded_bases=None, target_rev=None):
1483 1484 """Find an acceptable delta against a candidate revision
1484 1485
1485 1486 revinfo: information about the revision (instance of _revisioninfo)
1486 1487
1487 1488 Returns the first acceptable candidate revision, as ordered by
1488 1489 _candidategroups
1489 1490
1490 1491 If no suitable deltabase is found, we return delta info for a full
1491 1492 snapshot.
1492 1493
1493 1494 `excluded_bases` is an optional set of revisions that cannot be used as
1494 1495 a delta base. Use this to recompute delta suitable in censor or strip
1495 1496 context.
1496 1497 """
1497 1498 if target_rev is None:
1498 1499 target_rev = len(self.revlog)
1499 1500
1500 1501 gather_debug = self._gather_debug
1501 1502 cachedelta = revinfo.cachedelta
1502 1503 revlog = self.revlog
1503 1504 p1r = p2r = None
1504 1505
1505 1506 if excluded_bases is None:
1506 1507 excluded_bases = set()
1507 1508
1508 1509 if gather_debug:
1509 1510 start = util.timer()
1510 1511 dbg = self._one_dbg_data()
1511 1512 dbg['revision'] = target_rev
1512 1513 p1r = revlog.rev(revinfo.p1)
1513 1514 p2r = revlog.rev(revinfo.p2)
1514 1515 if p1r != nullrev:
1515 1516 p1_chain_len = revlog._chaininfo(p1r)[0]
1516 1517 else:
1517 1518 p1_chain_len = -1
1518 1519 if p2r != nullrev:
1519 1520 p2_chain_len = revlog._chaininfo(p2r)[0]
1520 1521 else:
1521 1522 p2_chain_len = -1
1522 1523 dbg['p1-chain-len'] = p1_chain_len
1523 1524 dbg['p2-chain-len'] = p2_chain_len
1524 1525
1525 1526 # 1) if the revision is empty, no amount of delta can beat it
1526 1527 #
1527 1528 # 2) no delta for flag processor revision (see "candelta" for why)
1528 1529 # not calling candelta since only one revision needs testing, also to
1529 1530 # avoid overhead fetching flags again.
1530 1531 if not revinfo.textlen or revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
1531 1532 deltainfo = self._fullsnapshotinfo(revinfo, target_rev)
1532 1533 if gather_debug:
1533 1534 end = util.timer()
1534 1535 dbg['duration'] = end - start
1535 1536 dbg[
1536 1537 'delta-base'
1537 1538 ] = deltainfo.base # pytype: disable=attribute-error
1538 1539 dbg['search_round_count'] = 0
1539 1540 dbg['using-cached-base'] = False
1540 1541 dbg['delta_try_count'] = 0
1541 1542 dbg['type'] = b"full"
1542 1543 dbg['snapshot-depth'] = 0
1543 1544 self._dbg_process_data(dbg)
1544 1545 return deltainfo
1545 1546
1546 1547 deltainfo = None
1547 1548
1548 1549 # If this source delta is to be forcibly reused, let us comply early.
1549 1550 if (
1550 1551 revlog.delta_config.general_delta
1551 1552 and revinfo.cachedelta is not None
1552 1553 and revinfo.cachedelta[2] == DELTA_BASE_REUSE_FORCE
1553 1554 ):
            base = revinfo.cachedelta[0]
            if base == nullrev:
                dbg_type = b"full"
                deltainfo = self._fullsnapshotinfo(revinfo, target_rev)
                if gather_debug:
                    snapshotdepth = 0
            elif base not in excluded_bases:
                delta = revinfo.cachedelta[1]
                header, data = revlog.compress(delta)
                deltalen = len(header) + len(data)
                if gather_debug:
                    offset = revlog.end(len(revlog) - 1)
                    chainbase = revlog.chainbase(base)
                    distance = deltalen + offset - revlog.start(chainbase)
                    chainlen, compresseddeltalen = revlog._chaininfo(base)
                    chainlen += 1
                    compresseddeltalen += deltalen
                    if base == p1r or base == p2r:
                        dbg_type = b"delta"
                        snapshotdepth = None
                    elif not revlog.issnapshot(base):
                        # a plain delta against a non-snapshot base; set the
                        # debug type too so `dbg_type` is never left unbound
                        dbg_type = b"delta"
                        snapshotdepth = None
                    else:
                        dbg_type = b"snapshot"
                        snapshotdepth = revlog.snapshotdepth(base) + 1
                else:
                    distance = None
                    chainbase = None
                    chainlen = None
                    compresseddeltalen = None
                    snapshotdepth = None
                deltainfo = _deltainfo(
                    distance=distance,
                    deltalen=deltalen,
                    data=(header, data),
                    base=base,
                    chainbase=chainbase,
                    chainlen=chainlen,
                    compresseddeltalen=compresseddeltalen,
                    snapshotdepth=snapshotdepth,
                )

        if deltainfo is not None:
            if gather_debug:
                end = util.timer()
                dbg['duration'] = end - start
                dbg[
                    'delta-base'
                ] = deltainfo.base  # pytype: disable=attribute-error
                dbg['search_round_count'] = 0
                dbg['using-cached-base'] = True
                dbg['delta_try_count'] = 0
                dbg['type'] = dbg_type
                if snapshotdepth is None:
                    dbg['snapshot-depth'] = -1
                else:
                    dbg['snapshot-depth'] = snapshotdepth
                self._dbg_process_data(dbg)
            return deltainfo

        # count the number of different deltas we tried (for debug purposes)
        dbg_try_count = 0
        # count the number of "search rounds" we did (for debug purposes)
        dbg_try_rounds = 0
        dbg_type = b'unknown'

        if p1r is None:
            p1r = revlog.rev(revinfo.p1)
            p2r = revlog.rev(revinfo.p2)

        if self._debug_search:
            msg = b"DBG-DELTAS-SEARCH: SEARCH rev=%d\n"
            msg %= target_rev
            self._write_debug(msg)

        # should we try to build a delta?
        if not (len(self.revlog) and self.revlog._storedeltachains):
            search_cls = _NoDeltaSearch
        elif self.revlog.delta_config.sparse_revlog:
            search_cls = _SparseDeltaSearch
        elif self.revlog.delta_config.general_delta:
            search_cls = _GeneralDeltaSearch
        else:
            # before general delta, there is only one possible delta base
            search_cls = _PrevDeltaSearch

        search = search_cls(
            self.revlog,
            revinfo,
            p1r,
            p2r,
            cachedelta,
            excluded_bases,
            target_rev,
            snapshot_cache=self._snapshot_cache,
        )

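        # The search object exposes a small iteration protocol, used by the
        # loop below: `done`, `current_group` (the candidate revisions to try
        # in the current round) and `next_group(deltainfo)`, which refines
        # the search based on the best delta found so far.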
        while not search.done:
            current_group = search.current_group
            # current_group can be `None`, but not if search.done is False.
            # We add this assert to help pytype.
            assert current_group is not None
            candidaterevs = current_group
            dbg_try_rounds += 1
            if self._debug_search:
                prev = None
                if deltainfo is not None:
                    prev = deltainfo.base

                if (
                    cachedelta is not None
                    and len(candidaterevs) == 1
                    and cachedelta[0] in candidaterevs
                ):
                    round_type = b"cached-delta"
                elif p1r in candidaterevs or p2r in candidaterevs:
                    round_type = b"parents"
                elif prev is not None and all(c < prev for c in candidaterevs):
                    round_type = b"refine-down"
                elif prev is not None and all(c > prev for c in candidaterevs):
                    round_type = b"refine-up"
                else:
                    round_type = b"search-down"
                msg = b"DBG-DELTAS-SEARCH: ROUND #%d - %d candidates - %s\n"
                msg %= (dbg_try_rounds, len(candidaterevs), round_type)
                self._write_debug(msg)
            nominateddeltas = []
            if deltainfo is not None:
                if self._debug_search:
                    msg = (
                        b"DBG-DELTAS-SEARCH: CONTENDER: rev=%d - length=%d\n"
                    )
                    msg %= (deltainfo.base, deltainfo.deltalen)
                    self._write_debug(msg)
                # if we already found a good delta,
                # challenge it against refined candidates
                nominateddeltas.append(deltainfo)
            for candidaterev in candidaterevs:
                if self._debug_search:
                    msg = b"DBG-DELTAS-SEARCH: CANDIDATE: rev=%d\n"
                    msg %= candidaterev
                    self._write_debug(msg)
                    candidate_type = None
                    if candidaterev == p1r:
                        candidate_type = b"p1"
                    elif candidaterev == p2r:
                        candidate_type = b"p2"
                    elif self.revlog.issnapshot(candidaterev):
                        candidate_type = b"snapshot-%d"
                        candidate_type %= self.revlog.snapshotdepth(
                            candidaterev
                        )

                    if candidate_type is not None:
                        msg = b"DBG-DELTAS-SEARCH: type=%s\n"
                        msg %= candidate_type
                        self._write_debug(msg)
                        msg = b"DBG-DELTAS-SEARCH: size=%d\n"
                        msg %= self.revlog.length(candidaterev)
                        self._write_debug(msg)
                        msg = b"DBG-DELTAS-SEARCH: base=%d\n"
                        msg %= self.revlog.deltaparent(candidaterev)
                        self._write_debug(msg)

                dbg_try_count += 1

                if self._debug_search:
                    delta_start = util.timer()
                candidatedelta = self._builddeltainfo(
                    revinfo,
                    candidaterev,
                    target_rev=target_rev,
                    as_snapshot=search.current_stage == _STAGE_SNAPSHOT,
                )
                if self._debug_search:
                    delta_end = util.timer()
                    msg = b"DBG-DELTAS-SEARCH: delta-search-time=%f\n"
                    msg %= delta_end - delta_start
                    self._write_debug(msg)
                if candidatedelta is not None:
                    if search.is_good_delta_info(candidatedelta):
                        if self._debug_search:
                            msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (GOOD)\n"
                            msg %= candidatedelta.deltalen
                            self._write_debug(msg)
                        nominateddeltas.append(candidatedelta)
                    elif self._debug_search:
                        msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (BAD)\n"
                        msg %= candidatedelta.deltalen
                        self._write_debug(msg)
                elif self._debug_search:
                    msg = b"DBG-DELTAS-SEARCH: NO-DELTA\n"
                    self._write_debug(msg)
            if nominateddeltas:
                deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
            search.next_group(deltainfo)

        if deltainfo is None:
            dbg_type = b"full"
            deltainfo = self._fullsnapshotinfo(revinfo, target_rev)
        elif deltainfo.snapshotdepth:  # pytype: disable=attribute-error
            dbg_type = b"snapshot"
        else:
            dbg_type = b"delta"

        if gather_debug:
            end = util.timer()
            if dbg_type == b'full':
                used_cached = (
                    cachedelta is not None
                    and dbg_try_rounds == 0
                    and dbg_try_count == 0
                    and cachedelta[0] == nullrev
                )
            else:
                used_cached = (
                    cachedelta is not None
                    and dbg_try_rounds == 1
                    and dbg_try_count == 1
                    and deltainfo.base == cachedelta[0]
                )
            dbg['duration'] = end - start
            dbg[
                'delta-base'
            ] = deltainfo.base  # pytype: disable=attribute-error
            dbg['search_round_count'] = dbg_try_rounds
            dbg['using-cached-base'] = used_cached
            dbg['delta_try_count'] = dbg_try_count
            dbg['type'] = dbg_type
            if (
                deltainfo.snapshotdepth  # pytype: disable=attribute-error
                is not None
            ):
                dbg[
                    'snapshot-depth'
                ] = deltainfo.snapshotdepth  # pytype: disable=attribute-error
            else:
                dbg['snapshot-depth'] = -1
            self._dbg_process_data(dbg)
        return deltainfo

    def _one_dbg_data(self):
        dbg = {
            'duration': None,
            'revision': None,
            'delta-base': None,
            'search_round_count': None,
            'using-cached-base': None,
            'delta_try_count': None,
            'type': None,
            'p1-chain-len': None,
            'p2-chain-len': None,
            'snapshot-depth': None,
            'target-revlog': None,
        }
        target_revlog = b"UNKNOWN"
        target_type = self.revlog.target[0]
        target_key = self.revlog.target[1]
        if target_type == KIND_CHANGELOG:
            target_revlog = b'CHANGELOG:'
        elif target_type == KIND_MANIFESTLOG:
            target_revlog = b'MANIFESTLOG:'
            if target_key:
                target_revlog += b'%s:' % target_key
        elif target_type == KIND_FILELOG:
            target_revlog = b'FILELOG:'
            if target_key:
                target_revlog += b'%s:' % target_key
        dbg['target-revlog'] = target_revlog
        return dbg

    def _dbg_process_data(self, dbg):
        if self._debug_info is not None:
            self._debug_info.append(dbg)

        if self._write_debug is not None:
            msg = (
                b"DBG-DELTAS:"
                b" %-12s"
                b" rev=%d:"
                b" delta-base=%d"
                b" is-cached=%d"
                b" - search-rounds=%d"
                b" try-count=%d"
                b" - delta-type=%-6s"
                b" snap-depth=%d"
                b" - p1-chain-length=%d"
                b" p2-chain-length=%d"
                b" - duration=%f"
                b"\n"
            )
            msg %= (
                dbg["target-revlog"],
                dbg["revision"],
                dbg["delta-base"],
                dbg["using-cached-base"],
                dbg["search_round_count"],
                dbg["delta_try_count"],
                dbg["type"],
                dbg["snapshot-depth"],
                dbg["p1-chain-len"],
                dbg["p2-chain-len"],
                dbg["duration"],
            )
            self._write_debug(msg)
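            # For illustration only (every value below is made up), the
            # message renders as one line along these lines:
            #
            #   DBG-DELTAS: FILELOG:foo: rev=42: delta-base=41 is-cached=0
            #    - search-rounds=1 try-count=2 - delta-type=delta  snap-depth=-1
            #    - p1-chain-length=3 p2-chain-length=-1 - duration=0.000101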


def delta_compression(default_compression_header, deltainfo):
    """return (COMPRESSION_MODE, deltainfo)

    used by revlog v2+ format to dispatch between PLAIN and DEFAULT
    compression.
    """
    h, d = deltainfo.data
    compression_mode = COMP_MODE_INLINE
    if not h and not d:
        # no data to store at all... declare it uncompressed
        compression_mode = COMP_MODE_PLAIN
    elif not h:
        t = d[0:1]
        if t == b'\0':
            compression_mode = COMP_MODE_PLAIN
        elif t == default_compression_header:
            compression_mode = COMP_MODE_DEFAULT
    elif h == b'u':
        # we have a more efficient way to declare uncompressed
        h = b''
        compression_mode = COMP_MODE_PLAIN
        deltainfo = drop_u_compression(deltainfo)
    return compression_mode, deltainfo
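
# A hedged illustration of the dispatch above, with `(h, d)` standing for
# `deltainfo.data`. The concrete header bytes are assumptions (e.g. zlib
# streams conventionally start with b'x', the usual default compression
# header):
#
#   h == b'' and d == b''             -> COMP_MODE_PLAIN   (nothing to store)
#   h == b'' and d starts with b'\0'  -> COMP_MODE_PLAIN   (stored as-is)
#   h == b'' and d starts with b'x'   -> COMP_MODE_DEFAULT (default engine)
#   h == b'u'                         -> COMP_MODE_PLAIN   (b'u' marker dropped)
#   anything else                     -> COMP_MODE_INLINE  (header kept inline)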