delta-find: adjust the moment when we mark something as "tested"...
marmoute
r50511:4956942c default
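The diff below moves the moment `_candidategroups` marks a candidate revision as "tested": instead of adding every surviving candidate to the `tested` set up front, each filtering branch now records the revision it rejects, and a whole group is recorded in bulk only once it is actually emitted for testing. A minimal sketch of the before/after pattern (the `is_filtered_out` helper is hypothetical and stands in for the real chain of checks):

    # before: every candidate that passed the duplicate check was
    # marked as tested, even when a filter skipped it later
    for rev in temptative:
        tested.add(rev)
        if is_filtered_out(rev):
            continue
        group.append(rev)

    # after: mark on rejection, or in bulk once the group is emitted
    for rev in temptative:
        if is_filtered_out(rev):
            tested.add(rev)
            continue
        group.append(rev)
    tested.update(group)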
@@ -1,1398 +1,1407 b''
1 1 # revlogdeltas.py - Logic around delta computation for revlog
2 2 #
3 3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 4 # Copyright 2018 Octobus <contact@octobus.net>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8 """Helper class to compute deltas stored inside revlogs"""
9 9
10 10
11 11 import collections
12 12 import struct
13 13
14 14 # import stuff from node for others to import from revlog
15 15 from ..node import nullrev
16 16 from ..i18n import _
17 17 from ..pycompat import getattr
18 18
19 19 from .constants import (
20 20 COMP_MODE_DEFAULT,
21 21 COMP_MODE_INLINE,
22 22 COMP_MODE_PLAIN,
23 23 KIND_CHANGELOG,
24 24 KIND_FILELOG,
25 25 KIND_MANIFESTLOG,
26 26 REVIDX_ISCENSORED,
27 27 REVIDX_RAWTEXT_CHANGING_FLAGS,
28 28 )
29 29
30 30 from ..thirdparty import attr
31 31
32 32 from .. import (
33 33 error,
34 34 mdiff,
35 35 util,
36 36 )
37 37
38 38 from . import flagutil
39 39
40 40 # maximum <delta-chain-data>/<revision-text-length> ratio
41 41 LIMIT_DELTA2TEXT = 2
42 42
43 43
44 44 class _testrevlog:
45 45 """minimalist fake revlog to use in doctests"""
46 46
47 47 def __init__(self, data, density=0.5, mingap=0, snapshot=()):
48 48 """data is an list of revision payload boundaries"""
49 49 self._data = data
50 50 self._srdensitythreshold = density
51 51 self._srmingapsize = mingap
52 52 self._snapshot = set(snapshot)
53 53 self.index = None
54 54
55 55 def start(self, rev):
56 56 if rev == nullrev:
57 57 return 0
58 58 if rev == 0:
59 59 return 0
60 60 return self._data[rev - 1]
61 61
62 62 def end(self, rev):
63 63 if rev == nullrev:
64 64 return 0
65 65 return self._data[rev]
66 66
67 67 def length(self, rev):
68 68 return self.end(rev) - self.start(rev)
69 69
70 70 def __len__(self):
71 71 return len(self._data)
72 72
73 73 def issnapshot(self, rev):
74 74 if rev == nullrev:
75 75 return True
76 76 return rev in self._snapshot
77 77
78 78
79 79 def slicechunk(revlog, revs, targetsize=None):
80 80 """slice revs to reduce the amount of unrelated data to be read from disk.
81 81
82 82 ``revs`` is sliced into groups that should be read in one time.
83 83 Assume that revs are sorted.
84 84
85 85 The initial chunk is sliced until the overall density (payload/chunks-span
86 86 ratio) is above `revlog._srdensitythreshold`. No gap smaller than
87 87 `revlog._srmingapsize` is skipped.
88 88
89 89 If `targetsize` is set, no chunk larger than `targetsize` will be yielded.
90 90 For consistency with other slicing choices, this limit won't go lower than
91 91 `revlog._srmingapsize`.
92 92
93 93 If individual revision chunks are larger than this limit, they will still
94 94 be yielded individually.
95 95
96 96 >>> data = [
97 97 ... 5, #00 (5)
98 98 ... 10, #01 (5)
99 99 ... 12, #02 (2)
100 100 ... 12, #03 (empty)
101 101 ... 27, #04 (15)
102 102 ... 31, #05 (4)
103 103 ... 31, #06 (empty)
104 104 ... 42, #07 (11)
105 105 ... 47, #08 (5)
106 106 ... 47, #09 (empty)
107 107 ... 48, #10 (1)
108 108 ... 51, #11 (3)
109 109 ... 74, #12 (23)
110 110 ... 85, #13 (11)
111 111 ... 86, #14 (1)
112 112 ... 91, #15 (5)
113 113 ... ]
114 114 >>> revlog = _testrevlog(data, snapshot=range(16))
115 115
116 116 >>> list(slicechunk(revlog, list(range(16))))
117 117 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
118 118 >>> list(slicechunk(revlog, [0, 15]))
119 119 [[0], [15]]
120 120 >>> list(slicechunk(revlog, [0, 11, 15]))
121 121 [[0], [11], [15]]
122 122 >>> list(slicechunk(revlog, [0, 11, 13, 15]))
123 123 [[0], [11, 13, 15]]
124 124 >>> list(slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
125 125 [[1, 2], [5, 8, 10, 11], [14]]
126 126
127 127 Slicing with a maximum chunk size
128 128 >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
129 129 [[0], [11], [13], [15]]
130 130 >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
131 131 [[0], [11], [13, 15]]
132 132
133 133 Slicing involving nullrev
134 134 >>> list(slicechunk(revlog, [-1, 0, 11, 13, 15], targetsize=20))
135 135 [[-1, 0], [11], [13, 15]]
136 136 >>> list(slicechunk(revlog, [-1, 13, 15], targetsize=5))
137 137 [[-1], [13], [15]]
138 138 """
139 139 if targetsize is not None:
140 140 targetsize = max(targetsize, revlog._srmingapsize)
141 141 # targetsize should not be specified when evaluating delta candidates:
142 142 # * targetsize is used to ensure we stay within specification when reading,
143 143 densityslicing = getattr(revlog.index, 'slicechunktodensity', None)
144 144 if densityslicing is None:
145 145 densityslicing = lambda x, y, z: _slicechunktodensity(revlog, x, y, z)
146 146 for chunk in densityslicing(
147 147 revs, revlog._srdensitythreshold, revlog._srmingapsize
148 148 ):
149 149 for subchunk in _slicechunktosize(revlog, chunk, targetsize):
150 150 yield subchunk
151 151
152 152
153 153 def _slicechunktosize(revlog, revs, targetsize=None):
154 154 """slice revs to match the target size
155 155
156 156 This is intended to be used on chunks that density slicing selected but that
157 157 are still too large compared to the read guarantee of the revlog. This might
158 158 happen when the "minimal gap size" interrupted the slicing or when chains are
159 159 built in a way that creates large blocks next to each other.
160 160
161 161 >>> data = [
162 162 ... 3, #0 (3)
163 163 ... 5, #1 (2)
164 164 ... 6, #2 (1)
165 165 ... 8, #3 (2)
166 166 ... 8, #4 (empty)
167 167 ... 11, #5 (3)
168 168 ... 12, #6 (1)
169 169 ... 13, #7 (1)
170 170 ... 14, #8 (1)
171 171 ... ]
172 172
173 173 == All snapshots cases ==
174 174 >>> revlog = _testrevlog(data, snapshot=range(9))
175 175
176 176 Cases where chunk is already small enough
177 177 >>> list(_slicechunktosize(revlog, [0], 3))
178 178 [[0]]
179 179 >>> list(_slicechunktosize(revlog, [6, 7], 3))
180 180 [[6, 7]]
181 181 >>> list(_slicechunktosize(revlog, [0], None))
182 182 [[0]]
183 183 >>> list(_slicechunktosize(revlog, [6, 7], None))
184 184 [[6, 7]]
185 185
186 186 cases where we need actual slicing
187 187 >>> list(_slicechunktosize(revlog, [0, 1], 3))
188 188 [[0], [1]]
189 189 >>> list(_slicechunktosize(revlog, [1, 3], 3))
190 190 [[1], [3]]
191 191 >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
192 192 [[1, 2], [3]]
193 193 >>> list(_slicechunktosize(revlog, [3, 5], 3))
194 194 [[3], [5]]
195 195 >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
196 196 [[3], [5]]
197 197 >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
198 198 [[5], [6, 7, 8]]
199 199 >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
200 200 [[0], [1, 2], [3], [5], [6, 7, 8]]
201 201
202 202 Case with too large individual chunk (must return valid chunk)
203 203 >>> list(_slicechunktosize(revlog, [0, 1], 2))
204 204 [[0], [1]]
205 205 >>> list(_slicechunktosize(revlog, [1, 3], 1))
206 206 [[1], [3]]
207 207 >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
208 208 [[3], [5]]
209 209
210 210 == No Snapshot cases ==
211 211 >>> revlog = _testrevlog(data)
212 212
213 213 Cases where chunk is already small enough
214 214 >>> list(_slicechunktosize(revlog, [0], 3))
215 215 [[0]]
216 216 >>> list(_slicechunktosize(revlog, [6, 7], 3))
217 217 [[6, 7]]
218 218 >>> list(_slicechunktosize(revlog, [0], None))
219 219 [[0]]
220 220 >>> list(_slicechunktosize(revlog, [6, 7], None))
221 221 [[6, 7]]
222 222
223 223 cases where we need actual slicing
224 224 >>> list(_slicechunktosize(revlog, [0, 1], 3))
225 225 [[0], [1]]
226 226 >>> list(_slicechunktosize(revlog, [1, 3], 3))
227 227 [[1], [3]]
228 228 >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
229 229 [[1], [2, 3]]
230 230 >>> list(_slicechunktosize(revlog, [3, 5], 3))
231 231 [[3], [5]]
232 232 >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
233 233 [[3], [4, 5]]
234 234 >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
235 235 [[5], [6, 7, 8]]
236 236 >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
237 237 [[0], [1, 2], [3], [5], [6, 7, 8]]
238 238
239 239 Case with too large individual chunk (must return valid chunk)
240 240 >>> list(_slicechunktosize(revlog, [0, 1], 2))
241 241 [[0], [1]]
242 242 >>> list(_slicechunktosize(revlog, [1, 3], 1))
243 243 [[1], [3]]
244 244 >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
245 245 [[3], [5]]
246 246
247 247 == mixed case ==
248 248 >>> revlog = _testrevlog(data, snapshot=[0, 1, 2])
249 249 >>> list(_slicechunktosize(revlog, list(range(9)), 5))
250 250 [[0, 1], [2], [3, 4, 5], [6, 7, 8]]
251 251 """
252 252 assert targetsize is None or 0 <= targetsize
253 253 startdata = revlog.start(revs[0])
254 254 enddata = revlog.end(revs[-1])
255 255 fullspan = enddata - startdata
256 256 if targetsize is None or fullspan <= targetsize:
257 257 yield revs
258 258 return
259 259
260 260 startrevidx = 0
261 261 endrevidx = 1
262 262 iterrevs = enumerate(revs)
263 263 next(iterrevs) # skip first rev.
264 264 # first step: get snapshots out of the way
265 265 for idx, r in iterrevs:
266 266 span = revlog.end(r) - startdata
267 267 snapshot = revlog.issnapshot(r)
268 268 if span <= targetsize and snapshot:
269 269 endrevidx = idx + 1
270 270 else:
271 271 chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
272 272 if chunk:
273 273 yield chunk
274 274 startrevidx = idx
275 275 startdata = revlog.start(r)
276 276 endrevidx = idx + 1
277 277 if not snapshot:
278 278 break
279 279
280 280 # for the others, we use binary slicing to quickly converge toward valid
281 281 # chunks (otherwise, we might end up looking for start/end of many
282 282 # revisions). This logic is not looking for the perfect slicing point; it
283 283 # focuses on quickly converging toward valid chunks.
284 284 nbitem = len(revs)
285 285 while (enddata - startdata) > targetsize:
286 286 endrevidx = nbitem
287 287 if nbitem - startrevidx <= 1:
288 288 break # protect against individual chunk larger than limit
289 289 localenddata = revlog.end(revs[endrevidx - 1])
290 290 span = localenddata - startdata
291 291 while span > targetsize:
292 292 if endrevidx - startrevidx <= 1:
293 293 break # protect against individual chunk larger than limit
294 294 endrevidx -= (endrevidx - startrevidx) // 2
295 295 localenddata = revlog.end(revs[endrevidx - 1])
296 296 span = localenddata - startdata
297 297 chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
298 298 if chunk:
299 299 yield chunk
300 300 startrevidx = endrevidx
301 301 startdata = revlog.start(revs[startrevidx])
302 302
303 303 chunk = _trimchunk(revlog, revs, startrevidx)
304 304 if chunk:
305 305 yield chunk
306 306
307 307
308 308 def _slicechunktodensity(revlog, revs, targetdensity=0.5, mingapsize=0):
309 309 """slice revs to reduce the amount of unrelated data to be read from disk.
310 310
311 311 ``revs`` is sliced into groups that should be read in one time.
312 312 Assume that revs are sorted.
313 313
314 314 The initial chunk is sliced until the overall density (payload/chunks-span
315 315 ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
316 316 skipped.
317 317
318 318 >>> revlog = _testrevlog([
319 319 ... 5, #00 (5)
320 320 ... 10, #01 (5)
321 321 ... 12, #02 (2)
322 322 ... 12, #03 (empty)
323 323 ... 27, #04 (15)
324 324 ... 31, #05 (4)
325 325 ... 31, #06 (empty)
326 326 ... 42, #07 (11)
327 327 ... 47, #08 (5)
328 328 ... 47, #09 (empty)
329 329 ... 48, #10 (1)
330 330 ... 51, #11 (3)
331 331 ... 74, #12 (23)
332 332 ... 85, #13 (11)
333 333 ... 86, #14 (1)
334 334 ... 91, #15 (5)
335 335 ... ])
336 336
337 337 >>> list(_slicechunktodensity(revlog, list(range(16))))
338 338 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
339 339 >>> list(_slicechunktodensity(revlog, [0, 15]))
340 340 [[0], [15]]
341 341 >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
342 342 [[0], [11], [15]]
343 343 >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
344 344 [[0], [11, 13, 15]]
345 345 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
346 346 [[1, 2], [5, 8, 10, 11], [14]]
347 347 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
348 348 ... mingapsize=20))
349 349 [[1, 2, 3, 5, 8, 10, 11], [14]]
350 350 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
351 351 ... targetdensity=0.95))
352 352 [[1, 2], [5], [8, 10, 11], [14]]
353 353 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
354 354 ... targetdensity=0.95, mingapsize=12))
355 355 [[1, 2], [5, 8, 10, 11], [14]]
356 356 """
357 357 start = revlog.start
358 358 length = revlog.length
359 359
360 360 if len(revs) <= 1:
361 361 yield revs
362 362 return
363 363
364 364 deltachainspan = segmentspan(revlog, revs)
365 365
366 366 if deltachainspan < mingapsize:
367 367 yield revs
368 368 return
369 369
370 370 readdata = deltachainspan
371 371 chainpayload = sum(length(r) for r in revs)
372 372
373 373 if deltachainspan:
374 374 density = chainpayload / float(deltachainspan)
375 375 else:
376 376 density = 1.0
377 377
378 378 if density >= targetdensity:
379 379 yield revs
380 380 return
381 381
382 382 # Collect the gaps so they can be sorted and popped from largest to smallest
383 383 gaps = []
384 384 prevend = None
385 385 for i, rev in enumerate(revs):
386 386 revstart = start(rev)
387 387 revlen = length(rev)
388 388
389 389 # Skip empty revisions to form larger holes
390 390 if revlen == 0:
391 391 continue
392 392
393 393 if prevend is not None:
394 394 gapsize = revstart - prevend
395 395 # only consider holes that are large enough
396 396 if gapsize > mingapsize:
397 397 gaps.append((gapsize, i))
398 398
399 399 prevend = revstart + revlen
400 400 # sort the gaps to pop them from largest to smallest
401 401 gaps.sort()
402 402
403 403 # Collect the indices of the largest holes until the density is acceptable
404 404 selected = []
405 405 while gaps and density < targetdensity:
406 406 gapsize, gapidx = gaps.pop()
407 407
408 408 selected.append(gapidx)
409 409
410 410 # removing the gap from the data we read shrinks the read span,
411 411 # which in turn raises the density
412 412 readdata -= gapsize
413 413 if readdata > 0:
414 414 density = chainpayload / float(readdata)
415 415 else:
416 416 density = 1.0
417 417 selected.sort()
418 418
419 419 # Cut the revs at collected indices
420 420 previdx = 0
421 421 for idx in selected:
422 422
423 423 chunk = _trimchunk(revlog, revs, previdx, idx)
424 424 if chunk:
425 425 yield chunk
426 426
427 427 previdx = idx
428 428
429 429 chunk = _trimchunk(revlog, revs, previdx)
430 430 if chunk:
431 431 yield chunk
432 432
433 433
434 434 def _trimchunk(revlog, revs, startidx, endidx=None):
435 435 """returns revs[startidx:endidx] without empty trailing revs
436 436
437 437 Doctest Setup
438 438 >>> revlog = _testrevlog([
439 439 ... 5, #0
440 440 ... 10, #1
441 441 ... 12, #2
442 442 ... 12, #3 (empty)
443 443 ... 17, #4
444 444 ... 21, #5
445 445 ... 21, #6 (empty)
446 446 ... ])
447 447
448 448 Contiguous cases:
449 449 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
450 450 [0, 1, 2, 3, 4, 5]
451 451 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
452 452 [0, 1, 2, 3, 4]
453 453 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
454 454 [0, 1, 2]
455 455 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
456 456 [2]
457 457 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
458 458 [3, 4, 5]
459 459 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
460 460 [3, 4]
461 461
462 462 Discontiguous cases:
463 463 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
464 464 [1, 3, 5]
465 465 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
466 466 [1]
467 467 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
468 468 [3, 5]
469 469 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
470 470 [3, 5]
471 471 """
472 472 length = revlog.length
473 473
474 474 if endidx is None:
475 475 endidx = len(revs)
476 476
477 477 # If we have a non-empty delta candidate, there is nothing to trim
478 478 if revs[endidx - 1] < len(revlog):
479 479 # Trim empty revs at the end, except the very first revision of a chain
480 480 while (
481 481 endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0
482 482 ):
483 483 endidx -= 1
484 484
485 485 return revs[startidx:endidx]
486 486
487 487
488 488 def segmentspan(revlog, revs):
489 489 """Get the byte span of a segment of revisions
490 490
491 491 revs is a sorted array of revision numbers
492 492
493 493 >>> revlog = _testrevlog([
494 494 ... 5, #0
495 495 ... 10, #1
496 496 ... 12, #2
497 497 ... 12, #3 (empty)
498 498 ... 17, #4
499 499 ... ])
500 500
501 501 >>> segmentspan(revlog, [0, 1, 2, 3, 4])
502 502 17
503 503 >>> segmentspan(revlog, [0, 4])
504 504 17
505 505 >>> segmentspan(revlog, [3, 4])
506 506 5
507 507 >>> segmentspan(revlog, [1, 2, 3,])
508 508 7
509 509 >>> segmentspan(revlog, [1, 3])
510 510 7
511 511 """
512 512 if not revs:
513 513 return 0
514 514 end = revlog.end(revs[-1])
515 515 return end - revlog.start(revs[0])
516 516
517 517
518 518 def _textfromdelta(fh, revlog, baserev, delta, p1, p2, flags, expectednode):
519 519 """build full text from a (base, delta) pair and other metadata"""
520 520 # special case deltas which replace entire base; no need to decode
521 521 # base revision. this neatly avoids censored bases, which throw when
522 522 # they're decoded.
523 523 hlen = struct.calcsize(b">lll")
524 524 if delta[:hlen] == mdiff.replacediffheader(
525 525 revlog.rawsize(baserev), len(delta) - hlen
526 526 ):
527 527 fulltext = delta[hlen:]
528 528 else:
529 529 # deltabase is the rawtext before it was changed by flag processors,
530 530 # which is equivalent to non-raw text
531 531 basetext = revlog.revision(baserev, _df=fh)
532 532 fulltext = mdiff.patch(basetext, delta)
533 533
534 534 try:
535 535 validatehash = flagutil.processflagsraw(revlog, fulltext, flags)
536 536 if validatehash:
537 537 revlog.checkhash(fulltext, expectednode, p1=p1, p2=p2)
538 538 if flags & REVIDX_ISCENSORED:
539 539 raise error.StorageError(
540 540 _(b'node %s is not censored') % expectednode
541 541 )
542 542 except error.CensoredNodeError:
543 543 # must pass the censored index flag to add censored revisions
544 544 if not flags & REVIDX_ISCENSORED:
545 545 raise
546 546 return fulltext
547 547
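The full-replacement shortcut in `_textfromdelta` relies on the mdiff hunk header layout: a delta is a series of `>lll` big-endian (start, end, newlength) headers followed by payload, so a delta replacing the entire base starts with (0, rawsize(base), len(payload)). A hedged sketch of that check in isolation (assuming `mdiff.replacediffheader` packs exactly those three fields):

    import struct

    def is_full_replacement(delta, base_rawsize):
        # header of a single hunk replacing bytes [0, base_rawsize)
        # with the remainder of the delta
        hlen = struct.calcsize(b">lll")
        header = struct.pack(b">lll", 0, base_rawsize, len(delta) - hlen)
        return delta[:hlen] == header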
548 548
549 549 @attr.s(slots=True, frozen=True)
550 550 class _deltainfo:
551 551 distance = attr.ib()
552 552 deltalen = attr.ib()
553 553 data = attr.ib()
554 554 base = attr.ib()
555 555 chainbase = attr.ib()
556 556 chainlen = attr.ib()
557 557 compresseddeltalen = attr.ib()
558 558 snapshotdepth = attr.ib()
559 559
560 560
561 561 def drop_u_compression(delta):
562 562 """turn into a "u" (no-compression) into no-compression without header
563 563
564 564 This is useful for revlog formats that have a better compression method.
565 565 """
566 566 assert delta.data[0] == b'u', delta.data[0]
567 567 return _deltainfo(
568 568 delta.distance,
569 569 delta.deltalen - 1,
570 570 (b'', delta.data[1]),
571 571 delta.base,
572 572 delta.chainbase,
573 573 delta.chainlen,
574 574 delta.compresseddeltalen,
575 575 delta.snapshotdepth,
576 576 )
577 577
578 578
579 579 def isgooddeltainfo(revlog, deltainfo, revinfo):
580 580 """Returns True if the given delta is good. Good means that it is within
581 581 the disk span, disk size, and chain length bounds that we know to be
582 582 performant."""
583 583 if deltainfo is None:
584 584 return False
585 585
586 586 # - 'deltainfo.distance' is the distance from the base revision --
587 587 # bounding it limits the amount of I/O we need to do.
588 588 # - 'deltainfo.compresseddeltalen' is the sum of the total size of
589 589 # deltas we need to apply -- bounding it limits the amount of CPU
590 590 # we consume.
591 591
592 592 textlen = revinfo.textlen
593 593 defaultmax = textlen * 4
594 594 maxdist = revlog._maxdeltachainspan
595 595 if not maxdist:
596 596 maxdist = deltainfo.distance # ensure the conditional passes
597 597 maxdist = max(maxdist, defaultmax)
598 598
599 599 # Bad delta from read span:
600 600 #
601 601 # If the span of data read is larger than the maximum allowed.
602 602 #
603 603 # In the sparse-revlog case, we rely on the associated "sparse reading"
604 604 # to avoid issue related to the span of data. In theory, it would be
605 605 # possible to build pathological revlog where delta pattern would lead
606 606 # to too many reads. However, they do not happen in practice at all. So
607 607 # we skip the span check entirely.
608 608 if not revlog._sparserevlog and maxdist < deltainfo.distance:
609 609 return False
610 610
611 611 # Bad delta from new delta size:
612 612 #
613 613 # If the delta size is larger than the target text, storing the
614 614 # delta will be inefficient.
615 615 if textlen < deltainfo.deltalen:
616 616 return False
617 617
618 618 # Bad delta from cumulated payload size:
619 619 #
620 620 # If the sum of the deltas gets larger than K * the target text length.
621 621 if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
622 622 return False
623 623
624 624 # Bad delta from chain length:
625 625 #
626 626 # If the number of deltas in the chain gets too high.
627 627 if revlog._maxchainlen and revlog._maxchainlen < deltainfo.chainlen:
628 628 return False
629 629
630 630 # bad delta from intermediate snapshot size limit
631 631 #
632 632 # If an intermediate snapshot size is higher than the limit. The
633 633 # limit exists to prevent endless chains of intermediate deltas from
634 634 # being created.
635 635 if (
636 636 deltainfo.snapshotdepth is not None
637 637 and (textlen >> deltainfo.snapshotdepth) < deltainfo.deltalen
638 638 ):
639 639 return False
640 640
641 641 # bad delta if new intermediate snapshot is larger than the previous
642 642 # snapshot
643 643 if (
644 644 deltainfo.snapshotdepth
645 645 and revlog.length(deltainfo.base) < deltainfo.deltalen
646 646 ):
647 647 return False
648 648
649 649 return True
650 650
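As a concrete reading of the bounds checked by `isgooddeltainfo`, here is a small worked example with hypothetical numbers (`LIMIT_DELTA2TEXT` is 2 in this module):

    # a 1000-byte fulltext with a 200-byte candidate delta
    textlen = 1000
    deltalen = 200             # must not exceed the fulltext size
    compresseddeltalen = 1700  # whole-chain payload, capped at 2 * textlen
    snapshotdepth = 2          # candidate would be a depth-2 snapshot

    assert deltalen <= textlen                     # delta-vs-text bound
    assert compresseddeltalen <= 2 * textlen       # LIMIT_DELTA2TEXT bound
    assert deltalen <= (textlen >> snapshotdepth)  # 200 <= 250, snapshot bound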
651 651
652 652 # If a revision's full text is that much bigger than a base candidate full
653 653 # text's, it is very unlikely that it will produce a valid delta. We no longer
654 654 # consider these candidates.
655 655 LIMIT_BASE2TEXT = 500
656 656
657 657
658 658 def _candidategroups(
659 659 revlog,
660 660 textlen,
661 661 p1,
662 662 p2,
663 663 cachedelta,
664 664 excluded_bases=None,
665 665 target_rev=None,
666 666 ):
667 667 """Provides group of revision to be tested as delta base
668 668
669 669 This top level function focuses on emitting groups with unique and worthwhile
670 670 content. See _rawgroups for details about the group order.
671 671 """
672 672 # should we try to build a delta?
673 673 if not (len(revlog) and revlog._storedeltachains):
674 674 yield None
675 675 return
676 676
677 677 deltalength = revlog.length
678 678 deltaparent = revlog.deltaparent
679 679 sparse = revlog._sparserevlog
680 680 good = None
681 681
682 682 deltas_limit = textlen * LIMIT_DELTA2TEXT
683 683
684 684 tested = {nullrev}
685 685 candidates = _refinedgroups(
686 686 revlog,
687 687 p1,
688 688 p2,
689 689 cachedelta,
690 690 )
691 691 while True:
692 692 temptative = candidates.send(good)
693 693 if temptative is None:
694 694 break
695 695 group = []
696 696 for rev in temptative:
697 697 # skip over empty deltas (no need to include them in a chain)
698 698 while revlog._generaldelta and not (
699 699 rev == nullrev or rev in tested or deltalength(rev)
700 700 ):
701 701 tested.add(rev)
702 702 rev = deltaparent(rev)
703 703 # no need to try a delta against nullrev, this will be done as a
704 704 # last resort.
705 705 if rev == nullrev:
706 706 continue
707 707 # filter out revisions we tested already
708 708 if rev in tested:
709 709 continue
710 tested.add(rev)
711 710 # a higher authority deemed the base unworthy (e.g. censored)
712 711 if excluded_bases is not None and rev in excluded_bases:
712 tested.add(rev)
713 713 continue
714 714 # We are in some recomputation cases and that rev is too high in
715 715 # the revlog
716 716 if target_rev is not None and rev >= target_rev:
717 tested.add(rev)
717 718 continue
718 719 # filter out delta bases that will never produce a good delta
719 720 if deltas_limit < revlog.length(rev):
721 tested.add(rev)
720 722 continue
721 723 if sparse and revlog.rawsize(rev) < (textlen // LIMIT_BASE2TEXT):
724 tested.add(rev)
722 725 continue
723 726 # no delta for rawtext-changing revs (see "candelta" for why)
724 727 if revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
728 tested.add(rev)
725 729 continue
726 730
727 731 # If we reach here, we are about to build and test a delta.
728 732 # The delta building process will compute the chaininfo in all
729 733 # cases; since that computation is cached, it is fine to access it
730 734 # here too.
731 735 chainlen, chainsize = revlog._chaininfo(rev)
732 736 # if the chain will be too long, skip this base
733 737 if revlog._maxchainlen and chainlen >= revlog._maxchainlen:
738 tested.add(rev)
734 739 continue
735 740 # if the chain already has too much data, skip this base
736 741 if deltas_limit < chainsize:
742 tested.add(rev)
737 743 continue
738 744 if sparse and revlog.upperboundcomp is not None:
739 745 maxcomp = revlog.upperboundcomp
740 746 basenotsnap = (p1, p2, nullrev)
741 747 if rev not in basenotsnap and revlog.issnapshot(rev):
742 748 snapshotdepth = revlog.snapshotdepth(rev)
743 749 # If text is significantly larger than the base, we can
744 750 # expect the resulting delta to be proportional to the size
745 751 # difference
746 752 revsize = revlog.rawsize(rev)
747 753 rawsizedistance = max(textlen - revsize, 0)
748 754 # use an estimate of the compression upper bound.
749 755 lowestrealisticdeltalen = rawsizedistance // maxcomp
750 756
751 757 # check the absolute constraint on the delta size
752 758 snapshotlimit = textlen >> snapshotdepth
753 759 if snapshotlimit < lowestrealisticdeltalen:
754 760 # delta lower bound is larger than accepted upper bound
761 tested.add(rev)
755 762 continue
756 763
757 764 # check the relative constraint on the delta size
758 765 revlength = revlog.length(rev)
759 766 if revlength < lowestrealisticdeltalen:
760 767 # delta probable lower bound is larger than target base
768 tested.add(rev)
761 769 continue
762 770
763 771 group.append(rev)
764 772 if group:
765 773 # XXX: in the sparse revlog case, group can become large,
766 774 # impacting performance. Some bounding or slicing mechanism
767 775 # would help to reduce this impact.
776 tested.update(group)
768 777 good = yield tuple(group)
769 778 yield None
770 779
771 780
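`_candidategroups` and `_refinedgroups` talk to their caller through the generator send() protocol: the caller pulls a tuple of candidate revisions, tests them, and sends the best base found so far back in to steer refinement. A minimal sketch of the driving loop, modeled on `finddeltainfo` further down (the `test` callable is hypothetical):

    def drive(groups, test):
        # groups: generator obtained from _candidategroups()
        # test: returns the best delta base among a group and the
        #       current best, or None if nothing acceptable was found
        best = None
        candidates = next(groups)
        while candidates is not None:
            found = test(candidates, best)
            if found is not None:
                best = found
            if best is not None:
                candidates = groups.send(best)  # ask for refined groups
            else:
                candidates = next(groups)       # move on to the next group
        return best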
772 781 def _findsnapshots(revlog, cache, start_rev):
773 782 """find snapshot from start_rev to tip"""
774 783 if util.safehasattr(revlog.index, b'findsnapshots'):
775 784 revlog.index.findsnapshots(cache, start_rev)
776 785 else:
777 786 deltaparent = revlog.deltaparent
778 787 issnapshot = revlog.issnapshot
779 788 for rev in revlog.revs(start_rev):
780 789 if issnapshot(rev):
781 790 cache[deltaparent(rev)].append(rev)
782 791
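`_findsnapshots` fills a `collections.defaultdict(list)` mapping each snapshot's delta parent to the snapshots built on top of it, with full snapshots hanging off `nullrev`. A toy illustration of the resulting cache shape (the revision numbers are hypothetical):

    import collections

    nullrev = -1
    cache = collections.defaultdict(list)
    # revs 0 and 4 are full snapshots; rev 6 is an intermediate
    # snapshot whose delta parent is rev 4
    cache[nullrev].extend([0, 4])
    cache[4].append(6)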
783 792
784 793 def _refinedgroups(revlog, p1, p2, cachedelta):
785 794 good = None
786 795 # First we try to reuse the delta contained in the bundle.
787 796 # (or from the source revlog)
788 797 #
789 798 # This logic only applies to general delta repositories and can be disabled
790 799 # through configuration. Disabling source delta reuse is useful when
791 800 # we want to make sure we recompute "optimal" deltas.
792 801 debug_info = None
793 802 if cachedelta and revlog._generaldelta and revlog._lazydeltabase:
794 803 # Assume what we received from the server is a good choice
795 804 # build delta will reuse the cache
796 805 if debug_info is not None:
797 806 debug_info['cached-delta.tested'] += 1
798 807 good = yield (cachedelta[0],)
799 808 if good is not None:
800 809 if debug_info is not None:
801 810 debug_info['cached-delta.accepted'] += 1
802 811 yield None
803 812 return
804 813 # XXX cache me higher
805 814 snapshots = collections.defaultdict(list)
806 815 groups = _rawgroups(
807 816 revlog,
808 817 p1,
809 818 p2,
810 819 cachedelta,
811 820 snapshots,
812 821 )
813 822 for candidates in groups:
814 823 good = yield candidates
815 824 if good is not None:
816 825 break
817 826
818 827 # If sparse revlog is enabled, we can try to refine the available deltas
819 828 if not revlog._sparserevlog:
820 829 yield None
821 830 return
822 831
823 832 # if we have a refinable value, try to refine it
824 833 if good is not None and good not in (p1, p2) and revlog.issnapshot(good):
825 834 # refine snapshot down
826 835 previous = None
827 836 while previous != good:
828 837 previous = good
829 838 base = revlog.deltaparent(good)
830 839 if base == nullrev:
831 840 break
832 841 good = yield (base,)
833 842 # refine snapshot up
834 843 if not snapshots:
835 844 _findsnapshots(revlog, snapshots, good + 1)
836 845 previous = None
837 846 while good != previous:
838 847 previous = good
839 848 children = tuple(sorted(c for c in snapshots[good]))
840 849 good = yield children
841 850
842 851 if debug_info is not None:
843 852 if good is None:
844 853 debug_info['no-solution'] += 1
845 854
846 855 yield None
847 856
848 857
849 858 def _rawgroups(revlog, p1, p2, cachedelta, snapshots=None):
850 859 """Provides group of revision to be tested as delta base
851 860
852 861 This lower level function focuses on emitting theoretically interesting deltas
853 862 without looking at any practical details.
854 863
855 864 The group order aims at providing fast or small candidates first.
856 865 """
857 866 gdelta = revlog._generaldelta
858 867 # gate sparse behind general-delta because of issue6056
859 868 sparse = gdelta and revlog._sparserevlog
860 869 curr = len(revlog)
861 870 prev = curr - 1
862 871 deltachain = lambda rev: revlog._deltachain(rev)[0]
863 872
864 873 if gdelta:
865 874 # exclude already lazy tested base if any
866 875 parents = [p for p in (p1, p2) if p != nullrev]
867 876
868 877 if not revlog._deltabothparents and len(parents) == 2:
869 878 parents.sort()
870 879 # To minimize the chance of having to build a fulltext,
871 880 # pick first whichever parent is closest to us (max rev)
872 881 yield (parents[1],)
873 882 # then the other one (min rev) if the first did not fit
874 883 yield (parents[0],)
875 884 elif len(parents) > 0:
876 885 # Test all parents (1 or 2), and keep the best candidate
877 886 yield parents
878 887
879 888 if sparse and parents:
880 889 if snapshots is None:
881 890 # map: base-rev: [snapshot-revs]
882 891 snapshots = collections.defaultdict(list)
883 892 # See if we can use an existing snapshot in the parent chains as
884 893 # a base for a new intermediate-snapshot
885 894 #
886 895 # search for snapshot in parents delta chain
887 896 # map: snapshot-level: snapshot-rev
888 897 parents_snaps = collections.defaultdict(set)
889 898 candidate_chains = [deltachain(p) for p in parents]
890 899 for chain in candidate_chains:
891 900 for idx, s in enumerate(chain):
892 901 if not revlog.issnapshot(s):
893 902 break
894 903 parents_snaps[idx].add(s)
895 904 snapfloor = min(parents_snaps[0]) + 1
896 905 _findsnapshots(revlog, snapshots, snapfloor)
897 906 # search for the highest "unrelated" revision
898 907 #
899 908 # Adding snapshots used by an "unrelated" revision increases the odds
900 909 # that we reuse an independent, yet better, snapshot chain.
901 910 #
902 911 # XXX instead of building a set of revisions, we could lazily enumerate
903 912 # over the chains. That would be more efficient, however we stick to
904 913 # simple code for now.
905 914 all_revs = set()
906 915 for chain in candidate_chains:
907 916 all_revs.update(chain)
908 917 other = None
909 918 for r in revlog.revs(prev, snapfloor):
910 919 if r not in all_revs:
911 920 other = r
912 921 break
913 922 if other is not None:
914 923 # To avoid unfair competition, we won't use unrelated intermediate
915 924 # snapshots that are deeper than the ones from the parent delta
916 925 # chain.
917 926 max_depth = max(parents_snaps.keys())
918 927 chain = deltachain(other)
919 928 for depth, s in enumerate(chain):
920 929 if s < snapfloor:
921 930 continue
922 931 if max_depth < depth:
923 932 break
924 933 if not revlog.issnapshot(s):
925 934 break
926 935 parents_snaps[depth].add(s)
927 936 # Test them as possible intermediate snapshot bases.
928 937 # We test them from highest to lowest level. High level ones are more
929 938 # likely to result in a small delta.
930 939 floor = None
931 940 for idx, snaps in sorted(parents_snaps.items(), reverse=True):
932 941 siblings = set()
933 942 for s in snaps:
934 943 siblings.update(snapshots[s])
935 944 # Before considering making a new intermediate snapshot, we check
936 945 # if an existing snapshot, a child of the base we consider, would be
937 946 # suitable.
938 947 #
939 948 # It gives a chance to reuse a delta chain "unrelated" to the
940 949 # current revision instead of starting our own. Without such
941 950 # re-use, topological branches would keep reopening new chains,
942 951 # creating more and more snapshots as the repository grows.
943 952
944 953 if floor is not None:
945 954 # We only do this for siblings created after the one in our
946 955 # parent's delta chain. Those created before have less chance
947 956 # to be a valid base, since our ancestors had to create a new
948 957 # snapshot.
949 958 siblings = [r for r in siblings if floor < r]
950 959 yield tuple(sorted(siblings))
951 960 # then test the base from our parent's delta chain.
952 961 yield tuple(sorted(snaps))
953 962 floor = min(snaps)
954 963 # No suitable base found in the parent chain; search whether any full
955 964 # snapshot emitted since the parent's base would be a suitable base for an
956 965 # intermediate snapshot.
957 966 #
958 967 # It gives a chance to reuse a delta chain unrelated to the current
959 968 # revision instead of starting our own. Without such re-use,
960 969 # topological branches would keep reopening new full chains, creating
961 970 # more and more snapshots as the repository grows.
962 971 yield tuple(snapshots[nullrev])
963 972
964 973 if not sparse:
965 974 # other approaches failed; try against prev to hopefully save us a
966 975 # fulltext.
967 976 yield (prev,)
968 977
969 978
970 979 class deltacomputer:
971 980 def __init__(
972 981 self,
973 982 revlog,
974 983 write_debug=None,
975 984 debug_search=False,
976 985 debug_info=None,
977 986 ):
978 987 self.revlog = revlog
979 988 self._write_debug = write_debug
980 989 self._debug_search = debug_search
981 990 self._debug_info = debug_info
982 991
983 992 def buildtext(self, revinfo, fh):
984 993 """Builds a fulltext version of a revision
985 994
986 995 revinfo: revisioninfo instance that contains all needed info
987 996 fh: file handle to either the .i or the .d revlog file,
988 997 depending on whether it is inlined or not
989 998 """
990 999 btext = revinfo.btext
991 1000 if btext[0] is not None:
992 1001 return btext[0]
993 1002
994 1003 revlog = self.revlog
995 1004 cachedelta = revinfo.cachedelta
996 1005 baserev = cachedelta[0]
997 1006 delta = cachedelta[1]
998 1007
999 1008 fulltext = btext[0] = _textfromdelta(
1000 1009 fh,
1001 1010 revlog,
1002 1011 baserev,
1003 1012 delta,
1004 1013 revinfo.p1,
1005 1014 revinfo.p2,
1006 1015 revinfo.flags,
1007 1016 revinfo.node,
1008 1017 )
1009 1018 return fulltext
1010 1019
1011 1020 def _builddeltadiff(self, base, revinfo, fh):
1012 1021 revlog = self.revlog
1013 1022 t = self.buildtext(revinfo, fh)
1014 1023 if revlog.iscensored(base):
1015 1024 # deltas based on a censored revision must replace the
1016 1025 # full content in one patch, so delta works everywhere
1017 1026 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
1018 1027 delta = header + t
1019 1028 else:
1020 1029 ptext = revlog.rawdata(base, _df=fh)
1021 1030 delta = mdiff.textdiff(ptext, t)
1022 1031
1023 1032 return delta
1024 1033
1025 1034 def _builddeltainfo(self, revinfo, base, fh):
1026 1035 # can we use the cached delta?
1027 1036 revlog = self.revlog
1028 1037 debug_search = self._write_debug is not None and self._debug_search
1029 1038 chainbase = revlog.chainbase(base)
1030 1039 if revlog._generaldelta:
1031 1040 deltabase = base
1032 1041 else:
1033 1042 deltabase = chainbase
1034 1043 snapshotdepth = None
1035 1044 if revlog._sparserevlog and deltabase == nullrev:
1036 1045 snapshotdepth = 0
1037 1046 elif revlog._sparserevlog and revlog.issnapshot(deltabase):
1038 1047 # A delta chain should always be one full snapshot,
1039 1048 # zero or more semi-snapshots, and zero or more deltas
1040 1049 p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
1041 1050 if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
1042 1051 snapshotdepth = len(revlog._deltachain(deltabase)[0])
1043 1052 delta = None
1044 1053 if revinfo.cachedelta:
1045 1054 cachebase, cachediff = revinfo.cachedelta
1046 1055 # check if the diff still applies
1047 1056 currentbase = cachebase
1048 1057 while (
1049 1058 currentbase != nullrev
1050 1059 and currentbase != base
1051 1060 and self.revlog.length(currentbase) == 0
1052 1061 ):
1053 1062 currentbase = self.revlog.deltaparent(currentbase)
1054 1063 if self.revlog._lazydelta and currentbase == base:
1055 1064 delta = revinfo.cachedelta[1]
1056 1065 if delta is None:
1057 1066 delta = self._builddeltadiff(base, revinfo, fh)
1058 1067 if debug_search:
1059 1068 msg = b"DBG-DELTAS-SEARCH: uncompressed-delta-size=%d\n"
1060 1069 msg %= len(delta)
1061 1070 self._write_debug(msg)
1062 1071 # snapshotdepth must be neither None nor 0 (a level-0 snapshot)
1063 1072 if revlog.upperboundcomp is not None and snapshotdepth:
1064 1073 lowestrealisticdeltalen = len(delta) // revlog.upperboundcomp
1065 1074 snapshotlimit = revinfo.textlen >> snapshotdepth
1066 1075 if debug_search:
1067 1076 msg = b"DBG-DELTAS-SEARCH: projected-lower-size=%d\n"
1068 1077 msg %= lowestrealisticdeltalen
1069 1078 self._write_debug(msg)
1070 1079 if snapshotlimit < lowestrealisticdeltalen:
1071 1080 if debug_search:
1072 1081 msg = b"DBG-DELTAS-SEARCH: DISCARDED (snapshot limit)\n"
1073 1082 self._write_debug(msg)
1074 1083 return None
1075 1084 if revlog.length(base) < lowestrealisticdeltalen:
1076 1085 if debug_search:
1077 1086 msg = b"DBG-DELTAS-SEARCH: DISCARDED (prev size)\n"
1078 1087 self._write_debug(msg)
1079 1088 return None
1080 1089 header, data = revlog.compress(delta)
1081 1090 deltalen = len(header) + len(data)
1082 1091 offset = revlog.end(len(revlog) - 1)
1083 1092 dist = deltalen + offset - revlog.start(chainbase)
1084 1093 chainlen, compresseddeltalen = revlog._chaininfo(base)
1085 1094 chainlen += 1
1086 1095 compresseddeltalen += deltalen
1087 1096
1088 1097 return _deltainfo(
1089 1098 dist,
1090 1099 deltalen,
1091 1100 (header, data),
1092 1101 deltabase,
1093 1102 chainbase,
1094 1103 chainlen,
1095 1104 compresseddeltalen,
1096 1105 snapshotdepth,
1097 1106 )
1098 1107
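The cached-delta check in `_builddeltainfo` walks from the cached base toward `base` across empty (zero-length) deltas: a diff computed against `cachebase` still applies byte-for-byte if every revision in between stored no change. The same walk in isolation (a hedged sketch; `length` and `deltaparent` stand in for the revlog methods):

    def cached_delta_still_applies(cachebase, base, length, deltaparent):
        nullrev = -1
        current = cachebase
        # follow zero-length deltas upward until we reach real content,
        # the target base, or the null revision
        while current != nullrev and current != base and length(current) == 0:
            current = deltaparent(current)
        return current == base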
1099 1108 def _fullsnapshotinfo(self, fh, revinfo, curr):
1100 1109 rawtext = self.buildtext(revinfo, fh)
1101 1110 data = self.revlog.compress(rawtext)
1102 1111 compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
1103 1112 deltabase = chainbase = curr
1104 1113 snapshotdepth = 0
1105 1114 chainlen = 1
1106 1115
1107 1116 return _deltainfo(
1108 1117 dist,
1109 1118 deltalen,
1110 1119 data,
1111 1120 deltabase,
1112 1121 chainbase,
1113 1122 chainlen,
1114 1123 compresseddeltalen,
1115 1124 snapshotdepth,
1116 1125 )
1117 1126
1118 1127 def finddeltainfo(self, revinfo, fh, excluded_bases=None, target_rev=None):
1119 1128 """Find an acceptable delta against a candidate revision
1120 1129
1121 1130 revinfo: information about the revision (instance of _revisioninfo)
1122 1131 fh: file handle to either the .i or the .d revlog file,
1123 1132 depending on whether it is inlined or not
1124 1133
1125 1134 Returns the first acceptable candidate revision, as ordered by
1126 1135 _candidategroups
1127 1136
1128 1137 If no suitable deltabase is found, we return delta info for a full
1129 1138 snapshot.
1130 1139
1131 1140 `excluded_bases` is an optional set of revisions that cannot be used as
1132 1141 a delta base. Use this to recompute deltas suitable in a censor or strip
1133 1142 context.
1134 1143 """
1135 1144 if target_rev is None:
1136 1145 target_rev = len(self.revlog)
1137 1146
1138 1147 if not revinfo.textlen:
1139 1148 return self._fullsnapshotinfo(fh, revinfo, target_rev)
1140 1149
1141 1150 if excluded_bases is None:
1142 1151 excluded_bases = set()
1143 1152
1144 1153 # no delta for flag processor revision (see "candelta" for why)
1145 1154 # not calling candelta since only one revision needs testing, also to
1146 1155 # avoid overhead fetching flags again.
1147 1156 if revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
1148 1157 return self._fullsnapshotinfo(fh, revinfo, target_rev)
1149 1158
1150 1159 gather_debug = (
1151 1160 self._write_debug is not None or self._debug_info is not None
1152 1161 )
1153 1162 debug_search = self._write_debug is not None and self._debug_search
1154 1163
1155 1164 if gather_debug:
1156 1165 start = util.timer()
1157 1166
1158 1167 # count the number of different deltas we tried (for debug purposes)
1159 1168 dbg_try_count = 0
1160 1169 # count the number of "search round" we did. (for debug purpose)
1161 1170 dbg_try_rounds = 0
1162 1171 dbg_type = b'unknown'
1163 1172
1164 1173 cachedelta = revinfo.cachedelta
1165 1174 p1 = revinfo.p1
1166 1175 p2 = revinfo.p2
1167 1176 revlog = self.revlog
1168 1177
1169 1178 deltainfo = None
1170 1179 p1r, p2r = revlog.rev(p1), revlog.rev(p2)
1171 1180
1172 1181 if gather_debug:
1173 1182 if p1r != nullrev:
1174 1183 p1_chain_len = revlog._chaininfo(p1r)[0]
1175 1184 else:
1176 1185 p1_chain_len = -1
1177 1186 if p2r != nullrev:
1178 1187 p2_chain_len = revlog._chaininfo(p2r)[0]
1179 1188 else:
1180 1189 p2_chain_len = -1
1181 1190 if debug_search:
1182 1191 msg = b"DBG-DELTAS-SEARCH: SEARCH rev=%d\n"
1183 1192 msg %= target_rev
1184 1193 self._write_debug(msg)
1185 1194
1186 1195 groups = _candidategroups(
1187 1196 self.revlog,
1188 1197 revinfo.textlen,
1189 1198 p1r,
1190 1199 p2r,
1191 1200 cachedelta,
1192 1201 excluded_bases,
1193 1202 target_rev,
1194 1203 )
1195 1204 candidaterevs = next(groups)
1196 1205 while candidaterevs is not None:
1197 1206 dbg_try_rounds += 1
1198 1207 if debug_search:
1199 1208 prev = None
1200 1209 if deltainfo is not None:
1201 1210 prev = deltainfo.base
1202 1211
1203 1212 if (
1204 1213 cachedelta is not None
1205 1214 and len(candidaterevs) == 1
1206 1215 and cachedelta[0] in candidaterevs
1207 1216 ):
1208 1217 round_type = b"cached-delta"
1209 1218 elif p1 in candidaterevs or p2 in candidaterevs:
1210 1219 round_type = b"parents"
1211 1220 elif prev is not None and all(c < prev for c in candidaterevs):
1212 1221 round_type = b"refine-down"
1213 1222 elif prev is not None and all(c > prev for c in candidaterevs):
1214 1223 round_type = b"refine-up"
1215 1224 else:
1216 1225 round_type = b"search-down"
1217 1226 msg = b"DBG-DELTAS-SEARCH: ROUND #%d - %d candidates - %s\n"
1218 1227 msg %= (dbg_try_rounds, len(candidaterevs), round_type)
1219 1228 self._write_debug(msg)
1220 1229 nominateddeltas = []
1221 1230 if deltainfo is not None:
1222 1231 if debug_search:
1223 1232 msg = (
1224 1233 b"DBG-DELTAS-SEARCH: CONTENDER: rev=%d - length=%d\n"
1225 1234 )
1226 1235 msg %= (deltainfo.base, deltainfo.deltalen)
1227 1236 self._write_debug(msg)
1228 1237 # if we already found a good delta,
1229 1238 # challenge it against refined candidates
1230 1239 nominateddeltas.append(deltainfo)
1231 1240 for candidaterev in candidaterevs:
1232 1241 if debug_search:
1233 1242 msg = b"DBG-DELTAS-SEARCH: CANDIDATE: rev=%d\n"
1234 1243 msg %= candidaterev
1235 1244 self._write_debug(msg)
1236 1245 candidate_type = None
1237 1246 if candidaterev == p1:
1238 1247 candidate_type = b"p1"
1239 1248 elif candidaterev == p2:
1240 1249 candidate_type = b"p2"
1241 1250 elif self.revlog.issnapshot(candidaterev):
1242 1251 candidate_type = b"snapshot-%d"
1243 1252 candidate_type %= self.revlog.snapshotdepth(
1244 1253 candidaterev
1245 1254 )
1246 1255
1247 1256 if candidate_type is not None:
1248 1257 msg = b"DBG-DELTAS-SEARCH: type=%s\n"
1249 1258 msg %= candidate_type
1250 1259 self._write_debug(msg)
1251 1260 msg = b"DBG-DELTAS-SEARCH: size=%d\n"
1252 1261 msg %= self.revlog.length(candidaterev)
1253 1262 self._write_debug(msg)
1254 1263 msg = b"DBG-DELTAS-SEARCH: base=%d\n"
1255 1264 msg %= self.revlog.deltaparent(candidaterev)
1256 1265 self._write_debug(msg)
1257 1266
1258 1267 dbg_try_count += 1
1259 1268
1260 1269 if debug_search:
1261 1270 delta_start = util.timer()
1262 1271 candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
1263 1272 if debug_search:
1264 1273 delta_end = util.timer()
1265 1274 msg = b"DBG-DELTAS-SEARCH: delta-search-time=%f\n"
1266 1275 msg %= delta_end - delta_start
1267 1276 self._write_debug(msg)
1268 1277 if candidatedelta is not None:
1269 1278 if isgooddeltainfo(self.revlog, candidatedelta, revinfo):
1270 1279 if debug_search:
1271 1280 msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (GOOD)\n"
1272 1281 msg %= candidatedelta.deltalen
1273 1282 self._write_debug(msg)
1274 1283 nominateddeltas.append(candidatedelta)
1275 1284 elif debug_search:
1276 1285 msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (BAD)\n"
1277 1286 msg %= candidatedelta.deltalen
1278 1287 self._write_debug(msg)
1279 1288 elif debug_search:
1280 1289 msg = b"DBG-DELTAS-SEARCH: NO-DELTA\n"
1281 1290 self._write_debug(msg)
1282 1291 if nominateddeltas:
1283 1292 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
1284 1293 if deltainfo is not None:
1285 1294 candidaterevs = groups.send(deltainfo.base)
1286 1295 else:
1287 1296 candidaterevs = next(groups)
1288 1297
1289 1298 if deltainfo is None:
1290 1299 dbg_type = b"full"
1291 1300 deltainfo = self._fullsnapshotinfo(fh, revinfo, target_rev)
1292 1301 elif deltainfo.snapshotdepth: # pytype: disable=attribute-error
1293 1302 dbg_type = b"snapshot"
1294 1303 else:
1295 1304 dbg_type = b"delta"
1296 1305
1297 1306 if gather_debug:
1298 1307 end = util.timer()
1299 1308 used_cached = (
1300 1309 cachedelta is not None
1301 1310 and dbg_try_rounds == 1
1302 1311 and dbg_try_count == 1
1303 1312 and deltainfo.base == cachedelta[0]
1304 1313 )
1305 1314 dbg = {
1306 1315 'duration': end - start,
1307 1316 'revision': target_rev,
1308 1317 'delta-base': deltainfo.base, # pytype: disable=attribute-error
1309 1318 'search_round_count': dbg_try_rounds,
1310 1319 'using-cached-base': used_cached,
1311 1320 'delta_try_count': dbg_try_count,
1312 1321 'type': dbg_type,
1313 1322 'p1-chain-len': p1_chain_len,
1314 1323 'p2-chain-len': p2_chain_len,
1315 1324 }
1316 1325 if (
1317 1326 deltainfo.snapshotdepth # pytype: disable=attribute-error
1318 1327 is not None
1319 1328 ):
1320 1329 dbg[
1321 1330 'snapshot-depth'
1322 1331 ] = deltainfo.snapshotdepth # pytype: disable=attribute-error
1323 1332 else:
1324 1333 dbg['snapshot-depth'] = 0
1325 1334 target_revlog = b"UNKNOWN"
1326 1335 target_type = self.revlog.target[0]
1327 1336 target_key = self.revlog.target[1]
1328 1337 if target_type == KIND_CHANGELOG:
1329 1338 target_revlog = b'CHANGELOG:'
1330 1339 elif target_type == KIND_MANIFESTLOG:
1331 1340 target_revlog = b'MANIFESTLOG:'
1332 1341 if target_key:
1333 1342 target_revlog += b'%s:' % target_key
1334 1343 elif target_type == KIND_FILELOG:
1335 1344 target_revlog = b'FILELOG:'
1336 1345 if target_key:
1337 1346 target_revlog += b'%s:' % target_key
1338 1347 dbg['target-revlog'] = target_revlog
1339 1348
1340 1349 if self._debug_info is not None:
1341 1350 self._debug_info.append(dbg)
1342 1351
1343 1352 if self._write_debug is not None:
1344 1353 msg = (
1345 1354 b"DBG-DELTAS:"
1346 1355 b" %-12s"
1347 1356 b" rev=%d:"
1348 1357 b" delta-base=%d"
1349 1358 b" is-cached=%d"
1350 1359 b" - search-rounds=%d"
1351 1360 b" try-count=%d"
1352 1361 b" - delta-type=%-6s"
1353 1362 b" snap-depth=%d"
1354 1363 b" - p1-chain-length=%d"
1355 1364 b" p2-chain-length=%d"
1356 1365 b" - duration=%f"
1357 1366 b"\n"
1358 1367 )
1359 1368 msg %= (
1360 1369 dbg["target-revlog"],
1361 1370 dbg["revision"],
1362 1371 dbg["delta-base"],
1363 1372 dbg["using-cached-base"],
1364 1373 dbg["search_round_count"],
1365 1374 dbg["delta_try_count"],
1366 1375 dbg["type"],
1367 1376 dbg["snapshot-depth"],
1368 1377 dbg["p1-chain-len"],
1369 1378 dbg["p2-chain-len"],
1370 1379 dbg["duration"],
1371 1380 )
1372 1381 self._write_debug(msg)
1373 1382 return deltainfo
1374 1383
1375 1384
1376 1385 def delta_compression(default_compression_header, deltainfo):
1377 1386 """return (COMPRESSION_MODE, deltainfo)
1378 1387
1379 1388 used by revlog v2+ format to dispatch between PLAIN and DEFAULT
1380 1389 compression.
1381 1390 """
1382 1391 h, d = deltainfo.data
1383 1392 compression_mode = COMP_MODE_INLINE
1384 1393 if not h and not d:
1385 1394 # no data to store at all... declare it uncompressed
1386 1395 compression_mode = COMP_MODE_PLAIN
1387 1396 elif not h:
1388 1397 t = d[0:1]
1389 1398 if t == b'\0':
1390 1399 compression_mode = COMP_MODE_PLAIN
1391 1400 elif t == default_compression_header:
1392 1401 compression_mode = COMP_MODE_DEFAULT
1393 1402 elif h == b'u':
1394 1403 # we have a more efficient way to declare uncompressed
1395 1404 h = b''
1396 1405 compression_mode = COMP_MODE_PLAIN
1397 1406 deltainfo = drop_u_compression(deltainfo)
1398 1407 return compression_mode, deltainfo
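A hedged sanity check of the dispatch above, assuming the module lives at `mercurial.revlogutils.deltas` as in recent Mercurial, and a zlib-style setup whose default compression header byte is `b'x'`; the `make` helper is hypothetical and only fills the `_deltainfo` fields the dispatch looks at:

    from mercurial.revlogutils.constants import (
        COMP_MODE_DEFAULT,
        COMP_MODE_INLINE,
        COMP_MODE_PLAIN,
    )
    from mercurial.revlogutils.deltas import _deltainfo, delta_compression

    def make(h, d):
        # distance, deltalen, data, base, chainbase, chainlen,
        # compresseddeltalen, snapshotdepth
        return _deltainfo(0, len(h) + len(d), (h, d), 0, 0, 1, len(h) + len(d), None)

    assert delta_compression(b'x', make(b'', b''))[0] == COMP_MODE_PLAIN      # empty
    assert delta_compression(b'x', make(b'', b'\0raw'))[0] == COMP_MODE_PLAIN # stored plain
    assert delta_compression(b'x', make(b'', b'xdata'))[0] == COMP_MODE_DEFAULT
    assert delta_compression(b'x', make(b'u', b'raw'))[0] == COMP_MODE_PLAIN  # b'u' dropped
    assert delta_compression(b'x', make(b'', b'zzz'))[0] == COMP_MODE_INLINE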