delta-find: use a single snapshot cache when applying a group to an object...
marmoute
r50576:c261a628 default
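
This change threads one long-lived SnapshotCache through the delta-search machinery: the cache now lives on the deltacomputer object and is handed down to _candidategroups and _refinedgroups, instead of a fresh cache being rebuilt every time a candidate group is applied. A minimal standalone sketch of the pattern, with simplified names (not the actual Mercurial API):

class _Cache:
    # toy stand-in for revlogutils.deltas.SnapshotCache
    def __init__(self):
        self.snapshots = {}

class _Computer:
    def __init__(self):
        # after this patch: one cache that lives as long as the computer,
        # so repeated searches reuse previously discovered snapshots
        self._snapshot_cache = _Cache()

    def find_delta(self, rev):
        # before this patch, the equivalent of _Cache() was created here,
        # inside every search, and its content recomputed each time
        cache = self._snapshot_cache
        return cache  # stand-in for the actual candidate search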
@@ -1,1472 +1,1476 b''
1 1 # revlogdeltas.py - Logic around delta computation for revlog
2 2 #
3 3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 4 # Copyright 2018 Octobus <contact@octobus.net>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8 """Helper class to compute deltas stored inside revlogs"""
9 9
10 10
11 11 import collections
12 12 import struct
13 13
14 14 # import stuff from node for others to import from revlog
15 15 from ..node import nullrev
16 16 from ..i18n import _
17 17 from ..pycompat import getattr
18 18
19 19 from .constants import (
20 20 COMP_MODE_DEFAULT,
21 21 COMP_MODE_INLINE,
22 22 COMP_MODE_PLAIN,
23 23 DELTA_BASE_REUSE_NO,
24 24 KIND_CHANGELOG,
25 25 KIND_FILELOG,
26 26 KIND_MANIFESTLOG,
27 27 REVIDX_ISCENSORED,
28 28 REVIDX_RAWTEXT_CHANGING_FLAGS,
29 29 )
30 30
31 31 from ..thirdparty import attr
32 32
33 33 from .. import (
34 34 error,
35 35 mdiff,
36 36 util,
37 37 )
38 38
39 39 from . import flagutil
40 40
41 41 # maximum <delta-chain-data>/<revision-text-length> ratio
42 42 LIMIT_DELTA2TEXT = 2
43 43
44 44
45 45 class _testrevlog:
46 46 """minimalist fake revlog to use in doctests"""
47 47
48 48 def __init__(self, data, density=0.5, mingap=0, snapshot=()):
49 49 """data is an list of revision payload boundaries"""
50 50 self._data = data
51 51 self._srdensitythreshold = density
52 52 self._srmingapsize = mingap
53 53 self._snapshot = set(snapshot)
54 54 self.index = None
55 55
56 56 def start(self, rev):
57 57 if rev == nullrev:
58 58 return 0
59 59 if rev == 0:
60 60 return 0
61 61 return self._data[rev - 1]
62 62
63 63 def end(self, rev):
64 64 if rev == nullrev:
65 65 return 0
66 66 return self._data[rev]
67 67
68 68 def length(self, rev):
69 69 return self.end(rev) - self.start(rev)
70 70
71 71 def __len__(self):
72 72 return len(self._data)
73 73
74 74 def issnapshot(self, rev):
75 75 if rev == nullrev:
76 76 return True
77 77 return rev in self._snapshot
78 78
79 79
80 80 def slicechunk(revlog, revs, targetsize=None):
81 81 """slice revs to reduce the amount of unrelated data to be read from disk.
82 82
83 83 ``revs`` is sliced into groups that should be read in one time.
84 84 Assume that revs are sorted.
85 85
86 86 The initial chunk is sliced until the overall density (payload/chunks-span
87 87 ratio) is above `revlog._srdensitythreshold`. No gap smaller than
88 88 `revlog._srmingapsize` is skipped.
89 89
90 90 If `targetsize` is set, no chunk larger than `targetsize` will be yielded.
91 91 For consistency with other slicing choices, this limit won't go lower than
92 92 `revlog._srmingapsize`.
93 93
94 94 If an individual revision chunk is larger than this limit, it will still
95 95 be yielded individually.
96 96
97 97 >>> data = [
98 98 ... 5, #00 (5)
99 99 ... 10, #01 (5)
100 100 ... 12, #02 (2)
101 101 ... 12, #03 (empty)
102 102 ... 27, #04 (15)
103 103 ... 31, #05 (4)
104 104 ... 31, #06 (empty)
105 105 ... 42, #07 (11)
106 106 ... 47, #08 (5)
107 107 ... 47, #09 (empty)
108 108 ... 48, #10 (1)
109 109 ... 51, #11 (3)
110 110 ... 74, #12 (23)
111 111 ... 85, #13 (11)
112 112 ... 86, #14 (1)
113 113 ... 91, #15 (5)
114 114 ... ]
115 115 >>> revlog = _testrevlog(data, snapshot=range(16))
116 116
117 117 >>> list(slicechunk(revlog, list(range(16))))
118 118 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
119 119 >>> list(slicechunk(revlog, [0, 15]))
120 120 [[0], [15]]
121 121 >>> list(slicechunk(revlog, [0, 11, 15]))
122 122 [[0], [11], [15]]
123 123 >>> list(slicechunk(revlog, [0, 11, 13, 15]))
124 124 [[0], [11, 13, 15]]
125 125 >>> list(slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
126 126 [[1, 2], [5, 8, 10, 11], [14]]
127 127
128 128 Slicing with a maximum chunk size
129 129 >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
130 130 [[0], [11], [13], [15]]
131 131 >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
132 132 [[0], [11], [13, 15]]
133 133
134 134 Slicing involving nullrev
135 135 >>> list(slicechunk(revlog, [-1, 0, 11, 13, 15], targetsize=20))
136 136 [[-1, 0], [11], [13, 15]]
137 137 >>> list(slicechunk(revlog, [-1, 13, 15], targetsize=5))
138 138 [[-1], [13], [15]]
139 139 """
140 140 if targetsize is not None:
141 141 targetsize = max(targetsize, revlog._srmingapsize)
142 142 # targetsize should not be specified when evaluating delta candidates:
143 143 # * targetsize is used to ensure we stay within specification when reading,
144 144 densityslicing = getattr(revlog.index, 'slicechunktodensity', None)
145 145 if densityslicing is None:
146 146 densityslicing = lambda x, y, z: _slicechunktodensity(revlog, x, y, z)
147 147 for chunk in densityslicing(
148 148 revs, revlog._srdensitythreshold, revlog._srmingapsize
149 149 ):
150 150 for subchunk in _slicechunktosize(revlog, chunk, targetsize):
151 151 yield subchunk
152 152
153 153
154 154 def _slicechunktosize(revlog, revs, targetsize=None):
155 155 """slice revs to match the target size
156 156
157 157 This is intended to be used on chunks that density slicing selected but that
158 158 are still too large compared to the read guarantee of the revlog. This might
159 159 happen when the "minimal gap size" interrupted the slicing, or when chains are
160 160 built in a way that creates large blocks next to each other.
161 161
162 162 >>> data = [
163 163 ... 3, #0 (3)
164 164 ... 5, #1 (2)
165 165 ... 6, #2 (1)
166 166 ... 8, #3 (2)
167 167 ... 8, #4 (empty)
168 168 ... 11, #5 (3)
169 169 ... 12, #6 (1)
170 170 ... 13, #7 (1)
171 171 ... 14, #8 (1)
172 172 ... ]
173 173
174 174 == All snapshots cases ==
175 175 >>> revlog = _testrevlog(data, snapshot=range(9))
176 176
177 177 Cases where chunk is already small enough
178 178 >>> list(_slicechunktosize(revlog, [0], 3))
179 179 [[0]]
180 180 >>> list(_slicechunktosize(revlog, [6, 7], 3))
181 181 [[6, 7]]
182 182 >>> list(_slicechunktosize(revlog, [0], None))
183 183 [[0]]
184 184 >>> list(_slicechunktosize(revlog, [6, 7], None))
185 185 [[6, 7]]
186 186
187 187 Cases where we need actual slicing
188 188 >>> list(_slicechunktosize(revlog, [0, 1], 3))
189 189 [[0], [1]]
190 190 >>> list(_slicechunktosize(revlog, [1, 3], 3))
191 191 [[1], [3]]
192 192 >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
193 193 [[1, 2], [3]]
194 194 >>> list(_slicechunktosize(revlog, [3, 5], 3))
195 195 [[3], [5]]
196 196 >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
197 197 [[3], [5]]
198 198 >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
199 199 [[5], [6, 7, 8]]
200 200 >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
201 201 [[0], [1, 2], [3], [5], [6, 7, 8]]
202 202
203 203 Case with too large individual chunk (must return valid chunk)
204 204 >>> list(_slicechunktosize(revlog, [0, 1], 2))
205 205 [[0], [1]]
206 206 >>> list(_slicechunktosize(revlog, [1, 3], 1))
207 207 [[1], [3]]
208 208 >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
209 209 [[3], [5]]
210 210
211 211 == No Snapshot cases ==
212 212 >>> revlog = _testrevlog(data)
213 213
214 214 Cases where chunk is already small enough
215 215 >>> list(_slicechunktosize(revlog, [0], 3))
216 216 [[0]]
217 217 >>> list(_slicechunktosize(revlog, [6, 7], 3))
218 218 [[6, 7]]
219 219 >>> list(_slicechunktosize(revlog, [0], None))
220 220 [[0]]
221 221 >>> list(_slicechunktosize(revlog, [6, 7], None))
222 222 [[6, 7]]
223 223
224 224 Cases where we need actual slicing
225 225 >>> list(_slicechunktosize(revlog, [0, 1], 3))
226 226 [[0], [1]]
227 227 >>> list(_slicechunktosize(revlog, [1, 3], 3))
228 228 [[1], [3]]
229 229 >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
230 230 [[1], [2, 3]]
231 231 >>> list(_slicechunktosize(revlog, [3, 5], 3))
232 232 [[3], [5]]
233 233 >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
234 234 [[3], [4, 5]]
235 235 >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
236 236 [[5], [6, 7, 8]]
237 237 >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
238 238 [[0], [1, 2], [3], [5], [6, 7, 8]]
239 239
240 240 Case with too large individual chunk (must return valid chunk)
241 241 >>> list(_slicechunktosize(revlog, [0, 1], 2))
242 242 [[0], [1]]
243 243 >>> list(_slicechunktosize(revlog, [1, 3], 1))
244 244 [[1], [3]]
245 245 >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
246 246 [[3], [5]]
247 247
248 248 == mixed case ==
249 249 >>> revlog = _testrevlog(data, snapshot=[0, 1, 2])
250 250 >>> list(_slicechunktosize(revlog, list(range(9)), 5))
251 251 [[0, 1], [2], [3, 4, 5], [6, 7, 8]]
252 252 """
253 253 assert targetsize is None or 0 <= targetsize
254 254 startdata = revlog.start(revs[0])
255 255 enddata = revlog.end(revs[-1])
256 256 fullspan = enddata - startdata
257 257 if targetsize is None or fullspan <= targetsize:
258 258 yield revs
259 259 return
260 260
261 261 startrevidx = 0
262 262 endrevidx = 1
263 263 iterrevs = enumerate(revs)
264 264 next(iterrevs) # skip first rev.
265 265 # first step: get snapshots out of the way
266 266 for idx, r in iterrevs:
267 267 span = revlog.end(r) - startdata
268 268 snapshot = revlog.issnapshot(r)
269 269 if span <= targetsize and snapshot:
270 270 endrevidx = idx + 1
271 271 else:
272 272 chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
273 273 if chunk:
274 274 yield chunk
275 275 startrevidx = idx
276 276 startdata = revlog.start(r)
277 277 endrevidx = idx + 1
278 278 if not snapshot:
279 279 break
280 280
281 281 # for the others, we use binary slicing to quickly converge toward valid
282 282 # chunks (otherwise, we might end up looking for start/end of many
283 283 # revisions). This logic does not look for the perfect slicing point; it
284 284 # focuses on quickly converging toward valid chunks.
285 285 nbitem = len(revs)
286 286 while (enddata - startdata) > targetsize:
287 287 endrevidx = nbitem
288 288 if nbitem - startrevidx <= 1:
289 289 break # protect against individual chunk larger than limit
290 290 localenddata = revlog.end(revs[endrevidx - 1])
291 291 span = localenddata - startdata
292 292 while span > targetsize:
293 293 if endrevidx - startrevidx <= 1:
294 294 break # protect against individual chunk larger than limit
295 295 endrevidx -= (endrevidx - startrevidx) // 2
296 296 localenddata = revlog.end(revs[endrevidx - 1])
297 297 span = localenddata - startdata
298 298 chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
299 299 if chunk:
300 300 yield chunk
301 301 startrevidx = endrevidx
302 302 startdata = revlog.start(revs[startrevidx])
303 303
304 304 chunk = _trimchunk(revlog, revs, startrevidx)
305 305 if chunk:
306 306 yield chunk
307 307
308 308
309 309 def _slicechunktodensity(revlog, revs, targetdensity=0.5, mingapsize=0):
310 310 """slice revs to reduce the amount of unrelated data to be read from disk.
311 311
312 312 ``revs`` is sliced into groups that should be read in one time.
313 313 Assume that revs are sorted.
314 314
315 315 The initial chunk is sliced until the overall density (payload/chunks-span
316 316 ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
317 317 skipped.
318 318
319 319 >>> revlog = _testrevlog([
320 320 ... 5, #00 (5)
321 321 ... 10, #01 (5)
322 322 ... 12, #02 (2)
323 323 ... 12, #03 (empty)
324 324 ... 27, #04 (15)
325 325 ... 31, #05 (4)
326 326 ... 31, #06 (empty)
327 327 ... 42, #07 (11)
328 328 ... 47, #08 (5)
329 329 ... 47, #09 (empty)
330 330 ... 48, #10 (1)
331 331 ... 51, #11 (3)
332 332 ... 74, #12 (23)
333 333 ... 85, #13 (11)
334 334 ... 86, #14 (1)
335 335 ... 91, #15 (5)
336 336 ... ])
337 337
338 338 >>> list(_slicechunktodensity(revlog, list(range(16))))
339 339 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
340 340 >>> list(_slicechunktodensity(revlog, [0, 15]))
341 341 [[0], [15]]
342 342 >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
343 343 [[0], [11], [15]]
344 344 >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
345 345 [[0], [11, 13, 15]]
346 346 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
347 347 [[1, 2], [5, 8, 10, 11], [14]]
348 348 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
349 349 ... mingapsize=20))
350 350 [[1, 2, 3, 5, 8, 10, 11], [14]]
351 351 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
352 352 ... targetdensity=0.95))
353 353 [[1, 2], [5], [8, 10, 11], [14]]
354 354 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
355 355 ... targetdensity=0.95, mingapsize=12))
356 356 [[1, 2], [5, 8, 10, 11], [14]]
357 357 """
358 358 start = revlog.start
359 359 length = revlog.length
360 360
361 361 if len(revs) <= 1:
362 362 yield revs
363 363 return
364 364
365 365 deltachainspan = segmentspan(revlog, revs)
366 366
367 367 if deltachainspan < mingapsize:
368 368 yield revs
369 369 return
370 370
371 371 readdata = deltachainspan
372 372 chainpayload = sum(length(r) for r in revs)
373 373
374 374 if deltachainspan:
375 375 density = chainpayload / float(deltachainspan)
376 376 else:
377 377 density = 1.0
378 378
379 379 if density >= targetdensity:
380 380 yield revs
381 381 return
382 382
383 383 # Collect the gaps in a list so they can be sorted by size below
384 384 gaps = []
385 385 prevend = None
386 386 for i, rev in enumerate(revs):
387 387 revstart = start(rev)
388 388 revlen = length(rev)
389 389
390 390 # Skip empty revisions to form larger holes
391 391 if revlen == 0:
392 392 continue
393 393
394 394 if prevend is not None:
395 395 gapsize = revstart - prevend
396 396 # only consider holes that are large enough
397 397 if gapsize > mingapsize:
398 398 gaps.append((gapsize, i))
399 399
400 400 prevend = revstart + revlen
401 401 # sort the gaps to pop them from largest to smallest
402 402 gaps.sort()
403 403
404 404 # Collect the indices of the largest holes until the density is acceptable
405 405 selected = []
406 406 while gaps and density < targetdensity:
407 407 gapsize, gapidx = gaps.pop()
408 408
409 409 selected.append(gapidx)
410 410
411 411 # the gaps were sorted in increasing order, so pop() above returns
412 412 # the largest remaining gap
413 413 readdata -= gapsize
414 414 if readdata > 0:
415 415 density = chainpayload / float(readdata)
416 416 else:
417 417 density = 1.0
418 418 selected.sort()
419 419
420 420 # Cut the revs at collected indices
421 421 previdx = 0
422 422 for idx in selected:
423 423
424 424 chunk = _trimchunk(revlog, revs, previdx, idx)
425 425 if chunk:
426 426 yield chunk
427 427
428 428 previdx = idx
429 429
430 430 chunk = _trimchunk(revlog, revs, previdx)
431 431 if chunk:
432 432 yield chunk
433 433
434 434
435 435 def _trimchunk(revlog, revs, startidx, endidx=None):
436 436 """returns revs[startidx:endidx] without empty trailing revs
437 437
438 438 Doctest Setup
439 439 >>> revlog = _testrevlog([
440 440 ... 5, #0
441 441 ... 10, #1
442 442 ... 12, #2
443 443 ... 12, #3 (empty)
444 444 ... 17, #4
445 445 ... 21, #5
446 446 ... 21, #6 (empty)
447 447 ... ])
448 448
449 449 Contiguous cases:
450 450 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
451 451 [0, 1, 2, 3, 4, 5]
452 452 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
453 453 [0, 1, 2, 3, 4]
454 454 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
455 455 [0, 1, 2]
456 456 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
457 457 [2]
458 458 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
459 459 [3, 4, 5]
460 460 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
461 461 [3, 4]
462 462
463 463 Discontiguous cases:
464 464 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
465 465 [1, 3, 5]
466 466 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
467 467 [1]
468 468 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
469 469 [3, 5]
470 470 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
471 471 [3, 5]
472 472 """
473 473 length = revlog.length
474 474
475 475 if endidx is None:
476 476 endidx = len(revs)
477 477
478 478 # If we have a non-empty delta candidate, there is nothing to trim
479 479 if revs[endidx - 1] < len(revlog):
480 480 # Trim empty revs at the end, except the very first revision of a chain
481 481 while (
482 482 endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0
483 483 ):
484 484 endidx -= 1
485 485
486 486 return revs[startidx:endidx]
487 487
488 488
489 489 def segmentspan(revlog, revs):
490 490 """Get the byte span of a segment of revisions
491 491
492 492 revs is a sorted array of revision numbers
493 493
494 494 >>> revlog = _testrevlog([
495 495 ... 5, #0
496 496 ... 10, #1
497 497 ... 12, #2
498 498 ... 12, #3 (empty)
499 499 ... 17, #4
500 500 ... ])
501 501
502 502 >>> segmentspan(revlog, [0, 1, 2, 3, 4])
503 503 17
504 504 >>> segmentspan(revlog, [0, 4])
505 505 17
506 506 >>> segmentspan(revlog, [3, 4])
507 507 5
508 508 >>> segmentspan(revlog, [1, 2, 3,])
509 509 7
510 510 >>> segmentspan(revlog, [1, 3])
511 511 7
512 512 """
513 513 if not revs:
514 514 return 0
515 515 end = revlog.end(revs[-1])
516 516 return end - revlog.start(revs[0])
517 517
518 518
519 519 def _textfromdelta(fh, revlog, baserev, delta, p1, p2, flags, expectednode):
520 520 """build full text from a (base, delta) pair and other metadata"""
521 521 # special case deltas which replace entire base; no need to decode
522 522 # base revision. this neatly avoids censored bases, which throw when
523 523 # they're decoded.
524 524 hlen = struct.calcsize(b">lll")
525 525 if delta[:hlen] == mdiff.replacediffheader(
526 526 revlog.rawsize(baserev), len(delta) - hlen
527 527 ):
528 528 fulltext = delta[hlen:]
529 529 else:
530 530 # deltabase is rawtext before changed by flag processors, which is
531 531 # equivalent to non-raw text
532 532 basetext = revlog.revision(baserev, _df=fh)
533 533 fulltext = mdiff.patch(basetext, delta)
534 534
535 535 try:
536 536 validatehash = flagutil.processflagsraw(revlog, fulltext, flags)
537 537 if validatehash:
538 538 revlog.checkhash(fulltext, expectednode, p1=p1, p2=p2)
539 539 if flags & REVIDX_ISCENSORED:
540 540 raise error.StorageError(
541 541 _(b'node %s is not censored') % expectednode
542 542 )
543 543 except error.CensoredNodeError:
544 544 # must pass the censored index flag to add censored revisions
545 545 if not flags & REVIDX_ISCENSORED:
546 546 raise
547 547 return fulltext
548 548
549 549
550 550 @attr.s(slots=True, frozen=True)
551 551 class _deltainfo:
552 552 distance = attr.ib()
553 553 deltalen = attr.ib()
554 554 data = attr.ib()
555 555 base = attr.ib()
556 556 chainbase = attr.ib()
557 557 chainlen = attr.ib()
558 558 compresseddeltalen = attr.ib()
559 559 snapshotdepth = attr.ib()
560 560
561 561
562 562 def drop_u_compression(delta):
563 563 """turn into a "u" (no-compression) into no-compression without header
564 564
565 565 This is useful for revlog formats that have a better compression method.
566 566 """
567 567 assert delta.data[0] == b'u', delta.data[0]
568 568 return _deltainfo(
569 569 delta.distance,
570 570 delta.deltalen - 1,
571 571 (b'', delta.data[1]),
572 572 delta.base,
573 573 delta.chainbase,
574 574 delta.chainlen,
575 575 delta.compresseddeltalen,
576 576 delta.snapshotdepth,
577 577 )
578 578
579 579
580 580 def is_good_delta_info(revlog, deltainfo, revinfo):
581 581 """Returns True if the given delta is good. Good means that it is within
582 582 the disk span, disk size, and chain length bounds that we know to be
583 583 performant."""
584 584 if deltainfo is None:
585 585 return False
586 586
587 587 # - 'deltainfo.distance' is the distance from the base revision --
588 588 # bounding it limits the amount of I/O we need to do.
589 589 # - 'deltainfo.compresseddeltalen' is the sum of the total size of
590 590 # deltas we need to apply -- bounding it limits the amount of CPU
591 591 # we consume.
592 592
593 593 textlen = revinfo.textlen
594 594 defaultmax = textlen * 4
595 595 maxdist = revlog._maxdeltachainspan
596 596 if not maxdist:
597 597 maxdist = deltainfo.distance # ensure the conditional pass
598 598 maxdist = max(maxdist, defaultmax)
599 599
600 600 # Bad delta from read span:
601 601 #
602 602 # If the span of data read is larger than the maximum allowed.
603 603 #
604 604 # In the sparse-revlog case, we rely on the associated "sparse reading"
605 605 # to avoid issue related to the span of data. In theory, it would be
606 606 # possible to build pathological revlog where delta pattern would lead
607 607 # to too many reads. However, they do not happen in practice at all. So
608 608 # we skip the span check entirely.
609 609 if not revlog._sparserevlog and maxdist < deltainfo.distance:
610 610 return False
611 611
612 612 # Bad delta from new delta size:
613 613 #
614 614 # If the delta size is larger than the target text, storing the
615 615 # delta will be inefficient.
616 616 if textlen < deltainfo.deltalen:
617 617 return False
618 618
619 619 # Bad delta from cumulated payload size:
620 620 #
621 621 # If the sum of deltas gets larger than K * the target text length.
622 622 if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
623 623 return False
624 624
625 625 # Bad delta from chain length:
626 626 #
627 627 # If the number of deltas in the chain gets too high.
628 628 if revlog._maxchainlen and revlog._maxchainlen < deltainfo.chainlen:
629 629 return False
630 630
631 631 # bad delta from intermediate snapshot size limit
632 632 #
633 633 # If an intermediate snapshot size is higher than the limit. The
634 634 # limit exists to prevent endless chains of intermediate deltas from
635 635 # being created.
636 636 if (
637 637 deltainfo.snapshotdepth is not None
638 638 and (textlen >> deltainfo.snapshotdepth) < deltainfo.deltalen
639 639 ):
640 640 return False
641 641
642 642 # bad delta if new intermediate snapshot is larger than the previous
643 643 # snapshot
644 644 if (
645 645 deltainfo.snapshotdepth
646 646 and revlog.length(deltainfo.base) < deltainfo.deltalen
647 647 ):
648 648 return False
649 649
650 650 return True
651 651
652 652
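
A worked example of the bounds checked above, with made-up numbers (a sketch, not output of the real code):

textlen = 1000                # full text size of the incoming revision
defaultmax = textlen * 4      # read-span cap when _maxdeltachainspan is unset
assert defaultmax == 4000     # reading more than this from disk is "bad"
assert 1200 > textlen         # a 1200-byte delta is rejected: it is larger
                              # than the text it encodes
LIMIT_DELTA2TEXT = 2          # module-level constant, defined above
assert 2500 > textlen * LIMIT_DELTA2TEXT  # a chain whose deltas sum to
                                          # 2500 bytes is rejected as well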
653 653 # If a revision's full text is that much bigger than a base candidate full
654 654 # text's, it is very unlikely that it will produce a valid delta. We no longer
655 655 # consider these candidates.
656 656 LIMIT_BASE2TEXT = 500
657 657
658 658
659 659 def _candidategroups(
660 660 revlog,
661 661 textlen,
662 662 p1,
663 663 p2,
664 664 cachedelta,
665 665 excluded_bases=None,
666 666 target_rev=None,
667 snapshot_cache=None,
667 668 ):
668 669 """Provides group of revision to be tested as delta base
669 670
670 671 This top-level function focuses on emitting groups with unique and worthwhile
671 672 content. See _rawgroups for details about the group order.
672 673 """
673 674 # should we try to build a delta?
674 675 if not (len(revlog) and revlog._storedeltachains):
675 676 yield None
676 677 return
677 678
678 679 deltalength = revlog.length
679 680 deltaparent = revlog.deltaparent
680 681 sparse = revlog._sparserevlog
681 682 good = None
682 683
683 684 deltas_limit = textlen * LIMIT_DELTA2TEXT
684 685 group_chunk_size = revlog._candidate_group_chunk_size
685 686
686 687 tested = {nullrev}
687 688 candidates = _refinedgroups(
688 689 revlog,
689 690 p1,
690 691 p2,
691 692 cachedelta,
693 snapshot_cache=snapshot_cache,
692 694 )
693 695 while True:
694 696 temptative = candidates.send(good)
695 697 if temptative is None:
696 698 break
697 699 group = []
698 700 for rev in temptative:
699 701 # skip over empty deltas (no need to include them in a chain)
700 702 while revlog._generaldelta and not (
701 703 rev == nullrev or rev in tested or deltalength(rev)
702 704 ):
703 705 tested.add(rev)
704 706 rev = deltaparent(rev)
705 707 # no need to try a delta against nullrev, this will be done as a
706 708 # last resort.
707 709 if rev == nullrev:
708 710 continue
709 711 # filter out revisions we already tested
710 712 if rev in tested:
711 713 continue
712 714 # a higher authority deemed the base unworthy (e.g. censored)
713 715 if excluded_bases is not None and rev in excluded_bases:
714 716 tested.add(rev)
715 717 continue
716 718 # We are in some recomputation cases and that rev is too high in
717 719 # the revlog
718 720 if target_rev is not None and rev >= target_rev:
719 721 tested.add(rev)
720 722 continue
721 723 # filter out delta bases that will never produce a good delta
722 724 if deltas_limit < revlog.length(rev):
723 725 tested.add(rev)
724 726 continue
725 727 if sparse and revlog.rawsize(rev) < (textlen // LIMIT_BASE2TEXT):
726 728 tested.add(rev)
727 729 continue
728 730 # no delta for rawtext-changing revs (see "candelta" for why)
729 731 if revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
730 732 tested.add(rev)
731 733 continue
732 734
733 735 # If we reach here, we are about to build and test a delta.
734 736 # The delta building process will compute the chaininfo in all
735 737 # cases; since that computation is cached, it is fine to access it
736 738 # here too.
737 739 chainlen, chainsize = revlog._chaininfo(rev)
738 740 # if the chain would be too long, skip this base
739 741 if revlog._maxchainlen and chainlen >= revlog._maxchainlen:
740 742 tested.add(rev)
741 743 continue
742 744 # if the chain already has too much data, skip this base
743 745 if deltas_limit < chainsize:
744 746 tested.add(rev)
745 747 continue
746 748 if sparse and revlog.upperboundcomp is not None:
747 749 maxcomp = revlog.upperboundcomp
748 750 basenotsnap = (p1, p2, nullrev)
749 751 if rev not in basenotsnap and revlog.issnapshot(rev):
750 752 snapshotdepth = revlog.snapshotdepth(rev)
751 753 # If text is significantly larger than the base, we can
752 754 # expect the resulting delta to be proportional to the size
753 755 # difference
754 756 revsize = revlog.rawsize(rev)
755 757 rawsizedistance = max(textlen - revsize, 0)
756 758 # use an estimate of the compression upper bound.
757 759 lowestrealisticdeltalen = rawsizedistance // maxcomp
758 760
759 761 # check the absolute constraint on the delta size
760 762 snapshotlimit = textlen >> snapshotdepth
761 763 if snapshotlimit < lowestrealisticdeltalen:
762 764 # delta lower bound is larger than accepted upper bound
763 765 tested.add(rev)
764 766 continue
765 767
766 768 # check the relative constraint on the delta size
767 769 revlength = revlog.length(rev)
768 770 if revlength < lowestrealisticdeltalen:
769 771 # delta probable lower bound is larger than target base
770 772 tested.add(rev)
771 773 continue
772 774
773 775 group.append(rev)
774 776 if group:
775 777 # When the candidate group is big, it can result in a quite
776 778 # significant performance impact. To reduce this, we can send
777 779 # candidates in smaller batches until a new batch does not provide
778 780 # any improvement.
779 781 #
780 782 # This might reduce the overall efficiency of the compression in
781 783 # some corner cases, but that should also prevent very pathological
782 784 # cases from being an issue. (eg. 20 000 candidates).
783 785 #
784 786 # XXX note that the ordering of the group becomes important as it
785 787 # now impacts the final result. The current order is unprocessed
786 788 # and can be improved.
787 789 if group_chunk_size == 0:
788 790 tested.update(group)
789 791 good = yield tuple(group)
790 792 else:
791 793 prev_good = good
792 794 for start in range(0, len(group), group_chunk_size):
793 795 sub_group = group[start : start + group_chunk_size]
794 796 tested.update(sub_group)
795 797 good = yield tuple(sub_group)
796 798 if prev_good == good:
797 799 break
798 800
799 801 yield None
800 802
801 803
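
_candidategroups above is a generator driven with send(): each yield hands the caller a group of candidate bases, and the caller answers with the best revision it found (or None) to steer the refinement. A minimal standalone sketch of that protocol, with toy data (not the revlog API):

def _groups():
    good = None
    for group in ((10, 12), (7, 8), (3,)):
        good = yield group     # the caller sends back the chosen base
        if good is not None:
            break              # a base was accepted; stop the coarse search
    yield None                 # sentinel: no more candidates

groups = _groups()
candidates = next(groups)
while candidates is not None:
    best = max(candidates)         # stand-in for building and scoring deltas
    candidates = groups.send(best)

This mirrors how finddeltainfo drives the search with groups.send(deltainfo.base) further down.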
802 def _refinedgroups(revlog, p1, p2, cachedelta):
804 def _refinedgroups(revlog, p1, p2, cachedelta, snapshot_cache=None):
803 805 good = None
804 806 # First we try to reuse the delta contained in the bundle.
805 807 # (or from the source revlog)
806 808 #
807 809 # This logic only applies to general delta repositories and can be disabled
808 810 # through configuration. Disabling reuse source delta is useful when
809 811 # we want to make sure we recomputed "optimal" deltas.
810 812 debug_info = None
811 813 if cachedelta is not None and cachedelta[2] > DELTA_BASE_REUSE_NO:
812 814 # Assume what we received from the server is a good choice
813 815 # build delta will reuse the cache
814 816 if debug_info is not None:
815 817 debug_info['cached-delta.tested'] += 1
816 818 good = yield (cachedelta[0],)
817 819 if good is not None:
818 820 if debug_info is not None:
819 821 debug_info['cached-delta.accepted'] += 1
820 822 yield None
821 823 return
822 # XXX cache me higher
824 if snapshot_cache is None:
823 825 snapshot_cache = SnapshotCache()
824 826 groups = _rawgroups(
825 827 revlog,
826 828 p1,
827 829 p2,
828 830 cachedelta,
829 831 snapshot_cache,
830 832 )
831 833 for candidates in groups:
832 834 good = yield candidates
833 835 if good is not None:
834 836 break
835 837
836 838 # If sparse revlog is enabled, we can try to refine the available deltas
837 839 if not revlog._sparserevlog:
838 840 yield None
839 841 return
840 842
841 843 # if we have a refinable value, try to refine it
842 844 if good is not None and good not in (p1, p2) and revlog.issnapshot(good):
843 845 # refine snapshot down
844 846 previous = None
845 847 while previous != good:
846 848 previous = good
847 849 base = revlog.deltaparent(good)
848 850 if base == nullrev:
849 851 break
850 852 good = yield (base,)
851 853 # refine snapshot up
852 854 if not snapshot_cache.snapshots:
853 855 snapshot_cache.update(revlog, good + 1)
854 856 previous = None
855 857 while good != previous:
856 858 previous = good
857 859 children = tuple(sorted(c for c in snapshot_cache.snapshots[good]))
858 860 good = yield children
859 861
860 862 if debug_info is not None:
861 863 if good is None:
862 864 debug_info['no-solution'] += 1
863 865
864 866 yield None
865 867
866 868
867 869 def _rawgroups(revlog, p1, p2, cachedelta, snapshot_cache=None):
868 870 """Provides group of revision to be tested as delta base
869 871
870 872 This lower-level function focuses on emitting deltas that are theoretically
871 873 interesting, without looking at any practical details.
872 874
873 875 The group order aims at providing fast or small candidates first.
874 876 """
875 877 gdelta = revlog._generaldelta
876 878 # gate sparse behind general-delta because of issue6056
877 879 sparse = gdelta and revlog._sparserevlog
878 880 curr = len(revlog)
879 881 prev = curr - 1
880 882 deltachain = lambda rev: revlog._deltachain(rev)[0]
881 883
882 884 if gdelta:
883 885 # exclude already lazy tested base if any
884 886 parents = [p for p in (p1, p2) if p != nullrev]
885 887
886 888 if not revlog._deltabothparents and len(parents) == 2:
887 889 parents.sort()
888 890 # To minimize the chance of having to build a fulltext,
889 891 # pick first whichever parent is closest to us (max rev)
890 892 yield (parents[1],)
891 893 # then the other one (min rev) if the first did not fit
892 894 yield (parents[0],)
893 895 elif len(parents) > 0:
894 896 # Test all parents (1 or 2), and keep the best candidate
895 897 yield parents
896 898
897 899 if sparse and parents:
898 900 if snapshot_cache is None:
899 901 # map: base-rev: [snapshot-revs]
900 902 snapshot_cache = SnapshotCache()
901 903 # See if we can use an existing snapshot in the parent chains as
902 904 # a base for a new intermediate snapshot
903 905 #
904 906 # search for snapshot in parents delta chain
905 907 # map: snapshot-level: snapshot-rev
906 908 parents_snaps = collections.defaultdict(set)
907 909 candidate_chains = [deltachain(p) for p in parents]
908 910 for chain in candidate_chains:
909 911 for idx, s in enumerate(chain):
910 912 if not revlog.issnapshot(s):
911 913 break
912 914 parents_snaps[idx].add(s)
913 915 snapfloor = min(parents_snaps[0]) + 1
914 916 snapshot_cache.update(revlog, snapfloor)
915 917 # search for the highest "unrelated" revision
916 918 #
917 919 # Adding snapshots used by an "unrelated" revision increases the odds
918 920 # that we reuse an independent, yet better, snapshot chain.
919 921 #
920 922 # XXX instead of building a set of revisions, we could lazily enumerate
921 923 # over the chains. That would be more efficient, however we stick to
922 924 # simple code for now.
923 925 all_revs = set()
924 926 for chain in candidate_chains:
925 927 all_revs.update(chain)
926 928 other = None
927 929 for r in revlog.revs(prev, snapfloor):
928 930 if r not in all_revs:
929 931 other = r
930 932 break
931 933 if other is not None:
932 934 # To avoid unfair competition, we won't use unrelated intermediate
933 935 # snapshots that are deeper than the ones from the parent delta
934 936 # chain.
935 937 max_depth = max(parents_snaps.keys())
936 938 chain = deltachain(other)
937 939 for depth, s in enumerate(chain):
938 940 if s < snapfloor:
939 941 continue
940 942 if max_depth < depth:
941 943 break
942 944 if not revlog.issnapshot(s):
943 945 break
944 946 parents_snaps[depth].add(s)
945 947 # Test them as possible intermediate snapshot bases.
946 948 # We test them from highest to lowest level. High-level ones are more
947 949 # likely to result in a small delta.
948 950 floor = None
949 951 for idx, snaps in sorted(parents_snaps.items(), reverse=True):
950 952 siblings = set()
951 953 for s in snaps:
952 954 siblings.update(snapshot_cache.snapshots[s])
953 955 # Before considering making a new intermediate snapshot, we check
954 956 # if an existing snapshot, a child of the base we consider, would be
955 957 # suitable.
956 958 #
957 959 # It gives a chance to reuse a delta chain "unrelated" to the
958 960 # current revision instead of starting our own. Without such
959 961 # re-use, topological branches would keep reopening new chains,
960 962 # creating more and more snapshots as the repository grows.
961 963
962 964 if floor is not None:
963 965 # We only do this for siblings created after the one in our
964 966 # parent's delta chain. Those created before have less chance
965 967 # of being a valid base since our ancestors had to create a new
966 968 # snapshot.
967 969 siblings = [r for r in siblings if floor < r]
968 970 yield tuple(sorted(siblings))
969 971 # then test the base from our parent's delta chain.
970 972 yield tuple(sorted(snaps))
971 973 floor = min(snaps)
972 974 # No suitable base found in the parent chain; search whether any full
973 975 # snapshot emitted since the parent's base would be a suitable base for an
974 976 # intermediate snapshot.
975 977 #
976 978 # It gives a chance to reuse a delta chain unrelated to the current
977 979 # revision instead of starting our own. Without such re-use,
978 980 # topological branches would keep reopening new full chains, creating
979 981 # more and more snapshots as the repository grows.
980 982 full = [r for r in snapshot_cache.snapshots[nullrev] if snapfloor <= r]
981 983 yield tuple(sorted(full))
982 984
983 985 if not sparse:
984 986 # other approaches failed; try against prev to hopefully save us a
985 987 # fulltext.
986 988 yield (prev,)
987 989
988 990
989 991 class SnapshotCache:
990 992 __slots__ = ('snapshots', '_start_rev', '_end_rev')
991 993
992 994 def __init__(self):
993 995 self.snapshots = collections.defaultdict(set)
994 996 self._start_rev = None
995 997 self._end_rev = None
996 998
997 999 def update(self, revlog, start_rev=0):
998 1000 """find snapshots from start_rev to tip"""
999 1001 nb_revs = len(revlog)
1000 1002 end_rev = nb_revs - 1
1001 1003 if start_rev > end_rev:
1002 1004 return # range is empty
1003 1005
1004 1006 if self._start_rev is None:
1005 1007 assert self._end_rev is None
1006 1008 self._update(revlog, start_rev, end_rev)
1007 1009 elif not (self._start_rev <= start_rev and end_rev <= self._end_rev):
1008 1010 if start_rev < self._start_rev:
1009 1011 self._update(revlog, start_rev, self._start_rev - 1)
1010 1012 if self._end_rev < end_rev:
1011 1013 self._update(revlog, self._end_rev + 1, end_rev)
1012 1014
1013 1015 if self._start_rev is None:
1014 1016 assert self._end_rev is None
1015 1017 self._end_rev = end_rev
1016 1018 self._start_rev = start_rev
1017 1019 else:
1018 1020 self._start_rev = min(self._start_rev, start_rev)
1019 1021 self._end_rev = max(self._end_rev, end_rev)
1020 1022 assert self._start_rev <= self._end_rev, (
1021 1023 self._start_rev,
1022 1024 self._end_rev,
1023 1025 )
1024 1026
1025 1027 def _update(self, revlog, start_rev, end_rev):
1026 1028 """internal method that actually do update content"""
1027 1029 assert self._start_rev is None or (
1028 1030 start_rev < self._start_rev or start_rev > self._end_rev
1029 1031 ), (self._start_rev, self._end_rev, start_rev, end_rev)
1030 1032 assert self._start_rev is None or (
1031 1033 end_rev < self._start_rev or end_rev > self._end_rev
1032 1034 ), (self._start_rev, self._end_rev, start_rev, end_rev)
1033 1035 cache = self.snapshots
1034 1036 if util.safehasattr(revlog.index, b'findsnapshots'):
1035 1037 revlog.index.findsnapshots(cache, start_rev, end_rev)
1036 1038 else:
1037 1039 deltaparent = revlog.deltaparent
1038 1040 issnapshot = revlog.issnapshot
1039 1041 for rev in revlog.revs(start_rev, end_rev):
1040 1042 if issnapshot(rev):
1041 1043 cache[deltaparent(rev)].add(rev)
1042 1044
1043 1045
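
The pure-Python fallback in _update above boils down to building a mapping from base revision to the snapshots built on it; the C fast path (index.findsnapshots) fills the same structure. A standalone sketch of that mapping, with toy data where -1 plays the role of nullrev:

import collections

# rev -> (delta parent, is-snapshot)
toy = {
    0: (-1, True),   # full snapshot
    1: (0, True),    # level-1 snapshot based on rev 0
    2: (1, False),   # plain delta
    3: (0, True),    # another level-1 snapshot based on rev 0
}

snapshots = collections.defaultdict(set)
for rev, (deltaparent, is_snapshot) in sorted(toy.items()):
    if is_snapshot:
        snapshots[deltaparent].add(rev)

assert snapshots[-1] == {0}    # full snapshots hang off nullrev
assert snapshots[0] == {1, 3}  # sibling snapshots share a base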
1044 1046 class deltacomputer:
1045 1047 def __init__(
1046 1048 self,
1047 1049 revlog,
1048 1050 write_debug=None,
1049 1051 debug_search=False,
1050 1052 debug_info=None,
1051 1053 ):
1052 1054 self.revlog = revlog
1053 1055 self._write_debug = write_debug
1054 1056 self._debug_search = debug_search
1055 1057 self._debug_info = debug_info
1058 self._snapshot_cache = SnapshotCache()
1056 1059
1057 1060 def buildtext(self, revinfo, fh):
1058 1061 """Builds a fulltext version of a revision
1059 1062
1060 1063 revinfo: revisioninfo instance that contains all needed info
1061 1064 fh: file handle to either the .i or the .d revlog file,
1062 1065 depending on whether it is inlined or not
1063 1066 """
1064 1067 btext = revinfo.btext
1065 1068 if btext[0] is not None:
1066 1069 return btext[0]
1067 1070
1068 1071 revlog = self.revlog
1069 1072 cachedelta = revinfo.cachedelta
1070 1073 baserev = cachedelta[0]
1071 1074 delta = cachedelta[1]
1072 1075
1073 1076 fulltext = btext[0] = _textfromdelta(
1074 1077 fh,
1075 1078 revlog,
1076 1079 baserev,
1077 1080 delta,
1078 1081 revinfo.p1,
1079 1082 revinfo.p2,
1080 1083 revinfo.flags,
1081 1084 revinfo.node,
1082 1085 )
1083 1086 return fulltext
1084 1087
1085 1088 def _builddeltadiff(self, base, revinfo, fh):
1086 1089 revlog = self.revlog
1087 1090 t = self.buildtext(revinfo, fh)
1088 1091 if revlog.iscensored(base):
1089 1092 # deltas based on a censored revision must replace the
1090 1093 # full content in one patch, so delta works everywhere
1091 1094 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
1092 1095 delta = header + t
1093 1096 else:
1094 1097 ptext = revlog.rawdata(base, _df=fh)
1095 1098 delta = mdiff.textdiff(ptext, t)
1096 1099
1097 1100 return delta
1098 1101
1099 1102 def _builddeltainfo(self, revinfo, base, fh):
1100 1103 # can we use the cached delta?
1101 1104 revlog = self.revlog
1102 1105 debug_search = self._write_debug is not None and self._debug_search
1103 1106 chainbase = revlog.chainbase(base)
1104 1107 if revlog._generaldelta:
1105 1108 deltabase = base
1106 1109 else:
1107 1110 deltabase = chainbase
1108 1111 snapshotdepth = None
1109 1112 if revlog._sparserevlog and deltabase == nullrev:
1110 1113 snapshotdepth = 0
1111 1114 elif revlog._sparserevlog and revlog.issnapshot(deltabase):
1112 1115 # A delta chain should always be one full snapshot,
1113 1116 # zero or more semi-snapshots, and zero or more deltas
1114 1117 p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
1115 1118 if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
1116 1119 snapshotdepth = len(revlog._deltachain(deltabase)[0])
1117 1120 delta = None
1118 1121 if revinfo.cachedelta:
1119 1122 cachebase = revinfo.cachedelta[0]
1120 1123 # check if the diff still applies
1121 1124 currentbase = cachebase
1122 1125 while (
1123 1126 currentbase != nullrev
1124 1127 and currentbase != base
1125 1128 and self.revlog.length(currentbase) == 0
1126 1129 ):
1127 1130 currentbase = self.revlog.deltaparent(currentbase)
1128 1131 if self.revlog._lazydelta and currentbase == base:
1129 1132 delta = revinfo.cachedelta[1]
1130 1133 if delta is None:
1131 1134 delta = self._builddeltadiff(base, revinfo, fh)
1132 1135 if debug_search:
1133 1136 msg = b"DBG-DELTAS-SEARCH: uncompressed-delta-size=%d\n"
1134 1137 msg %= len(delta)
1135 1138 self._write_debug(msg)
1136 1139 # snapshotdepth needs to be neither None nor a 0-level snapshot
1137 1140 if revlog.upperboundcomp is not None and snapshotdepth:
1138 1141 lowestrealisticdeltalen = len(delta) // revlog.upperboundcomp
1139 1142 snapshotlimit = revinfo.textlen >> snapshotdepth
1140 1143 if debug_search:
1141 1144 msg = b"DBG-DELTAS-SEARCH: projected-lower-size=%d\n"
1142 1145 msg %= lowestrealisticdeltalen
1143 1146 self._write_debug(msg)
1144 1147 if snapshotlimit < lowestrealisticdeltalen:
1145 1148 if debug_search:
1146 1149 msg = b"DBG-DELTAS-SEARCH: DISCARDED (snapshot limit)\n"
1147 1150 self._write_debug(msg)
1148 1151 return None
1149 1152 if revlog.length(base) < lowestrealisticdeltalen:
1150 1153 if debug_search:
1151 1154 msg = b"DBG-DELTAS-SEARCH: DISCARDED (prev size)\n"
1152 1155 self._write_debug(msg)
1153 1156 return None
1154 1157 header, data = revlog.compress(delta)
1155 1158 deltalen = len(header) + len(data)
1156 1159 offset = revlog.end(len(revlog) - 1)
1157 1160 dist = deltalen + offset - revlog.start(chainbase)
1158 1161 chainlen, compresseddeltalen = revlog._chaininfo(base)
1159 1162 chainlen += 1
1160 1163 compresseddeltalen += deltalen
1161 1164
1162 1165 return _deltainfo(
1163 1166 dist,
1164 1167 deltalen,
1165 1168 (header, data),
1166 1169 deltabase,
1167 1170 chainbase,
1168 1171 chainlen,
1169 1172 compresseddeltalen,
1170 1173 snapshotdepth,
1171 1174 )
1172 1175
1173 1176 def _fullsnapshotinfo(self, fh, revinfo, curr):
1174 1177 rawtext = self.buildtext(revinfo, fh)
1175 1178 data = self.revlog.compress(rawtext)
1176 1179 compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
1177 1180 deltabase = chainbase = curr
1178 1181 snapshotdepth = 0
1179 1182 chainlen = 1
1180 1183
1181 1184 return _deltainfo(
1182 1185 dist,
1183 1186 deltalen,
1184 1187 data,
1185 1188 deltabase,
1186 1189 chainbase,
1187 1190 chainlen,
1188 1191 compresseddeltalen,
1189 1192 snapshotdepth,
1190 1193 )
1191 1194
1192 1195 def finddeltainfo(self, revinfo, fh, excluded_bases=None, target_rev=None):
1193 1196 """Find an acceptable delta against a candidate revision
1194 1197
1195 1198 revinfo: information about the revision (instance of _revisioninfo)
1196 1199 fh: file handle to either the .i or the .d revlog file,
1197 1200 depending on whether it is inlined or not
1198 1201
1199 1202 Returns the first acceptable candidate revision, as ordered by
1200 1203 _candidategroups
1201 1204
1202 1205 If no suitable deltabase is found, we return delta info for a full
1203 1206 snapshot.
1204 1207
1205 1208 `excluded_bases` is an optional set of revisions that cannot be used as
1206 1209 a delta base. Use this to recompute deltas suitable in a censor or strip
1207 1210 context.
1208 1211 """
1209 1212 if target_rev is None:
1210 1213 target_rev = len(self.revlog)
1211 1214
1212 1215 if not revinfo.textlen:
1213 1216 return self._fullsnapshotinfo(fh, revinfo, target_rev)
1214 1217
1215 1218 if excluded_bases is None:
1216 1219 excluded_bases = set()
1217 1220
1218 1221 # no delta for flag processor revisions (see "candelta" for why)
1219 1222 # not calling candelta since only one revision needs testing, also to
1220 1223 # avoid the overhead of fetching flags again.
1221 1224 if revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
1222 1225 return self._fullsnapshotinfo(fh, revinfo, target_rev)
1223 1226
1224 1227 gather_debug = (
1225 1228 self._write_debug is not None or self._debug_info is not None
1226 1229 )
1227 1230 debug_search = self._write_debug is not None and self._debug_search
1228 1231
1229 1232 if gather_debug:
1230 1233 start = util.timer()
1231 1234
1232 1235 # count the number of different deltas we tried (for debug purposes)
1233 1236 dbg_try_count = 0
1234 1237 # count the number of "search rounds" we did (for debug purposes)
1235 1238 dbg_try_rounds = 0
1236 1239 dbg_type = b'unknown'
1237 1240
1238 1241 cachedelta = revinfo.cachedelta
1239 1242 p1 = revinfo.p1
1240 1243 p2 = revinfo.p2
1241 1244 revlog = self.revlog
1242 1245
1243 1246 deltainfo = None
1244 1247 p1r, p2r = revlog.rev(p1), revlog.rev(p2)
1245 1248
1246 1249 if gather_debug:
1247 1250 if p1r != nullrev:
1248 1251 p1_chain_len = revlog._chaininfo(p1r)[0]
1249 1252 else:
1250 1253 p1_chain_len = -1
1251 1254 if p2r != nullrev:
1252 1255 p2_chain_len = revlog._chaininfo(p2r)[0]
1253 1256 else:
1254 1257 p2_chain_len = -1
1255 1258 if debug_search:
1256 1259 msg = b"DBG-DELTAS-SEARCH: SEARCH rev=%d\n"
1257 1260 msg %= target_rev
1258 1261 self._write_debug(msg)
1259 1262
1260 1263 groups = _candidategroups(
1261 1264 self.revlog,
1262 1265 revinfo.textlen,
1263 1266 p1r,
1264 1267 p2r,
1265 1268 cachedelta,
1266 1269 excluded_bases,
1267 1270 target_rev,
1271 snapshot_cache=self._snapshot_cache,
1268 1272 )
1269 1273 candidaterevs = next(groups)
1270 1274 while candidaterevs is not None:
1271 1275 dbg_try_rounds += 1
1272 1276 if debug_search:
1273 1277 prev = None
1274 1278 if deltainfo is not None:
1275 1279 prev = deltainfo.base
1276 1280
1277 1281 if (
1278 1282 cachedelta is not None
1279 1283 and len(candidaterevs) == 1
1280 1284 and cachedelta[0] in candidaterevs
1281 1285 ):
1282 1286 round_type = b"cached-delta"
1283 1287 elif p1 in candidaterevs or p2 in candidaterevs:
1284 1288 round_type = b"parents"
1285 1289 elif prev is not None and all(c < prev for c in candidaterevs):
1286 1290 round_type = b"refine-down"
1287 1291 elif prev is not None and all(c > prev for c in candidaterevs):
1288 1292 round_type = b"refine-up"
1289 1293 else:
1290 1294 round_type = b"search-down"
1291 1295 msg = b"DBG-DELTAS-SEARCH: ROUND #%d - %d candidates - %s\n"
1292 1296 msg %= (dbg_try_rounds, len(candidaterevs), round_type)
1293 1297 self._write_debug(msg)
1294 1298 nominateddeltas = []
1295 1299 if deltainfo is not None:
1296 1300 if debug_search:
1297 1301 msg = (
1298 1302 b"DBG-DELTAS-SEARCH: CONTENDER: rev=%d - length=%d\n"
1299 1303 )
1300 1304 msg %= (deltainfo.base, deltainfo.deltalen)
1301 1305 self._write_debug(msg)
1302 1306 # if we already found a good delta,
1303 1307 # challenge it against refined candidates
1304 1308 nominateddeltas.append(deltainfo)
1305 1309 for candidaterev in candidaterevs:
1306 1310 if debug_search:
1307 1311 msg = b"DBG-DELTAS-SEARCH: CANDIDATE: rev=%d\n"
1308 1312 msg %= candidaterev
1309 1313 self._write_debug(msg)
1310 1314 candidate_type = None
1311 1315 if candidaterev == p1:
1312 1316 candidate_type = b"p1"
1313 1317 elif candidaterev == p2:
1314 1318 candidate_type = b"p2"
1315 1319 elif self.revlog.issnapshot(candidaterev):
1316 1320 candidate_type = b"snapshot-%d"
1317 1321 candidate_type %= self.revlog.snapshotdepth(
1318 1322 candidaterev
1319 1323 )
1320 1324
1321 1325 if candidate_type is not None:
1322 1326 msg = b"DBG-DELTAS-SEARCH: type=%s\n"
1323 1327 msg %= candidate_type
1324 1328 self._write_debug(msg)
1325 1329 msg = b"DBG-DELTAS-SEARCH: size=%d\n"
1326 1330 msg %= self.revlog.length(candidaterev)
1327 1331 self._write_debug(msg)
1328 1332 msg = b"DBG-DELTAS-SEARCH: base=%d\n"
1329 1333 msg %= self.revlog.deltaparent(candidaterev)
1330 1334 self._write_debug(msg)
1331 1335
1332 1336 dbg_try_count += 1
1333 1337
1334 1338 if debug_search:
1335 1339 delta_start = util.timer()
1336 1340 candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
1337 1341 if debug_search:
1338 1342 delta_end = util.timer()
1339 1343 msg = b"DBG-DELTAS-SEARCH: delta-search-time=%f\n"
1340 1344 msg %= delta_end - delta_start
1341 1345 self._write_debug(msg)
1342 1346 if candidatedelta is not None:
1343 1347 if is_good_delta_info(self.revlog, candidatedelta, revinfo):
1344 1348 if debug_search:
1345 1349 msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (GOOD)\n"
1346 1350 msg %= candidatedelta.deltalen
1347 1351 self._write_debug(msg)
1348 1352 nominateddeltas.append(candidatedelta)
1349 1353 elif debug_search:
1350 1354 msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (BAD)\n"
1351 1355 msg %= candidatedelta.deltalen
1352 1356 self._write_debug(msg)
1353 1357 elif debug_search:
1354 1358 msg = b"DBG-DELTAS-SEARCH: NO-DELTA\n"
1355 1359 self._write_debug(msg)
1356 1360 if nominateddeltas:
1357 1361 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
1358 1362 if deltainfo is not None:
1359 1363 candidaterevs = groups.send(deltainfo.base)
1360 1364 else:
1361 1365 candidaterevs = next(groups)
1362 1366
1363 1367 if deltainfo is None:
1364 1368 dbg_type = b"full"
1365 1369 deltainfo = self._fullsnapshotinfo(fh, revinfo, target_rev)
1366 1370 elif deltainfo.snapshotdepth: # pytype: disable=attribute-error
1367 1371 dbg_type = b"snapshot"
1368 1372 else:
1369 1373 dbg_type = b"delta"
1370 1374
1371 1375 if gather_debug:
1372 1376 end = util.timer()
1373 1377 used_cached = (
1374 1378 cachedelta is not None
1375 1379 and dbg_try_rounds == 1
1376 1380 and dbg_try_count == 1
1377 1381 and deltainfo.base == cachedelta[0]
1378 1382 )
1379 1383 dbg = {
1380 1384 'duration': end - start,
1381 1385 'revision': target_rev,
1382 1386 'delta-base': deltainfo.base, # pytype: disable=attribute-error
1383 1387 'search_round_count': dbg_try_rounds,
1384 1388 'using-cached-base': used_cached,
1385 1389 'delta_try_count': dbg_try_count,
1386 1390 'type': dbg_type,
1387 1391 'p1-chain-len': p1_chain_len,
1388 1392 'p2-chain-len': p2_chain_len,
1389 1393 }
1390 1394 if (
1391 1395 deltainfo.snapshotdepth # pytype: disable=attribute-error
1392 1396 is not None
1393 1397 ):
1394 1398 dbg[
1395 1399 'snapshot-depth'
1396 1400 ] = deltainfo.snapshotdepth # pytype: disable=attribute-error
1397 1401 else:
1398 1402 dbg['snapshot-depth'] = 0
1399 1403 target_revlog = b"UNKNOWN"
1400 1404 target_type = self.revlog.target[0]
1401 1405 target_key = self.revlog.target[1]
1402 1406 if target_type == KIND_CHANGELOG:
1403 1407 target_revlog = b'CHANGELOG:'
1404 1408 elif target_type == KIND_MANIFESTLOG:
1405 1409 target_revlog = b'MANIFESTLOG:'
1406 1410 if target_key:
1407 1411 target_revlog += b'%s:' % target_key
1408 1412 elif target_type == KIND_FILELOG:
1409 1413 target_revlog = b'FILELOG:'
1410 1414 if target_key:
1411 1415 target_revlog += b'%s:' % target_key
1412 1416 dbg['target-revlog'] = target_revlog
1413 1417
1414 1418 if self._debug_info is not None:
1415 1419 self._debug_info.append(dbg)
1416 1420
1417 1421 if self._write_debug is not None:
1418 1422 msg = (
1419 1423 b"DBG-DELTAS:"
1420 1424 b" %-12s"
1421 1425 b" rev=%d:"
1422 1426 b" delta-base=%d"
1423 1427 b" is-cached=%d"
1424 1428 b" - search-rounds=%d"
1425 1429 b" try-count=%d"
1426 1430 b" - delta-type=%-6s"
1427 1431 b" snap-depth=%d"
1428 1432 b" - p1-chain-length=%d"
1429 1433 b" p2-chain-length=%d"
1430 1434 b" - duration=%f"
1431 1435 b"\n"
1432 1436 )
1433 1437 msg %= (
1434 1438 dbg["target-revlog"],
1435 1439 dbg["revision"],
1436 1440 dbg["delta-base"],
1437 1441 dbg["using-cached-base"],
1438 1442 dbg["search_round_count"],
1439 1443 dbg["delta_try_count"],
1440 1444 dbg["type"],
1441 1445 dbg["snapshot-depth"],
1442 1446 dbg["p1-chain-len"],
1443 1447 dbg["p2-chain-len"],
1444 1448 dbg["duration"],
1445 1449 )
1446 1450 self._write_debug(msg)
1447 1451 return deltainfo
1448 1452
1449 1453
1450 1454 def delta_compression(default_compression_header, deltainfo):
1451 1455 """return (COMPRESSION_MODE, deltainfo)
1452 1456
1453 1457 used by revlog v2+ format to dispatch between PLAIN and DEFAULT
1454 1458 compression.
1455 1459 """
1456 1460 h, d = deltainfo.data
1457 1461 compression_mode = COMP_MODE_INLINE
1458 1462 if not h and not d:
1458 1462 # no data to store at all... declare it uncompressed
1460 1464 compression_mode = COMP_MODE_PLAIN
1461 1465 elif not h:
1462 1466 t = d[0:1]
1463 1467 if t == b'\0':
1464 1468 compression_mode = COMP_MODE_PLAIN
1465 1469 elif t == default_compression_header:
1466 1470 compression_mode = COMP_MODE_DEFAULT
1467 1471 elif h == b'u':
1468 1472 # we have a more efficient way to declare uncompressed
1469 1473 h = b''
1470 1474 compression_mode = COMP_MODE_PLAIN
1471 1475 deltainfo = drop_u_compression(deltainfo)
1472 1476 return compression_mode, deltainfo
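
A standalone sketch of the dispatch rules above, with toy constants (the real code returns the COMP_MODE_* values from revlogutils.constants, and default_compression_header depends on the configured engine, e.g. b'x' for zlib):

PLAIN, INLINE, DEFAULT = 0, 1, 2

def classify(header, data, default_header=b'x'):
    if not header and not data:
        return PLAIN           # nothing to store: mark it uncompressed
    if not header:
        t = data[0:1]
        if t == b'\0':
            return PLAIN       # explicit uncompressed marker
        if t == default_header:
            return DEFAULT     # payload matches the default engine
    if header == b'u':
        return PLAIN           # legacy "u" prefix: drop it, store plain
    return INLINE              # anything else keeps its inline header

assert classify(b'', b'') == PLAIN
assert classify(b'u', b'raw text') == PLAIN
assert classify(b'', b'x\x9c...') == DEFAULT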