bundle: when forcing acceptance of incoming delta also accept snapshot...
marmoute
r50677:acdb9a15 default
@@ -1,1502 +1,1511 b''
1 1 # revlogdeltas.py - Logic around delta computation for revlog
2 2 #
3 3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 4 # Copyright 2018 Octobus <contact@octobus.net>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8 """Helper class to compute deltas stored inside revlogs"""
9 9
10 10
11 11 import collections
12 12 import struct
13 13
14 14 # import stuff from node for others to import from revlog
15 15 from ..node import nullrev
16 16 from ..i18n import _
17 17 from ..pycompat import getattr
18 18
19 19 from .constants import (
20 20 COMP_MODE_DEFAULT,
21 21 COMP_MODE_INLINE,
22 22 COMP_MODE_PLAIN,
23 23 DELTA_BASE_REUSE_FORCE,
24 24 DELTA_BASE_REUSE_NO,
25 25 KIND_CHANGELOG,
26 26 KIND_FILELOG,
27 27 KIND_MANIFESTLOG,
28 28 REVIDX_ISCENSORED,
29 29 REVIDX_RAWTEXT_CHANGING_FLAGS,
30 30 )
31 31
32 32 from ..thirdparty import attr
33 33
34 34 from .. import (
35 35 error,
36 36 mdiff,
37 37 util,
38 38 )
39 39
40 40 from . import flagutil
41 41
42 42 # maximum <delta-chain-data>/<revision-text-length> ratio
43 43 LIMIT_DELTA2TEXT = 2
44 44
45 45
46 46 class _testrevlog:
47 47 """minimalist fake revlog to use in doctests"""
48 48
49 49 def __init__(self, data, density=0.5, mingap=0, snapshot=()):
50 50 """data is an list of revision payload boundaries"""
51 51 self._data = data
52 52 self._srdensitythreshold = density
53 53 self._srmingapsize = mingap
54 54 self._snapshot = set(snapshot)
55 55 self.index = None
56 56
57 57 def start(self, rev):
58 58 if rev == nullrev:
59 59 return 0
60 60 if rev == 0:
61 61 return 0
62 62 return self._data[rev - 1]
63 63
64 64 def end(self, rev):
65 65 if rev == nullrev:
66 66 return 0
67 67 return self._data[rev]
68 68
69 69 def length(self, rev):
70 70 return self.end(rev) - self.start(rev)
71 71
72 72 def __len__(self):
73 73 return len(self._data)
74 74
75 75 def issnapshot(self, rev):
76 76 if rev == nullrev:
77 77 return True
78 78 return rev in self._snapshot
79 79
80 80
81 81 def slicechunk(revlog, revs, targetsize=None):
82 82 """slice revs to reduce the amount of unrelated data to be read from disk.
83 83
84 84 ``revs`` is sliced into groups that should be read in one go.
85 85 Assume that revs are sorted.
86 86
87 87 The initial chunk is sliced until the overall density (payload/chunks-span
88 88 ratio) is above `revlog._srdensitythreshold`. No gap smaller than
89 89 `revlog._srmingapsize` is skipped.
90 90
91 91 If `targetsize` is set, no chunk larger than `targetsize` will be yielded.
92 92 For consistency with other slicing choices, this limit won't go lower than
93 93 `revlog._srmingapsize`.
94 94
95 95 If individual revision chunks are larger than this limit, they will still
96 96 be yielded individually.
97 97
98 98 >>> data = [
99 99 ... 5, #00 (5)
100 100 ... 10, #01 (5)
101 101 ... 12, #02 (2)
102 102 ... 12, #03 (empty)
103 103 ... 27, #04 (15)
104 104 ... 31, #05 (4)
105 105 ... 31, #06 (empty)
106 106 ... 42, #07 (11)
107 107 ... 47, #08 (5)
108 108 ... 47, #09 (empty)
109 109 ... 48, #10 (1)
110 110 ... 51, #11 (3)
111 111 ... 74, #12 (23)
112 112 ... 85, #13 (11)
113 113 ... 86, #14 (1)
114 114 ... 91, #15 (5)
115 115 ... ]
116 116 >>> revlog = _testrevlog(data, snapshot=range(16))
117 117
118 118 >>> list(slicechunk(revlog, list(range(16))))
119 119 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
120 120 >>> list(slicechunk(revlog, [0, 15]))
121 121 [[0], [15]]
122 122 >>> list(slicechunk(revlog, [0, 11, 15]))
123 123 [[0], [11], [15]]
124 124 >>> list(slicechunk(revlog, [0, 11, 13, 15]))
125 125 [[0], [11, 13, 15]]
126 126 >>> list(slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
127 127 [[1, 2], [5, 8, 10, 11], [14]]
128 128
129 129 Slicing with a maximum chunk size
130 130 >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
131 131 [[0], [11], [13], [15]]
132 132 >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
133 133 [[0], [11], [13, 15]]
134 134
135 135 Slicing involving nullrev
136 136 >>> list(slicechunk(revlog, [-1, 0, 11, 13, 15], targetsize=20))
137 137 [[-1, 0], [11], [13, 15]]
138 138 >>> list(slicechunk(revlog, [-1, 13, 15], targetsize=5))
139 139 [[-1], [13], [15]]
140 140 """
141 141 if targetsize is not None:
142 142 targetsize = max(targetsize, revlog._srmingapsize)
143 143 # targetsize should not be specified when evaluating delta candidates:
144 144 # * targetsize is used to ensure we stay within specification when reading.
145 145 densityslicing = getattr(revlog.index, 'slicechunktodensity', None)
146 146 if densityslicing is None:
147 147 densityslicing = lambda x, y, z: _slicechunktodensity(revlog, x, y, z)
148 148 for chunk in densityslicing(
149 149 revs, revlog._srdensitythreshold, revlog._srmingapsize
150 150 ):
151 151 for subchunk in _slicechunktosize(revlog, chunk, targetsize):
152 152 yield subchunk
153 153
154 154
155 155 def _slicechunktosize(revlog, revs, targetsize=None):
156 156 """slice revs to match the target size
157 157
158 158 This is intended to be used on chunks that density slicing selected but that
159 159 are still too large compared to the read guarantee of the revlog. This might
160 160 happen when the "minimal gap size" interrupted the slicing, or when chains
161 161 are built in a way that creates large blocks next to each other.
162 162
163 163 >>> data = [
164 164 ... 3, #0 (3)
165 165 ... 5, #1 (2)
166 166 ... 6, #2 (1)
167 167 ... 8, #3 (2)
168 168 ... 8, #4 (empty)
169 169 ... 11, #5 (3)
170 170 ... 12, #6 (1)
171 171 ... 13, #7 (1)
172 172 ... 14, #8 (1)
173 173 ... ]
174 174
175 175 == All snapshots cases ==
176 176 >>> revlog = _testrevlog(data, snapshot=range(9))
177 177
178 178 Cases where chunk is already small enough
179 179 >>> list(_slicechunktosize(revlog, [0], 3))
180 180 [[0]]
181 181 >>> list(_slicechunktosize(revlog, [6, 7], 3))
182 182 [[6, 7]]
183 183 >>> list(_slicechunktosize(revlog, [0], None))
184 184 [[0]]
185 185 >>> list(_slicechunktosize(revlog, [6, 7], None))
186 186 [[6, 7]]
187 187
188 188 cases where we need actual slicing
189 189 >>> list(_slicechunktosize(revlog, [0, 1], 3))
190 190 [[0], [1]]
191 191 >>> list(_slicechunktosize(revlog, [1, 3], 3))
192 192 [[1], [3]]
193 193 >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
194 194 [[1, 2], [3]]
195 195 >>> list(_slicechunktosize(revlog, [3, 5], 3))
196 196 [[3], [5]]
197 197 >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
198 198 [[3], [5]]
199 199 >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
200 200 [[5], [6, 7, 8]]
201 201 >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
202 202 [[0], [1, 2], [3], [5], [6, 7, 8]]
203 203
204 204 Case with too large individual chunk (must return valid chunk)
205 205 >>> list(_slicechunktosize(revlog, [0, 1], 2))
206 206 [[0], [1]]
207 207 >>> list(_slicechunktosize(revlog, [1, 3], 1))
208 208 [[1], [3]]
209 209 >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
210 210 [[3], [5]]
211 211
212 212 == No Snapshot cases ==
213 213 >>> revlog = _testrevlog(data)
214 214
215 215 Cases where chunk is already small enough
216 216 >>> list(_slicechunktosize(revlog, [0], 3))
217 217 [[0]]
218 218 >>> list(_slicechunktosize(revlog, [6, 7], 3))
219 219 [[6, 7]]
220 220 >>> list(_slicechunktosize(revlog, [0], None))
221 221 [[0]]
222 222 >>> list(_slicechunktosize(revlog, [6, 7], None))
223 223 [[6, 7]]
224 224
225 225 cases where we need actual slicing
226 226 >>> list(_slicechunktosize(revlog, [0, 1], 3))
227 227 [[0], [1]]
228 228 >>> list(_slicechunktosize(revlog, [1, 3], 3))
229 229 [[1], [3]]
230 230 >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
231 231 [[1], [2, 3]]
232 232 >>> list(_slicechunktosize(revlog, [3, 5], 3))
233 233 [[3], [5]]
234 234 >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
235 235 [[3], [4, 5]]
236 236 >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
237 237 [[5], [6, 7, 8]]
238 238 >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
239 239 [[0], [1, 2], [3], [5], [6, 7, 8]]
240 240
241 241 Case with too large individual chunk (must return valid chunk)
242 242 >>> list(_slicechunktosize(revlog, [0, 1], 2))
243 243 [[0], [1]]
244 244 >>> list(_slicechunktosize(revlog, [1, 3], 1))
245 245 [[1], [3]]
246 246 >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
247 247 [[3], [5]]
248 248
249 249 == mixed case ==
250 250 >>> revlog = _testrevlog(data, snapshot=[0, 1, 2])
251 251 >>> list(_slicechunktosize(revlog, list(range(9)), 5))
252 252 [[0, 1], [2], [3, 4, 5], [6, 7, 8]]
253 253 """
254 254 assert targetsize is None or 0 <= targetsize
255 255 startdata = revlog.start(revs[0])
256 256 enddata = revlog.end(revs[-1])
257 257 fullspan = enddata - startdata
258 258 if targetsize is None or fullspan <= targetsize:
259 259 yield revs
260 260 return
261 261
262 262 startrevidx = 0
263 263 endrevidx = 1
264 264 iterrevs = enumerate(revs)
265 265 next(iterrevs) # skip first rev.
266 266 # first step: get snapshots out of the way
267 267 for idx, r in iterrevs:
268 268 span = revlog.end(r) - startdata
269 269 snapshot = revlog.issnapshot(r)
270 270 if span <= targetsize and snapshot:
271 271 endrevidx = idx + 1
272 272 else:
273 273 chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
274 274 if chunk:
275 275 yield chunk
276 276 startrevidx = idx
277 277 startdata = revlog.start(r)
278 278 endrevidx = idx + 1
279 279 if not snapshot:
280 280 break
281 281
282 282 # for the others, we use binary slicing to quickly converge toward valid
283 283 # chunks (otherwise, we might end up looking for start/end of many
284 284 # revisions). This logic does not look for the perfect slicing point; it
285 285 # focuses on quickly converging toward valid chunks.
286 286 nbitem = len(revs)
287 287 while (enddata - startdata) > targetsize:
288 288 endrevidx = nbitem
289 289 if nbitem - startrevidx <= 1:
290 290 break # protect against individual chunk larger than limit
291 291 localenddata = revlog.end(revs[endrevidx - 1])
292 292 span = localenddata - startdata
293 293 while span > targetsize:
294 294 if endrevidx - startrevidx <= 1:
295 295 break # protect against individual chunk larger than limit
296 296 endrevidx -= (endrevidx - startrevidx) // 2
297 297 localenddata = revlog.end(revs[endrevidx - 1])
298 298 span = localenddata - startdata
299 299 chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
300 300 if chunk:
301 301 yield chunk
302 302 startrevidx = endrevidx
303 303 startdata = revlog.start(revs[startrevidx])
304 304
305 305 chunk = _trimchunk(revlog, revs, startrevidx)
306 306 if chunk:
307 307 yield chunk
308 308
309 309
310 310 def _slicechunktodensity(revlog, revs, targetdensity=0.5, mingapsize=0):
311 311 """slice revs to reduce the amount of unrelated data to be read from disk.
312 312
313 313 ``revs`` is sliced into groups that should be read in one go.
314 314 Assume that revs are sorted.
315 315
316 316 The initial chunk is sliced until the overall density (payload/chunks-span
317 317 ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
318 318 skipped.
319 319
320 320 >>> revlog = _testrevlog([
321 321 ... 5, #00 (5)
322 322 ... 10, #01 (5)
323 323 ... 12, #02 (2)
324 324 ... 12, #03 (empty)
325 325 ... 27, #04 (15)
326 326 ... 31, #05 (4)
327 327 ... 31, #06 (empty)
328 328 ... 42, #07 (11)
329 329 ... 47, #08 (5)
330 330 ... 47, #09 (empty)
331 331 ... 48, #10 (1)
332 332 ... 51, #11 (3)
333 333 ... 74, #12 (23)
334 334 ... 85, #13 (11)
335 335 ... 86, #14 (1)
336 336 ... 91, #15 (5)
337 337 ... ])
338 338
339 339 >>> list(_slicechunktodensity(revlog, list(range(16))))
340 340 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
341 341 >>> list(_slicechunktodensity(revlog, [0, 15]))
342 342 [[0], [15]]
343 343 >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
344 344 [[0], [11], [15]]
345 345 >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
346 346 [[0], [11, 13, 15]]
347 347 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
348 348 [[1, 2], [5, 8, 10, 11], [14]]
349 349 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
350 350 ... mingapsize=20))
351 351 [[1, 2, 3, 5, 8, 10, 11], [14]]
352 352 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
353 353 ... targetdensity=0.95))
354 354 [[1, 2], [5], [8, 10, 11], [14]]
355 355 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
356 356 ... targetdensity=0.95, mingapsize=12))
357 357 [[1, 2], [5, 8, 10, 11], [14]]
358 358 """
359 359 start = revlog.start
360 360 length = revlog.length
361 361
362 362 if len(revs) <= 1:
363 363 yield revs
364 364 return
365 365
366 366 deltachainspan = segmentspan(revlog, revs)
367 367
368 368 if deltachainspan < mingapsize:
369 369 yield revs
370 370 return
371 371
372 372 readdata = deltachainspan
373 373 chainpayload = sum(length(r) for r in revs)
374 374
375 375 if deltachainspan:
376 376 density = chainpayload / float(deltachainspan)
377 377 else:
378 378 density = 1.0
379 379
380 380 if density >= targetdensity:
381 381 yield revs
382 382 return
383 383
384 384 # Collect the gaps so we can later pop them from largest to smallest
385 385 gaps = []
386 386 prevend = None
387 387 for i, rev in enumerate(revs):
388 388 revstart = start(rev)
389 389 revlen = length(rev)
390 390
391 391 # Skip empty revisions to form larger holes
392 392 if revlen == 0:
393 393 continue
394 394
395 395 if prevend is not None:
396 396 gapsize = revstart - prevend
397 397 # only consider holes that are large enough
398 398 if gapsize > mingapsize:
399 399 gaps.append((gapsize, i))
400 400
401 401 prevend = revstart + revlen
402 402 # sort the gaps so we can pop them from largest to smallest
403 403 gaps.sort()
404 404
405 405 # Collect the indices of the largest holes until the density is acceptable
406 406 selected = []
407 407 while gaps and density < targetdensity:
408 408 gapsize, gapidx = gaps.pop()
409 409
410 410 selected.append(gapidx)
411 411
412 412 # skipping the gap means its bytes no longer count toward the
413 413 # amount of data read
414 414 readdata -= gapsize
415 415 if readdata > 0:
416 416 density = chainpayload / float(readdata)
417 417 else:
418 418 density = 1.0
419 419 selected.sort()
420 420
421 421 # Cut the revs at collected indices
422 422 previdx = 0
423 423 for idx in selected:
424 424
425 425 chunk = _trimchunk(revlog, revs, previdx, idx)
426 426 if chunk:
427 427 yield chunk
428 428
429 429 previdx = idx
430 430
431 431 chunk = _trimchunk(revlog, revs, previdx)
432 432 if chunk:
433 433 yield chunk
434 434
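# Editor's sketch (not part of the original changeset): a small worked
# example of the density computation above. Reading revisions spanning
# bytes [0, 100) with 40 bytes of actual payload gives a density of 0.4;
# dropping a single 50-byte gap halves the read and doubles the density.
_chainpayload = 40
_readdata = 100
assert _chainpayload / float(_readdata) == 0.4
_readdata -= 50  # skip the largest gap
assert _chainpayload / float(_readdata) == 0.8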
435 435
436 436 def _trimchunk(revlog, revs, startidx, endidx=None):
437 437 """returns revs[startidx:endidx] without empty trailing revs
438 438
439 439 Doctest Setup
440 440 >>> revlog = _testrevlog([
441 441 ... 5, #0
442 442 ... 10, #1
443 443 ... 12, #2
444 444 ... 12, #3 (empty)
445 445 ... 17, #4
446 446 ... 21, #5
447 447 ... 21, #6 (empty)
448 448 ... ])
449 449
450 450 Contiguous cases:
451 451 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
452 452 [0, 1, 2, 3, 4, 5]
453 453 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
454 454 [0, 1, 2, 3, 4]
455 455 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
456 456 [0, 1, 2]
457 457 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
458 458 [2]
459 459 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
460 460 [3, 4, 5]
461 461 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
462 462 [3, 4]
463 463
464 464 Discontiguous cases:
465 465 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
466 466 [1, 3, 5]
467 467 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
468 468 [1]
469 469 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
470 470 [3, 5]
471 471 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
472 472 [3, 5]
473 473 """
474 474 length = revlog.length
475 475
476 476 if endidx is None:
477 477 endidx = len(revs)
478 478
479 479 # If we have a non-empty delta candidate, there is nothing to trim
480 480 if revs[endidx - 1] < len(revlog):
481 481 # Trim empty revs at the end, except the very first revision of a chain
482 482 while (
483 483 endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0
484 484 ):
485 485 endidx -= 1
486 486
487 487 return revs[startidx:endidx]
488 488
489 489
490 490 def segmentspan(revlog, revs):
491 491 """Get the byte span of a segment of revisions
492 492
493 493 revs is a sorted array of revision numbers
494 494
495 495 >>> revlog = _testrevlog([
496 496 ... 5, #0
497 497 ... 10, #1
498 498 ... 12, #2
499 499 ... 12, #3 (empty)
500 500 ... 17, #4
501 501 ... ])
502 502
503 503 >>> segmentspan(revlog, [0, 1, 2, 3, 4])
504 504 17
505 505 >>> segmentspan(revlog, [0, 4])
506 506 17
507 507 >>> segmentspan(revlog, [3, 4])
508 508 5
509 509 >>> segmentspan(revlog, [1, 2, 3,])
510 510 7
511 511 >>> segmentspan(revlog, [1, 3])
512 512 7
513 513 """
514 514 if not revs:
515 515 return 0
516 516 end = revlog.end(revs[-1])
517 517 return end - revlog.start(revs[0])
518 518
519 519
520 520 def _textfromdelta(fh, revlog, baserev, delta, p1, p2, flags, expectednode):
521 521 """build full text from a (base, delta) pair and other metadata"""
522 522 # special case deltas which replace entire base; no need to decode
523 523 # base revision. this neatly avoids censored bases, which throw when
524 524 # they're decoded.
525 525 hlen = struct.calcsize(b">lll")
526 526 if delta[:hlen] == mdiff.replacediffheader(
527 527 revlog.rawsize(baserev), len(delta) - hlen
528 528 ):
529 529 fulltext = delta[hlen:]
530 530 else:
531 531 # deltabase is the rawtext before being changed by flag processors,
532 532 # which is equivalent to non-raw text
533 533 basetext = revlog.revision(baserev, _df=fh)
534 534 fulltext = mdiff.patch(basetext, delta)
535 535
536 536 try:
537 537 validatehash = flagutil.processflagsraw(revlog, fulltext, flags)
538 538 if validatehash:
539 539 revlog.checkhash(fulltext, expectednode, p1=p1, p2=p2)
540 540 if flags & REVIDX_ISCENSORED:
541 541 raise error.StorageError(
542 542 _(b'node %s is not censored') % expectednode
543 543 )
544 544 except error.CensoredNodeError:
545 545 # must pass the censored index flag to add censored revisions
546 546 if not flags & REVIDX_ISCENSORED:
547 547 raise
548 548 return fulltext
549 549
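# Editor's sketch (not part of the original changeset): a minimal,
# self-contained illustration of the "full replacement" fast path in
# _textfromdelta above. A binary delta whose single hunk replaces bytes
# [0, baselen) can be recognized from its 12-byte ">lll" header alone,
# so the (possibly censored) base never needs to be decoded. The helper
# name below is hypothetical.
import struct as _struct


def _make_full_replacement_delta(baselen, newtext):
    # one hunk: start=0, end=baselen, followed by the replacement bytes
    return _struct.pack(b">lll", 0, baselen, len(newtext)) + newtext


_delta = _make_full_replacement_delta(5, b"hello world")
_hlen = _struct.calcsize(b">lll")
assert _struct.unpack(b">lll", _delta[:_hlen]) == (0, 5, 11)
assert _delta[_hlen:] == b"hello world"  # fulltext, no patching needed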
550 550
551 551 @attr.s(slots=True, frozen=True)
552 552 class _deltainfo:
553 553 distance = attr.ib()
554 554 deltalen = attr.ib()
555 555 data = attr.ib()
556 556 base = attr.ib()
557 557 chainbase = attr.ib()
558 558 chainlen = attr.ib()
559 559 compresseddeltalen = attr.ib()
560 560 snapshotdepth = attr.ib()
561 561
562 562
563 563 def drop_u_compression(delta):
564 564 """turn into a "u" (no-compression) into no-compression without header
565 565
566 566 This is useful for revlog format that has better compression method.
567 567 """
568 568 assert delta.data[0] == b'u', delta.data[0]
569 569 return _deltainfo(
570 570 delta.distance,
571 571 delta.deltalen - 1,
572 572 (b'', delta.data[1]),
573 573 delta.base,
574 574 delta.chainbase,
575 575 delta.chainlen,
576 576 delta.compresseddeltalen,
577 577 delta.snapshotdepth,
578 578 )
579 579
580 580
581 581 def is_good_delta_info(revlog, deltainfo, revinfo):
582 582 """Returns True if the given delta is good. Good means that it is within
583 583 the disk span, disk size, and chain length bounds that we know to be
584 584 performant."""
585 585 if deltainfo is None:
586 586 return False
587 587
588 588 if (
589 589 revinfo.cachedelta is not None
590 590 and deltainfo.base == revinfo.cachedelta[0]
591 591 and revinfo.cachedelta[2] == DELTA_BASE_REUSE_FORCE
592 592 ):
593 593 return True
594 594
595 595 # - 'deltainfo.distance' is the distance from the base revision --
596 596 # bounding it limits the amount of I/O we need to do.
597 597 # - 'deltainfo.compresseddeltalen' is the sum of the total size of
598 598 # deltas we need to apply -- bounding it limits the amount of CPU
599 599 # we consume.
600 600
601 601 textlen = revinfo.textlen
602 602 defaultmax = textlen * 4
603 603 maxdist = revlog._maxdeltachainspan
604 604 if not maxdist:
605 605 maxdist = deltainfo.distance # ensure the conditional passes
606 606 maxdist = max(maxdist, defaultmax)
607 607
608 608 # Bad delta from read span:
609 609 #
610 610 # If the span of data read is larger than the maximum allowed.
611 611 #
612 612 # In the sparse-revlog case, we rely on the associated "sparse reading"
613 613 # to avoid issues related to the span of data. In theory, it would be
614 614 # possible to build a pathological revlog where delta patterns would
615 615 # lead to too many reads. However, this does not happen in practice. So
616 616 # we skip the span check entirely.
617 617 if not revlog._sparserevlog and maxdist < deltainfo.distance:
618 618 return False
619 619
620 620 # Bad delta from new delta size:
621 621 #
622 622 # If the delta size is larger than the target text, storing the
623 623 # delta will be inefficient.
624 624 if textlen < deltainfo.deltalen:
625 625 return False
626 626
627 627 # Bad delta from cumulated payload size:
628 628 #
629 629 # If the sum of the deltas gets larger than K * the target text length.
630 630 if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
631 631 return False
632 632
633 633 # Bad delta from chain length:
634 634 #
635 635 # If the number of deltas in the chain gets too high.
636 636 if revlog._maxchainlen and revlog._maxchainlen < deltainfo.chainlen:
637 637 return False
638 638
639 639 # bad delta from intermediate snapshot size limit
640 640 #
641 641 # If an intermediate snapshot size is higher than the limit. The
642 642 # limit exists to prevent endless chains of intermediate deltas from
643 643 # being created.
644 644 if (
645 645 deltainfo.snapshotdepth is not None
646 646 and (textlen >> deltainfo.snapshotdepth) < deltainfo.deltalen
647 647 ):
648 648 return False
649 649
650 650 # bad delta if new intermediate snapshot is larger than the previous
651 651 # snapshot
652 652 if (
653 653 deltainfo.snapshotdepth
654 654 and revlog.length(deltainfo.base) < deltainfo.deltalen
655 655 ):
656 656 return False
657 657
658 658 return True
659 659
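# Editor's sketch (not part of the original changeset): a small worked
# example of the size criteria enforced by is_good_delta_info, with
# illustrative numbers. For a 1000-byte fulltext:
_textlen = 1000
# "Bad delta from new delta size": a 1200-byte delta is larger than the
# fulltext itself, so it would be rejected.
assert _textlen < 1200
# "Bad delta from cumulated payload size": the whole chain may weigh at
# most LIMIT_DELTA2TEXT * textlen = 2000 bytes of compressed deltas.
assert _textlen * LIMIT_DELTA2TEXT == 2000
# Intermediate snapshot limit: a snapshot at depth 3 may be at most
# textlen >> 3 = 125 bytes, keeping snapshot chains geometrically small.
assert (_textlen >> 3) == 125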
660 660
661 661 # If a revision's full text is that much bigger than a base candidate full
662 662 # text's, it is very unlikely that it will produce a valid delta. We no longer
663 663 # consider these candidates.
664 664 LIMIT_BASE2TEXT = 500
665 665
666 666
667 667 def _candidategroups(
668 668 revlog,
669 669 textlen,
670 670 p1,
671 671 p2,
672 672 cachedelta,
673 673 excluded_bases=None,
674 674 target_rev=None,
675 675 snapshot_cache=None,
676 676 ):
677 677 """Provides group of revision to be tested as delta base
678 678
679 679 This top-level function focuses on emitting groups with unique and
680 680 worthwhile content. See _rawgroups for details about the group order.
681 681 """
682 682 # should we try to build a delta?
683 683 if not (len(revlog) and revlog._storedeltachains):
684 684 yield None
685 685 return
686 686
687 if (
688 cachedelta is not None
689 and nullrev == cachedelta[0]
690 and cachedelta[2] == DELTA_BASE_REUSE_FORCE
691 ):
692 # instructions are to forcibly do a full snapshot
693 yield None
694 return
695
687 696 deltalength = revlog.length
688 697 deltaparent = revlog.deltaparent
689 698 sparse = revlog._sparserevlog
690 699 good = None
691 700
692 701 deltas_limit = textlen * LIMIT_DELTA2TEXT
693 702 group_chunk_size = revlog._candidate_group_chunk_size
694 703
695 704 tested = {nullrev}
696 705 candidates = _refinedgroups(
697 706 revlog,
698 707 p1,
699 708 p2,
700 709 cachedelta,
701 710 snapshot_cache=snapshot_cache,
702 711 )
703 712 while True:
704 713 temptative = candidates.send(good)
705 714 if temptative is None:
706 715 break
707 716 group = []
708 717 for rev in temptative:
709 718 # skip over empty deltas (no need to include them in a chain)
710 719 while revlog._generaldelta and not (
711 720 rev == nullrev or rev in tested or deltalength(rev)
712 721 ):
713 722 tested.add(rev)
714 723 rev = deltaparent(rev)
715 724 # no need to try a delta against nullrev, this will be done as a
716 725 # last resort.
717 726 if rev == nullrev:
718 727 continue
719 728 # filter out revisions we have already tested
720 729 if rev in tested:
721 730 continue
722 731
723 732 if (
724 733 cachedelta is not None
725 734 and rev == cachedelta[0]
726 735 and cachedelta[2] == DELTA_BASE_REUSE_FORCE
727 736 ):
728 737 # instructions are to forcibly consider/use this delta base
729 738 group.append(rev)
730 739 continue
731 740
732 741 # a higher authority deemed the base unworthy (e.g. censored)
733 742 if excluded_bases is not None and rev in excluded_bases:
734 743 tested.add(rev)
735 744 continue
736 745 # We are in some recomputation cases and that rev is too high in
737 746 # the revlog
738 747 if target_rev is not None and rev >= target_rev:
739 748 tested.add(rev)
740 749 continue
741 750 # filter out delta bases that will never produce a good delta
742 751 if deltas_limit < revlog.length(rev):
743 752 tested.add(rev)
744 753 continue
745 754 if sparse and revlog.rawsize(rev) < (textlen // LIMIT_BASE2TEXT):
746 755 tested.add(rev)
747 756 continue
748 757 # no delta for rawtext-changing revs (see "candelta" for why)
749 758 if revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
750 759 tested.add(rev)
751 760 continue
752 761
753 762 # If we reach here, we are about to build and test a delta.
754 763 # The delta building process will compute the chaininfo in all
755 764 # cases; since that computation is cached, it is fine to access it
756 765 # here too.
757 766 chainlen, chainsize = revlog._chaininfo(rev)
758 767 # if the chain would be too long, skip this base
759 768 if revlog._maxchainlen and chainlen >= revlog._maxchainlen:
760 769 tested.add(rev)
761 770 continue
762 771 # if the chain already has too much data, skip this base
763 772 if deltas_limit < chainsize:
764 773 tested.add(rev)
765 774 continue
766 775 if sparse and revlog.upperboundcomp is not None:
767 776 maxcomp = revlog.upperboundcomp
768 777 basenotsnap = (p1, p2, nullrev)
769 778 if rev not in basenotsnap and revlog.issnapshot(rev):
770 779 snapshotdepth = revlog.snapshotdepth(rev)
771 780 # If text is significantly larger than the base, we can
772 781 # expect the resulting delta to be proportional to the size
773 782 # difference
774 783 revsize = revlog.rawsize(rev)
775 784 rawsizedistance = max(textlen - revsize, 0)
776 785 # use an estimate of the compression upper bound.
777 786 lowestrealisticdeltalen = rawsizedistance // maxcomp
778 787
779 788 # check the absolute constraint on the delta size
780 789 snapshotlimit = textlen >> snapshotdepth
781 790 if snapshotlimit < lowestrealisticdeltalen:
782 791 # delta lower bound is larger than accepted upper bound
783 792 tested.add(rev)
784 793 continue
785 794
786 795 # check the relative constraint on the delta size
787 796 revlength = revlog.length(rev)
788 797 if revlength < lowestrealisticdeltalen:
789 798 # the delta's probable lower bound is larger than the target base
790 799 tested.add(rev)
791 800 continue
792 801
793 802 group.append(rev)
794 803 if group:
795 804 # When the candidate group is large, it can result in a quite
796 805 # significant performance impact. To reduce this, we can send the
797 806 # candidates in smaller batches until a new batch does not provide
798 807 # any improvement.
799 808 #
800 809 # This might reduce the overall efficiency of the compression in
801 810 # some corner cases, but it should also prevent very pathological
802 811 # cases (e.g. 20,000 candidates) from being an issue.
803 812 #
804 813 # XXX note that the ordering of the group becomes important as it
805 814 # now impacts the final result. The current order is unprocessed
806 815 # and can be improved.
807 816 if group_chunk_size == 0:
808 817 tested.update(group)
809 818 good = yield tuple(group)
810 819 else:
811 820 prev_good = good
812 821 for start in range(0, len(group), group_chunk_size):
813 822 sub_group = group[start : start + group_chunk_size]
814 823 tested.update(sub_group)
815 824 good = yield tuple(sub_group)
816 825 if prev_good == good:
817 826 break
818 827
819 828 yield None
820 829
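# Editor's sketch (not part of the original changeset): the send()-driven
# protocol used by _candidategroups. The caller pulls a group of candidate
# bases, tests them, and sends back the best base found (or None) so the
# generator can refine the next group. The toy generator below is
# hypothetical and only mirrors the shape of the protocol, not the
# selection logic.
def _toy_groups():
    good = yield (10, 11)  # first group of candidate revisions
    if good is not None:
        yield (good - 1,)  # "refine" around the accepted base
    yield None  # sentinel: no more candidates


_groups = _toy_groups()
_candidates = next(_groups)
while _candidates is not None:
    _best = max(_candidates)  # stand-in for building and scoring deltas
    _candidates = _groups.send(_best)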
821 830
822 831 def _refinedgroups(revlog, p1, p2, cachedelta, snapshot_cache=None):
823 832 good = None
824 833 # First we try to reuse the delta contained in the bundle.
825 834 # (or from the source revlog)
826 835 #
827 836 # This logic only applies to general delta repositories and can be disabled
828 837 # through configuration. Disabling source-delta reuse is useful when
829 838 # we want to make sure we recompute "optimal" deltas.
830 839 debug_info = None
831 840 if cachedelta is not None and cachedelta[2] > DELTA_BASE_REUSE_NO:
832 841 # Assume what we received from the server is a good choice;
833 842 # the delta builder will reuse the cache
834 843 if debug_info is not None:
835 844 debug_info['cached-delta.tested'] += 1
836 845 good = yield (cachedelta[0],)
837 846 if good is not None:
838 847 if debug_info is not None:
839 848 debug_info['cached-delta.accepted'] += 1
840 849 yield None
841 850 return
842 851 if snapshot_cache is None:
843 852 snapshot_cache = SnapshotCache()
844 853 groups = _rawgroups(
845 854 revlog,
846 855 p1,
847 856 p2,
848 857 cachedelta,
849 858 snapshot_cache,
850 859 )
851 860 for candidates in groups:
852 861 good = yield candidates
853 862 if good is not None:
854 863 break
855 864
856 865 # If sparse revlog is enabled, we can try to refine the available deltas
857 866 if not revlog._sparserevlog:
858 867 yield None
859 868 return
860 869
861 870 # if we have a refinable value, try to refine it
862 871 if good is not None and good not in (p1, p2) and revlog.issnapshot(good):
863 872 # refine snapshot down
864 873 previous = None
865 874 while previous != good:
866 875 previous = good
867 876 base = revlog.deltaparent(good)
868 877 if base == nullrev:
869 878 break
870 879 good = yield (base,)
871 880 # refine snapshot up
872 881 if not snapshot_cache.snapshots:
873 882 snapshot_cache.update(revlog, good + 1)
874 883 previous = None
875 884 while good != previous:
876 885 previous = good
877 886 children = tuple(sorted(c for c in snapshot_cache.snapshots[good]))
878 887 good = yield children
879 888
880 889 if debug_info is not None:
881 890 if good is None:
882 891 debug_info['no-solution'] += 1
883 892
884 893 yield None
885 894
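# Editor's sketch (not part of the original changeset): the two
# refinement directions used above, on a toy snapshot tree. '_parent'
# and '_children' are hypothetical stand-ins for revlog.deltaparent and
# snapshot_cache.snapshots.
_parent = {8: 4, 4: nullrev}
_children = {4: {8, 9}}

# refine down: once snapshot 8 is accepted, its delta parent (the
# shallower snapshot 4) is offered as a potentially better base
assert _parent[8] == 4
# refine up: snapshots already built on top of 4 (here 8 and 9) are
# offered, giving a chance to join an existing chain instead of
# opening a new one
assert tuple(sorted(_children[4])) == (8, 9)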
886 895
887 896 def _rawgroups(revlog, p1, p2, cachedelta, snapshot_cache=None):
888 897 """Provides group of revision to be tested as delta base
889 898
890 899 This lower-level function focuses on emitting deltas that are theoretically
891 900 interesting, without looking at any practical details.
892 901
893 902 The group order aims at providing fast or small candidates first.
894 903 """
895 904 gdelta = revlog._generaldelta
896 905 # gate sparse behind general-delta because of issue6056
897 906 sparse = gdelta and revlog._sparserevlog
898 907 curr = len(revlog)
899 908 prev = curr - 1
900 909 deltachain = lambda rev: revlog._deltachain(rev)[0]
901 910
902 911 if gdelta:
903 912 # exclude any base already lazily tested
904 913 parents = [p for p in (p1, p2) if p != nullrev]
905 914
906 915 if not revlog._deltabothparents and len(parents) == 2:
907 916 parents.sort()
908 917 # To minimize the chance of having to build a fulltext,
909 918 # pick first whichever parent is closest to us (max rev)
910 919 yield (parents[1],)
911 920 # then the other one (min rev) if the first did not fit
912 921 yield (parents[0],)
913 922 elif len(parents) > 0:
914 923 # Test all parents (1 or 2), and keep the best candidate
915 924 yield parents
916 925
917 926 if sparse and parents:
918 927 if snapshot_cache is None:
919 928 # map: base-rev: [snapshot-revs]
920 929 snapshot_cache = SnapshotCache()
921 930 # See if we can use an existing snapshot in the parent chains as a base
922 931 # for a new intermediate snapshot
923 932 #
924 933 # search for snapshots in the parents' delta chains
925 934 # map: snapshot-level: snapshot-rev
926 935 parents_snaps = collections.defaultdict(set)
927 936 candidate_chains = [deltachain(p) for p in parents]
928 937 for chain in candidate_chains:
929 938 for idx, s in enumerate(chain):
930 939 if not revlog.issnapshot(s):
931 940 break
932 941 parents_snaps[idx].add(s)
933 942 snapfloor = min(parents_snaps[0]) + 1
934 943 snapshot_cache.update(revlog, snapfloor)
935 944 # search for the highest "unrelated" revision
936 945 #
937 946 # Adding snapshots used by an "unrelated" revision increases the odds
938 947 # that we reuse an independent, yet better, snapshot chain.
939 948 #
940 949 # XXX instead of building a set of revisions, we could lazily enumerate
941 950 # over the chains. That would be more efficient, however we stick to
942 951 # simple code for now.
943 952 all_revs = set()
944 953 for chain in candidate_chains:
945 954 all_revs.update(chain)
946 955 other = None
947 956 for r in revlog.revs(prev, snapfloor):
948 957 if r not in all_revs:
949 958 other = r
950 959 break
951 960 if other is not None:
952 961 # To avoid unfair competition, we won't use unrelated intermediate
953 962 # snapshots that are deeper than the ones from the parent delta
954 963 # chain.
955 964 max_depth = max(parents_snaps.keys())
956 965 chain = deltachain(other)
957 966 for depth, s in enumerate(chain):
958 967 if s < snapfloor:
959 968 continue
960 969 if max_depth < depth:
961 970 break
962 971 if not revlog.issnapshot(s):
963 972 break
964 973 parents_snaps[depth].add(s)
965 974 # Test them as possible intermediate snapshot base
966 975 # We test them from highest to lowest level. High-level ones are more
967 976 # likely to result in a small delta
968 977 floor = None
969 978 for idx, snaps in sorted(parents_snaps.items(), reverse=True):
970 979 siblings = set()
971 980 for s in snaps:
972 981 siblings.update(snapshot_cache.snapshots[s])
973 982 # Before considering making a new intermediate snapshot, we check
974 983 # if an existing snapshot, a child of the base we consider, would
975 984 # be suitable.
976 985 #
977 986 # It gives a chance to reuse a delta chain "unrelated" to the
978 987 # current revision instead of starting our own. Without such
979 988 # re-use, topological branches would keep reopening new chains,
980 989 # creating more and more snapshots as the repository grows.
981 990
982 991 if floor is not None:
983 992 # We only do this for siblings created after the one in our
984 993 # parent's delta chain. Those created before have less chance
985 994 # of being a valid base since our ancestors had to create a new
986 995 # snapshot.
987 996 siblings = [r for r in siblings if floor < r]
988 997 yield tuple(sorted(siblings))
989 998 # then test the base from our parent's delta chain.
990 999 yield tuple(sorted(snaps))
991 1000 floor = min(snaps)
992 1001 # No suitable base found in the parent chain; search whether any full
993 1002 # snapshot emitted since the parent's base would be a suitable base for
994 1003 # an intermediate snapshot.
995 1004 #
996 1005 # It gives a chance to reuse a delta chain unrelated to the current
997 1006 # revision instead of starting our own. Without such re-use,
998 1007 # topological branches would keep reopening new full chains, creating
999 1008 # more and more snapshots as the repository grows.
1000 1009 full = [r for r in snapshot_cache.snapshots[nullrev] if snapfloor <= r]
1001 1010 yield tuple(sorted(full))
1002 1011
1003 1012 if not sparse:
1004 1013 # other approaches failed; try against prev to hopefully save us a
1005 1014 # fulltext.
1006 1015 yield (prev,)
1007 1016
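# Editor's sketch (not part of the original changeset): the parent
# ordering rule in _rawgroups above. With both parents present and
# _deltabothparents disabled, the closest parent (max rev) is yielded
# first to minimize the chance of having to build a fulltext.
_ps = sorted(p for p in (3, 7) if p != nullrev)
assert _ps[1] == 7  # tried first (closest to the new revision)
assert _ps[0] == 3  # tried next, only if the first did not fit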
1008 1017
1009 1018 class SnapshotCache:
1010 1019 __slots__ = ('snapshots', '_start_rev', '_end_rev')
1011 1020
1012 1021 def __init__(self):
1013 1022 self.snapshots = collections.defaultdict(set)
1014 1023 self._start_rev = None
1015 1024 self._end_rev = None
1016 1025
1017 1026 def update(self, revlog, start_rev=0):
1018 1027 """find snapshots from start_rev to tip"""
1019 1028 nb_revs = len(revlog)
1020 1029 end_rev = nb_revs - 1
1021 1030 if start_rev > end_rev:
1022 1031 return # range is empty
1023 1032
1024 1033 if self._start_rev is None:
1025 1034 assert self._end_rev is None
1026 1035 self._update(revlog, start_rev, end_rev)
1027 1036 elif not (self._start_rev <= start_rev and end_rev <= self._end_rev):
1028 1037 if start_rev < self._start_rev:
1029 1038 self._update(revlog, start_rev, self._start_rev - 1)
1030 1039 if self._end_rev < end_rev:
1031 1040 self._update(revlog, self._end_rev + 1, end_rev)
1032 1041
1033 1042 if self._start_rev is None:
1034 1043 assert self._end_rev is None
1035 1044 self._end_rev = end_rev
1036 1045 self._start_rev = start_rev
1037 1046 else:
1038 1047 self._start_rev = min(self._start_rev, start_rev)
1039 1048 self._end_rev = max(self._end_rev, end_rev)
1040 1049 assert self._start_rev <= self._end_rev, (
1041 1050 self._start_rev,
1042 1051 self._end_rev,
1043 1052 )
1044 1053
1045 1054 def _update(self, revlog, start_rev, end_rev):
1046 1055 """internal method that actually do update content"""
1047 1056 assert self._start_rev is None or (
1048 1057 start_rev < self._start_rev or start_rev > self._end_rev
1049 1058 ), (self._start_rev, self._end_rev, start_rev, end_rev)
1050 1059 assert self._start_rev is None or (
1051 1060 end_rev < self._start_rev or end_rev > self._end_rev
1052 1061 ), (self._start_rev, self._end_rev, start_rev, end_rev)
1053 1062 cache = self.snapshots
1054 1063 if util.safehasattr(revlog.index, b'findsnapshots'):
1055 1064 revlog.index.findsnapshots(cache, start_rev, end_rev)
1056 1065 else:
1057 1066 deltaparent = revlog.deltaparent
1058 1067 issnapshot = revlog.issnapshot
1059 1068 for rev in revlog.revs(start_rev, end_rev):
1060 1069 if issnapshot(rev):
1061 1070 cache[deltaparent(rev)].add(rev)
1062 1071
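# Editor's sketch (not part of the original changeset): how the cache
# maps a delta parent to the snapshots built on it. The fake revlog
# below is hypothetical and implements only what SnapshotCache._update
# needs on its fallback path (no 'findsnapshots' index support).
class _fakesnaprevlog:
    index = None  # no native findsnapshots support
    _parents = {0: nullrev, 1: 0, 2: 0}  # delta parents
    _snaps = {0, 2}  # rev 0 is a full snapshot, rev 2 an intermediate one

    def __len__(self):
        return 3

    def revs(self, start, stop):
        return range(start, stop + 1)

    def deltaparent(self, rev):
        return self._parents[rev]

    def issnapshot(self, rev):
        return rev in self._snaps


_cache = SnapshotCache()
_cache.update(_fakesnaprevlog())
assert _cache.snapshots[nullrev] == {0}  # full snapshot, based on nullrev
assert _cache.snapshots[0] == {2}  # intermediate snapshot based on rev 0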
1063 1072
1064 1073 class deltacomputer:
1065 1074 def __init__(
1066 1075 self,
1067 1076 revlog,
1068 1077 write_debug=None,
1069 1078 debug_search=False,
1070 1079 debug_info=None,
1071 1080 ):
1072 1081 self.revlog = revlog
1073 1082 self._write_debug = write_debug
1074 1083 self._debug_search = debug_search
1075 1084 self._debug_info = debug_info
1076 1085 self._snapshot_cache = SnapshotCache()
1077 1086
1078 1087 def buildtext(self, revinfo, fh):
1079 1088 """Builds a fulltext version of a revision
1080 1089
1081 1090 revinfo: revisioninfo instance that contains all needed info
1082 1091 fh: file handle to either the .i or the .d revlog file,
1083 1092 depending on whether it is inlined or not
1084 1093 """
1085 1094 btext = revinfo.btext
1086 1095 if btext[0] is not None:
1087 1096 return btext[0]
1088 1097
1089 1098 revlog = self.revlog
1090 1099 cachedelta = revinfo.cachedelta
1091 1100 baserev = cachedelta[0]
1092 1101 delta = cachedelta[1]
1093 1102
1094 1103 fulltext = btext[0] = _textfromdelta(
1095 1104 fh,
1096 1105 revlog,
1097 1106 baserev,
1098 1107 delta,
1099 1108 revinfo.p1,
1100 1109 revinfo.p2,
1101 1110 revinfo.flags,
1102 1111 revinfo.node,
1103 1112 )
1104 1113 return fulltext
1105 1114
1106 1115 def _builddeltadiff(self, base, revinfo, fh):
1107 1116 revlog = self.revlog
1108 1117 t = self.buildtext(revinfo, fh)
1109 1118 if revlog.iscensored(base):
1110 1119 # deltas based on a censored revision must replace the
1111 1120 # full content in one patch, so the delta works everywhere
1112 1121 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
1113 1122 delta = header + t
1114 1123 else:
1115 1124 ptext = revlog.rawdata(base, _df=fh)
1116 1125 delta = mdiff.textdiff(ptext, t)
1117 1126
1118 1127 return delta
1119 1128
1120 1129 def _builddeltainfo(self, revinfo, base, fh):
1121 1130 # can we use the cached delta?
1122 1131 revlog = self.revlog
1123 1132 debug_search = self._write_debug is not None and self._debug_search
1124 1133 chainbase = revlog.chainbase(base)
1125 1134 if revlog._generaldelta:
1126 1135 deltabase = base
1127 1136 else:
1128 1137 deltabase = chainbase
1129 1138 snapshotdepth = None
1130 1139 if revlog._sparserevlog and deltabase == nullrev:
1131 1140 snapshotdepth = 0
1132 1141 elif revlog._sparserevlog and revlog.issnapshot(deltabase):
1133 1142 # A delta chain should always be one full snapshot,
1134 1143 # zero or more semi-snapshots, and zero or more deltas
1135 1144 p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
1136 1145 if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
1137 1146 snapshotdepth = len(revlog._deltachain(deltabase)[0])
1138 1147 delta = None
1139 1148 if revinfo.cachedelta:
1140 1149 cachebase = revinfo.cachedelta[0]
1141 1150 # check if the diff still applies
1142 1151 currentbase = cachebase
1143 1152 while (
1144 1153 currentbase != nullrev
1145 1154 and currentbase != base
1146 1155 and self.revlog.length(currentbase) == 0
1147 1156 ):
1148 1157 currentbase = self.revlog.deltaparent(currentbase)
1149 1158 if self.revlog._lazydelta and currentbase == base:
1150 1159 delta = revinfo.cachedelta[1]
1151 1160 if delta is None:
1152 1161 delta = self._builddeltadiff(base, revinfo, fh)
1153 1162 if debug_search:
1154 1163 msg = b"DBG-DELTAS-SEARCH: uncompressed-delta-size=%d\n"
1155 1164 msg %= len(delta)
1156 1165 self._write_debug(msg)
1157 1166 # snapshotdepth needs to be neither None nor a 0-level snapshot
1158 1167 if revlog.upperboundcomp is not None and snapshotdepth:
1159 1168 lowestrealisticdeltalen = len(delta) // revlog.upperboundcomp
1160 1169 snapshotlimit = revinfo.textlen >> snapshotdepth
1161 1170 if debug_search:
1162 1171 msg = b"DBG-DELTAS-SEARCH: projected-lower-size=%d\n"
1163 1172 msg %= lowestrealisticdeltalen
1164 1173 self._write_debug(msg)
1165 1174 if snapshotlimit < lowestrealisticdeltalen:
1166 1175 if debug_search:
1167 1176 msg = b"DBG-DELTAS-SEARCH: DISCARDED (snapshot limit)\n"
1168 1177 self._write_debug(msg)
1169 1178 return None
1170 1179 if revlog.length(base) < lowestrealisticdeltalen:
1171 1180 if debug_search:
1172 1181 msg = b"DBG-DELTAS-SEARCH: DISCARDED (prev size)\n"
1173 1182 self._write_debug(msg)
1174 1183 return None
1175 1184 header, data = revlog.compress(delta)
1176 1185 deltalen = len(header) + len(data)
1177 1186 offset = revlog.end(len(revlog) - 1)
1178 1187 dist = deltalen + offset - revlog.start(chainbase)
1179 1188 chainlen, compresseddeltalen = revlog._chaininfo(base)
1180 1189 chainlen += 1
1181 1190 compresseddeltalen += deltalen
1182 1191
1183 1192 return _deltainfo(
1184 1193 dist,
1185 1194 deltalen,
1186 1195 (header, data),
1187 1196 deltabase,
1188 1197 chainbase,
1189 1198 chainlen,
1190 1199 compresseddeltalen,
1191 1200 snapshotdepth,
1192 1201 )
1193 1202
1194 1203 def _fullsnapshotinfo(self, fh, revinfo, curr):
1195 1204 rawtext = self.buildtext(revinfo, fh)
1196 1205 data = self.revlog.compress(rawtext)
1197 1206 compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
1198 1207 deltabase = chainbase = curr
1199 1208 snapshotdepth = 0
1200 1209 chainlen = 1
1201 1210
1202 1211 return _deltainfo(
1203 1212 dist,
1204 1213 deltalen,
1205 1214 data,
1206 1215 deltabase,
1207 1216 chainbase,
1208 1217 chainlen,
1209 1218 compresseddeltalen,
1210 1219 snapshotdepth,
1211 1220 )
1212 1221
1213 1222 def finddeltainfo(self, revinfo, fh, excluded_bases=None, target_rev=None):
1214 1223 """Find an acceptable delta against a candidate revision
1215 1224
1216 1225 revinfo: information about the revision (instance of _revisioninfo)
1217 1226 fh: file handle to either the .i or the .d revlog file,
1218 1227 depending on whether it is inlined or not
1219 1228
1220 1229 Returns the first acceptable candidate revision, as ordered by
1221 1230 _candidategroups
1222 1231
1223 1232 If no suitable deltabase is found, we return delta info for a full
1224 1233 snapshot.
1225 1234
1226 1235 `excluded_bases` is an optional set of revisions that cannot be used as
1227 1236 a delta base. Use this to recompute deltas suitable in a censor or strip
1228 1237 context.
1229 1238 """
1230 1239 if target_rev is None:
1231 1240 target_rev = len(self.revlog)
1232 1241
1233 1242 if not revinfo.textlen:
1234 1243 return self._fullsnapshotinfo(fh, revinfo, target_rev)
1235 1244
1236 1245 if excluded_bases is None:
1237 1246 excluded_bases = set()
1238 1247
1239 1248 # no delta for flag processor revisions (see "candelta" for why)
1240 1249 # not calling candelta since only one revision needs testing, also to
1241 1250 # avoid the overhead of fetching flags again.
1242 1251 if revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
1243 1252 return self._fullsnapshotinfo(fh, revinfo, target_rev)
1244 1253
1245 1254 gather_debug = (
1246 1255 self._write_debug is not None or self._debug_info is not None
1247 1256 )
1248 1257 debug_search = self._write_debug is not None and self._debug_search
1249 1258
1250 1259 if gather_debug:
1251 1260 start = util.timer()
1252 1261
1253 1262 # count the number of different deltas we tried (for debug purposes)
1254 1263 dbg_try_count = 0
1255 1264 # count the number of "search rounds" we did (for debug purposes)
1256 1265 dbg_try_rounds = 0
1257 1266 dbg_type = b'unknown'
1258 1267
1259 1268 cachedelta = revinfo.cachedelta
1260 1269 p1 = revinfo.p1
1261 1270 p2 = revinfo.p2
1262 1271 revlog = self.revlog
1263 1272
1264 1273 deltainfo = None
1265 1274 p1r, p2r = revlog.rev(p1), revlog.rev(p2)
1266 1275
1267 1276 if gather_debug:
1268 1277 if p1r != nullrev:
1269 1278 p1_chain_len = revlog._chaininfo(p1r)[0]
1270 1279 else:
1271 1280 p1_chain_len = -1
1272 1281 if p2r != nullrev:
1273 1282 p2_chain_len = revlog._chaininfo(p2r)[0]
1274 1283 else:
1275 1284 p2_chain_len = -1
1276 1285 if debug_search:
1277 1286 msg = b"DBG-DELTAS-SEARCH: SEARCH rev=%d\n"
1278 1287 msg %= target_rev
1279 1288 self._write_debug(msg)
1280 1289
1281 1290 groups = _candidategroups(
1282 1291 self.revlog,
1283 1292 revinfo.textlen,
1284 1293 p1r,
1285 1294 p2r,
1286 1295 cachedelta,
1287 1296 excluded_bases,
1288 1297 target_rev,
1289 1298 snapshot_cache=self._snapshot_cache,
1290 1299 )
1291 1300 candidaterevs = next(groups)
1292 1301 while candidaterevs is not None:
1293 1302 dbg_try_rounds += 1
1294 1303 if debug_search:
1295 1304 prev = None
1296 1305 if deltainfo is not None:
1297 1306 prev = deltainfo.base
1298 1307
1299 1308 if (
1300 1309 cachedelta is not None
1301 1310 and len(candidaterevs) == 1
1302 1311 and cachedelta[0] in candidaterevs
1303 1312 ):
1304 1313 round_type = b"cached-delta"
1305 1314 elif p1 in candidaterevs or p2 in candidaterevs:
1306 1315 round_type = b"parents"
1307 1316 elif prev is not None and all(c < prev for c in candidaterevs):
1308 1317 round_type = b"refine-down"
1309 1318 elif prev is not None and all(c > prev for c in candidaterevs):
1310 1319 round_type = b"refine-up"
1311 1320 else:
1312 1321 round_type = b"search-down"
1313 1322 msg = b"DBG-DELTAS-SEARCH: ROUND #%d - %d candidates - %s\n"
1314 1323 msg %= (dbg_try_rounds, len(candidaterevs), round_type)
1315 1324 self._write_debug(msg)
1316 1325 nominateddeltas = []
1317 1326 if deltainfo is not None:
1318 1327 if debug_search:
1319 1328 msg = (
1320 1329 b"DBG-DELTAS-SEARCH: CONTENDER: rev=%d - length=%d\n"
1321 1330 )
1322 1331 msg %= (deltainfo.base, deltainfo.deltalen)
1323 1332 self._write_debug(msg)
1324 1333 # if we already found a good delta,
1325 1334 # challenge it against refined candidates
1326 1335 nominateddeltas.append(deltainfo)
1327 1336 for candidaterev in candidaterevs:
1328 1337 if debug_search:
1329 1338 msg = b"DBG-DELTAS-SEARCH: CANDIDATE: rev=%d\n"
1330 1339 msg %= candidaterev
1331 1340 self._write_debug(msg)
1332 1341 candidate_type = None
1333 1342 if candidaterev == p1:
1334 1343 candidate_type = b"p1"
1335 1344 elif candidaterev == p2:
1336 1345 candidate_type = b"p2"
1337 1346 elif self.revlog.issnapshot(candidaterev):
1338 1347 candidate_type = b"snapshot-%d"
1339 1348 candidate_type %= self.revlog.snapshotdepth(
1340 1349 candidaterev
1341 1350 )
1342 1351
1343 1352 if candidate_type is not None:
1344 1353 msg = b"DBG-DELTAS-SEARCH: type=%s\n"
1345 1354 msg %= candidate_type
1346 1355 self._write_debug(msg)
1347 1356 msg = b"DBG-DELTAS-SEARCH: size=%d\n"
1348 1357 msg %= self.revlog.length(candidaterev)
1349 1358 self._write_debug(msg)
1350 1359 msg = b"DBG-DELTAS-SEARCH: base=%d\n"
1351 1360 msg %= self.revlog.deltaparent(candidaterev)
1352 1361 self._write_debug(msg)
1353 1362
1354 1363 dbg_try_count += 1
1355 1364
1356 1365 if debug_search:
1357 1366 delta_start = util.timer()
1358 1367 candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
1359 1368 if debug_search:
1360 1369 delta_end = util.timer()
1361 1370 msg = b"DBG-DELTAS-SEARCH: delta-search-time=%f\n"
1362 1371 msg %= delta_end - delta_start
1363 1372 self._write_debug(msg)
1364 1373 if candidatedelta is not None:
1365 1374 if is_good_delta_info(self.revlog, candidatedelta, revinfo):
1366 1375 if debug_search:
1367 1376 msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (GOOD)\n"
1368 1377 msg %= candidatedelta.deltalen
1369 1378 self._write_debug(msg)
1370 1379 nominateddeltas.append(candidatedelta)
1371 1380 elif debug_search:
1372 1381 msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (BAD)\n"
1373 1382 msg %= candidatedelta.deltalen
1374 1383 self._write_debug(msg)
1375 1384 elif debug_search:
1376 1385 msg = b"DBG-DELTAS-SEARCH: NO-DELTA\n"
1377 1386 self._write_debug(msg)
1378 1387 if nominateddeltas:
1379 1388 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
1380 1389 if deltainfo is not None:
1381 1390 candidaterevs = groups.send(deltainfo.base)
1382 1391 else:
1383 1392 candidaterevs = next(groups)
1384 1393
1385 1394 if deltainfo is None:
1386 1395 dbg_type = b"full"
1387 1396 deltainfo = self._fullsnapshotinfo(fh, revinfo, target_rev)
1388 1397 elif deltainfo.snapshotdepth: # pytype: disable=attribute-error
1389 1398 dbg_type = b"snapshot"
1390 1399 else:
1391 1400 dbg_type = b"delta"
1392 1401
1393 1402 if gather_debug:
1394 1403 end = util.timer()
1395 1404 if dbg_type == b'full':
1396 1405 used_cached = (
1397 1406 cachedelta is not None
1398 1407 and dbg_try_rounds == 0
1399 1408 and dbg_try_count == 0
1400 1409 and cachedelta[0] == nullrev
1401 1410 )
1402 1411 else:
1403 1412 used_cached = (
1404 1413 cachedelta is not None
1405 1414 and dbg_try_rounds == 1
1406 1415 and dbg_try_count == 1
1407 1416 and deltainfo.base == cachedelta[0]
1408 1417 )
1409 1418 dbg = {
1410 1419 'duration': end - start,
1411 1420 'revision': target_rev,
1412 1421 'delta-base': deltainfo.base, # pytype: disable=attribute-error
1413 1422 'search_round_count': dbg_try_rounds,
1414 1423 'using-cached-base': used_cached,
1415 1424 'delta_try_count': dbg_try_count,
1416 1425 'type': dbg_type,
1417 1426 'p1-chain-len': p1_chain_len,
1418 1427 'p2-chain-len': p2_chain_len,
1419 1428 }
1420 1429 if (
1421 1430 deltainfo.snapshotdepth # pytype: disable=attribute-error
1422 1431 is not None
1423 1432 ):
1424 1433 dbg[
1425 1434 'snapshot-depth'
1426 1435 ] = deltainfo.snapshotdepth # pytype: disable=attribute-error
1427 1436 else:
1428 1437 dbg['snapshot-depth'] = 0
1429 1438 target_revlog = b"UNKNOWN"
1430 1439 target_type = self.revlog.target[0]
1431 1440 target_key = self.revlog.target[1]
1432 1441 if target_type == KIND_CHANGELOG:
1433 1442 target_revlog = b'CHANGELOG:'
1434 1443 elif target_type == KIND_MANIFESTLOG:
1435 1444 target_revlog = b'MANIFESTLOG:'
1436 1445 if target_key:
1437 1446 target_revlog += b'%s:' % target_key
1438 1447 elif target_type == KIND_FILELOG:
1439 1448 target_revlog = b'FILELOG:'
1440 1449 if target_key:
1441 1450 target_revlog += b'%s:' % target_key
1442 1451 dbg['target-revlog'] = target_revlog
1443 1452
1444 1453 if self._debug_info is not None:
1445 1454 self._debug_info.append(dbg)
1446 1455
1447 1456 if self._write_debug is not None:
1448 1457 msg = (
1449 1458 b"DBG-DELTAS:"
1450 1459 b" %-12s"
1451 1460 b" rev=%d:"
1452 1461 b" delta-base=%d"
1453 1462 b" is-cached=%d"
1454 1463 b" - search-rounds=%d"
1455 1464 b" try-count=%d"
1456 1465 b" - delta-type=%-6s"
1457 1466 b" snap-depth=%d"
1458 1467 b" - p1-chain-length=%d"
1459 1468 b" p2-chain-length=%d"
1460 1469 b" - duration=%f"
1461 1470 b"\n"
1462 1471 )
1463 1472 msg %= (
1464 1473 dbg["target-revlog"],
1465 1474 dbg["revision"],
1466 1475 dbg["delta-base"],
1467 1476 dbg["using-cached-base"],
1468 1477 dbg["search_round_count"],
1469 1478 dbg["delta_try_count"],
1470 1479 dbg["type"],
1471 1480 dbg["snapshot-depth"],
1472 1481 dbg["p1-chain-len"],
1473 1482 dbg["p2-chain-len"],
1474 1483 dbg["duration"],
1475 1484 )
1476 1485 self._write_debug(msg)
1477 1486 return deltainfo
1478 1487
1479 1488
1480 1489 def delta_compression(default_compression_header, deltainfo):
1481 1490 """return (COMPRESSION_MODE, deltainfo)
1482 1491
1483 1492 used by revlog v2+ format to dispatch between PLAIN and DEFAULT
1484 1493 compression.
1485 1494 """
1486 1495 h, d = deltainfo.data
1487 1496 compression_mode = COMP_MODE_INLINE
1488 1497 if not h and not d:
1489 1498 # no data to store at all... declare it uncompressed
1490 1499 compression_mode = COMP_MODE_PLAIN
1491 1500 elif not h:
1492 1501 t = d[0:1]
1493 1502 if t == b'\0':
1494 1503 compression_mode = COMP_MODE_PLAIN
1495 1504 elif t == default_compression_header:
1496 1505 compression_mode = COMP_MODE_DEFAULT
1497 1506 elif h == b'u':
1498 1507 # we have a more efficient way to declare uncompressed
1499 1508 h = b''
1500 1509 compression_mode = COMP_MODE_PLAIN
1501 1510 deltainfo = drop_u_compression(deltainfo)
1502 1511 return compression_mode, deltainfo
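# Editor's sketch (not part of the original changeset): which
# COMP_MODE_* each (header, data) shape selects, using a hypothetical
# zlib-style default header b'x' and hand-built _deltainfo values.
def _mkinfo(data):
    return _deltainfo(0, len(data[0]) + len(data[1]), data, 0, 0, 1, 0, None)


# a b'u' header marks uncompressed data: rewritten as PLAIN, header dropped
_mode, _info = delta_compression(b'x', _mkinfo((b'u', b'raw delta')))
assert _mode == COMP_MODE_PLAIN and _info.data == (b'', b'raw delta')

# headerless payload starting with the default compressor's magic byte
_mode, _ = delta_compression(b'x', _mkinfo((b'', b'x\x9c...')))
assert _mode == COMP_MODE_DEFAULT

# headerless payload starting with b'\0' is stored as plain
_mode, _ = delta_compression(b'x', _mkinfo((b'', b'\0plain')))
assert _mode == COMP_MODE_PLAIN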