##// END OF EJS Templates
delta-find: small documentation update...
marmoute -
r50509:5447c150 default
parent child Browse files
Show More
@@ -1,1398 +1,1398 b''
1 1 # revlogdeltas.py - Logic around delta computation for revlog
2 2 #
3 3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 4 # Copyright 2018 Octobus <contact@octobus.net>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8 """Helper class to compute deltas stored inside revlogs"""
9 9
10 10
11 11 import collections
12 12 import struct
13 13
14 14 # import stuff from node for others to import from revlog
15 15 from ..node import nullrev
16 16 from ..i18n import _
17 17 from ..pycompat import getattr
18 18
19 19 from .constants import (
20 20 COMP_MODE_DEFAULT,
21 21 COMP_MODE_INLINE,
22 22 COMP_MODE_PLAIN,
23 23 KIND_CHANGELOG,
24 24 KIND_FILELOG,
25 25 KIND_MANIFESTLOG,
26 26 REVIDX_ISCENSORED,
27 27 REVIDX_RAWTEXT_CHANGING_FLAGS,
28 28 )
29 29
30 30 from ..thirdparty import attr
31 31
32 32 from .. import (
33 33 error,
34 34 mdiff,
35 35 util,
36 36 )
37 37
38 38 from . import flagutil
39 39
40 40 # maximum <delta-chain-data>/<revision-text-length> ratio
41 41 LIMIT_DELTA2TEXT = 2
42 42
43 43
44 44 class _testrevlog:
45 45 """minimalist fake revlog to use in doctests"""
46 46
47 47 def __init__(self, data, density=0.5, mingap=0, snapshot=()):
48 48 """data is an list of revision payload boundaries"""
49 49 self._data = data
50 50 self._srdensitythreshold = density
51 51 self._srmingapsize = mingap
52 52 self._snapshot = set(snapshot)
53 53 self.index = None
54 54
55 55 def start(self, rev):
56 56 if rev == nullrev:
57 57 return 0
58 58 if rev == 0:
59 59 return 0
60 60 return self._data[rev - 1]
61 61
62 62 def end(self, rev):
63 63 if rev == nullrev:
64 64 return 0
65 65 return self._data[rev]
66 66
67 67 def length(self, rev):
68 68 return self.end(rev) - self.start(rev)
69 69
70 70 def __len__(self):
71 71 return len(self._data)
72 72
73 73 def issnapshot(self, rev):
74 74 if rev == nullrev:
75 75 return True
76 76 return rev in self._snapshot
77 77
78 78
79 79 def slicechunk(revlog, revs, targetsize=None):
80 80 """slice revs to reduce the amount of unrelated data to be read from disk.
81 81
82 82 ``revs`` is sliced into groups that should be read in one time.
83 83 Assume that revs are sorted.
84 84
85 85 The initial chunk is sliced until the overall density (payload/chunks-span
86 86 ratio) is above `revlog._srdensitythreshold`. No gap smaller than
87 87 `revlog._srmingapsize` is skipped.
88 88
89 89 If `targetsize` is set, no chunk larger than `targetsize` will be yield.
90 90 For consistency with other slicing choice, this limit won't go lower than
91 91 `revlog._srmingapsize`.
92 92
93 93 If individual revisions chunk are larger than this limit, they will still
94 94 be raised individually.
95 95
96 96 >>> data = [
97 97 ... 5, #00 (5)
98 98 ... 10, #01 (5)
99 99 ... 12, #02 (2)
100 100 ... 12, #03 (empty)
101 101 ... 27, #04 (15)
102 102 ... 31, #05 (4)
103 103 ... 31, #06 (empty)
104 104 ... 42, #07 (11)
105 105 ... 47, #08 (5)
106 106 ... 47, #09 (empty)
107 107 ... 48, #10 (1)
108 108 ... 51, #11 (3)
109 109 ... 74, #12 (23)
110 110 ... 85, #13 (11)
111 111 ... 86, #14 (1)
112 112 ... 91, #15 (5)
113 113 ... ]
114 114 >>> revlog = _testrevlog(data, snapshot=range(16))
115 115
116 116 >>> list(slicechunk(revlog, list(range(16))))
117 117 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
118 118 >>> list(slicechunk(revlog, [0, 15]))
119 119 [[0], [15]]
120 120 >>> list(slicechunk(revlog, [0, 11, 15]))
121 121 [[0], [11], [15]]
122 122 >>> list(slicechunk(revlog, [0, 11, 13, 15]))
123 123 [[0], [11, 13, 15]]
124 124 >>> list(slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
125 125 [[1, 2], [5, 8, 10, 11], [14]]
126 126
127 127 Slicing with a maximum chunk size
128 128 >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
129 129 [[0], [11], [13], [15]]
130 130 >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
131 131 [[0], [11], [13, 15]]
132 132
133 133 Slicing involving nullrev
134 134 >>> list(slicechunk(revlog, [-1, 0, 11, 13, 15], targetsize=20))
135 135 [[-1, 0], [11], [13, 15]]
136 136 >>> list(slicechunk(revlog, [-1, 13, 15], targetsize=5))
137 137 [[-1], [13], [15]]
138 138 """
139 139 if targetsize is not None:
140 140 targetsize = max(targetsize, revlog._srmingapsize)
141 141 # targetsize should not be specified when evaluating delta candidates:
142 142 # * targetsize is used to ensure we stay within specification when reading,
143 143 densityslicing = getattr(revlog.index, 'slicechunktodensity', None)
144 144 if densityslicing is None:
145 145 densityslicing = lambda x, y, z: _slicechunktodensity(revlog, x, y, z)
146 146 for chunk in densityslicing(
147 147 revs, revlog._srdensitythreshold, revlog._srmingapsize
148 148 ):
149 149 for subchunk in _slicechunktosize(revlog, chunk, targetsize):
150 150 yield subchunk
151 151
152 152
153 153 def _slicechunktosize(revlog, revs, targetsize=None):
154 154 """slice revs to match the target size
155 155
156 156 This is intended to be used on chunk that density slicing selected by that
157 157 are still too large compared to the read garantee of revlog. This might
158 158 happens when "minimal gap size" interrupted the slicing or when chain are
159 159 built in a way that create large blocks next to each other.
160 160
161 161 >>> data = [
162 162 ... 3, #0 (3)
163 163 ... 5, #1 (2)
164 164 ... 6, #2 (1)
165 165 ... 8, #3 (2)
166 166 ... 8, #4 (empty)
167 167 ... 11, #5 (3)
168 168 ... 12, #6 (1)
169 169 ... 13, #7 (1)
170 170 ... 14, #8 (1)
171 171 ... ]
172 172
173 173 == All snapshots cases ==
174 174 >>> revlog = _testrevlog(data, snapshot=range(9))
175 175
176 176 Cases where chunk is already small enough
177 177 >>> list(_slicechunktosize(revlog, [0], 3))
178 178 [[0]]
179 179 >>> list(_slicechunktosize(revlog, [6, 7], 3))
180 180 [[6, 7]]
181 181 >>> list(_slicechunktosize(revlog, [0], None))
182 182 [[0]]
183 183 >>> list(_slicechunktosize(revlog, [6, 7], None))
184 184 [[6, 7]]
185 185
186 186 cases where we need actual slicing
187 187 >>> list(_slicechunktosize(revlog, [0, 1], 3))
188 188 [[0], [1]]
189 189 >>> list(_slicechunktosize(revlog, [1, 3], 3))
190 190 [[1], [3]]
191 191 >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
192 192 [[1, 2], [3]]
193 193 >>> list(_slicechunktosize(revlog, [3, 5], 3))
194 194 [[3], [5]]
195 195 >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
196 196 [[3], [5]]
197 197 >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
198 198 [[5], [6, 7, 8]]
199 199 >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
200 200 [[0], [1, 2], [3], [5], [6, 7, 8]]
201 201
202 202 Case with too large individual chunk (must return valid chunk)
203 203 >>> list(_slicechunktosize(revlog, [0, 1], 2))
204 204 [[0], [1]]
205 205 >>> list(_slicechunktosize(revlog, [1, 3], 1))
206 206 [[1], [3]]
207 207 >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
208 208 [[3], [5]]
209 209
210 210 == No Snapshot cases ==
211 211 >>> revlog = _testrevlog(data)
212 212
213 213 Cases where chunk is already small enough
214 214 >>> list(_slicechunktosize(revlog, [0], 3))
215 215 [[0]]
216 216 >>> list(_slicechunktosize(revlog, [6, 7], 3))
217 217 [[6, 7]]
218 218 >>> list(_slicechunktosize(revlog, [0], None))
219 219 [[0]]
220 220 >>> list(_slicechunktosize(revlog, [6, 7], None))
221 221 [[6, 7]]
222 222
223 223 cases where we need actual slicing
224 224 >>> list(_slicechunktosize(revlog, [0, 1], 3))
225 225 [[0], [1]]
226 226 >>> list(_slicechunktosize(revlog, [1, 3], 3))
227 227 [[1], [3]]
228 228 >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
229 229 [[1], [2, 3]]
230 230 >>> list(_slicechunktosize(revlog, [3, 5], 3))
231 231 [[3], [5]]
232 232 >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
233 233 [[3], [4, 5]]
234 234 >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
235 235 [[5], [6, 7, 8]]
236 236 >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
237 237 [[0], [1, 2], [3], [5], [6, 7, 8]]
238 238
239 239 Case with too large individual chunk (must return valid chunk)
240 240 >>> list(_slicechunktosize(revlog, [0, 1], 2))
241 241 [[0], [1]]
242 242 >>> list(_slicechunktosize(revlog, [1, 3], 1))
243 243 [[1], [3]]
244 244 >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
245 245 [[3], [5]]
246 246
247 247 == mixed case ==
248 248 >>> revlog = _testrevlog(data, snapshot=[0, 1, 2])
249 249 >>> list(_slicechunktosize(revlog, list(range(9)), 5))
250 250 [[0, 1], [2], [3, 4, 5], [6, 7, 8]]
251 251 """
252 252 assert targetsize is None or 0 <= targetsize
253 253 startdata = revlog.start(revs[0])
254 254 enddata = revlog.end(revs[-1])
255 255 fullspan = enddata - startdata
256 256 if targetsize is None or fullspan <= targetsize:
257 257 yield revs
258 258 return
259 259
260 260 startrevidx = 0
261 261 endrevidx = 1
262 262 iterrevs = enumerate(revs)
263 263 next(iterrevs) # skip first rev.
264 264 # first step: get snapshots out of the way
265 265 for idx, r in iterrevs:
266 266 span = revlog.end(r) - startdata
267 267 snapshot = revlog.issnapshot(r)
268 268 if span <= targetsize and snapshot:
269 269 endrevidx = idx + 1
270 270 else:
271 271 chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
272 272 if chunk:
273 273 yield chunk
274 274 startrevidx = idx
275 275 startdata = revlog.start(r)
276 276 endrevidx = idx + 1
277 277 if not snapshot:
278 278 break
279 279
280 280 # for the others, we use binary slicing to quickly converge toward valid
281 281 # chunks (otherwise, we might end up looking for start/end of many
282 282 # revisions). This logic is not looking for the perfect slicing point, it
283 283 # focuses on quickly converging toward valid chunks.
284 284 nbitem = len(revs)
285 285 while (enddata - startdata) > targetsize:
286 286 endrevidx = nbitem
287 287 if nbitem - startrevidx <= 1:
288 288 break # protect against individual chunk larger than limit
289 289 localenddata = revlog.end(revs[endrevidx - 1])
290 290 span = localenddata - startdata
291 291 while span > targetsize:
292 292 if endrevidx - startrevidx <= 1:
293 293 break # protect against individual chunk larger than limit
294 294 endrevidx -= (endrevidx - startrevidx) // 2
295 295 localenddata = revlog.end(revs[endrevidx - 1])
296 296 span = localenddata - startdata
297 297 chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
298 298 if chunk:
299 299 yield chunk
300 300 startrevidx = endrevidx
301 301 startdata = revlog.start(revs[startrevidx])
302 302
303 303 chunk = _trimchunk(revlog, revs, startrevidx)
304 304 if chunk:
305 305 yield chunk
306 306
307 307
308 308 def _slicechunktodensity(revlog, revs, targetdensity=0.5, mingapsize=0):
309 309 """slice revs to reduce the amount of unrelated data to be read from disk.
310 310
311 311 ``revs`` is sliced into groups that should be read in one time.
312 312 Assume that revs are sorted.
313 313
314 314 The initial chunk is sliced until the overall density (payload/chunks-span
315 315 ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
316 316 skipped.
317 317
318 318 >>> revlog = _testrevlog([
319 319 ... 5, #00 (5)
320 320 ... 10, #01 (5)
321 321 ... 12, #02 (2)
322 322 ... 12, #03 (empty)
323 323 ... 27, #04 (15)
324 324 ... 31, #05 (4)
325 325 ... 31, #06 (empty)
326 326 ... 42, #07 (11)
327 327 ... 47, #08 (5)
328 328 ... 47, #09 (empty)
329 329 ... 48, #10 (1)
330 330 ... 51, #11 (3)
331 331 ... 74, #12 (23)
332 332 ... 85, #13 (11)
333 333 ... 86, #14 (1)
334 334 ... 91, #15 (5)
335 335 ... ])
336 336
337 337 >>> list(_slicechunktodensity(revlog, list(range(16))))
338 338 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
339 339 >>> list(_slicechunktodensity(revlog, [0, 15]))
340 340 [[0], [15]]
341 341 >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
342 342 [[0], [11], [15]]
343 343 >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
344 344 [[0], [11, 13, 15]]
345 345 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
346 346 [[1, 2], [5, 8, 10, 11], [14]]
347 347 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
348 348 ... mingapsize=20))
349 349 [[1, 2, 3, 5, 8, 10, 11], [14]]
350 350 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
351 351 ... targetdensity=0.95))
352 352 [[1, 2], [5], [8, 10, 11], [14]]
353 353 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
354 354 ... targetdensity=0.95, mingapsize=12))
355 355 [[1, 2], [5, 8, 10, 11], [14]]
356 356 """
357 357 start = revlog.start
358 358 length = revlog.length
359 359
360 360 if len(revs) <= 1:
361 361 yield revs
362 362 return
363 363
364 364 deltachainspan = segmentspan(revlog, revs)
365 365
366 366 if deltachainspan < mingapsize:
367 367 yield revs
368 368 return
369 369
370 370 readdata = deltachainspan
371 371 chainpayload = sum(length(r) for r in revs)
372 372
373 373 if deltachainspan:
374 374 density = chainpayload / float(deltachainspan)
375 375 else:
376 376 density = 1.0
377 377
378 378 if density >= targetdensity:
379 379 yield revs
380 380 return
381 381
382 382 # Store the gaps in a heap to have them sorted by decreasing size
383 383 gaps = []
384 384 prevend = None
385 385 for i, rev in enumerate(revs):
386 386 revstart = start(rev)
387 387 revlen = length(rev)
388 388
389 389 # Skip empty revisions to form larger holes
390 390 if revlen == 0:
391 391 continue
392 392
393 393 if prevend is not None:
394 394 gapsize = revstart - prevend
395 395 # only consider holes that are large enough
396 396 if gapsize > mingapsize:
397 397 gaps.append((gapsize, i))
398 398
399 399 prevend = revstart + revlen
400 400 # sort the gaps to pop them from largest to small
401 401 gaps.sort()
402 402
403 403 # Collect the indices of the largest holes until the density is acceptable
404 404 selected = []
405 405 while gaps and density < targetdensity:
406 406 gapsize, gapidx = gaps.pop()
407 407
408 408 selected.append(gapidx)
409 409
410 410 # the gap sizes are stored as negatives to be sorted decreasingly
411 411 # by the heap
412 412 readdata -= gapsize
413 413 if readdata > 0:
414 414 density = chainpayload / float(readdata)
415 415 else:
416 416 density = 1.0
417 417 selected.sort()
418 418
419 419 # Cut the revs at collected indices
420 420 previdx = 0
421 421 for idx in selected:
422 422
423 423 chunk = _trimchunk(revlog, revs, previdx, idx)
424 424 if chunk:
425 425 yield chunk
426 426
427 427 previdx = idx
428 428
429 429 chunk = _trimchunk(revlog, revs, previdx)
430 430 if chunk:
431 431 yield chunk
432 432
433 433
434 434 def _trimchunk(revlog, revs, startidx, endidx=None):
435 435 """returns revs[startidx:endidx] without empty trailing revs
436 436
437 437 Doctest Setup
438 438 >>> revlog = _testrevlog([
439 439 ... 5, #0
440 440 ... 10, #1
441 441 ... 12, #2
442 442 ... 12, #3 (empty)
443 443 ... 17, #4
444 444 ... 21, #5
445 445 ... 21, #6 (empty)
446 446 ... ])
447 447
448 448 Contiguous cases:
449 449 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
450 450 [0, 1, 2, 3, 4, 5]
451 451 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
452 452 [0, 1, 2, 3, 4]
453 453 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
454 454 [0, 1, 2]
455 455 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
456 456 [2]
457 457 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
458 458 [3, 4, 5]
459 459 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
460 460 [3, 4]
461 461
462 462 Discontiguous cases:
463 463 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
464 464 [1, 3, 5]
465 465 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
466 466 [1]
467 467 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
468 468 [3, 5]
469 469 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
470 470 [3, 5]
471 471 """
472 472 length = revlog.length
473 473
474 474 if endidx is None:
475 475 endidx = len(revs)
476 476
477 477 # If we have a non-emtpy delta candidate, there are nothing to trim
478 478 if revs[endidx - 1] < len(revlog):
479 479 # Trim empty revs at the end, except the very first revision of a chain
480 480 while (
481 481 endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0
482 482 ):
483 483 endidx -= 1
484 484
485 485 return revs[startidx:endidx]
486 486
487 487
488 488 def segmentspan(revlog, revs):
489 489 """Get the byte span of a segment of revisions
490 490
491 491 revs is a sorted array of revision numbers
492 492
493 493 >>> revlog = _testrevlog([
494 494 ... 5, #0
495 495 ... 10, #1
496 496 ... 12, #2
497 497 ... 12, #3 (empty)
498 498 ... 17, #4
499 499 ... ])
500 500
501 501 >>> segmentspan(revlog, [0, 1, 2, 3, 4])
502 502 17
503 503 >>> segmentspan(revlog, [0, 4])
504 504 17
505 505 >>> segmentspan(revlog, [3, 4])
506 506 5
507 507 >>> segmentspan(revlog, [1, 2, 3,])
508 508 7
509 509 >>> segmentspan(revlog, [1, 3])
510 510 7
511 511 """
512 512 if not revs:
513 513 return 0
514 514 end = revlog.end(revs[-1])
515 515 return end - revlog.start(revs[0])
516 516
517 517
518 518 def _textfromdelta(fh, revlog, baserev, delta, p1, p2, flags, expectednode):
519 519 """build full text from a (base, delta) pair and other metadata"""
520 520 # special case deltas which replace entire base; no need to decode
521 521 # base revision. this neatly avoids censored bases, which throw when
522 522 # they're decoded.
523 523 hlen = struct.calcsize(b">lll")
524 524 if delta[:hlen] == mdiff.replacediffheader(
525 525 revlog.rawsize(baserev), len(delta) - hlen
526 526 ):
527 527 fulltext = delta[hlen:]
528 528 else:
529 529 # deltabase is rawtext before changed by flag processors, which is
530 530 # equivalent to non-raw text
531 531 basetext = revlog.revision(baserev, _df=fh)
532 532 fulltext = mdiff.patch(basetext, delta)
533 533
534 534 try:
535 535 validatehash = flagutil.processflagsraw(revlog, fulltext, flags)
536 536 if validatehash:
537 537 revlog.checkhash(fulltext, expectednode, p1=p1, p2=p2)
538 538 if flags & REVIDX_ISCENSORED:
539 539 raise error.StorageError(
540 540 _(b'node %s is not censored') % expectednode
541 541 )
542 542 except error.CensoredNodeError:
543 543 # must pass the censored index flag to add censored revisions
544 544 if not flags & REVIDX_ISCENSORED:
545 545 raise
546 546 return fulltext
547 547
548 548
549 549 @attr.s(slots=True, frozen=True)
550 550 class _deltainfo:
551 551 distance = attr.ib()
552 552 deltalen = attr.ib()
553 553 data = attr.ib()
554 554 base = attr.ib()
555 555 chainbase = attr.ib()
556 556 chainlen = attr.ib()
557 557 compresseddeltalen = attr.ib()
558 558 snapshotdepth = attr.ib()
559 559
560 560
561 561 def drop_u_compression(delta):
562 562 """turn into a "u" (no-compression) into no-compression without header
563 563
564 564 This is useful for revlog format that has better compression method.
565 565 """
566 566 assert delta.data[0] == b'u', delta.data[0]
567 567 return _deltainfo(
568 568 delta.distance,
569 569 delta.deltalen - 1,
570 570 (b'', delta.data[1]),
571 571 delta.base,
572 572 delta.chainbase,
573 573 delta.chainlen,
574 574 delta.compresseddeltalen,
575 575 delta.snapshotdepth,
576 576 )
577 577
578 578
579 579 def isgooddeltainfo(revlog, deltainfo, revinfo):
580 580 """Returns True if the given delta is good. Good means that it is within
581 581 the disk span, disk size, and chain length bounds that we know to be
582 582 performant."""
583 583 if deltainfo is None:
584 584 return False
585 585
586 586 # - 'deltainfo.distance' is the distance from the base revision --
587 587 # bounding it limits the amount of I/O we need to do.
588 588 # - 'deltainfo.compresseddeltalen' is the sum of the total size of
589 589 # deltas we need to apply -- bounding it limits the amount of CPU
590 590 # we consume.
591 591
592 592 textlen = revinfo.textlen
593 593 defaultmax = textlen * 4
594 594 maxdist = revlog._maxdeltachainspan
595 595 if not maxdist:
596 596 maxdist = deltainfo.distance # ensure the conditional pass
597 597 maxdist = max(maxdist, defaultmax)
598 598
599 599 # Bad delta from read span:
600 600 #
601 601 # If the span of data read is larger than the maximum allowed.
602 602 #
603 603 # In the sparse-revlog case, we rely on the associated "sparse reading"
604 604 # to avoid issue related to the span of data. In theory, it would be
605 605 # possible to build pathological revlog where delta pattern would lead
606 606 # to too many reads. However, they do not happen in practice at all. So
607 607 # we skip the span check entirely.
608 608 if not revlog._sparserevlog and maxdist < deltainfo.distance:
609 609 return False
610 610
611 611 # Bad delta from new delta size:
612 612 #
613 613 # If the delta size is larger than the target text, storing the
614 614 # delta will be inefficient.
615 615 if textlen < deltainfo.deltalen:
616 616 return False
617 617
618 618 # Bad delta from cumulated payload size:
619 619 #
620 620 # If the sum of delta get larger than K * target text length.
621 621 if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
622 622 return False
623 623
624 624 # Bad delta from chain length:
625 625 #
626 626 # If the number of delta in the chain gets too high.
627 627 if revlog._maxchainlen and revlog._maxchainlen < deltainfo.chainlen:
628 628 return False
629 629
630 630 # bad delta from intermediate snapshot size limit
631 631 #
632 632 # If an intermediate snapshot size is higher than the limit. The
633 633 # limit exist to prevent endless chain of intermediate delta to be
634 634 # created.
635 635 if (
636 636 deltainfo.snapshotdepth is not None
637 637 and (textlen >> deltainfo.snapshotdepth) < deltainfo.deltalen
638 638 ):
639 639 return False
640 640
641 641 # bad delta if new intermediate snapshot is larger than the previous
642 642 # snapshot
643 643 if (
644 644 deltainfo.snapshotdepth
645 645 and revlog.length(deltainfo.base) < deltainfo.deltalen
646 646 ):
647 647 return False
648 648
649 649 return True
650 650
651 651
652 652 # If a revision's full text is that much bigger than a base candidate full
653 653 # text's, it is very unlikely that it will produce a valid delta. We no longer
654 654 # consider these candidates.
655 655 LIMIT_BASE2TEXT = 500
656 656
657 657
658 658 def _candidategroups(
659 659 revlog,
660 660 textlen,
661 661 p1,
662 662 p2,
663 663 cachedelta,
664 664 excluded_bases=None,
665 665 target_rev=None,
666 666 ):
667 667 """Provides group of revision to be tested as delta base
668 668
669 669 This top level function focus on emitting groups with unique and worthwhile
670 670 content. See _raw_candidate_groups for details about the group order.
671 671 """
672 672 # should we try to build a delta?
673 673 if not (len(revlog) and revlog._storedeltachains):
674 674 yield None
675 675 return
676 676
677 677 deltalength = revlog.length
678 678 deltaparent = revlog.deltaparent
679 679 sparse = revlog._sparserevlog
680 680 good = None
681 681
682 682 deltas_limit = textlen * LIMIT_DELTA2TEXT
683 683
684 684 tested = {nullrev}
685 685 candidates = _refinedgroups(
686 686 revlog,
687 687 p1,
688 688 p2,
689 689 cachedelta,
690 690 )
691 691 while True:
692 692 temptative = candidates.send(good)
693 693 if temptative is None:
694 694 break
695 695 group = []
696 696 for rev in temptative:
697 697 # skip over empty delta (no need to include them in a chain)
698 698 while revlog._generaldelta and not (
699 699 rev == nullrev or rev in tested or deltalength(rev)
700 700 ):
701 701 tested.add(rev)
702 702 rev = deltaparent(rev)
703 703 # no need to try a delta against nullrev, this will be done as a
704 704 # last resort.
705 705 if rev == nullrev:
706 706 continue
707 707 # filter out revision we tested already
708 708 if rev in tested:
709 709 continue
710 710 tested.add(rev)
711 711 # an higher authority deamed the base unworthy (e.g. censored)
712 712 if excluded_bases is not None and rev in excluded_bases:
713 713 continue
714 714 # We are in some recomputation cases and that rev is too high in
715 715 # the revlog
716 716 if target_rev is not None and rev >= target_rev:
717 717 continue
718 718 # filter out delta base that will never produce good delta
719 719 if deltas_limit < revlog.length(rev):
720 720 continue
721 721 if sparse and revlog.rawsize(rev) < (textlen // LIMIT_BASE2TEXT):
722 722 continue
723 723 # no delta for rawtext-changing revs (see "candelta" for why)
724 724 if revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
725 725 continue
726 726
727 727 # If we reach here, we are about to build and test a delta.
728 728 # The delta building process will compute the chaininfo in all
729 729 # case, since that computation is cached, it is fine to access it
730 730 # here too.
731 731 chainlen, chainsize = revlog._chaininfo(rev)
732 732 # if chain will be too long, skip base
733 733 if revlog._maxchainlen and chainlen >= revlog._maxchainlen:
734 734 continue
735 735 # if chain already have too much data, skip base
736 736 if deltas_limit < chainsize:
737 737 continue
738 738 if sparse and revlog.upperboundcomp is not None:
739 739 maxcomp = revlog.upperboundcomp
740 740 basenotsnap = (p1, p2, nullrev)
741 741 if rev not in basenotsnap and revlog.issnapshot(rev):
742 742 snapshotdepth = revlog.snapshotdepth(rev)
743 743 # If text is significantly larger than the base, we can
744 744 # expect the resulting delta to be proportional to the size
745 745 # difference
746 746 revsize = revlog.rawsize(rev)
747 747 rawsizedistance = max(textlen - revsize, 0)
748 748 # use an estimate of the compression upper bound.
749 749 lowestrealisticdeltalen = rawsizedistance // maxcomp
750 750
751 751 # check the absolute constraint on the delta size
752 752 snapshotlimit = textlen >> snapshotdepth
753 753 if snapshotlimit < lowestrealisticdeltalen:
754 754 # delta lower bound is larger than accepted upper bound
755 755 continue
756 756
757 757 # check the relative constraint on the delta size
758 758 revlength = revlog.length(rev)
759 759 if revlength < lowestrealisticdeltalen:
760 760 # delta probable lower bound is larger than target base
761 761 continue
762 762
763 763 group.append(rev)
764 764 if group:
765 765 # XXX: in the sparse revlog case, group can become large,
766 766 # impacting performances. Some bounding or slicing mecanism
767 767 # would help to reduce this impact.
768 768 good = yield tuple(group)
769 769 yield None
770 770
771 771
772 772 def _findsnapshots(revlog, cache, start_rev):
773 773 """find snapshot from start_rev to tip"""
774 774 if util.safehasattr(revlog.index, b'findsnapshots'):
775 775 revlog.index.findsnapshots(cache, start_rev)
776 776 else:
777 777 deltaparent = revlog.deltaparent
778 778 issnapshot = revlog.issnapshot
779 779 for rev in revlog.revs(start_rev):
780 780 if issnapshot(rev):
781 781 cache[deltaparent(rev)].append(rev)
782 782
783 783
784 784 def _refinedgroups(revlog, p1, p2, cachedelta):
785 785 good = None
786 786 # First we try to reuse a the delta contained in the bundle.
787 787 # (or from the source revlog)
788 788 #
789 789 # This logic only applies to general delta repositories and can be disabled
790 790 # through configuration. Disabling reuse source delta is useful when
791 791 # we want to make sure we recomputed "optimal" deltas.
792 792 debug_info = None
793 793 if cachedelta and revlog._generaldelta and revlog._lazydeltabase:
794 794 # Assume what we received from the server is a good choice
795 795 # build delta will reuse the cache
796 796 if debug_info is not None:
797 797 debug_info['cached-delta.tested'] += 1
798 798 good = yield (cachedelta[0],)
799 799 if good is not None:
800 800 if debug_info is not None:
801 801 debug_info['cached-delta.accepted'] += 1
802 802 yield None
803 803 return
804 804 # XXX cache me higher
805 805 snapshots = collections.defaultdict(list)
806 806 groups = _rawgroups(
807 807 revlog,
808 808 p1,
809 809 p2,
810 810 cachedelta,
811 811 snapshots,
812 812 )
813 813 for candidates in groups:
814 814 good = yield candidates
815 815 if good is not None:
816 816 break
817 817
818 818 # If sparse revlog is enabled, we can try to refine the available deltas
819 819 if not revlog._sparserevlog:
820 820 yield None
821 821 return
822 822
823 823 # if we have a refinable value, try to refine it
824 824 if good is not None and good not in (p1, p2) and revlog.issnapshot(good):
825 825 # refine snapshot down
826 826 previous = None
827 827 while previous != good:
828 828 previous = good
829 829 base = revlog.deltaparent(good)
830 830 if base == nullrev:
831 831 break
832 832 good = yield (base,)
833 833 # refine snapshot up
834 834 if not snapshots:
835 835 _findsnapshots(revlog, snapshots, good + 1)
836 836 previous = None
837 837 while good != previous:
838 838 previous = good
839 839 children = tuple(sorted(c for c in snapshots[good]))
840 840 good = yield children
841 841
842 842 if debug_info is not None:
843 843 if good is None:
844 844 debug_info['no-solution'] += 1
845 845
846 846 yield None
847 847
848 848
849 849 def _rawgroups(revlog, p1, p2, cachedelta, snapshots=None):
850 850 """Provides group of revision to be tested as delta base
851 851
852 852 This lower level function focus on emitting delta theorically interresting
853 853 without looking it any practical details.
854 854
855 855 The group order aims at providing fast or small candidates first.
856 856 """
857 857 gdelta = revlog._generaldelta
858 858 # gate sparse behind general-delta because of issue6056
859 859 sparse = gdelta and revlog._sparserevlog
860 860 curr = len(revlog)
861 861 prev = curr - 1
862 862 deltachain = lambda rev: revlog._deltachain(rev)[0]
863 863
864 864 if gdelta:
865 865 # exclude already lazy tested base if any
866 866 parents = [p for p in (p1, p2) if p != nullrev]
867 867
868 868 if not revlog._deltabothparents and len(parents) == 2:
869 869 parents.sort()
870 870 # To minimize the chance of having to build a fulltext,
871 871 # pick first whichever parent is closest to us (max rev)
872 872 yield (parents[1],)
873 873 # then the other one (min rev) if the first did not fit
874 874 yield (parents[0],)
875 875 elif len(parents) > 0:
876 876 # Test all parents (1 or 2), and keep the best candidate
877 877 yield parents
878 878
879 879 if sparse and parents:
880 880 if snapshots is None:
881 # map: base-rev: snapshot-rev
881 # map: base-rev: [snapshot-revs]
882 882 snapshots = collections.defaultdict(list)
883 883 # See if we can use an existing snapshot in the parent chains to use as
884 884 # a base for a new intermediate-snapshot
885 885 #
886 886 # search for snapshot in parents delta chain
887 887 # map: snapshot-level: snapshot-rev
888 888 parents_snaps = collections.defaultdict(set)
889 889 candidate_chains = [deltachain(p) for p in parents]
890 890 for chain in candidate_chains:
891 891 for idx, s in enumerate(chain):
892 892 if not revlog.issnapshot(s):
893 893 break
894 894 parents_snaps[idx].add(s)
895 895 snapfloor = min(parents_snaps[0]) + 1
896 896 _findsnapshots(revlog, snapshots, snapfloor)
897 897 # search for the highest "unrelated" revision
898 898 #
899 899 # Adding snapshots used by "unrelated" revision increase the odd we
900 900 # reuse an independant, yet better snapshot chain.
901 901 #
902 902 # XXX instead of building a set of revisions, we could lazily enumerate
903 903 # over the chains. That would be more efficient, however we stick to
904 904 # simple code for now.
905 905 all_revs = set()
906 906 for chain in candidate_chains:
907 907 all_revs.update(chain)
908 908 other = None
909 909 for r in revlog.revs(prev, snapfloor):
910 910 if r not in all_revs:
911 911 other = r
912 912 break
913 913 if other is not None:
914 914 # To avoid unfair competition, we won't use unrelated intermediate
915 915 # snapshot that are deeper than the ones from the parent delta
916 916 # chain.
917 917 max_depth = max(parents_snaps.keys())
918 918 chain = deltachain(other)
919 919 for idx, s in enumerate(chain):
920 920 if s < snapfloor:
921 921 continue
922 922 if max_depth < idx:
923 923 break
924 924 if not revlog.issnapshot(s):
925 925 break
926 926 parents_snaps[idx].add(s)
927 927 # Test them as possible intermediate snapshot base
928 928 # We test them from highest to lowest level. High level one are more
929 929 # likely to result in small delta
930 930 floor = None
931 931 for idx, snaps in sorted(parents_snaps.items(), reverse=True):
932 932 siblings = set()
933 933 for s in snaps:
934 934 siblings.update(snapshots[s])
935 935 # Before considering making a new intermediate snapshot, we check
936 936 # if an existing snapshot, children of base we consider, would be
937 937 # suitable.
938 938 #
939 939 # It give a change to reuse a delta chain "unrelated" to the
940 940 # current revision instead of starting our own. Without such
941 941 # re-use, topological branches would keep reopening new chains.
942 942 # Creating more and more snapshot as the repository grow.
943 943
944 944 if floor is not None:
945 945 # We only do this for siblings created after the one in our
946 946 # parent's delta chain. Those created before has less chances
947 947 # to be valid base since our ancestors had to create a new
948 948 # snapshot.
949 949 siblings = [r for r in siblings if floor < r]
950 950 yield tuple(sorted(siblings))
951 951 # then test the base from our parent's delta chain.
952 952 yield tuple(sorted(snaps))
953 953 floor = min(snaps)
954 954 # No suitable base found in the parent chain, search if any full
955 955 # snapshots emitted since parent's base would be a suitable base for an
956 956 # intermediate snapshot.
957 957 #
958 958 # It give a chance to reuse a delta chain unrelated to the current
959 959 # revisions instead of starting our own. Without such re-use,
960 960 # topological branches would keep reopening new full chains. Creating
961 961 # more and more snapshot as the repository grow.
962 962 yield tuple(snapshots[nullrev])
963 963
964 964 if not sparse:
965 965 # other approach failed try against prev to hopefully save us a
966 966 # fulltext.
967 967 yield (prev,)
968 968
969 969
970 970 class deltacomputer:
971 971 def __init__(
972 972 self,
973 973 revlog,
974 974 write_debug=None,
975 975 debug_search=False,
976 976 debug_info=None,
977 977 ):
978 978 self.revlog = revlog
979 979 self._write_debug = write_debug
980 980 self._debug_search = debug_search
981 981 self._debug_info = debug_info
982 982
983 983 def buildtext(self, revinfo, fh):
984 984 """Builds a fulltext version of a revision
985 985
986 986 revinfo: revisioninfo instance that contains all needed info
987 987 fh: file handle to either the .i or the .d revlog file,
988 988 depending on whether it is inlined or not
989 989 """
990 990 btext = revinfo.btext
991 991 if btext[0] is not None:
992 992 return btext[0]
993 993
994 994 revlog = self.revlog
995 995 cachedelta = revinfo.cachedelta
996 996 baserev = cachedelta[0]
997 997 delta = cachedelta[1]
998 998
999 999 fulltext = btext[0] = _textfromdelta(
1000 1000 fh,
1001 1001 revlog,
1002 1002 baserev,
1003 1003 delta,
1004 1004 revinfo.p1,
1005 1005 revinfo.p2,
1006 1006 revinfo.flags,
1007 1007 revinfo.node,
1008 1008 )
1009 1009 return fulltext
1010 1010
1011 1011 def _builddeltadiff(self, base, revinfo, fh):
1012 1012 revlog = self.revlog
1013 1013 t = self.buildtext(revinfo, fh)
1014 1014 if revlog.iscensored(base):
1015 1015 # deltas based on a censored revision must replace the
1016 1016 # full content in one patch, so delta works everywhere
1017 1017 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
1018 1018 delta = header + t
1019 1019 else:
1020 1020 ptext = revlog.rawdata(base, _df=fh)
1021 1021 delta = mdiff.textdiff(ptext, t)
1022 1022
1023 1023 return delta
1024 1024
1025 1025 def _builddeltainfo(self, revinfo, base, fh):
1026 1026 # can we use the cached delta?
1027 1027 revlog = self.revlog
1028 1028 debug_search = self._write_debug is not None and self._debug_search
1029 1029 chainbase = revlog.chainbase(base)
1030 1030 if revlog._generaldelta:
1031 1031 deltabase = base
1032 1032 else:
1033 1033 deltabase = chainbase
1034 1034 snapshotdepth = None
1035 1035 if revlog._sparserevlog and deltabase == nullrev:
1036 1036 snapshotdepth = 0
1037 1037 elif revlog._sparserevlog and revlog.issnapshot(deltabase):
1038 1038 # A delta chain should always be one full snapshot,
1039 1039 # zero or more semi-snapshots, and zero or more deltas
1040 1040 p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
1041 1041 if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
1042 1042 snapshotdepth = len(revlog._deltachain(deltabase)[0])
1043 1043 delta = None
1044 1044 if revinfo.cachedelta:
1045 1045 cachebase, cachediff = revinfo.cachedelta
1046 1046 # check if the diff still apply
1047 1047 currentbase = cachebase
1048 1048 while (
1049 1049 currentbase != nullrev
1050 1050 and currentbase != base
1051 1051 and self.revlog.length(currentbase) == 0
1052 1052 ):
1053 1053 currentbase = self.revlog.deltaparent(currentbase)
1054 1054 if self.revlog._lazydelta and currentbase == base:
1055 1055 delta = revinfo.cachedelta[1]
1056 1056 if delta is None:
1057 1057 delta = self._builddeltadiff(base, revinfo, fh)
1058 1058 if debug_search:
1059 1059 msg = b"DBG-DELTAS-SEARCH: uncompressed-delta-size=%d\n"
1060 1060 msg %= len(delta)
1061 1061 self._write_debug(msg)
1062 1062 # snapshotdept need to be neither None nor 0 level snapshot
1063 1063 if revlog.upperboundcomp is not None and snapshotdepth:
1064 1064 lowestrealisticdeltalen = len(delta) // revlog.upperboundcomp
1065 1065 snapshotlimit = revinfo.textlen >> snapshotdepth
1066 1066 if debug_search:
1067 1067 msg = b"DBG-DELTAS-SEARCH: projected-lower-size=%d\n"
1068 1068 msg %= lowestrealisticdeltalen
1069 1069 self._write_debug(msg)
1070 1070 if snapshotlimit < lowestrealisticdeltalen:
1071 1071 if debug_search:
1072 1072 msg = b"DBG-DELTAS-SEARCH: DISCARDED (snapshot limit)\n"
1073 1073 self._write_debug(msg)
1074 1074 return None
1075 1075 if revlog.length(base) < lowestrealisticdeltalen:
1076 1076 if debug_search:
1077 1077 msg = b"DBG-DELTAS-SEARCH: DISCARDED (prev size)\n"
1078 1078 self._write_debug(msg)
1079 1079 return None
1080 1080 header, data = revlog.compress(delta)
1081 1081 deltalen = len(header) + len(data)
1082 1082 offset = revlog.end(len(revlog) - 1)
1083 1083 dist = deltalen + offset - revlog.start(chainbase)
1084 1084 chainlen, compresseddeltalen = revlog._chaininfo(base)
1085 1085 chainlen += 1
1086 1086 compresseddeltalen += deltalen
1087 1087
1088 1088 return _deltainfo(
1089 1089 dist,
1090 1090 deltalen,
1091 1091 (header, data),
1092 1092 deltabase,
1093 1093 chainbase,
1094 1094 chainlen,
1095 1095 compresseddeltalen,
1096 1096 snapshotdepth,
1097 1097 )
1098 1098
1099 1099 def _fullsnapshotinfo(self, fh, revinfo, curr):
1100 1100 rawtext = self.buildtext(revinfo, fh)
1101 1101 data = self.revlog.compress(rawtext)
1102 1102 compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
1103 1103 deltabase = chainbase = curr
1104 1104 snapshotdepth = 0
1105 1105 chainlen = 1
1106 1106
1107 1107 return _deltainfo(
1108 1108 dist,
1109 1109 deltalen,
1110 1110 data,
1111 1111 deltabase,
1112 1112 chainbase,
1113 1113 chainlen,
1114 1114 compresseddeltalen,
1115 1115 snapshotdepth,
1116 1116 )
1117 1117
1118 1118 def finddeltainfo(self, revinfo, fh, excluded_bases=None, target_rev=None):
1119 1119 """Find an acceptable delta against a candidate revision
1120 1120
1121 1121 revinfo: information about the revision (instance of _revisioninfo)
1122 1122 fh: file handle to either the .i or the .d revlog file,
1123 1123 depending on whether it is inlined or not
1124 1124
1125 1125 Returns the first acceptable candidate revision, as ordered by
1126 1126 _candidategroups
1127 1127
1128 1128 If no suitable deltabase is found, we return delta info for a full
1129 1129 snapshot.
1130 1130
1131 1131 `excluded_bases` is an optional set of revision that cannot be used as
1132 1132 a delta base. Use this to recompute delta suitable in censor or strip
1133 1133 context.
1134 1134 """
1135 1135 if target_rev is None:
1136 1136 target_rev = len(self.revlog)
1137 1137
1138 1138 if not revinfo.textlen:
1139 1139 return self._fullsnapshotinfo(fh, revinfo, target_rev)
1140 1140
1141 1141 if excluded_bases is None:
1142 1142 excluded_bases = set()
1143 1143
1144 1144 # no delta for flag processor revision (see "candelta" for why)
1145 1145 # not calling candelta since only one revision needs test, also to
1146 1146 # avoid overhead fetching flags again.
1147 1147 if revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
1148 1148 return self._fullsnapshotinfo(fh, revinfo, target_rev)
1149 1149
1150 1150 gather_debug = (
1151 1151 self._write_debug is not None or self._debug_info is not None
1152 1152 )
1153 1153 debug_search = self._write_debug is not None and self._debug_search
1154 1154
1155 1155 if gather_debug:
1156 1156 start = util.timer()
1157 1157
1158 1158 # count the number of different delta we tried (for debug purpose)
1159 1159 dbg_try_count = 0
1160 1160 # count the number of "search round" we did. (for debug purpose)
1161 1161 dbg_try_rounds = 0
1162 1162 dbg_type = b'unknown'
1163 1163
1164 1164 cachedelta = revinfo.cachedelta
1165 1165 p1 = revinfo.p1
1166 1166 p2 = revinfo.p2
1167 1167 revlog = self.revlog
1168 1168
1169 1169 deltainfo = None
1170 1170 p1r, p2r = revlog.rev(p1), revlog.rev(p2)
1171 1171
1172 1172 if gather_debug:
1173 1173 if p1r != nullrev:
1174 1174 p1_chain_len = revlog._chaininfo(p1r)[0]
1175 1175 else:
1176 1176 p1_chain_len = -1
1177 1177 if p2r != nullrev:
1178 1178 p2_chain_len = revlog._chaininfo(p2r)[0]
1179 1179 else:
1180 1180 p2_chain_len = -1
1181 1181 if debug_search:
1182 1182 msg = b"DBG-DELTAS-SEARCH: SEARCH rev=%d\n"
1183 1183 msg %= target_rev
1184 1184 self._write_debug(msg)
1185 1185
1186 1186 groups = _candidategroups(
1187 1187 self.revlog,
1188 1188 revinfo.textlen,
1189 1189 p1r,
1190 1190 p2r,
1191 1191 cachedelta,
1192 1192 excluded_bases,
1193 1193 target_rev,
1194 1194 )
1195 1195 candidaterevs = next(groups)
1196 1196 while candidaterevs is not None:
1197 1197 dbg_try_rounds += 1
1198 1198 if debug_search:
1199 1199 prev = None
1200 1200 if deltainfo is not None:
1201 1201 prev = deltainfo.base
1202 1202
1203 1203 if (
1204 1204 cachedelta is not None
1205 1205 and len(candidaterevs) == 1
1206 1206 and cachedelta[0] in candidaterevs
1207 1207 ):
1208 1208 round_type = b"cached-delta"
1209 1209 elif p1 in candidaterevs or p2 in candidaterevs:
1210 1210 round_type = b"parents"
1211 1211 elif prev is not None and all(c < prev for c in candidaterevs):
1212 1212 round_type = b"refine-down"
1213 1213 elif prev is not None and all(c > prev for c in candidaterevs):
1214 1214 round_type = b"refine-up"
1215 1215 else:
1216 1216 round_type = b"search-down"
1217 1217 msg = b"DBG-DELTAS-SEARCH: ROUND #%d - %d candidates - %s\n"
1218 1218 msg %= (dbg_try_rounds, len(candidaterevs), round_type)
1219 1219 self._write_debug(msg)
1220 1220 nominateddeltas = []
1221 1221 if deltainfo is not None:
1222 1222 if debug_search:
1223 1223 msg = (
1224 1224 b"DBG-DELTAS-SEARCH: CONTENDER: rev=%d - length=%d\n"
1225 1225 )
1226 1226 msg %= (deltainfo.base, deltainfo.deltalen)
1227 1227 self._write_debug(msg)
1228 1228 # if we already found a good delta,
1229 1229 # challenge it against refined candidates
1230 1230 nominateddeltas.append(deltainfo)
1231 1231 for candidaterev in candidaterevs:
1232 1232 if debug_search:
1233 1233 msg = b"DBG-DELTAS-SEARCH: CANDIDATE: rev=%d\n"
1234 1234 msg %= candidaterev
1235 1235 self._write_debug(msg)
1236 1236 candidate_type = None
1237 1237 if candidaterev == p1:
1238 1238 candidate_type = b"p1"
1239 1239 elif candidaterev == p2:
1240 1240 candidate_type = b"p2"
1241 1241 elif self.revlog.issnapshot(candidaterev):
1242 1242 candidate_type = b"snapshot-%d"
1243 1243 candidate_type %= self.revlog.snapshotdepth(
1244 1244 candidaterev
1245 1245 )
1246 1246
1247 1247 if candidate_type is not None:
1248 1248 msg = b"DBG-DELTAS-SEARCH: type=%s\n"
1249 1249 msg %= candidate_type
1250 1250 self._write_debug(msg)
1251 1251 msg = b"DBG-DELTAS-SEARCH: size=%d\n"
1252 1252 msg %= self.revlog.length(candidaterev)
1253 1253 self._write_debug(msg)
1254 1254 msg = b"DBG-DELTAS-SEARCH: base=%d\n"
1255 1255 msg %= self.revlog.deltaparent(candidaterev)
1256 1256 self._write_debug(msg)
1257 1257
1258 1258 dbg_try_count += 1
1259 1259
1260 1260 if debug_search:
1261 1261 delta_start = util.timer()
1262 1262 candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
1263 1263 if debug_search:
1264 1264 delta_end = util.timer()
1265 1265 msg = b"DBG-DELTAS-SEARCH: delta-search-time=%f\n"
1266 1266 msg %= delta_end - delta_start
1267 1267 self._write_debug(msg)
1268 1268 if candidatedelta is not None:
1269 1269 if isgooddeltainfo(self.revlog, candidatedelta, revinfo):
1270 1270 if debug_search:
1271 1271 msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (GOOD)\n"
1272 1272 msg %= candidatedelta.deltalen
1273 1273 self._write_debug(msg)
1274 1274 nominateddeltas.append(candidatedelta)
1275 1275 elif debug_search:
1276 1276 msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (BAD)\n"
1277 1277 msg %= candidatedelta.deltalen
1278 1278 self._write_debug(msg)
1279 1279 elif debug_search:
1280 1280 msg = b"DBG-DELTAS-SEARCH: NO-DELTA\n"
1281 1281 self._write_debug(msg)
1282 1282 if nominateddeltas:
1283 1283 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
1284 1284 if deltainfo is not None:
1285 1285 candidaterevs = groups.send(deltainfo.base)
1286 1286 else:
1287 1287 candidaterevs = next(groups)
1288 1288
1289 1289 if deltainfo is None:
1290 1290 dbg_type = b"full"
1291 1291 deltainfo = self._fullsnapshotinfo(fh, revinfo, target_rev)
1292 1292 elif deltainfo.snapshotdepth: # pytype: disable=attribute-error
1293 1293 dbg_type = b"snapshot"
1294 1294 else:
1295 1295 dbg_type = b"delta"
1296 1296
1297 1297 if gather_debug:
1298 1298 end = util.timer()
1299 1299 used_cached = (
1300 1300 cachedelta is not None
1301 1301 and dbg_try_rounds == 1
1302 1302 and dbg_try_count == 1
1303 1303 and deltainfo.base == cachedelta[0]
1304 1304 )
1305 1305 dbg = {
1306 1306 'duration': end - start,
1307 1307 'revision': target_rev,
1308 1308 'delta-base': deltainfo.base, # pytype: disable=attribute-error
1309 1309 'search_round_count': dbg_try_rounds,
1310 1310 'using-cached-base': used_cached,
1311 1311 'delta_try_count': dbg_try_count,
1312 1312 'type': dbg_type,
1313 1313 'p1-chain-len': p1_chain_len,
1314 1314 'p2-chain-len': p2_chain_len,
1315 1315 }
1316 1316 if (
1317 1317 deltainfo.snapshotdepth # pytype: disable=attribute-error
1318 1318 is not None
1319 1319 ):
1320 1320 dbg[
1321 1321 'snapshot-depth'
1322 1322 ] = deltainfo.snapshotdepth # pytype: disable=attribute-error
1323 1323 else:
1324 1324 dbg['snapshot-depth'] = 0
1325 1325 target_revlog = b"UNKNOWN"
1326 1326 target_type = self.revlog.target[0]
1327 1327 target_key = self.revlog.target[1]
1328 1328 if target_type == KIND_CHANGELOG:
1329 1329 target_revlog = b'CHANGELOG:'
1330 1330 elif target_type == KIND_MANIFESTLOG:
1331 1331 target_revlog = b'MANIFESTLOG:'
1332 1332 if target_key:
1333 1333 target_revlog += b'%s:' % target_key
1334 1334 elif target_type == KIND_FILELOG:
1335 1335 target_revlog = b'FILELOG:'
1336 1336 if target_key:
1337 1337 target_revlog += b'%s:' % target_key
1338 1338 dbg['target-revlog'] = target_revlog
1339 1339
1340 1340 if self._debug_info is not None:
1341 1341 self._debug_info.append(dbg)
1342 1342
1343 1343 if self._write_debug is not None:
1344 1344 msg = (
1345 1345 b"DBG-DELTAS:"
1346 1346 b" %-12s"
1347 1347 b" rev=%d:"
1348 1348 b" delta-base=%d"
1349 1349 b" is-cached=%d"
1350 1350 b" - search-rounds=%d"
1351 1351 b" try-count=%d"
1352 1352 b" - delta-type=%-6s"
1353 1353 b" snap-depth=%d"
1354 1354 b" - p1-chain-length=%d"
1355 1355 b" p2-chain-length=%d"
1356 1356 b" - duration=%f"
1357 1357 b"\n"
1358 1358 )
1359 1359 msg %= (
1360 1360 dbg["target-revlog"],
1361 1361 dbg["revision"],
1362 1362 dbg["delta-base"],
1363 1363 dbg["using-cached-base"],
1364 1364 dbg["search_round_count"],
1365 1365 dbg["delta_try_count"],
1366 1366 dbg["type"],
1367 1367 dbg["snapshot-depth"],
1368 1368 dbg["p1-chain-len"],
1369 1369 dbg["p2-chain-len"],
1370 1370 dbg["duration"],
1371 1371 )
1372 1372 self._write_debug(msg)
1373 1373 return deltainfo
1374 1374
1375 1375
1376 1376 def delta_compression(default_compression_header, deltainfo):
1377 1377 """return (COMPRESSION_MODE, deltainfo)
1378 1378
1379 1379 used by revlog v2+ format to dispatch between PLAIN and DEFAULT
1380 1380 compression.
1381 1381 """
1382 1382 h, d = deltainfo.data
1383 1383 compression_mode = COMP_MODE_INLINE
1384 1384 if not h and not d:
1385 1385 # not data to store at all... declare them uncompressed
1386 1386 compression_mode = COMP_MODE_PLAIN
1387 1387 elif not h:
1388 1388 t = d[0:1]
1389 1389 if t == b'\0':
1390 1390 compression_mode = COMP_MODE_PLAIN
1391 1391 elif t == default_compression_header:
1392 1392 compression_mode = COMP_MODE_DEFAULT
1393 1393 elif h == b'u':
1394 1394 # we have a more efficient way to declare uncompressed
1395 1395 h = b''
1396 1396 compression_mode = COMP_MODE_PLAIN
1397 1397 deltainfo = drop_u_compression(deltainfo)
1398 1398 return compression_mode, deltainfo
General Comments 0
You need to be logged in to leave comments. Login now