delta-find: fix pulled-delta-reuse-policy=forced behavior...
marmoute
r51550:e77ca247 stable
@@ -1,1565 +1,1631 b''
1 1 # revlogdeltas.py - Logic around delta computation for revlog
2 2 #
3 3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 4 # Copyright 2018 Octobus <contact@octobus.net>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8 """Helper class to compute deltas stored inside revlogs"""
9 9
10 10
11 11 import collections
12 12 import struct
13 13
14 14 # import stuff from node for others to import from revlog
15 15 from ..node import nullrev
16 16 from ..i18n import _
17 17 from ..pycompat import getattr
18 18
19 19 from .constants import (
20 20 COMP_MODE_DEFAULT,
21 21 COMP_MODE_INLINE,
22 22 COMP_MODE_PLAIN,
23 23 DELTA_BASE_REUSE_FORCE,
24 24 DELTA_BASE_REUSE_NO,
25 25 KIND_CHANGELOG,
26 26 KIND_FILELOG,
27 27 KIND_MANIFESTLOG,
28 28 REVIDX_ISCENSORED,
29 29 REVIDX_RAWTEXT_CHANGING_FLAGS,
30 30 )
31 31
32 32 from ..thirdparty import attr
33 33
34 34 from .. import (
35 35 error,
36 36 mdiff,
37 37 util,
38 38 )
39 39
40 40 from . import flagutil
41 41
42 42 # maximum <delta-chain-data>/<revision-text-length> ratio
43 43 LIMIT_DELTA2TEXT = 2
44 44
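
This ratio is applied later in `is_good_delta_info`. A minimal illustrative sketch of the bound it implies (the helper name and sizes are made up for illustration):

# Illustrative sketch only: a chain whose cumulative compressed delta
# size exceeds LIMIT_DELTA2TEXT * textlen is rejected as a bad delta.
def _chain_within_budget(textlen, compresseddeltalen):
    return compresseddeltalen <= textlen * LIMIT_DELTA2TEXT

assert _chain_within_budget(textlen=1000, compresseddeltalen=1500)
assert not _chain_within_budget(textlen=1000, compresseddeltalen=2500)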
45 45
46 46 class _testrevlog:
47 47 """minimalist fake revlog to use in doctests"""
48 48
49 49 def __init__(self, data, density=0.5, mingap=0, snapshot=()):
50 50 """data is an list of revision payload boundaries"""
51 51 self._data = data
52 52 self._srdensitythreshold = density
53 53 self._srmingapsize = mingap
54 54 self._snapshot = set(snapshot)
55 55 self.index = None
56 56
57 57 def start(self, rev):
58 58 if rev == nullrev:
59 59 return 0
60 60 if rev == 0:
61 61 return 0
62 62 return self._data[rev - 1]
63 63
64 64 def end(self, rev):
65 65 if rev == nullrev:
66 66 return 0
67 67 return self._data[rev]
68 68
69 69 def length(self, rev):
70 70 return self.end(rev) - self.start(rev)
71 71
72 72 def __len__(self):
73 73 return len(self._data)
74 74
75 75 def issnapshot(self, rev):
76 76 if rev == nullrev:
77 77 return True
78 78 return rev in self._snapshot
79 79
80 80
81 81 def slicechunk(revlog, revs, targetsize=None):
82 82 """slice revs to reduce the amount of unrelated data to be read from disk.
83 83
84 84 ``revs`` is sliced into groups that should be read in one time.
85 85 Assume that revs are sorted.
86 86
87 87 The initial chunk is sliced until the overall density (payload/chunks-span
88 88 ratio) is above `revlog._srdensitythreshold`. No gap smaller than
89 89 `revlog._srmingapsize` is skipped.
90 90
91 91 If `targetsize` is set, no chunk larger than `targetsize` will be yielded.
92 92 For consistency with other slicing choices, this limit won't go lower than
93 93 `revlog._srmingapsize`.
94 94 
95 95 If an individual revision's chunk is larger than this limit, it will still
96 96 be yielded individually.
97 97
98 98 >>> data = [
99 99 ... 5, #00 (5)
100 100 ... 10, #01 (5)
101 101 ... 12, #02 (2)
102 102 ... 12, #03 (empty)
103 103 ... 27, #04 (15)
104 104 ... 31, #05 (4)
105 105 ... 31, #06 (empty)
106 106 ... 42, #07 (11)
107 107 ... 47, #08 (5)
108 108 ... 47, #09 (empty)
109 109 ... 48, #10 (1)
110 110 ... 51, #11 (3)
111 111 ... 74, #12 (23)
112 112 ... 85, #13 (11)
113 113 ... 86, #14 (1)
114 114 ... 91, #15 (5)
115 115 ... ]
116 116 >>> revlog = _testrevlog(data, snapshot=range(16))
117 117
118 118 >>> list(slicechunk(revlog, list(range(16))))
119 119 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
120 120 >>> list(slicechunk(revlog, [0, 15]))
121 121 [[0], [15]]
122 122 >>> list(slicechunk(revlog, [0, 11, 15]))
123 123 [[0], [11], [15]]
124 124 >>> list(slicechunk(revlog, [0, 11, 13, 15]))
125 125 [[0], [11, 13, 15]]
126 126 >>> list(slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
127 127 [[1, 2], [5, 8, 10, 11], [14]]
128 128
129 129 Slicing with a maximum chunk size
130 130 >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
131 131 [[0], [11], [13], [15]]
132 132 >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
133 133 [[0], [11], [13, 15]]
134 134
135 135 Slicing involving nullrev
136 136 >>> list(slicechunk(revlog, [-1, 0, 11, 13, 15], targetsize=20))
137 137 [[-1, 0], [11], [13, 15]]
138 138 >>> list(slicechunk(revlog, [-1, 13, 15], targetsize=5))
139 139 [[-1], [13], [15]]
140 140 """
141 141 if targetsize is not None:
142 142 targetsize = max(targetsize, revlog._srmingapsize)
143 143 # targetsize should not be specified when evaluating delta candidates:
144 144 # * targetsize is used to ensure we stay within specification when reading,
145 145 densityslicing = getattr(revlog.index, 'slicechunktodensity', None)
146 146 if densityslicing is None:
147 147 densityslicing = lambda x, y, z: _slicechunktodensity(revlog, x, y, z)
148 148 for chunk in densityslicing(
149 149 revs, revlog._srdensitythreshold, revlog._srmingapsize
150 150 ):
151 151 for subchunk in _slicechunktosize(revlog, chunk, targetsize):
152 152 yield subchunk
153 153
154 154
155 155 def _slicechunktosize(revlog, revs, targetsize=None):
156 156 """slice revs to match the target size
157 157
158 158 This is intended to be used on chunks that density slicing selected but
159 159 that are still too large compared to the read guarantee of the revlog. This
160 160 might happen when the "minimal gap size" interrupted the slicing or when
161 161 chains are built in a way that creates large blocks next to each other.
162 162
163 163 >>> data = [
164 164 ... 3, #0 (3)
165 165 ... 5, #1 (2)
166 166 ... 6, #2 (1)
167 167 ... 8, #3 (2)
168 168 ... 8, #4 (empty)
169 169 ... 11, #5 (3)
170 170 ... 12, #6 (1)
171 171 ... 13, #7 (1)
172 172 ... 14, #8 (1)
173 173 ... ]
174 174
175 175 == All snapshots cases ==
176 176 >>> revlog = _testrevlog(data, snapshot=range(9))
177 177
178 178 Cases where chunk is already small enough
179 179 >>> list(_slicechunktosize(revlog, [0], 3))
180 180 [[0]]
181 181 >>> list(_slicechunktosize(revlog, [6, 7], 3))
182 182 [[6, 7]]
183 183 >>> list(_slicechunktosize(revlog, [0], None))
184 184 [[0]]
185 185 >>> list(_slicechunktosize(revlog, [6, 7], None))
186 186 [[6, 7]]
187 187
188 188 cases where we need actual slicing
189 189 >>> list(_slicechunktosize(revlog, [0, 1], 3))
190 190 [[0], [1]]
191 191 >>> list(_slicechunktosize(revlog, [1, 3], 3))
192 192 [[1], [3]]
193 193 >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
194 194 [[1, 2], [3]]
195 195 >>> list(_slicechunktosize(revlog, [3, 5], 3))
196 196 [[3], [5]]
197 197 >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
198 198 [[3], [5]]
199 199 >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
200 200 [[5], [6, 7, 8]]
201 201 >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
202 202 [[0], [1, 2], [3], [5], [6, 7, 8]]
203 203
204 204 Case with too large individual chunk (must return valid chunk)
205 205 >>> list(_slicechunktosize(revlog, [0, 1], 2))
206 206 [[0], [1]]
207 207 >>> list(_slicechunktosize(revlog, [1, 3], 1))
208 208 [[1], [3]]
209 209 >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
210 210 [[3], [5]]
211 211
212 212 == No Snapshot cases ==
213 213 >>> revlog = _testrevlog(data)
214 214
215 215 Cases where chunk is already small enough
216 216 >>> list(_slicechunktosize(revlog, [0], 3))
217 217 [[0]]
218 218 >>> list(_slicechunktosize(revlog, [6, 7], 3))
219 219 [[6, 7]]
220 220 >>> list(_slicechunktosize(revlog, [0], None))
221 221 [[0]]
222 222 >>> list(_slicechunktosize(revlog, [6, 7], None))
223 223 [[6, 7]]
224 224
225 225 cases where we need actual slicing
226 226 >>> list(_slicechunktosize(revlog, [0, 1], 3))
227 227 [[0], [1]]
228 228 >>> list(_slicechunktosize(revlog, [1, 3], 3))
229 229 [[1], [3]]
230 230 >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
231 231 [[1], [2, 3]]
232 232 >>> list(_slicechunktosize(revlog, [3, 5], 3))
233 233 [[3], [5]]
234 234 >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
235 235 [[3], [4, 5]]
236 236 >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
237 237 [[5], [6, 7, 8]]
238 238 >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
239 239 [[0], [1, 2], [3], [5], [6, 7, 8]]
240 240
241 241 Case with too large individual chunk (must return valid chunk)
242 242 >>> list(_slicechunktosize(revlog, [0, 1], 2))
243 243 [[0], [1]]
244 244 >>> list(_slicechunktosize(revlog, [1, 3], 1))
245 245 [[1], [3]]
246 246 >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
247 247 [[3], [5]]
248 248
249 249 == mixed case ==
250 250 >>> revlog = _testrevlog(data, snapshot=[0, 1, 2])
251 251 >>> list(_slicechunktosize(revlog, list(range(9)), 5))
252 252 [[0, 1], [2], [3, 4, 5], [6, 7, 8]]
253 253 """
254 254 assert targetsize is None or 0 <= targetsize
255 255 startdata = revlog.start(revs[0])
256 256 enddata = revlog.end(revs[-1])
257 257 fullspan = enddata - startdata
258 258 if targetsize is None or fullspan <= targetsize:
259 259 yield revs
260 260 return
261 261
262 262 startrevidx = 0
263 263 endrevidx = 1
264 264 iterrevs = enumerate(revs)
265 265 next(iterrevs) # skip first rev.
266 266 # first step: get snapshots out of the way
267 267 for idx, r in iterrevs:
268 268 span = revlog.end(r) - startdata
269 269 snapshot = revlog.issnapshot(r)
270 270 if span <= targetsize and snapshot:
271 271 endrevidx = idx + 1
272 272 else:
273 273 chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
274 274 if chunk:
275 275 yield chunk
276 276 startrevidx = idx
277 277 startdata = revlog.start(r)
278 278 endrevidx = idx + 1
279 279 if not snapshot:
280 280 break
281 281
282 282 # for the others, we use binary slicing to quickly converge toward valid
283 283 # chunks (otherwise, we might end up looking for start/end of many
284 284 # revisions). This logic is not looking for the perfect slicing point, it
285 285 # focuses on quickly converging toward valid chunks.
286 286 nbitem = len(revs)
287 287 while (enddata - startdata) > targetsize:
288 288 endrevidx = nbitem
289 289 if nbitem - startrevidx <= 1:
290 290 break # protect against individual chunk larger than limit
291 291 localenddata = revlog.end(revs[endrevidx - 1])
292 292 span = localenddata - startdata
293 293 while span > targetsize:
294 294 if endrevidx - startrevidx <= 1:
295 295 break # protect against individual chunk larger than limit
296 296 endrevidx -= (endrevidx - startrevidx) // 2
297 297 localenddata = revlog.end(revs[endrevidx - 1])
298 298 span = localenddata - startdata
299 299 chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
300 300 if chunk:
301 301 yield chunk
302 302 startrevidx = endrevidx
303 303 startdata = revlog.start(revs[startrevidx])
304 304
305 305 chunk = _trimchunk(revlog, revs, startrevidx)
306 306 if chunk:
307 307 yield chunk
308 308
309 309
310 310 def _slicechunktodensity(revlog, revs, targetdensity=0.5, mingapsize=0):
311 311 """slice revs to reduce the amount of unrelated data to be read from disk.
312 312
313 313 ``revs`` is sliced into groups that should be read in one time.
314 314 Assume that revs are sorted.
315 315
316 316 The initial chunk is sliced until the overall density (payload/chunks-span
317 317 ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
318 318 skipped.
319 319
320 320 >>> revlog = _testrevlog([
321 321 ... 5, #00 (5)
322 322 ... 10, #01 (5)
323 323 ... 12, #02 (2)
324 324 ... 12, #03 (empty)
325 325 ... 27, #04 (15)
326 326 ... 31, #05 (4)
327 327 ... 31, #06 (empty)
328 328 ... 42, #07 (11)
329 329 ... 47, #08 (5)
330 330 ... 47, #09 (empty)
331 331 ... 48, #10 (1)
332 332 ... 51, #11 (3)
333 333 ... 74, #12 (23)
334 334 ... 85, #13 (11)
335 335 ... 86, #14 (1)
336 336 ... 91, #15 (5)
337 337 ... ])
338 338
339 339 >>> list(_slicechunktodensity(revlog, list(range(16))))
340 340 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
341 341 >>> list(_slicechunktodensity(revlog, [0, 15]))
342 342 [[0], [15]]
343 343 >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
344 344 [[0], [11], [15]]
345 345 >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
346 346 [[0], [11, 13, 15]]
347 347 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
348 348 [[1, 2], [5, 8, 10, 11], [14]]
349 349 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
350 350 ... mingapsize=20))
351 351 [[1, 2, 3, 5, 8, 10, 11], [14]]
352 352 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
353 353 ... targetdensity=0.95))
354 354 [[1, 2], [5], [8, 10, 11], [14]]
355 355 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
356 356 ... targetdensity=0.95, mingapsize=12))
357 357 [[1, 2], [5, 8, 10, 11], [14]]
358 358 """
359 359 start = revlog.start
360 360 length = revlog.length
361 361
362 362 if len(revs) <= 1:
363 363 yield revs
364 364 return
365 365
366 366 deltachainspan = segmentspan(revlog, revs)
367 367
368 368 if deltachainspan < mingapsize:
369 369 yield revs
370 370 return
371 371
372 372 readdata = deltachainspan
373 373 chainpayload = sum(length(r) for r in revs)
374 374
375 375 if deltachainspan:
376 376 density = chainpayload / float(deltachainspan)
377 377 else:
378 378 density = 1.0
379 379
380 380 if density >= targetdensity:
381 381 yield revs
382 382 return
383 383
384 384 # Collect the gaps so they can be sorted and popped by decreasing size
385 385 gaps = []
386 386 prevend = None
387 387 for i, rev in enumerate(revs):
388 388 revstart = start(rev)
389 389 revlen = length(rev)
390 390
391 391 # Skip empty revisions to form larger holes
392 392 if revlen == 0:
393 393 continue
394 394
395 395 if prevend is not None:
396 396 gapsize = revstart - prevend
397 397 # only consider holes that are large enough
398 398 if gapsize > mingapsize:
399 399 gaps.append((gapsize, i))
400 400
401 401 prevend = revstart + revlen
402 402 # sort the gaps to pop them from largest to smallest
403 403 gaps.sort()
404 404
405 405 # Collect the indices of the largest holes until the density is acceptable
406 406 selected = []
407 407 while gaps and density < targetdensity:
408 408 gapsize, gapidx = gaps.pop()
409 409
410 410 selected.append(gapidx)
411 411
412 412 # shrink the read span by the size of the gap we now skip
414 414 readdata -= gapsize
415 415 if readdata > 0:
416 416 density = chainpayload / float(readdata)
417 417 else:
418 418 density = 1.0
419 419 selected.sort()
420 420
421 421 # Cut the revs at collected indices
422 422 previdx = 0
423 423 for idx in selected:
424 424
425 425 chunk = _trimchunk(revlog, revs, previdx, idx)
426 426 if chunk:
427 427 yield chunk
428 428
429 429 previdx = idx
430 430
431 431 chunk = _trimchunk(revlog, revs, previdx)
432 432 if chunk:
433 433 yield chunk
434 434
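
The density driving this slicing is simply payload over read span. A small worked example, using the first four non-empty revisions of the doctest data above:

# revisions 0, 1, 2 and 4 have payloads 5, 5, 2 and 15; revision 3 is
# empty, so one contiguous read of the span fetches bytes 0..27.
payload = 5 + 5 + 2 + 15       # 27 useful bytes
span = 27 - 0                  # end(4) - start(0)
density = payload / float(span)
assert density == 1.0          # no gap large enough to skip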
435 435
436 436 def _trimchunk(revlog, revs, startidx, endidx=None):
437 437 """returns revs[startidx:endidx] without empty trailing revs
438 438
439 439 Doctest Setup
440 440 >>> revlog = _testrevlog([
441 441 ... 5, #0
442 442 ... 10, #1
443 443 ... 12, #2
444 444 ... 12, #3 (empty)
445 445 ... 17, #4
446 446 ... 21, #5
447 447 ... 21, #6 (empty)
448 448 ... ])
449 449
450 450 Contiguous cases:
451 451 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
452 452 [0, 1, 2, 3, 4, 5]
453 453 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
454 454 [0, 1, 2, 3, 4]
455 455 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
456 456 [0, 1, 2]
457 457 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
458 458 [2]
459 459 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
460 460 [3, 4, 5]
461 461 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
462 462 [3, 4]
463 463
464 464 Discontiguous cases:
465 465 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
466 466 [1, 3, 5]
467 467 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
468 468 [1]
469 469 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
470 470 [3, 5]
471 471 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
472 472 [3, 5]
473 473 """
474 474 length = revlog.length
475 475
476 476 if endidx is None:
477 477 endidx = len(revs)
478 478
479 479 # If we have a non-empty delta candidate, there is nothing to trim
480 480 if revs[endidx - 1] < len(revlog):
481 481 # Trim empty revs at the end, except the very first revision of a chain
482 482 while (
483 483 endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0
484 484 ):
485 485 endidx -= 1
486 486
487 487 return revs[startidx:endidx]
488 488
489 489
490 490 def segmentspan(revlog, revs):
491 491 """Get the byte span of a segment of revisions
492 492
493 493 revs is a sorted array of revision numbers
494 494
495 495 >>> revlog = _testrevlog([
496 496 ... 5, #0
497 497 ... 10, #1
498 498 ... 12, #2
499 499 ... 12, #3 (empty)
500 500 ... 17, #4
501 501 ... ])
502 502
503 503 >>> segmentspan(revlog, [0, 1, 2, 3, 4])
504 504 17
505 505 >>> segmentspan(revlog, [0, 4])
506 506 17
507 507 >>> segmentspan(revlog, [3, 4])
508 508 5
509 509 >>> segmentspan(revlog, [1, 2, 3,])
510 510 7
511 511 >>> segmentspan(revlog, [1, 3])
512 512 7
513 513 """
514 514 if not revs:
515 515 return 0
516 516 end = revlog.end(revs[-1])
517 517 return end - revlog.start(revs[0])
518 518
519 519
520 520 def _textfromdelta(fh, revlog, baserev, delta, p1, p2, flags, expectednode):
521 521 """build full text from a (base, delta) pair and other metadata"""
522 522 # special case deltas which replace entire base; no need to decode
523 523 # base revision. this neatly avoids censored bases, which throw when
524 524 # they're decoded.
525 525 hlen = struct.calcsize(b">lll")
526 526 if delta[:hlen] == mdiff.replacediffheader(
527 527 revlog.rawsize(baserev), len(delta) - hlen
528 528 ):
529 529 fulltext = delta[hlen:]
530 530 else:
531 531 # deltabase is rawtext before changed by flag processors, which is
532 532 # equivalent to non-raw text
533 533 basetext = revlog.revision(baserev, _df=fh)
534 534 fulltext = mdiff.patch(basetext, delta)
535 535
536 536 try:
537 537 validatehash = flagutil.processflagsraw(revlog, fulltext, flags)
538 538 if validatehash:
539 539 revlog.checkhash(fulltext, expectednode, p1=p1, p2=p2)
540 540 if flags & REVIDX_ISCENSORED:
541 541 raise error.StorageError(
542 542 _(b'node %s is not censored') % expectednode
543 543 )
544 544 except error.CensoredNodeError:
545 545 # must pass the censored index flag to add censored revisions
546 546 if not flags & REVIDX_ISCENSORED:
547 547 raise
548 548 return fulltext
549 549
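
The header comparison in `_textfromdelta` above relies on the bdiff hunk format: a delta that replaces the entire base is a single hunk whose header packs (start=0, end=baselen, newlen) as three big-endian 32-bit integers, which is what `mdiff.replacediffheader(baselen, newlen)` returns. A minimal sketch with illustrative sizes:

import struct

baselen, newlen = 120, 30
# same layout as mdiff.replacediffheader(baselen, newlen)
header = struct.pack(b">lll", 0, baselen, newlen)
delta = header + b"x" * newlen

hlen = struct.calcsize(b">lll")
if delta[:hlen] == header:
    # full replacement: the new text follows, no base decoding needed
    fulltext = delta[hlen:]
assert fulltext == b"x" * newlen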
550 550
551 551 @attr.s(slots=True, frozen=True)
552 552 class _deltainfo:
553 553 distance = attr.ib()
554 554 deltalen = attr.ib()
555 555 data = attr.ib()
556 556 base = attr.ib()
557 557 chainbase = attr.ib()
558 558 chainlen = attr.ib()
559 559 compresseddeltalen = attr.ib()
560 560 snapshotdepth = attr.ib()
561 561
562 562
563 563 def drop_u_compression(delta):
564 564 """turn into a "u" (no-compression) into no-compression without header
565 565
566 566 This is useful for revlog format that has better compression method.
567 567 """
568 568 assert delta.data[0] == b'u', delta.data[0]
569 569 return _deltainfo(
570 570 delta.distance,
571 571 delta.deltalen - 1,
572 572 (b'', delta.data[1]),
573 573 delta.base,
574 574 delta.chainbase,
575 575 delta.chainlen,
576 576 delta.compresseddeltalen,
577 577 delta.snapshotdepth,
578 578 )
579 579
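
A hedged sketch of what `drop_u_compression` does, with illustrative field values: `compress()` marks data stored as-is with a one-byte `b'u'` header, and formats that record the compression mode out of band can drop that byte.

info = _deltainfo(
    distance=10,
    deltalen=8,                # len(b'u') + len(b'payload')
    data=(b'u', b'payload'),
    base=3,
    chainbase=0,
    chainlen=2,
    compresseddeltalen=12,
    snapshotdepth=None,
)
plain = drop_u_compression(info)
assert plain.data == (b'', b'payload')
assert plain.deltalen == 7     # the b'u' marker byte is gone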
580 580
581 581 def is_good_delta_info(revlog, deltainfo, revinfo):
582 582 """Returns True if the given delta is good. Good means that it is within
583 583 the disk span, disk size, and chain length bounds that we know to be
584 584 performant."""
585 585 if deltainfo is None:
586 586 return False
587 587
588 588 if (
589 589 revinfo.cachedelta is not None
590 590 and deltainfo.base == revinfo.cachedelta[0]
591 591 and revinfo.cachedelta[2] == DELTA_BASE_REUSE_FORCE
592 592 ):
593 593 return True
594 594
595 595 # - 'deltainfo.distance' is the distance from the base revision --
596 596 # bounding it limits the amount of I/O we need to do.
597 597 # - 'deltainfo.compresseddeltalen' is the sum of the total size of
598 598 # deltas we need to apply -- bounding it limits the amount of CPU
599 599 # we consume.
600 600
601 601 textlen = revinfo.textlen
602 602 defaultmax = textlen * 4
603 603 maxdist = revlog._maxdeltachainspan
604 604 if not maxdist:
605 605 maxdist = deltainfo.distance # ensure the conditional passes
606 606 maxdist = max(maxdist, defaultmax)
607 607
608 608 # Bad delta from read span:
609 609 #
610 610 # If the span of data read is larger than the maximum allowed.
611 611 #
612 612 # In the sparse-revlog case, we rely on the associated "sparse reading"
613 613 # to avoid issues related to the span of data. In theory, it would be
614 614 # possible to build a pathological revlog where delta patterns would lead
615 615 # to too many reads. However, these do not happen in practice at all. So
616 616 # we skip the span check entirely.
617 617 if not revlog._sparserevlog and maxdist < deltainfo.distance:
618 618 return False
619 619
620 620 # Bad delta from new delta size:
621 621 #
622 622 # If the delta size is larger than the target text, storing the
623 623 # delta will be inefficient.
624 624 if textlen < deltainfo.deltalen:
625 625 return False
626 626
627 627 # Bad delta from cumulative payload size:
628 628 #
629 629 # If the sum of the deltas gets larger than K * the target text length.
630 630 if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
631 631 return False
632 632
633 633 # Bad delta from chain length:
634 634 #
635 635 # If the number of deltas in the chain gets too high.
636 636 if revlog._maxchainlen and revlog._maxchainlen < deltainfo.chainlen:
637 637 return False
638 638
639 639 # bad delta from intermediate snapshot size limit
640 640 #
641 641 # If an intermediate snapshot's size is higher than the limit. The
642 642 # limit exists to prevent endless chains of intermediate deltas from
643 643 # being created.
644 644 if (
645 645 deltainfo.snapshotdepth is not None
646 646 and (textlen >> deltainfo.snapshotdepth) < deltainfo.deltalen
647 647 ):
648 648 return False
649 649
650 650 # bad delta if new intermediate snapshot is larger than the previous
651 651 # snapshot
652 652 if (
653 653 deltainfo.snapshotdepth
654 654 and revlog.length(deltainfo.base) < deltainfo.deltalen
655 655 ):
656 656 return False
657 657
658 658 return True
659 659
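
A compact sketch of the distance bound applied above (non-sparse case, hypothetical helper and numbers): the allowed read span is the larger of `_maxdeltachainspan` and four times the revision's text length.

def _within_span(textlen, distance, maxdeltachainspan=0):
    maxdist = maxdeltachainspan or distance  # 0/unset disables the cap
    maxdist = max(maxdist, textlen * 4)
    return distance <= maxdist

assert _within_span(textlen=100, distance=399)
assert not _within_span(textlen=100, distance=5000, maxdeltachainspan=1000)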
660 660
661 661 # If a revision's full text is that much bigger than a base candidate full
662 662 # text's, it is very unlikely that it will produce a valid delta. We no longer
663 663 # consider these candidates.
664 664 LIMIT_BASE2TEXT = 500
665 665
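
For instance, in the sparse-revlog filter inside `_candidategroups` below, a base whose raw size is under `textlen // LIMIT_BASE2TEXT` is skipped without being tested. A sketch with a hypothetical helper name:

def _base_worth_trying(textlen, base_rawsize):
    return base_rawsize >= textlen // LIMIT_BASE2TEXT

assert _base_worth_trying(textlen=1000000, base_rawsize=5000)
assert not _base_worth_trying(textlen=1000000, base_rawsize=1000)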
666 666
667 667 def _candidategroups(
668 668 revlog,
669 669 textlen,
670 670 p1,
671 671 p2,
672 672 cachedelta,
673 673 excluded_bases=None,
674 674 target_rev=None,
675 675 snapshot_cache=None,
676 676 ):
677 677 """Provides group of revision to be tested as delta base
678 678
679 679 This top level function focus on emitting groups with unique and worthwhile
680 680 content. See _raw_candidate_groups for details about the group order.
681 681 """
682 682 # should we try to build a delta?
683 683 if not (len(revlog) and revlog._storedeltachains):
684 684 yield None
685 685 return
686 686
687 687 if target_rev is None:
688 688 target_rev = len(revlog)
689 689
690 690 if not revlog._generaldelta:
691 691 # before general delta, there is only one possible delta base
692 692 yield (target_rev - 1,)
693 693 yield None
694 694 return
695 695
696 696 if (
697 697 cachedelta is not None
698 698 and nullrev == cachedelta[0]
699 699 and cachedelta[2] == DELTA_BASE_REUSE_FORCE
700 700 ):
701 701 # instructions are to forcibly do a full snapshot
702 702 yield None
703 703 return
704 704
705 705 deltalength = revlog.length
706 706 deltaparent = revlog.deltaparent
707 707 sparse = revlog._sparserevlog
708 708 good = None
709 709
710 710 deltas_limit = textlen * LIMIT_DELTA2TEXT
711 711 group_chunk_size = revlog._candidate_group_chunk_size
712 712
713 713 tested = {nullrev}
714 714 candidates = _refinedgroups(
715 715 revlog,
716 716 p1,
717 717 p2,
718 718 cachedelta,
719 719 snapshot_cache=snapshot_cache,
720 720 )
721 721 while True:
722 722 temptative = candidates.send(good)
723 723 if temptative is None:
724 724 break
725 725 group = []
726 726 for rev in temptative:
727 727 # skip over empty deltas (no need to include them in a chain)
728 728 while not (rev == nullrev or rev in tested or deltalength(rev)):
729 729 tested.add(rev)
730 730 rev = deltaparent(rev)
731 731 # no need to try a delta against nullrev, this will be done as a
732 732 # last resort.
733 733 if rev == nullrev:
734 734 continue
735 735 # filter out revisions we tested already
736 736 if rev in tested:
737 737 continue
738 738
739 739 if (
740 740 cachedelta is not None
741 741 and rev == cachedelta[0]
742 742 and cachedelta[2] == DELTA_BASE_REUSE_FORCE
743 743 ):
744 744 # instructions are to forcibly consider/use this delta base
745 745 group.append(rev)
746 746 continue
747 747
748 748 # a higher authority deemed the base unworthy (e.g. censored)
749 749 if excluded_bases is not None and rev in excluded_bases:
750 750 tested.add(rev)
751 751 continue
752 752 # We are in some recomputation cases and that rev is too high in
753 753 # the revlog
754 754 if target_rev is not None and rev >= target_rev:
755 755 tested.add(rev)
756 756 continue
757 757 # filter out delta bases that will never produce a good delta
758 758 if deltas_limit < revlog.length(rev):
759 759 tested.add(rev)
760 760 continue
761 761 if sparse and revlog.rawsize(rev) < (textlen // LIMIT_BASE2TEXT):
762 762 tested.add(rev)
763 763 continue
764 764 # no delta for rawtext-changing revs (see "candelta" for why)
765 765 if revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
766 766 tested.add(rev)
767 767 continue
768 768
769 769 # If we reach here, we are about to build and test a delta.
770 770 # The delta building process will compute the chaininfo in all
771 771 # cases; since that computation is cached, it is fine to access it
772 772 # here too.
773 773 chainlen, chainsize = revlog._chaininfo(rev)
774 774 # if chain will be too long, skip base
775 775 if revlog._maxchainlen and chainlen >= revlog._maxchainlen:
776 776 tested.add(rev)
777 777 continue
778 778 # if the chain already has too much data, skip base
779 779 if deltas_limit < chainsize:
780 780 tested.add(rev)
781 781 continue
782 782 if sparse and revlog.upperboundcomp is not None:
783 783 maxcomp = revlog.upperboundcomp
784 784 basenotsnap = (p1, p2, nullrev)
785 785 if rev not in basenotsnap and revlog.issnapshot(rev):
786 786 snapshotdepth = revlog.snapshotdepth(rev)
787 787 # If text is significantly larger than the base, we can
788 788 # expect the resulting delta to be proportional to the size
789 789 # difference
790 790 revsize = revlog.rawsize(rev)
791 791 rawsizedistance = max(textlen - revsize, 0)
792 792 # use an estimate of the compression upper bound.
793 793 lowestrealisticdeltalen = rawsizedistance // maxcomp
794 794
795 795 # check the absolute constraint on the delta size
796 796 snapshotlimit = textlen >> snapshotdepth
797 797 if snapshotlimit < lowestrealisticdeltalen:
798 798 # delta lower bound is larger than accepted upper bound
799 799 tested.add(rev)
800 800 continue
801 801
802 802 # check the relative constraint on the delta size
803 803 revlength = revlog.length(rev)
804 804 if revlength < lowestrealisticdeltalen:
805 805 # delta probable lower bound is larger than target base
806 806 tested.add(rev)
807 807 continue
808 808
809 809 group.append(rev)
810 810 if group:
811 811 # When the size of the candidate group is big, it can result in a
812 812 # quite significant performance impact. To reduce this, we can send
813 813 # them in smaller batches until the new batch does not provide any
814 814 # improvements.
815 815 #
816 816 # This might reduce the overall efficiency of the compression in
817 817 # some corner cases, but that should also prevent very pathological
818 818 # cases from being an issue (e.g. 20,000 candidates).
819 819 #
820 820 # XXX note that the ordering of the group becomes important as it
821 821 # now impacts the final result. The current order is unprocessed
822 822 # and can be improved.
823 823 if group_chunk_size == 0:
824 824 tested.update(group)
825 825 good = yield tuple(group)
826 826 else:
827 827 prev_good = good
828 828 for start in range(0, len(group), group_chunk_size):
829 829 sub_group = group[start : start + group_chunk_size]
830 830 tested.update(sub_group)
831 831 good = yield tuple(sub_group)
832 832 if prev_good == good:
833 833 break
834 834
835 835 yield None
836 836
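
`_candidategroups` is a coroutine-style generator: after testing a group, the caller sends back the base it settled on (or None), which steers refinement. A minimal driver sketch; `pick_base` is a hypothetical stand-in for building and scoring real deltas:

def _drive(groups, pick_base):
    best = None
    candidates = next(groups)
    while candidates is not None:
        chosen = pick_base(candidates, best)
        if chosen is not None:
            best = chosen
            candidates = groups.send(best)   # refine around the winner
        else:
            candidates = next(groups)        # move on to the next group
    return best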
837 837
838 838 def _refinedgroups(revlog, p1, p2, cachedelta, snapshot_cache=None):
839 839 good = None
840 840 # First we try to reuse the delta contained in the bundle.
841 841 # (or from the source revlog)
842 842 #
843 843 # This logic only applies to general delta repositories and can be disabled
844 844 # through configuration. Disabling source delta reuse is useful when
845 845 # we want to make sure we recompute "optimal" deltas.
846 846 debug_info = None
847 847 if cachedelta is not None and cachedelta[2] > DELTA_BASE_REUSE_NO:
848 848 # Assume what we received from the server is a good choice;
849 849 # the delta builder will reuse the cache.
850 850 if debug_info is not None:
851 851 debug_info['cached-delta.tested'] += 1
852 852 good = yield (cachedelta[0],)
853 853 if good is not None:
854 854 if debug_info is not None:
855 855 debug_info['cached-delta.accepted'] += 1
856 856 yield None
857 857 return
858 858 if snapshot_cache is None:
859 859 snapshot_cache = SnapshotCache()
860 860 groups = _rawgroups(
861 861 revlog,
862 862 p1,
863 863 p2,
864 864 cachedelta,
865 865 snapshot_cache,
866 866 )
867 867 for candidates in groups:
868 868 good = yield candidates
869 869 if good is not None:
870 870 break
871 871
872 872 # If sparse revlog is enabled, we can try to refine the available deltas
873 873 if not revlog._sparserevlog:
874 874 yield None
875 875 return
876 876
877 877 # if we have a refinable value, try to refine it
878 878 if good is not None and good not in (p1, p2) and revlog.issnapshot(good):
879 879 # refine snapshot down
880 880 previous = None
881 881 while previous != good:
882 882 previous = good
883 883 base = revlog.deltaparent(good)
884 884 if base == nullrev:
885 885 break
886 886 good = yield (base,)
887 887 # refine snapshot up
888 888 if not snapshot_cache.snapshots:
889 889 snapshot_cache.update(revlog, good + 1)
890 890 previous = None
891 891 while good != previous:
892 892 previous = good
893 893 children = tuple(sorted(c for c in snapshot_cache.snapshots[good]))
894 894 good = yield children
895 895
896 896 if debug_info is not None:
897 897 if good is None:
898 898 debug_info['no-solution'] += 1
899 899
900 900 yield None
901 901
902 902
903 903 def _rawgroups(revlog, p1, p2, cachedelta, snapshot_cache=None):
904 904 """Provides group of revision to be tested as delta base
905 905
906 906 This lower level function focus on emitting delta theorically interresting
907 907 without looking it any practical details.
908 908
909 909 The group order aims at providing fast or small candidates first.
910 910 """
911 911 # Why search for a delta base if we cannot use a delta base?
912 912 assert revlog._generaldelta
913 913 # also see issue6056
914 914 sparse = revlog._sparserevlog
915 915 curr = len(revlog)
916 916 prev = curr - 1
917 917 deltachain = lambda rev: revlog._deltachain(rev)[0]
918 918
919 919 # exclude already lazy tested base if any
920 920 parents = [p for p in (p1, p2) if p != nullrev]
921 921
922 922 if not revlog._deltabothparents and len(parents) == 2:
923 923 parents.sort()
924 924 # To minimize the chance of having to build a fulltext,
925 925 # pick first whichever parent is closest to us (max rev)
926 926 yield (parents[1],)
927 927 # then the other one (min rev) if the first did not fit
928 928 yield (parents[0],)
929 929 elif len(parents) > 0:
930 930 # Test all parents (1 or 2), and keep the best candidate
931 931 yield parents
932 932
933 933 if sparse and parents:
934 934 if snapshot_cache is None:
935 935 # map: base-rev: [snapshot-revs]
936 936 snapshot_cache = SnapshotCache()
937 937 # See if we can use an existing snapshot in the parent chains as a
938 938 # base for a new intermediate snapshot
939 939 #
940 940 # search for snapshots in the parents' delta chains
941 941 # map: snapshot-level: snapshot-rev
942 942 parents_snaps = collections.defaultdict(set)
943 943 candidate_chains = [deltachain(p) for p in parents]
944 944 for chain in candidate_chains:
945 945 for idx, s in enumerate(chain):
946 946 if not revlog.issnapshot(s):
947 947 break
948 948 parents_snaps[idx].add(s)
949 949 snapfloor = min(parents_snaps[0]) + 1
950 950 snapshot_cache.update(revlog, snapfloor)
951 951 # search for the highest "unrelated" revision
952 952 #
953 953 # Adding snapshots used by "unrelated" revisions increases the odds we
954 954 # reuse an independent, yet better, snapshot chain.
955 955 #
956 956 # XXX instead of building a set of revisions, we could lazily enumerate
957 957 # over the chains. That would be more efficient, however we stick to
958 958 # simple code for now.
959 959 all_revs = set()
960 960 for chain in candidate_chains:
961 961 all_revs.update(chain)
962 962 other = None
963 963 for r in revlog.revs(prev, snapfloor):
964 964 if r not in all_revs:
965 965 other = r
966 966 break
967 967 if other is not None:
968 968 # To avoid unfair competition, we won't use unrelated intermediate
969 969 # snapshots that are deeper than the ones from the parent delta
970 970 # chain.
971 971 max_depth = max(parents_snaps.keys())
972 972 chain = deltachain(other)
973 973 for depth, s in enumerate(chain):
974 974 if s < snapfloor:
975 975 continue
976 976 if max_depth < depth:
977 977 break
978 978 if not revlog.issnapshot(s):
979 979 break
980 980 parents_snaps[depth].add(s)
981 981 # Test them as possible intermediate snapshot bases.
982 982 # We test them from highest to lowest level. High level ones are more
983 983 # likely to result in small deltas.
984 984 floor = None
985 985 for idx, snaps in sorted(parents_snaps.items(), reverse=True):
986 986 siblings = set()
987 987 for s in snaps:
988 988 siblings.update(snapshot_cache.snapshots[s])
989 989 # Before considering making a new intermediate snapshot, we check
990 990 # if an existing snapshot, child of the base we consider, would be
991 991 # suitable.
992 992 #
993 993 # It gives a chance to reuse a delta chain "unrelated" to the
994 994 # current revision instead of starting our own. Without such
995 995 # re-use, topological branches would keep reopening new chains,
996 996 # creating more and more snapshots as the repository grows.
997 997
998 998 if floor is not None:
999 999 # We only do this for siblings created after the one in our
1000 1000 # parent's delta chain. Those created before have fewer chances
1001 1001 # to be valid bases since our ancestors had to create a new
1002 1002 # snapshot.
1003 1003 siblings = [r for r in siblings if floor < r]
1004 1004 yield tuple(sorted(siblings))
1005 1005 # then test the base from our parent's delta chain.
1006 1006 yield tuple(sorted(snaps))
1007 1007 floor = min(snaps)
1008 1008 # No suitable base found in the parent chain; check whether any full
1009 1009 # snapshots emitted since parent's base would be a suitable base for an
1010 1010 # intermediate snapshot.
1011 1011 #
1012 1012 # It gives a chance to reuse a delta chain unrelated to the current
1013 1013 # revision instead of starting our own. Without such re-use,
1014 1014 # topological branches would keep reopening new full chains, creating
1015 1015 # more and more snapshots as the repository grows.
1016 1016 full = [r for r in snapshot_cache.snapshots[nullrev] if snapfloor <= r]
1017 1017 yield tuple(sorted(full))
1018 1018
1019 1019 if not sparse:
1020 1020 # other approaches failed, try against prev to hopefully save us a
1021 1021 # fulltext.
1022 1022 yield (prev,)
1023 1023
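
How the per-level buckets in `_rawgroups` are built can be seen with a toy example, assuming two hypothetical parent delta chains made only of snapshots:

import collections

parents_snaps = collections.defaultdict(set)
candidate_chains = [
    [2, 8, 15],  # p1: full snapshot 2, then level-1 and level-2 snapshots
    [2, 8, 21],  # p2 shares the first two snapshots
]
for chain in candidate_chains:
    for idx, s in enumerate(chain):
        parents_snaps[idx].add(s)

snapfloor = min(parents_snaps[0]) + 1  # only revisions above 2 matter
assert parents_snaps[2] == {15, 21}
assert snapfloor == 3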
1024 1024
1025 1025 class SnapshotCache:
1026 1026 __slots__ = ('snapshots', '_start_rev', '_end_rev')
1027 1027
1028 1028 def __init__(self):
1029 1029 self.snapshots = collections.defaultdict(set)
1030 1030 self._start_rev = None
1031 1031 self._end_rev = None
1032 1032
1033 1033 def update(self, revlog, start_rev=0):
1034 1034 """find snapshots from start_rev to tip"""
1035 1035 nb_revs = len(revlog)
1036 1036 end_rev = nb_revs - 1
1037 1037 if start_rev > end_rev:
1038 1038 return # range is empty
1039 1039
1040 1040 if self._start_rev is None:
1041 1041 assert self._end_rev is None
1042 1042 self._update(revlog, start_rev, end_rev)
1043 1043 elif not (self._start_rev <= start_rev and end_rev <= self._end_rev):
1044 1044 if start_rev < self._start_rev:
1045 1045 self._update(revlog, start_rev, self._start_rev - 1)
1046 1046 if self._end_rev < end_rev:
1047 1047 self._update(revlog, self._end_rev + 1, end_rev)
1048 1048
1049 1049 if self._start_rev is None:
1050 1050 assert self._end_rev is None
1051 1051 self._end_rev = end_rev
1052 1052 self._start_rev = start_rev
1053 1053 else:
1054 1054 self._start_rev = min(self._start_rev, start_rev)
1055 1055 self._end_rev = max(self._end_rev, end_rev)
1056 1056 assert self._start_rev <= self._end_rev, (
1057 1057 self._start_rev,
1058 1058 self._end_rev,
1059 1059 )
1060 1060
1061 1061 def _update(self, revlog, start_rev, end_rev):
1062 1062 """internal method that actually do update content"""
1063 1063 assert self._start_rev is None or (
1064 1064 start_rev < self._start_rev or start_rev > self._end_rev
1065 1065 ), (self._start_rev, self._end_rev, start_rev, end_rev)
1066 1066 assert self._start_rev is None or (
1067 1067 end_rev < self._start_rev or end_rev > self._end_rev
1068 1068 ), (self._start_rev, self._end_rev, start_rev, end_rev)
1069 1069 cache = self.snapshots
1070 1070 if util.safehasattr(revlog.index, b'findsnapshots'):
1071 1071 revlog.index.findsnapshots(cache, start_rev, end_rev)
1072 1072 else:
1073 1073 deltaparent = revlog.deltaparent
1074 1074 issnapshot = revlog.issnapshot
1075 1075 for rev in revlog.revs(start_rev, end_rev):
1076 1076 if issnapshot(rev):
1077 1077 cache[deltaparent(rev)].add(rev)
1078 1078
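
A small sketch of the cache's shape, using a tiny fake revlog in the spirit of `_testrevlog` above (all revisions, delta parents and snapshot flags are made up):

class _fakerevlog:
    index = object()  # no native findsnapshots fast path
    _snap_parents = {0: nullrev, 2: 0, 5: 2}

    def __len__(self):
        return 6

    def issnapshot(self, rev):
        return rev in self._snap_parents

    def deltaparent(self, rev):
        return self._snap_parents.get(rev, rev - 1)

    def revs(self, start, stop):
        return range(start, stop + 1)

cache = SnapshotCache()
cache.update(_fakerevlog())
assert cache.snapshots[nullrev] == {0}  # full snapshots
assert cache.snapshots[0] == {2}        # intermediates keyed by base
assert cache.snapshots[2] == {5}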
1079 1079
1080 1080 class deltacomputer:
1081 1081 def __init__(
1082 1082 self,
1083 1083 revlog,
1084 1084 write_debug=None,
1085 1085 debug_search=False,
1086 1086 debug_info=None,
1087 1087 ):
1088 1088 self.revlog = revlog
1089 1089 self._write_debug = write_debug
1090 1090 if write_debug is None:
1091 1091 self._debug_search = False
1092 1092 else:
1093 1093 self._debug_search = debug_search
1094 1094 self._debug_info = debug_info
1095 1095 self._snapshot_cache = SnapshotCache()
1096 1096
1097 1097 @property
1098 1098 def _gather_debug(self):
1099 1099 return self._write_debug is not None or self._debug_info is not None
1100 1100
1101 1101 def buildtext(self, revinfo, fh):
1102 1102 """Builds a fulltext version of a revision
1103 1103
1104 1104 revinfo: revisioninfo instance that contains all needed info
1105 1105 fh: file handle to either the .i or the .d revlog file,
1106 1106 depending on whether it is inlined or not
1107 1107 """
1108 1108 btext = revinfo.btext
1109 1109 if btext[0] is not None:
1110 1110 return btext[0]
1111 1111
1112 1112 revlog = self.revlog
1113 1113 cachedelta = revinfo.cachedelta
1114 1114 baserev = cachedelta[0]
1115 1115 delta = cachedelta[1]
1116 1116
1117 1117 fulltext = btext[0] = _textfromdelta(
1118 1118 fh,
1119 1119 revlog,
1120 1120 baserev,
1121 1121 delta,
1122 1122 revinfo.p1,
1123 1123 revinfo.p2,
1124 1124 revinfo.flags,
1125 1125 revinfo.node,
1126 1126 )
1127 1127 return fulltext
1128 1128
1129 1129 def _builddeltadiff(self, base, revinfo, fh):
1130 1130 revlog = self.revlog
1131 1131 t = self.buildtext(revinfo, fh)
1132 1132 if revlog.iscensored(base):
1133 1133 # deltas based on a censored revision must replace the
1134 1134 # full content in one patch, so delta works everywhere
1135 1135 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
1136 1136 delta = header + t
1137 1137 else:
1138 1138 ptext = revlog.rawdata(base, _df=fh)
1139 1139 delta = mdiff.textdiff(ptext, t)
1140 1140
1141 1141 return delta
1142 1142
1143 1143 def _builddeltainfo(self, revinfo, base, fh, target_rev=None):
1144 1144 # can we use the cached delta?
1145 1145 revlog = self.revlog
1146 1146 chainbase = revlog.chainbase(base)
1147 1147 if revlog._generaldelta:
1148 1148 deltabase = base
1149 1149 else:
1150 1150 if target_rev is not None and base != target_rev - 1:
1151 1151 msg = (
1152 1152 b'general delta cannot use delta for something else '
1153 1153 b'than `prev`: %d<-%d'
1154 1154 )
1155 1155 msg %= (base, target_rev)
1156 1156 raise error.ProgrammingError(msg)
1157 1157 deltabase = chainbase
1158 1158 snapshotdepth = None
1159 1159 if revlog._sparserevlog and deltabase == nullrev:
1160 1160 snapshotdepth = 0
1161 1161 elif revlog._sparserevlog and revlog.issnapshot(deltabase):
1162 1162 # A delta chain should always be one full snapshot,
1163 1163 # zero or more semi-snapshots, and zero or more deltas
1164 1164 p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
1165 1165 if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
1166 1166 snapshotdepth = len(revlog._deltachain(deltabase)[0])
1167 1167 delta = None
1168 1168 if revinfo.cachedelta:
1169 1169 cachebase = revinfo.cachedelta[0]
1170 1170 # check if the diff still applies
1171 1171 currentbase = cachebase
1172 1172 while (
1173 1173 currentbase != nullrev
1174 1174 and currentbase != base
1175 1175 and self.revlog.length(currentbase) == 0
1176 1176 ):
1177 1177 currentbase = self.revlog.deltaparent(currentbase)
1178 1178 if self.revlog._lazydelta and currentbase == base:
1179 1179 delta = revinfo.cachedelta[1]
1180 1180 if delta is None:
1181 1181 delta = self._builddeltadiff(base, revinfo, fh)
1182 1182 if self._debug_search:
1183 1183 msg = b"DBG-DELTAS-SEARCH: uncompressed-delta-size=%d\n"
1184 1184 msg %= len(delta)
1185 1185 self._write_debug(msg)
1186 1186 # snapshotdepth needs to be neither None nor a 0-level snapshot
1187 1187 if revlog.upperboundcomp is not None and snapshotdepth:
1188 1188 lowestrealisticdeltalen = len(delta) // revlog.upperboundcomp
1189 1189 snapshotlimit = revinfo.textlen >> snapshotdepth
1190 1190 if self._debug_search:
1191 1191 msg = b"DBG-DELTAS-SEARCH: projected-lower-size=%d\n"
1192 1192 msg %= lowestrealisticdeltalen
1193 1193 self._write_debug(msg)
1194 1194 if snapshotlimit < lowestrealisticdeltalen:
1195 1195 if self._debug_search:
1196 1196 msg = b"DBG-DELTAS-SEARCH: DISCARDED (snapshot limit)\n"
1197 1197 self._write_debug(msg)
1198 1198 return None
1199 1199 if revlog.length(base) < lowestrealisticdeltalen:
1200 1200 if self._debug_search:
1201 1201 msg = b"DBG-DELTAS-SEARCH: DISCARDED (prev size)\n"
1202 1202 self._write_debug(msg)
1203 1203 return None
1204 1204 header, data = revlog.compress(delta)
1205 1205 deltalen = len(header) + len(data)
1206 1206 offset = revlog.end(len(revlog) - 1)
1207 1207 dist = deltalen + offset - revlog.start(chainbase)
1208 1208 chainlen, compresseddeltalen = revlog._chaininfo(base)
1209 1209 chainlen += 1
1210 1210 compresseddeltalen += deltalen
1211 1211
1212 1212 return _deltainfo(
1213 1213 dist,
1214 1214 deltalen,
1215 1215 (header, data),
1216 1216 deltabase,
1217 1217 chainbase,
1218 1218 chainlen,
1219 1219 compresseddeltalen,
1220 1220 snapshotdepth,
1221 1221 )
1222 1222
1223 1223 def _fullsnapshotinfo(self, fh, revinfo, curr):
1224 1224 rawtext = self.buildtext(revinfo, fh)
1225 1225 data = self.revlog.compress(rawtext)
1226 1226 compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
1227 1227 deltabase = chainbase = curr
1228 1228 snapshotdepth = 0
1229 1229 chainlen = 1
1230 1230
1231 1231 return _deltainfo(
1232 1232 dist,
1233 1233 deltalen,
1234 1234 data,
1235 1235 deltabase,
1236 1236 chainbase,
1237 1237 chainlen,
1238 1238 compresseddeltalen,
1239 1239 snapshotdepth,
1240 1240 )
1241 1241
1242 1242 def finddeltainfo(self, revinfo, fh, excluded_bases=None, target_rev=None):
1243 1243 """Find an acceptable delta against a candidate revision
1244 1244
1245 1245 revinfo: information about the revision (instance of _revisioninfo)
1246 1246 fh: file handle to either the .i or the .d revlog file,
1247 1247 depending on whether it is inlined or not
1248 1248
1249 1249 Returns the first acceptable candidate revision, as ordered by
1250 1250 _candidategroups
1251 1251
1252 1252 If no suitable deltabase is found, we return delta info for a full
1253 1253 snapshot.
1254 1254
1255 1255 `excluded_bases` is an optional set of revisions that cannot be used as
1256 1256 a delta base. Use this to recompute deltas suitable in censor or strip
1257 1257 contexts.
1258 1258 """
1259 1259 if target_rev is None:
1260 1260 target_rev = len(self.revlog)
1261 1261
1262 1262 gather_debug = self._gather_debug
1263 1263 cachedelta = revinfo.cachedelta
1264 1264 revlog = self.revlog
1265 p1r = p2r = None
1265 1266
1266 p1r = p2r = None
1267 if excluded_bases is None:
1268 excluded_bases = set()
1267 1269
1268 1270 if gather_debug:
1269 1271 start = util.timer()
1270 1272 dbg = self._one_dbg_data()
1271 1273 dbg['revision'] = target_rev
1272 1274 target_revlog = b"UNKNOWN"
1273 1275 target_type = self.revlog.target[0]
1274 1276 target_key = self.revlog.target[1]
1275 1277 if target_type == KIND_CHANGELOG:
1276 1278 target_revlog = b'CHANGELOG:'
1277 1279 elif target_type == KIND_MANIFESTLOG:
1278 1280 target_revlog = b'MANIFESTLOG:'
1279 1281 if target_key:
1280 1282 target_revlog += b'%s:' % target_key
1281 1283 elif target_type == KIND_FILELOG:
1282 1284 target_revlog = b'FILELOG:'
1283 1285 if target_key:
1284 1286 target_revlog += b'%s:' % target_key
1285 1287 dbg['target-revlog'] = target_revlog
1286 1288 p1r = revlog.rev(revinfo.p1)
1287 1289 p2r = revlog.rev(revinfo.p2)
1288 1290 if p1r != nullrev:
1289 1291 p1_chain_len = revlog._chaininfo(p1r)[0]
1290 1292 else:
1291 1293 p1_chain_len = -1
1292 1294 if p2r != nullrev:
1293 1295 p2_chain_len = revlog._chaininfo(p2r)[0]
1294 1296 else:
1295 1297 p2_chain_len = -1
1296 1298 dbg['p1-chain-len'] = p1_chain_len
1297 1299 dbg['p2-chain-len'] = p2_chain_len
1298 1300
1299 1301 # 1) if the revision is empty, no amount of delta can beat it
1300 1302 #
1301 1303 # 2) no delta for flag processor revision (see "candelta" for why)
1302 1304 # not calling candelta since only one revision needs test, also to
1303 1305 # avoid overhead fetching flags again.
1304 1306 if not revinfo.textlen or revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
1305 1307 deltainfo = self._fullsnapshotinfo(fh, revinfo, target_rev)
1306 1308 if gather_debug:
1307 1309 end = util.timer()
1308 1310 dbg['duration'] = end - start
1309 1311 dbg[
1310 1312 'delta-base'
1311 1313 ] = deltainfo.base # pytype: disable=attribute-error
1312 1314 dbg['search_round_count'] = 0
1313 dbg['using-cached-base'] = True
1315 dbg['using-cached-base'] = False
1314 1316 dbg['delta_try_count'] = 0
1315 1317 dbg['type'] = b"full"
1316 1318 dbg['snapshot-depth'] = 0
1317 1319 self._dbg_process_data(dbg)
1318 1320 return deltainfo
1319 1321
1320 if excluded_bases is None:
1321 excluded_bases = set()
1322 deltainfo = None
1323
1324 # If this source delta is to be forcibly reused, let us comply early.
1325 if (
1326 revlog._generaldelta
1327 and revinfo.cachedelta is not None
1328 and revinfo.cachedelta[2] == DELTA_BASE_REUSE_FORCE
1329 ):
1330 base = revinfo.cachedelta[0]
1331 if base == nullrev:
1332 dbg_type = b"full"
1333 deltainfo = self._fullsnapshotinfo(fh, revinfo, target_rev)
1334 if gather_debug:
1335 snapshotdepth = 0
1336 elif base not in excluded_bases:
1337 delta = revinfo.cachedelta[1]
1338 header, data = revlog.compress(delta)
1339 deltalen = len(header) + len(data)
1340 if gather_debug:
1341 offset = revlog.end(len(revlog) - 1)
1342 chainbase = revlog.chainbase(base)
1343 distance = deltalen + offset - revlog.start(chainbase)
1344 chainlen, compresseddeltalen = revlog._chaininfo(base)
1345 chainlen += 1
1346 compresseddeltalen += deltalen
1347 if base == p1r or base == p2r:
1348 dbg_type = b"delta"
1349 snapshotdepth = None
1350 elif not revlog.issnapshot(base):
1351 snapshotdepth = None
1352 else:
1353 dbg_type = b"snapshot"
1354 snapshotdepth = revlog.snapshotdepth(base) + 1
1355 else:
1356 distance = None
1357 chainbase = None
1358 chainlen = None
1359 compresseddeltalen = None
1360 snapshotdepth = None
1361 deltainfo = _deltainfo(
1362 distance=distance,
1363 deltalen=deltalen,
1364 data=(header, data),
1365 base=base,
1366 chainbase=chainbase,
1367 chainlen=chainlen,
1368 compresseddeltalen=compresseddeltalen,
1369 snapshotdepth=snapshotdepth,
1370 )
1371
1372 if deltainfo is not None:
1373 if gather_debug:
1374 end = util.timer()
1375 dbg['duration'] = end - start
1376 dbg[
1377 'delta-base'
1378 ] = deltainfo.base # pytype: disable=attribute-error
1379 dbg['search_round_count'] = 0
1380 dbg['using-cached-base'] = True
1381 dbg['delta_try_count'] = 0
1382 dbg['type'] = b"full"
1383 if snapshotdepth is None:
1384 dbg['snapshot-depth'] = 0
1385 else:
1386 dbg['snapshot-depth'] = snapshotdepth
1387 self._dbg_process_data(dbg)
1388 return deltainfo
1322 1389
1323 1390 # count the number of different deltas we tried (for debug purposes)
1324 1391 dbg_try_count = 0
1325 1392 # count the number of "search rounds" we did (for debug purposes)
1326 1393 dbg_try_rounds = 0
1327 1394 dbg_type = b'unknown'
1328 1395
1329 deltainfo = None
1330 1396 if p1r is None:
1331 1397 p1r = revlog.rev(revinfo.p1)
1332 1398 p2r = revlog.rev(revinfo.p2)
1333 1399
1334 1400 if self._debug_search:
1335 1401 msg = b"DBG-DELTAS-SEARCH: SEARCH rev=%d\n"
1336 1402 msg %= target_rev
1337 1403 self._write_debug(msg)
1338 1404
1339 1405 groups = _candidategroups(
1340 1406 self.revlog,
1341 1407 revinfo.textlen,
1342 1408 p1r,
1343 1409 p2r,
1344 1410 cachedelta,
1345 1411 excluded_bases,
1346 1412 target_rev,
1347 1413 snapshot_cache=self._snapshot_cache,
1348 1414 )
1349 1415 candidaterevs = next(groups)
1350 1416 while candidaterevs is not None:
1351 1417 dbg_try_rounds += 1
1352 1418 if self._debug_search:
1353 1419 prev = None
1354 1420 if deltainfo is not None:
1355 1421 prev = deltainfo.base
1356 1422
1357 1423 if (
1358 1424 cachedelta is not None
1359 1425 and len(candidaterevs) == 1
1360 1426 and cachedelta[0] in candidaterevs
1361 1427 ):
1362 1428 round_type = b"cached-delta"
1363 1429 elif p1r in candidaterevs or p2r in candidaterevs:
1364 1430 round_type = b"parents"
1365 1431 elif prev is not None and all(c < prev for c in candidaterevs):
1366 1432 round_type = b"refine-down"
1367 1433 elif prev is not None and all(c > prev for c in candidaterevs):
1368 1434 round_type = b"refine-up"
1369 1435 else:
1370 1436 round_type = b"search-down"
1371 1437 msg = b"DBG-DELTAS-SEARCH: ROUND #%d - %d candidates - %s\n"
1372 1438 msg %= (dbg_try_rounds, len(candidaterevs), round_type)
1373 1439 self._write_debug(msg)
1374 1440 nominateddeltas = []
1375 1441 if deltainfo is not None:
1376 1442 if self._debug_search:
1377 1443 msg = (
1378 1444 b"DBG-DELTAS-SEARCH: CONTENDER: rev=%d - length=%d\n"
1379 1445 )
1380 1446 msg %= (deltainfo.base, deltainfo.deltalen)
1381 1447 self._write_debug(msg)
1382 1448 # if we already found a good delta,
1383 1449 # challenge it against refined candidates
1384 1450 nominateddeltas.append(deltainfo)
1385 1451 for candidaterev in candidaterevs:
1386 1452 if self._debug_search:
1387 1453 msg = b"DBG-DELTAS-SEARCH: CANDIDATE: rev=%d\n"
1388 1454 msg %= candidaterev
1389 1455 self._write_debug(msg)
1390 1456 candidate_type = None
1391 1457 if candidaterev == p1r:
1392 1458 candidate_type = b"p1"
1393 1459 elif candidaterev == p2r:
1394 1460 candidate_type = b"p2"
1395 1461 elif self.revlog.issnapshot(candidaterev):
1396 1462 candidate_type = b"snapshot-%d"
1397 1463 candidate_type %= self.revlog.snapshotdepth(
1398 1464 candidaterev
1399 1465 )
1400 1466
1401 1467 if candidate_type is not None:
1402 1468 msg = b"DBG-DELTAS-SEARCH: type=%s\n"
1403 1469 msg %= candidate_type
1404 1470 self._write_debug(msg)
1405 1471 msg = b"DBG-DELTAS-SEARCH: size=%d\n"
1406 1472 msg %= self.revlog.length(candidaterev)
1407 1473 self._write_debug(msg)
1408 1474 msg = b"DBG-DELTAS-SEARCH: base=%d\n"
1409 1475 msg %= self.revlog.deltaparent(candidaterev)
1410 1476 self._write_debug(msg)
1411 1477
1412 1478 dbg_try_count += 1
1413 1479
1414 1480 if self._debug_search:
1415 1481 delta_start = util.timer()
1416 1482 candidatedelta = self._builddeltainfo(
1417 1483 revinfo,
1418 1484 candidaterev,
1419 1485 fh,
1420 1486 target_rev=target_rev,
1421 1487 )
1422 1488 if self._debug_search:
1423 1489 delta_end = util.timer()
1424 1490 msg = b"DBG-DELTAS-SEARCH: delta-search-time=%f\n"
1425 1491 msg %= delta_end - delta_start
1426 1492 self._write_debug(msg)
1427 1493 if candidatedelta is not None:
1428 1494 if is_good_delta_info(self.revlog, candidatedelta, revinfo):
1429 1495 if self._debug_search:
1430 1496 msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (GOOD)\n"
1431 1497 msg %= candidatedelta.deltalen
1432 1498 self._write_debug(msg)
1433 1499 nominateddeltas.append(candidatedelta)
1434 1500 elif self._debug_search:
1435 1501 msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (BAD)\n"
1436 1502 msg %= candidatedelta.deltalen
1437 1503 self._write_debug(msg)
1438 1504 elif self._debug_search:
1439 1505 msg = b"DBG-DELTAS-SEARCH: NO-DELTA\n"
1440 1506 self._write_debug(msg)
1441 1507 if nominateddeltas:
1442 1508 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
1443 1509 if deltainfo is not None:
1444 1510 candidaterevs = groups.send(deltainfo.base)
1445 1511 else:
1446 1512 candidaterevs = next(groups)
1447 1513
1448 1514 if deltainfo is None:
1449 1515 dbg_type = b"full"
1450 1516 deltainfo = self._fullsnapshotinfo(fh, revinfo, target_rev)
1451 1517 elif deltainfo.snapshotdepth: # pytype: disable=attribute-error
1452 1518 dbg_type = b"snapshot"
1453 1519 else:
1454 1520 dbg_type = b"delta"
1455 1521
1456 1522 if gather_debug:
1457 1523 end = util.timer()
1458 1524 if dbg_type == b'full':
1459 1525 used_cached = (
1460 1526 cachedelta is not None
1461 1527 and dbg_try_rounds == 0
1462 1528 and dbg_try_count == 0
1463 1529 and cachedelta[0] == nullrev
1464 1530 )
1465 1531 else:
1466 1532 used_cached = (
1467 1533 cachedelta is not None
1468 1534 and dbg_try_rounds == 1
1469 1535 and dbg_try_count == 1
1470 1536 and deltainfo.base == cachedelta[0]
1471 1537 )
1472 1538 dbg['duration'] = end - start
1473 1539 dbg[
1474 1540 'delta-base'
1475 1541 ] = deltainfo.base # pytype: disable=attribute-error
1476 1542 dbg['search_round_count'] = dbg_try_rounds
1477 1543 dbg['using-cached-base'] = used_cached
1478 1544 dbg['delta_try_count'] = dbg_try_count
1479 1545 dbg['type'] = dbg_type
1480 1546 if (
1481 1547 deltainfo.snapshotdepth # pytype: disable=attribute-error
1482 1548 is not None
1483 1549 ):
1484 1550 dbg[
1485 1551 'snapshot-depth'
1486 1552 ] = deltainfo.snapshotdepth # pytype: disable=attribute-error
1487 1553 else:
1488 1554 dbg['snapshot-depth'] = 0
1489 1555 self._dbg_process_data(dbg)
1490 1556 return deltainfo
1491 1557
1492 1558 def _one_dbg_data(self):
1493 1559 return {
1494 1560 'duration': None,
1495 1561 'revision': None,
1496 1562 'delta-base': None,
1497 1563 'search_round_count': None,
1498 1564 'using-cached-base': None,
1499 1565 'delta_try_count': None,
1500 1566 'type': None,
1501 1567 'p1-chain-len': None,
1502 1568 'p2-chain-len': None,
1503 1569 'snapshot-depth': None,
1504 1570 'target-revlog': None,
1505 1571 }
1506 1572
1507 1573 def _dbg_process_data(self, dbg):
1508 1574 if self._debug_info is not None:
1509 1575 self._debug_info.append(dbg)
1510 1576
1511 1577 if self._write_debug is not None:
1512 1578 msg = (
1513 1579 b"DBG-DELTAS:"
1514 1580 b" %-12s"
1515 1581 b" rev=%d:"
1516 1582 b" delta-base=%d"
1517 1583 b" is-cached=%d"
1518 1584 b" - search-rounds=%d"
1519 1585 b" try-count=%d"
1520 1586 b" - delta-type=%-6s"
1521 1587 b" snap-depth=%d"
1522 1588 b" - p1-chain-length=%d"
1523 1589 b" p2-chain-length=%d"
1524 1590 b" - duration=%f"
1525 1591 b"\n"
1526 1592 )
1527 1593 msg %= (
1528 1594 dbg["target-revlog"],
1529 1595 dbg["revision"],
1530 1596 dbg["delta-base"],
1531 1597 dbg["using-cached-base"],
1532 1598 dbg["search_round_count"],
1533 1599 dbg["delta_try_count"],
1534 1600 dbg["type"],
1535 1601 dbg["snapshot-depth"],
1536 1602 dbg["p1-chain-len"],
1537 1603 dbg["p2-chain-len"],
1538 1604 dbg["duration"],
1539 1605 )
1540 1606 self._write_debug(msg)
1541 1607
1542 1608
1543 1609 def delta_compression(default_compression_header, deltainfo):
1544 1610 """return (COMPRESSION_MODE, deltainfo)
1545 1611
1546 1612 used by revlog v2+ format to dispatch between PLAIN and DEFAULT
1547 1613 compression.
1548 1614 """
1549 1615 h, d = deltainfo.data
1550 1616 compression_mode = COMP_MODE_INLINE
1551 1617 if not h and not d:
1552 1618 # no data to store at all... declare them uncompressed
1553 1619 compression_mode = COMP_MODE_PLAIN
1554 1620 elif not h:
1555 1621 t = d[0:1]
1556 1622 if t == b'\0':
1557 1623 compression_mode = COMP_MODE_PLAIN
1558 1624 elif t == default_compression_header:
1559 1625 compression_mode = COMP_MODE_DEFAULT
1560 1626 elif h == b'u':
1561 1627 # we have a more efficient way to declare uncompressed
1562 1628 h = b''
1563 1629 compression_mode = COMP_MODE_PLAIN
1564 1630 deltainfo = drop_u_compression(deltainfo)
1565 1631 return compression_mode, deltainfo
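For reference, the dispatch above can be exercised in isolation. The sketch
below is a simplified stand-in, not the real revlog types: the constants, the
minimal deltainfo shape, and pick_mode are illustrative, and the
drop_u_compression rewrite of the 'u' case is omitted.

    from collections import namedtuple

    # simplified stand-ins for the revlog constants and delta object
    COMP_MODE_PLAIN, COMP_MODE_DEFAULT, COMP_MODE_INLINE = 0, 1, 2
    DeltaInfo = namedtuple('DeltaInfo', ['data'])  # data == (header, body)

    def pick_mode(default_header, info):
        h, d = info.data
        if not h and not d:
            return COMP_MODE_PLAIN        # empty payload: store as plain
        if not h:
            t = d[0:1]
            if t == b'\0':
                return COMP_MODE_PLAIN    # leading NUL marks plain data
            if t == default_header:
                return COMP_MODE_DEFAULT  # matches the revlog-wide default
            return COMP_MODE_INLINE       # anything else keeps its header
        if h == b'u':
            return COMP_MODE_PLAIN        # legacy 'u' header: uncompressed
        return COMP_MODE_INLINE

    # zlib-compressed chunks start with b'x', the usual default header
    assert pick_mode(b'x', DeltaInfo((b'', b''))) == COMP_MODE_PLAIN
    assert pick_mode(b'x', DeltaInfo((b'', b'x\x9c...'))) == COMP_MODE_DEFAULT
    assert pick_mode(b'x', DeltaInfo((b'u', b'raw text'))) == COMP_MODE_PLAIN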
@@ -1,362 +1,362 b''
1 1 ==========================================================
2 2 Test various things around delta computation within revlog
3 3 ==========================================================
4 4
5 5
6 6 basic setup
7 7 -----------
8 8
9 9 $ cat << EOF >> $HGRCPATH
10 10 > [debug]
11 11 > revlog.debug-delta=yes
12 12 > EOF
13 13 $ cat << EOF >> sha256line.py
14 14 > # a way to quickly produce files of significant size with poorly compressible content.
15 15 > import hashlib
16 16 > import sys
17 17 > for line in sys.stdin:
18 18 > print(hashlib.sha256(line.encode('utf8')).hexdigest())
19 19 > EOF
20 20
21 21 $ hg init base-repo
22 22 $ cd base-repo
23 23
24 24 create a "large" file
25 25
26 26 $ $TESTDIR/seq.py 1000 | $PYTHON $TESTTMP/sha256line.py > my-file.txt
27 27 $ hg add my-file.txt
28 28 $ hg commit -m initial-commit
29 29 DBG-DELTAS: FILELOG:my-file.txt: rev=0: delta-base=0 * (glob)
30 30 DBG-DELTAS: MANIFESTLOG: * (glob)
31 31 DBG-DELTAS: CHANGELOG: * (glob)
32 32
33 33 Add more changes at the end of the file
34 34
35 35 $ $TESTDIR/seq.py 1001 1200 | $PYTHON $TESTTMP/sha256line.py >> my-file.txt
36 36 $ hg commit -m "large-change"
37 37 DBG-DELTAS: FILELOG:my-file.txt: rev=1: delta-base=0 * (glob)
38 38 DBG-DELTAS: MANIFESTLOG: * (glob)
39 39 DBG-DELTAS: CHANGELOG: * (glob)
40 40
41 41 Add a small change at the start
42 42
43 43 $ hg up 'desc("initial-commit")' --quiet
44 44 $ mv my-file.txt foo
45 45 $ echo "small change at the start" > my-file.txt
46 46 $ cat foo >> my-file.txt
47 47 $ rm foo
48 48 $ hg commit -m "small-change"
49 49 DBG-DELTAS: FILELOG:my-file.txt: rev=2: delta-base=0 * (glob)
50 50 DBG-DELTAS: MANIFESTLOG: * (glob)
51 51 DBG-DELTAS: CHANGELOG: * (glob)
52 52 created new head
53 53
54 54
55 55 $ hg log -r 'head()' -T '{node}\n' >> ../base-heads.nodes
56 56 $ hg log -r 'desc("initial-commit")' -T '{node}\n' >> ../initial.node
57 57 $ hg log -r 'desc("small-change")' -T '{node}\n' >> ../small.node
58 58 $ hg log -r 'desc("large-change")' -T '{node}\n' >> ../large.node
59 59 $ cd ..
60 60
61 61 Check delta find policy and result for merge on commit
62 62 ======================================================
63 63
65 65 Check that the delta of a merge picks the best of the two parents
66 66 -----------------------------------------------------------------
66 66
67 67 As we check against both parents, the one with the largest change should
68 68 produce the smallest delta and be picked.
69 69
70 70 $ hg clone base-repo test-parents --quiet
71 71 $ hg -R test-parents update 'nodefromfile("small.node")' --quiet
72 72 $ hg -R test-parents merge 'nodefromfile("large.node")' --quiet
73 73
74 74 The delta base is the "large" revision as it produces a smaller delta.
75 75
76 76 $ hg -R test-parents commit -m "merge from small change"
77 77 DBG-DELTAS: FILELOG:my-file.txt: rev=3: delta-base=1 * (glob)
78 78 DBG-DELTAS: MANIFESTLOG: * (glob)
79 79 DBG-DELTAS: CHANGELOG: * (glob)
80 80
81 81 Check that the behavior tested above can be disabled
82 82 ----------------------------------------------------
83 83
84 84 We disable checking both parents at the same time. The `small` change,
85 85 which produces a less optimal delta, should be picked because it is
86 86 "closer" to the new commit.
87 87
88 88 $ hg clone base-repo test-no-parents --quiet
89 89 $ hg -R test-no-parents update 'nodefromfile("small.node")' --quiet
90 90 $ hg -R test-no-parents merge 'nodefromfile("large.node")' --quiet
91 91
92 92 The delta base is now the "small" revision, even though it produces a larger delta.
93 93
94 94 $ hg -R test-no-parents commit -m "merge from small change" \
95 95 > --config storage.revlog.optimize-delta-parent-choice=no
96 96 DBG-DELTAS: FILELOG:my-file.txt: rev=3: delta-base=2 * (glob)
97 97 DBG-DELTAS: MANIFESTLOG: * (glob)
98 98 DBG-DELTAS: CHANGELOG: * (glob)
99 99
100 100
101 101 Check delta-find policy and result when unbundling
102 102 ==================================================
103 103
104 104 Build a bundle with all deltas built against p1
105 105
106 106 $ hg bundle -R test-parents --all --config devel.bundle.delta=p1 all-p1.hg
107 107 4 changesets found
108 108
109 109 Default policy of trusting delta from the bundle
110 110 ------------------------------------------------
111 111
112 112 Keeping the `p1` delta used in the bundle is sub-optimal for storage, but
113 113 trusting the in-bundle delta is faster to apply.
114 114
115 115 $ hg init bundle-default
116 116 $ hg -R bundle-default unbundle all-p1.hg --quiet
117 117 DBG-DELTAS: CHANGELOG: * (glob)
118 118 DBG-DELTAS: CHANGELOG: * (glob)
119 119 DBG-DELTAS: CHANGELOG: * (glob)
120 120 DBG-DELTAS: CHANGELOG: * (glob)
121 121 DBG-DELTAS: MANIFESTLOG: * (glob)
122 122 DBG-DELTAS: MANIFESTLOG: * (glob)
123 123 DBG-DELTAS: MANIFESTLOG: * (glob)
124 124 DBG-DELTAS: MANIFESTLOG: * (glob)
125 125 DBG-DELTAS: FILELOG:my-file.txt: rev=0: delta-base=0 * (glob)
126 126 DBG-DELTAS: FILELOG:my-file.txt: rev=1: delta-base=0 * (glob)
127 127 DBG-DELTAS: FILELOG:my-file.txt: rev=2: delta-base=0 * (glob)
128 128 DBG-DELTAS: FILELOG:my-file.txt: rev=3: delta-base=2 * (glob)
129 129
130 130 (confirm the file revisions are in the same order: 2 should be smaller than 1)
131 131
132 132 $ hg -R bundle-default debugdata my-file.txt 2 | wc -l
133 133 \s*1001 (re)
134 134 $ hg -R bundle-default debugdata my-file.txt 1 | wc -l
135 135 \s*1200 (re)
136 136
137 137 explicitly enabled
138 138 ------------------
139 139
140 140 Keeping the `p1` delta used in the bundle is sub-optimal for storage, but
141 141 trusting the in-bundle delta is faster to apply.
142 142
143 143 $ hg init bundle-reuse-enabled
144 144 $ hg -R bundle-reuse-enabled unbundle all-p1.hg --quiet \
145 145 > --config storage.revlog.reuse-external-delta-parent=yes
146 146 DBG-DELTAS: CHANGELOG: * (glob)
147 147 DBG-DELTAS: CHANGELOG: * (glob)
148 148 DBG-DELTAS: CHANGELOG: * (glob)
149 149 DBG-DELTAS: CHANGELOG: * (glob)
150 150 DBG-DELTAS: MANIFESTLOG: * (glob)
151 151 DBG-DELTAS: MANIFESTLOG: * (glob)
152 152 DBG-DELTAS: MANIFESTLOG: * (glob)
153 153 DBG-DELTAS: MANIFESTLOG: * (glob)
154 154 DBG-DELTAS: FILELOG:my-file.txt: rev=0: delta-base=0 * (glob)
155 155 DBG-DELTAS: FILELOG:my-file.txt: rev=1: delta-base=0 * (glob)
156 156 DBG-DELTAS: FILELOG:my-file.txt: rev=2: delta-base=0 * (glob)
157 157 DBG-DELTAS: FILELOG:my-file.txt: rev=3: delta-base=2 * (glob)
158 158
159 159 (confirm the file revisions are in the same order: 2 should be smaller than 1)
160 160
161 161 $ hg -R bundle-reuse-enabled debugdata my-file.txt 2 | wc -l
162 162 \s*1001 (re)
163 163 $ hg -R bundle-reuse-enabled debugdata my-file.txt 1 | wc -l
164 164 \s*1200 (re)
165 165
166 166 explicitly disabled
167 167 -------------------
168 168
169 169 Not reusing the delta-base from the bundle means the delta will be made
170 170 against the "best" parent (so not the same as in the previous two cases).
171 171
172 172 $ hg init bundle-reuse-disabled
173 173 $ hg -R bundle-reuse-disabled unbundle all-p1.hg --quiet \
174 174 > --config storage.revlog.reuse-external-delta-parent=no
175 175 DBG-DELTAS: CHANGELOG: * (glob)
176 176 DBG-DELTAS: CHANGELOG: * (glob)
177 177 DBG-DELTAS: CHANGELOG: * (glob)
178 178 DBG-DELTAS: CHANGELOG: * (glob)
179 179 DBG-DELTAS: MANIFESTLOG: * (glob)
180 180 DBG-DELTAS: MANIFESTLOG: * (glob)
181 181 DBG-DELTAS: MANIFESTLOG: * (glob)
182 182 DBG-DELTAS: MANIFESTLOG: * (glob)
183 183 DBG-DELTAS: FILELOG:my-file.txt: rev=0: delta-base=0 * (glob)
184 184 DBG-DELTAS: FILELOG:my-file.txt: rev=1: delta-base=0 * (glob)
185 185 DBG-DELTAS: FILELOG:my-file.txt: rev=2: delta-base=0 * (glob)
186 186 DBG-DELTAS: FILELOG:my-file.txt: rev=3: delta-base=1 * (glob)
187 187
189 189 (confirm the file revisions are in the same order: 2 should be smaller than 1)
189 189
190 190 $ hg -R bundle-reuse-disabled debugdata my-file.txt 2 | wc -l
191 191 \s*1001 (re)
192 192 $ hg -R bundle-reuse-disabled debugdata my-file.txt 1 | wc -l
193 193 \s*1200 (re)
194 194
195 195
196 196 Check the path.*:pulled-delta-reuse-policy option
197 197 =================================================
198 198
199 199 Get a repository with the bad parent picked and a clone ready to pull the merge
200 200
201 201 $ cp -ar bundle-reuse-enabled peer-bad-delta
202 202 $ hg clone peer-bad-delta local-pre-pull --rev `cat large.node` --rev `cat small.node` --quiet
203 203 DBG-DELTAS: CHANGELOG: * (glob)
204 204 DBG-DELTAS: CHANGELOG: * (glob)
205 205 DBG-DELTAS: CHANGELOG: * (glob)
206 206 DBG-DELTAS: MANIFESTLOG: * (glob)
207 207 DBG-DELTAS: MANIFESTLOG: * (glob)
208 208 DBG-DELTAS: MANIFESTLOG: * (glob)
209 209 DBG-DELTAS: FILELOG:my-file.txt: rev=0: delta-base=0 * (glob)
210 210 DBG-DELTAS: FILELOG:my-file.txt: rev=1: delta-base=0 * (glob)
211 211 DBG-DELTAS: FILELOG:my-file.txt: rev=2: delta-base=0 * (glob)
212 212
213 213 Check the parent order for the file
214 214
215 215 $ hg -R local-pre-pull debugdata my-file.txt 2 | wc -l
216 216 \s*1001 (re)
217 217 $ hg -R local-pre-pull debugdata my-file.txt 1 | wc -l
218 218 \s*1200 (re)
219 219
220 220 Pull with no value (so the default)
221 221 -----------------------------------
222 222
223 223 default is to reuse the (bad) delta
224 224
225 225 $ cp -ar local-pre-pull local-no-value
226 226 $ hg -R local-no-value pull --quiet
227 227 DBG-DELTAS: CHANGELOG: * (glob)
228 228 DBG-DELTAS: MANIFESTLOG: * (glob)
229 229 DBG-DELTAS: FILELOG:my-file.txt: rev=3: delta-base=2 * (glob)
230 230
231 231 Pull with explicitly the default
232 232 --------------------------------
233 233
234 234 default is to reuse the (bad) delta
235 235
236 236 $ cp -ar local-pre-pull local-default
237 237 $ hg -R local-default pull --quiet --config 'paths.default:pulled-delta-reuse-policy=default'
238 238 DBG-DELTAS: CHANGELOG: * (glob)
239 239 DBG-DELTAS: MANIFESTLOG: * (glob)
240 240 DBG-DELTAS: FILELOG:my-file.txt: rev=3: delta-base=2 * (glob)
241 241
242 242 Pull with no-reuse
243 243 ------------------
244 244
245 245 We don't reuse the base, so we get a better delta
246 246
247 247 $ cp -ar local-pre-pull local-no-reuse
248 248 $ hg -R local-no-reuse pull --quiet --config 'paths.default:pulled-delta-reuse-policy=no-reuse'
249 249 DBG-DELTAS: CHANGELOG: * (glob)
250 250 DBG-DELTAS: MANIFESTLOG: * (glob)
251 251 DBG-DELTAS: FILELOG:my-file.txt: rev=3: delta-base=1 * (glob)
252 252
253 253 Pull with try-base
254 254 ------------------
255 255
256 256 We requested that the (bad) delta be used
257 257
258 258 $ cp -ar local-pre-pull local-try-base
259 259 $ hg -R local-try-base pull --quiet --config 'paths.default:pulled-delta-reuse-policy=try-base'
260 260 DBG-DELTAS: CHANGELOG: * (glob)
261 261 DBG-DELTAS: MANIFESTLOG: * (glob)
262 262 DBG-DELTAS: FILELOG:my-file.txt: rev=3: delta-base=2 * (glob)
263 263
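The `--config` flags used above map to the `[paths]` section of the
configuration, so the same policy can be made persistent in a repository's
hgrc. A minimal sketch (the path value is illustrative):

    [paths]
    default = ../peer-bad-delta
    default:pulled-delta-reuse-policy = no-reuse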
264 264 Case where we force a "bad" delta to be applied
265 265 ===============================================
266 266
267 267 We build very different file content to force a full snapshot
268 268
269 269 $ cp -ar peer-bad-delta peer-bad-delta-with-full
270 270 $ cp -ar local-pre-pull local-pre-pull-full
271 271 $ echo '[paths]' >> local-pre-pull-full/.hg/hgrc
272 272 $ echo 'default=../peer-bad-delta-with-full' >> local-pre-pull-full/.hg/hgrc
273 273
274 274 $ hg -R peer-bad-delta-with-full update 'desc("merge")' --quiet
275 275 $ ($TESTDIR/seq.py 2000 2100; $TESTDIR/seq.py 500 510; $TESTDIR/seq.py 3000 3050) \
276 276 > | $PYTHON $TESTTMP/sha256line.py > peer-bad-delta-with-full/my-file.txt
277 277 $ hg -R peer-bad-delta-with-full commit -m 'trigger-full'
278 278 DBG-DELTAS: FILELOG:my-file.txt: rev=4: delta-base=4 * (glob)
279 279 DBG-DELTAS: MANIFESTLOG: * (glob)
280 280 DBG-DELTAS: CHANGELOG: * (glob)
281 281
282 282 Check that the "try-base" behavior challenges the delta
283 283 -------------------------------------------------------
284 284
285 285 The bundling process creates a delta against the previous revision; however,
286 286 this is an invalid chain for the client, so it is not considered and we do a
287 287 full snapshot again.
288 288
289 289 $ cp -ar local-pre-pull-full local-try-base-full
290 290 $ hg -R local-try-base-full pull --quiet \
291 291 > --config 'paths.default:pulled-delta-reuse-policy=try-base'
292 292 DBG-DELTAS: CHANGELOG: * (glob)
293 293 DBG-DELTAS: CHANGELOG: * (glob)
294 294 DBG-DELTAS: MANIFESTLOG: * (glob)
295 295 DBG-DELTAS: MANIFESTLOG: * (glob)
296 296 DBG-DELTAS: FILELOG:my-file.txt: rev=3: delta-base=2 * (glob)
297 297 DBG-DELTAS: FILELOG:my-file.txt: rev=4: delta-base=4 * (glob)
298 298
299 299 Check that "forced" behavior does not challenge the delta, even if it is full.
300 300 -------------------------------------------------------------------------------
301 301
302 302 A full snapshot in the bundle should be accepted as a full snapshot without recomputation
303 303
304 304 $ cp -ar local-pre-pull-full local-forced-full
305 305 $ hg -R local-forced-full pull --quiet \
306 306 > --config 'paths.default:pulled-delta-reuse-policy=forced'
307 307 DBG-DELTAS: CHANGELOG: * (glob)
308 308 DBG-DELTAS: CHANGELOG: * (glob)
309 309 DBG-DELTAS: MANIFESTLOG: * (glob)
310 310 DBG-DELTAS: MANIFESTLOG: * (glob)
311 311 DBG-DELTAS: FILELOG:my-file.txt: rev=3: delta-base=2 * (glob)
312 312 DBG-DELTAS: FILELOG:my-file.txt: rev=4: delta-base=4 is-cached=1 - search-rounds=0 try-count=0 - delta-type=full snap-depth=0 - * (glob)
313 313
314 314 Check that "forced" behavior does not challenge the delta, even if it is bad.
315 315 ------------------------------------------------------------------------------
316 316
317 317 The client does not challenge anything and applies the bizarre delta directly.
318 318
319 319 Note: If the bundling process becomes smarter, this test might no longer work
320 320 (as the server won't be sending "bad" deltas anymore) and might need something
321 321 more subtle to test this behavior.
322 322
323 323 $ hg bundle -R peer-bad-delta-with-full --all --config devel.bundle.delta=p1 all-p1.hg
324 324 5 changesets found
325 325 $ cp -ar local-pre-pull-full local-forced-full-p1
326 326 $ hg -R local-forced-full-p1 pull --quiet \
327 327 > --config 'paths.*:pulled-delta-reuse-policy=forced' all-p1.hg
328 328 DBG-DELTAS: CHANGELOG: * (glob)
329 329 DBG-DELTAS: CHANGELOG: * (glob)
330 330 DBG-DELTAS: MANIFESTLOG: * (glob)
331 331 DBG-DELTAS: MANIFESTLOG: * (glob)
332 DBG-DELTAS: FILELOG:my-file.txt: rev=3: delta-base=2 is-cached=1 *search-rounds=1 try-count=1* (glob)
333 DBG-DELTAS: FILELOG:my-file.txt: rev=4: delta-base=3 is-cached=1 *search-rounds=1 try-count=1* (glob)
332 DBG-DELTAS: FILELOG:my-file.txt: rev=3: delta-base=2 is-cached=1 *search-rounds=0 try-count=0* (glob)
333 DBG-DELTAS: FILELOG:my-file.txt: rev=4: delta-base=3 is-cached=1 *search-rounds=0 try-count=0* (glob)
334 334
335 335 Check that running "forced" on a non-general delta repository does not corrupt it
336 336 ---------------------------------------------------------------------------------
337 337
338 338 Even when requested, some of the incoming deltas cannot be stored in a non-general-delta repository. We check that the bundle was applied correctly.
339 339
340 340 $ hg init \
341 341 > --config format.usegeneraldelta=no \
342 342 > --config format.sparse-revlog=no \
343 343 > local-forced-full-p1-no-gd
344 344 $ hg debugformat -R local-forced-full-p1-no-gd | grep generaldelta
345 345 generaldelta: no
346 346 $ hg -R local-forced-full-p1-no-gd pull --quiet local-pre-pull-full \
347 347 > --config debug.revlog.debug-delta=no
348 348 $ hg -R local-forced-full-p1-no-gd pull --quiet \
349 349 > --config 'paths.*:pulled-delta-reuse-policy=forced' all-p1.hg
350 350 DBG-DELTAS: CHANGELOG: * (glob)
351 351 DBG-DELTAS: CHANGELOG: * (glob)
352 352 DBG-DELTAS: MANIFESTLOG: * (glob)
353 353 DBG-DELTAS: MANIFESTLOG: * (glob)
354 354 DBG-DELTAS: FILELOG:my-file.txt: rev=3: delta-base=0 * - search-rounds=1 try-count=1 * (glob)
355 355 DBG-DELTAS: FILELOG:my-file.txt: rev=4: delta-base=4 * - search-rounds=1 try-count=1 * (glob)
356 356 $ hg -R local-forced-full-p1-no-gd verify
357 357 checking changesets
358 358 checking manifests
359 359 crosschecking files in changesets and manifests
360 360 checking files
361 361 checking dirstate
362 362 checked 5 changesets with 5 changes to 1 files