delta-find: explicitly track stage of the search...
marmoute
r52242:cc806f20 default
@@ -1,1814 +1,1838 b''
1 1 # revlogdeltas.py - Logic around delta computation for revlog
2 2 #
3 3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 4 # Copyright 2018 Octobus <contact@octobus.net>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8 """Helper class to compute deltas stored inside revlogs"""
9 9
10 10
11 11 import abc
12 12 import collections
13 13 import struct
14 14
15 15 # import stuff from node for others to import from revlog
16 16 from ..node import nullrev
17 17 from ..i18n import _
18 18
19 19 from .constants import (
20 20 COMP_MODE_DEFAULT,
21 21 COMP_MODE_INLINE,
22 22 COMP_MODE_PLAIN,
23 23 DELTA_BASE_REUSE_FORCE,
24 24 DELTA_BASE_REUSE_NO,
25 25 KIND_CHANGELOG,
26 26 KIND_FILELOG,
27 27 KIND_MANIFESTLOG,
28 28 REVIDX_ISCENSORED,
29 29 REVIDX_RAWTEXT_CHANGING_FLAGS,
30 30 )
31 31
32 32 from ..thirdparty import attr
33 33
34 34 from .. import (
35 35 error,
36 36 mdiff,
37 37 util,
38 38 )
39 39
40 40 from . import flagutil
41 41
42 42 # maximum <delta-chain-data>/<revision-text-length> ratio
43 43 LIMIT_DELTA2TEXT = 2
44 44
45 45
46 46 class _testrevlog:
47 47 """minimalist fake revlog to use in doctests"""
48 48
49 49 def __init__(self, data, density=0.5, mingap=0, snapshot=()):
50 50 """data is an list of revision payload boundaries"""
51 51 from .. import revlog
52 52
53 53 self._data = data
54 54 self.data_config = revlog.DataConfig()
55 55 self.data_config.sr_density_threshold = density
56 56 self.data_config.sr_min_gap_size = mingap
57 57 self.delta_config = revlog.DeltaConfig()
58 58 self.feature_config = revlog.FeatureConfig()
59 59 self._snapshot = set(snapshot)
60 60 self.index = None
61 61
62 62 def start(self, rev):
63 63 if rev == nullrev:
64 64 return 0
65 65 if rev == 0:
66 66 return 0
67 67 return self._data[rev - 1]
68 68
69 69 def end(self, rev):
70 70 if rev == nullrev:
71 71 return 0
72 72 return self._data[rev]
73 73
74 74 def length(self, rev):
75 75 return self.end(rev) - self.start(rev)
76 76
77 77 def __len__(self):
78 78 return len(self._data)
79 79
80 80 def issnapshot(self, rev):
81 81 if rev == nullrev:
82 82 return True
83 83 return rev in self._snapshot
84 84
85 85
86 86 def slicechunk(revlog, revs, targetsize=None):
87 87 """slice revs to reduce the amount of unrelated data to be read from disk.
88 88
89 89 ``revs`` is sliced into groups that should be read in a single pass.
90 90 Revs are assumed to be sorted.
91 91
92 92 The initial chunk is sliced until the overall density (payload/chunks-span
93 93 ratio) is above `revlog.data_config.sr_density_threshold`. No gap smaller
94 94 than `revlog.data_config.sr_min_gap_size` is skipped.
95 95
96 96 If `targetsize` is set, no chunk larger than `targetsize` will be yielded.
97 97 For consistency with other slicing choices, this limit won't go lower than
98 98 `revlog.data_config.sr_min_gap_size`.
99 99
100 100 If individual revision chunks are larger than this limit, they will still
101 101 be yielded individually.
102 102
103 103 >>> data = [
104 104 ... 5, #00 (5)
105 105 ... 10, #01 (5)
106 106 ... 12, #02 (2)
107 107 ... 12, #03 (empty)
108 108 ... 27, #04 (15)
109 109 ... 31, #05 (4)
110 110 ... 31, #06 (empty)
111 111 ... 42, #07 (11)
112 112 ... 47, #08 (5)
113 113 ... 47, #09 (empty)
114 114 ... 48, #10 (1)
115 115 ... 51, #11 (3)
116 116 ... 74, #12 (23)
117 117 ... 85, #13 (11)
118 118 ... 86, #14 (1)
119 119 ... 91, #15 (5)
120 120 ... ]
121 121 >>> revlog = _testrevlog(data, snapshot=range(16))
122 122
123 123 >>> list(slicechunk(revlog, list(range(16))))
124 124 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
125 125 >>> list(slicechunk(revlog, [0, 15]))
126 126 [[0], [15]]
127 127 >>> list(slicechunk(revlog, [0, 11, 15]))
128 128 [[0], [11], [15]]
129 129 >>> list(slicechunk(revlog, [0, 11, 13, 15]))
130 130 [[0], [11, 13, 15]]
131 131 >>> list(slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
132 132 [[1, 2], [5, 8, 10, 11], [14]]
133 133
134 134 Slicing with a maximum chunk size
135 135 >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
136 136 [[0], [11], [13], [15]]
137 137 >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
138 138 [[0], [11], [13, 15]]
139 139
140 140 Slicing involving nullrev
141 141 >>> list(slicechunk(revlog, [-1, 0, 11, 13, 15], targetsize=20))
142 142 [[-1, 0], [11], [13, 15]]
143 143 >>> list(slicechunk(revlog, [-1, 13, 15], targetsize=5))
144 144 [[-1], [13], [15]]
145 145 """
146 146 if targetsize is not None:
147 147 targetsize = max(targetsize, revlog.data_config.sr_min_gap_size)
148 148 # targetsize should not be specified when evaluating delta candidates:
149 149 # * targetsize is used to ensure we stay within specification when reading,
150 150 densityslicing = getattr(revlog.index, 'slicechunktodensity', None)
151 151 if densityslicing is None:
152 152 densityslicing = lambda x, y, z: _slicechunktodensity(revlog, x, y, z)
153 153 for chunk in densityslicing(
154 154 revs,
155 155 revlog.data_config.sr_density_threshold,
156 156 revlog.data_config.sr_min_gap_size,
157 157 ):
158 158 for subchunk in _slicechunktosize(revlog, chunk, targetsize):
159 159 yield subchunk
160 160
161 161
162 162 def _slicechunktosize(revlog, revs, targetsize=None):
163 163 """slice revs to match the target size
164 164
165 165 This is intended to be used on chunks that density slicing selected,
166 166 but that are still too large compared to the read guarantee of the
167 167 revlog. This might happen when "minimal gap size" interrupted the slicing
168 168 or when chains are built in a way that creates large blocks next to each other.
169 169
170 170 >>> data = [
171 171 ... 3, #0 (3)
172 172 ... 5, #1 (2)
173 173 ... 6, #2 (1)
174 174 ... 8, #3 (2)
175 175 ... 8, #4 (empty)
176 176 ... 11, #5 (3)
177 177 ... 12, #6 (1)
178 178 ... 13, #7 (1)
179 179 ... 14, #8 (1)
180 180 ... ]
181 181
182 182 == All snapshots cases ==
183 183 >>> revlog = _testrevlog(data, snapshot=range(9))
184 184
185 185 Cases where chunk is already small enough
186 186 >>> list(_slicechunktosize(revlog, [0], 3))
187 187 [[0]]
188 188 >>> list(_slicechunktosize(revlog, [6, 7], 3))
189 189 [[6, 7]]
190 190 >>> list(_slicechunktosize(revlog, [0], None))
191 191 [[0]]
192 192 >>> list(_slicechunktosize(revlog, [6, 7], None))
193 193 [[6, 7]]
194 194
195 195 cases where we need actual slicing
196 196 >>> list(_slicechunktosize(revlog, [0, 1], 3))
197 197 [[0], [1]]
198 198 >>> list(_slicechunktosize(revlog, [1, 3], 3))
199 199 [[1], [3]]
200 200 >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
201 201 [[1, 2], [3]]
202 202 >>> list(_slicechunktosize(revlog, [3, 5], 3))
203 203 [[3], [5]]
204 204 >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
205 205 [[3], [5]]
206 206 >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
207 207 [[5], [6, 7, 8]]
208 208 >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
209 209 [[0], [1, 2], [3], [5], [6, 7, 8]]
210 210
211 211 Case with too large individual chunk (must return valid chunk)
212 212 >>> list(_slicechunktosize(revlog, [0, 1], 2))
213 213 [[0], [1]]
214 214 >>> list(_slicechunktosize(revlog, [1, 3], 1))
215 215 [[1], [3]]
216 216 >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
217 217 [[3], [5]]
218 218
219 219 == No Snapshot cases ==
220 220 >>> revlog = _testrevlog(data)
221 221
222 222 Cases where chunk is already small enough
223 223 >>> list(_slicechunktosize(revlog, [0], 3))
224 224 [[0]]
225 225 >>> list(_slicechunktosize(revlog, [6, 7], 3))
226 226 [[6, 7]]
227 227 >>> list(_slicechunktosize(revlog, [0], None))
228 228 [[0]]
229 229 >>> list(_slicechunktosize(revlog, [6, 7], None))
230 230 [[6, 7]]
231 231
232 232 cases where we need actual slicing
233 233 >>> list(_slicechunktosize(revlog, [0, 1], 3))
234 234 [[0], [1]]
235 235 >>> list(_slicechunktosize(revlog, [1, 3], 3))
236 236 [[1], [3]]
237 237 >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
238 238 [[1], [2, 3]]
239 239 >>> list(_slicechunktosize(revlog, [3, 5], 3))
240 240 [[3], [5]]
241 241 >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
242 242 [[3], [4, 5]]
243 243 >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
244 244 [[5], [6, 7, 8]]
245 245 >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
246 246 [[0], [1, 2], [3], [5], [6, 7, 8]]
247 247
248 248 Case with too large individual chunk (must return valid chunk)
249 249 >>> list(_slicechunktosize(revlog, [0, 1], 2))
250 250 [[0], [1]]
251 251 >>> list(_slicechunktosize(revlog, [1, 3], 1))
252 252 [[1], [3]]
253 253 >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
254 254 [[3], [5]]
255 255
256 256 == mixed case ==
257 257 >>> revlog = _testrevlog(data, snapshot=[0, 1, 2])
258 258 >>> list(_slicechunktosize(revlog, list(range(9)), 5))
259 259 [[0, 1], [2], [3, 4, 5], [6, 7, 8]]
260 260 """
261 261 assert targetsize is None or 0 <= targetsize
262 262 startdata = revlog.start(revs[0])
263 263 enddata = revlog.end(revs[-1])
264 264 fullspan = enddata - startdata
265 265 if targetsize is None or fullspan <= targetsize:
266 266 yield revs
267 267 return
268 268
269 269 startrevidx = 0
270 270 endrevidx = 1
271 271 iterrevs = enumerate(revs)
272 272 next(iterrevs) # skip first rev.
273 273 # first step: get snapshots out of the way
274 274 for idx, r in iterrevs:
275 275 span = revlog.end(r) - startdata
276 276 snapshot = revlog.issnapshot(r)
277 277 if span <= targetsize and snapshot:
278 278 endrevidx = idx + 1
279 279 else:
280 280 chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
281 281 if chunk:
282 282 yield chunk
283 283 startrevidx = idx
284 284 startdata = revlog.start(r)
285 285 endrevidx = idx + 1
286 286 if not snapshot:
287 287 break
288 288
289 289 # for the others, we use binary slicing to quickly converge toward valid
290 290 # chunks (otherwise, we might end up looking for start/end of many
291 291 # revisions). This logic is not looking for the perfect slicing point; it
292 292 # focuses on quickly converging toward valid chunks.
293 293 nbitem = len(revs)
294 294 while (enddata - startdata) > targetsize:
295 295 endrevidx = nbitem
296 296 if nbitem - startrevidx <= 1:
297 297 break # protect against individual chunk larger than limit
298 298 localenddata = revlog.end(revs[endrevidx - 1])
299 299 span = localenddata - startdata
300 300 while span > targetsize:
301 301 if endrevidx - startrevidx <= 1:
302 302 break # protect against individual chunk larger than limit
303 303 endrevidx -= (endrevidx - startrevidx) // 2
304 304 localenddata = revlog.end(revs[endrevidx - 1])
305 305 span = localenddata - startdata
306 306 chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
307 307 if chunk:
308 308 yield chunk
309 309 startrevidx = endrevidx
310 310 startdata = revlog.start(revs[startrevidx])
311 311
312 312 chunk = _trimchunk(revlog, revs, startrevidx)
313 313 if chunk:
314 314 yield chunk
315 315
316 316
317 317 def _slicechunktodensity(revlog, revs, targetdensity=0.5, mingapsize=0):
318 318 """slice revs to reduce the amount of unrelated data to be read from disk.
319 319
320 320 ``revs`` is sliced into groups that should be read in a single pass.
321 321 Revs are assumed to be sorted.
322 322
323 323 The initial chunk is sliced until the overall density (payload/chunks-span
324 324 ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
325 325 skipped.
326 326
327 327 >>> revlog = _testrevlog([
328 328 ... 5, #00 (5)
329 329 ... 10, #01 (5)
330 330 ... 12, #02 (2)
331 331 ... 12, #03 (empty)
332 332 ... 27, #04 (15)
333 333 ... 31, #05 (4)
334 334 ... 31, #06 (empty)
335 335 ... 42, #07 (11)
336 336 ... 47, #08 (5)
337 337 ... 47, #09 (empty)
338 338 ... 48, #10 (1)
339 339 ... 51, #11 (3)
340 340 ... 74, #12 (23)
341 341 ... 85, #13 (11)
342 342 ... 86, #14 (1)
343 343 ... 91, #15 (5)
344 344 ... ])
345 345
346 346 >>> list(_slicechunktodensity(revlog, list(range(16))))
347 347 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
348 348 >>> list(_slicechunktodensity(revlog, [0, 15]))
349 349 [[0], [15]]
350 350 >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
351 351 [[0], [11], [15]]
352 352 >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
353 353 [[0], [11, 13, 15]]
354 354 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
355 355 [[1, 2], [5, 8, 10, 11], [14]]
356 356 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
357 357 ... mingapsize=20))
358 358 [[1, 2, 3, 5, 8, 10, 11], [14]]
359 359 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
360 360 ... targetdensity=0.95))
361 361 [[1, 2], [5], [8, 10, 11], [14]]
362 362 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
363 363 ... targetdensity=0.95, mingapsize=12))
364 364 [[1, 2], [5, 8, 10, 11], [14]]
365 365 """
366 366 start = revlog.start
367 367 length = revlog.length
368 368
369 369 if len(revs) <= 1:
370 370 yield revs
371 371 return
372 372
373 373 deltachainspan = segmentspan(revlog, revs)
374 374
375 375 if deltachainspan < mingapsize:
376 376 yield revs
377 377 return
378 378
379 379 readdata = deltachainspan
380 380 chainpayload = sum(length(r) for r in revs)
381 381
382 382 if deltachainspan:
383 383 density = chainpayload / float(deltachainspan)
384 384 else:
385 385 density = 1.0
386 386
387 387 if density >= targetdensity:
388 388 yield revs
389 389 return
390 390
391 391 # Collect the gaps so they can be sorted and popped from largest to smallest
392 392 gaps = []
393 393 prevend = None
394 394 for i, rev in enumerate(revs):
395 395 revstart = start(rev)
396 396 revlen = length(rev)
397 397
398 398 # Skip empty revisions to form larger holes
399 399 if revlen == 0:
400 400 continue
401 401
402 402 if prevend is not None:
403 403 gapsize = revstart - prevend
404 404 # only consider holes that are large enough
405 405 if gapsize > mingapsize:
406 406 gaps.append((gapsize, i))
407 407
408 408 prevend = revstart + revlen
409 409 # sort the gaps so we can pop them from largest to smallest
410 410 gaps.sort()
411 411
412 412 # Collect the indices of the largest holes until the density is acceptable
413 413 selected = []
414 414 while gaps and density < targetdensity:
415 415 gapsize, gapidx = gaps.pop()
416 416
417 417 selected.append(gapidx)
418 418
419 419 # removing the largest remaining gap shrinks the amount of
420 420 # data that would actually be read
421 421 readdata -= gapsize
422 422 if readdata > 0:
423 423 density = chainpayload / float(readdata)
424 424 else:
425 425 density = 1.0
426 426 selected.sort()
427 427
428 428 # Cut the revs at collected indices
429 429 previdx = 0
430 430 for idx in selected:
431 431
432 432 chunk = _trimchunk(revlog, revs, previdx, idx)
433 433 if chunk:
434 434 yield chunk
435 435
436 436 previdx = idx
437 437
438 438 chunk = _trimchunk(revlog, revs, previdx)
439 439 if chunk:
440 440 yield chunk
441 441
442 442
443 443 def _trimchunk(revlog, revs, startidx, endidx=None):
444 444 """returns revs[startidx:endidx] without empty trailing revs
445 445
446 446 Doctest Setup
447 447 >>> revlog = _testrevlog([
448 448 ... 5, #0
449 449 ... 10, #1
450 450 ... 12, #2
451 451 ... 12, #3 (empty)
452 452 ... 17, #4
453 453 ... 21, #5
454 454 ... 21, #6 (empty)
455 455 ... ])
456 456
457 457 Contiguous cases:
458 458 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
459 459 [0, 1, 2, 3, 4, 5]
460 460 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
461 461 [0, 1, 2, 3, 4]
462 462 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
463 463 [0, 1, 2]
464 464 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
465 465 [2]
466 466 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
467 467 [3, 4, 5]
468 468 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
469 469 [3, 4]
470 470
471 471 Discontiguous cases:
472 472 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
473 473 [1, 3, 5]
474 474 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
475 475 [1]
476 476 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
477 477 [3, 5]
478 478 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
479 479 [3, 5]
480 480 """
481 481 length = revlog.length
482 482
483 483 if endidx is None:
484 484 endidx = len(revs)
485 485
486 486 # If we have a non-empty delta candidate, there is nothing to trim
487 487 if revs[endidx - 1] < len(revlog):
488 488 # Trim empty revs at the end, except the very first revision of a chain
489 489 while (
490 490 endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0
491 491 ):
492 492 endidx -= 1
493 493
494 494 return revs[startidx:endidx]
495 495
496 496
497 497 def segmentspan(revlog, revs):
498 498 """Get the byte span of a segment of revisions
499 499
500 500 revs is a sorted array of revision numbers
501 501
502 502 >>> revlog = _testrevlog([
503 503 ... 5, #0
504 504 ... 10, #1
505 505 ... 12, #2
506 506 ... 12, #3 (empty)
507 507 ... 17, #4
508 508 ... ])
509 509
510 510 >>> segmentspan(revlog, [0, 1, 2, 3, 4])
511 511 17
512 512 >>> segmentspan(revlog, [0, 4])
513 513 17
514 514 >>> segmentspan(revlog, [3, 4])
515 515 5
516 516 >>> segmentspan(revlog, [1, 2, 3,])
517 517 7
518 518 >>> segmentspan(revlog, [1, 3])
519 519 7
520 520 """
521 521 if not revs:
522 522 return 0
523 523 end = revlog.end(revs[-1])
524 524 return end - revlog.start(revs[0])
525 525
526 526
527 527 def _textfromdelta(revlog, baserev, delta, p1, p2, flags, expectednode):
528 528 """build full text from a (base, delta) pair and other metadata"""
529 529 # special case deltas which replace entire base; no need to decode
530 530 # base revision. this neatly avoids censored bases, which throw when
531 531 # they're decoded.
532 532 hlen = struct.calcsize(b">lll")
533 533 if delta[:hlen] == mdiff.replacediffheader(
534 534 revlog.rawsize(baserev), len(delta) - hlen
535 535 ):
536 536 fulltext = delta[hlen:]
537 537 else:
538 538 # deltabase is the rawtext before it was changed by flag
539 539 # processors, which is equivalent to the non-raw text
540 540 basetext = revlog.revision(baserev)
541 541 fulltext = mdiff.patch(basetext, delta)
542 542
543 543 try:
544 544 validatehash = flagutil.processflagsraw(revlog, fulltext, flags)
545 545 if validatehash:
546 546 revlog.checkhash(fulltext, expectednode, p1=p1, p2=p2)
547 547 if flags & REVIDX_ISCENSORED:
548 548 raise error.StorageError(
549 549 _(b'node %s is not censored') % expectednode
550 550 )
551 551 except error.CensoredNodeError:
552 552 # must pass the censored index flag to add censored revisions
553 553 if not flags & REVIDX_ISCENSORED:
554 554 raise
555 555 return fulltext
556 556
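# For illustration, a minimal sketch of the full-replacement header
# tested above, assuming the b">lll" (start, end, new-length) hunk
# layout that mdiff.replacediffheader produces for bdiff-style deltas:
#
#   >>> import struct
#   >>> hlen = struct.calcsize(b">lll")
#   >>> delta = struct.pack(b">lll", 0, 5, 3) + b'new'  # replace base[0:5]
#   >>> delta[hlen:]
#   b'new'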
557 557
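# A best-effort field guide for `_deltainfo` below, summarized from the
# way `_builddeltainfo` fills it:
#
# - distance: bytes from the start of the chain base to the end of this
#   delta once stored (bounds the read span)
# - deltalen: size of the compressed delta (header + data)
# - data: (header, compressed-payload) pair as produced by compress()
# - base: revision this delta applies against
# - chainbase: first revision of the resulting delta chain
# - chainlen: length of the resulting chain, including this revision
# - compresseddeltalen: cumulated compressed size of the whole chain
# - snapshotdepth: None for a plain delta, otherwise the snapshot level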
558 558 @attr.s(slots=True, frozen=True)
559 559 class _deltainfo:
560 560 distance = attr.ib()
561 561 deltalen = attr.ib()
562 562 data = attr.ib()
563 563 base = attr.ib()
564 564 chainbase = attr.ib()
565 565 chainlen = attr.ib()
566 566 compresseddeltalen = attr.ib()
567 567 snapshotdepth = attr.ib()
568 568
569 569
570 570 def drop_u_compression(delta):
571 571 """turn into a "u" (no-compression) into no-compression without header
572 572
573 573 This is useful for revlog formats that have a better compression method.
574 574 """
575 575 assert delta.data[0] == b'u', delta.data[0]
576 576 return _deltainfo(
577 577 delta.distance,
578 578 delta.deltalen - 1,
579 579 (b'', delta.data[1]),
580 580 delta.base,
581 581 delta.chainbase,
582 582 delta.chainlen,
583 583 delta.compresseddeltalen,
584 584 delta.snapshotdepth,
585 585 )
586 586
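# A minimal usage sketch for `drop_u_compression` (hypothetical values;
# only `data` and `deltalen` matter for the transformation):
#
#   >>> info = _deltainfo(10, 4, (b'u', b'abc'), 1, 1, 2, 4, None)
#   >>> drop_u_compression(info).data
#   (b'', b'abc')
#   >>> drop_u_compression(info).deltalen
#   3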
587 587
588 588 # If a revision's full text is that much bigger than a base candidate full
589 589 # text's, it is very unlikely that it will produce a valid delta. We no longer
590 590 # consider these candidates.
591 591 LIMIT_BASE2TEXT = 500
592 592
593 ### stage of the search, used for debugging and to select and adjust some logic.
594 # initial stage, next step is unknown
595 _STAGE_UNSPECIFIED = "unspecified"
596 # trying the cached delta
597 _STAGE_CACHED = "cached"
598 # trying delta based on parents
599 _STAGE_PARENTS = "parents"
600 # trying to build a valid snapshot of any level
601 _STAGE_SNAPSHOT = "snapshot"
602 # trying to build a delta based on the previous revision
603 _STAGE_PREV = "prev"
604 # trying to build a full snapshot
605 _STAGE_FULL = "full"
606
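# For example (illustrative only), a sparse general-delta search that is
# given a cached delta but ends up storing a full snapshot would move
# through:
#
#   unspecified -> cached -> parents -> snapshot -> full
#
# while a non-general-delta revlog only ever sees "prev" then "full".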
593 607
594 608 class _BaseDeltaSearch(abc.ABC):
595 609 """perform the search of a good delta for a single revlog revision
596 610
597 611 note: some of the deltacomputer.finddeltainfo logic should probably move
598 612 here.
599 613 """
600 614
601 615 def __init__(
602 616 self,
603 617 revlog,
604 618 revinfo,
605 619 p1,
606 620 p2,
607 621 cachedelta,
608 622 excluded_bases=None,
609 623 target_rev=None,
610 624 snapshot_cache=None,
611 625 ):
612 626 # the DELTA_BASE_REUSE_FORCE case should have been taken care of sooner
613 627 # so we should never end up asking such a question. Adding the assert
614 628 # as a safeguard to detect anything that would be fishy in this regard.
615 629 assert (
616 630 cachedelta is None
617 631 or cachedelta[2] != DELTA_BASE_REUSE_FORCE
618 632 or not revlog.delta_config.general_delta
619 633 )
620 634 self.revlog = revlog
621 635 self.revinfo = revinfo
622 636 self.textlen = revinfo.textlen
623 637 self.p1 = p1
624 638 self.p2 = p2
625 639 self.cachedelta = cachedelta
626 640 self.excluded_bases = excluded_bases
627 641 if target_rev is None:
628 642 target_rev = len(self.revlog)
629 643 self.target_rev = target_rev
630 644 if snapshot_cache is None:
631 645 # map: base-rev: [snapshot-revs]
632 646 snapshot_cache = SnapshotCache()
633 647 self.snapshot_cache = snapshot_cache
634 648
635 649 self.tested = {nullrev}
636 650
651 self.current_stage = _STAGE_UNSPECIFIED
637 652 self.current_group = None
638 653 self._init_group()
639 654
640 655 def is_good_delta_info(self, deltainfo):
641 656 """Returns True if the given delta is good.
642 657
643 658 Good means that it is within the disk span, disk size, and chain length
644 659 bounds that we know to be performant.
645 660 """
646 661 if not self._is_good_delta_info_universal(deltainfo):
647 662 return False
648 663 if not self._is_good_delta_info_chain_quality(deltainfo):
649 664 return False
650 665 if not self._is_good_delta_info_snapshot_constraints(deltainfo):
651 666 return False
652 667 return True
653 668
654 669 def _is_good_delta_info_universal(self, deltainfo):
655 670 """Returns True if the given delta is good.
656 671
657 672 This performs generic checks needed by all format variants.
658 673
659 674 This is used by is_good_delta_info.
660 675 """
661 676
662 677 if deltainfo is None:
663 678 return False
664 679
665 680 # the DELTA_BASE_REUSE_FORCE case should have been taken care of sooner
666 681 # so we should never end up asking such a question. Adding the assert
667 682 # as a safeguard to detect anything that would be fishy in this regard.
668 683 assert (
669 684 self.revinfo.cachedelta is None
670 685 or self.revinfo.cachedelta[2] != DELTA_BASE_REUSE_FORCE
671 686 or not self.revlog.delta_config.general_delta
672 687 )
673 688
674 689 # Bad delta from new delta size:
675 690 #
676 691 # If the delta size is larger than the target text, storing the delta
677 692 # will be inefficient.
678 693 if self.revinfo.textlen < deltainfo.deltalen:
679 694 return False
680 695
681 696 return True
682 697
683 698 def _is_good_delta_info_chain_quality(self, deltainfo):
684 699 """Returns True if the chain associated with the delta is good.
685 700
686 701 This performs checks for formats that use delta chains.
687 702
688 703 This is used by is_good_delta_info.
689 704 """
690 705 # - 'deltainfo.distance' is the distance from the base revision --
691 706 # bounding it limits the amount of I/O we need to do.
692 707
693 708 defaultmax = self.revinfo.textlen * 4
694 709 maxdist = self.revlog.delta_config.max_deltachain_span
695 710 if not maxdist:
696 711 maxdist = deltainfo.distance # ensure the conditional pass
697 712 maxdist = max(maxdist, defaultmax)
698 713
699 714 # Bad delta from read span:
700 715 #
701 716 # If the span of data read is larger than the maximum allowed.
702 717 #
703 718 # In the sparse-revlog case, we rely on the associated "sparse
704 719 # reading" to avoid issue related to the span of data. In theory, it
705 720 # would be possible to build a pathological revlog where the delta
706 721 # pattern would lead to too many reads. However, this does not
707 722 # happen in practice at all. So we skip the span check entirely.
708 723 if (
709 724 not self.revlog.delta_config.sparse_revlog
710 725 and maxdist < deltainfo.distance
711 726 ):
712 727 return False
713 728
714 729 # Bad delta from cumulated payload size:
715 730 #
716 731 # - 'deltainfo.compresseddeltalen' is the sum of the total size of
717 732 # deltas we need to apply -- bounding it limits the amount of CPU
718 733 # we consume.
719 734 max_chain_data = self.revinfo.textlen * LIMIT_DELTA2TEXT
720 735 # If the sum of deltas gets larger than K * the target text length.
721 736 if max_chain_data < deltainfo.compresseddeltalen:
722 737 return False
723 738
724 739 # Bad delta from chain length:
725 740 #
726 741 # If the number of deltas in the chain gets too high.
727 742 if (
728 743 self.revlog.delta_config.max_chain_len
729 744 and self.revlog.delta_config.max_chain_len < deltainfo.chainlen
730 745 ):
731 746 return False
732 747 return True
733 748
734 749 def _is_good_delta_info_snapshot_constraints(self, deltainfo):
735 750 """Returns True if the chain associated with snapshots
736 751
737 752 This performs checks for format that use sparse-revlog and intermediate
738 753 snapshots.
739 754
740 755 This is used by is_good_delta_info.
741 756 """
742 757 # if not a snapshot, this method has no filtering to do
743 758 if deltainfo.snapshotdepth is None:
744 759 return True
745 760 # bad delta from intermediate snapshot size limit
746 761 #
747 762 # If an intermediate snapshot size is higher than the limit. The
748 763 # limit exists to prevent endless chains of intermediate deltas
749 764 # from being created.
750 765 if (
751 766 self.revinfo.textlen >> deltainfo.snapshotdepth
752 767 ) < deltainfo.deltalen:
753 768 return False
754 769
755 770 # bad delta if new intermediate snapshot is larger than the previous
756 771 # snapshot
757 772 if self.revlog.length(deltainfo.base) < deltainfo.deltalen:
758 773 return False
759 774
760 775 return True
761 776
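# Worked example for the size limit above (illustrative numbers): with a
# 1 MiB fulltext and snapshotdepth=2, the budget is
# 1048576 >> 2 == 262144 bytes, so an intermediate snapshot whose delta
# is larger than that is rejected; each extra level halves the budget.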
762 777 @property
763 778 def done(self):
764 779 """True when all possible candidate have been tested"""
765 780 return self.current_group is None
766 781
767 782 @abc.abstractmethod
768 783 def next_group(self, good_delta=None):
769 784 """move to the next group to test
770 785
771 786 The group of revision to test will be available in
772 787 `self.current_group`. If the previous group had any good delta, the
773 788 best one can be passed as the `good_delta` parameter to help selecting
774 789 the next group.
775 790
776 791 If no revision remains to be tested, `self.done` will be True and
777 792 `self.current_group` will be None.
778 793 """
779 794 pass
780 795
781 796 @abc.abstractmethod
782 797 def _init_group(self):
783 798 pass
784 799
785 800
786 801 class _NoDeltaSearch(_BaseDeltaSearch):
787 802 """Search for no delta.
788 803
789 804 This search variant is to be used in cases where we should not store deltas.
790 805 """
791 806
792 807 def _init_group(self):
793 pass
808 self.current_stage = _STAGE_FULL
794 809
795 810 def next_group(self, good_delta=None):
796 811 pass
797 812
798 813
799 814 class _PrevDeltaSearch(_BaseDeltaSearch):
800 815 """Search for delta against the previous revision only
801 816
802 817 This search variant is to be used when the format does not allow for delta
803 818 against arbitrary bases.
804 819 """
805 820
806 821 def _init_group(self):
822 self.current_stage = _STAGE_PREV
807 823 self.current_group = [self.target_rev - 1]
808 824
809 825 def next_group(self, good_delta=None):
826 self.current_stage = _STAGE_FULL
810 827 self.current_group = None
811 828
812 829
813 830 class _DeltaSearch(_BaseDeltaSearch):
814 831 """Generic delta search variants
815 832
816 833 (expect this to be split further)
817 834 """
818 835
819 836 def _init_group(self):
820 837 # Why search for a delta base if we cannot use a delta base?
821 838 # also see issue6056
822 839 assert self.revlog.delta_config.general_delta
823 840 self._candidates_iterator = self._candidate_groups()
824 841 self._last_good = None
825 842 self.current_group = self._candidates_iterator.send(self._last_good)
826 843
827 844 def next_group(self, good_delta=None):
828 845 if good_delta is not None:
829 846 self._last_good = good_delta.base
830 847 self.current_group = self._candidates_iterator.send(self._last_good)
831 848
832 849 def _candidate_groups(self):
833 850 """Provides group of revision to be tested as delta base
834 851
835 852 This top level function focuses on emitting groups with unique and
836 853 worthwhile content. See _raw_groups for details about the
837 854 group order.
838 855 """
839 856 good = None
840 857
841 858 group_chunk_size = self.revlog.delta_config.candidate_group_chunk_size
842 859
843 860 tested = self.tested # prefetch for speed and code compactness
844 861 candidates = self._refined_groups()
845 862 while True:
846 863 temptative = candidates.send(good)
847 864 if temptative is None:
848 865 break
849 866 group = self._pre_filter_candidate_revs(temptative)
850 867 if group:
851 868 # When the size of the candidate group is big, it can result in
852 869 # a quite significant performance impact. To reduce this, we
853 870 # can send them in smaller batches until the new batch does not
854 871 # provide any improvements.
855 872 #
856 873 # This might reduce the overall efficiency of the compression
857 874 # in some corner cases, but that should also prevent very
858 875 # pathological cases from being an issue. (eg. 20 000
859 876 # candidates).
860 877 #
861 878 # XXX note that the ordering of the group becomes important as
862 879 # it now impacts the final result. The current order is
863 880 # unprocessed and can be improved.
864 881 if group_chunk_size == 0:
865 882 tested.update(group)
866 883 good = yield tuple(group)
867 884 else:
868 885 prev_good = good
869 886 for start in range(0, len(group), group_chunk_size):
870 887 sub_group = group[start : start + group_chunk_size]
871 888 tested.update(sub_group)
872 889 good = yield tuple(sub_group)
873 890 if prev_good == good:
874 891 break
875 892
876 893 yield None
877 894
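# Illustration of the batching above (hypothetical numbers): with
# group_chunk_size=4 and a 10-candidate group, the batches [0:4], [4:8]
# and [8:10] are tried in turn; as written, the loop only stops early
# while `good` has not improved since the group started.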
878 895 def _pre_filter_candidate_revs(self, temptative):
879 896 """filter possible candidate before computing a delta
880 897
881 898 This function use various criteria to pre-filter candidate delta base
882 899 before we compute a delta and evaluate its quality.
883 900
884 901 Such pre-filter limit the number of computed delta, an expensive operation.
885 902
886 903 return the updated list of revision to test
887 904 """
888 905 deltalength = self.revlog.length
889 906 deltaparent = self.revlog.deltaparent
890 907
891 908 tested = self.tested
892 909 group = []
893 910 for rev in temptative:
894 911 # skip over empty deltas (no need to include them in a chain)
895 912 while not (rev == nullrev or rev in tested or deltalength(rev)):
896 913 tested.add(rev)
897 914 rev = deltaparent(rev)
898 915 if self._pre_filter_rev(rev):
899 916 group.append(rev)
900 917 else:
901 918 self.tested.add(rev)
902 919 return group
903 920
904 921 def _pre_filter_rev_universal(self, rev):
905 922 """pre filtering that is need in all cases.
906 923
907 924 return True if it seems okay to test a rev, False otherwise.
908 925
909 926 used by _pre_filter_rev.
910 927 """
911 928 # no need to try a delta against nullrev, this will be done as
912 929 # a last resort.
913 930 if rev == nullrev:
914 931 return False
915 932 # filter out revisions we tested already
916 933 if rev in self.tested:
917 934 return False
918 935
919 936 # a higher authority deemed the base unworthy (e.g. censored)
920 937 if self.excluded_bases is not None and rev in self.excluded_bases:
921 938 return False
922 939 # We are in some recomputation cases and that rev is too high
923 940 # in the revlog
924 941 if self.target_rev is not None and rev >= self.target_rev:
925 942 return False
926 943 # no delta for rawtext-changing revs (see "candelta" for why)
927 944 if self.revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
928 945 return False
929 946 return True
930 947
931 948 def _pre_filter_rev_sparse(self, rev):
932 949 """pre filtering that is needed in sparse revlog cases
933 950
934 951 return True if it seems okay to test a rev, False otherwise.
935 952
936 953 used by _pre_filter_rev.
937 954 """
938 955 assert self.revlog.delta_config.sparse_revlog
939 956 # if the revision we test against is too small, the resulting delta
940 957 # will be large anyway as the amount of data to be added is big
941 958 if self.revlog.rawsize(rev) < (self.textlen // LIMIT_BASE2TEXT):
942 959 return False
943 960
944 961 if self.revlog.delta_config.upper_bound_comp is not None:
945 962 maxcomp = self.revlog.delta_config.upper_bound_comp
946 963 basenotsnap = (self.p1, self.p2, nullrev)
947 964 if rev not in basenotsnap and self.revlog.issnapshot(rev):
948 965 snapshotdepth = self.revlog.snapshotdepth(rev)
949 966 # If text is significantly larger than the base, we can
950 967 # expect the resulting delta to be proportional to the size
951 968 # difference
952 969 revsize = self.revlog.rawsize(rev)
953 970 rawsizedistance = max(self.textlen - revsize, 0)
954 971 # use an estimate of the compression upper bound.
955 972 lowestrealisticdeltalen = rawsizedistance // maxcomp
956 973
957 974 # check the absolute constraint on the delta size
958 975 snapshotlimit = self.textlen >> snapshotdepth
959 976 if snapshotlimit < lowestrealisticdeltalen:
960 977 # delta lower bound is larger than accepted upper
961 978 # bound
962 979 return False
963 980
964 981 # check the relative constraint on the delta size
965 982 revlength = self.revlog.length(rev)
966 983 if revlength < lowestrealisticdeltalen:
967 984 # delta probable lower bound is larger than target
968 985 # base
969 986 return False
970 987 return True
971 988
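# Worked example for the estimate above (illustrative numbers): with
# upper_bound_comp=10, textlen=100000 and a candidate snapshot of
# rawsize 20000, rawsizedistance is 80000 and lowestrealisticdeltalen
# is 8000; at snapshotdepth 4 the absolute constraint only allows
# 100000 >> 4 == 6250 bytes, so the candidate is skipped.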
972 989 def _pre_filter_rev_delta_chain(self, rev):
973 990 """pre filtering that is needed in sparse revlog cases
974 991
975 992 return True if it seems okay to test a rev, False otherwise.
976 993
977 994 used by _pre_filter_rev.
978 995 """
979 996 deltas_limit = self.revinfo.textlen * LIMIT_DELTA2TEXT
980 997 # filter out delta bases that will never produce a good delta
981 998 #
982 999 # if the delta of that base is already bigger than the limit
983 1000 # for the delta chain size, doing a delta is hopeless.
984 1001 if deltas_limit < self.revlog.length(rev):
985 1002 return False
986 1003
987 1004 # If we reach here, we are about to build and test a delta.
988 1005 # The delta building process will compute the chaininfo in all
989 1006 # cases; since that computation is cached, it is fine to access
990 1007 # it here too.
991 1008 chainlen, chainsize = self.revlog._chaininfo(rev)
992 1009 # if chain will be too long, skip base
993 1010 if (
994 1011 self.revlog.delta_config.max_chain_len
995 1012 and chainlen >= self.revlog.delta_config.max_chain_len
996 1013 ):
997 1014 return False
998 1015 # if chain already have too much data, skip base
999 1016 if deltas_limit < chainsize:
1000 1017 return False
1001 1018 return True
1002 1019
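# Worked example for the chain budget above (illustrative numbers): with
# LIMIT_DELTA2TEXT=2 and textlen=1000, any base whose own stored delta
# exceeds 2000 bytes, or whose chain already carries more than 2000
# bytes of delta data, is filtered out before any delta is computed.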
1003 1020 def _pre_filter_rev(self, rev):
1004 1021 """return True if it seems okay to test a rev, False otherwise"""
1005 1022 if not self._pre_filter_rev_universal(rev):
1006 1023 return False
1007 1024 if not self._pre_filter_rev_delta_chain(rev):
1008 1025 return False
1009 1026 if self.revlog.delta_config.sparse_revlog:
1010 1027 if not self._pre_filter_rev_sparse(rev):
1011 1028 return False
1012 1029
1013 1030 return True
1014 1031
1015 1032 def _refined_groups(self):
1016 1033 good = None
1017 1034 # First we try to reuse the delta contained in the bundle (or from
1018 1035 # the source revlog).
1019 1036 #
1020 1037 # This logic only applies to general delta repositories and can be
1021 1038 # disabled through configuration. Disabling source delta reuse is
1022 1039 # useful when we want to make sure we recompute "optimal" deltas.
1023 1040 if (
1024 1041 self.cachedelta is not None
1025 1042 and self.cachedelta[2] > DELTA_BASE_REUSE_NO
1026 1043 ):
1027 1044 # Assume what we received from the server is a good choice
1028 1045 # build delta will reuse the cache
1046 self.current_stage = _STAGE_CACHED
1029 1047 good = yield (self.cachedelta[0],)
1030 1048 if good is not None:
1031 1049 yield None
1032 1050 return
1033 1051 groups = self._raw_groups()
1034 1052 for candidates in groups:
1035 1053 good = yield candidates
1036 1054 if good is not None:
1037 1055 break
1038 1056
1039 1057 # If sparse revlog is enabled, we can try to refine the available
1040 1058 # deltas
1041 1059 if not self.revlog.delta_config.sparse_revlog:
1060 self.current_stage = _STAGE_FULL
1042 1061 yield None
1043 1062 return
1044 1063
1045 1064 # if we have a refinable value, try to refine it
1046 1065 if (
1047 1066 good is not None
1048 1067 and good not in (self.p1, self.p2)
1049 1068 and self.revlog.issnapshot(good)
1050 1069 ):
1070 self.current_stage = _STAGE_SNAPSHOT
1051 1071 # refine snapshot down
1052 1072 previous = None
1053 1073 while previous != good:
1054 1074 previous = good
1055 1075 base = self.revlog.deltaparent(good)
1056 1076 if base == nullrev:
1057 1077 break
1058 1078 good = yield (base,)
1059 1079 # refine snapshot up
1060 1080 if not self.snapshot_cache.snapshots:
1061 1081 self.snapshot_cache.update(self.revlog, good + 1)
1062 1082 previous = None
1063 1083 while good != previous:
1064 1084 previous = good
1065 1085 children = tuple(
1066 1086 sorted(c for c in self.snapshot_cache.snapshots[good])
1067 1087 )
1068 1088 good = yield children
1069 1089
1090 self.current_stage = _STAGE_FULL
1070 1091 yield None
1071 1092
1072 1093 def _raw_groups(self):
1073 1094 """Provides group of revision to be tested as delta base
1074 1095
1075 1096 This lower level function focuses on emitting deltas that are
1076 1097 theoretically interesting, without looking at any practical details.
1077 1098
1078 1099 The group order aims at providing fast or small candidates first.
1079 1100 """
1080 1101 sparse = self.revlog.delta_config.sparse_revlog
1081 1102 prev = self.target_rev - 1
1082 1103 deltachain = lambda rev: self.revlog._deltachain(rev)[0]
1083 1104
1084 1105 # exclude already lazy tested base if any
1085 1106 parents = [p for p in (self.p1, self.p2) if p != nullrev]
1086 1107
1108 self.current_stage = _STAGE_PARENTS
1087 1109 if (
1088 1110 not self.revlog.delta_config.delta_both_parents
1089 1111 and len(parents) == 2
1090 1112 ):
1091 1113 parents.sort()
1092 1114 # To minimize the chance of having to build a fulltext,
1093 1115 # pick first whichever parent is closest to us (max rev)
1094 1116 yield (parents[1],)
1095 1117 # then the other one (min rev) if the first did not fit
1096 1118 yield (parents[0],)
1097 1119 elif len(parents) > 0:
1098 1120 # Test all parents (1 or 2), and keep the best candidate
1099 1121 yield parents
1100 1122
1101 1123 if sparse and parents:
1124 self.current_stage = _STAGE_SNAPSHOT
1102 1125 # See if we can use an existing snapshot in the parent chains
1103 1126 # as a base for a new intermediate-snapshot
1104 1127 #
1105 1128 # search for snapshots in the parents' delta chains, building a
1106 1129 # map: snapshot-level -> snapshot-revs
1107 1130 parents_snaps = collections.defaultdict(set)
1108 1131 candidate_chains = [deltachain(p) for p in parents]
1109 1132 for chain in candidate_chains:
1110 1133 for idx, s in enumerate(chain):
1111 1134 if not self.revlog.issnapshot(s):
1112 1135 break
1113 1136 parents_snaps[idx].add(s)
1114 1137 snapfloor = min(parents_snaps[0]) + 1
1115 1138 self.snapshot_cache.update(self.revlog, snapfloor)
1116 1139 # search for the highest "unrelated" revision
1117 1140 #
1118 1141 # Adding snapshots used by "unrelated" revisions increases the odds we
1119 1142 # reuse an independent, yet better, snapshot chain.
1120 1143 #
1121 1144 # XXX instead of building a set of revisions, we could lazily
1122 1145 # enumerate over the chains. That would be more efficient, however
1123 1146 # we stick to simple code for now.
1124 1147 all_revs = set()
1125 1148 for chain in candidate_chains:
1126 1149 all_revs.update(chain)
1127 1150 other = None
1128 1151 for r in self.revlog.revs(prev, snapfloor):
1129 1152 if r not in all_revs:
1130 1153 other = r
1131 1154 break
1132 1155 if other is not None:
1133 1156 # To avoid unfair competition, we won't use unrelated
1134 1157 # intermediate snapshots that are deeper than the ones from the
1135 1158 # parent delta chain.
1136 1159 max_depth = max(parents_snaps.keys())
1137 1160 chain = deltachain(other)
1138 1161 for depth, s in enumerate(chain):
1139 1162 if s < snapfloor:
1140 1163 continue
1141 1164 if max_depth < depth:
1142 1165 break
1143 1166 if not self.revlog.issnapshot(s):
1144 1167 break
1145 1168 parents_snaps[depth].add(s)
1146 1169 # Test them as possible intermediate snapshot bases. We test them
1147 1170 # from highest to lowest level. High level ones are more likely to
1148 1171 # result in small deltas.
1149 1172 floor = None
1150 1173 for idx, snaps in sorted(parents_snaps.items(), reverse=True):
1151 1174 siblings = set()
1152 1175 for s in snaps:
1153 1176 siblings.update(self.snapshot_cache.snapshots[s])
1154 1177 # Before considering making a new intermediate snapshot, we
1155 1178 # check if an existing snapshot, child of a base we consider,
1156 1179 # would be suitable.
1157 1180 #
1158 1181 # It gives a chance to reuse a delta chain "unrelated" to the
1159 1182 # current revision instead of starting our own. Without such
1160 1183 # re-use, topological branches would keep reopening new chains,
1161 1184 # creating more and more snapshots as the repository grows.
1162 1185
1163 1186 if floor is not None:
1164 1187 # We only do this for siblings created after the one in our
1165 1188 # parent's delta chain. Those created before have fewer
1166 1189 # chances to be a valid base since our ancestors had to
1167 1190 # create a new snapshot.
1168 1191 siblings = [r for r in siblings if floor < r]
1169 1192 yield tuple(sorted(siblings))
1170 1193 # then test the base from our parent's delta chain.
1171 1194 yield tuple(sorted(snaps))
1172 1195 floor = min(snaps)
1173 1196 # No suitable base found in the parent chain; check whether any full
1174 1197 # snapshots emitted since parent's base would be a suitable base
1175 1198 # for an intermediate snapshot.
1176 1199 #
1177 1200 # It gives a chance to reuse a delta chain unrelated to the current
1178 1201 # revision instead of starting our own. Without such re-use,
1179 1202 # topological branches would keep reopening new full chains,
1180 1203 # creating more and more snapshots as the repository grows.
1181 1204 full = [
1182 1205 r
1183 1206 for r in self.snapshot_cache.snapshots[nullrev]
1184 1207 if snapfloor <= r
1185 1208 ]
1186 1209 yield tuple(sorted(full))
1187 1210
1188 1211 if not sparse:
1190 1213 # other approaches failed; try against prev to hopefully save us a
1190 1213 # fulltext.
1214 self.current_stage = _STAGE_PREV
1191 1215 yield (prev,)
1192 1216
1193 1217
1194 1218 class SnapshotCache:
1195 1219 __slots__ = ('snapshots', '_start_rev', '_end_rev')
1196 1220
1197 1221 def __init__(self):
1198 1222 self.snapshots = collections.defaultdict(set)
1199 1223 self._start_rev = None
1200 1224 self._end_rev = None
1201 1225
1202 1226 def update(self, revlog, start_rev=0):
1203 1227 """find snapshots from start_rev to tip"""
1204 1228 nb_revs = len(revlog)
1205 1229 end_rev = nb_revs - 1
1206 1230 if start_rev > end_rev:
1207 1231 return # range is empty
1208 1232
1209 1233 if self._start_rev is None:
1210 1234 assert self._end_rev is None
1211 1235 self._update(revlog, start_rev, end_rev)
1212 1236 elif not (self._start_rev <= start_rev and end_rev <= self._end_rev):
1213 1237 if start_rev < self._start_rev:
1214 1238 self._update(revlog, start_rev, self._start_rev - 1)
1215 1239 if self._end_rev < end_rev:
1216 1240 self._update(revlog, self._end_rev + 1, end_rev)
1217 1241
1218 1242 if self._start_rev is None:
1219 1243 assert self._end_rev is None
1220 1244 self._end_rev = end_rev
1221 1245 self._start_rev = start_rev
1222 1246 else:
1223 1247 self._start_rev = min(self._start_rev, start_rev)
1224 1248 self._end_rev = max(self._end_rev, end_rev)
1225 1249 assert self._start_rev <= self._end_rev, (
1226 1250 self._start_rev,
1227 1251 self._end_rev,
1228 1252 )
1229 1253
1230 1254 def _update(self, revlog, start_rev, end_rev):
1231 1255 """internal method that actually do update content"""
1232 1256 assert self._start_rev is None or (
1233 1257 start_rev < self._start_rev or start_rev > self._end_rev
1234 1258 ), (self._start_rev, self._end_rev, start_rev, end_rev)
1235 1259 assert self._start_rev is None or (
1236 1260 end_rev < self._start_rev or end_rev > self._end_rev
1237 1261 ), (self._start_rev, self._end_rev, start_rev, end_rev)
1238 1262 cache = self.snapshots
1239 1263 if hasattr(revlog.index, 'findsnapshots'):
1240 1264 revlog.index.findsnapshots(cache, start_rev, end_rev)
1241 1265 else:
1242 1266 deltaparent = revlog.deltaparent
1243 1267 issnapshot = revlog.issnapshot
1244 1268 for rev in revlog.revs(start_rev, end_rev):
1245 1269 if issnapshot(rev):
1246 1270 cache[deltaparent(rev)].add(rev)
1247 1271
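# Usage sketch, summarizing the behavior above: the first
# `cache.update(revlog)` scans every revision and records snapshots
# keyed by their delta parent in `cache.snapshots`; later calls only
# scan revisions outside the already-covered [_start_rev, _end_rev]
# range.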
1248 1272
1249 1273 class deltacomputer:
1250 1274 """object capable of computing delta and finding delta for multiple revision
1251 1275
1252 1276 This object is meant to compute and find multiple delta applied to the same
1253 1277 revlog.
1254 1278 """
1255 1279
1256 1280 def __init__(
1257 1281 self,
1258 1282 revlog,
1259 1283 write_debug=None,
1260 1284 debug_search=False,
1261 1285 debug_info=None,
1262 1286 ):
1263 1287 self.revlog = revlog
1264 1288 self._write_debug = write_debug
1265 1289 if write_debug is None:
1266 1290 self._debug_search = False
1267 1291 else:
1268 1292 self._debug_search = debug_search
1269 1293 self._debug_info = debug_info
1270 1294 self._snapshot_cache = SnapshotCache()
1271 1295
1272 1296 @property
1273 1297 def _gather_debug(self):
1274 1298 return self._write_debug is not None or self._debug_info is not None
1275 1299
1276 1300 def buildtext(self, revinfo):
1277 1301 """Builds a fulltext version of a revision
1278 1302
1279 1303 revinfo: revisioninfo instance that contains all needed info
1280 1304 """
1281 1305 btext = revinfo.btext
1282 1306 if btext[0] is not None:
1283 1307 return btext[0]
1284 1308
1285 1309 revlog = self.revlog
1286 1310 cachedelta = revinfo.cachedelta
1287 1311 baserev = cachedelta[0]
1288 1312 delta = cachedelta[1]
1289 1313
1290 1314 fulltext = btext[0] = _textfromdelta(
1291 1315 revlog,
1292 1316 baserev,
1293 1317 delta,
1294 1318 revinfo.p1,
1295 1319 revinfo.p2,
1296 1320 revinfo.flags,
1297 1321 revinfo.node,
1298 1322 )
1299 1323 return fulltext
1300 1324
1301 1325 def _builddeltadiff(self, base, revinfo):
1302 1326 revlog = self.revlog
1303 1327 t = self.buildtext(revinfo)
1304 1328 if revlog.iscensored(base):
1305 1329 # deltas based on a censored revision must replace the
1306 1330 # full content in one patch, so delta works everywhere
1307 1331 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
1308 1332 delta = header + t
1309 1333 else:
1310 1334 ptext = revlog.rawdata(base)
1311 1335 delta = mdiff.textdiff(ptext, t)
1312 1336
1313 1337 return delta
1314 1338
1315 1339 def _builddeltainfo(self, revinfo, base, target_rev=None):
1316 1340 # can we use the cached delta?
1317 1341 revlog = self.revlog
1318 1342 chainbase = revlog.chainbase(base)
1319 1343 if revlog.delta_config.general_delta:
1320 1344 deltabase = base
1321 1345 else:
1322 1346 if target_rev is not None and base != target_rev - 1:
1323 1347 msg = (
1324 1348 b'general delta cannot use delta for something else '
1325 1349 b'than `prev`: %d<-%d'
1326 1350 )
1327 1351 msg %= (base, target_rev)
1328 1352 raise error.ProgrammingError(msg)
1329 1353 deltabase = chainbase
1330 1354 snapshotdepth = None
1331 1355 if revlog.delta_config.sparse_revlog and deltabase == nullrev:
1332 1356 snapshotdepth = 0
1333 1357 elif revlog.delta_config.sparse_revlog and revlog.issnapshot(deltabase):
1334 1358 # A delta chain should always be one full snapshot,
1335 1359 # zero or more semi-snapshots, and zero or more deltas
1336 1360 p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
1337 1361 if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
1338 1362 snapshotdepth = len(revlog._deltachain(deltabase)[0])
1339 1363 delta = None
1340 1364 if revinfo.cachedelta:
1341 1365 cachebase = revinfo.cachedelta[0]
1342 1366 # check if the diff still applies
1343 1367 currentbase = cachebase
1344 1368 while (
1345 1369 currentbase != nullrev
1346 1370 and currentbase != base
1347 1371 and self.revlog.length(currentbase) == 0
1348 1372 ):
1349 1373 currentbase = self.revlog.deltaparent(currentbase)
1350 1374 if self.revlog.delta_config.lazy_delta and currentbase == base:
1351 1375 delta = revinfo.cachedelta[1]
1352 1376 if delta is None:
1353 1377 delta = self._builddeltadiff(base, revinfo)
1354 1378 if self._debug_search:
1355 1379 msg = b"DBG-DELTAS-SEARCH: uncompressed-delta-size=%d\n"
1356 1380 msg %= len(delta)
1357 1381 self._write_debug(msg)
1358 1382 # snapshotdepth needs to be neither None nor a 0-level snapshot
1359 1383 if revlog.delta_config.upper_bound_comp is not None and snapshotdepth:
1360 1384 lowestrealisticdeltalen = (
1361 1385 len(delta) // revlog.delta_config.upper_bound_comp
1362 1386 )
1363 1387 snapshotlimit = revinfo.textlen >> snapshotdepth
1364 1388 if self._debug_search:
1365 1389 msg = b"DBG-DELTAS-SEARCH: projected-lower-size=%d\n"
1366 1390 msg %= lowestrealisticdeltalen
1367 1391 self._write_debug(msg)
1368 1392 if snapshotlimit < lowestrealisticdeltalen:
1369 1393 if self._debug_search:
1370 1394 msg = b"DBG-DELTAS-SEARCH: DISCARDED (snapshot limit)\n"
1371 1395 self._write_debug(msg)
1372 1396 return None
1373 1397 if revlog.length(base) < lowestrealisticdeltalen:
1374 1398 if self._debug_search:
1375 1399 msg = b"DBG-DELTAS-SEARCH: DISCARDED (prev size)\n"
1376 1400 self._write_debug(msg)
1377 1401 return None
1378 1402 header, data = revlog._inner.compress(delta)
1379 1403 deltalen = len(header) + len(data)
1380 1404 offset = revlog.end(len(revlog) - 1)
1381 1405 dist = deltalen + offset - revlog.start(chainbase)
1382 1406 chainlen, compresseddeltalen = revlog._chaininfo(base)
1383 1407 chainlen += 1
1384 1408 compresseddeltalen += deltalen
1385 1409
1386 1410 return _deltainfo(
1387 1411 dist,
1388 1412 deltalen,
1389 1413 (header, data),
1390 1414 deltabase,
1391 1415 chainbase,
1392 1416 chainlen,
1393 1417 compresseddeltalen,
1394 1418 snapshotdepth,
1395 1419 )
1396 1420
1397 1421 def _fullsnapshotinfo(self, revinfo, curr):
1398 1422 rawtext = self.buildtext(revinfo)
1399 1423 data = self.revlog._inner.compress(rawtext)
1400 1424 compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
1401 1425 deltabase = chainbase = curr
1402 1426 snapshotdepth = 0
1403 1427 chainlen = 1
1404 1428
1405 1429 return _deltainfo(
1406 1430 dist,
1407 1431 deltalen,
1408 1432 data,
1409 1433 deltabase,
1410 1434 chainbase,
1411 1435 chainlen,
1412 1436 compresseddeltalen,
1413 1437 snapshotdepth,
1414 1438 )
1415 1439
1416 1440 def finddeltainfo(self, revinfo, excluded_bases=None, target_rev=None):
1417 1441 """Find an acceptable delta against a candidate revision
1418 1442
1419 1443 revinfo: information about the revision (instance of _revisioninfo)
1420 1444
1421 1445 Returns the first acceptable candidate revision, as ordered by
1422 1446 _candidate_groups
1423 1447
1424 1448 If no suitable deltabase is found, we return delta info for a full
1425 1449 snapshot.
1426 1450
1427 1451 `excluded_bases` is an optional set of revisions that cannot be used as
1428 1452 delta bases. Use this to recompute deltas suitable in censor or strip
1429 1453 contexts.
1430 1454 """
1431 1455 if target_rev is None:
1432 1456 target_rev = len(self.revlog)
1433 1457
1434 1458 gather_debug = self._gather_debug
1435 1459 cachedelta = revinfo.cachedelta
1436 1460 revlog = self.revlog
1437 1461 p1r = p2r = None
1438 1462
1439 1463 if excluded_bases is None:
1440 1464 excluded_bases = set()
1441 1465
1442 1466 if gather_debug:
1443 1467 start = util.timer()
1444 1468 dbg = self._one_dbg_data()
1445 1469 dbg['revision'] = target_rev
1446 1470 p1r = revlog.rev(revinfo.p1)
1447 1471 p2r = revlog.rev(revinfo.p2)
1448 1472 if p1r != nullrev:
1449 1473 p1_chain_len = revlog._chaininfo(p1r)[0]
1450 1474 else:
1451 1475 p1_chain_len = -1
1452 1476 if p2r != nullrev:
1453 1477 p2_chain_len = revlog._chaininfo(p2r)[0]
1454 1478 else:
1455 1479 p2_chain_len = -1
1456 1480 dbg['p1-chain-len'] = p1_chain_len
1457 1481 dbg['p2-chain-len'] = p2_chain_len
1458 1482
1459 1483 # 1) if the revision is empty, no amount of delta can beat it
1460 1484 #
1461 1485 # 2) no delta for flag processor revision (see "candelta" for why)
1462 1486 # not calling candelta since only one revision needs testing, also to
1463 1487 # avoid overhead fetching flags again.
1464 1488 if not revinfo.textlen or revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
1465 1489 deltainfo = self._fullsnapshotinfo(revinfo, target_rev)
1466 1490 if gather_debug:
1467 1491 end = util.timer()
1468 1492 dbg['duration'] = end - start
1469 1493 dbg[
1470 1494 'delta-base'
1471 1495 ] = deltainfo.base # pytype: disable=attribute-error
1472 1496 dbg['search_round_count'] = 0
1473 1497 dbg['using-cached-base'] = False
1474 1498 dbg['delta_try_count'] = 0
1475 1499 dbg['type'] = b"full"
1476 1500 dbg['snapshot-depth'] = 0
1477 1501 self._dbg_process_data(dbg)
1478 1502 return deltainfo
1479 1503
1480 1504 deltainfo = None
1481 1505
1482 1506 # If this source delta is to be forcibly reused, let us comply early.
1483 1507 if (
1484 1508 revlog.delta_config.general_delta
1485 1509 and revinfo.cachedelta is not None
1486 1510 and revinfo.cachedelta[2] == DELTA_BASE_REUSE_FORCE
1487 1511 ):
1488 1512 base = revinfo.cachedelta[0]
1489 1513 if base == nullrev:
1490 1514 dbg_type = b"full"
1491 1515 deltainfo = self._fullsnapshotinfo(revinfo, target_rev)
1492 1516 if gather_debug:
1493 1517 snapshotdepth = 0
1494 1518 elif base not in excluded_bases:
1495 1519 delta = revinfo.cachedelta[1]
1496 1520 header, data = revlog.compress(delta)
1497 1521 deltalen = len(header) + len(data)
1498 1522 if gather_debug:
1499 1523 offset = revlog.end(len(revlog) - 1)
1500 1524 chainbase = revlog.chainbase(base)
1501 1525 distance = deltalen + offset - revlog.start(chainbase)
1502 1526 chainlen, compresseddeltalen = revlog._chaininfo(base)
1503 1527 chainlen += 1
1504 1528 compresseddeltalen += deltalen
1505 1529 if base == p1r or base == p2r:
1506 1530 dbg_type = b"delta"
1507 1531 snapshotdepth = None
1508 1532 elif not revlog.issnapshot(base):
1509 1533 snapshotdepth = None
1510 1534 else:
1511 1535 dbg_type = b"snapshot"
1512 1536 snapshotdepth = revlog.snapshotdepth(base) + 1
1513 1537 else:
1514 1538 distance = None
1515 1539 chainbase = None
1516 1540 chainlen = None
1517 1541 compresseddeltalen = None
1518 1542 snapshotdepth = None
1519 1543 deltainfo = _deltainfo(
1520 1544 distance=distance,
1521 1545 deltalen=deltalen,
1522 1546 data=(header, data),
1523 1547 base=base,
1524 1548 chainbase=chainbase,
1525 1549 chainlen=chainlen,
1526 1550 compresseddeltalen=compresseddeltalen,
1527 1551 snapshotdepth=snapshotdepth,
1528 1552 )
1529 1553
1530 1554 if deltainfo is not None:
1531 1555 if gather_debug:
1532 1556 end = util.timer()
1533 1557 dbg['duration'] = end - start
1534 1558 dbg[
1535 1559 'delta-base'
1536 1560 ] = deltainfo.base # pytype: disable=attribute-error
1537 1561 dbg['search_round_count'] = 0
1538 1562 dbg['using-cached-base'] = True
1539 1563 dbg['delta_try_count'] = 0
1540 1564 dbg['type'] = dbg_type
1541 1565 if snapshotdepth is None:
1542 1566 dbg['snapshot-depth'] = -1
1543 1567 else:
1544 1568 dbg['snapshot-depth'] = snapshotdepth
1545 1569 self._dbg_process_data(dbg)
1546 1570 return deltainfo
1547 1571
1548 1572 # count the number of different deltas we tried (for debug purposes)
1549 1573 dbg_try_count = 0
1550 1574 # count the number of "search rounds" we did (for debug purposes)
1551 1575 dbg_try_rounds = 0
1552 1576 dbg_type = b'unknown'
1553 1577
1554 1578 if p1r is None:
1555 1579 p1r = revlog.rev(revinfo.p1)
1556 1580 p2r = revlog.rev(revinfo.p2)
1557 1581
1558 1582 if self._debug_search:
1559 1583 msg = b"DBG-DELTAS-SEARCH: SEARCH rev=%d\n"
1560 1584 msg %= target_rev
1561 1585 self._write_debug(msg)
1562 1586
1563 1587 # should we try to build a delta?
1564 1588 if not (len(self.revlog) and self.revlog._storedeltachains):
1565 1589 search_cls = _NoDeltaSearch
1566 1590 elif not self.revlog.delta_config.general_delta:
1567 1591 # before general delta, there is only one possible delta base
1568 1592 search_cls = _PrevDeltaSearch
1569 1593 else:
1570 1594 search_cls = _DeltaSearch
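# Summary of the dispatch above: an empty revlog, or one that does not
# store delta chains, always ends up with full snapshots
# (_NoDeltaSearch); without general-delta the only possible base is
# the previous revision (_PrevDeltaSearch); otherwise the full
# candidate search (_DeltaSearch) is used.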
1571 1595
1572 1596 search = search_cls(
1573 1597 self.revlog,
1574 1598 revinfo,
1575 1599 p1r,
1576 1600 p2r,
1577 1601 cachedelta,
1578 1602 excluded_bases,
1579 1603 target_rev,
1580 1604 snapshot_cache=self._snapshot_cache,
1581 1605 )
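# The snapshot cache lives on `self` rather than on the search object,
# presumably so that what one search learns about existing snapshots
# can be reused when searching deltas for subsequent revisions.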
1582 1606
1583 1607 while not search.done:
1584 1608 current_group = search.current_group
1585 1609 # current_group can be `None`, but not while search.done is False.
1586 1610 # We add this assert to help pytype
1587 1611 assert current_group is not None
1588 1612 candidaterevs = current_group
1589 1613 dbg_try_rounds += 1
1590 1614 if self._debug_search:
1591 1615 prev = None
1592 1616 if deltainfo is not None:
1593 1617 prev = deltainfo.base
1594 1618
1595 1619 if (
1596 1620 cachedelta is not None
1597 1621 and len(candidaterevs) == 1
1598 1622 and cachedelta[0] in candidaterevs
1599 1623 ):
1600 1624 round_type = b"cached-delta"
1601 1625 elif p1r in candidaterevs or p2r in candidaterevs:
1602 1626 round_type = b"parents"
1603 1627 elif prev is not None and all(c < prev for c in candidaterevs):
1604 1628 round_type = b"refine-down"
1605 1629 elif prev is not None and all(c > prev for c in candidaterevs):
1606 1630 round_type = b"refine-up"
1607 1631 else:
1608 1632 round_type = b"search-down"
1609 1633 msg = b"DBG-DELTAS-SEARCH: ROUND #%d - %d candidates - %s\n"
1610 1634 msg %= (dbg_try_rounds, len(candidaterevs), round_type)
1611 1635 self._write_debug(msg)
1612 1636 nominateddeltas = []
1613 1637 if deltainfo is not None:
1614 1638 if self._debug_search:
1615 1639 msg = (
1616 1640 b"DBG-DELTAS-SEARCH: CONTENDER: rev=%d - length=%d\n"
1617 1641 )
1618 1642 msg %= (deltainfo.base, deltainfo.deltalen)
1619 1643 self._write_debug(msg)
1620 1644 # if we already found a good delta,
1621 1645 # challenge it against refined candidates
1622 1646 nominateddeltas.append(deltainfo)
1623 1647 for candidaterev in candidaterevs:
1624 1648 if self._debug_search:
1625 1649 msg = b"DBG-DELTAS-SEARCH: CANDIDATE: rev=%d\n"
1626 1650 msg %= candidaterev
1627 1651 self._write_debug(msg)
1628 1652 candidate_type = None
1629 1653 if candidaterev == p1r:
1630 1654 candidate_type = b"p1"
1631 1655 elif candidaterev == p2r:
1632 1656 candidate_type = b"p2"
1633 1657 elif self.revlog.issnapshot(candidaterev):
1634 1658 candidate_type = b"snapshot-%d"
1635 1659 candidate_type %= self.revlog.snapshotdepth(
1636 1660 candidaterev
1637 1661 )
1638 1662
1639 1663 if candidate_type is not None:
1640 1664 msg = b"DBG-DELTAS-SEARCH: type=%s\n"
1641 1665 msg %= candidate_type
1642 1666 self._write_debug(msg)
1643 1667 msg = b"DBG-DELTAS-SEARCH: size=%d\n"
1644 1668 msg %= self.revlog.length(candidaterev)
1645 1669 self._write_debug(msg)
1646 1670 msg = b"DBG-DELTAS-SEARCH: base=%d\n"
1647 1671 msg %= self.revlog.deltaparent(candidaterev)
1648 1672 self._write_debug(msg)
1649 1673
1650 1674 dbg_try_count += 1
1651 1675
1652 1676 if self._debug_search:
1653 1677 delta_start = util.timer()
1654 1678 candidatedelta = self._builddeltainfo(
1655 1679 revinfo,
1656 1680 candidaterev,
1657 1681 target_rev=target_rev,
1658 1682 )
1659 1683 if self._debug_search:
1660 1684 delta_end = util.timer()
1661 1685 msg = b"DBG-DELTAS-SEARCH: delta-search-time=%f\n"
1662 1686 msg %= delta_end - delta_start
1663 1687 self._write_debug(msg)
1664 1688 if candidatedelta is not None:
1665 1689 if search.is_good_delta_info(candidatedelta):
1666 1690 if self._debug_search:
1667 1691 msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (GOOD)\n"
1668 1692 msg %= candidatedelta.deltalen
1669 1693 self._write_debug(msg)
1670 1694 nominateddeltas.append(candidatedelta)
1671 1695 elif self._debug_search:
1672 1696 msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (BAD)\n"
1673 1697 msg %= candidatedelta.deltalen
1674 1698 self._write_debug(msg)
1675 1699 elif self._debug_search:
1676 1700 msg = b"DBG-DELTAS-SEARCH: NO-DELTA\n"
1677 1701 self._write_debug(msg)
1678 1702 if nominateddeltas:
1679 1703 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
1680 1704 search.next_group(deltainfo)
1681 1705
1682 1706 if deltainfo is None:
1683 1707 dbg_type = b"full"
1684 1708 deltainfo = self._fullsnapshotinfo(revinfo, target_rev)
1685 1709 elif deltainfo.snapshotdepth: # pytype: disable=attribute-error
1686 1710 dbg_type = b"snapshot"
1687 1711 else:
1688 1712 dbg_type = b"delta"
1689 1713
1690 1714 if gather_debug:
1691 1715 end = util.timer()
1692 1716 if dbg_type == b'full':
1693 1717 used_cached = (
1694 1718 cachedelta is not None
1695 1719 and dbg_try_rounds == 0
1696 1720 and dbg_try_count == 0
1697 1721 and cachedelta[0] == nullrev
1698 1722 )
1699 1723 else:
1700 1724 used_cached = (
1701 1725 cachedelta is not None
1702 1726 and dbg_try_rounds == 1
1703 1727 and dbg_try_count == 1
1704 1728 and deltainfo.base == cachedelta[0]
1705 1729 )
1706 1730 dbg['duration'] = end - start
1707 1731 dbg[
1708 1732 'delta-base'
1709 1733 ] = deltainfo.base # pytype: disable=attribute-error
1710 1734 dbg['search_round_count'] = dbg_try_rounds
1711 1735 dbg['using-cached-base'] = used_cached
1712 1736 dbg['delta_try_count'] = dbg_try_count
1713 1737 dbg['type'] = dbg_type
1714 1738 if (
1715 1739 deltainfo.snapshotdepth # pytype: disable=attribute-error
1716 1740 is not None
1717 1741 ):
1718 1742 dbg[
1719 1743 'snapshot-depth'
1720 1744 ] = deltainfo.snapshotdepth # pytype: disable=attribute-error
1721 1745 else:
1722 1746 dbg['snapshot-depth'] = -1
1723 1747 self._dbg_process_data(dbg)
1724 1748 return deltainfo
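# Contract sketch for the method above: the returned `_deltainfo`
# describes either a full snapshot (produced by `_fullsnapshotinfo`,
# assumed to use nullrev as its base) or a delta against the best
# surviving candidate base.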
1725 1749
1726 1750 def _one_dbg_data(self):
1727 1751 dbg = {
1728 1752 'duration': None,
1729 1753 'revision': None,
1730 1754 'delta-base': None,
1731 1755 'search_round_count': None,
1732 1756 'using-cached-base': None,
1733 1757 'delta_try_count': None,
1734 1758 'type': None,
1735 1759 'p1-chain-len': None,
1736 1760 'p2-chain-len': None,
1737 1761 'snapshot-depth': None,
1738 1762 'target-revlog': None,
1739 1763 }
1740 1764 target_revlog = b"UNKNOWN"
1741 1765 target_type = self.revlog.target[0]
1742 1766 target_key = self.revlog.target[1]
1743 1767 if target_type == KIND_CHANGELOG:
1744 1768 target_revlog = b'CHANGELOG:'
1745 1769 elif target_type == KIND_MANIFESTLOG:
1746 1770 target_revlog = b'MANIFESTLOG:'
1747 1771 if target_key:
1748 1772 target_revlog += b'%s:' % target_key
1749 1773 elif target_type == KIND_FILELOG:
1750 1774 target_revlog = b'FILELOG:'
1751 1775 if target_key:
1752 1776 target_revlog += b'%s:' % target_key
1753 1777 dbg['target-revlog'] = target_revlog
1754 1778 return dbg
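# Illustrative example: for a filelog tracking b'foo.txt', the stub
# returned above starts out as
#   {'duration': None, ..., 'target-revlog': b'FILELOG:foo.txt:'}
# and the caller fills in the remaining keys before handing it to
# _dbg_process_data.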
1755 1779
1756 1780 def _dbg_process_data(self, dbg):
1757 1781 if self._debug_info is not None:
1758 1782 self._debug_info.append(dbg)
1759 1783
1760 1784 if self._write_debug is not None:
1761 1785 msg = (
1762 1786 b"DBG-DELTAS:"
1763 1787 b" %-12s"
1764 1788 b" rev=%d:"
1765 1789 b" delta-base=%d"
1766 1790 b" is-cached=%d"
1767 1791 b" - search-rounds=%d"
1768 1792 b" try-count=%d"
1769 1793 b" - delta-type=%-6s"
1770 1794 b" snap-depth=%d"
1771 1795 b" - p1-chain-length=%d"
1772 1796 b" p2-chain-length=%d"
1773 1797 b" - duration=%f"
1774 1798 b"\n"
1775 1799 )
1776 1800 msg %= (
1777 1801 dbg["target-revlog"],
1778 1802 dbg["revision"],
1779 1803 dbg["delta-base"],
1780 1804 dbg["using-cached-base"],
1781 1805 dbg["search_round_count"],
1782 1806 dbg["delta_try_count"],
1783 1807 dbg["type"],
1784 1808 dbg["snapshot-depth"],
1785 1809 dbg["p1-chain-len"],
1786 1810 dbg["p2-chain-len"],
1787 1811 dbg["duration"],
1788 1812 )
1789 1813 self._write_debug(msg)
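# With the template above, a fully filled-in record renders as one
# line, along the lines of (values illustrative):
#
#   DBG-DELTAS: FILELOG:foo.txt: rev=42: delta-base=41 is-cached=0
#   - search-rounds=2 try-count=3 - delta-type=delta  snap-depth=-1
#   - p1-chain-length=12 p2-chain-length=1 - duration=0.000123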
1790 1814
1791 1815
1792 1816 def delta_compression(default_compression_header, deltainfo):
1793 1817 """return (COMPRESSION_MODE, deltainfo)
1794 1818
1795 1819 used by revlog v2+ format to dispatch between PLAIN and DEFAULT
1796 1820 compression.
1797 1821 """
1798 1822 h, d = deltainfo.data
1799 1823 compression_mode = COMP_MODE_INLINE
1800 1824 if not h and not d:
1801 1825 # no data to store at all... declare it uncompressed
1802 1826 compression_mode = COMP_MODE_PLAIN
1803 1827 elif not h:
1804 1828 t = d[0:1]
1805 1829 if t == b'\0':
1806 1830 compression_mode = COMP_MODE_PLAIN
1807 1831 elif t == default_compression_header:
1808 1832 compression_mode = COMP_MODE_DEFAULT
1809 1833 elif h == b'u':
1810 1834 # we have a more efficient way to declare uncompressed
1811 1835 h = b''
1812 1836 compression_mode = COMP_MODE_PLAIN
1813 1837 deltainfo = drop_u_compression(deltainfo)
1814 1838 return compression_mode, deltainfo
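# A minimal doctest-style sketch of the dispatch above, assuming a
# zlib-style default compression header of b'x' and a stand-in object
# exposing only the `data` attribute that this function reads:
#
#   >>> class _fakedeltainfo:
#   ...     def __init__(self, data):
#   ...         self.data = data
#   >>> delta_compression(b'x', _fakedeltainfo((b'', b'')))[0] == COMP_MODE_PLAIN
#   True
#   >>> delta_compression(b'x', _fakedeltainfo((b'', b'\0raw')))[0] == COMP_MODE_PLAIN
#   True
#   >>> delta_compression(b'x', _fakedeltainfo((b'', b'x\x9c...')))[0] == COMP_MODE_DEFAULT
#   True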