##// END OF EJS Templates
deltas: add code to display information about the result of `finddeltainfo`...
marmoute -
r50121:e6b7c6fb default
parent child Browse files
Show More
@@ -1,1146 +1,1233 b''
1 1 # revlogdeltas.py - Logic around delta computation for revlog
2 2 #
3 3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 4 # Copyright 2018 Octobus <contact@octobus.net>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8 """Helper class to compute deltas stored inside revlogs"""
9 9
10 10
11 11 import collections
12 12 import struct
13 13
14 14 # import stuff from node for others to import from revlog
15 15 from ..node import nullrev
16 16 from ..i18n import _
17 17 from ..pycompat import getattr
18 18
19 19 from .constants import (
20 20 COMP_MODE_DEFAULT,
21 21 COMP_MODE_INLINE,
22 22 COMP_MODE_PLAIN,
23 KIND_CHANGELOG,
24 KIND_FILELOG,
25 KIND_MANIFESTLOG,
23 26 REVIDX_ISCENSORED,
24 27 REVIDX_RAWTEXT_CHANGING_FLAGS,
25 28 )
26 29
27 30 from ..thirdparty import attr
28 31
29 32 from .. import (
30 33 error,
31 34 mdiff,
32 35 util,
33 36 )
34 37
35 38 from . import flagutil
36 39
# maximum <delta-chain-data>/<revision-text-length> ratio
#
# A candidate whose cumulated delta-chain payload exceeds this many times the
# revision fulltext length is rejected (see `isgooddeltainfo` and
# `_candidategroups`).
LIMIT_DELTA2TEXT = 2
39 42
40 43
41 44 class _testrevlog:
42 45 """minimalist fake revlog to use in doctests"""
43 46
44 47 def __init__(self, data, density=0.5, mingap=0, snapshot=()):
45 48 """data is an list of revision payload boundaries"""
46 49 self._data = data
47 50 self._srdensitythreshold = density
48 51 self._srmingapsize = mingap
49 52 self._snapshot = set(snapshot)
50 53 self.index = None
51 54
52 55 def start(self, rev):
53 56 if rev == nullrev:
54 57 return 0
55 58 if rev == 0:
56 59 return 0
57 60 return self._data[rev - 1]
58 61
59 62 def end(self, rev):
60 63 if rev == nullrev:
61 64 return 0
62 65 return self._data[rev]
63 66
64 67 def length(self, rev):
65 68 return self.end(rev) - self.start(rev)
66 69
67 70 def __len__(self):
68 71 return len(self._data)
69 72
70 73 def issnapshot(self, rev):
71 74 if rev == nullrev:
72 75 return True
73 76 return rev in self._snapshot
74 77
75 78
def slicechunk(revlog, revs, targetsize=None):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one time.
    Assume that revs are sorted.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `revlog._srdensitythreshold`. No gap smaller than
    `revlog._srmingapsize` is skipped.

    If `targetsize` is set, no chunk larger than `targetsize` will be yield.
    For consistency with other slicing choice, this limit won't go lower than
    `revlog._srmingapsize`.

    If individual revisions chunk are larger than this limit, they will still
    be raised individually.

    >>> data = [
    ...  5,  #00 (5)
    ...  10, #01 (5)
    ...  12, #02 (2)
    ...  12, #03 (empty)
    ...  27, #04 (15)
    ...  31, #05 (4)
    ...  31, #06 (empty)
    ...  42, #07 (11)
    ...  47, #08 (5)
    ...  47, #09 (empty)
    ...  48, #10 (1)
    ...  51, #11 (3)
    ...  74, #12 (23)
    ...  85, #13 (11)
    ...  86, #14 (1)
    ...  91, #15 (5)
    ... ]
    >>> revlog = _testrevlog(data, snapshot=range(16))

    >>> list(slicechunk(revlog, list(range(16))))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(slicechunk(revlog, [0, 15]))
    [[0], [15]]
    >>> list(slicechunk(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(slicechunk(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]

    Slicing with a maximum chunk size
    >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
    [[0], [11], [13], [15]]
    >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
    [[0], [11], [13, 15]]

    Slicing involving nullrev
    >>> list(slicechunk(revlog, [-1, 0, 11, 13, 15], targetsize=20))
    [[-1, 0], [11], [13, 15]]
    >>> list(slicechunk(revlog, [-1, 13, 15], targetsize=5))
    [[-1], [13], [15]]
    """
    mingapsize = revlog._srmingapsize
    if targetsize is not None:
        # never slice below the minimal gap size guarantee
        targetsize = max(targetsize, mingapsize)
    # targetsize should not be specified when evaluating delta candidates:
    # * targetsize is used to ensure we stay within specification when reading,
    densityslicing = getattr(revlog.index, 'slicechunktodensity', None)
    if densityslicing is None:
        # no native implementation available, fall back to the Python one
        def densityslicing(revs, threshold, mingap):
            return _slicechunktodensity(revlog, revs, threshold, mingap)

    for chunk in densityslicing(revs, revlog._srdensitythreshold, mingapsize):
        yield from _slicechunktosize(revlog, chunk, targetsize)
148 151
149 152
def _slicechunktosize(revlog, revs, targetsize=None):
    """slice revs to match the target size

    This is intended to be used on chunk that density slicing selected by that
    are still too large compared to the read garantee of revlog. This might
    happens when "minimal gap size" interrupted the slicing or when chain are
    built in a way that create large blocks next to each other.

    >>> data = [
    ...  3,  #0 (3)
    ...  5,  #1 (2)
    ...  6,  #2 (1)
    ...  8,  #3 (2)
    ...  8,  #4 (empty)
    ...  11, #5 (3)
    ...  12, #6 (1)
    ...  13, #7 (1)
    ...  14, #8 (1)
    ... ]

    == All snapshots cases ==
    >>> revlog = _testrevlog(data, snapshot=range(9))

    Cases where chunk is already small enough
    >>> list(_slicechunktosize(revlog, [0], 3))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], 3))
    [[6, 7]]
    >>> list(_slicechunktosize(revlog, [0], None))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], None))
    [[6, 7]]

    cases where we need actual slicing
    >>> list(_slicechunktosize(revlog, [0, 1], 3))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 3))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
    [[1, 2], [3]]
    >>> list(_slicechunktosize(revlog, [3, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
    [[5], [6, 7, 8]]
    >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
    [[0], [1, 2], [3], [5], [6, 7, 8]]

    Case with too large individual chunk (must return valid chunk)
    >>> list(_slicechunktosize(revlog, [0, 1], 2))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 1))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
    [[3], [5]]

    == No Snapshot cases ==
    >>> revlog = _testrevlog(data)

    Cases where chunk is already small enough
    >>> list(_slicechunktosize(revlog, [0], 3))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], 3))
    [[6, 7]]
    >>> list(_slicechunktosize(revlog, [0], None))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], None))
    [[6, 7]]

    cases where we need actual slicing
    >>> list(_slicechunktosize(revlog, [0, 1], 3))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 3))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
    [[1], [2, 3]]
    >>> list(_slicechunktosize(revlog, [3, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
    [[3], [4, 5]]
    >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
    [[5], [6, 7, 8]]
    >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
    [[0], [1, 2], [3], [5], [6, 7, 8]]

    Case with too large individual chunk (must return valid chunk)
    >>> list(_slicechunktosize(revlog, [0, 1], 2))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 1))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
    [[3], [5]]

    == mixed case ==
    >>> revlog = _testrevlog(data, snapshot=[0, 1, 2])
    >>> list(_slicechunktosize(revlog, list(range(9)), 5))
    [[0, 1], [2], [3, 4, 5], [6, 7, 8]]
    """
    assert targetsize is None or 0 <= targetsize
    startdata = revlog.start(revs[0])
    enddata = revlog.end(revs[-1])
    fullspan = enddata - startdata
    # the whole span already fits within the limit: nothing to slice
    if targetsize is None or fullspan <= targetsize:
        yield revs
        return

    startrevidx = 0
    endrevidx = 1
    iterrevs = enumerate(revs)
    next(iterrevs)  # skip first rev.
    # first step: get snapshots out of the way
    for idx, r in iterrevs:
        span = revlog.end(r) - startdata
        snapshot = revlog.issnapshot(r)
        if span <= targetsize and snapshot:
            # chunk [startrevidx, idx] still fits: extend it to include `r`
            endrevidx = idx + 1
        else:
            # current chunk is full (or `r` is not a snapshot): flush it and
            # start a new chunk at `idx`
            chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
            if chunk:
                yield chunk
            startrevidx = idx
            startdata = revlog.start(r)
            endrevidx = idx + 1
        if not snapshot:
            # non-snapshot revisions are handled by the loop below
            break

    # for the others, we use binary slicing to quickly converge toward valid
    # chunks (otherwise, we might end up looking for start/end of many
    # revisions). This logic is not looking for the perfect slicing point, it
    # focuses on quickly converging toward valid chunks.
    nbitem = len(revs)
    while (enddata - startdata) > targetsize:
        endrevidx = nbitem
        if nbitem - startrevidx <= 1:
            break  # protect against individual chunk larger than limit
        localenddata = revlog.end(revs[endrevidx - 1])
        span = localenddata - startdata
        while span > targetsize:
            if endrevidx - startrevidx <= 1:
                break  # protect against individual chunk larger than limit
            # halve the candidate chunk until its span fits targetsize
            endrevidx -= (endrevidx - startrevidx) // 2
            localenddata = revlog.end(revs[endrevidx - 1])
            span = localenddata - startdata
        chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
        if chunk:
            yield chunk
        startrevidx = endrevidx
        startdata = revlog.start(revs[startrevidx])

    # flush whatever remains after the last cut
    chunk = _trimchunk(revlog, revs, startrevidx)
    if chunk:
        yield chunk
304 307
def _slicechunktodensity(revlog, revs, targetdensity=0.5, mingapsize=0):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one time.
    Assume that revs are sorted.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
    skipped.

    >>> revlog = _testrevlog([
    ...  5,  #00 (5)
    ...  10, #01 (5)
    ...  12, #02 (2)
    ...  12, #03 (empty)
    ...  27, #04 (15)
    ...  31, #05 (4)
    ...  31, #06 (empty)
    ...  42, #07 (11)
    ...  47, #08 (5)
    ...  47, #09 (empty)
    ...  48, #10 (1)
    ...  51, #11 (3)
    ...  74, #12 (23)
    ...  85, #13 (11)
    ...  86, #14 (1)
    ...  91, #15 (5)
    ... ])

    >>> list(_slicechunktodensity(revlog, list(range(16))))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(_slicechunktodensity(revlog, [0, 15]))
    [[0], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           mingapsize=20))
    [[1, 2, 3, 5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           targetdensity=0.95))
    [[1, 2], [5], [8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           targetdensity=0.95, mingapsize=12))
    [[1, 2], [5, 8, 10, 11], [14]]
    """
    start = revlog.start
    length = revlog.length

    # a single revision cannot be sliced any further
    if len(revs) <= 1:
        yield revs
        return

    deltachainspan = segmentspan(revlog, revs)

    if deltachainspan < mingapsize:
        yield revs
        return

    readdata = deltachainspan
    chainpayload = sum(length(r) for r in revs)

    if deltachainspan:
        density = chainpayload / float(deltachainspan)
    else:
        density = 1.0

    # already dense enough: no slicing needed
    if density >= targetdensity:
        yield revs
        return

    # Collect the gaps between consecutive revisions as (size, index) pairs;
    # they are sorted below so the largest ones can be popped first.
    gaps = []
    prevend = None
    for i, rev in enumerate(revs):
        revstart = start(rev)
        revlen = length(rev)

        # Skip empty revisions to form larger holes
        if revlen == 0:
            continue

        if prevend is not None:
            gapsize = revstart - prevend
            # only consider holes that are large enough
            if gapsize > mingapsize:
                gaps.append((gapsize, i))

        prevend = revstart + revlen
    # sort the gaps to pop them from largest to small
    gaps.sort()

    # Collect the indices of the largest holes until the density is acceptable
    selected = []
    while gaps and density < targetdensity:
        gapsize, gapidx = gaps.pop()

        selected.append(gapidx)

        # skipping this gap removes its size from the data that would be read,
        # improving the density of what remains
        readdata -= gapsize
        if readdata > 0:
            density = chainpayload / float(readdata)
        else:
            density = 1.0
    selected.sort()

    # Cut the revs at collected indices
    previdx = 0
    for idx in selected:

        chunk = _trimchunk(revlog, revs, previdx, idx)
        if chunk:
            yield chunk

        previdx = idx

    chunk = _trimchunk(revlog, revs, previdx)
    if chunk:
        yield chunk
429 432
430 433
431 434 def _trimchunk(revlog, revs, startidx, endidx=None):
432 435 """returns revs[startidx:endidx] without empty trailing revs
433 436
434 437 Doctest Setup
435 438 >>> revlog = _testrevlog([
436 439 ... 5, #0
437 440 ... 10, #1
438 441 ... 12, #2
439 442 ... 12, #3 (empty)
440 443 ... 17, #4
441 444 ... 21, #5
442 445 ... 21, #6 (empty)
443 446 ... ])
444 447
445 448 Contiguous cases:
446 449 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
447 450 [0, 1, 2, 3, 4, 5]
448 451 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
449 452 [0, 1, 2, 3, 4]
450 453 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
451 454 [0, 1, 2]
452 455 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
453 456 [2]
454 457 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
455 458 [3, 4, 5]
456 459 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
457 460 [3, 4]
458 461
459 462 Discontiguous cases:
460 463 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
461 464 [1, 3, 5]
462 465 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
463 466 [1]
464 467 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
465 468 [3, 5]
466 469 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
467 470 [3, 5]
468 471 """
469 472 length = revlog.length
470 473
471 474 if endidx is None:
472 475 endidx = len(revs)
473 476
474 477 # If we have a non-emtpy delta candidate, there are nothing to trim
475 478 if revs[endidx - 1] < len(revlog):
476 479 # Trim empty revs at the end, except the very first revision of a chain
477 480 while (
478 481 endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0
479 482 ):
480 483 endidx -= 1
481 484
482 485 return revs[startidx:endidx]
483 486
484 487
def segmentspan(revlog, revs):
    """Get the byte span of a segment of revisions

    revs is a sorted array of revision numbers

    >>> revlog = _testrevlog([
    ...  5,  #0
    ...  10, #1
    ...  12, #2
    ...  12, #3 (empty)
    ...  17, #4
    ... ])

    >>> segmentspan(revlog, [0, 1, 2, 3, 4])
    17
    >>> segmentspan(revlog, [0, 4])
    17
    >>> segmentspan(revlog, [3, 4])
    5
    >>> segmentspan(revlog, [1, 2, 3,])
    7
    >>> segmentspan(revlog, [1, 3])
    7
    """
    # an empty segment reads no data at all
    if not revs:
        return 0
    return revlog.end(revs[-1]) - revlog.start(revs[0])
513 516
514 517
def _textfromdelta(fh, revlog, baserev, delta, p1, p2, flags, expectednode):
    """build full text from a (base, delta) pair and other metadata"""
    hlen = struct.calcsize(b">lll")
    fullheader = mdiff.replacediffheader(
        revlog.rawsize(baserev), len(delta) - hlen
    )
    if delta[:hlen] == fullheader:
        # The delta replaces the entire base: no need to decode the base
        # revision at all.  This also neatly sidesteps censored bases, which
        # raise when decoded.
        fulltext = delta[hlen:]
    else:
        # deltabase is rawtext before changed by flag processors, which is
        # equivalent to non-raw text
        basetext = revlog.revision(baserev, _df=fh)
        fulltext = mdiff.patch(basetext, delta)

    try:
        validatehash = flagutil.processflagsraw(revlog, fulltext, flags)
        if validatehash:
            revlog.checkhash(fulltext, expectednode, p1=p1, p2=p2)
        if flags & REVIDX_ISCENSORED:
            raise error.StorageError(
                _(b'node %s is not censored') % expectednode
            )
    except error.CensoredNodeError:
        # must pass the censored index flag to add censored revisions
        if not flags & REVIDX_ISCENSORED:
            raise
    return fulltext
544 547
545 548
@attr.s(slots=True, frozen=True)
class _deltainfo:
    """immutable description of a delta candidate computed for a revision"""

    # distance from the base revision; bounds the I/O needed to restore the
    # revision (see `isgooddeltainfo`)
    distance = attr.ib()
    # size in bytes of the delta itself
    deltalen = attr.ib()
    # 2-item sequence of (header, payload); header is b'u' for uncompressed
    # data and b'' when no header applies (see `drop_u_compression`)
    data = attr.ib()
    # revision number the delta applies against
    base = attr.ib()
    # base of the resulting delta chain (presumably its first revision —
    # confirm against revlog internals)
    chainbase = attr.ib()
    # number of deltas in the resulting chain (checked against _maxchainlen)
    chainlen = attr.ib()
    # cumulated size of the deltas to apply; bounds the CPU spent restoring
    # the revision (see `isgooddeltainfo`)
    compresseddeltalen = attr.ib()
    # snapshot depth when this delta creates an intermediate snapshot,
    # None otherwise
    snapshotdepth = attr.ib()
556 559
557 560
def drop_u_compression(delta):
    """drop the b'u' (no-compression) header from a delta's payload

    Returns an equivalent ``_deltainfo`` whose payload carries no compression
    header at all.  This is useful for revlog formats that have a better
    compression method.
    """
    assert delta.data[0] == b'u', delta.data[0]
    return _deltainfo(
        distance=delta.distance,
        # one byte shorter: the b'u' marker is gone
        deltalen=delta.deltalen - 1,
        data=(b'', delta.data[1]),
        base=delta.base,
        chainbase=delta.chainbase,
        chainlen=delta.chainlen,
        compresseddeltalen=delta.compresseddeltalen,
        snapshotdepth=delta.snapshotdepth,
    )
574 577
575 578
def isgooddeltainfo(revlog, deltainfo, revinfo):
    """Returns True if the given delta is good. Good means that it is within
    the disk span, disk size, and chain length bounds that we know to be
    performant."""
    if deltainfo is None:
        return False

    # Two quantities bound the cost of using this delta:
    # - 'deltainfo.distance': distance from the base revision, i.e. the
    #   amount of I/O needed to restore the revision.
    # - 'deltainfo.compresseddeltalen': total size of the deltas to apply,
    #   i.e. the amount of CPU consumed.
    textlen = revinfo.textlen

    maxdist = revlog._maxdeltachainspan
    if not maxdist:
        maxdist = deltainfo.distance  # ensure the conditional pass
    maxdist = max(maxdist, textlen * 4)

    # Bad delta from read span:
    #
    # If the span of data read is larger than the maximum allowed.
    #
    # In the sparse-revlog case, we rely on the associated "sparse reading"
    # to avoid issue related to the span of data. In theory, it would be
    # possible to build pathological revlog where delta pattern would lead
    # to too many reads. However, they do not happen in practice at all. So
    # we skip the span check entirely.
    if not revlog._sparserevlog and maxdist < deltainfo.distance:
        return False

    # Bad delta from new delta size: storing a delta larger than the target
    # text would be inefficient.
    if textlen < deltainfo.deltalen:
        return False

    # Bad delta from cumulated payload size: the sum of the deltas grew past
    # K * target text length.
    if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
        return False

    # Bad delta from chain length: too many deltas in the chain.
    if revlog._maxchainlen and revlog._maxchainlen < deltainfo.chainlen:
        return False

    snapdepth = deltainfo.snapshotdepth

    # Bad delta from intermediate snapshot size limit: the limit exists to
    # prevent endless chains of intermediate deltas from being created.
    if snapdepth is not None and (textlen >> snapdepth) < deltainfo.deltalen:
        return False

    # Bad delta if the new intermediate snapshot is larger than the previous
    # snapshot.
    if snapdepth and revlog.length(deltainfo.base) < deltainfo.deltalen:
        return False

    return True
647 650
648 651
# If a revision's full text is that much bigger than a base candidate full
# text's, it is very unlikely that it will produce a valid delta. We no longer
# consider these candidates.
#
# (used by `_candidategroups` to discard bases more than 500x smaller than
# the new revision's fulltext)
LIMIT_BASE2TEXT = 500
653 656
654 657
def _candidategroups(revlog, textlen, p1, p2, cachedelta):
    """Provides group of revision to be tested as delta base

    This top level function focus on emitting groups with unique and worthwhile
    content. See _raw_candidate_groups for details about the group order.

    This is a generator using the ``send()`` protocol: the caller sends back
    the revision it picked from the previous group (or None), which is
    forwarded to `_refinedgroups` to drive refinement.  A final None is
    yielded when no candidates remain.
    """
    # should we try to build a delta?
    if not (len(revlog) and revlog._storedeltachains):
        yield None
        return

    deltalength = revlog.length
    deltaparent = revlog.deltaparent
    sparse = revlog._sparserevlog
    good = None

    # cumulated chain payload above this is a bad trade-off (LIMIT_DELTA2TEXT)
    deltas_limit = textlen * LIMIT_DELTA2TEXT

    tested = {nullrev}
    candidates = _refinedgroups(revlog, p1, p2, cachedelta)
    while True:
        temptative = candidates.send(good)
        if temptative is None:
            break
        group = []
        for rev in temptative:
            # skip over empty delta (no need to include them in a chain)
            while revlog._generaldelta and not (
                rev == nullrev or rev in tested or deltalength(rev)
            ):
                tested.add(rev)
                rev = deltaparent(rev)
            # no need to try a delta against nullrev, this will be done as a
            # last resort.
            if rev == nullrev:
                continue
            # filter out revision we tested already
            if rev in tested:
                continue
            tested.add(rev)
            # filter out delta base that will never produce good delta
            if deltas_limit < revlog.length(rev):
                continue
            if sparse and revlog.rawsize(rev) < (textlen // LIMIT_BASE2TEXT):
                continue
            # no delta for rawtext-changing revs (see "candelta" for why)
            if revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
                continue
            # If we reach here, we are about to build and test a delta.
            # The delta building process will compute the chaininfo in all
            # case, since that computation is cached, it is fine to access it
            # here too.
            chainlen, chainsize = revlog._chaininfo(rev)
            # if chain will be too long, skip base
            if revlog._maxchainlen and chainlen >= revlog._maxchainlen:
                continue
            # if chain already have too much data, skip base
            if deltas_limit < chainsize:
                continue
            if sparse and revlog.upperboundcomp is not None:
                maxcomp = revlog.upperboundcomp
                basenotsnap = (p1, p2, nullrev)
                if rev not in basenotsnap and revlog.issnapshot(rev):
                    snapshotdepth = revlog.snapshotdepth(rev)
                    # If text is significantly larger than the base, we can
                    # expect the resulting delta to be proportional to the size
                    # difference
                    revsize = revlog.rawsize(rev)
                    rawsizedistance = max(textlen - revsize, 0)
                    # use an estimate of the compression upper bound.
                    lowestrealisticdeltalen = rawsizedistance // maxcomp

                    # check the absolute constraint on the delta size
                    snapshotlimit = textlen >> snapshotdepth
                    if snapshotlimit < lowestrealisticdeltalen:
                        # delta lower bound is larger than accepted upper bound
                        continue

                    # check the relative constraint on the delta size
                    revlength = revlog.length(rev)
                    if revlength < lowestrealisticdeltalen:
                        # delta probable lower bound is larger than target base
                        continue

            group.append(rev)
        if group:
            # XXX: in the sparse revlog case, group can become large,
            # impacting performances. Some bounding or slicing mechanism
            # would help to reduce this impact.
            good = yield tuple(group)
    yield None
746 749
747 750
def _findsnapshots(revlog, cache, start_rev):
    """find snapshot from start_rev to tip"""
    if util.safehasattr(revlog.index, b'findsnapshots'):
        # delegate to the index implementation when it provides one
        revlog.index.findsnapshots(cache, start_rev)
        return
    # pure-python fallback: walk every revision and record snapshots,
    # keyed by their delta parent
    deltaparent = revlog.deltaparent
    issnapshot = revlog.issnapshot
    for rev in revlog.revs(start_rev):
        if issnapshot(rev):
            cache[deltaparent(rev)].append(rev)
758 761
759 762
def _refinedgroups(revlog, p1, p2, cachedelta):
    """Emit groups of candidate delta bases, refining them via ``send()``

    The caller sends back the revision it selected from the previous group
    (or None).  On sparse revlogs, a selected snapshot is further refined by
    also proposing its delta bases ("down") and its snapshot children ("up").
    Yields None when there is nothing left to propose.
    """
    good = None
    # First we try to reuse the delta contained in the bundle.
    # (or from the source revlog)
    #
    # This logic only applies to general delta repositories and can be disabled
    # through configuration. Disabling reuse source delta is useful when
    # we want to make sure we recomputed "optimal" deltas.
    if cachedelta and revlog._generaldelta and revlog._lazydeltabase:
        # Assume what we received from the server is a good choice
        # build delta will reuse the cache
        good = yield (cachedelta[0],)
        if good is not None:
            yield None
            return
    snapshots = collections.defaultdict(list)
    for candidates in _rawgroups(revlog, p1, p2, cachedelta, snapshots):
        good = yield candidates
        if good is not None:
            break

    # If sparse revlog is enabled, we can try to refine the available deltas
    if not revlog._sparserevlog:
        yield None
        return

    # if we have a refinable value, try to refine it
    if good is not None and good not in (p1, p2) and revlog.issnapshot(good):
        # refine snapshot down: walk toward the root of the snapshot chain
        previous = None
        while previous != good:
            previous = good
            base = revlog.deltaparent(good)
            if base == nullrev:
                break
            good = yield (base,)
        # refine snapshot up: try the snapshot children of the current pick
        if not snapshots:
            _findsnapshots(revlog, snapshots, good + 1)
        previous = None
        while good != previous:
            previous = good
            children = tuple(sorted(c for c in snapshots[good]))
            good = yield children

    # we have found nothing
    yield None
807 810
808 811
def _rawgroups(revlog, p1, p2, cachedelta, snapshots=None):
    """Provides group of revision to be tested as delta base

    This lower level function focuses on emitting theoretically interesting
    deltas without looking at any practical details.

    The group order aims at providing fast or small candidates first.
    """
    gdelta = revlog._generaldelta
    # gate sparse behind general-delta because of issue6056
    sparse = gdelta and revlog._sparserevlog
    curr = len(revlog)
    prev = curr - 1
    deltachain = lambda rev: revlog._deltachain(rev)[0]

    if gdelta:
        # exclude already lazy tested base if any
        parents = [p for p in (p1, p2) if p != nullrev]

        if not revlog._deltabothparents and len(parents) == 2:
            parents.sort()
            # To minimize the chance of having to build a fulltext,
            # pick first whichever parent is closest to us (max rev)
            yield (parents[1],)
            # then the other one (min rev) if the first did not fit
            yield (parents[0],)
        elif len(parents) > 0:
            # Test all parents (1 or 2), and keep the best candidate
            yield parents

    if sparse and parents:
        if snapshots is None:
            # map: base-rev: snapshot-rev
            snapshots = collections.defaultdict(list)
        # See if we can use an existing snapshot in the parent chains to use as
        # a base for a new intermediate-snapshot
        #
        # search for snapshot in parents delta chain
        # map: snapshot-level: snapshot-rev
        parents_snaps = collections.defaultdict(set)
        candidate_chains = [deltachain(p) for p in parents]
        for chain in candidate_chains:
            for idx, s in enumerate(chain):
                if not revlog.issnapshot(s):
                    break
                parents_snaps[idx].add(s)
        snapfloor = min(parents_snaps[0]) + 1
        _findsnapshots(revlog, snapshots, snapfloor)
        # search for the highest "unrelated" revision
        #
        # Adding snapshots used by "unrelated" revisions increases the odds we
        # reuse an independent, yet better snapshot chain.
        #
        # XXX instead of building a set of revisions, we could lazily enumerate
        # over the chains. That would be more efficient, however we stick to
        # simple code for now.
        all_revs = set()
        for chain in candidate_chains:
            all_revs.update(chain)
        other = None
        for r in revlog.revs(prev, snapfloor):
            if r not in all_revs:
                other = r
                break
        if other is not None:
            # To avoid unfair competition, we won't use unrelated intermediate
            # snapshots that are deeper than the ones from the parent delta
            # chain.
            max_depth = max(parents_snaps.keys())
            chain = deltachain(other)
            for idx, s in enumerate(chain):
                if s < snapfloor:
                    continue
                if max_depth < idx:
                    break
                if not revlog.issnapshot(s):
                    break
                parents_snaps[idx].add(s)
        # Test them as possible intermediate snapshot base
        # We test them from highest to lowest level. High level ones are more
        # likely to result in small deltas
        floor = None
        for idx, snaps in sorted(parents_snaps.items(), reverse=True):
            siblings = set()
            for s in snaps:
                siblings.update(snapshots[s])
            # Before considering making a new intermediate snapshot, we check
            # if an existing snapshot, children of base we consider, would be
            # suitable.
            #
            # It gives a chance to reuse a delta chain "unrelated" to the
            # current revision instead of starting our own. Without such
            # re-use, topological branches would keep reopening new chains.
            # Creating more and more snapshots as the repository grows.

            if floor is not None:
                # We only do this for siblings created after the one in our
                # parent's delta chain. Those created before have less chance
                # to be valid bases since our ancestors had to create a new
                # snapshot.
                siblings = [r for r in siblings if floor < r]
            yield tuple(sorted(siblings))
            # then test the base from our parent's delta chain.
            yield tuple(sorted(snaps))
            floor = min(snaps)
        # No suitable base found in the parent chain, search if any full
        # snapshots emitted since parent's base would be a suitable base for an
        # intermediate snapshot.
        #
        # It gives a chance to reuse a delta chain unrelated to the current
        # revisions instead of starting our own. Without such re-use,
        # topological branches would keep reopening new full chains. Creating
        # more and more snapshots as the repository grows.
        yield tuple(snapshots[nullrev])

    if not sparse:
        # other approach failed try against prev to hopefully save us a
        # fulltext.
        yield (prev,)
928 931
929 932
class deltacomputer:
    """Computes and evaluates candidate deltas for revisions of a revlog.

    ``write_debug`` is an optional callable taking a single bytes message;
    when provided, ``finddeltainfo`` emits a ``DBG-DELTAS:`` line describing
    the outcome of each delta search.
    """

    def __init__(self, revlog, write_debug=None):
        # the revlog we compute deltas for
        self.revlog = revlog
        # optional sink for debug output (None disables instrumentation)
        self._write_debug = write_debug
933 937
934 938 def buildtext(self, revinfo, fh):
935 939 """Builds a fulltext version of a revision
936 940
937 941 revinfo: revisioninfo instance that contains all needed info
938 942 fh: file handle to either the .i or the .d revlog file,
939 943 depending on whether it is inlined or not
940 944 """
941 945 btext = revinfo.btext
942 946 if btext[0] is not None:
943 947 return btext[0]
944 948
945 949 revlog = self.revlog
946 950 cachedelta = revinfo.cachedelta
947 951 baserev = cachedelta[0]
948 952 delta = cachedelta[1]
949 953
950 954 fulltext = btext[0] = _textfromdelta(
951 955 fh,
952 956 revlog,
953 957 baserev,
954 958 delta,
955 959 revinfo.p1,
956 960 revinfo.p2,
957 961 revinfo.flags,
958 962 revinfo.node,
959 963 )
960 964 return fulltext
961 965
962 966 def _builddeltadiff(self, base, revinfo, fh):
963 967 revlog = self.revlog
964 968 t = self.buildtext(revinfo, fh)
965 969 if revlog.iscensored(base):
966 970 # deltas based on a censored revision must replace the
967 971 # full content in one patch, so delta works everywhere
968 972 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
969 973 delta = header + t
970 974 else:
971 975 ptext = revlog.rawdata(base, _df=fh)
972 976 delta = mdiff.textdiff(ptext, t)
973 977
974 978 return delta
975 979
976 980 def _builddeltainfo(self, revinfo, base, fh):
977 981 # can we use the cached delta?
978 982 revlog = self.revlog
979 983 chainbase = revlog.chainbase(base)
980 984 if revlog._generaldelta:
981 985 deltabase = base
982 986 else:
983 987 deltabase = chainbase
984 988 snapshotdepth = None
985 989 if revlog._sparserevlog and deltabase == nullrev:
986 990 snapshotdepth = 0
987 991 elif revlog._sparserevlog and revlog.issnapshot(deltabase):
988 992 # A delta chain should always be one full snapshot,
989 993 # zero or more semi-snapshots, and zero or more deltas
990 994 p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
991 995 if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
992 996 snapshotdepth = len(revlog._deltachain(deltabase)[0])
993 997 delta = None
994 998 if revinfo.cachedelta:
995 999 cachebase, cachediff = revinfo.cachedelta
996 1000 # check if the diff still apply
997 1001 currentbase = cachebase
998 1002 while (
999 1003 currentbase != nullrev
1000 1004 and currentbase != base
1001 1005 and self.revlog.length(currentbase) == 0
1002 1006 ):
1003 1007 currentbase = self.revlog.deltaparent(currentbase)
1004 1008 if self.revlog._lazydelta and currentbase == base:
1005 1009 delta = revinfo.cachedelta[1]
1006 1010 if delta is None:
1007 1011 delta = self._builddeltadiff(base, revinfo, fh)
1008 1012 # snapshotdept need to be neither None nor 0 level snapshot
1009 1013 if revlog.upperboundcomp is not None and snapshotdepth:
1010 1014 lowestrealisticdeltalen = len(delta) // revlog.upperboundcomp
1011 1015 snapshotlimit = revinfo.textlen >> snapshotdepth
1012 1016 if snapshotlimit < lowestrealisticdeltalen:
1013 1017 return None
1014 1018 if revlog.length(base) < lowestrealisticdeltalen:
1015 1019 return None
1016 1020 header, data = revlog.compress(delta)
1017 1021 deltalen = len(header) + len(data)
1018 1022 offset = revlog.end(len(revlog) - 1)
1019 1023 dist = deltalen + offset - revlog.start(chainbase)
1020 1024 chainlen, compresseddeltalen = revlog._chaininfo(base)
1021 1025 chainlen += 1
1022 1026 compresseddeltalen += deltalen
1023 1027
1024 1028 return _deltainfo(
1025 1029 dist,
1026 1030 deltalen,
1027 1031 (header, data),
1028 1032 deltabase,
1029 1033 chainbase,
1030 1034 chainlen,
1031 1035 compresseddeltalen,
1032 1036 snapshotdepth,
1033 1037 )
1034 1038
1035 1039 def _fullsnapshotinfo(self, fh, revinfo, curr):
1036 1040 rawtext = self.buildtext(revinfo, fh)
1037 1041 data = self.revlog.compress(rawtext)
1038 1042 compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
1039 1043 deltabase = chainbase = curr
1040 1044 snapshotdepth = 0
1041 1045 chainlen = 1
1042 1046
1043 1047 return _deltainfo(
1044 1048 dist,
1045 1049 deltalen,
1046 1050 data,
1047 1051 deltabase,
1048 1052 chainbase,
1049 1053 chainlen,
1050 1054 compresseddeltalen,
1051 1055 snapshotdepth,
1052 1056 )
1053 1057
1054 1058 def finddeltainfo(self, revinfo, fh, excluded_bases=None, target_rev=None):
1055 1059 """Find an acceptable delta against a candidate revision
1056 1060
1057 1061 revinfo: information about the revision (instance of _revisioninfo)
1058 1062 fh: file handle to either the .i or the .d revlog file,
1059 1063 depending on whether it is inlined or not
1060 1064
1061 1065 Returns the first acceptable candidate revision, as ordered by
1062 1066 _candidategroups
1063 1067
1064 1068 If no suitable deltabase is found, we return delta info for a full
1065 1069 snapshot.
1066 1070
1067 1071 `excluded_bases` is an optional set of revision that cannot be used as
1068 1072 a delta base. Use this to recompute delta suitable in censor or strip
1069 1073 context.
1070 1074 """
1071 1075 if target_rev is None:
1072 1076 target_rev = len(self.revlog)
1073 1077
1074 1078 if not revinfo.textlen:
1075 1079 return self._fullsnapshotinfo(fh, revinfo, target_rev)
1076 1080
1077 1081 if excluded_bases is None:
1078 1082 excluded_bases = set()
1079 1083
1080 1084 # no delta for flag processor revision (see "candelta" for why)
1081 1085 # not calling candelta since only one revision needs test, also to
1082 1086 # avoid overhead fetching flags again.
1083 1087 if revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
1084 1088 return self._fullsnapshotinfo(fh, revinfo, target_rev)
1085 1089
1090 if self._write_debug is not None:
1091 start = util.timer()
1092
1093 # count the number of different delta we tried (for debug purpose)
1094 dbg_try_count = 0
1095 # count the number of "search round" we did. (for debug purpose)
1096 dbg_try_rounds = 0
1097 dbg_type = b'unknown'
1098
1086 1099 cachedelta = revinfo.cachedelta
1087 1100 p1 = revinfo.p1
1088 1101 p2 = revinfo.p2
1089 1102 revlog = self.revlog
1090 1103
1091 1104 deltainfo = None
1092 1105 p1r, p2r = revlog.rev(p1), revlog.rev(p2)
1106
1107 if self._write_debug is not None:
1108 if p1r != nullrev:
1109 p1_chain_len = revlog._chaininfo(p1r)[0]
1110 else:
1111 p1_chain_len = -1
1112 if p2r != nullrev:
1113 p2_chain_len = revlog._chaininfo(p2r)[0]
1114 else:
1115 p2_chain_len = -1
1116
1093 1117 groups = _candidategroups(
1094 1118 self.revlog, revinfo.textlen, p1r, p2r, cachedelta
1095 1119 )
1096 1120 candidaterevs = next(groups)
1097 1121 while candidaterevs is not None:
1122 dbg_try_rounds += 1
1098 1123 nominateddeltas = []
1099 1124 if deltainfo is not None:
1100 1125 # if we already found a good delta,
1101 1126 # challenge it against refined candidates
1102 1127 nominateddeltas.append(deltainfo)
1103 1128 for candidaterev in candidaterevs:
1104 1129 if candidaterev in excluded_bases:
1105 1130 continue
1106 1131 if candidaterev >= target_rev:
1107 1132 continue
1133 dbg_try_count += 1
1108 1134 candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
1109 1135 if candidatedelta is not None:
1110 1136 if isgooddeltainfo(self.revlog, candidatedelta, revinfo):
1111 1137 nominateddeltas.append(candidatedelta)
1112 1138 if nominateddeltas:
1113 1139 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
1114 1140 if deltainfo is not None:
1115 1141 candidaterevs = groups.send(deltainfo.base)
1116 1142 else:
1117 1143 candidaterevs = next(groups)
1118 1144
1119 1145 if deltainfo is None:
1146 dbg_type = b"full"
1120 1147 deltainfo = self._fullsnapshotinfo(fh, revinfo, target_rev)
1148 elif deltainfo.snapshotdepth:
1149 dbg_type = b"snapshot"
1150 else:
1151 dbg_type = b"delta"
1152
1153 if self._write_debug is not None:
1154 end = util.timer()
1155 dbg = {
1156 'duration': end - start,
1157 'revision': target_rev,
1158 'search_round_count': dbg_try_rounds,
1159 'delta_try_count': dbg_try_count,
1160 'type': dbg_type,
1161 'p1-chain-len': p1_chain_len,
1162 'p2-chain-len': p2_chain_len,
1163 }
1164 if deltainfo.snapshotdepth is not None:
1165 dbg['snapshot-depth'] = deltainfo.snapshotdepth
1166 else:
1167 dbg['snapshot-depth'] = 0
1168 target_revlog = b"UNKNOWN"
1169 target_type = self.revlog.target[0]
1170 target_key = self.revlog.target[1]
1171 if target_type == KIND_CHANGELOG:
1172 target_revlog = b'CHANGELOG:'
1173 elif target_type == KIND_MANIFESTLOG:
1174 target_revlog = b'MANIFESTLOG:'
1175 if target_key:
1176 target_revlog += b'%s:' % target_key
1177 elif target_type == KIND_FILELOG:
1178 target_revlog = b'FILELOG:'
1179 if target_key:
1180 target_revlog += b'%s:' % target_key
1181 dbg['target-revlog'] = target_revlog
1182
1183 msg = (
1184 b"DBG-DELTAS:"
1185 b" %-12s"
1186 b" rev=%d:"
1187 b" search-rounds=%d"
1188 b" try-count=%d"
1189 b" - delta-type=%-6s"
1190 b" snap-depth=%d"
1191 b" - p1-chain-length=%d"
1192 b" p2-chain-length=%d"
1193 b" - duration=%f"
1194 b"\n"
1195 )
1196 msg %= (
1197 dbg["target-revlog"],
1198 dbg["revision"],
1199 dbg["search_round_count"],
1200 dbg["delta_try_count"],
1201 dbg["type"],
1202 dbg["snapshot-depth"],
1203 dbg["p1-chain-len"],
1204 dbg["p2-chain-len"],
1205 dbg["duration"],
1206 )
1207 self._write_debug(msg)
1121 1208 return deltainfo
1122 1209
1123 1210
def delta_compression(default_compression_header, deltainfo):
    """return (COMPRESSION_MODE, deltainfo)

    used by revlog v2+ format to dispatch between PLAIN and DEFAULT
    compression.
    """
    h, d = deltainfo.data
    compression_mode = COMP_MODE_INLINE
    if not h and not d:
        # no data to store at all... declare them uncompressed
        compression_mode = COMP_MODE_PLAIN
    elif not h:
        t = d[0:1]
        if t == b'\0':
            # a leading NUL byte marks data stored as plain text
            compression_mode = COMP_MODE_PLAIN
        elif t == default_compression_header:
            compression_mode = COMP_MODE_DEFAULT
    elif h == b'u':
        # we have a more efficient way to declare uncompressed
        h = b''
        compression_mode = COMP_MODE_PLAIN
        deltainfo = drop_u_compression(deltainfo)
    return compression_mode, deltainfo
General Comments 0
You need to be logged in to leave comments. Login now