##// END OF EJS Templates
sparse-revlog: implement algorithm to write sparse delta chains (issue5480)...
Paul Morelle -
r38740:f8762ea7 default
parent child Browse files
Show More
@@ -216,6 +216,9 b' class _testrevlog(object):'
216 216 def length(self, rev):
217 217 return self.end(rev) - self.start(rev)
218 218
219 def __len__(self):
220 return len(self._data)
221
219 222 def _trimchunk(revlog, revs, startidx, endidx=None):
220 223 """returns revs[startidx:endidx] without empty trailing revs
221 224
@@ -293,7 +296,7 b' def _segmentspan(revlog, revs):'
293 296 return 0
294 297 return revlog.end(revs[-1]) - revlog.start(revs[0])
295 298
296 def _slicechunk(revlog, revs, targetsize=None):
299 def _slicechunk(revlog, revs, deltainfo=None, targetsize=None):
297 300 """slice revs to reduce the amount of unrelated data to be read from disk.
298 301
299 302 ``revs`` is sliced into groups that should be read in one time.
@@ -341,20 +344,27 b' def _slicechunk(revlog, revs, targetsize'
341 344 [[1, 2], [5, 8, 10, 11], [14]]
342 345
343 346 Slicing with a maximum chunk size
344 >>> list(_slicechunk(revlog, [0, 11, 13, 15], 15))
347 >>> list(_slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
345 348 [[0], [11], [13], [15]]
346 >>> list(_slicechunk(revlog, [0, 11, 13, 15], 20))
349 >>> list(_slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
347 350 [[0], [11], [13, 15]]
348 351 """
349 352 if targetsize is not None:
350 353 targetsize = max(targetsize, revlog._srmingapsize)
354 # targetsize should not be specified when evaluating delta candidates:
355 # * targetsize is used to ensure we stay within specification when reading,
356 # * deltainfo is used to pick are good delta chain when writing.
357 if not (deltainfo is None or targetsize is None):
358 msg = 'cannot use `targetsize` with a `deltainfo`'
359 raise error.ProgrammingError(msg)
351 360 for chunk in _slicechunktodensity(revlog, revs,
361 deltainfo,
352 362 revlog._srdensitythreshold,
353 363 revlog._srmingapsize):
354 364 for subchunk in _slicechunktosize(revlog, chunk, targetsize):
355 365 yield subchunk
356 366
357 def _slicechunktosize(revlog, revs, targetsize):
367 def _slicechunktosize(revlog, revs, targetsize=None):
358 368 """slice revs to match the target size
359 369
360 370 This is intended to be used on chunk that density slicing selected by that
@@ -431,12 +441,16 b' def _slicechunktosize(revlog, revs, targ'
431 441 endrevidx = idx
432 442 yield _trimchunk(revlog, revs, startrevidx)
433 443
434 def _slicechunktodensity(revlog, revs, targetdensity=0.5, mingapsize=0):
444 def _slicechunktodensity(revlog, revs, deltainfo=None, targetdensity=0.5,
445 mingapsize=0):
435 446 """slice revs to reduce the amount of unrelated data to be read from disk.
436 447
437 448 ``revs`` is sliced into groups that should be read in one time.
438 449 Assume that revs are sorted.
439 450
451 ``deltainfo`` is a _deltainfo instance of a revision that we would append
452 to the top of the revlog.
453
440 454 The initial chunk is sliced until the overall density (payload/chunks-span
441 455 ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
442 456 skipped.
@@ -487,13 +501,21 b' def _slicechunktodensity(revlog, revs, t'
487 501 yield revs
488 502 return
489 503
490 readdata = deltachainspan = _segmentspan(revlog, revs)
504 nextrev = len(revlog)
505 nextoffset = revlog.end(nextrev - 1)
506
507 if deltainfo is None:
508 deltachainspan = _segmentspan(revlog, revs)
509 chainpayload = sum(length(r) for r in revs)
510 else:
511 deltachainspan = deltainfo.distance
512 chainpayload = deltainfo.compresseddeltalen
491 513
492 514 if deltachainspan < mingapsize:
493 515 yield revs
494 516 return
495 517
496 chainpayload = sum(length(r) for r in revs)
518 readdata = deltachainspan
497 519
498 520 if deltachainspan:
499 521 density = chainpayload / float(deltachainspan)
@@ -504,13 +526,21 b' def _slicechunktodensity(revlog, revs, t'
504 526 yield revs
505 527 return
506 528
529 if deltainfo is not None:
530 revs = list(revs)
531 revs.append(nextrev)
532
507 533 # Store the gaps in a heap to have them sorted by decreasing size
508 534 gapsheap = []
509 535 heapq.heapify(gapsheap)
510 536 prevend = None
511 537 for i, rev in enumerate(revs):
512 revstart = start(rev)
513 revlen = length(rev)
538 if rev < nextrev:
539 revstart = start(rev)
540 revlen = length(rev)
541 else:
542 revstart = nextoffset
543 revlen = deltainfo.deltalen
514 544
515 545 # Skip empty revisions to form larger holes
516 546 if revlen == 0:
@@ -1989,7 +2019,7 b' class revlog(object):'
1989 2019 if not self._withsparseread:
1990 2020 slicedchunks = (revs,)
1991 2021 else:
1992 slicedchunks = _slicechunk(self, revs, targetsize)
2022 slicedchunks = _slicechunk(self, revs, targetsize=targetsize)
1993 2023
1994 2024 for revschunk in slicedchunks:
1995 2025 firstrev = revschunk[0]
@@ -2402,13 +2432,33 b' class revlog(object):'
2402 2432 # deltas we need to apply -- bounding it limits the amount of CPU
2403 2433 # we consume.
2404 2434
2405 distance = deltainfo.distance
2435 if self._sparserevlog:
2436 # As sparse-read will be used, we can consider that the distance,
2437 # instead of being the span of the whole chunk,
2438 # is the span of the largest read chunk
2439 base = deltainfo.base
2440
2441 if base != nullrev:
2442 deltachain = self._deltachain(base)[0]
2443 else:
2444 deltachain = []
2445
2446 chunks = _slicechunk(self, deltachain, deltainfo)
2447 distance = max(map(lambda revs:_segmentspan(self, revs), chunks))
2448 else:
2449 distance = deltainfo.distance
2450
2406 2451 textlen = revinfo.textlen
2407 2452 defaultmax = textlen * 4
2408 2453 maxdist = self._maxdeltachainspan
2409 2454 if not maxdist:
2410 2455 maxdist = distance # ensure the conditional pass
2411 2456 maxdist = max(maxdist, defaultmax)
2457 if self._sparserevlog and maxdist < self._srmingapsize:
2458 # In multiple place, we are ignoring irrelevant data range below a
2459 # certain size. Be also apply this tradeoff here and relax span
2460 # constraint for small enought content.
2461 maxdist = self._srmingapsize
2412 2462 if (distance > maxdist or deltainfo.deltalen > textlen or
2413 2463 deltainfo.compresseddeltalen > textlen * 2 or
2414 2464 (self._maxchainlen and deltainfo.chainlen > self._maxchainlen)):
General Comments 0
You need to be logged in to leave comments. Login now