##// END OF EJS Templates
sparse-revlog: implement algorithm to write sparse delta chains (issue5480)...
Paul Morelle -
r38740:f8762ea7 default
parent child Browse files
Show More
@@ -216,6 +216,9 b' class _testrevlog(object):'
216 def length(self, rev):
216 def length(self, rev):
217 return self.end(rev) - self.start(rev)
217 return self.end(rev) - self.start(rev)
218
218
219 def __len__(self):
220 return len(self._data)
221
219 def _trimchunk(revlog, revs, startidx, endidx=None):
222 def _trimchunk(revlog, revs, startidx, endidx=None):
220 """returns revs[startidx:endidx] without empty trailing revs
223 """returns revs[startidx:endidx] without empty trailing revs
221
224
@@ -293,7 +296,7 b' def _segmentspan(revlog, revs):'
293 return 0
296 return 0
294 return revlog.end(revs[-1]) - revlog.start(revs[0])
297 return revlog.end(revs[-1]) - revlog.start(revs[0])
295
298
296 def _slicechunk(revlog, revs, targetsize=None):
299 def _slicechunk(revlog, revs, deltainfo=None, targetsize=None):
297 """slice revs to reduce the amount of unrelated data to be read from disk.
300 """slice revs to reduce the amount of unrelated data to be read from disk.
298
301
299 ``revs`` is sliced into groups that should be read in one time.
302 ``revs`` is sliced into groups that should be read in one time.
@@ -341,20 +344,27 b' def _slicechunk(revlog, revs, targetsize'
341 [[1, 2], [5, 8, 10, 11], [14]]
344 [[1, 2], [5, 8, 10, 11], [14]]
342
345
343 Slicing with a maximum chunk size
346 Slicing with a maximum chunk size
344 >>> list(_slicechunk(revlog, [0, 11, 13, 15], 15))
347 >>> list(_slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
345 [[0], [11], [13], [15]]
348 [[0], [11], [13], [15]]
346 >>> list(_slicechunk(revlog, [0, 11, 13, 15], 20))
349 >>> list(_slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
347 [[0], [11], [13, 15]]
350 [[0], [11], [13, 15]]
348 """
351 """
349 if targetsize is not None:
352 if targetsize is not None:
350 targetsize = max(targetsize, revlog._srmingapsize)
353 targetsize = max(targetsize, revlog._srmingapsize)
354 # targetsize should not be specified when evaluating delta candidates:
355 # * targetsize is used to ensure we stay within specification when reading,
356 # * deltainfo is used to pick are good delta chain when writing.
357 if not (deltainfo is None or targetsize is None):
358 msg = 'cannot use `targetsize` with a `deltainfo`'
359 raise error.ProgrammingError(msg)
351 for chunk in _slicechunktodensity(revlog, revs,
360 for chunk in _slicechunktodensity(revlog, revs,
361 deltainfo,
352 revlog._srdensitythreshold,
362 revlog._srdensitythreshold,
353 revlog._srmingapsize):
363 revlog._srmingapsize):
354 for subchunk in _slicechunktosize(revlog, chunk, targetsize):
364 for subchunk in _slicechunktosize(revlog, chunk, targetsize):
355 yield subchunk
365 yield subchunk
356
366
357 def _slicechunktosize(revlog, revs, targetsize):
367 def _slicechunktosize(revlog, revs, targetsize=None):
358 """slice revs to match the target size
368 """slice revs to match the target size
359
369
360 This is intended to be used on chunk that density slicing selected by that
370 This is intended to be used on chunk that density slicing selected by that
@@ -431,12 +441,16 b' def _slicechunktosize(revlog, revs, targ'
431 endrevidx = idx
441 endrevidx = idx
432 yield _trimchunk(revlog, revs, startrevidx)
442 yield _trimchunk(revlog, revs, startrevidx)
433
443
434 def _slicechunktodensity(revlog, revs, targetdensity=0.5, mingapsize=0):
444 def _slicechunktodensity(revlog, revs, deltainfo=None, targetdensity=0.5,
445 mingapsize=0):
435 """slice revs to reduce the amount of unrelated data to be read from disk.
446 """slice revs to reduce the amount of unrelated data to be read from disk.
436
447
437 ``revs`` is sliced into groups that should be read in one time.
448 ``revs`` is sliced into groups that should be read in one time.
438 Assume that revs are sorted.
449 Assume that revs are sorted.
439
450
451 ``deltainfo`` is a _deltainfo instance of a revision that we would append
452 to the top of the revlog.
453
440 The initial chunk is sliced until the overall density (payload/chunks-span
454 The initial chunk is sliced until the overall density (payload/chunks-span
441 ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
455 ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
442 skipped.
456 skipped.
@@ -487,13 +501,21 b' def _slicechunktodensity(revlog, revs, t'
487 yield revs
501 yield revs
488 return
502 return
489
503
490 readdata = deltachainspan = _segmentspan(revlog, revs)
504 nextrev = len(revlog)
505 nextoffset = revlog.end(nextrev - 1)
506
507 if deltainfo is None:
508 deltachainspan = _segmentspan(revlog, revs)
509 chainpayload = sum(length(r) for r in revs)
510 else:
511 deltachainspan = deltainfo.distance
512 chainpayload = deltainfo.compresseddeltalen
491
513
492 if deltachainspan < mingapsize:
514 if deltachainspan < mingapsize:
493 yield revs
515 yield revs
494 return
516 return
495
517
496 chainpayload = sum(length(r) for r in revs)
518 readdata = deltachainspan
497
519
498 if deltachainspan:
520 if deltachainspan:
499 density = chainpayload / float(deltachainspan)
521 density = chainpayload / float(deltachainspan)
@@ -504,13 +526,21 b' def _slicechunktodensity(revlog, revs, t'
504 yield revs
526 yield revs
505 return
527 return
506
528
529 if deltainfo is not None:
530 revs = list(revs)
531 revs.append(nextrev)
532
507 # Store the gaps in a heap to have them sorted by decreasing size
533 # Store the gaps in a heap to have them sorted by decreasing size
508 gapsheap = []
534 gapsheap = []
509 heapq.heapify(gapsheap)
535 heapq.heapify(gapsheap)
510 prevend = None
536 prevend = None
511 for i, rev in enumerate(revs):
537 for i, rev in enumerate(revs):
512 revstart = start(rev)
538 if rev < nextrev:
513 revlen = length(rev)
539 revstart = start(rev)
540 revlen = length(rev)
541 else:
542 revstart = nextoffset
543 revlen = deltainfo.deltalen
514
544
515 # Skip empty revisions to form larger holes
545 # Skip empty revisions to form larger holes
516 if revlen == 0:
546 if revlen == 0:
@@ -1989,7 +2019,7 b' class revlog(object):'
1989 if not self._withsparseread:
2019 if not self._withsparseread:
1990 slicedchunks = (revs,)
2020 slicedchunks = (revs,)
1991 else:
2021 else:
1992 slicedchunks = _slicechunk(self, revs, targetsize)
2022 slicedchunks = _slicechunk(self, revs, targetsize=targetsize)
1993
2023
1994 for revschunk in slicedchunks:
2024 for revschunk in slicedchunks:
1995 firstrev = revschunk[0]
2025 firstrev = revschunk[0]
@@ -2402,13 +2432,33 b' class revlog(object):'
2402 # deltas we need to apply -- bounding it limits the amount of CPU
2432 # deltas we need to apply -- bounding it limits the amount of CPU
2403 # we consume.
2433 # we consume.
2404
2434
2405 distance = deltainfo.distance
2435 if self._sparserevlog:
2436 # As sparse-read will be used, we can consider that the distance,
2437 # instead of being the span of the whole chunk,
2438 # is the span of the largest read chunk
2439 base = deltainfo.base
2440
2441 if base != nullrev:
2442 deltachain = self._deltachain(base)[0]
2443 else:
2444 deltachain = []
2445
2446 chunks = _slicechunk(self, deltachain, deltainfo)
2447 distance = max(map(lambda revs:_segmentspan(self, revs), chunks))
2448 else:
2449 distance = deltainfo.distance
2450
2406 textlen = revinfo.textlen
2451 textlen = revinfo.textlen
2407 defaultmax = textlen * 4
2452 defaultmax = textlen * 4
2408 maxdist = self._maxdeltachainspan
2453 maxdist = self._maxdeltachainspan
2409 if not maxdist:
2454 if not maxdist:
2410 maxdist = distance # ensure the conditional pass
2455 maxdist = distance # ensure the conditional pass
2411 maxdist = max(maxdist, defaultmax)
2456 maxdist = max(maxdist, defaultmax)
2457 if self._sparserevlog and maxdist < self._srmingapsize:
2458 # In multiple place, we are ignoring irrelevant data range below a
2459 # certain size. Be also apply this tradeoff here and relax span
2460 # constraint for small enought content.
2461 maxdist = self._srmingapsize
2412 if (distance > maxdist or deltainfo.deltalen > textlen or
2462 if (distance > maxdist or deltainfo.deltalen > textlen or
2413 deltainfo.compresseddeltalen > textlen * 2 or
2463 deltainfo.compresseddeltalen > textlen * 2 or
2414 (self._maxchainlen and deltainfo.chainlen > self._maxchainlen)):
2464 (self._maxchainlen and deltainfo.chainlen > self._maxchainlen)):
General Comments 0
You need to be logged in to leave comments. Login now