##// END OF EJS Templates
revlog: introduce an experimental flag to slice chunks reads when too sparse...
Paul Morelle -
r34825:e2ad93bc default
parent child Browse files
Show More
@@ -411,6 +411,12 b" coreconfigitem('experimental', 'revlogv2"
411 coreconfigitem('experimental', 'spacemovesdown',
411 coreconfigitem('experimental', 'spacemovesdown',
412 default=False,
412 default=False,
413 )
413 )
414 coreconfigitem('experimental', 'sparse-read',
415 default=False,
416 )
417 coreconfigitem('experimental', 'sparse-read.density-threshold',
418 default=0.25,
419 )
414 coreconfigitem('experimental', 'treemanifest',
420 coreconfigitem('experimental', 'treemanifest',
415 default=False,
421 default=False,
416 )
422 )
@@ -608,6 +608,11 b' class localrepository(object):'
608 'mmapindexthreshold')
608 'mmapindexthreshold')
609 if mmapindexthreshold is not None:
609 if mmapindexthreshold is not None:
610 self.svfs.options['mmapindexthreshold'] = mmapindexthreshold
610 self.svfs.options['mmapindexthreshold'] = mmapindexthreshold
611 withsparseread = self.ui.configbool('experimental', 'sparse-read')
612 srdensitythres = float(self.ui.config('experimental',
613 'sparse-read.density-threshold'))
614 self.svfs.options['with-sparse-read'] = withsparseread
615 self.svfs.options['sparse-read-density-threshold'] = srdensitythres
611
616
612 for r in self.requirements:
617 for r in self.requirements:
613 if r.startswith('exp-compression-'):
618 if r.startswith('exp-compression-'):
@@ -161,6 +161,59 b' def hash(text, p1, p2):'
161 s.update(text)
161 s.update(text)
162 return s.digest()
162 return s.digest()
163
163
164 def _slicechunk(revlog, revs):
165 """slice revs to reduce the amount of unrelated data to be read from disk.
166
167 ``revs`` is sliced into groups that should be read in one time.
168 Assume that revs are sorted.
169 """
170 start = revlog.start
171 length = revlog.length
172
173 chunkqueue = collections.deque()
174 chunkqueue.append((revs, 0))
175
176 while chunkqueue:
177 revs, depth = chunkqueue.popleft()
178
179 startbyte = start(revs[0])
180 endbyte = start(revs[-1]) + length(revs[-1])
181 deltachainspan = endbyte - startbyte
182
183 if len(revs) <= 1:
184 yield revs
185 continue
186
187 # Find where is the largest hole (this is where we would split) and
188 # sum up the lengths of useful data to compute the density of the span
189 textlen = 0
190 prevend = None
191 largesthole = 0
192 idxlargesthole = -1
193 for i, rev in enumerate(revs):
194 revstart = start(rev)
195 revlen = length(rev)
196
197 if prevend is not None:
198 hole = revstart - prevend
199 if hole > largesthole:
200 largesthole = hole
201 idxlargesthole = i
202
203 textlen += revlen
204 prevend = revstart + revlen
205
206 density = textlen / float(deltachainspan) if deltachainspan > 0 else 1.0
207
208 if density > revlog._srdensitythreshold:
209 yield revs
210 continue
211
212 # Add the left and right parts so that they will be sliced
213 # recursively too
214 chunkqueue.append((revs[:idxlargesthole], depth + 1))
215 chunkqueue.append((revs[idxlargesthole:], depth + 1))
216
164 # index v0:
217 # index v0:
165 # 4 bytes: offset
218 # 4 bytes: offset
166 # 4 bytes: compressed length
219 # 4 bytes: compressed length
@@ -305,6 +358,8 b' class revlog(object):'
305 self._nodepos = None
358 self._nodepos = None
306 self._compengine = 'zlib'
359 self._compengine = 'zlib'
307 self._maxdeltachainspan = -1
360 self._maxdeltachainspan = -1
361 self._withsparseread = False
362 self._srdensitythreshold = 0.25
308
363
309 mmapindexthreshold = None
364 mmapindexthreshold = None
310 v = REVLOG_DEFAULT_VERSION
365 v = REVLOG_DEFAULT_VERSION
@@ -331,6 +386,9 b' class revlog(object):'
331 self._maxdeltachainspan = opts['maxdeltachainspan']
386 self._maxdeltachainspan = opts['maxdeltachainspan']
332 if mmaplargeindex and 'mmapindexthreshold' in opts:
387 if mmaplargeindex and 'mmapindexthreshold' in opts:
333 mmapindexthreshold = opts['mmapindexthreshold']
388 mmapindexthreshold = opts['mmapindexthreshold']
389 self._withsparseread = bool(opts.get('with-sparse-read', False))
390 if 'sparse-read-density-threshold' in opts:
391 self._srdensitythreshold = opts['sparse-read-density-threshold']
334
392
335 if self._chunkcachesize <= 0:
393 if self._chunkcachesize <= 0:
336 raise RevlogError(_('revlog chunk cache size %r is not greater '
394 raise RevlogError(_('revlog chunk cache size %r is not greater '
@@ -1327,26 +1385,32 b' class revlog(object):'
1327 l = []
1385 l = []
1328 ladd = l.append
1386 ladd = l.append
1329
1387
1330 firstrev = revs[0]
1388 if not self._withsparseread:
1331 # Skip trailing revisions with empty diff
1389 slicedchunks = (revs,)
1332 for lastrev in revs[::-1]:
1390 else:
1333 if length(lastrev) != 0:
1391 slicedchunks = _slicechunk(self, revs)
1334 break
1392
1393 for revschunk in slicedchunks:
1394 firstrev = revschunk[0]
1395 # Skip trailing revisions with empty diff
1396 for lastrev in revschunk[::-1]:
1397 if length(lastrev) != 0:
1398 break
1335
1399
1336 try:
1400 try:
1337 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1401 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1338 except OverflowError:
1402 except OverflowError:
1339 # issue4215 - we can't cache a run of chunks greater than
1403 # issue4215 - we can't cache a run of chunks greater than
1340 # 2G on Windows
1404 # 2G on Windows
1341 return [self._chunk(rev, df=df) for rev in revs]
1405 return [self._chunk(rev, df=df) for rev in revschunk]
1342
1406
1343 decomp = self.decompress
1407 decomp = self.decompress
1344 for rev in revs:
1408 for rev in revschunk:
1345 chunkstart = start(rev)
1409 chunkstart = start(rev)
1346 if inline:
1410 if inline:
1347 chunkstart += (rev + 1) * iosize
1411 chunkstart += (rev + 1) * iosize
1348 chunklength = length(rev)
1412 chunklength = length(rev)
1349 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1413 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1350
1414
1351 return l
1415 return l
1352
1416
General Comments 0
You need to be logged in to leave comments. Login now