##// END OF EJS Templates
revlog: move the compression/decompression logic on the inner object...
marmoute -
r51984:a8270490 default
parent child Browse files
Show More
@@ -3833,14 +3833,16 b' def perfrevlogchunks(ui, repo, file_=Non'
3833 def docompress(compressor):
3833 def docompress(compressor):
3834 rl.clearcaches()
3834 rl.clearcaches()
3835
3835
3836 compressor_holder = getattr(rl, '_inner', rl)
3837
3836 try:
3838 try:
3837 # Swap in the requested compression engine.
3839 # Swap in the requested compression engine.
3838 oldcompressor = rl._compressor
3840 oldcompressor = compressor_holder._compressor
3839 rl._compressor = compressor
3841 compressor_holder._compressor = compressor
3840 for chunk in chunks[0]:
3842 for chunk in chunks[0]:
3841 rl.compress(chunk)
3843 rl.compress(chunk)
3842 finally:
3844 finally:
3843 rl._compressor = oldcompressor
3845 compressor_holder._compressor = oldcompressor
3844
3846
3845 benches = [
3847 benches = [
3846 (lambda: doread(), b'read'),
3848 (lambda: doread(), b'read'),
@@ -353,7 +353,9 b' class _InnerRevlog:'
353 sidedata_file,
353 sidedata_file,
354 inline,
354 inline,
355 data_config,
355 data_config,
356 feature_config,
356 chunk_cache,
357 chunk_cache,
358 default_compression_header,
357 ):
359 ):
358 self.opener = opener
360 self.opener = opener
359 self.index = index
361 self.index = index
@@ -363,6 +365,9 b' class _InnerRevlog:'
363 self.sidedata_file = sidedata_file
365 self.sidedata_file = sidedata_file
364 self.inline = inline
366 self.inline = inline
365 self.data_config = data_config
367 self.data_config = data_config
368 self.feature_config = feature_config
369
370 self._default_compression_header = default_compression_header
366
371
367 # index
372 # index
368
373
@@ -381,6 +386,9 b' class _InnerRevlog:'
381 self.data_config.chunk_cache_size,
386 self.data_config.chunk_cache_size,
382 )
387 )
383
388
389 # revlog header -> revlog compressor
390 self._decompressors = {}
391
384 @property
392 @property
385 def index_file(self):
393 def index_file(self):
386 return self.__index_file
394 return self.__index_file
@@ -405,6 +413,103 b' class _InnerRevlog:'
405 """the end of the data chunk for this revision"""
413 """the end of the data chunk for this revision"""
406 return self.start(rev) + self.length(rev)
414 return self.start(rev) + self.length(rev)
407
415
416 @util.propertycache
417 def _compressor(self):
418 engine = util.compengines[self.feature_config.compression_engine]
419 return engine.revlogcompressor(
420 self.feature_config.compression_engine_options
421 )
422
423 @util.propertycache
424 def _decompressor(self):
425 """the default decompressor"""
426 if self._default_compression_header is None:
427 return None
428 t = self._default_compression_header
429 c = self._get_decompressor(t)
430 return c.decompress
431
432 def _get_decompressor(self, t):
433 try:
434 compressor = self._decompressors[t]
435 except KeyError:
436 try:
437 engine = util.compengines.forrevlogheader(t)
438 compressor = engine.revlogcompressor(
439 self.feature_config.compression_engine_options
440 )
441 self._decompressors[t] = compressor
442 except KeyError:
443 raise error.RevlogError(
444 _(b'unknown compression type %s') % binascii.hexlify(t)
445 )
446 return compressor
447
448 def compress(self, data):
449 """Generate a possibly-compressed representation of data."""
450 if not data:
451 return b'', data
452
453 compressed = self._compressor.compress(data)
454
455 if compressed:
456 # The revlog compressor added the header in the returned data.
457 return b'', compressed
458
459 if data[0:1] == b'\0':
460 return b'', data
461 return b'u', data
462
463 def decompress(self, data):
464 """Decompress a revlog chunk.
465
466 The chunk is expected to begin with a header identifying the
467 format type so it can be routed to an appropriate decompressor.
468 """
469 if not data:
470 return data
471
472 # Revlogs are read much more frequently than they are written and many
473 # chunks only take microseconds to decompress, so performance is
474 # important here.
475 #
476 # We can make a few assumptions about revlogs:
477 #
478 # 1) the majority of chunks will be compressed (as opposed to inline
479 # raw data).
480 # 2) decompressing *any* data will likely be at least 10x slower than
481 # returning raw inline data.
482 # 3) we want to prioritize common and officially supported compression
483 # engines
484 #
485 # It follows that we want to optimize for "decompress compressed data
486 # when encoded with common and officially supported compression engines"
487 # case over "raw data" and "data encoded by less common or non-official
488 # compression engines." That is why we have the inline lookup first
489 # followed by the compengines lookup.
490 #
491 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
492 # compressed chunks. And this matters for changelog and manifest reads.
493 t = data[0:1]
494
495 if t == b'x':
496 try:
497 return _zlibdecompress(data)
498 except zlib.error as e:
499 raise error.RevlogError(
500 _(b'revlog decompress error: %s')
501 % stringutil.forcebytestr(e)
502 )
503 # '\0' is more common than 'u' so it goes first.
504 elif t == b'\0':
505 return data
506 elif t == b'u':
507 return util.buffer(data, 1)
508
509 compressor = self._get_decompressor(t)
510
511 return compressor.decompress(data)
512
408 @contextlib.contextmanager
513 @contextlib.contextmanager
409 def reading(self):
514 def reading(self):
410 """Context manager that keeps data and sidedata files open for reading"""
515 """Context manager that keeps data and sidedata files open for reading"""
@@ -1284,12 +1389,15 b' class revlog:'
1284 self.index = index
1389 self.index = index
1285 # revnum -> (chain-length, sum-delta-length)
1390 # revnum -> (chain-length, sum-delta-length)
1286 self._chaininfocache = util.lrucachedict(500)
1391 self._chaininfocache = util.lrucachedict(500)
1287 # revlog header -> revlog compressor
1288 self._decompressors = {}
1289
1392
1290 return chunkcache
1393 return chunkcache
1291
1394
1292 def _load_inner(self, chunk_cache):
1395 def _load_inner(self, chunk_cache):
1396 if self._docket is None:
1397 default_compression_header = None
1398 else:
1399 default_compression_header = self._docket.default_compression_header
1400
1293 self._inner = _InnerRevlog(
1401 self._inner = _InnerRevlog(
1294 opener=self.opener,
1402 opener=self.opener,
1295 index=self.index,
1403 index=self.index,
@@ -1298,7 +1406,9 b' class revlog:'
1298 sidedata_file=self._sidedatafile,
1406 sidedata_file=self._sidedatafile,
1299 inline=self._inline,
1407 inline=self._inline,
1300 data_config=self.data_config,
1408 data_config=self.data_config,
1409 feature_config=self.feature_config,
1301 chunk_cache=chunk_cache,
1410 chunk_cache=chunk_cache,
1411 default_compression_header=default_compression_header,
1302 )
1412 )
1303
1413
1304 def get_revlog(self):
1414 def get_revlog(self):
@@ -1319,38 +1429,6 b' class revlog:'
1319 else:
1429 else:
1320 return self.radix
1430 return self.radix
1321
1431
1322 def _get_decompressor(self, t):
1323 try:
1324 compressor = self._decompressors[t]
1325 except KeyError:
1326 try:
1327 engine = util.compengines.forrevlogheader(t)
1328 compressor = engine.revlogcompressor(
1329 self.feature_config.compression_engine_options
1330 )
1331 self._decompressors[t] = compressor
1332 except KeyError:
1333 raise error.RevlogError(
1334 _(b'unknown compression type %s') % binascii.hexlify(t)
1335 )
1336 return compressor
1337
1338 @util.propertycache
1339 def _compressor(self):
1340 engine = util.compengines[self.feature_config.compression_engine]
1341 return engine.revlogcompressor(
1342 self.feature_config.compression_engine_options
1343 )
1344
1345 @util.propertycache
1346 def _decompressor(self):
1347 """the default decompressor"""
1348 if self._docket is None:
1349 return None
1350 t = self._docket.default_compression_header
1351 c = self._get_decompressor(t)
1352 return c.decompress
1353
1354 def _datafp(self, mode=b'r'):
1432 def _datafp(self, mode=b'r'):
1355 """file object for the revlog's data file"""
1433 """file object for the revlog's data file"""
1356 return self.opener(self._datafile, mode=mode)
1434 return self.opener(self._datafile, mode=mode)
@@ -2272,9 +2350,9 b' class revlog:'
2272 if compression_mode == COMP_MODE_PLAIN:
2350 if compression_mode == COMP_MODE_PLAIN:
2273 return data
2351 return data
2274 elif compression_mode == COMP_MODE_DEFAULT:
2352 elif compression_mode == COMP_MODE_DEFAULT:
2275 return self._decompressor(data)
2353 return self._inner._decompressor(data)
2276 elif compression_mode == COMP_MODE_INLINE:
2354 elif compression_mode == COMP_MODE_INLINE:
2277 return self.decompress(data)
2355 return self._inner.decompress(data)
2278 else:
2356 else:
2279 msg = b'unknown compression mode %d'
2357 msg = b'unknown compression mode %d'
2280 msg %= compression_mode
2358 msg %= compression_mode
@@ -2328,9 +2406,9 b' class revlog:'
2328 # 2G on Windows
2406 # 2G on Windows
2329 return [self._chunk(rev) for rev in revschunk]
2407 return [self._chunk(rev) for rev in revschunk]
2330
2408
2331 decomp = self.decompress
2409 decomp = self._inner.decompress
2332 # self._decompressor might be None, but will not be used in that case
2410 # self._decompressor might be None, but will not be used in that case
2333 def_decomp = self._decompressor
2411 def_decomp = self._inner._decompressor
2334 for rev in revschunk:
2412 for rev in revschunk:
2335 chunkstart = start(rev)
2413 chunkstart = start(rev)
2336 if inline:
2414 if inline:
@@ -2544,9 +2622,9 b' class revlog:'
2544 if comp == COMP_MODE_PLAIN:
2622 if comp == COMP_MODE_PLAIN:
2545 segment = comp_segment
2623 segment = comp_segment
2546 elif comp == COMP_MODE_DEFAULT:
2624 elif comp == COMP_MODE_DEFAULT:
2547 segment = self._decompressor(comp_segment)
2625 segment = self._inner._decompressor(comp_segment)
2548 elif comp == COMP_MODE_INLINE:
2626 elif comp == COMP_MODE_INLINE:
2549 segment = self.decompress(comp_segment)
2627 segment = self._inner.decompress(comp_segment)
2550 else:
2628 else:
2551 msg = b'unknown compression mode %d'
2629 msg = b'unknown compression mode %d'
2552 msg %= comp
2630 msg %= comp
@@ -2842,69 +2920,10 b' class revlog:'
2842 )
2920 )
2843
2921
2844 def compress(self, data):
2922 def compress(self, data):
2845 """Generate a possibly-compressed representation of data."""
2923 return self._inner.compress(data)
2846 if not data:
2847 return b'', data
2848
2849 compressed = self._compressor.compress(data)
2850
2851 if compressed:
2852 # The revlog compressor added the header in the returned data.
2853 return b'', compressed
2854
2855 if data[0:1] == b'\0':
2856 return b'', data
2857 return b'u', data
2858
2924
2859 def decompress(self, data):
2925 def decompress(self, data):
2860 """Decompress a revlog chunk.
2926 return self._inner.decompress(data)
2861
2862 The chunk is expected to begin with a header identifying the
2863 format type so it can be routed to an appropriate decompressor.
2864 """
2865 if not data:
2866 return data
2867
2868 # Revlogs are read much more frequently than they are written and many
2869 # chunks only take microseconds to decompress, so performance is
2870 # important here.
2871 #
2872 # We can make a few assumptions about revlogs:
2873 #
2874 # 1) the majority of chunks will be compressed (as opposed to inline
2875 # raw data).
2876 # 2) decompressing *any* data will likely be at least 10x slower than
2877 # returning raw inline data.
2878 # 3) we want to prioritize common and officially supported compression
2879 # engines
2880 #
2881 # It follows that we want to optimize for "decompress compressed data
2882 # when encoded with common and officially supported compression engines"
2883 # case over "raw data" and "data encoded by less common or non-official
2884 # compression engines." That is why we have the inline lookup first
2885 # followed by the compengines lookup.
2886 #
2887 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2888 # compressed chunks. And this matters for changelog and manifest reads.
2889 t = data[0:1]
2890
2891 if t == b'x':
2892 try:
2893 return _zlibdecompress(data)
2894 except zlib.error as e:
2895 raise error.RevlogError(
2896 _(b'revlog decompress error: %s')
2897 % stringutil.forcebytestr(e)
2898 )
2899 # '\0' is more common than 'u' so it goes first.
2900 elif t == b'\0':
2901 return data
2902 elif t == b'u':
2903 return util.buffer(data, 1)
2904
2905 compressor = self._get_decompressor(t)
2906
2907 return compressor.decompress(data)
2908
2927
2909 def _addrevision(
2928 def _addrevision(
2910 self,
2929 self,
@@ -3029,7 +3048,7 b' class revlog:'
3029 sidedata_compression_mode = COMP_MODE_PLAIN
3048 sidedata_compression_mode = COMP_MODE_PLAIN
3030 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
3049 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
3031 sidedata_offset = self._docket.sidedata_end
3050 sidedata_offset = self._docket.sidedata_end
3032 h, comp_sidedata = self.compress(serialized_sidedata)
3051 h, comp_sidedata = self._inner.compress(serialized_sidedata)
3033 if (
3052 if (
3034 h != b'u'
3053 h != b'u'
3035 and comp_sidedata[0:1] != b'\0'
3054 and comp_sidedata[0:1] != b'\0'
@@ -3876,7 +3895,7 b' class revlog:'
3876 sidedata_compression_mode = COMP_MODE_INLINE
3895 sidedata_compression_mode = COMP_MODE_INLINE
3877 if serialized_sidedata and self.feature_config.has_side_data:
3896 if serialized_sidedata and self.feature_config.has_side_data:
3878 sidedata_compression_mode = COMP_MODE_PLAIN
3897 sidedata_compression_mode = COMP_MODE_PLAIN
3879 h, comp_sidedata = self.compress(serialized_sidedata)
3898 h, comp_sidedata = self._inner.compress(serialized_sidedata)
3880 if (
3899 if (
3881 h != b'u'
3900 h != b'u'
3882 and comp_sidedata[0] != b'\0'
3901 and comp_sidedata[0] != b'\0'
@@ -1205,7 +1205,7 b' class deltacomputer:'
1205 msg = b"DBG-DELTAS-SEARCH: DISCARDED (prev size)\n"
1205 msg = b"DBG-DELTAS-SEARCH: DISCARDED (prev size)\n"
1206 self._write_debug(msg)
1206 self._write_debug(msg)
1207 return None
1207 return None
1208 header, data = revlog.compress(delta)
1208 header, data = revlog._inner.compress(delta)
1209 deltalen = len(header) + len(data)
1209 deltalen = len(header) + len(data)
1210 offset = revlog.end(len(revlog) - 1)
1210 offset = revlog.end(len(revlog) - 1)
1211 dist = deltalen + offset - revlog.start(chainbase)
1211 dist = deltalen + offset - revlog.start(chainbase)
@@ -1226,7 +1226,7 b' class deltacomputer:'
1226
1226
1227 def _fullsnapshotinfo(self, revinfo, curr):
1227 def _fullsnapshotinfo(self, revinfo, curr):
1228 rawtext = self.buildtext(revinfo)
1228 rawtext = self.buildtext(revinfo)
1229 data = self.revlog.compress(rawtext)
1229 data = self.revlog._inner.compress(rawtext)
1230 compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
1230 compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
1231 deltabase = chainbase = curr
1231 deltabase = chainbase = curr
1232 snapshotdepth = 0
1232 snapshotdepth = 0
General Comments 0
You need to be logged in to leave comments. Login now