Show More
@@ -3833,14 +3833,16 b' def perfrevlogchunks(ui, repo, file_=Non' | |||||
3833 | def docompress(compressor): |
|
3833 | def docompress(compressor): | |
3834 | rl.clearcaches() |
|
3834 | rl.clearcaches() | |
3835 |
|
3835 | |||
|
3836 | compressor_holder = getattr(rl, '_inner', rl) | |||
|
3837 | ||||
3836 | try: |
|
3838 | try: | |
3837 | # Swap in the requested compression engine. |
|
3839 | # Swap in the requested compression engine. | |
3838 | oldcompressor = rl._compressor |
|
3840 | oldcompressor = compressor_holder._compressor | |
3839 | rl._compressor = compressor |
|
3841 | compressor_holder._compressor = compressor | |
3840 | for chunk in chunks[0]: |
|
3842 | for chunk in chunks[0]: | |
3841 | rl.compress(chunk) |
|
3843 | rl.compress(chunk) | |
3842 | finally: |
|
3844 | finally: | |
3843 | rl._compressor = oldcompressor |
|
3845 | compressor_holder._compressor = oldcompressor | |
3844 |
|
3846 | |||
3845 | benches = [ |
|
3847 | benches = [ | |
3846 | (lambda: doread(), b'read'), |
|
3848 | (lambda: doread(), b'read'), |
@@ -353,7 +353,9 b' class _InnerRevlog:' | |||||
353 | sidedata_file, |
|
353 | sidedata_file, | |
354 | inline, |
|
354 | inline, | |
355 | data_config, |
|
355 | data_config, | |
|
356 | feature_config, | |||
356 | chunk_cache, |
|
357 | chunk_cache, | |
|
358 | default_compression_header, | |||
357 | ): |
|
359 | ): | |
358 | self.opener = opener |
|
360 | self.opener = opener | |
359 | self.index = index |
|
361 | self.index = index | |
@@ -363,6 +365,9 b' class _InnerRevlog:' | |||||
363 | self.sidedata_file = sidedata_file |
|
365 | self.sidedata_file = sidedata_file | |
364 | self.inline = inline |
|
366 | self.inline = inline | |
365 | self.data_config = data_config |
|
367 | self.data_config = data_config | |
|
368 | self.feature_config = feature_config | |||
|
369 | ||||
|
370 | self._default_compression_header = default_compression_header | |||
366 |
|
371 | |||
367 | # index |
|
372 | # index | |
368 |
|
373 | |||
@@ -381,6 +386,9 b' class _InnerRevlog:' | |||||
381 | self.data_config.chunk_cache_size, |
|
386 | self.data_config.chunk_cache_size, | |
382 | ) |
|
387 | ) | |
383 |
|
388 | |||
|
389 | # revlog header -> revlog compressor | |||
|
390 | self._decompressors = {} | |||
|
391 | ||||
384 | @property |
|
392 | @property | |
385 | def index_file(self): |
|
393 | def index_file(self): | |
386 | return self.__index_file |
|
394 | return self.__index_file | |
@@ -405,6 +413,103 b' class _InnerRevlog:' | |||||
405 | """the end of the data chunk for this revision""" |
|
413 | """the end of the data chunk for this revision""" | |
406 | return self.start(rev) + self.length(rev) |
|
414 | return self.start(rev) + self.length(rev) | |
407 |
|
415 | |||
|
416 | @util.propertycache | |||
|
417 | def _compressor(self): | |||
|
418 | engine = util.compengines[self.feature_config.compression_engine] | |||
|
419 | return engine.revlogcompressor( | |||
|
420 | self.feature_config.compression_engine_options | |||
|
421 | ) | |||
|
422 | ||||
|
423 | @util.propertycache | |||
|
424 | def _decompressor(self): | |||
|
425 | """the default decompressor""" | |||
|
426 | if self._default_compression_header is None: | |||
|
427 | return None | |||
|
428 | t = self._default_compression_header | |||
|
429 | c = self._get_decompressor(t) | |||
|
430 | return c.decompress | |||
|
431 | ||||
|
432 | def _get_decompressor(self, t): | |||
|
433 | try: | |||
|
434 | compressor = self._decompressors[t] | |||
|
435 | except KeyError: | |||
|
436 | try: | |||
|
437 | engine = util.compengines.forrevlogheader(t) | |||
|
438 | compressor = engine.revlogcompressor( | |||
|
439 | self.feature_config.compression_engine_options | |||
|
440 | ) | |||
|
441 | self._decompressors[t] = compressor | |||
|
442 | except KeyError: | |||
|
443 | raise error.RevlogError( | |||
|
444 | _(b'unknown compression type %s') % binascii.hexlify(t) | |||
|
445 | ) | |||
|
446 | return compressor | |||
|
447 | ||||
|
448 | def compress(self, data): | |||
|
449 | """Generate a possibly-compressed representation of data.""" | |||
|
450 | if not data: | |||
|
451 | return b'', data | |||
|
452 | ||||
|
453 | compressed = self._compressor.compress(data) | |||
|
454 | ||||
|
455 | if compressed: | |||
|
456 | # The revlog compressor added the header in the returned data. | |||
|
457 | return b'', compressed | |||
|
458 | ||||
|
459 | if data[0:1] == b'\0': | |||
|
460 | return b'', data | |||
|
461 | return b'u', data | |||
|
462 | ||||
|
463 | def decompress(self, data): | |||
|
464 | """Decompress a revlog chunk. | |||
|
465 | ||||
|
466 | The chunk is expected to begin with a header identifying the | |||
|
467 | format type so it can be routed to an appropriate decompressor. | |||
|
468 | """ | |||
|
469 | if not data: | |||
|
470 | return data | |||
|
471 | ||||
|
472 | # Revlogs are read much more frequently than they are written and many | |||
|
473 | # chunks only take microseconds to decompress, so performance is | |||
|
474 | # important here. | |||
|
475 | # | |||
|
476 | # We can make a few assumptions about revlogs: | |||
|
477 | # | |||
|
478 | # 1) the majority of chunks will be compressed (as opposed to inline | |||
|
479 | # raw data). | |||
|
480 | # 2) decompressing *any* data will likely be at least 10x slower than | |||
|
481 | # returning raw inline data. | |||
|
482 | # 3) we want to prioritize common and officially supported compression | |||
|
483 | # engines | |||
|
484 | # | |||
|
485 | # It follows that we want to optimize for "decompress compressed data | |||
|
486 | # when encoded with common and officially supported compression engines" | |||
|
487 | # case over "raw data" and "data encoded by less common or non-official | |||
|
488 | # compression engines." That is why we have the inline lookup first | |||
|
489 | # followed by the compengines lookup. | |||
|
490 | # | |||
|
491 | # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib | |||
|
492 | # compressed chunks. And this matters for changelog and manifest reads. | |||
|
493 | t = data[0:1] | |||
|
494 | ||||
|
495 | if t == b'x': | |||
|
496 | try: | |||
|
497 | return _zlibdecompress(data) | |||
|
498 | except zlib.error as e: | |||
|
499 | raise error.RevlogError( | |||
|
500 | _(b'revlog decompress error: %s') | |||
|
501 | % stringutil.forcebytestr(e) | |||
|
502 | ) | |||
|
503 | # '\0' is more common than 'u' so it goes first. | |||
|
504 | elif t == b'\0': | |||
|
505 | return data | |||
|
506 | elif t == b'u': | |||
|
507 | return util.buffer(data, 1) | |||
|
508 | ||||
|
509 | compressor = self._get_decompressor(t) | |||
|
510 | ||||
|
511 | return compressor.decompress(data) | |||
|
512 | ||||
408 | @contextlib.contextmanager |
|
513 | @contextlib.contextmanager | |
409 | def reading(self): |
|
514 | def reading(self): | |
410 | """Context manager that keeps data and sidedata files open for reading""" |
|
515 | """Context manager that keeps data and sidedata files open for reading""" | |
@@ -1284,12 +1389,15 b' class revlog:' | |||||
1284 | self.index = index |
|
1389 | self.index = index | |
1285 | # revnum -> (chain-length, sum-delta-length) |
|
1390 | # revnum -> (chain-length, sum-delta-length) | |
1286 | self._chaininfocache = util.lrucachedict(500) |
|
1391 | self._chaininfocache = util.lrucachedict(500) | |
1287 | # revlog header -> revlog compressor |
|
|||
1288 | self._decompressors = {} |
|
|||
1289 |
|
1392 | |||
1290 | return chunkcache |
|
1393 | return chunkcache | |
1291 |
|
1394 | |||
1292 | def _load_inner(self, chunk_cache): |
|
1395 | def _load_inner(self, chunk_cache): | |
|
1396 | if self._docket is None: | |||
|
1397 | default_compression_header = None | |||
|
1398 | else: | |||
|
1399 | default_compression_header = self._docket.default_compression_header | |||
|
1400 | ||||
1293 | self._inner = _InnerRevlog( |
|
1401 | self._inner = _InnerRevlog( | |
1294 | opener=self.opener, |
|
1402 | opener=self.opener, | |
1295 | index=self.index, |
|
1403 | index=self.index, | |
@@ -1298,7 +1406,9 b' class revlog:' | |||||
1298 | sidedata_file=self._sidedatafile, |
|
1406 | sidedata_file=self._sidedatafile, | |
1299 | inline=self._inline, |
|
1407 | inline=self._inline, | |
1300 | data_config=self.data_config, |
|
1408 | data_config=self.data_config, | |
|
1409 | feature_config=self.feature_config, | |||
1301 | chunk_cache=chunk_cache, |
|
1410 | chunk_cache=chunk_cache, | |
|
1411 | default_compression_header=default_compression_header, | |||
1302 | ) |
|
1412 | ) | |
1303 |
|
1413 | |||
1304 | def get_revlog(self): |
|
1414 | def get_revlog(self): | |
@@ -1319,38 +1429,6 b' class revlog:' | |||||
1319 | else: |
|
1429 | else: | |
1320 | return self.radix |
|
1430 | return self.radix | |
1321 |
|
1431 | |||
1322 | def _get_decompressor(self, t): |
|
|||
1323 | try: |
|
|||
1324 | compressor = self._decompressors[t] |
|
|||
1325 | except KeyError: |
|
|||
1326 | try: |
|
|||
1327 | engine = util.compengines.forrevlogheader(t) |
|
|||
1328 | compressor = engine.revlogcompressor( |
|
|||
1329 | self.feature_config.compression_engine_options |
|
|||
1330 | ) |
|
|||
1331 | self._decompressors[t] = compressor |
|
|||
1332 | except KeyError: |
|
|||
1333 | raise error.RevlogError( |
|
|||
1334 | _(b'unknown compression type %s') % binascii.hexlify(t) |
|
|||
1335 | ) |
|
|||
1336 | return compressor |
|
|||
1337 |
|
||||
1338 | @util.propertycache |
|
|||
1339 | def _compressor(self): |
|
|||
1340 | engine = util.compengines[self.feature_config.compression_engine] |
|
|||
1341 | return engine.revlogcompressor( |
|
|||
1342 | self.feature_config.compression_engine_options |
|
|||
1343 | ) |
|
|||
1344 |
|
||||
1345 | @util.propertycache |
|
|||
1346 | def _decompressor(self): |
|
|||
1347 | """the default decompressor""" |
|
|||
1348 | if self._docket is None: |
|
|||
1349 | return None |
|
|||
1350 | t = self._docket.default_compression_header |
|
|||
1351 | c = self._get_decompressor(t) |
|
|||
1352 | return c.decompress |
|
|||
1353 |
|
||||
1354 | def _datafp(self, mode=b'r'): |
|
1432 | def _datafp(self, mode=b'r'): | |
1355 | """file object for the revlog's data file""" |
|
1433 | """file object for the revlog's data file""" | |
1356 | return self.opener(self._datafile, mode=mode) |
|
1434 | return self.opener(self._datafile, mode=mode) | |
@@ -2272,9 +2350,9 b' class revlog:' | |||||
2272 | if compression_mode == COMP_MODE_PLAIN: |
|
2350 | if compression_mode == COMP_MODE_PLAIN: | |
2273 | return data |
|
2351 | return data | |
2274 | elif compression_mode == COMP_MODE_DEFAULT: |
|
2352 | elif compression_mode == COMP_MODE_DEFAULT: | |
2275 | return self._decompressor(data) |
|
2353 | return self._inner._decompressor(data) | |
2276 | elif compression_mode == COMP_MODE_INLINE: |
|
2354 | elif compression_mode == COMP_MODE_INLINE: | |
2277 | return self.decompress(data) |
|
2355 | return self._inner.decompress(data) | |
2278 | else: |
|
2356 | else: | |
2279 | msg = b'unknown compression mode %d' |
|
2357 | msg = b'unknown compression mode %d' | |
2280 | msg %= compression_mode |
|
2358 | msg %= compression_mode | |
@@ -2328,9 +2406,9 b' class revlog:' | |||||
2328 | # 2G on Windows |
|
2406 | # 2G on Windows | |
2329 | return [self._chunk(rev) for rev in revschunk] |
|
2407 | return [self._chunk(rev) for rev in revschunk] | |
2330 |
|
2408 | |||
2331 | decomp = self.decompress |
|
2409 | decomp = self._inner.decompress | |
2332 | # self._decompressor might be None, but will not be used in that case |
|
2410 | # self._inner._decompressor might be None, but will not be used in that case | |||
2333 | def_decomp = self._decompressor |
|
2411 | def_decomp = self._inner._decompressor | |
2334 | for rev in revschunk: |
|
2412 | for rev in revschunk: | |
2335 | chunkstart = start(rev) |
|
2413 | chunkstart = start(rev) | |
2336 | if inline: |
|
2414 | if inline: | |
@@ -2544,9 +2622,9 b' class revlog:' | |||||
2544 | if comp == COMP_MODE_PLAIN: |
|
2622 | if comp == COMP_MODE_PLAIN: | |
2545 | segment = comp_segment |
|
2623 | segment = comp_segment | |
2546 | elif comp == COMP_MODE_DEFAULT: |
|
2624 | elif comp == COMP_MODE_DEFAULT: | |
2547 | segment = self._decompressor(comp_segment) |
|
2625 | segment = self._inner._decompressor(comp_segment) | |
2548 | elif comp == COMP_MODE_INLINE: |
|
2626 | elif comp == COMP_MODE_INLINE: | |
2549 | segment = self.decompress(comp_segment) |
|
2627 | segment = self._inner.decompress(comp_segment) | |
2550 | else: |
|
2628 | else: | |
2551 | msg = b'unknown compression mode %d' |
|
2629 | msg = b'unknown compression mode %d' | |
2552 | msg %= comp |
|
2630 | msg %= comp | |
@@ -2842,69 +2920,10 b' class revlog:' | |||||
2842 | ) |
|
2920 | ) | |
2843 |
|
2921 | |||
2844 | def compress(self, data): |
|
2922 | def compress(self, data): | |
2845 | """Generate a possibly-compressed representation of data.""" |
|
2923 | return self._inner.compress(data) | |
2846 | if not data: |
|
|||
2847 | return b'', data |
|
|||
2848 |
|
||||
2849 | compressed = self._compressor.compress(data) |
|
|||
2850 |
|
||||
2851 | if compressed: |
|
|||
2852 | # The revlog compressor added the header in the returned data. |
|
|||
2853 | return b'', compressed |
|
|||
2854 |
|
||||
2855 | if data[0:1] == b'\0': |
|
|||
2856 | return b'', data |
|
|||
2857 | return b'u', data |
|
|||
2858 |
|
2924 | |||
2859 | def decompress(self, data): |
|
2925 | def decompress(self, data): | |
2860 | """Decompress a revlog chunk. |
|
2926 | return self._inner.decompress(data) | |
2861 |
|
||||
2862 | The chunk is expected to begin with a header identifying the |
|
|||
2863 | format type so it can be routed to an appropriate decompressor. |
|
|||
2864 | """ |
|
|||
2865 | if not data: |
|
|||
2866 | return data |
|
|||
2867 |
|
||||
2868 | # Revlogs are read much more frequently than they are written and many |
|
|||
2869 | # chunks only take microseconds to decompress, so performance is |
|
|||
2870 | # important here. |
|
|||
2871 | # |
|
|||
2872 | # We can make a few assumptions about revlogs: |
|
|||
2873 | # |
|
|||
2874 | # 1) the majority of chunks will be compressed (as opposed to inline |
|
|||
2875 | # raw data). |
|
|||
2876 | # 2) decompressing *any* data will likely be at least 10x slower than |
|
|||
2877 | # returning raw inline data. |
|
|||
2878 | # 3) we want to prioritize common and officially supported compression |
|
|||
2879 | # engines |
|
|||
2880 | # |
|
|||
2881 | # It follows that we want to optimize for "decompress compressed data |
|
|||
2882 | # when encoded with common and officially supported compression engines" |
|
|||
2883 | # case over "raw data" and "data encoded by less common or non-official |
|
|||
2884 | # compression engines." That is why we have the inline lookup first |
|
|||
2885 | # followed by the compengines lookup. |
|
|||
2886 | # |
|
|||
2887 | # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib |
|
|||
2888 | # compressed chunks. And this matters for changelog and manifest reads. |
|
|||
2889 | t = data[0:1] |
|
|||
2890 |
|
||||
2891 | if t == b'x': |
|
|||
2892 | try: |
|
|||
2893 | return _zlibdecompress(data) |
|
|||
2894 | except zlib.error as e: |
|
|||
2895 | raise error.RevlogError( |
|
|||
2896 | _(b'revlog decompress error: %s') |
|
|||
2897 | % stringutil.forcebytestr(e) |
|
|||
2898 | ) |
|
|||
2899 | # '\0' is more common than 'u' so it goes first. |
|
|||
2900 | elif t == b'\0': |
|
|||
2901 | return data |
|
|||
2902 | elif t == b'u': |
|
|||
2903 | return util.buffer(data, 1) |
|
|||
2904 |
|
||||
2905 | compressor = self._get_decompressor(t) |
|
|||
2906 |
|
||||
2907 | return compressor.decompress(data) |
|
|||
2908 |
|
2927 | |||
2909 | def _addrevision( |
|
2928 | def _addrevision( | |
2910 | self, |
|
2929 | self, | |
@@ -3029,7 +3048,7 b' class revlog:' | |||||
3029 | sidedata_compression_mode = COMP_MODE_PLAIN |
|
3048 | sidedata_compression_mode = COMP_MODE_PLAIN | |
3030 | serialized_sidedata = sidedatautil.serialize_sidedata(sidedata) |
|
3049 | serialized_sidedata = sidedatautil.serialize_sidedata(sidedata) | |
3031 | sidedata_offset = self._docket.sidedata_end |
|
3050 | sidedata_offset = self._docket.sidedata_end | |
3032 | h, comp_sidedata = self.compress(serialized_sidedata) |
|
3051 | h, comp_sidedata = self._inner.compress(serialized_sidedata) | |
3033 | if ( |
|
3052 | if ( | |
3034 | h != b'u' |
|
3053 | h != b'u' | |
3035 | and comp_sidedata[0:1] != b'\0' |
|
3054 | and comp_sidedata[0:1] != b'\0' | |
@@ -3876,7 +3895,7 b' class revlog:' | |||||
3876 | sidedata_compression_mode = COMP_MODE_INLINE |
|
3895 | sidedata_compression_mode = COMP_MODE_INLINE | |
3877 | if serialized_sidedata and self.feature_config.has_side_data: |
|
3896 | if serialized_sidedata and self.feature_config.has_side_data: | |
3878 | sidedata_compression_mode = COMP_MODE_PLAIN |
|
3897 | sidedata_compression_mode = COMP_MODE_PLAIN | |
3879 | h, comp_sidedata = self.compress(serialized_sidedata) |
|
3898 | h, comp_sidedata = self._inner.compress(serialized_sidedata) | |
3880 | if ( |
|
3899 | if ( | |
3881 | h != b'u' |
|
3900 | h != b'u' | |
3882 | and comp_sidedata[0] != b'\0' |
|
3901 | and comp_sidedata[0] != b'\0' |
@@ -1205,7 +1205,7 b' class deltacomputer:' | |||||
1205 | msg = b"DBG-DELTAS-SEARCH: DISCARDED (prev size)\n" |
|
1205 | msg = b"DBG-DELTAS-SEARCH: DISCARDED (prev size)\n" | |
1206 | self._write_debug(msg) |
|
1206 | self._write_debug(msg) | |
1207 | return None |
|
1207 | return None | |
1208 | header, data = revlog.compress(delta) |
|
1208 | header, data = revlog._inner.compress(delta) | |
1209 | deltalen = len(header) + len(data) |
|
1209 | deltalen = len(header) + len(data) | |
1210 | offset = revlog.end(len(revlog) - 1) |
|
1210 | offset = revlog.end(len(revlog) - 1) | |
1211 | dist = deltalen + offset - revlog.start(chainbase) |
|
1211 | dist = deltalen + offset - revlog.start(chainbase) | |
@@ -1226,7 +1226,7 b' class deltacomputer:' | |||||
1226 |
|
1226 | |||
1227 | def _fullsnapshotinfo(self, revinfo, curr): |
|
1227 | def _fullsnapshotinfo(self, revinfo, curr): | |
1228 | rawtext = self.buildtext(revinfo) |
|
1228 | rawtext = self.buildtext(revinfo) | |
1229 | data = self.revlog.compress(rawtext) |
|
1229 | data = self.revlog._inner.compress(rawtext) | |
1230 | compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0]) |
|
1230 | compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0]) | |
1231 | deltabase = chainbase = curr |
|
1231 | deltabase = chainbase = curr | |
1232 | snapshotdepth = 0 |
|
1232 | snapshotdepth = 0 |
General Comments 0
You need to be logged in to leave comments.
Login now