##// END OF EJS Templates
revlog: use compression engine APIs for decompression...
Gregory Szorc -
r30817:2b279126 default
parent child Browse files
Show More
@@ -39,7 +39,8 b' from . import ('
39
39
40 _pack = struct.pack
40 _pack = struct.pack
41 _unpack = struct.unpack
41 _unpack = struct.unpack
42 _decompress = zlib.decompress
42 # Aliased for performance.
43 _zlibdecompress = zlib.decompress
43
44
44 # revlog header flags
45 # revlog header flags
45 REVLOGV0 = 0
46 REVLOGV0 = 0
@@ -339,6 +340,8 b' class revlog(object):'
339 self._chunkclear()
340 self._chunkclear()
340 # revnum -> (chain-length, sum-delta-length)
341 # revnum -> (chain-length, sum-delta-length)
341 self._chaininfocache = {}
342 self._chaininfocache = {}
343 # revlog header -> revlog compressor
344 self._decompressors = {}
342
345
343 @util.propertycache
346 @util.propertycache
344 def _compressor(self):
347 def _compressor(self):
@@ -1491,17 +1494,52 b' class revlog(object):'
1491 """
1494 """
1492 if not data:
1495 if not data:
1493 return data
1496 return data
1497
1498 # Revlogs are read much more frequently than they are written and many
1499 # chunks only take microseconds to decompress, so performance is
1500 # important here.
1501 #
1502 # We can make a few assumptions about revlogs:
1503 #
1504 # 1) the majority of chunks will be compressed (as opposed to inline
1505 # raw data).
1506 # 2) decompressing *any* data will likely be at least 10x slower than
1507 # returning raw inline data.
1508 # 3) we want to prioritize common and officially supported compression
1509 # engines
1510 #
1511 # It follows that we want to optimize for the "decompress compressed data
1512 # when encoded with common and officially supported compression engines"
1513 # case over "raw data" and "data encoded by less common or non-official
1514 # compression engines." That is why we have the inline lookup first
1515 # followed by the compengines lookup.
1516 #
1517 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
1518 # compressed chunks. And this matters for changelog and manifest reads.
1494 t = data[0]
1519 t = data[0]
1495 if t == '\0':
1520
1496 return data
1497 if t == 'x':
1521 if t == 'x':
1498 try:
1522 try:
1499 return _decompress(data)
1523 return _zlibdecompress(data)
1500 except zlib.error as e:
1524 except zlib.error as e:
1501 raise RevlogError(_('revlog decompress error: %s') % str(e))
1525 raise RevlogError(_('revlog decompress error: %s') % str(e))
1502 if t == 'u':
1526 # '\0' is more common than 'u' so it goes first.
1527 elif t == '\0':
1528 return data
1529 elif t == 'u':
1503 return util.buffer(data, 1)
1530 return util.buffer(data, 1)
1504 raise RevlogError(_('unknown compression type %r') % t)
1531
1532 try:
1533 compressor = self._decompressors[t]
1534 except KeyError:
1535 try:
1536 engine = util.compengines.forrevlogheader(t)
1537 compressor = engine.revlogcompressor()
1538 self._decompressors[t] = compressor
1539 except KeyError:
1540 raise RevlogError(_('unknown compression type %r') % t)
1541
1542 return compressor.decompress(data)
1505
1543
1506 def _isgooddelta(self, d, textlen):
1544 def _isgooddelta(self, d, textlen):
1507 """Returns True if the given delta is good. Good means that it is within
1545 """Returns True if the given delta is good. Good means that it is within
General Comments 0
You need to be logged in to leave comments. Login now