util: compression APIs to support revlog compression...
Gregory Szorc
r30794:31e1f0d4 default
@@ -3207,6 +3207,19 @@ class compressionengine(object):
         """
         raise NotImplementedError()
 
+    def revlogcompressor(self, opts=None):
+        """Obtain an object that can be used to compress revlog entries.
+
+        The object has a ``compress(data)`` method that compresses binary
+        data. This method returns compressed binary data or ``None`` if
+        the data could not be compressed (too small, not compressible, etc).
+        The returned data should have a header uniquely identifying this
+        compression format so decompression can be routed to this engine.
+
+        The object is reusable but is not thread safe.
+        """
+        raise NotImplementedError()
+
 class _zlibengine(compressionengine):
     def name(self):
         return 'zlib'
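The new ``revlogcompressor()`` API centralizes the "compress or give up" decision in the engine: ``compress(data)`` either returns smaller, self-identifying bytes or ``None``. A minimal sketch of how a caller might consume that contract (the ``packchunk`` helper is hypothetical, not part of this patch):

    def packchunk(compressor, data):
        # Ask the engine for a compressed form; ``None`` means the chunk
        # was too small or did not actually shrink.
        if data:
            compressed = compressor.compress(data)
            if compressed is not None:
                return compressed
        # Fall back to storing the chunk uncompressed.
        return data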
@@ -3241,6 +3254,41 @@ class _zlibengine(compressionengine):
 
         return chunkbuffer(gen())
 
+    class zlibrevlogcompressor(object):
+        def compress(self, data):
+            insize = len(data)
+            # Caller handles empty input case.
+            assert insize > 0
+
+            if insize < 44:
+                return None
+
+            elif insize <= 1000000:
+                compressed = zlib.compress(data)
+                if len(compressed) < insize:
+                    return compressed
+                return None
+
+            # zlib makes an internal copy of the input buffer, doubling
+            # memory usage for large inputs. So do streaming compression
+            # on large inputs.
+            else:
+                z = zlib.compressobj()
+                parts = []
+                pos = 0
+                while pos < insize:
+                    pos2 = pos + 2**20
+                    parts.append(z.compress(data[pos:pos2]))
+                    pos = pos2
+                parts.append(z.flush())
+
+                if sum(map(len, parts)) < insize:
+                    return ''.join(parts)
+                return None
+
+    def revlogcompressor(self, opts=None):
+        return self.zlibrevlogcompressor()
+
 compengines.register(_zlibengine())
 
 class _bz2engine(compressionengine):
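The three branches in ``zlibrevlogcompressor.compress`` encode zlib's fixed costs: inputs under 44 bytes are skipped outright (presumably because the deflate framing and checksum overhead make a win impossible at that size), inputs up to 1 MB are compressed in a single call, and larger inputs go through a streaming compressor in 2**20-byte slices so zlib never holds a second copy of the whole buffer. The streaming pattern can be reproduced with the standard library alone; a rough sketch (the 1 MiB slice size mirrors the patch, it is not a zlib requirement):

    import zlib

    def streamingcompress(data, slicesize=2**20):
        # Feed fixed-size slices to a compression object instead of
        # handing zlib the whole buffer at once.
        z = zlib.compressobj()
        parts = []
        for pos in range(0, len(data), slicesize):
            parts.append(z.compress(data[pos:pos + slicesize]))
        parts.append(z.flush())
        out = b''.join(parts)
        # Same policy as the patch: only keep the result if it shrank.
        return out if len(out) < len(data) else None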
@@ -3315,6 +3363,13 @@ class _noopengine(compressionengine):
     def decompressorreader(self, fh):
         return fh
 
+    class nooprevlogcompressor(object):
+        def compress(self, data):
+            return None
+
+    def revlogcompressor(self, opts=None):
+        return self.nooprevlogcompressor()
+
 compengines.register(_noopengine())
 
 class _zstdengine(compressionengine):
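The no-op engine keeps the interface uniform: its compressor always reports that nothing could be compressed, so generic caller code (like the hypothetical ``packchunk`` sketch above) falls through to storing raw data without a special case. Reaching into the private class purely for illustration:

    from mercurial.util import _noopengine

    noop = _noopengine().revlogcompressor()
    # Always None, so callers always take the uncompressed path.
    assert noop.compress(b'some revlog chunk') is None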
@@ -3363,6 +3418,49 @@ class _zstdengine(compressionengine):
         dctx = zstd.ZstdDecompressor()
         return chunkbuffer(dctx.read_from(fh))
 
+    class zstdrevlogcompressor(object):
+        def __init__(self, zstd, level=3):
+            # Writing the content size adds a few bytes to the output. However,
+            # it allows decompression to be more optimal since we can
+            # pre-allocate a buffer to hold the result.
+            self._cctx = zstd.ZstdCompressor(level=level,
+                                             write_content_size=True)
+            self._compinsize = zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE
+
+        def compress(self, data):
+            insize = len(data)
+            # Caller handles empty input case.
+            assert insize > 0
+
+            if insize < 50:
+                return None
+
+            elif insize <= 1000000:
+                compressed = self._cctx.compress(data)
+                if len(compressed) < insize:
+                    return compressed
+                return None
+            else:
+                z = self._cctx.compressobj()
+                chunks = []
+                pos = 0
+                while pos < insize:
+                    pos2 = pos + self._compinsize
+                    chunk = z.compress(data[pos:pos2])
+                    if chunk:
+                        chunks.append(chunk)
+                    pos = pos2
+                chunks.append(z.flush())
+
+                if sum(map(len, chunks)) < insize:
+                    return ''.join(chunks)
+                return None
+
+    def revlogcompressor(self, opts=None):
+        opts = opts or {}
+        return self.zstdrevlogcompressor(self._module,
+                                         level=opts.get('level', 3))
+
 compengines.register(_zstdengine())
 
 # convenient shortcut
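The zstd variant follows the same shape as the zlib one, but reuses a single ``ZstdCompressor`` context across entries (reusable, not thread safe, per the API docstring), writes the content size into each frame so decompression can pre-allocate its output buffer, and streams anything above 1 MB in ``COMPRESSION_RECOMMENDED_INPUT_SIZE`` slices. A rough round-trip sketch against the python-zstandard package that Mercurial vendors (imported here as ``zstandard``; treat the exact keyword arguments as an assumption, since that package's API has shifted between versions):

    import zstandard

    cctx = zstandard.ZstdCompressor(level=3, write_content_size=True)
    dctx = zstandard.ZstdDecompressor()

    data = b'revlog entry ' * 1000
    compressed = cctx.compress(data)

    # Because the frame records the content size, decompress() can size
    # its output buffer up front without being told the original length.
    assert dctx.decompress(compressed) == data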