##// END OF EJS Templates
revlog: support none compression...
Joerg Sonnenberger -
r46737:84130fd7 default
parent child Browse files
Show More
@@ -1,808 +1,810 b''
1 1 # compression.py - Mercurial utility functions for compression
2 2 #
3 3 # This software may be used and distributed according to the terms of the
4 4 # GNU General Public License version 2 or any later version.
5 5
6 6
7 7 from __future__ import absolute_import, print_function
8 8
9 9 import bz2
10 10 import collections
11 11 import zlib
12 12
13 13 from ..pycompat import getattr
14 14 from .. import (
15 15 error,
16 16 i18n,
17 17 pycompat,
18 18 )
19 19 from . import stringutil
20 20
21 21 safehasattr = pycompat.safehasattr
22 22
23 23
24 24 _ = i18n._
25 25
26 26 # compression code
27 27
28 28 SERVERROLE = b'server'
29 29 CLIENTROLE = b'client'
30 30
31 31 compewireprotosupport = collections.namedtuple(
32 32 'compenginewireprotosupport',
33 33 ('name', 'serverpriority', 'clientpriority'),
34 34 )
35 35
36 36
class propertycache(object):
    """Descriptor that computes an attribute once and caches the result.

    On first access through an instance, the wrapped function is called with
    that instance and the result is stored in the instance ``__dict__`` under
    the function's name. This class defines no ``__set__``, so the cached
    ``__dict__`` entry is found first on later lookups and the function is
    not invoked again for that instance.
    """

    def __init__(self, func):
        self.func = func
        self.name = func.__name__

    def __get__(self, obj, type=None):
        if obj is None:
            # Accessed on the class rather than on an instance: there is no
            # instance ``__dict__`` to cache into, so return the descriptor
            # itself instead of calling the function with ``None``.
            return self
        result = self.func(obj)
        self.cachevalue(obj, result)
        return result

    def cachevalue(self, obj, value):
        # __dict__ assignment required to bypass __setattr__ (eg: repoview)
        obj.__dict__[self.name] = value
50 50
51 51
class compressormanager(object):
    """Holds registrations of various compression engines.

    This class essentially abstracts the differences between compression
    engines to allow new compression formats to be added easily, possibly from
    extensions.

    Compressors are registered against the global instance by calling its
    ``register()`` method.

    The manager behaves like a read-only mapping of engine name to engine:
    it supports ``manager[name]``, ``name in manager`` and iteration over
    registered engine names.
    """

    def __init__(self):
        # Engine name to engine instance.
        self._engines = {}
        # Bundle spec human name to engine name.
        self._bundlenames = {}
        # Internal bundle identifier to engine name.
        self._bundletypes = {}
        # Revlog header to engine name.
        self._revlogheaders = {}
        # Wire proto identifier to engine name.
        self._wiretypes = {}

    def __getitem__(self, key):
        return self._engines[key]

    def __contains__(self, key):
        return key in self._engines

    def __iter__(self):
        return iter(self._engines)

    def register(self, engine):
        """Register a compression engine with the manager.

        The argument must be a ``compressionengine`` instance.

        Raises ``ValueError`` if ``engine`` is not a ``compressionengine``
        and ``error.Abort`` if its name, bundle name, bundle type, wire
        protocol identifier or revlog header is already registered.

        All identifiers are validated before any registry is modified, so a
        rejected engine leaves the manager unchanged.
        """
        if not isinstance(engine, compressionengine):
            raise ValueError(_(b'argument must be a compressionengine'))

        name = engine.name()

        if name in self._engines:
            raise error.Abort(
                _(b'compression engine %s already registered') % name
            )

        # Phase 1: collect and validate every identifier the engine declares.
        bundlename = bundletype = None
        bundleinfo = engine.bundletype()
        if bundleinfo:
            bundlename, bundletype = bundleinfo

            if bundlename in self._bundlenames:
                raise error.Abort(
                    _(b'bundle name %s already registered') % bundlename
                )
            if bundletype in self._bundletypes:
                raise error.Abort(
                    _(b'bundle type %s already registered by %s')
                    % (bundletype, self._bundletypes[bundletype])
                )

        wiretype = None
        wiresupport = engine.wireprotosupport()
        if wiresupport:
            wiretype = wiresupport.name
            if wiretype in self._wiretypes:
                raise error.Abort(
                    _(
                        b'wire protocol compression %s already '
                        b'registered by %s'
                    )
                    % (wiretype, self._wiretypes[wiretype])
                )

        revlogheader = engine.revlogheader()
        if revlogheader and revlogheader in self._revlogheaders:
            raise error.Abort(
                _(b'revlog header %s already registered by %s')
                % (revlogheader, self._revlogheaders[revlogheader])
            )

        # Phase 2: nothing can abort anymore, so record the engine.
        if bundleinfo:
            # The external facing name is optional; only record it when the
            # engine declares one.
            if bundlename:
                self._bundlenames[bundlename] = name

            self._bundletypes[bundletype] = name

        if wiresupport:
            self._wiretypes[wiretype] = name

        if revlogheader:
            self._revlogheaders[revlogheader] = name

        self._engines[name] = engine

    @property
    def supportedbundlenames(self):
        """Set of registered user-facing bundle spec names."""
        return set(self._bundlenames)

    @property
    def supportedbundletypes(self):
        """Set of registered internal bundle identifiers."""
        return set(self._bundletypes)

    def _checkavailable(self, engine):
        """Return ``engine``, aborting if it reports itself unavailable."""
        if not engine.available():
            raise error.Abort(
                _(b'compression engine %s could not be loaded') % engine.name()
            )
        return engine

    def forbundlename(self, bundlename):
        """Obtain a compression engine registered to a bundle name.

        Will raise KeyError if the bundle name isn't registered.

        Will abort if the engine is known but not available.
        """
        return self._checkavailable(
            self._engines[self._bundlenames[bundlename]]
        )

    def forbundletype(self, bundletype):
        """Obtain a compression engine registered to a bundle type.

        Will raise KeyError if the bundle type isn't registered.

        Will abort if the engine is known but not available.
        """
        return self._checkavailable(
            self._engines[self._bundletypes[bundletype]]
        )

    def supportedwireengines(self, role, onlyavailable=True):
        """Obtain compression engines that support the wire protocol.

        Returns a list of engines in prioritized order, most desired first.

        If ``onlyavailable`` is set, filter out engines that can't be
        loaded.
        """
        assert role in (SERVERROLE, CLIENTROLE)

        # Native string attribute names, matching the field names the
        # wire-protocol-support namedtuple is declared with.
        attr = 'serverpriority' if role == SERVERROLE else 'clientpriority'

        engines = [self._engines[e] for e in self._wiretypes.values()]
        if onlyavailable:
            engines = [e for e in engines if e.available()]

        def getkey(e):
            # Sort first by priority, highest first. In case of tie, sort
            # alphabetically. This is arbitrary, but ensures output is
            # stable.
            w = e.wireprotosupport()
            return -getattr(w, attr), w.name

        return sorted(engines, key=getkey)

    def forwiretype(self, wiretype):
        """Obtain a compression engine registered to a wire protocol type.

        Will raise KeyError if the wire type isn't registered.

        Will abort if the engine is known but not available.
        """
        return self._checkavailable(self._engines[self._wiretypes[wiretype]])

    def forrevlogheader(self, header):
        """Obtain a compression engine registered to a revlog header.

        Will raise KeyError if the revlog header value isn't registered.

        Unlike the other lookups, availability of the engine is not checked.
        """
        return self._engines[self._revlogheaders[header]]
219 219
220 220
221 221 compengines = compressormanager()
222 222
223 223
class compressionengine(object):
    """Base class for compression engines.

    Compression engines must implement the interface defined by this class.
    """

    def name(self):
        """Returns the name of the compression engine.

        This is the key the engine is registered under.

        This method must be implemented.
        """
        raise NotImplementedError()

    def available(self):
        """Whether the compression engine is available.

        The intent of this method is to allow optional compression engines
        that may not be available in all installations (such as engines relying
        on C extensions that may not be present).
        """
        return True

    def bundletype(self):
        """Describes bundle identifiers for this engine.

        If this compression engine isn't supported for bundles, returns None.

        If this engine can be used for bundles, returns a 2-tuple of strings of
        the user-facing "bundle spec" compression name and an internal
        identifier used to denote the compression format within bundles. To
        exclude the name from external usage, set the first element to ``None``.

        If bundle compression is supported, the class must also implement
        ``compressstream`` and ``decompressorreader``.

        The docstring of this method is used in the help system to tell users
        about this engine.
        """
        return None

    def wireprotosupport(self):
        """Declare support for this compression format on the wire protocol.

        If this compression engine isn't supported for compressing wire
        protocol payloads, returns None.

        Otherwise, returns ``compenginewireprotosupport`` with the following
        fields:

        * String format identifier
        * Integer priority for the server
        * Integer priority for the client

        The integer priorities are used to order the advertisement of format
        support by server and client. The highest integer is advertised
        first. Integers with non-positive values aren't advertised.

        The priority values are somewhat arbitrary and only used for default
        ordering. The relative order can be changed via config options.

        If wire protocol compression is supported, the class must also implement
        ``compressstream`` and ``decompressorreader``.
        """
        return None

    def revlogheader(self):
        """Header added to revlog chunks that identifies this engine.

        If this engine can be used to compress revlogs, this method should
        return the bytes used to identify chunks compressed with this engine.
        Else, the method should return ``None`` to indicate it does not
        participate in revlog compression.
        """
        return None

    def compressstream(self, it, opts=None):
        """Compress an iterator of chunks.

        The method receives an iterator (ideally a generator) of chunks of
        bytes to be compressed. It returns an iterator (ideally a generator)
        of bytes of chunks representing the compressed output.

        Optionally accepts an argument defining how to perform compression.
        Each engine treats this argument differently.
        """
        raise NotImplementedError()

    def decompressorreader(self, fh):
        """Perform decompression on a file object.

        Argument is an object with a ``read(size)`` method that returns
        compressed data. Return value is an object with a ``read(size)`` that
        returns uncompressed data.
        """
        raise NotImplementedError()

    def revlogcompressor(self, opts=None):
        """Obtain an object that can be used to compress revlog entries.

        The object has a ``compress(data)`` method that compresses binary
        data. This method returns compressed binary data or ``None`` if
        the data could not be compressed (too small, not compressible, etc).
        The returned data should have a header uniquely identifying this
        compression format so decompression can be routed to this engine.
        This header should be identified by the ``revlogheader()`` return
        value.

        The object has a ``decompress(data)`` method that decompresses
        data. The method will only be called if ``data`` begins with
        ``revlogheader()``. The method should return the raw, uncompressed
        data or raise a ``StorageError``.

        The object is reusable but is not thread safe.
        """
        raise NotImplementedError()
341 341
342 342
class _CompressedStreamReader(object):
    """File-like reader that yields decompressed data from a compressed one.

    Subclasses implement ``_decompress(chunk)``, which is expected to append
    decompressed bytes to ``self._pending`` and to set ``self._eof`` once the
    compressed stream has ended.
    """

    def __init__(self, fh):
        # Prefer the unbuffered read method when the file object offers one.
        if safehasattr(fh, 'unbufferedread'):
            self._reader = fh.unbufferedread
        else:
            self._reader = fh.read
        # Decompressed buffers not yet fully handed out to the caller.
        self._pending = []
        # Offset of the first unread byte inside ``self._pending[0]``.
        self._pos = 0
        self._eof = False

    def _decompress(self, chunk):
        raise NotImplementedError()

    def read(self, l):
        """Return up to ``l`` bytes of decompressed data.

        Fewer bytes are returned only when the stream ends or the underlying
        reader stops producing data.
        """
        parts = []
        while True:
            # Serve as much as possible from already-decompressed buffers.
            while self._pending:
                head = self._pending[0]
                if len(head) > l + self._pos:
                    # The head buffer alone satisfies the rest of the
                    # request; keep it around and just advance the offset.
                    parts.append(head[self._pos : self._pos + l])
                    self._pos += l
                    return b''.join(parts)

                # The head buffer is consumed entirely by this request.
                self._pending.pop(0)
                remainder = head[self._pos :] if self._pos else head
                parts.append(remainder)
                l -= len(remainder)
                self._pos = 0

            if self._eof:
                return b''.join(parts)

            chunk = self._reader(65536)
            self._decompress(chunk)
            if not (chunk or self._pending or self._eof):
                # No progress and no new data, bail out
                return b''.join(parts)
382 382
383 383
class _GzipCompressedStreamReader(_CompressedStreamReader):
    """Stream reader that decompresses with a ``zlib`` decompression object."""

    def __init__(self, fh):
        super(_GzipCompressedStreamReader, self).__init__(fh)
        self._decompobj = zlib.decompressobj()

    def _decompress(self, chunk):
        data = self._decompobj.decompress(chunk)
        if data:
            self._pending.append(data)

        # Detect end of stream without disturbing the real decompressor:
        # feed a sentinel byte to a copy and see whether it comes back
        # untouched in ``unused_data``.
        probe = self._decompobj.copy()
        try:
            probe.decompress(b'x')
            probe.flush()
        except zlib.error:
            return
        if probe.unused_data == b'x':
            self._eof = True
401 401
402 402
class _BZ2CompressedStreamReader(_CompressedStreamReader):
    """Stream reader that decompresses with ``bz2.BZ2Decompressor``."""

    def __init__(self, fh):
        super(_BZ2CompressedStreamReader, self).__init__(fh)
        self._decompobj = bz2.BZ2Decompressor()

    def _decompress(self, chunk):
        decompress = self._decompobj.decompress

        data = decompress(chunk)
        if data:
            self._pending.append(data)

        # Keep asking for output with empty input until nothing more comes
        # out; an EOFError here marks the end of the compressed stream.
        try:
            data = decompress(b'')
            while data:
                self._pending.append(data)
                data = decompress(b'')
        except EOFError:
            self._eof = True
421 421
422 422
class _TruncatedBZ2CompressedStreamReader(_BZ2CompressedStreamReader):
    """BZ2 stream reader that primes the decompressor with ``b'BZ'``.

    The two bytes are fed to the decompressor before any data is read from
    the file object.
    """

    def __init__(self, fh):
        super(_TruncatedBZ2CompressedStreamReader, self).__init__(fh)
        primed = self._decompobj.decompress(b'BZ')
        if primed:
            self._pending.append(primed)
429 429
430 430
class _ZstdCompressedStreamReader(_CompressedStreamReader):
    """Stream reader that decompresses with a zstd decompression object.

    ``zstd`` is the zstd module to use; it is passed in by the caller
    rather than imported here.
    """

    def __init__(self, fh, zstd):
        super(_ZstdCompressedStreamReader, self).__init__(fh)
        self._zstd = zstd
        self._decompobj = zstd.ZstdDecompressor().decompressobj()

    def _decompress(self, chunk):
        decompress = self._decompobj.decompress

        data = decompress(chunk)
        if data:
            self._pending.append(data)

        # Keep asking for output with empty input until nothing more comes
        # out; a ZstdError here is treated as the end of the stream.
        try:
            data = decompress(b'')
            while data:
                self._pending.append(data)
                data = decompress(b'')
        except self._zstd.ZstdError:
            self._eof = True
450 450
451 451
class _zlibengine(compressionengine):
    def name(self):
        return b'zlib'

    def bundletype(self):
        """zlib compression using the DEFLATE algorithm.

        All Mercurial clients should support this format. The compression
        algorithm strikes a reasonable balance between compression ratio
        and size.
        """
        return b'gzip', b'GZ'

    def wireprotosupport(self):
        return compewireprotosupport(b'zlib', 20, 20)

    def revlogheader(self):
        return b'x'

    def compressstream(self, it, opts=None):
        # ``level`` falls back to -1 when the caller gives no options.
        level = (opts or {}).get(b'level', -1)

        compressor = zlib.compressobj(level)
        for chunk in it:
            out = compressor.compress(chunk)
            # Not all calls to compress emit data. It is cheaper to inspect
            # here than to feed empty chunks through generator.
            if out:
                yield out

        yield compressor.flush()

    def decompressorreader(self, fh):
        return _GzipCompressedStreamReader(fh)

    class zlibrevlogcompressor(object):
        """Reusable zlib compressor/decompressor for revlog chunks."""

        def __init__(self, level=None):
            # ``None`` means "let zlib pick its default level".
            self._level = level

        def compress(self, data):
            """Return compressed ``data``, or None if that would not be
            smaller than the input."""
            insize = len(data)
            # Caller handles empty input case.
            assert insize > 0

            # Inputs shorter than 44 bytes are never compressed.
            if insize < 44:
                return None

            if insize <= 1000000:
                if self._level is None:
                    compressed = zlib.compress(data)
                else:
                    compressed = zlib.compress(data, self._level)
                return compressed if len(compressed) < insize else None

            # zlib makes an internal copy of the input buffer, doubling
            # memory usage for large inputs. So do streaming compression
            # on large inputs.
            if self._level is None:
                z = zlib.compressobj()
            else:
                z = zlib.compressobj(level=self._level)

            step = 2 ** 20
            parts = [
                z.compress(data[start : start + step])
                for start in range(0, insize, step)
            ]
            parts.append(z.flush())

            if sum(map(len, parts)) < insize:
                return b''.join(parts)
            return None

        def decompress(self, data):
            """Return decompressed ``data``; zlib errors are re-raised as
            ``error.StorageError``."""
            try:
                return zlib.decompress(data)
            except zlib.error as e:
                raise error.StorageError(
                    _(b'revlog decompress error: %s')
                    % stringutil.forcebytestr(e)
                )

    def revlogcompressor(self, opts=None):
        level = opts.get(b'zlib.level') if opts is not None else None
        return self.zlibrevlogcompressor(level)
542 542
543 543
544 544 compengines.register(_zlibengine())
545 545
546 546
class _bz2engine(compressionengine):
    def name(self):
        return b'bz2'

    def bundletype(self):
        """An algorithm that produces smaller bundles than ``gzip``.

        All Mercurial clients should support this format.

        This engine will likely produce smaller bundles than ``gzip`` but
        will be significantly slower, both during compression and
        decompression.

        If available, the ``zstd`` engine can yield similar or better
        compression at much higher speeds.
        """
        return b'bzip2', b'BZ'

    # We declare a protocol name but don't advertise by default because
    # it is slow.
    def wireprotosupport(self):
        return compewireprotosupport(b'bzip2', 0, 0)

    def compressstream(self, it, opts=None):
        # ``level`` falls back to 9 when the caller gives no options.
        level = (opts or {}).get(b'level', 9)

        compressor = bz2.BZ2Compressor(level)
        for chunk in it:
            out = compressor.compress(chunk)
            # Only forward calls that actually produced output.
            if out:
                yield out

        yield compressor.flush()

    def decompressorreader(self, fh):
        return _BZ2CompressedStreamReader(fh)
582 582
583 583
584 584 compengines.register(_bz2engine())
585 585
586 586
class _truncatedbz2engine(compressionengine):
    def name(self):
        return b'bz2truncated'

    def bundletype(self):
        # No user-facing bundle spec name; only the internal identifier.
        return None, b'_truncatedBZ'

    # We don't implement compressstream because it is hackily handled elsewhere.

    def decompressorreader(self, fh):
        reader = _TruncatedBZ2CompressedStreamReader(fh)
        return reader
598 598
599 599
600 600 compengines.register(_truncatedbz2engine())
601 601
602 602
class _noopengine(compressionengine):
    def name(self):
        return b'none'

    def bundletype(self):
        """No compression is performed.

        Use this compression engine to explicitly disable compression.
        """
        return b'none', b'UN'

    # Clients always support uncompressed payloads. Servers don't because
    # unless you are on a fast network, uncompressed payloads can easily
    # saturate your network pipe.
    def wireprotosupport(self):
        return compewireprotosupport(b'none', 0, 10)

    # revlog special cases the uncompressed case, but implementing
    # revlogheader allows forcing uncompressed storage.
    def revlogheader(self):
        return b'\0'

    def compressstream(self, it, opts=None):
        # Pass-through: the input iterator is the output.
        return it

    def decompressorreader(self, fh):
        # Pass-through: the file object already yields uncompressed data.
        return fh

    class nooprevlogcompressor(object):
        """Revlog compressor that never compresses."""

        def compress(self, data):
            # ``None`` tells the caller the data could not be compressed.
            return None

        def decompress(self, data):
            # Provided because the revlog compressor contract requires a
            # ``decompress(data)`` method on engines that declare a revlog
            # header.
            # NOTE(review): assumes chunks routed here are stored verbatim,
            # with the leading b'\0' being part of the payload -- confirm
            # against the revlog class, which special-cases this header.
            return data

    def revlogcompressor(self, opts=None):
        return self.nooprevlogcompressor()
635 637
636 638
637 639 compengines.register(_noopengine())
638 640
639 641
class _zstdengine(compressionengine):
    def name(self):
        return b'zstd'

    @propertycache
    def _module(self):
        # Not all installs have the zstd module available. So defer importing
        # until first access.
        try:
            from .. import zstd  # pytype: disable=import-error

            # Force delayed import.
            zstd.__version__
        except ImportError:
            return None
        return zstd

    def available(self):
        return bool(self._module)

    def bundletype(self):
        """A modern compression algorithm that is fast and highly flexible.

        Only supported by Mercurial 4.1 and newer clients.

        With the default settings, zstd compression is both faster and yields
        better compression than ``gzip``. It also frequently yields better
        compression than ``bzip2`` while operating at much higher speeds.

        If this engine is available and backwards compatibility is not a
        concern, it is likely the best available engine.
        """
        return b'zstd', b'ZS'

    def wireprotosupport(self):
        return compewireprotosupport(b'zstd', 50, 50)

    def revlogheader(self):
        return b'\x28'

    def compressstream(self, it, opts=None):
        # zstd level 3 is almost always significantly faster than zlib
        # while providing no worse compression. It strikes a good balance
        # between speed and compression.
        level = (opts or {}).get(b'level', 3)

        compressor = self._module.ZstdCompressor(level=level).compressobj()
        for chunk in it:
            out = compressor.compress(chunk)
            # Only forward calls that actually produced output.
            if out:
                yield out

        yield compressor.flush()

    def decompressorreader(self, fh):
        return _ZstdCompressedStreamReader(fh, self._module)

    class zstdrevlogcompressor(object):
        """Reusable zstd compressor/decompressor for revlog chunks."""

        def __init__(self, zstd, level=3):
            # TODO consider omitting frame magic to save 4 bytes.
            # This writes content sizes into the frame header. That is
            # extra storage. But it allows a correct size memory allocation
            # to hold the result.
            self._cctx = zstd.ZstdCompressor(level=level)
            self._dctx = zstd.ZstdDecompressor()
            self._compinsize = zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE
            self._decompinsize = zstd.DECOMPRESSION_RECOMMENDED_INPUT_SIZE

        def compress(self, data):
            """Return compressed ``data``, or None if that would not be
            smaller than the input."""
            insize = len(data)
            # Caller handles empty input case.
            assert insize > 0

            # Inputs shorter than 50 bytes are never compressed.
            if insize < 50:
                return None

            if insize <= 1000000:
                compressed = self._cctx.compress(data)
                return compressed if len(compressed) < insize else None

            # Larger inputs are fed to a streaming compressor piecewise.
            z = self._cctx.compressobj()
            pieces = []
            start = 0
            while start < insize:
                stop = start + self._compinsize
                piece = z.compress(data[start:stop])
                if piece:
                    pieces.append(piece)
                start = stop
            pieces.append(z.flush())

            if sum(map(len, pieces)) < insize:
                return b''.join(pieces)
            return None

        def decompress(self, data):
            """Return decompressed ``data``; any failure is re-raised as
            ``error.StorageError``."""
            insize = len(data)

            try:
                # This was measured to be faster than other streaming
                # decompressors.
                dobj = self._dctx.decompressobj()
                pieces = []
                start = 0
                while start < insize:
                    stop = start + self._decompinsize
                    piece = dobj.decompress(data[start:stop])
                    if piece:
                        pieces.append(piece)
                    start = stop
                # Frame should be exhausted, so no finish() API.

                return b''.join(pieces)
            except Exception as e:
                raise error.StorageError(
                    _(b'revlog decompress error: %s')
                    % stringutil.forcebytestr(e)
                )

    def revlogcompressor(self, opts=None):
        options = opts or {}
        # The engine-specific key wins over the generic one; level 3 is the
        # fallback when neither is set.
        for key in (b'zstd.level', b'level'):
            level = options.get(key)
            if level is not None:
                break
        else:
            level = 3
        return self.zstdrevlogcompressor(self._module, level=level)
771 773
772 774
773 775 compengines.register(_zstdengine())
774 776
775 777
def bundlecompressiontopics():
    """Build the help items describing available bundle compressions.

    Returns a dict mapping each user-facing bundle spec name to an object
    whose ``__doc__`` holds the formatted help text for that engine.
    Engines that are unavailable, or that declare no user-facing bundle
    name, are left out.
    """
    # help.makeitemsdocs() expects a dict of names to items with a .__doc__.
    topics = {}

    # We need to format the docstring. So use a dummy object/type to hold it
    # rather than mutating the original.
    class docobject(object):
        pass

    for enginename in compengines:
        engine = compengines[enginename]
        if not engine.available():
            continue

        bundleinfo = engine.bundletype()
        if not (bundleinfo and bundleinfo[0]):
            continue
        bundlename = bundleinfo[0]

        # NOTE(review): whitespace inside this literal may have been
        # collapsed by the page this was recovered from -- verify against
        # the repository before applying.
        doc = b'``%s``\n %s' % (bundlename, pycompat.getdoc(engine.bundletype))

        item = docobject()
        item.__doc__ = pycompat.sysstr(doc)
        item._origdoc = engine.bundletype.__doc__
        item._origfunc = engine.bundletype

        topics[bundlename] = item

    return topics
806 808
807 809
808 810 i18nfunctions = bundlecompressiontopics().values()
@@ -1,204 +1,243 b''
1 1 A new repository uses zlib storage, which doesn't need a requirement
2 2
3 3 $ hg init default
4 4 $ cd default
5 5 $ cat .hg/requires
6 6 dotencode
7 7 fncache
8 8 generaldelta
9 9 revlogv1
10 10 sparserevlog
11 11 store
12 12 testonly-simplestore (reposimplestore !)
13 13
14 14 $ touch foo
15 15 $ hg -q commit -A -m 'initial commit with a lot of repeated repeated repeated text to trigger compression'
16 16 $ hg debugrevlog -c | grep 0x78
17 17 0x78 (x) : 1 (100.00%)
18 18 0x78 (x) : 110 (100.00%)
19 19
20 20 $ cd ..
21 21
22 22 An unknown compression engine given to format.revlog-compression aborts
23 23
24 24 $ hg --config format.revlog-compression=unknown init unknown
25 25 abort: compression engines "unknown" defined by format.revlog-compression not available
26 26 (run "hg debuginstall" to list available compression engines)
27 27 [255]
28 28
29 29 An unknown compression engine in a list with a known one works fine
30 30
31 31 $ hg --config format.revlog-compression=zlib,unknown init zlib-before-unknow
32 32 $ hg --config format.revlog-compression=unknown,zlib init unknown-before-zlib
33 33
34 34 A requirement specifying an unknown compression engine results in an abort
35 35
36 36 $ hg init unknownrequirement
37 37 $ cd unknownrequirement
38 38 $ echo exp-compression-unknown >> .hg/requires
39 39 $ hg log
40 40 abort: repository requires features unknown to this Mercurial: exp-compression-unknown
41 41 (see https://mercurial-scm.org/wiki/MissingRequirement for more information)
42 42 [255]
43 43
44 44 $ cd ..
45 45
46 Specifying a new format.compression on an existing repo won't introduce data
47 with that engine or a requirement
48
49 $ cd default
50 $ touch bar
51 $ hg --config format.revlog-compression=none -q commit -A -m 'add bar with a lot of repeated repeated repeated text'
52
53 $ cat .hg/requires
54 dotencode
55 fncache
56 generaldelta
57 revlogv1
58 sparserevlog
59 store
60 testonly-simplestore (reposimplestore !)
61
62 $ hg debugrevlog -c | grep 0x78
63 0x78 (x) : 2 (100.00%)
64 0x78 (x) : 199 (100.00%)
65 $ cd ..
66
46 67 #if zstd
47 68
48 69 $ hg --config format.revlog-compression=zstd init zstd
49 70 $ cd zstd
50 71 $ cat .hg/requires
51 72 dotencode
52 73 fncache
53 74 generaldelta
54 75 revlog-compression-zstd
55 76 revlogv1
56 77 sparserevlog
57 78 store
58 79 testonly-simplestore (reposimplestore !)
59 80
60 81 $ touch foo
61 82 $ hg -q commit -A -m 'initial commit with a lot of repeated repeated repeated text'
62 83
63 84 $ hg debugrevlog -c | grep 0x28
64 85 0x28 : 1 (100.00%)
65 86 0x28 : 98 (100.00%)
66 87
67 88 $ cd ..
68 89
69 Specifying a new format.compression on an existing repo won't introduce data
70 with that engine or a requirement
71
72 $ cd default
73 $ touch bar
74 $ hg --config format.revlog-compression=zstd -q commit -A -m 'add bar with a lot of repeated repeated repeated text'
75
76 $ cat .hg/requires
77 dotencode
78 fncache
79 generaldelta
80 revlogv1
81 sparserevlog
82 store
83 testonly-simplestore (reposimplestore !)
84
85 $ hg debugrevlog -c | grep 0x78
86 0x78 (x) : 2 (100.00%)
87 0x78 (x) : 199 (100.00%)
88 90
89 91 #endif
90 92
91 93 checking zlib options
92 94 =====================
93 95
94 96 $ hg init zlib-level-default
95 97 $ hg init zlib-level-1
96 98 $ cat << EOF >> zlib-level-1/.hg/hgrc
97 99 > [storage]
98 100 > revlog.zlib.level=1
99 101 > EOF
100 102 $ hg init zlib-level-9
101 103 $ cat << EOF >> zlib-level-9/.hg/hgrc
102 104 > [storage]
103 105 > revlog.zlib.level=9
104 106 > EOF
105 107
106 108
107 109 $ commitone() {
108 110 > repo=$1
109 111 > cp $RUNTESTDIR/bundles/issue4438-r1.hg $repo/a
110 112 > hg -R $repo add $repo/a
111 113 > hg -R $repo commit -m some-commit
112 114 > }
113 115
114 116 $ for repo in zlib-level-default zlib-level-1 zlib-level-9; do
115 117 > commitone $repo
116 118 > done
117 119
118 120 $ $RUNTESTDIR/f -s */.hg/store/data/*
119 default/.hg/store/data/foo.i: size=64 (pure !)
121 default/.hg/store/data/bar.i: size=64
122 default/.hg/store/data/foo.i: size=64
120 123 zlib-level-1/.hg/store/data/a.i: size=4146
121 124 zlib-level-9/.hg/store/data/a.i: size=4138
122 125 zlib-level-default/.hg/store/data/a.i: size=4138
126 zstd/.hg/store/data/foo.i: size=64 (zstd !)
123 127
124 128 Test error cases
125 129
126 130 $ hg init zlib-level-invalid
127 131 $ cat << EOF >> zlib-level-invalid/.hg/hgrc
128 132 > [storage]
129 133 > revlog.zlib.level=foobar
130 134 > EOF
131 135 $ commitone zlib-level-invalid
132 136 config error: storage.revlog.zlib.level is not a valid integer ('foobar')
133 137 config error: storage.revlog.zlib.level is not a valid integer ('foobar')
134 138 [30]
135 139
136 140 $ hg init zlib-level-out-of-range
137 141 $ cat << EOF >> zlib-level-out-of-range/.hg/hgrc
138 142 > [storage]
139 143 > revlog.zlib.level=42
140 144 > EOF
141 145
142 146 $ commitone zlib-level-out-of-range
143 147 abort: invalid value for `storage.revlog.zlib.level` config: 42
144 148 abort: invalid value for `storage.revlog.zlib.level` config: 42
145 149 [255]
146 150
151 checking details of none compression
152 ====================================
153
154 $ hg init none-compression --config format.revlog-compression=none
155
156 $ commitone() {
157 > repo=$1
158 > cp $RUNTESTDIR/bundles/issue4438-r1.hg $repo/a
159 > hg -R $repo add $repo/a
160 > hg -R $repo commit -m some-commit
161 > }
162
163 $ commitone none-compression
164
165 $ hg log -R none-compression
166 changeset: 0:68b53da39cd8
167 tag: tip
168 user: test
169 date: Thu Jan 01 00:00:00 1970 +0000
170 summary: some-commit
171
172
173 $ cat none-compression/.hg/requires
174 dotencode
175 exp-compression-none
176 fncache
177 generaldelta
178 revlogv1
179 sparserevlog
180 store
181 testonly-simplestore (reposimplestore !)
182
183 $ $RUNTESTDIR/f -s none-compression/.hg/store/data/*
184 none-compression/.hg/store/data/a.i: size=4216
185
147 186 #if zstd
148 187
149 188 checking zstd options
150 189 =====================
151 190
152 191 $ hg init zstd-level-default --config format.revlog-compression=zstd
153 192 $ hg init zstd-level-1 --config format.revlog-compression=zstd
154 193 $ cat << EOF >> zstd-level-1/.hg/hgrc
155 194 > [storage]
156 195 > revlog.zstd.level=1
157 196 > EOF
158 197 $ hg init zstd-level-22 --config format.revlog-compression=zstd
159 198 $ cat << EOF >> zstd-level-22/.hg/hgrc
160 199 > [storage]
161 200 > revlog.zstd.level=22
162 201 > EOF
163 202
164 203
165 204 $ commitone() {
166 205 > repo=$1
167 206 > cp $RUNTESTDIR/bundles/issue4438-r1.hg $repo/a
168 207 > hg -R $repo add $repo/a
169 208 > hg -R $repo commit -m some-commit
170 209 > }
171 210
172 211 $ for repo in zstd-level-default zstd-level-1 zstd-level-22; do
173 212 > commitone $repo
174 213 > done
175 214
176 215 $ $RUNTESTDIR/f -s zstd-*/.hg/store/data/*
177 216 zstd-level-1/.hg/store/data/a.i: size=4114
178 217 zstd-level-22/.hg/store/data/a.i: size=4091
179 218 zstd-level-default/\.hg/store/data/a\.i: size=(4094|4102) (re)
180 219
181 220 Test error cases
182 221
183 222 $ hg init zstd-level-invalid --config format.revlog-compression=zstd
184 223 $ cat << EOF >> zstd-level-invalid/.hg/hgrc
185 224 > [storage]
186 225 > revlog.zstd.level=foobar
187 226 > EOF
188 227 $ commitone zstd-level-invalid
189 228 config error: storage.revlog.zstd.level is not a valid integer ('foobar')
190 229 config error: storage.revlog.zstd.level is not a valid integer ('foobar')
191 230 [30]
192 231
193 232 $ hg init zstd-level-out-of-range --config format.revlog-compression=zstd
194 233 $ cat << EOF >> zstd-level-out-of-range/.hg/hgrc
195 234 > [storage]
196 235 > revlog.zstd.level=42
197 236 > EOF
198 237
199 238 $ commitone zstd-level-out-of-range
200 239 abort: invalid value for `storage.revlog.zstd.level` config: 42
201 240 abort: invalid value for `storage.revlog.zstd.level` config: 42
202 241 [255]
203 242
204 243 #endif
General Comments 0
You need to be logged in to leave comments. Login now