upstream/mercurial-mirror Files · mercurial/pure/base85.py

diff: do not concatenate immutable bytes while building a/b bodies (issue6445)...

diff: do not concatenate immutable bytes while building a/b bodies (issue6445) Use bytearray instead. I don't know what's changed since Python 2, but bytes concatenation is 100x slower on Python 3. % python2.7 -m timeit -s "s = b''" "for i in range(10000): s += b'line'" 1000 loops, best of 3: 321 usec per loop % python3.9 -m timeit -s "s = b''" "for i in range(10000): s += b'line'" 5 loops, best of 5: 39.2 msec per loop Benchmark using tailwind.css (measuring the fast path, a is empty): % HGRCPATH=/dev/null python2.7 ./hg log -R /tmp/issue6445 -p --time \ --color=always --config diff.word-diff=true >/dev/null (prev) time: real 1.580 secs (user 1.560+0.000 sys 0.020+0.000) (this) time: real 1.610 secs (user 1.570+0.000 sys 0.030+0.000) % HGRCPATH=/dev/null python3.9 ./hg log -R /tmp/issue6445 -p --time \ --color=always --config diff.word-diff=true >/dev/null (prev) time: real 114.500 secs (user 114.460+0.000 sys 0.030+0.000) (this) time: real 2.180 secs (user 2.140+0.000 sys 0.040+0.000) Benchmark using random tabular text data (not the fast path): % dd if=/dev/urandom bs=1k count=1000 | hexdump -v -e '16/1 "%3u," "\n"' > ttf % hg ci -ma % dd if=/dev/urandom bs=1k count=1000 | hexdump -v -e '16/1 "%3u," "\n"' > ttf % hg ci -mb % HGRCPATH=/dev/null python2.7 ./hg log -R /tmp/issue6445 -p --time \ --color=always --config diff.word-diff=true >/dev/null (prev) time: real 3.240 secs (user 3.040+0.000 sys 0.200+0.000) (this) time: real 3.230 secs (user 3.070+0.000 sys 0.160+0.000) % HGRCPATH=/dev/null python3.9 ./hg log -R /tmp/issue6445 -p --time \ --color=always --config diff.word-diff=true >/dev/null (prev) time: real 44.130 secs (user 43.850+0.000 sys 0.270+0.000) (this) time: real 4.170 secs (user 3.850+0.000 sys 0.310+0.000)

marmoute - - Load All Authors

File last commit:

r44082:4cd91104 stable


                r46624:210f9b8d

stable

Download file

             base85.py
        
                    88 lines
            
             | 2.0 KiB
            
                | text/x-python
            
             |
                PythonLexer
            
             / mercurial / pure / base85.py
          
                    History
                
                 |
                  Annotation
                 | Raw
                 |Copy content
                 |Copy permalink

      # base85.py: pure python base85 codec

      #

      # Copyright (C) 2009 Brendan Cully <brendan@kublai.com>

      #

      # This software may be used and distributed according to the terms of the

      # GNU General Public License version 2 or any later version.

      from __future__ import absolute_import

      import struct

      from .. import pycompat

      # The 85-symbol alphabet; a symbol's offset in this string is its digit
      # value.
      _b85chars = pycompat.bytestr(
          b"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdef"
          b"ghijklmnopqrstuvwxyz!#$%&()*+-;<=>?@^_`{|}~"
      )
      # Every ordered pair of alphabet entries, first entry varying slowest, so
      # b85encode can look up two output symbols with one index.
      # NOTE(review): relies on iterating a pycompat.bytestr yielding
      # one-symbol byte strings rather than ints - confirm in pycompat.
      _b85chars2 = [hi + lo for hi in _b85chars for lo in _b85chars]
      # Symbol -> digit value; starts empty and is filled by _mkb85dec().
      _b85dec = {}

      def _mkb85dec():
          """Fill the module-level ``_b85dec`` table.

          Each entry of ``_b85chars`` is mapped to its position in the
          alphabet, which is the digit value used when decoding.
          """
          _b85dec.update((sym, pos) for pos, sym in enumerate(_b85chars))

      def b85encode(text, pad=False):
          """encode text in base85 format

          ``text`` is read as a sequence of big-endian unsigned 32-bit words
          and every word is written as five symbols of ``_b85chars``.  When
          ``len(text)`` is not a multiple of 4, the last word is completed
          with NUL bytes before encoding.

          With ``pad`` true, all five symbols of every word are returned.
          Otherwise the output is cut so that ``n`` trailing input bytes
          (1 <= n <= 3) contribute only ``n + 1`` symbols.

          The argument itself is never modified.
          """
          l = len(text)
          r = l % 4
          if r:
              # Build a new object instead of using ``+=``: on a mutable
              # buffer such as a bytearray, ``+=`` extends the caller's own
              # object in place and would leak the NUL padding to the caller.
              text = text + b'\0' * (4 - r)
          longs = len(text) >> 2
          words = struct.unpack(b'>%dL' % longs, text)

          # 52200625 == 85 ** 4 and 7225 == 85 ** 2: one leading symbol plus
          # two precomputed symbol pairs make up the five base-85 digits.
          out = b''.join(
              _b85chars[(word // 52200625) % 85]
              + _b85chars2[(word // 7225) % 7225]
              + _b85chars2[word % 7225]
              for word in words
          )

          if pad:
              return out

          # Trim padding: five symbols per complete word, plus r + 1 symbols
          # for a trailing partial word of r bytes.
          olen = l // 4 * 5
          if r:
              olen += r + 1
          return out[:olen]

      def b85decode(text):
          """decode base85-encoded text

          ``text`` is consumed in groups of five symbols, each group giving
          one big-endian unsigned 32-bit word.  A shorter final group of
          ``n`` symbols is scaled up to a full word and yields ``n - 1``
          bytes.

          Raises ValueError if a symbol is not in the base85 alphabet or if a
          group (including the padded final group) does not fit in 32 bits.
          """
          if not _b85dec:
              _mkb85dec()

          l = len(text)
          out = []
          for i in range(0, l, 5):
              chunk = pycompat.bytestr(text[i : i + 5])
              acc = 0
              for j, c in enumerate(chunk):
                  try:
                      acc = acc * 85 + _b85dec[c]
                  except KeyError:
                      raise ValueError(
                          'bad base85 character at position %d' % (i + j)
                      )
              if acc > 4294967295:
                  raise ValueError('Base85 overflow in hunk starting at byte %d' % i)
              out.append(acc)

          # Pad final chunk if necessary
          cl = l % 5
          if cl:
              # ``acc`` still holds the value of the last (short) chunk; shift
              # it up by the number of missing base-85 digits.
              acc *= 85 ** (5 - cl)
              if cl > 1:
                  # Set the low-order bytes, which are sliced off below, to
                  # all ones.
                  acc += 0xFFFFFF >> (cl - 2) * 8
              # Scaling and padding can push a malformed tail past 32 bits.
              # Report that as ValueError like any other bad input instead of
              # letting struct.pack fail with struct.error.
              if acc > 4294967295:
                  raise ValueError(
                      'Base85 overflow in hunk starting at byte %d' % (l - cl)
                  )
              out[-1] = acc

          out = struct.pack(b'>%dL' % (len(out)), *out)
          if cl:
              # Drop the bytes that exist only because of the padding.
              out = out[: -(5 - cl)]

          return out

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

				# base85.py: pure python base85 codec
				#
				# Copyright (C) 2009 Brendan Cully <brendan@kublai.com>
				#
				# This software may be used and distributed according to the terms of the
				# GNU General Public License version 2 or any later version.

				from __future__ import absolute_import

				import struct

				from .. import pycompat

				# The 85-symbol alphabet; a symbol's offset in this string is its digit
				# value.  The literal must contain exactly 85 symbols and no backslash.
				_b85chars = pycompat.bytestr(
				    b"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdef"
				    b"ghijklmnopqrstuvwxyz!#$%&()*+-;<=>?@^_`{|}~"
				)
				# Every ordered pair of alphabet entries, first entry varying slowest, so
				# b85encode can look up two output symbols with one index.
				_b85chars2 = [(a + b) for a in _b85chars for b in _b85chars]
				# Symbol -> digit value; starts empty and is filled by _mkb85dec().
				_b85dec = {}


				def _mkb85dec():
				    """Fill the module-level ``_b85dec`` table.

				    Each entry of ``_b85chars`` is mapped to its position in the
				    alphabet, which is the digit value used when decoding.
				    """
				    _b85dec.update({sym: pos for pos, sym in enumerate(_b85chars)})


				def b85encode(text, pad=False):
				    """encode text in base85 format

				    ``text`` is read as a sequence of big-endian unsigned 32-bit words
				    and every word is written as five symbols of ``_b85chars``.  When
				    ``len(text)`` is not a multiple of 4, the last word is completed
				    with NUL bytes before encoding.

				    With ``pad`` true, all five symbols of every word are returned.
				    Otherwise the output is cut so that ``n`` trailing input bytes
				    (1 <= n <= 3) contribute only ``n + 1`` symbols.

				    The argument itself is never modified.
				    """
				    l = len(text)
				    r = l % 4
				    if r:
				        # Build a new object instead of using ``+=``: on a mutable
				        # buffer such as a bytearray, ``+=`` extends the caller's own
				        # object in place and would leak the NUL padding to the caller.
				        text = text + b'\0' * (4 - r)
				    longs = len(text) >> 2
				    words = struct.unpack(b'>%dL' % longs, text)

				    # 52200625 == 85 ** 4 and 7225 == 85 ** 2: one leading symbol plus
				    # two precomputed symbol pairs make up the five base-85 digits.
				    out = b''.join(
				        _b85chars[(word // 52200625) % 85]
				        + _b85chars2[(word // 7225) % 7225]
				        + _b85chars2[word % 7225]
				        for word in words
				    )

				    if pad:
				        return out

				    # Trim padding: five symbols per complete word, plus r + 1 symbols
				    # for a trailing partial word of r bytes.
				    olen = l // 4 * 5
				    if r:
				        olen += r + 1
				    return out[:olen]


				def b85decode(text):
				    """decode base85-encoded text

				    ``text`` is consumed in groups of five symbols, each group giving
				    one big-endian unsigned 32-bit word.  A shorter final group of
				    ``n`` symbols is scaled up to a full word and yields ``n - 1``
				    bytes.

				    Raises ValueError if a symbol is not in the base85 alphabet or if a
				    group (including the padded final group) does not fit in 32 bits.
				    """
				    if not _b85dec:
				        _mkb85dec()

				    l = len(text)
				    out = []
				    for i in range(0, l, 5):
				        chunk = pycompat.bytestr(text[i : i + 5])
				        acc = 0
				        for j, c in enumerate(chunk):
				            try:
				                acc = acc * 85 + _b85dec[c]
				            except KeyError:
				                raise ValueError(
				                    'bad base85 character at position %d' % (i + j)
				                )
				        if acc > 4294967295:
				            raise ValueError('Base85 overflow in hunk starting at byte %d' % i)
				        out.append(acc)

				    # Pad final chunk if necessary
				    cl = l % 5
				    if cl:
				        # ``acc`` still holds the value of the last (short) chunk.  It
				        # must be multiplied by 85 once per missing digit; overwriting
				        # it would throw the decoded digits away.
				        acc *= 85 ** (5 - cl)
				        if cl > 1:
				            # Set the low-order bytes, which are sliced off below, to
				            # all ones.
				            acc += 0xFFFFFF >> (cl - 2) * 8
				        # Scaling and padding can push a malformed tail past 32 bits.
				        # Report that as ValueError like any other bad input instead of
				        # letting struct.pack fail with struct.error.
				        if acc > 4294967295:
				            raise ValueError(
				                'Base85 overflow in hunk starting at byte %d' % (l - cl)
				            )
				        out[-1] = acc

				    out = struct.pack(b'>%dL' % (len(out)), *out)
				    if cl:
				        # Drop the bytes that exist only because of the padding.
				        out = out[: -(5 - cl)]

				    return out