upstream/mercurial-mirror Files · mercurial/pure/base85.py

revset: improve performance of _generatorset.__contains__ (issue 4201)...

revset: improve performance of _generatorset.__contains__ (issue 4201). _generatorset.__contains__ and __contains__ from child classes were calling into __iter__ to look for values. Since all previously-encountered values from the generator were cached and checked in __contains__ before this iteration, __contains__ was effectively performing iteration busy work which could lead to an explosion of redundant work. This patch changes __contains__ to be more intelligent. Instead of looking at all values via __iter__, __contains__ will instead go straight to "new" values from the underlying generator. On a clone of the Firefox repository with around 200,000 changesets, this patch decreases the execution time of the revset '::(200067)::' from ~100s to ~4s on the author's machine. Rebase operations (which use the aforementioned revset) speed up accordingly.

Patrick Mezard - - Load All Authors

File last commit:

r16598:20a9d823 stable


                r20828:3210b793

default

Download file

             base85.py
        
                    75 lines
            
             | 1.9 KiB
            
                | text/x-python
            
             |
                PythonLexer
            
             / mercurial / pure / base85.py
          
                    History
                
                 |
                  Annotation
                 | Raw
                 |Copy content
                 |Copy permalink

      # base85.py: pure python base85 codec

      #

      # Copyright (C) 2009 Brendan Cully <brendan@kublai.com>

      #

      # This software may be used and distributed according to the terms of the

      # GNU General Public License version 2 or any later version.

      import struct

      # The 85-character alphabet; a character's index is its digit value.
      # Parentheses (not a trailing backslash) join the two halves so the
      # literal survives blank lines or reflowing between them.
      _b85chars = ("0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
                   "abcdefghijklmnopqrstuvwxyz!#$%&()*+-;<=>?@^_`{|}~")
      # Every two-character combination, so that entry n is the two-digit
      # base85 rendering of n (0 <= n < 7225); b85encode indexes it directly.
      _b85chars2 = [(a + b) for a in _b85chars for b in _b85chars]
      # Character -> digit value; left empty here and filled by _mkb85dec().
      _b85dec = {}

      def _mkb85dec():
          """Fill ``_b85dec`` so each alphabet character maps to its index."""
          _b85dec.update((char, value) for value, char in enumerate(_b85chars))

      def b85encode(text, pad=False):
          """Return the base85 encoding of ``text``.

          The input is extended with NUL characters to a multiple of four and
          read as big-endian unsigned 32-bit words; every word yields five
          output characters.  When ``pad`` is true the full five characters
          per word are returned, otherwise the output is cut back to the
          length that corresponds to the unpadded input.
          """
          size = len(text)
          whole_words, tail = divmod(size, 4)
          if tail:
              text += '\0' * (4 - tail)
          count = len(text) >> 2

          pieces = []
          for value in struct.unpack('>%dL' % (count), text):
              # One leading digit followed by two pre-computed digit pairs.
              pieces.append(_b85chars[(value // 52200625) % 85])
              pieces.append(_b85chars2[(value // 7225) % 7225])
              pieces.append(_b85chars2[value % 7225])
          encoded = ''.join(pieces)

          if pad:
              return encoded

          # A trailing partial group of k input characters keeps k + 1 output
          # characters; every complete group keeps all five.
          keep = whole_words * 5
          if tail:
              keep += tail + 1
          return encoded[:keep]

      def b85decode(text):
          """decode base85-encoded text

          The input is consumed five characters at a time; each group is the
          base85 rendering of one big-endian unsigned 32-bit word.  A shorter
          final group is padded before packing, and the output produced by
          that padding is cut off again.

          Raises ValueError for a character outside the base85 alphabet and
          for any group -- including the padded final one -- whose value does
          not fit in 32 bits.
          """
          if not _b85dec:
              # The reverse lookup table is built on first use.
              _mkb85dec()

          l = len(text)
          out = []
          for i in range(0, l, 5):
              chunk = text[i:i + 5]
              acc = 0
              for j, c in enumerate(chunk):
                  try:
                      acc = acc * 85 + _b85dec[c]
                  except KeyError:
                      raise ValueError('bad base85 character at position %d'
                                       % (i + j))
              if acc > 4294967295:
                  raise ValueError('Base85 overflow in hunk starting at byte %d' % i)
              out.append(acc)

          # Pad final chunk if necessary
          cl = l % 5
          if cl:
              # Shift the short group up to five digits, then set the low
              # bits so truncating the packed word recovers the leading part.
              acc *= 85 ** (5 - cl)
              if cl > 1:
                  acc += 0xffffff >> (cl - 2) * 8
              # Padding can push a malformed tail past 32 bits; report it the
              # same way as any other overflow rather than letting
              # struct.pack fail with struct.error.
              if acc > 4294967295:
                  raise ValueError('Base85 overflow in hunk starting at byte %d'
                                   % (l - cl))
              out[-1] = acc

          out = struct.pack('>%dL' % (len(out)), *out)
          if cl:
              # Drop the output that came from the padding digits.
              out = out[:-(5 - cl)]

          return out

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

				# base85.py: pure python base85 codec
				#
				# Copyright (C) 2009 Brendan Cully <brendan@kublai.com>
				#
				# This software may be used and distributed according to the terms of the
				# GNU General Public License version 2 or any later version.

				import struct

				# The 85-character alphabet; a character's index is its digit value.
				# No backslash belongs before the "|": an extra character here would
				# make the alphabet 86 long and shift every later digit value.
				_b85chars = ("0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
				             "abcdefghijklmnopqrstuvwxyz!#$%&()*+-;<=>?@^_`{|}~")
				# Every two-character combination, so that entry n is the two-digit
				# base85 rendering of n (0 <= n < 7225).
				_b85chars2 = [(a + b) for a in _b85chars for b in _b85chars]
				# Character -> digit value; left empty here and filled by _mkb85dec().
				_b85dec = {}

				def _mkb85dec():
					"""Fill ``_b85dec`` so each alphabet character maps to its index."""
					for i, c in enumerate(_b85chars):
						_b85dec[c] = i

				def b85encode(text, pad=False):
					"""encode text in base85 format

					The input is extended with NUL characters to a multiple of four
					and read as big-endian unsigned 32-bit words; every word yields
					five output characters.  With ``pad`` true all of them are
					returned, otherwise the output is trimmed to the length that
					corresponds to the unpadded input.
					"""
					l = len(text)
					r = l % 4
					if r:
						text += '\0' * (4 - r)
					longs = len(text) >> 2
					words = struct.unpack('>%dL' % (longs), text)

					# Per word: one leading digit, then two pre-computed digit pairs.
					out = ''.join(_b85chars[(word // 52200625) % 85] +
					              _b85chars2[(word // 7225) % 7225] +
					              _b85chars2[word % 7225]
					              for word in words)

					if pad:
						return out

					# Trim padding: a partial group of r input characters keeps
					# r + 1 output characters, a complete group keeps all five.
					olen = l % 4
					if olen:
						olen += 1
					olen += l // 4 * 5
					return out[:olen]

				def b85decode(text):
					"""decode base85-encoded text

					The input is consumed five characters at a time; each group is
					the base85 rendering of one big-endian unsigned 32-bit word.  A
					shorter final group is padded before packing, and the output
					produced by that padding is cut off again.

					Raises ValueError for a character outside the base85 alphabet and
					for any group -- including the padded final one -- whose value
					does not fit in 32 bits.
					"""
					if not _b85dec:
						# The reverse lookup table is built on first use.
						_mkb85dec()

					l = len(text)
					out = []
					for i in range(0, l, 5):
						chunk = text[i:i + 5]
						acc = 0
						for j, c in enumerate(chunk):
							try:
								acc = acc * 85 + _b85dec[c]
							except KeyError:
								raise ValueError('bad base85 character at position %d'
								                 % (i + j))
						if acc > 4294967295:
							raise ValueError('Base85 overflow in hunk starting at byte %d' % i)
						out.append(acc)

					# Pad final chunk if necessary
					cl = l % 5
					if cl:
						# Scale the short group up to five digits: multiply the value
						# accumulated so far by 85 once per missing digit.
						acc *= 85 ** (5 - cl)
						if cl > 1:
							# Set the low bits so truncating the packed word recovers
							# the leading part.
							acc += 0xffffff >> (cl - 2) * 8
						# Padding can push a malformed tail past 32 bits; report it
						# like any other overflow instead of failing in struct.pack.
						if acc > 4294967295:
							raise ValueError('Base85 overflow in hunk starting at byte %d'
							                 % (l - cl))
						out[-1] = acc

					out = struct.pack('>%dL' % (len(out)), *out)
					if cl:
						# Drop the output that came from the padding digits.
						out = out[:-(5 - cl)]

					return out