upstream/mercurial-mirror Files · mercurial/py3kcompat.py

encoding: avoid localstr when a string can be encoded losslessly (issue2763)...

encoding: avoid localstr when a string can be encoded losslessly (issue2763) localstr's hash method exists to prevent bogus matching on lossy local encodings. For instance, we don't want 'caf?' to match 'café' in an ASCII locale. But when café can be losslessly encoded in the local charset, we can simply use a normal string and avoid the hashing trick. This avoids using localstr's hash method, which would prevent a match between

Renato Cunha - - Load All Authors

File last commit:

r11878:8bb1481c default


                r13940:b7b26e54

stable

Download file

             py3kcompat.py
        
                    72 lines
            
             | 2.3 KiB
            
                | text/x-python
            
             |
                PythonLexer
            
             / mercurial / py3kcompat.py
          
                    History
                
                 |
                  Annotation
                 | Raw
                 |Copy content
                 |Copy permalink

      # py3kcompat.py - compatibility definitions for running hg in py3k

      #

      # Copyright 2010 Renato Cunha <renatoc@gmail.com>

      #

      # This software may be used and distributed according to the terms of the

      # GNU General Public License version 2 or any later version.

      import os, builtins

      from numbers import Number

      def bytesformatter(format, args):
          '''Custom implementation of a formatter for bytestrings.

          This function currently relies on the string formatter to do the
          formatting and always returns bytes objects.

          format may be bytes or str. args may be a single value, a tuple of
          values, or a dict (for %(name)s style formats). Bytes found in
          format, in args itself, in the tuple items or in the dict keys and
          values are decoded as UTF-8 with the surrogateescape error handler
          before formatting, and the result is encoded back the same way.

          If format is a number, the plain remainder ``format % args`` is
          returned unchanged.

          >>> bytesformatter(20, 10)
          0
          >>> bytesformatter('unicode %s, %s!', ('string', 'foo'))
          b'unicode string, foo!'
          >>> bytesformatter(b'test %s', 'me')
          b'test me'
          >>> bytesformatter('test %s', 'me')
          b'test me'
          >>> bytesformatter(b'test %s', b'me')
          b'test me'
          >>> bytesformatter('test %s', b'me')
          b'test me'
          >>> bytesformatter('test %d: %s', (1, b'result'))
          b'test 1: result'
          >>> bytesformatter(b'%(who)s: %(n)d', {b'who': b'me', 'n': 2})
          b'me: 2'
          '''
          # The current implementation just converts from bytes to unicode,
          # does what's needed and then converts the result back to bytes.
          # Another alternative is to use the Python C API implementation.
          if isinstance(format, Number):
              # If the fixer erroneously passes a number remainder operation to
              # bytesformatter, we just return the correct operation
              return format % args

          format = _tounicode(format)
          if isinstance(args, bytes):
              args = _tounicode(args)
          elif isinstance(args, tuple):
              args = tuple(_tounicode(arg) for arg in args)
          elif isinstance(args, dict):
              # Rebuild the mapping only when it actually holds bytes, so that
              # str-only mappings (including dict subclasses) pass through
              # untouched.
              if any(isinstance(k, bytes) or isinstance(v, bytes)
                     for k, v in args.items()):
                  args = {_tounicode(k): _tounicode(v) for k, v in args.items()}

          ret = format % args
          return ret.encode('utf-8', 'surrogateescape')

      def _tounicode(value):
          '''Decode value as UTF-8 (surrogateescape) if it is bytes.

          Any other object is returned unchanged.
          '''
          if isinstance(value, bytes):
              return value.decode('utf-8', 'surrogateescape')
          return value

      # Make the formatter reachable from every module without an import.
      builtins.bytesformatter = bytesformatter

      # Create bytes equivalents for os.environ values. UTF-8 with
      # surrogateescape is fine for us.
      #
      # On Python 3.2 and later os.environ only accepts str keys and values
      # and raises TypeError for bytes (bytes access is offered by os.environb
      # where the platform supports it). In that case leave the environment
      # alone instead of failing the whole import.
      try:
          for key in list(os.environ):
              bkey = key.encode('utf-8', 'surrogateescape')
              bvalue = os.environ[key].encode('utf-8', 'surrogateescape')
              os.environ[bkey] = bvalue
      except TypeError:
          pass

      # Keep a handle on the real ord() before it is replaced below.
      origord = builtins.ord

      def fakeord(char):
          '''Return char unchanged if it is already an int, else ord(char).

          Presumably needed because indexing a bytes object in Python 3
          yields ints, which the stock ord() refuses.
          '''
          return char if isinstance(char, int) else origord(char)

      # Install the tolerant version for every module.
      builtins.ord = fakeord

      if __name__ == '__main__':
          # Run the doctests embedded in this module when executed directly.
          from doctest import testmod
          testmod()

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

				# py3kcompat.py - compatibility definitions for running hg in py3k
				#
				# Copyright 2010 Renato Cunha <renatoc@gmail.com>
				#
				# This software may be used and distributed according to the terms of the
				# GNU General Public License version 2 or any later version.

				import os, builtins

				from numbers import Number

				def bytesformatter(format, args):
				    '''Custom implementation of a formatter for bytestrings.

				    This function currently relies on the string formatter to do the
				    formatting and always returns bytes objects.

				    format may be bytes or str. args may be a single value, a tuple of
				    values, or a dict (for %(name)s style formats). Bytes found in
				    format, in args itself, in the tuple items or in the dict keys and
				    values are decoded as UTF-8 with the surrogateescape error handler
				    before formatting, and the result is encoded back the same way.

				    If format is a number, the plain remainder ``format % args`` is
				    returned unchanged.

				    >>> bytesformatter(20, 10)
				    0
				    >>> bytesformatter('unicode %s, %s!', ('string', 'foo'))
				    b'unicode string, foo!'
				    >>> bytesformatter(b'test %s', 'me')
				    b'test me'
				    >>> bytesformatter('test %s', 'me')
				    b'test me'
				    >>> bytesformatter(b'test %s', b'me')
				    b'test me'
				    >>> bytesformatter('test %s', b'me')
				    b'test me'
				    >>> bytesformatter('test %d: %s', (1, b'result'))
				    b'test 1: result'
				    >>> bytesformatter(b'%(who)s: %(n)d', {b'who': b'me', 'n': 2})
				    b'me: 2'
				    '''
				    # The current implementation just converts from bytes to unicode,
				    # does what's needed and then converts the result back to bytes.
				    # Another alternative is to use the Python C API implementation.
				    if isinstance(format, Number):
				        # If the fixer erroneously passes a number remainder operation to
				        # bytesformatter, we just return the correct operation
				        return format % args

				    format = _tounicode(format)
				    if isinstance(args, bytes):
				        args = _tounicode(args)
				    elif isinstance(args, tuple):
				        args = tuple(_tounicode(arg) for arg in args)
				    elif isinstance(args, dict):
				        # Rebuild the mapping only when it actually holds bytes, so that
				        # str-only mappings (including dict subclasses) pass through
				        # untouched.
				        if any(isinstance(k, bytes) or isinstance(v, bytes)
				               for k, v in args.items()):
				            args = {_tounicode(k): _tounicode(v) for k, v in args.items()}

				    ret = format % args
				    return ret.encode('utf-8', 'surrogateescape')

				def _tounicode(value):
				    '''Decode value as UTF-8 (surrogateescape) if it is bytes.

				    Any other object is returned unchanged.
				    '''
				    if isinstance(value, bytes):
				        return value.decode('utf-8', 'surrogateescape')
				    return value

				# Make the formatter reachable from every module without an import.
				builtins.bytesformatter = bytesformatter

				# Create bytes equivalents for os.environ values. UTF-8 with
				# surrogateescape is fine for us.
				#
				# On Python 3.2 and later os.environ only accepts str keys and values
				# and raises TypeError for bytes (bytes access is offered by os.environb
				# where the platform supports it). In that case leave the environment
				# alone instead of failing the whole import.
				try:
				    for key in list(os.environ):
				        bkey = key.encode('utf-8', 'surrogateescape')
				        bvalue = os.environ[key].encode('utf-8', 'surrogateescape')
				        os.environ[bkey] = bvalue
				except TypeError:
				    pass

				# Keep a handle on the real ord() before it is replaced below.
				origord = builtins.ord

				def fakeord(char):
				    '''Return char unchanged if it is already an int, else ord(char).

				    Presumably needed because indexing a bytes object in Python 3
				    yields ints, which the stock ord() refuses.
				    '''
				    if isinstance(char, int):
				        return char
				    return origord(char)

				# Install the tolerant version for every module.
				builtins.ord = fakeord

				if __name__ == '__main__':
				    # Run the doctests embedded in this module when executed directly.
				    import doctest
				    doctest.testmod()