upstream/mercurial-mirror Files · mercurial/pure/charencode.py

py3: use pycompat.bytestr to convert _b85chars to bytes...

py3: use pycompat.bytestr to convert _b85chars to bytes. The transformer does add b'' to the value and makes it a bytes, but bytes in Python 3 returns the ASCII value (an integer) when characters are fetched by indexing. Characters of this string are queried by index multiple times in the file, and to support that we use pycompat.bytestr, which returns bytechrs on indexing. Differential Revision: https://phab.mercurial-scm.org/D2072

Yuya Nishihara - - Load All Authors

File last commit:

r34215:aa877860 default


                r35962:01b4d88c

default

Download file

             charencode.py
        
                    85 lines
            
             | 2.3 KiB
            
                | text/x-python
            
             |
                PythonLexer
            
             / mercurial / pure / charencode.py
          
                    History
                
                 |
                  Source
                 | Raw
                 |Copy content
                 |Copy permalink

        Yuya Nishihara
    
encoding: drop circular import by proxying through '<policy>.charencode'...

              r33756
            
      # charencode.py - miscellaneous character encoding

      #

      #  Copyright 2005-2009 Matt Mackall <mpm@selenic.com> and others

      #

      # This software may be used and distributed according to the terms of the

      # GNU General Public License version 2 or any later version.

      from __future__ import absolute_import

        Yuya Nishihara
    
encoding: extract stub for fast JSON escape...

              r33925
            
      import array

      from .. import (

          pycompat,

      )

        Yuya Nishihara
    
encoding: add function to test if a str consists of ASCII characters...

              r33927
            
      def isasciistr(s):
          """Report whether ``s`` can be decoded as ASCII.

          Returns True when ``s.decode('ascii')`` succeeds and False when it
          raises UnicodeDecodeError.
          """
          try:
              # the decoded value is irrelevant; only success/failure matters
              s.decode('ascii')
          except UnicodeDecodeError:
              return False
          return True

        Yuya Nishihara
    
encoding: drop circular import by proxying through '<policy>.charencode'...

              r33756
            
      def asciilower(s):
          '''return the lowercased form of an ASCII-only string

          Raises UnicodeDecodeError if non-ASCII characters are found.'''
          # decoding is done purely as a validity check; its result is discarded
          s.decode('ascii')
          lowered = s.lower()
          return lowered

      def asciiupper(s):
          '''return the uppercased form of an ASCII-only string

          Raises UnicodeDecodeError if non-ASCII characters are found.'''
          # decoding is done purely as a validity check; its result is discarded
          s.decode('ascii')
          uppered = s.upper()
          return uppered

        Yuya Nishihara
    
encoding: extract stub for fast JSON escape...

              r33925
            
      # Lookup tables for JSON escaping: the list index is a byte value and the
      # entry is the text emitted for that byte.
      _jsonmap = []

      # 0x00-0x1f: control characters default to the generic \uXXXX form
      _jsonmap.extend("\\u%04x" % x for x in range(32))

      # 0x20-0x7e: filled via pycompat.bytechr (presumably the one-byte string
      # for that value, i.e. the character is emitted unescaped)
      _jsonmap.extend(pycompat.bytechr(x) for x in range(32, 127))

      # 0x7f (DEL) is always escaped
      _jsonmap.append('\\u007f')

      # override selected entries with JSON's short two-character escapes
      _jsonmap[0x09] = '\\t'

      _jsonmap[0x0a] = '\\n'

      _jsonmap[0x22] = '\\"'

      _jsonmap[0x5c] = '\\\\'

      _jsonmap[0x08] = '\\b'

      _jsonmap[0x0c] = '\\f'

      _jsonmap[0x0d] = '\\r'

      # The paranoid table is copied while the list still has exactly 128
      # entries, so indexing it with a byte >= 0x80 raises IndexError
      # (jsonescapeu8fast converts that into ValueError).
      _paranoidjsonmap = _jsonmap[:]

      _paranoidjsonmap[0x3c] = '\\u003c'  # '<' (e.g. escape "</script>")

      _paranoidjsonmap[0x3e] = '\\u003e'  # '>'

      # only the non-paranoid table is extended to cover bytes 0x80-0xff; this
      # must stay after the copy above
      _jsonmap.extend(pycompat.bytechr(x) for x in range(128, 256))

      def jsonescapeu8fast(u8chars, paranoid):
          """Escape a UTF-8 byte string for JSON by table lookup (fast path)

          Each byte of ``u8chars`` is used as an index into ``_paranoidjsonmap``
          (when ``paranoid`` is true) or ``_jsonmap`` (otherwise), and the
          looked-up pieces are concatenated.

          Raises ValueError if a byte has no entry in the selected table, i.e.
          if non-ASCII characters have to be escaped.
          """
          table = _paranoidjsonmap if paranoid else _jsonmap
          try:
              escaped = [table[code] for code in bytearray(u8chars)]
          except IndexError:
              # byte value beyond the end of the table: the fast path cannot
              # handle it
              raise ValueError
          return ''.join(escaped)

        Yuya Nishihara
    
py3: use 'surrogatepass' error handler to process U+DCxx transparently...

              r34215
            
      # Error handler used for the UTF-8/UTF-16 conversions in this module:
      # 'surrogatepass' on Python 3 so that U+DCxx code points are processed
      # transparently, plain 'strict' otherwise.
      _utf8strict = r'surrogatepass' if pycompat.ispy3 else r'strict'

        Yuya Nishihara
    
encoding: extract stub for fast JSON escape...

              r33925
            
      def jsonescapeu8fallback(u8chars, paranoid):
          """Escape a UTF-8 byte string for JSON via UTF-16 code units (slow path)

          ``u8chars`` is decoded as UTF-8 and re-encoded as UTF-16. Code units
          below 128 are looked up in ``_paranoidjsonmap`` (when ``paranoid`` is
          true) or ``_jsonmap``; every other code unit is written as a
          ``\\uXXXX`` escape, so all non-ASCII characters are escaped no matter
          if paranoid is False.
          """
          table = _paranoidjsonmap if paranoid else _jsonmap

          # going through UTF-16 turns each non-BMP character into a surrogate
          # pair, which is how JSON represents it
          text = u8chars.decode('utf-8', _utf8strict)
          utf16 = text.encode('utf-16', _utf8strict)
          units = array.array(r'H', utf16)
          del units[0]  # drop BOM

          pieces = []
          for unit in units:
              if unit < 128:
                  pieces.append(table[unit])
              else:
                  pieces.append('\\u%04x' % unit)
          return ''.join(pieces)

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

Yuya Nishihara encoding: drop circular import by proxying through '<policy>.charencode'...	r33756	# charencode.py - miscellaneous character encoding
		#
		# Copyright 2005-2009 Matt Mackall <mpm@selenic.com> and others
		#
		# This software may be used and distributed according to the terms of the
		# GNU General Public License version 2 or any later version.

		from __future__ import absolute_import

Yuya Nishihara encoding: extract stub for fast JSON escape...	r33925	import array

		from .. import (
		pycompat,
		)

Yuya Nishihara encoding: add function to test if a str consists of ASCII characters...	r33927	def isasciistr(s):
		try:
		s.decode('ascii')
		return True
		except UnicodeDecodeError:
		return False

Yuya Nishihara encoding: drop circular import by proxying through '<policy>.charencode'...	r33756	def asciilower(s):
		'''convert a string to lowercase if ASCII

		Raises UnicodeDecodeError if non-ASCII characters are found.'''
		s.decode('ascii')
		return s.lower()

		def asciiupper(s):
		'''convert a string to uppercase if ASCII

		Raises UnicodeDecodeError if non-ASCII characters are found.'''
		s.decode('ascii')
		return s.upper()
Yuya Nishihara encoding: extract stub for fast JSON escape...	r33925
		_jsonmap = []
		_jsonmap.extend("\\u%04x" % x for x in range(32))
		_jsonmap.extend(pycompat.bytechr(x) for x in range(32, 127))
		_jsonmap.append('\\u007f')
		_jsonmap[0x09] = '\\t'
		_jsonmap[0x0a] = '\\n'
		_jsonmap[0x22] = '\\"'
		_jsonmap[0x5c] = '\\\\'
		_jsonmap[0x08] = '\\b'
		_jsonmap[0x0c] = '\\f'
		_jsonmap[0x0d] = '\\r'
		_paranoidjsonmap = _jsonmap[:]
		_paranoidjsonmap[0x3c] = '\\u003c' # '<' (e.g. escape "</script>")
		_paranoidjsonmap[0x3e] = '\\u003e' # '>'
		_jsonmap.extend(pycompat.bytechr(x) for x in range(128, 256))

		def jsonescapeu8fast(u8chars, paranoid):
		"""Convert a UTF-8 byte string to JSON-escaped form (fast path)

		Raises ValueError if non-ASCII characters have to be escaped.
		"""
		if paranoid:
		jm = _paranoidjsonmap
		else:
		jm = _jsonmap
		try:
		return ''.join(jm[x] for x in bytearray(u8chars))
		except IndexError:
		raise ValueError

Yuya Nishihara py3: use 'surrogatepass' error handler to process U+DCxx transparently...	r34215	if pycompat.ispy3:
		_utf8strict = r'surrogatepass'
		else:
		_utf8strict = r'strict'

Yuya Nishihara encoding: extract stub for fast JSON escape...	r33925	def jsonescapeu8fallback(u8chars, paranoid):
		"""Convert a UTF-8 byte string to JSON-escaped form (slow path)

		Escapes all non-ASCII characters no matter if paranoid is False.
		"""
		if paranoid:
		jm = _paranoidjsonmap
		else:
		jm = _jsonmap
		# non-BMP char is represented as UTF-16 surrogate pair
Yuya Nishihara py3: use 'surrogatepass' error handler to process U+DCxx transparently...	r34215	u16b = u8chars.decode('utf-8', _utf8strict).encode('utf-16', _utf8strict)
		u16codes = array.array(r'H', u16b)
Yuya Nishihara encoding: extract stub for fast JSON escape...	r33925	u16codes.pop(0) # drop BOM
		return ''.join(jm[x] if x < 128 else '\\u%04x' % x for x in u16codes)