upstream/mercurial-mirror Files · mercurial/pure/charencode.py

tests: make the grep pattern in remotefilelog-gcrepack portable (issue6122)...

tests: make the grep pattern in remotefilelog-gcrepack portable (issue6122) test-remotefilelog-gcrepack was using "\" to escape "|" in the grep pattern. Most implementations ignore "\" when it is followed by "|", so the regex works. However, OpenBSD doesn't ignore "\" and treats "|" as part of the text instead of creating two branches. Neither behavior violates POSIX. This change removes the unnecessary escape character and changes grep to egrep, so the extended regular expression works on every Unix. This is part of bug 6122. Tested on OpenBSD, GNU, FreeBSD, NetBSD, Solaris 11 and BusyBox. Credits to Todd C. Miller, Paul de Weerd and Ingo Schwarze for helping me with it.

Yuya Nishihara - - Load All Authors

File last commit:

r34215:aa877860 default


                r42545:f2213131

default

Download file

             charencode.py
        
                    85 lines
            
             | 2.3 KiB
            
                | text/x-python
            
             |
                PythonLexer
            
             / mercurial / pure / charencode.py
          
                    History
                
                 |
                  Source
                 | Raw
                 |Copy content
                 |Copy permalink

        Yuya Nishihara
    
encoding: drop circular import by proxying through '<policy>.charencode'...

              r33756
            
      # charencode.py - miscellaneous character encoding

      #

      #  Copyright 2005-2009 Matt Mackall <mpm@selenic.com> and others

      #

      # This software may be used and distributed according to the terms of the

      # GNU General Public License version 2 or any later version.

      from __future__ import absolute_import

        Yuya Nishihara
    
encoding: extract stub for fast JSON escape...

              r33925
            
      import array

      from .. import (

          pycompat,

      )

        Yuya Nishihara
    
encoding: add function to test if a str consists of ASCII characters...

              r33927
            
      def isasciistr(s):
          """Report whether ``s`` decodes cleanly with the 'ascii' codec

          ``s`` must provide a ``decode`` method (e.g. a byte string).  Returns
          True when the decode succeeds and False when it raises
          UnicodeDecodeError.
          """
          try:
              # the decoded value is discarded; only success or failure matters
              s.decode('ascii')
          except UnicodeDecodeError:
              return False
          else:
              return True

        Yuya Nishihara
    
encoding: drop circular import by proxying through '<policy>.charencode'...

              r33756
            
      def asciilower(s):
          '''return the lowercase form of a pure-ASCII string

          Raises UnicodeDecodeError if non-ASCII characters are found.'''
          # decode only to validate the input; the decoded value is not used
          s.decode('ascii')
          lowered = s.lower()
          return lowered

      def asciiupper(s):
          '''return the uppercase form of a pure-ASCII string

          Raises UnicodeDecodeError if non-ASCII characters are found.'''
          # decode only to validate the input; the decoded value is not used
          s.decode('ascii')
          uppered = s.upper()
          return uppered

        Yuya Nishihara
    
encoding: extract stub for fast JSON escape...

              r33925
            
      # Escape tables indexed by byte value: entry N is the JSON output text for
      # byte N.  The statement order below matters (see the copy further down).
      _jsonmap = []

      # 0x00-0x1f: every control character starts out as a generic \uXXXX escape
      _jsonmap.extend("\\u%04x" % x for x in range(32))

      # 0x20-0x7e: whatever pycompat.bytechr returns for the code (presumably
      # the one-character string itself -- helper is not visible here)
      _jsonmap.extend(pycompat.bytechr(x) for x in range(32, 127))

      # 0x7f is escaped as well; the table now holds exactly 128 entries
      _jsonmap.append('\\u007f')

      # short escapes replace the defaults installed above
      _jsonmap[0x09] = '\\t'

      _jsonmap[0x0a] = '\\n'

      _jsonmap[0x22] = '\\"'

      _jsonmap[0x5c] = '\\\\'

      _jsonmap[0x08] = '\\b'

      _jsonmap[0x0c] = '\\f'

      _jsonmap[0x0d] = '\\r'

      # The paranoid table is copied while only 128 entries exist, so indexing
      # it with a byte >= 0x80 raises IndexError.
      _paranoidjsonmap = _jsonmap[:]

      _paranoidjsonmap[0x3c] = '\\u003c'  # '<' (e.g. escape "</script>")

      _paranoidjsonmap[0x3e] = '\\u003e'  # '>'

      # Only the non-paranoid table gets entries for bytes 0x80-0xff.
      _jsonmap.extend(pycompat.bytechr(x) for x in range(128, 256))

      def jsonescapeu8fast(u8chars, paranoid):
          """JSON-escape a UTF-8 byte string by per-byte table lookup (fast path)

          The paranoid table is used when ``paranoid`` is true, the regular
          table otherwise.  A byte that has no entry in the selected table makes
          the lookup fail, which is reported to the caller as ValueError.
          """
          table = _paranoidjsonmap if paranoid else _jsonmap
          try:
              # bytearray() yields integer byte values usable as table indexes
              return ''.join(map(table.__getitem__, bytearray(u8chars)))
          except IndexError:
              raise ValueError

        Yuya Nishihara
    
py3: use 'surrogatepass' error handler to process U+DCxx transparently...

              r34215
            
      # Codec error handler name: 'surrogatepass' when pycompat.ispy3 is true,
      # plain 'strict' otherwise.
      _utf8strict = r'surrogatepass' if pycompat.ispy3 else r'strict'

        Yuya Nishihara
    
encoding: extract stub for fast JSON escape...

              r33925
            
      def jsonescapeu8fallback(u8chars, paranoid):
          """JSON-escape a UTF-8 byte string, one UTF-16 code unit at a time (slow path)

          Code units below 128 are looked up in the escape table selected by
          ``paranoid``.  Every other code unit is written as a backslash-u escape
          with four hex digits, whether or not ``paranoid`` is set.
          """
          table = _paranoidjsonmap if paranoid else _jsonmap
          # Going through UTF-16 makes a non-BMP char appear as a surrogate pair,
          # i.e. as two 16-bit code units.
          text = u8chars.decode('utf-8', _utf8strict)
          units = array.array(r'H', text.encode('utf-16', _utf8strict))
          units.pop(0)  # the first code unit is the BOM, not part of the text
          pieces = []
          for unit in units:
              if unit < 128:
                  pieces.append(table[unit])
              else:
                  pieces.append('\\u%04x' % unit)
          return ''.join(pieces)

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

Yuya Nishihara encoding: drop circular import by proxying through '<policy>.charencode'...	r33756	# charencode.py - miscellaneous character encoding
		#
		# Copyright 2005-2009 Matt Mackall <mpm@selenic.com> and others
		#
		# This software may be used and distributed according to the terms of the
		# GNU General Public License version 2 or any later version.

		from __future__ import absolute_import

Yuya Nishihara encoding: extract stub for fast JSON escape...	r33925	import array

		from .. import (
		pycompat,
		)

Yuya Nishihara encoding: add function to test if a str consists of ASCII characters...	r33927	def isasciistr(s):
		try:
		s.decode('ascii')
		return True
		except UnicodeDecodeError:
		return False

Yuya Nishihara encoding: drop circular import by proxying through '<policy>.charencode'...	r33756	def asciilower(s):
		'''convert a string to lowercase if ASCII

		Raises UnicodeDecodeError if non-ASCII characters are found.'''
		s.decode('ascii')
		return s.lower()

		def asciiupper(s):
		'''convert a string to uppercase if ASCII

		Raises UnicodeDecodeError if non-ASCII characters are found.'''
		s.decode('ascii')
		return s.upper()
Yuya Nishihara encoding: extract stub for fast JSON escape...	r33925
		_jsonmap = []
		_jsonmap.extend("\\u%04x" % x for x in range(32))
		_jsonmap.extend(pycompat.bytechr(x) for x in range(32, 127))
		_jsonmap.append('\\u007f')
		_jsonmap[0x09] = '\\t'
		_jsonmap[0x0a] = '\\n'
		_jsonmap[0x22] = '\\"'
		_jsonmap[0x5c] = '\\\\'
		_jsonmap[0x08] = '\\b'
		_jsonmap[0x0c] = '\\f'
		_jsonmap[0x0d] = '\\r'
		_paranoidjsonmap = _jsonmap[:]
		_paranoidjsonmap[0x3c] = '\\u003c' # '<' (e.g. escape "</script>")
		_paranoidjsonmap[0x3e] = '\\u003e' # '>'
		_jsonmap.extend(pycompat.bytechr(x) for x in range(128, 256))

		def jsonescapeu8fast(u8chars, paranoid):
		"""Convert a UTF-8 byte string to JSON-escaped form (fast path)

		Raises ValueError if non-ASCII characters have to be escaped.
		"""
		if paranoid:
		jm = _paranoidjsonmap
		else:
		jm = _jsonmap
		try:
		return ''.join(jm[x] for x in bytearray(u8chars))
		except IndexError:
		raise ValueError

Yuya Nishihara py3: use 'surrogatepass' error handler to process U+DCxx transparently...	r34215	if pycompat.ispy3:
		_utf8strict = r'surrogatepass'
		else:
		_utf8strict = r'strict'

Yuya Nishihara encoding: extract stub for fast JSON escape...	r33925	def jsonescapeu8fallback(u8chars, paranoid):
		"""Convert a UTF-8 byte string to JSON-escaped form (slow path)

		Escapes all non-ASCII characters no matter if paranoid is False.
		"""
		if paranoid:
		jm = _paranoidjsonmap
		else:
		jm = _jsonmap
		# non-BMP char is represented as UTF-16 surrogate pair
Yuya Nishihara py3: use 'surrogatepass' error handler to process U+DCxx transparently...	r34215	u16b = u8chars.decode('utf-8', _utf8strict).encode('utf-16', _utf8strict)
		u16codes = array.array(r'H', u16b)
Yuya Nishihara encoding: extract stub for fast JSON escape...	r33925	u16codes.pop(0) # drop BOM
		return ''.join(jm[x] if x < 128 else '\\u%04x' % x for x in u16codes)