##// END OF EJS Templates
Added tag 5.4rc0 for changeset 26ce8e751503
Added tag 5.4rc0 for changeset 26ce8e751503

File last commit:

r43812:2fe6121c default
r45224:644091f4 stable
Show More
charencode.py
90 lines | 2.3 KiB | text/x-python | PythonLexer
Yuya Nishihara
encoding: drop circular import by proxying through '<policy>.charencode'...
r33756 # charencode.py - miscellaneous character encoding
#
# Copyright 2005-2009 Matt Mackall <mpm@selenic.com> and others
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
from __future__ import absolute_import
Yuya Nishihara
encoding: extract stub for fast JSON escape...
r33925 import array
Augie Fackler
formatting: blacken the codebase...
r43346 from .. import pycompat
Yuya Nishihara
encoding: extract stub for fast JSON escape...
r33925
Yuya Nishihara
encoding: add function to test if a str consists of ASCII characters...
def isasciistr(s):
    """Report whether ``s`` consists solely of ASCII characters.

    ``s`` must provide ``decode``; the check is performed by attempting
    ``s.decode('ascii')``.

    Returns True when the decode succeeds and False when it raises
    UnicodeDecodeError.
    """
    try:
        # the decoded value is not needed, only whether decoding succeeds
        s.decode('ascii')
        return True
    except UnicodeDecodeError:
        return False
Augie Fackler
formatting: blacken the codebase...
r43346
Yuya Nishihara
encoding: drop circular import by proxying through '<policy>.charencode'...
def asciilower(s):
    """Convert a string to lowercase if ASCII

    Returns ``s.lower()``.

    Raises UnicodeDecodeError if non-ASCII characters are found.
    """
    # validation only: the decoded value is discarded, but a non-ASCII
    # input makes this raise before any case conversion happens
    s.decode('ascii')
    return s.lower()
Augie Fackler
formatting: blacken the codebase...
r43346
Yuya Nishihara
encoding: drop circular import by proxying through '<policy>.charencode'...
def asciiupper(s):
    """Convert a string to uppercase if ASCII

    Returns ``s.upper()``.

    Raises UnicodeDecodeError if non-ASCII characters are found.
    """
    # validation only: the decoded value is discarded, but a non-ASCII
    # input makes this raise before any case conversion happens
    s.decode('ascii')
    return s.upper()
Yuya Nishihara
encoding: extract stub for fast JSON escape...
r33925
Augie Fackler
formatting: blacken the codebase...
r43346
Yuya Nishihara
encoding: extract stub for fast JSON escape...
# Lookup tables for JSON escaping, indexed by byte value.  Each entry is the
# byte string to emit for that byte.
_jsonmap = []
# 0x00-0x1f: start with the generic \u00XX escape for every control byte
_jsonmap.extend(b"\\u%04x" % x for x in range(32))
# 0x20-0x7e: one entry per byte value via pycompat.bytechr
# (presumably the single byte with that ordinal -- confirm in pycompat)
_jsonmap.extend(pycompat.bytechr(x) for x in range(32, 127))
_jsonmap.append(b'\\u007f')
# overwrite the entries that have a short two-character escape
_jsonmap[0x09] = b'\\t'
_jsonmap[0x0A] = b'\\n'
_jsonmap[0x22] = b'\\"'
_jsonmap[0x5C] = b'\\\\'
_jsonmap[0x08] = b'\\b'
_jsonmap[0x0C] = b'\\f'
_jsonmap[0x0D] = b'\\r'
# The paranoid table is copied while _jsonmap still has exactly 128 entries,
# so indexing it with a byte value >= 0x80 raises IndexError.
_paranoidjsonmap = _jsonmap[:]
_paranoidjsonmap[0x3C] = b'\\u003c'  # '<' (e.g. escape "</script>")
_paranoidjsonmap[0x3E] = b'\\u003e'  # '>'
# 0x80-0xff: only the non-paranoid table gets entries for these byte values
_jsonmap.extend(pycompat.bytechr(x) for x in range(128, 256))
Augie Fackler
formatting: blacken the codebase...
r43346
Yuya Nishihara
encoding: extract stub for fast JSON escape...
def jsonescapeu8fast(u8chars, paranoid):
    """Convert a UTF-8 byte string to JSON-escaped form (fast path)

    Every byte of ``u8chars`` is replaced by its entry in a lookup table:
    ``_paranoidjsonmap`` when ``paranoid`` is true, ``_jsonmap`` otherwise.
    The replaced entries are joined into a single byte string, which is
    returned.

    Raises ValueError if non-ASCII characters have to be escaped.
    """
    if paranoid:
        jm = _paranoidjsonmap
    else:
        jm = _jsonmap
    try:
        # bytearray() yields integer byte values usable as table indexes
        return b''.join(jm[x] for x in bytearray(u8chars))
    except IndexError:
        # the selected table has no entry for this byte value
        raise ValueError
Augie Fackler
formatting: blacken the codebase...
r43346
Yuya Nishihara
py3: use 'surrogatepass' error handler to process U+DCxx transparently...
# Codec error-handler name passed to decode()/encode() by
# jsonescapeu8fallback(): 'surrogatepass' when pycompat.ispy3 is true,
# 'strict' otherwise.
if pycompat.ispy3:
    _utf8strict = r'surrogatepass'
else:
    _utf8strict = r'strict'
Augie Fackler
formatting: blacken the codebase...
r43346
Yuya Nishihara
encoding: extract stub for fast JSON escape...
def jsonescapeu8fallback(u8chars, paranoid):
    """Convert a UTF-8 byte string to JSON-escaped form (slow path)

    Code units below 128 are replaced by their entry in the lookup table
    (``_paranoidjsonmap`` when ``paranoid`` is true, ``_jsonmap`` otherwise);
    every other code unit is written as a ``\\uXXXX`` escape.

    Escapes all non-ASCII characters no matter if paranoid is False.
    """
    if paranoid:
        jm = _paranoidjsonmap
    else:
        jm = _jsonmap
    # non-BMP char is represented as UTF-16 surrogate pair
    u16b = u8chars.decode('utf-8', _utf8strict).encode('utf-16', _utf8strict)
    # reinterpret the encoded bytes as a sequence of 'H' (unsigned short)
    # items, one per UTF-16 code unit
    u16codes = array.array('H', u16b)
    u16codes.pop(0)  # drop BOM
    return b''.join(jm[x] if x < 128 else b'\\u%04x' % x for x in u16codes)