charencode.py
90 lines
| 2.3 KiB
| text/x-python
|
PythonLexer
Yuya Nishihara
|
r33756 | # charencode.py - miscellaneous character encoding | ||
# | ||||
# Copyright 2005-2009 Matt Mackall <mpm@selenic.com> and others | ||||
# | ||||
# This software may be used and distributed according to the terms of the | ||||
# GNU General Public License version 2 or any later version. | ||||
from __future__ import absolute_import | ||||
Yuya Nishihara
|
r33925 | import array | ||
Augie Fackler
|
r43346 | from .. import pycompat | ||
Yuya Nishihara
|
r33925 | |||
Yuya Nishihara
|
def isasciistr(s):
    """Tell whether the byte string ``s`` consists solely of ASCII bytes.

    Returns True when ``s`` decodes as ASCII, False when decoding raises
    UnicodeDecodeError.
    """
    try:
        # The decoded value is irrelevant; only success or failure matters.
        s.decode('ascii')
    except UnicodeDecodeError:
        return False
    else:
        return True
Augie Fackler
|
r43346 | |||
Yuya Nishihara
|
def asciilower(s):
    '''Return the lowercase form of the ASCII byte string ``s``.

    Raises UnicodeDecodeError if ``s`` contains any non-ASCII byte.'''
    # Decode purely as a validation step: it raises on the first non-ASCII
    # byte, and its result is intentionally discarded.
    s.decode('ascii')
    lowered = s.lower()
    return lowered
Augie Fackler
|
r43346 | |||
Yuya Nishihara
|
def asciiupper(s):
    '''Return the uppercase form of the ASCII byte string ``s``.

    Raises UnicodeDecodeError if ``s`` contains any non-ASCII byte.'''
    # Decode purely as a validation step: it raises on the first non-ASCII
    # byte, and its result is intentionally discarded.
    s.decode('ascii')
    uppered = s.upper()
    return uppered
Yuya Nishihara
|
r33925 | |||
Augie Fackler
|
r43346 | |||
Yuya Nishihara
|
# Lookup tables indexed by byte value; each entry is the JSON-escaped byte
# string to emit for that byte.
#
# 0x00-0x1f: control characters, escaped as \uXXXX by default.
_jsonmap = list(b"\\u%04x" % code for code in range(32))
# 0x20-0x7e: printable ASCII, emitted unchanged by default.
_jsonmap.extend(map(pycompat.bytechr, range(32, 127)))
# 0x7f: DEL is escaped as well.
_jsonmap.append(b'\\u007f')

# Replace the generic entries with the short two-character escapes.
_jsonmap[0x08] = b'\\b'
_jsonmap[0x09] = b'\\t'
_jsonmap[0x0A] = b'\\n'
_jsonmap[0x0C] = b'\\f'
_jsonmap[0x0D] = b'\\r'
_jsonmap[0x22] = b'\\"'
_jsonmap[0x5C] = b'\\\\'

# The paranoid table is copied while _jsonmap still has exactly 128 entries,
# so indexing it with a byte >= 0x80 raises IndexError.
_paranoidjsonmap = list(_jsonmap)
_paranoidjsonmap[0x3C] = b'\\u003c'  # '<' (e.g. escape "</script>")
_paranoidjsonmap[0x3E] = b'\\u003e'  # '>'

# 0x80-0xff: only the non-paranoid table passes high bytes through verbatim.
_jsonmap.extend(map(pycompat.bytechr, range(128, 256)))
Augie Fackler
|
r43346 | |||
Yuya Nishihara
|
def jsonescapeu8fast(u8chars, paranoid):
    """Convert a UTF-8 byte string to JSON-escaped form (fast path)

    Each byte of ``u8chars`` is translated through a per-byte lookup table:
    ``_paranoidjsonmap`` when ``paranoid`` is true, ``_jsonmap`` otherwise.

    Raises ValueError if a byte has no entry in the selected table, i.e.
    when non-ASCII characters have to be escaped.
    """
    table = _paranoidjsonmap if paranoid else _jsonmap
    try:
        # bytearray() yields integer byte values usable as table indexes.
        escaped = [table[code] for code in bytearray(u8chars)]
    except IndexError:
        # A byte outside the table: signal the caller with ValueError.
        raise ValueError
    return b''.join(escaped)
Augie Fackler
|
r43346 | |||
Yuya Nishihara
|
# Codec error handler passed to the UTF-8 decode / UTF-16 encode calls in
# jsonescapeu8fallback(): 'surrogatepass' on Python 3, 'strict' otherwise.
_utf8strict = r'surrogatepass' if pycompat.ispy3 else r'strict'
Augie Fackler
|
r43346 | |||
Yuya Nishihara
|
def jsonescapeu8fallback(u8chars, paranoid):
    """Convert a UTF-8 byte string to JSON-escaped form (slow path)

    Every non-ASCII character is emitted as a ``\\uXXXX`` escape, whether or
    not ``paranoid`` is set; ``paranoid`` only selects which lookup table is
    used for code units below 128.
    """
    table = _paranoidjsonmap if paranoid else _jsonmap

    # Re-encode as UTF-16 so that a non-BMP character is represented as a
    # surrogate pair, i.e. two 16-bit code units.
    text = u8chars.decode('utf-8', _utf8strict)
    units = array.array('H', text.encode('utf-16', _utf8strict))
    units.pop(0)  # drop BOM

    pieces = []
    for unit in units:
        if unit < 128:
            pieces.append(table[unit])
        else:
            pieces.append(b'\\u%04x' % unit)
    return b''.join(pieces)