charencode.py
87 lines
| 2.4 KiB
| text/x-python
|
PythonLexer
Yuya Nishihara
|
# charencode.py - miscellaneous character encoding
#
# Copyright 2005-2009 Olivia Mackall <olivia@selenic.com> and others
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
Matt Harbison
|
r52756 | from __future__ import annotations | ||
Yuya Nishihara
|
r33756 | |||
Yuya Nishihara
|
r33925 | import array | ||
Augie Fackler
|
r43346 | from .. import pycompat | ||
Yuya Nishihara
|
r33925 | |||
Matt Harbison
|
def isasciistr(s: bytes) -> bool:
    """Report whether the byte string ``s`` consists only of ASCII bytes.

    Returns True if ``s`` can be decoded as ASCII (the empty string
    qualifies), False if decoding raises UnicodeDecodeError.
    """
    try:
        s.decode('ascii')
        return True
    except UnicodeDecodeError:
        return False
Augie Fackler
|
r43346 | |||
Matt Harbison
|
def asciilower(s: bytes) -> bytes:
    """convert a string to lowercase if ASCII

    Raises UnicodeDecodeError if non-ASCII characters are found."""
    # The decoded value is discarded; decoding is done only so that
    # non-ASCII input raises before any case conversion happens.
    s.decode('ascii')
    return s.lower()
Augie Fackler
|
r43346 | |||
Matt Harbison
|
def asciiupper(s: bytes) -> bytes:
    """convert a string to uppercase if ASCII

    Raises UnicodeDecodeError if non-ASCII characters are found."""
    # The decoded value is discarded; decoding is done only so that
    # non-ASCII input raises before any case conversion happens.
    s.decode('ascii')
    return s.upper()
Yuya Nishihara
|
r33925 | |||
Augie Fackler
|
r43346 | |||
Yuya Nishihara
|
# Lookup tables indexed by byte value; each entry is the JSON-escaped byte
# string emitted for that byte.
_jsonmap = []
# 0x00-0x1f: generic \uXXXX escapes (some are overridden with short forms
# below)
_jsonmap.extend(b"\\u%04x" % x for x in range(32))
# 0x20-0x7e: emitted unchanged
_jsonmap.extend(pycompat.bytechr(x) for x in range(32, 127))
# 0x7f (DEL)
_jsonmap.append(b'\\u007f')
# short escape sequences
_jsonmap[0x09] = b'\\t'
_jsonmap[0x0A] = b'\\n'
_jsonmap[0x22] = b'\\"'
_jsonmap[0x5C] = b'\\\\'
_jsonmap[0x08] = b'\\b'
_jsonmap[0x0C] = b'\\f'
_jsonmap[0x0D] = b'\\r'
# The paranoid table is copied while the table still has exactly 128
# entries, so indexing it with a byte >= 0x80 raises IndexError.
_paranoidjsonmap = _jsonmap[:]
_paranoidjsonmap[0x3C] = b'\\u003c'  # '<' (e.g. escape "</script>")
_paranoidjsonmap[0x3E] = b'\\u003e'  # '>'
# 0x80-0xff: only the non-paranoid table passes these bytes through
# unchanged.
_jsonmap.extend(pycompat.bytechr(x) for x in range(128, 256))
Augie Fackler
|
r43346 | |||
Matt Harbison
|
def jsonescapeu8fast(u8chars: bytes, paranoid: bool) -> bytes:
    """Convert a UTF-8 byte string to JSON-escaped form (fast path)

    Each input byte is translated through a per-byte lookup table:
    ``_paranoidjsonmap`` if ``paranoid`` is true, ``_jsonmap`` otherwise.

    Raises ValueError if non-ASCII characters have to be escaped.
    """
    if paranoid:
        jm = _paranoidjsonmap
    else:
        jm = _jsonmap
    try:
        # bytearray() makes iteration yield integer byte values, which are
        # used directly as table indexes.
        return b''.join(jm[x] for x in bytearray(u8chars))
    except IndexError:
        # The byte value has no entry in the selected table.
        raise ValueError
Augie Fackler
|
r43346 | |||
Gregory Szorc
|
r49762 | _utf8strict = r'surrogatepass' | ||
Yuya Nishihara
|
r34215 | |||
Augie Fackler
|
r43346 | |||
Matt Harbison
|
def jsonescapeu8fallback(u8chars: bytes, paranoid: bool) -> bytes:
    """Convert a UTF-8 byte string to JSON-escaped form (slow path)

    Escapes all non-ASCII characters no matter if paranoid is False.

    The input is re-encoded as UTF-16 and processed one 16-bit code unit at
    a time. Code units below 128 are translated through ``_paranoidjsonmap``
    (if ``paranoid`` is true) or ``_jsonmap``; every other code unit is
    emitted as a ``\\uXXXX`` escape.
    """
    if paranoid:
        jm = _paranoidjsonmap
    else:
        jm = _jsonmap
    # non-BMP char is represented as UTF-16 surrogate pair
    u16b = u8chars.decode('utf-8', _utf8strict).encode('utf-16', _utf8strict)
    # 'H' reads the buffer as native-endian unsigned shorts, matching the
    # native byte order produced by the plain 'utf-16' codec.
    u16codes = array.array('H', u16b)
    u16codes.pop(0)  # drop BOM
    return b''.join(jm[x] if x < 128 else b'\\u%04x' % x for x in u16codes)