##// END OF EJS Templates
encoding: extract stub for fast JSON escape...
Yuya Nishihara -
r33925:b9101467 default
parent child Browse files
Show More
@@ -7,7 +7,6 b''
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import array
11 import io
10 import io
12 import locale
11 import locale
13 import os
12 import os
@@ -19,10 +18,15 b' from . import ('
19 pycompat,
18 pycompat,
20 )
19 )
21
20
21 from .pure import (
22 charencode as charencodepure,
23 )
24
22 charencode = policy.importmod(r'charencode')
25 charencode = policy.importmod(r'charencode')
23
26
24 asciilower = charencode.asciilower
27 asciilower = charencode.asciilower
25 asciiupper = charencode.asciiupper
28 asciiupper = charencode.asciiupper
29 _jsonescapeu8fast = charencodepure.jsonescapeu8fast # TODO: no "pure"
26
30
27 _sysstr = pycompat.sysstr
31 _sysstr = pycompat.sysstr
28
32
@@ -383,22 +387,6 b' class normcasespecs(object):'
383 upper = 1
387 upper = 1
384 other = 0
388 other = 0
385
389
386 _jsonmap = []
387 _jsonmap.extend("\\u%04x" % x for x in range(32))
388 _jsonmap.extend(pycompat.bytechr(x) for x in range(32, 127))
389 _jsonmap.append('\\u007f')
390 _jsonmap[0x09] = '\\t'
391 _jsonmap[0x0a] = '\\n'
392 _jsonmap[0x22] = '\\"'
393 _jsonmap[0x5c] = '\\\\'
394 _jsonmap[0x08] = '\\b'
395 _jsonmap[0x0c] = '\\f'
396 _jsonmap[0x0d] = '\\r'
397 _paranoidjsonmap = _jsonmap[:]
398 _paranoidjsonmap[0x3c] = '\\u003c' # '<' (e.g. escape "</script>")
399 _paranoidjsonmap[0x3e] = '\\u003e' # '>'
400 _jsonmap.extend(pycompat.bytechr(x) for x in range(128, 256))
401
402 def jsonescape(s, paranoid=False):
390 def jsonescape(s, paranoid=False):
403 '''returns a string suitable for JSON
391 '''returns a string suitable for JSON
404
392
@@ -440,20 +428,12 b' def jsonescape(s, paranoid=False):'
440 '\\\\u003cfoo@example.org\\\\u003e'
428 '\\\\u003cfoo@example.org\\\\u003e'
441 '''
429 '''
442
430
443 if paranoid:
444 jm = _paranoidjsonmap
445 else:
446 jm = _jsonmap
447
448 u8chars = toutf8b(s)
431 u8chars = toutf8b(s)
449 try:
432 try:
450 return ''.join(jm[x] for x in bytearray(u8chars)) # fast path
433 return _jsonescapeu8fast(u8chars, paranoid)
451 except IndexError:
434 except ValueError:
452 pass
435 pass
453 # non-BMP char is represented as UTF-16 surrogate pair
436 return charencodepure.jsonescapeu8fallback(u8chars, paranoid)
454 u16codes = array.array('H', u8chars.decode('utf-8').encode('utf-16'))
455 u16codes.pop(0) # drop BOM
456 return ''.join(jm[x] if x < 128 else '\\u%04x' % x for x in u16codes)
457
437
458 _utf8len = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 4]
438 _utf8len = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 4]
459
439
@@ -7,6 +7,12 b''
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import array
11
12 from .. import (
13 pycompat,
14 )
15
10 def asciilower(s):
16 def asciilower(s):
11 '''convert a string to lowercase if ASCII
17 '''convert a string to lowercase if ASCII
12
18
@@ -20,3 +26,47 b' def asciiupper(s):'
20 Raises UnicodeDecodeError if non-ASCII characters are found.'''
26 Raises UnicodeDecodeError if non-ASCII characters are found.'''
21 s.decode('ascii')
27 s.decode('ascii')
22 return s.upper()
28 return s.upper()
29
30 _jsonmap = []
31 _jsonmap.extend("\\u%04x" % x for x in range(32))
32 _jsonmap.extend(pycompat.bytechr(x) for x in range(32, 127))
33 _jsonmap.append('\\u007f')
34 _jsonmap[0x09] = '\\t'
35 _jsonmap[0x0a] = '\\n'
36 _jsonmap[0x22] = '\\"'
37 _jsonmap[0x5c] = '\\\\'
38 _jsonmap[0x08] = '\\b'
39 _jsonmap[0x0c] = '\\f'
40 _jsonmap[0x0d] = '\\r'
41 _paranoidjsonmap = _jsonmap[:]
42 _paranoidjsonmap[0x3c] = '\\u003c' # '<' (e.g. escape "</script>")
43 _paranoidjsonmap[0x3e] = '\\u003e' # '>'
44 _jsonmap.extend(pycompat.bytechr(x) for x in range(128, 256))
45
46 def jsonescapeu8fast(u8chars, paranoid):
47 """Convert a UTF-8 byte string to JSON-escaped form (fast path)
48
49 Raises ValueError if non-ASCII characters have to be escaped.
50 """
51 if paranoid:
52 jm = _paranoidjsonmap
53 else:
54 jm = _jsonmap
55 try:
56 return ''.join(jm[x] for x in bytearray(u8chars))
57 except IndexError:
58 raise ValueError
59
60 def jsonescapeu8fallback(u8chars, paranoid):
61 """Convert a UTF-8 byte string to JSON-escaped form (slow path)
62
63 Escapes all non-ASCII characters no matter if paranoid is False.
64 """
65 if paranoid:
66 jm = _paranoidjsonmap
67 else:
68 jm = _jsonmap
69 # non-BMP char is represented as UTF-16 surrogate pair
70 u16codes = array.array('H', u8chars.decode('utf-8').encode('utf-16'))
71 u16codes.pop(0) # drop BOM
72 return ''.join(jm[x] if x < 128 else '\\u%04x' % x for x in u16codes)
General Comments 0
You need to be logged in to leave comments. Login now