Show More
@@ -7,7 +7,6 b'' | |||||
7 |
|
7 | |||
8 | from __future__ import absolute_import |
|
8 | from __future__ import absolute_import | |
9 |
|
9 | |||
10 | import array |
|
|||
11 | import io |
|
10 | import io | |
12 | import locale |
|
11 | import locale | |
13 | import os |
|
12 | import os | |
@@ -19,10 +18,15 b' from . import (' | |||||
19 | pycompat, |
|
18 | pycompat, | |
20 | ) |
|
19 | ) | |
21 |
|
20 | |||
|
21 | from .pure import ( | |||
|
22 | charencode as charencodepure, | |||
|
23 | ) | |||
|
24 | ||||
22 | charencode = policy.importmod(r'charencode') |
|
25 | charencode = policy.importmod(r'charencode') | |
23 |
|
26 | |||
24 | asciilower = charencode.asciilower |
|
27 | asciilower = charencode.asciilower | |
25 | asciiupper = charencode.asciiupper |
|
28 | asciiupper = charencode.asciiupper | |
|
29 | _jsonescapeu8fast = charencodepure.jsonescapeu8fast # TODO: no "pure" | |||
26 |
|
30 | |||
27 | _sysstr = pycompat.sysstr |
|
31 | _sysstr = pycompat.sysstr | |
28 |
|
32 | |||
@@ -383,22 +387,6 b' class normcasespecs(object):' | |||||
383 | upper = 1 |
|
387 | upper = 1 | |
384 | other = 0 |
|
388 | other = 0 | |
385 |
|
389 | |||
386 | _jsonmap = [] |
|
|||
387 | _jsonmap.extend("\\u%04x" % x for x in range(32)) |
|
|||
388 | _jsonmap.extend(pycompat.bytechr(x) for x in range(32, 127)) |
|
|||
389 | _jsonmap.append('\\u007f') |
|
|||
390 | _jsonmap[0x09] = '\\t' |
|
|||
391 | _jsonmap[0x0a] = '\\n' |
|
|||
392 | _jsonmap[0x22] = '\\"' |
|
|||
393 | _jsonmap[0x5c] = '\\\\' |
|
|||
394 | _jsonmap[0x08] = '\\b' |
|
|||
395 | _jsonmap[0x0c] = '\\f' |
|
|||
396 | _jsonmap[0x0d] = '\\r' |
|
|||
397 | _paranoidjsonmap = _jsonmap[:] |
|
|||
398 | _paranoidjsonmap[0x3c] = '\\u003c' # '<' (e.g. escape "</script>") |
|
|||
399 | _paranoidjsonmap[0x3e] = '\\u003e' # '>' |
|
|||
400 | _jsonmap.extend(pycompat.bytechr(x) for x in range(128, 256)) |
|
|||
401 |
|
||||
402 | def jsonescape(s, paranoid=False): |
|
390 | def jsonescape(s, paranoid=False): | |
403 | '''returns a string suitable for JSON |
|
391 | '''returns a string suitable for JSON | |
404 |
|
392 | |||
@@ -440,20 +428,12 b' def jsonescape(s, paranoid=False):' | |||||
440 | '\\\\u003cfoo@example.org\\\\u003e' |
|
428 | '\\\\u003cfoo@example.org\\\\u003e' | |
441 | ''' |
|
429 | ''' | |
442 |
|
430 | |||
443 | if paranoid: |
|
|||
444 | jm = _paranoidjsonmap |
|
|||
445 | else: |
|
|||
446 | jm = _jsonmap |
|
|||
447 |
|
||||
448 | u8chars = toutf8b(s) |
|
431 | u8chars = toutf8b(s) | |
449 | try: |
|
432 | try: | |
450 | return ''.join(jm[x] for x in bytearray(u8chars)) # fast path |
|
433 | return _jsonescapeu8fast(u8chars, paranoid) | |
451 | except IndexError: |
|
434 | except ValueError: | |
452 | pass |
|
435 | pass | |
453 | # non-BMP char is represented as UTF-16 surrogate pair |
|
436 | return charencodepure.jsonescapeu8fallback(u8chars, paranoid) | |
454 | u16codes = array.array('H', u8chars.decode('utf-8').encode('utf-16')) |
|
|||
455 | u16codes.pop(0) # drop BOM |
|
|||
456 | return ''.join(jm[x] if x < 128 else '\\u%04x' % x for x in u16codes) |
|
|||
457 |
|
437 | |||
458 | _utf8len = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 4] |
|
438 | _utf8len = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 4] | |
459 |
|
439 |
@@ -7,6 +7,12 b'' | |||||
7 |
|
7 | |||
8 | from __future__ import absolute_import |
|
8 | from __future__ import absolute_import | |
9 |
|
9 | |||
|
10 | import array | |||
|
11 | ||||
|
12 | from .. import ( | |||
|
13 | pycompat, | |||
|
14 | ) | |||
|
15 | ||||
10 | def asciilower(s): |
|
16 | def asciilower(s): | |
11 | '''convert a string to lowercase if ASCII |
|
17 | '''convert a string to lowercase if ASCII | |
12 |
|
18 | |||
@@ -20,3 +26,47 b' def asciiupper(s):' | |||||
20 | Raises UnicodeDecodeError if non-ASCII characters are found.''' |
|
26 | Raises UnicodeDecodeError if non-ASCII characters are found.''' | |
21 | s.decode('ascii') |
|
27 | s.decode('ascii') | |
22 | return s.upper() |
|
28 | return s.upper() | |
|
29 | ||||
|
30 | _jsonmap = [] | |||
|
31 | _jsonmap.extend("\\u%04x" % x for x in range(32)) | |||
|
32 | _jsonmap.extend(pycompat.bytechr(x) for x in range(32, 127)) | |||
|
33 | _jsonmap.append('\\u007f') | |||
|
34 | _jsonmap[0x09] = '\\t' | |||
|
35 | _jsonmap[0x0a] = '\\n' | |||
|
36 | _jsonmap[0x22] = '\\"' | |||
|
37 | _jsonmap[0x5c] = '\\\\' | |||
|
38 | _jsonmap[0x08] = '\\b' | |||
|
39 | _jsonmap[0x0c] = '\\f' | |||
|
40 | _jsonmap[0x0d] = '\\r' | |||
|
41 | _paranoidjsonmap = _jsonmap[:] | |||
|
42 | _paranoidjsonmap[0x3c] = '\\u003c' # '<' (e.g. escape "</script>") | |||
|
43 | _paranoidjsonmap[0x3e] = '\\u003e' # '>' | |||
|
44 | _jsonmap.extend(pycompat.bytechr(x) for x in range(128, 256)) | |||
|
45 | ||||
|
46 | def jsonescapeu8fast(u8chars, paranoid): | |||
|
47 | """Convert a UTF-8 byte string to JSON-escaped form (fast path) | |||
|
48 | ||||
|
49 | Raises ValueError if non-ASCII characters have to be escaped. | |||
|
50 | """ | |||
|
51 | if paranoid: | |||
|
52 | jm = _paranoidjsonmap | |||
|
53 | else: | |||
|
54 | jm = _jsonmap | |||
|
55 | try: | |||
|
56 | return ''.join(jm[x] for x in bytearray(u8chars)) | |||
|
57 | except IndexError: | |||
|
58 | raise ValueError | |||
|
59 | ||||
|
60 | def jsonescapeu8fallback(u8chars, paranoid): | |||
|
61 | """Convert a UTF-8 byte string to JSON-escaped form (slow path) | |||
|
62 | ||||
|
63 | Escapes all non-ASCII characters no matter if paranoid is False. | |||
|
64 | """ | |||
|
65 | if paranoid: | |||
|
66 | jm = _paranoidjsonmap | |||
|
67 | else: | |||
|
68 | jm = _jsonmap | |||
|
69 | # non-BMP char is represented as UTF-16 surrogate pair | |||
|
70 | u16codes = array.array('H', u8chars.decode('utf-8').encode('utf-16')) | |||
|
71 | u16codes.pop(0) # drop BOM | |||
|
72 | return ''.join(jm[x] if x < 128 else '\\u%04x' % x for x in u16codes) |
General Comments 0
You need to be logged in to leave comments.
Login now