Show More
@@ -7,6 +7,7 b'' | |||||
7 |
|
7 | |||
8 | from __future__ import absolute_import |
|
8 | from __future__ import absolute_import | |
9 |
|
9 | |||
|
10 | import array | |||
10 | import locale |
|
11 | import locale | |
11 | import os |
|
12 | import os | |
12 | import unicodedata |
|
13 | import unicodedata | |
@@ -380,8 +381,8 b' class normcasespecs(object):' | |||||
380 |
|
381 | |||
381 | _jsonmap = [] |
|
382 | _jsonmap = [] | |
382 | _jsonmap.extend("\\u%04x" % x for x in xrange(32)) |
|
383 | _jsonmap.extend("\\u%04x" % x for x in xrange(32)) | |
383 |
_jsonmap.extend(chr(x) for x in xrange(32, |
|
384 | _jsonmap.extend(chr(x) for x in xrange(32, 127)) | |
384 |
_jsonmap |
|
385 | _jsonmap.append('\\u007f') | |
385 | _jsonmap[0x09] = '\\t' |
|
386 | _jsonmap[0x09] = '\\t' | |
386 | _jsonmap[0x0a] = '\\n' |
|
387 | _jsonmap[0x0a] = '\\n' | |
387 | _jsonmap[0x22] = '\\"' |
|
388 | _jsonmap[0x22] = '\\"' | |
@@ -389,8 +390,10 b' class normcasespecs(object):' | |||||
389 | _jsonmap[0x08] = '\\b' |
|
390 | _jsonmap[0x08] = '\\b' | |
390 | _jsonmap[0x0c] = '\\f' |
|
391 | _jsonmap[0x0c] = '\\f' | |
391 | _jsonmap[0x0d] = '\\r' |
|
392 | _jsonmap[0x0d] = '\\r' | |
|
393 | _paranoidjsonmap = _jsonmap[:] | |||
|
394 | _jsonmap.extend(chr(x) for x in xrange(128, 256)) | |||
392 |
|
395 | |||
393 | def jsonescape(s): |
|
396 | def jsonescape(s, paranoid=False): | |
394 | '''returns a string suitable for JSON |
|
397 | '''returns a string suitable for JSON | |
395 |
|
398 | |||
396 | JSON is problematic for us because it doesn't support non-Unicode |
|
399 | JSON is problematic for us because it doesn't support non-Unicode | |
@@ -415,9 +418,34 b' def jsonescape(s):' | |||||
415 | 'utf-8: caf\\xc3\\xa9' |
|
418 | 'utf-8: caf\\xc3\\xa9' | |
416 | >>> jsonescape('') |
|
419 | >>> jsonescape('') | |
417 | '' |
|
420 | '' | |
|
421 | ||||
|
422 | If paranoid, non-ascii characters are also escaped. This is suitable for | |||
|
423 | web output. | |||
|
424 | ||||
|
425 | >>> jsonescape('escape boundary: \\x7e \\x7f \\xc2\\x80', paranoid=True) | |||
|
426 | 'escape boundary: ~ \\\\u007f \\\\u0080' | |||
|
427 | >>> jsonescape('a weird byte: \\xdd', paranoid=True) | |||
|
428 | 'a weird byte: \\\\udcdd' | |||
|
429 | >>> jsonescape('utf-8: caf\\xc3\\xa9', paranoid=True) | |||
|
430 | 'utf-8: caf\\\\u00e9' | |||
|
431 | >>> jsonescape('non-BMP: \\xf0\\x9d\\x84\\x9e', paranoid=True) | |||
|
432 | 'non-BMP: \\\\ud834\\\\udd1e' | |||
418 | ''' |
|
433 | ''' | |
419 |
|
434 | |||
420 | return ''.join(_jsonmap[x] for x in bytearray(toutf8b(s))) |
|
435 | if paranoid: | |
|
436 | jm = _paranoidjsonmap | |||
|
437 | else: | |||
|
438 | jm = _jsonmap | |||
|
439 | ||||
|
440 | u8chars = toutf8b(s) | |||
|
441 | try: | |||
|
442 | return ''.join(jm[x] for x in bytearray(u8chars)) # fast path | |||
|
443 | except IndexError: | |||
|
444 | pass | |||
|
445 | # non-BMP char is represented as UTF-16 surrogate pair | |||
|
446 | u16codes = array.array('H', u8chars.decode('utf-8').encode('utf-16')) | |||
|
447 | u16codes.pop(0) # drop BOM | |||
|
448 | return ''.join(jm[x] if x < 128 else '\\u%04x' % x for x in u16codes) | |||
421 |
|
449 | |||
422 | _utf8len = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 4] |
|
450 | _utf8len = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 4] | |
423 |
|
451 |
General Comments 0
You need to be logged in to leave comments.
Login now