##// END OF EJS Templates
encoding: check overflow while calculating size of JSON escape buffer...
Yuya Nishihara -
r34032:e97be042 default
parent child Browse files
Show More
@@ -1,382 +1,392 b''
/*
 charencode.c - miscellaneous character encoding

 Copyright 2008 Matt Mackall <mpm@selenic.com> and others

 This software may be used and distributed according to the terms of
 the GNU General Public License, incorporated herein by reference.
*/
9
9
10 #define PY_SSIZE_T_CLEAN
10 #define PY_SSIZE_T_CLEAN
11 #include <Python.h>
11 #include <Python.h>
12 #include <assert.h>
12 #include <assert.h>
13
13
14 #include "charencode.h"
14 #include "charencode.h"
15 #include "compat.h"
15 #include "compat.h"
16 #include "util.h"
16 #include "util.h"
17
17
#ifdef IS_PY3K
/* The mapping of Python types is meant to be temporary to get Python
 * 3 to compile. We should remove this once Python 3 support is fully
 * supported and proper types are used in the extensions themselves. */
#define PyInt_Type PyLong_Type
#define PyInt_AS_LONG PyLong_AS_LONG
#endif
25
25
/* ASCII lower-casing table: maps 'A'-'Z' (0x41-0x5a) to 'a'-'z' and every
 * other 7-bit value to itself. Indexed by byte value 0-127. */
static const char lowertable[128] = {
	'\x00', '\x01', '\x02', '\x03', '\x04', '\x05', '\x06', '\x07',
	'\x08', '\x09', '\x0a', '\x0b', '\x0c', '\x0d', '\x0e', '\x0f',
	'\x10', '\x11', '\x12', '\x13', '\x14', '\x15', '\x16', '\x17',
	'\x18', '\x19', '\x1a', '\x1b', '\x1c', '\x1d', '\x1e', '\x1f',
	'\x20', '\x21', '\x22', '\x23', '\x24', '\x25', '\x26', '\x27',
	'\x28', '\x29', '\x2a', '\x2b', '\x2c', '\x2d', '\x2e', '\x2f',
	'\x30', '\x31', '\x32', '\x33', '\x34', '\x35', '\x36', '\x37',
	'\x38', '\x39', '\x3a', '\x3b', '\x3c', '\x3d', '\x3e', '\x3f',
	'\x40',
	        '\x61', '\x62', '\x63', '\x64', '\x65', '\x66', '\x67', /* A-G */
	'\x68', '\x69', '\x6a', '\x6b', '\x6c', '\x6d', '\x6e', '\x6f', /* H-O */
	'\x70', '\x71', '\x72', '\x73', '\x74', '\x75', '\x76', '\x77', /* P-W */
	'\x78', '\x79', '\x7a',                                         /* X-Z */
	                        '\x5b', '\x5c', '\x5d', '\x5e', '\x5f',
	'\x60', '\x61', '\x62', '\x63', '\x64', '\x65', '\x66', '\x67',
	'\x68', '\x69', '\x6a', '\x6b', '\x6c', '\x6d', '\x6e', '\x6f',
	'\x70', '\x71', '\x72', '\x73', '\x74', '\x75', '\x76', '\x77',
	'\x78', '\x79', '\x7a', '\x7b', '\x7c', '\x7d', '\x7e', '\x7f'
};
46
46
/* ASCII upper-casing table: maps 'a'-'z' (0x61-0x7a) to 'A'-'Z' and every
 * other 7-bit value to itself. Indexed by byte value 0-127. */
static const char uppertable[128] = {
	'\x00', '\x01', '\x02', '\x03', '\x04', '\x05', '\x06', '\x07',
	'\x08', '\x09', '\x0a', '\x0b', '\x0c', '\x0d', '\x0e', '\x0f',
	'\x10', '\x11', '\x12', '\x13', '\x14', '\x15', '\x16', '\x17',
	'\x18', '\x19', '\x1a', '\x1b', '\x1c', '\x1d', '\x1e', '\x1f',
	'\x20', '\x21', '\x22', '\x23', '\x24', '\x25', '\x26', '\x27',
	'\x28', '\x29', '\x2a', '\x2b', '\x2c', '\x2d', '\x2e', '\x2f',
	'\x30', '\x31', '\x32', '\x33', '\x34', '\x35', '\x36', '\x37',
	'\x38', '\x39', '\x3a', '\x3b', '\x3c', '\x3d', '\x3e', '\x3f',
	'\x40', '\x41', '\x42', '\x43', '\x44', '\x45', '\x46', '\x47',
	'\x48', '\x49', '\x4a', '\x4b', '\x4c', '\x4d', '\x4e', '\x4f',
	'\x50', '\x51', '\x52', '\x53', '\x54', '\x55', '\x56', '\x57',
	'\x58', '\x59', '\x5a', '\x5b', '\x5c', '\x5d', '\x5e', '\x5f',
	'\x60',
	        '\x41', '\x42', '\x43', '\x44', '\x45', '\x46', '\x47', /* a-g */
	'\x48', '\x49', '\x4a', '\x4b', '\x4c', '\x4d', '\x4e', '\x4f', /* h-o */
	'\x50', '\x51', '\x52', '\x53', '\x54', '\x55', '\x56', '\x57', /* p-w */
	'\x58', '\x59', '\x5a',                                         /* x-z */
	                        '\x7b', '\x7c', '\x7d', '\x7e', '\x7f'
};
67
67
/* Escaped length of each byte value in JSON output.
 * 1: no escape, 2: \<c>, 6: \u<x> */
static const uint8_t jsonlentable[256] = {
	6, 6, 6, 6, 6, 6, 6, 6, 2, 2, 2, 6, 2, 2, 6, 6, /* b, t, n, f, r */
	6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
	1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* " */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, /* \\ */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 6, /* DEL */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
};
87
87
/* Escaped length of each 7-bit byte value in "paranoid" JSON output; same
 * as jsonlentable except that '<' and '>' also take the 6-byte \u form.
 * Only 128 entries: callers must not index it with bytes >= 0x80. */
static const uint8_t jsonparanoidlentable[128] = {
	6, 6, 6, 6, 6, 6, 6, 6, 2, 2, 2, 6, 2, 2, 6, 6, /* b, t, n, f, r */
	6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
	1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* " */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 6, 1, 6, 1, /* <, > */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, /* \\ */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 6, /* DEL */
};
98
98
/* Lower-case hex digit for each nibble value 0-15. */
static const char hexchartable[16] = {
	'0', '1', '2', '3', '4', '5', '6', '7',
	'8', '9', 'a', 'b', 'c', 'd', 'e', 'f',
};
103
103
104 /*
104 /*
105 * Turn a hex-encoded string into binary.
105 * Turn a hex-encoded string into binary.
106 */
106 */
107 PyObject *unhexlify(const char *str, Py_ssize_t len)
107 PyObject *unhexlify(const char *str, Py_ssize_t len)
108 {
108 {
109 PyObject *ret;
109 PyObject *ret;
110 char *d;
110 char *d;
111 Py_ssize_t i;
111 Py_ssize_t i;
112
112
113 ret = PyBytes_FromStringAndSize(NULL, len / 2);
113 ret = PyBytes_FromStringAndSize(NULL, len / 2);
114
114
115 if (!ret)
115 if (!ret)
116 return NULL;
116 return NULL;
117
117
118 d = PyBytes_AsString(ret);
118 d = PyBytes_AsString(ret);
119
119
120 for (i = 0; i < len;) {
120 for (i = 0; i < len;) {
121 int hi = hexdigit(str, i++);
121 int hi = hexdigit(str, i++);
122 int lo = hexdigit(str, i++);
122 int lo = hexdigit(str, i++);
123 *d++ = (hi << 4) | lo;
123 *d++ = (hi << 4) | lo;
124 }
124 }
125
125
126 return ret;
126 return ret;
127 }
127 }
128
128
129 PyObject *isasciistr(PyObject *self, PyObject *args)
129 PyObject *isasciistr(PyObject *self, PyObject *args)
130 {
130 {
131 const char *buf;
131 const char *buf;
132 Py_ssize_t i, len;
132 Py_ssize_t i, len;
133 if (!PyArg_ParseTuple(args, "s#:isasciistr", &buf, &len))
133 if (!PyArg_ParseTuple(args, "s#:isasciistr", &buf, &len))
134 return NULL;
134 return NULL;
135 i = 0;
135 i = 0;
136 /* char array in PyStringObject should be at least 4-byte aligned */
136 /* char array in PyStringObject should be at least 4-byte aligned */
137 if (((uintptr_t)buf & 3) == 0) {
137 if (((uintptr_t)buf & 3) == 0) {
138 const uint32_t *p = (const uint32_t *)buf;
138 const uint32_t *p = (const uint32_t *)buf;
139 for (; i < len / 4; i++) {
139 for (; i < len / 4; i++) {
140 if (p[i] & 0x80808080U)
140 if (p[i] & 0x80808080U)
141 Py_RETURN_FALSE;
141 Py_RETURN_FALSE;
142 }
142 }
143 i *= 4;
143 i *= 4;
144 }
144 }
145 for (; i < len; i++) {
145 for (; i < len; i++) {
146 if (buf[i] & 0x80)
146 if (buf[i] & 0x80)
147 Py_RETURN_FALSE;
147 Py_RETURN_FALSE;
148 }
148 }
149 Py_RETURN_TRUE;
149 Py_RETURN_TRUE;
150 }
150 }
151
151
152 static inline PyObject *_asciitransform(PyObject *str_obj,
152 static inline PyObject *_asciitransform(PyObject *str_obj,
153 const char table[128],
153 const char table[128],
154 PyObject *fallback_fn)
154 PyObject *fallback_fn)
155 {
155 {
156 char *str, *newstr;
156 char *str, *newstr;
157 Py_ssize_t i, len;
157 Py_ssize_t i, len;
158 PyObject *newobj = NULL;
158 PyObject *newobj = NULL;
159 PyObject *ret = NULL;
159 PyObject *ret = NULL;
160
160
161 str = PyBytes_AS_STRING(str_obj);
161 str = PyBytes_AS_STRING(str_obj);
162 len = PyBytes_GET_SIZE(str_obj);
162 len = PyBytes_GET_SIZE(str_obj);
163
163
164 newobj = PyBytes_FromStringAndSize(NULL, len);
164 newobj = PyBytes_FromStringAndSize(NULL, len);
165 if (!newobj)
165 if (!newobj)
166 goto quit;
166 goto quit;
167
167
168 newstr = PyBytes_AS_STRING(newobj);
168 newstr = PyBytes_AS_STRING(newobj);
169
169
170 for (i = 0; i < len; i++) {
170 for (i = 0; i < len; i++) {
171 char c = str[i];
171 char c = str[i];
172 if (c & 0x80) {
172 if (c & 0x80) {
173 if (fallback_fn != NULL) {
173 if (fallback_fn != NULL) {
174 ret = PyObject_CallFunctionObjArgs(fallback_fn,
174 ret = PyObject_CallFunctionObjArgs(fallback_fn,
175 str_obj, NULL);
175 str_obj, NULL);
176 } else {
176 } else {
177 PyObject *err = PyUnicodeDecodeError_Create(
177 PyObject *err = PyUnicodeDecodeError_Create(
178 "ascii", str, len, i, (i + 1),
178 "ascii", str, len, i, (i + 1),
179 "unexpected code byte");
179 "unexpected code byte");
180 PyErr_SetObject(PyExc_UnicodeDecodeError, err);
180 PyErr_SetObject(PyExc_UnicodeDecodeError, err);
181 Py_XDECREF(err);
181 Py_XDECREF(err);
182 }
182 }
183 goto quit;
183 goto quit;
184 }
184 }
185 newstr[i] = table[(unsigned char)c];
185 newstr[i] = table[(unsigned char)c];
186 }
186 }
187
187
188 ret = newobj;
188 ret = newobj;
189 Py_INCREF(ret);
189 Py_INCREF(ret);
190 quit:
190 quit:
191 Py_XDECREF(newobj);
191 Py_XDECREF(newobj);
192 return ret;
192 return ret;
193 }
193 }
194
194
195 PyObject *asciilower(PyObject *self, PyObject *args)
195 PyObject *asciilower(PyObject *self, PyObject *args)
196 {
196 {
197 PyObject *str_obj;
197 PyObject *str_obj;
198 if (!PyArg_ParseTuple(args, "O!:asciilower", &PyBytes_Type, &str_obj))
198 if (!PyArg_ParseTuple(args, "O!:asciilower", &PyBytes_Type, &str_obj))
199 return NULL;
199 return NULL;
200 return _asciitransform(str_obj, lowertable, NULL);
200 return _asciitransform(str_obj, lowertable, NULL);
201 }
201 }
202
202
203 PyObject *asciiupper(PyObject *self, PyObject *args)
203 PyObject *asciiupper(PyObject *self, PyObject *args)
204 {
204 {
205 PyObject *str_obj;
205 PyObject *str_obj;
206 if (!PyArg_ParseTuple(args, "O!:asciiupper", &PyBytes_Type, &str_obj))
206 if (!PyArg_ParseTuple(args, "O!:asciiupper", &PyBytes_Type, &str_obj))
207 return NULL;
207 return NULL;
208 return _asciitransform(str_obj, uppertable, NULL);
208 return _asciitransform(str_obj, uppertable, NULL);
209 }
209 }
210
210
211 PyObject *make_file_foldmap(PyObject *self, PyObject *args)
211 PyObject *make_file_foldmap(PyObject *self, PyObject *args)
212 {
212 {
213 PyObject *dmap, *spec_obj, *normcase_fallback;
213 PyObject *dmap, *spec_obj, *normcase_fallback;
214 PyObject *file_foldmap = NULL;
214 PyObject *file_foldmap = NULL;
215 enum normcase_spec spec;
215 enum normcase_spec spec;
216 PyObject *k, *v;
216 PyObject *k, *v;
217 dirstateTupleObject *tuple;
217 dirstateTupleObject *tuple;
218 Py_ssize_t pos = 0;
218 Py_ssize_t pos = 0;
219 const char *table;
219 const char *table;
220
220
221 if (!PyArg_ParseTuple(args, "O!O!O!:make_file_foldmap",
221 if (!PyArg_ParseTuple(args, "O!O!O!:make_file_foldmap",
222 &PyDict_Type, &dmap,
222 &PyDict_Type, &dmap,
223 &PyInt_Type, &spec_obj,
223 &PyInt_Type, &spec_obj,
224 &PyFunction_Type, &normcase_fallback))
224 &PyFunction_Type, &normcase_fallback))
225 goto quit;
225 goto quit;
226
226
227 spec = (int)PyInt_AS_LONG(spec_obj);
227 spec = (int)PyInt_AS_LONG(spec_obj);
228 switch (spec) {
228 switch (spec) {
229 case NORMCASE_LOWER:
229 case NORMCASE_LOWER:
230 table = lowertable;
230 table = lowertable;
231 break;
231 break;
232 case NORMCASE_UPPER:
232 case NORMCASE_UPPER:
233 table = uppertable;
233 table = uppertable;
234 break;
234 break;
235 case NORMCASE_OTHER:
235 case NORMCASE_OTHER:
236 table = NULL;
236 table = NULL;
237 break;
237 break;
238 default:
238 default:
239 PyErr_SetString(PyExc_TypeError, "invalid normcasespec");
239 PyErr_SetString(PyExc_TypeError, "invalid normcasespec");
240 goto quit;
240 goto quit;
241 }
241 }
242
242
243 /* Add some more entries to deal with additions outside this
243 /* Add some more entries to deal with additions outside this
244 function. */
244 function. */
245 file_foldmap = _dict_new_presized((PyDict_Size(dmap) / 10) * 11);
245 file_foldmap = _dict_new_presized((PyDict_Size(dmap) / 10) * 11);
246 if (file_foldmap == NULL)
246 if (file_foldmap == NULL)
247 goto quit;
247 goto quit;
248
248
249 while (PyDict_Next(dmap, &pos, &k, &v)) {
249 while (PyDict_Next(dmap, &pos, &k, &v)) {
250 if (!dirstate_tuple_check(v)) {
250 if (!dirstate_tuple_check(v)) {
251 PyErr_SetString(PyExc_TypeError,
251 PyErr_SetString(PyExc_TypeError,
252 "expected a dirstate tuple");
252 "expected a dirstate tuple");
253 goto quit;
253 goto quit;
254 }
254 }
255
255
256 tuple = (dirstateTupleObject *)v;
256 tuple = (dirstateTupleObject *)v;
257 if (tuple->state != 'r') {
257 if (tuple->state != 'r') {
258 PyObject *normed;
258 PyObject *normed;
259 if (table != NULL) {
259 if (table != NULL) {
260 normed = _asciitransform(k, table,
260 normed = _asciitransform(k, table,
261 normcase_fallback);
261 normcase_fallback);
262 } else {
262 } else {
263 normed = PyObject_CallFunctionObjArgs(
263 normed = PyObject_CallFunctionObjArgs(
264 normcase_fallback, k, NULL);
264 normcase_fallback, k, NULL);
265 }
265 }
266
266
267 if (normed == NULL)
267 if (normed == NULL)
268 goto quit;
268 goto quit;
269 if (PyDict_SetItem(file_foldmap, normed, k) == -1) {
269 if (PyDict_SetItem(file_foldmap, normed, k) == -1) {
270 Py_DECREF(normed);
270 Py_DECREF(normed);
271 goto quit;
271 goto quit;
272 }
272 }
273 Py_DECREF(normed);
273 Py_DECREF(normed);
274 }
274 }
275 }
275 }
276 return file_foldmap;
276 return file_foldmap;
277 quit:
277 quit:
278 Py_XDECREF(file_foldmap);
278 Py_XDECREF(file_foldmap);
279 return NULL;
279 return NULL;
280 }
280 }
281
281
282 /* calculate length of JSON-escaped string; returns -1 if unsupported */
282 /* calculate length of JSON-escaped string; returns -1 if unsupported */
283 static Py_ssize_t jsonescapelen(const char *buf, Py_ssize_t len, bool paranoid)
283 static Py_ssize_t jsonescapelen(const char *buf, Py_ssize_t len, bool paranoid)
284 {
284 {
285 Py_ssize_t i, esclen = 0;
285 Py_ssize_t i, esclen = 0;
286
286
287 if (paranoid) {
287 if (paranoid) {
288 /* don't want to process multi-byte escapes in C */
288 /* don't want to process multi-byte escapes in C */
289 for (i = 0; i < len; i++) {
289 for (i = 0; i < len; i++) {
290 char c = buf[i];
290 char c = buf[i];
291 if (c & 0x80) {
291 if (c & 0x80) {
292 PyErr_SetString(PyExc_ValueError,
292 PyErr_SetString(PyExc_ValueError,
293 "cannot process non-ascii str");
293 "cannot process non-ascii str");
294 return -1;
294 return -1;
295 }
295 }
296 esclen += jsonparanoidlentable[(unsigned char)c];
296 esclen += jsonparanoidlentable[(unsigned char)c];
297 if (esclen < 0) {
298 PyErr_SetString(PyExc_MemoryError,
299 "overflow in jsonescapelen");
300 return -1;
301 }
297 }
302 }
298 } else {
303 } else {
299 for (i = 0; i < len; i++) {
304 for (i = 0; i < len; i++) {
300 char c = buf[i];
305 char c = buf[i];
301 esclen += jsonlentable[(unsigned char)c];
306 esclen += jsonlentable[(unsigned char)c];
307 if (esclen < 0) {
308 PyErr_SetString(PyExc_MemoryError,
309 "overflow in jsonescapelen");
310 return -1;
311 }
302 }
312 }
303 }
313 }
304
314
305 return esclen;
315 return esclen;
306 }
316 }
307
317
/* map '\<c>' escape character: returns the letter that follows the
   backslash in the two-byte JSON escape for 'c', or '\0' if 'c' has no
   two-byte escape */
static char jsonescapechar2(char c)
{
	switch (c) {
	case '\b':
		return 'b';
	case '\t':
		return 't';
	case '\n':
		return 'n';
	case '\f':
		return 'f';
	case '\r':
		return 'r';
	case '"':
		return '"';
	case '\\':
		return '\\';
	default:
		return '\0'; /* should not happen */
	}
}
322
332
323 /* convert 'origbuf' to JSON-escaped form 'escbuf'; 'origbuf' should only
333 /* convert 'origbuf' to JSON-escaped form 'escbuf'; 'origbuf' should only
324 include characters mappable by json(paranoid)lentable */
334 include characters mappable by json(paranoid)lentable */
325 static void encodejsonescape(char *escbuf, Py_ssize_t esclen,
335 static void encodejsonescape(char *escbuf, Py_ssize_t esclen,
326 const char *origbuf, Py_ssize_t origlen,
336 const char *origbuf, Py_ssize_t origlen,
327 bool paranoid)
337 bool paranoid)
328 {
338 {
329 const uint8_t *lentable =
339 const uint8_t *lentable =
330 (paranoid) ? jsonparanoidlentable : jsonlentable;
340 (paranoid) ? jsonparanoidlentable : jsonlentable;
331 Py_ssize_t i, j;
341 Py_ssize_t i, j;
332
342
333 for (i = 0, j = 0; i < origlen; i++) {
343 for (i = 0, j = 0; i < origlen; i++) {
334 char c = origbuf[i];
344 char c = origbuf[i];
335 uint8_t l = lentable[(unsigned char)c];
345 uint8_t l = lentable[(unsigned char)c];
336 assert(j + l <= esclen);
346 assert(j + l <= esclen);
337 switch (l) {
347 switch (l) {
338 case 1:
348 case 1:
339 escbuf[j] = c;
349 escbuf[j] = c;
340 break;
350 break;
341 case 2:
351 case 2:
342 escbuf[j] = '\\';
352 escbuf[j] = '\\';
343 escbuf[j + 1] = jsonescapechar2(c);
353 escbuf[j + 1] = jsonescapechar2(c);
344 break;
354 break;
345 case 6:
355 case 6:
346 memcpy(escbuf + j, "\\u00", 4);
356 memcpy(escbuf + j, "\\u00", 4);
347 escbuf[j + 4] = hexchartable[(unsigned char)c >> 4];
357 escbuf[j + 4] = hexchartable[(unsigned char)c >> 4];
348 escbuf[j + 5] = hexchartable[(unsigned char)c & 0xf];
358 escbuf[j + 5] = hexchartable[(unsigned char)c & 0xf];
349 break;
359 break;
350 }
360 }
351 j += l;
361 j += l;
352 }
362 }
353 }
363 }
354
364
355 PyObject *jsonescapeu8fast(PyObject *self, PyObject *args)
365 PyObject *jsonescapeu8fast(PyObject *self, PyObject *args)
356 {
366 {
357 PyObject *origstr, *escstr;
367 PyObject *origstr, *escstr;
358 const char *origbuf;
368 const char *origbuf;
359 Py_ssize_t origlen, esclen;
369 Py_ssize_t origlen, esclen;
360 int paranoid;
370 int paranoid;
361 if (!PyArg_ParseTuple(args, "O!i:jsonescapeu8fast",
371 if (!PyArg_ParseTuple(args, "O!i:jsonescapeu8fast",
362 &PyBytes_Type, &origstr, &paranoid))
372 &PyBytes_Type, &origstr, &paranoid))
363 return NULL;
373 return NULL;
364
374
365 origbuf = PyBytes_AS_STRING(origstr);
375 origbuf = PyBytes_AS_STRING(origstr);
366 origlen = PyBytes_GET_SIZE(origstr);
376 origlen = PyBytes_GET_SIZE(origstr);
367 esclen = jsonescapelen(origbuf, origlen, paranoid);
377 esclen = jsonescapelen(origbuf, origlen, paranoid);
368 if (esclen < 0)
378 if (esclen < 0)
369 return NULL; /* unsupported char found */
379 return NULL; /* unsupported char found or overflow */
370 if (origlen == esclen) {
380 if (origlen == esclen) {
371 Py_INCREF(origstr);
381 Py_INCREF(origstr);
372 return origstr;
382 return origstr;
373 }
383 }
374
384
375 escstr = PyBytes_FromStringAndSize(NULL, esclen);
385 escstr = PyBytes_FromStringAndSize(NULL, esclen);
376 if (!escstr)
386 if (!escstr)
377 return NULL;
387 return NULL;
378 encodejsonescape(PyBytes_AS_STRING(escstr), esclen, origbuf, origlen,
388 encodejsonescape(PyBytes_AS_STRING(escstr), esclen, origbuf, origlen,
379 paranoid);
389 paranoid);
380
390
381 return escstr;
391 return escstr;
382 }
392 }
General Comments 0
You need to be logged in to leave comments. Login now