##// END OF EJS Templates
charencode: adjust clang-format enable/disable comments...
Augie Fackler -
r36075:81199632 default
parent child Browse files
Show More
@@ -1,401 +1,401 b''
1 /*
1 /*
2 charencode.c - miscellaneous character encoding
2 charencode.c - miscellaneous character encoding
3
3
4 Copyright 2008 Matt Mackall <mpm@selenic.com> and others
4 Copyright 2008 Matt Mackall <mpm@selenic.com> and others
5
5
6 This software may be used and distributed according to the terms of
6 This software may be used and distributed according to the terms of
7 the GNU General Public License, incorporated herein by reference.
7 the GNU General Public License, incorporated herein by reference.
8 */
8 */
9
9
10 #define PY_SSIZE_T_CLEAN
10 #define PY_SSIZE_T_CLEAN
11 #include <Python.h>
11 #include <Python.h>
12 #include <assert.h>
12 #include <assert.h>
13
13
14 #include "charencode.h"
14 #include "charencode.h"
15 #include "compat.h"
15 #include "compat.h"
16 #include "util.h"
16 #include "util.h"
17
17
18 #ifdef IS_PY3K
18 #ifdef IS_PY3K
19 /* The mapping of Python types is meant to be temporary to get Python
19 /* The mapping of Python types is meant to be temporary to get Python
20 * 3 to compile. We should remove this once Python 3 support is fully
20 * 3 to compile. We should remove this once Python 3 support is fully
21 * supported and proper types are used in the extensions themselves. */
21 * supported and proper types are used in the extensions themselves. */
22 #define PyInt_Type PyLong_Type
22 #define PyInt_Type PyLong_Type
23 #define PyInt_AS_LONG PyLong_AS_LONG
23 #define PyInt_AS_LONG PyLong_AS_LONG
24 #endif
24 #endif
25
25
26 /* clang-format off */
26 /* clang-format off */
27 static const char lowertable[128] = {
27 static const char lowertable[128] = {
28 '\x00', '\x01', '\x02', '\x03', '\x04', '\x05', '\x06', '\x07',
28 '\x00', '\x01', '\x02', '\x03', '\x04', '\x05', '\x06', '\x07',
29 '\x08', '\x09', '\x0a', '\x0b', '\x0c', '\x0d', '\x0e', '\x0f',
29 '\x08', '\x09', '\x0a', '\x0b', '\x0c', '\x0d', '\x0e', '\x0f',
30 '\x10', '\x11', '\x12', '\x13', '\x14', '\x15', '\x16', '\x17',
30 '\x10', '\x11', '\x12', '\x13', '\x14', '\x15', '\x16', '\x17',
31 '\x18', '\x19', '\x1a', '\x1b', '\x1c', '\x1d', '\x1e', '\x1f',
31 '\x18', '\x19', '\x1a', '\x1b', '\x1c', '\x1d', '\x1e', '\x1f',
32 '\x20', '\x21', '\x22', '\x23', '\x24', '\x25', '\x26', '\x27',
32 '\x20', '\x21', '\x22', '\x23', '\x24', '\x25', '\x26', '\x27',
33 '\x28', '\x29', '\x2a', '\x2b', '\x2c', '\x2d', '\x2e', '\x2f',
33 '\x28', '\x29', '\x2a', '\x2b', '\x2c', '\x2d', '\x2e', '\x2f',
34 '\x30', '\x31', '\x32', '\x33', '\x34', '\x35', '\x36', '\x37',
34 '\x30', '\x31', '\x32', '\x33', '\x34', '\x35', '\x36', '\x37',
35 '\x38', '\x39', '\x3a', '\x3b', '\x3c', '\x3d', '\x3e', '\x3f',
35 '\x38', '\x39', '\x3a', '\x3b', '\x3c', '\x3d', '\x3e', '\x3f',
36 '\x40',
36 '\x40',
37 '\x61', '\x62', '\x63', '\x64', '\x65', '\x66', '\x67', /* A-G */
37 '\x61', '\x62', '\x63', '\x64', '\x65', '\x66', '\x67', /* A-G */
38 '\x68', '\x69', '\x6a', '\x6b', '\x6c', '\x6d', '\x6e', '\x6f', /* H-O */
38 '\x68', '\x69', '\x6a', '\x6b', '\x6c', '\x6d', '\x6e', '\x6f', /* H-O */
39 '\x70', '\x71', '\x72', '\x73', '\x74', '\x75', '\x76', '\x77', /* P-W */
39 '\x70', '\x71', '\x72', '\x73', '\x74', '\x75', '\x76', '\x77', /* P-W */
40 '\x78', '\x79', '\x7a', /* X-Z */
40 '\x78', '\x79', '\x7a', /* X-Z */
41 '\x5b', '\x5c', '\x5d', '\x5e', '\x5f',
41 '\x5b', '\x5c', '\x5d', '\x5e', '\x5f',
42 '\x60', '\x61', '\x62', '\x63', '\x64', '\x65', '\x66', '\x67',
42 '\x60', '\x61', '\x62', '\x63', '\x64', '\x65', '\x66', '\x67',
43 '\x68', '\x69', '\x6a', '\x6b', '\x6c', '\x6d', '\x6e', '\x6f',
43 '\x68', '\x69', '\x6a', '\x6b', '\x6c', '\x6d', '\x6e', '\x6f',
44 '\x70', '\x71', '\x72', '\x73', '\x74', '\x75', '\x76', '\x77',
44 '\x70', '\x71', '\x72', '\x73', '\x74', '\x75', '\x76', '\x77',
45 '\x78', '\x79', '\x7a', '\x7b', '\x7c', '\x7d', '\x7e', '\x7f'
45 '\x78', '\x79', '\x7a', '\x7b', '\x7c', '\x7d', '\x7e', '\x7f'
46 };
46 };
47
47
48 static const char uppertable[128] = {
48 static const char uppertable[128] = {
49 '\x00', '\x01', '\x02', '\x03', '\x04', '\x05', '\x06', '\x07',
49 '\x00', '\x01', '\x02', '\x03', '\x04', '\x05', '\x06', '\x07',
50 '\x08', '\x09', '\x0a', '\x0b', '\x0c', '\x0d', '\x0e', '\x0f',
50 '\x08', '\x09', '\x0a', '\x0b', '\x0c', '\x0d', '\x0e', '\x0f',
51 '\x10', '\x11', '\x12', '\x13', '\x14', '\x15', '\x16', '\x17',
51 '\x10', '\x11', '\x12', '\x13', '\x14', '\x15', '\x16', '\x17',
52 '\x18', '\x19', '\x1a', '\x1b', '\x1c', '\x1d', '\x1e', '\x1f',
52 '\x18', '\x19', '\x1a', '\x1b', '\x1c', '\x1d', '\x1e', '\x1f',
53 '\x20', '\x21', '\x22', '\x23', '\x24', '\x25', '\x26', '\x27',
53 '\x20', '\x21', '\x22', '\x23', '\x24', '\x25', '\x26', '\x27',
54 '\x28', '\x29', '\x2a', '\x2b', '\x2c', '\x2d', '\x2e', '\x2f',
54 '\x28', '\x29', '\x2a', '\x2b', '\x2c', '\x2d', '\x2e', '\x2f',
55 '\x30', '\x31', '\x32', '\x33', '\x34', '\x35', '\x36', '\x37',
55 '\x30', '\x31', '\x32', '\x33', '\x34', '\x35', '\x36', '\x37',
56 '\x38', '\x39', '\x3a', '\x3b', '\x3c', '\x3d', '\x3e', '\x3f',
56 '\x38', '\x39', '\x3a', '\x3b', '\x3c', '\x3d', '\x3e', '\x3f',
57 '\x40', '\x41', '\x42', '\x43', '\x44', '\x45', '\x46', '\x47',
57 '\x40', '\x41', '\x42', '\x43', '\x44', '\x45', '\x46', '\x47',
58 '\x48', '\x49', '\x4a', '\x4b', '\x4c', '\x4d', '\x4e', '\x4f',
58 '\x48', '\x49', '\x4a', '\x4b', '\x4c', '\x4d', '\x4e', '\x4f',
59 '\x50', '\x51', '\x52', '\x53', '\x54', '\x55', '\x56', '\x57',
59 '\x50', '\x51', '\x52', '\x53', '\x54', '\x55', '\x56', '\x57',
60 '\x58', '\x59', '\x5a', '\x5b', '\x5c', '\x5d', '\x5e', '\x5f',
60 '\x58', '\x59', '\x5a', '\x5b', '\x5c', '\x5d', '\x5e', '\x5f',
61 '\x60',
61 '\x60',
62 '\x41', '\x42', '\x43', '\x44', '\x45', '\x46', '\x47', /* a-g */
62 '\x41', '\x42', '\x43', '\x44', '\x45', '\x46', '\x47', /* a-g */
63 '\x48', '\x49', '\x4a', '\x4b', '\x4c', '\x4d', '\x4e', '\x4f', /* h-o */
63 '\x48', '\x49', '\x4a', '\x4b', '\x4c', '\x4d', '\x4e', '\x4f', /* h-o */
64 '\x50', '\x51', '\x52', '\x53', '\x54', '\x55', '\x56', '\x57', /* p-w */
64 '\x50', '\x51', '\x52', '\x53', '\x54', '\x55', '\x56', '\x57', /* p-w */
65 '\x58', '\x59', '\x5a', /* x-z */
65 '\x58', '\x59', '\x5a', /* x-z */
66 '\x7b', '\x7c', '\x7d', '\x7e', '\x7f'
66 '\x7b', '\x7c', '\x7d', '\x7e', '\x7f'
67 };
67 };
68 /* clang-format on */
69
68
70 /* 1: no escape, 2: \<c>, 6: \u<x> */
69 /* 1: no escape, 2: \<c>, 6: \u<x> */
71 static const uint8_t jsonlentable[256] = {
70 static const uint8_t jsonlentable[256] = {
72 6, 6, 6, 6, 6, 6, 6, 6, 2, 2, 2, 6, 2, 2, 6, 6, /* b, t, n, f, r */
71 6, 6, 6, 6, 6, 6, 6, 6, 2, 2, 2, 6, 2, 2, 6, 6, /* b, t, n, f, r */
73 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
72 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
74 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* " */
73 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* " */
75 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
74 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
76 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
75 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
77 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, /* \\ */
76 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, /* \\ */
78 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
77 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
79 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 6, /* DEL */
78 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 6, /* DEL */
80 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
79 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
81 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
80 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
82 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
81 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
83 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
82 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
84 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
83 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
85 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
84 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
86 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
85 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
87 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
86 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
88 };
87 };
89
88
90 static const uint8_t jsonparanoidlentable[128] = {
89 static const uint8_t jsonparanoidlentable[128] = {
91 6, 6, 6, 6, 6, 6, 6, 6, 2, 2, 2, 6, 2, 2, 6, 6, /* b, t, n, f, r */
90 6, 6, 6, 6, 6, 6, 6, 6, 2, 2, 2, 6, 2, 2, 6, 6, /* b, t, n, f, r */
92 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
91 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
93 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* " */
92 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* " */
94 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 6, 1, 6, 1, /* <, > */
93 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 6, 1, 6, 1, /* <, > */
95 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
94 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
96 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, /* \\ */
95 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, /* \\ */
97 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
96 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
98 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 6, /* DEL */
97 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 6, /* DEL */
99 };
98 };
100
99
101 static const char hexchartable[16] = {
100 static const char hexchartable[16] = {
102 '0', '1', '2', '3', '4', '5', '6', '7',
101 '0', '1', '2', '3', '4', '5', '6', '7',
103 '8', '9', 'a', 'b', 'c', 'd', 'e', 'f',
102 '8', '9', 'a', 'b', 'c', 'd', 'e', 'f',
104 };
103 };
104 /* clang-format on */
105
105
106 /*
106 /*
107 * Turn a hex-encoded string into binary.
107 * Turn a hex-encoded string into binary.
108 */
108 */
109 PyObject *unhexlify(const char *str, Py_ssize_t len)
109 PyObject *unhexlify(const char *str, Py_ssize_t len)
110 {
110 {
111 PyObject *ret;
111 PyObject *ret;
112 char *d;
112 char *d;
113 Py_ssize_t i;
113 Py_ssize_t i;
114
114
115 ret = PyBytes_FromStringAndSize(NULL, len / 2);
115 ret = PyBytes_FromStringAndSize(NULL, len / 2);
116
116
117 if (!ret)
117 if (!ret)
118 return NULL;
118 return NULL;
119
119
120 d = PyBytes_AsString(ret);
120 d = PyBytes_AsString(ret);
121
121
122 for (i = 0; i < len;) {
122 for (i = 0; i < len;) {
123 int hi = hexdigit(str, i++);
123 int hi = hexdigit(str, i++);
124 int lo = hexdigit(str, i++);
124 int lo = hexdigit(str, i++);
125 *d++ = (hi << 4) | lo;
125 *d++ = (hi << 4) | lo;
126 }
126 }
127
127
128 return ret;
128 return ret;
129 }
129 }
130
130
131 PyObject *isasciistr(PyObject *self, PyObject *args)
131 PyObject *isasciistr(PyObject *self, PyObject *args)
132 {
132 {
133 const char *buf;
133 const char *buf;
134 Py_ssize_t i, len;
134 Py_ssize_t i, len;
135 if (!PyArg_ParseTuple(args, "s#:isasciistr", &buf, &len))
135 if (!PyArg_ParseTuple(args, "s#:isasciistr", &buf, &len))
136 return NULL;
136 return NULL;
137 i = 0;
137 i = 0;
138 /* char array in PyStringObject should be at least 4-byte aligned */
138 /* char array in PyStringObject should be at least 4-byte aligned */
139 if (((uintptr_t)buf & 3) == 0) {
139 if (((uintptr_t)buf & 3) == 0) {
140 const uint32_t *p = (const uint32_t *)buf;
140 const uint32_t *p = (const uint32_t *)buf;
141 for (; i < len / 4; i++) {
141 for (; i < len / 4; i++) {
142 if (p[i] & 0x80808080U)
142 if (p[i] & 0x80808080U)
143 Py_RETURN_FALSE;
143 Py_RETURN_FALSE;
144 }
144 }
145 i *= 4;
145 i *= 4;
146 }
146 }
147 for (; i < len; i++) {
147 for (; i < len; i++) {
148 if (buf[i] & 0x80)
148 if (buf[i] & 0x80)
149 Py_RETURN_FALSE;
149 Py_RETURN_FALSE;
150 }
150 }
151 Py_RETURN_TRUE;
151 Py_RETURN_TRUE;
152 }
152 }
153
153
154 static inline PyObject *_asciitransform(PyObject *str_obj,
154 static inline PyObject *_asciitransform(PyObject *str_obj,
155 const char table[128],
155 const char table[128],
156 PyObject *fallback_fn)
156 PyObject *fallback_fn)
157 {
157 {
158 char *str, *newstr;
158 char *str, *newstr;
159 Py_ssize_t i, len;
159 Py_ssize_t i, len;
160 PyObject *newobj = NULL;
160 PyObject *newobj = NULL;
161 PyObject *ret = NULL;
161 PyObject *ret = NULL;
162
162
163 str = PyBytes_AS_STRING(str_obj);
163 str = PyBytes_AS_STRING(str_obj);
164 len = PyBytes_GET_SIZE(str_obj);
164 len = PyBytes_GET_SIZE(str_obj);
165
165
166 newobj = PyBytes_FromStringAndSize(NULL, len);
166 newobj = PyBytes_FromStringAndSize(NULL, len);
167 if (!newobj)
167 if (!newobj)
168 goto quit;
168 goto quit;
169
169
170 newstr = PyBytes_AS_STRING(newobj);
170 newstr = PyBytes_AS_STRING(newobj);
171
171
172 for (i = 0; i < len; i++) {
172 for (i = 0; i < len; i++) {
173 char c = str[i];
173 char c = str[i];
174 if (c & 0x80) {
174 if (c & 0x80) {
175 if (fallback_fn != NULL) {
175 if (fallback_fn != NULL) {
176 ret = PyObject_CallFunctionObjArgs(fallback_fn,
176 ret = PyObject_CallFunctionObjArgs(fallback_fn,
177 str_obj, NULL);
177 str_obj, NULL);
178 } else {
178 } else {
179 PyObject *err = PyUnicodeDecodeError_Create(
179 PyObject *err = PyUnicodeDecodeError_Create(
180 "ascii", str, len, i, (i + 1),
180 "ascii", str, len, i, (i + 1),
181 "unexpected code byte");
181 "unexpected code byte");
182 PyErr_SetObject(PyExc_UnicodeDecodeError, err);
182 PyErr_SetObject(PyExc_UnicodeDecodeError, err);
183 Py_XDECREF(err);
183 Py_XDECREF(err);
184 }
184 }
185 goto quit;
185 goto quit;
186 }
186 }
187 newstr[i] = table[(unsigned char)c];
187 newstr[i] = table[(unsigned char)c];
188 }
188 }
189
189
190 ret = newobj;
190 ret = newobj;
191 Py_INCREF(ret);
191 Py_INCREF(ret);
192 quit:
192 quit:
193 Py_XDECREF(newobj);
193 Py_XDECREF(newobj);
194 return ret;
194 return ret;
195 }
195 }
196
196
197 PyObject *asciilower(PyObject *self, PyObject *args)
197 PyObject *asciilower(PyObject *self, PyObject *args)
198 {
198 {
199 PyObject *str_obj;
199 PyObject *str_obj;
200 if (!PyArg_ParseTuple(args, "O!:asciilower", &PyBytes_Type, &str_obj))
200 if (!PyArg_ParseTuple(args, "O!:asciilower", &PyBytes_Type, &str_obj))
201 return NULL;
201 return NULL;
202 return _asciitransform(str_obj, lowertable, NULL);
202 return _asciitransform(str_obj, lowertable, NULL);
203 }
203 }
204
204
205 PyObject *asciiupper(PyObject *self, PyObject *args)
205 PyObject *asciiupper(PyObject *self, PyObject *args)
206 {
206 {
207 PyObject *str_obj;
207 PyObject *str_obj;
208 if (!PyArg_ParseTuple(args, "O!:asciiupper", &PyBytes_Type, &str_obj))
208 if (!PyArg_ParseTuple(args, "O!:asciiupper", &PyBytes_Type, &str_obj))
209 return NULL;
209 return NULL;
210 return _asciitransform(str_obj, uppertable, NULL);
210 return _asciitransform(str_obj, uppertable, NULL);
211 }
211 }
212
212
213 PyObject *make_file_foldmap(PyObject *self, PyObject *args)
213 PyObject *make_file_foldmap(PyObject *self, PyObject *args)
214 {
214 {
215 PyObject *dmap, *spec_obj, *normcase_fallback;
215 PyObject *dmap, *spec_obj, *normcase_fallback;
216 PyObject *file_foldmap = NULL;
216 PyObject *file_foldmap = NULL;
217 enum normcase_spec spec;
217 enum normcase_spec spec;
218 PyObject *k, *v;
218 PyObject *k, *v;
219 dirstateTupleObject *tuple;
219 dirstateTupleObject *tuple;
220 Py_ssize_t pos = 0;
220 Py_ssize_t pos = 0;
221 const char *table;
221 const char *table;
222
222
223 if (!PyArg_ParseTuple(args, "O!O!O!:make_file_foldmap",
223 if (!PyArg_ParseTuple(args, "O!O!O!:make_file_foldmap",
224 &PyDict_Type, &dmap,
224 &PyDict_Type, &dmap,
225 &PyInt_Type, &spec_obj,
225 &PyInt_Type, &spec_obj,
226 &PyFunction_Type, &normcase_fallback))
226 &PyFunction_Type, &normcase_fallback))
227 goto quit;
227 goto quit;
228
228
229 spec = (int)PyInt_AS_LONG(spec_obj);
229 spec = (int)PyInt_AS_LONG(spec_obj);
230 switch (spec) {
230 switch (spec) {
231 case NORMCASE_LOWER:
231 case NORMCASE_LOWER:
232 table = lowertable;
232 table = lowertable;
233 break;
233 break;
234 case NORMCASE_UPPER:
234 case NORMCASE_UPPER:
235 table = uppertable;
235 table = uppertable;
236 break;
236 break;
237 case NORMCASE_OTHER:
237 case NORMCASE_OTHER:
238 table = NULL;
238 table = NULL;
239 break;
239 break;
240 default:
240 default:
241 PyErr_SetString(PyExc_TypeError, "invalid normcasespec");
241 PyErr_SetString(PyExc_TypeError, "invalid normcasespec");
242 goto quit;
242 goto quit;
243 }
243 }
244
244
245 /* Add some more entries to deal with additions outside this
245 /* Add some more entries to deal with additions outside this
246 function. */
246 function. */
247 file_foldmap = _dict_new_presized((PyDict_Size(dmap) / 10) * 11);
247 file_foldmap = _dict_new_presized((PyDict_Size(dmap) / 10) * 11);
248 if (file_foldmap == NULL)
248 if (file_foldmap == NULL)
249 goto quit;
249 goto quit;
250
250
251 while (PyDict_Next(dmap, &pos, &k, &v)) {
251 while (PyDict_Next(dmap, &pos, &k, &v)) {
252 if (!dirstate_tuple_check(v)) {
252 if (!dirstate_tuple_check(v)) {
253 PyErr_SetString(PyExc_TypeError,
253 PyErr_SetString(PyExc_TypeError,
254 "expected a dirstate tuple");
254 "expected a dirstate tuple");
255 goto quit;
255 goto quit;
256 }
256 }
257
257
258 tuple = (dirstateTupleObject *)v;
258 tuple = (dirstateTupleObject *)v;
259 if (tuple->state != 'r') {
259 if (tuple->state != 'r') {
260 PyObject *normed;
260 PyObject *normed;
261 if (table != NULL) {
261 if (table != NULL) {
262 normed = _asciitransform(k, table,
262 normed = _asciitransform(k, table,
263 normcase_fallback);
263 normcase_fallback);
264 } else {
264 } else {
265 normed = PyObject_CallFunctionObjArgs(
265 normed = PyObject_CallFunctionObjArgs(
266 normcase_fallback, k, NULL);
266 normcase_fallback, k, NULL);
267 }
267 }
268
268
269 if (normed == NULL)
269 if (normed == NULL)
270 goto quit;
270 goto quit;
271 if (PyDict_SetItem(file_foldmap, normed, k) == -1) {
271 if (PyDict_SetItem(file_foldmap, normed, k) == -1) {
272 Py_DECREF(normed);
272 Py_DECREF(normed);
273 goto quit;
273 goto quit;
274 }
274 }
275 Py_DECREF(normed);
275 Py_DECREF(normed);
276 }
276 }
277 }
277 }
278 return file_foldmap;
278 return file_foldmap;
279 quit:
279 quit:
280 Py_XDECREF(file_foldmap);
280 Py_XDECREF(file_foldmap);
281 return NULL;
281 return NULL;
282 }
282 }
283
283
284 /* calculate length of JSON-escaped string; returns -1 if unsupported */
284 /* calculate length of JSON-escaped string; returns -1 if unsupported */
285 static Py_ssize_t jsonescapelen(const char *buf, Py_ssize_t len, bool paranoid)
285 static Py_ssize_t jsonescapelen(const char *buf, Py_ssize_t len, bool paranoid)
286 {
286 {
287 Py_ssize_t i, esclen = 0;
287 Py_ssize_t i, esclen = 0;
288
288
289 if (paranoid) {
289 if (paranoid) {
290 /* don't want to process multi-byte escapes in C */
290 /* don't want to process multi-byte escapes in C */
291 for (i = 0; i < len; i++) {
291 for (i = 0; i < len; i++) {
292 char c = buf[i];
292 char c = buf[i];
293 if (c & 0x80) {
293 if (c & 0x80) {
294 PyErr_SetString(PyExc_ValueError,
294 PyErr_SetString(PyExc_ValueError,
295 "cannot process non-ascii str");
295 "cannot process non-ascii str");
296 return -1;
296 return -1;
297 }
297 }
298 esclen += jsonparanoidlentable[(unsigned char)c];
298 esclen += jsonparanoidlentable[(unsigned char)c];
299 if (esclen < 0) {
299 if (esclen < 0) {
300 PyErr_SetString(PyExc_MemoryError,
300 PyErr_SetString(PyExc_MemoryError,
301 "overflow in jsonescapelen");
301 "overflow in jsonescapelen");
302 return -1;
302 return -1;
303 }
303 }
304 }
304 }
305 } else {
305 } else {
306 for (i = 0; i < len; i++) {
306 for (i = 0; i < len; i++) {
307 char c = buf[i];
307 char c = buf[i];
308 esclen += jsonlentable[(unsigned char)c];
308 esclen += jsonlentable[(unsigned char)c];
309 if (esclen < 0) {
309 if (esclen < 0) {
310 PyErr_SetString(PyExc_MemoryError,
310 PyErr_SetString(PyExc_MemoryError,
311 "overflow in jsonescapelen");
311 "overflow in jsonescapelen");
312 return -1;
312 return -1;
313 }
313 }
314 }
314 }
315 }
315 }
316
316
317 return esclen;
317 return esclen;
318 }
318 }
319
319
320 /* map '\<c>' escape character */
320 /* map '\<c>' escape character */
321 static char jsonescapechar2(char c)
321 static char jsonescapechar2(char c)
322 {
322 {
323 switch (c) {
323 switch (c) {
324 case '\b':
324 case '\b':
325 return 'b';
325 return 'b';
326 case '\t':
326 case '\t':
327 return 't';
327 return 't';
328 case '\n':
328 case '\n':
329 return 'n';
329 return 'n';
330 case '\f':
330 case '\f':
331 return 'f';
331 return 'f';
332 case '\r':
332 case '\r':
333 return 'r';
333 return 'r';
334 case '"':
334 case '"':
335 return '"';
335 return '"';
336 case '\\':
336 case '\\':
337 return '\\';
337 return '\\';
338 }
338 }
339 return '\0'; /* should not happen */
339 return '\0'; /* should not happen */
340 }
340 }
341
341
342 /* convert 'origbuf' to JSON-escaped form 'escbuf'; 'origbuf' should only
342 /* convert 'origbuf' to JSON-escaped form 'escbuf'; 'origbuf' should only
343 include characters mappable by json(paranoid)lentable */
343 include characters mappable by json(paranoid)lentable */
344 static void encodejsonescape(char *escbuf, Py_ssize_t esclen,
344 static void encodejsonescape(char *escbuf, Py_ssize_t esclen,
345 const char *origbuf, Py_ssize_t origlen,
345 const char *origbuf, Py_ssize_t origlen,
346 bool paranoid)
346 bool paranoid)
347 {
347 {
348 const uint8_t *lentable =
348 const uint8_t *lentable =
349 (paranoid) ? jsonparanoidlentable : jsonlentable;
349 (paranoid) ? jsonparanoidlentable : jsonlentable;
350 Py_ssize_t i, j;
350 Py_ssize_t i, j;
351
351
352 for (i = 0, j = 0; i < origlen; i++) {
352 for (i = 0, j = 0; i < origlen; i++) {
353 char c = origbuf[i];
353 char c = origbuf[i];
354 uint8_t l = lentable[(unsigned char)c];
354 uint8_t l = lentable[(unsigned char)c];
355 assert(j + l <= esclen);
355 assert(j + l <= esclen);
356 switch (l) {
356 switch (l) {
357 case 1:
357 case 1:
358 escbuf[j] = c;
358 escbuf[j] = c;
359 break;
359 break;
360 case 2:
360 case 2:
361 escbuf[j] = '\\';
361 escbuf[j] = '\\';
362 escbuf[j + 1] = jsonescapechar2(c);
362 escbuf[j + 1] = jsonescapechar2(c);
363 break;
363 break;
364 case 6:
364 case 6:
365 memcpy(escbuf + j, "\\u00", 4);
365 memcpy(escbuf + j, "\\u00", 4);
366 escbuf[j + 4] = hexchartable[(unsigned char)c >> 4];
366 escbuf[j + 4] = hexchartable[(unsigned char)c >> 4];
367 escbuf[j + 5] = hexchartable[(unsigned char)c & 0xf];
367 escbuf[j + 5] = hexchartable[(unsigned char)c & 0xf];
368 break;
368 break;
369 }
369 }
370 j += l;
370 j += l;
371 }
371 }
372 }
372 }
373
373
374 PyObject *jsonescapeu8fast(PyObject *self, PyObject *args)
374 PyObject *jsonescapeu8fast(PyObject *self, PyObject *args)
375 {
375 {
376 PyObject *origstr, *escstr;
376 PyObject *origstr, *escstr;
377 const char *origbuf;
377 const char *origbuf;
378 Py_ssize_t origlen, esclen;
378 Py_ssize_t origlen, esclen;
379 int paranoid;
379 int paranoid;
380 if (!PyArg_ParseTuple(args, "O!i:jsonescapeu8fast",
380 if (!PyArg_ParseTuple(args, "O!i:jsonescapeu8fast",
381 &PyBytes_Type, &origstr, &paranoid))
381 &PyBytes_Type, &origstr, &paranoid))
382 return NULL;
382 return NULL;
383
383
384 origbuf = PyBytes_AS_STRING(origstr);
384 origbuf = PyBytes_AS_STRING(origstr);
385 origlen = PyBytes_GET_SIZE(origstr);
385 origlen = PyBytes_GET_SIZE(origstr);
386 esclen = jsonescapelen(origbuf, origlen, paranoid);
386 esclen = jsonescapelen(origbuf, origlen, paranoid);
387 if (esclen < 0)
387 if (esclen < 0)
388 return NULL; /* unsupported char found or overflow */
388 return NULL; /* unsupported char found or overflow */
389 if (origlen == esclen) {
389 if (origlen == esclen) {
390 Py_INCREF(origstr);
390 Py_INCREF(origstr);
391 return origstr;
391 return origstr;
392 }
392 }
393
393
394 escstr = PyBytes_FromStringAndSize(NULL, esclen);
394 escstr = PyBytes_FromStringAndSize(NULL, esclen);
395 if (!escstr)
395 if (!escstr)
396 return NULL;
396 return NULL;
397 encodejsonescape(PyBytes_AS_STRING(escstr), esclen, origbuf, origlen,
397 encodejsonescape(PyBytes_AS_STRING(escstr), esclen, origbuf, origlen,
398 paranoid);
398 paranoid);
399
399
400 return escstr;
400 return escstr;
401 }
401 }
@@ -1,59 +1,61 b''
1 /*
1 /*
2 charencode.h - miscellaneous character encoding
2 charencode.h - miscellaneous character encoding
3
3
4 This software may be used and distributed according to the terms of
4 This software may be used and distributed according to the terms of
5 the GNU General Public License, incorporated herein by reference.
5 the GNU General Public License, incorporated herein by reference.
6 */
6 */
7
7
8 #ifndef _HG_CHARENCODE_H_
8 #ifndef _HG_CHARENCODE_H_
9 #define _HG_CHARENCODE_H_
9 #define _HG_CHARENCODE_H_
10
10
11 #include <Python.h>
11 #include <Python.h>
12 #include "compat.h"
12 #include "compat.h"
13
13
14 /* This should be kept in sync with normcasespecs in encoding.py. */
14 /* This should be kept in sync with normcasespecs in encoding.py. */
15 enum normcase_spec {
15 enum normcase_spec {
16 NORMCASE_LOWER = -1,
16 NORMCASE_LOWER = -1,
17 NORMCASE_UPPER = 1,
17 NORMCASE_UPPER = 1,
18 NORMCASE_OTHER = 0
18 NORMCASE_OTHER = 0
19 };
19 };
20
20
21 PyObject *unhexlify(const char *str, Py_ssize_t len);
21 PyObject *unhexlify(const char *str, Py_ssize_t len);
22 PyObject *isasciistr(PyObject *self, PyObject *args);
22 PyObject *isasciistr(PyObject *self, PyObject *args);
23 PyObject *asciilower(PyObject *self, PyObject *args);
23 PyObject *asciilower(PyObject *self, PyObject *args);
24 PyObject *asciiupper(PyObject *self, PyObject *args);
24 PyObject *asciiupper(PyObject *self, PyObject *args);
25 PyObject *make_file_foldmap(PyObject *self, PyObject *args);
25 PyObject *make_file_foldmap(PyObject *self, PyObject *args);
26 PyObject *jsonescapeu8fast(PyObject *self, PyObject *args);
26 PyObject *jsonescapeu8fast(PyObject *self, PyObject *args);
27
27
28 /* clang-format off */
28 static const int8_t hextable[256] = {
29 static const int8_t hextable[256] = {
29 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
30 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
30 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
31 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
31 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
32 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
32 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, /* 0-9 */
33 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, /* 0-9 */
33 -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* A-F */
34 -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* A-F */
34 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
35 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
35 -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* a-f */
36 -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* a-f */
36 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
37 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
37 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
38 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
38 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
39 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
39 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
40 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
40 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
41 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
41 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
42 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
42 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
43 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
43 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
44 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
44 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
45 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
45 };
46 };
47 /* clang-format on */
46
48
47 static inline int hexdigit(const char *p, Py_ssize_t off)
49 static inline int hexdigit(const char *p, Py_ssize_t off)
48 {
50 {
49 int8_t val = hextable[(unsigned char)p[off]];
51 int8_t val = hextable[(unsigned char)p[off]];
50
52
51 if (val >= 0) {
53 if (val >= 0) {
52 return val;
54 return val;
53 }
55 }
54
56
55 PyErr_SetString(PyExc_ValueError, "input contains non-hex character");
57 PyErr_SetString(PyExc_ValueError, "input contains non-hex character");
56 return 0;
58 return 0;
57 }
59 }
58
60
59 #endif /* _HG_CHARENCODE_H_ */
61 #endif /* _HG_CHARENCODE_H_ */
General Comments 0
You need to be logged in to leave comments. Login now