##// END OF EJS Templates
parsers: add a function to efficiently lowercase ASCII strings...
Siddharth Agarwal -
r22778:80f2b63d default
parent child Browse files
Show More
@@ -5,7 +5,7 b''
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 import error
8 import error, parsers
9 import unicodedata, locale, os
9 import unicodedata, locale, os
10
10
11 def _getpreferredencoding():
11 def _getpreferredencoding():
@@ -258,6 +258,15 b" def trim(s, width, ellipsis='', leftside"
258 return concat(usub.encode(encoding))
258 return concat(usub.encode(encoding))
259 return ellipsis # no enough room for multi-column characters
259 return ellipsis # no enough room for multi-column characters
260
260
def asciilower(s):
    '''Return the lowercased form of ``s``, which must be pure ASCII.

    A UnicodeDecodeError propagates if ``s`` holds any non-ASCII byte.'''
    # The decoded value is discarded; decoding serves only as validation.
    s.decode('ascii')
    lowered = s.lower()
    return lowered
267
# Rebind the name to parsers.asciilower when that attribute exists;
# otherwise the pure-Python definition above stays in place.
268 asciilower = getattr(parsers, 'asciilower', asciilower)
269
261 def lower(s):
270 def lower(s):
262 "best-effort encoding-aware case-folding of local string s"
271 "best-effort encoding-aware case-folding of local string s"
263 try:
272 try:
@@ -35,6 +35,27 b' static int8_t hextable[256] = {'
35 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
35 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
36 };
36 };
37
37
/*
 * ASCII lowercasing table: entry i holds the lowercase form of byte i.
 * Only 'A'-'Z' (0x41-0x5a) differ from their index; every other entry
 * maps to itself. The table has 128 entries, so indexes must be < 128.
 * Declared const because it is a read-only lookup table.
 */
static const char lowertable[128] = {
	'\x00', '\x01', '\x02', '\x03', '\x04', '\x05', '\x06', '\x07',
	'\x08', '\x09', '\x0a', '\x0b', '\x0c', '\x0d', '\x0e', '\x0f',
	'\x10', '\x11', '\x12', '\x13', '\x14', '\x15', '\x16', '\x17',
	'\x18', '\x19', '\x1a', '\x1b', '\x1c', '\x1d', '\x1e', '\x1f',
	'\x20', '\x21', '\x22', '\x23', '\x24', '\x25', '\x26', '\x27',
	'\x28', '\x29', '\x2a', '\x2b', '\x2c', '\x2d', '\x2e', '\x2f',
	'\x30', '\x31', '\x32', '\x33', '\x34', '\x35', '\x36', '\x37',
	'\x38', '\x39', '\x3a', '\x3b', '\x3c', '\x3d', '\x3e', '\x3f',
	'\x40',
		'\x61', '\x62', '\x63', '\x64', '\x65', '\x66', '\x67', /* A-G */
	'\x68', '\x69', '\x6a', '\x6b', '\x6c', '\x6d', '\x6e', '\x6f', /* H-O */
	'\x70', '\x71', '\x72', '\x73', '\x74', '\x75', '\x76', '\x77', /* P-W */
	'\x78', '\x79', '\x7a',                                         /* X-Z */
				'\x5b', '\x5c', '\x5d', '\x5e', '\x5f',
	'\x60', '\x61', '\x62', '\x63', '\x64', '\x65', '\x66', '\x67',
	'\x68', '\x69', '\x6a', '\x6b', '\x6c', '\x6d', '\x6e', '\x6f',
	'\x70', '\x71', '\x72', '\x73', '\x74', '\x75', '\x76', '\x77',
	'\x78', '\x79', '\x7a', '\x7b', '\x7c', '\x7d', '\x7e', '\x7f'
};
58
38 static inline int hexdigit(const char *p, Py_ssize_t off)
59 static inline int hexdigit(const char *p, Py_ssize_t off)
39 {
60 {
40 int8_t val = hextable[(unsigned char)p[off]];
61 int8_t val = hextable[(unsigned char)p[off]];
@@ -72,6 +93,39 b' static PyObject *unhexlify(const char *s'
72 return ret;
93 return ret;
73 }
94 }
74
95
96 static PyObject *asciilower(PyObject *self, PyObject *args)
97 {
98 char *str, *newstr;
99 int i, len;
100 PyObject *newobj = NULL;
101
102 if (!PyArg_ParseTuple(args, "s#", &str, &len))
103 goto quit;
104
105 newobj = PyBytes_FromStringAndSize(NULL, len);
106 if (!newobj)
107 goto quit;
108
109 newstr = PyBytes_AS_STRING(newobj);
110
111 for (i = 0; i < len; i++) {
112 char c = str[i];
113 if (c & 0x80) {
114 PyObject *err = PyUnicodeDecodeError_Create(
115 "ascii", str, len, i, (i + 1),
116 "unexpected code byte");
117 PyErr_SetObject(PyExc_UnicodeDecodeError, err);
118 goto quit;
119 }
120 newstr[i] = lowertable[(unsigned char)c];
121 }
122
123 return newobj;
124 quit:
125 Py_XDECREF(newobj);
126 return NULL;
127 }
128
75 /*
129 /*
76 * This code assumes that a manifest is stitched together with newline
130 * This code assumes that a manifest is stitched together with newline
77 * ('\n') characters.
131 * ('\n') characters.
@@ -2165,6 +2219,7 b' static PyMethodDef methods[] = {'
2165 {"parse_manifest", parse_manifest, METH_VARARGS, "parse a manifest\n"},
2219 {"parse_manifest", parse_manifest, METH_VARARGS, "parse a manifest\n"},
2166 {"parse_dirstate", parse_dirstate, METH_VARARGS, "parse a dirstate\n"},
2220 {"parse_dirstate", parse_dirstate, METH_VARARGS, "parse a dirstate\n"},
2167 {"parse_index2", parse_index2, METH_VARARGS, "parse a revlog index\n"},
2221 {"parse_index2", parse_index2, METH_VARARGS, "parse a revlog index\n"},
2222 {"asciilower", asciilower, METH_VARARGS, "lowercase an ASCII string\n"},
2168 {"encodedir", encodedir, METH_VARARGS, "encodedir a path\n"},
2223 {"encodedir", encodedir, METH_VARARGS, "encodedir a path\n"},
2169 {"pathencode", pathencode, METH_VARARGS, "fncache-encode a path\n"},
2224 {"pathencode", pathencode, METH_VARARGS, "fncache-encode a path\n"},
2170 {"lowerencode", lowerencode, METH_VARARGS, "lower-encode a path\n"},
2225 {"lowerencode", lowerencode, METH_VARARGS, "lower-encode a path\n"},
General Comments 0
You need to be logged in to leave comments. Login now