##// END OF EJS Templates
tests: alter email `From` line to a value that's consistently parsed...
tests: alter email `From` line to a value that's consistently parsed Python2: >>> email.header.decode_header('=?UTF-8?q?Rapha=C3=ABl=20Hertzog?= <hertzog@debian.org>') [('Rapha\xc3\xabl Hertzog', 'utf-8'), ('<hertzog@debian.org>', None)] Python3: >>> email.header.decode_header('=?UTF-8?q?Rapha=C3=ABl=20Hertzog?= <hertzog@debian.org>') [(b'Rapha\xc3\xabl Hertzog', 'utf-8'), (b' <hertzog@debian.org>', None)] So alter the input to an input that parses to the same result consistently. After skimming the relevant RFC (1342), I'm not sure if what we had was valid, or how I could modify it to be more consistent. Differential Revision: https://phab.mercurial-scm.org/D5769

File last commit:

r36075:81199632 default
r41641:8c13f7b0 default
Show More
charencode.h
61 lines | 2.1 KiB | text/x-c | CLexer
/*
charencode.h - miscellaneous character encoding
This software may be used and distributed according to the terms of
the GNU General Public License, incorporated herein by reference.
*/
#ifndef _HG_CHARENCODE_H_
#define _HG_CHARENCODE_H_
#include <Python.h>
#include "compat.h"
/* This should be kept in sync with normcasespecs in encoding.py. */
enum normcase_spec {
NORMCASE_LOWER = -1,
NORMCASE_UPPER = 1,
NORMCASE_OTHER = 0
};
PyObject *unhexlify(const char *str, Py_ssize_t len);
PyObject *isasciistr(PyObject *self, PyObject *args);
PyObject *asciilower(PyObject *self, PyObject *args);
PyObject *asciiupper(PyObject *self, PyObject *args);
PyObject *make_file_foldmap(PyObject *self, PyObject *args);
PyObject *jsonescapeu8fast(PyObject *self, PyObject *args);
/* clang-format off */
static const int8_t hextable[256] = {
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, /* 0-9 */
-1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* A-F */
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* a-f */
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
};
/* clang-format on */
static inline int hexdigit(const char *p, Py_ssize_t off)
{
int8_t val = hextable[(unsigned char)p[off]];
if (val >= 0) {
return val;
}
PyErr_SetString(PyExc_ValueError, "input contains non-hex character");
return 0;
}
#endif /* _HG_CHARENCODE_H_ */