pathencode.c
802 lines
| 17.0 KiB
| text/x-c
|
CLexer
Yuya Nishihara
|
r32372 | /* | ||
pathencode.c - efficient path name encoding | ||||
Copyright 2012 Facebook | ||||
This software may be used and distributed according to the terms of | ||||
the GNU General Public License, incorporated herein by reference. | ||||
*/ | ||||
/* | ||||
* An implementation of the name encoding scheme used by the fncache | ||||
* store. The common case is of a path < 120 bytes long, which is | ||||
* handled either in a single pass with no allocations or two passes | ||||
* with a single allocation. For longer paths, multiple passes are | ||||
* required. | ||||
*/ | ||||
#define PY_SSIZE_T_CLEAN | ||||
#include <Python.h> | ||||
#include <assert.h> | ||||
#include <ctype.h> | ||||
#include <stdlib.h> | ||||
#include <string.h> | ||||
Victor Stinner
|
r47090 | #include "pythoncapi_compat.h" | ||
Yuya Nishihara
|
r32372 | |||
#include "util.h" | ||||
/* state machine for the fast path */ | ||||
enum path_state {
	START,   /* first byte of a path component */
	A,       /* component starts with "a" (candidate "AUX") */
	AU,      /* component starts with "au" */
	THIRD,   /* third of a 3-byte sequence, e.g. "AUX", "NUL" */
	C,       /* component starts with "c" (candidate "CON" or "COMn") */
	CO,      /* component starts with "co" */
	COMLPT,  /* "COM" or "LPT" seen, a digit may follow */
	COMLPTn, /* "COM" or "LPT" followed by a digit 1-9 */
	L,       /* component starts with "l" (candidate "LPTn") */
	LP,      /* component starts with "lp" */
	N,       /* component starts with "n" (candidate "NUL") */
	NU,      /* component starts with "nu" */
	P,       /* component starts with "p" (candidate "PRN") */
	PR,      /* component starts with "pr" */
	LDOT,    /* leading '.' */
	DOT,     /* '.' in a non-leading position */
	H,       /* ".h" */
	HGDI,    /* ".hg", ".d", or ".i" */
	SPACE,   /* ' ' in a non-leading position */
	DEFAULT, /* byte of a path component after the first */
};
/* state machine for dir-encoding */ | ||||
enum dir_state {
	DDOT,     /* a '.' was just copied */
	DH,       /* ".h" was just copied */
	DHGDI,    /* ".hg", ".d", or ".i" was just copied */
	DDEFAULT, /* any other position */
};
/*
 * Test whether byte c is a member of a 256-bit set.
 *
 * bitset is indexed by the byte's unsigned value v: the membership bit is
 * bit (v & 31) of word (v >> 5).  Returns 1 when the bit is set and 0
 * otherwise.
 */
static inline int inset(const uint32_t bitset[], char c)
{
	const uint8_t v = (uint8_t)c;

	/*
	 * Shift the word down rather than shifting a signed 1 up:
	 * "1 << 31" overflows int, which is undefined behavior.
	 */
	return (int)((bitset[v >> 5] >> (v & 31)) & 1U);
}
/*
 * Append one byte to dest at offset *destlen and advance *destlen.
 *
 * When dest is NULL nothing is stored; *destlen is still advanced, which
 * lets callers measure the output size in a first pass.
 */
static inline void charcopy(char *dest, Py_ssize_t *destlen, size_t destsize,
	char c)
{
	const Py_ssize_t pos = *destlen;

	*destlen = pos + 1;
	if (!dest) {
		return;
	}
	assert(pos < destsize);
	dest[pos] = c;
}
/*
 * Append len bytes from src to dest at offset *destlen and advance
 * *destlen by len.
 *
 * When dest is NULL nothing is copied; *destlen is still advanced, which
 * lets callers measure the output size in a first pass.
 */
static inline void memcopy(char *dest, Py_ssize_t *destlen, size_t destsize,
	const void *src, Py_ssize_t len)
{
	if (dest) {
		/*
		 * "<=" rather than "<": a copy that ends exactly at the end
		 * of the buffer is in bounds, matching the bound charcopy()
		 * enforces for a single byte.
		 */
		assert(len >= 0);
		assert((size_t)(*destlen + len) <= destsize);
		memcpy((void *)&dest[*destlen], src, (size_t)len);
	}
	*destlen += len;
}
/*
 * Append the two lowercase hexadecimal digits of c (high nibble first)
 * to dest via charcopy().
 */
static inline void hexencode(char *dest, Py_ssize_t *destlen, size_t destsize,
	uint8_t c)
{
	static const char digits[] = "0123456789abcdef";
	const char high = digits[c >> 4];
	const char low = digits[c & 15];

	charcopy(dest, destlen, destsize, high);
	charcopy(dest, destlen, destsize, low);
}
/* 3-byte escape: tilde followed by two hex digits */ | ||||
static inline void escape3(char *dest, Py_ssize_t *destlen, size_t destsize,
	char c)
{
	/* hexencode() takes the byte as an unsigned 8-bit value */
	const uint8_t byte = (uint8_t)c;

	charcopy(dest, destlen, destsize, '~');
	hexencode(dest, destlen, destsize, byte);
}
Augie Fackler
|
r36073 | static Py_ssize_t _encodedir(char *dest, size_t destsize, const char *src, | ||
Py_ssize_t len) | ||||
Yuya Nishihara
|
r32372 | { | ||
enum dir_state state = DDEFAULT; | ||||
Py_ssize_t i = 0, destlen = 0; | ||||
while (i < len) { | ||||
switch (state) { | ||||
case DDOT: | ||||
switch (src[i]) { | ||||
case 'd': | ||||
case 'i': | ||||
state = DHGDI; | ||||
charcopy(dest, &destlen, destsize, src[i++]); | ||||
break; | ||||
case 'h': | ||||
state = DH; | ||||
charcopy(dest, &destlen, destsize, src[i++]); | ||||
break; | ||||
default: | ||||
state = DDEFAULT; | ||||
break; | ||||
} | ||||
break; | ||||
case DH: | ||||
if (src[i] == 'g') { | ||||
state = DHGDI; | ||||
charcopy(dest, &destlen, destsize, src[i++]); | ||||
Augie Fackler
|
r41367 | } else { | ||
Augie Fackler
|
r36073 | state = DDEFAULT; | ||
Augie Fackler
|
r41367 | } | ||
Yuya Nishihara
|
r32372 | break; | ||
case DHGDI: | ||||
if (src[i] == '/') { | ||||
memcopy(dest, &destlen, destsize, ".hg", 3); | ||||
charcopy(dest, &destlen, destsize, src[i++]); | ||||
} | ||||
state = DDEFAULT; | ||||
break; | ||||
case DDEFAULT: | ||||
Augie Fackler
|
r41367 | if (src[i] == '.') { | ||
Yuya Nishihara
|
r32372 | state = DDOT; | ||
Augie Fackler
|
r41367 | } | ||
Yuya Nishihara
|
r32372 | charcopy(dest, &destlen, destsize, src[i++]); | ||
break; | ||||
} | ||||
} | ||||
return destlen; | ||||
} | ||||
/*
 * Python entry point: apply _encodedir() to a bytes object.
 *
 * Returns a new reference to the argument itself when encoding leaves it
 * unchanged, otherwise a new bytes object.  Returns NULL with an
 * exception set on failure.
 */
PyObject *encodedir(PyObject *self, PyObject *args)
{
	PyObject *pathobj, *newobj;
	Py_ssize_t len, newlen;
	char *path;

	if (!PyArg_ParseTuple(args, "O:encodedir", &pathobj)) {
		return NULL;
	}

	if (PyBytes_AsStringAndSize(pathobj, &path, &len) == -1) {
		PyErr_SetString(PyExc_TypeError, "expected a string");
		return NULL;
	}

	/* First pass measures only; len + 1 includes the trailing NUL. */
	newlen = 1;
	if (len) {
		newlen = _encodedir(NULL, 0, path, len + 1);
	}

	if (newlen == len + 1) {
		/* nothing was inserted: hand back the input */
		Py_INCREF(pathobj);
		return pathobj;
	}

	newobj = PyBytes_FromStringAndSize(NULL, newlen);
	if (!newobj) {
		return NULL;
	}

	assert(PyBytes_Check(newobj));
	/* newlen counted the trailing NUL, which is not part of the value */
	Py_SET_SIZE(newobj, Py_SIZE(newobj) - 1);
	_encodedir(PyBytes_AS_STRING(newobj), newlen, path, len + 1);

	return newobj;
}
/*
 * Table-driven path encoder shared by basicencode() and auxencode().
 *
 * twobytes:  set of bytes emitted as '_' followed by the byte + 32
 *            ('_' itself is emitted as "__").
 * onebyte:   set of bytes copied through unchanged.
 * dest:      output buffer, or NULL to only measure the output.
 * destlen:   offset in dest at which output starts.
 * destsize:  size of dest, used for bounds assertions only.
 * src, len:  input bytes; callers in this file pass a length that
 *            includes the trailing NUL, which acts as end-of-path token.
 * encodedir: when non-zero, ".hg" is inserted before a '/' that follows
 *            a component-leading ".hg", ".d" or ".i".
 *
 * Bytes in neither set are emitted as '~' plus two hex digits.  At the
 * start of a path component, a leading '.' or ' ' is escaped, and the
 * names "aux", "con", "nul", "prn", "com1".."com9", "lpt1".."lpt9" get
 * their third letter escaped when followed by '.', '/' or NUL.  A '.' or
 * ' ' directly before '/' or NUL is escaped as well.
 *
 * Returns the output offset after the last byte produced.
 */
static Py_ssize_t _encode(const uint32_t twobytes[8], const uint32_t onebyte[8],
	char *dest, Py_ssize_t destlen, size_t destsize,
	const char *src, Py_ssize_t len, int encodedir)
{
	enum path_state state = START;
	Py_ssize_t i = 0;

	/*
	 * Python strings end with a zero byte, which we use as a
	 * terminal token as they are not valid inside path names.
	 */

	while (i < len) {
		switch (state) {
		case START:
			switch (src[i]) {
			case '/':
				charcopy(dest, &destlen, destsize, src[i++]);
				break;
			case '.':
				state = LDOT;
				escape3(dest, &destlen, destsize, src[i++]);
				break;
			case ' ':
				state = DEFAULT;
				escape3(dest, &destlen, destsize, src[i++]);
				break;
			case 'a':
				state = A;
				charcopy(dest, &destlen, destsize, src[i++]);
				break;
			case 'c':
				state = C;
				charcopy(dest, &destlen, destsize, src[i++]);
				break;
			case 'l':
				state = L;
				charcopy(dest, &destlen, destsize, src[i++]);
				break;
			case 'n':
				state = N;
				charcopy(dest, &destlen, destsize, src[i++]);
				break;
			case 'p':
				state = P;
				charcopy(dest, &destlen, destsize, src[i++]);
				break;
			default:
				/* not consumed: handled again in DEFAULT */
				state = DEFAULT;
				break;
			}
			break;
		case A:
			if (src[i] == 'u') {
				state = AU;
				charcopy(dest, &destlen, destsize, src[i++]);
			} else {
				state = DEFAULT;
			}
			break;
		case AU:
			if (src[i] == 'x') {
				/* hold the third letter back until we know
				   whether it has to be escaped */
				state = THIRD;
				i++;
			} else {
				state = DEFAULT;
			}
			break;
		case THIRD:
			state = DEFAULT;
			switch (src[i]) {
			case '.':
			case '/':
			case '\0':
				/* reserved name: escape the held-back letter */
				escape3(dest, &destlen, destsize, src[i - 1]);
				break;
			default:
				/* ordinary name: step back so the held-back
				   letter is processed in DEFAULT */
				i--;
				break;
			}
			break;
		case C:
			if (src[i] == 'o') {
				state = CO;
				charcopy(dest, &destlen, destsize, src[i++]);
			} else {
				state = DEFAULT;
			}
			break;
		case CO:
			if (src[i] == 'm') {
				state = COMLPT;
				i++;
			} else if (src[i] == 'n') {
				state = THIRD;
				i++;
			} else {
				state = DEFAULT;
			}
			break;
		case COMLPT:
			switch (src[i]) {
			case '1':
			case '2':
			case '3':
			case '4':
			case '5':
			case '6':
			case '7':
			case '8':
			case '9':
				/* hold the digit back as well */
				state = COMLPTn;
				i++;
				break;
			default:
				/* no digit: emit the held-back third letter */
				state = DEFAULT;
				charcopy(dest, &destlen, destsize, src[i - 1]);
				break;
			}
			break;
		case COMLPTn:
			state = DEFAULT;
			switch (src[i]) {
			case '.':
			case '/':
			case '\0':
				/* reserved name: escape the third letter,
				   keep the digit */
				escape3(dest, &destlen, destsize, src[i - 2]);
				charcopy(dest, &destlen, destsize, src[i - 1]);
				break;
			default:
				/* ordinary name: emit letter and digit as is */
				memcopy(dest, &destlen, destsize, &src[i - 2],
					2);
				break;
			}
			break;
		case L:
			if (src[i] == 'p') {
				state = LP;
				charcopy(dest, &destlen, destsize, src[i++]);
			} else {
				state = DEFAULT;
			}
			break;
		case LP:
			if (src[i] == 't') {
				state = COMLPT;
				i++;
			} else {
				state = DEFAULT;
			}
			break;
		case N:
			if (src[i] == 'u') {
				state = NU;
				charcopy(dest, &destlen, destsize, src[i++]);
			} else {
				state = DEFAULT;
			}
			break;
		case NU:
			if (src[i] == 'l') {
				state = THIRD;
				i++;
			} else {
				state = DEFAULT;
			}
			break;
		case P:
			if (src[i] == 'r') {
				state = PR;
				charcopy(dest, &destlen, destsize, src[i++]);
			} else {
				state = DEFAULT;
			}
			break;
		case PR:
			if (src[i] == 'n') {
				state = THIRD;
				i++;
			} else {
				state = DEFAULT;
			}
			break;
		case LDOT:
			switch (src[i]) {
			case 'd':
			case 'i':
				state = HGDI;
				charcopy(dest, &destlen, destsize, src[i++]);
				break;
			case 'h':
				state = H;
				charcopy(dest, &destlen, destsize, src[i++]);
				break;
			default:
				state = DEFAULT;
				break;
			}
			break;
		case DOT:
			/* the '.' itself was consumed but not yet emitted */
			switch (src[i]) {
			case '/':
			case '\0':
				/* trailing '.' of a component is escaped */
				state = START;
				memcopy(dest, &destlen, destsize, "~2e", 3);
				charcopy(dest, &destlen, destsize, src[i++]);
				break;
			case 'd':
			case 'i':
				state = HGDI;
				charcopy(dest, &destlen, destsize, '.');
				charcopy(dest, &destlen, destsize, src[i++]);
				break;
			case 'h':
				state = H;
				memcopy(dest, &destlen, destsize, ".h", 2);
				i++;
				break;
			default:
				state = DEFAULT;
				charcopy(dest, &destlen, destsize, '.');
				break;
			}
			break;
		case H:
			if (src[i] == 'g') {
				state = HGDI;
				charcopy(dest, &destlen, destsize, src[i++]);
			} else {
				state = DEFAULT;
			}
			break;
		case HGDI:
			if (src[i] == '/') {
				state = START;
				if (encodedir) {
					memcopy(dest, &destlen, destsize, ".hg",
						3);
				}
				charcopy(dest, &destlen, destsize, src[i++]);
			} else {
				state = DEFAULT;
			}
			break;
		case SPACE:
			/* the ' ' itself was consumed but not yet emitted */
			switch (src[i]) {
			case '/':
			case '\0':
				/* trailing ' ' of a component is escaped */
				state = START;
				memcopy(dest, &destlen, destsize, "~20", 3);
				charcopy(dest, &destlen, destsize, src[i++]);
				break;
			default:
				state = DEFAULT;
				charcopy(dest, &destlen, destsize, ' ');
				break;
			}
			break;
		case DEFAULT:
			/* copy the run of pass-through bytes */
			while (inset(onebyte, src[i])) {
				charcopy(dest, &destlen, destsize, src[i++]);
				if (i == len) {
					goto done;
				}
			}
			switch (src[i]) {
			case '.':
				state = DOT;
				i++;
				break;
			case ' ':
				state = SPACE;
				i++;
				break;
			case '/':
				state = START;
				charcopy(dest, &destlen, destsize, '/');
				i++;
				break;
			default:
				/*
				 * The loop above only stops on a byte that is
				 * not in onebyte, so just two encodings are
				 * left to choose from here.
				 */
				if (inset(twobytes, src[i])) {
					char c = src[i++];
					charcopy(dest, &destlen, destsize, '_');
					charcopy(dest, &destlen, destsize,
						c == '_' ? '_' : c + 32);
				} else {
					escape3(dest, &destlen, destsize,
						src[i++]);
				}
				break;
			}
			break;
		}
	}
done:
	return destlen;
}
Augie Fackler
|
r36073 | static Py_ssize_t basicencode(char *dest, size_t destsize, const char *src, | ||
Py_ssize_t len) | ||||
Yuya Nishihara
|
r32372 | { | ||
Augie Fackler
|
r36073 | static const uint32_t twobytes[8] = {0, 0, 0x87fffffe}; | ||
Yuya Nishihara
|
r32372 | |||
static const uint32_t onebyte[8] = { | ||||
Yuya Nishihara
|
r38724 | 1, | ||
0x2bff3bfa, | ||||
0x68000001, | ||||
0x2fffffff, | ||||
Yuya Nishihara
|
r32372 | }; | ||
Py_ssize_t destlen = 0; | ||||
Augie Fackler
|
r36073 | return _encode(twobytes, onebyte, dest, destlen, destsize, src, len, 1); | ||
Yuya Nishihara
|
r32372 | } | ||
/* Longest encoded path, in bytes, that pathencode() returns without
   switching to the hashed form; hashmangle() sizes its output from it. */
static const Py_ssize_t maxstorepathlen = 120;
Augie Fackler
|
r36073 | static Py_ssize_t _lowerencode(char *dest, size_t destsize, const char *src, | ||
Py_ssize_t len) | ||||
Yuya Nishihara
|
r32372 | { | ||
Augie Fackler
|
r36073 | static const uint32_t onebyte[8] = {1, 0x2bfffbfb, 0xe8000001, | ||
0x2fffffff}; | ||||
Yuya Nishihara
|
r32372 | |||
Augie Fackler
|
r36073 | static const uint32_t lower[8] = {0, 0, 0x7fffffe}; | ||
Yuya Nishihara
|
r32372 | |||
Py_ssize_t i, destlen = 0; | ||||
for (i = 0; i < len; i++) { | ||||
Augie Fackler
|
r41367 | if (inset(onebyte, src[i])) { | ||
Yuya Nishihara
|
r32372 | charcopy(dest, &destlen, destsize, src[i]); | ||
Augie Fackler
|
r41367 | } else if (inset(lower, src[i])) { | ||
Yuya Nishihara
|
r32372 | charcopy(dest, &destlen, destsize, src[i] + 32); | ||
Augie Fackler
|
r41367 | } else { | ||
Yuya Nishihara
|
r32372 | escape3(dest, &destlen, destsize, src[i]); | ||
Augie Fackler
|
r41367 | } | ||
Yuya Nishihara
|
r32372 | } | ||
return destlen; | ||||
} | ||||
/*
 * Python entry point: return a new bytes object holding the
 * _lowerencode() form of the bytes-like argument, or NULL with an
 * exception set on failure.
 */
PyObject *lowerencode(PyObject *self, PyObject *args)
{
	PyObject *ret;
	Py_ssize_t len, newlen;
	char *path;

	if (!PyArg_ParseTuple(args, "y#:lowerencode", &path, &len)) {
		return NULL;
	}

	/* measure first, then encode into an exactly sized object */
	newlen = _lowerencode(NULL, 0, path, len);
	ret = PyBytes_FromStringAndSize(NULL, newlen);
	if (ret == NULL) {
		return NULL;
	}

	_lowerencode(PyBytes_AS_STRING(ret), newlen, path, len);
	return ret;
}
/* See store.py:_auxencode for a description. */ | ||||
Augie Fackler
|
r36073 | static Py_ssize_t auxencode(char *dest, size_t destsize, const char *src, | ||
Py_ssize_t len) | ||||
Yuya Nishihara
|
r32372 | { | ||
static const uint32_t twobytes[8]; | ||||
static const uint32_t onebyte[8] = { | ||||
Augie Fackler
|
r36073 | ~0U, 0xffff3ffe, ~0U, ~0U, ~0U, ~0U, ~0U, ~0U, | ||
Yuya Nishihara
|
r32372 | }; | ||
return _encode(twobytes, onebyte, dest, 0, destsize, src, len, 0); | ||||
} | ||||
/*
 * Build the hashed store name for a path that is too long to store as is.
 *
 * src:  the already encoded path; len is expected to count a trailing
 *       NUL byte (the "- 1" / "- 2" adjustments below skip it).
 * sha:  20-byte digest that is appended in hexadecimal.
 *
 * The result has the form
 *     "dh/" <shortened directories> "/" <basename prefix> <40 hex> <suffix>
 * where each directory is cut to dirprefixlen bytes, the directory part
 * is limited to maxshortdirslen bytes, and <suffix> is the text from the
 * last '.' of the basename onwards.
 *
 * Returns a new bytes object, or NULL with an exception set if the
 * allocation fails.
 */
static PyObject *hashmangle(const char *src, Py_ssize_t len, const char sha[20])
{
	static const Py_ssize_t dirprefixlen = 8;
	static const Py_ssize_t maxshortdirslen = 68;
	char *dest;
	PyObject *ret;

	Py_ssize_t i, p, lastslash = len - 1, lastdot = -1;
	Py_ssize_t destsize, destlen = 0, slop, used;

	/* Scan backwards for the last '/' and, within the basename, the
	   last '.'. */
	while (lastslash >= 0 && src[lastslash] != '/') {
		if (src[lastslash] == '.' && lastdot == -1) {
			lastdot = lastslash;
		}
		lastslash--;
	}

#if 0
	/* All paths should end in a suffix of ".i" or ".d".
	   Unfortunately, the file names in test-hybridencode.py
	   violate this rule.  */
	if (lastdot != len - 3) {
		PyErr_SetString(PyExc_ValueError,
				"suffix missing or wrong length");
		return NULL;
	}
#endif

	/* If src contains a suffix, we will append it to the end of
	   the new string, so make room. */
	destsize = maxstorepathlen;
	if (lastdot >= 0) {
		destsize += len - lastdot - 1;
	}

	ret = PyBytes_FromStringAndSize(NULL, destsize);
	if (ret == NULL) {
		return NULL;
	}

	dest = PyBytes_AS_STRING(ret);
	memcopy(dest, &destlen, destsize, "dh/", 3);

	/* Copy up to dirprefixlen bytes of each path component, up to
	   a limit of maxshortdirslen bytes. */
	for (i = p = 0; i < lastslash; i++, p++) {
		if (src[i] == '/') {
			char last = dest[destlen - 1];
			/* After truncation, a directory name may end
			   in a space or dot, which are unportable. */
			if (last == '.' || last == ' ') {
				dest[destlen - 1] = '_';
			}
			/* The + 3 is to account for "dh/" in the
			 * beginning */
			if (destlen > maxshortdirslen + 3) {
				break;
			}
			charcopy(dest, &destlen, destsize, src[i]);
			p = -1;
		} else if (p < dirprefixlen) {
			charcopy(dest, &destlen, destsize, src[i]);
		}
	}

	/* Rewind to just before the last slash copied. */
	if (destlen > maxshortdirslen + 3) {
		do {
			destlen--;
		} while (destlen > 0 && dest[destlen] != '/');
	}

	if (destlen > 3) {
		if (lastslash > 0) {
			char last = dest[destlen - 1];
			/* The last directory component may be
			   truncated, so make it safe. */
			if (last == '.' || last == ' ') {
				dest[destlen - 1] = '_';
			}
		}

		charcopy(dest, &destlen, destsize, '/');
	}

	/* Add a prefix of the original file's name. Its length
	   depends on the number of bytes left after accounting for
	   hash and suffix. */
	used = destlen + 40; /* 40 = 20 digest bytes as hex digits */
	if (lastdot >= 0) {
		used += len - lastdot - 1;
	}

	slop = maxstorepathlen - used;
	if (slop > 0) {
		Py_ssize_t basenamelen =
			lastslash >= 0 ? len - lastslash - 2 : len - 1;

		if (basenamelen > slop) {
			basenamelen = slop;
		}
		if (basenamelen > 0) {
			memcopy(dest, &destlen, destsize, &src[lastslash + 1],
				basenamelen);
		}
	}

	/* Add hash and suffix. */
	for (i = 0; i < 20; i++) {
		hexencode(dest, &destlen, destsize, sha[i]);
	}

	if (lastdot >= 0) {
		memcopy(dest, &destlen, destsize, &src[lastdot],
			len - lastdot - 1);
	}

	assert(PyBytes_Check(ret));
	/* shrink the object to the number of bytes actually written */
	Py_SET_SIZE(ret, destlen);

	return ret;
}
/* | ||||
* Avoiding a trip through Python would improve performance by 50%, | ||||
* but we don't encounter enough long names to be worth the code. | ||||
*/ | ||||
/*
 * Store the SHA-1 digest of str[0..len) in hash, computed by calling
 * Python's hashlib.sha1.  The callable is looked up on first use and kept
 * in a function-local static.
 *
 * Returns 0 on success, -1 with a Python exception set on failure.
 */
static int sha1hash(char hash[20], const char *str, Py_ssize_t len)
{
	static PyObject *shafunc;
	PyObject *shaobj, *hashobj;
	int rc = -1;

	if (shafunc == NULL) {
		PyObject *hashlib = PyImport_ImportModule("hashlib");
		if (hashlib == NULL) {
			PyErr_SetString(PyExc_ImportError,
				"pathencode failed to find hashlib");
			return -1;
		}
		shafunc = PyObject_GetAttrString(hashlib, "sha1");
		Py_DECREF(hashlib);

		if (shafunc == NULL) {
			PyErr_SetString(PyExc_AttributeError,
				"module 'hashlib' has no "
				"attribute 'sha1' in pathencode");
			return -1;
		}
	}

	shaobj = PyObject_CallFunction(shafunc, "y#", str, len);
	if (shaobj == NULL) {
		return -1;
	}

	hashobj = PyObject_CallMethod(shaobj, "digest", "");
	Py_DECREF(shaobj);
	if (hashobj == NULL) {
		return -1;
	}

	/* from here on hashobj is released at one place below */
	if (PyBytes_Check(hashobj) && PyBytes_GET_SIZE(hashobj) == 20) {
		memcpy(hash, PyBytes_AS_STRING(hashobj), 20);
		rc = 0;
	} else {
		PyErr_SetString(PyExc_TypeError,
			"result of digest is not a 20-byte hash");
	}

	Py_DECREF(hashobj);
	return rc;
}
/* Size in bytes of each scratch buffer used by hashencode().  The
   expansion is parenthesized so it stays one value inside any expression. */
#define MAXENCODE (4096 * 4)
/*
 * Produce the hashed store name for src: directory-encode it, take the
 * SHA-1 of that form, then lower-encode and aux-encode everything after
 * its first five bytes and pass the result to hashmangle().
 *
 * len is expected to count the trailing NUL (pathencode() passes
 * len + 1).  Returns a new bytes object, or NULL with an exception set.
 */
static PyObject *hashencode(const char *src, Py_ssize_t len)
{
	char dired[MAXENCODE];
	char lowered[MAXENCODE];
	char auxed[MAXENCODE];
	char sha[20];
	Py_ssize_t dirlen, lowerlen, auxlen;
	const Py_ssize_t baselen = (len - 5) * 3;

	/* baselen serves as the size bound of every encoding step below */
	if (baselen >= MAXENCODE) {
		PyErr_SetString(PyExc_ValueError, "string too long");
		return NULL;
	}

	dirlen = _encodedir(dired, baselen, src, len);

	/* the digest covers the encoded path without its last byte */
	if (sha1hash(sha, dired, dirlen - 1) == -1) {
		return NULL;
	}

	/* the first five bytes of the path are not carried into the name */
	lowerlen = _lowerencode(lowered, baselen, dired + 5, dirlen - 5);
	auxlen = auxencode(auxed, baselen, lowered, lowerlen);

	return hashmangle(auxed, auxlen, sha);
}
/*
 * Python entry point: encode a store path given as a bytes object.
 *
 * Inputs longer than maxstorepathlen, and inputs whose basic encoding
 * exceeds it, are returned in hashed form via hashencode().  Otherwise
 * the basic encoding is returned, or the argument itself (new reference)
 * when the encoding leaves it unchanged.  Returns NULL with an exception
 * set on failure.
 */
PyObject *pathencode(PyObject *self, PyObject *args)
{
	PyObject *pathobj, *newobj;
	Py_ssize_t len, newlen;
	char *path;

	if (!PyArg_ParseTuple(args, "O:pathencode", &pathobj)) {
		return NULL;
	}

	if (PyBytes_AsStringAndSize(pathobj, &path, &len) == -1) {
		PyErr_SetString(PyExc_TypeError, "expected a string");
		return NULL;
	}

	/* Too long before encoding: no need to measure, go hashed. */
	if (len > maxstorepathlen) {
		return hashencode(path, len + 1);
	}

	/* Measuring pass; len + 1 includes the trailing NUL. */
	newlen = 1;
	if (len) {
		newlen = basicencode(NULL, 0, path, len + 1);
	}

	if (newlen > maxstorepathlen + 1) {
		return hashencode(path, len + 1);
	}

	if (newlen == len + 1) {
		/* encoding changes nothing: hand back the input */
		Py_INCREF(pathobj);
		return pathobj;
	}

	newobj = PyBytes_FromStringAndSize(NULL, newlen);
	if (newobj == NULL) {
		return NULL;
	}

	assert(PyBytes_Check(newobj));
	/* newlen counted the trailing NUL, which is not part of the value */
	Py_SET_SIZE(newobj, Py_SIZE(newobj) - 1);
	basicencode(PyBytes_AS_STRING(newobj), newlen, path, len + 1);

	return newobj;
}