##// END OF EJS Templates
revset: use phasecache.getrevset...
revset: use phasecache.getrevset This is part of a refactoring that moves some phase query optimization from revset.py to phases.py. See the previous patch for motivation. This patch changes revset code to use phasecache.getrevset so it no longer accesses the private field _phasecache._phasesets directly. For performance impact, this patch was tested using the following query, on my hg-committed repo: for i in 'public()' 'not public()' 'draft()' 'not draft()'; do echo $i; hg perfrevset "$i"; hg perfrevset "$i" --hidden; done For the CPython implementation, most operations are unchanged (within +/- 1%), while "not public()" and "draft()" are noticeably faster on an unfiltered repo. It may be because the new code avoids a set copy if filteredrevs is empty. revset | public() | not public() | draft() | not draft() hidden | yes | no | yes | no | yes | no | yes | no ------------------------------------------------------------------ before | 19006 | 17352 | 239 | 286 | 180 | 228 | 7690 | 5745 after | 19137 | 17231 | 240 | 207 | 182 | 150 | 7687 | 5658 delta | | -38% | | -52% | (timed in microseconds) For the pure Python implementation, some operations are faster while "not draft()" is noticeably slower: revset | public() | not public() | draft() | not draft() hidden | yes | no | yes | no | yes | no | yes | no ------------------------------------------------------------------------ before | 18852 | 17183 | 17758 | 15921 | 17505 | 15973 | 41521 | 39822 after | 18924 | 17380 | 17558 | 14545 | 16727 | 13593 | 48356 | 43992 delta | | -9% | -5% | -15% | +16% | +10% That may be due to the different performance characteristics of generatorset vs. filteredset. The "not draft()" query could be optimized in this case where both "public" and "secret" are passed to "getrevsets" so it won't iterate the whole repo twice.

File last commit:

r30163:f5607b62 default
r31017:17b5cda5 default
Show More
pathencode.c
765 lines | 16.4 KiB | text/x-c | CLexer
Adrian Buehlmann
pathencode: new C module with fast encodedir() function...
r17606 /*
pathencode.c - efficient path name encoding
Copyright 2012 Facebook
This software may be used and distributed according to the terms of
the GNU General Public License, incorporated herein by reference.
*/
Bryan O'Sullivan
store: implement fncache basic path encoding in C...
r17616 /*
* An implementation of the name encoding scheme used by the fncache
* store. The common case is of a path < 120 bytes long, which is
* handled either in a single pass with no allocations or two passes
* with a single allocation. For longer paths, multiple passes are
* required.
*/
Bryan O'Sullivan
store: implement lowerencode in C
r18430 #define PY_SSIZE_T_CLEAN
Adrian Buehlmann
pathencode: new C module with fast encodedir() function...
r17606 #include <Python.h>
#include <assert.h>
#include <ctype.h>
#include <stdlib.h>
#include <string.h>
#include "util.h"
Bryan O'Sullivan
store: implement fncache basic path encoding in C...
r17616 /* state machine for the fast path */
enum path_state {
START, /* first byte of a path component */
A, /* "AUX" */
AU, /* "au" seen at the start of a component */
THIRD, /* third of a 3-byte sequence, e.g. "AUX", "NUL" */
C, /* "CON" or "COMn" */
CO, /* "co" seen at the start of a component */
COMLPT, /* "COM" or "LPT" */
COMLPTn, /* "COM" or "LPT" followed by a digit 1-9 */
L, /* 'l' seen at the start of a component ("LPTn") */
LP, /* "lp" seen at the start of a component */
N, /* 'n' seen at the start of a component ("NUL") */
NU, /* "nu" seen at the start of a component */
P, /* "PRN" */
PR, /* "pr" seen at the start of a component */
LDOT, /* leading '.' */
DOT, /* '.' in a non-leading position */
H, /* ".h" */
HGDI, /* ".hg", ".d", or ".i" */
SPACE, /* ' ' in a non-leading position */
Yuya Nishihara
pathencode: eliminate comma at end of enum list to avoid pedantic warning
r19051 DEFAULT /* byte of a path component after the first */
Bryan O'Sullivan
store: implement fncache basic path encoding in C...
r17616 };
Adrian Buehlmann
pathencode: new C module with fast encodedir() function...
r17606 /* state machine for dir-encoding */
enum dir_state {
DDOT, /* '.' was the previous byte */
DH, /* ".h" seen */
DHGDI, /* ".hg", ".d", or ".i" seen */
Yuya Nishihara
pathencode: eliminate comma at end of enum list to avoid pedantic warning
r19051 DDEFAULT /* any other position */
Adrian Buehlmann
pathencode: new C module with fast encodedir() function...
r17606 };
André Sintzoff
pathencode: change isset name to avoid name collision...
/*
 * Test whether byte c is a member of a 256-bit set.
 *
 * bitset holds eight 32-bit words; byte value v is a member when bit
 * (v & 31) of word (v >> 5) is set. c is reinterpreted as an unsigned
 * byte, so the result does not depend on whether plain char is signed.
 *
 * Returns 1 if c is in the set, 0 otherwise.
 */
static inline int inset(const uint32_t bitset[], char c)
{
	const uint8_t byte = (uint8_t)c;

	/*
	 * Shift an unsigned one: "1 << 31" on a 32-bit signed int is
	 * undefined behaviour. Compare against zero so that a hit on
	 * bit 31 is not squeezed through a uint32_t -> int conversion.
	 */
	return (bitset[byte >> 5] & ((uint32_t)1 << (byte & 31))) != 0;
}
Adrian Buehlmann
pathencode: new C module with fast encodedir() function...
/*
 * Append byte c at offset *destlen and advance *destlen by one.
 * When dest is NULL nothing is stored; only the length is counted,
 * which lets callers size a buffer with a dry run.
 */
static inline void charcopy(char *dest, Py_ssize_t *destlen, size_t destsize,
                            char c)
{
	const Py_ssize_t pos = *destlen;

	if (dest != NULL) {
		assert(pos < destsize);
		dest[pos] = c;
	}
	*destlen = pos + 1;
}
/*
 * Append len bytes from src at offset *destlen and advance *destlen
 * by len. When dest is NULL nothing is stored; only the length is
 * counted.
 */
static inline void memcopy(char *dest, Py_ssize_t *destlen, size_t destsize,
                           const void *src, Py_ssize_t len)
{
	const Py_ssize_t start = *destlen;

	if (dest != NULL) {
		assert(start + len < destsize);
		memcpy(dest + start, src, len);
	}
	*destlen = start + len;
}
Bryan O'Sullivan
store: implement fncache basic path encoding in C...
/*
 * Append the two lowercase hexadecimal digits of c (high nibble
 * first) through charcopy(), so a NULL dest only counts the length.
 */
static inline void hexencode(char *dest, Py_ssize_t *destlen, size_t destsize,
                             uint8_t c)
{
	static const char digits[] = "0123456789abcdef";
	const char high = digits[c >> 4];
	const char low = digits[c & 15];

	charcopy(dest, destlen, destsize, high);
	charcopy(dest, destlen, destsize, low);
}
/* Emit a 3-byte escape for c: '~' and then its two hex digits. */
static inline void escape3(char *dest, Py_ssize_t *destlen, size_t destsize,
                           char c)
{
	const uint8_t byte = (uint8_t)c;

	charcopy(dest, destlen, destsize, '~');
	hexencode(dest, destlen, destsize, byte);
}
Adrian Buehlmann
pathencode: new C module with fast encodedir() function...
/*
 * Dir-encode len bytes of src: whenever ".hg", ".d" or ".i" is
 * immediately followed by '/', insert ".hg" in front of that slash.
 * All other bytes are copied through unchanged.
 *
 * With dest == NULL nothing is written and only the encoded length is
 * computed. Returns the encoded length.
 */
static Py_ssize_t _encodedir(char *dest, size_t destsize,
                             const char *src, Py_ssize_t len)
{
	enum dir_state state = DDEFAULT;
	Py_ssize_t pos = 0;
	Py_ssize_t outlen = 0;

	while (pos < len) {
		const char ch = src[pos];
		/* set when this step emits ch and moves past it */
		int consume = 0;

		switch (state) {
		case DDEFAULT:
			if (ch == '.')
				state = DDOT;
			consume = 1;
			break;
		case DDOT:
			if (ch == 'd' || ch == 'i') {
				state = DHGDI;
				consume = 1;
			} else if (ch == 'h') {
				state = DH;
				consume = 1;
			} else {
				/* re-examine ch in the default state */
				state = DDEFAULT;
			}
			break;
		case DH:
			if (ch == 'g') {
				state = DHGDI;
				consume = 1;
			} else {
				state = DDEFAULT;
			}
			break;
		case DHGDI:
			if (ch == '/') {
				memcopy(dest, &outlen, destsize, ".hg", 3);
				consume = 1;
			}
			state = DDEFAULT;
			break;
		}

		if (consume) {
			charcopy(dest, &outlen, destsize, ch);
			pos++;
		}
	}
	return outlen;
}
/*
 * Python entry point: dir-encode a path.
 *
 * Takes a single argument. Returns NULL with TypeError "expected a string"
 * when PyBytes_AsStringAndSize() cannot read it. If encoding leaves the
 * length unchanged, the argument itself is returned with a new reference;
 * otherwise a new bytes object holding the encoded path is returned (NULL
 * if it cannot be allocated).
 */
PyObject *encodedir(PyObject *self, PyObject *args)
{
Py_ssize_t len, newlen;
PyObject *pathobj, *newobj;
char *path;
if (!PyArg_ParseTuple(args, "O:encodedir", &pathobj))
return NULL;
Gregory Szorc
pathencode: convert PyString* to PyBytes*
r30099 if (PyBytes_AsStringAndSize(pathobj, &path, &len) == -1) {
Adrian Buehlmann
pathencode: new C module with fast encodedir() function...
r17606 PyErr_SetString(PyExc_TypeError, "expected a string");
return NULL;
}
/* Dry run (NULL dest) over the path plus one trailing byte to size the
   result; an empty path is sized as 1 without scanning. */
newlen = len ? _encodedir(NULL, 0, path, len + 1) : 1;
/* Same length as the input plus its trailing byte: nothing to encode. */
if (newlen == len + 1) {
Py_INCREF(pathobj);
return pathobj;
}
Gregory Szorc
pathencode: convert PyString* to PyBytes*
r30099 newobj = PyBytes_FromStringAndSize(NULL, newlen);
Adrian Buehlmann
pathencode: new C module with fast encodedir() function...
r17606
if (newobj) {
Gregory Szorc
pathencode: use Py_SIZE directly...
r30102 assert(PyBytes_Check(newobj));
/* newlen counted the trailing byte; the object's reported size
   must not. */
Py_SIZE(newobj)--;
Gregory Szorc
pathencode: convert PyString* to PyBytes*
r30099 _encodedir(PyBytes_AS_STRING(newobj), newlen, path,
Adrian Buehlmann
pathencode: new C module with fast encodedir() function...
r17606 len + 1);
}
return newobj;
}
Bryan O'Sullivan
store: implement fncache basic path encoding in C...
r17616
/*
 * Core state machine shared by basicencode() and auxencode().
 *
 * Scans len bytes of src and appends the encoded form to dest, starting
 * at offset destlen. When dest is NULL nothing is stored and only the
 * length is computed. Returns the resulting length.
 *
 * onebyte:   set of bytes that, outside the special sequences tracked by
 *            the state machine, are copied through unchanged.
 * twobytes:  set of bytes emitted as '_' followed by the byte plus 32
 *            ('_' itself is emitted as "__").
 *            Bytes in neither set are emitted as a 3-byte "~xx" escape.
 * encodedir: when nonzero, ".hg" is inserted after a ".hg", ".d" or ".i"
 *            sequence that is immediately followed by '/'.
 */
static Py_ssize_t _encode(const uint32_t twobytes[8], const uint32_t onebyte[8],
char *dest, Py_ssize_t destlen, size_t destsize,
const char *src, Py_ssize_t len,
int encodedir)
{
enum path_state state = START;
Py_ssize_t i = 0;
/*
* Python strings end with a zero byte, which we use as a
* terminal token as they are not valid inside path names.
*/
while (i < len) {
switch (state) {
case START:
switch (src[i]) {
case '/':
charcopy(dest, &destlen, destsize, src[i++]);
break;
/* a leading '.' is always escaped */
case '.':
state = LDOT;
escape3(dest, &destlen, destsize, src[i++]);
break;
/* a leading ' ' is always escaped */
case ' ':
state = DEFAULT;
escape3(dest, &destlen, destsize, src[i++]);
break;
case 'a':
state = A;
charcopy(dest, &destlen, destsize, src[i++]);
break;
case 'c':
state = C;
charcopy(dest, &destlen, destsize, src[i++]);
break;
case 'l':
state = L;
charcopy(dest, &destlen, destsize, src[i++]);
break;
case 'n':
state = N;
charcopy(dest, &destlen, destsize, src[i++]);
break;
case 'p':
state = P;
charcopy(dest, &destlen, destsize, src[i++]);
break;
default:
/* nothing consumed: DEFAULT re-reads this byte */
state = DEFAULT;
break;
}
break;
case A:
if (src[i] == 'u') {
state = AU;
charcopy(dest, &destlen, destsize, src[i++]);
}
else state = DEFAULT;
break;
case AU:
if (src[i] == 'x') {
/* skip the third letter without emitting it; THIRD decides
   whether it must be escaped */
state = THIRD;
i++;
}
else state = DEFAULT;
break;
case THIRD:
state = DEFAULT;
switch (src[i]) {
case '.':
case '/':
case '\0':
/* the 3-letter name ends here: escape its held-back last letter */
escape3(dest, &destlen, destsize, src[i - 1]);
break;
default:
/* name continues: back up so DEFAULT handles the held-back letter */
i--;
break;
}
break;
case C:
if (src[i] == 'o') {
state = CO;
charcopy(dest, &destlen, destsize, src[i++]);
}
else state = DEFAULT;
break;
case CO:
if (src[i] == 'm') {
state = COMLPT;
i++;
}
else if (src[i] == 'n') {
state = THIRD;
i++;
}
else state = DEFAULT;
break;
case COMLPT:
switch (src[i]) {
case '1': case '2': case '3': case '4': case '5':
case '6': case '7': case '8': case '9':
state = COMLPTn;
i++;
break;
default:
/* no digit follows: emit the held-back third letter as-is */
state = DEFAULT;
charcopy(dest, &destlen, destsize, src[i - 1]);
break;
}
break;
case COMLPTn:
state = DEFAULT;
switch (src[i]) {
case '.':
case '/':
case '\0':
/* name ends after the digit: escape the third letter, then
   copy the digit */
escape3(dest, &destlen, destsize, src[i - 2]);
charcopy(dest, &destlen, destsize, src[i - 1]);
break;
default:
/* name continues: emit the held-back letter and digit as-is */
memcopy(dest, &destlen, destsize,
&src[i - 2], 2);
break;
}
break;
case L:
if (src[i] == 'p') {
state = LP;
charcopy(dest, &destlen, destsize, src[i++]);
}
else state = DEFAULT;
break;
case LP:
if (src[i] == 't') {
state = COMLPT;
i++;
}
else state = DEFAULT;
break;
case N:
if (src[i] == 'u') {
state = NU;
charcopy(dest, &destlen, destsize, src[i++]);
}
else state = DEFAULT;
break;
case NU:
if (src[i] == 'l') {
state = THIRD;
i++;
}
else state = DEFAULT;
break;
case P:
if (src[i] == 'r') {
state = PR;
charcopy(dest, &destlen, destsize, src[i++]);
}
else state = DEFAULT;
break;
case PR:
if (src[i] == 'n') {
state = THIRD;
i++;
}
else state = DEFAULT;
break;
case LDOT:
switch (src[i]) {
case 'd':
case 'i':
state = HGDI;
charcopy(dest, &destlen, destsize, src[i++]);
break;
case 'h':
state = H;
charcopy(dest, &destlen, destsize, src[i++]);
break;
default:
state = DEFAULT;
break;
}
break;
case DOT:
/* the '.' that led here was consumed but not yet emitted */
switch (src[i]) {
case '/':
case '\0':
/* a '.' that ends a component is written as "~2e" */
state = START;
memcopy(dest, &destlen, destsize, "~2e", 3);
charcopy(dest, &destlen, destsize, src[i++]);
break;
case 'd':
case 'i':
state = HGDI;
charcopy(dest, &destlen, destsize, '.');
charcopy(dest, &destlen, destsize, src[i++]);
break;
case 'h':
state = H;
memcopy(dest, &destlen, destsize, ".h", 2);
i++;
break;
default:
state = DEFAULT;
charcopy(dest, &destlen, destsize, '.');
break;
}
break;
case H:
if (src[i] == 'g') {
state = HGDI;
charcopy(dest, &destlen, destsize, src[i++]);
}
else state = DEFAULT;
break;
case HGDI:
if (src[i] == '/') {
state = START;
if (encodedir)
memcopy(dest, &destlen, destsize, ".hg",
3);
charcopy(dest, &destlen, destsize, src[i++]);
}
else state = DEFAULT;
break;
case SPACE:
/* the ' ' that led here was consumed but not yet emitted */
switch (src[i]) {
case '/':
case '\0':
/* a ' ' that ends a component is written as "~20" */
state = START;
memcopy(dest, &destlen, destsize, "~20", 3);
charcopy(dest, &destlen, destsize, src[i++]);
break;
default:
state = DEFAULT;
charcopy(dest, &destlen, destsize, ' ');
break;
}
break;
case DEFAULT:
/* copy a run of pass-through bytes; leave the function as soon as
   the input is exhausted */
André Sintzoff
pathencode: change isset name to avoid name collision...
r17699 while (inset(onebyte, src[i])) {
Bryan O'Sullivan
store: implement fncache basic path encoding in C...
r17616 charcopy(dest, &destlen, destsize, src[i++]);
if (i == len)
goto done;
}
switch (src[i]) {
case '.':
state = DOT;
i++;
break;
case ' ':
state = SPACE;
i++;
break;
case '/':
state = START;
charcopy(dest, &destlen, destsize, '/');
i++;
break;
default:
André Sintzoff
pathencode: change isset name to avoid name collision...
r17699 if (inset(onebyte, src[i])) {
Bryan O'Sullivan
store: implement fncache basic path encoding in C...
r17616 do {
charcopy(dest, &destlen,
destsize, src[i++]);
} while (i < len &&
André Sintzoff
pathencode: change isset name to avoid name collision...
r17699 inset(onebyte, src[i]));
Bryan O'Sullivan
store: implement fncache basic path encoding in C...
r17616 }
André Sintzoff
pathencode: change isset name to avoid name collision...
r17699 else if (inset(twobytes, src[i])) {
Bryan O'Sullivan
store: implement fncache basic path encoding in C...
r17616 char c = src[i++];
/* two-byte form: '_' then the byte plus 32 ("__" for '_') */
charcopy(dest, &destlen, destsize, '_');
charcopy(dest, &destlen, destsize,
c == '_' ? '_' : c + 32);
}
else
escape3(dest, &destlen, destsize,
src[i++]);
break;
}
break;
}
}
done:
return destlen;
}
/*
 * Run _encode() over len bytes of src with this function's own
 * onebyte/twobytes tables and with directory encoding enabled (last
 * argument 1). With dest == NULL only the encoded length is computed.
 * Returns the encoded length.
 */
static Py_ssize_t basicencode(char *dest, size_t destsize,
const char *src, Py_ssize_t len)
{
/* bytes written as '_' plus the byte + 32 (see _encode) */
static const uint32_t twobytes[8] = { 0, 0, 0x87fffffe };
/* bytes copied through unchanged (see _encode) */
static const uint32_t onebyte[8] = {
1, 0x2bff3bfa, 0x68000001, 0x2fffffff,
};
Py_ssize_t destlen = 0;
return _encode(twobytes, onebyte, dest, destlen, destsize,
Adrian Buehlmann
pathencode: simplify basicencode
r17691 src, len, 1);
Bryan O'Sullivan
store: implement fncache basic path encoding in C...
r17616 }
/* Length threshold in bytes: pathencode() switches to the hashed encoding
   when the input, or its basic encoding, is longer than this. */
static const Py_ssize_t maxstorepathlen = 120;
Bryan O'Sullivan
store: implement lowerencode in C
/*
 * Lower-encode len bytes of src. Each byte is handled independently:
 * members of onebyte are copied unchanged, members of lower (the ASCII
 * letters 'A'..'Z') are emitted with 32 added, and every other byte
 * becomes a 3-byte "~xx" escape.
 *
 * With dest == NULL only the encoded length is computed. Returns the
 * encoded length.
 */
static Py_ssize_t _lowerencode(char *dest, size_t destsize,
                               const char *src, Py_ssize_t len)
{
	static const uint32_t onebyte[8] = {
		1, 0x2bfffbfb, 0xe8000001, 0x2fffffff
	};
	static const uint32_t lower[8] = { 0, 0, 0x7fffffe };
	Py_ssize_t pos = 0;
	Py_ssize_t outlen = 0;

	while (pos < len) {
		const char ch = src[pos++];

		if (inset(onebyte, ch)) {
			charcopy(dest, &outlen, destsize, ch);
			continue;
		}
		if (inset(lower, ch)) {
			charcopy(dest, &outlen, destsize, ch + 32);
			continue;
		}
		escape3(dest, &outlen, destsize, ch);
	}
	return outlen;
}
/*
 * Python entry point: lower-encode a path.
 *
 * Parses one argument with the "s#" format and returns a new bytes object
 * containing the output of _lowerencode(). Returns NULL if argument
 * parsing or allocation fails.
 */
PyObject *lowerencode(PyObject *self, PyObject *args)
{
char *path;
Py_ssize_t len, newlen;
PyObject *ret;
if (!PyArg_ParseTuple(args, "s#:lowerencode", &path, &len))
return NULL;
/* dry run (NULL dest) to learn the encoded length */
newlen = _lowerencode(NULL, 0, path, len);
Gregory Szorc
pathencode: convert PyString* to PyBytes*
r30099 ret = PyBytes_FromStringAndSize(NULL, newlen);
Bryan O'Sullivan
store: implement lowerencode in C
r18430 if (ret)
Gregory Szorc
pathencode: convert PyString* to PyBytes*
r30099 _lowerencode(PyBytes_AS_STRING(ret), newlen, path, len);
Bryan O'Sullivan
store: implement lowerencode in C
r18430
return ret;
}
Bryan O'Sullivan
pathencode: implement hashed encoding in C...
r18433 /* See store.py:_auxencode for a description. */
/*
 * Run _encode() over len bytes of src with directory encoding disabled
 * (last argument 0). With dest == NULL only the encoded length is
 * computed. Returns the encoded length.
 */
static Py_ssize_t auxencode(char *dest, size_t destsize,
const char *src, Py_ssize_t len)
{
/* all zero: no byte takes the two-byte '_' form here */
static const uint32_t twobytes[8];
/* every byte except ' ', '.' and '/' is a pass-through byte */
static const uint32_t onebyte[8] = {
Danek Duvall
pathencode: eliminate signed integer warnings...
r20535 ~0U, 0xffff3ffe, ~0U, ~0U, ~0U, ~0U, ~0U, ~0U,
Bryan O'Sullivan
pathencode: implement hashed encoding in C...
r18433 };
return _encode(twobytes, onebyte, dest, 0, destsize, src, len, 0);
}
Bryan O'Sullivan
pathencode: implement the "mangling" part of hashed encoding in C...
/*
 * Build the hashed store name for an already-encoded path.
 *
 * The result is "dh/", followed by shortened directory components, a
 * prefix of the file's base name, the 40 hex digits of sha, and the
 * original suffix (text from the last '.' of the final component on).
 *
 * src/len: the encoded path. The final byte of src is never copied (the
 *          suffix and base-name lengths both stop one byte short of len);
 *          the caller in this file passes a length that includes the
 *          trailing NUL.
 * sha:     20-byte digest, written out as hex.
 *
 * Returns a new bytes object, or NULL if it cannot be allocated.
 */
r18432 static PyObject *hashmangle(const char *src, Py_ssize_t len, const char sha[20])
{
/* at most this many bytes of each directory component are kept */
static const Py_ssize_t dirprefixlen = 8;
/* limit on the combined length of the shortened directories */
static const Py_ssize_t maxshortdirslen = 68;
char *dest;
PyObject *ret;
/* NOTE(review): the outer 'd' below is only ever assigned 0 and is
   shadowed by the 'char d' locals further down; it looks unused. */
Py_ssize_t i, d, p, lastslash = len - 1, lastdot = -1;
Py_ssize_t destsize, destlen = 0, slop, used;
/* Walk backwards to the last '/', remembering the first '.' met on the
   way, i.e. the last '.' of the final component. */
while (lastslash >= 0 && src[lastslash] != '/') {
if (src[lastslash] == '.' && lastdot == -1)
lastdot = lastslash;
lastslash--;
}
#if 0
/* All paths should end in a suffix of ".i" or ".d".
Unfortunately, the file names in test-hybridencode.py
violate this rule. */
if (lastdot != len - 3) {
PyErr_SetString(PyExc_ValueError,
"suffix missing or wrong length");
return NULL;
}
#endif
/* If src contains a suffix, we will append it to the end of
the new string, so make room. */
destsize = 120;
if (lastdot >= 0)
destsize += len - lastdot - 1;
Gregory Szorc
pathencode: convert PyString* to PyBytes*
r30099 ret = PyBytes_FromStringAndSize(NULL, destsize);
Bryan O'Sullivan
pathencode: implement the "mangling" part of hashed encoding in C...
r18432 if (ret == NULL)
return NULL;
Gregory Szorc
pathencode: convert PyString* to PyBytes*
r30099 dest = PyBytes_AS_STRING(ret);
Bryan O'Sullivan
pathencode: implement the "mangling" part of hashed encoding in C...
r18432 memcopy(dest, &destlen, destsize, "dh/", 3);
/* Copy up to dirprefixlen bytes of each path component, up to
a limit of maxshortdirslen bytes. */
/* p counts bytes seen in the current component; it is reset to -1 at
   each '/' so the loop increment brings it back to 0. */
for (i = d = p = 0; i < lastslash; i++, p++) {
if (src[i] == '/') {
char d = dest[destlen - 1];
/* After truncation, a directory name may end
in a space or dot, which are unportable. */
if (d == '.' || d == ' ')
dest[destlen - 1] = '_';
Siddharth Agarwal
pathencode: fix hashmangle short dir limit (issue3958)...
r19317 /* The + 3 is to account for "dh/" in the beginning */
if (destlen > maxshortdirslen + 3)
Bryan O'Sullivan
pathencode: implement the "mangling" part of hashed encoding in C...
r18432 break;
charcopy(dest, &destlen, destsize, src[i]);
p = -1;
}
else if (p < dirprefixlen)
charcopy(dest, &destlen, destsize, src[i]);
}
/* Rewind to just before the last slash copied. */
if (destlen > maxshortdirslen + 3)
do {
destlen--;
} while (destlen > 0 && dest[destlen] != '/');
/* more than the bare "dh/" was written: terminate the directory part */
if (destlen > 3) {
if (lastslash > 0) {
char d = dest[destlen - 1];
/* The last directory component may be
truncated, so make it safe. */
if (d == '.' || d == ' ')
dest[destlen - 1] = '_';
}
charcopy(dest, &destlen, destsize, '/');
}
/* Add a prefix of the original file's name. Its length
depends on the number of bytes left after accounting for
hash and suffix. */
/* 40 = number of hex digits produced from the 20-byte sha */
used = destlen + 40;
if (lastdot >= 0)
used += len - lastdot - 1;
slop = maxstorepathlen - used;
if (slop > 0) {
Py_ssize_t basenamelen =
lastslash >= 0 ? len - lastslash - 2 : len - 1;
if (basenamelen > slop)
basenamelen = slop;
if (basenamelen > 0)
memcopy(dest, &destlen, destsize, &src[lastslash + 1],
basenamelen);
}
/* Add hash and suffix. */
for (i = 0; i < 20; i++)
hexencode(dest, &destlen, destsize, sha[i]);
if (lastdot >= 0)
memcopy(dest, &destlen, destsize, &src[lastdot],
len - lastdot - 1);
Gregory Szorc
pathencode: use assert() for PyBytes_Check()...
r30163 assert(PyBytes_Check(ret));
/* shrink the reported size to the number of bytes actually written */
Gregory Szorc
pathencode: use Py_SIZE directly...
r30102 Py_SIZE(ret) = destlen;
Bryan O'Sullivan
pathencode: implement the "mangling" part of hashed encoding in C...
r18432
return ret;
}
Bryan O'Sullivan
store: implement fncache basic path encoding in C...
r17616 /*
Bryan O'Sullivan
pathencode: add a SHA-1 hash function...
r18431 * Avoiding a trip through Python would improve performance by 50%,
* but we don't encounter enough long names to be worth the code.
*/
/*
 * Compute the SHA-1 digest of len bytes at str by calling Python's
 * hashlib.sha1, and store the 20-byte result in hash.
 *
 * The hashlib.sha1 callable is looked up on first use and kept in a
 * function-local static for later calls.
 *
 * Returns 0 on success, -1 on failure (import, attribute lookup, call,
 * or a digest that is not a 20-byte bytes object).
 */
static int sha1hash(char hash[20], const char *str, Py_ssize_t len)
{
static PyObject *shafunc;
PyObject *shaobj, *hashobj;
if (shafunc == NULL) {
Gregory Szorc
pathencode: convert PyString* to PyBytes*
r30099 PyObject *hashlib, *name = PyBytes_FromString("hashlib");
Bryan O'Sullivan
pathencode: add a SHA-1 hash function...
r18431
if (name == NULL)
return -1;
Augie Fackler
pathencode: use hashlib.sha1 directly instead of indirecting through util
r29340 hashlib = PyImport_Import(name);
Bryan O'Sullivan
pathencode: add a SHA-1 hash function...
r18431 Py_DECREF(name);
Augie Fackler
pathencode: use hashlib.sha1 directly instead of indirecting through util
r29340 if (hashlib == NULL) {
PyErr_SetString(PyExc_ImportError, "hashlib");
Bryan O'Sullivan
pathencode: add a SHA-1 hash function...
r18431 return -1;
}
Augie Fackler
pathencode: use hashlib.sha1 directly instead of indirecting through util
r29340 shafunc = PyObject_GetAttrString(hashlib, "sha1");
/* the module reference is no longer needed once the attribute
   has been fetched */
Py_DECREF(hashlib);
Bryan O'Sullivan
pathencode: add a SHA-1 hash function...
r18431
if (shafunc == NULL) {
PyErr_SetString(PyExc_AttributeError,
Augie Fackler
pathencode: use hashlib.sha1 directly instead of indirecting through util
r29340 "module 'hashlib' has no "
Bryan O'Sullivan
pathencode: add a SHA-1 hash function...
r18431 "attribute 'sha1'");
return -1;
}
}
/* equivalent of hashlib.sha1(str[:len]) */
shaobj = PyObject_CallFunction(shafunc, "s#", str, len);
if (shaobj == NULL)
return -1;
hashobj = PyObject_CallMethod(shaobj, "digest", "");
Py_DECREF(shaobj);
Augie Fackler
pathencode: check result of .digest() method in sha1hash...
r26050 if (hashobj == NULL)
return -1;
Bryan O'Sullivan
pathencode: add a SHA-1 hash function...
r18431
/* guard the fixed-size memcpy below */
Gregory Szorc
pathencode: convert PyString* to PyBytes*
r30099 if (!PyBytes_Check(hashobj) || PyBytes_GET_SIZE(hashobj) != 20) {
Bryan O'Sullivan
pathencode: add a SHA-1 hash function...
r18431 PyErr_SetString(PyExc_TypeError,
"result of digest is not a 20-byte hash");
Py_DECREF(hashobj);
return -1;
}
Gregory Szorc
pathencode: convert PyString* to PyBytes*
r30099 memcpy(hash, PyBytes_AS_STRING(hashobj), 20);
Bryan O'Sullivan
pathencode: add a SHA-1 hash function...
r18431 Py_DECREF(hashobj);
return 0;
}
Matt Mackall
pathencode: grow buffers to increase safety margin
/*
 * Size in bytes of each scratch buffer used by hashencode().
 * The replacement list is parenthesized so the macro expands as a single
 * value inside any larger expression (e.g. "x / MAXENCODE").
 */
#define MAXENCODE (4096 * 4)
Matt Mackall
pathencode: don't use alloca() for safety/portability
r18452
Bryan O'Sullivan
pathencode: implement hashed encoding in C...
/*
 * Produce the hashed encoding of a path.
 *
 * Pipeline: dir-encode src, take the SHA-1 of that result (minus its
 * final byte), lower-encode everything after the first 5 bytes, run
 * auxencode() on that, and hand the result plus the digest to
 * hashmangle().
 *
 * Returns a new bytes object; NULL with ValueError "string too long" if
 * the input is too large for the fixed buffers, or NULL if hashing or
 * allocation fails.
 */
r18433 static PyObject *hashencode(const char *src, Py_ssize_t len)
{
Matt Mackall
pathencode: don't use alloca() for safety/portability
r18452 char dired[MAXENCODE];
char lowered[MAXENCODE];
char auxed[MAXENCODE];
Py_ssize_t dirlen, lowerlen, auxlen, baselen;
Bryan O'Sullivan
pathencode: implement hashed encoding in C...
r18433 char sha[20];
/* size bound handed to the encoders below; it must fit the fixed
   MAXENCODE-byte buffers */
Matt Mackall
pathencode: don't use alloca() for safety/portability
r18452 baselen = (len - 5) * 3;
if (baselen >= MAXENCODE) {
PyErr_SetString(PyExc_ValueError, "string too long");
return NULL;
}
Bryan O'Sullivan
pathencode: implement hashed encoding in C...
r18433 dirlen = _encodedir(dired, baselen, src, len);
/* hash everything but the last byte of the dir-encoded path */
if (sha1hash(sha, dired, dirlen - 1) == -1)
return NULL;
/* NOTE(review): the first 5 bytes are skipped here; presumably a
   fixed "data/" prefix, confirm against callers */
lowerlen = _lowerencode(lowered, baselen, dired + 5, dirlen - 5);
auxlen = auxencode(auxed, baselen, lowered, lowerlen);
return hashmangle(auxed, auxlen, sha);
}
Bryan O'Sullivan
store: implement fncache basic path encoding in C...
/*
 * Python entry point: encode a store path, choosing between the basic
 * and the hashed encoding.
 *
 * Takes a single argument. Returns NULL with TypeError "expected a string"
 * when PyBytes_AsStringAndSize() cannot read it. If the basic encoding
 * fits within maxstorepathlen it is used: the argument itself is returned
 * (new reference) when encoding leaves the length unchanged, otherwise a
 * new bytes object. Longer paths are passed to hashencode().
 */
r17616 PyObject *pathencode(PyObject *self, PyObject *args)
{
Py_ssize_t len, newlen;
PyObject *pathobj, *newobj;
char *path;
if (!PyArg_ParseTuple(args, "O:pathencode", &pathobj))
return NULL;
Gregory Szorc
pathencode: convert PyString* to PyBytes*
r30099 if (PyBytes_AsStringAndSize(pathobj, &path, &len) == -1) {
Bryan O'Sullivan
store: implement fncache basic path encoding in C...
r17616 PyErr_SetString(PyExc_TypeError, "expected a string");
return NULL;
}
/* Input already too long: skip the sizing pass and pick a length that
   fails the check below, so the hashed encoding is used. */
Bryan O'Sullivan
pathencode: implement both basic and hashed encoding in C
r18434 if (len > maxstorepathlen)
newlen = maxstorepathlen + 2;
else
newlen = len ? basicencode(NULL, 0, path, len + 1) : 1;
Bryan O'Sullivan
store: implement fncache basic path encoding in C...
r17616
/* newlen includes one trailing byte, hence the "+ 1" */
if (newlen <= maxstorepathlen + 1) {
/* same length as the input plus its trailing byte: return it as-is */
if (newlen == len + 1) {
Py_INCREF(pathobj);
return pathobj;
}
Gregory Szorc
pathencode: convert PyString* to PyBytes*
r30099 newobj = PyBytes_FromStringAndSize(NULL, newlen);
Bryan O'Sullivan
store: implement fncache basic path encoding in C...
r17616
if (newobj) {
Gregory Szorc
pathencode: use assert() for PyBytes_Check()...
r30163 assert(PyBytes_Check(newobj));
/* newlen counted the trailing byte; the object's reported size
   must not */
Gregory Szorc
pathencode: use Py_SIZE directly...
r30102 Py_SIZE(newobj)--;
Gregory Szorc
pathencode: convert PyString* to PyBytes*
r30099 basicencode(PyBytes_AS_STRING(newobj), newlen, path,
Bryan O'Sullivan
store: implement fncache basic path encoding in C...
r17616 len + 1);
}
}
Bryan O'Sullivan
pathencode: implement both basic and hashed encoding in C
r18434 else
newobj = hashencode(path, len + 1);
Bryan O'Sullivan
store: implement fncache basic path encoding in C...
r17616
return newobj;
}