upstream/mercurial-mirror Files · mercurial/pathencode.c

pure: write a really lazy version of pure indexObject...

pure: write a really lazy version of pure indexObject On PyPy this version performs reasonably well compared to C version. Example command is "hg id" which gets faster, depending on details of your operating system and hard drive (it's bottlenecked on stat mostly) There is potential for improvements by storing extra as a condensed struct too.

Bryan O'Sullivan - - Load All Authors

File last commit:

r27342:673ba596 default


                r29133:25527471

default

Download file

             pathencode.c
        
                    762 lines
            
             | 16.4 KiB
            
                | text/x-c
            
             |
                CLexer
            
             / mercurial / pathencode.c
          
                    History
                
                 |
                  Source
                 | Raw
                 |Copy content
                 |Copy permalink

        Adrian Buehlmann
    
pathencode: new C module with fast encodedir() function...

              r17606
            
      /*

       pathencode.c - efficient path name encoding

       Copyright 2012 Facebook

       This software may be used and distributed according to the terms of

       the GNU General Public License, incorporated herein by reference.

      */

        Bryan O'Sullivan
    
store: implement fncache basic path encoding in C...

              r17616
            
      /*

       * An implementation of the name encoding scheme used by the fncache

       * store.  The common case is of a path < 120 bytes long, which is

       * handled either in a single pass with no allocations or two passes

       * with a single allocation.  For longer paths, multiple passes are

       * required.

       */

        Bryan O'Sullivan
    
store: implement lowerencode in C

              r18430
            
      #define PY_SSIZE_T_CLEAN

        Adrian Buehlmann
    
pathencode: new C module with fast encodedir() function...

              r17606
            
      #include <Python.h>

      #include <assert.h>

      #include <ctype.h>

      #include <stdlib.h>

      #include <string.h>

      #include "util.h"

        Bryan O'Sullivan
    
store: implement fncache basic path encoding in C...

              r17616
            
      /*
       * State machine for the fast path (used by _encode).  The letter
       * states track progress through the lowercase spellings of the
       * reserved names quoted below; the remaining states track '.' and
       * ' ' bytes whose encoding depends on what follows them.
       */

      enum path_state {

      	START,   /* first byte of a path component */

      	A,       /* "AUX": 'a' seen at the start of a component */

      	AU,      /* "au" seen */

      	THIRD,   /* third of a 3-byte sequence, e.g. "AUX", "NUL";
      	            that third byte is consumed but not yet emitted */

      	C,       /* "CON" or "COMn": 'c' seen at the start of a component */

      	CO,      /* "co" seen */

      	COMLPT,  /* "COM" or "LPT" seen; the third byte is not yet emitted */

      	COMLPTn, /* "COM" or "LPT" followed by a digit 1-9; the third
      	            byte and the digit are not yet emitted */

      	L,       /* "LPTn": 'l' seen at the start of a component */

      	LP,      /* "lp" seen */

      	N,       /* "NUL": 'n' seen at the start of a component */

      	NU,      /* "nu" seen */

      	P,       /* "PRN" */

      	PR,      /* "pr" seen */

      	LDOT,    /* leading '.' (already emitted as an escape) */

      	DOT,     /* '.' in a non-leading position (not yet emitted) */

      	H,       /* ".h" */

      	HGDI,    /* ".hg", ".d", or ".i" */

      	SPACE,   /* ' ' in a non-leading position (not yet emitted) */

        Yuya Nishihara
    
pathencode: eliminate comma at end of enum list to avoid pedantic warning

              r19051
            
      	DEFAULT  /* byte of a path component after the first */

        Bryan O'Sullivan
    
store: implement fncache basic path encoding in C...

              r17616
            
      };

        Adrian Buehlmann
    
pathencode: new C module with fast encodedir() function...

              r17606
            
      /*
       * State machine for dir-encoding (used by _encodedir).  It looks for
       * ".hg", ".d" or ".i" immediately followed by '/'.
       */

      enum dir_state {

      	DDOT,     /* a '.' has just been copied */

      	DH,       /* ".h" has been copied */

      	DHGDI,    /* ".hg", ".d" or ".i" has been copied */

        Yuya Nishihara
    
pathencode: eliminate comma at end of enum list to avoid pedantic warning

              r19051
            
      	DDEFAULT  /* any other position; also the initial state */

        Adrian Buehlmann
    
pathencode: new C module with fast encodedir() function...

              r17606
            
      };

        André Sintzoff
    
pathencode: change isset name to avoid name collision...

              r17699
            
      /*
       * Test whether byte c is a member of a 256-bit set.
       *
       * bitset holds eight 32-bit words; byte value v is represented by bit
       * (v & 31) of word (v >> 5).  c is reinterpreted as an unsigned byte so
       * that values >= 0x80 index the upper words regardless of the
       * signedness of plain char.
       *
       * Returns 1 if the bit is set, 0 otherwise.  The mask is built from an
       * unsigned constant: shifting a signed 1 into bit 31 is undefined
       * behaviour, and the result is normalized so that a set bit 31 cannot
       * turn into an implementation-defined negative int.
       */
      static inline int inset(const uint32_t bitset[], char c)
      {
      	const uint8_t byte = (uint8_t)c;
      	const uint32_t mask = (uint32_t)1 << (byte & 31);

      	return (bitset[byte >> 5] & mask) != 0;
      }

        Adrian Buehlmann
    
pathencode: new C module with fast encodedir() function...

              r17606
            
      /*
       * Append one byte to the output.
       *
       * When dest is NULL nothing is stored and only *destlen is advanced,
       * which lets callers run a sizing pass.  Otherwise c is stored at
       * offset *destlen, which must lie inside the destsize-byte buffer.
       */
      static inline void charcopy(char *dest, Py_ssize_t *destlen, size_t destsize,
                                  char c)
      {
      	const Py_ssize_t pos = *destlen;

      	if (dest != NULL) {
      		assert(pos < destsize);
      		dest[pos] = c;
      	}

      	*destlen = pos + 1;
      }

      /*
       * Append len bytes from src to the output.
       *
       * When dest is NULL nothing is copied and only *destlen is advanced
       * (sizing pass).  Otherwise the bytes are copied to offset *destlen.
       *
       * The copy fits as long as it ends at or before the end of the
       * destsize-byte buffer, so the bound is inclusive; this matches
       * charcopy(), which accepts a write to the last byte of the buffer.
       */
      static inline void memcopy(char *dest, Py_ssize_t *destlen, size_t destsize,
                                 const void *src, Py_ssize_t len)
      {
      	if (dest) {
      		assert(*destlen + len <= destsize);
      		memcpy(&dest[*destlen], src, len);
      	}

      	*destlen += len;
      }

        Bryan O'Sullivan
    
store: implement fncache basic path encoding in C...

              r17616
            
      /*
       * Append the two lowercase hexadecimal digits of byte c to the output,
       * high nibble first.  dest, destlen and destsize are passed straight
       * through to charcopy().
       */
      static inline void hexencode(char *dest, Py_ssize_t *destlen, size_t destsize,
      			     uint8_t c)
      {
      	static const char hexdigit[] = "0123456789abcdef";
      	const char high = hexdigit[c >> 4];
      	const char low = hexdigit[c & 15];

      	charcopy(dest, destlen, destsize, high);
      	charcopy(dest, destlen, destsize, low);
      }

      /*
       * Append c as a 3-byte escape: a tilde followed by the two hex digits
       * of the byte's unsigned value.
       */
      static inline void escape3(char *dest, Py_ssize_t *destlen, size_t destsize,
      			   char c)
      {
      	const uint8_t byte = (uint8_t)c;

      	charcopy(dest, destlen, destsize, '~');
      	hexencode(dest, destlen, destsize, byte);
      }

        Adrian Buehlmann
    
pathencode: new C module with fast encodedir() function...

              r17606
            
      /*
       * Dir-encode len bytes of src: whenever ".hg", ".d" or ".i" is
       * immediately followed by '/', insert ".hg" in front of that slash.
       * All other bytes are copied unchanged.
       *
       * dest may be NULL, in which case nothing is written and the function
       * only measures the output (see charcopy/memcopy).  destsize is the
       * capacity of dest used for the helpers' bounds assertions.
       *
       * Returns the number of output bytes.
       */
      static Py_ssize_t _encodedir(char *dest, size_t destsize,
                                   const char *src, Py_ssize_t len)
      {
      	enum dir_state state = DDEFAULT;
      	Py_ssize_t pos = 0, destlen = 0;

      	while (pos < len) {
      		const char c = src[pos];

      		if (state == DDEFAULT) {
      			/* Ordinary byte: always copied; a '.' arms the matcher. */
      			if (c == '.')
      				state = DDOT;
      			charcopy(dest, &destlen, destsize, c);
      			pos++;
      			continue;
      		}

      		if (state == DHGDI) {
      			/* A full ".hg"/".d"/".i" was copied; only a slash
      			   right here triggers the ".hg" insertion.  Any
      			   other byte is re-examined in DDEFAULT. */
      			state = DDEFAULT;
      			if (c == '/') {
      				memcopy(dest, &destlen, destsize, ".hg", 3);
      				charcopy(dest, &destlen, destsize, c);
      				pos++;
      			}
      			continue;
      		}

      		/* DDOT or DH: advance the match, or fall back to DDEFAULT
      		   without consuming the byte. */
      		if (state == DDOT && (c == 'd' || c == 'i'))
      			state = DHGDI;
      		else if (state == DDOT && c == 'h')
      			state = DH;
      		else if (state == DH && c == 'g')
      			state = DHGDI;
      		else {
      			state = DDEFAULT;
      			continue;
      		}

      		charcopy(dest, &destlen, destsize, c);
      		pos++;
      	}

      	return destlen;
      }

      /*
       * Python entry point: encodedir(path) -> str.
       *
       * Takes one string argument and returns its dir-encoded form (see
       * _encodedir).  If encoding would not change the path, the argument
       * itself is returned with a new reference.  Returns NULL with an
       * exception set on argument errors or allocation failure.
       */
      PyObject *encodedir(PyObject *self, PyObject *args)
      {
      	PyObject *pathobj, *newobj;
      	Py_ssize_t len, newlen;
      	char *path;

      	if (!PyArg_ParseTuple(args, "O:encodedir", &pathobj))
      		return NULL;

      	if (PyString_AsStringAndSize(pathobj, &path, &len) == -1) {
      		PyErr_SetString(PyExc_TypeError, "expected a string");
      		return NULL;
      	}

      	/* Sizing pass.  The length passed is len + 1, so the byte after
      	   the string's contents is encoded too and counted in newlen. */
      	if (len == 0)
      		newlen = 1;
      	else
      		newlen = _encodedir(NULL, 0, path, len + 1);

      	/* Same length means nothing was inserted: reuse the input. */
      	if (newlen == len + 1) {
      		Py_INCREF(pathobj);
      		return pathobj;
      	}

      	newobj = PyString_FromStringAndSize(NULL, newlen);
      	if (newobj == NULL)
      		return NULL;

      	/* Shrink the reported size by one so that the extra trailing
      	   byte written by the second pass is not part of the string's
      	   contents.  NOTE(review): relies on the string object keeping
      	   room for that byte - confirm against the CPython 2 API. */
      	PyString_GET_SIZE(newobj)--;
      	_encodedir(PyString_AS_STRING(newobj), newlen, path, len + 1);

      	return newobj;
      }

        Bryan O'Sullivan
    
store: implement fncache basic path encoding in C...

              r17616
            
      /*
       * Core encoder shared by basicencode() and auxencode().
       *
       * Scans len bytes of src with the path_state machine and appends the
       * encoded form to dest starting at offset destlen.
       *
       *   twobytes  - 256-bit set (see inset) of bytes emitted as '_'
       *               followed by either '_' itself or the byte plus 32
       *   onebyte   - 256-bit set of bytes copied through unchanged while in
       *               the DEFAULT state
       *   dest      - output buffer, or NULL to only measure (the copy
       *               helpers skip the store when dest is NULL)
       *   destlen   - output offset to start from
       *   destsize  - capacity of dest, used by the helpers' assertions
       *   encodedir - if non-zero, ".hg" is inserted before a '/' that
       *               directly follows ".hg", ".d" or ".i"
       *
       * Returns the output offset after the last byte produced.
       */
      static Py_ssize_t _encode(const uint32_t twobytes[8], const uint32_t onebyte[8],

      			  char *dest, Py_ssize_t destlen, size_t destsize,

      			  const char *src, Py_ssize_t len,

      			  int encodedir)

      {

      	enum path_state state = START;

      	Py_ssize_t i = 0;

      	/*

      	 * Python strings end with a zero byte, which we use as a

      	 * terminal token as they are not valid inside path names.

      	 */

      	while (i < len) {

      		switch (state) {

      		/* First byte of a component: escape a leading '.' or ' ',
      		   and start matching the reserved names. */
      		case START:

      			switch (src[i]) {

      			case '/':

      				charcopy(dest, &destlen, destsize, src[i++]);

      				break;

      			case '.':

      				state = LDOT;

      				escape3(dest, &destlen, destsize, src[i++]);

      				break;

      			case ' ':

      				state = DEFAULT;

      				escape3(dest, &destlen, destsize, src[i++]);

      				break;

      			case 'a':

      				state = A;

      				charcopy(dest, &destlen, destsize, src[i++]);

      				break;

      			case 'c':

      				state = C;

      				charcopy(dest, &destlen, destsize, src[i++]);

      				break;

      			case 'l':

      				state = L;

      				charcopy(dest, &destlen, destsize, src[i++]);

      				break;

      			case 'n':

      				state = N;

      				charcopy(dest, &destlen, destsize, src[i++]);

      				break;

      			case 'p':

      				state = P;

      				charcopy(dest, &destlen, destsize, src[i++]);

      				break;

      			default:

      				/* not consumed: DEFAULT re-examines this byte */
      				state = DEFAULT;

      				break;

      			}

      			break;

      		case A:

      			if (src[i] == 'u') {

      				state = AU;

      				charcopy(dest, &destlen, destsize, src[i++]);

      			}

      			else state = DEFAULT;

      			break;

      		case AU:

      			if (src[i] == 'x') {

      				state = THIRD;

      				/* consumed but not emitted: THIRD decides
      				   whether it must be escaped */
      				i++;

      			}

      			else state = DEFAULT;

      			break;

      		/* The third letter of a reserved name is pending at
      		   src[i - 1].  If the name ends here it is escaped;
      		   otherwise step back so DEFAULT emits it normally. */
      		case THIRD:

      			state = DEFAULT;

      			switch (src[i]) {

      			case '.':

      			case '/':

      			case '\0':

      				escape3(dest, &destlen, destsize, src[i - 1]);

      				break;

      			default:

      				i--;

      				break;

      			}

      			break;

      		case C:

      			if (src[i] == 'o') {

      				state = CO;

      				charcopy(dest, &destlen, destsize, src[i++]);

      			}

      			else state = DEFAULT;

      			break;

      		case CO:

      			if (src[i] == 'm') {

      				state = COMLPT;

      				i++;

      			}

      			else if (src[i] == 'n') {

      				state = THIRD;

      				i++;

      			}

      			else state = DEFAULT;

      			break;

      		/* "com" or "lpt" seen; the third letter is pending at
      		   src[i - 1] until we know whether a digit 1-9 follows. */
      		case COMLPT:

      			switch (src[i]) {

      			case '1': case '2': case '3': case '4': case '5':

      			case '6': case '7': case '8': case '9':

      				state = COMLPTn;

      				i++;

      				break;

      			default:

      				state = DEFAULT;

      				charcopy(dest, &destlen, destsize, src[i - 1]);

      				break;

      			}

      			break;

      		/* Third letter (src[i - 2]) and digit (src[i - 1]) are
      		   pending.  If the name ends here the letter is escaped
      		   and the digit copied; otherwise both are copied as-is. */
      		case COMLPTn:

      			state = DEFAULT;

      			switch (src[i]) {

      			case '.':

      			case '/':

      			case '\0':

      				escape3(dest, &destlen, destsize, src[i - 2]);

      				charcopy(dest, &destlen, destsize, src[i - 1]);

      				break;

      			default:

      				memcopy(dest, &destlen, destsize,

      					&src[i - 2], 2);

      				break;

      			}

      			break;

      		case L:

      			if (src[i] == 'p') {

      				state = LP;

      				charcopy(dest, &destlen, destsize, src[i++]);

      			}

      			else state = DEFAULT;

      			break;

      		case LP:

      			if (src[i] == 't') {

      				state = COMLPT;

      				i++;

      			}

      			else state = DEFAULT;

      			break;

      		case N:

      			if (src[i] == 'u') {

      				state = NU;

      				charcopy(dest, &destlen, destsize, src[i++]);

      			}

      			else state = DEFAULT;

      			break;

      		case NU:

      			if (src[i] == 'l') {

      				state = THIRD;

      				i++;

      			}

      			else state = DEFAULT;

      			break;

      		case P:

      			if (src[i] == 'r') {

      				state = PR;

      				charcopy(dest, &destlen, destsize, src[i++]);

      			}

      			else state = DEFAULT;

      			break;

      		case PR:

      			if (src[i] == 'n') {

      				state = THIRD;

      				i++;

      			}

      			else state = DEFAULT;

      			break;

      		/* After an escaped leading '.': look for "d", "i" or "hg"
      		   so that a following '/' can be dir-encoded in HGDI. */
      		case LDOT:

      			switch (src[i]) {

      			case 'd':

      			case 'i':

      				state = HGDI;

      				charcopy(dest, &destlen, destsize, src[i++]);

      				break;

      			case 'h':

      				state = H;

      				charcopy(dest, &destlen, destsize, src[i++]);

      				break;

      			default:

      				state = DEFAULT;

      				break;

      			}

      			break;

      		/* A non-leading '.' is pending (consumed, not emitted).
      		   At the end of a component it becomes "~2e"; otherwise
      		   it is emitted literally. */
      		case DOT:

      			switch (src[i]) {

      			case '/':

      			case '\0':

      				state = START;

      				memcopy(dest, &destlen, destsize, "~2e", 3);

      				charcopy(dest, &destlen, destsize, src[i++]);

      				break;

      			case 'd':

      			case 'i':

      				state = HGDI;

      				charcopy(dest, &destlen, destsize, '.');

      				charcopy(dest, &destlen, destsize, src[i++]);

      				break;

      			case 'h':

      				state = H;

      				memcopy(dest, &destlen, destsize, ".h", 2);

      				i++;

      				break;

      			default:

      				state = DEFAULT;

      				charcopy(dest, &destlen, destsize, '.');

      				break;

      			}

      			break;

      		case H:

      			if (src[i] == 'g') {

      				state = HGDI;

      				charcopy(dest, &destlen, destsize, src[i++]);

      			}

      			else state = DEFAULT;

      			break;

      		/* ".hg", ".d" or ".i" has been emitted; a '/' right here
      		   ends the component and, with encodedir set, gets ".hg"
      		   inserted in front of it. */
      		case HGDI:

      			if (src[i] == '/') {

      				state = START;

      				if (encodedir)

      					memcopy(dest, &destlen, destsize, ".hg",

      						3);

      				charcopy(dest, &destlen, destsize, src[i++]);

      			}

      			else state = DEFAULT;

      			break;

      		/* A non-leading ' ' is pending (consumed, not emitted).
      		   At the end of a component it becomes "~20"; otherwise
      		   it is emitted literally. */
      		case SPACE:

      			switch (src[i]) {

      			case '/':

      			case '\0':

      				state = START;

      				memcopy(dest, &destlen, destsize, "~20", 3);

      				charcopy(dest, &destlen, destsize, src[i++]);

      				break;

      			default:

      				state = DEFAULT;

      				charcopy(dest, &destlen, destsize, ' ');

      				break;

      			}

      			break;

      		/* Interior of a component: copy the run of pass-through
      		   bytes, then dispatch on the first byte that is not in
      		   onebyte. */
      		case DEFAULT:

        André Sintzoff
    
pathencode: change isset name to avoid name collision...

              r17699
            
      			while (inset(onebyte, src[i])) {

        Bryan O'Sullivan
    
store: implement fncache basic path encoding in C...

              r17616
            
      				charcopy(dest, &destlen, destsize, src[i++]);

      				if (i == len)

      					goto done;

      			}

      			switch (src[i]) {

      			case '.':

      				state = DOT;

      				i++;

      				break;

      			case ' ':

      				state = SPACE;

      				i++;

      				break;

      			case '/':

      				state = START;

      				charcopy(dest, &destlen, destsize, '/');

      				i++;

      				break;

      			default:

        André Sintzoff
    
pathencode: change isset name to avoid name collision...

              r17699
            
      				/* NOTE(review): this branch looks unreachable,
      				   since the loop above only falls through
      				   when src[i] is not in onebyte - confirm. */
      				if (inset(onebyte, src[i])) {

        Bryan O'Sullivan
    
store: implement fncache basic path encoding in C...

              r17616
            
      					do {

      						charcopy(dest, &destlen,

      							 destsize, src[i++]);

      					} while (i < len &&

        André Sintzoff
    
pathencode: change isset name to avoid name collision...

              r17699
            
      						 inset(onebyte, src[i]));

        Bryan O'Sullivan
    
store: implement fncache basic path encoding in C...

              r17616
            
      				}

        André Sintzoff
    
pathencode: change isset name to avoid name collision...

              r17699
            
      				/* two-byte form: '_' then "_" itself or the
      				   byte shifted up by 32 */
      				else if (inset(twobytes, src[i])) {

        Bryan O'Sullivan
    
store: implement fncache basic path encoding in C...

              r17616
            
      					char c = src[i++];

      					charcopy(dest, &destlen, destsize, '_');

      					charcopy(dest, &destlen, destsize,

      						 c == '_' ? '_' : c + 32);

      				}

      				/* everything else becomes a "~xx" escape */
      				else

      					escape3(dest, &destlen, destsize,

      						src[i++]);

      				break;

      			}

      			break;

      		}

      	}

      done:

      	return destlen;

      }

      /*
       * Basic (non-hashed) store encoding of len bytes of src, with
       * dir-encoding enabled.
       *
       * dest may be NULL to only compute the encoded length.  Returns the
       * number of output bytes.
       */
      static Py_ssize_t basicencode(char *dest, size_t destsize,
      			      const char *src, Py_ssize_t len)
      {
      	/* Word 2 covers bytes 64-95: bits 1-26 are 'A'-'Z' and bit 31 is
      	   '_'.  These get the two-byte "_x" form in _encode(). */
      	static const uint32_t twobytes[8] = { 0, 0, 0x87fffffe };

      	/* Bytes that _encode() copies through unchanged. */
      	static const uint32_t onebyte[8] = {
      		1, 0x2bff3bfa, 0x68000001, 0x2fffffff,
      	};

      	/* Output starts at offset 0; the final 1 turns on dir-encoding. */
      	return _encode(twobytes, onebyte, dest, 0, destsize, src, len, 1);
      }

      static const Py_ssize_t maxstorepathlen = 120;

        Bryan O'Sullivan
    
store: implement lowerencode in C

              r18430
            
      /*
       * Lower-casing encoder: bytes in onebyte are copied unchanged, bytes
       * in lower ('A'-'Z') are emitted shifted up by 32, and every other
       * byte becomes a "~xx" escape.
       *
       * dest may be NULL to only compute the encoded length.  Returns the
       * number of output bytes.
       */
      static Py_ssize_t _lowerencode(char *dest, size_t destsize,
      			       const char *src, Py_ssize_t len)
      {
      	static const uint32_t onebyte[8] = {
      		1, 0x2bfffbfb, 0xe8000001, 0x2fffffff
      	};

      	/* Word 2 covers bytes 64-95; bits 1-26 are 'A'-'Z'. */
      	static const uint32_t lower[8] = { 0, 0, 0x7fffffe };

      	Py_ssize_t pos = 0, destlen = 0;

      	while (pos < len) {
      		const char c = src[pos++];

      		if (inset(onebyte, c)) {
      			charcopy(dest, &destlen, destsize, c);
      			continue;
      		}

      		if (inset(lower, c))
      			charcopy(dest, &destlen, destsize, c + 32);
      		else
      			escape3(dest, &destlen, destsize, c);
      	}

      	return destlen;
      }

      /*
       * Python entry point: lowerencode(path) -> str.
       *
       * Runs _lowerencode() twice: once with no buffer to learn the output
       * length, then again into a freshly allocated string of that length.
       * Returns NULL with an exception set on argument errors or allocation
       * failure.
       */
      PyObject *lowerencode(PyObject *self, PyObject *args)
      {
      	PyObject *ret;
      	Py_ssize_t len, newlen;
      	char *path;

      	if (!PyArg_ParseTuple(args, "s#:lowerencode", &path, &len))
      		return NULL;

      	newlen = _lowerencode(NULL, 0, path, len);

      	ret = PyString_FromStringAndSize(NULL, newlen);
      	if (ret == NULL)
      		return NULL;

      	_lowerencode(PyString_AS_STRING(ret), newlen, path, len);
      	return ret;
      }

        Bryan O'Sullivan
    
pathencode: implement hashed encoding in C...

              r18433
            
      /*
       * See store.py:_auxencode for a description.
       *
       * Runs _encode() with an empty two-byte set, dir-encoding disabled,
       * and a pass-through set that contains every byte except ' ', '.'
       * and '/' (bits 0, 14 and 15 of word 1), which _encode() handles
       * through its own states.
       *
       * dest may be NULL to only compute the encoded length.  Returns the
       * number of output bytes.
       */
      static Py_ssize_t auxencode(char *dest, size_t destsize,
      			    const char *src, Py_ssize_t len)
      {
      	static const uint32_t twobytes[8] = { 0 };
      	static const uint32_t onebyte[8] = {
      		~0U, 0xffff3ffe, ~0U, ~0U, ~0U, ~0U, ~0U, ~0U,
      	};
      	Py_ssize_t destlen;

      	destlen = _encode(twobytes, onebyte, dest, 0, destsize, src, len, 0);
      	return destlen;
      }

        Bryan O'Sullivan
    
pathencode: implement the "mangling" part of hashed encoding in C...

              r18432
            
      /*
       * Build the hashed store name for an already lower/aux-encoded path.
       *
       * The result is "dh/" + shortened directory components + a prefix of
       * the basename + the 40 hex digits of sha + the basename's suffix
       * (from its last '.').
       *
       *   src, len - encoded path; the last byte of src is never copied.
       *              NOTE(review): presumably that byte is the trailing NUL
       *              that hashencode() carries through - confirm.
       *   sha      - 20-byte binary digest
       *
       * Returns a new string object, or NULL on allocation failure.
       */
      static PyObject *hashmangle(const char *src, Py_ssize_t len, const char sha[20])
      {
      	static const Py_ssize_t dirprefixlen = 8;
      	static const Py_ssize_t maxshortdirslen = 68;
      	PyObject *ret;
      	char *dest;
      	Py_ssize_t i, complen;
      	Py_ssize_t lastslash, lastdot = -1;
      	Py_ssize_t suffixlen = 0;
      	Py_ssize_t destsize, destlen = 0, slop, used;

      	/* Scan the basename backwards: find the last '/' and, within the
      	   basename, the last '.'. */
      	for (lastslash = len - 1;
      	     lastslash >= 0 && src[lastslash] != '/';
      	     lastslash--) {
      		if (lastdot == -1 && src[lastslash] == '.')
      			lastdot = lastslash;
      	}

      #if 0
      	/* All paths should end in a suffix of ".i" or ".d".
                 Unfortunately, the file names in test-hybridencode.py
                 violate this rule.  */
      	if (lastdot != len - 3) {
      		PyErr_SetString(PyExc_ValueError,
      				"suffix missing or wrong length");
      		return NULL;
      	}
      #endif

      	/* If src contains a suffix, it is appended after the hash, so
      	   make room for it on top of the maximum store path length. */
      	if (lastdot >= 0)
      		suffixlen = len - lastdot - 1;
      	destsize = maxstorepathlen + suffixlen;

      	ret = PyString_FromStringAndSize(NULL, destsize);
      	if (ret == NULL)
      		return NULL;

      	dest = PyString_AS_STRING(ret);
      	memcopy(dest, &destlen, destsize, "dh/", 3);

      	/* Copy up to dirprefixlen bytes of each path component, up to
      	   a limit of maxshortdirslen bytes.  complen counts the bytes
      	   seen so far in the current component. */
      	for (i = 0, complen = 0; i < lastslash; i++) {
      		const char c = src[i];

      		if (c != '/') {
      			if (complen < dirprefixlen)
      				charcopy(dest, &destlen, destsize, c);
      			complen++;
      			continue;
      		}

      		/* After truncation, a directory name may end in a
      		   space or dot, which are unportable. */
      		if (dest[destlen - 1] == '.' || dest[destlen - 1] == ' ')
      			dest[destlen - 1] = '_';

      		/* The + 3 is to account for "dh/" in the beginning */
      		if (destlen > maxshortdirslen + 3)
      			break;

      		charcopy(dest, &destlen, destsize, c);
      		complen = 0;
      	}

      	/* Rewind to just before the last slash copied. */
      	if (destlen > maxshortdirslen + 3) {
      		destlen--;
      		while (destlen > 0 && dest[destlen] != '/')
      			destlen--;
      	}

      	if (destlen > 3) {
      		if (lastslash > 0) {
      			/* The last directory component may be
      			   truncated, so make it safe. */
      			const char tail = dest[destlen - 1];

      			if (tail == '.' || tail == ' ')
      				dest[destlen - 1] = '_';
      		}
      		charcopy(dest, &destlen, destsize, '/');
      	}

      	/* Add a prefix of the original file's name. Its length
      	   depends on the number of bytes left after accounting for
      	   hash and suffix. */
      	used = destlen + 40;
      	if (lastdot >= 0)
      		used += suffixlen;
      	slop = maxstorepathlen - used;

      	if (slop > 0) {
      		Py_ssize_t basenamelen;

      		if (lastslash >= 0)
      			basenamelen = len - lastslash - 2;
      		else
      			basenamelen = len - 1;

      		if (basenamelen > slop)
      			basenamelen = slop;

      		if (basenamelen > 0)
      			memcopy(dest, &destlen, destsize, &src[lastslash + 1],
      				basenamelen);
      	}

      	/* Add hash and suffix. */
      	for (i = 0; i < 20; i++)
      		hexencode(dest, &destlen, destsize, sha[i]);

      	if (lastdot >= 0)
      		memcopy(dest, &destlen, destsize, &src[lastdot], suffixlen);

      	/* The buffer was allocated at its maximum size; record the
      	   number of bytes actually produced. */
      	PyString_GET_SIZE(ret) = destlen;

      	return ret;
      }

        Bryan O'Sullivan
    
store: implement fncache basic path encoding in C...

              r17616
            
      /*
       * Compute the SHA-1 digest of len bytes of str by calling the Python
       * function mercurial.util.sha1 and store the 20 raw bytes in hash.
       * The function object is looked up once and cached for later calls.
       *
       * Avoiding a trip through Python would improve performance by 50%,
       * but we don't encounter enough long names to be worth the code.
       *
       * Returns 0 on success, -1 with a Python exception set on failure.
       */
      static int sha1hash(char hash[20], const char *str, Py_ssize_t len)
      {
      	static PyObject *shafunc;
      	PyObject *shaobj, *hashobj;
      	int rc = -1;

      	if (shafunc == NULL) {
      		PyObject *name, *util;

      		name = PyString_FromString("mercurial.util");
      		if (name == NULL)
      			return -1;

      		util = PyImport_Import(name);
      		Py_DECREF(name);
      		if (util == NULL) {
      			PyErr_SetString(PyExc_ImportError, "mercurial.util");
      			return -1;
      		}

      		shafunc = PyObject_GetAttrString(util, "sha1");
      		Py_DECREF(util);
      		if (shafunc == NULL) {
      			PyErr_SetString(PyExc_AttributeError,
      					"module 'mercurial.util' has no "
      					"attribute 'sha1'");
      			return -1;
      		}
      	}

      	shaobj = PyObject_CallFunction(shafunc, "s#", str, len);
      	if (shaobj == NULL)
      		return -1;

      	hashobj = PyObject_CallMethod(shaobj, "digest", "");
      	Py_DECREF(shaobj);
      	if (hashobj == NULL)
      		return -1;

      	/* Only a 20-byte string is accepted as a digest. */
      	if (PyString_Check(hashobj) && PyString_GET_SIZE(hashobj) == 20) {
      		memcpy(hash, PyString_AS_STRING(hashobj), 20);
      		rc = 0;
      	}
      	else
      		PyErr_SetString(PyExc_TypeError,
      				"result of digest is not a 20-byte hash");

      	Py_DECREF(hashobj);
      	return rc;
      }

        Matt Mackall
    
pathencode: grow buffers to increase safety margin

              r19185
            
      #define MAXENCODE 4096 * 4

        Matt Mackall
    
pathencode: don't use alloca() for safety/portability

              r18452
            
        Bryan O'Sullivan
    
pathencode: implement hashed encoding in C...

              r18433
            
      static PyObject *hashencode(const char *src, Py_ssize_t len)

      {

        Matt Mackall
    
pathencode: don't use alloca() for safety/portability

              r18452
            
      	char dired[MAXENCODE];

      	char lowered[MAXENCODE];

      	char auxed[MAXENCODE];

      	Py_ssize_t dirlen, lowerlen, auxlen, baselen;

        Bryan O'Sullivan
    
pathencode: implement hashed encoding in C...

              r18433
            
      	char sha[20];

        Matt Mackall
    
pathencode: don't use alloca() for safety/portability

              r18452
            
      	baselen = (len - 5) * 3;

      	if (baselen >= MAXENCODE) {

      		PyErr_SetString(PyExc_ValueError, "string too long");

      		return NULL;

      	}

        Bryan O'Sullivan
    
pathencode: implement hashed encoding in C...

              r18433
            
      	dirlen = _encodedir(dired, baselen, src, len);

      	if (sha1hash(sha, dired, dirlen - 1) == -1)

      		return NULL;

      	lowerlen = _lowerencode(lowered, baselen, dired + 5, dirlen - 5);

      	auxlen = auxencode(auxed, baselen, lowered, lowerlen);

      	return hashmangle(auxed, auxlen, sha);

      }

        Bryan O'Sullivan
    
store: implement fncache basic path encoding in C...

              r17616
            
      /*
       * Python entry point: pathencode(path) -> str.
       *
       * Uses the basic encoding when its result fits within
       * maxstorepathlen bytes and the hashed encoding otherwise.  If the
       * basic encoding leaves the path unchanged, the argument itself is
       * returned with a new reference.  Returns NULL with an exception set
       * on failure.
       */
      PyObject *pathencode(PyObject *self, PyObject *args)
      {
      	PyObject *pathobj, *newobj;
      	Py_ssize_t len, newlen;
      	char *path;

      	if (!PyArg_ParseTuple(args, "O:pathencode", &pathobj))
      		return NULL;

      	if (PyString_AsStringAndSize(pathobj, &path, &len) == -1) {
      		PyErr_SetString(PyExc_TypeError, "expected a string");
      		return NULL;
      	}

      	/* Input already too long: no need to measure the basic form. */
      	if (len > maxstorepathlen)
      		return hashencode(path, len + 1);

      	/* Sizing pass; the count includes the byte after the string's
      	   contents, hence the "+ 1" in the comparisons below. */
      	if (len == 0)
      		newlen = 1;
      	else
      		newlen = basicencode(NULL, 0, path, len + 1);

      	if (newlen > maxstorepathlen + 1)
      		return hashencode(path, len + 1);

      	/* Nothing needed encoding: hand back the original object. */
      	if (newlen == len + 1) {
      		Py_INCREF(pathobj);
      		return pathobj;
      	}

      	newobj = PyString_FromStringAndSize(NULL, newlen);
      	if (newobj == NULL)
      		return NULL;

      	/* Exclude the extra trailing byte from the string's contents
      	   before the encoding pass writes it. */
      	PyString_GET_SIZE(newobj)--;
      	basicencode(PyString_AS_STRING(newobj), newlen, path, len + 1);

      	return newobj;
      }

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

Adrian Buehlmann pathencode: new C module with fast encodedir() function...	r17606	/*
		pathencode.c - efficient path name encoding

		Copyright 2012 Facebook

		This software may be used and distributed according to the terms of
		the GNU General Public License, incorporated herein by reference.
		*/

Bryan O'Sullivan store: implement fncache basic path encoding in C...	r17616	/*
		* An implementation of the name encoding scheme used by the fncache
		* store. The common case is of a path < 120 bytes long, which is
		* handled either in a single pass with no allocations or two passes
		* with a single allocation. For longer paths, multiple passes are
		* required.
		*/

Bryan O'Sullivan store: implement lowerencode in C	r18430	#define PY_SSIZE_T_CLEAN
Adrian Buehlmann pathencode: new C module with fast encodedir() function...	r17606	#include <Python.h>
		#include <assert.h>
		#include <ctype.h>
		#include <stdlib.h>
		#include <string.h>

		#include "util.h"

Bryan O'Sullivan store: implement fncache basic path encoding in C...	r17616	/* state machine for the fast path */
		enum path_state {
		START, /* first byte of a path component */
		A, /* "AUX" */
		AU,
		THIRD, /* third of a 3-byte sequence, e.g. "AUX", "NUL" */
		C, /* "CON" or "COMn" */
		CO,
		COMLPT, /* "COM" or "LPT" */
		COMLPTn,
		L,
		LP,
		N,
		NU,
		P, /* "PRN" */
		PR,
		LDOT, /* leading '.' */
		DOT, /* '.' in a non-leading position */
		H, /* ".h" */
		HGDI, /* ".hg", ".d", or ".i" */
		SPACE,
Yuya Nishihara pathencode: eliminate comma at end of enum list to avoid pedantic warning	r19051	DEFAULT /* byte of a path component after the first */
Bryan O'Sullivan store: implement fncache basic path encoding in C...	r17616	};

Adrian Buehlmann pathencode: new C module with fast encodedir() function...	r17606	/* state machine for dir-encoding */
		enum dir_state {
		DDOT,
		DH,
		DHGDI,
Yuya Nishihara pathencode: eliminate comma at end of enum list to avoid pedantic warning	r19051	DDEFAULT
Adrian Buehlmann pathencode: new C module with fast encodedir() function...	r17606	};

André Sintzoff pathencode: change isset name to avoid name collision...	r17699	static inline int inset(const uint32_t bitset[], char c)
Bryan O'Sullivan store: implement fncache basic path encoding in C...	r17616	{
		return bitset[((uint8_t)c) >> 5] & (1 << (((uint8_t)c) & 31));
		}

Adrian Buehlmann pathencode: new C module with fast encodedir() function...	r17606	static inline void charcopy(char dest, Py_ssize_t destlen, size_t destsize,
		char c)
		{
		if (dest) {
		assert(*destlen < destsize);
		dest[*destlen] = c;
		}
		(*destlen)++;
		}

		static inline void memcopy(char dest, Py_ssize_t destlen, size_t destsize,
		const void *src, Py_ssize_t len)
		{
		if (dest) {
		assert(*destlen + len < destsize);
		memcpy((void )&dest[destlen], src, len);
		}
		*destlen += len;
		}

Bryan O'Sullivan store: implement fncache basic path encoding in C...	r17616	static inline void hexencode(char dest, Py_ssize_t destlen, size_t destsize,
		uint8_t c)
		{
		static const char hexdigit[] = "0123456789abcdef";

		charcopy(dest, destlen, destsize, hexdigit[c >> 4]);
		charcopy(dest, destlen, destsize, hexdigit[c & 15]);
		}

		/*
		 * 3-byte escape: tilde followed by two hex digits.
		 *
		 * Appends "~XX" for byte c to the output buffer, advancing *destlen
		 * by three.  c is converted to an unsigned byte by hexencode().
		 */
		static inline void escape3(char *dest, Py_ssize_t *destlen, size_t destsize,
		                           char c)
		{
			charcopy(dest, destlen, destsize, '~');
			hexencode(dest, destlen, destsize, c);
		}

Adrian Buehlmann pathencode: new C module with fast encodedir() function...	r17606	static Py_ssize_t _encodedir(char *dest, size_t destsize,
		const char *src, Py_ssize_t len)
		{
		enum dir_state state = DDEFAULT;
		Py_ssize_t i = 0, destlen = 0;

		while (i < len) {
		switch (state) {
		case DDOT:
		switch (src[i]) {
		case 'd':
		case 'i':
		state = DHGDI;
		charcopy(dest, &destlen, destsize, src[i++]);
		break;
		case 'h':
		state = DH;
		charcopy(dest, &destlen, destsize, src[i++]);
		break;
		default:
		state = DDEFAULT;
		break;
		}
		break;
		case DH:
		if (src[i] == 'g') {
		state = DHGDI;
		charcopy(dest, &destlen, destsize, src[i++]);
		}
		else state = DDEFAULT;
		break;
		case DHGDI:
		if (src[i] == '/') {
		memcopy(dest, &destlen, destsize, ".hg", 3);
		charcopy(dest, &destlen, destsize, src[i++]);
		}
		state = DDEFAULT;
		break;
		case DDEFAULT:
		if (src[i] == '.')
		state = DDOT;
		charcopy(dest, &destlen, destsize, src[i++]);
		break;
		}
		}

		return destlen;
		}

		/*
		 * Python entry point: encodedir(path).
		 *
		 * Takes a single string argument and returns its directory-encoded
		 * form (see _encodedir()).  When the encoding leaves the path
		 * unchanged, the original object is returned with a new reference.
		 *
		 * Returns NULL with a Python exception set on failure.
		 */
		PyObject *encodedir(PyObject *self, PyObject *args)
		{
			Py_ssize_t len, newlen;
			PyObject *pathobj, *newobj;
			char *path;

			if (!PyArg_ParseTuple(args, "O:encodedir", &pathobj))
				return NULL;

			if (PyString_AsStringAndSize(pathobj, &path, &len) == -1) {
				PyErr_SetString(PyExc_TypeError, "expected a string");
				return NULL;
			}

			/* Sizing pass; len + 1 feeds the trailing zero byte through too. */
			newlen = len ? _encodedir(NULL, 0, path, len + 1) : 1;

			if (newlen == len + 1) {
				Py_INCREF(pathobj);
				return pathobj;
			}

			newobj = PyString_FromStringAndSize(NULL, newlen);

			if (newobj) {
				/* newlen counted the zero byte, which is not part of
				   the string's length. */
				PyString_GET_SIZE(newobj)--;
				_encodedir(PyString_AS_STRING(newobj), newlen, path,
				           len + 1);
			}

			return newobj;
		}
/*
 * Core single-pass path encoder shared by basicencode() and
 * auxencode().
 *
 * twobytes   set of bytes written as '_' followed by the byte plus 32
 *            ('_' itself is written as "__")
 * onebyte    set of bytes copied through unchanged
 * dest       output buffer, or NULL to only compute the encoded length
 * destlen    offset in dest at which writing starts
 * destsize   capacity of dest
 * src, len   input bytes; callers include the trailing zero byte
 * encodedir  if nonzero, insert ".hg" before the '/' that follows a
 *            component ending in ".hg", ".d" or ".i"
 *
 * On top of the two sets the state machine handles:
 *  - a leading '.' or ' ' of a component, written as a "~XX" escape;
 *  - a '.' or ' ' at the end of a component, written as "~2e"/"~20";
 *  - components starting with "aux", "con", "prn", "nul", "com1".."com9"
 *    or "lpt1".."lpt9" when followed by '.', '/' or the zero byte: the
 *    third letter is written as a "~XX" escape.
 * Any byte in neither set is written as a "~XX" escape.
 *
 * Returns the output offset after the last byte written.
 */
static Py_ssize_t _encode(const uint32_t twobytes[8], const uint32_t onebyte[8],
                          char *dest, Py_ssize_t destlen, size_t destsize,
                          const char *src, Py_ssize_t len,
                          int encodedir)
{
	enum path_state state = START;
	Py_ssize_t i = 0;

	/*
	 * Python strings end with a zero byte, which we use as a
	 * terminal token as they are not valid inside path names.
	 */

	while (i < len) {
		switch (state) {
		case START:
			switch (src[i]) {
			case '/':
				charcopy(dest, &destlen, destsize, src[i++]);
				break;
			case '.':
				state = LDOT;
				escape3(dest, &destlen, destsize, src[i++]);
				break;
			case ' ':
				state = DEFAULT;
				escape3(dest, &destlen, destsize, src[i++]);
				break;
			case 'a':
				state = A;
				charcopy(dest, &destlen, destsize, src[i++]);
				break;
			case 'c':
				state = C;
				charcopy(dest, &destlen, destsize, src[i++]);
				break;
			case 'l':
				state = L;
				charcopy(dest, &destlen, destsize, src[i++]);
				break;
			case 'n':
				state = N;
				charcopy(dest, &destlen, destsize, src[i++]);
				break;
			case 'p':
				state = P;
				charcopy(dest, &destlen, destsize, src[i++]);
				break;
			default:
				/* not consumed: re-examined in DEFAULT */
				state = DEFAULT;
				break;
			}
			break;
		case A:
			if (src[i] == 'u') {
				state = AU;
				charcopy(dest, &destlen, destsize, src[i++]);
			} else {
				state = DEFAULT;
			}
			break;
		case AU:
			if (src[i] == 'x') {
				/* hold the third letter back until we know
				   whether it must be escaped */
				state = THIRD;
				i++;
			} else {
				state = DEFAULT;
			}
			break;
		case THIRD:
			state = DEFAULT;
			switch (src[i]) {
			case '.':
			case '/':
			case '\0':
				escape3(dest, &destlen, destsize, src[i - 1]);
				break;
			default:
				/* not a reserved name: rewind so DEFAULT
				   emits the held-back letter */
				i--;
				break;
			}
			break;
		case C:
			if (src[i] == 'o') {
				state = CO;
				charcopy(dest, &destlen, destsize, src[i++]);
			} else {
				state = DEFAULT;
			}
			break;
		case CO:
			if (src[i] == 'm') {
				state = COMLPT;
				i++;
			} else if (src[i] == 'n') {
				state = THIRD;
				i++;
			} else {
				state = DEFAULT;
			}
			break;
		case COMLPT:
			switch (src[i]) {
			case '1': case '2': case '3': case '4': case '5':
			case '6': case '7': case '8': case '9':
				state = COMLPTn;
				i++;
				break;
			default:
				/* emit the held-back 'm' or 't' */
				state = DEFAULT;
				charcopy(dest, &destlen, destsize, src[i - 1]);
				break;
			}
			break;
		case COMLPTn:
			state = DEFAULT;
			switch (src[i]) {
			case '.':
			case '/':
			case '\0':
				escape3(dest, &destlen, destsize, src[i - 2]);
				charcopy(dest, &destlen, destsize, src[i - 1]);
				break;
			default:
				/* emit the held-back letter and digit */
				memcopy(dest, &destlen, destsize,
				        &src[i - 2], 2);
				break;
			}
			break;
		case L:
			if (src[i] == 'p') {
				state = LP;
				charcopy(dest, &destlen, destsize, src[i++]);
			} else {
				state = DEFAULT;
			}
			break;
		case LP:
			if (src[i] == 't') {
				state = COMLPT;
				i++;
			} else {
				state = DEFAULT;
			}
			break;
		case N:
			if (src[i] == 'u') {
				state = NU;
				charcopy(dest, &destlen, destsize, src[i++]);
			} else {
				state = DEFAULT;
			}
			break;
		case NU:
			if (src[i] == 'l') {
				state = THIRD;
				i++;
			} else {
				state = DEFAULT;
			}
			break;
		case P:
			if (src[i] == 'r') {
				state = PR;
				charcopy(dest, &destlen, destsize, src[i++]);
			} else {
				state = DEFAULT;
			}
			break;
		case PR:
			if (src[i] == 'n') {
				state = THIRD;
				i++;
			} else {
				state = DEFAULT;
			}
			break;
		case LDOT:
			switch (src[i]) {
			case 'd':
			case 'i':
				state = HGDI;
				charcopy(dest, &destlen, destsize, src[i++]);
				break;
			case 'h':
				state = H;
				charcopy(dest, &destlen, destsize, src[i++]);
				break;
			default:
				state = DEFAULT;
				break;
			}
			break;
		case DOT:
			/* the '.' itself was consumed but not yet written */
			switch (src[i]) {
			case '/':
			case '\0':
				state = START;
				memcopy(dest, &destlen, destsize, "~2e", 3);
				charcopy(dest, &destlen, destsize, src[i++]);
				break;
			case 'd':
			case 'i':
				state = HGDI;
				charcopy(dest, &destlen, destsize, '.');
				charcopy(dest, &destlen, destsize, src[i++]);
				break;
			case 'h':
				state = H;
				memcopy(dest, &destlen, destsize, ".h", 2);
				i++;
				break;
			default:
				state = DEFAULT;
				charcopy(dest, &destlen, destsize, '.');
				break;
			}
			break;
		case H:
			if (src[i] == 'g') {
				state = HGDI;
				charcopy(dest, &destlen, destsize, src[i++]);
			} else {
				state = DEFAULT;
			}
			break;
		case HGDI:
			if (src[i] == '/') {
				state = START;
				if (encodedir)
					memcopy(dest, &destlen, destsize, ".hg",
					        3);
				charcopy(dest, &destlen, destsize, src[i++]);
			} else {
				state = DEFAULT;
			}
			break;
		case SPACE:
			/* the ' ' itself was consumed but not yet written */
			switch (src[i]) {
			case '/':
			case '\0':
				state = START;
				memcopy(dest, &destlen, destsize, "~20", 3);
				charcopy(dest, &destlen, destsize, src[i++]);
				break;
			default:
				state = DEFAULT;
				charcopy(dest, &destlen, destsize, ' ');
				break;
			}
			break;
		case DEFAULT:
			while (inset(onebyte, src[i])) {
				charcopy(dest, &destlen, destsize, src[i++]);
				if (i == len)
					goto done;
			}
			/* From here on src[i] is known not to be in onebyte. */
			switch (src[i]) {
			case '.':
				state = DOT;
				i++;
				break;
			case ' ':
				state = SPACE;
				i++;
				break;
			case '/':
				state = START;
				charcopy(dest, &destlen, destsize, '/');
				i++;
				break;
			default:
				if (inset(twobytes, src[i])) {
					char c = src[i++];
					charcopy(dest, &destlen, destsize, '_');
					charcopy(dest, &destlen, destsize,
					         c == '_' ? '_' : c + 32);
				} else {
					escape3(dest, &destlen, destsize,
					        src[i++]);
				}
				break;
			}
			break;
		}
	}
done:
	return destlen;
}

		/*
		 * Basic (non-hashed) store path encoding.
		 *
		 * Runs _encode() with directory encoding enabled.  The twobytes set
		 * holds 'A'..'Z' and '_', so those are written as '_' plus the
		 * lowercase letter (or "__").
		 *
		 * dest may be NULL to only compute the encoded length.  Returns the
		 * number of bytes the encoded form occupies.
		 */
		static Py_ssize_t basicencode(char *dest, size_t destsize,
		                              const char *src, Py_ssize_t len)
		{
			static const uint32_t twobytes[8] = { 0, 0, 0x87fffffe };

			static const uint32_t onebyte[8] = {
				1, 0x2bff3bfa, 0x68000001, 0x2fffffff,
			};

			return _encode(twobytes, onebyte, dest, 0, destsize,
			               src, len, 1);
		}

		/*
		 * Length limit for encoded store paths.  pathencode() falls back to
		 * the hashed encoding when the basic encoding would exceed it, and
		 * hashmangle() uses it to budget the basename prefix it keeps.
		 */
		static const Py_ssize_t maxstorepathlen = 120;

Bryan O'Sullivan store: implement lowerencode in C	r18430	static Py_ssize_t _lowerencode(char *dest, size_t destsize,
		const char *src, Py_ssize_t len)
		{
		static const uint32_t onebyte[8] = {
		1, 0x2bfffbfb, 0xe8000001, 0x2fffffff
		};

		static const uint32_t lower[8] = { 0, 0, 0x7fffffe };

		Py_ssize_t i, destlen = 0;

		for (i = 0; i < len; i++) {
		if (inset(onebyte, src[i]))
		charcopy(dest, &destlen, destsize, src[i]);
		else if (inset(lower, src[i]))
		charcopy(dest, &destlen, destsize, src[i] + 32);
		else
		escape3(dest, &destlen, destsize, src[i]);
		}

		return destlen;
		}

		/*
		 * Python entry point: lowerencode(path).
		 *
		 * Returns a new string holding the _lowerencode() form of the
		 * argument, or NULL with a Python exception set on failure.
		 */
		PyObject *lowerencode(PyObject *self, PyObject *args)
		{
			char *path;
			Py_ssize_t len, newlen;
			PyObject *ret;

			if (!PyArg_ParseTuple(args, "s#:lowerencode", &path, &len))
				return NULL;

			/* First pass sizes the result, second pass fills it in. */
			newlen = _lowerencode(NULL, 0, path, len);
			ret = PyString_FromStringAndSize(NULL, newlen);
			if (ret)
				_lowerencode(PyString_AS_STRING(ret), newlen, path, len);

			return ret;
		}

/*
 * See store.py:_auxencode for a description.
 *
 * Runs _encode() with an empty twobytes set, directory encoding
 * disabled, and a onebyte set containing every byte except ' ', '.'
 * and '/'.  Only the reserved-name and leading/trailing dot and space
 * rules of the state machine therefore change the input.
 *
 * dest may be NULL to only compute the encoded length.  Returns the
 * number of bytes the encoded form occupies.
 */
static Py_ssize_t auxencode(char *dest, size_t destsize,
                            const char *src, Py_ssize_t len)
{
	static const uint32_t twobytes[8];

	static const uint32_t onebyte[8] = {
		~0U, 0xffff3ffe, ~0U, ~0U, ~0U, ~0U, ~0U, ~0U,
	};

	return _encode(twobytes, onebyte, dest, 0, destsize, src, len, 0);
}

Bryan O'Sullivan pathencode: implement the "mangling" part of hashed encoding in C...	r18432	static PyObject hashmangle(const char src, Py_ssize_t len, const char sha[20])
		{
		static const Py_ssize_t dirprefixlen = 8;
		static const Py_ssize_t maxshortdirslen = 68;
		char *dest;
		PyObject *ret;

		Py_ssize_t i, d, p, lastslash = len - 1, lastdot = -1;
		Py_ssize_t destsize, destlen = 0, slop, used;

		while (lastslash >= 0 && src[lastslash] != '/') {
		if (src[lastslash] == '.' && lastdot == -1)
		lastdot = lastslash;
		lastslash--;
		}

		#if 0
		/* All paths should end in a suffix of ".i" or ".d".
		Unfortunately, the file names in test-hybridencode.py
		violate this rule. */
		if (lastdot != len - 3) {
		PyErr_SetString(PyExc_ValueError,
		"suffix missing or wrong length");
		return NULL;
		}
		#endif

		/* If src contains a suffix, we will append it to the end of
		the new string, so make room. */
		destsize = 120;
		if (lastdot >= 0)
		destsize += len - lastdot - 1;

		ret = PyString_FromStringAndSize(NULL, destsize);
		if (ret == NULL)
		return NULL;

		dest = PyString_AS_STRING(ret);
		memcopy(dest, &destlen, destsize, "dh/", 3);

		/* Copy up to dirprefixlen bytes of each path component, up to
		a limit of maxshortdirslen bytes. */
		for (i = d = p = 0; i < lastslash; i++, p++) {
		if (src[i] == '/') {
		char d = dest[destlen - 1];
		/* After truncation, a directory name may end
		in a space or dot, which are unportable. */
		if (d == '.' \|\| d == ' ')
		dest[destlen - 1] = '_';
Siddharth Agarwal pathencode: fix hashmangle short dir limit (issue3958)...	r19317	/* The + 3 is to account for "dh/" in the beginning */
		if (destlen > maxshortdirslen + 3)
Bryan O'Sullivan pathencode: implement the "mangling" part of hashed encoding in C...	r18432	break;
		charcopy(dest, &destlen, destsize, src[i]);
		p = -1;
		}
		else if (p < dirprefixlen)
		charcopy(dest, &destlen, destsize, src[i]);
		}

		/* Rewind to just before the last slash copied. */
		if (destlen > maxshortdirslen + 3)
		do {
		destlen--;
		} while (destlen > 0 && dest[destlen] != '/');

		if (destlen > 3) {
		if (lastslash > 0) {
		char d = dest[destlen - 1];
		/* The last directory component may be
		truncated, so make it safe. */
		if (d == '.' \|\| d == ' ')
		dest[destlen - 1] = '_';
		}

		charcopy(dest, &destlen, destsize, '/');
		}

		/* Add a prefix of the original file's name. Its length
		depends on the number of bytes left after accounting for
		hash and suffix. */
		used = destlen + 40;
		if (lastdot >= 0)
		used += len - lastdot - 1;
		slop = maxstorepathlen - used;
		if (slop > 0) {
		Py_ssize_t basenamelen =
		lastslash >= 0 ? len - lastslash - 2 : len - 1;

		if (basenamelen > slop)
		basenamelen = slop;
		if (basenamelen > 0)
		memcopy(dest, &destlen, destsize, &src[lastslash + 1],
		basenamelen);
		}

		/* Add hash and suffix. */
		for (i = 0; i < 20; i++)
		hexencode(dest, &destlen, destsize, sha[i]);

		if (lastdot >= 0)
		memcopy(dest, &destlen, destsize, &src[lastdot],
		len - lastdot - 1);

		PyString_GET_SIZE(ret) = destlen;

		return ret;
		}

Bryan O'Sullivan store: implement fncache basic path encoding in C...	r17616	/*
Bryan O'Sullivan pathencode: add a SHA-1 hash function...	r18431	* Avoiding a trip through Python would improve performance by 50%,
		* but we don't encounter enough long names to be worth the code.
		*/
		static int sha1hash(char hash[20], const char *str, Py_ssize_t len)
		{
		static PyObject *shafunc;
		PyObject shaobj, hashobj;

		if (shafunc == NULL) {
		PyObject util, name = PyString_FromString("mercurial.util");

		if (name == NULL)
		return -1;

		util = PyImport_Import(name);
		Py_DECREF(name);

		if (util == NULL) {
		PyErr_SetString(PyExc_ImportError, "mercurial.util");
		return -1;
		}
		shafunc = PyObject_GetAttrString(util, "sha1");
		Py_DECREF(util);

		if (shafunc == NULL) {
		PyErr_SetString(PyExc_AttributeError,
		"module 'mercurial.util' has no "
		"attribute 'sha1'");
		return -1;
		}
		}

		shaobj = PyObject_CallFunction(shafunc, "s#", str, len);

		if (shaobj == NULL)
		return -1;

		hashobj = PyObject_CallMethod(shaobj, "digest", "");
		Py_DECREF(shaobj);
Augie Fackler pathencode: check result of .digest() method in sha1hash...	r26050	if (hashobj == NULL)
		return -1;
Bryan O'Sullivan pathencode: add a SHA-1 hash function...	r18431
		if (!PyString_Check(hashobj) \|\| PyString_GET_SIZE(hashobj) != 20) {
		PyErr_SetString(PyExc_TypeError,
		"result of digest is not a 20-byte hash");
		Py_DECREF(hashobj);
		return -1;
		}

		memcpy(hash, PyString_AS_STRING(hashobj), 20);
		Py_DECREF(hashobj);
		return 0;
		}

/*
 * Size of each scratch buffer used by hashencode().  Parenthesized so
 * the macro expands as a single value inside larger expressions.
 */
#define MAXENCODE (4096 * 4)

Bryan O'Sullivan pathencode: implement hashed encoding in C...	r18433	static PyObject hashencode(const char src, Py_ssize_t len)
		{
Matt Mackall pathencode: don't use alloca() for safety/portability	r18452	char dired[MAXENCODE];
		char lowered[MAXENCODE];
		char auxed[MAXENCODE];
		Py_ssize_t dirlen, lowerlen, auxlen, baselen;
Bryan O'Sullivan pathencode: implement hashed encoding in C...	r18433	char sha[20];

Matt Mackall pathencode: don't use alloca() for safety/portability	r18452	baselen = (len - 5) * 3;
		if (baselen >= MAXENCODE) {
		PyErr_SetString(PyExc_ValueError, "string too long");
		return NULL;
		}

Bryan O'Sullivan pathencode: implement hashed encoding in C...	r18433	dirlen = _encodedir(dired, baselen, src, len);
		if (sha1hash(sha, dired, dirlen - 1) == -1)
		return NULL;
		lowerlen = _lowerencode(lowered, baselen, dired + 5, dirlen - 5);
		auxlen = auxencode(auxed, baselen, lowered, lowerlen);
		return hashmangle(auxed, auxlen, sha);
		}

Bryan O'Sullivan store: implement fncache basic path encoding in C...	r17616	PyObject pathencode(PyObject self, PyObject *args)
		{
		Py_ssize_t len, newlen;
		PyObject pathobj, newobj;
		char *path;

		if (!PyArg_ParseTuple(args, "O:pathencode", &pathobj))
		return NULL;

		if (PyString_AsStringAndSize(pathobj, &path, &len) == -1) {
		PyErr_SetString(PyExc_TypeError, "expected a string");
		return NULL;
		}

Bryan O'Sullivan pathencode: implement both basic and hashed encoding in C	r18434	if (len > maxstorepathlen)
		newlen = maxstorepathlen + 2;
		else
		newlen = len ? basicencode(NULL, 0, path, len + 1) : 1;
Bryan O'Sullivan store: implement fncache basic path encoding in C...	r17616
		if (newlen <= maxstorepathlen + 1) {
		if (newlen == len + 1) {
		Py_INCREF(pathobj);
		return pathobj;
		}

		newobj = PyString_FromStringAndSize(NULL, newlen);

		if (newobj) {
		PyString_GET_SIZE(newobj)--;
		basicencode(PyString_AS_STRING(newobj), newlen, path,
		len + 1);
		}
		}
Bryan O'Sullivan pathencode: implement both basic and hashed encoding in C	r18434	else
		newobj = hashencode(path, len + 1);
Bryan O'Sullivan store: implement fncache basic path encoding in C...	r17616
		return newobj;
		}