upstream/mercurial-mirror Files · mercurial/pathencode.c

dispatch: protect against malicious 'hg serve --stdio' invocations (sec)...

dispatch: protect against malicious 'hg serve --stdio' invocations (sec) Some shared-ssh installations assume that 'hg serve --stdio' is a safe command to run for minimally trusted users. Unfortunately, the messy implementation of argument parsing here meant that trying to access a repo named '--debugger' would give the user a pdb prompt, thereby sidestepping any hoped-for sandboxing. Serving repositories over HTTP(S) is unaffected. We're not currently hardening any subcommands other than 'serve'. If your service exposes other commands to users with arbitrary repository names, it is imperative that you defend against repository names of '--debugger' and anything starting with '--config'. The read-only mode of hg-ssh stopped working because it provided its hook configuration to "hg serve --stdio" via --config parameter. This is banned for security reasons now. This patch switches it to directly call ui.setconfig(). If your custom hosting infrastructure relies on passing --config to "hg serve --stdio", you'll need to find a different way to get that configuration into Mercurial, either by using ui.setconfig() as hg-ssh does in this patch, or by placing an hgrc file someplace where Mercurial will read it. mitrandir@fb.com provided some extra fixes for the dispatch code and for hg-ssh in places that I overlooked.

Gregory Szorc - - Load All Authors

File last commit:

r30163:f5607b62 default


                r32050:77eaf953

4.1.3 stable

Download file

             pathencode.c
        
                    765 lines
            
             | 16.4 KiB
            
                | text/x-c
            
             |
                CLexer
            
             / mercurial / pathencode.c
          
                    History
                
                 |
                  Source
                 | Raw
                 |Copy content
                 |Copy permalink

        Adrian Buehlmann
    
pathencode: new C module with fast encodedir() function...

              r17606
            
      /*

       pathencode.c - efficient path name encoding

       Copyright 2012 Facebook

       This software may be used and distributed according to the terms of

       the GNU General Public License, incorporated herein by reference.

      */

        Bryan O'Sullivan
    
store: implement fncache basic path encoding in C...

              r17616
            
      /*

       * An implementation of the name encoding scheme used by the fncache

       * store.  The common case is of a path < 120 bytes long, which is

       * handled either in a single pass with no allocations or two passes

       * with a single allocation.  For longer paths, multiple passes are

       * required.

       */

        Bryan O'Sullivan
    
store: implement lowerencode in C

              r18430
            
      #define PY_SSIZE_T_CLEAN

        Adrian Buehlmann
    
pathencode: new C module with fast encodedir() function...

              r17606
            
      #include <Python.h>

      #include <assert.h>

      #include <ctype.h>

      #include <stdlib.h>

      #include <string.h>

      #include "util.h"

        Bryan O'Sullivan
    
store: implement fncache basic path encoding in C...

              r17616
            
      /*
       * state machine for the fast path
       *
       * These are the states of the scanner in _encode().  The letter
       * states track progress through a possible reserved name at the
       * start of a path component; the dot/space states track a '.' or
       * ' ' whose encoding depends on the byte that follows it.
       */

      enum path_state {

      	START,   /* first byte of a path component */

      	A,       /* "AUX" */

      	AU,      /* 'a', 'u' matched; an 'x' leads to THIRD */

      	THIRD,   /* third of a 3-byte sequence, e.g. "AUX", "NUL" */

      	C,       /* "CON" or "COMn" */

      	CO,      /* 'c', 'o' matched; 'm' leads to COMLPT, 'n' to THIRD */

      	COMLPT,  /* "COM" or "LPT" */

      	COMLPTn, /* "COM" or "LPT" followed by a digit 1-9 */

      	L,       /* 'l' matched; a 'p' leads to LP */

      	LP,      /* 'l', 'p' matched; a 't' leads to COMLPT */

      	N,       /* 'n' matched; a 'u' leads to NU */

      	NU,      /* 'n', 'u' matched; an 'l' leads to THIRD */

      	P,       /* "PRN" */

      	PR,      /* 'p', 'r' matched; an 'n' leads to THIRD */

      	LDOT,    /* leading '.' */

      	DOT,     /* '.' in a non-leading position */

      	H,       /* ".h" */

      	HGDI,    /* ".hg", ".d", or ".i" */

      	SPACE,   /* ' ' in a non-leading position */

        Yuya Nishihara
    
pathencode: eliminate comma at end of enum list to avoid pedantic warning

              r19051
            
      	DEFAULT  /* byte of a path component after the first */

        Bryan O'Sullivan
    
store: implement fncache basic path encoding in C...

              r17616
            
      };

        Adrian Buehlmann
    
pathencode: new C module with fast encodedir() function...

              r17606
            
      /*
       * state machine for dir-encoding
       *
       * States of the scanner in _encodedir(), which looks for ".hg", ".d"
       * or ".i" immediately followed by '/'.
       */

      enum dir_state {

      	DDOT,     /* a '.' has just been copied */

      	DH,       /* ".h" has just been copied */

      	DHGDI,    /* ".hg", ".d" or ".i" has just been copied */

        Yuya Nishihara
    
pathencode: eliminate comma at end of enum list to avoid pedantic warning

              r19051
            
      	DDEFAULT  /* any other position; also the initial state */

        Adrian Buehlmann
    
pathencode: new C module with fast encodedir() function...

              r17606
            
      };

        André Sintzoff
    
pathencode: change isset name to avoid name collision...

              r17699
            
      /*
       * Test whether byte c is a member of a 256-bit set.
       *
       * bitset holds eight 32-bit words: the upper three bits of the byte
       * select the word and the lower five bits select the bit within it.
       * c is reinterpreted as an unsigned byte, so values above 127 index
       * words 4-7 regardless of whether plain char is signed.
       *
       * Returns 1 if the byte is in the set and 0 otherwise.
       */
      static inline int inset(const uint32_t bitset[], char c)
      {
      	const uint8_t byte = (uint8_t)c;
      	/* Shift an unsigned one: "1 << 31" on a signed int is undefined. */
      	const uint32_t mask = (uint32_t)1 << (byte & 31);

      	return (bitset[byte >> 5] & mask) != 0;
      }

        Adrian Buehlmann
    
pathencode: new C module with fast encodedir() function...

              r17606
            
      /*
       * Append the single byte c to the output and advance *destlen by one.
       *
       * When dest is NULL nothing is stored and only the length is counted,
       * which lets callers run a sizing pass before allocating.  destsize
       * is the capacity of dest and is only checked by the assertion.
       */
      static inline void charcopy(char *dest, Py_ssize_t *destlen, size_t destsize,
                                  char c)
      {
      	const Py_ssize_t at = *destlen;

      	if (dest != NULL) {
      		assert(at < destsize);
      		dest[at] = c;
      	}
      	*destlen = at + 1;
      }

      /*
       * Append len bytes from src to the output and advance *destlen by len.
       *
       * When dest is NULL nothing is stored and only the length is counted.
       * destsize is the capacity of dest and is only checked by the
       * assertion.
       */
      static inline void memcopy(char *dest, Py_ssize_t *destlen, size_t destsize,
                                 const void *src, Py_ssize_t len)
      {
      	if (dest) {
      		/*
      		 * The copy fills dest[*destlen] .. dest[*destlen + len - 1],
      		 * so ending exactly at destsize is still in bounds.
      		 */
      		assert((size_t)(*destlen + len) <= destsize);
      		memcpy(&dest[*destlen], src, len);
      	}
      	*destlen += len;
      }

        Bryan O'Sullivan
    
store: implement fncache basic path encoding in C...

              r17616
            
      /*
       * Append the two lowercase hexadecimal digits of byte c to the output,
       * high nibble first.
       */
      static inline void hexencode(char *dest, Py_ssize_t *destlen, size_t destsize,
      			     uint8_t c)
      {
      	static const char hexdigit[] = "0123456789abcdef";
      	const char hi = hexdigit[c >> 4];
      	const char lo = hexdigit[c & 0x0f];

      	charcopy(dest, destlen, destsize, hi);
      	charcopy(dest, destlen, destsize, lo);
      }

      /*
       * Emit the 3-byte escape for c: a '~' and then the byte's value as
       * two hex digits.
       */
      static inline void escape3(char *dest, Py_ssize_t *destlen, size_t destsize,
      			   char c)
      {
      	const uint8_t byte = (uint8_t)c;

      	charcopy(dest, destlen, destsize, '~');
      	hexencode(dest, destlen, destsize, byte);
      }

        Adrian Buehlmann
    
pathencode: new C module with fast encodedir() function...

              r17606
            
      /*
       * Copy len bytes of src to dest, inserting ".hg" before every '/'
       * that directly follows ".hg", ".d" or ".i".
       *
       * When dest is NULL only the output length is computed.  Returns the
       * number of bytes produced.
       */
      static Py_ssize_t _encodedir(char *dest, size_t destsize,
                                   const char *src, Py_ssize_t len)
      {
      	enum dir_state state = DDEFAULT;
      	Py_ssize_t pos = 0, destlen = 0;

      	while (pos < len) {
      		const char c = src[pos];
      		/* cleared when this step only changes state and c must
      		   be looked at again in the new state */
      		int consume = 1;

      		switch (state) {
      		case DDEFAULT:
      			if (c == '.')
      				state = DDOT;
      			break;
      		case DDOT:
      			if (c == 'd' || c == 'i')
      				state = DHGDI;
      			else if (c == 'h')
      				state = DH;
      			else {
      				state = DDEFAULT;
      				consume = 0;
      			}
      			break;
      		case DH:
      			if (c == 'g')
      				state = DHGDI;
      			else {
      				state = DDEFAULT;
      				consume = 0;
      			}
      			break;
      		case DHGDI:
      			state = DDEFAULT;
      			if (c == '/')
      				memcopy(dest, &destlen, destsize, ".hg", 3);
      			else
      				consume = 0;
      			break;
      		}

      		if (consume) {
      			charcopy(dest, &destlen, destsize, c);
      			pos++;
      		}
      	}

      	return destlen;
      }

      /*
       * Python entry point: dir-encode a bytes path with _encodedir().
       *
       * Takes one argument, the path.  Returns a new reference to the input
       * object when encoding does not change its length, otherwise a new
       * bytes object holding the encoded path.  Returns NULL with an
       * exception set on failure.
       */
      PyObject *encodedir(PyObject *self, PyObject *args)
      {
      	PyObject *pathobj, *newobj;
      	Py_ssize_t len, newlen;
      	char *path;

      	if (!PyArg_ParseTuple(args, "O:encodedir", &pathobj))
      		return NULL;

      	if (PyBytes_AsStringAndSize(pathobj, &path, &len) == -1) {
      		PyErr_SetString(PyExc_TypeError, "expected a string");
      		return NULL;
      	}

      	/* Sizing pass.  The terminating NUL is encoded too, so the
      	   result is one larger than the visible length. */
      	if (len == 0)
      		newlen = 1;
      	else
      		newlen = _encodedir(NULL, 0, path, len + 1);

      	if (newlen == len + 1) {
      		Py_INCREF(pathobj);
      		return pathobj;
      	}

      	newobj = PyBytes_FromStringAndSize(NULL, newlen);
      	if (newobj == NULL)
      		return NULL;

      	assert(PyBytes_Check(newobj));
      	/* newlen counted the NUL; drop it from the reported size */
      	Py_SIZE(newobj)--;
      	_encodedir(PyBytes_AS_STRING(newobj), newlen, path, len + 1);
      	return newobj;
      }

        Bryan O'Sullivan
    
store: implement fncache basic path encoding in C...

              r17616
            
      /*
       * Core state machine shared by basicencode() and auxencode().
       *
       * Reads len bytes of src and produces the encoded form in dest,
       * starting at offset destlen; returns the offset just past the last
       * byte produced.  All output goes through charcopy()/memcopy(), which
       * only count bytes when dest is NULL, so a NULL dest yields the
       * required length.
       *
       * In the DEFAULT state a byte found in the onebyte set is copied
       * unchanged; a byte found in the twobytes set is emitted as '_'
       * followed by the byte plus 32 ('_' itself becomes "__"); any other
       * byte is emitted as a 3-byte "~xx" escape.
       *
       * Independently of the sets, at the start of each '/'-separated
       * component a leading '.' or ' ' is escaped, and the names "aux",
       * "con", "nul", "prn", "com1".."com9" and "lpt1".."lpt9" get their
       * third letter escaped when followed by '.', '/' or a NUL byte.  A
       * '.' or ' ' directly before '/' or NUL is emitted as "~2e" / "~20".
       *
       * When encodedir is nonzero, ".hg" is inserted before a '/' that
       * follows ".hg", ".d" or ".i".
       */
      static Py_ssize_t _encode(const uint32_t twobytes[8], const uint32_t onebyte[8],

      			  char *dest, Py_ssize_t destlen, size_t destsize,

      			  const char *src, Py_ssize_t len,

      			  int encodedir)

      {

      	enum path_state state = START;

      	Py_ssize_t i = 0;

      	/*

      	 * Python strings end with a zero byte, which we use as a

      	 * terminal token as they are not valid inside path names.

      	 */

      	while (i < len) {

      		switch (state) {

      		case START:

      			/* first byte of a component: escape a leading '.' or
      			   ' ', or start matching a reserved name */
      			switch (src[i]) {

      			case '/':

      				charcopy(dest, &destlen, destsize, src[i++]);

      				break;

      			case '.':

      				state = LDOT;

      				escape3(dest, &destlen, destsize, src[i++]);

      				break;

      			case ' ':

      				state = DEFAULT;

      				escape3(dest, &destlen, destsize, src[i++]);

      				break;

      			case 'a':

      				state = A;

      				charcopy(dest, &destlen, destsize, src[i++]);

      				break;

      			case 'c':

      				state = C;

      				charcopy(dest, &destlen, destsize, src[i++]);

      				break;

      			case 'l':

      				state = L;

      				charcopy(dest, &destlen, destsize, src[i++]);

      				break;

      			case 'n':

      				state = N;

      				charcopy(dest, &destlen, destsize, src[i++]);

      				break;

      			case 'p':

      				state = P;

      				charcopy(dest, &destlen, destsize, src[i++]);

      				break;

      			default:

      				state = DEFAULT;

      				break;

      			}

      			break;

      		case A:

      			if (src[i] == 'u') {

      				state = AU;

      				charcopy(dest, &destlen, destsize, src[i++]);

      			}

      			else state = DEFAULT;

      			break;

      		case AU:

      			/* the third letter is consumed but not emitted yet;
      			   THIRD decides how to emit it */
      			if (src[i] == 'x') {

      				state = THIRD;

      				i++;

      			}

      			else state = DEFAULT;

      			break;

      		case THIRD:

      			/* src[i - 1] is the withheld third letter: escape it
      			   if the name ends here, otherwise step back so that
      			   DEFAULT processes it normally */
      			state = DEFAULT;

      			switch (src[i]) {

      			case '.':

      			case '/':

      			case '\0':

      				escape3(dest, &destlen, destsize, src[i - 1]);

      				break;

      			default:

      				i--;

      				break;

      			}

      			break;

      		case C:

      			if (src[i] == 'o') {

      				state = CO;

      				charcopy(dest, &destlen, destsize, src[i++]);

      			}

      			else state = DEFAULT;

      			break;

      		case CO:

      			if (src[i] == 'm') {

      				state = COMLPT;

      				i++;

      			}

      			else if (src[i] == 'n') {

      				state = THIRD;

      				i++;

      			}

      			else state = DEFAULT;

      			break;

      		case COMLPT:

      			/* src[i - 1] is the withheld 'm' or 't'; a digit 1-9
      			   keeps the match alive, anything else emits the
      			   letter unchanged */
      			switch (src[i]) {

      			case '1': case '2': case '3': case '4': case '5':

      			case '6': case '7': case '8': case '9':

      				state = COMLPTn;

      				i++;

      				break;

      			default:

      				state = DEFAULT;

      				charcopy(dest, &destlen, destsize, src[i - 1]);

      				break;

      			}

      			break;

      		case COMLPTn:

      			/* src[i - 2] is the withheld letter and src[i - 1]
      			   the withheld digit */
      			state = DEFAULT;

      			switch (src[i]) {

      			case '.':

      			case '/':

      			case '\0':

      				escape3(dest, &destlen, destsize, src[i - 2]);

      				charcopy(dest, &destlen, destsize, src[i - 1]);

      				break;

      			default:

      				memcopy(dest, &destlen, destsize,

      					&src[i - 2], 2);

      				break;

      			}

      			break;

      		case L:

      			if (src[i] == 'p') {

      				state = LP;

      				charcopy(dest, &destlen, destsize, src[i++]);

      			}

      			else state = DEFAULT;

      			break;

      		case LP:

      			if (src[i] == 't') {

      				state = COMLPT;

      				i++;

      			}

      			else state = DEFAULT;

      			break;

      		case N:

      			if (src[i] == 'u') {

      				state = NU;

      				charcopy(dest, &destlen, destsize, src[i++]);

      			}

      			else state = DEFAULT;

      			break;

      		case NU:

      			if (src[i] == 'l') {

      				state = THIRD;

      				i++;

      			}

      			else state = DEFAULT;

      			break;

      		case P:

      			if (src[i] == 'r') {

      				state = PR;

      				charcopy(dest, &destlen, destsize, src[i++]);

      			}

      			else state = DEFAULT;

      			break;

      		case PR:

      			if (src[i] == 'n') {

      				state = THIRD;

      				i++;

      			}

      			else state = DEFAULT;

      			break;

      		case LDOT:

      			/* the leading '.' has already been emitted escaped */
      			switch (src[i]) {

      			case 'd':

      			case 'i':

      				state = HGDI;

      				charcopy(dest, &destlen, destsize, src[i++]);

      				break;

      			case 'h':

      				state = H;

      				charcopy(dest, &destlen, destsize, src[i++]);

      				break;

      			default:

      				state = DEFAULT;

      				break;

      			}

      			break;

      		case DOT:

      			/* a '.' was consumed in DEFAULT but not emitted yet */
      			switch (src[i]) {

      			case '/':

      			case '\0':

      				state = START;

      				memcopy(dest, &destlen, destsize, "~2e", 3);

      				charcopy(dest, &destlen, destsize, src[i++]);

      				break;

      			case 'd':

      			case 'i':

      				state = HGDI;

      				charcopy(dest, &destlen, destsize, '.');

      				charcopy(dest, &destlen, destsize, src[i++]);

      				break;

      			case 'h':

      				state = H;

      				memcopy(dest, &destlen, destsize, ".h", 2);

      				i++;

      				break;

      			default:

      				state = DEFAULT;

      				charcopy(dest, &destlen, destsize, '.');

      				break;

      			}

      			break;

      		case H:

      			if (src[i] == 'g') {

      				state = HGDI;

      				charcopy(dest, &destlen, destsize, src[i++]);

      			}

      			else state = DEFAULT;

      			break;

      		case HGDI:

      			if (src[i] == '/') {

      				state = START;

      				if (encodedir)

      					memcopy(dest, &destlen, destsize, ".hg",

      						3);

      				charcopy(dest, &destlen, destsize, src[i++]);

      			}

      			else state = DEFAULT;

      			break;

      		case SPACE:

      			/* a ' ' was consumed in DEFAULT but not emitted yet */
      			switch (src[i]) {

      			case '/':

      			case '\0':

      				state = START;

      				memcopy(dest, &destlen, destsize, "~20", 3);

      				charcopy(dest, &destlen, destsize, src[i++]);

      				break;

      			default:

      				state = DEFAULT;

      				charcopy(dest, &destlen, destsize, ' ');

      				break;

      			}

      			break;

      		case DEFAULT:

        André Sintzoff
    
pathencode: change isset name to avoid name collision...

              r17699
            
      			/* copy a run of bytes that need no escaping */
      			while (inset(onebyte, src[i])) {

        Bryan O'Sullivan
    
store: implement fncache basic path encoding in C...

              r17616
            
      				charcopy(dest, &destlen, destsize, src[i++]);

      				if (i == len)

      					goto done;

      			}

      			switch (src[i]) {

      			case '.':

      				state = DOT;

      				i++;

      				break;

      			case ' ':

      				state = SPACE;

      				i++;

      				break;

      			case '/':

      				state = START;

      				charcopy(dest, &destlen, destsize, '/');

      				i++;

      				break;

      			default:

        André Sintzoff
    
pathencode: change isset name to avoid name collision...

              r17699
            
      				if (inset(onebyte, src[i])) {

        Bryan O'Sullivan
    
store: implement fncache basic path encoding in C...

              r17616
            
      					do {

      						charcopy(dest, &destlen,

      							 destsize, src[i++]);

      					} while (i < len &&

        André Sintzoff
    
pathencode: change isset name to avoid name collision...

              r17699
            
      						 inset(onebyte, src[i]));

        Bryan O'Sullivan
    
store: implement fncache basic path encoding in C...

              r17616
            
      				}

        André Sintzoff
    
pathencode: change isset name to avoid name collision...

              r17699
            
      				else if (inset(twobytes, src[i])) {

        Bryan O'Sullivan
    
store: implement fncache basic path encoding in C...

              r17616
            
      					char c = src[i++];

      					charcopy(dest, &destlen, destsize, '_');

      					charcopy(dest, &destlen, destsize,

      						 c == '_' ? '_' : c + 32);

      				}

      				else

      					escape3(dest, &destlen, destsize,

      						src[i++]);

      				break;

      			}

      			break;

      		}

      	}

      done:

      	return destlen;

      }

      /*
       * Apply the basic store encoding to len bytes of src by running
       * _encode() with dir-encoding enabled.
       *
       * The twobytes set holds 'A'..'Z' and '_' (bits 1-26 and 31 of the
       * word covering bytes 64-95), which _encode() emits as '_' plus the
       * lowercased letter, or "__".  When dest is NULL only the encoded
       * length is computed.  Returns the number of bytes produced.
       */
      static Py_ssize_t basicencode(char *dest, size_t destsize,
      			      const char *src, Py_ssize_t len)
      {
      	static const uint32_t onebyte[8] = {
      		1, 0x2bff3bfa, 0x68000001, 0x2fffffff,
      	};
      	static const uint32_t twobytes[8] = { 0, 0, 0x87fffffe };

      	return _encode(twobytes, onebyte, dest, 0, destsize, src, len, 1);
      }

      /*
       * Longest store path, in bytes, that pathencode() returns in basic
       * encoding; anything longer is routed to hashencode().  hashmangle()
       * also uses it to budget how much of the base name to keep.
       */
      static const Py_ssize_t maxstorepathlen = 120;

        Bryan O'Sullivan
    
store: implement lowerencode in C

              r18430
            
      /*
       * Lower-casing encoder: each of the len bytes of src is copied
       * unchanged if it is in the onebyte set, emitted as its lowercase form
       * (byte + 32) if it is in the lower set ('A'..'Z'), and otherwise
       * emitted as a 3-byte "~xx" escape.
       *
       * When dest is NULL only the output length is computed.  Returns the
       * number of bytes produced.
       */
      static Py_ssize_t _lowerencode(char *dest, size_t destsize,
      			       const char *src, Py_ssize_t len)
      {
      	static const uint32_t lower[8] = { 0, 0, 0x7fffffe };
      	static const uint32_t onebyte[8] = {
      		1, 0x2bfffbfb, 0xe8000001, 0x2fffffff
      	};
      	Py_ssize_t destlen = 0;
      	Py_ssize_t pos = 0;

      	while (pos < len) {
      		const char c = src[pos++];

      		if (inset(onebyte, c)) {
      			charcopy(dest, &destlen, destsize, c);
      			continue;
      		}
      		if (inset(lower, c)) {
      			charcopy(dest, &destlen, destsize, c + 32);
      			continue;
      		}
      		escape3(dest, &destlen, destsize, c);
      	}

      	return destlen;
      }

      /*
       * Python entry point: return the _lowerencode() form of the given
       * string as a new bytes object, or NULL with an exception set.
       */
      PyObject *lowerencode(PyObject *self, PyObject *args)
      {
      	PyObject *ret;
      	Py_ssize_t len, newlen;
      	char *path;

      	if (!PyArg_ParseTuple(args, "s#:lowerencode", &path, &len))
      		return NULL;

      	/* first pass sizes the result, second pass fills it in */
      	newlen = _lowerencode(NULL, 0, path, len);
      	ret = PyBytes_FromStringAndSize(NULL, newlen);
      	if (ret == NULL)
      		return NULL;

      	_lowerencode(PyBytes_AS_STRING(ret), newlen, path, len);
      	return ret;
      }

        Bryan O'Sullivan
    
pathencode: implement hashed encoding in C...

              r18433
            
      /*
       * See store.py:_auxencode for a description.
       *
       * Runs _encode() with dir-encoding disabled, an empty twobytes set and
       * a onebyte set containing every byte except ' ', '.' and '/', so only
       * the reserved-name and dot/space handling of _encode() changes the
       * input.  When dest is NULL only the length is computed.
       */
      static Py_ssize_t auxencode(char *dest, size_t destsize,
      			    const char *src, Py_ssize_t len)
      {
      	static const uint32_t onebyte[8] = {
      		~0U, 0xffff3ffe, ~0U, ~0U, ~0U, ~0U, ~0U, ~0U,
      	};
      	static const uint32_t twobytes[8] = { 0 };

      	return _encode(twobytes, onebyte, dest, 0, destsize, src, len, 0);
      }

        Bryan O'Sullivan
    
pathencode: implement the "mangling" part of hashed encoding in C...

              r18432
            
      /*
       * Build the hashed store name for an already encoded path.
       *
       * src/len: the encoded path.  The final byte of src is never copied
       *          (presumably the terminating NUL passed through by the
       *          caller -- confirm against hashencode()).
       * sha:     20 raw digest bytes, written out as 40 hex digits.
       *
       * The result is "dh/", then up to dirprefixlen bytes of each directory
       * component (limited to maxshortdirslen bytes in total), then as much
       * of the base name as still fits within maxstorepathlen, then the hex
       * digest, then the suffix starting at the last '.' of the base name,
       * if there is one.
       *
       * Returns a new bytes object, or NULL with an exception set if the
       * allocation fails.
       */
      static PyObject *hashmangle(const char *src, Py_ssize_t len, const char sha[20])
      {
      	static const Py_ssize_t dirprefixlen = 8;
      	static const Py_ssize_t maxshortdirslen = 68;
      	char *dest;
      	PyObject *ret;

      	Py_ssize_t i, p, lastslash = len - 1, lastdot = -1;
      	Py_ssize_t destsize, destlen = 0, slop, used, suffixlen;

      	/* Scan backwards for the last '/' and, within the base name,
      	   the last '.'. */
      	while (lastslash >= 0 && src[lastslash] != '/') {
      		if (src[lastslash] == '.' && lastdot == -1)
      			lastdot = lastslash;
      		lastslash--;
      	}

      #if 0
      	/* All paths should end in a suffix of ".i" or ".d".
                 Unfortunately, the file names in test-hybridencode.py
                 violate this rule.  */
      	if (lastdot != len - 3) {
      		PyErr_SetString(PyExc_ValueError,
      				"suffix missing or wrong length");
      		return NULL;
      	}
      #endif

      	/* If src contains a suffix, we will append it to the end of
      	   the new string, so make room. */
      	suffixlen = lastdot >= 0 ? len - lastdot - 1 : 0;
      	destsize = maxstorepathlen + suffixlen;

      	ret = PyBytes_FromStringAndSize(NULL, destsize);
      	if (ret == NULL)
      		return NULL;

      	dest = PyBytes_AS_STRING(ret);
      	memcopy(dest, &destlen, destsize, "dh/", 3);

      	/* Copy up to dirprefixlen bytes of each path component, up to
      	   a limit of maxshortdirslen bytes.  p counts the bytes seen in
      	   the current component. */
      	for (i = p = 0; i < lastslash; i++, p++) {
      		if (src[i] == '/') {
      			char last = dest[destlen - 1];
      			/* After truncation, a directory name may end
      			   in a space or dot, which are unportable. */
      			if (last == '.' || last == ' ')
      				dest[destlen - 1] = '_';
      			/* The + 3 is to account for "dh/" in the beginning */
      			if (destlen > maxshortdirslen + 3)
      				break;
      			charcopy(dest, &destlen, destsize, src[i]);
      			p = -1;
      		}
      		else if (p < dirprefixlen)
      			charcopy(dest, &destlen, destsize, src[i]);
      	}

      	/* Rewind to just before the last slash copied. */
      	if (destlen > maxshortdirslen + 3)
      		do {
      			destlen--;
      		} while (destlen > 0 && dest[destlen] != '/');

      	if (destlen > 3) {
      		if (lastslash > 0) {
      			char last = dest[destlen - 1];
      			/* The last directory component may be
      			   truncated, so make it safe. */
      			if (last == '.' || last == ' ')
      				dest[destlen - 1] = '_';
      		}

      		charcopy(dest, &destlen, destsize, '/');
      	}

      	/* Add a prefix of the original file's name. Its length
      	   depends on the number of bytes left after accounting for
      	   hash (40 hex digits) and suffix. */
      	used = destlen + 40 + suffixlen;
      	slop = maxstorepathlen - used;
      	if (slop > 0) {
      		Py_ssize_t basenamelen =
      			lastslash >= 0 ? len - lastslash - 2 : len - 1;

      		if (basenamelen > slop)
      			basenamelen = slop;
      		if (basenamelen > 0)
      			memcopy(dest, &destlen, destsize, &src[lastslash + 1],
      				basenamelen);
      	}

      	/* Add hash and suffix. */
      	for (i = 0; i < 20; i++)
      		hexencode(dest, &destlen, destsize, sha[i]);

      	if (lastdot >= 0)
      		memcopy(dest, &destlen, destsize, &src[lastdot],
      			suffixlen);

      	assert(PyBytes_Check(ret));
      	/* The buffer was allocated at its maximum size; shrink the
      	   reported length to what was actually written. */
      	Py_SIZE(ret) = destlen;

      	return ret;
      }

        Bryan O'Sullivan
    
store: implement fncache basic path encoding in C...

              r17616
            
      /*
       * Compute the SHA-1 digest of the len bytes at str by calling Python's
       * hashlib.sha1 and store the 20 raw digest bytes in hash.
       *
       * The hashlib.sha1 callable is looked up on first use and cached in a
       * function-local static.  Returns 0 on success, or -1 with a Python
       * exception set on failure.
       *
       * Avoiding a trip through Python would improve performance by 50%,
       * but we don't encounter enough long names to be worth the code.
       */
      static int sha1hash(char hash[20], const char *str, Py_ssize_t len)
      {
      	static PyObject *shafunc;
      	PyObject *data, *shaobj, *hashobj;

      	if (shafunc == NULL) {
      		/* PyImport_ImportModule takes a C string, so the module
      		   name needs no bytes/text object of its own. */
      		PyObject *hashlib = PyImport_ImportModule("hashlib");

      		if (hashlib == NULL) {
      			PyErr_SetString(PyExc_ImportError, "hashlib");
      			return -1;
      		}
      		shafunc = PyObject_GetAttrString(hashlib, "sha1");
      		Py_DECREF(hashlib);

      		if (shafunc == NULL) {
      			PyErr_SetString(PyExc_AttributeError,
      					"module 'hashlib' has no "
      					"attribute 'sha1'");
      			return -1;
      		}
      	}

      	/* Hand the input over as a bytes object: an "s#" format would
      	   build a text string on Python 3, which sha1 refuses to hash. */
      	data = PyBytes_FromStringAndSize(str, len);
      	if (data == NULL)
      		return -1;

      	shaobj = PyObject_CallFunctionObjArgs(shafunc, data, NULL);
      	Py_DECREF(data);
      	if (shaobj == NULL)
      		return -1;

      	hashobj = PyObject_CallMethod(shaobj, "digest", "");
      	Py_DECREF(shaobj);
      	if (hashobj == NULL)
      		return -1;

      	if (!PyBytes_Check(hashobj) || PyBytes_GET_SIZE(hashobj) != 20) {
      		PyErr_SetString(PyExc_TypeError,
      				"result of digest is not a 20-byte hash");
      		Py_DECREF(hashobj);
      		return -1;
      	}

      	memcpy(hash, PyBytes_AS_STRING(hashobj), 20);
      	Py_DECREF(hashobj);
      	return 0;
      }

        Matt Mackall
    
pathencode: grow buffers to increase safety margin

              r19185
            
      #define MAXENCODE 4096 * 4

        Matt Mackall
    
pathencode: don't use alloca() for safety/portability

              r18452
            
        Bryan O'Sullivan
    
pathencode: implement hashed encoding in C...

              r18433
            
      static PyObject *hashencode(const char *src, Py_ssize_t len)

      {

        Matt Mackall
    
pathencode: don't use alloca() for safety/portability

              r18452
            
      	char dired[MAXENCODE];

      	char lowered[MAXENCODE];

      	char auxed[MAXENCODE];

      	Py_ssize_t dirlen, lowerlen, auxlen, baselen;

        Bryan O'Sullivan
    
pathencode: implement hashed encoding in C...

              r18433
            
      	char sha[20];

        Matt Mackall
    
pathencode: don't use alloca() for safety/portability

              r18452
            
      	baselen = (len - 5) * 3;

      	if (baselen >= MAXENCODE) {

      		PyErr_SetString(PyExc_ValueError, "string too long");

      		return NULL;

      	}

        Bryan O'Sullivan
    
pathencode: implement hashed encoding in C...

              r18433
            
      	dirlen = _encodedir(dired, baselen, src, len);

      	if (sha1hash(sha, dired, dirlen - 1) == -1)

      		return NULL;

      	lowerlen = _lowerencode(lowered, baselen, dired + 5, dirlen - 5);

      	auxlen = auxencode(auxed, baselen, lowered, lowerlen);

      	return hashmangle(auxed, auxlen, sha);

      }

        Bryan O'Sullivan
    
store: implement fncache basic path encoding in C...

              r17616
            
      /*
       * Python entry point: encode a store path.
       *
       * Takes one argument, the path.  A path longer than maxstorepathlen,
       * or one whose basic encoding exceeds it, is encoded with
       * hashencode().  Otherwise the result is the basic encoding, or a new
       * reference to the input object itself when encoding leaves its
       * length unchanged.  Returns NULL with an exception set on failure.
       */
      PyObject *pathencode(PyObject *self, PyObject *args)
      {
      	PyObject *pathobj, *newobj;
      	Py_ssize_t len, newlen;
      	char *path;

      	if (!PyArg_ParseTuple(args, "O:pathencode", &pathobj))
      		return NULL;

      	if (PyBytes_AsStringAndSize(pathobj, &path, &len) == -1) {
      		PyErr_SetString(PyExc_TypeError, "expected a string");
      		return NULL;
      	}

      	/* Too long even before encoding: skip the sizing pass. */
      	if (len > maxstorepathlen)
      		return hashencode(path, len + 1);

      	/* The sizing pass counts the terminating NUL as well. */
      	newlen = (len == 0) ? 1 : basicencode(NULL, 0, path, len + 1);
      	if (newlen > maxstorepathlen + 1)
      		return hashencode(path, len + 1);

      	if (newlen == len + 1) {
      		Py_INCREF(pathobj);
      		return pathobj;
      	}

      	newobj = PyBytes_FromStringAndSize(NULL, newlen);
      	if (newobj == NULL)
      		return NULL;

      	assert(PyBytes_Check(newobj));
      	/* newlen counted the NUL; drop it from the reported size */
      	Py_SIZE(newobj)--;
      	basicencode(PyBytes_AS_STRING(newobj), newlen, path, len + 1);
      	return newobj;
      }

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages