upstream/mercurial-mirror Files · mercurial/base85.c

changelog: add class to represent parsed changelog revisions...

changelog: add class to represent parsed changelog revisions Currently, changelog entries are parsed into their respective components at read time. Many operations are only interested in a subset of fields of a changelog entry. The parsing and storing of all the fields adds avoidable overhead. This patch introduces the "changelogrevision" class. It takes changelog raw text and exposes the parsed results as attributes. The code for parsing changelog entries has been moved into its construction function. changelog.read() has been modified to use the new class internally while maintaining its existing API. Future patches will make revision parsing lazy. We implement the construction function of the new class with __new__ instead of __init__ so we can use a named tuple to represent the empty revision. This saves overhead and complexity of coercing later versions of this class to represent an empty instance. While we are here, we add a method on changelog to obtain an instance of the new type. The overhead of constructing the new class regresses performance of revsets accessing this data: author(mpm) 0.896565 0.929984 desc(bug) 0.887169 0.935642 105% date(2015) 0.878797 0.908094 extra(rebase_source) 0.865446 0.922624 106% author(mpm) or author(greg) 1.801832 1.902112 105% author(mpm) or desc(bug) 1.812438 1.860977 date(2015) or branch(default) 0.968276 1.005824 author(mpm) or desc(bug) or date(2015) or extra(rebase_source) 3.656193 3.743381 Once lazy parsing is implemented, these revsets will all be faster than before. There is no performance change on revsets that do not access this data. There /could/ be a performance regression on operations that perform several changelog reads. However, I can't think of anything outside of revsets and `hg log` (basically the same as a revset) that would be impacted.

Augie Fackler - - Load All Authors

File last commit:

r27060:4613a89b default


                r28487:98d98a64

default

Download file

             base85.c
        
                    182 lines
            
             | 3.4 KiB
            
                | text/x-c
            
             |
                CLexer
            
             / mercurial / base85.c
          
                    History
                
                 |
                  Annotation
                 | Raw
                 |Copy content
                 |Copy permalink

      /*

       base85 codec

       Copyright 2006 Brendan Cully <brendan@kublai.com>

       This software may be used and distributed according to the terms of

       the GNU General Public License, incorporated herein by reference.

       Largely based on git's implementation

      */

      #define PY_SSIZE_T_CLEAN

      #include <Python.h>

      #include "util.h"

      /* The 85-character alphabet; a character's index is its digit value. */
      static const char b85chars[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
      	"abcdefghijklmnopqrstuvwxyz!#$%&()*+-;<=>?@^_`{|}~";

      /*
       * Reverse lookup table indexed by byte value: holds (digit value + 1)
       * for bytes that belong to the alphabet and 0 for every other byte.
       */
      static char b85dec[256];

      /*
       * Fill b85dec from b85chars.
       *
       * Only the 85 alphabet characters are visited: sizeof(b85chars) also
       * counts the string's terminating NUL, and including it would register
       * byte 0 as a valid digit with value 85.
       */
      static void b85prep(void)
      {
      	unsigned i;

      	memset(b85dec, 0, sizeof(b85dec));
      	for (i = 0; i < sizeof(b85chars) - 1; i++)
      		b85dec[(unsigned char)b85chars[i]] = (char)(i + 1);
      }

      /*
       * b85encode(text[, pad]) -> bytes
       *
       * Encode the input buffer in base85. Input is consumed in groups of up
       * to four bytes, each packed big-endian into a 32-bit word and written
       * as five alphabet characters. When pad is false the output is trimmed
       * so that a final group of n bytes contributes n + 1 characters; when
       * pad is true the output length stays a multiple of five.
       *
       * Returns NULL with an exception set if argument parsing or allocation
       * fails.
       */
      static PyObject *b85encode(PyObject *self, PyObject *args)
      {
      	const unsigned char *src;
      	char *cursor;
      	PyObject *result;
      	Py_ssize_t remaining, outlen, tail, k;
      	unsigned int word, byte;
      	int pad = 0;

      	if (!PyArg_ParseTuple(args, "s#|i", &src, &remaining, &pad))
      		return NULL;

      	if (pad) {
      		outlen = ((remaining + 3) / 4 * 5) - 3;
      	} else {
      		tail = remaining % 4;
      		outlen = tail ? tail + 1 : 0;
      		outlen += remaining / 4 * 5;
      	}

      	/* Three spare bytes: the loop below always writes whole groups
      	 * of five characters, even for a short final group. */
      	result = PyBytes_FromStringAndSize(NULL, outlen + 3);
      	if (result == NULL)
      		return NULL;

      	cursor = PyBytes_AsString(result);

      	while (remaining != 0) {
      		/* Pack up to four input bytes, most significant first. */
      		word = 0;
      		k = 24;
      		do {
      			byte = *src++;
      			word |= byte << k;
      			remaining--;
      			k -= 8;
      		} while (k >= 0 && remaining != 0);

      		/* Emit five digits, least significant written last-to-first. */
      		for (k = 5; k-- > 0; ) {
      			cursor[k] = b85chars[word % 85];
      			word /= 85;
      		}
      		cursor += 5;
      	}

      	/* On failure _PyBytes_Resize stores NULL in result, which is
      	 * then returned as the error indicator. */
      	if (!pad)
      		_PyBytes_Resize(&result, outlen);

      	return result;
      }

      /*
       * b85decode(text) -> bytes
       *
       * Decode base85 text. Each full group of five characters yields four
       * bytes; a trailing group of n characters yields n - 1 bytes.
       *
       * Raises ValueError if a character is not in the base85 alphabet or
       * if a five-character group does not fit in 32 bits. Returns NULL with
       * an exception set on any failure; the partially filled output object
       * is released before returning.
       */
      static PyObject *b85decode(PyObject *self, PyObject *args)
      {
      	PyObject *out;
      	const char *text;
      	char *dst;
      	Py_ssize_t len, i, j, olen, cap;
      	int c;
      	unsigned int acc;

      	if (!PyArg_ParseTuple(args, "s#", &text, &len))
      		return NULL;

      	olen = len / 5 * 4;
      	i = len % 5;
      	if (i)
      		olen += i - 1;
      	if (!(out = PyBytes_FromStringAndSize(NULL, olen)))
      		return NULL;

      	dst = PyBytes_AsString(out);

      	i = 0;
      	while (i < len) {
      		acc = 0;
      		/* The first (up to) four digits of a group cannot overflow. */
      		cap = len - i - 1;
      		if (cap > 4)
      			cap = 4;
      		for (j = 0; j < cap; i++, j++) {
      			/* Index through unsigned char: a plain char may be
      			 * negative for bytes >= 0x80. */
      			c = b85dec[(unsigned char)*text++] - 1;
      			if (c < 0) {
      				PyErr_Format(
      					PyExc_ValueError,
      					"bad base85 character at position %d",
      					(int)i);
      				goto bail;
      			}
      			acc = acc * 85 + c;
      		}
      		/* Last digit of the group. i is already advanced here, so
      		 * the positions reported below are one past the index. */
      		if (i++ < len) {
      			c = b85dec[(unsigned char)*text++] - 1;
      			if (c < 0) {
      				PyErr_Format(
      					PyExc_ValueError,
      					"bad base85 character at position %d",
      					(int)i);
      				goto bail;
      			}
      			/* overflow detection: 0xffffffff == "|NsC0",
      			 * "|NsC" == 0x03030303 */
      			if (acc > 0x03030303 || (acc *= 85) > 0xffffffff - c) {
      				PyErr_Format(
      					PyExc_ValueError,
      					"bad base85 sequence at position %d",
      					(int)i);
      				goto bail;
      			}
      			acc += c;
      		}

      		/* cap is now the number of bytes this group produces. */
      		cap = olen < 4 ? olen : 4;
      		olen -= cap;
      		/* Short final group: scale as if the missing digits were
      		 * present, then set the low-order bits (presumably so the
      		 * retained high-order bytes round correctly). */
      		for (j = 0; j < 4 - cap; j++)
      			acc *= 85;
      		if (cap && cap < 4)
      			acc += 0xffffff >> (cap - 1) * 8;
      		/* Rotate left by one byte and store: most significant
      		 * byte is written first. */
      		for (j = 0; j < cap; j++) {
      			acc = (acc << 8) | (acc >> 24);
      			*dst++ = acc;
      		}
      	}

      	return out;

      bail:
      	/* Drop the output buffer so error paths do not leak it. */
      	Py_DECREF(out);
      	return NULL;
      }

      /* Module docstring. */
      static char base85_doc[] = "Base85 Data Encoding";

      /* Functions exported by the module; terminated by an all-zero entry. */
      static PyMethodDef methods[] = {
      	{
      		"b85encode",
      		b85encode,
      		METH_VARARGS,
      		"Encode text in base85.\n"
      		"\n"
      		"If the second parameter is true, pad the result to a "
      		"multiple of five characters.\n"
      	},
      	{
      		"b85decode",
      		b85decode,
      		METH_VARARGS,
      		"Decode base85 text.\n"
      	},
      	{NULL, NULL, 0, NULL}
      };

      #ifdef IS_PY3K
      /* Module definition used when IS_PY3K is defined. */
      static struct PyModuleDef base85_module = {
      	PyModuleDef_HEAD_INIT,
      	"base85",	/* m_name */
      	base85_doc,	/* m_doc */
      	-1,		/* m_size */
      	methods		/* m_methods */
      };

      /* Module entry point: build the decode table, then create the module. */
      PyMODINIT_FUNC PyInit_base85(void)
      {
      	PyObject *mod;

      	b85prep();
      	mod = PyModule_Create(&base85_module);

      	return mod;
      }
      #else
      /* Module entry point: register the module, then build the decode table. */
      PyMODINIT_FUNC initbase85(void)
      {
      	(void)Py_InitModule3("base85", methods, base85_doc);

      	b85prep();
      }
      #endif

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

				/*
				base85 codec

				Copyright 2006 Brendan Cully <brendan@kublai.com>

				This software may be used and distributed according to the terms of
				the GNU General Public License, incorporated herein by reference.

				Largely based on git's implementation
				*/

				#define PY_SSIZE_T_CLEAN
				#include <Python.h>

				#include "util.h"

				/* The 85-character alphabet; a character's index is its digit value. */
				static const char b85chars[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
					"abcdefghijklmnopqrstuvwxyz!#$%&()*+-;<=>?@^_`{|}~";

				/*
				 * Reverse lookup table indexed by byte value: holds (digit value + 1)
				 * for bytes that belong to the alphabet and 0 for every other byte.
				 */
				static char b85dec[256];

				/*
				 * Fill b85dec from b85chars.
				 *
				 * Only the 85 alphabet characters are visited: sizeof(b85chars) also
				 * counts the string's terminating NUL, and including it would register
				 * byte 0 as a valid digit with value 85.
				 */
				static void b85prep(void)
				{
					unsigned i;

					memset(b85dec, 0, sizeof(b85dec));
					for (i = 0; i < sizeof(b85chars) - 1; i++)
						b85dec[(unsigned char)b85chars[i]] = (char)(i + 1);
				}

				/*
				 * b85encode(text[, pad]) -> bytes
				 *
				 * Encode the input buffer in base85. Input is consumed in groups of up
				 * to four bytes, each packed big-endian into a 32-bit word and written
				 * as five alphabet characters. When pad is false the output is trimmed
				 * so that a final group of n bytes contributes n + 1 characters; when
				 * pad is true the output length stays a multiple of five.
				 *
				 * Returns NULL with an exception set if argument parsing or allocation
				 * fails.
				 */
				static PyObject *b85encode(PyObject *self, PyObject *args)
				{
					const unsigned char *text;
					PyObject *out;
					char *dst;
					Py_ssize_t len, olen, i;
					unsigned int acc, val, ch;
					int pad = 0;

					if (!PyArg_ParseTuple(args, "s#|i", &text, &len, &pad))
						return NULL;

					if (pad)
						olen = ((len + 3) / 4 * 5) - 3;
					else {
						olen = len % 4;
						if (olen)
							olen++;
						olen += len / 4 * 5;
					}
					/* Three spare bytes: the loop below always writes whole
					 * groups of five characters, even for a short final group. */
					if (!(out = PyBytes_FromStringAndSize(NULL, olen + 3)))
						return NULL;

					dst = PyBytes_AsString(out);

					while (len) {
						/* Pack up to four input bytes, most significant first. */
						acc = 0;
						for (i = 24; i >= 0; i -= 8) {
							ch = *text++;
							acc |= ch << i;
							if (--len == 0)
								break;
						}
						/* Emit five digits, filling the group back to front. */
						for (i = 4; i >= 0; i--) {
							val = acc % 85;
							acc /= 85;
							dst[i] = b85chars[val];
						}
						dst += 5;
					}

					/* On failure _PyBytes_Resize stores NULL in out, which is
					 * then returned as the error indicator. */
					if (!pad)
						_PyBytes_Resize(&out, olen);

					return out;
				}

				/*
				 * b85decode(text) -> bytes
				 *
				 * Decode base85 text. Each full group of five characters yields four
				 * bytes; a trailing group of n characters yields n - 1 bytes.
				 *
				 * Raises ValueError if a character is not in the base85 alphabet or
				 * if a five-character group does not fit in 32 bits. Returns NULL with
				 * an exception set on any failure; the partially filled output object
				 * is released before returning.
				 */
				static PyObject *b85decode(PyObject *self, PyObject *args)
				{
					PyObject *out;
					const char *text;
					char *dst;
					Py_ssize_t len, i, j, olen, cap;
					int c;
					unsigned int acc;

					if (!PyArg_ParseTuple(args, "s#", &text, &len))
						return NULL;

					olen = len / 5 * 4;
					i = len % 5;
					if (i)
						olen += i - 1;
					if (!(out = PyBytes_FromStringAndSize(NULL, olen)))
						return NULL;

					dst = PyBytes_AsString(out);

					i = 0;
					while (i < len) {
						acc = 0;
						/* The first (up to) four digits of a group cannot overflow. */
						cap = len - i - 1;
						if (cap > 4)
							cap = 4;
						for (j = 0; j < cap; i++, j++) {
							/* Index through unsigned char: a plain char may be
							 * negative for bytes >= 0x80. */
							c = b85dec[(unsigned char)*text++] - 1;
							if (c < 0) {
								PyErr_Format(
									PyExc_ValueError,
									"bad base85 character at position %d",
									(int)i);
								goto bail;
							}
							acc = acc * 85 + c;
						}
						/* Last digit of the group. i is already advanced here, so
						 * the positions reported below are one past the index. */
						if (i++ < len) {
							c = b85dec[(unsigned char)*text++] - 1;
							if (c < 0) {
								PyErr_Format(
									PyExc_ValueError,
									"bad base85 character at position %d",
									(int)i);
								goto bail;
							}
							/* overflow detection: 0xffffffff == "|NsC0",
							 * "|NsC" == 0x03030303 */
							if (acc > 0x03030303 || (acc *= 85) > 0xffffffff - c) {
								PyErr_Format(
									PyExc_ValueError,
									"bad base85 sequence at position %d",
									(int)i);
								goto bail;
							}
							acc += c;
						}

						/* cap is now the number of bytes this group produces. */
						cap = olen < 4 ? olen : 4;
						olen -= cap;
						/* Short final group: scale as if the missing digits were
						 * present, then set the low-order bits (presumably so the
						 * retained high-order bytes round correctly). */
						for (j = 0; j < 4 - cap; j++)
							acc *= 85;
						if (cap && cap < 4)
							acc += 0xffffff >> (cap - 1) * 8;
						/* Rotate left by one byte and store: most significant
						 * byte is written first. */
						for (j = 0; j < cap; j++) {
							acc = (acc << 8) | (acc >> 24);
							*dst++ = acc;
						}
					}

					return out;

				bail:
					/* Drop the output buffer so error paths do not leak it. */
					Py_DECREF(out);
					return NULL;
				}

				/* Module docstring. */
				static char base85_doc[] = "Base85 Data Encoding";

				/* Functions exported by the module; terminated by an all-zero entry. */
				static PyMethodDef methods[] = {
					{
						"b85encode",
						b85encode,
						METH_VARARGS,
						"Encode text in base85.\n"
						"\n"
						"If the second parameter is true, pad the result to a "
						"multiple of five characters.\n"
					},
					{
						"b85decode",
						b85decode,
						METH_VARARGS,
						"Decode base85 text.\n"
					},
					{NULL, NULL, 0, NULL}
				};

				#ifdef IS_PY3K
				/* Module definition used when IS_PY3K is defined. */
				static struct PyModuleDef base85_module = {
					PyModuleDef_HEAD_INIT,
					"base85",	/* m_name */
					base85_doc,	/* m_doc */
					-1,		/* m_size */
					methods		/* m_methods */
				};

				/* Module entry point: build the decode table, then create the module. */
				PyMODINIT_FUNC PyInit_base85(void)
				{
					PyObject *mod;

					b85prep();
					mod = PyModule_Create(&base85_module);

					return mod;
				}
				#else
				/* Module entry point: register the module, then build the decode table. */
				PyMODINIT_FUNC initbase85(void)
				{
					(void)Py_InitModule3("base85", methods, base85_doc);

					b85prep();
				}
				#endif