# HG changeset patch # User Zack Hricz # Date 2016-12-22 19:22:32 # Node ID 16f4b341288d6c562845335c784b2820ec77d44f # Parent f35397fe0c0494123ab527604cbd96821f1a89a2 fsmonitor: refresh pywatchman to upstream Update to upstream version c77452. The refresh includes fixes to improve Windows compatibility. There is a minor update to 'test-check-py3-compat.t' as c77452 no longer has the py3 compatibility issues the previous version had. # no-check-commit diff --git a/hgext/fsmonitor/pywatchman/__init__.py b/hgext/fsmonitor/pywatchman/__init__.py --- a/hgext/fsmonitor/pywatchman/__init__.py +++ b/hgext/fsmonitor/pywatchman/__init__.py @@ -26,9 +26,14 @@ # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +# no unicode literals + +import inspect +import math import os -import errno -import math import socket import subprocess import time @@ -36,11 +41,20 @@ import time # Sometimes it's really hard to get Python extensions to compile, # so fall back to a pure Python implementation. try: - import bser + from . import bser + # Demandimport causes modules to be loaded lazily. Force the load now + # so that we can fall back on pybser if bser doesn't exist + bser.pdu_info except ImportError: - import pybser as bser + from . import pybser as bser -import capabilities +from . import ( + capabilities, + compat, + encoding, + load, +) + if os.name == 'nt': import ctypes @@ -55,18 +69,29 @@ if os.name == 'nt': FORMAT_MESSAGE_FROM_SYSTEM = 0x00001000 FORMAT_MESSAGE_ALLOCATE_BUFFER = 0x00000100 FORMAT_MESSAGE_IGNORE_INSERTS = 0x00000200 + WAIT_FAILED = 0xFFFFFFFF WAIT_TIMEOUT = 0x00000102 WAIT_OBJECT_0 = 0x00000000 - ERROR_IO_PENDING = 997 + WAIT_IO_COMPLETION = 0x000000C0 + INFINITE = 0xFFFFFFFF + + # Overlapped I/O operation is in progress.
(997) + ERROR_IO_PENDING = 0x000003E5 + + # The pointer size follows the architecture + # We use WPARAM since this type is already conditionally defined + ULONG_PTR = ctypes.wintypes.WPARAM class OVERLAPPED(ctypes.Structure): _fields_ = [ - ("Internal", wintypes.ULONG), ("InternalHigh", wintypes.ULONG), + ("Internal", ULONG_PTR), ("InternalHigh", ULONG_PTR), ("Offset", wintypes.DWORD), ("OffsetHigh", wintypes.DWORD), ("hEvent", wintypes.HANDLE) ] def __init__(self): + self.Internal = 0 + self.InternalHigh = 0 self.Offset = 0 self.OffsetHigh = 0 self.hEvent = 0 @@ -97,6 +122,10 @@ if os.name == 'nt': GetLastError.argtypes = [] GetLastError.restype = wintypes.DWORD + SetLastError = ctypes.windll.kernel32.SetLastError + SetLastError.argtypes = [wintypes.DWORD] + SetLastError.restype = None + FormatMessage = ctypes.windll.kernel32.FormatMessageA FormatMessage.argtypes = [wintypes.DWORD, wintypes.LPVOID, wintypes.DWORD, wintypes.DWORD, ctypes.POINTER(wintypes.LPSTR), @@ -105,12 +134,30 @@ if os.name == 'nt': LocalFree = ctypes.windll.kernel32.LocalFree - GetOverlappedResultEx = ctypes.windll.kernel32.GetOverlappedResultEx - GetOverlappedResultEx.argtypes = [wintypes.HANDLE, - ctypes.POINTER(OVERLAPPED), LPDWORD, - wintypes.DWORD, wintypes.BOOL] - GetOverlappedResultEx.restype = wintypes.BOOL + GetOverlappedResult = ctypes.windll.kernel32.GetOverlappedResult + GetOverlappedResult.argtypes = [wintypes.HANDLE, + ctypes.POINTER(OVERLAPPED), LPDWORD, + wintypes.BOOL] + GetOverlappedResult.restype = wintypes.BOOL + GetOverlappedResultEx = getattr(ctypes.windll.kernel32, + 'GetOverlappedResultEx', None) + if GetOverlappedResultEx is not None: + GetOverlappedResultEx.argtypes = [wintypes.HANDLE, + ctypes.POINTER(OVERLAPPED), LPDWORD, + wintypes.DWORD, wintypes.BOOL] + GetOverlappedResultEx.restype = wintypes.BOOL + + WaitForSingleObjectEx = ctypes.windll.kernel32.WaitForSingleObjectEx + WaitForSingleObjectEx.argtypes = [wintypes.HANDLE, wintypes.DWORD, wintypes.BOOL] + 
WaitForSingleObjectEx.restype = wintypes.DWORD + + CreateEvent = ctypes.windll.kernel32.CreateEventA + CreateEvent.argtypes = [LPDWORD, wintypes.BOOL, wintypes.BOOL, + wintypes.LPSTR] + CreateEvent.restype = wintypes.HANDLE + + # Windows Vista is the minimum supported client for CancelIoEx. CancelIoEx = ctypes.windll.kernel32.CancelIoEx CancelIoEx.argtypes = [wintypes.HANDLE, ctypes.POINTER(OVERLAPPED)] CancelIoEx.restype = wintypes.BOOL @@ -132,8 +179,47 @@ else: pass +def _win32_strerror(err): + """ expand a win32 error code into a human readable message """ + + # FormatMessage will allocate memory and assign it here + buf = ctypes.c_char_p() + FormatMessage( + FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_ALLOCATE_BUFFER + | FORMAT_MESSAGE_IGNORE_INSERTS, None, err, 0, buf, 0, None) + try: + return buf.value + finally: + LocalFree(buf) + + class WatchmanError(Exception): - pass + def __init__(self, msg=None, cmd=None): + self.msg = msg + self.cmd = cmd + + def setCommand(self, cmd): + self.cmd = cmd + + def __str__(self): + if self.cmd: + return '%s, while executing %s' % (self.msg, self.cmd) + return self.msg + + +class WatchmanEnvironmentError(WatchmanError): + def __init__(self, msg, errno, errmsg, cmd=None): + super(WatchmanEnvironmentError, self).__init__( + '{0}: errno={1} errmsg={2}'.format(msg, errno, errmsg), + cmd) + + +class SocketConnectError(WatchmanError): + def __init__(self, sockpath, exc): + super(SocketConnectError, self).__init__( + 'unable to connect to %s: %s' % (sockpath, exc)) + self.sockpath = sockpath + self.exc = exc class SocketTimeout(WatchmanError): @@ -151,19 +237,11 @@ class CommandError(WatchmanError): self.msg is the message returned by watchman. 
""" - def __init__(self, msg, cmd=None): - self.msg = msg - self.cmd = cmd - super(CommandError, self).__init__('watchman command error: %s' % msg) - - def setCommand(self, cmd): - self.cmd = cmd - - def __str__(self): - if self.cmd: - return '%s, while executing %s' % (self.msg, self.cmd) - return self.msg + super(CommandError, self).__init__( + 'watchman command error: %s' % (msg, ), + cmd, + ) class Transport(object): @@ -195,16 +273,16 @@ class Transport(object): # Buffer may already have a line if we've received unilateral # response(s) from the server - if len(self.buf) == 1 and "\n" in self.buf[0]: - (line, b) = self.buf[0].split("\n", 1) + if len(self.buf) == 1 and b"\n" in self.buf[0]: + (line, b) = self.buf[0].split(b"\n", 1) self.buf = [b] return line while True: b = self.readBytes(4096) - if "\n" in b: - result = ''.join(self.buf) - (line, b) = b.split("\n", 1) + if b"\n" in b: + result = b''.join(self.buf) + (line, b) = b.split(b"\n", 1) self.buf = [b] return result + line self.buf.append(b) @@ -241,8 +319,8 @@ class UnixSocketTransport(Transport): sock.connect(self.sockpath) self.sock = sock except socket.error as e: - raise WatchmanError('unable to connect to %s: %s' % - (self.sockpath, e)) + sock.close() + raise SocketConnectError(self.sockpath, e) def close(self): self.sock.close() @@ -268,6 +346,46 @@ class UnixSocketTransport(Transport): raise SocketTimeout('timed out sending query command') +def _get_overlapped_result_ex_impl(pipe, olap, nbytes, millis, alertable): + """ Windows 7 and earlier do not support GetOverlappedResultEx. The + alternative is to use GetOverlappedResult and wait for read or write + operation to complete. This is done by using CreateEvent and + WaitForSingleObjectEx. CreateEvent, WaitForSingleObjectEx + and GetOverlappedResult are all part of Windows API since Windows XP. + This is the exact same implementation that can be found in the watchman + source code (see get_overlapped_result_ex_impl in stream_win.c).
This + way, maintenance should be simplified. + """ + log('Preparing to wait for maximum %dms', millis ) + if millis != 0: + waitReturnCode = WaitForSingleObjectEx(olap.hEvent, millis, alertable) + if waitReturnCode == WAIT_OBJECT_0: + # Event is signaled, overlapped IO operation result should be available. + pass + elif waitReturnCode == WAIT_IO_COMPLETION: + # WaitForSingleObjectEx returned because the system added an I/O completion + # routine or an asynchronous procedure call (APC) to the thread queue. + SetLastError(WAIT_IO_COMPLETION) + pass + elif waitReturnCode == WAIT_TIMEOUT: + # We reached the maximum allowed wait time, the IO operation failed + # to complete in a timely fashion. + SetLastError(WAIT_TIMEOUT) + return False + elif waitReturnCode == WAIT_FAILED: + # something went wrong calling WaitForSingleObjectEx + err = GetLastError() + log('WaitForSingleObjectEx failed: %s', _win32_strerror(err)) + return False + else: + # unexpected situation deserving investigation. + err = GetLastError() + log('Unexpected error: %s', _win32_strerror(err)) + return False + + return GetOverlappedResult(pipe, olap, nbytes, False) + + class WindowsNamedPipeTransport(Transport): """ connect to a named pipe """ @@ -284,28 +402,35 @@ class WindowsNamedPipeTransport(Transpor self._raise_win_err('failed to open pipe %s' % sockpath, GetLastError()) - def _win32_strerror(self, err): - """ expand a win32 error code into a human readable message """ + # event for the overlapped I/O operations + self._waitable = CreateEvent(None, True, False, None) + if self._waitable is None: + self._raise_win_err('CreateEvent failed', GetLastError()) - # FormatMessage will allocate memory and assign it here - buf = ctypes.c_char_p() - FormatMessage( - FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_ALLOCATE_BUFFER - | FORMAT_MESSAGE_IGNORE_INSERTS, None, err, 0, buf, 0, None) - try: - return buf.value - finally: - LocalFree(buf) + self._get_overlapped_result_ex = GetOverlappedResultEx + if
(os.getenv('WATCHMAN_WIN7_COMPAT') == '1' or + self._get_overlapped_result_ex is None): + self._get_overlapped_result_ex = _get_overlapped_result_ex_impl def _raise_win_err(self, msg, err): raise IOError('%s win32 error code: %d %s' % - (msg, err, self._win32_strerror(err))) + (msg, err, _win32_strerror(err))) def close(self): if self.pipe: + log('Closing pipe') CloseHandle(self.pipe) self.pipe = None + if self._waitable is not None: + # We release the handle for the event + CloseHandle(self._waitable) + self._waitable = None + + def setTimeout(self, value): + # convert to milliseconds + self.timeout = int(value * 1000) + def readBytes(self, size): """ A read can block for an unbounded amount of time, even if the kernel reports that the pipe handle is signalled, so we need to @@ -325,6 +450,7 @@ class WindowsNamedPipeTransport(Transpor # We need to initiate a read buf = ctypes.create_string_buffer(size) olap = OVERLAPPED() + olap.hEvent = self._waitable log('made read buff of size %d', size) @@ -339,8 +465,9 @@ class WindowsNamedPipeTransport(Transpor GetLastError()) nread = wintypes.DWORD() - if not GetOverlappedResultEx(self.pipe, olap, nread, - 0 if immediate else self.timeout, True): + if not self._get_overlapped_result_ex(self.pipe, olap, nread, + 0 if immediate else self.timeout, + True): err = GetLastError() CancelIoEx(self.pipe, olap) @@ -374,6 +501,8 @@ class WindowsNamedPipeTransport(Transpor def write(self, data): olap = OVERLAPPED() + olap.hEvent = self._waitable + immediate = WriteFile(self.pipe, ctypes.c_char_p(data), len(data), None, olap) @@ -385,8 +514,10 @@ class WindowsNamedPipeTransport(Transpor # Obtain results, waiting if needed nwrote = wintypes.DWORD() - if GetOverlappedResultEx(self.pipe, olap, nwrote, 0 if immediate else - self.timeout, True): + if self._get_overlapped_result_ex(self.pipe, olap, nwrote, + 0 if immediate else self.timeout, + True): + log('made write of %d bytes', nwrote.value) return nwrote.value err = GetLastError() @@ 
-430,7 +561,10 @@ class CLIProcessTransport(Transport): def close(self): if self.proc: - self.proc.kill() + if self.proc.pid is not None: + self.proc.kill() + self.proc.stdin.close() + self.proc.stdout.close() self.proc = None def _connect(self): @@ -438,7 +572,7 @@ class CLIProcessTransport(Transport): return self.proc args = [ 'watchman', - '--sockname={}'.format(self.sockpath), + '--sockname={0}'.format(self.sockpath), '--logfile=/BOGUS', '--statefile=/BOGUS', '--no-spawn', @@ -460,8 +594,8 @@ class CLIProcessTransport(Transport): def write(self, data): if self.closed: + self.close() self.closed = False - self.proc = None self._connect() res = self.proc.stdin.write(data) self.proc.stdin.close() @@ -473,21 +607,21 @@ class BserCodec(Codec): """ use the BSER encoding. This is the default, preferred codec """ def _loads(self, response): - return bser.loads(response) + return bser.loads(response) # Defaults to BSER v1 def receive(self): buf = [self.transport.readBytes(sniff_len)] if not buf[0]: raise WatchmanError('empty watchman response') - elen = bser.pdu_len(buf[0]) + _1, _2, elen = bser.pdu_info(buf[0]) rlen = len(buf[0]) while elen > rlen: buf.append(self.transport.readBytes(elen - rlen)) rlen += len(buf[-1]) - response = ''.join(buf) + response = b''.join(buf) try: res = self._loads(response) return res @@ -495,7 +629,7 @@ class BserCodec(Codec): raise WatchmanError('watchman response decode error: %s' % e) def send(self, *args): - cmd = bser.dumps(*args) + cmd = bser.dumps(*args) # Defaults to BSER v1 self.transport.write(cmd) @@ -504,7 +638,64 @@ class ImmutableBserCodec(BserCodec): immutable object support """ def _loads(self, response): - return bser.loads(response, False) + return bser.loads(response, False) # Defaults to BSER v1 + + +class Bser2WithFallbackCodec(BserCodec): + """ use BSER v2 encoding """ + + def __init__(self, transport): + super(Bser2WithFallbackCodec, self).__init__(transport) + # Once the server advertises support for bser-v2 we 
should switch this + # to 'required' on Python 3. + self.send(["version", {"optional": ["bser-v2"]}]) + + capabilities = self.receive() + + if 'error' in capabilities: + raise Exception('Unsupported BSER version') + + if capabilities['capabilities']['bser-v2']: + self.bser_version = 2 + self.bser_capabilities = 0 + else: + self.bser_version = 1 + self.bser_capabilities = 0 + + def _loads(self, response): + return bser.loads(response) + + def receive(self): + buf = [self.transport.readBytes(sniff_len)] + if not buf[0]: + raise WatchmanError('empty watchman response') + + recv_bser_version, recv_bser_capabilities, elen = bser.pdu_info(buf[0]) + + if hasattr(self, 'bser_version'): + # Readjust BSER version and capabilities if necessary + self.bser_version = max(self.bser_version, recv_bser_version) + self.capabilities = self.bser_capabilities & recv_bser_capabilities + + rlen = len(buf[0]) + while elen > rlen: + buf.append(self.transport.readBytes(elen - rlen)) + rlen += len(buf[-1]) + + response = b''.join(buf) + try: + res = self._loads(response) + return res + except ValueError as e: + raise WatchmanError('watchman response decode error: %s' % e) + + def send(self, *args): + if hasattr(self, 'bser_version'): + cmd = bser.dumps(*args, version=self.bser_version, + capabilities=self.bser_capabilities) + else: + cmd = bser.dumps(*args) + self.transport.write(cmd) class JsonCodec(Codec): @@ -520,6 +711,13 @@ class JsonCodec(Codec): def receive(self): line = self.transport.readLine() try: + # In Python 3, json.loads is a transformation from Unicode string to + # objects possibly containing Unicode strings. We typically expect + # the JSON blob to be ASCII-only with non-ASCII characters escaped, + # but it's possible we might get non-ASCII bytes that are valid + # UTF-8. 
+ if compat.PYTHON3: + line = line.decode('utf-8') return self.json.loads(line) except Exception as e: print(e, line) @@ -527,7 +725,12 @@ class JsonCodec(Codec): def send(self, *args): cmd = self.json.dumps(*args) - self.transport.write(cmd + "\n") + # In Python 3, json.dumps is a transformation from objects possibly + # containing Unicode strings to Unicode string. Even with (the default) + # ensure_ascii=True, dumps returns a Unicode string. + if compat.PYTHON3: + cmd = cmd.encode('ascii') + self.transport.write(cmd + b"\n") class client(object): @@ -556,22 +759,27 @@ class client(object): self.timeout = timeout self.useImmutableBser = useImmutableBser - transport = transport or os.getenv('WATCHMAN_TRANSPORT') or 'local' - if transport == 'local' and os.name == 'nt': - self.transport = WindowsNamedPipeTransport - elif transport == 'local': - self.transport = UnixSocketTransport - elif transport == 'cli': - self.transport = CLIProcessTransport - if sendEncoding is None: - sendEncoding = 'json' - if recvEncoding is None: - recvEncoding = sendEncoding + if inspect.isclass(transport) and issubclass(transport, Transport): + self.transport = transport else: - raise WatchmanError('invalid transport %s' % transport) + transport = transport or os.getenv('WATCHMAN_TRANSPORT') or 'local' + if transport == 'local' and os.name == 'nt': + self.transport = WindowsNamedPipeTransport + elif transport == 'local': + self.transport = UnixSocketTransport + elif transport == 'cli': + self.transport = CLIProcessTransport + if sendEncoding is None: + sendEncoding = 'json' + if recvEncoding is None: + recvEncoding = sendEncoding + else: + raise WatchmanError('invalid transport %s' % transport) - sendEncoding = sendEncoding or os.getenv('WATCHMAN_ENCODING') or 'bser' - recvEncoding = recvEncoding or os.getenv('WATCHMAN_ENCODING') or 'bser' + sendEncoding = str(sendEncoding or os.getenv('WATCHMAN_ENCODING') or + 'bser') + recvEncoding = str(recvEncoding or os.getenv('WATCHMAN_ENCODING') 
or + 'bser') self.recvCodec = self._parseEncoding(recvEncoding) self.sendCodec = self._parseEncoding(sendEncoding) @@ -581,6 +789,8 @@ class client(object): if self.useImmutableBser: return ImmutableBserCodec return BserCodec + elif enc == 'experimental-bser-v2': + return Bser2WithFallbackCodec elif enc == 'json': return JsonCodec else: @@ -600,10 +810,20 @@ class client(object): cmd = ['watchman', '--output-encoding=bser', 'get-sockname'] try: - p = subprocess.Popen(cmd, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - close_fds=os.name != 'nt') + args = dict(stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + close_fds=os.name != 'nt') + + if os.name == 'nt': + # if invoked via an application with graphical user interface, + # this call will cause a brief command window pop-up. + # Using the flag STARTF_USESHOWWINDOW to avoid this behavior. + startupinfo = subprocess.STARTUPINFO() + startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW + args['startupinfo'] = startupinfo + + p = subprocess.Popen(cmd, **args) + except OSError as e: raise WatchmanError('"watchman" executable not in PATH (%s)', e) @@ -614,10 +834,10 @@ class client(object): raise WatchmanError("watchman exited with code %d" % exitcode) result = bser.loads(stdout) - if 'error' in result: + if b'error' in result: raise WatchmanError('get-sockname error: %s' % result['error']) - return result['sockname'] + return result[b'sockname'] def _connect(self): """ establish transport connection """ @@ -660,10 +880,16 @@ class client(object): self._connect() result = self.recvConn.receive() if self._hasprop(result, 'error'): - raise CommandError(result['error']) + error = result['error'] + if compat.PYTHON3 and isinstance(self.recvConn, BserCodec): + error = result['error'].decode('utf-8', 'surrogateescape') + raise CommandError(error) if self._hasprop(result, 'log'): - self.logs.append(result['log']) + log = result['log'] + if compat.PYTHON3 and isinstance(self.recvConn, BserCodec): + log = 
log.decode('utf-8', 'surrogateescape') + self.logs.append(log) if self._hasprop(result, 'subscription'): sub = result['subscription'] @@ -682,6 +908,9 @@ class client(object): return result def isUnilateralResponse(self, res): + if 'unilateral' in res and res['unilateral']: + return True + # Fall back to checking for known unilateral responses for k in self.unilateral: if k in res: return True @@ -712,6 +941,13 @@ class client(object): remove processing impacts both the unscoped and scoped stores for the subscription data. """ + if compat.PYTHON3 and issubclass(self.recvCodec, BserCodec): + # People may pass in Unicode strings here -- but currently BSER only + # returns bytestrings. Deal with that. + if isinstance(root, str): + root = encoding.encode_local(root) + if isinstance(name, str): + name = name.encode('utf-8') if root is not None: if not root in self.sub_by_root: @@ -752,9 +988,17 @@ class client(object): res = self.receive() return res - except CommandError as ex: + except EnvironmentError as ee: + # When we can depend on Python 3, we can use PEP 3134 + # exception chaining here. 
+ raise WatchmanEnvironmentError( + 'I/O error communicating with watchman daemon', + ee.errno, + ee.strerror, + args) + except WatchmanError as ex: ex.setCommand(args) - raise ex + raise def capabilityCheck(self, optional=None, required=None): """ Perform a server capability check """ @@ -775,5 +1019,3 @@ class client(object): def setTimeout(self, value): self.recvConn.setTimeout(value) self.sendConn.setTimeout(value) - -# no-check-code -- this is a 3rd party library diff --git a/hgext/fsmonitor/pywatchman/bser.c b/hgext/fsmonitor/pywatchman/bser.c --- a/hgext/fsmonitor/pywatchman/bser.c +++ b/hgext/fsmonitor/pywatchman/bser.c @@ -29,11 +29,27 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ #include +#include #ifdef _MSC_VER #define inline __inline -#include "msc_stdint.h" +#if _MSC_VER >= 1800 +#include +#else +// The compiler associated with Python 2.7 on Windows doesn't ship +// with stdint.h, so define the small subset that we use here. +typedef __int8 int8_t; +typedef __int16 int16_t; +typedef __int32 int32_t; +typedef __int64 int64_t; +typedef unsigned __int8 uint8_t; +typedef unsigned __int16 uint16_t; +typedef unsigned __int32 uint32_t; +typedef unsigned __int64 uint64_t; +#define UINT32_MAX 4294967295U +#endif #endif +// clang-format off /* Return the smallest size int that can store the value */ #define INT_SIZE(x) (((x) == ((int8_t)x)) ? 1 : \ ((x) == ((int16_t)x)) ? 2 : \ @@ -41,7 +57,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE #define BSER_ARRAY 0x00 #define BSER_OBJECT 0x01 -#define BSER_STRING 0x02 +#define BSER_BYTESTRING 0x02 #define BSER_INT8 0x03 #define BSER_INT16 0x04 #define BSER_INT32 0x05 @@ -52,6 +68,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE #define BSER_NULL 0x0a #define BSER_TEMPLATE 0x0b #define BSER_SKIP 0x0c +#define BSER_UTF8STRING 0x0d +// clang-format on // An immutable object representation of BSER_OBJECT. 
// Rather than build a hash table, key -> value are obtained @@ -64,24 +82,27 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE // approach, this is still faster for the mercurial use case // as it helps to eliminate creating N other objects to // represent the stat information in the hgwatchman extension +// clang-format off typedef struct { PyObject_HEAD PyObject *keys; // tuple of field names PyObject *values; // tuple of values } bserObject; +// clang-format on -static Py_ssize_t bserobj_tuple_length(PyObject *o) { - bserObject *obj = (bserObject*)o; +static Py_ssize_t bserobj_tuple_length(PyObject* o) { + bserObject* obj = (bserObject*)o; return PySequence_Length(obj->keys); } -static PyObject *bserobj_tuple_item(PyObject *o, Py_ssize_t i) { - bserObject *obj = (bserObject*)o; +static PyObject* bserobj_tuple_item(PyObject* o, Py_ssize_t i) { + bserObject* obj = (bserObject*)o; return PySequence_GetItem(obj->values, i); } +// clang-format off static PySequenceMethods bserobj_sq = { bserobj_tuple_length, /* sq_length */ 0, /* sq_concat */ @@ -92,49 +113,72 @@ static PySequenceMethods bserobj_sq = { 0, /* sq_inplace_concat */ 0 /* sq_inplace_repeat */ }; +// clang-format on -static void bserobj_dealloc(PyObject *o) { - bserObject *obj = (bserObject*)o; +static void bserobj_dealloc(PyObject* o) { + bserObject* obj = (bserObject*)o; Py_CLEAR(obj->keys); Py_CLEAR(obj->values); PyObject_Del(o); } -static PyObject *bserobj_getattrro(PyObject *o, PyObject *name) { - bserObject *obj = (bserObject*)o; +static PyObject* bserobj_getattrro(PyObject* o, PyObject* name) { + bserObject* obj = (bserObject*)o; Py_ssize_t i, n; - const char *namestr; + PyObject* name_bytes = NULL; + PyObject* ret = NULL; + const char* namestr; if (PyIndex_Check(name)) { i = PyNumber_AsSsize_t(name, PyExc_IndexError); if (i == -1 && PyErr_Occurred()) { - return NULL; + goto bail; } - return PySequence_GetItem(obj->values, i); + ret = PySequence_GetItem(obj->values, i); + goto bail; } + // We can be 
passed in Unicode objects here -- we don't support anything other + // than UTF-8 for keys. + if (PyUnicode_Check(name)) { + name_bytes = PyUnicode_AsUTF8String(name); + if (name_bytes == NULL) { + goto bail; + } + namestr = PyBytes_AsString(name_bytes); + } else { + namestr = PyBytes_AsString(name); + } + + if (namestr == NULL) { + goto bail; + } // hack^Wfeature to allow mercurial to use "st_size" to reference "size" - namestr = PyString_AsString(name); if (!strncmp(namestr, "st_", 3)) { namestr += 3; } n = PyTuple_GET_SIZE(obj->keys); for (i = 0; i < n; i++) { - const char *item_name = NULL; - PyObject *key = PyTuple_GET_ITEM(obj->keys, i); + const char* item_name = NULL; + PyObject* key = PyTuple_GET_ITEM(obj->keys, i); - item_name = PyString_AsString(key); + item_name = PyBytes_AsString(key); if (!strcmp(item_name, namestr)) { - return PySequence_GetItem(obj->values, i); + ret = PySequence_GetItem(obj->values, i); + goto bail; } } - PyErr_Format(PyExc_AttributeError, - "bserobject has no attribute '%.400s'", namestr); - return NULL; + + PyErr_Format( + PyExc_AttributeError, "bserobject has no attribute '%.400s'", namestr); +bail: + Py_XDECREF(name_bytes); + return ret; } +// clang-format off static PyMappingMethods bserobj_map = { bserobj_tuple_length, /* mp_length */ bserobj_getattrro, /* mp_subscript */ @@ -181,20 +225,27 @@ PyTypeObject bserObjectType = { 0, /* tp_alloc */ 0, /* tp_new */ }; - +// clang-format on -static PyObject *bser_loads_recursive(const char **ptr, const char *end, - int mutable); +typedef struct loads_ctx { + int mutable; + const char* value_encoding; + const char* value_errors; + uint32_t bser_version; + uint32_t bser_capabilities; +} unser_ctx_t; + +static PyObject* +bser_loads_recursive(const char** ptr, const char* end, const unser_ctx_t* ctx); static const char bser_true = BSER_TRUE; static const char bser_false = BSER_FALSE; static const char bser_null = BSER_NULL; -static const char bser_string_hdr = BSER_STRING; +static const 
char bser_bytestring_hdr = BSER_BYTESTRING; static const char bser_array_hdr = BSER_ARRAY; static const char bser_object_hdr = BSER_OBJECT; -static inline uint32_t next_power_2(uint32_t n) -{ +static inline uint32_t next_power_2(uint32_t n) { n |= (n >> 16); n |= (n >> 8); n |= (n >> 4); @@ -205,16 +256,17 @@ static inline uint32_t next_power_2(uint // A buffer we use for building up the serialized result struct bser_buffer { - char *buf; + char* buf; int wpos, allocd; + uint32_t bser_version; + uint32_t capabilities; }; typedef struct bser_buffer bser_t; -static int bser_append(bser_t *bser, const char *data, uint32_t len) -{ +static int bser_append(bser_t* bser, const char* data, uint32_t len) { int newlen = next_power_2(bser->wpos + len); if (newlen > bser->allocd) { - char *nbuf = realloc(bser->buf, newlen); + char* nbuf = realloc(bser->buf, newlen); if (!nbuf) { return 0; } @@ -228,40 +280,46 @@ static int bser_append(bser_t *bser, con return 1; } -static int bser_init(bser_t *bser) -{ +static int bser_init(bser_t* bser, uint32_t version, uint32_t capabilities) { bser->allocd = 8192; bser->wpos = 0; bser->buf = malloc(bser->allocd); - + bser->bser_version = version; + bser->capabilities = capabilities; if (!bser->buf) { return 0; } - // Leave room for the serialization header, which includes - // our overall length. To make things simpler, we'll use an - // int32 for the header +// Leave room for the serialization header, which includes +// our overall length. To make things simpler, we'll use an +// int32 for the header #define EMPTY_HEADER "\x00\x01\x05\x00\x00\x00\x00" - bser_append(bser, EMPTY_HEADER, sizeof(EMPTY_HEADER)-1); + +// Version 2 also carries an integer indicating the capabilities. The +// capabilities integer comes before the PDU size. 
+#define EMPTY_HEADER_V2 "\x00\x02\x00\x00\x00\x00\x05\x00\x00\x00\x00" + if (version == 2) { + bser_append(bser, EMPTY_HEADER_V2, sizeof(EMPTY_HEADER_V2) - 1); + } else { + bser_append(bser, EMPTY_HEADER, sizeof(EMPTY_HEADER) - 1); + } return 1; } -static void bser_dtor(bser_t *bser) -{ +static void bser_dtor(bser_t* bser) { free(bser->buf); bser->buf = NULL; } -static int bser_long(bser_t *bser, int64_t val) -{ +static int bser_long(bser_t* bser, int64_t val) { int8_t i8; int16_t i16; int32_t i32; int64_t i64; char sz; int size = INT_SIZE(val); - char *iptr; + char* iptr; switch (size) { case 1: @@ -285,8 +343,7 @@ static int bser_long(bser_t *bser, int64 iptr = (char*)&i64; break; default: - PyErr_SetString(PyExc_RuntimeError, - "Cannot represent this long value!?"); + PyErr_SetString(PyExc_RuntimeError, "Cannot represent this long value!?"); return 0; } @@ -297,25 +354,24 @@ static int bser_long(bser_t *bser, int64 return bser_append(bser, iptr, size); } -static int bser_string(bser_t *bser, PyObject *sval) -{ - char *buf = NULL; +static int bser_bytestring(bser_t* bser, PyObject* sval) { + char* buf = NULL; Py_ssize_t len; int res; - PyObject *utf = NULL; + PyObject* utf = NULL; if (PyUnicode_Check(sval)) { utf = PyUnicode_AsEncodedString(sval, "utf-8", "ignore"); sval = utf; } - res = PyString_AsStringAndSize(sval, &buf, &len); + res = PyBytes_AsStringAndSize(sval, &buf, &len); if (res == -1) { res = 0; goto out; } - if (!bser_append(bser, &bser_string_hdr, sizeof(bser_string_hdr))) { + if (!bser_append(bser, &bser_bytestring_hdr, sizeof(bser_bytestring_hdr))) { res = 0; goto out; } @@ -341,8 +397,7 @@ out: return res; } -static int bser_recursive(bser_t *bser, PyObject *val) -{ +static int bser_recursive(bser_t* bser, PyObject* val) { if (PyBool_Check(val)) { if (val == Py_True) { return bser_append(bser, &bser_true, sizeof(bser_true)); @@ -354,19 +409,21 @@ static int bser_recursive(bser_t *bser, return bser_append(bser, &bser_null, sizeof(bser_null)); } 
+// Python 3 has one integer type. +#if PY_MAJOR_VERSION < 3 if (PyInt_Check(val)) { return bser_long(bser, PyInt_AS_LONG(val)); } +#endif // PY_MAJOR_VERSION < 3 if (PyLong_Check(val)) { return bser_long(bser, PyLong_AsLongLong(val)); } - if (PyString_Check(val) || PyUnicode_Check(val)) { - return bser_string(bser, val); + if (PyBytes_Check(val) || PyUnicode_Check(val)) { + return bser_bytestring(bser, val); } - if (PyFloat_Check(val)) { double dval = PyFloat_AS_DOUBLE(val); char sz = BSER_REAL; @@ -390,7 +447,7 @@ static int bser_recursive(bser_t *bser, } for (i = 0; i < len; i++) { - PyObject *ele = PyList_GET_ITEM(val, i); + PyObject* ele = PyList_GET_ITEM(val, i); if (!bser_recursive(bser, ele)) { return 0; @@ -412,7 +469,7 @@ static int bser_recursive(bser_t *bser, } for (i = 0; i < len; i++) { - PyObject *ele = PyTuple_GET_ITEM(val, i); + PyObject* ele = PyTuple_GET_ITEM(val, i); if (!bser_recursive(bser, ele)) { return 0; @@ -436,7 +493,7 @@ static int bser_recursive(bser_t *bser, } while (PyDict_Next(val, &pos, &key, &ele)) { - if (!bser_string(bser, key)) { + if (!bser_bytestring(bser, key)) { return 0; } if (!bser_recursive(bser, ele)) { @@ -451,17 +508,25 @@ static int bser_recursive(bser_t *bser, return 0; } -static PyObject *bser_dumps(PyObject *self, PyObject *args) -{ +static PyObject* bser_dumps(PyObject* self, PyObject* args, PyObject* kw) { PyObject *val = NULL, *res; bser_t bser; - uint32_t len; + uint32_t len, bser_version = 1, bser_capabilities = 0; + + static char* kw_list[] = {"val", "version", "capabilities", NULL}; - if (!PyArg_ParseTuple(args, "O", &val)) { + if (!PyArg_ParseTupleAndKeywords( + args, + kw, + "O|ii:dumps", + kw_list, + &val, + &bser_version, + &bser_capabilities)) { return NULL; } - if (!bser_init(&bser)) { + if (!bser_init(&bser, bser_version, bser_capabilities)) { return PyErr_NoMemory(); } @@ -475,19 +540,25 @@ static PyObject *bser_dumps(PyObject *se } // Now fill in the overall length - len = bser.wpos - 
(sizeof(EMPTY_HEADER) - 1); - memcpy(bser.buf + 3, &len, sizeof(len)); + if (bser_version == 1) { + len = bser.wpos - (sizeof(EMPTY_HEADER) - 1); + memcpy(bser.buf + 3, &len, sizeof(len)); + } else { + len = bser.wpos - (sizeof(EMPTY_HEADER_V2) - 1); + // The BSER capabilities block comes before the PDU length + memcpy(bser.buf + 2, &bser_capabilities, sizeof(bser_capabilities)); + memcpy(bser.buf + 7, &len, sizeof(len)); + } - res = PyString_FromStringAndSize(bser.buf, bser.wpos); + res = PyBytes_FromStringAndSize(bser.buf, bser.wpos); bser_dtor(&bser); return res; } -int bunser_int(const char **ptr, const char *end, int64_t *val) -{ +int bunser_int(const char** ptr, const char* end, int64_t* val) { int needed; - const char *buf = *ptr; + const char* buf = *ptr; int8_t i8; int16_t i16; int32_t i32; @@ -507,8 +578,8 @@ int bunser_int(const char **ptr, const c needed = 9; break; default: - PyErr_Format(PyExc_ValueError, - "invalid bser int encoding 0x%02x", buf[0]); + PyErr_Format( + PyExc_ValueError, "invalid bser int encoding 0x%02x", buf[0]); return 0; } if (end - buf < needed) { @@ -538,10 +609,12 @@ int bunser_int(const char **ptr, const c } } -static int bunser_string(const char **ptr, const char *end, - const char **start, int64_t *len) -{ - const char *buf = *ptr; +static int bunser_bytestring( + const char** ptr, + const char* end, + const char** start, + int64_t* len) { + const char* buf = *ptr; // skip string marker buf++; @@ -559,11 +632,12 @@ static int bunser_string(const char **pt return 1; } -static PyObject *bunser_array(const char **ptr, const char *end, int mutable) -{ - const char *buf = *ptr; +static PyObject* +bunser_array(const char** ptr, const char* end, const unser_ctx_t* ctx) { + const char* buf = *ptr; int64_t nitems, i; - PyObject *res; + int mutable = ctx->mutable; + PyObject* res; // skip array header buf++; @@ -584,7 +658,7 @@ static PyObject *bunser_array(const char } for (i = 0; i < nitems; i++) { - PyObject *ele = 
bser_loads_recursive(ptr, end, mutable); + PyObject* ele = bser_loads_recursive(ptr, end, ctx); if (!ele) { Py_DECREF(res); @@ -602,13 +676,13 @@ static PyObject *bunser_array(const char return res; } -static PyObject *bunser_object(const char **ptr, const char *end, - int mutable) -{ - const char *buf = *ptr; +static PyObject* +bunser_object(const char** ptr, const char* end, const unser_ctx_t* ctx) { + const char* buf = *ptr; int64_t nitems, i; - PyObject *res; - bserObject *obj; + int mutable = ctx->mutable; + PyObject* res; + bserObject* obj; // skip array header buf++; @@ -627,12 +701,12 @@ static PyObject *bunser_object(const cha } for (i = 0; i < nitems; i++) { - const char *keystr; + const char* keystr; int64_t keylen; - PyObject *key; - PyObject *ele; + PyObject* key; + PyObject* ele; - if (!bunser_string(ptr, end, &keystr, &keylen)) { + if (!bunser_bytestring(ptr, end, &keystr, &keylen)) { Py_DECREF(res); return NULL; } @@ -643,13 +717,24 @@ static PyObject *bunser_object(const cha return NULL; } - key = PyString_FromStringAndSize(keystr, (Py_ssize_t)keylen); + if (mutable) { + // This will interpret the key as UTF-8. + key = PyUnicode_FromStringAndSize(keystr, (Py_ssize_t)keylen); + } else { + // For immutable objects we'll manage key lookups, so we can avoid going + // through the Unicode APIs. This avoids a potentially expensive and + // definitely unnecessary conversion to UTF-16 and back for Python 2. + // TODO: On Python 3 the Unicode APIs are smarter: we might be able to use + // Unicode keys there without an appreciable performance loss. 
+ key = PyBytes_FromStringAndSize(keystr, (Py_ssize_t)keylen); + } + if (!key) { Py_DECREF(res); return NULL; } - ele = bser_loads_recursive(ptr, end, mutable); + ele = bser_loads_recursive(ptr, end, ctx); if (!ele) { Py_DECREF(key); @@ -671,14 +756,24 @@ static PyObject *bunser_object(const cha return res; } -static PyObject *bunser_template(const char **ptr, const char *end, - int mutable) -{ - const char *buf = *ptr; +static PyObject* +bunser_template(const char** ptr, const char* end, const unser_ctx_t* ctx) { + const char* buf = *ptr; int64_t nitems, i; - PyObject *arrval; - PyObject *keys; + int mutable = ctx->mutable; + PyObject* arrval; + PyObject* keys; Py_ssize_t numkeys, keyidx; + unser_ctx_t keys_ctx = {0}; + if (mutable) { + keys_ctx.mutable = 1; + // Decode keys as UTF-8 in this case. + keys_ctx.value_encoding = "utf-8"; + keys_ctx.value_errors = "strict"; + } else { + // Treat keys as bytestrings in this case -- we'll do Unicode conversions at + // lookup time. + } if (buf[1] != BSER_ARRAY) { PyErr_Format(PyExc_ValueError, "Expect ARRAY to follow TEMPLATE"); @@ -689,8 +784,9 @@ static PyObject *bunser_template(const c buf++; *ptr = buf; - // Load template keys - keys = bunser_array(ptr, end, mutable); + // Load template keys. + // For keys we don't want to do any decoding right now. 
+ keys = bunser_array(ptr, end, &keys_ctx); if (!keys) { return NULL; } @@ -716,8 +812,8 @@ static PyObject *bunser_template(const c } for (i = 0; i < nitems; i++) { - PyObject *dict = NULL; - bserObject *obj = NULL; + PyObject* dict = NULL; + bserObject* obj = NULL; if (mutable) { dict = PyDict_New(); @@ -731,22 +827,22 @@ static PyObject *bunser_template(const c dict = (PyObject*)obj; } if (!dict) { -fail: + fail: Py_DECREF(keys); Py_DECREF(arrval); return NULL; } for (keyidx = 0; keyidx < numkeys; keyidx++) { - PyObject *key; - PyObject *ele; + PyObject* key; + PyObject* ele; if (**ptr == BSER_SKIP) { *ptr = *ptr + 1; ele = Py_None; Py_INCREF(ele); } else { - ele = bser_loads_recursive(ptr, end, mutable); + ele = bser_loads_recursive(ptr, end, ctx); } if (!ele) { @@ -772,34 +868,38 @@ fail: return arrval; } -static PyObject *bser_loads_recursive(const char **ptr, const char *end, - int mutable) -{ - const char *buf = *ptr; +static PyObject* bser_loads_recursive( + const char** ptr, + const char* end, + const unser_ctx_t* ctx) { + const char* buf = *ptr; switch (buf[0]) { case BSER_INT8: case BSER_INT16: case BSER_INT32: - case BSER_INT64: - { - int64_t ival; - if (!bunser_int(ptr, end, &ival)) { - return NULL; - } - if (ival < LONG_MIN || ival > LONG_MAX) { - return PyLong_FromLongLong(ival); - } - return PyInt_FromSsize_t(Py_SAFE_DOWNCAST(ival, int64_t, Py_ssize_t)); + case BSER_INT64: { + int64_t ival; + if (!bunser_int(ptr, end, &ival)) { + return NULL; } +// Python 3 has one integer type. 
+#if PY_MAJOR_VERSION >= 3 + return PyLong_FromLongLong(ival); +#else + if (ival < LONG_MIN || ival > LONG_MAX) { + return PyLong_FromLongLong(ival); + } + return PyInt_FromSsize_t(Py_SAFE_DOWNCAST(ival, int64_t, Py_ssize_t)); +#endif // PY_MAJOR_VERSION >= 3 + } - case BSER_REAL: - { - double dval; - memcpy(&dval, buf + 1, sizeof(dval)); - *ptr = buf + 1 + sizeof(double); - return PyFloat_FromDouble(dval); - } + case BSER_REAL: { + double dval; + memcpy(&dval, buf + 1, sizeof(dval)); + *ptr = buf + 1 + sizeof(double); + return PyFloat_FromDouble(dval); + } case BSER_TRUE: *ptr = buf + 1; @@ -816,31 +916,51 @@ static PyObject *bser_loads_recursive(co Py_INCREF(Py_None); return Py_None; - case BSER_STRING: - { - const char *start; - int64_t len; + case BSER_BYTESTRING: { + const char* start; + int64_t len; - if (!bunser_string(ptr, end, &start, &len)) { - return NULL; - } + if (!bunser_bytestring(ptr, end, &start, &len)) { + return NULL; + } - if (len > LONG_MAX) { - PyErr_Format(PyExc_ValueError, "string too long for python"); - return NULL; - } - - return PyString_FromStringAndSize(start, (long)len); + if (len > LONG_MAX) { + PyErr_Format(PyExc_ValueError, "string too long for python"); + return NULL; } + if (ctx->value_encoding != NULL) { + return PyUnicode_Decode( + start, (long)len, ctx->value_encoding, ctx->value_errors); + } else { + return PyBytes_FromStringAndSize(start, (long)len); + } + } + + case BSER_UTF8STRING: { + const char* start; + int64_t len; + + if (!bunser_bytestring(ptr, end, &start, &len)) { + return NULL; + } + + if (len > LONG_MAX) { + PyErr_Format(PyExc_ValueError, "string too long for python"); + return NULL; + } + + return PyUnicode_Decode(start, (long)len, "utf-8", "strict"); + } + case BSER_ARRAY: - return bunser_array(ptr, end, mutable); + return bunser_array(ptr, end, ctx); case BSER_OBJECT: - return bunser_object(ptr, end, mutable); + return bunser_object(ptr, end, ctx); case BSER_TEMPLATE: - return bunser_template(ptr, end, 
mutable); + return bunser_template(ptr, end, ctx); default: PyErr_Format(PyExc_ValueError, "unhandled bser opcode 0x%02x", buf[0]); @@ -849,102 +969,244 @@ static PyObject *bser_loads_recursive(co return NULL; } -// Expected use case is to read a packet from the socket and -// then call bser.pdu_len on the packet. It returns the total -// length of the entire response that the peer is sending, -// including the bytes already received. This allows the client -// to compute the data size it needs to read before it can -// decode the data -static PyObject *bser_pdu_len(PyObject *self, PyObject *args) -{ - const char *start = NULL; - const char *data = NULL; - int datalen = 0; - const char *end; - int64_t expected_len, total_len; +static int _pdu_info_helper( + const char* data, + const char* end, + uint32_t* bser_version_out, + uint32_t* bser_capabilities_out, + int64_t* expected_len_out, + off_t* position_out) { + uint32_t bser_version; + uint32_t bser_capabilities = 0; + int64_t expected_len; - if (!PyArg_ParseTuple(args, "s#", &start, &datalen)) { - return NULL; - } - data = start; - end = data + datalen; - + const char* start; + start = data; // Validate the header and length - if (memcmp(data, EMPTY_HEADER, 2) != 0) { + if (memcmp(data, EMPTY_HEADER, 2) == 0) { + bser_version = 1; + } else if (memcmp(data, EMPTY_HEADER_V2, 2) == 0) { + bser_version = 2; + } else { PyErr_SetString(PyExc_ValueError, "invalid bser header"); - return NULL; + return 0; } data += 2; + if (bser_version == 2) { + // Expect an integer telling us what capabilities are supported by the + // remote server (currently unused). 
+ if (!memcpy(&bser_capabilities, &data, sizeof(bser_capabilities))) { + return 0; + } + data += sizeof(bser_capabilities); + } + // Expect an integer telling us how big the rest of the data // should be if (!bunser_int(&data, end, &expected_len)) { + return 0; + } + + *bser_version_out = bser_version; + *bser_capabilities_out = (uint32_t)bser_capabilities; + *expected_len_out = expected_len; + *position_out = (off_t)(data - start); + return 1; +} + +// This function parses the PDU header and provides info about the packet +// Returns false if unsuccessful +static int pdu_info_helper( + PyObject* self, + PyObject* args, + uint32_t* bser_version_out, + uint32_t* bser_capabilities_out, + int64_t* total_len_out) { + const char* start = NULL; + const char* data = NULL; + int datalen = 0; + const char* end; + int64_t expected_len; + off_t position; + + if (!PyArg_ParseTuple(args, "s#", &start, &datalen)) { + return 0; + } + data = start; + end = data + datalen; + + if (!_pdu_info_helper( + data, + end, + bser_version_out, + bser_capabilities_out, + &expected_len, + &position)) { + return 0; + } + *total_len_out = (int64_t)(expected_len + position); + return 1; +} + +// Expected use case is to read a packet from the socket and then call +// bser.pdu_info on the packet. It returns the BSER version, BSER capabilities, +// and the total length of the entire response that the peer is sending, +// including the bytes already received. This allows the client to compute the +// data size it needs to read before it can decode the data. 
+static PyObject* bser_pdu_info(PyObject* self, PyObject* args) { + uint32_t version, capabilities; + int64_t total_len; + if (!pdu_info_helper(self, args, &version, &capabilities, &total_len)) { + return NULL; + } + return Py_BuildValue("kkL", version, capabilities, total_len); +} + +static PyObject* bser_pdu_len(PyObject* self, PyObject* args) { + uint32_t version, capabilities; + int64_t total_len; + if (!pdu_info_helper(self, args, &version, &capabilities, &total_len)) { + return NULL; + } + return Py_BuildValue("L", total_len); +} + +static PyObject* bser_loads(PyObject* self, PyObject* args, PyObject* kw) { + const char* data = NULL; + int datalen = 0; + const char* start; + const char* end; + int64_t expected_len; + off_t position; + PyObject* mutable_obj = NULL; + const char* value_encoding = NULL; + const char* value_errors = NULL; + unser_ctx_t ctx = {1, 0}; + + static char* kw_list[] = { + "buf", "mutable", "value_encoding", "value_errors", NULL}; + + if (!PyArg_ParseTupleAndKeywords( + args, + kw, + "s#|Ozz:loads", + kw_list, + &start, + &datalen, + &mutable_obj, + &value_encoding, + &value_errors)) { return NULL; } - total_len = expected_len + (data - start); - if (total_len > LONG_MAX) { - return PyLong_FromLongLong(total_len); + if (mutable_obj) { + ctx.mutable = PyObject_IsTrue(mutable_obj) > 0 ? 1 : 0; } - return PyInt_FromLong((long)total_len); -} - -static PyObject *bser_loads(PyObject *self, PyObject *args) -{ - const char *data = NULL; - int datalen = 0; - const char *end; - int64_t expected_len; - int mutable = 1; - PyObject *mutable_obj = NULL; - - if (!PyArg_ParseTuple(args, "s#|O:loads", &data, &datalen, &mutable_obj)) { - return NULL; + ctx.value_encoding = value_encoding; + if (value_encoding == NULL) { + ctx.value_errors = NULL; + } else if (value_errors == NULL) { + ctx.value_errors = "strict"; + } else { + ctx.value_errors = value_errors; } - if (mutable_obj) { - mutable = PyObject_IsTrue(mutable_obj) > 0 ? 
1 : 0; - } - + data = start; end = data + datalen; - // Validate the header and length - if (memcmp(data, EMPTY_HEADER, 2) != 0) { - PyErr_SetString(PyExc_ValueError, "invalid bser header"); + if (!_pdu_info_helper( + data, + end, + &ctx.bser_version, + &ctx.bser_capabilities, + &expected_len, + &position)) { return NULL; } - data += 2; - - // Expect an integer telling us how big the rest of the data - // should be - if (!bunser_int(&data, end, &expected_len)) { - return NULL; - } - + data = start + position; // Verify if (expected_len + data != end) { PyErr_SetString(PyExc_ValueError, "bser data len != header len"); return NULL; } - return bser_loads_recursive(&data, end, mutable); + return bser_loads_recursive(&data, end, &ctx); } +static PyObject* bser_load(PyObject* self, PyObject* args, PyObject* kw) { + PyObject *load, *string; + PyObject* fp = NULL; + PyObject* mutable_obj = NULL; + const char* value_encoding = NULL; + const char* value_errors = NULL; + + static char* kw_list[] = { + "fp", "mutable", "value_encoding", "value_errors", NULL}; + + if (!PyArg_ParseTupleAndKeywords( + args, + kw, + "OOzz:load", + kw_list, + &fp, + &mutable_obj, + &value_encoding, + &value_errors)) { + return NULL; + } + + load = PyImport_ImportModule("pywatchman.load"); + if (load == NULL) { + return NULL; + } + string = PyObject_CallMethod( + load, "load", "OOzz", fp, mutable_obj, value_encoding, value_errors); + Py_DECREF(load); + return string; +} + +// clang-format off static PyMethodDef bser_methods[] = { - {"loads", bser_loads, METH_VARARGS, "Deserialize string."}, - {"pdu_len", bser_pdu_len, METH_VARARGS, "Extract PDU length."}, - {"dumps", bser_dumps, METH_VARARGS, "Serialize string."}, + {"loads", (PyCFunction)bser_loads, METH_VARARGS | METH_KEYWORDS, + "Deserialize string."}, + {"load", (PyCFunction)bser_load, METH_VARARGS | METH_KEYWORDS, + "Deserialize a file object"}, + {"pdu_info", (PyCFunction)bser_pdu_info, METH_VARARGS, + "Extract PDU information."}, + 
{"pdu_len", (PyCFunction)bser_pdu_len, METH_VARARGS, + "Extract total PDU length."}, + {"dumps", (PyCFunction)bser_dumps, METH_VARARGS | METH_KEYWORDS, + "Serialize string."}, {NULL, NULL, 0, NULL} }; -PyMODINIT_FUNC initbser(void) -{ +#if PY_MAJOR_VERSION >= 3 +static struct PyModuleDef bser_module = { + PyModuleDef_HEAD_INIT, + "bser", + "Efficient encoding and decoding of BSER.", + -1, + bser_methods +}; +// clang-format on + +PyMODINIT_FUNC PyInit_bser(void) { + PyObject* mod; + + mod = PyModule_Create(&bser_module); + PyType_Ready(&bserObjectType); + + return mod; +} +#else + +PyMODINIT_FUNC initbser(void) { (void)Py_InitModule("bser", bser_methods); PyType_Ready(&bserObjectType); } +#endif // PY_MAJOR_VERSION >= 3 /* vim:ts=2:sw=2:et: */ - -// no-check-code -- this is a 3rd party library diff --git a/hgext/fsmonitor/pywatchman/capabilities.py b/hgext/fsmonitor/pywatchman/capabilities.py --- a/hgext/fsmonitor/pywatchman/capabilities.py +++ b/hgext/fsmonitor/pywatchman/capabilities.py @@ -26,6 +26,11 @@ # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +# no unicode literals + import re def parse_version(vstr): @@ -65,5 +70,3 @@ def synthesize(vers, opts): vers['error'] = 'client required capability `' + name + \ '` is not supported by this server' return vers - -# no-check-code -- this is a 3rd party library diff --git a/hgext/fsmonitor/pywatchman/compat.py b/hgext/fsmonitor/pywatchman/compat.py new file mode 100644 --- /dev/null +++ b/hgext/fsmonitor/pywatchman/compat.py @@ -0,0 +1,65 @@ +# Copyright 2016-present Facebook, Inc. +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name Facebook nor the names of its contributors may be used to +# endorse or promote products derived from this software without specific +# prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +# no unicode literals + +'''Compatibility module across Python 2 and 3.''' + +import sys + +PYTHON3 = sys.version_info >= (3, 0) + +# This is adapted from https://bitbucket.org/gutworth/six, and used under the +# MIT license. See LICENSE for a full copyright notice. 
+if PYTHON3: + def reraise(tp, value, tb=None): + try: + if value is None: + value = tp() + if value.__traceback__ is not tb: + raise value.with_traceback(tb) + raise value + finally: + value = None + tb = None +else: + exec(''' +def reraise(tp, value, tb=None): + try: + raise tp, value, tb + finally: + tb = None +'''.strip()) + +if PYTHON3: + UNICODE = str +else: + UNICODE = unicode diff --git a/hgext/fsmonitor/pywatchman/encoding.py b/hgext/fsmonitor/pywatchman/encoding.py new file mode 100644 --- /dev/null +++ b/hgext/fsmonitor/pywatchman/encoding.py @@ -0,0 +1,73 @@ +# Copyright 2016-present Facebook, Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name Facebook nor the names of its contributors may be used to +# endorse or promote products derived from this software without specific +# prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +# no unicode literals + +'''Module to deal with filename encoding on the local system, as returned by +Watchman.''' + +import sys + +from . import ( + compat, +) + +if compat.PYTHON3: + default_local_errors = 'surrogateescape' + + def get_local_encoding(): + if sys.platform == 'win32': + # Watchman always returns UTF-8 encoded strings on Windows. + return 'utf-8' + # On the Python 3 versions we support, sys.getfilesystemencoding never + # returns None. + return sys.getfilesystemencoding() +else: + # Python 2 doesn't support surrogateescape, so use 'strict' by + # default. Users can register a custom surrogateescape error handler and use + # that if they so desire. + default_local_errors = 'strict' + + def get_local_encoding(): + if sys.platform == 'win32': + # Watchman always returns UTF-8 encoded strings on Windows. 
+ return 'utf-8' + fsencoding = sys.getfilesystemencoding() + if fsencoding is None: + # This is very unlikely to happen, but if it does, just use UTF-8 + fsencoding = 'utf-8' + return fsencoding + +def encode_local(s): + return s.encode(get_local_encoding(), default_local_errors) + +def decode_local(bs): + return bs.decode(get_local_encoding(), default_local_errors) diff --git a/hgext/fsmonitor/pywatchman/load.py b/hgext/fsmonitor/pywatchman/load.py new file mode 100644 --- /dev/null +++ b/hgext/fsmonitor/pywatchman/load.py @@ -0,0 +1,107 @@ +# Copyright 2016 Facebook, Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name Facebook nor the names of its contributors may be used to +# endorse or promote products derived from this software without specific +# prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +# no unicode literals + +try: + from . import bser +except ImportError: + from . import pybser as bser + +import ctypes + +EMPTY_HEADER = b"\x00\x01\x05\x00\x00\x00\x00" + + +def _read_bytes(fp, buf): + """Read bytes from a file-like object + + @param fp: File-like object that implements read(int) + @type fp: file + + @param buf: Buffer to read into + @type buf: bytes + + @return: buf + """ + + # Do the first read without resizing the input buffer + offset = 0 + remaining = len(buf) + while remaining > 0: + l = fp.readinto((ctypes.c_char * remaining).from_buffer(buf, offset)) + if l is None or l == 0: + return offset + offset += l + remaining -= l + return offset + + +def load(fp, mutable=True, value_encoding=None, value_errors=None): + """Deserialize a BSER-encoded blob. + + @param fp: The file-object to deserialize. + @type file: + + @param mutable: Whether to return mutable results. + @type mutable: bool + + @param value_encoding: Optional codec to use to decode values. If + unspecified or None, return values as bytestrings. + @type value_encoding: str + + @param value_errors: Optional error handler for codec. 'strict' by default. + The other most common argument is 'surrogateescape' on + Python 3. If value_encoding is None, this is ignored. 
+ @type value_errors: str + """ + buf = ctypes.create_string_buffer(8192) + SNIFF_BUFFER_SIZE = len(EMPTY_HEADER) + header = (ctypes.c_char * SNIFF_BUFFER_SIZE).from_buffer(buf) + read_len = _read_bytes(fp, header) + if read_len < len(header): + return None + + total_len = bser.pdu_len(buf) + if total_len > len(buf): + ctypes.resize(buf, total_len) + + body = (ctypes.c_char * (total_len - len(header))).from_buffer( + buf, len(header)) + read_len = _read_bytes(fp, body) + if read_len < len(body): + raise RuntimeError('bser data ended early') + + return bser.loads( + (ctypes.c_char * total_len).from_buffer(buf, 0), + mutable, + value_encoding, + value_errors) diff --git a/hgext/fsmonitor/pywatchman/msc_stdint.h b/hgext/fsmonitor/pywatchman/msc_stdint.h deleted file mode 100644 --- a/hgext/fsmonitor/pywatchman/msc_stdint.h +++ /dev/null @@ -1,260 +0,0 @@ -// no-check-code -// ISO C9x compliant stdint.h for Microsoft Visual Studio -// Based on ISO/IEC 9899:TC2 Committee draft (May 6, 2005) WG14/N1124 -// -// Copyright (c) 2006-2013 Alexander Chemeris -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are met: -// -// 1. Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the product nor the names of its contributors may -// be used to endorse or promote products derived from this software -// without specific prior written permission. 
-// -// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED -// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF -// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO -// EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; -// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, -// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR -// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF -// ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -/////////////////////////////////////////////////////////////////////////////// - -#ifndef _MSC_VER // [ -#error "Use this header only with Microsoft Visual C++ compilers!" -#endif // _MSC_VER ] - -#ifndef _MSC_STDINT_H_ // [ -#define _MSC_STDINT_H_ - -#if _MSC_VER > 1000 -#pragma once -#endif - -#if _MSC_VER >= 1600 // [ -#include -#else // ] _MSC_VER >= 1600 [ - -#include - -// For Visual Studio 6 in C++ mode and for many Visual Studio versions when -// compiling for ARM we should wrap include with 'extern "C++" {}' -// or compiler give many errors like this: -// error C2733: second C linkage of overloaded function 'wmemchr' not allowed -#ifdef __cplusplus -extern "C" { -#endif -# include -#ifdef __cplusplus -} -#endif - -// Define _W64 macros to mark types changing their size, like intptr_t. -#ifndef _W64 -# if !defined(__midl) && (defined(_X86_) || defined(_M_IX86)) && _MSC_VER >= 1300 -# define _W64 __w64 -# else -# define _W64 -# endif -#endif - - -// 7.18.1 Integer types - -// 7.18.1.1 Exact-width integer types - -// Visual Studio 6 and Embedded Visual C++ 4 doesn't -// realize that, e.g. char has the same size as __int8 -// so we give up on __intX for them. 
-#if (_MSC_VER < 1300) - typedef signed char int8_t; - typedef signed short int16_t; - typedef signed int int32_t; - typedef unsigned char uint8_t; - typedef unsigned short uint16_t; - typedef unsigned int uint32_t; -#else - typedef signed __int8 int8_t; - typedef signed __int16 int16_t; - typedef signed __int32 int32_t; - typedef unsigned __int8 uint8_t; - typedef unsigned __int16 uint16_t; - typedef unsigned __int32 uint32_t; -#endif -typedef signed __int64 int64_t; -typedef unsigned __int64 uint64_t; - - -// 7.18.1.2 Minimum-width integer types -typedef int8_t int_least8_t; -typedef int16_t int_least16_t; -typedef int32_t int_least32_t; -typedef int64_t int_least64_t; -typedef uint8_t uint_least8_t; -typedef uint16_t uint_least16_t; -typedef uint32_t uint_least32_t; -typedef uint64_t uint_least64_t; - -// 7.18.1.3 Fastest minimum-width integer types -typedef int8_t int_fast8_t; -typedef int16_t int_fast16_t; -typedef int32_t int_fast32_t; -typedef int64_t int_fast64_t; -typedef uint8_t uint_fast8_t; -typedef uint16_t uint_fast16_t; -typedef uint32_t uint_fast32_t; -typedef uint64_t uint_fast64_t; - -// 7.18.1.4 Integer types capable of holding object pointers -#ifdef _WIN64 // [ - typedef signed __int64 intptr_t; - typedef unsigned __int64 uintptr_t; -#else // _WIN64 ][ - typedef _W64 signed int intptr_t; - typedef _W64 unsigned int uintptr_t; -#endif // _WIN64 ] - -// 7.18.1.5 Greatest-width integer types -typedef int64_t intmax_t; -typedef uint64_t uintmax_t; - - -// 7.18.2 Limits of specified-width integer types - -#if !defined(__cplusplus) || defined(__STDC_LIMIT_MACROS) // [ See footnote 220 at page 257 and footnote 221 at page 259 - -// 7.18.2.1 Limits of exact-width integer types -#define INT8_MIN ((int8_t)_I8_MIN) -#define INT8_MAX _I8_MAX -#define INT16_MIN ((int16_t)_I16_MIN) -#define INT16_MAX _I16_MAX -#define INT32_MIN ((int32_t)_I32_MIN) -#define INT32_MAX _I32_MAX -#define INT64_MIN ((int64_t)_I64_MIN) -#define INT64_MAX _I64_MAX -#define 
UINT8_MAX _UI8_MAX -#define UINT16_MAX _UI16_MAX -#define UINT32_MAX _UI32_MAX -#define UINT64_MAX _UI64_MAX - -// 7.18.2.2 Limits of minimum-width integer types -#define INT_LEAST8_MIN INT8_MIN -#define INT_LEAST8_MAX INT8_MAX -#define INT_LEAST16_MIN INT16_MIN -#define INT_LEAST16_MAX INT16_MAX -#define INT_LEAST32_MIN INT32_MIN -#define INT_LEAST32_MAX INT32_MAX -#define INT_LEAST64_MIN INT64_MIN -#define INT_LEAST64_MAX INT64_MAX -#define UINT_LEAST8_MAX UINT8_MAX -#define UINT_LEAST16_MAX UINT16_MAX -#define UINT_LEAST32_MAX UINT32_MAX -#define UINT_LEAST64_MAX UINT64_MAX - -// 7.18.2.3 Limits of fastest minimum-width integer types -#define INT_FAST8_MIN INT8_MIN -#define INT_FAST8_MAX INT8_MAX -#define INT_FAST16_MIN INT16_MIN -#define INT_FAST16_MAX INT16_MAX -#define INT_FAST32_MIN INT32_MIN -#define INT_FAST32_MAX INT32_MAX -#define INT_FAST64_MIN INT64_MIN -#define INT_FAST64_MAX INT64_MAX -#define UINT_FAST8_MAX UINT8_MAX -#define UINT_FAST16_MAX UINT16_MAX -#define UINT_FAST32_MAX UINT32_MAX -#define UINT_FAST64_MAX UINT64_MAX - -// 7.18.2.4 Limits of integer types capable of holding object pointers -#ifdef _WIN64 // [ -# define INTPTR_MIN INT64_MIN -# define INTPTR_MAX INT64_MAX -# define UINTPTR_MAX UINT64_MAX -#else // _WIN64 ][ -# define INTPTR_MIN INT32_MIN -# define INTPTR_MAX INT32_MAX -# define UINTPTR_MAX UINT32_MAX -#endif // _WIN64 ] - -// 7.18.2.5 Limits of greatest-width integer types -#define INTMAX_MIN INT64_MIN -#define INTMAX_MAX INT64_MAX -#define UINTMAX_MAX UINT64_MAX - -// 7.18.3 Limits of other integer types - -#ifdef _WIN64 // [ -# define PTRDIFF_MIN _I64_MIN -# define PTRDIFF_MAX _I64_MAX -#else // _WIN64 ][ -# define PTRDIFF_MIN _I32_MIN -# define PTRDIFF_MAX _I32_MAX -#endif // _WIN64 ] - -#define SIG_ATOMIC_MIN INT_MIN -#define SIG_ATOMIC_MAX INT_MAX - -#ifndef SIZE_MAX // [ -# ifdef _WIN64 // [ -# define SIZE_MAX _UI64_MAX -# else // _WIN64 ][ -# define SIZE_MAX _UI32_MAX -# endif // _WIN64 ] -#endif // SIZE_MAX ] - -// 
WCHAR_MIN and WCHAR_MAX are also defined in -#ifndef WCHAR_MIN // [ -# define WCHAR_MIN 0 -#endif // WCHAR_MIN ] -#ifndef WCHAR_MAX // [ -# define WCHAR_MAX _UI16_MAX -#endif // WCHAR_MAX ] - -#define WINT_MIN 0 -#define WINT_MAX _UI16_MAX - -#endif // __STDC_LIMIT_MACROS ] - - -// 7.18.4 Limits of other integer types - -#if !defined(__cplusplus) || defined(__STDC_CONSTANT_MACROS) // [ See footnote 224 at page 260 - -// 7.18.4.1 Macros for minimum-width integer constants - -#define INT8_C(val) val##i8 -#define INT16_C(val) val##i16 -#define INT32_C(val) val##i32 -#define INT64_C(val) val##i64 - -#define UINT8_C(val) val##ui8 -#define UINT16_C(val) val##ui16 -#define UINT32_C(val) val##ui32 -#define UINT64_C(val) val##ui64 - -// 7.18.4.2 Macros for greatest-width integer constants -// These #ifndef's are needed to prevent collisions with . -// Check out Issue 9 for the details. -#ifndef INTMAX_C // [ -# define INTMAX_C INT64_C -#endif // INTMAX_C ] -#ifndef UINTMAX_C // [ -# define UINTMAX_C UINT64_C -#endif // UINTMAX_C ] - -#endif // __STDC_CONSTANT_MACROS ] - -#endif // _MSC_VER >= 1600 ] - -#endif // _MSC_STDINT_H_ ] diff --git a/hgext/fsmonitor/pywatchman/pybser.py b/hgext/fsmonitor/pywatchman/pybser.py --- a/hgext/fsmonitor/pywatchman/pybser.py +++ b/hgext/fsmonitor/pywatchman/pybser.py @@ -26,33 +26,51 @@ # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +# no unicode literals + +import binascii import collections import ctypes import struct import sys -BSER_ARRAY = '\x00' -BSER_OBJECT = '\x01' -BSER_STRING = '\x02' -BSER_INT8 = '\x03' -BSER_INT16 = '\x04' -BSER_INT32 = '\x05' -BSER_INT64 = '\x06' -BSER_REAL = '\x07' -BSER_TRUE = '\x08' -BSER_FALSE = '\x09' -BSER_NULL = '\x0a' -BSER_TEMPLATE = '\x0b' -BSER_SKIP = '\x0c' +from . 
import ( + compat, +) + +BSER_ARRAY = b'\x00' +BSER_OBJECT = b'\x01' +BSER_BYTESTRING = b'\x02' +BSER_INT8 = b'\x03' +BSER_INT16 = b'\x04' +BSER_INT32 = b'\x05' +BSER_INT64 = b'\x06' +BSER_REAL = b'\x07' +BSER_TRUE = b'\x08' +BSER_FALSE = b'\x09' +BSER_NULL = b'\x0a' +BSER_TEMPLATE = b'\x0b' +BSER_SKIP = b'\x0c' +BSER_UTF8STRING = b'\x0d' + +if compat.PYTHON3: + STRING_TYPES = (str, bytes) + unicode = str + def tobytes(i): + return str(i).encode('ascii') + long = int +else: + STRING_TYPES = (unicode, str) + tobytes = bytes # Leave room for the serialization header, which includes # our overall length. To make things simpler, we'll use an # int32 for the header -EMPTY_HEADER = "\x00\x01\x05\x00\x00\x00\x00" - -# Python 3 conditional for supporting Python 2's int/long types -if sys.version_info > (3,): - long = int +EMPTY_HEADER = b"\x00\x01\x05\x00\x00\x00\x00" +EMPTY_HEADER_V2 = b"\x00\x02\x00\x00\x00\x00\x05\x00\x00\x00\x00" def _int_size(x): """Return the smallest size int that can store the value""" @@ -67,13 +85,28 @@ def _int_size(x): else: raise RuntimeError('Cannot represent value: ' + str(x)) +def _buf_pos(buf, pos): + ret = buf[pos] + # In Python 2, buf is a str array so buf[pos] is a string. In Python 3, buf + # is a bytes array and buf[pos] is an integer. 
+ if compat.PYTHON3: + ret = bytes((ret,)) + return ret class _bser_buffer(object): - def __init__(self): + def __init__(self, version): + self.bser_version = version self.buf = ctypes.create_string_buffer(8192) - struct.pack_into(str(len(EMPTY_HEADER)) + 's', self.buf, 0, EMPTY_HEADER) - self.wpos = len(EMPTY_HEADER) + if self.bser_version == 1: + struct.pack_into(tobytes(len(EMPTY_HEADER)) + b's', self.buf, 0, + EMPTY_HEADER) + self.wpos = len(EMPTY_HEADER) + else: + assert self.bser_version == 2 + struct.pack_into(tobytes(len(EMPTY_HEADER_V2)) + b's', self.buf, 0, + EMPTY_HEADER_V2) + self.wpos = len(EMPTY_HEADER_V2) def ensure_size(self, size): while ctypes.sizeof(self.buf) - self.wpos < size: @@ -84,13 +117,13 @@ class _bser_buffer(object): to_write = size + 1 self.ensure_size(to_write) if size == 1: - struct.pack_into('=cb', self.buf, self.wpos, BSER_INT8, val) + struct.pack_into(b'=cb', self.buf, self.wpos, BSER_INT8, val) elif size == 2: - struct.pack_into('=ch', self.buf, self.wpos, BSER_INT16, val) + struct.pack_into(b'=ch', self.buf, self.wpos, BSER_INT16, val) elif size == 4: - struct.pack_into('=ci', self.buf, self.wpos, BSER_INT32, val) + struct.pack_into(b'=ci', self.buf, self.wpos, BSER_INT32, val) elif size == 8: - struct.pack_into('=cq', self.buf, self.wpos, BSER_INT64, val) + struct.pack_into(b'=cq', self.buf, self.wpos, BSER_INT64, val) else: raise RuntimeError('Cannot represent this long value') self.wpos += to_write @@ -104,13 +137,17 @@ class _bser_buffer(object): to_write = 2 + size + s_len self.ensure_size(to_write) if size == 1: - struct.pack_into('=ccb' + str(s_len) + 's', self.buf, self.wpos, BSER_STRING, BSER_INT8, s_len, s) + struct.pack_into(b'=ccb' + tobytes(s_len) + b's', self.buf, + self.wpos, BSER_BYTESTRING, BSER_INT8, s_len, s) elif size == 2: - struct.pack_into('=cch' + str(s_len) + 's', self.buf, self.wpos, BSER_STRING, BSER_INT16, s_len, s) + struct.pack_into(b'=cch' + tobytes(s_len) + b's', self.buf, + self.wpos, 
BSER_BYTESTRING, BSER_INT16, s_len, s) elif size == 4: - struct.pack_into('=cci' + str(s_len) + 's', self.buf, self.wpos, BSER_STRING, BSER_INT32, s_len, s) + struct.pack_into(b'=cci' + tobytes(s_len) + b's', self.buf, + self.wpos, BSER_BYTESTRING, BSER_INT32, s_len, s) elif size == 8: - struct.pack_into('=ccq' + str(s_len) + 's', self.buf, self.wpos, BSER_STRING, BSER_INT64, s_len, s) + struct.pack_into(b'=ccq' + tobytes(s_len) + b's', self.buf, + self.wpos, BSER_BYTESTRING, BSER_INT64, s_len, s) else: raise RuntimeError('Cannot represent this string value') self.wpos += to_write @@ -124,54 +161,68 @@ class _bser_buffer(object): to_encode = BSER_TRUE else: to_encode = BSER_FALSE - struct.pack_into('=c', self.buf, self.wpos, to_encode) + struct.pack_into(b'=c', self.buf, self.wpos, to_encode) self.wpos += needed elif val is None: needed = 1 self.ensure_size(needed) - struct.pack_into('=c', self.buf, self.wpos, BSER_NULL) + struct.pack_into(b'=c', self.buf, self.wpos, BSER_NULL) self.wpos += needed elif isinstance(val, (int, long)): self.append_long(val) - elif isinstance(val, (str, unicode)): + elif isinstance(val, STRING_TYPES): self.append_string(val) elif isinstance(val, float): needed = 9 self.ensure_size(needed) - struct.pack_into('=cd', self.buf, self.wpos, BSER_REAL, val) + struct.pack_into(b'=cd', self.buf, self.wpos, BSER_REAL, val) self.wpos += needed - elif isinstance(val, collections.Mapping) and isinstance(val, collections.Sized): + elif isinstance(val, collections.Mapping) and \ + isinstance(val, collections.Sized): val_len = len(val) size = _int_size(val_len) needed = 2 + size self.ensure_size(needed) if size == 1: - struct.pack_into('=ccb', self.buf, self.wpos, BSER_OBJECT, BSER_INT8, val_len) + struct.pack_into(b'=ccb', self.buf, self.wpos, BSER_OBJECT, + BSER_INT8, val_len) elif size == 2: - struct.pack_into('=cch', self.buf, self.wpos, BSER_OBJECT, BSER_INT16, val_len) + struct.pack_into(b'=cch', self.buf, self.wpos, BSER_OBJECT, + BSER_INT16, 
val_len) elif size == 4: - struct.pack_into('=cci', self.buf, self.wpos, BSER_OBJECT, BSER_INT32, val_len) + struct.pack_into(b'=cci', self.buf, self.wpos, BSER_OBJECT, + BSER_INT32, val_len) elif size == 8: - struct.pack_into('=ccq', self.buf, self.wpos, BSER_OBJECT, BSER_INT64, val_len) + struct.pack_into(b'=ccq', self.buf, self.wpos, BSER_OBJECT, + BSER_INT64, val_len) else: raise RuntimeError('Cannot represent this mapping value') self.wpos += needed - for k, v in val.iteritems(): + if compat.PYTHON3: + iteritems = val.items() + else: + iteritems = val.iteritems() + for k, v in iteritems: self.append_string(k) self.append_recursive(v) - elif isinstance(val, collections.Iterable) and isinstance(val, collections.Sized): + elif isinstance(val, collections.Iterable) and \ + isinstance(val, collections.Sized): val_len = len(val) size = _int_size(val_len) needed = 2 + size self.ensure_size(needed) if size == 1: - struct.pack_into('=ccb', self.buf, self.wpos, BSER_ARRAY, BSER_INT8, val_len) + struct.pack_into(b'=ccb', self.buf, self.wpos, BSER_ARRAY, + BSER_INT8, val_len) elif size == 2: - struct.pack_into('=cch', self.buf, self.wpos, BSER_ARRAY, BSER_INT16, val_len) + struct.pack_into(b'=cch', self.buf, self.wpos, BSER_ARRAY, + BSER_INT16, val_len) elif size == 4: - struct.pack_into('=cci', self.buf, self.wpos, BSER_ARRAY, BSER_INT32, val_len) + struct.pack_into(b'=cci', self.buf, self.wpos, BSER_ARRAY, + BSER_INT32, val_len) elif size == 8: - struct.pack_into('=ccq', self.buf, self.wpos, BSER_ARRAY, BSER_INT64, val_len) + struct.pack_into(b'=ccq', self.buf, self.wpos, BSER_ARRAY, + BSER_INT64, val_len) else: raise RuntimeError('Cannot represent this sequence value') self.wpos += needed @@ -181,56 +232,18 @@ class _bser_buffer(object): raise RuntimeError('Cannot represent unknown value type') -def dumps(obj): - bser_buf = _bser_buffer() +def dumps(obj, version=1, capabilities=0): + bser_buf = _bser_buffer(version=version) bser_buf.append_recursive(obj) # Now fill in 
the overall length - obj_len = bser_buf.wpos - len(EMPTY_HEADER) - struct.pack_into('=i', bser_buf.buf, 3, obj_len) - return bser_buf.buf.raw[:bser_buf.wpos] - - -def _bunser_int(buf, pos): - try: - int_type = buf[pos] - except IndexError: - raise ValueError('Invalid bser int encoding, pos out of range') - if int_type == BSER_INT8: - needed = 2 - fmt = '=b' - elif int_type == BSER_INT16: - needed = 3 - fmt = '=h' - elif int_type == BSER_INT32: - needed = 5 - fmt = '=i' - elif int_type == BSER_INT64: - needed = 9 - fmt = '=q' + if version == 1: + obj_len = bser_buf.wpos - len(EMPTY_HEADER) + struct.pack_into(b'=i', bser_buf.buf, 3, obj_len) else: - raise ValueError('Invalid bser int encoding 0x%02x' % int(int_type)) - int_val = struct.unpack_from(fmt, buf, pos + 1)[0] - return (int_val, pos + needed) - - -def _bunser_string(buf, pos): - str_len, pos = _bunser_int(buf, pos + 1) - str_val = struct.unpack_from(str(str_len) + 's', buf, pos)[0] - return (str_val, pos + str_len) - - -def _bunser_array(buf, pos, mutable=True): - arr_len, pos = _bunser_int(buf, pos + 1) - arr = [] - for i in range(arr_len): - arr_item, pos = _bser_loads_recursive(buf, pos, mutable) - arr.append(arr_item) - - if not mutable: - arr = tuple(arr) - - return arr, pos - + obj_len = bser_buf.wpos - len(EMPTY_HEADER_V2) + struct.pack_into(b'=i', bser_buf.buf, 2, capabilities) + struct.pack_into(b'=i', bser_buf.buf, 7, obj_len) + return bser_buf.buf.raw[:bser_buf.wpos] # This is a quack-alike with the bserObjectType in bser.c # It provides by getattr accessors and getitem for both index @@ -260,100 +273,212 @@ class _BunserDict(object): def __len__(self): return len(self._keys) -def _bunser_object(buf, pos, mutable=True): - obj_len, pos = _bunser_int(buf, pos + 1) - if mutable: - obj = {} - else: - keys = [] - vals = [] +class Bunser(object): + def __init__(self, mutable=True, value_encoding=None, value_errors=None): + self.mutable = mutable + self.value_encoding = value_encoding + + if 
value_encoding is None: + self.value_errors = None + elif value_errors is None: + self.value_errors = 'strict' + else: + self.value_errors = value_errors - for i in range(obj_len): - key, pos = _bunser_string(buf, pos) - val, pos = _bser_loads_recursive(buf, pos, mutable) - if mutable: - obj[key] = val + @staticmethod + def unser_int(buf, pos): + try: + int_type = _buf_pos(buf, pos) + except IndexError: + raise ValueError('Invalid bser int encoding, pos out of range') + if int_type == BSER_INT8: + needed = 2 + fmt = b'=b' + elif int_type == BSER_INT16: + needed = 3 + fmt = b'=h' + elif int_type == BSER_INT32: + needed = 5 + fmt = b'=i' + elif int_type == BSER_INT64: + needed = 9 + fmt = b'=q' else: - keys.append(key) - vals.append(val) + raise ValueError('Invalid bser int encoding 0x%s' % + binascii.hexlify(int_type).decode('ascii')) + int_val = struct.unpack_from(fmt, buf, pos + 1)[0] + return (int_val, pos + needed) - if not mutable: - obj = _BunserDict(keys, vals) - - return obj, pos - + def unser_utf8_string(self, buf, pos): + str_len, pos = self.unser_int(buf, pos + 1) + str_val = struct.unpack_from(tobytes(str_len) + b's', buf, pos)[0] + return (str_val.decode('utf-8'), pos + str_len) -def _bunser_template(buf, pos, mutable=True): - if buf[pos + 1] != BSER_ARRAY: - raise RuntimeError('Expect ARRAY to follow TEMPLATE') - keys, pos = _bunser_array(buf, pos + 1) - nitems, pos = _bunser_int(buf, pos) - arr = [] - for i in range(nitems): - if mutable: + def unser_bytestring(self, buf, pos): + str_len, pos = self.unser_int(buf, pos + 1) + str_val = struct.unpack_from(tobytes(str_len) + b's', buf, pos)[0] + if self.value_encoding is not None: + str_val = str_val.decode(self.value_encoding, self.value_errors) + # str_len stays the same because that's the length in bytes + return (str_val, pos + str_len) + + def unser_array(self, buf, pos): + arr_len, pos = self.unser_int(buf, pos + 1) + arr = [] + for i in range(arr_len): + arr_item, pos = self.loads_recursive(buf, 
pos) + arr.append(arr_item) + + if not self.mutable: + arr = tuple(arr) + + return arr, pos + + def unser_object(self, buf, pos): + obj_len, pos = self.unser_int(buf, pos + 1) + if self.mutable: obj = {} else: + keys = [] vals = [] - for keyidx in range(len(keys)): - if buf[pos] == BSER_SKIP: - pos += 1 - ele = None + for i in range(obj_len): + key, pos = self.unser_utf8_string(buf, pos) + val, pos = self.loads_recursive(buf, pos) + if self.mutable: + obj[key] = val else: - ele, pos = _bser_loads_recursive(buf, pos, mutable) + keys.append(key) + vals.append(val) - if mutable: - key = keys[keyidx] - obj[key] = ele - else: - vals.append(ele) - - if not mutable: + if not self.mutable: obj = _BunserDict(keys, vals) - arr.append(obj) - return arr, pos + return obj, pos + + def unser_template(self, buf, pos): + val_type = _buf_pos(buf, pos + 1) + if val_type != BSER_ARRAY: + raise RuntimeError('Expect ARRAY to follow TEMPLATE') + # force UTF-8 on keys + keys_bunser = Bunser(mutable=self.mutable, value_encoding='utf-8') + keys, pos = keys_bunser.unser_array(buf, pos + 1) + nitems, pos = self.unser_int(buf, pos) + arr = [] + for i in range(nitems): + if self.mutable: + obj = {} + else: + vals = [] + + for keyidx in range(len(keys)): + if _buf_pos(buf, pos) == BSER_SKIP: + pos += 1 + ele = None + else: + ele, pos = self.loads_recursive(buf, pos) + + if self.mutable: + key = keys[keyidx] + obj[key] = ele + else: + vals.append(ele) + + if not self.mutable: + obj = _BunserDict(keys, vals) + + arr.append(obj) + return arr, pos + + def loads_recursive(self, buf, pos): + val_type = _buf_pos(buf, pos) + if (val_type == BSER_INT8 or val_type == BSER_INT16 or + val_type == BSER_INT32 or val_type == BSER_INT64): + return self.unser_int(buf, pos) + elif val_type == BSER_REAL: + val = struct.unpack_from(b'=d', buf, pos + 1)[0] + return (val, pos + 9) + elif val_type == BSER_TRUE: + return (True, pos + 1) + elif val_type == BSER_FALSE: + return (False, pos + 1) + elif val_type == 
BSER_NULL: + return (None, pos + 1) + elif val_type == BSER_BYTESTRING: + return self.unser_bytestring(buf, pos) + elif val_type == BSER_UTF8STRING: + return self.unser_utf8_string(buf, pos) + elif val_type == BSER_ARRAY: + return self.unser_array(buf, pos) + elif val_type == BSER_OBJECT: + return self.unser_object(buf, pos) + elif val_type == BSER_TEMPLATE: + return self.unser_template(buf, pos) + else: + raise ValueError('unhandled bser opcode 0x%s' % + binascii.hexlify(val_type).decode('ascii')) -def _bser_loads_recursive(buf, pos, mutable=True): - val_type = buf[pos] - if (val_type == BSER_INT8 or val_type == BSER_INT16 or - val_type == BSER_INT32 or val_type == BSER_INT64): - return _bunser_int(buf, pos) - elif val_type == BSER_REAL: - val = struct.unpack_from('=d', buf, pos + 1)[0] - return (val, pos + 9) - elif val_type == BSER_TRUE: - return (True, pos + 1) - elif val_type == BSER_FALSE: - return (False, pos + 1) - elif val_type == BSER_NULL: - return (None, pos + 1) - elif val_type == BSER_STRING: - return _bunser_string(buf, pos) - elif val_type == BSER_ARRAY: - return _bunser_array(buf, pos, mutable) - elif val_type == BSER_OBJECT: - return _bunser_object(buf, pos, mutable) - elif val_type == BSER_TEMPLATE: - return _bunser_template(buf, pos, mutable) +def _pdu_info_helper(buf): + bser_version = -1 + if buf[0:2] == EMPTY_HEADER[0:2]: + bser_version = 1 + bser_capabilities = 0 + expected_len, pos2 = Bunser.unser_int(buf, 2) + elif buf[0:2] == EMPTY_HEADER_V2[0:2]: + if len(buf) < 8: + raise ValueError('Invalid BSER header') + bser_version = 2 + bser_capabilities = struct.unpack_from("I", buf, 2)[0] + expected_len, pos2 = Bunser.unser_int(buf, 6) else: - raise RuntimeError('unhandled bser opcode 0x%02x' % (val_type,)) + raise ValueError('Invalid BSER header') + + return bser_version, bser_capabilities, expected_len, pos2 + + +def pdu_info(buf): + info = _pdu_info_helper(buf) + return info[0], info[1], info[2] + info[3] def pdu_len(buf): - if buf[0:2] != 
EMPTY_HEADER[0:2]: - raise RuntimeError('Invalid BSER header') - expected_len, pos = _bunser_int(buf, 2) - return expected_len + pos + info = _pdu_info_helper(buf) + return info[2] + info[3] -def loads(buf, mutable=True): - if buf[0:2] != EMPTY_HEADER[0:2]: - raise RuntimeError('Invalid BSER header') - expected_len, pos = _bunser_int(buf, 2) +def loads(buf, mutable=True, value_encoding=None, value_errors=None): + """Deserialize a BSER-encoded blob. + + @param buf: The buffer to deserialize. + @type buf: bytes + + @param mutable: Whether to return mutable results. + @type mutable: bool + + @param value_encoding: Optional codec to use to decode values. If + unspecified or None, return values as bytestrings. + @type value_encoding: str + + @param value_errors: Optional error handler for codec. 'strict' by default. + The other most common argument is 'surrogateescape' on + Python 3. If value_encoding is None, this is ignored. + @type value_errors: str + """ + + info = _pdu_info_helper(buf) + expected_len = info[2] + pos = info[3] + if len(buf) != expected_len + pos: - raise RuntimeError('bser data len != header len') - return _bser_loads_recursive(buf, pos, mutable)[0] + raise ValueError('bser data len != header len') + + bunser = Bunser(mutable=mutable, value_encoding=value_encoding, + value_errors=value_errors) -# no-check-code -- this is a 3rd party library + return bunser.loads_recursive(buf, pos)[0] + + +def load(fp, mutable=True, value_encoding=None, value_errors=None): + from . 
import load + return load.load(fp, mutable, value_encoding, value_errors) diff --git a/tests/test-check-py3-compat.t b/tests/test-check-py3-compat.t --- a/tests/test-check-py3-compat.t +++ b/tests/test-check-py3-compat.t @@ -15,10 +15,6 @@ contrib/python-zstandard/tests/test_module_attributes.py not using absolute_import contrib/python-zstandard/tests/test_roundtrip.py not using absolute_import contrib/python-zstandard/tests/test_train_dictionary.py not using absolute_import - hgext/fsmonitor/pywatchman/__init__.py not using absolute_import - hgext/fsmonitor/pywatchman/__init__.py requires print_function - hgext/fsmonitor/pywatchman/capabilities.py not using absolute_import - hgext/fsmonitor/pywatchman/pybser.py not using absolute_import i18n/check-translation.py not using absolute_import setup.py not using absolute_import tests/test-demandimport.py not using absolute_import