# Copyright 2015 Facebook, Inc.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
#  * Redistributions of source code must retain the above copyright notice,
#    this list of conditions and the following disclaimer.
#
#  * Redistributions in binary form must reproduce the above copyright notice,
#    this list of conditions and the following disclaimer in the documentation
#    and/or other materials provided with the distribution.
#
#  * Neither the name Facebook nor the names of its contributors may be used to
#    endorse or promote products derived from this software without specific
#    prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

"""Pure-Python BSER serializer (``dumps``) and deserializer (``loads``).

Encoded strings are raw bytes: text passed to ``dumps`` is encoded as UTF-8,
and ``loads`` returns byte strings (``str`` on Python 2, ``bytes`` on
Python 3) for both values and object keys.
"""

import collections
import ctypes
import struct
import sys

try:
    # Python 3.3+: the container ABCs live in collections.abc (the aliases
    # that used to exist on ``collections`` itself were removed in 3.10).
    from collections import abc as _abc
except ImportError:
    # Python 2: the ABCs are attributes of ``collections``.
    _abc = collections

# One-byte type tags.  These are byte strings so that struct's 'c' format
# accepts them on Python 3 as well as on Python 2 (where bytes is str).
BSER_ARRAY = b'\x00'
BSER_OBJECT = b'\x01'
BSER_STRING = b'\x02'
BSER_INT8 = b'\x03'
BSER_INT16 = b'\x04'
BSER_INT32 = b'\x05'
BSER_INT64 = b'\x06'
BSER_REAL = b'\x07'
BSER_TRUE = b'\x08'
BSER_FALSE = b'\x09'
BSER_NULL = b'\x0a'
BSER_TEMPLATE = b'\x0b'
BSER_SKIP = b'\x0c'

# Leave room for the serialization header, which includes
# our overall length.  To make things simpler, we'll use an
# int32 for the header
EMPTY_HEADER = b"\x00\x01\x05\x00\x00\x00\x00"

# Python 3 conditional for supporting Python 2's int/long and str/unicode
# types
if sys.version_info > (3,):
    long = int
    unicode = str

# Maps an integer width in bytes (as returned by _int_size) to the BSER type
# tag announcing it and the struct format code used to store it.
_INT_FORMATS = {
    1: (BSER_INT8, 'b'),
    2: (BSER_INT16, 'h'),
    4: (BSER_INT32, 'i'),
    8: (BSER_INT64, 'q'),
}

# Maps a BSER integer type tag to the struct format of its payload and the
# total number of bytes (tag included) the encoded integer occupies.
_INT_DECODERS = {
    BSER_INT8: ('=b', 2),
    BSER_INT16: ('=h', 3),
    BSER_INT32: ('=i', 5),
    BSER_INT64: ('=q', 9),
}


def _int_size(x):
    """Return the smallest size int that can store the value.

    The result is a width in bytes (1, 2, 4 or 8).

    Raises:
        RuntimeError: if ``x`` does not fit in a signed 64-bit integer.
    """
    if -0x80 <= x <= 0x7F:
        return 1
    elif -0x8000 <= x <= 0x7FFF:
        return 2
    elif -0x80000000 <= x <= 0x7FFFFFFF:
        return 4
    elif long(-0x8000000000000000) <= x <= long(0x7FFFFFFFFFFFFFFF):
        return 8
    else:
        raise RuntimeError('Cannot represent value: ' + str(x))


class _bser_buffer(object):
    """Growable output buffer that BSER-encodes values appended to it.

    ``buf`` is a ctypes string buffer that starts with ``EMPTY_HEADER`` and
    ``wpos`` is the offset at which the next byte will be written.
    """

    def __init__(self):
        self.buf = ctypes.create_string_buffer(8192)
        struct.pack_into(str(len(EMPTY_HEADER)) + 's', self.buf, 0,
                         EMPTY_HEADER)
        self.wpos = len(EMPTY_HEADER)

    def ensure_size(self, size):
        """Double the buffer until ``size`` more bytes fit after ``wpos``."""
        while ctypes.sizeof(self.buf) - self.wpos < size:
            ctypes.resize(self.buf, ctypes.sizeof(self.buf) * 2)

    def _append_opcode(self, opcode):
        """Write a bare one-byte type tag (true / false / null)."""
        self.ensure_size(1)
        struct.pack_into('=c', self.buf, self.wpos, opcode)
        self.wpos += 1

    def _append_count(self, opcode, count):
        """Write a container tag followed by its element count as an int."""
        size = _int_size(count)
        needed = 2 + size
        self.ensure_size(needed)
        int_tag, code = _INT_FORMATS[size]
        struct.pack_into('=cc' + code, self.buf, self.wpos,
                         opcode, int_tag, count)
        self.wpos += needed

    def append_long(self, val):
        """Append integer ``val`` using the narrowest width that holds it.

        Raises:
            RuntimeError: if ``val`` does not fit in a signed 64-bit integer.
        """
        size = _int_size(val)
        to_write = size + 1
        self.ensure_size(to_write)
        int_tag, code = _INT_FORMATS[size]
        struct.pack_into('=c' + code, self.buf, self.wpos, int_tag, val)
        self.wpos += to_write

    def append_string(self, s):
        """Append ``s`` as a BSER string; text is encoded as UTF-8 first."""
        if isinstance(s, unicode):
            s = s.encode('utf-8')
        s_len = len(s)
        size = _int_size(s_len)
        # string tag + int tag + length payload + the bytes themselves
        to_write = 2 + size + s_len
        self.ensure_size(to_write)
        int_tag, code = _INT_FORMATS[size]
        struct.pack_into('=cc' + code + str(s_len) + 's', self.buf, self.wpos,
                         BSER_STRING, int_tag, s_len, s)
        self.wpos += to_write

    def append_recursive(self, val):
        """Append ``val``, recursing into mappings and sized iterables.

        Raises:
            RuntimeError: if ``val`` (or something nested in it) has a type
                that cannot be represented, or is an out-of-range integer.
        """
        # bool must be tested before int: bool is a subclass of int.
        if isinstance(val, bool):
            self._append_opcode(BSER_TRUE if val else BSER_FALSE)
        elif val is None:
            self._append_opcode(BSER_NULL)
        elif isinstance(val, (int, long)):
            self.append_long(val)
        # (bytes, unicode) is (str, unicode) on Python 2 and (bytes, str) on
        # Python 3.  Byte strings must be caught here; otherwise on Python 3
        # they would match the iterable branch below and be written as an
        # array of integers.
        elif isinstance(val, (bytes, unicode)):
            self.append_string(val)
        elif isinstance(val, float):
            needed = 9
            self.ensure_size(needed)
            struct.pack_into('=cd', self.buf, self.wpos, BSER_REAL, val)
            self.wpos += needed
        elif isinstance(val, _abc.Mapping) and isinstance(val, _abc.Sized):
            self._append_count(BSER_OBJECT, len(val))
            # Python 2 mappings offer a lazy iteritems(); Python 3 only has
            # items().
            if hasattr(val, 'iteritems'):
                pairs = val.iteritems()
            else:
                pairs = val.items()
            for k, v in pairs:
                self.append_string(k)
                self.append_recursive(v)
        elif isinstance(val, _abc.Iterable) and isinstance(val, _abc.Sized):
            self._append_count(BSER_ARRAY, len(val))
            for v in val:
                self.append_recursive(v)
        else:
            raise RuntimeError('Cannot represent unknown value type')


def dumps(obj):
    """Serialize ``obj`` and return the complete PDU as a byte string.

    The PDU is ``EMPTY_HEADER`` with its int32 length field filled in,
    followed by the encoded value.
    """
    bser_buf = _bser_buffer()
    bser_buf.append_recursive(obj)
    # Now fill in the overall length
    obj_len = bser_buf.wpos - len(EMPTY_HEADER)
    struct.pack_into('=i', bser_buf.buf, 3, obj_len)
    return bser_buf.buf.raw[:bser_buf.wpos]


def _buf_pos(buf, pos):
    """Return the byte at ``buf[pos]`` as a length-1 byte string.

    Indexing a str (Python 2) yields a one-character string, while indexing
    bytes (Python 3) yields an int; normalizing lets callers compare the
    result against the ``BSER_*`` tags on both.

    Raises:
        IndexError: if ``pos`` is out of range.
    """
    item = buf[pos]
    if isinstance(item, int):
        item = struct.pack('=B', item)
    return item


def _bunser_int(buf, pos):
    """Decode the tagged integer at ``pos``.

    Returns:
        ``(value, next_pos)``.

    Raises:
        ValueError: if ``pos`` is out of range or the tag there is not one of
            the integer tags.
    """
    try:
        int_type = _buf_pos(buf, pos)
    except IndexError:
        raise ValueError('Invalid bser int encoding, pos out of range')
    try:
        fmt, needed = _INT_DECODERS[int_type]
    except KeyError:
        # ord(), not int(): int_type is a raw byte, not a decimal literal.
        raise ValueError('Invalid bser int encoding 0x%02x' % ord(int_type))
    int_val = struct.unpack_from(fmt, buf, pos + 1)[0]
    return (int_val, pos + needed)


def _bunser_string(buf, pos):
    """Decode the string whose tag byte is at ``pos``.

    The tag byte itself is skipped without being inspected.

    Returns:
        ``(byte_string, next_pos)``.
    """
    str_len, pos = _bunser_int(buf, pos + 1)
    str_val = struct.unpack_from(str(str_len) + 's', buf, pos)[0]
    return (str_val, pos + str_len)


def _bunser_array(buf, pos, mutable=True):
    """Decode the array whose tag byte is at ``pos``.

    Returns:
        ``(items, next_pos)`` where ``items`` is a list when ``mutable`` is
        true and a tuple otherwise.
    """
    arr_len, pos = _bunser_int(buf, pos + 1)
    arr = []
    for _ in range(arr_len):
        arr_item, pos = _bser_loads_recursive(buf, pos, mutable)
        arr.append(arr_item)

    if not mutable:
        arr = tuple(arr)
    return arr, pos


# This is a quack-alike with the bserObjectType in bser.c
# It provides getattr accessors and getitem by both index
# and name.
class _BunserDict(object):
    """Read-only record built from parallel key and value sequences."""

    __slots__ = ('_keys', '_values')

    def __init__(self, keys, values):
        self._keys = keys
        self._values = values

    def __getattr__(self, name):
        # __getattr__ only runs when normal lookup fails.  For a slot name
        # that means the slot is unset; going through __getitem__ would read
        # the same slot again and recurse without bound.
        if name in self.__slots__:
            raise AttributeError(name)
        return self.__getitem__(name)

    def __getitem__(self, key):
        if isinstance(key, (int, long)):
            return self._values[key]
        # Decoded keys are byte strings, so look text keys up by their UTF-8
        # encoding (attribute names are always text on Python 3).
        if isinstance(key, unicode):
            key = key.encode('utf-8')
        if key.startswith(b'st_'):
            # hack^Wfeature to allow mercurial to use "st_size" to
            # reference "size"
            key = key[3:]
        try:
            return self._values[self._keys.index(key)]
        except ValueError:
            # Show the key as text rather than as a bytes repr on Python 3.
            if not isinstance(key, str):
                key = key.decode('utf-8', 'replace')
            raise KeyError('_BunserDict has no key %s' % key)

    def __len__(self):
        return len(self._keys)


def _bunser_object(buf, pos, mutable=True):
    """Decode the object whose tag byte is at ``pos``.

    Returns:
        ``(obj, next_pos)`` where ``obj`` is a dict when ``mutable`` is true
        and a ``_BunserDict`` otherwise.
    """
    obj_len, pos = _bunser_int(buf, pos + 1)
    keys = []
    vals = []
    for _ in range(obj_len):
        key, pos = _bunser_string(buf, pos)
        val, pos = _bser_loads_recursive(buf, pos, mutable)
        keys.append(key)
        vals.append(val)

    if mutable:
        obj = dict(zip(keys, vals))
    else:
        obj = _BunserDict(keys, vals)
    return obj, pos


def _bunser_template(buf, pos, mutable=True):
    """Decode the template (array of objects sharing keys) at ``pos``.

    The encoding is the template tag, an array of key names, an item count,
    and then one value per key for every item; a ``BSER_SKIP`` byte in place
    of a value decodes as ``None``.

    Returns:
        ``(items, next_pos)`` where ``items`` is a list of dicts when
        ``mutable`` is true and a list of ``_BunserDict`` otherwise.

    Raises:
        RuntimeError: if the template tag is not followed by an array.
    """
    if _buf_pos(buf, pos + 1) != BSER_ARRAY:
        raise RuntimeError('Expect ARRAY to follow TEMPLATE')
    keys, pos = _bunser_array(buf, pos + 1)
    nitems, pos = _bunser_int(buf, pos)
    arr = []
    for _ in range(nitems):
        vals = []
        for _key in keys:
            if _buf_pos(buf, pos) == BSER_SKIP:
                pos += 1
                ele = None
            else:
                ele, pos = _bser_loads_recursive(buf, pos, mutable)
            vals.append(ele)

        if mutable:
            obj = dict(zip(keys, vals))
        else:
            obj = _BunserDict(keys, vals)
        arr.append(obj)
    return arr, pos


def _bser_loads_recursive(buf, pos, mutable=True):
    """Decode the single value starting at ``pos``.

    Returns:
        ``(value, next_pos)``.

    Raises:
        IndexError: if ``pos`` is out of range.
        RuntimeError: if the tag byte at ``pos`` is not a known value type.
    """
    val_type = _buf_pos(buf, pos)
    if val_type in _INT_DECODERS:
        return _bunser_int(buf, pos)
    elif val_type == BSER_REAL:
        val = struct.unpack_from('=d', buf, pos + 1)[0]
        return (val, pos + 9)
    elif val_type == BSER_TRUE:
        return (True, pos + 1)
    elif val_type == BSER_FALSE:
        return (False, pos + 1)
    elif val_type == BSER_NULL:
        return (None, pos + 1)
    elif val_type == BSER_STRING:
        return _bunser_string(buf, pos)
    elif val_type == BSER_ARRAY:
        return _bunser_array(buf, pos, mutable)
    elif val_type == BSER_OBJECT:
        return _bunser_object(buf, pos, mutable)
    elif val_type == BSER_TEMPLATE:
        return _bunser_template(buf, pos, mutable)
    else:
        # ord(): '%x' needs an integer, and val_type is a length-1 byte
        # string.
        raise RuntimeError('unhandled bser opcode 0x%02x' % ord(val_type))


def _parse_header(buf):
    """Validate the PDU header; return ``(payload_len, payload_pos)``."""
    if buf[0:2] != EMPTY_HEADER[0:2]:
        raise RuntimeError('Invalid BSER header')
    return _bunser_int(buf, 2)


def pdu_len(buf):
    """Return the total PDU length (header plus payload) announced by ``buf``.

    Only the header at the start of ``buf`` needs to be present.

    Raises:
        RuntimeError: if ``buf`` does not start with the BSER magic bytes.
        ValueError: if the length field is missing or malformed.
    """
    expected_len, pos = _parse_header(buf)
    return expected_len + pos


def loads(buf, mutable=True):
    """Deserialize one complete PDU from ``buf``.

    Args:
        buf: the encoded bytes, header included.
        mutable: when true, arrays decode to lists and objects to dicts;
            otherwise arrays decode to tuples and objects to ``_BunserDict``.

    Raises:
        RuntimeError: if the header is invalid, if ``len(buf)`` differs from
            the length announced in the header, or on an unknown type tag.
        ValueError: if an integer field is malformed.
    """
    expected_len, pos = _parse_header(buf)
    if len(buf) != expected_len + pos:
        raise RuntimeError('bser data len != header len')
    return _bser_loads_recursive(buf, pos, mutable)[0]

# no-check-code -- this is a 3rd party library