##// END OF EJS Templates
fsmonitor: refresh pywatchman to upstream...
Zack Hricz -
r30656:16f4b341 default
parent child Browse files
Show More
@@ -0,0 +1,65 b''
1 # Copyright 2016-present Facebook, Inc.
2 # All rights reserved.
3 #
4 # Redistribution and use in source and binary forms, with or without
5 # modification, are permitted provided that the following conditions are met:
6 #
7 # * Redistributions of source code must retain the above copyright notice,
8 # this list of conditions and the following disclaimer.
9 #
10 # * Redistributions in binary form must reproduce the above copyright notice,
11 # this list of conditions and the following disclaimer in the documentation
12 # and/or other materials provided with the distribution.
13 #
14 # * Neither the name Facebook nor the names of its contributors may be used to
15 # endorse or promote products derived from this software without specific
16 # prior written permission.
17 #
18 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
29 from __future__ import absolute_import
30 from __future__ import division
31 from __future__ import print_function
32 # no unicode literals
33
34 '''Compatibility module across Python 2 and 3.'''
35
36 import sys
37
# True when running under Python 3 or newer.
PYTHON3 = sys.version_info[0] >= 3

# reraise() is adapted from https://bitbucket.org/gutworth/six, and used under
# the MIT license. See LICENSE for a full copyright notice.
if not PYTHON3:
    # Text string type on Python 2.
    UNICODE = unicode

    # The three-argument raise statement is a syntax error on Python 3, so the
    # Python 2 definition has to be compiled from a string at import time.
    exec('''
def reraise(tp, value, tb=None):
    try:
        raise tp, value, tb
    finally:
        tb = None
'''.strip())
else:
    # Text string type on Python 3.
    UNICODE = str

    def reraise(tp, value, tb=None):
        """Re-raise an exception, optionally with a specific traceback.

        @param tp: Exception class; instantiated with no arguments when
                   value is None.
        @param value: Exception instance to raise, or None.
        @param tb: Traceback to attach to the exception, or None.
        """
        try:
            if value is None:
                value = tp()
            if value.__traceback__ is tb:
                raise value
            raise value.with_traceback(tb)
        finally:
            # Drop the local references once the exception is on its way.
            value = None
            tb = None
@@ -0,0 +1,73 b''
1 # Copyright 2016-present Facebook, Inc.
2 # All rights reserved.
3 #
4 # Redistribution and use in source and binary forms, with or without
5 # modification, are permitted provided that the following conditions are met:
6 #
7 # * Redistributions of source code must retain the above copyright notice,
8 # this list of conditions and the following disclaimer.
9 #
10 # * Redistributions in binary form must reproduce the above copyright notice,
11 # this list of conditions and the following disclaimer in the documentation
12 # and/or other materials provided with the distribution.
13 #
14 # * Neither the name Facebook nor the names of its contributors may be used to
15 # endorse or promote products derived from this software without specific
16 # prior written permission.
17 #
18 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
29 from __future__ import absolute_import
30 from __future__ import division
31 from __future__ import print_function
32 # no unicode literals
33
34 '''Module to deal with filename encoding on the local system, as returned by
35 Watchman.'''
36
37 import sys
38
39 from . import (
40 compat,
41 )
42
if compat.PYTHON3:
    # Error handler used by encode_local/decode_local on Python 3.
    default_local_errors = 'surrogateescape'

    def get_local_encoding():
        """Return the name of the codec used for filenames from Watchman."""
        # Watchman always returns UTF-8 encoded strings on Windows. Elsewhere,
        # on the Python 3 versions we support, sys.getfilesystemencoding never
        # returns None, so its result can be used directly.
        return ('utf-8' if sys.platform == 'win32'
                else sys.getfilesystemencoding())
else:
    # Python 2 doesn't support surrogateescape, so use 'strict' by default.
    # Users can register a custom surrogateescape error handler and use that
    # if they so desire.
    default_local_errors = 'strict'

    def get_local_encoding():
        """Return the name of the codec used for filenames from Watchman."""
        if sys.platform == 'win32':
            # Watchman always returns UTF-8 encoded strings on Windows.
            return 'utf-8'
        detected = sys.getfilesystemencoding()
        # A missing filesystem encoding is very unlikely, but if it happens
        # just use UTF-8.
        return 'utf-8' if detected is None else detected
68
def encode_local(s):
    """Encode s with the local encoding and the default error handler."""
    codec = get_local_encoding()
    return s.encode(codec, default_local_errors)
71
def decode_local(bs):
    """Decode bs with the local encoding and the default error handler."""
    codec = get_local_encoding()
    return bs.decode(codec, default_local_errors)
@@ -0,0 +1,107 b''
1 # Copyright 2016 Facebook, Inc.
2 # All rights reserved.
3 #
4 # Redistribution and use in source and binary forms, with or without
5 # modification, are permitted provided that the following conditions are met:
6 #
7 # * Redistributions of source code must retain the above copyright notice,
8 # this list of conditions and the following disclaimer.
9 #
10 # * Redistributions in binary form must reproduce the above copyright notice,
11 # this list of conditions and the following disclaimer in the documentation
12 # and/or other materials provided with the distribution.
13 #
14 # * Neither the name Facebook nor the names of its contributors may be used to
15 # endorse or promote products derived from this software without specific
16 # prior written permission.
17 #
18 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
29 from __future__ import absolute_import
30 from __future__ import division
31 from __future__ import print_function
32 # no unicode literals
33
34 try:
35 from . import bser
36 except ImportError:
37 from . import pybser as bser
38
39 import ctypes
40
41 EMPTY_HEADER = b"\x00\x01\x05\x00\x00\x00\x00"
42
43
def _read_bytes(fp, buf):
    """Fill a writable buffer from a file-like object.

    Calls fp.readinto repeatedly until buf is full or the stream stops
    producing data, so a short read from the underlying stream does not
    leave the buffer partially filled while more data is still available.

    @param fp: File-like object that implements readinto(buffer)
    @type fp: file

    @param buf: Writable ctypes buffer to read into; it is filled in place
    @type buf: ctypes array of c_char

    @return: Number of bytes actually stored in buf. This is less than
             len(buf) only if the stream ran out of data first.
    @rtype: int
    """
    offset = 0
    remaining = len(buf)
    while remaining > 0:
        # Expose the still-unfilled tail of buf as its own array so that
        # readinto writes directly after the bytes already received.
        window = (ctypes.c_char * remaining).from_buffer(buf, offset)
        nread = fp.readinto(window)
        if not nread:
            # readinto reported 0 or None: nothing more to read right now.
            break
        offset += nread
        remaining -= nread
    return offset
66
67
def load(fp, mutable=True, value_encoding=None, value_errors=None):
    """Deserialize a BSER-encoded blob read from a file object.

    The fixed-size header is read first to learn the total PDU length, then
    the remainder of the PDU is read and the whole buffer is decoded.

    @param fp: The file-object to deserialize.
    @type fp: file

    @param mutable: Whether to return mutable results.
    @type mutable: bool

    @param value_encoding: Optional codec to use to decode values. If
                           unspecified or None, return values as bytestrings.
    @type value_encoding: str

    @param value_errors: Optional error handler for codec. 'strict' by default.
                         The other most common argument is 'surrogateescape' on
                         Python 3. If value_encoding is None, this is ignored.
    @type value_errors: str

    @return: The decoded value, or None if fewer bytes than a full header
             could be read from fp.

    @raise RuntimeError: If the stream ends before the whole body was read.
    """
    header_size = len(EMPTY_HEADER)
    storage = ctypes.create_string_buffer(8192)

    # Read just enough bytes to be able to sniff the PDU length.
    header_view = (ctypes.c_char * header_size).from_buffer(storage)
    if _read_bytes(fp, header_view) < header_size:
        return None

    # Grow the backing storage when the PDU does not fit in it.
    pdu_size = bser.pdu_len(storage)
    if pdu_size > len(storage):
        ctypes.resize(storage, pdu_size)

    # The body is read into the same storage, right after the header.
    body_size = pdu_size - header_size
    body_view = (ctypes.c_char * body_size).from_buffer(storage, header_size)
    if _read_bytes(fp, body_view) < body_size:
        raise RuntimeError('bser data ended early')

    whole_pdu = (ctypes.c_char * pdu_size).from_buffer(storage, 0)
    return bser.loads(whole_pdu, mutable, value_encoding, value_errors)
@@ -26,9 +26,14 b''
26 # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
28
29 from __future__ import absolute_import
30 from __future__ import division
31 from __future__ import print_function
32 # no unicode literals
33
34 import inspect
35 import math
29 import os
36 import os
30 import errno
31 import math
32 import socket
37 import socket
33 import subprocess
38 import subprocess
34 import time
39 import time
@@ -36,11 +41,20 b' import time'
36 # Sometimes it's really hard to get Python extensions to compile,
41 # Sometimes it's really hard to get Python extensions to compile,
37 # so fall back to a pure Python implementation.
42 # so fall back to a pure Python implementation.
38 try:
43 try:
39 import bser
44 from . import bser
45 # Demandimport causes modules to be loaded lazily. Force the load now
46 # so that we can fall back on pybser if bser doesn't exist
47 bser.pdu_info
40 except ImportError:
48 except ImportError:
41 import pybser as bser
49 from . import pybser as bser
42
50
43 import capabilities
51 from . import (
52 capabilities,
53 compat,
54 encoding,
55 load,
56 )
57
44
58
45 if os.name == 'nt':
59 if os.name == 'nt':
46 import ctypes
60 import ctypes
@@ -55,18 +69,29 b" if os.name == 'nt':"
55 FORMAT_MESSAGE_FROM_SYSTEM = 0x00001000
69 FORMAT_MESSAGE_FROM_SYSTEM = 0x00001000
56 FORMAT_MESSAGE_ALLOCATE_BUFFER = 0x00000100
70 FORMAT_MESSAGE_ALLOCATE_BUFFER = 0x00000100
57 FORMAT_MESSAGE_IGNORE_INSERTS = 0x00000200
71 FORMAT_MESSAGE_IGNORE_INSERTS = 0x00000200
72 WAIT_FAILED = 0xFFFFFFFF
58 WAIT_TIMEOUT = 0x00000102
73 WAIT_TIMEOUT = 0x00000102
59 WAIT_OBJECT_0 = 0x00000000
74 WAIT_OBJECT_0 = 0x00000000
60 ERROR_IO_PENDING = 997
75 WAIT_IO_COMPLETION = 0x000000C0
76 INFINITE = 0xFFFFFFFF
77
78 # Overlapped I/O operation is in progress. (997)
79 ERROR_IO_PENDING = 0x000003E5
80
81 # The pointer size follows the architecture
82 # We use WPARAM since this type is already conditionally defined
83 ULONG_PTR = ctypes.wintypes.WPARAM
61
84
62 class OVERLAPPED(ctypes.Structure):
85 class OVERLAPPED(ctypes.Structure):
63 _fields_ = [
86 _fields_ = [
64 ("Internal", wintypes.ULONG), ("InternalHigh", wintypes.ULONG),
87 ("Internal", ULONG_PTR), ("InternalHigh", ULONG_PTR),
65 ("Offset", wintypes.DWORD), ("OffsetHigh", wintypes.DWORD),
88 ("Offset", wintypes.DWORD), ("OffsetHigh", wintypes.DWORD),
66 ("hEvent", wintypes.HANDLE)
89 ("hEvent", wintypes.HANDLE)
67 ]
90 ]
68
91
69 def __init__(self):
92 def __init__(self):
93 self.Internal = 0
94 self.InternalHigh = 0
70 self.Offset = 0
95 self.Offset = 0
71 self.OffsetHigh = 0
96 self.OffsetHigh = 0
72 self.hEvent = 0
97 self.hEvent = 0
@@ -97,6 +122,10 b" if os.name == 'nt':"
97 GetLastError.argtypes = []
122 GetLastError.argtypes = []
98 GetLastError.restype = wintypes.DWORD
123 GetLastError.restype = wintypes.DWORD
99
124
125 SetLastError = ctypes.windll.kernel32.SetLastError
126 SetLastError.argtypes = [wintypes.DWORD]
127 SetLastError.restype = None
128
100 FormatMessage = ctypes.windll.kernel32.FormatMessageA
129 FormatMessage = ctypes.windll.kernel32.FormatMessageA
101 FormatMessage.argtypes = [wintypes.DWORD, wintypes.LPVOID, wintypes.DWORD,
130 FormatMessage.argtypes = [wintypes.DWORD, wintypes.LPVOID, wintypes.DWORD,
102 wintypes.DWORD, ctypes.POINTER(wintypes.LPSTR),
131 wintypes.DWORD, ctypes.POINTER(wintypes.LPSTR),
@@ -105,12 +134,30 b" if os.name == 'nt':"
105
134
106 LocalFree = ctypes.windll.kernel32.LocalFree
135 LocalFree = ctypes.windll.kernel32.LocalFree
107
136
108 GetOverlappedResultEx = ctypes.windll.kernel32.GetOverlappedResultEx
137 GetOverlappedResult = ctypes.windll.kernel32.GetOverlappedResult
109 GetOverlappedResultEx.argtypes = [wintypes.HANDLE,
138 GetOverlappedResult.argtypes = [wintypes.HANDLE,
110 ctypes.POINTER(OVERLAPPED), LPDWORD,
139 ctypes.POINTER(OVERLAPPED), LPDWORD,
111 wintypes.DWORD, wintypes.BOOL]
140 wintypes.BOOL]
112 GetOverlappedResultEx.restype = wintypes.BOOL
141 GetOverlappedResult.restype = wintypes.BOOL
113
142
143 GetOverlappedResultEx = getattr(ctypes.windll.kernel32,
144 'GetOverlappedResultEx', None)
145 if GetOverlappedResultEx is not None:
146 GetOverlappedResultEx.argtypes = [wintypes.HANDLE,
147 ctypes.POINTER(OVERLAPPED), LPDWORD,
148 wintypes.DWORD, wintypes.BOOL]
149 GetOverlappedResultEx.restype = wintypes.BOOL
150
151 WaitForSingleObjectEx = ctypes.windll.kernel32.WaitForSingleObjectEx
152 WaitForSingleObjectEx.argtypes = [wintypes.HANDLE, wintypes.DWORD, wintypes.BOOL]
153 WaitForSingleObjectEx.restype = wintypes.DWORD
154
155 CreateEvent = ctypes.windll.kernel32.CreateEventA
156 CreateEvent.argtypes = [LPDWORD, wintypes.BOOL, wintypes.BOOL,
157 wintypes.LPSTR]
158 CreateEvent.restype = wintypes.HANDLE
159
160 # Windows Vista is the minimum supported client for CancelIoEx.
114 CancelIoEx = ctypes.windll.kernel32.CancelIoEx
161 CancelIoEx = ctypes.windll.kernel32.CancelIoEx
115 CancelIoEx.argtypes = [wintypes.HANDLE, ctypes.POINTER(OVERLAPPED)]
162 CancelIoEx.argtypes = [wintypes.HANDLE, ctypes.POINTER(OVERLAPPED)]
116 CancelIoEx.restype = wintypes.BOOL
163 CancelIoEx.restype = wintypes.BOOL
@@ -132,8 +179,47 b' else:'
132 pass
179 pass
133
180
134
181
def _win32_strerror(err):
    """ expand a win32 error code into a human readable message

    Returns whatever FormatMessage stored for the given error code, read
    through the c_char_p that receives the message buffer.
    """
    flags = (FORMAT_MESSAGE_FROM_SYSTEM
             | FORMAT_MESSAGE_ALLOCATE_BUFFER
             | FORMAT_MESSAGE_IGNORE_INSERTS)

    # FormatMessage will allocate memory and assign it here
    message = ctypes.c_char_p()
    FormatMessage(flags, None, err, 0, message, 0, None)
    try:
        text = message.value
    finally:
        # The buffer was allocated on our behalf; release it in every case.
        LocalFree(message)
    return text
194
195
class WatchmanError(Exception):
    """ Base class for the errors raised by this module.

    self.msg is the error description, or None if none was supplied.
    self.cmd is the command that was being executed, if known.
    """

    def __init__(self, msg=None, cmd=None):
        self.msg = msg
        self.cmd = cmd

    def setCommand(self, cmd):
        """ record the command that was executing when the error occurred """
        self.cmd = cmd

    def __str__(self):
        if self.cmd:
            return '%s, while executing %s' % (self.msg, self.cmd)
        if self.msg is None:
            # msg defaults to None, and __str__ must return a string:
            # returning None here would make str(exc) raise TypeError.
            return ''
        return self.msg
208
209
class WatchmanEnvironmentError(WatchmanError):
    """ WatchmanError whose message also carries an errno and its text """

    def __init__(self, msg, errno, errmsg, cmd=None):
        # Fold the error number and its description into a single message.
        detail = '{0}: errno={1} errmsg={2}'.format(msg, errno, errmsg)
        super(WatchmanEnvironmentError, self).__init__(detail, cmd)
215
216
class SocketConnectError(WatchmanError):
    """ raised when connecting to the socket at sockpath fails

    self.sockpath is the path that could not be connected to.
    self.exc is the underlying exception that was passed in.
    """

    def __init__(self, sockpath, exc):
        description = 'unable to connect to %s: %s' % (sockpath, exc)
        super(SocketConnectError, self).__init__(description)
        self.exc = exc
        self.sockpath = sockpath
137
223
138
224
139 class SocketTimeout(WatchmanError):
225 class SocketTimeout(WatchmanError):
@@ -151,19 +237,11 b' class CommandError(WatchmanError):'
151
237
152 self.msg is the message returned by watchman.
238 self.msg is the message returned by watchman.
153 """
239 """
154
155 def __init__(self, msg, cmd=None):
240 def __init__(self, msg, cmd=None):
156 self.msg = msg
241 super(CommandError, self).__init__(
157 self.cmd = cmd
242 'watchman command error: %s' % (msg, ),
158 super(CommandError, self).__init__('watchman command error: %s' % msg)
243 cmd,
159
244 )
160 def setCommand(self, cmd):
161 self.cmd = cmd
162
163 def __str__(self):
164 if self.cmd:
165 return '%s, while executing %s' % (self.msg, self.cmd)
166 return self.msg
167
245
168
246
169 class Transport(object):
247 class Transport(object):
@@ -195,16 +273,16 b' class Transport(object):'
195
273
196 # Buffer may already have a line if we've received unilateral
274 # Buffer may already have a line if we've received unilateral
197 # response(s) from the server
275 # response(s) from the server
198 if len(self.buf) == 1 and "\n" in self.buf[0]:
276 if len(self.buf) == 1 and b"\n" in self.buf[0]:
199 (line, b) = self.buf[0].split("\n", 1)
277 (line, b) = self.buf[0].split(b"\n", 1)
200 self.buf = [b]
278 self.buf = [b]
201 return line
279 return line
202
280
203 while True:
281 while True:
204 b = self.readBytes(4096)
282 b = self.readBytes(4096)
205 if "\n" in b:
283 if b"\n" in b:
206 result = ''.join(self.buf)
284 result = b''.join(self.buf)
207 (line, b) = b.split("\n", 1)
285 (line, b) = b.split(b"\n", 1)
208 self.buf = [b]
286 self.buf = [b]
209 return result + line
287 return result + line
210 self.buf.append(b)
288 self.buf.append(b)
@@ -241,8 +319,8 b' class UnixSocketTransport(Transport):'
241 sock.connect(self.sockpath)
319 sock.connect(self.sockpath)
242 self.sock = sock
320 self.sock = sock
243 except socket.error as e:
321 except socket.error as e:
244 raise WatchmanError('unable to connect to %s: %s' %
322 sock.close()
245 (self.sockpath, e))
323 raise SocketConnectError(self.sockpath, e)
246
324
247 def close(self):
325 def close(self):
248 self.sock.close()
326 self.sock.close()
@@ -268,6 +346,46 b' class UnixSocketTransport(Transport):'
268 raise SocketTimeout('timed out sending query command')
346 raise SocketTimeout('timed out sending query command')
269
347
270
348
def _get_overlapped_result_ex_impl(pipe, olap, nbytes, millis, alertable):
    """ Fallback for GetOverlappedResultEx on systems that lack it.

    Windows 7 and earlier does not support GetOverlappedResultEx. The
    alternative is to use GetOverlappedResult and wait for the read or write
    operation to complete. This is done by using CreateEvent and
    WaitForSingleObjectEx. CreateEvent, WaitForSingleObjectEx
    and GetOverlappedResult are all part of the Windows API since Windows XP.
    This is the exact same implementation that can be found in the watchman
    source code (see get_overlapped_result_ex_impl in stream_win.c). This
    way, maintenance should be simplified.

    Returns False when the wait times out or fails; otherwise returns the
    result of GetOverlappedResult. When millis is 0 no wait is performed.
    """
    log('Preparing to wait for maximum %dms', millis )

    if millis != 0:
        status = WaitForSingleObjectEx(olap.hEvent, millis, alertable)

        if status == WAIT_TIMEOUT:
            # We reached the maximum allowed wait time, the IO operation
            # failed to complete in timely fashion.
            SetLastError(WAIT_TIMEOUT)
            return False

        if status == WAIT_IO_COMPLETION:
            # WaitForSingleObjectEx returns because the system added an I/O
            # completion routine or an asynchronous procedure call (APC) to
            # the thread queue. Record that and still query the result below.
            SetLastError(WAIT_IO_COMPLETION)
        elif status != WAIT_OBJECT_0:
            # Anything but a signaled event at this point is a failure:
            # either WaitForSingleObjectEx itself went wrong, or we hit an
            # unexpected situation deserving investigation.
            if status == WAIT_FAILED:
                template = 'WaitForSingleObjectEx failed: %s'
            else:
                template = 'Unexpected error: %s'
            log(template, _win32_strerror(GetLastError()))
            return False

    # Either no wait was requested or the event is signaled; the overlapped
    # IO operation result should be available.
    return GetOverlappedResult(pipe, olap, nbytes, False)
387
388
271 class WindowsNamedPipeTransport(Transport):
389 class WindowsNamedPipeTransport(Transport):
272 """ connect to a named pipe """
390 """ connect to a named pipe """
273
391
@@ -284,28 +402,35 b' class WindowsNamedPipeTransport(Transpor'
284 self._raise_win_err('failed to open pipe %s' % sockpath,
402 self._raise_win_err('failed to open pipe %s' % sockpath,
285 GetLastError())
403 GetLastError())
286
404
287 def _win32_strerror(self, err):
405 # event for the overlapped I/O operations
288 """ expand a win32 error code into a human readable message """
406 self._waitable = CreateEvent(None, True, False, None)
407 if self._waitable is None:
408 self._raise_win_err('CreateEvent failed', GetLastError())
289
409
290 # FormatMessage will allocate memory and assign it here
410 self._get_overlapped_result_ex = GetOverlappedResultEx
291 buf = ctypes.c_char_p()
411 if (os.getenv('WATCHMAN_WIN7_COMPAT') == '1' or
292 FormatMessage(
412 self._get_overlapped_result_ex is None):
293 FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_ALLOCATE_BUFFER
413 self._get_overlapped_result_ex = _get_overlapped_result_ex_impl
294 | FORMAT_MESSAGE_IGNORE_INSERTS, None, err, 0, buf, 0, None)
295 try:
296 return buf.value
297 finally:
298 LocalFree(buf)
299
414
300 def _raise_win_err(self, msg, err):
415 def _raise_win_err(self, msg, err):
301 raise IOError('%s win32 error code: %d %s' %
416 raise IOError('%s win32 error code: %d %s' %
302 (msg, err, self._win32_strerror(err)))
417 (msg, err, _win32_strerror(err)))
303
418
304 def close(self):
419 def close(self):
305 if self.pipe:
420 if self.pipe:
421 log('Closing pipe')
306 CloseHandle(self.pipe)
422 CloseHandle(self.pipe)
307 self.pipe = None
423 self.pipe = None
308
424
425 if self._waitable is not None:
426 # We release the handle for the event
427 CloseHandle(self._waitable)
428 self._waitable = None
429
430 def setTimeout(self, value):
431 # convert to milliseconds
432 self.timeout = int(value * 1000)
433
309 def readBytes(self, size):
434 def readBytes(self, size):
310 """ A read can block for an unbounded amount of time, even if the
435 """ A read can block for an unbounded amount of time, even if the
311 kernel reports that the pipe handle is signalled, so we need to
436 kernel reports that the pipe handle is signalled, so we need to
@@ -325,6 +450,7 b' class WindowsNamedPipeTransport(Transpor'
325 # We need to initiate a read
450 # We need to initiate a read
326 buf = ctypes.create_string_buffer(size)
451 buf = ctypes.create_string_buffer(size)
327 olap = OVERLAPPED()
452 olap = OVERLAPPED()
453 olap.hEvent = self._waitable
328
454
329 log('made read buff of size %d', size)
455 log('made read buff of size %d', size)
330
456
@@ -339,8 +465,9 b' class WindowsNamedPipeTransport(Transpor'
339 GetLastError())
465 GetLastError())
340
466
341 nread = wintypes.DWORD()
467 nread = wintypes.DWORD()
342 if not GetOverlappedResultEx(self.pipe, olap, nread,
468 if not self._get_overlapped_result_ex(self.pipe, olap, nread,
343 0 if immediate else self.timeout, True):
469 0 if immediate else self.timeout,
470 True):
344 err = GetLastError()
471 err = GetLastError()
345 CancelIoEx(self.pipe, olap)
472 CancelIoEx(self.pipe, olap)
346
473
@@ -374,6 +501,8 b' class WindowsNamedPipeTransport(Transpor'
374
501
375 def write(self, data):
502 def write(self, data):
376 olap = OVERLAPPED()
503 olap = OVERLAPPED()
504 olap.hEvent = self._waitable
505
377 immediate = WriteFile(self.pipe, ctypes.c_char_p(data), len(data),
506 immediate = WriteFile(self.pipe, ctypes.c_char_p(data), len(data),
378 None, olap)
507 None, olap)
379
508
@@ -385,8 +514,10 b' class WindowsNamedPipeTransport(Transpor'
385
514
386 # Obtain results, waiting if needed
515 # Obtain results, waiting if needed
387 nwrote = wintypes.DWORD()
516 nwrote = wintypes.DWORD()
388 if GetOverlappedResultEx(self.pipe, olap, nwrote, 0 if immediate else
517 if self._get_overlapped_result_ex(self.pipe, olap, nwrote,
389 self.timeout, True):
518 0 if immediate else self.timeout,
519 True):
520 log('made write of %d bytes', nwrote.value)
390 return nwrote.value
521 return nwrote.value
391
522
392 err = GetLastError()
523 err = GetLastError()
@@ -430,7 +561,10 b' class CLIProcessTransport(Transport):'
430
561
431 def close(self):
562 def close(self):
432 if self.proc:
563 if self.proc:
433 self.proc.kill()
564 if self.proc.pid is not None:
565 self.proc.kill()
566 self.proc.stdin.close()
567 self.proc.stdout.close()
434 self.proc = None
568 self.proc = None
435
569
436 def _connect(self):
570 def _connect(self):
@@ -438,7 +572,7 b' class CLIProcessTransport(Transport):'
438 return self.proc
572 return self.proc
439 args = [
573 args = [
440 'watchman',
574 'watchman',
441 '--sockname={}'.format(self.sockpath),
575 '--sockname={0}'.format(self.sockpath),
442 '--logfile=/BOGUS',
576 '--logfile=/BOGUS',
443 '--statefile=/BOGUS',
577 '--statefile=/BOGUS',
444 '--no-spawn',
578 '--no-spawn',
@@ -460,8 +594,8 b' class CLIProcessTransport(Transport):'
460
594
461 def write(self, data):
595 def write(self, data):
462 if self.closed:
596 if self.closed:
597 self.close()
463 self.closed = False
598 self.closed = False
464 self.proc = None
465 self._connect()
599 self._connect()
466 res = self.proc.stdin.write(data)
600 res = self.proc.stdin.write(data)
467 self.proc.stdin.close()
601 self.proc.stdin.close()
@@ -473,21 +607,21 b' class BserCodec(Codec):'
473 """ use the BSER encoding. This is the default, preferred codec """
607 """ use the BSER encoding. This is the default, preferred codec """
474
608
475 def _loads(self, response):
609 def _loads(self, response):
476 return bser.loads(response)
610 return bser.loads(response) # Defaults to BSER v1
477
611
478 def receive(self):
612 def receive(self):
479 buf = [self.transport.readBytes(sniff_len)]
613 buf = [self.transport.readBytes(sniff_len)]
480 if not buf[0]:
614 if not buf[0]:
481 raise WatchmanError('empty watchman response')
615 raise WatchmanError('empty watchman response')
482
616
483 elen = bser.pdu_len(buf[0])
617 _1, _2, elen = bser.pdu_info(buf[0])
484
618
485 rlen = len(buf[0])
619 rlen = len(buf[0])
486 while elen > rlen:
620 while elen > rlen:
487 buf.append(self.transport.readBytes(elen - rlen))
621 buf.append(self.transport.readBytes(elen - rlen))
488 rlen += len(buf[-1])
622 rlen += len(buf[-1])
489
623
490 response = ''.join(buf)
624 response = b''.join(buf)
491 try:
625 try:
492 res = self._loads(response)
626 res = self._loads(response)
493 return res
627 return res
@@ -495,7 +629,7 b' class BserCodec(Codec):'
495 raise WatchmanError('watchman response decode error: %s' % e)
629 raise WatchmanError('watchman response decode error: %s' % e)
496
630
497 def send(self, *args):
631 def send(self, *args):
498 cmd = bser.dumps(*args)
632 cmd = bser.dumps(*args) # Defaults to BSER v1
499 self.transport.write(cmd)
633 self.transport.write(cmd)
500
634
501
635
@@ -504,7 +638,64 b' class ImmutableBserCodec(BserCodec):'
504 immutable object support """
638 immutable object support """
505
639
506 def _loads(self, response):
640 def _loads(self, response):
507 return bser.loads(response, False)
641 return bser.loads(response, False) # Defaults to BSER v1
642
643
class Bser2WithFallbackCodec(BserCodec):
    """ use BSER v2 encoding when the server supports it, else BSER v1

    On construction a "version" command is sent to ask whether the server
    has the optional "bser-v2" capability; the answer selects the BSER
    version used for every later send.
    """

    def __init__(self, transport):
        super(Bser2WithFallbackCodec, self).__init__(transport)
        # Once the server advertises support for bser-v2 we should switch this
        # to 'required' on Python 3.
        # bser_version is not set yet, so this exchange uses the defaults
        # (see the hasattr checks in send and receive).
        self.send(["version", {"optional": ["bser-v2"]}])

        capabilities = self.receive()

        if 'error' in capabilities:
            raise Exception('Unsupported BSER version')

        if capabilities['capabilities']['bser-v2']:
            self.bser_version = 2
        else:
            self.bser_version = 1
        self.bser_capabilities = 0

    def _loads(self, response):
        return bser.loads(response)

    def receive(self):
        """ read one complete PDU from the transport and decode it

        Raises WatchmanError if the response is empty or cannot be decoded.
        """
        buf = [self.transport.readBytes(sniff_len)]
        if not buf[0]:
            raise WatchmanError('empty watchman response')

        recv_bser_version, recv_bser_capabilities, elen = bser.pdu_info(buf[0])

        if hasattr(self, 'bser_version'):
            # Readjust BSER version and capabilities if necessary.
            self.bser_version = max(self.bser_version, recv_bser_version)
            # send() reads self.bser_capabilities, so the narrowed mask has to
            # be stored there for the readjustment to take effect. It used to
            # be written only to self.capabilities, which nothing in this
            # class reads; that attribute is still set for compatibility.
            self.bser_capabilities = (
                self.bser_capabilities & recv_bser_capabilities)
            self.capabilities = self.bser_capabilities

        # Keep reading until the length announced in the header is reached.
        rlen = len(buf[0])
        while elen > rlen:
            buf.append(self.transport.readBytes(elen - rlen))
            rlen += len(buf[-1])

        response = b''.join(buf)
        try:
            res = self._loads(response)
            return res
        except ValueError as e:
            raise WatchmanError('watchman response decode error: %s' % e)

    def send(self, *args):
        """ encode args and write them to the transport """
        if hasattr(self, 'bser_version'):
            cmd = bser.dumps(*args, version=self.bser_version,
                             capabilities=self.bser_capabilities)
        else:
            # Version not negotiated yet: use the encoder's defaults.
            cmd = bser.dumps(*args)
        self.transport.write(cmd)
508
699
509
700
510 class JsonCodec(Codec):
701 class JsonCodec(Codec):
@@ -520,6 +711,13 b' class JsonCodec(Codec):'
520 def receive(self):
711 def receive(self):
521 line = self.transport.readLine()
712 line = self.transport.readLine()
522 try:
713 try:
714 # In Python 3, json.loads is a transformation from Unicode string to
715 # objects possibly containing Unicode strings. We typically expect
716 # the JSON blob to be ASCII-only with non-ASCII characters escaped,
717 # but it's possible we might get non-ASCII bytes that are valid
718 # UTF-8.
719 if compat.PYTHON3:
720 line = line.decode('utf-8')
523 return self.json.loads(line)
721 return self.json.loads(line)
524 except Exception as e:
722 except Exception as e:
525 print(e, line)
723 print(e, line)
@@ -527,7 +725,12 b' class JsonCodec(Codec):'
527
725
528 def send(self, *args):
726 def send(self, *args):
529 cmd = self.json.dumps(*args)
727 cmd = self.json.dumps(*args)
530 self.transport.write(cmd + "\n")
728 # In Python 3, json.dumps is a transformation from objects possibly
729 # containing Unicode strings to Unicode string. Even with (the default)
730 # ensure_ascii=True, dumps returns a Unicode string.
731 if compat.PYTHON3:
732 cmd = cmd.encode('ascii')
733 self.transport.write(cmd + b"\n")
531
734
532
735
533 class client(object):
736 class client(object):
@@ -556,22 +759,27 b' class client(object):'
556 self.timeout = timeout
759 self.timeout = timeout
557 self.useImmutableBser = useImmutableBser
760 self.useImmutableBser = useImmutableBser
558
761
559 transport = transport or os.getenv('WATCHMAN_TRANSPORT') or 'local'
762 if inspect.isclass(transport) and issubclass(transport, Transport):
560 if transport == 'local' and os.name == 'nt':
763 self.transport = transport
561 self.transport = WindowsNamedPipeTransport
562 elif transport == 'local':
563 self.transport = UnixSocketTransport
564 elif transport == 'cli':
565 self.transport = CLIProcessTransport
566 if sendEncoding is None:
567 sendEncoding = 'json'
568 if recvEncoding is None:
569 recvEncoding = sendEncoding
570 else:
764 else:
571 raise WatchmanError('invalid transport %s' % transport)
765 transport = transport or os.getenv('WATCHMAN_TRANSPORT') or 'local'
766 if transport == 'local' and os.name == 'nt':
767 self.transport = WindowsNamedPipeTransport
768 elif transport == 'local':
769 self.transport = UnixSocketTransport
770 elif transport == 'cli':
771 self.transport = CLIProcessTransport
772 if sendEncoding is None:
773 sendEncoding = 'json'
774 if recvEncoding is None:
775 recvEncoding = sendEncoding
776 else:
777 raise WatchmanError('invalid transport %s' % transport)
572
778
573 sendEncoding = sendEncoding or os.getenv('WATCHMAN_ENCODING') or 'bser'
779 sendEncoding = str(sendEncoding or os.getenv('WATCHMAN_ENCODING') or
574 recvEncoding = recvEncoding or os.getenv('WATCHMAN_ENCODING') or 'bser'
780 'bser')
781 recvEncoding = str(recvEncoding or os.getenv('WATCHMAN_ENCODING') or
782 'bser')
575
783
576 self.recvCodec = self._parseEncoding(recvEncoding)
784 self.recvCodec = self._parseEncoding(recvEncoding)
577 self.sendCodec = self._parseEncoding(sendEncoding)
785 self.sendCodec = self._parseEncoding(sendEncoding)
@@ -581,6 +789,8 b' class client(object):'
581 if self.useImmutableBser:
789 if self.useImmutableBser:
582 return ImmutableBserCodec
790 return ImmutableBserCodec
583 return BserCodec
791 return BserCodec
792 elif enc == 'experimental-bser-v2':
793 return Bser2WithFallbackCodec
584 elif enc == 'json':
794 elif enc == 'json':
585 return JsonCodec
795 return JsonCodec
586 else:
796 else:
@@ -600,10 +810,20 b' class client(object):'
600
810
601 cmd = ['watchman', '--output-encoding=bser', 'get-sockname']
811 cmd = ['watchman', '--output-encoding=bser', 'get-sockname']
602 try:
812 try:
603 p = subprocess.Popen(cmd,
813 args = dict(stdout=subprocess.PIPE,
604 stdout=subprocess.PIPE,
814 stderr=subprocess.PIPE,
605 stderr=subprocess.PIPE,
815 close_fds=os.name != 'nt')
606 close_fds=os.name != 'nt')
816
817 if os.name == 'nt':
818 # if invoked via an application with graphical user interface,
819 # this call will cause a brief command window pop-up.
820 # Using the flag STARTF_USESHOWWINDOW to avoid this behavior.
821 startupinfo = subprocess.STARTUPINFO()
822 startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW
823 args['startupinfo'] = startupinfo
824
825 p = subprocess.Popen(cmd, **args)
826
607 except OSError as e:
827 except OSError as e:
608 raise WatchmanError('"watchman" executable not in PATH (%s)', e)
828 raise WatchmanError('"watchman" executable not in PATH (%s)', e)
609
829
@@ -614,10 +834,10 b' class client(object):'
614 raise WatchmanError("watchman exited with code %d" % exitcode)
834 raise WatchmanError("watchman exited with code %d" % exitcode)
615
835
616 result = bser.loads(stdout)
836 result = bser.loads(stdout)
617 if 'error' in result:
837 if b'error' in result:
618 raise WatchmanError('get-sockname error: %s' % result['error'])
838 raise WatchmanError('get-sockname error: %s' % result['error'])
619
839
620 return result['sockname']
840 return result[b'sockname']
621
841
622 def _connect(self):
842 def _connect(self):
623 """ establish transport connection """
843 """ establish transport connection """
@@ -660,10 +880,16 b' class client(object):'
660 self._connect()
880 self._connect()
661 result = self.recvConn.receive()
881 result = self.recvConn.receive()
662 if self._hasprop(result, 'error'):
882 if self._hasprop(result, 'error'):
663 raise CommandError(result['error'])
883 error = result['error']
884 if compat.PYTHON3 and isinstance(self.recvConn, BserCodec):
885 error = result['error'].decode('utf-8', 'surrogateescape')
886 raise CommandError(error)
664
887
665 if self._hasprop(result, 'log'):
888 if self._hasprop(result, 'log'):
666 self.logs.append(result['log'])
889 log = result['log']
890 if compat.PYTHON3 and isinstance(self.recvConn, BserCodec):
891 log = log.decode('utf-8', 'surrogateescape')
892 self.logs.append(log)
667
893
668 if self._hasprop(result, 'subscription'):
894 if self._hasprop(result, 'subscription'):
669 sub = result['subscription']
895 sub = result['subscription']
@@ -682,6 +908,9 b' class client(object):'
682 return result
908 return result
683
909
684 def isUnilateralResponse(self, res):
910 def isUnilateralResponse(self, res):
911 if 'unilateral' in res and res['unilateral']:
912 return True
913 # Fall back to checking for known unilateral responses
685 for k in self.unilateral:
914 for k in self.unilateral:
686 if k in res:
915 if k in res:
687 return True
916 return True
@@ -712,6 +941,13 b' class client(object):'
712 remove processing impacts both the unscoped and scoped stores
941 remove processing impacts both the unscoped and scoped stores
713 for the subscription data.
942 for the subscription data.
714 """
943 """
944 if compat.PYTHON3 and issubclass(self.recvCodec, BserCodec):
945 # People may pass in Unicode strings here -- but currently BSER only
946 # returns bytestrings. Deal with that.
947 if isinstance(root, str):
948 root = encoding.encode_local(root)
949 if isinstance(name, str):
950 name = name.encode('utf-8')
715
951
716 if root is not None:
952 if root is not None:
717 if not root in self.sub_by_root:
953 if not root in self.sub_by_root:
@@ -752,9 +988,17 b' class client(object):'
752 res = self.receive()
988 res = self.receive()
753
989
754 return res
990 return res
755 except CommandError as ex:
991 except EnvironmentError as ee:
992 # When we can depend on Python 3, we can use PEP 3134
993 # exception chaining here.
994 raise WatchmanEnvironmentError(
995 'I/O error communicating with watchman daemon',
996 ee.errno,
997 ee.strerror,
998 args)
999 except WatchmanError as ex:
756 ex.setCommand(args)
1000 ex.setCommand(args)
757 raise ex
1001 raise
758
1002
759 def capabilityCheck(self, optional=None, required=None):
1003 def capabilityCheck(self, optional=None, required=None):
760 """ Perform a server capability check """
1004 """ Perform a server capability check """
@@ -775,5 +1019,3 b' class client(object):'
775 def setTimeout(self, value):
1019 def setTimeout(self, value):
776 self.recvConn.setTimeout(value)
1020 self.recvConn.setTimeout(value)
777 self.sendConn.setTimeout(value)
1021 self.sendConn.setTimeout(value)
778
779 # no-check-code -- this is a 3rd party library
This diff has been collapsed as it changes many lines, (672 lines changed) Show them Hide them
@@ -29,11 +29,27 b' OF THIS SOFTWARE, EVEN IF ADVISED OF THE'
29 */
29 */
30
30
31 #include <Python.h>
31 #include <Python.h>
32 #include <bytesobject.h>
32 #ifdef _MSC_VER
33 #ifdef _MSC_VER
33 #define inline __inline
34 #define inline __inline
34 #include "msc_stdint.h"
35 #if _MSC_VER >= 1800
36 #include <stdint.h>
37 #else
38 // The compiler associated with Python 2.7 on Windows doesn't ship
39 // with stdint.h, so define the small subset that we use here.
40 typedef __int8 int8_t;
41 typedef __int16 int16_t;
42 typedef __int32 int32_t;
43 typedef __int64 int64_t;
44 typedef unsigned __int8 uint8_t;
45 typedef unsigned __int16 uint16_t;
46 typedef unsigned __int32 uint32_t;
47 typedef unsigned __int64 uint64_t;
48 #define UINT32_MAX 4294967295U
49 #endif
35 #endif
50 #endif
36
51
52 // clang-format off
37 /* Return the smallest size int that can store the value */
53 /* Return the smallest size int that can store the value */
38 #define INT_SIZE(x) (((x) == ((int8_t)x)) ? 1 : \
54 #define INT_SIZE(x) (((x) == ((int8_t)x)) ? 1 : \
39 ((x) == ((int16_t)x)) ? 2 : \
55 ((x) == ((int16_t)x)) ? 2 : \
@@ -41,7 +57,7 b' OF THIS SOFTWARE, EVEN IF ADVISED OF THE'
41
57
42 #define BSER_ARRAY 0x00
58 #define BSER_ARRAY 0x00
43 #define BSER_OBJECT 0x01
59 #define BSER_OBJECT 0x01
44 #define BSER_STRING 0x02
60 #define BSER_BYTESTRING 0x02
45 #define BSER_INT8 0x03
61 #define BSER_INT8 0x03
46 #define BSER_INT16 0x04
62 #define BSER_INT16 0x04
47 #define BSER_INT32 0x05
63 #define BSER_INT32 0x05
@@ -52,6 +68,8 b' OF THIS SOFTWARE, EVEN IF ADVISED OF THE'
52 #define BSER_NULL 0x0a
68 #define BSER_NULL 0x0a
53 #define BSER_TEMPLATE 0x0b
69 #define BSER_TEMPLATE 0x0b
54 #define BSER_SKIP 0x0c
70 #define BSER_SKIP 0x0c
71 #define BSER_UTF8STRING 0x0d
72 // clang-format on
55
73
56 // An immutable object representation of BSER_OBJECT.
74 // An immutable object representation of BSER_OBJECT.
57 // Rather than build a hash table, key -> value are obtained
75 // Rather than build a hash table, key -> value are obtained
@@ -64,24 +82,27 b' OF THIS SOFTWARE, EVEN IF ADVISED OF THE'
64 // approach, this is still faster for the mercurial use case
82 // approach, this is still faster for the mercurial use case
65 // as it helps to eliminate creating N other objects to
83 // as it helps to eliminate creating N other objects to
66 // represent the stat information in the hgwatchman extension
84 // represent the stat information in the hgwatchman extension
85 // clang-format off
67 typedef struct {
86 typedef struct {
68 PyObject_HEAD
87 PyObject_HEAD
69 PyObject *keys; // tuple of field names
88 PyObject *keys; // tuple of field names
70 PyObject *values; // tuple of values
89 PyObject *values; // tuple of values
71 } bserObject;
90 } bserObject;
91 // clang-format on
72
92
73 static Py_ssize_t bserobj_tuple_length(PyObject *o) {
93 static Py_ssize_t bserobj_tuple_length(PyObject* o) {
74 bserObject *obj = (bserObject*)o;
94 bserObject* obj = (bserObject*)o;
75
95
76 return PySequence_Length(obj->keys);
96 return PySequence_Length(obj->keys);
77 }
97 }
78
98
79 static PyObject *bserobj_tuple_item(PyObject *o, Py_ssize_t i) {
99 static PyObject* bserobj_tuple_item(PyObject* o, Py_ssize_t i) {
80 bserObject *obj = (bserObject*)o;
100 bserObject* obj = (bserObject*)o;
81
101
82 return PySequence_GetItem(obj->values, i);
102 return PySequence_GetItem(obj->values, i);
83 }
103 }
84
104
105 // clang-format off
85 static PySequenceMethods bserobj_sq = {
106 static PySequenceMethods bserobj_sq = {
86 bserobj_tuple_length, /* sq_length */
107 bserobj_tuple_length, /* sq_length */
87 0, /* sq_concat */
108 0, /* sq_concat */
@@ -92,49 +113,72 b' static PySequenceMethods bserobj_sq = {'
92 0, /* sq_inplace_concat */
113 0, /* sq_inplace_concat */
93 0 /* sq_inplace_repeat */
114 0 /* sq_inplace_repeat */
94 };
115 };
116 // clang-format on
95
117
96 static void bserobj_dealloc(PyObject *o) {
118 static void bserobj_dealloc(PyObject* o) {
97 bserObject *obj = (bserObject*)o;
119 bserObject* obj = (bserObject*)o;
98
120
99 Py_CLEAR(obj->keys);
121 Py_CLEAR(obj->keys);
100 Py_CLEAR(obj->values);
122 Py_CLEAR(obj->values);
101 PyObject_Del(o);
123 PyObject_Del(o);
102 }
124 }
103
125
104 static PyObject *bserobj_getattrro(PyObject *o, PyObject *name) {
126 static PyObject* bserobj_getattrro(PyObject* o, PyObject* name) {
105 bserObject *obj = (bserObject*)o;
127 bserObject* obj = (bserObject*)o;
106 Py_ssize_t i, n;
128 Py_ssize_t i, n;
107 const char *namestr;
129 PyObject* name_bytes = NULL;
130 PyObject* ret = NULL;
131 const char* namestr;
108
132
109 if (PyIndex_Check(name)) {
133 if (PyIndex_Check(name)) {
110 i = PyNumber_AsSsize_t(name, PyExc_IndexError);
134 i = PyNumber_AsSsize_t(name, PyExc_IndexError);
111 if (i == -1 && PyErr_Occurred()) {
135 if (i == -1 && PyErr_Occurred()) {
112 return NULL;
136 goto bail;
113 }
137 }
114 return PySequence_GetItem(obj->values, i);
138 ret = PySequence_GetItem(obj->values, i);
139 goto bail;
115 }
140 }
116
141
142 // We can be passed in Unicode objects here -- we don't support anything other
143 // than UTF-8 for keys.
144 if (PyUnicode_Check(name)) {
145 name_bytes = PyUnicode_AsUTF8String(name);
146 if (name_bytes == NULL) {
147 goto bail;
148 }
149 namestr = PyBytes_AsString(name_bytes);
150 } else {
151 namestr = PyBytes_AsString(name);
152 }
153
154 if (namestr == NULL) {
155 goto bail;
156 }
117 // hack^Wfeature to allow mercurial to use "st_size" to reference "size"
157 // hack^Wfeature to allow mercurial to use "st_size" to reference "size"
118 namestr = PyString_AsString(name);
119 if (!strncmp(namestr, "st_", 3)) {
158 if (!strncmp(namestr, "st_", 3)) {
120 namestr += 3;
159 namestr += 3;
121 }
160 }
122
161
123 n = PyTuple_GET_SIZE(obj->keys);
162 n = PyTuple_GET_SIZE(obj->keys);
124 for (i = 0; i < n; i++) {
163 for (i = 0; i < n; i++) {
125 const char *item_name = NULL;
164 const char* item_name = NULL;
126 PyObject *key = PyTuple_GET_ITEM(obj->keys, i);
165 PyObject* key = PyTuple_GET_ITEM(obj->keys, i);
127
166
128 item_name = PyString_AsString(key);
167 item_name = PyBytes_AsString(key);
129 if (!strcmp(item_name, namestr)) {
168 if (!strcmp(item_name, namestr)) {
130 return PySequence_GetItem(obj->values, i);
169 ret = PySequence_GetItem(obj->values, i);
170 goto bail;
131 }
171 }
132 }
172 }
133 PyErr_Format(PyExc_AttributeError,
173
134 "bserobject has no attribute '%.400s'", namestr);
174 PyErr_Format(
135 return NULL;
175 PyExc_AttributeError, "bserobject has no attribute '%.400s'", namestr);
176 bail:
177 Py_XDECREF(name_bytes);
178 return ret;
136 }
179 }
137
180
181 // clang-format off
138 static PyMappingMethods bserobj_map = {
182 static PyMappingMethods bserobj_map = {
139 bserobj_tuple_length, /* mp_length */
183 bserobj_tuple_length, /* mp_length */
140 bserobj_getattrro, /* mp_subscript */
184 bserobj_getattrro, /* mp_subscript */
@@ -181,20 +225,27 b' PyTypeObject bserObjectType = {'
181 0, /* tp_alloc */
225 0, /* tp_alloc */
182 0, /* tp_new */
226 0, /* tp_new */
183 };
227 };
184
228 // clang-format on
185
229
186 static PyObject *bser_loads_recursive(const char **ptr, const char *end,
230 typedef struct loads_ctx {
187 int mutable);
231 int mutable;
232 const char* value_encoding;
233 const char* value_errors;
234 uint32_t bser_version;
235 uint32_t bser_capabilities;
236 } unser_ctx_t;
237
238 static PyObject*
239 bser_loads_recursive(const char** ptr, const char* end, const unser_ctx_t* ctx);
188
240
189 static const char bser_true = BSER_TRUE;
241 static const char bser_true = BSER_TRUE;
190 static const char bser_false = BSER_FALSE;
242 static const char bser_false = BSER_FALSE;
191 static const char bser_null = BSER_NULL;
243 static const char bser_null = BSER_NULL;
192 static const char bser_string_hdr = BSER_STRING;
244 static const char bser_bytestring_hdr = BSER_BYTESTRING;
193 static const char bser_array_hdr = BSER_ARRAY;
245 static const char bser_array_hdr = BSER_ARRAY;
194 static const char bser_object_hdr = BSER_OBJECT;
246 static const char bser_object_hdr = BSER_OBJECT;
195
247
196 static inline uint32_t next_power_2(uint32_t n)
248 static inline uint32_t next_power_2(uint32_t n) {
197 {
198 n |= (n >> 16);
249 n |= (n >> 16);
199 n |= (n >> 8);
250 n |= (n >> 8);
200 n |= (n >> 4);
251 n |= (n >> 4);
@@ -205,16 +256,17 b' static inline uint32_t next_power_2(uint'
205
256
206 // A buffer we use for building up the serialized result
257 // A buffer we use for building up the serialized result
207 struct bser_buffer {
258 struct bser_buffer {
208 char *buf;
259 char* buf;
209 int wpos, allocd;
260 int wpos, allocd;
261 uint32_t bser_version;
262 uint32_t capabilities;
210 };
263 };
211 typedef struct bser_buffer bser_t;
264 typedef struct bser_buffer bser_t;
212
265
213 static int bser_append(bser_t *bser, const char *data, uint32_t len)
266 static int bser_append(bser_t* bser, const char* data, uint32_t len) {
214 {
215 int newlen = next_power_2(bser->wpos + len);
267 int newlen = next_power_2(bser->wpos + len);
216 if (newlen > bser->allocd) {
268 if (newlen > bser->allocd) {
217 char *nbuf = realloc(bser->buf, newlen);
269 char* nbuf = realloc(bser->buf, newlen);
218 if (!nbuf) {
270 if (!nbuf) {
219 return 0;
271 return 0;
220 }
272 }
@@ -228,40 +280,46 b' static int bser_append(bser_t *bser, con'
228 return 1;
280 return 1;
229 }
281 }
230
282
231 static int bser_init(bser_t *bser)
283 static int bser_init(bser_t* bser, uint32_t version, uint32_t capabilities) {
232 {
233 bser->allocd = 8192;
284 bser->allocd = 8192;
234 bser->wpos = 0;
285 bser->wpos = 0;
235 bser->buf = malloc(bser->allocd);
286 bser->buf = malloc(bser->allocd);
236
287 bser->bser_version = version;
288 bser->capabilities = capabilities;
237 if (!bser->buf) {
289 if (!bser->buf) {
238 return 0;
290 return 0;
239 }
291 }
240
292
241 // Leave room for the serialization header, which includes
293 // Leave room for the serialization header, which includes
242 // our overall length. To make things simpler, we'll use an
294 // our overall length. To make things simpler, we'll use an
243 // int32 for the header
295 // int32 for the header
244 #define EMPTY_HEADER "\x00\x01\x05\x00\x00\x00\x00"
296 #define EMPTY_HEADER "\x00\x01\x05\x00\x00\x00\x00"
245 bser_append(bser, EMPTY_HEADER, sizeof(EMPTY_HEADER)-1);
297
298 // Version 2 also carries an integer indicating the capabilities. The
299 // capabilities integer comes before the PDU size.
300 #define EMPTY_HEADER_V2 "\x00\x02\x00\x00\x00\x00\x05\x00\x00\x00\x00"
301 if (version == 2) {
302 bser_append(bser, EMPTY_HEADER_V2, sizeof(EMPTY_HEADER_V2) - 1);
303 } else {
304 bser_append(bser, EMPTY_HEADER, sizeof(EMPTY_HEADER) - 1);
305 }
246
306
247 return 1;
307 return 1;
248 }
308 }
249
309
250 static void bser_dtor(bser_t *bser)
310 static void bser_dtor(bser_t* bser) {
251 {
252 free(bser->buf);
311 free(bser->buf);
253 bser->buf = NULL;
312 bser->buf = NULL;
254 }
313 }
255
314
256 static int bser_long(bser_t *bser, int64_t val)
315 static int bser_long(bser_t* bser, int64_t val) {
257 {
258 int8_t i8;
316 int8_t i8;
259 int16_t i16;
317 int16_t i16;
260 int32_t i32;
318 int32_t i32;
261 int64_t i64;
319 int64_t i64;
262 char sz;
320 char sz;
263 int size = INT_SIZE(val);
321 int size = INT_SIZE(val);
264 char *iptr;
322 char* iptr;
265
323
266 switch (size) {
324 switch (size) {
267 case 1:
325 case 1:
@@ -285,8 +343,7 b' static int bser_long(bser_t *bser, int64'
285 iptr = (char*)&i64;
343 iptr = (char*)&i64;
286 break;
344 break;
287 default:
345 default:
288 PyErr_SetString(PyExc_RuntimeError,
346 PyErr_SetString(PyExc_RuntimeError, "Cannot represent this long value!?");
289 "Cannot represent this long value!?");
290 return 0;
347 return 0;
291 }
348 }
292
349
@@ -297,25 +354,24 b' static int bser_long(bser_t *bser, int64'
297 return bser_append(bser, iptr, size);
354 return bser_append(bser, iptr, size);
298 }
355 }
299
356
300 static int bser_string(bser_t *bser, PyObject *sval)
357 static int bser_bytestring(bser_t* bser, PyObject* sval) {
301 {
358 char* buf = NULL;
302 char *buf = NULL;
303 Py_ssize_t len;
359 Py_ssize_t len;
304 int res;
360 int res;
305 PyObject *utf = NULL;
361 PyObject* utf = NULL;
306
362
307 if (PyUnicode_Check(sval)) {
363 if (PyUnicode_Check(sval)) {
308 utf = PyUnicode_AsEncodedString(sval, "utf-8", "ignore");
364 utf = PyUnicode_AsEncodedString(sval, "utf-8", "ignore");
309 sval = utf;
365 sval = utf;
310 }
366 }
311
367
312 res = PyString_AsStringAndSize(sval, &buf, &len);
368 res = PyBytes_AsStringAndSize(sval, &buf, &len);
313 if (res == -1) {
369 if (res == -1) {
314 res = 0;
370 res = 0;
315 goto out;
371 goto out;
316 }
372 }
317
373
318 if (!bser_append(bser, &bser_string_hdr, sizeof(bser_string_hdr))) {
374 if (!bser_append(bser, &bser_bytestring_hdr, sizeof(bser_bytestring_hdr))) {
319 res = 0;
375 res = 0;
320 goto out;
376 goto out;
321 }
377 }
@@ -341,8 +397,7 b' out:'
341 return res;
397 return res;
342 }
398 }
343
399
344 static int bser_recursive(bser_t *bser, PyObject *val)
400 static int bser_recursive(bser_t* bser, PyObject* val) {
345 {
346 if (PyBool_Check(val)) {
401 if (PyBool_Check(val)) {
347 if (val == Py_True) {
402 if (val == Py_True) {
348 return bser_append(bser, &bser_true, sizeof(bser_true));
403 return bser_append(bser, &bser_true, sizeof(bser_true));
@@ -354,19 +409,21 b' static int bser_recursive(bser_t *bser, '
354 return bser_append(bser, &bser_null, sizeof(bser_null));
409 return bser_append(bser, &bser_null, sizeof(bser_null));
355 }
410 }
356
411
412 // Python 3 has one integer type.
413 #if PY_MAJOR_VERSION < 3
357 if (PyInt_Check(val)) {
414 if (PyInt_Check(val)) {
358 return bser_long(bser, PyInt_AS_LONG(val));
415 return bser_long(bser, PyInt_AS_LONG(val));
359 }
416 }
417 #endif // PY_MAJOR_VERSION < 3
360
418
361 if (PyLong_Check(val)) {
419 if (PyLong_Check(val)) {
362 return bser_long(bser, PyLong_AsLongLong(val));
420 return bser_long(bser, PyLong_AsLongLong(val));
363 }
421 }
364
422
365 if (PyString_Check(val) || PyUnicode_Check(val)) {
423 if (PyBytes_Check(val) || PyUnicode_Check(val)) {
366 return bser_string(bser, val);
424 return bser_bytestring(bser, val);
367 }
425 }
368
426
369
370 if (PyFloat_Check(val)) {
427 if (PyFloat_Check(val)) {
371 double dval = PyFloat_AS_DOUBLE(val);
428 double dval = PyFloat_AS_DOUBLE(val);
372 char sz = BSER_REAL;
429 char sz = BSER_REAL;
@@ -390,7 +447,7 b' static int bser_recursive(bser_t *bser, '
390 }
447 }
391
448
392 for (i = 0; i < len; i++) {
449 for (i = 0; i < len; i++) {
393 PyObject *ele = PyList_GET_ITEM(val, i);
450 PyObject* ele = PyList_GET_ITEM(val, i);
394
451
395 if (!bser_recursive(bser, ele)) {
452 if (!bser_recursive(bser, ele)) {
396 return 0;
453 return 0;
@@ -412,7 +469,7 b' static int bser_recursive(bser_t *bser, '
412 }
469 }
413
470
414 for (i = 0; i < len; i++) {
471 for (i = 0; i < len; i++) {
415 PyObject *ele = PyTuple_GET_ITEM(val, i);
472 PyObject* ele = PyTuple_GET_ITEM(val, i);
416
473
417 if (!bser_recursive(bser, ele)) {
474 if (!bser_recursive(bser, ele)) {
418 return 0;
475 return 0;
@@ -436,7 +493,7 b' static int bser_recursive(bser_t *bser, '
436 }
493 }
437
494
438 while (PyDict_Next(val, &pos, &key, &ele)) {
495 while (PyDict_Next(val, &pos, &key, &ele)) {
439 if (!bser_string(bser, key)) {
496 if (!bser_bytestring(bser, key)) {
440 return 0;
497 return 0;
441 }
498 }
442 if (!bser_recursive(bser, ele)) {
499 if (!bser_recursive(bser, ele)) {
@@ -451,17 +508,25 b' static int bser_recursive(bser_t *bser, '
451 return 0;
508 return 0;
452 }
509 }
453
510
454 static PyObject *bser_dumps(PyObject *self, PyObject *args)
511 static PyObject* bser_dumps(PyObject* self, PyObject* args, PyObject* kw) {
455 {
456 PyObject *val = NULL, *res;
512 PyObject *val = NULL, *res;
457 bser_t bser;
513 bser_t bser;
458 uint32_t len;
514 uint32_t len, bser_version = 1, bser_capabilities = 0;
515
516 static char* kw_list[] = {"val", "version", "capabilities", NULL};
459
517
460 if (!PyArg_ParseTuple(args, "O", &val)) {
518 if (!PyArg_ParseTupleAndKeywords(
519 args,
520 kw,
521 "O|ii:dumps",
522 kw_list,
523 &val,
524 &bser_version,
525 &bser_capabilities)) {
461 return NULL;
526 return NULL;
462 }
527 }
463
528
464 if (!bser_init(&bser)) {
529 if (!bser_init(&bser, bser_version, bser_capabilities)) {
465 return PyErr_NoMemory();
530 return PyErr_NoMemory();
466 }
531 }
467
532
@@ -475,19 +540,25 b' static PyObject *bser_dumps(PyObject *se'
475 }
540 }
476
541
477 // Now fill in the overall length
542 // Now fill in the overall length
478 len = bser.wpos - (sizeof(EMPTY_HEADER) - 1);
543 if (bser_version == 1) {
479 memcpy(bser.buf + 3, &len, sizeof(len));
544 len = bser.wpos - (sizeof(EMPTY_HEADER) - 1);
545 memcpy(bser.buf + 3, &len, sizeof(len));
546 } else {
547 len = bser.wpos - (sizeof(EMPTY_HEADER_V2) - 1);
548 // The BSER capabilities block comes before the PDU length
549 memcpy(bser.buf + 2, &bser_capabilities, sizeof(bser_capabilities));
550 memcpy(bser.buf + 7, &len, sizeof(len));
551 }
480
552
481 res = PyString_FromStringAndSize(bser.buf, bser.wpos);
553 res = PyBytes_FromStringAndSize(bser.buf, bser.wpos);
482 bser_dtor(&bser);
554 bser_dtor(&bser);
483
555
484 return res;
556 return res;
485 }
557 }
486
558
487 int bunser_int(const char **ptr, const char *end, int64_t *val)
559 int bunser_int(const char** ptr, const char* end, int64_t* val) {
488 {
489 int needed;
560 int needed;
490 const char *buf = *ptr;
561 const char* buf = *ptr;
491 int8_t i8;
562 int8_t i8;
492 int16_t i16;
563 int16_t i16;
493 int32_t i32;
564 int32_t i32;
@@ -507,8 +578,8 b' int bunser_int(const char **ptr, const c'
507 needed = 9;
578 needed = 9;
508 break;
579 break;
509 default:
580 default:
510 PyErr_Format(PyExc_ValueError,
581 PyErr_Format(
511 "invalid bser int encoding 0x%02x", buf[0]);
582 PyExc_ValueError, "invalid bser int encoding 0x%02x", buf[0]);
512 return 0;
583 return 0;
513 }
584 }
514 if (end - buf < needed) {
585 if (end - buf < needed) {
@@ -538,10 +609,12 b' int bunser_int(const char **ptr, const c'
538 }
609 }
539 }
610 }
540
611
541 static int bunser_string(const char **ptr, const char *end,
612 static int bunser_bytestring(
542 const char **start, int64_t *len)
613 const char** ptr,
543 {
614 const char* end,
544 const char *buf = *ptr;
615 const char** start,
616 int64_t* len) {
617 const char* buf = *ptr;
545
618
546 // skip string marker
619 // skip string marker
547 buf++;
620 buf++;
@@ -559,11 +632,12 b' static int bunser_string(const char **pt'
559 return 1;
632 return 1;
560 }
633 }
561
634
562 static PyObject *bunser_array(const char **ptr, const char *end, int mutable)
635 static PyObject*
563 {
636 bunser_array(const char** ptr, const char* end, const unser_ctx_t* ctx) {
564 const char *buf = *ptr;
637 const char* buf = *ptr;
565 int64_t nitems, i;
638 int64_t nitems, i;
566 PyObject *res;
639 int mutable = ctx->mutable;
640 PyObject* res;
567
641
568 // skip array header
642 // skip array header
569 buf++;
643 buf++;
@@ -584,7 +658,7 b' static PyObject *bunser_array(const char'
584 }
658 }
585
659
586 for (i = 0; i < nitems; i++) {
660 for (i = 0; i < nitems; i++) {
587 PyObject *ele = bser_loads_recursive(ptr, end, mutable);
661 PyObject* ele = bser_loads_recursive(ptr, end, ctx);
588
662
589 if (!ele) {
663 if (!ele) {
590 Py_DECREF(res);
664 Py_DECREF(res);
@@ -602,13 +676,13 b' static PyObject *bunser_array(const char'
602 return res;
676 return res;
603 }
677 }
604
678
605 static PyObject *bunser_object(const char **ptr, const char *end,
679 static PyObject*
606 int mutable)
680 bunser_object(const char** ptr, const char* end, const unser_ctx_t* ctx) {
607 {
681 const char* buf = *ptr;
608 const char *buf = *ptr;
609 int64_t nitems, i;
682 int64_t nitems, i;
610 PyObject *res;
683 int mutable = ctx->mutable;
611 bserObject *obj;
684 PyObject* res;
685 bserObject* obj;
612
686
613 // skip array header
687 // skip array header
614 buf++;
688 buf++;
@@ -627,12 +701,12 b' static PyObject *bunser_object(const cha'
627 }
701 }
628
702
629 for (i = 0; i < nitems; i++) {
703 for (i = 0; i < nitems; i++) {
630 const char *keystr;
704 const char* keystr;
631 int64_t keylen;
705 int64_t keylen;
632 PyObject *key;
706 PyObject* key;
633 PyObject *ele;
707 PyObject* ele;
634
708
635 if (!bunser_string(ptr, end, &keystr, &keylen)) {
709 if (!bunser_bytestring(ptr, end, &keystr, &keylen)) {
636 Py_DECREF(res);
710 Py_DECREF(res);
637 return NULL;
711 return NULL;
638 }
712 }
@@ -643,13 +717,24 b' static PyObject *bunser_object(const cha'
643 return NULL;
717 return NULL;
644 }
718 }
645
719
646 key = PyString_FromStringAndSize(keystr, (Py_ssize_t)keylen);
720 if (mutable) {
721 // This will interpret the key as UTF-8.
722 key = PyUnicode_FromStringAndSize(keystr, (Py_ssize_t)keylen);
723 } else {
724 // For immutable objects we'll manage key lookups, so we can avoid going
725 // through the Unicode APIs. This avoids a potentially expensive and
726 // definitely unnecessary conversion to UTF-16 and back for Python 2.
727 // TODO: On Python 3 the Unicode APIs are smarter: we might be able to use
728 // Unicode keys there without an appreciable performance loss.
729 key = PyBytes_FromStringAndSize(keystr, (Py_ssize_t)keylen);
730 }
731
647 if (!key) {
732 if (!key) {
648 Py_DECREF(res);
733 Py_DECREF(res);
649 return NULL;
734 return NULL;
650 }
735 }
651
736
652 ele = bser_loads_recursive(ptr, end, mutable);
737 ele = bser_loads_recursive(ptr, end, ctx);
653
738
654 if (!ele) {
739 if (!ele) {
655 Py_DECREF(key);
740 Py_DECREF(key);
@@ -671,14 +756,24 b' static PyObject *bunser_object(const cha'
671 return res;
756 return res;
672 }
757 }
673
758
674 static PyObject *bunser_template(const char **ptr, const char *end,
759 static PyObject*
675 int mutable)
760 bunser_template(const char** ptr, const char* end, const unser_ctx_t* ctx) {
676 {
761 const char* buf = *ptr;
677 const char *buf = *ptr;
678 int64_t nitems, i;
762 int64_t nitems, i;
679 PyObject *arrval;
763 int mutable = ctx->mutable;
680 PyObject *keys;
764 PyObject* arrval;
765 PyObject* keys;
681 Py_ssize_t numkeys, keyidx;
766 Py_ssize_t numkeys, keyidx;
767 unser_ctx_t keys_ctx = {0};
768 if (mutable) {
769 keys_ctx.mutable = 1;
770 // Decode keys as UTF-8 in this case.
771 keys_ctx.value_encoding = "utf-8";
772 keys_ctx.value_errors = "strict";
773 } else {
774 // Treat keys as bytestrings in this case -- we'll do Unicode conversions at
775 // lookup time.
776 }
682
777
683 if (buf[1] != BSER_ARRAY) {
778 if (buf[1] != BSER_ARRAY) {
684 PyErr_Format(PyExc_ValueError, "Expect ARRAY to follow TEMPLATE");
779 PyErr_Format(PyExc_ValueError, "Expect ARRAY to follow TEMPLATE");
@@ -689,8 +784,9 b' static PyObject *bunser_template(const c'
689 buf++;
784 buf++;
690 *ptr = buf;
785 *ptr = buf;
691
786
692 // Load template keys
787 // Load template keys.
693 keys = bunser_array(ptr, end, mutable);
788 // For keys we don't want to do any decoding right now.
789 keys = bunser_array(ptr, end, &keys_ctx);
694 if (!keys) {
790 if (!keys) {
695 return NULL;
791 return NULL;
696 }
792 }
@@ -716,8 +812,8 b' static PyObject *bunser_template(const c'
716 }
812 }
717
813
718 for (i = 0; i < nitems; i++) {
814 for (i = 0; i < nitems; i++) {
719 PyObject *dict = NULL;
815 PyObject* dict = NULL;
720 bserObject *obj = NULL;
816 bserObject* obj = NULL;
721
817
722 if (mutable) {
818 if (mutable) {
723 dict = PyDict_New();
819 dict = PyDict_New();
@@ -731,22 +827,22 b' static PyObject *bunser_template(const c'
731 dict = (PyObject*)obj;
827 dict = (PyObject*)obj;
732 }
828 }
733 if (!dict) {
829 if (!dict) {
734 fail:
830 fail:
735 Py_DECREF(keys);
831 Py_DECREF(keys);
736 Py_DECREF(arrval);
832 Py_DECREF(arrval);
737 return NULL;
833 return NULL;
738 }
834 }
739
835
740 for (keyidx = 0; keyidx < numkeys; keyidx++) {
836 for (keyidx = 0; keyidx < numkeys; keyidx++) {
741 PyObject *key;
837 PyObject* key;
742 PyObject *ele;
838 PyObject* ele;
743
839
744 if (**ptr == BSER_SKIP) {
840 if (**ptr == BSER_SKIP) {
745 *ptr = *ptr + 1;
841 *ptr = *ptr + 1;
746 ele = Py_None;
842 ele = Py_None;
747 Py_INCREF(ele);
843 Py_INCREF(ele);
748 } else {
844 } else {
749 ele = bser_loads_recursive(ptr, end, mutable);
845 ele = bser_loads_recursive(ptr, end, ctx);
750 }
846 }
751
847
752 if (!ele) {
848 if (!ele) {
@@ -772,34 +868,38 b' fail:'
772 return arrval;
868 return arrval;
773 }
869 }
774
870
775 static PyObject *bser_loads_recursive(const char **ptr, const char *end,
871 static PyObject* bser_loads_recursive(
776 int mutable)
872 const char** ptr,
777 {
873 const char* end,
778 const char *buf = *ptr;
874 const unser_ctx_t* ctx) {
875 const char* buf = *ptr;
779
876
780 switch (buf[0]) {
877 switch (buf[0]) {
781 case BSER_INT8:
878 case BSER_INT8:
782 case BSER_INT16:
879 case BSER_INT16:
783 case BSER_INT32:
880 case BSER_INT32:
784 case BSER_INT64:
881 case BSER_INT64: {
785 {
882 int64_t ival;
786 int64_t ival;
883 if (!bunser_int(ptr, end, &ival)) {
787 if (!bunser_int(ptr, end, &ival)) {
884 return NULL;
788 return NULL;
789 }
790 if (ival < LONG_MIN || ival > LONG_MAX) {
791 return PyLong_FromLongLong(ival);
792 }
793 return PyInt_FromSsize_t(Py_SAFE_DOWNCAST(ival, int64_t, Py_ssize_t));
794 }
885 }
886 // Python 3 has one integer type.
887 #if PY_MAJOR_VERSION >= 3
888 return PyLong_FromLongLong(ival);
889 #else
890 if (ival < LONG_MIN || ival > LONG_MAX) {
891 return PyLong_FromLongLong(ival);
892 }
893 return PyInt_FromSsize_t(Py_SAFE_DOWNCAST(ival, int64_t, Py_ssize_t));
894 #endif // PY_MAJOR_VERSION >= 3
895 }
795
896
796 case BSER_REAL:
897 case BSER_REAL: {
797 {
898 double dval;
798 double dval;
899 memcpy(&dval, buf + 1, sizeof(dval));
799 memcpy(&dval, buf + 1, sizeof(dval));
900 *ptr = buf + 1 + sizeof(double);
800 *ptr = buf + 1 + sizeof(double);
901 return PyFloat_FromDouble(dval);
801 return PyFloat_FromDouble(dval);
902 }
802 }
803
903
804 case BSER_TRUE:
904 case BSER_TRUE:
805 *ptr = buf + 1;
905 *ptr = buf + 1;
@@ -816,31 +916,51 b' static PyObject *bser_loads_recursive(co'
816 Py_INCREF(Py_None);
916 Py_INCREF(Py_None);
817 return Py_None;
917 return Py_None;
818
918
819 case BSER_STRING:
919 case BSER_BYTESTRING: {
820 {
920 const char* start;
821 const char *start;
921 int64_t len;
822 int64_t len;
823
922
824 if (!bunser_string(ptr, end, &start, &len)) {
923 if (!bunser_bytestring(ptr, end, &start, &len)) {
825 return NULL;
924 return NULL;
826 }
925 }
827
926
828 if (len > LONG_MAX) {
927 if (len > LONG_MAX) {
829 PyErr_Format(PyExc_ValueError, "string too long for python");
928 PyErr_Format(PyExc_ValueError, "string too long for python");
830 return NULL;
929 return NULL;
831 }
832
833 return PyString_FromStringAndSize(start, (long)len);
834 }
930 }
835
931
932 if (ctx->value_encoding != NULL) {
933 return PyUnicode_Decode(
934 start, (long)len, ctx->value_encoding, ctx->value_errors);
935 } else {
936 return PyBytes_FromStringAndSize(start, (long)len);
937 }
938 }
939
940 case BSER_UTF8STRING: {
941 const char* start;
942 int64_t len;
943
944 if (!bunser_bytestring(ptr, end, &start, &len)) {
945 return NULL;
946 }
947
948 if (len > LONG_MAX) {
949 PyErr_Format(PyExc_ValueError, "string too long for python");
950 return NULL;
951 }
952
953 return PyUnicode_Decode(start, (long)len, "utf-8", "strict");
954 }
955
836 case BSER_ARRAY:
956 case BSER_ARRAY:
837 return bunser_array(ptr, end, mutable);
957 return bunser_array(ptr, end, ctx);
838
958
839 case BSER_OBJECT:
959 case BSER_OBJECT:
840 return bunser_object(ptr, end, mutable);
960 return bunser_object(ptr, end, ctx);
841
961
842 case BSER_TEMPLATE:
962 case BSER_TEMPLATE:
843 return bunser_template(ptr, end, mutable);
963 return bunser_template(ptr, end, ctx);
844
964
845 default:
965 default:
846 PyErr_Format(PyExc_ValueError, "unhandled bser opcode 0x%02x", buf[0]);
966 PyErr_Format(PyExc_ValueError, "unhandled bser opcode 0x%02x", buf[0]);
@@ -849,102 +969,244 b' static PyObject *bser_loads_recursive(co'
849 return NULL;
969 return NULL;
850 }
970 }
851
971
852 // Expected use case is to read a packet from the socket and
972 static int _pdu_info_helper(
853 // then call bser.pdu_len on the packet. It returns the total
973 const char* data,
854 // length of the entire response that the peer is sending,
974 const char* end,
855 // including the bytes already received. This allows the client
975 uint32_t* bser_version_out,
856 // to compute the data size it needs to read before it can
976 uint32_t* bser_capabilities_out,
857 // decode the data
977 int64_t* expected_len_out,
858 static PyObject *bser_pdu_len(PyObject *self, PyObject *args)
978 off_t* position_out) {
859 {
979 uint32_t bser_version;
860 const char *start = NULL;
980 uint32_t bser_capabilities = 0;
861 const char *data = NULL;
981 int64_t expected_len;
862 int datalen = 0;
863 const char *end;
864 int64_t expected_len, total_len;
865
982
866 if (!PyArg_ParseTuple(args, "s#", &start, &datalen)) {
983 const char* start;
867 return NULL;
984 start = data;
868 }
869 data = start;
870 end = data + datalen;
871
872 // Validate the header and length
985 // Validate the header and length
873 if (memcmp(data, EMPTY_HEADER, 2) != 0) {
986 if (memcmp(data, EMPTY_HEADER, 2) == 0) {
987 bser_version = 1;
988 } else if (memcmp(data, EMPTY_HEADER_V2, 2) == 0) {
989 bser_version = 2;
990 } else {
874 PyErr_SetString(PyExc_ValueError, "invalid bser header");
991 PyErr_SetString(PyExc_ValueError, "invalid bser header");
875 return NULL;
992 return 0;
876 }
993 }
877
994
878 data += 2;
995 data += 2;
879
996
997 if (bser_version == 2) {
998 // Expect an integer telling us what capabilities are supported by the
999 // remote server (currently unused).
1000 if (!memcpy(&bser_capabilities, &data, sizeof(bser_capabilities))) {
1001 return 0;
1002 }
1003 data += sizeof(bser_capabilities);
1004 }
1005
880 // Expect an integer telling us how big the rest of the data
1006 // Expect an integer telling us how big the rest of the data
881 // should be
1007 // should be
882 if (!bunser_int(&data, end, &expected_len)) {
1008 if (!bunser_int(&data, end, &expected_len)) {
1009 return 0;
1010 }
1011
1012 *bser_version_out = bser_version;
1013 *bser_capabilities_out = (uint32_t)bser_capabilities;
1014 *expected_len_out = expected_len;
1015 *position_out = (off_t)(data - start);
1016 return 1;
1017 }
1018
1019 // This function parses the PDU header and provides info about the packet
1020 // Returns false if unsuccessful
1021 static int pdu_info_helper(
1022 PyObject* self,
1023 PyObject* args,
1024 uint32_t* bser_version_out,
1025 uint32_t* bser_capabilities_out,
1026 int64_t* total_len_out) {
1027 const char* start = NULL;
1028 const char* data = NULL;
1029 int datalen = 0;
1030 const char* end;
1031 int64_t expected_len;
1032 off_t position;
1033
1034 if (!PyArg_ParseTuple(args, "s#", &start, &datalen)) {
1035 return 0;
1036 }
1037 data = start;
1038 end = data + datalen;
1039
1040 if (!_pdu_info_helper(
1041 data,
1042 end,
1043 bser_version_out,
1044 bser_capabilities_out,
1045 &expected_len,
1046 &position)) {
1047 return 0;
1048 }
1049 *total_len_out = (int64_t)(expected_len + position);
1050 return 1;
1051 }
1052
1053 // Expected use case is to read a packet from the socket and then call
1054 // bser.pdu_info on the packet. It returns the BSER version, BSER capabilities,
1055 // and the total length of the entire response that the peer is sending,
1056 // including the bytes already received. This allows the client to compute the
1057 // data size it needs to read before it can decode the data.
1058 static PyObject* bser_pdu_info(PyObject* self, PyObject* args) {
1059 uint32_t version, capabilities;
1060 int64_t total_len;
1061 if (!pdu_info_helper(self, args, &version, &capabilities, &total_len)) {
1062 return NULL;
1063 }
1064 return Py_BuildValue("kkL", version, capabilities, total_len);
1065 }
1066
1067 static PyObject* bser_pdu_len(PyObject* self, PyObject* args) {
1068 uint32_t version, capabilities;
1069 int64_t total_len;
1070 if (!pdu_info_helper(self, args, &version, &capabilities, &total_len)) {
1071 return NULL;
1072 }
1073 return Py_BuildValue("L", total_len);
1074 }
1075
1076 static PyObject* bser_loads(PyObject* self, PyObject* args, PyObject* kw) {
1077 const char* data = NULL;
1078 int datalen = 0;
1079 const char* start;
1080 const char* end;
1081 int64_t expected_len;
1082 off_t position;
1083 PyObject* mutable_obj = NULL;
1084 const char* value_encoding = NULL;
1085 const char* value_errors = NULL;
1086 unser_ctx_t ctx = {1, 0};
1087
1088 static char* kw_list[] = {
1089 "buf", "mutable", "value_encoding", "value_errors", NULL};
1090
1091 if (!PyArg_ParseTupleAndKeywords(
1092 args,
1093 kw,
1094 "s#|Ozz:loads",
1095 kw_list,
1096 &start,
1097 &datalen,
1098 &mutable_obj,
1099 &value_encoding,
1100 &value_errors)) {
883 return NULL;
1101 return NULL;
884 }
1102 }
885
1103
886 total_len = expected_len + (data - start);
1104 if (mutable_obj) {
887 if (total_len > LONG_MAX) {
1105 ctx.mutable = PyObject_IsTrue(mutable_obj) > 0 ? 1 : 0;
888 return PyLong_FromLongLong(total_len);
889 }
1106 }
890 return PyInt_FromLong((long)total_len);
1107 ctx.value_encoding = value_encoding;
891 }
1108 if (value_encoding == NULL) {
892
1109 ctx.value_errors = NULL;
893 static PyObject *bser_loads(PyObject *self, PyObject *args)
1110 } else if (value_errors == NULL) {
894 {
1111 ctx.value_errors = "strict";
895 const char *data = NULL;
1112 } else {
896 int datalen = 0;
1113 ctx.value_errors = value_errors;
897 const char *end;
898 int64_t expected_len;
899 int mutable = 1;
900 PyObject *mutable_obj = NULL;
901
902 if (!PyArg_ParseTuple(args, "s#|O:loads", &data, &datalen, &mutable_obj)) {
903 return NULL;
904 }
1114 }
905 if (mutable_obj) {
1115 data = start;
906 mutable = PyObject_IsTrue(mutable_obj) > 0 ? 1 : 0;
907 }
908
909 end = data + datalen;
1116 end = data + datalen;
910
1117
911 // Validate the header and length
1118 if (!_pdu_info_helper(
912 if (memcmp(data, EMPTY_HEADER, 2) != 0) {
1119 data,
913 PyErr_SetString(PyExc_ValueError, "invalid bser header");
1120 end,
1121 &ctx.bser_version,
1122 &ctx.bser_capabilities,
1123 &expected_len,
1124 &position)) {
914 return NULL;
1125 return NULL;
915 }
1126 }
916
1127
917 data += 2;
1128 data = start + position;
918
919 // Expect an integer telling us how big the rest of the data
920 // should be
921 if (!bunser_int(&data, end, &expected_len)) {
922 return NULL;
923 }
924
925 // Verify
1129 // Verify
926 if (expected_len + data != end) {
1130 if (expected_len + data != end) {
927 PyErr_SetString(PyExc_ValueError, "bser data len != header len");
1131 PyErr_SetString(PyExc_ValueError, "bser data len != header len");
928 return NULL;
1132 return NULL;
929 }
1133 }
930
1134
931 return bser_loads_recursive(&data, end, mutable);
1135 return bser_loads_recursive(&data, end, &ctx);
932 }
1136 }
933
1137
1138 static PyObject* bser_load(PyObject* self, PyObject* args, PyObject* kw) {
1139 PyObject *load, *string;
1140 PyObject* fp = NULL;
1141 PyObject* mutable_obj = NULL;
1142 const char* value_encoding = NULL;
1143 const char* value_errors = NULL;
1144
1145 static char* kw_list[] = {
1146 "fp", "mutable", "value_encoding", "value_errors", NULL};
1147
1148 if (!PyArg_ParseTupleAndKeywords(
1149 args,
1150 kw,
1151 "OOzz:load",
1152 kw_list,
1153 &fp,
1154 &mutable_obj,
1155 &value_encoding,
1156 &value_errors)) {
1157 return NULL;
1158 }
1159
1160 load = PyImport_ImportModule("pywatchman.load");
1161 if (load == NULL) {
1162 return NULL;
1163 }
1164 string = PyObject_CallMethod(
1165 load, "load", "OOzz", fp, mutable_obj, value_encoding, value_errors);
1166 Py_DECREF(load);
1167 return string;
1168 }
1169
1170 // clang-format off
934 static PyMethodDef bser_methods[] = {
1171 static PyMethodDef bser_methods[] = {
935 {"loads", bser_loads, METH_VARARGS, "Deserialize string."},
1172 {"loads", (PyCFunction)bser_loads, METH_VARARGS | METH_KEYWORDS,
936 {"pdu_len", bser_pdu_len, METH_VARARGS, "Extract PDU length."},
1173 "Deserialize string."},
937 {"dumps", bser_dumps, METH_VARARGS, "Serialize string."},
1174 {"load", (PyCFunction)bser_load, METH_VARARGS | METH_KEYWORDS,
1175 "Deserialize a file object"},
1176 {"pdu_info", (PyCFunction)bser_pdu_info, METH_VARARGS,
1177 "Extract PDU information."},
1178 {"pdu_len", (PyCFunction)bser_pdu_len, METH_VARARGS,
1179 "Extract total PDU length."},
1180 {"dumps", (PyCFunction)bser_dumps, METH_VARARGS | METH_KEYWORDS,
1181 "Serialize string."},
938 {NULL, NULL, 0, NULL}
1182 {NULL, NULL, 0, NULL}
939 };
1183 };
940
1184
941 PyMODINIT_FUNC initbser(void)
1185 #if PY_MAJOR_VERSION >= 3
942 {
1186 static struct PyModuleDef bser_module = {
1187 PyModuleDef_HEAD_INIT,
1188 "bser",
1189 "Efficient encoding and decoding of BSER.",
1190 -1,
1191 bser_methods
1192 };
1193 // clang-format on
1194
1195 PyMODINIT_FUNC PyInit_bser(void) {
1196 PyObject* mod;
1197
1198 mod = PyModule_Create(&bser_module);
1199 PyType_Ready(&bserObjectType);
1200
1201 return mod;
1202 }
1203 #else
1204
1205 PyMODINIT_FUNC initbser(void) {
943 (void)Py_InitModule("bser", bser_methods);
1206 (void)Py_InitModule("bser", bser_methods);
944 PyType_Ready(&bserObjectType);
1207 PyType_Ready(&bserObjectType);
945 }
1208 }
1209 #endif // PY_MAJOR_VERSION >= 3
946
1210
947 /* vim:ts=2:sw=2:et:
1211 /* vim:ts=2:sw=2:et:
948 */
1212 */
949
950 // no-check-code -- this is a 3rd party library
@@ -26,6 +26,11 b''
26 # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
28
29 from __future__ import absolute_import
30 from __future__ import division
31 from __future__ import print_function
32 # no unicode literals
33
29 import re
34 import re
30
35
31 def parse_version(vstr):
36 def parse_version(vstr):
@@ -65,5 +70,3 b' def synthesize(vers, opts):'
65 vers['error'] = 'client required capability `' + name + \
70 vers['error'] = 'client required capability `' + name + \
66 '` is not supported by this server'
71 '` is not supported by this server'
67 return vers
72 return vers
68
69 # no-check-code -- this is a 3rd party library
@@ -26,33 +26,51 b''
26 # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
28
29 from __future__ import absolute_import
30 from __future__ import division
31 from __future__ import print_function
32 # no unicode literals
33
34 import binascii
29 import collections
35 import collections
30 import ctypes
36 import ctypes
31 import struct
37 import struct
32 import sys
38 import sys
33
39
34 BSER_ARRAY = '\x00'
40 from . import (
35 BSER_OBJECT = '\x01'
41 compat,
36 BSER_STRING = '\x02'
42 )
37 BSER_INT8 = '\x03'
43
38 BSER_INT16 = '\x04'
44 BSER_ARRAY = b'\x00'
39 BSER_INT32 = '\x05'
45 BSER_OBJECT = b'\x01'
40 BSER_INT64 = '\x06'
46 BSER_BYTESTRING = b'\x02'
41 BSER_REAL = '\x07'
47 BSER_INT8 = b'\x03'
42 BSER_TRUE = '\x08'
48 BSER_INT16 = b'\x04'
43 BSER_FALSE = '\x09'
49 BSER_INT32 = b'\x05'
44 BSER_NULL = '\x0a'
50 BSER_INT64 = b'\x06'
45 BSER_TEMPLATE = '\x0b'
51 BSER_REAL = b'\x07'
46 BSER_SKIP = '\x0c'
52 BSER_TRUE = b'\x08'
53 BSER_FALSE = b'\x09'
54 BSER_NULL = b'\x0a'
55 BSER_TEMPLATE = b'\x0b'
56 BSER_SKIP = b'\x0c'
57 BSER_UTF8STRING = b'\x0d'
58
59 if compat.PYTHON3:
60 STRING_TYPES = (str, bytes)
61 unicode = str
62 def tobytes(i):
63 return str(i).encode('ascii')
64 long = int
65 else:
66 STRING_TYPES = (unicode, str)
67 tobytes = bytes
47
68
48 # Leave room for the serialization header, which includes
69 # Leave room for the serialization header, which includes
49 # our overall length. To make things simpler, we'll use an
70 # our overall length. To make things simpler, we'll use an
50 # int32 for the header
71 # int32 for the header
51 EMPTY_HEADER = "\x00\x01\x05\x00\x00\x00\x00"
72 EMPTY_HEADER = b"\x00\x01\x05\x00\x00\x00\x00"
52
73 EMPTY_HEADER_V2 = b"\x00\x02\x00\x00\x00\x00\x05\x00\x00\x00\x00"
53 # Python 3 conditional for supporting Python 2's int/long types
54 if sys.version_info > (3,):
55 long = int
56
74
57 def _int_size(x):
75 def _int_size(x):
58 """Return the smallest size int that can store the value"""
76 """Return the smallest size int that can store the value"""
@@ -67,13 +85,28 b' def _int_size(x):'
67 else:
85 else:
68 raise RuntimeError('Cannot represent value: ' + str(x))
86 raise RuntimeError('Cannot represent value: ' + str(x))
69
87
88 def _buf_pos(buf, pos):
89 ret = buf[pos]
90 # In Python 2, buf is a str array so buf[pos] is a string. In Python 3, buf
91 # is a bytes array and buf[pos] is an integer.
92 if compat.PYTHON3:
93 ret = bytes((ret,))
94 return ret
70
95
71 class _bser_buffer(object):
96 class _bser_buffer(object):
72
97
73 def __init__(self):
98 def __init__(self, version):
99 self.bser_version = version
74 self.buf = ctypes.create_string_buffer(8192)
100 self.buf = ctypes.create_string_buffer(8192)
75 struct.pack_into(str(len(EMPTY_HEADER)) + 's', self.buf, 0, EMPTY_HEADER)
101 if self.bser_version == 1:
76 self.wpos = len(EMPTY_HEADER)
102 struct.pack_into(tobytes(len(EMPTY_HEADER)) + b's', self.buf, 0,
103 EMPTY_HEADER)
104 self.wpos = len(EMPTY_HEADER)
105 else:
106 assert self.bser_version == 2
107 struct.pack_into(tobytes(len(EMPTY_HEADER_V2)) + b's', self.buf, 0,
108 EMPTY_HEADER_V2)
109 self.wpos = len(EMPTY_HEADER_V2)
77
110
78 def ensure_size(self, size):
111 def ensure_size(self, size):
79 while ctypes.sizeof(self.buf) - self.wpos < size:
112 while ctypes.sizeof(self.buf) - self.wpos < size:
@@ -84,13 +117,13 b' class _bser_buffer(object):'
84 to_write = size + 1
117 to_write = size + 1
85 self.ensure_size(to_write)
118 self.ensure_size(to_write)
86 if size == 1:
119 if size == 1:
87 struct.pack_into('=cb', self.buf, self.wpos, BSER_INT8, val)
120 struct.pack_into(b'=cb', self.buf, self.wpos, BSER_INT8, val)
88 elif size == 2:
121 elif size == 2:
89 struct.pack_into('=ch', self.buf, self.wpos, BSER_INT16, val)
122 struct.pack_into(b'=ch', self.buf, self.wpos, BSER_INT16, val)
90 elif size == 4:
123 elif size == 4:
91 struct.pack_into('=ci', self.buf, self.wpos, BSER_INT32, val)
124 struct.pack_into(b'=ci', self.buf, self.wpos, BSER_INT32, val)
92 elif size == 8:
125 elif size == 8:
93 struct.pack_into('=cq', self.buf, self.wpos, BSER_INT64, val)
126 struct.pack_into(b'=cq', self.buf, self.wpos, BSER_INT64, val)
94 else:
127 else:
95 raise RuntimeError('Cannot represent this long value')
128 raise RuntimeError('Cannot represent this long value')
96 self.wpos += to_write
129 self.wpos += to_write
@@ -104,13 +137,17 b' class _bser_buffer(object):'
104 to_write = 2 + size + s_len
137 to_write = 2 + size + s_len
105 self.ensure_size(to_write)
138 self.ensure_size(to_write)
106 if size == 1:
139 if size == 1:
107 struct.pack_into('=ccb' + str(s_len) + 's', self.buf, self.wpos, BSER_STRING, BSER_INT8, s_len, s)
140 struct.pack_into(b'=ccb' + tobytes(s_len) + b's', self.buf,
141 self.wpos, BSER_BYTESTRING, BSER_INT8, s_len, s)
108 elif size == 2:
142 elif size == 2:
109 struct.pack_into('=cch' + str(s_len) + 's', self.buf, self.wpos, BSER_STRING, BSER_INT16, s_len, s)
143 struct.pack_into(b'=cch' + tobytes(s_len) + b's', self.buf,
144 self.wpos, BSER_BYTESTRING, BSER_INT16, s_len, s)
110 elif size == 4:
145 elif size == 4:
111 struct.pack_into('=cci' + str(s_len) + 's', self.buf, self.wpos, BSER_STRING, BSER_INT32, s_len, s)
146 struct.pack_into(b'=cci' + tobytes(s_len) + b's', self.buf,
147 self.wpos, BSER_BYTESTRING, BSER_INT32, s_len, s)
112 elif size == 8:
148 elif size == 8:
113 struct.pack_into('=ccq' + str(s_len) + 's', self.buf, self.wpos, BSER_STRING, BSER_INT64, s_len, s)
149 struct.pack_into(b'=ccq' + tobytes(s_len) + b's', self.buf,
150 self.wpos, BSER_BYTESTRING, BSER_INT64, s_len, s)
114 else:
151 else:
115 raise RuntimeError('Cannot represent this string value')
152 raise RuntimeError('Cannot represent this string value')
116 self.wpos += to_write
153 self.wpos += to_write
@@ -124,54 +161,68 b' class _bser_buffer(object):'
124 to_encode = BSER_TRUE
161 to_encode = BSER_TRUE
125 else:
162 else:
126 to_encode = BSER_FALSE
163 to_encode = BSER_FALSE
127 struct.pack_into('=c', self.buf, self.wpos, to_encode)
164 struct.pack_into(b'=c', self.buf, self.wpos, to_encode)
128 self.wpos += needed
165 self.wpos += needed
129 elif val is None:
166 elif val is None:
130 needed = 1
167 needed = 1
131 self.ensure_size(needed)
168 self.ensure_size(needed)
132 struct.pack_into('=c', self.buf, self.wpos, BSER_NULL)
169 struct.pack_into(b'=c', self.buf, self.wpos, BSER_NULL)
133 self.wpos += needed
170 self.wpos += needed
134 elif isinstance(val, (int, long)):
171 elif isinstance(val, (int, long)):
135 self.append_long(val)
172 self.append_long(val)
136 elif isinstance(val, (str, unicode)):
173 elif isinstance(val, STRING_TYPES):
137 self.append_string(val)
174 self.append_string(val)
138 elif isinstance(val, float):
175 elif isinstance(val, float):
139 needed = 9
176 needed = 9
140 self.ensure_size(needed)
177 self.ensure_size(needed)
141 struct.pack_into('=cd', self.buf, self.wpos, BSER_REAL, val)
178 struct.pack_into(b'=cd', self.buf, self.wpos, BSER_REAL, val)
142 self.wpos += needed
179 self.wpos += needed
143 elif isinstance(val, collections.Mapping) and isinstance(val, collections.Sized):
180 elif isinstance(val, collections.Mapping) and \
181 isinstance(val, collections.Sized):
144 val_len = len(val)
182 val_len = len(val)
145 size = _int_size(val_len)
183 size = _int_size(val_len)
146 needed = 2 + size
184 needed = 2 + size
147 self.ensure_size(needed)
185 self.ensure_size(needed)
148 if size == 1:
186 if size == 1:
149 struct.pack_into('=ccb', self.buf, self.wpos, BSER_OBJECT, BSER_INT8, val_len)
187 struct.pack_into(b'=ccb', self.buf, self.wpos, BSER_OBJECT,
188 BSER_INT8, val_len)
150 elif size == 2:
189 elif size == 2:
151 struct.pack_into('=cch', self.buf, self.wpos, BSER_OBJECT, BSER_INT16, val_len)
190 struct.pack_into(b'=cch', self.buf, self.wpos, BSER_OBJECT,
191 BSER_INT16, val_len)
152 elif size == 4:
192 elif size == 4:
153 struct.pack_into('=cci', self.buf, self.wpos, BSER_OBJECT, BSER_INT32, val_len)
193 struct.pack_into(b'=cci', self.buf, self.wpos, BSER_OBJECT,
194 BSER_INT32, val_len)
154 elif size == 8:
195 elif size == 8:
155 struct.pack_into('=ccq', self.buf, self.wpos, BSER_OBJECT, BSER_INT64, val_len)
196 struct.pack_into(b'=ccq', self.buf, self.wpos, BSER_OBJECT,
197 BSER_INT64, val_len)
156 else:
198 else:
157 raise RuntimeError('Cannot represent this mapping value')
199 raise RuntimeError('Cannot represent this mapping value')
158 self.wpos += needed
200 self.wpos += needed
159 for k, v in val.iteritems():
201 if compat.PYTHON3:
202 iteritems = val.items()
203 else:
204 iteritems = val.iteritems()
205 for k, v in iteritems:
160 self.append_string(k)
206 self.append_string(k)
161 self.append_recursive(v)
207 self.append_recursive(v)
162 elif isinstance(val, collections.Iterable) and isinstance(val, collections.Sized):
208 elif isinstance(val, collections.Iterable) and \
209 isinstance(val, collections.Sized):
163 val_len = len(val)
210 val_len = len(val)
164 size = _int_size(val_len)
211 size = _int_size(val_len)
165 needed = 2 + size
212 needed = 2 + size
166 self.ensure_size(needed)
213 self.ensure_size(needed)
167 if size == 1:
214 if size == 1:
168 struct.pack_into('=ccb', self.buf, self.wpos, BSER_ARRAY, BSER_INT8, val_len)
215 struct.pack_into(b'=ccb', self.buf, self.wpos, BSER_ARRAY,
216 BSER_INT8, val_len)
169 elif size == 2:
217 elif size == 2:
170 struct.pack_into('=cch', self.buf, self.wpos, BSER_ARRAY, BSER_INT16, val_len)
218 struct.pack_into(b'=cch', self.buf, self.wpos, BSER_ARRAY,
219 BSER_INT16, val_len)
171 elif size == 4:
220 elif size == 4:
172 struct.pack_into('=cci', self.buf, self.wpos, BSER_ARRAY, BSER_INT32, val_len)
221 struct.pack_into(b'=cci', self.buf, self.wpos, BSER_ARRAY,
222 BSER_INT32, val_len)
173 elif size == 8:
223 elif size == 8:
174 struct.pack_into('=ccq', self.buf, self.wpos, BSER_ARRAY, BSER_INT64, val_len)
224 struct.pack_into(b'=ccq', self.buf, self.wpos, BSER_ARRAY,
225 BSER_INT64, val_len)
175 else:
226 else:
176 raise RuntimeError('Cannot represent this sequence value')
227 raise RuntimeError('Cannot represent this sequence value')
177 self.wpos += needed
228 self.wpos += needed
@@ -181,56 +232,18 b' class _bser_buffer(object):'
181 raise RuntimeError('Cannot represent unknown value type')
232 raise RuntimeError('Cannot represent unknown value type')
182
233
183
234
184 def dumps(obj):
235 def dumps(obj, version=1, capabilities=0):
185 bser_buf = _bser_buffer()
236 bser_buf = _bser_buffer(version=version)
186 bser_buf.append_recursive(obj)
237 bser_buf.append_recursive(obj)
187 # Now fill in the overall length
238 # Now fill in the overall length
188 obj_len = bser_buf.wpos - len(EMPTY_HEADER)
239 if version == 1:
189 struct.pack_into('=i', bser_buf.buf, 3, obj_len)
240 obj_len = bser_buf.wpos - len(EMPTY_HEADER)
190 return bser_buf.buf.raw[:bser_buf.wpos]
241 struct.pack_into(b'=i', bser_buf.buf, 3, obj_len)
191
192
193 def _bunser_int(buf, pos):
194 try:
195 int_type = buf[pos]
196 except IndexError:
197 raise ValueError('Invalid bser int encoding, pos out of range')
198 if int_type == BSER_INT8:
199 needed = 2
200 fmt = '=b'
201 elif int_type == BSER_INT16:
202 needed = 3
203 fmt = '=h'
204 elif int_type == BSER_INT32:
205 needed = 5
206 fmt = '=i'
207 elif int_type == BSER_INT64:
208 needed = 9
209 fmt = '=q'
210 else:
242 else:
211 raise ValueError('Invalid bser int encoding 0x%02x' % int(int_type))
243 obj_len = bser_buf.wpos - len(EMPTY_HEADER_V2)
212 int_val = struct.unpack_from(fmt, buf, pos + 1)[0]
244 struct.pack_into(b'=i', bser_buf.buf, 2, capabilities)
213 return (int_val, pos + needed)
245 struct.pack_into(b'=i', bser_buf.buf, 7, obj_len)
214
246 return bser_buf.buf.raw[:bser_buf.wpos]
215
216 def _bunser_string(buf, pos):
217 str_len, pos = _bunser_int(buf, pos + 1)
218 str_val = struct.unpack_from(str(str_len) + 's', buf, pos)[0]
219 return (str_val, pos + str_len)
220
221
222 def _bunser_array(buf, pos, mutable=True):
223 arr_len, pos = _bunser_int(buf, pos + 1)
224 arr = []
225 for i in range(arr_len):
226 arr_item, pos = _bser_loads_recursive(buf, pos, mutable)
227 arr.append(arr_item)
228
229 if not mutable:
230 arr = tuple(arr)
231
232 return arr, pos
233
234
247
235 # This is a quack-alike with the bserObjectType in bser.c
248 # This is a quack-alike with the bserObjectType in bser.c
236 # It provides by getattr accessors and getitem for both index
249 # It provides by getattr accessors and getitem for both index
@@ -260,100 +273,212 b' class _BunserDict(object):'
260 def __len__(self):
273 def __len__(self):
261 return len(self._keys)
274 return len(self._keys)
262
275
263 def _bunser_object(buf, pos, mutable=True):
276 class Bunser(object):
264 obj_len, pos = _bunser_int(buf, pos + 1)
277 def __init__(self, mutable=True, value_encoding=None, value_errors=None):
265 if mutable:
278 self.mutable = mutable
266 obj = {}
279 self.value_encoding = value_encoding
267 else:
280
268 keys = []
281 if value_encoding is None:
269 vals = []
282 self.value_errors = None
283 elif value_errors is None:
284 self.value_errors = 'strict'
285 else:
286 self.value_errors = value_errors
270
287
271 for i in range(obj_len):
288 @staticmethod
272 key, pos = _bunser_string(buf, pos)
289 def unser_int(buf, pos):
273 val, pos = _bser_loads_recursive(buf, pos, mutable)
290 try:
274 if mutable:
291 int_type = _buf_pos(buf, pos)
275 obj[key] = val
292 except IndexError:
293 raise ValueError('Invalid bser int encoding, pos out of range')
294 if int_type == BSER_INT8:
295 needed = 2
296 fmt = b'=b'
297 elif int_type == BSER_INT16:
298 needed = 3
299 fmt = b'=h'
300 elif int_type == BSER_INT32:
301 needed = 5
302 fmt = b'=i'
303 elif int_type == BSER_INT64:
304 needed = 9
305 fmt = b'=q'
276 else:
306 else:
277 keys.append(key)
307 raise ValueError('Invalid bser int encoding 0x%s' %
278 vals.append(val)
308 binascii.hexlify(int_type).decode('ascii'))
309 int_val = struct.unpack_from(fmt, buf, pos + 1)[0]
310 return (int_val, pos + needed)
279
311
280 if not mutable:
312 def unser_utf8_string(self, buf, pos):
281 obj = _BunserDict(keys, vals)
313 str_len, pos = self.unser_int(buf, pos + 1)
282
314 str_val = struct.unpack_from(tobytes(str_len) + b's', buf, pos)[0]
283 return obj, pos
315 return (str_val.decode('utf-8'), pos + str_len)
284
285
316
286 def _bunser_template(buf, pos, mutable=True):
317 def unser_bytestring(self, buf, pos):
287 if buf[pos + 1] != BSER_ARRAY:
318 str_len, pos = self.unser_int(buf, pos + 1)
288 raise RuntimeError('Expect ARRAY to follow TEMPLATE')
319 str_val = struct.unpack_from(tobytes(str_len) + b's', buf, pos)[0]
289 keys, pos = _bunser_array(buf, pos + 1)
320 if self.value_encoding is not None:
290 nitems, pos = _bunser_int(buf, pos)
321 str_val = str_val.decode(self.value_encoding, self.value_errors)
291 arr = []
322 # str_len stays the same because that's the length in bytes
292 for i in range(nitems):
323 return (str_val, pos + str_len)
293 if mutable:
324
325 def unser_array(self, buf, pos):
326 arr_len, pos = self.unser_int(buf, pos + 1)
327 arr = []
328 for i in range(arr_len):
329 arr_item, pos = self.loads_recursive(buf, pos)
330 arr.append(arr_item)
331
332 if not self.mutable:
333 arr = tuple(arr)
334
335 return arr, pos
336
337 def unser_object(self, buf, pos):
338 obj_len, pos = self.unser_int(buf, pos + 1)
339 if self.mutable:
294 obj = {}
340 obj = {}
295 else:
341 else:
342 keys = []
296 vals = []
343 vals = []
297
344
298 for keyidx in range(len(keys)):
345 for i in range(obj_len):
299 if buf[pos] == BSER_SKIP:
346 key, pos = self.unser_utf8_string(buf, pos)
300 pos += 1
347 val, pos = self.loads_recursive(buf, pos)
301 ele = None
348 if self.mutable:
349 obj[key] = val
302 else:
350 else:
303 ele, pos = _bser_loads_recursive(buf, pos, mutable)
351 keys.append(key)
352 vals.append(val)
304
353
305 if mutable:
354 if not self.mutable:
306 key = keys[keyidx]
307 obj[key] = ele
308 else:
309 vals.append(ele)
310
311 if not mutable:
312 obj = _BunserDict(keys, vals)
355 obj = _BunserDict(keys, vals)
313
356
314 arr.append(obj)
357 return obj, pos
315 return arr, pos
358
359 def unser_template(self, buf, pos):
360 val_type = _buf_pos(buf, pos + 1)
361 if val_type != BSER_ARRAY:
362 raise RuntimeError('Expect ARRAY to follow TEMPLATE')
363 # force UTF-8 on keys
364 keys_bunser = Bunser(mutable=self.mutable, value_encoding='utf-8')
365 keys, pos = keys_bunser.unser_array(buf, pos + 1)
366 nitems, pos = self.unser_int(buf, pos)
367 arr = []
368 for i in range(nitems):
369 if self.mutable:
370 obj = {}
371 else:
372 vals = []
373
374 for keyidx in range(len(keys)):
375 if _buf_pos(buf, pos) == BSER_SKIP:
376 pos += 1
377 ele = None
378 else:
379 ele, pos = self.loads_recursive(buf, pos)
380
381 if self.mutable:
382 key = keys[keyidx]
383 obj[key] = ele
384 else:
385 vals.append(ele)
386
387 if not self.mutable:
388 obj = _BunserDict(keys, vals)
389
390 arr.append(obj)
391 return arr, pos
392
393 def loads_recursive(self, buf, pos):
394 val_type = _buf_pos(buf, pos)
395 if (val_type == BSER_INT8 or val_type == BSER_INT16 or
396 val_type == BSER_INT32 or val_type == BSER_INT64):
397 return self.unser_int(buf, pos)
398 elif val_type == BSER_REAL:
399 val = struct.unpack_from(b'=d', buf, pos + 1)[0]
400 return (val, pos + 9)
401 elif val_type == BSER_TRUE:
402 return (True, pos + 1)
403 elif val_type == BSER_FALSE:
404 return (False, pos + 1)
405 elif val_type == BSER_NULL:
406 return (None, pos + 1)
407 elif val_type == BSER_BYTESTRING:
408 return self.unser_bytestring(buf, pos)
409 elif val_type == BSER_UTF8STRING:
410 return self.unser_utf8_string(buf, pos)
411 elif val_type == BSER_ARRAY:
412 return self.unser_array(buf, pos)
413 elif val_type == BSER_OBJECT:
414 return self.unser_object(buf, pos)
415 elif val_type == BSER_TEMPLATE:
416 return self.unser_template(buf, pos)
417 else:
418 raise ValueError('unhandled bser opcode 0x%s' %
419 binascii.hexlify(val_type).decode('ascii'))
316
420
317
421
318 def _bser_loads_recursive(buf, pos, mutable=True):
422 def _pdu_info_helper(buf):
319 val_type = buf[pos]
423 bser_version = -1
320 if (val_type == BSER_INT8 or val_type == BSER_INT16 or
424 if buf[0:2] == EMPTY_HEADER[0:2]:
321 val_type == BSER_INT32 or val_type == BSER_INT64):
425 bser_version = 1
322 return _bunser_int(buf, pos)
426 bser_capabilities = 0
323 elif val_type == BSER_REAL:
427 expected_len, pos2 = Bunser.unser_int(buf, 2)
324 val = struct.unpack_from('=d', buf, pos + 1)[0]
428 elif buf[0:2] == EMPTY_HEADER_V2[0:2]:
325 return (val, pos + 9)
429 if len(buf) < 8:
326 elif val_type == BSER_TRUE:
430 raise ValueError('Invalid BSER header')
327 return (True, pos + 1)
431 bser_version = 2
328 elif val_type == BSER_FALSE:
432 bser_capabilities = struct.unpack_from("I", buf, 2)[0]
329 return (False, pos + 1)
433 expected_len, pos2 = Bunser.unser_int(buf, 6)
330 elif val_type == BSER_NULL:
331 return (None, pos + 1)
332 elif val_type == BSER_STRING:
333 return _bunser_string(buf, pos)
334 elif val_type == BSER_ARRAY:
335 return _bunser_array(buf, pos, mutable)
336 elif val_type == BSER_OBJECT:
337 return _bunser_object(buf, pos, mutable)
338 elif val_type == BSER_TEMPLATE:
339 return _bunser_template(buf, pos, mutable)
340 else:
434 else:
341 raise RuntimeError('unhandled bser opcode 0x%02x' % (val_type,))
435 raise ValueError('Invalid BSER header')
436
437 return bser_version, bser_capabilities, expected_len, pos2
438
439
440 def pdu_info(buf):
441 info = _pdu_info_helper(buf)
442 return info[0], info[1], info[2] + info[3]
342
443
343
444
344 def pdu_len(buf):
445 def pdu_len(buf):
345 if buf[0:2] != EMPTY_HEADER[0:2]:
446 info = _pdu_info_helper(buf)
346 raise RuntimeError('Invalid BSER header')
447 return info[2] + info[3]
347 expected_len, pos = _bunser_int(buf, 2)
348 return expected_len + pos
349
448
350
449
351 def loads(buf, mutable=True):
450 def loads(buf, mutable=True, value_encoding=None, value_errors=None):
352 if buf[0:2] != EMPTY_HEADER[0:2]:
451 """Deserialize a BSER-encoded blob.
353 raise RuntimeError('Invalid BSER header')
452
354 expected_len, pos = _bunser_int(buf, 2)
453 @param buf: The buffer to deserialize.
454 @type buf: bytes
455
456 @param mutable: Whether to return mutable results.
457 @type mutable: bool
458
459 @param value_encoding: Optional codec to use to decode values. If
460 unspecified or None, return values as bytestrings.
461 @type value_encoding: str
462
463 @param value_errors: Optional error handler for codec. 'strict' by default.
464 The other most common argument is 'surrogateescape' on
465 Python 3. If value_encoding is None, this is ignored.
466 @type value_errors: str
467 """
468
469 info = _pdu_info_helper(buf)
470 expected_len = info[2]
471 pos = info[3]
472
355 if len(buf) != expected_len + pos:
473 if len(buf) != expected_len + pos:
356 raise RuntimeError('bser data len != header len')
474 raise ValueError('bser data len != header len')
357 return _bser_loads_recursive(buf, pos, mutable)[0]
475
476 bunser = Bunser(mutable=mutable, value_encoding=value_encoding,
477 value_errors=value_errors)
358
478
359 # no-check-code -- this is a 3rd party library
479 return bunser.loads_recursive(buf, pos)[0]
480
481
482 def load(fp, mutable=True, value_encoding=None, value_errors=None):
483 from . import load
484 return load.load(fp, mutable, value_encoding, value_errors)
@@ -15,10 +15,6 b''
15 contrib/python-zstandard/tests/test_module_attributes.py not using absolute_import
15 contrib/python-zstandard/tests/test_module_attributes.py not using absolute_import
16 contrib/python-zstandard/tests/test_roundtrip.py not using absolute_import
16 contrib/python-zstandard/tests/test_roundtrip.py not using absolute_import
17 contrib/python-zstandard/tests/test_train_dictionary.py not using absolute_import
17 contrib/python-zstandard/tests/test_train_dictionary.py not using absolute_import
18 hgext/fsmonitor/pywatchman/__init__.py not using absolute_import
19 hgext/fsmonitor/pywatchman/__init__.py requires print_function
20 hgext/fsmonitor/pywatchman/capabilities.py not using absolute_import
21 hgext/fsmonitor/pywatchman/pybser.py not using absolute_import
22 i18n/check-translation.py not using absolute_import
18 i18n/check-translation.py not using absolute_import
23 setup.py not using absolute_import
19 setup.py not using absolute_import
24 tests/test-demandimport.py not using absolute_import
20 tests/test-demandimport.py not using absolute_import
1 NO CONTENT: file was removed
NO CONTENT: file was removed
General Comments 0
You need to be logged in to leave comments. Login now