##// END OF EJS Templates
fsmonitor: refresh pywatchman to upstream...
Zack Hricz -
r30656:16f4b341 default
parent child Browse files
Show More
@@ -0,0 +1,65 b''
1 # Copyright 2016-present Facebook, Inc.
2 # All rights reserved.
3 #
4 # Redistribution and use in source and binary forms, with or without
5 # modification, are permitted provided that the following conditions are met:
6 #
7 # * Redistributions of source code must retain the above copyright notice,
8 # this list of conditions and the following disclaimer.
9 #
10 # * Redistributions in binary form must reproduce the above copyright notice,
11 # this list of conditions and the following disclaimer in the documentation
12 # and/or other materials provided with the distribution.
13 #
14 # * Neither the name Facebook nor the names of its contributors may be used to
15 # endorse or promote products derived from this software without specific
16 # prior written permission.
17 #
18 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
29 from __future__ import absolute_import
30 from __future__ import division
31 from __future__ import print_function
32 # no unicode literals
33
34 '''Compatibility module across Python 2 and 3.'''
35
36 import sys
37
PYTHON3 = sys.version_info >= (3, 0)

# This is adapted from https://bitbucket.org/gutworth/six, and used under the
# MIT license. See LICENSE for a full copyright notice.
if not PYTHON3:
    # The three-argument raise statement is a syntax error on Python 3, so the
    # Python 2 flavour of reraise() is compiled from a string and only when
    # this branch actually runs.
    exec('''
def reraise(tp, value, tb=None):
    try:
        raise tp, value, tb
    finally:
        tb = None
'''.strip())

    # Text string type on Python 2.
    UNICODE = unicode
else:
    def reraise(tp, value, tb=None):
        '''Raise value (or a fresh tp() when value is None) with traceback tb.'''
        try:
            if value is None:
                value = tp()
            if value.__traceback__ is tb:
                raise value
            raise value.with_traceback(tb)
        finally:
            # Drop the local references once the exception is on its way.
            value = None
            tb = None

    # Text string type on Python 3.
    UNICODE = str
@@ -0,0 +1,73 b''
1 # Copyright 2016-present Facebook, Inc.
2 # All rights reserved.
3 #
4 # Redistribution and use in source and binary forms, with or without
5 # modification, are permitted provided that the following conditions are met:
6 #
7 # * Redistributions of source code must retain the above copyright notice,
8 # this list of conditions and the following disclaimer.
9 #
10 # * Redistributions in binary form must reproduce the above copyright notice,
11 # this list of conditions and the following disclaimer in the documentation
12 # and/or other materials provided with the distribution.
13 #
14 # * Neither the name Facebook nor the names of its contributors may be used to
15 # endorse or promote products derived from this software without specific
16 # prior written permission.
17 #
18 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
29 from __future__ import absolute_import
30 from __future__ import division
31 from __future__ import print_function
32 # no unicode literals
33
34 '''Module to deal with filename encoding on the local system, as returned by
35 Watchman.'''
36
37 import sys
38
39 from . import (
40 compat,
41 )
42
# Python 2 doesn't support surrogateescape, so use 'strict' by default there.
# Users can register a custom surrogateescape error handler and use that if
# they so desire.
default_local_errors = 'surrogateescape' if compat.PYTHON3 else 'strict'


def get_local_encoding():
    '''Return the name of the codec used for filenames returned by Watchman.

    A single definition serves both Python 2 and Python 3: the only
    difference between the two was a None check that is a no-op on the
    Python 3 versions we support.
    '''
    if sys.platform == 'win32':
        # Watchman always returns UTF-8 encoded strings on Windows.
        return 'utf-8'
    fsencoding = sys.getfilesystemencoding()
    if fsencoding is None:
        # Only possible on Python 2, and very unlikely even there; on the
        # Python 3 versions we support this never happens. Just use UTF-8.
        fsencoding = 'utf-8'
    return fsencoding
68
def encode_local(s):
    '''Encode s with the local filename encoding and default error handler.'''
    codec = get_local_encoding()
    return s.encode(codec, default_local_errors)
71
def decode_local(bs):
    '''Decode bs with the local filename encoding and default error handler.'''
    codec = get_local_encoding()
    return bs.decode(codec, default_local_errors)
@@ -0,0 +1,107 b''
1 # Copyright 2016 Facebook, Inc.
2 # All rights reserved.
3 #
4 # Redistribution and use in source and binary forms, with or without
5 # modification, are permitted provided that the following conditions are met:
6 #
7 # * Redistributions of source code must retain the above copyright notice,
8 # this list of conditions and the following disclaimer.
9 #
10 # * Redistributions in binary form must reproduce the above copyright notice,
11 # this list of conditions and the following disclaimer in the documentation
12 # and/or other materials provided with the distribution.
13 #
14 # * Neither the name Facebook nor the names of its contributors may be used to
15 # endorse or promote products derived from this software without specific
16 # prior written permission.
17 #
18 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
29 from __future__ import absolute_import
30 from __future__ import division
31 from __future__ import print_function
32 # no unicode literals
33
34 try:
35 from . import bser
36 except ImportError:
37 from . import pybser as bser
38
39 import ctypes
40
41 EMPTY_HEADER = b"\x00\x01\x05\x00\x00\x00\x00"
42
43
def _read_bytes(fp, buf):
    """Read bytes from a file-like object until buf is full or data runs out

    @param fp: File-like object that implements readinto(buffer)
    @type fp: file

    @param buf: Buffer to read into. It is filled in place and never resized,
                so it must expose a writable buffer (for example a ctypes
                array or a bytearray); immutable bytes will not work.
    @type buf: writable buffer

    @return: Number of bytes stored in buf. This is less than len(buf) when
             fp.readinto reported no more data (None or 0) before the buffer
             was full.
    @rtype: int
    """
    offset = 0
    remaining = len(buf)
    while remaining > 0:
        # Wrap the still-empty tail of buf so that readinto() writes straight
        # into it instead of going through an intermediate copy.
        window = (ctypes.c_char * remaining).from_buffer(buf, offset)
        count = fp.readinto(window)
        if not count:
            # None or 0: nothing more to read right now, report a short read.
            break
        offset += count
        remaining -= count
    return offset
66
67
def load(fp, mutable=True, value_encoding=None, value_errors=None):
    """Deserialize a single BSER-encoded blob read from a file object.

    @param fp: The file-object to deserialize.
    @type fp: file

    @param mutable: Whether to return mutable results.
    @type mutable: bool

    @param value_encoding: Optional codec to use to decode values. If
                           unspecified or None, return values as bytestrings.
    @type value_encoding: str

    @param value_errors: Optional error handler for codec. 'strict' by default.
                         The other most common argument is 'surrogateescape' on
                         Python 3. If value_encoding is None, this is ignored.
    @type value_errors: str

    @return: The value produced by bser.loads, or None when fp ends before a
             complete header could be read.

    @raise RuntimeError: if fp ends before the announced PDU length was read.
    """
    sniff_size = len(EMPTY_HEADER)
    storage = ctypes.create_string_buffer(8192)

    # Read just enough to learn how long the whole PDU is.
    sniff = (ctypes.c_char * sniff_size).from_buffer(storage)
    if _read_bytes(fp, sniff) < sniff_size:
        return None

    pdu_size = bser.pdu_len(storage)
    if pdu_size > len(storage):
        ctypes.resize(storage, pdu_size)

    # NOTE(review): a PDU shorter than the sniffed header would make
    # body_size negative here -- confirm the peer never sends one.
    body_size = pdu_size - sniff_size
    body = (ctypes.c_char * body_size).from_buffer(storage, sniff_size)
    if _read_bytes(fp, body) < body_size:
        raise RuntimeError('bser data ended early')

    whole_pdu = (ctypes.c_char * pdu_size).from_buffer(storage, 0)
    return bser.loads(whole_pdu, mutable, value_encoding, value_errors)
@@ -26,9 +26,14 b''
26 26 # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 27 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 28
29 from __future__ import absolute_import
30 from __future__ import division
31 from __future__ import print_function
32 # no unicode literals
33
34 import inspect
35 import math
29 36 import os
30 import errno
31 import math
32 37 import socket
33 38 import subprocess
34 39 import time
@@ -36,11 +41,20 b' import time'
36 41 # Sometimes it's really hard to get Python extensions to compile,
37 42 # so fall back to a pure Python implementation.
38 43 try:
39 import bser
44 from . import bser
45 # Demandimport causes modules to be loaded lazily. Force the load now
46 # so that we can fall back on pybser if bser doesn't exist
47 bser.pdu_info
40 48 except ImportError:
41 import pybser as bser
49 from . import pybser as bser
42 50
43 import capabilities
51 from . import (
52 capabilities,
53 compat,
54 encoding,
55 load,
56 )
57
44 58
45 59 if os.name == 'nt':
46 60 import ctypes
@@ -55,18 +69,29 b" if os.name == 'nt':"
55 69 FORMAT_MESSAGE_FROM_SYSTEM = 0x00001000
56 70 FORMAT_MESSAGE_ALLOCATE_BUFFER = 0x00000100
57 71 FORMAT_MESSAGE_IGNORE_INSERTS = 0x00000200
72 WAIT_FAILED = 0xFFFFFFFF
58 73 WAIT_TIMEOUT = 0x00000102
59 74 WAIT_OBJECT_0 = 0x00000000
60 ERROR_IO_PENDING = 997
75 WAIT_IO_COMPLETION = 0x000000C0
76 INFINITE = 0xFFFFFFFF
77
78 # Overlapped I/O operation is in progress. (997)
79 ERROR_IO_PENDING = 0x000003E5
80
81 # The pointer size follows the architecture
82 # We use WPARAM since this type is already conditionally defined
83 ULONG_PTR = ctypes.wintypes.WPARAM
61 84
62 85 class OVERLAPPED(ctypes.Structure):
63 86 _fields_ = [
64 ("Internal", wintypes.ULONG), ("InternalHigh", wintypes.ULONG),
87 ("Internal", ULONG_PTR), ("InternalHigh", ULONG_PTR),
65 88 ("Offset", wintypes.DWORD), ("OffsetHigh", wintypes.DWORD),
66 89 ("hEvent", wintypes.HANDLE)
67 90 ]
68 91
69 92 def __init__(self):
93 self.Internal = 0
94 self.InternalHigh = 0
70 95 self.Offset = 0
71 96 self.OffsetHigh = 0
72 97 self.hEvent = 0
@@ -97,6 +122,10 b" if os.name == 'nt':"
97 122 GetLastError.argtypes = []
98 123 GetLastError.restype = wintypes.DWORD
99 124
125 SetLastError = ctypes.windll.kernel32.SetLastError
126 SetLastError.argtypes = [wintypes.DWORD]
127 SetLastError.restype = None
128
100 129 FormatMessage = ctypes.windll.kernel32.FormatMessageA
101 130 FormatMessage.argtypes = [wintypes.DWORD, wintypes.LPVOID, wintypes.DWORD,
102 131 wintypes.DWORD, ctypes.POINTER(wintypes.LPSTR),
@@ -105,12 +134,30 b" if os.name == 'nt':"
105 134
106 135 LocalFree = ctypes.windll.kernel32.LocalFree
107 136
108 GetOverlappedResultEx = ctypes.windll.kernel32.GetOverlappedResultEx
137 GetOverlappedResult = ctypes.windll.kernel32.GetOverlappedResult
138 GetOverlappedResult.argtypes = [wintypes.HANDLE,
139 ctypes.POINTER(OVERLAPPED), LPDWORD,
140 wintypes.BOOL]
141 GetOverlappedResult.restype = wintypes.BOOL
142
143 GetOverlappedResultEx = getattr(ctypes.windll.kernel32,
144 'GetOverlappedResultEx', None)
145 if GetOverlappedResultEx is not None:
109 146 GetOverlappedResultEx.argtypes = [wintypes.HANDLE,
110 147 ctypes.POINTER(OVERLAPPED), LPDWORD,
111 148 wintypes.DWORD, wintypes.BOOL]
112 149 GetOverlappedResultEx.restype = wintypes.BOOL
113 150
151 WaitForSingleObjectEx = ctypes.windll.kernel32.WaitForSingleObjectEx
152 WaitForSingleObjectEx.argtypes = [wintypes.HANDLE, wintypes.DWORD, wintypes.BOOL]
153 WaitForSingleObjectEx.restype = wintypes.DWORD
154
155 CreateEvent = ctypes.windll.kernel32.CreateEventA
156 CreateEvent.argtypes = [LPDWORD, wintypes.BOOL, wintypes.BOOL,
157 wintypes.LPSTR]
158 CreateEvent.restype = wintypes.HANDLE
159
160 # Windows Vista is the minimum supported client for CancelIoEx.
114 161 CancelIoEx = ctypes.windll.kernel32.CancelIoEx
115 162 CancelIoEx.argtypes = [wintypes.HANDLE, ctypes.POINTER(OVERLAPPED)]
116 163 CancelIoEx.restype = wintypes.BOOL
@@ -132,8 +179,47 b' else:'
132 179 pass
133 180
134 181
def _win32_strerror(err):
    """ turn a win32 error code into the system's message text for it """
    flags = (FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_ALLOCATE_BUFFER
             | FORMAT_MESSAGE_IGNORE_INSERTS)

    # FormatMessage allocates the message buffer itself and stores its
    # address in this pointer; we are responsible for freeing it.
    message = ctypes.c_char_p()
    FormatMessage(flags, None, err, 0, message, 0, None)
    try:
        return message.value
    finally:
        LocalFree(message)
194
195
class WatchmanError(Exception):
    """ Base class for the errors raised by this module.

    self.msg is the description of the failure (may be None).
    self.cmd is the command that was being executed, when known.
    """

    def __init__(self, msg=None, cmd=None):
        # Exception.__init__ is deliberately not called: the constructor
        # arguments are already captured in self.args when the exception is
        # created, and subclasses with other signatures rely on that.
        self.msg = msg
        self.cmd = cmd

    def setCommand(self, cmd):
        """ Record the command that was being executed when this error occurred """
        self.cmd = cmd

    def __str__(self):
        if self.cmd:
            return '%s, while executing %s' % (self.msg, self.cmd)
        if self.msg is None:
            # __str__ must return a string; returning None here made
            # str(WatchmanError()) raise TypeError.
            return ''
        return self.msg
208
209
class WatchmanEnvironmentError(WatchmanError):
    """ WatchmanError whose message embeds an errno and its error text """

    def __init__(self, msg, errno, errmsg, cmd=None):
        detail = '{0}: errno={1} errmsg={2}'.format(msg, errno, errmsg)
        super(WatchmanEnvironmentError, self).__init__(detail, cmd)
215
216
class SocketConnectError(WatchmanError):
    """ Raised when connecting to the socket at sockpath fails.

    self.sockpath is the path that was tried; self.exc is the underlying
    exception.
    """

    def __init__(self, sockpath, exc):
        description = 'unable to connect to %s: %s' % (sockpath, exc)
        super(SocketConnectError, self).__init__(description)
        self.sockpath = sockpath
        self.exc = exc
137 223
138 224
139 225 class SocketTimeout(WatchmanError):
@@ -151,19 +237,11 b' class CommandError(WatchmanError):'
151 237
152 238 self.msg is the message returned by watchman.
153 239 """
154
155 240 def __init__(self, msg, cmd=None):
156 self.msg = msg
157 self.cmd = cmd
158 super(CommandError, self).__init__('watchman command error: %s' % msg)
159
160 def setCommand(self, cmd):
161 self.cmd = cmd
162
163 def __str__(self):
164 if self.cmd:
165 return '%s, while executing %s' % (self.msg, self.cmd)
166 return self.msg
241 super(CommandError, self).__init__(
242 'watchman command error: %s' % (msg, ),
243 cmd,
244 )
167 245
168 246
169 247 class Transport(object):
@@ -195,16 +273,16 b' class Transport(object):'
195 273
196 274 # Buffer may already have a line if we've received unilateral
197 275 # response(s) from the server
198 if len(self.buf) == 1 and "\n" in self.buf[0]:
199 (line, b) = self.buf[0].split("\n", 1)
276 if len(self.buf) == 1 and b"\n" in self.buf[0]:
277 (line, b) = self.buf[0].split(b"\n", 1)
200 278 self.buf = [b]
201 279 return line
202 280
203 281 while True:
204 282 b = self.readBytes(4096)
205 if "\n" in b:
206 result = ''.join(self.buf)
207 (line, b) = b.split("\n", 1)
283 if b"\n" in b:
284 result = b''.join(self.buf)
285 (line, b) = b.split(b"\n", 1)
208 286 self.buf = [b]
209 287 return result + line
210 288 self.buf.append(b)
@@ -241,8 +319,8 b' class UnixSocketTransport(Transport):'
241 319 sock.connect(self.sockpath)
242 320 self.sock = sock
243 321 except socket.error as e:
244 raise WatchmanError('unable to connect to %s: %s' %
245 (self.sockpath, e))
322 sock.close()
323 raise SocketConnectError(self.sockpath, e)
246 324
247 325 def close(self):
248 326 self.sock.close()
@@ -268,6 +346,46 b' class UnixSocketTransport(Transport):'
268 346 raise SocketTimeout('timed out sending query command')
269 347
270 348
def _get_overlapped_result_ex_impl(pipe, olap, nbytes, millis, alertable):
    """ Windows 7 and earlier does not support GetOverlappedResultEx. The
    alternative is to use GetOverlappedResult and wait for the read or write
    operation to complete. This is done by using CreateEvent and
    WaitForSingleObjectEx. CreateEvent, WaitForSingleObjectEx
    and GetOverlappedResult are all part of the Windows API since Windows XP.
    This is the exact same implementation that can be found in the watchman
    source code (see get_overlapped_result_ex_impl in stream_win.c). This
    way, maintenance should be simplified.

    Returns False when the wait timed out or failed; otherwise returns
    whatever GetOverlappedResult returns.
    """
    log('Preparing to wait for maximum %dms', millis)
    if millis == 0:
        # No waiting requested: just poll the operation's current state.
        return GetOverlappedResult(pipe, olap, nbytes, False)

    status = WaitForSingleObjectEx(olap.hEvent, millis, alertable)

    if status == WAIT_TIMEOUT:
        # We reached the maximum allowed wait time, the IO operation failed
        # to complete in a timely fashion.
        SetLastError(WAIT_TIMEOUT)
        return False

    if status == WAIT_FAILED:
        # Something went wrong calling WaitForSingleObjectEx.
        err = GetLastError()
        log('WaitForSingleObjectEx failed: %s', _win32_strerror(err))
        return False

    if status == WAIT_IO_COMPLETION:
        # WaitForSingleObjectEx returned because the system added an I/O
        # completion routine or an asynchronous procedure call (APC) to the
        # thread queue. Record that and still fetch the result below.
        SetLastError(WAIT_IO_COMPLETION)
    elif status != WAIT_OBJECT_0:
        # Unexpected situation deserving investigation.
        err = GetLastError()
        log('Unexpected error: %s', _win32_strerror(err))
        return False

    # Either the event is signaled (WAIT_OBJECT_0), so the overlapped IO
    # result should be available, or we were woken by an APC.
    return GetOverlappedResult(pipe, olap, nbytes, False)
387
388
271 389 class WindowsNamedPipeTransport(Transport):
272 390 """ connect to a named pipe """
273 391
@@ -284,28 +402,35 b' class WindowsNamedPipeTransport(Transpor'
284 402 self._raise_win_err('failed to open pipe %s' % sockpath,
285 403 GetLastError())
286 404
287 def _win32_strerror(self, err):
288 """ expand a win32 error code into a human readable message """
405 # event for the overlapped I/O operations
406 self._waitable = CreateEvent(None, True, False, None)
407 if self._waitable is None:
408 self._raise_win_err('CreateEvent failed', GetLastError())
289 409
290 # FormatMessage will allocate memory and assign it here
291 buf = ctypes.c_char_p()
292 FormatMessage(
293 FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_ALLOCATE_BUFFER
294 | FORMAT_MESSAGE_IGNORE_INSERTS, None, err, 0, buf, 0, None)
295 try:
296 return buf.value
297 finally:
298 LocalFree(buf)
410 self._get_overlapped_result_ex = GetOverlappedResultEx
411 if (os.getenv('WATCHMAN_WIN7_COMPAT') == '1' or
412 self._get_overlapped_result_ex is None):
413 self._get_overlapped_result_ex = _get_overlapped_result_ex_impl
299 414
300 415 def _raise_win_err(self, msg, err):
301 416 raise IOError('%s win32 error code: %d %s' %
302 (msg, err, self._win32_strerror(err)))
417 (msg, err, _win32_strerror(err)))
303 418
304 419 def close(self):
305 420 if self.pipe:
421 log('Closing pipe')
306 422 CloseHandle(self.pipe)
307 423 self.pipe = None
308 424
425 if self._waitable is not None:
426 # We release the handle for the event
427 CloseHandle(self._waitable)
428 self._waitable = None
429
430 def setTimeout(self, value):
431 # convert to milliseconds
432 self.timeout = int(value * 1000)
433
309 434 def readBytes(self, size):
310 435 """ A read can block for an unbounded amount of time, even if the
311 436 kernel reports that the pipe handle is signalled, so we need to
@@ -325,6 +450,7 b' class WindowsNamedPipeTransport(Transpor'
325 450 # We need to initiate a read
326 451 buf = ctypes.create_string_buffer(size)
327 452 olap = OVERLAPPED()
453 olap.hEvent = self._waitable
328 454
329 455 log('made read buff of size %d', size)
330 456
@@ -339,8 +465,9 b' class WindowsNamedPipeTransport(Transpor'
339 465 GetLastError())
340 466
341 467 nread = wintypes.DWORD()
342 if not GetOverlappedResultEx(self.pipe, olap, nread,
343 0 if immediate else self.timeout, True):
468 if not self._get_overlapped_result_ex(self.pipe, olap, nread,
469 0 if immediate else self.timeout,
470 True):
344 471 err = GetLastError()
345 472 CancelIoEx(self.pipe, olap)
346 473
@@ -374,6 +501,8 b' class WindowsNamedPipeTransport(Transpor'
374 501
375 502 def write(self, data):
376 503 olap = OVERLAPPED()
504 olap.hEvent = self._waitable
505
377 506 immediate = WriteFile(self.pipe, ctypes.c_char_p(data), len(data),
378 507 None, olap)
379 508
@@ -385,8 +514,10 b' class WindowsNamedPipeTransport(Transpor'
385 514
386 515 # Obtain results, waiting if needed
387 516 nwrote = wintypes.DWORD()
388 if GetOverlappedResultEx(self.pipe, olap, nwrote, 0 if immediate else
389 self.timeout, True):
517 if self._get_overlapped_result_ex(self.pipe, olap, nwrote,
518 0 if immediate else self.timeout,
519 True):
520 log('made write of %d bytes', nwrote.value)
390 521 return nwrote.value
391 522
392 523 err = GetLastError()
@@ -430,7 +561,10 b' class CLIProcessTransport(Transport):'
430 561
def close(self):
    """ Stop the watchman CLI child process, if any, and release its pipes.

    After this call self.proc is None.
    """
    proc = self.proc
    if not proc:
        return
    started = proc.pid is not None
    if started:
        proc.kill()
    proc.stdin.close()
    proc.stdout.close()
    if started:
        # Reap the killed child; without wait() it lingers as a zombie
        # until the Popen object happens to be collected.
        proc.wait()
    self.proc = None
435 569
436 570 def _connect(self):
@@ -438,7 +572,7 b' class CLIProcessTransport(Transport):'
438 572 return self.proc
439 573 args = [
440 574 'watchman',
441 '--sockname={}'.format(self.sockpath),
575 '--sockname={0}'.format(self.sockpath),
442 576 '--logfile=/BOGUS',
443 577 '--statefile=/BOGUS',
444 578 '--no-spawn',
@@ -460,8 +594,8 b' class CLIProcessTransport(Transport):'
460 594
461 595 def write(self, data):
462 596 if self.closed:
597 self.close()
463 598 self.closed = False
464 self.proc = None
465 599 self._connect()
466 600 res = self.proc.stdin.write(data)
467 601 self.proc.stdin.close()
@@ -473,21 +607,21 b' class BserCodec(Codec):'
473 607 """ use the BSER encoding. This is the default, preferred codec """
474 608
475 609 def _loads(self, response):
476 return bser.loads(response)
610 return bser.loads(response) # Defaults to BSER v1
477 611
478 612 def receive(self):
479 613 buf = [self.transport.readBytes(sniff_len)]
480 614 if not buf[0]:
481 615 raise WatchmanError('empty watchman response')
482 616
483 elen = bser.pdu_len(buf[0])
617 _1, _2, elen = bser.pdu_info(buf[0])
484 618
485 619 rlen = len(buf[0])
486 620 while elen > rlen:
487 621 buf.append(self.transport.readBytes(elen - rlen))
488 622 rlen += len(buf[-1])
489 623
490 response = ''.join(buf)
624 response = b''.join(buf)
491 625 try:
492 626 res = self._loads(response)
493 627 return res
@@ -495,7 +629,7 b' class BserCodec(Codec):'
495 629 raise WatchmanError('watchman response decode error: %s' % e)
496 630
497 631 def send(self, *args):
498 cmd = bser.dumps(*args)
632 cmd = bser.dumps(*args) # Defaults to BSER v1
499 633 self.transport.write(cmd)
500 634
501 635
@@ -504,7 +638,64 b' class ImmutableBserCodec(BserCodec):'
504 638 immutable object support """
505 639
506 640 def _loads(self, response):
507 return bser.loads(response, False)
641 return bser.loads(response, False) # Defaults to BSER v1
642
643
class Bser2WithFallbackCodec(BserCodec):
    """ use BSER v2 encoding, falling back to v1 when the server lacks it

    The constructor asks the server for the optional "bser-v2" capability
    and records the outcome in self.bser_version / self.bser_capabilities,
    which send() then passes to bser.dumps.
    """

    def __init__(self, transport):
        super(Bser2WithFallbackCodec, self).__init__(transport)
        # Once the server advertises support for bser-v2 we should switch this
        # to 'required' on Python 3.
        # bser_version is not set yet, so this exchange goes through the
        # hasattr() fallbacks in send() and receive().
        self.send(["version", {"optional": ["bser-v2"]}])

        capabilities = self.receive()

        if 'error' in capabilities:
            raise Exception('Unsupported BSER version')

        if capabilities['capabilities']['bser-v2']:
            self.bser_version = 2
        else:
            self.bser_version = 1
        self.bser_capabilities = 0

    def _loads(self, response):
        return bser.loads(response)

    def receive(self):
        """ Read and decode one PDU, tracking the version the server uses """
        buf = [self.transport.readBytes(sniff_len)]
        if not buf[0]:
            raise WatchmanError('empty watchman response')

        recv_bser_version, recv_bser_capabilities, elen = bser.pdu_info(buf[0])

        if hasattr(self, 'bser_version'):
            # Readjust BSER version and capabilities if necessary.
            self.bser_version = max(self.bser_version, recv_bser_version)
            # This used to be stored in self.capabilities, which nothing
            # reads; send() uses self.bser_capabilities.
            self.bser_capabilities = (self.bser_capabilities &
                                      recv_bser_capabilities)

        # Keep reading until the whole PDU announced by the header is here.
        rlen = len(buf[0])
        while elen > rlen:
            buf.append(self.transport.readBytes(elen - rlen))
            rlen += len(buf[-1])

        response = b''.join(buf)
        try:
            res = self._loads(response)
            return res
        except ValueError as e:
            raise WatchmanError('watchman response decode error: %s' % e)

    def send(self, *args):
        """ Encode args with the negotiated BSER version and write them """
        if hasattr(self, 'bser_version'):
            cmd = bser.dumps(*args, version=self.bser_version,
                             capabilities=self.bser_capabilities)
        else:
            # Negotiation has not finished yet: use the bser.dumps defaults.
            cmd = bser.dumps(*args)
        self.transport.write(cmd)
508 699
509 700
510 701 class JsonCodec(Codec):
@@ -520,6 +711,13 b' class JsonCodec(Codec):'
520 711 def receive(self):
521 712 line = self.transport.readLine()
522 713 try:
714 # In Python 3, json.loads is a transformation from Unicode string to
715 # objects possibly containing Unicode strings. We typically expect
716 # the JSON blob to be ASCII-only with non-ASCII characters escaped,
717 # but it's possible we might get non-ASCII bytes that are valid
718 # UTF-8.
719 if compat.PYTHON3:
720 line = line.decode('utf-8')
523 721 return self.json.loads(line)
524 722 except Exception as e:
525 723 print(e, line)
@@ -527,7 +725,12 b' class JsonCodec(Codec):'
527 725
528 726 def send(self, *args):
529 727 cmd = self.json.dumps(*args)
530 self.transport.write(cmd + "\n")
728 # In Python 3, json.dumps is a transformation from objects possibly
729 # containing Unicode strings to Unicode string. Even with (the default)
730 # ensure_ascii=True, dumps returns a Unicode string.
731 if compat.PYTHON3:
732 cmd = cmd.encode('ascii')
733 self.transport.write(cmd + b"\n")
531 734
532 735
533 736 class client(object):
@@ -556,6 +759,9 b' class client(object):'
556 759 self.timeout = timeout
557 760 self.useImmutableBser = useImmutableBser
558 761
762 if inspect.isclass(transport) and issubclass(transport, Transport):
763 self.transport = transport
764 else:
559 765 transport = transport or os.getenv('WATCHMAN_TRANSPORT') or 'local'
560 766 if transport == 'local' and os.name == 'nt':
561 767 self.transport = WindowsNamedPipeTransport
@@ -570,8 +776,10 b' class client(object):'
570 776 else:
571 777 raise WatchmanError('invalid transport %s' % transport)
572 778
573 sendEncoding = sendEncoding or os.getenv('WATCHMAN_ENCODING') or 'bser'
574 recvEncoding = recvEncoding or os.getenv('WATCHMAN_ENCODING') or 'bser'
779 sendEncoding = str(sendEncoding or os.getenv('WATCHMAN_ENCODING') or
780 'bser')
781 recvEncoding = str(recvEncoding or os.getenv('WATCHMAN_ENCODING') or
782 'bser')
575 783
576 784 self.recvCodec = self._parseEncoding(recvEncoding)
577 785 self.sendCodec = self._parseEncoding(sendEncoding)
@@ -581,6 +789,8 b' class client(object):'
581 789 if self.useImmutableBser:
582 790 return ImmutableBserCodec
583 791 return BserCodec
792 elif enc == 'experimental-bser-v2':
793 return Bser2WithFallbackCodec
584 794 elif enc == 'json':
585 795 return JsonCodec
586 796 else:
@@ -600,10 +810,20 b' class client(object):'
600 810
601 811 cmd = ['watchman', '--output-encoding=bser', 'get-sockname']
602 812 try:
603 p = subprocess.Popen(cmd,
604 stdout=subprocess.PIPE,
813 args = dict(stdout=subprocess.PIPE,
605 814 stderr=subprocess.PIPE,
606 815 close_fds=os.name != 'nt')
816
817 if os.name == 'nt':
818 # if invoked via an application with graphical user interface,
819 # this call will cause a brief command window pop-up.
820 # Using the flag STARTF_USESHOWWINDOW to avoid this behavior.
821 startupinfo = subprocess.STARTUPINFO()
822 startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW
823 args['startupinfo'] = startupinfo
824
825 p = subprocess.Popen(cmd, **args)
826
607 827 except OSError as e:
608 828 raise WatchmanError('"watchman" executable not in PATH (%s)', e)
609 829
@@ -614,10 +834,10 b' class client(object):'
614 834 raise WatchmanError("watchman exited with code %d" % exitcode)
615 835
616 836 result = bser.loads(stdout)
617 if 'error' in result:
837 if b'error' in result:
618 838 raise WatchmanError('get-sockname error: %s' % result['error'])
619 839
620 return result['sockname']
840 return result[b'sockname']
621 841
622 842 def _connect(self):
623 843 """ establish transport connection """
@@ -660,10 +880,16 b' class client(object):'
660 880 self._connect()
661 881 result = self.recvConn.receive()
662 882 if self._hasprop(result, 'error'):
663 raise CommandError(result['error'])
883 error = result['error']
884 if compat.PYTHON3 and isinstance(self.recvConn, BserCodec):
885 error = result['error'].decode('utf-8', 'surrogateescape')
886 raise CommandError(error)
664 887
665 888 if self._hasprop(result, 'log'):
666 self.logs.append(result['log'])
889 log = result['log']
890 if compat.PYTHON3 and isinstance(self.recvConn, BserCodec):
891 log = log.decode('utf-8', 'surrogateescape')
892 self.logs.append(log)
667 893
668 894 if self._hasprop(result, 'subscription'):
669 895 sub = result['subscription']
@@ -682,6 +908,9 b' class client(object):'
682 908 return result
683 909
684 910 def isUnilateralResponse(self, res):
911 if 'unilateral' in res and res['unilateral']:
912 return True
913 # Fall back to checking for known unilateral responses
685 914 for k in self.unilateral:
686 915 if k in res:
687 916 return True
@@ -712,6 +941,13 b' class client(object):'
712 941 remove processing impacts both the unscoped and scoped stores
713 942 for the subscription data.
714 943 """
944 if compat.PYTHON3 and issubclass(self.recvCodec, BserCodec):
945 # People may pass in Unicode strings here -- but currently BSER only
946 # returns bytestrings. Deal with that.
947 if isinstance(root, str):
948 root = encoding.encode_local(root)
949 if isinstance(name, str):
950 name = name.encode('utf-8')
715 951
716 952 if root is not None:
717 953 if not root in self.sub_by_root:
@@ -752,9 +988,17 b' class client(object):'
752 988 res = self.receive()
753 989
754 990 return res
755 except CommandError as ex:
991 except EnvironmentError as ee:
992 # When we can depend on Python 3, we can use PEP 3134
993 # exception chaining here.
994 raise WatchmanEnvironmentError(
995 'I/O error communicating with watchman daemon',
996 ee.errno,
997 ee.strerror,
998 args)
999 except WatchmanError as ex:
756 1000 ex.setCommand(args)
757 raise ex
1001 raise
758 1002
759 1003 def capabilityCheck(self, optional=None, required=None):
760 1004 """ Perform a server capability check """
@@ -775,5 +1019,3 b' class client(object):'
775 1019 def setTimeout(self, value):
776 1020 self.recvConn.setTimeout(value)
777 1021 self.sendConn.setTimeout(value)
778
779 # no-check-code -- this is a 3rd party library
This diff has been collapsed as it changes many lines, (524 lines changed) Show them Hide them
@@ -29,11 +29,27 b' OF THIS SOFTWARE, EVEN IF ADVISED OF THE'
29 29 */
30 30
31 31 #include <Python.h>
32 #include <bytesobject.h>
32 33 #ifdef _MSC_VER
33 34 #define inline __inline
34 #include "msc_stdint.h"
35 #if _MSC_VER >= 1800
36 #include <stdint.h>
37 #else
38 // The compiler associated with Python 2.7 on Windows doesn't ship
39 // with stdint.h, so define the small subset that we use here.
40 typedef __int8 int8_t;
41 typedef __int16 int16_t;
42 typedef __int32 int32_t;
43 typedef __int64 int64_t;
44 typedef unsigned __int8 uint8_t;
45 typedef unsigned __int16 uint16_t;
46 typedef unsigned __int32 uint32_t;
47 typedef unsigned __int64 uint64_t;
48 #define UINT32_MAX 4294967295U
49 #endif
35 50 #endif
36 51
52 // clang-format off
37 53 /* Return the smallest size int that can store the value */
38 54 #define INT_SIZE(x) (((x) == ((int8_t)x)) ? 1 : \
39 55 ((x) == ((int16_t)x)) ? 2 : \
@@ -41,7 +57,7 b' OF THIS SOFTWARE, EVEN IF ADVISED OF THE'
41 57
42 58 #define BSER_ARRAY 0x00
43 59 #define BSER_OBJECT 0x01
44 #define BSER_STRING 0x02
60 #define BSER_BYTESTRING 0x02
45 61 #define BSER_INT8 0x03
46 62 #define BSER_INT16 0x04
47 63 #define BSER_INT32 0x05
@@ -52,6 +68,8 b' OF THIS SOFTWARE, EVEN IF ADVISED OF THE'
52 68 #define BSER_NULL 0x0a
53 69 #define BSER_TEMPLATE 0x0b
54 70 #define BSER_SKIP 0x0c
71 #define BSER_UTF8STRING 0x0d
72 // clang-format on
55 73
56 74 // An immutable object representation of BSER_OBJECT.
57 75 // Rather than build a hash table, key -> value are obtained
@@ -64,11 +82,13 b' OF THIS SOFTWARE, EVEN IF ADVISED OF THE'
64 82 // approach, this is still faster for the mercurial use case
65 83 // as it helps to eliminate creating N other objects to
66 84 // represent the stat information in the hgwatchman extension
85 // clang-format off
67 86 typedef struct {
68 87 PyObject_HEAD
69 88 PyObject *keys; // tuple of field names
70 89 PyObject *values; // tuple of values
71 90 } bserObject;
91 // clang-format on
72 92
73 93 static Py_ssize_t bserobj_tuple_length(PyObject *o) {
74 94 bserObject *obj = (bserObject*)o;
@@ -82,6 +102,7 b' static PyObject *bserobj_tuple_item(PyOb'
82 102 return PySequence_GetItem(obj->values, i);
83 103 }
84 104
105 // clang-format off
85 106 static PySequenceMethods bserobj_sq = {
86 107 bserobj_tuple_length, /* sq_length */
87 108 0, /* sq_concat */
@@ -92,6 +113,7 b' static PySequenceMethods bserobj_sq = {'
92 113 0, /* sq_inplace_concat */
93 114 0 /* sq_inplace_repeat */
94 115 };
116 // clang-format on
95 117
96 118 static void bserobj_dealloc(PyObject *o) {
97 119 bserObject *obj = (bserObject*)o;
@@ -104,18 +126,35 b' static void bserobj_dealloc(PyObject *o)'
104 126 static PyObject *bserobj_getattrro(PyObject *o, PyObject *name) {
105 127 bserObject *obj = (bserObject*)o;
106 128 Py_ssize_t i, n;
129 PyObject* name_bytes = NULL;
130 PyObject* ret = NULL;
107 131 const char *namestr;
108 132
109 133 if (PyIndex_Check(name)) {
110 134 i = PyNumber_AsSsize_t(name, PyExc_IndexError);
111 135 if (i == -1 && PyErr_Occurred()) {
112 return NULL;
136 goto bail;
113 137 }
114 return PySequence_GetItem(obj->values, i);
138 ret = PySequence_GetItem(obj->values, i);
139 goto bail;
115 140 }
116 141
142 // We can be passed in Unicode objects here -- we don't support anything other
143 // than UTF-8 for keys.
144 if (PyUnicode_Check(name)) {
145 name_bytes = PyUnicode_AsUTF8String(name);
146 if (name_bytes == NULL) {
147 goto bail;
148 }
149 namestr = PyBytes_AsString(name_bytes);
150 } else {
151 namestr = PyBytes_AsString(name);
152 }
153
154 if (namestr == NULL) {
155 goto bail;
156 }
117 157 // hack^Wfeature to allow mercurial to use "st_size" to reference "size"
118 namestr = PyString_AsString(name);
119 158 if (!strncmp(namestr, "st_", 3)) {
120 159 namestr += 3;
121 160 }
@@ -125,16 +164,21 b' static PyObject *bserobj_getattrro(PyObj'
125 164 const char *item_name = NULL;
126 165 PyObject *key = PyTuple_GET_ITEM(obj->keys, i);
127 166
128 item_name = PyString_AsString(key);
167 item_name = PyBytes_AsString(key);
129 168 if (!strcmp(item_name, namestr)) {
130 return PySequence_GetItem(obj->values, i);
169 ret = PySequence_GetItem(obj->values, i);
170 goto bail;
131 171 }
132 172 }
133 PyErr_Format(PyExc_AttributeError,
134 "bserobject has no attribute '%.400s'", namestr);
135 return NULL;
173
174 PyErr_Format(
175 PyExc_AttributeError, "bserobject has no attribute '%.400s'", namestr);
176 bail:
177 Py_XDECREF(name_bytes);
178 return ret;
136 179 }
137 180
181 // clang-format off
138 182 static PyMappingMethods bserobj_map = {
139 183 bserobj_tuple_length, /* mp_length */
140 184 bserobj_getattrro, /* mp_subscript */
@@ -181,20 +225,27 b' PyTypeObject bserObjectType = {'
181 225 0, /* tp_alloc */
182 226 0, /* tp_new */
183 227 };
184
228 // clang-format on
185 229
186 static PyObject *bser_loads_recursive(const char **ptr, const char *end,
187 int mutable);
230 typedef struct loads_ctx {
231 int mutable;
232 const char* value_encoding;
233 const char* value_errors;
234 uint32_t bser_version;
235 uint32_t bser_capabilities;
236 } unser_ctx_t;
237
238 static PyObject*
239 bser_loads_recursive(const char** ptr, const char* end, const unser_ctx_t* ctx);
188 240
189 241 static const char bser_true = BSER_TRUE;
190 242 static const char bser_false = BSER_FALSE;
191 243 static const char bser_null = BSER_NULL;
192 static const char bser_string_hdr = BSER_STRING;
244 static const char bser_bytestring_hdr = BSER_BYTESTRING;
193 245 static const char bser_array_hdr = BSER_ARRAY;
194 246 static const char bser_object_hdr = BSER_OBJECT;
195 247
196 static inline uint32_t next_power_2(uint32_t n)
197 {
248 static inline uint32_t next_power_2(uint32_t n) {
198 249 n |= (n >> 16);
199 250 n |= (n >> 8);
200 251 n |= (n >> 4);
@@ -207,11 +258,12 b' static inline uint32_t next_power_2(uint'
207 258 struct bser_buffer {
208 259 char *buf;
209 260 int wpos, allocd;
261 uint32_t bser_version;
262 uint32_t capabilities;
210 263 };
211 264 typedef struct bser_buffer bser_t;
212 265
213 static int bser_append(bser_t *bser, const char *data, uint32_t len)
214 {
266 static int bser_append(bser_t* bser, const char* data, uint32_t len) {
215 267 int newlen = next_power_2(bser->wpos + len);
216 268 if (newlen > bser->allocd) {
217 269 char *nbuf = realloc(bser->buf, newlen);
@@ -228,12 +280,12 b' static int bser_append(bser_t *bser, con'
228 280 return 1;
229 281 }
230 282
231 static int bser_init(bser_t *bser)
232 {
283 static int bser_init(bser_t* bser, uint32_t version, uint32_t capabilities) {
233 284 bser->allocd = 8192;
234 285 bser->wpos = 0;
235 286 bser->buf = malloc(bser->allocd);
236
287 bser->bser_version = version;
288 bser->capabilities = capabilities;
237 289 if (!bser->buf) {
238 290 return 0;
239 291 }
@@ -242,19 +294,25 b' static int bser_init(bser_t *bser)'
242 294 // our overall length. To make things simpler, we'll use an
243 295 // int32 for the header
244 296 #define EMPTY_HEADER "\x00\x01\x05\x00\x00\x00\x00"
297
298 // Version 2 also carries an integer indicating the capabilities. The
299 // capabilities integer comes before the PDU size.
300 #define EMPTY_HEADER_V2 "\x00\x02\x00\x00\x00\x00\x05\x00\x00\x00\x00"
301 if (version == 2) {
302 bser_append(bser, EMPTY_HEADER_V2, sizeof(EMPTY_HEADER_V2) - 1);
303 } else {
245 304 bser_append(bser, EMPTY_HEADER, sizeof(EMPTY_HEADER)-1);
305 }
246 306
247 307 return 1;
248 308 }
249 309
250 static void bser_dtor(bser_t *bser)
251 {
310 static void bser_dtor(bser_t* bser) {
252 311 free(bser->buf);
253 312 bser->buf = NULL;
254 313 }
255 314
256 static int bser_long(bser_t *bser, int64_t val)
257 {
315 static int bser_long(bser_t* bser, int64_t val) {
258 316 int8_t i8;
259 317 int16_t i16;
260 318 int32_t i32;
@@ -285,8 +343,7 b' static int bser_long(bser_t *bser, int64'
285 343 iptr = (char*)&i64;
286 344 break;
287 345 default:
288 PyErr_SetString(PyExc_RuntimeError,
289 "Cannot represent this long value!?");
346 PyErr_SetString(PyExc_RuntimeError, "Cannot represent this long value!?");
290 347 return 0;
291 348 }
292 349
@@ -297,8 +354,7 b' static int bser_long(bser_t *bser, int64'
297 354 return bser_append(bser, iptr, size);
298 355 }
299 356
300 static int bser_string(bser_t *bser, PyObject *sval)
301 {
357 static int bser_bytestring(bser_t* bser, PyObject* sval) {
302 358 char *buf = NULL;
303 359 Py_ssize_t len;
304 360 int res;
@@ -309,13 +365,13 b' static int bser_string(bser_t *bser, PyO'
309 365 sval = utf;
310 366 }
311 367
312 res = PyString_AsStringAndSize(sval, &buf, &len);
368 res = PyBytes_AsStringAndSize(sval, &buf, &len);
313 369 if (res == -1) {
314 370 res = 0;
315 371 goto out;
316 372 }
317 373
318 if (!bser_append(bser, &bser_string_hdr, sizeof(bser_string_hdr))) {
374 if (!bser_append(bser, &bser_bytestring_hdr, sizeof(bser_bytestring_hdr))) {
319 375 res = 0;
320 376 goto out;
321 377 }
@@ -341,8 +397,7 b' out:'
341 397 return res;
342 398 }
343 399
344 static int bser_recursive(bser_t *bser, PyObject *val)
345 {
400 static int bser_recursive(bser_t* bser, PyObject* val) {
346 401 if (PyBool_Check(val)) {
347 402 if (val == Py_True) {
348 403 return bser_append(bser, &bser_true, sizeof(bser_true));
@@ -354,19 +409,21 b' static int bser_recursive(bser_t *bser, '
354 409 return bser_append(bser, &bser_null, sizeof(bser_null));
355 410 }
356 411
412 // Python 3 has one integer type.
413 #if PY_MAJOR_VERSION < 3
357 414 if (PyInt_Check(val)) {
358 415 return bser_long(bser, PyInt_AS_LONG(val));
359 416 }
417 #endif // PY_MAJOR_VERSION < 3
360 418
361 419 if (PyLong_Check(val)) {
362 420 return bser_long(bser, PyLong_AsLongLong(val));
363 421 }
364 422
365 if (PyString_Check(val) || PyUnicode_Check(val)) {
366 return bser_string(bser, val);
423 if (PyBytes_Check(val) || PyUnicode_Check(val)) {
424 return bser_bytestring(bser, val);
367 425 }
368 426
369
370 427 if (PyFloat_Check(val)) {
371 428 double dval = PyFloat_AS_DOUBLE(val);
372 429 char sz = BSER_REAL;
@@ -436,7 +493,7 b' static int bser_recursive(bser_t *bser, '
436 493 }
437 494
438 495 while (PyDict_Next(val, &pos, &key, &ele)) {
439 if (!bser_string(bser, key)) {
496 if (!bser_bytestring(bser, key)) {
440 497 return 0;
441 498 }
442 499 if (!bser_recursive(bser, ele)) {
@@ -451,17 +508,25 b' static int bser_recursive(bser_t *bser, '
451 508 return 0;
452 509 }
453 510
454 static PyObject *bser_dumps(PyObject *self, PyObject *args)
455 {
511 static PyObject* bser_dumps(PyObject* self, PyObject* args, PyObject* kw) {
456 512 PyObject *val = NULL, *res;
457 513 bser_t bser;
458 uint32_t len;
514 uint32_t len, bser_version = 1, bser_capabilities = 0;
515
516 static char* kw_list[] = {"val", "version", "capabilities", NULL};
459 517
460 if (!PyArg_ParseTuple(args, "O", &val)) {
518 if (!PyArg_ParseTupleAndKeywords(
519 args,
520 kw,
521 "O|ii:dumps",
522 kw_list,
523 &val,
524 &bser_version,
525 &bser_capabilities)) {
461 526 return NULL;
462 527 }
463 528
464 if (!bser_init(&bser)) {
529 if (!bser_init(&bser, bser_version, bser_capabilities)) {
465 530 return PyErr_NoMemory();
466 531 }
467 532
@@ -475,17 +540,23 b' static PyObject *bser_dumps(PyObject *se'
475 540 }
476 541
477 542 // Now fill in the overall length
543 if (bser_version == 1) {
478 544 len = bser.wpos - (sizeof(EMPTY_HEADER) - 1);
479 545 memcpy(bser.buf + 3, &len, sizeof(len));
546 } else {
547 len = bser.wpos - (sizeof(EMPTY_HEADER_V2) - 1);
548 // The BSER capabilities block comes before the PDU length
549 memcpy(bser.buf + 2, &bser_capabilities, sizeof(bser_capabilities));
550 memcpy(bser.buf + 7, &len, sizeof(len));
551 }
480 552
481 res = PyString_FromStringAndSize(bser.buf, bser.wpos);
553 res = PyBytes_FromStringAndSize(bser.buf, bser.wpos);
482 554 bser_dtor(&bser);
483 555
484 556 return res;
485 557 }
486 558
487 int bunser_int(const char **ptr, const char *end, int64_t *val)
488 {
559 int bunser_int(const char** ptr, const char* end, int64_t* val) {
489 560 int needed;
490 561 const char *buf = *ptr;
491 562 int8_t i8;
@@ -507,8 +578,8 b' int bunser_int(const char **ptr, const c'
507 578 needed = 9;
508 579 break;
509 580 default:
510 PyErr_Format(PyExc_ValueError,
511 "invalid bser int encoding 0x%02x", buf[0]);
581 PyErr_Format(
582 PyExc_ValueError, "invalid bser int encoding 0x%02x", buf[0]);
512 583 return 0;
513 584 }
514 585 if (end - buf < needed) {
@@ -538,9 +609,11 b' int bunser_int(const char **ptr, const c'
538 609 }
539 610 }
540 611
541 static int bunser_string(const char **ptr, const char *end,
542 const char **start, int64_t *len)
543 {
612 static int bunser_bytestring(
613 const char** ptr,
614 const char* end,
615 const char** start,
616 int64_t* len) {
544 617 const char *buf = *ptr;
545 618
546 619 // skip string marker
@@ -559,10 +632,11 b' static int bunser_string(const char **pt'
559 632 return 1;
560 633 }
561 634
562 static PyObject *bunser_array(const char **ptr, const char *end, int mutable)
563 {
635 static PyObject*
636 bunser_array(const char** ptr, const char* end, const unser_ctx_t* ctx) {
564 637 const char *buf = *ptr;
565 638 int64_t nitems, i;
639 int mutable = ctx->mutable;
566 640 PyObject *res;
567 641
568 642 // skip array header
@@ -584,7 +658,7 b' static PyObject *bunser_array(const char'
584 658 }
585 659
586 660 for (i = 0; i < nitems; i++) {
587 PyObject *ele = bser_loads_recursive(ptr, end, mutable);
661 PyObject* ele = bser_loads_recursive(ptr, end, ctx);
588 662
589 663 if (!ele) {
590 664 Py_DECREF(res);
@@ -602,11 +676,11 b' static PyObject *bunser_array(const char'
602 676 return res;
603 677 }
604 678
605 static PyObject *bunser_object(const char **ptr, const char *end,
606 int mutable)
607 {
679 static PyObject*
680 bunser_object(const char** ptr, const char* end, const unser_ctx_t* ctx) {
608 681 const char *buf = *ptr;
609 682 int64_t nitems, i;
683 int mutable = ctx->mutable;
610 684 PyObject *res;
611 685 bserObject *obj;
612 686
@@ -632,7 +706,7 b' static PyObject *bunser_object(const cha'
632 706 PyObject *key;
633 707 PyObject *ele;
634 708
635 if (!bunser_string(ptr, end, &keystr, &keylen)) {
709 if (!bunser_bytestring(ptr, end, &keystr, &keylen)) {
636 710 Py_DECREF(res);
637 711 return NULL;
638 712 }
@@ -643,13 +717,24 b' static PyObject *bunser_object(const cha'
643 717 return NULL;
644 718 }
645 719
646 key = PyString_FromStringAndSize(keystr, (Py_ssize_t)keylen);
720 if (mutable) {
721 // This will interpret the key as UTF-8.
722 key = PyUnicode_FromStringAndSize(keystr, (Py_ssize_t)keylen);
723 } else {
724 // For immutable objects we'll manage key lookups, so we can avoid going
725 // through the Unicode APIs. This avoids a potentially expensive and
726 // definitely unnecessary conversion to UTF-16 and back for Python 2.
727 // TODO: On Python 3 the Unicode APIs are smarter: we might be able to use
728 // Unicode keys there without an appreciable performance loss.
729 key = PyBytes_FromStringAndSize(keystr, (Py_ssize_t)keylen);
730 }
731
647 732 if (!key) {
648 733 Py_DECREF(res);
649 734 return NULL;
650 735 }
651 736
652 ele = bser_loads_recursive(ptr, end, mutable);
737 ele = bser_loads_recursive(ptr, end, ctx);
653 738
654 739 if (!ele) {
655 740 Py_DECREF(key);
@@ -671,14 +756,24 b' static PyObject *bunser_object(const cha'
671 756 return res;
672 757 }
673 758
674 static PyObject *bunser_template(const char **ptr, const char *end,
675 int mutable)
676 {
759 static PyObject*
760 bunser_template(const char** ptr, const char* end, const unser_ctx_t* ctx) {
677 761 const char *buf = *ptr;
678 762 int64_t nitems, i;
763 int mutable = ctx->mutable;
679 764 PyObject *arrval;
680 765 PyObject *keys;
681 766 Py_ssize_t numkeys, keyidx;
767 unser_ctx_t keys_ctx = {0};
768 if (mutable) {
769 keys_ctx.mutable = 1;
770 // Decode keys as UTF-8 in this case.
771 keys_ctx.value_encoding = "utf-8";
772 keys_ctx.value_errors = "strict";
773 } else {
774 // Treat keys as bytestrings in this case -- we'll do Unicode conversions at
775 // lookup time.
776 }
682 777
683 778 if (buf[1] != BSER_ARRAY) {
684 779 PyErr_Format(PyExc_ValueError, "Expect ARRAY to follow TEMPLATE");
@@ -689,8 +784,9 b' static PyObject *bunser_template(const c'
689 784 buf++;
690 785 *ptr = buf;
691 786
692 // Load template keys
693 keys = bunser_array(ptr, end, mutable);
787 // Load template keys.
788 // Keys use keys_ctx: UTF-8 text when mutable, raw bytestrings otherwise.
789 keys = bunser_array(ptr, end, &keys_ctx);
694 790 if (!keys) {
695 791 return NULL;
696 792 }
@@ -746,7 +842,7 b' fail:'
746 842 ele = Py_None;
747 843 Py_INCREF(ele);
748 844 } else {
749 ele = bser_loads_recursive(ptr, end, mutable);
845 ele = bser_loads_recursive(ptr, end, ctx);
750 846 }
751 847
752 848 if (!ele) {
@@ -772,29 +868,33 b' fail:'
772 868 return arrval;
773 869 }
774 870
775 static PyObject *bser_loads_recursive(const char **ptr, const char *end,
776 int mutable)
777 {
871 static PyObject* bser_loads_recursive(
872 const char** ptr,
873 const char* end,
874 const unser_ctx_t* ctx) {
778 875 const char *buf = *ptr;
779 876
780 877 switch (buf[0]) {
781 878 case BSER_INT8:
782 879 case BSER_INT16:
783 880 case BSER_INT32:
784 case BSER_INT64:
785 {
881 case BSER_INT64: {
786 882 int64_t ival;
787 883 if (!bunser_int(ptr, end, &ival)) {
788 884 return NULL;
789 885 }
886 // Python 3 has one integer type.
887 #if PY_MAJOR_VERSION >= 3
888 return PyLong_FromLongLong(ival);
889 #else
790 890 if (ival < LONG_MIN || ival > LONG_MAX) {
791 891 return PyLong_FromLongLong(ival);
792 892 }
793 893 return PyInt_FromSsize_t(Py_SAFE_DOWNCAST(ival, int64_t, Py_ssize_t));
894 #endif // PY_MAJOR_VERSION >= 3
794 895 }
795 896
796 case BSER_REAL:
797 {
897 case BSER_REAL: {
798 898 double dval;
799 899 memcpy(&dval, buf + 1, sizeof(dval));
800 900 *ptr = buf + 1 + sizeof(double);
@@ -816,12 +916,11 b' static PyObject *bser_loads_recursive(co'
816 916 Py_INCREF(Py_None);
817 917 return Py_None;
818 918
819 case BSER_STRING:
820 {
919 case BSER_BYTESTRING: {
821 920 const char *start;
822 921 int64_t len;
823 922
824 if (!bunser_string(ptr, end, &start, &len)) {
923 if (!bunser_bytestring(ptr, end, &start, &len)) {
825 924 return NULL;
826 925 }
827 926
@@ -830,17 +929,38 b' static PyObject *bser_loads_recursive(co'
830 929 return NULL;
831 930 }
832 931
833 return PyString_FromStringAndSize(start, (long)len);
932 if (ctx->value_encoding != NULL) {
933 return PyUnicode_Decode(
934 start, (long)len, ctx->value_encoding, ctx->value_errors);
935 } else {
936 return PyBytes_FromStringAndSize(start, (long)len);
937 }
938 }
939
940 case BSER_UTF8STRING: {
941 const char* start;
942 int64_t len;
943
944 if (!bunser_bytestring(ptr, end, &start, &len)) {
945 return NULL;
946 }
947
948 if (len > LONG_MAX) {
949 PyErr_Format(PyExc_ValueError, "string too long for python");
950 return NULL;
951 }
952
953 return PyUnicode_Decode(start, (long)len, "utf-8", "strict");
834 954 }
835 955
836 956 case BSER_ARRAY:
837 return bunser_array(ptr, end, mutable);
957 return bunser_array(ptr, end, ctx);
838 958
839 959 case BSER_OBJECT:
840 return bunser_object(ptr, end, mutable);
960 return bunser_object(ptr, end, ctx);
841 961
842 962 case BSER_TEMPLATE:
843 return bunser_template(ptr, end, mutable);
963 return bunser_template(ptr, end, ctx);
844 964
845 965 default:
846 966 PyErr_Format(PyExc_ValueError, "unhandled bser opcode 0x%02x", buf[0]);
@@ -849,102 +969,244 b' static PyObject *bser_loads_recursive(co'
849 969 return NULL;
850 970 }
851 971
852 // Expected use case is to read a packet from the socket and
853 // then call bser.pdu_len on the packet. It returns the total
854 // length of the entire response that the peer is sending,
855 // including the bytes already received. This allows the client
856 // to compute the data size it needs to read before it can
857 // decode the data
858 static PyObject *bser_pdu_len(PyObject *self, PyObject *args)
859 {
860 const char *start = NULL;
861 const char *data = NULL;
862 int datalen = 0;
863 const char *end;
864 int64_t expected_len, total_len;
972 static int _pdu_info_helper(
973 const char* data,
974 const char* end,
975 uint32_t* bser_version_out,
976 uint32_t* bser_capabilities_out,
977 int64_t* expected_len_out,
978 off_t* position_out) {
979 uint32_t bser_version;
980 uint32_t bser_capabilities = 0;
981 int64_t expected_len;
865 982
866 if (!PyArg_ParseTuple(args, "s#", &start, &datalen)) {
867 return NULL;
868 }
869 data = start;
870 end = data + datalen;
871
983 const char* start;
984 start = data;
872 985 // Validate the header and length
873 if (memcmp(data, EMPTY_HEADER, 2) != 0) {
986 if (memcmp(data, EMPTY_HEADER, 2) == 0) {
987 bser_version = 1;
988 } else if (memcmp(data, EMPTY_HEADER_V2, 2) == 0) {
989 bser_version = 2;
990 } else {
874 991 PyErr_SetString(PyExc_ValueError, "invalid bser header");
875 return NULL;
992 return 0;
876 993 }
877 994
878 995 data += 2;
879 996
997 if (bser_version == 2) {
998 // Expect an integer telling us what capabilities are supported by the
999 // remote server (currently unused).
1000 if (!memcpy(&bser_capabilities, &data, sizeof(bser_capabilities))) {
1001 return 0;
1002 }
1003 data += sizeof(bser_capabilities);
1004 }
1005
880 1006 // Expect an integer telling us how big the rest of the data
881 1007 // should be
882 1008 if (!bunser_int(&data, end, &expected_len)) {
883 return NULL;
1009 return 0;
1010 }
1011
1012 *bser_version_out = bser_version;
1013 *bser_capabilities_out = (uint32_t)bser_capabilities;
1014 *expected_len_out = expected_len;
1015 *position_out = (off_t)(data - start);
1016 return 1;
884 1017 }
885 1018
886 total_len = expected_len + (data - start);
887 if (total_len > LONG_MAX) {
888 return PyLong_FromLongLong(total_len);
889 }
890 return PyInt_FromLong((long)total_len);
891 }
892
893 static PyObject *bser_loads(PyObject *self, PyObject *args)
894 {
1019 // This function parses the PDU header and provides info about the packet.
1020 // Returns 0 (false) if unsuccessful and 1 on success.
1021 static int pdu_info_helper(
1022 PyObject* self,
1023 PyObject* args,
1024 uint32_t* bser_version_out,
1025 uint32_t* bser_capabilities_out,
1026 int64_t* total_len_out) {
1027 const char* start = NULL;
895 1028 const char *data = NULL;
896 1029 int datalen = 0;
897 1030 const char *end;
898 1031 int64_t expected_len;
899 int mutable = 1;
900 PyObject *mutable_obj = NULL;
1032 off_t position;
1033
1034 if (!PyArg_ParseTuple(args, "s#", &start, &datalen)) {
1035 return 0;
1036 }
1037 data = start;
1038 end = data + datalen;
901 1039
902 if (!PyArg_ParseTuple(args, "s#|O:loads", &data, &datalen, &mutable_obj)) {
1040 if (!_pdu_info_helper(
1041 data,
1042 end,
1043 bser_version_out,
1044 bser_capabilities_out,
1045 &expected_len,
1046 &position)) {
1047 return 0;
1048 }
1049 *total_len_out = (int64_t)(expected_len + position);
1050 return 1;
1051 }
1052
1053 // Expected use case is to read a packet from the socket and then call
1054 // bser.pdu_info on the packet. It returns the BSER version, BSER capabilities,
1055 // and the total length of the entire response that the peer is sending,
1056 // including the bytes already received. This allows the client to compute the
1057 // data size it needs to read before it can decode the data.
1058 static PyObject* bser_pdu_info(PyObject* self, PyObject* args) {
1059 uint32_t version, capabilities;
1060 int64_t total_len;
1061 if (!pdu_info_helper(self, args, &version, &capabilities, &total_len)) {
903 1062 return NULL;
904 1063 }
905 if (mutable_obj) {
906 mutable = PyObject_IsTrue(mutable_obj) > 0 ? 1 : 0;
1064 return Py_BuildValue("kkL", version, capabilities, total_len);
1065 }
1066
1067 static PyObject* bser_pdu_len(PyObject* self, PyObject* args) {
1068 uint32_t version, capabilities;
1069 int64_t total_len;
1070 if (!pdu_info_helper(self, args, &version, &capabilities, &total_len)) {
1071 return NULL;
1072 }
1073 return Py_BuildValue("L", total_len);
907 1074 }
908 1075
909 end = data + datalen;
1076 static PyObject* bser_loads(PyObject* self, PyObject* args, PyObject* kw) {
1077 const char* data = NULL;
1078 int datalen = 0;
1079 const char* start;
1080 const char* end;
1081 int64_t expected_len;
1082 off_t position;
1083 PyObject* mutable_obj = NULL;
1084 const char* value_encoding = NULL;
1085 const char* value_errors = NULL;
1086 unser_ctx_t ctx = {1, 0};
910 1087
911 // Validate the header and length
912 if (memcmp(data, EMPTY_HEADER, 2) != 0) {
913 PyErr_SetString(PyExc_ValueError, "invalid bser header");
1088 static char* kw_list[] = {
1089 "buf", "mutable", "value_encoding", "value_errors", NULL};
1090
1091 if (!PyArg_ParseTupleAndKeywords(
1092 args,
1093 kw,
1094 "s#|Ozz:loads",
1095 kw_list,
1096 &start,
1097 &datalen,
1098 &mutable_obj,
1099 &value_encoding,
1100 &value_errors)) {
914 1101 return NULL;
915 1102 }
916 1103
917 data += 2;
1104 if (mutable_obj) {
1105 ctx.mutable = PyObject_IsTrue(mutable_obj) > 0 ? 1 : 0;
1106 }
1107 ctx.value_encoding = value_encoding;
1108 if (value_encoding == NULL) {
1109 ctx.value_errors = NULL;
1110 } else if (value_errors == NULL) {
1111 ctx.value_errors = "strict";
1112 } else {
1113 ctx.value_errors = value_errors;
1114 }
1115 data = start;
1116 end = data + datalen;
918 1117
919 // Expect an integer telling us how big the rest of the data
920 // should be
921 if (!bunser_int(&data, end, &expected_len)) {
1118 if (!_pdu_info_helper(
1119 data,
1120 end,
1121 &ctx.bser_version,
1122 &ctx.bser_capabilities,
1123 &expected_len,
1124 &position)) {
922 1125 return NULL;
923 1126 }
924 1127
1128 data = start + position;
925 1129 // Verify
926 1130 if (expected_len + data != end) {
927 1131 PyErr_SetString(PyExc_ValueError, "bser data len != header len");
928 1132 return NULL;
929 1133 }
930 1134
931 return bser_loads_recursive(&data, end, mutable);
1135 return bser_loads_recursive(&data, end, &ctx);
932 1136 }
933 1137
1138 static PyObject* bser_load(PyObject* self, PyObject* args, PyObject* kw) {
1139 PyObject *load, *string;
1140 PyObject* fp = NULL;
1141 PyObject* mutable_obj = NULL;
1142 const char* value_encoding = NULL;
1143 const char* value_errors = NULL;
1144
1145 static char* kw_list[] = {
1146 "fp", "mutable", "value_encoding", "value_errors", NULL};
1147
1148 if (!PyArg_ParseTupleAndKeywords(
1149 args,
1150 kw,
1151 "OOzz:load",
1152 kw_list,
1153 &fp,
1154 &mutable_obj,
1155 &value_encoding,
1156 &value_errors)) {
1157 return NULL;
1158 }
1159
1160 load = PyImport_ImportModule("pywatchman.load");
1161 if (load == NULL) {
1162 return NULL;
1163 }
1164 string = PyObject_CallMethod(
1165 load, "load", "OOzz", fp, mutable_obj, value_encoding, value_errors);
1166 Py_DECREF(load);
1167 return string;
1168 }
1169
1170 // clang-format off
934 1171 static PyMethodDef bser_methods[] = {
935 {"loads", bser_loads, METH_VARARGS, "Deserialize string."},
936 {"pdu_len", bser_pdu_len, METH_VARARGS, "Extract PDU length."},
937 {"dumps", bser_dumps, METH_VARARGS, "Serialize string."},
1172 {"loads", (PyCFunction)bser_loads, METH_VARARGS | METH_KEYWORDS,
1173 "Deserialize string."},
1174 {"load", (PyCFunction)bser_load, METH_VARARGS | METH_KEYWORDS,
1175 "Deserialize a file object"},
1176 {"pdu_info", (PyCFunction)bser_pdu_info, METH_VARARGS,
1177 "Extract PDU information."},
1178 {"pdu_len", (PyCFunction)bser_pdu_len, METH_VARARGS,
1179 "Extract total PDU length."},
1180 {"dumps", (PyCFunction)bser_dumps, METH_VARARGS | METH_KEYWORDS,
1181 "Serialize string."},
938 1182 {NULL, NULL, 0, NULL}
939 1183 };
940 1184
941 PyMODINIT_FUNC initbser(void)
942 {
1185 #if PY_MAJOR_VERSION >= 3
1186 static struct PyModuleDef bser_module = {
1187 PyModuleDef_HEAD_INIT,
1188 "bser",
1189 "Efficient encoding and decoding of BSER.",
1190 -1,
1191 bser_methods
1192 };
1193 // clang-format on
1194
1195 PyMODINIT_FUNC PyInit_bser(void) {
1196 PyObject* mod;
1197
1198 mod = PyModule_Create(&bser_module);
1199 PyType_Ready(&bserObjectType);
1200
1201 return mod;
1202 }
1203 #else
1204
1205 PyMODINIT_FUNC initbser(void) {
943 1206 (void)Py_InitModule("bser", bser_methods);
944 1207 PyType_Ready(&bserObjectType);
945 1208 }
1209 #endif // PY_MAJOR_VERSION >= 3
946 1210
947 1211 /* vim:ts=2:sw=2:et:
948 1212 */
949
950 // no-check-code -- this is a 3rd party library
@@ -26,6 +26,11 b''
26 26 # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 27 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 28
29 from __future__ import absolute_import
30 from __future__ import division
31 from __future__ import print_function
32 # no unicode literals
33
29 34 import re
30 35
31 36 def parse_version(vstr):
@@ -65,5 +70,3 b' def synthesize(vers, opts):'
65 70 vers['error'] = 'client required capability `' + name + \
66 71 '` is not supported by this server'
67 72 return vers
68
69 # no-check-code -- this is a 3rd party library
@@ -26,33 +26,51 b''
26 26 # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 27 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 28
29 from __future__ import absolute_import
30 from __future__ import division
31 from __future__ import print_function
32 # no unicode literals
33
34 import binascii
29 35 import collections
30 36 import ctypes
31 37 import struct
32 38 import sys
33 39
34 BSER_ARRAY = '\x00'
35 BSER_OBJECT = '\x01'
36 BSER_STRING = '\x02'
37 BSER_INT8 = '\x03'
38 BSER_INT16 = '\x04'
39 BSER_INT32 = '\x05'
40 BSER_INT64 = '\x06'
41 BSER_REAL = '\x07'
42 BSER_TRUE = '\x08'
43 BSER_FALSE = '\x09'
44 BSER_NULL = '\x0a'
45 BSER_TEMPLATE = '\x0b'
46 BSER_SKIP = '\x0c'
40 from . import (
41 compat,
42 )
43
44 BSER_ARRAY = b'\x00'
45 BSER_OBJECT = b'\x01'
46 BSER_BYTESTRING = b'\x02'
47 BSER_INT8 = b'\x03'
48 BSER_INT16 = b'\x04'
49 BSER_INT32 = b'\x05'
50 BSER_INT64 = b'\x06'
51 BSER_REAL = b'\x07'
52 BSER_TRUE = b'\x08'
53 BSER_FALSE = b'\x09'
54 BSER_NULL = b'\x0a'
55 BSER_TEMPLATE = b'\x0b'
56 BSER_SKIP = b'\x0c'
57 BSER_UTF8STRING = b'\x0d'
58
59 if compat.PYTHON3:
60 STRING_TYPES = (str, bytes)
61 unicode = str
62 def tobytes(i):
63 return str(i).encode('ascii')
64 long = int
65 else:
66 STRING_TYPES = (unicode, str)
67 tobytes = bytes
47 68
48 69 # Leave room for the serialization header, which includes
49 70 # our overall length. To make things simpler, we'll use an
50 71 # int32 for the header
51 EMPTY_HEADER = "\x00\x01\x05\x00\x00\x00\x00"
52
53 # Python 3 conditional for supporting Python 2's int/long types
54 if sys.version_info > (3,):
55 long = int
72 EMPTY_HEADER = b"\x00\x01\x05\x00\x00\x00\x00"
73 EMPTY_HEADER_V2 = b"\x00\x02\x00\x00\x00\x00\x05\x00\x00\x00\x00"
56 74
57 75 def _int_size(x):
58 76 """Return the smallest size int that can store the value"""
@@ -67,13 +85,28 b' def _int_size(x):'
67 85 else:
68 86 raise RuntimeError('Cannot represent value: ' + str(x))
69 87
88 def _buf_pos(buf, pos):
89 ret = buf[pos]
90 # In Python 2, buf is a str array so buf[pos] is a string. In Python 3, buf
91 # is a bytes array and buf[pos] is an integer.
92 if compat.PYTHON3:
93 ret = bytes((ret,))
94 return ret
70 95
71 96 class _bser_buffer(object):
72 97
73 def __init__(self):
98 def __init__(self, version):
99 self.bser_version = version
74 100 self.buf = ctypes.create_string_buffer(8192)
75 struct.pack_into(str(len(EMPTY_HEADER)) + 's', self.buf, 0, EMPTY_HEADER)
101 if self.bser_version == 1:
102 struct.pack_into(tobytes(len(EMPTY_HEADER)) + b's', self.buf, 0,
103 EMPTY_HEADER)
76 104 self.wpos = len(EMPTY_HEADER)
105 else:
106 assert self.bser_version == 2
107 struct.pack_into(tobytes(len(EMPTY_HEADER_V2)) + b's', self.buf, 0,
108 EMPTY_HEADER_V2)
109 self.wpos = len(EMPTY_HEADER_V2)
77 110
78 111 def ensure_size(self, size):
79 112 while ctypes.sizeof(self.buf) - self.wpos < size:
@@ -84,13 +117,13 b' class _bser_buffer(object):'
84 117 to_write = size + 1
85 118 self.ensure_size(to_write)
86 119 if size == 1:
87 struct.pack_into('=cb', self.buf, self.wpos, BSER_INT8, val)
120 struct.pack_into(b'=cb', self.buf, self.wpos, BSER_INT8, val)
88 121 elif size == 2:
89 struct.pack_into('=ch', self.buf, self.wpos, BSER_INT16, val)
122 struct.pack_into(b'=ch', self.buf, self.wpos, BSER_INT16, val)
90 123 elif size == 4:
91 struct.pack_into('=ci', self.buf, self.wpos, BSER_INT32, val)
124 struct.pack_into(b'=ci', self.buf, self.wpos, BSER_INT32, val)
92 125 elif size == 8:
93 struct.pack_into('=cq', self.buf, self.wpos, BSER_INT64, val)
126 struct.pack_into(b'=cq', self.buf, self.wpos, BSER_INT64, val)
94 127 else:
95 128 raise RuntimeError('Cannot represent this long value')
96 129 self.wpos += to_write
@@ -104,13 +137,17 b' class _bser_buffer(object):'
104 137 to_write = 2 + size + s_len
105 138 self.ensure_size(to_write)
106 139 if size == 1:
107 struct.pack_into('=ccb' + str(s_len) + 's', self.buf, self.wpos, BSER_STRING, BSER_INT8, s_len, s)
140 struct.pack_into(b'=ccb' + tobytes(s_len) + b's', self.buf,
141 self.wpos, BSER_BYTESTRING, BSER_INT8, s_len, s)
108 142 elif size == 2:
109 struct.pack_into('=cch' + str(s_len) + 's', self.buf, self.wpos, BSER_STRING, BSER_INT16, s_len, s)
143 struct.pack_into(b'=cch' + tobytes(s_len) + b's', self.buf,
144 self.wpos, BSER_BYTESTRING, BSER_INT16, s_len, s)
110 145 elif size == 4:
111 struct.pack_into('=cci' + str(s_len) + 's', self.buf, self.wpos, BSER_STRING, BSER_INT32, s_len, s)
146 struct.pack_into(b'=cci' + tobytes(s_len) + b's', self.buf,
147 self.wpos, BSER_BYTESTRING, BSER_INT32, s_len, s)
112 148 elif size == 8:
113 struct.pack_into('=ccq' + str(s_len) + 's', self.buf, self.wpos, BSER_STRING, BSER_INT64, s_len, s)
149 struct.pack_into(b'=ccq' + tobytes(s_len) + b's', self.buf,
150 self.wpos, BSER_BYTESTRING, BSER_INT64, s_len, s)
114 151 else:
115 152 raise RuntimeError('Cannot represent this string value')
116 153 self.wpos += to_write
@@ -124,54 +161,68 b' class _bser_buffer(object):'
124 161 to_encode = BSER_TRUE
125 162 else:
126 163 to_encode = BSER_FALSE
127 struct.pack_into('=c', self.buf, self.wpos, to_encode)
164 struct.pack_into(b'=c', self.buf, self.wpos, to_encode)
128 165 self.wpos += needed
129 166 elif val is None:
130 167 needed = 1
131 168 self.ensure_size(needed)
132 struct.pack_into('=c', self.buf, self.wpos, BSER_NULL)
169 struct.pack_into(b'=c', self.buf, self.wpos, BSER_NULL)
133 170 self.wpos += needed
134 171 elif isinstance(val, (int, long)):
135 172 self.append_long(val)
136 elif isinstance(val, (str, unicode)):
173 elif isinstance(val, STRING_TYPES):
137 174 self.append_string(val)
138 175 elif isinstance(val, float):
139 176 needed = 9
140 177 self.ensure_size(needed)
141 struct.pack_into('=cd', self.buf, self.wpos, BSER_REAL, val)
178 struct.pack_into(b'=cd', self.buf, self.wpos, BSER_REAL, val)
142 179 self.wpos += needed
143 elif isinstance(val, collections.Mapping) and isinstance(val, collections.Sized):
180 elif isinstance(val, collections.Mapping) and \
181 isinstance(val, collections.Sized):
144 182 val_len = len(val)
145 183 size = _int_size(val_len)
146 184 needed = 2 + size
147 185 self.ensure_size(needed)
148 186 if size == 1:
149 struct.pack_into('=ccb', self.buf, self.wpos, BSER_OBJECT, BSER_INT8, val_len)
187 struct.pack_into(b'=ccb', self.buf, self.wpos, BSER_OBJECT,
188 BSER_INT8, val_len)
150 189 elif size == 2:
151 struct.pack_into('=cch', self.buf, self.wpos, BSER_OBJECT, BSER_INT16, val_len)
190 struct.pack_into(b'=cch', self.buf, self.wpos, BSER_OBJECT,
191 BSER_INT16, val_len)
152 192 elif size == 4:
153 struct.pack_into('=cci', self.buf, self.wpos, BSER_OBJECT, BSER_INT32, val_len)
193 struct.pack_into(b'=cci', self.buf, self.wpos, BSER_OBJECT,
194 BSER_INT32, val_len)
154 195 elif size == 8:
155 struct.pack_into('=ccq', self.buf, self.wpos, BSER_OBJECT, BSER_INT64, val_len)
196 struct.pack_into(b'=ccq', self.buf, self.wpos, BSER_OBJECT,
197 BSER_INT64, val_len)
156 198 else:
157 199 raise RuntimeError('Cannot represent this mapping value')
158 200 self.wpos += needed
159 for k, v in val.iteritems():
201 if compat.PYTHON3:
202 iteritems = val.items()
203 else:
204 iteritems = val.iteritems()
205 for k, v in iteritems:
160 206 self.append_string(k)
161 207 self.append_recursive(v)
162 elif isinstance(val, collections.Iterable) and isinstance(val, collections.Sized):
208 elif isinstance(val, collections.Iterable) and \
209 isinstance(val, collections.Sized):
163 210 val_len = len(val)
164 211 size = _int_size(val_len)
165 212 needed = 2 + size
166 213 self.ensure_size(needed)
167 214 if size == 1:
168 struct.pack_into('=ccb', self.buf, self.wpos, BSER_ARRAY, BSER_INT8, val_len)
215 struct.pack_into(b'=ccb', self.buf, self.wpos, BSER_ARRAY,
216 BSER_INT8, val_len)
169 217 elif size == 2:
170 struct.pack_into('=cch', self.buf, self.wpos, BSER_ARRAY, BSER_INT16, val_len)
218 struct.pack_into(b'=cch', self.buf, self.wpos, BSER_ARRAY,
219 BSER_INT16, val_len)
171 220 elif size == 4:
172 struct.pack_into('=cci', self.buf, self.wpos, BSER_ARRAY, BSER_INT32, val_len)
221 struct.pack_into(b'=cci', self.buf, self.wpos, BSER_ARRAY,
222 BSER_INT32, val_len)
173 223 elif size == 8:
174 struct.pack_into('=ccq', self.buf, self.wpos, BSER_ARRAY, BSER_INT64, val_len)
224 struct.pack_into(b'=ccq', self.buf, self.wpos, BSER_ARRAY,
225 BSER_INT64, val_len)
175 226 else:
176 227 raise RuntimeError('Cannot represent this sequence value')
177 228 self.wpos += needed
@@ -181,56 +232,18 b' class _bser_buffer(object):'
181 232 raise RuntimeError('Cannot represent unknown value type')
182 233
183 234
184 def dumps(obj):
185 bser_buf = _bser_buffer()
235 def dumps(obj, version=1, capabilities=0):
236 bser_buf = _bser_buffer(version=version)
186 237 bser_buf.append_recursive(obj)
187 238 # Now fill in the overall length
239 if version == 1:
188 240 obj_len = bser_buf.wpos - len(EMPTY_HEADER)
189 struct.pack_into('=i', bser_buf.buf, 3, obj_len)
190 return bser_buf.buf.raw[:bser_buf.wpos]
191
192
193 def _bunser_int(buf, pos):
194 try:
195 int_type = buf[pos]
196 except IndexError:
197 raise ValueError('Invalid bser int encoding, pos out of range')
198 if int_type == BSER_INT8:
199 needed = 2
200 fmt = '=b'
201 elif int_type == BSER_INT16:
202 needed = 3
203 fmt = '=h'
204 elif int_type == BSER_INT32:
205 needed = 5
206 fmt = '=i'
207 elif int_type == BSER_INT64:
208 needed = 9
209 fmt = '=q'
241 struct.pack_into(b'=i', bser_buf.buf, 3, obj_len)
210 242 else:
211 raise ValueError('Invalid bser int encoding 0x%02x' % int(int_type))
212 int_val = struct.unpack_from(fmt, buf, pos + 1)[0]
213 return (int_val, pos + needed)
214
215
216 def _bunser_string(buf, pos):
217 str_len, pos = _bunser_int(buf, pos + 1)
218 str_val = struct.unpack_from(str(str_len) + 's', buf, pos)[0]
219 return (str_val, pos + str_len)
220
221
222 def _bunser_array(buf, pos, mutable=True):
223 arr_len, pos = _bunser_int(buf, pos + 1)
224 arr = []
225 for i in range(arr_len):
226 arr_item, pos = _bser_loads_recursive(buf, pos, mutable)
227 arr.append(arr_item)
228
229 if not mutable:
230 arr = tuple(arr)
231
232 return arr, pos
233
243 obj_len = bser_buf.wpos - len(EMPTY_HEADER_V2)
244 struct.pack_into(b'=i', bser_buf.buf, 2, capabilities)
245 struct.pack_into(b'=i', bser_buf.buf, 7, obj_len)
246 return bser_buf.buf.raw[:bser_buf.wpos]
234 247
235 248 # This is a quack-alike with the bserObjectType in bser.c
236 249 # It provides by getattr accessors and getitem for both index
@@ -260,68 +273,130 b' class _BunserDict(object):'
260 273 def __len__(self):
261 274 return len(self._keys)
262 275
263 def _bunser_object(buf, pos, mutable=True):
264 obj_len, pos = _bunser_int(buf, pos + 1)
265 if mutable:
276 class Bunser(object):
277 def __init__(self, mutable=True, value_encoding=None, value_errors=None):
278 self.mutable = mutable
279 self.value_encoding = value_encoding
280
281 if value_encoding is None:
282 self.value_errors = None
283 elif value_errors is None:
284 self.value_errors = 'strict'
285 else:
286 self.value_errors = value_errors
287
288 @staticmethod
289 def unser_int(buf, pos):
290 try:
291 int_type = _buf_pos(buf, pos)
292 except IndexError:
293 raise ValueError('Invalid bser int encoding, pos out of range')
294 if int_type == BSER_INT8:
295 needed = 2
296 fmt = b'=b'
297 elif int_type == BSER_INT16:
298 needed = 3
299 fmt = b'=h'
300 elif int_type == BSER_INT32:
301 needed = 5
302 fmt = b'=i'
303 elif int_type == BSER_INT64:
304 needed = 9
305 fmt = b'=q'
306 else:
307 raise ValueError('Invalid bser int encoding 0x%s' %
308 binascii.hexlify(int_type).decode('ascii'))
309 int_val = struct.unpack_from(fmt, buf, pos + 1)[0]
310 return (int_val, pos + needed)
311
312 def unser_utf8_string(self, buf, pos):
313 str_len, pos = self.unser_int(buf, pos + 1)
314 str_val = struct.unpack_from(tobytes(str_len) + b's', buf, pos)[0]
315 return (str_val.decode('utf-8'), pos + str_len)
316
317 def unser_bytestring(self, buf, pos):
318 str_len, pos = self.unser_int(buf, pos + 1)
319 str_val = struct.unpack_from(tobytes(str_len) + b's', buf, pos)[0]
320 if self.value_encoding is not None:
321 str_val = str_val.decode(self.value_encoding, self.value_errors)
322 # str_len stays the same because that's the length in bytes
323 return (str_val, pos + str_len)
324
325 def unser_array(self, buf, pos):
326 arr_len, pos = self.unser_int(buf, pos + 1)
327 arr = []
328 for i in range(arr_len):
329 arr_item, pos = self.loads_recursive(buf, pos)
330 arr.append(arr_item)
331
332 if not self.mutable:
333 arr = tuple(arr)
334
335 return arr, pos
336
337 def unser_object(self, buf, pos):
338 obj_len, pos = self.unser_int(buf, pos + 1)
339 if self.mutable:
266 340 obj = {}
267 341 else:
268 342 keys = []
269 343 vals = []
270 344
271 345 for i in range(obj_len):
272 key, pos = _bunser_string(buf, pos)
273 val, pos = _bser_loads_recursive(buf, pos, mutable)
274 if mutable:
346 key, pos = self.unser_utf8_string(buf, pos)
347 val, pos = self.loads_recursive(buf, pos)
348 if self.mutable:
275 349 obj[key] = val
276 350 else:
277 351 keys.append(key)
278 352 vals.append(val)
279 353
280 if not mutable:
354 if not self.mutable:
281 355 obj = _BunserDict(keys, vals)
282 356
283 357 return obj, pos
284 358
285
286 def _bunser_template(buf, pos, mutable=True):
287 if buf[pos + 1] != BSER_ARRAY:
359 def unser_template(self, buf, pos):
360 val_type = _buf_pos(buf, pos + 1)
361 if val_type != BSER_ARRAY:
288 362 raise RuntimeError('Expect ARRAY to follow TEMPLATE')
289 keys, pos = _bunser_array(buf, pos + 1)
290 nitems, pos = _bunser_int(buf, pos)
363 # force UTF-8 on keys
364 keys_bunser = Bunser(mutable=self.mutable, value_encoding='utf-8')
365 keys, pos = keys_bunser.unser_array(buf, pos + 1)
366 nitems, pos = self.unser_int(buf, pos)
291 367 arr = []
292 368 for i in range(nitems):
293 if mutable:
369 if self.mutable:
294 370 obj = {}
295 371 else:
296 372 vals = []
297 373
298 374 for keyidx in range(len(keys)):
299 if buf[pos] == BSER_SKIP:
375 if _buf_pos(buf, pos) == BSER_SKIP:
300 376 pos += 1
301 377 ele = None
302 378 else:
303 ele, pos = _bser_loads_recursive(buf, pos, mutable)
379 ele, pos = self.loads_recursive(buf, pos)
304 380
305 if mutable:
381 if self.mutable:
306 382 key = keys[keyidx]
307 383 obj[key] = ele
308 384 else:
309 385 vals.append(ele)
310 386
311 if not mutable:
387 if not self.mutable:
312 388 obj = _BunserDict(keys, vals)
313 389
314 390 arr.append(obj)
315 391 return arr, pos
316 392
317
318 def _bser_loads_recursive(buf, pos, mutable=True):
319 val_type = buf[pos]
393 def loads_recursive(self, buf, pos):
394 val_type = _buf_pos(buf, pos)
320 395 if (val_type == BSER_INT8 or val_type == BSER_INT16 or
321 396 val_type == BSER_INT32 or val_type == BSER_INT64):
322 return _bunser_int(buf, pos)
397 return self.unser_int(buf, pos)
323 398 elif val_type == BSER_REAL:
324 val = struct.unpack_from('=d', buf, pos + 1)[0]
399 val = struct.unpack_from(b'=d', buf, pos + 1)[0]
325 400 return (val, pos + 9)
326 401 elif val_type == BSER_TRUE:
327 402 return (True, pos + 1)
@@ -329,31 +404,81 b' def _bser_loads_recursive(buf, pos, muta'
329 404 return (False, pos + 1)
330 405 elif val_type == BSER_NULL:
331 406 return (None, pos + 1)
332 elif val_type == BSER_STRING:
333 return _bunser_string(buf, pos)
407 elif val_type == BSER_BYTESTRING:
408 return self.unser_bytestring(buf, pos)
409 elif val_type == BSER_UTF8STRING:
410 return self.unser_utf8_string(buf, pos)
334 411 elif val_type == BSER_ARRAY:
335 return _bunser_array(buf, pos, mutable)
412 return self.unser_array(buf, pos)
336 413 elif val_type == BSER_OBJECT:
337 return _bunser_object(buf, pos, mutable)
414 return self.unser_object(buf, pos)
338 415 elif val_type == BSER_TEMPLATE:
339 return _bunser_template(buf, pos, mutable)
416 return self.unser_template(buf, pos)
340 417 else:
341 raise RuntimeError('unhandled bser opcode 0x%02x' % (val_type,))
418 raise ValueError('unhandled bser opcode 0x%s' %
419 binascii.hexlify(val_type).decode('ascii'))
420
421
422 def _pdu_info_helper(buf):
423 bser_version = -1
424 if buf[0:2] == EMPTY_HEADER[0:2]:
425 bser_version = 1
426 bser_capabilities = 0
427 expected_len, pos2 = Bunser.unser_int(buf, 2)
428 elif buf[0:2] == EMPTY_HEADER_V2[0:2]:
429 if len(buf) < 8:
430 raise ValueError('Invalid BSER header')
431 bser_version = 2
432 bser_capabilities = struct.unpack_from("I", buf, 2)[0]
433 expected_len, pos2 = Bunser.unser_int(buf, 6)
434 else:
435 raise ValueError('Invalid BSER header')
436
437 return bser_version, bser_capabilities, expected_len, pos2
438
439
440 def pdu_info(buf):
441 info = _pdu_info_helper(buf)
442 return info[0], info[1], info[2] + info[3]
342 443
343 444
344 445 def pdu_len(buf):
345 if buf[0:2] != EMPTY_HEADER[0:2]:
346 raise RuntimeError('Invalid BSER header')
347 expected_len, pos = _bunser_int(buf, 2)
348 return expected_len + pos
446 info = _pdu_info_helper(buf)
447 return info[2] + info[3]
349 448
350 449
351 def loads(buf, mutable=True):
352 if buf[0:2] != EMPTY_HEADER[0:2]:
353 raise RuntimeError('Invalid BSER header')
354 expected_len, pos = _bunser_int(buf, 2)
450 def loads(buf, mutable=True, value_encoding=None, value_errors=None):
451 """Deserialize a BSER-encoded blob.
452
453 @param buf: The buffer to deserialize.
454 @type buf: bytes
455
456 @param mutable: Whether to return mutable results.
457 @type mutable: bool
458
459 @param value_encoding: Optional codec to use to decode values. If
460 unspecified or None, return values as bytestrings.
461 @type value_encoding: str
462
463 @param value_errors: Optional error handler for codec. 'strict' by default.
464 The other most common argument is 'surrogateescape' on
465 Python 3. If value_encoding is None, this is ignored.
466 @type value_errors: str
467 """
468
469 info = _pdu_info_helper(buf)
470 expected_len = info[2]
471 pos = info[3]
472
355 473 if len(buf) != expected_len + pos:
356 raise RuntimeError('bser data len != header len')
357 return _bser_loads_recursive(buf, pos, mutable)[0]
474 raise ValueError('bser data len != header len')
475
476 bunser = Bunser(mutable=mutable, value_encoding=value_encoding,
477 value_errors=value_errors)
358 478
359 # no-check-code -- this is a 3rd party library
479 return bunser.loads_recursive(buf, pos)[0]
480
481
482 def load(fp, mutable=True, value_encoding=None, value_errors=None):
483 from . import load
484 return load.load(fp, mutable, value_encoding, value_errors)
@@ -15,10 +15,6 b''
15 15 contrib/python-zstandard/tests/test_module_attributes.py not using absolute_import
16 16 contrib/python-zstandard/tests/test_roundtrip.py not using absolute_import
17 17 contrib/python-zstandard/tests/test_train_dictionary.py not using absolute_import
18 hgext/fsmonitor/pywatchman/__init__.py not using absolute_import
19 hgext/fsmonitor/pywatchman/__init__.py requires print_function
20 hgext/fsmonitor/pywatchman/capabilities.py not using absolute_import
21 hgext/fsmonitor/pywatchman/pybser.py not using absolute_import
22 18 i18n/check-translation.py not using absolute_import
23 19 setup.py not using absolute_import
24 20 tests/test-demandimport.py not using absolute_import
1 NO CONTENT: file was removed
General Comments 0
You need to be logged in to leave comments. Login now