# HG changeset patch
# User Zack Hricz <zphricz@fb.com>
# Date 2016-12-22 19:22:32
# Node ID 16f4b341288d6c562845335c784b2820ec77d44f
# Parent  f35397fe0c0494123ab527604cbd96821f1a89a2

fsmonitor: refresh pywatchman to upstream

Update pywatchman to upstream version c77452. The refresh includes fixes that
improve Windows compatibility.

There is a minor update to 'test-check-py3-compat.t' as c77452 no longer has
the py3 compatibility issues the previous version had.

# no-check-commit

diff --git a/hgext/fsmonitor/pywatchman/__init__.py b/hgext/fsmonitor/pywatchman/__init__.py
--- a/hgext/fsmonitor/pywatchman/__init__.py
+++ b/hgext/fsmonitor/pywatchman/__init__.py
@@ -26,9 +26,14 @@
 # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+# no unicode literals
+
+import inspect
+import math
 import os
-import errno
-import math
 import socket
 import subprocess
 import time
@@ -36,11 +41,20 @@ import time
 # Sometimes it's really hard to get Python extensions to compile,
 # so fall back to a pure Python implementation.
 try:
-    import bser
+    from . import bser
+    # Demandimport causes modules to be loaded lazily. Force the load now
+    # so that we can fall back on pybser if bser doesn't exist
+    bser.pdu_info
 except ImportError:
-    import pybser as bser
+    from . import pybser as bser
 
-import capabilities
+from . import (
+    capabilities,
+    compat,
+    encoding,
+    load,
+)
+
 
 if os.name == 'nt':
     import ctypes
@@ -55,18 +69,29 @@ if os.name == 'nt':
     FORMAT_MESSAGE_FROM_SYSTEM = 0x00001000
     FORMAT_MESSAGE_ALLOCATE_BUFFER = 0x00000100
     FORMAT_MESSAGE_IGNORE_INSERTS = 0x00000200
+    WAIT_FAILED = 0xFFFFFFFF
     WAIT_TIMEOUT = 0x00000102
     WAIT_OBJECT_0 = 0x00000000
-    ERROR_IO_PENDING = 997
+    WAIT_IO_COMPLETION = 0x000000C0
+    INFINITE = 0xFFFFFFFF
+
+    # Overlapped I/O operation is in progress. (997)
+    ERROR_IO_PENDING = 0x000003E5
+
+    # The pointer size follows the architecture
+    # We use WPARAM since this type is already conditionally defined
+    ULONG_PTR = ctypes.wintypes.WPARAM
 
     class OVERLAPPED(ctypes.Structure):
         _fields_ = [
-            ("Internal", wintypes.ULONG), ("InternalHigh", wintypes.ULONG),
+            ("Internal", ULONG_PTR), ("InternalHigh", ULONG_PTR),
             ("Offset", wintypes.DWORD), ("OffsetHigh", wintypes.DWORD),
             ("hEvent", wintypes.HANDLE)
         ]
 
         def __init__(self):
+            self.Internal = 0
+            self.InternalHigh = 0
             self.Offset = 0
             self.OffsetHigh = 0
             self.hEvent = 0
@@ -97,6 +122,10 @@ if os.name == 'nt':
     GetLastError.argtypes = []
     GetLastError.restype = wintypes.DWORD
 
+    SetLastError = ctypes.windll.kernel32.SetLastError
+    SetLastError.argtypes = [wintypes.DWORD]
+    SetLastError.restype = None
+
     FormatMessage = ctypes.windll.kernel32.FormatMessageA
     FormatMessage.argtypes = [wintypes.DWORD, wintypes.LPVOID, wintypes.DWORD,
                               wintypes.DWORD, ctypes.POINTER(wintypes.LPSTR),
@@ -105,12 +134,30 @@ if os.name == 'nt':
 
     LocalFree = ctypes.windll.kernel32.LocalFree
 
-    GetOverlappedResultEx = ctypes.windll.kernel32.GetOverlappedResultEx
-    GetOverlappedResultEx.argtypes = [wintypes.HANDLE,
-                                      ctypes.POINTER(OVERLAPPED), LPDWORD,
-                                      wintypes.DWORD, wintypes.BOOL]
-    GetOverlappedResultEx.restype = wintypes.BOOL
+    GetOverlappedResult = ctypes.windll.kernel32.GetOverlappedResult
+    GetOverlappedResult.argtypes = [wintypes.HANDLE,
+                                    ctypes.POINTER(OVERLAPPED), LPDWORD,
+                                    wintypes.BOOL]
+    GetOverlappedResult.restype = wintypes.BOOL
 
+    GetOverlappedResultEx = getattr(ctypes.windll.kernel32,
+                                    'GetOverlappedResultEx', None)
+    if GetOverlappedResultEx is not None:
+        GetOverlappedResultEx.argtypes = [wintypes.HANDLE,
+                                          ctypes.POINTER(OVERLAPPED), LPDWORD,
+                                          wintypes.DWORD, wintypes.BOOL]
+        GetOverlappedResultEx.restype = wintypes.BOOL
+
+    WaitForSingleObjectEx = ctypes.windll.kernel32.WaitForSingleObjectEx
+    WaitForSingleObjectEx.argtypes = [wintypes.HANDLE, wintypes.DWORD, wintypes.BOOL]
+    WaitForSingleObjectEx.restype = wintypes.DWORD
+
+    CreateEvent = ctypes.windll.kernel32.CreateEventA
+    CreateEvent.argtypes = [LPDWORD, wintypes.BOOL, wintypes.BOOL,
+                            wintypes.LPSTR]
+    CreateEvent.restype = wintypes.HANDLE
+
+    # Windows Vista is the minimum supported client for CancelIoEx.
     CancelIoEx = ctypes.windll.kernel32.CancelIoEx
     CancelIoEx.argtypes = [wintypes.HANDLE, ctypes.POINTER(OVERLAPPED)]
     CancelIoEx.restype = wintypes.BOOL
@@ -132,8 +179,47 @@ else:
         pass
 
 
+def _win32_strerror(err):
+    """ expand a win32 error code into a human readable message """
+
+    # FormatMessage will allocate memory and assign it here
+    buf = ctypes.c_char_p()
+    FormatMessage(
+        FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_ALLOCATE_BUFFER
+        | FORMAT_MESSAGE_IGNORE_INSERTS, None, err, 0, buf, 0, None)
+    try:
+        return buf.value
+    finally:
+        LocalFree(buf)
+
+
 class WatchmanError(Exception):
-    pass
+    def __init__(self, msg=None, cmd=None):
+        self.msg = msg
+        self.cmd = cmd
+
+    def setCommand(self, cmd):
+        self.cmd = cmd
+
+    def __str__(self):
+        if self.cmd:
+            return '%s, while executing %s' % (self.msg, self.cmd)
+        return self.msg
+
+
+class WatchmanEnvironmentError(WatchmanError):
+    def __init__(self, msg, errno, errmsg, cmd=None):
+        super(WatchmanEnvironmentError, self).__init__(
+            '{0}: errno={1} errmsg={2}'.format(msg, errno, errmsg),
+            cmd)
+
+
+class SocketConnectError(WatchmanError):
+    def __init__(self, sockpath, exc):
+        super(SocketConnectError, self).__init__(
+            'unable to connect to %s: %s' % (sockpath, exc))
+        self.sockpath = sockpath
+        self.exc = exc
 
 
 class SocketTimeout(WatchmanError):
@@ -151,19 +237,11 @@ class CommandError(WatchmanError):
 
     self.msg is the message returned by watchman.
     """
-
     def __init__(self, msg, cmd=None):
-        self.msg = msg
-        self.cmd = cmd
-        super(CommandError, self).__init__('watchman command error: %s' % msg)
-
-    def setCommand(self, cmd):
-        self.cmd = cmd
-
-    def __str__(self):
-        if self.cmd:
-            return '%s, while executing %s' % (self.msg, self.cmd)
-        return self.msg
+        super(CommandError, self).__init__(
+            'watchman command error: %s' % (msg, ),
+            cmd,
+        )
 
 
 class Transport(object):
@@ -195,16 +273,16 @@ class Transport(object):
 
         # Buffer may already have a line if we've received unilateral
         # response(s) from the server
-        if len(self.buf) == 1 and "\n" in self.buf[0]:
-            (line, b) = self.buf[0].split("\n", 1)
+        if len(self.buf) == 1 and b"\n" in self.buf[0]:
+            (line, b) = self.buf[0].split(b"\n", 1)
             self.buf = [b]
             return line
 
         while True:
             b = self.readBytes(4096)
-            if "\n" in b:
-                result = ''.join(self.buf)
-                (line, b) = b.split("\n", 1)
+            if b"\n" in b:
+                result = b''.join(self.buf)
+                (line, b) = b.split(b"\n", 1)
                 self.buf = [b]
                 return result + line
             self.buf.append(b)
@@ -241,8 +319,8 @@ class UnixSocketTransport(Transport):
             sock.connect(self.sockpath)
             self.sock = sock
         except socket.error as e:
-            raise WatchmanError('unable to connect to %s: %s' %
-                                (self.sockpath, e))
+            sock.close()
+            raise SocketConnectError(self.sockpath, e)
 
     def close(self):
         self.sock.close()
@@ -268,6 +346,46 @@ class UnixSocketTransport(Transport):
             raise SocketTimeout('timed out sending query command')
 
 
+def _get_overlapped_result_ex_impl(pipe, olap, nbytes, millis, alertable):
+    """ Windows 7 and earlier do not support GetOverlappedResultEx. The
+    alternative is to use GetOverlappedResult and wait for the read or write
+    operation to complete. This is done by using CreateEvent and
+    WaitForSingleObjectEx. CreateEvent, WaitForSingleObjectEx and
+    GetOverlappedResult have all been part of the Windows API since Windows XP.
+    This is the exact same implementation that can be found in the watchman
+    source code (see get_overlapped_result_ex_impl in stream_win.c). This
+    way, maintenance should be simplified.
+    """
+    log('Preparing to wait for maximum %dms', millis )
+    if millis != 0:
+        waitReturnCode = WaitForSingleObjectEx(olap.hEvent, millis, alertable)
+        if waitReturnCode == WAIT_OBJECT_0:
+            # Event is signaled, overlapped IO operation result should be available.
+            pass
+        elif waitReturnCode == WAIT_IO_COMPLETION:
+            # WaitForSingleObjectEx returned because the system added an I/O completion
+            # routine or an asynchronous procedure call (APC) to the thread queue.
+            SetLastError(WAIT_IO_COMPLETION)
+            pass
+        elif waitReturnCode == WAIT_TIMEOUT:
+            # We reached the maximum allowed wait time; the I/O operation failed
+            # to complete in a timely fashion.
+            SetLastError(WAIT_TIMEOUT)
+            return False
+        elif waitReturnCode == WAIT_FAILED:
+            # something went wrong calling WaitForSingleObjectEx
+            err = GetLastError()
+            log('WaitForSingleObjectEx failed: %s', _win32_strerror(err))
+            return False
+        else:
+            # unexpected situation deserving investigation.
+            err = GetLastError()
+            log('Unexpected error: %s', _win32_strerror(err))
+            return False
+
+    return GetOverlappedResult(pipe, olap, nbytes, False)
+
+
 class WindowsNamedPipeTransport(Transport):
     """ connect to a named pipe """
 
@@ -284,28 +402,35 @@ class WindowsNamedPipeTransport(Transpor
             self._raise_win_err('failed to open pipe %s' % sockpath,
                                 GetLastError())
 
-    def _win32_strerror(self, err):
-        """ expand a win32 error code into a human readable message """
+        # event for the overlapped I/O operations
+        self._waitable = CreateEvent(None, True, False, None)
+        if self._waitable is None:
+            self._raise_win_err('CreateEvent failed', GetLastError())
 
-        # FormatMessage will allocate memory and assign it here
-        buf = ctypes.c_char_p()
-        FormatMessage(
-            FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_ALLOCATE_BUFFER
-            | FORMAT_MESSAGE_IGNORE_INSERTS, None, err, 0, buf, 0, None)
-        try:
-            return buf.value
-        finally:
-            LocalFree(buf)
+        self._get_overlapped_result_ex = GetOverlappedResultEx
+        if (os.getenv('WATCHMAN_WIN7_COMPAT') == '1' or
+            self._get_overlapped_result_ex is None):
+            self._get_overlapped_result_ex = _get_overlapped_result_ex_impl
 
     def _raise_win_err(self, msg, err):
         raise IOError('%s win32 error code: %d %s' %
-                      (msg, err, self._win32_strerror(err)))
+                      (msg, err, _win32_strerror(err)))
 
     def close(self):
         if self.pipe:
+            log('Closing pipe')
             CloseHandle(self.pipe)
         self.pipe = None
 
+        if self._waitable is not None:
+            # We release the handle for the event
+            CloseHandle(self._waitable)
+        self._waitable = None
+
+    def setTimeout(self, value):
+        # convert to milliseconds
+        self.timeout = int(value * 1000)
+
     def readBytes(self, size):
         """ A read can block for an unbounded amount of time, even if the
             kernel reports that the pipe handle is signalled, so we need to
@@ -325,6 +450,7 @@ class WindowsNamedPipeTransport(Transpor
         # We need to initiate a read
         buf = ctypes.create_string_buffer(size)
         olap = OVERLAPPED()
+        olap.hEvent = self._waitable
 
         log('made read buff of size %d', size)
 
@@ -339,8 +465,9 @@ class WindowsNamedPipeTransport(Transpor
                                     GetLastError())
 
         nread = wintypes.DWORD()
-        if not GetOverlappedResultEx(self.pipe, olap, nread,
-                                     0 if immediate else self.timeout, True):
+        if not self._get_overlapped_result_ex(self.pipe, olap, nread,
+                                              0 if immediate else self.timeout,
+                                              True):
             err = GetLastError()
             CancelIoEx(self.pipe, olap)
 
@@ -374,6 +501,8 @@ class WindowsNamedPipeTransport(Transpor
 
     def write(self, data):
         olap = OVERLAPPED()
+        olap.hEvent = self._waitable
+
         immediate = WriteFile(self.pipe, ctypes.c_char_p(data), len(data),
                               None, olap)
 
@@ -385,8 +514,10 @@ class WindowsNamedPipeTransport(Transpor
 
         # Obtain results, waiting if needed
         nwrote = wintypes.DWORD()
-        if GetOverlappedResultEx(self.pipe, olap, nwrote, 0 if immediate else
-                                 self.timeout, True):
+        if self._get_overlapped_result_ex(self.pipe, olap, nwrote,
+                                          0 if immediate else self.timeout,
+                                          True):
+            log('made write of %d bytes', nwrote.value)
             return nwrote.value
 
         err = GetLastError()
@@ -430,7 +561,10 @@ class CLIProcessTransport(Transport):
 
     def close(self):
         if self.proc:
-            self.proc.kill()
+            if self.proc.pid is not None:
+                self.proc.kill()
+            self.proc.stdin.close()
+            self.proc.stdout.close()
             self.proc = None
 
     def _connect(self):
@@ -438,7 +572,7 @@ class CLIProcessTransport(Transport):
             return self.proc
         args = [
             'watchman',
-            '--sockname={}'.format(self.sockpath),
+            '--sockname={0}'.format(self.sockpath),
             '--logfile=/BOGUS',
             '--statefile=/BOGUS',
             '--no-spawn',
@@ -460,8 +594,8 @@ class CLIProcessTransport(Transport):
 
     def write(self, data):
         if self.closed:
+            self.close()
             self.closed = False
-            self.proc = None
         self._connect()
         res = self.proc.stdin.write(data)
         self.proc.stdin.close()
@@ -473,21 +607,21 @@ class BserCodec(Codec):
     """ use the BSER encoding.  This is the default, preferred codec """
 
     def _loads(self, response):
-        return bser.loads(response)
+        return bser.loads(response) # Defaults to BSER v1
 
     def receive(self):
         buf = [self.transport.readBytes(sniff_len)]
         if not buf[0]:
             raise WatchmanError('empty watchman response')
 
-        elen = bser.pdu_len(buf[0])
+        _1, _2, elen = bser.pdu_info(buf[0])
 
         rlen = len(buf[0])
         while elen > rlen:
             buf.append(self.transport.readBytes(elen - rlen))
             rlen += len(buf[-1])
 
-        response = ''.join(buf)
+        response = b''.join(buf)
         try:
             res = self._loads(response)
             return res
@@ -495,7 +629,7 @@ class BserCodec(Codec):
             raise WatchmanError('watchman response decode error: %s' % e)
 
     def send(self, *args):
-        cmd = bser.dumps(*args)
+        cmd = bser.dumps(*args) # Defaults to BSER v1
         self.transport.write(cmd)
 
 
@@ -504,7 +638,64 @@ class ImmutableBserCodec(BserCodec):
         immutable object support """
 
     def _loads(self, response):
-        return bser.loads(response, False)
+        return bser.loads(response, False) # Defaults to BSER v1
+
+
+class Bser2WithFallbackCodec(BserCodec):
+    """ use BSER v2 encoding """
+
+    def __init__(self, transport):
+        super(Bser2WithFallbackCodec, self).__init__(transport)
+        # Once the server advertises support for bser-v2 we should switch this
+        # to 'required' on Python 3.
+        self.send(["version", {"optional": ["bser-v2"]}])
+
+        capabilities = self.receive()
+
+        if 'error' in capabilities:
+          raise Exception('Unsupported BSER version')
+
+        if capabilities['capabilities']['bser-v2']:
+            self.bser_version = 2
+            self.bser_capabilities = 0
+        else:
+            self.bser_version = 1
+            self.bser_capabilities = 0
+
+    def _loads(self, response):
+        return bser.loads(response)
+
+    def receive(self):
+        buf = [self.transport.readBytes(sniff_len)]
+        if not buf[0]:
+            raise WatchmanError('empty watchman response')
+
+        recv_bser_version, recv_bser_capabilities, elen = bser.pdu_info(buf[0])
+
+        if hasattr(self, 'bser_version'):
+          # Readjust BSER version and capabilities if necessary
+          self.bser_version = max(self.bser_version, recv_bser_version)
+          self.capabilities = self.bser_capabilities & recv_bser_capabilities
+
+        rlen = len(buf[0])
+        while elen > rlen:
+            buf.append(self.transport.readBytes(elen - rlen))
+            rlen += len(buf[-1])
+
+        response = b''.join(buf)
+        try:
+            res = self._loads(response)
+            return res
+        except ValueError as e:
+            raise WatchmanError('watchman response decode error: %s' % e)
+
+    def send(self, *args):
+        if hasattr(self, 'bser_version'):
+            cmd = bser.dumps(*args, version=self.bser_version,
+                capabilities=self.bser_capabilities)
+        else:
+            cmd = bser.dumps(*args)
+        self.transport.write(cmd)
 
 
 class JsonCodec(Codec):
@@ -520,6 +711,13 @@ class JsonCodec(Codec):
     def receive(self):
         line = self.transport.readLine()
         try:
+            # In Python 3, json.loads is a transformation from Unicode string to
+            # objects possibly containing Unicode strings. We typically expect
+            # the JSON blob to be ASCII-only with non-ASCII characters escaped,
+            # but it's possible we might get non-ASCII bytes that are valid
+            # UTF-8.
+            if compat.PYTHON3:
+                line = line.decode('utf-8')
             return self.json.loads(line)
         except Exception as e:
             print(e, line)
@@ -527,7 +725,12 @@ class JsonCodec(Codec):
 
     def send(self, *args):
         cmd = self.json.dumps(*args)
-        self.transport.write(cmd + "\n")
+        # In Python 3, json.dumps is a transformation from objects possibly
+        # containing Unicode strings to Unicode string. Even with (the default)
+        # ensure_ascii=True, dumps returns a Unicode string.
+        if compat.PYTHON3:
+            cmd = cmd.encode('ascii')
+        self.transport.write(cmd + b"\n")
 
 
 class client(object):
@@ -556,22 +759,27 @@ class client(object):
         self.timeout = timeout
         self.useImmutableBser = useImmutableBser
 
-        transport = transport or os.getenv('WATCHMAN_TRANSPORT') or 'local'
-        if transport == 'local' and os.name == 'nt':
-            self.transport = WindowsNamedPipeTransport
-        elif transport == 'local':
-            self.transport = UnixSocketTransport
-        elif transport == 'cli':
-            self.transport = CLIProcessTransport
-            if sendEncoding is None:
-                sendEncoding = 'json'
-            if recvEncoding is None:
-                recvEncoding = sendEncoding
+        if inspect.isclass(transport) and issubclass(transport, Transport):
+            self.transport = transport
         else:
-            raise WatchmanError('invalid transport %s' % transport)
+            transport = transport or os.getenv('WATCHMAN_TRANSPORT') or 'local'
+            if transport == 'local' and os.name == 'nt':
+                self.transport = WindowsNamedPipeTransport
+            elif transport == 'local':
+                self.transport = UnixSocketTransport
+            elif transport == 'cli':
+                self.transport = CLIProcessTransport
+                if sendEncoding is None:
+                    sendEncoding = 'json'
+                if recvEncoding is None:
+                    recvEncoding = sendEncoding
+            else:
+                raise WatchmanError('invalid transport %s' % transport)
 
-        sendEncoding = sendEncoding or os.getenv('WATCHMAN_ENCODING') or 'bser'
-        recvEncoding = recvEncoding or os.getenv('WATCHMAN_ENCODING') or 'bser'
+        sendEncoding = str(sendEncoding or os.getenv('WATCHMAN_ENCODING') or
+                           'bser')
+        recvEncoding = str(recvEncoding or os.getenv('WATCHMAN_ENCODING') or
+                           'bser')
 
         self.recvCodec = self._parseEncoding(recvEncoding)
         self.sendCodec = self._parseEncoding(sendEncoding)
@@ -581,6 +789,8 @@ class client(object):
             if self.useImmutableBser:
                 return ImmutableBserCodec
             return BserCodec
+        elif enc == 'experimental-bser-v2':
+          return Bser2WithFallbackCodec
         elif enc == 'json':
             return JsonCodec
         else:
@@ -600,10 +810,20 @@ class client(object):
 
         cmd = ['watchman', '--output-encoding=bser', 'get-sockname']
         try:
-            p = subprocess.Popen(cmd,
-                                 stdout=subprocess.PIPE,
-                                 stderr=subprocess.PIPE,
-                                 close_fds=os.name != 'nt')
+            args = dict(stdout=subprocess.PIPE,
+                        stderr=subprocess.PIPE,
+                        close_fds=os.name != 'nt')
+
+            if os.name == 'nt':
+                # if invoked via an application with graphical user interface,
+                # this call will cause a brief command window pop-up.
+                # Using the flag STARTF_USESHOWWINDOW to avoid this behavior.
+                startupinfo = subprocess.STARTUPINFO()
+                startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW
+                args['startupinfo'] = startupinfo
+
+            p = subprocess.Popen(cmd, **args)
+
         except OSError as e:
             raise WatchmanError('"watchman" executable not in PATH (%s)', e)
 
@@ -614,10 +834,10 @@ class client(object):
             raise WatchmanError("watchman exited with code %d" % exitcode)
 
         result = bser.loads(stdout)
-        if 'error' in result:
+        if b'error' in result:
             raise WatchmanError('get-sockname error: %s' % result['error'])
 
-        return result['sockname']
+        return result[b'sockname']
 
     def _connect(self):
         """ establish transport connection """
@@ -660,10 +880,16 @@ class client(object):
         self._connect()
         result = self.recvConn.receive()
         if self._hasprop(result, 'error'):
-            raise CommandError(result['error'])
+            error = result['error']
+            if compat.PYTHON3 and isinstance(self.recvConn, BserCodec):
+                error = result['error'].decode('utf-8', 'surrogateescape')
+            raise CommandError(error)
 
         if self._hasprop(result, 'log'):
-            self.logs.append(result['log'])
+            log = result['log']
+            if compat.PYTHON3 and isinstance(self.recvConn, BserCodec):
+                log = log.decode('utf-8', 'surrogateescape')
+            self.logs.append(log)
 
         if self._hasprop(result, 'subscription'):
             sub = result['subscription']
@@ -682,6 +908,9 @@ class client(object):
         return result
 
     def isUnilateralResponse(self, res):
+        if 'unilateral' in res and res['unilateral']:
+            return True
+        # Fall back to checking for known unilateral responses
         for k in self.unilateral:
             if k in res:
                 return True
@@ -712,6 +941,13 @@ class client(object):
         remove processing impacts both the unscoped and scoped stores
         for the subscription data.
         """
+        if compat.PYTHON3 and issubclass(self.recvCodec, BserCodec):
+            # People may pass in Unicode strings here -- but currently BSER only
+            # returns bytestrings. Deal with that.
+            if isinstance(root, str):
+                root = encoding.encode_local(root)
+            if isinstance(name, str):
+                name = name.encode('utf-8')
 
         if root is not None:
             if not root in self.sub_by_root:
@@ -752,9 +988,17 @@ class client(object):
                 res = self.receive()
 
             return res
-        except CommandError as ex:
+        except EnvironmentError as ee:
+            # When we can depend on Python 3, we can use PEP 3134
+            # exception chaining here.
+            raise WatchmanEnvironmentError(
+                'I/O error communicating with watchman daemon',
+                ee.errno,
+                ee.strerror,
+                args)
+        except WatchmanError as ex:
             ex.setCommand(args)
-            raise ex
+            raise
 
     def capabilityCheck(self, optional=None, required=None):
         """ Perform a server capability check """
@@ -775,5 +1019,3 @@ class client(object):
     def setTimeout(self, value):
         self.recvConn.setTimeout(value)
         self.sendConn.setTimeout(value)
-
-# no-check-code -- this is a 3rd party library
diff --git a/hgext/fsmonitor/pywatchman/bser.c b/hgext/fsmonitor/pywatchman/bser.c
--- a/hgext/fsmonitor/pywatchman/bser.c
+++ b/hgext/fsmonitor/pywatchman/bser.c
@@ -29,11 +29,27 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 */
 
 #include <Python.h>
+#include <bytesobject.h>
 #ifdef _MSC_VER
 #define inline __inline
-#include "msc_stdint.h"
+#if _MSC_VER >= 1800
+#include <stdint.h>
+#else
+// The compiler associated with Python 2.7 on Windows doesn't ship
+// with stdint.h, so define the small subset that we use here.
+typedef __int8 int8_t;
+typedef __int16 int16_t;
+typedef __int32 int32_t;
+typedef __int64 int64_t;
+typedef unsigned __int8 uint8_t;
+typedef unsigned __int16 uint16_t;
+typedef unsigned __int32 uint32_t;
+typedef unsigned __int64 uint64_t;
+#define UINT32_MAX 4294967295U
+#endif
 #endif
 
+// clang-format off
 /* Return the smallest size int that can store the value */
 #define INT_SIZE(x) (((x) == ((int8_t)x))  ? 1 :    \
                      ((x) == ((int16_t)x)) ? 2 :    \
@@ -41,7 +57,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 
 #define BSER_ARRAY     0x00
 #define BSER_OBJECT    0x01
-#define BSER_STRING    0x02
+#define BSER_BYTESTRING 0x02
 #define BSER_INT8      0x03
 #define BSER_INT16     0x04
 #define BSER_INT32     0x05
@@ -52,6 +68,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 #define BSER_NULL      0x0a
 #define BSER_TEMPLATE  0x0b
 #define BSER_SKIP      0x0c
+#define BSER_UTF8STRING 0x0d
+// clang-format on
 
 // An immutable object representation of BSER_OBJECT.
 // Rather than build a hash table, key -> value are obtained
@@ -64,24 +82,27 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 // approach, this is still faster for the mercurial use case
 // as it helps to eliminate creating N other objects to
 // represent the stat information in the hgwatchman extension
+// clang-format off
 typedef struct {
   PyObject_HEAD
   PyObject *keys;   // tuple of field names
   PyObject *values; // tuple of values
 } bserObject;
+// clang-format on
 
-static Py_ssize_t bserobj_tuple_length(PyObject *o) {
-  bserObject *obj = (bserObject*)o;
+static Py_ssize_t bserobj_tuple_length(PyObject* o) {
+  bserObject* obj = (bserObject*)o;
 
   return PySequence_Length(obj->keys);
 }
 
-static PyObject *bserobj_tuple_item(PyObject *o, Py_ssize_t i) {
-  bserObject *obj = (bserObject*)o;
+static PyObject* bserobj_tuple_item(PyObject* o, Py_ssize_t i) {
+  bserObject* obj = (bserObject*)o;
 
   return PySequence_GetItem(obj->values, i);
 }
 
+// clang-format off
 static PySequenceMethods bserobj_sq = {
   bserobj_tuple_length,      /* sq_length */
   0,                         /* sq_concat */
@@ -92,49 +113,72 @@ static PySequenceMethods bserobj_sq = {
   0,                         /* sq_inplace_concat */
   0                          /* sq_inplace_repeat */
 };
+// clang-format on
 
-static void bserobj_dealloc(PyObject *o) {
-  bserObject *obj = (bserObject*)o;
+static void bserobj_dealloc(PyObject* o) {
+  bserObject* obj = (bserObject*)o;
 
   Py_CLEAR(obj->keys);
   Py_CLEAR(obj->values);
   PyObject_Del(o);
 }
 
-static PyObject *bserobj_getattrro(PyObject *o, PyObject *name) {
-  bserObject *obj = (bserObject*)o;
+static PyObject* bserobj_getattrro(PyObject* o, PyObject* name) {
+  bserObject* obj = (bserObject*)o;
   Py_ssize_t i, n;
-  const char *namestr;
+  PyObject* name_bytes = NULL;
+  PyObject* ret = NULL;
+  const char* namestr;
 
   if (PyIndex_Check(name)) {
     i = PyNumber_AsSsize_t(name, PyExc_IndexError);
     if (i == -1 && PyErr_Occurred()) {
-      return NULL;
+      goto bail;
     }
-    return PySequence_GetItem(obj->values, i);
+    ret = PySequence_GetItem(obj->values, i);
+    goto bail;
   }
 
+  // We can be passed in Unicode objects here -- we don't support anything other
+  // than UTF-8 for keys.
+  if (PyUnicode_Check(name)) {
+    name_bytes = PyUnicode_AsUTF8String(name);
+    if (name_bytes == NULL) {
+      goto bail;
+    }
+    namestr = PyBytes_AsString(name_bytes);
+  } else {
+    namestr = PyBytes_AsString(name);
+  }
+
+  if (namestr == NULL) {
+    goto bail;
+  }
   // hack^Wfeature to allow mercurial to use "st_size" to reference "size"
-  namestr = PyString_AsString(name);
   if (!strncmp(namestr, "st_", 3)) {
     namestr += 3;
   }
 
   n = PyTuple_GET_SIZE(obj->keys);
   for (i = 0; i < n; i++) {
-    const char *item_name = NULL;
-    PyObject *key = PyTuple_GET_ITEM(obj->keys, i);
+    const char* item_name = NULL;
+    PyObject* key = PyTuple_GET_ITEM(obj->keys, i);
 
-    item_name = PyString_AsString(key);
+    item_name = PyBytes_AsString(key);
     if (!strcmp(item_name, namestr)) {
-      return PySequence_GetItem(obj->values, i);
+      ret = PySequence_GetItem(obj->values, i);
+      goto bail;
     }
   }
-  PyErr_Format(PyExc_AttributeError,
-              "bserobject has no attribute '%.400s'", namestr);
-  return NULL;
+
+  PyErr_Format(
+      PyExc_AttributeError, "bserobject has no attribute '%.400s'", namestr);
+bail:
+  Py_XDECREF(name_bytes);
+  return ret;
 }
 
+// clang-format off
 static PyMappingMethods bserobj_map = {
   bserobj_tuple_length,     /* mp_length */
   bserobj_getattrro,        /* mp_subscript */
@@ -181,20 +225,27 @@ PyTypeObject bserObjectType = {
   0,                         /* tp_alloc */
   0,                         /* tp_new */
 };
-
+// clang-format on
 
-static PyObject *bser_loads_recursive(const char **ptr, const char *end,
-    int mutable);
+typedef struct loads_ctx {
+  int mutable;
+  const char* value_encoding;
+  const char* value_errors;
+  uint32_t bser_version;
+  uint32_t bser_capabilities;
+} unser_ctx_t;
+
+static PyObject*
+bser_loads_recursive(const char** ptr, const char* end, const unser_ctx_t* ctx);
 
 static const char bser_true = BSER_TRUE;
 static const char bser_false = BSER_FALSE;
 static const char bser_null = BSER_NULL;
-static const char bser_string_hdr = BSER_STRING;
+static const char bser_bytestring_hdr = BSER_BYTESTRING;
 static const char bser_array_hdr = BSER_ARRAY;
 static const char bser_object_hdr = BSER_OBJECT;
 
-static inline uint32_t next_power_2(uint32_t n)
-{
+static inline uint32_t next_power_2(uint32_t n) {
   n |= (n >> 16);
   n |= (n >> 8);
   n |= (n >> 4);
@@ -205,16 +256,17 @@ static inline uint32_t next_power_2(uint
 
 // A buffer we use for building up the serialized result
 struct bser_buffer {
-  char *buf;
+  char* buf;
   int wpos, allocd;
+  uint32_t bser_version;
+  uint32_t capabilities;
 };
 typedef struct bser_buffer bser_t;
 
-static int bser_append(bser_t *bser, const char *data, uint32_t len)
-{
+static int bser_append(bser_t* bser, const char* data, uint32_t len) {
   int newlen = next_power_2(bser->wpos + len);
   if (newlen > bser->allocd) {
-    char *nbuf = realloc(bser->buf, newlen);
+    char* nbuf = realloc(bser->buf, newlen);
     if (!nbuf) {
       return 0;
     }
@@ -228,40 +280,46 @@ static int bser_append(bser_t *bser, con
   return 1;
 }
 
-static int bser_init(bser_t *bser)
-{
+static int bser_init(bser_t* bser, uint32_t version, uint32_t capabilities) {
   bser->allocd = 8192;
   bser->wpos = 0;
   bser->buf = malloc(bser->allocd);
-
+  bser->bser_version = version;
+  bser->capabilities = capabilities;
   if (!bser->buf) {
     return 0;
   }
 
-  // Leave room for the serialization header, which includes
-  // our overall length.  To make things simpler, we'll use an
-  // int32 for the header
+// Leave room for the serialization header, which includes
+// our overall length.  To make things simpler, we'll use an
+// int32 for the header
 #define EMPTY_HEADER "\x00\x01\x05\x00\x00\x00\x00"
-  bser_append(bser, EMPTY_HEADER, sizeof(EMPTY_HEADER)-1);
+
+// Version 2 also carries an integer indicating the capabilities. The
+// capabilities integer comes before the PDU size.
+#define EMPTY_HEADER_V2 "\x00\x02\x00\x00\x00\x00\x05\x00\x00\x00\x00"
+  if (version == 2) {
+    bser_append(bser, EMPTY_HEADER_V2, sizeof(EMPTY_HEADER_V2) - 1);
+  } else {
+    bser_append(bser, EMPTY_HEADER, sizeof(EMPTY_HEADER) - 1);
+  }
 
   return 1;
 }
 
-static void bser_dtor(bser_t *bser)
-{
+static void bser_dtor(bser_t* bser) {
   free(bser->buf);
   bser->buf = NULL;
 }
 
-static int bser_long(bser_t *bser, int64_t val)
-{
+static int bser_long(bser_t* bser, int64_t val) {
   int8_t i8;
   int16_t i16;
   int32_t i32;
   int64_t i64;
   char sz;
   int size = INT_SIZE(val);
-  char *iptr;
+  char* iptr;
 
   switch (size) {
     case 1:
@@ -285,8 +343,7 @@ static int bser_long(bser_t *bser, int64
       iptr = (char*)&i64;
       break;
     default:
-      PyErr_SetString(PyExc_RuntimeError,
-          "Cannot represent this long value!?");
+      PyErr_SetString(PyExc_RuntimeError, "Cannot represent this long value!?");
       return 0;
   }
 
@@ -297,25 +354,24 @@ static int bser_long(bser_t *bser, int64
   return bser_append(bser, iptr, size);
 }
 
-static int bser_string(bser_t *bser, PyObject *sval)
-{
-  char *buf = NULL;
+static int bser_bytestring(bser_t* bser, PyObject* sval) {
+  char* buf = NULL;
   Py_ssize_t len;
   int res;
-  PyObject *utf = NULL;
+  PyObject* utf = NULL;
 
   if (PyUnicode_Check(sval)) {
     utf = PyUnicode_AsEncodedString(sval, "utf-8", "ignore");
     sval = utf;
   }
 
-  res = PyString_AsStringAndSize(sval, &buf, &len);
+  res = PyBytes_AsStringAndSize(sval, &buf, &len);
   if (res == -1) {
     res = 0;
     goto out;
   }
 
-  if (!bser_append(bser, &bser_string_hdr, sizeof(bser_string_hdr))) {
+  if (!bser_append(bser, &bser_bytestring_hdr, sizeof(bser_bytestring_hdr))) {
     res = 0;
     goto out;
   }
@@ -341,8 +397,7 @@ out:
   return res;
 }
 
-static int bser_recursive(bser_t *bser, PyObject *val)
-{
+static int bser_recursive(bser_t* bser, PyObject* val) {
   if (PyBool_Check(val)) {
     if (val == Py_True) {
       return bser_append(bser, &bser_true, sizeof(bser_true));
@@ -354,19 +409,21 @@ static int bser_recursive(bser_t *bser, 
     return bser_append(bser, &bser_null, sizeof(bser_null));
   }
 
+// Python 3 has one integer type.
+#if PY_MAJOR_VERSION < 3
   if (PyInt_Check(val)) {
     return bser_long(bser, PyInt_AS_LONG(val));
   }
+#endif // PY_MAJOR_VERSION < 3
 
   if (PyLong_Check(val)) {
     return bser_long(bser, PyLong_AsLongLong(val));
   }
 
-  if (PyString_Check(val) || PyUnicode_Check(val)) {
-    return bser_string(bser, val);
+  if (PyBytes_Check(val) || PyUnicode_Check(val)) {
+    return bser_bytestring(bser, val);
   }
 
-
   if (PyFloat_Check(val)) {
     double dval = PyFloat_AS_DOUBLE(val);
     char sz = BSER_REAL;
@@ -390,7 +447,7 @@ static int bser_recursive(bser_t *bser, 
     }
 
     for (i = 0; i < len; i++) {
-      PyObject *ele = PyList_GET_ITEM(val, i);
+      PyObject* ele = PyList_GET_ITEM(val, i);
 
       if (!bser_recursive(bser, ele)) {
         return 0;
@@ -412,7 +469,7 @@ static int bser_recursive(bser_t *bser, 
     }
 
     for (i = 0; i < len; i++) {
-      PyObject *ele = PyTuple_GET_ITEM(val, i);
+      PyObject* ele = PyTuple_GET_ITEM(val, i);
 
       if (!bser_recursive(bser, ele)) {
         return 0;
@@ -436,7 +493,7 @@ static int bser_recursive(bser_t *bser, 
     }
 
     while (PyDict_Next(val, &pos, &key, &ele)) {
-      if (!bser_string(bser, key)) {
+      if (!bser_bytestring(bser, key)) {
         return 0;
       }
       if (!bser_recursive(bser, ele)) {
@@ -451,17 +508,25 @@ static int bser_recursive(bser_t *bser, 
   return 0;
 }
 
-static PyObject *bser_dumps(PyObject *self, PyObject *args)
-{
+static PyObject* bser_dumps(PyObject* self, PyObject* args, PyObject* kw) {
   PyObject *val = NULL, *res;
   bser_t bser;
-  uint32_t len;
+  uint32_t len, bser_version = 1, bser_capabilities = 0;
+
+  static char* kw_list[] = {"val", "version", "capabilities", NULL};
 
-  if (!PyArg_ParseTuple(args, "O", &val)) {
+  if (!PyArg_ParseTupleAndKeywords(
+          args,
+          kw,
+          "O|ii:dumps",
+          kw_list,
+          &val,
+          &bser_version,
+          &bser_capabilities)) {
     return NULL;
   }
 
-  if (!bser_init(&bser)) {
+  if (!bser_init(&bser, bser_version, bser_capabilities)) {
     return PyErr_NoMemory();
   }
 
@@ -475,19 +540,25 @@ static PyObject *bser_dumps(PyObject *se
   }
 
   // Now fill in the overall length
-  len = bser.wpos - (sizeof(EMPTY_HEADER) - 1);
-  memcpy(bser.buf + 3, &len, sizeof(len));
+  if (bser_version == 1) {
+    len = bser.wpos - (sizeof(EMPTY_HEADER) - 1);
+    memcpy(bser.buf + 3, &len, sizeof(len));
+  } else {
+    len = bser.wpos - (sizeof(EMPTY_HEADER_V2) - 1);
+    // The BSER capabilities block comes before the PDU length
+    memcpy(bser.buf + 2, &bser_capabilities, sizeof(bser_capabilities));
+    memcpy(bser.buf + 7, &len, sizeof(len));
+  }
 
-  res = PyString_FromStringAndSize(bser.buf, bser.wpos);
+  res = PyBytes_FromStringAndSize(bser.buf, bser.wpos);
   bser_dtor(&bser);
 
   return res;
 }
 
-int bunser_int(const char **ptr, const char *end, int64_t *val)
-{
+int bunser_int(const char** ptr, const char* end, int64_t* val) {
   int needed;
-  const char *buf = *ptr;
+  const char* buf = *ptr;
   int8_t i8;
   int16_t i16;
   int32_t i32;
@@ -507,8 +578,8 @@ int bunser_int(const char **ptr, const c
       needed = 9;
       break;
     default:
-      PyErr_Format(PyExc_ValueError,
-          "invalid bser int encoding 0x%02x", buf[0]);
+      PyErr_Format(
+          PyExc_ValueError, "invalid bser int encoding 0x%02x", buf[0]);
       return 0;
   }
   if (end - buf < needed) {
@@ -538,10 +609,12 @@ int bunser_int(const char **ptr, const c
   }
 }
 
-static int bunser_string(const char **ptr, const char *end,
-    const char **start, int64_t *len)
-{
-  const char *buf = *ptr;
+static int bunser_bytestring(
+    const char** ptr,
+    const char* end,
+    const char** start,
+    int64_t* len) {
+  const char* buf = *ptr;
 
   // skip string marker
   buf++;
@@ -559,11 +632,12 @@ static int bunser_string(const char **pt
   return 1;
 }
 
-static PyObject *bunser_array(const char **ptr, const char *end, int mutable)
-{
-  const char *buf = *ptr;
+static PyObject*
+bunser_array(const char** ptr, const char* end, const unser_ctx_t* ctx) {
+  const char* buf = *ptr;
   int64_t nitems, i;
-  PyObject *res;
+  int mutable = ctx->mutable;
+  PyObject* res;
 
   // skip array header
   buf++;
@@ -584,7 +658,7 @@ static PyObject *bunser_array(const char
   }
 
   for (i = 0; i < nitems; i++) {
-    PyObject *ele = bser_loads_recursive(ptr, end, mutable);
+    PyObject* ele = bser_loads_recursive(ptr, end, ctx);
 
     if (!ele) {
       Py_DECREF(res);
@@ -602,13 +676,13 @@ static PyObject *bunser_array(const char
   return res;
 }
 
-static PyObject *bunser_object(const char **ptr, const char *end,
-    int mutable)
-{
-  const char *buf = *ptr;
+static PyObject*
+bunser_object(const char** ptr, const char* end, const unser_ctx_t* ctx) {
+  const char* buf = *ptr;
   int64_t nitems, i;
-  PyObject *res;
-  bserObject *obj;
+  int mutable = ctx->mutable;
+  PyObject* res;
+  bserObject* obj;
 
   // skip array header
   buf++;
@@ -627,12 +701,12 @@ static PyObject *bunser_object(const cha
   }
 
   for (i = 0; i < nitems; i++) {
-    const char *keystr;
+    const char* keystr;
     int64_t keylen;
-    PyObject *key;
-    PyObject *ele;
+    PyObject* key;
+    PyObject* ele;
 
-    if (!bunser_string(ptr, end, &keystr, &keylen)) {
+    if (!bunser_bytestring(ptr, end, &keystr, &keylen)) {
       Py_DECREF(res);
       return NULL;
     }
@@ -643,13 +717,24 @@ static PyObject *bunser_object(const cha
       return NULL;
     }
 
-    key = PyString_FromStringAndSize(keystr, (Py_ssize_t)keylen);
+    if (mutable) {
+      // This will interpret the key as UTF-8.
+      key = PyUnicode_FromStringAndSize(keystr, (Py_ssize_t)keylen);
+    } else {
+      // For immutable objects we'll manage key lookups, so we can avoid going
+      // through the Unicode APIs. This avoids a potentially expensive and
+      // definitely unnecessary conversion to UTF-16 and back for Python 2.
+      // TODO: On Python 3 the Unicode APIs are smarter: we might be able to use
+      // Unicode keys there without an appreciable performance loss.
+      key = PyBytes_FromStringAndSize(keystr, (Py_ssize_t)keylen);
+    }
+
     if (!key) {
       Py_DECREF(res);
       return NULL;
     }
 
-    ele = bser_loads_recursive(ptr, end, mutable);
+    ele = bser_loads_recursive(ptr, end, ctx);
 
     if (!ele) {
       Py_DECREF(key);
@@ -671,14 +756,24 @@ static PyObject *bunser_object(const cha
   return res;
 }
 
-static PyObject *bunser_template(const char **ptr, const char *end,
-    int mutable)
-{
-  const char *buf = *ptr;
+static PyObject*
+bunser_template(const char** ptr, const char* end, const unser_ctx_t* ctx) {
+  const char* buf = *ptr;
   int64_t nitems, i;
-  PyObject *arrval;
-  PyObject *keys;
+  int mutable = ctx->mutable;
+  PyObject* arrval;
+  PyObject* keys;
   Py_ssize_t numkeys, keyidx;
+  unser_ctx_t keys_ctx = {0};
+  if (mutable) {
+    keys_ctx.mutable = 1;
+    // Decode keys as UTF-8 in this case.
+    keys_ctx.value_encoding = "utf-8";
+    keys_ctx.value_errors = "strict";
+  } else {
+    // Treat keys as bytestrings in this case -- we'll do Unicode conversions at
+    // lookup time.
+  }
 
   if (buf[1] != BSER_ARRAY) {
     PyErr_Format(PyExc_ValueError, "Expect ARRAY to follow TEMPLATE");
@@ -689,8 +784,9 @@ static PyObject *bunser_template(const c
   buf++;
   *ptr = buf;
 
-  // Load template keys
-  keys = bunser_array(ptr, end, mutable);
+  // Load template keys.
+  // Keys follow keys_ctx: UTF-8 decoded if mutable, else left as raw bytes.
+  keys = bunser_array(ptr, end, &keys_ctx);
   if (!keys) {
     return NULL;
   }
@@ -716,8 +812,8 @@ static PyObject *bunser_template(const c
   }
 
   for (i = 0; i < nitems; i++) {
-    PyObject *dict = NULL;
-    bserObject *obj = NULL;
+    PyObject* dict = NULL;
+    bserObject* obj = NULL;
 
     if (mutable) {
       dict = PyDict_New();
@@ -731,22 +827,22 @@ static PyObject *bunser_template(const c
       dict = (PyObject*)obj;
     }
     if (!dict) {
-fail:
+    fail:
       Py_DECREF(keys);
       Py_DECREF(arrval);
       return NULL;
     }
 
     for (keyidx = 0; keyidx < numkeys; keyidx++) {
-      PyObject *key;
-      PyObject *ele;
+      PyObject* key;
+      PyObject* ele;
 
       if (**ptr == BSER_SKIP) {
         *ptr = *ptr + 1;
         ele = Py_None;
         Py_INCREF(ele);
       } else {
-        ele = bser_loads_recursive(ptr, end, mutable);
+        ele = bser_loads_recursive(ptr, end, ctx);
       }
 
       if (!ele) {
@@ -772,34 +868,38 @@ fail:
   return arrval;
 }
 
-static PyObject *bser_loads_recursive(const char **ptr, const char *end,
-    int mutable)
-{
-  const char *buf = *ptr;
+static PyObject* bser_loads_recursive(
+    const char** ptr,
+    const char* end,
+    const unser_ctx_t* ctx) {
+  const char* buf = *ptr;
 
   switch (buf[0]) {
     case BSER_INT8:
     case BSER_INT16:
     case BSER_INT32:
-    case BSER_INT64:
-      {
-        int64_t ival;
-        if (!bunser_int(ptr, end, &ival)) {
-          return NULL;
-        }
-        if (ival < LONG_MIN || ival > LONG_MAX) {
-          return PyLong_FromLongLong(ival);
-        }
-        return PyInt_FromSsize_t(Py_SAFE_DOWNCAST(ival, int64_t, Py_ssize_t));
+    case BSER_INT64: {
+      int64_t ival;
+      if (!bunser_int(ptr, end, &ival)) {
+        return NULL;
       }
+// Python 3 has one integer type.
+#if PY_MAJOR_VERSION >= 3
+      return PyLong_FromLongLong(ival);
+#else
+      if (ival < LONG_MIN || ival > LONG_MAX) {
+        return PyLong_FromLongLong(ival);
+      }
+      return PyInt_FromSsize_t(Py_SAFE_DOWNCAST(ival, int64_t, Py_ssize_t));
+#endif // PY_MAJOR_VERSION >= 3
+    }
 
-    case BSER_REAL:
-      {
-        double dval;
-        memcpy(&dval, buf + 1, sizeof(dval));
-        *ptr = buf + 1 + sizeof(double);
-        return PyFloat_FromDouble(dval);
-      }
+    case BSER_REAL: {
+      double dval;
+      memcpy(&dval, buf + 1, sizeof(dval));
+      *ptr = buf + 1 + sizeof(double);
+      return PyFloat_FromDouble(dval);
+    }
 
     case BSER_TRUE:
       *ptr = buf + 1;
@@ -816,31 +916,51 @@ static PyObject *bser_loads_recursive(co
       Py_INCREF(Py_None);
       return Py_None;
 
-    case BSER_STRING:
-      {
-        const char *start;
-        int64_t len;
+    case BSER_BYTESTRING: {
+      const char* start;
+      int64_t len;
 
-        if (!bunser_string(ptr, end, &start, &len)) {
-          return NULL;
-        }
+      if (!bunser_bytestring(ptr, end, &start, &len)) {
+        return NULL;
+      }
 
-        if (len > LONG_MAX) {
-          PyErr_Format(PyExc_ValueError, "string too long for python");
-          return NULL;
-        }
-
-        return PyString_FromStringAndSize(start, (long)len);
+      if (len > LONG_MAX) {
+        PyErr_Format(PyExc_ValueError, "string too long for python");
+        return NULL;
       }
 
+      if (ctx->value_encoding != NULL) {
+        return PyUnicode_Decode(
+            start, (long)len, ctx->value_encoding, ctx->value_errors);
+      } else {
+        return PyBytes_FromStringAndSize(start, (long)len);
+      }
+    }
+
+    case BSER_UTF8STRING: {
+      const char* start;
+      int64_t len;
+
+      if (!bunser_bytestring(ptr, end, &start, &len)) {
+        return NULL;
+      }
+
+      if (len > LONG_MAX) {
+        PyErr_Format(PyExc_ValueError, "string too long for python");
+        return NULL;
+      }
+
+      return PyUnicode_Decode(start, (long)len, "utf-8", "strict");
+    }
+
     case BSER_ARRAY:
-      return bunser_array(ptr, end, mutable);
+      return bunser_array(ptr, end, ctx);
 
     case BSER_OBJECT:
-      return bunser_object(ptr, end, mutable);
+      return bunser_object(ptr, end, ctx);
 
     case BSER_TEMPLATE:
-      return bunser_template(ptr, end, mutable);
+      return bunser_template(ptr, end, ctx);
 
     default:
       PyErr_Format(PyExc_ValueError, "unhandled bser opcode 0x%02x", buf[0]);
@@ -849,102 +969,244 @@ static PyObject *bser_loads_recursive(co
   return NULL;
 }
 
-// Expected use case is to read a packet from the socket and
-// then call bser.pdu_len on the packet.  It returns the total
-// length of the entire response that the peer is sending,
-// including the bytes already received.  This allows the client
-// to compute the data size it needs to read before it can
-// decode the data
-static PyObject *bser_pdu_len(PyObject *self, PyObject *args)
-{
-  const char *start = NULL;
-  const char *data = NULL;
-  int datalen = 0;
-  const char *end;
-  int64_t expected_len, total_len;
+static int _pdu_info_helper(
+    const char* data,
+    const char* end,
+    uint32_t* bser_version_out,
+    uint32_t* bser_capabilities_out,
+    int64_t* expected_len_out,
+    off_t* position_out) {
+  uint32_t bser_version;
+  uint32_t bser_capabilities = 0;
+  int64_t expected_len;
 
-  if (!PyArg_ParseTuple(args, "s#", &start, &datalen)) {
-    return NULL;
-  }
-  data = start;
-  end = data + datalen;
-
+  const char* start;
+  start = data;
   // Validate the header and length
-  if (memcmp(data, EMPTY_HEADER, 2) != 0) {
+  if (memcmp(data, EMPTY_HEADER, 2) == 0) {
+    bser_version = 1;
+  } else if (memcmp(data, EMPTY_HEADER_V2, 2) == 0) {
+    bser_version = 2;
+  } else {
     PyErr_SetString(PyExc_ValueError, "invalid bser header");
-    return NULL;
+    return 0;
   }
 
   data += 2;
 
+  if (bser_version == 2) {
+    // A 32-bit capabilities word (currently unused) follows the version.
+    if (end - data < (int)sizeof(bser_capabilities)) {
+      PyErr_SetString(PyExc_ValueError, "invalid bser header");
+      return 0;
+    }
+    memcpy(&bser_capabilities, data, sizeof(bser_capabilities));
+    data += sizeof(bser_capabilities);
+  }
   // Expect an integer telling us how big the rest of the data
   // should be
   if (!bunser_int(&data, end, &expected_len)) {
+    return 0;
+  }
+
+  *bser_version_out = bser_version;
+  *bser_capabilities_out = (uint32_t)bser_capabilities;
+  *expected_len_out = expected_len;
+  *position_out = (off_t)(data - start);
+  return 1;
+}
+
+// This function parses the PDU header and provides info about the packet
+// Returns false if unsuccessful
+static int pdu_info_helper(
+    PyObject* self,
+    PyObject* args,
+    uint32_t* bser_version_out,
+    uint32_t* bser_capabilities_out,
+    int64_t* total_len_out) {
+  const char* start = NULL;
+  const char* data = NULL;
+  int datalen = 0;
+  const char* end;
+  int64_t expected_len;
+  off_t position;
+
+  if (!PyArg_ParseTuple(args, "s#", &start, &datalen)) {
+    return 0;
+  }
+  data = start;
+  end = data + datalen;
+
+  if (!_pdu_info_helper(
+          data,
+          end,
+          bser_version_out,
+          bser_capabilities_out,
+          &expected_len,
+          &position)) {
+    return 0;
+  }
+  *total_len_out = (int64_t)(expected_len + position);
+  return 1;
+}
+
+// Expected use case: read a packet from the socket, then call bser.pdu_info
+// on it.  Returns the BSER version, the BSER capabilities and the total length
+// of the response the peer is sending (including bytes already received), so
+// the client can compute how much more it must read before decoding the data.
+static PyObject* bser_pdu_info(PyObject* self, PyObject* args) {
+  uint32_t version, capabilities;
+  int64_t total_len;
+  if (!pdu_info_helper(self, args, &version, &capabilities, &total_len)) {
+    return NULL;
+  }
+  return Py_BuildValue(
+      "kkL", (unsigned long)version, (unsigned long)capabilities, total_len);
+}
+
+static PyObject* bser_pdu_len(PyObject* self, PyObject* args) {
+  uint32_t version, capabilities;
+  int64_t total_len;
+  if (!pdu_info_helper(self, args, &version, &capabilities, &total_len)) {
+    return NULL;
+  }
+  return Py_BuildValue("L", total_len);
+}
+
+static PyObject* bser_loads(PyObject* self, PyObject* args, PyObject* kw) {
+  const char* data = NULL;
+  int datalen = 0;
+  const char* start;
+  const char* end;
+  int64_t expected_len;
+  off_t position;
+  PyObject* mutable_obj = NULL;
+  const char* value_encoding = NULL;
+  const char* value_errors = NULL;
+  unser_ctx_t ctx = {1, 0};
+
+  static char* kw_list[] = {
+      "buf", "mutable", "value_encoding", "value_errors", NULL};
+
+  if (!PyArg_ParseTupleAndKeywords(
+          args,
+          kw,
+          "s#|Ozz:loads",
+          kw_list,
+          &start,
+          &datalen,
+          &mutable_obj,
+          &value_encoding,
+          &value_errors)) {
     return NULL;
   }
 
-  total_len = expected_len + (data - start);
-  if (total_len > LONG_MAX) {
-    return PyLong_FromLongLong(total_len);
+  if (mutable_obj) {
+    ctx.mutable = PyObject_IsTrue(mutable_obj) > 0 ? 1 : 0;
   }
-  return PyInt_FromLong((long)total_len);
-}
-
-static PyObject *bser_loads(PyObject *self, PyObject *args)
-{
-  const char *data = NULL;
-  int datalen = 0;
-  const char *end;
-  int64_t expected_len;
-  int mutable = 1;
-  PyObject *mutable_obj = NULL;
-
-  if (!PyArg_ParseTuple(args, "s#|O:loads", &data, &datalen, &mutable_obj)) {
-    return NULL;
+  ctx.value_encoding = value_encoding;
+  if (value_encoding == NULL) {
+    ctx.value_errors = NULL;
+  } else if (value_errors == NULL) {
+    ctx.value_errors = "strict";
+  } else {
+    ctx.value_errors = value_errors;
   }
-  if (mutable_obj) {
-    mutable = PyObject_IsTrue(mutable_obj) > 0 ? 1 : 0;
-  }
-
+  data = start;
   end = data + datalen;
 
-  // Validate the header and length
-  if (memcmp(data, EMPTY_HEADER, 2) != 0) {
-    PyErr_SetString(PyExc_ValueError, "invalid bser header");
+  if (!_pdu_info_helper(
+          data,
+          end,
+          &ctx.bser_version,
+          &ctx.bser_capabilities,
+          &expected_len,
+          &position)) {
     return NULL;
   }
 
-  data += 2;
-
-  // Expect an integer telling us how big the rest of the data
-  // should be
-  if (!bunser_int(&data, end, &expected_len)) {
-    return NULL;
-  }
-
+  data = start + position;
   // Verify
   if (expected_len + data != end) {
     PyErr_SetString(PyExc_ValueError, "bser data len != header len");
     return NULL;
   }
 
-  return bser_loads_recursive(&data, end, mutable);
+  return bser_loads_recursive(&data, end, &ctx);
 }
 
+static PyObject* bser_load(PyObject* self, PyObject* args, PyObject* kw) {
+  PyObject *load, *string;
+  PyObject* fp = NULL;
+  PyObject* mutable_obj = Py_True;
+  const char* value_encoding = NULL;
+  const char* value_errors = NULL;
+  // Only fp is required; the defaults mirror loads() (mutable, no decoding).
+  static char* kw_list[] = {
+      "fp", "mutable", "value_encoding", "value_errors", NULL};
+
+  if (!PyArg_ParseTupleAndKeywords(
+          args,
+          kw,
+          "O|Ozz:load",
+          kw_list,
+          &fp,
+          &mutable_obj,
+          &value_encoding,
+          &value_errors)) {
+    return NULL;
+  }
+
+  load = PyImport_ImportModule("pywatchman.load");
+  if (load == NULL) {
+    return NULL;
+  }
+  string = PyObject_CallMethod(
+      load, "load", "OOzz", fp, mutable_obj, value_encoding, value_errors);
+  Py_DECREF(load);
+  return string;
+}
+
+// clang-format off
 static PyMethodDef bser_methods[] = {
-  {"loads",  bser_loads, METH_VARARGS, "Deserialize string."},
-  {"pdu_len", bser_pdu_len, METH_VARARGS, "Extract PDU length."},
-  {"dumps",  bser_dumps, METH_VARARGS, "Serialize string."},
+  {"loads", (PyCFunction)bser_loads, METH_VARARGS | METH_KEYWORDS,
+   "Deserialize string."},
+  {"load", (PyCFunction)bser_load, METH_VARARGS | METH_KEYWORDS,
+   "Deserialize a file object"},
+  {"pdu_info", (PyCFunction)bser_pdu_info, METH_VARARGS,
+   "Extract PDU information."},
+  {"pdu_len", (PyCFunction)bser_pdu_len, METH_VARARGS,
+   "Extract total PDU length."},
+  {"dumps",  (PyCFunction)bser_dumps, METH_VARARGS | METH_KEYWORDS,
+   "Serialize string."},
   {NULL, NULL, 0, NULL}
 };
 
-PyMODINIT_FUNC initbser(void)
-{
+#if PY_MAJOR_VERSION >= 3
+static struct PyModuleDef bser_module = {
+  PyModuleDef_HEAD_INIT,
+  "bser",
+  "Efficient encoding and decoding of BSER.",
+  -1,
+  bser_methods
+};
+// clang-format on
+
+PyMODINIT_FUNC PyInit_bser(void) {
+  // Finalize bserobject first so a failure aborts the import with the
+  // exception PyType_Ready set, instead of exposing a half-built type.
+  if (PyType_Ready(&bserObjectType) < 0) {
+    return NULL;
+  }
+  return PyModule_Create(&bser_module);
+}
+#else
+
+PyMODINIT_FUNC initbser(void) {
   (void)Py_InitModule("bser", bser_methods);
   PyType_Ready(&bserObjectType);
 }
+#endif // PY_MAJOR_VERSION >= 3
 
 /* vim:ts=2:sw=2:et:
  */
-
-// no-check-code -- this is a 3rd party library
diff --git a/hgext/fsmonitor/pywatchman/capabilities.py b/hgext/fsmonitor/pywatchman/capabilities.py
--- a/hgext/fsmonitor/pywatchman/capabilities.py
+++ b/hgext/fsmonitor/pywatchman/capabilities.py
@@ -26,6 +26,11 @@
 # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+# no unicode literals
+
 import re
 
 def parse_version(vstr):
@@ -65,5 +70,3 @@ def synthesize(vers, opts):
             vers['error'] = 'client required capability `' + name + \
                             '` is not supported by this server'
     return vers
-
-# no-check-code -- this is a 3rd party library
diff --git a/hgext/fsmonitor/pywatchman/compat.py b/hgext/fsmonitor/pywatchman/compat.py
new file mode 100644
--- /dev/null
+++ b/hgext/fsmonitor/pywatchman/compat.py
@@ -0,0 +1,65 @@
+# Copyright 2016-present Facebook, Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+#  * Redistributions of source code must retain the above copyright notice,
+#    this list of conditions and the following disclaimer.
+#
+#  * Redistributions in binary form must reproduce the above copyright notice,
+#    this list of conditions and the following disclaimer in the documentation
+#    and/or other materials provided with the distribution.
+#
+#  * Neither the name Facebook nor the names of its contributors may be used to
+#    endorse or promote products derived from this software without specific
+#    prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+'''Compatibility module across Python 2 and 3.'''
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+# no unicode literals
+
+import sys
+
+PYTHON3 = sys.version_info >= (3, 0)
+
+# This is adapted from https://bitbucket.org/gutworth/six, and used under the
+# MIT license. See LICENSE for a full copyright notice.
+if PYTHON3:
+    def reraise(tp, value, tb=None):
+        try:
+            if value is None:
+                value = tp()
+            if value.__traceback__ is not tb:
+                raise value.with_traceback(tb)
+            raise value
+        finally:
+            value = None
+            tb = None
+else:
+    exec('''
+def reraise(tp, value, tb=None):
+    try:
+        raise tp, value, tb
+    finally:
+        tb = None
+'''.strip())
+
+if PYTHON3:
+    UNICODE = str
+else:
+    UNICODE = unicode
diff --git a/hgext/fsmonitor/pywatchman/encoding.py b/hgext/fsmonitor/pywatchman/encoding.py
new file mode 100644
--- /dev/null
+++ b/hgext/fsmonitor/pywatchman/encoding.py
@@ -0,0 +1,73 @@
+# Copyright 2016-present Facebook, Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+#  * Redistributions of source code must retain the above copyright notice,
+#    this list of conditions and the following disclaimer.
+#
+#  * Redistributions in binary form must reproduce the above copyright notice,
+#    this list of conditions and the following disclaimer in the documentation
+#    and/or other materials provided with the distribution.
+#
+#  * Neither the name Facebook nor the names of its contributors may be used to
+#    endorse or promote products derived from this software without specific
+#    prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+'''Module to deal with filename encoding on the local system, as returned by
+Watchman.'''
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+# no unicode literals
+
+import sys
+
+from . import (
+    compat,
+)
+
+if compat.PYTHON3:
+    default_local_errors = 'surrogateescape'
+
+    def get_local_encoding():
+        if sys.platform == 'win32':
+            # Watchman always returns UTF-8 encoded strings on Windows.
+            return 'utf-8'
+        # On the Python 3 versions we support, sys.getfilesystemencoding never
+        # returns None.
+        return sys.getfilesystemencoding()
+else:
+    # Python 2 doesn't support surrogateescape, so use 'strict' by
+    # default. Users can register a custom surrogateescape error handler and use
+    # that if they so desire.
+    default_local_errors = 'strict'
+
+    def get_local_encoding():
+        if sys.platform == 'win32':
+            # Watchman always returns UTF-8 encoded strings on Windows.
+            return 'utf-8'
+        fsencoding = sys.getfilesystemencoding()
+        if fsencoding is None:
+            # This is very unlikely to happen, but if it does, just use UTF-8
+            fsencoding = 'utf-8'
+        return fsencoding
+
+def encode_local(s):
+    return s.encode(get_local_encoding(), default_local_errors)
+
+def decode_local(bs):
+    return bs.decode(get_local_encoding(), default_local_errors)
diff --git a/hgext/fsmonitor/pywatchman/load.py b/hgext/fsmonitor/pywatchman/load.py
new file mode 100644
--- /dev/null
+++ b/hgext/fsmonitor/pywatchman/load.py
@@ -0,0 +1,107 @@
+# Copyright 2016 Facebook, Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+#  * Redistributions of source code must retain the above copyright notice,
+#    this list of conditions and the following disclaimer.
+#
+#  * Redistributions in binary form must reproduce the above copyright notice,
+#    this list of conditions and the following disclaimer in the documentation
+#    and/or other materials provided with the distribution.
+#
+#  * Neither the name Facebook nor the names of its contributors may be used to
+#    endorse or promote products derived from this software without specific
+#    prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+# no unicode literals
+
+try:
+    from . import bser
+except ImportError:
+    from . import pybser as bser
+
+import ctypes
+
+EMPTY_HEADER = b"\x00\x01\x05\x00\x00\x00\x00"
+
+
+def _read_bytes(fp, buf):
+    """Read bytes from a file-like object
+
+    @param fp: File-like object that implements read(int)
+    @type fp: file
+
+    @param buf: Buffer to read into
+    @type buf: writable buffer (e.g. a ctypes char array)
+
+    @return: number of bytes read; less than len(buf) on a short read
+    """
+
+    # Fill buf in place; readinto() may return fewer bytes than requested
+    offset = 0
+    remaining = len(buf)
+    while remaining > 0:
+        l = fp.readinto((ctypes.c_char * remaining).from_buffer(buf, offset))
+        if l is None or l == 0:
+            return offset
+        offset += l
+        remaining -= l
+    return offset
+
+
+def load(fp, mutable=True, value_encoding=None, value_errors=None):
+    """Deserialize a BSER-encoded blob.
+
+    @param fp: The file-object to deserialize.
+    @type fp: file
+
+    @param mutable: Whether to return mutable results.
+    @type mutable: bool
+
+    @param value_encoding: Optional codec to use to decode values. If
+                           unspecified or None, return values as bytestrings.
+    @type value_encoding: str
+
+    @param value_errors: Optional error handler for codec. 'strict' by default.
+                         The other most common argument is 'surrogateescape' on
+                         Python 3. If value_encoding is None, this is ignored.
+    @type value_errors: str
+    """
+    buf = ctypes.create_string_buffer(8192)
+    SNIFF_BUFFER_SIZE = len(EMPTY_HEADER)
+    header = (ctypes.c_char * SNIFF_BUFFER_SIZE).from_buffer(buf)
+    read_len = _read_bytes(fp, header)
+    if read_len < len(header):
+        return None
+
+    total_len = bser.pdu_len(buf)
+    if total_len > len(buf):
+        ctypes.resize(buf, total_len)
+
+    body = (ctypes.c_char * (total_len - len(header))).from_buffer(
+        buf, len(header))
+    read_len = _read_bytes(fp, body)
+    if read_len < len(body):
+        raise RuntimeError('bser data ended early')
+
+    return bser.loads(
+        (ctypes.c_char * total_len).from_buffer(buf, 0),
+        mutable,
+        value_encoding,
+        value_errors)
diff --git a/hgext/fsmonitor/pywatchman/msc_stdint.h b/hgext/fsmonitor/pywatchman/msc_stdint.h
deleted file mode 100644
--- a/hgext/fsmonitor/pywatchman/msc_stdint.h
+++ /dev/null
@@ -1,260 +0,0 @@
-// no-check-code
-// ISO C9x  compliant stdint.h for Microsoft Visual Studio
-// Based on ISO/IEC 9899:TC2 Committee draft (May 6, 2005) WG14/N1124 
-// 
-//  Copyright (c) 2006-2013 Alexander Chemeris
-// 
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are met:
-// 
-//   1. Redistributions of source code must retain the above copyright notice,
-//      this list of conditions and the following disclaimer.
-// 
-//   2. Redistributions in binary form must reproduce the above copyright
-//      notice, this list of conditions and the following disclaimer in the
-//      documentation and/or other materials provided with the distribution.
-// 
-//   3. Neither the name of the product nor the names of its contributors may
-//      be used to endorse or promote products derived from this software
-//      without specific prior written permission.
-// 
-// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
-// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
-// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
-// EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
-// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 
-// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
-// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
-// ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-// 
-///////////////////////////////////////////////////////////////////////////////
-
-#ifndef _MSC_VER // [
-#error "Use this header only with Microsoft Visual C++ compilers!"
-#endif // _MSC_VER ]
-
-#ifndef _MSC_STDINT_H_ // [
-#define _MSC_STDINT_H_
-
-#if _MSC_VER > 1000
-#pragma once
-#endif
-
-#if _MSC_VER >= 1600 // [
-#include <stdint.h>
-#else // ] _MSC_VER >= 1600 [
-
-#include <limits.h>
-
-// For Visual Studio 6 in C++ mode and for many Visual Studio versions when
-// compiling for ARM we should wrap <wchar.h> include with 'extern "C++" {}'
-// or compiler give many errors like this:
-//   error C2733: second C linkage of overloaded function 'wmemchr' not allowed
-#ifdef __cplusplus
-extern "C" {
-#endif
-#  include <wchar.h>
-#ifdef __cplusplus
-}
-#endif
-
-// Define _W64 macros to mark types changing their size, like intptr_t.
-#ifndef _W64
-#  if !defined(__midl) && (defined(_X86_) || defined(_M_IX86)) && _MSC_VER >= 1300
-#     define _W64 __w64
-#  else
-#     define _W64
-#  endif
-#endif
-
-
-// 7.18.1 Integer types
-
-// 7.18.1.1 Exact-width integer types
-
-// Visual Studio 6 and Embedded Visual C++ 4 doesn't
-// realize that, e.g. char has the same size as __int8
-// so we give up on __intX for them.
-#if (_MSC_VER < 1300)
-   typedef signed char       int8_t;
-   typedef signed short      int16_t;
-   typedef signed int        int32_t;
-   typedef unsigned char     uint8_t;
-   typedef unsigned short    uint16_t;
-   typedef unsigned int      uint32_t;
-#else
-   typedef signed __int8     int8_t;
-   typedef signed __int16    int16_t;
-   typedef signed __int32    int32_t;
-   typedef unsigned __int8   uint8_t;
-   typedef unsigned __int16  uint16_t;
-   typedef unsigned __int32  uint32_t;
-#endif
-typedef signed __int64       int64_t;
-typedef unsigned __int64     uint64_t;
-
-
-// 7.18.1.2 Minimum-width integer types
-typedef int8_t    int_least8_t;
-typedef int16_t   int_least16_t;
-typedef int32_t   int_least32_t;
-typedef int64_t   int_least64_t;
-typedef uint8_t   uint_least8_t;
-typedef uint16_t  uint_least16_t;
-typedef uint32_t  uint_least32_t;
-typedef uint64_t  uint_least64_t;
-
-// 7.18.1.3 Fastest minimum-width integer types
-typedef int8_t    int_fast8_t;
-typedef int16_t   int_fast16_t;
-typedef int32_t   int_fast32_t;
-typedef int64_t   int_fast64_t;
-typedef uint8_t   uint_fast8_t;
-typedef uint16_t  uint_fast16_t;
-typedef uint32_t  uint_fast32_t;
-typedef uint64_t  uint_fast64_t;
-
-// 7.18.1.4 Integer types capable of holding object pointers
-#ifdef _WIN64 // [
-   typedef signed __int64    intptr_t;
-   typedef unsigned __int64  uintptr_t;
-#else // _WIN64 ][
-   typedef _W64 signed int   intptr_t;
-   typedef _W64 unsigned int uintptr_t;
-#endif // _WIN64 ]
-
-// 7.18.1.5 Greatest-width integer types
-typedef int64_t   intmax_t;
-typedef uint64_t  uintmax_t;
-
-
-// 7.18.2 Limits of specified-width integer types
-
-#if !defined(__cplusplus) || defined(__STDC_LIMIT_MACROS) // [   See footnote 220 at page 257 and footnote 221 at page 259
-
-// 7.18.2.1 Limits of exact-width integer types
-#define INT8_MIN     ((int8_t)_I8_MIN)
-#define INT8_MAX     _I8_MAX
-#define INT16_MIN    ((int16_t)_I16_MIN)
-#define INT16_MAX    _I16_MAX
-#define INT32_MIN    ((int32_t)_I32_MIN)
-#define INT32_MAX    _I32_MAX
-#define INT64_MIN    ((int64_t)_I64_MIN)
-#define INT64_MAX    _I64_MAX
-#define UINT8_MAX    _UI8_MAX
-#define UINT16_MAX   _UI16_MAX
-#define UINT32_MAX   _UI32_MAX
-#define UINT64_MAX   _UI64_MAX
-
-// 7.18.2.2 Limits of minimum-width integer types
-#define INT_LEAST8_MIN    INT8_MIN
-#define INT_LEAST8_MAX    INT8_MAX
-#define INT_LEAST16_MIN   INT16_MIN
-#define INT_LEAST16_MAX   INT16_MAX
-#define INT_LEAST32_MIN   INT32_MIN
-#define INT_LEAST32_MAX   INT32_MAX
-#define INT_LEAST64_MIN   INT64_MIN
-#define INT_LEAST64_MAX   INT64_MAX
-#define UINT_LEAST8_MAX   UINT8_MAX
-#define UINT_LEAST16_MAX  UINT16_MAX
-#define UINT_LEAST32_MAX  UINT32_MAX
-#define UINT_LEAST64_MAX  UINT64_MAX
-
-// 7.18.2.3 Limits of fastest minimum-width integer types
-#define INT_FAST8_MIN    INT8_MIN
-#define INT_FAST8_MAX    INT8_MAX
-#define INT_FAST16_MIN   INT16_MIN
-#define INT_FAST16_MAX   INT16_MAX
-#define INT_FAST32_MIN   INT32_MIN
-#define INT_FAST32_MAX   INT32_MAX
-#define INT_FAST64_MIN   INT64_MIN
-#define INT_FAST64_MAX   INT64_MAX
-#define UINT_FAST8_MAX   UINT8_MAX
-#define UINT_FAST16_MAX  UINT16_MAX
-#define UINT_FAST32_MAX  UINT32_MAX
-#define UINT_FAST64_MAX  UINT64_MAX
-
-// 7.18.2.4 Limits of integer types capable of holding object pointers
-#ifdef _WIN64 // [
-#  define INTPTR_MIN   INT64_MIN
-#  define INTPTR_MAX   INT64_MAX
-#  define UINTPTR_MAX  UINT64_MAX
-#else // _WIN64 ][
-#  define INTPTR_MIN   INT32_MIN
-#  define INTPTR_MAX   INT32_MAX
-#  define UINTPTR_MAX  UINT32_MAX
-#endif // _WIN64 ]
-
-// 7.18.2.5 Limits of greatest-width integer types
-#define INTMAX_MIN   INT64_MIN
-#define INTMAX_MAX   INT64_MAX
-#define UINTMAX_MAX  UINT64_MAX
-
-// 7.18.3 Limits of other integer types
-
-#ifdef _WIN64 // [
-#  define PTRDIFF_MIN  _I64_MIN
-#  define PTRDIFF_MAX  _I64_MAX
-#else  // _WIN64 ][
-#  define PTRDIFF_MIN  _I32_MIN
-#  define PTRDIFF_MAX  _I32_MAX
-#endif  // _WIN64 ]
-
-#define SIG_ATOMIC_MIN  INT_MIN
-#define SIG_ATOMIC_MAX  INT_MAX
-
-#ifndef SIZE_MAX // [
-#  ifdef _WIN64 // [
-#     define SIZE_MAX  _UI64_MAX
-#  else // _WIN64 ][
-#     define SIZE_MAX  _UI32_MAX
-#  endif // _WIN64 ]
-#endif // SIZE_MAX ]
-
-// WCHAR_MIN and WCHAR_MAX are also defined in <wchar.h>
-#ifndef WCHAR_MIN // [
-#  define WCHAR_MIN  0
-#endif  // WCHAR_MIN ]
-#ifndef WCHAR_MAX // [
-#  define WCHAR_MAX  _UI16_MAX
-#endif  // WCHAR_MAX ]
-
-#define WINT_MIN  0
-#define WINT_MAX  _UI16_MAX
-
-#endif // __STDC_LIMIT_MACROS ]
-
-
-// 7.18.4 Limits of other integer types
-
-#if !defined(__cplusplus) || defined(__STDC_CONSTANT_MACROS) // [   See footnote 224 at page 260
-
-// 7.18.4.1 Macros for minimum-width integer constants
-
-#define INT8_C(val)  val##i8
-#define INT16_C(val) val##i16
-#define INT32_C(val) val##i32
-#define INT64_C(val) val##i64
-
-#define UINT8_C(val)  val##ui8
-#define UINT16_C(val) val##ui16
-#define UINT32_C(val) val##ui32
-#define UINT64_C(val) val##ui64
-
-// 7.18.4.2 Macros for greatest-width integer constants
-// These #ifndef's are needed to prevent collisions with <boost/cstdint.hpp>.
-// Check out Issue 9 for the details.
-#ifndef INTMAX_C //   [
-#  define INTMAX_C   INT64_C
-#endif // INTMAX_C    ]
-#ifndef UINTMAX_C //  [
-#  define UINTMAX_C  UINT64_C
-#endif // UINTMAX_C   ]
-
-#endif // __STDC_CONSTANT_MACROS ]
-
-#endif // _MSC_VER >= 1600 ]
-
-#endif // _MSC_STDINT_H_ ]
diff --git a/hgext/fsmonitor/pywatchman/pybser.py b/hgext/fsmonitor/pywatchman/pybser.py
--- a/hgext/fsmonitor/pywatchman/pybser.py
+++ b/hgext/fsmonitor/pywatchman/pybser.py
@@ -26,33 +26,51 @@
 # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+# no unicode literals
+
+import binascii
 import collections
 import ctypes
 import struct
 import sys
 
-BSER_ARRAY = '\x00'
-BSER_OBJECT = '\x01'
-BSER_STRING = '\x02'
-BSER_INT8 = '\x03'
-BSER_INT16 = '\x04'
-BSER_INT32 = '\x05'
-BSER_INT64 = '\x06'
-BSER_REAL = '\x07'
-BSER_TRUE = '\x08'
-BSER_FALSE = '\x09'
-BSER_NULL = '\x0a'
-BSER_TEMPLATE = '\x0b'
-BSER_SKIP = '\x0c'
+from . import (
+    compat,
+)
+
+BSER_ARRAY = b'\x00'
+BSER_OBJECT = b'\x01'
+BSER_BYTESTRING = b'\x02'
+BSER_INT8 = b'\x03'
+BSER_INT16 = b'\x04'
+BSER_INT32 = b'\x05'
+BSER_INT64 = b'\x06'
+BSER_REAL = b'\x07'
+BSER_TRUE = b'\x08'
+BSER_FALSE = b'\x09'
+BSER_NULL = b'\x0a'
+BSER_TEMPLATE = b'\x0b'
+BSER_SKIP = b'\x0c'
+BSER_UTF8STRING = b'\x0d'
+
+if compat.PYTHON3:
+    STRING_TYPES = (str, bytes)
+    unicode = str
+    def tobytes(i):
+        return str(i).encode('ascii')
+    long = int
+else:
+    STRING_TYPES = (unicode, str)
+    tobytes = bytes
 
 # Leave room for the serialization header, which includes
 # our overall length.  To make things simpler, we'll use an
 # int32 for the header
-EMPTY_HEADER = "\x00\x01\x05\x00\x00\x00\x00"
-
-# Python 3 conditional for supporting Python 2's int/long types
-if sys.version_info > (3,):
-    long = int
+EMPTY_HEADER = b"\x00\x01\x05\x00\x00\x00\x00"
+EMPTY_HEADER_V2 = b"\x00\x02\x00\x00\x00\x00\x05\x00\x00\x00\x00"
 
 def _int_size(x):
     """Return the smallest size int that can store the value"""
@@ -67,13 +85,28 @@ def _int_size(x):
     else:
         raise RuntimeError('Cannot represent value: ' + str(x))
 
+def _buf_pos(buf, pos):
+    ret = buf[pos]
+    # In Python 2, buf is a str array so buf[pos] is a string. In Python 3, buf
+    # is a bytes array and buf[pos] is an integer.
+    if compat.PYTHON3:
+        ret = bytes((ret,))
+    return ret
 
 class _bser_buffer(object):
 
-    def __init__(self):
+    def __init__(self, version):
+        self.bser_version = version
         self.buf = ctypes.create_string_buffer(8192)
-        struct.pack_into(str(len(EMPTY_HEADER)) + 's', self.buf, 0, EMPTY_HEADER)
-        self.wpos = len(EMPTY_HEADER)
+        if self.bser_version == 1:
+            struct.pack_into(tobytes(len(EMPTY_HEADER)) + b's', self.buf, 0,
+                             EMPTY_HEADER)
+            self.wpos = len(EMPTY_HEADER)
+        else:
+            assert self.bser_version == 2
+            struct.pack_into(tobytes(len(EMPTY_HEADER_V2)) + b's', self.buf, 0,
+                             EMPTY_HEADER_V2)
+            self.wpos = len(EMPTY_HEADER_V2)
 
     def ensure_size(self, size):
         while ctypes.sizeof(self.buf) - self.wpos < size:
@@ -84,13 +117,13 @@ class _bser_buffer(object):
         to_write = size + 1
         self.ensure_size(to_write)
         if size == 1:
-            struct.pack_into('=cb', self.buf, self.wpos, BSER_INT8, val)
+            struct.pack_into(b'=cb', self.buf, self.wpos, BSER_INT8, val)
         elif size == 2:
-            struct.pack_into('=ch', self.buf, self.wpos, BSER_INT16, val)
+            struct.pack_into(b'=ch', self.buf, self.wpos, BSER_INT16, val)
         elif size == 4:
-            struct.pack_into('=ci', self.buf, self.wpos, BSER_INT32, val)
+            struct.pack_into(b'=ci', self.buf, self.wpos, BSER_INT32, val)
         elif size == 8:
-            struct.pack_into('=cq', self.buf, self.wpos, BSER_INT64, val)
+            struct.pack_into(b'=cq', self.buf, self.wpos, BSER_INT64, val)
         else:
             raise RuntimeError('Cannot represent this long value')
         self.wpos += to_write
@@ -104,13 +137,17 @@ class _bser_buffer(object):
         to_write = 2 + size + s_len
         self.ensure_size(to_write)
         if size == 1:
-            struct.pack_into('=ccb' + str(s_len) + 's', self.buf, self.wpos, BSER_STRING, BSER_INT8, s_len, s)
+            struct.pack_into(b'=ccb' + tobytes(s_len) + b's', self.buf,
+                self.wpos, BSER_BYTESTRING, BSER_INT8, s_len, s)
         elif size == 2:
-            struct.pack_into('=cch' + str(s_len) + 's', self.buf, self.wpos, BSER_STRING, BSER_INT16, s_len, s)
+            struct.pack_into(b'=cch' + tobytes(s_len) + b's', self.buf,
+                self.wpos, BSER_BYTESTRING, BSER_INT16, s_len, s)
         elif size == 4:
-            struct.pack_into('=cci' + str(s_len) + 's', self.buf, self.wpos, BSER_STRING, BSER_INT32, s_len, s)
+            struct.pack_into(b'=cci' + tobytes(s_len) + b's', self.buf,
+                self.wpos, BSER_BYTESTRING, BSER_INT32, s_len, s)
         elif size == 8:
-            struct.pack_into('=ccq' + str(s_len) + 's', self.buf, self.wpos, BSER_STRING, BSER_INT64, s_len, s)
+            struct.pack_into(b'=ccq' + tobytes(s_len) + b's', self.buf,
+                self.wpos, BSER_BYTESTRING, BSER_INT64, s_len, s)
         else:
             raise RuntimeError('Cannot represent this string value')
         self.wpos += to_write
@@ -124,54 +161,68 @@ class _bser_buffer(object):
                 to_encode = BSER_TRUE
             else:
                 to_encode = BSER_FALSE
-            struct.pack_into('=c', self.buf, self.wpos, to_encode)
+            struct.pack_into(b'=c', self.buf, self.wpos, to_encode)
             self.wpos += needed
         elif val is None:
             needed = 1
             self.ensure_size(needed)
-            struct.pack_into('=c', self.buf, self.wpos, BSER_NULL)
+            struct.pack_into(b'=c', self.buf, self.wpos, BSER_NULL)
             self.wpos += needed
         elif isinstance(val, (int, long)):
             self.append_long(val)
-        elif isinstance(val, (str, unicode)):
+        elif isinstance(val, STRING_TYPES):
             self.append_string(val)
         elif isinstance(val, float):
             needed = 9
             self.ensure_size(needed)
-            struct.pack_into('=cd', self.buf, self.wpos, BSER_REAL, val)
+            struct.pack_into(b'=cd', self.buf, self.wpos, BSER_REAL, val)
             self.wpos += needed
-        elif isinstance(val, collections.Mapping) and isinstance(val, collections.Sized):
+        elif isinstance(val, collections.Mapping) and \
+            isinstance(val, collections.Sized):
             val_len = len(val)
             size = _int_size(val_len)
             needed = 2 + size
             self.ensure_size(needed)
             if size == 1:
-                struct.pack_into('=ccb', self.buf, self.wpos, BSER_OBJECT, BSER_INT8, val_len)
+                struct.pack_into(b'=ccb', self.buf, self.wpos, BSER_OBJECT,
+                    BSER_INT8, val_len)
             elif size == 2:
-                struct.pack_into('=cch', self.buf, self.wpos, BSER_OBJECT, BSER_INT16, val_len)
+                struct.pack_into(b'=cch', self.buf, self.wpos, BSER_OBJECT,
+                    BSER_INT16, val_len)
             elif size == 4:
-                struct.pack_into('=cci', self.buf, self.wpos, BSER_OBJECT, BSER_INT32, val_len)
+                struct.pack_into(b'=cci', self.buf, self.wpos, BSER_OBJECT,
+                    BSER_INT32, val_len)
             elif size == 8:
-                struct.pack_into('=ccq', self.buf, self.wpos, BSER_OBJECT, BSER_INT64, val_len)
+                struct.pack_into(b'=ccq', self.buf, self.wpos, BSER_OBJECT,
+                    BSER_INT64, val_len)
             else:
                 raise RuntimeError('Cannot represent this mapping value')
             self.wpos += needed
-            for k, v in val.iteritems():
+            if compat.PYTHON3:
+                iteritems = val.items()
+            else:
+                iteritems = val.iteritems()
+            for k, v in iteritems:
                 self.append_string(k)
                 self.append_recursive(v)
-        elif isinstance(val, collections.Iterable) and isinstance(val, collections.Sized):
+        elif isinstance(val, collections.Iterable) and \
+            isinstance(val, collections.Sized):
             val_len = len(val)
             size = _int_size(val_len)
             needed = 2 + size
             self.ensure_size(needed)
             if size == 1:
-                struct.pack_into('=ccb', self.buf, self.wpos, BSER_ARRAY, BSER_INT8, val_len)
+                struct.pack_into(b'=ccb', self.buf, self.wpos, BSER_ARRAY,
+                    BSER_INT8, val_len)
             elif size == 2:
-                struct.pack_into('=cch', self.buf, self.wpos, BSER_ARRAY, BSER_INT16, val_len)
+                struct.pack_into(b'=cch', self.buf, self.wpos, BSER_ARRAY,
+                    BSER_INT16, val_len)
             elif size == 4:
-                struct.pack_into('=cci', self.buf, self.wpos, BSER_ARRAY, BSER_INT32, val_len)
+                struct.pack_into(b'=cci', self.buf, self.wpos, BSER_ARRAY,
+                    BSER_INT32, val_len)
             elif size == 8:
-                struct.pack_into('=ccq', self.buf, self.wpos, BSER_ARRAY, BSER_INT64, val_len)
+                struct.pack_into(b'=ccq', self.buf, self.wpos, BSER_ARRAY,
+                    BSER_INT64, val_len)
             else:
                 raise RuntimeError('Cannot represent this sequence value')
             self.wpos += needed
@@ -181,56 +232,18 @@ class _bser_buffer(object):
             raise RuntimeError('Cannot represent unknown value type')
 
 
-def dumps(obj):
-    bser_buf = _bser_buffer()
+def dumps(obj, version=1, capabilities=0):
+    bser_buf = _bser_buffer(version=version)
     bser_buf.append_recursive(obj)
     # Now fill in the overall length
-    obj_len = bser_buf.wpos - len(EMPTY_HEADER)
-    struct.pack_into('=i', bser_buf.buf, 3, obj_len)
-    return bser_buf.buf.raw[:bser_buf.wpos]
-
-
-def _bunser_int(buf, pos):
-    try:
-        int_type = buf[pos]
-    except IndexError:
-        raise ValueError('Invalid bser int encoding, pos out of range')
-    if int_type == BSER_INT8:
-        needed = 2
-        fmt = '=b'
-    elif int_type == BSER_INT16:
-        needed = 3
-        fmt = '=h'
-    elif int_type == BSER_INT32:
-        needed = 5
-        fmt = '=i'
-    elif int_type == BSER_INT64:
-        needed = 9
-        fmt = '=q'
+    if version == 1:
+        obj_len = bser_buf.wpos - len(EMPTY_HEADER)
+        struct.pack_into(b'=i', bser_buf.buf, 3, obj_len)
     else:
-        raise ValueError('Invalid bser int encoding 0x%02x' % int(int_type))
-    int_val = struct.unpack_from(fmt, buf, pos + 1)[0]
-    return (int_val, pos + needed)
-
-
-def _bunser_string(buf, pos):
-    str_len, pos = _bunser_int(buf, pos + 1)
-    str_val = struct.unpack_from(str(str_len) + 's', buf, pos)[0]
-    return (str_val, pos + str_len)
-
-
-def _bunser_array(buf, pos, mutable=True):
-    arr_len, pos = _bunser_int(buf, pos + 1)
-    arr = []
-    for i in range(arr_len):
-        arr_item, pos = _bser_loads_recursive(buf, pos, mutable)
-        arr.append(arr_item)
-
-    if not mutable:
-      arr = tuple(arr)
-
-    return arr, pos
-
+        obj_len = bser_buf.wpos - len(EMPTY_HEADER_V2)
+        struct.pack_into(b'=i', bser_buf.buf, 2, capabilities)
+        struct.pack_into(b'=i', bser_buf.buf, 7, obj_len)
+    return bser_buf.buf.raw[:bser_buf.wpos]
 
 # This is a quack-alike with the bserObjectType in bser.c
 # It provides by getattr accessors and getitem for both index
@@ -260,100 +273,212 @@ class _BunserDict(object):
     def __len__(self):
         return len(self._keys)
 
-def _bunser_object(buf, pos, mutable=True):
-    obj_len, pos = _bunser_int(buf, pos + 1)
-    if mutable:
-        obj = {}
-    else:
-        keys = []
-        vals = []
+class Bunser(object):
+    def __init__(self, mutable=True, value_encoding=None, value_errors=None):
+        self.mutable = mutable
+        self.value_encoding = value_encoding
+
+        if value_encoding is None:
+            self.value_errors = None
+        elif value_errors is None:
+            self.value_errors = 'strict'
+        else:
+            self.value_errors = value_errors
 
-    for i in range(obj_len):
-        key, pos = _bunser_string(buf, pos)
-        val, pos = _bser_loads_recursive(buf, pos, mutable)
-        if mutable:
-            obj[key] = val
+    @staticmethod
+    def unser_int(buf, pos):
+        try:
+            int_type = _buf_pos(buf, pos)
+        except IndexError:
+            raise ValueError('Invalid bser int encoding, pos out of range')
+        if int_type == BSER_INT8:
+            needed = 2
+            fmt = b'=b'
+        elif int_type == BSER_INT16:
+            needed = 3
+            fmt = b'=h'
+        elif int_type == BSER_INT32:
+            needed = 5
+            fmt = b'=i'
+        elif int_type == BSER_INT64:
+            needed = 9
+            fmt = b'=q'
         else:
-            keys.append(key)
-            vals.append(val)
+            raise ValueError('Invalid bser int encoding 0x%s' %
+                             binascii.hexlify(int_type).decode('ascii'))
+        int_val = struct.unpack_from(fmt, buf, pos + 1)[0]
+        return (int_val, pos + needed)
 
-    if not mutable:
-        obj = _BunserDict(keys, vals)
-
-    return obj, pos
-
+    def unser_utf8_string(self, buf, pos):
+        str_len, pos = self.unser_int(buf, pos + 1)
+        str_val = struct.unpack_from(tobytes(str_len) + b's', buf, pos)[0]
+        return (str_val.decode('utf-8'), pos + str_len)
 
-def _bunser_template(buf, pos, mutable=True):
-    if buf[pos + 1] != BSER_ARRAY:
-        raise RuntimeError('Expect ARRAY to follow TEMPLATE')
-    keys, pos = _bunser_array(buf, pos + 1)
-    nitems, pos = _bunser_int(buf, pos)
-    arr = []
-    for i in range(nitems):
-        if mutable:
+    def unser_bytestring(self, buf, pos):
+        str_len, pos = self.unser_int(buf, pos + 1)
+        str_val = struct.unpack_from(tobytes(str_len) + b's', buf, pos)[0]
+        if self.value_encoding is not None:
+            str_val = str_val.decode(self.value_encoding, self.value_errors)
+            # str_len stays the same because that's the length in bytes
+        return (str_val, pos + str_len)
+
+    def unser_array(self, buf, pos):
+        arr_len, pos = self.unser_int(buf, pos + 1)
+        arr = []
+        for i in range(arr_len):
+            arr_item, pos = self.loads_recursive(buf, pos)
+            arr.append(arr_item)
+
+        if not self.mutable:
+          arr = tuple(arr)
+
+        return arr, pos
+
+    def unser_object(self, buf, pos):
+        obj_len, pos = self.unser_int(buf, pos + 1)
+        if self.mutable:
             obj = {}
         else:
+            keys = []
             vals = []
 
-        for keyidx in range(len(keys)):
-            if buf[pos] == BSER_SKIP:
-                pos += 1
-                ele = None
+        for i in range(obj_len):
+            key, pos = self.unser_utf8_string(buf, pos)
+            val, pos = self.loads_recursive(buf, pos)
+            if self.mutable:
+                obj[key] = val
             else:
-                ele, pos = _bser_loads_recursive(buf, pos, mutable)
+                keys.append(key)
+                vals.append(val)
 
-            if mutable:
-                key = keys[keyidx]
-                obj[key] = ele
-            else:
-                vals.append(ele)
-
-        if not mutable:
+        if not self.mutable:
             obj = _BunserDict(keys, vals)
 
-        arr.append(obj)
-    return arr, pos
+        return obj, pos
+
+    def unser_template(self, buf, pos):
+        val_type = _buf_pos(buf, pos + 1)
+        if val_type != BSER_ARRAY:
+            raise RuntimeError('Expect ARRAY to follow TEMPLATE')
+        # force UTF-8 on keys
+        keys_bunser = Bunser(mutable=self.mutable, value_encoding='utf-8')
+        keys, pos = keys_bunser.unser_array(buf, pos + 1)
+        nitems, pos = self.unser_int(buf, pos)
+        arr = []
+        for i in range(nitems):
+            if self.mutable:
+                obj = {}
+            else:
+                vals = []
+
+            for keyidx in range(len(keys)):
+                if _buf_pos(buf, pos) == BSER_SKIP:
+                    pos += 1
+                    ele = None
+                else:
+                    ele, pos = self.loads_recursive(buf, pos)
+
+                if self.mutable:
+                    key = keys[keyidx]
+                    obj[key] = ele
+                else:
+                    vals.append(ele)
+
+            if not self.mutable:
+                obj = _BunserDict(keys, vals)
+
+            arr.append(obj)
+        return arr, pos
+
+    def loads_recursive(self, buf, pos):
+        val_type = _buf_pos(buf, pos)
+        if (val_type == BSER_INT8 or val_type == BSER_INT16 or
+            val_type == BSER_INT32 or val_type == BSER_INT64):
+            return self.unser_int(buf, pos)
+        elif val_type == BSER_REAL:
+            val = struct.unpack_from(b'=d', buf, pos + 1)[0]
+            return (val, pos + 9)
+        elif val_type == BSER_TRUE:
+            return (True, pos + 1)
+        elif val_type == BSER_FALSE:
+            return (False, pos + 1)
+        elif val_type == BSER_NULL:
+            return (None, pos + 1)
+        elif val_type == BSER_BYTESTRING:
+            return self.unser_bytestring(buf, pos)
+        elif val_type == BSER_UTF8STRING:
+            return self.unser_utf8_string(buf, pos)
+        elif val_type == BSER_ARRAY:
+            return self.unser_array(buf, pos)
+        elif val_type == BSER_OBJECT:
+            return self.unser_object(buf, pos)
+        elif val_type == BSER_TEMPLATE:
+            return self.unser_template(buf, pos)
+        else:
+            raise ValueError('unhandled bser opcode 0x%s' %
+                             binascii.hexlify(val_type).decode('ascii'))
 
 
-def _bser_loads_recursive(buf, pos, mutable=True):
-    val_type = buf[pos]
-    if (val_type == BSER_INT8 or val_type == BSER_INT16 or
-        val_type == BSER_INT32 or val_type == BSER_INT64):
-        return _bunser_int(buf, pos)
-    elif val_type == BSER_REAL:
-        val = struct.unpack_from('=d', buf, pos + 1)[0]
-        return (val, pos + 9)
-    elif val_type == BSER_TRUE:
-        return (True, pos + 1)
-    elif val_type == BSER_FALSE:
-        return (False, pos + 1)
-    elif val_type == BSER_NULL:
-        return (None, pos + 1)
-    elif val_type == BSER_STRING:
-        return _bunser_string(buf, pos)
-    elif val_type == BSER_ARRAY:
-        return _bunser_array(buf, pos, mutable)
-    elif val_type == BSER_OBJECT:
-        return _bunser_object(buf, pos, mutable)
-    elif val_type == BSER_TEMPLATE:
-        return _bunser_template(buf, pos, mutable)
+def _pdu_info_helper(buf):
+    bser_version = -1
+    if buf[0:2] == EMPTY_HEADER[0:2]:
+        bser_version = 1
+        bser_capabilities = 0
+        expected_len, pos2 = Bunser.unser_int(buf, 2)
+    elif buf[0:2] == EMPTY_HEADER_V2[0:2]:
+        if len(buf) < 8:
+            raise ValueError('Invalid BSER header')
+        bser_version = 2
+        bser_capabilities = struct.unpack_from("I", buf, 2)[0]
+        expected_len, pos2 = Bunser.unser_int(buf, 6)
     else:
-        raise RuntimeError('unhandled bser opcode 0x%02x' % (val_type,))
+        raise ValueError('Invalid BSER header')
+
+    return bser_version, bser_capabilities, expected_len, pos2
+
+
+def pdu_info(buf):
+    info = _pdu_info_helper(buf)
+    return info[0], info[1], info[2] + info[3]
 
 
 def pdu_len(buf):
-    if buf[0:2] != EMPTY_HEADER[0:2]:
-        raise RuntimeError('Invalid BSER header')
-    expected_len, pos = _bunser_int(buf, 2)
-    return expected_len + pos
+    info = _pdu_info_helper(buf)
+    return info[2] + info[3]
 
 
-def loads(buf, mutable=True):
-    if buf[0:2] != EMPTY_HEADER[0:2]:
-        raise RuntimeError('Invalid BSER header')
-    expected_len, pos = _bunser_int(buf, 2)
+def loads(buf, mutable=True, value_encoding=None, value_errors=None):
+    """Deserialize a BSER-encoded blob.
+
+    @param buf: The buffer to deserialize.
+    @type buf: bytes
+
+    @param mutable: Whether to return mutable results.
+    @type mutable: bool
+
+    @param value_encoding: Optional codec to use to decode values. If
+                           unspecified or None, return values as bytestrings.
+    @type value_encoding: str
+
+    @param value_errors: Optional error handler for codec. 'strict' by default.
+                         The other most common argument is 'surrogateescape' on
+                         Python 3. If value_encoding is None, this is ignored.
+    @type value_errors: str
+    """
+
+    info = _pdu_info_helper(buf)
+    expected_len = info[2]
+    pos = info[3]
+
     if len(buf) != expected_len + pos:
-        raise RuntimeError('bser data len != header len')
-    return _bser_loads_recursive(buf, pos, mutable)[0]
+        raise ValueError('bser data len != header len')
+
+    bunser = Bunser(mutable=mutable, value_encoding=value_encoding,
+                    value_errors=value_errors)
 
-# no-check-code -- this is a 3rd party library
+    return bunser.loads_recursive(buf, pos)[0]
+
+
+def load(fp, mutable=True, value_encoding=None, value_errors=None):
+    from . import load
+    return load.load(fp, mutable, value_encoding, value_errors)
diff --git a/tests/test-check-py3-compat.t b/tests/test-check-py3-compat.t
--- a/tests/test-check-py3-compat.t
+++ b/tests/test-check-py3-compat.t
@@ -15,10 +15,6 @@
   contrib/python-zstandard/tests/test_module_attributes.py not using absolute_import
   contrib/python-zstandard/tests/test_roundtrip.py not using absolute_import
   contrib/python-zstandard/tests/test_train_dictionary.py not using absolute_import
-  hgext/fsmonitor/pywatchman/__init__.py not using absolute_import
-  hgext/fsmonitor/pywatchman/__init__.py requires print_function
-  hgext/fsmonitor/pywatchman/capabilities.py not using absolute_import
-  hgext/fsmonitor/pywatchman/pybser.py not using absolute_import
   i18n/check-translation.py not using absolute_import
   setup.py not using absolute_import
   tests/test-demandimport.py not using absolute_import