# HG changeset patch # User Olivier Trempe # Date 2017-03-08 14:03:42 # Node ID 1064a296a2a74a0621b253392639ef6ca0be3e87 # Parent 86246530b8d2f9bc59c3d67c04b164eb158fabc0 fsmonitor: match watchman and filesystem encoding watchman's paths encoding can differ from filesystem encoding. For example, on Windows, it's always utf-8. Before this patch, on Windows, mismatch in path comparison between fsmonitor state and osutil.statfiles would yield a clean status for added/modified files. In addition to status reporting wrong results, this leads to files being discarded from changesets while doing history editing operations such as rebase. Benchmark: There is a little overhead at module import: python -m timeit "import hgext.fsmonitor" Windows before patch: 1000000 loops, best of 3: 0.563 usec per loop Windows after patch: 1000000 loops, best of 3: 0.583 usec per loop Linux before patch: 1000000 loops, best of 3: 0.579 usec per loop Linux after patch: 1000000 loops, best of 3: 0.588 usec per loop 10000 calls to _watchmantofsencoding: python -m timeit -s "from hgext.fsmonitor import _watchmantofsencoding, _fixencoding" "fname = '/path/to/file'" "for i in range(10000):" " if _fixencoding: fname = _watchmantofsencoding(fname)" Windows (_fixencoding is True): 100 loops, best of 3: 19.5 msec per loop Linux (_fixencoding is False): 100 loops, best of 3: 3.08 msec per loop diff --git a/hgext/fsmonitor/__init__.py b/hgext/fsmonitor/__init__.py --- a/hgext/fsmonitor/__init__.py +++ b/hgext/fsmonitor/__init__.py @@ -91,14 +91,17 @@ will disable itself if any of those are from __future__ import absolute_import +import codecs import hashlib import os import stat +import sys from mercurial.i18n import _ from mercurial import ( context, encoding, + error, extensions, localrepo, merge, @@ -110,6 +113,7 @@ from mercurial import ( from mercurial import match as matchmod from . 
import ( + pywatchman, state, watchmanclient, ) @@ -159,6 +163,28 @@ def _hashignore(ignore): sha1.update('\0') return sha1.hexdigest() +_watchmanencoding = pywatchman.encoding.get_local_encoding() +_fsencoding = sys.getfilesystemencoding() or sys.getdefaultencoding() +_fixencoding = codecs.lookup(_watchmanencoding) != codecs.lookup(_fsencoding) + +def _watchmantofsencoding(path): + """Fix path to match watchman and local filesystem encoding + + watchman's paths encoding can differ from filesystem encoding. For example, + on Windows, it's always utf-8. + """ + try: + decoded = path.decode(_watchmanencoding) + except UnicodeDecodeError as e: + raise error.Abort(str(e), hint='watchman encoding error') + + try: + encoded = decoded.encode(_fsencoding, 'strict') + except UnicodeEncodeError as e: + raise error.Abort(str(e)) + + return encoded + def overridewalk(orig, self, match, subrepos, unknown, ignored, full=True): '''Replacement for dirstate.walk, hooking into Watchman. @@ -303,6 +329,8 @@ def overridewalk(orig, self, match, subr # for name case changes. 
for entry in result['files']: fname = entry['name'] + if _fixencoding: + fname = _watchmantofsencoding(fname) if switch_slashes: fname = fname.replace('\\', '/') if normalize: diff --git a/tests/test-check-py3-compat.t b/tests/test-check-py3-compat.t --- a/tests/test-check-py3-compat.t +++ b/tests/test-check-py3-compat.t @@ -26,8 +26,8 @@ > | sed 's|\\|/|g' | xargs $PYTHON3 contrib/check-py3-compat.py \ > | sed 's/[0-9][0-9]*)$/*)/' hgext/convert/transport.py: error importing: <*Error> No module named 'svn.client' (error at transport.py:*) (glob) - hgext/fsmonitor/state.py: error importing: from __future__ imports must occur at the beginning of the file (__init__.py, line 30) (error at watchmanclient.py:*) - hgext/fsmonitor/watchmanclient.py: error importing: from __future__ imports must occur at the beginning of the file (__init__.py, line 30) (error at watchmanclient.py:*) + hgext/fsmonitor/state.py: error importing: from __future__ imports must occur at the beginning of the file (__init__.py, line 30) (error at __init__.py:*) + hgext/fsmonitor/watchmanclient.py: error importing: from __future__ imports must occur at the beginning of the file (__init__.py, line 30) (error at __init__.py:*) mercurial/cffi/bdiff.py: error importing: <*Error> No module named 'mercurial.cffi' (error at check-py3-compat.py:*) (glob) mercurial/cffi/mpatch.py: error importing: <*Error> No module named 'mercurial.cffi' (error at check-py3-compat.py:*) (glob) mercurial/cffi/osutil.py: error importing: <*Error> No module named 'mercurial.cffi' (error at check-py3-compat.py:*) (glob)