# HG changeset patch # User Shun-ichi Goto # Date 2008-08-14 01:18:40 # Node ID 304484c7e0ba9d2fcec0d31e4fcaf9b5d5246dfc # Parent 41aaaa23745f4ef89068bea848c4c3efb69e9787 Update win32mbcs extension * Code cleanup by Matt. * Fix the issue with case-insensitive fs support by wrapping also util.fspath() and util.checkcase() * Abort program when path conversion is failed. diff --git a/hgext/win32mbcs.py b/hgext/win32mbcs.py --- a/hgext/win32mbcs.py +++ b/hgext/win32mbcs.py @@ -1,35 +1,43 @@ -# win32mbcs.py -- MBCS filename support for Mercurial on Windows +# win32mbcs.py -- MBCS filename support for Mercurial # # Copyright (c) 2008 Shun-ichi Goto # -# Version: 0.1 +# Version: 0.2 # Author: Shun-ichi Goto # # This software may be used and distributed according to the terms # of the GNU General Public License, incorporated herein by reference. # -"""Allow to use shift_jis/big5 filenames on Windows. - -There is a well known issue "0x5c problem" on Windows. It is a -trouble on handling path name as raw encoded byte sequence of -problematic encodings like shift_jis or big5. The primary intent -of this extension is to allow using such a encoding on Mercurial -without strange file operation error. +"""Allow the use of MBCS paths with problematic encodings. -By enabling this extension, hook mechanism is activated and some -functions are altered. Usually, this encoding is your local encoding -on your system by default. So you can get benefit simply by enabling -this extension. - -The encoding for filename is same one for terminal by default. You -can change the encoding by setting HGENCODING environment variable. +Some MBCS encodings are not good for some path operations +(e.g. splitting path, case conversion, etc.) on their encoded bytes. +We call such an encoding (e.g. shift_jis or big5) a "problematic +encoding". This extension can be used to fix the issue with those +encodings by wrapping some functions to convert to unicode strings +before path operations. 
 This extension is usefull for: - * Japanese Windows user using shift_jis encoding. - * Chinese Windows user using big5 encoding. - * Users who want to use a repository created with such a encoding. + * Japanese Windows users using shift_jis encoding. + * Chinese Windows users using big5 encoding. + * All users who use a repository with one of the problematic encodings + on a case-insensitive file system. + +This extension is not needed for: + * Any user who uses only ASCII chars in paths. + * Any user who does not use any of the problematic encodings. -Note: Unix people does not need to use this extension. +Note that there are some limitations on using this extension: + * You should use a single encoding in one repository. + * You should set the same encoding for the repository by locale or HGENCODING. + +To use this extension, enable the extension in .hg/hgrc or ~/.hgrc: + + [extensions] + hgext.win32mbcs = + +Path encoding conversions are done between unicode and util._encoding, +which is decided by Mercurial from the current locale setting or HGENCODING. 
 """ @@ -37,122 +45,78 @@ import os from mercurial.i18n import _ from mercurial import util -__all__ = ['install', 'uninstall', 'reposetup'] +def decode(arg): + if isinstance(arg, str): + uarg = arg.decode(util._encoding) + if arg == uarg.encode(util._encoding): + return uarg + raise UnicodeError("Not local encoding") + elif isinstance(arg, tuple): + return tuple(map(decode, arg)) + elif isinstance(arg, list): + return map(decode, arg) + return arg + +def encode(arg): + if isinstance(arg, unicode): + return arg.encode(util._encoding) + elif isinstance(arg, tuple): + return tuple(map(encode, arg)) + elif isinstance(arg, list): + return map(encode, arg) + return arg + +def wrapper(func, args): + # if any argument is already unicode, call the original directly + for arg in args: + if isinstance(arg, unicode): + return func(*args) + try: + # convert arguments to unicode, call func, then convert back + return encode(func(*decode(args))) + except UnicodeError: + # If not encoded with util._encoding, abort rather than + # fall back to calling the original function on raw bytes. + raise util.Abort(_("[win32mbcs] filename conversion fail with" + " %s encoding\n") % (util._encoding)) + +def wrapname(name): + idx = name.rfind('.') + module = name[:idx] + name = name[idx+1:] + module = eval(module) + func = getattr(module, name) + def f(*args): + return wrapper(func, args) + try: + f.__name__ = func.__name__ # fail with python23 + except Exception: + pass + setattr(module, name, f) + +# List of functions to be wrapped. +# NOTE: os.path.dirname() and os.path.basename() are safe because +# they use the result of os.path.split() +funcs = '''os.path.join os.path.split os.path.splitext + os.path.splitunc os.path.normpath os.path.normcase os.makedirs + util.endswithsep util.splitpath util.checkcase util.fspath''' # codec and alias names of sjis and big5 to be faked. 
-_problematic_encodings = util.frozenset([ - 'big5', 'big5-tw', 'csbig5', - 'big5hkscs', 'big5-hkscs', 'hkscs', - 'cp932', '932', 'ms932', 'mskanji', 'ms-kanji', - 'shift_jis', 'csshiftjis', 'shiftjis', 'sjis', 's_jis', - 'shift_jis_2004', 'shiftjis2004', 'sjis_2004', 'sjis2004', - 'shift_jisx0213', 'shiftjisx0213', 'sjisx0213', 's_jisx0213', - ]) - -# attribute name to store original function -_ORIGINAL = '_original' - -_ui = None - -def decode_with_check(arg): - if isinstance(arg, tuple): - return tuple(map(decode_with_check, arg)) - elif isinstance(arg, list): - return map(decode_with_check, arg) - elif isinstance(arg, str): - uarg = arg.decode(util._encoding) - if arg == uarg.encode(util._encoding): - return uarg - else: - raise UnicodeError("Not local encoding") - else: - return arg - -def encode_with_check(arg): - if isinstance(arg, tuple): - return tuple(map(encode_with_check, arg)) - elif isinstance(arg, list): - return map(encode_with_check, arg) - elif isinstance(arg, unicode): - ret = arg.encode(util._encoding) - return ret - else: - return arg - -def wrap(func): - - def wrapped(*args): - # check argument is unicode, then call original - for arg in args: - if isinstance(arg, unicode): - return func(*args) - # make decoded argument list into uargs - try: - args = decode_with_check(args) - except UnicodeError, exc: - # If not encoded with _local_fs_encoding, report it then - # continue with calling original function. - _ui.warn(_("WARNING: [win32mbcs] filename conversion fail for" + - " %s: '%s'\n") % (util._encoding, args)) - return func(*args) - # call as unicode operation, then return with encoding - return encode_with_check(func(*args)) - - # fake is only for relevant environment. 
- if hasattr(func, _ORIGINAL) or \ - util._encoding.lower() not in _problematic_encodings: - return func - else: - f = wrapped - f.__name__ = func.__name__ - setattr(f, _ORIGINAL, func) # hold original to restore - return f - -def unwrap(func): - return getattr(func, _ORIGINAL, func) - -def install(): - # wrap some python functions and mercurial functions - # to handle raw bytes on Windows. - # NOTE: dirname and basename is safe because they use result - # of os.path.split() - global _ui - if not _ui: - from mercurial import ui - _ui = ui.ui() - os.path.join = wrap(os.path.join) - os.path.split = wrap(os.path.split) - os.path.splitext = wrap(os.path.splitext) - os.path.splitunc = wrap(os.path.splitunc) - os.path.normpath = wrap(os.path.normpath) - os.path.normcase = wrap(os.path.normcase) - os.makedirs = wrap(os.makedirs) - util.endswithsep = wrap(util.endswithsep) - util.splitpath = wrap(util.splitpath) - -def uninstall(): - # restore original functions. - os.path.join = unwrap(os.path.join) - os.path.split = unwrap(os.path.split) - os.path.splitext = unwrap(os.path.splitext) - os.path.splitunc = unwrap(os.path.splitunc) - os.path.normpath = unwrap(os.path.normpath) - os.path.normcase = unwrap(os.path.normcase) - os.makedirs = unwrap(os.makedirs) - util.endswithsep = unwrap(util.endswithsep) - util.splitpath = unwrap(util.splitpath) - +problematic_encodings = '''big5 big5-tw csbig5 big5hkscs big5-hkscs + hkscs cp932 932 ms932 mskanji ms-kanji shift_jis csshiftjis shiftjis + sjis s_jis shift_jis_2004 shiftjis2004 sjis_2004 sjis2004 + shift_jisx0213 shiftjisx0213 sjisx0213 s_jisx0213''' def reposetup(ui, repo): - # TODO: decide use of config section for this extension - global _ui - _ui = ui - if not os.path.supports_unicode_filenames: - ui.warn(_("[win32mbcs] cannot activate on this platform.\n")) - return - # install features of this extension - install() - ui.debug(_("[win32mbcs] activeted with encoding: %s\n") % util._encoding) + # TODO: decide use of config 
section for this extension + if not os.path.supports_unicode_filenames: + ui.warn(_("[win32mbcs] cannot activate on this platform.\n")) + return -# win32mbcs.py ends here + # fake is only for relevant environment. + if util._encoding.lower() in problematic_encodings.split(): + for f in funcs.split(): + wrapname(f) + ui.debug(_("[win32mbcs] activated with encoding: %s\n") % util._encoding) +