win32mbcs.py
125 lines
| 4.2 KiB
| text/x-python
|
PythonLexer
/ hgext / win32mbcs.py
Shun-ichi Goto
|
r6887 | # win32mbcs.py -- MBCS filename support for Mercurial | ||
Shun-ichi GOTO
|
r5846 | # | ||
# Copyright (c) 2008 Shun-ichi Goto <shunichi.goto@gmail.com> | ||||
# | ||||
Shun-ichi Goto
|
r6887 | # Version: 0.2 | ||
Shun-ichi GOTO
|
r5846 | # Author: Shun-ichi Goto <shunichi.goto@gmail.com> | ||
# | ||||
# This software may be used and distributed according to the terms | ||||
# of the GNU General Public License, incorporated herein by reference. | ||||
# | ||||
Martin Geisler
|
r7598 | """allow the use of MBCS paths with problematic encodings. | ||
Shun-ichi GOTO
|
r5846 | |||
Martin Geisler
|
r8001 | Some MBCS encodings are not good for some path operations (i.e. | ||
splitting path, case conversion, etc.) with its encoded bytes. We call | ||||
such an encoding (i.e. shift_jis and big5) as "problematic encoding". | ||||
This extension can be used to fix the issue with those encodings by | ||||
wrapping some functions to convert to unicode string before path | ||||
operation. | ||||
Shun-ichi GOTO
|
r5846 | |||
This extension is useful for: | ||||
Shun-ichi Goto
|
r6887 | * Japanese Windows users using shift_jis encoding. | ||
* Chinese Windows users using big5 encoding. | ||||
Martin Geisler
|
r8001 | * All users who use a repository with one of problematic encodings on | ||
case-insensitive file system. | ||||
Shun-ichi Goto
|
r6887 | |||
This extension is not needed for: | ||||
* Any user who uses only ASCII chars in path. | ||||
* Any user who does not use any of the problematic encodings. | ||||
Shun-ichi GOTO
|
r5846 | |||
Shun-ichi Goto
|
r6887 | Note that there are some limitations on using this extension: | ||
* You should use single encoding in one repository. | ||||
Martin Geisler
|
r8001 | * You should set same encoding for the repository by locale or | ||
HGENCODING. | ||||
Shun-ichi Goto
|
r6887 | |||
To use this extension, enable the extension in .hg/hgrc or ~/.hgrc: | ||||
[extensions] | ||||
hgext.win32mbcs = | ||||
Martin Geisler
|
r8001 | Path encoding conversions are done between unicode and | ||
encoding.encoding which is decided by mercurial from current locale | ||||
setting or HGENCODING. | ||||
Shun-ichi GOTO
|
r5846 | |||
""" | ||||
import os | ||||
from mercurial.i18n import _ | ||||
Matt Mackall
|
r7948 | from mercurial import util, encoding | ||
Shun-ichi GOTO
|
r5846 | |||
Shun-ichi Goto
|
def decode(arg):
    """Convert local-encoding byte strings found in arg to unicode.

    A str is decoded with encoding.encoding and returned only if encoding
    the result again reproduces the original bytes; otherwise UnicodeError
    is raised.  Tuples and lists are converted element by element
    (recursively); any other object is returned unchanged.
    """
    if isinstance(arg, tuple):
        return tuple([decode(item) for item in arg])
    if isinstance(arg, list):
        return map(decode, arg)
    if not isinstance(arg, str):
        # not a byte string or a supported container: pass through as-is
        return arg

    local = encoding.encoding
    decoded = arg.decode(local)
    # round-trip check: reject byte strings that do not survive
    # decode + encode in the local encoding
    if arg == decoded.encode(local):
        return decoded
    raise UnicodeError("Not local encoding")
Shun-ichi Goto
|
r6887 | |||
def encode(arg):
    """Convert unicode strings found in arg back to the local encoding.

    A unicode object is encoded with encoding.encoding.  Tuples and lists
    are converted element by element (recursively); any other object is
    returned unchanged.
    """
    if isinstance(arg, tuple):
        return tuple([encode(item) for item in arg])
    if isinstance(arg, list):
        return map(encode, arg)
    if isinstance(arg, unicode):
        return arg.encode(encoding.encoding)
    # neither unicode nor a supported container: pass through as-is
    return arg
Shun-ichi Goto
|
r6887 | |||
def wrapper(func, args):
    """Call func with args, doing the path operation on unicode strings.

    If any positional argument is already unicode, func is called with the
    arguments untouched.  Otherwise the arguments are decoded to unicode,
    func is called, and its result is encoded back to the local encoding.

    Raises util.Abort when a UnicodeError occurs during that
    decode / call / encode sequence.
    """
    # a unicode argument means the caller already works in unicode,
    # so call the original function directly
    has_unicode = False
    for candidate in args:
        if isinstance(candidate, unicode):
            has_unicode = True
            break
    if has_unicode:
        return func(*args)

    try:
        # convert arguments to unicode, call func, then convert back
        uargs = decode(args)
        return encode(func(*uargs))
    except UnicodeError:
        # the data is not valid in encoding.encoding: abort with a
        # message naming the encoding
        raise util.Abort(_("[win32mbcs] filename conversion fail with"
                           " %s encoding\n") % (encoding.encoding))
Shun-ichi Goto
|
r6887 | |||
def wrapname(name):
    """Replace the function called name with a unicode-converting wrapper.

    name is a dotted path such as 'os.path.join'.  Everything before the
    last dot is evaluated to obtain the owning module object; the part
    after it is the attribute that gets replaced by a function which
    routes every call through wrapper().

    Calling this more than once for the same name is a no-op after the
    first call: reposetup() runs for every repository object, and without
    this guard each run would stack another wrapper layer on top of the
    previous one.
    """
    idx = name.rfind('.')
    modname = name[:idx]
    attr = name[idx + 1:]
    # NOTE: eval() is used to resolve the dotted module path.  In this file
    # the names come from the module-level 'funcs' string, not from user
    # input; never pass externally supplied text here.
    module = eval(modname)
    func = getattr(module, attr)
    if getattr(func, '_win32mbcs_wrapped', False):
        # already replaced by an earlier call; do not wrap the wrapper
        return

    def f(*args):
        return wrapper(func, args)
    try:
        f.__name__ = func.__name__ # fail with python23
    except (AttributeError, TypeError):
        # keeping the original name is cosmetic only; carry on without it
        pass
    # marker checked above so that repeated calls stay idempotent
    f._win32mbcs_wrapped = True
    setattr(module, attr, f)
Shun-ichi Goto
|
r6887 | |||
# Whitespace-separated dotted names of the functions to be wrapped.
# reposetup() splits this string and passes each name to wrapname().
# NOTE: os.path.dirname() and os.path.basename() are safe because
# they use result of os.path.split()
funcs = '''os.path.join os.path.split os.path.splitext
os.path.splitunc os.path.normpath os.path.normcase os.makedirs
util.endswithsep util.splitpath util.checkcase util.fspath'''
Shun-ichi GOTO
|
r5846 | |||
# Whitespace-separated codec and alias names of sjis and big5 to be faked.
# reposetup() compares the lower-cased encoding.encoding against the
# entries of this string, so every name here is written in lower case.
problematic_encodings = '''big5 big5-tw csbig5 big5hkscs big5-hkscs
hkscs cp932 932 ms932 mskanji ms-kanji shift_jis csshiftjis shiftjis
sjis s_jis shift_jis_2004 shiftjis2004 sjis_2004 sjis2004
shift_jisx0213 shiftjisx0213 sjisx0213 s_jisx0213'''
Shun-ichi GOTO
|
r5846 | |||
def reposetup(ui, repo):
    """Install the path-function wrappers when the encoding requires them.

    Warns and does nothing if os.path.supports_unicode_filenames is false.
    Otherwise, when the lower-cased encoding.encoding is listed in
    problematic_encodings, every function named in funcs is wrapped and a
    debug message reports the active encoding.  repo is not used.
    """
    # TODO: decide use of config section for this extension
    if not os.path.supports_unicode_filenames:
        ui.warn(_("[win32mbcs] cannot activate on this platform.\n"))
        return

    # fake is only for relevant environment.
    if encoding.encoding.lower() not in problematic_encodings.split():
        return
    for target in funcs.split():
        wrapname(target)
    ui.debug(_("[win32mbcs] activated with encoding: %s\n")
             % encoding.encoding)
Shun-ichi Goto
|
r6887 | |||