win32mbcs.py
222 lines
| 7.0 KiB
| text/x-python
|
PythonLexer
/ hgext / win32mbcs.py
Shun-ichi Goto
|
r6887 | # win32mbcs.py -- MBCS filename support for Mercurial | ||
Shun-ichi GOTO
|
r5846 | # | ||
# Copyright (c) 2008 Shun-ichi Goto <shunichi.goto@gmail.com> | ||||
# | ||||
Shun-ichi GOTO
|
r10050 | # Version: 0.3 | ||
Shun-ichi GOTO
|
r5846 | # Author: Shun-ichi Goto <shunichi.goto@gmail.com> | ||
# | ||||
Martin Geisler
|
r8225 | # This software may be used and distributed according to the terms of the | ||
Matt Mackall
|
r10263 | # GNU General Public License version 2 or any later version. | ||
Shun-ichi GOTO
|
r5846 | # | ||
Martin Geisler
|
r8228 | |||
Dirkjan Ochtman
|
r8932 | '''allow the use of MBCS paths with problematic encodings | ||
Shun-ichi GOTO
|
r5846 | |||
Martin Geisler
|
r8001 | Some MBCS encodings are not good for some path operations (i.e. | ||
splitting path, case conversion, etc.) with its encoded bytes. We call | ||||
such a encoding (i.e. shift_jis and big5) as "problematic encoding". | ||||
This extension can be used to fix the issue with those encodings by | ||||
Martin Geisler
|
r8665 | wrapping some functions to convert to Unicode string before path | ||
Martin Geisler
|
r8001 | operation. | ||
Shun-ichi GOTO
|
r5846 | |||
Martin Geisler
|
r8668 | This extension is useful for: | ||
Martin Geisler
|
r9216 | |||
- Japanese Windows users using shift_jis encoding. | ||||
- Chinese Windows users using big5 encoding. | ||||
- All users who use a repository with one of problematic encodings on | ||||
case-insensitive file system. | ||||
Shun-ichi Goto
|
r6887 | |||
This extension is not needed for: | ||||
Martin Geisler
|
r9216 | |||
- Any user who use only ASCII chars in path. | ||||
- Any user who do not use any of problematic encodings. | ||||
Shun-ichi GOTO
|
r5846 | |||
Shun-ichi Goto
|
r6887 | Note that there are some limitations on using this extension: | ||
Martin Geisler
|
r9216 | |||
- You should use single encoding in one repository. | ||||
Shun-ichi GOTO
|
r13067 | - If the repository path ends with 0x5c, .hg/hgrc cannot be read. | ||
Javi Merino
|
r13330 | - win32mbcs is not compatible with fixutf8 extension. | ||
Shun-ichi GOTO
|
r10050 | |||
Martin Geisler
|
r10067 | By default, win32mbcs uses encoding.encoding decided by Mercurial. | ||
You can specify the encoding by config option:: | ||||
Shun-ichi Goto
|
r6887 | |||
Shun-ichi GOTO
|
r10050 | [win32mbcs] | ||
encoding = sjis | ||||
Martin Geisler
|
r10067 | It is useful for the users who want to commit with UTF-8 log message. | ||
Cédric Duval
|
r8894 | ''' | ||
Shun-ichi GOTO
|
r5846 | |||
timeless
|
r28417 | import os | ||
import sys | ||||
Yuya Nishihara
|
r29205 | from mercurial.i18n import _ | ||
timeless
|
r28417 | from mercurial import ( | ||
encoding, | ||||
error, | ||||
Pulkit Goyal
|
r30616 | pycompat, | ||
Boris Feld
|
r34181 | registrar, | ||
timeless
|
r28417 | ) | ||
Augie Fackler
|
# Note for extension authors: ONLY specify testedwith = 'ships-with-hg-core' for
# extensions which SHIP WITH MERCURIAL. Non-mainline extensions should
# be specifying the version(s) of Mercurial they are tested with, or
# leave the attribute unspecified.
testedwith = b'ships-with-hg-core'

# Registry of this extension's config items; ``configitem`` adds entries to it.
configtable = {}
configitem = registrar.configitem(configtable)
# encoding.encoding may be updated by the --encoding option.
# Use a lambda to delay the resolution.
configitem(
    b'win32mbcs',
    b'encoding',
    default=lambda: encoding.encoding,
)

# Name of the filename encoding used by decode()/encode(); it stays empty
# until extsetup() assigns the configured value.
_encoding: str = ""  # see extsetup
Augie Fackler
|
r43346 | |||
Shun-ichi GOTO
|
r10050 | |||
Shun-ichi Goto
|
def decode(arg):
    """Recursively convert local-encoded ``bytes`` inside *arg* to ``str``.

    * ``bytes`` are decoded with the module-level ``_encoding``.
    * ``tuple`` and ``list`` are converted element-wise into a new
      container of the same type.
    * ``dict`` values (not keys) are converted in place and the same dict
      object is returned.
    * Anything else is returned unchanged.

    Raises ``UnicodeError`` when a byte string cannot be decoded, or when
    it does not re-encode to exactly the same bytes.
    """
    if isinstance(arg, bytes):
        uarg = arg.decode(_encoding)
        # Accept the decoded text only if it round-trips to the same bytes.
        if arg == uarg.encode(_encoding):
            return uarg
        raise UnicodeError("Not local encoding")
    elif isinstance(arg, tuple):
        return tuple(map(decode, arg))
    elif isinstance(arg, list):
        # Build a real list: on Python 3 a bare map() is a one-shot
        # iterator, which cannot be indexed, measured or iterated twice.
        return [decode(item) for item in arg]
    elif isinstance(arg, dict):
        # Rebinding existing keys while iterating is safe because the
        # dict's size never changes.
        for k, v in arg.items():
            arg[k] = decode(v)
    return arg
Shun-ichi Goto
|
r6887 | |||
Augie Fackler
|
r43346 | |||
Shun-ichi Goto
|
def encode(arg):
    """Recursively convert ``str`` inside *arg* to local-encoded ``bytes``.

    * ``str`` is encoded with the module-level ``_encoding``.
    * ``tuple`` and ``list`` are converted element-wise into a new
      container of the same type.
    * ``dict`` values (not keys) are converted in place and the same dict
      object is returned.
    * Anything else is returned unchanged.

    ``str.encode`` may raise ``UnicodeError`` for text that ``_encoding``
    cannot represent.
    """
    if isinstance(arg, str):
        return arg.encode(_encoding)
    elif isinstance(arg, tuple):
        return tuple(map(encode, arg))
    elif isinstance(arg, list):
        # Build a real list: on Python 3 a bare map() is a one-shot
        # iterator, which cannot be indexed, measured or iterated twice.
        return [encode(item) for item in arg]
    elif isinstance(arg, dict):
        # Rebinding existing keys while iterating is safe because the
        # dict's size never changes.
        for k, v in arg.items():
            arg[k] = encode(v)
    return arg
Shun-ichi Goto
|
r6887 | |||
Augie Fackler
|
r43346 | |||
Shun-ichi GOTO
|
def appendsep(s):
    """Return *s* with ``pycompat.ossep`` appended unless it already ends
    with one of ``:``, ``/`` or ``\\``.

    The last character is inspected on the decoded text, so that a trailing
    0x5c byte belonging to a multi-byte character is not mistaken for a
    backslash.  An empty *s* is returned unchanged.
    """
    try:
        us = decode(s)
    except UnicodeError:
        # Not decodable with the configured encoding: fall back to the raw
        # bytes.  NOTE(review): in this case a trailing 0x5c cannot be told
        # apart from a real backslash.
        us = s
    if isinstance(us, bytes):
        # Slice instead of index: indexing bytes yields an int on Python 3,
        # and testing an int for membership in a str raises TypeError.
        needsep = bool(us) and us[-1:] not in (b':', b'/', b'\\')
    else:
        needsep = bool(us) and us[-1] not in ':/\\'
    if needsep:
        s += pycompat.ossep
    return s
Shun-ichi GOTO
|
r17798 | |||
def basewrapper(func, argtype, enc, dec, args, kwds):
    """Call *func* with converted arguments and convert its result back.

    If any positional argument is already an instance of *argtype*, the
    arguments are treated as converted and *func* is called with them
    untouched.  Otherwise *dec* is applied to *args* and to *kwds*, *func*
    is called with the results, and *enc* is applied to its return value.

    A ``UnicodeError`` raised during that conversion or call is reported
    as ``error.Abort``.
    """
    # Already converted by an outer wrapper: call the original directly.
    if any(isinstance(candidate, argtype) for candidate in args):
        return func(*args, **kwds)

    try:
        # Convert the arguments, call func, then convert the return value
        # back.
        converted_args = dec(args)
        converted_kwds = dec(kwds)
        result = func(*converted_args, **converted_kwds)
        return enc(result)
    except UnicodeError:
        message = _(
            b"[win32mbcs] filename conversion failed with %s encoding\n"
        )
        raise error.Abort(message % encoding.strtolocal(_encoding))
Shun-ichi Goto
|
r6887 | |||
Shun-ichi GOTO
|
def wrapper(func, args, kwds):
    """Call *func* with arguments decoded to ``str`` and re-encode its result.

    Arguments that already contain a ``str`` are passed through unchanged.
    """
    return basewrapper(
        func, argtype=str, enc=encode, dec=decode, args=args, kwds=kwds
    )
Shun-ichi GOTO
|
r17798 | |||
def reversewrapper(func, args, kwds):
    """Call *func* with arguments encoded to local ``bytes`` and decode its
    result back to ``str``.

    This is for functions that must receive local-encoded byte strings.  If
    a positional argument is already ``bytes``, the call came from ordinary
    bytes-based code and is passed through unchanged.
    """
    # The "already converted" type here is bytes.  Using ``str`` (a
    # leftover from Python 2, where ``str`` was the byte string type) would
    # decode results for bytes callers and pass str arguments through
    # unconverted.
    return basewrapper(func, bytes, decode, encode, args, kwds)
Augie Fackler
|
r43346 | |||
Shun-ichi GOTO
|
def wrapperforlistdir(func, args, kwds):
    """Call *func* after making sure its path argument ends with a separator.

    The path is taken from the first positional argument and/or the
    ``path`` keyword.  Ending it with os.sep avoids misinterpreting a
    trailing 0x5c (the second byte of an MBCS character) as a path
    separator.
    """
    if args:
        first, *rest = args
        args = [appendsep(first), *rest]
    if 'path' in kwds:
        kwds['path'] = appendsep(kwds['path'])
    return func(*args, **kwds)
Augie Fackler
|
r43346 | |||
Matt Harbison
|
def wrapname(name: str, wrapper):
    """Replace the function called *name* with one routed through *wrapper*.

    *name* is a dotted path whose last component is the attribute and whose
    prefix is a key of ``sys.modules``.  The replacement calls
    ``wrapper(original, args, kwds)`` and keeps the original ``__name__``.
    """
    modname, attr = name.rsplit('.', 1)
    target = sys.modules[modname]
    original = getattr(target, attr)

    def f(*args, **kwds):
        return wrapper(original, args, kwds)

    f.__name__ = original.__name__
    setattr(target, attr, f)
Shun-ichi Goto
|
r6887 | |||
Augie Fackler
|
r43346 | |||
Shun-ichi Goto
|
# List of functions to be wrapped (whitespace-separated dotted names;
# extsetup() passes each one to wrapname() with ``wrapper``).
# NOTE: os.path.dirname() and os.path.basename() are safe because
# they use the result of os.path.split()
funcs = '''os.path.join os.path.split os.path.splitext
os.path.normpath os.makedirs mercurial.util.endswithsep
mercurial.util.splitpath mercurial.util.fscasesensitive
mercurial.util.fspath mercurial.util.pconvert mercurial.util.normpath
mercurial.util.checkwinfilename mercurial.util.checkosfilename
mercurial.util.split'''
Shun-ichi GOTO
|
r5846 | |||
Shun-ichi GOTO
|
# These functions must be called with local-encoded strings: they expect
# their argument to be a local-encoded string and cause problems when
# given a unicode string.  extsetup() wraps each one with
# ``reversewrapper``.
rfuncs = '''mercurial.encoding.upper mercurial.encoding.lower
mercurial.util._filenamebytestr'''
Shun-ichi GOTO
|
r17798 | |||
FUJIWARA Katsunori
|
# List of Windows specific functions to be wrapped (extsetup() wraps them
# only when pycompat.iswindows is true).
winfuncs = '''os.path.splitunc'''
FUJIWARA Katsunori
|
r15724 | |||
Shun-ichi GOTO
|
# Codec and alias names of sjis and big5 to be faked (whitespace-separated,
# lower case; extsetup() compares the lower-cased encoding name against
# them).
problematic_encodings = b'''big5 big5-tw csbig5 big5hkscs big5-hkscs
hkscs cp932 932 ms932 mskanji ms-kanji shift_jis csshiftjis shiftjis
sjis s_jis shift_jis_2004 shiftjis2004 sjis_2004 sjis2004
shift_jisx0213 shiftjisx0213 sjisx0213 s_jisx0213 950 cp950 ms950 '''
Shun-ichi GOTO
|
r5846 | |||
Augie Fackler
|
r43346 | |||
Shun-ichi GOTO
|
def extsetup(ui):
    """Install the path-function wrappers when the encoding requires them.

    Reads ``win32mbcs.encoding`` from *ui* into the module-level
    ``_encoding``.  If that name is listed in ``problematic_encodings``,
    every function named in ``funcs`` (plus ``winfuncs`` and the listdir
    functions on Windows) and ``rfuncs`` is replaced via ``wrapname()``.

    Warns and does nothing when the platform does not support unicode
    filenames and is not cygwin.
    """
    # TODO: decide use of config section for this extension
    if (not os.path.supports_unicode_filenames) and (
        pycompat.sysplatform != b'cygwin'
    ):
        ui.warn(_(b"[win32mbcs] cannot activate on this platform.\n"))
        return
    # determine encoding for filename
    global _encoding
    _encoding = encoding.strfromlocal(ui.config(b'win32mbcs', b'encoding'))
    # fake is only for relevant environment.
    # problematic_encodings is a bytes literal while _encoding is a str;
    # decode the names first, otherwise the membership test compares str
    # with bytes and can never succeed.
    problematic = problematic_encodings.decode('ascii').split()
    if _encoding.lower() in problematic:
        for f in funcs.split():
            wrapname(f, wrapper)
        if pycompat.iswindows:
            for f in winfuncs.split():
                wrapname(f, wrapper)
        wrapname("mercurial.util.listdir", wrapperforlistdir)
        wrapname("mercurial.windows.listdir", wrapperforlistdir)
        # wrap functions to be called with local byte string arguments
        for f in rfuncs.split():
            wrapname(f, reversewrapper)
        # Check sys.argv manually instead of using ui.debug() because
        # command line options are not yet applied when
        # extensions.loadall() is called.
        if '--debug' in sys.argv:
            ui.writenoi18n(
                b"[win32mbcs] activated with encoding: %s\n"
                % encoding.strtolocal(_encoding)
            )