##// END OF EJS Templates
Update win32mbcs extension...
Shun-ichi Goto -
r6887:304484c7 default
parent child Browse files
Show More
@@ -1,158 +1,122 b''
1 # win32mbcs.py -- MBCS filename support for Mercurial on Windows
1 # win32mbcs.py -- MBCS filename support for Mercurial
2 #
2 #
3 # Copyright (c) 2008 Shun-ichi Goto <shunichi.goto@gmail.com>
3 # Copyright (c) 2008 Shun-ichi Goto <shunichi.goto@gmail.com>
4 #
4 #
5 # Version: 0.1
5 # Version: 0.2
6 # Author: Shun-ichi Goto <shunichi.goto@gmail.com>
6 # Author: Shun-ichi Goto <shunichi.goto@gmail.com>
7 #
7 #
8 # This software may be used and distributed according to the terms
8 # This software may be used and distributed according to the terms
9 # of the GNU General Public License, incorporated herein by reference.
9 # of the GNU General Public License, incorporated herein by reference.
10 #
10 #
11 """Allow to use shift_jis/big5 filenames on Windows.
11 """Allow the use of MBCS paths with problematic encodings.
12
13 There is a well-known issue on Windows called the "0x5c problem". It
14 causes trouble when path names are handled as raw encoded byte
15 sequences in problematic encodings like shift_jis or big5. The primary
16 intent of this extension is to allow using such an encoding with
17 Mercurial without strange file operation errors.
18
12
19 By enabling this extension, hook mechanism is activated and some
13 Some MBCS encodings are not good for some path operations
20 functions are altered. Usually, this encoding is your local encoding
14 (i.e. splitting path, case conversion, etc.) with its encoded bytes.
21 on your system by default. So you can get benefit simply by enabling
15 We call such an encoding (e.g. shift_jis and big5) a "problematic
22 this extension.
16 encoding". This extension can be used to fix the issue with those
23
17 encodings by wrapping some functions to convert to unicode string
24 The encoding for filename is same one for terminal by default. You
18 before path operation.
25 can change the encoding by setting HGENCODING environment variable.
26
19
27 This extension is useful for:
20 This extension is useful for:
28 * Japanese Windows user using shift_jis encoding.
21 * Japanese Windows users using shift_jis encoding.
29 * Chinese Windows user using big5 encoding.
22 * Chinese Windows users using big5 encoding.
30 * Users who want to use a repository created with such an encoding.
23 * All users who use a repository with one of the problematic encodings
24 on case-insensitive file system.
25
26 This extension is not needed for:
27 * Any user who uses only ASCII characters in paths.
28 * Any user who does not use any of the problematic encodings.
31
29
32 Note: Unix users do not need to use this extension.
30 Note that there are some limitations on using this extension:
31 * You should use single encoding in one repository.
32 * You should set same encoding for the repository by locale or HGENCODING.
33
34 To use this extension, enable the extension in .hg/hgrc or ~/.hgrc:
35
36 [extensions]
37 hgext.win32mbcs =
38
39 Path encoding conversions are done between unicode and util._encoding
40 which is decided by mercurial from current locale setting or HGENCODING.
33
41
34 """
42 """
35
43
36 import os
44 import os
37 from mercurial.i18n import _
45 from mercurial.i18n import _
38 from mercurial import util
46 from mercurial import util
39
47
40 __all__ = ['install', 'uninstall', 'reposetup']
48 def decode(arg):
41
49 if isinstance(arg, str):
42
43 # codec and alias names of sjis and big5 to be faked.
44 _problematic_encodings = util.frozenset([
45 'big5', 'big5-tw', 'csbig5',
46 'big5hkscs', 'big5-hkscs', 'hkscs',
47 'cp932', '932', 'ms932', 'mskanji', 'ms-kanji',
48 'shift_jis', 'csshiftjis', 'shiftjis', 'sjis', 's_jis',
49 'shift_jis_2004', 'shiftjis2004', 'sjis_2004', 'sjis2004',
50 'shift_jisx0213', 'shiftjisx0213', 'sjisx0213', 's_jisx0213',
51 ])
52
53 # attribute name to store original function
54 _ORIGINAL = '_original'
55
56 _ui = None
57
58 def decode_with_check(arg):
59 if isinstance(arg, tuple):
60 return tuple(map(decode_with_check, arg))
61 elif isinstance(arg, list):
62 return map(decode_with_check, arg)
63 elif isinstance(arg, str):
64 uarg = arg.decode(util._encoding)
50 uarg = arg.decode(util._encoding)
65 if arg == uarg.encode(util._encoding):
51 if arg == uarg.encode(util._encoding):
66 return uarg
52 return uarg
67 else:
68 raise UnicodeError("Not local encoding")
53 raise UnicodeError("Not local encoding")
69 else:
54 elif isinstance(arg, tuple):
55 return tuple(map(decode, arg))
56 elif isinstance(arg, list):
57 return map(decode, arg)
70 return arg
58 return arg
71
59
72 def encode_with_check(arg):
60 def encode(arg):
73 if isinstance(arg, tuple):
61 if isinstance(arg, unicode):
74 return tuple(map(encode_with_check, arg))
62 return arg.encode(util._encoding)
63 elif isinstance(arg, tuple):
64 return tuple(map(encode, arg))
75 elif isinstance(arg, list):
65 elif isinstance(arg, list):
76 return map(encode_with_check, arg)
66 return map(encode, arg)
77 elif isinstance(arg, unicode):
78 ret = arg.encode(util._encoding)
79 return ret
80 else:
81 return arg
67 return arg
82
68
83 def wrap(func):
69 def wrapper(func, args):
84
85 def wrapped(*args):
86 # check argument is unicode, then call original
70 # check argument is unicode, then call original
87 for arg in args:
71 for arg in args:
88 if isinstance(arg, unicode):
72 if isinstance(arg, unicode):
89 return func(*args)
73 return func(*args)
90 # make decoded argument list into uargs
74
91 try:
75 try:
92 args = decode_with_check(args)
76 # convert arguments to unicode, call func, then convert back
93 except UnicodeError, exc:
77 return encode(func(*decode(args)))
94 # If not encoded with _local_fs_encoding, report it then
78 except UnicodeError:
79 # If not encoded with util._encoding, report it then
95 # continue with calling original function.
80 # continue with calling original function.
96 _ui.warn(_("WARNING: [win32mbcs] filename conversion fail for" +
81 raise util.Abort(_("[win32mbcs] filename conversion fail with"
97 " %s: '%s'\n") % (util._encoding, args))
82 " %s encoding\n") % (util._encoding))
98 return func(*args)
99 # call as unicode operation, then return with encoding
100 return encode_with_check(func(*args))
101
102 # fake is only for relevant environment.
103 if hasattr(func, _ORIGINAL) or \
104 util._encoding.lower() not in _problematic_encodings:
105 return func
106 else:
107 f = wrapped
108 f.__name__ = func.__name__
109 setattr(f, _ORIGINAL, func) # hold original to restore
110 return f
111
112 def unwrap(func):
113 return getattr(func, _ORIGINAL, func)
114
83
115 def install():
84 def wrapname(name):
116 # wrap some python functions and mercurial functions
85 idx = name.rfind('.')
117 # to handle raw bytes on Windows.
86 module = name[:idx]
118 # NOTE: dirname and basename is safe because they use result
87 name = name[idx+1:]
119 # of os.path.split()
88 module = eval(module)
120 global _ui
89 func = getattr(module, name)
121 if not _ui:
90 def f(*args):
122 from mercurial import ui
91 return wrapper(func, args)
123 _ui = ui.ui()
92 try:
124 os.path.join = wrap(os.path.join)
93 f.__name__ = func.__name__ # fail with python23
125 os.path.split = wrap(os.path.split)
94 except Exception:
126 os.path.splitext = wrap(os.path.splitext)
95 pass
127 os.path.splitunc = wrap(os.path.splitunc)
96 setattr(module, name, f)
128 os.path.normpath = wrap(os.path.normpath)
129 os.path.normcase = wrap(os.path.normcase)
130 os.makedirs = wrap(os.makedirs)
131 util.endswithsep = wrap(util.endswithsep)
132 util.splitpath = wrap(util.splitpath)
133
97
134 def uninstall():
98 # List of functions to be wrapped.
135 # restore original functions.
99 # NOTE: os.path.dirname() and os.path.basename() are safe because
136 os.path.join = unwrap(os.path.join)
100 # they use result of os.path.split()
137 os.path.split = unwrap(os.path.split)
101 funcs = '''os.path.join os.path.split os.path.splitext
138 os.path.splitext = unwrap(os.path.splitext)
102 os.path.splitunc os.path.normpath os.path.normcase os.makedirs
139 os.path.splitunc = unwrap(os.path.splitunc)
103 util.endswithsep util.splitpath util.checkcase util.fspath'''
140 os.path.normpath = unwrap(os.path.normpath)
141 os.path.normcase = unwrap(os.path.normcase)
142 os.makedirs = unwrap(os.makedirs)
143 util.endswithsep = unwrap(util.endswithsep)
144 util.splitpath = unwrap(util.splitpath)
145
104
105 # codec and alias names of sjis and big5 to be faked.
106 problematic_encodings = '''big5 big5-tw csbig5 big5hkscs big5-hkscs
107 hkscs cp932 932 ms932 mskanji ms-kanji shift_jis csshiftjis shiftjis
108 sjis s_jis shift_jis_2004 shiftjis2004 sjis_2004 sjis2004
109 shift_jisx0213 shiftjisx0213 sjisx0213 s_jisx0213'''
146
110
147 def reposetup(ui, repo):
111 def reposetup(ui, repo):
148 # TODO: decide use of config section for this extension
112 # TODO: decide use of config section for this extension
149 global _ui
150 _ui = ui
151 if not os.path.supports_unicode_filenames:
113 if not os.path.supports_unicode_filenames:
152 ui.warn(_("[win32mbcs] cannot activate on this platform.\n"))
114 ui.warn(_("[win32mbcs] cannot activate on this platform.\n"))
153 return
115 return
154 # install features of this extension
155 install()
156 ui.debug(_("[win32mbcs] activeted with encoding: %s\n") % util._encoding)
157
116
158 # win32mbcs.py ends here
117 # fake is only for relevant environment.
118 if util._encoding.lower() in problematic_encodings.split():
119 for f in funcs.split():
120 wrapname(f)
121 ui.debug(_("[win32mbcs] activated with encoding: %s\n") % util._encoding)
122
General Comments 0
You need to be logged in to leave comments. Login now