Show More
@@ -1,158 +1,122 b'' | |||||
1 |
# win32mbcs.py -- MBCS filename support for Mercurial |
|
1 | # win32mbcs.py -- MBCS filename support for Mercurial | |
2 | # |
|
2 | # | |
3 | # Copyright (c) 2008 Shun-ichi Goto <shunichi.goto@gmail.com> |
|
3 | # Copyright (c) 2008 Shun-ichi Goto <shunichi.goto@gmail.com> | |
4 | # |
|
4 | # | |
5 |
# Version: 0. |
|
5 | # Version: 0.2 | |
6 | # Author: Shun-ichi Goto <shunichi.goto@gmail.com> |
|
6 | # Author: Shun-ichi Goto <shunichi.goto@gmail.com> | |
7 | # |
|
7 | # | |
8 | # This software may be used and distributed according to the terms |
|
8 | # This software may be used and distributed according to the terms | |
9 | # of the GNU General Public License, incorporated herein by reference. |
|
9 | # of the GNU General Public License, incorporated herein by reference. | |
10 | # |
|
10 | # | |
11 | """Allow to use shift_jis/big5 filenames on Windows. |
|
11 | """Allow to use MBCS path with problematic encoding. | |
12 |
|
||||
13 | There is a well known issue "0x5c problem" on Windows. It is a |
|
|||
14 | trouble on handling path name as raw encoded byte sequence of |
|
|||
15 | problematic encodings like shift_jis or big5. The primary intent |
|
|||
16 | of this extension is to allow using such a encoding on Mercurial |
|
|||
17 | without strange file operation error. |
|
|||
18 |
|
12 | |||
19 | By enabling this extension, hook mechanism is activated and some |
|
13 | Some MBCS encodings are not good for some path operations | |
20 | functions are altered. Usually, this encoding is your local encoding |
|
14 | (i.e. splitting path, case conversion, etc.) with its encoded bytes. | |
21 | on your system by default. So you can get benefit simply by enabling |
|
15 | We call such a encoding (i.e. shift_jis and big5) as "problematic | |
22 | this extension. |
|
16 | encoding". This extension can be used to fix the issue with those | |
23 |
|
17 | encodings by wrapping some functions to convert to unicode string | ||
24 | The encoding for filename is same one for terminal by default. You |
|
18 | before path operation. | |
25 | can change the encoding by setting HGENCODING environment variable. |
|
|||
26 |
|
19 | |||
27 | This extension is usefull for: |
|
20 | This extension is usefull for: | |
28 | * Japanese Windows user using shift_jis encoding. |
|
21 | * Japanese Windows users using shift_jis encoding. | |
29 | * Chinese Windows user using big5 encoding. |
|
22 | * Chinese Windows users using big5 encoding. | |
30 |
* |
|
23 | * All users who use a repository with one of problematic encodings | |
|
24 | on case-insensitive file system. | |||
|
25 | ||||
|
26 | This extension is not needed for: | |||
|
27 | * Any user who use only ascii chars in path. | |||
|
28 | * Any user who do not use any of problematic encodings. | |||
31 |
|
29 | |||
32 | Note: Unix people does not need to use this extension. |
|
30 | Note that there are some limitations on using this extension: | |
|
31 | * You should use single encoding in one repository. | |||
|
32 | * You should set same encoding for the repository by locale or HGENCODING. | |||
|
33 | ||||
|
34 | To use this extension, enable the extension in .hg/hgrc or ~/.hgrc: | |||
|
35 | ||||
|
36 | [extensions] | |||
|
37 | hgext.win32mbcs = | |||
|
38 | ||||
|
39 | Path encoding conversion are done between unicode and util._encoding | |||
|
40 | which is decided by mercurial from current locale setting or HGENCODING. | |||
33 |
|
41 | |||
34 | """ |
|
42 | """ | |
35 |
|
43 | |||
36 | import os |
|
44 | import os | |
37 | from mercurial.i18n import _ |
|
45 | from mercurial.i18n import _ | |
38 | from mercurial import util |
|
46 | from mercurial import util | |
39 |
|
47 | |||
40 | __all__ = ['install', 'uninstall', 'reposetup'] |
|
48 | def decode(arg): | |
|
49 | if isinstance(arg, str): | |||
|
50 | uarg = arg.decode(util._encoding) | |||
|
51 | if arg == uarg.encode(util._encoding): | |||
|
52 | return uarg | |||
|
53 | raise UnicodeError("Not local encoding") | |||
|
54 | elif isinstance(arg, tuple): | |||
|
55 | return tuple(map(decode, arg)) | |||
|
56 | elif isinstance(arg, list): | |||
|
57 | return map(decode, arg) | |||
|
58 | return arg | |||
|
59 | ||||
|
60 | def encode(arg): | |||
|
61 | if isinstance(arg, unicode): | |||
|
62 | return arg.encode(util._encoding) | |||
|
63 | elif isinstance(arg, tuple): | |||
|
64 | return tuple(map(encode, arg)) | |||
|
65 | elif isinstance(arg, list): | |||
|
66 | return map(encode, arg) | |||
|
67 | return arg | |||
|
68 | ||||
|
69 | def wrapper(func, args): | |||
|
70 | # check argument is unicode, then call original | |||
|
71 | for arg in args: | |||
|
72 | if isinstance(arg, unicode): | |||
|
73 | return func(*args) | |||
41 |
|
74 | |||
|
75 | try: | |||
|
76 | # convert arguments to unicode, call func, then convert back | |||
|
77 | return encode(func(*decode(args))) | |||
|
78 | except UnicodeError: | |||
|
79 | # If not encoded with util._encoding, report it then | |||
|
80 | # continue with calling original function. | |||
|
81 | raise util.Abort(_("[win32mbcs] filename conversion fail with" | |||
|
82 | " %s encoding\n") % (util._encoding)) | |||
|
83 | ||||
|
84 | def wrapname(name): | |||
|
85 | idx = name.rfind('.') | |||
|
86 | module = name[:idx] | |||
|
87 | name = name[idx+1:] | |||
|
88 | module = eval(module) | |||
|
89 | func = getattr(module, name) | |||
|
90 | def f(*args): | |||
|
91 | return wrapper(func, args) | |||
|
92 | try: | |||
|
93 | f.__name__ = func.__name__ # fail with python23 | |||
|
94 | except Exception: | |||
|
95 | pass | |||
|
96 | setattr(module, name, f) | |||
|
97 | ||||
|
98 | # List of functions to be wrapped. | |||
|
99 | # NOTE: os.path.dirname() and os.path.basename() are safe because | |||
|
100 | # they use result of os.path.split() | |||
|
101 | funcs = '''os.path.join os.path.split os.path.splitext | |||
|
102 | os.path.splitunc os.path.normpath os.path.normcase os.makedirs | |||
|
103 | util.endswithsep util.splitpath util.checkcase util.fspath''' | |||
42 |
|
104 | |||
43 | # codec and alias names of sjis and big5 to be faked. |
|
105 | # codec and alias names of sjis and big5 to be faked. | |
44 | _problematic_encodings = util.frozenset([ |
|
106 | problematic_encodings = '''big5 big5-tw csbig5 big5hkscs big5-hkscs | |
45 | 'big5', 'big5-tw', 'csbig5', |
|
107 | hkscs cp932 932 ms932 mskanji ms-kanji shift_jis csshiftjis shiftjis | |
46 | 'big5hkscs', 'big5-hkscs', 'hkscs', |
|
108 | sjis s_jis shift_jis_2004 shiftjis2004 sjis_2004 sjis2004 | |
47 | 'cp932', '932', 'ms932', 'mskanji', 'ms-kanji', |
|
109 | shift_jisx0213 shiftjisx0213 sjisx0213 s_jisx0213''' | |
48 | 'shift_jis', 'csshiftjis', 'shiftjis', 'sjis', 's_jis', |
|
|||
49 | 'shift_jis_2004', 'shiftjis2004', 'sjis_2004', 'sjis2004', |
|
|||
50 | 'shift_jisx0213', 'shiftjisx0213', 'sjisx0213', 's_jisx0213', |
|
|||
51 | ]) |
|
|||
52 |
|
||||
53 | # attribute name to store original function |
|
|||
54 | _ORIGINAL = '_original' |
|
|||
55 |
|
||||
56 | _ui = None |
|
|||
57 |
|
||||
58 | def decode_with_check(arg): |
|
|||
59 | if isinstance(arg, tuple): |
|
|||
60 | return tuple(map(decode_with_check, arg)) |
|
|||
61 | elif isinstance(arg, list): |
|
|||
62 | return map(decode_with_check, arg) |
|
|||
63 | elif isinstance(arg, str): |
|
|||
64 | uarg = arg.decode(util._encoding) |
|
|||
65 | if arg == uarg.encode(util._encoding): |
|
|||
66 | return uarg |
|
|||
67 | else: |
|
|||
68 | raise UnicodeError("Not local encoding") |
|
|||
69 | else: |
|
|||
70 | return arg |
|
|||
71 |
|
||||
72 | def encode_with_check(arg): |
|
|||
73 | if isinstance(arg, tuple): |
|
|||
74 | return tuple(map(encode_with_check, arg)) |
|
|||
75 | elif isinstance(arg, list): |
|
|||
76 | return map(encode_with_check, arg) |
|
|||
77 | elif isinstance(arg, unicode): |
|
|||
78 | ret = arg.encode(util._encoding) |
|
|||
79 | return ret |
|
|||
80 | else: |
|
|||
81 | return arg |
|
|||
82 |
|
||||
83 | def wrap(func): |
|
|||
84 |
|
||||
85 | def wrapped(*args): |
|
|||
86 | # check argument is unicode, then call original |
|
|||
87 | for arg in args: |
|
|||
88 | if isinstance(arg, unicode): |
|
|||
89 | return func(*args) |
|
|||
90 | # make decoded argument list into uargs |
|
|||
91 | try: |
|
|||
92 | args = decode_with_check(args) |
|
|||
93 | except UnicodeError, exc: |
|
|||
94 | # If not encoded with _local_fs_encoding, report it then |
|
|||
95 | # continue with calling original function. |
|
|||
96 | _ui.warn(_("WARNING: [win32mbcs] filename conversion fail for" + |
|
|||
97 | " %s: '%s'\n") % (util._encoding, args)) |
|
|||
98 | return func(*args) |
|
|||
99 | # call as unicode operation, then return with encoding |
|
|||
100 | return encode_with_check(func(*args)) |
|
|||
101 |
|
||||
102 | # fake is only for relevant environment. |
|
|||
103 | if hasattr(func, _ORIGINAL) or \ |
|
|||
104 | util._encoding.lower() not in _problematic_encodings: |
|
|||
105 | return func |
|
|||
106 | else: |
|
|||
107 | f = wrapped |
|
|||
108 | f.__name__ = func.__name__ |
|
|||
109 | setattr(f, _ORIGINAL, func) # hold original to restore |
|
|||
110 | return f |
|
|||
111 |
|
||||
112 | def unwrap(func): |
|
|||
113 | return getattr(func, _ORIGINAL, func) |
|
|||
114 |
|
||||
115 | def install(): |
|
|||
116 | # wrap some python functions and mercurial functions |
|
|||
117 | # to handle raw bytes on Windows. |
|
|||
118 | # NOTE: dirname and basename is safe because they use result |
|
|||
119 | # of os.path.split() |
|
|||
120 | global _ui |
|
|||
121 | if not _ui: |
|
|||
122 | from mercurial import ui |
|
|||
123 | _ui = ui.ui() |
|
|||
124 | os.path.join = wrap(os.path.join) |
|
|||
125 | os.path.split = wrap(os.path.split) |
|
|||
126 | os.path.splitext = wrap(os.path.splitext) |
|
|||
127 | os.path.splitunc = wrap(os.path.splitunc) |
|
|||
128 | os.path.normpath = wrap(os.path.normpath) |
|
|||
129 | os.path.normcase = wrap(os.path.normcase) |
|
|||
130 | os.makedirs = wrap(os.makedirs) |
|
|||
131 | util.endswithsep = wrap(util.endswithsep) |
|
|||
132 | util.splitpath = wrap(util.splitpath) |
|
|||
133 |
|
||||
134 | def uninstall(): |
|
|||
135 | # restore original functions. |
|
|||
136 | os.path.join = unwrap(os.path.join) |
|
|||
137 | os.path.split = unwrap(os.path.split) |
|
|||
138 | os.path.splitext = unwrap(os.path.splitext) |
|
|||
139 | os.path.splitunc = unwrap(os.path.splitunc) |
|
|||
140 | os.path.normpath = unwrap(os.path.normpath) |
|
|||
141 | os.path.normcase = unwrap(os.path.normcase) |
|
|||
142 | os.makedirs = unwrap(os.makedirs) |
|
|||
143 | util.endswithsep = unwrap(util.endswithsep) |
|
|||
144 | util.splitpath = unwrap(util.splitpath) |
|
|||
145 |
|
||||
146 |
|
110 | |||
147 | def reposetup(ui, repo): |
|
111 | def reposetup(ui, repo): | |
148 |
|
|
112 | # TODO: decide use of config section for this extension | |
149 | global _ui |
|
113 | if not os.path.supports_unicode_filenames: | |
150 | _ui = ui |
|
114 | ui.warn(_("[win32mbcs] cannot activate on this platform.\n")) | |
151 | if not os.path.supports_unicode_filenames: |
|
115 | return | |
152 | ui.warn(_("[win32mbcs] cannot activate on this platform.\n")) |
|
|||
153 | return |
|
|||
154 | # install features of this extension |
|
|||
155 | install() |
|
|||
156 | ui.debug(_("[win32mbcs] activeted with encoding: %s\n") % util._encoding) |
|
|||
157 |
|
116 | |||
158 | # win32mbcs.py ends here |
|
117 | # fake is only for relevant environment. | |
|
118 | if util._encoding.lower() in problematic_encodings.split(): | |||
|
119 | for f in funcs.split(): | |||
|
120 | wrapname(f) | |||
|
121 | ui.debug(_("[win32mbcs] activated with encoding: %s\n") % util._encoding) | |||
|
122 |
General Comments 0
You need to be logged in to leave comments.
Login now