##// END OF EJS Templates
win32mbcs: use str for encoding value...
Matt Harbison -
r52573:e6508d1e stable
parent child Browse files
Show More
@@ -1,221 +1,222
1 # win32mbcs.py -- MBCS filename support for Mercurial
1 # win32mbcs.py -- MBCS filename support for Mercurial
2 #
2 #
3 # Copyright (c) 2008 Shun-ichi Goto <shunichi.goto@gmail.com>
3 # Copyright (c) 2008 Shun-ichi Goto <shunichi.goto@gmail.com>
4 #
4 #
5 # Version: 0.3
5 # Version: 0.3
6 # Author: Shun-ichi Goto <shunichi.goto@gmail.com>
6 # Author: Shun-ichi Goto <shunichi.goto@gmail.com>
7 #
7 #
8 # This software may be used and distributed according to the terms of the
8 # This software may be used and distributed according to the terms of the
9 # GNU General Public License version 2 or any later version.
9 # GNU General Public License version 2 or any later version.
10 #
10 #
11
11
12 '''allow the use of MBCS paths with problematic encodings
12 '''allow the use of MBCS paths with problematic encodings
13
13
14 Some MBCS encodings are not good for some path operations (e.g.
14 Some MBCS encodings are not good for some path operations (e.g.
15 splitting path, case conversion, etc.) with their encoded bytes. We call
15 splitting path, case conversion, etc.) with their encoded bytes. We call
16 such an encoding (e.g. shift_jis and big5) a "problematic encoding".
16 such an encoding (e.g. shift_jis and big5) a "problematic encoding".
17 This extension can be used to fix the issue with those encodings by
17 This extension can be used to fix the issue with those encodings by
18 wrapping some functions to convert to Unicode string before path
18 wrapping some functions to convert to Unicode string before path
19 operation.
19 operation.
20
20
21 This extension is useful for:
21 This extension is useful for:
22
22
23 - Japanese Windows users using shift_jis encoding.
23 - Japanese Windows users using shift_jis encoding.
24 - Chinese Windows users using big5 encoding.
24 - Chinese Windows users using big5 encoding.
25 - All users who use a repository with one of problematic encodings on
25 - All users who use a repository with one of problematic encodings on
26 case-insensitive file system.
26 case-insensitive file system.
27
27
28 This extension is not needed for:
28 This extension is not needed for:
29
29
30 - Any user who uses only ASCII chars in path.
30 - Any user who uses only ASCII chars in path.
31 - Any user who does not use any of problematic encodings.
31 - Any user who does not use any of problematic encodings.
32
32
33 Note that there are some limitations on using this extension:
33 Note that there are some limitations on using this extension:
34
34
35 - You should use single encoding in one repository.
35 - You should use single encoding in one repository.
36 - If the repository path ends with 0x5c, .hg/hgrc cannot be read.
36 - If the repository path ends with 0x5c, .hg/hgrc cannot be read.
37 - win32mbcs is not compatible with fixutf8 extension.
37 - win32mbcs is not compatible with fixutf8 extension.
38
38
39 By default, win32mbcs uses encoding.encoding decided by Mercurial.
39 By default, win32mbcs uses encoding.encoding decided by Mercurial.
40 You can specify the encoding by config option::
40 You can specify the encoding by config option::
41
41
42 [win32mbcs]
42 [win32mbcs]
43 encoding = sjis
43 encoding = sjis
44
44
45 It is useful for the users who want to commit with UTF-8 log message.
45 It is useful for the users who want to commit with UTF-8 log message.
46 '''
46 '''
47
47
48 import os
48 import os
49 import sys
49 import sys
50
50
51 from mercurial.i18n import _
51 from mercurial.i18n import _
52 from mercurial import (
52 from mercurial import (
53 encoding,
53 encoding,
54 error,
54 error,
55 pycompat,
55 pycompat,
56 registrar,
56 registrar,
57 )
57 )
58
58
59 # Note for extension authors: ONLY specify testedwith = 'ships-with-hg-core' for
59 # Note for extension authors: ONLY specify testedwith = 'ships-with-hg-core' for
60 # extensions which SHIP WITH MERCURIAL. Non-mainline extensions should
60 # extensions which SHIP WITH MERCURIAL. Non-mainline extensions should
61 # be specifying the version(s) of Mercurial they are tested with, or
61 # be specifying the version(s) of Mercurial they are tested with, or
62 # leave the attribute unspecified.
62 # leave the attribute unspecified.
63 testedwith = b'ships-with-hg-core'
63 testedwith = b'ships-with-hg-core'
64
64
65 configtable = {}
65 configtable = {}
66 configitem = registrar.configitem(configtable)
66 configitem = registrar.configitem(configtable)
67
67
68 # Encoding.encoding may be updated by --encoding option.
68 # Encoding.encoding may be updated by --encoding option.
69 # Use a lambda to delay the resolution.
69 # Use a lambda to delay the resolution.
70 configitem(
70 configitem(
71 b'win32mbcs',
71 b'win32mbcs',
72 b'encoding',
72 b'encoding',
73 default=lambda: encoding.encoding,
73 default=lambda: encoding.encoding,
74 )
74 )
75
75
76 _encoding = None # see extsetup
76 _encoding: str = "" # see extsetup
77
77
78
78
79 def decode(arg):
79 def decode(arg):
80 if isinstance(arg, bytes):
80 if isinstance(arg, bytes):
81 uarg = arg.decode(_encoding)
81 uarg = arg.decode(_encoding)
82 if arg == uarg.encode(_encoding):
82 if arg == uarg.encode(_encoding):
83 return uarg
83 return uarg
84 raise UnicodeError("Not local encoding")
84 raise UnicodeError("Not local encoding")
85 elif isinstance(arg, tuple):
85 elif isinstance(arg, tuple):
86 return tuple(map(decode, arg))
86 return tuple(map(decode, arg))
87 elif isinstance(arg, list):
87 elif isinstance(arg, list):
88 return map(decode, arg)
88 return map(decode, arg)
89 elif isinstance(arg, dict):
89 elif isinstance(arg, dict):
90 for k, v in arg.items():
90 for k, v in arg.items():
91 arg[k] = decode(v)
91 arg[k] = decode(v)
92 return arg
92 return arg
93
93
94
94
95 def encode(arg):
95 def encode(arg):
96 if isinstance(arg, str):
96 if isinstance(arg, str):
97 return arg.encode(_encoding)
97 return arg.encode(_encoding)
98 elif isinstance(arg, tuple):
98 elif isinstance(arg, tuple):
99 return tuple(map(encode, arg))
99 return tuple(map(encode, arg))
100 elif isinstance(arg, list):
100 elif isinstance(arg, list):
101 return map(encode, arg)
101 return map(encode, arg)
102 elif isinstance(arg, dict):
102 elif isinstance(arg, dict):
103 for k, v in arg.items():
103 for k, v in arg.items():
104 arg[k] = encode(v)
104 arg[k] = encode(v)
105 return arg
105 return arg
106
106
107
107
108 def appendsep(s):
108 def appendsep(s):
109 # ensure the path ends with os.sep, appending it if necessary.
109 # ensure the path ends with os.sep, appending it if necessary.
110 try:
110 try:
111 us = decode(s)
111 us = decode(s)
112 except UnicodeError:
112 except UnicodeError:
113 us = s # TODO: how to handle this bytes case??
113 us = s # TODO: how to handle this bytes case??
114 if us and us[-1] not in ':/\\':
114 if us and us[-1] not in ':/\\':
115 s += pycompat.ossep
115 s += pycompat.ossep
116 return s
116 return s
117
117
118
118
119 def basewrapper(func, argtype, enc, dec, args, kwds):
119 def basewrapper(func, argtype, enc, dec, args, kwds):
120 # check if already converted, then call original
120 # check if already converted, then call original
121 for arg in args:
121 for arg in args:
122 if isinstance(arg, argtype):
122 if isinstance(arg, argtype):
123 return func(*args, **kwds)
123 return func(*args, **kwds)
124
124
125 try:
125 try:
126 # convert string arguments, call func, then convert back the
126 # convert string arguments, call func, then convert back the
127 # return value.
127 # return value.
128 return enc(func(*dec(args), **dec(kwds)))
128 return enc(func(*dec(args), **dec(kwds)))
129 except UnicodeError:
129 except UnicodeError:
130 raise error.Abort(
130 raise error.Abort(
131 _(b"[win32mbcs] filename conversion failed with %s encoding\n")
131 _(b"[win32mbcs] filename conversion failed with %s encoding\n")
132 % _encoding
132 % encoding.strtolocal(_encoding)
133 )
133 )
134
134
135
135
136 def wrapper(func, args, kwds):
136 def wrapper(func, args, kwds):
137 return basewrapper(func, str, encode, decode, args, kwds)
137 return basewrapper(func, str, encode, decode, args, kwds)
138
138
139
139
140 def reversewrapper(func, args, kwds):
140 def reversewrapper(func, args, kwds):
141 return basewrapper(func, str, decode, encode, args, kwds)
141 return basewrapper(func, str, decode, encode, args, kwds)
142
142
143
143
144 def wrapperforlistdir(func, args, kwds):
144 def wrapperforlistdir(func, args, kwds):
145 # Ensure 'path' argument ends with os.sep to avoid
145 # Ensure 'path' argument ends with os.sep to avoid
146 # misinterpreting last 0x5c of MBCS 2nd byte as path separator.
146 # misinterpreting last 0x5c of MBCS 2nd byte as path separator.
147 if args:
147 if args:
148 args = list(args)
148 args = list(args)
149 args[0] = appendsep(args[0])
149 args[0] = appendsep(args[0])
150 if 'path' in kwds:
150 if 'path' in kwds:
151 kwds['path'] = appendsep(kwds['path'])
151 kwds['path'] = appendsep(kwds['path'])
152 return func(*args, **kwds)
152 return func(*args, **kwds)
153
153
154
154
155 def wrapname(name: str, wrapper):
155 def wrapname(name: str, wrapper):
156 module, name = name.rsplit('.', 1)
156 module, name = name.rsplit('.', 1)
157 module = sys.modules[module]
157 module = sys.modules[module]
158 func = getattr(module, name)
158 func = getattr(module, name)
159
159
160 def f(*args, **kwds):
160 def f(*args, **kwds):
161 return wrapper(func, args, kwds)
161 return wrapper(func, args, kwds)
162
162
163 f.__name__ = func.__name__
163 f.__name__ = func.__name__
164 setattr(module, name, f)
164 setattr(module, name, f)
165
165
166
166
167 # List of functions to be wrapped.
167 # List of functions to be wrapped.
168 # NOTE: os.path.dirname() and os.path.basename() are safe because
168 # NOTE: os.path.dirname() and os.path.basename() are safe because
169 # they use result of os.path.split()
169 # they use result of os.path.split()
170 funcs = '''os.path.join os.path.split os.path.splitext
170 funcs = '''os.path.join os.path.split os.path.splitext
171 os.path.normpath os.makedirs mercurial.util.endswithsep
171 os.path.normpath os.makedirs mercurial.util.endswithsep
172 mercurial.util.splitpath mercurial.util.fscasesensitive
172 mercurial.util.splitpath mercurial.util.fscasesensitive
173 mercurial.util.fspath mercurial.util.pconvert mercurial.util.normpath
173 mercurial.util.fspath mercurial.util.pconvert mercurial.util.normpath
174 mercurial.util.checkwinfilename mercurial.util.checkosfilename
174 mercurial.util.checkwinfilename mercurial.util.checkosfilename
175 mercurial.util.split'''
175 mercurial.util.split'''
176
176
177 # These functions are required to be called with local encoded string
177 # These functions are required to be called with local encoded string
178 # because they expect their argument to be a local encoded string and
178 # because they expect their argument to be a local encoded string and
179 # cause problems with a unicode string.
179 # cause problems with a unicode string.
180 rfuncs = '''mercurial.encoding.upper mercurial.encoding.lower
180 rfuncs = '''mercurial.encoding.upper mercurial.encoding.lower
181 mercurial.util._filenamebytestr'''
181 mercurial.util._filenamebytestr'''
182
182
183 # List of Windows specific functions to be wrapped.
183 # List of Windows specific functions to be wrapped.
184 winfuncs = '''os.path.splitunc'''
184 winfuncs = '''os.path.splitunc'''
185
185
186 # codec and alias names of sjis and big5 to be faked.
186 # codec and alias names of sjis and big5 to be faked.
187 problematic_encodings = b'''big5 big5-tw csbig5 big5hkscs big5-hkscs
187 problematic_encodings = b'''big5 big5-tw csbig5 big5hkscs big5-hkscs
188 hkscs cp932 932 ms932 mskanji ms-kanji shift_jis csshiftjis shiftjis
188 hkscs cp932 932 ms932 mskanji ms-kanji shift_jis csshiftjis shiftjis
189 sjis s_jis shift_jis_2004 shiftjis2004 sjis_2004 sjis2004
189 sjis s_jis shift_jis_2004 shiftjis2004 sjis_2004 sjis2004
190 shift_jisx0213 shiftjisx0213 sjisx0213 s_jisx0213 950 cp950 ms950 '''
190 shift_jisx0213 shiftjisx0213 sjisx0213 s_jisx0213 950 cp950 ms950 '''
191
191
192
192
193 def extsetup(ui):
193 def extsetup(ui):
194 # TODO: decide use of config section for this extension
194 # TODO: decide use of config section for this extension
195 if (not os.path.supports_unicode_filenames) and (
195 if (not os.path.supports_unicode_filenames) and (
196 pycompat.sysplatform != b'cygwin'
196 pycompat.sysplatform != b'cygwin'
197 ):
197 ):
198 ui.warn(_(b"[win32mbcs] cannot activate on this platform.\n"))
198 ui.warn(_(b"[win32mbcs] cannot activate on this platform.\n"))
199 return
199 return
200 # determine encoding for filename
200 # determine encoding for filename
201 global _encoding
201 global _encoding
202 _encoding = ui.config(b'win32mbcs', b'encoding')
202 _encoding = encoding.strfromlocal(ui.config(b'win32mbcs', b'encoding'))
203 # fake is only for relevant environment.
203 # fake is only for relevant environment.
204 if _encoding.lower() in problematic_encodings.split():
204 if _encoding.lower() in problematic_encodings.split():
205 for f in funcs.split():
205 for f in funcs.split():
206 wrapname(f, wrapper)
206 wrapname(f, wrapper)
207 if pycompat.iswindows:
207 if pycompat.iswindows:
208 for f in winfuncs.split():
208 for f in winfuncs.split():
209 wrapname(f, wrapper)
209 wrapname(f, wrapper)
210 wrapname("mercurial.util.listdir", wrapperforlistdir)
210 wrapname("mercurial.util.listdir", wrapperforlistdir)
211 wrapname("mercurial.windows.listdir", wrapperforlistdir)
211 wrapname("mercurial.windows.listdir", wrapperforlistdir)
212 # wrap functions to be called with local byte string arguments
212 # wrap functions to be called with local byte string arguments
213 for f in rfuncs.split():
213 for f in rfuncs.split():
214 wrapname(f, reversewrapper)
214 wrapname(f, reversewrapper)
215 # Check sys.argv manually instead of using ui.debug() because
215 # Check sys.argv manually instead of using ui.debug() because
216 # command line options are not yet applied when
216 # command line options are not yet applied when
217 # extensions.loadall() is called.
217 # extensions.loadall() is called.
218 if '--debug' in sys.argv:
218 if '--debug' in sys.argv:
219 ui.writenoi18n(
219 ui.writenoi18n(
220 b"[win32mbcs] activated with encoding: %s\n" % _encoding
220 b"[win32mbcs] activated with encoding: %s\n"
221 % encoding.strtolocal(_encoding)
221 )
222 )
General Comments 0
You need to be logged in to leave comments. Login now