##// END OF EJS Templates
win32mbcs: unbyteify some strings for py3 support...
Matt Harbison -
r51635:48d9af6b stable
parent child Browse files
Show More
@@ -1,222 +1,222 b''
1 # win32mbcs.py -- MBCS filename support for Mercurial
1 # win32mbcs.py -- MBCS filename support for Mercurial
2 #
2 #
3 # Copyright (c) 2008 Shun-ichi Goto <shunichi.goto@gmail.com>
3 # Copyright (c) 2008 Shun-ichi Goto <shunichi.goto@gmail.com>
4 #
4 #
5 # Version: 0.3
5 # Version: 0.3
6 # Author: Shun-ichi Goto <shunichi.goto@gmail.com>
6 # Author: Shun-ichi Goto <shunichi.goto@gmail.com>
7 #
7 #
8 # This software may be used and distributed according to the terms of the
8 # This software may be used and distributed according to the terms of the
9 # GNU General Public License version 2 or any later version.
9 # GNU General Public License version 2 or any later version.
10 #
10 #
11
11
12 '''allow the use of MBCS paths with problematic encodings
12 '''allow the use of MBCS paths with problematic encodings
13
13
Some MBCS encodings are not good for some path operations (e.g.
splitting a path, case conversion, etc.) on their encoded bytes. We call
such an encoding (e.g. shift_jis and big5) a "problematic encoding".
This extension can be used to fix the issue with those encodings by
wrapping some functions to convert to Unicode strings before the path
operation.
20
20
21 This extension is useful for:
21 This extension is useful for:
22
22
23 - Japanese Windows users using shift_jis encoding.
23 - Japanese Windows users using shift_jis encoding.
24 - Chinese Windows users using big5 encoding.
24 - Chinese Windows users using big5 encoding.
25 - All users who use a repository with one of problematic encodings on
25 - All users who use a repository with one of problematic encodings on
26 case-insensitive file system.
26 case-insensitive file system.
27
27
28 This extension is not needed for:
28 This extension is not needed for:
29
29
- Any user who uses only ASCII chars in paths.
- Any user who does not use any of the problematic encodings.
32
32
33 Note that there are some limitations on using this extension:
33 Note that there are some limitations on using this extension:
34
34
35 - You should use single encoding in one repository.
35 - You should use single encoding in one repository.
36 - If the repository path ends with 0x5c, .hg/hgrc cannot be read.
36 - If the repository path ends with 0x5c, .hg/hgrc cannot be read.
37 - win32mbcs is not compatible with fixutf8 extension.
37 - win32mbcs is not compatible with fixutf8 extension.
38
38
39 By default, win32mbcs uses encoding.encoding decided by Mercurial.
39 By default, win32mbcs uses encoding.encoding decided by Mercurial.
40 You can specify the encoding by config option::
40 You can specify the encoding by config option::
41
41
42 [win32mbcs]
42 [win32mbcs]
43 encoding = sjis
43 encoding = sjis
44
44
45 It is useful for the users who want to commit with UTF-8 log message.
45 It is useful for the users who want to commit with UTF-8 log message.
46 '''
46 '''
47
47
48 import os
48 import os
49 import sys
49 import sys
50
50
51 from mercurial.i18n import _
51 from mercurial.i18n import _
52 from mercurial.pycompat import getattr, setattr
52 from mercurial.pycompat import getattr, setattr
53 from mercurial import (
53 from mercurial import (
54 encoding,
54 encoding,
55 error,
55 error,
56 pycompat,
56 pycompat,
57 registrar,
57 registrar,
58 )
58 )
59
59
# Note for extension authors: ONLY specify testedwith = 'ships-with-hg-core' for
# extensions which SHIP WITH MERCURIAL. Non-mainline extensions should
# be specifying the version(s) of Mercurial they are tested with, or
# leave the attribute unspecified.
testedwith = b'ships-with-hg-core'

configtable = {}
configitem = registrar.configitem(configtable)

# encoding.encoding may be updated by the --encoding option, so the default
# is a lambda: that delays the lookup until the config value is resolved.
configitem(
    b'win32mbcs',
    b'encoding',
    default=lambda: encoding.encoding,
)

# Encoding used for filename conversion; assigned by extsetup().
_encoding = None
78
78
79
79
def decode(arg):
    """Recursively convert local-encoded ``bytes`` inside *arg* to ``str``.

    - ``bytes`` are decoded with the module-level ``_encoding``.
    - tuples and lists are converted element-wise into a new tuple / list.
    - dict values are converted in place (keys are left untouched) and the
      same dict object is returned.
    - every other object (including ``str``) is returned unchanged.

    Raises ``UnicodeError`` when a byte string cannot be decoded, or when
    re-encoding the decoded text does not reproduce the original bytes.
    """
    if isinstance(arg, bytes):
        # bytes.decode()/str.encode() only accept a ``str`` codec name, but
        # the rest of this module handles ``_encoding`` as bytes.
        codec = _encoding
        if isinstance(codec, bytes):
            codec = codec.decode('ascii')
        uarg = arg.decode(codec)
        # Only accept the result if it round-trips to the same bytes.
        if arg == uarg.encode(codec):
            return uarg
        raise UnicodeError("Not local encoding")
    elif isinstance(arg, tuple):
        return tuple(decode(item) for item in arg)
    elif isinstance(arg, list):
        # Build a real list: map() is a lazy iterator on Python 3.
        return [decode(item) for item in arg]
    elif isinstance(arg, dict):
        for k, v in list(arg.items()):
            arg[k] = decode(v)
    return arg
94
94
95
95
def encode(arg):
    """Recursively convert ``str`` inside *arg* to local-encoded ``bytes``.

    - ``str`` is encoded with the module-level ``_encoding``.
    - tuples and lists are converted element-wise into a new tuple / list.
    - dict values are converted in place (keys are left untouched) and the
      same dict object is returned.
    - every other object (including ``bytes``) is returned unchanged.

    Raises ``UnicodeError`` when a string cannot be represented in the
    configured encoding.
    """
    if isinstance(arg, str):
        # str.encode() only accepts a ``str`` codec name, but the rest of
        # this module handles ``_encoding`` as bytes.
        codec = _encoding
        if isinstance(codec, bytes):
            codec = codec.decode('ascii')
        return arg.encode(codec)
    elif isinstance(arg, tuple):
        return tuple(encode(item) for item in arg)
    elif isinstance(arg, list):
        # Build a real list: map() is a lazy iterator on Python 3.
        return [encode(item) for item in arg]
    elif isinstance(arg, dict):
        for k, v in list(arg.items()):
            arg[k] = encode(v)
    return arg
107
107
108
108
def appendsep(s):
    """Return *s* with ``pycompat.ossep`` appended unless it already ends
    with ``:``, ``/`` or ``\\``.

    The last character is inspected on the decoded form of *s*, so that the
    trailing byte of a multi-byte character is not mistaken for a separator.
    If *s* cannot be decoded, the raw value is inspected instead. Empty
    input is returned unchanged.
    """
    try:
        us = decode(s)
    except UnicodeError:
        # Not valid in the local encoding: fall back to the raw value.
        us = s
    if not us:
        return s
    # Slice rather than index: indexing bytes yields an int, which cannot be
    # tested for membership in a str and would raise TypeError.
    last = us[-1:]
    separators = b':/\\' if isinstance(last, bytes) else ':/\\'
    if last not in separators:
        s += pycompat.ossep
    return s
118
118
119
119
def basewrapper(func, argtype, enc, dec, args, kwds):
    """Call *func* with converted arguments and convert its result back.

    func    -- the original function being wrapped
    argtype -- type marking an argument as "already converted"
    enc     -- conversion applied to the return value of *func*
    dec     -- conversion applied to *args* and *kwds* before the call
    args    -- positional arguments (tuple)
    kwds    -- keyword arguments (dict)

    If any positional argument is already an instance of *argtype*, *func*
    is called with the arguments untouched and its result is returned as is.

    Raises ``error.Abort`` when a ``UnicodeError`` occurs during the
    conversion or the call.
    """
    # Check whether the arguments are already converted; if so, call the
    # original directly.
    if any(isinstance(arg, argtype) for arg in args):
        return func(*args, **kwds)

    try:
        # Convert string arguments, call func, then convert back the
        # return value.
        return enc(func(*dec(args), **dec(kwds)))
    except UnicodeError:
        raise error.Abort(
            _(b"[win32mbcs] filename conversion failed with %s encoding\n")
            % _encoding
        )
135
135
136
136
def wrapper(func, args, kwds):
    """Run *func* on Unicode strings.

    Byte-string arguments are decoded before the call and the result is
    encoded back. If a positional argument is already ``str``, *func* is
    called with the arguments unchanged.
    """
    return basewrapper(func, str, encode, decode, args, kwds)
139
139
140
140
def reversewrapper(func, args, kwds):
    """Run *func* on local-encoded byte strings.

    This is the mirror image of ``wrapper``: ``str`` arguments are encoded
    before the call and the result is decoded back. If a positional argument
    is already ``bytes``, *func* is called with the arguments unchanged.
    """
    # The "already converted" type here is bytes, not str: with str, Unicode
    # arguments would skip the conversion and byte-string callers would get
    # their result decoded to str.
    return basewrapper(func, bytes, decode, encode, args, kwds)
143
143
144
144
def wrapperforlistdir(func, args, kwds):
    """Call *func* after making sure its path argument ends with a separator.

    The path is taken from the first positional argument and/or the ``path``
    keyword argument; each one present is passed through ``appendsep``.
    """
    # Ensure the 'path' argument ends with os.sep to avoid misinterpreting
    # a trailing 0x5c (second byte of an MBCS character) as a path separator.
    if args:
        args = list(args)
        args[0] = appendsep(args[0])
    if 'path' in kwds:
        kwds['path'] = appendsep(kwds['path'])
    return func(*args, **kwds)
154
154
155
155
def wrapname(name: str, wrapper):
    """Replace the function at dotted path *name* with a wrapped version.

    *name* is ``"<module path>.<attribute>"``. The module is looked up in
    ``sys.modules``, so it must already be imported. The replacement calls
    ``wrapper(func, args, kwds)`` with the original function and the call's
    positional / keyword arguments, and keeps the original ``__name__``.

    Raises ``KeyError`` if the module is not in ``sys.modules`` and
    ``AttributeError`` if the module has no such attribute.
    """
    modname, attr = name.rsplit('.', 1)
    module = sys.modules[modname]
    func = getattr(module, attr)

    def f(*args, **kwds):
        return wrapper(func, args, kwds)

    f.__name__ = func.__name__
    setattr(module, attr, f)
166
166
167
167
# Whitespace-separated list of functions to be wrapped.
# NOTE: os.path.dirname() and os.path.basename() are safe because
# they use the result of os.path.split()
funcs = '''os.path.join os.path.split os.path.splitext
os.path.normpath os.makedirs mercurial.util.endswithsep
mercurial.util.splitpath mercurial.util.fscasesensitive
mercurial.util.fspath mercurial.util.pconvert mercurial.util.normpath
mercurial.util.checkwinfilename mercurial.util.checkosfilename
mercurial.util.split'''

# These functions must be called with local-encoded strings: they expect
# their argument to be a local-encoded string and cause problems when
# given a Unicode string.
rfuncs = '''mercurial.encoding.upper mercurial.encoding.lower
mercurial.util._filenamebytestr'''

# List of Windows-specific functions to be wrapped.
winfuncs = '''os.path.splitunc'''

# Codec and alias names of sjis and big5 to be faked.
problematic_encodings = b'''big5 big5-tw csbig5 big5hkscs big5-hkscs
hkscs cp932 932 ms932 mskanji ms-kanji shift_jis csshiftjis shiftjis
sjis s_jis shift_jis_2004 shiftjis2004 sjis_2004 sjis2004
shift_jisx0213 shiftjisx0213 sjisx0213 s_jisx0213 950 cp950 ms950 '''
192
192
193
193
def extsetup(ui):
    """Activate the extension: pick the filename encoding and install wrappers.

    Warns and returns without doing anything when the platform does not
    support Unicode filenames and is not cygwin. Otherwise the encoding is
    read from the ``win32mbcs.encoding`` config item into the module-level
    ``_encoding``. The wrappers are installed only if that encoding is one
    of ``problematic_encodings``.
    """
    # TODO: decide use of config section for this extension
    if (not os.path.supports_unicode_filenames) and (
        pycompat.sysplatform != b'cygwin'
    ):
        ui.warn(_(b"[win32mbcs] cannot activate on this platform.\n"))
        return
    # determine encoding for filename
    global _encoding
    _encoding = ui.config(b'win32mbcs', b'encoding')
    # fake is only for relevant environment.
    if _encoding.lower() in problematic_encodings.split():
        for f in funcs.split():
            wrapname(f, wrapper)
        if pycompat.iswindows:
            for f in winfuncs.split():
                modname, attr = f.rsplit('.', 1)
                # os.path.splitunc was removed in Python 3.7; skip names
                # that do not exist instead of failing with AttributeError.
                if hasattr(sys.modules[modname], attr):
                    wrapname(f, wrapper)
        # NOTE(review): wrapname() looks modules up in sys.modules, so
        # mercurial.windows must already be imported here -- confirm this
        # holds on every platform that reaches this point.
        wrapname("mercurial.util.listdir", wrapperforlistdir)
        wrapname("mercurial.windows.listdir", wrapperforlistdir)
        # wrap functions to be called with local byte string arguments
        for f in rfuncs.split():
            wrapname(f, reversewrapper)
        # Check sys.argv manually instead of using ui.debug() because
        # command line options are not yet applied when
        # extensions.loadall() is called.
        if '--debug' in sys.argv:
            ui.writenoi18n(
                b"[win32mbcs] activated with encoding: %s\n" % _encoding
            )
General Comments 0
You need to be logged in to leave comments. Login now