##// END OF EJS Templates
win32mbcs: unbyteify some strings for py3 support...
Matt Harbison -
r51635:48d9af6b stable
parent child Browse files
Show More
@@ -1,222 +1,222 b''
1 1 # win32mbcs.py -- MBCS filename support for Mercurial
2 2 #
3 3 # Copyright (c) 2008 Shun-ichi Goto <shunichi.goto@gmail.com>
4 4 #
5 5 # Version: 0.3
6 6 # Author: Shun-ichi Goto <shunichi.goto@gmail.com>
7 7 #
8 8 # This software may be used and distributed according to the terms of the
9 9 # GNU General Public License version 2 or any later version.
10 10 #
11 11
12 12 '''allow the use of MBCS paths with problematic encodings
13 13
14 14 Some MBCS encodings are not good for some path operations (e.g.
15 15 splitting path, case conversion, etc.) on their encoded bytes. We call
16 16 such an encoding (e.g. shift_jis and big5) a "problematic encoding".
17 17 This extension can be used to fix the issue with those encodings by
18 18 wrapping some functions to convert to Unicode string before path
19 19 operation.
20 20
21 21 This extension is useful for:
22 22
23 23 - Japanese Windows users using shift_jis encoding.
24 24 - Chinese Windows users using big5 encoding.
25 25 - All users who use a repository with one of the problematic encodings
26 26 on a case-insensitive file system.
27 27
28 28 This extension is not needed for:
29 29
30 30 - Any user who uses only ASCII chars in paths.
31 31 - Any user who does not use any of the problematic encodings.
32 32
33 33 Note that there are some limitations on using this extension:
34 34
35 35 - You should use a single encoding in one repository.
36 36 - If the repository path ends with 0x5c, .hg/hgrc cannot be read.
37 37 - win32mbcs is not compatible with fixutf8 extension.
38 38
39 39 By default, win32mbcs uses encoding.encoding decided by Mercurial.
40 40 You can specify the encoding by config option::
41 41
42 42 [win32mbcs]
43 43 encoding = sjis
44 44
45 45 It is useful for the users who want to commit with UTF-8 log message.
46 46 '''
47 47
48 48 import os
49 49 import sys
50 50
51 51 from mercurial.i18n import _
52 52 from mercurial.pycompat import getattr, setattr
53 53 from mercurial import (
54 54 encoding,
55 55 error,
56 56 pycompat,
57 57 registrar,
58 58 )
59 59
60 60 # Note for extension authors: ONLY specify testedwith = 'ships-with-hg-core' for
61 61 # extensions which SHIP WITH MERCURIAL. Non-mainline extensions should
62 62 # be specifying the version(s) of Mercurial they are tested with, or
63 63 # leave the attribute unspecified.
64 64 testedwith = b'ships-with-hg-core'
65 65
66 66 configtable = {}
67 67 configitem = registrar.configitem(configtable)
68 68
69 69 # encoding.encoding may be updated by the --encoding option.
70 70 # Use a lambda to delay the resolution.
71 71 configitem(
72 72 b'win32mbcs',
73 73 b'encoding',
74 74 default=lambda: encoding.encoding,
75 75 )
76 76
77 77 _encoding = None # assigned by extsetup() from the win32mbcs.encoding config
78 78
79 79
def decode(arg):
    """Recursively convert local-encoded byte strings in ``arg`` to ``str``.

    ``bytes`` values are decoded with the module-level ``_encoding``; tuples
    and lists are converted element by element (preserving the container
    type); dict values are converted in place, keys are left untouched. Any
    other value is returned unchanged.

    Raises UnicodeError when a byte string does not round-trip through the
    configured encoding.
    """
    if isinstance(arg, bytes):
        # bytes.decode()/str.encode() only accept a ``str`` codec name, while
        # other code in this module formats ``_encoding`` into bytes
        # messages, so normalize a bytes name here.
        codec = _encoding
        if isinstance(codec, bytes):
            codec = codec.decode('ascii')
        uarg = arg.decode(codec)
        # Only accept input that re-encodes to exactly the same bytes.
        if arg == uarg.encode(codec):
            return uarg
        raise UnicodeError("Not local encoding")
    elif isinstance(arg, tuple):
        return tuple(map(decode, arg))
    elif isinstance(arg, list):
        # map() is lazy on Python 3; callers expect a real list back.
        return list(map(decode, arg))
    elif isinstance(arg, dict):
        # Rebinding existing keys does not resize the dict, so mutating
        # while iterating is safe here.
        for k, v in arg.items():
            arg[k] = decode(v)
    return arg
94 94
95 95
def encode(arg):
    """Recursively convert ``str`` values in ``arg`` to local-encoded bytes.

    ``str`` values are encoded with the module-level ``_encoding``; tuples
    and lists are converted element by element (preserving the container
    type); dict values are converted in place, keys are left untouched. Any
    other value is returned unchanged.

    Raises UnicodeError when a string cannot be represented in the
    configured encoding.
    """
    if isinstance(arg, str):
        # str.encode() only accepts a ``str`` codec name, while other code
        # in this module formats ``_encoding`` into bytes messages, so
        # normalize a bytes name here.
        codec = _encoding
        if isinstance(codec, bytes):
            codec = codec.decode('ascii')
        return arg.encode(codec)
    elif isinstance(arg, tuple):
        return tuple(map(encode, arg))
    elif isinstance(arg, list):
        # map() is lazy on Python 3; callers expect a real list back.
        return list(map(encode, arg))
    elif isinstance(arg, dict):
        # Rebinding existing keys does not resize the dict, so mutating
        # while iterating is safe here.
        for k, v in arg.items():
            arg[k] = encode(v)
    return arg
107 107
108 108
def appendsep(s):
    """Return ``s`` with ``pycompat.ossep`` appended unless it already ends
    with one of ``:``, ``/`` or ``\\``.

    The last character is inspected on the decoded form of ``s`` when it can
    be decoded; otherwise the raw value is inspected instead. An empty ``s``
    is returned unchanged.
    """
    try:
        us = decode(s)
    except UnicodeError:
        # Not valid in the local encoding: fall back to the raw value.
        us = s
    # Slice instead of indexing so the last element keeps its string type
    # (indexing bytes yields an int on Python 3), and compare against
    # separators of the matching type.
    seps = b':/\\' if isinstance(us, bytes) else ':/\\'
    if us and us[-1:] not in seps:
        s += pycompat.ossep
    return s
118 118
119 119
def basewrapper(func, argtype, enc, dec, args, kwds):
    """Call ``func`` with converted arguments and convert its result back.

    If any positional argument is already an instance of ``argtype`` the
    arguments are treated as already converted and ``func`` is called
    unchanged. Otherwise ``dec`` is applied to ``args`` and ``kwds``,
    ``func`` is called with the results, and ``enc`` is applied to the
    return value.

    Raises error.Abort when a conversion raises UnicodeError.
    """
    # Already converted: call the original directly.
    if any(isinstance(candidate, argtype) for candidate in args):
        return func(*args, **kwds)

    try:
        # Convert string arguments, call func, then convert back the
        # return value.
        converted_args = dec(args)
        converted_kwds = dec(kwds)
        result = func(*converted_args, **converted_kwds)
        return enc(result)
    except UnicodeError:
        raise error.Abort(
            _(b"[win32mbcs] filename conversion failed with %s encoding\n")
            % _encoding
        )
135 135
136 136
def wrapper(func, args, kwds):
    """Run ``func`` through basewrapper(), decoding its arguments and
    encoding its result; ``str`` arguments are treated as already converted.
    """
    return basewrapper(
        func, str, enc=encode, dec=decode, args=args, kwds=kwds
    )
139 139
140 140
def reversewrapper(func, args, kwds):
    """Run ``func`` through basewrapper(), encoding its arguments to local
    byte strings and decoding its result.

    This is the mirror image of wrapper(): the wrapped function must be
    called with local-encoded byte strings, so arguments that are already
    ``bytes`` count as "already converted" and are passed through untouched.
    """
    # The "already converted" type must be bytes here; using str would pass
    # unicode arguments straight through and decode the result of calls
    # that were made with bytes.
    return basewrapper(func, bytes, decode, encode, args, kwds)
143 143
144 144
def wrapperforlistdir(func, args, kwds):
    """Call ``func`` after passing its path argument through appendsep().

    The path is taken from the first positional argument and/or the
    ``path`` keyword argument. Ending the path with a separator avoids
    misinterpreting a trailing 0x5c (second byte of an MBCS character) as a
    path separator.
    """
    if args:
        first, *remaining = args
        args = [appendsep(first), *remaining]
    if 'path' in kwds:
        kwds['path'] = appendsep(kwds['path'])
    return func(*args, **kwds)
154 154
155 155
def wrapname(name: str, wrapper):
    """Replace the function called ``name`` with a wrapped version.

    ``name`` is a dotted path whose last component is the attribute and
    whose prefix is a module that must already be present in
    ``sys.modules``. The replacement forwards every call to
    ``wrapper(original, args, kwds)`` and keeps the original ``__name__``.
    """
    modname, attr = name.rsplit('.', 1)
    target = sys.modules[modname]
    original = getattr(target, attr)

    def f(*args, **kwds):
        return wrapper(original, args, kwds)

    f.__name__ = original.__name__
    setattr(target, attr, f)
166 166
167 167
168 168 # List of functions to be wrapped.
169 169 # NOTE: os.path.dirname() and os.path.basename() are safe because
170 170 # they use the result of os.path.split()
171 funcs = b'''os.path.join os.path.split os.path.splitext
171 funcs = '''os.path.join os.path.split os.path.splitext
172 172 os.path.normpath os.makedirs mercurial.util.endswithsep
173 173 mercurial.util.splitpath mercurial.util.fscasesensitive
174 174 mercurial.util.fspath mercurial.util.pconvert mercurial.util.normpath
175 175 mercurial.util.checkwinfilename mercurial.util.checkosfilename
176 176 mercurial.util.split'''
177 177
178 178 # These functions must be called with local-encoded strings because
179 179 # they expect their arguments to be local-encoded and cause problems
180 180 # when given unicode strings.
181 rfuncs = b'''mercurial.encoding.upper mercurial.encoding.lower
181 rfuncs = '''mercurial.encoding.upper mercurial.encoding.lower
182 182 mercurial.util._filenamebytestr'''
183 183
184 184 # List of Windows specific functions to be wrapped.
185 winfuncs = b'''os.path.splitunc'''
185 winfuncs = '''os.path.splitunc'''
186 186
187 187 # codec and alias names of sjis and big5 to be faked.
188 188 problematic_encodings = b'''big5 big5-tw csbig5 big5hkscs big5-hkscs
189 189 hkscs cp932 932 ms932 mskanji ms-kanji shift_jis csshiftjis shiftjis
190 190 sjis s_jis shift_jis_2004 shiftjis2004 sjis_2004 sjis2004
191 191 shift_jisx0213 shiftjisx0213 sjisx0213 s_jisx0213 950 cp950 ms950 '''
192 192
193 193
194 194 def extsetup(ui):
195 195 # TODO: decide use of config section for this extension
196 196 if (not os.path.supports_unicode_filenames) and (
197 197 pycompat.sysplatform != b'cygwin'
198 198 ):
199 199 ui.warn(_(b"[win32mbcs] cannot activate on this platform.\n"))
200 200 return
201 201 # determine encoding for filename
202 202 global _encoding
203 203 _encoding = ui.config(b'win32mbcs', b'encoding')
204 204 # fake is only for relevant environment.
205 205 if _encoding.lower() in problematic_encodings.split():
206 206 for f in funcs.split():
207 207 wrapname(f, wrapper)
208 208 if pycompat.iswindows:
209 209 for f in winfuncs.split():
210 210 wrapname(f, wrapper)
211 wrapname(b"mercurial.util.listdir", wrapperforlistdir)
212 wrapname(b"mercurial.windows.listdir", wrapperforlistdir)
211 wrapname("mercurial.util.listdir", wrapperforlistdir)
212 wrapname("mercurial.windows.listdir", wrapperforlistdir)
213 213 # wrap functions to be called with local byte string arguments
214 214 for f in rfuncs.split():
215 215 wrapname(f, reversewrapper)
216 216 # Check sys.argv manually instead of using ui.debug() because
217 217 # command line options are not yet applied when
218 218 # extensions.loadall() is called.
219 if b'--debug' in sys.argv:
219 if '--debug' in sys.argv:
220 220 ui.writenoi18n(
221 221 b"[win32mbcs] activated with encoding: %s\n" % _encoding
222 222 )
General Comments 0
You need to be logged in to leave comments. Login now