##// END OF EJS Templates
win32mbcs: use str for encoding value...
Matt Harbison -
r52573:e6508d1e stable
parent child Browse files
Show More
@@ -1,221 +1,222
1 1 # win32mbcs.py -- MBCS filename support for Mercurial
2 2 #
3 3 # Copyright (c) 2008 Shun-ichi Goto <shunichi.goto@gmail.com>
4 4 #
5 5 # Version: 0.3
6 6 # Author: Shun-ichi Goto <shunichi.goto@gmail.com>
7 7 #
8 8 # This software may be used and distributed according to the terms of the
9 9 # GNU General Public License version 2 or any later version.
10 10 #
11 11
12 12 '''allow the use of MBCS paths with problematic encodings
13 13
14 14 Some MBCS encodings are not good for some path operations (e.g.
15 15 splitting path, case conversion, etc.) with their encoded bytes. We call
16 16 such an encoding (e.g. shift_jis and big5) a "problematic encoding".
17 17 This extension can be used to fix the issue with those encodings by
18 18 wrapping some functions to convert to Unicode string before path
19 19 operation.
20 20
21 21 This extension is useful for:
22 22
23 23 - Japanese Windows users using shift_jis encoding.
24 24 - Chinese Windows users using big5 encoding.
25 25 - All users who use a repository with one of the problematic encodings
26 26 on a case-insensitive file system.
27 27
28 28 This extension is not needed for:
29 29
30 30 - Any user who uses only ASCII chars in path.
31 31 - Any user who does not use any of the problematic encodings.
32 32
33 33 Note that there are some limitations on using this extension:
34 34
35 35 - You should use single encoding in one repository.
36 36 - If the repository path ends with 0x5c, .hg/hgrc cannot be read.
37 37 - win32mbcs is not compatible with fixutf8 extension.
38 38
39 39 By default, win32mbcs uses encoding.encoding decided by Mercurial.
40 40 You can specify the encoding by config option::
41 41
42 42 [win32mbcs]
43 43 encoding = sjis
44 44
45 45 It is useful for the users who want to commit with UTF-8 log message.
46 46 '''
47 47
48 48 import os
49 49 import sys
50 50
51 51 from mercurial.i18n import _
52 52 from mercurial import (
53 53 encoding,
54 54 error,
55 55 pycompat,
56 56 registrar,
57 57 )
58 58
# Note for extension authors: ONLY specify testedwith = 'ships-with-hg-core' for
# extensions which SHIP WITH MERCURIAL. Non-mainline extensions should
# be specifying the version(s) of Mercurial they are tested with, or
# leave the attribute unspecified.
testedwith = b'ships-with-hg-core'

# Registry of the config items declared by this extension.
configtable = {}
configitem = registrar.configitem(configtable)

# encoding.encoding may be updated by the --encoding option.
# Use a lambda to delay the resolution.
configitem(
    b'win32mbcs',
    b'encoding',
    default=lambda: encoding.encoding,
)

# Codec name read by decode()/encode(); the real value is assigned in
# extsetup().
_encoding: str = ""  # see extsetup
77 77
78 78
def decode(arg):
    """Recursively convert ``bytes`` found in *arg* to ``str``.

    ``bytes`` values are decoded with the module-level ``_encoding``.
    Tuples and lists are converted element by element into a new tuple
    or list; dict values are converted in place and the same dict object
    is returned.  Any other value is returned unchanged.

    Raises ``UnicodeError`` when a ``bytes`` value cannot be decoded, or
    when re-encoding the decoded text does not reproduce the original
    bytes.
    """
    if isinstance(arg, bytes):
        uarg = arg.decode(_encoding)
        # Only accept the conversion if it round-trips exactly.
        if arg == uarg.encode(_encoding):
            return uarg
        raise UnicodeError("Not local encoding")
    elif isinstance(arg, tuple):
        return tuple(map(decode, arg))
    elif isinstance(arg, list):
        # Build a real list: a bare map() is a one-shot iterator on
        # Python 3 and would not behave like the list that was passed in.
        return [decode(item) for item in arg]
    elif isinstance(arg, dict):
        # Values are replaced in place; keys are left untouched.
        for k, v in arg.items():
            arg[k] = decode(v)
    return arg
93 93
94 94
def encode(arg):
    """Recursively convert ``str`` found in *arg* to ``bytes``.

    ``str`` values are encoded with the module-level ``_encoding``.
    Tuples and lists are converted element by element into a new tuple
    or list; dict values are converted in place and the same dict object
    is returned.  Any other value is returned unchanged.

    ``str.encode`` may raise ``UnicodeError`` for text that the codec
    cannot represent.
    """
    if isinstance(arg, str):
        return arg.encode(_encoding)
    elif isinstance(arg, tuple):
        return tuple(map(encode, arg))
    elif isinstance(arg, list):
        # Build a real list: a bare map() is a one-shot iterator on
        # Python 3 and would not behave like the list that was passed in.
        return [encode(item) for item in arg]
    elif isinstance(arg, dict):
        # Values are replaced in place; keys are left untouched.
        for k, v in arg.items():
            arg[k] = encode(v)
    return arg
106 106
107 107
def appendsep(s):
    """Return *s* with a trailing path separator appended if it lacks one.

    The last character is inspected on the decoded form of *s* so that a
    trailing 0x5c byte belonging to a multi-byte character is not taken
    for a backslash.  Empty values are returned unchanged.
    """
    # ensure the path ends with os.sep, appending it if necessary.
    try:
        us = decode(s)
    except UnicodeError:
        us = s  # TODO: how to handle this bytes case??
    if not us:
        return s
    if isinstance(us, bytes):
        # Undecodable bytes: slice instead of indexing, because indexing
        # bytes yields an int, which cannot be tested against a str.
        needs_sep = us[-1:] not in (b':', b'/', b'\\')
    else:
        needs_sep = us[-1] not in ':/\\'
    if needs_sep:
        # Append a separator of the same type as the input.
        s += pycompat.ossep if isinstance(s, bytes) else os.sep
    return s
117 117
118 118
def basewrapper(func, argtype, enc, dec, args, kwds):
    """Call *func* with converted arguments and convert its result back.

    If any positional argument is already an instance of *argtype*, the
    arguments are treated as converted and *func* is called as is.
    Otherwise *dec* is applied to *args* and *kwds*, *func* is called
    with the results, and *enc* is applied to the return value.

    A ``UnicodeError`` raised during that sequence is reported as
    ``error.Abort``.
    """
    # check whether already converted, then call original
    if any(isinstance(candidate, argtype) for candidate in args):
        return func(*args, **kwds)

    try:
        # convert string arguments, call func, then convert back the
        # return value.
        converted_args = dec(args)
        converted_kwds = dec(kwds)
        result = func(*converted_args, **converted_kwds)
        return enc(result)
    except UnicodeError:
        raise error.Abort(
            _(b"[win32mbcs] filename conversion failed with %s encoding\n")
            % encoding.strtolocal(_encoding)
        )
134 134
135 135
def wrapper(func, args, kwds):
    """Run *func* through basewrapper, decoding arguments and encoding
    the result; arguments that already contain a ``str`` are passed
    through unconverted."""
    return basewrapper(
        func, argtype=str, enc=encode, dec=decode, args=args, kwds=kwds
    )
138 138
139 139
def reversewrapper(func, args, kwds):
    """Run *func* with local-encoded ``bytes`` arguments.

    This is the mirror image of ``wrapper``: ``str`` arguments are
    encoded to ``bytes`` before the call and the result is decoded back
    to ``str``.  When a positional argument is already ``bytes`` the
    call is made untouched, so byte-string callers keep getting the
    original return value.
    """
    # "Already converted" here means already bytes, so the type to look
    # for is bytes, not str.
    return basewrapper(func, bytes, decode, encode, args, kwds)
142 142
143 143
def wrapperforlistdir(func, args, kwds):
    """Call *func* after passing its path argument through appendsep.

    The path is taken from the first positional argument and/or the
    ``path`` keyword argument, whichever are present.
    """
    # Ensure 'path' argument ends with os.sep to avoid
    # misinterpreting last 0x5c of MBCS 2nd byte as path separator.
    if args:
        head, *tail = args
        args = [appendsep(head), *tail]
    if 'path' in kwds:
        kwds['path'] = appendsep(kwds['path'])
    return func(*args, **kwds)
153 153
154 154
def wrapname(name: str, wrapper):
    """Replace the function at dotted path *name* with a wrapped version.

    *name* is split at its last dot into a module name, looked up in
    ``sys.modules``, and an attribute name.  The attribute is replaced
    by a function that calls ``wrapper(func, args, kwds)`` with the
    original function and the call's arguments.  The replacement keeps
    the original's ``__name__``.
    """
    modname, attr = name.rsplit('.', 1)
    owner = sys.modules[modname]
    func = getattr(owner, attr)

    def f(*args, **kwds):
        return wrapper(func, args, kwds)

    f.__name__ = func.__name__
    setattr(owner, attr, f)
165 165
166 166
# List of functions to be wrapped (whitespace-separated dotted names).
# NOTE: os.path.dirname() and os.path.basename() are safe because
# they use result of os.path.split()
funcs = '''os.path.join os.path.split os.path.splitext
os.path.normpath os.makedirs mercurial.util.endswithsep
mercurial.util.splitpath mercurial.util.fscasesensitive
mercurial.util.fspath mercurial.util.pconvert mercurial.util.normpath
mercurial.util.checkwinfilename mercurial.util.checkosfilename
mercurial.util.split'''

# These functions must be called with local-encoded strings because
# they expect their argument to be local-encoded and cause problems
# with unicode strings.
rfuncs = '''mercurial.encoding.upper mercurial.encoding.lower
mercurial.util._filenamebytestr'''

# List of Windows specific functions to be wrapped.
# NOTE(review): os.path.splitunc was removed in Python 3.7, so looking
# it up there would raise AttributeError -- confirm this entry is still
# wanted.
winfuncs = '''os.path.splitunc'''

# codec and alias names of sjis and big5 to be faked.
# This is a bytes literal, so splitting it yields bytes names.
problematic_encodings = b'''big5 big5-tw csbig5 big5hkscs big5-hkscs
hkscs cp932 932 ms932 mskanji ms-kanji shift_jis csshiftjis shiftjis
sjis s_jis shift_jis_2004 shiftjis2004 sjis_2004 sjis2004
shift_jisx0213 shiftjisx0213 sjisx0213 s_jisx0213 950 cp950 ms950 '''
191 191
192 192
def extsetup(ui):
    """Install the MBCS wrappers when the configured encoding needs them.

    Reads ``win32mbcs.encoding`` from *ui*, stores it as a ``str`` in the
    module-level ``_encoding``, and, if that name is listed in
    ``problematic_encodings``, wraps the functions named in ``funcs``,
    ``rfuncs`` and (on Windows) ``winfuncs`` and the listdir functions.
    Emits a warning and does nothing on platforms without unicode
    filename support, other than cygwin.
    """
    # TODO: decide use of config section for this extension
    if (not os.path.supports_unicode_filenames) and (
        pycompat.sysplatform != b'cygwin'
    ):
        ui.warn(_(b"[win32mbcs] cannot activate on this platform.\n"))
        return
    # determine encoding for filename
    global _encoding
    rawencoding = ui.config(b'win32mbcs', b'encoding')
    _encoding = encoding.strfromlocal(rawencoding)
    # fake is only for relevant environment.
    # problematic_encodings holds bytes names, so the membership test
    # must use the bytes form of the encoding name; a str would never
    # match and the wrappers would never be installed.
    if rawencoding.lower() in problematic_encodings.split():
        for f in funcs.split():
            wrapname(f, wrapper)
        if pycompat.iswindows:
            for f in winfuncs.split():
                wrapname(f, wrapper)
            wrapname("mercurial.util.listdir", wrapperforlistdir)
            wrapname("mercurial.windows.listdir", wrapperforlistdir)
        # wrap functions to be called with local byte string arguments
        for f in rfuncs.split():
            wrapname(f, reversewrapper)
        # Check sys.argv manually instead of using ui.debug() because
        # command line options are not yet applied when
        # extensions.loadall() is called.
        if '--debug' in sys.argv:
            ui.writenoi18n(
                b"[win32mbcs] activated with encoding: %s\n"
                % encoding.strtolocal(_encoding)
            )
General Comments 0
You need to be logged in to leave comments. Login now