##// END OF EJS Templates
configitems: register the 'win32mbcs.encoding' config
Boris Feld -
r34181:37513324 default
parent child Browse files
Show More
@@ -1,196 +1,206
1 1 # win32mbcs.py -- MBCS filename support for Mercurial
2 2 #
3 3 # Copyright (c) 2008 Shun-ichi Goto <shunichi.goto@gmail.com>
4 4 #
5 5 # Version: 0.3
6 6 # Author: Shun-ichi Goto <shunichi.goto@gmail.com>
7 7 #
8 8 # This software may be used and distributed according to the terms of the
9 9 # GNU General Public License version 2 or any later version.
10 10 #
11 11
12 12 '''allow the use of MBCS paths with problematic encodings
13 13
14 14 Some MBCS encodings are not good for some path operations (e.g.
15 15 splitting paths, case conversion, etc.) on their encoded bytes. We call
16 16 such an encoding (e.g. shift_jis and big5) a "problematic encoding".
17 17 This extension can be used to fix the issue with those encodings by
18 18 wrapping some functions to convert to Unicode string before path
19 19 operation.
20 20
21 21 This extension is useful for:
22 22
23 23 - Japanese Windows users using shift_jis encoding.
24 24 - Chinese Windows users using big5 encoding.
25 25 - All users who use a repository with one of the problematic encodings
26 26 on a case-insensitive file system.
27 27
28 28 This extension is not needed for:
29 29
30 30 - Any user who uses only ASCII chars in paths.
31 31 - Any user who does not use any of the problematic encodings.
32 32
33 33 Note that there are some limitations on using this extension:
34 34
35 35 - You should use a single encoding in one repository.
36 36 - If the repository path ends with 0x5c, .hg/hgrc cannot be read.
37 37 - win32mbcs is not compatible with fixutf8 extension.
38 38
39 39 By default, win32mbcs uses encoding.encoding decided by Mercurial.
40 40 You can specify the encoding by config option::
41 41
42 42 [win32mbcs]
43 43 encoding = sjis
44 44
45 45 It is useful for the users who want to commit with UTF-8 log message.
46 46 '''
47 47 from __future__ import absolute_import
48 48
49 49 import os
50 50 import sys
51 51
52 52 from mercurial.i18n import _
53 53 from mercurial import (
54 54 encoding,
55 55 error,
56 56 pycompat,
57 registrar,
57 58 )
58 59
59 60 # Note for extension authors: ONLY specify testedwith = 'ships-with-hg-core' for
60 61 # extensions which SHIP WITH MERCURIAL. Non-mainline extensions should
61 62 # be specifying the version(s) of Mercurial they are tested with, or
62 63 # leave the attribute unspecified.
63 64 testedwith = 'ships-with-hg-core'
64 65
configtable = {}
configitem = registrar.configitem(configtable)

# encoding.encoding may be updated by the --encoding option, so the
# default is given as a lambda to delay its resolution.
configitem(
    'win32mbcs',
    'encoding',
    default=lambda: encoding.encoding,
)
74
65 75 _encoding = None # see extsetup
66 76
def decode(arg):
    '''Recursively convert local-encoded byte strings in arg to unicode.

    - str: decoded with the module-level _encoding. The result must encode
      back to exactly the same bytes, otherwise UnicodeError is raised.
    - tuple / list: a new tuple / list with every element decoded.
    - dict: every value is decoded and stored back into the same dict
      (keys are left untouched); that same dict is returned.
    - any other object is returned unchanged.
    '''
    if isinstance(arg, str):
        uarg = arg.decode(_encoding)
        # Round-trip check: refuse input that does not survive
        # decode/encode unchanged in _encoding.
        if arg == uarg.encode(_encoding):
            return uarg
        raise UnicodeError("Not local encoding")
    elif isinstance(arg, tuple):
        return tuple([decode(item) for item in arg])
    elif isinstance(arg, list):
        # Build a real list: map() returns a lazy iterator on Python 3,
        # which callers expecting a list could not index or reuse.
        return [decode(item) for item in arg]
    elif isinstance(arg, dict):
        # Values are replaced in place; the key set does not change, so
        # updating while iterating is safe.
        for k, v in arg.items():
            arg[k] = decode(v)
    return arg
81 91
def encode(arg):
    '''Recursively convert unicode strings in arg to local-encoded bytes.

    - unicode: encoded with the module-level _encoding.
    - tuple / list: a new tuple / list with every element encoded.
    - dict: every value is encoded and stored back into the same dict
      (keys are left untouched); that same dict is returned.
    - any other object is returned unchanged.
    '''
    if isinstance(arg, unicode):
        return arg.encode(_encoding)
    elif isinstance(arg, tuple):
        return tuple([encode(item) for item in arg])
    elif isinstance(arg, list):
        # Build a real list: map() returns a lazy iterator on Python 3,
        # which callers expecting a list could not index or reuse.
        return [encode(item) for item in arg]
    elif isinstance(arg, dict):
        # Values are replaced in place; the key set does not change, so
        # updating while iterating is safe.
        for k, v in arg.items():
            arg[k] = encode(v)
    return arg
93 103
def appendsep(s):
    '''Return s, with pycompat.ossep appended unless it already ends with
    one of ':', '/' or a backslash.

    The last character is inspected on the decoded form of s when decoding
    succeeds; if decode() raises UnicodeError the raw value is inspected
    instead. An empty s is returned unchanged.
    '''
    try:
        probe = decode(s)
    except UnicodeError:
        probe = s
    if not probe or probe[-1] in ':/\\':
        return s
    return s + pycompat.ossep
103 113
104 114
def basewrapper(func, argtype, enc, dec, args, kwds):
    '''Call func with converted arguments and convert its result back.

    If any positional argument is already an instance of argtype, the
    arguments are taken as already converted and func is called with
    args/kwds untouched, returning its result as is.

    Otherwise args and kwds are passed through dec, func is called with
    the converted values, and its return value is passed through enc.
    A UnicodeError raised anywhere in that sequence is reported as
    error.Abort naming the current _encoding.
    '''
    # Already converted? Then call the original directly.
    if any(isinstance(candidate, argtype) for candidate in args):
        return func(*args, **kwds)

    try:
        # Convert the arguments, call func, then convert the result back.
        converted_args = dec(args)
        converted_kwds = dec(kwds)
        result = func(*converted_args, **converted_kwds)
        return enc(result)
    except UnicodeError:
        raise error.Abort(_("[win32mbcs] filename conversion failed with"
                            " %s encoding\n") % (_encoding))
118 128
def wrapper(func, args, kwds):
    '''Call func with its string arguments decoded to unicode and its
    result encoded back, unless a positional argument is already unicode.
    '''
    return basewrapper(func, argtype=unicode, enc=encode, dec=decode,
                       args=args, kwds=kwds)
121 131
122 132
def reversewrapper(func, args, kwds):
    '''Call func with its unicode arguments encoded to local byte strings
    and its result decoded back, unless a positional argument is already
    a str.
    '''
    return basewrapper(func, argtype=str, enc=decode, dec=encode,
                       args=args, kwds=kwds)
125 135
def wrapperforlistdir(func, args, kwds):
    '''Call func after making sure its path argument ends with a separator.

    This avoids misinterpreting a trailing 0x5c, which may be the second
    byte of an MBCS character, as a path separator. Both the first
    positional argument and a 'path' keyword argument are handled.
    '''
    if args:
        first = appendsep(args[0])
        args = [first] + list(args[1:])
    if 'path' in kwds:
        kwds['path'] = appendsep(kwds['path'])
    return func(*args, **kwds)
135 145
def wrapname(name, wrapper):
    '''Replace the function at dotted path name with a wrapping function.

    name is split at its last dot into a module path, looked up in
    sys.modules, and an attribute name. The replacement forwards every
    call as wrapper(original, args, kwds) and takes over the original's
    __name__.
    '''
    modname, attr = name.rsplit('.', 1)
    owner = sys.modules[modname]
    original = getattr(owner, attr)

    def wrapped(*args, **kwds):
        return wrapper(original, args, kwds)

    wrapped.__name__ = original.__name__
    setattr(owner, attr, wrapped)
144 154
145 155 # List of functions to be wrapped.
146 156 # NOTE: os.path.dirname() and os.path.basename() are safe because
147 157 # they use result of os.path.split()
148 158 funcs = '''os.path.join os.path.split os.path.splitext
149 159 os.path.normpath os.makedirs mercurial.util.endswithsep
150 160 mercurial.util.splitpath mercurial.util.fscasesensitive
151 161 mercurial.util.fspath mercurial.util.pconvert mercurial.util.normpath
152 162 mercurial.util.checkwinfilename mercurial.util.checkosfilename
153 163 mercurial.util.split'''
154 164
155 165 # These functions must be called with local encoded strings
156 166 # because they expect their arguments to be local encoded strings and
157 167 # cause problems when given unicode strings.
158 168 rfuncs = '''mercurial.encoding.upper mercurial.encoding.lower
159 169 mercurial.util._filenamebytestr'''
160 170
161 171 # List of Windows specific functions to be wrapped.
162 172 winfuncs = '''os.path.splitunc'''
163 173
164 174 # codec and alias names of sjis and big5 to be faked.
165 175 problematic_encodings = '''big5 big5-tw csbig5 big5hkscs big5-hkscs
166 176 hkscs cp932 932 ms932 mskanji ms-kanji shift_jis csshiftjis shiftjis
167 177 sjis s_jis shift_jis_2004 shiftjis2004 sjis_2004 sjis2004
168 178 shift_jisx0213 shiftjisx0213 sjisx0213 s_jisx0213 950 cp950 ms950 '''
169 179
def extsetup(ui):
    '''Install the filename-conversion wrappers when they are needed.

    Warns and does nothing when os.path does not support unicode
    filenames and the platform is not cygwin. Otherwise the module-level
    _encoding is set from the win32mbcs.encoding config option, and the
    wrappers are installed only if that encoding is one of the
    problematic encodings.
    '''
    global _encoding
    # TODO: decide use of config section for this extension
    platform_ok = (os.path.supports_unicode_filenames
                   or pycompat.sysplatform == 'cygwin')
    if not platform_ok:
        ui.warn(_("[win32mbcs] cannot activate on this platform.\n"))
        return

    # Determine the encoding used for filenames.
    _encoding = ui.config('win32mbcs', 'encoding')

    # Faking is only relevant for the problematic encodings.
    if _encoding.lower() not in problematic_encodings.split():
        return

    for target in funcs.split():
        wrapname(target, wrapper)
    if pycompat.osname == 'nt':
        for target in winfuncs.split():
            wrapname(target, wrapper)
    for target in ("mercurial.util.listdir", "mercurial.windows.listdir"):
        wrapname(target, wrapperforlistdir)
    # Wrap the functions that must be called with local byte strings.
    for target in rfuncs.split():
        wrapname(target, reversewrapper)

    # Check sys.argv manually instead of using ui.debug() because
    # command line options are not yet applied when
    # extensions.loadall() is called.
    if '--debug' in sys.argv:
        ui.write(("[win32mbcs] activated with encoding: %s\n")
                 % _encoding)
General Comments 0
You need to be logged in to leave comments. Login now