##// END OF EJS Templates
encoding: drop circular import by proxying through '<policy>.charencode'...
Yuya Nishihara -
r33756:f5fc54e7 default
parent child Browse files
Show More
@@ -0,0 +1,22 b''
1 # charencode.py - miscellaneous character encoding
2 #
3 # Copyright 2005-2009 Matt Mackall <mpm@selenic.com> and others
4 #
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
7
8 from __future__ import absolute_import
9
def asciilower(s):
    '''convert a string to lowercase if ASCII

    Raises UnicodeDecodeError if non-ASCII characters are found.'''
    # the decode is a validity check only; its result is discarded and
    # the byte string itself is case-folded
    s.decode('ascii')
    return s.lower()
16
def asciiupper(s):
    '''convert a string to uppercase if ASCII

    Raises UnicodeDecodeError if non-ASCII characters are found.'''
    # the decode is a validity check only; its result is discarded and
    # the byte string itself is case-folded
    s.decode('ascii')
    return s.upper()
@@ -1,602 +1,575 b''
1 # encoding.py - character transcoding support for Mercurial
1 # encoding.py - character transcoding support for Mercurial
2 #
2 #
3 # Copyright 2005-2009 Matt Mackall <mpm@selenic.com> and others
3 # Copyright 2005-2009 Matt Mackall <mpm@selenic.com> and others
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import array
10 import array
11 import locale
11 import locale
12 import os
12 import os
13 import unicodedata
13 import unicodedata
14
14
15 from . import (
15 from . import (
16 error,
16 error,
17 policy,
17 policy,
18 pycompat,
18 pycompat,
19 )
19 )
20
20
# proxy the ASCII case-folding helpers through the policy module so the
# C implementation is picked up when available; per the changeset this
# indirection exists to drop a circular import
charencode = policy.importmod(r'charencode')

asciilower = charencode.asciilower
asciiupper = charencode.asciiupper
# shorthand used below to turn byte strings (e.g. codec names) into the
# native str type expected by decode()/encode()
_sysstr = pycompat.sysstr

if pycompat.ispy3:
    unichr = chr  # Python 3 has no unichr builtin

# These unicode characters are ignored by HFS+ (Apple Technote 1150,
# "Unicode Subtleties"), so we need to ignore them in some places for
# sanity.
_ignore = [unichr(int(x, 16)).encode("utf-8") for x in
           "200c 200d 200e 200f 202a 202b 202c 202d 202e "
           "206a 206b 206c 206d 206e 206f feff".split()]
# verify the next function will work
assert all(i.startswith(("\xe2", "\xef")) for i in _ignore)
34
39
def hfsignoreclean(s):
    """Remove codepoints ignored by HFS+ from s.

    >>> hfsignoreclean(u'.h\u200cg'.encode('utf-8'))
    '.hg'
    >>> hfsignoreclean(u'.h\ufeffg'.encode('utf-8'))
    '.hg'
    """
    # every ignorable sequence starts with one of these two lead bytes
    # (checked by the assert above), so this test is a cheap fast path
    if "\xe2" not in s and "\xef" not in s:
        return s
    for junk in _ignore:
        s = s.replace(junk, '')
    return s
47
52
# encoding.environ is provided read-only, which may not be used to modify
# the process environment
_nativeenviron = (not pycompat.ispy3 or os.supports_bytes_environ)
if not pycompat.ispy3:
    environ = os.environ  # re-exports
elif _nativeenviron:
    # Python 3 with bytes environ support: use the bytes view directly
    environ = os.environb  # re-exports
else:
    # preferred encoding isn't known yet; use utf-8 to avoid unicode error
    # and recreate it once encoding is settled
    environ = dict((k.encode(u'utf-8'), v.encode(u'utf-8'))
                   for k, v in os.environ.items())  # re-exports
60
65
# map locale names that are known to be aliases for ASCII onto 'ascii'
_encodingfixers = {
    '646': lambda: 'ascii',
    'ANSI_X3.4-1968': lambda: 'ascii',
}

try:
    # HGENCODING overrides the locale; otherwise ask the locale module
    encoding = environ.get("HGENCODING")
    if not encoding:
        encoding = locale.getpreferredencoding().encode('ascii') or 'ascii'
    encoding = _encodingfixers.get(encoding, lambda: encoding)()
except locale.Error:
    encoding = 'ascii'
encodingmode = environ.get("HGENCODINGMODE", "strict")
fallbackencoding = 'ISO-8859-1'
75
80
class localstr(str):
    '''This class allows strings that are unmodified to be
    round-tripped to the local encoding and back'''

    def __new__(cls, u, l):
        # the string value itself is the local-encoding form; the exact
        # UTF-8 original is cached alongside it for lossless round-trips
        inst = str.__new__(cls, l)
        inst._utf8 = u
        return inst

    def __hash__(self):
        # hash the cached UTF-8 form so two different UTF-8 strings that
        # map to the same lossy local string do not collide
        return hash(self._utf8)
85
90
def tolocal(s):
    """
    Convert a string from internal UTF-8 to local encoding

    All internal strings should be UTF-8 but some repos before the
    implementation of locale support may contain latin1 or possibly
    other character sets. We attempt to decode everything strictly
    using UTF-8, then Latin-1, and failing that, we use UTF-8 and
    replace unknown characters.

    The localstr class is used to cache the known UTF-8 encoding of
    strings next to their local representation to allow lossless
    round-trip conversion back to UTF-8.

    >>> u = 'foo: \\xc3\\xa4' # utf-8
    >>> l = tolocal(u)
    >>> l
    'foo: ?'
    >>> fromlocal(l)
    'foo: \\xc3\\xa4'
    >>> u2 = 'foo: \\xc3\\xa1'
    >>> d = { l: 1, tolocal(u2): 2 }
    >>> len(d) # no collision
    2
    >>> 'foo: ?' in d
    False
    >>> l1 = 'foo: \\xe4' # historical latin1 fallback
    >>> l = tolocal(l1)
    >>> l
    'foo: ?'
    >>> fromlocal(l) # magically in utf-8
    'foo: \\xc3\\xa4'
    """

    try:
        try:
            # make sure string is actually stored in UTF-8
            u = s.decode('UTF-8')
            if encoding == 'UTF-8':
                # fast path
                return s
            r = u.encode(_sysstr(encoding), u"replace")
            if u == r.decode(_sysstr(encoding)):
                # r is a safe, non-lossy encoding of s
                return r
            # lossy: cache the exact UTF-8 form so fromlocal() can
            # recover it unchanged
            return localstr(s, r)
        except UnicodeDecodeError:
            # we should only get here if we're looking at an ancient changeset
            try:
                u = s.decode(_sysstr(fallbackencoding))
                r = u.encode(_sysstr(encoding), u"replace")
                if u == r.decode(_sysstr(encoding)):
                    # r is a safe, non-lossy encoding of s
                    return r
                return localstr(u.encode('UTF-8'), r)
            except UnicodeDecodeError:
                u = s.decode("utf-8", "replace") # last ditch
                # can't round-trip
                return u.encode(_sysstr(encoding), u"replace")
    except LookupError as k:
        # the configured encoding name is unknown to the codecs module
        raise error.Abort(k, hint="please check your locale settings")
147
152
def fromlocal(s):
    """
    Convert a string from the local character encoding to UTF-8

    We attempt to decode strings using the encoding mode set by
    HGENCODINGMODE, which defaults to 'strict'. In this mode, unknown
    characters will cause an error message. Other modes include
    'replace', which replaces unknown characters with a special
    Unicode character, and 'ignore', which drops the character.
    """
    # a localstr caches its exact UTF-8 form: lossless round-trip
    if isinstance(s, localstr):
        return s._utf8

    try:
        decoded = s.decode(_sysstr(encoding), _sysstr(encodingmode))
        return decoded.encode("utf-8")
    except UnicodeDecodeError as inst:
        # show a small window of context around the offending byte
        sub = s[max(0, inst.start - 10):inst.start + 10]
        raise error.Abort("decoding near '%s': %s!" % (sub, inst))
    except LookupError as k:
        raise error.Abort(k, hint="please check your locale settings")
171
176
def unitolocal(u):
    """Convert a unicode string to a byte string of local encoding"""
    utf8bytes = u.encode('utf-8')
    return tolocal(utf8bytes)
175
180
def unifromlocal(s):
    """Convert a byte string of local encoding to a unicode string"""
    utf8bytes = fromlocal(s)
    return utf8bytes.decode('utf-8')
179
184
def unimethod(bytesfunc):
    """Create a proxy method that forwards __unicode__() and __str__() of
    Python 3 to __bytes__()"""
    def unifunc(obj):
        # run the bytes-producing method, then lift to unicode
        encoded = bytesfunc(obj)
        return unifromlocal(encoded)
    return unifunc
186
191
# converter functions between native str and byte string. use these if the
# character encoding is not aware (e.g. exception message) or is known to
# be locale dependent (e.g. date formatting.)
if pycompat.ispy3:
    strtolocal = unitolocal
    strfromlocal = unifromlocal
    strmethod = unimethod
else:
    # on Python 2 the native str already is a byte string, so these are
    # no-ops
    strtolocal = pycompat.identity
    strfromlocal = pycompat.identity
    strmethod = pycompat.identity
198
203
if not _nativeenviron:
    # now encoding and helper functions are available, recreate the environ
    # dict to be exported to other modules (replaces the provisional utf-8
    # version built before the locale was settled)
    environ = dict((tolocal(k.encode(u'utf-8')), tolocal(v.encode(u'utf-8')))
                   for k, v in os.environ.items())  # re-exports
204
209
# How to treat ambiguous-width characters. Set to 'wide' to treat as wide.
# The letters are east_asian_width() categories: Wide, Fullwidth, and
# (optionally) Ambiguous — membership in this string means "two columns".
_wide = _sysstr(environ.get("HGENCODINGAMBIGUOUS", "narrow") == "wide"
                and "WFA" or "WF")
208
213
def colwidth(s):
    "Find the column width of a string for display in the local encoding"
    decoded = s.decode(_sysstr(encoding), u'replace')
    return ucolwidth(decoded)
212
217
def ucolwidth(d):
    "Find the column width of a Unicode string for display"
    eaw = getattr(unicodedata, 'east_asian_width', None)
    if eaw is None:
        # no width data available; assume one column per character
        return len(d)
    # wide/fullwidth (and optionally ambiguous) characters take 2 columns
    return sum([eaw(c) in _wide and 2 or 1 for c in d])
219
224
def getcols(s, start, c):
    '''Use colwidth to find a c-column substring of s starting at byte
    index start'''
    # a c-column substring needs at least c bytes, so begin the scan there
    for end in xrange(start + c, len(s)):
        candidate = s[start:end]
        if colwidth(candidate) == c:
            return candidate
227
232
def trim(s, width, ellipsis='', leftside=False):
    """Trim string 's' to at most 'width' columns (including 'ellipsis').

    If 'leftside' is True, left side of string 's' is trimmed.
    'ellipsis' is always placed at trimmed side.

    >>> ellipsis = '+++'
    >>> from . import encoding
    >>> encoding.encoding = 'utf-8'
    >>> t= '1234567890'
    >>> print trim(t, 12, ellipsis=ellipsis)
    1234567890
    >>> print trim(t, 10, ellipsis=ellipsis)
    1234567890
    >>> print trim(t, 8, ellipsis=ellipsis)
    12345+++
    >>> print trim(t, 8, ellipsis=ellipsis, leftside=True)
    +++67890
    >>> print trim(t, 8)
    12345678
    >>> print trim(t, 8, leftside=True)
    34567890
    >>> print trim(t, 3, ellipsis=ellipsis)
    +++
    >>> print trim(t, 1, ellipsis=ellipsis)
    +
    >>> u = u'\u3042\u3044\u3046\u3048\u304a' # 2 x 5 = 10 columns
    >>> t = u.encode(encoding.encoding)
    >>> print trim(t, 12, ellipsis=ellipsis)
    \xe3\x81\x82\xe3\x81\x84\xe3\x81\x86\xe3\x81\x88\xe3\x81\x8a
    >>> print trim(t, 10, ellipsis=ellipsis)
    \xe3\x81\x82\xe3\x81\x84\xe3\x81\x86\xe3\x81\x88\xe3\x81\x8a
    >>> print trim(t, 8, ellipsis=ellipsis)
    \xe3\x81\x82\xe3\x81\x84+++
    >>> print trim(t, 8, ellipsis=ellipsis, leftside=True)
    +++\xe3\x81\x88\xe3\x81\x8a
    >>> print trim(t, 5)
    \xe3\x81\x82\xe3\x81\x84
    >>> print trim(t, 5, leftside=True)
    \xe3\x81\x88\xe3\x81\x8a
    >>> print trim(t, 4, ellipsis=ellipsis)
    +++
    >>> print trim(t, 4, ellipsis=ellipsis, leftside=True)
    +++
    >>> t = '\x11\x22\x33\x44\x55\x66\x77\x88\x99\xaa' # invalid byte sequence
    >>> print trim(t, 12, ellipsis=ellipsis)
    \x11\x22\x33\x44\x55\x66\x77\x88\x99\xaa
    >>> print trim(t, 10, ellipsis=ellipsis)
    \x11\x22\x33\x44\x55\x66\x77\x88\x99\xaa
    >>> print trim(t, 8, ellipsis=ellipsis)
    \x11\x22\x33\x44\x55+++
    >>> print trim(t, 8, ellipsis=ellipsis, leftside=True)
    +++\x66\x77\x88\x99\xaa
    >>> print trim(t, 8)
    \x11\x22\x33\x44\x55\x66\x77\x88
    >>> print trim(t, 8, leftside=True)
    \x33\x44\x55\x66\x77\x88\x99\xaa
    >>> print trim(t, 3, ellipsis=ellipsis)
    +++
    >>> print trim(t, 1, ellipsis=ellipsis)
    +
    """
    try:
        u = s.decode(_sysstr(encoding))
    except UnicodeDecodeError:
        # undecodable input: fall back to byte-based trimming, where one
        # byte is treated as one column
        if len(s) <= width: # trimming is not needed
            return s
        width -= len(ellipsis)
        if width <= 0: # no enough room even for ellipsis
            return ellipsis[:width + len(ellipsis)]
        if leftside:
            return ellipsis + s[-width:]
        return s[:width] + ellipsis

    if ucolwidth(u) <= width: # trimming is not needed
        return s

    width -= len(ellipsis)
    if width <= 0: # no enough room even for ellipsis
        return ellipsis[:width + len(ellipsis)]

    if leftside:
        uslice = lambda i: u[i:]
        concat = lambda s: ellipsis + s
    else:
        uslice = lambda i: u[:-i]
        concat = lambda s: s + ellipsis
    # drop characters one at a time from the trimmed side until the
    # remainder fits in the available columns
    for i in xrange(1, len(u)):
        usub = uslice(i)
        if ucolwidth(usub) <= width:
            return concat(usub.encode(_sysstr(encoding)))
    return ellipsis # no enough room for multi-column characters
320
325
321 def _asciilower(s):
322 '''convert a string to lowercase if ASCII
323
324 Raises UnicodeDecodeError if non-ASCII characters are found.'''
325 s.decode('ascii')
326 return s.lower()
327
def asciilower(s):
    # delay importing avoids cyclic dependency around "parsers" in
    # pure Python build (util => i18n => encoding => parsers => util)
    parsers = policy.importmod(r'parsers')
    # prefer the C implementation when the parsers module provides one
    impl = getattr(parsers, 'asciilower', _asciilower)
    # rebind the module-level name so later calls skip this stub entirely
    global asciilower
    asciilower = impl
    return impl(s)
336
337 def _asciiupper(s):
338 '''convert a string to uppercase if ASCII
339
340 Raises UnicodeDecodeError if non-ASCII characters are found.'''
341 s.decode('ascii')
342 return s.upper()
343
def asciiupper(s):
    # delay importing avoids cyclic dependency around "parsers" in
    # pure Python build (util => i18n => encoding => parsers => util)
    parsers = policy.importmod(r'parsers')
    # prefer the C implementation when the parsers module provides one
    impl = getattr(parsers, 'asciiupper', _asciiupper)
    # rebind the module-level name so later calls skip this stub entirely
    global asciiupper
    asciiupper = impl
    return impl(s)
352
def lower(s):
    "best-effort encoding-aware case-folding of local string s"
    # fast path for pure-ASCII strings
    try:
        return asciilower(s)
    except UnicodeDecodeError:
        pass  # not ASCII; fall through to the encoding-aware path
    try:
        if isinstance(s, localstr):
            ustr = s._utf8.decode("utf-8")
        else:
            ustr = s.decode(_sysstr(encoding), _sysstr(encodingmode))

        folded = ustr.lower()
        if folded == ustr:
            return s # preserve localstring
        return folded.encode(_sysstr(encoding))
    except UnicodeError:
        return s.lower() # we don't know how to fold this except in ASCII
    except LookupError as k:
        raise error.Abort(k, hint="please check your locale settings")
373
346
def upper(s):
    "best-effort encoding-aware case-folding of local string s"
    # fast path for pure-ASCII strings; anything else goes through the
    # encoding-aware fallback
    try:
        return asciiupper(s)
    except UnicodeDecodeError:
        return upperfallback(s)
380
353
def upperfallback(s):
    # encoding-aware uppercase folding for non-ASCII local strings
    try:
        if isinstance(s, localstr):
            ustr = s._utf8.decode("utf-8")
        else:
            ustr = s.decode(_sysstr(encoding), _sysstr(encodingmode))

        folded = ustr.upper()
        if folded == ustr:
            return s # preserve localstring
        return folded.encode(_sysstr(encoding))
    except UnicodeError:
        return s.upper() # we don't know how to fold this except in ASCII
    except LookupError as k:
        raise error.Abort(k, hint="please check your locale settings")
396
369
class normcasespecs(object):
    """Enumerates what a platform's normcase does to ASCII strings.

    This is specified per platform, and should be consistent with what
    normcase on that platform actually does. It must also be kept in
    sync with normcase_spec in util.h.

    lower: normcase lowercases ASCII strings
    upper: normcase uppercases ASCII strings
    other: the fallback function should always be called
    """
    other = 0
    lower = -1
    upper = 1
411
384
# lookup table mapping each byte value to its JSON-escaped representation;
# indices 0-31 get \\uXXXX escapes, printable ASCII passes through
_jsonmap = []
_jsonmap.extend("\\u%04x" % x for x in range(32))
_jsonmap.extend(pycompat.bytechr(x) for x in range(32, 127))
_jsonmap.append('\\u007f')
# common control characters use their short JSON escapes
_jsonmap[0x09] = '\\t'
_jsonmap[0x0a] = '\\n'
_jsonmap[0x22] = '\\"'
_jsonmap[0x5c] = '\\\\'
_jsonmap[0x08] = '\\b'
_jsonmap[0x0c] = '\\f'
_jsonmap[0x0d] = '\\r'
# the paranoid table is copied before the high bytes are appended below,
# so bytes >= 0x80 raise IndexError against it and jsonescape() takes the
# \\uXXXX escaping path for them
_paranoidjsonmap = _jsonmap[:]
_paranoidjsonmap[0x3c] = '\\u003c' # '<' (e.g. escape "</script>")
_paranoidjsonmap[0x3e] = '\\u003e' # '>'
_jsonmap.extend(pycompat.bytechr(x) for x in range(128, 256))
427
400
def jsonescape(s, paranoid=False):
    '''returns a string suitable for JSON

    JSON is problematic for us because it doesn't support non-Unicode
    bytes. To deal with this, we take the following approach:

    - localstr objects are converted back to UTF-8
    - valid UTF-8/ASCII strings are passed as-is
    - other strings are converted to UTF-8b surrogate encoding
    - apply JSON-specified string escaping

    (escapes are doubled in these tests)

    >>> jsonescape('this is a test')
    'this is a test'
    >>> jsonescape('escape characters: \\0 \\x0b \\x7f')
    'escape characters: \\\\u0000 \\\\u000b \\\\u007f'
    >>> jsonescape('escape characters: \\t \\n \\r \\" \\\\')
    'escape characters: \\\\t \\\\n \\\\r \\\\" \\\\\\\\'
    >>> jsonescape('a weird byte: \\xdd')
    'a weird byte: \\xed\\xb3\\x9d'
    >>> jsonescape('utf-8: caf\\xc3\\xa9')
    'utf-8: caf\\xc3\\xa9'
    >>> jsonescape('')
    ''

    If paranoid, non-ascii and common troublesome characters are also escaped.
    This is suitable for web output.

    >>> jsonescape('escape boundary: \\x7e \\x7f \\xc2\\x80', paranoid=True)
    'escape boundary: ~ \\\\u007f \\\\u0080'
    >>> jsonescape('a weird byte: \\xdd', paranoid=True)
    'a weird byte: \\\\udcdd'
    >>> jsonescape('utf-8: caf\\xc3\\xa9', paranoid=True)
    'utf-8: caf\\\\u00e9'
    >>> jsonescape('non-BMP: \\xf0\\x9d\\x84\\x9e', paranoid=True)
    'non-BMP: \\\\ud834\\\\udd1e'
    >>> jsonescape('<foo@example.org>', paranoid=True)
    '\\\\u003cfoo@example.org\\\\u003e'
    '''

    if paranoid:
        jm = _paranoidjsonmap
    else:
        jm = _jsonmap

    u8chars = toutf8b(s)
    try:
        # the paranoid table has no entries above 0x7f, so non-ASCII bytes
        # raise IndexError here and are handled by the slow path below
        return ''.join(jm[x] for x in bytearray(u8chars)) # fast path
    except IndexError:
        pass
    # non-BMP char is represented as UTF-16 surrogate pair
    u16codes = array.array('H', u8chars.decode('utf-8').encode('utf-16'))
    u16codes.pop(0) # drop BOM
    return ''.join(jm[x] if x < 128 else '\\u%04x' % x for x in u16codes)
483
456
# sequence length of a UTF-8 character, indexed by the lead byte's high
# nibble; 0 means a single (ASCII) byte
_utf8len = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 4]

def getutf8char(s, pos):
    '''get the next full utf-8 character in the given string, starting at pos

    Raises a UnicodeError if the given location does not start a valid
    utf-8 character.
    '''
    # the high nibble of the lead byte determines the sequence length
    nbytes = _utf8len[ord(s[pos]) >> 4]
    if not nbytes:
        # single-byte (ASCII) character
        return s[pos]

    char = s[pos:pos + nbytes]
    char.decode("utf-8")  # raises if the sequence is malformed
    return char
502
475
def toutf8b(s):
    '''convert a local, possibly-binary string into UTF-8b

    This is intended as a generic method to preserve data when working
    with schemes like JSON and XML that have no provision for arbitrary
    byte strings: valid UTF-8 (and ASCII) passes through unmodified,
    while any other byte is re-encoded as a UTF-16 surrogate in the
    range U+DC00-U+DCFF (so-called UTF-8b).

    Principles of operation:

    - ASCII and UTF-8 data successfully round-trips and is understood
      by Unicode-oriented clients
    - filenames and file contents in arbitrary other encodings can
      be round-tripped or recovered by clueful clients
    - local strings that have a cached known UTF-8 encoding (aka
      localstr) get sent as UTF-8 so Unicode-oriented clients get the
      Unicode data they want
    - because we must preserve UTF-8 bytestring in places such as
      filenames, metadata can't be roundtripped without help

    (Note: "UTF-8b" often refers to decoding a mix of valid UTF-8 and
    arbitrary bytes into an internal Unicode format that can be
    re-encoded back into the original. Here we are exposing the
    internal surrogate encoding as a UTF-8 string.)
    '''

    # fast path: without an 0xed byte there can be no pre-existing
    # surrogate to re-escape, so clean input may pass through untouched
    if "\xed" not in s:
        if isinstance(s, localstr):
            # tolocal() cached the original UTF-8 form; reuse it
            return s._utf8
        try:
            s.decode('utf-8')
            return s
        except UnicodeDecodeError:
            pass

    out = ""
    i = 0
    size = len(s)
    while i < size:
        try:
            c = getutf8char(s, i)
            if "\xed\xb0\x80" <= c <= "\xed\xb3\xbf":
                # an already-encoded U+DCxx character: escape its first
                # byte over again so decoding stays reversible
                c = unichr(0xdc00 + ord(s[i])).encode('utf-8')
                i += 1
            else:
                i += len(c)
        except UnicodeDecodeError:
            # invalid byte: map it into the U+DCxx surrogate range
            c = unichr(0xdc00 + ord(s[i])).encode('utf-8')
            i += 1
        out += c
    return out
559
532
def fromutf8b(s):
    '''Given a UTF-8b string, return a local, possibly-binary string.

    return the original binary string. This
    is a round-trip process for strings like filenames, but metadata
    that's was passed through tolocal will remain in UTF-8.

    >>> roundtrip = lambda x: fromutf8b(toutf8b(x)) == x
    >>> m = "\\xc3\\xa9\\x99abcd"
    >>> toutf8b(m)
    '\\xc3\\xa9\\xed\\xb2\\x99abcd'
    >>> roundtrip(m)
    True
    >>> roundtrip("\\xc2\\xc2\\x80")
    True
    >>> roundtrip("\\xef\\xbf\\xbd")
    True
    >>> roundtrip("\\xef\\xef\\xbf\\xbd")
    True
    >>> roundtrip("\\xf1\\x80\\x80\\x80\\x80")
    True
    '''

    # fast path: no 0xed byte means no U+DCxx escapes to undo
    if "\xed" not in s:
        return s

    # We could do this with the unicode type but some Python builds
    # use UTF-16 internally (issue5031) which causes non-BMP code
    # points to be escaped. Instead, we use our handy getutf8char
    # helper again to walk the string without "decoding" it.

    out = ""
    i = 0
    size = len(s)
    while i < size:
        c = getutf8char(s, i)
        i += len(c)
        if "\xed\xb0\x80" <= c <= "\xed\xb3\xbf":
            # a U+DCxx surrogate carries the original byte in its low
            # 8 bits; recover it
            c = chr(ord(c.decode("utf-8")) & 0xff)
        out += c
    return out
@@ -1,114 +1,116 b''
# policy.py - module policy logic for Mercurial.
#
# Copyright 2015 Gregory Szorc <gregory.szorc@gmail.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from __future__ import absolute_import

import os
import sys

# Rules for how modules can be loaded. Values are:
#
#    c          - require C extensions
#    allow      - allow pure Python implementation when C loading fails
#    cffi       - required cffi versions (implemented within pure module)
#    cffi-allow - allow pure Python implementation if cffi version is missing
#    py         - only load pure Python modules
#
# By default, fall back to the pure modules so the in-place build can
# run without recompiling the C extensions. This will be overridden by
# __modulepolicy__ generated by setup.py.
policy = b'allow'

# policy name -> (versioned package, pure package); None means the policy
# has no package of that flavour
_packageprefs = {
    b'c': (r'cext', None),
    b'allow': (r'cext', r'pure'),
    b'cffi': (r'cffi', None),
    b'cffi-allow': (r'cffi', r'pure'),
    b'py': (None, r'pure'),
}

try:
    # setup.py bakes the build-time policy into __modulepolicy__
    from . import __modulepolicy__
    policy = __modulepolicy__.modulepolicy
except ImportError:
    pass

# PyPy doesn't load C extensions.
#
# The canonical way to do this is to test platform.python_implementation(),
# but we don't import platform and don't bloat for it here.
if r'__pypy__' in sys.builtin_module_names:
    policy = b'cffi'

# Our C extensions aren't yet compatible with Python 3. So use pure Python
# on Python 3 for now.
if sys.version_info[0] >= 3:
    policy = b'py'

# Environment variable can always force settings.
_envpolicy = os.environ.get(r'HGMODULEPOLICY')
if _envpolicy is not None:
    if sys.version_info[0] >= 3:
        policy = _envpolicy.encode(r'utf-8')
    else:
        policy = _envpolicy
58
58
def _importfrom(pkgname, modname):
    """Emulate ``from .<pkgname> import <modname>`` relative to this module.

    Returns the imported module. Raises ImportError if the package does
    not provide the requested attribute.
    """
    scratch = {}
    # level=1 resolves <pkgname> relative to this module's package
    pkg = __import__(pkgname, globals(), scratch, [modname], level=1)
    try:
        scratch[modname] = mod = getattr(pkg, modname)
    except AttributeError:
        raise ImportError(r'cannot import name %s' % modname)
    # touch an attribute to force a lazy importer to resolve the module;
    # as a side effect scratch[modname] may be replaced with the real one
    getattr(mod, r'__doc__', None)
    return scratch[modname]
70
70
# API version expected from each C extension module; must be kept in sync
# with the "version" constant the module itself exports.
_cextversions = {
    (r'cext', mod): 1
    for mod in (r'base85', r'bdiff', r'diffhelpers', r'mpatch',
                r'osutil', r'parsers')
}

# requested (package, module) -> (package, module) actually providing it
_modredirects = {
    (r'cext', r'charencode'): (r'cext', r'parsers'),
    (r'cffi', r'base85'): (r'pure', r'base85'),
    (r'cffi', r'charencode'): (r'pure', r'charencode'),
    (r'cffi', r'diffhelpers'): (r'pure', r'diffhelpers'),
    (r'cffi', r'parsers'): (r'pure', r'parsers'),
}
87
89
def _checkmod(pkgname, modname, mod):
    """Raise ImportError unless mod's API version matches our table."""
    wanted = _cextversions.get((pkgname, modname))
    found = getattr(mod, r'version', None)
    if found == wanted:
        return
    raise ImportError(r'cannot import module %s.%s '
                      r'(expected version: %d, actual: %r)'
                      % (pkgname, modname, wanted, found))
95
97
def importmod(modname):
    """Import module according to policy and check API version"""
    if policy not in _packageprefs:
        raise ImportError(r'invalid HGMODULEPOLICY %r' % policy)
    verpkg, purepkg = _packageprefs[policy]
    assert verpkg or purepkg
    if verpkg:
        # honor any redirect (e.g. charencode lives inside cext.parsers)
        pkg, name = _modredirects.get((verpkg, modname), (verpkg, modname))
        try:
            mod = _importfrom(pkg, name)
            if pkg == verpkg:
                # an unredirected versioned module must match our table
                _checkmod(pkg, name, mod)
            return mod
        except ImportError:
            if not purepkg:
                raise
            # otherwise fall through to the pure implementation
    pkg, name = _modredirects.get((purepkg, modname), (purepkg, modname))
    return _importfrom(pkg, name)
General Comments 0
You need to be logged in to leave comments. Login now