##// END OF EJS Templates
pycompat: add bytestr wrapper which mostly acts as a Python 2 str...
Yuya Nishihara -
r31439:b70407bd default
parent child Browse files
Show More
@@ -1,323 +1,385 b''
1 1 # pycompat.py - portability shim for python 3
2 2 #
3 3 # This software may be used and distributed according to the terms of the
4 4 # GNU General Public License version 2 or any later version.
5 5
6 6 """Mercurial portability shim for python 3.
7 7
8 8 This contains aliases to hide python version-specific details from the core.
9 9 """
10 10
11 11 from __future__ import absolute_import
12 12
13 13 import getopt
14 14 import os
15 15 import shlex
16 16 import sys
17 17
18 18 ispy3 = (sys.version_info[0] >= 3)
19 19
20 20 if not ispy3:
21 21 import cPickle as pickle
22 22 import httplib
23 23 import Queue as _queue
24 24 import SocketServer as socketserver
25 25 import urlparse
26 26 urlunquote = urlparse.unquote
27 27 import xmlrpclib
28 28 else:
29 29 import http.client as httplib
30 30 import pickle
31 31 import queue as _queue
32 32 import socketserver
33 33 import urllib.parse as urlparse
34 34 urlunquote = urlparse.unquote_to_bytes
35 35 import xmlrpc.client as xmlrpclib
36 36
37 37 if ispy3:
38 38 import builtins
39 39 import functools
40 40 import io
41 41 import struct
42 42
43 43 fsencode = os.fsencode
44 44 fsdecode = os.fsdecode
45 45 # A bytes version of os.name.
46 46 osname = os.name.encode('ascii')
47 47 ospathsep = os.pathsep.encode('ascii')
48 48 ossep = os.sep.encode('ascii')
49 49 osaltsep = os.altsep
50 50 if osaltsep:
51 51 osaltsep = osaltsep.encode('ascii')
52 52 # os.getcwd() on Python 3 returns string, but it has os.getcwdb() which
53 53 # returns bytes.
54 54 getcwd = os.getcwdb
55 55 sysplatform = sys.platform.encode('ascii')
56 56 sysexecutable = sys.executable
57 57 if sysexecutable:
58 58 sysexecutable = os.fsencode(sysexecutable)
59 59 stringio = io.BytesIO
60 60
61 61 # TODO: .buffer might not exist if std streams were replaced; we'll need
62 62 # a silly wrapper to make a bytes stream backed by a unicode one.
63 63 stdin = sys.stdin.buffer
64 64 stdout = sys.stdout.buffer
65 65 stderr = sys.stderr.buffer
66 66
67 67 # Since Python 3 converts argv to wchar_t type by Py_DecodeLocale() on Unix,
68 68 # we can use os.fsencode() to get back bytes argv.
69 69 #
70 70 # https://hg.python.org/cpython/file/v3.5.1/Programs/python.c#l55
71 71 #
72 72 # TODO: On Windows, the native argv is wchar_t, so we'll need a different
73 73 # workaround to simulate the Python 2 (i.e. ANSI Win32 API) behavior.
74 74 if getattr(sys, 'argv', None) is not None:
75 75 sysargv = list(map(os.fsencode, sys.argv))
76 76
77 77 bytechr = struct.Struct('>B').pack
78 78
class bytestr(bytes):
    """A bytes subclass which mostly behaves like a Python 2 str

    It can be built from nothing, from bytes-like objects, from ASCII-only
    unicode strings, and from other objects by way of ``str()``:

    >>> bytestr(), bytestr(bytearray(b'foo')), bytestr(u'ascii'), bytestr(1)
    (b'', b'foo', b'ascii', b'1')
    >>> s = bytestr(b'foo')
    >>> assert s is bytestr(s)

    A non-ascii str is never converted implicitly because its encoding is
    unknown:

    >>> bytestr(chr(0x80)) # doctest: +ELLIPSIS
    Traceback (most recent call last):
      ...
    UnicodeEncodeError: ...

    bytestr and bytes can be compared and searched in either direction:

    >>> assert bytestr(b'foo') == b'foo'
    >>> assert b'foo' == bytestr(b'foo')
    >>> assert b'f' in bytestr(b'foo')
    >>> assert bytestr(b'f') in b'foo'

    Indexing and iterating yield bytes, not integers:

    >>> s[1], s[:2]
    (b'o', b'fo')
    >>> list(s), list(reversed(s))
    ([b'f', b'o', b'o'], [b'o', b'o', b'f'])

    The bytestr type isn't propagated across operations, so results have
    to be cast back to bytestr explicitly:

    >>> s = bytestr(b'foo').upper()
    >>> t = bytestr(s)
    >>> s[0], t[0]
    (70, b'F')

    Be careful to not pass a bytestr object to a function which expects
    bytearray-like behavior.

    >>> t = bytes(t)  # cast to bytes
    >>> assert type(t) is bytes
    """

    def __new__(cls, s=b''):
        # an existing bytestr is returned as is, without copying
        if isinstance(s, bytestr):
            return s
        if isinstance(s, (bytes, bytearray)):
            return bytes.__new__(cls, s)
        # anything else goes through str() and must be pure ASCII
        return bytes.__new__(cls, str(s).encode(u'ascii'))

    def __getitem__(self, key):
        item = bytes.__getitem__(self, key)
        if isinstance(item, bytes):
            # slices already come back as bytes
            return item
        # a single index yields an int; turn it into a 1-byte string
        return bytechr(item)

    def __iter__(self):
        return iterbytestr(bytes.__iter__(self))
139
def iterbytestr(s):
    """Iterate over bytes yielding 1-byte strings, as a Python 2 str would"""
    return (bytechr(byte) for byte in s)
82 143
def sysstr(s):
    """Convert ``s`` to a native str usable as a keyword for Python
    functions such as getattr() and str.encode()

    A value which already is a native str is returned unchanged. Anything
    else is decoded as latin-1, so this never raises UnicodeDecodeError:
    non-ascii bytes are considered invalid and mapped to arbitrary but
    unique code points such that 'sysstr(a) != sysstr(b)' for all 'a != b'.
    """
    if not isinstance(s, builtins.str):
        s = s.decode(u'latin-1')
    return s
94 155
def _wrapattrfunc(f):
    """Wrap an attribute builtin so its name argument goes through sysstr()

    The returned function calls ``f(object, sysstr(name), *args)``.
    """
    @functools.wraps(f)
    def wrapper(object, name, *args):
        attrname = sysstr(name)
        return f(object, attrname, *args)
    return wrapper
100 161
101 162 # these wrappers are automagically imported by hgloader
102 163 delattr = _wrapattrfunc(builtins.delattr)
103 164 getattr = _wrapattrfunc(builtins.getattr)
104 165 hasattr = _wrapattrfunc(builtins.hasattr)
105 166 setattr = _wrapattrfunc(builtins.setattr)
106 167 xrange = builtins.range
107 168
def open(name, mode='r', buffering=-1):
    """Call builtins.open() with ``mode`` converted by sysstr()"""
    strmode = sysstr(mode)
    return builtins.open(name, strmode, buffering)
110 171
# getopt.getopt() on Python 3 deals with unicodes internally, so bytes cannot
# be passed to it. Everything is decoded as latin-1 on the way in, and the
# unicode results are encoded as latin-1 again on the way out.
def getoptb(args, shortlist, namelist):
    uargs = [arg.decode('latin-1') for arg in args]
    ushort = shortlist.decode('latin-1')
    ulong = [name.decode('latin-1') for name in namelist]
    parsed, remaining = getopt.getopt(uargs, ushort, ulong)
    opts = [(pair[0].encode('latin-1'), pair[1].encode('latin-1'))
            for pair in parsed]
    rest = [arg.encode('latin-1') for arg in remaining]
    return opts, rest
123 184
# Keys of keyword arguments in Python need to be strings, which are unicodes
# in Python 3. This function takes a dict of keyword arguments keyed by bytes
# and returns a new dict with the keys decoded (as latin-1) to str. The
# values are passed through untouched.
def strkwargs(dic):
    # dict.iteritems() is not available on Python 3 dicts, and this branch
    # only runs on Python 3, so items() is used instead.
    return dict((k.decode('latin-1'), v) for k, v in dic.items())
129 190
# Keys of keyword arguments need to be unicode when they are passed into a
# function. This function converts such keys back to bytes (encoded as
# latin-1), as the rest of the code deals with bytes. It returns a new dict;
# the values are passed through untouched.
def byteskwargs(dic):
    # dict.iteritems() is not available on Python 3 dicts, and this branch
    # only runs on Python 3, so items() is used instead.
    return dict((k.encode('latin-1'), v) for k, v in dic.items())
136 197
# shlex.split() accepts unicodes on Python 3. This function takes a bytes
# argument, converts it into unicode, passes that to shlex.split(), and
# returns the resulting words converted back to bytes.
# TODO: handle shlex.shlex().
def shlexsplit(s):
    words = shlex.split(s.decode('latin-1'))
    return [word.encode('latin-1') for word in words]
144 205
145 206 else:
146 207 import cStringIO
147 208
148 209 bytechr = chr
210 bytestr = str
149 211 iterbytestr = iter
150 212
def sysstr(s):
    """Return ``s`` unchanged (no conversion is done on Python 2)"""
    return s
153 215
# Partial backport from os.py in Python 3, which only accepts bytes.
# In Python 2, our paths should only ever be bytes (i.e. str), so a unicode
# path indicates a bug and is rejected with TypeError.
def fsencode(filename):
    if not isinstance(filename, str):
        raise TypeError(
            "expect str, not %s" % type(filename).__name__)
    return filename
163 225
# In Python 2, fsdecode() is very likely to receive bytes. So it's better
# not to touch the Python 2 part, as it's already working fine: the filename
# is handed back unchanged.
def fsdecode(filename):
    return filename
168 230
def getoptb(args, shortlist, namelist):
    """Parse ``args`` by handing everything straight to getopt.getopt()"""
    parsed = getopt.getopt(args, shortlist, namelist)
    return parsed
171 233
def strkwargs(dic):
    """Return ``dic`` itself; no key conversion is done on Python 2"""
    return dic
174 236
def byteskwargs(dic):
    """Return ``dic`` itself; no key conversion is done on Python 2"""
    return dic
177 239
178 240 osname = os.name
179 241 ospathsep = os.pathsep
180 242 ossep = os.sep
181 243 osaltsep = os.altsep
182 244 stdin = sys.stdin
183 245 stdout = sys.stdout
184 246 stderr = sys.stderr
185 247 if getattr(sys, 'argv', None) is not None:
186 248 sysargv = sys.argv
187 249 sysplatform = sys.platform
188 250 getcwd = os.getcwd
189 251 sysexecutable = sys.executable
190 252 shlexsplit = shlex.split
191 253 stringio = cStringIO.StringIO
192 254
193 255 empty = _queue.Empty
194 256 queue = _queue.Queue
195 257
196 258 class _pycompatstub(object):
197 259 def __init__(self):
198 260 self._aliases = {}
199 261
200 262 def _registeraliases(self, origin, items):
201 263 """Add items that will be populated at the first access"""
202 264 items = map(sysstr, items)
203 265 self._aliases.update(
204 266 (item.replace(sysstr('_'), sysstr('')).lower(), (origin, item))
205 267 for item in items)
206 268
207 269 def __getattr__(self, name):
208 270 try:
209 271 origin, item = self._aliases[name]
210 272 except KeyError:
211 273 raise AttributeError(name)
212 274 self.__dict__[name] = obj = getattr(origin, item)
213 275 return obj
214 276
215 277 httpserver = _pycompatstub()
216 278 urlreq = _pycompatstub()
217 279 urlerr = _pycompatstub()
218 280 if not ispy3:
219 281 import BaseHTTPServer
220 282 import CGIHTTPServer
221 283 import SimpleHTTPServer
222 284 import urllib2
223 285 import urllib
224 286 urlreq._registeraliases(urllib, (
225 287 "addclosehook",
226 288 "addinfourl",
227 289 "ftpwrapper",
228 290 "pathname2url",
229 291 "quote",
230 292 "splitattr",
231 293 "splitpasswd",
232 294 "splitport",
233 295 "splituser",
234 296 "unquote",
235 297 "url2pathname",
236 298 "urlencode",
237 299 ))
238 300 urlreq._registeraliases(urllib2, (
239 301 "AbstractHTTPHandler",
240 302 "BaseHandler",
241 303 "build_opener",
242 304 "FileHandler",
243 305 "FTPHandler",
244 306 "HTTPBasicAuthHandler",
245 307 "HTTPDigestAuthHandler",
246 308 "HTTPHandler",
247 309 "HTTPPasswordMgrWithDefaultRealm",
248 310 "HTTPSHandler",
249 311 "install_opener",
250 312 "ProxyHandler",
251 313 "Request",
252 314 "urlopen",
253 315 ))
254 316 urlerr._registeraliases(urllib2, (
255 317 "HTTPError",
256 318 "URLError",
257 319 ))
258 320 httpserver._registeraliases(BaseHTTPServer, (
259 321 "HTTPServer",
260 322 "BaseHTTPRequestHandler",
261 323 ))
262 324 httpserver._registeraliases(SimpleHTTPServer, (
263 325 "SimpleHTTPRequestHandler",
264 326 ))
265 327 httpserver._registeraliases(CGIHTTPServer, (
266 328 "CGIHTTPRequestHandler",
267 329 ))
268 330
269 331 else:
270 332 import urllib.parse
271 333 urlreq._registeraliases(urllib.parse, (
272 334 "splitattr",
273 335 "splitpasswd",
274 336 "splitport",
275 337 "splituser",
276 338 "unquote",
277 339 ))
278 340 import urllib.request
279 341 urlreq._registeraliases(urllib.request, (
280 342 "AbstractHTTPHandler",
281 343 "BaseHandler",
282 344 "build_opener",
283 345 "FileHandler",
284 346 "FTPHandler",
285 347 "ftpwrapper",
286 348 "HTTPHandler",
287 349 "HTTPSHandler",
288 350 "install_opener",
289 351 "pathname2url",
290 352 "HTTPBasicAuthHandler",
291 353 "HTTPDigestAuthHandler",
292 354 "HTTPPasswordMgrWithDefaultRealm",
293 355 "ProxyHandler",
294 356 "Request",
295 357 "url2pathname",
296 358 "urlopen",
297 359 ))
298 360 import urllib.response
299 361 urlreq._registeraliases(urllib.response, (
300 362 "addclosehook",
301 363 "addinfourl",
302 364 ))
303 365 import urllib.error
304 366 urlerr._registeraliases(urllib.error, (
305 367 "HTTPError",
306 368 "URLError",
307 369 ))
308 370 import http.server
309 371 httpserver._registeraliases(http.server, (
310 372 "HTTPServer",
311 373 "BaseHTTPRequestHandler",
312 374 "SimpleHTTPRequestHandler",
313 375 "CGIHTTPRequestHandler",
314 376 ))
315 377
# urllib.parse.quote() accepts both str and bytes, decodes bytes
# (if necessary), and returns str. This is wonky. We provide a custom
# implementation that only accepts bytes and emits bytes.
def quote(s, safe=r'/'):
    quoted = urllib.parse.quote_from_bytes(s, safe=safe)
    return quoted.encode('ascii', 'strict')
322 384
323 385 urlreq.quote = quote
@@ -1,52 +1,53 b''
1 1 # this is a hack to make sure no escape characters are inserted into the output
2 2
3 3 from __future__ import absolute_import
4 4
5 5 import doctest
6 6 import os
7 7 import sys
8 8
9 9 ispy3 = (sys.version_info[0] >= 3)
10 10
11 11 if 'TERM' in os.environ:
12 12 del os.environ['TERM']
13 13
# TODO: migrate doctests to py3 and enable them on both versions
def testmod(name, optionflags=0, testtarget=None, py2=True, py3=False):
    """Run the doctests of module ``name`` if enabled for this Python

    ``py2`` and ``py3`` tell on which major versions the tests should run;
    if the running interpreter isn't selected, nothing is imported or
    tested. When ``testtarget`` is given, that attribute of the module is
    tested instead of the module itself.
    """
    enabled = py3 if ispy3 else py2
    if not enabled:
        return
    __import__(name)
    target = sys.modules[name]
    if testtarget is not None:
        target = getattr(target, testtarget)
    doctest.testmod(target, optionflags=optionflags)
23 23
24 24 testmod('mercurial.changegroup')
25 25 testmod('mercurial.changelog')
26 26 testmod('mercurial.dagparser', optionflags=doctest.NORMALIZE_WHITESPACE)
27 27 testmod('mercurial.dispatch')
28 28 testmod('mercurial.encoding')
29 29 testmod('mercurial.formatter')
30 30 testmod('mercurial.hg')
31 31 testmod('mercurial.hgweb.hgwebdir_mod')
32 32 testmod('mercurial.match')
33 33 testmod('mercurial.minirst')
34 34 testmod('mercurial.patch')
35 35 testmod('mercurial.pathutil')
36 36 testmod('mercurial.parser')
37 testmod('mercurial.pycompat', py3=True)
37 38 testmod('mercurial.revsetlang')
38 39 testmod('mercurial.smartset')
39 40 testmod('mercurial.store')
40 41 testmod('mercurial.subrepo')
41 42 testmod('mercurial.templatefilters')
42 43 testmod('mercurial.templater')
43 44 testmod('mercurial.ui')
44 45 testmod('mercurial.url')
45 46 testmod('mercurial.util')
46 47 testmod('mercurial.util', testtarget='platform')
47 48 testmod('hgext.convert.convcmd')
48 49 testmod('hgext.convert.cvsps')
49 50 testmod('hgext.convert.filemap')
50 51 testmod('hgext.convert.p4')
51 52 testmod('hgext.convert.subversion')
52 53 testmod('hgext.mq')
General Comments 0
You need to be logged in to leave comments. Login now