##// END OF EJS Templates
pycompat: alias urlreq.unquote to unquote_to_bytes...
Gregory Szorc -
r31566:c6df6a23 default
parent child Browse files
Show More
@@ -1,387 +1,391 b''
1 1 # pycompat.py - portability shim for python 3
2 2 #
3 3 # This software may be used and distributed according to the terms of the
4 4 # GNU General Public License version 2 or any later version.
5 5
6 6 """Mercurial portability shim for python 3.
7 7
8 8 This contains aliases to hide python version-specific details from the core.
9 9 """
10 10
11 11 from __future__ import absolute_import
12 12
13 13 import getopt
14 14 import os
15 15 import shlex
16 16 import sys
17 17
18 18 ispy3 = (sys.version_info[0] >= 3)
19 19
20 20 if not ispy3:
21 21 import cPickle as pickle
22 22 import httplib
23 23 import Queue as _queue
24 24 import SocketServer as socketserver
25 25 import urlparse
26 26 urlunquote = urlparse.unquote
27 27 import xmlrpclib
28 28 else:
29 29 import http.client as httplib
30 30 import pickle
31 31 import queue as _queue
32 32 import socketserver
33 33 import urllib.parse as urlparse
34 34 urlunquote = urlparse.unquote_to_bytes
35 35 import xmlrpc.client as xmlrpclib
36 36
37 37 if ispy3:
38 38 import builtins
39 39 import functools
40 40 import io
41 41 import struct
42 42
43 43 fsencode = os.fsencode
44 44 fsdecode = os.fsdecode
45 45 # A bytes version of os.name.
46 46 osname = os.name.encode('ascii')
47 47 ospathsep = os.pathsep.encode('ascii')
48 48 ossep = os.sep.encode('ascii')
49 49 osaltsep = os.altsep
50 50 if osaltsep:
51 51 osaltsep = osaltsep.encode('ascii')
52 52 # os.getcwd() on Python 3 returns string, but it has os.getcwdb() which
53 53 # returns bytes.
54 54 getcwd = os.getcwdb
55 55 sysplatform = sys.platform.encode('ascii')
56 56 sysexecutable = sys.executable
57 57 if sysexecutable:
58 58 sysexecutable = os.fsencode(sysexecutable)
59 59 stringio = io.BytesIO
60 60 maplist = lambda *args: list(map(*args))
61 61
62 62 # TODO: .buffer might not exist if std streams were replaced; we'll need
63 63 # a silly wrapper to make a bytes stream backed by a unicode one.
64 64 stdin = sys.stdin.buffer
65 65 stdout = sys.stdout.buffer
66 66 stderr = sys.stderr.buffer
67 67
68 68 # Since Python 3 converts argv to wchar_t type by Py_DecodeLocale() on Unix,
69 69 # we can use os.fsencode() to get back bytes argv.
70 70 #
71 71 # https://hg.python.org/cpython/file/v3.5.1/Programs/python.c#l55
72 72 #
73 73 # TODO: On Windows, the native argv is wchar_t, so we'll need a different
74 74 # workaround to simulate the Python 2 (i.e. ANSI Win32 API) behavior.
75 75 if getattr(sys, 'argv', None) is not None:
76 76 sysargv = list(map(os.fsencode, sys.argv))
77 77
78 78 bytechr = struct.Struct('>B').pack
79 79
class bytestr(bytes):
    """bytes subclass whose indexing and iteration yield 1-byte bytes

    Construction accepts nothing, bytes, bytearray, or any object whose
    str() is pure ascii; an existing bytestr is returned unchanged:

    >>> bytestr(), bytestr(bytearray(b'foo')), bytestr(u'ascii'), bytestr(1)
    (b'', b'foo', b'ascii', b'1')
    >>> s = bytestr(b'foo')
    >>> assert s is bytestr(s)

    A non-ascii str is rejected rather than guessed at, because its
    encoding is unknown:

    >>> bytestr(chr(0x80)) # doctest: +ELLIPSIS
    Traceback (most recent call last):
      ...
    UnicodeEncodeError: ...

    bytestr and bytes compare and nest in both directions:

    >>> assert bytestr(b'foo') == b'foo'
    >>> assert b'foo' == bytestr(b'foo')
    >>> assert b'f' in bytestr(b'foo')
    >>> assert bytestr(b'f') in b'foo'

    Indexing, slicing and iterating give bytes, never integers:

    >>> s[1], s[:2]
    (b'o', b'fo')
    >>> list(s), list(reversed(s))
    ([b'f', b'o', b'o'], [b'o', b'o', b'f'])

    Methods inherited from bytes return plain bytes, so the result has to
    be wrapped in bytestr again to keep this behavior:

    >>> s = bytestr(b'foo').upper()
    >>> t = bytestr(s)
    >>> s[0], t[0]
    (70, b'F')

    Do not hand a bytestr to code that relies on bytearray-like (integer)
    element access; cast it back first:

    >>> t = bytes(t)  # cast to bytes
    >>> assert type(t) is bytes
    """

    def __new__(cls, s=b''):
        # An existing bytestr is returned as-is (identity is preserved).
        if isinstance(s, bytestr):
            return s
        if isinstance(s, (bytes, bytearray)):
            raw = s
        else:
            # Anything else goes through str(); non-ascii text raises.
            raw = str(s).encode(u'ascii')
        return bytes.__new__(cls, raw)

    def __getitem__(self, key):
        item = bytes.__getitem__(self, key)
        # Slices come back as bytes already; an integer index yields an
        # int, which is packed back into a single byte.
        return item if isinstance(item, bytes) else bytechr(item)

    def __iter__(self):
        ints = bytes.__iter__(self)
        return iterbytestr(ints)
140 140
def iterbytestr(s):
    """Return an iterator applying bytechr() to each element of ``s``

    Iterating bytes on Python 3 yields integers; this yields 1-byte bytes
    instead, like iterating a Python 2 str.
    """
    single_bytes = map(bytechr, s)
    return single_bytes
144 144
def sysstr(s):
    """Convert ``s`` to the native str type used for keywords

    The result is suitable for Python functions such as getattr() and
    str.encode(). A value that is already a str is returned untouched;
    anything else is decoded as latin-1, which cannot fail and maps every
    distinct byte sequence to a distinct str, so non-ascii input never
    raises UnicodeDecodeError and 'sysstr(a) != sysstr(b)' for all 'a != b'.
    """
    if not isinstance(s, builtins.str):
        return s.decode(u'latin-1')
    return s
156 156
def _wrapattrfunc(f):
    """Wrap attribute function ``f`` so its name argument may be bytes

    The returned callable passes ``name`` through sysstr() before
    delegating to ``f(object, name, *args)``.
    """
    @functools.wraps(f)
    def wrapped(object, name, *args):
        native_name = sysstr(name)
        return f(object, native_name, *args)
    return wrapped
162 162
163 163 # these wrappers are automagically imported by hgloader
164 164 delattr = _wrapattrfunc(builtins.delattr)
165 165 getattr = _wrapattrfunc(builtins.getattr)
166 166 hasattr = _wrapattrfunc(builtins.hasattr)
167 167 setattr = _wrapattrfunc(builtins.setattr)
168 168 xrange = builtins.range
169 169
def open(name, mode='r', buffering=-1):
    """Call builtins.open() with ``mode`` converted through sysstr()"""
    native_mode = sysstr(mode)
    return builtins.open(name, native_mode, buffering)
172 172
173 173 # getopt.getopt() on Python 3 deals with unicodes internally so we cannot
174 174 # pass bytes there. Passing unicodes will result in unicodes as return
175 175 # values which we need to convert again to bytes.
def getoptb(args, shortlist, namelist):
    """Run getopt.getopt() on bytes arguments and return bytes results

    ``args`` and ``namelist`` are sequences of bytes and ``shortlist`` is
    bytes. Everything is decoded as latin-1 before parsing and the parsed
    (option, value) pairs and leftover arguments are encoded back.
    """
    def tostr(b):
        return b.decode('latin-1')

    def tobytes(u):
        return u.encode('latin-1')

    strargs = [tostr(a) for a in args]
    strshort = tostr(shortlist)
    strlong = [tostr(n) for n in namelist]
    parsed, leftover = getopt.getopt(strargs, strshort, strlong)
    byteopts = [(tobytes(pair[0]), tobytes(pair[1])) for pair in parsed]
    byteargs = [tobytes(a) for a in leftover]
    return byteopts, byteargs
185 185
186 186 # keys of keyword arguments in Python need to be strings, which are unicodes
187 187 # on Python 3. This function takes keyword arguments and converts the keys to str.
def strkwargs(dic):
    """Return a copy of ``dic`` whose bytes keys are decoded to str

    Keys are decoded as latin-1 so the result can be splatted as keyword
    arguments on Python 3; values are passed through unchanged.
    """
    # dict.iteritems() does not exist on Python 3, which is the only
    # interpreter that runs this branch, so iterate with items().
    return dict((k.decode('latin-1'), v) for k, v in dic.items())
191 191
192 192 # keys of keyword arguments need to be unicode while passing into
193 193 # a function. This function helps us to convert those keys back to bytes
194 194 # again as we need to deal with bytes.
def byteskwargs(dic):
    """Return a copy of ``dic`` whose str keys are encoded to bytes

    Keys are encoded as latin-1, turning keyword-argument names received
    by a function back into bytes; values are passed through unchanged.
    """
    # dict.iteritems() does not exist on Python 3, which is the only
    # interpreter that runs this branch, so iterate with items().
    return dict((k.encode('latin-1'), v) for k, v in dic.items())
198 198
199 199 # shlex.split() accepts unicodes on Python 3. This function takes a bytes
200 200 # argument, converts it into unicode, passes it to shlex.split(), converts
201 201 # the returned values to bytes and returns them.
202 202 # TODO: handle shlex.shlex().
def shlexsplit(s):
    """Split bytes ``s`` with shlex.split() and return a list of bytes

    The input is decoded as latin-1 for shlex and every resulting token
    is encoded back the same way.
    """
    tokens = shlex.split(s.decode('latin-1'))
    encoded = []
    for token in tokens:
        encoded.append(token.encode('latin-1'))
    return encoded
206 206
207 207 else:
208 208 import cStringIO
209 209
210 210 bytechr = chr
211 211 bytestr = str
212 212 iterbytestr = iter
213 213
def sysstr(s):
    """Return ``s`` unchanged (Python 2 branch; no conversion is done)"""
    return s
216 216
217 217 # Partial backport from os.py in Python 3, which only accepts bytes.
218 218 # In Python 2, our paths should only ever be bytes, a unicode path
219 219 # indicates a bug.
def fsencode(filename):
    """Return ``filename`` unchanged if it is a str, else raise TypeError"""
    if not isinstance(filename, str):
        raise TypeError(
            "expect str, not %s" % type(filename).__name__)
    return filename
226 226
227 227 # In Python 2, fsdecode() has a very good chance of receiving bytes. So it's
228 228 # better not to touch the Python 2 part as it's already working fine.
def fsdecode(filename):
    """Return ``filename`` unchanged (Python 2 branch; no decoding is done)"""
    return filename
231 231
def getoptb(args, shortlist, namelist):
    """Delegate directly to getopt.getopt() (Python 2 branch)"""
    opts, remaining = getopt.getopt(args, shortlist, namelist)
    return opts, remaining
234 234
def strkwargs(dic):
    """Return ``dic`` itself (Python 2 branch; keys are left as they are)"""
    return dic
237 237
def byteskwargs(dic):
    """Return ``dic`` itself (Python 2 branch; keys are left as they are)"""
    return dic
240 240
241 241 osname = os.name
242 242 ospathsep = os.pathsep
243 243 ossep = os.sep
244 244 osaltsep = os.altsep
245 245 stdin = sys.stdin
246 246 stdout = sys.stdout
247 247 stderr = sys.stderr
248 248 if getattr(sys, 'argv', None) is not None:
249 249 sysargv = sys.argv
250 250 sysplatform = sys.platform
251 251 getcwd = os.getcwd
252 252 sysexecutable = sys.executable
253 253 shlexsplit = shlex.split
254 254 stringio = cStringIO.StringIO
255 255 maplist = map
256 256
257 257 empty = _queue.Empty
258 258 queue = _queue.Queue
259 259
260 260 class _pycompatstub(object):
261 261 def __init__(self):
262 262 self._aliases = {}
263 263
264 264 def _registeraliases(self, origin, items):
265 265 """Add items that will be populated at the first access"""
266 266 items = map(sysstr, items)
267 267 self._aliases.update(
268 268 (item.replace(sysstr('_'), sysstr('')).lower(), (origin, item))
269 269 for item in items)
270 270
271 def _registeralias(self, origin, attr, name):
272 """Alias ``origin``.``attr`` as ``name``"""
273 self._aliases[sysstr(name)] = (origin, sysstr(attr))
274
271 275 def __getattr__(self, name):
272 276 try:
273 277 origin, item = self._aliases[name]
274 278 except KeyError:
275 279 raise AttributeError(name)
276 280 self.__dict__[name] = obj = getattr(origin, item)
277 281 return obj
278 282
279 283 httpserver = _pycompatstub()
280 284 urlreq = _pycompatstub()
281 285 urlerr = _pycompatstub()
282 286 if not ispy3:
283 287 import BaseHTTPServer
284 288 import CGIHTTPServer
285 289 import SimpleHTTPServer
286 290 import urllib2
287 291 import urllib
288 292 urlreq._registeraliases(urllib, (
289 293 "addclosehook",
290 294 "addinfourl",
291 295 "ftpwrapper",
292 296 "pathname2url",
293 297 "quote",
294 298 "splitattr",
295 299 "splitpasswd",
296 300 "splitport",
297 301 "splituser",
298 302 "unquote",
299 303 "url2pathname",
300 304 "urlencode",
301 305 ))
302 306 urlreq._registeraliases(urllib2, (
303 307 "AbstractHTTPHandler",
304 308 "BaseHandler",
305 309 "build_opener",
306 310 "FileHandler",
307 311 "FTPHandler",
308 312 "HTTPBasicAuthHandler",
309 313 "HTTPDigestAuthHandler",
310 314 "HTTPHandler",
311 315 "HTTPPasswordMgrWithDefaultRealm",
312 316 "HTTPSHandler",
313 317 "install_opener",
314 318 "ProxyHandler",
315 319 "Request",
316 320 "urlopen",
317 321 ))
318 322 urlerr._registeraliases(urllib2, (
319 323 "HTTPError",
320 324 "URLError",
321 325 ))
322 326 httpserver._registeraliases(BaseHTTPServer, (
323 327 "HTTPServer",
324 328 "BaseHTTPRequestHandler",
325 329 ))
326 330 httpserver._registeraliases(SimpleHTTPServer, (
327 331 "SimpleHTTPRequestHandler",
328 332 ))
329 333 httpserver._registeraliases(CGIHTTPServer, (
330 334 "CGIHTTPRequestHandler",
331 335 ))
332 336
333 337 else:
334 338 import urllib.parse
335 339 urlreq._registeraliases(urllib.parse, (
336 340 "splitattr",
337 341 "splitpasswd",
338 342 "splitport",
339 343 "splituser",
340 "unquote",
341 344 ))
345 urlreq._registeralias(urllib.parse, "unquote_to_bytes", "unquote")
342 346 import urllib.request
343 347 urlreq._registeraliases(urllib.request, (
344 348 "AbstractHTTPHandler",
345 349 "BaseHandler",
346 350 "build_opener",
347 351 "FileHandler",
348 352 "FTPHandler",
349 353 "ftpwrapper",
350 354 "HTTPHandler",
351 355 "HTTPSHandler",
352 356 "install_opener",
353 357 "pathname2url",
354 358 "HTTPBasicAuthHandler",
355 359 "HTTPDigestAuthHandler",
356 360 "HTTPPasswordMgrWithDefaultRealm",
357 361 "ProxyHandler",
358 362 "Request",
359 363 "url2pathname",
360 364 "urlopen",
361 365 ))
362 366 import urllib.response
363 367 urlreq._registeraliases(urllib.response, (
364 368 "addclosehook",
365 369 "addinfourl",
366 370 ))
367 371 import urllib.error
368 372 urlerr._registeraliases(urllib.error, (
369 373 "HTTPError",
370 374 "URLError",
371 375 ))
372 376 import http.server
373 377 httpserver._registeraliases(http.server, (
374 378 "HTTPServer",
375 379 "BaseHTTPRequestHandler",
376 380 "SimpleHTTPRequestHandler",
377 381 "CGIHTTPRequestHandler",
378 382 ))
379 383
380 384 # urllib.parse.quote() accepts both str and bytes, decodes bytes
381 385 # (if necessary), and returns str. This is wonky. We provide a custom
382 386 # implementation that only accepts bytes and emits bytes.
def quote(s, safe=r'/'):
    """Percent-quote bytes ``s`` and return the result as ascii bytes

    Delegates to urllib.parse.quote_from_bytes(), leaving the characters
    in ``safe`` unquoted, then encodes the returned str strictly as ascii.
    """
    quoted = urllib.parse.quote_from_bytes(s, safe=safe)
    return quoted.encode('ascii', 'strict')
386 390
387 391 urlreq.quote = quote
General Comments 0
You need to be logged in to leave comments. Login now