##// END OF EJS Templates
url: some bytes/str cleanup where we interface with stdlib funcs...
Augie Fackler -
r37753:126998dc default
parent child Browse files
Show More
@@ -1,596 +1,597
1 1 # url.py - HTTP handling for mercurial
2 2 #
3 3 # Copyright 2005, 2006, 2007, 2008 Matt Mackall <mpm@selenic.com>
4 4 # Copyright 2006, 2007 Alexis S. L. Carvalho <alexis@cecm.usp.br>
5 5 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
6 6 #
7 7 # This software may be used and distributed according to the terms of the
8 8 # GNU General Public License version 2 or any later version.
9 9
10 10 from __future__ import absolute_import
11 11
12 12 import base64
13 13 import os
14 14 import socket
15 15 import sys
16 16
17 17 from .i18n import _
18 18 from . import (
19 19 encoding,
20 20 error,
21 21 httpconnection as httpconnectionmod,
22 22 keepalive,
23 23 pycompat,
24 24 sslutil,
25 25 urllibcompat,
26 26 util,
27 27 )
28 28 from .utils import (
29 29 stringutil,
30 30 )
31 31
32 32 httplib = util.httplib
33 33 stringio = util.stringio
34 34 urlerr = util.urlerr
35 35 urlreq = util.urlreq
36 36
def escape(s, quote=None):
    '''Replace special characters "&", "<" and ">" with HTML-safe sequences.

    If the optional flag quote is true, the quotation mark character (")
    is also translated.

    This is the same as cgi.escape in Python, but always operates on
    bytes, whereas cgi.escape in Python 3 only works on unicodes.

    ``s`` must be a bytes object; the escaped bytes are returned.
    '''
    # "&" has to be handled first: the other replacements introduce new
    # ampersands which must not be escaped a second time.
    replacements = [
        (b"&", b"&amp;"),
        (b"<", b"&lt;"),
        (b">", b"&gt;"),
    ]
    if quote:
        replacements.append((b'"', b"&quot;"))
    for raw, entity in replacements:
        s = s.replace(raw, entity)
    return s
51 51
class passwordmgr(object):
    '''Credential lookup on top of a password database.

    ``passwddb`` is consulted first; missing pieces are then looked up via
    ``httpconnectionmod.readauthforuri()`` and, as a last resort, requested
    from the user through ``ui``.
    '''

    def __init__(self, ui, passwddb):
        self.ui = ui
        self.passwddb = passwddb

    def add_password(self, realm, uri, user, passwd):
        '''Forward the credentials to the wrapped password database.'''
        return self.passwddb.add_password(realm, uri, user, passwd)

    def find_user_password(self, realm, authuri):
        '''Return a ``(user, passwd)`` pair for ``authuri``.

        Raises error.Abort when credentials are still incomplete after all
        lookups and the ui is not interactive. Whatever is finally obtained
        is stored back into the password database.
        '''
        user, passwd = self.passwddb.find_user_password(realm, authuri)
        if user and passwd:
            self._writedebug(user, passwd)
            return (user, passwd)

        # Reaching this point means user and/or passwd is missing, so try
        # the configured authentication groups next.
        res = httpconnectionmod.readauthforuri(self.ui, authuri, user)
        if res:
            group, auth = res
            user, passwd = auth.get('username'), auth.get('password')
            self.ui.debug("using auth.%s.* for authentication\n" % group)

        if not (user and passwd):
            u = util.url(pycompat.bytesurl(authuri))
            # the query string is dropped before the URL is displayed
            u.query = None
            if not self.ui.interactive():
                raise error.Abort(_('http authorization required for %s') %
                                  util.hidepassword(bytes(u)))

            self.ui.write(_("http authorization required for %s\n") %
                          util.hidepassword(bytes(u)))
            self.ui.write(_("realm: %s\n") % pycompat.bytesurl(realm))
            if not user:
                user = self.ui.prompt(_("user:"), default=None)
            else:
                self.ui.write(_("user: %s\n") % user)

            if not passwd:
                passwd = self.ui.getpass()

        self.passwddb.add_password(realm, authuri, user, passwd)
        self._writedebug(user, passwd)
        return (user, passwd)

    def _writedebug(self, user, passwd):
        '''Emit a debug line naming the user with the password masked.'''
        masked = '*' * len(passwd) if passwd else 'not set'
        msg = _('http auth: user %s, password %s\n')
        self.ui.debug(msg % (user, masked))

    def find_stored_password(self, authuri):
        '''Look up ``authuri`` in the password database without a realm.'''
        return self.passwddb.find_user_password(None, authuri)
101 101
class proxyhandler(urlreq.proxyhandler):
    '''Proxy handler driven by the ``http_proxy`` config section.

    The proxy URL comes from ``http_proxy.host`` or, failing that, the
    ``http_proxy`` environment variable. Hosts listed in ``http_proxy.no``
    or the ``no_proxy`` environment variable (plus localhost) are reached
    directly unless ``http_proxy.always`` is set.
    '''

    def __init__(self, ui):
        proxyurl = (ui.config("http_proxy", "host") or
                    encoding.environ.get('http_proxy'))
        # XXX proxyauthinfo = None

        # Defined unconditionally so proxy_open() never trips over a
        # missing attribute, even when no proxy is configured.
        self.no_list = []

        if proxyurl:
            # proxy can be proper url or host[:port]
            if not proxyurl.startswith(('http:', 'https:')):
                proxyurl = 'http://' + proxyurl + '/'
            proxy = util.url(proxyurl)
            if not proxy.user:
                proxy.user = ui.config("http_proxy", "user")
                proxy.passwd = ui.config("http_proxy", "passwd")

            # see if we should use a proxy for this url
            no_list = ["localhost", "127.0.0.1"]
            no_list.extend([p.lower() for
                            p in ui.configlist("http_proxy", "no")])
            no_list.extend([p.strip().lower() for
                            p in encoding.environ.get("no_proxy", '').split(',')
                            if p.strip()])
            # "http_proxy.always" config is for running tests on localhost
            if not ui.configbool("http_proxy", "always"):
                self.no_list = no_list

            proxyurl = bytes(proxy)
            proxies = {'http': proxyurl, 'https': proxyurl}
            ui.debug('proxying through %s\n' % util.hidepassword(proxyurl))
        else:
            proxies = {}

        urlreq.proxyhandler.__init__(self, proxies)
        self.ui = ui

    def proxy_open(self, req, proxy, type_):
        '''Return None (no proxying) for hosts on the bypass list.

        An entry matches when it equals the host, or when it is written as
        ``*.domain`` / ``.domain`` and the host is ``domain`` itself or one
        of its subdomains. Any other request is handed to the base class.
        '''
        # entries in no_list are lower-cased, so compare like with like
        host = urllibcompat.gethost(req).split(':')[0].lower()
        for e in self.no_list:
            if host == e:
                return None
            if e.startswith('*.'):
                dotted = e[1:]
            elif e.startswith('.'):
                dotted = e
            else:
                continue
            # Anchor the suffix on the leading dot so "badexample.com" is
            # not mistaken for a subdomain of "example.com".
            if host == dotted[1:] or host.endswith(dotted):
                return None

        return urlreq.proxyhandler.proxy_open(self, req, proxy, type_)
151 151
def _gen_sendfile(orgsend):
    '''Return a ``send`` replacement that streams httpsendfile objects.

    Anything that is not an ``httpconnectionmod.httpsendfile`` is passed to
    ``orgsend`` unchanged.
    '''
    def _sendfile(self, data):
        if not isinstance(data, httpconnectionmod.httpsendfile):
            orgsend(self, data)
            return
        # send a file
        # if auth required, some data sent twice, so rewind here
        data.seek(0)
        for piece in util.filechunkiter(data):
            orgsend(self, piece)
    return _sendfile
163 163
164 164 has_https = util.safehasattr(urlreq, 'httpshandler')
165 165
class httpconnection(keepalive.HTTPConnection):
    '''Keepalive HTTP connection that can stream files and replay a
    response stashed in ``proxyres``.'''

    # must be able to send big bundle as stream.
    send = _gen_sendfile(keepalive.HTTPConnection.send)

    def getresponse(self):
        '''Return the stashed proxy response if there is one, otherwise
        defer to the keepalive implementation.'''
        stashed = getattr(self, 'proxyres', None)
        if not stashed:
            return keepalive.HTTPConnection.getresponse(self)
        if stashed.will_close:
            self.close()
        # hand the stashed response out only once
        self.proxyres = None
        return stashed
178 178
# general transaction handler to support different ways to handle
# HTTPS proxying before and after Python 2.6.3.
def _generic_start_transaction(handler, h, req):
    '''Prepare connection ``h`` for a possible CONNECT tunnel.

    Sets ``h.realhostport`` and ``h.headers`` when the request goes through
    a proxy to an https target, and resets both to None otherwise.
    '''
    tunnel_host = getattr(req, '_tunnel_host', None)
    new_tunnel = bool(tunnel_host)
    if new_tunnel:
        if tunnel_host[:7] not in ['http://', 'https:/']:
            tunnel_host = 'https://' + tunnel_host
    else:
        tunnel_host = urllibcompat.getselector(req)

    # has proxy
    viaproxy = new_tunnel or tunnel_host == urllibcompat.getfullurl(req)
    if viaproxy:
        u = util.url(tunnel_host)
        # only use CONNECT for HTTPS
        if new_tunnel or u.scheme == 'https':
            h.realhostport = ':'.join([u.host, (u.port or '443')])
            h.headers = req.headers.copy()
            h.headers.update(handler.parent.addheaders)
            return

    h.realhostport = None
    h.headers = None
201 201
def _generic_proxytunnel(self):
    '''Send a CONNECT request for ``self.realhostport`` and read the reply.

    Returns True when the proxy answered 200, in which case the socket is
    ready for the tunnelled traffic. For any other status the parsed
    response is stored in ``self.proxyres`` (except for HTTP/0.9 replies)
    and False is returned.

    Raises httplib.UnknownProtocol for an unrecognised HTTP version.
    '''
    # only "Proxy-*" headers are meant for the proxy itself
    proxyheaders = dict(
        [(x, self.headers[x]) for x in self.headers
         if x.lower().startswith('proxy-')])
    self.send('CONNECT %s HTTP/1.0\r\n' % self.realhostport)
    for header in proxyheaders.iteritems():
        self.send('%s: %s\r\n' % header)
    self.send('\r\n')

    # majority of the following code is duplicated from
    # httplib.HTTPConnection as there are no adequate places to
    # override functions to provide the needed functionality
    res = self.response_class(self.sock,
                              strict=self.strict,
                              method=self._method)

    while True:
        version, status, reason = res._read_status()
        if status != httplib.CONTINUE:
            break
        # skip lines that are all whitespace
        list(iter(lambda: res.fp.readline().strip(), ''))
    res.status = status
    res.reason = reason.strip()

    if res.status == 200:
        # skip lines until we find a blank line
        for line in iter(res.fp.readline, '\r\n'):
            if not line:
                # EOF: readline() keeps returning an empty string once the
                # proxy has closed the connection, so stop instead of
                # looping forever.
                break
        return True

    if version == 'HTTP/1.0':
        res.version = 10
    elif version.startswith('HTTP/1.'):
        res.version = 11
    elif version == 'HTTP/0.9':
        res.version = 9
    else:
        raise httplib.UnknownProtocol(version)

    if res.version == 9:
        res.length = None
        res.chunked = 0
        res.will_close = 1
        res.msg = httplib.HTTPMessage(stringio())
        return False

    res.msg = httplib.HTTPMessage(res.fp)
    res.msg.fp = None

    # are we using the chunked-style of transfer encoding?
    trenc = res.msg.getheader('transfer-encoding')
    if trenc and trenc.lower() == "chunked":
        res.chunked = 1
        res.chunk_left = None
    else:
        res.chunked = 0

    # will the connection close at the end of the response?
    res.will_close = res._check_close()

    # do we have a Content-Length?
    # NOTE: RFC 2616, section 4.4, #3 says we ignore this if
    # transfer-encoding is "chunked"
    length = res.msg.getheader('content-length')
    if length and not res.chunked:
        try:
            res.length = int(length)
        except ValueError:
            res.length = None
        else:
            if res.length < 0:  # ignore nonsensical negative lengths
                res.length = None
    else:
        res.length = None

    # does the body have a fixed length? (of zero)
    if (status == httplib.NO_CONTENT or status == httplib.NOT_MODIFIED or
        100 <= status < 200 or # 1xx codes
        res._method == 'HEAD'):
        res.length = 0

    # if the connection remains open, and we aren't using chunked, and
    # a content-length was not provided, then assume that the connection
    # WILL close.
    if (not res.will_close and
        not res.chunked and
        res.length is None):
        res.will_close = 1

    self.proxyres = res

    return False
294 294
class httphandler(keepalive.HTTPHandler):
    '''Keepalive HTTP handler that opens ``httpconnection`` objects.'''

    def http_open(self, req):
        '''Open ``req`` using the streaming-capable connection class.'''
        return self.do_open(httpconnection, req)

    def _start_transaction(self, h, req):
        '''Record tunnel details on ``h`` before the keepalive handler
        starts the transaction.'''
        _generic_start_transaction(self, h, req)
        return keepalive.HTTPHandler._start_transaction(self, h, req)
302 302
class logginghttpconnection(keepalive.HTTPConnection):
    '''HTTP connection whose socket is produced by a caller-supplied
    ``createconn`` callable.'''

    def __init__(self, createconn, *args, **kwargs):
        keepalive.HTTPConnection.__init__(self, *args, **kwargs)
        # stored after the base initializer has run
        self._create_connection = createconn

    if sys.version_info < (2, 7, 7):
        # copied from 2.7.14, since old implementations directly call
        # socket.create_connection()
        def connect(self):
            address = (self.host, self.port)
            self.sock = self._create_connection(address,
                                                self.timeout,
                                                self.source_address)
            if self._tunnel_host:
                self._tunnel()
317 317
class logginghttphandler(httphandler):
    """HTTP handler that logs socket I/O.

    ``logfh``, ``name`` and ``observeropts`` are handed to
    ``util.makeloggingsocket()`` for every socket that gets created.
    """

    def __init__(self, logfh, name, observeropts):
        super(logginghttphandler, self).__init__()

        self._logfh = logfh
        self._logname = name
        self._observeropts = observeropts

    # do_open() calls the passed class to instantiate an HTTPConnection. We
    # pass in a callable method that creates a custom HTTPConnection instance
    # whose callback to create the socket knows how to proxy the socket.
    def http_open(self, req):
        return self.do_open(self._makeconnection, req)

    def _makeconnection(self, *args, **kwargs):
        '''Build a logginghttpconnection whose sockets are wrapped for
        logging.'''
        def createconnection(*sockargs, **sockkwargs):
            rawsock = socket.create_connection(*sockargs, **sockkwargs)
            return util.makeloggingsocket(self._logfh, rawsock,
                                          self._logname,
                                          **self._observeropts)

        return logginghttpconnection(createconnection, *args, **kwargs)
340 340
if has_https:
    class httpsconnection(httplib.HTTPConnection):
        '''HTTPS connection that wraps its socket through sslutil and can
        tunnel through a CONNECT proxy first.'''

        response_class = keepalive.HTTPResponse
        default_port = httplib.HTTPS_PORT
        # must be able to send big bundle as stream.
        send = _gen_sendfile(keepalive.safesend)
        getresponse = keepalive.wrapgetresponse(httplib.HTTPConnection)

        def __init__(self, host, port=None, key_file=None, cert_file=None,
                     *args, **kwargs):
            httplib.HTTPConnection.__init__(self, host, port, *args, **kwargs)
            self.key_file = key_file
            self.cert_file = cert_file

        def connect(self):
            '''Open the TCP socket, tunnel if required, then wrap and
            validate it via sslutil.'''
            self.sock = socket.create_connection((self.host, self.port))

            servername = self.host
            if self.realhostport: # use CONNECT proxy
                _generic_proxytunnel(self)
                # the certificate must match the tunnelled host, not the proxy
                servername = self.realhostport.rsplit(':', 1)[0]
            self.sock = sslutil.wrapsocket(
                self.sock, self.key_file, self.cert_file, ui=self.ui,
                serverhostname=servername)
            sslutil.validatesocket(self.sock)

    class httpshandler(keepalive.KeepAliveHandler, urlreq.httpshandler):
        '''HTTPS handler that creates httpsconnection objects and prefers
        key/cert files from the matching auth group.'''

        def __init__(self, ui):
            keepalive.KeepAliveHandler.__init__(self)
            urlreq.httpshandler.__init__(self)
            self.ui = ui
            self.pwmgr = passwordmgr(self.ui,
                                     self.ui.httppasswordmgrdb)

        def _start_transaction(self, h, req):
            _generic_start_transaction(self, h, req)
            return keepalive.KeepAliveHandler._start_transaction(self, h, req)

        def https_open(self, req):
            # urllibcompat.getfullurl() does not contain credentials
            # and we may need them to match the certificates.
            url = urllibcompat.getfullurl(req)
            user, unusedpw = self.pwmgr.find_stored_password(url)
            res = httpconnectionmod.readauthforuri(self.ui, url, user)
            if not res:
                self.auth = None
            else:
                group, auth = res
                self.auth = auth
                self.ui.debug("using auth.%s.* for authentication\n" % group)
            return self.do_open(self._makeconnection, req)

        def _makeconnection(self, host, port=None, *args, **kwargs):
            '''Create an httpsconnection for ``host``/``port``.

            The first two extra positional arguments, when given, are taken
            as key file and cert file.
            '''
            keyfile = args[0] if len(args) >= 1 else None
            certfile = args[1] if len(args) >= 2 else None
            args = args[2:]

            # if the user has specified different key/cert files in
            # hgrc, we prefer these
            if self.auth and 'key' in self.auth and 'cert' in self.auth:
                keyfile = self.auth['key']
                certfile = self.auth['cert']

            conn = httpsconnection(host, port, keyfile, certfile, *args,
                                   **kwargs)
            conn.ui = self.ui
            return conn
413 413
class httpdigestauthhandler(urlreq.httpdigestauthhandler):
    '''Digest auth handler that resets its retry counter once per
    request.'''

    def __init__(self, *args, **kwargs):
        urlreq.httpdigestauthhandler.__init__(self, *args, **kwargs)
        # request for which the retry counter was last reset
        self.retried_req = None

    def reset_retry_count(self):
        # Python 2.6.5 will call this on 401 or 407 errors and thus loop
        # forever. We disable reset_retry_count completely and reset in
        # http_error_auth_reqed instead.
        pass

    def http_error_auth_reqed(self, auth_header, host, req, headers):
        # Reset the retry counter once for each request.
        isnewrequest = req is not self.retried_req
        if isnewrequest:
            self.retried_req = req
            self.retried = 0
        base = urlreq.httpdigestauthhandler
        return base.http_error_auth_reqed(self, auth_header, host, req,
                                          headers)
432 432
class httpbasicauthhandler(urlreq.httpbasicauthhandler):
    '''Basic auth handler that remembers the last Authorization value and
    adds it to later requests.'''

    def __init__(self, *args, **kwargs):
        self.auth = None
        urlreq.httpbasicauthhandler.__init__(self, *args, **kwargs)
        # request for which the retry counter was last reset
        self.retried_req = None

    def http_request(self, request):
        '''Add the remembered auth header, if any, and return the
        request.'''
        if self.auth:
            request.add_unredirected_header(self.auth_header, self.auth)

        return request

    def https_request(self, request):
        '''Same as http_request, for https requests.'''
        if self.auth:
            request.add_unredirected_header(self.auth_header, self.auth)

        return request

    def reset_retry_count(self):
        # Python 2.6.5 will call this on 401 or 407 errors and thus loop
        # forever. We disable reset_retry_count completely and reset in
        # http_error_auth_reqed instead.
        pass

    def http_error_auth_reqed(self, auth_header, host, req, headers):
        # Reset the retry counter once for each request.
        isnewrequest = req is not self.retried_req
        if isnewrequest:
            self.retried_req = req
            self.retried = 0
        base = urlreq.httpbasicauthhandler
        return base.http_error_auth_reqed(self, auth_header, host, req,
                                          headers)

    def retry_http_basic_auth(self, host, req, realm):
        '''Retry ``req`` with Basic credentials from the password manager.

        Returns None when no password is available or when the request
        already carries the very same header value.
        '''
        user, pw = self.passwd.find_user_password(
            realm, urllibcompat.getfullurl(req))
        if pw is None:
            return None

        raw = "%s:%s" % (pycompat.bytesurl(user), pycompat.bytesurl(pw))
        auth = r'Basic %s' % pycompat.strurl(base64.b64encode(raw).strip())
        if req.get_header(self.auth_header, None) == auth:
            # the same credentials were already sent; do not retry
            return None
        self.auth = auth
        req.add_unredirected_header(self.auth_header, auth)
        return self.parent.open(req)
478 478
class cookiehandler(urlreq.basehandler):
    '''Add cookies from the ``auth.cookiefile`` cookie jar to requests.

    When no cookie file is configured, or it cannot be loaded, requests
    pass through unchanged.
    '''

    def __init__(self, ui):
        self.cookiejar = None

        cookiefile = ui.config('auth', 'cookiefile')
        if not cookiefile:
            return

        cookiefile = util.expandpath(cookiefile)
        try:
            # cookielib is a stdlib interface, so the path is converted
            # with pycompat.fsdecode() before it is handed over.
            cookiejar = util.cookielib.MozillaCookieJar(
                pycompat.fsdecode(cookiefile))
            cookiejar.load()
            self.cookiejar = cookiejar
        except util.cookielib.LoadError as e:
            # a broken cookie file only produces a warning
            ui.warn(_('(error loading cookie file %s: %s; continuing without '
                      'cookies)\n') % (cookiefile, stringutil.forcebytestr(e)))

    def http_request(self, request):
        '''Add the Cookie header when a jar is loaded; return the
        request.'''
        if self.cookiejar:
            self.cookiejar.add_cookie_header(request)

        return request

    def https_request(self, request):
        '''Same as http_request, for https requests.'''
        if self.cookiejar:
            self.cookiejar.add_cookie_header(request)

        return request
507 508
508 509 handlerfuncs = []
509 510
def opener(ui, authinfo=None, useragent=None, loggingfh=None,
           loggingname=b's', loggingopts=None, sendaccept=True):
    '''
    construct an opener suitable for urllib2
    authinfo will be added to the password manager

    The opener can be configured to log socket events if the various
    ``logging*`` arguments are specified.

    ``loggingfh`` denotes a file object to log events to.
    ``loggingname`` denotes the name to print when logging.
    ``loggingopts`` is a dict of keyword arguments to pass to the constructed
    ``util.socketobserver`` instance.

    ``sendaccept`` allows controlling whether the ``Accept`` request header
    is sent. The header is sent by default.
    '''
    handlers = []

    if not loggingfh:
        handlers.append(httphandler())
        if has_https:
            handlers.append(httpshandler(ui))
    else:
        # We don't yet support HTTPS when logging I/O. If we attempt to open
        # an HTTPS URL, we'll likely fail due to unknown protocol.
        handlers.append(logginghttphandler(loggingfh, loggingname,
                                           loggingopts or {}))

    handlers.append(proxyhandler(ui))

    passmgr = passwordmgr(ui, ui.httppasswordmgrdb)
    if authinfo is not None:
        realm, uris, user, passwd = authinfo
        saveduser, savedpass = passmgr.find_stored_password(uris[0])
        if user != saveduser or passwd:
            passmgr.add_password(realm, uris, user, passwd)
        masked = '*' * len(passwd) if passwd else 'not set'
        ui.debug('http auth: user %s, password %s\n' % (user, masked))

    handlers.extend((httpbasicauthhandler(passmgr),
                     httpdigestauthhandler(passmgr)))
    handlers.extend([h(ui, passmgr) for h in handlerfuncs])
    handlers.append(cookiehandler(ui))
    urlopener = urlreq.buildopener(*handlers)

    # The user agent should *NOT* be used by servers for e.g.
    # protocol detection or feature negotiation: there are other
    # facilities for that.
    #
    # "mercurial/proto-1.0" was the original user agent string and
    # exists for backwards compatibility reasons.
    #
    # The "(Mercurial %s)" string contains the distribution
    # name and version. Other client implementations should choose their
    # own distribution name. Since servers should not be using the user
    # agent string for anything, clients should be able to define whatever
    # user agent they deem appropriate.
    #
    # The custom user agent is for lfs, because unfortunately some servers
    # do look at this value.
    if not useragent:
        useragent = 'mercurial/proto-1.0 (Mercurial %s)' % util.version()
    urlopener.addheaders = [(r'User-agent', pycompat.sysstr(useragent))]

    # This header should only be needed by wire protocol requests. But it has
    # been sent on all requests since forever. We keep sending it for backwards
    # compatibility reasons. Modern versions of the wire protocol use
    # X-HgProto-<N> for advertising client support.
    if sendaccept:
        urlopener.addheaders.append((r'Accept', r'application/mercurial-0.1'))

    return urlopener
586 587
def open(ui, url_, data=None):
    '''Open ``url_`` through an opener built for ``ui``.

    A value without a scheme is treated as a local path and turned into a
    ``file://`` URL. Otherwise the scheme is lower-cased and any
    credentials embedded in the URL are split off and given to the opener.
    ``data`` is passed through to the opener's ``open()``.
    '''
    u = util.url(url_)
    if u.scheme:
        u.scheme = u.scheme.lower()
        url_, authinfo = u.authinfo()
    else:
        path = util.normpath(os.path.abspath(url_))
        # pathname2url() is a stdlib function; its result is converted with
        # pycompat.bytesurl() before being joined to the literal prefix.
        url_ = 'file://' + pycompat.bytesurl(urlreq.pathname2url(path))
        authinfo = None
    return opener(ui, authinfo).open(pycompat.strurl(url_), data)
General Comments 0
You need to be logged in to leave comments. Login now