##// END OF EJS Templates
httprepo: use separate handlers for HTTP and HTTPS...
Alexis S. L. Carvalho -
r5983:6f1fcbc5 default
parent child Browse files
Show More
@@ -1,459 +1,458
1 1 # httprepo.py - HTTP repository proxy classes for mercurial
2 2 #
3 3 # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
4 4 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
5 5 #
6 6 # This software may be used and distributed according to the terms
7 7 # of the GNU General Public License, incorporated herein by reference.
8 8
9 9 from node import *
10 10 from remoterepo import *
11 11 from i18n import _
12 12 import repo, os, urllib, urllib2, urlparse, zlib, util, httplib
13 13 import errno, keepalive, tempfile, socket, changegroup
14 14
class passwordmgr(urllib2.HTTPPasswordMgrWithDefaultRealm):
    """Password manager that falls back to prompting the user.

    Behaves like urllib2's default-realm password manager, but when no
    stored credentials match it asks interactively for the missing
    user/password (aborting instead when the ui is non-interactive).
    """
    def __init__(self, ui):
        urllib2.HTTPPasswordMgrWithDefaultRealm.__init__(self)
        self.ui = ui

    def find_user_password(self, realm, authuri):
        # first try any credentials registered via add_password()
        authinfo = urllib2.HTTPPasswordMgrWithDefaultRealm.find_user_password(
            self, realm, authuri)
        user, passwd = authinfo
        if user and passwd:
            return (user, passwd)

        # nothing stored and nobody to ask: give up
        if not self.ui.interactive:
            raise util.Abort(_('http authorization required'))

        self.ui.write(_("http authorization required\n"))
        self.ui.status(_("realm: %s\n") % realm)
        if user:
            self.ui.status(_("user: %s\n") % user)
        else:
            user = self.ui.prompt(_("user:"), default=None)

        if not passwd:
            passwd = self.ui.getpass()

        # remember the answer so the same realm/URI is not asked again
        self.add_password(realm, authuri, user, passwd)
        return (user, passwd)
42 42
def netlocsplit(netloc):
    '''split [user[:passwd]@]host[:port] into 4-tuple.'''
    user = passwd = None
    if '@' in netloc:
        # credentials are everything before the first '@'
        userpass, netloc = netloc.split('@', 1)
        if ':' in userpass:
            user, passwd = [urllib.unquote(p)
                            for p in userpass.split(':', 1)]
        else:
            user = urllib.unquote(userpass)
    # whatever remains is host, optionally followed by ':port'
    if ':' in netloc:
        host, port = netloc.split(':', 1)
    else:
        host, port = netloc, None
    return host, port, user, passwd
63 63
def netlocunsplit(host, port, user=None, passwd=None):
    '''turn host, port, user, passwd into [user[:passwd]@]host[:port].'''
    hostport = host
    if port:
        hostport += ':' + port
    # without a user there is no credentials part at all
    if not user:
        return hostport
    userpass = urllib.quote(user)
    if passwd:
        userpass += ':' + urllib.quote(passwd)
    return userpass + '@' + hostport
77 77
# work around a bug in Python < 2.4.2
# (it leaves a "\n" at the end of Proxy-authorization headers)
class request(urllib2.Request):
    def add_header(self, key, val):
        # strip the trailing newline the buggy Python versions leave in
        if key.lower() == 'proxy-authorization':
            val = val.strip()
        return urllib2.Request.add_header(self, key, val)
85 85
class httpsendfile(file):
    # file subclass that knows its own size, so urllib2 can emit a
    # Content-length header when the file is used as a request body
    def __len__(self):
        return os.fstat(self.fileno()).st_size
89 89
def _gen_sendfile(connection):
    """Return a send() method wrapping *connection*'s plain send().

    The returned method streams httpsendfile bodies chunk by chunk
    instead of reading them into memory at once.
    """
    def _sendfile(self, data):
        # send a file
        if isinstance(data, httpsendfile):
            # if auth required, some data sent twice, so rewind here
            data.seek(0)
            for chunk in util.filechunkiter(data):
                connection.send(self, chunk)
        else:
            connection.send(self, data)
    return _sendfile
101 101
class httpconnection(keepalive.HTTPConnection):
    # must be able to send big bundle as stream.
    send = _gen_sendfile(keepalive.HTTPConnection)
105 105
class httphandler(keepalive.HTTPHandler):
    # keepalive-capable handler for plain http:// URLs
    def http_open(self, req):
        return self.do_open(httpconnection, req)

    def __del__(self):
        # drop any pooled keepalive connections when the handler goes away
        self.close_all()
112 112
has_https = hasattr(urllib2, 'HTTPSHandler')
if has_https:
    class httpsconnection(httplib.HTTPSConnection):
        response_class = keepalive.HTTPResponse
        # must be able to send big bundle as stream.
        send = _gen_sendfile(httplib.HTTPSConnection)

    # separate handler for https:// URLs, only defined when the Python
    # build has SSL support (urllib2.HTTPSHandler exists)
    class httpshandler(keepalive.KeepAliveHandler, urllib2.HTTPSHandler):
        def https_open(self, req):
            return self.do_open(httpsconnection, req)
# In python < 2.5 AbstractDigestAuthHandler raises a ValueError if
# it doesn't know about the auth type requested. This can happen if
# somebody is using BasicAuth and types a bad password.
class httpdigestauthhandler(urllib2.HTTPDigestAuthHandler):
    def http_error_auth_reqed(self, auth_header, host, req, headers):
        try:
            return urllib2.HTTPDigestAuthHandler.http_error_auth_reqed(
                self, auth_header, host, req, headers)
        except ValueError, inst:
            arg = inst.args[0]
            if arg.startswith("AbstractDigestAuthHandler doesn't know "):
                # unknown auth scheme: swallow and let other handlers try
                return
            raise
140 137
def zgenerator(f):
    """Yield decompressed chunks of the zlib stream read from *f*."""
    zd = zlib.decompressobj()
    try:
        for chunk in util.filechunkiter(f):
            yield zd.decompress(chunk)
    except httplib.HTTPException, inst:
        raise IOError(None, _('connection ended unexpectedly'))
    # emit whatever the decompressor still has buffered
    yield zd.flush()
149 146
# characters never quoted by quotepath()
_safe = ('abcdefghijklmnopqrstuvwxyz'
         'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
         '0123456789' '_.-/')
_safeset = None
_hex = None
def quotepath(path):
    '''quote the path part of a URL

    This is similar to urllib.quote, but it also tries to avoid
    quoting things twice (inspired by wget):

    >>> quotepath('abc def')
    'abc%20def'
    >>> quotepath('abc%20def')
    'abc%20def'
    >>> quotepath('abc%20 def')
    'abc%20%20def'
    >>> quotepath('abc def%20')
    'abc%20def%20'
    >>> quotepath('abc def%2')
    'abc%20def%252'
    >>> quotepath('abc def%')
    'abc%20def%25'
    '''
    global _safeset, _hex
    # build the lookup sets lazily, only once per process
    if _safeset is None:
        _safeset = util.set(_safe)
        _hex = util.set('abcdefABCDEF0123456789')
    l = list(path)
    for i in xrange(len(l)):
        c = l[i]
        # leave already-quoted '%xx' sequences alone
        if c == '%' and i + 2 < len(l) and (l[i+1] in _hex and l[i+2] in _hex):
            pass
        elif c not in _safeset:
            l[i] = '%%%02X' % ord(c)
    return ''.join(l)
186 183
class httprepository(remoterepository):
    """Repository proxy that speaks the hg wire protocol over HTTP."""

    def __init__(self, ui, path):
        """Parse *path*, set up proxy/auth handlers and install an opener.

        Raises util.Abort for URLs with query or fragment components.
        NOTE(review): installs a process-global urllib2 opener — confirm
        callers never need two differently-configured repos at once.
        """
        self.path = path
        self.caps = None        # capability set, fetched lazily
        self.handler = None
        scheme, netloc, urlpath, query, frag = urlparse.urlsplit(path)
        if query or frag:
            raise util.Abort(_('unsupported URL component: "%s"') %
                             (query or frag))
        if not urlpath:
            urlpath = '/'
        urlpath = quotepath(urlpath)
        host, port, user, passwd = netlocsplit(netloc)

        # urllib cannot handle URLs with embedded user or passwd
        self._url = urlparse.urlunsplit((scheme, netlocunsplit(host, port),
                                         urlpath, '', ''))
        self.ui = ui
        self.ui.debug(_('using %s\n') % self._url)

        proxyurl = ui.config("http_proxy", "host") or os.getenv('http_proxy')
        # XXX proxyauthinfo = None
        handlers = [httphandler()]
        if has_https:
            handlers.append(httpshandler())

        if proxyurl:
            # proxy can be proper url or host[:port]
            if not (proxyurl.startswith('http:') or
                    proxyurl.startswith('https:')):
                proxyurl = 'http://' + proxyurl + '/'
            snpqf = urlparse.urlsplit(proxyurl)
            proxyscheme, proxynetloc, proxypath, proxyquery, proxyfrag = snpqf
            hpup = netlocsplit(proxynetloc)

            proxyhost, proxyport, proxyuser, proxypasswd = hpup
            if not proxyuser:
                proxyuser = ui.config("http_proxy", "user")
                proxypasswd = ui.config("http_proxy", "passwd")

            # see if we should use a proxy for this url
            no_list = [ "localhost", "127.0.0.1" ]
            no_list.extend([p.lower() for
                            p in ui.configlist("http_proxy", "no")])
            no_list.extend([p.strip().lower() for
                            p in os.getenv("no_proxy", '').split(',')
                            if p.strip()])
            # "http_proxy.always" config is for running tests on localhost
            if (not ui.configbool("http_proxy", "always") and
                host.lower() in no_list):
                # avoid auto-detection of proxy settings by appending
                # a ProxyHandler with no proxies defined.
                handlers.append(urllib2.ProxyHandler({}))
                ui.debug(_('disabling proxy for %s\n') % host)
            else:
                proxyurl = urlparse.urlunsplit((
                    proxyscheme, netlocunsplit(proxyhost, proxyport,
                                               proxyuser, proxypasswd or ''),
                    proxypath, proxyquery, proxyfrag))
                handlers.append(urllib2.ProxyHandler({scheme: proxyurl}))
                ui.debug(_('proxying through http://%s:%s\n') %
                         (proxyhost, proxyport))

        # urllib2 takes proxy values from the environment and those
        # will take precedence if found, so drop them
        for env in ["HTTP_PROXY", "http_proxy", "no_proxy"]:
            try:
                if os.environ.has_key(env):
                    del os.environ[env]
            except OSError:
                pass

        passmgr = passwordmgr(ui)
        if user:
            ui.debug(_('http auth: user %s, password %s\n') %
                     (user, passwd and '*' * len(passwd) or 'not set'))
            netloc = host
            if port:
                netloc += ':' + port
            # Python < 2.4.3 uses only the netloc to search for a password
            passmgr.add_password(None, (self._url, netloc), user, passwd or '')

        handlers.extend((urllib2.HTTPBasicAuthHandler(passmgr),
                         httpdigestauthhandler(passmgr)))
        opener = urllib2.build_opener(*handlers)

        # 1.0 here is the _protocol_ version
        opener.addheaders = [('User-agent', 'mercurial/proto-1.0')]
        urllib2.install_opener(opener)

    def url(self):
        """Return the URL this repository was opened with."""
        return self.path

    # look up capabilities only when needed

    def get_caps(self):
        """Fetch (and cache) the set of capabilities the server advertises."""
        if self.caps is None:
            try:
                self.caps = util.set(self.do_read('capabilities').split())
            except repo.RepoError:
                # server has no 'capabilities' command: assume none
                self.caps = util.set()
            self.ui.debug(_('capabilities: %s\n') %
                          (' '.join(self.caps or ['none'])))
        return self.caps

    capabilities = property(get_caps)

    def lock(self):
        raise util.Abort(_('operation not supported over http'))

    def do_cmd(self, cmd, **args):
        """Issue wire-protocol command *cmd*; return the HTTP response.

        Keyword-only 'data' (request body) and 'headers' are consumed
        here; all other keyword args become query-string parameters.
        Raises util.Abort on auth failure, repo.RepoError when the
        response does not look like an hg server, IOError on HTTP-level
        errors.
        """
        data = args.pop('data', None)
        headers = args.pop('headers', {})
        self.ui.debug(_("sending %s command\n") % cmd)
        q = {"cmd": cmd}
        q.update(args)
        qs = '?%s' % urllib.urlencode(q)
        cu = "%s%s" % (self._url, qs)
        try:
            if data:
                self.ui.debug(_("sending %s bytes\n") % len(data))
            resp = urllib2.urlopen(request(cu, data, headers))
        except urllib2.HTTPError, inst:
            if inst.code == 401:
                raise util.Abort(_('authorization failed'))
            raise
        except httplib.HTTPException, inst:
            self.ui.debug(_('http error while sending %s command\n') % cmd)
            self.ui.print_exc()
            raise IOError(None, inst)
        except IndexError:
            # this only happens with Python 2.3, later versions raise URLError
            raise util.Abort(_('http error, possibly caused by proxy setting'))
        # record the url we got redirected to
        resp_url = resp.geturl()
        if resp_url.endswith(qs):
            resp_url = resp_url[:-len(qs)]
        if self._url != resp_url:
            self.ui.status(_('real URL is %s\n') % resp_url)
            self._url = resp_url
        try:
            proto = resp.getheader('content-type')
        except AttributeError:
            proto = resp.headers['content-type']

        # accept old "text/plain" and "application/hg-changegroup" for now
        if not (proto.startswith('application/mercurial-') or
                proto.startswith('text/plain') or
                proto.startswith('application/hg-changegroup')):
            self.ui.debug(_("Requested URL: '%s'\n") % cu)
            raise repo.RepoError(_("'%s' does not appear to be an hg repository")
                                 % self._url)

        if proto.startswith('application/mercurial-'):
            try:
                version = proto.split('-', 1)[1]
                version_info = tuple([int(n) for n in version.split('.')])
            except ValueError:
                raise repo.RepoError(_("'%s' sent a broken Content-type "
                                       "header (%s)") % (self._url, proto))
            if version_info > (0, 1):
                raise repo.RepoError(_("'%s' uses newer protocol %s") %
                                     (self._url, version))

        return resp

    def do_read(self, cmd, **args):
        """Run *cmd* and return the entire response body as a string."""
        fp = self.do_cmd(cmd, **args)
        try:
            return fp.read()
        finally:
            # if using keepalive, allow connection to be reused
            fp.close()

    def lookup(self, key):
        """Resolve *key* to a binary node id on the server."""
        self.requirecap('lookup', _('look up remote revision'))
        d = self.do_cmd("lookup", key = key).read()
        success, data = d[:-1].split(' ', 1)
        if int(success):
            return bin(data)
        raise repo.RepoError(data)

    def heads(self):
        """Return the list of remote head nodes (binary)."""
        d = self.do_read("heads")
        try:
            return map(bin, d[:-1].split(" "))
        except:
            raise util.UnexpectedOutput(_("unexpected response:"), d)

    def branches(self, nodes):
        """Return branch info tuples for the given binary *nodes*."""
        n = " ".join(map(hex, nodes))
        d = self.do_read("branches", nodes=n)
        try:
            br = [ tuple(map(bin, b.split(" "))) for b in d.splitlines() ]
            return br
        except:
            raise util.UnexpectedOutput(_("unexpected response:"), d)

    def between(self, pairs):
        """Return nodes between each (top, bottom) pair of binary nodes."""
        n = "\n".join(["-".join(map(hex, p)) for p in pairs])
        d = self.do_read("between", pairs=n)
        try:
            p = [ l and map(bin, l.split(" ")) or [] for l in d.splitlines() ]
            return p
        except:
            raise util.UnexpectedOutput(_("unexpected response:"), d)

    def changegroup(self, nodes, kind):
        """Return a chunkbuffer over the (decompressed) changegroup data."""
        n = " ".join(map(hex, nodes))
        f = self.do_cmd("changegroup", roots=n)
        return util.chunkbuffer(zgenerator(f))

    def changegroupsubset(self, bases, heads, source):
        """Return a chunkbuffer for the changegroup between bases and heads."""
        self.requirecap('changegroupsubset', _('look up remote changes'))
        baselst = " ".join([hex(n) for n in bases])
        headlst = " ".join([hex(n) for n in heads])
        f = self.do_cmd("changegroupsubset", bases=baselst, heads=headlst)
        return util.chunkbuffer(zgenerator(f))

    def unbundle(self, cg, heads, source):
        """Push changegroup *cg* to the server; return its integer reply."""
        # have to stream bundle to a temp file because we do not have
        # http 1.1 chunked transfer.

        type = ""
        types = self.capable('unbundle')
        # servers older than d1b16a746db6 will send 'unbundle' as a
        # boolean capability
        try:
            types = types.split(',')
        except AttributeError:
            types = [""]
        if types:
            # pick the first bundle type both sides understand
            for x in types:
                if x in changegroup.bundletypes:
                    type = x
                    break

        tempname = changegroup.writebundle(cg, None, type)
        fp = httpsendfile(tempname, "rb")
        try:
            try:
                rfp = self.do_cmd(
                    'unbundle', data=fp,
                    headers={'content-type': 'application/octet-stream'},
                    heads=' '.join(map(hex, heads)))
                try:
                    ret = int(rfp.readline())
                    self.ui.write(rfp.read())
                    return ret
                finally:
                    rfp.close()
            except socket.error, err:
                if err[0] in (errno.ECONNRESET, errno.EPIPE):
                    raise util.Abort(_('push failed: %s') % err[1])
                raise util.Abort(err[1])
        finally:
            # always clean up the temporary bundle file
            fp.close()
            os.unlink(tempname)

    def stream_out(self):
        """Return the raw response of the 'stream_out' command."""
        return self.do_cmd('stream_out')
446 445
class httpsrepository(httprepository):
    """httprepository variant that refuses to run without SSL support."""
    def __init__(self, ui, path):
        if not has_https:
            raise util.Abort(_('Python support for SSL and HTTPS '
                               'is not installed'))
        httprepository.__init__(self, ui, path)
453 452
def instance(ui, path, create):
    """Return an http(s) repository object for *path*.

    Raises util.Abort when *create* is set: repositories cannot be
    created over HTTP.
    """
    if create:
        raise util.Abort(_('cannot create new http repository'))
    # pick the repository class matching the URL scheme
    cls = path.startswith('https:') and httpsrepository or httprepository
    return cls(ui, path)
@@ -1,579 +1,582
1 1 # This library is free software; you can redistribute it and/or
2 2 # modify it under the terms of the GNU Lesser General Public
3 3 # License as published by the Free Software Foundation; either
4 4 # version 2.1 of the License, or (at your option) any later version.
5 5 #
6 6 # This library is distributed in the hope that it will be useful,
7 7 # but WITHOUT ANY WARRANTY; without even the implied warranty of
8 8 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
9 9 # Lesser General Public License for more details.
10 10 #
11 11 # You should have received a copy of the GNU Lesser General Public
12 12 # License along with this library; if not, write to the
13 13 # Free Software Foundation, Inc.,
14 14 # 59 Temple Place, Suite 330,
15 15 # Boston, MA 02111-1307 USA
16 16
17 17 # This file is part of urlgrabber, a high-level cross-protocol url-grabber
18 18 # Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko
19 19
20 20 # Modified by Benoit Boissinot:
21 21 # - fix for digest auth (inspired from urllib2.py @ Python v2.4)
22 22
23 23 """An HTTP handler for urllib2 that supports HTTP 1.1 and keepalive.
24 24
25 25 >>> import urllib2
26 26 >>> from keepalive import HTTPHandler
27 27 >>> keepalive_handler = HTTPHandler()
28 28 >>> opener = urllib2.build_opener(keepalive_handler)
29 29 >>> urllib2.install_opener(opener)
30 30 >>>
31 31 >>> fo = urllib2.urlopen('http://www.python.org')
32 32
33 33 If a connection to a given host is requested, and all of the existing
34 34 connections are still in use, another connection will be opened. If
35 35 the handler tries to use an existing connection but it fails in some
36 36 way, it will be closed and removed from the pool.
37 37
38 38 To remove the handler, simply re-run build_opener with no arguments, and
39 39 install that opener.
40 40
41 41 You can explicitly close connections by using the close_connection()
42 42 method of the returned file-like object (described below) or you can
43 43 use the handler methods:
44 44
45 45 close_connection(host)
46 46 close_all()
47 47 open_connections()
48 48
49 49 NOTE: using the close_connection and close_all methods of the handler
50 50 should be done with care when using multiple threads.
51 51 * there is nothing that prevents another thread from creating new
52 52 connections immediately after connections are closed
53 53 * no checks are done to prevent in-use connections from being closed
54 54
55 55 >>> keepalive_handler.close_all()
56 56
57 57 EXTRA ATTRIBUTES AND METHODS
58 58
59 59 Upon a status of 200, the object returned has a few additional
60 60 attributes and methods, which should not be used if you want to
61 61 remain consistent with the normal urllib2-returned objects:
62 62
63 63 close_connection() - close the connection to the host
64 64 readlines() - you know, readlines()
65 65 status - the return status (ie 404)
66 66 reason - english translation of status (ie 'File not found')
67 67
68 68 If you want the best of both worlds, use this inside an
69 69 AttributeError-catching try:
70 70
71 71 >>> try: status = fo.status
72 72 >>> except AttributeError: status = None
73 73
74 74 Unfortunately, these are ONLY there if status == 200, so it's not
75 75 easy to distinguish between non-200 responses. The reason is that
76 76 urllib2 tries to do clever things with error codes 301, 302, 401,
77 77 and 407, and it wraps the object upon return.
78 78
79 79 For python versions earlier than 2.4, you can avoid this fancy error
80 80 handling by setting the module-level global HANDLE_ERRORS to zero.
81 81 You see, prior to 2.4, it's the HTTP Handler's job to determine what
82 82 to handle specially, and what to just pass up. HANDLE_ERRORS == 0
83 83 means "pass everything up". In python 2.4, however, this job no
84 84 longer belongs to the HTTP Handler and is now done by a NEW handler,
85 85 HTTPErrorProcessor. Here's the bottom line:
86 86
87 87 python version < 2.4
88 88 HANDLE_ERRORS == 1 (default) pass up 200, treat the rest as
89 89 errors
90 90 HANDLE_ERRORS == 0 pass everything up, error processing is
91 91 left to the calling code
92 92 python version >= 2.4
93 93 HANDLE_ERRORS == 1 pass up 200, treat the rest as errors
94 94 HANDLE_ERRORS == 0 (default) pass everything up, let the
95 95 other handlers (specifically,
96 96 HTTPErrorProcessor) decide what to do
97 97
98 98 In practice, setting the variable either way makes little difference
99 99 in python 2.4, so for the most consistent behavior across versions,
100 100 you probably just want to use the defaults, which will give you
101 101 exceptions on errors.
102 102
103 103 """
104 104
105 105 # $Id: keepalive.py,v 1.14 2006/04/04 21:00:32 mstenner Exp $
106 106
107 107 import urllib2
108 108 import httplib
109 109 import socket
110 110 import thread
111 111
112 112 DEBUG = None
113 113
114 114 import sys
115 115 if sys.version_info < (2, 4): HANDLE_ERRORS = 1
116 116 else: HANDLE_ERRORS = 0
117 117
class ConnectionManager:
    """
    The connection manager must be able to:
      * keep track of all existing
    """
    def __init__(self):
        # all three maps are guarded by this lock (except set_ready,
        # see below)
        self._lock = thread.allocate_lock()
        self._hostmap = {} # map hosts to a list of connections
        self._connmap = {} # map connections to host
        self._readymap = {} # map connection to ready state

    def add(self, host, connection, ready):
        """Register *connection* for *host* with the given ready state."""
        self._lock.acquire()
        try:
            if not self._hostmap.has_key(host): self._hostmap[host] = []
            self._hostmap[host].append(connection)
            self._connmap[connection] = host
            self._readymap[connection] = ready
        finally:
            self._lock.release()

    def remove(self, connection):
        """Forget *connection*; silently ignores untracked connections."""
        self._lock.acquire()
        try:
            try:
                host = self._connmap[connection]
            except KeyError:
                pass
            else:
                del self._connmap[connection]
                del self._readymap[connection]
                self._hostmap[host].remove(connection)
                if not self._hostmap[host]: del self._hostmap[host]
        finally:
            self._lock.release()

    def set_ready(self, connection, ready):
        # NOTE(review): updates _readymap without holding _lock —
        # presumably a tolerated race; confirm before relying on it
        try: self._readymap[connection] = ready
        except KeyError: pass

    def get_ready_conn(self, host):
        """Return an idle connection to *host*, marking it busy, or None."""
        conn = None
        self._lock.acquire()
        try:
            if self._hostmap.has_key(host):
                for c in self._hostmap[host]:
                    if self._readymap[c]:
                        self._readymap[c] = 0
                        conn = c
                        break
        finally:
            self._lock.release()
        return conn

    def get_all(self, host=None):
        """Return a list of *host*'s connections, or a host->list dict."""
        if host:
            return list(self._hostmap.get(host, []))
        else:
            return dict(self._hostmap)
177 177
class KeepAliveHandler:
    """Mixin providing keepalive connection pooling for urllib2 handlers.

    Combine with urllib2.HTTPHandler (see HTTPHandler below) or
    urllib2.HTTPSHandler to get connection reuse for the corresponding
    scheme.
    """
    def __init__(self):
        self._cm = ConnectionManager()

    #### Connection Management
    def open_connections(self):
        """return a list of connected hosts and the number of connections
        to each.  [('foo.com:80', 2), ('bar.org', 1)]"""
        return [(host, len(li)) for (host, li) in self._cm.get_all().items()]

    def close_connection(self, host):
        """close connection(s) to <host>
        host is the host:port spec, as in 'www.cnn.com:8080' as passed in.
        no error occurs if there is no connection to that host."""
        for h in self._cm.get_all(host):
            self._cm.remove(h)
            h.close()

    def close_all(self):
        """close all open connections"""
        for host, conns in self._cm.get_all().items():
            for h in conns:
                self._cm.remove(h)
                h.close()

    def _request_closed(self, request, host, connection):
        """tells us that this request is now closed and that the
        connection is ready for another request"""
        self._cm.set_ready(connection, 1)

    def _remove_connection(self, host, connection, close=0):
        if close: connection.close()
        self._cm.remove(connection)

    #### Transaction Execution
    def http_open(self, req):
        return self.do_open(HTTPConnection, req)

    def do_open(self, http_class, req):
        """Open *req*, reusing a pooled connection to the host if possible."""
        host = req.get_host()
        if not host:
            raise urllib2.URLError('no host given')

        try:
            h = self._cm.get_ready_conn(host)
            while h:
                r = self._reuse_connection(h, req, host)

                # if this response is non-None, then it worked and we're
                # done.  Break out, skipping the else block.
                if r: break

                # connection is bad - possibly closed by server
                # discard it and ask for the next free connection
                h.close()
                self._cm.remove(h)
                h = self._cm.get_ready_conn(host)
            else:
                # no (working) free connections were found.  Create a new one.
                h = http_class(host)
                if DEBUG: DEBUG.info("creating new connection to %s (%d)",
                                     host, id(h))
                self._cm.add(host, h, 0)
                self._start_transaction(h, req)
                r = h.getresponse()
        except (socket.error, httplib.HTTPException), err:
            raise urllib2.URLError(err)

        # if not a persistent connection, don't try to reuse it
        if r.will_close: self._cm.remove(h)

        if DEBUG: DEBUG.info("STATUS: %s, %s", r.status, r.reason)
        # decorate the response so close()/geturl()/etc. work (see
        # HTTPResponse below)
        r._handler = self
        r._host = host
        r._url = req.get_full_url()
        r._connection = h
        r.code = r.status
        r.headers = r.msg
        r.msg = r.reason

        if r.status == 200 or not HANDLE_ERRORS:
            return r
        else:
            return self.parent.error('http', req, r,
                                     r.status, r.msg, r.headers)

    def _reuse_connection(self, h, req, host):
        """start the transaction with a re-used connection
        return a response object (r) upon success or None on failure.
        This DOES not close or remove bad connections in cases where
        it returns.  However, if an unexpected exception occurs, it
        will close and remove the connection before re-raising.
        """
        try:
            self._start_transaction(h, req)
            r = h.getresponse()
            # note: just because we got something back doesn't mean it
            # worked.  We'll check the version below, too.
        except (socket.error, httplib.HTTPException):
            r = None
        except:
            # adding this block just in case we've missed
            # something we will still raise the exception, but
            # lets try and close the connection and remove it
            # first.  We previously got into a nasty loop
            # where an exception was uncaught, and so the
            # connection stayed open.  On the next try, the
            # same exception was raised, etc.  The tradeoff is
            # that it's now possible this call will raise
            # a DIFFERENT exception
            if DEBUG: DEBUG.error("unexpected exception - closing " + \
                                  "connection to %s (%d)", host, id(h))
            self._cm.remove(h)
            h.close()
            raise

        if r is None or r.version == 9:
            # httplib falls back to assuming HTTP 0.9 if it gets a
            # bad header back.  This is most likely to happen if
            # the socket has been closed by the server since we
            # last used the connection.
            if DEBUG: DEBUG.info("failed to re-use connection to %s (%d)",
                                 host, id(h))
            r = None
        else:
            if DEBUG: DEBUG.info("re-using connection to %s (%d)", host, id(h))

        return r

    def _start_transaction(self, h, req):
        """Send the request line, headers and body for *req* over *h*."""
        headers = req.headers.copy()
        body = req.data
        if sys.version_info >= (2, 4):
            headers.update(req.unredirected_hdrs)
        try:
            h.request(req.get_method(), req.get_selector(), body, headers)
        except socket.error, err: # XXX what error?
            raise urllib2.URLError(err)
316 316
# concrete keepalive handler for plain HTTP
class HTTPHandler(KeepAliveHandler, urllib2.HTTPHandler):
    pass
319
class HTTPResponse(httplib.HTTPResponse):
    # we need to subclass HTTPResponse in order to
    # 1) add readline() and readlines() methods
    # 2) add close_connection() methods
    # 3) add info() and geturl() methods

    # in order to add readline(), read must be modified to deal with a
    # buffer.  example: readline must read a buffer and then spit back
    # one line at a time.  The only real alternative is to read one
    # BYTE at a time (ick).  Once something has been read, it can't be
    # put back (ok, maybe it can, but that's even uglier than this),
    # so if you THEN do a normal read, you must first take stuff from
    # the buffer.

    # the read method wraps the original to accomodate buffering,
    # although read() never adds to the buffer.
    # Both readline and readlines have been stolen with almost no
    # modification from socket.py


    def __init__(self, sock, debuglevel=0, strict=0, method=None):
        if method: # the httplib in python 2.3 uses the method arg
            httplib.HTTPResponse.__init__(self, sock, debuglevel, method)
        else: # 2.2 doesn't
            httplib.HTTPResponse.__init__(self, sock, debuglevel)
        self.fileno = sock.fileno
        self.code = None
        self._rbuf = ''            # readline() buffer
        self._rbufsize = 8096
        self._handler = None # inserted by the handler later
        self._host = None    # (same)
        self._url = None     # (same)
        self._connection = None # (same)

    _raw_read = httplib.HTTPResponse.read

    def close(self):
        # close the file object but tell the handler the underlying
        # connection may be reused
        if self.fp:
            self.fp.close()
            self.fp = None
            if self._handler:
                self._handler._request_closed(self, self._host,
                                              self._connection)

    def close_connection(self):
        """Close and discard the underlying connection (no reuse)."""
        self._handler._remove_connection(self._host, self._connection, close=1)
        self.close()

    def info(self):
        return self.headers

    def geturl(self):
        return self._url

    def read(self, amt=None):
        # the _rbuf test is only in this first if for speed.  It's not
        # logically necessary
        if self._rbuf and not amt is None:
            L = len(self._rbuf)
            if amt > L:
                amt -= L
            else:
                # request fully satisfied from the buffer
                s = self._rbuf[:amt]
                self._rbuf = self._rbuf[amt:]
                return s

        s = self._rbuf + self._raw_read(amt)
        self._rbuf = ''
        return s

    def readline(self, limit=-1):
        # fill the buffer until it contains a newline (or limit bytes)
        data = ""
        i = self._rbuf.find('\n')
        while i < 0 and not (0 < limit <= len(self._rbuf)):
            new = self._raw_read(self._rbufsize)
            if not new: break
            i = new.find('\n')
            if i >= 0: i = i + len(self._rbuf)
            self._rbuf = self._rbuf + new
        if i < 0: i = len(self._rbuf)
        else: i = i+1
        if 0 <= limit < len(self._rbuf): i = limit
        data, self._rbuf = self._rbuf[:i], self._rbuf[i:]
        return data

    def readlines(self, sizehint = 0):
        total = 0
        list = []
        while 1:
            line = self.readline()
            if not line: break
            list.append(line)
            total += len(line)
            if sizehint and total >= sizehint:
                break
        return list
413 416
414 417
class HTTPConnection(httplib.HTTPConnection):
    """httplib.HTTPConnection that produces the buffered, keepalive-aware
    HTTPResponse defined above instead of the stock response class."""
    # use the modified response class
    response_class = HTTPResponse
418 421
419 422 #########################################################################
420 423 ##### TEST FUNCTIONS
421 424 #########################################################################
422 425
def error_handler(url):
    """Manual test: fetch *url* with the keepalive handler, once with
    fancy HTTP error handling off and once with it on, then report the
    handler's open connections.

    Meant to be pointed at a non-200 URL. Temporarily toggles the
    module-global HANDLE_ERRORS and restores it afterwards.
    """
    global HANDLE_ERRORS
    orig = HANDLE_ERRORS
    keepalive_handler = HTTPHandler()
    opener = urllib2.build_opener(keepalive_handler)
    urllib2.install_opener(opener)
    pos = {0: 'off', 1: 'on'}
    for i in (0, 1):
        print " fancy error handling %s (HANDLE_ERRORS = %i)" % (pos[i], i)
        HANDLE_ERRORS = i
        try:
            fo = urllib2.urlopen(url)
            foo = fo.read()
            fo.close()
            # the response object may not carry status/reason attributes
            try: status, reason = fo.status, fo.reason
            except AttributeError: status, reason = None, None
        except IOError, e:
            print " EXCEPTION: %s" % e
            raise
        else:
            print " status = %s, reason = %s" % (status, reason)
    HANDLE_ERRORS = orig
    hosts = keepalive_handler.open_connections()
    print "open connections:", hosts
    keepalive_handler.close_all()
448 451
def continuity(url):
    """Manual test: verify the keepalive handler returns the same bytes
    as the plain urllib2 handler by comparing MD5 digests of *url*
    fetched three ways (plain, keepalive read(), keepalive readline()).
    """
    # NOTE(review): the md5 module is Python-2 only (superseded by hashlib)
    import md5
    format = '%25s: %s'

    # first fetch the file with the normal http handler
    opener = urllib2.build_opener()
    urllib2.install_opener(opener)
    fo = urllib2.urlopen(url)
    foo = fo.read()
    fo.close()
    m = md5.new(foo)
    print format % ('normal urllib', m.hexdigest())

    # now install the keepalive handler and try again
    opener = urllib2.build_opener(HTTPHandler())
    urllib2.install_opener(opener)

    fo = urllib2.urlopen(url)
    foo = fo.read()
    fo.close()
    m = md5.new(foo)
    print format % ('keepalive read', m.hexdigest())

    # exercise the buffered readline() path as well
    fo = urllib2.urlopen(url)
    foo = ''
    while 1:
        f = fo.readline()
        if f: foo = foo + f
        else: break
    fo.close()
    m = md5.new(foo)
    print format % ('keepalive readline', m.hexdigest())
481 484
def comp(N, url):
    """Manual test: time N fetches of *url* with the normal urllib2
    handlers versus the keepalive handler and print the speedup."""
    print ' making %i connections to:\n %s' % (N, url)

    sys.stdout.write(' first using the normal urllib handlers')
    # first use normal opener
    opener = urllib2.build_opener()
    urllib2.install_opener(opener)
    t1 = fetch(N, url)
    print ' TIME: %.3f s' % t1

    sys.stdout.write(' now using the keepalive handler ')
    # now install the keepalive handler and try again
    opener = urllib2.build_opener(HTTPHandler())
    urllib2.install_opener(opener)
    t2 = fetch(N, url)
    print ' TIME: %.3f s' % t2
    print ' improvement factor: %.2f' % (t1/t2, )
499 502
def fetch(N, url, delay=0):
    """Fetch *url* N times (sleeping *delay* seconds between fetches)
    through whatever opener is currently installed; warn if the fetched
    lengths are inconsistent and return the total elapsed time."""
    import time
    lens = []
    starttime = time.time()
    for i in range(N):
        if delay and i > 0: time.sleep(delay)
        fo = urllib2.urlopen(url)
        foo = fo.read()
        fo.close()
        lens.append(len(foo))
    diff = time.time() - starttime

    # every fetch should return the same number of bytes as the first
    j = 0
    for i in lens[1:]:
        j = j + 1
        if not i == lens[0]:
            print "WARNING: inconsistent length on read %i: %i" % (j, i)

    return diff
519 522
def test_timeout(url):
    """Manual test: fetch *url*, wait 20 seconds so the server drops the
    keepalive connection, fetch again, and check the data still matches.

    Temporarily replaces the module-global DEBUG logger with one that
    prints, so the reconnect can be observed; restores it afterwards.
    """
    global DEBUG
    dbbackup = DEBUG
    class FakeLogger:
        def debug(self, msg, *args): print msg % args
        info = warning = error = debug
    DEBUG = FakeLogger()
    print " fetching the file to establish a connection"
    fo = urllib2.urlopen(url)
    data1 = fo.read()
    fo.close()

    i = 20
    print " waiting %i seconds for the server to close the connection" % i
    while i > 0:
        # countdown display on one line
        sys.stdout.write('\r %2i' % i)
        sys.stdout.flush()
        time.sleep(1)
        i -= 1
    sys.stderr.write('\r')

    print " fetching the file a second time"
    fo = urllib2.urlopen(url)
    data2 = fo.read()
    fo.close()

    if data1 == data2:
        print ' data are identical'
    else:
        print ' ERROR: DATA DIFFER'

    DEBUG = dbbackup
552 555
553 556
554 557 def test(url, N=10):
555 558 print "checking error hander (do this on a non-200)"
556 559 try: error_handler(url)
557 560 except IOError, e:
558 561 print "exiting - exception will prevent further tests"
559 562 sys.exit()
560 563 print
561 564 print "performing continuity test (making sure stuff isn't corrupted)"
562 565 continuity(url)
563 566 print
564 567 print "performing speed comparison"
565 568 comp(N, url)
566 569 print
567 570 print "performing dropped-connection check"
568 571 test_timeout(url)
569 572
570 573 if __name__ == '__main__':
571 574 import time
572 575 import sys
573 576 try:
574 577 N = int(sys.argv[1])
575 578 url = sys.argv[2]
576 579 except:
577 580 print "%s <integer> <url>" % sys.argv[0]
578 581 else:
579 582 test(url, N)
General Comments 0
You need to be logged in to leave comments. Login now