py3: stop subscripting socket.error...
Matt Harbison
r40909:34835265 default
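The patch below replaces inst[0] with attribute access: on Python 2, socket.error instances supported indexing, but on Python 3 socket.error is an alias of OSError, which is not subscriptable. A minimal illustration of the difference (hypothetical snippet, not part of the patch):

    import errno
    import socket

    try:
        raise socket.error(errno.EPIPE, 'Broken pipe')
    except socket.error as inst:
        inst.errno      # 32 on both Python 2 and Python 3
        inst.args[0]    # 32 on both Python 2 and Python 3
        # inst[0]       # works on Python 2, raises TypeError on Python 3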
mercurial/hgweb/server.py
@@ -1,373 +1,373 @@
1 1 # hgweb/server.py - The standalone hg web server.
2 2 #
3 3 # Copyright 21 May 2005 - (c) 2005 Jake Edge <jake@edge2.net>
4 4 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 from __future__ import absolute_import
10 10
11 11 import errno
12 12 import os
13 13 import socket
14 14 import sys
15 15 import traceback
16 16 import wsgiref.validate
17 17
18 18 from ..i18n import _
19 19
20 20 from .. import (
21 21 encoding,
22 22 error,
23 23 pycompat,
24 24 util,
25 25 )
26 26
27 27 httpservermod = util.httpserver
28 28 socketserver = util.socketserver
29 29 urlerr = util.urlerr
30 30 urlreq = util.urlreq
31 31
32 32 from . import (
33 33 common,
34 34 )
35 35
36 36 def _splitURI(uri):
37 37 """Return path and query that has been split from uri
38 38
39 39 Just like CGI environment, the path is unquoted, the query is
40 40 not.
41 41 """
42 42 if r'?' in uri:
43 43 path, query = uri.split(r'?', 1)
44 44 else:
45 45 path, query = uri, r''
46 46 return urlreq.unquote(path), query
47 47
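For example, _splitURI unquotes the path but leaves the query string untouched (illustrative values):

    >>> _splitURI(r'/repo/raw-file/tip/a%20b?style=raw')
    ('/repo/raw-file/tip/a b', 'style=raw')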
48 48 class _error_logger(object):
49 49 def __init__(self, handler):
50 50 self.handler = handler
51 51 def flush(self):
52 52 pass
53 53 def write(self, str):
54 54 self.writelines(str.split('\n'))
55 55 def writelines(self, seq):
56 56 for msg in seq:
57 57 self.handler.log_error("HG error: %s", msg)
58 58
59 59 class _httprequesthandler(httpservermod.basehttprequesthandler):
60 60
61 61 url_scheme = 'http'
62 62
63 63 @staticmethod
64 64 def preparehttpserver(httpserver, ui):
65 65 """Prepare .socket of new HTTPServer instance"""
66 66
67 67 def __init__(self, *args, **kargs):
68 68 self.protocol_version = r'HTTP/1.1'
69 69 httpservermod.basehttprequesthandler.__init__(self, *args, **kargs)
70 70
71 71 def _log_any(self, fp, format, *args):
72 72 fp.write(pycompat.sysbytes(
73 73 r"%s - - [%s] %s" % (self.client_address[0],
74 74 self.log_date_time_string(),
75 75 format % args)) + '\n')
76 76 fp.flush()
77 77
78 78 def log_error(self, format, *args):
79 79 self._log_any(self.server.errorlog, format, *args)
80 80
81 81 def log_message(self, format, *args):
82 82 self._log_any(self.server.accesslog, format, *args)
83 83
84 84 def log_request(self, code=r'-', size=r'-'):
85 85 xheaders = []
86 86 if util.safehasattr(self, 'headers'):
87 87 xheaders = [h for h in self.headers.items()
88 88 if h[0].startswith(r'x-')]
89 89 self.log_message(r'"%s" %s %s%s',
90 90 self.requestline, str(code), str(size),
91 91 r''.join([r' %s:%s' % h for h in sorted(xheaders)]))
92 92
93 93 def do_write(self):
94 94 try:
95 95 self.do_hgweb()
96 96 except socket.error as inst:
97 if inst[0] != errno.EPIPE:
97 if inst.errno != errno.EPIPE:
98 98 raise
99 99
100 100 def do_POST(self):
101 101 try:
102 102 self.do_write()
103 103 except Exception:
104 104 self._start_response(r"500 Internal Server Error", [])
105 105 self._write(b"Internal Server Error")
106 106 self._done()
107 107 tb = r"".join(traceback.format_exception(*sys.exc_info()))
108 108 # We need a native-string newline to poke in the log
109 109 # message, because we won't get a newline when using an
110 110 # r-string. This is the easy way out.
111 111 newline = chr(10)
112 112 self.log_error(r"Exception happened during processing "
113 113 r"request '%s':%s%s", self.path, newline, tb)
114 114
115 115 def do_PUT(self):
116 116 self.do_POST()
117 117
118 118 def do_GET(self):
119 119 self.do_POST()
120 120
121 121 def do_hgweb(self):
122 122 self.sent_headers = False
123 123 path, query = _splitURI(self.path)
124 124
125 125 # Ensure the slicing of path below is valid
126 126 if (path != self.server.prefix
127 127 and not path.startswith(self.server.prefix + b'/')):
128 128 self._start_response(pycompat.strurl(common.statusmessage(404)),
129 129 [])
130 130 self._write(b"Not Found")
131 131 self._done()
132 132 return
133 133
134 134 env = {}
135 135 env[r'GATEWAY_INTERFACE'] = r'CGI/1.1'
136 136 env[r'REQUEST_METHOD'] = self.command
137 137 env[r'SERVER_NAME'] = self.server.server_name
138 138 env[r'SERVER_PORT'] = str(self.server.server_port)
139 139 env[r'REQUEST_URI'] = self.path
140 140 env[r'SCRIPT_NAME'] = pycompat.sysstr(self.server.prefix)
141 141 env[r'PATH_INFO'] = pycompat.sysstr(path[len(self.server.prefix):])
142 142 env[r'REMOTE_HOST'] = self.client_address[0]
143 143 env[r'REMOTE_ADDR'] = self.client_address[0]
144 144 env[r'QUERY_STRING'] = query or r''
145 145
146 146 if pycompat.ispy3:
147 147 if self.headers.get_content_type() is None:
148 148 env[r'CONTENT_TYPE'] = self.headers.get_default_type()
149 149 else:
150 150 env[r'CONTENT_TYPE'] = self.headers.get_content_type()
151 151 length = self.headers.get(r'content-length')
152 152 else:
153 153 if self.headers.typeheader is None:
154 154 env[r'CONTENT_TYPE'] = self.headers.type
155 155 else:
156 156 env[r'CONTENT_TYPE'] = self.headers.typeheader
157 157 length = self.headers.getheader(r'content-length')
158 158 if length:
159 159 env[r'CONTENT_LENGTH'] = length
160 160 for header in [h for h in self.headers.keys()
161 161 if h not in (r'content-type', r'content-length')]:
162 162 hkey = r'HTTP_' + header.replace(r'-', r'_').upper()
163 163 hval = self.headers.get(header)
164 164 hval = hval.replace(r'\n', r'').strip()
165 165 if hval:
166 166 env[hkey] = hval
167 167 env[r'SERVER_PROTOCOL'] = self.request_version
168 168 env[r'wsgi.version'] = (1, 0)
169 169 env[r'wsgi.url_scheme'] = pycompat.sysstr(self.url_scheme)
170 170 if env.get(r'HTTP_EXPECT', '').lower() == '100-continue':
171 171 self.rfile = common.continuereader(self.rfile, self.wfile.write)
172 172
173 173 env[r'wsgi.input'] = self.rfile
174 174 env[r'wsgi.errors'] = _error_logger(self)
175 175 env[r'wsgi.multithread'] = isinstance(self.server,
176 176 socketserver.ThreadingMixIn)
177 177 if util.safehasattr(socketserver, 'ForkingMixIn'):
178 178 env[r'wsgi.multiprocess'] = isinstance(self.server,
179 179 socketserver.ForkingMixIn)
180 180 else:
181 181 env[r'wsgi.multiprocess'] = False
182 182
183 183 env[r'wsgi.run_once'] = 0
184 184
185 185 wsgiref.validate.check_environ(env)
186 186
187 187 self.saved_status = None
188 188 self.saved_headers = []
189 189 self.length = None
190 190 self._chunked = None
191 191 for chunk in self.server.application(env, self._start_response):
192 192 self._write(chunk)
193 193 if not self.sent_headers:
194 194 self.send_headers()
195 195 self._done()
196 196
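To make the environ construction above concrete: for a request line of GET /hg/rev/tip?style=raw against a server whose web.prefix is /hg, the handler would produce roughly the following WSGI environ (hypothetical request, abbreviated):

    env = {
        'REQUEST_METHOD': 'GET',
        'REQUEST_URI': '/hg/rev/tip?style=raw',
        'SCRIPT_NAME': '/hg',
        'PATH_INFO': '/rev/tip',
        'QUERY_STRING': 'style=raw',
        'SERVER_PROTOCOL': 'HTTP/1.1',
        'wsgi.version': (1, 0),
        'wsgi.url_scheme': 'http',
    }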
197 197 def send_headers(self):
198 198 if not self.saved_status:
199 199 raise AssertionError("Sending headers before "
200 200 "start_response() called")
201 201 saved_status = self.saved_status.split(None, 1)
202 202 saved_status[0] = int(saved_status[0])
203 203 self.send_response(*saved_status)
204 204 self.length = None
205 205 self._chunked = False
206 206 for h in self.saved_headers:
207 207 self.send_header(*h)
208 208 if h[0].lower() == r'content-length':
209 209 self.length = int(h[1])
210 210 if (self.length is None and
211 211 saved_status[0] != common.HTTP_NOT_MODIFIED):
212 212 self._chunked = (not self.close_connection and
213 213 self.request_version == r'HTTP/1.1')
214 214 if self._chunked:
215 215 self.send_header(r'Transfer-Encoding', r'chunked')
216 216 else:
217 217 self.send_header(r'Connection', r'close')
218 218 self.end_headers()
219 219 self.sent_headers = True
220 220
221 221 def _start_response(self, http_status, headers, exc_info=None):
222 222 assert isinstance(http_status, str)
223 223 code, msg = http_status.split(None, 1)
224 224 code = int(code)
225 225 self.saved_status = http_status
226 226 bad_headers = (r'connection', r'transfer-encoding')
227 227 self.saved_headers = [h for h in headers
228 228 if h[0].lower() not in bad_headers]
229 229 return self._write
230 230
231 231 def _write(self, data):
232 232 if not self.saved_status:
233 233 raise AssertionError("data written before start_response() called")
234 234 elif not self.sent_headers:
235 235 self.send_headers()
236 236 if self.length is not None:
237 237 if len(data) > self.length:
238 238 raise AssertionError("Content-length header sent, but more "
239 239 "bytes than specified are being written.")
240 240 self.length = self.length - len(data)
241 241 elif self._chunked and data:
242 242 data = '%x\r\n%s\r\n' % (len(data), data)
243 243 self.wfile.write(data)
244 244 self.wfile.flush()
245 245
246 246 def _done(self):
247 247 if self._chunked:
248 248 self.wfile.write('0\r\n\r\n')
249 249 self.wfile.flush()
250 250
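When no Content-Length header was sent and the response is not a 304, _write falls back to HTTP/1.1 chunked transfer coding: each chunk is framed as a hex length, CRLF, the payload, and another CRLF, and _done() emits the zero-length terminating chunk. Writing 'hello' and then finishing produces these bytes on the wire (illustrative):

    5\r\n
    hello\r\n
    0\r\n
    \r\n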
251 251 def version_string(self):
252 252 if self.server.serverheader:
253 253 return encoding.strfromlocal(self.server.serverheader)
254 254 return httpservermod.basehttprequesthandler.version_string(self)
255 255
256 256 class _httprequesthandlerssl(_httprequesthandler):
257 257 """HTTPS handler based on Python's ssl module"""
258 258
259 259 url_scheme = 'https'
260 260
261 261 @staticmethod
262 262 def preparehttpserver(httpserver, ui):
263 263 try:
264 264 from .. import sslutil
265 265 sslutil.modernssl
266 266 except ImportError:
267 267 raise error.Abort(_("SSL support is unavailable"))
268 268
269 269 certfile = ui.config('web', 'certificate')
270 270
271 271 # These config options are currently only meant for testing. Use
272 272 # at your own risk.
273 273 cafile = ui.config('devel', 'servercafile')
274 274 reqcert = ui.configbool('devel', 'serverrequirecert')
275 275
276 276 httpserver.socket = sslutil.wrapserversocket(httpserver.socket,
277 277 ui,
278 278 certfile=certfile,
279 279 cafile=cafile,
280 280 requireclientcert=reqcert)
281 281
282 282 def setup(self):
283 283 self.connection = self.request
284 284 self.rfile = self.request.makefile(r"rb", self.rbufsize)
285 285 self.wfile = self.request.makefile(r"wb", self.wbufsize)
286 286
287 287 try:
288 288 import threading
289 289 threading.activeCount() # silence pyflakes and bypass demandimport
290 290 _mixin = socketserver.ThreadingMixIn
291 291 except ImportError:
292 292 if util.safehasattr(os, "fork"):
293 293 _mixin = socketserver.ForkingMixIn
294 294 else:
295 295 class _mixin(object):
296 296 pass
297 297
298 298 def openlog(opt, default):
299 299 if opt and opt != '-':
300 300 return open(opt, 'ab')
301 301 return default
302 302
303 303 class MercurialHTTPServer(_mixin, httpservermod.httpserver, object):
304 304
305 305 # SO_REUSEADDR has broken semantics on windows
306 306 if pycompat.iswindows:
307 307 allow_reuse_address = 0
308 308
309 309 def __init__(self, ui, app, addr, handler, **kwargs):
310 310 httpservermod.httpserver.__init__(self, addr, handler, **kwargs)
311 311 self.daemon_threads = True
312 312 self.application = app
313 313
314 314 handler.preparehttpserver(self, ui)
315 315
316 316 prefix = ui.config('web', 'prefix')
317 317 if prefix:
318 318 prefix = '/' + prefix.strip('/')
319 319 self.prefix = prefix
320 320
321 321 alog = openlog(ui.config('web', 'accesslog'), ui.fout)
322 322 elog = openlog(ui.config('web', 'errorlog'), ui.ferr)
323 323 self.accesslog = alog
324 324 self.errorlog = elog
325 325
326 326 self.addr, self.port = self.socket.getsockname()[0:2]
327 327 self.fqaddr = socket.getfqdn(addr[0])
328 328
329 329 self.serverheader = ui.config('web', 'server-header')
330 330
331 331 class IPv6HTTPServer(MercurialHTTPServer):
332 332 address_family = getattr(socket, 'AF_INET6', None)
333 333 def __init__(self, *args, **kwargs):
334 334 if self.address_family is None:
335 335 raise error.RepoError(_('IPv6 is not available on this system'))
336 336 super(IPv6HTTPServer, self).__init__(*args, **kwargs)
337 337
338 338 def create_server(ui, app):
339 339
340 340 if ui.config('web', 'certificate'):
341 341 handler = _httprequesthandlerssl
342 342 else:
343 343 handler = _httprequesthandler
344 344
345 345 if ui.configbool('web', 'ipv6'):
346 346 cls = IPv6HTTPServer
347 347 else:
348 348 cls = MercurialHTTPServer
349 349
350 350 # ugly hack due to python issue5853 (for threaded use)
351 351 try:
352 352 import mimetypes
353 353 mimetypes.init()
354 354 except UnicodeDecodeError:
355 355 # Python 2.x's mimetypes module attempts to decode strings
356 356 # from Windows' ANSI APIs as ascii (fail), then re-encode them
357 357 # as ascii (clown fail), because the default Python Unicode
358 358 # codec is hardcoded as ascii.
359 359
360 360 sys.argv # unwrap demand-loader so that reload() works
361 361 reload(sys) # resurrect sys.setdefaultencoding()
362 362 oldenc = sys.getdefaultencoding()
363 363 sys.setdefaultencoding("latin1") # or any full 8-bit encoding
364 364 mimetypes.init()
365 365 sys.setdefaultencoding(oldenc)
366 366
367 367 address = ui.config('web', 'address')
368 368 port = util.getport(ui.config('web', 'port'))
369 369 try:
370 370 return cls(ui, app, (address, port), handler)
371 371 except socket.error as inst:
372 372 raise error.Abort(_("cannot start server at '%s:%d': %s")
373 373 % (address, port, encoding.strtolocal(inst.args[1])))
mercurial/keepalive.py
@@ -1,810 +1,810 @@
1 1 # This library is free software; you can redistribute it and/or
2 2 # modify it under the terms of the GNU Lesser General Public
3 3 # License as published by the Free Software Foundation; either
4 4 # version 2.1 of the License, or (at your option) any later version.
5 5 #
6 6 # This library is distributed in the hope that it will be useful,
7 7 # but WITHOUT ANY WARRANTY; without even the implied warranty of
8 8 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
9 9 # Lesser General Public License for more details.
10 10 #
11 11 # You should have received a copy of the GNU Lesser General Public
12 12 # License along with this library; if not, see
13 13 # <http://www.gnu.org/licenses/>.
14 14
15 15 # This file is part of urlgrabber, a high-level cross-protocol url-grabber
16 16 # Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko
17 17
18 18 # Modified by Benoit Boissinot:
19 19 # - fix for digest auth (inspired from urllib2.py @ Python v2.4)
20 20 # Modified by Dirkjan Ochtman:
21 21 # - import md5 function from a local util module
22 22 # Modified by Augie Fackler:
23 23 # - add safesend method and use it to prevent broken pipe errors
24 24 # on large POST requests
25 25
26 26 """An HTTP handler for urllib2 that supports HTTP 1.1 and keepalive.
27 27
28 28 >>> import urllib2
29 29 >>> from keepalive import HTTPHandler
30 30 >>> keepalive_handler = HTTPHandler()
31 31 >>> opener = urlreq.buildopener(keepalive_handler)
32 32 >>> urlreq.installopener(opener)
33 33 >>>
34 34 >>> fo = urlreq.urlopen('http://www.python.org')
35 35
36 36 If a connection to a given host is requested, and all of the existing
37 37 connections are still in use, another connection will be opened. If
38 38 the handler tries to use an existing connection but it fails in some
39 39 way, it will be closed and removed from the pool.
40 40
41 41 To remove the handler, simply re-run build_opener with no arguments, and
42 42 install that opener.
43 43
44 44 You can explicitly close connections by using the close_connection()
45 45 method of the returned file-like object (described below) or you can
46 46 use the handler methods:
47 47
48 48 close_connection(host)
49 49 close_all()
50 50 open_connections()
51 51
52 52 NOTE: using the close_connection and close_all methods of the handler
53 53 should be done with care when using multiple threads.
54 54 * there is nothing that prevents another thread from creating new
55 55 connections immediately after connections are closed
56 56 * no checks are done to prevent in-use connections from being closed
57 57
58 58 >>> keepalive_handler.close_all()
59 59
60 60 EXTRA ATTRIBUTES AND METHODS
61 61
62 62 Upon a status of 200, the object returned has a few additional
63 63 attributes and methods, which should not be used if you want to
64 64 remain consistent with the normal urllib2-returned objects:
65 65
66 66 close_connection() - close the connection to the host
67 67 readlines() - you know, readlines()
68 68 status - the return status (e.g. 404)
69 69 reason - English translation of status (e.g. 'File not found')
70 70
71 71 If you want the best of both worlds, use this inside an
72 72 AttributeError-catching try:
73 73
74 74 >>> try: status = fo.status
75 75 >>> except AttributeError: status = None
76 76
77 77 Unfortunately, these are ONLY there if status == 200, so it's not
78 78 easy to distinguish between non-200 responses. The reason is that
79 79 urllib2 tries to do clever things with error codes 301, 302, 401,
80 80 and 407, and it wraps the object upon return.
81 81 """
82 82
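Note that the docstring example above mixes a Python 2 urllib2 import with Mercurial's urlreq aliases. A self-contained Python 3 equivalent, assuming the module is importable as mercurial.keepalive, would look like:

    import urllib.request
    from mercurial.keepalive import HTTPHandler

    opener = urllib.request.build_opener(HTTPHandler())
    urllib.request.install_opener(opener)
    fo = urllib.request.urlopen('http://www.python.org')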
83 83 # $Id: keepalive.py,v 1.14 2006/04/04 21:00:32 mstenner Exp $
84 84
85 85 from __future__ import absolute_import, print_function
86 86
87 87 import errno
88 88 import hashlib
89 89 import socket
90 90 import sys
91 91 import threading
92 92
93 93 from .i18n import _
94 94 from . import (
95 95 node,
96 96 pycompat,
97 97 urllibcompat,
98 98 util,
99 99 )
100 100 from .utils import (
101 101 procutil,
102 102 )
103 103
104 104 httplib = util.httplib
105 105 urlerr = util.urlerr
106 106 urlreq = util.urlreq
107 107
108 108 DEBUG = None
109 109
110 110 class ConnectionManager(object):
111 111 """
112 112 The connection manager must be able to:
113 113 * keep track of all existing connections
114 114 """
115 115 def __init__(self):
116 116 self._lock = threading.Lock()
117 117 self._hostmap = {} # map hosts to a list of connections
118 118 self._connmap = {} # map connections to host
119 119 self._readymap = {} # map connection to ready state
120 120
121 121 def add(self, host, connection, ready):
122 122 self._lock.acquire()
123 123 try:
124 124 if host not in self._hostmap:
125 125 self._hostmap[host] = []
126 126 self._hostmap[host].append(connection)
127 127 self._connmap[connection] = host
128 128 self._readymap[connection] = ready
129 129 finally:
130 130 self._lock.release()
131 131
132 132 def remove(self, connection):
133 133 self._lock.acquire()
134 134 try:
135 135 try:
136 136 host = self._connmap[connection]
137 137 except KeyError:
138 138 pass
139 139 else:
140 140 del self._connmap[connection]
141 141 del self._readymap[connection]
142 142 self._hostmap[host].remove(connection)
143 143 if not self._hostmap[host]:
144 144 del self._hostmap[host]
145 145 finally:
146 146 self._lock.release()
147 147
148 148 def set_ready(self, connection, ready):
149 149 try:
150 150 self._readymap[connection] = ready
151 151 except KeyError:
152 152 pass
153 153
154 154 def get_ready_conn(self, host):
155 155 conn = None
156 156 self._lock.acquire()
157 157 try:
158 158 if host in self._hostmap:
159 159 for c in self._hostmap[host]:
160 160 if self._readymap[c]:
161 161 self._readymap[c] = 0
162 162 conn = c
163 163 break
164 164 finally:
165 165 self._lock.release()
166 166 return conn
167 167
168 168 def get_all(self, host=None):
169 169 if host:
170 170 return list(self._hostmap.get(host, []))
171 171 else:
172 172 return dict(self._hostmap)
173 173
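The ready flag drives the pooling protocol: get_ready_conn() claims an idle connection under the lock by clearing its flag, and the handler sets the flag again once the request completes. A minimal sketch of the lifecycle (object() stands in for a real connection):

    cm = ConnectionManager()
    conn = object()                            # stands in for an HTTPConnection
    cm.add('example.com:80', conn, ready=1)    # register an idle connection
    c = cm.get_ready_conn('example.com:80')    # claim it; ready flag drops to 0
    # ... run a request on c ...
    cm.set_ready(c, 1)                         # hand it back to the pool
    cm.remove(c)                               # or discard it entirely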
174 174 class KeepAliveHandler(object):
175 175 def __init__(self, timeout=None):
176 176 self._cm = ConnectionManager()
177 177 self._timeout = timeout
178 178 self.requestscount = 0
179 179 self.sentbytescount = 0
180 180
181 181 #### Connection Management
182 182 def open_connections(self):
183 183 """return a list of connected hosts and the number of connections
184 184 to each. [('foo.com:80', 2), ('bar.org', 1)]"""
185 185 return [(host, len(li)) for (host, li) in self._cm.get_all().items()]
186 186
187 187 def close_connection(self, host):
188 188 """close connection(s) to <host>
189 189 host is the host:port spec, as in 'www.cnn.com:8080' as passed in.
190 190 no error occurs if there is no connection to that host."""
191 191 for h in self._cm.get_all(host):
192 192 self._cm.remove(h)
193 193 h.close()
194 194
195 195 def close_all(self):
196 196 """close all open connections"""
197 197 for host, conns in self._cm.get_all().iteritems():
198 198 for h in conns:
199 199 self._cm.remove(h)
200 200 h.close()
201 201
202 202 def _request_closed(self, request, host, connection):
203 203 """tells us that this request is now closed and that the
204 204 connection is ready for another request"""
205 205 self._cm.set_ready(connection, 1)
206 206
207 207 def _remove_connection(self, host, connection, close=0):
208 208 if close:
209 209 connection.close()
210 210 self._cm.remove(connection)
211 211
212 212 #### Transaction Execution
213 213 def http_open(self, req):
214 214 return self.do_open(HTTPConnection, req)
215 215
216 216 def do_open(self, http_class, req):
217 217 host = urllibcompat.gethost(req)
218 218 if not host:
219 219 raise urlerr.urlerror('no host given')
220 220
221 221 try:
222 222 h = self._cm.get_ready_conn(host)
223 223 while h:
224 224 r = self._reuse_connection(h, req, host)
225 225
226 226 # if this response is non-None, then it worked and we're
227 227 # done. Break out, skipping the else block.
228 228 if r:
229 229 break
230 230
231 231 # connection is bad - possibly closed by server
232 232 # discard it and ask for the next free connection
233 233 h.close()
234 234 self._cm.remove(h)
235 235 h = self._cm.get_ready_conn(host)
236 236 else:
237 237 # no (working) free connections were found. Create a new one.
238 238 h = http_class(host, timeout=self._timeout)
239 239 if DEBUG:
240 240 DEBUG.info("creating new connection to %s (%d)",
241 241 host, id(h))
242 242 self._cm.add(host, h, 0)
243 243 self._start_transaction(h, req)
244 244 r = h.getresponse()
245 245 # The string form of BadStatusLine is the status line. Add some context
246 246 # to make the error message slightly more useful.
247 247 except httplib.BadStatusLine as err:
248 248 raise urlerr.urlerror(
249 249 _('bad HTTP status line: %s') % pycompat.sysbytes(err.line))
250 250 except (socket.error, httplib.HTTPException) as err:
251 251 raise urlerr.urlerror(err)
252 252
253 253 # If not a persistent connection, don't try to reuse it. Look
254 254 # for this using getattr() since vcr doesn't define this
255 255 # attribute, and in that case always close the connection.
256 256 if getattr(r, r'will_close', True):
257 257 self._cm.remove(h)
258 258
259 259 if DEBUG:
260 260 DEBUG.info("STATUS: %s, %s", r.status, r.reason)
261 261 r._handler = self
262 262 r._host = host
263 263 r._url = req.get_full_url()
264 264 r._connection = h
265 265 r.code = r.status
266 266 r.headers = r.msg
267 267 r.msg = r.reason
268 268
269 269 return r
270 270
271 271 def _reuse_connection(self, h, req, host):
272 272 """start the transaction with a re-used connection
273 273 return a response object (r) upon success or None on failure.
274 274 This DOES not close or remove bad connections in cases where
275 275 it returns. However, if an unexpected exception occurs, it
276 276 will close and remove the connection before re-raising.
277 277 """
278 278 try:
279 279 self._start_transaction(h, req)
280 280 r = h.getresponse()
281 281 # note: just because we got something back doesn't mean it
282 282 # worked. We'll check the version below, too.
283 283 except (socket.error, httplib.HTTPException):
284 284 r = None
285 285 except: # re-raises
286 286 # adding this block just in case we've missed
287 287 # something we will still raise the exception, but
288 288 # lets try and close the connection and remove it
289 289 # first. We previously got into a nasty loop
290 290 # where an exception was uncaught, and so the
291 291 # connection stayed open. On the next try, the
292 292 # same exception was raised, etc. The trade-off is
293 293 # that it's now possible this call will raise
294 294 # a DIFFERENT exception
295 295 if DEBUG:
296 296 DEBUG.error("unexpected exception - closing "
297 297 "connection to %s (%d)", host, id(h))
298 298 self._cm.remove(h)
299 299 h.close()
300 300 raise
301 301
302 302 if r is None or r.version == 9:
303 303 # httplib falls back to assuming HTTP 0.9 if it gets a
304 304 # bad header back. This is most likely to happen if
305 305 # the socket has been closed by the server since we
306 306 # last used the connection.
307 307 if DEBUG:
308 308 DEBUG.info("failed to re-use connection to %s (%d)",
309 309 host, id(h))
310 310 r = None
311 311 else:
312 312 if DEBUG:
313 313 DEBUG.info("re-using connection to %s (%d)", host, id(h))
314 314
315 315 return r
316 316
317 317 def _start_transaction(self, h, req):
318 318 oldbytescount = getattr(h, 'sentbytescount', 0)
319 319
320 320 # What follows mostly reimplements HTTPConnection.request()
321 321 # except it adds self.parent.addheaders in the mix and sends headers
322 322 # in a deterministic order (to make testing easier).
323 323 headers = util.sortdict(self.parent.addheaders)
324 324 headers.update(sorted(req.headers.items()))
325 325 headers.update(sorted(req.unredirected_hdrs.items()))
326 326 headers = util.sortdict((n.lower(), v) for n, v in headers.items())
327 327 skipheaders = {}
328 328 for n in (r'host', r'accept-encoding'):
329 329 if n in headers:
330 330 skipheaders[r'skip_' + n.replace(r'-', r'_')] = 1
331 331 try:
332 332 if urllibcompat.hasdata(req):
333 333 data = urllibcompat.getdata(req)
334 334 h.putrequest(
335 335 req.get_method(), urllibcompat.getselector(req),
336 336 **skipheaders)
337 337 if r'content-type' not in headers:
338 338 h.putheader(r'Content-type',
339 339 r'application/x-www-form-urlencoded')
340 340 if r'content-length' not in headers:
341 341 h.putheader(r'Content-length', r'%d' % len(data))
342 342 else:
343 343 h.putrequest(
344 344 req.get_method(), urllibcompat.getselector(req),
345 345 **skipheaders)
346 346 except socket.error as err:
347 347 raise urlerr.urlerror(err)
348 348 for k, v in headers.items():
349 349 h.putheader(k, v)
350 350 h.endheaders()
351 351 if urllibcompat.hasdata(req):
352 352 h.send(data)
353 353
354 354 # This will fail to record events in case of I/O failure. That's OK.
355 355 self.requestscount += 1
356 356 self.sentbytescount += getattr(h, 'sentbytescount', 0) - oldbytescount
357 357
358 358 try:
359 359 self.parent.requestscount += 1
360 360 self.parent.sentbytescount += (
361 361 getattr(h, 'sentbytescount', 0) - oldbytescount)
362 362 except AttributeError:
363 363 pass
364 364
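The skipheaders dict above relies on putrequest()'s keyword arguments, which suppress the automatically generated Host and Accept-Encoding headers so the caller-supplied values win. Spelled out for a single request (example.com is a placeholder):

    import httplib  # http.client on Python 3

    h = httplib.HTTPConnection('example.com')
    h.putrequest('GET', '/index.html', skip_host=1, skip_accept_encoding=1)
    h.putheader('Host', 'example.com')
    h.putheader('Accept-Encoding', 'identity')
    h.endheaders()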
365 365 class HTTPHandler(KeepAliveHandler, urlreq.httphandler):
366 366 pass
367 367
368 368 class HTTPResponse(httplib.HTTPResponse):
369 369 # we need to subclass HTTPResponse in order to
370 370 # 1) add readline(), readlines(), and readinto() methods
371 371 # 2) add close_connection() methods
372 372 # 3) add info() and geturl() methods
373 373
374 374 # in order to add readline(), read must be modified to deal with a
375 375 # buffer. example: readline must read a buffer and then spit back
376 376 # one line at a time. The only real alternative is to read one
377 377 # BYTE at a time (ick). Once something has been read, it can't be
378 378 # put back (ok, maybe it can, but that's even uglier than this),
379 379 # so if you THEN do a normal read, you must first take stuff from
380 380 # the buffer.
381 381
382 382 # the read method wraps the original to accommodate buffering,
383 383 # although read() never adds to the buffer.
384 384 # Both readline and readlines have been stolen with almost no
385 385 # modification from socket.py
386 386
387 387
388 388 def __init__(self, sock, debuglevel=0, strict=0, method=None):
389 389 extrakw = {}
390 390 if not pycompat.ispy3:
391 391 extrakw[r'strict'] = True
392 392 extrakw[r'buffering'] = True
393 393 httplib.HTTPResponse.__init__(self, sock, debuglevel=debuglevel,
394 394 method=method, **extrakw)
395 395 self.fileno = sock.fileno
396 396 self.code = None
397 397 self.receivedbytescount = 0
398 398 self._rbuf = ''
399 399 self._rbufsize = 8096
400 400 self._handler = None # inserted by the handler later
401 401 self._host = None # (same)
402 402 self._url = None # (same)
403 403 self._connection = None # (same)
404 404
405 405 _raw_read = httplib.HTTPResponse.read
406 406 _raw_readinto = getattr(httplib.HTTPResponse, 'readinto', None)
407 407
408 408 def close(self):
409 409 if self.fp:
410 410 self.fp.close()
411 411 self.fp = None
412 412 if self._handler:
413 413 self._handler._request_closed(self, self._host,
414 414 self._connection)
415 415
416 416 def close_connection(self):
417 417 self._handler._remove_connection(self._host, self._connection, close=1)
418 418 self.close()
419 419
420 420 def info(self):
421 421 return self.headers
422 422
423 423 def geturl(self):
424 424 return self._url
425 425
426 426 def read(self, amt=None):
427 427 # the _rbuf test is only in this first if for speed. It's not
428 428 # logically necessary
429 429 if self._rbuf and amt is not None:
430 430 L = len(self._rbuf)
431 431 if amt > L:
432 432 amt -= L
433 433 else:
434 434 s = self._rbuf[:amt]
435 435 self._rbuf = self._rbuf[amt:]
436 436 return s
437 437 # Careful! http.client.HTTPResponse.read() on Python 3 is
438 438 # implemented using readinto(), which can duplicate self._rbuf
439 439 # if it's not empty.
440 440 s = self._rbuf
441 441 self._rbuf = ''
442 442 data = self._raw_read(amt)
443 443
444 444 self.receivedbytescount += len(data)
445 445 try:
446 446 self._connection.receivedbytescount += len(data)
447 447 except AttributeError:
448 448 pass
449 449 try:
450 450 self._handler.parent.receivedbytescount += len(data)
451 451 except AttributeError:
452 452 pass
453 453
454 454 s += data
455 455 return s
456 456
457 457 # stolen from Python SVN #68532 to fix issue1088
458 458 def _read_chunked(self, amt):
459 459 chunk_left = self.chunk_left
460 460 parts = []
461 461
462 462 while True:
463 463 if chunk_left is None:
464 464 line = self.fp.readline()
465 465 i = line.find(';')
466 466 if i >= 0:
467 467 line = line[:i] # strip chunk-extensions
468 468 try:
469 469 chunk_left = int(line, 16)
470 470 except ValueError:
471 471 # close the connection as protocol synchronization is
472 472 # probably lost
473 473 self.close()
474 474 raise httplib.IncompleteRead(''.join(parts))
475 475 if chunk_left == 0:
476 476 break
477 477 if amt is None:
478 478 parts.append(self._safe_read(chunk_left))
479 479 elif amt < chunk_left:
480 480 parts.append(self._safe_read(amt))
481 481 self.chunk_left = chunk_left - amt
482 482 return ''.join(parts)
483 483 elif amt == chunk_left:
484 484 parts.append(self._safe_read(amt))
485 485 self._safe_read(2) # toss the CRLF at the end of the chunk
486 486 self.chunk_left = None
487 487 return ''.join(parts)
488 488 else:
489 489 parts.append(self._safe_read(chunk_left))
490 490 amt -= chunk_left
491 491
492 492 # we read the whole chunk, get another
493 493 self._safe_read(2) # toss the CRLF at the end of the chunk
494 494 chunk_left = None
495 495
496 496 # read and discard trailer up to the CRLF terminator
497 497 ### note: we shouldn't have any trailers!
498 498 while True:
499 499 line = self.fp.readline()
500 500 if not line:
501 501 # a vanishingly small number of sites EOF without
502 502 # sending the trailer
503 503 break
504 504 if line == '\r\n':
505 505 break
506 506
507 507 # we read everything; close the "file"
508 508 self.close()
509 509
510 510 return ''.join(parts)
511 511
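As a concrete case of what _read_chunked consumes: each chunk is a hex size line followed by that many payload bytes and a CRLF, terminated by a zero-size chunk. The wire stream below decodes to the nine-byte body 'Wikipedia' (illustrative):

    4\r\nWiki\r\n5\r\npedia\r\n0\r\n\r\n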
512 512 def readline(self):
513 513 # Fast path for a line is already available in read buffer.
514 514 i = self._rbuf.find('\n')
515 515 if i >= 0:
516 516 i += 1
517 517 line = self._rbuf[:i]
518 518 self._rbuf = self._rbuf[i:]
519 519 return line
520 520
521 521 # No newline in local buffer. Read until we find one.
522 522 chunks = [self._rbuf]
523 523 i = -1
524 524 readsize = self._rbufsize
525 525 while True:
526 526 new = self._raw_read(readsize)
527 527 if not new:
528 528 break
529 529
530 530 self.receivedbytescount += len(new)
531 531 self._connection.receivedbytescount += len(new)
532 532 try:
533 533 self._handler.parent.receivedbytescount += len(new)
534 534 except AttributeError:
535 535 pass
536 536
537 537 chunks.append(new)
538 538 i = new.find('\n')
539 539 if i >= 0:
540 540 break
541 541
542 542 # We either have exhausted the stream or have a newline in chunks[-1].
543 543
544 544 # EOF
545 545 if i == -1:
546 546 self._rbuf = ''
547 547 return ''.join(chunks)
548 548
549 549 i += 1
550 550 self._rbuf = chunks[-1][i:]
551 551 chunks[-1] = chunks[-1][:i]
552 552 return ''.join(chunks)
553 553
554 554 def readlines(self, sizehint=0):
555 555 total = 0
556 556 list = []
557 557 while True:
558 558 line = self.readline()
559 559 if not line:
560 560 break
561 561 list.append(line)
562 562 total += len(line)
563 563 if sizehint and total >= sizehint:
564 564 break
565 565 return list
566 566
567 567 def readinto(self, dest):
568 568 if self._raw_readinto is None:
569 569 res = self.read(len(dest))
570 570 if not res:
571 571 return 0
572 572 dest[0:len(res)] = res
573 573 return len(res)
574 574 total = len(dest)
575 575 have = len(self._rbuf)
576 576 if have >= total:
577 577 dest[0:total] = self._rbuf[:total]
578 578 self._rbuf = self._rbuf[total:]
579 579 return total
580 580 mv = memoryview(dest)
581 581 got = self._raw_readinto(mv[have:total])
582 582
583 583 self.receivedbytescount += got
584 584 self._connection.receivedbytescount += got
585 585 try:
586 586 self._handler.receivedbytescount += got
587 587 except AttributeError:
588 588 pass
589 589
590 590 dest[0:have] = self._rbuf
591 591 got += len(self._rbuf)
592 592 self._rbuf = ''
593 593 return got
594 594
595 595 def safesend(self, str):
596 596 """Send `str' to the server.
597 597
598 598 Shamelessly ripped off from httplib to patch a bad behavior.
599 599 """
600 600 # _broken_pipe_resp is an attribute we set in this function
601 601 # if the socket is closed while we're sending data but
602 602 # the server sent us a response before hanging up.
603 603 # In that case, we want to pretend to send the rest of the
604 604 # outgoing data, and then let the user use getresponse()
605 605 # (which we wrap) to get this last response before
606 606 # opening a new socket.
607 607 if getattr(self, '_broken_pipe_resp', None) is not None:
608 608 return
609 609
610 610 if self.sock is None:
611 611 if self.auto_open:
612 612 self.connect()
613 613 else:
614 614 raise httplib.NotConnected
615 615
616 616 # send the data to the server. if we get a broken pipe, then close
617 617 # the socket. we want to reconnect when somebody tries to send again.
618 618 #
619 619 # NOTE: we DO propagate the error, though, because we cannot simply
620 620 # ignore the error... the caller will know if they can retry.
621 621 if self.debuglevel > 0:
622 622 print("send:", repr(str))
623 623 try:
624 624 blocksize = 8192
625 625 read = getattr(str, 'read', None)
626 626 if read is not None:
627 627 if self.debuglevel > 0:
628 628 print("sending a read()able")
629 629 data = read(blocksize)
630 630 while data:
631 631 self.sock.sendall(data)
632 632 self.sentbytescount += len(data)
633 633 data = read(blocksize)
634 634 else:
635 635 self.sock.sendall(str)
636 636 self.sentbytescount += len(str)
637 637 except socket.error as v:
638 638 reraise = True
639 if v[0] == errno.EPIPE: # Broken pipe
639 if v.args[0] == errno.EPIPE: # Broken pipe
640 640 if self._HTTPConnection__state == httplib._CS_REQ_SENT:
641 641 self._broken_pipe_resp = None
642 642 self._broken_pipe_resp = self.getresponse()
643 643 reraise = False
644 644 self.close()
645 645 if reraise:
646 646 raise
647 647
648 648 def wrapgetresponse(cls):
649 649 """Wraps getresponse in cls with a broken-pipe sane version.
650 650 """
651 651 def safegetresponse(self):
652 652 # In safesend() we might set the _broken_pipe_resp
653 653 # attribute, in which case the socket has already
654 654 # been closed and we just need to give them the response
655 655 # back. Otherwise, we use the normal response path.
656 656 r = getattr(self, '_broken_pipe_resp', None)
657 657 if r is not None:
658 658 return r
659 659 return cls.getresponse(self)
660 660 safegetresponse.__doc__ = cls.getresponse.__doc__
661 661 return safegetresponse
662 662
663 663 class HTTPConnection(httplib.HTTPConnection):
664 664 # url.httpsconnection inherits from this. So when adding/removing
665 665 # attributes, be sure to audit httpsconnection() for unintended
666 666 # consequences.
667 667
668 668 # use the modified response class
669 669 response_class = HTTPResponse
670 670 send = safesend
671 671 getresponse = wrapgetresponse(httplib.HTTPConnection)
672 672
673 673 def __init__(self, *args, **kwargs):
674 674 httplib.HTTPConnection.__init__(self, *args, **kwargs)
675 675 self.sentbytescount = 0
676 676 self.receivedbytescount = 0
677 677
678 678 #########################################################################
679 679 ##### TEST FUNCTIONS
680 680 #########################################################################
681 681
682 682
683 683 def continuity(url):
684 684 md5 = hashlib.md5
685 685 format = '%25s: %s'
686 686
687 687 # first fetch the file with the normal http handler
688 688 opener = urlreq.buildopener()
689 689 urlreq.installopener(opener)
690 690 fo = urlreq.urlopen(url)
691 691 foo = fo.read()
692 692 fo.close()
693 693 m = md5(foo)
694 694 print(format % ('normal urllib', node.hex(m.digest())))
695 695
696 696 # now install the keepalive handler and try again
697 697 opener = urlreq.buildopener(HTTPHandler())
698 698 urlreq.installopener(opener)
699 699
700 700 fo = urlreq.urlopen(url)
701 701 foo = fo.read()
702 702 fo.close()
703 703 m = md5(foo)
704 704 print(format % ('keepalive read', node.hex(m.digest())))
705 705
706 706 fo = urlreq.urlopen(url)
707 707 foo = ''
708 708 while True:
709 709 f = fo.readline()
710 710 if f:
711 711 foo = foo + f
712 712 else:
713 713 break
714 714 fo.close()
715 715 m = md5(foo)
716 716 print(format % ('keepalive readline', node.hex(m.digest())))
717 717
718 718 def comp(N, url):
719 719 print(' making %i connections to:\n %s' % (N, url))
720 720
721 721 procutil.stdout.write(' first using the normal urllib handlers')
722 722 # first use normal opener
723 723 opener = urlreq.buildopener()
724 724 urlreq.installopener(opener)
725 725 t1 = fetch(N, url)
726 726 print(' TIME: %.3f s' % t1)
727 727
728 728 procutil.stdout.write(' now using the keepalive handler ')
729 729 # now install the keepalive handler and try again
730 730 opener = urlreq.buildopener(HTTPHandler())
731 731 urlreq.installopener(opener)
732 732 t2 = fetch(N, url)
733 733 print(' TIME: %.3f s' % t2)
734 734 print(' improvement factor: %.2f' % (t1 / t2))
735 735
736 736 def fetch(N, url, delay=0):
737 737 import time
738 738 lens = []
739 739 starttime = time.time()
740 740 for i in range(N):
741 741 if delay and i > 0:
742 742 time.sleep(delay)
743 743 fo = urlreq.urlopen(url)
744 744 foo = fo.read()
745 745 fo.close()
746 746 lens.append(len(foo))
747 747 diff = time.time() - starttime
748 748
749 749 j = 0
750 750 for i in lens[1:]:
751 751 j = j + 1
752 752 if not i == lens[0]:
753 753 print("WARNING: inconsistent length on read %i: %i" % (j, i))
754 754
755 755 return diff
756 756
757 757 def test_timeout(url):
758 758 global DEBUG
759 759 dbbackup = DEBUG
760 760 class FakeLogger(object):
761 761 def debug(self, msg, *args):
762 762 print(msg % args)
763 763 info = warning = error = debug
764 764 DEBUG = FakeLogger()
765 765 print(" fetching the file to establish a connection")
766 766 fo = urlreq.urlopen(url)
767 767 data1 = fo.read()
768 768 fo.close()
769 769
770 770 i = 20
771 771 print(" waiting %i seconds for the server to close the connection" % i)
772 772 while i > 0:
773 773 procutil.stdout.write('\r %2i' % i)
774 774 procutil.stdout.flush()
775 775 time.sleep(1)
776 776 i -= 1
777 777 procutil.stderr.write('\r')
778 778
779 779 print(" fetching the file a second time")
780 780 fo = urlreq.urlopen(url)
781 781 data2 = fo.read()
782 782 fo.close()
783 783
784 784 if data1 == data2:
785 785 print(' data are identical')
786 786 else:
787 787 print(' ERROR: DATA DIFFER')
788 788
789 789 DEBUG = dbbackup
790 790
791 791
792 792 def test(url, N=10):
793 793 print("performing continuity test (making sure stuff isn't corrupted)")
794 794 continuity(url)
795 795 print('')
796 796 print("performing speed comparison")
797 797 comp(N, url)
798 798 print('')
799 799 print("performing dropped-connection check")
800 800 test_timeout(url)
801 801
802 802 if __name__ == '__main__':
803 803 import time
804 804 try:
805 805 N = int(sys.argv[1])
806 806 url = sys.argv[2]
807 807 except (IndexError, ValueError):
808 808 print("%s <integer> <url>" % sys.argv[0])
809 809 else:
810 810 test(url, N)
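Run from the command line, the self-test therefore takes a connection count and a URL, e.g. (URL is a placeholder):

    $ python keepalive.py 10 http://example.com/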