##// END OF EJS Templates
keepalive: handle broken pipes gracefully during large POSTs
Augie Fackler -
r9726:430e59ff default
parent child Browse files
Show More
@@ -1,671 +1,741
1 # This library is free software; you can redistribute it and/or
1 # This library is free software; you can redistribute it and/or
2 # modify it under the terms of the GNU Lesser General Public
2 # modify it under the terms of the GNU Lesser General Public
3 # License as published by the Free Software Foundation; either
3 # License as published by the Free Software Foundation; either
4 # version 2.1 of the License, or (at your option) any later version.
4 # version 2.1 of the License, or (at your option) any later version.
5 #
5 #
6 # This library is distributed in the hope that it will be useful,
6 # This library is distributed in the hope that it will be useful,
7 # but WITHOUT ANY WARRANTY; without even the implied warranty of
7 # but WITHOUT ANY WARRANTY; without even the implied warranty of
8 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
8 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
9 # Lesser General Public License for more details.
9 # Lesser General Public License for more details.
10 #
10 #
11 # You should have received a copy of the GNU Lesser General Public
11 # You should have received a copy of the GNU Lesser General Public
12 # License along with this library; if not, write to the
12 # License along with this library; if not, write to the
13 # Free Software Foundation, Inc.,
13 # Free Software Foundation, Inc.,
14 # 59 Temple Place, Suite 330,
14 # 59 Temple Place, Suite 330,
15 # Boston, MA 02111-1307 USA
15 # Boston, MA 02111-1307 USA
16
16
17 # This file is part of urlgrabber, a high-level cross-protocol url-grabber
17 # This file is part of urlgrabber, a high-level cross-protocol url-grabber
18 # Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko
18 # Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko
19
19
20 # Modified by Benoit Boissinot:
20 # Modified by Benoit Boissinot:
21 # - fix for digest auth (inspired from urllib2.py @ Python v2.4)
21 # - fix for digest auth (inspired from urllib2.py @ Python v2.4)
22 # Modified by Dirkjan Ochtman:
22 # Modified by Dirkjan Ochtman:
23 # - import md5 function from a local util module
23 # - import md5 function from a local util module
24 # Modified by Martin Geisler:
24 # Modified by Martin Geisler:
25 # - moved md5 function from local util module to this module
25 # - moved md5 function from local util module to this module
26 # Modified by Augie Fackler:
27 # - add safesend method and use it to prevent broken pipe errors
28 # on large POST requests
26
29
27 """An HTTP handler for urllib2 that supports HTTP 1.1 and keepalive.
30 """An HTTP handler for urllib2 that supports HTTP 1.1 and keepalive.
28
31
29 >>> import urllib2
32 >>> import urllib2
30 >>> from keepalive import HTTPHandler
33 >>> from keepalive import HTTPHandler
31 >>> keepalive_handler = HTTPHandler()
34 >>> keepalive_handler = HTTPHandler()
32 >>> opener = urllib2.build_opener(keepalive_handler)
35 >>> opener = urllib2.build_opener(keepalive_handler)
33 >>> urllib2.install_opener(opener)
36 >>> urllib2.install_opener(opener)
34 >>>
37 >>>
35 >>> fo = urllib2.urlopen('http://www.python.org')
38 >>> fo = urllib2.urlopen('http://www.python.org')
36
39
37 If a connection to a given host is requested, and all of the existing
40 If a connection to a given host is requested, and all of the existing
38 connections are still in use, another connection will be opened. If
41 connections are still in use, another connection will be opened. If
39 the handler tries to use an existing connection but it fails in some
42 the handler tries to use an existing connection but it fails in some
40 way, it will be closed and removed from the pool.
43 way, it will be closed and removed from the pool.
41
44
42 To remove the handler, simply re-run build_opener with no arguments, and
45 To remove the handler, simply re-run build_opener with no arguments, and
43 install that opener.
46 install that opener.
44
47
45 You can explicitly close connections by using the close_connection()
48 You can explicitly close connections by using the close_connection()
46 method of the returned file-like object (described below) or you can
49 method of the returned file-like object (described below) or you can
47 use the handler methods:
50 use the handler methods:
48
51
49 close_connection(host)
52 close_connection(host)
50 close_all()
53 close_all()
51 open_connections()
54 open_connections()
52
55
53 NOTE: using the close_connection and close_all methods of the handler
56 NOTE: using the close_connection and close_all methods of the handler
54 should be done with care when using multiple threads.
57 should be done with care when using multiple threads.
55 * there is nothing that prevents another thread from creating new
58 * there is nothing that prevents another thread from creating new
56 connections immediately after connections are closed
59 connections immediately after connections are closed
57 * no checks are done to prevent in-use connections from being closed
60 * no checks are done to prevent in-use connections from being closed
58
61
59 >>> keepalive_handler.close_all()
62 >>> keepalive_handler.close_all()
60
63
61 EXTRA ATTRIBUTES AND METHODS
64 EXTRA ATTRIBUTES AND METHODS
62
65
63 Upon a status of 200, the object returned has a few additional
66 Upon a status of 200, the object returned has a few additional
64 attributes and methods, which should not be used if you want to
67 attributes and methods, which should not be used if you want to
65 remain consistent with the normal urllib2-returned objects:
68 remain consistent with the normal urllib2-returned objects:
66
69
67 close_connection() - close the connection to the host
70 close_connection() - close the connection to the host
68 readlines() - you know, readlines()
71 readlines() - you know, readlines()
69 status - the return status (ie 404)
72 status - the return status (ie 404)
70 reason - english translation of status (ie 'File not found')
73 reason - english translation of status (ie 'File not found')
71
74
72 If you want the best of both worlds, use this inside an
75 If you want the best of both worlds, use this inside an
73 AttributeError-catching try:
76 AttributeError-catching try:
74
77
75 >>> try: status = fo.status
78 >>> try: status = fo.status
76 >>> except AttributeError: status = None
79 >>> except AttributeError: status = None
77
80
78 Unfortunately, these are ONLY there if status == 200, so it's not
81 Unfortunately, these are ONLY there if status == 200, so it's not
79 easy to distinguish between non-200 responses. The reason is that
82 easy to distinguish between non-200 responses. The reason is that
80 urllib2 tries to do clever things with error codes 301, 302, 401,
83 urllib2 tries to do clever things with error codes 301, 302, 401,
81 and 407, and it wraps the object upon return.
84 and 407, and it wraps the object upon return.
82
85
83 For python versions earlier than 2.4, you can avoid this fancy error
86 For python versions earlier than 2.4, you can avoid this fancy error
84 handling by setting the module-level global HANDLE_ERRORS to zero.
87 handling by setting the module-level global HANDLE_ERRORS to zero.
85 You see, prior to 2.4, it's the HTTP Handler's job to determine what
88 You see, prior to 2.4, it's the HTTP Handler's job to determine what
86 to handle specially, and what to just pass up. HANDLE_ERRORS == 0
89 to handle specially, and what to just pass up. HANDLE_ERRORS == 0
87 means "pass everything up". In python 2.4, however, this job no
90 means "pass everything up". In python 2.4, however, this job no
88 longer belongs to the HTTP Handler and is now done by a NEW handler,
91 longer belongs to the HTTP Handler and is now done by a NEW handler,
89 HTTPErrorProcessor. Here's the bottom line:
92 HTTPErrorProcessor. Here's the bottom line:
90
93
91 python version < 2.4
94 python version < 2.4
92 HANDLE_ERRORS == 1 (default) pass up 200, treat the rest as
95 HANDLE_ERRORS == 1 (default) pass up 200, treat the rest as
93 errors
96 errors
94 HANDLE_ERRORS == 0 pass everything up, error processing is
97 HANDLE_ERRORS == 0 pass everything up, error processing is
95 left to the calling code
98 left to the calling code
96 python version >= 2.4
99 python version >= 2.4
97 HANDLE_ERRORS == 1 pass up 200, treat the rest as errors
100 HANDLE_ERRORS == 1 pass up 200, treat the rest as errors
98 HANDLE_ERRORS == 0 (default) pass everything up, let the
101 HANDLE_ERRORS == 0 (default) pass everything up, let the
99 other handlers (specifically,
102 other handlers (specifically,
100 HTTPErrorProcessor) decide what to do
103 HTTPErrorProcessor) decide what to do
101
104
102 In practice, setting the variable either way makes little difference
105 In practice, setting the variable either way makes little difference
103 in python 2.4, so for the most consistent behavior across versions,
106 in python 2.4, so for the most consistent behavior across versions,
104 you probably just want to use the defaults, which will give you
107 you probably just want to use the defaults, which will give you
105 exceptions on errors.
108 exceptions on errors.
106
109
107 """
110 """
108
111
109 # $Id: keepalive.py,v 1.14 2006/04/04 21:00:32 mstenner Exp $
112 # $Id: keepalive.py,v 1.14 2006/04/04 21:00:32 mstenner Exp $
110
113
111 import urllib2
114 import errno
112 import httplib
115 import httplib
113 import socket
116 import socket
114 import thread
117 import thread
118 import urllib2
115
119
116 DEBUG = None
120 DEBUG = None
117
121
118 import sys
122 import sys
119 if sys.version_info < (2, 4): HANDLE_ERRORS = 1
123 if sys.version_info < (2, 4): HANDLE_ERRORS = 1
120 else: HANDLE_ERRORS = 0
124 else: HANDLE_ERRORS = 0
121
125
122 class ConnectionManager:
126 class ConnectionManager:
123 """
127 """
124 The connection manager must be able to:
128 The connection manager must be able to:
125 * keep track of all existing
129 * keep track of all existing
126 """
130 """
127 def __init__(self):
131 def __init__(self):
128 self._lock = thread.allocate_lock()
132 self._lock = thread.allocate_lock()
129 self._hostmap = {} # map hosts to a list of connections
133 self._hostmap = {} # map hosts to a list of connections
130 self._connmap = {} # map connections to host
134 self._connmap = {} # map connections to host
131 self._readymap = {} # map connection to ready state
135 self._readymap = {} # map connection to ready state
132
136
133 def add(self, host, connection, ready):
137 def add(self, host, connection, ready):
134 self._lock.acquire()
138 self._lock.acquire()
135 try:
139 try:
136 if not host in self._hostmap: self._hostmap[host] = []
140 if not host in self._hostmap: self._hostmap[host] = []
137 self._hostmap[host].append(connection)
141 self._hostmap[host].append(connection)
138 self._connmap[connection] = host
142 self._connmap[connection] = host
139 self._readymap[connection] = ready
143 self._readymap[connection] = ready
140 finally:
144 finally:
141 self._lock.release()
145 self._lock.release()
142
146
143 def remove(self, connection):
147 def remove(self, connection):
144 self._lock.acquire()
148 self._lock.acquire()
145 try:
149 try:
146 try:
150 try:
147 host = self._connmap[connection]
151 host = self._connmap[connection]
148 except KeyError:
152 except KeyError:
149 pass
153 pass
150 else:
154 else:
151 del self._connmap[connection]
155 del self._connmap[connection]
152 del self._readymap[connection]
156 del self._readymap[connection]
153 self._hostmap[host].remove(connection)
157 self._hostmap[host].remove(connection)
154 if not self._hostmap[host]: del self._hostmap[host]
158 if not self._hostmap[host]: del self._hostmap[host]
155 finally:
159 finally:
156 self._lock.release()
160 self._lock.release()
157
161
158 def set_ready(self, connection, ready):
162 def set_ready(self, connection, ready):
159 try: self._readymap[connection] = ready
163 try: self._readymap[connection] = ready
160 except KeyError: pass
164 except KeyError: pass
161
165
162 def get_ready_conn(self, host):
166 def get_ready_conn(self, host):
163 conn = None
167 conn = None
164 self._lock.acquire()
168 self._lock.acquire()
165 try:
169 try:
166 if host in self._hostmap:
170 if host in self._hostmap:
167 for c in self._hostmap[host]:
171 for c in self._hostmap[host]:
168 if self._readymap[c]:
172 if self._readymap[c]:
169 self._readymap[c] = 0
173 self._readymap[c] = 0
170 conn = c
174 conn = c
171 break
175 break
172 finally:
176 finally:
173 self._lock.release()
177 self._lock.release()
174 return conn
178 return conn
175
179
176 def get_all(self, host=None):
180 def get_all(self, host=None):
177 if host:
181 if host:
178 return list(self._hostmap.get(host, []))
182 return list(self._hostmap.get(host, []))
179 else:
183 else:
180 return dict(self._hostmap)
184 return dict(self._hostmap)
181
185
182 class KeepAliveHandler:
186 class KeepAliveHandler:
183 def __init__(self):
187 def __init__(self):
184 self._cm = ConnectionManager()
188 self._cm = ConnectionManager()
185
189
186 #### Connection Management
190 #### Connection Management
187 def open_connections(self):
191 def open_connections(self):
188 """return a list of connected hosts and the number of connections
192 """return a list of connected hosts and the number of connections
189 to each. [('foo.com:80', 2), ('bar.org', 1)]"""
193 to each. [('foo.com:80', 2), ('bar.org', 1)]"""
190 return [(host, len(li)) for (host, li) in self._cm.get_all().items()]
194 return [(host, len(li)) for (host, li) in self._cm.get_all().items()]
191
195
192 def close_connection(self, host):
196 def close_connection(self, host):
193 """close connection(s) to <host>
197 """close connection(s) to <host>
194 host is the host:port spec, as in 'www.cnn.com:8080' as passed in.
198 host is the host:port spec, as in 'www.cnn.com:8080' as passed in.
195 no error occurs if there is no connection to that host."""
199 no error occurs if there is no connection to that host."""
196 for h in self._cm.get_all(host):
200 for h in self._cm.get_all(host):
197 self._cm.remove(h)
201 self._cm.remove(h)
198 h.close()
202 h.close()
199
203
200 def close_all(self):
204 def close_all(self):
201 """close all open connections"""
205 """close all open connections"""
202 for host, conns in self._cm.get_all().iteritems():
206 for host, conns in self._cm.get_all().iteritems():
203 for h in conns:
207 for h in conns:
204 self._cm.remove(h)
208 self._cm.remove(h)
205 h.close()
209 h.close()
206
210
207 def _request_closed(self, request, host, connection):
211 def _request_closed(self, request, host, connection):
208 """tells us that this request is now closed and the the
212 """tells us that this request is now closed and the the
209 connection is ready for another request"""
213 connection is ready for another request"""
210 self._cm.set_ready(connection, 1)
214 self._cm.set_ready(connection, 1)
211
215
212 def _remove_connection(self, host, connection, close=0):
216 def _remove_connection(self, host, connection, close=0):
213 if close: connection.close()
217 if close: connection.close()
214 self._cm.remove(connection)
218 self._cm.remove(connection)
215
219
216 #### Transaction Execution
220 #### Transaction Execution
217 def http_open(self, req):
221 def http_open(self, req):
218 return self.do_open(HTTPConnection, req)
222 return self.do_open(HTTPConnection, req)
219
223
220 def do_open(self, http_class, req):
224 def do_open(self, http_class, req):
221 host = req.get_host()
225 host = req.get_host()
222 if not host:
226 if not host:
223 raise urllib2.URLError('no host given')
227 raise urllib2.URLError('no host given')
224
228
225 try:
229 try:
226 h = self._cm.get_ready_conn(host)
230 h = self._cm.get_ready_conn(host)
227 while h:
231 while h:
228 r = self._reuse_connection(h, req, host)
232 r = self._reuse_connection(h, req, host)
229
233
230 # if this response is non-None, then it worked and we're
234 # if this response is non-None, then it worked and we're
231 # done. Break out, skipping the else block.
235 # done. Break out, skipping the else block.
232 if r: break
236 if r: break
233
237
234 # connection is bad - possibly closed by server
238 # connection is bad - possibly closed by server
235 # discard it and ask for the next free connection
239 # discard it and ask for the next free connection
236 h.close()
240 h.close()
237 self._cm.remove(h)
241 self._cm.remove(h)
238 h = self._cm.get_ready_conn(host)
242 h = self._cm.get_ready_conn(host)
239 else:
243 else:
240 # no (working) free connections were found. Create a new one.
244 # no (working) free connections were found. Create a new one.
241 h = http_class(host)
245 h = http_class(host)
242 if DEBUG: DEBUG.info("creating new connection to %s (%d)",
246 if DEBUG: DEBUG.info("creating new connection to %s (%d)",
243 host, id(h))
247 host, id(h))
244 self._cm.add(host, h, 0)
248 self._cm.add(host, h, 0)
245 self._start_transaction(h, req)
249 self._start_transaction(h, req)
246 r = h.getresponse()
250 r = h.getresponse()
247 except (socket.error, httplib.HTTPException), err:
251 except (socket.error, httplib.HTTPException), err:
248 raise urllib2.URLError(err)
252 raise urllib2.URLError(err)
249
253
250 # if not a persistent connection, don't try to reuse it
254 # if not a persistent connection, don't try to reuse it
251 if r.will_close: self._cm.remove(h)
255 if r.will_close: self._cm.remove(h)
252
256
253 if DEBUG: DEBUG.info("STATUS: %s, %s", r.status, r.reason)
257 if DEBUG: DEBUG.info("STATUS: %s, %s", r.status, r.reason)
254 r._handler = self
258 r._handler = self
255 r._host = host
259 r._host = host
256 r._url = req.get_full_url()
260 r._url = req.get_full_url()
257 r._connection = h
261 r._connection = h
258 r.code = r.status
262 r.code = r.status
259 r.headers = r.msg
263 r.headers = r.msg
260 r.msg = r.reason
264 r.msg = r.reason
261
265
262 if r.status == 200 or not HANDLE_ERRORS:
266 if r.status == 200 or not HANDLE_ERRORS:
263 return r
267 return r
264 else:
268 else:
265 return self.parent.error('http', req, r,
269 return self.parent.error('http', req, r,
266 r.status, r.msg, r.headers)
270 r.status, r.msg, r.headers)
267
271
268 def _reuse_connection(self, h, req, host):
272 def _reuse_connection(self, h, req, host):
269 """start the transaction with a re-used connection
273 """start the transaction with a re-used connection
270 return a response object (r) upon success or None on failure.
274 return a response object (r) upon success or None on failure.
271 This DOES not close or remove bad connections in cases where
275 This DOES not close or remove bad connections in cases where
272 it returns. However, if an unexpected exception occurs, it
276 it returns. However, if an unexpected exception occurs, it
273 will close and remove the connection before re-raising.
277 will close and remove the connection before re-raising.
274 """
278 """
275 try:
279 try:
276 self._start_transaction(h, req)
280 self._start_transaction(h, req)
277 r = h.getresponse()
281 r = h.getresponse()
278 # note: just because we got something back doesn't mean it
282 # note: just because we got something back doesn't mean it
279 # worked. We'll check the version below, too.
283 # worked. We'll check the version below, too.
280 except (socket.error, httplib.HTTPException):
284 except (socket.error, httplib.HTTPException):
281 r = None
285 r = None
282 except:
286 except:
283 # adding this block just in case we've missed
287 # adding this block just in case we've missed
284 # something we will still raise the exception, but
288 # something we will still raise the exception, but
285 # lets try and close the connection and remove it
289 # lets try and close the connection and remove it
286 # first. We previously got into a nasty loop
290 # first. We previously got into a nasty loop
287 # where an exception was uncaught, and so the
291 # where an exception was uncaught, and so the
288 # connection stayed open. On the next try, the
292 # connection stayed open. On the next try, the
289 # same exception was raised, etc. The tradeoff is
293 # same exception was raised, etc. The tradeoff is
290 # that it's now possible this call will raise
294 # that it's now possible this call will raise
291 # a DIFFERENT exception
295 # a DIFFERENT exception
292 if DEBUG: DEBUG.error("unexpected exception - closing " + \
296 if DEBUG: DEBUG.error("unexpected exception - closing " + \
293 "connection to %s (%d)", host, id(h))
297 "connection to %s (%d)", host, id(h))
294 self._cm.remove(h)
298 self._cm.remove(h)
295 h.close()
299 h.close()
296 raise
300 raise
297
301
298 if r is None or r.version == 9:
302 if r is None or r.version == 9:
299 # httplib falls back to assuming HTTP 0.9 if it gets a
303 # httplib falls back to assuming HTTP 0.9 if it gets a
300 # bad header back. This is most likely to happen if
304 # bad header back. This is most likely to happen if
301 # the socket has been closed by the server since we
305 # the socket has been closed by the server since we
302 # last used the connection.
306 # last used the connection.
303 if DEBUG: DEBUG.info("failed to re-use connection to %s (%d)",
307 if DEBUG: DEBUG.info("failed to re-use connection to %s (%d)",
304 host, id(h))
308 host, id(h))
305 r = None
309 r = None
306 else:
310 else:
307 if DEBUG: DEBUG.info("re-using connection to %s (%d)", host, id(h))
311 if DEBUG: DEBUG.info("re-using connection to %s (%d)", host, id(h))
308
312
309 return r
313 return r
310
314
311 def _start_transaction(self, h, req):
315 def _start_transaction(self, h, req):
312 # What follows mostly reimplements HTTPConnection.request()
316 # What follows mostly reimplements HTTPConnection.request()
313 # except it adds self.parent.addheaders in the mix.
317 # except it adds self.parent.addheaders in the mix.
314 headers = req.headers.copy()
318 headers = req.headers.copy()
315 if sys.version_info >= (2, 4):
319 if sys.version_info >= (2, 4):
316 headers.update(req.unredirected_hdrs)
320 headers.update(req.unredirected_hdrs)
317 headers.update(self.parent.addheaders)
321 headers.update(self.parent.addheaders)
318 headers = dict((n.lower(), v) for n,v in headers.items())
322 headers = dict((n.lower(), v) for n,v in headers.items())
319 skipheaders = {}
323 skipheaders = {}
320 for n in ('host', 'accept-encoding'):
324 for n in ('host', 'accept-encoding'):
321 if n in headers:
325 if n in headers:
322 skipheaders['skip_' + n.replace('-', '_')] = 1
326 skipheaders['skip_' + n.replace('-', '_')] = 1
323 try:
327 try:
324 if req.has_data():
328 if req.has_data():
325 data = req.get_data()
329 data = req.get_data()
326 h.putrequest('POST', req.get_selector(), **skipheaders)
330 h.putrequest('POST', req.get_selector(), **skipheaders)
327 if 'content-type' not in headers:
331 if 'content-type' not in headers:
328 h.putheader('Content-type',
332 h.putheader('Content-type',
329 'application/x-www-form-urlencoded')
333 'application/x-www-form-urlencoded')
330 if 'content-length' not in headers:
334 if 'content-length' not in headers:
331 h.putheader('Content-length', '%d' % len(data))
335 h.putheader('Content-length', '%d' % len(data))
332 else:
336 else:
333 h.putrequest('GET', req.get_selector(), **skipheaders)
337 h.putrequest('GET', req.get_selector(), **skipheaders)
334 except (socket.error), err:
338 except (socket.error), err:
335 raise urllib2.URLError(err)
339 raise urllib2.URLError(err)
336 for k, v in headers.items():
340 for k, v in headers.items():
337 h.putheader(k, v)
341 h.putheader(k, v)
338 h.endheaders()
342 h.endheaders()
339 if req.has_data():
343 if req.has_data():
340 h.send(data)
344 h.send(data)
341
345
342 class HTTPHandler(KeepAliveHandler, urllib2.HTTPHandler):
346 class HTTPHandler(KeepAliveHandler, urllib2.HTTPHandler):
343 pass
347 pass
344
348
345 class HTTPResponse(httplib.HTTPResponse):
349 class HTTPResponse(httplib.HTTPResponse):
346 # we need to subclass HTTPResponse in order to
350 # we need to subclass HTTPResponse in order to
347 # 1) add readline() and readlines() methods
351 # 1) add readline() and readlines() methods
348 # 2) add close_connection() methods
352 # 2) add close_connection() methods
349 # 3) add info() and geturl() methods
353 # 3) add info() and geturl() methods
350
354
351 # in order to add readline(), read must be modified to deal with a
355 # in order to add readline(), read must be modified to deal with a
352 # buffer. example: readline must read a buffer and then spit back
356 # buffer. example: readline must read a buffer and then spit back
353 # one line at a time. The only real alternative is to read one
357 # one line at a time. The only real alternative is to read one
354 # BYTE at a time (ick). Once something has been read, it can't be
358 # BYTE at a time (ick). Once something has been read, it can't be
355 # put back (ok, maybe it can, but that's even uglier than this),
359 # put back (ok, maybe it can, but that's even uglier than this),
356 # so if you THEN do a normal read, you must first take stuff from
360 # so if you THEN do a normal read, you must first take stuff from
357 # the buffer.
361 # the buffer.
358
362
359 # the read method wraps the original to accomodate buffering,
363 # the read method wraps the original to accomodate buffering,
360 # although read() never adds to the buffer.
364 # although read() never adds to the buffer.
361 # Both readline and readlines have been stolen with almost no
365 # Both readline and readlines have been stolen with almost no
362 # modification from socket.py
366 # modification from socket.py
363
367
364
368
365 def __init__(self, sock, debuglevel=0, strict=0, method=None):
369 def __init__(self, sock, debuglevel=0, strict=0, method=None):
366 if method: # the httplib in python 2.3 uses the method arg
370 if method: # the httplib in python 2.3 uses the method arg
367 httplib.HTTPResponse.__init__(self, sock, debuglevel, method)
371 httplib.HTTPResponse.__init__(self, sock, debuglevel, method)
368 else: # 2.2 doesn't
372 else: # 2.2 doesn't
369 httplib.HTTPResponse.__init__(self, sock, debuglevel)
373 httplib.HTTPResponse.__init__(self, sock, debuglevel)
370 self.fileno = sock.fileno
374 self.fileno = sock.fileno
371 self.code = None
375 self.code = None
372 self._rbuf = ''
376 self._rbuf = ''
373 self._rbufsize = 8096
377 self._rbufsize = 8096
374 self._handler = None # inserted by the handler later
378 self._handler = None # inserted by the handler later
375 self._host = None # (same)
379 self._host = None # (same)
376 self._url = None # (same)
380 self._url = None # (same)
377 self._connection = None # (same)
381 self._connection = None # (same)
378
382
379 _raw_read = httplib.HTTPResponse.read
383 _raw_read = httplib.HTTPResponse.read
380
384
381 def close(self):
385 def close(self):
382 if self.fp:
386 if self.fp:
383 self.fp.close()
387 self.fp.close()
384 self.fp = None
388 self.fp = None
385 if self._handler:
389 if self._handler:
386 self._handler._request_closed(self, self._host,
390 self._handler._request_closed(self, self._host,
387 self._connection)
391 self._connection)
388
392
389 def close_connection(self):
393 def close_connection(self):
390 self._handler._remove_connection(self._host, self._connection, close=1)
394 self._handler._remove_connection(self._host, self._connection, close=1)
391 self.close()
395 self.close()
392
396
393 def info(self):
397 def info(self):
394 return self.headers
398 return self.headers
395
399
396 def geturl(self):
400 def geturl(self):
397 return self._url
401 return self._url
398
402
399 def read(self, amt=None):
403 def read(self, amt=None):
400 # the _rbuf test is only in this first if for speed. It's not
404 # the _rbuf test is only in this first if for speed. It's not
401 # logically necessary
405 # logically necessary
402 if self._rbuf and not amt is None:
406 if self._rbuf and not amt is None:
403 L = len(self._rbuf)
407 L = len(self._rbuf)
404 if amt > L:
408 if amt > L:
405 amt -= L
409 amt -= L
406 else:
410 else:
407 s = self._rbuf[:amt]
411 s = self._rbuf[:amt]
408 self._rbuf = self._rbuf[amt:]
412 self._rbuf = self._rbuf[amt:]
409 return s
413 return s
410
414
411 s = self._rbuf + self._raw_read(amt)
415 s = self._rbuf + self._raw_read(amt)
412 self._rbuf = ''
416 self._rbuf = ''
413 return s
417 return s
414
418
415 # stolen from Python SVN #68532 to fix issue1088
419 # stolen from Python SVN #68532 to fix issue1088
416 def _read_chunked(self, amt):
420 def _read_chunked(self, amt):
417 chunk_left = self.chunk_left
421 chunk_left = self.chunk_left
418 value = ''
422 value = ''
419
423
420 # XXX This accumulates chunks by repeated string concatenation,
424 # XXX This accumulates chunks by repeated string concatenation,
421 # which is not efficient as the number or size of chunks gets big.
425 # which is not efficient as the number or size of chunks gets big.
422 while True:
426 while True:
423 if chunk_left is None:
427 if chunk_left is None:
424 line = self.fp.readline()
428 line = self.fp.readline()
425 i = line.find(';')
429 i = line.find(';')
426 if i >= 0:
430 if i >= 0:
427 line = line[:i] # strip chunk-extensions
431 line = line[:i] # strip chunk-extensions
428 try:
432 try:
429 chunk_left = int(line, 16)
433 chunk_left = int(line, 16)
430 except ValueError:
434 except ValueError:
431 # close the connection as protocol synchronisation is
435 # close the connection as protocol synchronisation is
432 # probably lost
436 # probably lost
433 self.close()
437 self.close()
434 raise httplib.IncompleteRead(value)
438 raise httplib.IncompleteRead(value)
435 if chunk_left == 0:
439 if chunk_left == 0:
436 break
440 break
437 if amt is None:
441 if amt is None:
438 value += self._safe_read(chunk_left)
442 value += self._safe_read(chunk_left)
439 elif amt < chunk_left:
443 elif amt < chunk_left:
440 value += self._safe_read(amt)
444 value += self._safe_read(amt)
441 self.chunk_left = chunk_left - amt
445 self.chunk_left = chunk_left - amt
442 return value
446 return value
443 elif amt == chunk_left:
447 elif amt == chunk_left:
444 value += self._safe_read(amt)
448 value += self._safe_read(amt)
445 self._safe_read(2) # toss the CRLF at the end of the chunk
449 self._safe_read(2) # toss the CRLF at the end of the chunk
446 self.chunk_left = None
450 self.chunk_left = None
447 return value
451 return value
448 else:
452 else:
449 value += self._safe_read(chunk_left)
453 value += self._safe_read(chunk_left)
450 amt -= chunk_left
454 amt -= chunk_left
451
455
452 # we read the whole chunk, get another
456 # we read the whole chunk, get another
453 self._safe_read(2) # toss the CRLF at the end of the chunk
457 self._safe_read(2) # toss the CRLF at the end of the chunk
454 chunk_left = None
458 chunk_left = None
455
459
456 # read and discard trailer up to the CRLF terminator
460 # read and discard trailer up to the CRLF terminator
457 ### note: we shouldn't have any trailers!
461 ### note: we shouldn't have any trailers!
458 while True:
462 while True:
459 line = self.fp.readline()
463 line = self.fp.readline()
460 if not line:
464 if not line:
461 # a vanishingly small number of sites EOF without
465 # a vanishingly small number of sites EOF without
462 # sending the trailer
466 # sending the trailer
463 break
467 break
464 if line == '\r\n':
468 if line == '\r\n':
465 break
469 break
466
470
467 # we read everything; close the "file"
471 # we read everything; close the "file"
468 self.close()
472 self.close()
469
473
470 return value
474 return value
471
475
472 def readline(self, limit=-1):
476 def readline(self, limit=-1):
473 i = self._rbuf.find('\n')
477 i = self._rbuf.find('\n')
474 while i < 0 and not (0 < limit <= len(self._rbuf)):
478 while i < 0 and not (0 < limit <= len(self._rbuf)):
475 new = self._raw_read(self._rbufsize)
479 new = self._raw_read(self._rbufsize)
476 if not new: break
480 if not new: break
477 i = new.find('\n')
481 i = new.find('\n')
478 if i >= 0: i = i + len(self._rbuf)
482 if i >= 0: i = i + len(self._rbuf)
479 self._rbuf = self._rbuf + new
483 self._rbuf = self._rbuf + new
480 if i < 0: i = len(self._rbuf)
484 if i < 0: i = len(self._rbuf)
481 else: i = i+1
485 else: i = i+1
482 if 0 <= limit < len(self._rbuf): i = limit
486 if 0 <= limit < len(self._rbuf): i = limit
483 data, self._rbuf = self._rbuf[:i], self._rbuf[i:]
487 data, self._rbuf = self._rbuf[:i], self._rbuf[i:]
484 return data
488 return data
485
489
486 def readlines(self, sizehint = 0):
490 def readlines(self, sizehint = 0):
487 total = 0
491 total = 0
488 list = []
492 list = []
489 while 1:
493 while 1:
490 line = self.readline()
494 line = self.readline()
491 if not line: break
495 if not line: break
492 list.append(line)
496 list.append(line)
493 total += len(line)
497 total += len(line)
494 if sizehint and total >= sizehint:
498 if sizehint and total >= sizehint:
495 break
499 break
496 return list
500 return list
497
501
502 def safesend(self, str):
503 """Send `str' to the server.
504
505 Shamelessly ripped off from httplib to patch a bad behavior.
506 """
507 # _broken_pipe_resp is an attribute we set in this function
508 # if the socket is closed while we're sending data but
509 # the server sent us a response before hanging up.
510 # In that case, we want to pretend to send the rest of the
511 # outgoing data, and then let the user use getresponse()
512 # (which we wrap) to get this last response before
513 # opening a new socket.
514 if getattr(self, '_broken_pipe_resp', None) is not None:
515 return
516
517 if self.sock is None:
518 if self.auto_open:
519 self.connect()
520 else:
521 raise httplib.NotConnected()
522
523 # send the data to the server. if we get a broken pipe, then close
524 # the socket. we want to reconnect when somebody tries to send again.
525 #
526 # NOTE: we DO propagate the error, though, because we cannot simply
527 # ignore the error... the caller will know if they can retry.
528 if self.debuglevel > 0:
529 print "send:", repr(str)
530 try:
531 blocksize=8192
532 if hasattr(str,'read') :
533 if self.debuglevel > 0: print "sendIng a read()able"
534 data=str.read(blocksize)
535 while data:
536 self.sock.sendall(data)
537 data=str.read(blocksize)
538 else:
539 self.sock.sendall(str)
540 except socket.error, v:
541 reraise = True
542 if v[0] == errno.EPIPE: # Broken pipe
543 if self._HTTPConnection__state == httplib._CS_REQ_SENT:
544 self._broken_pipe_resp = None
545 self._broken_pipe_resp = self.getresponse()
546 reraise = False
547 self.close()
548 if reraise:
549 raise
550
551 def wrapgetresponse(cls):
552 """Wraps getresponse in cls with a broken-pipe sane version.
553 """
554 def safegetresponse(self):
555 # In safesend() we might set the _broken_pipe_resp
556 # attribute, in which case the socket has already
557 # been closed and we just need to give them the response
558 # back. Otherwise, we use the normal response path.
559 r = getattr(self, '_broken_pipe_resp', None)
560 if r is not None:
561 return r
562 return cls.getresponse(self)
563 safegetresponse.__doc__ = cls.getresponse.__doc__
564 return safegetresponse
498
565
499 class HTTPConnection(httplib.HTTPConnection):
566 class HTTPConnection(httplib.HTTPConnection):
500 # use the modified response class
567 # use the modified response class
501 response_class = HTTPResponse
568 response_class = HTTPResponse
569 send = safesend
570 getresponse = wrapgetresponse(httplib.HTTPConnection)
571
502
572
503 #########################################################################
573 #########################################################################
504 ##### TEST FUNCTIONS
574 ##### TEST FUNCTIONS
505 #########################################################################
575 #########################################################################
506
576
507 def error_handler(url):
577 def error_handler(url):
508 global HANDLE_ERRORS
578 global HANDLE_ERRORS
509 orig = HANDLE_ERRORS
579 orig = HANDLE_ERRORS
510 keepalive_handler = HTTPHandler()
580 keepalive_handler = HTTPHandler()
511 opener = urllib2.build_opener(keepalive_handler)
581 opener = urllib2.build_opener(keepalive_handler)
512 urllib2.install_opener(opener)
582 urllib2.install_opener(opener)
513 pos = {0: 'off', 1: 'on'}
583 pos = {0: 'off', 1: 'on'}
514 for i in (0, 1):
584 for i in (0, 1):
515 print " fancy error handling %s (HANDLE_ERRORS = %i)" % (pos[i], i)
585 print " fancy error handling %s (HANDLE_ERRORS = %i)" % (pos[i], i)
516 HANDLE_ERRORS = i
586 HANDLE_ERRORS = i
517 try:
587 try:
518 fo = urllib2.urlopen(url)
588 fo = urllib2.urlopen(url)
519 fo.read()
589 fo.read()
520 fo.close()
590 fo.close()
521 try: status, reason = fo.status, fo.reason
591 try: status, reason = fo.status, fo.reason
522 except AttributeError: status, reason = None, None
592 except AttributeError: status, reason = None, None
523 except IOError, e:
593 except IOError, e:
524 print " EXCEPTION: %s" % e
594 print " EXCEPTION: %s" % e
525 raise
595 raise
526 else:
596 else:
527 print " status = %s, reason = %s" % (status, reason)
597 print " status = %s, reason = %s" % (status, reason)
528 HANDLE_ERRORS = orig
598 HANDLE_ERRORS = orig
529 hosts = keepalive_handler.open_connections()
599 hosts = keepalive_handler.open_connections()
530 print "open connections:", hosts
600 print "open connections:", hosts
531 keepalive_handler.close_all()
601 keepalive_handler.close_all()
532
602
533 def md5(s):
603 def md5(s):
534 try:
604 try:
535 from hashlib import md5 as _md5
605 from hashlib import md5 as _md5
536 except ImportError:
606 except ImportError:
537 from md5 import md5 as _md5
607 from md5 import md5 as _md5
538 global md5
608 global md5
539 md5 = _md5
609 md5 = _md5
540 return _md5(s)
610 return _md5(s)
541
611
542 def continuity(url):
612 def continuity(url):
543 format = '%25s: %s'
613 format = '%25s: %s'
544
614
545 # first fetch the file with the normal http handler
615 # first fetch the file with the normal http handler
546 opener = urllib2.build_opener()
616 opener = urllib2.build_opener()
547 urllib2.install_opener(opener)
617 urllib2.install_opener(opener)
548 fo = urllib2.urlopen(url)
618 fo = urllib2.urlopen(url)
549 foo = fo.read()
619 foo = fo.read()
550 fo.close()
620 fo.close()
551 m = md5.new(foo)
621 m = md5.new(foo)
552 print format % ('normal urllib', m.hexdigest())
622 print format % ('normal urllib', m.hexdigest())
553
623
554 # now install the keepalive handler and try again
624 # now install the keepalive handler and try again
555 opener = urllib2.build_opener(HTTPHandler())
625 opener = urllib2.build_opener(HTTPHandler())
556 urllib2.install_opener(opener)
626 urllib2.install_opener(opener)
557
627
558 fo = urllib2.urlopen(url)
628 fo = urllib2.urlopen(url)
559 foo = fo.read()
629 foo = fo.read()
560 fo.close()
630 fo.close()
561 m = md5.new(foo)
631 m = md5.new(foo)
562 print format % ('keepalive read', m.hexdigest())
632 print format % ('keepalive read', m.hexdigest())
563
633
564 fo = urllib2.urlopen(url)
634 fo = urllib2.urlopen(url)
565 foo = ''
635 foo = ''
566 while 1:
636 while 1:
567 f = fo.readline()
637 f = fo.readline()
568 if f: foo = foo + f
638 if f: foo = foo + f
569 else: break
639 else: break
570 fo.close()
640 fo.close()
571 m = md5.new(foo)
641 m = md5.new(foo)
572 print format % ('keepalive readline', m.hexdigest())
642 print format % ('keepalive readline', m.hexdigest())
573
643
574 def comp(N, url):
644 def comp(N, url):
575 print ' making %i connections to:\n %s' % (N, url)
645 print ' making %i connections to:\n %s' % (N, url)
576
646
577 sys.stdout.write(' first using the normal urllib handlers')
647 sys.stdout.write(' first using the normal urllib handlers')
578 # first use normal opener
648 # first use normal opener
579 opener = urllib2.build_opener()
649 opener = urllib2.build_opener()
580 urllib2.install_opener(opener)
650 urllib2.install_opener(opener)
581 t1 = fetch(N, url)
651 t1 = fetch(N, url)
582 print ' TIME: %.3f s' % t1
652 print ' TIME: %.3f s' % t1
583
653
584 sys.stdout.write(' now using the keepalive handler ')
654 sys.stdout.write(' now using the keepalive handler ')
585 # now install the keepalive handler and try again
655 # now install the keepalive handler and try again
586 opener = urllib2.build_opener(HTTPHandler())
656 opener = urllib2.build_opener(HTTPHandler())
587 urllib2.install_opener(opener)
657 urllib2.install_opener(opener)
588 t2 = fetch(N, url)
658 t2 = fetch(N, url)
589 print ' TIME: %.3f s' % t2
659 print ' TIME: %.3f s' % t2
590 print ' improvement factor: %.2f' % (t1/t2, )
660 print ' improvement factor: %.2f' % (t1/t2, )
591
661
592 def fetch(N, url, delay=0):
662 def fetch(N, url, delay=0):
593 import time
663 import time
594 lens = []
664 lens = []
595 starttime = time.time()
665 starttime = time.time()
596 for i in range(N):
666 for i in range(N):
597 if delay and i > 0: time.sleep(delay)
667 if delay and i > 0: time.sleep(delay)
598 fo = urllib2.urlopen(url)
668 fo = urllib2.urlopen(url)
599 foo = fo.read()
669 foo = fo.read()
600 fo.close()
670 fo.close()
601 lens.append(len(foo))
671 lens.append(len(foo))
602 diff = time.time() - starttime
672 diff = time.time() - starttime
603
673
604 j = 0
674 j = 0
605 for i in lens[1:]:
675 for i in lens[1:]:
606 j = j + 1
676 j = j + 1
607 if not i == lens[0]:
677 if not i == lens[0]:
608 print "WARNING: inconsistent length on read %i: %i" % (j, i)
678 print "WARNING: inconsistent length on read %i: %i" % (j, i)
609
679
610 return diff
680 return diff
611
681
612 def test_timeout(url):
682 def test_timeout(url):
613 global DEBUG
683 global DEBUG
614 dbbackup = DEBUG
684 dbbackup = DEBUG
615 class FakeLogger:
685 class FakeLogger:
616 def debug(self, msg, *args): print msg % args
686 def debug(self, msg, *args): print msg % args
617 info = warning = error = debug
687 info = warning = error = debug
618 DEBUG = FakeLogger()
688 DEBUG = FakeLogger()
619 print " fetching the file to establish a connection"
689 print " fetching the file to establish a connection"
620 fo = urllib2.urlopen(url)
690 fo = urllib2.urlopen(url)
621 data1 = fo.read()
691 data1 = fo.read()
622 fo.close()
692 fo.close()
623
693
624 i = 20
694 i = 20
625 print " waiting %i seconds for the server to close the connection" % i
695 print " waiting %i seconds for the server to close the connection" % i
626 while i > 0:
696 while i > 0:
627 sys.stdout.write('\r %2i' % i)
697 sys.stdout.write('\r %2i' % i)
628 sys.stdout.flush()
698 sys.stdout.flush()
629 time.sleep(1)
699 time.sleep(1)
630 i -= 1
700 i -= 1
631 sys.stderr.write('\r')
701 sys.stderr.write('\r')
632
702
633 print " fetching the file a second time"
703 print " fetching the file a second time"
634 fo = urllib2.urlopen(url)
704 fo = urllib2.urlopen(url)
635 data2 = fo.read()
705 data2 = fo.read()
636 fo.close()
706 fo.close()
637
707
638 if data1 == data2:
708 if data1 == data2:
639 print ' data are identical'
709 print ' data are identical'
640 else:
710 else:
641 print ' ERROR: DATA DIFFER'
711 print ' ERROR: DATA DIFFER'
642
712
643 DEBUG = dbbackup
713 DEBUG = dbbackup
644
714
645
715
646 def test(url, N=10):
716 def test(url, N=10):
647 print "checking error hander (do this on a non-200)"
717 print "checking error hander (do this on a non-200)"
648 try: error_handler(url)
718 try: error_handler(url)
649 except IOError:
719 except IOError:
650 print "exiting - exception will prevent further tests"
720 print "exiting - exception will prevent further tests"
651 sys.exit()
721 sys.exit()
652 print
722 print
653 print "performing continuity test (making sure stuff isn't corrupted)"
723 print "performing continuity test (making sure stuff isn't corrupted)"
654 continuity(url)
724 continuity(url)
655 print
725 print
656 print "performing speed comparison"
726 print "performing speed comparison"
657 comp(N, url)
727 comp(N, url)
658 print
728 print
659 print "performing dropped-connection check"
729 print "performing dropped-connection check"
660 test_timeout(url)
730 test_timeout(url)
661
731
662 if __name__ == '__main__':
732 if __name__ == '__main__':
663 import time
733 import time
664 import sys
734 import sys
665 try:
735 try:
666 N = int(sys.argv[1])
736 N = int(sys.argv[1])
667 url = sys.argv[2]
737 url = sys.argv[2]
668 except:
738 except:
669 print "%s <integer> <url>" % sys.argv[0]
739 print "%s <integer> <url>" % sys.argv[0]
670 else:
740 else:
671 test(url, N)
741 test(url, N)
@@ -1,533 +1,537
1 # url.py - HTTP handling for mercurial
1 # url.py - HTTP handling for mercurial
2 #
2 #
3 # Copyright 2005, 2006, 2007, 2008 Matt Mackall <mpm@selenic.com>
3 # Copyright 2005, 2006, 2007, 2008 Matt Mackall <mpm@selenic.com>
4 # Copyright 2006, 2007 Alexis S. L. Carvalho <alexis@cecm.usp.br>
4 # Copyright 2006, 2007 Alexis S. L. Carvalho <alexis@cecm.usp.br>
5 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
5 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
6 #
6 #
7 # This software may be used and distributed according to the terms of the
7 # This software may be used and distributed according to the terms of the
8 # GNU General Public License version 2, incorporated herein by reference.
8 # GNU General Public License version 2, incorporated herein by reference.
9
9
10 import urllib, urllib2, urlparse, httplib, os, re, socket, cStringIO
10 import urllib, urllib2, urlparse, httplib, os, re, socket, cStringIO
11 from i18n import _
11 from i18n import _
12 import keepalive, util
12 import keepalive, util
13
13
14 def hidepassword(url):
14 def hidepassword(url):
15 '''hide user credential in a url string'''
15 '''hide user credential in a url string'''
16 scheme, netloc, path, params, query, fragment = urlparse.urlparse(url)
16 scheme, netloc, path, params, query, fragment = urlparse.urlparse(url)
17 netloc = re.sub('([^:]*):([^@]*)@(.*)', r'\1:***@\3', netloc)
17 netloc = re.sub('([^:]*):([^@]*)@(.*)', r'\1:***@\3', netloc)
18 return urlparse.urlunparse((scheme, netloc, path, params, query, fragment))
18 return urlparse.urlunparse((scheme, netloc, path, params, query, fragment))
19
19
20 def removeauth(url):
20 def removeauth(url):
21 '''remove all authentication information from a url string'''
21 '''remove all authentication information from a url string'''
22 scheme, netloc, path, params, query, fragment = urlparse.urlparse(url)
22 scheme, netloc, path, params, query, fragment = urlparse.urlparse(url)
23 netloc = netloc[netloc.find('@')+1:]
23 netloc = netloc[netloc.find('@')+1:]
24 return urlparse.urlunparse((scheme, netloc, path, params, query, fragment))
24 return urlparse.urlunparse((scheme, netloc, path, params, query, fragment))
25
25
26 def netlocsplit(netloc):
26 def netlocsplit(netloc):
27 '''split [user[:passwd]@]host[:port] into 4-tuple.'''
27 '''split [user[:passwd]@]host[:port] into 4-tuple.'''
28
28
29 a = netloc.find('@')
29 a = netloc.find('@')
30 if a == -1:
30 if a == -1:
31 user, passwd = None, None
31 user, passwd = None, None
32 else:
32 else:
33 userpass, netloc = netloc[:a], netloc[a+1:]
33 userpass, netloc = netloc[:a], netloc[a+1:]
34 c = userpass.find(':')
34 c = userpass.find(':')
35 if c == -1:
35 if c == -1:
36 user, passwd = urllib.unquote(userpass), None
36 user, passwd = urllib.unquote(userpass), None
37 else:
37 else:
38 user = urllib.unquote(userpass[:c])
38 user = urllib.unquote(userpass[:c])
39 passwd = urllib.unquote(userpass[c+1:])
39 passwd = urllib.unquote(userpass[c+1:])
40 c = netloc.find(':')
40 c = netloc.find(':')
41 if c == -1:
41 if c == -1:
42 host, port = netloc, None
42 host, port = netloc, None
43 else:
43 else:
44 host, port = netloc[:c], netloc[c+1:]
44 host, port = netloc[:c], netloc[c+1:]
45 return host, port, user, passwd
45 return host, port, user, passwd
46
46
47 def netlocunsplit(host, port, user=None, passwd=None):
47 def netlocunsplit(host, port, user=None, passwd=None):
48 '''turn host, port, user, passwd into [user[:passwd]@]host[:port].'''
48 '''turn host, port, user, passwd into [user[:passwd]@]host[:port].'''
49 if port:
49 if port:
50 hostport = host + ':' + port
50 hostport = host + ':' + port
51 else:
51 else:
52 hostport = host
52 hostport = host
53 if user:
53 if user:
54 if passwd:
54 if passwd:
55 userpass = urllib.quote(user) + ':' + urllib.quote(passwd)
55 userpass = urllib.quote(user) + ':' + urllib.quote(passwd)
56 else:
56 else:
57 userpass = urllib.quote(user)
57 userpass = urllib.quote(user)
58 return userpass + '@' + hostport
58 return userpass + '@' + hostport
59 return hostport
59 return hostport
60
60
61 _safe = ('abcdefghijklmnopqrstuvwxyz'
61 _safe = ('abcdefghijklmnopqrstuvwxyz'
62 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
62 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
63 '0123456789' '_.-/')
63 '0123456789' '_.-/')
64 _safeset = None
64 _safeset = None
65 _hex = None
65 _hex = None
66 def quotepath(path):
66 def quotepath(path):
67 '''quote the path part of a URL
67 '''quote the path part of a URL
68
68
69 This is similar to urllib.quote, but it also tries to avoid
69 This is similar to urllib.quote, but it also tries to avoid
70 quoting things twice (inspired by wget):
70 quoting things twice (inspired by wget):
71
71
72 >>> quotepath('abc def')
72 >>> quotepath('abc def')
73 'abc%20def'
73 'abc%20def'
74 >>> quotepath('abc%20def')
74 >>> quotepath('abc%20def')
75 'abc%20def'
75 'abc%20def'
76 >>> quotepath('abc%20 def')
76 >>> quotepath('abc%20 def')
77 'abc%20%20def'
77 'abc%20%20def'
78 >>> quotepath('abc def%20')
78 >>> quotepath('abc def%20')
79 'abc%20def%20'
79 'abc%20def%20'
80 >>> quotepath('abc def%2')
80 >>> quotepath('abc def%2')
81 'abc%20def%252'
81 'abc%20def%252'
82 >>> quotepath('abc def%')
82 >>> quotepath('abc def%')
83 'abc%20def%25'
83 'abc%20def%25'
84 '''
84 '''
85 global _safeset, _hex
85 global _safeset, _hex
86 if _safeset is None:
86 if _safeset is None:
87 _safeset = set(_safe)
87 _safeset = set(_safe)
88 _hex = set('abcdefABCDEF0123456789')
88 _hex = set('abcdefABCDEF0123456789')
89 l = list(path)
89 l = list(path)
90 for i in xrange(len(l)):
90 for i in xrange(len(l)):
91 c = l[i]
91 c = l[i]
92 if c == '%' and i + 2 < len(l) and (l[i+1] in _hex and l[i+2] in _hex):
92 if c == '%' and i + 2 < len(l) and (l[i+1] in _hex and l[i+2] in _hex):
93 pass
93 pass
94 elif c not in _safeset:
94 elif c not in _safeset:
95 l[i] = '%%%02X' % ord(c)
95 l[i] = '%%%02X' % ord(c)
96 return ''.join(l)
96 return ''.join(l)
97
97
98 class passwordmgr(urllib2.HTTPPasswordMgrWithDefaultRealm):
98 class passwordmgr(urllib2.HTTPPasswordMgrWithDefaultRealm):
99 def __init__(self, ui):
99 def __init__(self, ui):
100 urllib2.HTTPPasswordMgrWithDefaultRealm.__init__(self)
100 urllib2.HTTPPasswordMgrWithDefaultRealm.__init__(self)
101 self.ui = ui
101 self.ui = ui
102
102
103 def find_user_password(self, realm, authuri):
103 def find_user_password(self, realm, authuri):
104 authinfo = urllib2.HTTPPasswordMgrWithDefaultRealm.find_user_password(
104 authinfo = urllib2.HTTPPasswordMgrWithDefaultRealm.find_user_password(
105 self, realm, authuri)
105 self, realm, authuri)
106 user, passwd = authinfo
106 user, passwd = authinfo
107 if user and passwd:
107 if user and passwd:
108 self._writedebug(user, passwd)
108 self._writedebug(user, passwd)
109 return (user, passwd)
109 return (user, passwd)
110
110
111 if not user:
111 if not user:
112 auth = self.readauthtoken(authuri)
112 auth = self.readauthtoken(authuri)
113 if auth:
113 if auth:
114 user, passwd = auth.get('username'), auth.get('password')
114 user, passwd = auth.get('username'), auth.get('password')
115 if not user or not passwd:
115 if not user or not passwd:
116 if not self.ui.interactive():
116 if not self.ui.interactive():
117 raise util.Abort(_('http authorization required'))
117 raise util.Abort(_('http authorization required'))
118
118
119 self.ui.write(_("http authorization required\n"))
119 self.ui.write(_("http authorization required\n"))
120 self.ui.status(_("realm: %s\n") % realm)
120 self.ui.status(_("realm: %s\n") % realm)
121 if user:
121 if user:
122 self.ui.status(_("user: %s\n") % user)
122 self.ui.status(_("user: %s\n") % user)
123 else:
123 else:
124 user = self.ui.prompt(_("user:"), default=None)
124 user = self.ui.prompt(_("user:"), default=None)
125
125
126 if not passwd:
126 if not passwd:
127 passwd = self.ui.getpass()
127 passwd = self.ui.getpass()
128
128
129 self.add_password(realm, authuri, user, passwd)
129 self.add_password(realm, authuri, user, passwd)
130 self._writedebug(user, passwd)
130 self._writedebug(user, passwd)
131 return (user, passwd)
131 return (user, passwd)
132
132
133 def _writedebug(self, user, passwd):
133 def _writedebug(self, user, passwd):
134 msg = _('http auth: user %s, password %s\n')
134 msg = _('http auth: user %s, password %s\n')
135 self.ui.debug(msg % (user, passwd and '*' * len(passwd) or 'not set'))
135 self.ui.debug(msg % (user, passwd and '*' * len(passwd) or 'not set'))
136
136
137 def readauthtoken(self, uri):
137 def readauthtoken(self, uri):
138 # Read configuration
138 # Read configuration
139 config = dict()
139 config = dict()
140 for key, val in self.ui.configitems('auth'):
140 for key, val in self.ui.configitems('auth'):
141 group, setting = key.split('.', 1)
141 group, setting = key.split('.', 1)
142 gdict = config.setdefault(group, dict())
142 gdict = config.setdefault(group, dict())
143 gdict[setting] = val
143 gdict[setting] = val
144
144
145 # Find the best match
145 # Find the best match
146 scheme, hostpath = uri.split('://', 1)
146 scheme, hostpath = uri.split('://', 1)
147 bestlen = 0
147 bestlen = 0
148 bestauth = None
148 bestauth = None
149 for auth in config.itervalues():
149 for auth in config.itervalues():
150 prefix = auth.get('prefix')
150 prefix = auth.get('prefix')
151 if not prefix: continue
151 if not prefix: continue
152 p = prefix.split('://', 1)
152 p = prefix.split('://', 1)
153 if len(p) > 1:
153 if len(p) > 1:
154 schemes, prefix = [p[0]], p[1]
154 schemes, prefix = [p[0]], p[1]
155 else:
155 else:
156 schemes = (auth.get('schemes') or 'https').split()
156 schemes = (auth.get('schemes') or 'https').split()
157 if (prefix == '*' or hostpath.startswith(prefix)) and \
157 if (prefix == '*' or hostpath.startswith(prefix)) and \
158 len(prefix) > bestlen and scheme in schemes:
158 len(prefix) > bestlen and scheme in schemes:
159 bestlen = len(prefix)
159 bestlen = len(prefix)
160 bestauth = auth
160 bestauth = auth
161 return bestauth
161 return bestauth
162
162
163 class proxyhandler(urllib2.ProxyHandler):
163 class proxyhandler(urllib2.ProxyHandler):
164 def __init__(self, ui):
164 def __init__(self, ui):
165 proxyurl = ui.config("http_proxy", "host") or os.getenv('http_proxy')
165 proxyurl = ui.config("http_proxy", "host") or os.getenv('http_proxy')
166 # XXX proxyauthinfo = None
166 # XXX proxyauthinfo = None
167
167
168 if proxyurl:
168 if proxyurl:
169 # proxy can be proper url or host[:port]
169 # proxy can be proper url or host[:port]
170 if not (proxyurl.startswith('http:') or
170 if not (proxyurl.startswith('http:') or
171 proxyurl.startswith('https:')):
171 proxyurl.startswith('https:')):
172 proxyurl = 'http://' + proxyurl + '/'
172 proxyurl = 'http://' + proxyurl + '/'
173 snpqf = urlparse.urlsplit(proxyurl)
173 snpqf = urlparse.urlsplit(proxyurl)
174 proxyscheme, proxynetloc, proxypath, proxyquery, proxyfrag = snpqf
174 proxyscheme, proxynetloc, proxypath, proxyquery, proxyfrag = snpqf
175 hpup = netlocsplit(proxynetloc)
175 hpup = netlocsplit(proxynetloc)
176
176
177 proxyhost, proxyport, proxyuser, proxypasswd = hpup
177 proxyhost, proxyport, proxyuser, proxypasswd = hpup
178 if not proxyuser:
178 if not proxyuser:
179 proxyuser = ui.config("http_proxy", "user")
179 proxyuser = ui.config("http_proxy", "user")
180 proxypasswd = ui.config("http_proxy", "passwd")
180 proxypasswd = ui.config("http_proxy", "passwd")
181
181
182 # see if we should use a proxy for this url
182 # see if we should use a proxy for this url
183 no_list = [ "localhost", "127.0.0.1" ]
183 no_list = [ "localhost", "127.0.0.1" ]
184 no_list.extend([p.lower() for
184 no_list.extend([p.lower() for
185 p in ui.configlist("http_proxy", "no")])
185 p in ui.configlist("http_proxy", "no")])
186 no_list.extend([p.strip().lower() for
186 no_list.extend([p.strip().lower() for
187 p in os.getenv("no_proxy", '').split(',')
187 p in os.getenv("no_proxy", '').split(',')
188 if p.strip()])
188 if p.strip()])
189 # "http_proxy.always" config is for running tests on localhost
189 # "http_proxy.always" config is for running tests on localhost
190 if ui.configbool("http_proxy", "always"):
190 if ui.configbool("http_proxy", "always"):
191 self.no_list = []
191 self.no_list = []
192 else:
192 else:
193 self.no_list = no_list
193 self.no_list = no_list
194
194
195 proxyurl = urlparse.urlunsplit((
195 proxyurl = urlparse.urlunsplit((
196 proxyscheme, netlocunsplit(proxyhost, proxyport,
196 proxyscheme, netlocunsplit(proxyhost, proxyport,
197 proxyuser, proxypasswd or ''),
197 proxyuser, proxypasswd or ''),
198 proxypath, proxyquery, proxyfrag))
198 proxypath, proxyquery, proxyfrag))
199 proxies = {'http': proxyurl, 'https': proxyurl}
199 proxies = {'http': proxyurl, 'https': proxyurl}
200 ui.debug('proxying through http://%s:%s\n' %
200 ui.debug('proxying through http://%s:%s\n' %
201 (proxyhost, proxyport))
201 (proxyhost, proxyport))
202 else:
202 else:
203 proxies = {}
203 proxies = {}
204
204
205 # urllib2 takes proxy values from the environment and those
205 # urllib2 takes proxy values from the environment and those
206 # will take precedence if found, so drop them
206 # will take precedence if found, so drop them
207 for env in ["HTTP_PROXY", "http_proxy", "no_proxy"]:
207 for env in ["HTTP_PROXY", "http_proxy", "no_proxy"]:
208 try:
208 try:
209 if env in os.environ:
209 if env in os.environ:
210 del os.environ[env]
210 del os.environ[env]
211 except OSError:
211 except OSError:
212 pass
212 pass
213
213
214 urllib2.ProxyHandler.__init__(self, proxies)
214 urllib2.ProxyHandler.__init__(self, proxies)
215 self.ui = ui
215 self.ui = ui
216
216
217 def proxy_open(self, req, proxy, type_):
217 def proxy_open(self, req, proxy, type_):
218 host = req.get_host().split(':')[0]
218 host = req.get_host().split(':')[0]
219 if host in self.no_list:
219 if host in self.no_list:
220 return None
220 return None
221
221
222 # work around a bug in Python < 2.4.2
222 # work around a bug in Python < 2.4.2
223 # (it leaves a "\n" at the end of Proxy-authorization headers)
223 # (it leaves a "\n" at the end of Proxy-authorization headers)
224 baseclass = req.__class__
224 baseclass = req.__class__
225 class _request(baseclass):
225 class _request(baseclass):
226 def add_header(self, key, val):
226 def add_header(self, key, val):
227 if key.lower() == 'proxy-authorization':
227 if key.lower() == 'proxy-authorization':
228 val = val.strip()
228 val = val.strip()
229 return baseclass.add_header(self, key, val)
229 return baseclass.add_header(self, key, val)
230 req.__class__ = _request
230 req.__class__ = _request
231
231
232 return urllib2.ProxyHandler.proxy_open(self, req, proxy, type_)
232 return urllib2.ProxyHandler.proxy_open(self, req, proxy, type_)
233
233
234 class httpsendfile(file):
234 class httpsendfile(file):
235 def __len__(self):
235 def __len__(self):
236 return os.fstat(self.fileno()).st_size
236 return os.fstat(self.fileno()).st_size
237
237
238 def _gen_sendfile(connection):
238 def _gen_sendfile(connection):
239 def _sendfile(self, data):
239 def _sendfile(self, data):
240 # send a file
240 # send a file
241 if isinstance(data, httpsendfile):
241 if isinstance(data, httpsendfile):
242 # if auth required, some data sent twice, so rewind here
242 # if auth required, some data sent twice, so rewind here
243 data.seek(0)
243 data.seek(0)
244 for chunk in util.filechunkiter(data):
244 for chunk in util.filechunkiter(data):
245 connection.send(self, chunk)
245 connection.send(self, chunk)
246 else:
246 else:
247 connection.send(self, data)
247 connection.send(self, data)
248 return _sendfile
248 return _sendfile
249
249
250 has_https = hasattr(urllib2, 'HTTPSHandler')
250 has_https = hasattr(urllib2, 'HTTPSHandler')
251 if has_https:
251 if has_https:
252 try:
252 try:
253 # avoid using deprecated/broken FakeSocket in python 2.6
253 # avoid using deprecated/broken FakeSocket in python 2.6
254 import ssl
254 import ssl
255 _ssl_wrap_socket = ssl.wrap_socket
255 _ssl_wrap_socket = ssl.wrap_socket
256 except ImportError:
256 except ImportError:
257 def _ssl_wrap_socket(sock, key_file, cert_file):
257 def _ssl_wrap_socket(sock, key_file, cert_file):
258 ssl = socket.ssl(sock, key_file, cert_file)
258 ssl = socket.ssl(sock, key_file, cert_file)
259 return httplib.FakeSocket(sock, ssl)
259 return httplib.FakeSocket(sock, ssl)
260
260
261 class httpconnection(keepalive.HTTPConnection):
261 class httpconnection(keepalive.HTTPConnection):
262 # must be able to send big bundle as stream.
262 # must be able to send big bundle as stream.
263 send = _gen_sendfile(keepalive.HTTPConnection)
263 send = _gen_sendfile(keepalive.HTTPConnection)
264
264
265 def _proxytunnel(self):
265 def _proxytunnel(self):
266 proxyheaders = dict(
266 proxyheaders = dict(
267 [(x, self.headers[x]) for x in self.headers
267 [(x, self.headers[x]) for x in self.headers
268 if x.lower().startswith('proxy-')])
268 if x.lower().startswith('proxy-')])
269 self._set_hostport(self.host, self.port)
269 self._set_hostport(self.host, self.port)
270 self.send('CONNECT %s:%d HTTP/1.0\r\n' % (self.realhost, self.realport))
270 self.send('CONNECT %s:%d HTTP/1.0\r\n' % (self.realhost, self.realport))
271 for header in proxyheaders.iteritems():
271 for header in proxyheaders.iteritems():
272 self.send('%s: %s\r\n' % header)
272 self.send('%s: %s\r\n' % header)
273 self.send('\r\n')
273 self.send('\r\n')
274
274
275 # majority of the following code is duplicated from
275 # majority of the following code is duplicated from
276 # httplib.HTTPConnection as there are no adequate places to
276 # httplib.HTTPConnection as there are no adequate places to
277 # override functions to provide the needed functionality
277 # override functions to provide the needed functionality
278 res = self.response_class(self.sock,
278 res = self.response_class(self.sock,
279 strict=self.strict,
279 strict=self.strict,
280 method=self._method)
280 method=self._method)
281
281
282 while True:
282 while True:
283 version, status, reason = res._read_status()
283 version, status, reason = res._read_status()
284 if status != httplib.CONTINUE:
284 if status != httplib.CONTINUE:
285 break
285 break
286 while True:
286 while True:
287 skip = res.fp.readline().strip()
287 skip = res.fp.readline().strip()
288 if not skip:
288 if not skip:
289 break
289 break
290 res.status = status
290 res.status = status
291 res.reason = reason.strip()
291 res.reason = reason.strip()
292
292
293 if res.status == 200:
293 if res.status == 200:
294 while True:
294 while True:
295 line = res.fp.readline()
295 line = res.fp.readline()
296 if line == '\r\n':
296 if line == '\r\n':
297 break
297 break
298 return True
298 return True
299
299
300 if version == 'HTTP/1.0':
300 if version == 'HTTP/1.0':
301 res.version = 10
301 res.version = 10
302 elif version.startswith('HTTP/1.'):
302 elif version.startswith('HTTP/1.'):
303 res.version = 11
303 res.version = 11
304 elif version == 'HTTP/0.9':
304 elif version == 'HTTP/0.9':
305 res.version = 9
305 res.version = 9
306 else:
306 else:
307 raise httplib.UnknownProtocol(version)
307 raise httplib.UnknownProtocol(version)
308
308
309 if res.version == 9:
309 if res.version == 9:
310 res.length = None
310 res.length = None
311 res.chunked = 0
311 res.chunked = 0
312 res.will_close = 1
312 res.will_close = 1
313 res.msg = httplib.HTTPMessage(cStringIO.StringIO())
313 res.msg = httplib.HTTPMessage(cStringIO.StringIO())
314 return False
314 return False
315
315
316 res.msg = httplib.HTTPMessage(res.fp)
316 res.msg = httplib.HTTPMessage(res.fp)
317 res.msg.fp = None
317 res.msg.fp = None
318
318
319 # are we using the chunked-style of transfer encoding?
319 # are we using the chunked-style of transfer encoding?
320 trenc = res.msg.getheader('transfer-encoding')
320 trenc = res.msg.getheader('transfer-encoding')
321 if trenc and trenc.lower() == "chunked":
321 if trenc and trenc.lower() == "chunked":
322 res.chunked = 1
322 res.chunked = 1
323 res.chunk_left = None
323 res.chunk_left = None
324 else:
324 else:
325 res.chunked = 0
325 res.chunked = 0
326
326
327 # will the connection close at the end of the response?
327 # will the connection close at the end of the response?
328 res.will_close = res._check_close()
328 res.will_close = res._check_close()
329
329
330 # do we have a Content-Length?
330 # do we have a Content-Length?
331 # NOTE: RFC 2616, S4.4, #3 says we ignore this if tr_enc is "chunked"
331 # NOTE: RFC 2616, S4.4, #3 says we ignore this if tr_enc is "chunked"
332 length = res.msg.getheader('content-length')
332 length = res.msg.getheader('content-length')
333 if length and not res.chunked:
333 if length and not res.chunked:
334 try:
334 try:
335 res.length = int(length)
335 res.length = int(length)
336 except ValueError:
336 except ValueError:
337 res.length = None
337 res.length = None
338 else:
338 else:
339 if res.length < 0: # ignore nonsensical negative lengths
339 if res.length < 0: # ignore nonsensical negative lengths
340 res.length = None
340 res.length = None
341 else:
341 else:
342 res.length = None
342 res.length = None
343
343
344 # does the body have a fixed length? (of zero)
344 # does the body have a fixed length? (of zero)
345 if (status == httplib.NO_CONTENT or status == httplib.NOT_MODIFIED or
345 if (status == httplib.NO_CONTENT or status == httplib.NOT_MODIFIED or
346 100 <= status < 200 or # 1xx codes
346 100 <= status < 200 or # 1xx codes
347 res._method == 'HEAD'):
347 res._method == 'HEAD'):
348 res.length = 0
348 res.length = 0
349
349
350 # if the connection remains open, and we aren't using chunked, and
350 # if the connection remains open, and we aren't using chunked, and
351 # a content-length was not provided, then assume that the connection
351 # a content-length was not provided, then assume that the connection
352 # WILL close.
352 # WILL close.
353 if (not res.will_close and
353 if (not res.will_close and
354 not res.chunked and
354 not res.chunked and
355 res.length is None):
355 res.length is None):
356 res.will_close = 1
356 res.will_close = 1
357
357
358 self.proxyres = res
358 self.proxyres = res
359
359
360 return False
360 return False
361
361
362 def connect(self):
362 def connect(self):
363 if has_https and self.realhost: # use CONNECT proxy
363 if has_https and self.realhost: # use CONNECT proxy
364 self.sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
364 self.sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
365 self.sock.connect((self.host, self.port))
365 self.sock.connect((self.host, self.port))
366 if self._proxytunnel():
366 if self._proxytunnel():
367 # we do not support client x509 certificates
367 # we do not support client x509 certificates
368 self.sock = _ssl_wrap_socket(self.sock, None, None)
368 self.sock = _ssl_wrap_socket(self.sock, None, None)
369 else:
369 else:
370 keepalive.HTTPConnection.connect(self)
370 keepalive.HTTPConnection.connect(self)
371
371
372 def getresponse(self):
372 def getresponse(self):
373 proxyres = getattr(self, 'proxyres', None)
373 proxyres = getattr(self, 'proxyres', None)
374 if proxyres:
374 if proxyres:
375 if proxyres.will_close:
375 if proxyres.will_close:
376 self.close()
376 self.close()
377 self.proxyres = None
377 self.proxyres = None
378 return proxyres
378 return proxyres
379 return keepalive.HTTPConnection.getresponse(self)
379 return keepalive.HTTPConnection.getresponse(self)
380
380
381 class httphandler(keepalive.HTTPHandler):
381 class httphandler(keepalive.HTTPHandler):
382 def http_open(self, req):
382 def http_open(self, req):
383 return self.do_open(httpconnection, req)
383 return self.do_open(httpconnection, req)
384
384
385 def _start_transaction(self, h, req):
385 def _start_transaction(self, h, req):
386 if req.get_selector() == req.get_full_url(): # has proxy
386 if req.get_selector() == req.get_full_url(): # has proxy
387 urlparts = urlparse.urlparse(req.get_selector())
387 urlparts = urlparse.urlparse(req.get_selector())
388 if urlparts[0] == 'https': # only use CONNECT for HTTPS
388 if urlparts[0] == 'https': # only use CONNECT for HTTPS
389 if ':' in urlparts[1]:
389 if ':' in urlparts[1]:
390 realhost, realport = urlparts[1].split(':')
390 realhost, realport = urlparts[1].split(':')
391 realport = int(realport)
391 realport = int(realport)
392 else:
392 else:
393 realhost = urlparts[1]
393 realhost = urlparts[1]
394 realport = 443
394 realport = 443
395
395
396 h.realhost = realhost
396 h.realhost = realhost
397 h.realport = realport
397 h.realport = realport
398 h.headers = req.headers.copy()
398 h.headers = req.headers.copy()
399 h.headers.update(self.parent.addheaders)
399 h.headers.update(self.parent.addheaders)
400 return keepalive.HTTPHandler._start_transaction(self, h, req)
400 return keepalive.HTTPHandler._start_transaction(self, h, req)
401
401
402 h.realhost = None
402 h.realhost = None
403 h.realport = None
403 h.realport = None
404 h.headers = None
404 h.headers = None
405 return keepalive.HTTPHandler._start_transaction(self, h, req)
405 return keepalive.HTTPHandler._start_transaction(self, h, req)
406
406
407 def __del__(self):
407 def __del__(self):
408 self.close_all()
408 self.close_all()
409
409
410 if has_https:
410 if has_https:
411 class httpsconnection(httplib.HTTPSConnection):
411 class BetterHTTPS(httplib.HTTPSConnection):
412 send = keepalive.safesend
413
414 class httpsconnection(BetterHTTPS):
412 response_class = keepalive.HTTPResponse
415 response_class = keepalive.HTTPResponse
413 # must be able to send big bundle as stream.
416 # must be able to send big bundle as stream.
414 send = _gen_sendfile(httplib.HTTPSConnection)
417 send = _gen_sendfile(BetterHTTPS)
418 getresponse = keepalive.wrapgetresponse(httplib.HTTPSConnection)
415
419
416 class httpshandler(keepalive.KeepAliveHandler, urllib2.HTTPSHandler):
420 class httpshandler(keepalive.KeepAliveHandler, urllib2.HTTPSHandler):
417 def __init__(self, ui):
421 def __init__(self, ui):
418 keepalive.KeepAliveHandler.__init__(self)
422 keepalive.KeepAliveHandler.__init__(self)
419 urllib2.HTTPSHandler.__init__(self)
423 urllib2.HTTPSHandler.__init__(self)
420 self.ui = ui
424 self.ui = ui
421 self.pwmgr = passwordmgr(self.ui)
425 self.pwmgr = passwordmgr(self.ui)
422
426
423 def https_open(self, req):
427 def https_open(self, req):
424 self.auth = self.pwmgr.readauthtoken(req.get_full_url())
428 self.auth = self.pwmgr.readauthtoken(req.get_full_url())
425 return self.do_open(self._makeconnection, req)
429 return self.do_open(self._makeconnection, req)
426
430
427 def _makeconnection(self, host, port=443, *args, **kwargs):
431 def _makeconnection(self, host, port=443, *args, **kwargs):
428 keyfile = None
432 keyfile = None
429 certfile = None
433 certfile = None
430
434
431 if args: # key_file
435 if args: # key_file
432 keyfile = args.pop(0)
436 keyfile = args.pop(0)
433 if args: # cert_file
437 if args: # cert_file
434 certfile = args.pop(0)
438 certfile = args.pop(0)
435
439
436 # if the user has specified different key/cert files in
440 # if the user has specified different key/cert files in
437 # hgrc, we prefer these
441 # hgrc, we prefer these
438 if self.auth and 'key' in self.auth and 'cert' in self.auth:
442 if self.auth and 'key' in self.auth and 'cert' in self.auth:
439 keyfile = self.auth['key']
443 keyfile = self.auth['key']
440 certfile = self.auth['cert']
444 certfile = self.auth['cert']
441
445
442 # let host port take precedence
446 # let host port take precedence
443 if ':' in host and '[' not in host or ']:' in host:
447 if ':' in host and '[' not in host or ']:' in host:
444 host, port = host.rsplit(':', 1)
448 host, port = host.rsplit(':', 1)
445 port = int(port)
449 port = int(port)
446 if '[' in host:
450 if '[' in host:
447 host = host[1:-1]
451 host = host[1:-1]
448
452
449 return httpsconnection(host, port, keyfile, certfile, *args, **kwargs)
453 return httpsconnection(host, port, keyfile, certfile, *args, **kwargs)
450
454
451 # In python < 2.5 AbstractDigestAuthHandler raises a ValueError if
455 # In python < 2.5 AbstractDigestAuthHandler raises a ValueError if
452 # it doesn't know about the auth type requested. This can happen if
456 # it doesn't know about the auth type requested. This can happen if
453 # somebody is using BasicAuth and types a bad password.
457 # somebody is using BasicAuth and types a bad password.
454 class httpdigestauthhandler(urllib2.HTTPDigestAuthHandler):
458 class httpdigestauthhandler(urllib2.HTTPDigestAuthHandler):
455 def http_error_auth_reqed(self, auth_header, host, req, headers):
459 def http_error_auth_reqed(self, auth_header, host, req, headers):
456 try:
460 try:
457 return urllib2.HTTPDigestAuthHandler.http_error_auth_reqed(
461 return urllib2.HTTPDigestAuthHandler.http_error_auth_reqed(
458 self, auth_header, host, req, headers)
462 self, auth_header, host, req, headers)
459 except ValueError, inst:
463 except ValueError, inst:
460 arg = inst.args[0]
464 arg = inst.args[0]
461 if arg.startswith("AbstractDigestAuthHandler doesn't know "):
465 if arg.startswith("AbstractDigestAuthHandler doesn't know "):
462 return
466 return
463 raise
467 raise
464
468
465 def getauthinfo(path):
469 def getauthinfo(path):
466 scheme, netloc, urlpath, query, frag = urlparse.urlsplit(path)
470 scheme, netloc, urlpath, query, frag = urlparse.urlsplit(path)
467 if not urlpath:
471 if not urlpath:
468 urlpath = '/'
472 urlpath = '/'
469 if scheme != 'file':
473 if scheme != 'file':
470 # XXX: why are we quoting the path again with some smart
474 # XXX: why are we quoting the path again with some smart
471 # heuristic here? Anyway, it cannot be done with file://
475 # heuristic here? Anyway, it cannot be done with file://
472 # urls since path encoding is os/fs dependent (see
476 # urls since path encoding is os/fs dependent (see
473 # urllib.pathname2url() for details).
477 # urllib.pathname2url() for details).
474 urlpath = quotepath(urlpath)
478 urlpath = quotepath(urlpath)
475 host, port, user, passwd = netlocsplit(netloc)
479 host, port, user, passwd = netlocsplit(netloc)
476
480
477 # urllib cannot handle URLs with embedded user or passwd
481 # urllib cannot handle URLs with embedded user or passwd
478 url = urlparse.urlunsplit((scheme, netlocunsplit(host, port),
482 url = urlparse.urlunsplit((scheme, netlocunsplit(host, port),
479 urlpath, query, frag))
483 urlpath, query, frag))
480 if user:
484 if user:
481 netloc = host
485 netloc = host
482 if port:
486 if port:
483 netloc += ':' + port
487 netloc += ':' + port
484 # Python < 2.4.3 uses only the netloc to search for a password
488 # Python < 2.4.3 uses only the netloc to search for a password
485 authinfo = (None, (url, netloc), user, passwd or '')
489 authinfo = (None, (url, netloc), user, passwd or '')
486 else:
490 else:
487 authinfo = None
491 authinfo = None
488 return url, authinfo
492 return url, authinfo
489
493
490 handlerfuncs = []
494 handlerfuncs = []
491
495
492 def opener(ui, authinfo=None):
496 def opener(ui, authinfo=None):
493 '''
497 '''
494 construct an opener suitable for urllib2
498 construct an opener suitable for urllib2
495 authinfo will be added to the password manager
499 authinfo will be added to the password manager
496 '''
500 '''
497 handlers = [httphandler()]
501 handlers = [httphandler()]
498 if has_https:
502 if has_https:
499 handlers.append(httpshandler(ui))
503 handlers.append(httpshandler(ui))
500
504
501 handlers.append(proxyhandler(ui))
505 handlers.append(proxyhandler(ui))
502
506
503 passmgr = passwordmgr(ui)
507 passmgr = passwordmgr(ui)
504 if authinfo is not None:
508 if authinfo is not None:
505 passmgr.add_password(*authinfo)
509 passmgr.add_password(*authinfo)
506 user, passwd = authinfo[2:4]
510 user, passwd = authinfo[2:4]
507 ui.debug('http auth: user %s, password %s\n' %
511 ui.debug('http auth: user %s, password %s\n' %
508 (user, passwd and '*' * len(passwd) or 'not set'))
512 (user, passwd and '*' * len(passwd) or 'not set'))
509
513
510 handlers.extend((urllib2.HTTPBasicAuthHandler(passmgr),
514 handlers.extend((urllib2.HTTPBasicAuthHandler(passmgr),
511 httpdigestauthhandler(passmgr)))
515 httpdigestauthhandler(passmgr)))
512 handlers.extend([h(ui, passmgr) for h in handlerfuncs])
516 handlers.extend([h(ui, passmgr) for h in handlerfuncs])
513 opener = urllib2.build_opener(*handlers)
517 opener = urllib2.build_opener(*handlers)
514
518
515 # 1.0 here is the _protocol_ version
519 # 1.0 here is the _protocol_ version
516 opener.addheaders = [('User-agent', 'mercurial/proto-1.0')]
520 opener.addheaders = [('User-agent', 'mercurial/proto-1.0')]
517 opener.addheaders.append(('Accept', 'application/mercurial-0.1'))
521 opener.addheaders.append(('Accept', 'application/mercurial-0.1'))
518 return opener
522 return opener
519
523
520 scheme_re = re.compile(r'^([a-zA-Z0-9+-.]+)://')
524 scheme_re = re.compile(r'^([a-zA-Z0-9+-.]+)://')
521
525
522 def open(ui, url, data=None):
526 def open(ui, url, data=None):
523 scheme = None
527 scheme = None
524 m = scheme_re.search(url)
528 m = scheme_re.search(url)
525 if m:
529 if m:
526 scheme = m.group(1).lower()
530 scheme = m.group(1).lower()
527 if not scheme:
531 if not scheme:
528 path = util.normpath(os.path.abspath(url))
532 path = util.normpath(os.path.abspath(url))
529 url = 'file://' + urllib.pathname2url(path)
533 url = 'file://' + urllib.pathname2url(path)
530 authinfo = None
534 authinfo = None
531 else:
535 else:
532 url, authinfo = getauthinfo(url)
536 url, authinfo = getauthinfo(url)
533 return opener(ui, authinfo).open(url, data)
537 return opener(ui, authinfo).open(url, data)
General Comments 0
You need to be logged in to leave comments. Login now