##// END OF EJS Templates
httprepo: use separate handlers for HTTP and HTTPS...
Alexis S. L. Carvalho -
r5983:6f1fcbc5 default
parent child Browse files
Show More
@@ -1,459 +1,458
1 # httprepo.py - HTTP repository proxy classes for mercurial
1 # httprepo.py - HTTP repository proxy classes for mercurial
2 #
2 #
3 # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
3 # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
4 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
4 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
5 #
5 #
6 # This software may be used and distributed according to the terms
6 # This software may be used and distributed according to the terms
7 # of the GNU General Public License, incorporated herein by reference.
7 # of the GNU General Public License, incorporated herein by reference.
8
8
9 from node import *
9 from node import *
10 from remoterepo import *
10 from remoterepo import *
11 from i18n import _
11 from i18n import _
12 import repo, os, urllib, urllib2, urlparse, zlib, util, httplib
12 import repo, os, urllib, urllib2, urlparse, zlib, util, httplib
13 import errno, keepalive, tempfile, socket, changegroup
13 import errno, keepalive, tempfile, socket, changegroup
14
14
class passwordmgr(urllib2.HTTPPasswordMgrWithDefaultRealm):
    """Password manager that falls back to prompting the user.

    Delegates to the stored credentials first; if none are complete and
    the ui is interactive, asks for user/password on the terminal and
    caches the answer for subsequent requests to the same realm/URI.
    """
    def __init__(self, ui):
        urllib2.HTTPPasswordMgrWithDefaultRealm.__init__(self)
        self.ui = ui

    def find_user_password(self, realm, authuri):
        authinfo = urllib2.HTTPPasswordMgrWithDefaultRealm.find_user_password(
            self, realm, authuri)
        user, passwd = authinfo
        if user and passwd:
            # both parts already known - nothing to ask
            return (user, passwd)

        # cannot prompt in non-interactive mode
        if not self.ui.interactive:
            raise util.Abort(_('http authorization required'))

        self.ui.write(_("http authorization required\n"))
        self.ui.status(_("realm: %s\n") % realm)
        if user:
            self.ui.status(_("user: %s\n") % user)
        else:
            user = self.ui.prompt(_("user:"), default=None)

        if not passwd:
            passwd = self.ui.getpass()

        # remember for the next 401 on this realm/URI
        self.add_password(realm, authuri, user, passwd)
        return (user, passwd)
42
42
def netlocsplit(netloc):
    '''split [user[:passwd]@]host[:port] into 4-tuple.'''

    at = netloc.find('@')
    if at == -1:
        user, passwd = None, None
    else:
        # split off the credentials part and unquote its pieces
        userpass, netloc = netloc[:at], netloc[at + 1:]
        colon = userpass.find(':')
        if colon == -1:
            user, passwd = urllib.unquote(userpass), None
        else:
            user = urllib.unquote(userpass[:colon])
            passwd = urllib.unquote(userpass[colon + 1:])
    colon = netloc.find(':')
    if colon == -1:
        host, port = netloc, None
    else:
        host, port = netloc[:colon], netloc[colon + 1:]
    return host, port, user, passwd
63
63
def netlocunsplit(host, port, user=None, passwd=None):
    '''turn host, port, user, passwd into [user[:passwd]@]host[:port].'''
    hostport = host + ':' + port if port else host
    if not user:
        return hostport
    # quote credentials so reserved characters survive the round trip
    if passwd:
        userpass = urllib.quote(user) + ':' + urllib.quote(passwd)
    else:
        userpass = urllib.quote(user)
    return userpass + '@' + hostport
77
77
# work around a bug in Python < 2.4.2
# (it leaves a "\n" at the end of Proxy-authorization headers)
class request(urllib2.Request):
    def add_header(self, key, val):
        # strip the stray trailing newline from proxy auth values
        if key.lower() == 'proxy-authorization':
            val = val.strip()
        return urllib2.Request.add_header(self, key, val)
85
85
class httpsendfile(file):
    """A file object that reports its on-disk size via len().

    urllib2 uses len(data) to set Content-length, so this lets a bundle
    file be passed directly as POST data.
    """
    def __len__(self):
        return os.fstat(self.fileno()).st_size
89
89
def _gen_sendfile(connection):
    """Build a send() method for *connection* that can stream files.

    httpsendfile instances are sent chunk by chunk (rewinding first,
    since auth retries may send the body twice); everything else is
    passed through to the base class send().
    """
    def _sendfile(self, data):
        if isinstance(data, httpsendfile):
            # if auth required, some data sent twice, so rewind here
            data.seek(0)
            for chunk in util.filechunkiter(data):
                connection.send(self, chunk)
        else:
            connection.send(self, data)
    return _sendfile
101
101
class httpconnection(keepalive.HTTPConnection):
    # must be able to send big bundle as stream.
    send = _gen_sendfile(keepalive.HTTPConnection)
105
105
class httphandler(keepalive.HTTPHandler):
    """Keepalive-enabled handler for plain http:// URLs."""
    def http_open(self, req):
        return self.do_open(httpconnection, req)

    def __del__(self):
        # drop any cached keepalive connections on teardown
        self.close_all()
112
112
has_https = hasattr(urllib2, 'HTTPSHandler')
if has_https:
    class httpsconnection(httplib.HTTPSConnection):
        response_class = keepalive.HTTPResponse
        # must be able to send big bundle as stream.
        send = _gen_sendfile(httplib.HTTPSConnection)

    class httpshandler(keepalive.KeepAliveHandler, urllib2.HTTPSHandler):
        """Keepalive-enabled handler for https:// URLs (SSL available)."""
        def https_open(self, req):
            return self.do_open(httpsconnection, req)
123
127 # In python < 2.5 AbstractDigestAuthHandler raises a ValueError if
124 # In python < 2.5 AbstractDigestAuthHandler raises a ValueError if
128 # it doesn't know about the auth type requested. This can happen if
125 # it doesn't know about the auth type requested. This can happen if
129 # somebody is using BasicAuth and types a bad password.
126 # somebody is using BasicAuth and types a bad password.
130 class httpdigestauthhandler(urllib2.HTTPDigestAuthHandler):
127 class httpdigestauthhandler(urllib2.HTTPDigestAuthHandler):
131 def http_error_auth_reqed(self, auth_header, host, req, headers):
128 def http_error_auth_reqed(self, auth_header, host, req, headers):
132 try:
129 try:
133 return urllib2.HTTPDigestAuthHandler.http_error_auth_reqed(
130 return urllib2.HTTPDigestAuthHandler.http_error_auth_reqed(
134 self, auth_header, host, req, headers)
131 self, auth_header, host, req, headers)
135 except ValueError, inst:
132 except ValueError, inst:
136 arg = inst.args[0]
133 arg = inst.args[0]
137 if arg.startswith("AbstractDigestAuthHandler doesn't know "):
134 if arg.startswith("AbstractDigestAuthHandler doesn't know "):
138 return
135 return
139 raise
136 raise
140
137
141 def zgenerator(f):
138 def zgenerator(f):
142 zd = zlib.decompressobj()
139 zd = zlib.decompressobj()
143 try:
140 try:
144 for chunk in util.filechunkiter(f):
141 for chunk in util.filechunkiter(f):
145 yield zd.decompress(chunk)
142 yield zd.decompress(chunk)
146 except httplib.HTTPException, inst:
143 except httplib.HTTPException, inst:
147 raise IOError(None, _('connection ended unexpectedly'))
144 raise IOError(None, _('connection ended unexpectedly'))
148 yield zd.flush()
145 yield zd.flush()
149
146
_safe = ('abcdefghijklmnopqrstuvwxyz'
         'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
         '0123456789' '_.-/')
# lazily-built lookup sets (see quotepath)
_safeset = None
_hex = None
def quotepath(path):
    '''quote the path part of a URL

    This is similar to urllib.quote, but it also tries to avoid
    quoting things twice (inspired by wget):

    >>> quotepath('abc def')
    'abc%20def'
    >>> quotepath('abc%20def')
    'abc%20def'
    >>> quotepath('abc%20 def')
    'abc%20%20def'
    >>> quotepath('abc def%20')
    'abc%20def%20'
    >>> quotepath('abc def%2')
    'abc%20def%252'
    >>> quotepath('abc def%')
    'abc%20def%25'
    '''
    global _safeset, _hex
    if _safeset is None:
        # build the character sets once, on first use
        _safeset = util.set(_safe)
        _hex = util.set('abcdefABCDEF0123456789')
    l = list(path)
    for i in xrange(len(l)):
        c = l[i]
        if c == '%' and i + 2 < len(l) and (l[i+1] in _hex and l[i+2] in _hex):
            # already a valid %XX escape - leave it alone
            pass
        elif c not in _safeset:
            l[i] = '%%%02X' % ord(c)
    return ''.join(l)
186
183
187 class httprepository(remoterepository):
184 class httprepository(remoterepository):
188 def __init__(self, ui, path):
185 def __init__(self, ui, path):
189 self.path = path
186 self.path = path
190 self.caps = None
187 self.caps = None
191 self.handler = None
188 self.handler = None
192 scheme, netloc, urlpath, query, frag = urlparse.urlsplit(path)
189 scheme, netloc, urlpath, query, frag = urlparse.urlsplit(path)
193 if query or frag:
190 if query or frag:
194 raise util.Abort(_('unsupported URL component: "%s"') %
191 raise util.Abort(_('unsupported URL component: "%s"') %
195 (query or frag))
192 (query or frag))
196 if not urlpath:
193 if not urlpath:
197 urlpath = '/'
194 urlpath = '/'
198 urlpath = quotepath(urlpath)
195 urlpath = quotepath(urlpath)
199 host, port, user, passwd = netlocsplit(netloc)
196 host, port, user, passwd = netlocsplit(netloc)
200
197
201 # urllib cannot handle URLs with embedded user or passwd
198 # urllib cannot handle URLs with embedded user or passwd
202 self._url = urlparse.urlunsplit((scheme, netlocunsplit(host, port),
199 self._url = urlparse.urlunsplit((scheme, netlocunsplit(host, port),
203 urlpath, '', ''))
200 urlpath, '', ''))
204 self.ui = ui
201 self.ui = ui
205 self.ui.debug(_('using %s\n') % self._url)
202 self.ui.debug(_('using %s\n') % self._url)
206
203
207 proxyurl = ui.config("http_proxy", "host") or os.getenv('http_proxy')
204 proxyurl = ui.config("http_proxy", "host") or os.getenv('http_proxy')
208 # XXX proxyauthinfo = None
205 # XXX proxyauthinfo = None
209 handlers = [httphandler()]
206 handlers = [httphandler()]
207 if has_https:
208 handlers.append(httpshandler())
210
209
211 if proxyurl:
210 if proxyurl:
212 # proxy can be proper url or host[:port]
211 # proxy can be proper url or host[:port]
213 if not (proxyurl.startswith('http:') or
212 if not (proxyurl.startswith('http:') or
214 proxyurl.startswith('https:')):
213 proxyurl.startswith('https:')):
215 proxyurl = 'http://' + proxyurl + '/'
214 proxyurl = 'http://' + proxyurl + '/'
216 snpqf = urlparse.urlsplit(proxyurl)
215 snpqf = urlparse.urlsplit(proxyurl)
217 proxyscheme, proxynetloc, proxypath, proxyquery, proxyfrag = snpqf
216 proxyscheme, proxynetloc, proxypath, proxyquery, proxyfrag = snpqf
218 hpup = netlocsplit(proxynetloc)
217 hpup = netlocsplit(proxynetloc)
219
218
220 proxyhost, proxyport, proxyuser, proxypasswd = hpup
219 proxyhost, proxyport, proxyuser, proxypasswd = hpup
221 if not proxyuser:
220 if not proxyuser:
222 proxyuser = ui.config("http_proxy", "user")
221 proxyuser = ui.config("http_proxy", "user")
223 proxypasswd = ui.config("http_proxy", "passwd")
222 proxypasswd = ui.config("http_proxy", "passwd")
224
223
225 # see if we should use a proxy for this url
224 # see if we should use a proxy for this url
226 no_list = [ "localhost", "127.0.0.1" ]
225 no_list = [ "localhost", "127.0.0.1" ]
227 no_list.extend([p.lower() for
226 no_list.extend([p.lower() for
228 p in ui.configlist("http_proxy", "no")])
227 p in ui.configlist("http_proxy", "no")])
229 no_list.extend([p.strip().lower() for
228 no_list.extend([p.strip().lower() for
230 p in os.getenv("no_proxy", '').split(',')
229 p in os.getenv("no_proxy", '').split(',')
231 if p.strip()])
230 if p.strip()])
232 # "http_proxy.always" config is for running tests on localhost
231 # "http_proxy.always" config is for running tests on localhost
233 if (not ui.configbool("http_proxy", "always") and
232 if (not ui.configbool("http_proxy", "always") and
234 host.lower() in no_list):
233 host.lower() in no_list):
235 # avoid auto-detection of proxy settings by appending
234 # avoid auto-detection of proxy settings by appending
236 # a ProxyHandler with no proxies defined.
235 # a ProxyHandler with no proxies defined.
237 handlers.append(urllib2.ProxyHandler({}))
236 handlers.append(urllib2.ProxyHandler({}))
238 ui.debug(_('disabling proxy for %s\n') % host)
237 ui.debug(_('disabling proxy for %s\n') % host)
239 else:
238 else:
240 proxyurl = urlparse.urlunsplit((
239 proxyurl = urlparse.urlunsplit((
241 proxyscheme, netlocunsplit(proxyhost, proxyport,
240 proxyscheme, netlocunsplit(proxyhost, proxyport,
242 proxyuser, proxypasswd or ''),
241 proxyuser, proxypasswd or ''),
243 proxypath, proxyquery, proxyfrag))
242 proxypath, proxyquery, proxyfrag))
244 handlers.append(urllib2.ProxyHandler({scheme: proxyurl}))
243 handlers.append(urllib2.ProxyHandler({scheme: proxyurl}))
245 ui.debug(_('proxying through http://%s:%s\n') %
244 ui.debug(_('proxying through http://%s:%s\n') %
246 (proxyhost, proxyport))
245 (proxyhost, proxyport))
247
246
248 # urllib2 takes proxy values from the environment and those
247 # urllib2 takes proxy values from the environment and those
249 # will take precedence if found, so drop them
248 # will take precedence if found, so drop them
250 for env in ["HTTP_PROXY", "http_proxy", "no_proxy"]:
249 for env in ["HTTP_PROXY", "http_proxy", "no_proxy"]:
251 try:
250 try:
252 if os.environ.has_key(env):
251 if os.environ.has_key(env):
253 del os.environ[env]
252 del os.environ[env]
254 except OSError:
253 except OSError:
255 pass
254 pass
256
255
257 passmgr = passwordmgr(ui)
256 passmgr = passwordmgr(ui)
258 if user:
257 if user:
259 ui.debug(_('http auth: user %s, password %s\n') %
258 ui.debug(_('http auth: user %s, password %s\n') %
260 (user, passwd and '*' * len(passwd) or 'not set'))
259 (user, passwd and '*' * len(passwd) or 'not set'))
261 netloc = host
260 netloc = host
262 if port:
261 if port:
263 netloc += ':' + port
262 netloc += ':' + port
264 # Python < 2.4.3 uses only the netloc to search for a password
263 # Python < 2.4.3 uses only the netloc to search for a password
265 passmgr.add_password(None, (self._url, netloc), user, passwd or '')
264 passmgr.add_password(None, (self._url, netloc), user, passwd or '')
266
265
267 handlers.extend((urllib2.HTTPBasicAuthHandler(passmgr),
266 handlers.extend((urllib2.HTTPBasicAuthHandler(passmgr),
268 httpdigestauthhandler(passmgr)))
267 httpdigestauthhandler(passmgr)))
269 opener = urllib2.build_opener(*handlers)
268 opener = urllib2.build_opener(*handlers)
270
269
271 # 1.0 here is the _protocol_ version
270 # 1.0 here is the _protocol_ version
272 opener.addheaders = [('User-agent', 'mercurial/proto-1.0')]
271 opener.addheaders = [('User-agent', 'mercurial/proto-1.0')]
273 urllib2.install_opener(opener)
272 urllib2.install_opener(opener)
274
273
275 def url(self):
274 def url(self):
276 return self.path
275 return self.path
277
276
278 # look up capabilities only when needed
277 # look up capabilities only when needed
279
278
280 def get_caps(self):
279 def get_caps(self):
281 if self.caps is None:
280 if self.caps is None:
282 try:
281 try:
283 self.caps = util.set(self.do_read('capabilities').split())
282 self.caps = util.set(self.do_read('capabilities').split())
284 except repo.RepoError:
283 except repo.RepoError:
285 self.caps = util.set()
284 self.caps = util.set()
286 self.ui.debug(_('capabilities: %s\n') %
285 self.ui.debug(_('capabilities: %s\n') %
287 (' '.join(self.caps or ['none'])))
286 (' '.join(self.caps or ['none'])))
288 return self.caps
287 return self.caps
289
288
290 capabilities = property(get_caps)
289 capabilities = property(get_caps)
291
290
292 def lock(self):
291 def lock(self):
293 raise util.Abort(_('operation not supported over http'))
292 raise util.Abort(_('operation not supported over http'))
294
293
295 def do_cmd(self, cmd, **args):
294 def do_cmd(self, cmd, **args):
296 data = args.pop('data', None)
295 data = args.pop('data', None)
297 headers = args.pop('headers', {})
296 headers = args.pop('headers', {})
298 self.ui.debug(_("sending %s command\n") % cmd)
297 self.ui.debug(_("sending %s command\n") % cmd)
299 q = {"cmd": cmd}
298 q = {"cmd": cmd}
300 q.update(args)
299 q.update(args)
301 qs = '?%s' % urllib.urlencode(q)
300 qs = '?%s' % urllib.urlencode(q)
302 cu = "%s%s" % (self._url, qs)
301 cu = "%s%s" % (self._url, qs)
303 try:
302 try:
304 if data:
303 if data:
305 self.ui.debug(_("sending %s bytes\n") % len(data))
304 self.ui.debug(_("sending %s bytes\n") % len(data))
306 resp = urllib2.urlopen(request(cu, data, headers))
305 resp = urllib2.urlopen(request(cu, data, headers))
307 except urllib2.HTTPError, inst:
306 except urllib2.HTTPError, inst:
308 if inst.code == 401:
307 if inst.code == 401:
309 raise util.Abort(_('authorization failed'))
308 raise util.Abort(_('authorization failed'))
310 raise
309 raise
311 except httplib.HTTPException, inst:
310 except httplib.HTTPException, inst:
312 self.ui.debug(_('http error while sending %s command\n') % cmd)
311 self.ui.debug(_('http error while sending %s command\n') % cmd)
313 self.ui.print_exc()
312 self.ui.print_exc()
314 raise IOError(None, inst)
313 raise IOError(None, inst)
315 except IndexError:
314 except IndexError:
316 # this only happens with Python 2.3, later versions raise URLError
315 # this only happens with Python 2.3, later versions raise URLError
317 raise util.Abort(_('http error, possibly caused by proxy setting'))
316 raise util.Abort(_('http error, possibly caused by proxy setting'))
318 # record the url we got redirected to
317 # record the url we got redirected to
319 resp_url = resp.geturl()
318 resp_url = resp.geturl()
320 if resp_url.endswith(qs):
319 if resp_url.endswith(qs):
321 resp_url = resp_url[:-len(qs)]
320 resp_url = resp_url[:-len(qs)]
322 if self._url != resp_url:
321 if self._url != resp_url:
323 self.ui.status(_('real URL is %s\n') % resp_url)
322 self.ui.status(_('real URL is %s\n') % resp_url)
324 self._url = resp_url
323 self._url = resp_url
325 try:
324 try:
326 proto = resp.getheader('content-type')
325 proto = resp.getheader('content-type')
327 except AttributeError:
326 except AttributeError:
328 proto = resp.headers['content-type']
327 proto = resp.headers['content-type']
329
328
330 # accept old "text/plain" and "application/hg-changegroup" for now
329 # accept old "text/plain" and "application/hg-changegroup" for now
331 if not (proto.startswith('application/mercurial-') or
330 if not (proto.startswith('application/mercurial-') or
332 proto.startswith('text/plain') or
331 proto.startswith('text/plain') or
333 proto.startswith('application/hg-changegroup')):
332 proto.startswith('application/hg-changegroup')):
334 self.ui.debug(_("Requested URL: '%s'\n") % cu)
333 self.ui.debug(_("Requested URL: '%s'\n") % cu)
335 raise repo.RepoError(_("'%s' does not appear to be an hg repository")
334 raise repo.RepoError(_("'%s' does not appear to be an hg repository")
336 % self._url)
335 % self._url)
337
336
338 if proto.startswith('application/mercurial-'):
337 if proto.startswith('application/mercurial-'):
339 try:
338 try:
340 version = proto.split('-', 1)[1]
339 version = proto.split('-', 1)[1]
341 version_info = tuple([int(n) for n in version.split('.')])
340 version_info = tuple([int(n) for n in version.split('.')])
342 except ValueError:
341 except ValueError:
343 raise repo.RepoError(_("'%s' sent a broken Content-type "
342 raise repo.RepoError(_("'%s' sent a broken Content-type "
344 "header (%s)") % (self._url, proto))
343 "header (%s)") % (self._url, proto))
345 if version_info > (0, 1):
344 if version_info > (0, 1):
346 raise repo.RepoError(_("'%s' uses newer protocol %s") %
345 raise repo.RepoError(_("'%s' uses newer protocol %s") %
347 (self._url, version))
346 (self._url, version))
348
347
349 return resp
348 return resp
350
349
351 def do_read(self, cmd, **args):
350 def do_read(self, cmd, **args):
352 fp = self.do_cmd(cmd, **args)
351 fp = self.do_cmd(cmd, **args)
353 try:
352 try:
354 return fp.read()
353 return fp.read()
355 finally:
354 finally:
356 # if using keepalive, allow connection to be reused
355 # if using keepalive, allow connection to be reused
357 fp.close()
356 fp.close()
358
357
359 def lookup(self, key):
358 def lookup(self, key):
360 self.requirecap('lookup', _('look up remote revision'))
359 self.requirecap('lookup', _('look up remote revision'))
361 d = self.do_cmd("lookup", key = key).read()
360 d = self.do_cmd("lookup", key = key).read()
362 success, data = d[:-1].split(' ', 1)
361 success, data = d[:-1].split(' ', 1)
363 if int(success):
362 if int(success):
364 return bin(data)
363 return bin(data)
365 raise repo.RepoError(data)
364 raise repo.RepoError(data)
366
365
367 def heads(self):
366 def heads(self):
368 d = self.do_read("heads")
367 d = self.do_read("heads")
369 try:
368 try:
370 return map(bin, d[:-1].split(" "))
369 return map(bin, d[:-1].split(" "))
371 except:
370 except:
372 raise util.UnexpectedOutput(_("unexpected response:"), d)
371 raise util.UnexpectedOutput(_("unexpected response:"), d)
373
372
374 def branches(self, nodes):
373 def branches(self, nodes):
375 n = " ".join(map(hex, nodes))
374 n = " ".join(map(hex, nodes))
376 d = self.do_read("branches", nodes=n)
375 d = self.do_read("branches", nodes=n)
377 try:
376 try:
378 br = [ tuple(map(bin, b.split(" "))) for b in d.splitlines() ]
377 br = [ tuple(map(bin, b.split(" "))) for b in d.splitlines() ]
379 return br
378 return br
380 except:
379 except:
381 raise util.UnexpectedOutput(_("unexpected response:"), d)
380 raise util.UnexpectedOutput(_("unexpected response:"), d)
382
381
383 def between(self, pairs):
382 def between(self, pairs):
384 n = "\n".join(["-".join(map(hex, p)) for p in pairs])
383 n = "\n".join(["-".join(map(hex, p)) for p in pairs])
385 d = self.do_read("between", pairs=n)
384 d = self.do_read("between", pairs=n)
386 try:
385 try:
387 p = [ l and map(bin, l.split(" ")) or [] for l in d.splitlines() ]
386 p = [ l and map(bin, l.split(" ")) or [] for l in d.splitlines() ]
388 return p
387 return p
389 except:
388 except:
390 raise util.UnexpectedOutput(_("unexpected response:"), d)
389 raise util.UnexpectedOutput(_("unexpected response:"), d)
391
390
392 def changegroup(self, nodes, kind):
391 def changegroup(self, nodes, kind):
393 n = " ".join(map(hex, nodes))
392 n = " ".join(map(hex, nodes))
394 f = self.do_cmd("changegroup", roots=n)
393 f = self.do_cmd("changegroup", roots=n)
395 return util.chunkbuffer(zgenerator(f))
394 return util.chunkbuffer(zgenerator(f))
396
395
397 def changegroupsubset(self, bases, heads, source):
396 def changegroupsubset(self, bases, heads, source):
398 self.requirecap('changegroupsubset', _('look up remote changes'))
397 self.requirecap('changegroupsubset', _('look up remote changes'))
399 baselst = " ".join([hex(n) for n in bases])
398 baselst = " ".join([hex(n) for n in bases])
400 headlst = " ".join([hex(n) for n in heads])
399 headlst = " ".join([hex(n) for n in heads])
401 f = self.do_cmd("changegroupsubset", bases=baselst, heads=headlst)
400 f = self.do_cmd("changegroupsubset", bases=baselst, heads=headlst)
402 return util.chunkbuffer(zgenerator(f))
401 return util.chunkbuffer(zgenerator(f))
403
402
404 def unbundle(self, cg, heads, source):
403 def unbundle(self, cg, heads, source):
405 # have to stream bundle to a temp file because we do not have
404 # have to stream bundle to a temp file because we do not have
406 # http 1.1 chunked transfer.
405 # http 1.1 chunked transfer.
407
406
408 type = ""
407 type = ""
409 types = self.capable('unbundle')
408 types = self.capable('unbundle')
410 # servers older than d1b16a746db6 will send 'unbundle' as a
409 # servers older than d1b16a746db6 will send 'unbundle' as a
411 # boolean capability
410 # boolean capability
412 try:
411 try:
413 types = types.split(',')
412 types = types.split(',')
414 except AttributeError:
413 except AttributeError:
415 types = [""]
414 types = [""]
416 if types:
415 if types:
417 for x in types:
416 for x in types:
418 if x in changegroup.bundletypes:
417 if x in changegroup.bundletypes:
419 type = x
418 type = x
420 break
419 break
421
420
422 tempname = changegroup.writebundle(cg, None, type)
421 tempname = changegroup.writebundle(cg, None, type)
423 fp = httpsendfile(tempname, "rb")
422 fp = httpsendfile(tempname, "rb")
424 try:
423 try:
425 try:
424 try:
426 rfp = self.do_cmd(
425 rfp = self.do_cmd(
427 'unbundle', data=fp,
426 'unbundle', data=fp,
428 headers={'content-type': 'application/octet-stream'},
427 headers={'content-type': 'application/octet-stream'},
429 heads=' '.join(map(hex, heads)))
428 heads=' '.join(map(hex, heads)))
430 try:
429 try:
431 ret = int(rfp.readline())
430 ret = int(rfp.readline())
432 self.ui.write(rfp.read())
431 self.ui.write(rfp.read())
433 return ret
432 return ret
434 finally:
433 finally:
435 rfp.close()
434 rfp.close()
436 except socket.error, err:
435 except socket.error, err:
437 if err[0] in (errno.ECONNRESET, errno.EPIPE):
436 if err[0] in (errno.ECONNRESET, errno.EPIPE):
438 raise util.Abort(_('push failed: %s') % err[1])
437 raise util.Abort(_('push failed: %s') % err[1])
439 raise util.Abort(err[1])
438 raise util.Abort(err[1])
440 finally:
439 finally:
441 fp.close()
440 fp.close()
442 os.unlink(tempname)
441 os.unlink(tempname)
443
442
444 def stream_out(self):
443 def stream_out(self):
445 return self.do_cmd('stream_out')
444 return self.do_cmd('stream_out')
446
445
class httpsrepository(httprepository):
    """httprepository variant that refuses to start without SSL support."""
    def __init__(self, ui, path):
        if not has_https:
            raise util.Abort(_('Python support for SSL and HTTPS '
                               'is not installed'))
        httprepository.__init__(self, ui, path)
453
452
def instance(ui, path, create):
    """Repository factory entry point for http:// and https:// paths."""
    if create:
        raise util.Abort(_('cannot create new http repository'))
    if path.startswith('https:'):
        return httpsrepository(ui, path)
    return httprepository(ui, path)
@@ -1,579 +1,582
1 # This library is free software; you can redistribute it and/or
1 # This library is free software; you can redistribute it and/or
2 # modify it under the terms of the GNU Lesser General Public
2 # modify it under the terms of the GNU Lesser General Public
3 # License as published by the Free Software Foundation; either
3 # License as published by the Free Software Foundation; either
4 # version 2.1 of the License, or (at your option) any later version.
4 # version 2.1 of the License, or (at your option) any later version.
5 #
5 #
6 # This library is distributed in the hope that it will be useful,
6 # This library is distributed in the hope that it will be useful,
7 # but WITHOUT ANY WARRANTY; without even the implied warranty of
7 # but WITHOUT ANY WARRANTY; without even the implied warranty of
8 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
8 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
9 # Lesser General Public License for more details.
9 # Lesser General Public License for more details.
10 #
10 #
11 # You should have received a copy of the GNU Lesser General Public
11 # You should have received a copy of the GNU Lesser General Public
12 # License along with this library; if not, write to the
12 # License along with this library; if not, write to the
13 # Free Software Foundation, Inc.,
13 # Free Software Foundation, Inc.,
14 # 59 Temple Place, Suite 330,
14 # 59 Temple Place, Suite 330,
15 # Boston, MA 02111-1307 USA
15 # Boston, MA 02111-1307 USA
16
16
17 # This file is part of urlgrabber, a high-level cross-protocol url-grabber
17 # This file is part of urlgrabber, a high-level cross-protocol url-grabber
18 # Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko
18 # Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko
19
19
20 # Modified by Benoit Boissinot:
20 # Modified by Benoit Boissinot:
21 # - fix for digest auth (inspired from urllib2.py @ Python v2.4)
21 # - fix for digest auth (inspired from urllib2.py @ Python v2.4)
22
22
23 """An HTTP handler for urllib2 that supports HTTP 1.1 and keepalive.
23 """An HTTP handler for urllib2 that supports HTTP 1.1 and keepalive.
24
24
25 >>> import urllib2
25 >>> import urllib2
26 >>> from keepalive import HTTPHandler
26 >>> from keepalive import HTTPHandler
27 >>> keepalive_handler = HTTPHandler()
27 >>> keepalive_handler = HTTPHandler()
28 >>> opener = urllib2.build_opener(keepalive_handler)
28 >>> opener = urllib2.build_opener(keepalive_handler)
29 >>> urllib2.install_opener(opener)
29 >>> urllib2.install_opener(opener)
30 >>>
30 >>>
31 >>> fo = urllib2.urlopen('http://www.python.org')
31 >>> fo = urllib2.urlopen('http://www.python.org')
32
32
33 If a connection to a given host is requested, and all of the existing
33 If a connection to a given host is requested, and all of the existing
34 connections are still in use, another connection will be opened. If
34 connections are still in use, another connection will be opened. If
35 the handler tries to use an existing connection but it fails in some
35 the handler tries to use an existing connection but it fails in some
36 way, it will be closed and removed from the pool.
36 way, it will be closed and removed from the pool.
37
37
38 To remove the handler, simply re-run build_opener with no arguments, and
38 To remove the handler, simply re-run build_opener with no arguments, and
39 install that opener.
39 install that opener.
40
40
41 You can explicitly close connections by using the close_connection()
41 You can explicitly close connections by using the close_connection()
42 method of the returned file-like object (described below) or you can
42 method of the returned file-like object (described below) or you can
43 use the handler methods:
43 use the handler methods:
44
44
45 close_connection(host)
45 close_connection(host)
46 close_all()
46 close_all()
47 open_connections()
47 open_connections()
48
48
49 NOTE: using the close_connection and close_all methods of the handler
49 NOTE: using the close_connection and close_all methods of the handler
50 should be done with care when using multiple threads.
50 should be done with care when using multiple threads.
51 * there is nothing that prevents another thread from creating new
51 * there is nothing that prevents another thread from creating new
52 connections immediately after connections are closed
52 connections immediately after connections are closed
53 * no checks are done to prevent in-use connections from being closed
53 * no checks are done to prevent in-use connections from being closed
54
54
55 >>> keepalive_handler.close_all()
55 >>> keepalive_handler.close_all()
56
56
57 EXTRA ATTRIBUTES AND METHODS
57 EXTRA ATTRIBUTES AND METHODS
58
58
59 Upon a status of 200, the object returned has a few additional
59 Upon a status of 200, the object returned has a few additional
60 attributes and methods, which should not be used if you want to
60 attributes and methods, which should not be used if you want to
61 remain consistent with the normal urllib2-returned objects:
61 remain consistent with the normal urllib2-returned objects:
62
62
63 close_connection() - close the connection to the host
63 close_connection() - close the connection to the host
64 readlines() - you know, readlines()
64 readlines() - you know, readlines()
65 status - the return status (ie 404)
65 status - the return status (ie 404)
66 reason - english translation of status (ie 'File not found')
66 reason - english translation of status (ie 'File not found')
67
67
68 If you want the best of both worlds, use this inside an
68 If you want the best of both worlds, use this inside an
69 AttributeError-catching try:
69 AttributeError-catching try:
70
70
71 >>> try: status = fo.status
71 >>> try: status = fo.status
72 >>> except AttributeError: status = None
72 >>> except AttributeError: status = None
73
73
74 Unfortunately, these are ONLY there if status == 200, so it's not
74 Unfortunately, these are ONLY there if status == 200, so it's not
75 easy to distinguish between non-200 responses. The reason is that
75 easy to distinguish between non-200 responses. The reason is that
76 urllib2 tries to do clever things with error codes 301, 302, 401,
76 urllib2 tries to do clever things with error codes 301, 302, 401,
77 and 407, and it wraps the object upon return.
77 and 407, and it wraps the object upon return.
78
78
79 For python versions earlier than 2.4, you can avoid this fancy error
79 For python versions earlier than 2.4, you can avoid this fancy error
80 handling by setting the module-level global HANDLE_ERRORS to zero.
80 handling by setting the module-level global HANDLE_ERRORS to zero.
81 You see, prior to 2.4, it's the HTTP Handler's job to determine what
81 You see, prior to 2.4, it's the HTTP Handler's job to determine what
82 to handle specially, and what to just pass up. HANDLE_ERRORS == 0
82 to handle specially, and what to just pass up. HANDLE_ERRORS == 0
83 means "pass everything up". In python 2.4, however, this job no
83 means "pass everything up". In python 2.4, however, this job no
84 longer belongs to the HTTP Handler and is now done by a NEW handler,
84 longer belongs to the HTTP Handler and is now done by a NEW handler,
85 HTTPErrorProcessor. Here's the bottom line:
85 HTTPErrorProcessor. Here's the bottom line:
86
86
87 python version < 2.4
87 python version < 2.4
88 HANDLE_ERRORS == 1 (default) pass up 200, treat the rest as
88 HANDLE_ERRORS == 1 (default) pass up 200, treat the rest as
89 errors
89 errors
90 HANDLE_ERRORS == 0 pass everything up, error processing is
90 HANDLE_ERRORS == 0 pass everything up, error processing is
91 left to the calling code
91 left to the calling code
92 python version >= 2.4
92 python version >= 2.4
93 HANDLE_ERRORS == 1 pass up 200, treat the rest as errors
93 HANDLE_ERRORS == 1 pass up 200, treat the rest as errors
94 HANDLE_ERRORS == 0 (default) pass everything up, let the
94 HANDLE_ERRORS == 0 (default) pass everything up, let the
95 other handlers (specifically,
95 other handlers (specifically,
96 HTTPErrorProcessor) decide what to do
96 HTTPErrorProcessor) decide what to do
97
97
98 In practice, setting the variable either way makes little difference
98 In practice, setting the variable either way makes little difference
99 in python 2.4, so for the most consistent behavior across versions,
99 in python 2.4, so for the most consistent behavior across versions,
100 you probably just want to use the defaults, which will give you
100 you probably just want to use the defaults, which will give you
101 exceptions on errors.
101 exceptions on errors.
102
102
103 """
103 """
104
104
105 # $Id: keepalive.py,v 1.14 2006/04/04 21:00:32 mstenner Exp $
105 # $Id: keepalive.py,v 1.14 2006/04/04 21:00:32 mstenner Exp $
106
106
107 import urllib2
107 import urllib2
108 import httplib
108 import httplib
109 import socket
109 import socket
110 import thread
110 import thread
111
111
112 DEBUG = None
112 DEBUG = None
113
113
114 import sys
114 import sys
115 if sys.version_info < (2, 4): HANDLE_ERRORS = 1
115 if sys.version_info < (2, 4): HANDLE_ERRORS = 1
116 else: HANDLE_ERRORS = 0
116 else: HANDLE_ERRORS = 0
117
117
118 class ConnectionManager:
118 class ConnectionManager:
119 """
119 """
120 The connection manager must be able to:
120 The connection manager must be able to:
121 * keep track of all existing
121 * keep track of all existing
122 """
122 """
123 def __init__(self):
123 def __init__(self):
124 self._lock = thread.allocate_lock()
124 self._lock = thread.allocate_lock()
125 self._hostmap = {} # map hosts to a list of connections
125 self._hostmap = {} # map hosts to a list of connections
126 self._connmap = {} # map connections to host
126 self._connmap = {} # map connections to host
127 self._readymap = {} # map connection to ready state
127 self._readymap = {} # map connection to ready state
128
128
129 def add(self, host, connection, ready):
129 def add(self, host, connection, ready):
130 self._lock.acquire()
130 self._lock.acquire()
131 try:
131 try:
132 if not self._hostmap.has_key(host): self._hostmap[host] = []
132 if not self._hostmap.has_key(host): self._hostmap[host] = []
133 self._hostmap[host].append(connection)
133 self._hostmap[host].append(connection)
134 self._connmap[connection] = host
134 self._connmap[connection] = host
135 self._readymap[connection] = ready
135 self._readymap[connection] = ready
136 finally:
136 finally:
137 self._lock.release()
137 self._lock.release()
138
138
139 def remove(self, connection):
139 def remove(self, connection):
140 self._lock.acquire()
140 self._lock.acquire()
141 try:
141 try:
142 try:
142 try:
143 host = self._connmap[connection]
143 host = self._connmap[connection]
144 except KeyError:
144 except KeyError:
145 pass
145 pass
146 else:
146 else:
147 del self._connmap[connection]
147 del self._connmap[connection]
148 del self._readymap[connection]
148 del self._readymap[connection]
149 self._hostmap[host].remove(connection)
149 self._hostmap[host].remove(connection)
150 if not self._hostmap[host]: del self._hostmap[host]
150 if not self._hostmap[host]: del self._hostmap[host]
151 finally:
151 finally:
152 self._lock.release()
152 self._lock.release()
153
153
154 def set_ready(self, connection, ready):
154 def set_ready(self, connection, ready):
155 try: self._readymap[connection] = ready
155 try: self._readymap[connection] = ready
156 except KeyError: pass
156 except KeyError: pass
157
157
158 def get_ready_conn(self, host):
158 def get_ready_conn(self, host):
159 conn = None
159 conn = None
160 self._lock.acquire()
160 self._lock.acquire()
161 try:
161 try:
162 if self._hostmap.has_key(host):
162 if self._hostmap.has_key(host):
163 for c in self._hostmap[host]:
163 for c in self._hostmap[host]:
164 if self._readymap[c]:
164 if self._readymap[c]:
165 self._readymap[c] = 0
165 self._readymap[c] = 0
166 conn = c
166 conn = c
167 break
167 break
168 finally:
168 finally:
169 self._lock.release()
169 self._lock.release()
170 return conn
170 return conn
171
171
172 def get_all(self, host=None):
172 def get_all(self, host=None):
173 if host:
173 if host:
174 return list(self._hostmap.get(host, []))
174 return list(self._hostmap.get(host, []))
175 else:
175 else:
176 return dict(self._hostmap)
176 return dict(self._hostmap)
177
177
178 class HTTPHandler(urllib2.HTTPHandler):
178 class KeepAliveHandler:
179 def __init__(self):
179 def __init__(self):
180 self._cm = ConnectionManager()
180 self._cm = ConnectionManager()
181
181
182 #### Connection Management
182 #### Connection Management
183 def open_connections(self):
183 def open_connections(self):
184 """return a list of connected hosts and the number of connections
184 """return a list of connected hosts and the number of connections
185 to each. [('foo.com:80', 2), ('bar.org', 1)]"""
185 to each. [('foo.com:80', 2), ('bar.org', 1)]"""
186 return [(host, len(li)) for (host, li) in self._cm.get_all().items()]
186 return [(host, len(li)) for (host, li) in self._cm.get_all().items()]
187
187
188 def close_connection(self, host):
188 def close_connection(self, host):
189 """close connection(s) to <host>
189 """close connection(s) to <host>
190 host is the host:port spec, as in 'www.cnn.com:8080' as passed in.
190 host is the host:port spec, as in 'www.cnn.com:8080' as passed in.
191 no error occurs if there is no connection to that host."""
191 no error occurs if there is no connection to that host."""
192 for h in self._cm.get_all(host):
192 for h in self._cm.get_all(host):
193 self._cm.remove(h)
193 self._cm.remove(h)
194 h.close()
194 h.close()
195
195
196 def close_all(self):
196 def close_all(self):
197 """close all open connections"""
197 """close all open connections"""
198 for host, conns in self._cm.get_all().items():
198 for host, conns in self._cm.get_all().items():
199 for h in conns:
199 for h in conns:
200 self._cm.remove(h)
200 self._cm.remove(h)
201 h.close()
201 h.close()
202
202
203 def _request_closed(self, request, host, connection):
203 def _request_closed(self, request, host, connection):
204 """tells us that this request is now closed and the the
204 """tells us that this request is now closed and the the
205 connection is ready for another request"""
205 connection is ready for another request"""
206 self._cm.set_ready(connection, 1)
206 self._cm.set_ready(connection, 1)
207
207
208 def _remove_connection(self, host, connection, close=0):
208 def _remove_connection(self, host, connection, close=0):
209 if close: connection.close()
209 if close: connection.close()
210 self._cm.remove(connection)
210 self._cm.remove(connection)
211
211
212 #### Transaction Execution
212 #### Transaction Execution
213 def http_open(self, req):
213 def http_open(self, req):
214 return self.do_open(HTTPConnection, req)
214 return self.do_open(HTTPConnection, req)
215
215
216 def do_open(self, http_class, req):
216 def do_open(self, http_class, req):
217 host = req.get_host()
217 host = req.get_host()
218 if not host:
218 if not host:
219 raise urllib2.URLError('no host given')
219 raise urllib2.URLError('no host given')
220
220
221 try:
221 try:
222 h = self._cm.get_ready_conn(host)
222 h = self._cm.get_ready_conn(host)
223 while h:
223 while h:
224 r = self._reuse_connection(h, req, host)
224 r = self._reuse_connection(h, req, host)
225
225
226 # if this response is non-None, then it worked and we're
226 # if this response is non-None, then it worked and we're
227 # done. Break out, skipping the else block.
227 # done. Break out, skipping the else block.
228 if r: break
228 if r: break
229
229
230 # connection is bad - possibly closed by server
230 # connection is bad - possibly closed by server
231 # discard it and ask for the next free connection
231 # discard it and ask for the next free connection
232 h.close()
232 h.close()
233 self._cm.remove(h)
233 self._cm.remove(h)
234 h = self._cm.get_ready_conn(host)
234 h = self._cm.get_ready_conn(host)
235 else:
235 else:
236 # no (working) free connections were found. Create a new one.
236 # no (working) free connections were found. Create a new one.
237 h = http_class(host)
237 h = http_class(host)
238 if DEBUG: DEBUG.info("creating new connection to %s (%d)",
238 if DEBUG: DEBUG.info("creating new connection to %s (%d)",
239 host, id(h))
239 host, id(h))
240 self._cm.add(host, h, 0)
240 self._cm.add(host, h, 0)
241 self._start_transaction(h, req)
241 self._start_transaction(h, req)
242 r = h.getresponse()
242 r = h.getresponse()
243 except (socket.error, httplib.HTTPException), err:
243 except (socket.error, httplib.HTTPException), err:
244 raise urllib2.URLError(err)
244 raise urllib2.URLError(err)
245
245
246 # if not a persistent connection, don't try to reuse it
246 # if not a persistent connection, don't try to reuse it
247 if r.will_close: self._cm.remove(h)
247 if r.will_close: self._cm.remove(h)
248
248
249 if DEBUG: DEBUG.info("STATUS: %s, %s", r.status, r.reason)
249 if DEBUG: DEBUG.info("STATUS: %s, %s", r.status, r.reason)
250 r._handler = self
250 r._handler = self
251 r._host = host
251 r._host = host
252 r._url = req.get_full_url()
252 r._url = req.get_full_url()
253 r._connection = h
253 r._connection = h
254 r.code = r.status
254 r.code = r.status
255 r.headers = r.msg
255 r.headers = r.msg
256 r.msg = r.reason
256 r.msg = r.reason
257
257
258 if r.status == 200 or not HANDLE_ERRORS:
258 if r.status == 200 or not HANDLE_ERRORS:
259 return r
259 return r
260 else:
260 else:
261 return self.parent.error('http', req, r,
261 return self.parent.error('http', req, r,
262 r.status, r.msg, r.headers)
262 r.status, r.msg, r.headers)
263
263
264 def _reuse_connection(self, h, req, host):
264 def _reuse_connection(self, h, req, host):
265 """start the transaction with a re-used connection
265 """start the transaction with a re-used connection
266 return a response object (r) upon success or None on failure.
266 return a response object (r) upon success or None on failure.
267 This DOES not close or remove bad connections in cases where
267 This DOES not close or remove bad connections in cases where
268 it returns. However, if an unexpected exception occurs, it
268 it returns. However, if an unexpected exception occurs, it
269 will close and remove the connection before re-raising.
269 will close and remove the connection before re-raising.
270 """
270 """
271 try:
271 try:
272 self._start_transaction(h, req)
272 self._start_transaction(h, req)
273 r = h.getresponse()
273 r = h.getresponse()
274 # note: just because we got something back doesn't mean it
274 # note: just because we got something back doesn't mean it
275 # worked. We'll check the version below, too.
275 # worked. We'll check the version below, too.
276 except (socket.error, httplib.HTTPException):
276 except (socket.error, httplib.HTTPException):
277 r = None
277 r = None
278 except:
278 except:
279 # adding this block just in case we've missed
279 # adding this block just in case we've missed
280 # something we will still raise the exception, but
280 # something we will still raise the exception, but
281 # lets try and close the connection and remove it
281 # lets try and close the connection and remove it
282 # first. We previously got into a nasty loop
282 # first. We previously got into a nasty loop
283 # where an exception was uncaught, and so the
283 # where an exception was uncaught, and so the
284 # connection stayed open. On the next try, the
284 # connection stayed open. On the next try, the
285 # same exception was raised, etc. The tradeoff is
285 # same exception was raised, etc. The tradeoff is
286 # that it's now possible this call will raise
286 # that it's now possible this call will raise
287 # a DIFFERENT exception
287 # a DIFFERENT exception
288 if DEBUG: DEBUG.error("unexpected exception - closing " + \
288 if DEBUG: DEBUG.error("unexpected exception - closing " + \
289 "connection to %s (%d)", host, id(h))
289 "connection to %s (%d)", host, id(h))
290 self._cm.remove(h)
290 self._cm.remove(h)
291 h.close()
291 h.close()
292 raise
292 raise
293
293
294 if r is None or r.version == 9:
294 if r is None or r.version == 9:
295 # httplib falls back to assuming HTTP 0.9 if it gets a
295 # httplib falls back to assuming HTTP 0.9 if it gets a
296 # bad header back. This is most likely to happen if
296 # bad header back. This is most likely to happen if
297 # the socket has been closed by the server since we
297 # the socket has been closed by the server since we
298 # last used the connection.
298 # last used the connection.
299 if DEBUG: DEBUG.info("failed to re-use connection to %s (%d)",
299 if DEBUG: DEBUG.info("failed to re-use connection to %s (%d)",
300 host, id(h))
300 host, id(h))
301 r = None
301 r = None
302 else:
302 else:
303 if DEBUG: DEBUG.info("re-using connection to %s (%d)", host, id(h))
303 if DEBUG: DEBUG.info("re-using connection to %s (%d)", host, id(h))
304
304
305 return r
305 return r
306
306
307 def _start_transaction(self, h, req):
307 def _start_transaction(self, h, req):
308 headers = req.headers.copy()
308 headers = req.headers.copy()
309 body = req.data
309 body = req.data
310 if sys.version_info >= (2, 4):
310 if sys.version_info >= (2, 4):
311 headers.update(req.unredirected_hdrs)
311 headers.update(req.unredirected_hdrs)
312 try:
312 try:
313 h.request(req.get_method(), req.get_selector(), body, headers)
313 h.request(req.get_method(), req.get_selector(), body, headers)
314 except socket.error, err: # XXX what error?
314 except socket.error, err: # XXX what error?
315 raise urllib2.URLError(err)
315 raise urllib2.URLError(err)
316
316
317 class HTTPHandler(KeepAliveHandler, urllib2.HTTPHandler):
318 pass
319
317 class HTTPResponse(httplib.HTTPResponse):
320 class HTTPResponse(httplib.HTTPResponse):
318 # we need to subclass HTTPResponse in order to
321 # we need to subclass HTTPResponse in order to
319 # 1) add readline() and readlines() methods
322 # 1) add readline() and readlines() methods
320 # 2) add close_connection() methods
323 # 2) add close_connection() methods
321 # 3) add info() and geturl() methods
324 # 3) add info() and geturl() methods
322
325
323 # in order to add readline(), read must be modified to deal with a
326 # in order to add readline(), read must be modified to deal with a
324 # buffer. example: readline must read a buffer and then spit back
327 # buffer. example: readline must read a buffer and then spit back
325 # one line at a time. The only real alternative is to read one
328 # one line at a time. The only real alternative is to read one
326 # BYTE at a time (ick). Once something has been read, it can't be
329 # BYTE at a time (ick). Once something has been read, it can't be
327 # put back (ok, maybe it can, but that's even uglier than this),
330 # put back (ok, maybe it can, but that's even uglier than this),
328 # so if you THEN do a normal read, you must first take stuff from
331 # so if you THEN do a normal read, you must first take stuff from
329 # the buffer.
332 # the buffer.
330
333
331 # the read method wraps the original to accomodate buffering,
334 # the read method wraps the original to accomodate buffering,
332 # although read() never adds to the buffer.
335 # although read() never adds to the buffer.
333 # Both readline and readlines have been stolen with almost no
336 # Both readline and readlines have been stolen with almost no
334 # modification from socket.py
337 # modification from socket.py
335
338
336
339
337 def __init__(self, sock, debuglevel=0, strict=0, method=None):
340 def __init__(self, sock, debuglevel=0, strict=0, method=None):
338 if method: # the httplib in python 2.3 uses the method arg
341 if method: # the httplib in python 2.3 uses the method arg
339 httplib.HTTPResponse.__init__(self, sock, debuglevel, method)
342 httplib.HTTPResponse.__init__(self, sock, debuglevel, method)
340 else: # 2.2 doesn't
343 else: # 2.2 doesn't
341 httplib.HTTPResponse.__init__(self, sock, debuglevel)
344 httplib.HTTPResponse.__init__(self, sock, debuglevel)
342 self.fileno = sock.fileno
345 self.fileno = sock.fileno
343 self.code = None
346 self.code = None
344 self._rbuf = ''
347 self._rbuf = ''
345 self._rbufsize = 8096
348 self._rbufsize = 8096
346 self._handler = None # inserted by the handler later
349 self._handler = None # inserted by the handler later
347 self._host = None # (same)
350 self._host = None # (same)
348 self._url = None # (same)
351 self._url = None # (same)
349 self._connection = None # (same)
352 self._connection = None # (same)
350
353
351 _raw_read = httplib.HTTPResponse.read
354 _raw_read = httplib.HTTPResponse.read
352
355
353 def close(self):
356 def close(self):
354 if self.fp:
357 if self.fp:
355 self.fp.close()
358 self.fp.close()
356 self.fp = None
359 self.fp = None
357 if self._handler:
360 if self._handler:
358 self._handler._request_closed(self, self._host,
361 self._handler._request_closed(self, self._host,
359 self._connection)
362 self._connection)
360
363
361 def close_connection(self):
364 def close_connection(self):
362 self._handler._remove_connection(self._host, self._connection, close=1)
365 self._handler._remove_connection(self._host, self._connection, close=1)
363 self.close()
366 self.close()
364
367
365 def info(self):
368 def info(self):
366 return self.headers
369 return self.headers
367
370
368 def geturl(self):
371 def geturl(self):
369 return self._url
372 return self._url
370
373
371 def read(self, amt=None):
374 def read(self, amt=None):
372 # the _rbuf test is only in this first if for speed. It's not
375 # the _rbuf test is only in this first if for speed. It's not
373 # logically necessary
376 # logically necessary
374 if self._rbuf and not amt is None:
377 if self._rbuf and not amt is None:
375 L = len(self._rbuf)
378 L = len(self._rbuf)
376 if amt > L:
379 if amt > L:
377 amt -= L
380 amt -= L
378 else:
381 else:
379 s = self._rbuf[:amt]
382 s = self._rbuf[:amt]
380 self._rbuf = self._rbuf[amt:]
383 self._rbuf = self._rbuf[amt:]
381 return s
384 return s
382
385
383 s = self._rbuf + self._raw_read(amt)
386 s = self._rbuf + self._raw_read(amt)
384 self._rbuf = ''
387 self._rbuf = ''
385 return s
388 return s
386
389
387 def readline(self, limit=-1):
390 def readline(self, limit=-1):
388 data = ""
391 data = ""
389 i = self._rbuf.find('\n')
392 i = self._rbuf.find('\n')
390 while i < 0 and not (0 < limit <= len(self._rbuf)):
393 while i < 0 and not (0 < limit <= len(self._rbuf)):
391 new = self._raw_read(self._rbufsize)
394 new = self._raw_read(self._rbufsize)
392 if not new: break
395 if not new: break
393 i = new.find('\n')
396 i = new.find('\n')
394 if i >= 0: i = i + len(self._rbuf)
397 if i >= 0: i = i + len(self._rbuf)
395 self._rbuf = self._rbuf + new
398 self._rbuf = self._rbuf + new
396 if i < 0: i = len(self._rbuf)
399 if i < 0: i = len(self._rbuf)
397 else: i = i+1
400 else: i = i+1
398 if 0 <= limit < len(self._rbuf): i = limit
401 if 0 <= limit < len(self._rbuf): i = limit
399 data, self._rbuf = self._rbuf[:i], self._rbuf[i:]
402 data, self._rbuf = self._rbuf[:i], self._rbuf[i:]
400 return data
403 return data
401
404
402 def readlines(self, sizehint = 0):
405 def readlines(self, sizehint = 0):
403 total = 0
406 total = 0
404 list = []
407 list = []
405 while 1:
408 while 1:
406 line = self.readline()
409 line = self.readline()
407 if not line: break
410 if not line: break
408 list.append(line)
411 list.append(line)
409 total += len(line)
412 total += len(line)
410 if sizehint and total >= sizehint:
413 if sizehint and total >= sizehint:
411 break
414 break
412 return list
415 return list
413
416
414
417
415 class HTTPConnection(httplib.HTTPConnection):
418 class HTTPConnection(httplib.HTTPConnection):
416 # use the modified response class
419 # use the modified response class
417 response_class = HTTPResponse
420 response_class = HTTPResponse
418
421
419 #########################################################################
422 #########################################################################
420 ##### TEST FUNCTIONS
423 ##### TEST FUNCTIONS
421 #########################################################################
424 #########################################################################
422
425
423 def error_handler(url):
426 def error_handler(url):
424 global HANDLE_ERRORS
427 global HANDLE_ERRORS
425 orig = HANDLE_ERRORS
428 orig = HANDLE_ERRORS
426 keepalive_handler = HTTPHandler()
429 keepalive_handler = HTTPHandler()
427 opener = urllib2.build_opener(keepalive_handler)
430 opener = urllib2.build_opener(keepalive_handler)
428 urllib2.install_opener(opener)
431 urllib2.install_opener(opener)
429 pos = {0: 'off', 1: 'on'}
432 pos = {0: 'off', 1: 'on'}
430 for i in (0, 1):
433 for i in (0, 1):
431 print " fancy error handling %s (HANDLE_ERRORS = %i)" % (pos[i], i)
434 print " fancy error handling %s (HANDLE_ERRORS = %i)" % (pos[i], i)
432 HANDLE_ERRORS = i
435 HANDLE_ERRORS = i
433 try:
436 try:
434 fo = urllib2.urlopen(url)
437 fo = urllib2.urlopen(url)
435 foo = fo.read()
438 foo = fo.read()
436 fo.close()
439 fo.close()
437 try: status, reason = fo.status, fo.reason
440 try: status, reason = fo.status, fo.reason
438 except AttributeError: status, reason = None, None
441 except AttributeError: status, reason = None, None
439 except IOError, e:
442 except IOError, e:
440 print " EXCEPTION: %s" % e
443 print " EXCEPTION: %s" % e
441 raise
444 raise
442 else:
445 else:
443 print " status = %s, reason = %s" % (status, reason)
446 print " status = %s, reason = %s" % (status, reason)
444 HANDLE_ERRORS = orig
447 HANDLE_ERRORS = orig
445 hosts = keepalive_handler.open_connections()
448 hosts = keepalive_handler.open_connections()
446 print "open connections:", hosts
449 print "open connections:", hosts
447 keepalive_handler.close_all()
450 keepalive_handler.close_all()
448
451
449 def continuity(url):
452 def continuity(url):
450 import md5
453 import md5
451 format = '%25s: %s'
454 format = '%25s: %s'
452
455
453 # first fetch the file with the normal http handler
456 # first fetch the file with the normal http handler
454 opener = urllib2.build_opener()
457 opener = urllib2.build_opener()
455 urllib2.install_opener(opener)
458 urllib2.install_opener(opener)
456 fo = urllib2.urlopen(url)
459 fo = urllib2.urlopen(url)
457 foo = fo.read()
460 foo = fo.read()
458 fo.close()
461 fo.close()
459 m = md5.new(foo)
462 m = md5.new(foo)
460 print format % ('normal urllib', m.hexdigest())
463 print format % ('normal urllib', m.hexdigest())
461
464
462 # now install the keepalive handler and try again
465 # now install the keepalive handler and try again
463 opener = urllib2.build_opener(HTTPHandler())
466 opener = urllib2.build_opener(HTTPHandler())
464 urllib2.install_opener(opener)
467 urllib2.install_opener(opener)
465
468
466 fo = urllib2.urlopen(url)
469 fo = urllib2.urlopen(url)
467 foo = fo.read()
470 foo = fo.read()
468 fo.close()
471 fo.close()
469 m = md5.new(foo)
472 m = md5.new(foo)
470 print format % ('keepalive read', m.hexdigest())
473 print format % ('keepalive read', m.hexdigest())
471
474
472 fo = urllib2.urlopen(url)
475 fo = urllib2.urlopen(url)
473 foo = ''
476 foo = ''
474 while 1:
477 while 1:
475 f = fo.readline()
478 f = fo.readline()
476 if f: foo = foo + f
479 if f: foo = foo + f
477 else: break
480 else: break
478 fo.close()
481 fo.close()
479 m = md5.new(foo)
482 m = md5.new(foo)
480 print format % ('keepalive readline', m.hexdigest())
483 print format % ('keepalive readline', m.hexdigest())
481
484
482 def comp(N, url):
485 def comp(N, url):
483 print ' making %i connections to:\n %s' % (N, url)
486 print ' making %i connections to:\n %s' % (N, url)
484
487
485 sys.stdout.write(' first using the normal urllib handlers')
488 sys.stdout.write(' first using the normal urllib handlers')
486 # first use normal opener
489 # first use normal opener
487 opener = urllib2.build_opener()
490 opener = urllib2.build_opener()
488 urllib2.install_opener(opener)
491 urllib2.install_opener(opener)
489 t1 = fetch(N, url)
492 t1 = fetch(N, url)
490 print ' TIME: %.3f s' % t1
493 print ' TIME: %.3f s' % t1
491
494
492 sys.stdout.write(' now using the keepalive handler ')
495 sys.stdout.write(' now using the keepalive handler ')
493 # now install the keepalive handler and try again
496 # now install the keepalive handler and try again
494 opener = urllib2.build_opener(HTTPHandler())
497 opener = urllib2.build_opener(HTTPHandler())
495 urllib2.install_opener(opener)
498 urllib2.install_opener(opener)
496 t2 = fetch(N, url)
499 t2 = fetch(N, url)
497 print ' TIME: %.3f s' % t2
500 print ' TIME: %.3f s' % t2
498 print ' improvement factor: %.2f' % (t1/t2, )
501 print ' improvement factor: %.2f' % (t1/t2, )
499
502
500 def fetch(N, url, delay=0):
503 def fetch(N, url, delay=0):
501 import time
504 import time
502 lens = []
505 lens = []
503 starttime = time.time()
506 starttime = time.time()
504 for i in range(N):
507 for i in range(N):
505 if delay and i > 0: time.sleep(delay)
508 if delay and i > 0: time.sleep(delay)
506 fo = urllib2.urlopen(url)
509 fo = urllib2.urlopen(url)
507 foo = fo.read()
510 foo = fo.read()
508 fo.close()
511 fo.close()
509 lens.append(len(foo))
512 lens.append(len(foo))
510 diff = time.time() - starttime
513 diff = time.time() - starttime
511
514
512 j = 0
515 j = 0
513 for i in lens[1:]:
516 for i in lens[1:]:
514 j = j + 1
517 j = j + 1
515 if not i == lens[0]:
518 if not i == lens[0]:
516 print "WARNING: inconsistent length on read %i: %i" % (j, i)
519 print "WARNING: inconsistent length on read %i: %i" % (j, i)
517
520
518 return diff
521 return diff
519
522
520 def test_timeout(url):
523 def test_timeout(url):
521 global DEBUG
524 global DEBUG
522 dbbackup = DEBUG
525 dbbackup = DEBUG
523 class FakeLogger:
526 class FakeLogger:
524 def debug(self, msg, *args): print msg % args
527 def debug(self, msg, *args): print msg % args
525 info = warning = error = debug
528 info = warning = error = debug
526 DEBUG = FakeLogger()
529 DEBUG = FakeLogger()
527 print " fetching the file to establish a connection"
530 print " fetching the file to establish a connection"
528 fo = urllib2.urlopen(url)
531 fo = urllib2.urlopen(url)
529 data1 = fo.read()
532 data1 = fo.read()
530 fo.close()
533 fo.close()
531
534
532 i = 20
535 i = 20
533 print " waiting %i seconds for the server to close the connection" % i
536 print " waiting %i seconds for the server to close the connection" % i
534 while i > 0:
537 while i > 0:
535 sys.stdout.write('\r %2i' % i)
538 sys.stdout.write('\r %2i' % i)
536 sys.stdout.flush()
539 sys.stdout.flush()
537 time.sleep(1)
540 time.sleep(1)
538 i -= 1
541 i -= 1
539 sys.stderr.write('\r')
542 sys.stderr.write('\r')
540
543
541 print " fetching the file a second time"
544 print " fetching the file a second time"
542 fo = urllib2.urlopen(url)
545 fo = urllib2.urlopen(url)
543 data2 = fo.read()
546 data2 = fo.read()
544 fo.close()
547 fo.close()
545
548
546 if data1 == data2:
549 if data1 == data2:
547 print ' data are identical'
550 print ' data are identical'
548 else:
551 else:
549 print ' ERROR: DATA DIFFER'
552 print ' ERROR: DATA DIFFER'
550
553
551 DEBUG = dbbackup
554 DEBUG = dbbackup
552
555
553
556
554 def test(url, N=10):
557 def test(url, N=10):
555 print "checking error hander (do this on a non-200)"
558 print "checking error hander (do this on a non-200)"
556 try: error_handler(url)
559 try: error_handler(url)
557 except IOError, e:
560 except IOError, e:
558 print "exiting - exception will prevent further tests"
561 print "exiting - exception will prevent further tests"
559 sys.exit()
562 sys.exit()
560 print
563 print
561 print "performing continuity test (making sure stuff isn't corrupted)"
564 print "performing continuity test (making sure stuff isn't corrupted)"
562 continuity(url)
565 continuity(url)
563 print
566 print
564 print "performing speed comparison"
567 print "performing speed comparison"
565 comp(N, url)
568 comp(N, url)
566 print
569 print
567 print "performing dropped-connection check"
570 print "performing dropped-connection check"
568 test_timeout(url)
571 test_timeout(url)
569
572
570 if __name__ == '__main__':
573 if __name__ == '__main__':
571 import time
574 import time
572 import sys
575 import sys
573 try:
576 try:
574 N = int(sys.argv[1])
577 N = int(sys.argv[1])
575 url = sys.argv[2]
578 url = sys.argv[2]
576 except:
579 except:
577 print "%s <integer> <url>" % sys.argv[0]
580 print "%s <integer> <url>" % sys.argv[0]
578 else:
581 else:
579 test(url, N)
582 test(url, N)
General Comments 0
You need to be logged in to leave comments. Login now