upstream/mercurial-mirror Commit - r2435:ff2bac73

http client: support persistent connections....

Vadim Gelfer -

r2435:ff2bac73 default

parent child

mercurial/keepalive.py

0 created 644 +587 0

This diff has been collapsed as it changes many lines, (587 lines changed) Show them Hide them
		@@ -0,0 +1,587 b''
	1	# This library is free software; you can redistribute it and/or
	2	# modify it under the terms of the GNU Lesser General Public
	3	# License as published by the Free Software Foundation; either
	4	# version 2.1 of the License, or (at your option) any later version.
	5	#
	6	# This library is distributed in the hope that it will be useful,
	7	# but WITHOUT ANY WARRANTY; without even the implied warranty of
	8	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
	9	# Lesser General Public License for more details.
	10	#
	11	# You should have received a copy of the GNU Lesser General Public
	12	# License along with this library; if not, write to the
	13	# Free Software Foundation, Inc.,
	14	# 59 Temple Place, Suite 330,
	15	# Boston, MA 02111-1307 USA
	16
	17	# This file is part of urlgrabber, a high-level cross-protocol url-grabber
	18	# Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko
	19
	20	"""An HTTP handler for urllib2 that supports HTTP 1.1 and keepalive.
	21
	22	>>> import urllib2
	23	>>> from keepalive import HTTPHandler
	24	>>> keepalive_handler = HTTPHandler()
	25	>>> opener = urllib2.build_opener(keepalive_handler)
	26	>>> urllib2.install_opener(opener)
	27	>>>
	28	>>> fo = urllib2.urlopen('http://www.python.org')
	29
	30	If a connection to a given host is requested, and all of the existing
	31	connections are still in use, another connection will be opened. If
	32	the handler tries to use an existing connection but it fails in some
	33	way, it will be closed and removed from the pool.
	34
	35	To remove the handler, simply re-run build_opener with no arguments, and
	36	install that opener.
	37
	38	You can explicitly close connections by using the close_connection()
	39	method of the returned file-like object (described below) or you can
	40	use the handler methods:
	41
	42	close_connection(host)
	43	close_all()
	44	open_connections()
	45
	46	NOTE: using the close_connection and close_all methods of the handler
	47	should be done with care when using multiple threads.
	48	* there is nothing that prevents another thread from creating new
	49	connections immediately after connections are closed
	50	* no checks are done to prevent in-use connections from being closed
	51
	52	>>> keepalive_handler.close_all()
	53
	54	EXTRA ATTRIBUTES AND METHODS
	55
	56	Upon a status of 200, the object returned has a few additional
	57	attributes and methods, which should not be used if you want to
	58	remain consistent with the normal urllib2-returned objects:
	59
	60	close_connection() - close the connection to the host
	61	readlines() - you know, readlines()
	62	status - the return status (ie 404)
	63	reason - english translation of status (ie 'File not found')
	64
	65	If you want the best of both worlds, use this inside an
	66	AttributeError-catching try:
	67
	68	>>> try: status = fo.status
	69	>>> except AttributeError: status = None
	70
	71	Unfortunately, these are ONLY there if status == 200, so it's not
	72	easy to distinguish between non-200 responses. The reason is that
	73	urllib2 tries to do clever things with error codes 301, 302, 401,
	74	and 407, and it wraps the object upon return.
	75
	76	For python versions earlier than 2.4, you can avoid this fancy error
	77	handling by setting the module-level global HANDLE_ERRORS to zero.
	78	You see, prior to 2.4, it's the HTTP Handler's job to determine what
	79	to handle specially, and what to just pass up. HANDLE_ERRORS == 0
	80	means "pass everything up". In python 2.4, however, this job no
	81	longer belongs to the HTTP Handler and is now done by a NEW handler,
	82	HTTPErrorProcessor. Here's the bottom line:
	83
	84	python version < 2.4
	85	HANDLE_ERRORS == 1 (default) pass up 200, treat the rest as
	86	errors
	87	HANDLE_ERRORS == 0 pass everything up, error processing is
	88	left to the calling code
	89	python version >= 2.4
	90	HANDLE_ERRORS == 1 pass up 200, treat the rest as errors
	91	HANDLE_ERRORS == 0 (default) pass everything up, let the
	92	other handlers (specifically,
	93	HTTPErrorProcessor) decide what to do
	94
	95	In practice, setting the variable either way makes little difference
	96	in python 2.4, so for the most consistent behavior across versions,
	97	you probably just want to use the defaults, which will give you
	98	exceptions on errors.
	99
	100	"""
	101
	102	# $Id: keepalive.py,v 1.13 2005/10/22 21:57:28 mstenner Exp $
	103
	104	import urllib2
	105	import httplib
	106	import socket
	107	import thread
	108
	109	DEBUG = None
	110
	111	import sys
	112	if sys.version_info < (2, 4): HANDLE_ERRORS = 1
	113	else: HANDLE_ERRORS = 0
	114
	115	class ConnectionManager:
	116	"""
	117	The connection manager must be able to:
	118	* keep track of all existing
	119	"""
	120	def __init__(self):
	121	self._lock = thread.allocate_lock()
	122	self._hostmap = {} # map hosts to a list of connections
	123	self._connmap = {} # map connections to host
	124	self._readymap = {} # map connection to ready state
	125
	126	def add(self, host, connection, ready):
	127	self._lock.acquire()
	128	try:
	129	if not self._hostmap.has_key(host): self._hostmap[host] = []
	130	self._hostmap[host].append(connection)
	131	self._connmap[connection] = host
	132	self._readymap[connection] = ready
	133	finally:
	134	self._lock.release()
	135
	136	def remove(self, connection):
	137	self._lock.acquire()
	138	try:
	139	try:
	140	host = self._connmap[connection]
	141	except KeyError:
	142	pass
	143	else:
	144	del self._connmap[connection]
	145	del self._readymap[connection]
	146	self._hostmap[host].remove(connection)
	147	if not self._hostmap[host]: del self._hostmap[host]
	148	finally:
	149	self._lock.release()
	150
	151	def set_ready(self, connection, ready):
	152	try: self._readymap[connection] = ready
	153	except KeyError: pass
	154
	155	def get_ready_conn(self, host):
	156	conn = None
	157	self._lock.acquire()
	158	try:
	159	if self._hostmap.has_key(host):
	160	for c in self._hostmap[host]:
	161	if self._readymap[c]:
	162	self._readymap[c] = 0
	163	conn = c
	164	break
	165	finally:
	166	self._lock.release()
	167	return conn
	168
	169	def get_all(self, host=None):
	170	if host:
	171	return list(self._hostmap.get(host, []))
	172	else:
	173	return dict(self._hostmap)
	174
	175	class HTTPHandler(urllib2.HTTPHandler):
	176	def __init__(self):
	177	self._cm = ConnectionManager()
	178
	179	#### Connection Management
	180	def open_connections(self):
	181	"""return a list of connected hosts and the number of connections
	182	to each. [('foo.com:80', 2), ('bar.org', 1)]"""
	183	return [(host, len(li)) for (host, li) in self._cm.get_all().items()]
	184
	185	def close_connection(self, host):
	186	"""close connection(s) to <host>
	187	host is the host:port spec, as in 'www.cnn.com:8080' as passed in.
	188	no error occurs if there is no connection to that host."""
	189	for h in self._cm.get_all(host):
	190	self._cm.remove(h)
	191	h.close()
	192
	193	def close_all(self):
	194	"""close all open connections"""
	195	for host, conns in self._cm.get_all().items():
	196	for h in conns:
	197	self._cm.remove(h)
	198	h.close()
	199
	200	def _request_closed(self, request, host, connection):
	201	"""tells us that this request is now closed and the the
	202	connection is ready for another request"""
	203	self._cm.set_ready(connection, 1)
	204
	205	def _remove_connection(self, host, connection, close=0):
	206	if close: connection.close()
	207	self._cm.remove(connection)
	208
	209	#### Transaction Execution
	210	def http_open(self, req):
	211	return self.do_open(HTTPConnection, req)
	212
	213	def do_open(self, http_class, req):
	214	host = req.get_host()
	215	if not host:
	216	raise urllib2.URLError('no host given')
	217
	218	try:
	219	h = self._cm.get_ready_conn(host)
	220	while h:
	221	r = self._reuse_connection(h, req, host)
	222
	223	# if this response is non-None, then it worked and we're
	224	# done. Break out, skipping the else block.
	225	if r: break
	226
	227	# connection is bad - possibly closed by server
	228	# discard it and ask for the next free connection
	229	h.close()
	230	self._cm.remove(h)
	231	h = self._cm.get_ready_conn(host)
	232	else:
	233	# no (working) free connections were found. Create a new one.
	234	h = http_class(host)
	235	if DEBUG: DEBUG.info("creating new connection to %s (%d)",
	236	host, id(h))
	237	self._cm.add(host, h, 0)
	238	self._start_transaction(h, req)
	239	r = h.getresponse()
	240	except (socket.error, httplib.HTTPException), err:
	241	raise urllib2.URLError(err)
	242
	243	# if not a persistent connection, don't try to reuse it
	244	if r.will_close: self._cm.remove(h)
	245
	246	if DEBUG: DEBUG.info("STATUS: %s, %s", r.status, r.reason)
	247	r._handler = self
	248	r._host = host
	249	r._url = req.get_full_url()
	250	r._connection = h
	251	r.code = r.status
	252
	253	if r.status == 200 or not HANDLE_ERRORS:
	254	return r
	255	else:
	256	return self.parent.error('http', req, r, r.status, r.reason, r.msg)
	257
	258
	259	def _reuse_connection(self, h, req, host):
	260	"""start the transaction with a re-used connection
	261	return a response object (r) upon success or None on failure.
	262	This DOES not close or remove bad connections in cases where
	263	it returns. However, if an unexpected exception occurs, it
	264	will close and remove the connection before re-raising.
	265	"""
	266	try:
	267	self._start_transaction(h, req)
	268	r = h.getresponse()
	269	# note: just because we got something back doesn't mean it
	270	# worked. We'll check the version below, too.
	271	except (socket.error, httplib.HTTPException):
	272	r = None
	273	except:
	274	# adding this block just in case we've missed
	275	# something we will still raise the exception, but
	276	# lets try and close the connection and remove it
	277	# first. We previously got into a nasty loop
	278	# where an exception was uncaught, and so the
	279	# connection stayed open. On the next try, the
	280	# same exception was raised, etc. The tradeoff is
	281	# that it's now possible this call will raise
	282	# a DIFFERENT exception
	283	if DEBUG: DEBUG.error("unexpected exception - closing " + \
	284	"connection to %s (%d)", host, id(h))
	285	self._cm.remove(h)
	286	h.close()
	287	raise
	288
	289	if r is None or r.version == 9:
	290	# httplib falls back to assuming HTTP 0.9 if it gets a
	291	# bad header back. This is most likely to happen if
	292	# the socket has been closed by the server since we
	293	# last used the connection.
	294	if DEBUG: DEBUG.info("failed to re-use connection to %s (%d)",
	295	host, id(h))
	296	r = None
	297	else:
	298	if DEBUG: DEBUG.info("re-using connection to %s (%d)", host, id(h))
	299
	300	return r
	301
	302	def _start_transaction(self, h, req):
	303	try:
	304	if req.has_data():
	305	data = req.get_data()
	306	h.putrequest('POST', req.get_selector())
	307	if not req.headers.has_key('Content-type'):
	308	h.putheader('Content-type',
	309	'application/x-www-form-urlencoded')
	310	if not req.headers.has_key('Content-length'):
	311	h.putheader('Content-length', '%d' % len(data))
	312	else:
	313	h.putrequest('GET', req.get_selector())
	314	except (socket.error, httplib.HTTPException), err:
	315	raise urllib2.URLError(err)
	316
	317	for args in self.parent.addheaders:
	318	h.putheader(*args)
	319	for k, v in req.headers.items():
	320	h.putheader(k, v)
	321	h.endheaders()
	322	if req.has_data():
	323	h.send(data)
	324
	325	class HTTPResponse(httplib.HTTPResponse):
	326	# we need to subclass HTTPResponse in order to
	327	# 1) add readline() and readlines() methods
	328	# 2) add close_connection() methods
	329	# 3) add info() and geturl() methods
	330
	331	# in order to add readline(), read must be modified to deal with a
	332	# buffer. example: readline must read a buffer and then spit back
	333	# one line at a time. The only real alternative is to read one
	334	# BYTE at a time (ick). Once something has been read, it can't be
	335	# put back (ok, maybe it can, but that's even uglier than this),
	336	# so if you THEN do a normal read, you must first take stuff from
	337	# the buffer.
	338
	339	# the read method wraps the original to accomodate buffering,
	340	# although read() never adds to the buffer.
	341	# Both readline and readlines have been stolen with almost no
	342	# modification from socket.py
	343
	344
	345	def __init__(self, sock, debuglevel=0, strict=0, method=None):
	346	if method: # the httplib in python 2.3 uses the method arg
	347	httplib.HTTPResponse.__init__(self, sock, debuglevel, method)
	348	else: # 2.2 doesn't
	349	httplib.HTTPResponse.__init__(self, sock, debuglevel)
	350	self.fileno = sock.fileno
	351	self.code = None
	352	self._rbuf = ''
	353	self._rbufsize = 8096
	354	self._handler = None # inserted by the handler later
	355	self._host = None # (same)
	356	self._url = None # (same)
	357	self._connection = None # (same)
	358
	359	_raw_read = httplib.HTTPResponse.read
	360
	361	def close(self):
	362	if self.fp:
	363	self.fp.close()
	364	self.fp = None
	365	if self._handler:
	366	self._handler._request_closed(self, self._host,
	367	self._connection)
	368
	369	def close_connection(self):
	370	self._handler._remove_connection(self._host, self._connection, close=1)
	371	self.close()
	372
	373	def info(self):
	374	return self.msg
	375
	376	def geturl(self):
	377	return self._url
	378
	379	def read(self, amt=None):
	380	# the _rbuf test is only in this first if for speed. It's not
	381	# logically necessary
	382	if self._rbuf and not amt is None:
	383	L = len(self._rbuf)
	384	if amt > L:
	385	amt -= L
	386	else:
	387	s = self._rbuf[:amt]
	388	self._rbuf = self._rbuf[amt:]
	389	return s
	390
	391	s = self._rbuf + self._raw_read(amt)
	392	self._rbuf = ''
	393	return s
	394
	395	def readline(self, limit=-1):
	396	data = ""
	397	i = self._rbuf.find('\n')
	398	while i < 0 and not (0 < limit <= len(self._rbuf)):
	399	new = self._raw_read(self._rbufsize)
	400	if not new: break
	401	i = new.find('\n')
	402	if i >= 0: i = i + len(self._rbuf)
	403	self._rbuf = self._rbuf + new
	404	if i < 0: i = len(self._rbuf)
	405	else: i = i+1
	406	if 0 <= limit < len(self._rbuf): i = limit
	407	data, self._rbuf = self._rbuf[:i], self._rbuf[i:]
	408	return data
	409
	410	def readlines(self, sizehint = 0):
	411	total = 0
	412	list = []
	413	while 1:
	414	line = self.readline()
	415	if not line: break
	416	list.append(line)
	417	total += len(line)
	418	if sizehint and total >= sizehint:
	419	break
	420	return list
	421
	422
	423	class HTTPConnection(httplib.HTTPConnection):
	424	# use the modified response class
	425	response_class = HTTPResponse
	426
	427	#########################################################################
	428	##### TEST FUNCTIONS
	429	#########################################################################
	430
	431	def error_handler(url):
	432	global HANDLE_ERRORS
	433	orig = HANDLE_ERRORS
	434	keepalive_handler = HTTPHandler()
	435	opener = urllib2.build_opener(keepalive_handler)
	436	urllib2.install_opener(opener)
	437	pos = {0: 'off', 1: 'on'}
	438	for i in (0, 1):
	439	print " fancy error handling %s (HANDLE_ERRORS = %i)" % (pos[i], i)
	440	HANDLE_ERRORS = i
	441	try:
	442	fo = urllib2.urlopen(url)
	443	foo = fo.read()
	444	fo.close()
	445	try: status, reason = fo.status, fo.reason
	446	except AttributeError: status, reason = None, None
	447	except IOError, e:
	448	print " EXCEPTION: %s" % e
	449	raise
	450	else:
	451	print " status = %s, reason = %s" % (status, reason)
	452	HANDLE_ERRORS = orig
	453	hosts = keepalive_handler.open_connections()
	454	print "open connections:", hosts
	455	keepalive_handler.close_all()
	456
	457	def continuity(url):
	458	import md5
	459	format = '%25s: %s'
	460
	461	# first fetch the file with the normal http handler
	462	opener = urllib2.build_opener()
	463	urllib2.install_opener(opener)
	464	fo = urllib2.urlopen(url)
	465	foo = fo.read()
	466	fo.close()
	467	m = md5.new(foo)
	468	print format % ('normal urllib', m.hexdigest())
	469
	470	# now install the keepalive handler and try again
	471	opener = urllib2.build_opener(HTTPHandler())
	472	urllib2.install_opener(opener)
	473
	474	fo = urllib2.urlopen(url)
	475	foo = fo.read()
	476	fo.close()
	477	m = md5.new(foo)
	478	print format % ('keepalive read', m.hexdigest())
	479
	480	fo = urllib2.urlopen(url)
	481	foo = ''
	482	while 1:
	483	f = fo.readline()
	484	if f: foo = foo + f
	485	else: break
	486	fo.close()
	487	m = md5.new(foo)
	488	print format % ('keepalive readline', m.hexdigest())
	489
	490	def comp(N, url):
	491	print ' making %i connections to:\n %s' % (N, url)
	492
	493	sys.stdout.write(' first using the normal urllib handlers')
	494	# first use normal opener
	495	opener = urllib2.build_opener()
	496	urllib2.install_opener(opener)
	497	t1 = fetch(N, url)
	498	print ' TIME: %.3f s' % t1
	499
	500	sys.stdout.write(' now using the keepalive handler ')
	501	# now install the keepalive handler and try again
	502	opener = urllib2.build_opener(HTTPHandler())
	503	urllib2.install_opener(opener)
	504	t2 = fetch(N, url)
	505	print ' TIME: %.3f s' % t2
	506	print ' improvement factor: %.2f' % (t1/t2, )
	507
	508	def fetch(N, url, delay=0):
	509	import time
	510	lens = []
	511	starttime = time.time()
	512	for i in range(N):
	513	if delay and i > 0: time.sleep(delay)
	514	fo = urllib2.urlopen(url)
	515	foo = fo.read()
	516	fo.close()
	517	lens.append(len(foo))
	518	diff = time.time() - starttime
	519
	520	j = 0
	521	for i in lens[1:]:
	522	j = j + 1
	523	if not i == lens[0]:
	524	print "WARNING: inconsistent length on read %i: %i" % (j, i)
	525
	526	return diff
	527
	528	def test_timeout(url):
	529	global DEBUG
	530	dbbackup = DEBUG
	531	class FakeLogger:
	532	def debug(self, msg, *args): print msg % args
	533	info = warning = error = debug
	534	DEBUG = FakeLogger()
	535	print " fetching the file to establish a connection"
	536	fo = urllib2.urlopen(url)
	537	data1 = fo.read()
	538	fo.close()
	539
	540	i = 20
	541	print " waiting %i seconds for the server to close the connection" % i
	542	while i > 0:
	543	sys.stdout.write('\r %2i' % i)
	544	sys.stdout.flush()
	545	time.sleep(1)
	546	i -= 1
	547	sys.stderr.write('\r')
	548
	549	print " fetching the file a second time"
	550	fo = urllib2.urlopen(url)
	551	data2 = fo.read()
	552	fo.close()
	553
	554	if data1 == data2:
	555	print ' data are identical'
	556	else:
	557	print ' ERROR: DATA DIFFER'
	558
	559	DEBUG = dbbackup
	560
	561
	562	def test(url, N=10):
	563	print "checking error hander (do this on a non-200)"
	564	try: error_handler(url)
	565	except IOError, e:
	566	print "exiting - exception will prevent further tests"
	567	sys.exit()
	568	print
	569	print "performing continuity test (making sure stuff isn't corrupted)"
	570	continuity(url)
	571	print
	572	print "performing speed comparison"
	573	comp(N, url)
	574	print
	575	print "performing dropped-connection check"
	576	test_timeout(url)
	577
	578	if __name__ == '__main__':
	579	import time
	580	import sys
	581	try:
	582	N = int(sys.argv[1])
	583	url = sys.argv[2]
	584	except:
	585	print "%s <integer> <url>" % sys.argv[0]
	586	else:
	587	test(url, N)

mercurial/httprepo.py

0 +16 -4

              from i18n import gettext as _
              from demandload import *
              demandload(globals(), "hg os urllib urllib2 urlparse zlib util httplib")
+             demandload(globals(), "keepalive")
              class passwordmgr(urllib2.HTTPPasswordMgr):
                  def __init__(self, ui):
                      proxyurl = ui.config("http_proxy", "host") or os.getenv('http_proxy')
                      proxyauthinfo = None
-                     handler = urllib2.BaseHandler()
+                     handler = keepalive.HTTPHandler()
                      if proxyurl:
                          # proxy can be proper url or host[:port]
                          self.ui.debug(_('http error while sending %s command\n') % cmd)
                          self.ui.print_exc()
                          raise IOError(None, inst)
+                     try:
+                         proto = resp.getheader('content-type')
+                     except AttributeError:
                      proto = resp.headers['content-type']
                      # accept old "text/plain" and "application/hg-changegroup" for now
                      return resp
+                 def do_read(self, cmd, **args):
+                     fp = self.do_cmd(cmd, **args)
+                     try:
+                         return fp.read()
+                     finally:
+                         # if using keepalive, allow connection to be reused
+                         fp.close()
                  def heads(self):
-                     d = self.do_cmd("heads").read()
+                     d = self.do_read("heads")
                      try:
                          return map(bin, d[:-1].split(" "))
                      except:
                  def branches(self, nodes):
                      n = " ".join(map(hex, nodes))
-                     d = self.do_cmd("branches", nodes=n).read()
+                     d = self.do_read("branches", nodes=n)
                      try:
                          br = [ tuple(map(bin, b.split(" "))) for b in d.splitlines() ]
                          return br
                  def between(self, pairs):
                      n = "\n".join(["-".join(map(hex, p)) for p in pairs])
-                     d = self.do_cmd("between", pairs=n).read()
+                     d = self.do_read("between", pairs=n)
                      try:
                          p = [ l and map(bin, l.split(" ")) or [] for l in d.splitlines() ]
                          return p

General Comments 0

Write
Preview

You need to be logged in to leave comments. Login now

No TODOs yet

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages