##// END OF EJS Templates
httprepo: handle large lengths by bypassing the len() operator
Matt Mackall -
r14430:c864f5e7 default
parent child Browse files
Show More
@@ -1,268 +1,268 b''
1 1 # httpconnection.py - urllib2 handler for new http support
2 2 #
3 3 # Copyright 2005, 2006, 2007, 2008 Matt Mackall <mpm@selenic.com>
4 4 # Copyright 2006, 2007 Alexis S. L. Carvalho <alexis@cecm.usp.br>
5 5 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
6 6 # Copyright 2011 Google, Inc.
7 7 #
8 8 # This software may be used and distributed according to the terms of the
9 9 # GNU General Public License version 2 or any later version.
10 10 import logging
11 11 import socket
12 12 import urllib
13 13 import urllib2
14 14 import os
15 15
16 16 from mercurial import httpclient
17 17 from mercurial import sslutil
18 18 from mercurial import util
19 19 from mercurial.i18n import _
20 20
# moved here from url.py to avoid a cycle
class httpsendfile(object):
    """This is a wrapper around the objects returned by python's "open".

    Its purpose is to send file-like objects via HTTP and, to do so, it
    defines a __len__ attribute to feed the Content-Length header.

    Every read() is reported to ui.progress() under the 'sending' topic,
    counted in kilobytes.
    """

    def __init__(self, ui, *args, **kwargs):
        """Open the file and record its size.

        ui must provide progress(); the remaining arguments are handed
        to open() unchanged.
        """
        # NOTE: in url.py this constructor had to work around a
        # module-level open() that shadowed the builtin. The code below
        # calls plain open(), i.e. the builtin, directly.
        self.ui = ui
        self._data = open(*args, **kwargs)
        self.seek = self._data.seek
        self.close = self._data.close
        self.write = self._data.write
        self._len = os.fstat(self._data.fileno()).st_size
        self._pos = 0
        # Read self._len directly instead of going through len(self):
        # len() raises OverflowError once __len__ returns a value that
        # does not fit in a native index, which happens for very large
        # files. The total is doubled on purpose, see read().
        self._total = self._len // 1024 * 2

    def read(self, *args, **kwargs):
        """Read from the wrapped file and update the progress display.

        Returns whatever the underlying read() returns. An empty result
        means end of file; in that case the 'sending' progress topic is
        closed and the position counter is left untouched.
        """
        ret = self._data.read(*args, **kwargs)
        if not ret:
            # File objects signal EOF with an empty result rather than
            # by raising EOFError, so this is where the topic is closed.
            self.ui.progress(_('sending'), None)
            return ret
        self._pos += len(ret)
        # We pass double the max for total because we currently have
        # to send the bundle twice in the case of a server that
        # requires authentication. Since we can't know until we try
        # once whether authentication will be required, just lie to
        # the user and maybe the push succeeds suddenly at 50%.
        self.ui.progress(_('sending'), self._pos // 1024,
                         unit=_('kb'), total=self._total)
        return ret

    def __len__(self):
        """Return the file size in bytes, as measured at open time."""
        return self._len
59 59
# moved here from url.py to avoid a cycle
def readauthforuri(ui, uri):
    """Pick the [auth] group whose prefix best matches uri.

    Entries of the 'auth' config section are named "<group>.<setting>";
    names without a dot are reported through ui.warn() and skipped. The
    'username', 'cert' and 'key' settings go through util.expandpath().

    A group is a candidate when its prefix is '*' or a leading part of
    the uri (scheme removed) and the uri scheme is allowed for it. The
    allowed schemes come from the prefix itself when it contains '://',
    otherwise from the group's 'schemes' setting (default 'https').

    Returns a (group, settings) tuple for the candidate with the
    longest prefix, or None when there is no candidate.
    """
    # Gather the settings of every group
    groups = {}
    for name, value in ui.configitems('auth'):
        if '.' not in name:
            ui.warn(_("ignoring invalid [auth] key '%s'\n") % name)
            continue
        groupname, field = name.rsplit('.', 1)
        if field in ('username', 'cert', 'key'):
            value = util.expandpath(value)
        groups.setdefault(groupname, {})[field] = value

    # Select the longest matching prefix
    scheme, hostpath = uri.split('://', 1)
    winner = None
    winnerlen = 0
    for groupname, settings in groups.items():
        prefix = settings.get('prefix')
        if not prefix:
            continue
        pieces = prefix.split('://', 1)
        if len(pieces) > 1:
            schemes = [pieces[0]]
            prefix = pieces[1]
        else:
            schemes = (settings.get('schemes') or 'https').split()
        if len(prefix) <= winnerlen or scheme not in schemes:
            continue
        if prefix == '*' or hostpath.startswith(prefix):
            winnerlen = len(prefix)
            winner = groupname, settings
    return winner
92 92
# Mercurial (at least until we can remove the old codepath) requires
# that the http response object be sufficiently file-like, so we
# provide a close() method here.
class HTTPResponse(httpclient.HTTPResponse):
    """httpclient response that also offers a close() method."""

    def close(self):
        """Do nothing; present only so the object looks file-like."""
        return None
99 99
class HTTPConnection(httpclient.HTTPConnection):
    """httpclient connection that can resend httpsendfile bodies."""

    response_class = HTTPResponse

    def request(self, method, uri, body=None, headers=None):
        """Issue a request through httpclient.HTTPConnection.request().

        method, uri and body are forwarded unchanged. headers defaults
        to a fresh empty dict on every call, so no dict object is shared
        between calls. An httpsendfile body is rewound to offset 0
        first, so that a body which was already read once is sent from
        its beginning.
        """
        if headers is None:
            headers = {}
        if isinstance(body, httpsendfile):
            body.seek(0)
        httpclient.HTTPConnection.request(self, method, uri, body=body,
                                          headers=headers)
107 107
108 108
# Flipped to True by http2handler.__init__ once it has attached a
# handler to the 'mercurial.httpclient' logger, so that this is done at
# most once per process.
_configuredlogging = False
# Record layout used for that handler: level, logger name, line number
# and message, separated by colons.
LOGFMT = ':'.join(('%(levelname)s', '%(name)s', '%(lineno)d',
                   '%(message)s'))
111 111 # Subclass BOTH of these because otherwise urllib2 "helpfully"
112 112 # reinserts them since it notices we don't include any subclasses of
113 113 # them.
114 114 class http2handler(urllib2.HTTPHandler, urllib2.HTTPSHandler):
115 115 def __init__(self, ui, pwmgr):
116 116 global _configuredlogging
117 117 urllib2.AbstractHTTPHandler.__init__(self)
118 118 self.ui = ui
119 119 self.pwmgr = pwmgr
120 120 self._connections = {}
121 121 loglevel = ui.config('ui', 'http2debuglevel', default=None)
122 122 if loglevel and not _configuredlogging:
123 123 _configuredlogging = True
124 124 logger = logging.getLogger('mercurial.httpclient')
125 125 logger.setLevel(getattr(logging, loglevel.upper()))
126 126 handler = logging.StreamHandler()
127 127 handler.setFormatter(logging.Formatter(LOGFMT))
128 128 logger.addHandler(handler)
129 129
130 130 def close_all(self):
131 131 """Close and remove all connection objects being kept for reuse."""
132 132 for openconns in self._connections.values():
133 133 for conn in openconns:
134 134 conn.close()
135 135 self._connections = {}
136 136
137 137 # shamelessly borrowed from urllib2.AbstractHTTPHandler
138 138 def do_open(self, http_class, req, use_ssl):
139 139 """Return an addinfourl object for the request, using http_class.
140 140
141 141 http_class must implement the HTTPConnection API from httplib.
142 142 The addinfourl return value is a file-like object. It also
143 143 has methods and attributes including:
144 144 - info(): return a mimetools.Message object for the headers
145 145 - geturl(): return the original request URL
146 146 - code: HTTP status code
147 147 """
148 148 # If using a proxy, the host returned by get_host() is
149 149 # actually the proxy. On Python 2.6.1, the real destination
150 150 # hostname is encoded in the URI in the urllib2 request
151 151 # object. On Python 2.6.5, it's stored in the _tunnel_host
152 152 # attribute which has no accessor.
153 153 tunhost = getattr(req, '_tunnel_host', None)
154 154 host = req.get_host()
155 155 if tunhost:
156 156 proxyhost = host
157 157 host = tunhost
158 158 elif req.has_proxy():
159 159 proxyhost = req.get_host()
160 160 host = req.get_selector().split('://', 1)[1].split('/', 1)[0]
161 161 else:
162 162 proxyhost = None
163 163
164 164 if proxyhost:
165 165 if ':' in proxyhost:
166 166 # Note: this means we'll explode if we try and use an
167 167 # IPv6 http proxy. This isn't a regression, so we
168 168 # won't worry about it for now.
169 169 proxyhost, proxyport = proxyhost.rsplit(':', 1)
170 170 else:
171 171 proxyport = 3128 # squid default
172 172 proxy = (proxyhost, proxyport)
173 173 else:
174 174 proxy = None
175 175
176 176 if not host:
177 177 raise urllib2.URLError('no host given')
178 178
179 179 connkey = use_ssl, host, proxy
180 180 allconns = self._connections.get(connkey, [])
181 181 conns = [c for c in allconns if not c.busy()]
182 182 if conns:
183 183 h = conns[0]
184 184 else:
185 185 if allconns:
186 186 self.ui.debug('all connections for %s busy, making a new '
187 187 'one\n' % host)
188 188 timeout = None
189 189 if req.timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
190 190 timeout = req.timeout
191 191 h = http_class(host, timeout=timeout, proxy_hostport=proxy)
192 192 self._connections.setdefault(connkey, []).append(h)
193 193
194 194 headers = dict(req.headers)
195 195 headers.update(req.unredirected_hdrs)
196 196 headers = dict(
197 197 (name.title(), val) for name, val in headers.items())
198 198 try:
199 199 path = req.get_selector()
200 200 if '://' in path:
201 201 path = path.split('://', 1)[1].split('/', 1)[1]
202 202 if path[0] != '/':
203 203 path = '/' + path
204 204 h.request(req.get_method(), path, req.data, headers)
205 205 r = h.getresponse()
206 206 except socket.error, err: # XXX what error?
207 207 raise urllib2.URLError(err)
208 208
209 209 # Pick apart the HTTPResponse object to get the addinfourl
210 210 # object initialized properly.
211 211 r.recv = r.read
212 212
213 213 resp = urllib.addinfourl(r, r.headers, req.get_full_url())
214 214 resp.code = r.status
215 215 resp.msg = r.reason
216 216 return resp
217 217
218 218 # httplib always uses the given host/port as the socket connect
219 219 # target, and then allows full URIs in the request path, which it
220 220 # then observes and treats as a signal to do proxying instead.
221 221 def http_open(self, req):
222 222 if req.get_full_url().startswith('https'):
223 223 return self.https_open(req)
224 224 return self.do_open(HTTPConnection, req, False)
225 225
226 226 def https_open(self, req):
227 227 res = readauthforuri(self.ui, req.get_full_url())
228 228 if res:
229 229 group, auth = res
230 230 self.auth = auth
231 231 self.ui.debug("using auth.%s.* for authentication\n" % group)
232 232 else:
233 233 self.auth = None
234 234 return self.do_open(self._makesslconnection, req, True)
235 235
236 236 def _makesslconnection(self, host, port=443, *args, **kwargs):
237 237 keyfile = None
238 238 certfile = None
239 239
240 240 if args: # key_file
241 241 keyfile = args.pop(0)
242 242 if args: # cert_file
243 243 certfile = args.pop(0)
244 244
245 245 # if the user has specified different key/cert files in
246 246 # hgrc, we prefer these
247 247 if self.auth and 'key' in self.auth and 'cert' in self.auth:
248 248 keyfile = self.auth['key']
249 249 certfile = self.auth['cert']
250 250
251 251 # let host port take precedence
252 252 if ':' in host and '[' not in host or ']:' in host:
253 253 host, port = host.rsplit(':', 1)
254 254 port = int(port)
255 255 if '[' in host:
256 256 host = host[1:-1]
257 257
258 258 if keyfile:
259 259 kwargs['keyfile'] = keyfile
260 260 if certfile:
261 261 kwargs['certfile'] = certfile
262 262
263 263 kwargs.update(sslutil.sslkwargs(self.ui, host))
264 264
265 265 con = HTTPConnection(host, port, use_ssl=True,
266 266 ssl_validator=sslutil.validator(self.ui, host),
267 267 **kwargs)
268 268 return con
General Comments 0
You need to be logged in to leave comments. Login now