diff --git a/mercurial/hgweb/request.py b/mercurial/hgweb/request.py --- a/mercurial/hgweb/request.py +++ b/mercurial/hgweb/request.py @@ -254,12 +254,6 @@ class wsgirequest(object): self.server_write = None self.headers = [] - def drain(self): - '''need to read all data from request, httplib is half-duplex''' - length = int(self.env.get('CONTENT_LENGTH') or 0) - for s in util.filechunkiter(self.inp, limit=length): - pass - def respond(self, status, type, filename=None, body=None): if not isinstance(type, str): type = pycompat.sysstr(type) @@ -292,6 +286,53 @@ class wsgirequest(object): elif isinstance(status, int): status = statusmessage(status) + # Various HTTP clients (notably httplib) won't read the HTTP + # response until the HTTP request has been sent in full. If servers + # (us) send a response before the HTTP request has been fully sent, + # the connection may deadlock because neither end is reading. + # + # We work around this by "draining" the request data before + # sending any response in some conditions. + drain = False + close = False + + # If the client sent Expect: 100-continue, we assume it is smart + # enough to deal with the server sending a response before reading + # the request. (httplib doesn't do this.) + if self.env.get(r'HTTP_EXPECT', r'').lower() == r'100-continue': + pass + # Only tend to request methods that have bodies. Strictly speaking, + # we should sniff for a body. But this is fine for our existing + # WSGI applications. + elif self.env[r'REQUEST_METHOD'] not in (r'POST', r'PUT'): + pass + else: + # If we don't know how much data to read, there's no guarantee + # that we can drain the request responsibly. The WSGI + # specification only says that servers *should* ensure the + # input stream doesn't overrun the actual request. So there's + # no guarantee that reading until EOF won't corrupt the stream + # state. + if not isinstance(self.inp, util.cappedreader): + close = True + else: + # We /could/ only drain certain HTTP response codes. But 200 + # and non-200 wire protocol responses both require draining. + # Since we have a capped reader in place for all situations + # where we drain, it is safe to read from that stream. We'll + # either do a drain or no-op if we're already at EOF. + drain = True + + if close: + self.headers.append((r'Connection', r'Close')) + + if drain: + assert isinstance(self.inp, util.cappedreader) + while True: + chunk = self.inp.read(32768) + if not chunk: + break + self.server_write = self._start_response( pycompat.sysstr(status), self.headers) self._start_response = None diff --git a/mercurial/wireprotoserver.py b/mercurial/wireprotoserver.py --- a/mercurial/wireprotoserver.py +++ b/mercurial/wireprotoserver.py @@ -301,9 +301,6 @@ def _callhttp(repo, wsgireq, req, proto, wsgireq.respond(HTTP_OK, HGTYPE, body=rsp) return [] elif isinstance(rsp, wireprototypes.pusherr): - # This is the httplib workaround documented in _handlehttperror(). - wsgireq.drain() - rsp = '0\n%s\n' % rsp.res wsgireq.respond(HTTP_OK, HGTYPE, body=rsp) return [] @@ -316,21 +313,6 @@ def _callhttp(repo, wsgireq, req, proto, def _handlehttperror(e, wsgireq, req): """Called when an ErrorResponse is raised during HTTP request processing.""" - # Clients using Python's httplib are stateful: the HTTP client - # won't process an HTTP response until all request data is - # sent to the server. The intent of this code is to ensure - # we always read HTTP request data from the client, thus - # ensuring httplib transitions to a state that allows it to read - # the HTTP response. In other words, it helps prevent deadlocks - # on clients using httplib. - - if (req.method == 'POST' and - # But not if Expect: 100-continue is being used. - (req.headers.get('Expect', '').lower() != '100-continue')): - wsgireq.drain() - else: - wsgireq.headers.append((r'Connection', r'Close')) - # TODO This response body assumes the failed command was # "unbundle." That assumption is not always valid. wsgireq.respond(e, HGTYPE, body='0\n%s\n' % pycompat.bytestr(e))