diff --git a/mercurial/hgweb/request.py b/mercurial/hgweb/request.py
--- a/mercurial/hgweb/request.py
+++ b/mercurial/hgweb/request.py
@@ -254,12 +254,6 @@ class wsgirequest(object):
         self.server_write = None
         self.headers = []
 
-    def drain(self):
-        '''need to read all data from request, httplib is half-duplex'''
-        length = int(self.env.get('CONTENT_LENGTH') or 0)
-        for s in util.filechunkiter(self.inp, limit=length):
-            pass
-
     def respond(self, status, type, filename=None, body=None):
         if not isinstance(type, str):
             type = pycompat.sysstr(type)
@@ -292,6 +286,53 @@ class wsgirequest(object):
             elif isinstance(status, int):
                 status = statusmessage(status)
 
+            # Various HTTP clients (notably httplib) won't read the HTTP
+            # response until the HTTP request has been sent in full. If servers
+            # (us) send a response before the HTTP request has been fully sent,
+            # the connection may deadlock because neither end is reading.
+            #
+            # We work around this by "draining" the request data before
+            # sending any response in some conditions.
+            drain = False
+            close = False
+
+            # If the client sent Expect: 100-continue, we assume it is smart
+            # enough to deal with the server sending a response before reading
+            # the request. (httplib doesn't do this.)
+            if self.env.get(r'HTTP_EXPECT', r'').lower() == r'100-continue':
+                pass
+            # Only tend to request methods that have bodies. Strictly speaking,
+            # we should sniff for a body. But this is fine for our existing
+            # WSGI applications.
+            elif self.env[r'REQUEST_METHOD'] not in (r'POST', r'PUT'):
+                pass
+            else:
+                # If we don't know how much data to read, there's no guarantee
+                # that we can drain the request responsibly. The WSGI
+                # specification only says that servers *should* ensure the
+                # input stream doesn't overrun the actual request. So there's
+                # no guarantee that reading until EOF won't corrupt the stream
+                # state.
+                if not isinstance(self.inp, util.cappedreader):
+                    close = True
+                else:
+                    # We /could/ only drain certain HTTP response codes. But 200
+                    # and non-200 wire protocol responses both require draining.
+                    # Since we have a capped reader in place for all situations
+                    # where we drain, it is safe to read from that stream. We'll
+                    # either do a drain or no-op if we're already at EOF.
+                    drain = True
+
+            if close:
+                self.headers.append((r'Connection', r'Close'))
+
+            if drain:
+                assert isinstance(self.inp, util.cappedreader)
+                while True:
+                    chunk = self.inp.read(32768)
+                    if not chunk:
+                        break
+
             self.server_write = self._start_response(
                 pycompat.sysstr(status), self.headers)
             self._start_response = None
diff --git a/mercurial/wireprotoserver.py b/mercurial/wireprotoserver.py
--- a/mercurial/wireprotoserver.py
+++ b/mercurial/wireprotoserver.py
@@ -301,9 +301,6 @@ def _callhttp(repo, wsgireq, req, proto,
         wsgireq.respond(HTTP_OK, HGTYPE, body=rsp)
         return []
     elif isinstance(rsp, wireprototypes.pusherr):
-        # This is the httplib workaround documented in _handlehttperror().
-        wsgireq.drain()
-
         rsp = '0\n%s\n' % rsp.res
         wsgireq.respond(HTTP_OK, HGTYPE, body=rsp)
         return []
@@ -316,21 +313,6 @@ def _callhttp(repo, wsgireq, req, proto,
 def _handlehttperror(e, wsgireq, req):
     """Called when an ErrorResponse is raised during HTTP request processing."""
 
-    # Clients using Python's httplib are stateful: the HTTP client
-    # won't process an HTTP response until all request data is
-    # sent to the server. The intent of this code is to ensure
-    # we always read HTTP request data from the client, thus
-    # ensuring httplib transitions to a state that allows it to read
-    # the HTTP response. In other words, it helps prevent deadlocks
-    # on clients using httplib.
-
-    if (req.method == 'POST' and
-        # But not if Expect: 100-continue is being used.
-        (req.headers.get('Expect', '').lower() != '100-continue')):
-        wsgireq.drain()
-    else:
-        wsgireq.headers.append((r'Connection', r'Close'))
-
     # TODO This response body assumes the failed command was
     # "unbundle." That assumption is not always valid.
     wsgireq.respond(e, HGTYPE, body='0\n%s\n' % pycompat.bytestr(e))